+ // ADS threshold, then SAD threshold, then keep the best few SADs, then SATD
+ mvsad_t *mvsads = (mvsad_t *)(xs + ((width+15)&~15));
+ int nmvsad = 0, limit;
+ int sad_thresh = i_me_range <= 16 ? 10 : i_me_range <= 24 ? 11 : 12;
+ int bsad = h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE, p_fref+bmy*stride+bmx, stride )
+ + BITS_MVD( bmx, bmy );
+ for( my = min_y; my <= max_y; my++ )  // visit each row my in [min_y, max_y]; surviving positions are appended to mvsads[]
+ {
+ int ycost = p_cost_mvy[my<<2];  // per-row cost term read from p_cost_mvy at index my<<2
+ if( bsad <= ycost )  // the row cost alone already reaches the best cost so far: nothing in this row is tested
+ continue;
+ bsad -= ycost;  // compare against the best cost with this row's ycost removed; it is added back at the end of the iteration
+ xn = h->pixf.ads[i_pixel]( enc_dc, sums_base + min_x + my * stride, delta,  // returns xn, the number of xs[] entries the loops below consume
+ cost_fpel_mvx+min_x, xs, width, bsad*17/16 );  // last argument is bsad scaled by 17/16 (presumably the ADS acceptance threshold -- confirm against the ads implementation)
+ for( i=0; i<xn-2; i+=3 )  // consume the xs[] entries three at a time
+ {
+ uint8_t *ref = p_fref+min_x+my*stride;  // reference pointer for column min_x of row my; xs[] values are offsets from it
+ int sads[3];  // receives the three results of the sad_x3 call below
+ h->pixf.sad_x3[i_pixel]( m->p_fenc[0], ref+xs[i], ref+xs[i+1], ref+xs[i+2], stride, sads );
+ for( j=0; j<3; j++ )
+ {
+ int sad = sads[j] + cost_fpel_mvx[xs[i+j]];  // SAD plus the cost term looked up by this candidate's xs[] value
+ if( sad < bsad*sad_thresh>>3 )  // parsed as sad < ((bsad*sad_thresh)>>3): keep only candidates below sad_thresh/8 of the current best
+ {
+ COPY1_IF_LT( bsad, sad );  // macro defined elsewhere; presumably lowers bsad to sad when sad is smaller -- confirm
+ mvsads[nmvsad].sad = sad + ycost;  // the stored cost includes the row cost again
+ mvsads[nmvsad].mx = min_x+xs[i+j];
+ mvsads[nmvsad].my = my;
+ nmvsad++;  // NOTE(review): no bound on nmvsad is visible in this span -- confirm mvsads has room for every accepted candidate
+ }
+ }
+ }
+ for( ; i<xn; i++ )  // leftover entries (fewer than three): one sad call each, same acceptance test as above
+ {
+ int mx = min_x+xs[i];
+ int sad = h->pixf.sad[i_pixel]( m->p_fenc[0], FENC_STRIDE, p_fref+mx+my*stride, stride )
+ + cost_fpel_mvx[xs[i]];
+ if( sad < bsad*sad_thresh>>3 )
+ {
+ COPY1_IF_LT( bsad, sad );
+ mvsads[nmvsad].sad = sad + ycost;
+ mvsads[nmvsad].mx = mx;
+ mvsads[nmvsad].my = my;
+ nmvsad++;
+ }
+ }
+ bsad += ycost;  // restore the row cost removed before the ads call so bsad is a full cost again for the next row
+ }
+
+ limit = i_me_range / 2;
+ if( nmvsad > limit*2 )