1 /*****************************************************************************
2 * me.c: h264 encoder library (Motion Estimation)
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: me.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8 * Loren Merritt <lorenm@u.washington.edu>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
29 #include "common/common.h"
32 /* presets selected from good points on the speed-vs-quality curve of several test videos
33 * subpel_iters[i_subpel_refine] = { refine_hpel, refine_qpel, me_hpel, me_qpel }
34 * where me_* are the number of EPZS iterations run on all candidate block types,
35 * and refine_* are run only on the winner. */
36 const static int subpel_iterations[][4] =
44 static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters );
46 #define COST_MV( mx, my ) \
48 int cost = h->pixf.sad[i_pixel]( m->p_fenc, m->i_stride, \
49 &p_fref[(my)*m->i_stride+(mx)], m->i_stride ) + \
50 m->lm * ( bs_size_se(((mx)<<2) - m->mvp[0] ) + \
51 bs_size_se(((my)<<2) - m->mvp[1] ) ); \
60 void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_fullpel_thresh )
62 const int i_pixel = m->i_pixel;
65 uint8_t *p_fref = m->p_fref;
70 /* XXX: We don't need to clamp because the way diamond work, we will
71 * never go outside padded picture, and predict mv won't compute vector
72 * with componant magnitude greater.
73 * XXX: if some vector can go outside, (accelerator, ....) you need to clip
75 bmx = x264_clip3( ( m->mvp[0] + 2 ) >> 2, -m->i_mv_range, m->i_mv_range );
76 bmy = x264_clip3( ( m->mvp[1] + 2 ) >> 2, -m->i_mv_range, m->i_mv_range );
78 bcost = h->pixf.sad[i_pixel]( m->p_fenc, m->i_stride,
79 &p_fref[bmy * m->i_stride + bmx], m->i_stride );
81 /* try extra predictors if provided */
82 for( i_iter = 0; i_iter < i_mvc; i_iter++ )
84 const int mx = x264_clip3( ( mvc[i_iter][0] + 2 ) >> 2, -m->i_mv_range, m->i_mv_range );
85 const int my = x264_clip3( ( mvc[i_iter][1] + 2 ) >> 2, -m->i_mv_range, m->i_mv_range );
86 if( mx != bmx || my != bmy )
92 if( h->param.analyse.i_subpel_refine >= 2 )
95 /* Don't need to test mv_range each time, we won't go outside picture+padding */
98 for( i_iter = 0; i_iter < 8; i_iter++ )
100 COST_MV( omx-2, omy );
101 COST_MV( omx-1, omy+2 );
102 COST_MV( omx+1, omy+2 );
103 COST_MV( omx+2, omy );
104 COST_MV( omx+1, omy-2 );
105 COST_MV( omx-1, omy-2 );
107 if( bmx == omx && bmy == omy )
114 COST_MV( omx-1, omy-1 );
115 COST_MV( omx-1, omy );
116 COST_MV( omx-1, omy+1 );
117 COST_MV( omx , omy-1 );
118 COST_MV( omx , omy+1 );
119 COST_MV( omx+1, omy-1 );
120 COST_MV( omx+1, omy );
121 COST_MV( omx+1, omy+1 );
126 for( i_iter = 0; i_iter < 16; i_iter++ )
130 COST_MV( omx , omy-1 );
131 COST_MV( omx , omy+1 );
132 COST_MV( omx-1, omy );
133 COST_MV( omx+1, omy );
134 if( bmx == omx && bmy == omy )
143 /* compute the real cost */
144 m->cost = h->pixf.satd[i_pixel]( m->p_fenc, m->i_stride,
145 &p_fref[bmy * m->i_stride + bmx], m->i_stride ) +
146 m->lm * ( bs_size_se( m->mv[0] - m->mvp[0] ) +
147 bs_size_se( m->mv[1] - m->mvp[1] ) );
150 if( h->param.analyse.i_subpel_refine >= 3 )
154 /* early termination (when examining multiple reference frames)
155 * FIXME: this can update fullpel_thresh even if the match
156 * ref is rejected after subpel refinement */
157 if( p_fullpel_thresh )
159 if( (m->cost*7)>>3 > *p_fullpel_thresh )
161 else if( m->cost < *p_fullpel_thresh )
162 *p_fullpel_thresh = m->cost;
165 hpel = subpel_iterations[h->param.analyse.i_subpel_refine][2];
166 qpel = subpel_iterations[h->param.analyse.i_subpel_refine][3];
167 refine_subpel( h, m, hpel, qpel );
172 void x264_me_refine_qpel( x264_t *h, x264_me_t *m )
174 int hpel = subpel_iterations[h->param.analyse.i_subpel_refine][0];
175 int qpel = subpel_iterations[h->param.analyse.i_subpel_refine][1];
176 // if( hpel || qpel )
177 refine_subpel( h, m, hpel, qpel );
180 static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters )
182 const int bw = x264_pixel_size[m->i_pixel].w;
183 const int bh = x264_pixel_size[m->i_pixel].h;
185 DECLARE_ALIGNED( uint8_t, pix[4][16*16], 16 );
193 for( step = 2; step >= 1; step-- )
195 for( i = step>1 ? hpel_iters : qpel_iters; i > 0; i-- )
197 h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[0], 16, bmx + 0, bmy - step, bw, bh );
198 h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[1], 16, bmx + 0, bmy + step, bw, bh );
199 h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[2], 16, bmx - step, bmy + 0, bw, bh );
200 h->mc[MC_LUMA]( m->p_fref, m->i_stride, pix[3], 16, bmx + step, bmy + 0, bw, bh );
202 cost[0] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[0], 16 ) +
203 m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy - step - m->mvp[1] ) );
204 cost[1] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[1], 16 ) +
205 m->lm * ( bs_size_se( bmx + 0 - m->mvp[0] ) + bs_size_se( bmy + step - m->mvp[1] ) );
206 cost[2] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[2], 16 ) +
207 m->lm * ( bs_size_se( bmx - step - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
208 cost[3] = h->pixf.satd[m->i_pixel]( m->p_fenc, m->i_stride, pix[3], 16 ) +
209 m->lm * ( bs_size_se( bmx + step - m->mvp[0] ) + bs_size_se( bmy + 0 - m->mvp[1] ) );
212 if( cost[1] < cost[0] ) best = 1;
213 if( cost[2] < cost[best] ) best = 2;
214 if( cost[3] < cost[best] ) best = 3;
216 if( cost[best] < m->cost )
218 m->cost = cost[best];
219 if( best == 0 ) bmy -= step;
220 else if( best == 1 ) bmy += step;
221 else if( best == 2 ) bmx -= step;
222 else if( best == 3 ) bmx += step;