1 /*****************************************************************************
2 * me.c: h264 encoder library (Motion Estimation)
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: me.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8 * Loren Merritt <lorenm@u.washington.edu>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
29 #include "common/common.h"
32 /* presets selected from good points on the speed-vs-quality curve of several test videos
33 * subpel_iters[i_subpel_refine] = { refine_hpel, refine_qpel, me_hpel, me_qpel }
34 * where me_* are the number of EPZS iterations run on all candidate block types,
35 * and refine_* are run only on the winner. */
/* NOTE(review): the initializer rows are not visible in this chunk; each row
 * is a { refine_hpel, refine_qpel, me_hpel, me_qpel } tuple indexed by
 * i_subpel_refine (see the comment above). */
static const int subpel_iterations[][4] =

/* Forward declaration: iterative half-/quarter-pel refinement pass run on the
 * winner of the full-pel search (defined at the bottom of this file). */
static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters );
/* Full-pel candidate cost for the EPZS search: luma SAD at the integer mv
 * (mx,my) plus the rate cost of signalling that mv. The cost tables are
 * indexed in quarter-pel units, hence the <<2 on the full-pel components.
 * NOTE(review): the tail of this macro (the compare-and-update of
 * bcost/bmx/bmy) is not visible in this chunk; no comment lines may be
 * inserted inside the macro without breaking the line continuations. */
#define COST_MV( mx, my ) \
    int cost = h->pixf.sad[i_pixel]( m->p_fenc[0], m->i_stride[0], \
        &p_fref[(my)*m->i_stride[0]+(mx)], m->i_stride[0] ) \
        + p_cost_mvx[ (mx)<<2 ] \
        + p_cost_mvy[ (my)<<2 ]; \
/* Full-pel motion search over one reference frame.
 * Seeds the search with the rounded mv predictor (m->mvp) and the i_mvc extra
 * candidates in mvc[], runs a pattern search whose shape depends on
 * h->mb.i_subpel_refine (hexagon + square refinement when >= 2, small diamond
 * otherwise), re-scores the winner with SATD (plus chroma SATD when chroma ME
 * is enabled), then hands the result to refine_subpel for subpel polishing.
 * p_fullpel_thresh, when non-NULL, implements early termination across
 * multiple reference frames.
 * NOTE(review): several lines of this function are elided in this chunk
 * (declarations of i_iter/bmx/bmy/bcost/omx/omy, braces, parts of the cost
 * accumulation); the comments below describe only the visible code. */
void x264_me_search_ref( x264_t *h, x264_me_t *m, int (*mvc)[2], int i_mvc, int *p_fullpel_thresh )
    const int i_pixel = m->i_pixel;
    /* chroma ME only pays off for partitions of 8x8 and larger */
    const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
    uint8_t *p_fref = m->p_fref[0];
    /* full-pel mv bounds (already restricted to the padded picture) */
    const int mv_x_min = h->mb.mv_min_fpel[0];
    const int mv_y_min = h->mb.mv_min_fpel[1];
    const int mv_x_max = h->mb.mv_max_fpel[0];
    const int mv_y_max = h->mb.mv_max_fpel[1];
    /* bias the mv-rate tables so they can be indexed by absolute quarter-pel
     * components instead of (mv - mvp) */
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
    /* XXX: We don't need to clamp because of the way the diamond works: we will
     * never go outside the padded picture, and predicted mvs won't compute a
     * vector with a greater component magnitude.
     * XXX: if some vector can go outside (accelerator, ....) you need to clip. */
    /* start from the mv predictor, rounded from quarter-pel to full-pel */
    bmx = x264_clip3( ( m->mvp[0] + 2 ) >> 2, mv_x_min, mv_x_max );
    bmy = x264_clip3( ( m->mvp[1] + 2 ) >> 2, mv_y_min, mv_y_max );

    /* I don't know why this helps */
    bcost -= p_cost_mvx[ bmx<<2 ] + p_cost_mvy[ bmy<<2 ];

    /* try extra predictors if provided */
    for( i_iter = 0; i_iter < i_mvc; i_iter++ )
        const int mx = x264_clip3( ( mvc[i_iter][0] + 2 ) >> 2, mv_x_min, mv_x_max );
        const int my = x264_clip3( ( mvc[i_iter][1] + 2 ) >> 2, mv_y_min, mv_y_max );
        /* only score candidates that differ from the current best */
        if( mx != bmx || my != bmy )

    if( h->mb.i_subpel_refine >= 2 )
        /* Don't need to test mv_range each time, we won't go outside picture+padding */
        /* up to 8 iterations of a six-point hexagon pattern around (omx,omy) */
        for( i_iter = 0; i_iter < 8; i_iter++ )
            COST_MV( omx-2, omy );
            COST_MV( omx-1, omy+2 );
            COST_MV( omx+1, omy+2 );
            COST_MV( omx+2, omy );
            COST_MV( omx+1, omy-2 );
            COST_MV( omx-1, omy-2 );
            /* converged: the centre is still the best */
            if( bmx == omx && bmy == omy )
        /* final square refinement: the 8 neighbours of the hexagon winner */
        COST_MV( omx-1, omy-1 );
        COST_MV( omx-1, omy );
        COST_MV( omx-1, omy+1 );
        COST_MV( omx  , omy-1 );
        COST_MV( omx  , omy+1 );
        COST_MV( omx+1, omy-1 );
        COST_MV( omx+1, omy );
        COST_MV( omx+1, omy+1 );
        /* low subpel_refine presets: up to 16 iterations of a small
         * (four-point) diamond instead */
        for( i_iter = 0; i_iter < 16; i_iter++ )
            COST_MV( omx  , omy-1 );
            COST_MV( omx  , omy+1 );
            COST_MV( omx-1, omy );
            COST_MV( omx+1, omy );
            if( bmx == omx && bmy == omy )

    /* compute the real cost */
    m->cost_mv = p_cost_mvx[ m->mv[0] ] + p_cost_mvy[ m->mv[1] ];
    /* re-score the full-pel winner with SATD (the search above used SAD) */
    m->cost = h->pixf.satd[i_pixel]( m->p_fenc[0], m->i_stride[0],
        &p_fref[bmy * m->i_stride[0] + bmx], m->i_stride[0] )
        /* chroma ME: motion-compensate U and V into a local 8x8+8x8 buffer
         * and add their SATD against the encode planes */
        const int bw = x264_pixel_size[m->i_pixel].w;
        const int bh = x264_pixel_size[m->i_pixel].h;
        DECLARE_ALIGNED( uint8_t, pix[8*8*2], 16 );
        h->mc.mc_chroma( m->p_fref[4], m->i_stride[1], pix, 8, m->mv[0], m->mv[1], bw/2, bh/2 );
        h->mc.mc_chroma( m->p_fref[5], m->i_stride[1], pix+8*8, 8, m->mv[0], m->mv[1], bw/2, bh/2 );
        m->cost += h->pixf.satd[i_pixel+3]( m->p_fenc[1], m->i_stride[1], pix, 8 )
            + h->pixf.satd[i_pixel+3]( m->p_fenc[2], m->i_stride[1], pix+8*8, 8 );

    if( h->mb.i_subpel_refine >= 3 )
    /* early termination (when examining multiple reference frames)
     * FIXME: this can update fullpel_thresh even if the match
     * ref is rejected after subpel refinement */
    if( p_fullpel_thresh )
        /* cost is clearly worse than the best ref so far: give up on this ref */
        if( (m->cost*7)>>3 > *p_fullpel_thresh )
        else if( m->cost < *p_fullpel_thresh )
            *p_fullpel_thresh = m->cost;

    /* subpel refinement with the preset's in-search iteration counts */
    hpel = subpel_iterations[h->mb.i_subpel_refine][2];
    qpel = subpel_iterations[h->mb.i_subpel_refine][3];
    refine_subpel( h, m, hpel, qpel );
/* Public entry point for subpel-only refinement: looks up the per-preset
 * half-pel / quarter-pel iteration counts (columns 0 and 1 of
 * subpel_iterations, the refine_* pair) and runs refine_subpel on the mv
 * already stored in *m. */
void x264_me_refine_qpel( x264_t *h, x264_me_t *m )
    int hpel = subpel_iterations[h->mb.i_subpel_refine][0];
    int qpel = subpel_iterations[h->mb.i_subpel_refine][1];
    // if( hpel || qpel )
    refine_subpel( h, m, hpel, qpel );
/* Subpel candidate cost used by refine_subpel: interpolate the luma reference
 * at quarter-pel mv (mx,my) via get_ref, take SATD against the encode block,
 * and add the mv rate cost (indices are already in quarter-pel units here, so
 * no <<2). Chroma SATD is only added once the luma cost already beats bcost,
 * as a shortcut to skip the chroma MC for losing candidates. 'dir' tags which
 * diamond direction produced the candidate.
 * NOTE(review): the macro's tail (the second cost<bcost guard before the V
 * plane, and the bcost/bdir update) is not visible in this chunk; no comment
 * lines may be inserted inside the macro without breaking the continuations. */
#define COST_MV( mx, my, dir ) \
    uint8_t *src = h->mc.get_ref( m->p_fref, m->i_stride[0], pix, &stride, mx, my, bw, bh ); \
    int cost = h->pixf.satd[i_pixel]( m->p_fenc[0], m->i_stride[0], src, stride ) \
        + p_cost_mvx[ mx ] + p_cost_mvy[ my ]; \
    if( b_chroma_me && cost < bcost ) \
        h->mc.mc_chroma( m->p_fref[4], m->i_stride[1], pix, 8, mx, my, bw/2, bh/2 ); \
        cost += h->pixf.satd[i_pixel+3]( m->p_fenc[1], m->i_stride[1], pix, 8 ); \
        h->mc.mc_chroma( m->p_fref[5], m->i_stride[1], pix, 8, mx, my, bw/2, bh/2 ); \
        cost += h->pixf.satd[i_pixel+3]( m->p_fenc[2], m->i_stride[1], pix, 8 ); \
/* Iterative subpel refinement of m->mv: a half-pel pass (step 2) followed by
 * a quarter-pel pass (step 1), each running its given number of iterations of
 * a four-point diamond around the current best mv and moving the centre in
 * the winning direction until no neighbour improves m->cost.
 * NOTE(review): declarations of step/i/bmx/bmy/bdir/stride and the loop
 * braces are not visible in this chunk; comments describe visible code only. */
static void refine_subpel( x264_t *h, x264_me_t *m, int hpel_iters, int qpel_iters )
    const int bw = x264_pixel_size[m->i_pixel].w;
    const int bh = x264_pixel_size[m->i_pixel].h;
    /* same absolute-quarter-pel bias as in x264_me_search_ref */
    const int16_t *p_cost_mvx = m->p_cost_mv - m->mvp[0];
    const int16_t *p_cost_mvy = m->p_cost_mv - m->mvp[1];
    const int i_pixel = m->i_pixel;
    const int b_chroma_me = h->mb.b_chroma_me && i_pixel <= PIXEL_8x8;
    /* scratch buffer shared by get_ref and mc_chroma inside COST_MV */
    DECLARE_ALIGNED( uint8_t, pix[16*16], 16 );
    /* step 2 = half-pel pass, step 1 = quarter-pel pass */
    for( step = 2; step >= 1; step-- )
        for( i = step>1 ? hpel_iters : qpel_iters; i > 0; i-- )
            int bcost = COST_MAX;
            /* score the four diamond neighbours at the current step size */
            COST_MV( bmx, bmy - step, 0 );
            COST_MV( bmx, bmy + step, 1 );
            COST_MV( bmx - step, bmy, 2 );
            COST_MV( bmx + step, bmy, 3 );
            /* a neighbour beat the current best: recentre on it */
            if( bcost < m->cost )
                if( bdir == 0 ) bmy -= step;
                else if( bdir == 1 ) bmy += step;
                else if( bdir == 2 ) bmx -= step;
                else if( bdir == 3 ) bmx += step;
    /* final mv rate cost for the refined vector */
    m->cost_mv = p_cost_mvx[ bmx ] + p_cost_mvy[ bmy ];