/*****************************************************************************
 * mc.c: h264 encoder library (Motion Compensation)
 *****************************************************************************
 * Copyright (C) 2003 Laurent Aimar
 * $Id: mc.c,v 1.1 2004/06/03 19:27:07 fenrir Exp $
 *
 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
 *****************************************************************************/
/* Raw 6-tap filter sum with kernel (1, -5, 20, 20, -5, 1) over samples that
 * are i_pix_next bytes apart.
 *
 * pix        : points at the third of the six taps; the function reads
 *              pix[-2*i_pix_next] through pix[3*i_pix_next].
 * i_pix_next : distance in bytes between consecutive taps.
 *
 * Returns the unrounded, unclipped weighted sum; callers do the rounding,
 * shifting and clipping. */
static inline int x264_tapfilter( uint8_t *pix, int i_pix_next )
{
    return        pix[-2*i_pix_next]
           -  5 * pix[-1*i_pix_next]
           + 20 * ( pix[0] + pix[1*i_pix_next] )
           -  5 * pix[ 2*i_pix_next]
           +      pix[ 3*i_pix_next];
}
/* Raw 6-tap filter sum with kernel (1, -5, 20, 20, -5, 1) over six
 * horizontally adjacent bytes, pix[-2] through pix[3].
 *
 * Returns the unrounded, unclipped weighted sum; callers do the rounding,
 * shifting and clipping. */
static inline int x264_tapfilter1( uint8_t *pix )
{
    return        pix[-2]
           -  5 * pix[-1]
           + 20 * ( pix[0] + pix[1] )
           -  5 * pix[ 2]
           +      pix[ 3];
}
/* Write the rounded average of two byte blocks into dst.
 *
 * Each of dst, src1 and src2 has its own row stride (in bytes); the block
 * is i_width bytes wide and i_height rows tall.  Every output sample is
 * (a + b + 1) >> 1, i.e. the mean rounded half up. */
static inline void pixel_avg( uint8_t *dst,  int i_dst_stride,
                              uint8_t *src1, int i_src1_stride,
                              uint8_t *src2, int i_src2_stride,
                              int i_width, int i_height )
{
    int x, y;

    for( y = 0; y < i_height; y++ )
    {
        for( x = 0; x < i_width; x++ )
        {
            dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
        }
        /* Advance all three pointers to the next row. */
        dst  += i_dst_stride;
        src1 += i_src1_stride;
        src2 += i_src2_stride;
    }
}
/* Signature shared by the block motion-compensation primitives in this file
 * (mc_copy, mc_hh, mc_hv, mc_hc, mc_xyNN): read an i_width x i_height block
 * from src (row stride i_src_stride) and write it to dst (row stride
 * i_dst_stride). */
typedef void (*pf_mc_t)(uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height );
/* Copy an i_width x i_height block of bytes from src to dst, row by row.
 * src and dst each have their own row stride in bytes.  Rows are copied
 * with memcpy(), so a source row must not overlap its destination row. */
static void mc_copy( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    int y;

    for( y = 0; y < i_height; y++ )
    {
        memcpy( dst, src, i_width );

        src += i_src_stride;
        dst += i_dst_stride;
    }
}
/* Horizontal half-sample interpolation of an i_width x i_height block.
 *
 * Each output sample is the horizontal 6-tap sum at src[x] (reads
 * src[x-2] .. src[x+3]), rounded with +16, scaled by >> 5, and passed
 * through x264_mc_clip1() (defined outside this file; presumably clamps to
 * the 8-bit sample range -- confirm). */
static inline void mc_hh( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    int x, y;

    for( y = 0; y < i_height; y++ )
    {
        for( x = 0; x < i_width; x++ )
        {
            dst[x] = x264_mc_clip1( ( x264_tapfilter1( &src[x] ) + 16 ) >> 5 );
        }
        src += i_src_stride;
        dst += i_dst_stride;
    }
}
/* Vertical half-sample interpolation of an i_width x i_height block.
 *
 * Each output sample is the 6-tap sum taken down the column at src[x]
 * (reads rows -2 .. +3 relative to the current row), rounded with +16,
 * scaled by >> 5, and passed through x264_mc_clip1() (defined outside this
 * file; presumably clamps to the 8-bit sample range -- confirm). */
static inline void mc_hv( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    int x, y;

    for( y = 0; y < i_height; y++ )
    {
        for( x = 0; x < i_width; x++ )
        {
            dst[x] = x264_mc_clip1( ( x264_tapfilter( &src[x], i_src_stride ) + 16 ) >> 5 );
        }
        src += i_src_stride;
        dst += i_dst_stride;
    }
}
/* Centre (horizontal + vertical) half-sample interpolation of an
 * i_width x i_height block.
 *
 * Works column by column.  For each column it keeps a sliding window of six
 * unrounded horizontal 6-tap sums (rows -2 .. +3 around the current row),
 * applies the same 6-tap kernel vertically to that window, rounds with
 * +512, scales by >> 10 and passes the result through x264_mc_clip1().
 * Only one new horizontal sum (tap[5]) is computed per output sample.
 *
 * NOTE(review): the pix/out setup and the window shift at the end of the
 * inner loop were missing from the damaged listing and are reconstructed
 * from how tap[0..5] are used -- confirm against the pristine source. */
static inline void mc_hc( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    uint8_t *out;
    uint8_t *pix;
    int x, y;

    for( x = 0; x < i_width; x++ )
    {
        int tap[6];

        pix = &src[x];
        out = &dst[x];

        /* Prime the window with the five rows above/around the first output. */
        tap[0] = x264_tapfilter1( &pix[-2*i_src_stride] );
        tap[1] = x264_tapfilter1( &pix[-1*i_src_stride] );
        tap[2] = x264_tapfilter1( &pix[ 0*i_src_stride] );
        tap[3] = x264_tapfilter1( &pix[ 1*i_src_stride] );
        tap[4] = x264_tapfilter1( &pix[ 2*i_src_stride] );

        for( y = 0; y < i_height; y++ )
        {
            tap[5] = x264_tapfilter1( &pix[ 3*i_src_stride] );

            *out = x264_mc_clip1( ( tap[0] - 5*tap[1] + 20 * tap[2] + 20 * tap[3] -5*tap[4] + tap[5] + 512 ) >> 10 );

            /* Next line: step down one row and slide the window. */
            pix += i_src_stride;
            out += i_dst_stride;
            tap[0] = tap[1];
            tap[1] = tap[2];
            tap[2] = tap[3];
            tap[3] = tap[4];
            tap[4] = tap[5];
        }
    }
}
/* Quarter-sample position dqx=1, dqy=0: average of the integer samples at
 * src and the horizontal half-samples from mc_hh(). */
static void mc_xy10( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* Scratch block, stored with row stride i_width.
     * NOTE(review): this declaration was lost from the listing; 16*16
     * assumes blocks are never larger than 16x16 -- confirm. */
    uint8_t tmp[16*16];

    mc_hh( src, i_src_stride, tmp, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, src, i_src_stride, tmp, i_width, i_width, i_height );
}
/* Quarter-sample position dqx=3, dqy=0: average of the horizontal
 * half-samples from mc_hh() and the integer samples one column to the
 * right (src+1). */
static void mc_xy30( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* Scratch block, stored with row stride i_width.
     * NOTE(review): this declaration was lost from the listing; 16*16
     * assumes blocks are never larger than 16x16 -- confirm. */
    uint8_t tmp[16*16];

    mc_hh( src, i_src_stride, tmp, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, src+1, i_src_stride, tmp, i_width, i_width, i_height );
}
/* Quarter-sample position dqx=0, dqy=1: average of the integer samples at
 * src and the vertical half-samples from mc_hv(). */
static void mc_xy01( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* Scratch block, stored with row stride i_width.
     * NOTE(review): this declaration was lost from the listing; 16*16
     * assumes blocks are never larger than 16x16 -- confirm. */
    uint8_t tmp[16*16];

    mc_hv( src, i_src_stride, tmp, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, src, i_src_stride, tmp, i_width, i_width, i_height );
}
/* Quarter-sample position dqx=0, dqy=3: average of the vertical
 * half-samples from mc_hv() and the integer samples one row below
 * (src+i_src_stride). */
static void mc_xy03( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* Scratch block, stored with row stride i_width.
     * NOTE(review): this declaration was lost from the listing; 16*16
     * assumes blocks are never larger than 16x16 -- confirm. */
    uint8_t tmp[16*16];

    mc_hv( src, i_src_stride, tmp, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, src+i_src_stride, i_src_stride, tmp, i_width, i_width, i_height );
}
/* Quarter-sample position dqx=1, dqy=1: average of the vertical
 * half-samples (mc_hv at src) and the horizontal half-samples (mc_hh at
 * src). */
static void mc_xy11( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* Scratch blocks, stored with row stride i_width.
     * NOTE(review): these declarations were lost from the listing; 16*16
     * assumes blocks are never larger than 16x16 -- confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hv( src, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hh( src, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/* Quarter-sample position dqx=3, dqy=1: average of the vertical
 * half-samples one column to the right (mc_hv at src+1) and the horizontal
 * half-samples (mc_hh at src). */
static void mc_xy31( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* Scratch blocks, stored with row stride i_width.
     * NOTE(review): these declarations were lost from the listing; 16*16
     * assumes blocks are never larger than 16x16 -- confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hv( src+1, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hh( src, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/* Quarter-sample position dqx=1, dqy=3: average of the vertical
 * half-samples (mc_hv at src) and the horizontal half-samples one row
 * below (mc_hh at src+i_src_stride). */
static void mc_xy13( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* Scratch blocks, stored with row stride i_width.
     * NOTE(review): these declarations were lost from the listing; 16*16
     * assumes blocks are never larger than 16x16 -- confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hv( src, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hh( src+i_src_stride, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/* Quarter-sample position dqx=3, dqy=3: average of the vertical
 * half-samples one column to the right (mc_hv at src+1) and the horizontal
 * half-samples one row below (mc_hh at src+i_src_stride). */
static void mc_xy33( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* Scratch blocks, stored with row stride i_width.
     * NOTE(review): these declarations were lost from the listing; 16*16
     * assumes blocks are never larger than 16x16 -- confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hv( src+1, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hh( src+i_src_stride, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/* Quarter-sample position dqx=2, dqy=1: average of the centre half-samples
 * (mc_hc at src) and the horizontal half-samples (mc_hh at src). */
static void mc_xy21( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* Scratch blocks, stored with row stride i_width.
     * NOTE(review): these declarations were lost from the listing; 16*16
     * assumes blocks are never larger than 16x16 -- confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hc( src, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hh( src, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/* Quarter-sample position dqx=1, dqy=2: average of the centre half-samples
 * (mc_hc at src) and the vertical half-samples (mc_hv at src). */
static void mc_xy12( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* Scratch blocks, stored with row stride i_width.
     * NOTE(review): these declarations were lost from the listing; 16*16
     * assumes blocks are never larger than 16x16 -- confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hc( src, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hv( src, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/* Quarter-sample position dqx=3, dqy=2: average of the centre half-samples
 * (mc_hc at src) and the vertical half-samples one column to the right
 * (mc_hv at src+1). */
static void mc_xy32( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* Scratch blocks, stored with row stride i_width.
     * NOTE(review): these declarations were lost from the listing; 16*16
     * assumes blocks are never larger than 16x16 -- confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hc( src, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hv( src+1, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/* Quarter-sample position dqx=2, dqy=3: average of the centre half-samples
 * (mc_hc at src) and the horizontal half-samples one row below (mc_hh at
 * src+i_src_stride). */
static void mc_xy23( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* Scratch blocks, stored with row stride i_width.
     * NOTE(review): these declarations were lost from the listing; 16*16
     * assumes blocks are never larger than 16x16 -- confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hc( src, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hh( src+i_src_stride, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
255 static void motion_compensation_luma( uint8_t *src, int i_src_stride,
256 uint8_t *dst, int i_dst_stride,
258 int i_width, int i_height )
260 static pf_mc_t pf_mc[4][4] = /*XXX [dqy][dqx] */
262 { mc_copy, mc_xy10, mc_hh, mc_xy30 },
263 { mc_xy01, mc_xy11, mc_xy21, mc_xy31 },
264 { mc_hv, mc_xy12, mc_hc, mc_xy32 },
265 { mc_xy03, mc_xy13, mc_xy23, mc_xy33 },
268 src += (mvy >> 2) * i_src_stride + (mvx >> 2);
269 pf_mc[mvy&0x03][mvx&0x03]( src, i_src_stride, dst, i_dst_stride, i_width, i_height );
/* Luma motion compensation from four planes.
 *
 * src[] is indexed by (half-sample x parity) + 2 * (half-sample y parity),
 * so the planes are presumably the integer, horizontal-half, vertical-half
 * and centre-half images (cf. x264_frame_filter) -- confirm with callers.
 * mvx / mvy are in quarter-sample units.
 *
 * If either component is odd, the result is the rounded average of two
 * half-sample blocks; otherwise a single block is copied.  `correction`
 * is 1 only when both components are odd and their half-sample bits
 * differ, in which case the y rounding of the two blocks is swapped.
 *
 * NOTE(review): the "int mvx, int mvy" parameter line and the definition
 * of hpel1x were missing from the damaged listing.  hpel1x = mvx >> 1 is
 * restored as the counterpart of hpel2x = (mvx+1) >> 1 -- confirm. */
static void mc_luma( uint8_t *src[4], int i_src_stride,
                     uint8_t *dst,    int i_dst_stride,
                     int mvx, int mvy,
                     int i_width, int i_height )
{
    uint8_t *src1, *src2;

    int correction = (mvx&1) && (mvy&1) && ((mvx&2) ^ (mvy&2));
    int hpel1x = mvx>>1;
    int hpel1y = (mvy+1-correction)>>1;
    int filter1 = (hpel1x & 1) + ( (hpel1y & 1) << 1 );

    src1 = src[filter1] + (hpel1y >> 1) * i_src_stride + (hpel1x >> 1);

    if ( (mvx|mvy) & 1 ) /* qpel interpolation needed */
    {
        int hpel2x = (mvx+1)>>1;
        int hpel2y = (mvy+correction)>>1;
        int filter2 = (hpel2x & 1) + ( (hpel2y & 1) <<1 );

        src2 = src[filter2] + (hpel2y >> 1) * i_src_stride + (hpel2x >> 1);

        pixel_avg( dst, i_dst_stride, src1, i_src_stride,
                   src2, i_src_stride, i_width, i_height );
    }
    else
    {
        mc_copy( src1, i_src_stride, dst, i_dst_stride, i_width, i_height );
    }
}
/* Like mc_luma(), but avoids the copy when no quarter-sample averaging is
 * needed.
 *
 * If either vector component is odd, the averaged block is written to dst
 * using *i_dst_stride as its row stride and dst is returned.  Otherwise
 * nothing is written: *i_dst_stride is set to i_src_stride and a pointer
 * directly into the selected src plane is returned.
 *
 * NOTE(review): the "int mvx, int mvy" parameter line, the definition of
 * hpel1x and both return statements were missing from the damaged listing.
 * They are restored from the function's return type, the stride update in
 * the else branch, and the parallel code in mc_luma() -- confirm. */
static uint8_t *get_ref( uint8_t *src[4], int i_src_stride,
                         uint8_t *dst,    int * i_dst_stride,
                         int mvx, int mvy,
                         int i_width, int i_height )
{
    uint8_t *src1, *src2;

    int correction = (mvx&1) && (mvy&1) && ((mvx&2) ^ (mvy&2));
    int hpel1x = mvx>>1;
    int hpel1y = (mvy+1-correction)>>1;
    int filter1 = (hpel1x & 1) + ( (hpel1y & 1) << 1 );

    src1 = src[filter1] + (hpel1y >> 1) * i_src_stride + (hpel1x >> 1);

    if ( (mvx|mvy) & 1 ) /* qpel interpolation needed */
    {
        int hpel2x = (mvx+1)>>1;
        int hpel2y = (mvy+correction)>>1;
        int filter2 = (hpel2x & 1) + ( (hpel2y & 1) <<1 );

        src2 = src[filter2] + (hpel2y >> 1) * i_src_stride + (hpel2x >> 1);

        pixel_avg( dst, *i_dst_stride, src1, i_src_stride,
                   src2, i_src_stride, i_width, i_height );

        return dst;
    }
    else
    {
        /* The caller reads straight from the reference plane. */
        *i_dst_stride = i_src_stride;
        return src1;
    }
}
/* full chroma mc (ie until 1/8 pixel)
 *
 * Bilinear interpolation of an i_width x i_height block.  mvx / mvy are in
 * 1/8-sample units: the upper bits (>> 3) move src by whole samples, the
 * low three bits (d8x, d8y) give the weights of the four neighbouring
 * samples.  The weights cA..cD always sum to 64, hence the +32 rounding
 * and >> 6.
 *
 * Note that src[x+1] and the row below are always read, even when their
 * weight is zero, so one extra column and one extra row must be readable.
 *
 * NOTE(review): the "int mvx, int mvy" parameter line, the local
 * declarations and the dst/src row advances were missing from the damaged
 * listing and are reconstructed -- confirm against the pristine source. */
static void motion_compensation_chroma( uint8_t *src, int i_src_stride,
                                        uint8_t *dst, int i_dst_stride,
                                        int mvx, int mvy,
                                        int i_width, int i_height )
{
    uint8_t *srcp;
    int x, y;

    const int d8x = mvx&0x07;
    const int d8y = mvy&0x07;

    const int cA = (8-d8x)*(8-d8y);
    const int cB = d8x    *(8-d8y);
    const int cC = (8-d8x)*d8y;
    const int cD = d8x    *d8y;

    src  += (mvy >> 3) * i_src_stride + (mvx >> 3);
    srcp = &src[i_src_stride];

    for( y = 0; y < i_height; y++ )
    {
        for( x = 0; x < i_width; x++ )
        {
            dst[x] = ( cA*src[x]  + cB*src[x+1] +
                       cC*srcp[x] + cD*srcp[x+1] + 32 ) >> 6;
        }
        dst  += i_dst_stride;

        /* The lower row becomes the upper row of the next iteration. */
        src   = srcp;
        srcp += i_src_stride;
    }
}
/* Chroma motion compensation wrapper: uses the plain C routine for some
 * block sizes and the external x264_mc_chroma_sse() otherwise.
 *
 * For the SSE path the integer part of the vector is applied to src here
 * and only the 1/8-sample fractions (d8x, d8y) are passed on.  Note that
 * x264_mc_chroma_sse() takes i_height before i_width.
 *
 * NOTE(review): the damaged listing lost one line before the C-fallback
 * call and one line before the SSE path, i.e. an if/else whose condition
 * is not visible.  "i_width == 2" is restored from the upstream x264
 * source, and the HAVE_MMXEXT guard is assumed because x264_mc_init() only
 * installs this function on its MMXEXT path -- confirm both. */
#ifdef HAVE_MMXEXT
static void motion_compensation_chroma_sse( uint8_t *src, int i_src_stride,
                                            uint8_t *dst, int i_dst_stride,
                                            int mvx, int mvy,
                                            int i_width, int i_height )
{
    if( i_width == 2 )
    {
        motion_compensation_chroma(src, i_src_stride, dst, i_dst_stride,
                                   mvx, mvy, i_width, i_height);
    }
    else
    {
        const int d8x = mvx&0x07;
        const int d8y = mvy&0x07;

        src  += (mvy >> 3) * i_src_stride + (mvx >> 3);

        x264_mc_chroma_sse(src, i_src_stride, dst, i_dst_stride,
                           d8x, d8y, i_height, i_width);
    }
}
#endif
393 void x264_mc_init( int cpu, x264_mc_functions_t *pf )
395 pf->mc_luma = mc_luma;
396 pf->get_ref = get_ref;
397 pf->mc_chroma = motion_compensation_chroma;
400 if( cpu&X264_CPU_MMXEXT ) {
401 x264_mc_mmxext_init( pf );
402 pf->mc_chroma = motion_compensation_chroma_sse;
406 if( cpu&X264_CPU_SSE2 )
407 x264_mc_sse2_init( pf );
410 if( cpu&X264_CPU_ALTIVEC )
411 x264_mc_altivec_init( pf );
416 void get_funcs_mmx(pf_mc_t*, pf_mc_t*, pf_mc_t*);
417 void get_funcs_sse2(pf_mc_t*, pf_mc_t*, pf_mc_t*);
/* Whole-plane half-sample filters defined outside this file and called from
 * x264_frame_filter().  Unlike pf_mc_t, the destination(s) come first and
 * the source last.  The centre filter takes two destinations;
 * x264_frame_filter() passes filtered[2] and filtered[3] for them. */
extern void x264_horizontal_filter_mmxext( uint8_t *dst, int i_dst_stride,
                                           uint8_t *src, int i_src_stride,
                                           int i_width, int i_height );
extern void x264_center_filter_mmxext( uint8_t *dst1, int i_dst1_stride,
                                       uint8_t *dst2, int i_dst2_stride,
                                       uint8_t *src, int i_src_stride,
                                       int i_width, int i_height );
428 void x264_frame_filter( int cpu, x264_frame_t *frame )
430 const int x_inc = 16, y_inc = 16;
431 const int stride = frame->i_stride[0];
434 pf_mc_t int_h = mc_hh;
435 pf_mc_t int_v = mc_hv;
436 pf_mc_t int_hv = mc_hc;
440 if( cpu&X264_CPU_MMXEXT )
441 get_funcs_mmx(&int_h, &int_v, &int_hv);
445 if( cpu&X264_CPU_SSE2 )
446 get_funcs_sse2(&int_h, &int_v, &int_hv);
451 if ( cpu & X264_CPU_MMXEXT )
453 x264_horizontal_filter_mmxext(frame->filtered[1] - 8 * stride - 8, stride,
454 frame->plane[0] - 8 * stride - 8, stride,
455 stride - 48, frame->i_lines[0] + 16);
456 x264_center_filter_mmxext(frame->filtered[2] - 8 * stride - 8, stride,
457 frame->filtered[3] - 8 * stride - 8, stride,
458 frame->plane[0] - 8 * stride - 8, stride,
459 stride - 48, frame->i_lines[0] + 16);
464 for( y = -8; y < frame->i_lines[0]+8; y += y_inc )
466 uint8_t *p_in = frame->plane[0] + y * stride - 8;
467 uint8_t *p_h = frame->filtered[1] + y * stride - 8;
468 uint8_t *p_v = frame->filtered[2] + y * stride - 8;
469 uint8_t *p_hv = frame->filtered[3] + y * stride - 8;
470 for( x = -8; x < stride - 64 + 8; x += x_inc )
472 int_h( p_in, stride, p_h, stride, x_inc, y_inc );
473 int_v( p_in, stride, p_v, stride, x_inc, y_inc );
474 int_hv( p_in, stride, p_hv, stride, x_inc, y_inc );
485 void x264_frame_init_lowres( int cpu, x264_frame_t *frame )
488 const int i_stride = frame->i_stride[0];
489 const int i_stride2 = frame->i_stride_lowres;
490 const int i_width2 = i_stride2 - 64;
492 for( y = 0; y < frame->i_lines_lowres - 1; y++ )
494 uint8_t *src0 = &frame->plane[0][2*y*i_stride];
495 uint8_t *src1 = src0+i_stride;
496 uint8_t *src2 = src1+i_stride;
497 uint8_t *dst0 = &frame->lowres[0][y*i_stride2];
498 uint8_t *dsth = &frame->lowres[1][y*i_stride2];
499 uint8_t *dstv = &frame->lowres[2][y*i_stride2];
500 uint8_t *dstc = &frame->lowres[3][y*i_stride2];
501 for( x = 0; x < i_width2 - 1; x++ )
503 dst0[x] = (src0[2*x ] + src0[2*x+1] + src1[2*x ] + src1[2*x+1] + 2) >> 2;
504 dsth[x] = (src0[2*x+1] + src0[2*x+2] + src1[2*x+1] + src1[2*x+2] + 2) >> 2;
505 dstv[x] = (src1[2*x ] + src1[2*x+1] + src2[2*x ] + src2[2*x+1] + 2) >> 2;
506 dstc[x] = (src1[2*x+1] + src1[2*x+2] + src2[2*x+1] + src2[2*x+2] + 2) >> 2;
508 dst0[x] = (src0[2*x ] + src0[2*x+1] + src1[2*x ] + src1[2*x+1] + 2) >> 2;
509 dstv[x] = (src1[2*x ] + src1[2*x+1] + src2[2*x ] + src2[2*x+1] + 2) >> 2;
510 dsth[x] = (src0[2*x+1] + src1[2*x+1] + 1) >> 1;
511 dstc[x] = (src1[2*x+1] + src2[2*x+1] + 1) >> 1;
513 for( i = 0; i < 4; i++ )
514 memcpy( &frame->lowres[i][y*i_stride2], &frame->lowres[i][(y-1)*i_stride2], i_width2 );
516 for( y = 0; y < 16; y++ )
517 for( x = 0; x < 16; x++ )
518 frame->i_cost_est[x][y] = -1;
520 x264_frame_expand_border_lowres( frame );