1 /*****************************************************************************
2 * mc.c: h264 encoder library (Motion Compensation)
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: mc.c,v 1.1 2004/06/03 19:27:07 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
35 #undef HAVE_MMXEXT /* not finished now */
/*
 * Apply the 6-tap kernel (1, -5, 20, 20, -5, 1) to six samples spaced
 * i_pix_next bytes apart, with pix[0] and pix[i_pix_next] as the two
 * centre taps.
 *
 * Returns the raw weighted sum: it is neither rounded, normalised (the
 * weights add up to 32) nor clipped, so it can be negative or exceed 255.
 * The caller must make pix[-2*i_pix_next] .. pix[3*i_pix_next] readable.
 */
static inline int x264_tapfilter( const uint8_t *pix, int i_pix_next )
{
    return pix[-2*i_pix_next] - 5*pix[-1*i_pix_next]
         + 20*( pix[0] + pix[1*i_pix_next] )
         - 5*pix[ 2*i_pix_next] + pix[ 3*i_pix_next];
}
/*
 * Apply the 6-tap kernel (1, -5, 20, 20, -5, 1) to six consecutive bytes,
 * with pix[0] and pix[1] as the two centre taps.
 *
 * Returns the raw weighted sum: it is neither rounded, normalised (the
 * weights add up to 32) nor clipped. The caller must make pix[-2] .. pix[3]
 * readable.
 */
static inline int x264_tapfilter1( const uint8_t *pix )
{
    return pix[-2] - 5*pix[-1] + 20*( pix[0] + pix[1] ) - 5*pix[ 2] + pix[ 3];
}
/*
 * Write the rounded-up average of two pixel blocks into dst:
 *     dst[x] = ( src1[x] + src2[x] + 1 ) >> 1
 * for an i_width x i_height block. Each of the three buffers has its own
 * row stride, in bytes.
 */
static inline void pixel_avg( uint8_t *dst, int i_dst_stride,
                              const uint8_t *src1, int i_src1_stride,
                              const uint8_t *src2, int i_src2_stride,
                              int i_width, int i_height )
{
    int x, y;

    for( y = 0; y < i_height; y++ )
    {
        for( x = 0; x < i_width; x++ )
        {
            dst[x] = ( src1[x] + src2[x] + 1 ) >> 1;
        }
        /* All three row pointers advance together; without the dst step
         * every row would overwrite the first output row. */
        dst  += i_dst_stride;
        src1 += i_src1_stride;
        src2 += i_src2_stride;
    }
}
/*
 * Common signature of the luma interpolation routines: read an
 * i_width x i_height block from src (row stride i_src_stride) and write the
 * result to dst (row stride i_dst_stride).
 */
typedef void (*pf_mc_t)(uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height );
/*
 * Full-pel case: copy an i_width x i_height block from src to dst row by
 * row. The rows must not overlap (memcpy is used).
 */
static void mc_copy( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    int y;

    for( y = 0; y < i_height; y++ )
    {
        memcpy( dst, src, i_width );

        /* Step both buffers to the next row. */
        src += i_src_stride;
        dst += i_dst_stride;
    }
}
/*
 * Horizontal half-pel interpolation: each output pixel is the 6-tap filter
 * of src[x-2] .. src[x+3] on the same row, rounded (+16), scaled (>> 5) and
 * passed through x264_mc_clip1().
 *
 * NOTE(review): x264_mc_clip1 is defined outside this file; presumably it
 * clamps to the 0..255 pixel range - confirm.
 * The caller must provide 2 readable pixels left and 3 right of each row.
 */
static inline void mc_hh( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    int x, y;

    for( y = 0; y < i_height; y++ )
    {
        for( x = 0; x < i_width; x++ )
        {
            dst[x] = x264_mc_clip1( ( x264_tapfilter1( &src[x] ) + 16 ) >> 5 );
        }
        /* Step both buffers to the next row. */
        src += i_src_stride;
        dst += i_dst_stride;
    }
}
/*
 * Vertical half-pel interpolation: each output pixel is the 6-tap filter of
 * the six samples in the same column from 2 rows above to 3 rows below,
 * rounded (+16), scaled (>> 5) and passed through x264_mc_clip1().
 *
 * NOTE(review): x264_mc_clip1 is defined outside this file; presumably it
 * clamps to the 0..255 pixel range - confirm.
 * The caller must provide 2 readable rows above and 3 below the block.
 */
static inline void mc_hv( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    int x, y;

    for( y = 0; y < i_height; y++ )
    {
        for( x = 0; x < i_width; x++ )
        {
            dst[x] = x264_mc_clip1( ( x264_tapfilter( &src[x], i_src_stride ) + 16 ) >> 5 );
        }
        /* Step both buffers to the next row. */
        src += i_src_stride;
        dst += i_dst_stride;
    }
}
/*
 * Centre (half-pel in both directions) interpolation.
 *
 * For each column the unrounded horizontal 6-tap results of six consecutive
 * rows are kept in tap[0..5]; the vertical 6-tap filter is then applied to
 * those intermediates, rounded (+512), scaled (>> 10) and passed through
 * x264_mc_clip1(). Working column by column lets five of the six horizontal
 * results be reused from one output row to the next.
 *
 * NOTE(review): x264_mc_clip1 is defined outside this file; presumably it
 * clamps to the 0..255 pixel range - confirm.
 * The caller must provide a readable border of 2 pixels left/above and
 * 3 pixels right/below the block.
 */
static inline void mc_hc( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    int x, y;

    for( x = 0; x < i_width; x++ )
    {
        uint8_t *pix = &src[x];
        uint8_t *out = &dst[x];
        int tap[6];

        /* Prime the window with the rows -2 .. +2 around the first output row. */
        tap[0] = x264_tapfilter1( &pix[-2*i_src_stride] );
        tap[1] = x264_tapfilter1( &pix[-1*i_src_stride] );
        tap[2] = x264_tapfilter1( &pix[ 0*i_src_stride] );
        tap[3] = x264_tapfilter1( &pix[ 1*i_src_stride] );
        tap[4] = x264_tapfilter1( &pix[ 2*i_src_stride] );

        for( y = 0; y < i_height; y++ )
        {
            /* Only the bottom row of the window is new for this output row. */
            tap[5] = x264_tapfilter1( &pix[ 3*i_src_stride] );

            *out = x264_mc_clip1( ( tap[0] - 5*tap[1] + 20 * tap[2] + 20 * tap[3] -5*tap[4] + tap[5] + 512 ) >> 10 );

            /* Next line: move down one row and slide the window. */
            pix += i_src_stride;
            out += i_dst_stride;
            tap[0] = tap[1];
            tap[1] = tap[2];
            tap[2] = tap[3];
            tap[3] = tap[4];
            tap[4] = tap[5];
        }
    }
}
/*
 * Quarter-pel position dqx=1, dqy=0: average of the full-pel block and the
 * horizontal half-pel block.
 */
static void mc_xy10( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* NOTE(review): scratch size assumes blocks of at most 16x16 pixels;
     * the declaration is not visible in this extract - confirm. */
    uint8_t tmp[16*16];

    mc_hh( src, i_src_stride, tmp, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, src, i_src_stride, tmp, i_width, i_width, i_height );
}
/*
 * Quarter-pel position dqx=3, dqy=0: average of the horizontal half-pel
 * block and the full-pel block one pixel to the right.
 */
static void mc_xy30( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* NOTE(review): scratch size assumes blocks of at most 16x16 pixels;
     * the declaration is not visible in this extract - confirm. */
    uint8_t tmp[16*16];

    mc_hh( src, i_src_stride, tmp, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, src+1, i_src_stride, tmp, i_width, i_width, i_height );
}
/*
 * Quarter-pel position dqx=0, dqy=1: average of the full-pel block and the
 * vertical half-pel block.
 */
static void mc_xy01( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* NOTE(review): scratch size assumes blocks of at most 16x16 pixels;
     * the declaration is not visible in this extract - confirm. */
    uint8_t tmp[16*16];

    mc_hv( src, i_src_stride, tmp, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, src, i_src_stride, tmp, i_width, i_width, i_height );
}
/*
 * Quarter-pel position dqx=0, dqy=3: average of the vertical half-pel block
 * and the full-pel block one row below.
 */
static void mc_xy03( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* NOTE(review): scratch size assumes blocks of at most 16x16 pixels;
     * the declaration is not visible in this extract - confirm. */
    uint8_t tmp[16*16];

    mc_hv( src, i_src_stride, tmp, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, src+i_src_stride, i_src_stride, tmp, i_width, i_width, i_height );
}
/*
 * Quarter-pel position dqx=1, dqy=1: average of the vertical half-pel block
 * and the horizontal half-pel block, both taken at src.
 */
static void mc_xy11( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* NOTE(review): scratch sizes assume blocks of at most 16x16 pixels;
     * the declarations are not visible in this extract - confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hv( src, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hh( src, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/*
 * Quarter-pel position dqx=3, dqy=1: average of the vertical half-pel block
 * one pixel to the right and the horizontal half-pel block at src.
 */
static void mc_xy31( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* NOTE(review): scratch sizes assume blocks of at most 16x16 pixels;
     * the declarations are not visible in this extract - confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hv( src+1, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hh( src, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/*
 * Quarter-pel position dqx=1, dqy=3: average of the vertical half-pel block
 * at src and the horizontal half-pel block one row below.
 */
static void mc_xy13( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* NOTE(review): scratch sizes assume blocks of at most 16x16 pixels;
     * the declarations are not visible in this extract - confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hv( src, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hh( src+i_src_stride, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/*
 * Quarter-pel position dqx=3, dqy=3: average of the vertical half-pel block
 * one pixel to the right and the horizontal half-pel block one row below.
 */
static void mc_xy33( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* NOTE(review): scratch sizes assume blocks of at most 16x16 pixels;
     * the declarations are not visible in this extract - confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hv( src+1, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hh( src+i_src_stride, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/*
 * Quarter-pel position dqx=2, dqy=1: average of the centre half-pel block
 * and the horizontal half-pel block at src.
 */
static void mc_xy21( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* NOTE(review): scratch sizes assume blocks of at most 16x16 pixels;
     * the declarations are not visible in this extract - confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hc( src, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hh( src, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/*
 * Quarter-pel position dqx=1, dqy=2: average of the centre half-pel block
 * and the vertical half-pel block at src.
 */
static void mc_xy12( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* NOTE(review): scratch sizes assume blocks of at most 16x16 pixels;
     * the declarations are not visible in this extract - confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hc( src, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hv( src, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/*
 * Quarter-pel position dqx=3, dqy=2: average of the centre half-pel block
 * and the vertical half-pel block one pixel to the right.
 */
static void mc_xy32( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* NOTE(review): scratch sizes assume blocks of at most 16x16 pixels;
     * the declarations are not visible in this extract - confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hc( src, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hv( src+1, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
/*
 * Quarter-pel position dqx=2, dqy=3: average of the centre half-pel block
 * and the horizontal half-pel block one row below.
 */
static void mc_xy23( uint8_t *src, int i_src_stride, uint8_t *dst, int i_dst_stride, int i_width, int i_height )
{
    /* NOTE(review): scratch sizes assume blocks of at most 16x16 pixels;
     * the declarations are not visible in this extract - confirm. */
    uint8_t tmp1[16*16];
    uint8_t tmp2[16*16];

    mc_hc( src, i_src_stride, tmp1, i_width, i_width, i_height );
    mc_hh( src+i_src_stride, i_src_stride, tmp2, i_width, i_width, i_height );
    pixel_avg( dst, i_dst_stride, tmp1, i_width, tmp2, i_width, i_width, i_height );
}
251 static void motion_compensation_luma( uint8_t *src, int i_src_stride,
252 uint8_t *dst, int i_dst_stride,
254 int i_width, int i_height )
256 static pf_mc_t pf_mc[4][4] = /*XXX [dqy][dqx] */
258 { mc_copy, mc_xy10, mc_hh, mc_xy30 },
259 { mc_xy01, mc_xy11, mc_xy21, mc_xy31 },
260 { mc_hv, mc_xy12, mc_hc, mc_xy32 },
261 { mc_xy03, mc_xy13, mc_xy23, mc_xy33 },
264 src += (mvy >> 2) * i_src_stride + (mvx >> 2);
265 pf_mc[mvy&0x03][mvx&0x03]( src, i_src_stride, dst, i_dst_stride, i_width, i_height );
/*
 * Full chroma motion compensation (i.e. down to 1/8-pixel precision).
 *
 * mvx/mvy are in 1/8-pel units: (mv >> 3) is the full-pel offset into src
 * and (mv & 7) the fractional part. Each output pixel is the bilinear blend
 * of the 2x2 neighbourhood, with weights cA..cD that always sum to 64,
 * rounded (+32) and scaled (>> 6).
 *
 * src[x+1] and the row below are read even when their weight is zero, so the
 * caller must keep one extra column and one extra row readable.
 *
 * NOTE(review): the mvx/mvy parameter line is missing from this extract; it
 * is restored between the dst and size parameters because the body uses both
 * names - confirm against x264_mc_function_t.
 * The shifts rely on arithmetic right shift for negative vectors, which is
 * implementation-defined in C.
 */
static void motion_compensation_chroma( uint8_t *src, int i_src_stride,
                                        uint8_t *dst, int i_dst_stride,
                                        int mvx, int mvy,
                                        int i_width, int i_height )
{
    uint8_t *srcp;
    int x, y;

    const int d8x = mvx&0x07;
    const int d8y = mvy&0x07;

    const int cA = (8-d8x)*(8-d8y);
    const int cB = d8x    *(8-d8y);
    const int cC = (8-d8x)*d8y;
    const int cD = d8x    *d8y;

    src  += (mvy >> 3) * i_src_stride + (mvx >> 3);
    srcp = &src[i_src_stride];

    for( y = 0; y < i_height; y++ )
    {
        for( x = 0; x < i_width; x++ )
        {
            dst[x] = ( cA*src[x]  + cB*src[x+1] +
                       cC*srcp[x] + cD*srcp[x+1] + 32 ) >> 6;
        }
        dst  += i_dst_stride;

        /* The lower row of this pair becomes the upper row of the next. */
        src   = srcp;
        srcp += i_src_stride;
    }
}
302 void x264_mc_init( int cpu, x264_mc_function_t pf[2] )
304 pf[MC_LUMA] = motion_compensation_luma;
305 pf[MC_CHROMA] = motion_compensation_chroma;
308 if( cpu&X264_CPU_MMXEXT )
310 x264_mc_mmxext_init( pf );
314 if( cpu&X264_CPU_ALTIVEC )
316 x264_mc_altivec_init( pf );