1 /*****************************************************************************
2 * pixel.c: h264 encoder
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: pixel.c,v 1.1 2004/06/03 19:27:07 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
38 # include "i386/pixel.h"
41 # include "ppc/pixel.h"
/****************************************************************************
 * pixel_sad_WxH: sum of absolute differences over a WxH block
 ****************************************************************************/
/* PIXEL_SAD_C( name, lx, ly )
 *
 * Expands to
 *     static int name( uint8_t *pix1, int i_stride_pix1,
 *                      uint8_t *pix2, int i_stride_pix2 )
 * which visits an lx-wide, ly-tall block in both planes and returns the sum
 * of |pix1[x] - pix2[x]| over every sample.  Each plane has its own stride
 * (distance in bytes between the starts of two consecutive rows); the two
 * strides need not be equal.  Neither plane is modified. */
#define PIXEL_SAD_C( name, lx, ly ) \
static int name( uint8_t *pix1, int i_stride_pix1,  \
                 uint8_t *pix2, int i_stride_pix2 ) \
{                                                   \
    int i_sum = 0;                                  \
    int x, y;                                       \
    for( y = 0; y < ly; y++ )                       \
    {                                               \
        for( x = 0; x < lx; x++ )                   \
        {                                           \
            i_sum += abs( pix1[x] - pix2[x] );      \
        }                                           \
        pix1 += i_stride_pix1;                      \
        pix2 += i_stride_pix2;                      \
    }                                               \
    return i_sum;                                   \
}

PIXEL_SAD_C( pixel_sad_16x16, 16, 16 )
PIXEL_SAD_C( pixel_sad_16x8,  16,  8 )
PIXEL_SAD_C( pixel_sad_8x16,   8, 16 )
PIXEL_SAD_C( pixel_sad_8x8,    8,  8 )
PIXEL_SAD_C( pixel_sad_8x4,    8,  4 )
PIXEL_SAD_C( pixel_sad_4x8,    4,  8 )
PIXEL_SAD_C( pixel_sad_4x4,    4,  4 )
/****************************************************************************
 * pixel_ssd_WxH: sum of squared differences over a WxH block
 ****************************************************************************/
/* PIXEL_SSD_C( name, lx, ly )
 *
 * Expands to
 *     static int name( uint8_t *pix1, int i_stride_pix1,
 *                      uint8_t *pix2, int i_stride_pix2 )
 * which visits an lx-wide, ly-tall block in both planes and returns the sum
 * of (pix1[x] - pix2[x])^2 over every sample.  The difference is formed in
 * int, so it is signed before squaring.  Each plane advances by its own
 * stride after every row; neither plane is modified. */
#define PIXEL_SSD_C( name, lx, ly ) \
static int name( uint8_t *pix1, int i_stride_pix1,  \
                 uint8_t *pix2, int i_stride_pix2 ) \
{                                                   \
    int i_ssd = 0;                                  \
    int x, y;                                       \
    for( y = 0; y < ly; y++ )                       \
    {                                               \
        for( x = 0; x < lx; x++ )                   \
        {                                           \
            int d = pix1[x] - pix2[x];              \
            i_ssd += d*d;                           \
        }                                           \
        pix1 += i_stride_pix1;                      \
        pix2 += i_stride_pix2;                      \
    }                                               \
    return i_ssd;                                   \
}

PIXEL_SSD_C( pixel_ssd_16x16, 16, 16 )
PIXEL_SSD_C( pixel_ssd_16x8,  16,  8 )
PIXEL_SSD_C( pixel_ssd_8x16,   8, 16 )
PIXEL_SSD_C( pixel_ssd_8x8,    8,  8 )
PIXEL_SSD_C( pixel_ssd_8x4,    8,  4 )
PIXEL_SSD_C( pixel_ssd_4x8,    4,  8 )
PIXEL_SSD_C( pixel_ssd_4x4,    4,  4 )
/* pixel_sub_4x4:
 * Store the signed per-sample difference pix1 - pix2 of a 4x4 block into
 * diff[row][col].
 *
 *  diff    receives the 16 differences (each lies in -255..255)
 *  pix1    top-left sample of the first block;  i_pix1 is its row stride
 *  pix2    top-left sample of the second block; i_pix2 is its row stride
 */
static void pixel_sub_4x4( int16_t diff[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
    int y, x;

    for( y = 0; y < 4; y++ )
    {
        for( x = 0; x < 4; x++ )
        {
            diff[y][x] = pix1[x] - pix2[x];
        }
        /* step both planes down one row, each with its own stride */
        pix1 += i_pix1;
        pix2 += i_pix2;
    }
}
/* pixel_satd_wxh:
 * Sum of absolute transformed differences between two blocks.
 *
 * The block is processed in 4x4 tiles (both loops step by 4 and each tile is
 * fetched with pixel_sub_4x4), so i_width and i_height are treated as
 * multiples of 4; a partial tile would still be read as a full 4x4.
 *
 * For every tile the pixel difference is run through a 4-point +/-1
 * butterfly along each row, then along each column, and the absolute values
 * of the 16 resulting coefficients are accumulated.
 *
 *  pix1, i_pix1    first plane and its row stride
 *  pix2, i_pix2    second plane and its row stride
 *  returns         the accumulated sum, halved
 */
static int pixel_satd_wxh( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
{
    int16_t tmp[4][4];
    int16_t diff[4][4];
    int x, y;
    int i_satd = 0;

    for( y = 0; y < i_height; y += 4 )
    {
        for( x = 0; x < i_width; x += 4 )
        {
            int d;

            pixel_sub_4x4( diff, &pix1[x], i_pix1, &pix2[x], i_pix2 );

            /* horizontal pass: transform each row of the difference tile */
            for( d = 0; d < 4; d++ )
            {
                int s01, s23;
                int d01, d23;

                s01 = diff[d][0] + diff[d][1]; s23 = diff[d][2] + diff[d][3];
                d01 = diff[d][0] - diff[d][1]; d23 = diff[d][2] - diff[d][3];

                tmp[d][0] = s01 + s23;
                tmp[d][1] = s01 - s23;
                tmp[d][2] = d01 - d23;
                tmp[d][3] = d01 + d23;
            }
            /* vertical pass: transform each column and accumulate magnitudes */
            for( d = 0; d < 4; d++ )
            {
                int s01, s23;
                int d01, d23;

                s01 = tmp[0][d] + tmp[1][d]; s23 = tmp[2][d] + tmp[3][d];
                d01 = tmp[0][d] - tmp[1][d]; d23 = tmp[2][d] - tmp[3][d];

                i_satd += abs( s01 + s23 ) + abs( s01 - s23 ) + abs( d01 - d23 ) + abs( d01 + d23 );
            }
        }
        /* next row of tiles: four sample rows further down in each plane */
        pix1 += 4 * i_pix1;
        pix2 += 4 * i_pix2;
    }

    /* NOTE(review): the halving follows the usual x264 SATD normalisation;
     * confirm it matches the SIMD satd routines that may replace this one. */
    return i_satd / 2;
}
/* PIXEL_SATD_C( name, width, height )
 *
 * Expands to a fixed-size wrapper
 *     static int name( uint8_t *pix1, int i_stride_pix1,
 *                      uint8_t *pix2, int i_stride_pix2 )
 * that forwards to pixel_satd_wxh with the block size baked in, giving every
 * block size the same four-argument signature. */
#define PIXEL_SATD_C( name, width, height ) \
static int name( uint8_t *pix1, int i_stride_pix1, \
                 uint8_t *pix2, int i_stride_pix2 ) \
{ \
    return pixel_satd_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height ); \
}
PIXEL_SATD_C( pixel_satd_16x16, 16, 16 )
PIXEL_SATD_C( pixel_satd_16x8,  16,  8 )
PIXEL_SATD_C( pixel_satd_8x16,   8, 16 )
PIXEL_SATD_C( pixel_satd_8x8,    8,  8 )
PIXEL_SATD_C( pixel_satd_8x4,    8,  4 )
PIXEL_SATD_C( pixel_satd_4x8,    4,  8 )
PIXEL_SATD_C( pixel_satd_4x4,    4,  4 )
/* pixel_avg_wxh:
 * In-place rounded average of two blocks: every sample of the width x height
 * block at dst is replaced by (dst + src + 1) >> 1, i.e. the mean rounded
 * half up.
 *
 *  dst, i_dst    destination plane (read and overwritten) and its row stride
 *  src, i_src    second operand (read only) and its row stride
 */
static inline void pixel_avg_wxh( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int width, int height )
{
    int x, y;

    for( y = 0; y < height; y++ )
    {
        for( x = 0; x < width; x++ )
        {
            dst[x] = ( dst[x] + src[x] + 1 ) >> 1;
        }
        dst += i_dst;
        src += i_src;
    }
}
/* PIXEL_AVG_C( name, width, height )
 *
 * Expands to a fixed-size wrapper
 *     static void name( uint8_t *pix1, int i_stride_pix1,
 *                       uint8_t *pix2, int i_stride_pix2 )
 * that forwards to pixel_avg_wxh with the block size baked in.  pix1 is the
 * plane that gets overwritten with the average; pix2 is only read. */
#define PIXEL_AVG_C( name, width, height ) \
static void name( uint8_t *pix1, int i_stride_pix1, \
                  uint8_t *pix2, int i_stride_pix2 ) \
{ \
    pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height ); \
}
PIXEL_AVG_C( pixel_avg_16x16, 16, 16 )
PIXEL_AVG_C( pixel_avg_16x8,  16,  8 )
PIXEL_AVG_C( pixel_avg_8x16,   8, 16 )
PIXEL_AVG_C( pixel_avg_8x8,    8,  8 )
PIXEL_AVG_C( pixel_avg_8x4,    8,  4 )
PIXEL_AVG_C( pixel_avg_4x8,    4,  8 )
PIXEL_AVG_C( pixel_avg_4x4,    4,  4 )
PIXEL_AVG_C( pixel_avg_4x2,    4,  2 )
PIXEL_AVG_C( pixel_avg_2x4,    2,  4 )
PIXEL_AVG_C( pixel_avg_2x2,    2,  2 )
/* Implicit weighted bipred only:
 * assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64 */
#define op_scale2(x) dst[x] = x264_clip_uint8( (dst[x]*i_weight1 + src[x]*i_weight2 + (1<<5)) >> 6 )

/* pixel_avg_weight_wxh:
 * In-place weighted blend of two blocks.  Every sample of dst becomes
 *     clip( (dst*i_weight1 + src*(64 - i_weight1) + 32) >> 6 )
 * where the clip is x264_clip_uint8 (defined elsewhere; presumably a clamp
 * to 0..255 -- confirm).
 *
 * Each row is unrolled with early-outs after 2, 4 and 8 samples; any other
 * width falls through to the full 16-sample row, so width is expected to be
 * 2, 4, 8 or 16.
 *
 *  dst, i_dst    destination plane (read and overwritten) and its row stride
 *  src, i_src    second operand (read only) and its row stride
 *  i_weight1     weight applied to dst; src gets 64 - i_weight1
 */
static inline void pixel_avg_weight_wxh( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int width, int height, int i_weight1 ){
    int y;
    const int i_weight2 = 64 - i_weight1;
    for(y=0; y<height; y++, dst += i_dst, src += i_src){
        op_scale2(0);
        op_scale2(1);
        if(width==2) continue;
        op_scale2(2);
        op_scale2(3);
        if(width==4) continue;
        op_scale2(4);
        op_scale2(5);
        op_scale2(6);
        op_scale2(7);
        if(width==8) continue;
        op_scale2(8);
        op_scale2(9);
        op_scale2(10);
        op_scale2(11);
        op_scale2(12);
        op_scale2(13);
        op_scale2(14);
        op_scale2(15);
    }
}
/* PIXEL_AVG_WEIGHT_C( width, height )
 *
 * Expands to a fixed-size wrapper
 *     static void pixel_avg_weight_<width>x<height>(
 *                     uint8_t *pix1, int i_stride_pix1,
 *                     uint8_t *pix2, int i_stride_pix2, int i_weight1 )
 * that forwards to pixel_avg_weight_wxh with the block size baked in.  pix1
 * is blended in place with weight i_weight1; pix2 is only read.  The macro is
 * undefined again once all sizes have been instantiated. */
#define PIXEL_AVG_WEIGHT_C( width, height ) \
static void pixel_avg_weight_##width##x##height( \
                uint8_t *pix1, int i_stride_pix1, \
                uint8_t *pix2, int i_stride_pix2, int i_weight1 ) \
{ \
    pixel_avg_weight_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, width, height, i_weight1 ); \
}

PIXEL_AVG_WEIGHT_C(16,16)
PIXEL_AVG_WEIGHT_C(16,8)
PIXEL_AVG_WEIGHT_C(8,16)
PIXEL_AVG_WEIGHT_C(8,8)
PIXEL_AVG_WEIGHT_C(8,4)
PIXEL_AVG_WEIGHT_C(4,8)
PIXEL_AVG_WEIGHT_C(4,4)
PIXEL_AVG_WEIGHT_C(4,2)
PIXEL_AVG_WEIGHT_C(2,4)
PIXEL_AVG_WEIGHT_C(2,2)

#undef PIXEL_AVG_WEIGHT_C
265 /****************************************************************************
267 ****************************************************************************/
268 void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
270 pixf->sad[PIXEL_16x16] = pixel_sad_16x16;
271 pixf->sad[PIXEL_16x8] = pixel_sad_16x8;
272 pixf->sad[PIXEL_8x16] = pixel_sad_8x16;
273 pixf->sad[PIXEL_8x8] = pixel_sad_8x8;
274 pixf->sad[PIXEL_8x4] = pixel_sad_8x4;
275 pixf->sad[PIXEL_4x8] = pixel_sad_4x8;
276 pixf->sad[PIXEL_4x4] = pixel_sad_4x4;
278 pixf->ssd[PIXEL_16x16] = pixel_ssd_16x16;
279 pixf->ssd[PIXEL_16x8] = pixel_ssd_16x8;
280 pixf->ssd[PIXEL_8x16] = pixel_ssd_8x16;
281 pixf->ssd[PIXEL_8x8] = pixel_ssd_8x8;
282 pixf->ssd[PIXEL_8x4] = pixel_ssd_8x4;
283 pixf->ssd[PIXEL_4x8] = pixel_ssd_4x8;
284 pixf->ssd[PIXEL_4x4] = pixel_ssd_4x4;
286 pixf->satd[PIXEL_16x16]= pixel_satd_16x16;
287 pixf->satd[PIXEL_16x8] = pixel_satd_16x8;
288 pixf->satd[PIXEL_8x16] = pixel_satd_8x16;
289 pixf->satd[PIXEL_8x8] = pixel_satd_8x8;
290 pixf->satd[PIXEL_8x4] = pixel_satd_8x4;
291 pixf->satd[PIXEL_4x8] = pixel_satd_4x8;
292 pixf->satd[PIXEL_4x4] = pixel_satd_4x4;
294 pixf->avg[PIXEL_16x16]= pixel_avg_16x16;
295 pixf->avg[PIXEL_16x8] = pixel_avg_16x8;
296 pixf->avg[PIXEL_8x16] = pixel_avg_8x16;
297 pixf->avg[PIXEL_8x8] = pixel_avg_8x8;
298 pixf->avg[PIXEL_8x4] = pixel_avg_8x4;
299 pixf->avg[PIXEL_4x8] = pixel_avg_4x8;
300 pixf->avg[PIXEL_4x4] = pixel_avg_4x4;
301 pixf->avg[PIXEL_4x2] = pixel_avg_4x2;
302 pixf->avg[PIXEL_2x4] = pixel_avg_2x4;
303 pixf->avg[PIXEL_2x2] = pixel_avg_2x2;
305 pixf->avg_weight[PIXEL_16x16]= pixel_avg_weight_16x16;
306 pixf->avg_weight[PIXEL_16x8] = pixel_avg_weight_16x8;
307 pixf->avg_weight[PIXEL_8x16] = pixel_avg_weight_8x16;
308 pixf->avg_weight[PIXEL_8x8] = pixel_avg_weight_8x8;
309 pixf->avg_weight[PIXEL_8x4] = pixel_avg_weight_8x4;
310 pixf->avg_weight[PIXEL_4x8] = pixel_avg_weight_4x8;
311 pixf->avg_weight[PIXEL_4x4] = pixel_avg_weight_4x4;
312 pixf->avg_weight[PIXEL_4x2] = pixel_avg_weight_4x2;
313 pixf->avg_weight[PIXEL_2x4] = pixel_avg_weight_2x4;
314 pixf->avg_weight[PIXEL_2x2] = pixel_avg_weight_2x2;
317 if( cpu&X264_CPU_MMXEXT )
319 pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_mmxext;
320 pixf->sad[PIXEL_16x8 ] = x264_pixel_sad_16x8_mmxext;
321 pixf->sad[PIXEL_8x16 ] = x264_pixel_sad_8x16_mmxext;
322 pixf->sad[PIXEL_8x8 ] = x264_pixel_sad_8x8_mmxext;
323 pixf->sad[PIXEL_8x4 ] = x264_pixel_sad_8x4_mmxext;
324 pixf->sad[PIXEL_4x8 ] = x264_pixel_sad_4x8_mmxext;
325 pixf->sad[PIXEL_4x4] = x264_pixel_sad_4x4_mmxext;
327 pixf->ssd[PIXEL_16x16] = x264_pixel_ssd_16x16_mmxext;
328 pixf->ssd[PIXEL_16x8] = x264_pixel_ssd_16x8_mmxext;
329 pixf->ssd[PIXEL_8x16] = x264_pixel_ssd_8x16_mmxext;
330 pixf->ssd[PIXEL_8x8] = x264_pixel_ssd_8x8_mmxext;
331 pixf->ssd[PIXEL_8x4] = x264_pixel_ssd_8x4_mmxext;
332 pixf->ssd[PIXEL_4x8] = x264_pixel_ssd_4x8_mmxext;
333 pixf->ssd[PIXEL_4x4] = x264_pixel_ssd_4x4_mmxext;
335 pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_mmxext;
336 pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_mmxext;
337 pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_mmxext;
338 pixf->satd[PIXEL_8x8] = x264_pixel_satd_8x8_mmxext;
339 pixf->satd[PIXEL_8x4] = x264_pixel_satd_8x4_mmxext;
340 pixf->satd[PIXEL_4x8] = x264_pixel_satd_4x8_mmxext;
341 pixf->satd[PIXEL_4x4] = x264_pixel_satd_4x4_mmxext;
345 if( cpu&X264_CPU_ALTIVEC )
347 x264_pixel_altivec_init( pixf );