git.sesse.net Git - x264/blob - common/pixel.c
cosmetics.
[x264] / common / pixel.c
1 /*****************************************************************************
2  * pixel.c: h264 encoder
3  *****************************************************************************
4  * Copyright (C) 2003 Laurent Aimar
5  * $Id: pixel.c,v 1.1 2004/06/03 19:27:07 fenrir Exp $
6  *
7  * Authors: Laurent Aimar <fenrir@via.ecp.fr>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License as published by
11  * the Free Software Foundation; either version 2 of the License, or
12  * (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU General Public License for more details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
22  *****************************************************************************/
23
24 #ifdef HAVE_STDINT_H
25 #include <stdint.h>
26 #else
27 #include <inttypes.h>
28 #endif
29 #include <stdlib.h>
30 #include <string.h>
31 #include <stdarg.h>
32
33 #include "x264.h"
34 #include "pixel.h"
35 #include "clip1.h"
36
37 #ifdef HAVE_MMXEXT
38 #   include "i386/pixel.h"
39 #endif
40 #ifdef ARCH_PPC
41 #   include "ppc/pixel.h"
42 #endif
43
44
/****************************************************************************
 * pixel_sad_WxH
 *
 * Sum of absolute differences between two lx-by-ly blocks of 8-bit samples.
 * pix1 / pix2 point at the top-left sample of each block; i_stride_pix1 and
 * i_stride_pix2 are the offsets (in samples) from one row to the next.
 * Returns the accumulated sum.
 ****************************************************************************/
#define PIXEL_SAD_C( name, lx, ly ) \
static int name( uint8_t *pix1, int i_stride_pix1,  \
                 uint8_t *pix2, int i_stride_pix2 ) \
{                                                   \
    int i_sad = 0;                                  \
    int i_row;                                      \
    for( i_row = 0; i_row < ly; i_row++ )           \
    {                                               \
        /* start of the current row in each block */ \
        const uint8_t *p_a = pix1 + i_row * i_stride_pix1; \
        const uint8_t *p_b = pix2 + i_row * i_stride_pix2; \
        int i_col;                                  \
        for( i_col = 0; i_col < lx; i_col++ )       \
        {                                           \
            i_sad += abs( p_a[i_col] - p_b[i_col] ); \
        }                                           \
    }                                               \
    return i_sad;                                   \
}


PIXEL_SAD_C( pixel_sad_16x16, 16, 16 )
PIXEL_SAD_C( pixel_sad_16x8,  16,  8 )
PIXEL_SAD_C( pixel_sad_8x16,   8, 16 )
PIXEL_SAD_C( pixel_sad_8x8,    8,  8 )
PIXEL_SAD_C( pixel_sad_8x4,    8,  4 )
PIXEL_SAD_C( pixel_sad_4x8,    4,  8 )
PIXEL_SAD_C( pixel_sad_4x4,    4,  4 )
74
75
/****************************************************************************
 * pixel_ssd_WxH
 *
 * Sum of squared differences between two lx-by-ly blocks of 8-bit samples.
 * pix1 / pix2 point at the top-left sample of each block; i_stride_pix1 and
 * i_stride_pix2 are the offsets (in samples) from one row to the next.
 * Returns the accumulated sum.
 ****************************************************************************/
#define PIXEL_SSD_C( name, lx, ly ) \
static int name( uint8_t *pix1, int i_stride_pix1,  \
                 uint8_t *pix2, int i_stride_pix2 ) \
{                                                   \
    int i_ssd = 0;                                  \
    int i_rows_left;                                \
    int i_col;                                      \
    for( i_rows_left = ly; i_rows_left > 0; i_rows_left-- ) \
    {                                               \
        for( i_col = 0; i_col < lx; i_col++ )       \
        {                                           \
            const int i_err = pix1[i_col] - pix2[i_col]; \
            i_ssd += i_err * i_err;                 \
        }                                           \
        /* step both blocks down one row */         \
        pix1 += i_stride_pix1;                      \
        pix2 += i_stride_pix2;                      \
    }                                               \
    return i_ssd;                                   \
}

PIXEL_SSD_C( pixel_ssd_16x16, 16, 16 )
PIXEL_SSD_C( pixel_ssd_16x8,  16,  8 )
PIXEL_SSD_C( pixel_ssd_8x16,   8, 16 )
PIXEL_SSD_C( pixel_ssd_8x8,    8,  8 )
PIXEL_SSD_C( pixel_ssd_8x4,    8,  4 )
PIXEL_SSD_C( pixel_ssd_4x8,    4,  8 )
PIXEL_SSD_C( pixel_ssd_4x4,    4,  4 )
105
106
/* Write the sample-wise difference pix1 - pix2 of a 4x4 block into diff.
 * i_pix1 / i_pix2 are the row strides of the two sources. */
static void pixel_sub_4x4( int16_t diff[4][4], uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2 )
{
    int i_row;
    for( i_row = 0; i_row < 4; i_row++, pix1 += i_pix1, pix2 += i_pix2 )
    {
        int16_t *p_out = diff[i_row];

        p_out[0] = pix1[0] - pix2[0];
        p_out[1] = pix1[1] - pix2[1];
        p_out[2] = pix1[2] - pix2[2];
        p_out[3] = pix1[3] - pix2[3];
    }
}
120
/* Sum of absolute transformed differences over an i_width x i_height area.
 *
 * The area is walked in 4x4 tiles.  For each tile the difference pix1 - pix2
 * is run through two add/subtract butterfly passes (rows first, then
 * columns) and the absolute values of the resulting 16 coefficients are
 * accumulated.  Half of the total is returned.
 *
 * Whole 4x4 tiles are always read, so i_width and i_height are expected to
 * be multiples of 4. */
static int pixel_satd_wxh( uint8_t *pix1, int i_pix1, uint8_t *pix2, int i_pix2, int i_width, int i_height )
{
    int16_t diff[4][4];
    int16_t horiz[4][4];
    int i_sum = 0;
    int i_bx, i_by, i;

    for( i_by = 0; i_by < i_height; i_by += 4 )
    {
        /* first row of the current band of tiles */
        uint8_t *p_band1 = pix1 + i_by * i_pix1;
        uint8_t *p_band2 = pix2 + i_by * i_pix2;

        for( i_bx = 0; i_bx < i_width; i_bx += 4 )
        {
            pixel_sub_4x4( diff, p_band1 + i_bx, i_pix1, p_band2 + i_bx, i_pix2 );

            /* butterfly along each row */
            for( i = 0; i < 4; i++ )
            {
                const int i_sum_lo = diff[i][0] + diff[i][1];
                const int i_dif_lo = diff[i][0] - diff[i][1];
                const int i_sum_hi = diff[i][2] + diff[i][3];
                const int i_dif_hi = diff[i][2] - diff[i][3];

                horiz[i][0] = i_sum_lo + i_sum_hi;
                horiz[i][1] = i_sum_lo - i_sum_hi;
                horiz[i][2] = i_dif_lo - i_dif_hi;
                horiz[i][3] = i_dif_lo + i_dif_hi;
            }

            /* butterfly along each column, accumulating magnitudes directly */
            for( i = 0; i < 4; i++ )
            {
                const int i_sum_lo = horiz[0][i] + horiz[1][i];
                const int i_dif_lo = horiz[0][i] - horiz[1][i];
                const int i_sum_hi = horiz[2][i] + horiz[3][i];
                const int i_dif_hi = horiz[2][i] - horiz[3][i];

                i_sum += abs( i_sum_lo + i_sum_hi ) + abs( i_sum_lo - i_sum_hi )
                       + abs( i_dif_lo - i_dif_hi ) + abs( i_dif_lo + i_dif_hi );
            }
        }
    }

    return i_sum / 2;
}
/* Generate a fixed-size SATD entry point: `name` forwards its four arguments
 * to pixel_satd_wxh() with the block dimensions w x h baked in. */
#define PIXEL_SATD_C( name, w, h )                                  \
static int name( uint8_t *pix1, int i_stride_pix1,                  \
                 uint8_t *pix2, int i_stride_pix2 )                 \
{                                                                   \
    return pixel_satd_wxh( pix1, i_stride_pix1,                     \
                           pix2, i_stride_pix2, w, h );             \
}
PIXEL_SATD_C( pixel_satd_16x16, 16, 16 )
PIXEL_SATD_C( pixel_satd_16x8,  16,  8 )
PIXEL_SATD_C( pixel_satd_8x16,   8, 16 )
PIXEL_SATD_C( pixel_satd_8x8,    8,  8 )
PIXEL_SATD_C( pixel_satd_8x4,    8,  4 )
PIXEL_SATD_C( pixel_satd_4x8,    4,  8 )
PIXEL_SATD_C( pixel_satd_4x4,    4,  4 )
180
181
/* In-place rounded average: every sample of the width x height block at dst
 * becomes ( dst + src + 1 ) >> 1.  i_dst / i_src are the row strides. */
static inline void pixel_avg_wxh( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int width, int height )
{
    int i_row, i_col;
    for( i_row = 0; i_row < height; i_row++ )
    {
        uint8_t       *p_out = dst + i_row * i_dst;
        const uint8_t *p_in  = src + i_row * i_src;

        for( i_col = 0; i_col < width; i_col++ )
        {
            const int i_pair = p_out[i_col] + p_in[i_col];
            p_out[i_col] = ( i_pair + 1 ) >> 1;
        }
    }
}
195
196
/* Generate a fixed-size averaging entry point: `name` forwards to
 * pixel_avg_wxh() with dimensions w x h, so pix1 is both the first input
 * and the destination of the average. */
#define PIXEL_AVG_C( name, w, h )                                   \
static void name( uint8_t *pix1, int i_stride_pix1,                 \
                  uint8_t *pix2, int i_stride_pix2 )                \
{                                                                   \
    pixel_avg_wxh( pix1, i_stride_pix1, pix2, i_stride_pix2, w, h ); \
}
PIXEL_AVG_C( pixel_avg_16x16, 16, 16 )
PIXEL_AVG_C( pixel_avg_16x8,  16,  8 )
PIXEL_AVG_C( pixel_avg_8x16,   8, 16 )
PIXEL_AVG_C( pixel_avg_8x8,    8,  8 )
PIXEL_AVG_C( pixel_avg_8x4,    8,  4 )
PIXEL_AVG_C( pixel_avg_4x8,    4,  8 )
PIXEL_AVG_C( pixel_avg_4x4,    4,  4 )
PIXEL_AVG_C( pixel_avg_4x2,    4,  2 )
PIXEL_AVG_C( pixel_avg_2x4,    2,  4 )
PIXEL_AVG_C( pixel_avg_2x2,    2,  2 )
213
214
/* Implicit weighted bipred only:
 * assumes log2_denom = 5, offset = 0, weight1 + weight2 = 64
 *
 * Each sample becomes
 *     clip( ( dst*i_weight1 + src*(64 - i_weight1) + 32 ) >> 6 )
 * in place in dst.  x264_clip_uint8() is defined outside this file;
 * presumably it clamps to 0..255 -- TODO confirm. */
#define op_scale2(x) dst[x] = x264_clip_uint8( (dst[x]*i_weight1 + src[x]*i_weight2 + (1<<5)) >> 6 )
static inline void pixel_avg_weight_wxh( uint8_t *dst, int i_dst, uint8_t *src, int i_src, int width, int height, int i_weight1 )
{
    const int i_weight2 = 64 - i_weight1;
    /* Widths 2, 4 and 8 are honoured as given; any other value processes
     * 16 samples per row. */
    const int i_cols = ( width == 2 || width == 4 || width == 8 ) ? width : 16;
    int i_row, i_col;

    for( i_row = 0; i_row < height; i_row++ )
    {
        for( i_col = 0; i_col < i_cols; i_col++ )
        {
            op_scale2( i_col );
        }
        dst += i_dst;
        src += i_src;
    }
}
243
/* Generate pixel_avg_weight_<w>x<h>(): a fixed-size entry point that
 * forwards to pixel_avg_weight_wxh() with the block dimensions baked in.
 * pix1 is both the first input and the destination. */
#define PIXEL_AVG_WEIGHT_C( w, h )                                  \
static void pixel_avg_weight_##w##x##h(                             \
                uint8_t *pix1, int i_stride_pix1,                   \
                uint8_t *pix2, int i_stride_pix2, int i_weight1 )   \
{                                                                   \
    pixel_avg_weight_wxh( pix1, i_stride_pix1,                      \
                          pix2, i_stride_pix2, w, h, i_weight1 );   \
}

PIXEL_AVG_WEIGHT_C( 16, 16 )
PIXEL_AVG_WEIGHT_C( 16, 8 )
PIXEL_AVG_WEIGHT_C( 8, 16 )
PIXEL_AVG_WEIGHT_C( 8, 8 )
PIXEL_AVG_WEIGHT_C( 8, 4 )
PIXEL_AVG_WEIGHT_C( 4, 8 )
PIXEL_AVG_WEIGHT_C( 4, 4 )
PIXEL_AVG_WEIGHT_C( 4, 2 )
PIXEL_AVG_WEIGHT_C( 2, 4 )
PIXEL_AVG_WEIGHT_C( 2, 2 )
#undef op_scale2
#undef PIXEL_AVG_WEIGHT_C
264
265 /****************************************************************************
266  * x264_pixel_init:
267  ****************************************************************************/
268 void x264_pixel_init( int cpu, x264_pixel_function_t *pixf )
269 {
270     pixf->sad[PIXEL_16x16] = pixel_sad_16x16;
271     pixf->sad[PIXEL_16x8]  = pixel_sad_16x8;
272     pixf->sad[PIXEL_8x16]  = pixel_sad_8x16;
273     pixf->sad[PIXEL_8x8]   = pixel_sad_8x8;
274     pixf->sad[PIXEL_8x4]   = pixel_sad_8x4;
275     pixf->sad[PIXEL_4x8]   = pixel_sad_4x8;
276     pixf->sad[PIXEL_4x4]   = pixel_sad_4x4;
277
278     pixf->ssd[PIXEL_16x16] = pixel_ssd_16x16;
279     pixf->ssd[PIXEL_16x8]  = pixel_ssd_16x8;
280     pixf->ssd[PIXEL_8x16]  = pixel_ssd_8x16;
281     pixf->ssd[PIXEL_8x8]   = pixel_ssd_8x8;
282     pixf->ssd[PIXEL_8x4]   = pixel_ssd_8x4;
283     pixf->ssd[PIXEL_4x8]   = pixel_ssd_4x8;
284     pixf->ssd[PIXEL_4x4]   = pixel_ssd_4x4;
285
286     pixf->satd[PIXEL_16x16]= pixel_satd_16x16;
287     pixf->satd[PIXEL_16x8] = pixel_satd_16x8;
288     pixf->satd[PIXEL_8x16] = pixel_satd_8x16;
289     pixf->satd[PIXEL_8x8]  = pixel_satd_8x8;
290     pixf->satd[PIXEL_8x4]  = pixel_satd_8x4;
291     pixf->satd[PIXEL_4x8]  = pixel_satd_4x8;
292     pixf->satd[PIXEL_4x4]  = pixel_satd_4x4;
293
294     pixf->avg[PIXEL_16x16]= pixel_avg_16x16;
295     pixf->avg[PIXEL_16x8] = pixel_avg_16x8;
296     pixf->avg[PIXEL_8x16] = pixel_avg_8x16;
297     pixf->avg[PIXEL_8x8]  = pixel_avg_8x8;
298     pixf->avg[PIXEL_8x4]  = pixel_avg_8x4;
299     pixf->avg[PIXEL_4x8]  = pixel_avg_4x8;
300     pixf->avg[PIXEL_4x4]  = pixel_avg_4x4;
301     pixf->avg[PIXEL_4x2]  = pixel_avg_4x2;
302     pixf->avg[PIXEL_2x4]  = pixel_avg_2x4;
303     pixf->avg[PIXEL_2x2]  = pixel_avg_2x2;
304     
305     pixf->avg_weight[PIXEL_16x16]= pixel_avg_weight_16x16;
306     pixf->avg_weight[PIXEL_16x8] = pixel_avg_weight_16x8;
307     pixf->avg_weight[PIXEL_8x16] = pixel_avg_weight_8x16;
308     pixf->avg_weight[PIXEL_8x8]  = pixel_avg_weight_8x8;
309     pixf->avg_weight[PIXEL_8x4]  = pixel_avg_weight_8x4;
310     pixf->avg_weight[PIXEL_4x8]  = pixel_avg_weight_4x8;
311     pixf->avg_weight[PIXEL_4x4]  = pixel_avg_weight_4x4;
312     pixf->avg_weight[PIXEL_4x2]  = pixel_avg_weight_4x2;
313     pixf->avg_weight[PIXEL_2x4]  = pixel_avg_weight_2x4;
314     pixf->avg_weight[PIXEL_2x2]  = pixel_avg_weight_2x2;
315
316 #ifdef HAVE_MMXEXT
317     if( cpu&X264_CPU_MMXEXT )
318     {
319         pixf->sad[PIXEL_16x16] = x264_pixel_sad_16x16_mmxext;
320         pixf->sad[PIXEL_16x8 ] = x264_pixel_sad_16x8_mmxext;
321         pixf->sad[PIXEL_8x16 ] = x264_pixel_sad_8x16_mmxext;
322         pixf->sad[PIXEL_8x8  ] = x264_pixel_sad_8x8_mmxext;
323         pixf->sad[PIXEL_8x4  ] = x264_pixel_sad_8x4_mmxext;
324         pixf->sad[PIXEL_4x8  ] = x264_pixel_sad_4x8_mmxext;
325         pixf->sad[PIXEL_4x4]   = x264_pixel_sad_4x4_mmxext;
326
327         pixf->ssd[PIXEL_16x16] = x264_pixel_ssd_16x16_mmxext;
328         pixf->ssd[PIXEL_16x8]  = x264_pixel_ssd_16x8_mmxext;
329         pixf->ssd[PIXEL_8x16]  = x264_pixel_ssd_8x16_mmxext;
330         pixf->ssd[PIXEL_8x8]   = x264_pixel_ssd_8x8_mmxext;
331         pixf->ssd[PIXEL_8x4]   = x264_pixel_ssd_8x4_mmxext;
332         pixf->ssd[PIXEL_4x8]   = x264_pixel_ssd_4x8_mmxext;
333         pixf->ssd[PIXEL_4x4]   = x264_pixel_ssd_4x4_mmxext;
334   
335         pixf->satd[PIXEL_16x16]= x264_pixel_satd_16x16_mmxext;
336         pixf->satd[PIXEL_16x8] = x264_pixel_satd_16x8_mmxext;
337         pixf->satd[PIXEL_8x16] = x264_pixel_satd_8x16_mmxext;
338         pixf->satd[PIXEL_8x8]  = x264_pixel_satd_8x8_mmxext;
339         pixf->satd[PIXEL_8x4]  = x264_pixel_satd_8x4_mmxext;
340         pixf->satd[PIXEL_4x8]  = x264_pixel_satd_4x8_mmxext;
341         pixf->satd[PIXEL_4x4]  = x264_pixel_satd_4x4_mmxext;
342     }
343 #endif
344 #ifdef ARCH_PPC
345     if( cpu&X264_CPU_ALTIVEC )
346     {
347         x264_pixel_altivec_init( pixf );
348     }
349 #endif
350 }
351