1 /*****************************************************************************
2 * macroblock.c: h264 encoder library
3 *****************************************************************************
4 * Copyright (C) 2003 Laurent Aimar
5 * $Id: macroblock.c,v 1.1 2004/06/03 19:27:08 fenrir Exp $
7 * Authors: Laurent Aimar <fenrir@via.ecp.fr>
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
22 *****************************************************************************/
28 #include "common/common.h"
29 #include "macroblock.h"
/* Column (in units of 4x4 blocks, 0..3) of luma block `idx` inside the
 * macroblock; callers multiply it by 4 to get a pixel offset. */
static const uint8_t block_idx_x[16] =
{
    0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
};
/* Row (in units of 4x4 blocks, 0..3) of luma block `idx` inside the
 * macroblock; callers multiply it by 4 * stride to get a pixel offset. */
static const uint8_t block_idx_y[16] =
{
    0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
};
/* NOTE(review): the initializer of this 4x4 table is not visible in this view.
 * Presumably it maps a 4x4-block position back to its block index (the inverse
 * of block_idx_x / block_idx_y) -- confirm against the full file. */
40 static const uint8_t block_idx_xy[4][4] =
/* Forward quantisation multipliers for 4x4 blocks, indexed as
 * quant_mf[qp % 6][row][col]. */
static const int quant_mf[6][4][4] =
{
    { { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 },
      { 13107, 8066, 13107, 8066 }, { 8066, 5243, 8066, 5243 } },
    { { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 },
      { 11916, 7490, 11916, 7490 }, { 7490, 4660, 7490, 4660 } },
    { { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 },
      { 10082, 6554, 10082, 6554 }, { 6554, 4194, 6554, 4194 } },
    { { 9362, 5825, 9362, 5825 }, { 5825, 3647, 5825, 3647 },
      { 9362, 5825, 9362, 5825 }, { 5825, 3647, 5825, 3647 } },
    { { 8192, 5243, 8192, 5243 }, { 5243, 3355, 5243, 3355 },
      { 8192, 5243, 8192, 5243 }, { 5243, 3355, 5243, 3355 } },
    { { 7282, 4559, 7282, 4559 }, { 4559, 2893, 4559, 2893 },
      { 7282, 4559, 7282, 4559 }, { 4559, 2893, 4559, 2893 } }
};
/* Forward quantisation multipliers for 8x8 blocks, indexed as
 * quant8_mf[qp % 6][row][col].  Kept non-static, as in the original
 * declaration. */
const int quant8_mf[6][8][8] =
{
    {
        { 13107, 12222, 16777, 12222, 13107, 12222, 16777, 12222 },
        { 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428 },
        { 16777, 15481, 20972, 15481, 16777, 15481, 20972, 15481 },
        { 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428 },
        { 13107, 12222, 16777, 12222, 13107, 12222, 16777, 12222 },
        { 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428 },
        { 16777, 15481, 20972, 15481, 16777, 15481, 20972, 15481 },
        { 12222, 11428, 15481, 11428, 12222, 11428, 15481, 11428 }
    },
    {
        { 11916, 11058, 14980, 11058, 11916, 11058, 14980, 11058 },
        { 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826 },
        { 14980, 14290, 19174, 14290, 14980, 14290, 19174, 14290 },
        { 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826 },
        { 11916, 11058, 14980, 11058, 11916, 11058, 14980, 11058 },
        { 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826 },
        { 14980, 14290, 19174, 14290, 14980, 14290, 19174, 14290 },
        { 11058, 10826, 14290, 10826, 11058, 10826, 14290, 10826 }
    },
    {
        { 10082, 9675, 12710, 9675, 10082, 9675, 12710, 9675 },
        { 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943 },
        { 12710, 11985, 15978, 11985, 12710, 11985, 15978, 11985 },
        { 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943 },
        { 10082, 9675, 12710, 9675, 10082, 9675, 12710, 9675 },
        { 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943 },
        { 12710, 11985, 15978, 11985, 12710, 11985, 15978, 11985 },
        { 9675, 8943, 11985, 8943, 9675, 8943, 11985, 8943 }
    },
    {
        { 9362, 8931, 11984, 8931, 9362, 8931, 11984, 8931 },
        { 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228 },
        { 11984, 11259, 14913, 11259, 11984, 11259, 14913, 11259 },
        { 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228 },
        { 9362, 8931, 11984, 8931, 9362, 8931, 11984, 8931 },
        { 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228 },
        { 11984, 11259, 14913, 11259, 11984, 11259, 14913, 11259 },
        { 8931, 8228, 11259, 8228, 8931, 8228, 11259, 8228 }
    },
    {
        { 8192, 7740, 10486, 7740, 8192, 7740, 10486, 7740 },
        { 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346 },
        { 10486, 9777, 13159, 9777, 10486, 9777, 13159, 9777 },
        { 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346 },
        { 8192, 7740, 10486, 7740, 8192, 7740, 10486, 7740 },
        { 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346 },
        { 10486, 9777, 13159, 9777, 10486, 9777, 13159, 9777 },
        { 7740, 7346, 9777, 7346, 7740, 7346, 9777, 7346 }
    },
    {
        { 7282, 6830, 9118, 6830, 7282, 6830, 9118, 6830 },
        { 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428 },
        { 9118, 8640, 11570, 8640, 9118, 8640, 11570, 8640 },
        { 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428 },
        { 7282, 6830, 9118, 6830, 7282, 6830, 9118, 6830 },
        { 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428 },
        { 9118, 8640, 11570, 8640, 9118, 8640, 11570, 8640 },
        { 6830, 6428, 8640, 6428, 6830, 6428, 8640, 6428 }
    }
};
/* Maps a luma-derived QP index (0..51, already offset and clipped by the
 * caller) to the chroma QP.  All 52 entries are spelled out: with only the
 * first 50 initializers present, indices 50 and 51 would silently be 0
 * instead of 39. */
static const int i_chroma_qp_table[52] =
{
     0,  1,  2,  3,  4,  5,  6,  7,  8,  9,
    10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
    20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
    29, 30, 31, 32, 32, 33, 34, 34, 35, 35,
    36, 36, 37, 37, 37, 38, 38, 38, 39, 39,
    39, 39
};
133 /****************************************************************************
134 * Scan and Quant functions
135 ****************************************************************************/
136 //static const int scan_zigzag_x[16]={0, 1, 0, 0, 1, 2, 3, 2, 1, 0, 1, 2, 3, 3, 2, 3};
137 //static const int scan_zigzag_y[16]={0, 0, 1, 2, 1, 0, 0, 1, 2, 3, 3, 2, 1, 2, 3, 3};
/* Statement-like helper still used by sibling scan routines in this file:
 * copies coefficient dct[y][x] to scan position i. */
#define ZIG(i,y,x) level[i] = dct[y][x];

/* Reorders an 8x8 coefficient block into a 64-entry zigzag-scanned list.
 *
 * level: receives the 64 coefficients in scan order.
 * dct:   source block, indexed [row][col]; not modified.
 *
 * The scan order lives in a lookup table rather than in ZIG() expansions so
 * the function does not depend on which definition of ZIG is active. */
static inline void scan_zigzag_8x8full( int level[64], int16_t dct[8][8] )
{
    /* { row, col } of the coefficient stored at each scan position */
    static const uint8_t zigzag8x8[64][2] =
    {
        {0,0}, {0,1}, {1,0}, {2,0}, {1,1}, {0,2}, {0,3}, {1,2},
        {2,1}, {3,0}, {4,0}, {3,1}, {2,2}, {1,3}, {0,4}, {0,5},
        {1,4}, {2,3}, {3,2}, {4,1}, {5,0}, {6,0}, {5,1}, {4,2},
        {3,3}, {2,4}, {1,5}, {0,6}, {0,7}, {1,6}, {2,5}, {3,4},
        {4,3}, {5,2}, {6,1}, {7,0}, {7,1}, {6,2}, {5,3}, {4,4},
        {3,5}, {2,6}, {1,7}, {2,7}, {3,6}, {4,5}, {5,4}, {6,3},
        {7,2}, {7,3}, {6,4}, {5,5}, {4,6}, {3,7}, {4,7}, {5,6},
        {6,5}, {7,4}, {7,5}, {6,6}, {5,7}, {6,7}, {7,6}, {7,7}
    };
    int i;

    for( i = 0; i < 64; i++ )
    {
        level[i] = dct[zigzag8x8[i][0]][zigzag8x8[i][1]];
    }
}
/* Reorders a 4x4 coefficient block (DC included) into a 16-entry
 * zigzag-scanned list.
 *
 * level: receives the 16 coefficients in scan order.
 * dct:   source block, indexed [row][col]; not modified.
 *
 * Table-driven so it does not depend on the ZIG() macro, which is defined
 * twice in this file with different meanings. */
static inline void scan_zigzag_4x4full( int level[16], int16_t dct[4][4] )
{
    /* { row, col } of the coefficient stored at each scan position */
    static const uint8_t zigzag4x4[16][2] =
    {
        {0,0}, {0,1}, {1,0}, {2,0},
        {1,1}, {0,2}, {0,3}, {1,2},
        {2,1}, {3,0}, {3,1}, {2,2},
        {1,3}, {2,3}, {3,2}, {3,3}
    };
    int i;

    for( i = 0; i < 16; i++ )
    {
        level[i] = dct[zigzag4x4[i][0]][zigzag4x4[i][1]];
    }
}
/* Reorders the 15 AC coefficients of a 4x4 block into zigzag order; the DC
 * coefficient dct[0][0] is skipped.
 *
 * level: receives the 15 AC coefficients in scan order.
 * dct:   source block, indexed [row][col]; not modified.
 *
 * Table-driven so it does not depend on the ZIG() macro, which is defined
 * twice in this file with different meanings. */
static inline void scan_zigzag_4x4( int level[15], int16_t dct[4][4] )
{
    /* { row, col } of the coefficient stored at each scan position */
    static const uint8_t zigzag4x4_ac[15][2] =
    {
               {0,1}, {1,0}, {2,0},
        {1,1}, {0,2}, {0,3}, {1,2},
        {2,1}, {3,0}, {3,1}, {2,2},
        {1,3}, {2,3}, {3,2}, {3,3}
    };
    int i;

    for( i = 0; i < 15; i++ )
    {
        level[i] = dct[zigzag4x4_ac[i][0]][zigzag4x4_ac[i][1]];
    }
}
/* Takes a 2x2 block and a 4-entry output list (callers pass the chroma DC
 * block).  NOTE(review): the body of this function is not visible in this
 * view -- confirm the scan order against the full file. */
173 static inline void scan_zigzag_2x2_dc( int level[4], int16_t dct[2][2] )
/* Residual variant of ZIG, kept for sibling code in this file: stores the
 * difference p_src - p_dst at scan position i, then copies the source pixel
 * into p_dst.  The earlier definition is dropped first so this is not an
 * incompatible redefinition, and the arguments are parenthesized. */
#undef ZIG
#define ZIG(i,y,x) {\
    int o = (x)+(y)*i_stride;\
    level[i] = p_src[o] - p_dst[o];\
    p_dst[o] = p_src[o];\
}

/* Zigzag-scans the pixel-domain residual of a 4x4 block (all 16 positions).
 *
 * level:    receives p_src - p_dst for each position, in zigzag order.
 * p_src:    top-left pixel of the source block.
 * p_dst:    top-left pixel of the destination block; each visited pixel is
 *           overwritten with the corresponding source pixel.
 * i_stride: distance in bytes between rows, shared by both buffers. */
static inline void sub_zigzag_4x4full( int level[16], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
{
    /* { row, col } of the pixel handled at each scan position */
    static const uint8_t zigzag4x4[16][2] =
    {
        {0,0}, {0,1}, {1,0}, {2,0},
        {1,1}, {0,2}, {0,3}, {1,2},
        {2,1}, {3,0}, {3,1}, {2,2},
        {1,3}, {2,3}, {3,2}, {3,3}
    };
    int i;

    for( i = 0; i < 16; i++ )
    {
        const int o = zigzag4x4[i][1] + zigzag4x4[i][0] * i_stride;
        level[i] = p_src[o] - p_dst[o];
        p_dst[o] = p_src[o];
    }
}
/* Zigzag-scans the pixel-domain residual of the 15 non-DC positions of a
 * 4x4 block; position (0,0) is neither read nor written.
 *
 * level:    receives p_src - p_dst for each visited position, in zigzag order.
 * p_src:    top-left pixel of the source block.
 * p_dst:    top-left pixel of the destination block; each visited pixel is
 *           overwritten with the corresponding source pixel.
 * i_stride: distance in bytes between rows, shared by both buffers.
 *
 * Table-driven so it does not depend on which ZIG() definition is active. */
static inline void sub_zigzag_4x4( int level[15], const uint8_t *p_src, uint8_t *p_dst, int i_stride )
{
    /* { row, col } of the pixel handled at each scan position */
    static const uint8_t zigzag4x4_ac[15][2] =
    {
               {0,1}, {1,0}, {2,0},
        {1,1}, {0,2}, {0,3}, {1,2},
        {2,1}, {3,0}, {3,1}, {2,2},
        {1,3}, {2,3}, {3,2}, {3,3}
    };
    int i;

    for( i = 0; i < 15; i++ )
    {
        const int o = zigzag4x4_ac[i][1] + zigzag4x4_ac[i][0] * i_stride;
        level[i] = p_src[o] - p_dst[o];
        p_dst[o] = p_src[o];
    }
}
203 static void quant_8x8( int16_t dct[8][8], int i_qscale, int b_intra )
205 const int i_qbits = 16 + i_qscale / 6;
206 const int i_mf = i_qscale % 6;
207 const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
210 for( y = 0; y < 8; y++ )
212 for( x = 0; x < 8; x++ )
215 dct[y][x] = ( f + dct[y][x] * quant8_mf[i_mf][y][x] ) >> i_qbits;
217 dct[y][x] = - ( ( f - dct[y][x] * quant8_mf[i_mf][y][x] ) >> i_qbits );
221 static void quant_4x4( int16_t dct[4][4], int i_qscale, int b_intra )
223 const int i_qbits = 15 + i_qscale / 6;
224 const int i_mf = i_qscale % 6;
225 const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
228 for( y = 0; y < 4; y++ )
230 for( x = 0; x < 4; x++ )
233 dct[y][x] = ( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
235 dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
239 static void quant_4x4_dc( int16_t dct[4][4], int i_qscale )
241 const int i_qbits = 15 + i_qscale / 6;
242 const int f2 = ( 2 << i_qbits ) / 3;
243 const int i_qmf = quant_mf[i_qscale%6][0][0];
246 for( y = 0; y < 4; y++ )
248 for( x = 0; x < 4; x++ )
251 dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
253 dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
257 static void quant_2x2_dc( int16_t dct[2][2], int i_qscale, int b_intra )
259 int const i_qbits = 15 + i_qscale / 6;
260 const int f2 = ( 2 << i_qbits ) / ( b_intra ? 3 : 6 );
261 const int i_qmf = quant_mf[i_qscale%6][0][0];
264 for( y = 0; y < 2; y++ )
266 for( x = 0; x < 2; x++ )
269 dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
271 dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
/* Per-position rounding offsets for the alternative quantisers below, stored
 * as { numerator, denominator } fractions of the quantisation step, indexed
 * [y][x][0 = num, 1 = den].  One table for intra blocks, one for inter.
 * NOTE(review): the functions that use these tables redefine quant_4x4,
 * quant_4x4_dc and quant_2x2_dc; presumably this whole region is meant to
 * sit inside a disabled preprocessor section -- confirm. */
277 static const int f_deadzone_intra[4][4][2] = /* [num][den] */
279 { {1,2}, {3,7}, {2,5}, {1,3} },
280 { {3,7}, {2,5}, {1,3}, {1,4} },
281 { {2,5}, {1,3}, {1,4}, {1,5} },
282 { {1,3}, {1,4}, {1,5}, {1,5} }
284 static const int f_deadzone_inter[4][4][2] = /* [num][den] */
286 { {1,3}, {2,7}, {4,15},{2,9} },
287 { {2,7}, {4,15},{2,9}, {1,6} },
288 { {4,15},{2,9}, {1,6}, {1,7} },
289 { {2,9}, {1,6}, {1,7}, {2,15} }
/* Alternative 4x4 quantiser: same scaling and shift as the earlier quant_4x4,
 * but the rounding offset f is computed per coefficient position from the
 * f_deadzone_intra / f_deadzone_inter fractions instead of a fixed 1/3 or 1/6.
 * NOTE(review): this redefines quant_4x4 and declares `f` twice (a ternary
 * form and a pointer form); presumably preprocessor guards selecting one of
 * them are missing from this view -- confirm. */
293 static void quant_4x4( int16_t dct[4][4], int i_qscale, int b_intra )
/* pointer to whichever deadzone table matches the block type */
295 const int(*f_deadzone)[4][4][2] = b_intra ? &f_deadzone_intra : &f_deadzone_inter;
296 const int i_qbits = 15 + i_qscale / 6;
297 const int i_mf = i_qscale % 6;
300 for( y = 0; y < 4; y++ )
302 for( x = 0; x < 4; x++ )
/* first form: pick the table with a ternary on every coefficient */
305 const int f = b_intra ?
306 (f_deadzone_intra[y][x][0] * ( 1 << i_qbits ) / f_deadzone_intra[y][x][1])
308 (f_deadzone_inter[y][x][0] * ( 1 << i_qbits ) / f_deadzone_inter[y][x][1]);
/* second form: same value through the pre-selected table pointer */
310 const int f = (*f_deadzone)[y][x][0] * ( 1 << i_qbits ) / (*f_deadzone)[y][x][1];
315 dct[y][x] =( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
319 dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
/* Alternative luma DC quantiser: like the earlier quant_4x4_dc, but the
 * rounding offset f2 comes from the [0][0] entry of f_deadzone_intra.
 * NOTE(review): redefines quant_4x4_dc; presumably belongs to a disabled
 * region -- confirm. */
325 static void quant_4x4_dc( int16_t dct[4][4], int i_qscale )
327 const int i_qbits = 15 + i_qscale / 6;
328 const int i_qmf = quant_mf[i_qscale%6][0][0];
329 const int f2 = f_deadzone_intra[0][0][0] * ( 2 << i_qbits ) / f_deadzone_intra[0][0][1];
332 for( y = 0; y < 4; y++ )
334 for( x = 0; x < 4; x++ )
339 dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
343 dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
/* Alternative chroma DC quantiser: like the earlier quant_2x2_dc, but the
 * rounding offset f2 comes from the [0][0] entry of the intra or inter
 * deadzone table, chosen by b_intra.
 * NOTE(review): redefines quant_2x2_dc; presumably belongs to a disabled
 * region -- confirm. */
349 static void quant_2x2_dc( int16_t dct[2][2], int i_qscale, int b_intra )
351 int const i_qbits = 15 + i_qscale / 6;
352 const int i_qmf = quant_mf[i_qscale%6][0][0];
353 const int f2 = b_intra ?
354 (f_deadzone_intra[0][0][0] * ( 2 << i_qbits ) / f_deadzone_intra[0][0][1])
356 (f_deadzone_inter[0][0][0] * ( 2 << i_qbits ) / f_deadzone_inter[0][0][1]);
358 for( y = 0; y < 2; y++ )
360 for( x = 0; x < 2; x++ )
364 dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
368 dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
/****************************************************************************
 * x264_mb_decimate_score: given scanned dct coeffs, returns a score telling
 *      whether the block is cheap enough to be zeroed out entirely
 *      (a low score means "set it to null").
 *      Used for inter macroblocks (luma and chroma):
 *        luma:   for an 8x8 block:    if score < 4 -> null
 *                for the complete mb: if score < 6 -> null
 *        chroma: for the complete mb: if score < 7 -> null
 *
 * dct:   coefficients in scan order.
 * i_max: number of coefficients in dct; 64 selects the 8x8 cost table,
 *        any other value the 4x4 table (callers pass 15 or 16).
 *
 * Returns the summed cost of all non-zero coefficients, or 9 (above every
 * threshold) as soon as a coefficient with magnitude > 1 is found.
 ****************************************************************************/
static int x264_mb_decimate_score( int *dct, int i_max )
{
    /* cost of a +-1 coefficient as a function of the zero run preceding it */
    static const int i_ds_table4[16] = {
        3,2,2,1,1,1,0,0,0,0,0,0,0,0,0,0 };
    static const int i_ds_table8[64] = {
        3,3,3,3,2,2,2,2,2,2,2,2,1,1,1,1,
        1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 };

    const int *ds_table = (i_max == 64) ? i_ds_table8 : i_ds_table4;
    int i_score = 0;
    int idx = i_max - 1;

    /* skip the trailing zeros: they cost nothing */
    while( idx >= 0 && dct[idx] == 0 )
    {
        idx--;
    }

    /* walk the remaining coefficients from last to first */
    while( idx >= 0 )
    {
        int i_run;

        if( abs( dct[idx--] ) > 1 )
        {
            /* a large coefficient: never worth dropping */
            return 9;
        }

        /* count the zeros in front of the coefficient just consumed */
        i_run = 0;
        while( idx >= 0 && dct[idx] == 0 )
        {
            idx--;
            i_run++;
        }
        i_score += ds_table[i_run];
    }

    return i_score;
}
/* Encodes one intra 4x4 luma block.
 *   h:        encoder context.
 *   idx:      block index 0..15; block_idx_x / block_idx_y give its position.
 *   i_qscale: quantiser passed to quant_4x4 and x264_mb_dequant_4x4.
 * Scanned coefficients are written to h->dct.block[idx].luma4x4 and the
 * reconstruction to the fdec plane.
 * NOTE(review): in this view nothing separates the lossless branch from the
 * transform path below it; presumably an early return (or else) is missing --
 * confirm against the full file. */
421 void x264_mb_encode_i4x4( x264_t *h, int idx, int i_qscale )
423 const int i_stride = h->mb.pic.i_stride[0];
/* pixel offset of the block's top-left corner inside the macroblock */
424 const int i_offset = 4 * block_idx_x[idx] + 4 * block_idx_y[idx] * i_stride;
425 uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
426 uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
427 int16_t dct4x4[4][4];
/* lossless: store the raw pixel residual, no transform or quantisation */
429 if( h->mb.b_lossless )
431 sub_zigzag_4x4full( h->dct.block[idx].luma4x4, p_src, p_dst, i_stride );
/* lossy: transform the residual, quantise as intra, scan, then dequantise for reconstruction */
435 h->dctf.sub4x4_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
436 quant_4x4( dct4x4, i_qscale, 1 );
437 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4 );
438 x264_mb_dequant_4x4( dct4x4, i_qscale );
440 /* output samples to fdec */
441 h->dctf.add4x4_idct( p_dst, i_stride, dct4x4 );
/* Encodes one intra 8x8 luma block.
 *   h:        encoder context.
 *   idx:      8x8 block index 0..3 (bit 0 = column, bit 1 = row).
 *   i_qscale: quantiser passed to quant_8x8 and x264_mb_dequant_8x8.
 * Scanned coefficients are written to h->dct.luma8x8[idx] and the
 * reconstruction to the fdec plane. */
444 void x264_mb_encode_i8x8( x264_t *h, int idx, int i_qscale )
446 const int i_stride = h->mb.pic.i_stride[0];
/* pixel offset of the block's top-left corner inside the macroblock */
447 const int i_offset = 8 * (idx&1) + 8 * (idx>>1) * i_stride;
448 uint8_t *p_src = &h->mb.pic.p_fenc[0][i_offset];
449 uint8_t *p_dst = &h->mb.pic.p_fdec[0][i_offset];
450 int16_t dct8x8[8][8];
/* transform the residual, quantise as intra, scan, dequantise, reconstruct */
452 h->dctf.sub8x8_dct8( dct8x8, p_src, i_stride, p_dst, i_stride );
453 quant_8x8( dct8x8, i_qscale, 1 );
454 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8 );
455 x264_mb_dequant_8x8( dct8x8, i_qscale );
456 h->dctf.add8x8_idct8( p_dst, i_stride, dct8x8 );
/* Encodes the luma of an intra 16x16 macroblock.
 *   h:        encoder context.
 *   i_qscale: quantiser for both the AC blocks and the DC block.
 * AC coefficients of each 4x4 block go to h->dct.block[i].residual_ac and the
 * 16 DC coefficients to h->dct.luma16x16_dc; the reconstruction is written to
 * the fdec plane.
 * NOTE(review): as in x264_mb_encode_i4x4, the statement that ends the
 * lossless branch is not visible here -- confirm. */
459 static void x264_mb_encode_i16x16( x264_t *h, int i_qscale )
461 const int i_stride = h->mb.pic.i_stride[0];
462 uint8_t *p_src = h->mb.pic.p_fenc[0];
463 uint8_t *p_dst = h->mb.pic.p_fdec[0];
/* slot 0 collects the 16 DC coefficients; slots 1..16 hold the sixteen 4x4 blocks */
465 int16_t dct4x4[16+1][4][4];
/* lossless: raw pixel residual; AC positions via sub_zigzag_4x4, DC taken separately */
469 if( h->mb.b_lossless )
471 for( i = 0; i < 16; i++ )
473 int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
474 sub_zigzag_4x4( h->dct.block[i].residual_ac, p_src+o, p_dst+o, i_stride );
475 dct4x4[0][block_idx_y[i]][block_idx_x[i]] = p_src[o] - p_dst[o];
478 scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
/* lossy: transform all sixteen 4x4 blocks at once */
482 h->dctf.sub16x16_dct( &dct4x4[1], p_src, i_stride, p_dst, i_stride );
483 for( i = 0; i < 16; i++ )
/* gather the DC of block i into the DC block before the AC part is quantised */
486 dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
488 /* quant/scan/dequant */
489 quant_4x4( dct4x4[1+i], i_qscale, 1 );
490 scan_zigzag_4x4( h->dct.block[i].residual_ac, dct4x4[1+i] );
491 x264_mb_dequant_4x4( dct4x4[1+i], i_qscale );
/* second-stage transform and quantisation of the 4x4 DC block */
494 h->dctf.dct4x4dc( dct4x4[0] );
495 quant_4x4_dc( dct4x4[0], i_qscale );
496 scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
498 /* output samples to fdec */
499 h->dctf.idct4x4dc( dct4x4[0] );
500 x264_mb_dequant_4x4_dc( dct4x4[0], i_qscale ); /* XXX not inversed */
502 /* calculate dct coeffs */
503 for( i = 0; i < 16; i++ )
/* put the reconstructed DC back into position [0][0] of each 4x4 block */
506 dct4x4[1+i][0][0] = dct4x4[0][block_idx_y[i]][block_idx_x[i]];
508 /* put pixels to fdec */
509 h->dctf.add16x16_idct( p_dst, i_stride, &dct4x4[1] );
/* Encodes both 8x8 chroma planes of the current macroblock.
 *   h:        encoder context.
 *   b_inter:  non-zero for inter macroblocks; selects the inter rounding
 *             offset and enables the decimation test.
 *   i_qscale: chroma quantiser.
 * AC coefficients go to h->dct.block[16 + i + ch*4].residual_ac and DC
 * coefficients to h->dct.chroma_dc[ch]; the reconstruction is written to the
 * fdec planes.
 * NOTE(review): several short lines (branch terminators, the body of the
 * zeroing loops) are not visible in this view. */
512 static void x264_mb_encode_8x8_chroma( x264_t *h, int b_inter, int i_qscale )
/* ch = 0 and ch = 1 address planes 1 and 2 */
516 for( ch = 0; ch < 2; ch++ )
518 const int i_stride = h->mb.pic.i_stride[1+ch];
519 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
520 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
521 int i_decimate_score = 0;
523 int16_t dct2x2[2][2];
524 int16_t dct4x4[4][4][4];
/* lossless: raw pixel residual, DC taken separately from the AC scan */
526 if( h->mb.b_lossless )
528 for( i = 0; i < 4; i++ )
530 int o = block_idx_x[i]*4 + block_idx_y[i]*4*i_stride;
531 sub_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, p_src+o, p_dst+o, i_stride );
532 h->dct.chroma_dc[ch][i] = p_src[o] - p_dst[o];
/* lossy: transform the four 4x4 blocks of this plane */
538 h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
539 /* calculate dct coeffs */
540 for( i = 0; i < 4; i++ )
/* gather the DC of block i into the 2x2 DC block */
543 dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
545 quant_4x4( dct4x4[i], i_qscale, b_inter ? 0 : 1 );
546 scan_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
547 x264_mb_dequant_4x4( dct4x4[i], i_qscale );
/* accumulate the decimation score of this plane's AC coefficients */
551 i_decimate_score += x264_mb_decimate_score( h->dct.block[16+i+ch*4].residual_ac, 15 );
/* second-stage transform and quantisation of the 2x2 DC block */
555 h->dctf.dct2x2dc( dct2x2 );
556 quant_2x2_dc( dct2x2, i_qscale, b_inter ? 0 : 1 );
557 scan_zigzag_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
559 /* output samples to fdec */
560 h->dctf.idct2x2dc( dct2x2 );
561 x264_mb_dequant_2x2_dc( dct2x2, i_qscale ); /* XXX not inversed */
563 if( b_inter && i_decimate_score < 7 )
565 /* Near null chroma 8x8 block so make it null (bits saving) */
566 for( i = 0; i < 4; i++ )
569 for( x = 0; x < 15; x++ )
571 h->dct.block[16+i+ch*4].residual_ac[x] = 0;
/* NOTE(review): the statement inside this double loop is not visible; presumably it clears dct4x4[i] -- confirm */
573 for( x = 0; x < 4; x++ )
575 for( y = 0; y < 4; y++ )
583 /* calculate dct coeffs */
584 for( i = 0; i < 4; i++ )
/* put the reconstructed DC back into position [0][0] of each 4x4 block */
587 dct4x4[i][0][0] = dct2x2[block_idx_y[i]][block_idx_x[i]];
589 h->dctf.add8x8_idct( p_dst, i_stride, dct4x4 );
/* Marks the current macroblock as carrying no residual: clears the luma and
 * chroma coded-block patterns, zeroes the cached non-zero counts of all
 * 16 luma + 8 chroma blocks, and stores a zero cbp for this macroblock. */
593 static void x264_macroblock_encode_skip( x264_t *h )
596 h->mb.i_cbp_luma = 0x00;
597 h->mb.i_cbp_chroma = 0x00;
599 for( i = 0; i < 16+8; i++ )
601 h->mb.cache.non_zero_count[x264_scan8[i]] = 0;
605 h->mb.cbp[h->mb.i_mb_xy] = 0;
608 /*****************************************************************************
609 * x264_macroblock_encode_pskip:
610 * Encode an already marked skip block
611 *****************************************************************************/
/* Motion-compensates luma and both chroma planes from reference 0 of list 0
 * into fdec using the cached motion vector, clipped to the allowed range,
 * then clears all residual state via x264_macroblock_encode_skip.
 * NOTE(review): the trailing arguments of the three mc calls are not visible
 * in this view. */
612 void x264_macroblock_encode_pskip( x264_t *h )
/* clip each motion vector component to [mv_min, mv_max] */
614 const int mvx = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][0],
615 h->mb.mv_min[0], h->mb.mv_max[0] );
616 const int mvy = x264_clip3( h->mb.cache.mv[0][x264_scan8[0]][1],
617 h->mb.mv_min[1], h->mb.mv_max[1] );
619 /* Motion compensation XXX probably unneeded */
620 h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
621 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
/* reference entries 4 and 5 are used for the two chroma planes */
625 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4], h->mb.pic.i_stride[1],
626 h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1],
629 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][5], h->mb.pic.i_stride[2],
630 h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2],
633 x264_macroblock_encode_skip( h );
636 /*****************************************************************************
637 * x264_macroblock_encode:
638 *****************************************************************************/
/* Encodes the current macroblock according to h->mb.i_type:
 *   - skip types are forwarded to the skip encoders;
 *   - intra types are predicted and encoded block by block;
 *   - the remaining types have their luma residual transformed, quantised
 *     and decimated;
 *   - chroma is then encoded, the coded-block patterns and non-zero counts
 *     are derived, and the macroblock may be converted to P_SKIP / B_SKIP.
 * NOTE(review): many short lines (local declarations, branch terminators,
 * early returns, the motion-compensation calls) are not visible in this view;
 * the comments below describe only what the visible lines establish. */
639 void x264_macroblock_encode( x264_t *h )
642 int i_qp = h->mb.i_qp;
/* already-decided skip macroblocks: nothing to transform */
645 if( h->mb.i_type == P_SKIP )
648 x264_macroblock_encode_pskip( h );
651 if( h->mb.i_type == B_SKIP )
653 /* XXX motion compensation is probably unneeded */
655 x264_macroblock_encode_skip( h );
659 if( h->mb.i_type == I_16x16 )
661 const int i_mode = h->mb.i_intra16x16_pred_mode;
662 h->mb.b_transform_8x8 = 0;
663 /* do the right prediction */
664 h->predict_16x16[i_mode]( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
666 /* encode the 16x16 macroblock */
667 x264_mb_encode_i16x16( h, i_qp );
669 /* fix the pred mode value */
670 h->mb.i_intra16x16_pred_mode = x264_mb_pred_mode16x16_fix[i_mode];
672 else if( h->mb.i_type == I_8x8 )
674 h->mb.b_transform_8x8 = 1;
/* predict then encode each 8x8 block in turn, so later blocks see reconstructed neighbours */
675 for( i = 0; i < 4; i++ )
677 const int i_dst = h->mb.pic.i_stride[0];
678 uint8_t *p_dst = &h->mb.pic.p_fdec[0][8 * (i&1) + 8 * (i>>1) * i_dst];
679 int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]];
681 h->predict_8x8[i_mode]( p_dst, i_dst, h->mb.i_neighbour8[i] );
682 x264_mb_encode_i8x8( h, i, i_qp );
683 h->mb.cache.intra4x4_pred_mode[x264_scan8[4*i]] = x264_mb_pred_mode4x4_fix(i_mode);
686 else if( h->mb.i_type == I_4x4 )
688 h->mb.b_transform_8x8 = 0;
/* predict then encode each 4x4 block in turn */
689 for( i = 0; i < 16; i++ )
691 const int i_dst = h->mb.pic.i_stride[0];
692 uint8_t *p_dst = &h->mb.pic.p_fdec[0][4 * block_idx_x[i] + 4 * block_idx_y[i] * i_dst];
693 int i_mode = h->mb.cache.intra4x4_pred_mode[x264_scan8[i]];
695 h->predict_4x4[i_mode]( p_dst, i_dst );
696 x264_mb_encode_i4x4( h, i, i_qp );
697 h->mb.cache.intra4x4_pred_mode[x264_scan8[i]] = x264_mb_pred_mode4x4_fix(i_mode);
/* remaining (non-intra) types: i_decimate_mb accumulates the whole-macroblock decimation score */
703 int i_decimate_mb = 0;
705 /* Motion compensation */
/* lossless: store the raw pixel residual of every 4x4 block */
708 if( h->mb.b_lossless )
710 for( i4x4 = 0; i4x4 < 16; i4x4++ )
712 int o = block_idx_x[i4x4]*4 + block_idx_y[i4x4]*4 * h->mb.pic.i_stride[0];
713 sub_zigzag_4x4full( h->dct.block[i4x4].luma4x4, h->mb.pic.p_fenc[0]+o, h->mb.pic.p_fdec[0]+o, h->mb.pic.i_stride[0] );
/* 8x8 transform path */
716 else if( h->mb.b_transform_8x8 )
718 int16_t dct8x8[4][8][8];
719 h->dctf.sub16x16_dct8( dct8x8,
720 h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
721 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
723 for( idx = 0; idx < 4; idx++ )
727 quant_8x8( dct8x8[idx], i_qp, 0 );
728 scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8[idx] );
729 x264_mb_dequant_8x8( dct8x8[idx], i_qp );
/* drop an 8x8 block whose score is below 4 */
731 i_decimate_8x8 = x264_mb_decimate_score( h->dct.luma8x8[idx], 64 );
732 i_decimate_mb += i_decimate_8x8;
733 if( i_decimate_8x8 < 4 )
735 memset( h->dct.luma8x8[idx], 0, sizeof( h->dct.luma8x8[idx] ) );
736 memset( dct8x8[idx], 0, sizeof( dct8x8[idx] ) );
/* drop the whole luma residual when the macroblock score is below 6 */
740 if( i_decimate_mb < 6 )
741 memset( h->dct.luma8x8, 0, sizeof( h->dct.luma8x8 ) );
743 h->dctf.add16x16_idct8( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct8x8 );
/* 4x4 transform path */
747 int16_t dct4x4[16][4][4];
748 h->dctf.sub16x16_dct( dct4x4,
749 h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
750 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
752 for( i8x8 = 0; i8x8 < 4; i8x8++ )
756 /* encode one 4x4 block */
758 for( i4x4 = 0; i4x4 < 4; i4x4++ )
760 idx = i8x8 * 4 + i4x4;
762 quant_4x4( dct4x4[idx], i_qp, 0 );
763 scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
764 x264_mb_dequant_4x4( dct4x4[idx], i_qp );
766 i_decimate_8x8 += x264_mb_decimate_score( h->dct.block[idx].luma4x4, 16 );
769 /* decimate this 8x8 block */
770 i_decimate_mb += i_decimate_8x8;
771 if( i_decimate_8x8 < 4 )
773 for( i4x4 = 0; i4x4 < 4; i4x4++ )
776 idx = i8x8 * 4 + i4x4;
777 for( i = 0; i < 16; i++ )
778 h->dct.block[idx].luma4x4[i] = 0;
779 for( x = 0; x < 4; x++ )
780 for( y = 0; y < 4; y++ )
781 dct4x4[idx][x][y] = 0;
/* drop the whole luma residual when the macroblock score is below 6 */
786 if( i_decimate_mb < 6 )
787 for( idx = 0; idx < 16; idx++ )
788 for( i = 0; i < 16; i++ )
789 h->dct.block[idx].luma4x4[i] = 0;
791 h->dctf.add16x16_idct( h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0], dct4x4 );
/* from here on i_qp is the chroma QP: luma QP plus the PPS offset, clipped to 0..51 and mapped through the table */
796 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
797 if( IS_INTRA( h->mb.i_type ) )
799 const int i_mode = h->mb.i_chroma_pred_mode;
800 h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[1], h->mb.pic.i_stride[1] );
801 h->predict_8x8c[i_mode]( h->mb.pic.p_fdec[2], h->mb.pic.i_stride[2] );
804 /* encode the 8x8 blocks */
805 x264_mb_encode_8x8_chroma( h, !IS_INTRA( h->mb.i_type ), i_qp );
807 /* Calculate the Luma/Chroma pattern and non_zero_count */
808 h->mb.i_cbp_luma = 0x00;
809 if( h->mb.i_type == I_16x16 )
811 for( i = 0; i < 16; i++ )
813 const int nz = array_non_zero_count( h->dct.block[i].residual_ac, 15 );
814 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
816 h->mb.i_cbp_luma = 0x0f;
819 else if( h->mb.b_transform_8x8 )
821 /* coded_block_flag is enough for CABAC.
822 * the full non_zero_count is done only in CAVLC. */
823 for( i = 0; i < 4; i++ )
825 const int nz = array_non_zero( h->dct.luma8x8[i], 64 );
/* the four 4x4 positions covered by this 8x8 block share the same flag */
827 for( j = 0; j < 4; j++ )
828 h->mb.cache.non_zero_count[x264_scan8[4*i+j]] = nz;
830 h->mb.i_cbp_luma |= 1 << i;
835 for( i = 0; i < 16; i++ )
837 const int nz = array_non_zero_count( h->dct.block[i].luma4x4, 16 );
838 h->mb.cache.non_zero_count[x264_scan8[i]] = nz;
/* one cbp bit per 8x8 quadrant, i.e. per group of four 4x4 blocks */
840 h->mb.i_cbp_luma |= 1 << (i/4);
844 /* Calculate the chroma pattern */
845 h->mb.i_cbp_chroma = 0x00;
846 for( i = 0; i < 8; i++ )
848 const int nz = array_non_zero_count( h->dct.block[16+i].residual_ac, 15 );
849 h->mb.cache.non_zero_count[x264_scan8[16+i]] = nz;
852 h->mb.i_cbp_chroma = 0x02; /* dc+ac (we can't do only ac) */
/* NOTE(review): on the second line of this condition the closing parenthesis
 * sits before the final "> 0", so that comparison applies to the whole ||
 * expression rather than to the second count.  The result is the same only as
 * long as array_non_zero_count never returns a negative value -- confirm and
 * tidy the parentheses. */
855 if( h->mb.i_cbp_chroma == 0x00 &&
856 ( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 || array_non_zero_count( h->dct.chroma_dc[1], 4 ) ) > 0 )
858 h->mb.i_cbp_chroma = 0x01; /* dc only */
/* CABAC only: gather which DC blocks are non-zero (the statements that set i_cbp_dc are not visible here) */
861 if( h->param.b_cabac )
863 if( h->mb.i_type == I_16x16 && array_non_zero_count( h->dct.luma16x16_dc, 16 ) > 0 )
868 if( array_non_zero_count( h->dct.chroma_dc[0], 4 ) > 0 )
870 if( array_non_zero_count( h->dct.chroma_dc[1], 4 ) > 0 )
/* stored cbp layout: DC flags from bit 8, chroma pattern from bit 4, luma pattern in the low bits */
875 h->mb.cbp[h->mb.i_mb_xy] = (i_cbp_dc << 8) | (h->mb.i_cbp_chroma << 4) | h->mb.i_cbp_luma;
877 if( h->mb.i_type != I_16x16 && h->mb.i_cbp_luma == 0 && h->mb.i_cbp_chroma == 0 )
879 /* It won't change anything at the decoder side but it is needed else the
880 * decoder will fail to read the next QP */
881 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp;
/* NOTE(review): the opening line of the comment that the next two lines belong to is not visible in this view */
886 * XXX: in the me perhaps we should take x264_mb_predict_mv_pskip into account
887 * (if multiple mv give same result)*/
/* a 16x16 P block on reference 0 with no residual becomes P_SKIP when its vector equals the predicted skip vector */
888 if( h->mb.i_type == P_L0 && h->mb.i_partition == D_16x16 &&
889 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 &&
890 h->mb.cache.ref[0][x264_scan8[0]] == 0 )
894 x264_mb_predict_mv_pskip( h, mvp );
895 if( h->mb.cache.mv[0][x264_scan8[0]][0] == mvp[0] &&
896 h->mb.cache.mv[0][x264_scan8[0]][1] == mvp[1] )
898 h->mb.i_type = P_SKIP;
899 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp; /* Needed */
900 /* XXX qp reset may have issues when used in RD instead of the real encode */
904 /* Check for B_SKIP */
905 if( h->mb.i_type == B_DIRECT &&
906 h->mb.i_cbp_luma == 0x00 && h->mb.i_cbp_chroma== 0x00 )
908 h->mb.i_type = B_SKIP;
909 h->mb.qp[h->mb.i_mb_xy] = h->mb.i_last_qp; /* Needed */
/* with no luma residual the 8x8 transform flag is cleared, except for I_8x8 */
912 if( h->mb.i_cbp_luma == 0 && h->mb.i_type != I_8x8 )
913 h->mb.b_transform_8x8 = 0;
916 /*****************************************************************************
917 * x264_macroblock_probe_skip:
918 * Check if the current MB could be encoded as a [PB]_SKIP (it supposes you use
920 *****************************************************************************/
921 int x264_macroblock_probe_skip( x264_t *h, int b_bidir )
923 DECLARE_ALIGNED( int16_t, dct4x4[16][4][4], 16 );
924 DECLARE_ALIGNED( int16_t, dct2x2[2][2], 16 );
925 DECLARE_ALIGNED( int, dctscan[16], 16 );
927 int i_qp = h->mb.i_qp;
937 x264_mb_predict_mv_pskip( h, mvp );
938 mvp[0] = x264_clip3( mvp[0], h->mb.mv_min[0], h->mb.mv_max[0] );
939 mvp[1] = x264_clip3( mvp[1], h->mb.mv_min[1], h->mb.mv_max[1] );
941 /* Motion compensation */
942 h->mc.mc_luma( h->mb.pic.p_fref[0][0], h->mb.pic.i_stride[0],
943 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0],
944 mvp[0], mvp[1], 16, 16 );
948 h->dctf.sub16x16_dct( dct4x4, h->mb.pic.p_fenc[0], h->mb.pic.i_stride[0],
949 h->mb.pic.p_fdec[0], h->mb.pic.i_stride[0] );
951 for( i8x8 = 0, i_decimate_mb = 0; i8x8 < 4; i8x8++ )
953 /* encode one 4x4 block */
954 for( i4x4 = 0; i4x4 < 4; i4x4++ )
956 const int idx = i8x8 * 4 + i4x4;
958 quant_4x4( dct4x4[idx], i_qp, 0 );
959 scan_zigzag_4x4full( dctscan, dct4x4[idx] );
961 i_decimate_mb += x264_mb_decimate_score( dctscan, 16 );
963 if( i_decimate_mb >= 6 )
972 i_qp = i_chroma_qp_table[x264_clip3( i_qp + h->pps->i_chroma_qp_index_offset, 0, 51 )];
974 for( ch = 0; ch < 2; ch++ )
976 const int i_stride = h->mb.pic.i_stride[1+ch];
977 uint8_t *p_src = h->mb.pic.p_fenc[1+ch];
978 uint8_t *p_dst = h->mb.pic.p_fdec[1+ch];
982 h->mc.mc_chroma( h->mb.pic.p_fref[0][0][4+ch], i_stride,
983 h->mb.pic.p_fdec[1+ch], i_stride,
984 mvp[0], mvp[1], 8, 8 );
987 h->dctf.sub8x8_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
989 /* calculate dct DC */
990 dct2x2[0][0] = dct4x4[0][0][0];
991 dct2x2[0][1] = dct4x4[1][0][0];
992 dct2x2[1][0] = dct4x4[2][0][0];
993 dct2x2[1][1] = dct4x4[3][0][0];
994 h->dctf.dct2x2dc( dct2x2 );
995 quant_2x2_dc( dct2x2, i_qp, 0 );
996 if( dct2x2[0][0] || dct2x2[0][1] || dct2x2[1][0] || dct2x2[1][1] )
1002 /* calculate dct coeffs */
1003 for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
1005 quant_4x4( dct4x4[i4x4], i_qp, 0 );
1006 scan_zigzag_4x4( dctscan, dct4x4[i4x4] );
1008 i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );
1009 if( i_decimate_mb >= 7 )