/*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/**
 * @file
 * H.264 / AVC / MPEG4 part10 prediction functions.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */
31 #include "h264pred_template.c"
35 #include "h264pred_template.c"
39 #include "h264pred_template.c"
/**
 * VP8-style 4x4 vertical prediction.
 *
 * The row above the block is smoothed with a (1, 2, 1) / 4 kernel, using the
 * top-left neighbour at the left end and topright[0] at the right end. The
 * four results are packed with PACK_4U8() and the same packed value is
 * written to each of the four rows with AV_WN32A().
 *
 * @param src      first pixel of the 4x4 block; src[-1 - stride] and
 *                 src[0..3 - stride] are read
 * @param topright samples to the right of the top edge; only topright[0]
 *                 is read
 * @param stride   distance between vertically adjacent pixels of src
 */
static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride)
{
    const int lt = src[-1 - 1 * stride];
    const int t0 = src[ 0 - 1 * stride];
    const int t1 = src[ 1 - 1 * stride];
    const int t2 = src[ 2 - 1 * stride];
    const int t3 = src[ 3 - 1 * stride];
    const int t4 = topright[0];
    const uint32_t v = PACK_4U8((lt + 2 * t0 + t1 + 2) >> 2,
                                (t0 + 2 * t1 + t2 + 2) >> 2,
                                (t1 + 2 * t2 + t3 + 2) >> 2,
                                (t2 + 2 * t3 + t4 + 2) >> 2);
    int y;

    /* All edge samples were read above, so overwriting the block is safe. */
    for (y = 0; y < 4; y++)
        AV_WN32A(src + y * stride, v);
}
/**
 * VP8-style 4x4 horizontal prediction.
 *
 * The left column is smoothed with a (1, 2, 1) / 4 kernel, using the
 * top-left neighbour above the first row and repeating l3 below the last
 * one. Each row is filled with its smoothed sample replicated into all four
 * bytes of a 32-bit word written with AV_WN32A().
 *
 * @param src      first pixel of the 4x4 block; src[-1 - stride] and
 *                 src[-1 + y * stride] for y = 0..3 are read
 * @param topright unused
 * @param stride   distance between vertically adjacent pixels of src
 */
static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int stride)
{
    const int lt = src[-1 - 1 * stride];
    const int l0 = src[-1 + 0 * stride];
    const int l1 = src[-1 + 1 * stride];
    const int l2 = src[-1 + 2 * stride];
    const int l3 = src[-1 + 3 * stride];

    /* The replication multiply is done unsigned: the filtered sample can be
     * up to 255 and 255 * 0x01010101 does not fit in a signed int. */
    AV_WN32A(src + 0 * stride, ((lt + 2 * l0 + l1 + 2) >> 2) * 0x01010101U);
    AV_WN32A(src + 1 * stride, ((l0 + 2 * l1 + l2 + 2) >> 2) * 0x01010101U);
    AV_WN32A(src + 2 * stride, ((l1 + 2 * l2 + l3 + 2) >> 2) * 0x01010101U);
    AV_WN32A(src + 3 * stride, ((l2 + 2 * l3 + l3 + 2) >> 2) * 0x01010101U);
}
/**
 * SVQ3-style 4x4 diagonal down-left prediction.
 *
 * Only three distinct values are produced, each the average of a left and a
 * top sample with the same index:
 *   - pixel (0,0)            gets (l1 + t1) >> 1
 *   - pixels with x + y == 1 get  (l2 + t2) >> 1
 *   - every other pixel      gets (l3 + t3) >> 1
 *
 * @param src      first pixel of the 4x4 block; the row above and the
 *                 column to the left are read
 * @param topright unused
 * @param stride   distance between vertically adjacent pixels of src
 */
static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int stride)
{
    const int t1 = src[1 - 1 * stride];
    const int t2 = src[2 - 1 * stride];
    const int t3 = src[3 - 1 * stride];
    const int l1 = src[-1 + 1 * stride];
    const int l2 = src[-1 + 2 * stride];
    const int l3 = src[-1 + 3 * stride];
    const int diag[3] = { (l1 + t1) >> 1, (l2 + t2) >> 1, (l3 + t3) >> 1 };
    int x, y;

    for (y = 0; y < 4; y++) {
        for (x = 0; x < 4; x++) {
            const int d = x + y;
            src[x + y * stride] = diag[d < 2 ? d : 2];
        }
    }
}
/**
 * RV40-style 4x4 diagonal down-left prediction.
 *
 * All pixels on one anti-diagonal (constant x + y) share one value. For
 * diagonals 0..5 the value averages a (1, 2, 1) filter over the top edge
 * and the same filter over the left edge. The last diagonal averages the
 * final two samples of each edge.
 *
 * @param src      first pixel of the 4x4 block; the row above and the eight
 *                 pixels of the column to the left (rows 0..7) are read
 * @param topright four samples continuing the top edge to the right
 * @param stride   distance between vertically adjacent pixels of src
 */
static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride)
{
    int t[8], l[8], diag[7];
    int i, x, y;

    for (i = 0; i < 4; i++) {
        t[i]     = src[i - 1 * stride];
        t[i + 4] = topright[i];
    }
    for (i = 0; i < 8; i++)
        l[i] = src[-1 + i * stride];

    for (i = 0; i < 6; i++)
        diag[i] = (t[i] + t[i + 2] + 2 * t[i + 1] + 2 +
                   l[i] + l[i + 2] + 2 * l[i + 1] + 2) >> 3;
    diag[6] = (t[6] + t[7] + 1 + l[6] + l[7] + 1) >> 2;

    /* Edges are fully cached above, so the block can now be overwritten. */
    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++)
            src[x + y * stride] = diag[x + y];
}
/**
 * RV40-style 4x4 diagonal down-left prediction for blocks without
 * down-left neighbours.
 *
 * Uses the same per-anti-diagonal formula as the regular RV40 down-left
 * predictor, but only reads four left samples; the last one (l3) stands in
 * for the four samples that would lie below it.
 *
 * @param src      first pixel of the 4x4 block; the row above and the
 *                 column to the left (rows 0..3) are read
 * @param topright four samples continuing the top edge to the right
 * @param stride   distance between vertically adjacent pixels of src
 */
static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride)
{
    int t[8], l[8], diag[7];
    int i, x, y;

    for (i = 0; i < 4; i++) {
        t[i]     = src[i - 1 * stride];
        t[i + 4] = topright[i];
        l[i]     = src[-1 + i * stride];
    }
    /* No down-left samples: repeat the last available left sample. */
    for (i = 4; i < 8; i++)
        l[i] = l[3];

    for (i = 0; i < 6; i++)
        diag[i] = (t[i] + t[i + 2] + 2 * t[i + 1] + 2 +
                   l[i] + l[i + 2] + 2 * l[i + 1] + 2) >> 3;
    diag[6] = (t[6] + t[7] + 1 + l[6] + l[7] + 1) >> 2;

    for (y = 0; y < 4; y++)
        for (x = 0; x < 4; x++)
            src[x + y * stride] = diag[x + y];
}
/**
 * Shared body of the RV40-style 4x4 vertical-left predictors.
 *
 * Rows 0 and 2 use a 2-tap average of neighbouring top samples, rows 1 and 3
 * a (1, 2, 1) / 4 filter; rows 2 and 3 are shifted one sample to the right
 * relative to rows 0 and 1. The two leftmost pixels of rows 0 and 1
 * additionally blend in filtered left samples supplied by the caller.
 *
 * @param src      first pixel of the 4x4 block; the row above it is read
 * @param topright samples continuing the top edge (topright[0..2] are read)
 * @param stride   distance between vertically adjacent pixels of src
 * @param l0       left sample of row 0 (not used by the formulas)
 * @param l1       left sample of row 1
 * @param l2       left sample of row 2
 * @param l3       left sample of row 3
 * @param l4       sample below l3, or a substitute chosen by the caller
 */
static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, int stride,
                                       const int l0, const int l1, const int l2, const int l3, const int l4)
{
    int t[7], avg2[5], avg3[5];
    int i, x;

    for (i = 0; i < 4; i++)
        t[i] = src[i - 1 * stride];
    for (i = 0; i < 3; i++)
        t[i + 4] = topright[i];

    for (i = 0; i < 5; i++) {
        avg2[i] = (t[i] + t[i + 1] + 1) >> 1;
        avg3[i] = (t[i] + 2 * t[i + 1] + t[i + 2] + 2) >> 2;
    }

    for (x = 0; x < 4; x++) {
        src[x + 0 * stride] = avg2[x];
        src[x + 1 * stride] = avg3[x];
        src[x + 2 * stride] = avg2[x + 1];
        src[x + 3 * stride] = avg3[x + 1];
    }

    /* The first column of rows 0 and 1 also depends on the left edge. */
    src[0 + 0 * stride] = (2 * t[0] + 2 * t[1] + l1 + 2 * l2 + l3 + 4) >> 3;
    src[0 + 1 * stride] = (t[0] + 2 * t[1] + t[2] + l2 + 2 * l3 + l4 + 4) >> 3;
}
/**
 * RV40-style 4x4 vertical-left prediction when the pixel below the left
 * column is available.
 *
 * Reads five samples from the column left of the block (rows 0..4) and
 * forwards them to pred4x4_vertical_left_rv40().
 *
 * @param src      first pixel of the 4x4 block
 * @param topright samples continuing the top edge to the right
 * @param stride   distance between vertically adjacent pixels of src
 */
static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride)
{
    const int l0 = src[-1 + 0 * stride];
    const int l1 = src[-1 + 1 * stride];
    const int l2 = src[-1 + 2 * stride];
    const int l3 = src[-1 + 3 * stride];
    const int l4 = src[-1 + 4 * stride];

    pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4);
}
/**
 * RV40-style 4x4 vertical-left prediction when no pixel below the left
 * column is available.
 *
 * Reads four left samples (rows 0..3) and passes l3 a second time in place
 * of the missing fifth sample to pred4x4_vertical_left_rv40().
 *
 * @param src      first pixel of the 4x4 block
 * @param topright samples continuing the top edge to the right
 * @param stride   distance between vertically adjacent pixels of src
 */
static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride)
{
    const int l0 = src[-1 + 0 * stride];
    const int l1 = src[-1 + 1 * stride];
    const int l2 = src[-1 + 2 * stride];
    const int l3 = src[-1 + 3 * stride];

    pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3);
}
/**
 * VP8-style 4x4 vertical-left prediction.
 *
 * Rows 0 and 2 hold 2-tap averages of neighbouring top samples, rows 1 and 3
 * hold (1, 2, 1) / 4 filtered top samples; rows 2 and 3 are shifted one
 * sample to the right. The last pixel of rows 2 and 3 differs from that
 * pattern: both use the 3-tap filter, centred on t5 and t6 respectively.
 *
 * @param src      first pixel of the 4x4 block; the row above it is read
 * @param topright four samples continuing the top edge to the right
 * @param stride   distance between vertically adjacent pixels of src
 */
static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, int stride)
{
    int t[8], avg2[4], avg3[4];
    int i, x;

    for (i = 0; i < 4; i++) {
        t[i]     = src[i - 1 * stride];
        t[i + 4] = topright[i];
    }

    for (i = 0; i < 4; i++) {
        avg2[i] = (t[i] + t[i + 1] + 1) >> 1;
        avg3[i] = (t[i] + 2 * t[i + 1] + t[i + 2] + 2) >> 2;
    }

    for (x = 0; x < 4; x++) {
        src[x + 0 * stride] = avg2[x];
        src[x + 1 * stride] = avg3[x];
    }
    for (x = 0; x < 3; x++) {
        src[x + 2 * stride] = avg2[x + 1];
        src[x + 3 * stride] = avg3[x + 1];
    }

    /* Bottom-right corner pixels use the 3-tap filter further along the edge. */
    src[3 + 2 * stride] = (t[4] + 2 * t[5] + t[6] + 2) >> 2;
    src[3 + 3 * stride] = (t[5] + 2 * t[6] + t[7] + 2) >> 2;
}
/**
 * RV40-style 4x4 horizontal-up prediction.
 *
 * The upper-left part of the block blends filtered top/top-right samples
 * with filtered left samples; the lower-right part is predicted from the
 * left and down-left samples only.
 *
 * @param src      first pixel of the 4x4 block; the row above and the
 *                 column to the left (rows 0..6) are read
 * @param topright four samples continuing the top edge to the right
 * @param stride   distance between vertically adjacent pixels of src
 */
static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright, int stride)
{
    const int l0 = src[-1 + 0 * stride];
    const int l1 = src[-1 + 1 * stride];
    const int l2 = src[-1 + 2 * stride];
    const int l3 = src[-1 + 3 * stride];
    const int l4 = src[-1 + 4 * stride];
    const int l5 = src[-1 + 5 * stride];
    const int l6 = src[-1 + 6 * stride];
    const int t1 = src[1 - 1 * stride];
    const int t2 = src[2 - 1 * stride];
    const int t3 = src[3 - 1 * stride];
    const int t4 = topright[0];
    const int t5 = topright[1];
    const int t6 = topright[2];
    const int t7 = topright[3];

    src[0 + 0 * stride] = (t1 + 2 * t2 + t3 + 2 * l0 + 2 * l1 + 4) >> 3;
    src[1 + 0 * stride] = (t2 + 2 * t3 + t4 + l0 + 2 * l1 + l2 + 4) >> 3;
    src[2 + 0 * stride] =
    src[0 + 1 * stride] = (t3 + 2 * t4 + t5 + 2 * l1 + 2 * l2 + 4) >> 3;
    src[3 + 0 * stride] =
    src[1 + 1 * stride] = (t4 + 2 * t5 + t6 + l1 + 2 * l2 + l3 + 4) >> 3;
    src[2 + 1 * stride] =
    src[0 + 2 * stride] = (t5 + 2 * t6 + t7 + 2 * l2 + 2 * l3 + 4) >> 3;
    src[3 + 1 * stride] =
    src[1 + 2 * stride] = (t6 + 3 * t7 + l2 + 3 * l3 + 4) >> 3;
    src[3 + 2 * stride] =
    src[1 + 3 * stride] = (l3 + 2 * l4 + l5 + 2) >> 2;
    src[0 + 3 * stride] =
    src[2 + 2 * stride] = (t6 + t7 + l3 + l4 + 2) >> 2;
    src[2 + 3 * stride] = (l4 + l5 + 1) >> 1;
    src[3 + 3 * stride] = (l4 + 2 * l5 + l6 + 2) >> 2;
}
/**
 * RV40-style 4x4 horizontal-up prediction for blocks without down-left
 * neighbours.
 *
 * Same as the regular RV40 horizontal-up predictor with every down-left
 * sample replaced by l3, which makes the pixels that depend only on the
 * down-left samples collapse to l3 itself.
 *
 * @param src      first pixel of the 4x4 block; the row above and the
 *                 column to the left (rows 0..3) are read
 * @param topright four samples continuing the top edge to the right
 * @param stride   distance between vertically adjacent pixels of src
 */
static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride)
{
    const int l0 = src[-1 + 0 * stride];
    const int l1 = src[-1 + 1 * stride];
    const int l2 = src[-1 + 2 * stride];
    const int l3 = src[-1 + 3 * stride];
    const int t1 = src[1 - 1 * stride];
    const int t2 = src[2 - 1 * stride];
    const int t3 = src[3 - 1 * stride];
    const int t4 = topright[0];
    const int t5 = topright[1];
    const int t6 = topright[2];
    const int t7 = topright[3];

    src[0 + 0 * stride] = (t1 + 2 * t2 + t3 + 2 * l0 + 2 * l1 + 4) >> 3;
    src[1 + 0 * stride] = (t2 + 2 * t3 + t4 + l0 + 2 * l1 + l2 + 4) >> 3;
    src[2 + 0 * stride] =
    src[0 + 1 * stride] = (t3 + 2 * t4 + t5 + 2 * l1 + 2 * l2 + 4) >> 3;
    src[3 + 0 * stride] =
    src[1 + 1 * stride] = (t4 + 2 * t5 + t6 + l1 + 2 * l2 + l3 + 4) >> 3;
    src[2 + 1 * stride] =
    src[0 + 2 * stride] = (t5 + 2 * t6 + t7 + 2 * l2 + 2 * l3 + 4) >> 3;
    src[3 + 1 * stride] =
    src[1 + 2 * stride] = (t6 + 3 * t7 + l2 + 3 * l3 + 4) >> 3;
    src[3 + 2 * stride] =
    src[1 + 3 * stride] = l3;
    src[0 + 3 * stride] =
    src[2 + 2 * stride] = (t6 + t7 + 2 * l3 + 2) >> 2;
    src[2 + 3 * stride] =
    src[3 + 3 * stride] = l3;
}
243 static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
244 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
245 uint8_t *top = src-stride;
248 for (y = 0; y < 4; y++) {
249 uint8_t *cm_in = cm + src[-1];
250 src[0] = cm_in[top[0]];
251 src[1] = cm_in[top[1]];
252 src[2] = cm_in[top[2]];
253 src[3] = cm_in[top[3]];
/**
 * 16x16 plane prediction, SVQ3 variant.
 *
 * Thin wrapper that calls pred16x16_plane_compat_8_c() with its last two
 * arguments set to (1, 0). The callee is defined outside this file;
 * presumably those arguments select the SVQ3 and RV40 behaviours
 * respectively.
 *
 * @param src    first pixel of the 16x16 block
 * @param stride distance between vertically adjacent pixels of src
 */
static void pred16x16_plane_svq3_c(uint8_t *src, int stride)
{
    pred16x16_plane_compat_8_c(src, stride, 1, 0);
}
/**
 * 16x16 plane prediction, RV40 variant.
 *
 * Thin wrapper that calls pred16x16_plane_compat_8_c() with its last two
 * arguments set to (0, 1). The callee is defined outside this file;
 * presumably those arguments select the SVQ3 and RV40 behaviours
 * respectively.
 *
 * @param src    first pixel of the 16x16 block
 * @param stride distance between vertically adjacent pixels of src
 */
static void pred16x16_plane_rv40_c(uint8_t *src, int stride)
{
    pred16x16_plane_compat_8_c(src, stride, 0, 1);
}
266 static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
267 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
268 uint8_t *top = src-stride;
271 for (y = 0; y < 16; y++) {
272 uint8_t *cm_in = cm + src[-1];
273 src[0] = cm_in[top[0]];
274 src[1] = cm_in[top[1]];
275 src[2] = cm_in[top[2]];
276 src[3] = cm_in[top[3]];
277 src[4] = cm_in[top[4]];
278 src[5] = cm_in[top[5]];
279 src[6] = cm_in[top[6]];
280 src[7] = cm_in[top[7]];
281 src[8] = cm_in[top[8]];
282 src[9] = cm_in[top[9]];
283 src[10] = cm_in[top[10]];
284 src[11] = cm_in[top[11]];
285 src[12] = cm_in[top[12]];
286 src[13] = cm_in[top[13]];
287 src[14] = cm_in[top[14]];
288 src[15] = cm_in[top[15]];
/**
 * RV40-style 8x8 DC prediction from the left edge only.
 *
 * Fills the whole block with the rounded mean of the eight pixels in the
 * column to the left of it.
 *
 * @param src    first pixel of the 8x8 block; src[-1 + y * stride] is read
 *               for y = 0..7
 * @param stride distance between vertically adjacent pixels of src
 */
static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride)
{
    int sum = 0;
    int x, y;
    uint8_t dc;

    for (y = 0; y < 8; y++)
        sum += src[-1 + y * stride];
    dc = (sum + 4) >> 3;

    /* Plain byte stores: replicating dc with a signed 0x01010101 multiply
     * overflows int for dc >= 128, and storing through a uint32_t pointer
     * would impose alignment and aliasing requirements on src. */
    for (y = 0; y < 8; y++)
        for (x = 0; x < 8; x++)
            src[x + y * stride] = dc;
}
/**
 * RV40-style 8x8 DC prediction from the top edge only.
 *
 * Fills the whole block with the rounded mean of the eight pixels in the
 * row above it.
 *
 * @param src    first pixel of the 8x8 block; src[x - stride] is read for
 *               x = 0..7
 * @param stride distance between vertically adjacent pixels of src
 */
static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride)
{
    int sum = 0;
    int x, y;
    uint8_t dc;

    for (x = 0; x < 8; x++)
        sum += src[x - stride];
    dc = (sum + 4) >> 3;

    /* Plain byte stores: replicating dc with a signed 0x01010101 multiply
     * overflows int for dc >= 128, and storing through a uint32_t pointer
     * would impose alignment and aliasing requirements on src. */
    for (y = 0; y < 8; y++)
        for (x = 0; x < 8; x++)
            src[x + y * stride] = dc;
}
/**
 * RV40-style 8x8 DC prediction.
 *
 * Fills the whole block with the rounded mean of the sixteen neighbouring
 * pixels: the eight above the block and the eight to the left of it.
 *
 * @param src    first pixel of the 8x8 block; src[x - stride] and
 *               src[-1 + y * stride] are read for x, y = 0..7
 * @param stride distance between vertically adjacent pixels of src
 */
static void pred8x8_dc_rv40_c(uint8_t *src, int stride)
{
    int sum = 0;
    int i, x, y;
    uint8_t dc;

    for (i = 0; i < 8; i++)
        sum += src[-1 + i * stride] + src[i - stride];
    dc = (sum + 8) >> 4;

    /* Plain byte stores: replicating dc with a signed 0x01010101 multiply
     * overflows int for dc >= 128, and storing through a uint32_t pointer
     * would impose alignment and aliasing requirements on src. */
    for (y = 0; y < 8; y++)
        for (x = 0; x < 8; x++)
            src[x + y * stride] = dc;
}
344 static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
345 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
346 uint8_t *top = src-stride;
349 for (y = 0; y < 8; y++) {
350 uint8_t *cm_in = cm + src[-1];
351 src[0] = cm_in[top[0]];
352 src[1] = cm_in[top[1]];
353 src[2] = cm_in[top[2]];
354 src[3] = cm_in[top[3]];
355 src[4] = cm_in[top[4]];
356 src[5] = cm_in[top[5]];
357 src[6] = cm_in[top[6]];
358 src[7] = cm_in[top[7]];
364 * Set the intra prediction function pointers.
366 void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
367 // MpegEncContext * const s = &h->s;
371 #define FUNC(a, depth) a ## _ ## depth
372 #define FUNCC(a, depth) a ## _ ## depth ## _c
373 #define FUNCD(a) a ## _c
375 #define H264_PRED(depth) \
376 if(codec_id != CODEC_ID_RV40){\
377 if(codec_id == CODEC_ID_VP8) {\
378 h->pred4x4[VERT_PRED ]= FUNCD(pred4x4_vertical_vp8);\
379 h->pred4x4[HOR_PRED ]= FUNCD(pred4x4_horizontal_vp8);\
381 h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\
382 h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\
384 h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\
385 if(codec_id == CODEC_ID_SVQ3)\
386 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_svq3);\
388 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left , depth);\
389 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\
390 h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\
391 h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\
392 if (codec_id == CODEC_ID_VP8) {\
393 h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_vp8);\
395 h->pred4x4[VERT_LEFT_PRED ]= FUNCC(pred4x4_vertical_left , depth);\
396 h->pred4x4[HOR_UP_PRED ]= FUNCC(pred4x4_horizontal_up , depth);\
397 if(codec_id != CODEC_ID_VP8) {\
398 h->pred4x4[LEFT_DC_PRED ]= FUNCC(pred4x4_left_dc , depth);\
399 h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\
400 h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\
402 h->pred4x4[TM_VP8_PRED ]= FUNCD(pred4x4_tm_vp8);\
403 h->pred4x4[DC_127_PRED ]= FUNCC(pred4x4_127_dc , depth);\
404 h->pred4x4[DC_129_PRED ]= FUNCC(pred4x4_129_dc , depth);\
405 h->pred4x4[VERT_VP8_PRED ]= FUNCC(pred4x4_vertical , depth);\
406 h->pred4x4[HOR_VP8_PRED ]= FUNCC(pred4x4_horizontal , depth);\
409 h->pred4x4[VERT_PRED ]= FUNCC(pred4x4_vertical , depth);\
410 h->pred4x4[HOR_PRED ]= FUNCC(pred4x4_horizontal , depth);\
411 h->pred4x4[DC_PRED ]= FUNCC(pred4x4_dc , depth);\
412 h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_rv40);\
413 h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right , depth);\
414 h->pred4x4[VERT_RIGHT_PRED ]= FUNCC(pred4x4_vertical_right , depth);\
415 h->pred4x4[HOR_DOWN_PRED ]= FUNCC(pred4x4_horizontal_down , depth);\
416 h->pred4x4[VERT_LEFT_PRED ]= FUNCD(pred4x4_vertical_left_rv40);\
417 h->pred4x4[HOR_UP_PRED ]= FUNCD(pred4x4_horizontal_up_rv40);\
418 h->pred4x4[LEFT_DC_PRED ]= FUNCC(pred4x4_left_dc , depth);\
419 h->pred4x4[TOP_DC_PRED ]= FUNCC(pred4x4_top_dc , depth);\
420 h->pred4x4[DC_128_PRED ]= FUNCC(pred4x4_128_dc , depth);\
421 h->pred4x4[DIAG_DOWN_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_down_left_rv40_nodown);\
422 h->pred4x4[HOR_UP_PRED_RV40_NODOWN]= FUNCD(pred4x4_horizontal_up_rv40_nodown);\
423 h->pred4x4[VERT_LEFT_PRED_RV40_NODOWN]= FUNCD(pred4x4_vertical_left_rv40_nodown);\
426 h->pred8x8l[VERT_PRED ]= FUNCC(pred8x8l_vertical , depth);\
427 h->pred8x8l[HOR_PRED ]= FUNCC(pred8x8l_horizontal , depth);\
428 h->pred8x8l[DC_PRED ]= FUNCC(pred8x8l_dc , depth);\
429 h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred8x8l_down_left , depth);\
430 h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred8x8l_down_right , depth);\
431 h->pred8x8l[VERT_RIGHT_PRED ]= FUNCC(pred8x8l_vertical_right , depth);\
432 h->pred8x8l[HOR_DOWN_PRED ]= FUNCC(pred8x8l_horizontal_down , depth);\
433 h->pred8x8l[VERT_LEFT_PRED ]= FUNCC(pred8x8l_vertical_left , depth);\
434 h->pred8x8l[HOR_UP_PRED ]= FUNCC(pred8x8l_horizontal_up , depth);\
435 h->pred8x8l[LEFT_DC_PRED ]= FUNCC(pred8x8l_left_dc , depth);\
436 h->pred8x8l[TOP_DC_PRED ]= FUNCC(pred8x8l_top_dc , depth);\
437 h->pred8x8l[DC_128_PRED ]= FUNCC(pred8x8l_128_dc , depth);\
439 h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x8_vertical , depth);\
440 h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x8_horizontal , depth);\
441 if (codec_id != CODEC_ID_VP8) {\
442 h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\
444 h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\
445 if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\
446 h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\
447 h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\
448 h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc , depth);\
449 h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
450 h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
451 h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
452 h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
454 h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\
455 h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\
456 h->pred8x8[TOP_DC_PRED8x8 ]= FUNCD(pred8x8_top_dc_rv40);\
457 if (codec_id == CODEC_ID_VP8) {\
458 h->pred8x8[DC_127_PRED8x8]= FUNCC(pred8x8_127_dc , depth);\
459 h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc , depth);\
462 h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc , depth);\
464 h->pred16x16[DC_PRED8x8 ]= FUNCC(pred16x16_dc , depth);\
465 h->pred16x16[VERT_PRED8x8 ]= FUNCC(pred16x16_vertical , depth);\
466 h->pred16x16[HOR_PRED8x8 ]= FUNCC(pred16x16_horizontal , depth);\
469 h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_svq3);\
472 h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_plane_rv40);\
475 h->pred16x16[PLANE_PRED8x8 ]= FUNCD(pred16x16_tm_vp8);\
476 h->pred16x16[DC_127_PRED8x8]= FUNCC(pred16x16_127_dc , depth);\
477 h->pred16x16[DC_129_PRED8x8]= FUNCC(pred16x16_129_dc , depth);\
480 h->pred16x16[PLANE_PRED8x8 ]= FUNCC(pred16x16_plane , depth);\
483 h->pred16x16[LEFT_DC_PRED8x8]= FUNCC(pred16x16_left_dc , depth);\
484 h->pred16x16[TOP_DC_PRED8x8 ]= FUNCC(pred16x16_top_dc , depth);\
485 h->pred16x16[DC_128_PRED8x8 ]= FUNCC(pred16x16_128_dc , depth);\
487 /* special lossless h/v prediction for h264 */ \
488 h->pred4x4_add [VERT_PRED ]= FUNCC(pred4x4_vertical_add , depth);\
489 h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\
490 h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\
491 h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\
492 h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\
493 h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\
494 h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add , depth);\
495 h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add , depth);\
509 if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth);
510 if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth);