2 * H.26L/H.264/AVC/JVT/14496-10/... decoder
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 * H.264 / AVC / MPEG4 part10 codec.
25 * @author Michael Niedermayer <michaelni@gmx.at>
28 #include "libavutil/imgutils.h"
32 #include "mpegvideo.h"
35 #include "h264_mvpred.h"
38 #include "rectangle.h"
39 #include "vdpau_internal.h"
40 #include "libavutil/avassert.h"
/* Lookup table for qp % 6 over the H.264 QP range 0..51; used when building
 * the dequantization coefficient tables.
 * NOTE(review): the closing "};" is not visible in this chunk — source appears
 * truncated; verify against the full file. */
47 static const uint8_t rem6[52]={
48 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
/* Lookup table for qp / 6 over the H.264 QP range 0..51; companion of rem6[]
 * for computing dequant shifts without a division.
 * NOTE(review): closing "};" not visible in this chunk. */
51 static const uint8_t div6[52]={
52 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
55 static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = {
/**
 * Copy the current macroblock's intra4x4 prediction modes from the
 * decode-time cache (intra4x4_pred_mode_cache) back into the frame-wide
 * per-MB array, so neighboring macroblocks can read them later.
 * The bottom row is copied as a 32-bit word; the right column (mode[4..6])
 * is stored element-by-element.
 * NOTE(review): closing brace not visible here — chunk appears truncated.
 */
62 void ff_h264_write_back_intra_pred_mode(H264Context *h){
63 int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy];
65 AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4);
66 mode[4]= h->intra4x4_pred_mode_cache[7+8*3];
67 mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
68 mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
72 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Validate the cached intra4x4 prediction modes against top/left neighbor
 * availability and remap DC modes to variants that only use available
 * samples. Logs and (presumably — the return statements are not visible in
 * this chunk) returns an error when a requested mode needs an unavailable
 * neighbor.
 * NOTE(review): several lines (loop headers, early returns, closing brace)
 * are missing from this view; verify against the full file.
 */
74 int ff_h264_check_intra4x4_pred_mode(H264Context *h){
75 MpegEncContext * const s = &h->s;
76 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
77 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
/* bit 0x8000 of top_samples_available flags the row above the MB */
80 if(!(h->top_samples_available&0x8000)){
82 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
84 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
87 h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
/* 0x8888 covers the four left-edge 4x4 blocks of the MB */
92 if((h->left_samples_available&0x8888)!=0x8888){
93 static const int mask[4]={0x8000,0x2000,0x80,0x20};
95 if(!(h->left_samples_available&mask[i])){
96 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
98 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
101 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
108 } //FIXME cleanup like ff_h264_check_intra_pred_mode
111 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
/**
 * Validate a 16x16/chroma intra prediction mode against top/left neighbor
 * availability, remapping DC modes as needed (including the MBAFF +
 * constrained-intra-pred special case).
 * @param mode the requested prediction mode
 * @return presumably the (possibly remapped) mode, or a negative value on
 *         error — the return statements are not visible in this chunk.
 * NOTE(review): range check, remap lines and closing brace are missing from
 * this view; verify against the full file.
 */
113 int ff_h264_check_intra_pred_mode(H264Context *h, int mode){
114 MpegEncContext * const s = &h->s;
115 static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
116 static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
119 av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
123 if(!(h->top_samples_available&0x8000)){
126 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
131 if((h->left_samples_available&0x8080) != 0x8080){
133 if(h->left_samples_available&0x8080){ //mad cow disease mode, aka MBAFF + constrained_intra_pred
134 mode= ALZHEIMER_DC_L0T_PRED8x8 + (!(h->left_samples_available&0x8000)) + 2*(mode == DC_128_PRED8x8);
137 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
/**
 * Parse the NAL header and unescape the RBSP: strip 00 00 03 emulation
 * prevention bytes into an internal buffer.
 * @param dst_length set to the length of the unescaped payload
 * @param consumed   set to the number of input bytes consumed (incl. header)
 * @return pointer to the unescaped data, or presumably NULL on error
 *         (error paths are not visible in this chunk)
 * NOTE(review): many lines (loop bodies, returns, closing braces) are missing
 * from this view; verify against the full file before relying on details.
 */
145 const uint8_t *ff_h264_decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
150 // src[0]&0x80; //forbidden bit
151 h->nal_ref_idc= src[0]>>5;
152 h->nal_unit_type= src[0]&0x1F;
156 for(i=0; i<length; i++)
157 printf("%2X ", src[i]);
160 #if HAVE_FAST_UNALIGNED
/* scan 8 (or 4) bytes at a time for a possible 00 00 sequence */
163 for(i=0; i+1<length; i+=9){
164 if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
167 for(i=0; i+1<length; i+=5){
168 if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
171 if(i>0 && !src[i]) i--;
175 for(i=0; i+1<length; i+=2){
177 if(i>0 && src[i-1]==0) i--;
179 if(i+2<length && src[i+1]==0 && src[i+2]<=3){
181 /* startcode, so we must be past the end */
189 if(i>=length-1){ //no escaped 0
191 *consumed= length+1; //+1 for the header
/* DPC slice data gets its own escape buffer so it can coexist with DPA/DPB */
195 bufidx = h->nal_unit_type == NAL_DPC ? 1 : 0; // use second escape buffer for inter data
196 av_fast_malloc(&h->rbsp_buffer[bufidx], &h->rbsp_buffer_size[bufidx], length+FF_INPUT_BUFFER_PADDING_SIZE);
197 dst= h->rbsp_buffer[bufidx];
203 //printf("decoding esc\n");
207 //remove escapes (very rare 1:2^22)
209 dst[di++]= src[si++];
210 dst[di++]= src[si++];
211 }else if(src[si]==0 && src[si+1]==0){
212 if(src[si+2]==3){ //escape
217 }else //next start code
221 dst[di++]= src[si++];
224 dst[di++]= src[si++];
227 memset(dst+di, 0, FF_INPUT_BUFFER_PADDING_SIZE);
230 *consumed= si + 1;//+1 for the header
231 //FIXME store exact number of bits in the getbitcontext (it is needed for decoding)
236 * Identify the exact end of the bitstream
237 * @return the length of the trailing, or 0 if damaged
/* Identify the rbsp_trailing_bits at the end of the bitstream.
 * Returns the trailing length, or 0 if damaged (per the comment above).
 * NOTE(review): the function body beyond the tprintf is not visible in this
 * chunk; verify against the full file. */
239 static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
243 tprintf(h->s.avctx, "rbsp trailing %X\n", v);
254 * DCT transforms the 16 dc values.
255 * @param qp quantization parameter ??? FIXME
/**
 * Forward transform of the 16 luma DC coefficients (4x4 Hadamard-style
 * butterfly applied first along one axis into temp[], then along the other
 * axis back into block[], with a final >>1).
 * NOTE(review): the loop headers and the temp[] store lines between the two
 * butterfly passes are missing from this view; verify against the full file.
 */
257 static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
258 //    const int qmul= dequant_coeff[qp][0];
260 int temp[16]; //FIXME check if this is a good idea
261 static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
262 static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
/* first pass: vertical butterflies into temp[] */
265 const int offset= y_offset[i];
266 const int z0= block[offset+stride*0] + block[offset+stride*4];
267 const int z1= block[offset+stride*0] - block[offset+stride*4];
268 const int z2= block[offset+stride*1] - block[offset+stride*5];
269 const int z3= block[offset+stride*1] + block[offset+stride*5];
/* second pass: horizontal butterflies back into block[] */
278 const int offset= x_offset[i];
279 const int z0= temp[4*0+i] + temp[4*2+i];
280 const int z1= temp[4*0+i] - temp[4*2+i];
281 const int z2= temp[4*1+i] - temp[4*3+i];
282 const int z3= temp[4*1+i] + temp[4*3+i];
284 block[stride*0 +offset]= (z0 + z3)>>1;
285 block[stride*2 +offset]= (z1 + z2)>>1;
286 block[stride*8 +offset]= (z1 - z2)>>1;
287 block[stride*10+offset]= (z0 - z3)>>1;
/**
 * 2x2 inverse transform + dequantization of the chroma DC coefficients.
 * Reads the four DC values laid out at (stride, xStride) offsets inside
 * block[], applies a 2x2 butterfly and scales by qmul with a >>7.
 * NOTE(review): the local variable declarations and the intermediate
 * butterfly assignments (producing e/a/b/c used below) are missing from this
 * view; verify against the full file.
 */
295 static void chroma_dc_dequant_idct_c(DCTELEM *block, int qmul){
296 const int stride= 16*2;
297 const int xStride= 16;
300 a= block[stride*0 + xStride*0];
301 b= block[stride*0 + xStride*1];
302 c= block[stride*1 + xStride*0];
303 d= block[stride*1 + xStride*1];
310 block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
311 block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
312 block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
313 block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
/**
 * Forward 2x2 transform of the chroma DC coefficients (encoder-side
 * counterpart of chroma_dc_dequant_idct_c; no quantization here).
 * NOTE(review): local declarations and intermediate butterfly assignments
 * are missing from this view; verify against the full file.
 */
317 static void chroma_dc_dct_c(DCTELEM *block){
318 const int stride= 16*2;
319 const int xStride= 16;
322 a= block[stride*0 + xStride*0];
323 b= block[stride*0 + xStride*1];
324 c= block[stride*1 + xStride*0];
325 d= block[stride*1 + xStride*1];
332 block[stride*0 + xStride*0]= (a+c);
333 block[stride*0 + xStride*1]= (e+b);
334 block[stride*1 + xStride*0]= (a-c);
335 block[stride*1 + xStride*1]= (e-b);
/**
 * Motion compensation for one prediction direction (list) of one partition:
 * computes the quarter-pel luma / eighth-pel chroma source positions, falls
 * back to emulated_edge_mc when the reference area crosses the picture
 * border, then applies the given qpel and chroma MC functions.
 * NOTE(review): several lines (the emu flag setup, chroma emu branch
 * structure, closing braces) are missing from this view.
 */
339 static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
340 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
341 int src_x_offset, int src_y_offset,
342 qpel_mc_func *qpix_op, h264_chroma_mc_func chroma_op){
343 MpegEncContext * const s = &h->s;
344 const int mx= h->mv_cache[list][ scan8[n] ][0] + src_x_offset*8;
345 int my= h->mv_cache[list][ scan8[n] ][1] + src_y_offset*8;
346 const int luma_xy= (mx&3) + ((my&3)<<2);
347 uint8_t * src_y = pic->data[0] + (mx>>2) + (my>>2)*h->mb_linesize;
348 uint8_t * src_cb, * src_cr;
349 int extra_width= h->emu_edge_width;
350 int extra_height= h->emu_edge_height;
352 const int full_mx= mx>>2;
353 const int full_my= my>>2;
354 const int pic_width = 16*s->mb_width;
355 const int pic_height = 16*s->mb_height >> MB_FIELD;
/* sub-pel interpolation needs 3 extra border samples on that axis */
357 if(mx&7) extra_width -= 3;
358 if(my&7) extra_height -= 3;
360 if( full_mx < 0-extra_width
361 || full_my < 0-extra_height
362 || full_mx + 16/*FIXME*/ > pic_width + extra_width
363 || full_my + 16/*FIXME*/ > pic_height + extra_height){
364 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
365 src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
369 qpix_op[luma_xy](dest_y, src_y, h->mb_linesize); //FIXME try variable height perhaps?
371 qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
374 if(CONFIG_GRAY && s->flags&CODEC_FLAG_GRAY) return;
377 // chroma offset when predicting from a field of opposite parity
378 my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
379 emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
381 src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
382 src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
385 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
386 src_cb= s->edge_emu_buffer;
388 chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
391 s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
392 src_cr= s->edge_emu_buffer;
394 chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
/**
 * Non-weighted motion compensation for one partition: list0 prediction uses
 * the "put" functions; if list1 is also predicted, it is blended on top via
 * the "avg" functions (the switch to avg — visible for chroma_op at 420 —
 * presumably also covers qpix_op on a missing line).
 * NOTE(review): the if(list0)/if(list1) headers and closing braces are
 * missing from this view; verify against the full file.
 */
397 static inline void mc_part_std(H264Context *h, int n, int square, int chroma_height, int delta,
398 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
399 int x_offset, int y_offset,
400 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
401 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
402 int list0, int list1){
403 MpegEncContext * const s = &h->s;
404 qpel_mc_func *qpix_op= qpix_put;
405 h264_chroma_mc_func chroma_op= chroma_put;
/* advance dest pointers to this partition, then convert offsets to
 * picture coordinates for mc_dir_part */
407 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
408 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
409 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
410 x_offset += 8*s->mb_x;
411 y_offset += 8*(s->mb_y >> MB_FIELD);
414 Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
415 mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
416 dest_y, dest_cb, dest_cr, x_offset, y_offset,
420 chroma_op= chroma_avg;
424 Picture *ref= &h->ref_list[1][ h->ref_cache[1][ scan8[n] ] ];
425 mc_dir_part(h, ref, n, square, chroma_height, delta, 1,
426 dest_y, dest_cb, dest_cr, x_offset, y_offset,
/**
 * Weighted motion compensation for one partition. Bi-prediction renders
 * list1 into a scratch buffer and blends with the implicit (use_weight==2)
 * or explicit bi-weight functions; uni-prediction applies explicit weights
 * in place after a normal MC pass.
 * NOTE(review): the if(list0 && list1)/else headers and closing braces are
 * missing from this view; verify against the full file.
 */
431 static inline void mc_part_weighted(H264Context *h, int n, int square, int chroma_height, int delta,
432 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
433 int x_offset, int y_offset,
434 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
435 h264_weight_func luma_weight_op, h264_weight_func chroma_weight_op,
436 h264_biweight_func luma_weight_avg, h264_biweight_func chroma_weight_avg,
437 int list0, int list1){
438 MpegEncContext * const s = &h->s;
440 dest_y += 2*x_offset + 2*y_offset*h-> mb_linesize;
441 dest_cb += x_offset + y_offset*h->mb_uvlinesize;
442 dest_cr += x_offset + y_offset*h->mb_uvlinesize;
443 x_offset += 8*s->mb_x;
444 y_offset += 8*(s->mb_y >> MB_FIELD);
447 /* don't optimize for luma-only case, since B-frames usually
448 * use implicit weights => chroma too. */
449 uint8_t *tmp_cb = s->obmc_scratchpad;
450 uint8_t *tmp_cr = s->obmc_scratchpad + 8;
451 uint8_t *tmp_y = s->obmc_scratchpad + 8*h->mb_uvlinesize;
452 int refn0 = h->ref_cache[0][ scan8[n] ];
453 int refn1 = h->ref_cache[1][ scan8[n] ];
/* list0 straight into dest, list1 into the scratchpad for blending */
455 mc_dir_part(h, &h->ref_list[0][refn0], n, square, chroma_height, delta, 0,
456 dest_y, dest_cb, dest_cr,
457 x_offset, y_offset, qpix_put, chroma_put);
458 mc_dir_part(h, &h->ref_list[1][refn1], n, square, chroma_height, delta, 1,
459 tmp_y, tmp_cb, tmp_cr,
460 x_offset, y_offset, qpix_put, chroma_put);
462 if(h->use_weight == 2){
/* implicit weighting: weights sum to 64, log2 denom 5, offset 0 */
463 int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
464 int weight1 = 64 - weight0;
465 luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
466 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
467 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
469 luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
470 h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
471 h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
472 chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
473 h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
474 h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
475 chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
476 h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
477 h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
/* uni-prediction: normal MC then in-place explicit weighting */
480 int list = list1 ? 1 : 0;
481 int refn = h->ref_cache[list][ scan8[n] ];
482 Picture *ref= &h->ref_list[list][refn];
483 mc_dir_part(h, ref, n, square, chroma_height, delta, list,
484 dest_y, dest_cb, dest_cr, x_offset, y_offset,
485 qpix_put, chroma_put);
487 luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
488 h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
489 if(h->use_weight_chroma){
490 chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
491 h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
492 chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
493 h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
/**
 * Dispatch MC for one partition: use the weighted path when explicit
 * weighting is on, or when implicit bi-prediction weights differ from the
 * trivial 32/32 split; otherwise the standard put/avg path.
 * NOTE(review): part of the condition (the explicit-weight clause between
 * lines 506 and 508 of the original) is missing from this view.
 */
498 static inline void mc_part(H264Context *h, int n, int square, int chroma_height, int delta,
499 uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
500 int x_offset, int y_offset,
501 qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
502 qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
503 h264_weight_func *weight_op, h264_biweight_func *weight_avg,
504 int list0, int list1){
505 if((h->use_weight==2 && list0 && list1
506 && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
508 mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
509 x_offset, y_offset, qpix_put, chroma_put,
510 weight_op[0], weight_op[3], weight_avg[0], weight_avg[3], list0, list1);
512 mc_part_std(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
513 x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1);
/**
 * Prefetch reference pixels for the motion vector of the current MB's first
 * block, roughly 4 macroblocks ahead, to warm the cache for upcoming MC.
 * NOTE(review): the guard around refn (presumably refn >= 0) is missing from
 * this view.
 */
516 static inline void prefetch_motion(H264Context *h, int list){
517 /* fetch pixels for estimated mv 4 macroblocks ahead
518 * optimized for 64byte cache lines */
519 MpegEncContext * const s = &h->s;
520 const int refn = h->ref_cache[list][scan8[0]];
522 const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8;
523 const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y;
524 uint8_t **src= h->ref_list[list][refn].data;
525 int off= mx + (my + (s->mb_x&3)*4)*h->mb_linesize + 64;
526 s->dsp.prefetch(src[0]+off, s->linesize, 4);
/* Cb and Cr are assumed contiguous: src[2]-src[1] is used as the stride
 * between the two chroma prefetches */
527 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64;
528 s->dsp.prefetch(src[1]+off, src[2]-src[1], 2);
/**
 * Perform motion compensation for one inter macroblock: dispatches on the
 * MB partition type (16x16, 16x8, 8x16, 8x8 with sub-partitions) and calls
 * mc_part() per partition with the matching qpel/chroma function sizes and
 * weight-function offsets. Prefetches list 0 refs before and list 1 refs
 * after the MC work.
 * NOTE(review): loop headers for the 8x8 sub-partition walk and several
 * closing braces are missing from this view; verify against the full file.
 */
532 static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
533 qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put),
534 qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg),
535 h264_weight_func *weight_op, h264_biweight_func *weight_avg){
536 MpegEncContext * const s = &h->s;
537 const int mb_xy= h->mb_xy;
538 const int mb_type= s->current_picture.mb_type[mb_xy];
540 assert(IS_INTER(mb_type));
542 prefetch_motion(h, 0);
544 if(IS_16X16(mb_type)){
545 mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
546 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
547 weight_op, weight_avg,
548 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
549 }else if(IS_16X8(mb_type)){
550 mc_part(h, 0, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 0,
551 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
552 &weight_op[1], &weight_avg[1],
553 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
554 mc_part(h, 8, 0, 4, 8, dest_y, dest_cb, dest_cr, 0, 4,
555 qpix_put[1], chroma_put[0], qpix_avg[1], chroma_avg[0],
556 &weight_op[1], &weight_avg[1],
557 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
558 }else if(IS_8X16(mb_type)){
559 mc_part(h, 0, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 0, 0,
560 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
561 &weight_op[2], &weight_avg[2],
562 IS_DIR(mb_type, 0, 0), IS_DIR(mb_type, 0, 1));
563 mc_part(h, 4, 0, 8, 8*h->mb_linesize, dest_y, dest_cb, dest_cr, 4, 0,
564 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
565 &weight_op[2], &weight_avg[2],
566 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
570 assert(IS_8X8(mb_type));
/* 8x8: each of the four sub-MBs may be split further */
573 const int sub_mb_type= h->sub_mb_type[i];
575 int x_offset= (i&1)<<2;
576 int y_offset= (i&2)<<1;
578 if(IS_SUB_8X8(sub_mb_type)){
579 mc_part(h, n, 1, 4, 0, dest_y, dest_cb, dest_cr, x_offset, y_offset,
580 qpix_put[1], chroma_put[1], qpix_avg[1], chroma_avg[1],
581 &weight_op[3], &weight_avg[3],
582 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
583 }else if(IS_SUB_8X4(sub_mb_type)){
584 mc_part(h, n , 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset,
585 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
586 &weight_op[4], &weight_avg[4],
587 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
588 mc_part(h, n+2, 0, 2, 4, dest_y, dest_cb, dest_cr, x_offset, y_offset+2,
589 qpix_put[2], chroma_put[1], qpix_avg[2], chroma_avg[1],
590 &weight_op[4], &weight_avg[4],
591 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
592 }else if(IS_SUB_4X8(sub_mb_type)){
593 mc_part(h, n , 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset, y_offset,
594 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
595 &weight_op[5], &weight_avg[5],
596 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
597 mc_part(h, n+1, 0, 4, 4*h->mb_linesize, dest_y, dest_cb, dest_cr, x_offset+2, y_offset,
598 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
599 &weight_op[5], &weight_avg[5],
600 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
603 assert(IS_SUB_4X4(sub_mb_type));
605 int sub_x_offset= x_offset + 2*(j&1);
606 int sub_y_offset= y_offset + (j&2);
607 mc_part(h, n+j, 1, 2, 0, dest_y, dest_cb, dest_cr, sub_x_offset, sub_y_offset,
608 qpix_put[2], chroma_put[2], qpix_avg[2], chroma_avg[2],
609 &weight_op[6], &weight_avg[6],
610 IS_DIR(sub_mb_type, 0, 0), IS_DIR(sub_mb_type, 0, 1));
616 prefetch_motion(h, 1);
/**
 * Free all per-context tables; per-thread buffers (top borders, scratchpad,
 * and — when free_rbsp is set, judging by the parameter name — the RBSP
 * escape buffers) are freed for every thread context, and the secondary
 * thread contexts themselves are freed.
 * NOTE(review): the if(free_rbsp) guard and some loop/brace lines are
 * missing from this view; verify against the full file.
 */
620 static void free_tables(H264Context *h, int free_rbsp){
623 av_freep(&h->intra4x4_pred_mode);
624 av_freep(&h->chroma_pred_mode_table);
625 av_freep(&h->cbp_table);
626 av_freep(&h->mvd_table[0]);
627 av_freep(&h->mvd_table[1]);
628 av_freep(&h->direct_table);
629 av_freep(&h->non_zero_count);
630 av_freep(&h->slice_table_base);
631 h->slice_table= NULL;
632 av_freep(&h->list_counts);
634 av_freep(&h->mb2b_xy);
635 av_freep(&h->mb2br_xy);
637 for(i = 0; i < MAX_THREADS; i++) {
638 hx = h->thread_context[i];
640 av_freep(&hx->top_borders[1]);
641 av_freep(&hx->top_borders[0]);
642 av_freep(&hx->s.obmc_scratchpad);
644 av_freep(&hx->rbsp_buffer[1]);
645 av_freep(&hx->rbsp_buffer[0]);
646 hx->rbsp_buffer_size[0] = 0;
647 hx->rbsp_buffer_size[1] = 0;
/* thread_context[0] is h itself and must not be freed */
649 if (i) av_freep(&h->thread_context[i]);
/**
 * Build the 8x8 dequantization coefficient tables from the PPS scaling
 * matrices; the two matrices share one buffer when identical.
 * NOTE(review): loop headers over i, q and x, and the shift/idx setup, are
 * missing from this view; verify against the full file.
 */
653 static void init_dequant8_coeff_table(H264Context *h){
655 h->dequant8_coeff[0] = h->dequant8_buffer[0];
656 h->dequant8_coeff[1] = h->dequant8_buffer[1];
659 if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
660 h->dequant8_coeff[1] = h->dequant8_buffer[0];
/* (x>>3)|((x&7)<<3) transposes the 8x8 index for the stored layout */
668 h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
669 ((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
670 h->pps.scaling_matrix8[i][x]) << shift;
/**
 * Build the six 4x4 dequantization coefficient tables from the PPS scaling
 * matrices; matrices identical to an earlier one share its buffer.
 * NOTE(review): loop headers over i, j, q and x are missing from this view;
 * verify against the full file.
 */
675 static void init_dequant4_coeff_table(H264Context *h){
678 h->dequant4_coeff[i] = h->dequant4_buffer[i];
680 if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
681 h->dequant4_coeff[i] = h->dequant4_buffer[j];
689 int shift = div6[q] + 2;
/* (x>>2)|((x<<2)&0xF) transposes the 4x4 index for the stored layout */
692 h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
693 ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
694 h->pps.scaling_matrix4[i][x]) << shift;
/**
 * Initialize all dequantization tables: always the 4x4 set, the 8x8 set only
 * when the PPS enables 8x8 transforms, and a flat 1<<6 identity table for
 * lossless (transform_bypass) streams.
 * NOTE(review): the loop headers over i and x inside the bypass branch are
 * missing from this view.
 */
699 static void init_dequant_tables(H264Context *h){
701 init_dequant4_coeff_table(h);
702 if(h->pps.transform_8x8_mode)
703 init_dequant8_coeff_table(h);
704 if(h->sps.transform_bypass){
707 h->dequant4_coeff[i][0][x] = 1<<6;
708 if(h->pps.transform_8x8_mode)
711 h->dequant8_coeff[i][0][x] = 1<<6;
/**
 * Allocate the per-context lookup tables sized from the MB geometry
 * (mb2b_xy/mb2br_xy maps, slice table, nnz, cbp, mvd, direct tables, …) and
 * initialize the dequant tables on first use.
 * @return 0 on success; the FF_ALLOCZ_OR_GOTO macros jump to a "fail" label
 *         (not visible in this chunk) on allocation failure.
 */
716 int ff_h264_alloc_tables(H264Context *h){
717 MpegEncContext * const s = &h->s;
718 const int big_mb_num= s->mb_stride * (s->mb_height+1);
719 const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
722 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail)
724 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 32 * sizeof(uint8_t), fail)
725 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
726 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
728 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
729 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
730 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
731 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
732 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)
/* -1 marks "no slice" so unavailable neighbors are detected */
734 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
735 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
737 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
738 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
739 for(y=0; y<s->mb_height; y++){
740 for(x=0; x<s->mb_width; x++){
741 const int mb_xy= x + y*s->mb_stride;
742 const int b_xy = 4*x + 4*y*h->b_stride;
744 h->mb2b_xy [mb_xy]= b_xy;
/* without FMO the br map only needs two MB rows and can wrap */
745 h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
749 s->obmc_scratchpad = NULL;
751 if(!h->dequant4_coeff[0])
752 init_dequant_tables(h);
761 * Mimic alloc_tables(), but for every context thread.
/**
 * Mimic alloc_tables(), but for context thread i: share the read-mostly
 * tables with the source context and point the per-row tables
 * (intra4x4_pred_mode, mvd_table) at this thread's slice of the shared
 * allocation. The scratchpad is lazily allocated later (see frame start).
 */
763 static void clone_tables(H264Context *dst, H264Context *src, int i){
764 MpegEncContext * const s = &src->s;
765 dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
766 dst->non_zero_count = src->non_zero_count;
767 dst->slice_table = src->slice_table;
768 dst->cbp_table = src->cbp_table;
769 dst->mb2b_xy = src->mb2b_xy;
770 dst->mb2br_xy = src->mb2br_xy;
771 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
772 dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride;
773 dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride;
774 dst->direct_table = src->direct_table;
775 dst->list_counts = src->list_counts;
777 dst->s.obmc_scratchpad = NULL;
778 ff_h264_pred_init(&dst->hpc, src->s.codec_id);
783 * Allocate buffers which are not shared amongst multiple threads.
/**
 * Allocate buffers which are not shared amongst multiple threads (top border
 * caches) and mark the permanently-unavailable ref_cache corner entries.
 * @return 0 on success (return line not visible here), -1 on allocation
 *         failure via the "fail" label.
 */
785 static int context_init(H264Context *h){
786 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
787 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
789 h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
790 h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
794 return -1; // free_tables will clean up for us
797 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
/**
 * One-time initialization shared by decoder (and encoder) paths: copy
 * geometry from the AVCodecContext, init DSP/prediction function tables and
 * default (flat, all-16) scaling matrices.
 */
799 static av_cold void common_init(H264Context *h){
800 MpegEncContext * const s = &h->s;
802 s->width = s->avctx->width;
803 s->height = s->avctx->height;
804 s->codec_id= s->avctx->codec->id;
806 ff_h264dsp_init(&h->h264dsp);
807 ff_h264_pred_init(&h->hpc, s->codec_id);
/* -1 = "no PPS seen yet" for the dequant-coeff cache */
809 h->dequant_coeff_pps= -1;
810 s->unrestricted_mv=1;
813 dsputil_init(&s->dsp, s->avctx); // needed so that idct permutation is known early
/* default scaling matrices: flat 16 (no scaling) until SPS/PPS override */
815 memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
816 memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
/**
 * Decode codec extradata: either avcC (first byte == 1; SPS/PPS entries with
 * 2-byte length prefixes) or raw Annex-B NAL units.
 * @return presumably 0 on success and negative on error — the return
 *         statements are not visible in this chunk.
 * NOTE(review): pointer advancement lines (p += …) between the length reads
 * are missing from this view; verify against the full file.
 */
819 int ff_h264_decode_extradata(H264Context *h)
821 AVCodecContext *avctx = h->s.avctx;
823 if(*(char *)avctx->extradata == 1){
825 unsigned char *p = avctx->extradata;
829 if(avctx->extradata_size < 7) {
830 av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
833 /* sps and pps in the avcC always have length coded with 2 bytes,
834 so put a fake nal_length_size = 2 while parsing them */
835 h->nal_length_size = 2;
836 // Decode sps from avcC
837 cnt = *(p+5) & 0x1f; // Number of sps
839 for (i = 0; i < cnt; i++) {
840 nalsize = AV_RB16(p) + 2;
841 if(decode_nal_units(h, p, nalsize) < 0) {
842 av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
847 // Decode pps from avcC
848 cnt = *(p++); // Number of pps
849 for (i = 0; i < cnt; i++) {
850 nalsize = AV_RB16(p) + 2;
851 if(decode_nal_units(h, p, nalsize) != nalsize) {
852 av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
857 // Now store right nal length size, that will be use to parse all other nals
858 h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
/* Annex-B extradata: feed it straight to the NAL parser */
861 if(decode_nal_units(h, avctx->extradata, avctx->extradata_size) < 0)
/**
 * AVCodec init callback: set up MpegEncContext defaults, H.264-specific
 * state (VLC tables, SEI, POC trackers), adjust the time base for
 * field-coded content, and parse any extradata (avcC or Annex-B).
 * @return presumably 0 on success / negative on error — the return lines are
 *         not visible in this chunk.
 */
867 av_cold int ff_h264_decode_init(AVCodecContext *avctx){
868 H264Context *h= avctx->priv_data;
869 MpegEncContext * const s = &h->s;
871 MPV_decode_defaults(s);
876 s->out_format = FMT_H264;
877 s->workaround_bugs= avctx->workaround_bugs;
880 //    s->decode_mb= ff_h263_decode_mb;
881 s->quarter_sample = 1;
882 if(!avctx->has_b_frames)
885 avctx->chroma_sample_location = AVCHROMA_LOC_LEFT;
887 ff_h264_decode_init_vlc();
889 h->thread_context[0] = h;
890 h->outputed_poc = INT_MIN;
891 h->prev_poc_msb= 1<<16;
893 ff_h264_reset_sei(h);
894 if(avctx->codec_id == CODEC_ID_H264){
/* H.264 time base is field-rate; double den so ticks_per_frame==2 */
895 if(avctx->ticks_per_frame == 1){
896 s->avctx->time_base.den *=2;
898 avctx->ticks_per_frame = 2;
901 if(avctx->extradata_size > 0 && avctx->extradata &&
902 ff_h264_decode_extradata(h))
905 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
906 s->avctx->has_b_frames = h->sps.num_reorder_frames;
/**
 * Per-frame setup: start the MPV frame and error resilience, reset IDR/mmco
 * markers, precompute the per-block destination offsets (frame and field
 * variants), lazily allocate per-thread scratchpads, and clear the slice
 * table.
 * @return 0 on success (return line not visible here); negative if
 *         MPV_frame_start fails.
 */
913 int ff_h264_frame_start(H264Context *h){
914 MpegEncContext * const s = &h->s;
917 if(MPV_frame_start(s, s->avctx) < 0)
919 ff_er_frame_start(s);
921 * MPV_frame_start uses pict_type to derive key_frame.
922 * This is incorrect for H.264; IDR markings must be used.
923 * Zero here; IDR markings per slice in frame or fields are ORed in later.
924 * See decode_nal_units().
926 s->current_picture_ptr->key_frame= 0;
927 s->current_picture_ptr->mmco_reset= 0;
929 assert(s->linesize && s->uvlinesize);
/* block_offset[0..23]: frame-mode offsets; [24..47]: field-mode (2x stride) */
932 h->block_offset[i]= 4*((scan8[i] - scan8[0])&7) + 4*s->linesize*((scan8[i] - scan8[0])>>3);
933 h->block_offset[24+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->linesize*((scan8[i] - scan8[0])>>3);
936 h->block_offset[16+i]=
937 h->block_offset[20+i]= 4*((scan8[i] - scan8[0])&7) + 4*s->uvlinesize*((scan8[i] - scan8[0])>>3);
938 h->block_offset[24+16+i]=
939 h->block_offset[24+20+i]= 4*((scan8[i] - scan8[0])&7) + 8*s->uvlinesize*((scan8[i] - scan8[0])>>3);
942 /* can't be in alloc_tables because linesize isn't known there.
943 * FIXME: redo bipred weight to not require extra buffer? */
944 for(i = 0; i < s->avctx->thread_count; i++)
945 if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
946 h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
948 /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
949 memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
951 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
953 // We mark the current picture as non-reference after allocating it, so
954 // that if we break out due to an error it can be released automatically
955 // in the next MPV_frame_start().
956 // SVQ3 as well as most other codecs have only last/next/current and thus
957 // get released even with set reference, besides SVQ3 and others do not
958 // mark frames as reference later "naturally".
959 if(s->codec_id != CODEC_ID_SVQ3)
960 s->current_picture_ptr->reference= 0;
962 s->current_picture_ptr->field_poc[0]=
963 s->current_picture_ptr->field_poc[1]= INT_MAX;
964 assert(s->current_picture_ptr->long_ref==0);
/**
 * Save the bottom row(s) of the just-decoded macroblock into
 * h->top_borders[] so the MB below (possibly decoded later / in another
 * pass) can use them as its top neighbor samples. FRAME_MBAFF saves an
 * extra line per MB pair.
 * NOTE(review): several setup lines (top_idx selection, src_y adjustment,
 * closing braces) are missing from this view; verify against the full file.
 */
969 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int simple){
970 MpegEncContext * const s = &h->s;
975 src_cb -= uvlinesize;
976 src_cr -= uvlinesize;
978 if(!simple && FRAME_MBAFF){
981 top_border = h->top_borders[0][s->mb_x];
982 AV_COPY128(top_border, src_y + 15*linesize);
983 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
984 AV_COPY64(top_border+16, src_cb+7*uvlinesize);
985 AV_COPY64(top_border+24, src_cr+7*uvlinesize);
994 top_border = h->top_borders[top_idx][s->mb_x];
995 // There are two lines saved, the line above the the top macroblock of a pair,
996 // and the line above the bottom macroblock
997 AV_COPY128(top_border, src_y + 16*linesize);
999 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1000 AV_COPY64(top_border+16, src_cb+8*uvlinesize);
1001 AV_COPY64(top_border+24, src_cr+8*uvlinesize);
/**
 * Exchange (or copy, depending on xchg) the current macroblock's top border
 * samples with the saved top_borders[] lines before/after deblocking, so the
 * filter sees pre-deblock neighbor pixels as the spec requires.
 * deblock_left/deblock_top gate which edges participate (deblocking_filter
 * == 2 means "slice-boundary aware", using cached neighbor types).
 * NOTE(review): the deblock_top/deblock_left guard lines around the XCHG
 * groups and several braces are missing from this view.
 */
1005 static inline void xchg_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int xchg, int simple){
1006 MpegEncContext * const s = &h->s;
1010 uint8_t *top_border_m1;
1011 uint8_t *top_border;
1013 if(!simple && FRAME_MBAFF){
1018 top_idx = MB_MBAFF ? 0 : 1;
1022 if(h->deblocking_filter == 2) {
1023 deblock_left = h->left_type[0];
1024 deblock_top = h->top_type;
1026 deblock_left = (s->mb_x > 0);
1027 deblock_top = (s->mb_y > !!MB_FIELD);
1030 src_y -= linesize + 1;
1031 src_cb -= uvlinesize + 1;
1032 src_cr -= uvlinesize + 1;
1034 top_border_m1 = h->top_borders[top_idx][s->mb_x-1];
1035 top_border = h->top_borders[top_idx][s->mb_x];
1037 #define XCHG(a,b,xchg)\
1038 if (xchg) AV_SWAP64(b,a);\
1039 else AV_COPY64(b,a);
1043 XCHG(top_border_m1+8, src_y -7, 1);
1045 XCHG(top_border+0, src_y +1, xchg);
1046 XCHG(top_border+8, src_y +9, 1);
1047 if(s->mb_x+1 < s->mb_width){
1048 XCHG(h->top_borders[top_idx][s->mb_x+1], src_y +17, 1);
1052 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1055 XCHG(top_border_m1+16, src_cb -7, 1);
1056 XCHG(top_border_m1+24, src_cr -7, 1);
1058 XCHG(top_border+16, src_cb+1, 1);
1059 XCHG(top_border+24, src_cr+1, 1);
/**
 * Decode (reconstruct) one macroblock into the current picture:
 * intra prediction or motion compensation followed by the residual
 * transform (IDCT) for luma and chroma.
 *
 * @param simple nonzero selects the fast path which assumes no interlacing,
 *               MBAFF, PCM or gray-only handling is required; it is a
 *               compile-time constant at the two call sites so the compiler
 *               can strip the unused branches.
 */
1064 static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
1065 MpegEncContext * const s = &h->s;
1066 const int mb_x= s->mb_x;
1067 const int mb_y= s->mb_y;
1068 const int mb_xy= h->mb_xy;
1069 const int mb_type= s->current_picture.mb_type[mb_xy];
1070 uint8_t *dest_y, *dest_cb, *dest_cr;
1071 int linesize, uvlinesize /*dct_offset*/;
1073 int *block_offset = &h->block_offset[0];
// Lossless mode: qscale 0 with the SPS transform-bypass flag set.
1074 const int transform_bypass = !simple && (s->qscale == 0 && h->sps.transform_bypass);
1075 /* is_h264 should always be true if SVQ3 is disabled. */
1076 const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264;
1077 void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride);
1078 void (*idct_dc_add)(uint8_t *dst, DCTELEM *block, int stride);
// Destination pointers for this macroblock in the current picture planes.
1080 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
1081 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
1082 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
1084 s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + 64, s->linesize, 4);
1085 s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + 64, dest_cr - dest_cb, 2);
1087 h->list_counts[mb_xy]= h->list_count;
// Field macroblock: double the strides and, for the bottom MB of a pair,
// move the destination pointers up into the correct field.
1089 if (!simple && MB_FIELD) {
1090 linesize = h->mb_linesize = s->linesize * 2;
1091 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
1092 block_offset = &h->block_offset[24];
1093 if(mb_y&1){ //FIXME move out of this function?
1094 dest_y -= s->linesize*15;
1095 dest_cb-= s->uvlinesize*7;
1096 dest_cr-= s->uvlinesize*7;
// Remap the cached reference indices to field-based numbering
// ((16+ref)^(mb_y&1)) for the deblocking/prediction code.
1100 for(list=0; list<h->list_count; list++){
1101 if(!USES_LIST(mb_type, list))
1103 if(IS_16X16(mb_type)){
1104 int8_t *ref = &h->ref_cache[list][scan8[0]];
1105 fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
1107 for(i=0; i<16; i+=4){
1108 int ref = h->ref_cache[list][scan8[i]];
1110 fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
1116 linesize = h->mb_linesize = s->linesize;
1117 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
1118 // dct_offset = s->linesize * 16;
// I_PCM macroblock: the samples were transmitted raw; copy them straight
// from h->mb into the picture (16x16 luma, two 8x8 chroma blocks).
1121 if (!simple && IS_INTRA_PCM(mb_type)) {
1122 for (i=0; i<16; i++) {
1123 memcpy(dest_y + i* linesize, h->mb + i*8, 16);
1125 for (i=0; i<8; i++) {
1126 memcpy(dest_cb+ i*uvlinesize, h->mb + 128 + i*4, 8);
1127 memcpy(dest_cr+ i*uvlinesize, h->mb + 160 + i*4, 8);
// Intra macroblock: run spatial prediction (borders are temporarily swapped
// with the undeblocked copies while the deblocking filter is enabled).
1130 if(IS_INTRA(mb_type)){
1131 if(h->deblocking_filter)
1132 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
1134 if(simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
1135 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
1136 h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
// 4x4/8x8 intra: predict each luma block, then add its residual.
1139 if(IS_INTRA4x4(mb_type)){
1140 if(simple || !s->encoding){
1141 if(IS_8x8DCT(mb_type)){
1142 if(transform_bypass){
1144 idct_add = s->dsp.add_pixels8;
1146 idct_dc_add = h->h264dsp.h264_idct8_dc_add;
1147 idct_add = h->h264dsp.h264_idct8_add;
1149 for(i=0; i<16; i+=4){
1150 uint8_t * const ptr= dest_y + block_offset[i];
1151 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
// profile_idc 244 (High 4:4:4) lossless: fused predict+add variants.
1152 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1153 h->hpc.pred8x8l_add[dir](ptr, h->mb + i*16, linesize);
1155 const int nnz = h->non_zero_count_cache[ scan8[i] ];
1156 h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
1157 (h->topright_samples_available<<i)&0x4000, linesize);
// Single DC coefficient gets the cheaper dc-only add.
1159 if(nnz == 1 && h->mb[i*16])
1160 idct_dc_add(ptr, h->mb + i*16, linesize);
1162 idct_add (ptr, h->mb + i*16, linesize);
1167 if(transform_bypass){
1169 idct_add = s->dsp.add_pixels4;
1171 idct_dc_add = h->h264dsp.h264_idct_dc_add;
1172 idct_add = h->h264dsp.h264_idct_add;
1174 for(i=0; i<16; i++){
1175 uint8_t * const ptr= dest_y + block_offset[i];
1176 const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
1178 if(transform_bypass && h->sps.profile_idc==244 && dir<=1){
1179 h->hpc.pred4x4_add[dir](ptr, h->mb + i*16, linesize);
// Down-left/vertical-left prediction needs the top-right samples;
// if unavailable, replicate the last available top pixel.
1183 if(dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED){
1184 const int topright_avail= (h->topright_samples_available<<i)&0x8000;
1185 assert(mb_y || linesize <= block_offset[i]);
1186 if(!topright_avail){
1187 tr= ptr[3 - linesize]*0x01010101;
1188 topright= (uint8_t*) &tr;
1190 topright= ptr + 4 - linesize;
1194 h->hpc.pred4x4[ dir ](ptr, topright, linesize);
1195 nnz = h->non_zero_count_cache[ scan8[i] ];
1198 if(nnz == 1 && h->mb[i*16])
1199 idct_dc_add(ptr, h->mb + i*16, linesize);
1201 idct_add (ptr, h->mb + i*16, linesize);
1203 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0);
// 16x16 intra: one full-macroblock prediction, then the luma DC transform.
1210 h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
1212 if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){
1213 if(!transform_bypass)
1214 h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]);
// Bypass mode: scatter the DC values directly to each block's DC slot.
1216 static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
1217 8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
1218 for(i = 0; i < 16; i++)
1219 h->mb[dc_mapping[i]] = h->mb_luma_dc[i];
1223 ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
1225 if(h->deblocking_filter)
1226 xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
// Inter macroblock: motion compensation for both prediction lists.
1228 hl_motion(h, dest_y, dest_cb, dest_cr,
1229 s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
1230 s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
1231 h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab);
// Add the luma residual (intra4x4 already added it above block by block).
1235 if(!IS_INTRA4x4(mb_type)){
1237 if(IS_INTRA16x16(mb_type)){
1238 if(transform_bypass){
1239 if(h->sps.profile_idc==244 && (h->intra16x16_pred_mode==VERT_PRED8x8 || h->intra16x16_pred_mode==HOR_PRED8x8)){
1240 h->hpc.pred16x16_add[h->intra16x16_pred_mode](dest_y, block_offset, h->mb, linesize);
1242 for(i=0; i<16; i++){
1243 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1244 s->dsp.add_pixels4(dest_y + block_offset[i], h->mb + i*16, linesize);
1248 h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
1250 }else if(h->cbp&15){
1251 if(transform_bypass){
1252 const int di = IS_8x8DCT(mb_type) ? 4 : 1;
1253 idct_add= IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4;
1254 for(i=0; i<16; i+=di){
1255 if(h->non_zero_count_cache[ scan8[i] ]){
1256 idct_add(dest_y + block_offset[i], h->mb + i*16, linesize);
1260 if(IS_8x8DCT(mb_type)){
1261 h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
1263 h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
1268 for(i=0; i<16; i++){
1269 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
1270 uint8_t * const ptr= dest_y + block_offset[i];
1271 ff_svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
// Chroma residual: only when the coded block pattern has chroma bits (0x30).
1277 if((simple || !CONFIG_GRAY || !(s->flags&CODEC_FLAG_GRAY)) && (h->cbp&0x30)){
1278 uint8_t *dest[2] = {dest_cb, dest_cr};
1279 if(transform_bypass){
1280 if(IS_INTRA(mb_type) && h->sps.profile_idc==244 && (h->chroma_pred_mode==VERT_PRED8x8 || h->chroma_pred_mode==HOR_PRED8x8)){
1281 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[0], block_offset + 16, h->mb + 16*16, uvlinesize);
1282 h->hpc.pred8x8_add[h->chroma_pred_mode](dest[1], block_offset + 20, h->mb + 20*16, uvlinesize);
1284 idct_add = s->dsp.add_pixels4;
1285 for(i=16; i<16+8; i++){
1286 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16])
1287 idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
// Chroma DC dequant uses a different dequant table for intra vs inter blocks.
1292 if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
1293 chroma_dc_dequant_idct_c(h->mb + 16*16 , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1294 if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
1295 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
1296 h->h264dsp.h264_idct_add8(dest, block_offset,
1298 h->non_zero_count_cache);
1300 chroma_dc_dequant_idct_c(h->mb + 16*16 , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
1301 chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
1302 for(i=16; i<16+8; i++){
1303 if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
1304 uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
1305 ff_svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, ff_h264_chroma_qp[s->qscale + 12] - 12, 2);
// Clear the coefficient buffer for the next macroblock.
1312 if(h->cbp || IS_INTRA(mb_type))
1313 s->dsp.clear_blocks(h->mb);
// Fast-path wrapper: hl_decode_mb_internal with simple=1 (constant-folded).
1317 * Process a macroblock; this case avoids checks for expensive uncommon cases.
1319 static void hl_decode_mb_simple(H264Context *h){
1320 hl_decode_mb_internal(h, 1);
// Slow-path wrapper: hl_decode_mb_internal with simple=0; av_noinline keeps
// the rarely-taken path out of the hot code.
1324 * Process a macroblock; this handles edge cases, such as interlacing.
1326 static void av_noinline hl_decode_mb_complex(H264Context *h){
1327 hl_decode_mb_internal(h, 0);
/**
 * Decode the current macroblock, dispatching to the simple (fast) or
 * complex path.  The complex path is forced for CONFIG_SMALL builds,
 * streams flagged complex (interlacing etc.), PCM macroblocks and
 * qscale==0 (potential lossless bypass).
 */
1330 void ff_h264_hl_decode_mb(H264Context *h){
1331 MpegEncContext * const s = &h->s;
1332 const int mb_xy= h->mb_xy;
1333 const int mb_type= s->current_picture.mb_type[mb_xy];
1334 int is_complex = CONFIG_SMALL || h->is_complex || IS_INTRA_PCM(mb_type) || s->qscale == 0;
1337 hl_decode_mb_complex(h);
1338 else hl_decode_mb_simple(h);
/**
 * Parse the explicit weighted-prediction table (pred_weight_table) from
 * the slice header: per-list, per-reference luma and chroma weights and
 * offsets.  Sets h->use_weight / h->use_weight_chroma and the per-list
 * weight flags as a side effect.
 */
1341 static int pred_weight_table(H264Context *h){
1342 MpegEncContext * const s = &h->s;
1344 int luma_def, chroma_def;
1347 h->use_weight_chroma= 0;
1348 h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
1350 h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
// Default (identity) weight is 1 << log2_denom with a zero offset.
1351 luma_def = 1<<h->luma_log2_weight_denom;
1352 chroma_def = 1<<h->chroma_log2_weight_denom;
1354 for(list=0; list<2; list++){
1355 h->luma_weight_flag[list] = 0;
1356 h->chroma_weight_flag[list] = 0;
1357 for(i=0; i<h->ref_count[list]; i++){
1358 int luma_weight_flag, chroma_weight_flag;
1360 luma_weight_flag= get_bits1(&s->gb);
1361 if(luma_weight_flag){
1362 h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
1363 h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
// Only mark weighting as actually used when the pair differs from identity.
1364 if( h->luma_weight[i][list][0] != luma_def
1365 || h->luma_weight[i][list][1] != 0) {
1367 h->luma_weight_flag[list]= 1;
1370 h->luma_weight[i][list][0]= luma_def;
1371 h->luma_weight[i][list][1]= 0;
1375 chroma_weight_flag= get_bits1(&s->gb);
1376 if(chroma_weight_flag){
// j indexes the two chroma planes (Cb/Cr).
1379 h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
1380 h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
1381 if( h->chroma_weight[i][list][j][0] != chroma_def
1382 || h->chroma_weight[i][list][j][1] != 0) {
1383 h->use_weight_chroma= 1;
1384 h->chroma_weight_flag[list]= 1;
1390 h->chroma_weight[i][list][j][0]= chroma_def;
1391 h->chroma_weight[i][list][j][1]= 0;
// List 1 exists only for B slices.
1396 if(h->slice_type_nos != FF_B_TYPE) break;
1398 h->use_weight= h->use_weight || h->use_weight_chroma;
1403 * Initialize implicit_weight table.
1404 * @param field 0/1 initialize the weight for interlaced MBAFF
1405 * -1 initializes the rest
// Implicit bi-prediction weights are derived from the POC distances between
// the current picture and each (ref0, ref1) pair, per H.264 8.4.2.3.2.
1407 static void implicit_weight_table(H264Context *h, int field){
1408 MpegEncContext * const s = &h->s;
1409 int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
1411 for (i = 0; i < 2; i++) {
1412 h->luma_weight_flag[i] = 0;
1413 h->chroma_weight_flag[i] = 0;
1417 cur_poc = s->current_picture_ptr->poc;
// Shortcut: a single symmetric reference pair needs no implicit weighting.
1418 if( h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF
1419 && h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
1421 h->use_weight_chroma= 0;
1425 ref_count0= h->ref_count[0];
1426 ref_count1= h->ref_count[1];
// Field invocation: use the field POC and the field-reference index range.
1428 cur_poc = s->current_picture_ptr->field_poc[field];
1430 ref_count0= 16+2*h->ref_count[0];
1431 ref_count1= 16+2*h->ref_count[1];
// use_weight_chroma==2 flags the implicit mode with a fixed denominator of 5.
1435 h->use_weight_chroma= 2;
1436 h->luma_log2_weight_denom= 5;
1437 h->chroma_log2_weight_denom= 5;
1439 for(ref0=ref_start; ref0 < ref_count0; ref0++){
1440 int poc0 = h->ref_list[0][ref0].poc;
1441 for(ref1=ref_start; ref1 < ref_count1; ref1++){
1442 int poc1 = h->ref_list[1][ref1].poc;
// td/tb/tx/dist_scale_factor follow the spec's temporal-distance scaling.
1443 int td = av_clip(poc1 - poc0, -128, 127);
1446 int tb = av_clip(cur_poc - poc0, -128, 127);
1447 int tx = (16384 + (FFABS(td) >> 1)) / td;
1448 int dist_scale_factor = (tb*tx + 32) >> 8;
1449 if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
1450 w = 64 - dist_scale_factor;
1453 h->implicit_weight[ref0][ref1][0]=
1454 h->implicit_weight[ref0][ref1][1]= w;
1456 h->implicit_weight[ref0][ref1][field]=w;
1463 * instantaneous decoder refresh.
// IDR: drop all reference pictures and reset frame-number tracking so
// decoding restarts from a clean state.
1465 static void idr(H264Context *h){
1466 ff_h264_remove_all_refs(h);
1467 h->prev_frame_num= 0;
1468 h->prev_frame_num_offset= 0;
1473 /* forget old pics after a seek */
// avcodec flush callback: unreference every delayed (not yet output) picture,
// reset output/POC state, then flush the underlying MpegEncContext.
1474 static void flush_dpb(AVCodecContext *avctx){
1475 H264Context *h= avctx->priv_data;
1477 for(i=0; i<MAX_DELAYED_PIC_COUNT; i++) {
1478 if(h->delayed_pic[i])
1479 h->delayed_pic[i]->reference= 0;
1480 h->delayed_pic[i]= NULL;
1482 h->outputed_poc= INT_MIN;
1483 h->prev_interlaced_frame = 1;
1485 if(h->s.current_picture_ptr)
1486 h->s.current_picture_ptr->reference= 0;
1487 h->s.first_field= 0;
1488 ff_h264_reset_sei(h);
1489 ff_mpeg_flush(avctx);
/**
 * Compute the picture order count (POC) of the current picture from the
 * slice-header syntax, handling all three SPS poc_type modes
 * (H.264 spec section 8.2.1), and store the per-field and frame POCs
 * in the current Picture.
 */
1492 static int init_poc(H264Context *h){
1493 MpegEncContext * const s = &h->s;
1494 const int max_frame_num= 1<<h->sps.log2_max_frame_num;
1496 Picture *cur = s->current_picture_ptr;
// frame_num wrapped -> advance the offset by one full frame_num cycle.
1498 h->frame_num_offset= h->prev_frame_num_offset;
1499 if(h->frame_num < h->prev_frame_num)
1500 h->frame_num_offset += max_frame_num;
// poc_type 0: poc_lsb is coded; reconstruct poc_msb by wrap detection.
1502 if(h->sps.poc_type==0){
1503 const int max_poc_lsb= 1<<h->sps.log2_max_poc_lsb;
1505 if (h->poc_lsb < h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb >= max_poc_lsb/2)
1506 h->poc_msb = h->prev_poc_msb + max_poc_lsb;
1507 else if(h->poc_lsb > h->prev_poc_lsb && h->prev_poc_lsb - h->poc_lsb < -max_poc_lsb/2)
1508 h->poc_msb = h->prev_poc_msb - max_poc_lsb;
1510 h->poc_msb = h->prev_poc_msb;
1511 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
1513 field_poc[1] = h->poc_msb + h->poc_lsb;
1514 if(s->picture_structure == PICT_FRAME)
1515 field_poc[1] += h->delta_poc_bottom;
// poc_type 1: POC derived from frame_num and the SPS reference offsets.
1516 }else if(h->sps.poc_type==1){
1517 int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
1520 if(h->sps.poc_cycle_length != 0)
1521 abs_frame_num = h->frame_num_offset + h->frame_num;
1525 if(h->nal_ref_idc==0 && abs_frame_num > 0)
1528 expected_delta_per_poc_cycle = 0;
1529 for(i=0; i < h->sps.poc_cycle_length; i++)
1530 expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
1532 if(abs_frame_num > 0){
1533 int poc_cycle_cnt = (abs_frame_num - 1) / h->sps.poc_cycle_length;
1534 int frame_num_in_poc_cycle = (abs_frame_num - 1) % h->sps.poc_cycle_length;
1536 expectedpoc = poc_cycle_cnt * expected_delta_per_poc_cycle;
1537 for(i = 0; i <= frame_num_in_poc_cycle; i++)
1538 expectedpoc = expectedpoc + h->sps.offset_for_ref_frame[ i ];
1542 if(h->nal_ref_idc == 0)
1543 expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
1545 field_poc[0] = expectedpoc + h->delta_poc[0];
1546 field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
1548 if(s->picture_structure == PICT_FRAME)
1549 field_poc[1] += h->delta_poc[1];
// poc_type 2: POC follows decoding order (2 * frame counter).
1551 int poc= 2*(h->frame_num_offset + h->frame_num);
// Store per-field POCs only for the fields actually present; the frame POC
// is the minimum of the two field POCs.
1560 if(s->picture_structure != PICT_BOTTOM_FIELD)
1561 s->current_picture_ptr->field_poc[0]= field_poc[0];
1562 if(s->picture_structure != PICT_TOP_FIELD)
1563 s->current_picture_ptr->field_poc[1]= field_poc[1];
1564 cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
1571 * initialize scan tables
// Builds the decoder's permuted zigzag/field scan tables (4x4 and 8x8)
// and selects the q0 (lossless-bypass) variants: identity scans when the
// SPS enables transform_bypass, otherwise the same permuted tables.
1573 static void init_scan_tables(H264Context *h){
1575 for(i=0; i<16; i++){
// Permute 4x4 scan entries: swap row and column nibbles of the index.
1576 #define T(x) (x>>2) | ((x<<2) & 0xF)
1577 h->zigzag_scan[i] = T(zigzag_scan[i]);
1578 h-> field_scan[i] = T( field_scan[i]);
1581 for(i=0; i<64; i++){
// Permute 8x8 scan entries: swap the 3-bit row and column parts.
1582 #define T(x) (x>>3) | ((x&7)<<3)
1583 h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
1584 h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
1585 h->field_scan8x8[i] = T(field_scan8x8[i]);
1586 h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
1589 if(h->sps.transform_bypass){ //FIXME same ugly
1590 h->zigzag_scan_q0 = zigzag_scan;
1591 h->zigzag_scan8x8_q0 = ff_zigzag_direct;
1592 h->zigzag_scan8x8_cavlc_q0 = zigzag_scan8x8_cavlc;
1593 h->field_scan_q0 = field_scan;
1594 h->field_scan8x8_q0 = field_scan8x8;
1595 h->field_scan8x8_cavlc_q0 = field_scan8x8_cavlc;
1597 h->zigzag_scan_q0 = h->zigzag_scan;
1598 h->zigzag_scan8x8_q0 = h->zigzag_scan8x8;
1599 h->zigzag_scan8x8_cavlc_q0 = h->zigzag_scan8x8_cavlc;
1600 h->field_scan_q0 = h->field_scan;
1601 h->field_scan8x8_q0 = h->field_scan8x8;
1602 h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
/**
 * Finish decoding the current field/frame: run reference picture marking,
 * roll the POC/frame_num state forward for the next picture, and notify
 * any hardware acceleration (hwaccel / VDPAU) that the picture is complete.
 */
1606 static void field_end(H264Context *h){
1607 MpegEncContext * const s = &h->s;
1608 AVCodecContext * const avctx= s->avctx;
1611 s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
1612 s->current_picture_ptr->pict_type= s->pict_type;
1614 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1615 ff_vdpau_h264_set_reference_frames(s);
// Apply the memory-management control operations gathered for this picture.
1618 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
// Current POC/frame_num become the "previous" values for the next slice header.
1619 h->prev_poc_msb= h->poc_msb;
1620 h->prev_poc_lsb= h->poc_lsb;
1622 h->prev_frame_num_offset= h->frame_num_offset;
1623 h->prev_frame_num= h->frame_num;
1625 if (avctx->hwaccel) {
1626 if (avctx->hwaccel->end_frame(avctx) < 0)
1627 av_log(avctx, AV_LOG_ERROR, "hardware accelerator failed to decode picture\n");
1630 if (CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
1631 ff_vdpau_h264_picture_complete(s);
1634 * FIXME: Error handling code does not seem to support interlaced
1635 * when slices span multiple rows
1636 * The ff_er_add_slice calls don't work right for bottom
1637 * fields; they cause massive erroneous error concealing
1638 * Error marking covers both fields (top and bottom).
1639 * This causes a mismatched s->error_count
1640 * and a bad error table. Further, the error count goes to
1641 * INT_MAX when called for bottom field, because mb_y is
1642 * past end by one (callers fault) and resync_mb_y != 0
1643 * causes problems for the first MB line, too.
1654 * Replicate H264 "master" context to thread contexts.
// Copies the per-picture state (current picture, strides, POC/frame_num
// tracking, reference lists and dequant tables) from the master slice
// context into a per-thread context before sliced decoding.
1656 static void clone_slice(H264Context *dst, H264Context *src)
1658 memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
1659 dst->s.current_picture_ptr = src->s.current_picture_ptr;
1660 dst->s.current_picture = src->s.current_picture;
1661 dst->s.linesize = src->s.linesize;
1662 dst->s.uvlinesize = src->s.uvlinesize;
1663 dst->s.first_field = src->s.first_field;
1665 dst->prev_poc_msb = src->prev_poc_msb;
1666 dst->prev_poc_lsb = src->prev_poc_lsb;
1667 dst->prev_frame_num_offset = src->prev_frame_num_offset;
1668 dst->prev_frame_num = src->prev_frame_num;
1669 dst->short_ref_count = src->short_ref_count;
1671 memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
1672 memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
1673 memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
1674 memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
1676 memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff));
1677 memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
1681 * computes profile from profile_idc and constraint_set?_flags
1685 * @return profile as defined by FF_PROFILE_H264_*
1687 int ff_h264_get_profile(SPS *sps)
1689 int profile = sps->profile_idc;
1691 switch(sps->profile_idc) {
1692 case FF_PROFILE_H264_BASELINE:
1693 // constraint_set1_flag set to 1
// constraint_set1 marks Constrained Baseline.
1694 profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0;
1696 case FF_PROFILE_H264_HIGH_10:
1697 case FF_PROFILE_H264_HIGH_422:
1698 case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
1699 // constraint_set3_flag set to 1
// constraint_set3 marks the Intra-only variants of the High profiles.
1700 profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0;
1708 * decodes a slice header.
1709 * This will also call MPV_common_init() and frame_start() as needed.
1711 * @param h h264context
1712 * @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
1714 * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
1716 static int decode_slice_header(H264Context *h, H264Context *h0){
1717 MpegEncContext * const s = &h->s;
1718 MpegEncContext * const s0 = &h0->s;
1719 unsigned int first_mb_in_slice;
1720 unsigned int pps_id;
1721 int num_ref_idx_active_override_flag;
1722 unsigned int slice_type, tmp, i, j;
1723 int default_ref_list_done = 0;
1724 int last_pic_structure;
1726 s->dropable= h->nal_ref_idc == 0;
1728 if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
1729 s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
1730 s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
1732 s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
1733 s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
1736 first_mb_in_slice= get_ue_golomb(&s->gb);
1738 if(first_mb_in_slice == 0){ //FIXME better field boundary detection
1739 if(h0->current_slice && FIELD_PICTURE){
1743 h0->current_slice = 0;
1744 if (!s0->first_field)
1745 s->current_picture_ptr= NULL;
1748 slice_type= get_ue_golomb_31(&s->gb);
1750 av_log(h->s.avctx, AV_LOG_ERROR, "slice type too large (%d) at %d %d\n", h->slice_type, s->mb_x, s->mb_y);
1755 h->slice_type_fixed=1;
1757 h->slice_type_fixed=0;
1759 slice_type= golomb_to_pict_type[ slice_type ];
1760 if (slice_type == FF_I_TYPE
1761 || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
1762 default_ref_list_done = 1;
1764 h->slice_type= slice_type;
1765 h->slice_type_nos= slice_type & 3;
1767 s->pict_type= h->slice_type; // to make a few old functions happy, it's wrong though
1769 pps_id= get_ue_golomb(&s->gb);
1770 if(pps_id>=MAX_PPS_COUNT){
1771 av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
1774 if(!h0->pps_buffers[pps_id]) {
1775 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing PPS %u referenced\n", pps_id);
1778 h->pps= *h0->pps_buffers[pps_id];
1780 if(!h0->sps_buffers[h->pps.sps_id]) {
1781 av_log(h->s.avctx, AV_LOG_ERROR, "non-existing SPS %u referenced\n", h->pps.sps_id);
1784 h->sps = *h0->sps_buffers[h->pps.sps_id];
1786 s->avctx->profile = ff_h264_get_profile(&h->sps);
1787 s->avctx->level = h->sps.level_idc;
1788 s->avctx->refs = h->sps.ref_frame_count;
1790 if(h == h0 && h->dequant_coeff_pps != pps_id){
1791 h->dequant_coeff_pps = pps_id;
1792 init_dequant_tables(h);
1795 s->mb_width= h->sps.mb_width;
1796 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
1798 h->b_stride= s->mb_width*4;
1800 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
1801 if(h->sps.frame_mbs_only_flag)
1802 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
1804 s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 7);
1806 if (s->context_initialized
1807 && ( s->width != s->avctx->width || s->height != s->avctx->height
1808 || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
1810 return -1; // width / height changed during parallelized decoding
1812 flush_dpb(s->avctx);
1815 if (!s->context_initialized) {
1817 return -1; // we cant (re-)initialize context during parallel decoding
1819 avcodec_set_dimensions(s->avctx, s->width, s->height);
1820 s->avctx->sample_aspect_ratio= h->sps.sar;
1821 av_assert0(s->avctx->sample_aspect_ratio.den);
1823 if(h->sps.video_signal_type_present_flag){
1824 s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
1825 if(h->sps.colour_description_present_flag){
1826 s->avctx->color_primaries = h->sps.color_primaries;
1827 s->avctx->color_trc = h->sps.color_trc;
1828 s->avctx->colorspace = h->sps.colorspace;
1832 if(h->sps.timing_info_present_flag){
1833 int64_t den= h->sps.time_scale;
1834 if(h->x264_build < 44U)
1836 av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
1837 h->sps.num_units_in_tick, den, 1<<30);
1839 s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
1840 s->avctx->codec->pix_fmts ?
1841 s->avctx->codec->pix_fmts :
1842 s->avctx->color_range == AVCOL_RANGE_JPEG ?
1843 hwaccel_pixfmt_list_h264_jpeg_420 :
1844 ff_hwaccel_pixfmt_list_420);
1845 s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
1847 if (MPV_common_init(s) < 0)
1850 h->prev_interlaced_frame = 1;
1852 init_scan_tables(h);
1853 ff_h264_alloc_tables(h);
1855 for(i = 1; i < s->avctx->thread_count; i++) {
1857 c = h->thread_context[i] = av_malloc(sizeof(H264Context));
1858 memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
1859 memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
1860 c->h264dsp = h->h264dsp;
1863 init_scan_tables(c);
1864 clone_tables(c, h, i);
1867 for(i = 0; i < s->avctx->thread_count; i++)
1868 if(context_init(h->thread_context[i]) < 0)
1872 h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
1875 h->mb_aff_frame = 0;
1876 last_pic_structure = s0->picture_structure;
1877 if(h->sps.frame_mbs_only_flag){
1878 s->picture_structure= PICT_FRAME;
1880 if(get_bits1(&s->gb)) { //field_pic_flag
1881 s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
1883 s->picture_structure= PICT_FRAME;
1884 h->mb_aff_frame = h->sps.mb_aff;
1887 h->mb_field_decoding_flag= s->picture_structure != PICT_FRAME;
1889 if(h0->current_slice == 0){
1890 while(h->frame_num != h->prev_frame_num &&
1891 h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
1892 Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
1893 av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
1894 if (ff_h264_frame_start(h) < 0)
1896 h->prev_frame_num++;
1897 h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
1898 s->current_picture_ptr->frame_num= h->prev_frame_num;
1899 ff_generate_sliding_window_mmcos(h);
1900 ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
1901 /* Error concealment: if a ref is missing, copy the previous ref in its place.
1902 * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
1903 * about there being no actual duplicates.
1904 * FIXME: this doesn't copy padding for out-of-frame motion vectors. Given we're
1905 * concealing a lost frame, this probably isn't noticable by comparison, but it should
1907 if (h->short_ref_count) {
1909 av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize,
1910 (const uint8_t**)prev->data, prev->linesize,
1911 s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);
1912 h->short_ref[0]->poc = prev->poc+2;
1914 h->short_ref[0]->frame_num = h->prev_frame_num;
1918 /* See if we have a decoded first field looking for a pair... */
1919 if (s0->first_field) {
1920 assert(s0->current_picture_ptr);
1921 assert(s0->current_picture_ptr->data[0]);
1922 assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
1924 /* figure out if we have a complementary field pair */
1925 if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
1927 * Previous field is unmatched. Don't display it, but let it
1928 * remain for reference if marked as such.
1930 s0->current_picture_ptr = NULL;
1931 s0->first_field = FIELD_PICTURE;
1934 if (h->nal_ref_idc &&
1935 s0->current_picture_ptr->reference &&
1936 s0->current_picture_ptr->frame_num != h->frame_num) {
1938 * This and previous field were reference, but had
1939 * different frame_nums. Consider this field first in
1940 * pair. Throw away previous field except for reference
1943 s0->first_field = 1;
1944 s0->current_picture_ptr = NULL;
1947 /* Second field in complementary pair */
1948 s0->first_field = 0;
1953 /* Frame or first field in a potentially complementary pair */
1954 assert(!s0->current_picture_ptr);
1955 s0->first_field = FIELD_PICTURE;
1958 if((!FIELD_PICTURE || s0->first_field) && ff_h264_frame_start(h) < 0) {
1959 s0->first_field = 0;
1966 s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
1968 assert(s->mb_num == s->mb_width * s->mb_height);
1969 if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
1970 first_mb_in_slice >= s->mb_num){
1971 av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
1974 s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
1975 s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
1976 if (s->picture_structure == PICT_BOTTOM_FIELD)
1977 s->resync_mb_y = s->mb_y = s->mb_y + 1;
1978 assert(s->mb_y < s->mb_height);
1980 if(s->picture_structure==PICT_FRAME){
1981 h->curr_pic_num= h->frame_num;
1982 h->max_pic_num= 1<< h->sps.log2_max_frame_num;
1984 h->curr_pic_num= 2*h->frame_num + 1;
1985 h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
1988 if(h->nal_unit_type == NAL_IDR_SLICE){
1989 get_ue_golomb(&s->gb); /* idr_pic_id */
1992 if(h->sps.poc_type==0){
1993 h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
1995 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
1996 h->delta_poc_bottom= get_se_golomb(&s->gb);
2000 if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
2001 h->delta_poc[0]= get_se_golomb(&s->gb);
2003 if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
2004 h->delta_poc[1]= get_se_golomb(&s->gb);
2009 if(h->pps.redundant_pic_cnt_present){
2010 h->redundant_pic_count= get_ue_golomb(&s->gb);
2013 //set defaults, might be overridden a few lines later
2014 h->ref_count[0]= h->pps.ref_count[0];
2015 h->ref_count[1]= h->pps.ref_count[1];
2017 if(h->slice_type_nos != FF_I_TYPE){
2018 if(h->slice_type_nos == FF_B_TYPE){
2019 h->direct_spatial_mv_pred= get_bits1(&s->gb);
2021 num_ref_idx_active_override_flag= get_bits1(&s->gb);
2023 if(num_ref_idx_active_override_flag){
2024 h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
2025 if(h->slice_type_nos==FF_B_TYPE)
2026 h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
2028 if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
2029 av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow\n");
2030 h->ref_count[0]= h->ref_count[1]= 1;
2034 if(h->slice_type_nos == FF_B_TYPE)
2041 if(!default_ref_list_done){
2042 ff_h264_fill_default_ref_list(h);
2045 if(h->slice_type_nos!=FF_I_TYPE && ff_h264_decode_ref_pic_list_reordering(h) < 0)
2048 if(h->slice_type_nos!=FF_I_TYPE){
2049 s->last_picture_ptr= &h->ref_list[0][0];
2050 ff_copy_picture(&s->last_picture, s->last_picture_ptr);
2052 if(h->slice_type_nos==FF_B_TYPE){
2053 s->next_picture_ptr= &h->ref_list[1][0];
2054 ff_copy_picture(&s->next_picture, s->next_picture_ptr);
2057 if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
2058 || (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
2059 pred_weight_table(h);
2060 else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
2061 implicit_weight_table(h, -1);
2064 for (i = 0; i < 2; i++) {
2065 h->luma_weight_flag[i] = 0;
2066 h->chroma_weight_flag[i] = 0;
2071 ff_h264_decode_ref_pic_marking(h0, &s->gb);
2074 ff_h264_fill_mbaff_ref_list(h);
2076 if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
2077 implicit_weight_table(h, 0);
2078 implicit_weight_table(h, 1);
2082 if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
2083 ff_h264_direct_dist_scale_factor(h);
2084 ff_h264_direct_ref_list_init(h);
2086 if( h->slice_type_nos != FF_I_TYPE && h->pps.cabac ){
2087 tmp = get_ue_golomb_31(&s->gb);
2089 av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
2092 h->cabac_init_idc= tmp;
2095 h->last_qscale_diff = 0;
2096 tmp = h->pps.init_qp + get_se_golomb(&s->gb);
2098 av_log(s->avctx, AV_LOG_ERROR, "QP %u out of range\n", tmp);
2102 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2103 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
2104 //FIXME qscale / qp ... stuff
2105 if(h->slice_type == FF_SP_TYPE){
2106 get_bits1(&s->gb); /* sp_for_switch_flag */
2108 if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
2109 get_se_golomb(&s->gb); /* slice_qs_delta */
2112 h->deblocking_filter = 1;
2113 h->slice_alpha_c0_offset = 52;
2114 h->slice_beta_offset = 52;
2115 if( h->pps.deblocking_filter_parameters_present ) {
2116 tmp= get_ue_golomb_31(&s->gb);
2118 av_log(s->avctx, AV_LOG_ERROR, "deblocking_filter_idc %u out of range\n", tmp);
2121 h->deblocking_filter= tmp;
2122 if(h->deblocking_filter < 2)
2123 h->deblocking_filter^= 1; // 1<->0
2125 if( h->deblocking_filter ) {
2126 h->slice_alpha_c0_offset += get_se_golomb(&s->gb) << 1;
2127 h->slice_beta_offset += get_se_golomb(&s->gb) << 1;
2128 if( h->slice_alpha_c0_offset > 104U
2129 || h->slice_beta_offset > 104U){
2130 av_log(s->avctx, AV_LOG_ERROR, "deblocking filter parameters %d %d out of range\n", h->slice_alpha_c0_offset, h->slice_beta_offset);
2136 if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
2137 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type_nos != FF_I_TYPE)
2138 ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type_nos == FF_B_TYPE)
2139 ||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
2140 h->deblocking_filter= 0;
2142 if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
2143 if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
2144 /* Cheat slightly for speed:
2145 Do not bother to deblock across slices. */
2146 h->deblocking_filter = 2;
2148 h0->max_contexts = 1;
2149 if(!h0->single_decode_warning) {
2150 av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
2151 h0->single_decode_warning = 1;
2154 return 1; // deblocking switched inside frame
2157 h->qp_thresh= 15 + 52 - FFMIN(h->slice_alpha_c0_offset, h->slice_beta_offset) - FFMAX3(0, h->pps.chroma_qp_index_offset[0], h->pps.chroma_qp_index_offset[1]);
2160 if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
2161 slice_group_change_cycle= get_bits(&s->gb, ?);
2164 h0->last_slice_type = slice_type;
2165 h->slice_num = ++h0->current_slice;
2166 if(h->slice_num >= MAX_SLICES){
2167 av_log(s->avctx, AV_LOG_ERROR, "Too many slices, increase MAX_SLICES and recompile\n");
2172 int *ref2frm= h->ref2frm[h->slice_num&(MAX_SLICES-1)][j];
2173 for(i=0; i<16; i++){
2175 if(h->ref_list[j][i].data[0]){
2177 uint8_t *base= h->ref_list[j][i].base[0];
2178 for(k=0; k<h->short_ref_count; k++)
2179 if(h->short_ref[k]->base[0] == base){
2183 for(k=0; k<h->long_ref_count; k++)
2184 if(h->long_ref[k] && h->long_ref[k]->base[0] == base){
2185 id_list[i]= h->short_ref_count + k;
2194 ref2frm[i+2]= 4*id_list[i]
2195 +(h->ref_list[j][i].reference&3);
2198 for(i=16; i<48; i++)
2199 ref2frm[i+4]= 4*id_list[(i-16)>>1]
2200 +(h->ref_list[j][i].reference&3);
2203 h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
2204 h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
2206 if(s->avctx->debug&FF_DEBUG_PICT_INFO){
2207 av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
2209 (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
2211 av_get_pict_type_char(h->slice_type), h->slice_type_fixed ? " fix" : "", h->nal_unit_type == NAL_IDR_SLICE ? " IDR" : "",
2212 pps_id, h->frame_num,
2213 s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
2214 h->ref_count[0], h->ref_count[1],
2216 h->deblocking_filter, h->slice_alpha_c0_offset/2-26, h->slice_beta_offset/2-26,
2218 h->use_weight==1 && h->use_weight_chroma ? "c" : "",
2219 h->slice_type == FF_B_TYPE ? (h->direct_spatial_mv_pred ? "SPAT" : "TEMP") : ""
/**
 * Map an internal FF_*_TYPE slice type to the numeric slice_type value
 * used in the H.264 bitstream: P=0, B=1, I=2, SP=3, SI=4.
 * NOTE(review): the default case and closing braces fall outside this
 * excerpt (original-line numbering jumps), so behavior for any other
 * value cannot be confirmed here.
 */
2226 int ff_h264_get_slice_type(const H264Context *h)
2228 switch (h->slice_type) {
2229 case FF_P_TYPE: return 0;
2230 case FF_B_TYPE: return 1;
2231 case FF_I_TYPE: return 2;
2232 case FF_SP_TYPE: return 3;
2233 case FF_SI_TYPE: return 4;
2240 * @return non zero if the loop filter can be skipped
/*
 * Prepare the per-macroblock neighbor caches (non_zero_count_cache,
 * mv_cache, ref_cache, top/left mb indices and types) that the
 * deblocking loop filter reads.
 * NOTE(review): this excerpt is line-sampled — the original-line numbers
 * jump, so several statements (closing braces, early returns, loop
 * headers) are elided between the lines below.
 */
2242 static int fill_filter_caches(H264Context *h, int mb_type){
2243 MpegEncContext * const s = &h->s;
2244 const int mb_xy= h->mb_xy;
2245 int top_xy, left_xy[2];
2246 int top_type, left_type[2];
2248 top_xy = mb_xy - (s->mb_stride << MB_FIELD);
2250 //FIXME deblocking could skip the intra and nnz parts.
2252 /* Wow, what a mess, why didn't they simplify the interlacing & intra
2253 * stuff, I can't imagine that these complex rules are worth it. */
/* Left neighbor: both halves default to the macroblock directly to the
 * left; adjusted below when the field/frame coding of the left MB
 * differs from the current one (MBAFF). */
2255 left_xy[1] = left_xy[0] = mb_xy-1;
2257 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
2258 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
2260 if (left_mb_field_flag != curr_mb_field_flag) {
2261 left_xy[0] -= s->mb_stride;
2264 if(curr_mb_field_flag){
2265 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);
2267 if (left_mb_field_flag != curr_mb_field_flag) {
2268 left_xy[1] += s->mb_stride;
2273 h->top_mb_xy = top_xy;
2274 h->left_mb_xy[0] = left_xy[0];
2275 h->left_mb_xy[1] = left_xy[1];
2277 //for sufficiently low qp, filtering wouldn't do anything
2278 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
2279 int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
2280 int qp = s->current_picture.qscale_table[mb_xy];
2282 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
2283 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
2286 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh)
2287 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
2292 top_type = s->current_picture.mb_type[top_xy] ;
2293 left_type[0] = s->current_picture.mb_type[left_xy[0]];
2294 left_type[1] = s->current_picture.mb_type[left_xy[1]];
/* deblocking_filter==2 means "do not filter across slice boundaries":
 * neighbors from a different slice are treated as unavailable. */
2295 if(h->deblocking_filter == 2){
2296 if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
2297 if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
2299 if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
2300 if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
2302 h->top_type = top_type ;
2303 h->left_type[0]= left_type[0];
2304 h->left_type[1]= left_type[1];
2306 if(IS_INTRA(mb_type))
/* Reload the current MB's non-zero-coefficient counts into the cache
 * layout used by the filter. */
2309 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
2310 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
2311 AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]);
2312 AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]);
2313 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
2315 h->cbp= h->cbp_table[mb_xy];
2319 for(list=0; list<h->list_count; list++){
2322 int16_t (*mv_dst)[2];
2323 int16_t (*mv_src)[2];
/* List unused by this MB: fill mv/ref caches with neutral values. */
2325 if(!USES_LIST(mb_type, list)){
2326 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
2327 AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2328 AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2329 AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2330 AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2334 ref = &s->current_picture.ref_index[list][4*mb_xy];
2336 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
2337 AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
2338 AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
2340 AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
2341 AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
2344 b_stride = h->b_stride;
2345 mv_dst = &h->mv_cache[list][scan8[0]];
2346 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
2348 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
2363 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
/* Pull the bottom row of the top neighbor's nnz and the right column of
 * the left neighbor's nnz into the cache edges. */
2365 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
2369 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
2370 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
2371 h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8];
2372 h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8];
2375 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
2376 if(!CABAC && h->pps.transform_8x8_mode){
2377 if(IS_8x8DCT(top_type)){
2378 h->non_zero_count_cache[4+8*0]=
2379 h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
2380 h->non_zero_count_cache[6+8*0]=
2381 h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
2383 if(IS_8x8DCT(left_type[0])){
2384 h->non_zero_count_cache[3+8*1]=
2385 h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
2387 if(IS_8x8DCT(left_type[1])){
2388 h->non_zero_count_cache[3+8*3]=
2389 h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
2392 if(IS_8x8DCT(mb_type)){
2393 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
2394 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
2396 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
2397 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
2399 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
2400 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
2402 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
2403 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
/* For inter/direct MBs, also fill the neighbor rows/columns of the
 * mv/ref caches from the top and left neighbors. */
2407 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
2409 for(list=0; list<h->list_count; list++){
2410 if(USES_LIST(top_type, list)){
2411 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
2412 const int b8_xy= 4*top_xy + 2;
2413 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
2414 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
2415 h->ref_cache[list][scan8[0] + 0 - 1*8]=
2416 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
2417 h->ref_cache[list][scan8[0] + 2 - 1*8]=
2418 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
2420 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
2421 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
2424 if(!IS_INTERLACED(mb_type^left_type[0])){
2425 if(USES_LIST(left_type[0], list)){
2426 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
2427 const int b8_xy= 4*left_xy[0] + 1;
2428 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
2429 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
2430 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
2431 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
2432 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
2433 h->ref_cache[list][scan8[0] - 1 + 0 ]=
2434 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
2435 h->ref_cache[list][scan8[0] - 1 +16 ]=
2436 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
2438 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
2439 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
2440 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
2441 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
2442 h->ref_cache[list][scan8[0] - 1 + 0 ]=
2443 h->ref_cache[list][scan8[0] - 1 + 8 ]=
2444 h->ref_cache[list][scan8[0] - 1 + 16 ]=
2445 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
/*
 * Run the deblocking filter over the macroblock row(s) just decoded
 * (one row, or a row pair when FRAME_MBAFF), restoring per-slice state
 * (slice_type, chroma_qp) afterwards.
 * NOTE(review): excerpt is line-sampled; loop bodies and several braces
 * are elided between the numbered lines.
 */
2454 static void loop_filter(H264Context *h){
2455 MpegEncContext * const s = &h->s;
2456 uint8_t *dest_y, *dest_cb, *dest_cr;
2457 int linesize, uvlinesize, mb_x, mb_y;
2458 const int end_mb_y= s->mb_y + FRAME_MBAFF;
2459 const int old_slice_type= h->slice_type;
2461 if(h->deblocking_filter) {
2462 for(mb_x= 0; mb_x<s->mb_width; mb_x++){
2463 for(mb_y=end_mb_y - FRAME_MBAFF; mb_y<= end_mb_y; mb_y++){
2465 mb_xy = h->mb_xy = mb_x + mb_y*s->mb_stride;
2466 h->slice_num= h->slice_table[mb_xy];
2467 mb_type= s->current_picture.mb_type[mb_xy];
2468 h->list_count= h->list_counts[mb_xy];
2471 h->mb_mbaff = h->mb_field_decoding_flag = !!IS_INTERLACED(mb_type);
/* Plane pointers for this MB: 16x16 luma, 8x8 chroma (4:2:0). */
2475 dest_y = s->current_picture.data[0] + (mb_x + mb_y * s->linesize ) * 16;
2476 dest_cb = s->current_picture.data[1] + (mb_x + mb_y * s->uvlinesize) * 8;
2477 dest_cr = s->current_picture.data[2] + (mb_x + mb_y * s->uvlinesize) * 8;
2478 //FIXME simplify above
/* Field macroblocks use doubled strides; odd rows of an MBAFF pair are
 * rebased to the top field line. */
2481 linesize = h->mb_linesize = s->linesize * 2;
2482 uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2;
2483 if(mb_y&1){ //FIXME move out of this function?
2484 dest_y -= s->linesize*15;
2485 dest_cb-= s->uvlinesize*7;
2486 dest_cr-= s->uvlinesize*7;
2489 linesize = h->mb_linesize = s->linesize;
2490 uvlinesize = h->mb_uvlinesize = s->uvlinesize;
2492 backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0);
/* fill_filter_caches() returning nonzero => filtering can be skipped. */
2493 if(fill_filter_caches(h, mb_type))
2495 h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.qscale_table[mb_xy]);
2496 h->chroma_qp[1] = get_chroma_qp(h, 1, s->current_picture.qscale_table[mb_xy]);
2499 ff_h264_filter_mb (h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
2501 ff_h264_filter_mb_fast(h, mb_x, mb_y, dest_y, dest_cb, dest_cr, linesize, uvlinesize);
/* Restore state clobbered while walking other slices' macroblocks. */
2506 h->slice_type= old_slice_type;
2508 s->mb_y= end_mb_y - FRAME_MBAFF;
2509 h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
2510 h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
/*
 * Predict the MBAFF field-decoding flag for the current macroblock from
 * the left neighbor if it belongs to the same slice, otherwise from the
 * top neighbor. NOTE(review): original line 2520 (the final ": 0"-style
 * fallback of the conditional) is elided in this excerpt.
 */
2513 static void predict_field_decoding_flag(H264Context *h){
2514 MpegEncContext * const s = &h->s;
2515 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
2516 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
2517 ? s->current_picture.mb_type[mb_xy-1]
2518 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
2519 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
2521 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
/*
 * Decode one slice: macroblock loop for the CABAC path, the CAVLC path,
 * and (apparently dead) partitioned-frame paths, reporting decoded /
 * errored MB ranges to the error concealment via ff_er_add_slice().
 * NOTE(review): excerpt is line-sampled; loop headers, braces, returns
 * and preprocessor guards are elided between the numbered lines.
 */
2524 static int decode_slice(struct AVCodecContext *avctx, void *arg){
2525 H264Context *h = *(void**)arg;
2526 MpegEncContext * const s = &h->s;
2527 const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
2531 h->is_complex = FRAME_MBAFF || s->picture_structure != PICT_FRAME || s->codec_id != CODEC_ID_H264 ||
2532 (CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY));
/* --- CABAC entropy decoding path --- */
2534 if( h->pps.cabac ) {
2536 align_get_bits( &s->gb );
2539 ff_init_cabac_states( &h->cabac);
2540 ff_init_cabac_decoder( &h->cabac,
2541 s->gb.buffer + get_bits_count(&s->gb)/8,
2542 (get_bits_left(&s->gb) + 7)/8);
2544 ff_h264_init_cabac_states(h);
2548 int ret = ff_h264_decode_mb_cabac(h);
2550 //STOP_TIMER("decode_mb_cabac")
2552 if(ret>=0) ff_h264_hl_decode_mb(h);
/* MBAFF: decode the bottom MB of the vertical pair as well. */
2554 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
2557 ret = ff_h264_decode_mb_cabac(h);
2559 if(ret>=0) ff_h264_hl_decode_mb(h);
2562 eos = get_cabac_terminate( &h->cabac );
2564 if((s->workaround_bugs & FF_BUG_TRUNCATED) && h->cabac.bytestream > h->cabac.bytestream_end + 2){
2565 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* Reading more than 2 bytes past the end indicates a damaged stream. */
2568 if( ret < 0 || h->cabac.bytestream > h->cabac.bytestream_end + 2) {
2569 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d, bytestream (%td)\n", s->mb_x, s->mb_y, h->cabac.bytestream_end - h->cabac.bytestream);
2570 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2574 if( ++s->mb_x >= s->mb_width ) {
2577 ff_draw_horiz_band(s, 16*s->mb_y, 16);
2579 if(FIELD_OR_MBAFF_PICTURE) {
2581 if(FRAME_MBAFF && s->mb_y < s->mb_height)
2582 predict_field_decoding_flag(h);
2586 if( eos || s->mb_y >= s->mb_height ) {
2587 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
2588 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
/* --- CAVLC entropy decoding path --- */
2595 int ret = ff_h264_decode_mb_cavlc(h);
2597 if(ret>=0) ff_h264_hl_decode_mb(h);
2599 if(ret>=0 && FRAME_MBAFF){ //FIXME optimal? or let mb_decode decode 16x32 ?
2601 ret = ff_h264_decode_mb_cavlc(h);
2603 if(ret>=0) ff_h264_hl_decode_mb(h);
2608 av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
2609 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2614 if(++s->mb_x >= s->mb_width){
2617 ff_draw_horiz_band(s, 16*s->mb_y, 16);
2619 if(FIELD_OR_MBAFF_PICTURE) {
2621 if(FRAME_MBAFF && s->mb_y < s->mb_height)
2622 predict_field_decoding_flag(h);
2624 if(s->mb_y >= s->mb_height){
2625 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
2627 if(get_bits_count(&s->gb) == s->gb.size_in_bits ) {
2628 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2632 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2639 if(get_bits_count(&s->gb) >= s->gb.size_in_bits && s->mb_skip_run<=0){
2640 tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
2641 if(get_bits_count(&s->gb) == s->gb.size_in_bits ){
2642 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2646 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
/* NOTE(review): the block below (through line 2689) was disabled with
 * "#if 0" in the upstream tree (guards elided here) — it does not
 * compile as-is (decode_mb(), s->gb used without '&', and the literal
 * '?' typos on line 2683 below) and is dead code; do not enable. */
2655 for(;s->mb_y < s->mb_height; s->mb_y++){
2656 for(;s->mb_x < s->mb_width; s->mb_x++){
2657 int ret= decode_mb(h);
2659 ff_h264_hl_decode_mb(h);
2662 av_log(s->avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
2663 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2668 if(++s->mb_x >= s->mb_width){
2670 if(++s->mb_y >= s->mb_height){
2671 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2672 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2676 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2683 if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
2684 if(get_bits_count(s->gb) == s->gb.size_in_bits){
2685 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
2689 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
2696 ff_draw_horiz_band(s, 16*s->mb_y, 16);
2699 return -1; //not reached
2703 * Call decode_slice() for each context.
2705 * @param h h264 master context
2706 * @param context_count number of contexts to execute
/*
 * Runs the single-context case inline; for multiple contexts, fans the
 * slices out via avctx->execute() and then pulls position/state back
 * from the last thread context into the master context.
 * NOTE(review): excerpt is line-sampled; early-return paths for the
 * hwaccel/VDPAU checks are elided.
 */
2708 static void execute_decode_slices(H264Context *h, int context_count){
2709 MpegEncContext * const s = &h->s;
2710 AVCodecContext * const avctx= s->avctx;
2714 if (s->avctx->hwaccel)
2716 if(s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2718 if(context_count == 1) {
2719 decode_slice(avctx, &h);
2721 for(i = 1; i < context_count; i++) {
2722 hx = h->thread_context[i];
2723 hx->s.error_recognition = avctx->error_recognition;
2724 hx->s.error_count = 0;
2727 avctx->execute(avctx, (void *)decode_slice,
2728 h->thread_context, NULL, context_count, sizeof(void*));
2730 /* pull back stuff from slices to master context */
2731 hx = h->thread_context[context_count - 1];
2732 s->mb_x = hx->s.mb_x;
2733 s->mb_y = hx->s.mb_y;
2734 s->dropable = hx->s.dropable;
2735 s->picture_structure = hx->s.picture_structure;
2736 for(i = 1; i < context_count; i++)
2737 h->s.error_count += h->thread_context[i]->s.error_count;
/*
 * Split the input buffer into NAL units (Annex-B start codes, or
 * length-prefixed units when h->is_avc), unescape each via
 * ff_h264_decode_nal(), and dispatch on nal_unit_type (slice, DPA/B/C,
 * SEI, SPS, PPS, ...). Slices are queued across thread contexts and
 * flushed with execute_decode_slices().
 * NOTE(review): excerpt is line-sampled; the enclosing parse loop,
 * several case labels, error paths and braces are elided between the
 * numbered lines.
 */
2742 static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
2743 MpegEncContext * const s = &h->s;
2744 AVCodecContext * const avctx= s->avctx;
2746 H264Context *hx; ///< thread context
2747 int context_count = 0;
2748 int next_avc= h->is_avc ? 0 : buf_size;
2750 h->max_contexts = avctx->thread_count;
2753 for(i=0; i<50; i++){
2754 av_log(NULL, AV_LOG_ERROR,"%02X ", buf[i]);
/* Without CHUNKS, each packet starts a fresh access unit. */
2757 if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
2758 h->current_slice = 0;
2759 if (!s->first_field)
2760 s->current_picture_ptr= NULL;
2761 ff_h264_reset_sei(h);
/* AVC (MP4-style) framing: read the big-endian NAL length prefix. */
2772 if(buf_index >= next_avc) {
2773 if(buf_index >= buf_size) break;
2775 for(i = 0; i < h->nal_length_size; i++)
2776 nalsize = (nalsize << 8) | buf[buf_index++];
2777 if(nalsize <= 0 || nalsize > buf_size - buf_index){
2778 av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
2781 next_avc= buf_index + nalsize;
2783 // start code prefix search
2784 for(; buf_index + 3 < next_avc; buf_index++){
2785 // This should always succeed in the first iteration.
2786 if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
2790 if(buf_index+3 >= buf_size) break;
2793 if(buf_index >= next_avc) continue;
2796 hx = h->thread_context[context_count];
2798 ptr= ff_h264_decode_nal(hx, buf + buf_index, &dst_length, &consumed, next_avc - buf_index);
2799 if (ptr==NULL || dst_length < 0){
/* Heuristic for streams from buggy encoders that truncate NALs. */
2802 i= buf_index + consumed;
2803 if((s->workaround_bugs & FF_BUG_AUTODETECT) && i+3<next_avc &&
2804 buf[i]==0x00 && buf[i+1]==0x00 && buf[i+2]==0x01 && buf[i+3]==0xE0)
2805 s->workaround_bugs |= FF_BUG_TRUNCATED;
2807 if(!(s->workaround_bugs & FF_BUG_TRUNCATED)){
2808 while(ptr[dst_length - 1] == 0 && dst_length > 0)
2811 bit_length= !dst_length ? 0 : (8*dst_length - ff_h264_decode_rbsp_trailing(h, ptr + dst_length - 1));
2813 if(s->avctx->debug&FF_DEBUG_STARTCODE){
2814 av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
2817 if (h->is_avc && (nalsize != consumed) && nalsize){
2818 av_log(h->s.avctx, AV_LOG_DEBUG, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
2821 buf_index += consumed;
2823 //FIXME do not discard SEI id
2826 (s->hurry_up == 1 && h->nal_ref_idc == 0) ||
2828 (avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
2833 switch(hx->nal_unit_type){
2835 if (h->nal_unit_type != NAL_IDR_SLICE) {
2836 av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
2839 idr(h); //FIXME ensure we don't loose some frames if there is reordering
/* Regular (non-partitioned) slice. */
2841 init_get_bits(&hx->s.gb, ptr, bit_length);
2843 hx->inter_gb_ptr= &hx->s.gb;
2844 hx->s.data_partitioning = 0;
2846 if((err = decode_slice_header(hx, h)))
2849 if (h->current_slice == 1) {
2850 if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
2852 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
2853 ff_vdpau_h264_picture_start(s);
2856 s->current_picture_ptr->key_frame |=
2857 (hx->nal_unit_type == NAL_IDR_SLICE) ||
2858 (h->sei_recovery_frame_cnt >= 0);
/* Queue the slice for decoding unless the skip_frame/hurry_up policy
 * says it can be discarded. */
2859 if(hx->redundant_pic_count==0
2861 && hx->s.hurry_up < 5
2863 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
2864 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
2865 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
2866 && avctx->skip_frame < AVDISCARD_ALL){
2867 if(avctx->hwaccel) {
2868 if (avctx->hwaccel->decode_slice(avctx, &buf[buf_index - consumed], consumed) < 0)
2871 if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU){
2872 static const uint8_t start_code[] = {0x00, 0x00, 0x01};
2873 ff_vdpau_add_data_chunk(s, start_code, sizeof(start_code));
2874 ff_vdpau_add_data_chunk(s, &buf[buf_index - consumed], consumed );
/* Data-partitioned slices: DPA carries the header, DPB/DPC the intra
 * and inter residual partitions. */
2880 init_get_bits(&hx->s.gb, ptr, bit_length);
2882 hx->inter_gb_ptr= NULL;
2884 if ((err = decode_slice_header(hx, h)) < 0)
2887 hx->s.data_partitioning = 1;
2891 init_get_bits(&hx->intra_gb, ptr, bit_length);
2892 hx->intra_gb_ptr= &hx->intra_gb;
2895 init_get_bits(&hx->inter_gb, ptr, bit_length);
2896 hx->inter_gb_ptr= &hx->inter_gb;
2898 if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
2899 && s->context_initialized
2903 && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
2904 && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
2905 && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
2906 && avctx->skip_frame < AVDISCARD_ALL)
2910 init_get_bits(&s->gb, ptr, bit_length);
2911 ff_h264_decode_sei(h);
2914 init_get_bits(&s->gb, ptr, bit_length);
2915 ff_h264_decode_seq_parameter_set(h);
2917 if(s->flags& CODEC_FLAG_LOW_DELAY)
2920 if(avctx->has_b_frames < 2)
2921 avctx->has_b_frames= !s->low_delay;
2924 init_get_bits(&s->gb, ptr, bit_length);
2926 ff_h264_decode_picture_parameter_set(h, bit_length);
2930 case NAL_END_SEQUENCE:
2931 case NAL_END_STREAM:
2932 case NAL_FILLER_DATA:
2934 case NAL_AUXILIARY_SLICE:
2937 av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", hx->nal_unit_type, bit_length);
2940 if(context_count == h->max_contexts) {
2941 execute_decode_slices(h, context_count);
2946 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
2948 /* Slice could not be decoded in parallel mode, copy down
2949 * NAL unit stuff to context 0 and restart. Note that
2950 * rbsp_buffer is not transferred, but since we no longer
2951 * run in parallel mode this should not be an issue. */
2952 h->nal_unit_type = hx->nal_unit_type;
2953 h->nal_ref_idc = hx->nal_ref_idc;
2959 execute_decode_slices(h, context_count);
2964 * returns the number of bytes consumed for building the current frame
/* Clamp the parser position to a sane value before reporting it as the
 * number of consumed bytes. NOTE(review): the return statement itself
 * (original line ~2970) is elided in this excerpt. */
2966 static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
2967 if(pos==0) pos=1; //avoid infinite loops (i doubt that is needed but ...)
2968 if(pos+10>buf_size) pos=buf_size; // oops ;)
/*
 * Top-level AVCodec decode callback: feed the packet through
 * decode_nal_units(), derive interlacing / top_field_first /
 * repeat_pict from picture-timing SEI or the decoding process, reorder
 * B-frames into display order via the delayed_pic queue, and output at
 * most one frame.
 * NOTE(review): excerpt is line-sampled; braces, returns and several
 * statements are elided between the numbered lines.
 */
2973 static int decode_frame(AVCodecContext *avctx,
2974 void *data, int *data_size,
2977 const uint8_t *buf = avpkt->data;
2978 int buf_size = avpkt->size;
2979 H264Context *h = avctx->priv_data;
2980 MpegEncContext *s = &h->s;
2981 AVFrame *pict = data;
2984 s->flags= avctx->flags;
2985 s->flags2= avctx->flags2;
2987 /* end of stream, output what is still in the buffers */
2989 if (buf_size == 0) {
2993 //FIXME factorize this with the output code below
/* Drain: pick the lowest-poc delayed picture (stopping at keyframes
 * and MMCO resets) and shift the queue down. */
2994 out = h->delayed_pic[0];
2996 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
2997 if(h->delayed_pic[i]->poc < out->poc){
2998 out = h->delayed_pic[i];
3002 for(i=out_idx; h->delayed_pic[i]; i++)
3003 h->delayed_pic[i] = h->delayed_pic[i+1];
3006 *data_size = sizeof(AVFrame);
3007 *pict= *(AVFrame*)out;
3013 buf_index=decode_nal_units(h, buf, buf_size);
3017 if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
3022 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
3023 if (avctx->skip_frame >= AVDISCARD_NONREF
3029 av_log(avctx, AV_LOG_ERROR, "no frame!\n");
3033 if(!(s->flags2 & CODEC_FLAG2_CHUNKS) || (s->mb_y >= s->mb_height && s->mb_height)){
3034 Picture *out = s->current_picture_ptr;
3035 Picture *cur = s->current_picture_ptr;
3036 int i, pics, out_of_order, out_idx;
3040 if (cur->field_poc[0]==INT_MAX || cur->field_poc[1]==INT_MAX) {
3041 /* Wait for second field. */
3045 cur->interlaced_frame = 0;
3046 cur->repeat_pict = 0;
3048 /* Signal interlacing information externally. */
3049 /* Prioritize picture timing SEI information over used decoding process if it exists. */
3051 if(h->sps.pic_struct_present_flag){
3052 switch (h->sei_pic_struct)
3054 case SEI_PIC_STRUCT_FRAME:
3056 case SEI_PIC_STRUCT_TOP_FIELD:
3057 case SEI_PIC_STRUCT_BOTTOM_FIELD:
3058 cur->interlaced_frame = 1;
3060 case SEI_PIC_STRUCT_TOP_BOTTOM:
3061 case SEI_PIC_STRUCT_BOTTOM_TOP:
3062 if (FIELD_OR_MBAFF_PICTURE)
3063 cur->interlaced_frame = 1;
3065 // try to flag soft telecine progressive
3066 cur->interlaced_frame = h->prev_interlaced_frame;
3068 case SEI_PIC_STRUCT_TOP_BOTTOM_TOP:
3069 case SEI_PIC_STRUCT_BOTTOM_TOP_BOTTOM:
3070 // Signal the possibility of telecined film externally (pic_struct 5,6)
3071 // From these hints, let the applications decide if they apply deinterlacing.
3072 cur->repeat_pict = 1;
3074 case SEI_PIC_STRUCT_FRAME_DOUBLING:
3075 // Force progressive here, as doubling interlaced frame is a bad idea.
3076 cur->repeat_pict = 2;
3078 case SEI_PIC_STRUCT_FRAME_TRIPLING:
3079 cur->repeat_pict = 4;
3083 if ((h->sei_ct_type & 3) && h->sei_pic_struct <= SEI_PIC_STRUCT_BOTTOM_TOP)
3084 cur->interlaced_frame = (h->sei_ct_type & (1<<1)) != 0;
3086 /* Derive interlacing flag from used decoding process. */
3087 cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
3089 h->prev_interlaced_frame = cur->interlaced_frame;
3091 if (cur->field_poc[0] != cur->field_poc[1]){
3092 /* Derive top_field_first from field pocs. */
3093 cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
3095 if(cur->interlaced_frame || h->sps.pic_struct_present_flag){
3096 /* Use picture timing SEI information. Even if it is a information of a past frame, better than nothing. */
3097 if(h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM
3098 || h->sei_pic_struct == SEI_PIC_STRUCT_TOP_BOTTOM_TOP)
3099 cur->top_field_first = 1;
3101 cur->top_field_first = 0;
3103 /* Most likely progressive */
3104 cur->top_field_first = 0;
3108 //FIXME do something with unavailable reference frames
3110 /* Sort B-frames into display order */
3112 if(h->sps.bitstream_restriction_flag
3113 && s->avctx->has_b_frames < h->sps.num_reorder_frames){
3114 s->avctx->has_b_frames = h->sps.num_reorder_frames;
3118 if( s->avctx->strict_std_compliance >= FF_COMPLIANCE_STRICT
3119 && !h->sps.bitstream_restriction_flag){
3120 s->avctx->has_b_frames= MAX_DELAYED_PIC_COUNT;
3125 while(h->delayed_pic[pics]) pics++;
3127 assert(pics <= MAX_DELAYED_PIC_COUNT);
/* Append the current picture and keep it referenced while delayed. */
3129 h->delayed_pic[pics++] = cur;
3130 if(cur->reference == 0)
3131 cur->reference = DELAYED_PIC_REF;
3133 out = h->delayed_pic[0];
3135 for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame && !h->delayed_pic[i]->mmco_reset; i++)
3136 if(h->delayed_pic[i]->poc < out->poc){
3137 out = h->delayed_pic[i];
3140 if(s->avctx->has_b_frames == 0 && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset))
3141 h->outputed_poc= INT_MIN;
3142 out_of_order = out->poc < h->outputed_poc;
/* Grow the reorder delay when POC order violations prove the stream
 * needs more buffered frames than currently assumed. */
3144 if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
3146 else if((out_of_order && pics-1 == s->avctx->has_b_frames && s->avctx->has_b_frames < MAX_DELAYED_PIC_COUNT)
3148 ((h->outputed_poc != INT_MIN && out->poc > h->outputed_poc + 2)
3149 || cur->pict_type == FF_B_TYPE)))
3152 s->avctx->has_b_frames++;
3155 if(out_of_order || pics > s->avctx->has_b_frames){
3156 out->reference &= ~DELAYED_PIC_REF;
3157 for(i=out_idx; h->delayed_pic[i]; i++)
3158 h->delayed_pic[i] = h->delayed_pic[i+1];
3160 if(!out_of_order && pics > s->avctx->has_b_frames){
3161 *data_size = sizeof(AVFrame);
3163 if(out_idx==0 && h->delayed_pic[0] && (h->delayed_pic[0]->key_frame || h->delayed_pic[0]->mmco_reset)) {
3164 h->outputed_poc = INT_MIN;
3166 h->outputed_poc = out->poc;
3167 *pict= *(AVFrame*)out;
3169 av_log(avctx, AV_LOG_DEBUG, "no picture\n");
3174 assert(pict->data[0] || !*data_size);
3175 ff_print_debug_info(s, pict);
3176 //printf("out %d\n", (int)pict->data[0]);
3178 return get_consumed_bytes(s, buf_index, buf_size);
/*
 * Fill h->mb_avail[] with the availability of the current macroblock's
 * neighbors: [0]=top-left, [1]=top, [2]=top-right, [3]=left — a
 * neighbor is available when it exists and belongs to the same slice.
 * NOTE(review): the guard lines for the first/last MB row (original
 * lines 3184-3185, 3189-3193) are elided in this excerpt.
 */
3181 static inline void fill_mb_avail(H264Context *h){
3182 MpegEncContext * const s = &h->s;
3183 const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
3186 h->mb_avail[0]= s->mb_x && h->slice_table[mb_xy - s->mb_stride - 1] == h->slice_num;
3187 h->mb_avail[1]= h->slice_table[mb_xy - s->mb_stride ] == h->slice_num;
3188 h->mb_avail[2]= s->mb_x+1 < s->mb_width && h->slice_table[mb_xy - s->mb_stride + 1] == h->slice_num;
3194 h->mb_avail[3]= s->mb_x && h->slice_table[mb_xy - 1] == h->slice_num;
3195 h->mb_avail[4]= 1; //FIXME move out
3196 h->mb_avail[5]= 0; //FIXME move out
/* NOTE(review): the following is an elided fragment of a self-test
 * harness (the enclosing function definition, several loop bodies and
 * closing braces are not visible in this excerpt — confirm against the
 * full file before editing).  The visible parts exercise, in order:
 * exp-golomb write/read roundtrips, the 4x4 (I)DCT pair, the
 * quantizer, and the NAL escaping/unescaping layer. */
/* Bitstream buffer size: enough room for COUNT exp-golomb codes
 * (COUNT itself is defined in an elided line). */
3204 #define SIZE (COUNT*40)
3210 // int int_temp[10000];
3212 AVCodecContext avctx;
3214 dsputil_init(&dsp, &avctx);
/* --- unsigned exp-golomb roundtrip: write i, read it back --- */
3216 init_put_bits(&pb, temp, SIZE);
3217 printf("testing unsigned exp golomb\n");
3218 for(i=0; i<COUNT; i++){
3220 set_ue_golomb(&pb, i);
3221 STOP_TIMER("set_ue_golomb");
3223 flush_put_bits(&pb);
3225 init_get_bits(&gb, temp, 8*SIZE);
3226 for(i=0; i<COUNT; i++){
/* Peek the next 24 bits only for the mismatch diagnostic below. */
3229 s= show_bits(&gb, 24);
3232 j= get_ue_golomb(&gb);
/* Decoded value j must equal the i that was written (check elided). */
3234 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
3237 STOP_TIMER("get_ue_golomb");
/* --- signed exp-golomb roundtrip: values centred around zero --- */
3241 init_put_bits(&pb, temp, SIZE);
3242 printf("testing signed exp golomb\n");
3243 for(i=0; i<COUNT; i++){
3245 set_se_golomb(&pb, i - COUNT/2);
3246 STOP_TIMER("set_se_golomb");
3248 flush_put_bits(&pb);
3250 init_get_bits(&gb, temp, 8*SIZE);
3251 for(i=0; i<COUNT; i++){
3254 s= show_bits(&gb, 24);
3257 j= get_se_golomb(&gb);
3258 if(j != i - COUNT/2){
3259 printf("mismatch! at %d (%d should be %d) bits:%6X\n", i, j, i, s);
3262 STOP_TIMER("get_se_golomb");
/* --- forward DCT + scaling + IDCT roundtrip on random 4x4 blocks --- */
3266 printf("testing 4x4 (I)DCT\n");
3269 uint8_t src[16], ref[16];
3270 uint64_t error= 0, max_error=0;
3272 for(i=0; i<COUNT; i++){
3274 // printf("%d %d %d\n", r1, r2, (r2-r1)*16);
3275 for(j=0; j<16; j++){
3276 ref[j]= random()%255;
3277 src[j]= random()%255;
/* Forward transform of the src-ref difference. */
3280 h264_diff_dct_c(block, src, ref, 4);
/* Rescale coefficients; the *4/5 factors compensate the transform's
 * non-uniform column/row gains for odd-indexed positions. */
3283 for(j=0; j<16; j++){
3284 // printf("%d ", block[j]);
3285 block[j]= block[j]*4;
3286 if(j&1) block[j]= (block[j]*4 + 2)/5;
3287 if(j&4) block[j]= (block[j]*4 + 2)/5;
/* Inverse transform adds the reconstructed difference back onto ref. */
3291 h->h264dsp.h264_idct_add(ref, block, 4);
3292 /* for(j=0; j<16; j++){
3293 printf("%d ", ref[j]);
3297 for(j=0; j<16; j++){
/* Accumulate reconstruction error statistics (sum update elided). */
3298 int diff= FFABS(src[j] - ref[j]);
3301 max_error= FFMAX(max_error, diff);
3304 printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
/* --- quantizer sweep over all 52 QP values on random blocks --- */
3305 printf("testing quantizer\n");
3306 for(qp=0; qp<52; qp++){
3308 src1_block[i]= src2_block[i]= random()%255;
/* --- NAL layer: escape a random payload, then unescape and compare --- */
3311 printf("Testing NAL layer\n");
3313 uint8_t bitstream[COUNT];
3314 uint8_t nal[COUNT*2];
3316 memset(&h, 0, sizeof(H264Context));
3318 for(i=0; i<COUNT; i++){
/* Fill with non-zero bytes, then sprinkle in zeros so the escaping
 * (emulation prevention) code paths are actually exercised. */
3326 for(j=0; j<COUNT; j++){
3327 bitstream[j]= (random() % 255) + 1;
3330 for(j=0; j<zeros; j++){
3331 int pos= random() % COUNT;
3332 while(bitstream[pos] == 0){
3341 nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
3343 printf("encoding failed\n");
/* Decode back and verify length, consumed bytes and payload identity. */
3347 out= ff_h264_decode_nal(&h, nal, &out_length, &consumed, nal_length);
3351 if(out_length != COUNT){
3352 printf("incorrect length %d %d\n", out_length, COUNT);
3356 if(consumed != nal_length){
3357 printf("incorrect consumed length %d %d\n", nal_length, consumed);
3361 if(memcmp(bitstream, out, COUNT)){
3362 printf("mismatch\n");
3368 printf("Testing RBSP\n");
/* Release all per-context H.264 state: the per-MB tables plus every
 * cached SPS and PPS buffer (av_freep also NULLs the slots, so a
 * double call is safe).  NOTE(review): elided excerpt — the function
 * braces and the `int i;` declaration are not visible here. */
3376 av_cold void ff_h264_free_context(H264Context *h)
3380 free_tables(h, 1); //FIXME cleanup init stuff perhaps
/* Free every slot of the SPS cache. */
3382 for(i = 0; i < MAX_SPS_COUNT; i++)
3383 av_freep(h->sps_buffers + i);
/* Free every slot of the PPS cache. */
3385 for(i = 0; i < MAX_PPS_COUNT; i++)
3386 av_freep(h->pps_buffers + i);
/* AVCodec .close callback: tear down the decoder's private context.
 * Delegates the H.264-specific cleanup to ff_h264_free_context();
 * the remaining teardown (presumably the MpegEncContext shutdown and
 * the return statement) is elided from this excerpt — confirm against
 * the full file. */
3389 av_cold int ff_h264_decode_end(AVCodecContext *avctx)
3391 H264Context *h = avctx->priv_data;
3392 MpegEncContext *s = &h->s;
3394 ff_h264_free_context(h);
3398 // memset(h, 0, sizeof(H264Context));
/* Mapping of H.264 profile IDs to human-readable names, exposed to
 * callers via the AVCodec .profiles field below.  Terminated by the
 * FF_PROFILE_UNKNOWN sentinel entry.  NOTE(review): the closing `};`
 * of this array is elided from this excerpt. */
3403 static const AVProfile profiles[] = {
3404 { FF_PROFILE_H264_BASELINE, "Baseline" },
3405 { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline" },
3406 { FF_PROFILE_H264_MAIN, "Main" },
3407 { FF_PROFILE_H264_EXTENDED, "Extended" },
3408 { FF_PROFILE_H264_HIGH, "High" },
3409 { FF_PROFILE_H264_HIGH_10, "High 10" },
3410 { FF_PROFILE_H264_HIGH_10_INTRA, "High 10 Intra" },
3411 { FF_PROFILE_H264_HIGH_422, "High 4:2:2" },
3412 { FF_PROFILE_H264_HIGH_422_INTRA, "High 4:2:2 Intra" },
3413 { FF_PROFILE_H264_HIGH_444, "High 4:4:4" },
3414 { FF_PROFILE_H264_HIGH_444_PREDICTIVE, "High 4:4:4 Predictive" },
3415 { FF_PROFILE_H264_HIGH_444_INTRA, "High 4:4:4 Intra" },
3416 { FF_PROFILE_H264_CAVLC_444, "CAVLC 4:4:4" },
/* Sentinel: marks the end of the table for consumers. */
3417 { FF_PROFILE_UNKNOWN },
/* Registration entry for the software H.264 decoder.  Uses the
 * old positional AVCodec initializer style for the leading fields;
 * several fields (name/type/id, close/decode callbacks, the closing
 * `};`) are elided from this excerpt — confirm against the full file. */
3420 AVCodec ff_h264_decoder = {
3424 sizeof(H264Context),
3425 ff_h264_decode_init,
/* DRAW_HORIZ_BAND deliberately disabled; DR1 (direct rendering) and
 * DELAY (decoder may buffer/reorder frames) are advertised. */
3429 /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
3431 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
3432 .profiles = NULL_IF_CONFIG_SMALL(profiles),
3435 #if CONFIG_H264_VDPAU_DECODER
3436 AVCodec ff_h264_vdpau_decoder = {
3440 sizeof(H264Context),
3441 ff_h264_decode_init,
3445 CODEC_CAP_DR1 | CODEC_CAP_DELAY | CODEC_CAP_HWACCEL_VDPAU,
3447 .long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
3448 .pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
3449 .profiles = NULL_IF_CONFIG_SMALL(profiles),