/*
- * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
+ * H.26L/H.264/AVC/JVT/14496-10/... decoder
* Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
*
- * This file is part of FFmpeg.
+ * This file is part of Libav.
*
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/**
- * @file libavcodec/h264.c
+ * @file
* H.264 / AVC / MPEG4 part10 codec.
* @author Michael Niedermayer <michaelni@gmx.at>
*/
+#include "libavutil/imgutils.h"
#include "internal.h"
#include "dsputil.h"
#include "avcodec.h"
#include "h264.h"
#include "h264data.h"
#include "h264_mvpred.h"
-#include "h264_parser.h"
#include "golomb.h"
#include "mathops.h"
#include "rectangle.h"
#include "vdpau_internal.h"
+#include "libavutil/avassert.h"
#include "cabac.h"
0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
};
+static const enum PixelFormat hwaccel_pixfmt_list_h264_jpeg_420[] = { /* pixel-format list; NOTE(review): judging by the name, offered for JPEG-range 4:2:0 H.264 when hwaccel is possible -- confirm at the use site, which is not visible here */
+ PIX_FMT_DXVA2_VLD,
+ PIX_FMT_VAAPI_VLD,
+ PIX_FMT_YUVJ420P,
+ PIX_FMT_NONE /* last entry; presumably the list terminator -- confirm */
+};
+
void ff_h264_write_back_intra_pred_mode(H264Context *h){
- const int mb_xy= h->mb_xy;
+ int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; /* slot of the current macroblock inside the flat intra4x4_pred_mode table, located through mb2br_xy */
- h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1];
- h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2];
- h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3];
- h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4];
- h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4];
- h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4];
- h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4];
+ AV_COPY32(mode, h->intra4x4_pred_mode_cache + 4 + 8*4); /* mode[0..3] <- cache[4+8*4 .. 7+8*4]; presumably one 4-byte copy -- confirm against AV_COPY32 */
+ mode[4]= h->intra4x4_pred_mode_cache[7+8*3]; /* mode[4..6] take cache[7+8*r] for r = 3, 2, 1, i.e. the reverse of the order the removed lines stored them in */
+ mode[5]= h->intra4x4_pred_mode_cache[7+8*2];
+ mode[6]= h->intra4x4_pred_mode_cache[7+8*1];
}
/**
# if HAVE_FAST_64BIT
# define RS 7
for(i=0; i+1<length; i+=9){
- if(!((~*(const uint64_t*)(src+i) & (*(const uint64_t*)(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
+ if(!((~AV_RN64A(src+i) & (AV_RN64A(src+i) - 0x0100010001000101ULL)) & 0x8000800080008080ULL))
# else
# define RS 3
for(i=0; i+1<length; i+=5){
- if(!((~*(const uint32_t*)(src+i) & (*(const uint32_t*)(src+i) - 0x01000101U)) & 0x80008080U))
+ if(!((~AV_RN32A(src+i) & (AV_RN32A(src+i) - 0x01000101U)) & 0x80008080U))
# endif
continue;
if(i>0 && !src[i]) i--;
return dst;
}
-int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
+/**
+ * Identify the exact end of the bitstream
+ * @return the length of the trailing, or 0 if damaged
+ */
+static int ff_h264_decode_rbsp_trailing(H264Context *h, const uint8_t *src){
int v= *src;
int r;
return 0;
}
-/**
- * IDCT transforms the 16 dc values and dequantizes them.
- * @param qp quantization parameter
- */
-static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
-#define stride 16
- int i;
- int temp[16]; //FIXME check if this is a good idea
- static const int x_offset[4]={0, 1*stride, 4* stride, 5*stride};
- static const int y_offset[4]={0, 2*stride, 8* stride, 10*stride};
-
-//memset(block, 64, 2*256);
-//return;
- for(i=0; i<4; i++){
- const int offset= y_offset[i];
- const int z0= block[offset+stride*0] + block[offset+stride*4];
- const int z1= block[offset+stride*0] - block[offset+stride*4];
- const int z2= block[offset+stride*1] - block[offset+stride*5];
- const int z3= block[offset+stride*1] + block[offset+stride*5];
-
- temp[4*i+0]= z0+z3;
- temp[4*i+1]= z1+z2;
- temp[4*i+2]= z1-z2;
- temp[4*i+3]= z0-z3;
- }
-
- for(i=0; i<4; i++){
- const int offset= x_offset[i];
- const int z0= temp[4*0+i] + temp[4*2+i];
- const int z1= temp[4*0+i] - temp[4*2+i];
- const int z2= temp[4*1+i] - temp[4*3+i];
- const int z3= temp[4*1+i] + temp[4*3+i];
-
- block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_residual
- block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
- block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
- block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
- }
-}
-
#if 0
/**
* DCT transforms the 16 dc values.
#undef xStride
#undef stride
-static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
+static void chroma_dc_dequant_idct_c(DCTELEM *block, int qmul){
const int stride= 16*2;
const int xStride= 16;
int a,b,c,d,e;
|| full_my < 0-extra_height
|| full_mx + 16/*FIXME*/ > pic_width + extra_width
|| full_my + 16/*FIXME*/ > pic_height + extra_height){
- ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
+ s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*h->mb_linesize, h->mb_linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
src_y= s->edge_emu_buffer + 2 + 2*h->mb_linesize;
emu=1;
}
src_cr= pic->data[2] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
if(emu){
- ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
+ s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cb, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
src_cb= s->edge_emu_buffer;
}
chroma_op(dest_cb, src_cb, h->mb_uvlinesize, chroma_height, mx&7, my&7);
if(emu){
- ff_emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
+ s->dsp.emulated_edge_mc(s->edge_emu_buffer, src_cr, h->mb_uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
src_cr= s->edge_emu_buffer;
}
chroma_op(dest_cr, src_cr, h->mb_uvlinesize, chroma_height, mx&7, my&7);
x_offset, y_offset, qpix_put, chroma_put);
if(h->use_weight == 2){
- int weight0 = h->implicit_weight[refn0][refn1];
+ int weight0 = h->implicit_weight[refn0][refn1][s->mb_y&1];
int weight1 = 64 - weight0;
luma_weight_avg( dest_y, tmp_y, h-> mb_linesize, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, 5, weight0, weight1, 0);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, 5, weight0, weight1, 0);
}else{
luma_weight_avg(dest_y, tmp_y, h->mb_linesize, h->luma_log2_weight_denom,
- h->luma_weight[0][refn0], h->luma_weight[1][refn1],
- h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
+ h->luma_weight[refn0][0][0] , h->luma_weight[refn1][1][0],
+ h->luma_weight[refn0][0][1] + h->luma_weight[refn1][1][1]);
chroma_weight_avg(dest_cb, tmp_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
- h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
- h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
+ h->chroma_weight[refn0][0][0][0] , h->chroma_weight[refn1][1][0][0],
+ h->chroma_weight[refn0][0][0][1] + h->chroma_weight[refn1][1][0][1]);
chroma_weight_avg(dest_cr, tmp_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
- h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
- h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
+ h->chroma_weight[refn0][0][1][0] , h->chroma_weight[refn1][1][1][0],
+ h->chroma_weight[refn0][0][1][1] + h->chroma_weight[refn1][1][1][1]);
}
}else{
int list = list1 ? 1 : 0;
qpix_put, chroma_put);
luma_weight_op(dest_y, h->mb_linesize, h->luma_log2_weight_denom,
- h->luma_weight[list][refn], h->luma_offset[list][refn]);
+ h->luma_weight[refn][list][0], h->luma_weight[refn][list][1]);
if(h->use_weight_chroma){
chroma_weight_op(dest_cb, h->mb_uvlinesize, h->chroma_log2_weight_denom,
- h->chroma_weight[list][refn][0], h->chroma_offset[list][refn][0]);
+ h->chroma_weight[refn][list][0][0], h->chroma_weight[refn][list][0][1]);
chroma_weight_op(dest_cr, h->mb_uvlinesize, h->chroma_log2_weight_denom,
- h->chroma_weight[list][refn][1], h->chroma_offset[list][refn][1]);
+ h->chroma_weight[refn][list][1][0], h->chroma_weight[refn][list][1][1]);
}
}
}
h264_weight_func *weight_op, h264_biweight_func *weight_avg,
int list0, int list1){
if((h->use_weight==2 && list0 && list1
- && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
+ && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ][h->s.mb_y&1] != 32))
|| h->use_weight==1)
mc_part_weighted(h, n, square, chroma_height, delta, dest_y, dest_cb, dest_cr,
x_offset, y_offset, qpix_put, chroma_put,
}
-static void free_tables(H264Context *h){
+static void free_tables(H264Context *h, int free_rbsp){
int i;
H264Context *hx;
av_freep(&h->intra4x4_pred_mode);
av_freep(&h->list_counts);
av_freep(&h->mb2b_xy);
- av_freep(&h->mb2b8_xy);
+ av_freep(&h->mb2br_xy);
for(i = 0; i < MAX_THREADS; i++) {
hx = h->thread_context[i];
av_freep(&hx->top_borders[1]);
av_freep(&hx->top_borders[0]);
av_freep(&hx->s.obmc_scratchpad);
- av_freep(&hx->rbsp_buffer[1]);
- av_freep(&hx->rbsp_buffer[0]);
- hx->rbsp_buffer_size[0] = 0;
- hx->rbsp_buffer_size[1] = 0;
+ if (free_rbsp){
+ av_freep(&hx->rbsp_buffer[1]);
+ av_freep(&hx->rbsp_buffer[0]);
+ hx->rbsp_buffer_size[0] = 0;
+ hx->rbsp_buffer_size[1] = 0;
+ }
if (i) av_freep(&h->thread_context[i]);
}
}
static void init_dequant8_coeff_table(H264Context *h){
int i,q,x;
- const int transpose = (h->s.dsp.h264_idct8_add != ff_h264_idct8_add_c); //FIXME ugly
h->dequant8_coeff[0] = h->dequant8_buffer[0];
h->dequant8_coeff[1] = h->dequant8_buffer[1];
int shift = div6[q];
int idx = rem6[q];
for(x=0; x<64; x++)
- h->dequant8_coeff[i][q][transpose ? (x>>3)|((x&7)<<3) : x] =
+ h->dequant8_coeff[i][q][(x>>3)|((x&7)<<3)] =
((uint32_t)dequant8_coeff_init[idx][ dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] *
h->pps.scaling_matrix8[i][x]) << shift;
}
static void init_dequant4_coeff_table(H264Context *h){
int i,j,q,x;
- const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
for(i=0; i<6; i++ ){
h->dequant4_coeff[i] = h->dequant4_buffer[i];
for(j=0; j<i; j++){
int shift = div6[q] + 2;
int idx = rem6[q];
for(x=0; x<16; x++)
- h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
+ h->dequant4_coeff[i][q][(x>>2)|((x<<2)&0xF)] =
((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
h->pps.scaling_matrix4[i][x]) << shift;
}
int ff_h264_alloc_tables(H264Context *h){
MpegEncContext * const s = &h->s;
const int big_mb_num= s->mb_stride * (s->mb_height+1);
+ const int row_mb_num= 2*s->mb_stride*s->avctx->thread_count;
int x,y;
- FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, big_mb_num * 8 * sizeof(uint8_t), fail)
+ FF_ALLOCZ_OR_GOTO(h->s.avctx, h->intra4x4_pred_mode, row_mb_num * 8 * sizeof(uint8_t), fail)
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->non_zero_count , big_mb_num * 32 * sizeof(uint8_t), fail)
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base), fail)
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->cbp_table, big_mb_num * sizeof(uint16_t), fail)
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t), fail)
- FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t), fail);
- FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t), fail);
- FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 32*big_mb_num * sizeof(uint8_t) , fail);
+ FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[0], 16*row_mb_num * sizeof(uint8_t), fail);
+ FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mvd_table[1], 16*row_mb_num * sizeof(uint8_t), fail);
+ FF_ALLOCZ_OR_GOTO(h->s.avctx, h->direct_table, 4*big_mb_num * sizeof(uint8_t) , fail);
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->list_counts, big_mb_num * sizeof(uint8_t), fail)
memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
- FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail);
+ FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
for(y=0; y<s->mb_height; y++){
for(x=0; x<s->mb_width; x++){
const int mb_xy= x + y*s->mb_stride;
const int b_xy = 4*x + 4*y*h->b_stride;
- const int b8_xy= 2*x + 2*y*h->b8_stride;
h->mb2b_xy [mb_xy]= b_xy;
- h->mb2b8_xy[mb_xy]= b8_xy;
+ h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
}
}
return 0;
fail:
- free_tables(h);
+ free_tables(h, 1);
return -1;
}
/**
* Mimic alloc_tables(), but for every context thread.
*/
-static void clone_tables(H264Context *dst, H264Context *src){
- dst->intra4x4_pred_mode = src->intra4x4_pred_mode;
+static void clone_tables(H264Context *dst, H264Context *src, int i){
+ MpegEncContext * const s = &src->s;
+ dst->intra4x4_pred_mode = src->intra4x4_pred_mode + i*8*2*s->mb_stride;
dst->non_zero_count = src->non_zero_count;
dst->slice_table = src->slice_table;
dst->cbp_table = src->cbp_table;
dst->mb2b_xy = src->mb2b_xy;
- dst->mb2b8_xy = src->mb2b8_xy;
+ dst->mb2br_xy = src->mb2br_xy;
dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
- dst->mvd_table[0] = src->mvd_table[0];
- dst->mvd_table[1] = src->mvd_table[1];
+ dst->mvd_table[0] = src->mvd_table[0] + i*8*2*s->mb_stride;
+ dst->mvd_table[1] = src->mvd_table[1] + i*8*2*s->mb_stride;
dst->direct_table = src->direct_table;
dst->list_counts = src->list_counts;
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
FF_ALLOCZ_OR_GOTO(h->s.avctx, h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t), fail)
+ h->ref_cache[0][scan8[5 ]+1] = h->ref_cache[0][scan8[7 ]+1] = h->ref_cache[0][scan8[13]+1] =
+ h->ref_cache[1][scan8[5 ]+1] = h->ref_cache[1][scan8[7 ]+1] = h->ref_cache[1][scan8[13]+1] = PART_NOT_AVAILABLE;
+
return 0;
fail:
return -1; // free_tables will clean up for us
}
+static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size);
+
static av_cold void common_init(H264Context *h){
MpegEncContext * const s = &h->s;
s->height = s->avctx->height;
s->codec_id= s->avctx->codec->id;
+ ff_h264dsp_init(&h->h264dsp);
ff_h264_pred_init(&h->hpc, s->codec_id);
h->dequant_coeff_pps= -1;
memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
+/**
+ * Read the size of the next length-prefixed NAL unit inside an avcC record.
+ *
+ * @param p   position of the 2-byte big-endian length field
+ * @param end one past the last valid extradata byte
+ * @return the coded length plus the 2 bytes of the length field itself,
+ *         or -1 if the field or the unit would extend past end
+ */
+static int avcc_nal_size(const uint8_t *p, const uint8_t *end)
+{
+    int nalsize;
+
+    if(end - p < 2)
+        return -1;
+    nalsize = AV_RB16(p) + 2;
+    return nalsize > end - p ? -1 : nalsize;
+}
+
+/**
+ * Parse the codec extradata attached to the AVCodecContext.
+ *
+ * If the first extradata byte is 1 the data is handled as an avcC record:
+ * h->is_avc is set, every SPS and PPS entry (each prefixed by a 2-byte
+ * big-endian length) is passed to decode_nal_units(), and finally
+ * h->nal_length_size is taken from the low two bits of byte 4.
+ * Otherwise h->is_avc is cleared and the whole buffer is passed to
+ * decode_nal_units() in a single call.
+ *
+ * Every count and length read from the avcC record is checked against
+ * extradata_size before it is used, so a truncated or corrupt record is
+ * rejected instead of being read past its end.
+ *
+ * @param h decoder context; h->s.avctx supplies extradata and extradata_size
+ * @return 0 on success, -1 if the extradata is missing, malformed or
+ *         fails to decode
+ */
+int ff_h264_decode_extradata(H264Context *h)
+{
+    AVCodecContext *avctx = h->s.avctx;
+    const uint8_t *buf = avctx->extradata;
+    const int size = avctx->extradata_size;
+
+    if(!buf || size <= 0)
+        return -1;
+
+    if(buf[0] == 1){
+        int i, cnt, nalsize;
+        const uint8_t *p = buf;
+        const uint8_t * const end = buf + size;
+
+        h->is_avc = 1;
+
+        if(size < 7) {
+            av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
+            return -1;
+        }
+        /* sps and pps in the avcC always have length coded with 2 bytes,
+           so put a fake nal_length_size = 2 while parsing them */
+        h->nal_length_size = 2;
+        // Decode sps from avcC
+        cnt = p[5] & 0x1f; // Number of sps
+        p += 6;
+        for (i = 0; i < cnt; i++) {
+            nalsize = avcc_nal_size(p, end);
+            if(nalsize < 0) {
+                av_log(avctx, AV_LOG_ERROR, "avcC truncated in sps %d\n", i);
+                return -1;
+            }
+            if(decode_nal_units(h, p, nalsize) < 0) {
+                av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
+                return -1;
+            }
+            p += nalsize;
+        }
+        // Decode pps from avcC
+        if(p >= end) {
+            av_log(avctx, AV_LOG_ERROR, "avcC truncated before pps count\n");
+            return -1;
+        }
+        cnt = *(p++); // Number of pps
+        for (i = 0; i < cnt; i++) {
+            nalsize = avcc_nal_size(p, end);
+            if(nalsize < 0) {
+                av_log(avctx, AV_LOG_ERROR, "avcC truncated in pps %d\n", i);
+                return -1;
+            }
+            /* NOTE(review): the sps loop accepts any non-negative return while
+               this one requires exactly nalsize; kept as is -- confirm the
+               asymmetry is intended */
+            if(decode_nal_units(h, p, nalsize) != nalsize) {
+                av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
+                return -1;
+            }
+            p += nalsize;
+        }
+        // Now store the right nal length size, which will be used to parse all other nals
+        h->nal_length_size = (buf[4] & 0x03) + 1;
+    } else {
+        h->is_avc = 0;
+        if(decode_nal_units(h, buf, size) < 0)
+            return -1;
+    }
+    return 0;
+}
+
av_cold int ff_h264_decode_init(AVCodecContext *avctx){
H264Context *h= avctx->priv_data;
MpegEncContext * const s = &h->s;
ff_h264_decode_init_vlc();
- if(avctx->extradata_size > 0 && avctx->extradata &&
- *(char *)avctx->extradata == 1){
- h->is_avc = 1;
- h->got_avcC = 0;
- } else {
- h->is_avc = 0;
- }
-
h->thread_context[0] = h;
h->outputed_poc = INT_MIN;
h->prev_poc_msb= 1<<16;
}
avctx->ticks_per_frame = 2;
}
+
+ if(avctx->extradata_size > 0 && avctx->extradata &&
+ ff_h264_decode_extradata(h))
+ return -1;
+
+ if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames < h->sps.num_reorder_frames){
+ s->avctx->has_b_frames = h->sps.num_reorder_frames;
+ s->low_delay = 0;
+ }
+
return 0;
}
/* can't be in alloc_tables because linesize isn't known there.
* FIXME: redo bipred weight to not require extra buffer? */
for(i = 0; i < s->avctx->thread_count; i++)
- if(!h->thread_context[i]->s.obmc_scratchpad)
+ if(h->thread_context[i] && !h->thread_context[i]->s.obmc_scratchpad)
h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
- /* some macroblocks will be accessed before they're available */
- if(FRAME_MBAFF || s->avctx->thread_count > 1)
- memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
+ /* some macroblocks can be accessed before they're available in case of lost slices, mbaff or threading*/
+ memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(*h->slice_table));
// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
MpegEncContext * const s = &h->s;
int deblock_left;
int deblock_top;
- int mb_xy;
int top_idx = 1;
uint8_t *top_border_m1;
uint8_t *top_border;
}
if(h->deblocking_filter == 2) {
- mb_xy = h->mb_xy;
- deblock_left = h->slice_table[mb_xy] == h->slice_table[mb_xy - 1];
- deblock_top = h->slice_table[mb_xy] == h->slice_table[h->top_mb_xy];
+ deblock_left = h->left_type[0];
+ deblock_top = h->top_type;
} else {
deblock_left = (s->mb_x > 0);
deblock_top = (s->mb_y > !!MB_FIELD);
idct_dc_add =
idct_add = s->dsp.add_pixels8;
}else{
- idct_dc_add = s->dsp.h264_idct8_dc_add;
- idct_add = s->dsp.h264_idct8_add;
+ idct_dc_add = h->h264dsp.h264_idct8_dc_add;
+ idct_add = h->h264dsp.h264_idct8_add;
}
for(i=0; i<16; i+=4){
uint8_t * const ptr= dest_y + block_offset[i];
idct_dc_add =
idct_add = s->dsp.add_pixels4;
}else{
- idct_dc_add = s->dsp.h264_idct_dc_add;
- idct_add = s->dsp.h264_idct_add;
+ idct_dc_add = h->h264dsp.h264_idct_dc_add;
+ idct_add = h->h264dsp.h264_idct_add;
}
for(i=0; i<16; i++){
uint8_t * const ptr= dest_y + block_offset[i];
}else{
h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
if(is_h264){
- if(!transform_bypass)
- h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
+ if(h->non_zero_count_cache[ scan8[LUMA_DC_BLOCK_INDEX] ]){
+ if(!transform_bypass)
+ h->h264dsp.h264_luma_dc_dequant_idct(h->mb, h->mb_luma_dc, h->dequant4_coeff[0][s->qscale][0]);
+ else{
+ static const uint8_t dc_mapping[16] = { 0*16, 1*16, 4*16, 5*16, 2*16, 3*16, 6*16, 7*16,
+ 8*16, 9*16,12*16,13*16,10*16,11*16,14*16,15*16};
+ for(i = 0; i < 16; i++)
+ h->mb[dc_mapping[i]] = h->mb_luma_dc[i];
+ }
+ }
}else
- ff_svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
+ ff_svq3_luma_dc_dequant_idct_c(h->mb, h->mb_luma_dc, s->qscale);
}
if(h->deblocking_filter)
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0, simple);
hl_motion(h, dest_y, dest_cb, dest_cr,
s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab,
s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab,
- s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
+ h->h264dsp.weight_h264_pixels_tab, h->h264dsp.biweight_h264_pixels_tab);
}
}
}
}else{
- s->dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
+ h->h264dsp.h264_idct_add16intra(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
}
}else if(h->cbp&15){
if(transform_bypass){
}
}else{
if(IS_8x8DCT(mb_type)){
- s->dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
+ h->h264dsp.h264_idct8_add4(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
}else{
- s->dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
+ h->h264dsp.h264_idct_add16(dest_y, block_offset, h->mb, linesize, h->non_zero_count_cache);
}
}
}
}
}
}else{
- chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp[0], h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
- chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp[1], h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
if(is_h264){
- idct_add = s->dsp.h264_idct_add;
- idct_dc_add = s->dsp.h264_idct_dc_add;
- for(i=16; i<16+8; i++){
- if(h->non_zero_count_cache[ scan8[i] ])
- idct_add (dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
- else if(h->mb[i*16])
- idct_dc_add(dest[(i&4)>>2] + block_offset[i], h->mb + i*16, uvlinesize);
- }
+ if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ])
+ chroma_dc_dequant_idct_c(h->mb + 16*16 , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+ if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ])
+ chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
+ h->h264dsp.h264_idct_add8(dest, block_offset,
+ h->mb, uvlinesize,
+ h->non_zero_count_cache);
}else{
+ chroma_dc_dequant_idct_c(h->mb + 16*16 , h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]);
+ chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]);
for(i=16; i<16+8; i++){
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
uint8_t * const ptr= dest[(i&4)>>2] + block_offset[i];
h->use_weight= 0;
h->use_weight_chroma= 0;
h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
- h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
+ if(CHROMA)
+ h->chroma_log2_weight_denom= get_ue_golomb(&s->gb);
luma_def = 1<<h->luma_log2_weight_denom;
chroma_def = 1<<h->chroma_log2_weight_denom;
luma_weight_flag= get_bits1(&s->gb);
if(luma_weight_flag){
- h->luma_weight[list][i]= get_se_golomb(&s->gb);
- h->luma_offset[list][i]= get_se_golomb(&s->gb);
- if( h->luma_weight[list][i] != luma_def
- || h->luma_offset[list][i] != 0) {
+ h->luma_weight[i][list][0]= get_se_golomb(&s->gb);
+ h->luma_weight[i][list][1]= get_se_golomb(&s->gb);
+ if( h->luma_weight[i][list][0] != luma_def
+ || h->luma_weight[i][list][1] != 0) {
h->use_weight= 1;
h->luma_weight_flag[list]= 1;
}
}else{
- h->luma_weight[list][i]= luma_def;
- h->luma_offset[list][i]= 0;
+ h->luma_weight[i][list][0]= luma_def;
+ h->luma_weight[i][list][1]= 0;
}
if(CHROMA){
if(chroma_weight_flag){
int j;
for(j=0; j<2; j++){
- h->chroma_weight[list][i][j]= get_se_golomb(&s->gb);
- h->chroma_offset[list][i][j]= get_se_golomb(&s->gb);
- if( h->chroma_weight[list][i][j] != chroma_def
- || h->chroma_offset[list][i][j] != 0) {
+ h->chroma_weight[i][list][j][0]= get_se_golomb(&s->gb);
+ h->chroma_weight[i][list][j][1]= get_se_golomb(&s->gb);
+ if( h->chroma_weight[i][list][j][0] != chroma_def
+ || h->chroma_weight[i][list][j][1] != 0) {
h->use_weight_chroma= 1;
h->chroma_weight_flag[list]= 1;
}
}else{
int j;
for(j=0; j<2; j++){
- h->chroma_weight[list][i][j]= chroma_def;
- h->chroma_offset[list][i][j]= 0;
+ h->chroma_weight[i][list][j][0]= chroma_def;
+ h->chroma_weight[i][list][j][1]= 0;
}
}
}
return 0;
}
-static void implicit_weight_table(H264Context *h){
+/**
+ * Initialize implicit_weight table.
+ *
+ * Clears luma_weight_flag/chroma_weight_flag for both lists, then fills
+ * h->implicit_weight[ref0][ref1][] from the POCs of the list 0 / list 1
+ * references. An entry is 64 - dist_scale_factor when the two POCs differ
+ * and the scale factor lies in [-64, 128]; otherwise it is 32.
+ * Unless it returns early, use_weight and use_weight_chroma are set to 2
+ * and both log2 weight denominators to 5.
+ *
+ * @param field 0/1 initialize the weight for interlaced MBAFF: uses
+ *              field_poc[field], reference indices 16 .. 16+2*ref_count-1,
+ *              and writes only slot [field] of each entry
+ *              -1 initializes the rest: uses the picture poc, reference
+ *              indices 0 .. ref_count-1, and writes both slots; in this mode
+ *              the function may instead clear use_weight/use_weight_chroma
+ *              and return without touching the table
+ */
+static void implicit_weight_table(H264Context *h, int field){
MpegEncContext * const s = &h->s;
- int ref0, ref1, i;
- int cur_poc = s->current_picture_ptr->poc;
+ int ref0, ref1, i, cur_poc, ref_start, ref_count0, ref_count1;
for (i = 0; i < 2; i++) {
h->luma_weight_flag[i] = 0;
h->chroma_weight_flag[i] = 0;
}
- if( h->ref_count[0] == 1 && h->ref_count[1] == 1
+ if(field < 0){
+ cur_poc = s->current_picture_ptr->poc;
+ if( h->ref_count[0] == 1 && h->ref_count[1] == 1 && !FRAME_MBAFF /* shortcut only for one reference per list and not FRAME_MBAFF ... */
&& h->ref_list[0][0].poc + h->ref_list[1][0].poc == 2*cur_poc){
h->use_weight= 0;
h->use_weight_chroma= 0;
return;
}
+ ref_start= 0;
+ ref_count0= h->ref_count[0];
+ ref_count1= h->ref_count[1];
+ }else{
+ cur_poc = s->current_picture_ptr->field_poc[field];
+ ref_start= 16; /* field mode walks ref_list entries starting at index 16 */
+ ref_count0= 16+2*h->ref_count[0]; /* exclusive upper bound: two entries per reference */
+ ref_count1= 16+2*h->ref_count[1];
+ }
h->use_weight= 2;
h->use_weight_chroma= 2;
h->luma_log2_weight_denom= 5;
h->chroma_log2_weight_denom= 5;
- for(ref0=0; ref0 < h->ref_count[0]; ref0++){
+ for(ref0=ref_start; ref0 < ref_count0; ref0++){
int poc0 = h->ref_list[0][ref0].poc;
- for(ref1=0; ref1 < h->ref_count[1]; ref1++){
+ for(ref1=ref_start; ref1 < ref_count1; ref1++){
int poc1 = h->ref_list[1][ref1].poc;
int td = av_clip(poc1 - poc0, -128, 127);
+ int w= 32; /* value kept when td == 0 or the scale factor is out of range */
if(td){
int tb = av_clip(cur_poc - poc0, -128, 127);
int tx = (16384 + (FFABS(td) >> 1)) / td;
- int dist_scale_factor = av_clip((tb*tx + 32) >> 6, -1024, 1023) >> 2;
- if(dist_scale_factor < -64 || dist_scale_factor > 128)
- h->implicit_weight[ref0][ref1] = 32;
- else
- h->implicit_weight[ref0][ref1] = 64 - dist_scale_factor;
- }else
- h->implicit_weight[ref0][ref1] = 32;
+ int dist_scale_factor = (tb*tx + 32) >> 8; /* no clip here; the range test on the next line is the only filter on this value */
+ if(dist_scale_factor >= -64 && dist_scale_factor <= 128)
+ w = 64 - dist_scale_factor;
+ }
+ if(field<0){
+ h->implicit_weight[ref0][ref1][0]= /* frame pass: same weight in both slots of the last dimension */
+ h->implicit_weight[ref0][ref1][1]= w;
+ }else{
+ h->implicit_weight[ref0][ref1][field]=w; /* field pass: only the slot selected by field */
+ }
}
}
}
* initialize scan tables
*/
static void init_scan_tables(H264Context *h){
- MpegEncContext * const s = &h->s;
int i;
- if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
- memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
- memcpy(h-> field_scan, field_scan, 16*sizeof(uint8_t));
- }else{
- for(i=0; i<16; i++){
+ for(i=0; i<16; i++){
#define T(x) (x>>2) | ((x<<2) & 0xF)
- h->zigzag_scan[i] = T(zigzag_scan[i]);
- h-> field_scan[i] = T( field_scan[i]);
+ h->zigzag_scan[i] = T(zigzag_scan[i]);
+ h-> field_scan[i] = T( field_scan[i]);
#undef T
- }
}
- if(s->dsp.h264_idct8_add == ff_h264_idct8_add_c){
- memcpy(h->zigzag_scan8x8, ff_zigzag_direct, 64*sizeof(uint8_t));
- memcpy(h->zigzag_scan8x8_cavlc, zigzag_scan8x8_cavlc, 64*sizeof(uint8_t));
- memcpy(h->field_scan8x8, field_scan8x8, 64*sizeof(uint8_t));
- memcpy(h->field_scan8x8_cavlc, field_scan8x8_cavlc, 64*sizeof(uint8_t));
- }else{
- for(i=0; i<64; i++){
+ for(i=0; i<64; i++){
#define T(x) (x>>3) | ((x&7)<<3)
- h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
- h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
- h->field_scan8x8[i] = T(field_scan8x8[i]);
- h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
+ h->zigzag_scan8x8[i] = T(ff_zigzag_direct[i]);
+ h->zigzag_scan8x8_cavlc[i] = T(zigzag_scan8x8_cavlc[i]);
+ h->field_scan8x8[i] = T(field_scan8x8[i]);
+ h->field_scan8x8_cavlc[i] = T(field_scan8x8_cavlc[i]);
#undef T
- }
}
if(h->sps.transform_bypass){ //FIXME same ugly
h->zigzag_scan_q0 = zigzag_scan;
}
/**
- * Replicates H264 "master" context to thread contexts.
+ * Replicate H264 "master" context to thread contexts.
*/
static void clone_slice(H264Context *dst, H264Context *src)
{
memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
}
+/**
+ * Compute the profile from profile_idc and the constraint_set?_flags.
+ *
+ * The result starts as sps->profile_idc. FF_PROFILE_H264_CONSTRAINED is
+ * ORed in for Baseline when bit 1 of sps->constraint_set_flags is set, and
+ * FF_PROFILE_H264_INTRA is ORed in for High 10, High 4:2:2 and
+ * High 4:4:4 Predictive when bit 3 is set. Every other profile_idc is
+ * returned unchanged.
+ *
+ * @param sps SPS
+ *
+ * @return profile as defined by FF_PROFILE_H264_*
+ */
+int ff_h264_get_profile(SPS *sps)
+{
+ int profile = sps->profile_idc;
+
+ switch(sps->profile_idc) {
+ case FF_PROFILE_H264_BASELINE:
+ // constraint_set1_flag set to 1
+ profile |= (sps->constraint_set_flags & 1<<1) ? FF_PROFILE_H264_CONSTRAINED : 0; // '<<' binds tighter than '&': this tests bit 1
+ break;
+ case FF_PROFILE_H264_HIGH_10:
+ case FF_PROFILE_H264_HIGH_422:
+ case FF_PROFILE_H264_HIGH_444_PREDICTIVE:
+ // constraint_set3_flag set to 1
+ profile |= (sps->constraint_set_flags & 1<<3) ? FF_PROFILE_H264_INTRA : 0; // same precedence note: this tests bit 3
+ break;
+ } // no default case: other profile_idc values pass through untouched
+
+ return profile;
+}
+
/**
* decodes a slice header.
* This will also call MPV_common_init() and frame_start() as needed.
}
h->sps = *h0->sps_buffers[h->pps.sps_id];
+ s->avctx->profile = ff_h264_get_profile(&h->sps);
+ s->avctx->level = h->sps.level_idc;
+ s->avctx->refs = h->sps.ref_frame_count;
+
if(h == h0 && h->dequant_coeff_pps != pps_id){
h->dequant_coeff_pps = pps_id;
init_dequant_tables(h);
s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
h->b_stride= s->mb_width*4;
- h->b8_stride= s->mb_width*2;
s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
if(h->sps.frame_mbs_only_flag)
s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
else
- s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
+ s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 7);
if (s->context_initialized
- && ( s->width != s->avctx->width || s->height != s->avctx->height)) {
+ && ( s->width != s->avctx->width || s->height != s->avctx->height
+ || av_cmp_q(h->sps.sar, s->avctx->sample_aspect_ratio))) {
if(h != h0)
return -1; // width / height changed during parallelized decoding
- free_tables(h);
+ free_tables(h, 0);
flush_dpb(s->avctx);
MPV_common_end(s);
}
avcodec_set_dimensions(s->avctx, s->width, s->height);
s->avctx->sample_aspect_ratio= h->sps.sar;
- if(!s->avctx->sample_aspect_ratio.den)
- s->avctx->sample_aspect_ratio.den = 1;
+ av_assert0(s->avctx->sample_aspect_ratio.den);
if(h->sps.video_signal_type_present_flag){
s->avctx->color_range = h->sps.full_range ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
h->sps.num_units_in_tick, den, 1<<30);
}
- s->avctx->pix_fmt = s->avctx->get_format(s->avctx, s->avctx->codec->pix_fmts);
+ s->avctx->pix_fmt = s->avctx->get_format(s->avctx,
+ s->avctx->codec->pix_fmts ?
+ s->avctx->codec->pix_fmts :
+ s->avctx->color_range == AVCOL_RANGE_JPEG ?
+ hwaccel_pixfmt_list_h264_jpeg_420 :
+ ff_hwaccel_pixfmt_list_420);
s->avctx->hwaccel = ff_find_hwaccel(s->avctx->codec->id, s->avctx->pix_fmt);
if (MPV_common_init(s) < 0)
c = h->thread_context[i] = av_malloc(sizeof(H264Context));
memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
+ c->h264dsp = h->h264dsp;
c->sps = h->sps;
c->pps = h->pps;
init_scan_tables(c);
- clone_tables(c, h);
+ clone_tables(c, h, i);
}
for(i = 0; i < s->avctx->thread_count; i++)
if(h0->current_slice == 0){
while(h->frame_num != h->prev_frame_num &&
h->frame_num != (h->prev_frame_num+1)%(1<<h->sps.log2_max_frame_num)){
- av_log(NULL, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
+ Picture *prev = h->short_ref_count ? h->short_ref[0] : NULL;
+ av_log(h->s.avctx, AV_LOG_DEBUG, "Frame num gap %d %d\n", h->frame_num, h->prev_frame_num);
if (ff_h264_frame_start(h) < 0)
return -1;
h->prev_frame_num++;
h->prev_frame_num %= 1<<h->sps.log2_max_frame_num;
s->current_picture_ptr->frame_num= h->prev_frame_num;
- ff_h264_execute_ref_pic_marking(h, NULL, 0);
+ ff_generate_sliding_window_mmcos(h);
+ ff_h264_execute_ref_pic_marking(h, h->mmco, h->mmco_index);
+ /* Error concealment: if a ref is missing, copy the previous ref in its place.
+ * FIXME: avoiding a memcpy would be nice, but ref handling makes many assumptions
+ * about there being no actual duplicates.
+ * FIXME: this doesn't copy padding for out-of-frame motion vectors. Given we're
+ * concealing a lost frame, this probably isn't noticeable by comparison, but it should
+ * be fixed. */
+ if (h->short_ref_count) {
+ if (prev) {
+ av_image_copy(h->short_ref[0]->data, h->short_ref[0]->linesize,
+ (const uint8_t**)prev->data, prev->linesize,
+ s->avctx->pix_fmt, s->mb_width*16, s->mb_height*16);
+ h->short_ref[0]->poc = prev->poc+2;
+ }
+ h->short_ref[0]->frame_num = h->prev_frame_num;
+ }
}
/* See if we have a decoded first field looking for a pair... */
if( (h->pps.weighted_pred && h->slice_type_nos == FF_P_TYPE )
|| (h->pps.weighted_bipred_idc==1 && h->slice_type_nos== FF_B_TYPE ) )
pred_weight_table(h);
- else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE)
- implicit_weight_table(h);
- else {
+ else if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
+ implicit_weight_table(h, -1);
+ }else {
h->use_weight = 0;
for (i = 0; i < 2; i++) {
h->luma_weight_flag[i] = 0;
if(h->nal_ref_idc)
ff_h264_decode_ref_pic_marking(h0, &s->gb);
- if(FRAME_MBAFF)
+ if(FRAME_MBAFF){
ff_h264_fill_mbaff_ref_list(h);
+ if(h->pps.weighted_bipred_idc==2 && h->slice_type_nos== FF_B_TYPE){
+ implicit_weight_table(h, 0);
+ implicit_weight_table(h, 1);
+ }
+ }
+
if(h->slice_type_nos==FF_B_TYPE && !h->direct_spatial_mv_pred)
ff_h264_direct_dist_scale_factor(h);
ff_h264_direct_ref_list_init(h);
h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
- s->avctx->refs= h->sps.ref_frame_count;
-
if(s->avctx->debug&FF_DEBUG_PICT_INFO){
av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c%s%s pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s %s\n",
h->slice_num,
}
}
+/**
+ * Fill the per-macroblock caches for the macroblock at h->mb_xy:
+ * the top/left neighbour positions and types and, for non-intra
+ * macroblocks, non_zero_count_cache, cbp, ref_cache and mv_cache.
+ *
+ * @param mb_type type of the current macroblock
+ * @return non zero if the loop filter can be skipped
+ */
+static int fill_filter_caches(H264Context *h, int mb_type){
+ MpegEncContext * const s = &h->s;
+ const int mb_xy= h->mb_xy;
+ int top_xy, left_xy[2];
+ int top_type, left_type[2];
+
+ top_xy = mb_xy - (s->mb_stride << MB_FIELD); // one row up; the shift doubles the step when MB_FIELD is 1
+
+ //FIXME deblocking could skip the intra and nnz parts.
+
+ /* Wow, what a mess, why didn't they simplify the interlacing & intra
+ * stuff, I can't imagine that these complex rules are worth it. */
+
+ left_xy[1] = left_xy[0] = mb_xy-1; // default: both left entries are the MB directly to the left
+ if(FRAME_MBAFF){
+ const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
+ const int curr_mb_field_flag = IS_INTERLACED(mb_type);
+ if(s->mb_y&1){ // odd MB row
+ if (left_mb_field_flag != curr_mb_field_flag) {
+ left_xy[0] -= s->mb_stride;
+ }
+ }else{ // even MB row
+ if(curr_mb_field_flag){
+ top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); // adds mb_stride only when bit 7 of the top MB's type is clear
+ }
+ if (left_mb_field_flag != curr_mb_field_flag) {
+ left_xy[1] += s->mb_stride;
+ }
+ }
+ }
+
+ h->top_mb_xy = top_xy; // store the neighbour positions in the context
+ h->left_mb_xy[0] = left_xy[0];
+ h->left_mb_xy[1] = left_xy[1];
+ {
+ //for sufficiently low qp, filtering wouldn't do anything
+ //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
+ int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
+ int qp = s->current_picture.qscale_table[mb_xy];
+ if(qp <= qp_thresh
+ && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
+ && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
+ if(!FRAME_MBAFF)
+ return 1; // own qp and the rounded averages with left and top are all <= qp_thresh
+ if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh)
+ && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
+ return 1; // MBAFF: the second left MB and the MB above the top one pass as well
+ }
+ }
+
+ top_type = s->current_picture.mb_type[top_xy] ;
+ left_type[0] = s->current_picture.mb_type[left_xy[0]];
+ left_type[1] = s->current_picture.mb_type[left_xy[1]];
+ if(h->deblocking_filter == 2){ // mode 2: neighbours outside the current slice get type 0
+ if(h->slice_table[top_xy ] != h->slice_num) top_type= 0;
+ if(h->slice_table[left_xy[0] ] != h->slice_num) left_type[0]= left_type[1]= 0;
+ }else{ // otherwise only neighbours whose slice_table entry is 0xFFFF get type 0
+ if(h->slice_table[top_xy ] == 0xFFFF) top_type= 0;
+ if(h->slice_table[left_xy[0] ] == 0xFFFF) left_type[0]= left_type[1] =0;
+ }
+ h->top_type = top_type ;
+ h->left_type[0]= left_type[0];
+ h->left_type[1]= left_type[1];
+
+ if(IS_INTRA(mb_type))
+ return 0; // intra MB: the caches below are not filled
+
+ AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); // load this MB's non_zero_count into the cache
+ AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
+ AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]);
+ AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]);
+ AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
+
+ h->cbp= h->cbp_table[mb_xy];
+
+ {
+ int list;
+ for(list=0; list<h->list_count; list++){
+ int8_t *ref;
+ int y, b_stride;
+ int16_t (*mv_dst)[2];
+ int16_t (*mv_src)[2];
+
+ if(!USES_LIST(mb_type, list)){ // list unused by this MB: fill MVs with pack16to32(0,0) and refs with LIST_NOT_USED
+ fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
+ AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+ AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+ AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+ AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+ continue;
+ }
+
+ ref = &s->current_picture.ref_index[list][4*mb_xy];
+ {
+ int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+ AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
+ AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
+ ref += 2; // advance to the next two ref_index entries, used for the two cache rows written below
+ AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
+ AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
+ }
+
+ b_stride = h->b_stride;
+ mv_dst = &h->mv_cache[list][scan8[0]];
+ mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
+ for(y=0; y<4; y++){ // four rows, one AV_COPY128 each; the cache row pitch is 8 entries
+ AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
+ }
+
+ }
+ }
+
+
+/*
+0 . T T. T T T T
+1 L . .L . . . .
+2 L . .L . . . .
+3 . T TL . . . .
+4 L . .L . . . .
+5 L . .. . . . .
+*/
+//FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
+ if(top_type){
+ AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
+ }
+
+ if(left_type[0]){
+ h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
+ h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
+ h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8];
+ h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8];
+ }
+
+ // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
+ if(!CABAC && h->pps.transform_8x8_mode){
+ if(IS_8x8DCT(top_type)){
+ h->non_zero_count_cache[4+8*0]=
+ h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
+ h->non_zero_count_cache[6+8*0]=
+ h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
+ }
+ if(IS_8x8DCT(left_type[0])){
+ h->non_zero_count_cache[3+8*1]=
+ h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
+ }
+ if(IS_8x8DCT(left_type[1])){
+ h->non_zero_count_cache[3+8*3]=
+ h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
+ }
+
+ if(IS_8x8DCT(mb_type)){ // current MB: each group of four cache entries takes one cbp bit (1, 2, 4, 8)
+ h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
+ h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
+
+ h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
+ h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
+
+ h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
+ h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
+
+ h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
+ h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
+ }
+ }
+
+ if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ // also cache MVs and refs of the top and left neighbours
+ int list;
+ for(list=0; list<h->list_count; list++){
+ if(USES_LIST(top_type, list)){
+ const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
+ const int b8_xy= 4*top_xy + 2;
+ int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); // mapping is selected by the neighbour's slice, not the current one
+ AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
+ h->ref_cache[list][scan8[0] + 0 - 1*8]=
+ h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
+ h->ref_cache[list][scan8[0] + 2 - 1*8]=
+ h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
+ }else{ // top neighbour does not use this list: zero MVs, refs set to LIST_NOT_USED
+ AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
+ AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
+ }
+
+ if(!IS_INTERLACED(mb_type^left_type[0])){ // NOTE(review): presumably true only when both MBs have the same interlaced bit - confirm against IS_INTERLACED
+ if(USES_LIST(left_type[0], list)){
+ const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
+ const int b8_xy= 4*left_xy[0] + 1;
+ int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
+ AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
+ h->ref_cache[list][scan8[0] - 1 + 0 ]=
+ h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*0]];
+ h->ref_cache[list][scan8[0] - 1 +16 ]=
+ h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 2*1]];
+ }else{ // left neighbour does not use this list: zero MVs, refs set to LIST_NOT_USED
+ AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
+ AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
+ AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
+ AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
+ h->ref_cache[list][scan8[0] - 1 + 0 ]=
+ h->ref_cache[list][scan8[0] - 1 + 8 ]=
+ h->ref_cache[list][scan8[0] - 1 + 16 ]=
+ h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
static void loop_filter(H264Context *h){
MpegEncContext * const s = &h->s;
uint8_t *dest_y, *dest_cb, *dest_cr;
h->slice_type= old_slice_type;
s->mb_x= 0;
s->mb_y= end_mb_y - FRAME_MBAFF;
+ h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
+ h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
+}
+
+static void predict_field_decoding_flag(H264Context *h){ // set the field flags of the current MB from an already decoded neighbour in the same slice
+ MpegEncContext * const s = &h->s;
+ const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
+ int mb_type = (h->slice_table[mb_xy-1] == h->slice_num) // first choice: the left MB, if it belongs to the current slice
+ ? s->current_picture.mb_type[mb_xy-1]
+ : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num) // second choice: the MB one row up, if it belongs to the current slice
+ ? s->current_picture.mb_type[mb_xy-s->mb_stride]
+ : 0; // neither neighbour is in this slice: use type 0
+ h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0; // both flags get the same 0/1 value
}
static int decode_slice(struct AVCodecContext *avctx, void *arg){
++s->mb_y;
if(FIELD_OR_MBAFF_PICTURE) {
++s->mb_y;
+ if(FRAME_MBAFF && s->mb_y < s->mb_height)
+ predict_field_decoding_flag(h);
}
}
++s->mb_y;
if(FIELD_OR_MBAFF_PICTURE) {
++s->mb_y;
+ if(FRAME_MBAFF && s->mb_y < s->mb_height)
+ predict_field_decoding_flag(h);
}
if(s->mb_y >= s->mb_height){
tprintf(s->avctx, "slice end %d %d\n", get_bits_count(&s->gb), s->gb.size_in_bits);
nalsize = 0;
for(i = 0; i < h->nal_length_size; i++)
nalsize = (nalsize << 8) | buf[buf_index++];
- if(nalsize <= 1 || nalsize > buf_size - buf_index){
- if(nalsize == 1){
- buf_index++;
- continue;
- }else{
- av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
- break;
- }
+ if(nalsize <= 0 || nalsize > buf_size - buf_index){
+ av_log(h->s.avctx, AV_LOG_ERROR, "AVC: nal size %d\n", nalsize);
+ break;
}
next_avc= buf_index + nalsize;
} else {
buf_index += consumed;
- if( (s->hurry_up == 1 && h->nal_ref_idc == 0) //FIXME do not discard SEI id
- ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
+ //FIXME do not discard SEI id
+ if(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0)
continue;
again:
if((err = decode_slice_header(hx, h)))
break;
- avctx->profile = hx->sps.profile_idc;
- avctx->level = hx->sps.level_idc;
-
- if (s->avctx->hwaccel && h->current_slice == 1) {
- if (s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
+ if (h->current_slice == 1) {
+ if (s->avctx->hwaccel && s->avctx->hwaccel->start_frame(s->avctx, NULL, 0) < 0)
return -1;
+ if(CONFIG_H264_VDPAU_DECODER && s->avctx->codec->capabilities&CODEC_CAP_HWACCEL_VDPAU)
+ ff_vdpau_h264_picture_start(s);
}
s->current_picture_ptr->key_frame |=
(hx->nal_unit_type == NAL_IDR_SLICE) ||
(h->sei_recovery_frame_cnt >= 0);
- if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
+ if(hx->redundant_pic_count==0
&& (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
&& (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
&& (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
if ((err = decode_slice_header(hx, h)) < 0)
break;
- avctx->profile = hx->sps.profile_idc;
- avctx->level = hx->sps.level_idc;
-
hx->s.data_partitioning = 1;
break;
if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
&& s->context_initialized
- && s->hurry_up < 5
&& (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
&& (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type_nos!=FF_B_TYPE)
&& (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type_nos==FF_I_TYPE)
s->flags2= avctx->flags2;
/* end of stream, output what is still in the buffers */
+ out:
if (buf_size == 0) {
Picture *out;
int i, out_idx;
return 0;
}
- if(h->is_avc && !h->got_avcC) {
- int i, cnt, nalsize;
- unsigned char *p = avctx->extradata;
- if(avctx->extradata_size < 7) {
- av_log(avctx, AV_LOG_ERROR, "avcC too short\n");
- return -1;
- }
- if(*p != 1) {
- av_log(avctx, AV_LOG_ERROR, "Unknown avcC version %d\n", *p);
- return -1;
- }
- /* sps and pps in the avcC always have length coded with 2 bytes,
- so put a fake nal_length_size = 2 while parsing them */
- h->nal_length_size = 2;
- // Decode sps from avcC
- cnt = *(p+5) & 0x1f; // Number of sps
- p += 6;
- for (i = 0; i < cnt; i++) {
- nalsize = AV_RB16(p) + 2;
- if(decode_nal_units(h, p, nalsize) < 0) {
- av_log(avctx, AV_LOG_ERROR, "Decoding sps %d from avcC failed\n", i);
- return -1;
- }
- p += nalsize;
- }
- // Decode pps from avcC
- cnt = *(p++); // Number of pps
- for (i = 0; i < cnt; i++) {
- nalsize = AV_RB16(p) + 2;
- if(decode_nal_units(h, p, nalsize) != nalsize) {
- av_log(avctx, AV_LOG_ERROR, "Decoding pps %d from avcC failed\n", i);
- return -1;
- }
- p += nalsize;
- }
- // Now store right nal length size, that will be use to parse all other nals
- h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
- // Do not reparse avcC
- h->got_avcC = 1;
- }
-
- if(!h->got_avcC && !h->is_avc && s->avctx->extradata_size){
- if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
- return -1;
- h->got_avcC = 1;
- }
-
buf_index=decode_nal_units(h, buf, buf_size);
if(buf_index < 0)
return -1;
+ if (!s->current_picture_ptr && h->nal_unit_type == NAL_END_SEQUENCE) {
+ buf_size = 0;
+ goto out;
+ }
+
if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
- if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
+ if (avctx->skip_frame >= AVDISCARD_NONREF)
+ return 0;
av_log(avctx, AV_LOG_ERROR, "no frame!\n");
return -1;
}
}
// printf("\n");
- s->dsp.h264_idct_add(ref, block, 4);
+ h->h264dsp.h264_idct_add(ref, block, 4);
/* for(j=0; j<16; j++){
printf("%d ", ref[j]);
}
{
int i;
- free_tables(h); //FIXME cleanup init stuff perhaps
+ free_tables(h, 1); //FIXME cleanup init stuff perhaps
for(i = 0; i < MAX_SPS_COUNT; i++)
av_freep(h->sps_buffers + i);
return 0;
}
+static const AVProfile profiles[] = { // FF_PROFILE_H264_* value paired with its display name; referenced by .profiles of the decoders below
+ { FF_PROFILE_H264_BASELINE, "Baseline" },
+ { FF_PROFILE_H264_CONSTRAINED_BASELINE, "Constrained Baseline" },
+ { FF_PROFILE_H264_MAIN, "Main" },
+ { FF_PROFILE_H264_EXTENDED, "Extended" },
+ { FF_PROFILE_H264_HIGH, "High" },
+ { FF_PROFILE_H264_HIGH_10, "High 10" },
+ { FF_PROFILE_H264_HIGH_10_INTRA, "High 10 Intra" },
+ { FF_PROFILE_H264_HIGH_422, "High 4:2:2" },
+ { FF_PROFILE_H264_HIGH_422_INTRA, "High 4:2:2 Intra" },
+ { FF_PROFILE_H264_HIGH_444, "High 4:4:4" },
+ { FF_PROFILE_H264_HIGH_444_PREDICTIVE, "High 4:4:4 Predictive" },
+ { FF_PROFILE_H264_HIGH_444_INTRA, "High 4:4:4 Intra" },
+ { FF_PROFILE_H264_CAVLC_444, "CAVLC 4:4:4" },
+ { FF_PROFILE_UNKNOWN }, // entry without a name; presumably the list terminator - confirm against AVProfile users
+};
-AVCodec h264_decoder = {
+AVCodec ff_h264_decoder = {
"h264",
- CODEC_TYPE_VIDEO,
+ AVMEDIA_TYPE_VIDEO,
CODEC_ID_H264,
sizeof(H264Context),
ff_h264_decode_init,
NULL,
ff_h264_decode_end,
decode_frame,
- /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY,
+ /*CODEC_CAP_DRAW_HORIZ_BAND |*/ CODEC_CAP_DR1 | CODEC_CAP_DELAY |
+ CODEC_CAP_SLICE_THREADS,
.flush= flush_dpb,
.long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10"),
- .pix_fmts= ff_hwaccel_pixfmt_list_420,
+ .profiles = NULL_IF_CONFIG_SMALL(profiles),
};
#if CONFIG_H264_VDPAU_DECODER
-AVCodec h264_vdpau_decoder = {
+AVCodec ff_h264_vdpau_decoder = {
"h264_vdpau",
- CODEC_TYPE_VIDEO,
+ AVMEDIA_TYPE_VIDEO,
CODEC_ID_H264,
sizeof(H264Context),
ff_h264_decode_init,
.flush= flush_dpb,
.long_name = NULL_IF_CONFIG_SMALL("H.264 / AVC / MPEG-4 AVC / MPEG-4 part 10 (VDPAU acceleration)"),
.pix_fmts = (const enum PixelFormat[]){PIX_FMT_VDPAU_H264, PIX_FMT_NONE},
+ .profiles = NULL_IF_CONFIG_SMALL(profiles),
};
#endif