#include "h264data.h"
#include "h264_parser.h"
#include "golomb.h"
+#include "rectangle.h"
#include "cabac.h"
};
-/**
- * fill a rectangle.
- * @param h height of the rectangle, should be a constant
- * @param w width of the rectangle, should be a constant
- * @param size the size of val (1 or 4), should be a constant
- */
-static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
- uint8_t *p= (uint8_t*)vp;
- assert(size==1 || size==4);
- assert(w<=4);
-
- w *= size;
- stride *= size;
-
- assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
- assert((stride&(w-1))==0);
- if(w==2){
- const uint16_t v= size==4 ? val : val*0x0101;
- *(uint16_t*)(p + 0*stride)= v;
- if(h==1) return;
- *(uint16_t*)(p + 1*stride)= v;
- if(h==2) return;
- *(uint16_t*)(p + 2*stride)= v;
- *(uint16_t*)(p + 3*stride)= v;
- }else if(w==4){
- const uint32_t v= size==4 ? val : val*0x01010101;
- *(uint32_t*)(p + 0*stride)= v;
- if(h==1) return;
- *(uint32_t*)(p + 1*stride)= v;
- if(h==2) return;
- *(uint32_t*)(p + 2*stride)= v;
- *(uint32_t*)(p + 3*stride)= v;
- }else if(w==8){
- //gcc can't optimize 64bit math on x86_32
-#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
- const uint64_t v= val*0x0100000001ULL;
- *(uint64_t*)(p + 0*stride)= v;
- if(h==1) return;
- *(uint64_t*)(p + 1*stride)= v;
- if(h==2) return;
- *(uint64_t*)(p + 2*stride)= v;
- *(uint64_t*)(p + 3*stride)= v;
- }else if(w==16){
- const uint64_t v= val*0x0100000001ULL;
- *(uint64_t*)(p + 0+0*stride)= v;
- *(uint64_t*)(p + 8+0*stride)= v;
- *(uint64_t*)(p + 0+1*stride)= v;
- *(uint64_t*)(p + 8+1*stride)= v;
- if(h==2) return;
- *(uint64_t*)(p + 0+2*stride)= v;
- *(uint64_t*)(p + 8+2*stride)= v;
- *(uint64_t*)(p + 0+3*stride)= v;
- *(uint64_t*)(p + 8+3*stride)= v;
-#else
- *(uint32_t*)(p + 0+0*stride)= val;
- *(uint32_t*)(p + 4+0*stride)= val;
- if(h==1) return;
- *(uint32_t*)(p + 0+1*stride)= val;
- *(uint32_t*)(p + 4+1*stride)= val;
- if(h==2) return;
- *(uint32_t*)(p + 0+2*stride)= val;
- *(uint32_t*)(p + 4+2*stride)= val;
- *(uint32_t*)(p + 0+3*stride)= val;
- *(uint32_t*)(p + 4+3*stride)= val;
- }else if(w==16){
- *(uint32_t*)(p + 0+0*stride)= val;
- *(uint32_t*)(p + 4+0*stride)= val;
- *(uint32_t*)(p + 8+0*stride)= val;
- *(uint32_t*)(p +12+0*stride)= val;
- *(uint32_t*)(p + 0+1*stride)= val;
- *(uint32_t*)(p + 4+1*stride)= val;
- *(uint32_t*)(p + 8+1*stride)= val;
- *(uint32_t*)(p +12+1*stride)= val;
- if(h==2) return;
- *(uint32_t*)(p + 0+2*stride)= val;
- *(uint32_t*)(p + 4+2*stride)= val;
- *(uint32_t*)(p + 8+2*stride)= val;
- *(uint32_t*)(p +12+2*stride)= val;
- *(uint32_t*)(p + 0+3*stride)= val;
- *(uint32_t*)(p + 4+3*stride)= val;
- *(uint32_t*)(p + 8+3*stride)= val;
- *(uint32_t*)(p +12+3*stride)= val;
-#endif
- }else
- assert(0);
- assert(h==4);
-}
-
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
MpegEncContext * const s = &h->s;
const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
int topleft_xy, top_xy, topright_xy, left_xy[2];
int topleft_type, top_type, topright_type, left_type[2];
int left_block[8];
+ int topleft_partition= -1;
int i;
+ top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
+
//FIXME deblocking could skip the intra and nnz parts.
- if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
+ if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
return;
- //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
+ /* Wow, what a mess, why didn't they simplify the interlacing & intra
+ * stuff, I can't imagine that these complex rules are worth it. */
- top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
topleft_xy = top_xy - 1;
topright_xy= top_xy + 1;
left_xy[1] = left_xy[0] = mb_xy-1;
: (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
) {
topleft_xy -= s->mb_stride;
+ } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
+ topleft_xy += s->mb_stride;
+ // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
+ topleft_partition = 0;
}
if (bottom
? !curr_mb_frame_flag // bottom macroblock
continue;
if(USES_LIST(topleft_type, list)){
- const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
- const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
+ const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
+ const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
*(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
}else{
*(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
*(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
- if(h->slice_type == B_TYPE){
+ if(h->slice_type == FF_B_TYPE){
fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
if(IS_DIRECT(top_type)){
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
const int x4 = X4, y4 = Y4;\
const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
- if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
+ if(!USES_LIST(mb_type,list))\
return LIST_NOT_USED;\
mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
h->mv_cache[list][scan8[0]-2][0] = mv[0];\
&& !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
&& i >= scan8[0]+8){
// leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
- SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
+ SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
}
}
#undef SET_DIAG_MV
Picture * const ref1 = &h->ref_list[1][0];
Picture * const cur = s->current_picture_ptr;
int list, i, j;
- if(cur->pict_type == I_TYPE)
+ if(cur->pict_type == FF_I_TYPE)
cur->ref_count[0] = 0;
- if(cur->pict_type != B_TYPE)
+ if(cur->pict_type != FF_B_TYPE)
cur->ref_count[1] = 0;
for(list=0; list<2; list++){
cur->ref_count[list] = h->ref_count[list];
for(j=0; j<h->ref_count[list]; j++)
cur->ref_poc[list][j] = h->ref_list[list][j].poc;
}
- if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
+ if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
return;
for(list=0; list<2; list++){
for(i=0; i<ref1->ref_count[list]; i++){
}
if(ref[1] < 0){
- *mb_type &= ~MB_TYPE_P0L1;
- sub_mb_type &= ~MB_TYPE_P0L1;
+ if(!is_b8x8)
+ *mb_type &= ~MB_TYPE_L1;
+ sub_mb_type &= ~MB_TYPE_L1;
}else if(ref[0] < 0){
- *mb_type &= ~MB_TYPE_P0L0;
- sub_mb_type &= ~MB_TYPE_P0L0;
+ if(!is_b8x8)
+ *mb_type &= ~MB_TYPE_L0;
+ sub_mb_type &= ~MB_TYPE_L0;
}
- if(IS_16X16(*mb_type)){
+ if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
+ int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
+ int mb_types_col[2];
+ int b8_stride = h->b8_stride;
+ int b4_stride = h->b_stride;
+
+ *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
+
+ if(IS_INTERLACED(*mb_type)){
+ mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
+ mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
+ if(s->mb_y&1){
+ l1ref0 -= 2*b8_stride;
+ l1ref1 -= 2*b8_stride;
+ l1mv0 -= 4*b4_stride;
+ l1mv1 -= 4*b4_stride;
+ }
+ b8_stride *= 3;
+ b4_stride *= 6;
+ }else{
+ int cur_poc = s->current_picture_ptr->poc;
+ int *col_poc = h->ref_list[1]->field_poc;
+ int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
+ int dy = 2*col_parity - (s->mb_y&1);
+ mb_types_col[0] =
+ mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
+ l1ref0 += dy*b8_stride;
+ l1ref1 += dy*b8_stride;
+ l1mv0 += 2*dy*b4_stride;
+ l1mv1 += 2*dy*b4_stride;
+ b8_stride = 0;
+ }
+
+ for(i8=0; i8<4; i8++){
+ int x8 = i8&1;
+ int y8 = i8>>1;
+ int xy8 = x8+y8*b8_stride;
+ int xy4 = 3*x8+y8*b4_stride;
+ int a=0, b=0;
+
+ if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
+ continue;
+ h->sub_mb_type[i8] = sub_mb_type;
+
+ fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
+ fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
+ if(!IS_INTRA(mb_types_col[y8])
+ && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
+ || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
+ if(ref[0] > 0)
+ a= pack16to32(mv[0][0],mv[0][1]);
+ if(ref[1] > 0)
+ b= pack16to32(mv[1][0],mv[1][1]);
+ }else{
+ a= pack16to32(mv[0][0],mv[0][1]);
+ b= pack16to32(mv[1][0],mv[1][1]);
+ }
+ fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
+ fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
+ }
+ }else if(IS_16X16(*mb_type)){
int a=0, b=0;
fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
}
}
- if(h->slice_type == B_TYPE && h->pps.cabac){
+ if(h->slice_type == FF_B_TYPE && h->pps.cabac){
if(IS_8X8(mb_type)){
uint8_t *direct_table = &h->direct_table[b8_xy];
direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
* @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
* @returns decoded bytes, might be src+1 if no escapes
*/
-static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
+static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
int i, si, di;
uint8_t *dst;
int bufidx;
* identifies the exact end of the bitstream
* @return the length of the trailing, or 0 if damaged
*/
-static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
+static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
int v= *src;
int r;
return h->pps.chroma_qp_table[t][qscale & 0xff];
}
-//FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
+//FIXME need to check that this does not overflow signed 32 bit for low qp, I am not sure, it's very close
//FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
int i;
prefetch_motion(h, 1);
}
-static void decode_init_vlc(void){
+static av_cold void decode_init_vlc(void){
static int done = 0;
if (!done) {
av_freep(&hx->top_borders[1]);
av_freep(&hx->top_borders[0]);
av_freep(&hx->s.obmc_scratchpad);
- av_freep(&hx->s.allocated_edge_emu_buffer);
}
}
CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
- if( h->pps.cabac ) {
- CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
- CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
- CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
- CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
- }
+ CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
+ CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
+ CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
+ CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
* Allocate buffers which are not shared amongst multiple threads.
*/
static int context_init(H264Context *h){
- MpegEncContext * const s = &h->s;
-
CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
- // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
- CHECKED_ALLOCZ(s->allocated_edge_emu_buffer,
- (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
- s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
return 0;
fail:
return -1; // free_tables will clean up for us
}
-static void common_init(H264Context *h){
+static av_cold void common_init(H264Context *h){
MpegEncContext * const s = &h->s;
s->width = s->avctx->width;
memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
}
-static int decode_init(AVCodecContext *avctx){
+static av_cold int decode_init(AVCodecContext *avctx){
H264Context *h= avctx->priv_data;
MpegEncContext * const s = &h->s;
continue;
if(IS_16X16(mb_type)){
int8_t *ref = &h->ref_cache[list][scan8[0]];
- fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
+ fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
}else{
for(i=0; i<16; i+=4){
//FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
int ref = h->ref_cache[list][scan8[i]];
if(ref >= 0)
- fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
+ fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
}
}
}
frame_list[1] = h->default_ref_list[1];
}
- if(h->slice_type==B_TYPE){
+ if(h->slice_type==FF_B_TYPE){
int list;
int len[2];
int short_len[2];
if(sel != PICT_FRAME) continue;
frame_list[ list ][index ]= *h->long_ref[i];
- frame_list[ list ][index++].pic_id= i;;
+ frame_list[ list ][index++].pic_id= i;
}
len[list] = index;
-
- if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
- // swap the two first elements of L1 when
- // L0 and L1 are identical
- Picture temp= frame_list[1][0];
- frame_list[1][0] = frame_list[1][1];
- frame_list[1][1] = temp;
- }
-
}
for(list=0; list<2; list++){
s->picture_structure,
short_len[list]);
+ // swap the two first elements of L1 when L0 and L1 are identical
+ if(list && len[0] > 1 && len[0] == len[1])
+ for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
+ if(i == len[0]){
+ FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
+ break;
+ }
+
if(len[list] < h->ref_count[ list ])
memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
}
sel = h->long_ref[i]->reference | structure_sel;
if(sel != PICT_FRAME) continue;
frame_list[0][index ]= *h->long_ref[i];
- frame_list[0][index++].pic_id= i;;
+ frame_list[0][index++].pic_id= i;
}
if (FIELD_PICTURE)
for (i=0; i<h->ref_count[0]; i++) {
tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
}
- if(h->slice_type==B_TYPE){
+ if(h->slice_type==FF_B_TYPE){
for (i=0; i<h->ref_count[1]; i++) {
- tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
+ tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
}
}
#endif
print_short_term(h);
print_long_term(h);
- if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
+ if(h->slice_type==FF_I_TYPE || h->slice_type==FF_SI_TYPE) return 0; //FIXME move before func
for(list=0; list<h->list_count; list++){
memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
}
}
- if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
+ if(h->slice_type==FF_B_TYPE && !h->direct_spatial_mv_pred)
direct_dist_scale_factor(h);
direct_ref_list_init(h);
return 0;
}
}
}
- if(h->slice_type != B_TYPE) break;
+ if(h->slice_type != FF_B_TYPE) break;
}
h->use_weight= h->use_weight || h->use_weight_chroma;
return 0;
s->current_picture_ptr->reference |= s->picture_structure;
}
+ if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
+
+ /* We have too many reference frames, probably due to corrupted
+ * stream. Need to discard one frame. Prevents overrun of the
+ * short_ref and long_ref buffers.
+ */
+ av_log(h->s.avctx, AV_LOG_ERROR,
+ "number of reference frames exceeds max (probably "
+ "corrupt input), discarding one\n");
+
+ if (h->long_ref_count) {
+ for (i = 0; i < 16; ++i)
+ if (h->long_ref[i])
+ break;
+
+ assert(i < 16);
+ pic = h->long_ref[i];
+ remove_long_at_index(h, i);
+ } else {
+ pic = h->short_ref[h->short_ref_count - 1];
+ remove_short_at_index(h, h->short_ref_count - 1);
+ }
+ unreference_pic(h, pic, 0);
+ }
+
print_short_term(h);
print_long_term(h);
return 0;
}else{
assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
- if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
+ if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
!(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
h->mmco[0].opcode= MMCO_SHORT2UNUSED;
h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
* @param h h264context
* @param h0 h264 master context (differs from 'h' when doing sliced based parallel decoding)
*
- * @return 0 if okay, <0 if an error occured, 1 if decoding must not be multithreaded
+ * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
*/
static int decode_slice_header(H264Context *h, H264Context *h0){
MpegEncContext * const s = &h->s;
unsigned int first_mb_in_slice;
unsigned int pps_id;
int num_ref_idx_active_override_flag;
- static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
+ static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
unsigned int slice_type, tmp, i;
int default_ref_list_done = 0;
int last_pic_structure;
s->dropable= h->nal_ref_idc == 0;
+ if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
+ s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
+ s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
+ }else{
+ s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
+ s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
+ }
+
first_mb_in_slice= get_ue_golomb(&s->gb);
if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
h->slice_type_fixed=0;
slice_type= slice_type_map[ slice_type ];
- if (slice_type == I_TYPE
+ if (slice_type == FF_I_TYPE
|| (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
default_ref_list_done = 1;
}
h->slice_type= slice_type;
s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
+ if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
+ av_log(h->s.avctx, AV_LOG_ERROR,
+ "B picture before any references, skipping\n");
+ return -1;
+ }
pps_id= get_ue_golomb(&s->gb);
if(pps_id>=MAX_PPS_COUNT){
h->b_stride= s->mb_width*4;
h->b8_stride= s->mb_width*2;
- s->width = 16*s->mb_width - 2*(h->sps.crop_left + h->sps.crop_right );
+ s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
if(h->sps.frame_mbs_only_flag)
- s->height= 16*s->mb_height - 2*(h->sps.crop_top + h->sps.crop_bottom);
+ s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
else
- s->height= 16*s->mb_height - 4*(h->sps.crop_top + h->sps.crop_bottom); //FIXME recheck
+ s->height= 16*s->mb_height - 4*FFMIN(h->sps.crop_bottom, 3);
if (s->context_initialized
&& ( s->width != s->avctx->width || s->height != s->avctx->height)) {
for(i = 1; i < s->avctx->thread_count; i++) {
H264Context *c;
c = h->thread_context[i] = av_malloc(sizeof(H264Context));
- memcpy(c, h, sizeof(MpegEncContext));
+ memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
c->sps = h->sps;
c->pps = h->pps;
h->ref_count[0]= h->pps.ref_count[0];
h->ref_count[1]= h->pps.ref_count[1];
- if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
- if(h->slice_type == B_TYPE){
+ if(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE || h->slice_type == FF_B_TYPE){
+ if(h->slice_type == FF_B_TYPE){
h->direct_spatial_mv_pred= get_bits1(&s->gb);
- if(FIELD_OR_MBAFF_PICTURE && h->direct_spatial_mv_pred)
- av_log(h->s.avctx, AV_LOG_ERROR, "Interlaced pictures + spatial direct mode is not implemented\n");
}
num_ref_idx_active_override_flag= get_bits1(&s->gb);
if(num_ref_idx_active_override_flag){
h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
- if(h->slice_type==B_TYPE)
+ if(h->slice_type==FF_B_TYPE)
h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
return -1;
}
}
- if(h->slice_type == B_TYPE)
+ if(h->slice_type == FF_B_TYPE)
h->list_count= 2;
else
h->list_count= 1;
if(decode_ref_pic_list_reordering(h) < 0)
return -1;
- if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
- || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
+ if( (h->pps.weighted_pred && (h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE ))
+ || (h->pps.weighted_bipred_idc==1 && h->slice_type==FF_B_TYPE ) )
pred_weight_table(h);
- else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
+ else if(h->pps.weighted_bipred_idc==2 && h->slice_type==FF_B_TYPE)
implicit_weight_table(h);
else
h->use_weight = 0;
if(FRAME_MBAFF)
fill_mbaff_ref_list(h);
- if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
+ if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE && h->pps.cabac ){
tmp = get_ue_golomb(&s->gb);
if(tmp > 2){
av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
//FIXME qscale / qp ... stuff
- if(h->slice_type == SP_TYPE){
+ if(h->slice_type == FF_SP_TYPE){
get_bits1(&s->gb); /* sp_for_switch_flag */
}
- if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
+ if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
get_se_golomb(&s->gb); /* slice_qs_delta */
}
}
if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
- ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
- ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
+ ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != FF_I_TYPE)
+ ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == FF_B_TYPE)
||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
h->deblocking_filter= 0;
if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
/* Cheat slightly for speed:
- Dont bother to deblock across slices */
+ Do not bother to deblock across slices. */
h->deblocking_filter = 2;
} else {
h0->max_contexts = 1;
);
}
- if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
- s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
- s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
- }else{
- s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
- s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
- }
-
return 0;
}
* @param n block index
* @param scantable scantable
* @param max_coeff number of coefficients in the block
- * @return <0 if an error occured
+ * @return <0 if an error occurred
*/
static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
MpegEncContext * const s = &h->s;
if(MB_FIELD)
mb_type|= MB_TYPE_INTERLACED;
- if( h->slice_type == B_TYPE )
+ if( h->slice_type == FF_B_TYPE )
{
// just for fill_caches. pred_direct_motion will set the real mb_type
mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
cbp = 0; /* avoid warning. FIXME: find a solution without slowing
down the code */
- if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
+ if(h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE){
if(s->mb_skip_run==-1)
s->mb_skip_run= get_ue_golomb(&s->gb);
h->prev_mb_skipped= 0;
mb_type= get_ue_golomb(&s->gb);
- if(h->slice_type == B_TYPE){
+ if(h->slice_type == FF_B_TYPE){
if(mb_type < 23){
partition_count= b_mb_type_info[mb_type].partition_count;
mb_type= b_mb_type_info[mb_type].type;
mb_type -= 23;
goto decode_intra_mb;
}
- }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
+ }else if(h->slice_type == FF_P_TYPE /*|| h->slice_type == FF_SP_TYPE */){
if(mb_type < 5){
partition_count= p_mb_type_info[mb_type].partition_count;
mb_type= p_mb_type_info[mb_type].type;
goto decode_intra_mb;
}
}else{
- assert(h->slice_type == I_TYPE);
+ assert(h->slice_type == FF_I_TYPE);
decode_intra_mb:
if(mb_type > 25){
av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
}else if(partition_count==4){
int i, j, sub_partition_count[4], list, ref[2][4];
- if(h->slice_type == B_TYPE){
+ if(h->slice_type == FF_B_TYPE){
for(i=0; i<4; i++){
h->sub_mb_type[i]= get_ue_golomb(&s->gb);
if(h->sub_mb_type[i] >=13){
h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
}
}else{
- assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
+ assert(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE); //FIXME SP correct ?
for(i=0; i<4; i++){
h->sub_mb_type[i]= get_ue_golomb(&s->gb);
if(h->sub_mb_type[i] >=4){
static int decode_cabac_mb_type( H264Context *h ) {
MpegEncContext * const s = &h->s;
- if( h->slice_type == I_TYPE ) {
+ if( h->slice_type == FF_I_TYPE ) {
return decode_cabac_intra_mb_type(h, 3, 1);
- } else if( h->slice_type == P_TYPE ) {
+ } else if( h->slice_type == FF_P_TYPE ) {
if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
/* P-type */
if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
} else {
return decode_cabac_intra_mb_type(h, 17, 0) + 5;
}
- } else if( h->slice_type == B_TYPE ) {
+ } else if( h->slice_type == FF_B_TYPE ) {
const int mba_xy = h->left_mb_xy[0];
const int mbb_xy = h->top_mb_xy;
int ctx = 0;
if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
ctx++;
- if( h->slice_type == B_TYPE )
+ if( h->slice_type == FF_B_TYPE )
ctx += 13;
return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
}
int ref = 0;
int ctx = 0;
- if( h->slice_type == B_TYPE) {
+ if( h->slice_type == FF_B_TYPE) {
if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
ctx++;
if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
return ctx + 4 * cat;
}
-static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
+DECLARE_ASM_CONST(1, const uint8_t, last_coeff_flag_offset_8x8[63]) = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
if( !qmul ) {
block[j] = get_cabac_bypass_sign( CC, -1);
}else{
- block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
+ block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
}
abslevel1++;
s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
- if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
+ if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE ) {
int skip;
/* a skipped mb needs the aff flag from the following mb */
if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
return -1;
}
- if( h->slice_type == B_TYPE ) {
+ if( h->slice_type == FF_B_TYPE ) {
if( mb_type < 23 ){
partition_count= b_mb_type_info[mb_type].partition_count;
mb_type= b_mb_type_info[mb_type].type;
mb_type -= 23;
goto decode_intra_mb;
}
- } else if( h->slice_type == P_TYPE ) {
+ } else if( h->slice_type == FF_P_TYPE ) {
if( mb_type < 5) {
partition_count= p_mb_type_info[mb_type].partition_count;
mb_type= p_mb_type_info[mb_type].type;
goto decode_intra_mb;
}
} else {
- assert(h->slice_type == I_TYPE);
+ assert(h->slice_type == FF_I_TYPE);
decode_intra_mb:
partition_count = 0;
cbp= i_mb_type_info[mb_type].cbp;
} else if( partition_count == 4 ) {
int i, j, sub_partition_count[4], list, ref[2][4];
- if( h->slice_type == B_TYPE ) {
+ if( h->slice_type == FF_B_TYPE ) {
for( i = 0; i < 4; i++ ) {
h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
int step = IS_8x8DCT(mb_type) ? 2 : 1;
edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
- (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
+ (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1 );
}
if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
bSv[0][0] = 0x0004000400040004ULL;
int b_idx= 8 + 4 + edge * (dir ? 8:1);
int bn_idx= b_idx - (dir ? 8:1);
int v = 0;
- for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
+ for( l = 0; !v && l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
else if(!mv_done)
{
bS[i] = 0;
- for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
+ for( l = 0; l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
/* calculate pre-state */
for( i= 0; i < 460; i++ ) {
int pre;
- if( h->slice_type == I_TYPE )
+ if( h->slice_type == FF_I_TYPE )
pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
else
pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
if( aspect_ratio_idc == EXTENDED_SAR ) {
sps->sar.num= get_bits(&s->gb, 16);
sps->sar.den= get_bits(&s->gb, 16);
- }else if(aspect_ratio_idc < 14){
+ }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
sps->sar= pixel_aspect[aspect_ratio_idc];
}else{
av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
}
tmp= get_ue_golomb(&s->gb);
- if(tmp > MAX_PICTURE_COUNT-2){
+ if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
+ return -1;
}
sps->ref_frame_count= tmp;
sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
if(sps->crop_left || sps->crop_top){
av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
}
+ if(sps->crop_right >= 8 || sps->crop_bottom >= (8>> !h->sps.frame_mbs_only_flag)){
+ av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n");
+ }
}else{
sps->crop_left =
sps->crop_right =
}
-static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
+static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
MpegEncContext * const s = &h->s;
AVCodecContext * const avctx= s->avctx;
int buf_index=0;
int consumed;
int dst_length;
int bit_length;
- uint8_t *ptr;
+ const uint8_t *ptr;
int i, nalsize = 0;
int err;
av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
}
- if (h->is_avc && (nalsize != consumed))
+ if (h->is_avc && (nalsize != consumed)){
av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
+ consumed= nalsize;
+ }
buf_index += consumed;
s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
&& (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
- && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
- && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
+ && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
+ && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
&& avctx->skip_frame < AVDISCARD_ALL)
context_count++;
break;
&& s->context_initialized
&& s->hurry_up < 5
&& (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
- && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=B_TYPE)
- && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==I_TYPE)
+ && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
+ && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
&& avctx->skip_frame < AVDISCARD_ALL)
context_count++;
break;
static int decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- uint8_t *buf, int buf_size)
+ const uint8_t *buf, int buf_size)
{
H264Context *h = avctx->priv_data;
MpegEncContext *s = &h->s;
else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
|| (s->low_delay &&
((!cross_idr && prev && out->poc > prev->poc + 2)
- || cur->pict_type == B_TYPE)))
+ || cur->pict_type == FF_B_TYPE)))
{
s->low_delay = 0;
s->avctx->has_b_frames++;
}
#endif
-#if 0 //selftest
+#ifdef TEST
+#undef printf
#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
STOP_TIMER("get_se_golomb");
}
+#if 0
printf("testing 4x4 (I)DCT\n");
DCTELEM block[16];
}
}
printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
-#if 0
printf("testing quantizer\n");
for(qp=0; qp<52; qp++){
for(i=0; i<16; i++)
src1_block[i]= src2_block[i]= random()%255;
}
-#endif
printf("Testing NAL layer\n");
uint8_t bitstream[COUNT];
return -1;
}
}
+#endif
printf("Testing RBSP\n");
return 0;
}
-#endif
+#endif /* TEST */
-static int decode_end(AVCodecContext *avctx)
+static av_cold int decode_end(AVCodecContext *avctx)
{
H264Context *h = avctx->priv_data;
MpegEncContext *s = &h->s;