#include "h264data.h"
#include "h264_parser.h"
#include "golomb.h"
+#include "rectangle.h"
#include "cabac.h"
//#undef NDEBUG
#include <assert.h>
+/**
+ * Value of Picture.reference when Picture is not a reference picture, but
+ * is held for delayed output.
+ */
+#define DELAYED_PIC_REF 4
+
static VLC coeff_token_vlc[4];
static VLC chroma_dc_coeff_token_vlc;
};
-/**
- * fill a rectangle.
- * @param h height of the rectangle, should be a constant
- * @param w width of the rectangle, should be a constant
- * @param size the size of val (1 or 4), should be a constant
- */
-static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
- uint8_t *p= (uint8_t*)vp;
- assert(size==1 || size==4);
- assert(w<=4);
-
- w *= size;
- stride *= size;
-
- assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
- assert((stride&(w-1))==0);
- if(w==2){
- const uint16_t v= size==4 ? val : val*0x0101;
- *(uint16_t*)(p + 0*stride)= v;
- if(h==1) return;
- *(uint16_t*)(p + 1*stride)= v;
- if(h==2) return;
- *(uint16_t*)(p + 2*stride)=
- *(uint16_t*)(p + 3*stride)= v;
- }else if(w==4){
- const uint32_t v= size==4 ? val : val*0x01010101;
- *(uint32_t*)(p + 0*stride)= v;
- if(h==1) return;
- *(uint32_t*)(p + 1*stride)= v;
- if(h==2) return;
- *(uint32_t*)(p + 2*stride)=
- *(uint32_t*)(p + 3*stride)= v;
- }else if(w==8){
- //gcc can't optimize 64bit math on x86_32
-#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
- const uint64_t v= val*0x0100000001ULL;
- *(uint64_t*)(p + 0*stride)= v;
- if(h==1) return;
- *(uint64_t*)(p + 1*stride)= v;
- if(h==2) return;
- *(uint64_t*)(p + 2*stride)=
- *(uint64_t*)(p + 3*stride)= v;
- }else if(w==16){
- const uint64_t v= val*0x0100000001ULL;
- *(uint64_t*)(p + 0+0*stride)=
- *(uint64_t*)(p + 8+0*stride)=
- *(uint64_t*)(p + 0+1*stride)=
- *(uint64_t*)(p + 8+1*stride)= v;
- if(h==2) return;
- *(uint64_t*)(p + 0+2*stride)=
- *(uint64_t*)(p + 8+2*stride)=
- *(uint64_t*)(p + 0+3*stride)=
- *(uint64_t*)(p + 8+3*stride)= v;
-#else
- *(uint32_t*)(p + 0+0*stride)=
- *(uint32_t*)(p + 4+0*stride)= val;
- if(h==1) return;
- *(uint32_t*)(p + 0+1*stride)=
- *(uint32_t*)(p + 4+1*stride)= val;
- if(h==2) return;
- *(uint32_t*)(p + 0+2*stride)=
- *(uint32_t*)(p + 4+2*stride)=
- *(uint32_t*)(p + 0+3*stride)=
- *(uint32_t*)(p + 4+3*stride)= val;
- }else if(w==16){
- *(uint32_t*)(p + 0+0*stride)=
- *(uint32_t*)(p + 4+0*stride)=
- *(uint32_t*)(p + 8+0*stride)=
- *(uint32_t*)(p +12+0*stride)=
- *(uint32_t*)(p + 0+1*stride)=
- *(uint32_t*)(p + 4+1*stride)=
- *(uint32_t*)(p + 8+1*stride)=
- *(uint32_t*)(p +12+1*stride)= val;
- if(h==2) return;
- *(uint32_t*)(p + 0+2*stride)=
- *(uint32_t*)(p + 4+2*stride)=
- *(uint32_t*)(p + 8+2*stride)=
- *(uint32_t*)(p +12+2*stride)=
- *(uint32_t*)(p + 0+3*stride)=
- *(uint32_t*)(p + 4+3*stride)=
- *(uint32_t*)(p + 8+3*stride)=
- *(uint32_t*)(p +12+3*stride)= val;
-#endif
- }else
- assert(0);
- assert(h==4);
-}
-
static void fill_caches(H264Context *h, int mb_type, int for_deblock){
MpegEncContext * const s = &h->s;
const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
int topleft_xy, top_xy, topright_xy, left_xy[2];
int topleft_type, top_type, topright_type, left_type[2];
int left_block[8];
+ int topleft_partition= -1;
int i;
+ top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
+
//FIXME deblocking could skip the intra and nnz parts.
- if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
+ if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
return;
- //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
+ /* Wow, what a mess. Why didn't they simplify the interlacing & intra
+ * stuff? I can't imagine that these complex rules are worth it. */
- top_xy = mb_xy - s->mb_stride;
topleft_xy = top_xy - 1;
topright_xy= top_xy + 1;
left_xy[1] = left_xy[0] = mb_xy-1;
: (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
) {
topleft_xy -= s->mb_stride;
+ } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
+ topleft_xy += s->mb_stride;
+ // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
+ topleft_partition = 0;
}
if (bottom
? !curr_mb_frame_flag // bottom macroblock
continue;
if(USES_LIST(topleft_type, list)){
- const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
- const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
+ const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
+ const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
*(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
}else{
*(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
*(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
- if(h->slice_type == B_TYPE){
+ if(h->slice_type == FF_B_TYPE){
fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
if(IS_DIRECT(top_type)){
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
const int x4 = X4, y4 = Y4;\
const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
- if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
+ if(!USES_LIST(mb_type,list))\
return LIST_NOT_USED;\
mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
h->mv_cache[list][scan8[0]-2][0] = mv[0];\
&& !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
&& i >= scan8[0]+8){
// leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
- SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
+ SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
}
}
#undef SET_DIAG_MV
Picture * const ref1 = &h->ref_list[1][0];
Picture * const cur = s->current_picture_ptr;
int list, i, j;
- if(cur->pict_type == I_TYPE)
+ if(cur->pict_type == FF_I_TYPE)
cur->ref_count[0] = 0;
- if(cur->pict_type != B_TYPE)
+ if(cur->pict_type != FF_B_TYPE)
cur->ref_count[1] = 0;
for(list=0; list<2; list++){
cur->ref_count[list] = h->ref_count[list];
for(j=0; j<h->ref_count[list]; j++)
cur->ref_poc[list][j] = h->ref_list[list][j].poc;
}
- if(cur->pict_type != B_TYPE || h->direct_spatial_mv_pred)
+ if(cur->pict_type != FF_B_TYPE || h->direct_spatial_mv_pred)
return;
for(list=0; list<2; list++){
for(i=0; i<ref1->ref_count[list]; i++){
}
if(ref[1] < 0){
- *mb_type &= ~MB_TYPE_P0L1;
- sub_mb_type &= ~MB_TYPE_P0L1;
+ if(!is_b8x8)
+ *mb_type &= ~MB_TYPE_L1;
+ sub_mb_type &= ~MB_TYPE_L1;
}else if(ref[0] < 0){
- *mb_type &= ~MB_TYPE_P0L0;
- sub_mb_type &= ~MB_TYPE_P0L0;
+ if(!is_b8x8)
+ *mb_type &= ~MB_TYPE_L0;
+ sub_mb_type &= ~MB_TYPE_L0;
}
- if(IS_16X16(*mb_type)){
+ if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
+ int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
+ int mb_types_col[2];
+ int b8_stride = h->b8_stride;
+ int b4_stride = h->b_stride;
+
+ *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
+
+ if(IS_INTERLACED(*mb_type)){
+ mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
+ mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
+ if(s->mb_y&1){
+ l1ref0 -= 2*b8_stride;
+ l1ref1 -= 2*b8_stride;
+ l1mv0 -= 4*b4_stride;
+ l1mv1 -= 4*b4_stride;
+ }
+ b8_stride *= 3;
+ b4_stride *= 6;
+ }else{
+ int cur_poc = s->current_picture_ptr->poc;
+ int *col_poc = h->ref_list[1]->field_poc;
+ int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
+ int dy = 2*col_parity - (s->mb_y&1);
+ mb_types_col[0] =
+ mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
+ l1ref0 += dy*b8_stride;
+ l1ref1 += dy*b8_stride;
+ l1mv0 += 2*dy*b4_stride;
+ l1mv1 += 2*dy*b4_stride;
+ b8_stride = 0;
+ }
+
+ for(i8=0; i8<4; i8++){
+ int x8 = i8&1;
+ int y8 = i8>>1;
+ int xy8 = x8+y8*b8_stride;
+ int xy4 = 3*x8+y8*b4_stride;
+ int a=0, b=0;
+
+ if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
+ continue;
+ h->sub_mb_type[i8] = sub_mb_type;
+
+ fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
+ fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
+ if(!IS_INTRA(mb_types_col[y8])
+ && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
+ || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
+ if(ref[0] > 0)
+ a= pack16to32(mv[0][0],mv[0][1]);
+ if(ref[1] > 0)
+ b= pack16to32(mv[1][0],mv[1][1]);
+ }else{
+ a= pack16to32(mv[0][0],mv[0][1]);
+ b= pack16to32(mv[1][0],mv[1][1]);
+ }
+ fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
+ fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
+ }
+ }else if(IS_16X16(*mb_type)){
int a=0, b=0;
fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
}
}
- if(h->slice_type == B_TYPE && h->pps.cabac){
+ if(h->slice_type == FF_B_TYPE && h->pps.cabac){
if(IS_8X8(mb_type)){
uint8_t *direct_table = &h->direct_table[b8_xy];
direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
* @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
* @returns decoded bytes, might be src+1 if no escapes
*/
-static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
+static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
int i, si, di;
uint8_t *dst;
int bufidx;
* identifies the exact end of the bitstream
* @return the length of the trailing, or 0 if damaged
*/
-static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
+static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
int v= *src;
int r;
return h->pps.chroma_qp_table[t][qscale & 0xff];
}
-//FIXME need to check that this does not overflow signed 32 bit for low qp, i am not sure, it's very close
+//FIXME need to check that this does not overflow signed 32 bit for low qp; I am not sure, it's very close
//FIXME check that gcc inlines this (and optimizes intra & separate_dc stuff away)
static inline int quantize_c(DCTELEM *block, uint8_t *scantable, int qscale, int intra, int separate_dc){
int i;
return last_non_zero;
}
-static void pred4x4_vertical_c(uint8_t *src, uint8_t *topright, int stride){
- const uint32_t a= ((uint32_t*)(src-stride))[0];
- ((uint32_t*)(src+0*stride))[0]= a;
- ((uint32_t*)(src+1*stride))[0]= a;
- ((uint32_t*)(src+2*stride))[0]= a;
- ((uint32_t*)(src+3*stride))[0]= a;
-}
-
-static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
- ((uint32_t*)(src+0*stride))[0]= src[-1+0*stride]*0x01010101;
- ((uint32_t*)(src+1*stride))[0]= src[-1+1*stride]*0x01010101;
- ((uint32_t*)(src+2*stride))[0]= src[-1+2*stride]*0x01010101;
- ((uint32_t*)(src+3*stride))[0]= src[-1+3*stride]*0x01010101;
-}
-
-static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
- const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
- + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
-
- ((uint32_t*)(src+0*stride))[0]=
- ((uint32_t*)(src+1*stride))[0]=
- ((uint32_t*)(src+2*stride))[0]=
- ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
-}
-
-static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
- const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
-
- ((uint32_t*)(src+0*stride))[0]=
- ((uint32_t*)(src+1*stride))[0]=
- ((uint32_t*)(src+2*stride))[0]=
- ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
-}
-
-static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
- const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
-
- ((uint32_t*)(src+0*stride))[0]=
- ((uint32_t*)(src+1*stride))[0]=
- ((uint32_t*)(src+2*stride))[0]=
- ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
-}
-
-static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
- ((uint32_t*)(src+0*stride))[0]=
- ((uint32_t*)(src+1*stride))[0]=
- ((uint32_t*)(src+2*stride))[0]=
- ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
-}
-
-
-#define LOAD_TOP_RIGHT_EDGE\
- const int av_unused t4= topright[0];\
- const int av_unused t5= topright[1];\
- const int av_unused t6= topright[2];\
- const int av_unused t7= topright[3];\
-
-#define LOAD_LEFT_EDGE\
- const int av_unused l0= src[-1+0*stride];\
- const int av_unused l1= src[-1+1*stride];\
- const int av_unused l2= src[-1+2*stride];\
- const int av_unused l3= src[-1+3*stride];\
-
-#define LOAD_TOP_EDGE\
- const int av_unused t0= src[ 0-1*stride];\
- const int av_unused t1= src[ 1-1*stride];\
- const int av_unused t2= src[ 2-1*stride];\
- const int av_unused t3= src[ 3-1*stride];\
-
-static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
- const int lt= src[-1-1*stride];
- LOAD_TOP_EDGE
- LOAD_LEFT_EDGE
-
- src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
- src[0+2*stride]=
- src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
- src[0+1*stride]=
- src[1+2*stride]=
- src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
- src[0+0*stride]=
- src[1+1*stride]=
- src[2+2*stride]=
- src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
- src[1+0*stride]=
- src[2+1*stride]=
- src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
- src[2+0*stride]=
- src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
- src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
-}
-
-static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
- LOAD_TOP_EDGE
- LOAD_TOP_RIGHT_EDGE
-// LOAD_LEFT_EDGE
-
- src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
- src[1+0*stride]=
- src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
- src[2+0*stride]=
- src[1+1*stride]=
- src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
- src[3+0*stride]=
- src[2+1*stride]=
- src[1+2*stride]=
- src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
- src[3+1*stride]=
- src[2+2*stride]=
- src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
- src[3+2*stride]=
- src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
- src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
-}
-
-static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
- const int lt= src[-1-1*stride];
- LOAD_TOP_EDGE
- LOAD_LEFT_EDGE
-
- src[0+0*stride]=
- src[1+2*stride]=(lt + t0 + 1)>>1;
- src[1+0*stride]=
- src[2+2*stride]=(t0 + t1 + 1)>>1;
- src[2+0*stride]=
- src[3+2*stride]=(t1 + t2 + 1)>>1;
- src[3+0*stride]=(t2 + t3 + 1)>>1;
- src[0+1*stride]=
- src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
- src[1+1*stride]=
- src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
- src[2+1*stride]=
- src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
- src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
- src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
- src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
-}
-
-static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
- LOAD_TOP_EDGE
- LOAD_TOP_RIGHT_EDGE
-
- src[0+0*stride]=(t0 + t1 + 1)>>1;
- src[1+0*stride]=
- src[0+2*stride]=(t1 + t2 + 1)>>1;
- src[2+0*stride]=
- src[1+2*stride]=(t2 + t3 + 1)>>1;
- src[3+0*stride]=
- src[2+2*stride]=(t3 + t4+ 1)>>1;
- src[3+2*stride]=(t4 + t5+ 1)>>1;
- src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
- src[1+1*stride]=
- src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
- src[2+1*stride]=
- src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
- src[3+1*stride]=
- src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
- src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
-}
-
-static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
- LOAD_LEFT_EDGE
-
- src[0+0*stride]=(l0 + l1 + 1)>>1;
- src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
- src[2+0*stride]=
- src[0+1*stride]=(l1 + l2 + 1)>>1;
- src[3+0*stride]=
- src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
- src[2+1*stride]=
- src[0+2*stride]=(l2 + l3 + 1)>>1;
- src[3+1*stride]=
- src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
- src[3+2*stride]=
- src[1+3*stride]=
- src[0+3*stride]=
- src[2+2*stride]=
- src[2+3*stride]=
- src[3+3*stride]=l3;
-}
-
-static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
- const int lt= src[-1-1*stride];
- LOAD_TOP_EDGE
- LOAD_LEFT_EDGE
-
- src[0+0*stride]=
- src[2+1*stride]=(lt + l0 + 1)>>1;
- src[1+0*stride]=
- src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
- src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
- src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
- src[0+1*stride]=
- src[2+2*stride]=(l0 + l1 + 1)>>1;
- src[1+1*stride]=
- src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
- src[0+2*stride]=
- src[2+3*stride]=(l1 + l2+ 1)>>1;
- src[1+2*stride]=
- src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
- src[0+3*stride]=(l2 + l3 + 1)>>1;
- src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
-}
-
-void ff_pred16x16_vertical_c(uint8_t *src, int stride){
- int i;
- const uint32_t a= ((uint32_t*)(src-stride))[0];
- const uint32_t b= ((uint32_t*)(src-stride))[1];
- const uint32_t c= ((uint32_t*)(src-stride))[2];
- const uint32_t d= ((uint32_t*)(src-stride))[3];
-
- for(i=0; i<16; i++){
- ((uint32_t*)(src+i*stride))[0]= a;
- ((uint32_t*)(src+i*stride))[1]= b;
- ((uint32_t*)(src+i*stride))[2]= c;
- ((uint32_t*)(src+i*stride))[3]= d;
- }
-}
-
-void ff_pred16x16_horizontal_c(uint8_t *src, int stride){
- int i;
-
- for(i=0; i<16; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]=
- ((uint32_t*)(src+i*stride))[2]=
- ((uint32_t*)(src+i*stride))[3]= src[-1+i*stride]*0x01010101;
- }
-}
-
-void ff_pred16x16_dc_c(uint8_t *src, int stride){
- int i, dc=0;
-
- for(i=0;i<16; i++){
- dc+= src[-1+i*stride];
- }
-
- for(i=0;i<16; i++){
- dc+= src[i-stride];
- }
-
- dc= 0x01010101*((dc + 16)>>5);
-
- for(i=0; i<16; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]=
- ((uint32_t*)(src+i*stride))[2]=
- ((uint32_t*)(src+i*stride))[3]= dc;
- }
-}
-
-void ff_pred16x16_left_dc_c(uint8_t *src, int stride){
- int i, dc=0;
-
- for(i=0;i<16; i++){
- dc+= src[-1+i*stride];
- }
-
- dc= 0x01010101*((dc + 8)>>4);
-
- for(i=0; i<16; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]=
- ((uint32_t*)(src+i*stride))[2]=
- ((uint32_t*)(src+i*stride))[3]= dc;
- }
-}
-
-void ff_pred16x16_top_dc_c(uint8_t *src, int stride){
- int i, dc=0;
-
- for(i=0;i<16; i++){
- dc+= src[i-stride];
- }
- dc= 0x01010101*((dc + 8)>>4);
-
- for(i=0; i<16; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]=
- ((uint32_t*)(src+i*stride))[2]=
- ((uint32_t*)(src+i*stride))[3]= dc;
- }
-}
-
-void ff_pred16x16_128_dc_c(uint8_t *src, int stride){
- int i;
-
- for(i=0; i<16; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]=
- ((uint32_t*)(src+i*stride))[2]=
- ((uint32_t*)(src+i*stride))[3]= 0x01010101U*128U;
- }
-}
-
-static inline void pred16x16_plane_compat_c(uint8_t *src, int stride, const int svq3){
- int i, j, k;
- int a;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
- const uint8_t * const src0 = src+7-stride;
- const uint8_t *src1 = src+8*stride-1;
- const uint8_t *src2 = src1-2*stride; // == src+6*stride-1;
- int H = src0[1] - src0[-1];
- int V = src1[0] - src2[ 0];
- for(k=2; k<=8; ++k) {
- src1 += stride; src2 -= stride;
- H += k*(src0[k] - src0[-k]);
- V += k*(src1[0] - src2[ 0]);
- }
- if(svq3){
- H = ( 5*(H/4) ) / 16;
- V = ( 5*(V/4) ) / 16;
-
- /* required for 100% accuracy */
- i = H; H = V; V = i;
- }else{
- H = ( 5*H+32 ) >> 6;
- V = ( 5*V+32 ) >> 6;
- }
-
- a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
- for(j=16; j>0; --j) {
- int b = a;
- a += V;
- for(i=-16; i<0; i+=4) {
- src[16+i] = cm[ (b ) >> 5 ];
- src[17+i] = cm[ (b+ H) >> 5 ];
- src[18+i] = cm[ (b+2*H) >> 5 ];
- src[19+i] = cm[ (b+3*H) >> 5 ];
- b += 4*H;
- }
- src += stride;
- }
-}
-
-void ff_pred16x16_plane_c(uint8_t *src, int stride){
- pred16x16_plane_compat_c(src, stride, 0);
-}
-
-void ff_pred8x8_vertical_c(uint8_t *src, int stride){
- int i;
- const uint32_t a= ((uint32_t*)(src-stride))[0];
- const uint32_t b= ((uint32_t*)(src-stride))[1];
-
- for(i=0; i<8; i++){
- ((uint32_t*)(src+i*stride))[0]= a;
- ((uint32_t*)(src+i*stride))[1]= b;
- }
-}
-
-void ff_pred8x8_horizontal_c(uint8_t *src, int stride){
- int i;
-
- for(i=0; i<8; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]= src[-1+i*stride]*0x01010101;
- }
-}
-
-void ff_pred8x8_128_dc_c(uint8_t *src, int stride){
- int i;
-
- for(i=0; i<8; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
- }
-}
-
-void ff_pred8x8_left_dc_c(uint8_t *src, int stride){
- int i;
- int dc0, dc2;
-
- dc0=dc2=0;
- for(i=0;i<4; i++){
- dc0+= src[-1+i*stride];
- dc2+= src[-1+(i+4)*stride];
- }
- dc0= 0x01010101*((dc0 + 2)>>2);
- dc2= 0x01010101*((dc2 + 2)>>2);
-
- for(i=0; i<4; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]= dc0;
- }
- for(i=4; i<8; i++){
- ((uint32_t*)(src+i*stride))[0]=
- ((uint32_t*)(src+i*stride))[1]= dc2;
- }
-}
-
-void ff_pred8x8_top_dc_c(uint8_t *src, int stride){
- int i;
- int dc0, dc1;
-
- dc0=dc1=0;
- for(i=0;i<4; i++){
- dc0+= src[i-stride];
- dc1+= src[4+i-stride];
- }
- dc0= 0x01010101*((dc0 + 2)>>2);
- dc1= 0x01010101*((dc1 + 2)>>2);
-
- for(i=0; i<4; i++){
- ((uint32_t*)(src+i*stride))[0]= dc0;
- ((uint32_t*)(src+i*stride))[1]= dc1;
- }
- for(i=4; i<8; i++){
- ((uint32_t*)(src+i*stride))[0]= dc0;
- ((uint32_t*)(src+i*stride))[1]= dc1;
- }
-}
-
-
-void ff_pred8x8_dc_c(uint8_t *src, int stride){
- int i;
- int dc0, dc1, dc2, dc3;
-
- dc0=dc1=dc2=0;
- for(i=0;i<4; i++){
- dc0+= src[-1+i*stride] + src[i-stride];
- dc1+= src[4+i-stride];
- dc2+= src[-1+(i+4)*stride];
- }
- dc3= 0x01010101*((dc1 + dc2 + 4)>>3);
- dc0= 0x01010101*((dc0 + 4)>>3);
- dc1= 0x01010101*((dc1 + 2)>>2);
- dc2= 0x01010101*((dc2 + 2)>>2);
-
- for(i=0; i<4; i++){
- ((uint32_t*)(src+i*stride))[0]= dc0;
- ((uint32_t*)(src+i*stride))[1]= dc1;
- }
- for(i=4; i<8; i++){
- ((uint32_t*)(src+i*stride))[0]= dc2;
- ((uint32_t*)(src+i*stride))[1]= dc3;
- }
-}
-
-void ff_pred8x8_plane_c(uint8_t *src, int stride){
- int j, k;
- int a;
- uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
- const uint8_t * const src0 = src+3-stride;
- const uint8_t *src1 = src+4*stride-1;
- const uint8_t *src2 = src1-2*stride; // == src+2*stride-1;
- int H = src0[1] - src0[-1];
- int V = src1[0] - src2[ 0];
- for(k=2; k<=4; ++k) {
- src1 += stride; src2 -= stride;
- H += k*(src0[k] - src0[-k]);
- V += k*(src1[0] - src2[ 0]);
- }
- H = ( 17*H+16 ) >> 5;
- V = ( 17*V+16 ) >> 5;
-
- a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
- for(j=8; j>0; --j) {
- int b = a;
- a += V;
- src[0] = cm[ (b ) >> 5 ];
- src[1] = cm[ (b+ H) >> 5 ];
- src[2] = cm[ (b+2*H) >> 5 ];
- src[3] = cm[ (b+3*H) >> 5 ];
- src[4] = cm[ (b+4*H) >> 5 ];
- src[5] = cm[ (b+5*H) >> 5 ];
- src[6] = cm[ (b+6*H) >> 5 ];
- src[7] = cm[ (b+7*H) >> 5 ];
- src += stride;
- }
-}
-
-#define SRC(x,y) src[(x)+(y)*stride]
-#define PL(y) \
- const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
-#define PREDICT_8x8_LOAD_LEFT \
- const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
- + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
- PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
- const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
-
-#define PT(x) \
- const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
-#define PREDICT_8x8_LOAD_TOP \
- const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
- + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
- PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
- const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
- + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
-
-#define PTR(x) \
- t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
-#define PREDICT_8x8_LOAD_TOPRIGHT \
- int t8, t9, t10, t11, t12, t13, t14, t15; \
- if(has_topright) { \
- PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
- t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
- } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
-
-#define PREDICT_8x8_LOAD_TOPLEFT \
- const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
-
-#define PREDICT_8x8_DC(v) \
- int y; \
- for( y = 0; y < 8; y++ ) { \
- ((uint32_t*)src)[0] = \
- ((uint32_t*)src)[1] = v; \
- src += stride; \
- }
-
-static void pred8x8l_128_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_DC(0x80808080);
-}
-static void pred8x8l_left_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_LEFT;
- const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3) * 0x01010101;
- PREDICT_8x8_DC(dc);
-}
-static void pred8x8l_top_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_TOP;
- const uint32_t dc = ((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3) * 0x01010101;
- PREDICT_8x8_DC(dc);
-}
-static void pred8x8l_dc_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_LEFT;
- PREDICT_8x8_LOAD_TOP;
- const uint32_t dc = ((l0+l1+l2+l3+l4+l5+l6+l7
- +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4) * 0x01010101;
- PREDICT_8x8_DC(dc);
-}
-static void pred8x8l_horizontal_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_LEFT;
-#define ROW(y) ((uint32_t*)(src+y*stride))[0] =\
- ((uint32_t*)(src+y*stride))[1] = 0x01010101 * l##y
- ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
-#undef ROW
-}
-static void pred8x8l_vertical_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- int y;
- PREDICT_8x8_LOAD_TOP;
- src[0] = t0;
- src[1] = t1;
- src[2] = t2;
- src[3] = t3;
- src[4] = t4;
- src[5] = t5;
- src[6] = t6;
- src[7] = t7;
- for( y = 1; y < 8; y++ )
- *(uint64_t*)(src+y*stride) = *(uint64_t*)src;
-}
-static void pred8x8l_down_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_TOP;
- PREDICT_8x8_LOAD_TOPRIGHT;
- SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
- SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
- SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
- SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
- SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
- SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
- SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
- SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
- SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
- SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
- SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
- SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
- SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
- SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
- SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
-}
-static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_TOP;
- PREDICT_8x8_LOAD_LEFT;
- PREDICT_8x8_LOAD_TOPLEFT;
- SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
- SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
- SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
- SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
- SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
- SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
- SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
- SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
- SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
- SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
- SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
- SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
- SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
- SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
- SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
-
-}
-static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_TOP;
- PREDICT_8x8_LOAD_LEFT;
- PREDICT_8x8_LOAD_TOPLEFT;
- SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
- SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
- SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
- SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
- SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
- SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
- SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
- SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
- SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
- SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
- SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
- SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
- SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
- SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
- SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
- SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
- SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
- SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
- SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
- SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
- SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
- SRC(7,0)= (t6 + t7 + 1) >> 1;
-}
-static void pred8x8l_horizontal_down_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_TOP;
- PREDICT_8x8_LOAD_LEFT;
- PREDICT_8x8_LOAD_TOPLEFT;
- SRC(0,7)= (l6 + l7 + 1) >> 1;
- SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
- SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
- SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
- SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
- SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
- SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
- SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
- SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
- SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
- SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
- SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
- SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
- SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
- SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
- SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
- SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
- SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
- SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
- SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
- SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
- SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
-}
-static void pred8x8l_vertical_left_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_TOP;
- PREDICT_8x8_LOAD_TOPRIGHT;
- SRC(0,0)= (t0 + t1 + 1) >> 1;
- SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
- SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
- SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
- SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
- SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
- SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
- SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
- SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
- SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
- SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
- SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
- SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
- SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
- SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
- SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
- SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
- SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
- SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
- SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
- SRC(7,6)= (t10 + t11 + 1) >> 1;
- SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
-}
-static void pred8x8l_horizontal_up_c(uint8_t *src, int has_topleft, int has_topright, int stride)
-{
- PREDICT_8x8_LOAD_LEFT;
- SRC(0,0)= (l0 + l1 + 1) >> 1;
- SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
- SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
- SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
- SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
- SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
- SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
- SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
- SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
- SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
- SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
- SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
- SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
- SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
- SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
- SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
- SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
- SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
-}
-#undef PREDICT_8x8_LOAD_LEFT
-#undef PREDICT_8x8_LOAD_TOP
-#undef PREDICT_8x8_LOAD_TOPLEFT
-#undef PREDICT_8x8_LOAD_TOPRIGHT
-#undef PREDICT_8x8_DC
-#undef PTR
-#undef PT
-#undef PL
-#undef SRC
-
static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square, int chroma_height, int delta, int list,
uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
int src_x_offset, int src_y_offset,
const int full_mx= mx>>2;
const int full_my= my>>2;
const int pic_width = 16*s->mb_width;
- const int pic_height = 16*s->mb_height >> MB_MBAFF;
+ const int pic_height = 16*s->mb_height >> MB_FIELD;
if(!pic->data[0]) //FIXME this is unacceptable, some senseable error concealment must be done for missing reference frames
return;
qpix_op[luma_xy](dest_y + delta, src_y + delta, h->mb_linesize);
}
- if(s->flags&CODEC_FLAG_GRAY) return;
+ if(ENABLE_GRAY && s->flags&CODEC_FLAG_GRAY) return;
- if(MB_MBAFF){
+ if(MB_FIELD){
// chroma offset when predicting from a field of opposite parity
- my += 2 * ((s->mb_y & 1) - (h->ref_cache[list][scan8[n]] & 1));
+ my += 2 * ((s->mb_y & 1) - (pic->reference - 1));
emu |= (my>>3) < 0 || (my>>3) + 8 >= (pic_height>>1);
}
src_cb= pic->data[1] + (mx>>3) + (my>>3)*h->mb_uvlinesize;
dest_cb += x_offset + y_offset*h->mb_uvlinesize;
dest_cr += x_offset + y_offset*h->mb_uvlinesize;
x_offset += 8*s->mb_x;
- y_offset += 8*(s->mb_y >> MB_MBAFF);
+ y_offset += 8*(s->mb_y >> MB_FIELD);
if(list0){
Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
dest_cb += x_offset + y_offset*h->mb_uvlinesize;
dest_cr += x_offset + y_offset*h->mb_uvlinesize;
x_offset += 8*s->mb_x;
- y_offset += 8*(s->mb_y >> MB_MBAFF);
+ y_offset += 8*(s->mb_y >> MB_FIELD);
if(list0 && list1){
/* don't optimize for luma-only case, since B-frames usually
}
}
-/**
- * Sets the intra prediction function pointers.
- */
-static void init_pred_ptrs(H264Context *h){
-// MpegEncContext * const s = &h->s;
-
- h->pred4x4[VERT_PRED ]= pred4x4_vertical_c;
- h->pred4x4[HOR_PRED ]= pred4x4_horizontal_c;
- h->pred4x4[DC_PRED ]= pred4x4_dc_c;
- h->pred4x4[DIAG_DOWN_LEFT_PRED ]= pred4x4_down_left_c;
- h->pred4x4[DIAG_DOWN_RIGHT_PRED]= pred4x4_down_right_c;
- h->pred4x4[VERT_RIGHT_PRED ]= pred4x4_vertical_right_c;
- h->pred4x4[HOR_DOWN_PRED ]= pred4x4_horizontal_down_c;
- h->pred4x4[VERT_LEFT_PRED ]= pred4x4_vertical_left_c;
- h->pred4x4[HOR_UP_PRED ]= pred4x4_horizontal_up_c;
- h->pred4x4[LEFT_DC_PRED ]= pred4x4_left_dc_c;
- h->pred4x4[TOP_DC_PRED ]= pred4x4_top_dc_c;
- h->pred4x4[DC_128_PRED ]= pred4x4_128_dc_c;
-
- h->pred8x8l[VERT_PRED ]= pred8x8l_vertical_c;
- h->pred8x8l[HOR_PRED ]= pred8x8l_horizontal_c;
- h->pred8x8l[DC_PRED ]= pred8x8l_dc_c;
- h->pred8x8l[DIAG_DOWN_LEFT_PRED ]= pred8x8l_down_left_c;
- h->pred8x8l[DIAG_DOWN_RIGHT_PRED]= pred8x8l_down_right_c;
- h->pred8x8l[VERT_RIGHT_PRED ]= pred8x8l_vertical_right_c;
- h->pred8x8l[HOR_DOWN_PRED ]= pred8x8l_horizontal_down_c;
- h->pred8x8l[VERT_LEFT_PRED ]= pred8x8l_vertical_left_c;
- h->pred8x8l[HOR_UP_PRED ]= pred8x8l_horizontal_up_c;
- h->pred8x8l[LEFT_DC_PRED ]= pred8x8l_left_dc_c;
- h->pred8x8l[TOP_DC_PRED ]= pred8x8l_top_dc_c;
- h->pred8x8l[DC_128_PRED ]= pred8x8l_128_dc_c;
-
- h->pred8x8[DC_PRED8x8 ]= ff_pred8x8_dc_c;
- h->pred8x8[VERT_PRED8x8 ]= ff_pred8x8_vertical_c;
- h->pred8x8[HOR_PRED8x8 ]= ff_pred8x8_horizontal_c;
- h->pred8x8[PLANE_PRED8x8 ]= ff_pred8x8_plane_c;
- h->pred8x8[LEFT_DC_PRED8x8]= ff_pred8x8_left_dc_c;
- h->pred8x8[TOP_DC_PRED8x8 ]= ff_pred8x8_top_dc_c;
- h->pred8x8[DC_128_PRED8x8 ]= ff_pred8x8_128_dc_c;
-
- h->pred16x16[DC_PRED8x8 ]= ff_pred16x16_dc_c;
- h->pred16x16[VERT_PRED8x8 ]= ff_pred16x16_vertical_c;
- h->pred16x16[HOR_PRED8x8 ]= ff_pred16x16_horizontal_c;
- h->pred16x16[PLANE_PRED8x8 ]= ff_pred16x16_plane_c;
- h->pred16x16[LEFT_DC_PRED8x8]= ff_pred16x16_left_dc_c;
- h->pred16x16[TOP_DC_PRED8x8 ]= ff_pred16x16_top_dc_c;
- h->pred16x16[DC_128_PRED8x8 ]= ff_pred16x16_128_dc_c;
-}
-
static void free_tables(H264Context *h){
int i;
+ H264Context *hx; /* thread context currently being cleaned up in the loop at the end */
av_freep(&h->intra4x4_pred_mode);
av_freep(&h->chroma_pred_mode_table);
av_freep(&h->cbp_table);
av_freep(&h->direct_table);
av_freep(&h->non_zero_count);
av_freep(&h->slice_table_base);
- av_freep(&h->top_borders[1]);
- av_freep(&h->top_borders[0]);
h->slice_table= NULL;
av_freep(&h->mb2b_xy);
av_freep(&h->mb2b8_xy);
- av_freep(&h->s.obmc_scratchpad);
-
for(i = 0; i < MAX_SPS_COUNT; i++)
av_freep(h->sps_buffers + i);
for(i = 0; i < MAX_PPS_COUNT; i++)
av_freep(h->pps_buffers + i);
+
+ for(i = 0; i < h->s.avctx->thread_count; i++) { /* top_borders and obmc_scratchpad are released per thread context, no longer only on h */
+ hx = h->thread_context[i];
+ if(!hx) continue; /* skip empty slots */
+ av_freep(&hx->top_borders[1]);
+ av_freep(&hx->top_borders[0]);
+ av_freep(&hx->s.obmc_scratchpad);
+ }
}
static void init_dequant8_coeff_table(H264Context *h){
CHECKED_ALLOCZ(h->non_zero_count , big_mb_num * 16 * sizeof(uint8_t))
CHECKED_ALLOCZ(h->slice_table_base , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
- CHECKED_ALLOCZ(h->top_borders[0] , s->mb_width * (16+8+8) * sizeof(uint8_t))
- CHECKED_ALLOCZ(h->top_borders[1] , s->mb_width * (16+8+8) * sizeof(uint8_t))
CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
- if( h->pps.cabac ) {
- CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
- CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
- CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
- CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
- }
+ CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
+ CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
+ CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
+ CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(uint8_t));
h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
return -1;
}
+/**
+ * Let one context use the tables of another context.
+ *
+ * Only pointer values are copied from src to dst; no table is duplicated,
+ * so both contexts refer to the same storage afterwards. The scratchpad
+ * pointer of dst is cleared instead of being shared, and the intra
+ * prediction function table of dst is initialized for the codec id of src.
+ *
+ * @param dst context receiving the table pointers
+ * @param src context whose table pointers are copied
+ */
+static void clone_tables(H264Context *dst, H264Context *src){
+    /* dst gets no scratchpad of its own here */
+    dst->s.obmc_scratchpad      = NULL;
+
+    /* table pointers taken over verbatim from src */
+    dst->direct_table           = src->direct_table;
+    dst->mvd_table[1]           = src->mvd_table[1];
+    dst->mvd_table[0]           = src->mvd_table[0];
+    dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
+    dst->mb2b8_xy               = src->mb2b8_xy;
+    dst->mb2b_xy                = src->mb2b_xy;
+    dst->cbp_table              = src->cbp_table;
+    dst->slice_table            = src->slice_table;
+    dst->non_zero_count         = src->non_zero_count;
+    dst->intra4x4_pred_mode     = src->intra4x4_pred_mode;
+
+    ff_h264_pred_init(&dst->hpc, src->s.codec_id);
+}
+
+/**
+ * Init context
+ * Allocate buffers which are not shared amongst multiple threads.
+ *
+ * Both top_borders buffers are requested with a size of
+ * mb_width * (16+8+8) bytes.
+ *
+ * @param h context whose top_borders[0] and top_borders[1] are allocated
+ * @return 0 on success, -1 when control reaches the fail label
+ * (NOTE(review): presumably CHECKED_ALLOCZ jumps to fail when an
+ * allocation does not succeed; confirm against the macro definition)
+ */
+static int context_init(H264Context *h){
+ CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
+ CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
+
+ return 0;
+fail:
+ return -1; // free_tables will clean up for us
+}
+
static void common_init(H264Context *h){
MpegEncContext * const s = &h->s;
s->height = s->avctx->height;
s->codec_id= s->avctx->codec->id;
- init_pred_ptrs(h);
+ ff_h264_pred_init(&h->hpc, s->codec_id);
h->dequant_coeff_pps= -1;
s->unrestricted_mv=1;
// set defaults
// s->decode_mb= ff_h263_decode_mb;
+ s->quarter_sample = 1;
s->low_delay= 1;
avctx->pix_fmt= PIX_FMT_YUV420P;
h->is_avc = 0;
}
+ h->thread_context[0] = h;
return 0;
}
if(MPV_frame_start(s, s->avctx) < 0)
return -1;
ff_er_frame_start(s);
+ /*
+ * MPV_frame_start uses pict_type to derive key_frame.
+ * This is incorrect for H.264; IDR markings must be used.
+ * Zero here; IDR markings per slice in frame or fields are OR'd in later.
+ * See decode_nal_units().
+ */
+ s->current_picture_ptr->key_frame= 0;
assert(s->linesize && s->uvlinesize);
/* can't be in alloc_tables because linesize isn't known there.
* FIXME: redo bipred weight to not require extra buffer? */
- if(!s->obmc_scratchpad)
- s->obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
+ for(i = 0; i < s->avctx->thread_count; i++)
+ if(!h->thread_context[i]->s.obmc_scratchpad)
+ h->thread_context[i]->s.obmc_scratchpad = av_malloc(16*2*s->linesize + 8*2*s->uvlinesize);
/* some macroblocks will be accessed before they're available */
- if(FRAME_MBAFF)
+ if(FRAME_MBAFF || s->avctx->thread_count > 1)
memset(h->slice_table, -1, (s->mb_height*s->mb_stride-1) * sizeof(uint8_t));
// s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
*(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y + 16*linesize);
*(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
- if(simple || !(s->flags&CODEC_FLAG_GRAY)){
+ if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
h->left_border[17 ]= h->top_borders[0][s->mb_x][16+7];
h->left_border[17+9]= h->top_borders[0][s->mb_x][24+7];
for(i=1; i<9; i++){
}
}
- if(simple || !(s->flags&CODEC_FLAG_GRAY)){
+ if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if(deblock_left){
for(i = !deblock_top; i<9; i++){
XCHG(h->left_border[i+17 ], src_cb[i*uvlinesize], temp8, xchg);
*(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y + 33*linesize);
*(uint64_t*)(h->top_borders[1][s->mb_x]+8)= *(uint64_t*)(src_y +8+33*linesize);
- if(!(s->flags&CODEC_FLAG_GRAY)){
+ if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
h->left_border[34 ]= h->top_borders[0][s->mb_x][16+7];
h->left_border[34+ 1]= h->top_borders[1][s->mb_x][16+7];
h->left_border[34+18 ]= h->top_borders[0][s->mb_x][24+7];
}
}
- if(!(s->flags&CODEC_FLAG_GRAY)){
+ if(!ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
if(deblock_left){
for(i = (!deblock_top) << 1; i<18; i++){
XCHG(h->left_border[i+34 ], src_cb[i*uvlinesize], temp8, xchg);
continue;
if(IS_16X16(mb_type)){
int8_t *ref = &h->ref_cache[list][scan8[0]];
- fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
+ fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
}else{
for(i=0; i<16; i+=4){
//FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
int ref = h->ref_cache[list][scan8[i]];
if(ref >= 0)
- fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
+ fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
}
}
}
if(h->deblocking_filter && (simple || !FRAME_MBAFF))
xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 1, simple);
- if(simple || !(s->flags&CODEC_FLAG_GRAY)){
- h->pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
- h->pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
+ if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
+ h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cb, uvlinesize);
+ h->hpc.pred8x8[ h->chroma_pred_mode ](dest_cr, uvlinesize);
}
if(IS_INTRA4x4(mb_type)){
uint8_t * const ptr= dest_y + block_offset[i];
const int dir= h->intra4x4_pred_mode_cache[ scan8[i] ];
const int nnz = h->non_zero_count_cache[ scan8[i] ];
- h->pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
+ h->hpc.pred8x8l[ dir ](ptr, (h->topleft_samples_available<<i)&0x8000,
(h->topright_samples_available<<i)&0x4000, linesize);
if(nnz){
if(nnz == 1 && h->mb[i*16])
}else
topright= NULL;
- h->pred4x4[ dir ](ptr, topright, linesize);
+ h->hpc.pred4x4[ dir ](ptr, topright, linesize);
nnz = h->non_zero_count_cache[ scan8[i] ];
if(nnz){
if(is_h264){
}
}
}else{
- h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
+ h->hpc.pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
if(is_h264){
if(!transform_bypass)
- h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
+ h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[0][s->qscale][0]);
}else
svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
}
}
}
- if(simple || !(s->flags&CODEC_FLAG_GRAY)){
+ if(simple || !ENABLE_GRAY || !(s->flags&CODEC_FLAG_GRAY)){
uint8_t *dest[2] = {dest_cb, dest_cr};
if(transform_bypass){
idct_add = idct_dc_add = s->dsp.add_pixels4;
const int mb_y= s->mb_y;
const int mb_xy= mb_x + mb_y*s->mb_stride;
const int mb_type= s->current_picture.mb_type[mb_xy];
- int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (s->flags&CODEC_FLAG_GRAY) || s->encoding;
+ int is_complex = FRAME_MBAFF || MB_FIELD || IS_INTRA_PCM(mb_type) || s->codec_id != CODEC_ID_H264 || (ENABLE_GRAY && (s->flags&CODEC_FLAG_GRAY)) || s->encoding;
if(!s->decode)
return;
else hl_decode_mb_simple(h);
}
+/**
+ * Turn a Picture into a view of one of its fields, in place.
+ *
+ * For each of the four planes the line size is doubled; when the bottom
+ * field is requested the plane pointer is first advanced by one line of
+ * the original line size. The reference marking of the picture is set to
+ * the requested parity.
+ *
+ * @param pic    picture to modify
+ * @param parity field to select; only PICT_BOTTOM_FIELD moves the data
+ *               pointers
+ */
+static void pic_as_field(Picture *pic, const int parity){
+    const int is_bottom = (parity == PICT_BOTTOM_FIELD);
+    int plane;
+
+    pic->reference = parity;
+
+    for (plane = 0; plane < 4; plane++) {
+        if (is_bottom)
+            pic->data[plane] += pic->linesize[plane];
+        pic->linesize[plane] *= 2;
+    }
+}
+
+/**
+ * Copy src to dest as a single field, provided src is referenced with the
+ * requested parity.
+ *
+ * On a match dest becomes a copy of src converted by pic_as_field(), and
+ * its pic_id is set to twice the copied pic_id plus id_add. Without a
+ * match dest is left untouched.
+ *
+ * @param dest   where the field is written
+ * @param src    candidate picture
+ * @param parity parity that must be set in src->reference
+ * @param id_add value added to the doubled pic_id
+ * @return 1 if a field was written to dest, 0 otherwise
+ */
+static int split_field_copy(Picture *dest, Picture *src,
+                            int parity, int id_add){
+    if (!(src->reference & parity))
+        return 0;
+
+    *dest = *src;
+    pic_as_field(dest, parity);
+    dest->pic_id = dest->pic_id * 2 + id_add;
+
+    return 1;
+}
+
+/**
+ * Split one reference list into field parts, interleaving by parity
+ * as per H.264 spec section 8.2.4.2.5. Output fields have their data pointers
+ * set to look at the actual start of data for that field.
+ *
+ * Two cursors walk src independently, one looking for fields of the same
+ * parity as the current picture and one for fields of the opposite parity.
+ * Output alternates between the two, starting with the same parity; a source
+ * entry lacking the wanted parity is skipped without producing output.
+ * When one cursor reaches the end of src, the fields still reachable
+ * through the other cursor are appended in list order instead of being
+ * dropped.
+ *
+ * @param dest output list
+ * @param dest_len maximum number of fields to put in dest
+ * @param src the source reference list containing fields and/or field pairs
+ *            (aka short_ref/long_ref, or
+ *            refFrameListXShortTerm/refFrameListLongTerm in spec-speak)
+ * @param src_len number of Picture's in source (pairs and unmatched fields)
+ * @param parity the parity of the picture being decoded/needing
+ *               these ref pics (PICT_{TOP,BOTTOM}_FIELD)
+ * @return number of fields placed in dest
+ */
+static int split_field_half_ref_list(Picture *dest, int dest_len,
+                                     Picture *src, int src_len, int parity){
+    int same_parity  = 1;
+    int same_i       = 0;
+    int opp_i        = 0;
+    int out_i;
+    int field_output;
+
+    for (out_i = 0; out_i < dest_len; out_i += field_output) {
+        const int same_left = same_i < src_len;
+        const int opp_left  = opp_i  < src_len;
+
+        if (!same_left && !opp_left)
+            break;
+
+        /* take the same-parity cursor when it is its turn, or when the
+         * opposite-parity cursor has nothing left to offer */
+        if (same_left && (same_parity || !opp_left)) {
+            field_output = split_field_copy(dest + out_i, src + same_i,
+                                            parity, 1);
+            same_parity = !field_output;
+            same_i++;
+
+        } else {
+            field_output = split_field_copy(dest + out_i, src + opp_i,
+                                            PICT_FRAME - parity, 0);
+            same_parity = field_output;
+            opp_i++;
+        }
+    }
+
+    return out_i;
+}
+
+/**
+ * Split the reference frame list into a reference field list.
+ * This implements H.264 spec 8.2.4.2.5 for a combined input list.
+ * The input list holds reference field pairs as well as unmatched
+ * reference fields, ordered the way the spec describes RefPicListX for
+ * frames in 8.2.4.2.1 and 8.2.4.2.3, except that unmatched fields are
+ * present too. Conceptually this equals the concatenation of
+ * refFrameListXShortTerm and refFrameListLongTerm.
+ *
+ * The entries before long_i and the entries from long_i onwards are split
+ * separately, the second part being written right behind the first.
+ *
+ * @param dest output reference list where ordered fields are to be placed
+ * @param dest_len max number of fields to place at dest
+ * @param src source reference list, as described above
+ * @param src_len number of pictures (pairs and unmatched fields) in src
+ * @param parity parity of field being currently decoded
+ *               (one of PICT_{TOP,BOTTOM}_FIELD)
+ * @param long_i index into src array that holds first long reference picture,
+ *               or src_len if no long refs present.
+ * @return total number of fields placed in dest
+ */
+static int split_field_ref_list(Picture *dest, int dest_len,
+                                Picture *src, int src_len,
+                                int parity, int long_i){
+    const int short_fields =
+        split_field_half_ref_list(dest, dest_len, src, long_i, parity);
+    const int long_fields =
+        split_field_half_ref_list(dest + short_fields,
+                                  dest_len - short_fields,
+                                  src + long_i, src_len - long_i, parity);
+
+    return short_fields + long_fields;
+}
+
/**
* fills the default_ref_list.
*/
MpegEncContext * const s = &h->s;
int i;
int smallest_poc_greater_than_current = -1;
+ int structure_sel;
Picture sorted_short_ref[32];
+ Picture field_entry_list[2][32];
+ Picture *frame_list[2];
- if(h->slice_type==B_TYPE){
+ if (FIELD_PICTURE) {
+ structure_sel = PICT_FRAME;
+ frame_list[0] = field_entry_list[0];
+ frame_list[1] = field_entry_list[1];
+ } else {
+ structure_sel = 0;
+ frame_list[0] = h->default_ref_list[0];
+ frame_list[1] = h->default_ref_list[1];
+ }
+
+ if(h->slice_type==FF_B_TYPE){
+ int list;
+ int len[2];
+ int short_len[2];
int out_i;
int limit= INT_MIN;
}
}
}
- }
- if(s->picture_structure == PICT_FRAME){
- if(h->slice_type==B_TYPE){
- int list;
- tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
-
- // find the largest poc
- for(list=0; list<2; list++){
- int index = 0;
- int j= -99;
- int step= list ? -1 : 1;
+ tprintf(h->s.avctx, "current poc: %d, smallest_poc_greater_than_current: %d\n", s->current_picture_ptr->poc, smallest_poc_greater_than_current);
- for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
- while(j<0 || j>= h->short_ref_count){
- if(j != -99 && step == (list ? -1 : 1))
- return -1;
- step = -step;
- j= smallest_poc_greater_than_current + (step>>1);
- }
- if(sorted_short_ref[j].reference != 3) continue;
- h->default_ref_list[list][index ]= sorted_short_ref[j];
- h->default_ref_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
+ // find the largest poc
+ for(list=0; list<2; list++){
+ int index = 0;
+ int j= -99;
+ int step= list ? -1 : 1;
+
+ for(i=0; i<h->short_ref_count && index < h->ref_count[list]; i++, j+=step) {
+ int sel;
+ while(j<0 || j>= h->short_ref_count){
+ if(j != -99 && step == (list ? -1 : 1))
+ return -1;
+ step = -step;
+ j= smallest_poc_greater_than_current + (step>>1);
}
+ sel = sorted_short_ref[j].reference | structure_sel;
+ if(sel != PICT_FRAME) continue;
+ frame_list[list][index ]= sorted_short_ref[j];
+ frame_list[list][index++].pic_id= sorted_short_ref[j].frame_num;
+ }
+ short_len[list] = index;
- for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
- if(h->long_ref[i] == NULL) continue;
- if(h->long_ref[i]->reference != 3) continue;
+ for(i = 0; i < 16 && index < h->ref_count[ list ]; i++){
+ int sel;
+ if(h->long_ref[i] == NULL) continue;
+ sel = h->long_ref[i]->reference | structure_sel;
+ if(sel != PICT_FRAME) continue;
- h->default_ref_list[ list ][index ]= *h->long_ref[i];
- h->default_ref_list[ list ][index++].pic_id= i;;
- }
+ frame_list[ list ][index ]= *h->long_ref[i];
+ frame_list[ list ][index++].pic_id= i;
+ }
+ len[list] = index;
+ }
- if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
- // swap the two first elements of L1 when
- // L0 and L1 are identical
- Picture temp= h->default_ref_list[1][0];
- h->default_ref_list[1][0] = h->default_ref_list[1][1];
- h->default_ref_list[1][1] = temp;
- }
+ for(list=0; list<2; list++){
+ if (FIELD_PICTURE)
+ len[list] = split_field_ref_list(h->default_ref_list[list],
+ h->ref_count[list],
+ frame_list[list],
+ len[list],
+ s->picture_structure,
+ short_len[list]);
+
+ // swap the two first elements of L1 when L0 and L1 are identical
+ if(list && len[0] > 1 && len[0] == len[1])
+ for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
+ if(i == len[0]){
+ FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
+ break;
+ }
- if(index < h->ref_count[ list ])
- memset(&h->default_ref_list[list][index], 0, sizeof(Picture)*(h->ref_count[ list ] - index));
- }
- }else{
- int index=0;
- for(i=0; i<h->short_ref_count; i++){
- if(h->short_ref[i]->reference != 3) continue; //FIXME refernce field shit
- h->default_ref_list[0][index ]= *h->short_ref[i];
- h->default_ref_list[0][index++].pic_id= h->short_ref[i]->frame_num;
- }
- for(i = 0; i < 16; i++){
- if(h->long_ref[i] == NULL) continue;
- if(h->long_ref[i]->reference != 3) continue;
- h->default_ref_list[0][index ]= *h->long_ref[i];
- h->default_ref_list[0][index++].pic_id= i;;
- }
- if(index < h->ref_count[0])
- memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
+ if(len[list] < h->ref_count[ list ])
+ memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
}
- }else{ //FIELD
- if(h->slice_type==B_TYPE){
- }else{
- //FIXME second field balh
+
+
+ }else{
+ int index=0;
+ int short_len;
+ for(i=0; i<h->short_ref_count; i++){
+ int sel;
+ sel = h->short_ref[i]->reference | structure_sel;
+ if(sel != PICT_FRAME) continue;
+ frame_list[0][index ]= *h->short_ref[i];
+ frame_list[0][index++].pic_id= h->short_ref[i]->frame_num;
+ }
+ short_len = index;
+ for(i = 0; i < 16; i++){
+ int sel;
+ if(h->long_ref[i] == NULL) continue;
+ sel = h->long_ref[i]->reference | structure_sel;
+ if(sel != PICT_FRAME) continue;
+ frame_list[0][index ]= *h->long_ref[i];
+ frame_list[0][index++].pic_id= i;
}
+
+ if (FIELD_PICTURE)
+ index = split_field_ref_list(h->default_ref_list[0],
+ h->ref_count[0], frame_list[0],
+ index, s->picture_structure,
+ short_len);
+
+ if(index < h->ref_count[0])
+ memset(&h->default_ref_list[0][index], 0, sizeof(Picture)*(h->ref_count[0] - index));
}
#ifdef TRACE
for (i=0; i<h->ref_count[0]; i++) {
tprintf(h->s.avctx, "List0: %s fn:%d 0x%p\n", (h->default_ref_list[0][i].long_ref ? "LT" : "ST"), h->default_ref_list[0][i].pic_id, h->default_ref_list[0][i].data[0]);
}
- if(h->slice_type==B_TYPE){
+ if(h->slice_type==FF_B_TYPE){
for (i=0; i<h->ref_count[1]; i++) {
- tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
+ tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
}
}
#endif
static void print_short_term(H264Context *h);
static void print_long_term(H264Context *h);
+/**
+ * Derive the picture structure addressed by pic_num in the current
+ * decoding context (frame or field), and strip the parity bit from it.
+ * pic_num is the picture number without wrapping
+ * (so, 0<=pic_num<max_pic_num).
+ *
+ * When a frame is being decoded, pic_num is returned unchanged and the
+ * structure is that of the current picture. When a field is being decoded,
+ * an even pic_num addresses the field opposite to the current one, and the
+ * returned value is pic_num with its lowest bit shifted out.
+ *
+ * @param pic_num picture number for which to extract structure information
+ * @param structure receives one of PICT_XXX describing the structure of
+ *                  the picture with pic_num
+ * @return frame number (short term) or long term index of picture
+ *         described by pic_num
+ */
+static int pic_num_extract(H264Context *h, int pic_num, int *structure){
+    MpegEncContext * const s = &h->s;
+    int addressed = s->picture_structure;
+
+    if(FIELD_PICTURE){
+        if ((pic_num & 1) == 0)
+            addressed ^= PICT_FRAME;   /* opposite field */
+        pic_num >>= 1;
+    }
+
+    *structure = addressed;
+    return pic_num;
+}
+
static int decode_ref_pic_list_reordering(H264Context *h){
MpegEncContext * const s = &h->s;
- int list, index;
+ int list, index, pic_structure;
print_short_term(h);
print_long_term(h);
- if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
+ if(h->slice_type==FF_I_TYPE || h->slice_type==FF_SI_TYPE) return 0; //FIXME move before func
for(list=0; list<h->list_count; list++){
memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
if(reordering_of_pic_nums_idc<3){
if(reordering_of_pic_nums_idc<2){
const unsigned int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
+ int frame_num;
- if(abs_diff_pic_num >= h->max_pic_num){
+ if(abs_diff_pic_num > h->max_pic_num){
av_log(h->s.avctx, AV_LOG_ERROR, "abs_diff_pic_num overflow\n");
return -1;
}
else pred+= abs_diff_pic_num;
pred &= h->max_pic_num - 1;
+ frame_num = pic_num_extract(h, pred, &pic_structure);
+
for(i= h->short_ref_count-1; i>=0; i--){
ref = h->short_ref[i];
- assert(ref->reference == 3);
+ assert(ref->reference);
assert(!ref->long_ref);
- if(ref->data[0] != NULL && ref->frame_num == pred && ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
+ if(ref->data[0] != NULL &&
+ ref->frame_num == frame_num &&
+ (ref->reference & pic_structure) &&
+ ref->long_ref == 0) // ignore non existing pictures by testing data[0] pointer
break;
}
if(i>=0)
- ref->pic_id= ref->frame_num;
+ ref->pic_id= pred;
}else{
+ int long_idx;
pic_id= get_ue_golomb(&s->gb); //long_term_pic_idx
- if(pic_id>31){
+
+ long_idx= pic_num_extract(h, pic_id, &pic_structure);
+
+ if(long_idx>31){
av_log(h->s.avctx, AV_LOG_ERROR, "long_term_pic_idx overflow\n");
return -1;
}
- ref = h->long_ref[pic_id];
- if(ref){
+ ref = h->long_ref[long_idx];
+ assert(!(ref && !ref->reference));
+ if(ref && (ref->reference & pic_structure)){
ref->pic_id= pic_id;
- assert(ref->reference == 3);
assert(ref->long_ref);
i=0;
}else{
h->ref_list[list][i]= h->ref_list[list][i-1];
}
h->ref_list[list][index]= *ref;
+ if (FIELD_PICTURE){
+ pic_as_field(&h->ref_list[list][index], pic_structure);
+ }
}
}else{
av_log(h->s.avctx, AV_LOG_ERROR, "illegal reordering_of_pic_nums_idc\n");
}
}
- if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
+ if(h->slice_type==FF_B_TYPE && !h->direct_spatial_mv_pred)
direct_dist_scale_factor(h);
direct_ref_list_init(h);
return 0;
field[0] = *frame;
for(j=0; j<3; j++)
field[0].linesize[j] <<= 1;
+ field[0].reference = PICT_TOP_FIELD;
field[1] = field[0];
for(j=0; j<3; j++)
field[1].data[j] += frame->linesize[j];
+ field[1].reference = PICT_BOTTOM_FIELD;
h->luma_weight[list][16+2*i] = h->luma_weight[list][16+2*i+1] = h->luma_weight[list][i];
h->luma_offset[list][16+2*i] = h->luma_offset[list][16+2*i+1] = h->luma_offset[list][i];
}
}
}
- if(h->slice_type != B_TYPE) break;
+ if(h->slice_type != FF_B_TYPE) break;
}
h->use_weight= h->use_weight || h->use_weight_chroma;
return 0;
}
}
-static inline void unreference_pic(H264Context *h, Picture *pic){
+/**
+ * Mark a picture as no longer needed for reference. The refmask
+ * argument allows unreferencing of individual fields or the whole frame.
+ * If the picture becomes entirely unreferenced, but is being held for
+ * display purposes, it is marked as such.
+ * @param h decoder context; its delayed_output_pic and delayed_pic list
+ * are consulted to detect pictures still held for output
+ * @param pic picture whose reference marking is updated in place
+ * @param refmask mask of the fields that stay referenced; the mask is bitwise
+ * anded with the reference marking of pic, so passing 0 unreferences
+ * the whole picture
+ * @return non-zero if pic becomes entirely unreferenced (except possibly
+ * for display purposes) zero if one of the fields remains in
+ * reference
+ */
+static inline int unreference_pic(H264Context *h, Picture *pic, int refmask){
int i;
- pic->reference=0;
- if(pic == h->delayed_output_pic)
- pic->reference=1;
- else{
- for(i = 0; h->delayed_pic[i]; i++)
- if(pic == h->delayed_pic[i]){
- pic->reference=1;
- break;
- }
+ if (pic->reference &= refmask) { /* intentional assignment: clear the masked-out fields, then test what is left */
+ return 0;
+ } else {
+ if(pic == h->delayed_output_pic)
+ pic->reference=DELAYED_PIC_REF; /* not a reference any more, only held for output */
+ else{
+ for(i = 0; h->delayed_pic[i]; i++) /* the scan stops at the first NULL entry */
+ if(pic == h->delayed_pic[i]){
+ pic->reference=DELAYED_PIC_REF;
+ break;
+ }
+ }
+ return 1;
}
}
for(i=0; i<16; i++){
if (h->long_ref[i] != NULL) {
- unreference_pic(h, h->long_ref[i]);
+ unreference_pic(h, h->long_ref[i], 0);
h->long_ref[i]= NULL;
}
}
h->long_ref_count=0;
for(i=0; i<h->short_ref_count; i++){
- unreference_pic(h, h->short_ref[i]);
+ unreference_pic(h, h->short_ref[i], 0);
h->short_ref[i]= NULL;
}
h->short_ref_count=0;
idr(h);
if(h->s.current_picture_ptr)
h->s.current_picture_ptr->reference= 0;
+ h->s.first_field= 0;
+ ff_mpeg_flush(avctx);
}
/**
- *
- * @return the removed picture or NULL if an error occurs
+ * Find a Picture in the short term reference list by frame number.
+ * The list itself is not modified. The first entry with a matching
+ * frame number wins.
+ * @param h decoder context whose short_ref list is searched
+ * @param frame_num frame number to search for
+ * @param idx the index into h->short_ref where returned picture is found
+ * left unwritten if no picture found.
+ * @return pointer to the found picture, or NULL if no pic with the provided
+ * frame number is found
*/
-static Picture * remove_short(H264Context *h, int frame_num){
+static Picture * find_short(H264Context *h, int frame_num, int *idx){
MpegEncContext * const s = &h->s;
int i;
- if(s->avctx->debug&FF_DEBUG_MMCO)
- av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
-
for(i=0; i<h->short_ref_count; i++){
Picture *pic= h->short_ref[i];
if(s->avctx->debug&FF_DEBUG_MMCO)
av_log(h->s.avctx, AV_LOG_DEBUG, "%d %d %p\n", i, pic->frame_num, pic);
- if(pic->frame_num == frame_num){
- h->short_ref[i]= NULL;
- memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i - 1)*sizeof(Picture*));
- h->short_ref_count--;
+ if(pic->frame_num == frame_num) {
+ *idx = i; /* report the position so the caller can remove the entry itself */
return pic;
}
}
return NULL;
}
+/**
+ * Remove a picture from the short term reference list by its index in
+ * that list. The index is only sanity-checked by an assert; it is assumed
+ * to be valid, i.e. 0 <= i < h->short_ref_count. Entries behind the removed
+ * one are shifted down by one position.
+ * @param h decoder context owning the short_ref list
+ * @param i index into h->short_ref of picture to remove.
+ */
+static void remove_short_at_index(H264Context *h, int i){
+    /* index 0 is a legal position: removing the head of the list, as
+     * remove_short() does for h->short_ref[0], arrives here with i == 0 */
+    assert(i >= 0 && i < h->short_ref_count);
+    h->short_ref[i]= NULL;
+    /* after the decrement, (short_ref_count - i) entries follow slot i */
+    if (--h->short_ref_count)
+        memmove(&h->short_ref[i], &h->short_ref[i+1], (h->short_ref_count - i)*sizeof(Picture*));
+}
+
+/**
+ * Look up a picture by frame number in the short term reference list and
+ * take it off that list.
+ * @param h decoder context owning the short_ref list
+ * @param frame_num frame number of the picture to remove
+ * @return the removed picture, or NULL if no picture with that frame
+ *         number is on the list
+ */
+static Picture * remove_short(H264Context *h, int frame_num){
+    MpegEncContext * const s = &h->s;
+    Picture *found;
+    int pos;
+
+    if(s->avctx->debug&FF_DEBUG_MMCO)
+        av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
+
+    found = find_short(h, frame_num, &pos);
+    if (!found)
+        return NULL;
+
+    remove_short_at_index(h, pos);
+    return found;
+}
+
+/**
+ * Drop the entry at a given index from the long term reference list.
+ * The index is not validated and the slot is not inspected: the slot is
+ * set to NULL and long_ref_count is decremented unconditionally, so the
+ * caller has to make sure the slot was occupied. Other entries keep their
+ * positions.
+ * @param h decoder context owning the long_ref list
+ * @param i index into h->long_ref of picture to remove.
+ */
+static void remove_long_at_index(H264Context *h, int i){
+    h->long_ref_count--;
+    h->long_ref[i] = NULL;
+}
+
/**
*
* @return the removed picture or NULL if an error occurs
Picture *pic;
pic= h->long_ref[i];
- h->long_ref[i]= NULL;
- if(pic) h->long_ref_count--;
+ if (pic)
+ remove_long_at_index(h, i);
return pic;
}
static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
MpegEncContext * const s = &h->s;
int i, j;
- int current_is_long=0;
+ int current_ref_assigned=0;
Picture *pic;
if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
for(i=0; i<mmco_count; i++){
+ int structure, frame_num, unref_pic;
if(s->avctx->debug&FF_DEBUG_MMCO)
- av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
+ av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_pic_num, h->mmco[i].long_arg);
switch(mmco[i].opcode){
case MMCO_SHORT2UNUSED:
- pic= remove_short(h, mmco[i].short_frame_num);
- if(pic)
- unreference_pic(h, pic);
- else if(s->avctx->debug&FF_DEBUG_MMCO)
- av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_short() failure\n");
+ if(s->avctx->debug&FF_DEBUG_MMCO)
+ av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short %d count %d\n", h->mmco[i].short_pic_num, h->short_ref_count);
+ frame_num = pic_num_extract(h, mmco[i].short_pic_num, &structure);
+ pic = find_short(h, frame_num, &j);
+ if (pic) {
+ if (unreference_pic(h, pic, structure ^ PICT_FRAME))
+ remove_short_at_index(h, j);
+ } else if(s->avctx->debug&FF_DEBUG_MMCO)
+ av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref short failure\n");
break;
case MMCO_SHORT2LONG:
- pic= remove_long(h, mmco[i].long_index);
- if(pic) unreference_pic(h, pic);
+ if (FIELD_PICTURE && mmco[i].long_arg < h->long_ref_count &&
+ h->long_ref[mmco[i].long_arg]->frame_num ==
+ mmco[i].short_pic_num / 2) {
+ /* do nothing, we've already moved this field pair. */
+ } else {
+ int frame_num = mmco[i].short_pic_num >> FIELD_PICTURE;
- h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
- if (h->long_ref[ mmco[i].long_index ]){
- h->long_ref[ mmco[i].long_index ]->long_ref=1;
- h->long_ref_count++;
+ pic= remove_long(h, mmco[i].long_arg);
+ if(pic) unreference_pic(h, pic, 0);
+
+ h->long_ref[ mmco[i].long_arg ]= remove_short(h, frame_num);
+ if (h->long_ref[ mmco[i].long_arg ]){
+ h->long_ref[ mmco[i].long_arg ]->long_ref=1;
+ h->long_ref_count++;
+ }
}
break;
case MMCO_LONG2UNUSED:
- pic= remove_long(h, mmco[i].long_index);
- if(pic)
- unreference_pic(h, pic);
- else if(s->avctx->debug&FF_DEBUG_MMCO)
- av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: remove_long() failure\n");
+ j = pic_num_extract(h, mmco[i].long_arg, &structure);
+ pic = h->long_ref[j];
+ if (pic) {
+ if (unreference_pic(h, pic, structure ^ PICT_FRAME))
+ remove_long_at_index(h, j);
+ } else if(s->avctx->debug&FF_DEBUG_MMCO)
+ av_log(h->s.avctx, AV_LOG_DEBUG, "mmco: unref long failure\n");
break;
case MMCO_LONG:
- pic= remove_long(h, mmco[i].long_index);
- if(pic) unreference_pic(h, pic);
+ unref_pic = 1;
+ if (FIELD_PICTURE && !s->first_field) {
+ if (h->long_ref[mmco[i].long_arg] == s->current_picture_ptr) {
+ /* Just mark second field as referenced */
+ unref_pic = 0;
+ } else if (s->current_picture_ptr->reference) {
+ /* First field in pair is in short term list or
+ * at a different long term index.
+ * This is not allowed; see 7.4.3, notes 2 and 3.
+ * Report the problem and keep the pair where it is,
+ * and mark this field valid.
+ */
+ av_log(h->s.avctx, AV_LOG_ERROR,
+ "illegal long term reference assignment for second "
+ "field in complementary field pair (first field is "
+ "short term or has non-matching long index)\n");
+ unref_pic = 0;
+ }
+ }
- h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
- h->long_ref[ mmco[i].long_index ]->long_ref=1;
- h->long_ref_count++;
+ if (unref_pic) {
+ pic= remove_long(h, mmco[i].long_arg);
+ if(pic) unreference_pic(h, pic, 0);
+
+ h->long_ref[ mmco[i].long_arg ]= s->current_picture_ptr;
+ h->long_ref[ mmco[i].long_arg ]->long_ref=1;
+ h->long_ref_count++;
+ }
- current_is_long=1;
+ s->current_picture_ptr->reference |= s->picture_structure;
+ current_ref_assigned=1;
break;
case MMCO_SET_MAX_LONG:
- assert(mmco[i].long_index <= 16);
+ assert(mmco[i].long_arg <= 16);
// just remove the long term which index is greater than new max
- for(j = mmco[i].long_index; j<16; j++){
+ for(j = mmco[i].long_arg; j<16; j++){
pic = remove_long(h, j);
- if (pic) unreference_pic(h, pic);
+ if (pic) unreference_pic(h, pic, 0);
}
break;
case MMCO_RESET:
while(h->short_ref_count){
pic= remove_short(h, h->short_ref[0]->frame_num);
- if(pic) unreference_pic(h, pic);
+ if(pic) unreference_pic(h, pic, 0);
}
for(j = 0; j < 16; j++) {
pic= remove_long(h, j);
- if(pic) unreference_pic(h, pic);
+ if(pic) unreference_pic(h, pic, 0);
}
break;
default: assert(0);
}
}
- if(!current_is_long){
+ if (!current_ref_assigned && FIELD_PICTURE &&
+ !s->first_field && s->current_picture_ptr->reference) {
+
+ /* Second field of complementary field pair; the first field of
+ * which is already referenced. If short referenced, it
+ * should be first entry in short_ref. If not, it must exist
+ * in long_ref; trying to put it on the short list here is an
+ * error in the encoded bit stream (ref: 7.4.3, NOTE 2 and 3).
+ */
+ if (h->short_ref_count && h->short_ref[0] == s->current_picture_ptr) {
+ /* Just mark the second field valid */
+ s->current_picture_ptr->reference = PICT_FRAME;
+ } else if (s->current_picture_ptr->long_ref) {
+ av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term reference "
+ "assignment for second field "
+ "in complementary field pair "
+ "(first field is long term)\n");
+ } else {
+ /*
+ * First field in reference, but not in any sensible place on our
+ * reference lists. This shouldn't happen unless reference
+ * handling somewhere else is wrong.
+ */
+ assert(0);
+ }
+ current_ref_assigned = 1;
+ }
+
+ if(!current_ref_assigned){
pic= remove_short(h, s->current_picture_ptr->frame_num);
if(pic){
- unreference_pic(h, pic);
+ unreference_pic(h, pic, 0);
av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
}
h->short_ref[0]= s->current_picture_ptr;
h->short_ref[0]->long_ref=0;
h->short_ref_count++;
+ s->current_picture_ptr->reference |= s->picture_structure;
+ }
+
+ if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
+
+ /* We have too many reference frames, probably due to corrupted
+ * stream. Need to discard one frame. Prevents overrun of the
+ * short_ref and long_ref buffers.
+ */
+ av_log(h->s.avctx, AV_LOG_ERROR,
+ "number of reference frames exceeds max (probably "
+ "corrupt input), discarding one\n");
+
+ if (h->long_ref_count) {
+ for (i = 0; i < 16; ++i)
+ if (h->long_ref[i])
+ break;
+
+ assert(i < 16);
+ pic = h->long_ref[i];
+ remove_long_at_index(h, i);
+ } else {
+ pic = h->short_ref[h->short_ref_count - 1];
+ remove_short_at_index(h, h->short_ref_count - 1);
+ }
+ unreference_pic(h, pic, 0);
}
print_short_term(h);
if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
s->broken_link= get_bits1(gb) -1;
- h->mmco[0].long_index= get_bits1(gb) - 1; // current_long_term_idx
- if(h->mmco[0].long_index == -1)
+ h->mmco[0].long_arg= get_bits1(gb) - 1; // current_long_term_idx
+ if(h->mmco[0].long_arg == -1)
h->mmco_index= 0;
else{
h->mmco[0].opcode= MMCO_LONG;
h->mmco[i].opcode= opcode;
if(opcode==MMCO_SHORT2UNUSED || opcode==MMCO_SHORT2LONG){
- h->mmco[i].short_frame_num= (h->frame_num - get_ue_golomb(gb) - 1) & ((1<<h->sps.log2_max_frame_num)-1); //FIXME fields
-/* if(h->mmco[i].short_frame_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_frame_num ] == NULL){
+ h->mmco[i].short_pic_num= (h->curr_pic_num - get_ue_golomb(gb) - 1) & (h->max_pic_num - 1);
+/* if(h->mmco[i].short_pic_num >= h->short_ref_count || h->short_ref[ h->mmco[i].short_pic_num ] == NULL){
av_log(s->avctx, AV_LOG_ERROR, "illegal short ref in memory management control operation %d\n", mmco);
return -1;
}*/
}
if(opcode==MMCO_SHORT2LONG || opcode==MMCO_LONG2UNUSED || opcode==MMCO_LONG || opcode==MMCO_SET_MAX_LONG){
- unsigned int long_index= get_ue_golomb(gb);
- if(/*h->mmco[i].long_index >= h->long_ref_count || h->long_ref[ h->mmco[i].long_index ] == NULL*/ long_index >= 16){
+ unsigned int long_arg= get_ue_golomb(gb);
+ if(long_arg >= 32 || (long_arg >= 16 && !(opcode == MMCO_LONG2UNUSED && FIELD_PICTURE))){
av_log(h->s.avctx, AV_LOG_ERROR, "illegal long ref in memory management control operation %d\n", opcode);
return -1;
}
- h->mmco[i].long_index= long_index;
+ h->mmco[i].long_arg= long_arg;
}
if(opcode > (unsigned)MMCO_LONG){
}else{
assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
- if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count){ //FIXME fields
+ if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
+ !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
h->mmco[0].opcode= MMCO_SHORT2UNUSED;
- h->mmco[0].short_frame_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
+ h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
h->mmco_index= 1;
+ if (FIELD_PICTURE) {
+ h->mmco[0].short_pic_num *= 2;
+ h->mmco[1].opcode= MMCO_SHORT2UNUSED;
+ h->mmco[1].short_pic_num= h->mmco[0].short_pic_num + 1;
+ h->mmco_index= 2;
+ }
}else
h->mmco_index= 0;
}
field_poc[1]= poc;
}
- if(s->picture_structure != PICT_BOTTOM_FIELD)
+ if(s->picture_structure != PICT_BOTTOM_FIELD) {
s->current_picture_ptr->field_poc[0]= field_poc[0];
- if(s->picture_structure != PICT_TOP_FIELD)
+ s->current_picture_ptr->poc = field_poc[0];
+ }
+ if(s->picture_structure != PICT_TOP_FIELD) {
s->current_picture_ptr->field_poc[1]= field_poc[1];
- if(s->picture_structure == PICT_FRAME) // FIXME field pix?
- s->current_picture_ptr->poc= FFMIN(field_poc[0], field_poc[1]);
+ s->current_picture_ptr->poc = field_poc[1];
+ }
+ if(!FIELD_PICTURE || !s->first_field) {
+ Picture *cur = s->current_picture_ptr;
+ cur->poc= FFMIN(cur->field_poc[0], cur->field_poc[1]);
+ }
return 0;
}
h->field_scan8x8_cavlc_q0 = h->field_scan8x8_cavlc;
}
}
+
+/**
+ * Replicates H264 "master" context to thread contexts.
+ *
+ * Copies the slice-level state of src into dst: block_offset, the current
+ * picture (both the pointer and the struct), linesize/uvlinesize,
+ * first_field, the prev_poc_* and prev_frame_num* values, short_ref_count,
+ * the short_ref, long_ref, default_ref_list and ref_list arrays, and the
+ * dequant4/dequant8 coefficient tables.
+ *
+ * Every copy is a plain assignment or memcpy, so pointers stored in the
+ * copied fields (e.g. current_picture_ptr) keep referring to the objects
+ * that src refers to.
+ *
+ * NOTE(review): short_ref_count is copied, but no long-term reference
+ * count is copied alongside long_ref; confirm that users of dst do not
+ * need one.
+ *
+ * @param dst context that receives the copied state
+ * @param src context the state is read from
+ */
+static void clone_slice(H264Context *dst, H264Context *src)
+{
+ memcpy(dst->block_offset, src->block_offset, sizeof(dst->block_offset));
+ dst->s.current_picture_ptr = src->s.current_picture_ptr;
+ dst->s.current_picture = src->s.current_picture;
+ dst->s.linesize = src->s.linesize;
+ dst->s.uvlinesize = src->s.uvlinesize;
+ dst->s.first_field = src->s.first_field;
+
+ dst->prev_poc_msb = src->prev_poc_msb;
+ dst->prev_poc_lsb = src->prev_poc_lsb;
+ dst->prev_frame_num_offset = src->prev_frame_num_offset;
+ dst->prev_frame_num = src->prev_frame_num;
+ dst->short_ref_count = src->short_ref_count;
+
+ memcpy(dst->short_ref, src->short_ref, sizeof(dst->short_ref));
+ memcpy(dst->long_ref, src->long_ref, sizeof(dst->long_ref));
+ memcpy(dst->default_ref_list, src->default_ref_list, sizeof(dst->default_ref_list));
+ memcpy(dst->ref_list, src->ref_list, sizeof(dst->ref_list));
+
+ memcpy(dst->dequant4_coeff, src->dequant4_coeff, sizeof(src->dequant4_coeff)); /* sized from src; dst and src are both H264Context, so the size matches */
+ memcpy(dst->dequant8_coeff, src->dequant8_coeff, sizeof(src->dequant8_coeff));
+}
+
/**
* decodes a slice header.
* this will allso call MPV_common_init() and frame_start() as needed
+ *
+ * @param h h264context
+ * @param h0 h264 master context (differs from 'h' when doing slice-based parallel decoding)
+ *
+ * @return 0 if okay, <0 if an error occurred, 1 if decoding must not be multithreaded
*/
-static int decode_slice_header(H264Context *h){
+static int decode_slice_header(H264Context *h, H264Context *h0){
MpegEncContext * const s = &h->s;
+ MpegEncContext * const s0 = &h0->s;
unsigned int first_mb_in_slice;
unsigned int pps_id;
int num_ref_idx_active_override_flag;
- static const uint8_t slice_type_map[5]= {P_TYPE, B_TYPE, I_TYPE, SP_TYPE, SI_TYPE};
- unsigned int slice_type, tmp;
+ static const uint8_t slice_type_map[5]= {FF_P_TYPE, FF_B_TYPE, FF_I_TYPE, FF_SP_TYPE, FF_SI_TYPE};
+ unsigned int slice_type, tmp, i;
int default_ref_list_done = 0;
+ int last_pic_structure;
- s->current_picture.reference= h->nal_ref_idc != 0;
s->dropable= h->nal_ref_idc == 0;
+ if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !h->nal_ref_idc){
+ s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
+ s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
+ }else{
+ s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
+ s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
+ }
+
first_mb_in_slice= get_ue_golomb(&s->gb);
if((s->flags2 & CODEC_FLAG2_CHUNKS) && first_mb_in_slice == 0){
- h->slice_num = 0;
- s->current_picture_ptr= NULL;
+ h0->current_slice = 0;
+ if (!s0->first_field)
+ s->current_picture_ptr= NULL;
}
slice_type= get_ue_golomb(&s->gb);
h->slice_type_fixed=0;
slice_type= slice_type_map[ slice_type ];
- if (slice_type == I_TYPE
- || (h->slice_num != 0 && slice_type == h->slice_type) ) {
+ if (slice_type == FF_I_TYPE
+ || (h0->current_slice != 0 && slice_type == h0->last_slice_type) ) {
default_ref_list_done = 1;
}
h->slice_type= slice_type;
s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
+ if (s->pict_type == FF_B_TYPE && s0->last_picture_ptr == NULL) {
+ av_log(h->s.avctx, AV_LOG_ERROR,
+ "B picture before any references, skipping\n");
+ return -1;
+ }
pps_id= get_ue_golomb(&s->gb);
if(pps_id>=MAX_PPS_COUNT){
av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
return -1;
}
- if(!h->pps_buffers[pps_id]) {
+ if(!h0->pps_buffers[pps_id]) {
av_log(h->s.avctx, AV_LOG_ERROR, "non existing PPS referenced\n");
return -1;
}
- h->pps= *h->pps_buffers[pps_id];
+ h->pps= *h0->pps_buffers[pps_id];
- if(!h->sps_buffers[h->pps.sps_id]) {
+ if(!h0->sps_buffers[h->pps.sps_id]) {
av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
return -1;
}
- h->sps = *h->sps_buffers[h->pps.sps_id];
+ h->sps = *h0->sps_buffers[h->pps.sps_id];
- if(h->dequant_coeff_pps != pps_id){
+ if(h == h0 && h->dequant_coeff_pps != pps_id){
h->dequant_coeff_pps = pps_id;
init_dequant_tables(h);
}
if (s->context_initialized
&& ( s->width != s->avctx->width || s->height != s->avctx->height)) {
+ if(h != h0)
+ return -1; // width / height changed during parallelized decoding
free_tables(h);
MPV_common_end(s);
}
if (!s->context_initialized) {
+ if(h != h0)
+ return -1; // we cant (re-)initialize context during parallel decoding
if (MPV_common_init(s) < 0)
return -1;
+ s->first_field = 0;
init_scan_tables(h);
alloc_tables(h);
+ for(i = 1; i < s->avctx->thread_count; i++) {
+ H264Context *c;
+ c = h->thread_context[i] = av_malloc(sizeof(H264Context));
+ memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
+ memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
+ c->sps = h->sps;
+ c->pps = h->pps;
+ init_scan_tables(c);
+ clone_tables(c, h);
+ }
+
+ for(i = 0; i < s->avctx->thread_count; i++)
+ if(context_init(h->thread_context[i]) < 0)
+ return -1;
+
s->avctx->width = s->width;
s->avctx->height = s->height;
s->avctx->sample_aspect_ratio= h->sps.sar;
}
}
- if(h->slice_num == 0){
- if(frame_start(h) < 0)
- return -1;
- }
-
- s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
h->frame_num= get_bits(&s->gb, h->sps.log2_max_frame_num);
h->mb_mbaff = 0;
h->mb_aff_frame = 0;
+ last_pic_structure = s0->picture_structure;
if(h->sps.frame_mbs_only_flag){
s->picture_structure= PICT_FRAME;
}else{
if(get_bits1(&s->gb)) { //field_pic_flag
s->picture_structure= PICT_TOP_FIELD + get_bits1(&s->gb); //bottom_field_flag
- av_log(h->s.avctx, AV_LOG_ERROR, "PAFF interlacing is not implemented\n");
} else {
s->picture_structure= PICT_FRAME;
h->mb_aff_frame = h->sps.mb_aff;
}
}
+
+ if(h0->current_slice == 0){
+ /* See if we have a decoded first field looking for a pair... */
+ if (s0->first_field) {
+ assert(s0->current_picture_ptr);
+ assert(s0->current_picture_ptr->data[0]);
+ assert(s0->current_picture_ptr->reference != DELAYED_PIC_REF);
+
+ /* figure out if we have a complementary field pair */
+ if (!FIELD_PICTURE || s->picture_structure == last_pic_structure) {
+ /*
+ * Previous field is unmatched. Don't display it, but let it
+ * remain for reference if marked as such.
+ */
+ s0->current_picture_ptr = NULL;
+ s0->first_field = FIELD_PICTURE;
+
+ } else {
+ if (h->nal_ref_idc &&
+ s0->current_picture_ptr->reference &&
+ s0->current_picture_ptr->frame_num != h->frame_num) {
+ /*
+ * This and previous field were reference, but had
+ * different frame_nums. Consider this field first in
+ * pair. Throw away previous field except for reference
+ * purposes.
+ */
+ s0->first_field = 1;
+ s0->current_picture_ptr = NULL;
+
+ } else {
+ /* Second field in complementary pair */
+ s0->first_field = 0;
+ }
+ }
+
+ } else {
+ /* Frame or first field in a potentially complementary pair */
+ assert(!s0->current_picture_ptr);
+ s0->first_field = FIELD_PICTURE;
+ }
+
+ if((!FIELD_PICTURE || s0->first_field) && frame_start(h) < 0) {
+ s0->first_field = 0;
+ return -1;
+ }
+ }
+ if(h != h0)
+ clone_slice(h, h0);
+
+ s->current_picture_ptr->frame_num= h->frame_num; //FIXME frame_num cleanup
+
assert(s->mb_num == s->mb_width * s->mb_height);
- if(first_mb_in_slice << h->mb_aff_frame >= s->mb_num ||
+ if(first_mb_in_slice << FIELD_OR_MBAFF_PICTURE >= s->mb_num ||
first_mb_in_slice >= s->mb_num){
av_log(h->s.avctx, AV_LOG_ERROR, "first_mb_in_slice overflow\n");
return -1;
}
s->resync_mb_x = s->mb_x = first_mb_in_slice % s->mb_width;
- s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << h->mb_aff_frame;
+ s->resync_mb_y = s->mb_y = (first_mb_in_slice / s->mb_width) << FIELD_OR_MBAFF_PICTURE;
+ if (s->picture_structure == PICT_BOTTOM_FIELD)
+ s->resync_mb_y = s->mb_y = s->mb_y + 1;
assert(s->mb_y < s->mb_height);
if(s->picture_structure==PICT_FRAME){
h->curr_pic_num= h->frame_num;
h->max_pic_num= 1<< h->sps.log2_max_frame_num;
}else{
- h->curr_pic_num= 2*h->frame_num;
+ h->curr_pic_num= 2*h->frame_num + 1;
h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
}
h->ref_count[0]= h->pps.ref_count[0];
h->ref_count[1]= h->pps.ref_count[1];
- if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
- if(h->slice_type == B_TYPE){
+ if(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE || h->slice_type == FF_B_TYPE){
+ if(h->slice_type == FF_B_TYPE){
h->direct_spatial_mv_pred= get_bits1(&s->gb);
- if(h->sps.mb_aff && h->direct_spatial_mv_pred)
- av_log(h->s.avctx, AV_LOG_ERROR, "MBAFF + spatial direct mode is not implemented\n");
}
num_ref_idx_active_override_flag= get_bits1(&s->gb);
if(num_ref_idx_active_override_flag){
h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
- if(h->slice_type==B_TYPE)
+ if(h->slice_type==FF_B_TYPE)
h->ref_count[1]= get_ue_golomb(&s->gb) + 1;
if(h->ref_count[0]-1 > 32-1 || h->ref_count[1]-1 > 32-1){
return -1;
}
}
- if(h->slice_type == B_TYPE)
+ if(h->slice_type == FF_B_TYPE)
h->list_count= 2;
else
h->list_count= 1;
if(decode_ref_pic_list_reordering(h) < 0)
return -1;
- if( (h->pps.weighted_pred && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
- || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
+ if( (h->pps.weighted_pred && (h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE ))
+ || (h->pps.weighted_bipred_idc==1 && h->slice_type==FF_B_TYPE ) )
pred_weight_table(h);
- else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
+ else if(h->pps.weighted_bipred_idc==2 && h->slice_type==FF_B_TYPE)
implicit_weight_table(h);
else
h->use_weight = 0;
- if(s->current_picture.reference)
- decode_ref_pic_marking(h, &s->gb);
+ if(h->nal_ref_idc)
+ decode_ref_pic_marking(h0, &s->gb);
if(FRAME_MBAFF)
fill_mbaff_ref_list(h);
- if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE && h->pps.cabac ){
+ if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE && h->pps.cabac ){
tmp = get_ue_golomb(&s->gb);
if(tmp > 2){
av_log(s->avctx, AV_LOG_ERROR, "cabac_init_idc overflow\n");
h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
//FIXME qscale / qp ... stuff
- if(h->slice_type == SP_TYPE){
+ if(h->slice_type == FF_SP_TYPE){
get_bits1(&s->gb); /* sp_for_switch_flag */
}
- if(h->slice_type==SP_TYPE || h->slice_type == SI_TYPE){
+ if(h->slice_type==FF_SP_TYPE || h->slice_type == FF_SI_TYPE){
get_se_golomb(&s->gb); /* slice_qs_delta */
}
h->slice_beta_offset = get_se_golomb(&s->gb) << 1;
}
}
+
if( s->avctx->skip_loop_filter >= AVDISCARD_ALL
- ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != I_TYPE)
- ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == B_TYPE)
+ ||(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY && h->slice_type != FF_I_TYPE)
+ ||(s->avctx->skip_loop_filter >= AVDISCARD_BIDIR && h->slice_type == FF_B_TYPE)
||(s->avctx->skip_loop_filter >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
h->deblocking_filter= 0;
+ if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
+ if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
+ /* Cheat slightly for speed:
+ Do not bother to deblock across slices. */
+ h->deblocking_filter = 2;
+ } else {
+ h0->max_contexts = 1;
+ if(!h0->single_decode_warning) {
+ av_log(s->avctx, AV_LOG_INFO, "Cannot parallelize deblocking type 1, decoding such frames in sequential order\n");
+ h0->single_decode_warning = 1;
+ }
+ if(h != h0)
+ return 1; // deblocking switched inside frame
+ }
+ }
+
#if 0 //FMO
if( h->pps.num_slice_groups > 1 && h->pps.mb_slice_group_map_type >= 3 && h->pps.mb_slice_group_map_type <= 5)
slice_group_change_cycle= get_bits(&s->gb, ?);
#endif
- h->slice_num++;
+ h0->last_slice_type = slice_type;
+ h->slice_num = ++h0->current_slice;
h->emu_edge_width= (s->flags&CODEC_FLAG_EMU_EDGE) ? 0 : 16;
- h->emu_edge_height= FRAME_MBAFF ? 0 : h->emu_edge_width;
+ h->emu_edge_height= (FRAME_MBAFF || FIELD_PICTURE) ? 0 : h->emu_edge_width;
if(s->avctx->debug&FF_DEBUG_PICT_INFO){
av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%u frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
);
}
- if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){
- s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab;
- s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab;
- }else{
- s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab;
- s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab;
- }
-
return 0;
}
if(MB_FIELD)
mb_type|= MB_TYPE_INTERLACED;
- if( h->slice_type == B_TYPE )
+ if( h->slice_type == FF_B_TYPE )
{
// just for fill_caches. pred_direct_motion will set the real mb_type
mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
cbp = 0; /* avoid warning. FIXME: find a solution without slowing
down the code */
- if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
+ if(h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE){
if(s->mb_skip_run==-1)
s->mb_skip_run= get_ue_golomb(&s->gb);
h->prev_mb_skipped= 0;
mb_type= get_ue_golomb(&s->gb);
- if(h->slice_type == B_TYPE){
+ if(h->slice_type == FF_B_TYPE){
if(mb_type < 23){
partition_count= b_mb_type_info[mb_type].partition_count;
mb_type= b_mb_type_info[mb_type].type;
mb_type -= 23;
goto decode_intra_mb;
}
- }else if(h->slice_type == P_TYPE /*|| h->slice_type == SP_TYPE */){
+ }else if(h->slice_type == FF_P_TYPE /*|| h->slice_type == FF_SP_TYPE */){
if(mb_type < 5){
partition_count= p_mb_type_info[mb_type].partition_count;
mb_type= p_mb_type_info[mb_type].type;
goto decode_intra_mb;
}
}else{
- assert(h->slice_type == I_TYPE);
+ assert(h->slice_type == FF_I_TYPE);
decode_intra_mb:
if(mb_type > 25){
av_log(h->s.avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_pict_type_char(h->slice_type), s->mb_x, s->mb_y);
}else if(partition_count==4){
int i, j, sub_partition_count[4], list, ref[2][4];
- if(h->slice_type == B_TYPE){
+ if(h->slice_type == FF_B_TYPE){
for(i=0; i<4; i++){
h->sub_mb_type[i]= get_ue_golomb(&s->gb);
if(h->sub_mb_type[i] >=13){
h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
}
}else{
- assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
+ assert(h->slice_type == FF_P_TYPE || h->slice_type == FF_SP_TYPE); //FIXME SP correct ?
for(i=0; i<4; i++){
h->sub_mb_type[i]= get_ue_golomb(&s->gb);
if(h->sub_mb_type[i] >=4){
static int decode_cabac_mb_type( H264Context *h ) {
MpegEncContext * const s = &h->s;
- if( h->slice_type == I_TYPE ) {
+ if( h->slice_type == FF_I_TYPE ) {
return decode_cabac_intra_mb_type(h, 3, 1);
- } else if( h->slice_type == P_TYPE ) {
+ } else if( h->slice_type == FF_P_TYPE ) {
if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
/* P-type */
if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
} else {
return decode_cabac_intra_mb_type(h, 17, 0) + 5;
}
- } else if( h->slice_type == B_TYPE ) {
+ } else if( h->slice_type == FF_B_TYPE ) {
const int mba_xy = h->left_mb_xy[0];
const int mbb_xy = h->top_mb_xy;
int ctx = 0;
}else{
int mb_xy = mb_x + mb_y*s->mb_stride;
mba_xy = mb_xy - 1;
- mbb_xy = mb_xy - s->mb_stride;
+ mbb_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
}
if( h->slice_table[mba_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mba_xy] ))
if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
ctx++;
- if( h->slice_type == B_TYPE )
+ if( h->slice_type == FF_B_TYPE )
ctx += 13;
return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
}
return 3;
}
-static const uint8_t block_idx_x[16] = {
- 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3
-};
-static const uint8_t block_idx_y[16] = {
- 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3
-};
-static const uint8_t block_idx_xy[4][4] = {
- { 0, 2, 8, 10},
- { 1, 3, 9, 11},
- { 4, 6, 12, 14},
- { 5, 7, 13, 15}
-};
-
static int decode_cabac_mb_cbp_luma( H264Context *h) {
- int cbp = 0;
- int cbp_b = -1;
- int i8x8;
-
- if( h->slice_table[h->top_mb_xy] == h->slice_num ) {
- cbp_b = h->top_cbp;
- tprintf(h->s.avctx, "cbp_b = top_cbp = %x\n", cbp_b);
- }
-
- for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
- int cbp_a = -1;
- int x, y;
- int ctx = 0;
-
- x = block_idx_x[4*i8x8];
- y = block_idx_y[4*i8x8];
-
- if( x > 0 )
- cbp_a = cbp;
- else if( h->slice_table[h->left_mb_xy[0]] == h->slice_num ) {
- cbp_a = h->left_cbp;
- tprintf(h->s.avctx, "cbp_a = left_cbp = %x\n", cbp_a);
- }
-
- if( y > 0 )
- cbp_b = cbp;
-
- /* No need to test for skip as we put 0 for skip block */
- /* No need to test for IPCM as we put 1 for IPCM block */
- if( cbp_a >= 0 ) {
- int i8x8a = block_idx_xy[(x-1)&0x03][y]/4;
- if( ((cbp_a >> i8x8a)&0x01) == 0 )
- ctx++;
- }
-
- if( cbp_b >= 0 ) {
- int i8x8b = block_idx_xy[x][(y-1)&0x03]/4;
- if( ((cbp_b >> i8x8b)&0x01) == 0 )
- ctx += 2;
- }
-
- if( get_cabac( &h->cabac, &h->cabac_state[73 + ctx] ) ) {
- cbp |= 1 << i8x8;
- }
- }
+ int cbp_b, cbp_a, ctx, cbp = 0;
+ /* Decode the 4-bit luma coded block pattern with CABAC and return it;
+  * bit i of the result is the bin decoded for 8x8 luma block i.
+  * cbp_a / cbp_b hold the cbp of the left / top macroblock when that
+  * macroblock belongs to the current slice, and -1 otherwise. With -1
+  * every bit is set, so the !(x & mask) tests below evaluate to 0 and
+  * such a neighbour adds nothing to ctx. */
+ cbp_a = h->slice_table[h->left_mb_xy[0]] == h->slice_num ? h->left_cbp : -1;
+ cbp_b = h->slice_table[h->top_mb_xy] == h->slice_num ? h->top_cbp : -1;
+ /* For each block, ctx = (bit of the block to its left is clear)
+  * + 2 * (bit of the block above it is clear), selecting one of the
+  * states cabac_state[73..76]. The masks imply the layout 0 = top-left,
+  * 1 = top-right, 2 = bottom-left, 3 = bottom-right: blocks 0 and 2 take
+  * their left neighbour from cbp_a (bits 1 and 3), blocks 0 and 1 take
+  * their upper neighbour from cbp_b (bits 2 and 3), and the remaining
+  * neighbours are bits of cbp already decoded for this macroblock. */
+ ctx = !(cbp_a & 0x02) + 2 * !(cbp_b & 0x04);
+ cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]);
+ ctx = !(cbp & 0x01) + 2 * !(cbp_b & 0x08);
+ cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 1;
+ ctx = !(cbp_a & 0x08) + 2 * !(cbp & 0x01);
+ cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 2;
+ ctx = !(cbp & 0x04) + 2 * !(cbp & 0x02);
+ cbp |= get_cabac_noinline(&h->cabac, &h->cabac_state[73 + ctx]) << 3;
return cbp;
}
static int decode_cabac_mb_cbp_chroma( H264Context *h) {
return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
}
static int decode_cabac_mb_dqp( H264Context *h) {
- MpegEncContext * const s = &h->s;
- int mbn_xy;
int ctx = 0;
int val = 0;
- if( s->mb_x > 0 )
- mbn_xy = s->mb_x + s->mb_y*s->mb_stride - 1;
- else
- mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
-
if( h->last_qscale_diff != 0 )
ctx++;
int ref = 0;
int ctx = 0;
- if( h->slice_type == B_TYPE) {
+ if( h->slice_type == FF_B_TYPE) {
if( refa > 0 && !h->direct_cache[scan8[n] - 1] )
ctx++;
if( refb > 0 && !h->direct_cache[scan8[n] - 8] )
return ctx + 4 * cat;
}
-static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
+DECLARE_ASM_CONST(1, const uint8_t, last_coeff_flag_offset_8x8[63]) = {
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};
-static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
+static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
static const int significant_coeff_flag_offset[2][6] = {
{ 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
h->cabac.low = cc.low ;
h->cabac.bytestream= cc.bytestream;
#endif
- return 0;
+ return;
}
}
index[coeff_count++] = last;\
}
const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
-#if defined(ARCH_X86) && defined(CONFIG_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
+#if defined(ARCH_X86) && defined(HAVE_7REGS) && defined(HAVE_EBX_AVAILABLE) && !defined(BROKEN_RELOCATIONS)
coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
} else {
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
if( !qmul ) {
block[j] = get_cabac_bypass_sign( CC, -1);
}else{
- block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
+ block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
}
abslevel1++;
h->cabac.low = cc.low ;
h->cabac.bytestream= cc.bytestream;
#endif
- return 0;
+
}
static inline void compute_mb_neighbors(H264Context *h)
if (left_mb_frame_flag != curr_mb_frame_flag) {
h->left_mb_xy[0] = pair_xy - 1;
}
+ } else if (FIELD_PICTURE) {
+ h->top_mb_xy -= s->mb_stride;
}
return;
}
s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?)
tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
- if( h->slice_type != I_TYPE && h->slice_type != SI_TYPE ) {
+ if( h->slice_type != FF_I_TYPE && h->slice_type != FF_SI_TYPE ) {
int skip;
/* a skipped mb needs the aff flag from the following mb */
if( FRAME_MBAFF && s->mb_x==0 && (s->mb_y&1)==0 )
return -1;
}
- if( h->slice_type == B_TYPE ) {
+ if( h->slice_type == FF_B_TYPE ) {
if( mb_type < 23 ){
partition_count= b_mb_type_info[mb_type].partition_count;
mb_type= b_mb_type_info[mb_type].type;
mb_type -= 23;
goto decode_intra_mb;
}
- } else if( h->slice_type == P_TYPE ) {
+ } else if( h->slice_type == FF_P_TYPE ) {
if( mb_type < 5) {
partition_count= p_mb_type_info[mb_type].partition_count;
mb_type= p_mb_type_info[mb_type].type;
goto decode_intra_mb;
}
} else {
- assert(h->slice_type == I_TYPE);
+ assert(h->slice_type == FF_I_TYPE);
decode_intra_mb:
partition_count = 0;
cbp= i_mb_type_info[mb_type].cbp;
} else if( partition_count == 4 ) {
int i, j, sub_partition_count[4], list, ref[2][4];
- if( h->slice_type == B_TYPE ) {
+ if( h->slice_type == FF_B_TYPE ) {
for( i = 0; i < 4; i++ ) {
h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
pred_direct_motion(h, &mb_type);
+ h->ref_cache[0][scan8[4]] =
+ h->ref_cache[1][scan8[4]] =
+ h->ref_cache[0][scan8[12]] =
+ h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
if( h->ref_count[0] > 1 || h->ref_count[1] > 1 ) {
for( i = 0; i < 4; i++ )
if( IS_DIRECT(h->sub_mb_type[i]) )
for(list=0; list<h->list_count; list++){
for(i=0; i<4; i++){
+ h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
if(IS_DIRECT(h->sub_mb_type[i])){
fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
continue;
}
- h->ref_cache[list][ scan8[4*i] ]=h->ref_cache[list][ scan8[4*i]+1 ];
if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
const int sub_mb_type= h->sub_mb_type[i];
if( cbp || IS_INTRA16x16( mb_type ) ) {
const uint8_t *scan, *scan8x8, *dc_scan;
+ const uint32_t *qmul;
int dqp;
if(IS_INTERLACED(mb_type)){
if( IS_INTRA16x16( mb_type ) ) {
int i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
- if( decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16) < 0)
- return -1;
+ decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16);
+
if( cbp&15 ) {
+ qmul = h->dequant4_coeff[0][s->qscale];
for( i = 0; i < 16; i++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
- if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
- return -1;
+ decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
}
} else {
fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
if( cbp & (1<<i8x8) ) {
if( IS_8x8DCT(mb_type) ) {
- if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
- scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
- return -1;
- } else
- for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
- const int index = 4*i8x8 + i4x4;
- //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
+ decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
+ scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
+ } else {
+ qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
+ for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
+ const int index = 4*i8x8 + i4x4;
+ //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
//START_TIMER
- if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
- return -1;
+ decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16);
//STOP_TIMER("decode_residual")
+ }
}
} else {
uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
int c;
for( c = 0; c < 2; c++ ) {
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
- if( decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4) < 0)
- return -1;
+ decode_cabac_residual(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, NULL, 4);
}
}
if( cbp&0x20 ) {
int c, i;
for( c = 0; c < 2; c++ ) {
- const uint32_t *qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
+ qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
for( i = 0; i < 4; i++ ) {
const int index = 16 + 4 * c + i;
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
- if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15) < 0)
- return -1;
+ decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15);
}
}
} else {
static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
MpegEncContext * const s = &h->s;
+ int mb_y_firstrow = s->picture_structure == PICT_BOTTOM_FIELD;
int mb_xy, mb_type;
int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
mb_xy = mb_x + mb_y*s->mb_stride;
- if(mb_x==0 || mb_y==0 || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
+ if(mb_x==0 || mb_y==mb_y_firstrow || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff ||
(h->deblocking_filter == 2 && (h->slice_table[mb_xy] != h->slice_table[h->top_mb_xy] ||
h->slice_table[mb_xy] != h->slice_table[mb_xy - 1]))) {
filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
if( IS_INTRA(mb_type) ) {
int16_t bS4[4] = {4,4,4,4};
int16_t bS3[4] = {3,3,3,3};
+ int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
if( IS_8x8DCT(mb_type) ) {
filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
- filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
+ filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
} else {
filter_mb_edgev( h, &img_y[4*0], linesize, bS4, qp0 );
filter_mb_edgev( h, &img_y[4*1], linesize, bS3, qp );
filter_mb_edgev( h, &img_y[4*2], linesize, bS3, qp );
filter_mb_edgev( h, &img_y[4*3], linesize, bS3, qp );
- filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bS4, qp1 );
+ filter_mb_edgeh( h, &img_y[4*0*linesize], linesize, bSH, qp1 );
filter_mb_edgeh( h, &img_y[4*1*linesize], linesize, bS3, qp );
filter_mb_edgeh( h, &img_y[4*2*linesize], linesize, bS3, qp );
filter_mb_edgeh( h, &img_y[4*3*linesize], linesize, bS3, qp );
filter_mb_edgecv( h, &img_cb[2*2], uvlinesize, bS3, qpc );
filter_mb_edgecv( h, &img_cr[2*0], uvlinesize, bS4, qpc0 );
filter_mb_edgecv( h, &img_cr[2*2], uvlinesize, bS3, qpc );
- filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
+ filter_mb_edgech( h, &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
filter_mb_edgech( h, &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc );
- filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bS4, qpc1 );
+ filter_mb_edgech( h, &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1 );
filter_mb_edgech( h, &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc );
return;
} else {
int step = IS_8x8DCT(mb_type) ? 2 : 1;
edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4;
s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache,
- (h->slice_type == B_TYPE), edges, step, mask_edge0, mask_edge1 );
+ (h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1 );
}
if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) )
bSv[0][0] = 0x0004000400040004ULL;
if( IS_INTRA(s->current_picture.mb_type[h->top_mb_xy]) )
- bSv[1][0] = 0x0004000400040004ULL;
+ bSv[1][0] = FIELD_PICTURE ? 0x0003000300030003ULL : 0x0004000400040004ULL;
#define FILTER(hv,dir,edge)\
if(bSv[dir][edge]) {\
int b_idx= 8 + 4 + edge * (dir ? 8:1);
int bn_idx= b_idx - (dir ? 8:1);
int v = 0;
- for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
+ for( l = 0; !v && l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
else if(!mv_done)
{
bS[i] = 0;
- for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
+ for( l = 0; l < 1 + (h->slice_type == FF_B_TYPE); l++ ) {
if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
FFABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
}
}
-static int decode_slice(H264Context *h){
+static int decode_slice(struct AVCodecContext *avctx, H264Context *h){
MpegEncContext * const s = &h->s;
const int part_mask= s->partitioned_frame ? (AC_END|AC_ERROR) : 0x7F;
/* calculate pre-state */
for( i= 0; i < 460; i++ ) {
int pre;
- if( h->slice_type == I_TYPE )
+ if( h->slice_type == FF_I_TYPE )
pre = av_clip( ((cabac_context_init_I[i][0] * s->qscale) >>4 ) + cabac_context_init_I[i][1], 1, 126 );
else
pre = av_clip( ((cabac_context_init_PB[h->cabac_init_idc][i][0] * s->qscale) >>4 ) + cabac_context_init_PB[h->cabac_init_idc][i][1], 1, 126 );
s->mb_x = 0;
ff_draw_horiz_band(s, 16*s->mb_y, 16);
++s->mb_y;
- if(FRAME_MBAFF) {
+ if(FIELD_OR_MBAFF_PICTURE) {
++s->mb_y;
}
}
s->mb_x=0;
ff_draw_horiz_band(s, 16*s->mb_y, 16);
++s->mb_y;
- if(FRAME_MBAFF) {
+ if(FIELD_OR_MBAFF_PICTURE) {
++s->mb_y;
}
if(s->mb_y >= s->mb_height){
if( aspect_ratio_idc == EXTENDED_SAR ) {
sps->sar.num= get_bits(&s->gb, 16);
sps->sar.den= get_bits(&s->gb, 16);
- }else if(aspect_ratio_idc < 14){
+ }else if(aspect_ratio_idc < sizeof(pixel_aspect)/sizeof(*pixel_aspect)){
sps->sar= pixel_aspect[aspect_ratio_idc];
}else{
av_log(h->s.avctx, AV_LOG_ERROR, "illegal aspect ratio\n");
}
tmp= get_ue_golomb(&s->gb);
- if(tmp > MAX_PICTURE_COUNT-2){
+ if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
+ return -1;
}
sps->ref_frame_count= tmp;
sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
return 0;
}
-static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
+/**
+ * Call decode_slice() for each context.
+ *
+ * With exactly one context, decode_slice() is called directly on the
+ * master context h and its return value is discarded. Otherwise:
+ *  - contexts 1..context_count-1 get avctx->error_resilience copied in and
+ *    their error_count reset to 0 (index 0 is skipped; presumably
+ *    thread_context[0] is the master context itself -- TODO confirm),
+ *  - all contexts in h->thread_context[] are passed to avctx->execute(),
+ *  - mb_x, mb_y, dropable and picture_structure are copied from the last
+ *    context back into the master context, and the error counts of
+ *    contexts 1..context_count-1 are added to the master's error_count.
+ *
+ * @param h h264 master context
+ * @param context_count number of contexts to execute
+ */
+static void execute_decode_slices(H264Context *h, int context_count){
+ MpegEncContext * const s = &h->s;
+ AVCodecContext * const avctx= s->avctx;
+ H264Context *hx;
+ int i;
+
+ if(context_count == 1) {
+ decode_slice(avctx, h);
+ } else {
+ for(i = 1; i < context_count; i++) {
+ hx = h->thread_context[i];
+ hx->s.error_resilience = avctx->error_resilience;
+ hx->s.error_count = 0;
+ }
+
+ avctx->execute(avctx, (void *)decode_slice,
+ (void **)h->thread_context, NULL, context_count);
+
+ /* pull back stuff from slices to master context: position and picture
+ * state come from the last context only, error counts are summed */
+ hx = h->thread_context[context_count - 1];
+ s->mb_x = hx->s.mb_x;
+ s->mb_y = hx->s.mb_y;
+ s->dropable = hx->s.dropable;
+ s->picture_structure = hx->s.picture_structure;
+ for(i = 1; i < context_count; i++)
+ h->s.error_count += h->thread_context[i]->s.error_count;
+ }
+}
+
+
+static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
MpegEncContext * const s = &h->s;
AVCodecContext * const avctx= s->avctx;
int buf_index=0;
+ H264Context *hx; ///< thread context
+ int context_count = 0;
+
+ h->max_contexts = avctx->thread_count;
#if 0
int i;
for(i=0; i<50; i++){
}
#endif
if(!(s->flags2 & CODEC_FLAG2_CHUNKS)){
- h->slice_num = 0;
- s->current_picture_ptr= NULL;
+ h->current_slice = 0;
+ if (!s->first_field)
+ s->current_picture_ptr= NULL;
}
for(;;){
int consumed;
int dst_length;
int bit_length;
- uint8_t *ptr;
+ const uint8_t *ptr;
int i, nalsize = 0;
+ int err;
if(h->is_avc) {
if(buf_index >= buf_size) break;
buf_index+=3;
}
- ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
+ hx = h->thread_context[context_count];
+
+ ptr= decode_nal(hx, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
if (ptr==NULL || dst_length < 0){
return -1;
}
bit_length= !dst_length ? 0 : (8*dst_length - decode_rbsp_trailing(h, ptr + dst_length - 1));
if(s->avctx->debug&FF_DEBUG_STARTCODE){
- av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
+ av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", hx->nal_unit_type, buf_index, buf_size, dst_length);
}
- if (h->is_avc && (nalsize != consumed))
+ if (h->is_avc && (nalsize != consumed)){
av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
+ consumed= nalsize;
+ }
buf_index += consumed;
||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc == 0))
continue;
- switch(h->nal_unit_type){
+ again:
+ err = 0;
+ switch(hx->nal_unit_type){
case NAL_IDR_SLICE:
+ if (h->nal_unit_type != NAL_IDR_SLICE) {
+ av_log(h->s.avctx, AV_LOG_ERROR, "Invalid mix of idr and non-idr slices");
+ return -1;
+ }
idr(h); //FIXME ensure we don't lose some frames if there is reordering
case NAL_SLICE:
- init_get_bits(&s->gb, ptr, bit_length);
- h->intra_gb_ptr=
- h->inter_gb_ptr= &s->gb;
- s->data_partitioning = 0;
-
- if(decode_slice_header(h) < 0){
- av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
- break;
- }
- s->current_picture_ptr->key_frame= (h->nal_unit_type == NAL_IDR_SLICE);
- if(h->redundant_pic_count==0 && s->hurry_up < 5
- && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
- && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
- && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
+ init_get_bits(&hx->s.gb, ptr, bit_length);
+ hx->intra_gb_ptr=
+ hx->inter_gb_ptr= &hx->s.gb;
+ hx->s.data_partitioning = 0;
+
+ if((err = decode_slice_header(hx, h)))
+ break;
+
+ s->current_picture_ptr->key_frame|= (hx->nal_unit_type == NAL_IDR_SLICE);
+ if(hx->redundant_pic_count==0 && hx->s.hurry_up < 5
+ && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
+ && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
+ && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
&& avctx->skip_frame < AVDISCARD_ALL)
- decode_slice(h);
+ context_count++;
break;
case NAL_DPA:
- init_get_bits(&s->gb, ptr, bit_length);
- h->intra_gb_ptr=
- h->inter_gb_ptr= NULL;
- s->data_partitioning = 1;
+ init_get_bits(&hx->s.gb, ptr, bit_length);
+ hx->intra_gb_ptr=
+ hx->inter_gb_ptr= NULL;
+ hx->s.data_partitioning = 1;
- if(decode_slice_header(h) < 0){
- av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
- }
+ err = decode_slice_header(hx, h);
break;
case NAL_DPB:
- init_get_bits(&h->intra_gb, ptr, bit_length);
- h->intra_gb_ptr= &h->intra_gb;
+ init_get_bits(&hx->intra_gb, ptr, bit_length);
+ hx->intra_gb_ptr= &hx->intra_gb;
break;
case NAL_DPC:
- init_get_bits(&h->inter_gb, ptr, bit_length);
- h->inter_gb_ptr= &h->inter_gb;
+ init_get_bits(&hx->inter_gb, ptr, bit_length);
+ hx->inter_gb_ptr= &hx->inter_gb;
- if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
+ if(hx->redundant_pic_count==0 && hx->intra_gb_ptr && hx->s.data_partitioning
&& s->context_initialized
&& s->hurry_up < 5
- && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
- && (avctx->skip_frame < AVDISCARD_BIDIR || h->slice_type!=B_TYPE)
- && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
+ && (avctx->skip_frame < AVDISCARD_NONREF || hx->nal_ref_idc)
+ && (avctx->skip_frame < AVDISCARD_BIDIR || hx->slice_type!=FF_B_TYPE)
+ && (avctx->skip_frame < AVDISCARD_NONKEY || hx->slice_type==FF_I_TYPE)
&& avctx->skip_frame < AVDISCARD_ALL)
- decode_slice(h);
+ context_count++;
break;
case NAL_SEI:
init_get_bits(&s->gb, ptr, bit_length);
case NAL_AUXILIARY_SLICE:
break;
default:
- av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
+ av_log(avctx, AV_LOG_DEBUG, "Unknown NAL code: %d (%d bits)\n", h->nal_unit_type, bit_length);
}
- }
+ if(context_count == h->max_contexts) {
+ execute_decode_slices(h, context_count);
+ context_count = 0;
+ }
+
+ if (err < 0)
+ av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
+ else if(err == 1) {
+ /* Slice could not be decoded in parallel mode, copy down
+ * NAL unit stuff to context 0 and restart. Note that
+ * rbsp_buffer is not transferred, but since we no longer
+ * run in parallel mode this should not be an issue. */
+ h->nal_unit_type = hx->nal_unit_type;
+ h->nal_ref_idc = hx->nal_ref_idc;
+ hx = h;
+ goto again;
+ }
+ }
+ if(context_count)
+ execute_decode_slices(h, context_count);
return buf_index;
}
static int decode_frame(AVCodecContext *avctx,
void *data, int *data_size,
- uint8_t *buf, int buf_size)
+ const uint8_t *buf, int buf_size)
{
H264Context *h = avctx->priv_data;
MpegEncContext *s = &h->s;
return -1;
if(!(s->flags2 & CODEC_FLAG2_CHUNKS) && !s->current_picture_ptr){
+ if (avctx->skip_frame >= AVDISCARD_NONREF || s->hurry_up) return 0;
av_log(avctx, AV_LOG_ERROR, "no frame!\n");
return -1;
}
h->prev_frame_num_offset= h->frame_num_offset;
h->prev_frame_num= h->frame_num;
- if(s->current_picture_ptr->reference){
+ if(!s->dropable) {
h->prev_poc_msb= h->poc_msb;
h->prev_poc_lsb= h->poc_lsb;
- }
- if(s->current_picture_ptr->reference)
execute_ref_pic_marking(h, h->mmco, h->mmco_index);
+ }
- ff_er_frame_end(s);
+ /*
+ * FIXME: Error handling code does not seem to support interlaced
+ * when slices span multiple rows
+ * The ff_er_add_slice calls don't work right for bottom
+ * fields; they cause massive erroneous error concealing
+ * Error marking covers both fields (top and bottom).
+ * This causes a mismatched s->error_count
+ * and a bad error table. Further, the error count goes to
+ * INT_MAX when called for bottom field, because mb_y is
+ * past end by one (caller's fault) and resync_mb_y != 0
+ * causes problems for the first MB line, too.
+ */
+ if (!FIELD_PICTURE)
+ ff_er_frame_end(s);
MPV_frame_end(s);
- //FIXME do something with unavailable reference frames
+ if (s->first_field) {
+ /* Wait for second field. */
+ *data_size = 0;
+
+ } else {
+ cur->interlaced_frame = FIELD_OR_MBAFF_PICTURE;
+ /* Derive top_field_first from field pocs. */
+ cur->top_field_first = cur->field_poc[0] < cur->field_poc[1];
+
+ //FIXME do something with unavailable reference frames
#if 0 //decode order
- *data_size = sizeof(AVFrame);
+ *data_size = sizeof(AVFrame);
#else
- /* Sort B-frames into display order */
+ /* Sort B-frames into display order */
- if(h->sps.bitstream_restriction_flag
- && s->avctx->has_b_frames < h->sps.num_reorder_frames){
- s->avctx->has_b_frames = h->sps.num_reorder_frames;
- s->low_delay = 0;
- }
-
- pics = 0;
- while(h->delayed_pic[pics]) pics++;
-
- assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
+ if(h->sps.bitstream_restriction_flag
+ && s->avctx->has_b_frames < h->sps.num_reorder_frames){
+ s->avctx->has_b_frames = h->sps.num_reorder_frames;
+ s->low_delay = 0;
+ }
- h->delayed_pic[pics++] = cur;
- if(cur->reference == 0)
- cur->reference = 1;
+ pics = 0;
+ while(h->delayed_pic[pics]) pics++;
- cross_idr = 0;
- for(i=0; h->delayed_pic[i]; i++)
- if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
- cross_idr = 1;
+ assert(pics+1 < sizeof(h->delayed_pic) / sizeof(h->delayed_pic[0]));
- out = h->delayed_pic[0];
- out_idx = 0;
- for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
- if(h->delayed_pic[i]->poc < out->poc){
- out = h->delayed_pic[i];
- out_idx = i;
- }
+ h->delayed_pic[pics++] = cur;
+ if(cur->reference == 0)
+ cur->reference = DELAYED_PIC_REF;
- out_of_order = !cross_idr && prev && out->poc < prev->poc;
- if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
- { }
- else if(prev && pics <= s->avctx->has_b_frames)
- out = prev;
- else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
- || (s->low_delay &&
- ((!cross_idr && prev && out->poc > prev->poc + 2)
- || cur->pict_type == B_TYPE)))
- {
- s->low_delay = 0;
- s->avctx->has_b_frames++;
- out = prev;
- }
- else if(out_of_order)
- out = prev;
+ cross_idr = 0;
+ for(i=0; h->delayed_pic[i]; i++)
+ if(h->delayed_pic[i]->key_frame || h->delayed_pic[i]->poc==0)
+ cross_idr = 1;
- if(out_of_order || pics > s->avctx->has_b_frames){
- for(i=out_idx; h->delayed_pic[i]; i++)
- h->delayed_pic[i] = h->delayed_pic[i+1];
- }
+ out = h->delayed_pic[0];
+ out_idx = 0;
+ for(i=1; h->delayed_pic[i] && !h->delayed_pic[i]->key_frame; i++)
+ if(h->delayed_pic[i]->poc < out->poc){
+ out = h->delayed_pic[i];
+ out_idx = i;
+ }
- if(prev == out)
- *data_size = 0;
- else
- *data_size = sizeof(AVFrame);
- if(prev && prev != out && prev->reference == 1)
- prev->reference = 0;
- h->delayed_output_pic = out;
+ out_of_order = !cross_idr && prev && out->poc < prev->poc;
+ if(h->sps.bitstream_restriction_flag && s->avctx->has_b_frames >= h->sps.num_reorder_frames)
+ { }
+ else if(prev && pics <= s->avctx->has_b_frames)
+ out = prev;
+ else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
+ || (s->low_delay &&
+ ((!cross_idr && prev && out->poc > prev->poc + 2)
+ || cur->pict_type == FF_B_TYPE)))
+ {
+ s->low_delay = 0;
+ s->avctx->has_b_frames++;
+ out = prev;
+ }
+ else if(out_of_order)
+ out = prev;
+
+ if(out_of_order || pics > s->avctx->has_b_frames){
+ for(i=out_idx; h->delayed_pic[i]; i++)
+ h->delayed_pic[i] = h->delayed_pic[i+1];
+ }
+
+ if(prev == out)
+ *data_size = 0;
+ else
+ *data_size = sizeof(AVFrame);
+ if(prev && prev != out && prev->reference == DELAYED_PIC_REF)
+ prev->reference = 0;
+ h->delayed_output_pic = out;
#endif
- if(out)
- *pict= *(AVFrame*)out;
- else
- av_log(avctx, AV_LOG_DEBUG, "no picture\n");
+ if(out)
+ *pict= *(AVFrame*)out;
+ else
+ av_log(avctx, AV_LOG_DEBUG, "no picture\n");
+ }
}
assert(pict->data[0] || !*data_size);
#if 0 //?
/* Return the Picture timestamp as the frame number */
- /* we substract 1 because it is added on utils.c */
+ /* we subtract 1 because it is added on utils.c */
avctx->frame_number = s->picture_number - 1;
#endif
return get_consumed_bytes(s, buf_index, buf_size);
}
#endif
-#if 0 //selftest
+#ifdef TEST
+#undef printf
+#undef random
#define COUNT 8000
#define SIZE (COUNT*40)
-int main(){
+int main(void){
int i;
uint8_t temp[SIZE];
PutBitContext pb;
STOP_TIMER("get_se_golomb");
}
+#if 0
printf("testing 4x4 (I)DCT\n");
DCTELEM block[16];
}
}
printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
-#if 0
printf("testing quantizer\n");
for(qp=0; qp<52; qp++){
for(i=0; i<16; i++)
src1_block[i]= src2_block[i]= random()%255;
}
-#endif
printf("Testing NAL layer\n");
uint8_t bitstream[COUNT];
return -1;
}
}
+#endif
printf("Testing RBSP\n");
return 0;
}
-#endif
+#endif /* TEST */
static int decode_end(AVCodecContext *avctx)