fix possible overflow with memmove

[ffmpeg] / libavcodec / h264.c
diff --git a/libavcodec/h264.c b/libavcodec/h264.c

index 19d83197addde2a22463f468ef17bbdb57157ed5..7f37cb46af44d0c7f226dff2ef0e9fca08b594e8 100644 (file)
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -32,6 +32,7 @@
  #include "h264data.h"
  #include "h264_parser.h"
  #include "golomb.h"
+#include "rectangle.h"
  
  #include "cabac.h"
  
@@ -75,109 +76,23 @@ const uint8_t ff_div6[52]={
  };
  
  
-/**
- * fill a rectangle.
- * @param h height of the rectangle, should be a constant
- * @param w width of the rectangle, should be a constant
- * @param size the size of val (1 or 4), should be a constant
- */
-static av_always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
-    uint8_t *p= (uint8_t*)vp;
-    assert(size==1 || size==4);
-    assert(w<=4);
-
-    w      *= size;
-    stride *= size;
-
-    assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
-    assert((stride&(w-1))==0);
-    if(w==2){
-        const uint16_t v= size==4 ? val : val*0x0101;
-        *(uint16_t*)(p + 0*stride)= v;
-        if(h==1) return;
-        *(uint16_t*)(p + 1*stride)= v;
-        if(h==2) return;
-        *(uint16_t*)(p + 2*stride)= v;
-        *(uint16_t*)(p + 3*stride)= v;
-    }else if(w==4){
-        const uint32_t v= size==4 ? val : val*0x01010101;
-        *(uint32_t*)(p + 0*stride)= v;
-        if(h==1) return;
-        *(uint32_t*)(p + 1*stride)= v;
-        if(h==2) return;
-        *(uint32_t*)(p + 2*stride)= v;
-        *(uint32_t*)(p + 3*stride)= v;
-    }else if(w==8){
-    //gcc can't optimize 64bit math on x86_32
-#if defined(ARCH_X86_64) || (defined(MP_WORDSIZE) && MP_WORDSIZE >= 64)
-        const uint64_t v= val*0x0100000001ULL;
-        *(uint64_t*)(p + 0*stride)= v;
-        if(h==1) return;
-        *(uint64_t*)(p + 1*stride)= v;
-        if(h==2) return;
-        *(uint64_t*)(p + 2*stride)= v;
-        *(uint64_t*)(p + 3*stride)= v;
-    }else if(w==16){
-        const uint64_t v= val*0x0100000001ULL;
-        *(uint64_t*)(p + 0+0*stride)= v;
-        *(uint64_t*)(p + 8+0*stride)= v;
-        *(uint64_t*)(p + 0+1*stride)= v;
-        *(uint64_t*)(p + 8+1*stride)= v;
-        if(h==2) return;
-        *(uint64_t*)(p + 0+2*stride)= v;
-        *(uint64_t*)(p + 8+2*stride)= v;
-        *(uint64_t*)(p + 0+3*stride)= v;
-        *(uint64_t*)(p + 8+3*stride)= v;
-#else
-        *(uint32_t*)(p + 0+0*stride)= val;
-        *(uint32_t*)(p + 4+0*stride)= val;
-        if(h==1) return;
-        *(uint32_t*)(p + 0+1*stride)= val;
-        *(uint32_t*)(p + 4+1*stride)= val;
-        if(h==2) return;
-        *(uint32_t*)(p + 0+2*stride)= val;
-        *(uint32_t*)(p + 4+2*stride)= val;
-        *(uint32_t*)(p + 0+3*stride)= val;
-        *(uint32_t*)(p + 4+3*stride)= val;
-    }else if(w==16){
-        *(uint32_t*)(p + 0+0*stride)= val;
-        *(uint32_t*)(p + 4+0*stride)= val;
-        *(uint32_t*)(p + 8+0*stride)= val;
-        *(uint32_t*)(p +12+0*stride)= val;
-        *(uint32_t*)(p + 0+1*stride)= val;
-        *(uint32_t*)(p + 4+1*stride)= val;
-        *(uint32_t*)(p + 8+1*stride)= val;
-        *(uint32_t*)(p +12+1*stride)= val;
-        if(h==2) return;
-        *(uint32_t*)(p + 0+2*stride)= val;
-        *(uint32_t*)(p + 4+2*stride)= val;
-        *(uint32_t*)(p + 8+2*stride)= val;
-        *(uint32_t*)(p +12+2*stride)= val;
-        *(uint32_t*)(p + 0+3*stride)= val;
-        *(uint32_t*)(p + 4+3*stride)= val;
-        *(uint32_t*)(p + 8+3*stride)= val;
-        *(uint32_t*)(p +12+3*stride)= val;
-#endif
-    }else
-        assert(0);
-    assert(h==4);
-}
-
  static void fill_caches(H264Context *h, int mb_type, int for_deblock){
      MpegEncContext * const s = &h->s;
      const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
      int topleft_xy, top_xy, topright_xy, left_xy[2];
      int topleft_type, top_type, topright_type, left_type[2];
      int left_block[8];
+    int topleft_partition= -1;
      int i;
  
+    top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);
+
      //FIXME deblocking could skip the intra and nnz parts.
-    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[mb_xy-s->mb_stride]) && !FRAME_MBAFF)
+    if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
          return;
  
      //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
  
-    top_xy     = mb_xy  - (s->mb_stride << FIELD_PICTURE);
      topleft_xy = top_xy - 1;
      topright_xy= top_xy + 1;
      left_xy[1] = left_xy[0] = mb_xy-1;
@@ -212,6 +127,10 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
                  : (!curr_mb_frame_flag && !topleft_mb_frame_flag) // top macroblock
                  ) {
              topleft_xy -= s->mb_stride;
+        } else if(bottom && curr_mb_frame_flag && !left_mb_frame_flag) {
+            topleft_xy += s->mb_stride;
+            // take topleft mv from the middle of the mb, as opposed to all other modes which use the bottom-right partition
+            topleft_partition = 0;
          }
          if (bottom
                  ? !curr_mb_frame_flag // bottom macroblock
@@ -489,8 +408,8 @@ static void fill_caches(H264Context *h, int mb_type, int for_deblock){
                  continue;
  
              if(USES_LIST(topleft_type, list)){
-                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
-                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
+                const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
+                const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
                  *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
                  h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
              }else{
@@ -787,7 +706,7 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
  #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                  const int x4 = X4, y4 = Y4;\
                  const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
-                if(!USES_LIST(mb_type,list) && !IS_8X8(mb_type))\
+                if(!USES_LIST(mb_type,list))\
                      return LIST_NOT_USED;\
                  mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                  h->mv_cache[list][scan8[0]-2][0] = mv[0];\
@@ -808,7 +727,7 @@ static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, in
                 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
                 && i >= scan8[0]+8){
                  // leftshift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's ok.
-                SET_DIAG_MV(>>1, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
+                SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
              }
          }
  #undef SET_DIAG_MV
@@ -1103,14 +1022,76 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
          }
  
          if(ref[1] < 0){
-            *mb_type &= ~MB_TYPE_P0L1;
-            sub_mb_type &= ~MB_TYPE_P0L1;
+            if(!is_b8x8)
+                *mb_type &= ~MB_TYPE_L1;
+            sub_mb_type &= ~MB_TYPE_L1;
          }else if(ref[0] < 0){
-            *mb_type &= ~MB_TYPE_P0L0;
-            sub_mb_type &= ~MB_TYPE_P0L0;
+            if(!is_b8x8)
+                *mb_type &= ~MB_TYPE_L0;
+            sub_mb_type &= ~MB_TYPE_L0;
          }
  
-        if(IS_16X16(*mb_type)){
+        if(IS_INTERLACED(*mb_type) != IS_INTERLACED(mb_type_col)){
+            int pair_xy = s->mb_x + (s->mb_y&~1)*s->mb_stride;
+            int mb_types_col[2];
+            int b8_stride = h->b8_stride;
+            int b4_stride = h->b_stride;
+
+            *mb_type = (*mb_type & ~MB_TYPE_16x16) | MB_TYPE_8x8;
+
+            if(IS_INTERLACED(*mb_type)){
+                mb_types_col[0] = h->ref_list[1][0].mb_type[pair_xy];
+                mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy+s->mb_stride];
+                if(s->mb_y&1){
+                    l1ref0 -= 2*b8_stride;
+                    l1ref1 -= 2*b8_stride;
+                    l1mv0 -= 4*b4_stride;
+                    l1mv1 -= 4*b4_stride;
+                }
+                b8_stride *= 3;
+                b4_stride *= 6;
+            }else{
+                int cur_poc = s->current_picture_ptr->poc;
+                int *col_poc = h->ref_list[1]->field_poc;
+                int col_parity = FFABS(col_poc[0] - cur_poc) >= FFABS(col_poc[1] - cur_poc);
+                int dy = 2*col_parity - (s->mb_y&1);
+                mb_types_col[0] =
+                mb_types_col[1] = h->ref_list[1][0].mb_type[pair_xy + col_parity*s->mb_stride];
+                l1ref0 += dy*b8_stride;
+                l1ref1 += dy*b8_stride;
+                l1mv0 += 2*dy*b4_stride;
+                l1mv1 += 2*dy*b4_stride;
+                b8_stride = 0;
+            }
+
+            for(i8=0; i8<4; i8++){
+                int x8 = i8&1;
+                int y8 = i8>>1;
+                int xy8 = x8+y8*b8_stride;
+                int xy4 = 3*x8+y8*b4_stride;
+                int a=0, b=0;
+
+                if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
+                    continue;
+                h->sub_mb_type[i8] = sub_mb_type;
+
+                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
+                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
+                if(!IS_INTRA(mb_types_col[y8])
+                   && (   (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
+                       || (l1ref0[xy8]  < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
+                    if(ref[0] > 0)
+                        a= pack16to32(mv[0][0],mv[0][1]);
+                    if(ref[1] > 0)
+                        b= pack16to32(mv[1][0],mv[1][1]);
+                }else{
+                    a= pack16to32(mv[0][0],mv[0][1]);
+                    b= pack16to32(mv[1][0],mv[1][1]);
+                }
+                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
+                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
+            }
+        }else if(IS_16X16(*mb_type)){
              int a=0, b=0;
  
              fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
@@ -1394,7 +1375,7 @@ static inline void write_back_motion(H264Context *h, int mb_type){
   * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
   * @returns decoded bytes, might be src+1 if no escapes
   */
-static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
+static const uint8_t *decode_nal(H264Context *h, const uint8_t *src, int *dst_length, int *consumed, int length){
      int i, si, di;
      uint8_t *dst;
      int bufidx;
@@ -1461,7 +1442,7 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
   * identifies the exact end of the bitstream
   * @return the length of the trailing, or 0 if damaged
   */
-static int decode_rbsp_trailing(H264Context *h, uint8_t *src){
+static int decode_rbsp_trailing(H264Context *h, const uint8_t *src){
      int v= *src;
      int r;
  
@@ -2037,7 +2018,6 @@ static void free_tables(H264Context *h){
          av_freep(&hx->top_borders[1]);
          av_freep(&hx->top_borders[0]);
          av_freep(&hx->s.obmc_scratchpad);
-        av_freep(&hx->s.allocated_edge_emu_buffer);
      }
  }
  
@@ -2121,12 +2101,10 @@ static int alloc_tables(H264Context *h){
      CHECKED_ALLOCZ(h->slice_table_base  , (big_mb_num+s->mb_stride) * sizeof(uint8_t))
      CHECKED_ALLOCZ(h->cbp_table, big_mb_num * sizeof(uint16_t))
  
-    if( h->pps.cabac ) {
-        CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
-        CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
-        CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
-        CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
-    }
+    CHECKED_ALLOCZ(h->chroma_pred_mode_table, big_mb_num * sizeof(uint8_t))
+    CHECKED_ALLOCZ(h->mvd_table[0], 32*big_mb_num * sizeof(uint16_t));
+    CHECKED_ALLOCZ(h->mvd_table[1], 32*big_mb_num * sizeof(uint16_t));
+    CHECKED_ALLOCZ(h->direct_table, 32*big_mb_num * sizeof(uint8_t));
  
      memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride)  * sizeof(uint8_t));
      h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
@@ -2179,15 +2157,9 @@ static void clone_tables(H264Context *dst, H264Context *src){
   * Allocate buffers which are not shared amongst multiple threads.
   */
  static int context_init(H264Context *h){
-    MpegEncContext * const s = &h->s;
-
      CHECKED_ALLOCZ(h->top_borders[0], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
      CHECKED_ALLOCZ(h->top_borders[1], h->s.mb_width * (16+8+8) * sizeof(uint8_t))
  
-    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
-    CHECKED_ALLOCZ(s->allocated_edge_emu_buffer,
-                   (s->width+64)*2*21*2); //(width + edge + align)*interlaced*MBsize*tolerance
-    s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*21;
      return 0;
  fail:
      return -1; // free_tables will clean up for us
@@ -2496,13 +2468,13 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple){
                      continue;
                  if(IS_16X16(mb_type)){
                      int8_t *ref = &h->ref_cache[list][scan8[0]];
-                    fill_rectangle(ref, 4, 4, 8, 16+*ref^(s->mb_y&1), 1);
+                    fill_rectangle(ref, 4, 4, 8, (16+*ref)^(s->mb_y&1), 1);
                  }else{
                      for(i=0; i<16; i+=4){
                          //FIXME can refs be smaller than 8x8 when !direct_8x8_inference ?
                          int ref = h->ref_cache[list][scan8[i]];
                          if(ref >= 0)
-                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, 16+ref^(s->mb_y&1), 1);
+                            fill_rectangle(&h->ref_cache[list][scan8[i]], 2, 2, 8, (16+ref)^(s->mb_y&1), 1);
                      }
                  }
              }
@@ -2947,18 +2919,9 @@ static int fill_default_ref_list(H264Context *h){
                  if(sel != PICT_FRAME) continue;
  
                  frame_list[ list ][index  ]= *h->long_ref[i];
-                frame_list[ list ][index++].pic_id= i;;
+                frame_list[ list ][index++].pic_id= i;
              }
              len[list] = index;
-
-            if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
-                // swap the two first elements of L1 when
-                // L0 and L1 are identical
-                Picture temp= frame_list[1][0];
-                frame_list[1][0] = frame_list[1][1];
-                frame_list[1][1] = temp;
-            }
-
          }
  
          for(list=0; list<2; list++){
@@ -2970,6 +2933,14 @@ static int fill_default_ref_list(H264Context *h){
                                                   s->picture_structure,
                                                   short_len[list]);
  
+            // swap the two first elements of L1 when L0 and L1 are identical
+            if(list && len[0] > 1 && len[0] == len[1])
+                for(i=0; h->default_ref_list[0][i].data[0] == h->default_ref_list[1][i].data[0]; i++)
+                    if(i == len[0]){
+                        FFSWAP(Picture, h->default_ref_list[1][0], h->default_ref_list[1][1]);
+                        break;
+                    }
+
              if(len[list] < h->ref_count[ list ])
                  memset(&h->default_ref_list[list][len[list]], 0, sizeof(Picture)*(h->ref_count[ list ] - len[list]));
          }
@@ -2992,7 +2963,7 @@ static int fill_default_ref_list(H264Context *h){
              sel = h->long_ref[i]->reference | structure_sel;
              if(sel != PICT_FRAME) continue;
              frame_list[0][index  ]= *h->long_ref[i];
-            frame_list[0][index++].pic_id= i;;
+            frame_list[0][index++].pic_id= i;
          }
  
          if (FIELD_PICTURE)
@@ -3010,7 +2981,7 @@ static int fill_default_ref_list(H264Context *h){
      }
      if(h->slice_type==B_TYPE){
          for (i=0; i<h->ref_count[1]; i++) {
-            tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[0][i].data[0]);
+            tprintf(h->s.avctx, "List1: %s fn:%d 0x%p\n", (h->default_ref_list[1][i].long_ref ? "LT" : "ST"), h->default_ref_list[1][i].pic_id, h->default_ref_list[1][i].data[0]);
          }
      }
  #endif
@@ -3611,6 +3582,31 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
          s->current_picture_ptr->reference |= s->picture_structure;
      }
  
+    if (h->long_ref_count + h->short_ref_count > h->sps.ref_frame_count){
+
+        /* We have too many reference frames, probably due to a corrupted
+         * stream. Discard one frame to prevent overrunning the
+         * short_ref and long_ref buffers.
+         */
+        av_log(h->s.avctx, AV_LOG_ERROR,
+               "number of reference frames exceeds max (probably "
+               "corrupt input), discarding one\n");
+
+        if (h->long_ref_count) {
+            for (i = 0; i < 16; ++i)
+                if (h->long_ref[i])
+                    break;
+
+            assert(i < 16);
+            pic = h->long_ref[i];
+            remove_long_at_index(h, i);
+        } else {
+            pic = h->short_ref[h->short_ref_count - 1];
+            remove_short_at_index(h, h->short_ref_count - 1);
+        }
+        unreference_pic(h, pic, 0);
+    }
+
      print_short_term(h);
      print_long_term(h);
      return 0;
@@ -3662,7 +3658,7 @@ static int decode_ref_pic_marking(H264Context *h, GetBitContext *gb){
          }else{
              assert(h->long_ref_count + h->short_ref_count <= h->sps.ref_frame_count);
  
-            if(h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
+            if(h->short_ref_count && h->long_ref_count + h->short_ref_count == h->sps.ref_frame_count &&
                      !(FIELD_PICTURE && !s->first_field && s->current_picture_ptr->reference)) {
                  h->mmco[0].opcode= MMCO_SHORT2UNUSED;
                  h->mmco[0].short_pic_num= h->short_ref[ h->short_ref_count - 1 ]->frame_num;
@@ -3910,6 +3906,11 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
      h->slice_type= slice_type;
  
      s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
+    if (s->pict_type == B_TYPE && s0->last_picture_ptr == NULL) {
+        av_log(h->s.avctx, AV_LOG_ERROR,
+               "B picture before any references, skipping\n");
+        return -1;
+    }
  
      pps_id= get_ue_golomb(&s->gb);
      if(pps_id>=MAX_PPS_COUNT){
@@ -3965,7 +3966,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
          for(i = 1; i < s->avctx->thread_count; i++) {
              H264Context *c;
              c = h->thread_context[i] = av_malloc(sizeof(H264Context));
-            memcpy(c, h, sizeof(MpegEncContext));
+            memcpy(c, h->s.thread_context[i], sizeof(MpegEncContext));
              memset(&c->s + 1, 0, sizeof(H264Context) - sizeof(MpegEncContext));
              c->sps = h->sps;
              c->pps = h->pps;
@@ -4111,8 +4112,6 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
      if(h->slice_type == P_TYPE || h->slice_type == SP_TYPE || h->slice_type == B_TYPE){
          if(h->slice_type == B_TYPE){
              h->direct_spatial_mv_pred= get_bits1(&s->gb);
-            if(FIELD_OR_MBAFF_PICTURE && h->direct_spatial_mv_pred)
-                av_log(h->s.avctx, AV_LOG_ERROR, "Interlaced pictures + spatial direct mode is not implemented\n");
          }
          num_ref_idx_active_override_flag= get_bits1(&s->gb);
  
@@ -4209,7 +4208,7 @@ static int decode_slice_header(H264Context *h, H264Context *h0){
      if(h->deblocking_filter == 1 && h0->max_contexts > 1) {
          if(s->avctx->flags2 & CODEC_FLAG2_FAST) {
              /* Cheat slightly for speed:
-               Dont bother to deblock across slices */
+               Do not bother to deblock across slices. */
              h->deblocking_filter = 2;
          } else {
              h0->max_contexts = 1;
@@ -5361,7 +5360,7 @@ static inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
      return ctx + 4 * cat;
  }
  
-static const attribute_used uint8_t last_coeff_flag_offset_8x8[63] = {
+DECLARE_ASM_CONST(1, const uint8_t, last_coeff_flag_offset_8x8[63]) = {
      0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
      3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
@@ -5499,7 +5498,7 @@ static void decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int
              if( !qmul ) {
                  block[j] = get_cabac_bypass_sign( CC, -1);
              }else{
-                block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;;
+                block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
              }
  
              abslevel1++;
@@ -7204,8 +7203,9 @@ static inline int decode_seq_parameter_set(H264Context *h){
      }
  
      tmp= get_ue_golomb(&s->gb);
-    if(tmp > MAX_PICTURE_COUNT-2){
+    if(tmp > MAX_PICTURE_COUNT-2 || tmp >= 32){
          av_log(h->s.avctx, AV_LOG_ERROR, "too many reference frames\n");
+        return -1;
      }
      sps->ref_frame_count= tmp;
      sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
@@ -7425,7 +7425,7 @@ static void execute_decode_slices(H264Context *h, int context_count){
  }
  
  
-static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
+static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){
      MpegEncContext * const s = &h->s;
      AVCodecContext * const avctx= s->avctx;
      int buf_index=0;
@@ -7449,7 +7449,7 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
          int consumed;
          int dst_length;
          int bit_length;
-        uint8_t *ptr;
+        const uint8_t *ptr;
          int i, nalsize = 0;
          int err;
  
@@ -7627,7 +7627,7 @@ static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
  
  static int decode_frame(AVCodecContext *avctx,
                               void *data, int *data_size,
-                             uint8_t *buf, int buf_size)
+                             const uint8_t *buf, int buf_size)
  {
      H264Context *h = avctx->priv_data;
      MpegEncContext *s = &h->s;
@@ -7875,7 +7875,8 @@ static inline void fill_mb_avail(H264Context *h){
  }
  #endif
  
-#if 0 //selftest
+#ifdef TEST
+#undef printf
  #undef random
  #define COUNT 8000
  #define SIZE (COUNT*40)
@@ -7939,6 +7940,7 @@ int main(void){
          STOP_TIMER("get_se_golomb");
      }
  
+#if 0
      printf("testing 4x4 (I)DCT\n");
  
      DCTELEM block[16];
@@ -7978,14 +7980,12 @@ int main(void){
          }
      }
      printf("error=%f max_error=%d\n", ((float)error)/COUNT/16, (int)max_error );
-#if 0
      printf("testing quantizer\n");
      for(qp=0; qp<52; qp++){
          for(i=0; i<16; i++)
              src1_block[i]= src2_block[i]= random()%255;
  
      }
-#endif
      printf("Testing NAL layer\n");
  
      uint8_t bitstream[COUNT];
@@ -8041,13 +8041,14 @@ int main(void){
              return -1;
          }
      }
+#endif
  
      printf("Testing RBSP\n");
  
  
      return 0;
  }
-#endif
+#endif /* TEST */
  
  
  static int decode_end(AVCodecContext *avctx)