]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/h264_cavlc.c
Merge commit 'cfe64613923a2d47644a87386146ada1f9f6b659'
[ffmpeg] / libavcodec / h264_cavlc.c
index 3ccd7565448d5b3e4e29406bbf61fcd436fc4e6e..ce14dcef5ed6d230225655349b7a14fee4dc3f4c 100644 (file)
@@ -2,20 +2,20 @@
  * H.26L/H.264/AVC/JVT/14496-10/... cavlc bitstream decoding
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
  *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
@@ -26,6 +26,7 @@
  */
 
 #define CABAC(h) 0
+#define UNCHECKED_BITSTREAM_READER 1
 
 #include "internal.h"
 #include "avcodec.h"
@@ -34,8 +35,8 @@
 #include "h264_mvpred.h"
 #include "golomb.h"
 #include "mpegutils.h"
+#include "libavutil/avassert.h"
 
-#include <assert.h>
 
 static const uint8_t golomb_to_inter_cbp_gray[16]={
  0, 1, 2, 4, 8, 3, 5,10,12,15, 7,11,13,14, 6, 9,
@@ -283,7 +284,7 @@ static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2];
  * Get the predicted number of non-zero coefficients.
  * @param n block index
  */
-static inline int pred_non_zero_count(H264Context *h, H264SliceContext *sl, int n)
+static inline int pred_non_zero_count(const H264Context *h, H264SliceContext *sl, int n)
 {
     const int index8= scan8[n];
     const int left = sl->non_zero_count_cache[index8 - 1];
@@ -360,7 +361,7 @@ av_cold void ff_h264_decode_init_vlc(void){
          * the packed static coeff_token_vlc table sizes
          * were initialized correctly.
          */
-        assert(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
+        av_assert0(offset == FF_ARRAY_ELEMS(coeff_token_vlc_tables));
 
         for(i=0; i<3; i++){
             chroma_dc_total_zeros_vlc[i].table = chroma_dc_total_zeros_vlc_tables[i];
@@ -442,7 +443,7 @@ static inline int get_level_prefix(GetBitContext *gb){
  * @param max_coeff number of coefficients in the block
  * @return <0 if an error occurred
  */
-static int decode_residual(H264Context *h, H264SliceContext *sl,
+static int decode_residual(const H264Context *h, H264SliceContext *sl,
                            GetBitContext *gb, int16_t *block, int n,
                            const uint8_t *scantable, const uint32_t *qmul,
                            int max_coeff)
@@ -477,13 +478,13 @@ static int decode_residual(H264Context *h, H264SliceContext *sl,
     if(total_coeff==0)
         return 0;
     if(total_coeff > (unsigned)max_coeff) {
-        av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", h->mb_x, h->mb_y, total_coeff);
+        av_log(h->avctx, AV_LOG_ERROR, "corrupted macroblock %d %d (total_coeff=%d)\n", sl->mb_x, sl->mb_y, total_coeff);
         return -1;
     }
 
     trailing_ones= coeff_token&3;
     tprintf(h->avctx, "trailing:%d, total:%d\n", trailing_ones, total_coeff);
-    assert(total_coeff<=16);
+    av_assert2(total_coeff<=16);
 
     i = show_bits(gb, 3);
     skip_bits(gb, trailing_ones);
@@ -515,7 +516,7 @@ static int decode_residual(H264Context *h, H264SliceContext *sl,
                 else
                     level_code= prefix + get_bits(gb, 4); //part
             }else{
-                level_code= 30 + get_bits(gb, prefix-3); //part
+                level_code= 30;
                 if(prefix>=16){
                     if(prefix > 25+3){
                         av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
@@ -523,6 +524,7 @@ static int decode_residual(H264Context *h, H264SliceContext *sl,
                     }
                     level_code += (1<<(prefix-3))-4096;
                 }
+                level_code += get_bits(gb, prefix-3); //part
             }
 
             if(trailing_ones < 3) level_code += 2;
@@ -552,9 +554,15 @@ static int decode_residual(H264Context *h, H264SliceContext *sl,
                 if(prefix<15){
                     level_code = (prefix<<suffix_length) + get_bits(gb, suffix_length);
                 }else{
-                    level_code = (15<<suffix_length) + get_bits(gb, prefix-3);
-                    if(prefix>=16)
+                    level_code = 15<<suffix_length;
+                    if (prefix>=16) {
+                        if(prefix > 25+3){
+                            av_log(h->avctx, AV_LOG_ERROR, "Invalid level prefix\n");
+                            return AVERROR_INVALIDDATA;
+                        }
                         level_code += (1<<(prefix-3))-4096;
+                    }
+                    level_code += get_bits(gb, prefix-3);
                 }
                 mask= -(level_code&1);
                 level_code= (((2+level_code)>>1) ^ mask) - mask;
@@ -569,13 +577,13 @@ static int decode_residual(H264Context *h, H264SliceContext *sl,
     else{
         if (max_coeff <= 8) {
             if (max_coeff == 4)
-                zeros_left = get_vlc2(gb, chroma_dc_total_zeros_vlc[total_coeff - 1].table,
+                zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table,
                                       CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1);
             else
-                zeros_left = get_vlc2(gb, chroma422_dc_total_zeros_vlc[total_coeff - 1].table,
+                zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table,
                                       CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1);
         } else {
-            zeros_left= get_vlc2(gb, total_zeros_vlc[total_coeff - 1].table, TOTAL_ZEROS_VLC_BITS, 1);
+            zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1);
         }
     }
 
@@ -585,7 +593,7 @@ static int decode_residual(H264Context *h, H264SliceContext *sl,
         ((type*)block)[*scantable] = level[0]; \
         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
             if(zeros_left < 7) \
-                run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
+                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
             else \
                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
             zeros_left -= run_before; \
@@ -600,7 +608,7 @@ static int decode_residual(H264Context *h, H264SliceContext *sl,
         ((type*)block)[*scantable] = ((int)(level[0] * qmul[*scantable] + 32))>>6; \
         for(i=1;i<total_coeff && zeros_left > 0;i++) { \
             if(zeros_left < 7) \
-                run_before= get_vlc2(gb, run_vlc[zeros_left - 1].table, RUN_VLC_BITS, 1); \
+                run_before= get_vlc2(gb, (run_vlc-1)[zeros_left].table, RUN_VLC_BITS, 1); \
             else \
                 run_before= get_vlc2(gb, run7_vlc.table, RUN7_VLC_BITS, 2); \
             zeros_left -= run_before; \
@@ -613,40 +621,44 @@ static int decode_residual(H264Context *h, H264SliceContext *sl,
         } \
     }
 
-    if (zeros_left < 0) {
-        av_log(h->avctx, AV_LOG_ERROR,
-               "negative number of zero coeffs at %d %d\n", h->mb_x, h->mb_y);
-        return AVERROR_INVALIDDATA;
-    }
-
     if (h->pixel_shift) {
         STORE_BLOCK(int32_t)
     } else {
         STORE_BLOCK(int16_t)
     }
 
+    if(zeros_left<0){
+        av_log(h->avctx, AV_LOG_ERROR, "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
+        return -1;
+    }
+
     return 0;
 }
 
-static av_always_inline int decode_luma_residual(H264Context *h, H264SliceContext *sl, GetBitContext *gb, const uint8_t *scan, const uint8_t *scan8x8, int pixel_shift, int mb_type, int cbp, int p){
+static av_always_inline
+int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
+                         GetBitContext *gb, const uint8_t *scan,
+                         const uint8_t *scan8x8, int pixel_shift,
+                         int mb_type, int cbp, int p)
+{
     int i4x4, i8x8;
     int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
     if(IS_INTRA16x16(mb_type)){
-        AV_ZERO128(h->mb_luma_dc[p]+0);
-        AV_ZERO128(h->mb_luma_dc[p]+8);
-        AV_ZERO128(h->mb_luma_dc[p]+16);
-        AV_ZERO128(h->mb_luma_dc[p]+24);
-        if( decode_residual(h, sl, h->intra_gb_ptr, h->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX+p, scan, NULL, 16) < 0){
+        AV_ZERO128(sl->mb_luma_dc[p]+0);
+        AV_ZERO128(sl->mb_luma_dc[p]+8);
+        AV_ZERO128(sl->mb_luma_dc[p]+16);
+        AV_ZERO128(sl->mb_luma_dc[p]+24);
+        if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) {
             return -1; //FIXME continue if partitioned and other return -1 too
         }
 
-        assert((cbp&15) == 0 || (cbp&15) == 15);
+        av_assert2((cbp&15) == 0 || (cbp&15) == 15);
 
         if(cbp&15){
             for(i8x8=0; i8x8<4; i8x8++){
                 for(i4x4=0; i4x4<4; i4x4++){
                     const int index= i4x4 + 4*i8x8 + p*16;
-                    if( decode_residual(h, sl, h->intra_gb_ptr, h->mb + (16*index << pixel_shift),
+                    if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift),
                         index, scan + 1, h->dequant4_coeff[p][qscale], 15) < 0 ){
                         return -1;
                     }
@@ -664,7 +676,7 @@ static av_always_inline int decode_luma_residual(H264Context *h, H264SliceContex
         for(i8x8=0; i8x8<4; i8x8++){
             if(cbp & (1<<i8x8)){
                 if(IS_8x8DCT(mb_type)){
-                    int16_t *buf = &h->mb[64*i8x8+256*p << pixel_shift];
+                    int16_t *buf = &sl->mb[64*i8x8+256*p << pixel_shift];
                     uint8_t *nnz;
                     for(i4x4=0; i4x4<4; i4x4++){
                         const int index= i4x4 + 4*i8x8 + p*16;
@@ -678,7 +690,7 @@ static av_always_inline int decode_luma_residual(H264Context *h, H264SliceContex
                 }else{
                     for(i4x4=0; i4x4<4; i4x4++){
                         const int index= i4x4 + 4*i8x8 + p*16;
-                        if( decode_residual(h, sl, gb, h->mb + (16*index << pixel_shift), index,
+                        if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift), index,
                                             scan, h->dequant4_coeff[cqm][qscale], 16) < 0 ){
                             return -1;
                         }
@@ -694,7 +706,7 @@ static av_always_inline int decode_luma_residual(H264Context *h, H264SliceContex
     }
 }
 
-int ff_h264_decode_mb_cavlc(H264Context *h, H264SliceContext *sl)
+int ff_h264_decode_mb_cavlc(const H264Context *h, H264SliceContext *sl)
 {
     int mb_xy;
     int partition_count;
@@ -702,33 +714,34 @@ int ff_h264_decode_mb_cavlc(H264Context *h, H264SliceContext *sl)
     int dct8x8_allowed= h->pps.transform_8x8_mode;
     int decode_chroma = h->sps.chroma_format_idc == 1 || h->sps.chroma_format_idc == 2;
     const int pixel_shift = h->pixel_shift;
+    unsigned local_ref_count[2];
 
-    mb_xy = h->mb_xy = h->mb_x + h->mb_y*h->mb_stride;
+    mb_xy = sl->mb_xy = sl->mb_x + sl->mb_y*h->mb_stride;
 
-    tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, h->mb_x, h->mb_y);
+    tprintf(h->avctx, "pic:%d mb:%d/%d\n", h->frame_num, sl->mb_x, sl->mb_y);
     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
                 down the code */
     if (sl->slice_type_nos != AV_PICTURE_TYPE_I) {
-        if(h->mb_skip_run==-1)
-            h->mb_skip_run= get_ue_golomb(&h->gb);
+        if (sl->mb_skip_run == -1)
+            sl->mb_skip_run = get_ue_golomb_long(&sl->gb);
 
-        if (h->mb_skip_run--) {
-            if(FRAME_MBAFF(h) && (h->mb_y&1) == 0){
-                if(h->mb_skip_run==0)
-                    h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
+        if (sl->mb_skip_run--) {
+            if (FRAME_MBAFF(h) && (sl->mb_y & 1) == 0) {
+                if (sl->mb_skip_run == 0)
+                    sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
             }
             decode_mb_skip(h, sl);
             return 0;
         }
     }
     if (FRAME_MBAFF(h)) {
-        if( (h->mb_y&1) == 0 )
-            h->mb_mbaff = h->mb_field_decoding_flag = get_bits1(&h->gb);
+        if ((sl->mb_y & 1) == 0)
+            sl->mb_mbaff = sl->mb_field_decoding_flag = get_bits1(&sl->gb);
     }
 
     sl->prev_mb_skipped = 0;
 
-    mb_type= get_ue_golomb(&h->gb);
+    mb_type= get_ue_golomb(&sl->gb);
     if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
         if(mb_type < 23){
             partition_count= b_mb_type_info[mb_type].partition_count;
@@ -746,12 +759,12 @@ int ff_h264_decode_mb_cavlc(H264Context *h, H264SliceContext *sl)
             goto decode_intra_mb;
         }
     }else{
-       assert(sl->slice_type_nos == AV_PICTURE_TYPE_I);
+       av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_I);
         if (sl->slice_type == AV_PICTURE_TYPE_SI && mb_type)
             mb_type--;
 decode_intra_mb:
         if(mb_type > 25){
-            av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), h->mb_x, h->mb_y);
+            av_log(h->avctx, AV_LOG_ERROR, "mb_type %d in %c slice too large at %d %d\n", mb_type, av_get_picture_type_char(sl->slice_type), sl->mb_x, sl->mb_y);
             return -1;
         }
         partition_count=0;
@@ -760,7 +773,7 @@ decode_intra_mb:
         mb_type= i_mb_type_info[mb_type].type;
     }
 
-    if(MB_FIELD(h))
+    if (MB_FIELD(sl))
         mb_type |= MB_TYPE_INTERLACED;
 
     h->slice_table[mb_xy] = sl->slice_num;
@@ -770,12 +783,12 @@ decode_intra_mb:
                             h->sps.bit_depth_luma;
 
         // We assume these blocks are very rare so we do not optimize it.
-        h->intra_pcm_ptr = align_get_bits(&h->gb);
-        if (get_bits_left(&h->gb) < mb_size) {
+        sl->intra_pcm_ptr = align_get_bits(&sl->gb);
+        if (get_bits_left(&sl->gb) < mb_size) {
             av_log(h->avctx, AV_LOG_ERROR, "Not enough data for an intra PCM block.\n");
             return AVERROR_INVALIDDATA;
         }
-        skip_bits_long(&h->gb, mb_size);
+        skip_bits_long(&sl->gb, mb_size);
 
         // In deblocking, the quantizer is 0
         h->cur_pic.qscale_table[mb_xy] = 0;
@@ -786,6 +799,9 @@ decode_intra_mb:
         return 0;
     }
 
+    local_ref_count[0] = sl->ref_count[0] << MB_MBAFF(sl);
+    local_ref_count[1] = sl->ref_count[1] << MB_MBAFF(sl);
+
     fill_decode_neighbors(h, sl, mb_type);
     fill_decode_caches(h, sl, mb_type);
 
@@ -796,7 +812,7 @@ decode_intra_mb:
         if(IS_INTRA4x4(mb_type)){
             int i;
             int di = 1;
-            if(dct8x8_allowed && get_bits1(&h->gb)){
+            if(dct8x8_allowed && get_bits1(&sl->gb)){
                 mb_type |= MB_TYPE_8x8DCT;
                 di = 4;
             }
@@ -805,8 +821,8 @@ decode_intra_mb:
             for(i=0; i<16; i+=di){
                 int mode = pred_intra_mode(h, sl, i);
 
-                if(!get_bits1(&h->gb)){
-                    const int rem_mode= get_bits(&h->gb, 3);
+                if(!get_bits1(&sl->gb)){
+                    const int rem_mode= get_bits(&sl->gb, 3);
                     mode = rem_mode + (rem_mode >= mode);
                 }
 
@@ -824,7 +840,7 @@ decode_intra_mb:
                 return -1;
         }
         if(decode_chroma){
-            pred_mode= ff_h264_check_intra_pred_mode(h, sl, get_ue_golomb_31(&h->gb), 1);
+            pred_mode= ff_h264_check_intra_pred_mode(h, sl, get_ue_golomb_31(&sl->gb), 1);
             if(pred_mode < 0)
                 return -1;
             sl->chroma_pred_mode = pred_mode;
@@ -836,15 +852,15 @@ decode_intra_mb:
 
         if (sl->slice_type_nos == AV_PICTURE_TYPE_B) {
             for(i=0; i<4; i++){
-                h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
-                if(h->sub_mb_type[i] >=13){
-                    av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
+                sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
+                if(sl->sub_mb_type[i] >=13){
+                    av_log(h->avctx, AV_LOG_ERROR, "B sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
                     return -1;
                 }
-                sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
-                h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
+                sub_partition_count[i]= b_sub_mb_type_info[ sl->sub_mb_type[i] ].partition_count;
+                sl->sub_mb_type[i]=      b_sub_mb_type_info[ sl->sub_mb_type[i] ].type;
             }
-            if( IS_DIRECT(h->sub_mb_type[0]|h->sub_mb_type[1]|h->sub_mb_type[2]|h->sub_mb_type[3])) {
+            if( IS_DIRECT(sl->sub_mb_type[0]|sl->sub_mb_type[1]|sl->sub_mb_type[2]|sl->sub_mb_type[3])) {
                 ff_h264_pred_direct_motion(h, sl, &mb_type);
                 sl->ref_cache[0][scan8[4]] =
                 sl->ref_cache[1][scan8[4]] =
@@ -852,30 +868,30 @@ decode_intra_mb:
                 sl->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
             }
         }else{
-            assert(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
+            av_assert2(sl->slice_type_nos == AV_PICTURE_TYPE_P); //FIXME SP correct ?
             for(i=0; i<4; i++){
-                h->sub_mb_type[i]= get_ue_golomb_31(&h->gb);
-                if(h->sub_mb_type[i] >=4){
-                    av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", h->sub_mb_type[i], h->mb_x, h->mb_y);
+                sl->sub_mb_type[i]= get_ue_golomb_31(&sl->gb);
+                if(sl->sub_mb_type[i] >=4){
+                    av_log(h->avctx, AV_LOG_ERROR, "P sub_mb_type %u out of range at %d %d\n", sl->sub_mb_type[i], sl->mb_x, sl->mb_y);
                     return -1;
                 }
-                sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
-                h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
+                sub_partition_count[i]= p_sub_mb_type_info[ sl->sub_mb_type[i] ].partition_count;
+                sl->sub_mb_type[i]=      p_sub_mb_type_info[ sl->sub_mb_type[i] ].type;
             }
         }
 
-        for(list=0; list<h->list_count; list++){
-            int ref_count = IS_REF0(mb_type) ? 1 : h->ref_count[list] << MB_MBAFF(h);
+        for (list = 0; list < sl->list_count; list++) {
+            int ref_count = IS_REF0(mb_type) ? 1 : local_ref_count[list];
             for(i=0; i<4; i++){
-                if(IS_DIRECT(h->sub_mb_type[i])) continue;
-                if(IS_DIR(h->sub_mb_type[i], 0, list)){
+                if(IS_DIRECT(sl->sub_mb_type[i])) continue;
+                if(IS_DIR(sl->sub_mb_type[i], 0, list)){
                     unsigned int tmp;
                     if(ref_count == 1){
                         tmp= 0;
                     }else if(ref_count == 2){
-                        tmp= get_bits1(&h->gb)^1;
+                        tmp= get_bits1(&sl->gb)^1;
                     }else{
-                        tmp= get_ue_golomb_31(&h->gb);
+                        tmp= get_ue_golomb_31(&sl->gb);
                         if(tmp>=ref_count){
                             av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", tmp);
                             return -1;
@@ -890,27 +906,27 @@ decode_intra_mb:
         }
 
         if(dct8x8_allowed)
-            dct8x8_allowed = get_dct8x8_allowed(h);
+            dct8x8_allowed = get_dct8x8_allowed(h, sl);
 
-        for(list=0; list<h->list_count; list++){
+        for (list = 0; list < sl->list_count; list++) {
             for(i=0; i<4; i++){
-                if(IS_DIRECT(h->sub_mb_type[i])) {
+                if(IS_DIRECT(sl->sub_mb_type[i])) {
                     sl->ref_cache[list][ scan8[4*i] ] = sl->ref_cache[list][ scan8[4*i]+1 ];
                     continue;
                 }
                 sl->ref_cache[list][ scan8[4*i]   ]=sl->ref_cache[list][ scan8[4*i]+1 ]=
                 sl->ref_cache[list][ scan8[4*i]+8 ]=sl->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
 
-                if(IS_DIR(h->sub_mb_type[i], 0, list)){
-                    const int sub_mb_type= h->sub_mb_type[i];
+                if(IS_DIR(sl->sub_mb_type[i], 0, list)){
+                    const int sub_mb_type= sl->sub_mb_type[i];
                     const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
                     for(j=0; j<sub_partition_count[i]; j++){
                         int mx, my;
                         const int index= 4*i + block_width*j;
                         int16_t (* mv_cache)[2]= &sl->mv_cache[list][ scan8[index] ];
                         pred_motion(h, sl, index, block_width, list, sl->ref_cache[list][ scan8[index] ], &mx, &my);
-                        mx += get_se_golomb(&h->gb);
-                        my += get_se_golomb(&h->gb);
+                        mx += get_se_golomb(&sl->gb);
+                        my += get_se_golomb(&sl->gb);
                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
 
                         if(IS_SUB_8X8(sub_mb_type)){
@@ -942,17 +958,16 @@ decode_intra_mb:
         int list, mx, my, i;
          //FIXME we should set ref_idx_l? to 0 if we use that later ...
         if(IS_16X16(mb_type)){
-            for(list=0; list<h->list_count; list++){
+            for (list = 0; list < sl->list_count; list++) {
                     unsigned int val;
                     if(IS_DIR(mb_type, 0, list)){
-                        int rc = h->ref_count[list] << MB_MBAFF(h);
-                        if (rc == 1) {
+                        if(local_ref_count[list]==1){
                             val= 0;
-                        } else if (rc == 2) {
-                            val= get_bits1(&h->gb)^1;
+                        } else if(local_ref_count[list]==2){
+                            val= get_bits1(&sl->gb)^1;
                         }else{
-                            val= get_ue_golomb_31(&h->gb);
-                            if (val >= rc) {
+                            val= get_ue_golomb_31(&sl->gb);
+                            if (val >= local_ref_count[list]){
                                 av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                 return -1;
                             }
@@ -960,11 +975,11 @@ decode_intra_mb:
                     fill_rectangle(&sl->ref_cache[list][ scan8[0] ], 4, 4, 8, val, 1);
                     }
             }
-            for(list=0; list<h->list_count; list++){
+            for (list = 0; list < sl->list_count; list++) {
                 if(IS_DIR(mb_type, 0, list)){
                     pred_motion(h, sl, 0, 4, list, sl->ref_cache[list][ scan8[0] ], &mx, &my);
-                    mx += get_se_golomb(&h->gb);
-                    my += get_se_golomb(&h->gb);
+                    mx += get_se_golomb(&sl->gb);
+                    my += get_se_golomb(&sl->gb);
                     tprintf(h->avctx, "final mv:%d %d\n", mx, my);
 
                     fill_rectangle(sl->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
@@ -972,18 +987,17 @@ decode_intra_mb:
             }
         }
         else if(IS_16X8(mb_type)){
-            for(list=0; list<h->list_count; list++){
+            for (list = 0; list < sl->list_count; list++) {
                     for(i=0; i<2; i++){
                         unsigned int val;
                         if(IS_DIR(mb_type, i, list)){
-                            int rc = h->ref_count[list] << MB_MBAFF(h);
-                            if (rc == 1) {
+                            if(local_ref_count[list] == 1) {
                                 val= 0;
-                            } else if (rc == 2) {
-                                val= get_bits1(&h->gb)^1;
+                            } else if(local_ref_count[list] == 2) {
+                                val= get_bits1(&sl->gb)^1;
                             }else{
-                                val= get_ue_golomb_31(&h->gb);
-                                if (val >= rc) {
+                                val= get_ue_golomb_31(&sl->gb);
+                                if (val >= local_ref_count[list]){
                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                     return -1;
                                 }
@@ -993,13 +1007,13 @@ decode_intra_mb:
                         fill_rectangle(&sl->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, val, 1);
                     }
             }
-            for(list=0; list<h->list_count; list++){
+            for (list = 0; list < sl->list_count; list++) {
                 for(i=0; i<2; i++){
                     unsigned int val;
                     if(IS_DIR(mb_type, i, list)){
                         pred_16x8_motion(h, sl, 8*i, list, sl->ref_cache[list][scan8[0] + 16*i], &mx, &my);
-                        mx += get_se_golomb(&h->gb);
-                        my += get_se_golomb(&h->gb);
+                        mx += get_se_golomb(&sl->gb);
+                        my += get_se_golomb(&sl->gb);
                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
 
                         val= pack16to32(mx,my);
@@ -1009,19 +1023,18 @@ decode_intra_mb:
                 }
             }
         }else{
-            assert(IS_8X16(mb_type));
-            for(list=0; list<h->list_count; list++){
+            av_assert2(IS_8X16(mb_type));
+            for (list = 0; list < sl->list_count; list++) {
                     for(i=0; i<2; i++){
                         unsigned int val;
                         if(IS_DIR(mb_type, i, list)){ //FIXME optimize
-                            int rc = h->ref_count[list] << MB_MBAFF(h);
-                            if (rc == 1) {
+                            if(local_ref_count[list]==1){
                                 val= 0;
-                            } else if (rc == 2) {
-                                val= get_bits1(&h->gb)^1;
+                            } else if(local_ref_count[list]==2){
+                                val= get_bits1(&sl->gb)^1;
                             }else{
-                                val= get_ue_golomb_31(&h->gb);
-                                if (val >= rc) {
+                                val= get_ue_golomb_31(&sl->gb);
+                                if (val >= local_ref_count[list]){
                                     av_log(h->avctx, AV_LOG_ERROR, "ref %u overflow\n", val);
                                     return -1;
                                 }
@@ -1031,13 +1044,13 @@ decode_intra_mb:
                         fill_rectangle(&sl->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, val, 1);
                     }
             }
-            for(list=0; list<h->list_count; list++){
+            for (list = 0; list < sl->list_count; list++) {
                 for(i=0; i<2; i++){
                     unsigned int val;
                     if(IS_DIR(mb_type, i, list)){
                         pred_8x16_motion(h, sl, i*4, list, sl->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
-                        mx += get_se_golomb(&h->gb);
-                        my += get_se_golomb(&h->gb);
+                        mx += get_se_golomb(&sl->gb);
+                        my += get_se_golomb(&sl->gb);
                         tprintf(h->avctx, "final mv:%d %d\n", mx, my);
 
                         val= pack16to32(mx,my);
@@ -1053,29 +1066,34 @@ decode_intra_mb:
         write_back_motion(h, sl, mb_type);
 
     if(!IS_INTRA16x16(mb_type)){
-        cbp= get_ue_golomb(&h->gb);
+        cbp= get_ue_golomb(&sl->gb);
 
         if(decode_chroma){
             if(cbp > 47){
-                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
+                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
                 return -1;
             }
             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp[cbp];
             else                     cbp= golomb_to_inter_cbp   [cbp];
         }else{
             if(cbp > 15){
-                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, h->mb_x, h->mb_y);
+                av_log(h->avctx, AV_LOG_ERROR, "cbp too large (%u) at %d %d\n", cbp, sl->mb_x, sl->mb_y);
                 return -1;
             }
             if(IS_INTRA4x4(mb_type)) cbp= golomb_to_intra4x4_cbp_gray[cbp];
             else                     cbp= golomb_to_inter_cbp_gray[cbp];
         }
+    } else {
+        if (!decode_chroma && cbp>15) {
+            av_log(h->avctx, AV_LOG_ERROR, "gray chroma\n");
+            return AVERROR_INVALIDDATA;
+        }
     }
 
     if(dct8x8_allowed && (cbp&15) && !IS_INTRA(mb_type)){
-        mb_type |= MB_TYPE_8x8DCT*get_bits1(&h->gb);
+        mb_type |= MB_TYPE_8x8DCT*get_bits1(&sl->gb);
     }
-    h->cbp=
+    sl->cbp=
     h->cbp_table[mb_xy]= cbp;
     h->cur_pic.mb_type[mb_xy] = mb_type;
 
@@ -1083,7 +1101,7 @@ decode_intra_mb:
         int i4x4, i8x8, chroma_idx;
         int dquant;
         int ret;
-        GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
+        GetBitContext *gb = &sl->gb;
         const uint8_t *scan, *scan8x8;
         const int max_qp = 51 + 6*(h->sps.bit_depth_luma-8);
 
@@ -1095,7 +1113,7 @@ decode_intra_mb:
             scan    = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
         }
 
-        dquant= get_se_golomb(&h->gb);
+        dquant= get_se_golomb(&sl->gb);
 
         sl->qscale += dquant;
 
@@ -1103,7 +1121,7 @@ decode_intra_mb:
             if (sl->qscale < 0) sl->qscale += max_qp + 1;
             else                sl->qscale -= max_qp+1;
             if (((unsigned)sl->qscale) > max_qp){
-                av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, h->mb_x, h->mb_y);
+                av_log(h->avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, sl->mb_x, sl->mb_y);
                 return -1;
             }
         }
@@ -1122,12 +1140,15 @@ decode_intra_mb:
             if (decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ) {
                 return -1;
             }
-        } else if (CHROMA422(h)) {
+        } else {
+            const int num_c8x8 = h->sps.chroma_format_idc;
+
             if(cbp&0x30){
                 for(chroma_idx=0; chroma_idx<2; chroma_idx++)
-                    if (decode_residual(h, sl, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
-                                        CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan,
-                                        NULL, 8) < 0) {
+                    if (decode_residual(h, sl, gb, sl->mb + ((256 + 16*16*chroma_idx) << pixel_shift),
+                                        CHROMA_DC_BLOCK_INDEX+chroma_idx,
+                                        CHROMA422(h) ? chroma422_dc_scan : chroma_dc_scan,
+                                        NULL, 4*num_c8x8) < 0) {
                         return -1;
                     }
             }
@@ -1135,8 +1156,8 @@ decode_intra_mb:
             if(cbp&0x20){
                 for(chroma_idx=0; chroma_idx<2; chroma_idx++){
                     const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
-                    int16_t *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
-                    for (i8x8 = 0; i8x8 < 2; i8x8++) {
+                    int16_t *mb = sl->mb + (16*(16 + 16*chroma_idx) << pixel_shift);
+                    for (i8x8 = 0; i8x8<num_c8x8; i8x8++) {
                         for (i4x4 = 0; i4x4 < 4; i4x4++) {
                             const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4;
                             if (decode_residual(h, sl, gb, mb, index, scan + 1, qmul, 15) < 0)
@@ -1149,28 +1170,6 @@ decode_intra_mb:
                 fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
                 fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
             }
-        } else /* yuv420 */ {
-            if(cbp&0x30){
-                for(chroma_idx=0; chroma_idx<2; chroma_idx++)
-                    if( decode_residual(h, sl, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){
-                        return -1;
-                    }
-            }
-
-            if(cbp&0x20){
-                for(chroma_idx=0; chroma_idx<2; chroma_idx++){
-                    const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][sl->chroma_qp[chroma_idx]];
-                    for(i4x4=0; i4x4<4; i4x4++){
-                        const int index= 16 + 16*chroma_idx + i4x4;
-                        if( decode_residual(h, sl, gb, h->mb + (16*index << pixel_shift), index, scan + 1, qmul, 15) < 0){
-                            return -1;
-                        }
-                    }
-                }
-            }else{
-                fill_rectangle(&sl->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1);
-                fill_rectangle(&sl->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1);
-            }
         }
     }else{
         fill_rectangle(&sl->non_zero_count_cache[scan8[ 0]], 4, 4, 8, 0, 1);