+ if (zeros_left < 0) {
+ av_log(h->avctx, AV_LOG_ERROR,
+ "negative number of zero coeffs at %d %d\n", sl->mb_x, sl->mb_y);
+ return AVERROR_INVALIDDATA;
+ }
+
+ if (h->pixel_shift) {
+ STORE_BLOCK(int32_t)
+ } else {
+ STORE_BLOCK(int16_t)
+ }
+
+ return 0;
+}
+
+static av_always_inline
+int decode_luma_residual(const H264Context *h, H264SliceContext *sl,
+ GetBitContext *gb, const uint8_t *scan,
+ const uint8_t *scan8x8, int pixel_shift,
+ int mb_type, int cbp, int p)
+{
+ int i4x4, i8x8;
+ int qscale = p == 0 ? sl->qscale : sl->chroma_qp[p - 1];
+ if(IS_INTRA16x16(mb_type)){
+ AV_ZERO128(sl->mb_luma_dc[p]+0);
+ AV_ZERO128(sl->mb_luma_dc[p]+8);
+ AV_ZERO128(sl->mb_luma_dc[p]+16);
+ AV_ZERO128(sl->mb_luma_dc[p]+24);
+ if (decode_residual(h, sl, gb, sl->mb_luma_dc[p], LUMA_DC_BLOCK_INDEX + p, scan, NULL, 16) < 0) {
+ return -1; //FIXME continue if partitioned and other return -1 too
+ }
+
+ assert((cbp&15) == 0 || (cbp&15) == 15);
+
+ if(cbp&15){
+ for(i8x8=0; i8x8<4; i8x8++){
+ for(i4x4=0; i4x4<4; i4x4++){
+ const int index= i4x4 + 4*i8x8 + p*16;
+ if( decode_residual(h, sl, gb, sl->mb + (16*index << pixel_shift),
+ index, scan + 1, h->ps.pps->dequant4_coeff[p][qscale], 15) < 0 ){
+ return -1;
+ }
+ }
+ }
+ return 0xf;
+ }else{
+ fill_rectangle(&sl->non_zero_count_cache[scan8[p*16]], 4, 4, 8, 0, 1);
+ return 0;