Simplify H.264 decode_cabac_mb_cbp_luma(), giving a ~0.5% speedup.

[ffmpeg] / libavcodec / snow.c
diff --git a/libavcodec/snow.c b/libavcodec/snow.c

index bf2a3d8666dd0b84e9471b597df7ef2673372a5c..0300e58c7ce5c2af898e2e0579dea78cd692f2f9 100644 (file)
--- a/libavcodec/snow.c
+++ b/libavcodec/snow.c
@@ -454,6 +454,7 @@ typedef struct SnowContext{
      int last_spatial_decomposition_type;
      int temporal_decomposition_type;
      int spatial_decomposition_count;
+    int last_spatial_decomposition_count;
      int temporal_decomposition_count;
      int max_ref_frames;
      int ref_frames;
@@ -1656,7 +1657,7 @@ static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
      int plane_index, level, orientation;
  
      for(plane_index=0; plane_index<3; plane_index++){
-        for(level=0; level<s->spatial_decomposition_count; level++){
+        for(level=0; level<MAX_DECOMPOSITIONS; level++){
              for(orientation=level ? 1:0; orientation<4; orientation++){
                  memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
              }
@@ -3516,8 +3517,21 @@ static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int
      }
  }
  
+static void encode_qlogs(SnowContext *s){ // writes the per-band qlog values of planes 0 and 1 into s->c using the s->header_state contexts
+    int plane_index, level, orientation;
+
+    for(plane_index=0; plane_index<2; plane_index++){ // plane 2 is never written; decode_qlogs() copies its qlogs from plane 1
+        for(level=0; level<s->spatial_decomposition_count; level++){ // only the levels currently in use are coded
+            for(orientation=level ? 1:0; orientation<4; orientation++){ // orientation 0 is visited at level 0 only
+                if(orientation==2) continue; // not coded; decode_qlogs() reuses the qlog of orientation 1 for orientation 2
+                put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1); // last argument presumably selects signed coding -- TODO confirm against put_symbol()
+            }
+        }
+    }
+}
+
  static void encode_header(SnowContext *s){
-    int plane_index, level, orientation, i;
+    int plane_index, i;
      uint8_t kstate[32];
  
      memset(kstate, MID_STATE, sizeof(kstate));
@@ -3550,14 +3564,7 @@ static void encode_header(SnowContext *s){
  //        put_rac(&s->c, s->header_state, s->rate_scalability);
          put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
  
-        for(plane_index=0; plane_index<2; plane_index++){
-            for(level=0; level<s->spatial_decomposition_count; level++){
-                for(orientation=level ? 1:0; orientation<4; orientation++){
-                    if(orientation==2) continue;
-                    put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
-                }
-            }
-        }
+        encode_qlogs(s);
      }
  
      if(!s->keyframe){
@@ -3568,8 +3575,7 @@ static void encode_header(SnowContext *s){
              update_mc |= p->last_diag_mc != p->diag_mc;
              update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
          }
-        if(!s->always_reset)
-            put_rac(&s->c, s->header_state, update_mc);
+        put_rac(&s->c, s->header_state, update_mc);
          if(update_mc){
              for(plane_index=0; plane_index<2; plane_index++){
                  Plane *p= &s->plane[plane_index];
@@ -3583,6 +3589,12 @@ static void encode_header(SnowContext *s){
                  memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
              }
          }
+        if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
+            put_rac(&s->c, s->header_state, 1);
+            put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
+            encode_qlogs(s);
+        }else
+            put_rac(&s->c, s->header_state, 0);
      }
  
      put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
@@ -3596,10 +3608,27 @@ static void encode_header(SnowContext *s){
      s->last_qbias                     = s->qbias;
      s->last_mv_scale                  = s->mv_scale;
      s->last_block_max_depth           = s->block_max_depth;
+    s->last_spatial_decomposition_count= s->spatial_decomposition_count;
  }
  
-static int decode_header(SnowContext *s){
+static void decode_qlogs(SnowContext *s){ // fills band[level][orientation].qlog for all 3 planes, reading from s->c only the values encode_qlogs() writes
      int plane_index, level, orientation;
+
+    for(plane_index=0; plane_index<3; plane_index++){
+        for(level=0; level<s->spatial_decomposition_count; level++){ // NOTE(review): no range check on spatial_decomposition_count is visible here although decode_header() reads it from the bitstream -- confirm it cannot exceed the band[] array size
+            for(orientation=level ? 1:0; orientation<4; orientation++){ // orientation 0 is visited at level 0 only
+                int q;
+                if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog; // plane 2 copies plane 1, which was filled in the previous plane_index iteration
+                else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog; // orientation 2 copies orientation 1 of the same plane and level
+                else                    q= get_symbol(&s->c, s->header_state, 1); // the only case that consumes a symbol from the stream
+                s->plane[plane_index].band[level][orientation].qlog= q;
+            }
+        }
+    }
+}
+
+static int decode_header(SnowContext *s){
+    int plane_index;
      uint8_t kstate[32];
  
      memset(kstate, MID_STATE, sizeof(kstate));
@@ -3630,21 +3659,11 @@ static int decode_header(SnowContext *s){
  //        s->rate_scalability= get_rac(&s->c, s->header_state);
          s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
  
-        for(plane_index=0; plane_index<3; plane_index++){
-            for(level=0; level<s->spatial_decomposition_count; level++){
-                for(orientation=level ? 1:0; orientation<4; orientation++){
-                    int q;
-                    if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
-                    else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
-                    else                    q= get_symbol(&s->c, s->header_state, 1);
-                    s->plane[plane_index].band[level][orientation].qlog= q;
-                }
-            }
-        }
+        decode_qlogs(s);
      }
  
      if(!s->keyframe){
-        if(s->always_reset || get_rac(&s->c, s->header_state)){
+        if(get_rac(&s->c, s->header_state)){
              for(plane_index=0; plane_index<2; plane_index++){
                  int htaps, i, sum=0, absum=0;
                  Plane *p= &s->plane[plane_index];
@@ -3663,6 +3682,10 @@ static int decode_header(SnowContext *s){
              s->plane[2].htaps  = s->plane[1].htaps;
              memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
          }
+        if(get_rac(&s->c, s->header_state)){
+            s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
+            decode_qlogs(s);
+        }
      }
  
      s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
@@ -3697,7 +3720,6 @@ static void init_qexp(void){
  static int common_init(AVCodecContext *avctx){
      SnowContext *s = avctx->priv_data;
      int width, height;
-    int level, orientation, plane_index;
      int i, j;
  
      s->avctx= avctx;
@@ -3745,12 +3767,6 @@ static int common_init(AVCodecContext *avctx){
      if(!qexp[0])
          init_qexp();
  
-    s->spatial_decomposition_count= 5;
-    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
-
-    s->chroma_h_shift= 1; //FIXME XXX
-    s->chroma_v_shift= 1;
-
  //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
  
      width= s->avctx->width;
@@ -3759,8 +3775,18 @@ static int common_init(AVCodecContext *avctx){
      s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
      s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this doesnt belong here
  
-    s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
-    s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
+    for(i=0; i<MAX_REF_FRAMES; i++)
+        for(j=0; j<MAX_REF_FRAMES; j++)
+            scale_mv_ref[i][j] = 256*(i+1)/(j+1);
+
+    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
+
+    return 0;
+}
+
+static int common_init_after_header(AVCodecContext *avctx){
+    SnowContext *s = avctx->priv_data;
+    int plane_index, level, orientation;
  
      for(plane_index=0; plane_index<3; plane_index++){
          int w= s->avctx->width;
@@ -3773,13 +3799,6 @@ static int common_init(AVCodecContext *avctx){
          s->plane[plane_index].width = w;
          s->plane[plane_index].height= h;
  
-        s->plane[plane_index].diag_mc= 1;
-        s->plane[plane_index].htaps= 6;
-        s->plane[plane_index].hcoeff[0]=  40;
-        s->plane[plane_index].hcoeff[1]= -10;
-        s->plane[plane_index].hcoeff[2]=   2;
-        s->plane[plane_index].fast_mc= 1;
-
  //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
          for(level=s->spatial_decomposition_count-1; level>=0; level--){
              for(orientation=level ? 1 : 0; orientation<4; orientation++){
@@ -3807,6 +3826,8 @@ static int common_init(AVCodecContext *avctx){
  
                  if(level)
                      b->parent= &s->plane[plane_index].band[level-1][orientation];
+                //FIXME avoid this realloc
+                av_freep(&b->x_coeff);
                  b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
              }
              w= (w+1)>>1;
@@ -3814,19 +3835,6 @@ static int common_init(AVCodecContext *avctx){
          }
      }
  
-    for(i=0; i<MAX_REF_FRAMES; i++)
-        for(j=0; j<MAX_REF_FRAMES; j++)
-            scale_mv_ref[i][j] = 256*(i+1)/(j+1);
-
-    reset_contexts(s);
-/*
-    width= s->width= avctx->width;
-    height= s->height= avctx->height;
-
-    assert(width && height);
-*/
-    s->avctx->get_buffer(s->avctx, &s->mconly_picture);
-
      return 0;
  }
  
@@ -3932,6 +3940,23 @@ static int encode_init(AVCodecContext *avctx)
          return -1;
      }
  
+    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
+
+    s->chroma_h_shift= 1; //FIXME XXX
+    s->chroma_v_shift= 1;
+
+    s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
+    s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
+
+    for(plane_index=0; plane_index<3; plane_index++){
+        s->plane[plane_index].diag_mc= 1;
+        s->plane[plane_index].htaps= 6;
+        s->plane[plane_index].hcoeff[0]=  40;
+        s->plane[plane_index].hcoeff[1]= -10;
+        s->plane[plane_index].hcoeff[2]=   2;
+        s->plane[plane_index].fast_mc= 1;
+    }
+
      common_init(avctx);
      alloc_blocks(s);
  
@@ -3959,11 +3984,6 @@ static int encode_init(AVCodecContext *avctx)
      }
      s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
  
-    for(plane_index=0; plane_index<3; plane_index++){
-        calculate_vissual_weight(s, &s->plane[plane_index]);
-    }
-
-
      avctx->coded_frame= &s->current_picture;
      switch(avctx->pix_fmt){
  //    case PIX_FMT_YUV444P:
@@ -4189,9 +4209,22 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
  
  redo_frame:
  
+    if(pict->pict_type == I_TYPE)
+        s->spatial_decomposition_count= 5;
+    else
+        s->spatial_decomposition_count= 5;
+
      s->m.pict_type = pict->pict_type;
      s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
  
+    common_init_after_header(avctx);
+
+    if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
+        for(plane_index=0; plane_index<3; plane_index++){
+            calculate_vissual_weight(s, &s->plane[plane_index]);
+        }
+    }
+
      encode_header(s);
      s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
      encode_blocks(s, 1);
@@ -4414,7 +4447,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
      ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
  
      s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
-    decode_header(s);
+    if(decode_header(s)<0)
+        return -1;
+    common_init_after_header(avctx);
  
      // realloc slice buffer for the case that spatial_decomposition_count changed
      slice_buffer_destroy(&s->sb);