Optimize short-term prediction by reducing index arithmetic.

[ffmpeg] / libavcodec / huffyuv.c
diff --git a/libavcodec/huffyuv.c b/libavcodec/huffyuv.c

index 5ecb138045d41457320f9f63c849e9ccafef5633..4fa6ddf7f8a8c5815411b67cac6ba20a65b0f20a 100644 (file)
--- a/libavcodec/huffyuv.c
+++ b/libavcodec/huffyuv.c
@@ -35,7 +35,7 @@
  
  #define VLC_BITS 11
  
-#ifdef WORDS_BIGENDIAN
+#if HAVE_BIGENDIAN
  #define B 3
  #define G 2
  #define R 1
@@ -129,47 +129,6 @@ static const unsigned char classic_add_chroma[256] = {
      6, 12,  8, 10,  7,  9,  6,  4,  6,  2,  2,  3,  3,  3,  3,  2,
  };
  
-static inline int add_left_prediction(uint8_t *dst, uint8_t *src, int w, int acc){
-    int i;
-
-    for(i=0; i<w-1; i++){
-        acc+= src[i];
-        dst[i]= acc;
-        i++;
-        acc+= src[i];
-        dst[i]= acc;
-    }
-
-    for(; i<w; i++){
-        acc+= src[i];
-        dst[i]= acc;
-    }
-
-    return acc;
-}
-
-static inline void add_left_prediction_bgr32(uint8_t *dst, uint8_t *src, int w, int *red, int *green, int *blue){
-    int i;
-    int r,g,b;
-    r= *red;
-    g= *green;
-    b= *blue;
-
-    for(i=0; i<w; i++){
-        b+= src[4*i+B];
-        g+= src[4*i+G];
-        r+= src[4*i+R];
-
-        dst[4*i+B]= b;
-        dst[4*i+G]= g;
-        dst[4*i+R]= r;
-    }
-
-    *red= r;
-    *green= g;
-    *blue= b;
-}
-
  static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst, uint8_t *src, int w, int left){
      int i;
      if(w<32){
@@ -213,7 +172,7 @@ static inline void sub_left_prediction_bgr32(HYuvContext *s, uint8_t *dst, uint8
      *blue=  src[(w-1)*4+B];
  }
  
-static void read_len_table(uint8_t *dst, GetBitContext *gb){
+static int read_len_table(uint8_t *dst, GetBitContext *gb){
      int i, val, repeat;
  
      for(i=0; i<256;){
@@ -222,9 +181,14 @@ static void read_len_table(uint8_t *dst, GetBitContext *gb){
          if(repeat==0)
              repeat= get_bits(gb, 8);
  //printf("%d %d\n", val, repeat);
+        if(i+repeat > 256) {
+            av_log(NULL, AV_LOG_ERROR, "Error reading huffman table\n");
+            return -1;
+        }
          while (repeat--)
              dst[i++] = val;
      }
+    return 0;
  }
  
  static int generate_bits_table(uint32_t *dst, uint8_t *len_table){
@@ -379,8 +343,8 @@ static int read_huffman_tables(HYuvContext *s, uint8_t *src, int length){
      init_get_bits(&gb, src, length*8);
  
      for(i=0; i<3; i++){
-        read_len_table(s->len[i], &gb);
-
+        if(read_len_table(s->len[i], &gb)<0)
+            return -1;
          if(generate_bits_table(s->bits[i], s->len[i])<0){
              return -1;
          }
@@ -404,9 +368,11 @@ static int read_old_huffman_tables(HYuvContext *s){
      int i;
  
      init_get_bits(&gb, classic_shift_luma, sizeof(classic_shift_luma)*8);
-    read_len_table(s->len[0], &gb);
+    if(read_len_table(s->len[0], &gb)<0)
+        return -1;
      init_get_bits(&gb, classic_shift_chroma, sizeof(classic_shift_chroma)*8);
-    read_len_table(s->len[1], &gb);
+    if(read_len_table(s->len[1], &gb)<0)
+        return -1;
  
      for(i=0; i<256; i++) s->bits[0][i] = classic_add_luma  [i];
      for(i=0; i<256; i++) s->bits[1][i] = classic_add_chroma[i];
@@ -440,9 +406,7 @@ static av_cold void alloc_temp(HYuvContext *s){
              s->temp[i]= av_malloc(s->width + 16);
          }
      }else{
-        for(i=0; i<2; i++){
-            s->temp[i]= av_malloc(4*s->width + 16);
-        }
+        s->temp[0]= av_malloc(4*s->width + 16);
      }
  }
  
@@ -729,9 +693,16 @@ static void decode_422_bitstream(HYuvContext *s, int count){
  
      count/=2;
  
-    for(i=0; i<count; i++){
-        READ_2PIX(s->temp[0][2*i  ], s->temp[1][i], 1);
-        READ_2PIX(s->temp[0][2*i+1], s->temp[2][i], 2);
+    if(count >= (get_bits_left(&s->gb))/(31*4)){
+        for(i=0; i<count && get_bits_count(&s->gb) < s->gb.size_in_bits; i++){
+            READ_2PIX(s->temp[0][2*i  ], s->temp[1][i], 1);
+            READ_2PIX(s->temp[0][2*i+1], s->temp[2][i], 2);
+        }
+    }else{
+        for(i=0; i<count; i++){
+            READ_2PIX(s->temp[0][2*i  ], s->temp[1][i], 1);
+            READ_2PIX(s->temp[0][2*i+1], s->temp[2][i], 2);
+        }
      }
  }
  
@@ -740,8 +711,14 @@ static void decode_gray_bitstream(HYuvContext *s, int count){
  
      count/=2;
  
-    for(i=0; i<count; i++){
-        READ_2PIX(s->temp[0][2*i  ], s->temp[0][2*i+1], 0);
+    if(count >= (get_bits_left(&s->gb))/(31*2)){
+        for(i=0; i<count && get_bits_count(&s->gb) < s->gb.size_in_bits; i++){
+            READ_2PIX(s->temp[0][2*i  ], s->temp[0][2*i+1], 0);
+        }
+    }else{
+        for(i=0; i<count; i++){
+            READ_2PIX(s->temp[0][2*i  ], s->temp[0][2*i+1], 0);
+        }
      }
  }
  
@@ -963,6 +940,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
      if (!s->bitstream_buffer)
          return AVERROR(ENOMEM);
  
+    memset(s->bitstream_buffer + buf_size, 0, FF_INPUT_BUFFER_PADDING_SIZE);
      s->dsp.bswap_buf((uint32_t*)s->bitstream_buffer, (const uint32_t*)buf, buf_size/4);
  
      if(p->data[0])
@@ -1015,10 +993,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
              case LEFT:
              case PLANE:
                  decode_422_bitstream(s, width-2);
-                lefty= add_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty);
+                lefty= s->dsp.add_hfyu_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty);
                  if(!(s->flags&CODEC_FLAG_GRAY)){
-                    leftu= add_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu);
-                    leftv= add_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv);
+                    leftu= s->dsp.add_hfyu_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu);
+                    leftv= s->dsp.add_hfyu_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv);
                  }
  
                  for(cy=y=1; y<s->height; y++,cy++){
@@ -1029,7 +1007,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
  
                          ydst= p->data[0] + p->linesize[0]*y;
  
-                        lefty= add_left_prediction(ydst, s->temp[0], width, lefty);
+                        lefty= s->dsp.add_hfyu_left_prediction(ydst, s->temp[0], width, lefty);
                          if(s->predictor == PLANE){
                              if(y>s->interlaced)
                                  s->dsp.add_bytes(ydst, ydst - fake_ystride, width);
@@ -1045,10 +1023,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
                      vdst= p->data[2] + p->linesize[2]*cy;
  
                      decode_422_bitstream(s, width);
-                    lefty= add_left_prediction(ydst, s->temp[0], width, lefty);
+                    lefty= s->dsp.add_hfyu_left_prediction(ydst, s->temp[0], width, lefty);
                      if(!(s->flags&CODEC_FLAG_GRAY)){
-                        leftu= add_left_prediction(udst, s->temp[1], width2, leftu);
-                        leftv= add_left_prediction(vdst, s->temp[2], width2, leftv);
+                        leftu= s->dsp.add_hfyu_left_prediction(udst, s->temp[1], width2, leftu);
+                        leftv= s->dsp.add_hfyu_left_prediction(vdst, s->temp[2], width2, leftv);
                      }
                      if(s->predictor == PLANE){
                          if(cy>s->interlaced){
@@ -1066,10 +1044,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
              case MEDIAN:
                  /* first line except first 2 pixels is left predicted */
                  decode_422_bitstream(s, width-2);
-                lefty= add_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty);
+                lefty= s->dsp.add_hfyu_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty);
                  if(!(s->flags&CODEC_FLAG_GRAY)){
-                    leftu= add_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu);
-                    leftv= add_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv);
+                    leftu= s->dsp.add_hfyu_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu);
+                    leftv= s->dsp.add_hfyu_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv);
                  }
  
                  cy=y=1;
@@ -1077,20 +1055,20 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
                  /* second line is left predicted for interlaced case */
                  if(s->interlaced){
                      decode_422_bitstream(s, width);
-                    lefty= add_left_prediction(p->data[0] + p->linesize[0], s->temp[0], width, lefty);
+                    lefty= s->dsp.add_hfyu_left_prediction(p->data[0] + p->linesize[0], s->temp[0], width, lefty);
                      if(!(s->flags&CODEC_FLAG_GRAY)){
-                        leftu= add_left_prediction(p->data[1] + p->linesize[2], s->temp[1], width2, leftu);
-                        leftv= add_left_prediction(p->data[2] + p->linesize[1], s->temp[2], width2, leftv);
+                        leftu= s->dsp.add_hfyu_left_prediction(p->data[1] + p->linesize[2], s->temp[1], width2, leftu);
+                        leftv= s->dsp.add_hfyu_left_prediction(p->data[2] + p->linesize[1], s->temp[2], width2, leftv);
                      }
                      y++; cy++;
                  }
  
                  /* next 4 pixels are left predicted too */
                  decode_422_bitstream(s, 4);
-                lefty= add_left_prediction(p->data[0] + fake_ystride, s->temp[0], 4, lefty);
+                lefty= s->dsp.add_hfyu_left_prediction(p->data[0] + fake_ystride, s->temp[0], 4, lefty);
                  if(!(s->flags&CODEC_FLAG_GRAY)){
-                    leftu= add_left_prediction(p->data[1] + fake_ustride, s->temp[1], 2, leftu);
-                    leftv= add_left_prediction(p->data[2] + fake_vstride, s->temp[2], 2, leftv);
+                    leftu= s->dsp.add_hfyu_left_prediction(p->data[1] + fake_ustride, s->temp[1], 2, leftu);
+                    leftv= s->dsp.add_hfyu_left_prediction(p->data[2] + fake_vstride, s->temp[2], 2, leftv);
                  }
  
                  /* next line except the first 4 pixels is median predicted */
@@ -1158,12 +1136,12 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPac
              case LEFT:
              case PLANE:
                  decode_bgr_bitstream(s, width-1);
-                add_left_prediction_bgr32(p->data[0] + last_line+4, s->temp[0], width-1, &leftr, &leftg, &leftb);
+                s->dsp.add_hfyu_left_prediction_bgr32(p->data[0] + last_line+4, s->temp[0], width-1, &leftr, &leftg, &leftb);
  
                  for(y=s->height-2; y>=0; y--){ //Yes it is stored upside down.
                      decode_bgr_bitstream(s, width);
  
-                    add_left_prediction_bgr32(p->data[0] + p->linesize[0]*y, s->temp[0], width, &leftr, &leftg, &leftb);
+                    s->dsp.add_hfyu_left_prediction_bgr32(p->data[0] + p->linesize[0]*y, s->temp[0], width, &leftr, &leftg, &leftb);
                      if(s->predictor == PLANE){
                          if((y&s->interlaced)==0 && y<s->height-1-s->interlaced){
                              s->dsp.add_bytes(p->data[0] + p->linesize[0]*y,
@@ -1206,6 +1184,9 @@ static av_cold int decode_end(AVCodecContext *avctx)
      HYuvContext *s = avctx->priv_data;
      int i;
  
+    if (s->picture.data[0])
+        avctx->release_buffer(avctx, &s->picture);
+
      common_end(s);
      av_freep(&s->bitstream_buffer);
  
@@ -1257,11 +1238,11 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
          put_bits(&s->pb, 8, leftu= p->data[1][0]);
          put_bits(&s->pb, 8,        p->data[0][0]);
  
-        lefty= sub_left_prediction(s, s->temp[0], p->data[0]+2, width-2 , lefty);
-        leftu= sub_left_prediction(s, s->temp[1], p->data[1]+1, width2-1, leftu);
-        leftv= sub_left_prediction(s, s->temp[2], p->data[2]+1, width2-1, leftv);
+        lefty= sub_left_prediction(s, s->temp[0], p->data[0], width , 0);
+        leftu= sub_left_prediction(s, s->temp[1], p->data[1], width2, 0);
+        leftv= sub_left_prediction(s, s->temp[2], p->data[2], width2, 0);
  
-        encode_422_bitstream(s, 0, width-2);
+        encode_422_bitstream(s, 2, width-2);
  
          if(s->predictor==MEDIAN){
              int lefttopy, lefttopu, lefttopv;
@@ -1467,7 +1448,7 @@ AVCodec huffyuv_encoder = {
      encode_init,
      encode_frame,
      encode_end,
-    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_RGB32, PIX_FMT_NONE},
+    .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV422P, PIX_FMT_RGB32, PIX_FMT_NONE},
      .long_name = NULL_IF_CONFIG_SMALL("Huffyuv / HuffYUV"),
  };
  #endif
@@ -1481,7 +1462,7 @@ AVCodec ffvhuff_encoder = {
      encode_init,
      encode_frame,
      encode_end,
-    .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_YUV422P, PIX_FMT_RGB32, PIX_FMT_NONE},
+    .pix_fmts= (const enum PixelFormat[]){PIX_FMT_YUV420P, PIX_FMT_YUV422P, PIX_FMT_RGB32, PIX_FMT_NONE},
      .long_name = NULL_IF_CONFIG_SMALL("Huffyuv FFmpeg variant"),
  };
  #endif