Non-AltiVec optimizations already present at the top

[ffmpeg] / libavcodec / vp3.c
diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c

index 39df4f59ab5d7c581643847812f2606052398d43..b7c2dcd5f3038f01eb4e8b5106ba40230a8fc858 100644 (file)
--- a/libavcodec/vp3.c
+++ b/libavcodec/vp3.c
@@ -1,18 +1,20 @@
  /*
   * Copyright (C) 2003-2004 the ffmpeg project
   *
- * This library is free software; you can redistribute it and/or
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
+ * version 2.1 of the License, or (at your option) any later version.
   *
- * This library is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   *
   */
@@ -315,7 +317,7 @@ typedef struct Vp3DecodeContext {
      int last_coded_c_fragment;
  
      uint8_t edge_emu_buffer[9*2048]; //FIXME dynamic alloc
-    uint8_t qscale_table[2048]; //FIXME dynamic alloc (width+15)/16
+    int8_t qscale_table[2048]; //FIXME dynamic alloc (width+15)/16
  
      /* Huffman decode */
      int hti;
@@ -344,8 +346,6 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb);
  static int init_block_mapping(Vp3DecodeContext *s)
  {
      int i, j;
-    signed int hilbert_walk_y[16];
-    signed int hilbert_walk_c[16];
      signed int hilbert_walk_mb[4];
  
      int current_fragment = 0;
@@ -384,41 +384,6 @@ static int init_block_mapping(Vp3DecodeContext *s)
  
      debug_vp3("  vp3: initialize block mapping tables\n");
  
-    /* figure out hilbert pattern per these frame dimensions */
-    hilbert_walk_y[0]  = 1;
-    hilbert_walk_y[1]  = 1;
-    hilbert_walk_y[2]  = s->fragment_width;
-    hilbert_walk_y[3]  = -1;
-    hilbert_walk_y[4]  = s->fragment_width;
-    hilbert_walk_y[5]  = s->fragment_width;
-    hilbert_walk_y[6]  = 1;
-    hilbert_walk_y[7]  = -s->fragment_width;
-    hilbert_walk_y[8]  = 1;
-    hilbert_walk_y[9]  = s->fragment_width;
-    hilbert_walk_y[10]  = 1;
-    hilbert_walk_y[11] = -s->fragment_width;
-    hilbert_walk_y[12] = -s->fragment_width;
-    hilbert_walk_y[13] = -1;
-    hilbert_walk_y[14] = -s->fragment_width;
-    hilbert_walk_y[15] = 1;
-
-    hilbert_walk_c[0]  = 1;
-    hilbert_walk_c[1]  = 1;
-    hilbert_walk_c[2]  = s->fragment_width / 2;
-    hilbert_walk_c[3]  = -1;
-    hilbert_walk_c[4]  = s->fragment_width / 2;
-    hilbert_walk_c[5]  = s->fragment_width / 2;
-    hilbert_walk_c[6]  = 1;
-    hilbert_walk_c[7]  = -s->fragment_width / 2;
-    hilbert_walk_c[8]  = 1;
-    hilbert_walk_c[9]  = s->fragment_width / 2;
-    hilbert_walk_c[10]  = 1;
-    hilbert_walk_c[11] = -s->fragment_width / 2;
-    hilbert_walk_c[12] = -s->fragment_width / 2;
-    hilbert_walk_c[13] = -1;
-    hilbert_walk_c[14] = -s->fragment_width / 2;
-    hilbert_walk_c[15] = 1;
-
      hilbert_walk_mb[0] = 1;
      hilbert_walk_mb[1] = s->macroblock_width;
      hilbert_walk_mb[2] = 1;
@@ -439,7 +404,6 @@ static int init_block_mapping(Vp3DecodeContext *s)
              current_height = 0;
              superblock_row_inc = 3 * s->fragment_width -
                  (s->y_superblock_width * 4 - s->fragment_width);
-            hilbert = hilbert_walk_y;
  
              /* the first operation for this variable is to advance by 1 */
              current_fragment = -1;
@@ -453,7 +417,6 @@ static int init_block_mapping(Vp3DecodeContext *s)
              current_height = 0;
              superblock_row_inc = 3 * (s->fragment_width / 2) -
                  (s->c_superblock_width * 4 - s->fragment_width / 2);
-            hilbert = hilbert_walk_c;
  
              /* the first operation for this variable is to advance by 1 */
              current_fragment = s->fragment_start[1] - 1;
@@ -467,7 +430,6 @@ static int init_block_mapping(Vp3DecodeContext *s)
              current_height = 0;
              superblock_row_inc = 3 * (s->fragment_width / 2) -
                  (s->c_superblock_width * 4 - s->fragment_width / 2);
-            hilbert = hilbert_walk_c;
  
              /* the first operation for this variable is to advance by 1 */
              current_fragment = s->fragment_start[2] - 1;
@@ -485,7 +447,7 @@ static int init_block_mapping(Vp3DecodeContext *s)
  
          /* iterate through all 16 fragments in a superblock */
          for (j = 0; j < 16; j++) {
-            current_fragment += hilbert[j];
+            current_fragment += travel_width[j] + right_edge * travel_height[j];
              current_width += travel_width[j];
              current_height += travel_height[j];
  
@@ -647,7 +609,7 @@ static void init_dequantizer(Vp3DecodeContext *s)
  {
      int ac_scale_factor = s->coded_ac_scale_factor[s->quality_index];
      int dc_scale_factor = s->coded_dc_scale_factor[s->quality_index];
-    int i, j, plane, inter, qri, bmi, bmj, qistart;
+    int i, plane, inter, qri, bmi, bmj, qistart;
  
      debug_vp3("  vp3: initializing dequantization tables\n");
  
@@ -1367,10 +1329,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
      int x, y;
      int i = first_fragment;
  
-    short predicted_dc;
-
-    /* validity flags for the left, up-left, up, and up-right fragments */
-    int fl, ful, fu, fur;
+    int predicted_dc;
  
      /* DC values for the left, up-left, up, and up-right fragments */
      int vl, vul, vu, vur;
@@ -1384,26 +1343,24 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
       *   1: up multiplier
       *   2: up-right multiplier
       *   3: left multiplier
-     *   4: mask
-     *   5: right bit shift divisor (e.g., 7 means >>=7, a.k.a. div by 128)
       */
-    int predictor_transform[16][6] = {
-        {  0,  0,  0,  0,   0,  0 },
-        {  0,  0,  0,  1,   0,  0 },        // PL
-        {  0,  0,  1,  0,   0,  0 },        // PUR
-        {  0,  0, 53, 75, 127,  7 },        // PUR|PL
-        {  0,  1,  0,  0,   0,  0 },        // PU
-        {  0,  1,  0,  1,   1,  1 },        // PU|PL
-        {  0,  1,  0,  0,   0,  0 },        // PU|PUR
-        {  0,  0, 53, 75, 127,  7 },        // PU|PUR|PL
-        {  1,  0,  0,  0,   0,  0 },        // PUL
-        {  0,  0,  0,  1,   0,  0 },        // PUL|PL
-        {  1,  0,  1,  0,   1,  1 },        // PUL|PUR
-        {  0,  0, 53, 75, 127,  7 },        // PUL|PUR|PL
-        {  0,  1,  0,  0,   0,  0 },        // PUL|PU
-        {-26, 29,  0, 29,  31,  5 },        // PUL|PU|PL
-        {  3, 10,  3,  0,  15,  4 },        // PUL|PU|PUR
-        {-26, 29,  0, 29,  31,  5 }         // PUL|PU|PUR|PL
+    int predictor_transform[16][4] = {
+        {  0,  0,  0,  0},
+        {  0,  0,  0,128},        // PL
+        {  0,  0,128,  0},        // PUR
+        {  0,  0, 53, 75},        // PUR|PL
+        {  0,128,  0,  0},        // PU
+        {  0, 64,  0, 64},        // PU|PL
+        {  0,128,  0,  0},        // PU|PUR
+        {  0,  0, 53, 75},        // PU|PUR|PL
+        {128,  0,  0,  0},        // PUL
+        {  0,  0,  0,128},        // PUL|PL
+        { 64,  0, 64,  0},        // PUL|PUR
+        {  0,  0, 53, 75},        // PUL|PUR|PL
+        {  0,128,  0,  0},        // PUL|PU
+       {-104,116,  0,116},        // PUL|PU|PL
+        { 24, 80, 24,  0},        // PUL|PU|PUR
+       {-104,116,  0,116}         // PUL|PU|PUR|PL
      };
  
      /* This table shows which types of blocks can use other blocks for
@@ -1445,32 +1402,32 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
  
                  current_frame_type =
                      compatible_frame[s->all_fragments[i].coding_method];
-                debug_dc_pred(" frag %d: group %d, orig DC = %d, ",
-                    i, -1, DC_COEFF(i));
+                debug_dc_pred(" frag %d: orig DC = %d, ",
+                    i, DC_COEFF(i));
  
                  transform= 0;
                  if(x){
                      l= i-1;
                      vl = DC_COEFF(l);
-                    fl = FRAME_CODED(l) && COMPATIBLE_FRAME(l);
-                    transform |= fl*PL;
+                    if(FRAME_CODED(l) && COMPATIBLE_FRAME(l))
+                        transform |= PL;
                  }
                  if(y){
                      u= i-fragment_width;
                      vu = DC_COEFF(u);
-                    fu = FRAME_CODED(u) && COMPATIBLE_FRAME(u);
-                    transform |= fu*PU;
+                    if(FRAME_CODED(u) && COMPATIBLE_FRAME(u))
+                        transform |= PU;
                      if(x){
                          ul= i-fragment_width-1;
                          vul = DC_COEFF(ul);
-                        ful = FRAME_CODED(ul) && COMPATIBLE_FRAME(ul);
-                        transform |= ful*PUL;
+                        if(FRAME_CODED(ul) && COMPATIBLE_FRAME(ul))
+                            transform |= PUL;
                      }
                      if(x + 1 < fragment_width){
                          ur= i-fragment_width+1;
                          vur = DC_COEFF(ur);
-                        fur = FRAME_CODED(ur) && COMPATIBLE_FRAME(ur);
-                        transform |= fur*PUR;
+                        if(FRAME_CODED(ur) && COMPATIBLE_FRAME(ur))
+                            transform |= PUR;
                      }
                  }
  
@@ -1493,13 +1450,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
                          (predictor_transform[transform][2] * vur) +
                          (predictor_transform[transform][3] * vl);
  
-                    /* if there is a shift value in the transform, add
-                     * the sign bit before the shift */
-                    if (predictor_transform[transform][5] != 0) {
-                        predicted_dc += ((predicted_dc >> 15) &
-                            predictor_transform[transform][4]);
-                        predicted_dc >>= predictor_transform[transform][5];
-                    }
+                    predicted_dc /= 128;
  
                      /* check for outranging on the [ul u l] and
                       * [ul u ur l] predictors */
@@ -2066,18 +2017,14 @@ static int vp3_decode_init(AVCodecContext *avctx)
  
      if (!s->theora_tables)
      {
-        for (i = 0; i < 64; i++)
+        for (i = 0; i < 64; i++) {
              s->coded_dc_scale_factor[i] = vp31_dc_scale_factor[i];
-        for (i = 0; i < 64; i++)
              s->coded_ac_scale_factor[i] = vp31_ac_scale_factor[i];
-        for (i = 0; i < 64; i++)
              s->base_matrix[0][i] = vp31_intra_y_dequant[i];
-        for (i = 0; i < 64; i++)
              s->base_matrix[1][i] = vp31_intra_c_dequant[i];
-        for (i = 0; i < 64; i++)
              s->base_matrix[2][i] = vp31_inter_dequant[i];
-        for (i = 0; i < 64; i++)
              s->filter_limit_values[i] = vp31_filter_limit_values[i];
+        }
  
          for(inter=0; inter<2; inter++){
              for(plane=0; plane<3; plane++){
@@ -2286,10 +2233,15 @@ static int vp3_decode_frame(AVCodecContext *avctx,
                  vp3_calculate_pixel_addresses(s);
              else
                  theora_calculate_pixel_addresses(s);
+            s->pixel_addresses_inited = 1;
          }
      } else {
          /* allocate a new current frame */
          s->current_frame.reference = 3;
+        if (!s->pixel_addresses_inited) {
+            av_log(s->avctx, AV_LOG_ERROR, "vp3: first frame not a keyframe\n");
+            return -1;
+        }
          if(avctx->get_buffer(avctx, &s->current_frame) < 0) {
              av_log(s->avctx, AV_LOG_ERROR, "vp3: get_buffer() failed\n");
              return -1;