Merge remote-tracking branch 'qatar/master'

[ffmpeg] / libavcodec / dsputil.c
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c

index 967406eedf2807212a177d7c22268adf22e17c56..4c13cb3ca48c7bc424e2d16980de179e0443296f 100644 (file)
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -5,20 +5,20 @@
   *
   * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
   *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
   *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
@@ -38,7 +38,7 @@
  #include "config.h"
  #include "ac3dec.h"
  #include "vorbis.h"
-#include "png.h"
+#include "diracdsp.h"
  
  uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
  uint32_t ff_squareTbl[512] = {0, };
@@ -145,6 +145,41 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s
      }
  }
  
+void ff_init_scantable_permutation(uint8_t *idct_permutation,
+                                   int idct_permutation_type)
+{
+    int i;
+
+    switch(idct_permutation_type){
+    case FF_NO_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= i;
+        break;
+    case FF_LIBMPEG2_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
+        break;
+    case FF_SIMPLE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= simple_mmx_permutation[i];
+        break;
+    case FF_TRANSPOSE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= ((i&7)<<3) | (i>>3);
+        break;
+    case FF_PARTTRANS_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
+        break;
+    case FF_SSE2_IDCT_PERM:
+        for(i=0; i<64; i++)
+            idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
+        break;
+    default:
+        av_log(NULL, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
+    }
+}
+
  static int pix_sum_c(uint8_t * pix, int line_size)
  {
      int s, i, j;
@@ -1294,6 +1329,51 @@ void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
  }
  #endif /* CONFIG_RV40_DECODER */
  
+#if CONFIG_DIRAC_DECODER
+#define DIRAC_MC(OPNAME)\
+void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+     OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
+    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
+    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
+    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
+}
+DIRAC_MC(put)
+DIRAC_MC(avg)
+#endif
+
  static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
      uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
      int i;
@@ -1847,17 +1927,6 @@ static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
          dst[i+0] += src[i+0];
  }
  
-static void add_bytes_l2_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
-    long i;
-    for(i=0; i<=w-sizeof(long); i+=sizeof(long)){
-        long a = *(long*)(src1+i);
-        long b = *(long*)(src2+i);
-        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
-    }
-    for(; i<w; i++)
-        dst[i] = src1[i]+src2[i];
-}
-
  static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
      long i;
  #if !HAVE_FAST_UNALIGNED
@@ -2474,6 +2543,18 @@ static void butterflies_float_c(float *restrict v1, float *restrict v2,
      }
  }
  
+static void butterflies_float_interleave_c(float *dst, const float *src0,
+                                           const float *src1, int len)
+{
+    int i;
+    for (i = 0; i < len; i++) {
+        float f1 = src0[i];
+        float f2 = src1[i];
+        dst[2*i    ] = f1 + f2;
+        dst[2*i + 1] = f1 - f2;
+    }
+}
+
  static float scalarproduct_float_c(const float *v1, const float *v2, int len)
  {
      float p = 0.0;
@@ -2740,7 +2821,7 @@ int ff_check_alignment(void){
                  "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                  "and may be very slow or crash. This is not a bug in libavcodec,\n"
                  "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
-                "Do not report crashes to Libav developers.\n");
+                "Do not report crashes to FFmpeg developers.\n");
  #endif
              did_fail=1;
          }
@@ -2956,7 +3037,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
      c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;
  
      c->add_bytes= add_bytes_c;
-    c->add_bytes_l2= add_bytes_l2_c;
      c->diff_bytes= diff_bytes_c;
      c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
      c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
@@ -2964,9 +3044,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
      c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
      c->bswap_buf= bswap_buf;
      c->bswap16_buf = bswap16_buf;
-#if CONFIG_PNG_DECODER
-    c->add_png_paeth_prediction= ff_add_png_paeth_prediction;
-#endif
  
      if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
          c->h263_h_loop_filter= h263_h_loop_filter_c;
@@ -3001,6 +3078,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
      c->vector_clip_int32 = vector_clip_int32_c;
      c->scalarproduct_float = scalarproduct_float_c;
      c->butterflies_float = butterflies_float_c;
+    c->butterflies_float_interleave = butterflies_float_interleave_c;
      c->vector_fmul_scalar = vector_fmul_scalar_c;
      c->vector_fmac_scalar = vector_fmac_scalar_c;
  
@@ -3123,32 +3201,6 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
              c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i];
      }
  
-    switch(c->idct_permutation_type){
-    case FF_NO_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= i;
-        break;
-    case FF_LIBMPEG2_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
-        break;
-    case FF_SIMPLE_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= simple_mmx_permutation[i];
-        break;
-    case FF_TRANSPOSE_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
-        break;
-    case FF_PARTTRANS_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= (i&0x24) | ((i&3)<<3) | ((i>>3)&3);
-        break;
-    case FF_SSE2_IDCT_PERM:
-        for(i=0; i<64; i++)
-            c->idct_permutation[i]= (i&0x38) | idct_sse2_row_perm[i&7];
-        break;
-    default:
-        av_log(avctx, AV_LOG_ERROR, "Internal error, IDCT permutation not set\n");
-    }
+    ff_init_scantable_permutation(c->idct_permutation,
+                                  c->idct_permutation_type);
  }