Merge remote-tracking branch 'qatar/master'

[ffmpeg] / libavcodec / dsputil.c
diff --git a/libavcodec/dsputil.c b/libavcodec/dsputil.c

index 74b8fd7466caf70edf94f63f4cb17b7089da5f1d..f4cfecdc7e9b6b3ba8c90492e0c8b482dd3eb005 100644
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -5,20 +5,20 @@
   *
   * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
   *
- * This file is part of Libav.
+ * This file is part of FFmpeg.
   *
- * Libav is free software; you can redistribute it and/or
+ * FFmpeg is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
- * Libav is distributed in the hope that it will be useful,
+ * FFmpeg is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
+ * License along with FFmpeg; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
@@ -38,6 +38,7 @@
  #include "config.h"
  #include "ac3dec.h"
  #include "vorbis.h"
+#include "diracdsp.h"
  
  uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
  uint32_t ff_squareTbl[512] = {0, };
@@ -366,18 +367,17 @@ void ff_put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                               int line_size)
  {
      int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
  
      /* read the pixels */
      for(i=0;i<8;i++) {
-        pixels[0] = cm[block[0]];
-        pixels[1] = cm[block[1]];
-        pixels[2] = cm[block[2]];
-        pixels[3] = cm[block[3]];
-        pixels[4] = cm[block[4]];
-        pixels[5] = cm[block[5]];
-        pixels[6] = cm[block[6]];
-        pixels[7] = cm[block[7]];
+        pixels[0] = av_clip_uint8(block[0]);
+        pixels[1] = av_clip_uint8(block[1]);
+        pixels[2] = av_clip_uint8(block[2]);
+        pixels[3] = av_clip_uint8(block[3]);
+        pixels[4] = av_clip_uint8(block[4]);
+        pixels[5] = av_clip_uint8(block[5]);
+        pixels[6] = av_clip_uint8(block[6]);
+        pixels[7] = av_clip_uint8(block[7]);
  
          pixels += line_size;
          block += 8;
@@ -388,14 +388,13 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
                                   int line_size)
  {
      int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
  
      /* read the pixels */
      for(i=0;i<4;i++) {
-        pixels[0] = cm[block[0]];
-        pixels[1] = cm[block[1]];
-        pixels[2] = cm[block[2]];
-        pixels[3] = cm[block[3]];
+        pixels[0] = av_clip_uint8(block[0]);
+        pixels[1] = av_clip_uint8(block[1]);
+        pixels[2] = av_clip_uint8(block[2]);
+        pixels[3] = av_clip_uint8(block[3]);
  
          pixels += line_size;
          block += 8;
@@ -406,12 +405,11 @@ static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
                                   int line_size)
  {
      int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
  
      /* read the pixels */
      for(i=0;i<2;i++) {
-        pixels[0] = cm[block[0]];
-        pixels[1] = cm[block[1]];
+        pixels[0] = av_clip_uint8(block[0]);
+        pixels[1] = av_clip_uint8(block[1]);
  
          pixels += line_size;
          block += 8;
@@ -443,18 +441,17 @@ void ff_add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
                               int line_size)
  {
      int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
  
      /* read the pixels */
      for(i=0;i<8;i++) {
-        pixels[0] = cm[pixels[0] + block[0]];
-        pixels[1] = cm[pixels[1] + block[1]];
-        pixels[2] = cm[pixels[2] + block[2]];
-        pixels[3] = cm[pixels[3] + block[3]];
-        pixels[4] = cm[pixels[4] + block[4]];
-        pixels[5] = cm[pixels[5] + block[5]];
-        pixels[6] = cm[pixels[6] + block[6]];
-        pixels[7] = cm[pixels[7] + block[7]];
+        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
+        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
+        pixels[4] = av_clip_uint8(pixels[4] + block[4]);
+        pixels[5] = av_clip_uint8(pixels[5] + block[5]);
+        pixels[6] = av_clip_uint8(pixels[6] + block[6]);
+        pixels[7] = av_clip_uint8(pixels[7] + block[7]);
          pixels += line_size;
          block += 8;
      }
@@ -464,14 +461,13 @@ static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
                            int line_size)
  {
      int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
  
      /* read the pixels */
      for(i=0;i<4;i++) {
-        pixels[0] = cm[pixels[0] + block[0]];
-        pixels[1] = cm[pixels[1] + block[1]];
-        pixels[2] = cm[pixels[2] + block[2]];
-        pixels[3] = cm[pixels[3] + block[3]];
+        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
+        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
+        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
          pixels += line_size;
          block += 8;
      }
@@ -481,12 +477,11 @@ static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
                            int line_size)
  {
      int i;
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
  
      /* read the pixels */
      for(i=0;i<2;i++) {
-        pixels[0] = cm[pixels[0] + block[0]];
-        pixels[1] = cm[pixels[1] + block[1]];
+        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
+        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
          pixels += line_size;
          block += 8;
      }
@@ -1328,6 +1323,51 @@ void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, int stride){
  }
  #endif /* CONFIG_RV40_DECODER */
  
+#if CONFIG_DIRAC_DECODER
+#define DIRAC_MC(OPNAME)\
+void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+     OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
+    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
+    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
+    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
+}
+DIRAC_MC(put)
+DIRAC_MC(avg)
+#endif
+
  static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
      uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
      int i;
@@ -1881,7 +1921,7 @@ static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
          dst[i+0] += src[i+0];
  }
  
-static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
+static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w){
      long i;
  #if !HAVE_FAST_UNALIGNED
      if((long)src2 & (sizeof(long)-1)){
@@ -2565,12 +2605,12 @@ static void vector_clipf_c(float *dst, const float *src, float min, float max, i
      }
  }
  
-static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order, int shift)
+static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
  {
      int res = 0;
  
      while (order--)
-        res += (*v1++ * *v2++) >> shift;
+        res += *v1++ * *v2++;
  
      return res;
  }
@@ -2733,15 +2773,11 @@ static void ff_jref_idct2_add(uint8_t *dest, int line_size, DCTELEM *block)
  
  static void ff_jref_idct1_put(uint8_t *dest, int line_size, DCTELEM *block)
  {
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
-    dest[0] = cm[(block[0] + 4)>>3];
+    dest[0] = av_clip_uint8((block[0] + 4)>>3);
  }
  static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block)
  {
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
-
-    dest[0] = cm[dest[0] + ((block[0] + 4)>>3)];
+    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
  }
  
  static void just_return(void *mem av_unused, int stride av_unused, int h av_unused) { return; }
@@ -2775,7 +2811,7 @@ int ff_check_alignment(void){
                  "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
                  "and may be very slow or crash. This is not a bug in libavcodec,\n"
                  "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
-                "Do not report crashes to Libav developers.\n");
+                "Do not report crashes to FFmpeg developers.\n");
  #endif
              did_fail=1;
          }
@@ -3016,7 +3052,7 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
      c->add_8x8basis= add_8x8basis_c;
  
  #if CONFIG_VORBIS_DECODER
-    c->vorbis_inverse_coupling = vorbis_inverse_coupling;
+    c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
  #endif
  #if CONFIG_AC3_DECODER
      c->ac3_downmix = ff_ac3_downmix_c;
@@ -3155,3 +3191,8 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
      ff_init_scantable_permutation(c->idct_permutation,
                                    c->idct_permutation_type);
  }
+
+av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
+{
+    ff_dsputil_init(c, avctx);
+}