Add new yuv444 pixfmts to avcodec_align_dimensions2

[ffmpeg] / libavcodec / simple_idct.c
diff --git a/libavcodec/simple_idct.c b/libavcodec/simple_idct.c

index 2c026f08f2708eb3c67e92aaf7e35b51dcd4c84a..4af11bec51d75a5af53aeae076ae5bf279815696 100644 (file)
--- a/libavcodec/simple_idct.c
+++ b/libavcodec/simple_idct.c
@@ -3,25 +3,25 @@
   *
   * Copyright (c) 2001 Michael Niedermayer <michaelni@gmx.at>
   *
- * This file is part of FFmpeg.
+ * This file is part of Libav.
   *
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
   * modify it under the terms of the GNU Lesser General Public
   * License as published by the Free Software Foundation; either
   * version 2.1 of the License, or (at your option) any later version.
   *
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   * Lesser General Public License for more details.
   *
   * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
  /**
- * @file simple_idct.c
+ * @file
   * simpleidct in C.
   */
  
@@ -31,6 +31,7 @@
   */
  #include "avcodec.h"
  #include "dsputil.h"
+#include "mathops.h"
  #include "simple_idct.h"
  
  #if 0
@@ -55,37 +56,17 @@
  #define COL_SHIFT 20 // 6
  #endif
  
-#if defined(ARCH_POWERPC_405)
-
-/* signed 16x16 -> 32 multiply add accumulate */
-#define MAC16(rt, ra, rb) \
-    asm ("maclhw %0, %2, %3" : "=r" (rt) : "0" (rt), "r" (ra), "r" (rb));
-
-/* signed 16x16 -> 32 multiply */
-#define MUL16(rt, ra, rb) \
-    asm ("mullhw %0, %1, %2" : "=r" (rt) : "r" (ra), "r" (rb));
-
-#else
-
-/* signed 16x16 -> 32 multiply add accumulate */
-#define MAC16(rt, ra, rb) rt += (ra) * (rb)
-
-/* signed 16x16 -> 32 multiply */
-#define MUL16(rt, ra, rb) rt = (ra) * (rb)
-
-#endif
-
  static inline void idctRowCondDC (DCTELEM * row)
  {
          int a0, a1, a2, a3, b0, b1, b2, b3;
-#ifdef FAST_64BIT
+#if HAVE_FAST_64BIT
          uint64_t temp;
  #else
          uint32_t temp;
  #endif
  
-#ifdef FAST_64BIT
-#ifdef WORDS_BIGENDIAN
+#if HAVE_FAST_64BIT
+#if HAVE_BIGENDIAN
  #define ROW0_MASK 0xffff000000000000LL
  #else
  #define ROW0_MASK 0xffffLL
@@ -137,16 +118,16 @@ static inline void idctRowCondDC (DCTELEM * row)
          a2 -= W6 * row[2];
          a3 -= W2 * row[2];
  
-        MUL16(b0, W1, row[1]);
+        b0 = MUL16(W1, row[1]);
          MAC16(b0, W3, row[3]);
-        MUL16(b1, W3, row[1]);
+        b1 = MUL16(W3, row[1]);
          MAC16(b1, -W7, row[3]);
-        MUL16(b2, W5, row[1]);
+        b2 = MUL16(W5, row[1]);
          MAC16(b2, -W1, row[3]);
-        MUL16(b3, W7, row[1]);
+        b3 = MUL16(W7, row[1]);
          MAC16(b3, -W5, row[3]);
  
-#ifdef FAST_64BIT
+#if HAVE_FAST_64BIT
          temp = ((uint64_t*)row)[1];
  #else
          temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
@@ -197,10 +178,10 @@ static inline void idctSparseColPut (uint8_t *dest, int line_size,
          a2 +=  - W6*col[8*2];
          a3 +=  - W2*col[8*2];
  
-        MUL16(b0, W1, col[8*1]);
-        MUL16(b1, W3, col[8*1]);
-        MUL16(b2, W5, col[8*1]);
-        MUL16(b3, W7, col[8*1]);
+        b0 = MUL16(W1, col[8*1]);
+        b1 = MUL16(W3, col[8*1]);
+        b2 = MUL16(W5, col[8*1]);
+        b3 = MUL16(W7, col[8*1]);
  
          MAC16(b0, + W3, col[8*3]);
          MAC16(b1, - W7, col[8*3]);
@@ -269,10 +250,10 @@ static inline void idctSparseColAdd (uint8_t *dest, int line_size,
          a2 +=  - W6*col[8*2];
          a3 +=  - W2*col[8*2];
  
-        MUL16(b0, W1, col[8*1]);
-        MUL16(b1, W3, col[8*1]);
-        MUL16(b2, W5, col[8*1]);
-        MUL16(b3, W7, col[8*1]);
+        b0 = MUL16(W1, col[8*1]);
+        b1 = MUL16(W3, col[8*1]);
+        b2 = MUL16(W5, col[8*1]);
+        b3 = MUL16(W7, col[8*1]);
  
          MAC16(b0, + W3, col[8*3]);
          MAC16(b1, - W7, col[8*3]);
@@ -339,10 +320,10 @@ static inline void idctSparseCol (DCTELEM * col)
          a2 +=  - W6*col[8*2];
          a3 +=  - W2*col[8*2];
  
-        MUL16(b0, W1, col[8*1]);
-        MUL16(b1, W3, col[8*1]);
-        MUL16(b2, W5, col[8*1]);
-        MUL16(b3, W7, col[8*1]);
+        b0 = MUL16(W1, col[8*1]);
+        b1 = MUL16(W3, col[8*1]);
+        b2 = MUL16(W5, col[8*1]);
+        b3 = MUL16(W7, col[8*1]);
  
          MAC16(b0, + W3, col[8*3]);
          MAC16(b1, - W7, col[8*3]);
@@ -387,7 +368,7 @@ static inline void idctSparseCol (DCTELEM * col)
          col[56] = ((a0 - b0) >> COL_SHIFT);
  }
  
-void simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+void ff_simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
  {
      int i;
      for(i=0; i<8; i++)
@@ -397,7 +378,7 @@ void simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
          idctSparseColPut(dest + i, line_size, block + i);
  }
  
-void simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+void ff_simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
  {
      int i;
      for(i=0; i<8; i++)
@@ -407,7 +388,7 @@ void simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
          idctSparseColAdd(dest + i, line_size, block + i);
  }
  
-void simple_idct(DCTELEM *block)
+void ff_simple_idct(DCTELEM *block)
  {
      int i;
      for(i=0; i<8; i++)
@@ -428,7 +409,7 @@ void simple_idct(DCTELEM *block)
     and the butterfly must be multiplied by 0.5 * sqrt(2.0) */
  #define C_SHIFT (4+1+12)
  
-static inline void idct4col(uint8_t *dest, int line_size, const DCTELEM *col)
+static inline void idct4col_put(uint8_t *dest, int line_size, const DCTELEM *col)
  {
      int c0, c1, c2, c3, a0, a1, a2, a3;
      const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
@@ -465,7 +446,7 @@ static inline void idct4col(uint8_t *dest, int line_size, const DCTELEM *col)
  /* XXX: I think a 1.0/sqrt(2) normalization should be needed to
     compensate the extra butterfly stage - I don't have the full DV
     specification */
-void simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
+void ff_simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
  {
      int i;
      DCTELEM *ptr;
@@ -491,8 +472,8 @@ void simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
  
      /* IDCT4 and store */
      for(i=0;i<8;i++) {
-        idct4col(dest + i, 2 * line_size, block + i);
-        idct4col(dest + line_size + i, 2 * line_size, block + 8 + i);
+        idct4col_put(dest + i, 2 * line_size, block + i);
+        idct4col_put(dest + line_size + i, 2 * line_size, block + 8 + i);
      }
  }
  
@@ -555,7 +536,7 @@ static inline void idct4row(DCTELEM *row)
      row[3]= (c0 - c1) >> R_SHIFT;
  }
  
-void simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block)
+void ff_simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block)
  {
      int i;
  
@@ -570,7 +551,7 @@ void simple_idct84_add(uint8_t *dest, int line_size, DCTELEM *block)
      }
  }
  
-void simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block)
+void ff_simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block)
  {
      int i;
  
@@ -585,3 +566,17 @@ void simple_idct48_add(uint8_t *dest, int line_size, DCTELEM *block)
      }
  }
  
+void ff_simple_idct44_add(uint8_t *dest, int line_size, DCTELEM *block)
+{
+    int i;
+
+    /* 4x4 IDCT, row pass: idct4row rewrites rows 0-3 of block in place (row stride is 8 coefficients) */
+    for(i=0; i<4; i++) {
+        idct4row(block + i*8);
+    }
+
+    /* Column pass: idct4col_add handles columns 0-3, one dest column each; presumably adds to dest (helper not shown) */
+    for(i=0; i<4; i++){
+        idct4col_add(dest + i, line_size, block + i);
+    }
+}