Merge commit 'fca3c3b61952aacc45e9ca54d86a762946c21942'

[ffmpeg] / libavcodec / hevcdsp_template.c
diff --git a/libavcodec/hevcdsp_template.c b/libavcodec/hevcdsp_template.c

index b840d179c334df3e898c655267008631582ddcac..665436cce300c44031c6c520bdbcc6b76a1ba257 100644 (file)
--- a/libavcodec/hevcdsp_template.c
+++ b/libavcodec/hevcdsp_template.c
@@ -42,8 +42,8 @@ static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height
      }
  }
  
-static av_always_inline void FUNC(transquant_bypass)(uint8_t *_dst, int16_t *coeffs,
-                                                     ptrdiff_t stride, int size)
+static av_always_inline void FUNC(add_residual)(uint8_t *_dst, int16_t *res,
+                                                ptrdiff_t stride, int size)
  {
      int x, y;
      pixel *dst = (pixel *)_dst;
@@ -52,35 +52,35 @@ static av_always_inline void FUNC(transquant_bypass)(uint8_t *_dst, int16_t *coe
  
      for (y = 0; y < size; y++) {
          for (x = 0; x < size; x++) {
-            dst[x] = av_clip_pixel(dst[x] + *coeffs);
-            coeffs++;
+            dst[x] = av_clip_pixel(dst[x] + *res);
+            res++;
          }
          dst += stride;
      }
  }
  
-static void FUNC(transform_add4x4)(uint8_t *_dst, int16_t *coeffs,
-                                       ptrdiff_t stride)
+static void FUNC(add_residual4x4)(uint8_t *_dst, int16_t *res,
+                                  ptrdiff_t stride)
  {
-    FUNC(transquant_bypass)(_dst, coeffs, stride, 4);
+    FUNC(add_residual)(_dst, res, stride, 4);
  }
  
-static void FUNC(transform_add8x8)(uint8_t *_dst, int16_t *coeffs,
-                                       ptrdiff_t stride)
+static void FUNC(add_residual8x8)(uint8_t *_dst, int16_t *res,
+                                  ptrdiff_t stride)
  {
-    FUNC(transquant_bypass)(_dst, coeffs, stride, 8);
+    FUNC(add_residual)(_dst, res, stride, 8);
  }
  
-static void FUNC(transform_add16x16)(uint8_t *_dst, int16_t *coeffs,
-                                         ptrdiff_t stride)
+static void FUNC(add_residual16x16)(uint8_t *_dst, int16_t *res,
+                                    ptrdiff_t stride)
  {
-    FUNC(transquant_bypass)(_dst, coeffs, stride, 16);
+    FUNC(add_residual)(_dst, res, stride, 16);
  }
  
-static void FUNC(transform_add32x32)(uint8_t *_dst, int16_t *coeffs,
-                                         ptrdiff_t stride)
+static void FUNC(add_residual32x32)(uint8_t *_dst, int16_t *res,
+                                    ptrdiff_t stride)
  {
-    FUNC(transquant_bypass)(_dst, coeffs, stride, 32);
+    FUNC(add_residual)(_dst, res, stride, 32);
  }
  
  
@@ -106,13 +106,11 @@ static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
      }
  }
  
-static void FUNC(transform_skip)(int16_t *_coeffs, int16_t log2_size)
+static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
  {
      int shift  = 15 - BIT_DEPTH - log2_size;
      int x, y;
      int size = 1 << log2_size;
-    int16_t *coeffs = _coeffs;
-
  
      if (shift > 0) {
          int offset = 1 << (shift - 1);
@@ -134,8 +132,6 @@ static void FUNC(transform_skip)(int16_t *_coeffs, int16_t log2_size)
  
  #define SET(dst, x)   (dst) = (x)
  #define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift)
-#define ADD_AND_SCALE(dst, x)                                           \
-    (dst) = av_clip_pixel((dst) + av_clip_int16(((x) + add) >> shift))
  
  #define TR_4x4_LUMA(dst, src, step, assign)                             \
      do {                                                                \
@@ -174,112 +170,113 @@ static void FUNC(transform_4x4_luma)(int16_t *coeffs)
  
  #undef TR_4x4_LUMA
  
-#define TR_4(dst, src, dstep, sstep, assign, end)                              \
-    do {                                                                       \
-        const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep];              \
-        const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep];              \
-        const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep];              \
-        const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep];              \
-                                                                               \
-        assign(dst[0 * dstep], e0 + o0);                                       \
-        assign(dst[1 * dstep], e1 + o1);                                       \
-        assign(dst[2 * dstep], e1 - o1);                                       \
-        assign(dst[3 * dstep], e0 - o0);                                       \
+#define TR_4(dst, src, dstep, sstep, assign, end)                 \
+    do {                                                          \
+        const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
+        const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
+        const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
+        const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
+                                                                  \
+        assign(dst[0 * dstep], e0 + o0);                          \
+        assign(dst[1 * dstep], e1 + o1);                          \
+        assign(dst[2 * dstep], e1 - o1);                          \
+        assign(dst[3 * dstep], e0 - o0);                          \
      } while (0)
  
-#define TR_8(dst, src, dstep, sstep, assign, end)                              \
-    do {                                                                       \
-        int i, j;                                                              \
-        int e_8[4];                                                            \
-        int o_8[4] = { 0 };                                                    \
-        for (i = 0; i < 4; i++)                                                \
-            for (j = 1; j < end; j += 2)                                       \
-                o_8[i] += transform[4 * j][i] * src[j * sstep];                \
-        TR_4(e_8, src, 1, 2 * sstep, SET, 4);                                  \
-                                                                               \
-        for (i = 0; i < 4; i++) {                                              \
-            assign(dst[i * dstep], e_8[i] + o_8[i]);                           \
-            assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]);                     \
-        }                                                                      \
+#define TR_8(dst, src, dstep, sstep, assign, end)                 \
+    do {                                                          \
+        int i, j;                                                 \
+        int e_8[4];                                               \
+        int o_8[4] = { 0 };                                       \
+        for (i = 0; i < 4; i++)                                   \
+            for (j = 1; j < end; j += 2)                          \
+                o_8[i] += transform[4 * j][i] * src[j * sstep];   \
+        TR_4(e_8, src, 1, 2 * sstep, SET, 4);                     \
+                                                                  \
+        for (i = 0; i < 4; i++) {                                 \
+            assign(dst[i * dstep], e_8[i] + o_8[i]);              \
+            assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]);        \
+        }                                                         \
      } while (0)
  
-#define TR_16(dst, src, dstep, sstep, assign, end)                             \
-    do {                                                                       \
-        int i, j;                                                              \
-        int e_16[8];                                                           \
-        int o_16[8] = { 0 };                                                   \
-        for (i = 0; i < 8; i++)                                                \
-            for (j = 1; j < end; j += 2)                                       \
-                o_16[i] += transform[2 * j][i] * src[j * sstep];               \
-        TR_8(e_16, src, 1, 2 * sstep, SET, 8);                                 \
-                                                                               \
-        for (i = 0; i < 8; i++) {                                              \
-            assign(dst[i * dstep], e_16[i] + o_16[i]);                         \
-            assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]);                  \
-        }                                                                      \
+#define TR_16(dst, src, dstep, sstep, assign, end)                \
+    do {                                                          \
+        int i, j;                                                 \
+        int e_16[8];                                              \
+        int o_16[8] = { 0 };                                      \
+        for (i = 0; i < 8; i++)                                   \
+            for (j = 1; j < end; j += 2)                          \
+                o_16[i] += transform[2 * j][i] * src[j * sstep];  \
+        TR_8(e_16, src, 1, 2 * sstep, SET, 8);                    \
+                                                                  \
+        for (i = 0; i < 8; i++) {                                 \
+            assign(dst[i * dstep], e_16[i] + o_16[i]);            \
+            assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]);     \
+        }                                                         \
      } while (0)
  
-#define TR_32(dst, src, dstep, sstep, assign, end)                             \
-    do {                                                                       \
-        int i, j;                                                              \
-        int e_32[16];                                                          \
-        int o_32[16] = { 0 };                                                  \
-        for (i = 0; i < 16; i++)                                               \
-            for (j = 1; j < end; j += 2)                                       \
-                o_32[i] += transform[j][i] * src[j * sstep];                   \
-        TR_16(e_32, src, 1, 2 * sstep, SET, end/2);                            \
-                                                                               \
-        for (i = 0; i < 16; i++) {                                             \
-            assign(dst[i * dstep], e_32[i] + o_32[i]);                         \
-            assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]);                  \
-        }                                                                      \
+#define TR_32(dst, src, dstep, sstep, assign, end)                \
+    do {                                                          \
+        int i, j;                                                 \
+        int e_32[16];                                             \
+        int o_32[16] = { 0 };                                     \
+        for (i = 0; i < 16; i++)                                  \
+            for (j = 1; j < end; j += 2)                          \
+                o_32[i] += transform[j][i] * src[j * sstep];      \
+        TR_16(e_32, src, 1, 2 * sstep, SET, end / 2);             \
+                                                                  \
+        for (i = 0; i < 16; i++) {                                \
+            assign(dst[i * dstep], e_32[i] + o_32[i]);            \
+            assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]);     \
+        }                                                         \
      } while (0)
  
-#define IDCT_VAR4(H)                                                          \
-    int      limit2   = FFMIN(col_limit + 4, H)
-#define IDCT_VAR8(H)                                                          \
-        int      limit   = FFMIN(col_limit, H);                               \
-        int      limit2   = FFMIN(col_limit + 4, H)
+#define IDCT_VAR4(H)                                              \
+    int limit2 = FFMIN(col_limit + 4, H)
+#define IDCT_VAR8(H)                                              \
+    int limit  = FFMIN(col_limit, H);                             \
+    int limit2 = FFMIN(col_limit + 4, H)
  #define IDCT_VAR16(H)   IDCT_VAR8(H)
  #define IDCT_VAR32(H)   IDCT_VAR8(H)
  
-#define IDCT(H)                                                              \
-static void FUNC(idct_##H ##x ##H )(                                         \
-                   int16_t *coeffs, int col_limit) {                         \
-    int i;                                                                   \
-    int      shift   = 7;                                                    \
-    int      add     = 1 << (shift - 1);                                     \
-    int16_t *src     = coeffs;                                               \
-    IDCT_VAR ##H(H);                                                         \
-                                                                             \
-    for (i = 0; i < H; i++) {                                                \
-        TR_ ## H(src, src, H, H, SCALE, limit2);                             \
-        if (limit2 < H && i%4 == 0 && !!i)                                   \
-            limit2 -= 4;                                                     \
-        src++;                                                               \
-    }                                                                        \
-                                                                             \
-    shift   = 20 - BIT_DEPTH;                                                \
-    add     = 1 << (shift - 1);                                              \
-    for (i = 0; i < H; i++) {                                                \
-        TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit);                        \
-        coeffs += H;                                                         \
-    }                                                                        \
+#define IDCT(H)                                                   \
+static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs,          \
+                                        int col_limit)            \
+{                                                                 \
+    int i;                                                        \
+    int      shift = 7;                                           \
+    int      add   = 1 << (shift - 1);                            \
+    int16_t *src   = coeffs;                                      \
+    IDCT_VAR ## H(H);                                             \
+                                                                  \
+    for (i = 0; i < H; i++) {                                     \
+        TR_ ## H(src, src, H, H, SCALE, limit2);                  \
+        if (limit2 < H && i%4 == 0 && !!i)                        \
+            limit2 -= 4;                                          \
+        src++;                                                    \
+    }                                                             \
+                                                                  \
+    shift = 20 - BIT_DEPTH;                                       \
+    add   = 1 << (shift - 1);                                     \
+    for (i = 0; i < H; i++) {                                     \
+        TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit);             \
+        coeffs += H;                                              \
+    }                                                             \
  }
  
-#define IDCT_DC(H)                                                           \
-static void FUNC(idct_##H ##x ##H ##_dc)(                                    \
-                   int16_t *coeffs) {                                        \
-    int i, j;                                                                \
-    int      shift   = 14 - BIT_DEPTH;                                       \
-    int      add     = 1 << (shift - 1);                                     \
-    int      coeff   = (((coeffs[0] + 1) >> 1) + add) >> shift;              \
-                                                                             \
-    for (j = 0; j < H; j++) {                                                \
-        for (i = 0; i < H; i++) {                                            \
-            coeffs[i+j*H] = coeff;                                           \
-        }                                                                    \
-    }                                                                        \
+#define IDCT_DC(H)                                                \
+static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs)    \
+{                                                                 \
+    int i, j;                                                     \
+    int shift = 14 - BIT_DEPTH;                                   \
+    int add   = 1 << (shift - 1);                                 \
+    int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift;          \
+                                                                  \
+    for (j = 0; j < H; j++) {                                     \
+        for (i = 0; i < H; i++) {                                 \
+            coeffs[i + j * H] = coeff;                            \
+        }                                                         \
+    }                                                             \
  }
  
  IDCT( 4)
@@ -299,7 +296,6 @@ IDCT_DC(32)
  
  #undef SET
  #undef SCALE
-#undef ADD_AND_SCALE
  
  static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src,
                                    ptrdiff_t stride_dst, ptrdiff_t stride_src,
@@ -1497,7 +1493,9 @@ static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uin
          dst  += dststride;
          src2 += MAX_PB_SIZE;
      }
-}// line zero
+}
+
+// line zero
  #define P3 pix[-4 * xstride]
  #define P2 pix[-3 * xstride]
  #define P1 pix[-2 * xstride]