git.sesse.net Git - ffmpeg/blobdiff - libavcodec/x86/h264dsp_init.c
Merge commit '90bc423212396e96a02edc1118982ab7f7766a63'
[ffmpeg] / libavcodec / x86 / h264dsp_init.c
index c8cd0650c7caa2e68fb5a1216961863db5760e02..7b3d17f9711906cf711e0658f9ab55806fd5cbe4 100644 (file)
@@ -78,6 +78,11 @@ IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
 IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
 IDCT_ADD_REP_FUNC2(, 8, 10, avx)
 
+IDCT_ADD_REP_FUNC2(, 8_422, 8, mmx)
+
+IDCT_ADD_REP_FUNC2(, 8_422, 10, sse2)
+IDCT_ADD_REP_FUNC2(, 8_422, 10, avx)
+
 void ff_h264_luma_dc_dequant_idct_mmx(int16_t *output, int16_t *input, int qmul);
 void ff_h264_luma_dc_dequant_idct_sse2(int16_t *output, int16_t *input, int qmul);
 
@@ -103,34 +108,38 @@ void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix,  \
                                                                int beta);
 
 #define LF_FUNCS(type, depth)                   \
-LF_FUNC(h,  chroma,       depth, mmxext)        \
-LF_IFUNC(h, chroma_intra, depth, mmxext)        \
-LF_FUNC(v,  chroma,       depth, mmxext)        \
-LF_IFUNC(v, chroma_intra, depth, mmxext)        \
-LF_FUNC(h,  luma,         depth, mmxext)        \
-LF_IFUNC(h, luma_intra,   depth, mmxext)        \
-LF_FUNC(h,  luma,         depth, sse2)          \
-LF_IFUNC(h, luma_intra,   depth, sse2)          \
-LF_FUNC(v,  luma,         depth, sse2)          \
-LF_IFUNC(v, luma_intra,   depth, sse2)          \
-LF_FUNC(h,  chroma,       depth, sse2)          \
-LF_IFUNC(h, chroma_intra, depth, sse2)          \
-LF_FUNC(v,  chroma,       depth, sse2)          \
-LF_IFUNC(v, chroma_intra, depth, sse2)          \
-LF_FUNC(h,  luma,         depth, avx)           \
-LF_IFUNC(h, luma_intra,   depth, avx)           \
-LF_FUNC(v,  luma,         depth, avx)           \
-LF_IFUNC(v, luma_intra,   depth, avx)           \
-LF_FUNC(h,  chroma,       depth, avx)           \
-LF_IFUNC(h, chroma_intra, depth, avx)           \
-LF_FUNC(v,  chroma,       depth, avx)           \
-LF_IFUNC(v, chroma_intra, depth, avx)
+LF_FUNC(h,  chroma,          depth, mmxext)     \
+LF_IFUNC(h, chroma_intra,    depth, mmxext)     \
+LF_FUNC(h,  chroma422,       depth, mmxext)     \
+LF_IFUNC(h, chroma422_intra, depth, mmxext)     \
+LF_FUNC(v,  chroma,          depth, mmxext)     \
+LF_IFUNC(v, chroma_intra,    depth, mmxext)     \
+LF_FUNC(h,  luma,            depth, mmxext)     \
+LF_IFUNC(h, luma_intra,      depth, mmxext)     \
+LF_FUNC(h,  luma,            depth, sse2)       \
+LF_IFUNC(h, luma_intra,      depth, sse2)       \
+LF_FUNC(v,  luma,            depth, sse2)       \
+LF_IFUNC(v, luma_intra,      depth, sse2)       \
+LF_FUNC(h,  chroma,          depth, sse2)       \
+LF_IFUNC(h, chroma_intra,    depth, sse2)       \
+LF_FUNC(h,  chroma422,       depth, sse2)       \
+LF_IFUNC(h, chroma422_intra, depth, sse2)       \
+LF_FUNC(v,  chroma,          depth, sse2)       \
+LF_IFUNC(v, chroma_intra,    depth, sse2)       \
+LF_FUNC(h,  luma,            depth, avx)        \
+LF_IFUNC(h, luma_intra,      depth, avx)        \
+LF_FUNC(v,  luma,            depth, avx)        \
+LF_IFUNC(v, luma_intra,      depth, avx)        \
+LF_FUNC(h,  chroma,          depth, avx)        \
+LF_IFUNC(h, chroma_intra,    depth, avx)        \
+LF_FUNC(h,  chroma422,       depth, avx)        \
+LF_IFUNC(h, chroma422_intra, depth, avx)        \
+LF_FUNC(v,  chroma,          depth, avx)        \
+LF_IFUNC(v, chroma_intra,    depth, avx)
 
 LF_FUNCS(uint8_t,   8)
 LF_FUNCS(uint16_t, 10)
 
-void ff_deblock_h_chroma422_8_mmxext(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
-
 #if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
 LF_FUNC(v8, luma, 8, mmxext)
 static void deblock_v_luma_8_mmxext(uint8_t *pix, int stride, int alpha,
@@ -157,13 +166,13 @@ LF_IFUNC(v, luma_intra, 10, mmxext)
 /* weighted prediction */
 
 #define H264_WEIGHT(W, OPT)                                             \
-void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, int stride,         \
+void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, ptrdiff_t stride,   \
                                       int height, int log2_denom,       \
                                       int weight, int offset);
 
 #define H264_BIWEIGHT(W, OPT)                                           \
 void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, uint8_t *src,     \
-                                        int stride, int height,         \
+                                        ptrdiff_t stride, int height,   \
                                         int log2_denom, int weightd,    \
                                         int weights, int offset);
 
@@ -183,7 +192,7 @@ H264_BIWEIGHT_MMX(4)
 
 #define H264_WEIGHT_10(W, DEPTH, OPT)                                   \
 void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,       \
-                                                    int stride,         \
+                                                    ptrdiff_t stride,   \
                                                     int height,         \
                                                     int log2_denom,     \
                                                     int weight,         \
@@ -192,7 +201,7 @@ void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,       \
 #define H264_BIWEIGHT_10(W, DEPTH, OPT)                                 \
 void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,     \
                                                       uint8_t *src,     \
-                                                      int stride,       \
+                                                      ptrdiff_t stride, \
                                                       int height,       \
                                                       int log2_denom,   \
                                                       int weightd,      \
@@ -227,8 +236,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
 
             c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
             c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
-            if (chroma_format_idc <= 1)
+            if (chroma_format_idc <= 1) {
                 c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
+            } else {
+                c->h264_idct_add8 = ff_h264_idct_add8_422_8_mmx;
+            }
             c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
             if (cpu_flags & AV_CPU_FLAG_CMOV)
                 c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;
@@ -249,6 +261,7 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
                 c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmxext;
             } else {
                 c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_8_mmxext;
+                c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma422_intra_8_mmxext;
             }
 #if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
             c->h264_v_loop_filter_luma       = deblock_v_luma_8_mmxext;
@@ -300,6 +313,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
 #if ARCH_X86_32
             c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_mmxext;
             c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmxext;
+            if (chroma_format_idc <= 1) {
+                c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_mmxext;
+            } else {
+                c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_mmxext;
+            }
             c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_mmxext;
             c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_mmxext;
             c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_mmxext;
@@ -312,8 +330,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
             c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2;
 
             c->h264_idct_add16 = ff_h264_idct_add16_10_sse2;
-            if (chroma_format_idc <= 1)
+            if (chroma_format_idc <= 1) {
                 c->h264_idct_add8 = ff_h264_idct_add8_10_sse2;
+            } else {
+                c->h264_idct_add8 = ff_h264_idct_add8_422_10_sse2;
+            }
             c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
 #if HAVE_ALIGNED_STACK
             c->h264_idct8_add  = ff_h264_idct8_add_10_sse2;
@@ -330,6 +351,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
 
             c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_sse2;
             c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
+            if (chroma_format_idc <= 1) {
+                c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_sse2;
+            } else {
+                c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_sse2;
+            }
 #if HAVE_ALIGNED_STACK
             c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_sse2;
             c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_sse2;
@@ -352,8 +378,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
             c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx;
 
             c->h264_idct_add16 = ff_h264_idct_add16_10_avx;
-            if (chroma_format_idc <= 1)
+            if (chroma_format_idc <= 1) {
                 c->h264_idct_add8 = ff_h264_idct_add8_10_avx;
+            } else {
+                c->h264_idct_add8 = ff_h264_idct_add8_422_10_avx;
+            }
             c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
 #if HAVE_ALIGNED_STACK
             c->h264_idct8_add  = ff_h264_idct8_add_10_avx;
@@ -362,6 +391,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
 
             c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_avx;
             c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
+            if (chroma_format_idc <= 1) {
+                c->h264_h_loop_filter_chroma = ff_deblock_h_chroma_10_avx;
+            } else {
+                c->h264_h_loop_filter_chroma = ff_deblock_h_chroma422_10_avx;
+            }
 #if HAVE_ALIGNED_STACK
             c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_avx;
             c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_avx;