+#define idct_dc_proto(size, bitd, opt) \
+ void ff_hevc_idct_ ## size ## _dc_add_ ## bitd ## _ ## opt(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
+
+idct_dc_proto(4, 8,mmxext);
+idct_dc_proto(8, 8,mmxext);
+idct_dc_proto(16,8, sse2);
+idct_dc_proto(32,8, sse2);
+
+idct_dc_proto(32,8, avx2);
+
+idct_dc_proto(4, 10,mmxext);
+idct_dc_proto(8, 10, sse2);
+idct_dc_proto(16,10, sse2);
+idct_dc_proto(32,10, sse2);
+idct_dc_proto(8, 10, avx);
+idct_dc_proto(16,10, avx);
+idct_dc_proto(32,10, avx);
+
+idct_dc_proto(16,10, avx2);
+idct_dc_proto(32,10, avx2);
+
+#define IDCT_DC_FUNCS(W, opt) \
+void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
+void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs)
+
+IDCT_DC_FUNCS(4x4, mmxext);
+IDCT_DC_FUNCS(8x8, mmxext);
+IDCT_DC_FUNCS(8x8, sse2);
+IDCT_DC_FUNCS(16x16, sse2);
+IDCT_DC_FUNCS(32x32, sse2);
+IDCT_DC_FUNCS(16x16, avx2);
+IDCT_DC_FUNCS(32x32, avx2);
+
+#define IDCT_FUNCS(opt) \
+void ff_hevc_idct_4x4_8_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_4x4_10_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_8x8_8_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_8x8_10_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_16x16_8_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_32x32_8_ ## opt(int16_t *coeffs, int col_limit); \
+void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);
+
+IDCT_FUNCS(sse2)
+IDCT_FUNCS(avx)
+
+void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+void ff_hevc_add_residual_8_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+void ff_hevc_add_residual_16_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+void ff_hevc_add_residual_32_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+
+void ff_hevc_add_residual_8_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+void ff_hevc_add_residual_16_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+void ff_hevc_add_residual_32_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+
+void ff_hevc_add_residual_32_8_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+
+void ff_hevc_add_residual_4_10_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+void ff_hevc_add_residual_8_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+void ff_hevc_add_residual_16_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+void ff_hevc_add_residual_32_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+
+void ff_hevc_add_residual_16_10_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+void ff_hevc_add_residual_32_10_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
+