/*
 * Copyright (c) 2013 Seppo Tomperi
 * Copyright (c) 2013 - 2014 Pierre-Edouard Lepere
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "config.h"

#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"

#include "libavcodec/hevcdsp.h"

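/* Deblocking (loop filter) assembly: one prototype per direction (h/v),
 * bit depth and instruction set. */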
#define LFC_FUNC(DIR, DEPTH, OPT) \
void ff_hevc_ ## DIR ## _loop_filter_chroma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int *tc, uint8_t *no_p, uint8_t *no_q);

#define LFL_FUNC(DIR, DEPTH, OPT) \
void ff_hevc_ ## DIR ## _loop_filter_luma_ ## DEPTH ## _ ## OPT(uint8_t *pix, ptrdiff_t stride, int beta, int *tc, uint8_t *no_p, uint8_t *no_q);

#define LFC_FUNCS(type, depth) \
    LFC_FUNC(h, depth, sse2)   \
    LFC_FUNC(v, depth, sse2)

#define LFL_FUNCS(type, depth) \
    LFL_FUNC(h, depth, ssse3)  \
    LFL_FUNC(v, depth, ssse3)

LFC_FUNCS(uint8_t, 8)
LFC_FUNCS(uint8_t, 10)
LFL_FUNCS(uint8_t, 8)
LFL_FUNCS(uint8_t, 10)

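/* Prototypes for the DC-only IDCT variants that add the result directly
 * to dst. */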
#define idct_dc_proto(size, bitd, opt) \
                void ff_hevc_idct_ ## size ## _dc_add_ ## bitd ## _ ## opt(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)

idct_dc_proto(4,  8,  mmxext);
idct_dc_proto(8,  8,  mmxext);
idct_dc_proto(16, 8,  sse2);
idct_dc_proto(32, 8,  sse2);

idct_dc_proto(32, 8,  avx2);

idct_dc_proto(4,  10, mmxext);
idct_dc_proto(8,  10, sse2);
idct_dc_proto(16, 10, sse2);
idct_dc_proto(32, 10, sse2);
idct_dc_proto(8,  10, avx);
idct_dc_proto(16, 10, avx);
idct_dc_proto(32, 10, avx);

idct_dc_proto(16, 10, avx2);
idct_dc_proto(32, 10, avx2);

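/* DC-only IDCT, operating in place on the coefficient block. */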
#define IDCT_DC_FUNCS(W, opt) \
void ff_hevc_idct_ ## W ## _dc_8_ ## opt(int16_t *coeffs); \
void ff_hevc_idct_ ## W ## _dc_10_ ## opt(int16_t *coeffs)

IDCT_DC_FUNCS(4x4,   mmxext);
IDCT_DC_FUNCS(8x8,   mmxext);
IDCT_DC_FUNCS(8x8,   sse2);
IDCT_DC_FUNCS(16x16, sse2);
IDCT_DC_FUNCS(32x32, sse2);
IDCT_DC_FUNCS(16x16, avx2);
IDCT_DC_FUNCS(32x32, avx2);

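/* Full IDCT, in place on the coefficient block; col_limit is a hint for how
 * many columns actually carry non-zero coefficients. */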
#define IDCT_FUNCS(opt)                                             \
void ff_hevc_idct_4x4_8_    ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_4x4_10_   ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_8x8_8_    ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_8x8_10_   ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_16x16_8_  ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_16x16_10_ ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_32x32_8_  ## opt(int16_t *coeffs, int col_limit); \
void ff_hevc_idct_32x32_10_ ## opt(int16_t *coeffs, int col_limit);

IDCT_FUNCS(sse2)
IDCT_FUNCS(avx)

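/* Add the decoded residual to the reconstructed block in dst. */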
void ff_hevc_add_residual_4_8_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_8_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_16_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_32_8_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);

void ff_hevc_add_residual_8_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_16_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_32_8_avx(uint8_t *dst, int16_t *res, ptrdiff_t stride);

void ff_hevc_add_residual_32_8_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride);

void ff_hevc_add_residual_4_10_mmxext(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_8_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_16_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_32_10_sse2(uint8_t *dst, int16_t *res, ptrdiff_t stride);

void ff_hevc_add_residual_16_10_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride);
void ff_hevc_add_residual_32_10_avx2(uint8_t *dst, int16_t *res, ptrdiff_t stride);

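/* Plain copy of source pixels into the 16-bit intermediate buffer; installed
 * below as the full-pel (mx == 0, my == 0) entry of the qpel/epel tables. */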
#define GET_PIXELS(width, depth, cf)                                                                      \
void ff_hevc_get_pixels_ ## width ## _ ## depth ## _ ## cf(int16_t *dst, ptrdiff_t dststride,             \
                                                           uint8_t *src, ptrdiff_t srcstride,             \
                                                           int height, int mx, int my, int16_t *mcbuffer);

GET_PIXELS(4,  8, sse2)
GET_PIXELS(8,  8, sse2)
GET_PIXELS(12, 8, sse2)
GET_PIXELS(16, 8, sse2)
GET_PIXELS(24, 8, sse2)
GET_PIXELS(32, 8, sse2)
GET_PIXELS(48, 8, sse2)
GET_PIXELS(64, 8, sse2)

GET_PIXELS(4,  10, sse2)
GET_PIXELS(8,  10, sse2)
GET_PIXELS(12, 10, sse2)
GET_PIXELS(16, 10, sse2)
GET_PIXELS(24, 10, sse2)
GET_PIXELS(32, 10, sse2)
GET_PIXELS(48, 10, sse2)
GET_PIXELS(64, 10, sse2)

/* The hv filters read the 16-bit intermediate buffer, so they are independent
 * of the bit depth and declared only once. */
#define INTERP_HV_FUNC(width, cf)                                                         \
void ff_hevc_qpel_hv_ ## width ## _ ## cf(int16_t *dst, ptrdiff_t dststride,              \
                                          int16_t *src, ptrdiff_t srcstride,              \
                                          int height, int mx, int my, int16_t *mcbuffer); \
void ff_hevc_epel_hv_ ## width ## _ ## cf(int16_t *dst, ptrdiff_t dststride,              \
                                          int16_t *src, ptrdiff_t srcstride,              \
                                          int height, int mx, int my, int16_t *mcbuffer);

INTERP_HV_FUNC(4,  avx)
INTERP_HV_FUNC(8,  avx)
INTERP_HV_FUNC(12, avx)
INTERP_HV_FUNC(16, avx)
INTERP_HV_FUNC(24, avx)
INTERP_HV_FUNC(32, avx)
INTERP_HV_FUNC(48, avx)
INTERP_HV_FUNC(64, avx)

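/* The 2D (hv) luma interpolation is composed from two passes: the horizontal
 * 8-tap filter writes height + 7 rows into mcbuffer, then the hv kernel
 * filters that intermediate buffer vertically into dst.  Only built on
 * x86-64 with external AVX support. */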
#if ARCH_X86_64 && HAVE_AVX_EXTERNAL
#define QPEL_FUNC_HV(width, depth, cf_h, cf_v, cf_hv)                                                         \
static void hevc_qpel_hv_ ## width ## _ ## depth ## _ ## cf_hv(int16_t *dst, ptrdiff_t dststride,             \
                                                               uint8_t *src, ptrdiff_t srcstride,             \
                                                               int height, int mx, int my, int16_t *mcbuffer) \
{                                                                                                             \
    const ptrdiff_t stride = FFALIGN(width + 7, 8);                                                           \
    ff_hevc_qpel_h_ ## width ## _ ## depth ## _ ## cf_h(mcbuffer, 2 * stride, src - 3 * srcstride, srcstride, \
                                                        height + 7, mx, my, mcbuffer);                        \
    ff_hevc_qpel_hv_ ## width ## _ ## cf_hv(dst, dststride, mcbuffer + 3 * stride, 2 * stride,                \
                                            height, mx, my, mcbuffer);                                        \
}
#else
#define QPEL_FUNC_HV(width, depth, cf_h, cf_v, cf_hv)
#endif /* ARCH_X86_64 && HAVE_AVX_EXTERNAL */

#define QPEL_FUNCS(width, depth, cf_h, cf_v, cf_hv)                                                           \
void ff_hevc_qpel_h_ ## width ## _ ## depth ## _ ## cf_h(int16_t *dst, ptrdiff_t dststride,                   \
                                                         uint8_t *src, ptrdiff_t srcstride,                   \
                                                         int height, int mx, int my, int16_t *mcbuffer);      \
void ff_hevc_qpel_v_ ## width ## _ ## depth ## _ ## cf_v(int16_t *dst, ptrdiff_t dststride,                   \
                                                         uint8_t *src, ptrdiff_t srcstride,                   \
                                                         int height, int mx, int my, int16_t *mcbuffer);      \
QPEL_FUNC_HV(width, depth, cf_h, cf_v, cf_hv)

QPEL_FUNCS(4,  8, ssse3, ssse3, avx)
QPEL_FUNCS(8,  8, ssse3, ssse3, avx)
QPEL_FUNCS(12, 8, ssse3, ssse3, avx)
QPEL_FUNCS(16, 8, ssse3, ssse3, avx)
QPEL_FUNCS(24, 8, ssse3, ssse3, avx)
QPEL_FUNCS(32, 8, ssse3, ssse3, avx)
QPEL_FUNCS(48, 8, ssse3, ssse3, avx)
QPEL_FUNCS(64, 8, ssse3, ssse3, avx)

QPEL_FUNCS(4,  10, avx, avx, avx)
QPEL_FUNCS(8,  10, avx, avx, avx)
QPEL_FUNCS(12, 10, avx, avx, avx)
QPEL_FUNCS(16, 10, avx, avx, avx)
QPEL_FUNCS(24, 10, avx, avx, avx)
QPEL_FUNCS(32, 10, avx, avx, avx)
QPEL_FUNCS(48, 10, avx, avx, avx)
QPEL_FUNCS(64, 10, avx, avx, avx)

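/* Same two-pass construction for the chroma (epel) case: the 4-tap horizontal
 * filter writes height + 3 rows into mcbuffer, and the hv kernel reads it
 * with a one-row offset. */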
#if ARCH_X86_64 && HAVE_AVX_EXTERNAL
#define EPEL_FUNC_HV(width, depth, cf_h, cf_v, cf_hv)                                                         \
static void hevc_epel_hv_ ## width ## _ ## depth ## _ ## cf_hv(int16_t *dst, ptrdiff_t dststride,             \
                                                               uint8_t *src, ptrdiff_t srcstride,             \
                                                               int height, int mx, int my, int16_t *mcbuffer) \
{                                                                                                             \
    const ptrdiff_t stride = FFALIGN(width + 3, 8);                                                           \
    ff_hevc_epel_h_ ## width ## _ ## depth ## _ ## cf_h(mcbuffer, 2 * stride, src - srcstride, srcstride,     \
                                                        height + 3, mx, my, mcbuffer);                        \
    ff_hevc_epel_hv_ ## width ## _ ## cf_hv(dst, dststride, mcbuffer + stride, 2 * stride,                    \
                                            height, mx, my, mcbuffer);                                        \
}
#else
#define EPEL_FUNC_HV(width, depth, cf_h, cf_v, cf_hv)
#endif /* ARCH_X86_64 && HAVE_AVX_EXTERNAL */

#define EPEL_FUNCS(width, depth, cf_h, cf_v, cf_hv)                                                           \
void ff_hevc_epel_h_ ## width ## _ ## depth ## _ ## cf_h(int16_t *dst, ptrdiff_t dststride,                   \
                                                         uint8_t *src, ptrdiff_t srcstride,                   \
                                                         int height, int mx, int my, int16_t *mcbuffer);      \
void ff_hevc_epel_v_ ## width ## _ ## depth ## _ ## cf_v(int16_t *dst, ptrdiff_t dststride,                   \
                                                         uint8_t *src, ptrdiff_t srcstride,                   \
                                                         int height, int mx, int my, int16_t *mcbuffer);      \
EPEL_FUNC_HV(width, depth, cf_h, cf_v, cf_hv)

EPEL_FUNCS(4,  8, ssse3, ssse3, avx)
EPEL_FUNCS(8,  8, ssse3, ssse3, avx)
EPEL_FUNCS(12, 8, ssse3, ssse3, avx)
EPEL_FUNCS(16, 8, ssse3, ssse3, avx)
EPEL_FUNCS(24, 8, ssse3, ssse3, avx)
EPEL_FUNCS(32, 8, ssse3, ssse3, avx)

EPEL_FUNCS(4,  10, avx, avx, avx)
EPEL_FUNCS(8,  10, avx, avx, avx)
EPEL_FUNCS(12, 10, avx, avx, avx)
EPEL_FUNCS(16, 10, avx, avx, avx)
EPEL_FUNCS(24, 10, avx, avx, avx)
EPEL_FUNCS(32, 10, avx, avx, avx)

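/* Output stages: unweighted and weighted uni-/bi-prediction from the 16-bit
 * intermediate; cf_uw selects the instruction set for the unweighted
 * variants, cf_w for the weighted ones. */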
#define PUT_PRED(width, depth, cf_uw, cf_w) \
void ff_hevc_put_unweighted_pred_ ## width ## _ ## depth ## _ ## cf_uw(uint8_t *dst, ptrdiff_t dststride,                   \
                                                                       int16_t *src, ptrdiff_t srcstride,                   \
                                                                       int height);                                         \
void ff_hevc_put_unweighted_pred_avg_ ## width ## _ ## depth ## _ ## cf_uw(uint8_t *dst, ptrdiff_t dststride,               \
                                                                           int16_t *src1, int16_t *src2,                    \
                                                                           ptrdiff_t srcstride, int height);                \
void ff_hevc_put_weighted_pred_ ## width ## _ ## depth ## _ ## cf_w(uint8_t denom, int16_t weight, int16_t offset,          \
                                                                    uint8_t *dst, ptrdiff_t dststride,                      \
                                                                    int16_t *src, ptrdiff_t srcstride,                      \
                                                                    int height);                                            \
void ff_hevc_put_weighted_pred_avg_ ## width ## _ ## depth ## _ ## cf_w(uint8_t denom, int16_t weight0, int16_t weight1,    \
                                                                        int16_t offset0, int16_t offset1,                   \
                                                                        uint8_t *dst, ptrdiff_t dststride,                  \
                                                                        int16_t *src0, int16_t *src1, ptrdiff_t srcstride,  \
                                                                        int height);

PUT_PRED(4,  8, sse2, sse4)
PUT_PRED(8,  8, sse2, sse4)
PUT_PRED(12, 8, sse2, sse4)
PUT_PRED(16, 8, sse2, sse4)
PUT_PRED(24, 8, sse2, sse4)
PUT_PRED(32, 8, sse2, sse4)
PUT_PRED(48, 8, sse2, sse4)
PUT_PRED(64, 8, sse2, sse4)

PUT_PRED(4,  10, sse2, sse4)
PUT_PRED(8,  10, sse2, sse4)
PUT_PRED(12, 10, sse2, sse4)
PUT_PRED(16, 10, sse2, sse4)
PUT_PRED(24, 10, sse2, sse4)
PUT_PRED(32, 10, sse2, sse4)
PUT_PRED(48, 10, sse2, sse4)
PUT_PRED(64, 10, sse2, sse4)

void ff_hevc_dsp_init_x86(HEVCDSPContext *c, const int bit_depth)
{
    int cpu_flags = av_get_cpu_flags();

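/* Fill the width-indexed function tables: the luma tables have eight entries
 * for widths 4..64, while the chroma tables only populate the slots used for
 * widths 4..32. */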
#define SET_LUMA_FUNCS(tabname, funcname, depth, cf)      \
    c->tabname[0] = funcname ## _4_  ## depth ## _ ## cf; \
    c->tabname[1] = funcname ## _8_  ## depth ## _ ## cf; \
    c->tabname[2] = funcname ## _12_ ## depth ## _ ## cf; \
    c->tabname[3] = funcname ## _16_ ## depth ## _ ## cf; \
    c->tabname[4] = funcname ## _24_ ## depth ## _ ## cf; \
    c->tabname[5] = funcname ## _32_ ## depth ## _ ## cf; \
    c->tabname[6] = funcname ## _48_ ## depth ## _ ## cf; \
    c->tabname[7] = funcname ## _64_ ## depth ## _ ## cf;

#define SET_CHROMA_FUNCS(tabname, funcname, depth, cf)    \
    c->tabname[1] = funcname ## _4_  ## depth ## _ ## cf; \
    c->tabname[3] = funcname ## _8_  ## depth ## _ ## cf; \
    c->tabname[4] = funcname ## _12_ ## depth ## _ ## cf; \
    c->tabname[5] = funcname ## _16_ ## depth ## _ ## cf; \
    c->tabname[6] = funcname ## _24_ ## depth ## _ ## cf; \
    c->tabname[7] = funcname ## _32_ ## depth ## _ ## cf;

#define SET_QPEL_FUNCS(v, h, depth, cf, name) SET_LUMA_FUNCS  (put_hevc_qpel[v][h], name, depth, cf)
#define SET_EPEL_FUNCS(v, h, depth, cf, name) SET_CHROMA_FUNCS(put_hevc_epel[v][h], name, depth, cf)

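    /* Install pointers only when the required instruction set is reported by
     * av_get_cpu_flags(); later checks overwrite earlier, less specialized
     * versions. */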
    if (bit_depth == 8) {
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct_4x4_dc_8_mmxext;
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_mmxext;

            c->add_residual[0] = ff_hevc_add_residual_4_8_mmxext;
        }
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_8_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_8_sse2;

            c->add_residual[1] = ff_hevc_add_residual_8_8_sse2;
            c->add_residual[2] = ff_hevc_add_residual_16_8_sse2;
            c->add_residual[3] = ff_hevc_add_residual_32_8_sse2;

            c->idct_dc[1] = ff_hevc_idct_8x8_dc_8_sse2;
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_sse2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_sse2;

            c->idct[0]    = ff_hevc_idct_4x4_8_sse2;
            c->idct[1]    = ff_hevc_idct_8x8_8_sse2;

            SET_QPEL_FUNCS(0, 0, 8, sse2, ff_hevc_get_pixels);
            SET_EPEL_FUNCS(0, 0, 8, sse2, ff_hevc_get_pixels);

            SET_LUMA_FUNCS(put_unweighted_pred,              ff_hevc_put_unweighted_pred,     8, sse2);
            SET_LUMA_FUNCS(put_unweighted_pred_avg,          ff_hevc_put_unweighted_pred_avg, 8, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_chroma,     ff_hevc_put_unweighted_pred,     8, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_avg_chroma, ff_hevc_put_unweighted_pred_avg, 8, sse2);
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            SET_QPEL_FUNCS(0, 1, 8, ssse3, ff_hevc_qpel_h);
            SET_QPEL_FUNCS(1, 0, 8, ssse3, ff_hevc_qpel_v);
            SET_EPEL_FUNCS(0, 1, 8, ssse3, ff_hevc_epel_h);
            SET_EPEL_FUNCS(1, 0, 8, ssse3, ff_hevc_epel_v);
        }
        if (EXTERNAL_AVX(cpu_flags)) {
            c->idct[0] = ff_hevc_idct_4x4_8_avx;
            c->idct[1] = ff_hevc_idct_8x8_8_avx;
            c->add_residual[1] = ff_hevc_add_residual_8_8_avx;
            c->add_residual[2] = ff_hevc_add_residual_16_8_avx;
            c->add_residual[3] = ff_hevc_add_residual_32_8_avx;
        }
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->add_residual[3] = ff_hevc_add_residual_32_8_avx2;
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_MMXEXT(cpu_flags)) {
            c->idct_dc[0] = ff_hevc_idct_4x4_dc_10_mmxext;
            c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_mmxext;

            c->add_residual[0] = ff_hevc_add_residual_4_10_mmxext;
        }
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->hevc_v_loop_filter_chroma = ff_hevc_v_loop_filter_chroma_10_sse2;
            c->hevc_h_loop_filter_chroma = ff_hevc_h_loop_filter_chroma_10_sse2;

            c->idct_dc[1] = ff_hevc_idct_8x8_dc_10_sse2;
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_sse2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_sse2;

            c->idct[0]    = ff_hevc_idct_4x4_10_sse2;
            c->idct[1]    = ff_hevc_idct_8x8_10_sse2;
            SET_QPEL_FUNCS(0, 0, 10, sse2, ff_hevc_get_pixels);
            SET_EPEL_FUNCS(0, 0, 10, sse2, ff_hevc_get_pixels);

            SET_LUMA_FUNCS(put_unweighted_pred,              ff_hevc_put_unweighted_pred,     10, sse2);
            SET_LUMA_FUNCS(put_unweighted_pred_avg,          ff_hevc_put_unweighted_pred_avg, 10, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_chroma,     ff_hevc_put_unweighted_pred,     10, sse2);
            SET_CHROMA_FUNCS(put_unweighted_pred_avg_chroma, ff_hevc_put_unweighted_pred_avg, 10, sse2);

            c->add_residual[1] = ff_hevc_add_residual_8_10_sse2;
            c->add_residual[2] = ff_hevc_add_residual_16_10_sse2;
            c->add_residual[3] = ff_hevc_add_residual_32_10_sse2;
        }
        if (EXTERNAL_AVX(cpu_flags)) {
            c->idct[0] = ff_hevc_idct_4x4_10_avx;
            c->idct[1] = ff_hevc_idct_8x8_10_avx;
        }
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->add_residual[2] = ff_hevc_add_residual_16_10_avx2;
            c->add_residual[3] = ff_hevc_add_residual_32_10_avx2;
        }
    }

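/* The large IDCTs, the luma deblocking filter and the SSE4/AVX interpolation
 * and weighted-prediction code are only assembled on x86-64. */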
#if ARCH_X86_64
    if (bit_depth == 8) {
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->idct[2] = ff_hevc_idct_16x16_8_sse2;
            c->idct[3] = ff_hevc_idct_32x32_8_sse2;
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_8_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_8_ssse3;
        }

        if (EXTERNAL_SSE4(cpu_flags)) {
            SET_LUMA_FUNCS(weighted_pred,              ff_hevc_put_weighted_pred,     8, sse4);
            SET_CHROMA_FUNCS(weighted_pred_chroma,     ff_hevc_put_weighted_pred,     8, sse4);
            SET_LUMA_FUNCS(weighted_pred_avg,          ff_hevc_put_weighted_pred_avg, 8, sse4);
            SET_CHROMA_FUNCS(weighted_pred_avg_chroma, ff_hevc_put_weighted_pred_avg, 8, sse4);
        }

        if (EXTERNAL_AVX(cpu_flags)) {
#if HAVE_AVX_EXTERNAL
            SET_QPEL_FUNCS(1, 1, 8, avx, hevc_qpel_hv);
            SET_EPEL_FUNCS(1, 1, 8, avx, hevc_epel_hv);
#endif /* HAVE_AVX_EXTERNAL */
            c->idct[2] = ff_hevc_idct_16x16_8_avx;
            c->idct[3] = ff_hevc_idct_32x32_8_avx;
        }
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_8_avx2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_8_avx2;
        }
    } else if (bit_depth == 10) {
        if (EXTERNAL_SSE2(cpu_flags)) {
            c->idct[2] = ff_hevc_idct_16x16_10_sse2;
            c->idct[3] = ff_hevc_idct_32x32_10_sse2;
        }
        if (EXTERNAL_SSSE3(cpu_flags)) {
            c->hevc_v_loop_filter_luma = ff_hevc_v_loop_filter_luma_10_ssse3;
            c->hevc_h_loop_filter_luma = ff_hevc_h_loop_filter_luma_10_ssse3;
        }
        if (EXTERNAL_SSE4(cpu_flags)) {
            SET_LUMA_FUNCS(weighted_pred,              ff_hevc_put_weighted_pred,     10, sse4);
            SET_CHROMA_FUNCS(weighted_pred_chroma,     ff_hevc_put_weighted_pred,     10, sse4);
            SET_LUMA_FUNCS(weighted_pred_avg,          ff_hevc_put_weighted_pred_avg, 10, sse4);
            SET_CHROMA_FUNCS(weighted_pred_avg_chroma, ff_hevc_put_weighted_pred_avg, 10, sse4);
        }
        if (EXTERNAL_AVX(cpu_flags)) {
#if HAVE_AVX_EXTERNAL
            SET_QPEL_FUNCS(0, 1, 10, avx, ff_hevc_qpel_h);
            SET_QPEL_FUNCS(1, 0, 10, avx, ff_hevc_qpel_v);
            SET_QPEL_FUNCS(1, 1, 10, avx, hevc_qpel_hv);
            SET_EPEL_FUNCS(0, 1, 10, avx, ff_hevc_epel_h);
            SET_EPEL_FUNCS(1, 0, 10, avx, ff_hevc_epel_v);
            SET_EPEL_FUNCS(1, 1, 10, avx, hevc_epel_hv);
#endif /* HAVE_AVX_EXTERNAL */
            c->idct[2] = ff_hevc_idct_16x16_10_avx;
            c->idct[3] = ff_hevc_idct_32x32_10_avx;
        }
        if (EXTERNAL_AVX2(cpu_flags)) {
            c->idct_dc[2] = ff_hevc_idct_16x16_dc_10_avx2;
            c->idct_dc[3] = ff_hevc_idct_32x32_dc_10_avx2;
        }
    }
#endif /* ARCH_X86_64 */
}