/*
 * Copyright (c) 2015 Paul B Mahol
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
#include <stddef.h>
#include <stdint.h>

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/x86/cpu.h"
#include "libavfilter/blend.h"
/*
 * Declare the prototype of one blend kernel named ff_blend_<name>_<opt>.
 *
 * The functions themselves are defined outside this file; only their
 * prototypes are emitted here so that ff_blend_init_x86() can store their
 * addresses in FilterParams.blend. The expansion already ends with a
 * semicolon, so invocations are written without one.
 */
#define BLEND_FUNC(name, opt) \
void ff_blend_##name##_##opt(const uint8_t *top, ptrdiff_t top_linesize,       \
                             const uint8_t *bottom, ptrdiff_t bottom_linesize, \
                             uint8_t *dst, ptrdiff_t dst_linesize,             \
                             ptrdiff_t width, ptrdiff_t height,                \
                             struct FilterParams *param, double *values, int starty);

/* Kernels installed on the non-16-bit path of ff_blend_init_x86(). */
BLEND_FUNC(addition, sse2)
BLEND_FUNC(addition, avx2)
BLEND_FUNC(grainmerge, sse2)
BLEND_FUNC(grainmerge, avx2)
BLEND_FUNC(average, sse2)
BLEND_FUNC(average, avx2)
BLEND_FUNC(and, sse2)
BLEND_FUNC(and, avx2)
BLEND_FUNC(darken, sse2)
BLEND_FUNC(darken, avx2)
BLEND_FUNC(grainextract, sse2)
BLEND_FUNC(grainextract, avx2)
BLEND_FUNC(multiply, sse2)
BLEND_FUNC(multiply, avx2)
BLEND_FUNC(screen, sse2)
BLEND_FUNC(screen, avx2)
BLEND_FUNC(hardmix, sse2)
BLEND_FUNC(hardmix, avx2)
BLEND_FUNC(divide, sse2)
BLEND_FUNC(lighten, sse2)
BLEND_FUNC(lighten, avx2)
BLEND_FUNC(or, sse2)
BLEND_FUNC(or, avx2)
BLEND_FUNC(phoenix, sse2)
BLEND_FUNC(phoenix, avx2)
BLEND_FUNC(subtract, sse2)
BLEND_FUNC(subtract, avx2)
BLEND_FUNC(xor, sse2)
BLEND_FUNC(xor, avx2)
BLEND_FUNC(difference, sse2)
BLEND_FUNC(difference, ssse3)
BLEND_FUNC(difference, avx2)
BLEND_FUNC(extremity, sse2)
BLEND_FUNC(extremity, ssse3)
BLEND_FUNC(extremity, avx2)
BLEND_FUNC(negation, sse2)
BLEND_FUNC(negation, ssse3)
BLEND_FUNC(negation, avx2)

/* Kernels installed on the 16-bit path; only declared for x86-64 builds. */
#if ARCH_X86_64
BLEND_FUNC(addition_16, sse2)
BLEND_FUNC(addition_16, avx2)
BLEND_FUNC(and_16, sse2)
BLEND_FUNC(and_16, avx2)
BLEND_FUNC(darken_16, sse4)
BLEND_FUNC(darken_16, avx2)
BLEND_FUNC(difference_16, sse4)
BLEND_FUNC(difference_16, avx2)
BLEND_FUNC(lighten_16, sse4)
BLEND_FUNC(lighten_16, avx2)
BLEND_FUNC(or_16, sse2)
BLEND_FUNC(or_16, avx2)
BLEND_FUNC(phoenix_16, sse4)
BLEND_FUNC(phoenix_16, avx2)
BLEND_FUNC(subtract_16, sse2)
BLEND_FUNC(subtract_16, avx2)
BLEND_FUNC(xor_16, sse2)
BLEND_FUNC(xor_16, avx2)
#endif /* ARCH_X86_64 */
93 av_cold void ff_blend_init_x86(FilterParams *param, int is_16bit)
95 int cpu_flags = av_get_cpu_flags();
98 if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1) {
99 switch (param->mode) {
100 case BLEND_ADDITION: param->blend = ff_blend_addition_sse2; break;
101 case BLEND_GRAINMERGE: param->blend = ff_blend_grainmerge_sse2; break;
102 case BLEND_AND: param->blend = ff_blend_and_sse2; break;
103 case BLEND_AVERAGE: param->blend = ff_blend_average_sse2; break;
104 case BLEND_DARKEN: param->blend = ff_blend_darken_sse2; break;
105 case BLEND_GRAINEXTRACT: param->blend = ff_blend_grainextract_sse2; break;
106 case BLEND_DIVIDE: param->blend = ff_blend_divide_sse2; break;
107 case BLEND_HARDMIX: param->blend = ff_blend_hardmix_sse2; break;
108 case BLEND_LIGHTEN: param->blend = ff_blend_lighten_sse2; break;
109 case BLEND_MULTIPLY: param->blend = ff_blend_multiply_sse2; break;
110 case BLEND_OR: param->blend = ff_blend_or_sse2; break;
111 case BLEND_PHOENIX: param->blend = ff_blend_phoenix_sse2; break;
112 case BLEND_SCREEN: param->blend = ff_blend_screen_sse2; break;
113 case BLEND_SUBTRACT: param->blend = ff_blend_subtract_sse2; break;
114 case BLEND_XOR: param->blend = ff_blend_xor_sse2; break;
115 case BLEND_DIFFERENCE: param->blend = ff_blend_difference_sse2; break;
116 case BLEND_EXTREMITY: param->blend = ff_blend_extremity_sse2; break;
117 case BLEND_NEGATION: param->blend = ff_blend_negation_sse2; break;
120 if (EXTERNAL_SSSE3(cpu_flags) && param->opacity == 1) {
121 switch (param->mode) {
122 case BLEND_DIFFERENCE: param->blend = ff_blend_difference_ssse3; break;
123 case BLEND_EXTREMITY: param->blend = ff_blend_extremity_ssse3; break;
124 case BLEND_NEGATION: param->blend = ff_blend_negation_ssse3; break;
128 if (EXTERNAL_AVX2_FAST(cpu_flags) && param->opacity == 1) {
129 switch (param->mode) {
130 case BLEND_ADDITION: param->blend = ff_blend_addition_avx2; break;
131 case BLEND_GRAINMERGE: param->blend = ff_blend_grainmerge_avx2; break;
132 case BLEND_AND: param->blend = ff_blend_and_avx2; break;
133 case BLEND_AVERAGE: param->blend = ff_blend_average_avx2; break;
134 case BLEND_DARKEN: param->blend = ff_blend_darken_avx2; break;
135 case BLEND_GRAINEXTRACT: param->blend = ff_blend_grainextract_avx2; break;
136 case BLEND_HARDMIX: param->blend = ff_blend_hardmix_avx2; break;
137 case BLEND_LIGHTEN: param->blend = ff_blend_lighten_avx2; break;
138 case BLEND_MULTIPLY: param->blend = ff_blend_multiply_avx2; break;
139 case BLEND_OR: param->blend = ff_blend_or_avx2; break;
140 case BLEND_PHOENIX: param->blend = ff_blend_phoenix_avx2; break;
141 case BLEND_SCREEN: param->blend = ff_blend_screen_avx2; break;
142 case BLEND_SUBTRACT: param->blend = ff_blend_subtract_avx2; break;
143 case BLEND_XOR: param->blend = ff_blend_xor_avx2; break;
144 case BLEND_DIFFERENCE: param->blend = ff_blend_difference_avx2; break;
145 case BLEND_EXTREMITY: param->blend = ff_blend_extremity_avx2; break;
146 case BLEND_NEGATION: param->blend = ff_blend_negation_avx2; break;
149 } else { /* is_16_bit */
151 if (EXTERNAL_SSE2(cpu_flags) && param->opacity == 1) {
152 switch (param->mode) {
153 case BLEND_ADDITION: param->blend = ff_blend_addition_16_sse2; break;
154 case BLEND_AND: param->blend = ff_blend_and_16_sse2; break;
155 case BLEND_OR: param->blend = ff_blend_or_16_sse2; break;
156 case BLEND_SUBTRACT: param->blend = ff_blend_subtract_16_sse2; break;
157 case BLEND_XOR: param->blend = ff_blend_xor_16_sse2; break;
160 if (EXTERNAL_SSE4(cpu_flags) && param->opacity == 1) {
161 switch (param->mode) {
162 case BLEND_DARKEN: param->blend = ff_blend_darken_16_sse4; break;
163 case BLEND_DIFFERENCE: param->blend = ff_blend_difference_16_sse4; break;
164 case BLEND_LIGHTEN: param->blend = ff_blend_lighten_16_sse4; break;
165 case BLEND_PHOENIX: param->blend = ff_blend_phoenix_16_sse4; break;
168 if (EXTERNAL_AVX2_FAST(cpu_flags) && param->opacity == 1) {
169 switch (param->mode) {
170 case BLEND_ADDITION: param->blend = ff_blend_addition_16_avx2; break;
171 case BLEND_AND: param->blend = ff_blend_and_16_avx2; break;
172 case BLEND_DARKEN: param->blend = ff_blend_darken_16_avx2; break;
173 case BLEND_DIFFERENCE: param->blend = ff_blend_difference_16_avx2; break;
174 case BLEND_LIGHTEN: param->blend = ff_blend_lighten_16_avx2; break;
175 case BLEND_OR: param->blend = ff_blend_or_16_avx2; break;
176 case BLEND_PHOENIX: param->blend = ff_blend_phoenix_16_avx2; break;
177 case BLEND_SUBTRACT: param->blend = ff_blend_subtract_16_avx2; break;
178 case BLEND_XOR: param->blend = ff_blend_xor_16_avx2; break;
181 #endif /* ARCH_X86_64 */