1 ;*****************************************************************************
2 ;* x86-optimized functions for blend filter
4 ;* Copyright (C) 2015 Paul B Mahol
6 ;* This file is part of FFmpeg.
8 ;* FFmpeg is free software; you can redistribute it and/or
9 ;* modify it under the terms of the GNU Lesser General Public
10 ;* License as published by the Free Software Foundation; either
11 ;* version 2.1 of the License, or (at your option) any later version.
13 ;* FFmpeg is distributed in the hope that it will be useful,
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 ;* Lesser General Public License for more details.
18 ;* You should have received a copy of the GNU Lesser General Public
19 ;* License along with FFmpeg; if not, write to the Free Software
20 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 ;******************************************************************************
23 %include "libavutil/x86/x86util.asm"
; Constant tables. Each `times N` line emits its value N times, so every
; table below is 16 bytes long (4 dwords, 8 words or 16 bytes).
27 ps_255: times 4 dd 255.0 ; 4 dwords, each the float 255.0
29 pw_128: times 8 dw 128 ; 8 words, each 128
30 pw_255: times 8 dw 255 ; 8 words, each 255
31 pb_127: times 16 db 127 ; 16 bytes, each 127
32 pb_128: times 16 db 128 ; 16 bytes, each 128
33 pb_255: times 16 db 255 ; 16 bytes, each 255
; Two alternative declarations of blend_%1 (%1 = function-name suffix).
; After the name, cglobal is given: argument count, general-purpose register
; count, %2 (vector register count, per the x86inc cglobal convention), and
; then one name per register.
; NOTE(review): the conditional that picks one of the two declarations is not
; visible in this excerpt - presumably an ARCH_X86_64 test; confirm.
;
; Variant naming 9 registers: dst_linesize and width each get a register.
39 cglobal blend_%1, 6, 9, %2, top, top_linesize, bottom, bottom_linesize, dst, dst_linesize, width, end, x
; Fetch the 32-bit width from the argument's "m" operand into widthd.
40 mov widthd, dword widthm
; Variant naming 7 registers: none is assigned to dst_linesize or width.
42 cglobal blend_%1, 5, 7, %2, top, top_linesize, bottom, bottom_linesize, dst, end, x
; With no register for it, dst_linesizeq is aliased to the r5mp operand
; (presumably argument 5's pointer-sized memory slot in x86inc - confirm).
43 %define dst_linesizeq r5mp
; Advance the top, bottom and dst pointers by their own line sizes.
; NOTE(review): presumably this steps all three planes to the next row; the
; enclosing row loop is not visible in this excerpt.
54 add topq, top_linesizeq
55 add bottomq, bottom_linesizeq
56 add dstq, dst_linesizeq ; dst_linesizeq may be the r5mp alias rather than a register
; Load one vector from the bottom plane at byte offset xq into m1.
69 movu m1, [bottomq + xq]
; One BLEND_SIMPLE instantiation per blend mode: <mode name>, <operation>.
; NOTE(review): the macro body is not visible in this excerpt. The second
; argument looks like the suffix of a packed-byte instruction (addusb ->
; paddusb, subusb -> psubusb, minub -> pminub, maxub -> pmaxub) - confirm
; against the macro definition.
81 BLEND_SIMPLE addition, addusb
82 BLEND_SIMPLE subtract, subusb
83 BLEND_SIMPLE darken, minub
84 BLEND_SIMPLE lighten, maxub
; Start of the difference128 blend: BLEND_INIT receives the name suffix and
; the count 4 as its second argument.
; NOTE(review): only the bottom-plane load of the loop body is visible here.
86 BLEND_INIT difference128, 4
94 movh m1, [bottomq + xq] ; load bottom pixels at offset xq into m1
; MULTIPLY a, b, pw_1 - the result is left in %1 (a) as a * b / 255 per word.
; NOTE(review): only the first and last instructions of the macro are visible
; in this excerpt; the steps between them (and %endmacro) are not shown.
105 %macro MULTIPLY 3 ; a, b, pw_1
106 pmullw %1, %2 ; xxxxxxxx a * b (16-bit product per word lane)
111 psrlw %1, 8 ; 00xx00xx a * b / 255 (final shift right by 8 per word)
; SCREEN a, b, pw_1, pw_255 - %1 is complemented against %4 on entry and again
; on exit, so the result in %1 is 255 minus the intermediate value.
; NOTE(review): the instructions between the two pxor lines (and %endmacro)
; are not visible in this excerpt.
114 %macro SCREEN 4 ; a, b, pw_1, pw_255
115 pxor %1, %4 ; 00xx00xx 255 - a (xor with 255 equals 255 - a for 0..255)
118 pxor %1, %4 ; 00xx00xx 255 - x / 255 (complement the intermediate result)
; Start of the multiply blend: BLEND_INIT receives the name suffix and the
; count 4 as its second argument.
; NOTE(review): the arithmetic between the unpack and the pack is not visible
; in this excerpt.
121 BLEND_INIT multiply, 4
130 movh m0, [topq + xq] ; 0000xxxx
131 movh m1, [bottomq + xq] ; bottom pixels, loaded the same way as m0
132 punpcklbw m0, m2 ; 00xx00xx (bytes widened to words; assumes m2 is zero - confirm)
137 packuswb m0, m0 ; 0000xxxx (words packed back to bytes with unsigned saturation)
; Loop-body fragment built around the SCREEN macro.
; NOTE(review): the BLEND_INIT line that opens this function is not visible in
; this excerpt.
152 movh m0, [topq + xq] ; 0000xxxx
153 movh m1, [bottomq + xq] ; bottom pixels, loaded the same way as m0
154 punpcklbw m0, m2 ; 00xx00xx (bytes widened to words; assumes m2 is zero - confirm)
; m3 and m4 occupy the macro's pw_1 and pw_255 parameter positions.
157 SCREEN m0, m1, m3, m4
159 packuswb m0, m0 ; 0000xxxx (words packed back to bytes with unsigned saturation)
; Start of the average blend: BLEND_INIT receives the name suffix and the
; count 3 as its second argument.
; NOTE(review): only the bottom-plane load of the loop body is visible here.
166 BLEND_INIT average, 3
173 movh m1, [bottomq + xq] ; load bottom pixels at offset xq into m1
; Start of the addition128 blend: BLEND_INIT receives the name suffix and the
; count 4 as its second argument.
; NOTE(review): only the bottom-plane load of the loop body is visible here.
184 BLEND_INIT addition128, 4
192 movh m1, [bottomq + xq] ; load bottom pixels at offset xq into m1
; Start of the hardmix blend: BLEND_INIT receives the name suffix and the
; count 5 as its second argument.
; NOTE(review): only the bottom-plane load of the loop body is visible here.
203 BLEND_INIT hardmix, 5
212 movu m1, [bottomq + xq] ; load a vector of bottom pixels at offset xq into m1
; Loop-body fragment of a blend that divides top by bottom and scales the
; quotient (see the "a / b * 255" comment below).
; NOTE(review): the function's opening line, the int-to-float conversion and
; the division itself are not visible in this excerpt.
229 movd m0, [topq + xq] ; 000000xx
230 movd m1, [bottomq + xq] ; bottom pixels, loaded the same way as m0
231 punpcklbw m0, m2 ; 00000x0x (bytes -> words; assumes m2 is zero - confirm)
233 punpcklwd m0, m2 ; 000x000x (words -> dwords, same assumption)
239 mulps m0, m3 ; a / b * 255 (m3 presumably holds ps_255 - confirm)
243 packssdw m0, m0 ; 00000x0x (dwords -> words, signed saturation)
244 packuswb m0, m0 ; 000000xx (words -> bytes, unsigned saturation)
; Start of the phoenix blend: BLEND_INIT receives the name suffix and the
; count 4 as its second argument.
; NOTE(review): only the bottom-plane load of the loop body is visible here.
251 BLEND_INIT phoenix, 4
258 movu m1, [bottomq + xq] ; load a vector of bottom pixels at offset xq into m1
; Start of the difference blend: BLEND_INIT receives the name suffix and the
; count 3 as its second argument.
; NOTE(review): only the bottom-plane load of the loop body is visible here.
271 BLEND_INIT difference, 3
278 movh m1, [bottomq + xq] ; load bottom pixels at offset xq into m1
; Start of the negation blend: BLEND_INIT receives the name suffix and the
; count 5 as its second argument.
; NOTE(review): only the bottom-plane load of the loop body is visible here;
; the rest of the function lies beyond this excerpt.
289 BLEND_INIT negation, 5
297 movh m1, [bottomq + xq] ; load bottom pixels at offset xq into m1