1 ;******************************************************************************
2 ;* Copyright (c) 2012 Michael Niedermayer
4 ;* This file is part of FFmpeg.
6 ;* FFmpeg is free software; you can redistribute it and/or
7 ;* modify it under the terms of the GNU Lesser General Public
8 ;* License as published by the Free Software Foundation; either
9 ;* version 2.1 of the License, or (at your option) any later version.
11 ;* FFmpeg is distributed in the hope that it will be useful,
12 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;* Lesser General Public License for more details.
16 ;* You should have received a copy of the GNU Lesser General Public
17 ;* License along with FFmpeg; if not, write to the Free Software
18 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 ;******************************************************************************
21 %include "libavutil/x86/x86inc.asm"
22 %include "libavutil/x86/x86util.asm"
; mix_2_1_%1_float: float kernel taking two input pointers (in1, in2), one
; output pointer (out), a coefficient table pointer (coeffp), two table
; indices (index1, index2) and a length/offset (len).
; cglobal arguments "7, 7, 6" follow the x86inc convention: 7 function
; arguments, 7 general-purpose registers, 6 vector registers (m0-m5).
; %1 is a macro parameter, so this block is expected to live inside a %macro
; whose header is outside this excerpt; %1 also selects the store instruction
; (mov%1) below -- presumably "a"/"u" for aligned/unaligned, TODO confirm.
33 cglobal mix_2_1_%1_float, 7, 7, 6, out, in1, in2, coeffp, index1, index2, len
; Three conditional jumps to the shared mix_2_1_float_u_int entry point.
; The instructions that set the flags are not visible in this excerpt
; (presumably alignment checks on the pointers -- verify against full file).
36 jne mix_2_1_float_u_int %+ SUFFIX
38 jne mix_2_1_float_u_int %+ SUFFIX
40 jne mix_2_1_float_u_int %+ SUFFIX
; Target symbol of the jumps above ("%+ SUFFIX" pastes the instruction-set
; suffix onto the name).
; NOTE(review): no trailing ':' is visible on this label line -- confirm it is
; present in the real file, otherwise the assembler may not treat it as a label.
42 mix_2_1_float_u_int %+ SUFFIX
; Broadcast the two 32-bit coefficients coeffp[index1] and coeffp[index2]
; (4-byte stride) to every lane of m4 and m5.
44 VBROADCASTSS m4, [coeffpq + 4*index1q]
45 VBROADCASTSS m5, [coeffpq + 4*index2q]
; Variant A: multiply straight from memory. m0/m2 = m4 * in1, m1/m3 = m5 * in2
; for two consecutive vectors (offsets lenq and lenq + mmsize).
53 mulps m0, m4, [in1q + lenq ]
54 mulps m1, m5, [in2q + lenq ]
55 mulps m2, m4, [in1q + lenq + mmsize]
56 mulps m3, m5, [in2q + lenq + mmsize]
; Variant B: unaligned loads of the same four vectors into the same registers.
; Variants A and B are presumably selected by conditional assembly that is not
; visible here; the multiplies for variant B are likewise not shown.
58 movu m0, [in1q + lenq ]
59 movu m1, [in2q + lenq ]
60 movu m2, [in1q + lenq + mmsize]
61 movu m3, [in2q + lenq + mmsize]
; Store two result vectors (m0, m2) to out. The instructions combining m0 with
; m1 and m2 with m3, and the loop control on lenq, are outside this excerpt.
69 mov%1 [outq + lenq ], m0
70 mov%1 [outq + lenq + mmsize], m2
; mix_1_1_%1_float: float kernel taking one input pointer (in), one output
; pointer (out), a coefficient table pointer (coeffp), one table index (index)
; and a length/offset (len).
; cglobal arguments "5, 5, 3": 5 function arguments, 5 general-purpose
; registers, 3 vector registers (m0-m2), per the x86inc convention.
; %1 is a macro parameter (enclosing %macro not visible) and selects the
; store instruction mov%1 -- presumably "a"/"u", TODO confirm.
77 cglobal mix_1_1_%1_float, 5, 5, 3, out, in, coeffp, index, len
; Two conditional jumps to the shared mix_1_1_float_u_int entry point; the
; flag-setting instructions are not visible in this excerpt.
80 jne mix_1_1_float_u_int %+ SUFFIX
82 jne mix_1_1_float_u_int %+ SUFFIX
; Target symbol of the jumps above.
; NOTE(review): no trailing ':' is visible on this label line -- confirm
; against the real file.
84 mix_1_1_float_u_int %+ SUFFIX
; Broadcast the 32-bit coefficient coeffp[index] (4-byte stride) to all lanes
; of m2.
86 VBROADCASTSS m2, [coeffpq + 4*indexq]
; Variant A: m0/m1 = m2 * in for two consecutive vectors, read directly from
; memory at offsets lenq and lenq + mmsize.
93 mulps m0, m2, [inq + lenq ]
94 mulps m1, m2, [inq + lenq + mmsize]
; Variant B: unaligned loads of the same two vectors. Which variant is
; assembled is presumably decided by conditional assembly not shown here.
96 movu m0, [inq + lenq ]
97 movu m1, [inq + lenq + mmsize]
; Store the two result vectors to out. Loop control on lenq is outside this
; excerpt.
101 mov%1 [outq + lenq ], m0
102 mov%1 [outq + lenq + mmsize], m1
; mix_1_1_%1_int16: int16 kernel taking one input pointer (in), one output
; pointer (out), a coefficient table pointer (coeffp), one table index (index)
; and a length/offset (len).
; cglobal arguments "5, 5, 6": 5 function arguments, 5 general-purpose
; registers, 6 vector registers, per the x86inc convention.
; %1 is a macro parameter (enclosing %macro not visible) and selects both the
; load and the store instruction via mov%1 -- presumably "a"/"u", TODO confirm.
109 cglobal mix_1_1_%1_int16, 5, 5, 6, out, in, coeffp, index, len
; Two conditional jumps to the shared mix_1_1_int16_u_int entry point; the
; flag-setting instructions are not visible in this excerpt.
112 jne mix_1_1_int16_u_int %+ SUFFIX
114 jne mix_1_1_int16_u_int %+ SUFFIX
; Target symbol of the jumps above.
; NOTE(review): no trailing ':' is visible on this label line -- confirm
; against the real file.
116 mix_1_1_int16_u_int %+ SUFFIX
; Load the 32-bit table entry coeffp[index] (4-byte stride) into the low dword
; of m4. How the entry is unpacked/splatted is not visible here.
118 movd m4, [coeffpq + 4*indexq]
; Load two consecutive input vectors (offsets lenq and lenq + mmsize).
131 mov%1 m0, [inq + lenq ]
132 mov%1 m2, [inq + lenq + mmsize]
; Store two result vectors to out. The arithmetic between the loads and these
; stores, and the loop control on lenq, are outside this excerpt.
149 mov%1 [outq + lenq ], m0
150 mov%1 [outq + lenq + mmsize], m2
; mix_2_1_%1_int16: int16 kernel taking two input pointers (in1, in2), one
; output pointer (out), a coefficient table pointer (coeffp), two table
; indices (index1, index2) and a length/offset (len).
; cglobal arguments "7, 7, 8": 7 function arguments, 7 general-purpose
; registers, 8 vector registers, per the x86inc convention.
; %1 is a macro parameter (enclosing %macro not visible) and selects both the
; load and the store instruction via mov%1 -- presumably "a"/"u", TODO confirm.
162 cglobal mix_2_1_%1_int16, 7, 7, 8, out, in1, in2, coeffp, index1, index2, len
; Three conditional jumps to the shared mix_2_1_int16_u_int entry point; the
; flag-setting instructions are not visible in this excerpt.
165 jne mix_2_1_int16_u_int %+ SUFFIX
167 jne mix_2_1_int16_u_int %+ SUFFIX
169 jne mix_2_1_int16_u_int %+ SUFFIX
; Target symbol of the jumps above.
; NOTE(review): no trailing ':' is visible on this label line -- confirm
; against the real file.
171 mix_2_1_int16_u_int %+ SUFFIX
; Load the 32-bit table entries coeffp[index1] and coeffp[index2] (4-byte
; stride) into the low dwords of m4 and m6.
173 movd m4, [coeffpq + 4*index1q]
174 movd m6, [coeffpq + 4*index2q]
; First pair of input vectors: m0 from in1, m2 from in2, both at offset lenq.
189 mov%1 m0, [in1q + lenq ]
190 mov%1 m2, [in2q + lenq ]
; Second pair at offset lenq + mmsize: m2 from in1, m6 from in2.
; These loads overwrite the m2 loaded just above and the m6 that held the
; second table entry, so the instructions not visible in this excerpt must
; have consumed or copied both values before this point -- verify against the
; full file before reordering anything here.
195 mov%1 m2, [in1q + lenq + mmsize]
196 mov%1 m6, [in2q + lenq + mmsize]
; Store two result vectors to out. The arithmetic producing them and the loop
; control on lenq are outside this excerpt.
215 mov%1 [outq + lenq ], m0
216 mov%1 [outq + lenq + mmsize], m2