1 ;******************************************************************************
2 ;* Copyright (c) 2012 Michael Niedermayer
4 ;* This file is part of FFmpeg.
6 ;* FFmpeg is free software; you can redistribute it and/or
7 ;* modify it under the terms of the GNU Lesser General Public
8 ;* License as published by the Free Software Foundation; either
9 ;* version 2.1 of the License, or (at your option) any later version.
11 ;* FFmpeg is distributed in the hope that it will be useful,
12 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 ;* Lesser General Public License for more details.
16 ;* You should have received a copy of the GNU Lesser General Public
17 ;* License along with FFmpeg; if not, write to the Free Software
18 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
19 ;******************************************************************************
21 %include "libavutil/x86/x86inc.asm"
22 %include "libavutil/x86/x86util.asm"
; Single-precision scale constants, each replicated into 8 dwords by `times 8 dd`.
; NOTE(review): the instructions that load these are not visible in this
; excerpt; presumably the *_INIT helper macros reference them -- confirm.
26 flt2pm31: times 8 dd 4.6566129e-10 ; ~= 2^-31
27 flt2p31 : times 8 dd 2147483648.0 ; = 2^31
28 flt2p15 : times 8 dd 32768.0 ; = 2^15
; INT16_TO_INT32 <suffix>
;   Emits the function int16_to_int32_<suffix>(dst, src, len).
;   %1 is pasted both into the symbol name and into the move mnemonic (mov%1)
;   used for the stores below.
;   NOTE(review): this listing is gapped (the embedded line numbers skip), so
;   the loads, the conditions feeding the jne branches and the loop control are
;   not visible here.
32 %macro INT16_TO_INT32 1
; x86inc cglobal: 3 arguments, 3 GPRs, 3 vector registers; the names give
; dstq / srcq / lenq.
33 cglobal int16_to_int32_%1, 3, 3, 3, dst, src, len
; Both branches go to the shared "_u_int" label defined just below.
; NOTE(review): presumably taken when a pointer fails an alignment test; the
; test instructions are not visible in this excerpt -- confirm.
39 jne int16_to_int32_u_int %+ SUFFIX
41 jne int16_to_int32_u_int %+ SUFFIX
; Target of the two jne above; SUFFIX is appended to the label name.
43 int16_to_int32_u_int %+ SUFFIX
; Store two vector registers to dst, indexed by 2*len, one mmsize apart.
55 mov%1 [ dstq+2*lenq], m0
56 mov%1 [mmsize + dstq+2*lenq], m1
; INT32_TO_INT16 <suffix>
;   Emits the function int32_to_int16_<suffix>(dst, src, len).
;   %1 is pasted both into the symbol name and into the move mnemonic (mov%1)
;   used for the loads and the store below.
;   NOTE(review): this listing is gapped (the embedded line numbers skip), so
;   the conditions feeding the jne branches, the narrowing of m0/m1 and the
;   loop control are not visible here.
67 %macro INT32_TO_INT16 1
; x86inc cglobal: 3 arguments, 3 GPRs, 2 vector registers; the names give
; dstq / srcq / lenq.
68 cglobal int32_to_int16_%1, 3, 3, 2, dst, src, len
; Both branches go to the shared "_u_int" label defined just below.
; NOTE(review): presumably taken when a pointer fails an alignment test; the
; test instructions are not visible in this excerpt -- confirm.
74 jne int32_to_int16_u_int %+ SUFFIX
76 jne int32_to_int16_u_int %+ SUFFIX
; Target of the two jne above; SUFFIX is appended to the label name.
78 int32_to_int16_u_int %+ SUFFIX
; src += 2*len (lea leaves the flags untouched).
80 lea srcq, [srcq + 2*lenq]
; Load two vectors from src, indexed by 2*len, one mmsize apart.
84 mov%1 m0, [ srcq+2*lenq]
85 mov%1 m1, [mmsize + srcq+2*lenq]
; Only m0 is stored, indexed by len.
; NOTE(review): presumably m1 was merged into m0 by an instruction not shown.
89 mov%1 [ dstq+lenq], m0
95 ;to, from, a/u, log2_outsize, log2_insize, const
; pack_2ch_<from>_to_<to>_<a|u>(dst, src, len)
;   Macro parameters used in this body:
;     %1 = "to" sample type, %2 = "from" sample type,
;     %3 = a/u, pasted into the symbol name and into mov%3,
;     %4 = log2 of the output sample size, %5 = log2 of the input sample size.
;   x86inc cglobal: 3 arguments, 4 GPRs (src2 is the extra one), 5 vector
;   registers.
;   The shared fallback label is spelled <from>_to_<to> (%2_to_%1) so that it
;   matches the cglobal symbol; the label and every jne that targets it live in
;   this block, so the spelling only has to agree here.
;   NOTE(review): this listing is gapped (the embedded line numbers skip), so
;   the conditions feeding the jne branches, the interleave/conversion steps
;   and the loop control are not visible here.
97 cglobal pack_2ch_%2_to_%1_%3, 3, 4, 5, dst, src, len, src2
; src2 = the pointer-sized value stored one GPR width past [src].
98 mov src2q , [srcq+gprsize]
; All three branches go to the shared "_u_int" label defined just below.
; NOTE(review): presumably one alignment test per pointer (dst, src, src2);
; the test instructions are not visible in this excerpt -- confirm.
103 jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
105 jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
107 jne pack_2ch_%2_to_%1_u_int %+ SUFFIX
; Target of the three jne above; SUFFIX is appended to the label name.
109 pack_2ch_%2_to_%1_u_int %+ SUFFIX
; Advance both inputs by len input samples and dst by len two-channel output
; frames: (1<<%5) bytes per input sample, (2<<%4) bytes per output frame.
111 lea srcq , [srcq + (1<<%5)*lenq]
112 lea src2q, [src2q + (1<<%5)*lenq]
113 lea dstq , [dstq + (2<<%4)*lenq]
; First vector from each input.
117 mov%3 m0, [ srcq +(1<<%5)*lenq]
119 mov%3 m2, [ src2q+(1<<%5)*lenq]
; Second vector (mmsize further on) from each input.
; NOTE(review): m2 is written again here; the instructions between the two
; loads are not shown, so whether this is the same code path is unknown.
128 mov%3 m2, [mmsize + srcq +(1<<%5)*lenq]
130 mov%3 m4, [mmsize + src2q+(1<<%5)*lenq]
; Store up to four vectors of output, mmsize apart.
135 mov%3 [ dstq+(2<<%4)*lenq], m0
136 mov%3 [ mmsize + dstq+(2<<%4)*lenq], m1
138 mov%3 [2*mmsize + dstq+(2<<%4)*lenq], m2
139 mov%3 [3*mmsize + dstq+(2<<%4)*lenq], m3
; len advances by the number of output frames that fit in four vectors ...
140 add lenq, 4*mmsize/(2<<%4)
; ... or in two vectors.
; NOTE(review): presumably the two adds sit in alternative %if branches that
; are not visible here -- confirm.
142 add lenq, 2*mmsize/(2<<%4)
; <from>_to_<to>_<a|u>(dst, src, len)
;   Macro parameters used in this body:
;     %1 = "to" sample type, %2 = "from" sample type,
;     %3 = a/u, pasted into the symbol name and into mov%3,
;     %4 = log2 of the output sample size, %5 = log2 of the input sample size.
;   x86inc cglobal: 3 arguments, 3 GPRs, 6 vector registers.
;   NOTE(review): this listing is gapped (the embedded line numbers skip), so
;   the conditions feeding the jne branches, the conversion step and the loop
;   control are not visible here.
149 cglobal %2_to_%1_%3, 3, 3, 6, dst, src, len
; Both branches go to the shared "_u_int" label defined just below.
; NOTE(review): presumably taken when a pointer fails an alignment test; the
; test instructions are not visible in this excerpt -- confirm.
154 jne %2_to_%1_u_int %+ SUFFIX
156 jne %2_to_%1_u_int %+ SUFFIX
; Target of the two jne above; SUFFIX is appended to the label name.
158 %2_to_%1_u_int %+ SUFFIX
; Advance src by len input samples and dst by len output samples:
; (1<<%5) bytes per input sample, (1<<%4) bytes per output sample.
160 lea srcq , [srcq + (1<<%5)*lenq]
161 lea dstq , [dstq + (1<<%4)*lenq]
; Load up to four consecutive vectors from src.
165 mov%3 m0, [ srcq +(1<<%5)*lenq]
166 mov%3 m1, [ mmsize + srcq +(1<<%5)*lenq]
168 mov%3 m2, [2*mmsize + srcq +(1<<%5)*lenq]
169 mov%3 m3, [3*mmsize + srcq +(1<<%5)*lenq]
; Store up to four consecutive vectors to dst.
172 mov%3 [ dstq+(1<<%4)*lenq], m0
173 mov%3 [ mmsize + dstq+(1<<%4)*lenq], m1
175 mov%3 [2*mmsize + dstq+(1<<%4)*lenq], m2
176 mov%3 [3*mmsize + dstq+(1<<%4)*lenq], m3
; len advances by the number of output samples that fit in four vectors ...
177 add lenq, 4*mmsize/(1<<%4)
; ... or in two vectors.
; NOTE(review): presumably the two adds sit in alternative %if branches that
; are not visible here -- confirm.
179 add lenq, 2*mmsize/(1<<%4)
; Zero-parameter helper macros. Their names are passed as the 6th (*_N) and
; 7th (*_INIT) arguments of the PACK_2CH / CONV invocations further down.
; NOTE(review): only the %macro header lines are visible in this excerpt; the
; bodies and the matching %endmacro lines are not shown, so what each helper
; emits cannot be documented from here.
185 %macro INT16_TO_INT32_N 0
197 %macro INT32_TO_INT16_N 0
207 %macro INT32_TO_FLOAT_INIT 0
210 %macro INT32_TO_FLOAT_N 0
217 %macro FLOAT_TO_INT32_INIT 0
220 %macro FLOAT_TO_INT32_N 0
231 %macro INT16_TO_FLOAT_INIT 0
234 %macro INT16_TO_FLOAT_N 0
246 %macro FLOAT_TO_INT16_INIT 0
249 %macro FLOAT_TO_INT16_N 0
; Macro instantiations. Argument order for both PACK_2CH and CONV:
;   to-type, from-type, a/u, log2 output sample size, log2 input sample size,
;   [*_N helper macro, [*_INIT helper macro]]
; Every type pair is instantiated twice, once with "u" and once with "a".
; NOTE(review): this listing is gapped (the embedded line numbers skip);
; presumably instruction-set selection lines sit between the groups -- confirm.

; Two-channel packing without a helper macro (same type in and out).
274 PACK_2CH int16, int16, u, 1, 1
275 PACK_2CH int16, int16, a, 1, 1
276 PACK_2CH int32, int32, u, 2, 2
277 PACK_2CH int32, int32, a, 2, 2
; Two-channel packing between int16 and int32.
278 PACK_2CH int32, int16, u, 2, 1, INT16_TO_INT32_N
279 PACK_2CH int32, int16, a, 2, 1, INT16_TO_INT32_N
280 PACK_2CH int16, int32, u, 1, 2, INT32_TO_INT16_N
281 PACK_2CH int16, int32, a, 1, 2, INT32_TO_INT16_N
; Conversions between float and int32 / int16.
284 CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
285 CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
286 CONV int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
287 CONV int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
288 CONV float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
289 CONV float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
290 CONV int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
291 CONV int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
; Two-channel packing between float and int32 / int16.
293 PACK_2CH float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
294 PACK_2CH float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
295 PACK_2CH int32, float, u, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
296 PACK_2CH int32, float, a, 2, 2, FLOAT_TO_INT32_N, FLOAT_TO_INT32_INIT
297 PACK_2CH float, int16, u, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
298 PACK_2CH float, int16, a, 2, 1, INT16_TO_FLOAT_N, INT16_TO_FLOAT_INIT
299 PACK_2CH int16, float, u, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
300 PACK_2CH int16, float, a, 1, 2, FLOAT_TO_INT16_N, FLOAT_TO_INT16_INIT
; Same arguments as the first float <- int32 CONV pair above.
; NOTE(review): presumably emitted under a different instruction set / SUFFIX
; so the symbols do not collide; the selecting lines are not visible -- confirm.
305 CONV float, int32, u, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT
306 CONV float, int32, a, 2, 2, INT32_TO_FLOAT_N, INT32_TO_FLOAT_INIT