1 /*****************************************************************************
2 * ac3_downmix_sse.c: ac3 downmix functions
3 *****************************************************************************
4 * Copyright (C) 1999, 2000, 2001 VideoLAN
5 * $Id: ac3_downmix_sse.c,v 1.1 2001/05/14 15:58:04 reno Exp $
7 * Authors: Renaud Dartus <reno@videolan.org>
8 * Aaron Holtzman <aholtzma@engr.uvic.ca>
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
23 *****************************************************************************/
33 #include "stream_control.h"
34 #include "input_ext-dec.h"
35 #include "ac3_decoder.h"
/* Emits the single-precision constant 0.7071068 (approximately 1/sqrt(2))
 * straight into the assembler output.
 * NOTE(review): presumably this is the storage behind the `sqrt2` symbol that
 * stream_sample_1ch_to_s16_sse loads with "movl $sqrt2, %edx"; the label
 * itself is not visible in this excerpt - confirm. */
40 __asm__ (".float 0f0.7071068");
/*
 * downmix_3f_2r_to_2ch_sse: SSE downmix of 3 front + 2 rear channels to 2.
 *
 * samples: float buffer, addressed through %eax. Four floats at a time are
 *          read at byte offsets 0 (left), 1024 (center), 2048 (right),
 *          3072 (left surround) and 4096 (right surround); the results are
 *          written back in place at offsets 0 (left) and 1024 (right).
 * dm_par:  coefficients, addressed through %ebx. The floats at byte offsets
 *          0, 4 and 8 are used as unit, clev and slev respectively.
 *
 * Per group of four floats:
 *   left  = unit * left  + clev * center + slev * leftsur
 *   right = unit * right + clev * center + slev * rightsur
 *
 * NOTE(review): the loop label, pointer advance, branch and any output or
 * clobber operands are not visible in this excerpt. A counter of 64 with four
 * floats per pass would match the 1024-byte channel spacing (256 floats) -
 * confirm. Also confirm that %ecx, %xmm0-%xmm7 and memory are declared as
 * modified.
 */
43 void downmix_3f_2r_to_2ch_sse (float * samples, dm_par_t * dm_par)
45 __asm__ __volatile__ (
47 "movl $64, %%ecx\n" /* loop counter */
49 "movss (%%ebx), %%xmm5\n" /* unit */
50 "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
52 "movss 4(%%ebx), %%xmm6\n" /* clev */
53 "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */
55 "movss 8(%%ebx), %%xmm7\n" /* slev */
56 "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
59 "movups (%%eax), %%xmm0\n" /* left */
60 "movups 2048(%%eax), %%xmm1\n" /* right */
61 "movups 1024(%%eax), %%xmm2\n" /* center */
62 "movups 3072(%%eax), %%xmm3\n" /* leftsur */
63 "movups 4096(%%eax), %%xmm4\n" /* rightsur */
64 "mulps %%xmm5, %%xmm0\n" /* left *= unit */
65 "mulps %%xmm5, %%xmm1\n" /* right *= unit */
66 "mulps %%xmm6, %%xmm2\n" /* center *= clev */
67 "addps %%xmm2, %%xmm0\n" /* left += clev * center */
68 "addps %%xmm2, %%xmm1\n" /* right += clev * center */
69 "mulps %%xmm7, %%xmm3\n" /* leftsur *= slev */
70 "mulps %%xmm7, %%xmm4\n" /* rightsur *= slev */
71 "addps %%xmm3, %%xmm0\n" /* left += slev * leftsur */
72 "addps %%xmm4, %%xmm1\n" /* right += slev * rightsur */
74 "movups %%xmm0, (%%eax)\n" /* store mixed left at offset 0 */
75 "movups %%xmm1, 1024(%%eax)\n" /* store mixed right at offset 1024 */
83 : "a" (samples), "b" (dm_par)); /* inputs: %eax = samples, %ebx = dm_par */
/*
 * downmix_2f_2r_to_2ch_sse: SSE downmix of 2 front + 2 rear channels to 2.
 *
 * samples: float buffer, addressed through %eax. Four floats at a time are
 *          read at byte offsets 0 (left), 1024 (right), 2048 (left surround)
 *          and 3072 (right surround); the results are written back in place
 *          at offsets 0 (left) and 1024 (right).
 * dm_par:  coefficients, addressed through %ebx. The floats at byte offsets
 *          0 and 8 are used as unit and slev; offset 4 is not read here.
 *
 * Per group of four floats:
 *   left  = unit * left  + slev * leftsur
 *   right = unit * right + slev * rightsur
 *
 * NOTE(review): the loop label, pointer advance, branch and any output or
 * clobber operands are not visible in this excerpt - confirm that %ecx, the
 * XMM registers used and memory are declared as modified.
 */
86 void downmix_2f_2r_to_2ch_sse (float *samples, dm_par_t * dm_par)
88 __asm__ __volatile__ (
90 "movl $64, %%ecx\n" /* loop counter */
92 "movss (%%ebx), %%xmm5\n" /* unit */
93 "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
95 "movss 8(%%ebx), %%xmm7\n" /* slev */
96 "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
99 "movups (%%eax), %%xmm0\n" /* left */
100 "movups 1024(%%eax), %%xmm1\n" /* right */
101 "movups 2048(%%eax), %%xmm3\n" /* leftsur */
102 "movups 3072(%%eax), %%xmm4\n" /* rightsur */
103 "mulps %%xmm5, %%xmm0\n" /* left *= unit */
104 "mulps %%xmm5, %%xmm1\n" /* right *= unit */
105 "mulps %%xmm7, %%xmm3\n" /* leftsur *= slev */
106 "mulps %%xmm7, %%xmm4\n" /* rightsur *= slev */
107 "addps %%xmm3, %%xmm0\n" /* left += slev * leftsur */
108 "addps %%xmm4, %%xmm1\n" /* right += slev * rightsur */
110 "movups %%xmm0, (%%eax)\n" /* store mixed left at offset 0 */
111 "movups %%xmm1, 1024(%%eax)\n" /* store mixed right at offset 1024 */
119 : "a" (samples), "b" (dm_par)); /* inputs: %eax = samples, %ebx = dm_par */
/*
 * downmix_3f_1r_to_2ch_sse: SSE downmix of 3 front + 1 rear channel to 2.
 *
 * samples: float buffer, addressed through %eax. Four floats at a time are
 *          read at byte offsets 0 (left), 1024 (center), 2048 (right) and
 *          3072 (surround); the results are written back in place at
 *          offsets 0 (left) and 1024 (right).
 * dm_par:  coefficients, addressed through %ebx. The floats at byte offsets
 *          0, 4 and 8 are used as unit, clev and slev respectively.
 *
 * Per group of four floats (the surround term is subtracted from the left
 * output and added to the right output):
 *   left  = unit * left  + clev * center - slev * sur
 *   right = unit * right + clev * center + slev * sur
 *
 * NOTE(review): the loop label, pointer advance, branch and any output or
 * clobber operands are not visible in this excerpt - confirm that %ecx, the
 * XMM registers used and memory are declared as modified.
 */
121 void downmix_3f_1r_to_2ch_sse (float *samples, dm_par_t * dm_par)
123 __asm__ __volatile__ (
126 "movl $64, %%ecx\n" /* loop counter */
128 "movss (%%ebx), %%xmm5\n" /* unit */
129 "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
131 "movss 4(%%ebx), %%xmm6\n" /* clev */
132 "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */
134 "movss 8(%%ebx), %%xmm7\n" /* slev */
135 "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
138 "movups (%%eax), %%xmm0\n" /* left */
139 "movups 2048(%%eax), %%xmm1\n" /* right */
140 "movups 1024(%%eax), %%xmm2\n" /* center */
141 "movups 3072(%%eax), %%xmm3\n" /* sur */
142 "mulps %%xmm5, %%xmm0\n" /* left *= unit */
143 "mulps %%xmm5, %%xmm1\n" /* right *= unit */
144 "mulps %%xmm6, %%xmm2\n" /* center *= clev */
145 "addps %%xmm2, %%xmm0\n" /* left += clev * center */
146 "mulps %%xmm7, %%xmm3\n" /* sur *= slev */
147 "addps %%xmm2, %%xmm1\n" /* right += clev * center */
148 "subps %%xmm3, %%xmm0\n" /* left -= slev * sur */
149 "addps %%xmm3, %%xmm1\n" /* right += slev * sur */
151 "movups %%xmm0, (%%eax)\n" /* store mixed left at offset 0 */
152 "movups %%xmm1, 1024(%%eax)\n" /* store mixed right at offset 1024 */
160 : "a" (samples), "b" (dm_par)); /* inputs: %eax = samples, %ebx = dm_par */
/*
 * downmix_2f_1r_to_2ch_sse: SSE downmix of 2 front + 1 rear channel to 2.
 *
 * samples: float buffer, addressed through %eax. Four floats at a time are
 *          read at byte offsets 0 (left), 1024 (right) and 2048 (surround);
 *          the results are written back in place at offsets 0 (left) and
 *          1024 (right).
 * dm_par:  coefficients, addressed through %ebx. The floats at byte offsets
 *          0 and 8 are used as unit and slev; offset 4 is not read here.
 *
 * Per group of four floats (the surround term is subtracted from the left
 * output and added to the right output):
 *   left  = unit * left  - slev * sur
 *   right = unit * right + slev * sur
 *
 * NOTE(review): the loop label, pointer advance, branch and any output or
 * clobber operands are not visible in this excerpt - confirm that %ecx, the
 * XMM registers used and memory are declared as modified.
 */
163 void downmix_2f_1r_to_2ch_sse (float *samples, dm_par_t * dm_par)
165 __asm__ __volatile__ (
167 "movl $64, %%ecx\n" /* loop counter */
169 "movss (%%ebx), %%xmm5\n" /* unit */
170 "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
172 "movss 8(%%ebx), %%xmm7\n" /* slev */
173 "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
176 "movups (%%eax), %%xmm0\n" /* left */
177 "movups 1024(%%eax), %%xmm1\n" /* right */
178 "movups 2048(%%eax), %%xmm3\n" /* sur */
179 "mulps %%xmm5, %%xmm0\n" /* left *= unit */
180 "mulps %%xmm5, %%xmm1\n" /* right *= unit */
181 "mulps %%xmm7, %%xmm3\n" /* sur *= slev */
182 "subps %%xmm3, %%xmm0\n" /* left -= slev * sur */
183 "addps %%xmm3, %%xmm1\n" /* right += slev * sur */
185 "movups %%xmm0, (%%eax)\n" /* store mixed left at offset 0 */
186 "movups %%xmm1, 1024(%%eax)\n" /* store mixed right at offset 1024 */
194 : "a" (samples), "b" (dm_par)); /* inputs: %eax = samples, %ebx = dm_par */
/*
 * downmix_3f_0r_to_2ch_sse: SSE downmix of 3 front channels to 2.
 *
 * samples: float buffer, addressed through %eax. Four floats at a time are
 *          read at byte offsets 0 (left), 1024 (center) and 2048 (right);
 *          the results are written back in place at offsets 0 (left) and
 *          1024 (right).
 * dm_par:  coefficients, addressed through %ebx. The floats at byte offsets
 *          0 and 4 are used as unit and clev; offset 8 is not read here.
 *
 * Per group of four floats:
 *   left  = unit * left  + clev * center
 *   right = unit * right + clev * center
 *
 * NOTE(review): the loop label, pointer advance, branch and any output or
 * clobber operands are not visible in this excerpt - confirm that %ecx, the
 * XMM registers used and memory are declared as modified.
 */
198 void downmix_3f_0r_to_2ch_sse (float *samples, dm_par_t * dm_par)
200 __asm__ __volatile__ (
202 "movl $64, %%ecx\n" /* loop counter */
204 "movss (%%ebx), %%xmm5\n" /* unit */
205 "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
207 "movss 4(%%ebx), %%xmm6\n" /* clev */
208 "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */
211 "movups (%%eax), %%xmm0\n" /* left */
212 "movups 2048(%%eax), %%xmm1\n" /* right */
213 "movups 1024(%%eax), %%xmm2\n" /* center */
214 "mulps %%xmm5, %%xmm0\n" /* left *= unit */
215 "mulps %%xmm5, %%xmm1\n" /* right *= unit */
216 "mulps %%xmm6, %%xmm2\n" /* center *= clev */
217 "addps %%xmm2, %%xmm0\n" /* left += clev * center */
218 "addps %%xmm2, %%xmm1\n" /* right += clev * center */
220 "movups %%xmm0, (%%eax)\n" /* store mixed left at offset 0 */
221 "movups %%xmm1, 1024(%%eax)\n" /* store mixed right at offset 1024 */
229 : "a" (samples), "b" (dm_par)); /* inputs: %eax = samples, %ebx = dm_par */
/*
 * stream_sample_1ch_to_s16_sse: scale one channel of floats and emit it as
 * 16-bit integers, two output values per input sample.
 *
 * s16_samples: destination, addressed through %eax; 16 bytes are written per
 *              group of four input floats.
 * left:        source floats, addressed through %ebx; four are read per group.
 *
 * Each group is multiplied by the float stored at the symbol `sqrt2`,
 * converted to 32-bit integers with cvtps2pi, narrowed to 16 bits with signed
 * saturation by packssdw, and stored.
 *
 * NOTE(review): the loop control and pointer advance are not visible in this
 * excerpt. %edx is overwritten but does not appear in the visible operand
 * lists, and no emms is visible after the MMX register use - confirm both.
 */
232 void stream_sample_1ch_to_s16_sse (s16 *s16_samples, float *left)
234 __asm__ __volatile__ (
238 "movl $sqrt2, %%edx\n" /* address of the scale constant */
239 "movss (%%edx), %%xmm7\n" /* load the scale constant */
240 "shufps $0, %%xmm7, %%xmm7\n" /* sqrt2 | sqrt2 | sqrt2 | sqrt2 */
244 "movups (%%ebx), %%xmm0\n" /* c3 | c2 | c1 | c0 */
245 "mulps %%xmm7, %%xmm0\n" /* scale all four samples */
246 "movhlps %%xmm0, %%xmm2\n" /* c3 | c2 */
248 "cvtps2pi %%xmm0, %%mm0\n" /* c1 c0 --> mm0, int_32 */
249 "cvtps2pi %%xmm2, %%mm1\n" /* c3 c2 --> mm1, int_32 */
251 "packssdw %%mm0, %%mm0\n" /* int_16; NOTE(review): with src == dst, packssdw yields c1 c0 c1 c0 (high to low), not c1 c1 c0 c0 - confirm intended order */
252 "packssdw %%mm1, %%mm1\n" /* int_16; NOTE(review): likewise c3 c2 c3 c2 (high to low), not c3 c3 c2 c2 */
254 "movq %%mm0, (%%eax)\n" /* first four 16-bit outputs */
255 "movq %%mm1, 8(%%eax)\n" /* next four 16-bit outputs */
265 : "=a" (s16_samples), "=b" (left) /* outputs: %eax and %ebx are written back to the arguments */
266 : "a" (s16_samples), "b" (left)); /* inputs: %eax = s16_samples, %ebx = left */
/*
 * stream_sample_2ch_to_s16_sse: interleave two float channels and emit them
 * as 16-bit integers.
 *
 * s16_samples: destination, addressed through %eax; 16 bytes are written per
 *              group, in the order l0 r0 l1 r1 l2 r2 l3 r3.
 * left:        left-channel floats, addressed through %ebx; four read per group.
 * right:       right-channel floats, addressed through %edx; four read per group.
 *
 * The samples are interleaved with unpcklps, converted to 32-bit integers
 * with cvtps2pi, then narrowed to 16 bits with signed saturation by packssdw.
 * No scaling is applied in the visible instructions.
 *
 * NOTE(review): the loop control and pointer advance are not visible in this
 * excerpt, and no emms is visible after the MMX register use - confirm.
 */
269 void stream_sample_2ch_to_s16_sse (s16 *s16_samples, float *left, float *right)
272 __asm__ __volatile__ (
277 "movups (%%ebx), %%xmm0\n" /* l3 | l2 | l1 | l0 */
278 "movups (%%edx), %%xmm1\n" /* r3 | r2 | r1 | r0 */
279 "movhlps %%xmm0, %%xmm2\n" /* l3 | l2 */
280 "movhlps %%xmm1, %%xmm3\n" /* r3 | r2 */
281 "unpcklps %%xmm1, %%xmm0\n" /* r1 | l1 | r0 | l0 */
282 "unpcklps %%xmm3, %%xmm2\n" /* r3 | l3 | r2 | l2 */
284 "cvtps2pi %%xmm0, %%mm0\n" /* r0 l0 --> mm0, int_32 */
285 "movhlps %%xmm0, %%xmm0\n" /* bring r1 | l1 down to the low half */
286 "cvtps2pi %%xmm0, %%mm1\n" /* r1 l1 --> mm1, int_32 */
287 "cvtps2pi %%xmm2, %%mm2\n" /* r2 l2 --> mm2, int_32 */
288 "movhlps %%xmm2, %%xmm2\n" /* bring r3 | l3 down to the low half */
289 "cvtps2pi %%xmm2, %%mm3\n" /* r3 l3 --> mm3, int_32 */
291 "packssdw %%mm1, %%mm0\n" /* r1 l1 r0 l0 --> mm0, int_16 */
292 "packssdw %%mm3, %%mm2\n" /* r3 l3 r2 l2 --> mm2, int_16 */
294 "movq %%mm0, (%%eax)\n" /* store l0 r0 l1 r1 */
295 "movq %%mm2, 8(%%eax)\n" /* store l2 r2 l3 r3 */
305 : "=a" (s16_samples), "=b" (left), "=d" (right) /* outputs: %eax, %ebx, %edx are written back to the arguments */
306 : "a" (s16_samples), "b" (left), "d" (right)); /* inputs: %eax = s16_samples, %ebx = left, %edx = right */