/*
 * Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22 #include "libavutil/cpu.h"
23 #include "libavcodec/vorbisdsp.h"
24 #include "dsputil_mmx.h" // for ff_pdw_80000000
/*
 * Vorbis inverse channel coupling, 3DNow! inline-asm variant.
 *
 * The loop advances two floats per iteration (i += 2) and hands mag[i] and
 * ang[i] to the asm statement as read-write memory operands (+m).
 *
 * NOTE(review): this view of the function is incomplete -- the opening brace,
 * the declaration of i, the loads into mm0/mm1, the stores back to memory and
 * the closing braces are not visible here. The per-instruction notes assume
 * mm0 holds the mag pair (m) and mm1 holds the ang pair (a); confirm against
 * the complete file.
 */
28 static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
// Zero mm7 once; it is the 0.0 operand of both comparisons inside the loop.
31 __asm__ volatile ("pxor %%mm7, %%mm7":);
32 for (i = 0; i < blocksize; i += 2) {
36 "movq %%mm0, %%mm2 \n\t" // mm2 = copy of mm0 (m), will become a mask
37 "movq %%mm1, %%mm3 \n\t" // mm3 = copy of mm1 (a), will become a mask
38 "pfcmpge %%mm7, %%mm2 \n\t" // mm2 = all-ones where m >= 0.0, else 0
39 "pfcmpge %%mm7, %%mm3 \n\t" // mm3 = all-ones where a >= 0.0, else 0
40 "pslld $31, %%mm2 \n\t" // keep only the top (sign) bit of the m mask
41 "pxor %%mm2, %%mm1 \n\t" // t = a with its sign flipped where m >= 0.0
42 "movq %%mm3, %%mm4 \n\t" // mm4 = second copy of the a >= 0.0 mask
43 "pand %%mm1, %%mm3 \n\t" // mm3 = t where a >= 0.0, else 0
44 "pandn %%mm1, %%mm4 \n\t" // mm4 = t where a <  0.0, else 0
45 "pfadd %%mm0, %%mm3 \n\t" // mm3 = m + ((a >= 0.0) ? t : 0.0)
46 "pfsub %%mm4, %%mm0 \n\t" // mm0 = m - ((a <  0.0) ? t : 0.0)
// Both array elements are read-write memory operands of the asm statement.
49 : "+m"(mag[i]), "+m"(ang[i])
// femms leaves MMX/3DNow! state so that x87 floating-point code can run again.
53 __asm__ volatile ("femms");
/*
 * Vorbis inverse channel coupling, SSE inline-asm variant.
 *
 * Each loop iteration loads four floats from mag + i and four from ang + i
 * with movaps, combines them, and stores four floats back to each array.
 * movaps requires 16-byte aligned addresses, so mag + i and ang + i must be
 * 16-byte aligned on every iteration.
 *
 * NOTE(review): every iteration touches four elements, so blocksize is
 * presumably a multiple of 4 -- confirm with the callers. The opening brace,
 * the declaration of i, the asm statement openers and the closing lines are
 * not visible in this view.
 */
57 static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
62 "movaps %0, %%xmm5 \n\t" // xmm5 = ff_pdw_80000000, used below as the sign-bit mask
// NOTE(review): presumably four 0x80000000 dwords, going by the name -- confirm.
63 :: "m"(ff_pdw_80000000[0])
65 for (i = 0; i < blocksize; i += 4) {
67 "movaps %0, %%xmm0 \n\t" // xmm0 = m (four floats from mag + i)
68 "movaps %1, %%xmm1 \n\t" // xmm1 = a (four floats from ang + i)
69 "xorps %%xmm2, %%xmm2 \n\t" // xmm2 = 0.0
70 "xorps %%xmm3, %%xmm3 \n\t" // xmm3 = 0.0
71 "cmpleps %%xmm0, %%xmm2 \n\t" // xmm2 = all-ones where 0.0 <= m, else 0
72 "cmpleps %%xmm1, %%xmm3 \n\t" // xmm3 = all-ones where 0.0 <= a, else 0
73 "andps %%xmm5, %%xmm2 \n\t" // keep only the bits of the m mask selected by xmm5
74 "xorps %%xmm2, %%xmm1 \n\t" // t = a with its sign flipped where 0.0 <= m
75 "movaps %%xmm3, %%xmm4 \n\t" // xmm4 = second copy of the 0.0 <= a mask
76 "andps %%xmm1, %%xmm3 \n\t" // xmm3 = t where 0.0 <= a, else 0
77 "andnps %%xmm1, %%xmm4 \n\t" // xmm4 = t where a < 0.0, else 0
78 "addps %%xmm0, %%xmm3 \n\t" // xmm3 = m + ((0.0 <= a) ? t : 0.0)
79 "subps %%xmm4, %%xmm0 \n\t" // xmm0 = m - ((a < 0.0) ? t : 0.0)
80 "movaps %%xmm3, %1 \n\t" // store the new ang values
81 "movaps %%xmm0, %0 \n\t" // store the new mag values
// Operand 0 is mag[i], operand 1 is ang[i]; both are read-write memory operands.
82 : "+m"(mag[i]), "+m"(ang[i])
/*
 * Install the x86 inline-asm implementations into a VorbisDSPContext.
 *
 * Reads the CPU feature flags once and sets dsp->vorbis_inverse_coupling
 * according to them. The SSE test comes after the 3DNow! test, so when both
 * flags are set the SSE routine is the one left installed. If neither flag is
 * set the field is not written by the code visible here.
 *
 * NOTE(review): the #if lines matching the two #endif lines below, and the
 * braces of the function body, are not visible in this view.
 */
89 void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
92 int mm_flags = av_get_cpu_flags();
// 3DNow! variant; the #endif below marks this as ARCH_X86_32-only code.
95 if (mm_flags & AV_CPU_FLAG_3DNOW)
96 dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
97 #endif /* ARCH_X86_32 */
// SSE variant, assigned last so it overrides the 3DNow! choice.
98 if (mm_flags & AV_CPU_FLAG_SSE)
99 dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
100 #endif /* HAVE_INLINE_ASM */