--- /dev/null
- * This file is part of Libav.
+ /*
+ * ARM NEON optimised DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
- * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+ *
- * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
- * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ #include "libavutil/attributes.h"
+ #include "libavutil/cpu.h"
+ #include "libavutil/arm/cpu.h"
+ #include "libavcodec/vorbisdsp.h"
+
+ void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize);
+
+ void ff_vorbisdsp_init_arm(VorbisDSPContext *c)
+ {
+ int cpu_flags = av_get_cpu_flags();
+
+ if (have_neon(cpu_flags)) {
+ c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
+ }
+ }
--- /dev/null
- * This file is part of Libav.
+ /*
+ * ARM NEON optimised DSP functions
+ * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
+ *
- * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+ *
- * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
- * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ #include "libavutil/arm/asm.S"
+
+ function ff_vorbis_inverse_coupling_neon, export=1
+ vmov.i32 q10, #1<<31
+ subs r2, r2, #4
+ mov r3, r0
+ mov r12, r1
+ beq 3f
+
+ vld1.32 {d24-d25},[r1,:128]!
+ vld1.32 {d22-d23},[r0,:128]!
+ vcle.s32 q8, q12, #0
+ vand q9, q11, q10
+ veor q12, q12, q9
+ vand q2, q12, q8
+ vbic q3, q12, q8
+ vadd.f32 q12, q11, q2
+ vsub.f32 q11, q11, q3
+ 1: vld1.32 {d2-d3}, [r1,:128]!
+ vld1.32 {d0-d1}, [r0,:128]!
+ vcle.s32 q8, q1, #0
+ vand q9, q0, q10
+ veor q1, q1, q9
+ vst1.32 {d24-d25},[r3, :128]!
+ vst1.32 {d22-d23},[r12,:128]!
+ vand q2, q1, q8
+ vbic q3, q1, q8
+ vadd.f32 q1, q0, q2
+ vsub.f32 q0, q0, q3
+ subs r2, r2, #8
+ ble 2f
+ vld1.32 {d24-d25},[r1,:128]!
+ vld1.32 {d22-d23},[r0,:128]!
+ vcle.s32 q8, q12, #0
+ vand q9, q11, q10
+ veor q12, q12, q9
+ vst1.32 {d2-d3}, [r3, :128]!
+ vst1.32 {d0-d1}, [r12,:128]!
+ vand q2, q12, q8
+ vbic q3, q12, q8
+ vadd.f32 q12, q11, q2
+ vsub.f32 q11, q11, q3
+ b 1b
+
+ 2: vst1.32 {d2-d3}, [r3, :128]!
+ vst1.32 {d0-d1}, [r12,:128]!
+ it lt
+ bxlt lr
+
+ 3: vld1.32 {d2-d3}, [r1,:128]
+ vld1.32 {d0-d1}, [r0,:128]
+ vcle.s32 q8, q1, #0
+ vand q9, q0, q10
+ veor q1, q1, q9
+ vand q2, q1, q8
+ vbic q3, q1, q8
+ vadd.f32 q1, q0, q2
+ vsub.f32 q0, q0, q3
+ vst1.32 {d2-d3}, [r0,:128]!
+ vst1.32 {d0-d1}, [r1,:128]!
+ bx lr
+ endfunc
#include "mathops.h"
#include "mpegvideo.h"
#include "config.h"
- #include "vorbis.h"
+#include "diracdsp.h"
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, };
uint32_t ff_squareTbl[512] = {0, };
--- /dev/null
- * This file is part of Libav.
+ /*
+ * Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org>
+ *
- * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+ *
- * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
- * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ #include "config.h"
+ #if HAVE_ALTIVEC_H
+ #include <altivec.h>
+ #endif
+ #include "libavutil/cpu.h"
+ #include "libavutil/ppc/types_altivec.h"
+ #include "libavutil/ppc/util_altivec.h"
+ #include "libavcodec/vorbisdsp.h"
+
+ #if HAVE_ALTIVEC
+ static void vorbis_inverse_coupling_altivec(float *mag, float *ang,
+ int blocksize)
+ {
+ int i;
+ vector float m, a;
+ vector bool int t0, t1;
+ const vector unsigned int v_31 = //XXX
+ vec_add(vec_add(vec_splat_u32(15),vec_splat_u32(15)),vec_splat_u32(1));
+ for (i = 0; i < blocksize; i += 4) {
+ m = vec_ld(0, mag+i);
+ a = vec_ld(0, ang+i);
+ t0 = vec_cmple(m, (vector float)vec_splat_u32(0));
+ t1 = vec_cmple(a, (vector float)vec_splat_u32(0));
+ a = vec_xor(a, (vector float) vec_sl((vector unsigned int)t0, v_31));
+ t0 = (vector bool int)vec_and(a, t1);
+ t1 = (vector bool int)vec_andc(a, t1);
+ a = vec_sub(m, (vector float)t1);
+ m = vec_add(m, (vector float)t0);
+ vec_stl(a, 0, ang+i);
+ vec_stl(m, 0, mag+i);
+ }
+ }
+ #endif /* HAVE_ALTIVEC */
+
+ void ff_vorbisdsp_init_ppc(VorbisDSPContext* c)
+ {
+ #if HAVE_ALTIVEC
+ if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) {
+ c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec;
+ }
+ #endif /* HAVE_ALTIVEC */
+ }
#define BITSTREAM_READER_LE
#include "libavutil/float_dsp.h"
+#include "libavutil/avassert.h"
#include "avcodec.h"
#include "get_bits.h"
- #include "dsputil.h"
#include "fft.h"
#include "fmtconvert.h"
#include "internal.h"
--- /dev/null
- * This file is part of Libav.
+ /*
- * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+ *
- * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
- * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ #include "config.h"
+ #include "vorbisdsp.h"
+ #include "vorbis.h"
+
+ void ff_vorbisdsp_init(VorbisDSPContext *dsp)
+ {
+ dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;
+
+ if (ARCH_X86)
+ ff_vorbisdsp_init_x86(dsp);
+ if (ARCH_PPC)
+ ff_vorbisdsp_init_ppc(dsp);
+ if (ARCH_ARM)
+ ff_vorbisdsp_init_arm(dsp);
+ }
--- /dev/null
- * This file is part of Libav.
+ /*
- * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+ *
- * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
- * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ #ifndef AVCODEC_VORBISDSP_H
+ #define AVCODEC_VORBISDSP_H
+
+ typedef struct VorbisDSPContext {
+ /* assume len is a multiple of 4, and arrays are 16-byte aligned */
+ void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
+ } VorbisDSPContext;
+
+ void ff_vorbisdsp_init(VorbisDSPContext *dsp);
+
+ /* for internal use only */
+ void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp);
+ void ff_vorbisdsp_init_arm(VorbisDSPContext *dsp);
+ void ff_vorbisdsp_init_ppc(VorbisDSPContext *dsp);
+
+ #endif /* AVCODEC_VORBISDSP_H */
avg_pixels8_mmxext(dst, src, stride, 8);
}
- static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
- {
- int i;
- __asm__ volatile ("pxor %%mm7, %%mm7":);
- for (i = 0; i < blocksize; i += 2) {
- __asm__ volatile (
- "movq %0, %%mm0 \n\t"
- "movq %1, %%mm1 \n\t"
- "movq %%mm0, %%mm2 \n\t"
- "movq %%mm1, %%mm3 \n\t"
- "pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0
- "pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0
- "pslld $31, %%mm2 \n\t" // keep only the sign bit
- "pxor %%mm2, %%mm1 \n\t"
- "movq %%mm3, %%mm4 \n\t"
- "pand %%mm1, %%mm3 \n\t"
- "pandn %%mm1, %%mm4 \n\t"
- "pfadd %%mm0, %%mm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
- "pfsub %%mm4, %%mm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
- "movq %%mm3, %1 \n\t"
- "movq %%mm0, %0 \n\t"
- : "+m"(mag[i]), "+m"(ang[i])
- :: "memory"
- );
- }
- __asm__ volatile ("femms");
- }
-
- static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
- {
- int i;
-
- __asm__ volatile (
- "movaps %0, %%xmm5 \n\t"
- :: "m"(ff_pdw_80000000[0])
- );
- for (i = 0; i < blocksize; i += 4) {
- __asm__ volatile (
- "movaps %0, %%xmm0 \n\t"
- "movaps %1, %%xmm1 \n\t"
- "xorps %%xmm2, %%xmm2 \n\t"
- "xorps %%xmm3, %%xmm3 \n\t"
- "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
- "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
- "andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit
- "xorps %%xmm2, %%xmm1 \n\t"
- "movaps %%xmm3, %%xmm4 \n\t"
- "andps %%xmm1, %%xmm3 \n\t"
- "andnps %%xmm1, %%xmm4 \n\t"
- "addps %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
- "subps %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
- "movaps %%xmm3, %1 \n\t"
- "movaps %%xmm0, %0 \n\t"
- : "+m"(mag[i]), "+m"(ang[i])
- :: "memory"
- );
- }
- }
-
+/* only used in VP3/5/6 */
+static void put_vp_no_rnd_pixels8_l2_mmx(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
+{
+// START_TIMER
+ MOVQ_BFE(mm6);
+ __asm__ volatile(
+ "1: \n\t"
+ "movq (%1), %%mm0 \n\t"
+ "movq (%2), %%mm1 \n\t"
+ "movq (%1,%4), %%mm2 \n\t"
+ "movq (%2,%4), %%mm3 \n\t"
+ PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%3) \n\t"
+ "movq %%mm5, (%3,%4) \n\t"
+
+ "movq (%1,%4,2), %%mm0 \n\t"
+ "movq (%2,%4,2), %%mm1 \n\t"
+ "movq (%1,%5), %%mm2 \n\t"
+ "movq (%2,%5), %%mm3 \n\t"
+ "lea (%1,%4,4), %1 \n\t"
+ "lea (%2,%4,4), %2 \n\t"
+ PAVGBP_MMX_NO_RND(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
+ "movq %%mm4, (%3,%4,2) \n\t"
+ "movq %%mm5, (%3,%5) \n\t"
+ "lea (%3,%4,4), %3 \n\t"
+ "subl $4, %0 \n\t"
+ "jnz 1b \n\t"
+ :"+r"(h), "+r"(a), "+r"(b), "+r"(dst)
+ :"r"((x86_reg)stride), "r"((x86_reg)3L*stride)
+ :"memory");
+// STOP_TIMER("put_vp_no_rnd_pixels8_l2_mmx")
+}
+static void put_vp_no_rnd_pixels16_l2_mmx(uint8_t *dst, const uint8_t *a, const uint8_t *b, int stride, int h)
+{
+ put_vp_no_rnd_pixels8_l2_mmx(dst, a, b, stride, h);
+ put_vp_no_rnd_pixels8_l2_mmx(dst+8, a+8, b+8, stride, h);
+}
+
+#if CONFIG_DIRAC_DECODER
+#define DIRAC_PIXOP(OPNAME, EXT)\
+void ff_ ## OPNAME ## _dirac_pixels8_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+ OPNAME ## _pixels8_ ## EXT(dst, src[0], stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+ OPNAME ## _pixels16_ ## EXT(dst, src[0], stride, h);\
+}\
+void ff_ ## OPNAME ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
+{\
+ OPNAME ## _pixels16_ ## EXT(dst , src[0] , stride, h);\
+ OPNAME ## _pixels16_ ## EXT(dst+16, src[0]+16, stride, h);\
+}
+
+DIRAC_PIXOP(put, mmx)
+DIRAC_PIXOP(avg, mmx)
+DIRAC_PIXOP(avg, mmxext)
+
+#if HAVE_YASM
+void ff_put_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+ ff_put_pixels16_sse2(dst, src[0], stride, h);
+}
+void ff_avg_dirac_pixels16_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+ ff_avg_pixels16_sse2(dst, src[0], stride, h);
+}
+void ff_put_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+ ff_put_pixels16_sse2(dst , src[0] , stride, h);
+ ff_put_pixels16_sse2(dst+16, src[0]+16, stride, h);
+}
+void ff_avg_dirac_pixels32_sse2(uint8_t *dst, const uint8_t *src[5], int stride, int h)
+{
+ ff_avg_pixels16_sse2(dst , src[0] , stride, h);
+ ff_avg_pixels16_sse2(dst+16, src[0]+16, stride, h);
+}
+#endif
+#endif
+
+/* XXX: Those functions should be suppressed ASAP when all IDCTs are
+ * converted. */
+#if CONFIG_GPL
+static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size,
+ DCTELEM *block)
+{
+ ff_mmx_idct(block);
+ ff_put_pixels_clamped_mmx(block, dest, line_size);
+}
+
+static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size,
+ DCTELEM *block)
+{
+ ff_mmx_idct(block);
+ ff_add_pixels_clamped_mmx(block, dest, line_size);
+}
+
+static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size,
+ DCTELEM *block)
+{
+ ff_mmxext_idct(block);
+ ff_put_pixels_clamped_mmx(block, dest, line_size);
+}
+
+static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size,
+ DCTELEM *block)
+{
+ ff_mmxext_idct(block);
+ ff_add_pixels_clamped_mmx(block, dest, line_size);
+}
+#endif
+
static void vector_clipf_sse(float *dst, const float *src,
float min, float max, int len)
{
--- /dev/null
- * This file is part of Libav.
+ /*
+ * Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu>
+ *
- * Libav is free software; you can redistribute it and/or
++ * This file is part of FFmpeg.
+ *
- * Libav is distributed in the hope that it will be useful,
++ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
- * License along with Libav; if not, write to the Free Software
++ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+ #include "config.h"
+ #include "libavutil/cpu.h"
+ #include "libavcodec/vorbisdsp.h"
+ #include "dsputil_mmx.h" // for ff_pdw_80000000
+
+ #if HAVE_INLINE_ASM
+ #if ARCH_X86_32
+ static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize)
+ {
+ int i;
+ __asm__ volatile ("pxor %%mm7, %%mm7":);
+ for (i = 0; i < blocksize; i += 2) {
+ __asm__ volatile (
+ "movq %0, %%mm0 \n\t"
+ "movq %1, %%mm1 \n\t"
+ "movq %%mm0, %%mm2 \n\t"
+ "movq %%mm1, %%mm3 \n\t"
+ "pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0
+ "pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0
+ "pslld $31, %%mm2 \n\t" // keep only the sign bit
+ "pxor %%mm2, %%mm1 \n\t"
+ "movq %%mm3, %%mm4 \n\t"
+ "pand %%mm1, %%mm3 \n\t"
+ "pandn %%mm1, %%mm4 \n\t"
+ "pfadd %%mm0, %%mm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
+ "pfsub %%mm4, %%mm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
+ "movq %%mm3, %1 \n\t"
+ "movq %%mm0, %0 \n\t"
+ : "+m"(mag[i]), "+m"(ang[i])
+ :: "memory"
+ );
+ }
+ __asm__ volatile ("femms");
+ }
+ #endif
+
+ static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize)
+ {
+ int i;
+
+ __asm__ volatile (
+ "movaps %0, %%xmm5 \n\t"
+ :: "m"(ff_pdw_80000000[0])
+ );
+ for (i = 0; i < blocksize; i += 4) {
+ __asm__ volatile (
+ "movaps %0, %%xmm0 \n\t"
+ "movaps %1, %%xmm1 \n\t"
+ "xorps %%xmm2, %%xmm2 \n\t"
+ "xorps %%xmm3, %%xmm3 \n\t"
+ "cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0
+ "cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0
+ "andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit
+ "xorps %%xmm2, %%xmm1 \n\t"
+ "movaps %%xmm3, %%xmm4 \n\t"
+ "andps %%xmm1, %%xmm3 \n\t"
+ "andnps %%xmm1, %%xmm4 \n\t"
+ "addps %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m)))
+ "subps %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m)))
+ "movaps %%xmm3, %1 \n\t"
+ "movaps %%xmm0, %0 \n\t"
+ : "+m"(mag[i]), "+m"(ang[i])
+ :: "memory"
+ );
+ }
+ }
+ #endif
+
+ void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
+ {
+ #if HAVE_INLINE_ASM
+ int mm_flags = av_get_cpu_flags();
+
+ #if ARCH_X86_32
+ if (mm_flags & AV_CPU_FLAG_3DNOW)
+ dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
+ #endif /* ARCH_X86_32 */
+ if (mm_flags & AV_CPU_FLAG_SSE)
+ dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
+ #endif /* HAVE_INLINE_ASM */
+ }