--downmix options added.
#
PLUGINS_DIR := alsa beos darwin dsp dummy \
dvd esd fb ggi glide gnome gtk \
- idct \
+ downmix idct imdct \
macosx mga \
motion \
mpeg null qt sdl \
#
PLUGINS_TARGETS := alsa/alsa beos/beos darwin/darwin dsp/dsp dummy/dummy \
dvd/dvd esd/esd fb/fb ggi/ggi glide/glide gnome/gnome gtk/gtk \
+ downmix/downmix downmix/downmixsse downmix/downmix3dn \
idct/idct idct/idctclassic idct/idctmmx idct/idctmmxext \
+ imdct/imdct imdct/imdctsse \
macosx/macosx mga/mga \
motion/motion motion/motionmmx motion/motionmmxext \
mpeg/es mpeg/ps mpeg/ts null/null qt/qt sdl/sdl \
src/ac3_decoder/ac3_bit_allocate.o \
src/ac3_decoder/ac3_mantissa.o \
src/ac3_decoder/ac3_rematrix.o \
- src/ac3_decoder/ac3_imdct.o \
- src/ac3_decoder/ac3_imdct_c.o \
- src/ac3_decoder/ac3_srfft.o \
- src/ac3_decoder/ac3_downmix.o \
- src/ac3_decoder/ac3_downmix_c.o
+ src/ac3_decoder/ac3_imdct.o
AC3_SPDIF = src/ac3_spdif/ac3_spdif.o \
src/ac3_spdif/ac3_iec958.o
ARCH=${host_cpu}
-BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion"
+BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion imdct downmix"
case x$host_os in
xmingw32msvc)
fi
rm -f conftest*
-echo $ac_n "checking if \$CC groks MMX EXT (SSE) inline assembly""... $ac_c" 1>&6
-echo "configure:3200: checking if \$CC groks MMX EXT (SSE) inline assembly" >&5
+echo $ac_n "checking if \$CC groks MMX EXT or SSE inline assembly""... $ac_c" 1>&6
+echo "configure:3200: checking if \$CC groks MMX EXT or SSE inline assembly" >&5
cat > conftest.$ac_ext <<EOF
#line 3202 "configure"
#include "confdefs.h"
EOF
if { (eval echo configure:3209: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
rm -rf conftest*
- ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext"
+ ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext imdctsse downmix3dn downmixsse"
echo "$ac_t""yes" 1>&6
else
echo "configure: failed program was:" >&5
dnl
dnl default modules
dnl
-BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion"
+BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion imdct downmix"
dnl
dnl Accelerated modules
ACCEL_PLUGINS="${ACCEL_PLUGINS} ${MMX_PLUGINS}"
AC_MSG_RESULT(yes), AC_MSG_RESULT(no))
-AC_MSG_CHECKING([if \$CC groks MMX EXT (SSE) inline assembly])
+AC_MSG_CHECKING([if \$CC groks MMX EXT or SSE inline assembly])
AC_TRY_COMPILE([void quux(){void *p;asm("maskmovq %%mm1,%%mm2"::"r"(p));}],,
- ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext"
+ ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext imdctsse downmix3dn downmixsse"
AC_MSG_RESULT(yes), AC_MSG_RESULT(no))
dnl
.B \-\-spdif
Activate hardware AC3 pass-through mode.
.TP
+.B \-\-downmix <module>
+Specify a module for AC3 downmix: "downmix", "downmixsse", for instance.
+.TP
+.B \-\-imdct <module>
+Specify a module for AC3 IMDCT: "imdct", "imdctsse", for instance.
+.TP
.B \-\-novideo
Disable video output.
.TP
vlc_channels=<filename> channels list
.TP
.B Audio parameters:
- vlc_aout=<method name> audio method
- vlc_dsp=<filename> dsp device path
- vlc_stereo={1|0} stereo or mono output
- vlc_spdif={1|0} AC3 pass-through mode
- vlc_audio_rate=<rate> output rate
+ vlc_aout=<method name> audio method
+ vlc_dsp=<filename> dsp device path
+ vlc_stereo={1|0} stereo or mono output
+ vlc_spdif={1|0} AC3 pass-through mode
+ vlc_downmix=<method name> AC3 downmix method
+ vlc_imdct=<method name> AC3 IMDCT method
+ vlc_audio_rate=<rate> output rate
.TP
.B Video parameters:
vlc_vout=<method name> display method
vlc_grayscale={1|0} grayscale or color
vlc_fullscreen={1|0} full screen
vlc_overlay={1|0} overlay
+ vlc_motion=<method name> motion compensation method
vlc_idct=<method name> IDCT method
vlc_yuv=<method name> YUV method
vlc_synchro={I|I+|IP|IP+|IPB} synchro algorithm
--- /dev/null
+/*****************************************************************************
+ * ac3_downmix.h : AC3 downmix types
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: ac3_downmix.h,v 1.3 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Michel Kaempf <maxx@via.ecp.fr>
+ * Renaud Dartus <reno@videolan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+/* Downmix coefficients handed to every downmix routine. */
+typedef struct dm_par_s {
+ float unit; /* gain applied to the front left and right channels */
+ float clev; /* gain applied to the center channel */
+ float slev; /* gain applied to the surround channel(s) */
+} dm_par_t;
+
+/* Downmix state: the module that provides the downmix routines, plus one
+ * function pointer per routine. */
+typedef struct downmix_s {
+ /* Module used and shortcuts */
+ struct module_s * p_module;
+ /* Downmix routines: each takes the sample buffer and the coefficients */
+ void (*pf_downmix_3f_2r_to_2ch)(float *, dm_par_t * dm_par);
+ void (*pf_downmix_3f_1r_to_2ch)(float *, dm_par_t * dm_par);
+ void (*pf_downmix_2f_2r_to_2ch)(float *, dm_par_t * dm_par);
+ void (*pf_downmix_2f_1r_to_2ch)(float *, dm_par_t * dm_par);
+ void (*pf_downmix_3f_0r_to_2ch)(float *, dm_par_t * dm_par);
+ /* Conversion of float samples to s16 output samples */
+ void (*pf_stream_sample_2ch_to_s16)(s16 *, float *left, float *right);
+ void (*pf_stream_sample_1ch_to_s16)(s16 *, float *center);
+} downmix_t;
+
--- /dev/null
+/*****************************************************************************
+ * ac3_imdct.h : AC3 IMDCT types
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: ac3_imdct.h,v 1.3 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Michel Kaempf <maxx@via.ecp.fr>
+ * Renaud Dartus <reno@videolan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+/* Complex number stored as two floats: real part first, imaginary second. */
+typedef struct complex_s {
+ float real;
+ float imag;
+} complex_t;
+
+#define N 512
+
+/* IMDCT state: buffers and lookup tables, followed by the module providing
+ * the IMDCT routines and one function pointer per routine. */
+typedef struct imdct_s
+{
+ /* N/4 complex values; presumably the transform work buffer -- confirm
+ * against the IMDCT implementations */
+ complex_t buf[N/4];
+
+ /* Delay buffer for time domain interleaving */
+ float delay[6][256];
+ float delay1[6][256];
+
+ /* Twiddle factors for IMDCT */
+ float xcos1[N/4];
+ float xsin1[N/4];
+ float xcos2[N/8];
+ float xsin2[N/8];
+
+ /* Twiddle factor LUT */
+ /* w[] has one entry per w_1 .. w_64 table below; presumably each entry
+ * points to the matching table once initialised -- confirm */
+ complex_t *w[7];
+ complex_t w_1[1];
+ complex_t w_2[2];
+ complex_t w_4[4];
+ complex_t w_8[8];
+ complex_t w_16[16];
+ complex_t w_32[32];
+ complex_t w_64[64];
+
+ /* 16-byte aligned table; judging by its name it is meant for the SSE
+ * implementation -- confirm */
+ float xcos_sin_sse[128 * 4] __attribute__((aligned(16)));
+
+ /* Module used and shortcuts */
+ struct module_s * p_module;
+ void (*pf_imdct_init) (struct imdct_s *);
+ //void (*pf_fft_64p) (complex_t *a);
+ void (*pf_imdct_256)(struct imdct_s *, float data[], float delay[]);
+ void (*pf_imdct_256_nol)(struct imdct_s *, float data[], float delay[]);
+ void (*pf_imdct_512)(struct imdct_s *, float data[], float delay[]);
+ void (*pf_imdct_512_nol)(struct imdct_s *, float data[], float delay[]);
+
+} imdct_t;
+
#define AOUT_SPDIF_VAR "vlc_spdif"
#define AOUT_SPDIF_DEFAULT 0
+/* Environment variable containing the AC3 downmix method */
+#define DOWNMIX_METHOD_VAR "vlc_downmix"
+
+/* Environment variable containing the AC3 IMDCT method */
+#define IMDCT_METHOD_VAR "vlc_imdct"
+
/* Volume */
#define VOLUME_DEFAULT 512
#define VOLUME_STEP 128
* modules.h : Module management functions.
*****************************************************************************
* Copyright (C) 2001 VideoLAN
- * $Id: modules.h,v 1.23 2001/05/06 04:32:02 sam Exp $
+ * $Id: modules.h,v 1.24 2001/05/15 16:19:42 sam Exp $
*
* Authors: Samuel Hocevar <sam@zoy.org>
*
#define MODULE_CAPABILITY_DECAPS 1 << 3 /* Decaps */
#define MODULE_CAPABILITY_ADEC 1 << 4 /* Audio decoder */
#define MODULE_CAPABILITY_VDEC 1 << 5 /* Video decoder */
-#define MODULE_CAPABILITY_MOTION 1 << 6 /* Video decoder */
+#define MODULE_CAPABILITY_MOTION 1 << 6 /* Motion compensation */
#define MODULE_CAPABILITY_IDCT 1 << 7 /* IDCT transformation */
#define MODULE_CAPABILITY_AOUT 1 << 8 /* Audio output */
#define MODULE_CAPABILITY_VOUT 1 << 9 /* Video output */
#define MODULE_CAPABILITY_YUV 1 << 10 /* YUV colorspace conversion */
-#define MODULE_CAPABILITY_AFX 1 << 11 /* Audio effects */
-#define MODULE_CAPABILITY_VFX 1 << 12 /* Video effects */
+#define MODULE_CAPABILITY_IMDCT 1 << 11 /* IMDCT transformation */
+#define MODULE_CAPABILITY_DOWNMIX 1 << 12 /* AC3 downmix */
/* FIXME: kludge */
struct input_area_s;
+struct imdct_s;
+struct complex_s;
+struct dm_par_s;
/* FIXME: not yet used */
typedef struct probedata_s
void ( * pf_end ) ( struct vout_thread_s * );
} yuv;
+ /* IMDCT plugin */
+ struct
+ {
+ void ( * pf_imdct_init ) ( struct imdct_s * );
+ void ( * pf_imdct_256 ) ( struct imdct_s *,
+ float data[], float delay[] );
+ void ( * pf_imdct_256_nol )( struct imdct_s *,
+ float data[], float delay[] );
+ void ( * pf_imdct_512 ) ( struct imdct_s *,
+ float data[], float delay[] );
+ void ( * pf_imdct_512_nol )( struct imdct_s *,
+ float data[], float delay[] );
+// void ( * pf_fft_64p ) ( struct complex_s * );
+
+ } imdct;
+
+ /* AC3 downmix plugin */
+ struct
+ {
+ void ( * pf_downmix_3f_2r_to_2ch ) ( float *, struct dm_par_s * );
+ void ( * pf_downmix_3f_1r_to_2ch ) ( float *, struct dm_par_s * );
+ void ( * pf_downmix_2f_2r_to_2ch ) ( float *, struct dm_par_s * );
+ void ( * pf_downmix_2f_1r_to_2ch ) ( float *, struct dm_par_s * );
+ void ( * pf_downmix_3f_0r_to_2ch ) ( float *, struct dm_par_s * );
+ void ( * pf_stream_sample_2ch_to_s16 ) ( s16 *, float *, float * );
+ void ( * pf_stream_sample_1ch_to_s16 ) ( s16 *, float * );
+
+ } downmix;
+
} functions;
} function_list_t;
function_list_t aout;
function_list_t vout;
function_list_t yuv;
- function_list_t afx;
- function_list_t vfx;
+ function_list_t imdct;
+ function_list_t downmix;
} module_functions_t;
--- /dev/null
+###############################################################################
+# vlc (VideoLAN Client) downmix module makefile
+# (c)2001 VideoLAN
+###############################################################################
+
+#
+# Objects
+#
+
+# Objects of each downmix flavour: plain C, SSE and 3D Now!
+PLUGIN_DOWNMIX = downmix.o ac3_downmix_c.o
+PLUGIN_DOWNMIXSSE = downmixsse.o ac3_downmix_sse.o
+PLUGIN_DOWNMIX3DN = downmix3dn.o ac3_downmix_3dn.o
+
+# Builtin variants: same object names with a BUILTIN_<MODULE>_ prefix
+BUILTIN_DOWNMIX = $(PLUGIN_DOWNMIX:%.o=BUILTIN_DOWNMIX_%.o)
+BUILTIN_DOWNMIXSSE = $(PLUGIN_DOWNMIXSSE:%.o=BUILTIN_DOWNMIXSSE_%.o)
+BUILTIN_DOWNMIX3DN = $(PLUGIN_DOWNMIX3DN:%.o=BUILTIN_DOWNMIX3DN_%.o)
+
+# Aggregate object lists (presumably consumed by Makefile.modules -- confirm)
+PLUGIN_C = $(PLUGIN_DOWNMIX) $(PLUGIN_DOWNMIXSSE) $(PLUGIN_DOWNMIX3DN)
+ALL_OBJ = $(PLUGIN_C) $(BUILTIN_DOWNMIX) $(BUILTIN_DOWNMIXSSE) $(BUILTIN_DOWNMIX3DN)
+
+#
+# Virtual targets
+#
+
+include ../../Makefile.modules
+
+$(BUILTIN_DOWNMIX): BUILTIN_DOWNMIX_%.o: .dep/%.d
+$(BUILTIN_DOWNMIX): BUILTIN_DOWNMIX_%.o: %.c
+ $(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=downmix -c -o $@ $<
+
+$(BUILTIN_DOWNMIXSSE): BUILTIN_DOWNMIXSSE_%.o: .dep/%.d
+$(BUILTIN_DOWNMIXSSE): BUILTIN_DOWNMIXSSE_%.o: %.c
+ $(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=downmixsse -c -o $@ $<
+
+$(BUILTIN_DOWNMIX3DN): BUILTIN_DOWNMIX3DN_%.o: .dep/%.d
+$(BUILTIN_DOWNMIX3DN): BUILTIN_DOWNMIX3DN_%.o: %.c
+ $(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=downmix3dn -c -o $@ $<
+
+#
+# Real targets
+#
+
+../../lib/downmix.so: $(PLUGIN_DOWNMIX)
+ $(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS)
+
+../../lib/downmix.a: $(BUILTIN_DOWNMIX)
+ ar r $@ $^
+ $(RANLIB) $@
+
+../../lib/downmixsse.so: $(PLUGIN_DOWNMIXSSE)
+ $(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS)
+
+../../lib/downmixsse.a: $(BUILTIN_DOWNMIXSSE)
+ ar r $@ $^
+ $(RANLIB) $@
+
+../../lib/downmix3dn.so: $(PLUGIN_DOWNMIX3DN)
+ $(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS)
+
+../../lib/downmix3dn.a: $(BUILTIN_DOWNMIX3DN)
+ ar r $@ $^
+ $(RANLIB) $@
+
--- /dev/null
+/*****************************************************************************
+ * ac3_downmix_3dn.c: accelerated 3D Now! ac3 downmix functions
+ *****************************************************************************
+ * Copyright (C) 1999, 2000, 2001 VideoLAN
+ * $Id: ac3_downmix_3dn.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME downmix3dn
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_downmix.h"
+
+/* Storage for the float constant 0.7071068: stream_sample_1ch_to_s16 loads
+ * four bytes from the address of this symbol instead of calling it.
+ * NOTE(review): that load reads from the very first byte of the function, so
+ * it presumably relies on the compiler emitting no prologue ahead of the
+ * .float directive -- confirm the build flags guarantee this. */
+void sqrt2_3dn (void)
+{
+ __asm__ (".float 0f0.7071068");
+}
+
+/*****************************************************************************
+ * downmix_3f_2r_to_2ch: 3 front + 2 rear channels to stereo, 3D Now! version
+ *****************************************************************************
+ * Runs 128 iterations of two floats each. Channels are read at byte offsets
+ * 0 (left), 1024 (center), 2048 (right), 3072 (left surround) and 4096
+ * (right surround) from samples; the result is written in place at offsets
+ * 0 (left) and 1024 (right):
+ * left = unit * left + clev * center + slev * left surround
+ * right = unit * right + clev * center + slev * right surround
+ * unit, clev and slev are read at offsets 0, 4 and 8 of dm_par. %ecx is saved
+ * and restored around the loop.
+ *****************************************************************************/
+void _M( downmix_3f_2r_to_2ch ) (float * samples, dm_par_t * dm_par)
+{
+ __asm__ __volatile__ (
+ "pushl %%ecx\n"
+ "movl $128, %%ecx\n" /* loop counter */
+
+ "movd (%%ebx), %%mm5\n" /* unit */
+ "punpckldq %%mm5, %%mm5\n" /* unit | unit */
+
+ "movd 4(%%ebx), %%mm6\n" /* clev */
+ "punpckldq %%mm6, %%mm6\n" /* clev | clev */
+
+ "movd 8(%%ebx), %%mm7\n" /* slev */
+ "punpckldq %%mm7, %%mm7\n" /* slev | slev */
+
+".loop:\n"
+ "movq (%%eax), %%mm0\n" /* left */
+ "movq 2048(%%eax), %%mm1\n" /* right */
+ "movq 1024(%%eax), %%mm2\n" /* center */
+ "movq 3072(%%eax), %%mm3\n" /* leftsur */
+ "movq 4096(%%eax), %%mm4\n" /* rightsur */
+ "pfmul %%mm5, %%mm0\n"
+ "pfmul %%mm5, %%mm1\n"
+ "pfmul %%mm6, %%mm2\n"
+ "pfadd %%mm2, %%mm0\n"
+ "pfadd %%mm2, %%mm1\n"
+ "pfmul %%mm7, %%mm3\n"
+ "pfmul %%mm7, %%mm4\n"
+ "pfadd %%mm3, %%mm0\n"
+ "pfadd %%mm4, %%mm1\n"
+
+ "movq %%mm0, (%%eax)\n"
+ "movq %%mm1, 1024(%%eax)\n"
+
+ "addl $8, %%eax\n"
+ "decl %%ecx\n"
+ "jnz .loop\n"
+
+ "popl %%ecx\n"
+ "femms\n"
+ : "=a" (samples)
+ : "a" (samples), "b" (dm_par));
+}
+
+/*****************************************************************************
+ * downmix_2f_2r_to_2ch: 2 front + 2 rear channels to stereo, 3D Now! version
+ *****************************************************************************
+ * Runs 128 iterations of two floats each. Channels are read at byte offsets
+ * 0 (left), 1024 (right), 2048 (left surround) and 3072 (right surround)
+ * from samples; the result is written in place at offsets 0 and 1024:
+ * left = unit * left + slev * left surround
+ * right = unit * right + slev * right surround
+ * unit and slev are read at offsets 0 and 8 of dm_par.
+ *****************************************************************************/
+void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{
+ __asm__ __volatile__ (
+ "pushl %%ecx\n"
+ "movl $128, %%ecx\n" /* loop counter */
+
+ "movd (%%ebx), %%mm5\n" /* unit */
+ "punpckldq %%mm5, %%mm5\n" /* unit | unit */
+
+ "movd 8(%%ebx), %%mm7\n" /* slev */
+ "punpckldq %%mm7, %%mm7\n" /* slev | slev */
+
+".loop3:\n"
+ "movq (%%eax), %%mm0\n" /* left */
+ "movq 1024(%%eax), %%mm1\n" /* right */
+ "movq 2048(%%eax), %%mm3\n" /* leftsur */
+ "movq 3072(%%eax), %%mm4\n" /* rightsur */
+ "pfmul %%mm5, %%mm0\n"
+ "pfmul %%mm5, %%mm1\n"
+ "pfmul %%mm7, %%mm3\n"
+ "pfmul %%mm7, %%mm4\n"
+ "pfadd %%mm3, %%mm0\n"
+ "pfadd %%mm4, %%mm1\n"
+
+ "movq %%mm0, (%%eax)\n"
+ "movq %%mm1, 1024(%%eax)\n"
+
+ "addl $8, %%eax\n"
+ "decl %%ecx\n"
+ "jnz .loop3\n"
+
+ "popl %%ecx\n"
+ "femms\n"
+ : "=a" (samples)
+ : "a" (samples), "b" (dm_par));
+}
+
+/*****************************************************************************
+ * downmix_3f_1r_to_2ch: 3 front + 1 rear channels to stereo, 3D Now! version
+ *****************************************************************************
+ * Runs 128 iterations of two floats each. Channels are read at byte offsets
+ * 0 (left), 1024 (center), 2048 (right) and 3072 (surround) from samples;
+ * the result is written in place at offsets 0 and 1024. The surround channel
+ * is subtracted from the left output and added to the right one:
+ * left = unit * left + clev * center - slev * surround
+ * right = unit * right + clev * center + slev * surround
+ *****************************************************************************/
+void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{
+ __asm__ __volatile__ (
+
+ "pushl %%ecx\n"
+ "movl $128, %%ecx\n" /* loop counter */
+
+ "movd (%%ebx), %%mm5\n" /* unit */
+ "punpckldq %%mm5, %%mm5\n" /* unit | unit */
+
+ "movd 4(%%ebx), %%mm6\n" /* clev */
+ "punpckldq %%mm6, %%mm6\n" /* clev | clev */
+
+ "movd 8(%%ebx), %%mm7\n" /* slev */
+ "punpckldq %%mm7, %%mm7\n" /* slev | slev */
+
+".loop4:\n"
+ "movq (%%eax), %%mm0\n" /* left */
+ "movq 2048(%%eax), %%mm1\n" /* right */
+ "movq 1024(%%eax), %%mm2\n" /* center */
+ "movq 3072(%%eax), %%mm3\n" /* sur */
+ "pfmul %%mm5, %%mm0\n"
+ "pfmul %%mm5, %%mm1\n"
+ "pfmul %%mm6, %%mm2\n"
+ "pfadd %%mm2, %%mm0\n"
+ "pfmul %%mm7, %%mm3\n"
+ "pfadd %%mm2, %%mm1\n"
+ "pfsub %%mm3, %%mm0\n"
+ "pfadd %%mm3, %%mm1\n"
+
+ "movq %%mm0, (%%eax)\n"
+ "movq %%mm1, 1024(%%eax)\n"
+
+ "addl $8, %%eax\n"
+ "decl %%ecx\n"
+ "jnz .loop4\n"
+
+ "popl %%ecx\n"
+ "femms\n"
+ : "=a" (samples)
+ : "a" (samples), "b" (dm_par));
+}
+
+/*****************************************************************************
+ * downmix_2f_1r_to_2ch: 2 front + 1 rear channels to stereo, 3D Now! version
+ *****************************************************************************
+ * Runs 128 iterations of two floats each. Channels are read at byte offsets
+ * 0 (left), 1024 (right) and 2048 (surround) from samples; the result is
+ * written in place at offsets 0 and 1024:
+ * left = unit * left - slev * surround
+ * right = unit * right + slev * surround
+ *****************************************************************************/
+void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{
+ __asm__ __volatile__ (
+ "pushl %%ecx\n"
+ "movl $128, %%ecx\n" /* loop counter */
+
+ "movd (%%ebx), %%mm5\n" /* unit */
+ "punpckldq %%mm5, %%mm5\n" /* unit | unit */
+
+ "movd 8(%%ebx), %%mm7\n" /* slev */
+ "punpckldq %%mm7, %%mm7\n" /* slev | slev */
+
+".loop5:\n"
+ "movq (%%eax), %%mm0\n" /* left */
+ "movq 1024(%%eax), %%mm1\n" /* right */
+ "movq 2048(%%eax), %%mm3\n" /* sur */
+ "pfmul %%mm5, %%mm0\n"
+ "pfmul %%mm5, %%mm1\n"
+ "pfmul %%mm7, %%mm3\n"
+ "pfsub %%mm3, %%mm0\n"
+ "pfadd %%mm3, %%mm1\n"
+
+ "movq %%mm0, (%%eax)\n"
+ "movq %%mm1, 1024(%%eax)\n"
+
+ "addl $8, %%eax\n"
+ "decl %%ecx\n"
+ "jnz .loop5\n"
+
+ "popl %%ecx\n"
+ "femms\n"
+ : "=a" (samples)
+ : "a" (samples), "b" (dm_par));
+}
+
+/*****************************************************************************
+ * downmix_3f_0r_to_2ch: 3 front channels to stereo, 3D Now! version
+ *****************************************************************************
+ * Runs 128 iterations of two floats each. Channels are read at byte offsets
+ * 0 (left), 1024 (center) and 2048 (right) from samples; the result is
+ * written in place at offsets 0 and 1024:
+ * left = unit * left + clev * center
+ * right = unit * right + clev * center
+ *****************************************************************************/
+void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{
+ __asm__ __volatile__ (
+ "pushl %%ecx\n"
+ "movl $128, %%ecx\n" /* loop counter */
+
+ "movd (%%ebx), %%mm5\n" /* unit */
+ "punpckldq %%mm5, %%mm5\n" /* unit | unit */
+
+ "movd 4(%%ebx), %%mm6\n" /* clev */
+ "punpckldq %%mm6, %%mm6\n" /* clev | clev */
+
+".loop6:\n"
+ "movq (%%eax), %%mm0\n" /*left */
+ "movq 2048(%%eax), %%mm1\n" /* right */
+ "movq 1024(%%eax), %%mm2\n" /* center */
+ "pfmul %%mm5, %%mm0\n"
+ "pfmul %%mm5, %%mm1\n"
+ "pfmul %%mm6, %%mm2\n"
+ "pfadd %%mm2, %%mm0\n"
+ "pfadd %%mm2, %%mm1\n"
+
+ "movq %%mm0, (%%eax)\n"
+ "movq %%mm1, 1024(%%eax)\n"
+
+ "addl $8, %%eax\n"
+ "decl %%ecx\n"
+ "jnz .loop6\n"
+
+ "popl %%ecx\n"
+ "femms\n"
+ : "=a" (samples)
+ : "a" (samples), "b" (dm_par));
+}
+
+/*****************************************************************************
+ * stream_sample_1ch_to_s16: mono float samples to s16 pairs, 3D Now! version
+ *****************************************************************************
+ * Runs 128 iterations. Each one reads two floats from left, multiplies them
+ * by the constant stored at sqrt2_3dn, converts them to 16-bit integers with
+ * signed saturation and writes every sample twice (c0 c0 c1 c1), i.e. 8
+ * bytes of output per iteration. %ecx and %edx are saved and restored.
+ *****************************************************************************/
+void _M( stream_sample_1ch_to_s16 ) (s16 *s16_samples, float *left)
+{
+ __asm__ __volatile__ (
+ "pushl %%ecx\n"
+ "pushl %%edx\n"
+
+ "movl $sqrt2_3dn, %%edx\n"
+ "movd (%%edx), %%mm7\n"
+ "punpckldq %%mm7, %%mm7\n" /* sqrt2 | sqrt2 */
+ "movl $128, %%ecx\n"
+
+".loop2:\n"
+ "movq (%%ebx), %%mm0\n" /* c1 | c0 */
+ "pfmul %%mm7, %%mm0\n"
+
+ "pf2id %%mm0, %%mm0\n" /* c1 c0 --> mm0, int_32 */
+
+ "packssdw %%mm0, %%mm0\n" /* c1 c0 c1 c0 --> mm0, int_16 */
+ /* packssdw with identical operands repeats the pair (c1 c0 c1 c0);
+ * interleave the low words with themselves so that each sample is
+ * written twice in a row */
+ "punpcklwd %%mm0, %%mm0\n" /* c1 c1 c0 c0 --> mm0, int_16 */
+
+ "movq %%mm0, (%%eax)\n"
+ "addl $8, %%eax\n"
+ "addl $8, %%ebx\n"
+
+ "decl %%ecx\n"
+ "jnz .loop2\n"
+
+ "popl %%edx\n"
+ "popl %%ecx\n"
+ "femms\n"
+ : "=a" (s16_samples), "=b" (left)
+ : "a" (s16_samples), "b" (left));
+}
+
+/*****************************************************************************
+ * stream_sample_2ch_to_s16: two float channels to interleaved s16, 3D Now!
+ *****************************************************************************
+ * Runs 128 iterations. Each one reads two floats from left and two from
+ * right, converts them to 32-bit integers, packs them with signed saturation
+ * into l0 r0 l1 r1 and writes these 8 bytes to s16_samples. %ecx is saved and
+ * restored around the loop.
+ *****************************************************************************/
+void _M( stream_sample_2ch_to_s16 ) (s16 *s16_samples, float *left, float *right)
+{
+
+ __asm__ __volatile__ (
+ "pushl %%ecx\n"
+ "movl $128, %%ecx\n"
+
+".loop1:\n"
+ "movq (%%ebx), %%mm0\n" /* l1 | l0 */
+ "movq (%%edx), %%mm1\n" /* r1 | r0 */
+ "movq %%mm0, %%mm2\n" /* l1 | l0 */
+ "punpckldq %%mm1, %%mm0\n" /* r0 | l0 */
+ "punpckhdq %%mm1, %%mm2\n" /* r1 | l1 */
+
+ "pf2id %%mm0, %%mm0\n" /* r0 l0 --> mm0, int_32 */
+ "pf2id %%mm2, %%mm2\n" /* r1 l1 --> mm2, int_32 */
+
+ "packssdw %%mm2, %%mm0\n" /* r1 l1 r0 l0 --> mm0, int_16 */
+
+ /* mm0 now holds all four packed samples, so it is the only register
+ * to store: mm2 still contains unpacked 32-bit values, and writing it
+ * at 8(%eax) would run past the output on the last iteration */
+ "movq %%mm0, (%%eax)\n"
+ "addl $8, %%eax\n"
+ "addl $8, %%ebx\n"
+ "addl $8, %%edx\n"
+
+ "decl %%ecx\n"
+ "jnz .loop1\n"
+
+ "popl %%ecx\n"
+ "femms\n"
+ : "=a" (s16_samples), "=b" (left), "=d" (right)
+ : "a" (s16_samples), "b" (left), "d" (right));
+
+}
+
/*****************************************************************************
- * ac3_downmix_c.c: ac3 downmix functions
+ * ac3_downmix_c.c: ac3 downmix functions in C
*****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_downmix_c.c,v 1.8 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_downmix_c.c,v 1.1 2001/05/15 16:19:42 sam Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
+#define MODULE_NAME downmix
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
#include "defs.h"
#include <string.h> /* memcpy() */
#include "threads.h"
#include "mtime.h"
-#include "stream_control.h"
-#include "input_ext-dec.h"
+#include "ac3_downmix.h"
-#include "ac3_decoder.h"
-
-void downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void _M( downmix_3f_2r_to_2ch ) (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *center, *left_sur, *right_sur;
}
}
-void downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *left_sur, *right_sur;
}
}
-void downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *center, *right_sur;
}
-void downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *right_sur;
}
-void downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t *dm_par)
{
int i;
float *left, *right, *center;
}
-void stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *right)
+void _M( stream_sample_2ch_to_s16 ) (s16 *out_buf, float *left, float *right)
{
int i;
for (i=0; i < 256; i++) {
}
-void stream_sample_1ch_to_s16_c (s16 *out_buf, float *center)
+void _M( stream_sample_1ch_to_s16 ) (s16 *out_buf, float *center)
{
int i;
float tmp;
*out_buf++ = tmp;
}
}
+
--- /dev/null
+/*****************************************************************************
+ * ac3_downmix_common.h: ac3 downmix functions headers
+ *****************************************************************************
+ * Copyright (C) 1999, 2000, 2001 VideoLAN
+ * $Id: ac3_downmix_common.h,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ * Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+/* Prototypes of the downmix routines of one module flavour. _M() has to be
+ * defined before this header is included; the .c files of this module get it
+ * by defining MODULE_NAME and including modules_inner.h first (presumably
+ * _M() prefixes the name with the module name -- confirm). */
+void _M( downmix_3f_2r_to_2ch ) ( float *, dm_par_t * );
+void _M( downmix_2f_2r_to_2ch ) ( float *, dm_par_t * );
+void _M( downmix_3f_1r_to_2ch ) ( float *, dm_par_t * );
+void _M( downmix_2f_1r_to_2ch ) ( float *, dm_par_t * );
+void _M( downmix_3f_0r_to_2ch ) ( float *, dm_par_t * );
+void _M( stream_sample_2ch_to_s16 ) ( s16 *, float *, float * );
+void _M( stream_sample_1ch_to_s16 ) ( s16 *, float * );
+
--- /dev/null
+/*****************************************************************************
+ * ac3_downmix_sse.c: accelerated SSE ac3 downmix functions
+ *****************************************************************************
+ * Copyright (C) 1999, 2000, 2001 VideoLAN
+ * $Id: ac3_downmix_sse.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ * Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME downmixsse
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_downmix.h"
+
+/* Storage for the float constant 0.7071068: stream_sample_1ch_to_s16 loads
+ * four bytes from the address of this symbol instead of calling it.
+ * NOTE(review): that load reads from the very first byte of the function, so
+ * it presumably relies on the compiler emitting no prologue ahead of the
+ * .float directive -- confirm the build flags guarantee this. */
+void sqrt2_sse (void)
+{
+ __asm__ (".float 0f0.7071068");
+}
+
+/*****************************************************************************
+ * downmix_3f_2r_to_2ch: 3 front + 2 rear channels to stereo, SSE version
+ *****************************************************************************
+ * Runs 64 iterations of four floats each, using unaligned moves (movups).
+ * Channels are read at byte offsets 0 (left), 1024 (center), 2048 (right),
+ * 3072 (left surround) and 4096 (right surround) from samples; the result is
+ * written in place at offsets 0 (left) and 1024 (right):
+ * left = unit * left + clev * center + slev * left surround
+ * right = unit * right + clev * center + slev * right surround
+ * unit, clev and slev are read at offsets 0, 4 and 8 of dm_par. %ecx is saved
+ * and restored around the loop.
+ *****************************************************************************/
+void _M( downmix_3f_2r_to_2ch ) (float * samples, dm_par_t * dm_par)
+{
+ __asm__ __volatile__ (
+ "pushl %%ecx\n"
+ "movl $64, %%ecx\n" /* loop counter */
+
+ "movss (%%ebx), %%xmm5\n" /* unit */
+ "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
+
+ "movss 4(%%ebx), %%xmm6\n" /* clev */
+ "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */
+
+ "movss 8(%%ebx), %%xmm7\n" /* slev */
+ "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
+
+".loop:\n"
+ "movups (%%eax), %%xmm0\n" /* left */
+ "movups 2048(%%eax), %%xmm1\n" /* right */
+ "movups 1024(%%eax), %%xmm2\n" /* center */
+ "movups 3072(%%eax), %%xmm3\n" /* leftsur */
+ "movups 4096(%%eax), %%xmm4\n" /* rightsur */
+ "mulps %%xmm5, %%xmm0\n"
+ "mulps %%xmm5, %%xmm1\n"
+ "mulps %%xmm6, %%xmm2\n"
+ "addps %%xmm2, %%xmm0\n"
+ "addps %%xmm2, %%xmm1\n"
+ "mulps %%xmm7, %%xmm3\n"
+ "mulps %%xmm7, %%xmm4\n"
+ "addps %%xmm3, %%xmm0\n"
+ "addps %%xmm4, %%xmm1\n"
+
+ "movups %%xmm0, (%%eax)\n"
+ "movups %%xmm1, 1024(%%eax)\n"
+
+ "addl $16, %%eax\n"
+ "decl %%ecx\n"
+ "jnz .loop\n"
+
+ "popl %%ecx\n"
+ : "=a" (samples)
+ : "a" (samples), "b" (dm_par));
+}
+
+/*****************************************************************************
+ * downmix_2f_2r_to_2ch: 2 front + 2 rear channels to stereo, SSE version
+ *****************************************************************************
+ * Runs 64 iterations of four floats each, using unaligned moves (movups).
+ * Channels are read at byte offsets 0 (left), 1024 (right), 2048 (left
+ * surround) and 3072 (right surround) from samples; the result is written in
+ * place at offsets 0 and 1024:
+ * left = unit * left + slev * left surround
+ * right = unit * right + slev * right surround
+ *****************************************************************************/
+void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{
+ __asm__ __volatile__ (
+ "pushl %%ecx\n"
+ "movl $64, %%ecx\n" /* loop counter */
+
+ "movss (%%ebx), %%xmm5\n" /* unit */
+ "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
+
+ "movss 8(%%ebx), %%xmm7\n" /* slev */
+ "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
+
+".loop3:\n"
+ "movups (%%eax), %%xmm0\n" /* left */
+ "movups 1024(%%eax), %%xmm1\n" /* right */
+ "movups 2048(%%eax), %%xmm3\n" /* leftsur */
+ "movups 3072(%%eax), %%xmm4\n" /* rightsur */
+ "mulps %%xmm5, %%xmm0\n"
+ "mulps %%xmm5, %%xmm1\n"
+ "mulps %%xmm7, %%xmm3\n"
+ "mulps %%xmm7, %%xmm4\n"
+ "addps %%xmm3, %%xmm0\n"
+ "addps %%xmm4, %%xmm1\n"
+
+ "movups %%xmm0, (%%eax)\n"
+ "movups %%xmm1, 1024(%%eax)\n"
+
+ "addl $16, %%eax\n"
+ "decl %%ecx\n"
+ "jnz .loop3\n"
+
+ "popl %%ecx\n"
+ : "=a" (samples)
+ : "a" (samples), "b" (dm_par));
+}
+
+/*****************************************************************************
+ * downmix_3f_1r_to_2ch: 3 front + 1 rear channels to stereo, SSE version
+ *****************************************************************************
+ * Runs 64 iterations of four floats each, using unaligned moves (movups).
+ * Channels are read at byte offsets 0 (left), 1024 (center), 2048 (right)
+ * and 3072 (surround) from samples; the result is written in place at
+ * offsets 0 and 1024. The surround channel is subtracted from the left
+ * output and added to the right one:
+ * left = unit * left + clev * center - slev * surround
+ * right = unit * right + clev * center + slev * surround
+ *****************************************************************************/
+void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{
+ __asm__ __volatile__ (
+
+ "pushl %%ecx\n"
+ "movl $64, %%ecx\n" /* loop counter */
+
+ "movss (%%ebx), %%xmm5\n" /* unit */
+ "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
+
+ "movss 4(%%ebx), %%xmm6\n" /* clev */
+ "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */
+
+ "movss 8(%%ebx), %%xmm7\n" /* slev */
+ "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
+
+".loop4:\n"
+ "movups (%%eax), %%xmm0\n" /* left */
+ "movups 2048(%%eax), %%xmm1\n" /* right */
+ "movups 1024(%%eax), %%xmm2\n" /* center */
+ "movups 3072(%%eax), %%xmm3\n" /* sur */
+ "mulps %%xmm5, %%xmm0\n"
+ "mulps %%xmm5, %%xmm1\n"
+ "mulps %%xmm6, %%xmm2\n"
+ "addps %%xmm2, %%xmm0\n"
+ "mulps %%xmm7, %%xmm3\n"
+ "addps %%xmm2, %%xmm1\n"
+ "subps %%xmm3, %%xmm0\n"
+ "addps %%xmm3, %%xmm1\n"
+
+ "movups %%xmm0, (%%eax)\n"
+ "movups %%xmm1, 1024(%%eax)\n"
+
+ "addl $16, %%eax\n"
+ "decl %%ecx\n"
+ "jnz .loop4\n"
+
+ "popl %%ecx\n"
+ : "=a" (samples)
+ : "a" (samples), "b" (dm_par));
+
+}
+
+/*****************************************************************************
+ * downmix_2f_1r_to_2ch: 2 front + 1 rear channels to stereo, SSE version
+ *****************************************************************************
+ * Runs 64 iterations of four floats each, using unaligned moves (movups).
+ * Channels are read at byte offsets 0 (left), 1024 (right) and 2048
+ * (surround) from samples; the result is written in place at offsets 0 and
+ * 1024:
+ * left = unit * left - slev * surround
+ * right = unit * right + slev * surround
+ *****************************************************************************/
+void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{
+ __asm__ __volatile__ (
+ "pushl %%ecx\n"
+ "movl $64, %%ecx\n" /* loop counter */
+
+ "movss (%%ebx), %%xmm5\n" /* unit */
+ "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
+
+ "movss 8(%%ebx), %%xmm7\n" /* slev */
+ "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
+
+".loop5:\n"
+ "movups (%%eax), %%xmm0\n" /* left */
+ "movups 1024(%%eax), %%xmm1\n" /* right */
+ "movups 2048(%%eax), %%xmm3\n" /* sur */
+ "mulps %%xmm5, %%xmm0\n"
+ "mulps %%xmm5, %%xmm1\n"
+ "mulps %%xmm7, %%xmm3\n"
+ "subps %%xmm3, %%xmm0\n"
+ "addps %%xmm3, %%xmm1\n"
+
+ "movups %%xmm0, (%%eax)\n"
+ "movups %%xmm1, 1024(%%eax)\n"
+
+ "addl $16, %%eax\n"
+ "decl %%ecx\n"
+ "jnz .loop5\n"
+
+ "popl %%ecx\n"
+ : "=a" (samples)
+ : "a" (samples), "b" (dm_par));
+
+
+}
+
+/*****************************************************************************
+ * downmix_3f_0r_to_2ch: 3 front channels to stereo, SSE version
+ *****************************************************************************
+ * Runs 64 iterations of four floats each, using unaligned moves (movups).
+ * Channels are read at byte offsets 0 (left), 1024 (center) and 2048 (right)
+ * from samples; the result is written in place at offsets 0 and 1024:
+ * left = unit * left + clev * center
+ * right = unit * right + clev * center
+ *****************************************************************************/
+void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{
+ __asm__ __volatile__ (
+ "pushl %%ecx\n"
+ "movl $64, %%ecx\n" /* loop counter */
+
+ "movss (%%ebx), %%xmm5\n" /* unit */
+ "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
+
+ "movss 4(%%ebx), %%xmm6\n" /* clev */
+ "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */
+
+".loop6:\n"
+ "movups (%%eax), %%xmm0\n" /*left */
+ "movups 2048(%%eax), %%xmm1\n" /* right */
+ "movups 1024(%%eax), %%xmm2\n" /* center */
+ "mulps %%xmm5, %%xmm0\n"
+ "mulps %%xmm5, %%xmm1\n"
+ "mulps %%xmm6, %%xmm2\n"
+ "addps %%xmm2, %%xmm0\n"
+ "addps %%xmm2, %%xmm1\n"
+
+ "movups %%xmm0, (%%eax)\n"
+ "movups %%xmm1, 1024(%%eax)\n"
+
+ "addl $16, %%eax\n"
+ "decl %%ecx\n"
+ "jnz .loop6\n"
+
+ "popl %%ecx\n"
+ : "=a" (samples)
+ : "a" (samples), "b" (dm_par));
+}
+
+/*****************************************************************************
+ * stream_sample_1ch_to_s16: mono float samples to s16 pairs, SSE version
+ *****************************************************************************
+ * Runs 64 iterations. Each one reads four floats from left, multiplies them
+ * by the constant stored at sqrt2_sse, converts them to 16-bit integers with
+ * signed saturation and writes every sample twice (c0 c0 c1 c1 c2 c2 c3 c3),
+ * i.e. 16 bytes of output per iteration. %ecx and %edx are saved and
+ * restored around the loop.
+ *****************************************************************************/
+void _M( stream_sample_1ch_to_s16 ) (s16 *s16_samples, float *left)
+{
+ __asm__ __volatile__ (
+ "pushl %%ecx\n"
+ "pushl %%edx\n"
+
+ "movl $sqrt2_sse, %%edx\n"
+ "movss (%%edx), %%xmm7\n"
+ "shufps $0, %%xmm7, %%xmm7\n" /* sqrt2 | sqrt2 | sqrt2 | sqrt2 */
+ "movl $64, %%ecx\n"
+
+".loop2:\n"
+ "movups (%%ebx), %%xmm0\n" /* c3 | c2 | c1 | c0 */
+ "mulps %%xmm7, %%xmm0\n"
+ "movhlps %%xmm0, %%xmm2\n" /* c3 | c2 */
+
+ "cvtps2pi %%xmm0, %%mm0\n" /* c1 c0 --> mm0, int_32 */
+ "cvtps2pi %%xmm2, %%mm1\n" /* c3 c2 --> mm1, int_32 */
+
+ "packssdw %%mm0, %%mm0\n" /* c1 c0 c1 c0 --> mm0, int_16 */
+ "packssdw %%mm1, %%mm1\n" /* c3 c2 c3 c2 --> mm1, int_16 */
+ /* packssdw with identical operands repeats the pair (c1 c0 c1 c0);
+ * interleave the low words with themselves so that each sample is
+ * written twice in a row */
+ "punpcklwd %%mm0, %%mm0\n" /* c1 c1 c0 c0 --> mm0, int_16 */
+ "punpcklwd %%mm1, %%mm1\n" /* c3 c3 c2 c2 --> mm1, int_16 */
+
+ "movq %%mm0, (%%eax)\n"
+ "movq %%mm1, 8(%%eax)\n"
+ "addl $16, %%eax\n"
+ "addl $16, %%ebx\n"
+
+ "decl %%ecx\n"
+ "jnz .loop2\n"
+
+ "popl %%edx\n"
+ "popl %%ecx\n"
+ "emms\n"
+ : "=a" (s16_samples), "=b" (left)
+ : "a" (s16_samples), "b" (left));
+}
+
+/*****************************************************************************
+ * stream_sample_2ch_to_s16: two float channels to interleaved s16, SSE
+ *****************************************************************************
+ * Runs 64 iterations. Each one reads four floats from left and four from
+ * right, interleaves them, converts them to 32-bit integers, packs them with
+ * signed saturation and writes the 16 resulting bytes (l0 r0 l1 r1 l2 r2 l3
+ * r3) to s16_samples. %ecx is saved and restored around the loop.
+ *****************************************************************************/
+void _M( stream_sample_2ch_to_s16 ) (s16 *s16_samples, float *left, float *right)
+{
+
+ __asm__ __volatile__ (
+ "pushl %%ecx\n"
+ "movl $64, %%ecx\n"
+
+".loop1:\n"
+ "movups (%%ebx), %%xmm0\n" /* l3 | l2 | l1 | l0 */
+ "movups (%%edx), %%xmm1\n" /* r3 | r2 | r1 | r0 */
+ "movhlps %%xmm0, %%xmm2\n" /* l3 | l2 */
+ "movhlps %%xmm1, %%xmm3\n" /* r3 | r2 */
+ "unpcklps %%xmm1, %%xmm0\n" /* r1 | l1 | r0 | l0 */
+ "unpcklps %%xmm3, %%xmm2\n" /* r3 | l3 | r2 | l2 */
+
+ "cvtps2pi %%xmm0, %%mm0\n" /* r0 l0 --> mm0, int_32 */
+ "movhlps %%xmm0, %%xmm0\n" /* bring r1 | l1 down to the low half */
+ "cvtps2pi %%xmm0, %%mm1\n" /* r1 l1 --> mm1, int_32 */
+ "cvtps2pi %%xmm2, %%mm2\n" /* r2 l2 --> mm2, int_32 */
+ "movhlps %%xmm2, %%xmm2\n" /* bring r3 | l3 down to the low half */
+ "cvtps2pi %%xmm2, %%mm3\n" /* r3 l3 --> mm3, int_32 */
+
+ "packssdw %%mm1, %%mm0\n" /* r1 l1 r0 l0 --> mm0, int_16 */
+ "packssdw %%mm3, %%mm2\n" /* r3 l3 r2 l2 --> mm2, int_16 */
+
+ "movq %%mm0, (%%eax)\n"
+ "movq %%mm2, 8(%%eax)\n"
+ "addl $16, %%eax\n"
+ "addl $16, %%ebx\n"
+ "addl $16, %%edx\n"
+
+ "decl %%ecx\n"
+ "jnz .loop1\n"
+
+ "popl %%ecx\n"
+ "emms\n"
+ : "=a" (s16_samples), "=b" (left), "=d" (right)
+ : "a" (s16_samples), "b" (left), "d" (right));
+
+}
+
--- /dev/null
+/*****************************************************************************
+ * downmix.c : AC3 downmix module
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: downmix.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME downmix
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <stdlib.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_downmix.h"
+#include "ac3_downmix_common.h"
+
+#include "modules.h"
+
+/*****************************************************************************
+ * Local and extern prototypes.
+ *****************************************************************************/
+static void downmix_getfunctions( function_list_t * p_function_list );
+static int downmix_Probe ( probedata_t *p_data );
+
+/*****************************************************************************
+ * Build configuration tree.
+ *****************************************************************************/
+MODULE_CONFIG_START /* tree content: one window entry and one comment entry, no settings */
+ADD_WINDOW( "Configuration for AC3 downmix module" )
+ ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
+MODULE_CONFIG_END
+
+/*****************************************************************************
+ * InitModule: get the module structure and configuration.
+ *****************************************************************************
+ * We have to fill psz_name, psz_longname and psz_version. These variables
+ * will be strdup()ed later by the main application because the module can
+ * be unloaded later to save memory, and we want to be able to access this
+ * data even after the module has been unloaded.
+ *****************************************************************************/
+MODULE_INIT
+{
+    /* Advertise the downmix capability on top of the null one */
+    p_module->i_capabilities =
+        MODULE_CAPABILITY_NULL | MODULE_CAPABILITY_DOWNMIX;
+
+    /* Identification strings of the module */
+    p_module->psz_version = VERSION;
+    p_module->psz_longname = "AC3 downmix module";
+    p_module->psz_name = MODULE_STRING;
+
+    return( 0 );
+}
+
+/*****************************************************************************
+ * ActivateModule: set the module to a usable state.
+ *****************************************************************************
+ * This function fills the capability functions and the configuration
+ * structure. Once ActivateModule() has been called, the i_usage can
+ * be set to 0 and calls to NeedModule() be made to increment it. To unload
+ * the module, one has to wait until i_usage == 0 and call DeactivateModule().
+ *****************************************************************************/
+MODULE_ACTIVATE
+{
+    /* Allocate the capability table; activation fails with -1 if the
+     * allocation does */
+    p_module->p_functions = malloc( sizeof( module_functions_t ) );
+    if( p_module->p_functions != NULL )
+    {
+        /* Fill the downmix entry and attach the configuration tree */
+        downmix_getfunctions( &p_module->p_functions->downmix );
+        p_module->p_config = p_config;
+
+        return( 0 );
+    }
+
+    return( -1 );
+}
+
+/*****************************************************************************
+ * DeactivateModule: make sure the module can be unloaded.
+ *****************************************************************************
+ * This function must only be called when i_usage == 0. If it successfully
+ * returns, i_usage can be set to -1 and the module unloaded. Be careful to
+ * lock usage_lock during the whole process.
+ *****************************************************************************/
+MODULE_DEACTIVATE
+{
+    /* Release the capability table and clear the pointer, so that no
+     * dangling reference is left in the module structure. Always returns 0. */
+    free( p_module->p_functions );
+    p_module->p_functions = NULL;
+
+    return( 0 );
+}
+
+/* Following functions are local */
+
+/*****************************************************************************
+ * Functions exported as capabilities. They are declared as static so that
+ * we don't pollute the namespace too much.
+ *****************************************************************************/
+static void downmix_getfunctions( function_list_t * p_function_list )
+{
+    /* Scoring callback */
+    p_function_list->pf_probe = downmix_Probe;
+
+    /* Downmix routines */
+    p_function_list->functions.downmix.pf_downmix_3f_2r_to_2ch =
+        _M( downmix_3f_2r_to_2ch );
+    p_function_list->functions.downmix.pf_downmix_3f_1r_to_2ch =
+        _M( downmix_3f_1r_to_2ch );
+    p_function_list->functions.downmix.pf_downmix_2f_2r_to_2ch =
+        _M( downmix_2f_2r_to_2ch );
+    p_function_list->functions.downmix.pf_downmix_2f_1r_to_2ch =
+        _M( downmix_2f_1r_to_2ch );
+    p_function_list->functions.downmix.pf_downmix_3f_0r_to_2ch =
+        _M( downmix_3f_0r_to_2ch );
+
+    /* Float to s16 conversion routines */
+    p_function_list->functions.downmix.pf_stream_sample_2ch_to_s16 =
+        _M( stream_sample_2ch_to_s16 );
+    p_function_list->functions.downmix.pf_stream_sample_1ch_to_s16 =
+        _M( stream_sample_1ch_to_s16 );
+}
+
+/*****************************************************************************
+ * downmix_Probe: returns a preference score
+ *****************************************************************************/
+static int downmix_Probe( probedata_t *p_data )
+{
+    /* 999 when TestMethod matches "downmix" against DOWNMIX_METHOD_VAR;
+     * otherwise the fallback score of 50, as this plugin has no
+     * requirement to check */
+    return( TestMethod( DOWNMIX_METHOD_VAR, "downmix" ) ? 999 : 50 );
+}
+
--- /dev/null
+/*****************************************************************************
+ * downmix3dn.c : accelerated 3D Now! AC3 downmix module
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: downmix3dn.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME downmix3dn
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <stdlib.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_downmix.h"
+#include "ac3_downmix_common.h"
+
+#include "modules.h"
+
+/*****************************************************************************
+ * Local and extern prototypes.
+ *****************************************************************************/
+static void downmix_getfunctions( function_list_t * p_function_list );
+static int downmix_Probe ( probedata_t *p_data );
+
+/*****************************************************************************
+ * Build configuration tree.
+ *****************************************************************************/
+MODULE_CONFIG_START /* tree content: one window entry and one comment entry, no settings */
+ADD_WINDOW( "Configuration for AC3 downmix3dn module" )
+ ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
+MODULE_CONFIG_END
+
+/*****************************************************************************
+ * InitModule: get the module structure and configuration.
+ *****************************************************************************
+ * We have to fill psz_name, psz_longname and psz_version. These variables
+ * will be strdup()ed later by the main application because the module can
+ * be unloaded later to save memory, and we want to be able to access this
+ * data even after the module has been unloaded.
+ *****************************************************************************/
+MODULE_INIT
+{
+    /* Advertise the downmix capability on top of the null one */
+    p_module->i_capabilities =
+        MODULE_CAPABILITY_NULL | MODULE_CAPABILITY_DOWNMIX;
+
+    /* Identification strings of the module */
+    p_module->psz_version = VERSION;
+    p_module->psz_longname = "3D Now! AC3 downmix module";
+    p_module->psz_name = MODULE_STRING;
+
+    return( 0 );
+}
+
+/*****************************************************************************
+ * ActivateModule: set the module to a usable state.
+ *****************************************************************************
+ * This function fills the capability functions and the configuration
+ * structure. Once ActivateModule() has been called, the i_usage can
+ * be set to 0 and calls to NeedModule() be made to increment it. To unload
+ * the module, one has to wait until i_usage == 0 and call DeactivateModule().
+ *****************************************************************************/
+MODULE_ACTIVATE
+{
+    /* Allocate the capability table; activation fails with -1 if the
+     * allocation does */
+    p_module->p_functions = malloc( sizeof( module_functions_t ) );
+    if( p_module->p_functions != NULL )
+    {
+        /* Fill the downmix entry and attach the configuration tree */
+        downmix_getfunctions( &p_module->p_functions->downmix );
+        p_module->p_config = p_config;
+
+        return( 0 );
+    }
+
+    return( -1 );
+}
+
+/*****************************************************************************
+ * DeactivateModule: make sure the module can be unloaded.
+ *****************************************************************************
+ * This function must only be called when i_usage == 0. If it successfully
+ * returns, i_usage can be set to -1 and the module unloaded. Be careful to
+ * lock usage_lock during the whole process.
+ *****************************************************************************/
+MODULE_DEACTIVATE
+{
+    /* Release the capability table and clear the pointer, so that no
+     * dangling reference is left in the module structure. Always returns 0. */
+    free( p_module->p_functions );
+    p_module->p_functions = NULL;
+
+    return( 0 );
+}
+
+/* Following functions are local */
+
+/*****************************************************************************
+ * Functions exported as capabilities. They are declared as static so that
+ * we don't pollute the namespace too much.
+ *****************************************************************************/
+static void downmix_getfunctions( function_list_t * p_function_list )
+{
+    /* Scoring callback */
+    p_function_list->pf_probe = downmix_Probe;
+
+    /* Downmix routines */
+    p_function_list->functions.downmix.pf_downmix_3f_2r_to_2ch =
+        _M( downmix_3f_2r_to_2ch );
+    p_function_list->functions.downmix.pf_downmix_3f_1r_to_2ch =
+        _M( downmix_3f_1r_to_2ch );
+    p_function_list->functions.downmix.pf_downmix_2f_2r_to_2ch =
+        _M( downmix_2f_2r_to_2ch );
+    p_function_list->functions.downmix.pf_downmix_2f_1r_to_2ch =
+        _M( downmix_2f_1r_to_2ch );
+    p_function_list->functions.downmix.pf_downmix_3f_0r_to_2ch =
+        _M( downmix_3f_0r_to_2ch );
+
+    /* Float to s16 conversion routines */
+    p_function_list->functions.downmix.pf_stream_sample_2ch_to_s16 =
+        _M( stream_sample_2ch_to_s16 );
+    p_function_list->functions.downmix.pf_stream_sample_1ch_to_s16 =
+        _M( stream_sample_1ch_to_s16 );
+}
+
+/*****************************************************************************
+ * downmix_Probe: returns a preference score
+ *****************************************************************************/
+static int downmix_Probe( probedata_t *p_data )
+{
+    if( TestCPU( CPU_CAPABILITY_3DNOW ) )
+    {
+        /* 999 when TestMethod matches "downmix3dn" against
+         * DOWNMIX_METHOD_VAR, 200 otherwise */
+        return( TestMethod( DOWNMIX_METHOD_VAR, "downmix3dn" ) ? 999 : 200 );
+    }
+
+    /* No 3D Now! capability reported: this plugin cannot be used */
+    return( 0 );
+}
+
--- /dev/null
+/*****************************************************************************
+ * downmixsse.c : accelerated SSE AC3 downmix module
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: downmixsse.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME downmixsse
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <stdlib.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_downmix.h"
+#include "ac3_downmix_common.h"
+
+#include "modules.h"
+
+/*****************************************************************************
+ * Local and extern prototypes.
+ *****************************************************************************/
+static void downmix_getfunctions( function_list_t * p_function_list );
+static int downmix_Probe ( probedata_t *p_data );
+
+/*****************************************************************************
+ * Build configuration tree.
+ *****************************************************************************/
+MODULE_CONFIG_START /* tree content: one window entry and one comment entry, no settings */
+ADD_WINDOW( "Configuration for AC3 downmixsse module" )
+ ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
+MODULE_CONFIG_END
+
+/*****************************************************************************
+ * InitModule: get the module structure and configuration.
+ *****************************************************************************
+ * We have to fill psz_name, psz_longname and psz_version. These variables
+ * will be strdup()ed later by the main application because the module can
+ * be unloaded later to save memory, and we want to be able to access this
+ * data even after the module has been unloaded.
+ *****************************************************************************/
+MODULE_INIT
+{
+    /* Advertise the downmix capability on top of the null one */
+    p_module->i_capabilities =
+        MODULE_CAPABILITY_NULL | MODULE_CAPABILITY_DOWNMIX;
+
+    /* Identification strings of the module */
+    p_module->psz_version = VERSION;
+    p_module->psz_longname = "SSE AC3 downmix module";
+    p_module->psz_name = MODULE_STRING;
+
+    return( 0 );
+}
+
+/*****************************************************************************
+ * ActivateModule: set the module to a usable state.
+ *****************************************************************************
+ * This function fills the capability functions and the configuration
+ * structure. Once ActivateModule() has been called, the i_usage can
+ * be set to 0 and calls to NeedModule() be made to increment it. To unload
+ * the module, one has to wait until i_usage == 0 and call DeactivateModule().
+ *****************************************************************************/
+MODULE_ACTIVATE
+{
+    /* Allocate the capability table; activation fails with -1 if the
+     * allocation does */
+    p_module->p_functions = malloc( sizeof( module_functions_t ) );
+    if( p_module->p_functions != NULL )
+    {
+        /* Fill the downmix entry and attach the configuration tree */
+        downmix_getfunctions( &p_module->p_functions->downmix );
+        p_module->p_config = p_config;
+
+        return( 0 );
+    }
+
+    return( -1 );
+}
+
+/*****************************************************************************
+ * DeactivateModule: make sure the module can be unloaded.
+ *****************************************************************************
+ * This function must only be called when i_usage == 0. If it successfully
+ * returns, i_usage can be set to -1 and the module unloaded. Be careful to
+ * lock usage_lock during the whole process.
+ *****************************************************************************/
+MODULE_DEACTIVATE
+{
+    /* Release the capability table and clear the pointer, so that no
+     * dangling reference is left in the module structure. Always returns 0. */
+    free( p_module->p_functions );
+    p_module->p_functions = NULL;
+
+    return( 0 );
+}
+
+/* Following functions are local */
+
+/*****************************************************************************
+ * Functions exported as capabilities. They are declared as static so that
+ * we don't pollute the namespace too much.
+ *****************************************************************************/
+static void downmix_getfunctions( function_list_t * p_function_list )
+{
+    /* Scoring callback */
+    p_function_list->pf_probe = downmix_Probe;
+
+    /* Downmix routines */
+    p_function_list->functions.downmix.pf_downmix_3f_2r_to_2ch =
+        _M( downmix_3f_2r_to_2ch );
+    p_function_list->functions.downmix.pf_downmix_3f_1r_to_2ch =
+        _M( downmix_3f_1r_to_2ch );
+    p_function_list->functions.downmix.pf_downmix_2f_2r_to_2ch =
+        _M( downmix_2f_2r_to_2ch );
+    p_function_list->functions.downmix.pf_downmix_2f_1r_to_2ch =
+        _M( downmix_2f_1r_to_2ch );
+    p_function_list->functions.downmix.pf_downmix_3f_0r_to_2ch =
+        _M( downmix_3f_0r_to_2ch );
+
+    /* Float to s16 conversion routines */
+    p_function_list->functions.downmix.pf_stream_sample_2ch_to_s16 =
+        _M( stream_sample_2ch_to_s16 );
+    p_function_list->functions.downmix.pf_stream_sample_1ch_to_s16 =
+        _M( stream_sample_1ch_to_s16 );
+}
+
+/*****************************************************************************
+ * downmix_Probe: returns a preference score
+ *****************************************************************************/
+static int downmix_Probe( probedata_t *p_data )
+{
+    if( TestCPU( CPU_CAPABILITY_SSE ) )
+    {
+        /* 999 when TestMethod matches "downmixsse" against
+         * DOWNMIX_METHOD_VAR, 200 otherwise */
+        return( TestMethod( DOWNMIX_METHOD_VAR, "downmixsse" ) ? 999 : 200 );
+    }
+
+    /* No SSE capability reported: this plugin cannot be used */
+    return( 0 );
+}
+
* idctaltivec.c : Altivec IDCT module
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
- * $Id: idctaltivec.c,v 1.5 2001/05/06 04:32:02 sam Exp $
+ * $Id: idctaltivec.c,v 1.6 2001/05/15 16:19:42 sam Exp $
*
* Authors: Christophe Massiot <massiot@via.ecp.fr>
*
*****************************************************************************/
static int idct_Probe( probedata_t *p_data )
{
- if( TestCPU( CPU_CAPABILITY_ALTIVEC ) )
+ if( !TestCPU( CPU_CAPABILITY_ALTIVEC ) )
{
- if( TestMethod( IDCT_METHOD_VAR, "idctaltivec" ) )
- {
- return( 999 );
- }
- else
- {
- /* The Altivec iDCT is deactivated until it really works */
- return( 0 /* 200 */ );
- }
+ return( 0 );
}
- else
+
+ if( TestMethod( IDCT_METHOD_VAR, "idctaltivec" ) )
{
- return( 0 );
+ return( 999 );
}
+
+ /* The Altivec iDCT is deactivated until it really works */
+ return( 0 /* 200 */ );
}
/*****************************************************************************
* idctmmx.c : MMX IDCT module
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
- * $Id: idctmmx.c,v 1.10 2001/05/06 04:32:02 sam Exp $
+ * $Id: idctmmx.c,v 1.11 2001/05/15 16:19:42 sam Exp $
*
* Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
* Michel Lespinasse <walken@zoy.org>
*****************************************************************************/
static int idct_Probe( probedata_t *p_data )
{
- if( TestCPU( CPU_CAPABILITY_MMX ) )
+ if( !TestCPU( CPU_CAPABILITY_MMX ) )
{
- if( TestMethod( IDCT_METHOD_VAR, "idctmmx" ) )
- {
- return( 999 );
- }
- else
- {
- return( 150 );
- }
+ return( 0 );
}
- else
+
+ if( TestMethod( IDCT_METHOD_VAR, "idctmmx" ) )
{
- return( 0 );
+ return( 999 );
}
+
+ return( 150 );
}
/*****************************************************************************
* idctmmxext.c : MMX EXT IDCT module
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
- * $Id: idctmmxext.c,v 1.7 2001/05/06 04:32:02 sam Exp $
+ * $Id: idctmmxext.c,v 1.8 2001/05/15 16:19:42 sam Exp $
*
* Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
* Michel Lespinasse <walken@zoy.org>
*****************************************************************************/
static int idct_Probe( probedata_t *p_data )
{
- if( TestCPU( CPU_CAPABILITY_MMXEXT ) )
+ if( !TestCPU( CPU_CAPABILITY_MMXEXT ) )
{
- if( TestMethod( IDCT_METHOD_VAR, "idctmmxext" ) )
- {
- return( 999 );
- }
- else
- {
- return( 200 );
- }
+ return( 0 );
}
- else
+
+ if( TestMethod( IDCT_METHOD_VAR, "idctmmxext" ) )
{
- return( 0 );
+ return( 999 );
}
+
+ return( 200 );
+
}
/*****************************************************************************
--- /dev/null
+###############################################################################
+# vlc (VideoLAN Client) imdct module makefile
+# (c)2001 VideoLAN
+###############################################################################
+
+#
+# Objects
+#
+# Per-module object lists; the common object is added to both modules below
+PLUGIN_IMDCT = imdct.o ac3_imdct_c.o ac3_srfft_c.o
+PLUGIN_IMDCTSSE = imdctsse.o ac3_imdct_sse.o ac3_srfft_sse.o
+PLUGIN_IMDCTCOMMON = ac3_imdct_common.o
+# Builtin flavours: same object names carrying a BUILTIN_<MODULE>_ prefix
+BUILTIN_IMDCT = $(PLUGIN_IMDCT:%.o=BUILTIN_IMDCT_%.o) \
+ $(PLUGIN_IMDCTCOMMON:%.o=BUILTIN_IMDCT_%.o)
+BUILTIN_IMDCTSSE = $(PLUGIN_IMDCTSSE:%.o=BUILTIN_IMDCTSSE_%.o) \
+ $(PLUGIN_IMDCTCOMMON:%.o=BUILTIN_IMDCTSSE_%.o)
+# Aggregate lists -- presumably consumed by Makefile.modules (verify there)
+PLUGIN_C = $(PLUGIN_IMDCT) $(PLUGIN_IMDCTSSE) $(PLUGIN_IMDCTCOMMON)
+ALL_OBJ = $(PLUGIN_C) $(BUILTIN_IMDCT) $(BUILTIN_IMDCTSSE)
+
+#
+# Virtual targets
+#
+
+include ../../Makefile.modules
+# Builtin objects: built from the same .c files with -DBUILTIN and an explicit MODULE_NAME
+$(BUILTIN_IMDCT): BUILTIN_IMDCT_%.o: .dep/%.d
+$(BUILTIN_IMDCT): BUILTIN_IMDCT_%.o: %.c
+ $(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=imdct -c -o $@ $<
+
+$(BUILTIN_IMDCTSSE): BUILTIN_IMDCTSSE_%.o: .dep/%.d
+$(BUILTIN_IMDCTSSE): BUILTIN_IMDCTSSE_%.o: %.c
+ $(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=imdctsse -c -o $@ $<
+
+#
+# Real targets
+#
+# Each .so links the plugin objects; each .a archives the builtin objects
+../../lib/imdct.so: $(PLUGIN_IMDCT) $(PLUGIN_IMDCTCOMMON)
+ $(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS)
+
+../../lib/imdct.a: $(BUILTIN_IMDCT)
+ ar r $@ $^
+ $(RANLIB) $@
+
+../../lib/imdctsse.so: $(PLUGIN_IMDCTSSE) $(PLUGIN_IMDCTCOMMON)
+ $(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS)
+
+../../lib/imdctsse.a: $(BUILTIN_IMDCTSSE)
+ ar r $@ $^
+ $(RANLIB) $@
+
+
--- /dev/null
+/*****************************************************************************
+ * ac3_imdct_c.c: ac3 DCT in C
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: ac3_imdct_c.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ * Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME imdct
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <string.h> /* memcpy() */
+
+#include <math.h>
+#include <stdio.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+
+#include "ac3_imdct.h"
+#include "ac3_imdct_common.h"
+
+#ifndef M_PI
+# define M_PI 3.14159265358979323846
+#endif
+
+void _M( fft_64p ) ( complex_t *x );
+void _M( fft_128p ) ( complex_t *x );
+
+static float window[] = { /* 256 coefficients; the imdct_do_512 routines read them forward for the output and backward for the delay line */
+ 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
+ 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
+ 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
+ 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
+ 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
+ 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
+ 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
+ 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
+ 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
+ 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
+ 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
+ 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
+ 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
+ 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
+ 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
+ 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
+ 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
+ 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
+ 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
+ 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
+ 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
+ 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
+ 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
+ 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
+ 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
+ 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
+ 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
+ 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
+ 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
+ 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
+ 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
+ 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
+};
+
+static const int pm128[128] = /* input permutation: pm128[i] is the coefficient-pair index loaded into buf[i] before fft_128p */
+{
+ 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120,
+ 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124,
+ 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122,
+ 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126,
+ 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121,
+ 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125,
+ 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123,
+ 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127
+};
+
+static const int pm64[64] = /* NOTE(review): not referenced by the functions below; presumably the 64-point counterpart of pm128 -- confirm before removing */
+{
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 4, 20, 36, 52, 12, 28, 44, 60,
+ 2, 10, 18, 26, 34, 42, 50, 58,
+ 6, 14, 22, 30, 38, 46, 54, 62,
+ 1, 9, 17, 25, 33, 41, 49, 57,
+ 5, 21, 37, 53, 13, 29, 45, 61,
+ 3, 11, 19, 27, 35, 43, 51, 59,
+ 7, 23, 39, 55, 15, 31, 47, 63
+};
+
+/*****************************************************************************
+ * imdct_init: fill the xcos1/xsin1 twiddle tables of an imdct_t
+ *****************************************************************************
+ * For k in [0, 128), stores the cosine and sine of 2*pi*(8k+1)/(8N), each
+ * multiplied by a fixed gain of 181.019.
+ *****************************************************************************/
+void _M( imdct_init ) (imdct_t * p_imdct)
+{
+    const float gain = 181.019;
+    int k;
+
+    /* Twiddle factors to turn IFFT into IMDCT */
+    for( k = 0; k < 128; k++ )
+    {
+        const double phase = 2.0f * M_PI * (8*k+1)/(8*N);
+
+        p_imdct->xcos1[k] = cos(phase) * gain;
+        p_imdct->xsin1[k] = sin(phase) * gain;
+    }
+}
+
+/*****************************************************************************
+ * imdct_do_512: 512 IMDCT with source and destination in 'data'
+ *****************************************************************************
+ * Steps: pre-twiddle the 256 coefficients of data[] into p_imdct->buf in
+ * pm128 order, run fft_128p, post-twiddle, then window. The 256 output
+ * samples are the windowed values plus the content of delay[]; delay[] is
+ * then refilled with the values windowed in reverse order.
+ *****************************************************************************/
+void _M( imdct_do_512 ) (imdct_t * p_imdct, float data[], float delay[])
+{
+    int k;
+
+    /* Pre IFFT complex multiply plus IFFT complex conjugate */
+    for( k = 0; k < 128; k++ )
+    {
+        const int src = pm128[k];
+
+        p_imdct->buf[k].real = (data[256-2*src-1] * p_imdct->xcos1[src]) - (data[2*src] * p_imdct->xsin1[src]);
+        p_imdct->buf[k].imag = -1.0 * (data[2*src] * p_imdct->xcos1[src] + data[256-2*src-1] * p_imdct->xsin1[src]);
+    }
+
+    _M( fft_128p ) ( &p_imdct->buf[0] );
+
+    /* Post IFFT complex multiply plus IFFT complex conjugate */
+    for( k = 0; k < 128; k++ )
+    {
+        const float re = p_imdct->buf[k].real;
+        const float im = p_imdct->buf[k].imag;
+
+        p_imdct->buf[k].real =(re * p_imdct->xcos1[k]) + (im * p_imdct->xsin1[k]);
+        p_imdct->buf[k].imag =(re * p_imdct->xsin1[k]) - (im * p_imdct->xcos1[k]);
+    }
+
+    /* Window and convert to real valued signal: first 128 samples ... */
+    for( k = 0; k < 64; k++ )
+    {
+        data[2*k] = -p_imdct->buf[64+k].imag * window[2*k] + delay[2*k];
+        data[2*k+1] = p_imdct->buf[64-k-1].real * window[2*k+1] + delay[2*k+1];
+    }
+
+    /* ... then the last 128 */
+    for( k = 0; k < 64; k++ )
+    {
+        data[128+2*k] = -p_imdct->buf[k].real * window[128+2*k] + delay[128+2*k];
+        data[128+2*k+1] = p_imdct->buf[128-k-1].imag * window[128+2*k+1] + delay[128+2*k+1];
+    }
+
+    /* The trailing edge of the window goes into the delay line: the window
+     * is walked backwards, from its last coefficient down to the first */
+    for( k = 0; k < 64; k++ )
+    {
+        delay[2*k] = -p_imdct->buf[64+k].real * window[255-2*k];
+        delay[2*k+1] = p_imdct->buf[64-k-1].imag * window[254-2*k];
+    }
+
+    for( k = 0; k < 64; k++ )
+    {
+        delay[128+2*k] = p_imdct->buf[k].imag * window[127-2*k];
+        delay[128+2*k+1] = -p_imdct->buf[128-k-1].real * window[126-2*k];
+    }
+}
+
+
+/*****************************************************************************
+ * imdct_do_512_nol: 512 IMDCT in 'data', without overlap
+ *****************************************************************************
+ * Same processing as imdct_do_512 except that the previous content of
+ * delay[] is not added to the output; delay[] is only written, with the
+ * values windowed in reverse order.
+ *****************************************************************************/
+void _M( imdct_do_512_nol ) (imdct_t * p_imdct, float data[], float delay[])
+{
+    int k;
+
+    /* Pre IFFT complex multiply plus IFFT complex conjugate, input taken
+     * in pm128 order */
+    for( k = 0; k < 128; k++ )
+    {
+        const int src = pm128[k];
+
+        p_imdct->buf[k].real = (data[256-2*src-1] * p_imdct->xcos1[src]) - (data[2*src] * p_imdct->xsin1[src]);
+        p_imdct->buf[k].imag = -1.0 * (data[2*src] * p_imdct->xcos1[src] + data[256-2*src-1] * p_imdct->xsin1[src]);
+    }
+
+    _M( fft_128p ) ( &p_imdct->buf[0] );
+
+    /* Post IFFT complex multiply plus IFFT complex conjugate */
+    for( k = 0; k < 128; k++ )
+    {
+        const float re = p_imdct->buf[k].real;
+        const float im = p_imdct->buf[k].imag;
+
+        p_imdct->buf[k].real =(re * p_imdct->xcos1[k]) + (im * p_imdct->xsin1[k]);
+        p_imdct->buf[k].imag =(re * p_imdct->xsin1[k]) - (im * p_imdct->xcos1[k]);
+    }
+
+    /* Window and convert to real valued signal, no overlap here */
+    for( k = 0; k < 64; k++ )
+    {
+        data[2*k] = -p_imdct->buf[64+k].imag * window[2*k];
+        data[2*k+1] = p_imdct->buf[64-k-1].real * window[2*k+1];
+    }
+
+    for( k = 0; k < 64; k++ )
+    {
+        data[128+2*k] = -p_imdct->buf[k].real * window[128+2*k];
+        data[128+2*k+1] = p_imdct->buf[128-k-1].imag * window[128+2*k+1];
+    }
+
+    /* The trailing edge of the window goes into the delay line: the window
+     * is walked backwards, from its last coefficient down to the first */
+    for( k = 0; k < 64; k++ )
+    {
+        delay[2*k] = -p_imdct->buf[64+k].real * window[255-2*k];
+        delay[2*k+1] = p_imdct->buf[64-k-1].imag * window[254-2*k];
+    }
+
+    for( k = 0; k < 64; k++ )
+    {
+        delay[128+2*k] = p_imdct->buf[k].imag * window[127-2*k];
+        delay[128+2*k+1] = -p_imdct->buf[128-k-1].real * window[126-2*k];
+    }
+}
+
--- /dev/null
+/*****************************************************************************
+ * ac3_imdct_common.c: common ac3 DCT functions
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: ac3_imdct_common.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ * Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+/* MODULE_NAME defined in Makefile together with -DBUILTIN */
+#ifdef BUILTIN
+# include "modules_inner.h"
+#else
+# define _M( foo ) foo
+#endif
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <string.h> /* memcpy() */
+
+#include <math.h>
+#include <stdio.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+
+#include "ac3_imdct.h"
+
+#ifndef M_PI
+# define M_PI 3.14159265358979323846
+#endif
+
+/* 256-entry window table. The imdct_do_* routines in this file multiply the
+ * post-IFFT values by these coefficients, reading the table forwards for the
+ * output samples and backwards for the delay line. */
+static float window[] = {
+ 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
+ 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
+ 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
+ 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
+ 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
+ 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
+ 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
+ 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
+ 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
+ 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
+ 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
+ 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
+ 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
+ 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
+ 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
+ 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
+ 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
+ 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
+ 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
+ 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
+ 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
+ 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
+ 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
+ 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
+ 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
+ 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
+ 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
+ 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
+ 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
+ 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
+ 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
+ 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
+};
+
+/* 128-entry index table for the 512-sample transform: when filling buf[i]
+ * ahead of the 128-point FFT, the pre-IFFT twiddle takes its input
+ * coefficients and twiddle factors at index j = pm128[i]. */
+static const int pm128[128] =
+{
+ 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120,
+ 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124,
+ 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122,
+ 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126,
+ 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121,
+ 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125,
+ 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123,
+ 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127
+};
+
+/* 64-entry index table for the 256-sample transforms: imdct_do_256 and
+ * imdct_do_256_nol fill buf1[k]/buf2[k] from the coefficients and twiddle
+ * factors at index j = pm64[k] before calling the 64-point FFT. */
+static const int pm64[64] =
+{
+ 0, 8, 16, 24, 32, 40, 48, 56,
+ 4, 20, 36, 52, 12, 28, 44, 60,
+ 2, 10, 18, 26, 34, 42, 50, 58,
+ 6, 14, 22, 30, 38, 46, 54, 62,
+ 1, 9, 17, 25, 33, 41, 49, 57,
+ 5, 21, 37, 53, 13, 29, 45, 61,
+ 3, 11, 19, 27, 35, 43, 51, 59,
+ 7, 23, 39, 55, 15, 31, 47, 63
+};
+
+/* Two interleaved 256-sample inverse transforms with overlap-add.
+ *
+ * data[]  : 256 coefficients on entry (even indices feed the first
+ *           transform, odd indices the second); 256 output samples on return.
+ * delay[] : read and added to the windowed first transform, then overwritten
+ *           with the second transform weighted by the reversed window.
+ * p_imdct->buf[0..63] and buf[64..127] are used as the two work areas. */
+void _M( imdct_do_256 ) (imdct_t * p_imdct, float data[],float delay[])
+{
+    complex_t *z1 = &p_imdct->buf[0];
+    complex_t *z2 = &p_imdct->buf[64];
+    float re, im;
+    int n, src, hi, lo;
+
+    /* Pre-IFFT twiddle (includes the IFFT conjugate). */
+    for( n = 0; n < 64; n++ )
+    {
+        src = pm64[n];
+        hi = 2 * (128-2*src-1);     /* even-indexed coefficient from the top */
+        lo = 2 * (2 * src);         /* even-indexed coefficient from the bottom */
+
+        z1[n].real = data[hi] * p_imdct->xcos2[src] - data[lo] * p_imdct->xsin2[src];
+        z1[n].imag = -1.0f*(data[lo] * p_imdct->xcos2[src] + data[hi] * p_imdct->xsin2[src]);
+
+        /* Same pair shifted by one: the odd-indexed coefficients. */
+        z2[n].real = data[hi + 1] * p_imdct->xcos2[src] - data[lo + 1] * p_imdct->xsin2[src];
+        z2[n].imag = -1.0f*(data[lo + 1] * p_imdct->xcos2[src] + data[hi + 1] * p_imdct->xsin2[src]);
+    }
+
+    _M( fft_64p ) ( &z1[0] );
+    _M( fft_64p ) ( &z2[0] );
+
+    /* Post-IFFT twiddle on both halves. */
+    for( n = 0; n < 64; n++ )
+    {
+        re = z1[n].real;
+        im = -z1[n].imag;
+        z1[n].real = (re * p_imdct->xcos2[n]) - (im * p_imdct->xsin2[n]);
+        z1[n].imag = (re * p_imdct->xsin2[n]) + (im * p_imdct->xcos2[n]);
+
+        re = z2[n].real;
+        im = -z2[n].imag;
+        z2[n].real = (re * p_imdct->xcos2[n]) - (im * p_imdct->xsin2[n]);
+        z2[n].imag = (re * p_imdct->xsin2[n]) + (im * p_imdct->xcos2[n]);
+    }
+
+    /* Window the first transform and overlap-add the previous delay line. */
+    for( n = 0; n < 64; n++ )
+    {
+        data[2*n]     = -z1[n].imag * window[2*n] + delay[2*n];
+        data[2*n + 1] = z1[64-n-1].real * window[2*n + 1] + delay[2*n + 1];
+    }
+
+    for( n = 0; n < 64; n++ )
+    {
+        data[128 + 2*n]     = -z1[n].real * window[128 + 2*n] + delay[128 + 2*n];
+        data[128 + 2*n + 1] = z1[64-n-1].imag * window[128 + 2*n + 1] + delay[128 + 2*n + 1];
+    }
+
+    /* Refill the delay line from the second transform; the window is read
+     * from window[255] downwards. */
+    for( n = 0; n < 64; n++ )
+    {
+        delay[2*n]     = -z2[n].real * window[255 - 2*n];
+        delay[2*n + 1] = z2[64-n-1].imag * window[254 - 2*n];
+    }
+
+    for( n = 0; n < 64; n++ )
+    {
+        delay[128 + 2*n]     = z2[n].imag * window[127 - 2*n];
+        delay[128 + 2*n + 1] = -z2[64-n-1].real * window[126 - 2*n];
+    }
+}
+
+
+/* Two interleaved 256-sample inverse transforms, "no overlap" variant.
+ *
+ * data[]  : 256 coefficients on entry (even indices feed the first
+ *           transform, odd indices the second); on return the windowed first
+ *           transform, with nothing added from the delay line.
+ * delay[] : overwritten with the second transform weighted by the reversed
+ *           window; it is never read here.
+ * p_imdct->buf[0..63] and buf[64..127] are used as the two work areas.
+ *
+ * The second windowing loop used to add delay[0..127] to data[128..255],
+ * which contradicted both the first loop and imdct_do_512_nol; that
+ * addition has been removed. */
+void _M( imdct_do_256_nol ) (imdct_t * p_imdct, float data[], float delay[])
+{
+    int i, j, k;
+    int p, q;
+
+    float tmp_a_i;
+    float tmp_a_r;
+
+    float *data_ptr;
+    float *delay_ptr;
+    float *window_ptr;
+
+    complex_t *buf1, *buf2;
+
+    buf1 = &p_imdct->buf[0];
+    buf2 = &p_imdct->buf[64];
+
+    /* Pre IFFT complex multiply plus IFFT complex conjugate */
+    for(k=0; k<64; k++) {
+        /* X1[k] = X[2*k]
+         * X2[k] = X[2*k+1] */
+        j = pm64[k];
+        p = 2 * (128-2*j-1);
+        q = 2 * (2 * j);
+
+        /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
+        buf1[k].real = data[p] * p_imdct->xcos2[j] - data[q] * p_imdct->xsin2[j];
+        buf1[k].imag = -1.0f*(data[q] * p_imdct->xcos2[j] + data[p] * p_imdct->xsin2[j]);
+        /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
+        buf2[k].real = data[p + 1] * p_imdct->xcos2[j] - data[q + 1] * p_imdct->xsin2[j];
+        buf2[k].imag = -1.0f*(data[q + 1] * p_imdct->xcos2[j] + data[p + 1] * p_imdct->xsin2[j]);
+    }
+
+    _M( fft_64p ) ( &buf1[0] );
+    _M( fft_64p ) ( &buf2[0] );
+
+    /* Post IFFT complex multiply */
+    for( i=0; i < 64; i++) {
+        /* y1[n] = z1[n] * (xcos2[n] + j * xsin2[n]) ; */
+        tmp_a_r = buf1[i].real;
+        tmp_a_i = -buf1[i].imag;
+        buf1[i].real =(tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
+        buf1[i].imag =(tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
+        /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
+        tmp_a_r = buf2[i].real;
+        tmp_a_i = -buf2[i].imag;
+        buf2[i].real =(tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
+        buf2[i].imag =(tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
+    }
+
+    data_ptr = data;
+    window_ptr = window;
+
+    /* Window and convert to real valued signal, no overlap: the delay line
+     * must not be mixed into either half of the output. */
+    for(i=0; i< 64; i++) {
+        *data_ptr++ = -buf1[i].imag * *window_ptr++;
+        *data_ptr++ = buf1[64-i-1].real * *window_ptr++;
+    }
+
+    for(i=0; i< 64; i++) {
+        *data_ptr++ = -buf1[i].real * *window_ptr++;
+        *data_ptr++ = buf1[64-i-1].imag * *window_ptr++;
+    }
+
+    /* The second transform goes into the delay line; window_ptr now points
+     * one past the end of the table and is walked backwards. */
+    delay_ptr = delay;
+
+    for(i=0; i< 64; i++) {
+        *delay_ptr++ = -buf2[i].real * *--window_ptr;
+        *delay_ptr++ = buf2[64-i-1].imag * *--window_ptr;
+    }
+
+    for(i=0; i< 64; i++) {
+        *delay_ptr++ = buf2[i].imag * *--window_ptr;
+        *delay_ptr++ = -buf2[64-i-1].real * *--window_ptr;
+    }
+}
+
/*****************************************************************************
- * ac3_imdct_c.h: ac3 DCT
+ * ac3_imdct_common.h: common ac3 DCT headers
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_imdct_c.h,v 1.2 2001/04/30 21:10:25 reno Exp $
+ * $Id: ac3_imdct_common.h,v 1.1 2001/05/15 16:19:42 sam Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
-int imdct_init_c (imdct_t * p_imdct);
-void imdct_do_256(imdct_t * p_imdct, float data[], float delay[]);
-void imdct_do_256_nol(imdct_t * p_imdct, float data[], float delay[]);
-void imdct_do_512_c(imdct_t * p_imdct, float data[], float delay[]);
-void imdct_do_512_nol_c(imdct_t * p_imdct, float data[], float delay[]);
+void _M( imdct_init ) ( imdct_t * p_imdct );
+void _M( imdct_do_256 ) ( imdct_t * p_imdct, float data[], float delay[] );
+void _M( imdct_do_256_nol ) ( imdct_t * p_imdct, float data[], float delay[] );
+void _M( imdct_do_512 ) ( imdct_t * p_imdct, float data[], float delay[] );
+void _M( imdct_do_512_nol ) ( imdct_t * p_imdct, float data[], float delay[] );
--- /dev/null
+/*****************************************************************************
+ * ac3_imdct_sse.c: accelerated SSE ac3 DCT
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: ac3_imdct_sse.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ * Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME imdctsse
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <math.h>
+#include <stdio.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+
+#include "ac3_imdct.h"
+#include "ac3_imdct_common.h"
+
+/* 256-entry window table, passed by imdct_do_512 and imdct_do_512_nol to the
+ * SSE window/delay routines below. */
+static const float window[] = {
+ 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
+ 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
+ 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
+ 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
+ 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
+ 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
+ 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
+ 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
+ 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
+ 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
+ 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
+ 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
+ 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
+ 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
+ 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
+ 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
+ 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
+ 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
+ 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
+ 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
+ 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
+ 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
+ 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
+ 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
+ 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
+ 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
+ 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
+ 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
+ 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
+ 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
+ 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
+ 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
+};
+
+/* 128-entry index table handed to imdct512_pre_ifft_twiddle_sse as pmt[]:
+ * each entry j selects the input coefficients and the twiddle entry used for
+ * the corresponding slot of buf[]. */
+static const int pm128[128] =
+{
+ 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120,
+ 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124,
+ 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122,
+ 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126,
+ 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121,
+ 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125,
+ 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123,
+ 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127
+};
+
+void _M( fft_64p ) ( complex_t *x );
+void _M( fft_128p ) ( complex_t *a );
+
+static void imdct512_pre_ifft_twiddle_sse (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse);
+static void imdct512_post_ifft_twiddle_sse (complex_t *buf, float *xcos_sin_sse);
+static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, const float *window_prt, float *delay_prt);
+static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, const float *window_prt, float *delay_prt);
+
+
+/* Fill p_imdct->xcos_sin_sse with 128 four-float twiddle entries.
+ *
+ * Entry n holds { c, -s, -s, -c }, where c and s are the cosine and sine of
+ * 2*pi*(8n+1)/(8N) multiplied by the scale factor. The SSE twiddle routines
+ * load one entry per complex value. */
+void _M( imdct_init ) (imdct_t * p_imdct)
+{
+    float scale = 181.019;  /* roughly 128*sqrt(2); NOTE(review): rationale not visible here */
+    int n = 0;
+
+    while( n < 128 )
+    {
+        float *p_entry = &p_imdct->xcos_sin_sse[n * 4];
+        float c = cos(2.0f * M_PI * (8*n+1)/(8*N)) * scale;
+        float s = sin(2.0f * M_PI * (8*n+1)/(8*N)) * scale;
+
+        p_entry[0] = c;
+        p_entry[1] = -s;
+        p_entry[2] = -s;
+        p_entry[3] = -c;
+
+        n++;
+    }
+}
+
+/* 512-sample inverse transform with overlap-add, SSE path.
+ * Runs four stages on p_imdct->buf: pre-IFFT twiddle of data[] (indexed
+ * through pm128), 128-point FFT, post-IFFT twiddle, then windowing into
+ * data[] with the delay[] line added and refilled. */
+void _M( imdct_do_512 ) (imdct_t * p_imdct, float data[], float delay[])
+{
+ imdct512_pre_ifft_twiddle_sse (pm128, p_imdct->buf, data, p_imdct->xcos_sin_sse);
+ _M( fft_128p ) ( p_imdct->buf );
+ imdct512_post_ifft_twiddle_sse (p_imdct->buf, p_imdct->xcos_sin_sse);
+ imdct512_window_delay_sse (p_imdct->buf, data, window, delay);
+}
+
+
+/* 512-sample inverse transform, "no overlap" SSE path.
+ * Same first three stages as imdct_do_512; the last stage windows into data[]
+ * without adding delay[], and still refills delay[]. */
+void _M( imdct_do_512_nol ) (imdct_t * p_imdct, float data[], float delay[])
+{
+ imdct512_pre_ifft_twiddle_sse (pm128, p_imdct->buf, data, p_imdct->xcos_sin_sse);
+ _M( fft_128p ) ( p_imdct->buf );
+ imdct512_post_ifft_twiddle_sse (p_imdct->buf, p_imdct->xcos_sin_sse);
+ imdct512_window_delay_nol_sse (p_imdct->buf, data, window, delay);
+}
+
+/* Pre-IFFT twiddle, SSE version.
+ *
+ * Each of the 64 loop iterations takes two consecutive entries j of pmt[],
+ * combines data[2*j] and data[255-2*j] with the four-float twiddle entry at
+ * xcos_sin_sse[4*j], and stores the two resulting complex values (16 bytes)
+ * to the next slots of buf[]; 128 complex values are written in total.
+ *
+ * NOTE(review): the asm declares no operands and no clobbers. It builds its
+ * own frame and then reads the four arguments from 8(%ebp)..20(%ebp), which
+ * presumably only holds on 32-bit x86 with stack-passed arguments when the
+ * compiler emits no prologue of its own and does not inline this function --
+ * confirm against the build flags. The memory written through buf and the
+ * xmm0-xmm5 registers used are not declared either.
+ * NOTE(review): ".loop" is an ordinary assembler label, so a second copy of
+ * this asm in the same translation unit would define it twice. */
+static void imdct512_pre_ifft_twiddle_sse (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse)
+{
+ __asm__ __volatile__ (
+ "pushl %%ebp\n"
+ "movl %%esp, %%ebp\n"
+ "addl $-4, %%esp\n" /* local variable, loop counter */
+
+ "pushl %%eax\n"
+ "pushl %%ebx\n"
+ "pushl %%ecx\n"
+ "pushl %%edx\n"
+ "pushl %%edi\n"
+ "pushl %%esi\n"
+
+ "movl 8(%%ebp), %%eax\n" /* pmt */
+ "movl 12(%%ebp), %%ebx\n" /* buf */
+ "movl 16(%%ebp), %%ecx\n" /* data */
+ "movl 20(%%ebp), %%edx\n" /* xcos_sin_sse */
+ "movl $64, -4(%%ebp)\n"
+
+".loop:\n"
+ "movl (%%eax), %%esi\n"
+ "movl 4(%%eax), %%edi\n"
+ "movss (%%ecx, %%esi, 8), %%xmm1\n" /* 2j */
+ "movss (%%ecx, %%edi, 8), %%xmm3\n" /* 2(j+1) */
+
+ /* Double the indices: scaled by 8 they address 16-byte twiddle entries. */
+ "shll $1, %%esi\n"
+ "shll $1, %%edi\n"
+
+ "movups (%%edx, %%esi, 8), %%xmm0\n" /* -c_j | -s_j | -s_j | c_j */
+ "movups (%%edx, %%edi, 8), %%xmm2\n" /* -c_j+1 | -s_j+1 | -s_j+1 | c_j+1 */
+
+ /* Negate so that 1020 + 4*(-2j) is the byte offset of data[255-2j]. */
+ "negl %%esi\n"
+ "negl %%edi\n"
+
+ "movss 1020(%%ecx, %%esi, 4), %%xmm4\n" /* 255-2j */
+ "addl $8, %%eax\n"
+ "movss 1020(%%ecx, %%edi, 4), %%xmm5\n" /* 255-2(j+1) */
+
+ "shufps $0, %%xmm1, %%xmm4\n" /* 2j | 2j | 255-2j | 255-2j */
+ "shufps $0, %%xmm3, %%xmm5\n" /* 2(j+1) | 2(j+1) | 255-2(j+1) | 255-2(j+1) */
+ "mulps %%xmm4, %%xmm0\n"
+ "mulps %%xmm5, %%xmm2\n"
+ /* Fold the high pair of products onto the low pair: low two lanes
+  * become the (real, imag) result. */
+ "movhlps %%xmm0, %%xmm1\n"
+ "movhlps %%xmm2, %%xmm3\n"
+ "addl $16, %%ebx\n"
+ "addps %%xmm1, %%xmm0\n"
+ "addps %%xmm3, %%xmm2\n"
+ "movlhps %%xmm2, %%xmm0\n"
+
+ "movups %%xmm0, -16(%%ebx)\n"
+ "decl -4(%%ebp)\n"
+ "jnz .loop\n"
+
+ "popl %%esi\n"
+ "popl %%edi\n"
+ "popl %%edx\n"
+ "popl %%ecx\n"
+ "popl %%ebx\n"
+ "popl %%eax\n"
+
+ "addl $4, %%esp\n"
+ "popl %%ebp\n"
+ ::);
+}
+
+/* Post-IFFT twiddle, SSE version.
+ *
+ * Rewrites buf[] in place, four complex values per iteration over 32
+ * iterations (128 values), consuming one four-float xcos_sin_sse entry per
+ * value. With a table entry { c, -s, -s, -c } each (re, im) becomes
+ * ( -(re*c + im*s), im*c - re*s ).
+ *
+ * NOTE(review): %ecx is an input-only operand but is advanced inside the
+ * asm, and neither xmm0-xmm7 nor "memory" is listed as clobbered; %ebx is
+ * saved and restored by hand. ".loop1" is an ordinary assembler label, so a
+ * second copy of this asm in the same translation unit would clash. */
+static void imdct512_post_ifft_twiddle_sse (complex_t *buf, float *xcos_sin_sse)
+{
+ __asm__ __volatile__ (
+ "pushl %%ebx\n"
+ "movl $32, %%ebx\n" /* loop counter */
+
+".loop1:\n"
+ "movups (%%eax), %%xmm0\n" /* im1 | re1 | im0 | re0 */
+
+ "movups (%%ecx), %%xmm2\n" /* -c | -s | -s | c */
+ "movhlps %%xmm0, %%xmm1\n" /* im1 | re1 */
+ "movups 16(%%ecx), %%xmm3\n" /* -c1 | -s1 | -s1 | c1 */
+
+ "shufps $0x50, %%xmm0, %%xmm0\n" /* im0 | im0 | re0 | re0 */
+ "shufps $0x50, %%xmm1, %%xmm1\n" /* im1 | im1 | re1 | re1 */
+
+ "movups 16(%%eax), %%xmm4\n" /* im3 | re3 | im2 | re2 */
+
+ "shufps $0x27, %%xmm2, %%xmm2\n" /* c | -s | -s | -c */
+ "movhlps %%xmm4, %%xmm5\n" /* im3 | re3 */
+ "shufps $0x27, %%xmm3, %%xmm3\n" /* c1 | -s1 | -s1 | -c1 */
+
+ "movups 32(%%ecx), %%xmm6\n" /* -c2 | -s2 | -s2 | c2 */
+ "movups 48(%%ecx), %%xmm7\n" /* -c3 | -s3 | -s3 | c3 */
+
+ "shufps $0x50, %%xmm4, %%xmm4\n" /* im2 | im2 | re2 | re2 */
+ "shufps $0x50, %%xmm5, %%xmm5\n" /* im3 | im3 | re3 | re3 */
+
+ "mulps %%xmm2, %%xmm0\n"
+ "mulps %%xmm3, %%xmm1\n"
+
+ "shufps $0x27, %%xmm6, %%xmm6\n" /* c2 | -s2 | -s2 | -c2 */
+ "shufps $0x27, %%xmm7, %%xmm7\n" /* c3 | -s3 | -s3 | -c3 */
+
+ /* Add the high pair of products onto the low pair to form (re, im). */
+ "movhlps %%xmm0, %%xmm2\n"
+ "movhlps %%xmm1, %%xmm3\n"
+
+ "mulps %%xmm6, %%xmm4\n"
+ "mulps %%xmm7, %%xmm5\n"
+
+ "addps %%xmm2, %%xmm0\n"
+ "addps %%xmm3, %%xmm1\n"
+
+ "movhlps %%xmm4, %%xmm6\n"
+ "movhlps %%xmm5, %%xmm7\n"
+
+ "addps %%xmm6, %%xmm4\n"
+ "addps %%xmm7, %%xmm5\n"
+
+ /* Pack two results per register before storing back. */
+ "movlhps %%xmm1, %%xmm0\n"
+ "movlhps %%xmm5, %%xmm4\n"
+
+ "movups %%xmm0, (%%eax)\n"
+ "movups %%xmm4, 16(%%eax)\n"
+ "addl $64, %%ecx\n"
+ "addl $32, %%eax\n"
+ "decl %%ebx\n"
+ "jnz .loop1\n"
+
+ "popl %%ebx\n"
+ : "=a" (buf)
+ : "a" (buf), "c" (xcos_sin_sse) );
+}
+
+/* Window the post-twiddle buf[] into data[] with overlap-add, then refill
+ * the delay line (SSE version).
+ *
+ * Four loops of 16 iterations, eight floats per iteration:
+ *   .first_128_samples   data[0..127]    from buf[64+i].im / buf[63-i].re,
+ *                        times window[0..127], plus delay[0..127]
+ *   .second_128_samples  data[128..255]  from buf[i].re / buf[127-i].im,
+ *                        times window[128..255], plus delay[128..255]
+ *   .first_128_delay     delay[0..127]   from buf[64+i].re / buf[63-i].im,
+ *                        window read downwards from its end
+ *   .second_128_delay    delay[128..255] from buf[i].im / buf[127-i].re,
+ *                        window read further downwards
+ *
+ * NOTE(review): the asm declares no operands and no clobbers and reads its
+ * arguments from 8(%ebp)..20(%ebp) after pushing %ebp itself; this presumably
+ * requires that the compiler emits no prologue and does not inline the
+ * function (32-bit x86, stack-passed arguments) -- confirm.
+ * NOTE(review): in the two delay loops the window is loaded four lanes at a
+ * time in ascending order within each group, whereas the C routines walk
+ * the window strictly backwards (*--window_ptr) -- confirm this is intended. */
+static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, const float *window_prt, float *delay_prt)
+{
+ __asm__ __volatile__ (
+ "pushl %%ebp\n"
+ "movl %%esp, %%ebp\n"
+
+ "pushl %%eax\n"
+ "pushl %%ebx\n"
+ "pushl %%ecx\n"
+ "pushl %%edx\n"
+ "pushl %%esi\n"
+ "pushl %%edi\n"
+
+ "movl 20(%%ebp), %%ebx\n" /* delay */
+ "movl 16(%%ebp), %%edx\n" /* window */
+
+ "movl 8(%%ebp), %%eax\n" /* buf */
+ "movl $16, %%ecx\n" /* loop count */
+ "leal 516(%%eax), %%esi\n" /* buf[64].im */
+ "leal 504(%%eax), %%edi\n" /* buf[63].re */
+ "movl 12(%%ebp), %%eax\n" /* data */
+
+".first_128_samples:\n"
+ "movss (%%esi), %%xmm0\n"
+ "movss 8(%%esi), %%xmm2\n"
+ "movss (%%edi), %%xmm1\n"
+ "movss -8(%%edi), %%xmm3\n"
+
+ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */
+ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */
+
+ "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
+ "movups (%%ebx), %%xmm5\n" /* d3 | d2 | d1 | d0 */
+ "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
+
+ "movss 16(%%esi), %%xmm6\n" /* im2 */
+ "movss 24(%%esi), %%xmm7\n" /* im3 */
+ "subps %%xmm1, %%xmm0\n" /* -re1 | im1 | -re0 | im0 */
+ "movss -16(%%edi), %%xmm2\n" /* re2 */
+ "movss -24(%%edi), %%xmm3\n" /* re3 */
+ "mulps %%xmm4, %%xmm0\n"
+ "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */
+ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */
+ "addps %%xmm5, %%xmm0\n"
+ "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
+ "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */
+ "movups 16(%%ebx), %%xmm5\n" /* d7 | d6 | d5 | d4 */
+ "subps %%xmm2, %%xmm6\n" /* -re3 | im3 | -re2 | im2 */
+ "addl $32, %%edx\n"
+ "movups %%xmm0, (%%eax)\n"
+ "addl $32, %%ebx\n"
+ "mulps %%xmm4, %%xmm6\n"
+ "addl $32, %%esi\n"
+ "addl $32, %%eax\n"
+ "addps %%xmm5, %%xmm6\n"
+ "addl $-32, %%edi\n"
+ "movups %%xmm6, -16(%%eax)\n"
+ "decl %%ecx\n"
+ "jnz .first_128_samples\n"
+
+ /* data (%eax), window (%edx) and delay (%ebx) pointers carry over. */
+ "movl 8(%%ebp), %%esi\n" /* buf[0].re */
+ "leal 1020(%%esi), %%edi\n" /* buf[127].im */
+ "movl $16, %%ecx\n" /* loop count */
+
+".second_128_samples:\n"
+ "movss (%%esi), %%xmm0\n" /* buf[i].re */
+ "movss 8(%%esi), %%xmm2\n" /* re1 */
+ "movss (%%edi), %%xmm1\n" /* buf[127-i].im */
+ "movss -8(%%edi), %%xmm3\n" /* im1 */
+
+ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */
+ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */
+
+ "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
+ "movups (%%ebx), %%xmm5\n" /* d3 | d2 | d1 | d0 */
+
+ "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
+ "movss 16(%%esi), %%xmm6\n" /* re2 */
+ "movss 24(%%esi), %%xmm7\n" /* re3 */
+ "movss -16(%%edi), %%xmm2\n" /* im2 */
+ "movss -24(%%edi), %%xmm3\n" /* im3 */
+ "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */
+ "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */
+ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */
+ "mulps %%xmm4, %%xmm0\n"
+ "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
+ "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */
+ "addl $32, %%esi\n"
+ "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */
+ "addps %%xmm5, %%xmm0\n"
+ "mulps %%xmm4, %%xmm6\n"
+ "addl $-32, %%edi\n"
+ "movups 16(%%ebx), %%xmm5\n" /* d7 | d6 | d5 | d4 */
+ "movups %%xmm0, (%%eax)\n"
+ "addps %%xmm5, %%xmm6\n"
+ "addl $32, %%edx\n"
+ "addl $32, %%eax\n"
+ "addl $32, %%ebx\n"
+ "movups %%xmm6, -16(%%eax)\n"
+ "decl %%ecx\n"
+ "jnz .second_128_samples\n"
+
+ /* From here on %eax is the delay write pointer; %edx sits one past the
+  * end of the window and is stepped downwards. */
+ "movl 8(%%ebp), %%eax\n"
+ "leal 512(%%eax), %%esi\n" /* buf[64].re */
+ "leal 508(%%eax), %%edi\n" /* buf[63].im */
+ "movl $16, %%ecx\n" /* loop count */
+ "movl 20(%%ebp), %%eax\n" /* delay */
+
+".first_128_delay:\n"
+ "movss (%%esi), %%xmm0\n"
+ "movss 8(%%esi), %%xmm2\n"
+ "movss (%%edi), %%xmm1\n"
+ "movss -8(%%edi), %%xmm3\n"
+
+ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */
+ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */
+
+ "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
+ "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
+ "movss 16(%%esi), %%xmm6\n" /* re2 */
+ "movss 24(%%esi), %%xmm7\n" /* re3 */
+ "movss -16(%%edi), %%xmm2\n" /* im2 */
+ "movss -24(%%edi), %%xmm3\n" /* im3 */
+ "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */
+ "addl $-32, %%edx\n"
+ "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */
+ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */
+ "mulps %%xmm4, %%xmm0\n"
+ "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */
+ "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
+ "movups %%xmm0, (%%eax)\n"
+ "addl $32, %%esi\n"
+ "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */
+ "addl $-32, %%edi\n"
+ "mulps %%xmm5, %%xmm6\n"
+ "addl $32, %%eax\n"
+ "movups %%xmm6, -16(%%eax)\n"
+ "decl %%ecx\n"
+ "jnz .first_128_delay\n"
+
+ "movl 8(%%ebp), %%ebx\n"
+ "leal 4(%%ebx), %%esi\n" /* buf[0].im */
+ "leal 1016(%%ebx), %%edi\n" /* buf[127].re */
+ "movl $16, %%ecx\n" /* loop count */
+
+".second_128_delay:\n"
+ "movss (%%esi), %%xmm0\n"
+ "movss 8(%%esi), %%xmm2\n"
+ "movss (%%edi), %%xmm1\n"
+ "movss -8(%%edi), %%xmm3\n"
+
+ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */
+ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */
+
+ "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
+ "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
+ "movss 16(%%esi), %%xmm6\n" /* im2 */
+ "movss 24(%%esi), %%xmm7\n" /* im3 */
+ "movss -16(%%edi), %%xmm2\n" /* re2 */
+ "movss -24(%%edi), %%xmm3\n" /* re3 */
+ "subps %%xmm0, %%xmm1\n" /* re1 | -im1 | re0 | -im0 */
+ "addl $-32, %%edx\n"
+ "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */
+ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */
+ "mulps %%xmm4, %%xmm1\n"
+ "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */
+ "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
+ "movups %%xmm1, (%%eax)\n"
+ "addl $32, %%esi\n"
+ "subps %%xmm6, %%xmm2\n" /* re3 | -im3 | re2 | -im2 */
+ "addl $-32, %%edi\n"
+ "mulps %%xmm5, %%xmm2\n"
+ "addl $32, %%eax\n"
+ "movups %%xmm2, -16(%%eax)\n"
+ "decl %%ecx\n"
+ "jnz .second_128_delay\n"
+
+ "popl %%edi\n"
+ "popl %%esi\n"
+ "popl %%edx\n"
+ "popl %%ecx\n"
+ "popl %%ebx\n"
+ "popl %%eax\n"
+
+ "leave\n"
+ ::);
+}
+
+/* Window the post-twiddle buf[] into data[] without overlap-add, then
+ * refill the delay line (SSE version).
+ *
+ * Same four 16-iteration loops as imdct512_window_delay_sse, except that the
+ * two sample loops only multiply by the window: the instructions that would
+ * load and add delay[] are left commented out. The two delay loops still
+ * write delay[0..255].
+ *
+ * NOTE(review): the asm declares no operands and no clobbers and reads its
+ * arguments from 8(%ebp)..20(%ebp) after pushing %ebp itself; this presumably
+ * requires that the compiler emits no prologue and does not inline the
+ * function (32-bit x86, stack-passed arguments) -- confirm.
+ * NOTE(review): in the two delay loops the window is loaded four lanes at a
+ * time in ascending order within each group, whereas the C routines walk
+ * the window strictly backwards (*--window_ptr) -- confirm this is intended. */
+static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, const float *window_prt, float *delay_prt)
+{
+ __asm__ __volatile__ (
+ "pushl %%ebp\n"
+ "movl %%esp, %%ebp\n"
+
+ "pushl %%eax\n"
+ "pushl %%ebx\n"
+ "pushl %%ecx\n"
+ "pushl %%edx\n"
+ "pushl %%esi\n"
+ "pushl %%edi\n"
+
+ /* movl 20(%%ebp), %%ebx delay */
+ "movl 16(%%ebp), %%edx\n" /* window */
+
+ "movl 8(%%ebp), %%eax\n" /* buf */
+ "movl $16, %%ecx\n" /* loop count */
+ "leal 516(%%eax), %%esi\n" /* buf[64].im */
+ "leal 504(%%eax), %%edi\n" /* buf[63].re */
+ "movl 12(%%ebp), %%eax\n" /* data */
+
+".first_128_sample:\n"
+ "movss (%%esi), %%xmm0\n"
+ "movss 8(%%esi), %%xmm2\n"
+ "movss (%%edi), %%xmm1\n"
+ "movss -8(%%edi), %%xmm3\n"
+
+ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */
+ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */
+
+ "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
+ /* movups (%%ebx), %%xmm5 d3 | d2 | d1 | d0 */
+ "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
+
+ "movss 16(%%esi), %%xmm6\n" /* im2 */
+ "movss 24(%%esi), %%xmm7\n" /* im3 */
+ "subps %%xmm1, %%xmm0\n" /* -re1 | im1 | -re0 | im0 */
+ "movss -16(%%edi), %%xmm2\n" /* re2 */
+ "movss -24(%%edi), %%xmm3\n" /* re3 */
+ "mulps %%xmm4, %%xmm0\n"
+ "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */
+ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */
+ /* addps %%xmm5, %%xmm0 */
+ "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
+ "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */
+ /* movups 16(%%ebx), %%xmm5 d7 | d6 | d5 | d4 */
+ "subps %%xmm2, %%xmm6\n" /* -re3 | im3 | -re2 | im2 */
+ "addl $32, %%edx\n"
+ "movups %%xmm0, (%%eax)\n"
+ /* addl $32, %%ebx */
+ "mulps %%xmm4, %%xmm6\n"
+ "addl $32, %%esi\n"
+ "addl $32, %%eax\n"
+ /* addps %%xmm5, %%xmm6 */
+ "addl $-32, %%edi\n"
+ "movups %%xmm6, -16(%%eax)\n"
+ "decl %%ecx\n"
+ "jnz .first_128_sample\n"
+
+ /* data (%eax) and window (%edx) pointers carry over. */
+ "movl 8(%%ebp), %%esi\n" /* buf[0].re */
+ "leal 1020(%%esi), %%edi\n" /* buf[127].im */
+ "movl $16, %%ecx\n" /* loop count */
+
+".second_128_sample:\n"
+ "movss (%%esi), %%xmm0\n" /* buf[i].re */
+ "movss 8(%%esi), %%xmm2\n" /* re1 */
+ "movss (%%edi), %%xmm1\n" /* buf[127-i].im */
+ "movss -8(%%edi), %%xmm3\n" /* im1 */
+
+ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */
+ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */
+
+ "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
+ /* movups (%%ebx), %%xmm5 d3 | d2 | d1 | d0 */
+
+ "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
+ "movss 16(%%esi), %%xmm6\n" /* re2 */
+ "movss 24(%%esi), %%xmm7\n" /* re3 */
+ "movss -16(%%edi), %%xmm2\n" /* im2 */
+ "movss -24(%%edi), %%xmm3\n" /* im3 */
+ "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */
+ "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */
+ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */
+ "mulps %%xmm4, %%xmm0\n"
+ "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
+ "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */
+ "addl $32, %%esi\n"
+ "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */
+ /* addps %%xmm5, %%xmm0 */
+ "mulps %%xmm4, %%xmm6\n"
+ "addl $-32, %%edi\n"
+ /* movups 16(%%ebx), %%xmm5 d7 | d6 | d5 | d4 */
+ "movups %%xmm0, (%%eax)\n"
+ /* addps %%xmm5, %%xmm6 */
+ "addl $32, %%edx\n"
+ "addl $32, %%eax\n"
+ /* addl $32, %%ebx */
+ "movups %%xmm6, -16(%%eax)\n"
+ "decl %%ecx\n"
+ "jnz .second_128_sample\n"
+
+ /* From here on %eax is the delay write pointer; %edx sits one past the
+  * end of the window and is stepped downwards. */
+ "movl 8(%%ebp), %%eax\n"
+ "leal 512(%%eax), %%esi\n" /* buf[64].re */
+ "leal 508(%%eax), %%edi\n" /* buf[63].im */
+ "movl $16, %%ecx\n" /* loop count */
+ "movl 20(%%ebp), %%eax\n" /* delay */
+
+".first_128_delays:\n"
+ "movss (%%esi), %%xmm0\n"
+ "movss 8(%%esi), %%xmm2\n"
+ "movss (%%edi), %%xmm1\n"
+ "movss -8(%%edi), %%xmm3\n"
+
+ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */
+ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */
+
+ "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
+ "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
+ "movss 16(%%esi), %%xmm6\n" /* re2 */
+ "movss 24(%%esi), %%xmm7\n" /* re3 */
+ "movss -16(%%edi), %%xmm2\n" /* im2 */
+ "movss -24(%%edi), %%xmm3\n" /* im3 */
+ "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */
+ "addl $-32, %%edx\n"
+ "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */
+ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */
+ "mulps %%xmm4, %%xmm0\n"
+ "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */
+ "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
+ "movups %%xmm0, (%%eax)\n"
+ "addl $32, %%esi\n"
+ "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */
+ "addl $-32, %%edi\n"
+ "mulps %%xmm5, %%xmm6\n"
+ "addl $32, %%eax\n"
+ "movups %%xmm6, -16(%%eax)\n"
+ "decl %%ecx\n"
+ "jnz .first_128_delays\n"
+
+ "movl 8(%%ebp), %%ebx\n"
+ "leal 4(%%ebx), %%esi\n" /* buf[0].im */
+ "leal 1016(%%ebx), %%edi\n" /* buf[127].re */
+ "movl $16, %%ecx\n" /* loop count */
+
+".second_128_delays:\n"
+ "movss (%%esi), %%xmm0\n"
+ "movss 8(%%esi), %%xmm2\n"
+ "movss (%%edi), %%xmm1\n"
+ "movss -8(%%edi), %%xmm3\n"
+
+ "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */
+ "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */
+
+ "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
+ "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
+ "movss 16(%%esi), %%xmm6\n" /* im2 */
+ "movss 24(%%esi), %%xmm7\n" /* im3 */
+ "movss -16(%%edi), %%xmm2\n" /* re2 */
+ "movss -24(%%edi), %%xmm3\n" /* re3 */
+ "subps %%xmm0, %%xmm1\n" /* re1 | -im1 | re0 | -im0 */
+ "addl $-32, %%edx\n"
+ "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */
+ "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */
+ "mulps %%xmm4, %%xmm1\n"
+ "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */
+ "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
+ "movups %%xmm1, (%%eax)\n"
+ "addl $32, %%esi\n"
+ "subps %%xmm6, %%xmm2\n" /* re3 | -im3 | re2 | -im2 */
+ "addl $-32, %%edi\n"
+ "mulps %%xmm5, %%xmm2\n"
+ "addl $32, %%eax\n"
+ "movups %%xmm2, -16(%%eax)\n"
+ "decl %%ecx\n"
+ "jnz .second_128_delays\n"
+
+ "popl %%edi\n"
+ "popl %%esi\n"
+ "popl %%edx\n"
+ "popl %%ecx\n"
+ "popl %%ebx\n"
+ "popl %%eax\n"
+
+ "leave\n"
+ ::);
+}
/*****************************************************************************
- * ac3_srfft.h: ac3 FFT
+ * ac3_srfft.h: ac3 FFT tables
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_srfft.h,v 1.3 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_srfft.h,v 1.1 2001/05/15 16:19:42 sam Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
a_i += v_i; \
A13.imag = a_i; \
}
+
/*****************************************************************************
- * ac3_srfft.c: ac3 FFT
+ * ac3_srfft.c: ac3 FFT in C
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_srfft.c,v 1.4 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_srfft_c.c,v 1.1 2001/05/15 16:19:42 sam Exp $
*
* Authors: Renaud Dartus <reno@videolan.org>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
+#define MODULE_NAME imdct
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
#include "defs.h"
#include <string.h> /* memcpy() */
#include "threads.h"
#include "mtime.h"
-#include "stream_control.h"
-#include "input_ext-dec.h"
-
-#include "ac3_decoder.h"
+#include "ac3_imdct.h"
#include "ac3_srfft.h"
static void fft_8 (complex_t *x);
static void fft_asmb(int k, complex_t *x, complex_t *wTB,
- const complex_t *d, const complex_t *d_3)
+ const complex_t *d, const complex_t *d_3)
{
register complex_t *x2k, *x3k, *x4k, *wB;
register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i;
}
-void fft_64p_c (complex_t *a)
+void _M( fft_64p ) ( complex_t *a )
{
fft_8(&a[0]); fft_4(&a[8]); fft_4(&a[12]);
fft_asmb16(&a[0], &a[8]);
}
-void fft_128p_c (complex_t *a)
+void _M( fft_128p ) ( complex_t *a )
{
fft_8(&a[0]); fft_4(&a[8]); fft_4(&a[12]);
fft_asmb16(&a[0], &a[8]);
/* fft_128(&a[0]); */
fft_asmb(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
}
+
--- /dev/null
+/*****************************************************************************
+ * ac3_srfft_sse.c: accelerated SSE ac3 fft functions
+ *****************************************************************************
+ * Copyright (C) 1999, 2000, 2001 VideoLAN
+ * $Id: ac3_srfft_sse.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ * Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME imdctsse
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include <stdio.h>
+
+#include "defs.h"
+
+#include <math.h>
+#include <stdio.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+
+#include "ac3_imdct.h"
+#include "ac3_srfft.h"
+
+void hsqrt2 (void);
+void C_1 (void);
+static void fft_4_sse (complex_t *x);
+static void fft_8_sse (complex_t *x);
+static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
+ const complex_t *d, const complex_t *d_3);
+
+/*****************************************************************************
+ * fft_64p: in-place 64-point FFT assembled from the SSE 4/8-point kernels
+ *****************************************************************************
+ * Small transforms are run on sub-ranges of a[] and merged pairwise by
+ * fft_asmb_sse until one pass with k = 8 covers all 64 entries.
+ *****************************************************************************/
+void _M( fft_64p ) ( complex_t *a )
+{
+    /* a[0..15] */
+    fft_8_sse( &a[0] );
+    fft_4_sse( &a[8] );
+    fft_4_sse( &a[12] );
+    fft_asmb_sse( 2, &a[0], &a[8], &delta16[0], &delta16_3[0] );
+
+    /* a[16..31], then merge into a[0..31] */
+    fft_8_sse( &a[16] );
+    fft_8_sse( &a[24] );
+    fft_asmb_sse( 4, &a[0], &a[16], &delta32[0], &delta32_3[0] );
+
+    /* a[32..47] */
+    fft_8_sse( &a[32] );
+    fft_4_sse( &a[40] );
+    fft_4_sse( &a[44] );
+    fft_asmb_sse( 2, &a[32], &a[40], &delta16[0], &delta16_3[0] );
+
+    /* a[48..63] */
+    fft_8_sse( &a[48] );
+    fft_4_sse( &a[56] );
+    fft_4_sse( &a[60] );
+    fft_asmb_sse( 2, &a[48], &a[56], &delta16[0], &delta16_3[0] );
+
+    /* final merge over the whole buffer */
+    fft_asmb_sse( 8, &a[0], &a[32], &delta64[0], &delta64_3[0] );
+}
+
+/*****************************************************************************
+ * fft_128p: in-place 128-point FFT assembled from the SSE 4/8-point kernels
+ *****************************************************************************
+ * a[0..63] goes through the same sequence as fft_64p, a[64..95] and
+ * a[96..127] are handled as two 32-entry blocks, and a last fft_asmb_sse
+ * pass with k = 16 combines everything.
+ *****************************************************************************/
+void _M( fft_128p ) ( complex_t *a )
+{
+    /* ---- a[0..63] ---- */
+    fft_8_sse( &a[0] );
+    fft_4_sse( &a[8] );
+    fft_4_sse( &a[12] );
+    fft_asmb_sse( 2, &a[0], &a[8], &delta16[0], &delta16_3[0] );
+
+    fft_8_sse( &a[16] );
+    fft_8_sse( &a[24] );
+    fft_asmb_sse( 4, &a[0], &a[16], &delta32[0], &delta32_3[0] );
+
+    fft_8_sse( &a[32] );
+    fft_4_sse( &a[40] );
+    fft_4_sse( &a[44] );
+    fft_asmb_sse( 2, &a[32], &a[40], &delta16[0], &delta16_3[0] );
+
+    fft_8_sse( &a[48] );
+    fft_4_sse( &a[56] );
+    fft_4_sse( &a[60] );
+    fft_asmb_sse( 2, &a[48], &a[56], &delta16[0], &delta16_3[0] );
+
+    fft_asmb_sse( 8, &a[0], &a[32], &delta64[0], &delta64_3[0] );
+
+    /* ---- a[64..95]: 16-entry block, then widened to 32 ---- */
+    fft_8_sse( &a[64] );
+    fft_4_sse( &a[72] );
+    fft_4_sse( &a[76] );
+    fft_asmb_sse( 2, &a[64], &a[72], &delta16[0], &delta16_3[0] );
+
+    fft_8_sse( &a[80] );
+    fft_8_sse( &a[88] );
+    fft_asmb_sse( 4, &a[64], &a[80], &delta32[0], &delta32_3[0] );
+
+    /* ---- a[96..127]: 16-entry block, then widened to 32 ---- */
+    fft_8_sse( &a[96] );
+    fft_4_sse( &a[104] );
+    fft_4_sse( &a[108] );
+    fft_asmb_sse( 2, &a[96], &a[104], &delta16[0], &delta16_3[0] );
+
+    fft_8_sse( &a[112] );
+    fft_8_sse( &a[120] );
+    fft_asmb_sse( 4, &a[96], &a[112], &delta32[0], &delta32_3[0] );
+
+    /* ---- final merge over all 128 entries ---- */
+    fft_asmb_sse( 16, &a[0], &a[64], &delta128[0], &delta128_3[0] );
+}
+
+/*****************************************************************************
+ * hsqrt2: table of +/- 1/sqrt(2) constants, emitted as raw data
+ *****************************************************************************
+ * Not meant to be called: the asm below places four floats (two positive,
+ * then two negative) where the function body would be. fft_8_sse takes the
+ * address of this symbol ("movl $hsqrt2") and loads 16 bytes from it.
+ * NOTE(review): that load only sees these four values if the compiler emits
+ * no instructions ahead of the asm (e.g. no frame-pointer prologue) --
+ * confirm the build flags guarantee this.
+ *****************************************************************************/
+void hsqrt2 (void)
+{
+ __asm__ (
+ ".float 0f0.707106781188\n"
+ ".float 0f0.707106781188\n"
+ ".float 0f-0.707106781188\n"
+ ".float 0f-0.707106781188\n"
+ );
+}
+
+/*****************************************************************************
+ * C_1: table of alternating -1.0 / +1.0 constants, emitted as raw data
+ *****************************************************************************
+ * Not meant to be called: the asm below places four floats where the
+ * function body would be. fft_8_sse and fft_asmb_sse take the address of
+ * this symbol ("movl $C_1") and load 16 bytes from it to use as a sign mask
+ * in mulps.
+ * NOTE(review): as with hsqrt2, this presumably relies on the compiler
+ * emitting nothing before the asm -- confirm against the build flags.
+ *****************************************************************************/
+void C_1 (void)
+{
+ __asm__ (
+ ".float 0f-1.0\n"
+ ".float 0f1.0\n"
+ ".float 0f-1.0\n"
+ ".float 0f1.0\n"
+ );
+}
+
+/*****************************************************************************
+ * fft_4_sse: SSE 4-point butterfly, computed in place on x[0..3]
+ *****************************************************************************
+ * x is passed in %eax. The routine loads 32 bytes starting at x, combines
+ * them as described by the per-instruction comments, and stores the result
+ * back over the same 32 bytes.
+ * The "memory" clobber tells the compiler that the asm reads and writes
+ * memory that is not named in its operand list.
+ * NOTE(review): xmm0-xmm4 and xmm6 are modified but not listed as clobbered;
+ * confirm the compiler never keeps live values in them around this call.
+ *****************************************************************************/
+static void fft_4_sse (complex_t *x)
+{
+ __asm__ __volatile__ (
+ "movups (%%eax), %%xmm0\n" /* x[1] | x[0] */
+ "movups 16(%%eax), %%xmm2\n" /* x[3] | x[2] */
+ "movups %%xmm0, %%xmm1\n" /* x[1] | x[0] */
+ "addps %%xmm2, %%xmm0\n" /* x[1] + x[3] | x[0] + x[2] */
+ "subps %%xmm2, %%xmm1\n" /* x[1] - x[3] | x[0] - x[2] */
+ "xorps %%xmm6, %%xmm6\n"
+ "movhlps %%xmm1, %%xmm4\n" /* ? | x[1] - x[3] */
+ "movhlps %%xmm0, %%xmm3\n" /* ? | x[1] + x[3] */
+ "subss %%xmm4, %%xmm6\n" /* 0 | -(x[1] - x[3]).re */
+ "movlhps %%xmm1, %%xmm0\n" /* x[0] - x[2] | x[0] + x[2] */
+ "movlhps %%xmm6, %%xmm4\n" /* 0 | -(x[1] - x[3]).re | (x[1] - x[3]).im | (x[3]-x[1]).re */
+ "movups %%xmm0, %%xmm2\n" /* x[0] - x[2] | x[0] + x[2] */
+ "shufps $0x94, %%xmm4, %%xmm3\n" /* i*(x[1] - x[3]) | x[1] + x[3] */
+ "addps %%xmm3, %%xmm0\n"
+ "subps %%xmm3, %%xmm2\n"
+ "movups %%xmm0, (%%eax)\n"
+ "movups %%xmm2, 16(%%eax)\n"
+ : "=a" (x)
+ : "a" (x)
+ : "memory" );
+}
+
+/*****************************************************************************
+ * fft_8_sse: SSE 8-point butterfly, computed in place on x[0..7]
+ *****************************************************************************
+ * x is passed in %eax. The routine reads 64 bytes starting at x and stores
+ * its results back over the same 64 bytes. %ebx is used as a scratch pointer
+ * to the hsqrt2 and C_1 constant tables and is saved/restored on the stack.
+ * The "memory" clobber tells the compiler that the asm reads and writes
+ * memory that is not named in its operand list.
+ * NOTE(review): xmm0-xmm7 are modified but not listed as clobbered, and the
+ * tables are addressed through absolute symbol immediates ("$hsqrt2",
+ * "$C_1") -- confirm both are acceptable for the way this file is built.
+ *****************************************************************************/
+static void fft_8_sse (complex_t *x)
+{
+ __asm__ __volatile__ (
+ "pushl %%ebx\n"
+
+ "movlps (%%eax), %%xmm0\n" /* x[0] */
+ "movlps 32(%%eax), %%xmm1\n" /* x[4] */
+ "movhps 16(%%eax), %%xmm0\n" /* x[2] | x[0] */
+ "movhps 48(%%eax), %%xmm1\n" /* x[6] | x[4] */
+ "movups %%xmm0, %%xmm2\n" /* x[2] | x[0] */
+ "xorps %%xmm3, %%xmm3\n"
+ "addps %%xmm1, %%xmm0\n" /* x[2] + x[6] | x[0] + x[4] */
+ "subps %%xmm1, %%xmm2\n" /* x[2] - x[6] | x[0] - x[4] */
+ "movhlps %%xmm0, %%xmm5\n" /* x[2] + x[6] */
+ "movhlps %%xmm2, %%xmm4\n" /* x[2] - x[6] */
+ "movlhps %%xmm2, %%xmm0\n" /* x[0] - x[4] | x[0] + x[4] */
+ "subss %%xmm4, %%xmm3\n" /* (x[2]-x[6]).im | -(x[2]-x[6]).re */
+ "movups %%xmm0, %%xmm7\n" /* x[0] - x[4] | x[0] + x[4] */
+ "movups %%xmm3, %%xmm4\n" /* (x[2]-x[6]).im | -(x[2]-x[6]).re */
+ "movlps 8(%%eax), %%xmm1\n" /* x[1] */
+ "shufps $0x14, %%xmm4, %%xmm5\n" /* i*(x[2] - x[6]) | x[2] + x[6] */
+
+ "addps %%xmm5, %%xmm0\n" /* yt = i*(x2-x6)+x0-x4 | x2+x6+x0+x4 */
+ "subps %%xmm5, %%xmm7\n" /* yb = i*(x6-x2)+x0-x4 | -x6-x2+x0+x4 */
+
+ "movhps 24(%%eax), %%xmm1\n" /* x[3] | x[1] */
+ "movl $hsqrt2, %%ebx\n"
+ "movlps 40(%%eax), %%xmm2\n" /* x[5] */
+ "movhps 56(%%eax), %%xmm2\n" /* x[7] | x[5] */
+ "movups %%xmm1, %%xmm3\n" /* x[3] | x[1] */
+ "addps %%xmm2, %%xmm1\n" /* x[3] + x[7] | x[1] + x[5] */
+ "subps %%xmm2, %%xmm3\n" /* x[3] - x[7] | x[1] - x[5] */
+ "movups (%%ebx), %%xmm4\n" /* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */
+ "movups %%xmm3, %%xmm6\n" /* x[3] - x[7] | x[1] - x[5] */
+ "mulps %%xmm4, %%xmm3\n" /* -1/s2*(x[3] - x[7]) | 1/s2*(x[1] - x[5]) */
+ "shufps $0xc8, %%xmm4, %%xmm4\n" /* -1/sqrt2 | 1/sqrt2 | -1/sqrt2 | 1/sqrt2 */
+ "shufps $0xb1, %%xmm6, %%xmm6\n" /* (x3-x7).re|(x3-x7).im|(x1-x5).re|(x1-x5).im */
+ "mulps %%xmm4, %%xmm6\n" /* (x7-x3).re/s2|(x3-x7).im/s2|(x5-x1).re/s2|(x1-x5).im/s2 */
+ "addps %%xmm3, %%xmm6\n" /* (-1-i)/sqrt2 * (x[3]-x[7]) | (1-i)/sqrt2 * (x[1] - x[5]) */
+ "movhlps %%xmm1, %%xmm5\n" /* x[3] + x[7] */
+ "movlhps %%xmm6, %%xmm1\n" /* (1+i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
+ "shufps $0xe4, %%xmm6, %%xmm5\n" /* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */
+ "movups %%xmm1, %%xmm3\n" /* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
+ "movl $C_1, %%ebx\n"
+ "addps %%xmm5, %%xmm1\n" /* u */
+ "subps %%xmm5, %%xmm3\n" /* v */
+ "movups %%xmm0, %%xmm2\n" /* yt */
+ "movups %%xmm7, %%xmm4\n" /* yb */
+ "movups (%%ebx), %%xmm5\n"
+ "mulps %%xmm5, %%xmm3\n"
+ "addps %%xmm1, %%xmm0\n" /* yt + u */
+ "subps %%xmm1, %%xmm2\n" /* yt - u */
+ "shufps $0xb1, %%xmm3, %%xmm3\n" /* -i * v */
+ "movups %%xmm0, (%%eax)\n"
+ "movups %%xmm2, 32(%%eax)\n"
+ "addps %%xmm3, %%xmm4\n" /* yb - i*v */
+ "subps %%xmm3, %%xmm7\n" /* yb + i*v */
+ "movups %%xmm4, 16(%%eax)\n"
+ "movups %%xmm7, 48(%%eax)\n"
+
+ "popl %%ebx\n"
+ : "=a" (x)
+ : "a" (x)
+ : "memory" );
+}
+
+
+/*****************************************************************************
+ * fft_asmb_sse: SSE merge pass combining previously transformed sub-blocks
+ *****************************************************************************
+ * The asm has no operands. It builds its own frame (pushl %ebp / movl %esp,
+ * %ebp) and fetches the arguments from the stack:
+ *    8(%ebp) = k, 12(%ebp) = x, 16(%ebp) = wTB, 20(%ebp) = d, 24(%ebp) = d_3
+ * A copy of k kept at -4(%ebp) serves as the loop counter.
+ *
+ * With %ecx = 16 * k (bytes), each step reads 16 bytes from wTB and from
+ * wTB + %ecx, multiplies them by entries of d and d_3, and updates 16 bytes
+ * at each of x, x + %ecx, x + 2 * %ecx and x + 3 * %ecx.
+ * The first step is peeled off before ".loop" and starts at d + 8 / d_3 + 8
+ * (its asm comments call this TRANSZERO and TRANS); the counter is then
+ * decremented and the loop handles the remaining steps.
+ *
+ * NOTE(review): the argument offsets presumably require that the compiler
+ * emits no prologue of its own and does not inline this function -- confirm
+ * against the build flags.
+ * NOTE(review): there is no branch between the first "decl" and ".loop", so
+ * the loop body always runs at least once; every caller in this file passes
+ * k >= 2.
+ * NOTE(review): ".loop" and ".end" are plain assembler labels, so a second
+ * copy of this asm in the same object would presumably clash; ".end" is never
+ * jumped to.
+ * NOTE(review): no registers or memory are declared as clobbered; the
+ * general-purpose registers are saved and restored by hand, xmm0-xmm7 are not.
+ *****************************************************************************/
+static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
+ const complex_t *d, const complex_t *d_3)
+{
+ __asm__ __volatile__ (
+ "pushl %%ebp\n"
+ "movl %%esp, %%ebp\n"
+
+ "subl $4, %%esp\n"
+
+ "pushl %%eax\n"
+ "pushl %%ebx\n"
+ "pushl %%ecx\n"
+ "pushl %%edx\n"
+ "pushl %%esi\n"
+ "pushl %%edi\n"
+
+ "movl 8(%%ebp), %%ecx\n" /* k */
+ "movl 12(%%ebp), %%eax\n" /* x */
+ "movl %%ecx, -4(%%ebp)\n" /* k */
+ "movl 16(%%ebp), %%ebx\n" /* wT */
+ "movl 20(%%ebp), %%edx\n" /* d */
+ "movl 24(%%ebp), %%esi\n" /* d3 */
+ "shll $4, %%ecx\n" /* 16k */
+ "addl $8, %%edx\n"
+ "leal (%%eax, %%ecx, 2), %%edi\n"
+ "addl $8, %%esi\n"
+
+ /* TRANSZERO and TRANS */
+ "movups (%%eax), %%xmm0\n" /* x[1] | x[0] */
+ "movups (%%ebx), %%xmm1\n" /* wT[1] | wT[0] */
+ "movups (%%ebx, %%ecx), %%xmm2\n" /* wB[1] | wB[0] */
+ "movlps (%%edx), %%xmm3\n" /* d */
+ "movlps (%%esi), %%xmm4\n" /* d3 */
+ "movhlps %%xmm1, %%xmm5\n" /* wT[1] */
+ "movhlps %%xmm2, %%xmm6\n" /* wB[1] */
+ "shufps $0x50, %%xmm3, %%xmm3\n" /* d[1].im | d[1].im | d[1].re | d[1].re */
+ "shufps $0x50, %%xmm4, %%xmm4\n" /* d3[1].im | d3[1].im | d3[i].re | d3[i].re */
+ "movlhps %%xmm5, %%xmm5\n" /* wT[1] | wT[1] */
+ "movlhps %%xmm6, %%xmm6\n" /* wB[1] | wB[1] */
+ "mulps %%xmm3, %%xmm5\n"
+ "mulps %%xmm4, %%xmm6\n"
+ "movhlps %%xmm5, %%xmm7\n" /* wT[1].im * d[1].im | wT[1].re * d[1].im */
+ "movlhps %%xmm6, %%xmm5\n" /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */
+ "shufps $0xb1, %%xmm6, %%xmm7\n" /* wB[1].re * d3[1].im | wB[i].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */
+ /* %edi is borrowed to reach the C_1 sign table; it is recomputed below */
+ "movl $C_1, %%edi\n"
+ "movups (%%edi), %%xmm4\n"
+ "mulps %%xmm4, %%xmm7\n"
+ "addps %%xmm7, %%xmm5\n" /* wB[1] * d3[1] | wT[1] * d[1] */
+ "movlhps %%xmm5, %%xmm1\n" /* d[1] * wT[1] | wT[0] */
+ "shufps $0xe4, %%xmm5, %%xmm2\n" /* d3[1] * wB[1] | wB[0] */
+ "movups %%xmm1, %%xmm3\n" /* d[1] * wT[1] | wT[0] */
+ "leal (%%eax, %%ecx, 2), %%edi\n"
+ "addps %%xmm2, %%xmm1\n" /* u */
+ "subps %%xmm2, %%xmm3\n" /* v */
+ "mulps %%xmm4, %%xmm3\n"
+ "movups (%%eax, %%ecx), %%xmm5\n" /* xk[1] | xk[0] */
+ "shufps $0xb1, %%xmm3, %%xmm3\n" /* -i * v */
+ "movups %%xmm0, %%xmm2\n" /* x[1] | x[0] */
+ "movups %%xmm5, %%xmm6\n" /* xk[1] | xk[0] */
+ "addps %%xmm1, %%xmm0\n"
+ "subps %%xmm1, %%xmm2\n"
+ "addps %%xmm3, %%xmm5\n"
+ "subps %%xmm3, %%xmm6\n"
+ "movups %%xmm0, (%%eax)\n"
+ "movups %%xmm2, (%%edi)\n"
+ "movups %%xmm5, (%%eax, %%ecx)\n"
+ "movups %%xmm6, (%%edi, %%ecx)\n"
+ "addl $16, %%eax\n"
+ "addl $16, %%ebx\n"
+ "addl $8, %%edx\n"
+ "addl $8, %%esi\n"
+ /* one step done: count it, then fall through into the loop */
+ "decl -4(%%ebp)\n"
+
+".loop:\n"
+ "movups (%%ebx), %%xmm0\n" /* wT[1] | wT[0] */
+ "movups (%%edx), %%xmm1\n" /* d[1] | d[0] */
+
+ "movups (%%ebx, %%ecx), %%xmm4\n" /* wB[1] | wB[0] */
+ "movups (%%esi), %%xmm5\n" /* d3[1] | d3[0] */
+
+ "movhlps %%xmm0, %%xmm2\n" /* wT[1] */
+ "movhlps %%xmm1, %%xmm3\n" /* d[1] */
+
+ "movhlps %%xmm4, %%xmm6\n" /* wB[1] */
+ "movhlps %%xmm5, %%xmm7\n" /* d3[1] */
+
+ "shufps $0x50, %%xmm1, %%xmm1\n" /* d[0].im | d[0].im | d[0].re | d[0].re */
+ "shufps $0x50, %%xmm3, %%xmm3\n" /* d[1].im | d[1].im | d[1].re | d[1].re */
+
+ "movlhps %%xmm0, %%xmm0\n" /* wT[0] | wT[0] */
+ "shufps $0x50, %%xmm5, %%xmm5\n" /* d3[0].im | d3[0].im | d3[0].re | d3[0].re */
+ "movlhps %%xmm2, %%xmm2\n" /* wT[1] | wT[1] */
+ "shufps $0x50, %%xmm7, %%xmm7\n" /* d3[1].im | d3[1].im | d3[1].re | d3[1].re */
+
+ "mulps %%xmm1, %%xmm0\n" /* d[0].im * wT[0].im | d[0].im * wT[0].re | d[0].re * wT[0].im | d[0].re * wT[0].re */
+ "mulps %%xmm3, %%xmm2\n" /* d[1].im * wT[1].im | d[1].im * wT[1].re | d[1].re * wT[1].im | d[1].re * wT[1].re */
+ "movlhps %%xmm4, %%xmm4\n" /* wB[0] | wB[0] */
+ "movlhps %%xmm6, %%xmm6\n" /* wB[1] | wB[1] */
+
+ "movhlps %%xmm0, %%xmm1\n" /* d[0].im * wT[0].im | d[0].im * wT[0].re */
+ "movlhps %%xmm2, %%xmm0\n" /* d[1].re * wT[1].im | d[1].re * wT[1].re | d[0].re * wT[0].im | d[0].re * wT[0].re */
+ "mulps %%xmm5, %%xmm4\n" /* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */
+ "mulps %%xmm7, %%xmm6\n" /* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */
+ "shufps $0xb1, %%xmm2, %%xmm1\n" /* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */
+ /* %edi again doubles as the pointer to C_1 until the leal further down */
+ "movl $C_1, %%edi\n"
+ "movups (%%edi), %%xmm3\n" /* 1.0 | -1.0 | 1.0 | -1.0 */
+
+ "movhlps %%xmm4, %%xmm5\n" /* wB[0].im * d3[0].im | wB[0].re * d3[0].im */
+ "mulps %%xmm3, %%xmm1\n" /* d[1].im * wT[1].re | -d[1].im * wT[1].im | d[0].im * wT[0].re | -d[0].im * wT[0].im */
+ "movlhps %%xmm6, %%xmm4\n" /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wB[0].im * d3[0].re | wB[0].im * d3[0].re */
+ "addps %%xmm1, %%xmm0\n" /* wT[1] * d[1] | wT[0] * d[0] */
+
+ "shufps $0xb1, %%xmm6, %%xmm5\n" /* wB[1].re * d3[1].im | wB[1].im * d3[1].im | wB[0].re * d3[0].im | wB[0].im * d3[0].im */
+ "mulps %%xmm3, %%xmm5\n" /* wB[1].re * d3[1].im | -wB[1].im * d3[1].im | wB[0].re * d3[0].im | -wB[0].im * d3[0].im */
+ "addps %%xmm5, %%xmm4\n" /* wB[1] * d3[1] | wB[0] * d3[0] */
+
+ "movups %%xmm0, %%xmm1\n" /* wT[1] * d[1] | wT[0] * d[0] */
+ "addps %%xmm4, %%xmm0\n" /* u */
+ "subps %%xmm4, %%xmm1\n" /* v */
+ "movups (%%eax), %%xmm6\n" /* x[1] | x[0] */
+ "leal (%%eax, %%ecx, 2), %%edi\n"
+ "mulps %%xmm3, %%xmm1\n"
+ "addl $16, %%ebx\n"
+ "addl $16, %%esi\n"
+ "shufps $0xb1, %%xmm1, %%xmm1\n" /* -i * v */
+ "movups (%%eax, %%ecx), %%xmm7\n" /* xk[1] | xk[0] */
+ "movups %%xmm6, %%xmm2\n"
+ "movups %%xmm7, %%xmm4\n"
+ "addps %%xmm0, %%xmm6\n"
+ "subps %%xmm0, %%xmm2\n"
+ "movups %%xmm6, (%%eax)\n"
+ "movups %%xmm2, (%%edi)\n"
+ "addps %%xmm1, %%xmm7\n"
+ "subps %%xmm1, %%xmm4\n"
+ "addl $16, %%edx\n"
+ "movups %%xmm7, (%%eax, %%ecx)\n"
+ "movups %%xmm4, (%%edi, %%ecx)\n"
+
+ "addl $16, %%eax\n"
+ "decl -4(%%ebp)\n"
+ "jnz .loop\n"
+
+".end:\n"
+ /* restore the registers saved on entry, in reverse order */
+ "popl %%edi\n"
+ "popl %%esi\n"
+ "popl %%edx\n"
+ "popl %%ecx\n"
+ "popl %%ebx\n"
+ "popl %%eax\n"
+
+ "addl $4, %%esp\n"
+
+ "leave\n"
+ ::);
+}
+
--- /dev/null
+/*****************************************************************************
+ * imdct.c : IMDCT module
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: imdct.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME imdct
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <stdlib.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_imdct.h"
+#include "ac3_imdct_common.h"
+
+#include "modules.h"
+
+/*****************************************************************************
+ * Local and extern prototypes.
+ *****************************************************************************/
+static void imdct_getfunctions( function_list_t * p_function_list );
+static int imdct_Probe ( probedata_t *p_data );
+
+/*****************************************************************************
+ * Build configuration tree.
+ *****************************************************************************/
+/* Configuration tree of the imdct module: one window holding a single
+ * comment entry, i.e. nothing the user can tune. */
+MODULE_CONFIG_START
+ADD_WINDOW( "Configuration for IMDCT module" )
+ ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
+MODULE_CONFIG_END
+
+/*****************************************************************************
+ * InitModule: get the module structure and configuration.
+ *****************************************************************************
+ * We have to fill psz_name, psz_longname and psz_version. These variables
+ * will be strdup()ed later by the main application because the module can
+ * be unloaded later to save memory, and we want to be able to access this
+ * data even after the module has been unloaded.
+ *****************************************************************************/
+MODULE_INIT
+{
+    /* Identification strings for this module */
+    p_module->psz_version  = VERSION;
+    p_module->psz_longname = "AC3 IMDCT module";
+    p_module->psz_name     = MODULE_STRING;
+
+    /* Capability mask: NULL | IMDCT */
+    p_module->i_capabilities = MODULE_CAPABILITY_NULL | MODULE_CAPABILITY_IMDCT;
+
+    return 0;
+}
+
+/*****************************************************************************
+ * ActivateModule: set the module to an usable state.
+ *****************************************************************************
+ * This function fills the capability functions and the configuration
+ * structure. Once ActivateModule() has been called, the i_usage can
+ * be set to 0 and calls to NeedModule() be made to increment it. To unload
+ * the module, one has to wait until i_usage == 0 and call DeactivateModule().
+ *****************************************************************************/
+MODULE_ACTIVATE
+{
+    /* Storage for the function table; released again in MODULE_DEACTIVATE */
+    p_module->p_functions = malloc( sizeof( module_functions_t ) );
+
+    if( !p_module->p_functions )
+    {
+        /* allocation failed */
+        return -1;
+    }
+
+    /* Attach the configuration tree and fill in the imdct function list */
+    p_module->p_config = p_config;
+    imdct_getfunctions( &p_module->p_functions->imdct );
+
+    return 0;
+}
+
+/*****************************************************************************
+ * DeactivateModule: make sure the module can be unloaded.
+ *****************************************************************************
+ * This function must only be called when i_usage == 0. If it successfully
+ * returns, i_usage can be set to -1 and the module unloaded. Be careful to
+ * lock usage_lock during the whole process.
+ *****************************************************************************/
+MODULE_DEACTIVATE
+{
+    /* Release the function table allocated in MODULE_ACTIVATE and clear the
+     * pointer so that a stale reference can neither be used nor freed twice. */
+    free( p_module->p_functions );
+    p_module->p_functions = NULL;
+
+    return( 0 );
+}
+
+/* Following functions are local */
+
+/*****************************************************************************
+ * Functions exported as capabilities. They are declared as static so that
+ * we don't pollute the namespace too much.
+ *****************************************************************************/
+static void imdct_getfunctions( function_list_t * p_function_list )
+{
+    /* Preference-score callback */
+    p_function_list->pf_probe = imdct_Probe;
+
+    /* IMDCT entry points; _M() resolves each name for this module */
+    p_function_list->functions.imdct.pf_imdct_init    = _M( imdct_init );
+    p_function_list->functions.imdct.pf_imdct_256     = _M( imdct_do_256 );
+    p_function_list->functions.imdct.pf_imdct_256_nol = _M( imdct_do_256_nol );
+    p_function_list->functions.imdct.pf_imdct_512     = _M( imdct_do_512 );
+    p_function_list->functions.imdct.pf_imdct_512_nol = _M( imdct_do_512_nol );
+}
+
+/*****************************************************************************
+ * imdct_Probe: returns a preference score
+ *****************************************************************************/
+static int imdct_Probe( probedata_t *p_data )
+{
+    /* 999 when TestMethod() matches "imdct" for IMDCT_METHOD_VAR; otherwise
+     * a fixed score of 50, since this plugin has no CPU requirement and
+     * therefore always works. p_data is not consulted. */
+    return TestMethod( IMDCT_METHOD_VAR, "imdct" ) ? 999 : 50;
+}
+
--- /dev/null
+/*****************************************************************************
+ * imdctsse.c : accelerated SSE IMDCT module
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: imdctsse.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME imdctsse
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <stdlib.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_imdct.h"
+#include "ac3_imdct_common.h"
+
+#include "modules.h"
+
+/*****************************************************************************
+ * Local and extern prototypes.
+ *****************************************************************************/
+static void imdct_getfunctions( function_list_t * p_function_list );
+static int imdct_Probe ( probedata_t *p_data );
+
+/*****************************************************************************
+ * Build configuration tree.
+ *****************************************************************************/
+/* Configuration tree of the imdctsse module: one window holding a single
+ * comment entry, i.e. nothing the user can tune. */
+MODULE_CONFIG_START
+ADD_WINDOW( "Configuration for IMDCT module" )
+ ADD_COMMENT( "Ha, ha -- nothing to configure yet" )
+MODULE_CONFIG_END
+
+/*****************************************************************************
+ * InitModule: get the module structure and configuration.
+ *****************************************************************************
+ * We have to fill psz_name, psz_longname and psz_version. These variables
+ * will be strdup()ed later by the main application because the module can
+ * be unloaded later to save memory, and we want to be able to access this
+ * data even after the module has been unloaded.
+ *****************************************************************************/
+MODULE_INIT
+{
+    p_module->psz_name = MODULE_STRING;
+    /* Use a long name distinct from the plain C imdct module, which reports
+     * "AC3 IMDCT module", so the two can be told apart. */
+    p_module->psz_longname = "SSE AC3 IMDCT module";
+    p_module->psz_version = VERSION;
+
+    /* Capability mask: NULL | IMDCT */
+    p_module->i_capabilities = MODULE_CAPABILITY_NULL
+                                | MODULE_CAPABILITY_IMDCT;
+
+    return( 0 );
+}
+
+/*****************************************************************************
+ * ActivateModule: set the module to an usable state.
+ *****************************************************************************
+ * This function fills the capability functions and the configuration
+ * structure. Once ActivateModule() has been called, the i_usage can
+ * be set to 0 and calls to NeedModule() be made to increment it. To unload
+ * the module, one has to wait until i_usage == 0 and call DeactivateModule().
+ *****************************************************************************/
+MODULE_ACTIVATE
+{
+    /* Storage for the function table; released again in MODULE_DEACTIVATE */
+    p_module->p_functions = malloc( sizeof( module_functions_t ) );
+
+    if( !p_module->p_functions )
+    {
+        /* allocation failed */
+        return -1;
+    }
+
+    /* Attach the configuration tree and fill in the imdct function list */
+    p_module->p_config = p_config;
+    imdct_getfunctions( &p_module->p_functions->imdct );
+
+    return 0;
+}
+
+/*****************************************************************************
+ * DeactivateModule: make sure the module can be unloaded.
+ *****************************************************************************
+ * This function must only be called when i_usage == 0. If it successfully
+ * returns, i_usage can be set to -1 and the module unloaded. Be careful to
+ * lock usage_lock during the whole process.
+ *****************************************************************************/
+MODULE_DEACTIVATE
+{
+    /* Release the function table allocated in MODULE_ACTIVATE and clear the
+     * pointer so that a stale reference can neither be used nor freed twice. */
+    free( p_module->p_functions );
+    p_module->p_functions = NULL;
+
+    return( 0 );
+}
+
+/* Following functions are local */
+
+/*****************************************************************************
+ * Functions exported as capabilities. They are declared as static so that
+ * we don't pollute the namespace too much.
+ *****************************************************************************/
+static void imdct_getfunctions( function_list_t * p_function_list )
+{
+    /* Preference-score callback */
+    p_function_list->pf_probe = imdct_Probe;
+
+    /* IMDCT entry points; _M() resolves each name for this module */
+    p_function_list->functions.imdct.pf_imdct_init    = _M( imdct_init );
+    p_function_list->functions.imdct.pf_imdct_256     = _M( imdct_do_256 );
+    p_function_list->functions.imdct.pf_imdct_256_nol = _M( imdct_do_256_nol );
+    p_function_list->functions.imdct.pf_imdct_512     = _M( imdct_do_512 );
+    p_function_list->functions.imdct.pf_imdct_512_nol = _M( imdct_do_512_nol );
+}
+
+/*****************************************************************************
+ * imdct_Probe: returns a preference score
+ *****************************************************************************/
+static int imdct_Probe( probedata_t *p_data )
+{
+    /* The SSE routines cannot be used on a CPU without SSE */
+    if( !TestCPU( CPU_CAPABILITY_SSE ) )
+    {
+        return( 0 );
+    }
+
+    /* Check the IMDCT method variable; the original code tested the IDCT
+     * variable here, so requesting "imdctsse" never reached this branch. */
+    if( TestMethod( IMDCT_METHOD_VAR, "imdctsse" ) )
+    {
+        return( 999 );
+    }
+
+    /* Default score once SSE support has been confirmed */
+    return( 200 );
+}
+
* motionmmx.c : MMX motion compensation module for vlc
*****************************************************************************
* Copyright (C) 2000 VideoLAN
- * $Id: motionmmx.c,v 1.4 2001/04/15 04:19:57 sam Exp $
+ * $Id: motionmmx.c,v 1.5 2001/05/15 16:19:42 sam Exp $
*
* Authors: Christophe Massiot <massiot@via.ecp.fr>
*
*****************************************************************************/
int _M( motion_Probe )( probedata_t *p_data )
{
- if( TestCPU( CPU_CAPABILITY_MMX ) )
+ if( !TestCPU( CPU_CAPABILITY_MMX ) )
{
- if( TestMethod( MOTION_METHOD_VAR, "motionmmx" ) )
- {
- return( 999 );
- }
- else
- {
- return( 150 );
- }
+ return( 0 );
}
- else
+
+ if( TestMethod( MOTION_METHOD_VAR, "motionmmx" ) )
{
- return( 0 );
+ return( 999 );
}
+
+ return( 150 );
}
* motionmmxext.c : MMX EXT motion compensation module for vlc
*****************************************************************************
* Copyright (C) 2000 VideoLAN
- * $Id: motionmmxext.c,v 1.4 2001/04/15 04:19:57 sam Exp $
+ * $Id: motionmmxext.c,v 1.5 2001/05/15 16:19:42 sam Exp $
*
* Authors: Christophe Massiot <massiot@via.ecp.fr>
*
*****************************************************************************/
int _M( motion_Probe )( probedata_t *p_data )
{
- if( TestCPU( CPU_CAPABILITY_MMXEXT ) )
+ if( !TestCPU( CPU_CAPABILITY_MMXEXT ) )
{
- if( TestMethod( MOTION_METHOD_VAR, "motionmmxext" ) )
- {
- return( 999 );
- }
- else
- {
- return( 200 );
- }
+ return( 0 );
}
- else
+
+ if( TestMethod( MOTION_METHOD_VAR, "motionmmxext" ) )
{
- return( 0 );
+ return( 999 );
}
+
+ return( 200 );
}
* Provides functions to perform the YUV conversion.
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
- * $Id: video_yuvmmx.c,v 1.8 2001/04/15 04:19:58 sam Exp $
+ * $Id: video_yuvmmx.c,v 1.9 2001/05/15 16:19:42 sam Exp $
*
* Authors: Samuel Hocevar <sam@zoy.org>
*
static int yuv_Probe( probedata_t *p_data )
{
/* Test for MMX support in the CPU */
- if( TestCPU( CPU_CAPABILITY_MMX ) )
+ if( !TestCPU( CPU_CAPABILITY_MMX ) )
{
- if( TestMethod( YUV_METHOD_VAR, "yuvmmx" ) )
- {
- return( 999 );
- }
- else
- {
- return( 100 );
- }
+ return( 0 );
}
- else
+
+ if( TestMethod( YUV_METHOD_VAR, "yuvmmx" ) )
{
- return( 0 );
+ return( 999 );
}
+
+ return( 100 );
}
/*****************************************************************************
* ac3_bit_allocate.c: ac3 allocation tables
*****************************************************************************
* Copyright (C) 2000 VideoLAN
- * $Id: ac3_bit_allocate.c,v 1.21 2001/05/14 15:58:03 reno Exp $
+ * $Id: ac3_bit_allocate.c,v 1.22 2001/05/15 16:19:42 sam Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
#include "defs.h"
#include <string.h> /* memcpy() */
#include "threads.h"
#include "mtime.h"
-#include "intf_msg.h" /* intf_DbgMsg(), intf_ErrMsg() */
-
#include "stream_control.h"
#include "input_ext-dec.h"
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
#include "ac3_decoder.h"
+
#include "ac3_internal.h" /* DELTA_BIT_REUSE */
* ac3_decoder.c: core ac3 decoder
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_decoder.c,v 1.33 2001/05/14 15:58:03 reno Exp $
+ * $Id: ac3_decoder.c,v 1.34 2001/05/15 16:19:42 sam Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Michel Lespinasse <walken@zoy.org>
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
#include "defs.h"
#include <string.h> /* memcpy() */
#include "audio_output.h"
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
#include "ac3_decoder.h"
#include "ac3_decoder_thread.h" /* ac3dec_thread_t */
+
#include "ac3_internal.h"
static const float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 };
{
p_ac3dec->mantissa.lfsr_state = 1; /* dither_gen initialization */
imdct_init(&p_ac3dec->imdct);
- downmix_init(&p_ac3dec->downmix);
return 0;
}
int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
{
int i;
- ac3dec_thread_t * p_ac3dec_t = (ac3dec_thread_t *) p_ac3dec->bit_stream.p_callback_arg;
+ ac3dec_thread_t * p_ac3thread = (ac3dec_thread_t *) p_ac3dec->bit_stream.p_callback_arg;
if (parse_bsi (p_ac3dec))
{
return 1;
}
- /* compute downmix parameters
- * downmix to tow channels for now */
- p_ac3dec->dm_par.clev = 0.0;
+ /* compute downmix parameters
+ * downmix to two channels for now */
+ p_ac3dec->dm_par.clev = 0.0;
p_ac3dec->dm_par.slev = 0.0;
p_ac3dec->dm_par.unit = 1.0;
- if (p_ac3dec->bsi.acmod & 0x1) /* have center */
- p_ac3dec->dm_par.clev = cmixlev_lut[p_ac3dec->bsi.cmixlev];
+ if (p_ac3dec->bsi.acmod & 0x1) /* have center */
+ p_ac3dec->dm_par.clev = cmixlev_lut[p_ac3dec->bsi.cmixlev];
- if (p_ac3dec->bsi.acmod & 0x4) /* have surround channels */
- p_ac3dec->dm_par.slev = smixlev_lut[p_ac3dec->bsi.surmixlev];
+ if (p_ac3dec->bsi.acmod & 0x4) /* have surround channels */
+ p_ac3dec->dm_par.slev = smixlev_lut[p_ac3dec->bsi.surmixlev];
p_ac3dec->dm_par.unit /= 1.0 + p_ac3dec->dm_par.clev + p_ac3dec->dm_par.slev;
- p_ac3dec->dm_par.clev *= p_ac3dec->dm_par.unit;
- p_ac3dec->dm_par.slev *= p_ac3dec->dm_par.unit;
+ p_ac3dec->dm_par.clev *= p_ac3dec->dm_par.unit;
+ p_ac3dec->dm_par.slev *= p_ac3dec->dm_par.unit;
for (i = 0; i < 6; i++) {
/* Initialize freq/time sample storage */
(p_ac3dec->bsi.nfchans + p_ac3dec->bsi.lfeon));
- if ((p_ac3dec_t->p_fifo->b_die) && (p_ac3dec_t->p_fifo->b_error))
+ if( p_ac3thread->p_fifo->b_die || p_ac3thread->p_fifo->b_error )
{
return 1;
}
- if (parse_audblk (p_ac3dec, i))
+ if( parse_audblk( p_ac3dec, i ) )
{
- intf_WarnMsg (3,"ac3dec warn: error during audioblock");
- parse_auxdata (p_ac3dec);
+ intf_WarnMsg( 3, "ac3dec warning: error during audioblock" );
+ parse_auxdata( p_ac3dec );
return 1;
}
- if ((p_ac3dec_t->p_fifo->b_die) && (p_ac3dec_t->p_fifo->b_error))
+ if( p_ac3thread->p_fifo->b_die || p_ac3thread->p_fifo->b_error )
{
return 1;
}
- if (exponent_unpack (p_ac3dec))
+ if( exponent_unpack( p_ac3dec ) )
{
- intf_WarnMsg (3,"ac3dec warn: error during unpack");
- parse_auxdata (p_ac3dec);
+ intf_WarnMsg( 3, "ac3dec warning: error during unpack" );
+ parse_auxdata( p_ac3dec );
return 1;
}
+
bit_allocate (p_ac3dec);
mantissa_unpack (p_ac3dec);
- if ((p_ac3dec_t->p_fifo->b_die) && (p_ac3dec_t->p_fifo->b_error))
+ if( p_ac3thread->p_fifo->b_die || p_ac3thread->p_fifo->b_error )
{
return 1;
}
if (p_ac3dec->bsi.acmod == 0x2)
+ {
rematrix (p_ac3dec);
+ }
+
imdct (p_ac3dec, buffer);
- buffer += 2*256;
+ buffer += 2 * 256;
}
parse_auxdata (p_ac3dec);
return 0;
}
+
* ac3_decoder.h : ac3 decoder interface
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_decoder.h,v 1.8 2001/05/14 15:58:03 reno Exp $
+ * $Id: ac3_decoder.h,v 1.9 2001/05/15 16:19:42 sam Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Renaud Dartus <reno@videolan.org>
u16 lfsr_state;
} mantissa_t;
-typedef struct complex_s {
- float real;
- float imag;
-} complex_t;
-
-#define N 512
-
-typedef struct imdct_s
-{
- complex_t buf[N/4];
-
- /* Delay buffer for time domain interleaving */
- float delay[6][256];
- float delay1[6][256];
-
- /* Twiddle factors for IMDCT */
- float xcos1[N/4];
- float xsin1[N/4];
- float xcos2[N/8];
- float xsin2[N/8];
-
- /* Twiddle factor LUT */
- complex_t *w[7];
- complex_t w_1[1];
- complex_t w_2[2];
- complex_t w_4[4];
- complex_t w_8[8];
- complex_t w_16[16];
- complex_t w_32[32];
- complex_t w_64[64];
-
- float xcos_sin_sse[128 * 4] __attribute__((aligned(16)));
-
- /* Functions */
- void (*fft_64p) (complex_t *a);
-
- void (*imdct_do_512)(struct imdct_s * p_imdct, float data[], float delay[]);
- void (*imdct_do_512_nol)(struct imdct_s * p_imdct, float data[], float delay[]);
-
-} imdct_t;
-
-typedef struct dm_par_s {
- float unit;
- float clev;
- float slev;
-} dm_par_t;
-
-typedef struct downmix_s {
- void (*downmix_3f_2r_to_2ch)(float *samples, dm_par_t * dm_par);
- void (*downmix_3f_1r_to_2ch)(float *samples, dm_par_t * dm_par);
- void (*downmix_2f_2r_to_2ch)(float *samples, dm_par_t * dm_par);
- void (*downmix_2f_1r_to_2ch)(float *samples, dm_par_t * dm_par);
- void (*downmix_3f_0r_to_2ch)(float *samples, dm_par_t * dm_par);
- void (*stream_sample_2ch_to_s16)(s16 *s16_samples, float *left, float *right);
- void (*stream_sample_1ch_to_s16)(s16 *s16_samples, float *center);
-} downmix_t;
-
struct ac3dec_s
{
/*
downmix_t downmix;
};
+
* ac3_decoder_thread.c: ac3 decoder thread
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_decoder_thread.c,v 1.32 2001/05/06 04:32:02 sam Exp $
+ * $Id: ac3_decoder_thread.c,v 1.33 2001/05/15 16:19:42 sam Exp $
*
* Authors: Michel Lespinasse <walken@zoy.org>
*
#include <unistd.h> /* getpid() */
-#include <stdio.h> /* "intf_msg.h" */
#include <stdlib.h> /* malloc(), free() */
#include <string.h> /* memset() */
#include "common.h"
#include "threads.h"
#include "mtime.h"
+#include "modules.h"
#include "intf_msg.h" /* intf_DbgMsg(), intf_ErrMsg() */
#include "audio_output.h"
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
#include "ac3_decoder.h"
#include "ac3_decoder_thread.h"
static void BitstreamCallback ( bit_stream_t *p_bit_stream,
boolean_t b_new_pes );
-
/*****************************************************************************
* ac3dec_CreateThread: creates an ac3 decoder thread
*****************************************************************************/
vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config )
{
- ac3dec_thread_t * p_ac3dec_t;
+ ac3dec_thread_t * p_ac3thread;
intf_DbgMsg( "ac3dec debug: creating ac3 decoder thread" );
/* Allocate the memory needed to store the thread's structure */
- if((p_ac3dec_t = (ac3dec_thread_t *)malloc(sizeof(ac3dec_thread_t)))==NULL)
+ if((p_ac3thread = (ac3dec_thread_t *)malloc(sizeof(ac3dec_thread_t)))==NULL)
{
intf_ErrMsg ( "ac3dec error: not enough memory "
"for ac3dec_CreateThread() to create the new thread");
/*
* Initialize the thread properties
*/
- p_ac3dec_t->p_config = p_config;
- p_ac3dec_t->p_fifo = p_config->decoder_config.p_decoder_fifo;
+ p_ac3thread->p_config = p_config;
+ p_ac3thread->p_fifo = p_config->decoder_config.p_decoder_fifo;
+
+ /*
+ * Choose the best downmix module
+ */
+#define DOWNMIX p_ac3thread->ac3_decoder.downmix
+ DOWNMIX.p_module = module_Need( MODULE_CAPABILITY_DOWNMIX, NULL );
+
+ if( DOWNMIX.p_module == NULL )
+ {
+ intf_ErrMsg( "ac3dec error: no suitable downmix module" );
+ free( p_ac3thread );
+ return( 0 );
+ }
+
+#define F DOWNMIX.p_module->p_functions->downmix.functions.downmix
+ DOWNMIX.pf_downmix_3f_2r_to_2ch = F.pf_downmix_3f_2r_to_2ch;
+ DOWNMIX.pf_downmix_2f_2r_to_2ch = F.pf_downmix_2f_2r_to_2ch;
+ DOWNMIX.pf_downmix_3f_1r_to_2ch = F.pf_downmix_3f_1r_to_2ch;
+ DOWNMIX.pf_downmix_2f_1r_to_2ch = F.pf_downmix_2f_1r_to_2ch;
+ DOWNMIX.pf_downmix_3f_0r_to_2ch = F.pf_downmix_3f_0r_to_2ch;
+ DOWNMIX.pf_stream_sample_2ch_to_s16 = F.pf_stream_sample_2ch_to_s16;
+ DOWNMIX.pf_stream_sample_1ch_to_s16 = F.pf_stream_sample_1ch_to_s16;
+#undef F
+#undef DOWNMIX
+
+ /*
+ * Choose the best IMDCT module
+ */
+#define IMDCT p_ac3thread->ac3_decoder.imdct
+ IMDCT.p_module = module_Need( MODULE_CAPABILITY_IMDCT, NULL );
+
+ if( IMDCT.p_module == NULL )
+ {
+ intf_ErrMsg( "ac3dec error: no suitable IMDCT module" );
+ module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module );
+ free( p_ac3thread );
+ return( 0 );
+ }
+
+#define F IMDCT.p_module->p_functions->imdct.functions.imdct
+ IMDCT.pf_imdct_init = F.pf_imdct_init;
+ IMDCT.pf_imdct_256 = F.pf_imdct_256;
+ IMDCT.pf_imdct_256_nol = F.pf_imdct_256_nol;
+ IMDCT.pf_imdct_512 = F.pf_imdct_512;
+ IMDCT.pf_imdct_512_nol = F.pf_imdct_512_nol;
+#undef F
+#undef IMDCT
/* Initialize the ac3 decoder structures */
- ac3_init (&p_ac3dec_t->ac3_decoder);
+ ac3_init (&p_ac3thread->ac3_decoder);
/*
* Initialize the output properties
*/
- p_ac3dec_t->p_aout_fifo = NULL;
+ p_ac3thread->p_aout_fifo = NULL;
/* Spawn the ac3 decoder thread */
- if (vlc_thread_create(&p_ac3dec_t->thread_id, "ac3 decoder",
- (vlc_thread_func_t)RunThread, (void *)p_ac3dec_t))
+ if (vlc_thread_create(&p_ac3thread->thread_id, "ac3 decoder",
+ (vlc_thread_func_t)RunThread, (void *)p_ac3thread))
{
intf_ErrMsg( "ac3dec error: can't spawn ac3 decoder thread" );
- free (p_ac3dec_t);
+ module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module );
+ module_Unneed( p_ac3thread->ac3_decoder.imdct.p_module );
+ free (p_ac3thread);
return 0;
}
- intf_DbgMsg ("ac3dec debug: ac3 decoder thread (%p) created", p_ac3dec_t);
- return p_ac3dec_t->thread_id;
+ intf_DbgMsg ("ac3dec debug: ac3 decoder thread (%p) created", p_ac3thread);
+ return p_ac3thread->thread_id;
}
/* Following functions are local */
/*****************************************************************************
* InitThread : initialize an ac3 decoder thread
*****************************************************************************/
-static int InitThread (ac3dec_thread_t * p_ac3dec_t)
+static int InitThread (ac3dec_thread_t * p_ac3thread)
{
- intf_DbgMsg("ac3dec debug: initializing ac3 decoder thread %p",p_ac3dec_t);
+ intf_DbgMsg("ac3dec debug: initializing ac3 decoder thread %p",p_ac3thread);
- p_ac3dec_t->p_config->decoder_config.pf_init_bit_stream(
- &p_ac3dec_t->ac3_decoder.bit_stream,
- p_ac3dec_t->p_config->decoder_config.p_decoder_fifo,
- BitstreamCallback, (void *) p_ac3dec_t );
+ p_ac3thread->p_config->decoder_config.pf_init_bit_stream(
+ &p_ac3thread->ac3_decoder.bit_stream,
+ p_ac3thread->p_config->decoder_config.p_decoder_fifo,
+ BitstreamCallback, (void *) p_ac3thread );
/* Creating the audio output fifo */
- p_ac3dec_t->p_aout_fifo = aout_CreateFifo( AOUT_ADEC_STEREO_FIFO, 2, 0, 0,
+ p_ac3thread->p_aout_fifo = aout_CreateFifo( AOUT_ADEC_STEREO_FIFO, 2, 0, 0,
AC3DEC_FRAME_SIZE, NULL );
- if ( p_ac3dec_t->p_aout_fifo == NULL )
+ if ( p_ac3thread->p_aout_fifo == NULL )
{
return -1;
}
- intf_DbgMsg("ac3dec debug: ac3 decoder thread %p initialized", p_ac3dec_t);
+ intf_DbgMsg("ac3dec debug: ac3 decoder thread %p initialized", p_ac3thread);
return 0;
}
/*****************************************************************************
* RunThread : ac3 decoder thread
*****************************************************************************/
-static void RunThread (ac3dec_thread_t * p_ac3dec_t)
+static void RunThread (ac3dec_thread_t * p_ac3thread)
{
int sync;
- intf_DbgMsg ("ac3dec debug: running ac3 decoder thread (%p) (pid == %i)", p_ac3dec_t, getpid());
+ intf_DbgMsg ("ac3dec debug: running ac3 decoder thread (%p) (pid == %i)", p_ac3thread, getpid());
/* Initializing the ac3 decoder thread */
- if (InitThread (p_ac3dec_t)) /* XXX?? */
+ if (InitThread (p_ac3thread)) /* XXX?? */
{
- p_ac3dec_t->p_fifo->b_error = 1;
+ p_ac3thread->p_fifo->b_error = 1;
}
sync = 0;
- p_ac3dec_t->sync_ptr = 0;
+ p_ac3thread->sync_ptr = 0;
/* ac3 decoder thread's main loop */
/* FIXME : do we have enough room to store the decoded frames ?? */
- while ((!p_ac3dec_t->p_fifo->b_die) && (!p_ac3dec_t->p_fifo->b_error))
+ while ((!p_ac3thread->p_fifo->b_die) && (!p_ac3thread->p_fifo->b_error))
{
s16 * buffer;
ac3_sync_info_t sync_info;
if (!sync) {
do {
- GetBits(&p_ac3dec_t->ac3_decoder.bit_stream,8);
- } while ((!p_ac3dec_t->sync_ptr) && (!p_ac3dec_t->p_fifo->b_die)
- && (!p_ac3dec_t->p_fifo->b_error));
+ GetBits(&p_ac3thread->ac3_decoder.bit_stream,8);
+ } while ((!p_ac3thread->sync_ptr) && (!p_ac3thread->p_fifo->b_die)
+ && (!p_ac3thread->p_fifo->b_error));
- ptr = p_ac3dec_t->sync_ptr;
+ ptr = p_ac3thread->sync_ptr;
- while(ptr-- && (!p_ac3dec_t->p_fifo->b_die)
- && (!p_ac3dec_t->p_fifo->b_error))
+ while(ptr-- && (!p_ac3thread->p_fifo->b_die)
+ && (!p_ac3thread->p_fifo->b_error))
{
- p_ac3dec_t->ac3_decoder.bit_stream.p_byte++;
+ p_ac3thread->ac3_decoder.bit_stream.p_byte++;
}
/* we are in sync now */
sync = 1;
}
- if (DECODER_FIFO_START(*p_ac3dec_t->p_fifo)->i_pts)
+ if (DECODER_FIFO_START(*p_ac3thread->p_fifo)->i_pts)
{
- p_ac3dec_t->p_aout_fifo->date[p_ac3dec_t->p_aout_fifo->l_end_frame] =
- DECODER_FIFO_START(*p_ac3dec_t->p_fifo)->i_pts;
- DECODER_FIFO_START(*p_ac3dec_t->p_fifo)->i_pts = 0;
+ p_ac3thread->p_aout_fifo->date[p_ac3thread->p_aout_fifo->l_end_frame] =
+ DECODER_FIFO_START(*p_ac3thread->p_fifo)->i_pts;
+ DECODER_FIFO_START(*p_ac3thread->p_fifo)->i_pts = 0;
} else {
- p_ac3dec_t->p_aout_fifo->date[p_ac3dec_t->p_aout_fifo->l_end_frame] =
+ p_ac3thread->p_aout_fifo->date[p_ac3thread->p_aout_fifo->l_end_frame] =
LAST_MDATE;
}
- if (ac3_sync_frame (&p_ac3dec_t->ac3_decoder, &sync_info))
+ if (ac3_sync_frame (&p_ac3thread->ac3_decoder, &sync_info))
{
sync = 0;
goto bad_frame;
}
- p_ac3dec_t->p_aout_fifo->l_rate = sync_info.sample_rate;
+ p_ac3thread->p_aout_fifo->l_rate = sync_info.sample_rate;
- buffer = ((s16 *)p_ac3dec_t->p_aout_fifo->buffer) +
- (p_ac3dec_t->p_aout_fifo->l_end_frame * AC3DEC_FRAME_SIZE);
+ buffer = ((s16 *)p_ac3thread->p_aout_fifo->buffer) +
+ (p_ac3thread->p_aout_fifo->l_end_frame * AC3DEC_FRAME_SIZE);
- if (ac3_decode_frame (&p_ac3dec_t->ac3_decoder, buffer))
+ if (ac3_decode_frame (&p_ac3thread->ac3_decoder, buffer))
{
sync = 0;
goto bad_frame;
}
- vlc_mutex_lock (&p_ac3dec_t->p_aout_fifo->data_lock);
- p_ac3dec_t->p_aout_fifo->l_end_frame =
- (p_ac3dec_t->p_aout_fifo->l_end_frame + 1) & AOUT_FIFO_SIZE;
- vlc_cond_signal (&p_ac3dec_t->p_aout_fifo->data_wait);
- vlc_mutex_unlock (&p_ac3dec_t->p_aout_fifo->data_lock);
+ vlc_mutex_lock (&p_ac3thread->p_aout_fifo->data_lock);
+ p_ac3thread->p_aout_fifo->l_end_frame =
+ (p_ac3thread->p_aout_fifo->l_end_frame + 1) & AOUT_FIFO_SIZE;
+ vlc_cond_signal (&p_ac3thread->p_aout_fifo->data_wait);
+ vlc_mutex_unlock (&p_ac3thread->p_aout_fifo->data_lock);
bad_frame:
- RealignBits(&p_ac3dec_t->ac3_decoder.bit_stream);
+ RealignBits(&p_ac3thread->ac3_decoder.bit_stream);
}
/* If b_error is set, the ac3 decoder thread enters the error loop */
- if (p_ac3dec_t->p_fifo->b_error)
+ if (p_ac3thread->p_fifo->b_error)
{
- ErrorThread (p_ac3dec_t);
+ ErrorThread (p_ac3thread);
}
/* End of the ac3 decoder thread */
- EndThread (p_ac3dec_t);
+ EndThread (p_ac3thread);
}
/*****************************************************************************
* ErrorThread : ac3 decoder's RunThread() error loop
*****************************************************************************/
-static void ErrorThread (ac3dec_thread_t * p_ac3dec_t)
+static void ErrorThread (ac3dec_thread_t * p_ac3thread)
{
/* We take the lock, because we are going to read/write the start/end
* indexes of the decoder fifo */
- vlc_mutex_lock (&p_ac3dec_t->p_fifo->data_lock);
+ vlc_mutex_lock (&p_ac3thread->p_fifo->data_lock);
/* Wait until a `die' order is sent */
- while (!p_ac3dec_t->p_fifo->b_die)
+ while (!p_ac3thread->p_fifo->b_die)
{
/* Trash all received PES packets */
- while (!DECODER_FIFO_ISEMPTY(*p_ac3dec_t->p_fifo))
+ while (!DECODER_FIFO_ISEMPTY(*p_ac3thread->p_fifo))
{
- p_ac3dec_t->p_fifo->pf_delete_pes(p_ac3dec_t->p_fifo->p_packets_mgt,
- DECODER_FIFO_START(*p_ac3dec_t->p_fifo));
- DECODER_FIFO_INCSTART (*p_ac3dec_t->p_fifo);
+ p_ac3thread->p_fifo->pf_delete_pes(p_ac3thread->p_fifo->p_packets_mgt,
+ DECODER_FIFO_START(*p_ac3thread->p_fifo));
+ DECODER_FIFO_INCSTART (*p_ac3thread->p_fifo);
}
/* Waiting for the input thread to put new PES packets in the fifo */
- vlc_cond_wait (&p_ac3dec_t->p_fifo->data_wait,
- &p_ac3dec_t->p_fifo->data_lock);
+ vlc_cond_wait (&p_ac3thread->p_fifo->data_wait,
+ &p_ac3thread->p_fifo->data_lock);
}
/* We can release the lock before leaving */
- vlc_mutex_unlock (&p_ac3dec_t->p_fifo->data_lock);
+ vlc_mutex_unlock (&p_ac3thread->p_fifo->data_lock);
}
/*****************************************************************************
* EndThread : ac3 decoder thread destruction
*****************************************************************************/
-static void EndThread (ac3dec_thread_t * p_ac3dec_t)
+static void EndThread (ac3dec_thread_t * p_ac3thread)
{
- intf_DbgMsg ("ac3dec debug: destroying ac3 decoder thread %p", p_ac3dec_t);
+ intf_DbgMsg ("ac3dec debug: destroying ac3 decoder thread %p", p_ac3thread);
/* If the audio output fifo was created, we destroy it */
- if (p_ac3dec_t->p_aout_fifo != NULL)
+ if (p_ac3thread->p_aout_fifo != NULL)
{
- aout_DestroyFifo (p_ac3dec_t->p_aout_fifo);
+ aout_DestroyFifo (p_ac3thread->p_aout_fifo);
/* Make sure the output thread leaves the NextFrame() function */
- vlc_mutex_lock (&(p_ac3dec_t->p_aout_fifo->data_lock));
- vlc_cond_signal (&(p_ac3dec_t->p_aout_fifo->data_wait));
- vlc_mutex_unlock (&(p_ac3dec_t->p_aout_fifo->data_lock));
-
+ vlc_mutex_lock (&(p_ac3thread->p_aout_fifo->data_lock));
+ vlc_cond_signal (&(p_ac3thread->p_aout_fifo->data_wait));
+ vlc_mutex_unlock (&(p_ac3thread->p_aout_fifo->data_lock));
}
+ /* Unlock the modules */
+ module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module );
+ module_Unneed( p_ac3thread->ac3_decoder.imdct.p_module );
+
/* Destroy descriptor */
- free( p_ac3dec_t->p_config );
- free( p_ac3dec_t );
+ free( p_ac3thread->p_config );
+ free( p_ac3thread );
- intf_DbgMsg ("ac3dec debug: ac3 decoder thread %p destroyed", p_ac3dec_t);
+ intf_DbgMsg ("ac3dec debug: ac3 decoder thread %p destroyed", p_ac3thread);
}
/*****************************************************************************
boolean_t b_new_pes)
{
- ac3dec_thread_t *p_ac3dec_t=(ac3dec_thread_t *)p_bit_stream->p_callback_arg;
+ ac3dec_thread_t *p_ac3thread=(ac3dec_thread_t *)p_bit_stream->p_callback_arg;
if( b_new_pes )
{
ptr = *(p_bit_stream->p_byte + 1);
ptr <<= 8;
ptr |= *(p_bit_stream->p_byte + 2);
- p_ac3dec_t->sync_ptr = ptr;
+ p_ac3thread->sync_ptr = ptr;
p_bit_stream->p_byte += 3;
}
}
+
+++ /dev/null
-/*****************************************************************************
- * ac3_downmix.c: ac3 downmix functions
- *****************************************************************************
- * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_downmix.c,v 1.23 2001/05/14 15:58:03 reno Exp $
- *
- * Authors: Michel Kaempf <maxx@via.ecp.fr>
- * Aaron Holtzman <aholtzma@engr.uvic.ca>
- * Renaud Dartus <reno@videolan.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
- *****************************************************************************/
-#include "defs.h"
-
-#include <string.h> /* memcpy() */
-
-#include "config.h"
-#include "common.h"
-#include "threads.h"
-#include "mtime.h"
-
-#include "intf_msg.h" /* intf_DbgMsg(), intf_ErrMsg() */
-#include "tests.h"
-
-#include "stream_control.h"
-#include "input_ext-dec.h"
-
-#include "ac3_decoder.h"
-#include "ac3_downmix.h"
-
-void downmix_init (downmix_t * p_downmix)
-{
-#if 0
- if ( TestCPU (CPU_CAPABILITY_SSE) )
- {
- intf_WarnMsg (1,"ac3dec: using MMX_SSE for downmix");
- p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_sse;
- p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_sse;
- p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_sse;
- p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_sse;
- p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_sse;
- p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_sse;
- p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_sse;
- }
- else if ( TestCPU (CPU_CAPABILITY_3DNOW) )
- {
- intf_WarnMsg (1,"ac3dec: using MMX_3DNOW for downmix");
- p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_3dn;
- p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_3dn;
- p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_3dn;
- p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_3dn;
- p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_3dn;
- p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_3dn;
- p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_3dn;
- }
- else
-#endif
- {
- p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_c;
- p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_c;
- p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_c;
- p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_c;
- p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_c;
- p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_c;
- p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_c;
- }
-}
+++ /dev/null
-/*****************************************************************************
- * ac3_downmix.h: ac3 downmix functions
- *****************************************************************************
- * Copyright (C) 2000, 2001 VideoLAN
- * $Id: ac3_downmix.h,v 1.7 2001/05/14 15:58:04 reno Exp $
- *
- * Authors: Renaud Dartus <reno@videolan.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
- *****************************************************************************/
-
-/* C functions */
-void downmix_3f_2r_to_2ch_c(float *samples, dm_par_t * dm_par);
-void downmix_3f_1r_to_2ch_c(float *samples, dm_par_t * dm_par);
-void downmix_2f_2r_to_2ch_c(float *samples, dm_par_t * dm_par);
-void downmix_2f_1r_to_2ch_c(float *samples, dm_par_t * dm_par);
-void downmix_3f_0r_to_2ch_c(float *samples, dm_par_t * dm_par);
-void stream_sample_2ch_to_s16_c(s16 *s16_samples, float *left, float *right);
-void stream_sample_1ch_to_s16_c(s16 *s16_samples, float *center);
-
-/* SSE functions */
-void downmix_3f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par);
-void downmix_3f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par);
-void downmix_2f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par);
-void downmix_2f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par);
-void downmix_3f_0r_to_2ch_sse(float *samples, dm_par_t * dm_par);
-void stream_sample_2ch_to_s16_sse(s16 *s16_samples, float *left, float *right);
-void stream_sample_1ch_to_s16_sse(s16 *s16_samples, float *center);
-
-/* 3DNow! functions */
-void downmix_3f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
-void downmix_3f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
-void downmix_2f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
-void downmix_2f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
-void downmix_3f_0r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
-void stream_sample_2ch_to_s16_3dn(s16 *s16_samples, float *left, float *right);
-void stream_sample_1ch_to_s16_3dn(s16 *s16_samples, float *center);
-
-
+++ /dev/null
-/*****************************************************************************
- * ac3_downmix_3dn.c: ac3 downmix functions
- *****************************************************************************
- * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_downmix_3dn.c,v 1.1 2001/05/14 15:58:04 reno Exp $
- *
- * Authors: Renaud Dartus <reno@videolan.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
- *****************************************************************************/
-
-#include "defs.h"
-
-#include "config.h"
-#include "common.h"
-#include "threads.h"
-#include "mtime.h"
-#include "tests.h"
-
-#include "stream_control.h"
-#include "input_ext-dec.h"
-#include "ac3_decoder.h"
-
-
-void downmix_3f_2r_to_2ch_3dn (float * samples, dm_par_t * dm_par)
-{
- __asm__ __volatile__ (
- "pushl %%ecx\n"
- "movl $128, %%ecx\n" /* loop counter */
-
- "movd (%%ebx), %%mm5\n" /* unit */
- "punpckldq %%mm5, %%mm5\n" /* unit | unit */
-
- "movd 4(%%ebx), %%mm6\n" /* clev */
- "punpckldq %%mm6, %%mm6\n" /* clev | clev */
-
- "movd 8(%%ebx), %%mm7\n" /* slev */
- "punpckldq %%mm7, %%mm7\n" /* slev | slev */
-
-".loop:\n"
- "movq (%%eax), %%mm0\n" /* left */
- "movq 2048(%%eax), %%mm1\n" /* right */
- "movq 1024(%%eax), %%mm2\n" /* center */
- "movq 3072(%%eax), %%mm3\n" /* leftsur */
- "movq 4096(%%eax), %%mm4\n" /* rightsur */
- "pfmul %%mm5, %%mm0\n"
- "pfmul %%mm5, %%mm1\n"
- "pfmul %%mm6, %%mm2\n"
- "pfadd %%mm2, %%mm0\n"
- "pfadd %%mm2, %%mm1\n"
- "pfmul %%mm7, %%mm3\n"
- "pfmul %%mm7, %%mm4\n"
- "pfadd %%mm3, %%mm0\n"
- "pfadd %%mm4, %%mm1\n"
-
- "movq %%mm0, (%%eax)\n"
- "movq %%mm1, 1024(%%eax)\n"
-
- "addl $8, %%eax\n"
- "decl %%ecx\n"
- "jnz .loop\n"
-
- "popl %%ecx\n"
- "femms\n"
- : "=a" (samples)
- : "a" (samples), "b" (dm_par));
-}
-
-void downmix_2f_2r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
-{
- __asm__ __volatile__ (
- "pushl %%ecx\n"
- "movl $128, %%ecx\n" /* loop counter */
-
- "movd (%%ebx), %%mm5\n" /* unit */
- "punpckldq %%mm5, %%mm5\n" /* unit | unit */
-
- "movd 8(%%ebx), %%mm7\n" /* slev */
- "punpckldq %%mm7, %%mm7\n" /* slev | slev */
-
-".loop3:\n"
- "movq (%%eax), %%mm0\n" /* left */
- "movq 1024(%%eax), %%mm1\n" /* right */
- "movq 2048(%%eax), %%mm3\n" /* leftsur */
- "movq 3072(%%eax), %%mm4\n" /* rightsur */
- "pfmul %%mm5, %%mm0\n"
- "pfmul %%mm5, %%mm1\n"
- "pfmul %%mm7, %%mm3\n"
- "pfmul %%mm7, %%mm4\n"
- "pfadd %%mm3, %%mm0\n"
- "pfadd %%mm4, %%mm1\n"
-
- "movq %%mm0, (%%eax)\n"
- "movq %%mm1, 1024(%%eax)\n"
-
- "addl $8, %%eax\n"
- "decl %%ecx\n"
- "jnz .loop3\n"
-
- "popl %%ecx\n"
- "femms\n"
- : "=a" (samples)
- : "a" (samples), "b" (dm_par));
-}
-void downmix_3f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
-{
- __asm__ __volatile__ (
-
- "pushl %%ecx\n"
- "movl $128, %%ecx\n" /* loop counter */
-
- "movd (%%ebx), %%mm5\n" /* unit */
- "punpckldq %%mm5, %%mm5\n" /* unit | unit */
-
- "movd 4(%%ebx), %%mm6\n" /* clev */
- "punpckldq %%mm6, %%mm6\n" /* clev | clev */
-
- "movd 8(%%ebx), %%mm7\n" /* slev */
- "punpckldq %%mm7, %%mm7\n" /* slev | slev */
-
-".loop4:\n"
- "movq (%%eax), %%mm0\n" /* left */
- "movq 2048(%%eax), %%mm1\n" /* right */
- "movq 1024(%%eax), %%mm2\n" /* center */
- "movq 3072(%%eax), %%mm3\n" /* sur */
- "pfmul %%mm5, %%mm0\n"
- "pfmul %%mm5, %%mm1\n"
- "pfmul %%mm6, %%mm2\n"
- "pfadd %%mm2, %%mm0\n"
- "pfmul %%mm7, %%mm3\n"
- "pfadd %%mm2, %%mm1\n"
- "pfsub %%mm3, %%mm0\n"
- "pfadd %%mm3, %%mm1\n"
-
- "movq %%mm0, (%%eax)\n"
- "movq %%mm1, 1024(%%eax)\n"
-
- "addl $8, %%eax\n"
- "decl %%ecx\n"
- "jnz .loop4\n"
-
- "popl %%ecx\n"
- "femms\n"
- : "=a" (samples)
- : "a" (samples), "b" (dm_par));
-}
-void downmix_2f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
-{
- __asm__ __volatile__ (
- "pushl %%ecx\n"
- "movl $128, %%ecx\n" /* loop counter */
-
- "movd (%%ebx), %%mm5\n" /* unit */
- "punpckldq %%mm5, %%mm5\n" /* unit | unit */
-
- "movd 8(%%ebx), %%mm7\n" /* slev */
- "punpckldq %%mm7, %%mm7\n" /* slev | slev */
-
-".loop5:\n"
- "movq (%%eax), %%mm0\n" /* left */
- "movq 1024(%%eax), %%mm1\n" /* right */
- "movq 2048(%%eax), %%mm3\n" /* sur */
- "pfmul %%mm5, %%mm0\n"
- "pfmul %%mm5, %%mm1\n"
- "pfmul %%mm7, %%mm3\n"
- "pfsub %%mm3, %%mm0\n"
- "pfadd %%mm3, %%mm1\n"
-
- "movq %%mm0, (%%eax)\n"
- "movq %%mm1, 1024(%%eax)\n"
-
- "addl $8, %%eax\n"
- "decl %%ecx\n"
- "jnz .loop5\n"
-
- "popl %%ecx\n"
- "femms\n"
- : "=a" (samples)
- : "a" (samples), "b" (dm_par));
-}
-
-void downmix_3f_0r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
-{
- __asm__ __volatile__ (
- "pushl %%ecx\n"
- "movl $128, %%ecx\n" /* loop counter */
-
- "movd (%%ebx), %%mm5\n" /* unit */
- "punpckldq %%mm5, %%mm5\n" /* unit | unit */
-
- "movd 4(%%ebx), %%mm6\n" /* clev */
- "punpckldq %%mm6, %%mm6\n" /* clev | clev */
-
-".loop6:\n"
- "movq (%%eax), %%mm0\n" /*left */
- "movq 2048(%%eax), %%mm1\n" /* right */
- "movq 1024(%%eax), %%mm2\n" /* center */
- "pfmul %%mm5, %%mm0\n"
- "pfmul %%mm5, %%mm1\n"
- "pfmul %%mm6, %%mm2\n"
- "pfadd %%mm2, %%mm0\n"
- "pfadd %%mm2, %%mm1\n"
-
- "movq %%mm0, (%%eax)\n"
- "movq %%mm1, 1024(%%eax)\n"
-
- "addl $8, %%eax\n"
- "decl %%ecx\n"
- "jnz .loop6\n"
-
- "popl %%ecx\n"
- "femms\n"
- : "=a" (samples)
- : "a" (samples), "b" (dm_par));
-}
-
-void stream_sample_1ch_to_s16_3dn (s16 *s16_samples, float *left)
-{
- __asm__ __volatile__ (
- "pushl %%ecx\n"
- "pushl %%edx\n"
-
- "movl $sqrt2, %%edx\n"
- "movd (%%edx), %%mm7\n"
- "punpckldq %%mm7, %%mm7\n" /* sqrt2 | sqrt2 */
- "movl $128, %%ecx\n"
-
-".loop2:\n"
- "movq (%%ebx), %%mm0\n" /* c1 | c0 */
- "pfmul %%mm7, %%mm0\n"
-
- "pf2id %%mm0, %%mm0\n" /* c1 c0 --> mm0, int_32 */
-
- "packssdw %%mm0, %%mm0\n" /* c1 c1 c0 c0 --> mm0, int_16 */
-
- "movq %%mm0, (%%eax)\n"
- "addl $8, %%eax\n"
- "addl $8, %%ebx\n"
-
- "decl %%ecx\n"
- "jnz .loop2\n"
-
- "popl %%edx\n"
- "popl %%ecx\n"
- "femms\n"
- : "=a" (s16_samples), "=b" (left)
- : "a" (s16_samples), "b" (left));
-}
-
-void stream_sample_2ch_to_s16_3dn (s16 *s16_samples, float *left, float *right)
-{
-
- __asm__ __volatile__ (
- "pushl %%ecx\n"
- "movl $128, %%ecx\n"
-
-".loop1:\n"
- "movq (%%ebx), %%mm0\n" /* l1 | l0 */
- "movq (%%edx), %%mm1\n" /* r1 | r0 */
- "movq %%mm0, %%mm2\n" /* l1 | l0 */
- "punpckldq %%mm1, %%mm0\n" /* r0 | l0 */
- "punpckhdq %%mm1, %%mm2\n" /* r1 | l1 */
-
- "pf2id %%mm0, %%mm0\n" /* r0 l0 --> mm0, int_32 */
- "pf2id %%mm2, %%mm2\n" /* r0 l0 --> mm0, int_32 */
-
- "packssdw %%mm2, %%mm0\n" /* r1 l1 r0 l0 --> mm0, int_16 */
-
- "movq %%mm0, (%%eax)\n"
- "movq %%mm2, 8(%%eax)\n"
- "addl $8, %%eax\n"
- "addl $8, %%ebx\n"
- "addl $8, %%edx\n"
-
- "decl %%ecx\n"
- "jnz .loop1\n"
-
- "popl %%ecx\n"
- "femms\n"
- : "=a" (s16_samples), "=b" (left), "=d" (right)
- : "a" (s16_samples), "b" (left), "d" (right));
-
-}
+++ /dev/null
-/*****************************************************************************
- * ac3_downmix_sse.c: ac3 downmix functions
- *****************************************************************************
- * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_downmix_sse.c,v 1.1 2001/05/14 15:58:04 reno Exp $
- *
- * Authors: Renaud Dartus <reno@videolan.org>
- * Aaron Holtzman <aholtzma@engr.uvic.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
- *****************************************************************************/
-
-#include "defs.h"
-
-#include "config.h"
-#include "common.h"
-#include "threads.h"
-#include "mtime.h"
-#include "tests.h"
-
-#include "stream_control.h"
-#include "input_ext-dec.h"
-#include "ac3_decoder.h"
-
-
-void sqrt2 (void)
-{
- __asm__ (".float 0f0.7071068");
-}
-
-void downmix_3f_2r_to_2ch_sse (float * samples, dm_par_t * dm_par)
-{
- __asm__ __volatile__ (
- "pushl %%ecx\n"
- "movl $64, %%ecx\n" /* loop counter */
-
- "movss (%%ebx), %%xmm5\n" /* unit */
- "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
-
- "movss 4(%%ebx), %%xmm6\n" /* clev */
- "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */
-
- "movss 8(%%ebx), %%xmm7\n" /* slev */
- "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
-
-".loop:\n"
- "movups (%%eax), %%xmm0\n" /* left */
- "movups 2048(%%eax), %%xmm1\n" /* right */
- "movups 1024(%%eax), %%xmm2\n" /* center */
- "movups 3072(%%eax), %%xmm3\n" /* leftsur */
- "movups 4096(%%eax), %%xmm4\n" /* rithgsur */
- "mulps %%xmm5, %%xmm0\n"
- "mulps %%xmm5, %%xmm1\n"
- "mulps %%xmm6, %%xmm2\n"
- "addps %%xmm2, %%xmm0\n"
- "addps %%xmm2, %%xmm1\n"
- "mulps %%xmm7, %%xmm3\n"
- "mulps %%xmm7, %%xmm4\n"
- "addps %%xmm3, %%xmm0\n"
- "addps %%xmm4, %%xmm1\n"
-
- "movups %%xmm0, (%%eax)\n"
- "movups %%xmm1, 1024(%%eax)\n"
-
- "addl $16, %%eax\n"
- "decl %%ecx\n"
- "jnz .loop\n"
-
- "popl %%ecx\n"
- : "=a" (samples)
- : "a" (samples), "b" (dm_par));
-}
-
-void downmix_2f_2r_to_2ch_sse (float *samples, dm_par_t * dm_par)
-{
- __asm__ __volatile__ (
- "pushl %%ecx\n"
- "movl $64, %%ecx\n" /* loop counter */
-
- "movss (%%ebx), %%xmm5\n" /* unit */
- "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
-
- "movss 8(%%ebx), %%xmm7\n" /* slev */
- "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
-
-".loop3:\n"
- "movups (%%eax), %%xmm0\n" /* left */
- "movups 1024(%%eax), %%xmm1\n" /* right */
- "movups 2048(%%eax), %%xmm3\n" /* leftsur */
- "movups 3072(%%eax), %%xmm4\n" /* rightsur */
- "mulps %%xmm5, %%xmm0\n"
- "mulps %%xmm5, %%xmm1\n"
- "mulps %%xmm7, %%xmm3\n"
- "mulps %%xmm7, %%xmm4\n"
- "addps %%xmm3, %%xmm0\n"
- "addps %%xmm4, %%xmm1\n"
-
- "movups %%xmm0, (%%eax)\n"
- "movups %%xmm1, 1024(%%eax)\n"
-
- "addl $16, %%eax\n"
- "decl %%ecx\n"
- "jnz .loop3\n"
-
- "popl %%ecx\n"
- : "=a" (samples)
- : "a" (samples), "b" (dm_par));
-}
-void downmix_3f_1r_to_2ch_sse (float *samples, dm_par_t * dm_par)
-{
- __asm__ __volatile__ (
-
- "pushl %%ecx\n"
- "movl $64, %%ecx\n" /* loop counter */
-
- "movss (%%ebx), %%xmm5\n" /* unit */
- "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
-
- "movss 4(%%ebx), %%xmm6\n" /* clev */
- "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */
-
- "movss 8(%%ebx), %%xmm7\n" /* slev */
- "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
-
-".loop4:\n"
- "movups (%%eax), %%xmm0\n" /* left */
- "movups 2048(%%eax), %%xmm1\n" /* right */
- "movups 1024(%%eax), %%xmm2\n" /* center */
- "movups 3072(%%eax), %%xmm3\n" /* sur */
- "mulps %%xmm5, %%xmm0\n"
- "mulps %%xmm5, %%xmm1\n"
- "mulps %%xmm6, %%xmm2\n"
- "addps %%xmm2, %%xmm0\n"
- "mulps %%xmm7, %%xmm3\n"
- "addps %%xmm2, %%xmm1\n"
- "subps %%xmm3, %%xmm0\n"
- "addps %%xmm3, %%xmm1\n"
-
- "movups %%xmm0, (%%eax)\n"
- "movups %%xmm1, 1024(%%eax)\n"
-
- "addl $16, %%eax\n"
- "decl %%ecx\n"
- "jnz .loop4\n"
-
- "popl %%ecx\n"
- : "=a" (samples)
- : "a" (samples), "b" (dm_par));
-
-}
-void downmix_2f_1r_to_2ch_sse (float *samples, dm_par_t * dm_par)
-{
- __asm__ __volatile__ (
- "pushl %%ecx\n"
- "movl $64, %%ecx\n" /* loop counter */
-
- "movss (%%ebx), %%xmm5\n" /* unit */
- "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
-
- "movss 8(%%ebx), %%xmm7\n" /* slev */
- "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */
-
-".loop5:\n"
- "movups (%%eax), %%xmm0\n" /* left */
- "movups 1024(%%eax), %%xmm1\n" /* right */
- "movups 2048(%%eax), %%xmm3\n" /* sur */
- "mulps %%xmm5, %%xmm0\n"
- "mulps %%xmm5, %%xmm1\n"
- "mulps %%xmm7, %%xmm3\n"
- "subps %%xmm3, %%xmm0\n"
- "addps %%xmm3, %%xmm1\n"
-
- "movups %%xmm0, (%%eax)\n"
- "movups %%xmm1, 1024(%%eax)\n"
-
- "addl $16, %%eax\n"
- "decl %%ecx\n"
- "jnz .loop5\n"
-
- "popl %%ecx\n"
- : "=a" (samples)
- : "a" (samples), "b" (dm_par));
-
-
-}
-void downmix_3f_0r_to_2ch_sse (float *samples, dm_par_t * dm_par)
-{
- __asm__ __volatile__ (
- "pushl %%ecx\n"
- "movl $64, %%ecx\n" /* loop counter */
-
- "movss (%%ebx), %%xmm5\n" /* unit */
- "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */
-
- "movss 4(%%ebx), %%xmm6\n" /* clev */
- "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */
-
-".loop6:\n"
- "movups (%%eax), %%xmm0\n" /*left */
- "movups 2048(%%eax), %%xmm1\n" /* right */
- "movups 1024(%%eax), %%xmm2\n" /* center */
- "mulps %%xmm5, %%xmm0\n"
- "mulps %%xmm5, %%xmm1\n"
- "mulps %%xmm6, %%xmm2\n"
- "addps %%xmm2, %%xmm0\n"
- "addps %%xmm2, %%xmm1\n"
-
- "movups %%xmm0, (%%eax)\n"
- "movups %%xmm1, 1024(%%eax)\n"
-
- "addl $16, %%eax\n"
- "decl %%ecx\n"
- "jnz .loop6\n"
-
- "popl %%ecx\n"
- : "=a" (samples)
- : "a" (samples), "b" (dm_par));
-}
-
-void stream_sample_1ch_to_s16_sse (s16 *s16_samples, float *left)
-{
- __asm__ __volatile__ (
- "pushl %%ecx\n"
- "pushl %%edx\n"
-
- "movl $sqrt2, %%edx\n"
- "movss (%%edx), %%xmm7\n"
- "shufps $0, %%xmm7, %%xmm7\n" /* sqrt2 | sqrt2 | sqrt2 | sqrt2 */
- "movl $64, %%ecx\n"
-
-".loop2:\n"
- "movups (%%ebx), %%xmm0\n" /* c3 | c2 | c1 | c0 */
- "mulps %%xmm7, %%xmm0\n"
- "movhlps %%xmm0, %%xmm2\n" /* c3 | c2 */
-
- "cvtps2pi %%xmm0, %%mm0\n" /* c1 c0 --> mm0, int_32 */
- "cvtps2pi %%xmm2, %%mm1\n" /* c3 c2 --> mm1, int_32 */
-
- "packssdw %%mm0, %%mm0\n" /* c1 c1 c0 c0 --> mm0, int_16 */
- "packssdw %%mm1, %%mm1\n" /* c3 c3 c2 c2 --> mm1, int_16 */
-
- "movq %%mm0, (%%eax)\n"
- "movq %%mm1, 8(%%eax)\n"
- "addl $16, %%eax\n"
- "addl $16, %%ebx\n"
-
- "decl %%ecx\n"
- "jnz .loop2\n"
-
- "popl %%edx\n"
- "popl %%ecx\n"
- "emms\n"
- : "=a" (s16_samples), "=b" (left)
- : "a" (s16_samples), "b" (left));
-}
-
-void stream_sample_2ch_to_s16_sse (s16 *s16_samples, float *left, float *right)
-{
-
- __asm__ __volatile__ (
- "pushl %%ecx\n"
- "movl $64, %%ecx\n"
-
-".loop1:\n"
- "movups (%%ebx), %%xmm0\n" /* l3 | l2 | l1 | l0 */
- "movups (%%edx), %%xmm1\n" /* r3 | r2 | r1 | r0 */
- "movhlps %%xmm0, %%xmm2\n" /* l3 | l2 */
- "movhlps %%xmm1, %%xmm3\n" /* r3 | r2 */
- "unpcklps %%xmm1, %%xmm0\n" /* r1 | l1 | r0 | l0 */
- "unpcklps %%xmm3, %%xmm2\n" /* r3 | l3 | r2 | l2 */
-
- "cvtps2pi %%xmm0, %%mm0\n" /* r0 l0 --> mm0, int_32 */
- "movhlps %%xmm0, %%xmm0\n"
- "cvtps2pi %%xmm0, %%mm1\n" /* r1 l1 --> mm1, int_32 */
- "cvtps2pi %%xmm2, %%mm2\n" /* r2 l2 --> mm2, int_32 */
- "movhlps %%xmm2, %%xmm2\n"
- "cvtps2pi %%xmm2, %%mm3\n" /* r3 l3 --> mm3, int_32 */
-
- "packssdw %%mm1, %%mm0\n" /* r1 l1 r0 l0 --> mm0, int_16 */
- "packssdw %%mm3, %%mm2\n" /* r3 l3 r2 l2 --> mm2, int_16 */
-
- "movq %%mm0, (%%eax)\n"
- "movq %%mm2, 8(%%eax)\n"
- "addl $16, %%eax\n"
- "addl $16, %%ebx\n"
- "addl $16, %%edx\n"
-
- "decl %%ecx\n"
- "jnz .loop1\n"
-
- "popl %%ecx\n"
- "emms\n"
- : "=a" (s16_samples), "=b" (left), "=d" (right)
- : "a" (s16_samples), "b" (left), "d" (right));
-
-}
* ac3_exponent.c: ac3 exponent calculations
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_exponent.c,v 1.24 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_exponent.c,v 1.25 2001/05/15 16:19:42 sam Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Michel Lespinasse <walken@zoy.org>
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
#include "defs.h"
#include <string.h> /* memcpy(), memset() */
#include "audio_output.h"
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
#include "ac3_decoder.h"
#include "ac3_internal.h"
-static const s16 exps_1[128] =
-{
- -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
- -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 0, 0, 0
-};
-
-static const s16 exps_2[128] =
-{
- -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
- -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
- -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
- -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
- -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
- 0, 0, 0
-};
-
-static const s16 exps_3[128] =
-{
- -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
- -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
- -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
- -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
- -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
- 0, 0, 0
-};
-
-#define UNPACK_FBW 1
-#define UNPACK_CPL 2
-#define UNPACK_LFE 4
-
-static __inline__ int exp_unpack_ch (ac3dec_t * p_ac3dec, u16 type,
- u16 expstr, u16 ngrps, u16 initial_exp,
- u16 exps[], u16 * dest)
-{
- u16 i,j;
- s16 exp_acc;
-
- if (expstr == EXP_REUSE)
- {
- return 0;
- }
-
- /* Handle the initial absolute exponent */
- exp_acc = initial_exp;
- j = 0;
-
- /* In the case of a fbw channel then the initial absolute values is
- * also an exponent */
- if (type != UNPACK_CPL)
- {
- dest[j++] = exp_acc;
- }
-
- /* Loop through the groups and fill the dest array appropriately */
- switch (expstr)
- {
- case EXP_D15: /* 1 */
- for (i = 0; i < ngrps; i++)
- {
- if (exps[i] > 124)
- {
- intf_ErrMsg ( "ac3dec error: invalid exponent" );
- return 1;
- }
- exp_acc += (exps_1[exps[i]] /*- 2*/);
- dest[j++] = exp_acc;
- exp_acc += (exps_2[exps[i]] /*- 2*/);
- dest[j++] = exp_acc;
- exp_acc += (exps_3[exps[i]] /*- 2*/);
- dest[j++] = exp_acc;
- }
- break;
-
- case EXP_D25: /* 2 */
- for (i = 0; i < ngrps; i++)
- {
- if (exps[i] > 124)
- {
- intf_ErrMsg ( "ac3dec error: invalid exponent" );
- return 1;
- }
- exp_acc += (exps_1[exps[i]] /*- 2*/);
- dest[j++] = exp_acc;
- dest[j++] = exp_acc;
- exp_acc += (exps_2[exps[i]] /*- 2*/);
- dest[j++] = exp_acc;
- dest[j++] = exp_acc;
- exp_acc += (exps_3[exps[i]] /*- 2*/);
- dest[j++] = exp_acc;
- dest[j++] = exp_acc;
- }
- break;
-
- case EXP_D45: /* 3 */
- for (i = 0; i < ngrps; i++)
- {
- if (exps[i] > 124)
- {
- intf_ErrMsg ( "ac3dec error: invalid exponent" );
- return 1;
- }
- exp_acc += (exps_1[exps[i]] /*- 2*/);
- dest[j++] = exp_acc;
- dest[j++] = exp_acc;
- dest[j++] = exp_acc;
- dest[j++] = exp_acc;
- exp_acc += (exps_2[exps[i]] /*- 2*/);
- dest[j++] = exp_acc;
- dest[j++] = exp_acc;
- dest[j++] = exp_acc;
- dest[j++] = exp_acc;
- exp_acc += (exps_3[exps[i]] /*- 2*/);
- dest[j++] = exp_acc;
- dest[j++] = exp_acc;
- dest[j++] = exp_acc;
- dest[j++] = exp_acc;
- }
- break;
- }
-
- return 0;
-}
+#include "ac3_exponent.h"
int exponent_unpack (ac3dec_t * p_ac3dec)
{
--- /dev/null
+/*****************************************************************************
+ * ac3_exponent.h: ac3 exponent calculations
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: ac3_exponent.h,v 1.5 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Michel Kaempf <maxx@via.ecp.fr>
+ * Michel Lespinasse <walken@zoy.org>
+ * Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+static const s16 exps_1[128] =
+{
+ -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+ -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 0, 0, 0
+};
+
+static const s16 exps_2[128] =
+{
+ -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+ -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+ -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+ -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+ -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+ 0, 0, 0
+};
+
+static const s16 exps_3[128] =
+{
+ -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+ -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+ -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+ -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+ -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+ 0, 0, 0
+};
+
+#define UNPACK_FBW 1
+#define UNPACK_CPL 2
+#define UNPACK_LFE 4
+/*****************************************************************************
+ * exp_unpack_ch: expand the packed exponent groups of one channel
+ *****************************************************************************
+ * Returns 0 at once, without writing to dest, when expstr is EXP_REUSE.
+ * Otherwise a running value starts at initial_exp (stored as the first dest
+ * entry unless type is UNPACK_CPL) and, for each of the ngrps entries of
+ * exps[], the three deltas looked up in exps_1, exps_2 and exps_3 are added
+ * to it in turn. After each delta the running value is appended to dest
+ * once (EXP_D15), twice (EXP_D25) or four times (EXP_D45). Any other expstr
+ * matches no case, so nothing more is written and 0 is returned.
+ * Returns 1, after logging through intf_ErrMsg, as soon as a group value
+ * above 124 is met; dest entries written before that point are left as is.
+ * dest is not bounds-checked here and p_ac3dec is not used by the body.
+ *****************************************************************************/
+static __inline__ int exp_unpack_ch (ac3dec_t * p_ac3dec, u16 type,
+ u16 expstr, u16 ngrps, u16 initial_exp,
+ u16 exps[], u16 * dest)
+{
+ u16 i,j; /* i: group index in exps[], j: write index in dest[] */
+ s16 exp_acc; /* running value; signed because the table deltas can be negative */
+
+ if (expstr == EXP_REUSE)
+ {
+ return 0;
+ }
+
+ /* Seed the running value with the initial absolute exponent */
+ exp_acc = initial_exp;
+ j = 0;
+
+ /* For every type except UNPACK_CPL the initial absolute value is
+ * itself stored as the first entry of dest */
+ if (type != UNPACK_CPL)
+ {
+ dest[j++] = exp_acc;
+ }
+
+ /* Walk the groups; the strategy selects how many times each
+ * accumulated value is repeated in dest */
+ switch (expstr)
+ {
+ case EXP_D15: /* each accumulated value is stored once */
+ for (i = 0; i < ngrps; i++)
+ {
+ if (exps[i] > 124) /* only table indices 0..124 are accepted */
+ {
+ intf_ErrMsg ( "ac3dec error: invalid exponent" );
+ return 1;
+ }
+ exp_acc += (exps_1[exps[i]] /*- 2*/);
+ dest[j++] = exp_acc;
+ exp_acc += (exps_2[exps[i]] /*- 2*/);
+ dest[j++] = exp_acc;
+ exp_acc += (exps_3[exps[i]] /*- 2*/);
+ dest[j++] = exp_acc;
+ }
+ break;
+
+ case EXP_D25: /* each accumulated value is stored twice */
+ for (i = 0; i < ngrps; i++)
+ {
+ if (exps[i] > 124) /* only table indices 0..124 are accepted */
+ {
+ intf_ErrMsg ( "ac3dec error: invalid exponent" );
+ return 1;
+ }
+ exp_acc += (exps_1[exps[i]] /*- 2*/);
+ dest[j++] = exp_acc;
+ dest[j++] = exp_acc;
+ exp_acc += (exps_2[exps[i]] /*- 2*/);
+ dest[j++] = exp_acc;
+ dest[j++] = exp_acc;
+ exp_acc += (exps_3[exps[i]] /*- 2*/);
+ dest[j++] = exp_acc;
+ dest[j++] = exp_acc;
+ }
+ break;
+
+ case EXP_D45: /* each accumulated value is stored four times */
+ for (i = 0; i < ngrps; i++)
+ {
+ if (exps[i] > 124) /* only table indices 0..124 are accepted */
+ {
+ intf_ErrMsg ( "ac3dec error: invalid exponent" );
+ return 1;
+ }
+ exp_acc += (exps_1[exps[i]] /*- 2*/);
+ dest[j++] = exp_acc;
+ dest[j++] = exp_acc;
+ dest[j++] = exp_acc;
+ dest[j++] = exp_acc;
+ exp_acc += (exps_2[exps[i]] /*- 2*/);
+ dest[j++] = exp_acc;
+ dest[j++] = exp_acc;
+ dest[j++] = exp_acc;
+ dest[j++] = exp_acc;
+ exp_acc += (exps_3[exps[i]] /*- 2*/);
+ dest[j++] = exp_acc;
+ dest[j++] = exp_acc;
+ dest[j++] = exp_acc;
+ dest[j++] = exp_acc;
+ }
+ break;
+ }
+
+ return 0;
+}
+
* ac3_imdct.c: ac3 DCT
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_imdct.c,v 1.19 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_imdct.c,v 1.20 2001/05/15 16:19:42 sam Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
#include "defs.h"
#include <string.h> /* memcpy() */
#include "stream_control.h"
#include "input_ext-dec.h"
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
#include "ac3_decoder.h"
-#include "ac3_imdct_c.h" /* imdct_init_c */
-#include "ac3_imdct_sse.h" /* imdct_init_sse */
-
-#include "tests.h" /* TestCPU */
-
#ifndef M_PI
# define M_PI 3.14159265358979323846
#endif
-
void imdct_init(imdct_t * p_imdct)
{
- int i;
- float scale = 181.019;
-#if 0
- if ( TestCPU (CPU_CAPABILITY_SSE) )
- {
- imdct_init_sse (p_imdct);
- }
- else
-#endif
- {
- imdct_init_c (p_imdct);
- }
+ int i;
+ float scale = 181.019;
+
+ p_imdct->pf_imdct_init( p_imdct );
- /* More twiddle factors to turn IFFT into IMDCT */
- for (i=0; i < 64; i++) {
- p_imdct->xcos2[i] = cos(2.0f * M_PI * (8*i+1)/(4*N)) * scale;
- p_imdct->xsin2[i] = sin(2.0f * M_PI * (8*i+1)/(4*N)) * scale;
- }
+ /* More twiddle factors to turn IFFT into IMDCT */
+ for (i=0; i < 64; i++) {
+ p_imdct->xcos2[i] = cos(2.0f * M_PI * (8*i+1)/(4*N)) * scale;
+ p_imdct->xsin2[i] = sin(2.0f * M_PI * (8*i+1)/(4*N)) * scale;
+ }
}
void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
{
- int i;
- int doable = 0;
- float *center=NULL, *left, *right, *left_sur, *right_sur;
- float *delay_left, *delay_right;
- float *delay1_left, *delay1_right, *delay1_center, *delay1_sr, *delay1_sl;
- float right_tmp, left_tmp;
- void (*do_imdct)(imdct_t * p_imdct, float data[], float delay[]);
+ int i;
+ int doable = 0;
+ float *center=NULL, *left, *right, *left_sur, *right_sur;
+ float *delay_left, *delay_right;
+ float *delay1_left, *delay1_right, *delay1_center, *delay1_sr, *delay1_sl;
+ float right_tmp, left_tmp;
+ void (*do_imdct)(imdct_t * p_imdct, float data[], float delay[]);
- /* test if dm in frequency is doable */
- if (!(doable = p_ac3dec->audblk.blksw[0]))
+ /* test if dm in frequency is doable */
+ if (!(doable = p_ac3dec->audblk.blksw[0]))
{
- do_imdct = p_ac3dec->imdct.imdct_do_512;
+ do_imdct = p_ac3dec->imdct.pf_imdct_512;
}
- else
+ else
{
- do_imdct = imdct_do_256; /* There is only a C function */
+ do_imdct = p_ac3dec->imdct.pf_imdct_256;
}
- /* downmix in the frequency domain if all the channels
- * use the same imdct */
- for (i=0; i < p_ac3dec->bsi.nfchans; i++)
+ /* downmix in the frequency domain if all the channels
+ * use the same imdct */
+ for (i=0; i < p_ac3dec->bsi.nfchans; i++)
{
- if (doable != p_ac3dec->audblk.blksw[i])
+ if (doable != p_ac3dec->audblk.blksw[i])
{
- do_imdct = NULL;
- break;
- }
- }
+ do_imdct = NULL;
+ break;
+ }
+ }
if (do_imdct)
{
- /* dowmix first and imdct */
+ /* downmix first, then imdct */
switch(p_ac3dec->bsi.acmod)
{
- case 7: /* 3/2 */
- p_ac3dec->downmix.downmix_3f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
- break;
- case 6: /* 2/2 */
- p_ac3dec->downmix.downmix_2f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
- break;
- case 5: /* 3/1 */
- p_ac3dec->downmix.downmix_3f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
- break;
- case 4: /* 2/1 */
- p_ac3dec->downmix.downmix_2f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
- break;
- case 3: /* 3/0 */
- p_ac3dec->downmix.downmix_3f_0r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
- break;
- case 2:
- break;
- default: /* 1/0 */
-// if (p_ac3dec->bsi.acmod == 1)
- center = p_ac3dec->samples[0];
-// else if (p_ac3dec->bsi.acmod == 0)
+ case 7: /* 3/2 */
+ p_ac3dec->downmix.pf_downmix_3f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+ break;
+ case 6: /* 2/2 */
+ p_ac3dec->downmix.pf_downmix_2f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+ break;
+ case 5: /* 3/1 */
+ p_ac3dec->downmix.pf_downmix_3f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+ break;
+ case 4: /* 2/1 */
+ p_ac3dec->downmix.pf_downmix_2f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+ break;
+ case 3: /* 3/0 */
+ p_ac3dec->downmix.pf_downmix_3f_0r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+ break;
+ case 2:
+ break;
+ default: /* 1/0 */
+// if (p_ac3dec->bsi.acmod == 1)
+ center = p_ac3dec->samples[0];
+// else if (p_ac3dec->bsi.acmod == 0)
// center = samples[ac3_config.dual_mono_ch_sel];
do_imdct(&p_ac3dec->imdct, center, p_ac3dec->imdct.delay[0]); /* no downmix*/
- p_ac3dec->downmix.stream_sample_1ch_to_s16 (buffer, center);
+ p_ac3dec->downmix.pf_stream_sample_1ch_to_s16 (buffer, center);
- return;
+ return;
break;
}
- do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[0], p_ac3dec->imdct.delay[0]);
- do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[1], p_ac3dec->imdct.delay[1]);
- p_ac3dec->downmix.stream_sample_2ch_to_s16(buffer, p_ac3dec->samples[0], p_ac3dec->samples[1]);
+ do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[0], p_ac3dec->imdct.delay[0]);
+ do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[1], p_ac3dec->imdct.delay[1]);
+ p_ac3dec->downmix.pf_stream_sample_2ch_to_s16(buffer, p_ac3dec->samples[0], p_ac3dec->samples[1]);
- } else {
+ } else {
/* imdct and then downmix
- * delay and samples should be saved and mixed
- * fprintf(stderr, "time domain downmix\n"); */
- for (i=0; i<p_ac3dec->bsi.nfchans; i++)
+ * delay and samples should be saved and mixed
+ * fprintf(stderr, "time domain downmix\n"); */
+ for (i=0; i<p_ac3dec->bsi.nfchans; i++)
{
- if (p_ac3dec->audblk.blksw[i])
+ if (p_ac3dec->audblk.blksw[i])
/* There is only a C function */
- imdct_do_256_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]);
- else
- p_ac3dec->imdct.imdct_do_512_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]);
- }
+ p_ac3dec->imdct.pf_imdct_256_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]);
+ else
+ p_ac3dec->imdct.pf_imdct_512_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]);
+ }
- /* mix the sample, overlap */
- switch(p_ac3dec->bsi.acmod)
+ /* mix the sample, overlap */
+ switch(p_ac3dec->bsi.acmod)
{
- case 7: /* 3/2 */
- left = p_ac3dec->samples[0];
- center = p_ac3dec->samples[1];
- right = p_ac3dec->samples[2];
- left_sur = p_ac3dec->samples[3];
- right_sur = p_ac3dec->samples[4];
- delay_left = p_ac3dec->imdct.delay[0];
- delay_right = p_ac3dec->imdct.delay[1];
- delay1_left = p_ac3dec->imdct.delay1[0];
- delay1_center = p_ac3dec->imdct.delay1[1];
- delay1_right = p_ac3dec->imdct.delay1[2];
- delay1_sl = p_ac3dec->imdct.delay1[3];
- delay1_sr = p_ac3dec->imdct.delay1[4];
+ case 7: /* 3/2 */
+ left = p_ac3dec->samples[0];
+ center = p_ac3dec->samples[1];
+ right = p_ac3dec->samples[2];
+ left_sur = p_ac3dec->samples[3];
+ right_sur = p_ac3dec->samples[4];
+ delay_left = p_ac3dec->imdct.delay[0];
+ delay_right = p_ac3dec->imdct.delay[1];
+ delay1_left = p_ac3dec->imdct.delay1[0];
+ delay1_center = p_ac3dec->imdct.delay1[1];
+ delay1_right = p_ac3dec->imdct.delay1[2];
+ delay1_sl = p_ac3dec->imdct.delay1[3];
+ delay1_sr = p_ac3dec->imdct.delay1[4];
- for (i = 0; i < 256; i++) {
- left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center + p_ac3dec->dm_par.slev * *left_sur++;
- right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *right_sur++;
- *buffer++ = (s16)(left_tmp + *delay_left);
- *buffer++ = (s16)(right_tmp + *delay_right);
- *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.clev * *delay1_center + p_ac3dec->dm_par.slev * *delay1_sl++;
- *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *delay1_sr++;
- }
- break;
- case 6: /* 2/2 */
- left = p_ac3dec->samples[0];
- right = p_ac3dec->samples[1];
- left_sur = p_ac3dec->samples[2];
- right_sur = p_ac3dec->samples[3];
- delay_left = p_ac3dec->imdct.delay[0];
- delay_right = p_ac3dec->imdct.delay[1];
- delay1_left = p_ac3dec->imdct.delay1[0];
- delay1_right = p_ac3dec->imdct.delay1[1];
- delay1_sl = p_ac3dec->imdct.delay1[2];
- delay1_sr = p_ac3dec->imdct.delay1[3];
+ /* 3/2 time-domain mix: each output sample is the mixed block sample
+ * plus the stored delay; the delay is then overwritten with the same
+ * mix applied to the delay1 buffers. */
+ for (i = 0; i < 256; i++) {
+ left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center + p_ac3dec->dm_par.slev * *left_sur++;
+ right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *right_sur++;
+ *buffer++ = (s16)(left_tmp + *delay_left);
+ *buffer++ = (s16)(right_tmp + *delay_right);
+ *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.clev * *delay1_center + p_ac3dec->dm_par.slev * *delay1_sl++;
+ /* The center term must come from delay1_center and advance it once per
+ * sample; reading *center++ here stepped center twice per iteration
+ * and left delay1_center stuck on its first element. */
+ *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *delay1_center++ + p_ac3dec->dm_par.slev * *delay1_sr++;
+ }
+ break;
+ case 6: /* 2/2 */
+ left = p_ac3dec->samples[0];
+ right = p_ac3dec->samples[1];
+ left_sur = p_ac3dec->samples[2];
+ right_sur = p_ac3dec->samples[3];
+ delay_left = p_ac3dec->imdct.delay[0];
+ delay_right = p_ac3dec->imdct.delay[1];
+ delay1_left = p_ac3dec->imdct.delay1[0];
+ delay1_right = p_ac3dec->imdct.delay1[1];
+ delay1_sl = p_ac3dec->imdct.delay1[2];
+ delay1_sr = p_ac3dec->imdct.delay1[3];
- for (i = 0; i < 256; i++) {
- left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.slev * *left_sur++;
- right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.slev * *right_sur++;
- *buffer++ = (s16)(left_tmp + *delay_left);
- *buffer++ = (s16)(right_tmp + *delay_right);
- *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.slev * *delay1_sl++;
- *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.slev * *delay1_sr++;
- }
- break;
- case 5: /* 3/1 */
- left = p_ac3dec->samples[0];
- center = p_ac3dec->samples[1];
- right = p_ac3dec->samples[2];
- right_sur = p_ac3dec->samples[3];
- delay_left = p_ac3dec->imdct.delay[0];
- delay_right = p_ac3dec->imdct.delay[1];
- delay1_left = p_ac3dec->imdct.delay1[0];
- delay1_center = p_ac3dec->imdct.delay1[1];
- delay1_right = p_ac3dec->imdct.delay1[2];
- delay1_sl = p_ac3dec->imdct.delay1[3];
+ for (i = 0; i < 256; i++) {
+ left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.slev * *left_sur++;
+ right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.slev * *right_sur++;
+ *buffer++ = (s16)(left_tmp + *delay_left);
+ *buffer++ = (s16)(right_tmp + *delay_right);
+ *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.slev * *delay1_sl++;
+ *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.slev * *delay1_sr++;
+ }
+ break;
+ case 5: /* 3/1 */
+ left = p_ac3dec->samples[0];
+ center = p_ac3dec->samples[1];
+ right = p_ac3dec->samples[2];
+ right_sur = p_ac3dec->samples[3];
+ delay_left = p_ac3dec->imdct.delay[0];
+ delay_right = p_ac3dec->imdct.delay[1];
+ delay1_left = p_ac3dec->imdct.delay1[0];
+ delay1_center = p_ac3dec->imdct.delay1[1];
+ delay1_right = p_ac3dec->imdct.delay1[2];
+ delay1_sl = p_ac3dec->imdct.delay1[3];
- for (i = 0; i < 256; i++) {
- left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center - p_ac3dec->dm_par.slev * *right_sur;
- right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *right_sur++;
- *buffer++ = (s16)(left_tmp + *delay_left);
- *buffer++ = (s16)(right_tmp + *delay_right);
- *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.clev * *delay1_center + p_ac3dec->dm_par.slev * *delay1_sl;
- *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *delay1_sl++;
- }
- break;
- case 4: /* 2/1 */
- left = p_ac3dec->samples[0];
- right = p_ac3dec->samples[1];
- right_sur = p_ac3dec->samples[2];
- delay_left = p_ac3dec->imdct.delay[0];
- delay_right = p_ac3dec->imdct.delay[1];
- delay1_left = p_ac3dec->imdct.delay1[0];
- delay1_right = p_ac3dec->imdct.delay1[1];
- delay1_sl = p_ac3dec->imdct.delay1[2];
+ /* 3/1 time-domain mix: each output sample is the mixed block sample
+ * plus the stored delay; the delay is then overwritten with a mix of
+ * the delay1 buffers. */
+ for (i = 0; i < 256; i++) {
+ left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center - p_ac3dec->dm_par.slev * *right_sur;
+ right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *right_sur++;
+ *buffer++ = (s16)(left_tmp + *delay_left);
+ *buffer++ = (s16)(right_tmp + *delay_right);
+ /* NOTE(review): the surround term is subtracted in left_tmp but added
+ * here - confirm which sign is intended for the left delay. */
+ *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.clev * *delay1_center + p_ac3dec->dm_par.slev * *delay1_sl;
+ /* The center term must come from delay1_center and advance it once per
+ * sample; reading *center++ here stepped center twice per iteration
+ * and left delay1_center stuck on its first element. */
+ *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *delay1_center++ + p_ac3dec->dm_par.slev * *delay1_sl++;
+ }
+ break;
+ case 4: /* 2/1 */
+ left = p_ac3dec->samples[0];
+ right = p_ac3dec->samples[1];
+ right_sur = p_ac3dec->samples[2];
+ delay_left = p_ac3dec->imdct.delay[0];
+ delay_right = p_ac3dec->imdct.delay[1];
+ delay1_left = p_ac3dec->imdct.delay1[0];
+ delay1_right = p_ac3dec->imdct.delay1[1];
+ delay1_sl = p_ac3dec->imdct.delay1[2];
- for (i = 0; i < 256; i++) {
- left_tmp = p_ac3dec->dm_par.unit * *left++ - p_ac3dec->dm_par.slev * *right_sur;
- right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.slev * *right_sur++;
- *buffer++ = (s16)(left_tmp + *delay_left);
- *buffer++ = (s16)(right_tmp + *delay_right);
- *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.slev * *delay1_sl;
- *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.slev * *delay1_sl++;
- }
- break;
- case 3: /* 3/0 */
- left = p_ac3dec->samples[0];
- center = p_ac3dec->samples[1];
- right = p_ac3dec->samples[2];
- delay_left = p_ac3dec->imdct.delay[0];
- delay_right = p_ac3dec->imdct.delay[1];
- delay1_left = p_ac3dec->imdct.delay1[0];
- delay1_center = p_ac3dec->imdct.delay1[1];
- delay1_right = p_ac3dec->imdct.delay1[2];
+ for (i = 0; i < 256; i++) { /* 2/1 time-domain mix, one stereo pair per iteration */
+ left_tmp = p_ac3dec->dm_par.unit * *left++ - p_ac3dec->dm_par.slev * *right_sur;
+ right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.slev * *right_sur++;
+ *buffer++ = (s16)(left_tmp + *delay_left);
+ *buffer++ = (s16)(right_tmp + *delay_right);
+ *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.slev * *delay1_sl; /* NOTE(review): surround is subtracted in left_tmp but added here - confirm intended sign */
+ *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.slev * *delay1_sl++;
+ }
+ break;
+ case 3: /* 3/0 */
+ left = p_ac3dec->samples[0];
+ center = p_ac3dec->samples[1];
+ right = p_ac3dec->samples[2];
+ delay_left = p_ac3dec->imdct.delay[0];
+ delay_right = p_ac3dec->imdct.delay[1];
+ delay1_left = p_ac3dec->imdct.delay1[0];
+ delay1_center = p_ac3dec->imdct.delay1[1];
+ delay1_right = p_ac3dec->imdct.delay1[2];
- for (i = 0; i < 256; i++) {
- left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center;
- right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++;
- *buffer++ = (s16)(left_tmp + *delay_left);
- *buffer++ = (s16)(right_tmp + *delay_right);
- *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.clev * *delay1_center;
- *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *center++;
- }
- break;
- case 2: /* copy to output */
- for (i = 0; i < 256; i++) {
- *buffer++ = (s16)p_ac3dec->samples[0][i];
- *buffer++ = (s16)p_ac3dec->samples[1][i];
- }
- break;
- }
- }
+ /* 3/0 time-domain mix: each output sample is the mixed block sample
+ * plus the stored delay; the delay is then overwritten with the same
+ * mix applied to the delay1 buffers. */
+ for (i = 0; i < 256; i++) {
+ left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center;
+ right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++;
+ *buffer++ = (s16)(left_tmp + *delay_left);
+ *buffer++ = (s16)(right_tmp + *delay_right);
+ *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.clev * *delay1_center;
+ /* The center term must come from delay1_center and advance it once per
+ * sample; reading *center++ here stepped center twice per iteration
+ * and left delay1_center stuck on its first element. */
+ *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *delay1_center++;
+ }
+ break;
+ case 2: /* copy to output */
+ for (i = 0; i < 256; i++) {
+ *buffer++ = (s16)p_ac3dec->samples[0][i];
+ *buffer++ = (s16)p_ac3dec->samples[1][i];
+ }
+ break;
+ }
+ }
}
+++ /dev/null
-/*****************************************************************************
- * ac3_imdct_c.c: ac3 DCT
- *****************************************************************************
- * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_imdct_c.c,v 1.3 2001/05/14 15:58:04 reno Exp $
- *
- * Authors: Renaud Dartus <reno@videolan.org>
- * Aaron Holtzman <aholtzma@engr.uvic.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
- *****************************************************************************/
-
-#include "defs.h"
-
-#include <string.h> /* memcpy() */
-
-#include <math.h>
-#include <stdio.h>
-
-#include "config.h"
-#include "common.h"
-#include "threads.h"
-#include "mtime.h"
-
-#include "stream_control.h"
-#include "input_ext-dec.h"
-
-#include "ac3_decoder.h"
-#include "ac3_imdct_c.h"
-
-#ifndef M_PI
-# define M_PI 3.14159265358979323846
-#endif
-
-void fft_64p_c (complex_t *x);
-void fft_128p_c (complex_t *x);
-
-static float window[] = {
- 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
- 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
- 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
- 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
- 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
- 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
- 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
- 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
- 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
- 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
- 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
- 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
- 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
- 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
- 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
- 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
- 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
- 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
- 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
- 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
- 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
- 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
- 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
- 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
- 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
- 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
- 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
- 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
- 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
- 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
- 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
- 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
-};
-
-static const int pm128[128] =
-{
- 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120,
- 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124,
- 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122,
- 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126,
- 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121,
- 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125,
- 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123,
- 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127
-};
-
-static const int pm64[64] =
-{
- 0, 8, 16, 24, 32, 40, 48, 56,
- 4, 20, 36, 52, 12, 28, 44, 60,
- 2, 10, 18, 26, 34, 42, 50, 58,
- 6, 14, 22, 30, 38, 46, 54, 62,
- 1, 9, 17, 25, 33, 41, 49, 57,
- 5, 21, 37, 53, 13, 29, 45, 61,
- 3, 11, 19, 27, 35, 43, 51, 59,
- 7, 23, 39, 55, 15, 31, 47, 63
-};
-
-int imdct_init_c (imdct_t * p_imdct)
-{
- int i;
- float scale = 181.019;
-
- p_imdct->imdct_do_512 = imdct_do_512_c;
- p_imdct->imdct_do_512_nol = imdct_do_512_nol_c;
- p_imdct->fft_64p = fft_64p_c;
-
- /* Twiddle factors to turn IFFT into IMDCT */
-
- for (i=0; i < 128; i++) {
- p_imdct->xcos1[i] = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
- p_imdct->xsin1[i] = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
- }
-
- return 0;
-}
-
-void imdct_do_256 (imdct_t * p_imdct, float data[],float delay[])
-{
- int i, j, k;
- int p, q;
-
- float tmp_a_i;
- float tmp_a_r;
-
- float *data_ptr;
- float *delay_ptr;
- float *window_ptr;
-
- complex_t *buf1, *buf2;
-
- buf1 = &p_imdct->buf[0];
- buf2 = &p_imdct->buf[64];
-
- /* Pre IFFT complex multiply plus IFFT complex conjugate */
- for (k=0; k<64; k++) {
- /* X1[k] = X[2*k]
- * X2[k] = X[2*k+1] */
-
- j = pm64[k];
- p = 2 * (128-2*j-1);
- q = 2 * (2 * j);
-
- /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
- buf1[k].real = data[p] * p_imdct->xcos2[j] - data[q] * p_imdct->xsin2[j];
- buf1[k].imag = -1.0f*(data[q] * p_imdct->xcos2[j] + data[p] * p_imdct->xsin2[j]);
- /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
- buf2[k].real = data[p + 1] * p_imdct->xcos2[j] - data[q + 1] * p_imdct->xsin2[j];
- buf2[k].imag = -1.0f*(data[q + 1] * p_imdct->xcos2[j] + data[p + 1] * p_imdct->xsin2[j]);
- }
-
- p_imdct->fft_64p(&buf1[0]);
- p_imdct->fft_64p(&buf2[0]);
-
- /* Post IFFT complex multiply */
- for( i=0; i < 64; i++) {
- tmp_a_r = buf1[i].real;
- tmp_a_i = -buf1[i].imag;
- buf1[i].real = (tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
- buf1[i].imag = (tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
- tmp_a_r = buf2[i].real;
- tmp_a_i = -buf2[i].imag;
- buf2[i].real = (tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
- buf2[i].imag = (tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
- }
-
- data_ptr = data;
- delay_ptr = delay;
- window_ptr = window;
-
- /* Window and convert to real valued signal */
- for(i=0; i< 64; i++) {
- *data_ptr++ = -buf1[i].imag * *window_ptr++ + *delay_ptr++;
- *data_ptr++ = buf1[64-i-1].real * *window_ptr++ + *delay_ptr++;
- }
-
- for(i=0; i< 64; i++) {
- *data_ptr++ = -buf1[i].real * *window_ptr++ + *delay_ptr++;
- *data_ptr++ = buf1[64-i-1].imag * *window_ptr++ + *delay_ptr++;
- }
-
- delay_ptr = delay;
-
- for(i=0; i< 64; i++) {
- *delay_ptr++ = -buf2[i].real * *--window_ptr;
- *delay_ptr++ = buf2[64-i-1].imag * *--window_ptr;
- }
-
- for(i=0; i< 64; i++) {
- *delay_ptr++ = buf2[i].imag * *--window_ptr;
- *delay_ptr++ = -buf2[64-i-1].real * *--window_ptr;
- }
-}
-
-
-void imdct_do_256_nol (imdct_t * p_imdct, float data[], float delay[])
-{
- int i, j, k;
- int p, q;
-
- float tmp_a_i;
- float tmp_a_r;
-
- float *data_ptr;
- float *delay_ptr;
- float *window_ptr;
-
- complex_t *buf1, *buf2;
-
- buf1 = &p_imdct->buf[0];
- buf2 = &p_imdct->buf[64];
-
- /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
- for(k=0; k<64; k++) {
- /* X1[k] = X[2*k]
- * X2[k] = X[2*k+1] */
- j = pm64[k];
- p = 2 * (128-2*j-1);
- q = 2 * (2 * j);
-
- /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
- buf1[k].real = data[p] * p_imdct->xcos2[j] - data[q] * p_imdct->xsin2[j];
- buf1[k].imag = -1.0f*(data[q] * p_imdct->xcos2[j] + data[p] * p_imdct->xsin2[j]);
- /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
- buf2[k].real = data[p + 1] * p_imdct->xcos2[j] - data[q + 1] * p_imdct->xsin2[j];
- buf2[k].imag = -1.0f*(data[q + 1] * p_imdct->xcos2[j] + data[p + 1] * p_imdct->xsin2[j]);
- }
-
- p_imdct->fft_64p(&buf1[0]);
- p_imdct->fft_64p(&buf2[0]);
-
- /* Post IFFT complex multiply */
- for( i=0; i < 64; i++) {
- /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
- tmp_a_r = buf1[i].real;
- tmp_a_i = -buf1[i].imag;
- buf1[i].real =(tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
- buf1[i].imag =(tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
- /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
- tmp_a_r = buf2[i].real;
- tmp_a_i = -buf2[i].imag;
- buf2[i].real =(tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
- buf2[i].imag =(tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
- }
-
- data_ptr = data;
- delay_ptr = delay;
- window_ptr = window;
-
- /* Window and convert to real valued signal, no overlap */
- for(i=0; i< 64; i++) {
- *data_ptr++ = -buf1[i].imag * *window_ptr++;
- *data_ptr++ = buf1[64-i-1].real * *window_ptr++;
- }
-
- for(i=0; i< 64; i++) {
- *data_ptr++ = -buf1[i].real * *window_ptr++ + *delay_ptr++;
- *data_ptr++ = buf1[64-i-1].imag * *window_ptr++ + *delay_ptr++;
- }
-
- delay_ptr = delay;
-
- for(i=0; i< 64; i++) {
- *delay_ptr++ = -buf2[i].real * *--window_ptr;
- *delay_ptr++ = buf2[64-i-1].imag * *--window_ptr;
- }
-
- for(i=0; i< 64; i++) {
- *delay_ptr++ = buf2[i].imag * *--window_ptr;
- *delay_ptr++ = -buf2[64-i-1].real * *--window_ptr;
- }
-}
-
-void imdct_do_512_c (imdct_t * p_imdct, float data[], float delay[])
-{
- int i, j;
- float tmp_a_r, tmp_a_i;
- float *data_ptr;
- float *delay_ptr;
- float *window_ptr;
-
- /* 512 IMDCT with source and dest data in 'data'
- * Pre IFFT complex multiply plus IFFT complex conjugate */
-
- for( i=0; i < 128; i++) {
- j = pm128[i];
- /* a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
- * c = data[2*j] * xcos1[j];
- * b = data[256-2*j-1] * xsin1[j];
- * buf1[i].real = a - b + c;
- * buf1[i].imag = b + c; */
- p_imdct->buf[i].real = (data[256-2*j-1] * p_imdct->xcos1[j]) - (data[2*j] * p_imdct->xsin1[j]);
- p_imdct->buf[i].imag = -1.0 * (data[2*j] * p_imdct->xcos1[j] + data[256-2*j-1] * p_imdct->xsin1[j]);
- }
-
- fft_128p_c (&p_imdct->buf[0]);
-
- /* Post IFFT complex multiply plus IFFT complex conjugate */
- for (i=0; i < 128; i++) {
- tmp_a_r = p_imdct->buf[i].real;
- tmp_a_i = p_imdct->buf[i].imag;
- /* a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]);
- * b = tmp_a_r * xsin1[j];
- * c = tmp_a_i * xcos1[j];
- * buf[j].real = a - b + c;
- * buf[j].imag = b + c; */
- p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i]) + (tmp_a_i * p_imdct->xsin1[i]);
- p_imdct->buf[i].imag =(tmp_a_r * p_imdct->xsin1[i]) - (tmp_a_i * p_imdct->xcos1[i]);
- }
-
- data_ptr = data;
- delay_ptr = delay;
- window_ptr = window;
-
- /* Window and convert to real valued signal */
- for (i=0; i< 64; i++) {
- *data_ptr++ = -p_imdct->buf[64+i].imag * *window_ptr++ + *delay_ptr++;
- *data_ptr++ = p_imdct->buf[64-i-1].real * *window_ptr++ + *delay_ptr++;
- }
-
- for(i=0; i< 64; i++) {
- *data_ptr++ = -p_imdct->buf[i].real * *window_ptr++ + *delay_ptr++;
- *data_ptr++ = p_imdct->buf[128-i-1].imag * *window_ptr++ + *delay_ptr++;
- }
-
- /* The trailing edge of the window goes into the delay line */
- delay_ptr = delay;
-
- for(i=0; i< 64; i++) {
- *delay_ptr++ = -p_imdct->buf[64+i].real * *--window_ptr;
- *delay_ptr++ = p_imdct->buf[64-i-1].imag * *--window_ptr;
- }
-
- for(i=0; i<64; i++) {
- *delay_ptr++ = p_imdct->buf[i].imag * *--window_ptr;
- *delay_ptr++ = -p_imdct->buf[128-i-1].real * *--window_ptr;
- }
-}
-
-
-void imdct_do_512_nol_c (imdct_t * p_imdct, float data[], float delay[])
-{
- int i, j;
-
- float tmp_a_i;
- float tmp_a_r;
-
- float *data_ptr;
- float *delay_ptr;
- float *window_ptr;
-
- /* 512 IMDCT with source and dest data in 'data'
- * Pre IFFT complex multiply plus IFFT cmplx conjugate */
-
- for( i=0; i < 128; i++) {
- /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) */
- j = pm128[i];
- /* a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
- * c = data[2*j] * xcos1[j];
- * b = data[256-2*j-1] * xsin1[j];
- * buf1[i].real = a - b + c;
- * buf1[i].imag = b + c; */
- p_imdct->buf[i].real = (data[256-2*j-1] * p_imdct->xcos1[j]) - (data[2*j] * p_imdct->xsin1[j]);
- p_imdct->buf[i].imag = -1.0 * (data[2*j] * p_imdct->xcos1[j] + data[256-2*j-1] * p_imdct->xsin1[j]);
- }
-
- fft_128p_c (&p_imdct->buf[0]);
-
- /* Post IFFT complex multiply plus IFFT complex conjugate*/
- for (i=0; i < 128; i++) {
- /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ;
- * int j1 = i; */
- tmp_a_r = p_imdct->buf[i].real;
- tmp_a_i = p_imdct->buf[i].imag;
- /* a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]);
- * b = tmp_a_r * xsin1[j];
- * c = tmp_a_i * xcos1[j];
- * buf[j].real = a - b + c;
- * buf[j].imag = b + c; */
- p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i]) + (tmp_a_i * p_imdct->xsin1[i]);
- p_imdct->buf[i].imag =(tmp_a_r * p_imdct->xsin1[i]) - (tmp_a_i * p_imdct->xcos1[i]);
- }
-
- data_ptr = data;
- delay_ptr = delay;
- window_ptr = window;
-
- /* Window and convert to real valued signal, no overlap here*/
- for (i=0; i< 64; i++) {
- *data_ptr++ = -p_imdct->buf[64+i].imag * *window_ptr++;
- *data_ptr++ = p_imdct->buf[64-i-1].real * *window_ptr++;
- }
-
- for(i=0; i< 64; i++) {
- *data_ptr++ = -p_imdct->buf[i].real * *window_ptr++;
- *data_ptr++ = p_imdct->buf[128-i-1].imag * *window_ptr++;
- }
-
- /* The trailing edge of the window goes into the delay line */
- delay_ptr = delay;
-
- for(i=0; i< 64; i++) {
- *delay_ptr++ = -p_imdct->buf[64+i].real * *--window_ptr;
- *delay_ptr++ = p_imdct->buf[64-i-1].imag * *--window_ptr;
- }
-
- for(i=0; i<64; i++) {
- *delay_ptr++ = p_imdct->buf[i].imag * *--window_ptr;
- *delay_ptr++ = -p_imdct->buf[128-i-1].real * *--window_ptr;
- }
-}
+++ /dev/null
-/*****************************************************************************
- * ac3_imdct_sse.c: ac3 DCT
- *****************************************************************************
- * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_imdct_sse.c,v 1.1 2001/05/14 15:58:04 reno Exp $
- *
- * Authors: Renaud Dartus <reno@videolan.org>
- * Aaron Holtzman <aholtzma@engr.uvic.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
- *****************************************************************************/
-
-#include "defs.h"
-
-#include <math.h>
-#include <stdio.h>
-
-#include "config.h"
-#include "common.h"
-#include "threads.h"
-#include "mtime.h"
-
-#include "intf_msg.h" /* intf_DbgMsg(), intf_ErrMsg() */
-
-#include "stream_control.h"
-#include "input_ext-dec.h"
-
-#include "ac3_decoder.h"
-
-#include "ac3_imdct_sse.h"
-
-static const float window[] = {
- 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
- 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
- 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
- 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
- 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
- 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
- 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
- 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
- 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
- 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
- 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
- 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
- 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
- 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
- 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
- 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
- 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
- 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
- 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
- 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
- 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
- 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
- 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
- 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
- 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
- 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
- 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
- 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
- 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
- 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
- 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
- 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
-};
-
-static const int pm128[128] =
-{
- 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120,
- 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124,
- 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122,
- 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126,
- 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121,
- 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125,
- 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123,
- 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127
-};
-
-void fft_64p_sse (complex_t *x);
-void fft_128p_sse(complex_t *a);
-static void imdct512_pre_ifft_twiddle_sse (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse);
-static void imdct512_post_ifft_twiddle_sse (complex_t *buf, float *xcos_sin_sse);
-static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt);
-static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt);
-
-
-int imdct_init_sse (imdct_t * p_imdct)
-{
- int i;
- float scale = 181.019;
-
- intf_WarnMsg (1, "ac3dec: using MMX_SSE for imdct");
- p_imdct->imdct_do_512 = imdct_do_512_sse;
- p_imdct->imdct_do_512_nol = imdct_do_512_nol_sse;
- p_imdct->fft_64p = fft_64p_sse;
-
- for (i=0; i < 128; i++)
- {
- float xcos_i = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
- float xsin_i = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
- p_imdct->xcos_sin_sse[i * 4] = xcos_i;
- p_imdct->xcos_sin_sse[i * 4 + 1] = -xsin_i;
- p_imdct->xcos_sin_sse[i * 4 + 2] = -xsin_i;
- p_imdct->xcos_sin_sse[i * 4 + 3] = -xcos_i;
- }
- return 0;
-}
-
-void imdct_do_512_sse (imdct_t * p_imdct, float data[], float delay[])
-{
- imdct512_pre_ifft_twiddle_sse (pm128, p_imdct->buf, data, p_imdct->xcos_sin_sse);
- fft_128p_sse (p_imdct->buf);
- imdct512_post_ifft_twiddle_sse (p_imdct->buf, p_imdct->xcos_sin_sse);
- imdct512_window_delay_sse (p_imdct->buf, data, window, delay);
-}
-
-
-void imdct_do_512_nol_sse (imdct_t * p_imdct, float data[], float delay[])
-{
- imdct512_pre_ifft_twiddle_sse (pm128, p_imdct->buf, data, p_imdct->xcos_sin_sse);
- fft_128p_sse (p_imdct->buf);
- imdct512_post_ifft_twiddle_sse (p_imdct->buf, p_imdct->xcos_sin_sse);
- imdct512_window_delay_nol_sse (p_imdct->buf, data, window, delay);
-}
-
-static void imdct512_pre_ifft_twiddle_sse (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse)
-{
- __asm__ __volatile__ (
- "pushl %%ebp\n"
- "movl %%esp, %%ebp\n"
- "addl $-4, %%esp\n" /* local variable, loop counter */
-
- "pushl %%eax\n"
- "pushl %%ebx\n"
- "pushl %%ecx\n"
- "pushl %%edx\n"
- "pushl %%edi\n"
- "pushl %%esi\n"
-
- "movl 8(%%ebp), %%eax\n" /* pmt */
- "movl 12(%%ebp), %%ebx\n" /* buf */
- "movl 16(%%ebp), %%ecx\n" /* data */
- "movl 20(%%ebp), %%edx\n" /* xcos_sin_sse */
- "movl $64, -4(%%ebp)\n"
-
-".loop:\n"
- "movl (%%eax), %%esi\n"
- "movl 4(%%eax), %%edi\n"
- "movss (%%ecx, %%esi, 8), %%xmm1\n" /* 2j */
- "movss (%%ecx, %%edi, 8), %%xmm3\n" /* 2(j+1) */
-
- "shll $1, %%esi\n"
- "shll $1, %%edi\n"
-
- "movups (%%edx, %%esi, 8), %%xmm0\n" /* -c_j | -s_j | -s_j | c_j */
- "movups (%%edx, %%edi, 8), %%xmm2\n" /* -c_j+1 | -s_j+1 | -s_j+1 | c_j+1 */
-
- "negl %%esi\n"
- "negl %%edi\n"
-
- "movss 1020(%%ecx, %%esi, 4), %%xmm4\n" /* 255-2j */
- "addl $8, %%eax\n"
- "movss 1020(%%ecx, %%edi, 4), %%xmm5\n" /* 255-2(j+1) */
-
- "shufps $0, %%xmm1, %%xmm4\n" /* 2j | 2j | 255-2j | 255-2j */
- "shufps $0, %%xmm3, %%xmm5\n" /* 2(j+1) | 2(j+1) | 255-2(j+1) | 255-2(j+1) */
- "mulps %%xmm4, %%xmm0\n"
- "mulps %%xmm5, %%xmm2\n"
- "movhlps %%xmm0, %%xmm1\n"
- "movhlps %%xmm2, %%xmm3\n"
- "addl $16, %%ebx\n"
- "addps %%xmm1, %%xmm0\n"
- "addps %%xmm3, %%xmm2\n"
- "movlhps %%xmm2, %%xmm0\n"
-
- "movups %%xmm0, -16(%%ebx)\n"
- "decl -4(%%ebp)\n"
- "jnz .loop\n"
-
- "popl %%esi\n"
- "popl %%edi\n"
- "popl %%edx\n"
- "popl %%ecx\n"
- "popl %%ebx\n"
- "popl %%eax\n"
-
- "addl $4, %%esp\n"
- "popl %%ebp\n"
- ::);
-}
-
-static void imdct512_post_ifft_twiddle_sse (complex_t *buf, float *xcos_sin_sse)
-{
- __asm__ __volatile__ (
- "pushl %%ecx\n"
- "movl $32, %%ecx\n" /* loop counter */
-
-".loop1:\n"
- "movups (%%eax), %%xmm0\n" /* im1 | re1 | im0 | re0 */
-
- "movups (%%ebx), %%xmm2\n" /* -c | -s | -s | c */
- "movhlps %%xmm0, %%xmm1\n" /* im1 | re1 */
- "movups 16(%%ebx), %%xmm3\n" /* -c1 | -s1 | -s1 | c1 */
-
- "shufps $0x50, %%xmm0, %%xmm0\n" /* im0 | im0 | re0 | re0 */
- "shufps $0x50, %%xmm1, %%xmm1\n" /* im1 | im1 | re1 | re1 */
-
- "movups 16(%%eax), %%xmm4\n" /* im3 | re3 | im2 | re2 */
-
- "shufps $0x27, %%xmm2, %%xmm2\n" /* c | -s | -s | -c */
- "movhlps %%xmm4, %%xmm5\n" /* im3 | re3 */
- "shufps $0x27, %%xmm3, %%xmm3\n" /* c1 | -s1 | -s1 | -c1 */
-
- "movups 32(%%ebx), %%xmm6\n" /* -c2 | -s2 | -s2 | c2 */
- "movups 48(%%ebx), %%xmm7\n" /* -c3 | -s3 | -s3 | c3 */
-
- "shufps $0x50, %%xmm4, %%xmm4\n" /* im2 | im2 | re2 | re2 */
- "shufps $0x50, %%xmm5, %%xmm5\n" /* im3 | im3 | re3 | re3 */
-
- "mulps %%xmm2, %%xmm0\n"
- "mulps %%xmm3, %%xmm1\n"
-
- "shufps $0x27, %%xmm6, %%xmm6\n" /* c2 | -s2 | -s2 | -c2 */
- "shufps $0x27, %%xmm7, %%xmm7\n" /* c3 | -s3 | -s3 | -c3 */
-
- "movhlps %%xmm0, %%xmm2\n"
- "movhlps %%xmm1, %%xmm3\n"
-
- "mulps %%xmm6, %%xmm4\n"
- "mulps %%xmm7, %%xmm5\n"
-
- "addps %%xmm2, %%xmm0\n"
- "addps %%xmm3, %%xmm1\n"
-
- "movhlps %%xmm4, %%xmm6\n"
- "movhlps %%xmm5, %%xmm7\n"
-
- "addps %%xmm6, %%xmm4\n"
- "addps %%xmm7, %%xmm5\n"
-
- "movlhps %%xmm1, %%xmm0\n"
- "movlhps %%xmm5, %%xmm4\n"
-
- "movups %%xmm0, (%%eax)\n"
- "movups %%xmm4, 16(%%eax)\n"
- "addl $64, %%ebx\n"
- "addl $32, %%eax\n"
- "decl %%ecx\n"
- "jnz .loop1\n"
-
- "popl %%ecx\n"
- : "=a" (buf)
- : "a" (buf), "b" (xcos_sin_sse) );
-}
-
-static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt)
-{
- __asm__ __volatile__ (
- "pushl %%ebp\n"
- "movl %%esp, %%ebp\n"
-
- "pushl %%eax\n"
- "pushl %%ebx\n"
- "pushl %%ecx\n"
- "pushl %%edx\n"
- "pushl %%esi\n"
- "pushl %%edi\n"
-
- "movl 20(%%ebp), %%ebx\n" /* delay */
- "movl 16(%%ebp), %%edx\n" /* window */
-
- "movl 8(%%ebp), %%eax\n" /* buf */
- "movl $16, %%ecx\n" /* loop count */
- "leal 516(%%eax), %%esi\n" /* buf[64].im */
- "leal 504(%%eax), %%edi\n" /* buf[63].re */
- "movl 12(%%ebp), %%eax\n" /* data */
-
-".first_128_samples:\n"
- "movss (%%esi), %%xmm0\n"
- "movss 8(%%esi), %%xmm2\n"
- "movss (%%edi), %%xmm1\n"
- "movss -8(%%edi), %%xmm3\n"
-
- "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */
- "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */
-
- "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
- "movups (%%ebx), %%xmm5\n" /* d3 | d2 | d1 | d0 */
- "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
-
- "movss 16(%%esi), %%xmm6\n" /* im2 */
- "movss 24(%%esi), %%xmm7\n" /* im3 */
- "subps %%xmm1, %%xmm0\n" /* -re1 | im1 | -re0 | im0 */
- "movss -16(%%edi), %%xmm2\n" /* re2 */
- "movss -24(%%edi), %%xmm3\n" /* re3 */
- "mulps %%xmm4, %%xmm0\n"
- "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */
- "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */
- "addps %%xmm5, %%xmm0\n"
- "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
- "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */
- "movups 16(%%ebx), %%xmm5\n" /* d7 | d6 | d5 | d4 */
- "subps %%xmm2, %%xmm6\n" /* -re3 | im3 | -re2 | im2 */
- "addl $32, %%edx\n"
- "movups %%xmm0, (%%eax)\n"
- "addl $32, %%ebx\n"
- "mulps %%xmm4, %%xmm6\n"
- "addl $32, %%esi\n"
- "addl $32, %%eax\n"
- "addps %%xmm5, %%xmm6\n"
- "addl $-32, %%edi\n"
- "movups %%xmm6, -16(%%eax)\n"
- "decl %%ecx\n"
- "jnz .first_128_samples\n"
-
- "movl 8(%%ebp), %%esi\n" /* buf[0].re */
- "leal 1020(%%esi), %%edi\n" /* buf[127].im */
- "movl $16, %%ecx\n" /* loop count */
-
-".second_128_samples:\n"
- "movss (%%esi), %%xmm0\n" /* buf[i].re */
- "movss 8(%%esi), %%xmm2\n" /* re1 */
- "movss (%%edi), %%xmm1\n" /* buf[127-i].im */
- "movss -8(%%edi), %%xmm3\n" /* im1 */
-
- "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */
- "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im1 */
-
- "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
- "movups (%%ebx), %%xmm5\n" /* d3 | d2 | d1 | d0 */
-
- "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
- "movss 16(%%esi), %%xmm6\n" /* re2 */
- "movss 24(%%esi), %%xmm7\n" /* re3 */
- "movss -16(%%edi), %%xmm2\n" /* im2 */
- "movss -24(%%edi), %%xmm3\n" /* im3 */
- "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */
- "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */
- "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */
- "mulps %%xmm4, %%xmm0\n"
- "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
- "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */
- "addl $32, %%esi\n"
- "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */
- "addps %%xmm5, %%xmm0\n"
- "mulps %%xmm4, %%xmm6\n"
- "addl $-32, %%edi\n"
- "movups 16(%%ebx), %%xmm5\n" /* d7 | d6 | d5 | d4 */
- "movups %%xmm0, (%%eax)\n"
- "addps %%xmm5, %%xmm6\n"
- "addl $32, %%edx\n"
- "addl $32, %%eax\n"
- "addl $32, %%ebx\n"
- "movups %%xmm6, -16(%%eax)\n"
- "decl %%ecx\n"
- "jnz .second_128_samples\n"
-
- "movl 8(%%ebp), %%eax\n"
- "leal 512(%%eax), %%esi\n" /* buf[64].re */
- "leal 508(%%eax), %%edi\n" /* buf[63].im */
- "movl $16, %%ecx\n" /* loop count */
- "movl 20(%%ebp), %%eax\n" /* delay */
-
-".first_128_delay:\n"
- "movss (%%esi), %%xmm0\n"
- "movss 8(%%esi), %%xmm2\n"
- "movss (%%edi), %%xmm1\n"
- "movss -8(%%edi), %%xmm3\n"
-
- "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */
- "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */
-
- "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
- "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
- "movss 16(%%esi), %%xmm6\n" /* re2 */
- "movss 24(%%esi), %%xmm7\n" /* re3 */
- "movss -16(%%edi), %%xmm2\n" /* im2 */
- "movss -24(%%edi), %%xmm3\n" /* im3 */
- "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */
- "addl $-32, %%edx\n"
- "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */
- "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */
- "mulps %%xmm4, %%xmm0\n"
- "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */
- "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
- "movups %%xmm0, (%%eax)\n"
- "addl $32, %%esi\n"
- "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */
- "addl $-32, %%edi\n"
- "mulps %%xmm5, %%xmm6\n"
- "addl $32, %%eax\n"
- "movups %%xmm6, -16(%%eax)\n"
- "decl %%ecx\n"
- "jnz .first_128_delay\n"
-
- "movl 8(%%ebp), %%ebx\n"
- "leal 4(%%ebx), %%esi\n" /* buf[0].im */
- "leal 1016(%%ebx), %%edi\n" /* buf[127].re */
- "movl $16, %%ecx\n" /* loop count */
-
-".second_128_delay:\n"
- "movss (%%esi), %%xmm0\n"
- "movss 8(%%esi), %%xmm2\n"
- "movss (%%edi), %%xmm1\n"
- "movss -8(%%edi), %%xmm3\n"
-
- "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */
- "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */
-
- "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
- "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
- "movss 16(%%esi), %%xmm6\n" /* im2 */
- "movss 24(%%esi), %%xmm7\n" /* im3 */
- "movss -16(%%edi), %%xmm2\n" /* re2 */
- "movss -24(%%edi), %%xmm3\n" /* re3 */
- "subps %%xmm0, %%xmm1\n" /* re1 | -im1 | re0 | -im0 */
- "addl $-32, %%edx\n"
- "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */
- "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */
- "mulps %%xmm4, %%xmm1\n"
- "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */
- "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
- "movups %%xmm1, (%%eax)\n"
- "addl $32, %%esi\n"
- "subps %%xmm6, %%xmm2\n" /* re | -im3 | re | -im2 */
- "addl $-32, %%edi\n"
- "mulps %%xmm5, %%xmm2\n"
- "addl $32, %%eax\n"
- "movups %%xmm2, -16(%%eax)\n"
- "decl %%ecx\n"
- "jnz .second_128_delay\n"
-
- "popl %%edi\n"
- "popl %%esi\n"
- "popl %%edx\n"
- "popl %%ecx\n"
- "popl %%ebx\n"
- "popl %%eax\n"
-
- "leave\n"
- ::);
-}
-
-static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt)
-{
- __asm__ __volatile__ (
- "pushl %%ebp\n"
- "movl %%esp, %%ebp\n"
-
- "pushl %%eax\n"
- "pushl %%ebx\n"
- "pushl %%ecx\n"
- "pushl %%edx\n"
- "pushl %%esi\n"
- "pushl %%edi\n"
-
- /* movl 20(%%ebp), %%ebx delay */
- "movl 16(%%ebp), %%edx\n" /* window */
-
- "movl 8(%%ebp), %%eax\n" /* buf */
- "movl $16, %%ecx\n" /* loop count */
- "leal 516(%%eax), %%esi\n" /* buf[64].im */
- "leal 504(%%eax), %%edi\n" /* buf[63].re */
- "movl 12(%%ebp), %%eax\n" /* data */
-
-".first_128_sample:\n"
- "movss (%%esi), %%xmm0\n"
- "movss 8(%%esi), %%xmm2\n"
- "movss (%%edi), %%xmm1\n"
- "movss -8(%%edi), %%xmm3\n"
-
- "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */
- "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */
-
- "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
- /* movups (%%ebx), %%xmm5 d3 | d2 | d1 | d0 */
- "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
-
- "movss 16(%%esi), %%xmm6\n" /* im2 */
- "movss 24(%%esi), %%xmm7\n" /* im3 */
- "subps %%xmm1, %%xmm0\n" /* -re1 | im1 | -re0 | im0 */
- "movss -16(%%edi), %%xmm2\n" /* re2 */
- "movss -24(%%edi), %%xmm3\n" /* re3 */
- "mulps %%xmm4, %%xmm0\n"
- "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */
- "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */
- /* addps %%xmm5, %%xmm0 */
- "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
- "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */
- /* movups 16(%%ebx), %%xmm5 d7 | d6 | d5 | d4 */
- "subps %%xmm2, %%xmm6\n" /* -re3 | im3 | -re2 | im2 */
- "addl $32, %%edx\n"
- "movups %%xmm0, (%%eax)\n"
- /* addl $32, %%ebx */
- "mulps %%xmm4, %%xmm6\n"
- "addl $32, %%esi\n"
- "addl $32, %%eax\n"
- /* addps %%xmm5, %%xmm6 */
- "addl $-32, %%edi\n"
- "movups %%xmm6, -16(%%eax)\n"
- "decl %%ecx\n"
- "jnz .first_128_sample\n"
-
- "movl 8(%%ebp), %%esi\n" /* buf[0].re */
- "leal 1020(%%esi), %%edi\n" /* buf[127].im */
- "movl $16, %%ecx\n" /* loop count */
-
-".second_128_sample:\n"
- "movss (%%esi), %%xmm0\n" /* buf[i].re */
- "movss 8(%%esi), %%xmm2\n" /* re1 */
- "movss (%%edi), %%xmm1\n" /* buf[127-i].im */
- "movss -8(%%edi), %%xmm3\n" /* im1 */
-
- "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */
- "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im1 */
-
- "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
- /* movups (%%ebx), %%xmm5 d3 | d2 | d1 | d0 */
-
- "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
- "movss 16(%%esi), %%xmm6\n" /* re2 */
- "movss 24(%%esi), %%xmm7\n" /* re3 */
- "movss -16(%%edi), %%xmm2\n" /* im2 */
- "movss -24(%%edi), %%xmm3\n" /* im3 */
- "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */
- "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */
- "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */
- "mulps %%xmm4, %%xmm0\n"
- "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
- "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */
- "addl $32, %%esi\n"
- "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */
- /* addps %%xmm5, %%xmm0 */
- "mulps %%xmm4, %%xmm6\n"
- "addl $-32, %%edi\n"
- /* movups 16(%%ebx), %%xmm5 d7 | d6 | d5 | d4 */
- "movups %%xmm0, (%%eax)\n"
- /* addps %%xmm5, %%xmm6 */
- "addl $32, %%edx\n"
- "addl $32, %%eax\n"
- /* addl $32, %%ebx */
- "movups %%xmm6, -16(%%eax)\n"
- "decl %%ecx\n"
- "jnz .second_128_sample\n"
-
- "movl 8(%%ebp), %%eax\n"
- "leal 512(%%eax), %%esi\n" /* buf[64].re */
- "leal 508(%%eax), %%edi\n" /* buf[63].im */
- "movl $16, %%ecx\n" /* loop count */
- "movl 20(%%ebp), %%eax\n" /* delay */
-
-".first_128_delays:\n"
- "movss (%%esi), %%xmm0\n"
- "movss 8(%%esi), %%xmm2\n"
- "movss (%%edi), %%xmm1\n"
- "movss -8(%%edi), %%xmm3\n"
-
- "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */
- "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */
-
- "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
- "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
- "movss 16(%%esi), %%xmm6\n" /* re2 */
- "movss 24(%%esi), %%xmm7\n" /* re3 */
- "movss -16(%%edi), %%xmm2\n" /* im2 */
- "movss -24(%%edi), %%xmm3\n" /* im3 */
- "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */
- "addl $-32, %%edx\n"
- "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */
- "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */
- "mulps %%xmm4, %%xmm0\n"
- "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */
- "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
- "movups %%xmm0, (%%eax)\n"
- "addl $32, %%esi\n"
- "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */
- "addl $-32, %%edi\n"
- "mulps %%xmm5, %%xmm6\n"
- "addl $32, %%eax\n"
- "movups %%xmm6, -16(%%eax)\n"
- "decl %%ecx\n"
- "jnz .first_128_delays\n"
-
- "movl 8(%%ebp), %%ebx\n"
- "leal 4(%%ebx), %%esi\n" /* buf[0].im */
- "leal 1016(%%ebx), %%edi\n" /* buf[127].re */
- "movl $16, %%ecx\n" /* loop count */
-
-".second_128_delays:\n"
- "movss (%%esi), %%xmm0\n"
- "movss 8(%%esi), %%xmm2\n"
- "movss (%%edi), %%xmm1\n"
- "movss -8(%%edi), %%xmm3\n"
-
- "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */
- "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */
-
- "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */
- "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
- "movss 16(%%esi), %%xmm6\n" /* im2 */
- "movss 24(%%esi), %%xmm7\n" /* im3 */
- "movss -16(%%edi), %%xmm2\n" /* re2 */
- "movss -24(%%edi), %%xmm3\n" /* re3 */
- "subps %%xmm0, %%xmm1\n" /* re1 | -im1 | re0 | -im0 */
- "addl $-32, %%edx\n"
- "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */
- "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */
- "mulps %%xmm4, %%xmm1\n"
- "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */
- "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
- "movups %%xmm1, (%%eax)\n"
- "addl $32, %%esi\n"
- "subps %%xmm6, %%xmm2\n" /* re | -im3 | re | -im2 */
- "addl $-32, %%edi\n"
- "mulps %%xmm5, %%xmm2\n"
- "addl $32, %%eax\n"
- "movups %%xmm2, -16(%%eax)\n"
- "decl %%ecx\n"
- "jnz .second_128_delays\n"
-
- "popl %%edi\n"
- "popl %%esi\n"
- "popl %%edx\n"
- "popl %%ecx\n"
- "popl %%ebx\n"
- "popl %%eax\n"
-
- "leave\n"
- ::);
-}
+++ /dev/null
-int imdct_init_sse (imdct_t * p_imdct);
-void imdct_do_512_sse(imdct_t * p_imdct, float data[], float delay[]);
-void imdct_do_512_nol_sse(imdct_t * p_imdct, float data[], float delay[]);
* ac3_internals.h: needed by the ac3 decoder
*****************************************************************************
* Copyright (C) 2000 VideoLAN
- * $Id: ac3_internal.h,v 1.9 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_internal.h,v 1.10 2001/05/15 16:19:42 sam Exp $
*
* Authors: Michel Lespinasse <walken@zoy.org>
*
/* ac3_bit_allocate.c */
void bit_allocate (ac3dec_t *);
-/* ac3_downmix.c */
-void downmix_init (downmix_t * p_downmix);
-
/* ac3_exponent.c */
int exponent_unpack (ac3dec_t *);
/* ac3_rematrix.c */
void rematrix (ac3dec_t *);
+
* ac3_mantissa.c: ac3 mantissa computation
*****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_mantissa.c,v 1.28 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_mantissa.c,v 1.29 2001/05/15 16:19:42 sam Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
#include "defs.h"
#include <string.h> /* memcpy() */
#include "threads.h"
#include "mtime.h"
+#include "intf_msg.h"
+
#include "stream_control.h"
#include "input_ext-dec.h"
#include "audio_output.h"
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
#include "ac3_decoder.h"
-#include "intf_msg.h"
-
-#define Q0 ((-2 << 15) / 3.0)
-#define Q1 (0)
-#define Q2 ((2 << 15) / 3.0)
-static const float q_1_0[ 32 ] =
-{
- Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0,
- Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1,
- Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2,
- 0, 0, 0, 0, 0
-};
-static const float q_1_1[ 32 ] =
-{
- Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
- Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
- Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
- 0, 0, 0, 0, 0
-};
-static const float q_1_2[ 32 ] =
-{
- Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
- Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
- Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
- 0, 0, 0, 0, 0
-};
-#undef Q0
-#undef Q1
-#undef Q2
-
-#define Q0 ((-4 << 15) / 5.0)
-#define Q1 ((-2 << 15) / 5.0)
-#define Q2 (0)
-#define Q3 ((2 << 15) / 5.0)
-#define Q4 ((4 << 15) / 5.0)
-static const float q_2_0[ 128 ] =
-{
- Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,
- Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,
- Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,
- Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,
- Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,
- 0, 0, 0
-};
-static const float q_2_1[ 128 ] =
-{
- Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
- Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
- Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
- Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
- Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
- 0, 0, 0
-};
-static const float q_2_2[ 128 ] =
-{
- Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
- Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
- Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
- Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
- Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
- 0, 0, 0
-};
-#undef Q0
-#undef Q1
-#undef Q2
-#undef Q3
-#undef Q4
-
-#define Q0 ((-10 << 15) / 11.0)
-#define Q1 ((-8 << 15) / 11.0)
-#define Q2 ((-6 << 15) / 11.0)
-#define Q3 ((-4 << 15) / 11.0)
-#define Q4 ((-2 << 15) / 11.0)
-#define Q5 (0)
-#define Q6 ((2 << 15) / 11.0)
-#define Q7 ((4 << 15) / 11.0)
-#define Q8 ((6 << 15) / 11.0)
-#define Q9 ((8 << 15) / 11.0)
-#define QA ((10 << 15) / 11.0)
-static const float q_4_0[ 128 ] =
-{
- Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0,
- Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1,
- Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2,
- Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3,
- Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4,
- Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5,
- Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6,
- Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7,
- Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8,
- Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9,
- QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, QA,
- 0, 0, 0, 0, 0, 0, 0
-};
-static const float q_4_1[ 128 ] =
-{
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
- 0, 0, 0, 0, 0, 0, 0
-};
-#undef Q0
-#undef Q1
-#undef Q2
-#undef Q3
-#undef Q4
-#undef Q5
-#undef Q6
-#undef Q7
-#undef Q8
-#undef Q9
-#undef QA
-
-/* Lookup tables of 0.16 two's complement quantization values */
-
-static const float q_3[8] =
-{
- (-6 << 15)/7.0, (-4 << 15)/7.0, (-2 << 15)/7.0,
- 0 , (2 << 15)/7.0, (4 << 15)/7.0,
- (6 << 15)/7.0, 0
-};
-
-static const float q_5[16] =
-{
- (-14 << 15)/15.0, (-12 << 15)/15.0, (-10 << 15)/15.0,
- (-8 << 15)/15.0, (-6 << 15)/15.0, (-4 << 15)/15.0,
- (-2 << 15)/15.0, 0 , (2 << 15)/15.0,
- (4 << 15)/15.0, (6 << 15)/15.0, (8 << 15)/15.0,
- (10 << 15)/15.0, (12 << 15)/15.0, (14 << 15)/15.0,
- 0
-};
-
-/* Conversion from bap to number of bits in the mantissas
- * zeros account for cases 0,1,2,4 which are special cased */
-static const u16 qnttztab[16] =
-{
- 0, 0, 0, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16
-};
-
-static const float scale_factor[25] =
-{
- 6.10351562500000000000000000e-05,
- 3.05175781250000000000000000e-05,
- 1.52587890625000000000000000e-05,
- 7.62939453125000000000000000e-06,
- 3.81469726562500000000000000e-06,
- 1.90734863281250000000000000e-06,
- 9.53674316406250000000000000e-07,
- 4.76837158203125000000000000e-07,
- 2.38418579101562500000000000e-07,
- 1.19209289550781250000000000e-07,
- 5.96046447753906250000000000e-08,
- 2.98023223876953125000000000e-08,
- 1.49011611938476562500000000e-08,
- 7.45058059692382812500000000e-09,
- 3.72529029846191406250000000e-09,
- 1.86264514923095703125000000e-09,
- 9.31322574615478515625000000e-10,
- 4.65661287307739257812500000e-10,
- 2.32830643653869628906250000e-10,
- 1.16415321826934814453125000e-10,
- 5.82076609134674072265625000e-11,
- 2.91038304567337036132812500e-11,
- 1.45519152283668518066406250e-11,
- 7.27595761418342590332031250e-12,
- 3.63797880709171295166015625e-12,
-};
-
-static const u16 dither_lut[256] =
-{
- 0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055,
- 0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb,
- 0x21cd, 0x81dc, 0xc1fe, 0x61ef, 0x41ba, 0xe1ab, 0xa189, 0x0198,
- 0xe123, 0x4132, 0x0110, 0xa101, 0x8154, 0x2145, 0x6167, 0xc176,
- 0x439a, 0xe38b, 0xa3a9, 0x03b8, 0x23ed, 0x83fc, 0xc3de, 0x63cf,
- 0x8374, 0x2365, 0x6347, 0xc356, 0xe303, 0x4312, 0x0330, 0xa321,
- 0x6257, 0xc246, 0x8264, 0x2275, 0x0220, 0xa231, 0xe213, 0x4202,
- 0xa2b9, 0x02a8, 0x428a, 0xe29b, 0xc2ce, 0x62df, 0x22fd, 0x82ec,
- 0x8734, 0x2725, 0x6707, 0xc716, 0xe743, 0x4752, 0x0770, 0xa761,
- 0x47da, 0xe7cb, 0xa7e9, 0x07f8, 0x27ad, 0x87bc, 0xc79e, 0x678f,
- 0xa6f9, 0x06e8, 0x46ca, 0xe6db, 0xc68e, 0x669f, 0x26bd, 0x86ac,
- 0x6617, 0xc606, 0x8624, 0x2635, 0x0660, 0xa671, 0xe653, 0x4642,
- 0xc4ae, 0x64bf, 0x249d, 0x848c, 0xa4d9, 0x04c8, 0x44ea, 0xe4fb,
- 0x0440, 0xa451, 0xe473, 0x4462, 0x6437, 0xc426, 0x8404, 0x2415,
- 0xe563, 0x4572, 0x0550, 0xa541, 0x8514, 0x2505, 0x6527, 0xc536,
- 0x258d, 0x859c, 0xc5be, 0x65af, 0x45fa, 0xe5eb, 0xa5c9, 0x05d8,
- 0xae79, 0x0e68, 0x4e4a, 0xee5b, 0xce0e, 0x6e1f, 0x2e3d, 0x8e2c,
- 0x6e97, 0xce86, 0x8ea4, 0x2eb5, 0x0ee0, 0xaef1, 0xeed3, 0x4ec2,
- 0x8fb4, 0x2fa5, 0x6f87, 0xcf96, 0xefc3, 0x4fd2, 0x0ff0, 0xafe1,
- 0x4f5a, 0xef4b, 0xaf69, 0x0f78, 0x2f2d, 0x8f3c, 0xcf1e, 0x6f0f,
- 0xede3, 0x4df2, 0x0dd0, 0xadc1, 0x8d94, 0x2d85, 0x6da7, 0xcdb6,
- 0x2d0d, 0x8d1c, 0xcd3e, 0x6d2f, 0x4d7a, 0xed6b, 0xad49, 0x0d58,
- 0xcc2e, 0x6c3f, 0x2c1d, 0x8c0c, 0xac59, 0x0c48, 0x4c6a, 0xec7b,
- 0x0cc0, 0xacd1, 0xecf3, 0x4ce2, 0x6cb7, 0xcca6, 0x8c84, 0x2c95,
- 0x294d, 0x895c, 0xc97e, 0x696f, 0x493a, 0xe92b, 0xa909, 0x0918,
- 0xe9a3, 0x49b2, 0x0990, 0xa981, 0x89d4, 0x29c5, 0x69e7, 0xc9f6,
- 0x0880, 0xa891, 0xe8b3, 0x48a2, 0x68f7, 0xc8e6, 0x88c4, 0x28d5,
- 0xc86e, 0x687f, 0x285d, 0x884c, 0xa819, 0x0808, 0x482a, 0xe83b,
- 0x6ad7, 0xcac6, 0x8ae4, 0x2af5, 0x0aa0, 0xaab1, 0xea93, 0x4a82,
- 0xaa39, 0x0a28, 0x4a0a, 0xea1b, 0xca4e, 0x6a5f, 0x2a7d, 0x8a6c,
- 0x4b1a, 0xeb0b, 0xab29, 0x0b38, 0x2b6d, 0x8b7c, 0xcb5e, 0x6b4f,
- 0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1
-};
-
-static __inline__ u16 dither_gen (mantissa_t * p_mantissa)
-{
- s16 state;
-
- state = dither_lut[p_mantissa->lfsr_state >> 8] ^
- (p_mantissa->lfsr_state << 8);
- p_mantissa->lfsr_state = (u16) state;
- return ( (state * (s32) (0.707106 * 256.0)) >> 8 );
-}
-
-
-/* Fetch an unpacked, left justified, and properly biased/dithered mantissa value */
-static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithflag,
- u16 exp)
-{
- u16 group_code = 0;
-
- /* If the bap is 0-5 then we have special cases to take care of */
- switch (bap)
- {
- case 0:
- if (dithflag)
- {
- return ( dither_gen(&p_ac3dec->mantissa) * scale_factor[exp] );
- }
- return (0);
-
- case 1:
- if (p_ac3dec->mantissa.q_1_pointer >= 0)
- {
- return (p_ac3dec->mantissa.q_1[p_ac3dec->mantissa.q_1_pointer--] *
- scale_factor[exp]);
- }
-
- p_ac3dec->total_bits_read += 5;
- if ((group_code = GetBits (&p_ac3dec->bit_stream,5)) > 26)
- {
- intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (1)" );
- return 0;
- }
-
- p_ac3dec->mantissa.q_1[ 1 ] = q_1_1[ group_code ];
- p_ac3dec->mantissa.q_1[ 0 ] = q_1_2[ group_code ];
-
- p_ac3dec->mantissa.q_1_pointer = 1;
-
- return (q_1_0[group_code] * scale_factor[exp]);
-
- case 2:
- if (p_ac3dec->mantissa.q_2_pointer >= 0)
- {
- return (p_ac3dec->mantissa.q_2[p_ac3dec->mantissa.q_2_pointer--] *
- scale_factor[exp]);
- }
-
- p_ac3dec->total_bits_read += 7;
- if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 124)
- {
- intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (2)" );
- return 0;
- }
-
- p_ac3dec->mantissa.q_2[ 1 ] = q_2_1[ group_code ];
- p_ac3dec->mantissa.q_2[ 0 ] = q_2_2[ group_code ];
-
- p_ac3dec->mantissa.q_2_pointer = 1;
-
- return (q_2_0[group_code] * scale_factor[exp]);
-
- case 3:
- p_ac3dec->total_bits_read += 3;
- if ((group_code = GetBits (&p_ac3dec->bit_stream,3)) > 6)
- {
- intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (3)" );
- return 0;
- }
-
- return (q_3[group_code] * scale_factor[exp]);
-
- case 4:
- if (p_ac3dec->mantissa.q_4_pointer >= 0)
- {
- return (p_ac3dec->mantissa.q_4[p_ac3dec->mantissa.q_4_pointer--] *
- scale_factor[exp]);
- }
-
- p_ac3dec->total_bits_read += 7;
- if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 120)
- {
- intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (4)" );
- return 0;
- }
-
- p_ac3dec->mantissa.q_4[ 0 ] = q_4_1[group_code];
-
- p_ac3dec->mantissa.q_4_pointer = 0;
-
- return (q_4_0[group_code] * scale_factor[exp]);
-
- case 5:
- p_ac3dec->total_bits_read += 4;
- if ((group_code = GetBits (&p_ac3dec->bit_stream,4)) > 14)
- {
- intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (5)" );
- return 0;
- }
-
- return (q_5[group_code] * scale_factor[exp]);
-
- default:
- group_code = GetBits (&p_ac3dec->bit_stream,qnttztab[bap]);
- group_code <<= 16 - qnttztab[bap];
- p_ac3dec->total_bits_read += qnttztab[bap];
-
- return ((s16)(group_code) * scale_factor[exp]);
- }
-}
-
-/* Uncouple the coupling channel into a fbw channel */
-static __inline__ void uncouple_channel (ac3dec_t * p_ac3dec, u32 ch)
-{
- u32 bnd = 0;
- u32 sub_bnd = 0;
- u32 i,j;
- float cpl_coord = 1.0;
- u32 cpl_exp_tmp;
- u32 cpl_mant_tmp;
-
- for (i = p_ac3dec->audblk.cplstrtmant; i < p_ac3dec->audblk.cplendmant;)
- {
- if (!p_ac3dec->audblk.cplbndstrc[sub_bnd++])
- {
- cpl_exp_tmp = p_ac3dec->audblk.cplcoexp[ch][bnd] +
- 3 * p_ac3dec->audblk.mstrcplco[ch];
- if (p_ac3dec->audblk.cplcoexp[ch][bnd] == 15)
- {
- cpl_mant_tmp = (p_ac3dec->audblk.cplcomant[ch][bnd]) << 11;
- }
- else
- {
- cpl_mant_tmp = ((0x10) | p_ac3dec->audblk.cplcomant[ch][bnd]) << 10;
- }
- cpl_coord = (cpl_mant_tmp) * scale_factor[cpl_exp_tmp] * 8.0f;
-
- /* Invert the phase for the right channel if necessary */
- if (p_ac3dec->bsi.acmod == 0x02 && p_ac3dec->audblk.phsflginu &&
- ch == 1 && p_ac3dec->audblk.phsflg[bnd])
- {
- cpl_coord *= -1;
- }
- bnd++;
- }
-
- for (j=0;j < 12; j++)
- {
- /* Get new dither values for each channel if necessary,
- * so the channels are uncorrelated */
- if (p_ac3dec->audblk.dithflag[ch] && !p_ac3dec->audblk.cpl_bap[i])
- {
- p_ac3dec->samples[ch][i] = cpl_coord * dither_gen(&p_ac3dec->mantissa) *
- scale_factor[p_ac3dec->audblk.cpl_exp[i]];
- } else {
- p_ac3dec->samples[ch][i] = cpl_coord * p_ac3dec->audblk.cpl_flt[i];
- }
- i++;
- }
- }
-}
+#include "ac3_mantissa.h"
void mantissa_unpack (ac3dec_t * p_ac3dec)
{
--- /dev/null
+/*****************************************************************************
+ * ac3_mantissa.h: ac3 mantissa computation
+ *****************************************************************************
+ * Copyright (C) 1999, 2000, 2001 VideoLAN
+ * $Id: ac3_mantissa.h,v 1.4 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Michel Kaempf <maxx@via.ecp.fr>
+ * Aaron Holtzman <aholtzma@engr.uvic.ca>
+ * Renaud Dartus <reno@videolan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+/* Quantization tables for bap 1: three 3-level mantissas share one group code.
+ * The levels Q0..Q2 are -2/3, 0 and 2/3, each scaled by 2^15. For a group
+ * code c in 0..26, q_1_0[c], q_1_1[c] and q_1_2[c] hold the levels selected
+ * by c / 9, (c / 3) % 3 and c % 3 respectively. Entries 27..31 are zero
+ * padding so that any 5-bit index stays inside the arrays.
+ * NOTE(review): (-2 << 15) left-shifts a negative constant, which ISO C
+ * leaves undefined; -(2 << 15) would be the portable spelling - confirm
+ * before changing, the values are relied on by coeff_get_float(). */
+#define Q0 ((-2 << 15) / 3.0)
+#define Q1 (0)
+#define Q2 ((2 << 15) / 3.0)
+static const float q_1_0[ 32 ] =
+{
+ Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0,
+ Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1,
+ Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2,
+ 0, 0, 0, 0, 0
+};
+static const float q_1_1[ 32 ] =
+{
+ Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
+ Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
+ Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
+ 0, 0, 0, 0, 0
+};
+static const float q_1_2[ 32 ] =
+{
+ Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
+ Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
+ Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
+ 0, 0, 0, 0, 0
+};
+#undef Q0
+#undef Q1
+#undef Q2
+/* Quantization tables for bap 2: three 5-level mantissas share one group code.
+ * The levels Q0..Q4 are -4/5, -2/5, 0, 2/5 and 4/5, each scaled by 2^15.
+ * For a group code c in 0..124, q_2_0[c], q_2_1[c] and q_2_2[c] hold the
+ * levels selected by c / 25, (c / 5) % 5 and c % 5 respectively. Entries
+ * 125..127 are zero padding so that any 7-bit index stays inside the arrays. */
+#define Q0 ((-4 << 15) / 5.0)
+#define Q1 ((-2 << 15) / 5.0)
+#define Q2 (0)
+#define Q3 ((2 << 15) / 5.0)
+#define Q4 ((4 << 15) / 5.0)
+static const float q_2_0[ 128 ] =
+{
+ Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,
+ Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,
+ Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,
+ Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,
+ Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,
+ 0, 0, 0
+};
+static const float q_2_1[ 128 ] =
+{
+ Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+ Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+ Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+ Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+ Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+ 0, 0, 0
+};
+static const float q_2_2[ 128 ] =
+{
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+ 0, 0, 0
+};
+#undef Q0
+#undef Q1
+#undef Q2
+#undef Q3
+#undef Q4
+/* Quantization tables for bap 4: two 11-level mantissas share one group code.
+ * The levels Q0..QA run from -10/11 to 10/11 in steps of 2/11, each scaled
+ * by 2^15. For a group code c in 0..120, q_4_0[c] and q_4_1[c] hold the
+ * levels selected by c / 11 and c % 11 respectively. Entries 121..127 are
+ * zero padding so that any 7-bit index stays inside the arrays. */
+#define Q0 ((-10 << 15) / 11.0)
+#define Q1 ((-8 << 15) / 11.0)
+#define Q2 ((-6 << 15) / 11.0)
+#define Q3 ((-4 << 15) / 11.0)
+#define Q4 ((-2 << 15) / 11.0)
+#define Q5 (0)
+#define Q6 ((2 << 15) / 11.0)
+#define Q7 ((4 << 15) / 11.0)
+#define Q8 ((6 << 15) / 11.0)
+#define Q9 ((8 << 15) / 11.0)
+#define QA ((10 << 15) / 11.0)
+static const float q_4_0[ 128 ] =
+{
+ Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0,
+ Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1,
+ Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2,
+ Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3,
+ Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4,
+ Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5,
+ Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6,
+ Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7,
+ Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8,
+ Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9,
+ QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, QA,
+ 0, 0, 0, 0, 0, 0, 0
+};
+static const float q_4_1[ 128 ] =
+{
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+ 0, 0, 0, 0, 0, 0, 0
+};
+#undef Q0
+#undef Q1
+#undef Q2
+#undef Q3
+#undef Q4
+#undef Q5
+#undef Q6
+#undef Q7
+#undef Q8
+#undef Q9
+#undef QA
+
+/* Lookup tables of 0.16 two's complement quantization values, stored as
+ * floats, for the two ungrouped quantizers.
+ * q_3 (bap 3) holds the 7 levels -6/7 .. 6/7 in steps of 2/7, scaled by 2^15,
+ * indexed by a 3-bit code; q_5 (bap 5) holds the 15 levels -14/15 .. 14/15 in
+ * steps of 2/15, scaled by 2^15, indexed by a 4-bit code. The last entry of
+ * each table is zero padding for the one code value that is not a valid
+ * level (coeff_get_float() rejects codes above 6 and 14 respectively). */
+
+static const float q_3[8] =
+{
+ (-6 << 15)/7.0, (-4 << 15)/7.0, (-2 << 15)/7.0,
+ 0 , (2 << 15)/7.0, (4 << 15)/7.0,
+ (6 << 15)/7.0, 0
+};
+
+static const float q_5[16] =
+{
+ (-14 << 15)/15.0, (-12 << 15)/15.0, (-10 << 15)/15.0,
+ (-8 << 15)/15.0, (-6 << 15)/15.0, (-4 << 15)/15.0,
+ (-2 << 15)/15.0, 0 , (2 << 15)/15.0,
+ (4 << 15)/15.0, (6 << 15)/15.0, (8 << 15)/15.0,
+ (10 << 15)/15.0, (12 << 15)/15.0, (14 << 15)/15.0,
+ 0
+};
+
+/* Conversion from bap to number of bits in the mantissas.
+ * Zeros account for cases 0,1,2,4 which are special cased in
+ * coeff_get_float(). That function reads this table only in its default
+ * branch (bap values without a dedicated case); entries 3 and 5 match the
+ * 3-bit and 4-bit reads hard-coded in the bap 3 and bap 5 cases. */
+static const u16 qnttztab[16] =
+{
+ 0, 0, 0, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16
+};
+/* Exponent scaling table: scale_factor[e] is 2^-(14 + e) for e in 0..24,
+ * i.e. 2^-14 down to 2^-38, each entry half of the previous one.
+ * coeff_get_float() indexes it with the mantissa exponent, and
+ * uncouple_channel() with the coupling exponent cplcoexp + 3 * mstrcplco
+ * and with cpl_exp[]. */
+static const float scale_factor[25] =
+{
+ 6.10351562500000000000000000e-05,
+ 3.05175781250000000000000000e-05,
+ 1.52587890625000000000000000e-05,
+ 7.62939453125000000000000000e-06,
+ 3.81469726562500000000000000e-06,
+ 1.90734863281250000000000000e-06,
+ 9.53674316406250000000000000e-07,
+ 4.76837158203125000000000000e-07,
+ 2.38418579101562500000000000e-07,
+ 1.19209289550781250000000000e-07,
+ 5.96046447753906250000000000e-08,
+ 2.98023223876953125000000000e-08,
+ 1.49011611938476562500000000e-08,
+ 7.45058059692382812500000000e-09,
+ 3.72529029846191406250000000e-09,
+ 1.86264514923095703125000000e-09,
+ 9.31322574615478515625000000e-10,
+ 4.65661287307739257812500000e-10,
+ 2.32830643653869628906250000e-10,
+ 1.16415321826934814453125000e-10,
+ 5.82076609134674072265625000e-11,
+ 2.91038304567337036132812500e-11,
+ 1.45519152283668518066406250e-11,
+ 7.27595761418342590332031250e-12,
+ 3.63797880709171295166015625e-12,
+};
+/* State-update table for dither_gen(): the entry at index lfsr_state >> 8 is
+ * XORed with the state shifted left by 8 bits to form the next state.
+ * 256 entries, one per value of the index byte.
+ * NOTE(review): presumably the precomputed effect of stepping a 16-bit LFSR
+ * eight times - the generator polynomial is not visible here, so confirm
+ * against the reference decoder before editing any entry. */
+static const u16 dither_lut[256] =
+{
+ 0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055,
+ 0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb,
+ 0x21cd, 0x81dc, 0xc1fe, 0x61ef, 0x41ba, 0xe1ab, 0xa189, 0x0198,
+ 0xe123, 0x4132, 0x0110, 0xa101, 0x8154, 0x2145, 0x6167, 0xc176,
+ 0x439a, 0xe38b, 0xa3a9, 0x03b8, 0x23ed, 0x83fc, 0xc3de, 0x63cf,
+ 0x8374, 0x2365, 0x6347, 0xc356, 0xe303, 0x4312, 0x0330, 0xa321,
+ 0x6257, 0xc246, 0x8264, 0x2275, 0x0220, 0xa231, 0xe213, 0x4202,
+ 0xa2b9, 0x02a8, 0x428a, 0xe29b, 0xc2ce, 0x62df, 0x22fd, 0x82ec,
+ 0x8734, 0x2725, 0x6707, 0xc716, 0xe743, 0x4752, 0x0770, 0xa761,
+ 0x47da, 0xe7cb, 0xa7e9, 0x07f8, 0x27ad, 0x87bc, 0xc79e, 0x678f,
+ 0xa6f9, 0x06e8, 0x46ca, 0xe6db, 0xc68e, 0x669f, 0x26bd, 0x86ac,
+ 0x6617, 0xc606, 0x8624, 0x2635, 0x0660, 0xa671, 0xe653, 0x4642,
+ 0xc4ae, 0x64bf, 0x249d, 0x848c, 0xa4d9, 0x04c8, 0x44ea, 0xe4fb,
+ 0x0440, 0xa451, 0xe473, 0x4462, 0x6437, 0xc426, 0x8404, 0x2415,
+ 0xe563, 0x4572, 0x0550, 0xa541, 0x8514, 0x2505, 0x6527, 0xc536,
+ 0x258d, 0x859c, 0xc5be, 0x65af, 0x45fa, 0xe5eb, 0xa5c9, 0x05d8,
+ 0xae79, 0x0e68, 0x4e4a, 0xee5b, 0xce0e, 0x6e1f, 0x2e3d, 0x8e2c,
+ 0x6e97, 0xce86, 0x8ea4, 0x2eb5, 0x0ee0, 0xaef1, 0xeed3, 0x4ec2,
+ 0x8fb4, 0x2fa5, 0x6f87, 0xcf96, 0xefc3, 0x4fd2, 0x0ff0, 0xafe1,
+ 0x4f5a, 0xef4b, 0xaf69, 0x0f78, 0x2f2d, 0x8f3c, 0xcf1e, 0x6f0f,
+ 0xede3, 0x4df2, 0x0dd0, 0xadc1, 0x8d94, 0x2d85, 0x6da7, 0xcdb6,
+ 0x2d0d, 0x8d1c, 0xcd3e, 0x6d2f, 0x4d7a, 0xed6b, 0xad49, 0x0d58,
+ 0xcc2e, 0x6c3f, 0x2c1d, 0x8c0c, 0xac59, 0x0c48, 0x4c6a, 0xec7b,
+ 0x0cc0, 0xacd1, 0xecf3, 0x4ce2, 0x6cb7, 0xcca6, 0x8c84, 0x2c95,
+ 0x294d, 0x895c, 0xc97e, 0x696f, 0x493a, 0xe92b, 0xa909, 0x0918,
+ 0xe9a3, 0x49b2, 0x0990, 0xa981, 0x89d4, 0x29c5, 0x69e7, 0xc9f6,
+ 0x0880, 0xa891, 0xe8b3, 0x48a2, 0x68f7, 0xc8e6, 0x88c4, 0x28d5,
+ 0xc86e, 0x687f, 0x285d, 0x884c, 0xa819, 0x0808, 0x482a, 0xe83b,
+ 0x6ad7, 0xcac6, 0x8ae4, 0x2af5, 0x0aa0, 0xaab1, 0xea93, 0x4a82,
+ 0xaa39, 0x0a28, 0x4a0a, 0xea1b, 0xca4e, 0x6a5f, 0x2a7d, 0x8a6c,
+ 0x4b1a, 0xeb0b, 0xab29, 0x0b38, 0x2b6d, 0x8b7c, 0xcb5e, 0x6b4f,
+ 0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1
+};
+/* Produce the next dither value and advance the generator state.
+ * The new state is dither_lut[lfsr_state >> 8] XOR (lfsr_state << 8); it is
+ * written back to p_mantissa->lfsr_state. The value returned is that state,
+ * held in a signed 16-bit variable, multiplied by (s32)(0.707106 * 256.0)
+ * (which is 181) and shifted right by 8 - roughly state * 0.707.
+ * NOTE(review): the return type is u16 although the product is signed, so a
+ * negative result is converted to unsigned on return and callers that
+ * multiply the result by a float never see a negative dither value
+ * (assuming u16 is an unsigned 16-bit typedef). Confirm whether a signed
+ * return type was intended before relying on zero-mean dither. */
+static __inline__ u16 dither_gen (mantissa_t * p_mantissa)
+{
+ s16 state;
+ /* table lookup on the upper bits, lower bits move up by one byte; the
+ * assignment to the s16 variable narrows the int result to 16 bits */
+ state = dither_lut[p_mantissa->lfsr_state >> 8] ^
+ (p_mantissa->lfsr_state << 8);
+ p_mantissa->lfsr_state = (u16) state;
+ return ( (state * (s32) (0.707106 * 256.0)) >> 8 );
+}
+
+
+/* Fetch an unpacked, left justified, and properly biased/dithered mantissa
+ * value, converted to float.
+ *
+ * p_ac3dec: decoder state; its bit stream, its total_bits_read counter and
+ * the grouped-mantissa caches in p_ac3dec->mantissa are updated.
+ * bap: selects the quantizer (see the switch). Values without a
+ * dedicated case index qnttztab[], which has 16 entries, so bap
+ * is assumed to be below 16 - TODO confirm against the caller.
+ * dithflag: when non-zero, a bap of 0 yields a dither value instead of 0.
+ * exp: index into scale_factor[] applied to the dequantized value.
+ *
+ * Returns the dequantized value multiplied by scale_factor[exp], or 0 when
+ * the code read from the stream is above the valid range (a warning is
+ * logged). For the grouped quantizers (bap 1, 2, 4) the values cached from
+ * an earlier call are scaled with the exp of the call that returns them. */
+static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithflag,
+ u16 exp)
+{
+ u16 group_code = 0;
+
+ /* If the bap is 0-5 then we have special cases to take care of */
+ switch (bap)
+ {
+ case 0:
+ if (dithflag)
+ {
+ return ( dither_gen(&p_ac3dec->mantissa) * scale_factor[exp] );
+ }
+ return (0);
+ /* bap 1: three values per 5-bit group code; cached values go out first */
+ case 1:
+ if (p_ac3dec->mantissa.q_1_pointer >= 0)
+ {
+ return (p_ac3dec->mantissa.q_1[p_ac3dec->mantissa.q_1_pointer--] *
+ scale_factor[exp]);
+ }
+ /* nothing cached: read a new group code (the bits are counted even when
+ * the code turns out to be invalid) */
+ p_ac3dec->total_bits_read += 5;
+ if ((group_code = GetBits (&p_ac3dec->bit_stream,5)) > 26)
+ {
+ intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (1)" );
+ return 0;
+ }
+ /* keep values two and three for the next two calls; q_1_pointer counts
+ * down from 1, so q_1[1] is handed out before q_1[0] */
+ p_ac3dec->mantissa.q_1[ 1 ] = q_1_1[ group_code ];
+ p_ac3dec->mantissa.q_1[ 0 ] = q_1_2[ group_code ];
+
+ p_ac3dec->mantissa.q_1_pointer = 1;
+
+ return (q_1_0[group_code] * scale_factor[exp]);
+ /* bap 2: three values per 7-bit group code, same caching scheme as bap 1 */
+ case 2:
+ if (p_ac3dec->mantissa.q_2_pointer >= 0)
+ {
+ return (p_ac3dec->mantissa.q_2[p_ac3dec->mantissa.q_2_pointer--] *
+ scale_factor[exp]);
+ }
+
+ p_ac3dec->total_bits_read += 7;
+ if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 124)
+ {
+ intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (2)" );
+ return 0;
+ }
+
+ p_ac3dec->mantissa.q_2[ 1 ] = q_2_1[ group_code ];
+ p_ac3dec->mantissa.q_2[ 0 ] = q_2_2[ group_code ];
+
+ p_ac3dec->mantissa.q_2_pointer = 1;
+
+ return (q_2_0[group_code] * scale_factor[exp]);
+ /* bap 3: one value from a 3-bit code, no grouping */
+ case 3:
+ p_ac3dec->total_bits_read += 3;
+ if ((group_code = GetBits (&p_ac3dec->bit_stream,3)) > 6)
+ {
+ intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (3)" );
+ return 0;
+ }
+
+ return (q_3[group_code] * scale_factor[exp]);
+ /* bap 4: two values per 7-bit group code; only one value is cached */
+ case 4:
+ if (p_ac3dec->mantissa.q_4_pointer >= 0)
+ {
+ return (p_ac3dec->mantissa.q_4[p_ac3dec->mantissa.q_4_pointer--] *
+ scale_factor[exp]);
+ }
+
+ p_ac3dec->total_bits_read += 7;
+ if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 120)
+ {
+ intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (4)" );
+ return 0;
+ }
+
+ p_ac3dec->mantissa.q_4[ 0 ] = q_4_1[group_code];
+
+ p_ac3dec->mantissa.q_4_pointer = 0;
+
+ return (q_4_0[group_code] * scale_factor[exp]);
+ /* bap 5: one value from a 4-bit code, no grouping */
+ case 5:
+ p_ac3dec->total_bits_read += 4;
+ if ((group_code = GetBits (&p_ac3dec->bit_stream,4)) > 14)
+ {
+ intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (5)" );
+ return 0;
+ }
+
+ return (q_5[group_code] * scale_factor[exp]);
+ /* remaining baps: a plain field of qnttztab[bap] bits, shifted to the top
+ * of 16 bits and reinterpreted as a signed 16-bit value before scaling */
+ default:
+ group_code = GetBits (&p_ac3dec->bit_stream,qnttztab[bap]);
+ group_code <<= 16 - qnttztab[bap];
+ p_ac3dec->total_bits_read += qnttztab[bap];
+
+ return ((s16)(group_code) * scale_factor[exp]);
+ }
+}
+
+/* Uncouple the coupling channel into a fbw channel.
+ *
+ * Fills p_ac3dec->samples[ch][i] for i starting at audblk.cplstrtmant and
+ * running up to audblk.cplendmant, 12 samples per sub-band. A new coupling
+ * coordinate is computed whenever cplbndstrc[sub_bnd] is zero; otherwise the
+ * coordinate of the previous sub-band is reused for the next 12 samples.
+ * NOTE(review): i only advances inside the 12-step inner loop, so the span
+ * cplendmant - cplstrtmant is presumably a multiple of 12; if it were not,
+ * the last sub-band would write past cplendmant - confirm against the
+ * parser that sets these fields. */
+static __inline__ void uncouple_channel (ac3dec_t * p_ac3dec, u32 ch)
+{
+ u32 bnd = 0;
+ u32 sub_bnd = 0;
+ u32 i,j;
+ float cpl_coord = 1.0;
+ u32 cpl_exp_tmp;
+ u32 cpl_mant_tmp;
+ /* one outer iteration per sub-band; i is incremented by the inner loop */
+ for (i = p_ac3dec->audblk.cplstrtmant; i < p_ac3dec->audblk.cplendmant;)
+ {
+ if (!p_ac3dec->audblk.cplbndstrc[sub_bnd++])
+ {
+ cpl_exp_tmp = p_ac3dec->audblk.cplcoexp[ch][bnd] +
+ 3 * p_ac3dec->audblk.mstrcplco[ch];
+ if (p_ac3dec->audblk.cplcoexp[ch][bnd] == 15)
+ {
+ cpl_mant_tmp = (p_ac3dec->audblk.cplcomant[ch][bnd]) << 11;
+ }
+ else
+ {
+ cpl_mant_tmp = ((0x10) | p_ac3dec->audblk.cplcomant[ch][bnd]) << 10;
+ }
+ cpl_coord = (cpl_mant_tmp) * scale_factor[cpl_exp_tmp] * 8.0f;
+ /* above: an exponent of 15 uses the coded mantissa alone (<< 11), any
+ * other exponent ORs in bit 0x10 first (<< 10); the result is scaled by
+ * scale_factor[cplcoexp + 3 * mstrcplco] and by 8 */
+ /* Invert the phase for the right channel if necessary */
+ if (p_ac3dec->bsi.acmod == 0x02 && p_ac3dec->audblk.phsflginu &&
+ ch == 1 && p_ac3dec->audblk.phsflg[bnd])
+ {
+ cpl_coord *= -1;
+ }
+ bnd++;
+ }
+ /* emit the 12 samples of this sub-band with the current coordinate */
+ for (j=0;j < 12; j++)
+ {
+ /* Get new dither values for each channel if necessary,
+ * so the channels are uncorrelated */
+ if (p_ac3dec->audblk.dithflag[ch] && !p_ac3dec->audblk.cpl_bap[i])
+ {
+ p_ac3dec->samples[ch][i] = cpl_coord * dither_gen(&p_ac3dec->mantissa) *
+ scale_factor[p_ac3dec->audblk.cpl_exp[i]];
+ } else {
+ p_ac3dec->samples[ch][i] = cpl_coord * p_ac3dec->audblk.cpl_flt[i];
+ }
+ i++;
+ }
+ }
+}
+
* ac3_parse.c: ac3 parsing procedures
*****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_parse.c,v 1.22 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_parse.c,v 1.23 2001/05/15 16:19:42 sam Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
#include "defs.h"
#include <string.h> /* memset() */
#include "threads.h"
#include "mtime.h"
+#include "intf_msg.h"
+
#include "stream_control.h"
#include "input_ext-dec.h"
#include "audio_output.h"
-#include "intf_msg.h"
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
#include "ac3_decoder.h"
#include "ac3_decoder_thread.h" /* ac3dec_thread_t */
for(i=0;i<p_ac3dec->bsi.nfchans;i++)
intf_ErrMsg ("%1d",p_ac3dec->audblk.blksw[i]);
intf_ErrMsg ("]");
-
- intf_ErrMsg ("\n");
}
#endif
+
* ac3_rematrix.c: ac3 audio rematrixing
*****************************************************************************
* Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_rematrix.c,v 1.17 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_rematrix.c,v 1.18 2001/05/15 16:19:42 sam Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
* Aaron Holtzman <aholtzma@engr.uvic.ca>
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
#include "defs.h"
#include <string.h> /* memcpy() */
#include "stream_control.h"
#include "input_ext-dec.h"
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
#include "ac3_decoder.h"
struct rematrix_band_s {
}
}
}
+
+++ /dev/null
-/*****************************************************************************
- * ac3_srfft_sse.c: ac3 fft functions
- *****************************************************************************
- * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_srfft_sse.c,v 1.1 2001/05/14 15:58:04 reno Exp $
- *
- * Authors: Renaud Dartus <reno@videolan.org>
- * Aaron Holtzman <aholtzma@engr.uvic.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
- *****************************************************************************/
-
-#include <stdio.h>
-
-#include "defs.h"
-
-#include <math.h>
-#include <stdio.h>
-
-#include "config.h"
-#include "common.h"
-#include "threads.h"
-#include "mtime.h"
-
-#include "stream_control.h"
-#include "input_ext-dec.h"
-
-#include "ac3_decoder.h"
-#include "ac3_srfft.h"
-
-void hsqrt2 (void);
-void C_1 (void);
-static void fft_4_sse (complex_t *x);
-static void fft_8_sse (complex_t *x);
-static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
- const complex_t *d, const complex_t *d_3);
-
-void fft_64p_sse(complex_t *a) /* Runs the SSE transform in place over the
- * buffer a, from a[0] up to the four-element block starting at a[60].
- *
- * The work is a fixed sequence: fft_8_sse / fft_4_sse are applied to
- * consecutive sub-blocks, and fft_asmb_sse merges them using the delta16,
- * delta32 and delta64 tables (and their *_3 companions), ending with one
- * merge of &a[0] and &a[32]. The statements separated by a comma instead
- * of a semicolon are still evaluated left to right.
- */
-{
- fft_8_sse(&a[0]); fft_4_sse(&a[8]); fft_4_sse(&a[12]);
- fft_asmb_sse(2, &a[0], &a[8], &delta16[0], &delta16_3[0]);
-
- fft_8_sse(&a[16]), fft_8_sse(&a[24]);
- fft_asmb_sse(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
-
- fft_8_sse(&a[32]); fft_4_sse(&a[40]); fft_4_sse(&a[44]);
- fft_asmb_sse(2, &a[32], &a[40], &delta16[0], &delta16_3[0]);
-
- fft_8_sse(&a[48]); fft_4_sse(&a[56]); fft_4_sse(&a[60]);
- fft_asmb_sse(2, &a[48], &a[56], &delta16[0], &delta16_3[0]);
-
- fft_asmb_sse(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
-}
-
-
-void fft_128p_sse(complex_t *a) /* Runs the SSE transform in place over the
- * buffer a, from a[0] up to the eight-element block starting at a[120].
- *
- * The first five statement groups are the same call sequence as
- * fft_64p_sse and cover the blocks from a[0] to a[60]. The remaining
- * groups apply the same building blocks to the sub-blocks from a[64]
- * onwards, and a final fft_asmb_sse call with the delta128 tables merges
- * &a[0] with &a[64]. The commented-out fft_16 / fft_32 / fft_128 calls
- * name the step that the following fft_asmb_sse call stands in for.
- */
-{
- fft_8_sse(&a[0]); fft_4_sse(&a[8]); fft_4_sse(&a[12]);
- fft_asmb_sse(2, &a[0], &a[8], &delta16[0], &delta16_3[0]);
-
- fft_8_sse(&a[16]), fft_8_sse(&a[24]);
- fft_asmb_sse(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
-
- fft_8_sse(&a[32]); fft_4_sse(&a[40]); fft_4_sse(&a[44]);
- fft_asmb_sse(2, &a[32], &a[40], &delta16[0], &delta16_3[0]);
-
- fft_8_sse(&a[48]); fft_4_sse(&a[56]); fft_4_sse(&a[60]);
- fft_asmb_sse(2, &a[48], &a[56], &delta16[0], &delta16_3[0]);
-
- fft_asmb_sse(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
-
- fft_8_sse(&a[64]); fft_4_sse(&a[72]); fft_4_sse(&a[76]);
- /* fft_16(&a[64]); */
- fft_asmb_sse(2, &a[64], &a[72], &delta16[0], &delta16_3[0]);
-
- fft_8_sse(&a[80]); fft_8_sse(&a[88]);
-
- /* fft_32(&a[64]); */
- fft_asmb_sse(4, &a[64], &a[80],&delta32[0], &delta32_3[0]);
-
- fft_8_sse(&a[96]); fft_4_sse(&a[104]), fft_4_sse(&a[108]);
- /* fft_16(&a[96]); */
- fft_asmb_sse(2, &a[96], &a[104], &delta16[0], &delta16_3[0]);
-
- fft_8_sse(&a[112]), fft_8_sse(&a[120]);
- /* fft_32(&a[96]); */
- fft_asmb_sse(4, &a[96], &a[112], &delta32[0], &delta32_3[0]);
-
- /* fft_128(&a[0]); */
- fft_asmb_sse(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
-}
-
-void hsqrt2 (void) /* Not a callable routine: the body only emits four
- * float constants (0.707106781188 twice, then -0.707106781188 twice)
- * through assembler directives. fft_8_sse takes the address of this
- * symbol and loads 16 bytes from it as data.
- *
- * NOTE(review): anything the compiler emits for the function entry would
- * sit between the symbol address and the floats; presumably the build
- * flags avoid that - confirm.
- */
-{
- __asm__ (
- ".float 0f0.707106781188\n"
- ".float 0f0.707106781188\n"
- ".float 0f-0.707106781188\n"
- ".float 0f-0.707106781188\n"
- );
-}
-
-void C_1 (void) /* Not a callable routine: the body only emits the four
- * float constants -1.0, 1.0, -1.0, 1.0 through assembler directives.
- * fft_8_sse and fft_asmb_sse take the address of this symbol and load
- * 16 bytes from it as a sign mask for mulps.
- *
- * NOTE(review): anything the compiler emits for the function entry would
- * sit between the symbol address and the floats; presumably the build
- * flags avoid that - confirm.
- */
-{
- __asm__ (
- ".float 0f-1.0\n"
- ".float 0f1.0\n"
- ".float 0f-1.0\n"
- ".float 0f1.0\n"
- );
-}
-
-static void fft_4_sse (complex_t *x) /* In-place SSE butterfly on the 32
- * bytes at x: two 16-byte loads (offsets 0 and 16) at the start and two
- * 16-byte stores to the same offsets at the end. Per the inline comments
- * these are the elements x[0]..x[3], combined as sums and differences of
- * x[0]/x[2] and x[1]/x[3], with the x[1]-x[3] difference multiplied by i.
- *
- * x is passed in and tied back out through %eax.
- *
- * NOTE(review): xmm0-xmm4 and xmm6 are written but the asm statement has
- * no clobber list and no "memory" clobber - confirm this is safe with the
- * compiler in use.
- */
-{
- __asm__ __volatile__ (
- "movups (%%eax), %%xmm0\n" /* x[1] | x[0] */
- "movups 16(%%eax), %%xmm2\n" /* x[3] | x[2] */
- "movups %%xmm0, %%xmm1\n" /* x[1] | x[0] */
- "addps %%xmm2, %%xmm0\n" /* x[1] + x[3] | x[0] + x[2] */
- "subps %%xmm2, %%xmm1\n" /* x[1] - x[3] | x[0] - x[2] */
- "xorps %%xmm6, %%xmm6\n"
- "movhlps %%xmm1, %%xmm4\n" /* ? | x[1] - x[3] */
- "movhlps %%xmm0, %%xmm3\n" /* ? | x[1] + x[3] */
- "subss %%xmm4, %%xmm6\n" /* 0 | -(x[1] - x[3]).re */
- "movlhps %%xmm1, %%xmm0\n" /* x[0] - x[2] | x[0] + x[2] */
- "movlhps %%xmm6, %%xmm4\n" /* 0 | -(x[1] - x[3]).re | (x[1] - x[3]).im | (x[3]-x[1]).re */
- "movups %%xmm0, %%xmm2\n" /* x[0] - x[2] | x[0] + x[2] */
- "shufps $0x94, %%xmm4, %%xmm3\n" /* i*(x[1] - x[3]) | x[1] + x[3] */
- "addps %%xmm3, %%xmm0\n"
- "subps %%xmm3, %%xmm2\n"
- "movups %%xmm0, (%%eax)\n"
- "movups %%xmm2, 16(%%eax)\n"
- : "=a" (x)
- : "a" (x) );
-}
-
-static void fft_8_sse (complex_t *x) /* In-place SSE butterfly on the 64
- * bytes at x: 8-byte loads from offsets 0 through 56 and four 16-byte
- * stores to offsets 0, 32, 16 and 48. Per the inline comments these are
- * the elements x[0]..x[7].
- *
- * The even elements (x[0], x[2], x[4], x[6]) are combined first into the
- * values the comments call yt and yb; the odd elements are scaled with
- * the constants loaded from hsqrt2, combined into u and v, and v is
- * sign-flipped with the constants loaded from C_1 before the final
- * additions and subtractions.
- *
- * %ebx is used as a scratch pointer and is saved and restored with
- * pushl/popl inside the asm. x is passed in and tied back out via %eax.
- *
- * NOTE(review): xmm0-xmm7 are written but the asm statement has no
- * clobber list and no "memory" clobber - confirm this is safe with the
- * compiler in use.
- */
-{
- __asm__ __volatile__ (
- "pushl %%ebx\n"
-
- "movlps (%%eax), %%xmm0\n" /* x[0] */
- "movlps 32(%%eax), %%xmm1\n" /* x[4] */
- "movhps 16(%%eax), %%xmm0\n" /* x[2] | x[0] */
- "movhps 48(%%eax), %%xmm1\n" /* x[6] | x[4] */
- "movups %%xmm0, %%xmm2\n" /* x[2] | x[0] */
- "xorps %%xmm3, %%xmm3\n"
- "addps %%xmm1, %%xmm0\n" /* x[2] + x[6] | x[0] + x[4] */
- "subps %%xmm1, %%xmm2\n" /* x[2] - x[6] | x[0] - x[4] */
- "movhlps %%xmm0, %%xmm5\n" /* x[2] + x[6] */
- "movhlps %%xmm2, %%xmm4\n" /* x[2] - x[6] */
- "movlhps %%xmm2, %%xmm0\n" /* x[0] - x[4] | x[0] + x[4] */
- "subss %%xmm4, %%xmm3\n" /* (x[2]-x[6]).im | -(x[2]-x[6]).re */
- "movups %%xmm0, %%xmm7\n" /* x[0] - x[4] | x[0] + x[4] */
- "movups %%xmm3, %%xmm4\n" /* (x[2]-x[6]).im | -(x[2]-x[6]).re */
- "movlps 8(%%eax), %%xmm1\n" /* x[1] */
- "shufps $0x14, %%xmm4, %%xmm5\n" /* i*(x[2] - x[6]) | x[2] + x[6] */
-
- "addps %%xmm5, %%xmm0\n" /* yt = i*(x2-x6)+x0-x4 | x2+x6+x0+x4 */
- "subps %%xmm5, %%xmm7\n" /* yb = i*(x6-x2)+x0-x4 | -x6-x2+x0+x4 */
-
- "movhps 24(%%eax), %%xmm1\n" /* x[3] | x[1] */
- "movl $hsqrt2, %%ebx\n"
- "movlps 40(%%eax), %%xmm2\n" /* x[5] */
- "movhps 56(%%eax), %%xmm2\n" /* x[7] | x[5] */
- "movups %%xmm1, %%xmm3\n" /* x[3] | x[1] */
- "addps %%xmm2, %%xmm1\n" /* x[3] + x[7] | x[1] + x[5] */
- "subps %%xmm2, %%xmm3\n" /* x[3] - x[7] | x[1] - x[5] */
- "movups (%%ebx), %%xmm4\n" /* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */
- "movups %%xmm3, %%xmm6\n" /* x[3] - x[7] | x[1] - x[5] */
- "mulps %%xmm4, %%xmm3\n" /* -1/s2*(x[3] - x[7]) | 1/s2*(x[1] - x[5]) */
- "shufps $0xc8, %%xmm4, %%xmm4\n" /* -1/sqrt2 | 1/sqrt2 | -1/sqrt2 | 1/sqrt2 */
- "shufps $0xb1, %%xmm6, %%xmm6\n" /* (x3-x7).re|(x3-x7).im|(x1-x5).re|(x1-x5).im */
- "mulps %%xmm4, %%xmm6\n" /* (x7-x3).re/s2|(x3-x7).im/s2|(x5-x1).re/s2|(x1-x5).im/s2 */
- "addps %%xmm3, %%xmm6\n" /* (-1-i)/sqrt2 * (x[3]-x[7]) | (1-i)/sqrt2 * (x[1] - x[5]) */
- "movhlps %%xmm1, %%xmm5\n" /* x[3] + x[7] */
- "movlhps %%xmm6, %%xmm1\n" /* (1+i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
- "shufps $0xe4, %%xmm6, %%xmm5\n" /* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */
- "movups %%xmm1, %%xmm3\n" /* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
- "movl $C_1, %%ebx\n"
- "addps %%xmm5, %%xmm1\n" /* u */
- "subps %%xmm5, %%xmm3\n" /* v */
- "movups %%xmm0, %%xmm2\n" /* yb */
- "movups %%xmm7, %%xmm4\n" /* yt */
- "movups (%%ebx), %%xmm5\n"
- "mulps %%xmm5, %%xmm3\n"
- "addps %%xmm1, %%xmm0\n" /* yt + u */
- "subps %%xmm1, %%xmm2\n" /* yt - u */
- "shufps $0xb1, %%xmm3, %%xmm3\n" /* -i * v */
- "movups %%xmm0, (%%eax)\n"
- "movups %%xmm2, 32(%%eax)\n"
- "addps %%xmm3, %%xmm4\n" /* yb - i*v */
- "subps %%xmm3, %%xmm7\n" /* yb + i*v */
- "movups %%xmm4, 16(%%eax)\n"
- "movups %%xmm7, 48(%%eax)\n"
-
- "popl %%ebx\n"
- : "=a" (x)
- : "a" (x));
-}
-
-
-static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
- const complex_t *d, const complex_t *d_3) /* Merge step used by
- * fft_64p_sse and fft_128p_sse, written entirely in inline assembly.
- *
- * Arguments are not passed as asm operands (both operand lists are
- * empty). The asm pushes %ebp, copies %esp into it and then reads k, x,
- * wTB, d and d_3 from 8, 12, 16, 20 and 24(%ebp). k is copied to
- * -4(%ebp) and used as the loop counter. %eax, %ebx, %ecx, %edx, %esi and
- * %edi are pushed on entry and popped at .end, and the asm finishes with
- * "leave".
- *
- * Layout used by the code: %ecx holds 16*k as a byte offset. Results are
- * stored to four locations per step: (%eax), (%eax,%ecx), (%edi) and
- * (%edi,%ecx), where %edi is %eax + 2*%ecx. The wT values are read at
- * (%ebx) and the wB values at (%ebx,%ecx).
- *
- * The first section (marked TRANSZERO and TRANS) handles the first 16
- * bytes, using only the second entry of d and d_3 (both pointers are
- * advanced by 8 bytes before the load), then decrements the counter. The
- * .loop body then handles 16 bytes per iteration, multiplying wT by d and
- * wB by d3 (sign pattern taken from C_1) before the additions and
- * subtractions.
- *
- * The counter is decremented once before .loop and the loop body is
- * entered without a test, so k must be at least 2; the callers visible in
- * this file pass 2, 4, 8 and 16.
- *
- * NOTE(review): reading arguments at fixed %ebp offsets presumably
- * depends on how the compiler lays out this function's stack frame -
- * confirm the build flags this relies on.
- * NOTE(review): .loop and .end are named labels, not numeric local ones;
- * they would presumably clash if this asm were emitted more than once in
- * the same object - confirm.
- * NOTE(review): xmm0-xmm7 and memory are modified with no clobber list.
- */
-{
- __asm__ __volatile__ (
- "pushl %%ebp\n"
- "movl %%esp, %%ebp\n"
-
- "subl $4, %%esp\n"
-
- "pushl %%eax\n"
- "pushl %%ebx\n"
- "pushl %%ecx\n"
- "pushl %%edx\n"
- "pushl %%esi\n"
- "pushl %%edi\n"
-
- "movl 8(%%ebp), %%ecx\n" /* k */
- "movl 12(%%ebp), %%eax\n" /* x */
- "movl %%ecx, -4(%%ebp)\n" /* k */
- "movl 16(%%ebp), %%ebx\n" /* wT */
- "movl 20(%%ebp), %%edx\n" /* d */
- "movl 24(%%ebp), %%esi\n" /* d3 */
- "shll $4, %%ecx\n" /* 16k */
- "addl $8, %%edx\n"
- "leal (%%eax, %%ecx, 2), %%edi\n"
- "addl $8, %%esi\n"
-
- /* TRANSZERO and TRANS */
- "movups (%%eax), %%xmm0\n" /* x[1] | x[0] */
- "movups (%%ebx), %%xmm1\n" /* wT[1] | wT[0] */
- "movups (%%ebx, %%ecx), %%xmm2\n" /* wB[1] | wB[0] */
- "movlps (%%edx), %%xmm3\n" /* d */
- "movlps (%%esi), %%xmm4\n" /* d3 */
- "movhlps %%xmm1, %%xmm5\n" /* wT[1] */
- "movhlps %%xmm2, %%xmm6\n" /* wB[1] */
- "shufps $0x50, %%xmm3, %%xmm3\n" /* d[1].im | d[1].im | d[1].re | d[1].re */
- "shufps $0x50, %%xmm4, %%xmm4\n" /* d3[1].im | d3[1].im | d3[i].re | d3[i].re */
- "movlhps %%xmm5, %%xmm5\n" /* wT[1] | wT[1] */
- "movlhps %%xmm6, %%xmm6\n" /* wB[1] | wB[1] */
- "mulps %%xmm3, %%xmm5\n"
- "mulps %%xmm4, %%xmm6\n"
- "movhlps %%xmm5, %%xmm7\n" /* wT[1].im * d[1].im | wT[1].re * d[1].im */
- "movlhps %%xmm6, %%xmm5\n" /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */
- "shufps $0xb1, %%xmm6, %%xmm7\n" /* wB[1].re * d3[1].im | wB[i].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */
- "movl $C_1, %%edi\n"
- "movups (%%edi), %%xmm4\n"
- "mulps %%xmm4, %%xmm7\n"
- "addps %%xmm7, %%xmm5\n" /* wB[1] * d3[1] | wT[1] * d[1] */
- "movlhps %%xmm5, %%xmm1\n" /* d[1] * wT[1] | wT[0] */
- "shufps $0xe4, %%xmm5, %%xmm2\n" /* d3[1] * wB[1] | wB[0] */
- "movups %%xmm1, %%xmm3\n" /* d[1] * wT[1] | wT[0] */
- "leal (%%eax, %%ecx, 2), %%edi\n"
- "addps %%xmm2, %%xmm1\n" /* u */
- "subps %%xmm2, %%xmm3\n" /* v */
- "mulps %%xmm4, %%xmm3\n"
- "movups (%%eax, %%ecx), %%xmm5\n" /* xk[1] | xk[0] */
- "shufps $0xb1, %%xmm3, %%xmm3\n" /* -i * v */
- "movups %%xmm0, %%xmm2\n" /* x[1] | x[0] */
- "movups %%xmm5, %%xmm6\n" /* xk[1] | xk[0] */
- "addps %%xmm1, %%xmm0\n"
- "subps %%xmm1, %%xmm2\n"
- "addps %%xmm3, %%xmm5\n"
- "subps %%xmm3, %%xmm6\n"
- "movups %%xmm0, (%%eax)\n"
- "movups %%xmm2, (%%edi)\n"
- "movups %%xmm5, (%%eax, %%ecx)\n"
- "movups %%xmm6, (%%edi, %%ecx)\n"
- "addl $16, %%eax\n"
- "addl $16, %%ebx\n"
- "addl $8, %%edx\n"
- "addl $8, %%esi\n"
- "decl -4(%%ebp)\n"
-
-".loop:\n"
- "movups (%%ebx), %%xmm0\n" /* wT[1] | wT[0] */
- "movups (%%edx), %%xmm1\n" /* d[1] | d[0] */
-
- "movups (%%ebx, %%ecx), %%xmm4\n" /* wB[1] | wB[0] */
- "movups (%%esi), %%xmm5\n" /* d3[1] | d3[0] */
-
- "movhlps %%xmm0, %%xmm2\n" /* wT[1] */
- "movhlps %%xmm1, %%xmm3\n" /* d[1] */
-
- "movhlps %%xmm4, %%xmm6\n" /* wB[1] */
- "movhlps %%xmm5, %%xmm7\n" /* d3[1] */
-
- "shufps $0x50, %%xmm1, %%xmm1\n" /* d[0].im | d[0].im | d[0].re | d[0].re */
- "shufps $0x50, %%xmm3, %%xmm3\n" /* d[1].im | d[1].im | d[1].re | d[1].re */
-
- "movlhps %%xmm0, %%xmm0\n" /* wT[0] | wT[0] */
- "shufps $0x50, %%xmm5, %%xmm5\n" /* d3[0].im | d3[0].im | d3[0].re | d3[0].re */
- "movlhps %%xmm2, %%xmm2\n" /* wT[1] | wT[1] */
- "shufps $0x50, %%xmm7, %%xmm7\n" /* d3[1].im | d3[1].im | d3[1].re | d3[1].re */
-
- "mulps %%xmm1, %%xmm0\n" /* d[0].im * wT[0].im | d[0].im * wT[0].re | d[0].re * wT[0].im | d[0].re * wT[0].re */
- "mulps %%xmm3, %%xmm2\n" /* d[1].im * wT[1].im | d[1].im * wT[1].re | d[1].re * wT[1].im | d[1].re * wT[1].re */
- "movlhps %%xmm4, %%xmm4\n" /* wB[0] | wB[0] */
- "movlhps %%xmm6, %%xmm6\n" /* wB[1] | wB[1] */
-
- "movhlps %%xmm0, %%xmm1\n" /* d[0].im * wT[0].im | d[0].im * wT[0].re */
- "movlhps %%xmm2, %%xmm0\n" /* d[1].re * wT[1].im | d[1].re * wT[1].re | d[0].re * wT[0].im | d[0].re * wT[0].re */
- "mulps %%xmm5, %%xmm4\n" /* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */
- "mulps %%xmm7, %%xmm6\n" /* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */
- "shufps $0xb1, %%xmm2, %%xmm1\n" /* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */
- "movl $C_1, %%edi\n"
- "movups (%%edi), %%xmm3\n" /* 1.0 | -1.0 | 1.0 | -1.0 */
-
- "movhlps %%xmm4, %%xmm5\n" /* wB[0].im * d3[0].im | wB[0].re * d3[0].im */
- "mulps %%xmm3, %%xmm1\n" /* d[1].im * wT[1].re | -d[1].im * wT[1].im | d[0].im * wT[0].re | -d[0].im * wT[0].im */
- "movlhps %%xmm6, %%xmm4\n" /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wB[0].im * d3[0].re | wB[0].im * d3[0].re */
- "addps %%xmm1, %%xmm0\n" /* wT[1] * d[1] | wT[0] * d[0] */
-
- "shufps $0xb1, %%xmm6, %%xmm5\n" /* wB[1].re * d3[1].im | wB[1].im * d3[1].im | wB[0].re * d3[0].im | wB[0].im * d3[0].im */
- "mulps %%xmm3, %%xmm5\n" /* wB[1].re * d3[1].im | -wB[1].im * d3[1].im | wB[0].re * d3[0].im | -wB[0].im * d3[0].im */
- "addps %%xmm5, %%xmm4\n" /* wB[1] * d3[1] | wB[0] * d3[0] */
-
- "movups %%xmm0, %%xmm1\n" /* wT[1] * d[1] | wT[0] * d[0] */
- "addps %%xmm4, %%xmm0\n" /* u */
- "subps %%xmm4, %%xmm1\n" /* v */
- "movups (%%eax), %%xmm6\n" /* x[1] | x[0] */
- "leal (%%eax, %%ecx, 2), %%edi\n"
- "mulps %%xmm3, %%xmm1\n"
- "addl $16, %%ebx\n"
- "addl $16, %%esi\n"
- "shufps $0xb1, %%xmm1, %%xmm1\n" /* -i * v */
- "movups (%%eax, %%ecx), %%xmm7\n" /* xk[1] | xk[0] */
- "movups %%xmm6, %%xmm2\n"
- "movups %%xmm7, %%xmm4\n"
- "addps %%xmm0, %%xmm6\n"
- "subps %%xmm0, %%xmm2\n"
- "movups %%xmm6, (%%eax)\n"
- "movups %%xmm2, (%%edi)\n"
- "addps %%xmm1, %%xmm7\n"
- "subps %%xmm1, %%xmm4\n"
- "addl $16, %%edx\n"
- "movups %%xmm7, (%%eax, %%ecx)\n"
- "movups %%xmm4, (%%edi, %%ecx)\n"
-
- "addl $16, %%eax\n"
- "decl -4(%%ebp)\n"
- "jnz .loop\n"
-
-".end:\n"
- "popl %%edi\n"
- "popl %%esi\n"
- "popl %%edx\n"
- "popl %%ecx\n"
- "popl %%ebx\n"
- "popl %%eax\n"
-
- "addl $4, %%esp\n"
-
- "leave\n"
- ::);
-}
* aout_u8.c: 8 bit unsigned audio output functions
*****************************************************************************
* Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: aout_u8.c,v 1.4 2001/05/06 04:32:02 sam Exp $
+ * $Id: aout_u8.c,v 1.5 2001/05/15 16:19:42 sam Exp $
*
* Authors: Michel Kaempf <maxx@via.ecp.fr>
*
int i_fifo;
long l_buffer, l_buffer_limit, l_bytes;
- intf_DbgMsg("adec debug: running audio output U8_S_thread (%p) (pid == %i)", p_aout, getpid());
-
/* As the s32_buffer was created with calloc(), we don't have to set this
* memory to zero and we can immediately jump into the thread's loop */
while ( ! p_aout->b_die )
* and spawn threads.
*****************************************************************************
* Copyright (C) 1998, 1999, 2000 VideoLAN
- * $Id: main.c,v 1.94 2001/05/14 15:58:04 reno Exp $
+ * $Id: main.c,v 1.95 2001/05/15 16:19:42 sam Exp $
*
* Authors: Vincent Seguin <seguin@via.ecp.fr>
* Samuel Hocevar <sam@zoy.org>
#define OPT_MOTION 181
#define OPT_IDCT 182
#define OPT_YUV 183
+#define OPT_DOWNMIX 184
+#define OPT_IMDCT 185
#define OPT_SYNCHRO 190
#define OPT_WARNING 191
{ "stereo", 0, 0, OPT_STEREO },
{ "mono", 0, 0, OPT_MONO },
{ "spdif", 0, 0, OPT_SPDIF },
+ { "downmix", 1, 0, OPT_DOWNMIX },
+ { "imdct", 1, 0, OPT_IMDCT },
/* Video options */
{ "novideo", 0, 0, OPT_NOVIDEO },
case OPT_SPDIF: /* --spdif */
main_PutIntVariable( AOUT_SPDIF_VAR, 1 );
break;
+ case OPT_DOWNMIX: /* --downmix */
+ main_PutPszVariable( DOWNMIX_METHOD_VAR, optarg );
+ break;
+ case OPT_IMDCT: /* --imdct */
+ main_PutPszVariable( IMDCT_METHOD_VAR, optarg );
+ break;
/* Video options */
case OPT_NOVIDEO: /* --novideo */
"\n -A, --aout <module> \taudio output method"
"\n --stereo, --mono \tstereo/mono audio"
"\n --spdif \tAC3 pass-through mode"
+ "\n --downmix <module> \tAC3 downmix method"
+ "\n --imdct <module> \tAC3 IMDCT method"
"\n"
"\n --novideo \tdisable video"
"\n -V, --vout <module> \tvideo output method"
"\n " AOUT_DSP_VAR "=<filename> \tdsp device path"
"\n " AOUT_STEREO_VAR "={1|0} \tstereo or mono output"
"\n " AOUT_SPDIF_VAR "={1|0} \tAC3 pass-through mode"
+ "\n " DOWNMIX_METHOD_VAR "=<method name> \tAC3 downmix method"
+ "\n " IMDCT_METHOD_VAR "=<method name> \tAC3 IMDCT method"
"\n " AOUT_RATE_VAR "=<rate> \toutput rate" );
/* Video parameters */