From: Sam Hocevar Date: Tue, 15 May 2001 16:19:42 +0000 (+0000) Subject: * AC3 IMDCT and downmix functions are now in plugins, --imdct and X-Git-Tag: 0.2.81~119 X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=501cb1ba4bfc11b6987df0cf1d27fbf91c48f495;p=vlc * AC3 IMDCT and downmix functions are now in plugins, --imdct and --downmix options added. --- diff --git a/Makefile b/Makefile index fbe187721d..9a868def9f 100644 --- a/Makefile +++ b/Makefile @@ -13,7 +13,7 @@ # PLUGINS_DIR := alsa beos darwin dsp dummy \ dvd esd fb ggi glide gnome gtk \ - idct \ + downmix idct imdct \ macosx mga \ motion \ mpeg null qt sdl \ @@ -24,7 +24,9 @@ PLUGINS_DIR := alsa beos darwin dsp dummy \ # PLUGINS_TARGETS := alsa/alsa beos/beos darwin/darwin dsp/dsp dummy/dummy \ dvd/dvd esd/esd fb/fb ggi/ggi glide/glide gnome/gnome gtk/gtk \ + downmix/downmix downmix/downmixsse downmix/downmix3dn \ idct/idct idct/idctclassic idct/idctmmx idct/idctmmxext \ + imdct/imdct imdct/imdctsse \ macosx/macosx mga/mga \ motion/motion motion/motionmmx motion/motionmmxext \ mpeg/es mpeg/ps mpeg/ts null/null qt/qt sdl/sdl \ @@ -69,11 +71,7 @@ AC3_DECODER = src/ac3_decoder/ac3_decoder_thread.o \ src/ac3_decoder/ac3_bit_allocate.o \ src/ac3_decoder/ac3_mantissa.o \ src/ac3_decoder/ac3_rematrix.o \ - src/ac3_decoder/ac3_imdct.o \ - src/ac3_decoder/ac3_imdct_c.o \ - src/ac3_decoder/ac3_srfft.o \ - src/ac3_decoder/ac3_downmix.o \ - src/ac3_decoder/ac3_downmix_c.o + src/ac3_decoder/ac3_imdct.o AC3_SPDIF = src/ac3_spdif/ac3_spdif.o \ src/ac3_spdif/ac3_iec958.o diff --git a/configure b/configure index 8624e778ad..eb8c146924 100755 --- a/configure +++ b/configure @@ -3162,7 +3162,7 @@ fi ARCH=${host_cpu} -BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion" +BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion imdct downmix" case x$host_os in xmingw32msvc) @@ -3195,8 +3195,8 @@ else fi rm -f conftest* -echo $ac_n "checking if \$CC groks MMX EXT (SSE) inline assembly""... 
$ac_c" 1>&6 -echo "configure:3200: checking if \$CC groks MMX EXT (SSE) inline assembly" >&5 +echo $ac_n "checking if \$CC groks MMX EXT or SSE inline assembly""... $ac_c" 1>&6 +echo "configure:3200: checking if \$CC groks MMX EXT or SSE inline assembly" >&5 cat > conftest.$ac_ext <&5; (eval $ac_compile) 2>&5; }; then rm -rf conftest* - ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext" + ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext imdctsse downmix3dn downmixsse" echo "$ac_t""yes" 1>&6 else echo "configure: failed program was:" >&5 diff --git a/configure.in b/configure.in index 2c380c1043..6b85cc1816 100644 --- a/configure.in +++ b/configure.in @@ -135,7 +135,7 @@ ARCH=${host_cpu} dnl dnl default modules dnl -BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion" +BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion imdct downmix" dnl dnl Accelerated modules @@ -154,9 +154,9 @@ AC_TRY_COMPILE([void quux(){void *p;asm("packuswb %%mm1,%%mm2"::"r"(p));}],, ACCEL_PLUGINS="${ACCEL_PLUGINS} ${MMX_PLUGINS}" AC_MSG_RESULT(yes), AC_MSG_RESULT(no)) -AC_MSG_CHECKING([if \$CC groks MMX EXT (SSE) inline assembly]) +AC_MSG_CHECKING([if \$CC groks MMX EXT or SSE inline assembly]) AC_TRY_COMPILE([void quux(){void *p;asm("maskmovq %%mm1,%%mm2"::"r"(p));}],, - ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext" + ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext imdctsse downmix3dn downmixsse" AC_MSG_RESULT(yes), AC_MSG_RESULT(no)) dnl diff --git a/doc/vlc.1 b/doc/vlc.1 index 494508125f..3565198da8 100644 --- a/doc/vlc.1 +++ b/doc/vlc.1 @@ -52,6 +52,12 @@ Choose stereo or mono audio output. .B \-\-spdif Activate hardware AC3 pass-through mode. .TP +.B \-\-downmix +Specify a module for AC3 downmix: "downmix", "downmixsse", for instance. +.TP +.B \-\-imdct +Specify a module for AC3 IMDCT: "imdct", "imdctsse", for instance. +.TP .B \-\-novideo Disable video output. 
.TP @@ -145,11 +151,13 @@ also accepts a lot of parameters to customize its behaviour. vlc_channels= channels list .TP .B Audio parameters: - vlc_aout= audio method - vlc_dsp= dsp device path - vlc_stereo={1|0} stereo or mono output - vlc_spdif={1|0} AC3 pass-through mode - vlc_audio_rate= output rate + vlc_aout= audio method + vlc_dsp= dsp device path + vlc_stereo={1|0} stereo or mono output + vlc_spdif={1|0} AC3 pass-through mode + vlc_downmix= AC3 downmix method + vlc_imdct= AC3 IMDCT method + vlc_audio_rate= output rate .TP .B Video parameters: vlc_vout= display method @@ -160,6 +168,7 @@ also accepts a lot of parameters to customize its behaviour. vlc_grayscale={1|0} grayscale or color vlc_fullscreen={1|0} full screen vlc_overlay={1|0} overlay + vlc_motion= motion compensation method vlc_idct= IDCT method vlc_yuv= YUV method vlc_synchro={I|I+|IP|IP+|IPB} synchro algorithm diff --git a/include/ac3_downmix.h b/include/ac3_downmix.h new file mode 100644 index 0000000000..8dede4cfdd --- /dev/null +++ b/include/ac3_downmix.h @@ -0,0 +1,42 @@ +/***************************************************************************** + * ac3_downmix.h : AC3 downmix types + ***************************************************************************** + * Copyright (C) 1999, 2000 VideoLAN + * $Id: ac3_downmix.h,v 1.3 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Michel Kaempf + * Renaud Dartus + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + *****************************************************************************/ + +typedef struct dm_par_s { + float unit; + float clev; + float slev; +} dm_par_t; + +typedef struct downmix_s { + /* Module used and shortcuts */ + struct module_s * p_module; + void (*pf_downmix_3f_2r_to_2ch)(float *, dm_par_t * dm_par); + void (*pf_downmix_3f_1r_to_2ch)(float *, dm_par_t * dm_par); + void (*pf_downmix_2f_2r_to_2ch)(float *, dm_par_t * dm_par); + void (*pf_downmix_2f_1r_to_2ch)(float *, dm_par_t * dm_par); + void (*pf_downmix_3f_0r_to_2ch)(float *, dm_par_t * dm_par); + void (*pf_stream_sample_2ch_to_s16)(s16 *, float *left, float *right); + void (*pf_stream_sample_1ch_to_s16)(s16 *, float *center); +} downmix_t; + diff --git a/include/ac3_imdct.h b/include/ac3_imdct.h new file mode 100644 index 0000000000..4720653c15 --- /dev/null +++ b/include/ac3_imdct.h @@ -0,0 +1,68 @@ +/***************************************************************************** + * ac3_imdct.h : AC3 IMDCT types + ***************************************************************************** + * Copyright (C) 1999, 2000 VideoLAN + * $Id: ac3_imdct.h,v 1.3 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Michel Kaempf + * Renaud Dartus + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + *****************************************************************************/ + +typedef struct complex_s { + float real; + float imag; +} complex_t; + +#define N 512 + +typedef struct imdct_s +{ + complex_t buf[N/4]; + + /* Delay buffer for time domain interleaving */ + float delay[6][256]; + float delay1[6][256]; + + /* Twiddle factors for IMDCT */ + float xcos1[N/4]; + float xsin1[N/4]; + float xcos2[N/8]; + float xsin2[N/8]; + + /* Twiddle factor LUT */ + complex_t *w[7]; + complex_t w_1[1]; + complex_t w_2[2]; + complex_t w_4[4]; + complex_t w_8[8]; + complex_t w_16[16]; + complex_t w_32[32]; + complex_t w_64[64]; + + float xcos_sin_sse[128 * 4] __attribute__((aligned(16))); + + /* Module used and shortcuts */ + struct module_s * p_module; + void (*pf_imdct_init) (struct imdct_s *); + //void (*pf_fft_64p) (complex_t *a); + void (*pf_imdct_256)(struct imdct_s *, float data[], float delay[]); + void (*pf_imdct_256_nol)(struct imdct_s *, float data[], float delay[]); + void (*pf_imdct_512)(struct imdct_s *, float data[], float delay[]); + void (*pf_imdct_512_nol)(struct imdct_s *, float data[], float delay[]); + +} imdct_t; + diff --git a/include/config.h.in b/include/config.h.in index 81b73b587b..94c04da15e 100644 --- a/include/config.h.in +++ b/include/config.h.in @@ -264,6 +264,12 @@ #define AOUT_SPDIF_VAR "vlc_spdif" #define AOUT_SPDIF_DEFAULT 0 +/* Environment variable containing the AC3 downmix method */ +#define DOWNMIX_METHOD_VAR "vlc_downmix" + +/* Environment variable containing the AC3 IMDCT method */ +#define IMDCT_METHOD_VAR "vlc_imdct" + /* Volume */ #define VOLUME_DEFAULT 512 #define VOLUME_STEP 128 diff --git a/include/modules.h b/include/modules.h index 212af24828..33c0cbbdc7 100644 --- a/include/modules.h +++ b/include/modules.h @@ 
-2,7 +2,7 @@ * modules.h : Module management functions. ***************************************************************************** * Copyright (C) 2001 VideoLAN - * $Id: modules.h,v 1.23 2001/05/06 04:32:02 sam Exp $ + * $Id: modules.h,v 1.24 2001/05/15 16:19:42 sam Exp $ * * Authors: Samuel Hocevar * @@ -65,16 +65,19 @@ typedef void * module_handle_t; #define MODULE_CAPABILITY_DECAPS 1 << 3 /* Decaps */ #define MODULE_CAPABILITY_ADEC 1 << 4 /* Audio decoder */ #define MODULE_CAPABILITY_VDEC 1 << 5 /* Video decoder */ -#define MODULE_CAPABILITY_MOTION 1 << 6 /* Video decoder */ +#define MODULE_CAPABILITY_MOTION 1 << 6 /* Motion compensation */ #define MODULE_CAPABILITY_IDCT 1 << 7 /* IDCT transformation */ #define MODULE_CAPABILITY_AOUT 1 << 8 /* Audio output */ #define MODULE_CAPABILITY_VOUT 1 << 9 /* Video output */ #define MODULE_CAPABILITY_YUV 1 << 10 /* YUV colorspace conversion */ -#define MODULE_CAPABILITY_AFX 1 << 11 /* Audio effects */ -#define MODULE_CAPABILITY_VFX 1 << 12 /* Video effects */ +#define MODULE_CAPABILITY_IMDCT 1 << 11 /* IMDCT transformation */ +#define MODULE_CAPABILITY_DOWNMIX 1 << 12 /* AC3 downmix */ /* FIXME: kludge */ struct input_area_s; +struct imdct_s; +struct complex_s; +struct dm_par_s; /* FIXME: not yet used */ typedef struct probedata_s @@ -190,6 +193,35 @@ typedef struct function_list_s void ( * pf_end ) ( struct vout_thread_s * ); } yuv; + /* IMDCT plugin */ + struct + { + void ( * pf_imdct_init ) ( struct imdct_s * ); + void ( * pf_imdct_256 ) ( struct imdct_s *, + float data[], float delay[] ); + void ( * pf_imdct_256_nol )( struct imdct_s *, + float data[], float delay[] ); + void ( * pf_imdct_512 ) ( struct imdct_s *, + float data[], float delay[] ); + void ( * pf_imdct_512_nol )( struct imdct_s *, + float data[], float delay[] ); +// void ( * pf_fft_64p ) ( struct complex_s * ); + + } imdct; + + /* AC3 downmix plugin */ + struct + { + void ( * pf_downmix_3f_2r_to_2ch ) ( float *, struct dm_par_s * ); + void ( * 
pf_downmix_3f_1r_to_2ch ) ( float *, struct dm_par_s * ); + void ( * pf_downmix_2f_2r_to_2ch ) ( float *, struct dm_par_s * ); + void ( * pf_downmix_2f_1r_to_2ch ) ( float *, struct dm_par_s * ); + void ( * pf_downmix_3f_0r_to_2ch ) ( float *, struct dm_par_s * ); + void ( * pf_stream_sample_2ch_to_s16 ) ( s16 *, float *, float * ); + void ( * pf_stream_sample_1ch_to_s16 ) ( s16 *, float * ); + + } downmix; + } functions; } function_list_t; @@ -208,8 +240,8 @@ typedef struct module_functions_s function_list_t aout; function_list_t vout; function_list_t yuv; - function_list_t afx; - function_list_t vfx; + function_list_t imdct; + function_list_t downmix; } module_functions_t; diff --git a/plugins/downmix/.cvsignore b/plugins/downmix/.cvsignore new file mode 100644 index 0000000000..63e7180a26 --- /dev/null +++ b/plugins/downmix/.cvsignore @@ -0,0 +1 @@ +.dep diff --git a/plugins/downmix/Makefile b/plugins/downmix/Makefile new file mode 100644 index 0000000000..deb95734a8 --- /dev/null +++ b/plugins/downmix/Makefile @@ -0,0 +1,63 @@ +############################################################################### +# vlc (VideoLAN Client) downmix module makefile +# (c)2001 VideoLAN +############################################################################### + +# +# Objects +# + +PLUGIN_DOWNMIX = downmix.o ac3_downmix_c.o +PLUGIN_DOWNMIXSSE = downmixsse.o ac3_downmix_sse.o +PLUGIN_DOWNMIX3DN = downmix3dn.o ac3_downmix_3dn.o + +BUILTIN_DOWNMIX = $(PLUGIN_DOWNMIX:%.o=BUILTIN_DOWNMIX_%.o) +BUILTIN_DOWNMIXSSE = $(PLUGIN_DOWNMIXSSE:%.o=BUILTIN_DOWNMIXSSE_%.o) +BUILTIN_DOWNMIX3DN = $(PLUGIN_DOWNMIX3DN:%.o=BUILTIN_DOWNMIX3DN_%.o) + +PLUGIN_C = $(PLUGIN_DOWNMIX) $(PLUGIN_DOWNMIXSSE) $(PLUGIN_DOWNMIX3DN) +ALL_OBJ = $(PLUGIN_C) $(BUILTIN_DOWNMIX) $(BUILTIN_DOWNMIXSSE) $(BUILTIN_DOWNMIX3DN) + +# +# Virtual targets +# + +include ../../Makefile.modules + +$(BUILTIN_DOWNMIX): BUILTIN_DOWNMIX_%.o: .dep/%.d +$(BUILTIN_DOWNMIX): BUILTIN_DOWNMIX_%.o: %.c + $(CC) $(CFLAGS) -DBUILTIN 
-DMODULE_NAME=downmix -c -o $@ $< + +$(BUILTIN_DOWNMIXSSE): BUILTIN_DOWNMIXSSE_%.o: .dep/%.d +$(BUILTIN_DOWNMIXSSE): BUILTIN_DOWNMIXSSE_%.o: %.c + $(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=downmixsse -c -o $@ $< + +$(BUILTIN_DOWNMIX3DN): BUILTIN_DOWNMIX3DN_%.o: .dep/%.d +$(BUILTIN_DOWNMIX3DN): BUILTIN_DOWNMIX3DN_%.o: %.c + $(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=downmix3dn -c -o $@ $< + +# +# Real targets +# + +../../lib/downmix.so: $(PLUGIN_DOWNMIX) + $(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS) + +../../lib/downmix.a: $(BUILTIN_DOWNMIX) + ar r $@ $^ + $(RANLIB) $@ + +../../lib/downmixsse.so: $(PLUGIN_DOWNMIXSSE) + $(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS) + +../../lib/downmixsse.a: $(BUILTIN_DOWNMIXSSE) + ar r $@ $^ + $(RANLIB) $@ + +../../lib/downmix3dn.so: $(PLUGIN_DOWNMIX3DN) + $(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS) + +../../lib/downmix3dn.a: $(BUILTIN_DOWNMIX3DN) + ar r $@ $^ + $(RANLIB) $@ + diff --git a/plugins/downmix/ac3_downmix_3dn.c b/plugins/downmix/ac3_downmix_3dn.c new file mode 100644 index 0000000000..fa8e7f0f7c --- /dev/null +++ b/plugins/downmix/ac3_downmix_3dn.c @@ -0,0 +1,306 @@ +/***************************************************************************** + * ac3_downmix_3dn.c: accelerated 3D Now! ac3 downmix functions + ***************************************************************************** + * Copyright (C) 1999, 2000, 2001 VideoLAN + * $Id: ac3_downmix_3dn.c,v 1.1 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Renaud Dartus + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + *****************************************************************************/ + +#define MODULE_NAME downmix3dn +#include "modules_inner.h" + +/***************************************************************************** + * Preamble + *****************************************************************************/ +#include "defs.h" + +#include "config.h" +#include "common.h" +#include "threads.h" +#include "mtime.h" +#include "tests.h" + +#include "ac3_downmix.h" + +void sqrt2_3dn (void) +{ + __asm__ (".float 0f0.7071068"); +} + +void _M( downmix_3f_2r_to_2ch ) (float * samples, dm_par_t * dm_par) +{ + __asm__ __volatile__ ( + "pushl %%ecx\n" + "movl $128, %%ecx\n" /* loop counter */ + + "movd (%%ebx), %%mm5\n" /* unit */ + "punpckldq %%mm5, %%mm5\n" /* unit | unit */ + + "movd 4(%%ebx), %%mm6\n" /* clev */ + "punpckldq %%mm6, %%mm6\n" /* clev | clev */ + + "movd 8(%%ebx), %%mm7\n" /* slev */ + "punpckldq %%mm7, %%mm7\n" /* slev | slev */ + +".loop:\n" + "movq (%%eax), %%mm0\n" /* left */ + "movq 2048(%%eax), %%mm1\n" /* right */ + "movq 1024(%%eax), %%mm2\n" /* center */ + "movq 3072(%%eax), %%mm3\n" /* leftsur */ + "movq 4096(%%eax), %%mm4\n" /* rightsur */ + "pfmul %%mm5, %%mm0\n" + "pfmul %%mm5, %%mm1\n" + "pfmul %%mm6, %%mm2\n" + "pfadd %%mm2, %%mm0\n" + "pfadd %%mm2, %%mm1\n" + "pfmul %%mm7, %%mm3\n" + "pfmul %%mm7, %%mm4\n" + "pfadd %%mm3, %%mm0\n" + "pfadd %%mm4, %%mm1\n" + + "movq %%mm0, (%%eax)\n" + "movq %%mm1, 1024(%%eax)\n" + + "addl $8, %%eax\n" + "decl %%ecx\n" + "jnz .loop\n" + + "popl %%ecx\n" + "femms\n" + : "=a" (samples) + : "a" (samples), "b" (dm_par)); +} + +void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t * dm_par) +{ + __asm__ __volatile__ ( + "pushl %%ecx\n" + "movl $128, %%ecx\n" /* loop counter */ + + "movd (%%ebx), 
%%mm5\n" /* unit */ + "punpckldq %%mm5, %%mm5\n" /* unit | unit */ + + "movd 8(%%ebx), %%mm7\n" /* slev */ + "punpckldq %%mm7, %%mm7\n" /* slev | slev */ + +".loop3:\n" + "movq (%%eax), %%mm0\n" /* left */ + "movq 1024(%%eax), %%mm1\n" /* right */ + "movq 2048(%%eax), %%mm3\n" /* leftsur */ + "movq 3072(%%eax), %%mm4\n" /* rightsur */ + "pfmul %%mm5, %%mm0\n" + "pfmul %%mm5, %%mm1\n" + "pfmul %%mm7, %%mm3\n" + "pfmul %%mm7, %%mm4\n" + "pfadd %%mm3, %%mm0\n" + "pfadd %%mm4, %%mm1\n" + + "movq %%mm0, (%%eax)\n" + "movq %%mm1, 1024(%%eax)\n" + + "addl $8, %%eax\n" + "decl %%ecx\n" + "jnz .loop3\n" + + "popl %%ecx\n" + "femms\n" + : "=a" (samples) + : "a" (samples), "b" (dm_par)); +} + +void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t * dm_par) +{ + __asm__ __volatile__ ( + + "pushl %%ecx\n" + "movl $128, %%ecx\n" /* loop counter */ + + "movd (%%ebx), %%mm5\n" /* unit */ + "punpckldq %%mm5, %%mm5\n" /* unit | unit */ + + "movd 4(%%ebx), %%mm6\n" /* clev */ + "punpckldq %%mm6, %%mm6\n" /* clev | clev */ + + "movd 8(%%ebx), %%mm7\n" /* slev */ + "punpckldq %%mm7, %%mm7\n" /* slev | slev */ + +".loop4:\n" + "movq (%%eax), %%mm0\n" /* left */ + "movq 2048(%%eax), %%mm1\n" /* right */ + "movq 1024(%%eax), %%mm2\n" /* center */ + "movq 3072(%%eax), %%mm3\n" /* sur */ + "pfmul %%mm5, %%mm0\n" + "pfmul %%mm5, %%mm1\n" + "pfmul %%mm6, %%mm2\n" + "pfadd %%mm2, %%mm0\n" + "pfmul %%mm7, %%mm3\n" + "pfadd %%mm2, %%mm1\n" + "pfsub %%mm3, %%mm0\n" + "pfadd %%mm3, %%mm1\n" + + "movq %%mm0, (%%eax)\n" + "movq %%mm1, 1024(%%eax)\n" + + "addl $8, %%eax\n" + "decl %%ecx\n" + "jnz .loop4\n" + + "popl %%ecx\n" + "femms\n" + : "=a" (samples) + : "a" (samples), "b" (dm_par)); +} + +void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t * dm_par) +{ + __asm__ __volatile__ ( + "pushl %%ecx\n" + "movl $128, %%ecx\n" /* loop counter */ + + "movd (%%ebx), %%mm5\n" /* unit */ + "punpckldq %%mm5, %%mm5\n" /* unit | unit */ + + "movd 8(%%ebx), %%mm7\n" /* slev */ + "punpckldq %%mm7, 
%%mm7\n" /* slev | slev */ + +".loop5:\n" + "movq (%%eax), %%mm0\n" /* left */ + "movq 1024(%%eax), %%mm1\n" /* right */ + "movq 2048(%%eax), %%mm3\n" /* sur */ + "pfmul %%mm5, %%mm0\n" + "pfmul %%mm5, %%mm1\n" + "pfmul %%mm7, %%mm3\n" + "pfsub %%mm3, %%mm0\n" + "pfadd %%mm3, %%mm1\n" + + "movq %%mm0, (%%eax)\n" + "movq %%mm1, 1024(%%eax)\n" + + "addl $8, %%eax\n" + "decl %%ecx\n" + "jnz .loop5\n" + + "popl %%ecx\n" + "femms\n" + : "=a" (samples) + : "a" (samples), "b" (dm_par)); +} + +void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t * dm_par) +{ + __asm__ __volatile__ ( + "pushl %%ecx\n" + "movl $128, %%ecx\n" /* loop counter */ + + "movd (%%ebx), %%mm5\n" /* unit */ + "punpckldq %%mm5, %%mm5\n" /* unit | unit */ + + "movd 4(%%ebx), %%mm6\n" /* clev */ + "punpckldq %%mm6, %%mm6\n" /* clev | clev */ + +".loop6:\n" + "movq (%%eax), %%mm0\n" /*left */ + "movq 2048(%%eax), %%mm1\n" /* right */ + "movq 1024(%%eax), %%mm2\n" /* center */ + "pfmul %%mm5, %%mm0\n" + "pfmul %%mm5, %%mm1\n" + "pfmul %%mm6, %%mm2\n" + "pfadd %%mm2, %%mm0\n" + "pfadd %%mm2, %%mm1\n" + + "movq %%mm0, (%%eax)\n" + "movq %%mm1, 1024(%%eax)\n" + + "addl $8, %%eax\n" + "decl %%ecx\n" + "jnz .loop6\n" + + "popl %%ecx\n" + "femms\n" + : "=a" (samples) + : "a" (samples), "b" (dm_par)); +} + +void _M( stream_sample_1ch_to_s16 ) (s16 *s16_samples, float *left) +{ + __asm__ __volatile__ ( + "pushl %%ecx\n" + "pushl %%edx\n" + + "movl $sqrt2_3dn, %%edx\n" + "movd (%%edx), %%mm7\n" + "punpckldq %%mm7, %%mm7\n" /* sqrt2 | sqrt2 */ + "movl $128, %%ecx\n" + +".loop2:\n" + "movq (%%ebx), %%mm0\n" /* c1 | c0 */ + "pfmul %%mm7, %%mm0\n" + + "pf2id %%mm0, %%mm0\n" /* c1 c0 --> mm0, int_32 */ + + "packssdw %%mm0, %%mm0\n" /* c1 c1 c0 c0 --> mm0, int_16 */ + + "movq %%mm0, (%%eax)\n" + "addl $8, %%eax\n" + "addl $8, %%ebx\n" + + "decl %%ecx\n" + "jnz .loop2\n" + + "popl %%edx\n" + "popl %%ecx\n" + "femms\n" + : "=a" (s16_samples), "=b" (left) + : "a" (s16_samples), "b" (left)); +} + +void _M( 
stream_sample_2ch_to_s16 ) (s16 *s16_samples, float *left, float *right) +{ + + __asm__ __volatile__ ( + "pushl %%ecx\n" + "movl $128, %%ecx\n" + +".loop1:\n" + "movq (%%ebx), %%mm0\n" /* l1 | l0 */ + "movq (%%edx), %%mm1\n" /* r1 | r0 */ + "movq %%mm0, %%mm2\n" /* l1 | l0 */ + "punpckldq %%mm1, %%mm0\n" /* r0 | l0 */ + "punpckhdq %%mm1, %%mm2\n" /* r1 | l1 */ + + "pf2id %%mm0, %%mm0\n" /* r0 l0 --> mm0, int_32 */ + "pf2id %%mm2, %%mm2\n" /* r1 l1 --> mm2, int_32 */ + + "packssdw %%mm2, %%mm0\n" /* r1 l1 r0 l0 --> mm0, int_16 */ + + "movq %%mm0, (%%eax)\n" + "movq %%mm2, 8(%%eax)\n" + "addl $8, %%eax\n" + "addl $8, %%ebx\n" + "addl $8, %%edx\n" + + "decl %%ecx\n" + "jnz .loop1\n" + + "popl %%ecx\n" + "femms\n" + : "=a" (s16_samples), "=b" (left), "=d" (right) + : "a" (s16_samples), "b" (left), "d" (right)); + +} + diff --git a/src/ac3_decoder/ac3_downmix_c.c b/plugins/downmix/ac3_downmix_c.c similarity index 82% rename from src/ac3_decoder/ac3_downmix_c.c rename to plugins/downmix/ac3_downmix_c.c index 08573af563..759933d6a7 100644 --- a/src/ac3_decoder/ac3_downmix_c.c +++ b/plugins/downmix/ac3_downmix_c.c @@ -1,8 +1,8 @@ /***************************************************************************** - * ac3_downmix_c.c: ac3 downmix functions + * ac3_downmix_c.c: ac3 downmix functions in C ***************************************************************************** * Copyright (C) 1999, 2000, 2001 VideoLAN - * $Id: ac3_downmix_c.c,v 1.8 2001/05/14 15:58:04 reno Exp $ + * $Id: ac3_downmix_c.c,v 1.1 2001/05/15 16:19:42 sam Exp $ * * Authors: Renaud Dartus * Aaron Holtzman @@ -22,6 +22,12 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
*****************************************************************************/ +#define MODULE_NAME downmix +#include "modules_inner.h" + +/***************************************************************************** + * Preamble + *****************************************************************************/ #include "defs.h" #include /* memcpy() */ @@ -31,12 +37,9 @@ #include "threads.h" #include "mtime.h" -#include "stream_control.h" -#include "input_ext-dec.h" +#include "ac3_downmix.h" -#include "ac3_decoder.h" - -void downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par) +void _M( downmix_3f_2r_to_2ch ) (float *samples, dm_par_t *dm_par) { int i; float *left, *right, *center, *left_sur, *right_sur; @@ -56,7 +59,7 @@ void downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par) } } -void downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par) +void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t *dm_par) { int i; float *left, *right, *left_sur, *right_sur; @@ -75,7 +78,7 @@ void downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par) } } -void downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par) +void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t *dm_par) { int i; float *left, *right, *center, *right_sur; @@ -95,7 +98,7 @@ void downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par) } -void downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par) +void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t *dm_par) { int i; float *left, *right, *right_sur; @@ -114,7 +117,7 @@ void downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par) } -void downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par) +void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t *dm_par) { int i; float *left, *right, *center; @@ -133,7 +136,7 @@ void downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par) } -void stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *right) +void _M( stream_sample_2ch_to_s16 ) (s16 *out_buf, float *left, float 
*right) { int i; for (i=0; i < 256; i++) { @@ -143,7 +146,7 @@ void stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *right) } -void stream_sample_1ch_to_s16_c (s16 *out_buf, float *center) +void _M( stream_sample_1ch_to_s16 ) (s16 *out_buf, float *center) { int i; float tmp; @@ -153,3 +156,4 @@ void stream_sample_1ch_to_s16_c (s16 *out_buf, float *center) *out_buf++ = tmp; } } + diff --git a/plugins/downmix/ac3_downmix_common.h b/plugins/downmix/ac3_downmix_common.h new file mode 100644 index 0000000000..305cdb462f --- /dev/null +++ b/plugins/downmix/ac3_downmix_common.h @@ -0,0 +1,32 @@ +/***************************************************************************** + * ac3_downmix_common.h: ac3 downmix functions headers + ***************************************************************************** + * Copyright (C) 1999, 2000, 2001 VideoLAN + * $Id: ac3_downmix_common.h,v 1.1 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Renaud Dartus + * Aaron Holtzman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
+ *****************************************************************************/ + +void _M( downmix_3f_2r_to_2ch ) ( float *, dm_par_t * ); +void _M( downmix_2f_2r_to_2ch ) ( float *, dm_par_t * ); +void _M( downmix_3f_1r_to_2ch ) ( float *, dm_par_t * ); +void _M( downmix_2f_1r_to_2ch ) ( float *, dm_par_t * ); +void _M( downmix_3f_0r_to_2ch ) ( float *, dm_par_t * ); +void _M( stream_sample_2ch_to_s16 ) ( s16 *, float *, float * ); +void _M( stream_sample_1ch_to_s16 ) ( s16 *, float * ); + diff --git a/plugins/downmix/ac3_downmix_sse.c b/plugins/downmix/ac3_downmix_sse.c new file mode 100644 index 0000000000..ce7ebc653b --- /dev/null +++ b/plugins/downmix/ac3_downmix_sse.c @@ -0,0 +1,315 @@ +/***************************************************************************** + * ac3_downmix_sse.c: accelerated SSE ac3 downmix functions + ***************************************************************************** + * Copyright (C) 1999, 2000, 2001 VideoLAN + * $Id: ac3_downmix_sse.c,v 1.1 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Renaud Dartus + * Aaron Holtzman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
+ *****************************************************************************/ + +#define MODULE_NAME downmixsse +#include "modules_inner.h" + +/***************************************************************************** + * Preamble + *****************************************************************************/ +#include "defs.h" + +#include "config.h" +#include "common.h" +#include "threads.h" +#include "mtime.h" +#include "tests.h" + +#include "ac3_downmix.h" + +void sqrt2_sse (void) +{ + __asm__ (".float 0f0.7071068"); +} + +void _M( downmix_3f_2r_to_2ch ) (float * samples, dm_par_t * dm_par) +{ + __asm__ __volatile__ ( + "pushl %%ecx\n" + "movl $64, %%ecx\n" /* loop counter */ + + "movss (%%ebx), %%xmm5\n" /* unit */ + "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */ + + "movss 4(%%ebx), %%xmm6\n" /* clev */ + "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */ + + "movss 8(%%ebx), %%xmm7\n" /* slev */ + "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */ + +".loop:\n" + "movups (%%eax), %%xmm0\n" /* left */ + "movups 2048(%%eax), %%xmm1\n" /* right */ + "movups 1024(%%eax), %%xmm2\n" /* center */ + "movups 3072(%%eax), %%xmm3\n" /* leftsur */ + "movups 4096(%%eax), %%xmm4\n" /* rightsur */ + "mulps %%xmm5, %%xmm0\n" + "mulps %%xmm5, %%xmm1\n" + "mulps %%xmm6, %%xmm2\n" + "addps %%xmm2, %%xmm0\n" + "addps %%xmm2, %%xmm1\n" + "mulps %%xmm7, %%xmm3\n" + "mulps %%xmm7, %%xmm4\n" + "addps %%xmm3, %%xmm0\n" + "addps %%xmm4, %%xmm1\n" + + "movups %%xmm0, (%%eax)\n" + "movups %%xmm1, 1024(%%eax)\n" + + "addl $16, %%eax\n" + "decl %%ecx\n" + "jnz .loop\n" + + "popl %%ecx\n" + : "=a" (samples) + : "a" (samples), "b" (dm_par)); +} + +void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t * dm_par) +{ + __asm__ __volatile__ ( + "pushl %%ecx\n" + "movl $64, %%ecx\n" /* loop counter */ + + "movss (%%ebx), %%xmm5\n" /* unit */ + "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */ + + "movss 8(%%ebx), %%xmm7\n" /* slev */ + 
"shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */ + +".loop3:\n" + "movups (%%eax), %%xmm0\n" /* left */ + "movups 1024(%%eax), %%xmm1\n" /* right */ + "movups 2048(%%eax), %%xmm3\n" /* leftsur */ + "movups 3072(%%eax), %%xmm4\n" /* rightsur */ + "mulps %%xmm5, %%xmm0\n" + "mulps %%xmm5, %%xmm1\n" + "mulps %%xmm7, %%xmm3\n" + "mulps %%xmm7, %%xmm4\n" + "addps %%xmm3, %%xmm0\n" + "addps %%xmm4, %%xmm1\n" + + "movups %%xmm0, (%%eax)\n" + "movups %%xmm1, 1024(%%eax)\n" + + "addl $16, %%eax\n" + "decl %%ecx\n" + "jnz .loop3\n" + + "popl %%ecx\n" + : "=a" (samples) + : "a" (samples), "b" (dm_par)); +} + +void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t * dm_par) +{ + __asm__ __volatile__ ( + + "pushl %%ecx\n" + "movl $64, %%ecx\n" /* loop counter */ + + "movss (%%ebx), %%xmm5\n" /* unit */ + "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */ + + "movss 4(%%ebx), %%xmm6\n" /* clev */ + "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */ + + "movss 8(%%ebx), %%xmm7\n" /* slev */ + "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */ + +".loop4:\n" + "movups (%%eax), %%xmm0\n" /* left */ + "movups 2048(%%eax), %%xmm1\n" /* right */ + "movups 1024(%%eax), %%xmm2\n" /* center */ + "movups 3072(%%eax), %%xmm3\n" /* sur */ + "mulps %%xmm5, %%xmm0\n" + "mulps %%xmm5, %%xmm1\n" + "mulps %%xmm6, %%xmm2\n" + "addps %%xmm2, %%xmm0\n" + "mulps %%xmm7, %%xmm3\n" + "addps %%xmm2, %%xmm1\n" + "subps %%xmm3, %%xmm0\n" + "addps %%xmm3, %%xmm1\n" + + "movups %%xmm0, (%%eax)\n" + "movups %%xmm1, 1024(%%eax)\n" + + "addl $16, %%eax\n" + "decl %%ecx\n" + "jnz .loop4\n" + + "popl %%ecx\n" + : "=a" (samples) + : "a" (samples), "b" (dm_par)); + +} + +void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t * dm_par) +{ + __asm__ __volatile__ ( + "pushl %%ecx\n" + "movl $64, %%ecx\n" /* loop counter */ + + "movss (%%ebx), %%xmm5\n" /* unit */ + "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */ + + "movss 8(%%ebx), %%xmm7\n" /* slev 
*/ + "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */ + +".loop5:\n" + "movups (%%eax), %%xmm0\n" /* left */ + "movups 1024(%%eax), %%xmm1\n" /* right */ + "movups 2048(%%eax), %%xmm3\n" /* sur */ + "mulps %%xmm5, %%xmm0\n" + "mulps %%xmm5, %%xmm1\n" + "mulps %%xmm7, %%xmm3\n" + "subps %%xmm3, %%xmm0\n" + "addps %%xmm3, %%xmm1\n" + + "movups %%xmm0, (%%eax)\n" + "movups %%xmm1, 1024(%%eax)\n" + + "addl $16, %%eax\n" + "decl %%ecx\n" + "jnz .loop5\n" + + "popl %%ecx\n" + : "=a" (samples) + : "a" (samples), "b" (dm_par)); + + +} + +void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t * dm_par) +{ + __asm__ __volatile__ ( + "pushl %%ecx\n" + "movl $64, %%ecx\n" /* loop counter */ + + "movss (%%ebx), %%xmm5\n" /* unit */ + "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */ + + "movss 4(%%ebx), %%xmm6\n" /* clev */ + "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */ + +".loop6:\n" + "movups (%%eax), %%xmm0\n" /*left */ + "movups 2048(%%eax), %%xmm1\n" /* right */ + "movups 1024(%%eax), %%xmm2\n" /* center */ + "mulps %%xmm5, %%xmm0\n" + "mulps %%xmm5, %%xmm1\n" + "mulps %%xmm6, %%xmm2\n" + "addps %%xmm2, %%xmm0\n" + "addps %%xmm2, %%xmm1\n" + + "movups %%xmm0, (%%eax)\n" + "movups %%xmm1, 1024(%%eax)\n" + + "addl $16, %%eax\n" + "decl %%ecx\n" + "jnz .loop6\n" + + "popl %%ecx\n" + : "=a" (samples) + : "a" (samples), "b" (dm_par)); +} + +void _M( stream_sample_1ch_to_s16 ) (s16 *s16_samples, float *left) +{ + __asm__ __volatile__ ( + "pushl %%ecx\n" + "pushl %%edx\n" + + "movl $sqrt2_sse, %%edx\n" + "movss (%%edx), %%xmm7\n" + "shufps $0, %%xmm7, %%xmm7\n" /* sqrt2 | sqrt2 | sqrt2 | sqrt2 */ + "movl $64, %%ecx\n" + +".loop2:\n" + "movups (%%ebx), %%xmm0\n" /* c3 | c2 | c1 | c0 */ + "mulps %%xmm7, %%xmm0\n" + "movhlps %%xmm0, %%xmm2\n" /* c3 | c2 */ + + "cvtps2pi %%xmm0, %%mm0\n" /* c1 c0 --> mm0, int_32 */ + "cvtps2pi %%xmm2, %%mm1\n" /* c3 c2 --> mm1, int_32 */ + + "packssdw %%mm0, %%mm0\n" /* c1 c1 c0 c0 --> mm0, int_16 */ + 
"packssdw %%mm1, %%mm1\n" /* c3 c3 c2 c2 --> mm1, int_16 */ + + "movq %%mm0, (%%eax)\n" + "movq %%mm1, 8(%%eax)\n" + "addl $16, %%eax\n" + "addl $16, %%ebx\n" + + "decl %%ecx\n" + "jnz .loop2\n" + + "popl %%edx\n" + "popl %%ecx\n" + "emms\n" + : "=a" (s16_samples), "=b" (left) + : "a" (s16_samples), "b" (left)); +} + +void _M( stream_sample_2ch_to_s16 ) (s16 *s16_samples, float *left, float *right) +{ + + __asm__ __volatile__ ( + "pushl %%ecx\n" + "movl $64, %%ecx\n" + +".loop1:\n" + "movups (%%ebx), %%xmm0\n" /* l3 | l2 | l1 | l0 */ + "movups (%%edx), %%xmm1\n" /* r3 | r2 | r1 | r0 */ + "movhlps %%xmm0, %%xmm2\n" /* l3 | l2 */ + "movhlps %%xmm1, %%xmm3\n" /* r3 | r2 */ + "unpcklps %%xmm1, %%xmm0\n" /* r1 | l1 | r0 | l0 */ + "unpcklps %%xmm3, %%xmm2\n" /* r3 | l3 | r2 | l2 */ + + "cvtps2pi %%xmm0, %%mm0\n" /* r0 l0 --> mm0, int_32 */ + "movhlps %%xmm0, %%xmm0\n" + "cvtps2pi %%xmm0, %%mm1\n" /* r1 l1 --> mm1, int_32 */ + "cvtps2pi %%xmm2, %%mm2\n" /* r2 l2 --> mm2, int_32 */ + "movhlps %%xmm2, %%xmm2\n" + "cvtps2pi %%xmm2, %%mm3\n" /* r3 l3 --> mm3, int_32 */ + + "packssdw %%mm1, %%mm0\n" /* r1 l1 r0 l0 --> mm0, int_16 */ + "packssdw %%mm3, %%mm2\n" /* r3 l3 r2 l2 --> mm2, int_16 */ + + "movq %%mm0, (%%eax)\n" + "movq %%mm2, 8(%%eax)\n" + "addl $16, %%eax\n" + "addl $16, %%ebx\n" + "addl $16, %%edx\n" + + "decl %%ecx\n" + "jnz .loop1\n" + + "popl %%ecx\n" + "emms\n" + : "=a" (s16_samples), "=b" (left), "=d" (right) + : "a" (s16_samples), "b" (left), "d" (right)); + +} + diff --git a/plugins/downmix/downmix.c b/plugins/downmix/downmix.c new file mode 100644 index 0000000000..675424b887 --- /dev/null +++ b/plugins/downmix/downmix.c @@ -0,0 +1,149 @@ +/***************************************************************************** + * downmix.c : AC3 downmix module + ***************************************************************************** + * Copyright (C) 1999, 2000 VideoLAN + * $Id: downmix.c,v 1.1 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Gaël Hendryckx + * 
+ * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + *****************************************************************************/ + +#define MODULE_NAME downmix +#include "modules_inner.h" + +/***************************************************************************** + * Preamble + *****************************************************************************/ +#include "defs.h" + +#include <stdlib.h> + +#include "config.h" +#include "common.h" +#include "threads.h" +#include "mtime.h" +#include "tests.h" + +#include "ac3_downmix.h" +#include "ac3_downmix_common.h" + +#include "modules.h" + +/***************************************************************************** + * Local and extern prototypes. + *****************************************************************************/ +static void downmix_getfunctions( function_list_t * p_function_list ); +static int downmix_Probe ( probedata_t *p_data ); + +/***************************************************************************** + * Build configuration tree.
+ *****************************************************************************/ +MODULE_CONFIG_START +ADD_WINDOW( "Configuration for AC3 downmix module" ) + ADD_COMMENT( "Ha, ha -- nothing to configure yet" ) +MODULE_CONFIG_END + +/***************************************************************************** + * InitModule: get the module structure and configuration. + ***************************************************************************** + * We have to fill psz_name, psz_longname and psz_version. These variables + * will be strdup()ed later by the main application because the module can + * be unloaded later to save memory, and we want to be able to access this + * data even after the module has been unloaded. + *****************************************************************************/ +MODULE_INIT +{ + p_module->psz_name = MODULE_STRING; + p_module->psz_longname = "AC3 downmix module"; + p_module->psz_version = VERSION; + + p_module->i_capabilities = MODULE_CAPABILITY_NULL + | MODULE_CAPABILITY_DOWNMIX; + + return( 0 ); +} + +/***************************************************************************** + * ActivateModule: set the module to a usable state. + ***************************************************************************** + * This function fills the capability functions and the configuration + * structure. Once ActivateModule() has been called, the i_usage can + * be set to 0 and calls to NeedModule() be made to increment it. To unload + * the module, one has to wait until i_usage == 0 and call DeactivateModule().
+ *****************************************************************************/ +MODULE_ACTIVATE +{ + p_module->p_functions = malloc( sizeof( module_functions_t ) ); + if( p_module->p_functions == NULL ) + { + return( -1 ); + } + + downmix_getfunctions( &p_module->p_functions->downmix ); + + p_module->p_config = p_config; + + return( 0 ); +} + +/***************************************************************************** + * DeactivateModule: make sure the module can be unloaded. + ***************************************************************************** + * This function must only be called when i_usage == 0. If it successfully + * returns, i_usage can be set to -1 and the module unloaded. Be careful to + * lock usage_lock during the whole process. + *****************************************************************************/ +MODULE_DEACTIVATE +{ + free( p_module->p_functions ); + + return( 0 ); +} + +/* Following functions are local */ + +/***************************************************************************** + * Functions exported as capabilities. They are declared as static so that + * we don't pollute the namespace too much. 
+ *****************************************************************************/ +static void downmix_getfunctions( function_list_t * p_function_list ) +{ + p_function_list->pf_probe = downmix_Probe; +#define F p_function_list->functions.downmix + F.pf_downmix_3f_2r_to_2ch = _M( downmix_3f_2r_to_2ch ); + F.pf_downmix_3f_1r_to_2ch = _M( downmix_3f_1r_to_2ch ); + F.pf_downmix_2f_2r_to_2ch = _M( downmix_2f_2r_to_2ch ); + F.pf_downmix_2f_1r_to_2ch = _M( downmix_2f_1r_to_2ch ); + F.pf_downmix_3f_0r_to_2ch = _M( downmix_3f_0r_to_2ch ); + F.pf_stream_sample_2ch_to_s16 = _M( stream_sample_2ch_to_s16 ); + F.pf_stream_sample_1ch_to_s16 = _M( stream_sample_1ch_to_s16 ); +#undef F +} + +/***************************************************************************** + * downmix_Probe: returns a preference score + *****************************************************************************/ +static int downmix_Probe( probedata_t *p_data ) +{ + if( TestMethod( DOWNMIX_METHOD_VAR, "downmix" ) ) + { + return( 999 ); + } + + /* This plugin always works */ + return( 50 ); +} + diff --git a/plugins/downmix/downmix3dn.c b/plugins/downmix/downmix3dn.c new file mode 100644 index 0000000000..f05a8a78dc --- /dev/null +++ b/plugins/downmix/downmix3dn.c @@ -0,0 +1,154 @@ +/***************************************************************************** + * downmix3dn.c : accelerated 3D Now! AC3 downmix module + ***************************************************************************** + * Copyright (C) 1999, 2000 VideoLAN + * $Id: downmix3dn.c,v 1.1 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Gaël Hendryckx + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + *****************************************************************************/ + +#define MODULE_NAME downmix3dn +#include "modules_inner.h" + +/***************************************************************************** + * Preamble + *****************************************************************************/ +#include "defs.h" + +#include <stdlib.h> + +#include "config.h" +#include "common.h" +#include "threads.h" +#include "mtime.h" +#include "tests.h" + +#include "ac3_downmix.h" +#include "ac3_downmix_common.h" + +#include "modules.h" + +/***************************************************************************** + * Local and extern prototypes. + *****************************************************************************/ +static void downmix_getfunctions( function_list_t * p_function_list ); +static int downmix_Probe ( probedata_t *p_data ); + +/***************************************************************************** + * Build configuration tree. + *****************************************************************************/ +MODULE_CONFIG_START +ADD_WINDOW( "Configuration for AC3 downmix3dn module" ) + ADD_COMMENT( "Ha, ha -- nothing to configure yet" ) +MODULE_CONFIG_END + +/***************************************************************************** + * InitModule: get the module structure and configuration. + ***************************************************************************** + * We have to fill psz_name, psz_longname and psz_version.
These variables + * will be strdup()ed later by the main application because the module can + * be unloaded later to save memory, and we want to be able to access this + * data even after the module has been unloaded. + *****************************************************************************/ +MODULE_INIT +{ + p_module->psz_name = MODULE_STRING; + p_module->psz_longname = "3D Now! AC3 downmix module"; + p_module->psz_version = VERSION; + + p_module->i_capabilities = MODULE_CAPABILITY_NULL + | MODULE_CAPABILITY_DOWNMIX; + + return( 0 ); +} + +/***************************************************************************** + * ActivateModule: set the module to a usable state. + ***************************************************************************** + * This function fills the capability functions and the configuration + * structure. Once ActivateModule() has been called, the i_usage can + * be set to 0 and calls to NeedModule() be made to increment it. To unload + * the module, one has to wait until i_usage == 0 and call DeactivateModule(). + *****************************************************************************/ +MODULE_ACTIVATE +{ + p_module->p_functions = malloc( sizeof( module_functions_t ) ); + if( p_module->p_functions == NULL ) + { + return( -1 ); + } + + downmix_getfunctions( &p_module->p_functions->downmix ); + + p_module->p_config = p_config; + + return( 0 ); +} + +/***************************************************************************** + * DeactivateModule: make sure the module can be unloaded. + ***************************************************************************** + * This function must only be called when i_usage == 0. If it successfully + * returns, i_usage can be set to -1 and the module unloaded. Be careful to + * lock usage_lock during the whole process.
+ *****************************************************************************/ +MODULE_DEACTIVATE +{ + free( p_module->p_functions ); + + return( 0 ); +} + +/* Following functions are local */ + +/***************************************************************************** + * Functions exported as capabilities. They are declared as static so that + * we don't pollute the namespace too much. + *****************************************************************************/ +static void downmix_getfunctions( function_list_t * p_function_list ) +{ + p_function_list->pf_probe = downmix_Probe; +#define F p_function_list->functions.downmix + F.pf_downmix_3f_2r_to_2ch = _M( downmix_3f_2r_to_2ch ); + F.pf_downmix_3f_1r_to_2ch = _M( downmix_3f_1r_to_2ch ); + F.pf_downmix_2f_2r_to_2ch = _M( downmix_2f_2r_to_2ch ); + F.pf_downmix_2f_1r_to_2ch = _M( downmix_2f_1r_to_2ch ); + F.pf_downmix_3f_0r_to_2ch = _M( downmix_3f_0r_to_2ch ); + F.pf_stream_sample_2ch_to_s16 = _M( stream_sample_2ch_to_s16 ); + F.pf_stream_sample_1ch_to_s16 = _M( stream_sample_1ch_to_s16 ); +#undef F +} + +/***************************************************************************** + * downmix_Probe: returns a preference score + *****************************************************************************/ +static int downmix_Probe( probedata_t *p_data ) +{ + if( !TestCPU( CPU_CAPABILITY_3DNOW ) ) + { + return( 0 ); + } + + if( TestMethod( DOWNMIX_METHOD_VAR, "downmix3dn" ) ) + { + return( 999 ); + } + + /* 3D Now! is available: return this module's default score */ + return( 200 ); +} + diff --git a/plugins/downmix/downmixsse.c b/plugins/downmix/downmixsse.c new file mode 100644 index 0000000000..34d2c172b6 --- /dev/null +++ b/plugins/downmix/downmixsse.c @@ -0,0 +1,154 @@ +/***************************************************************************** + * downmixsse.c : accelerated SSE AC3 downmix module + ***************************************************************************** + * Copyright (C) 1999, 2000 VideoLAN + * $Id:
downmixsse.c,v 1.1 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Gaël Hendryckx + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + *****************************************************************************/ + +#define MODULE_NAME downmixsse +#include "modules_inner.h" + +/***************************************************************************** + * Preamble + *****************************************************************************/ +#include "defs.h" + +#include <stdlib.h> + +#include "config.h" +#include "common.h" +#include "threads.h" +#include "mtime.h" +#include "tests.h" + +#include "ac3_downmix.h" +#include "ac3_downmix_common.h" + +#include "modules.h" + +/***************************************************************************** + * Local and extern prototypes. + *****************************************************************************/ +static void downmix_getfunctions( function_list_t * p_function_list ); +static int downmix_Probe ( probedata_t *p_data ); + +/***************************************************************************** + * Build configuration tree.
+ *****************************************************************************/ +MODULE_CONFIG_START +ADD_WINDOW( "Configuration for AC3 downmixsse module" ) + ADD_COMMENT( "Ha, ha -- nothing to configure yet" ) +MODULE_CONFIG_END + +/***************************************************************************** + * InitModule: get the module structure and configuration. + ***************************************************************************** + * We have to fill psz_name, psz_longname and psz_version. These variables + * will be strdup()ed later by the main application because the module can + * be unloaded later to save memory, and we want to be able to access this + * data even after the module has been unloaded. + *****************************************************************************/ +MODULE_INIT +{ + p_module->psz_name = MODULE_STRING; + p_module->psz_longname = "SSE AC3 downmix module"; + p_module->psz_version = VERSION; + + p_module->i_capabilities = MODULE_CAPABILITY_NULL + | MODULE_CAPABILITY_DOWNMIX; + + return( 0 ); +} + +/***************************************************************************** + * ActivateModule: set the module to a usable state. + ***************************************************************************** + * This function fills the capability functions and the configuration + * structure. Once ActivateModule() has been called, the i_usage can + * be set to 0 and calls to NeedModule() be made to increment it. To unload + * the module, one has to wait until i_usage == 0 and call DeactivateModule().
+ *****************************************************************************/ +MODULE_ACTIVATE +{ + p_module->p_functions = malloc( sizeof( module_functions_t ) ); + if( p_module->p_functions == NULL ) + { + return( -1 ); + } + + downmix_getfunctions( &p_module->p_functions->downmix ); + + p_module->p_config = p_config; + + return( 0 ); +} + +/***************************************************************************** + * DeactivateModule: make sure the module can be unloaded. + ***************************************************************************** + * This function must only be called when i_usage == 0. If it successfully + * returns, i_usage can be set to -1 and the module unloaded. Be careful to + * lock usage_lock during the whole process. + *****************************************************************************/ +MODULE_DEACTIVATE +{ + free( p_module->p_functions ); + + return( 0 ); +} + +/* Following functions are local */ + +/***************************************************************************** + * Functions exported as capabilities. They are declared as static so that + * we don't pollute the namespace too much. 
+ *****************************************************************************/ +static void downmix_getfunctions( function_list_t * p_function_list ) +{ + p_function_list->pf_probe = downmix_Probe; +#define F p_function_list->functions.downmix + F.pf_downmix_3f_2r_to_2ch = _M( downmix_3f_2r_to_2ch ); + F.pf_downmix_3f_1r_to_2ch = _M( downmix_3f_1r_to_2ch ); + F.pf_downmix_2f_2r_to_2ch = _M( downmix_2f_2r_to_2ch ); + F.pf_downmix_2f_1r_to_2ch = _M( downmix_2f_1r_to_2ch ); + F.pf_downmix_3f_0r_to_2ch = _M( downmix_3f_0r_to_2ch ); + F.pf_stream_sample_2ch_to_s16 = _M( stream_sample_2ch_to_s16 ); + F.pf_stream_sample_1ch_to_s16 = _M( stream_sample_1ch_to_s16 ); +#undef F +} + +/***************************************************************************** + * downmix_Probe: returns a preference score + *****************************************************************************/ +static int downmix_Probe( probedata_t *p_data ) +{ + if( !TestCPU( CPU_CAPABILITY_SSE ) ) + { + return( 0 ); + } + + if( TestMethod( DOWNMIX_METHOD_VAR, "downmixsse" ) ) + { + return( 999 ); + } + + /* SSE is available: return this module's default score */ + return( 200 ); +} + diff --git a/plugins/idct/idctaltivec.c b/plugins/idct/idctaltivec.c index d1b6f4ee2d..8a4e2ca8ef 100644 --- a/plugins/idct/idctaltivec.c +++ b/plugins/idct/idctaltivec.c @@ -2,7 +2,7 @@ * idctaltivec.c : Altivec IDCT module ***************************************************************************** * Copyright (C) 1999, 2000 VideoLAN - * $Id: idctaltivec.c,v 1.5 2001/05/06 04:32:02 sam Exp $ + * $Id: idctaltivec.c,v 1.6 2001/05/15 16:19:42 sam Exp $ * * Authors: Christophe Massiot * @@ -146,22 +146,18 @@ static void idct_getfunctions( function_list_t * p_function_list ) *****************************************************************************/ static int idct_Probe( probedata_t *p_data ) { - if( TestCPU( CPU_CAPABILITY_ALTIVEC ) ) + if( !TestCPU( CPU_CAPABILITY_ALTIVEC ) ) { - if( TestMethod( IDCT_METHOD_VAR, "idctaltivec" ) ) - { -
return( 999 ); - } - else - { - /* The Altivec iDCT is deactivated until it really works */ - return( 0 /* 200 */ ); - } + return( 0 ); } - else + + if( TestMethod( IDCT_METHOD_VAR, "idctaltivec" ) ) { - return( 0 ); + return( 999 ); } + + /* The Altivec iDCT is deactivated until it really works */ + return( 0 /* 200 */ ); } /***************************************************************************** diff --git a/plugins/idct/idctmmx.c b/plugins/idct/idctmmx.c index c0b22526c6..acc3702347 100644 --- a/plugins/idct/idctmmx.c +++ b/plugins/idct/idctmmx.c @@ -2,7 +2,7 @@ * idctmmx.c : MMX IDCT module ***************************************************************************** * Copyright (C) 1999, 2000 VideoLAN - * $Id: idctmmx.c,v 1.10 2001/05/06 04:32:02 sam Exp $ + * $Id: idctmmx.c,v 1.11 2001/05/15 16:19:42 sam Exp $ * * Authors: Aaron Holtzman * Michel Lespinasse @@ -151,21 +151,17 @@ static void idct_getfunctions( function_list_t * p_function_list ) *****************************************************************************/ static int idct_Probe( probedata_t *p_data ) { - if( TestCPU( CPU_CAPABILITY_MMX ) ) + if( !TestCPU( CPU_CAPABILITY_MMX ) ) { - if( TestMethod( IDCT_METHOD_VAR, "idctmmx" ) ) - { - return( 999 ); - } - else - { - return( 150 ); - } + return( 0 ); } - else + + if( TestMethod( IDCT_METHOD_VAR, "idctmmx" ) ) { - return( 0 ); + return( 999 ); } + + return( 150 ); } /***************************************************************************** diff --git a/plugins/idct/idctmmxext.c b/plugins/idct/idctmmxext.c index f8a281d61c..e2c2e6688e 100644 --- a/plugins/idct/idctmmxext.c +++ b/plugins/idct/idctmmxext.c @@ -2,7 +2,7 @@ * idctmmxext.c : MMX EXT IDCT module ***************************************************************************** * Copyright (C) 1999, 2000 VideoLAN - * $Id: idctmmxext.c,v 1.7 2001/05/06 04:32:02 sam Exp $ + * $Id: idctmmxext.c,v 1.8 2001/05/15 16:19:42 sam Exp $ * * Authors: Aaron Holtzman * Michel Lespinasse @@ 
-151,21 +151,18 @@ static void idct_getfunctions( function_list_t * p_function_list ) *****************************************************************************/ static int idct_Probe( probedata_t *p_data ) { - if( TestCPU( CPU_CAPABILITY_MMXEXT ) ) + if( !TestCPU( CPU_CAPABILITY_MMXEXT ) ) { - if( TestMethod( IDCT_METHOD_VAR, "idctmmxext" ) ) - { - return( 999 ); - } - else - { - return( 200 ); - } + return( 0 ); } - else + + if( TestMethod( IDCT_METHOD_VAR, "idctmmxext" ) ) { - return( 0 ); + return( 999 ); } + + return( 200 ); + } /***************************************************************************** diff --git a/plugins/imdct/.cvsignore b/plugins/imdct/.cvsignore new file mode 100644 index 0000000000..63e7180a26 --- /dev/null +++ b/plugins/imdct/.cvsignore @@ -0,0 +1 @@ +.dep diff --git a/plugins/imdct/Makefile b/plugins/imdct/Makefile new file mode 100644 index 0000000000..330287c9b6 --- /dev/null +++ b/plugins/imdct/Makefile @@ -0,0 +1,53 @@ +############################################################################### +# vlc (VideoLAN Client) imdct module makefile +# (c)2001 VideoLAN +############################################################################### + +# +# Objects +# + +PLUGIN_IMDCT = imdct.o ac3_imdct_c.o ac3_srfft_c.o +PLUGIN_IMDCTSSE = imdctsse.o ac3_imdct_sse.o ac3_srfft_sse.o +PLUGIN_IMDCTCOMMON = ac3_imdct_common.o + +BUILTIN_IMDCT = $(PLUGIN_IMDCT:%.o=BUILTIN_IMDCT_%.o) \ + $(PLUGIN_IMDCTCOMMON:%.o=BUILTIN_IMDCT_%.o) +BUILTIN_IMDCTSSE = $(PLUGIN_IMDCTSSE:%.o=BUILTIN_IMDCTSSE_%.o) \ + $(PLUGIN_IMDCTCOMMON:%.o=BUILTIN_IMDCTSSE_%.o) + +PLUGIN_C = $(PLUGIN_IMDCT) $(PLUGIN_IMDCTSSE) $(PLUGIN_IMDCTCOMMON) +ALL_OBJ = $(PLUGIN_C) $(BUILTIN_IMDCT) $(BUILTIN_IMDCTSSE) + +# +# Virtual targets +# + +include ../../Makefile.modules + +$(BUILTIN_IMDCT): BUILTIN_IMDCT_%.o: .dep/%.d +$(BUILTIN_IMDCT): BUILTIN_IMDCT_%.o: %.c + $(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=imdct -c -o $@ $< + +$(BUILTIN_IMDCTSSE): BUILTIN_IMDCTSSE_%.o: .dep/%.d 
+$(BUILTIN_IMDCTSSE): BUILTIN_IMDCTSSE_%.o: %.c + $(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=imdctsse -c -o $@ $< + +# +# Real targets +# + +../../lib/imdct.so: $(PLUGIN_IMDCT) $(PLUGIN_IMDCTCOMMON) + $(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS) + +../../lib/imdct.a: $(BUILTIN_IMDCT) + ar r $@ $^ + $(RANLIB) $@ + +../../lib/imdctsse.so: $(PLUGIN_IMDCTSSE) $(PLUGIN_IMDCTCOMMON) + $(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS) + +../../lib/imdctsse.a: $(BUILTIN_IMDCTSSE) + ar r $@ $^ + $(RANLIB) $@ + diff --git a/plugins/imdct/ac3_imdct_c.c b/plugins/imdct/ac3_imdct_c.c new file mode 100644 index 0000000000..3ebf16c9b3 --- /dev/null +++ b/plugins/imdct/ac3_imdct_c.c @@ -0,0 +1,262 @@ +/***************************************************************************** + * ac3_imdct_c.c: ac3 DCT in C + ***************************************************************************** + * Copyright (C) 1999, 2000 VideoLAN + * $Id: ac3_imdct_c.c,v 1.1 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Renaud Dartus + * Aaron Holtzman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
+ *****************************************************************************/ + +#define MODULE_NAME imdct +#include "modules_inner.h" + +/***************************************************************************** + * Preamble + *****************************************************************************/ +#include "defs.h" + +#include /* memcpy() */ + +#include +#include + +#include "config.h" +#include "common.h" +#include "threads.h" +#include "mtime.h" + +#include "ac3_imdct.h" +#include "ac3_imdct_common.h" + +#ifndef M_PI +# define M_PI 3.14159265358979323846 +#endif + +void _M( fft_64p ) ( complex_t *x ); +void _M( fft_128p ) ( complex_t *x ); + +static float window[] = { + 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130, + 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443, + 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061, + 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121, + 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770, + 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153, + 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389, + 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563, + 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699, + 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757, + 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626, + 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126, + 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019, + 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031, + 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873, + 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269, + 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981, + 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 
0.81457, 0.82151, 0.82831, + 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716, + 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610, + 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560, + 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674, + 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099, + 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994, + 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513, + 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788, + 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919, + 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974, + 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993, + 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999, + 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, + 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 +}; + +static const int pm128[128] = +{ + 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120, + 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124, + 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122, + 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126, + 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121, + 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, + 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, + 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 +}; + +static const int pm64[64] = +{ + 0, 8, 16, 24, 32, 40, 48, 56, + 4, 20, 36, 52, 12, 28, 44, 60, + 2, 10, 18, 26, 34, 42, 50, 58, + 6, 14, 22, 30, 38, 46, 54, 62, + 1, 9, 17, 25, 33, 41, 49, 57, + 5, 21, 37, 53, 13, 29, 45, 61, + 3, 11, 19, 27, 35, 43, 51, 59, + 7, 23, 39, 55, 15, 31, 47, 63 +}; + +void _M( imdct_init ) (imdct_t * p_imdct) +{ + 
int i; + float scale = 181.019; /* gain folded into both twiddle tables filled below */ + + /* Twiddle factors to turn IFFT into IMDCT */ + for (i=0; i < 128; i++) { + p_imdct->xcos1[i] = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale; + p_imdct->xsin1[i] = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale; + } +} + /* imdct_do_512: transforms data[] in place. data[0..255] is pre-twiddled into p_imdct->buf in pm128 order, passed through fft_128p and post-twiddled; 256 windowed samples are then written back to data[] with delay[] added, and delay[0..255] is refilled from the same buffer with the window applied backwards. */ +void _M( imdct_do_512 ) (imdct_t * p_imdct, float data[], float delay[]) +{ + int i, j; + float tmp_a_r, tmp_a_i; + float *data_ptr; + float *delay_ptr; + float *window_ptr; + + /* 512 IMDCT with source and dest data in 'data' + * Pre IFFT complex multiply plus IFFT complex conjugate */ + + for( i=0; i < 128; i++) { + j = pm128[i]; /* input is fetched in permuted order; the output slot stays i */ + /* a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]); + * c = data[2*j] * xcos1[j]; + * b = data[256-2*j-1] * xsin1[j]; + * buf1[i].real = a - b + c; + * buf1[i].imag = b + c; */ /* the a/b/c recipe above is the three-multiply form of the same product; the statements below use four multiplies and negate the imaginary part */ + p_imdct->buf[i].real = (data[256-2*j-1] * p_imdct->xcos1[j]) - (data[2*j] * p_imdct->xsin1[j]); + p_imdct->buf[i].imag = -1.0 * (data[2*j] * p_imdct->xcos1[j] + data[256-2*j-1] * p_imdct->xsin1[j]); + } + + _M( fft_128p ) ( &p_imdct->buf[0] ); /* results are read back from buf below */ + + /* Post IFFT complex multiply plus IFFT complex conjugate */ + for (i=0; i < 128; i++) { + tmp_a_r = p_imdct->buf[i].real; + tmp_a_i = p_imdct->buf[i].imag; + /* a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]); + * b = tmp_a_r * xsin1[j]; + * c = tmp_a_i * xcos1[j]; + * buf[j].real = a - b + c; + * buf[j].imag = b + c; */ /* the code indexes with i here, not j as written in the recipe above */ + p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i]) + (tmp_a_i * p_imdct->xsin1[i]); + p_imdct->buf[i].imag =(tmp_a_r * p_imdct->xsin1[i]) - (tmp_a_i * p_imdct->xcos1[i]); + } + + data_ptr = data; + delay_ptr = delay; + window_ptr = window; /* the data, delay and window cursors advance in lock step through the next two loops */ + + /* Window and convert to real valued signal */ + for (i=0; i< 64; i++) { + *data_ptr++ = -p_imdct->buf[64+i].imag * *window_ptr++ + *delay_ptr++; + *data_ptr++ = p_imdct->buf[64-i-1].real * *window_ptr++ + *delay_ptr++; + } + + for(i=0; i< 64; i++) { + *data_ptr++ = -p_imdct->buf[i].real * *window_ptr++ + *delay_ptr++; + *data_ptr++ = p_imdct->buf[128-i-1].imag * *window_ptr++ + *delay_ptr++; + } + + /*
The trailing edge of the window goes into the delay line */ + delay_ptr = delay; /* window_ptr is one past the last coefficient used above; the pre-decrements below walk it back down */ + + for(i=0; i< 64; i++) { + *delay_ptr++ = -p_imdct->buf[64+i].real * *--window_ptr; + *delay_ptr++ = p_imdct->buf[64-i-1].imag * *--window_ptr; + } + + for(i=0; i<64; i++) { + *delay_ptr++ = p_imdct->buf[i].imag * *--window_ptr; + *delay_ptr++ = -p_imdct->buf[128-i-1].real * *--window_ptr; + } +} + + /* imdct_do_512_nol: same pre-twiddle, fft_128p and post-twiddle steps as imdct_do_512, but the 256 windowed samples are written to data[] without adding delay[]. delay[0..255] is still refilled at the end. */ +void _M( imdct_do_512_nol ) (imdct_t * p_imdct, float data[], float delay[]) +{ + int i, j; + + float tmp_a_i; + float tmp_a_r; + + float *data_ptr; + float *delay_ptr; + float *window_ptr; + + /* 512 IMDCT with source and dest data in 'data' + * Pre IFFT complex multiply plus IFFT complex conjugate */ + + for( i=0; i < 128; i++) { + /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) */ + j = pm128[i]; /* input is fetched in permuted order; the output slot stays i */ + /* a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]); + * c = data[2*j] * xcos1[j]; + * b = data[256-2*j-1] * xsin1[j]; + * buf1[i].real = a - b + c; + * buf1[i].imag = b + c; */ + p_imdct->buf[i].real = (data[256-2*j-1] * p_imdct->xcos1[j]) - (data[2*j] * p_imdct->xsin1[j]); + p_imdct->buf[i].imag = -1.0 * (data[2*j] * p_imdct->xcos1[j] + data[256-2*j-1] * p_imdct->xsin1[j]); + } + + _M( fft_128p ) ( &p_imdct->buf[0] ); /* results are read back from buf below */ + + /* Post IFFT complex multiply plus IFFT complex conjugate*/ + for (i=0; i < 128; i++) { + /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; + * int j1 = i; */ + tmp_a_r = p_imdct->buf[i].real; + tmp_a_i = p_imdct->buf[i].imag; + /* a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]); + * b = tmp_a_r * xsin1[j]; + * c = tmp_a_i * xcos1[j]; + * buf[j].real = a - b + c; + * buf[j].imag = b + c; */ + p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i]) + (tmp_a_i * p_imdct->xsin1[i]); + p_imdct->buf[i].imag =(tmp_a_r * p_imdct->xsin1[i]) - (tmp_a_i * p_imdct->xcos1[i]); + } + + data_ptr = data; + delay_ptr = delay; /* not read by the two output loops; it is reset before delay[] is refilled */ + window_ptr = window; + + /* Window and convert to real valued signal, no overlap here*/ + for (i=0; i< 64; i++) { + *data_ptr++ =
-p_imdct->buf[64+i].imag * *window_ptr++; + *data_ptr++ = p_imdct->buf[64-i-1].real * *window_ptr++; + } + + for(i=0; i< 64; i++) { + *data_ptr++ = -p_imdct->buf[i].real * *window_ptr++; + *data_ptr++ = p_imdct->buf[128-i-1].imag * *window_ptr++; + } + + /* The trailing edge of the window goes into the delay line */ + delay_ptr = delay; /* window_ptr is one past the 256 coefficients used above; the pre-decrements below walk it back down */ + + for(i=0; i< 64; i++) { + *delay_ptr++ = -p_imdct->buf[64+i].real * *--window_ptr; + *delay_ptr++ = p_imdct->buf[64-i-1].imag * *--window_ptr; + } + + for(i=0; i<64; i++) { + *delay_ptr++ = p_imdct->buf[i].imag * *--window_ptr; + *delay_ptr++ = -p_imdct->buf[128-i-1].real * *--window_ptr; + } +} + diff --git a/plugins/imdct/ac3_imdct_common.c b/plugins/imdct/ac3_imdct_common.c new file mode 100644 index 0000000000..15ff1d9c49 --- /dev/null +++ b/plugins/imdct/ac3_imdct_common.c @@ -0,0 +1,267 @@ +/***************************************************************************** + * ac3_imdct_common.c: common ac3 DCT functions + ***************************************************************************** + * Copyright (C) 1999, 2000 VideoLAN + * $Id: ac3_imdct_common.c,v 1.1 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Renaud Dartus + * Aaron Holtzman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/ + +/* MODULE_NAME defined in Makefile together with -DBUILTIN */ +#ifdef BUILTIN +# include "modules_inner.h" +#else +# define _M( foo ) foo +#endif + +/***************************************************************************** + * Preamble + *****************************************************************************/ +#include "defs.h" + +#include /* memcpy() */ + +#include +#include + +#include "config.h" +#include "common.h" +#include "threads.h" +#include "mtime.h" + +#include "ac3_imdct.h" + +#ifndef M_PI +# define M_PI 3.14159265358979323846 +#endif + +static float window[] = { + 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130, + 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443, + 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061, + 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121, + 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770, + 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153, + 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389, + 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563, + 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699, + 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757, + 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626, + 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126, + 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019, + 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031, + 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873, + 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269, + 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981, + 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 
0.82831, + 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716, + 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610, + 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560, + 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674, + 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099, + 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994, + 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513, + 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788, + 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919, + 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974, + 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993, + 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999, + 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, + 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 +}; + +static const int pm128[128] = +{ + 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120, + 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124, + 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122, + 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126, + 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121, + 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, + 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, + 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 +}; + +static const int pm64[64] = +{ + 0, 8, 16, 24, 32, 40, 48, 56, + 4, 20, 36, 52, 12, 28, 44, 60, + 2, 10, 18, 26, 34, 42, 50, 58, + 6, 14, 22, 30, 38, 46, 54, 62, + 1, 9, 17, 25, 33, 41, 49, 57, + 5, 21, 37, 53, 13, 29, 45, 61, + 3, 11, 19, 27, 35, 43, 51, 59, + 7, 23, 39, 55, 15, 31, 47, 63 +}; + +void _M( imdct_do_256 ) (imdct_t * p_imdct, float data[],float 
delay[]) +{ /* imdct_do_256: data[] is treated as two interleaved coefficient sets (even and odd indices). Each set is pre-twiddled into its own 64-entry half of p_imdct->buf in pm64 order, run through fft_64p and post-twiddled. The first half is then windowed into data[] with delay[] added, and the second half is windowed backwards into delay[]. */ + int i, j, k; + int p, q; + + float tmp_a_i; + float tmp_a_r; + + float *data_ptr; + float *delay_ptr; + float *window_ptr; + + complex_t *buf1, *buf2; + + buf1 = &p_imdct->buf[0]; + buf2 = &p_imdct->buf[64]; /* buf1 and buf2 are two 64-entry views into the same scratch array */ + + /* Pre IFFT complex multiply plus IFFT complex conjugate */ + for (k=0; k<64; k++) { + /* X1[k] = X[2*k] + * X2[k] = X[2*k+1] */ + + j = pm64[k]; + p = 2 * (128-2*j-1); + q = 2 * (2 * j); /* p and q are even indices into data[]: set 1 reads data[p] and data[q], set 2 reads the odd neighbours p+1 and q+1 */ + + /* NOTE(review): no code visible in this part of the patch fills xcos2 or xsin2 - confirm they are initialised elsewhere */ + /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */ + buf1[k].real = data[p] * p_imdct->xcos2[j] - data[q] * p_imdct->xsin2[j]; + buf1[k].imag = -1.0f*(data[q] * p_imdct->xcos2[j] + data[p] * p_imdct->xsin2[j]); + /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */ + buf2[k].real = data[p + 1] * p_imdct->xcos2[j] - data[q + 1] * p_imdct->xsin2[j]; + buf2[k].imag = -1.0f*(data[q + 1] * p_imdct->xcos2[j] + data[p + 1] * p_imdct->xsin2[j]); + } + + _M( fft_64p ) ( &buf1[0] ); + _M( fft_64p ) ( &buf2[0] ); + + /* Post IFFT complex multiply */ + for( i=0; i < 64; i++) { + tmp_a_r = buf1[i].real; + tmp_a_i = -buf1[i].imag; /* imaginary part is negated before the multiply */ + buf1[i].real = (tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]); + buf1[i].imag = (tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]); + tmp_a_r = buf2[i].real; + tmp_a_i = -buf2[i].imag; + buf2[i].real = (tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]); + buf2[i].imag = (tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]); + } + + data_ptr = data; + delay_ptr = delay; + window_ptr = window; + + /* Window and convert to real valued signal */ + for(i=0; i< 64; i++) { + *data_ptr++ = -buf1[i].imag * *window_ptr++ + *delay_ptr++; + *data_ptr++ = buf1[64-i-1].real * *window_ptr++ + *delay_ptr++; + } + + for(i=0; i< 64; i++) { + *data_ptr++ = -buf1[i].real * *window_ptr++ + *delay_ptr++; + *data_ptr++ = buf1[64-i-1].imag * *window_ptr++ + *delay_ptr++; + } + + delay_ptr = delay; /* window_ptr is one past the 256 coefficients used above; the pre-decrements below walk it back down */ + + for(i=0; i< 64; i++) { + *delay_ptr++ = -buf2[i].real * *--window_ptr; +
*delay_ptr++ = buf2[64-i-1].imag * *--window_ptr; + } + + for(i=0; i< 64; i++) { + *delay_ptr++ = buf2[i].imag * *--window_ptr; + *delay_ptr++ = -buf2[64-i-1].real * *--window_ptr; + } +} + + /* imdct_do_256_nol: same pre-twiddle, fft_64p and post-twiddle steps as imdct_do_256; the first output loop writes data[] without adding delay[]. delay[] is refilled from the second buffer half at the end. */ +void _M( imdct_do_256_nol ) (imdct_t * p_imdct, float data[], float delay[]) +{ + int i, j, k; + int p, q; + + float tmp_a_i; + float tmp_a_r; + + float *data_ptr; + float *delay_ptr; + float *window_ptr; + + complex_t *buf1, *buf2; + + buf1 = &p_imdct->buf[0]; + buf2 = &p_imdct->buf[64]; + + /* Pre IFFT complex multiply plus IFFT complex conjugate */ + for(k=0; k<64; k++) { + /* X1[k] = X[2*k] + * X2[k] = X[2*k+1] */ + j = pm64[k]; + p = 2 * (128-2*j-1); + q = 2 * (2 * j); + + /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */ + buf1[k].real = data[p] * p_imdct->xcos2[j] - data[q] * p_imdct->xsin2[j]; + buf1[k].imag = -1.0f*(data[q] * p_imdct->xcos2[j] + data[p] * p_imdct->xsin2[j]); + /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */ + buf2[k].real = data[p + 1] * p_imdct->xcos2[j] - data[q + 1] * p_imdct->xsin2[j]; + buf2[k].imag = -1.0f*(data[q + 1] * p_imdct->xcos2[j] + data[p + 1] * p_imdct->xsin2[j]); + } + + _M( fft_64p ) ( &buf1[0] ); + _M( fft_64p ) ( &buf2[0] ); + + /* Post IFFT complex multiply */ + for( i=0; i < 64; i++) { + /* y1[n] = z1[n] * (xcos2[n] + j * xsin2[n]) ; */ + tmp_a_r = buf1[i].real; + tmp_a_i = -buf1[i].imag; + buf1[i].real =(tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]); + buf1[i].imag =(tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]); + /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */ + tmp_a_r = buf2[i].real; + tmp_a_i = -buf2[i].imag; + buf2[i].real =(tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]); + buf2[i].imag =(tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]); + } + + data_ptr = data; + delay_ptr = delay; + window_ptr = window; + + /* Window and convert to real valued signal, no overlap */ + for(i=0; i< 64; i++) { + *data_ptr++ =
-buf1[i].imag * *window_ptr++; + *data_ptr++ = buf1[64-i-1].real * *window_ptr++; + } + /* Second half of the output is also written without overlap. The delay[] terms that used to be added in this loop paired delay[0..127] with data[128..255], because the first loop never advances delay_ptr, and they contradicted the no-overlap behaviour of imdct_do_512_nol. */ + for(i=0; i< 64; i++) { + *data_ptr++ = -buf1[i].real * *window_ptr++; + *data_ptr++ = buf1[64-i-1].imag * *window_ptr++; + } + + delay_ptr = delay; /* window_ptr is one past the 256 coefficients used above; the pre-decrements below walk it back down */ + + for(i=0; i< 64; i++) { + *delay_ptr++ = -buf2[i].real * *--window_ptr; + *delay_ptr++ = buf2[64-i-1].imag * *--window_ptr; + } + + for(i=0; i< 64; i++) { + *delay_ptr++ = buf2[i].imag * *--window_ptr; + *delay_ptr++ = -buf2[64-i-1].real * *--window_ptr; + } +} + diff --git a/src/ac3_decoder/ac3_imdct_c.h b/plugins/imdct/ac3_imdct_common.h similarity index 69% rename from src/ac3_decoder/ac3_imdct_c.h rename to plugins/imdct/ac3_imdct_common.h index 5863dc9d88..ce0a7ab6d8 100644 --- a/src/ac3_decoder/ac3_imdct_c.h +++ b/plugins/imdct/ac3_imdct_common.h @@ -1,8 +1,8 @@ /***************************************************************************** - * ac3_imdct_c.h: ac3 DCT + * ac3_imdct_common.h: common ac3 DCT headers ***************************************************************************** * Copyright (C) 1999, 2000 VideoLAN - * $Id: ac3_imdct_c.h,v 1.2 2001/04/30 21:10:25 reno Exp $ + * $Id: ac3_imdct_common.h,v 1.1 2001/05/15 16:19:42 sam Exp $ * * Authors: Renaud Dartus * Aaron Holtzman @@ -22,9 +22,9 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
*****************************************************************************/ -int imdct_init_c (imdct_t * p_imdct); -void imdct_do_256(imdct_t * p_imdct, float data[], float delay[]); -void imdct_do_256_nol(imdct_t * p_imdct, float data[], float delay[]); -void imdct_do_512_c(imdct_t * p_imdct, float data[], float delay[]); -void imdct_do_512_nol_c(imdct_t * p_imdct, float data[], float delay[]); +void _M( imdct_init ) ( imdct_t * p_imdct ); /* fills the twiddle tables stored in *p_imdct */ +void _M( imdct_do_256 ) ( imdct_t * p_imdct, float data[], float delay[] ); /* two fft_64p passes over the interleaved halves of data[]; output has delay[] added, then delay[] is refilled */ +void _M( imdct_do_256_nol ) ( imdct_t * p_imdct, float data[], float delay[] ); /* as imdct_do_256, but the first output loop does not add delay[] */ +void _M( imdct_do_512 ) ( imdct_t * p_imdct, float data[], float delay[] ); /* one fft_128p pass; output has delay[] added, then delay[] is refilled */ +void _M( imdct_do_512_nol ) ( imdct_t * p_imdct, float data[], float delay[] ); /* as imdct_do_512, but data[] is written without adding delay[] */ diff --git a/plugins/imdct/ac3_imdct_sse.c b/plugins/imdct/ac3_imdct_sse.c new file mode 100644 index 0000000000..d426f55a66 --- /dev/null +++ b/plugins/imdct/ac3_imdct_sse.c @@ -0,0 +1,637 @@ +/***************************************************************************** + * ac3_imdct_sse.c: accelerated SSE ac3 DCT + ***************************************************************************** + * Copyright (C) 1999, 2000 VideoLAN + * $Id: ac3_imdct_sse.c,v 1.1 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Renaud Dartus + * Aaron Holtzman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + *****************************************************************************/ + +#define MODULE_NAME imdctsse +#include "modules_inner.h" + +/***************************************************************************** + * Preamble + *****************************************************************************/ +#include "defs.h" + +#include +#include + +#include "config.h" +#include "common.h" +#include "threads.h" +#include "mtime.h" + +#include "ac3_imdct.h" +#include "ac3_imdct_common.h" + +static const float window[] = { + 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130, + 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443, + 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061, + 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121, + 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770, + 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153, + 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389, + 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563, + 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699, + 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757, + 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626, + 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126, + 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019, + 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031, + 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873, + 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269, + 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 
0.76981, + 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831, + 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716, + 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610, + 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560, + 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674, + 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099, + 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994, + 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513, + 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788, + 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919, + 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974, + 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993, + 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999, + 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, + 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 +}; + +static const int pm128[128] = +{ + 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120, + 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124, + 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122, + 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126, + 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121, + 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, + 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, + 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 +}; + +void _M( fft_64p ) ( complex_t *x ); +void _M( fft_128p ) ( complex_t *a ); + +static void imdct512_pre_ifft_twiddle_sse (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse); +static void imdct512_post_ifft_twiddle_sse (complex_t *buf, float *xcos_sin_sse); +static void 
imdct512_window_delay_sse (complex_t *buf, float *data_ptr, const float *window_prt, float *delay_prt); +static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, const float *window_prt, float *delay_prt); + + /* imdct_init (SSE build): for each i in 0..127 stores the scaled cosine c and sine s of the twiddle angle as four consecutive floats { c, -s, -s, -c } in xcos_sin_sse, which the assembly below loads 16 bytes at a time. */ +void _M( imdct_init ) (imdct_t * p_imdct) +{ + int i; + float scale = 181.019; + + for (i=0; i < 128; i++) + { + float xcos_i = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale; + float xsin_i = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale; + p_imdct->xcos_sin_sse[i * 4] = xcos_i; + p_imdct->xcos_sin_sse[i * 4 + 1] = -xsin_i; + p_imdct->xcos_sin_sse[i * 4 + 2] = -xsin_i; + p_imdct->xcos_sin_sse[i * 4 + 3] = -xcos_i; + } +} + /* imdct_do_512 (SSE build): pre-twiddle in pm128 order, fft_128p, post-twiddle, then windowing that adds delay[] to the output and refills delay[]. */ +void _M( imdct_do_512 ) (imdct_t * p_imdct, float data[], float delay[]) +{ + imdct512_pre_ifft_twiddle_sse (pm128, p_imdct->buf, data, p_imdct->xcos_sin_sse); + _M( fft_128p ) ( p_imdct->buf ); + imdct512_post_ifft_twiddle_sse (p_imdct->buf, p_imdct->xcos_sin_sse); + imdct512_window_delay_sse (p_imdct->buf, data, window, delay); +} + + /* imdct_do_512_nol (SSE build): identical pipeline; only the last stage differs, calling the _nol windowing routine. */ +void _M( imdct_do_512_nol ) (imdct_t * p_imdct, float data[], float delay[]) +{ + imdct512_pre_ifft_twiddle_sse (pm128, p_imdct->buf, data, p_imdct->xcos_sin_sse); + _M( fft_128p ) ( p_imdct->buf ); + imdct512_post_ifft_twiddle_sse (p_imdct->buf, p_imdct->xcos_sin_sse); + imdct512_window_delay_nol_sse (p_imdct->buf, data, window, delay); +} + /* imdct512_pre_ifft_twiddle_sse: hand-written IA-32 routine with no asm operands. It builds its own frame and reads its four arguments from fixed offsets from %ebp. NOTE(review): that only finds the arguments if the compiler has pushed nothing between function entry and this statement and the function is not inlined - confirm the build flags guarantee this. Each of the 64 iterations consumes two pmt entries and stores two complex values (16 bytes) to buf. */ +static void imdct512_pre_ifft_twiddle_sse (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse) +{ + __asm__ __volatile__ ( + "pushl %%ebp\n" + "movl %%esp, %%ebp\n" + "addl $-4, %%esp\n" /* local variable, loop counter */ + + "pushl %%eax\n" + "pushl %%ebx\n" + "pushl %%ecx\n" + "pushl %%edx\n" + "pushl %%edi\n" + "pushl %%esi\n" + + "movl 8(%%ebp), %%eax\n" /* pmt */ + "movl 12(%%ebp), %%ebx\n" /* buf */ + "movl 16(%%ebp), %%ecx\n" /* data */ + "movl 20(%%ebp), %%edx\n" /* xcos_sin_sse */ + "movl $64, -4(%%ebp)\n" /* 64 iterations, two table entries each */ + +".loop:\n" /* NOTE(review): ordinary assembler label; a second copy of this asm in the same object file would presumably clash - confirm */ + "movl (%%eax), %%esi\n" + "movl 4(%%eax), %%edi\n" + "movss (%%ecx, %%esi, 8), %%xmm1\n" /* 2j */ + "movss (%%ecx,
%%edi, 8), %%xmm3\n" /* 2(j+1) */ + + "shll $1, %%esi\n" /* index doubled, so the scale-8 addressing below steps 16 bytes per table entry */ + "shll $1, %%edi\n" + + "movups (%%edx, %%esi, 8), %%xmm0\n" /* -c_j | -s_j | -s_j | c_j */ + "movups (%%edx, %%edi, 8), %%xmm2\n" /* -c_j+1 | -s_j+1 | -s_j+1 | c_j+1 */ + + "negl %%esi\n" + "negl %%edi\n" + + "movss 1020(%%ecx, %%esi, 4), %%xmm4\n" /* 255-2j */ + "addl $8, %%eax\n" + "movss 1020(%%ecx, %%edi, 4), %%xmm5\n" /* 255-2(j+1) */ + + "shufps $0, %%xmm1, %%xmm4\n" /* 2j | 2j | 255-2j | 255-2j */ + "shufps $0, %%xmm3, %%xmm5\n" /* 2(j+1) | 2(j+1) | 255-2(j+1) | 255-2(j+1) */ + "mulps %%xmm4, %%xmm0\n" + "mulps %%xmm5, %%xmm2\n" + "movhlps %%xmm0, %%xmm1\n" + "movhlps %%xmm2, %%xmm3\n" + "addl $16, %%ebx\n" + "addps %%xmm1, %%xmm0\n" + "addps %%xmm3, %%xmm2\n" + "movlhps %%xmm2, %%xmm0\n" + + "movups %%xmm0, -16(%%ebx)\n" + "decl -4(%%ebp)\n" + "jnz .loop\n" + + "popl %%esi\n" + "popl %%edi\n" + "popl %%edx\n" + "popl %%ecx\n" + "popl %%ebx\n" + "popl %%eax\n" + + "addl $4, %%esp\n" + "popl %%ebp\n" + ::); /* no outputs, inputs or clobbers are declared for this asm */ +} + /* imdct512_post_ifft_twiddle_sse: buf arrives in %eax and xcos_sin_sse in %ecx through the asm constraints. 32 iterations, each rewriting four complex values of buf in place and advancing the table pointer by 64 bytes. */ +static void imdct512_post_ifft_twiddle_sse (complex_t *buf, float *xcos_sin_sse) +{ + __asm__ __volatile__ ( + "pushl %%ebx\n" + "movl $32, %%ebx\n" /* loop counter */ + +".loop1:\n" + "movups (%%eax), %%xmm0\n" /* im1 | re1 | im0 | re0 */ + + "movups (%%ecx), %%xmm2\n" /* -c | -s | -s | c */ + "movhlps %%xmm0, %%xmm1\n" /* im1 | re1 */ + "movups 16(%%ecx), %%xmm3\n" /* -c1 | -s1 | -s1 | c1 */ + + "shufps $0x50, %%xmm0, %%xmm0\n" /* im0 | im0 | re0 | re0 */ + "shufps $0x50, %%xmm1, %%xmm1\n" /* im1 | im1 | re1 | re1 */ + + "movups 16(%%eax), %%xmm4\n" /* im3 | re3 | im2 | re2 */ + + "shufps $0x27, %%xmm2, %%xmm2\n" /* c | -s | -s | -c */ + "movhlps %%xmm4, %%xmm5\n" /* im3 | re3 */ + "shufps $0x27, %%xmm3, %%xmm3\n" /* c1 | -s1 | -s1 | -c1 */ + + "movups 32(%%ecx), %%xmm6\n" /* -c2 | -s2 | -s2 | c2 */ + "movups 48(%%ecx), %%xmm7\n" /* -c3 | -s3 | -s3 | c3 */ + + "shufps $0x50, %%xmm4, %%xmm4\n" /* im2 | im2 | re2 | re2 */ + "shufps $0x50, %%xmm5, %%xmm5\n" /* im3 | im3 | re3 | re3 */ + +
"mulps %%xmm2, %%xmm0\n" + "mulps %%xmm3, %%xmm1\n" + + "shufps $0x27, %%xmm6, %%xmm6\n" /* c2 | -s2 | -s2 | -c2 */ + "shufps $0x27, %%xmm7, %%xmm7\n" /* c3 | -s3 | -s3 | -c3 */ + + "movhlps %%xmm0, %%xmm2\n" + "movhlps %%xmm1, %%xmm3\n" + + "mulps %%xmm6, %%xmm4\n" + "mulps %%xmm7, %%xmm5\n" + + "addps %%xmm2, %%xmm0\n" + "addps %%xmm3, %%xmm1\n" + + "movhlps %%xmm4, %%xmm6\n" + "movhlps %%xmm5, %%xmm7\n" + + "addps %%xmm6, %%xmm4\n" + "addps %%xmm7, %%xmm5\n" + + "movlhps %%xmm1, %%xmm0\n" + "movlhps %%xmm5, %%xmm4\n" + + "movups %%xmm0, (%%eax)\n" + "movups %%xmm4, 16(%%eax)\n" + "addl $64, %%ecx\n" + "addl $32, %%eax\n" + "decl %%ebx\n" + "jnz .loop1\n" + + "popl %%ebx\n" + : "=a" (buf) + : "a" (buf), "c" (xcos_sin_sse) ); /* NOTE(review): %ecx and %xmm0-7 are modified and memory is written through %eax, but no clobbers are listed - confirm this is safe with the compilers in use */ +} + /* imdct512_window_delay_sse: like the pre-twiddle routine, takes no asm operands and reads buf, data, window and delay from 8, 12, 16 and 20(%ebp) after building its own frame (same caveat about the caller stack layout applies). Four loops of 16 iterations, eight floats each: the first two write data[] as windowed buf values plus delay[], the last two refill delay[] with windowed buf values while the window pointer moves back down. */ +static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, const float *window_prt, float *delay_prt) +{ + __asm__ __volatile__ ( + "pushl %%ebp\n" + "movl %%esp, %%ebp\n" + + "pushl %%eax\n" + "pushl %%ebx\n" + "pushl %%ecx\n" + "pushl %%edx\n" + "pushl %%esi\n" + "pushl %%edi\n" + + "movl 20(%%ebp), %%ebx\n" /* delay */ + "movl 16(%%ebp), %%edx\n" /* window */ + + "movl 8(%%ebp), %%eax\n" /* buf */ + "movl $16, %%ecx\n" /* loop count */ + "leal 516(%%eax), %%esi\n" /* buf[64].im */ + "leal 504(%%eax), %%edi\n" /* buf[63].re */ + "movl 12(%%ebp), %%eax\n" /* data */ + +".first_128_samples:\n" + "movss (%%esi), %%xmm0\n" + "movss 8(%%esi), %%xmm2\n" + "movss (%%edi), %%xmm1\n" + "movss -8(%%edi), %%xmm3\n" + + "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */ + "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */ + + "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ + "movups (%%ebx), %%xmm5\n" /* d3 | d2 | d1 | d0 */ + "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */ + + "movss 16(%%esi), %%xmm6\n" /* im2 */ + "movss 24(%%esi), %%xmm7\n" /* im3 */ + "subps %%xmm1, %%xmm0\n" /* -re1 | im1 | -re0 | im0 */ + "movss -16(%%edi), %%xmm2\n" /* re2 */ + "movss -24(%%edi), %%xmm3\n" /* re3 */ + "mulps %%xmm4,
%%xmm0\n" + "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */ + "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */ + "addps %%xmm5, %%xmm0\n" + "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */ + "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */ + "movups 16(%%ebx), %%xmm5\n" /* d7 | d6 | d5 | d4 */ + "subps %%xmm2, %%xmm6\n" /* -re3 | im3 | -re2 | im2 */ + "addl $32, %%edx\n" + "movups %%xmm0, (%%eax)\n" + "addl $32, %%ebx\n" + "mulps %%xmm4, %%xmm6\n" + "addl $32, %%esi\n" + "addl $32, %%eax\n" + "addps %%xmm5, %%xmm6\n" + "addl $-32, %%edi\n" + "movups %%xmm6, -16(%%eax)\n" + "decl %%ecx\n" + "jnz .first_128_samples\n" + + "movl 8(%%ebp), %%esi\n" /* buf[0].re */ + "leal 1020(%%esi), %%edi\n" /* buf[127].im */ + "movl $16, %%ecx\n" /* loop count */ + +".second_128_samples:\n" + "movss (%%esi), %%xmm0\n" /* buf[i].re */ + "movss 8(%%esi), %%xmm2\n" /* re1 */ + "movss (%%edi), %%xmm1\n" /* buf[127-i].im */ + "movss -8(%%edi), %%xmm3\n" /* im1 */ + + "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */ + "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */ + + "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ + "movups (%%ebx), %%xmm5\n" /* d3 | d2 | d1 | d0 */ + + "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */ + "movss 16(%%esi), %%xmm6\n" /* re2 */ + "movss 24(%%esi), %%xmm7\n" /* re3 */ + "movss -16(%%edi), %%xmm2\n" /* im2 */ + "movss -24(%%edi), %%xmm3\n" /* im3 */ + "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */ + "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */ + "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */ + "mulps %%xmm4, %%xmm0\n" + "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */ + "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */ + "addl $32, %%esi\n" + "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */ + "addps %%xmm5, %%xmm0\n" + "mulps %%xmm4, %%xmm6\n" + "addl $-32, %%edi\n" + "movups 16(%%ebx), %%xmm5\n" /* d7 | d6 | d5 | d4 */ + "movups %%xmm0, (%%eax)\n" + "addps
%%xmm5, %%xmm6\n" + "addl $32, %%edx\n" + "addl $32, %%eax\n" + "addl $32, %%ebx\n" + "movups %%xmm6, -16(%%eax)\n" + "decl %%ecx\n" + "jnz .second_128_samples\n" + + "movl 8(%%ebp), %%eax\n" + "leal 512(%%eax), %%esi\n" /* buf[64].re */ + "leal 508(%%eax), %%edi\n" /* buf[63].im */ + "movl $16, %%ecx\n" /* loop count */ + "movl 20(%%ebp), %%eax\n" /* delay */ + +".first_128_delay:\n" + "movss (%%esi), %%xmm0\n" + "movss 8(%%esi), %%xmm2\n" + "movss (%%edi), %%xmm1\n" + "movss -8(%%edi), %%xmm3\n" + + "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */ + "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */ + + "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ /* NOTE(review): each group of four window coefficients is loaded in ascending address order, whereas the C routines in this patch step the window back one coefficient at a time when refilling delay[] - confirm the intended pairing */ + "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */ + "movss 16(%%esi), %%xmm6\n" /* re2 */ + "movss 24(%%esi), %%xmm7\n" /* re3 */ + "movss -16(%%edi), %%xmm2\n" /* im2 */ + "movss -24(%%edi), %%xmm3\n" /* im3 */ + "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */ + "addl $-32, %%edx\n" + "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */ + "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */ + "mulps %%xmm4, %%xmm0\n" + "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */ + "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */ + "movups %%xmm0, (%%eax)\n" + "addl $32, %%esi\n" + "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */ + "addl $-32, %%edi\n" + "mulps %%xmm5, %%xmm6\n" + "addl $32, %%eax\n" + "movups %%xmm6, -16(%%eax)\n" + "decl %%ecx\n" + "jnz .first_128_delay\n" + + "movl 8(%%ebp), %%ebx\n" + "leal 4(%%ebx), %%esi\n" /* buf[0].im */ + "leal 1016(%%ebx), %%edi\n" /* buf[127].re */ + "movl $16, %%ecx\n" /* loop count */ + +".second_128_delay:\n" + "movss (%%esi), %%xmm0\n" + "movss 8(%%esi), %%xmm2\n" + "movss (%%edi), %%xmm1\n" + "movss -8(%%edi), %%xmm3\n" + + "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */ + "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */ + + "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ + "shufps $0xb1, %%xmm1,
%%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */ + "movss 16(%%esi), %%xmm6\n" /* im2 */ + "movss 24(%%esi), %%xmm7\n" /* im3 */ + "movss -16(%%edi), %%xmm2\n" /* re2 */ + "movss -24(%%edi), %%xmm3\n" /* re3 */ + "subps %%xmm0, %%xmm1\n" /* re1 | -im1 | re0 | -im0 */ + "addl $-32, %%edx\n" + "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */ + "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */ + "mulps %%xmm4, %%xmm1\n" + "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */ + "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */ + "movups %%xmm1, (%%eax)\n" + "addl $32, %%esi\n" + "subps %%xmm6, %%xmm2\n" /* re3 | -im3 | re2 | -im2 */ + "addl $-32, %%edi\n" + "mulps %%xmm5, %%xmm2\n" + "addl $32, %%eax\n" + "movups %%xmm2, -16(%%eax)\n" + "decl %%ecx\n" + "jnz .second_128_delay\n" + + "popl %%edi\n" + "popl %%esi\n" + "popl %%edx\n" + "popl %%ecx\n" + "popl %%ebx\n" + "popl %%eax\n" + + "leave\n" /* undoes the frame built at the top of this asm */ + ::); +} + /* imdct512_window_delay_nol_sse: same entry sequence as imdct512_window_delay_sse, with the loads of delay[] commented out in the output loop below. */ +static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, const float *window_prt, float *delay_prt) +{ + __asm__ __volatile__ ( + "pushl %%ebp\n" + "movl %%esp, %%ebp\n" + + "pushl %%eax\n" + "pushl %%ebx\n" + "pushl %%ecx\n" + "pushl %%edx\n" + "pushl %%esi\n" + "pushl %%edi\n" + + /* movl 20(%%ebp), %%ebx delay */ + "movl 16(%%ebp), %%edx\n" /* window */ + + "movl 8(%%ebp), %%eax\n" /* buf */ + "movl $16, %%ecx\n" /* loop count */ + "leal 516(%%eax), %%esi\n" /* buf[64].im */ + "leal 504(%%eax), %%edi\n" /* buf[63].re */ + "movl 12(%%ebp), %%eax\n" /* data */ + +".first_128_sample:\n" + "movss (%%esi), %%xmm0\n" + "movss 8(%%esi), %%xmm2\n" + "movss (%%edi), %%xmm1\n" + "movss -8(%%edi), %%xmm3\n" + + "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */ + "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */ + + "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ + /* movups (%%ebx), %%xmm5 d3 | d2 | d1 | d0 */ + "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */ + + "movss 16(%%esi), %%xmm6\n" /* im2 */ + "movss 24(%%esi),
%%xmm7\n" /* im3 */ + "subps %%xmm1, %%xmm0\n" /* -re1 | im1 | -re0 | im0 */ + "movss -16(%%edi), %%xmm2\n" /* re2 */ + "movss -24(%%edi), %%xmm3\n" /* re3 */ + "mulps %%xmm4, %%xmm0\n" + "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */ + "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */ + /* addps %%xmm5, %%xmm0 */ + "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */ + "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */ + /* movups 16(%%ebx), %%xmm5 d7 | d6 | d5 | d4 */ + "subps %%xmm2, %%xmm6\n" /* -re3 | im3 | -re2 | im2 */ + "addl $32, %%edx\n" + "movups %%xmm0, (%%eax)\n" + /* addl $32, %%ebx */ + "mulps %%xmm4, %%xmm6\n" + "addl $32, %%esi\n" + "addl $32, %%eax\n" + /* addps %%xmm5, %%xmm6 */ + "addl $-32, %%edi\n" + "movups %%xmm6, -16(%%eax)\n" + "decl %%ecx\n" + "jnz .first_128_sample\n" + + "movl 8(%%ebp), %%esi\n" /* buf[0].re */ + "leal 1020(%%esi), %%edi\n" /* buf[127].im */ + "movl $16, %%ecx\n" /* loop count */ + +".second_128_sample:\n" + "movss (%%esi), %%xmm0\n" /* buf[i].re */ + "movss 8(%%esi), %%xmm2\n" /* re1 */ + "movss (%%edi), %%xmm1\n" /* buf[127-i].im */ + "movss -8(%%edi), %%xmm3\n" /* im1 */ + + "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */ + "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im1 */ + + "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ + /* movups (%%ebx), %%xmm5 d3 | d2 | d1 | d0 */ + + "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */ + "movss 16(%%esi), %%xmm6\n" /* re2 */ + "movss 24(%%esi), %%xmm7\n" /* re3 */ + "movss -16(%%edi), %%xmm2\n" /* im2 */ + "movss -24(%%edi), %%xmm3\n" /* im3 */ + "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */ + "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */ + "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */ + "mulps %%xmm4, %%xmm0\n" + "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */ + "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */ + "addl $32, %%esi\n" + "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */ 
+ /* addps %%xmm5, %%xmm0 */ + "mulps %%xmm4, %%xmm6\n" + "addl $-32, %%edi\n" + /* movups 16(%%ebx), %%xmm5 d7 | d6 | d5 | d4 */ + "movups %%xmm0, (%%eax)\n" + /* addps %%xmm5, %%xmm6 */ + "addl $32, %%edx\n" + "addl $32, %%eax\n" + /* addl $32, %%ebx */ + "movups %%xmm6, -16(%%eax)\n" + "decl %%ecx\n" + "jnz .second_128_sample\n" + + "movl 8(%%ebp), %%eax\n" + "leal 512(%%eax), %%esi\n" /* buf[64].re */ + "leal 508(%%eax), %%edi\n" /* buf[63].im */ + "movl $16, %%ecx\n" /* loop count */ + "movl 20(%%ebp), %%eax\n" /* delay */ + +".first_128_delays:\n" + "movss (%%esi), %%xmm0\n" + "movss 8(%%esi), %%xmm2\n" + "movss (%%edi), %%xmm1\n" + "movss -8(%%edi), %%xmm3\n" + + "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */ + "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */ + + "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ + "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */ + "movss 16(%%esi), %%xmm6\n" /* re2 */ + "movss 24(%%esi), %%xmm7\n" /* re3 */ + "movss -16(%%edi), %%xmm2\n" /* im2 */ + "movss -24(%%edi), %%xmm3\n" /* im3 */ + "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */ + "addl $-32, %%edx\n" + "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */ + "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */ + "mulps %%xmm4, %%xmm0\n" + "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */ + "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */ + "movups %%xmm0, (%%eax)\n" + "addl $32, %%esi\n" + "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */ + "addl $-32, %%edi\n" + "mulps %%xmm5, %%xmm6\n" + "addl $32, %%eax\n" + "movups %%xmm6, -16(%%eax)\n" + "decl %%ecx\n" + "jnz .first_128_delays\n" + + "movl 8(%%ebp), %%ebx\n" + "leal 4(%%ebx), %%esi\n" /* buf[0].im */ + "leal 1016(%%ebx), %%edi\n" /* buf[127].re */ + "movl $16, %%ecx\n" /* loop count */ + +".second_128_delays:\n" + "movss (%%esi), %%xmm0\n" + "movss 8(%%esi), %%xmm2\n" + "movss (%%edi), %%xmm1\n" + "movss -8(%%edi), %%xmm3\n" + + "movlhps %%xmm2, %%xmm0\n" 
/* 0.0 | im1 | 0.0 | im0 */ + "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */ + + "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ + "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */ + "movss 16(%%esi), %%xmm6\n" /* im2 */ + "movss 24(%%esi), %%xmm7\n" /* im3 */ + "movss -16(%%edi), %%xmm2\n" /* re2 */ + "movss -24(%%edi), %%xmm3\n" /* re3 */ + "subps %%xmm0, %%xmm1\n" /* re1 | -im1 | re0 | -im0 */ + "addl $-32, %%edx\n" + "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */ + "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */ + "mulps %%xmm4, %%xmm1\n" + "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */ + "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */ + "movups %%xmm1, (%%eax)\n" + "addl $32, %%esi\n" + "subps %%xmm6, %%xmm2\n" /* re | -im3 | re | -im2 */ + "addl $-32, %%edi\n" + "mulps %%xmm5, %%xmm2\n" + "addl $32, %%eax\n" + "movups %%xmm2, -16(%%eax)\n" + "decl %%ecx\n" + "jnz .second_128_delays\n" + + "popl %%edi\n" + "popl %%esi\n" + "popl %%edx\n" + "popl %%ecx\n" + "popl %%ebx\n" + "popl %%eax\n" + + "leave\n" + ::); +} diff --git a/src/ac3_decoder/ac3_srfft.h b/plugins/imdct/ac3_srfft.h similarity index 99% rename from src/ac3_decoder/ac3_srfft.h rename to plugins/imdct/ac3_srfft.h index c068b4dff5..27a2511676 100644 --- a/src/ac3_decoder/ac3_srfft.h +++ b/plugins/imdct/ac3_srfft.h @@ -1,8 +1,8 @@ /***************************************************************************** - * ac3_srfft.h: ac3 FFT + * ac3_srfft.h: ac3 FFT tables ***************************************************************************** * Copyright (C) 1999, 2000 VideoLAN - * $Id: ac3_srfft.h,v 1.3 2001/05/14 15:58:04 reno Exp $ + * $Id: ac3_srfft.h,v 1.1 2001/05/15 16:19:42 sam Exp $ * * Authors: Renaud Dartus * Aaron Holtzman @@ -289,3 +289,4 @@ static const complex_t delta128_3[32] = a_i += v_i; \ A13.imag = a_i; \ } + diff --git a/src/ac3_decoder/ac3_srfft.c b/plugins/imdct/ac3_srfft_c.c similarity index 93% rename from 
src/ac3_decoder/ac3_srfft.c rename to plugins/imdct/ac3_srfft_c.c index ee165ffd1c..d3fdc58c70 100644 --- a/src/ac3_decoder/ac3_srfft.c +++ b/plugins/imdct/ac3_srfft_c.c @@ -1,8 +1,8 @@ /***************************************************************************** - * ac3_srfft.c: ac3 FFT + * ac3_srfft.c: ac3 FFT in C ***************************************************************************** * Copyright (C) 1999, 2000 VideoLAN - * $Id: ac3_srfft.c,v 1.4 2001/05/14 15:58:04 reno Exp $ + * $Id: ac3_srfft_c.c,v 1.1 2001/05/15 16:19:42 sam Exp $ * * Authors: Renaud Dartus * Aaron Holtzman @@ -22,6 +22,12 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. *****************************************************************************/ +#define MODULE_NAME imdct +#include "modules_inner.h" + +/***************************************************************************** + * Preamble + *****************************************************************************/ #include "defs.h" #include /* memcpy() */ @@ -34,10 +40,7 @@ #include "threads.h" #include "mtime.h" -#include "stream_control.h" -#include "input_ext-dec.h" - -#include "ac3_decoder.h" +#include "ac3_imdct.h" #include "ac3_srfft.h" static void fft_8 (complex_t *x); @@ -206,7 +209,7 @@ static void fft_8 (complex_t *x) static void fft_asmb(int k, complex_t *x, complex_t *wTB, - const complex_t *d, const complex_t *d_3) + const complex_t *d, const complex_t *d_3) { register complex_t *x2k, *x3k, *x4k, *wB; register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i; @@ -256,7 +259,7 @@ static void fft_asmb16(complex_t *x, complex_t *wTB) } -void fft_64p_c (complex_t *a) +void _M( fft_64p ) ( complex_t *a ) { fft_8(&a[0]); fft_4(&a[8]); fft_4(&a[12]); fft_asmb16(&a[0], &a[8]); @@ -274,7 +277,7 @@ void fft_64p_c (complex_t *a) } -void fft_128p_c (complex_t *a) +void _M( fft_128p ) ( complex_t *a ) { fft_8(&a[0]); fft_4(&a[8]); fft_4(&a[12]); fft_asmb16(&a[0], &a[8]); @@ -310,3 +313,4 @@ void 
fft_128p_c (complex_t *a) /* fft_128(&a[0]); */ fft_asmb(16, &a[0], &a[64], &delta128[0], &delta128_3[0]); } + diff --git a/plugins/imdct/ac3_srfft_sse.c b/plugins/imdct/ac3_srfft_sse.c new file mode 100644 index 0000000000..2de563b57b --- /dev/null +++ b/plugins/imdct/ac3_srfft_sse.c @@ -0,0 +1,372 @@ +/***************************************************************************** + * ac3_srfft_sse.c: accelerated SSE ac3 fft functions + ***************************************************************************** + * Copyright (C) 1999, 2000, 2001 VideoLAN + * $Id: ac3_srfft_sse.c,v 1.1 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Renaud Dartus + * Aaron Holtzman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
+ *****************************************************************************/ + +#define MODULE_NAME imdctsse +#include "modules_inner.h" + +/***************************************************************************** + * Preamble + *****************************************************************************/ +#include + +#include "defs.h" + +#include +#include + +#include "config.h" +#include "common.h" +#include "threads.h" +#include "mtime.h" + +#include "ac3_imdct.h" +#include "ac3_srfft.h" + +void hsqrt2 (void); +void C_1 (void); +static void fft_4_sse (complex_t *x); +static void fft_8_sse (complex_t *x); +static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB, + const complex_t *d, const complex_t *d_3); + +void _M( fft_64p ) ( complex_t *a ) +{ + fft_8_sse(&a[0]); fft_4_sse(&a[8]); fft_4_sse(&a[12]); + fft_asmb_sse(2, &a[0], &a[8], &delta16[0], &delta16_3[0]); + + fft_8_sse(&a[16]), fft_8_sse(&a[24]); + fft_asmb_sse(4, &a[0], &a[16],&delta32[0], &delta32_3[0]); + + fft_8_sse(&a[32]); fft_4_sse(&a[40]); fft_4_sse(&a[44]); + fft_asmb_sse(2, &a[32], &a[40], &delta16[0], &delta16_3[0]); + + fft_8_sse(&a[48]); fft_4_sse(&a[56]); fft_4_sse(&a[60]); + fft_asmb_sse(2, &a[48], &a[56], &delta16[0], &delta16_3[0]); + + fft_asmb_sse(8, &a[0], &a[32],&delta64[0], &delta64_3[0]); +} + +void _M( fft_128p ) ( complex_t *a ) +{ + fft_8_sse(&a[0]); fft_4_sse(&a[8]); fft_4_sse(&a[12]); + fft_asmb_sse(2, &a[0], &a[8], &delta16[0], &delta16_3[0]); + + fft_8_sse(&a[16]), fft_8_sse(&a[24]); + fft_asmb_sse(4, &a[0], &a[16],&delta32[0], &delta32_3[0]); + + fft_8_sse(&a[32]); fft_4_sse(&a[40]); fft_4_sse(&a[44]); + fft_asmb_sse(2, &a[32], &a[40], &delta16[0], &delta16_3[0]); + + fft_8_sse(&a[48]); fft_4_sse(&a[56]); fft_4_sse(&a[60]); + fft_asmb_sse(2, &a[48], &a[56], &delta16[0], &delta16_3[0]); + + fft_asmb_sse(8, &a[0], &a[32],&delta64[0], &delta64_3[0]); + + fft_8_sse(&a[64]); fft_4_sse(&a[72]); fft_4_sse(&a[76]); + /* fft_16(&a[64]); */ + fft_asmb_sse(2, &a[64], 
&a[72], &delta16[0], &delta16_3[0]);
+
+    fft_8_sse(&a[80]); fft_8_sse(&a[88]);
+
+    /* fft_32(&a[64]); */
+    fft_asmb_sse(4, &a[64], &a[80],&delta32[0], &delta32_3[0]);
+
+    fft_8_sse(&a[96]); fft_4_sse(&a[104]), fft_4_sse(&a[108]);
+    /* fft_16(&a[96]); */
+    fft_asmb_sse(2, &a[96], &a[104], &delta16[0], &delta16_3[0]);
+
+    fft_8_sse(&a[112]), fft_8_sse(&a[120]);
+    /* fft_32(&a[96]); */
+    fft_asmb_sse(4, &a[96], &a[112], &delta32[0], &delta32_3[0]);
+
+    /* fft_128(&a[0]); */
+    fft_asmb_sse(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
+}
+/* hsqrt2: data, not callable code -- four floats { h, h, -h, -h }, h = 0.707106781188, read by fft_8_sse through $hsqrt2. NOTE(review): the function address is used as the data address, which presumes no instructions are emitted ahead of the asm -- confirm against the build flags. */
+void hsqrt2 (void)
+{
+    __asm__ (
+    ".float 0f0.707106781188\n"
+    ".float 0f0.707106781188\n"
+    ".float 0f-0.707106781188\n"
+    ".float 0f-0.707106781188\n"
+    );
+}
+/* C_1: data, not callable code -- four floats { -1, 1, -1, 1 } read through $C_1 and used as a per-lane sign multiplier (same address caveat as hsqrt2). */
+void C_1 (void)
+{
+    __asm__ (
+    ".float 0f-1.0\n"
+    ".float 0f1.0\n"
+    ".float 0f-1.0\n"
+    ".float 0f1.0\n"
+    );
+}
+/* fft_4_sse: in-place 4-point FFT over x[0..3]; x is handed to the asm in %eax and the results are stored back to the same 32 bytes. */
+static void fft_4_sse (complex_t *x)
+{
+    __asm__ __volatile__ (
+    "movups (%%eax), %%xmm0\n" /* x[1] | x[0] */
+    "movups 16(%%eax), %%xmm2\n" /* x[3] | x[2] */
+    "movups %%xmm0, %%xmm1\n" /* x[1] | x[0] */
+    "addps %%xmm2, %%xmm0\n" /* x[1] + x[3] | x[0] + x[2] */
+    "subps %%xmm2, %%xmm1\n" /* x[1] - x[3] | x[0] - x[2] */
+    "xorps %%xmm6, %%xmm6\n"
+    "movhlps %%xmm1, %%xmm4\n" /* ? | x[1] - x[3] */
+    "movhlps %%xmm0, %%xmm3\n" /* ? | x[1] + x[3] */
+    "subss %%xmm4, %%xmm6\n" /* 0 | -(x[1] - x[3]).re */
+    "movlhps %%xmm1, %%xmm0\n" /* x[0] - x[2] | x[0] + x[2] */
+    "movlhps %%xmm6, %%xmm4\n" /* 0 | -(x[1] - x[3]).re | (x[1] - x[3]).im | (x[3]-x[1]).re */
+    "movups %%xmm0, %%xmm2\n" /* x[0] - x[2] | x[0] + x[2] */
+    "shufps $0x94, %%xmm4, %%xmm3\n" /* i*(x[1] - x[3]) | x[1] + x[3] */
+    "addps %%xmm3, %%xmm0\n"
+    "subps %%xmm3, %%xmm2\n"
+    "movups %%xmm0, (%%eax)\n"
+    "movups %%xmm2, 16(%%eax)\n"
+    : "=a" (x)
+    : "a" (x) : "memory" ); /* the asm loads and stores x[] itself, so the compiler must not cache or reorder accesses around it */
+}
+/* fft_8_sse: in-place 8-point FFT over x[0..7]; x is handed to the asm in %eax, %ebx is saved and restored by hand around its use as a table pointer. */
+static void fft_8_sse (complex_t *x)
+{
+    __asm__ __volatile__ (
+    "pushl %%ebx\n"
+
+    "movlps (%%eax), %%xmm0\n" /* x[0] */
+    "movlps 32(%%eax), %%xmm1\n" /* x[4] */
+    "movhps 16(%%eax), %%xmm0\n" /* x[2] | x[0] */
+    "movhps 48(%%eax), %%xmm1\n" /* x[6] | x[4] */
+    "movups %%xmm0, %%xmm2\n" /* x[2] | x[0] */
+    "xorps %%xmm3, %%xmm3\n"
+    "addps %%xmm1, %%xmm0\n" /* x[2] + x[6] | x[0] + x[4] */
+    "subps %%xmm1, %%xmm2\n" /* x[2] - x[6] | x[0] - x[4] */
+    "movhlps %%xmm0, %%xmm5\n" /* x[2] + x[6] */
+    "movhlps %%xmm2, %%xmm4\n" /* x[2] - x[6] */
+    "movlhps %%xmm2, %%xmm0\n" /* x[0] - x[4] | x[0] + x[4] */
+    "subss %%xmm4, %%xmm3\n" /* (x[2]-x[6]).im | -(x[2]-x[6]).re */
+    "movups %%xmm0, %%xmm7\n" /* x[0] - x[4] | x[0] + x[4] */
+    "movups %%xmm3, %%xmm4\n" /* (x[2]-x[6]).im | -(x[2]-x[6]).re */
+    "movlps 8(%%eax), %%xmm1\n" /* x[1] */
+    "shufps $0x14, %%xmm4, %%xmm5\n" /* i*(x[2] - x[6]) | x[2] + x[6] */
+
+    "addps %%xmm5, %%xmm0\n" /* yt = i*(x2-x6)+x0-x4 | x2+x6+x0+x4 */
+    "subps %%xmm5, %%xmm7\n" /* yb = i*(x6-x2)+x0-x4 | -x6-x2+x0+x4 */
+
+    "movhps 24(%%eax), %%xmm1\n" /* x[3] | x[1] */
+    "movl $hsqrt2, %%ebx\n"
+    "movlps 40(%%eax), %%xmm2\n" /* x[5] */
+    "movhps 56(%%eax), %%xmm2\n" /* x[7] | x[5] */
+    "movups %%xmm1, %%xmm3\n" /* x[3] | x[1] */
+    "addps %%xmm2, %%xmm1\n" /* x[3] + x[7] | x[1] + x[5] */
+    "subps %%xmm2, %%xmm3\n" /* x[3] - x[7] | x[1] - x[5] */
+    "movups (%%ebx), %%xmm4\n" /* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */
+    "movups %%xmm3, %%xmm6\n" /* x[3] - x[7] | x[1] - x[5] */
+    "mulps %%xmm4, %%xmm3\n" /* -1/s2*(x[3] - x[7]) | 1/s2*(x[1] - x[5]) */
+    "shufps $0xc8, %%xmm4, %%xmm4\n" /* -1/sqrt2 | 1/sqrt2 | -1/sqrt2 | 1/sqrt2 */
+    "shufps $0xb1, %%xmm6, %%xmm6\n" /* (x3-x7).re|(x3-x7).im|(x1-x5).re|(x1-x5).im */
+    "mulps %%xmm4, %%xmm6\n" /* (x7-x3).re/s2|(x3-x7).im/s2|(x5-x1).re/s2|(x1-x5).im/s2 */
+    "addps %%xmm3, %%xmm6\n" /* (-1-i)/sqrt2 * (x[3]-x[7]) | (1-i)/sqrt2 * (x[1] - x[5]) */
+    "movhlps %%xmm1, %%xmm5\n" /* x[3] + x[7] */
+    "movlhps %%xmm6, %%xmm1\n" /* (1+i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
+    "shufps $0xe4, %%xmm6, %%xmm5\n" /* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */
+    "movups %%xmm1, %%xmm3\n" /* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
+    "movl $C_1, %%ebx\n"
+    "addps %%xmm5, %%xmm1\n" /* u */
+    "subps %%xmm5, %%xmm3\n" /* v */
+    "movups %%xmm0, %%xmm2\n" /* yb */
+    "movups %%xmm7, %%xmm4\n" /* yt */
+    "movups (%%ebx), %%xmm5\n"
+    "mulps %%xmm5, %%xmm3\n"
+    "addps %%xmm1, %%xmm0\n" /* yt + u */
+    "subps %%xmm1, %%xmm2\n" /* yt - u */
+    "shufps $0xb1, %%xmm3, %%xmm3\n" /* -i * v */
+    "movups %%xmm0, (%%eax)\n"
+    "movups %%xmm2, 32(%%eax)\n"
+    "addps %%xmm3, %%xmm4\n" /* yb - i*v */
+    "subps %%xmm3, %%xmm7\n" /* yb + i*v */
+    "movups %%xmm4, 16(%%eax)\n"
+    "movups %%xmm7, 48(%%eax)\n"
+
+    "popl %%ebx\n"
+    : "=a" (x)
+    : "a" (x) : "memory"); /* the asm loads and stores x[] itself, so the compiler must not cache or reorder accesses around it */
+}
+
+/* fft_asmb_sse: the asm fetches k, x, wTB, d and d_3 from the stack at 8..24(%ebp) after building its own frame, rather than through asm operands. */
+static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
+                          const complex_t *d, const complex_t *d_3)
+{
+    __asm__ __volatile__ (
+    "pushl %%ebp\n"
+    "movl %%esp, %%ebp\n"
+
+    "subl $4, %%esp\n"
+
+    "pushl %%eax\n"
+    "pushl %%ebx\n"
+    "pushl %%ecx\n"
+    "pushl %%edx\n"
+    "pushl %%esi\n"
+    "pushl %%edi\n"
+
+    "movl 8(%%ebp), %%ecx\n" /* k */
+    "movl 12(%%ebp), %%eax\n" /* x */
+    "movl %%ecx, -4(%%ebp)\n" /* k */
+    "movl 16(%%ebp), %%ebx\n" /* wT */
+    "movl 20(%%ebp), %%edx\n" /* d */
+    "movl 24(%%ebp), %%esi\n" /* d3 */
+    "shll $4, %%ecx\n" /* 16k */
+    "addl $8, %%edx\n"
+    "leal (%%eax, %%ecx, 2), %%edi\n"
+    "addl $8, %%esi\n"
+
+    /* TRANSZERO and TRANS */
+ "movups (%%eax), %%xmm0\n" /* x[1] | x[0] */ + "movups (%%ebx), %%xmm1\n" /* wT[1] | wT[0] */ + "movups (%%ebx, %%ecx), %%xmm2\n" /* wB[1] | wB[0] */ + "movlps (%%edx), %%xmm3\n" /* d */ + "movlps (%%esi), %%xmm4\n" /* d3 */ + "movhlps %%xmm1, %%xmm5\n" /* wT[1] */ + "movhlps %%xmm2, %%xmm6\n" /* wB[1] */ + "shufps $0x50, %%xmm3, %%xmm3\n" /* d[1].im | d[1].im | d[1].re | d[1].re */ + "shufps $0x50, %%xmm4, %%xmm4\n" /* d3[1].im | d3[1].im | d3[i].re | d3[i].re */ + "movlhps %%xmm5, %%xmm5\n" /* wT[1] | wT[1] */ + "movlhps %%xmm6, %%xmm6\n" /* wB[1] | wB[1] */ + "mulps %%xmm3, %%xmm5\n" + "mulps %%xmm4, %%xmm6\n" + "movhlps %%xmm5, %%xmm7\n" /* wT[1].im * d[1].im | wT[1].re * d[1].im */ + "movlhps %%xmm6, %%xmm5\n" /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */ + "shufps $0xb1, %%xmm6, %%xmm7\n" /* wB[1].re * d3[1].im | wB[i].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */ + "movl $C_1, %%edi\n" + "movups (%%edi), %%xmm4\n" + "mulps %%xmm4, %%xmm7\n" + "addps %%xmm7, %%xmm5\n" /* wB[1] * d3[1] | wT[1] * d[1] */ + "movlhps %%xmm5, %%xmm1\n" /* d[1] * wT[1] | wT[0] */ + "shufps $0xe4, %%xmm5, %%xmm2\n" /* d3[1] * wB[1] | wB[0] */ + "movups %%xmm1, %%xmm3\n" /* d[1] * wT[1] | wT[0] */ + "leal (%%eax, %%ecx, 2), %%edi\n" + "addps %%xmm2, %%xmm1\n" /* u */ + "subps %%xmm2, %%xmm3\n" /* v */ + "mulps %%xmm4, %%xmm3\n" + "movups (%%eax, %%ecx), %%xmm5\n" /* xk[1] | xk[0] */ + "shufps $0xb1, %%xmm3, %%xmm3\n" /* -i * v */ + "movups %%xmm0, %%xmm2\n" /* x[1] | x[0] */ + "movups %%xmm5, %%xmm6\n" /* xk[1] | xk[0] */ + "addps %%xmm1, %%xmm0\n" + "subps %%xmm1, %%xmm2\n" + "addps %%xmm3, %%xmm5\n" + "subps %%xmm3, %%xmm6\n" + "movups %%xmm0, (%%eax)\n" + "movups %%xmm2, (%%edi)\n" + "movups %%xmm5, (%%eax, %%ecx)\n" + "movups %%xmm6, (%%edi, %%ecx)\n" + "addl $16, %%eax\n" + "addl $16, %%ebx\n" + "addl $8, %%edx\n" + "addl $8, %%esi\n" + "decl -4(%%ebp)\n" + +".loop:\n" + "movups (%%ebx), %%xmm0\n" /* wT[1] | wT[0] */ + 
"movups (%%edx), %%xmm1\n" /* d[1] | d[0] */ + + "movups (%%ebx, %%ecx), %%xmm4\n" /* wB[1] | wB[0] */ + "movups (%%esi), %%xmm5\n" /* d3[1] | d3[0] */ + + "movhlps %%xmm0, %%xmm2\n" /* wT[1] */ + "movhlps %%xmm1, %%xmm3\n" /* d[1] */ + + "movhlps %%xmm4, %%xmm6\n" /* wB[1] */ + "movhlps %%xmm5, %%xmm7\n" /* d3[1] */ + + "shufps $0x50, %%xmm1, %%xmm1\n" /* d[0].im | d[0].im | d[0].re | d[0].re */ + "shufps $0x50, %%xmm3, %%xmm3\n" /* d[1].im | d[1].im | d[1].re | d[1].re */ + + "movlhps %%xmm0, %%xmm0\n" /* wT[0] | wT[0] */ + "shufps $0x50, %%xmm5, %%xmm5\n" /* d3[0].im | d3[0].im | d3[0].re | d3[0].re */ + "movlhps %%xmm2, %%xmm2\n" /* wT[1] | wT[1] */ + "shufps $0x50, %%xmm7, %%xmm7\n" /* d3[1].im | d3[1].im | d3[1].re | d3[1].re */ + + "mulps %%xmm1, %%xmm0\n" /* d[0].im * wT[0].im | d[0].im * wT[0].re | d[0].re * wT[0].im | d[0].re * wT[0].re */ + "mulps %%xmm3, %%xmm2\n" /* d[1].im * wT[1].im | d[1].im * wT[1].re | d[1].re * wT[1].im | d[1].re * wT[1].re */ + "movlhps %%xmm4, %%xmm4\n" /* wB[0] | wB[0] */ + "movlhps %%xmm6, %%xmm6\n" /* wB[1] | wB[1] */ + + "movhlps %%xmm0, %%xmm1\n" /* d[0].im * wT[0].im | d[0].im * wT[0].re */ + "movlhps %%xmm2, %%xmm0\n" /* d[1].re * wT[1].im | d[1].re * wT[1].re | d[0].re * wT[0].im | d[0].re * wT[0].re */ + "mulps %%xmm5, %%xmm4\n" /* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */ + "mulps %%xmm7, %%xmm6\n" /* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */ + "shufps $0xb1, %%xmm2, %%xmm1\n" /* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */ + "movl $C_1, %%edi\n" + "movups (%%edi), %%xmm3\n" /* 1.0 | -1.0 | 1.0 | -1.0 */ + + "movhlps %%xmm4, %%xmm5\n" /* wB[0].im * d3[0].im | wB[0].re * d3[0].im */ + "mulps %%xmm3, %%xmm1\n" /* d[1].im * wT[1].re | -d[1].im * wT[1].im | d[0].im * wT[0].re | -d[0].im * wT[0].im */ + "movlhps %%xmm6, %%xmm4\n" /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wB[0].im * 
d3[0].re | wB[0].im * d3[0].re */ + "addps %%xmm1, %%xmm0\n" /* wT[1] * d[1] | wT[0] * d[0] */ + + "shufps $0xb1, %%xmm6, %%xmm5\n" /* wB[1].re * d3[1].im | wB[1].im * d3[1].im | wB[0].re * d3[0].im | wB[0].im * d3[0].im */ + "mulps %%xmm3, %%xmm5\n" /* wB[1].re * d3[1].im | -wB[1].im * d3[1].im | wB[0].re * d3[0].im | -wB[0].im * d3[0].im */ + "addps %%xmm5, %%xmm4\n" /* wB[1] * d3[1] | wB[0] * d3[0] */ + + "movups %%xmm0, %%xmm1\n" /* wT[1] * d[1] | wT[0] * d[0] */ + "addps %%xmm4, %%xmm0\n" /* u */ + "subps %%xmm4, %%xmm1\n" /* v */ + "movups (%%eax), %%xmm6\n" /* x[1] | x[0] */ + "leal (%%eax, %%ecx, 2), %%edi\n" + "mulps %%xmm3, %%xmm1\n" + "addl $16, %%ebx\n" + "addl $16, %%esi\n" + "shufps $0xb1, %%xmm1, %%xmm1\n" /* -i * v */ + "movups (%%eax, %%ecx), %%xmm7\n" /* xk[1] | xk[0] */ + "movups %%xmm6, %%xmm2\n" + "movups %%xmm7, %%xmm4\n" + "addps %%xmm0, %%xmm6\n" + "subps %%xmm0, %%xmm2\n" + "movups %%xmm6, (%%eax)\n" + "movups %%xmm2, (%%edi)\n" + "addps %%xmm1, %%xmm7\n" + "subps %%xmm1, %%xmm4\n" + "addl $16, %%edx\n" + "movups %%xmm7, (%%eax, %%ecx)\n" + "movups %%xmm4, (%%edi, %%ecx)\n" + + "addl $16, %%eax\n" + "decl -4(%%ebp)\n" + "jnz .loop\n" + +".end:\n" + "popl %%edi\n" + "popl %%esi\n" + "popl %%edx\n" + "popl %%ecx\n" + "popl %%ebx\n" + "popl %%eax\n" + + "addl $4, %%esp\n" + + "leave\n" + ::); +} + diff --git a/plugins/imdct/imdct.c b/plugins/imdct/imdct.c new file mode 100644 index 0000000000..57424a0915 --- /dev/null +++ b/plugins/imdct/imdct.c @@ -0,0 +1,147 @@ +/***************************************************************************** + * imdct.c : IMDCT module + ***************************************************************************** + * Copyright (C) 1999, 2000 VideoLAN + * $Id: imdct.c,v 1.1 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Gaël Hendryckx + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software 
Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + *****************************************************************************/ + +#define MODULE_NAME imdct +#include "modules_inner.h" + +/***************************************************************************** + * Preamble + *****************************************************************************/ +#include "defs.h" + +#include + +#include "config.h" +#include "common.h" +#include "threads.h" +#include "mtime.h" +#include "tests.h" + +#include "ac3_imdct.h" +#include "ac3_imdct_common.h" + +#include "modules.h" + +/***************************************************************************** + * Local and extern prototypes. + *****************************************************************************/ +static void imdct_getfunctions( function_list_t * p_function_list ); +static int imdct_Probe ( probedata_t *p_data ); + +/***************************************************************************** + * Build configuration tree. + *****************************************************************************/ +MODULE_CONFIG_START +ADD_WINDOW( "Configuration for IMDCT module" ) + ADD_COMMENT( "Ha, ha -- nothing to configure yet" ) +MODULE_CONFIG_END + +/***************************************************************************** + * InitModule: get the module structure and configuration. 
+ ***************************************************************************** + * We have to fill psz_name, psz_longname and psz_version. These variables + * will be strdup()ed later by the main application because the module can + * be unloaded later to save memory, and we want to be able to access this + * data even after the module has been unloaded. + *****************************************************************************/ +MODULE_INIT +{ + p_module->psz_name = MODULE_STRING; + p_module->psz_longname = "AC3 IMDCT module"; + p_module->psz_version = VERSION; + + p_module->i_capabilities = MODULE_CAPABILITY_NULL + | MODULE_CAPABILITY_IMDCT; + + return( 0 ); +} + +/***************************************************************************** + * ActivateModule: set the module to an usable state. + ***************************************************************************** + * This function fills the capability functions and the configuration + * structure. Once ActivateModule() has been called, the i_usage can + * be set to 0 and calls to NeedModule() be made to increment it. To unload + * the module, one has to wait until i_usage == 0 and call DeactivateModule(). + *****************************************************************************/ +MODULE_ACTIVATE +{ + p_module->p_functions = malloc( sizeof( module_functions_t ) ); + if( p_module->p_functions == NULL ) + { + return( -1 ); + } + + imdct_getfunctions( &p_module->p_functions->imdct ); + + p_module->p_config = p_config; + + return( 0 ); +} + +/***************************************************************************** + * DeactivateModule: make sure the module can be unloaded. + ***************************************************************************** + * This function must only be called when i_usage == 0. If it successfully + * returns, i_usage can be set to -1 and the module unloaded. Be careful to + * lock usage_lock during the whole process. 
+ *****************************************************************************/
+MODULE_DEACTIVATE
+{
+    free( p_module->p_functions );
+
+    return( 0 );
+}
+
+/* Following functions are local */
+
+/*****************************************************************************
+ * imdct_getfunctions: fills the capability table with the probe callback
+ * and this module's _M()-prefixed IMDCT entry points.
+ *****************************************************************************/
+static void imdct_getfunctions( function_list_t * p_function_list )
+{
+    p_function_list->pf_probe = imdct_Probe;
+#define F p_function_list->functions.imdct
+    F.pf_imdct_init = _M( imdct_init );
+    F.pf_imdct_256 = _M( imdct_do_256 );
+    F.pf_imdct_256_nol = _M( imdct_do_256_nol );
+    F.pf_imdct_512 = _M( imdct_do_512 );
+    F.pf_imdct_512_nol = _M( imdct_do_512_nol );
+#undef F
+}
+
+/*****************************************************************************
+ * imdct_Probe: returns 999 when TestMethod() matches "imdct", otherwise 50
+ *****************************************************************************/
+static int imdct_Probe( probedata_t *p_data )
+{
+    if( TestMethod( IMDCT_METHOD_VAR, "imdct" ) )
+    {
+        return( 999 );
+    }
+
+    /* No CPU capability is tested here, so a non-zero score is always returned */
+    return( 50 );
+}
+
diff --git a/plugins/imdct/imdctsse.c b/plugins/imdct/imdctsse.c
new file mode 100644
index 0000000000..9371a0a605
--- /dev/null
+++ b/plugins/imdct/imdctsse.c
@@ -0,0 +1,152 @@
+/*****************************************************************************
+ * imdctsse.c : accelerated SSE IMDCT module
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: imdctsse.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Gaël Hendryckx
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option)
any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + *****************************************************************************/ + +#define MODULE_NAME imdctsse +#include "modules_inner.h" + +/***************************************************************************** + * Preamble + *****************************************************************************/ +#include "defs.h" + +#include + +#include "config.h" +#include "common.h" +#include "threads.h" +#include "mtime.h" +#include "tests.h" + +#include "ac3_imdct.h" +#include "ac3_imdct_common.h" + +#include "modules.h" + +/***************************************************************************** + * Local and extern prototypes. + *****************************************************************************/ +static void imdct_getfunctions( function_list_t * p_function_list ); +static int imdct_Probe ( probedata_t *p_data ); + +/***************************************************************************** + * Build configuration tree. + *****************************************************************************/ +MODULE_CONFIG_START +ADD_WINDOW( "Configuration for IMDCT module" ) + ADD_COMMENT( "Ha, ha -- nothing to configure yet" ) +MODULE_CONFIG_END + +/***************************************************************************** + * InitModule: get the module structure and configuration. + ***************************************************************************** + * We have to fill psz_name, psz_longname and psz_version. 
These variables + * will be strdup()ed later by the main application because the module can + * be unloaded later to save memory, and we want to be able to access this + * data even after the module has been unloaded. + *****************************************************************************/ +MODULE_INIT +{ + p_module->psz_name = MODULE_STRING; + p_module->psz_longname = "AC3 IMDCT module"; + p_module->psz_version = VERSION; + + p_module->i_capabilities = MODULE_CAPABILITY_NULL + | MODULE_CAPABILITY_IMDCT; + + return( 0 ); +} + +/***************************************************************************** + * ActivateModule: set the module to an usable state. + ***************************************************************************** + * This function fills the capability functions and the configuration + * structure. Once ActivateModule() has been called, the i_usage can + * be set to 0 and calls to NeedModule() be made to increment it. To unload + * the module, one has to wait until i_usage == 0 and call DeactivateModule(). + *****************************************************************************/ +MODULE_ACTIVATE +{ + p_module->p_functions = malloc( sizeof( module_functions_t ) ); + if( p_module->p_functions == NULL ) + { + return( -1 ); + } + + imdct_getfunctions( &p_module->p_functions->imdct ); + + p_module->p_config = p_config; + + return( 0 ); +} + +/***************************************************************************** + * DeactivateModule: make sure the module can be unloaded. + ***************************************************************************** + * This function must only be called when i_usage == 0. If it successfully + * returns, i_usage can be set to -1 and the module unloaded. Be careful to + * lock usage_lock during the whole process. 
+ *****************************************************************************/ +MODULE_DEACTIVATE +{ + free( p_module->p_functions ); + + return( 0 ); +} + +/* Following functions are local */ + +/***************************************************************************** + * Functions exported as capabilities. They are declared as static so that + * we don't pollute the namespace too much. + ***************************************************************************** + * imdct_getfunctions stores the probe callback and the five IMDCT entry + * points of this module in the given function list. + *****************************************************************************/ +static void imdct_getfunctions( function_list_t * p_function_list ) +{ + p_function_list->pf_probe = imdct_Probe; +#define F p_function_list->functions.imdct + F.pf_imdct_init = _M( imdct_init ); + F.pf_imdct_256 = _M( imdct_do_256 ); + F.pf_imdct_256_nol = _M( imdct_do_256_nol ); + F.pf_imdct_512 = _M( imdct_do_512 ); + F.pf_imdct_512_nol = _M( imdct_do_512_nol ); +#undef F +} + +/***************************************************************************** + * imdct_Probe: returns a preference score + ***************************************************************************** + * Returns 0 when the CPU lacks SSE, 999 when "imdctsse" was explicitly + * requested through IMDCT_METHOD_VAR, and 200 otherwise. + *****************************************************************************/ +static int imdct_Probe( probedata_t *p_data ) +{ + if( !TestCPU( CPU_CAPABILITY_SSE ) ) + { + return( 0 ); + } + + /* Test the IMDCT method variable, not the video IDCT one */ + if( TestMethod( IMDCT_METHOD_VAR, "imdctsse" ) ) + { + return( 999 ); + } + + /* SSE is available but this module was not explicitly requested */ + return( 200 ); +} + diff --git a/plugins/motion/motionmmx.c b/plugins/motion/motionmmx.c index 96177f87de..724c2898e6 100644 --- a/plugins/motion/motionmmx.c +++ b/plugins/motion/motionmmx.c @@ -2,7 +2,7 @@ * motionmmx.c : MMX motion compensation module for vlc ***************************************************************************** * Copyright (C) 2000 VideoLAN - * $Id: motionmmx.c,v 1.4 2001/04/15 04:19:57 sam Exp $ + * $Id: motionmmx.c,v 1.5 2001/05/15 16:19:42 sam Exp $ * * Authors: Christophe Massiot * @@ -116,20 +116,16 @@ MODULE_DEACTIVATE *****************************************************************************/ int _M( motion_Probe )( probedata_t *p_data ) { - if( TestCPU(
CPU_CAPABILITY_MMX ) ) + if( !TestCPU( CPU_CAPABILITY_MMX ) ) { - if( TestMethod( MOTION_METHOD_VAR, "motionmmx" ) ) - { - return( 999 ); - } - else - { - return( 150 ); - } + return( 0 ); } - else + + if( TestMethod( MOTION_METHOD_VAR, "motionmmx" ) ) { - return( 0 ); + return( 999 ); } + + return( 150 ); } diff --git a/plugins/motion/motionmmxext.c b/plugins/motion/motionmmxext.c index e2658ff8fa..702d5533dd 100644 --- a/plugins/motion/motionmmxext.c +++ b/plugins/motion/motionmmxext.c @@ -2,7 +2,7 @@ * motionmmxext.c : MMX EXT motion compensation module for vlc ***************************************************************************** * Copyright (C) 2000 VideoLAN - * $Id: motionmmxext.c,v 1.4 2001/04/15 04:19:57 sam Exp $ + * $Id: motionmmxext.c,v 1.5 2001/05/15 16:19:42 sam Exp $ * * Authors: Christophe Massiot * @@ -116,20 +116,16 @@ MODULE_DEACTIVATE *****************************************************************************/ int _M( motion_Probe )( probedata_t *p_data ) { - if( TestCPU( CPU_CAPABILITY_MMXEXT ) ) + if( !TestCPU( CPU_CAPABILITY_MMXEXT ) ) { - if( TestMethod( MOTION_METHOD_VAR, "motionmmxext" ) ) - { - return( 999 ); - } - else - { - return( 200 ); - } + return( 0 ); } - else + + if( TestMethod( MOTION_METHOD_VAR, "motionmmxext" ) ) { - return( 0 ); + return( 999 ); } + + return( 200 ); } diff --git a/plugins/yuv/video_yuvmmx.c b/plugins/yuv/video_yuvmmx.c index ea468c1982..782c3e361c 100644 --- a/plugins/yuv/video_yuvmmx.c +++ b/plugins/yuv/video_yuvmmx.c @@ -3,7 +3,7 @@ * Provides functions to perform the YUV conversion. 
***************************************************************************** * Copyright (C) 1999, 2000 VideoLAN - * $Id: video_yuvmmx.c,v 1.8 2001/04/15 04:19:58 sam Exp $ + * $Id: video_yuvmmx.c,v 1.9 2001/05/15 16:19:42 sam Exp $ * * Authors: Samuel Hocevar * @@ -79,21 +79,17 @@ void _M( yuv_getfunctions )( function_list_t * p_function_list ) static int yuv_Probe( probedata_t *p_data ) { /* Test for MMX support in the CPU */ - if( TestCPU( CPU_CAPABILITY_MMX ) ) + if( !TestCPU( CPU_CAPABILITY_MMX ) ) { - if( TestMethod( YUV_METHOD_VAR, "yuvmmx" ) ) - { - return( 999 ); - } - else - { - return( 100 ); - } + return( 0 ); } - else + + if( TestMethod( YUV_METHOD_VAR, "yuvmmx" ) ) { - return( 0 ); + return( 999 ); } + + return( 100 ); } /***************************************************************************** diff --git a/src/ac3_decoder/ac3_bit_allocate.c b/src/ac3_decoder/ac3_bit_allocate.c index bd59e691f6..2e1db36ab7 100644 --- a/src/ac3_decoder/ac3_bit_allocate.c +++ b/src/ac3_decoder/ac3_bit_allocate.c @@ -2,7 +2,7 @@ * ac3_bit_allocate.c: ac3 allocation tables ***************************************************************************** * Copyright (C) 2000 VideoLAN - * $Id: ac3_bit_allocate.c,v 1.21 2001/05/14 15:58:03 reno Exp $ + * $Id: ac3_bit_allocate.c,v 1.22 2001/05/15 16:19:42 sam Exp $ * * Authors: Michel Kaempf * Aaron Holtzman @@ -22,6 +22,10 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
*****************************************************************************/ + +/***************************************************************************** + * Preamble + *****************************************************************************/ #include "defs.h" #include /* memcpy() */ @@ -31,12 +35,13 @@ #include "threads.h" #include "mtime.h" -#include "intf_msg.h" /* intf_DbgMsg(), intf_ErrMsg() */ - #include "stream_control.h" #include "input_ext-dec.h" +#include "ac3_imdct.h" +#include "ac3_downmix.h" #include "ac3_decoder.h" + #include "ac3_internal.h" /* DELTA_BIT_REUSE */ diff --git a/src/ac3_decoder/ac3_decoder.c b/src/ac3_decoder/ac3_decoder.c index 5e3d17c1fb..c0bb86dfb4 100644 --- a/src/ac3_decoder/ac3_decoder.c +++ b/src/ac3_decoder/ac3_decoder.c @@ -2,7 +2,7 @@ * ac3_decoder.c: core ac3 decoder ***************************************************************************** * Copyright (C) 1999, 2000 VideoLAN - * $Id: ac3_decoder.c,v 1.33 2001/05/14 15:58:03 reno Exp $ + * $Id: ac3_decoder.c,v 1.34 2001/05/15 16:19:42 sam Exp $ * * Authors: Michel Kaempf * Michel Lespinasse @@ -23,6 +23,9 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
*****************************************************************************/ +/***************************************************************************** + * Preamble + *****************************************************************************/ #include "defs.h" #include /* memcpy() */ @@ -39,8 +42,11 @@ #include "audio_output.h" +#include "ac3_imdct.h" +#include "ac3_downmix.h" #include "ac3_decoder.h" #include "ac3_decoder_thread.h" /* ac3dec_thread_t */ + #include "ac3_internal.h" static const float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 }; @@ -50,7 +56,6 @@ int ac3_init (ac3dec_t * p_ac3dec) { p_ac3dec->mantissa.lfsr_state = 1; /* dither_gen initialization */ imdct_init(&p_ac3dec->imdct); - downmix_init(&p_ac3dec->downmix); return 0; } @@ -58,7 +63,7 @@ int ac3_init (ac3dec_t * p_ac3dec) int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer) { int i; - ac3dec_thread_t * p_ac3dec_t = (ac3dec_thread_t *) p_ac3dec->bit_stream.p_callback_arg; + ac3dec_thread_t * p_ac3thread = (ac3dec_thread_t *) p_ac3dec->bit_stream.p_callback_arg; if (parse_bsi (p_ac3dec)) { @@ -67,20 +72,20 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer) return 1; } - /* compute downmix parameters - * downmix to tow channels for now */ - p_ac3dec->dm_par.clev = 0.0; + /* compute downmix parameters + * downmix to tow channels for now */ + p_ac3dec->dm_par.clev = 0.0; p_ac3dec->dm_par.slev = 0.0; p_ac3dec->dm_par.unit = 1.0; - if (p_ac3dec->bsi.acmod & 0x1) /* have center */ - p_ac3dec->dm_par.clev = cmixlev_lut[p_ac3dec->bsi.cmixlev]; + if (p_ac3dec->bsi.acmod & 0x1) /* have center */ + p_ac3dec->dm_par.clev = cmixlev_lut[p_ac3dec->bsi.cmixlev]; - if (p_ac3dec->bsi.acmod & 0x4) /* have surround channels */ - p_ac3dec->dm_par.slev = smixlev_lut[p_ac3dec->bsi.surmixlev]; + if (p_ac3dec->bsi.acmod & 0x4) /* have surround channels */ + p_ac3dec->dm_par.slev = smixlev_lut[p_ac3dec->bsi.surmixlev]; p_ac3dec->dm_par.unit /= 1.0 + p_ac3dec->dm_par.clev + p_ac3dec->dm_par.slev; 
- p_ac3dec->dm_par.clev *= p_ac3dec->dm_par.unit; - p_ac3dec->dm_par.slev *= p_ac3dec->dm_par.unit; + p_ac3dec->dm_par.clev *= p_ac3dec->dm_par.unit; + p_ac3dec->dm_par.slev *= p_ac3dec->dm_par.unit; for (i = 0; i < 6; i++) { /* Initialize freq/time sample storage */ @@ -88,45 +93,50 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer) (p_ac3dec->bsi.nfchans + p_ac3dec->bsi.lfeon)); - if ((p_ac3dec_t->p_fifo->b_die) && (p_ac3dec_t->p_fifo->b_error)) + if( p_ac3thread->p_fifo->b_die || p_ac3thread->p_fifo->b_error ) { return 1; } - if (parse_audblk (p_ac3dec, i)) + if( parse_audblk( p_ac3dec, i ) ) { - intf_WarnMsg (3,"ac3dec warn: error during audioblock"); - parse_auxdata (p_ac3dec); + intf_WarnMsg( 3, "ac3dec warning: error during audioblock" ); + parse_auxdata( p_ac3dec ); return 1; } - if ((p_ac3dec_t->p_fifo->b_die) && (p_ac3dec_t->p_fifo->b_error)) + if( p_ac3thread->p_fifo->b_die || p_ac3thread->p_fifo->b_error ) { return 1; } - if (exponent_unpack (p_ac3dec)) + if( exponent_unpack( p_ac3dec ) ) { - intf_WarnMsg (3,"ac3dec warn: error during unpack"); - parse_auxdata (p_ac3dec); + intf_WarnMsg( 3, "ac3dec warning: error during unpack" ); + parse_auxdata( p_ac3dec ); return 1; } + bit_allocate (p_ac3dec); mantissa_unpack (p_ac3dec); - if ((p_ac3dec_t->p_fifo->b_die) && (p_ac3dec_t->p_fifo->b_error)) + if( p_ac3thread->p_fifo->b_die || p_ac3thread->p_fifo->b_error ) { return 1; } if (p_ac3dec->bsi.acmod == 0x2) + { rematrix (p_ac3dec); + } + imdct (p_ac3dec, buffer); - buffer += 2*256; + buffer += 2 * 256; } parse_auxdata (p_ac3dec); return 0; } + diff --git a/src/ac3_decoder/ac3_decoder.h b/src/ac3_decoder/ac3_decoder.h index 4de1c435f9..9237d12652 100644 --- a/src/ac3_decoder/ac3_decoder.h +++ b/src/ac3_decoder/ac3_decoder.h @@ -2,7 +2,7 @@ * ac3_decoder.h : ac3 decoder interface ***************************************************************************** * Copyright (C) 1999, 2000 VideoLAN - * $Id: ac3_decoder.h,v 1.8 2001/05/14 15:58:03 reno Exp $ 
+ * $Id: ac3_decoder.h,v 1.9 2001/05/15 16:19:42 sam Exp $ * * Authors: Michel Kaempf * Renaud Dartus @@ -352,63 +352,6 @@ typedef struct mantissa_s u16 lfsr_state; } mantissa_t; -typedef struct complex_s { - float real; - float imag; -} complex_t; - -#define N 512 - -typedef struct imdct_s -{ - complex_t buf[N/4]; - - /* Delay buffer for time domain interleaving */ - float delay[6][256]; - float delay1[6][256]; - - /* Twiddle factors for IMDCT */ - float xcos1[N/4]; - float xsin1[N/4]; - float xcos2[N/8]; - float xsin2[N/8]; - - /* Twiddle factor LUT */ - complex_t *w[7]; - complex_t w_1[1]; - complex_t w_2[2]; - complex_t w_4[4]; - complex_t w_8[8]; - complex_t w_16[16]; - complex_t w_32[32]; - complex_t w_64[64]; - - float xcos_sin_sse[128 * 4] __attribute__((aligned(16))); - - /* Functions */ - void (*fft_64p) (complex_t *a); - - void (*imdct_do_512)(struct imdct_s * p_imdct, float data[], float delay[]); - void (*imdct_do_512_nol)(struct imdct_s * p_imdct, float data[], float delay[]); - -} imdct_t; - -typedef struct dm_par_s { - float unit; - float clev; - float slev; -} dm_par_t; - -typedef struct downmix_s { - void (*downmix_3f_2r_to_2ch)(float *samples, dm_par_t * dm_par); - void (*downmix_3f_1r_to_2ch)(float *samples, dm_par_t * dm_par); - void (*downmix_2f_2r_to_2ch)(float *samples, dm_par_t * dm_par); - void (*downmix_2f_1r_to_2ch)(float *samples, dm_par_t * dm_par); - void (*downmix_3f_0r_to_2ch)(float *samples, dm_par_t * dm_par); - void (*stream_sample_2ch_to_s16)(s16 *s16_samples, float *left, float *right); - void (*stream_sample_1ch_to_s16)(s16 *s16_samples, float *center); -} downmix_t; - struct ac3dec_s { /* @@ -436,3 +379,4 @@ struct ac3dec_s downmix_t downmix; }; + diff --git a/src/ac3_decoder/ac3_decoder_thread.c b/src/ac3_decoder/ac3_decoder_thread.c index 6cac3615ac..17e86ac2bc 100644 --- a/src/ac3_decoder/ac3_decoder_thread.c +++ b/src/ac3_decoder/ac3_decoder_thread.c @@ -2,7 +2,7 @@ * ac3_decoder_thread.c: ac3 decoder thread 
***************************************************************************** * Copyright (C) 1999, 2000 VideoLAN - * $Id: ac3_decoder_thread.c,v 1.32 2001/05/06 04:32:02 sam Exp $ + * $Id: ac3_decoder_thread.c,v 1.33 2001/05/15 16:19:42 sam Exp $ * * Authors: Michel Lespinasse * @@ -37,7 +37,6 @@ #include /* getpid() */ -#include /* "intf_msg.h" */ #include /* malloc(), free() */ #include /* memset() */ @@ -45,6 +44,7 @@ #include "common.h" #include "threads.h" #include "mtime.h" +#include "modules.h" #include "intf_msg.h" /* intf_DbgMsg(), intf_ErrMsg() */ @@ -53,6 +53,8 @@ #include "audio_output.h" +#include "ac3_imdct.h" +#include "ac3_downmix.h" #include "ac3_decoder.h" #include "ac3_decoder_thread.h" @@ -68,18 +70,17 @@ static void EndThread (ac3dec_thread_t * p_adec); static void BitstreamCallback ( bit_stream_t *p_bit_stream, boolean_t b_new_pes ); - /***************************************************************************** * ac3dec_CreateThread: creates an ac3 decoder thread *****************************************************************************/ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config ) { - ac3dec_thread_t * p_ac3dec_t; + ac3dec_thread_t * p_ac3thread; intf_DbgMsg( "ac3dec debug: creating ac3 decoder thread" ); /* Allocate the memory needed to store the thread's structure */ - if((p_ac3dec_t = (ac3dec_thread_t *)malloc(sizeof(ac3dec_thread_t)))==NULL) + if((p_ac3thread = (ac3dec_thread_t *)malloc(sizeof(ac3dec_thread_t)))==NULL) { intf_ErrMsg ( "ac3dec error: not enough memory " "for ac3dec_CreateThread() to create the new thread"); @@ -89,28 +90,77 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config ) /* * Initialize the thread properties */ - p_ac3dec_t->p_config = p_config; - p_ac3dec_t->p_fifo = p_config->decoder_config.p_decoder_fifo; + p_ac3thread->p_config = p_config; + p_ac3thread->p_fifo = p_config->decoder_config.p_decoder_fifo; + + /* + * Choose the best downmix module + */ +#define DOWNMIX 
p_ac3thread->ac3_decoder.downmix + DOWNMIX.p_module = module_Need( MODULE_CAPABILITY_DOWNMIX, NULL ); + + if( DOWNMIX.p_module == NULL ) + { + intf_ErrMsg( "ac3dec error: no suitable downmix module" ); + free( p_ac3thread ); + return( 0 ); + } + +#define F DOWNMIX.p_module->p_functions->downmix.functions.downmix + DOWNMIX.pf_downmix_3f_2r_to_2ch = F.pf_downmix_3f_2r_to_2ch; + DOWNMIX.pf_downmix_2f_2r_to_2ch = F.pf_downmix_2f_2r_to_2ch; + DOWNMIX.pf_downmix_3f_1r_to_2ch = F.pf_downmix_3f_1r_to_2ch; + DOWNMIX.pf_downmix_2f_1r_to_2ch = F.pf_downmix_2f_1r_to_2ch; + DOWNMIX.pf_downmix_3f_0r_to_2ch = F.pf_downmix_3f_0r_to_2ch; + DOWNMIX.pf_stream_sample_2ch_to_s16 = F.pf_stream_sample_2ch_to_s16; + DOWNMIX.pf_stream_sample_1ch_to_s16 = F.pf_stream_sample_1ch_to_s16; +#undef F +#undef DOWNMIX + + /* + * Choose the best IMDCT module + */ +#define IMDCT p_ac3thread->ac3_decoder.imdct + IMDCT.p_module = module_Need( MODULE_CAPABILITY_IMDCT, NULL ); + + if( IMDCT.p_module == NULL ) + { + intf_ErrMsg( "ac3dec error: no suitable IMDCT module" ); + module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module ); + free( p_ac3thread ); + return( 0 ); + } + +#define F IMDCT.p_module->p_functions->imdct.functions.imdct + IMDCT.pf_imdct_init = F.pf_imdct_init; + IMDCT.pf_imdct_256 = F.pf_imdct_256; + IMDCT.pf_imdct_256_nol = F.pf_imdct_256_nol; + IMDCT.pf_imdct_512 = F.pf_imdct_512; + IMDCT.pf_imdct_512_nol = F.pf_imdct_512_nol; +#undef F +#undef IMDCT /* Initialize the ac3 decoder structures */ - ac3_init (&p_ac3dec_t->ac3_decoder); + ac3_init (&p_ac3thread->ac3_decoder); /* * Initialize the output properties */ - p_ac3dec_t->p_aout_fifo = NULL; + p_ac3thread->p_aout_fifo = NULL; /* Spawn the ac3 decoder thread */ - if (vlc_thread_create(&p_ac3dec_t->thread_id, "ac3 decoder", - (vlc_thread_func_t)RunThread, (void *)p_ac3dec_t)) + if (vlc_thread_create(&p_ac3thread->thread_id, "ac3 decoder", + (vlc_thread_func_t)RunThread, (void *)p_ac3thread)) { intf_ErrMsg( "ac3dec error: can't spawn 
ac3 decoder thread" ); - free (p_ac3dec_t); + module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module ); + module_Unneed( p_ac3thread->ac3_decoder.imdct.p_module ); + free (p_ac3thread); return 0; } - intf_DbgMsg ("ac3dec debug: ac3 decoder thread (%p) created", p_ac3dec_t); - return p_ac3dec_t->thread_id; + intf_DbgMsg ("ac3dec debug: ac3 decoder thread (%p) created", p_ac3thread); + return p_ac3thread->thread_id; } /* Following functions are local */ @@ -118,48 +168,48 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config ) /***************************************************************************** * InitThread : initialize an ac3 decoder thread *****************************************************************************/ -static int InitThread (ac3dec_thread_t * p_ac3dec_t) +static int InitThread (ac3dec_thread_t * p_ac3thread) { - intf_DbgMsg("ac3dec debug: initializing ac3 decoder thread %p",p_ac3dec_t); + intf_DbgMsg("ac3dec debug: initializing ac3 decoder thread %p",p_ac3thread); - p_ac3dec_t->p_config->decoder_config.pf_init_bit_stream( - &p_ac3dec_t->ac3_decoder.bit_stream, - p_ac3dec_t->p_config->decoder_config.p_decoder_fifo, - BitstreamCallback, (void *) p_ac3dec_t ); + p_ac3thread->p_config->decoder_config.pf_init_bit_stream( + &p_ac3thread->ac3_decoder.bit_stream, + p_ac3thread->p_config->decoder_config.p_decoder_fifo, + BitstreamCallback, (void *) p_ac3thread ); /* Creating the audio output fifo */ - p_ac3dec_t->p_aout_fifo = aout_CreateFifo( AOUT_ADEC_STEREO_FIFO, 2, 0, 0, + p_ac3thread->p_aout_fifo = aout_CreateFifo( AOUT_ADEC_STEREO_FIFO, 2, 0, 0, AC3DEC_FRAME_SIZE, NULL ); - if ( p_ac3dec_t->p_aout_fifo == NULL ) + if ( p_ac3thread->p_aout_fifo == NULL ) { return -1; } - intf_DbgMsg("ac3dec debug: ac3 decoder thread %p initialized", p_ac3dec_t); + intf_DbgMsg("ac3dec debug: ac3 decoder thread %p initialized", p_ac3thread); return 0; } /***************************************************************************** * RunThread : ac3 
decoder thread *****************************************************************************/ -static void RunThread (ac3dec_thread_t * p_ac3dec_t) +static void RunThread (ac3dec_thread_t * p_ac3thread) { int sync; - intf_DbgMsg ("ac3dec debug: running ac3 decoder thread (%p) (pid == %i)", p_ac3dec_t, getpid()); + intf_DbgMsg ("ac3dec debug: running ac3 decoder thread (%p) (pid == %i)", p_ac3thread, getpid()); /* Initializing the ac3 decoder thread */ - if (InitThread (p_ac3dec_t)) /* XXX?? */ + if (InitThread (p_ac3thread)) /* XXX?? */ { - p_ac3dec_t->p_fifo->b_error = 1; + p_ac3thread->p_fifo->b_error = 1; } sync = 0; - p_ac3dec_t->sync_ptr = 0; + p_ac3thread->sync_ptr = 0; /* ac3 decoder thread's main loop */ /* FIXME : do we have enough room to store the decoded frames ?? */ - while ((!p_ac3dec_t->p_fifo->b_die) && (!p_ac3dec_t->p_fifo->b_error)) + while ((!p_ac3thread->p_fifo->b_die) && (!p_ac3thread->p_fifo->b_error)) { s16 * buffer; ac3_sync_info_t sync_info; @@ -167,122 +217,125 @@ static void RunThread (ac3dec_thread_t * p_ac3dec_t) if (!sync) { do { - GetBits(&p_ac3dec_t->ac3_decoder.bit_stream,8); - } while ((!p_ac3dec_t->sync_ptr) && (!p_ac3dec_t->p_fifo->b_die) - && (!p_ac3dec_t->p_fifo->b_error)); + GetBits(&p_ac3thread->ac3_decoder.bit_stream,8); + } while ((!p_ac3thread->sync_ptr) && (!p_ac3thread->p_fifo->b_die) + && (!p_ac3thread->p_fifo->b_error)); - ptr = p_ac3dec_t->sync_ptr; + ptr = p_ac3thread->sync_ptr; - while(ptr-- && (!p_ac3dec_t->p_fifo->b_die) - && (!p_ac3dec_t->p_fifo->b_error)) + while(ptr-- && (!p_ac3thread->p_fifo->b_die) + && (!p_ac3thread->p_fifo->b_error)) { - p_ac3dec_t->ac3_decoder.bit_stream.p_byte++; + p_ac3thread->ac3_decoder.bit_stream.p_byte++; } /* we are in sync now */ sync = 1; } - if (DECODER_FIFO_START(*p_ac3dec_t->p_fifo)->i_pts) + if (DECODER_FIFO_START(*p_ac3thread->p_fifo)->i_pts) { - p_ac3dec_t->p_aout_fifo->date[p_ac3dec_t->p_aout_fifo->l_end_frame] = - DECODER_FIFO_START(*p_ac3dec_t->p_fifo)->i_pts; - 
DECODER_FIFO_START(*p_ac3dec_t->p_fifo)->i_pts = 0; + p_ac3thread->p_aout_fifo->date[p_ac3thread->p_aout_fifo->l_end_frame] = + DECODER_FIFO_START(*p_ac3thread->p_fifo)->i_pts; + DECODER_FIFO_START(*p_ac3thread->p_fifo)->i_pts = 0; } else { - p_ac3dec_t->p_aout_fifo->date[p_ac3dec_t->p_aout_fifo->l_end_frame] = + p_ac3thread->p_aout_fifo->date[p_ac3thread->p_aout_fifo->l_end_frame] = LAST_MDATE; } - if (ac3_sync_frame (&p_ac3dec_t->ac3_decoder, &sync_info)) + if (ac3_sync_frame (&p_ac3thread->ac3_decoder, &sync_info)) { sync = 0; goto bad_frame; } - p_ac3dec_t->p_aout_fifo->l_rate = sync_info.sample_rate; + p_ac3thread->p_aout_fifo->l_rate = sync_info.sample_rate; - buffer = ((s16 *)p_ac3dec_t->p_aout_fifo->buffer) + - (p_ac3dec_t->p_aout_fifo->l_end_frame * AC3DEC_FRAME_SIZE); + buffer = ((s16 *)p_ac3thread->p_aout_fifo->buffer) + + (p_ac3thread->p_aout_fifo->l_end_frame * AC3DEC_FRAME_SIZE); - if (ac3_decode_frame (&p_ac3dec_t->ac3_decoder, buffer)) + if (ac3_decode_frame (&p_ac3thread->ac3_decoder, buffer)) { sync = 0; goto bad_frame; } - vlc_mutex_lock (&p_ac3dec_t->p_aout_fifo->data_lock); - p_ac3dec_t->p_aout_fifo->l_end_frame = - (p_ac3dec_t->p_aout_fifo->l_end_frame + 1) & AOUT_FIFO_SIZE; - vlc_cond_signal (&p_ac3dec_t->p_aout_fifo->data_wait); - vlc_mutex_unlock (&p_ac3dec_t->p_aout_fifo->data_lock); + vlc_mutex_lock (&p_ac3thread->p_aout_fifo->data_lock); + p_ac3thread->p_aout_fifo->l_end_frame = + (p_ac3thread->p_aout_fifo->l_end_frame + 1) & AOUT_FIFO_SIZE; + vlc_cond_signal (&p_ac3thread->p_aout_fifo->data_wait); + vlc_mutex_unlock (&p_ac3thread->p_aout_fifo->data_lock); bad_frame: - RealignBits(&p_ac3dec_t->ac3_decoder.bit_stream); + RealignBits(&p_ac3thread->ac3_decoder.bit_stream); } /* If b_error is set, the ac3 decoder thread enters the error loop */ - if (p_ac3dec_t->p_fifo->b_error) + if (p_ac3thread->p_fifo->b_error) { - ErrorThread (p_ac3dec_t); + ErrorThread (p_ac3thread); } /* End of the ac3 decoder thread */ - EndThread (p_ac3dec_t); + 
EndThread (p_ac3thread); } /***************************************************************************** * ErrorThread : ac3 decoder's RunThread() error loop *****************************************************************************/ -static void ErrorThread (ac3dec_thread_t * p_ac3dec_t) +static void ErrorThread (ac3dec_thread_t * p_ac3thread) { /* We take the lock, because we are going to read/write the start/end * indexes of the decoder fifo */ - vlc_mutex_lock (&p_ac3dec_t->p_fifo->data_lock); + vlc_mutex_lock (&p_ac3thread->p_fifo->data_lock); /* Wait until a `die' order is sent */ - while (!p_ac3dec_t->p_fifo->b_die) + while (!p_ac3thread->p_fifo->b_die) { /* Trash all received PES packets */ - while (!DECODER_FIFO_ISEMPTY(*p_ac3dec_t->p_fifo)) + while (!DECODER_FIFO_ISEMPTY(*p_ac3thread->p_fifo)) { - p_ac3dec_t->p_fifo->pf_delete_pes(p_ac3dec_t->p_fifo->p_packets_mgt, - DECODER_FIFO_START(*p_ac3dec_t->p_fifo)); - DECODER_FIFO_INCSTART (*p_ac3dec_t->p_fifo); + p_ac3thread->p_fifo->pf_delete_pes(p_ac3thread->p_fifo->p_packets_mgt, + DECODER_FIFO_START(*p_ac3thread->p_fifo)); + DECODER_FIFO_INCSTART (*p_ac3thread->p_fifo); } /* Waiting for the input thread to put new PES packets in the fifo */ - vlc_cond_wait (&p_ac3dec_t->p_fifo->data_wait, - &p_ac3dec_t->p_fifo->data_lock); + vlc_cond_wait (&p_ac3thread->p_fifo->data_wait, + &p_ac3thread->p_fifo->data_lock); } /* We can release the lock before leaving */ - vlc_mutex_unlock (&p_ac3dec_t->p_fifo->data_lock); + vlc_mutex_unlock (&p_ac3thread->p_fifo->data_lock); } /***************************************************************************** * EndThread : ac3 decoder thread destruction *****************************************************************************/ -static void EndThread (ac3dec_thread_t * p_ac3dec_t) +static void EndThread (ac3dec_thread_t * p_ac3thread) { - intf_DbgMsg ("ac3dec debug: destroying ac3 decoder thread %p", p_ac3dec_t); + intf_DbgMsg ("ac3dec debug: destroying ac3 decoder thread 
%p", p_ac3thread); /* If the audio output fifo was created, we destroy it */ - if (p_ac3dec_t->p_aout_fifo != NULL) + if (p_ac3thread->p_aout_fifo != NULL) { - aout_DestroyFifo (p_ac3dec_t->p_aout_fifo); + aout_DestroyFifo (p_ac3thread->p_aout_fifo); /* Make sure the output thread leaves the NextFrame() function */ - vlc_mutex_lock (&(p_ac3dec_t->p_aout_fifo->data_lock)); - vlc_cond_signal (&(p_ac3dec_t->p_aout_fifo->data_wait)); - vlc_mutex_unlock (&(p_ac3dec_t->p_aout_fifo->data_lock)); - + vlc_mutex_lock (&(p_ac3thread->p_aout_fifo->data_lock)); + vlc_cond_signal (&(p_ac3thread->p_aout_fifo->data_wait)); + vlc_mutex_unlock (&(p_ac3thread->p_aout_fifo->data_lock)); } + /* Unlock the modules */ + module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module ); + module_Unneed( p_ac3thread->ac3_decoder.imdct.p_module ); + /* Destroy descriptor */ - free( p_ac3dec_t->p_config ); - free( p_ac3dec_t ); + free( p_ac3thread->p_config ); + free( p_ac3thread ); - intf_DbgMsg ("ac3dec debug: ac3 decoder thread %p destroyed", p_ac3dec_t); + intf_DbgMsg ("ac3dec debug: ac3 decoder thread %p destroyed", p_ac3thread); } /***************************************************************************** @@ -294,7 +347,7 @@ static void BitstreamCallback ( bit_stream_t * p_bit_stream, boolean_t b_new_pes) { - ac3dec_thread_t *p_ac3dec_t=(ac3dec_thread_t *)p_bit_stream->p_callback_arg; + ac3dec_thread_t *p_ac3thread=(ac3dec_thread_t *)p_bit_stream->p_callback_arg; if( b_new_pes ) { @@ -303,7 +356,8 @@ static void BitstreamCallback ( bit_stream_t * p_bit_stream, ptr = *(p_bit_stream->p_byte + 1); ptr <<= 8; ptr |= *(p_bit_stream->p_byte + 2); - p_ac3dec_t->sync_ptr = ptr; + p_ac3thread->sync_ptr = ptr; p_bit_stream->p_byte += 3; } } + diff --git a/src/ac3_decoder/ac3_downmix.c b/src/ac3_decoder/ac3_downmix.c deleted file mode 100644 index 20b0d4e86a..0000000000 --- a/src/ac3_decoder/ac3_downmix.c +++ /dev/null @@ -1,79 +0,0 @@ 
-/***************************************************************************** - * ac3_downmix.c: ac3 downmix functions - ***************************************************************************** - * Copyright (C) 1999, 2000 VideoLAN - * $Id: ac3_downmix.c,v 1.23 2001/05/14 15:58:03 reno Exp $ - * - * Authors: Michel Kaempf - * Aaron Holtzman - * Renaud Dartus - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
- *****************************************************************************/ -#include "defs.h" - -#include /* memcpy() */ - -#include "config.h" -#include "common.h" -#include "threads.h" -#include "mtime.h" - -#include "intf_msg.h" /* intf_DbgMsg(), intf_ErrMsg() */ -#include "tests.h" - -#include "stream_control.h" -#include "input_ext-dec.h" - -#include "ac3_decoder.h" -#include "ac3_downmix.h" - -void downmix_init (downmix_t * p_downmix) -{ -#if 0 - if ( TestCPU (CPU_CAPABILITY_SSE) ) - { - intf_WarnMsg (1,"ac3dec: using MMX_SSE for downmix"); - p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_sse; - p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_sse; - p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_sse; - p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_sse; - p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_sse; - p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_sse; - p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_sse; - } - else if ( TestCPU (CPU_CAPABILITY_3DNOW) ) - { - intf_WarnMsg (1,"ac3dec: using MMX_3DNOW for downmix"); - p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_3dn; - p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_3dn; - p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_3dn; - p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_3dn; - p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_3dn; - p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_3dn; - p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_3dn; - } - else -#endif - { - p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_c; - p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_c; - p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_c; - p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_c; - p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_c; - p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_c; - p_downmix->stream_sample_1ch_to_s16 
= stream_sample_1ch_to_s16_c; - } -} diff --git a/src/ac3_decoder/ac3_downmix.h b/src/ac3_decoder/ac3_downmix.h deleted file mode 100644 index 063beeb1dc..0000000000 --- a/src/ac3_decoder/ac3_downmix.h +++ /dev/null @@ -1,51 +0,0 @@ -/***************************************************************************** - * ac3_downmix.h: ac3 downmix functions - ***************************************************************************** - * Copyright (C) 2000, 2001 VideoLAN - * $Id: ac3_downmix.h,v 1.7 2001/05/14 15:58:04 reno Exp $ - * - * Authors: Renaud Dartus - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
- *****************************************************************************/ - -/* C functions */ -void downmix_3f_2r_to_2ch_c(float *samples, dm_par_t * dm_par); -void downmix_3f_1r_to_2ch_c(float *samples, dm_par_t * dm_par); -void downmix_2f_2r_to_2ch_c(float *samples, dm_par_t * dm_par); -void downmix_2f_1r_to_2ch_c(float *samples, dm_par_t * dm_par); -void downmix_3f_0r_to_2ch_c(float *samples, dm_par_t * dm_par); -void stream_sample_2ch_to_s16_c(s16 *s16_samples, float *left, float *right); -void stream_sample_1ch_to_s16_c(s16 *s16_samples, float *center); - -/* SSE functions */ -void downmix_3f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par); -void downmix_3f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par); -void downmix_2f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par); -void downmix_2f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par); -void downmix_3f_0r_to_2ch_sse(float *samples, dm_par_t * dm_par); -void stream_sample_2ch_to_s16_sse(s16 *s16_samples, float *left, float *right); -void stream_sample_1ch_to_s16_sse(s16 *s16_samples, float *center); - -/* 3DNow! 
functions */ -void downmix_3f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par); -void downmix_3f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par); -void downmix_2f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par); -void downmix_2f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par); -void downmix_3f_0r_to_2ch_3dn(float *samples, dm_par_t * dm_par); -void stream_sample_2ch_to_s16_3dn(s16 *s16_samples, float *left, float *right); -void stream_sample_1ch_to_s16_3dn(s16 *s16_samples, float *center); - - diff --git a/src/ac3_decoder/ac3_downmix_3dn.c b/src/ac3_decoder/ac3_downmix_3dn.c deleted file mode 100644 index 3fb5b0ce35..0000000000 --- a/src/ac3_decoder/ac3_downmix_3dn.c +++ /dev/null @@ -1,295 +0,0 @@ -/***************************************************************************** - * ac3_downmix_3dn.c: ac3 downmix functions - ***************************************************************************** - * Copyright (C) 1999, 2000, 2001 VideoLAN - * $Id: ac3_downmix_3dn.c,v 1.1 2001/05/14 15:58:04 reno Exp $ - * - * Authors: Renaud Dartus - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
- *****************************************************************************/ - -#include "defs.h" - -#include "config.h" -#include "common.h" -#include "threads.h" -#include "mtime.h" -#include "tests.h" - -#include "stream_control.h" -#include "input_ext-dec.h" -#include "ac3_decoder.h" - - -void downmix_3f_2r_to_2ch_3dn (float * samples, dm_par_t * dm_par) -{ - __asm__ __volatile__ ( - "pushl %%ecx\n" - "movl $128, %%ecx\n" /* loop counter */ - - "movd (%%ebx), %%mm5\n" /* unit */ - "punpckldq %%mm5, %%mm5\n" /* unit | unit */ - - "movd 4(%%ebx), %%mm6\n" /* clev */ - "punpckldq %%mm6, %%mm6\n" /* clev | clev */ - - "movd 8(%%ebx), %%mm7\n" /* slev */ - "punpckldq %%mm7, %%mm7\n" /* slev | slev */ - -".loop:\n" - "movq (%%eax), %%mm0\n" /* left */ - "movq 2048(%%eax), %%mm1\n" /* right */ - "movq 1024(%%eax), %%mm2\n" /* center */ - "movq 3072(%%eax), %%mm3\n" /* leftsur */ - "movq 4096(%%eax), %%mm4\n" /* rightsur */ - "pfmul %%mm5, %%mm0\n" - "pfmul %%mm5, %%mm1\n" - "pfmul %%mm6, %%mm2\n" - "pfadd %%mm2, %%mm0\n" - "pfadd %%mm2, %%mm1\n" - "pfmul %%mm7, %%mm3\n" - "pfmul %%mm7, %%mm4\n" - "pfadd %%mm3, %%mm0\n" - "pfadd %%mm4, %%mm1\n" - - "movq %%mm0, (%%eax)\n" - "movq %%mm1, 1024(%%eax)\n" - - "addl $8, %%eax\n" - "decl %%ecx\n" - "jnz .loop\n" - - "popl %%ecx\n" - "femms\n" - : "=a" (samples) - : "a" (samples), "b" (dm_par)); -} - -void downmix_2f_2r_to_2ch_3dn (float *samples, dm_par_t * dm_par) -{ - __asm__ __volatile__ ( - "pushl %%ecx\n" - "movl $128, %%ecx\n" /* loop counter */ - - "movd (%%ebx), %%mm5\n" /* unit */ - "punpckldq %%mm5, %%mm5\n" /* unit | unit */ - - "movd 8(%%ebx), %%mm7\n" /* slev */ - "punpckldq %%mm7, %%mm7\n" /* slev | slev */ - -".loop3:\n" - "movq (%%eax), %%mm0\n" /* left */ - "movq 1024(%%eax), %%mm1\n" /* right */ - "movq 2048(%%eax), %%mm3\n" /* leftsur */ - "movq 3072(%%eax), %%mm4\n" /* rightsur */ - "pfmul %%mm5, %%mm0\n" - "pfmul %%mm5, %%mm1\n" - "pfmul %%mm7, %%mm3\n" - "pfmul %%mm7, %%mm4\n" - "pfadd %%mm3, 
%%mm0\n" - "pfadd %%mm4, %%mm1\n" - - "movq %%mm0, (%%eax)\n" - "movq %%mm1, 1024(%%eax)\n" - - "addl $8, %%eax\n" - "decl %%ecx\n" - "jnz .loop3\n" - - "popl %%ecx\n" - "femms\n" - : "=a" (samples) - : "a" (samples), "b" (dm_par)); -} -void downmix_3f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par) -{ - __asm__ __volatile__ ( - - "pushl %%ecx\n" - "movl $128, %%ecx\n" /* loop counter */ - - "movd (%%ebx), %%mm5\n" /* unit */ - "punpckldq %%mm5, %%mm5\n" /* unit | unit */ - - "movd 4(%%ebx), %%mm6\n" /* clev */ - "punpckldq %%mm6, %%mm6\n" /* clev | clev */ - - "movd 8(%%ebx), %%mm7\n" /* slev */ - "punpckldq %%mm7, %%mm7\n" /* slev | slev */ - -".loop4:\n" - "movq (%%eax), %%mm0\n" /* left */ - "movq 2048(%%eax), %%mm1\n" /* right */ - "movq 1024(%%eax), %%mm2\n" /* center */ - "movq 3072(%%eax), %%mm3\n" /* sur */ - "pfmul %%mm5, %%mm0\n" - "pfmul %%mm5, %%mm1\n" - "pfmul %%mm6, %%mm2\n" - "pfadd %%mm2, %%mm0\n" - "pfmul %%mm7, %%mm3\n" - "pfadd %%mm2, %%mm1\n" - "pfsub %%mm3, %%mm0\n" - "pfadd %%mm3, %%mm1\n" - - "movq %%mm0, (%%eax)\n" - "movq %%mm1, 1024(%%eax)\n" - - "addl $8, %%eax\n" - "decl %%ecx\n" - "jnz .loop4\n" - - "popl %%ecx\n" - "femms\n" - : "=a" (samples) - : "a" (samples), "b" (dm_par)); -} -void downmix_2f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par) -{ - __asm__ __volatile__ ( - "pushl %%ecx\n" - "movl $128, %%ecx\n" /* loop counter */ - - "movd (%%ebx), %%mm5\n" /* unit */ - "punpckldq %%mm5, %%mm5\n" /* unit | unit */ - - "movd 8(%%ebx), %%mm7\n" /* slev */ - "punpckldq %%mm7, %%mm7\n" /* slev | slev */ - -".loop5:\n" - "movq (%%eax), %%mm0\n" /* left */ - "movq 1024(%%eax), %%mm1\n" /* right */ - "movq 2048(%%eax), %%mm3\n" /* sur */ - "pfmul %%mm5, %%mm0\n" - "pfmul %%mm5, %%mm1\n" - "pfmul %%mm7, %%mm3\n" - "pfsub %%mm3, %%mm0\n" - "pfadd %%mm3, %%mm1\n" - - "movq %%mm0, (%%eax)\n" - "movq %%mm1, 1024(%%eax)\n" - - "addl $8, %%eax\n" - "decl %%ecx\n" - "jnz .loop5\n" - - "popl %%ecx\n" - "femms\n" - : "=a" (samples) - : "a" 
(samples), "b" (dm_par)); -} - -void downmix_3f_0r_to_2ch_3dn (float *samples, dm_par_t * dm_par) -{ - __asm__ __volatile__ ( - "pushl %%ecx\n" - "movl $128, %%ecx\n" /* loop counter */ - - "movd (%%ebx), %%mm5\n" /* unit */ - "punpckldq %%mm5, %%mm5\n" /* unit | unit */ - - "movd 4(%%ebx), %%mm6\n" /* clev */ - "punpckldq %%mm6, %%mm6\n" /* clev | clev */ - -".loop6:\n" - "movq (%%eax), %%mm0\n" /*left */ - "movq 2048(%%eax), %%mm1\n" /* right */ - "movq 1024(%%eax), %%mm2\n" /* center */ - "pfmul %%mm5, %%mm0\n" - "pfmul %%mm5, %%mm1\n" - "pfmul %%mm6, %%mm2\n" - "pfadd %%mm2, %%mm0\n" - "pfadd %%mm2, %%mm1\n" - - "movq %%mm0, (%%eax)\n" - "movq %%mm1, 1024(%%eax)\n" - - "addl $8, %%eax\n" - "decl %%ecx\n" - "jnz .loop6\n" - - "popl %%ecx\n" - "femms\n" - : "=a" (samples) - : "a" (samples), "b" (dm_par)); -} - -void stream_sample_1ch_to_s16_3dn (s16 *s16_samples, float *left) -{ - __asm__ __volatile__ ( - "pushl %%ecx\n" - "pushl %%edx\n" - - "movl $sqrt2, %%edx\n" - "movd (%%edx), %%mm7\n" - "punpckldq %%mm7, %%mm7\n" /* sqrt2 | sqrt2 */ - "movl $128, %%ecx\n" - -".loop2:\n" - "movq (%%ebx), %%mm0\n" /* c1 | c0 */ - "pfmul %%mm7, %%mm0\n" - - "pf2id %%mm0, %%mm0\n" /* c1 c0 --> mm0, int_32 */ - - "packssdw %%mm0, %%mm0\n" /* c1 c1 c0 c0 --> mm0, int_16 */ - - "movq %%mm0, (%%eax)\n" - "addl $8, %%eax\n" - "addl $8, %%ebx\n" - - "decl %%ecx\n" - "jnz .loop2\n" - - "popl %%edx\n" - "popl %%ecx\n" - "femms\n" - : "=a" (s16_samples), "=b" (left) - : "a" (s16_samples), "b" (left)); -} - -void stream_sample_2ch_to_s16_3dn (s16 *s16_samples, float *left, float *right) -{ - - __asm__ __volatile__ ( - "pushl %%ecx\n" - "movl $128, %%ecx\n" - -".loop1:\n" - "movq (%%ebx), %%mm0\n" /* l1 | l0 */ - "movq (%%edx), %%mm1\n" /* r1 | r0 */ - "movq %%mm0, %%mm2\n" /* l1 | l0 */ - "punpckldq %%mm1, %%mm0\n" /* r0 | l0 */ - "punpckhdq %%mm1, %%mm2\n" /* r1 | l1 */ - - "pf2id %%mm0, %%mm0\n" /* r0 l0 --> mm0, int_32 */ - "pf2id %%mm2, %%mm2\n" /* r0 l0 --> mm0, int_32 */ - - 
"packssdw %%mm2, %%mm0\n" /* r1 l1 r0 l0 --> mm0, int_16 */ - - "movq %%mm0, (%%eax)\n" - "movq %%mm2, 8(%%eax)\n" - "addl $8, %%eax\n" - "addl $8, %%ebx\n" - "addl $8, %%edx\n" - - "decl %%ecx\n" - "jnz .loop1\n" - - "popl %%ecx\n" - "femms\n" - : "=a" (s16_samples), "=b" (left), "=d" (right) - : "a" (s16_samples), "b" (left), "d" (right)); - -} diff --git a/src/ac3_decoder/ac3_downmix_sse.c b/src/ac3_decoder/ac3_downmix_sse.c deleted file mode 100644 index 71a9b33faf..0000000000 --- a/src/ac3_decoder/ac3_downmix_sse.c +++ /dev/null @@ -1,308 +0,0 @@ -/***************************************************************************** - * ac3_downmix_sse.c: ac3 downmix functions - ***************************************************************************** - * Copyright (C) 1999, 2000, 2001 VideoLAN - * $Id: ac3_downmix_sse.c,v 1.1 2001/05/14 15:58:04 reno Exp $ - * - * Authors: Renaud Dartus - * Aaron Holtzman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
- *****************************************************************************/ - -#include "defs.h" - -#include "config.h" -#include "common.h" -#include "threads.h" -#include "mtime.h" -#include "tests.h" - -#include "stream_control.h" -#include "input_ext-dec.h" -#include "ac3_decoder.h" - - -void sqrt2 (void) -{ - __asm__ (".float 0f0.7071068"); -} - -void downmix_3f_2r_to_2ch_sse (float * samples, dm_par_t * dm_par) -{ - __asm__ __volatile__ ( - "pushl %%ecx\n" - "movl $64, %%ecx\n" /* loop counter */ - - "movss (%%ebx), %%xmm5\n" /* unit */ - "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */ - - "movss 4(%%ebx), %%xmm6\n" /* clev */ - "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */ - - "movss 8(%%ebx), %%xmm7\n" /* slev */ - "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */ - -".loop:\n" - "movups (%%eax), %%xmm0\n" /* left */ - "movups 2048(%%eax), %%xmm1\n" /* right */ - "movups 1024(%%eax), %%xmm2\n" /* center */ - "movups 3072(%%eax), %%xmm3\n" /* leftsur */ - "movups 4096(%%eax), %%xmm4\n" /* rithgsur */ - "mulps %%xmm5, %%xmm0\n" - "mulps %%xmm5, %%xmm1\n" - "mulps %%xmm6, %%xmm2\n" - "addps %%xmm2, %%xmm0\n" - "addps %%xmm2, %%xmm1\n" - "mulps %%xmm7, %%xmm3\n" - "mulps %%xmm7, %%xmm4\n" - "addps %%xmm3, %%xmm0\n" - "addps %%xmm4, %%xmm1\n" - - "movups %%xmm0, (%%eax)\n" - "movups %%xmm1, 1024(%%eax)\n" - - "addl $16, %%eax\n" - "decl %%ecx\n" - "jnz .loop\n" - - "popl %%ecx\n" - : "=a" (samples) - : "a" (samples), "b" (dm_par)); -} - -void downmix_2f_2r_to_2ch_sse (float *samples, dm_par_t * dm_par) -{ - __asm__ __volatile__ ( - "pushl %%ecx\n" - "movl $64, %%ecx\n" /* loop counter */ - - "movss (%%ebx), %%xmm5\n" /* unit */ - "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */ - - "movss 8(%%ebx), %%xmm7\n" /* slev */ - "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */ - -".loop3:\n" - "movups (%%eax), %%xmm0\n" /* left */ - "movups 1024(%%eax), %%xmm1\n" /* right */ - "movups 2048(%%eax), 
%%xmm3\n" /* leftsur */ - "movups 3072(%%eax), %%xmm4\n" /* rightsur */ - "mulps %%xmm5, %%xmm0\n" - "mulps %%xmm5, %%xmm1\n" - "mulps %%xmm7, %%xmm3\n" - "mulps %%xmm7, %%xmm4\n" - "addps %%xmm3, %%xmm0\n" - "addps %%xmm4, %%xmm1\n" - - "movups %%xmm0, (%%eax)\n" - "movups %%xmm1, 1024(%%eax)\n" - - "addl $16, %%eax\n" - "decl %%ecx\n" - "jnz .loop3\n" - - "popl %%ecx\n" - : "=a" (samples) - : "a" (samples), "b" (dm_par)); -} -void downmix_3f_1r_to_2ch_sse (float *samples, dm_par_t * dm_par) -{ - __asm__ __volatile__ ( - - "pushl %%ecx\n" - "movl $64, %%ecx\n" /* loop counter */ - - "movss (%%ebx), %%xmm5\n" /* unit */ - "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */ - - "movss 4(%%ebx), %%xmm6\n" /* clev */ - "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */ - - "movss 8(%%ebx), %%xmm7\n" /* slev */ - "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */ - -".loop4:\n" - "movups (%%eax), %%xmm0\n" /* left */ - "movups 2048(%%eax), %%xmm1\n" /* right */ - "movups 1024(%%eax), %%xmm2\n" /* center */ - "movups 3072(%%eax), %%xmm3\n" /* sur */ - "mulps %%xmm5, %%xmm0\n" - "mulps %%xmm5, %%xmm1\n" - "mulps %%xmm6, %%xmm2\n" - "addps %%xmm2, %%xmm0\n" - "mulps %%xmm7, %%xmm3\n" - "addps %%xmm2, %%xmm1\n" - "subps %%xmm3, %%xmm0\n" - "addps %%xmm3, %%xmm1\n" - - "movups %%xmm0, (%%eax)\n" - "movups %%xmm1, 1024(%%eax)\n" - - "addl $16, %%eax\n" - "decl %%ecx\n" - "jnz .loop4\n" - - "popl %%ecx\n" - : "=a" (samples) - : "a" (samples), "b" (dm_par)); - -} -void downmix_2f_1r_to_2ch_sse (float *samples, dm_par_t * dm_par) -{ - __asm__ __volatile__ ( - "pushl %%ecx\n" - "movl $64, %%ecx\n" /* loop counter */ - - "movss (%%ebx), %%xmm5\n" /* unit */ - "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */ - - "movss 8(%%ebx), %%xmm7\n" /* slev */ - "shufps $0, %%xmm7, %%xmm7\n" /* slev | slev | slev | slev */ - -".loop5:\n" - "movups (%%eax), %%xmm0\n" /* left */ - "movups 1024(%%eax), %%xmm1\n" /* right */ - "movups 2048(%%eax), 
%%xmm3\n" /* sur */ - "mulps %%xmm5, %%xmm0\n" - "mulps %%xmm5, %%xmm1\n" - "mulps %%xmm7, %%xmm3\n" - "subps %%xmm3, %%xmm0\n" - "addps %%xmm3, %%xmm1\n" - - "movups %%xmm0, (%%eax)\n" - "movups %%xmm1, 1024(%%eax)\n" - - "addl $16, %%eax\n" - "decl %%ecx\n" - "jnz .loop5\n" - - "popl %%ecx\n" - : "=a" (samples) - : "a" (samples), "b" (dm_par)); - - -} -void downmix_3f_0r_to_2ch_sse (float *samples, dm_par_t * dm_par) -{ - __asm__ __volatile__ ( - "pushl %%ecx\n" - "movl $64, %%ecx\n" /* loop counter */ - - "movss (%%ebx), %%xmm5\n" /* unit */ - "shufps $0, %%xmm5, %%xmm5\n" /* unit | unit | unit | unit */ - - "movss 4(%%ebx), %%xmm6\n" /* clev */ - "shufps $0, %%xmm6, %%xmm6\n" /* clev | clev | clev | clev */ - -".loop6:\n" - "movups (%%eax), %%xmm0\n" /*left */ - "movups 2048(%%eax), %%xmm1\n" /* right */ - "movups 1024(%%eax), %%xmm2\n" /* center */ - "mulps %%xmm5, %%xmm0\n" - "mulps %%xmm5, %%xmm1\n" - "mulps %%xmm6, %%xmm2\n" - "addps %%xmm2, %%xmm0\n" - "addps %%xmm2, %%xmm1\n" - - "movups %%xmm0, (%%eax)\n" - "movups %%xmm1, 1024(%%eax)\n" - - "addl $16, %%eax\n" - "decl %%ecx\n" - "jnz .loop6\n" - - "popl %%ecx\n" - : "=a" (samples) - : "a" (samples), "b" (dm_par)); -} - -void stream_sample_1ch_to_s16_sse (s16 *s16_samples, float *left) -{ - __asm__ __volatile__ ( - "pushl %%ecx\n" - "pushl %%edx\n" - - "movl $sqrt2, %%edx\n" - "movss (%%edx), %%xmm7\n" - "shufps $0, %%xmm7, %%xmm7\n" /* sqrt2 | sqrt2 | sqrt2 | sqrt2 */ - "movl $64, %%ecx\n" - -".loop2:\n" - "movups (%%ebx), %%xmm0\n" /* c3 | c2 | c1 | c0 */ - "mulps %%xmm7, %%xmm0\n" - "movhlps %%xmm0, %%xmm2\n" /* c3 | c2 */ - - "cvtps2pi %%xmm0, %%mm0\n" /* c1 c0 --> mm0, int_32 */ - "cvtps2pi %%xmm2, %%mm1\n" /* c3 c2 --> mm1, int_32 */ - - "packssdw %%mm0, %%mm0\n" /* c1 c1 c0 c0 --> mm0, int_16 */ - "packssdw %%mm1, %%mm1\n" /* c3 c3 c2 c2 --> mm1, int_16 */ - - "movq %%mm0, (%%eax)\n" - "movq %%mm1, 8(%%eax)\n" - "addl $16, %%eax\n" - "addl $16, %%ebx\n" - - "decl %%ecx\n" - "jnz .loop2\n" - - 
"popl %%edx\n" - "popl %%ecx\n" - "emms\n" - : "=a" (s16_samples), "=b" (left) - : "a" (s16_samples), "b" (left)); -} - -void stream_sample_2ch_to_s16_sse (s16 *s16_samples, float *left, float *right) -{ - - __asm__ __volatile__ ( - "pushl %%ecx\n" - "movl $64, %%ecx\n" - -".loop1:\n" - "movups (%%ebx), %%xmm0\n" /* l3 | l2 | l1 | l0 */ - "movups (%%edx), %%xmm1\n" /* r3 | r2 | r1 | r0 */ - "movhlps %%xmm0, %%xmm2\n" /* l3 | l2 */ - "movhlps %%xmm1, %%xmm3\n" /* r3 | r2 */ - "unpcklps %%xmm1, %%xmm0\n" /* r1 | l1 | r0 | l0 */ - "unpcklps %%xmm3, %%xmm2\n" /* r3 | l3 | r2 | l2 */ - - "cvtps2pi %%xmm0, %%mm0\n" /* r0 l0 --> mm0, int_32 */ - "movhlps %%xmm0, %%xmm0\n" - "cvtps2pi %%xmm0, %%mm1\n" /* r1 l1 --> mm1, int_32 */ - "cvtps2pi %%xmm2, %%mm2\n" /* r2 l2 --> mm2, int_32 */ - "movhlps %%xmm2, %%xmm2\n" - "cvtps2pi %%xmm2, %%mm3\n" /* r3 l3 --> mm3, int_32 */ - - "packssdw %%mm1, %%mm0\n" /* r1 l1 r0 l0 --> mm0, int_16 */ - "packssdw %%mm3, %%mm2\n" /* r3 l3 r2 l2 --> mm2, int_16 */ - - "movq %%mm0, (%%eax)\n" - "movq %%mm2, 8(%%eax)\n" - "addl $16, %%eax\n" - "addl $16, %%ebx\n" - "addl $16, %%edx\n" - - "decl %%ecx\n" - "jnz .loop1\n" - - "popl %%ecx\n" - "emms\n" - : "=a" (s16_samples), "=b" (left), "=d" (right) - : "a" (s16_samples), "b" (left), "d" (right)); - -} diff --git a/src/ac3_decoder/ac3_exponent.c b/src/ac3_decoder/ac3_exponent.c index 96e3c704f6..f9bb5c4cee 100644 --- a/src/ac3_decoder/ac3_exponent.c +++ b/src/ac3_decoder/ac3_exponent.c @@ -2,7 +2,7 @@ * ac3_exponent.c: ac3 exponent calculations ***************************************************************************** * Copyright (C) 1999, 2000 VideoLAN - * $Id: ac3_exponent.c,v 1.24 2001/05/14 15:58:04 reno Exp $ + * $Id: ac3_exponent.c,v 1.25 2001/05/15 16:19:42 sam Exp $ * * Authors: Michel Kaempf * Michel Lespinasse @@ -22,6 +22,10 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
*****************************************************************************/ + +/***************************************************************************** + * Preamble + *****************************************************************************/ #include "defs.h" #include <string.h> /* memcpy(), memset() */ @@ -38,136 +42,13 @@ #include "audio_output.h" +#include "ac3_imdct.h" +#include "ac3_downmix.h" #include "ac3_decoder.h" #include "ac3_internal.h" -static const s16 exps_1[128] = -{ - -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 0, 0, 0 -}; - -static const s16 exps_2[128] = -{ - -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, - -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, - -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, - -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, - -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, - 0, 0, 0 -}; - -static const s16 exps_3[128] = -{ - -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, - -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, - -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, - -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, - -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, - 0, 0, 0 -}; - -#define UNPACK_FBW 1 -#define UNPACK_CPL 2 -#define UNPACK_LFE 4 - -static __inline__ int exp_unpack_ch (ac3dec_t * p_ac3dec, u16 type, - u16 expstr, u16 ngrps, u16 initial_exp, - u16 exps[], u16 * dest) -{ - u16 i,j; - s16
exp_acc; - - if (expstr == EXP_REUSE) - { - return 0; - } - - /* Handle the initial absolute exponent */ - exp_acc = initial_exp; - j = 0; - - /* In the case of a fbw channel then the initial absolute values is - * also an exponent */ - if (type != UNPACK_CPL) - { - dest[j++] = exp_acc; - } - - /* Loop through the groups and fill the dest array appropriately */ - switch (expstr) - { - case EXP_D15: /* 1 */ - for (i = 0; i < ngrps; i++) - { - if (exps[i] > 124) - { - intf_ErrMsg ( "ac3dec error: invalid exponent" ); - return 1; - } - exp_acc += (exps_1[exps[i]] /*- 2*/); - dest[j++] = exp_acc; - exp_acc += (exps_2[exps[i]] /*- 2*/); - dest[j++] = exp_acc; - exp_acc += (exps_3[exps[i]] /*- 2*/); - dest[j++] = exp_acc; - } - break; - - case EXP_D25: /* 2 */ - for (i = 0; i < ngrps; i++) - { - if (exps[i] > 124) - { - intf_ErrMsg ( "ac3dec error: invalid exponent" ); - return 1; - } - exp_acc += (exps_1[exps[i]] /*- 2*/); - dest[j++] = exp_acc; - dest[j++] = exp_acc; - exp_acc += (exps_2[exps[i]] /*- 2*/); - dest[j++] = exp_acc; - dest[j++] = exp_acc; - exp_acc += (exps_3[exps[i]] /*- 2*/); - dest[j++] = exp_acc; - dest[j++] = exp_acc; - } - break; - - case EXP_D45: /* 3 */ - for (i = 0; i < ngrps; i++) - { - if (exps[i] > 124) - { - intf_ErrMsg ( "ac3dec error: invalid exponent" ); - return 1; - } - exp_acc += (exps_1[exps[i]] /*- 2*/); - dest[j++] = exp_acc; - dest[j++] = exp_acc; - dest[j++] = exp_acc; - dest[j++] = exp_acc; - exp_acc += (exps_2[exps[i]] /*- 2*/); - dest[j++] = exp_acc; - dest[j++] = exp_acc; - dest[j++] = exp_acc; - dest[j++] = exp_acc; - exp_acc += (exps_3[exps[i]] /*- 2*/); - dest[j++] = exp_acc; - dest[j++] = exp_acc; - dest[j++] = exp_acc; - dest[j++] = exp_acc; - } - break; - } - - return 0; -} +#include "ac3_exponent.h" int exponent_unpack (ac3dec_t * p_ac3dec) { diff --git a/src/ac3_decoder/ac3_exponent.h b/src/ac3_decoder/ac3_exponent.h new file mode 100644 index 0000000000..cb8a514cb2 --- /dev/null +++ b/src/ac3_decoder/ac3_exponent.h @@ 
-0,0 +1,152 @@ +/***************************************************************************** + * ac3_exponent.h: ac3 exponent calculations + ***************************************************************************** + * Copyright (C) 1999, 2000 VideoLAN + * $Id: ac3_exponent.h,v 1.5 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Michel Kaempf + * Michel Lespinasse + * Aaron Holtzman + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
+ *****************************************************************************/ + +static const s16 exps_1[128] = +{ + -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2, + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 0, 0, 0 +}; + +static const s16 exps_2[128] = +{ + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, + 0, 0, 0 +}; + +static const s16 exps_3[128] = +{ + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2, + 0, 0, 0 +}; + +#define UNPACK_FBW 1 +#define UNPACK_CPL 2 +#define UNPACK_LFE 4 + +static __inline__ int exp_unpack_ch (ac3dec_t * p_ac3dec, u16 type, + u16 expstr, u16 ngrps, u16 initial_exp, + u16 exps[], u16 * dest) +{ + u16 i,j; + s16 exp_acc; + + if (expstr == EXP_REUSE) + { + return 0; + } + + /* Handle the initial absolute exponent */ + exp_acc = initial_exp; + j = 0; + + /* In the case of a fbw channel then the initial absolute values is + * also an exponent */ + if (type != UNPACK_CPL) + { + dest[j++] = exp_acc; + } + + /* Loop through the groups and fill the dest array appropriately */ + 
switch (expstr) + { + case EXP_D15: /* 1 */ + for (i = 0; i < ngrps; i++) + { + if (exps[i] > 124) + { + intf_ErrMsg ( "ac3dec error: invalid exponent" ); + return 1; + } + exp_acc += (exps_1[exps[i]] /*- 2*/); + dest[j++] = exp_acc; + exp_acc += (exps_2[exps[i]] /*- 2*/); + dest[j++] = exp_acc; + exp_acc += (exps_3[exps[i]] /*- 2*/); + dest[j++] = exp_acc; + } + break; + + case EXP_D25: /* 2 */ + for (i = 0; i < ngrps; i++) + { + if (exps[i] > 124) + { + intf_ErrMsg ( "ac3dec error: invalid exponent" ); + return 1; + } + exp_acc += (exps_1[exps[i]] /*- 2*/); + dest[j++] = exp_acc; + dest[j++] = exp_acc; + exp_acc += (exps_2[exps[i]] /*- 2*/); + dest[j++] = exp_acc; + dest[j++] = exp_acc; + exp_acc += (exps_3[exps[i]] /*- 2*/); + dest[j++] = exp_acc; + dest[j++] = exp_acc; + } + break; + + case EXP_D45: /* 3 */ + for (i = 0; i < ngrps; i++) + { + if (exps[i] > 124) + { + intf_ErrMsg ( "ac3dec error: invalid exponent" ); + return 1; + } + exp_acc += (exps_1[exps[i]] /*- 2*/); + dest[j++] = exp_acc; + dest[j++] = exp_acc; + dest[j++] = exp_acc; + dest[j++] = exp_acc; + exp_acc += (exps_2[exps[i]] /*- 2*/); + dest[j++] = exp_acc; + dest[j++] = exp_acc; + dest[j++] = exp_acc; + dest[j++] = exp_acc; + exp_acc += (exps_3[exps[i]] /*- 2*/); + dest[j++] = exp_acc; + dest[j++] = exp_acc; + dest[j++] = exp_acc; + dest[j++] = exp_acc; + } + break; + } + + return 0; +} + diff --git a/src/ac3_decoder/ac3_imdct.c b/src/ac3_decoder/ac3_imdct.c index c52006b21f..639ac439f3 100644 --- a/src/ac3_decoder/ac3_imdct.c +++ b/src/ac3_decoder/ac3_imdct.c @@ -2,7 +2,7 @@ * ac3_imdct.c: ac3 DCT ***************************************************************************** * Copyright (C) 1999, 2000 VideoLAN - * $Id: ac3_imdct.c,v 1.19 2001/05/14 15:58:04 reno Exp $ + * $Id: ac3_imdct.c,v 1.20 2001/05/15 16:19:42 sam Exp $ * * Authors: Michel Kaempf * Aaron Holtzman @@ -23,6 +23,9 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
*****************************************************************************/ +/***************************************************************************** + * Preamble + *****************************************************************************/ #include "defs.h" #include <string.h> /* memcpy() */ @@ -38,235 +41,223 @@ #include "stream_control.h" #include "input_ext-dec.h" +#include "ac3_imdct.h" +#include "ac3_downmix.h" #include "ac3_decoder.h" -#include "ac3_imdct_c.h" /* imdct_init_c */ -#include "ac3_imdct_sse.h" /* imdct_init_sse */ - -#include "tests.h" /* TestCPU */ - #ifndef M_PI # define M_PI 3.14159265358979323846 #endif - void imdct_init(imdct_t * p_imdct) { - int i; - float scale = 181.019; -#if 0 - if ( TestCPU (CPU_CAPABILITY_SSE) ) - { - imdct_init_sse (p_imdct); - } - else -#endif - { - imdct_init_c (p_imdct); - } + int i; + float scale = 181.019; + + p_imdct->pf_imdct_init( p_imdct ); - /* More twiddle factors to turn IFFT into IMDCT */ - for (i=0; i < 64; i++) { - p_imdct->xcos2[i] = cos(2.0f * M_PI * (8*i+1)/(4*N)) * scale; - p_imdct->xsin2[i] = sin(2.0f * M_PI * (8*i+1)/(4*N)) * scale; - } + /* More twiddle factors to turn IFFT into IMDCT */ + for (i=0; i < 64; i++) { + p_imdct->xcos2[i] = cos(2.0f * M_PI * (8*i+1)/(4*N)) * scale; + p_imdct->xsin2[i] = sin(2.0f * M_PI * (8*i+1)/(4*N)) * scale; + } } void imdct (ac3dec_t * p_ac3dec, s16 * buffer) { - int i; - int doable = 0; - float *center=NULL, *left, *right, *left_sur, *right_sur; - float *delay_left, *delay_right; - float *delay1_left, *delay1_right, *delay1_center, *delay1_sr, *delay1_sl; - float right_tmp, left_tmp; - void (*do_imdct)(imdct_t * p_imdct, float data[], float delay[]); + int i; + int doable = 0; + float *center=NULL, *left, *right, *left_sur, *right_sur; + float *delay_left, *delay_right; + float *delay1_left, *delay1_right, *delay1_center, *delay1_sr, *delay1_sl; + float right_tmp, left_tmp; + void (*do_imdct)(imdct_t * p_imdct, float data[], float delay[]); - /* test if dm in
frequency is doable */ - if (!(doable = p_ac3dec->audblk.blksw[0])) + /* test if dm in frequency is doable */ + if (!(doable = p_ac3dec->audblk.blksw[0])) { - do_imdct = p_ac3dec->imdct.imdct_do_512; + do_imdct = p_ac3dec->imdct.pf_imdct_512; } - else + else { - do_imdct = imdct_do_256; /* There is only a C function */ + do_imdct = p_ac3dec->imdct.pf_imdct_256; } - /* downmix in the frequency domain if all the channels - * use the same imdct */ - for (i=0; i < p_ac3dec->bsi.nfchans; i++) + /* downmix in the frequency domain if all the channels + * use the same imdct */ + for (i=0; i < p_ac3dec->bsi.nfchans; i++) { - if (doable != p_ac3dec->audblk.blksw[i]) + if (doable != p_ac3dec->audblk.blksw[i]) { - do_imdct = NULL; - break; - } - } + do_imdct = NULL; + break; + } + } if (do_imdct) { - /* dowmix first and imdct */ + /* dowmix first and imdct */ switch(p_ac3dec->bsi.acmod) { - case 7: /* 3/2 */ - p_ac3dec->downmix.downmix_3f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); - break; - case 6: /* 2/2 */ - p_ac3dec->downmix.downmix_2f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); - break; - case 5: /* 3/1 */ - p_ac3dec->downmix.downmix_3f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); - break; - case 4: /* 2/1 */ - p_ac3dec->downmix.downmix_2f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); - break; - case 3: /* 3/0 */ - p_ac3dec->downmix.downmix_3f_0r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); - break; - case 2: - break; - default: /* 1/0 */ -// if (p_ac3dec->bsi.acmod == 1) - center = p_ac3dec->samples[0]; -// else if (p_ac3dec->bsi.acmod == 0) + case 7: /* 3/2 */ + p_ac3dec->downmix.pf_downmix_3f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); + break; + case 6: /* 2/2 */ + p_ac3dec->downmix.pf_downmix_2f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); + break; + case 5: /* 3/1 */ + p_ac3dec->downmix.pf_downmix_3f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); + break; + case 4: /* 2/1 */ + 
p_ac3dec->downmix.pf_downmix_2f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); + break; + case 3: /* 3/0 */ + p_ac3dec->downmix.pf_downmix_3f_0r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par); + break; + case 2: + break; + default: /* 1/0 */ +// if (p_ac3dec->bsi.acmod == 1) + center = p_ac3dec->samples[0]; +// else if (p_ac3dec->bsi.acmod == 0) // center = samples[ac3_config.dual_mono_ch_sel]; do_imdct(&p_ac3dec->imdct, center, p_ac3dec->imdct.delay[0]); /* no downmix*/ - p_ac3dec->downmix.stream_sample_1ch_to_s16 (buffer, center); + p_ac3dec->downmix.pf_stream_sample_1ch_to_s16 (buffer, center); - return; + return; break; } - do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[0], p_ac3dec->imdct.delay[0]); - do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[1], p_ac3dec->imdct.delay[1]); - p_ac3dec->downmix.stream_sample_2ch_to_s16(buffer, p_ac3dec->samples[0], p_ac3dec->samples[1]); + do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[0], p_ac3dec->imdct.delay[0]); + do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[1], p_ac3dec->imdct.delay[1]); + p_ac3dec->downmix.pf_stream_sample_2ch_to_s16(buffer, p_ac3dec->samples[0], p_ac3dec->samples[1]); - } else { + } else { /* imdct and then downmix - * delay and samples should be saved and mixed - * fprintf(stderr, "time domain downmix\n"); */ - for (i=0; i<p_ac3dec->bsi.nfchans; i++) + * delay and samples should be saved and mixed + * fprintf(stderr, "time domain downmix\n"); */ + for (i=0; i<p_ac3dec->bsi.nfchans; i++) { - if (p_ac3dec->audblk.blksw[i]) + if (p_ac3dec->audblk.blksw[i]) /* There is only a C function */ - imdct_do_256_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]); - else - p_ac3dec->imdct.imdct_do_512_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]); - } + p_ac3dec->imdct.pf_imdct_256_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]); + else + p_ac3dec->imdct.pf_imdct_512_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]); + } - /* mix the
sample, overlap */ - switch(p_ac3dec->bsi.acmod) + /* mix the sample, overlap */ + switch(p_ac3dec->bsi.acmod) { - case 7: /* 3/2 */ - left = p_ac3dec->samples[0]; - center = p_ac3dec->samples[1]; - right = p_ac3dec->samples[2]; - left_sur = p_ac3dec->samples[3]; - right_sur = p_ac3dec->samples[4]; - delay_left = p_ac3dec->imdct.delay[0]; - delay_right = p_ac3dec->imdct.delay[1]; - delay1_left = p_ac3dec->imdct.delay1[0]; - delay1_center = p_ac3dec->imdct.delay1[1]; - delay1_right = p_ac3dec->imdct.delay1[2]; - delay1_sl = p_ac3dec->imdct.delay1[3]; - delay1_sr = p_ac3dec->imdct.delay1[4]; + case 7: /* 3/2 */ + left = p_ac3dec->samples[0]; + center = p_ac3dec->samples[1]; + right = p_ac3dec->samples[2]; + left_sur = p_ac3dec->samples[3]; + right_sur = p_ac3dec->samples[4]; + delay_left = p_ac3dec->imdct.delay[0]; + delay_right = p_ac3dec->imdct.delay[1]; + delay1_left = p_ac3dec->imdct.delay1[0]; + delay1_center = p_ac3dec->imdct.delay1[1]; + delay1_right = p_ac3dec->imdct.delay1[2]; + delay1_sl = p_ac3dec->imdct.delay1[3]; + delay1_sr = p_ac3dec->imdct.delay1[4]; - for (i = 0; i < 256; i++) { - left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center + p_ac3dec->dm_par.slev * *left_sur++; - right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *right_sur++; - *buffer++ = (s16)(left_tmp + *delay_left); - *buffer++ = (s16)(right_tmp + *delay_right); - *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.clev * *delay1_center + p_ac3dec->dm_par.slev * *delay1_sl++; - *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *delay1_sr++; - } - break; - case 6: /* 2/2 */ - left = p_ac3dec->samples[0]; - right = p_ac3dec->samples[1]; - left_sur = p_ac3dec->samples[2]; - right_sur = p_ac3dec->samples[3]; - delay_left = p_ac3dec->imdct.delay[0]; - delay_right = p_ac3dec->imdct.delay[1]; - delay1_left = 
p_ac3dec->imdct.delay1[0]; - delay1_right = p_ac3dec->imdct.delay1[1]; - delay1_sl = p_ac3dec->imdct.delay1[2]; - delay1_sr = p_ac3dec->imdct.delay1[3]; + for (i = 0; i < 256; i++) { + left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center + p_ac3dec->dm_par.slev * *left_sur++; + right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *right_sur++; + *buffer++ = (s16)(left_tmp + *delay_left); + *buffer++ = (s16)(right_tmp + *delay_right); + *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.clev * *delay1_center + p_ac3dec->dm_par.slev * *delay1_sl++; + *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *delay1_sr++; + } + break; + case 6: /* 2/2 */ + left = p_ac3dec->samples[0]; + right = p_ac3dec->samples[1]; + left_sur = p_ac3dec->samples[2]; + right_sur = p_ac3dec->samples[3]; + delay_left = p_ac3dec->imdct.delay[0]; + delay_right = p_ac3dec->imdct.delay[1]; + delay1_left = p_ac3dec->imdct.delay1[0]; + delay1_right = p_ac3dec->imdct.delay1[1]; + delay1_sl = p_ac3dec->imdct.delay1[2]; + delay1_sr = p_ac3dec->imdct.delay1[3]; - for (i = 0; i < 256; i++) { - left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.slev * *left_sur++; - right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.slev * *right_sur++; - *buffer++ = (s16)(left_tmp + *delay_left); - *buffer++ = (s16)(right_tmp + *delay_right); - *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.slev * *delay1_sl++; - *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.slev * *delay1_sr++; - } - break; - case 5: /* 3/1 */ - left = p_ac3dec->samples[0]; - center = p_ac3dec->samples[1]; - right = p_ac3dec->samples[2]; - right_sur = p_ac3dec->samples[3]; - delay_left = p_ac3dec->imdct.delay[0]; - delay_right = p_ac3dec->imdct.delay[1]; - delay1_left = p_ac3dec->imdct.delay1[0]; - 
delay1_center = p_ac3dec->imdct.delay1[1]; - delay1_right = p_ac3dec->imdct.delay1[2]; - delay1_sl = p_ac3dec->imdct.delay1[3]; + for (i = 0; i < 256; i++) { + left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.slev * *left_sur++; + right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.slev * *right_sur++; + *buffer++ = (s16)(left_tmp + *delay_left); + *buffer++ = (s16)(right_tmp + *delay_right); + *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.slev * *delay1_sl++; + *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.slev * *delay1_sr++; + } + break; + case 5: /* 3/1 */ + left = p_ac3dec->samples[0]; + center = p_ac3dec->samples[1]; + right = p_ac3dec->samples[2]; + right_sur = p_ac3dec->samples[3]; + delay_left = p_ac3dec->imdct.delay[0]; + delay_right = p_ac3dec->imdct.delay[1]; + delay1_left = p_ac3dec->imdct.delay1[0]; + delay1_center = p_ac3dec->imdct.delay1[1]; + delay1_right = p_ac3dec->imdct.delay1[2]; + delay1_sl = p_ac3dec->imdct.delay1[3]; - for (i = 0; i < 256; i++) { - left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center - p_ac3dec->dm_par.slev * *right_sur; - right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *right_sur++; - *buffer++ = (s16)(left_tmp + *delay_left); - *buffer++ = (s16)(right_tmp + *delay_right); - *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.clev * *delay1_center + p_ac3dec->dm_par.slev * *delay1_sl; - *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *delay1_sl++; - } - break; - case 4: /* 2/1 */ - left = p_ac3dec->samples[0]; - right = p_ac3dec->samples[1]; - right_sur = p_ac3dec->samples[2]; - delay_left = p_ac3dec->imdct.delay[0]; - delay_right = p_ac3dec->imdct.delay[1]; - delay1_left = p_ac3dec->imdct.delay1[0]; - delay1_right = p_ac3dec->imdct.delay1[1]; - delay1_sl = 
p_ac3dec->imdct.delay1[2]; + for (i = 0; i < 256; i++) { + left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center - p_ac3dec->dm_par.slev * *right_sur; + right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *right_sur++; + *buffer++ = (s16)(left_tmp + *delay_left); + *buffer++ = (s16)(right_tmp + *delay_right); + *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.clev * *delay1_center + p_ac3dec->dm_par.slev * *delay1_sl; + *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *delay1_sl++; + } + break; + case 4: /* 2/1 */ + left = p_ac3dec->samples[0]; + right = p_ac3dec->samples[1]; + right_sur = p_ac3dec->samples[2]; + delay_left = p_ac3dec->imdct.delay[0]; + delay_right = p_ac3dec->imdct.delay[1]; + delay1_left = p_ac3dec->imdct.delay1[0]; + delay1_right = p_ac3dec->imdct.delay1[1]; + delay1_sl = p_ac3dec->imdct.delay1[2]; - for (i = 0; i < 256; i++) { - left_tmp = p_ac3dec->dm_par.unit * *left++ - p_ac3dec->dm_par.slev * *right_sur; - right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.slev * *right_sur++; - *buffer++ = (s16)(left_tmp + *delay_left); - *buffer++ = (s16)(right_tmp + *delay_right); - *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.slev * *delay1_sl; - *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.slev * *delay1_sl++; - } - break; - case 3: /* 3/0 */ - left = p_ac3dec->samples[0]; - center = p_ac3dec->samples[1]; - right = p_ac3dec->samples[2]; - delay_left = p_ac3dec->imdct.delay[0]; - delay_right = p_ac3dec->imdct.delay[1]; - delay1_left = p_ac3dec->imdct.delay1[0]; - delay1_center = p_ac3dec->imdct.delay1[1]; - delay1_right = p_ac3dec->imdct.delay1[2]; + for (i = 0; i < 256; i++) { + left_tmp = p_ac3dec->dm_par.unit * *left++ - p_ac3dec->dm_par.slev * *right_sur; + right_tmp= p_ac3dec->dm_par.unit * *right++ + 
p_ac3dec->dm_par.slev * *right_sur++; + *buffer++ = (s16)(left_tmp + *delay_left); + *buffer++ = (s16)(right_tmp + *delay_right); + *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.slev * *delay1_sl; + *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.slev * *delay1_sl++; + } + break; + case 3: /* 3/0 */ + left = p_ac3dec->samples[0]; + center = p_ac3dec->samples[1]; + right = p_ac3dec->samples[2]; + delay_left = p_ac3dec->imdct.delay[0]; + delay_right = p_ac3dec->imdct.delay[1]; + delay1_left = p_ac3dec->imdct.delay1[0]; + delay1_center = p_ac3dec->imdct.delay1[1]; + delay1_right = p_ac3dec->imdct.delay1[2]; - for (i = 0; i < 256; i++) { - left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center; - right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++; - *buffer++ = (s16)(left_tmp + *delay_left); - *buffer++ = (s16)(right_tmp + *delay_right); - *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.clev * *delay1_center; - *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *center++; - } - break; - case 2: /* copy to output */ - for (i = 0; i < 256; i++) { - *buffer++ = (s16)p_ac3dec->samples[0][i]; - *buffer++ = (s16)p_ac3dec->samples[1][i]; - } - break; - } - } + for (i = 0; i < 256; i++) { + left_tmp = p_ac3dec->dm_par.unit * *left++ + p_ac3dec->dm_par.clev * *center; + right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++; + *buffer++ = (s16)(left_tmp + *delay_left); + *buffer++ = (s16)(right_tmp + *delay_right); + *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.clev * *delay1_center; + *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *center++; + } + break; + case 2: /* copy to output */ + for (i = 0; i < 256; i++) { + *buffer++ = (s16)p_ac3dec->samples[0][i]; + *buffer++ = (s16)p_ac3dec->samples[1][i]; + } + break; + } + } } 
diff --git a/src/ac3_decoder/ac3_imdct_c.c b/src/ac3_decoder/ac3_imdct_c.c deleted file mode 100644 index c5011bc310..0000000000 --- a/src/ac3_decoder/ac3_imdct_c.c +++ /dev/null @@ -1,421 +0,0 @@ -/***************************************************************************** - * ac3_imdct_c.c: ac3 DCT - ***************************************************************************** - * Copyright (C) 1999, 2000 VideoLAN - * $Id: ac3_imdct_c.c,v 1.3 2001/05/14 15:58:04 reno Exp $ - * - * Authors: Renaud Dartus - * Aaron Holtzman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
- *****************************************************************************/ - -#include "defs.h" - -#include /* memcpy() */ - -#include -#include - -#include "config.h" -#include "common.h" -#include "threads.h" -#include "mtime.h" - -#include "stream_control.h" -#include "input_ext-dec.h" - -#include "ac3_decoder.h" -#include "ac3_imdct_c.h" - -#ifndef M_PI -# define M_PI 3.14159265358979323846 -#endif - -void fft_64p_c (complex_t *x); -void fft_128p_c (complex_t *x); - -static float window[] = { - 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130, - 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443, - 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061, - 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121, - 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770, - 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153, - 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389, - 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563, - 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699, - 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757, - 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626, - 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126, - 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019, - 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031, - 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873, - 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269, - 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981, - 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831, - 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716, - 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610, - 0.92028, 
0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560, - 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674, - 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099, - 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994, - 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513, - 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788, - 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919, - 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974, - 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993, - 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999, - 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, - 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 -}; - -static const int pm128[128] = -{ - 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120, - 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124, - 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122, - 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126, - 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121, - 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, - 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, - 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 -}; - -static const int pm64[64] = -{ - 0, 8, 16, 24, 32, 40, 48, 56, - 4, 20, 36, 52, 12, 28, 44, 60, - 2, 10, 18, 26, 34, 42, 50, 58, - 6, 14, 22, 30, 38, 46, 54, 62, - 1, 9, 17, 25, 33, 41, 49, 57, - 5, 21, 37, 53, 13, 29, 45, 61, - 3, 11, 19, 27, 35, 43, 51, 59, - 7, 23, 39, 55, 15, 31, 47, 63 -}; - -int imdct_init_c (imdct_t * p_imdct) -{ - int i; - float scale = 181.019; - - p_imdct->imdct_do_512 = imdct_do_512_c; - p_imdct->imdct_do_512_nol = imdct_do_512_nol_c; - p_imdct->fft_64p = fft_64p_c; - - /* Twiddle factors to turn 
IFFT into IMDCT */ - - for (i=0; i < 128; i++) { - p_imdct->xcos1[i] = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale; - p_imdct->xsin1[i] = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale; - } - - return 0; -} - -void imdct_do_256 (imdct_t * p_imdct, float data[],float delay[]) -{ - int i, j, k; - int p, q; - - float tmp_a_i; - float tmp_a_r; - - float *data_ptr; - float *delay_ptr; - float *window_ptr; - - complex_t *buf1, *buf2; - - buf1 = &p_imdct->buf[0]; - buf2 = &p_imdct->buf[64]; - - /* Pre IFFT complex multiply plus IFFT complex conjugate */ - for (k=0; k<64; k++) { - /* X1[k] = X[2*k] - * X2[k] = X[2*k+1] */ - - j = pm64[k]; - p = 2 * (128-2*j-1); - q = 2 * (2 * j); - - /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */ - buf1[k].real = data[p] * p_imdct->xcos2[j] - data[q] * p_imdct->xsin2[j]; - buf1[k].imag = -1.0f*(data[q] * p_imdct->xcos2[j] + data[p] * p_imdct->xsin2[j]); - /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */ - buf2[k].real = data[p + 1] * p_imdct->xcos2[j] - data[q + 1] * p_imdct->xsin2[j]; - buf2[k].imag = -1.0f*(data[q + 1] * p_imdct->xcos2[j] + data[p + 1] * p_imdct->xsin2[j]); - } - - p_imdct->fft_64p(&buf1[0]); - p_imdct->fft_64p(&buf2[0]); - - /* Post IFFT complex multiply */ - for( i=0; i < 64; i++) { - tmp_a_r = buf1[i].real; - tmp_a_i = -buf1[i].imag; - buf1[i].real = (tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]); - buf1[i].imag = (tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]); - tmp_a_r = buf2[i].real; - tmp_a_i = -buf2[i].imag; - buf2[i].real = (tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]); - buf2[i].imag = (tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]); - } - - data_ptr = data; - delay_ptr = delay; - window_ptr = window; - - /* Window and convert to real valued signal */ - for(i=0; i< 64; i++) { - *data_ptr++ = -buf1[i].imag * *window_ptr++ + *delay_ptr++; - *data_ptr++ = buf1[64-i-1].real * *window_ptr++ + *delay_ptr++; - } - - 
for(i=0; i< 64; i++) { - *data_ptr++ = -buf1[i].real * *window_ptr++ + *delay_ptr++; - *data_ptr++ = buf1[64-i-1].imag * *window_ptr++ + *delay_ptr++; - } - - delay_ptr = delay; - - for(i=0; i< 64; i++) { - *delay_ptr++ = -buf2[i].real * *--window_ptr; - *delay_ptr++ = buf2[64-i-1].imag * *--window_ptr; - } - - for(i=0; i< 64; i++) { - *delay_ptr++ = buf2[i].imag * *--window_ptr; - *delay_ptr++ = -buf2[64-i-1].real * *--window_ptr; - } -} - - -void imdct_do_256_nol (imdct_t * p_imdct, float data[], float delay[]) -{ - int i, j, k; - int p, q; - - float tmp_a_i; - float tmp_a_r; - - float *data_ptr; - float *delay_ptr; - float *window_ptr; - - complex_t *buf1, *buf2; - - buf1 = &p_imdct->buf[0]; - buf2 = &p_imdct->buf[64]; - - /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ - for(k=0; k<64; k++) { - /* X1[k] = X[2*k] - * X2[k] = X[2*k+1] */ - j = pm64[k]; - p = 2 * (128-2*j-1); - q = 2 * (2 * j); - - /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */ - buf1[k].real = data[p] * p_imdct->xcos2[j] - data[q] * p_imdct->xsin2[j]; - buf1[k].imag = -1.0f*(data[q] * p_imdct->xcos2[j] + data[p] * p_imdct->xsin2[j]); - /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */ - buf2[k].real = data[p + 1] * p_imdct->xcos2[j] - data[q + 1] * p_imdct->xsin2[j]; - buf2[k].imag = -1.0f*(data[q + 1] * p_imdct->xcos2[j] + data[p + 1] * p_imdct->xsin2[j]); - } - - p_imdct->fft_64p(&buf1[0]); - p_imdct->fft_64p(&buf2[0]); - - /* Post IFFT complex multiply */ - for( i=0; i < 64; i++) { - /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ - tmp_a_r = buf1[i].real; - tmp_a_i = -buf1[i].imag; - buf1[i].real =(tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]); - buf1[i].imag =(tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]); - /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */ - tmp_a_r = buf2[i].real; - tmp_a_i = -buf2[i].imag; - buf2[i].real =(tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]); - buf2[i].imag 
=(tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]); - } - - data_ptr = data; - delay_ptr = delay; - window_ptr = window; - - /* Window and convert to real valued signal, no overlap */ - for(i=0; i< 64; i++) { - *data_ptr++ = -buf1[i].imag * *window_ptr++; - *data_ptr++ = buf1[64-i-1].real * *window_ptr++; - } - - for(i=0; i< 64; i++) { - *data_ptr++ = -buf1[i].real * *window_ptr++ + *delay_ptr++; - *data_ptr++ = buf1[64-i-1].imag * *window_ptr++ + *delay_ptr++; - } - - delay_ptr = delay; - - for(i=0; i< 64; i++) { - *delay_ptr++ = -buf2[i].real * *--window_ptr; - *delay_ptr++ = buf2[64-i-1].imag * *--window_ptr; - } - - for(i=0; i< 64; i++) { - *delay_ptr++ = buf2[i].imag * *--window_ptr; - *delay_ptr++ = -buf2[64-i-1].real * *--window_ptr; - } -} - -void imdct_do_512_c (imdct_t * p_imdct, float data[], float delay[]) -{ - int i, j; - float tmp_a_r, tmp_a_i; - float *data_ptr; - float *delay_ptr; - float *window_ptr; - - /* 512 IMDCT with source and dest data in 'data' - * Pre IFFT complex multiply plus IFFT complex conjugate */ - - for( i=0; i < 128; i++) { - j = pm128[i]; - /* a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]); - * c = data[2*j] * xcos1[j]; - * b = data[256-2*j-1] * xsin1[j]; - * buf1[i].real = a - b + c; - * buf1[i].imag = b + c; */ - p_imdct->buf[i].real = (data[256-2*j-1] * p_imdct->xcos1[j]) - (data[2*j] * p_imdct->xsin1[j]); - p_imdct->buf[i].imag = -1.0 * (data[2*j] * p_imdct->xcos1[j] + data[256-2*j-1] * p_imdct->xsin1[j]); - } - - fft_128p_c (&p_imdct->buf[0]); - - /* Post IFFT complex multiply plus IFFT complex conjugate */ - for (i=0; i < 128; i++) { - tmp_a_r = p_imdct->buf[i].real; - tmp_a_i = p_imdct->buf[i].imag; - /* a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]); - * b = tmp_a_r * xsin1[j]; - * c = tmp_a_i * xcos1[j]; - * buf[j].real = a - b + c; - * buf[j].imag = b + c; */ - p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i]) + (tmp_a_i * p_imdct->xsin1[i]); - p_imdct->buf[i].imag =(tmp_a_r * 
p_imdct->xsin1[i]) - (tmp_a_i * p_imdct->xcos1[i]); - } - - data_ptr = data; - delay_ptr = delay; - window_ptr = window; - - /* Window and convert to real valued signal */ - for (i=0; i< 64; i++) { - *data_ptr++ = -p_imdct->buf[64+i].imag * *window_ptr++ + *delay_ptr++; - *data_ptr++ = p_imdct->buf[64-i-1].real * *window_ptr++ + *delay_ptr++; - } - - for(i=0; i< 64; i++) { - *data_ptr++ = -p_imdct->buf[i].real * *window_ptr++ + *delay_ptr++; - *data_ptr++ = p_imdct->buf[128-i-1].imag * *window_ptr++ + *delay_ptr++; - } - - /* The trailing edge of the window goes into the delay line */ - delay_ptr = delay; - - for(i=0; i< 64; i++) { - *delay_ptr++ = -p_imdct->buf[64+i].real * *--window_ptr; - *delay_ptr++ = p_imdct->buf[64-i-1].imag * *--window_ptr; - } - - for(i=0; i<64; i++) { - *delay_ptr++ = p_imdct->buf[i].imag * *--window_ptr; - *delay_ptr++ = -p_imdct->buf[128-i-1].real * *--window_ptr; - } -} - - -void imdct_do_512_nol_c (imdct_t * p_imdct, float data[], float delay[]) -{ - int i, j; - - float tmp_a_i; - float tmp_a_r; - - float *data_ptr; - float *delay_ptr; - float *window_ptr; - - /* 512 IMDCT with source and dest data in 'data' - * Pre IFFT complex multiply plus IFFT cmplx conjugate */ - - for( i=0; i < 128; i++) { - /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) */ - j = pm128[i]; - /* a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]); - * c = data[2*j] * xcos1[j]; - * b = data[256-2*j-1] * xsin1[j]; - * buf1[i].real = a - b + c; - * buf1[i].imag = b + c; */ - p_imdct->buf[i].real = (data[256-2*j-1] * p_imdct->xcos1[j]) - (data[2*j] * p_imdct->xsin1[j]); - p_imdct->buf[i].imag = -1.0 * (data[2*j] * p_imdct->xcos1[j] + data[256-2*j-1] * p_imdct->xsin1[j]); - } - - fft_128p_c (&p_imdct->buf[0]); - - /* Post IFFT complex multiply plus IFFT complex conjugate*/ - for (i=0; i < 128; i++) { - /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; - * int j1 = i; */ - tmp_a_r = p_imdct->buf[i].real; - tmp_a_i = p_imdct->buf[i].imag; - /* a = 
(tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]); - * b = tmp_a_r * xsin1[j]; - * c = tmp_a_i * xcos1[j]; - * buf[j].real = a - b + c; - * buf[j].imag = b + c; */ - p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i]) + (tmp_a_i * p_imdct->xsin1[i]); - p_imdct->buf[i].imag =(tmp_a_r * p_imdct->xsin1[i]) - (tmp_a_i * p_imdct->xcos1[i]); - } - - data_ptr = data; - delay_ptr = delay; - window_ptr = window; - - /* Window and convert to real valued signal, no overlap here*/ - for (i=0; i< 64; i++) { - *data_ptr++ = -p_imdct->buf[64+i].imag * *window_ptr++; - *data_ptr++ = p_imdct->buf[64-i-1].real * *window_ptr++; - } - - for(i=0; i< 64; i++) { - *data_ptr++ = -p_imdct->buf[i].real * *window_ptr++; - *data_ptr++ = p_imdct->buf[128-i-1].imag * *window_ptr++; - } - - /* The trailing edge of the window goes into the delay line */ - delay_ptr = delay; - - for(i=0; i< 64; i++) { - *delay_ptr++ = -p_imdct->buf[64+i].real * *--window_ptr; - *delay_ptr++ = p_imdct->buf[64-i-1].imag * *--window_ptr; - } - - for(i=0; i<64; i++) { - *delay_ptr++ = p_imdct->buf[i].imag * *--window_ptr; - *delay_ptr++ = -p_imdct->buf[128-i-1].real * *--window_ptr; - } -} diff --git a/src/ac3_decoder/ac3_imdct_sse.c b/src/ac3_decoder/ac3_imdct_sse.c deleted file mode 100644 index 7146057586..0000000000 --- a/src/ac3_decoder/ac3_imdct_sse.c +++ /dev/null @@ -1,642 +0,0 @@ -/***************************************************************************** - * ac3_imdct_sse.c: ac3 DCT - ***************************************************************************** - * Copyright (C) 1999, 2000 VideoLAN - * $Id: ac3_imdct_sse.c,v 1.1 2001/05/14 15:58:04 reno Exp $ - * - * Authors: Renaud Dartus - * Aaron Holtzman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. - *****************************************************************************/ - -#include "defs.h" - -#include -#include - -#include "config.h" -#include "common.h" -#include "threads.h" -#include "mtime.h" - -#include "intf_msg.h" /* intf_DbgMsg(), intf_ErrMsg() */ - -#include "stream_control.h" -#include "input_ext-dec.h" - -#include "ac3_decoder.h" - -#include "ac3_imdct_sse.h" - -static const float window[] = { - 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130, - 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443, - 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061, - 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121, - 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770, - 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153, - 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389, - 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563, - 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699, - 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757, - 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626, - 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126, - 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019, - 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031, - 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873, - 
0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269, - 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981, - 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831, - 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716, - 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610, - 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560, - 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674, - 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099, - 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994, - 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513, - 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788, - 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919, - 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974, - 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993, - 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999, - 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, - 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 -}; - -static const int pm128[128] = -{ - 0, 16, 32, 48, 64, 80, 96, 112, 8, 40, 72, 104, 24, 56, 88, 120, - 4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44, 60, 76, 92, 108, 124, - 2, 18, 34, 50, 66, 82, 98, 114, 10, 42, 74, 106, 26, 58, 90, 122, - 6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62, 94, 126, - 1, 17, 33, 49, 65, 81, 97, 113, 9, 41, 73, 105, 25, 57, 89, 121, - 5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45, 61, 77, 93, 109, 125, - 3, 19, 35, 51, 67, 83, 99, 115, 11, 43, 75, 107, 27, 59, 91, 123, - 7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47, 63, 79, 95, 111, 127 -}; - -void fft_64p_sse (complex_t *x); -void fft_128p_sse(complex_t *a); -static void imdct512_pre_ifft_twiddle_sse (const int *pmt, complex_t *buf, float 
*data, float *xcos_sin_sse); -static void imdct512_post_ifft_twiddle_sse (complex_t *buf, float *xcos_sin_sse); -static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt); -static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt); - - -int imdct_init_sse (imdct_t * p_imdct) -{ - int i; - float scale = 181.019; - - intf_WarnMsg (1, "ac3dec: using MMX_SSE for imdct"); - p_imdct->imdct_do_512 = imdct_do_512_sse; - p_imdct->imdct_do_512_nol = imdct_do_512_nol_sse; - p_imdct->fft_64p = fft_64p_sse; - - for (i=0; i < 128; i++) - { - float xcos_i = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale; - float xsin_i = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale; - p_imdct->xcos_sin_sse[i * 4] = xcos_i; - p_imdct->xcos_sin_sse[i * 4 + 1] = -xsin_i; - p_imdct->xcos_sin_sse[i * 4 + 2] = -xsin_i; - p_imdct->xcos_sin_sse[i * 4 + 3] = -xcos_i; - } - return 0; -} - -void imdct_do_512_sse (imdct_t * p_imdct, float data[], float delay[]) -{ - imdct512_pre_ifft_twiddle_sse (pm128, p_imdct->buf, data, p_imdct->xcos_sin_sse); - fft_128p_sse (p_imdct->buf); - imdct512_post_ifft_twiddle_sse (p_imdct->buf, p_imdct->xcos_sin_sse); - imdct512_window_delay_sse (p_imdct->buf, data, window, delay); -} - - -void imdct_do_512_nol_sse (imdct_t * p_imdct, float data[], float delay[]) -{ - imdct512_pre_ifft_twiddle_sse (pm128, p_imdct->buf, data, p_imdct->xcos_sin_sse); - fft_128p_sse (p_imdct->buf); - imdct512_post_ifft_twiddle_sse (p_imdct->buf, p_imdct->xcos_sin_sse); - imdct512_window_delay_nol_sse (p_imdct->buf, data, window, delay); -} - -static void imdct512_pre_ifft_twiddle_sse (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse) -{ - __asm__ __volatile__ ( - "pushl %%ebp\n" - "movl %%esp, %%ebp\n" - "addl $-4, %%esp\n" /* local variable, loop counter */ - - "pushl %%eax\n" - "pushl %%ebx\n" - "pushl %%ecx\n" - "pushl %%edx\n" - "pushl %%edi\n" - "pushl %%esi\n" - - "movl 8(%%ebp), 
%%eax\n" /* pmt */ - "movl 12(%%ebp), %%ebx\n" /* buf */ - "movl 16(%%ebp), %%ecx\n" /* data */ - "movl 20(%%ebp), %%edx\n" /* xcos_sin_sse */ - "movl $64, -4(%%ebp)\n" - -".loop:\n" - "movl (%%eax), %%esi\n" - "movl 4(%%eax), %%edi\n" - "movss (%%ecx, %%esi, 8), %%xmm1\n" /* 2j */ - "movss (%%ecx, %%edi, 8), %%xmm3\n" /* 2(j+1) */ - - "shll $1, %%esi\n" - "shll $1, %%edi\n" - - "movups (%%edx, %%esi, 8), %%xmm0\n" /* -c_j | -s_j | -s_j | c_j */ - "movups (%%edx, %%edi, 8), %%xmm2\n" /* -c_j+1 | -s_j+1 | -s_j+1 | c_j+1 */ - - "negl %%esi\n" - "negl %%edi\n" - - "movss 1020(%%ecx, %%esi, 4), %%xmm4\n" /* 255-2j */ - "addl $8, %%eax\n" - "movss 1020(%%ecx, %%edi, 4), %%xmm5\n" /* 255-2(j+1) */ - - "shufps $0, %%xmm1, %%xmm4\n" /* 2j | 2j | 255-2j | 255-2j */ - "shufps $0, %%xmm3, %%xmm5\n" /* 2(j+1) | 2(j+1) | 255-2(j+1) | 255-2(j+1) */ - "mulps %%xmm4, %%xmm0\n" - "mulps %%xmm5, %%xmm2\n" - "movhlps %%xmm0, %%xmm1\n" - "movhlps %%xmm2, %%xmm3\n" - "addl $16, %%ebx\n" - "addps %%xmm1, %%xmm0\n" - "addps %%xmm3, %%xmm2\n" - "movlhps %%xmm2, %%xmm0\n" - - "movups %%xmm0, -16(%%ebx)\n" - "decl -4(%%ebp)\n" - "jnz .loop\n" - - "popl %%esi\n" - "popl %%edi\n" - "popl %%edx\n" - "popl %%ecx\n" - "popl %%ebx\n" - "popl %%eax\n" - - "addl $4, %%esp\n" - "popl %%ebp\n" - ::); -} - -static void imdct512_post_ifft_twiddle_sse (complex_t *buf, float *xcos_sin_sse) -{ - __asm__ __volatile__ ( - "pushl %%ecx\n" - "movl $32, %%ecx\n" /* loop counter */ - -".loop1:\n" - "movups (%%eax), %%xmm0\n" /* im1 | re1 | im0 | re0 */ - - "movups (%%ebx), %%xmm2\n" /* -c | -s | -s | c */ - "movhlps %%xmm0, %%xmm1\n" /* im1 | re1 */ - "movups 16(%%ebx), %%xmm3\n" /* -c1 | -s1 | -s1 | c1 */ - - "shufps $0x50, %%xmm0, %%xmm0\n" /* im0 | im0 | re0 | re0 */ - "shufps $0x50, %%xmm1, %%xmm1\n" /* im1 | im1 | re1 | re1 */ - - "movups 16(%%eax), %%xmm4\n" /* im3 | re3 | im2 | re2 */ - - "shufps $0x27, %%xmm2, %%xmm2\n" /* c | -s | -s | -c */ - "movhlps %%xmm4, %%xmm5\n" /* im3 | re3 */ - "shufps $0x27, 
%%xmm3, %%xmm3\n" /* c1 | -s1 | -s1 | -c1 */ - - "movups 32(%%ebx), %%xmm6\n" /* -c2 | -s2 | -s2 | c2 */ - "movups 48(%%ebx), %%xmm7\n" /* -c3 | -s3 | -s3 | c3 */ - - "shufps $0x50, %%xmm4, %%xmm4\n" /* im2 | im2 | re2 | re2 */ - "shufps $0x50, %%xmm5, %%xmm5\n" /* im3 | im3 | re3 | re3 */ - - "mulps %%xmm2, %%xmm0\n" - "mulps %%xmm3, %%xmm1\n" - - "shufps $0x27, %%xmm6, %%xmm6\n" /* c2 | -s2 | -s2 | -c2 */ - "shufps $0x27, %%xmm7, %%xmm7\n" /* c3 | -s3 | -s3 | -c3 */ - - "movhlps %%xmm0, %%xmm2\n" - "movhlps %%xmm1, %%xmm3\n" - - "mulps %%xmm6, %%xmm4\n" - "mulps %%xmm7, %%xmm5\n" - - "addps %%xmm2, %%xmm0\n" - "addps %%xmm3, %%xmm1\n" - - "movhlps %%xmm4, %%xmm6\n" - "movhlps %%xmm5, %%xmm7\n" - - "addps %%xmm6, %%xmm4\n" - "addps %%xmm7, %%xmm5\n" - - "movlhps %%xmm1, %%xmm0\n" - "movlhps %%xmm5, %%xmm4\n" - - "movups %%xmm0, (%%eax)\n" - "movups %%xmm4, 16(%%eax)\n" - "addl $64, %%ebx\n" - "addl $32, %%eax\n" - "decl %%ecx\n" - "jnz .loop1\n" - - "popl %%ecx\n" - : "=a" (buf) - : "a" (buf), "b" (xcos_sin_sse) ); -} - -static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt) -{ - __asm__ __volatile__ ( - "pushl %%ebp\n" - "movl %%esp, %%ebp\n" - - "pushl %%eax\n" - "pushl %%ebx\n" - "pushl %%ecx\n" - "pushl %%edx\n" - "pushl %%esi\n" - "pushl %%edi\n" - - "movl 20(%%ebp), %%ebx\n" /* delay */ - "movl 16(%%ebp), %%edx\n" /* window */ - - "movl 8(%%ebp), %%eax\n" /* buf */ - "movl $16, %%ecx\n" /* loop count */ - "leal 516(%%eax), %%esi\n" /* buf[64].im */ - "leal 504(%%eax), %%edi\n" /* buf[63].re */ - "movl 12(%%ebp), %%eax\n" /* data */ - -".first_128_samples:\n" - "movss (%%esi), %%xmm0\n" - "movss 8(%%esi), %%xmm2\n" - "movss (%%edi), %%xmm1\n" - "movss -8(%%edi), %%xmm3\n" - - "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */ - "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */ - - "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ - "movups (%%ebx), %%xmm5\n" /* d3 | d2 | d1 | d0 */ - "shufps 
$0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */ - - "movss 16(%%esi), %%xmm6\n" /* im2 */ - "movss 24(%%esi), %%xmm7\n" /* im3 */ - "subps %%xmm1, %%xmm0\n" /* -re1 | im1 | -re0 | im0 */ - "movss -16(%%edi), %%xmm2\n" /* re2 */ - "movss -24(%%edi), %%xmm3\n" /* re3 */ - "mulps %%xmm4, %%xmm0\n" - "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */ - "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */ - "addps %%xmm5, %%xmm0\n" - "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */ - "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */ - "movups 16(%%ebx), %%xmm5\n" /* d7 | d6 | d5 | d4 */ - "subps %%xmm2, %%xmm6\n" /* -re3 | im3 | -re2 | im2 */ - "addl $32, %%edx\n" - "movups %%xmm0, (%%eax)\n" - "addl $32, %%ebx\n" - "mulps %%xmm4, %%xmm6\n" - "addl $32, %%esi\n" - "addl $32, %%eax\n" - "addps %%xmm5, %%xmm6\n" - "addl $-32, %%edi\n" - "movups %%xmm6, -16(%%eax)\n" - "decl %%ecx\n" - "jnz .first_128_samples\n" - - "movl 8(%%ebp), %%esi\n" /* buf[0].re */ - "leal 1020(%%esi), %%edi\n" /* buf[127].im */ - "movl $16, %%ecx\n" /* loop count */ - -".second_128_samples:\n" - "movss (%%esi), %%xmm0\n" /* buf[i].re */ - "movss 8(%%esi), %%xmm2\n" /* re1 */ - "movss (%%edi), %%xmm1\n" /* buf[127-i].im */ - "movss -8(%%edi), %%xmm3\n" /* im1 */ - - "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */ - "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im1 */ - - "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ - "movups (%%ebx), %%xmm5\n" /* d3 | d2 | d1 | d0 */ - - "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */ - "movss 16(%%esi), %%xmm6\n" /* re2 */ - "movss 24(%%esi), %%xmm7\n" /* re3 */ - "movss -16(%%edi), %%xmm2\n" /* im2 */ - "movss -24(%%edi), %%xmm3\n" /* im3 */ - "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */ - "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */ - "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */ - "mulps %%xmm4, %%xmm0\n" - "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */ - "movups 
16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */ - "addl $32, %%esi\n" - "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */ - "addps %%xmm5, %%xmm0\n" - "mulps %%xmm4, %%xmm6\n" - "addl $-32, %%edi\n" - "movups 16(%%ebx), %%xmm5\n" /* d7 | d6 | d5 | d4 */ - "movups %%xmm0, (%%eax)\n" - "addps %%xmm5, %%xmm6\n" - "addl $32, %%edx\n" - "addl $32, %%eax\n" - "addl $32, %%ebx\n" - "movups %%xmm6, -16(%%eax)\n" - "decl %%ecx\n" - "jnz .second_128_samples\n" - - "movl 8(%%ebp), %%eax\n" - "leal 512(%%eax), %%esi\n" /* buf[64].re */ - "leal 508(%%eax), %%edi\n" /* buf[63].im */ - "movl $16, %%ecx\n" /* loop count */ - "movl 20(%%ebp), %%eax\n" /* delay */ - -".first_128_delay:\n" - "movss (%%esi), %%xmm0\n" - "movss 8(%%esi), %%xmm2\n" - "movss (%%edi), %%xmm1\n" - "movss -8(%%edi), %%xmm3\n" - - "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */ - "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */ - - "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ - "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */ - "movss 16(%%esi), %%xmm6\n" /* re2 */ - "movss 24(%%esi), %%xmm7\n" /* re3 */ - "movss -16(%%edi), %%xmm2\n" /* im2 */ - "movss -24(%%edi), %%xmm3\n" /* im3 */ - "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */ - "addl $-32, %%edx\n" - "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */ - "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */ - "mulps %%xmm4, %%xmm0\n" - "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */ - "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */ - "movups %%xmm0, (%%eax)\n" - "addl $32, %%esi\n" - "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */ - "addl $-32, %%edi\n" - "mulps %%xmm5, %%xmm6\n" - "addl $32, %%eax\n" - "movups %%xmm6, -16(%%eax)\n" - "decl %%ecx\n" - "jnz .first_128_delay\n" - - "movl 8(%%ebp), %%ebx\n" - "leal 4(%%ebx), %%esi\n" /* buf[0].im */ - "leal 1016(%%ebx), %%edi\n" /* buf[127].re */ - "movl $16, %%ecx\n" /* loop count */ - -".second_128_delay:\n" - "movss (%%esi), %%xmm0\n" 
- "movss 8(%%esi), %%xmm2\n" - "movss (%%edi), %%xmm1\n" - "movss -8(%%edi), %%xmm3\n" - - "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */ - "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */ - - "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ - "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */ - "movss 16(%%esi), %%xmm6\n" /* im2 */ - "movss 24(%%esi), %%xmm7\n" /* im3 */ - "movss -16(%%edi), %%xmm2\n" /* re2 */ - "movss -24(%%edi), %%xmm3\n" /* re3 */ - "subps %%xmm0, %%xmm1\n" /* re1 | -im1 | re0 | -im0 */ - "addl $-32, %%edx\n" - "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */ - "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */ - "mulps %%xmm4, %%xmm1\n" - "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */ - "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */ - "movups %%xmm1, (%%eax)\n" - "addl $32, %%esi\n" - "subps %%xmm6, %%xmm2\n" /* re | -im3 | re | -im2 */ - "addl $-32, %%edi\n" - "mulps %%xmm5, %%xmm2\n" - "addl $32, %%eax\n" - "movups %%xmm2, -16(%%eax)\n" - "decl %%ecx\n" - "jnz .second_128_delay\n" - - "popl %%edi\n" - "popl %%esi\n" - "popl %%edx\n" - "popl %%ecx\n" - "popl %%ebx\n" - "popl %%eax\n" - - "leave\n" - ::); -} - -static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt) -{ - __asm__ __volatile__ ( - "pushl %%ebp\n" - "movl %%esp, %%ebp\n" - - "pushl %%eax\n" - "pushl %%ebx\n" - "pushl %%ecx\n" - "pushl %%edx\n" - "pushl %%esi\n" - "pushl %%edi\n" - - /* movl 20(%%ebp), %%ebx delay */ - "movl 16(%%ebp), %%edx\n" /* window */ - - "movl 8(%%ebp), %%eax\n" /* buf */ - "movl $16, %%ecx\n" /* loop count */ - "leal 516(%%eax), %%esi\n" /* buf[64].im */ - "leal 504(%%eax), %%edi\n" /* buf[63].re */ - "movl 12(%%ebp), %%eax\n" /* data */ - -".first_128_sample:\n" - "movss (%%esi), %%xmm0\n" - "movss 8(%%esi), %%xmm2\n" - "movss (%%edi), %%xmm1\n" - "movss -8(%%edi), %%xmm3\n" - - "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */ - "movlhps 
%%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */ - - "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ - /* movups (%%ebx), %%xmm5 d3 | d2 | d1 | d0 */ - "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */ - - "movss 16(%%esi), %%xmm6\n" /* im2 */ - "movss 24(%%esi), %%xmm7\n" /* im3 */ - "subps %%xmm1, %%xmm0\n" /* -re1 | im1 | -re0 | im0 */ - "movss -16(%%edi), %%xmm2\n" /* re2 */ - "movss -24(%%edi), %%xmm3\n" /* re3 */ - "mulps %%xmm4, %%xmm0\n" - "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */ - "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */ - /* addps %%xmm5, %%xmm0 */ - "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */ - "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */ - /* movups 16(%%ebx), %%xmm5 d7 | d6 | d5 | d4 */ - "subps %%xmm2, %%xmm6\n" /* -re3 | im3 | -re2 | im2 */ - "addl $32, %%edx\n" - "movups %%xmm0, (%%eax)\n" - /* addl $32, %%ebx */ - "mulps %%xmm4, %%xmm6\n" - "addl $32, %%esi\n" - "addl $32, %%eax\n" - /* addps %%xmm5, %%xmm6 */ - "addl $-32, %%edi\n" - "movups %%xmm6, -16(%%eax)\n" - "decl %%ecx\n" - "jnz .first_128_sample\n" - - "movl 8(%%ebp), %%esi\n" /* buf[0].re */ - "leal 1020(%%esi), %%edi\n" /* buf[127].im */ - "movl $16, %%ecx\n" /* loop count */ - -".second_128_sample:\n" - "movss (%%esi), %%xmm0\n" /* buf[i].re */ - "movss 8(%%esi), %%xmm2\n" /* re1 */ - "movss (%%edi), %%xmm1\n" /* buf[127-i].im */ - "movss -8(%%edi), %%xmm3\n" /* im1 */ - - "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */ - "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im1 */ - - "movups (%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ - /* movups (%%ebx), %%xmm5 d3 | d2 | d1 | d0 */ - - "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */ - "movss 16(%%esi), %%xmm6\n" /* re2 */ - "movss 24(%%esi), %%xmm7\n" /* re3 */ - "movss -16(%%edi), %%xmm2\n" /* im2 */ - "movss -24(%%edi), %%xmm3\n" /* im3 */ - "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */ - "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */ - 
"movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */ - "mulps %%xmm4, %%xmm0\n" - "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */ - "movups 16(%%edx), %%xmm4\n" /* w7 | w6 | w5 | w4 */ - "addl $32, %%esi\n" - "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */ - /* addps %%xmm5, %%xmm0 */ - "mulps %%xmm4, %%xmm6\n" - "addl $-32, %%edi\n" - /* movups 16(%%ebx), %%xmm5 d7 | d6 | d5 | d4 */ - "movups %%xmm0, (%%eax)\n" - /* addps %%xmm5, %%xmm6 */ - "addl $32, %%edx\n" - "addl $32, %%eax\n" - /* addl $32, %%ebx */ - "movups %%xmm6, -16(%%eax)\n" - "decl %%ecx\n" - "jnz .second_128_sample\n" - - "movl 8(%%ebp), %%eax\n" - "leal 512(%%eax), %%esi\n" /* buf[64].re */ - "leal 508(%%eax), %%edi\n" /* buf[63].im */ - "movl $16, %%ecx\n" /* loop count */ - "movl 20(%%ebp), %%eax\n" /* delay */ - -".first_128_delays:\n" - "movss (%%esi), %%xmm0\n" - "movss 8(%%esi), %%xmm2\n" - "movss (%%edi), %%xmm1\n" - "movss -8(%%edi), %%xmm3\n" - - "movlhps %%xmm2, %%xmm0\n" /* 0.0 | re1 | 0.0 | re0 */ - "movlhps %%xmm3, %%xmm1\n" /* 0.0 | im1 | 0.0 | im0 */ - - "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ - "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */ - "movss 16(%%esi), %%xmm6\n" /* re2 */ - "movss 24(%%esi), %%xmm7\n" /* re3 */ - "movss -16(%%edi), %%xmm2\n" /* im2 */ - "movss -24(%%edi), %%xmm3\n" /* im3 */ - "subps %%xmm1, %%xmm0\n" /* -im1 | re1 | -im0 | re0 */ - "addl $-32, %%edx\n" - "movlhps %%xmm7, %%xmm6\n" /* 0.0 | re3 | 0.0 | re2 */ - "movlhps %%xmm3, %%xmm2\n" /* 0.0 | im3 | 0.0 | im2 */ - "mulps %%xmm4, %%xmm0\n" - "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */ - "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */ - "movups %%xmm0, (%%eax)\n" - "addl $32, %%esi\n" - "subps %%xmm2, %%xmm6\n" /* -im3 | re3 | -im2 | re2 */ - "addl $-32, %%edi\n" - "mulps %%xmm5, %%xmm6\n" - "addl $32, %%eax\n" - "movups %%xmm6, -16(%%eax)\n" - "decl %%ecx\n" - "jnz .first_128_delays\n" - - "movl 8(%%ebp), %%ebx\n" - "leal 4(%%ebx), %%esi\n" 
/* buf[0].im */ - "leal 1016(%%ebx), %%edi\n" /* buf[127].re */ - "movl $16, %%ecx\n" /* loop count */ - -".second_128_delays:\n" - "movss (%%esi), %%xmm0\n" - "movss 8(%%esi), %%xmm2\n" - "movss (%%edi), %%xmm1\n" - "movss -8(%%edi), %%xmm3\n" - - "movlhps %%xmm2, %%xmm0\n" /* 0.0 | im1 | 0.0 | im0 */ - "movlhps %%xmm3, %%xmm1\n" /* 0.0 | re1 | 0.0 | re0 */ - - "movups -16(%%edx), %%xmm4\n" /* w3 | w2 | w1 | w0 */ - "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */ - "movss 16(%%esi), %%xmm6\n" /* im2 */ - "movss 24(%%esi), %%xmm7\n" /* im3 */ - "movss -16(%%edi), %%xmm2\n" /* re2 */ - "movss -24(%%edi), %%xmm3\n" /* re3 */ - "subps %%xmm0, %%xmm1\n" /* re1 | -im1 | re0 | -im0 */ - "addl $-32, %%edx\n" - "movlhps %%xmm7, %%xmm6\n" /* 0.0 | im3 | 0.0 | im2 */ - "movlhps %%xmm3, %%xmm2\n" /* 0.0 | re3 | 0.0 | re2 */ - "mulps %%xmm4, %%xmm1\n" - "movups (%%edx), %%xmm5\n" /* w7 | w6 | w5 | w4 */ - "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */ - "movups %%xmm1, (%%eax)\n" - "addl $32, %%esi\n" - "subps %%xmm6, %%xmm2\n" /* re | -im3 | re | -im2 */ - "addl $-32, %%edi\n" - "mulps %%xmm5, %%xmm2\n" - "addl $32, %%eax\n" - "movups %%xmm2, -16(%%eax)\n" - "decl %%ecx\n" - "jnz .second_128_delays\n" - - "popl %%edi\n" - "popl %%esi\n" - "popl %%edx\n" - "popl %%ecx\n" - "popl %%ebx\n" - "popl %%eax\n" - - "leave\n" - ::); -} diff --git a/src/ac3_decoder/ac3_imdct_sse.h b/src/ac3_decoder/ac3_imdct_sse.h deleted file mode 100644 index 703f7ccd26..0000000000 --- a/src/ac3_decoder/ac3_imdct_sse.h +++ /dev/null @@ -1,3 +0,0 @@ -int imdct_init_sse (imdct_t * p_imdct); -void imdct_do_512_sse(imdct_t * p_imdct, float data[], float delay[]); -void imdct_do_512_nol_sse(imdct_t * p_imdct, float data[], float delay[]); diff --git a/src/ac3_decoder/ac3_internal.h b/src/ac3_decoder/ac3_internal.h index b6e5bdca62..08419cf2cf 100644 --- a/src/ac3_decoder/ac3_internal.h +++ b/src/ac3_decoder/ac3_internal.h @@ -2,7 +2,7 @@ * ac3_internals.h: needed by the ac3 
decoder ***************************************************************************** * Copyright (C) 2000 VideoLAN - * $Id: ac3_internal.h,v 1.9 2001/05/14 15:58:04 reno Exp $ + * $Id: ac3_internal.h,v 1.10 2001/05/15 16:19:42 sam Exp $ * * Authors: Michel Lespinasse * @@ -36,9 +36,6 @@ /* ac3_bit_allocate.c */ void bit_allocate (ac3dec_t *); -/* ac3_downmix.c */ -void downmix_init (downmix_t * p_downmix); - /* ac3_exponent.c */ int exponent_unpack (ac3dec_t *); @@ -56,3 +53,4 @@ void parse_auxdata (ac3dec_t *); /* ac3_rematrix.c */ void rematrix (ac3dec_t *); + diff --git a/src/ac3_decoder/ac3_mantissa.c b/src/ac3_decoder/ac3_mantissa.c index b95e5e5952..2f829bfa24 100644 --- a/src/ac3_decoder/ac3_mantissa.c +++ b/src/ac3_decoder/ac3_mantissa.c @@ -2,7 +2,7 @@ * ac3_mantissa.c: ac3 mantissa computation ***************************************************************************** * Copyright (C) 1999, 2000, 2001 VideoLAN - * $Id: ac3_mantissa.c,v 1.28 2001/05/14 15:58:04 reno Exp $ + * $Id: ac3_mantissa.c,v 1.29 2001/05/15 16:19:42 sam Exp $ * * Authors: Michel Kaempf * Aaron Holtzman @@ -23,6 +23,9 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
*****************************************************************************/ +/***************************************************************************** + * Preamble + *****************************************************************************/ #include "defs.h" #include /* memcpy() */ @@ -32,393 +35,18 @@ #include "threads.h" #include "mtime.h" +#include "intf_msg.h" + #include "stream_control.h" #include "input_ext-dec.h" #include "audio_output.h" +#include "ac3_imdct.h" +#include "ac3_downmix.h" #include "ac3_decoder.h" -#include "intf_msg.h" - -#define Q0 ((-2 << 15) / 3.0) -#define Q1 (0) -#define Q2 ((2 << 15) / 3.0) -static const float q_1_0[ 32 ] = -{ - Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, - Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, - Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, - 0, 0, 0, 0, 0 -}; -static const float q_1_1[ 32 ] = -{ - Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2, - Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2, - Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2, - 0, 0, 0, 0, 0 -}; -static const float q_1_2[ 32 ] = -{ - Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2, - Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2, - Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2, - 0, 0, 0, 0, 0 -}; -#undef Q0 -#undef Q1 -#undef Q2 - -#define Q0 ((-4 << 15) / 5.0) -#define Q1 ((-2 << 15) / 5.0) -#define Q2 (0) -#define Q3 ((2 << 15) / 5.0) -#define Q4 ((4 << 15) / 5.0) -static const float q_2_0[ 128 ] = -{ - Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0, - Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1, - Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2, - Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3, - Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4, - 0, 0, 0 -}; -static const float q_2_1[ 128 ] = -{ - Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, - Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, - 
Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, - Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, - Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, - 0, 0, 0 -}; -static const float q_2_2[ 128 ] = -{ - Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, - Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, - Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, - Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, - Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, - 0, 0, 0 -}; -#undef Q0 -#undef Q1 -#undef Q2 -#undef Q3 -#undef Q4 - -#define Q0 ((-10 << 15) / 11.0) -#define Q1 ((-8 << 15) / 11.0) -#define Q2 ((-6 << 15) / 11.0) -#define Q3 ((-4 << 15) / 11.0) -#define Q4 ((-2 << 15) / 11.0) -#define Q5 (0) -#define Q6 ((2 << 15) / 11.0) -#define Q7 ((4 << 15) / 11.0) -#define Q8 ((6 << 15) / 11.0) -#define Q9 ((8 << 15) / 11.0) -#define QA ((10 << 15) / 11.0) -static const float q_4_0[ 128 ] = -{ - Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, - Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, - Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, - Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, - Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, - Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, - Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, - Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, - Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, - Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, - QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, - 0, 0, 0, 0, 0, 0, 0 -}; -static const float q_4_1[ 128 ] = -{ - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, 
Q9, QA, - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, - Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, - 0, 0, 0, 0, 0, 0, 0 -}; -#undef Q0 -#undef Q1 -#undef Q2 -#undef Q3 -#undef Q4 -#undef Q5 -#undef Q6 -#undef Q7 -#undef Q8 -#undef Q9 -#undef QA - -/* Lookup tables of 0.16 two's complement quantization values */ - -static const float q_3[8] = -{ - (-6 << 15)/7.0, (-4 << 15)/7.0, (-2 << 15)/7.0, - 0 , (2 << 15)/7.0, (4 << 15)/7.0, - (6 << 15)/7.0, 0 -}; - -static const float q_5[16] = -{ - (-14 << 15)/15.0, (-12 << 15)/15.0, (-10 << 15)/15.0, - (-8 << 15)/15.0, (-6 << 15)/15.0, (-4 << 15)/15.0, - (-2 << 15)/15.0, 0 , (2 << 15)/15.0, - (4 << 15)/15.0, (6 << 15)/15.0, (8 << 15)/15.0, - (10 << 15)/15.0, (12 << 15)/15.0, (14 << 15)/15.0, - 0 -}; - -/* Conversion from bap to number of bits in the mantissas - * zeros account for cases 0,1,2,4 which are special cased */ -static const u16 qnttztab[16] = -{ - 0, 0, 0, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16 -}; - -static const float scale_factor[25] = -{ - 6.10351562500000000000000000e-05, - 3.05175781250000000000000000e-05, - 1.52587890625000000000000000e-05, - 7.62939453125000000000000000e-06, - 3.81469726562500000000000000e-06, - 1.90734863281250000000000000e-06, - 9.53674316406250000000000000e-07, - 4.76837158203125000000000000e-07, - 2.38418579101562500000000000e-07, - 1.19209289550781250000000000e-07, - 5.96046447753906250000000000e-08, - 2.98023223876953125000000000e-08, - 1.49011611938476562500000000e-08, - 7.45058059692382812500000000e-09, - 3.72529029846191406250000000e-09, - 1.86264514923095703125000000e-09, - 9.31322574615478515625000000e-10, - 4.65661287307739257812500000e-10, - 2.32830643653869628906250000e-10, - 1.16415321826934814453125000e-10, - 5.82076609134674072265625000e-11, - 2.91038304567337036132812500e-11, - 1.45519152283668518066406250e-11, - 7.27595761418342590332031250e-12, - 
3.63797880709171295166015625e-12, -}; - -static const u16 dither_lut[256] = -{ - 0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055, - 0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb, - 0x21cd, 0x81dc, 0xc1fe, 0x61ef, 0x41ba, 0xe1ab, 0xa189, 0x0198, - 0xe123, 0x4132, 0x0110, 0xa101, 0x8154, 0x2145, 0x6167, 0xc176, - 0x439a, 0xe38b, 0xa3a9, 0x03b8, 0x23ed, 0x83fc, 0xc3de, 0x63cf, - 0x8374, 0x2365, 0x6347, 0xc356, 0xe303, 0x4312, 0x0330, 0xa321, - 0x6257, 0xc246, 0x8264, 0x2275, 0x0220, 0xa231, 0xe213, 0x4202, - 0xa2b9, 0x02a8, 0x428a, 0xe29b, 0xc2ce, 0x62df, 0x22fd, 0x82ec, - 0x8734, 0x2725, 0x6707, 0xc716, 0xe743, 0x4752, 0x0770, 0xa761, - 0x47da, 0xe7cb, 0xa7e9, 0x07f8, 0x27ad, 0x87bc, 0xc79e, 0x678f, - 0xa6f9, 0x06e8, 0x46ca, 0xe6db, 0xc68e, 0x669f, 0x26bd, 0x86ac, - 0x6617, 0xc606, 0x8624, 0x2635, 0x0660, 0xa671, 0xe653, 0x4642, - 0xc4ae, 0x64bf, 0x249d, 0x848c, 0xa4d9, 0x04c8, 0x44ea, 0xe4fb, - 0x0440, 0xa451, 0xe473, 0x4462, 0x6437, 0xc426, 0x8404, 0x2415, - 0xe563, 0x4572, 0x0550, 0xa541, 0x8514, 0x2505, 0x6527, 0xc536, - 0x258d, 0x859c, 0xc5be, 0x65af, 0x45fa, 0xe5eb, 0xa5c9, 0x05d8, - 0xae79, 0x0e68, 0x4e4a, 0xee5b, 0xce0e, 0x6e1f, 0x2e3d, 0x8e2c, - 0x6e97, 0xce86, 0x8ea4, 0x2eb5, 0x0ee0, 0xaef1, 0xeed3, 0x4ec2, - 0x8fb4, 0x2fa5, 0x6f87, 0xcf96, 0xefc3, 0x4fd2, 0x0ff0, 0xafe1, - 0x4f5a, 0xef4b, 0xaf69, 0x0f78, 0x2f2d, 0x8f3c, 0xcf1e, 0x6f0f, - 0xede3, 0x4df2, 0x0dd0, 0xadc1, 0x8d94, 0x2d85, 0x6da7, 0xcdb6, - 0x2d0d, 0x8d1c, 0xcd3e, 0x6d2f, 0x4d7a, 0xed6b, 0xad49, 0x0d58, - 0xcc2e, 0x6c3f, 0x2c1d, 0x8c0c, 0xac59, 0x0c48, 0x4c6a, 0xec7b, - 0x0cc0, 0xacd1, 0xecf3, 0x4ce2, 0x6cb7, 0xcca6, 0x8c84, 0x2c95, - 0x294d, 0x895c, 0xc97e, 0x696f, 0x493a, 0xe92b, 0xa909, 0x0918, - 0xe9a3, 0x49b2, 0x0990, 0xa981, 0x89d4, 0x29c5, 0x69e7, 0xc9f6, - 0x0880, 0xa891, 0xe8b3, 0x48a2, 0x68f7, 0xc8e6, 0x88c4, 0x28d5, - 0xc86e, 0x687f, 0x285d, 0x884c, 0xa819, 0x0808, 0x482a, 0xe83b, - 0x6ad7, 0xcac6, 0x8ae4, 0x2af5, 0x0aa0, 0xaab1, 0xea93, 0x4a82, - 
0xaa39, 0x0a28, 0x4a0a, 0xea1b, 0xca4e, 0x6a5f, 0x2a7d, 0x8a6c, - 0x4b1a, 0xeb0b, 0xab29, 0x0b38, 0x2b6d, 0x8b7c, 0xcb5e, 0x6b4f, - 0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1 -}; - -static __inline__ u16 dither_gen (mantissa_t * p_mantissa) -{ - s16 state; - - state = dither_lut[p_mantissa->lfsr_state >> 8] ^ - (p_mantissa->lfsr_state << 8); - p_mantissa->lfsr_state = (u16) state; - return ( (state * (s32) (0.707106 * 256.0)) >> 8 ); -} - - -/* Fetch an unpacked, left justified, and properly biased/dithered mantissa value */ -static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithflag, - u16 exp) -{ - u16 group_code = 0; - - /* If the bap is 0-5 then we have special cases to take care of */ - switch (bap) - { - case 0: - if (dithflag) - { - return ( dither_gen(&p_ac3dec->mantissa) * scale_factor[exp] ); - } - return (0); - - case 1: - if (p_ac3dec->mantissa.q_1_pointer >= 0) - { - return (p_ac3dec->mantissa.q_1[p_ac3dec->mantissa.q_1_pointer--] * - scale_factor[exp]); - } - - p_ac3dec->total_bits_read += 5; - if ((group_code = GetBits (&p_ac3dec->bit_stream,5)) > 26) - { - intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (1)" ); - return 0; - } - - p_ac3dec->mantissa.q_1[ 1 ] = q_1_1[ group_code ]; - p_ac3dec->mantissa.q_1[ 0 ] = q_1_2[ group_code ]; - - p_ac3dec->mantissa.q_1_pointer = 1; - - return (q_1_0[group_code] * scale_factor[exp]); - - case 2: - if (p_ac3dec->mantissa.q_2_pointer >= 0) - { - return (p_ac3dec->mantissa.q_2[p_ac3dec->mantissa.q_2_pointer--] * - scale_factor[exp]); - } - - p_ac3dec->total_bits_read += 7; - if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 124) - { - intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (2)" ); - return 0; - } - - p_ac3dec->mantissa.q_2[ 1 ] = q_2_1[ group_code ]; - p_ac3dec->mantissa.q_2[ 0 ] = q_2_2[ group_code ]; - - p_ac3dec->mantissa.q_2_pointer = 1; - - return (q_2_0[group_code] * scale_factor[exp]); - - case 3: - p_ac3dec->total_bits_read += 3; - if 
((group_code = GetBits (&p_ac3dec->bit_stream,3)) > 6) - { - intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (3)" ); - return 0; - } - - return (q_3[group_code] * scale_factor[exp]); - - case 4: - if (p_ac3dec->mantissa.q_4_pointer >= 0) - { - return (p_ac3dec->mantissa.q_4[p_ac3dec->mantissa.q_4_pointer--] * - scale_factor[exp]); - } - - p_ac3dec->total_bits_read += 7; - if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 120) - { - intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (4)" ); - return 0; - } - - p_ac3dec->mantissa.q_4[ 0 ] = q_4_1[group_code]; - - p_ac3dec->mantissa.q_4_pointer = 0; - - return (q_4_0[group_code] * scale_factor[exp]); - - case 5: - p_ac3dec->total_bits_read += 4; - if ((group_code = GetBits (&p_ac3dec->bit_stream,4)) > 14) - { - intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (5)" ); - return 0; - } - - return (q_5[group_code] * scale_factor[exp]); - - default: - group_code = GetBits (&p_ac3dec->bit_stream,qnttztab[bap]); - group_code <<= 16 - qnttztab[bap]; - p_ac3dec->total_bits_read += qnttztab[bap]; - - return ((s16)(group_code) * scale_factor[exp]); - } -} - -/* Uncouple the coupling channel into a fbw channel */ -static __inline__ void uncouple_channel (ac3dec_t * p_ac3dec, u32 ch) -{ - u32 bnd = 0; - u32 sub_bnd = 0; - u32 i,j; - float cpl_coord = 1.0; - u32 cpl_exp_tmp; - u32 cpl_mant_tmp; - - for (i = p_ac3dec->audblk.cplstrtmant; i < p_ac3dec->audblk.cplendmant;) - { - if (!p_ac3dec->audblk.cplbndstrc[sub_bnd++]) - { - cpl_exp_tmp = p_ac3dec->audblk.cplcoexp[ch][bnd] + - 3 * p_ac3dec->audblk.mstrcplco[ch]; - if (p_ac3dec->audblk.cplcoexp[ch][bnd] == 15) - { - cpl_mant_tmp = (p_ac3dec->audblk.cplcomant[ch][bnd]) << 11; - } - else - { - cpl_mant_tmp = ((0x10) | p_ac3dec->audblk.cplcomant[ch][bnd]) << 10; - } - cpl_coord = (cpl_mant_tmp) * scale_factor[cpl_exp_tmp] * 8.0f; - - /* Invert the phase for the right channel if necessary */ - if (p_ac3dec->bsi.acmod == 0x02 && p_ac3dec->audblk.phsflginu && - ch == 1 && 
p_ac3dec->audblk.phsflg[bnd]) - { - cpl_coord *= -1; - } - bnd++; - } - - for (j=0;j < 12; j++) - { - /* Get new dither values for each channel if necessary, - * so the channels are uncorrelated */ - if (p_ac3dec->audblk.dithflag[ch] && !p_ac3dec->audblk.cpl_bap[i]) - { - p_ac3dec->samples[ch][i] = cpl_coord * dither_gen(&p_ac3dec->mantissa) * - scale_factor[p_ac3dec->audblk.cpl_exp[i]]; - } else { - p_ac3dec->samples[ch][i] = cpl_coord * p_ac3dec->audblk.cpl_flt[i]; - } - i++; - } - } -} +#include "ac3_mantissa.h" void mantissa_unpack (ac3dec_t * p_ac3dec) { diff --git a/src/ac3_decoder/ac3_mantissa.h b/src/ac3_decoder/ac3_mantissa.h new file mode 100644 index 0000000000..687fb66eb9 --- /dev/null +++ b/src/ac3_decoder/ac3_mantissa.h @@ -0,0 +1,404 @@ +/***************************************************************************** + * ac3_mantissa.h: ac3 mantissa computation + ***************************************************************************** + * Copyright (C) 1999, 2000, 2001 VideoLAN + * $Id: ac3_mantissa.h,v 1.4 2001/05/15 16:19:42 sam Exp $ + * + * Authors: Michel Kaempf + * Aaron Holtzman + * Renaud Dartus + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
+ *****************************************************************************/ + +#define Q0 ((-2 << 15) / 3.0) +#define Q1 (0) +#define Q2 ((2 << 15) / 3.0) +static const float q_1_0[ 32 ] = +{ + Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, + Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, + Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, + 0, 0, 0, 0, 0 +}; +static const float q_1_1[ 32 ] = +{ + Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2, + Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2, + Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2, + 0, 0, 0, 0, 0 +}; +static const float q_1_2[ 32 ] = +{ + Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2, + Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2, + Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2, + 0, 0, 0, 0, 0 +}; +#undef Q0 +#undef Q1 +#undef Q2 + +#define Q0 ((-4 << 15) / 5.0) +#define Q1 ((-2 << 15) / 5.0) +#define Q2 (0) +#define Q3 ((2 << 15) / 5.0) +#define Q4 ((4 << 15) / 5.0) +static const float q_2_0[ 128 ] = +{ + Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0, + Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1, + Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2, + Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3, + Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4, + 0, 0, 0 +}; +static const float q_2_1[ 128 ] = +{ + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4, + 0, 0, 0 +}; +static const float q_2_2[ 128 ] = +{ + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + 
Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4, + 0, 0, 0 +}; +#undef Q0 +#undef Q1 +#undef Q2 +#undef Q3 +#undef Q4 + +#define Q0 ((-10 << 15) / 11.0) +#define Q1 ((-8 << 15) / 11.0) +#define Q2 ((-6 << 15) / 11.0) +#define Q3 ((-4 << 15) / 11.0) +#define Q4 ((-2 << 15) / 11.0) +#define Q5 (0) +#define Q6 ((2 << 15) / 11.0) +#define Q7 ((4 << 15) / 11.0) +#define Q8 ((6 << 15) / 11.0) +#define Q9 ((8 << 15) / 11.0) +#define QA ((10 << 15) / 11.0) +static const float q_4_0[ 128 ] = +{ + Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, + Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, + Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, + Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, + Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, + Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, + Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, + Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, + Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, + Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, + QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, + 0, 0, 0, 0, 0, 0, 0 +}; +static const float q_4_1[ 128 ] = +{ + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA, + 0, 0, 0, 0, 0, 0, 0 +}; +#undef Q0 +#undef Q1 +#undef Q2 +#undef Q3 +#undef Q4 +#undef Q5 +#undef Q6 +#undef Q7 +#undef Q8 +#undef Q9 +#undef QA + +/* Lookup tables of 0.16 two's complement quantization values */ + +static const float q_3[8] = +{ 
+ (-6 << 15)/7.0, (-4 << 15)/7.0, (-2 << 15)/7.0, + 0 , (2 << 15)/7.0, (4 << 15)/7.0, + (6 << 15)/7.0, 0 +}; + +static const float q_5[16] = +{ + (-14 << 15)/15.0, (-12 << 15)/15.0, (-10 << 15)/15.0, + (-8 << 15)/15.0, (-6 << 15)/15.0, (-4 << 15)/15.0, + (-2 << 15)/15.0, 0 , (2 << 15)/15.0, + (4 << 15)/15.0, (6 << 15)/15.0, (8 << 15)/15.0, + (10 << 15)/15.0, (12 << 15)/15.0, (14 << 15)/15.0, + 0 +}; + +/* Conversion from bap to number of bits in the mantissas + * zeros account for cases 0,1,2,4 which are special cased */ +static const u16 qnttztab[16] = +{ + 0, 0, 0, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16 +}; + +static const float scale_factor[25] = +{ + 6.10351562500000000000000000e-05, + 3.05175781250000000000000000e-05, + 1.52587890625000000000000000e-05, + 7.62939453125000000000000000e-06, + 3.81469726562500000000000000e-06, + 1.90734863281250000000000000e-06, + 9.53674316406250000000000000e-07, + 4.76837158203125000000000000e-07, + 2.38418579101562500000000000e-07, + 1.19209289550781250000000000e-07, + 5.96046447753906250000000000e-08, + 2.98023223876953125000000000e-08, + 1.49011611938476562500000000e-08, + 7.45058059692382812500000000e-09, + 3.72529029846191406250000000e-09, + 1.86264514923095703125000000e-09, + 9.31322574615478515625000000e-10, + 4.65661287307739257812500000e-10, + 2.32830643653869628906250000e-10, + 1.16415321826934814453125000e-10, + 5.82076609134674072265625000e-11, + 2.91038304567337036132812500e-11, + 1.45519152283668518066406250e-11, + 7.27595761418342590332031250e-12, + 3.63797880709171295166015625e-12, +}; + +static const u16 dither_lut[256] = +{ + 0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055, + 0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb, + 0x21cd, 0x81dc, 0xc1fe, 0x61ef, 0x41ba, 0xe1ab, 0xa189, 0x0198, + 0xe123, 0x4132, 0x0110, 0xa101, 0x8154, 0x2145, 0x6167, 0xc176, + 0x439a, 0xe38b, 0xa3a9, 0x03b8, 0x23ed, 0x83fc, 0xc3de, 0x63cf, + 0x8374, 0x2365, 0x6347, 0xc356, 0xe303, 0x4312, 0x0330, 
0xa321,
+    0x6257, 0xc246, 0x8264, 0x2275, 0x0220, 0xa231, 0xe213, 0x4202,
+    0xa2b9, 0x02a8, 0x428a, 0xe29b, 0xc2ce, 0x62df, 0x22fd, 0x82ec,
+    0x8734, 0x2725, 0x6707, 0xc716, 0xe743, 0x4752, 0x0770, 0xa761,
+    0x47da, 0xe7cb, 0xa7e9, 0x07f8, 0x27ad, 0x87bc, 0xc79e, 0x678f,
+    0xa6f9, 0x06e8, 0x46ca, 0xe6db, 0xc68e, 0x669f, 0x26bd, 0x86ac,
+    0x6617, 0xc606, 0x8624, 0x2635, 0x0660, 0xa671, 0xe653, 0x4642,
+    0xc4ae, 0x64bf, 0x249d, 0x848c, 0xa4d9, 0x04c8, 0x44ea, 0xe4fb,
+    0x0440, 0xa451, 0xe473, 0x4462, 0x6437, 0xc426, 0x8404, 0x2415,
+    0xe563, 0x4572, 0x0550, 0xa541, 0x8514, 0x2505, 0x6527, 0xc536,
+    0x258d, 0x859c, 0xc5be, 0x65af, 0x45fa, 0xe5eb, 0xa5c9, 0x05d8,
+    0xae79, 0x0e68, 0x4e4a, 0xee5b, 0xce0e, 0x6e1f, 0x2e3d, 0x8e2c,
+    0x6e97, 0xce86, 0x8ea4, 0x2eb5, 0x0ee0, 0xaef1, 0xeed3, 0x4ec2,
+    0x8fb4, 0x2fa5, 0x6f87, 0xcf96, 0xefc3, 0x4fd2, 0x0ff0, 0xafe1,
+    0x4f5a, 0xef4b, 0xaf69, 0x0f78, 0x2f2d, 0x8f3c, 0xcf1e, 0x6f0f,
+    0xede3, 0x4df2, 0x0dd0, 0xadc1, 0x8d94, 0x2d85, 0x6da7, 0xcdb6,
+    0x2d0d, 0x8d1c, 0xcd3e, 0x6d2f, 0x4d7a, 0xed6b, 0xad49, 0x0d58,
+    0xcc2e, 0x6c3f, 0x2c1d, 0x8c0c, 0xac59, 0x0c48, 0x4c6a, 0xec7b,
+    0x0cc0, 0xacd1, 0xecf3, 0x4ce2, 0x6cb7, 0xcca6, 0x8c84, 0x2c95,
+    0x294d, 0x895c, 0xc97e, 0x696f, 0x493a, 0xe92b, 0xa909, 0x0918,
+    0xe9a3, 0x49b2, 0x0990, 0xa981, 0x89d4, 0x29c5, 0x69e7, 0xc9f6,
+    0x0880, 0xa891, 0xe8b3, 0x48a2, 0x68f7, 0xc8e6, 0x88c4, 0x28d5,
+    0xc86e, 0x687f, 0x285d, 0x884c, 0xa819, 0x0808, 0x482a, 0xe83b,
+    0x6ad7, 0xcac6, 0x8ae4, 0x2af5, 0x0aa0, 0xaab1, 0xea93, 0x4a82,
+    0xaa39, 0x0a28, 0x4a0a, 0xea1b, 0xca4e, 0x6a5f, 0x2a7d, 0x8a6c,
+    0x4b1a, 0xeb0b, 0xab29, 0x0b38, 0x2b6d, 0x8b7c, 0xcb5e, 0x6b4f,
+    0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1
+};
+/* dither_gen: advance the dither generator and return one scaled sample.
+ *
+ * p_mantissa->lfsr_state is both read and overwritten, so every call changes
+ * the generator state. The new state is returned multiplied by
+ * (s32)(0.707106 * 256.0), i.e. 181, and shifted right by 8 bits.
+ *
+ * NOTE(review): the result is computed from the signed `state` but returned
+ * as u16; code that multiplies the return value directly by a float sees a
+ * non-negative number. Confirm whether a signed return type was intended.
+ */
+static __inline__ u16 dither_gen (mantissa_t * p_mantissa)
+{
+    s16 state;
+    /* Table-driven step: the high byte of the old state selects a
+     * dither_lut entry, which is XORed with the old state shifted left 8 */
+    state = dither_lut[p_mantissa->lfsr_state >> 8] ^
+            (p_mantissa->lfsr_state << 8);
+    p_mantissa->lfsr_state = (u16) state; /* remember the state for the next call */
+    return ( (state * (s32) (0.707106 * 256.0)) >> 8 ); /* scale by 181/256 */
+}
+
+
+/* 
Fetch an unpacked, left justified, and properly biased/dithered mantissa value
+ *
+ * Returns the next mantissa of a coefficient whose bit allocation pointer is
+ * bap, multiplied by scale_factor[exp]:
+ *  - bap 0: nothing is read from the bit stream; the result is a dither
+ *    sample when dithflag is set and 0 otherwise.
+ *  - bap 1, 2 and 4: mantissas are packed in group codes (three values per
+ *    5-bit code, three per 7-bit code, two per 7-bit code). The first value
+ *    is returned right away; the others are stored in mantissa.q_1[],
+ *    q_2[] or q_4[] and handed out by the following calls, scaled with the
+ *    exponent passed to those calls.
+ *  - bap 3 and 5: a single 3-bit / 4-bit code indexing q_3[] / q_5[].
+ *  - any other bap: qnttztab[bap] raw bits, left justified on 16 bits and
+ *    read as a signed 16-bit value.
+ * A group code outside the valid range is reported through intf_WarnMsg and
+ * yields 0. total_bits_read is increased by the number of bits consumed.
+ */
+static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithflag,
+                                         u16 exp)
+{
+    u16 group_code = 0;
+
+    /* If the bap is 0-5 then we have special cases to take care of */
+    switch (bap)
+    {
+    case 0:
+        if (dithflag)
+        {
+            /* dither_gen() returns its signed sample through a u16; read it
+             * back as s16 so that the dither is centred on zero instead of
+             * adding a positive offset to the coefficient */
+            return ( (s16) dither_gen(&p_ac3dec->mantissa) * scale_factor[exp] );
+        }
+        return (0);
+
+    case 1:
+        /* Hand out a mantissa left over from the previous group code */
+        if (p_ac3dec->mantissa.q_1_pointer >= 0)
+        {
+            return (p_ac3dec->mantissa.q_1[p_ac3dec->mantissa.q_1_pointer--] *
+                    scale_factor[exp]);
+        }
+
+        p_ac3dec->total_bits_read += 5;
+        if ((group_code = GetBits (&p_ac3dec->bit_stream,5)) > 26)
+        {
+            intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (1)" );
+            return 0;
+        }
+
+        /* q_1[] is consumed from index 1 down to 0 */
+        p_ac3dec->mantissa.q_1[ 1 ] = q_1_1[ group_code ];
+        p_ac3dec->mantissa.q_1[ 0 ] = q_1_2[ group_code ];
+
+        p_ac3dec->mantissa.q_1_pointer = 1;
+
+        return (q_1_0[group_code] * scale_factor[exp]);
+
+    case 2:
+        /* Hand out a mantissa left over from the previous group code */
+        if (p_ac3dec->mantissa.q_2_pointer >= 0)
+        {
+            return (p_ac3dec->mantissa.q_2[p_ac3dec->mantissa.q_2_pointer--] *
+                    scale_factor[exp]);
+        }
+
+        p_ac3dec->total_bits_read += 7;
+        if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 124)
+        {
+            intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (2)" );
+            return 0;
+        }
+
+        /* q_2[] is consumed from index 1 down to 0 */
+        p_ac3dec->mantissa.q_2[ 1 ] = q_2_1[ group_code ];
+        p_ac3dec->mantissa.q_2[ 0 ] = q_2_2[ group_code ];
+
+        p_ac3dec->mantissa.q_2_pointer = 1;
+
+        return (q_2_0[group_code] * scale_factor[exp]);
+
+    case 3:
+        p_ac3dec->total_bits_read += 3;
+        if ((group_code = GetBits (&p_ac3dec->bit_stream,3)) > 6)
+        {
+            intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (3)" );
+            return 0;
+        }
+
+        return (q_3[group_code] * scale_factor[exp]);
+
+    case 4:
+        /* Hand out the mantissa left over from the previous group code */
+        if (p_ac3dec->mantissa.q_4_pointer >= 0)
+        {
+            return (p_ac3dec->mantissa.q_4[p_ac3dec->mantissa.q_4_pointer--] *
+                    scale_factor[exp]);
+        }
+
+        p_ac3dec->total_bits_read += 7;
+        if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 120)
+        {
+            intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (4)" );
+            return 0;
+        }
+
+        p_ac3dec->mantissa.q_4[ 0 ] = q_4_1[group_code];
+
+        p_ac3dec->mantissa.q_4_pointer = 0;
+
+        return (q_4_0[group_code] * scale_factor[exp]);
+
+    case 5:
+        p_ac3dec->total_bits_read += 4;
+        if ((group_code = GetBits (&p_ac3dec->bit_stream,4)) > 14)
+        {
+            intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (5)" );
+            return 0;
+        }
+
+        return (q_5[group_code] * scale_factor[exp]);
+
+    default:
+        /* Plain mantissa: move it to the top of a 16-bit word so that its
+         * own sign bit becomes the sign bit of the s16 value */
+        group_code = GetBits (&p_ac3dec->bit_stream,qnttztab[bap]);
+        group_code <<= 16 - qnttztab[bap];
+        p_ac3dec->total_bits_read += qnttztab[bap];
+
+        return ((s16)(group_code) * scale_factor[exp]);
+    }
+}
+
+/* Uncouple the coupling channel into a fbw channel
+ *
+ * Fills samples[ch][] for the mantissas cplstrtmant .. cplendmant-1, twelve
+ * at a time. Whenever cplbndstrc[] is zero for the current group of twelve,
+ * a new coupling coordinate is computed from cplcoexp, mstrcplco and
+ * cplcomant of the next band; otherwise the previous coordinate is reused.
+ * Each output sample is the coordinate multiplied by the coupling channel
+ * value cpl_flt[i], or by a fresh dither sample when dithering is enabled
+ * for ch and the coupling bap of that mantissa is 0.
+ */
+static __inline__ void uncouple_channel (ac3dec_t * p_ac3dec, u32 ch)
+{
+    u32 bnd = 0;
+    u32 sub_bnd = 0;
+    u32 i,j;
+    float cpl_coord = 1.0;
+    u32 cpl_exp_tmp;
+    u32 cpl_mant_tmp;
+
+    for (i = p_ac3dec->audblk.cplstrtmant; i < p_ac3dec->audblk.cplendmant;)
+    {
+        if (!p_ac3dec->audblk.cplbndstrc[sub_bnd++])
+        {
+            cpl_exp_tmp = p_ac3dec->audblk.cplcoexp[ch][bnd] +
+                3 * p_ac3dec->audblk.mstrcplco[ch];
+            /* An exponent of 15 uses the 4-bit mantissa as is; any other
+             * exponent adds an implicit leading bit (0x10) */
+            if (p_ac3dec->audblk.cplcoexp[ch][bnd] == 15)
+            {
+                cpl_mant_tmp = (p_ac3dec->audblk.cplcomant[ch][bnd]) << 11;
+            }
+            else
+            {
+                cpl_mant_tmp = ((0x10) | p_ac3dec->audblk.cplcomant[ch][bnd]) << 10;
+            }
+            cpl_coord = (cpl_mant_tmp) * scale_factor[cpl_exp_tmp] * 8.0f;
+
+            /* Invert the phase for the right channel if necessary */
+            if (p_ac3dec->bsi.acmod == 0x02 && p_ac3dec->audblk.phsflginu &&
+                    ch == 1 && p_ac3dec->audblk.phsflg[bnd])
+            {
+                cpl_coord *= -1;
+            }
+            bnd++;
+        }
+
+        for (j=0;j < 12; j++)
+        {
+            /* Get new dither values for each channel if necessary,
+             * so the channels are uncorrelated */
+            if (p_ac3dec->audblk.dithflag[ch] && !p_ac3dec->audblk.cpl_bap[i])
+            {
+                /* dither_gen() returns its signed sample through a u16; read
+                 * it back as s16 so that the dither is centred on zero */
+                p_ac3dec->samples[ch][i] = cpl_coord *
+                    (s16) dither_gen(&p_ac3dec->mantissa) *
+                    scale_factor[p_ac3dec->audblk.cpl_exp[i]];
+            } else {
+                p_ac3dec->samples[ch][i] = cpl_coord * p_ac3dec->audblk.cpl_flt[i];
+            }
+            i++;
+        }
+    }
+}
+
diff --git a/src/ac3_decoder/ac3_parse.c b/src/ac3_decoder/ac3_parse.c index 41310da6be..0903da5804 100644 --- a/src/ac3_decoder/ac3_parse.c +++ b/src/ac3_decoder/ac3_parse.c @@ -2,7 +2,7 @@ * ac3_parse.c: ac3 parsing procedures ***************************************************************************** * Copyright (C) 1999, 2000, 2001 VideoLAN - * $Id: ac3_parse.c,v 1.22 2001/05/14 15:58:04 reno Exp $ + * $Id: ac3_parse.c,v 1.23 2001/05/15 16:19:42 sam Exp $ * * Authors: Michel Kaempf * Aaron Holtzman @@ -23,6 +23,9 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. *****************************************************************************/ +/***************************************************************************** + * Preamble + *****************************************************************************/ #include "defs.h" #include /* memset() */ @@ -33,12 +36,15 @@ #include "threads.h" #include "mtime.h" +#include "intf_msg.h" + #include "stream_control.h" #include "input_ext-dec.h" #include "audio_output.h" -#include "intf_msg.h" +#include "ac3_imdct.h" +#include "ac3_downmix.h" #include "ac3_decoder.h" #include "ac3_decoder_thread.h" /* ac3dec_thread_t */ @@ -871,7 +877,6 @@ static void parse_audblk_stats (ac3dec_t * p_ac3dec) for(i=0;ibsi.nfchans;i++) intf_ErrMsg ("%1d",p_ac3dec->audblk.blksw[i]); intf_ErrMsg ("]"); - - intf_ErrMsg ("\n"); } #endif + diff --git a/src/ac3_decoder/ac3_rematrix.c b/src/ac3_decoder/ac3_rematrix.c index 3189239b5b..d9aca1cb18 100644 --- a/src/ac3_decoder/ac3_rematrix.c +++ b/src/ac3_decoder/ac3_rematrix.c @@ -2,7 +2,7 @@ * ac3_rematrix.c: ac3 audio rematrixing ***************************************************************************** * Copyright (C) 1999, 2000 VideoLAN - * $Id: ac3_rematrix.c,v 1.17 2001/05/14 15:58:04 reno Exp $ + * $Id: ac3_rematrix.c,v 1.18 2001/05/15 16:19:42 sam Exp $ * * Authors: Michel Kaempf * Aaron Holtzman @@ -21,6 +21,10 @@ * along with this program; if 
not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. *****************************************************************************/ + +/***************************************************************************** + * Preamble + *****************************************************************************/ #include "defs.h" #include /* memcpy() */ @@ -33,6 +37,8 @@ #include "stream_control.h" #include "input_ext-dec.h" +#include "ac3_imdct.h" +#include "ac3_downmix.h" #include "ac3_decoder.h" struct rematrix_band_s { @@ -79,3 +85,4 @@ void rematrix (ac3dec_t * p_ac3dec) } } } + diff --git a/src/ac3_decoder/ac3_srfft_sse.c b/src/ac3_decoder/ac3_srfft_sse.c deleted file mode 100644 index 8f5294631c..0000000000 --- a/src/ac3_decoder/ac3_srfft_sse.c +++ /dev/null @@ -1,369 +0,0 @@ -/***************************************************************************** - * ac3_srfft_sse.c: ac3 fft functions - ***************************************************************************** - * Copyright (C) 1999, 2000, 2001 VideoLAN - * $Id: ac3_srfft_sse.c,v 1.1 2001/05/14 15:58:04 reno Exp $ - * - * Authors: Renaud Dartus - * Aaron Holtzman - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. 
- *****************************************************************************/ - -#include - -#include "defs.h" - -#include -#include - -#include "config.h" -#include "common.h" -#include "threads.h" -#include "mtime.h" - -#include "stream_control.h" -#include "input_ext-dec.h" - -#include "ac3_decoder.h" -#include "ac3_srfft.h" - -void hsqrt2 (void); -void C_1 (void); -static void fft_4_sse (complex_t *x); -static void fft_8_sse (complex_t *x); -static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB, - const complex_t *d, const complex_t *d_3); - -void fft_64p_sse(complex_t *a) -{ - fft_8_sse(&a[0]); fft_4_sse(&a[8]); fft_4_sse(&a[12]); - fft_asmb_sse(2, &a[0], &a[8], &delta16[0], &delta16_3[0]); - - fft_8_sse(&a[16]), fft_8_sse(&a[24]); - fft_asmb_sse(4, &a[0], &a[16],&delta32[0], &delta32_3[0]); - - fft_8_sse(&a[32]); fft_4_sse(&a[40]); fft_4_sse(&a[44]); - fft_asmb_sse(2, &a[32], &a[40], &delta16[0], &delta16_3[0]); - - fft_8_sse(&a[48]); fft_4_sse(&a[56]); fft_4_sse(&a[60]); - fft_asmb_sse(2, &a[48], &a[56], &delta16[0], &delta16_3[0]); - - fft_asmb_sse(8, &a[0], &a[32],&delta64[0], &delta64_3[0]); -} - - -void fft_128p_sse(complex_t *a) -{ - fft_8_sse(&a[0]); fft_4_sse(&a[8]); fft_4_sse(&a[12]); - fft_asmb_sse(2, &a[0], &a[8], &delta16[0], &delta16_3[0]); - - fft_8_sse(&a[16]), fft_8_sse(&a[24]); - fft_asmb_sse(4, &a[0], &a[16],&delta32[0], &delta32_3[0]); - - fft_8_sse(&a[32]); fft_4_sse(&a[40]); fft_4_sse(&a[44]); - fft_asmb_sse(2, &a[32], &a[40], &delta16[0], &delta16_3[0]); - - fft_8_sse(&a[48]); fft_4_sse(&a[56]); fft_4_sse(&a[60]); - fft_asmb_sse(2, &a[48], &a[56], &delta16[0], &delta16_3[0]); - - fft_asmb_sse(8, &a[0], &a[32],&delta64[0], &delta64_3[0]); - - fft_8_sse(&a[64]); fft_4_sse(&a[72]); fft_4_sse(&a[76]); - /* fft_16(&a[64]); */ - fft_asmb_sse(2, &a[64], &a[72], &delta16[0], &delta16_3[0]); - - fft_8_sse(&a[80]); fft_8_sse(&a[88]); - - /* fft_32(&a[64]); */ - fft_asmb_sse(4, &a[64], &a[80],&delta32[0], &delta32_3[0]); - - 
fft_8_sse(&a[96]); fft_4_sse(&a[104]), fft_4_sse(&a[108]); - /* fft_16(&a[96]); */ - fft_asmb_sse(2, &a[96], &a[104], &delta16[0], &delta16_3[0]); - - fft_8_sse(&a[112]), fft_8_sse(&a[120]); - /* fft_32(&a[96]); */ - fft_asmb_sse(4, &a[96], &a[112], &delta32[0], &delta32_3[0]); - - /* fft_128(&a[0]); */ - fft_asmb_sse(16, &a[0], &a[64], &delta128[0], &delta128_3[0]); -} - -void hsqrt2 (void) -{ - __asm__ ( - ".float 0f0.707106781188\n" - ".float 0f0.707106781188\n" - ".float 0f-0.707106781188\n" - ".float 0f-0.707106781188\n" - ); -} - -void C_1 (void) -{ - __asm__ ( - ".float 0f-1.0\n" - ".float 0f1.0\n" - ".float 0f-1.0\n" - ".float 0f1.0\n" - ); -} - -static void fft_4_sse (complex_t *x) -{ - __asm__ __volatile__ ( - "movups (%%eax), %%xmm0\n" /* x[1] | x[0] */ - "movups 16(%%eax), %%xmm2\n" /* x[3] | x[2] */ - "movups %%xmm0, %%xmm1\n" /* x[1] | x[0] */ - "addps %%xmm2, %%xmm0\n" /* x[1] + x[3] | x[0] + x[2] */ - "subps %%xmm2, %%xmm1\n" /* x[1] - x[3] | x[0] - x[2] */ - "xorps %%xmm6, %%xmm6\n" - "movhlps %%xmm1, %%xmm4\n" /* ? | x[1] - x[3] */ - "movhlps %%xmm0, %%xmm3\n" /* ? 
| x[1] + x[3] */ - "subss %%xmm4, %%xmm6\n" /* 0 | -(x[1] - x[3]).re */ - "movlhps %%xmm1, %%xmm0\n" /* x[0] - x[2] | x[0] + x[2] */ - "movlhps %%xmm6, %%xmm4\n" /* 0 | -(x[1] - x[3]).re | (x[1] - x[3]).im | (x[3]-x[1]).re */ - "movups %%xmm0, %%xmm2\n" /* x[0] - x[2] | x[0] + x[2] */ - "shufps $0x94, %%xmm4, %%xmm3\n" /* i*(x[1] - x[3]) | x[1] + x[3] */ - "addps %%xmm3, %%xmm0\n" - "subps %%xmm3, %%xmm2\n" - "movups %%xmm0, (%%eax)\n" - "movups %%xmm2, 16(%%eax)\n" - : "=a" (x) - : "a" (x) ); -} - -static void fft_8_sse (complex_t *x) -{ - __asm__ __volatile__ ( - "pushl %%ebx\n" - - "movlps (%%eax), %%xmm0\n" /* x[0] */ - "movlps 32(%%eax), %%xmm1\n" /* x[4] */ - "movhps 16(%%eax), %%xmm0\n" /* x[2] | x[0] */ - "movhps 48(%%eax), %%xmm1\n" /* x[6] | x[4] */ - "movups %%xmm0, %%xmm2\n" /* x[2] | x[0] */ - "xorps %%xmm3, %%xmm3\n" - "addps %%xmm1, %%xmm0\n" /* x[2] + x[6] | x[0] + x[4] */ - "subps %%xmm1, %%xmm2\n" /* x[2] - x[6] | x[0] - x[4] */ - "movhlps %%xmm0, %%xmm5\n" /* x[2] + x[6] */ - "movhlps %%xmm2, %%xmm4\n" /* x[2] - x[6] */ - "movlhps %%xmm2, %%xmm0\n" /* x[0] - x[4] | x[0] + x[4] */ - "subss %%xmm4, %%xmm3\n" /* (x[2]-x[6]).im | -(x[2]-x[6]).re */ - "movups %%xmm0, %%xmm7\n" /* x[0] - x[4] | x[0] + x[4] */ - "movups %%xmm3, %%xmm4\n" /* (x[2]-x[6]).im | -(x[2]-x[6]).re */ - "movlps 8(%%eax), %%xmm1\n" /* x[1] */ - "shufps $0x14, %%xmm4, %%xmm5\n" /* i*(x[2] - x[6]) | x[2] + x[6] */ - - "addps %%xmm5, %%xmm0\n" /* yt = i*(x2-x6)+x0-x4 | x2+x6+x0+x4 */ - "subps %%xmm5, %%xmm7\n" /* yb = i*(x6-x2)+x0-x4 | -x6-x2+x0+x4 */ - - "movhps 24(%%eax), %%xmm1\n" /* x[3] | x[1] */ - "movl $hsqrt2, %%ebx\n" - "movlps 40(%%eax), %%xmm2\n" /* x[5] */ - "movhps 56(%%eax), %%xmm2\n" /* x[7] | x[5] */ - "movups %%xmm1, %%xmm3\n" /* x[3] | x[1] */ - "addps %%xmm2, %%xmm1\n" /* x[3] + x[7] | x[1] + x[5] */ - "subps %%xmm2, %%xmm3\n" /* x[3] - x[7] | x[1] - x[5] */ - "movups (%%ebx), %%xmm4\n" /* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */ - "movups %%xmm3, %%xmm6\n" /* 
x[3] - x[7] | x[1] - x[5] */ - "mulps %%xmm4, %%xmm3\n" /* -1/s2*(x[3] - x[7]) | 1/s2*(x[1] - x[5]) */ - "shufps $0xc8, %%xmm4, %%xmm4\n" /* -1/sqrt2 | 1/sqrt2 | -1/sqrt2 | 1/sqrt2 */ - "shufps $0xb1, %%xmm6, %%xmm6\n" /* (x3-x7).re|(x3-x7).im|(x1-x5).re|(x1-x5).im */ - "mulps %%xmm4, %%xmm6\n" /* (x7-x3).re/s2|(x3-x7).im/s2|(x5-x1).re/s2|(x1-x5).im/s2 */ - "addps %%xmm3, %%xmm6\n" /* (-1-i)/sqrt2 * (x[3]-x[7]) | (1-i)/sqrt2 * (x[1] - x[5]) */ - "movhlps %%xmm1, %%xmm5\n" /* x[3] + x[7] */ - "movlhps %%xmm6, %%xmm1\n" /* (1+i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */ - "shufps $0xe4, %%xmm6, %%xmm5\n" /* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */ - "movups %%xmm1, %%xmm3\n" /* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */ - "movl $C_1, %%ebx\n" - "addps %%xmm5, %%xmm1\n" /* u */ - "subps %%xmm5, %%xmm3\n" /* v */ - "movups %%xmm0, %%xmm2\n" /* yb */ - "movups %%xmm7, %%xmm4\n" /* yt */ - "movups (%%ebx), %%xmm5\n" - "mulps %%xmm5, %%xmm3\n" - "addps %%xmm1, %%xmm0\n" /* yt + u */ - "subps %%xmm1, %%xmm2\n" /* yt - u */ - "shufps $0xb1, %%xmm3, %%xmm3\n" /* -i * v */ - "movups %%xmm0, (%%eax)\n" - "movups %%xmm2, 32(%%eax)\n" - "addps %%xmm3, %%xmm4\n" /* yb - i*v */ - "subps %%xmm3, %%xmm7\n" /* yb + i*v */ - "movups %%xmm4, 16(%%eax)\n" - "movups %%xmm7, 48(%%eax)\n" - - "popl %%ebx\n" - : "=a" (x) - : "a" (x)); -} - - -static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB, - const complex_t *d, const complex_t *d_3) -{ - __asm__ __volatile__ ( - "pushl %%ebp\n" - "movl %%esp, %%ebp\n" - - "subl $4, %%esp\n" - - "pushl %%eax\n" - "pushl %%ebx\n" - "pushl %%ecx\n" - "pushl %%edx\n" - "pushl %%esi\n" - "pushl %%edi\n" - - "movl 8(%%ebp), %%ecx\n" /* k */ - "movl 12(%%ebp), %%eax\n" /* x */ - "movl %%ecx, -4(%%ebp)\n" /* k */ - "movl 16(%%ebp), %%ebx\n" /* wT */ - "movl 20(%%ebp), %%edx\n" /* d */ - "movl 24(%%ebp), %%esi\n" /* d3 */ - "shll $4, %%ecx\n" /* 16k */ - "addl $8, %%edx\n" - "leal (%%eax, %%ecx, 2), %%edi\n" - "addl $8, %%esi\n" - - /* TRANSZERO and TRANS */ 
- "movups (%%eax), %%xmm0\n" /* x[1] | x[0] */ - "movups (%%ebx), %%xmm1\n" /* wT[1] | wT[0] */ - "movups (%%ebx, %%ecx), %%xmm2\n" /* wB[1] | wB[0] */ - "movlps (%%edx), %%xmm3\n" /* d */ - "movlps (%%esi), %%xmm4\n" /* d3 */ - "movhlps %%xmm1, %%xmm5\n" /* wT[1] */ - "movhlps %%xmm2, %%xmm6\n" /* wB[1] */ - "shufps $0x50, %%xmm3, %%xmm3\n" /* d[1].im | d[1].im | d[1].re | d[1].re */ - "shufps $0x50, %%xmm4, %%xmm4\n" /* d3[1].im | d3[1].im | d3[i].re | d3[i].re */ - "movlhps %%xmm5, %%xmm5\n" /* wT[1] | wT[1] */ - "movlhps %%xmm6, %%xmm6\n" /* wB[1] | wB[1] */ - "mulps %%xmm3, %%xmm5\n" - "mulps %%xmm4, %%xmm6\n" - "movhlps %%xmm5, %%xmm7\n" /* wT[1].im * d[1].im | wT[1].re * d[1].im */ - "movlhps %%xmm6, %%xmm5\n" /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */ - "shufps $0xb1, %%xmm6, %%xmm7\n" /* wB[1].re * d3[1].im | wB[i].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */ - "movl $C_1, %%edi\n" - "movups (%%edi), %%xmm4\n" - "mulps %%xmm4, %%xmm7\n" - "addps %%xmm7, %%xmm5\n" /* wB[1] * d3[1] | wT[1] * d[1] */ - "movlhps %%xmm5, %%xmm1\n" /* d[1] * wT[1] | wT[0] */ - "shufps $0xe4, %%xmm5, %%xmm2\n" /* d3[1] * wB[1] | wB[0] */ - "movups %%xmm1, %%xmm3\n" /* d[1] * wT[1] | wT[0] */ - "leal (%%eax, %%ecx, 2), %%edi\n" - "addps %%xmm2, %%xmm1\n" /* u */ - "subps %%xmm2, %%xmm3\n" /* v */ - "mulps %%xmm4, %%xmm3\n" - "movups (%%eax, %%ecx), %%xmm5\n" /* xk[1] | xk[0] */ - "shufps $0xb1, %%xmm3, %%xmm3\n" /* -i * v */ - "movups %%xmm0, %%xmm2\n" /* x[1] | x[0] */ - "movups %%xmm5, %%xmm6\n" /* xk[1] | xk[0] */ - "addps %%xmm1, %%xmm0\n" - "subps %%xmm1, %%xmm2\n" - "addps %%xmm3, %%xmm5\n" - "subps %%xmm3, %%xmm6\n" - "movups %%xmm0, (%%eax)\n" - "movups %%xmm2, (%%edi)\n" - "movups %%xmm5, (%%eax, %%ecx)\n" - "movups %%xmm6, (%%edi, %%ecx)\n" - "addl $16, %%eax\n" - "addl $16, %%ebx\n" - "addl $8, %%edx\n" - "addl $8, %%esi\n" - "decl -4(%%ebp)\n" - -".loop:\n" - "movups (%%ebx), %%xmm0\n" /* wT[1] | wT[0] */ - 
"movups (%%edx), %%xmm1\n" /* d[1] | d[0] */ - - "movups (%%ebx, %%ecx), %%xmm4\n" /* wB[1] | wB[0] */ - "movups (%%esi), %%xmm5\n" /* d3[1] | d3[0] */ - - "movhlps %%xmm0, %%xmm2\n" /* wT[1] */ - "movhlps %%xmm1, %%xmm3\n" /* d[1] */ - - "movhlps %%xmm4, %%xmm6\n" /* wB[1] */ - "movhlps %%xmm5, %%xmm7\n" /* d3[1] */ - - "shufps $0x50, %%xmm1, %%xmm1\n" /* d[0].im | d[0].im | d[0].re | d[0].re */ - "shufps $0x50, %%xmm3, %%xmm3\n" /* d[1].im | d[1].im | d[1].re | d[1].re */ - - "movlhps %%xmm0, %%xmm0\n" /* wT[0] | wT[0] */ - "shufps $0x50, %%xmm5, %%xmm5\n" /* d3[0].im | d3[0].im | d3[0].re | d3[0].re */ - "movlhps %%xmm2, %%xmm2\n" /* wT[1] | wT[1] */ - "shufps $0x50, %%xmm7, %%xmm7\n" /* d3[1].im | d3[1].im | d3[1].re | d3[1].re */ - - "mulps %%xmm1, %%xmm0\n" /* d[0].im * wT[0].im | d[0].im * wT[0].re | d[0].re * wT[0].im | d[0].re * wT[0].re */ - "mulps %%xmm3, %%xmm2\n" /* d[1].im * wT[1].im | d[1].im * wT[1].re | d[1].re * wT[1].im | d[1].re * wT[1].re */ - "movlhps %%xmm4, %%xmm4\n" /* wB[0] | wB[0] */ - "movlhps %%xmm6, %%xmm6\n" /* wB[1] | wB[1] */ - - "movhlps %%xmm0, %%xmm1\n" /* d[0].im * wT[0].im | d[0].im * wT[0].re */ - "movlhps %%xmm2, %%xmm0\n" /* d[1].re * wT[1].im | d[1].re * wT[1].re | d[0].re * wT[0].im | d[0].re * wT[0].re */ - "mulps %%xmm5, %%xmm4\n" /* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */ - "mulps %%xmm7, %%xmm6\n" /* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */ - "shufps $0xb1, %%xmm2, %%xmm1\n" /* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */ - "movl $C_1, %%edi\n" - "movups (%%edi), %%xmm3\n" /* 1.0 | -1.0 | 1.0 | -1.0 */ - - "movhlps %%xmm4, %%xmm5\n" /* wB[0].im * d3[0].im | wB[0].re * d3[0].im */ - "mulps %%xmm3, %%xmm1\n" /* d[1].im * wT[1].re | -d[1].im * wT[1].im | d[0].im * wT[0].re | -d[0].im * wT[0].im */ - "movlhps %%xmm6, %%xmm4\n" /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wB[0].im * 
d3[0].re | wB[0].im * d3[0].re */ - "addps %%xmm1, %%xmm0\n" /* wT[1] * d[1] | wT[0] * d[0] */ - - "shufps $0xb1, %%xmm6, %%xmm5\n" /* wB[1].re * d3[1].im | wB[1].im * d3[1].im | wB[0].re * d3[0].im | wB[0].im * d3[0].im */ - "mulps %%xmm3, %%xmm5\n" /* wB[1].re * d3[1].im | -wB[1].im * d3[1].im | wB[0].re * d3[0].im | -wB[0].im * d3[0].im */ - "addps %%xmm5, %%xmm4\n" /* wB[1] * d3[1] | wB[0] * d3[0] */ - - "movups %%xmm0, %%xmm1\n" /* wT[1] * d[1] | wT[0] * d[0] */ - "addps %%xmm4, %%xmm0\n" /* u */ - "subps %%xmm4, %%xmm1\n" /* v */ - "movups (%%eax), %%xmm6\n" /* x[1] | x[0] */ - "leal (%%eax, %%ecx, 2), %%edi\n" - "mulps %%xmm3, %%xmm1\n" - "addl $16, %%ebx\n" - "addl $16, %%esi\n" - "shufps $0xb1, %%xmm1, %%xmm1\n" /* -i * v */ - "movups (%%eax, %%ecx), %%xmm7\n" /* xk[1] | xk[0] */ - "movups %%xmm6, %%xmm2\n" - "movups %%xmm7, %%xmm4\n" - "addps %%xmm0, %%xmm6\n" - "subps %%xmm0, %%xmm2\n" - "movups %%xmm6, (%%eax)\n" - "movups %%xmm2, (%%edi)\n" - "addps %%xmm1, %%xmm7\n" - "subps %%xmm1, %%xmm4\n" - "addl $16, %%edx\n" - "movups %%xmm7, (%%eax, %%ecx)\n" - "movups %%xmm4, (%%edi, %%ecx)\n" - - "addl $16, %%eax\n" - "decl -4(%%ebp)\n" - "jnz .loop\n" - -".end:\n" - "popl %%edi\n" - "popl %%esi\n" - "popl %%edx\n" - "popl %%ecx\n" - "popl %%ebx\n" - "popl %%eax\n" - - "addl $4, %%esp\n" - - "leave\n" - ::); -} diff --git a/src/audio_output/aout_u8.c b/src/audio_output/aout_u8.c index 7037471125..209bb235bc 100644 --- a/src/audio_output/aout_u8.c +++ b/src/audio_output/aout_u8.c @@ -2,7 +2,7 @@ * aout_u8.c: 8 bit unsigned audio output functions ***************************************************************************** * Copyright (C) 1999, 2000, 2001 VideoLAN - * $Id: aout_u8.c,v 1.4 2001/05/06 04:32:02 sam Exp $ + * $Id: aout_u8.c,v 1.5 2001/05/15 16:19:42 sam Exp $ * * Authors: Michel Kaempf * @@ -105,8 +105,6 @@ void aout_U8StereoThread( aout_thread_t * p_aout ) int i_fifo; long l_buffer, l_buffer_limit, l_bytes; - intf_DbgMsg("adec debug: running audio 
output U8_S_thread (%p) (pid == %i)", p_aout, getpid()); - /* As the s32_buffer was created with calloc(), we don't have to set this * memory to zero and we can immediately jump into the thread's loop */ while ( ! p_aout->b_die ) diff --git a/src/interface/main.c b/src/interface/main.c index de99fcceb4..cd15fb856a 100644 --- a/src/interface/main.c +++ b/src/interface/main.c @@ -4,7 +4,7 @@ * and spawn threads. ***************************************************************************** * Copyright (C) 1998, 1999, 2000 VideoLAN - * $Id: main.c,v 1.94 2001/05/14 15:58:04 reno Exp $ + * $Id: main.c,v 1.95 2001/05/15 16:19:42 sam Exp $ * * Authors: Vincent Seguin * Samuel Hocevar @@ -113,6 +113,8 @@ #define OPT_MOTION 181 #define OPT_IDCT 182 #define OPT_YUV 183 +#define OPT_DOWNMIX 184 +#define OPT_IMDCT 185 #define OPT_SYNCHRO 190 #define OPT_WARNING 191 @@ -143,6 +145,8 @@ static const struct option longopts[] = { "stereo", 0, 0, OPT_STEREO }, { "mono", 0, 0, OPT_MONO }, { "spdif", 0, 0, OPT_SPDIF }, + { "downmix", 1, 0, OPT_DOWNMIX }, + { "imdct", 1, 0, OPT_IMDCT }, /* Video options */ { "novideo", 0, 0, OPT_NOVIDEO }, @@ -559,6 +563,12 @@ static int GetConfiguration( int *pi_argc, char *ppsz_argv[], char *ppsz_env[] ) case OPT_SPDIF: /* --spdif */ main_PutIntVariable( AOUT_SPDIF_VAR, 1 ); break; + case OPT_DOWNMIX: /* --downmix */ + main_PutPszVariable( DOWNMIX_METHOD_VAR, optarg ); + break; + case OPT_IMDCT: /* --imdct */ + main_PutPszVariable( IMDCT_METHOD_VAR, optarg ); + break; /* Video options */ case OPT_NOVIDEO: /* --novideo */ @@ -712,6 +722,8 @@ static void Usage( int i_fashion ) "\n -A, --aout \taudio output method" "\n --stereo, --mono \tstereo/mono audio" "\n --spdif \tAC3 pass-through mode" + "\n --downmix \tAC3 downmix method" + "\n --imdct \tAC3 IMDCT method" "\n" "\n --novideo \tdisable video" "\n -V, --vout \tvideo output method" @@ -758,6 +770,8 @@ static void Usage( int i_fashion ) "\n " AOUT_DSP_VAR "= \tdsp device path" "\n " AOUT_STEREO_VAR 
"={1|0} \tstereo or mono output" "\n " AOUT_SPDIF_VAR "={1|0} \tAC3 pass-through mode" + "\n " DOWNMIX_METHOD_VAR "= \tAC3 downmix method" + "\n " IMDCT_METHOD_VAR "= \tAC3 IMDCT method" "\n " AOUT_RATE_VAR "= \toutput rate" ); /* Video parameters */