From: Sam Hocevar <sam@videolan.org>
Date: Tue, 15 May 2001 16:19:42 +0000 (+0000)
Subject:   * AC3 IMDCT and downmix functions are now in plugins, --imdct and
X-Git-Tag: 0.2.81~119
X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=501cb1ba4bfc11b6987df0cf1d27fbf91c48f495;p=vlc

  * AC3 IMDCT and downmix functions are now in plugins, --imdct and
    --downmix options added.
---

diff --git a/Makefile b/Makefile
index fbe187721d..9a868def9f 100644
--- a/Makefile
+++ b/Makefile
@@ -13,7 +13,7 @@
 #
 PLUGINS_DIR :=	alsa beos darwin dsp dummy \
 		dvd esd fb ggi glide gnome gtk \
-		idct \
+		downmix idct imdct \
 		macosx mga \
 		motion \
 		mpeg null qt sdl \
@@ -24,7 +24,9 @@ PLUGINS_DIR :=	alsa beos darwin dsp dummy \
 #
 PLUGINS_TARGETS := alsa/alsa beos/beos darwin/darwin dsp/dsp dummy/dummy \
 		dvd/dvd esd/esd fb/fb ggi/ggi glide/glide gnome/gnome gtk/gtk \
+		downmix/downmix downmix/downmixsse downmix/downmix3dn \
 		idct/idct idct/idctclassic idct/idctmmx idct/idctmmxext \
+		imdct/imdct imdct/imdctsse \
 		macosx/macosx mga/mga \
 		motion/motion motion/motionmmx motion/motionmmxext \
 		mpeg/es mpeg/ps mpeg/ts null/null qt/qt sdl/sdl \
@@ -69,11 +71,7 @@ AC3_DECODER =	src/ac3_decoder/ac3_decoder_thread.o \
 		src/ac3_decoder/ac3_bit_allocate.o \
 		src/ac3_decoder/ac3_mantissa.o \
 		src/ac3_decoder/ac3_rematrix.o \
-		src/ac3_decoder/ac3_imdct.o \
-		src/ac3_decoder/ac3_imdct_c.o \
-		src/ac3_decoder/ac3_srfft.o \
-		src/ac3_decoder/ac3_downmix.o \
-		src/ac3_decoder/ac3_downmix_c.o
+		src/ac3_decoder/ac3_imdct.o
 
 AC3_SPDIF = src/ac3_spdif/ac3_spdif.o \
 	        src/ac3_spdif/ac3_iec958.o
diff --git a/configure b/configure
index 8624e778ad..eb8c146924 100755
--- a/configure
+++ b/configure
@@ -3162,7 +3162,7 @@ fi
 
 ARCH=${host_cpu}
 
-BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion"
+BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion imdct downmix"
 
 case x$host_os in
   xmingw32msvc)
@@ -3195,8 +3195,8 @@ else
 fi
 rm -f conftest*
 
-echo $ac_n "checking if \$CC groks MMX EXT (SSE) inline assembly""... $ac_c" 1>&6
-echo "configure:3200: checking if \$CC groks MMX EXT (SSE) inline assembly" >&5
+echo $ac_n "checking if \$CC groks MMX EXT or SSE inline assembly""... $ac_c" 1>&6
+echo "configure:3200: checking if \$CC groks MMX EXT or SSE inline assembly" >&5
 cat > conftest.$ac_ext <<EOF
 #line 3202 "configure"
 #include "confdefs.h"
@@ -3207,7 +3207,7 @@ int main() {
 EOF
 if { (eval echo configure:3209: \"$ac_compile\") 1>&5; (eval $ac_compile) 2>&5; }; then
   rm -rf conftest*
-  ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext"
+  ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext imdctsse downmix3dn downmixsse"
   echo "$ac_t""yes" 1>&6
 else
   echo "configure: failed program was:" >&5
diff --git a/configure.in b/configure.in
index 2c380c1043..6b85cc1816 100644
--- a/configure.in
+++ b/configure.in
@@ -135,7 +135,7 @@ ARCH=${host_cpu}
 dnl
 dnl  default modules
 dnl
-BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion"
+BUILTINS="${BUILTINS} es ps ts yuv idct idctclassic motion imdct downmix"
 
 dnl
 dnl  Accelerated modules
@@ -154,9 +154,9 @@ AC_TRY_COMPILE([void quux(){void *p;asm("packuswb %%mm1,%%mm2"::"r"(p));}],,
   ACCEL_PLUGINS="${ACCEL_PLUGINS} ${MMX_PLUGINS}"
   AC_MSG_RESULT(yes), AC_MSG_RESULT(no))
 
-AC_MSG_CHECKING([if \$CC groks MMX EXT (SSE) inline assembly])
+AC_MSG_CHECKING([if \$CC groks MMX EXT or SSE inline assembly])
 AC_TRY_COMPILE([void quux(){void *p;asm("maskmovq %%mm1,%%mm2"::"r"(p));}],,
-  ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext"
+  ACCEL_PLUGINS="${ACCEL_PLUGINS} idctmmxext motionmmxext imdctsse downmix3dn downmixsse"
   AC_MSG_RESULT(yes), AC_MSG_RESULT(no))
 
 dnl
diff --git a/doc/vlc.1 b/doc/vlc.1
index 494508125f..3565198da8 100644
--- a/doc/vlc.1
+++ b/doc/vlc.1
@@ -52,6 +52,12 @@ Choose stereo or mono audio output.
 .B \-\-spdif
 Activate hardware AC3 pass-through mode.
 .TP
+.B \-\-downmix <module>
+Specify a module for AC3 downmix: "downmix", "downmixsse", for instance.
+.TP
+.B \-\-imdct <module>
+Specify a module for AC3 IMDCT: "imdct", "imdctsse", for instance.
+.TP
 .B \-\-novideo
 Disable video output.
 .TP
@@ -145,11 +151,13 @@ also accepts a lot of parameters to customize its behaviour.
  vlc_channels=<filename>  channels list
 .TP
 .B Audio parameters:
- vlc_aout=<method name>   audio method
- vlc_dsp=<filename>       dsp device path
- vlc_stereo={1|0}         stereo or mono output
- vlc_spdif={1|0}          AC3 pass-through mode
- vlc_audio_rate=<rate>    output rate
+ vlc_aout=<method name>  	audio method
+ vlc_dsp=<filename>      	dsp device path
+ vlc_stereo={1|0}        	stereo or mono output
+ vlc_spdif={1|0}         	AC3 pass-through mode
+ vlc_downmix=<method name>      AC3 downmix method
+ vlc_imdct=<method name>        AC3 IMDCT method
+ vlc_audio_rate=<rate>    	output rate
 .TP
 .B Video parameters:
  vlc_vout=<method name>        display method
@@ -160,6 +168,7 @@ also accepts a lot of parameters to customize its behaviour.
  vlc_grayscale={1|0}           grayscale or color
  vlc_fullscreen={1|0}          full screen
  vlc_overlay={1|0}             overlay
+ vlc_motion=<method name>      motion compensation method
  vlc_idct=<method name>        IDCT method
  vlc_yuv=<method name>         YUV method
  vlc_synchro={I|I+|IP|IP+|IPB} synchro algorithm
diff --git a/include/ac3_downmix.h b/include/ac3_downmix.h
new file mode 100644
index 0000000000..8dede4cfdd
--- /dev/null
+++ b/include/ac3_downmix.h
@@ -0,0 +1,42 @@
+/*****************************************************************************
+ * ac3_downmix.h : AC3 downmix types
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: ac3_downmix.h,v 1.3 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Michel Kaempf <maxx@via.ecp.fr>
+ *          Renaud Dartus <reno@videolan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+typedef struct dm_par_s {  /* Downmix gains; asm code reads them at offsets 0, 4, 8 */
+    float unit;            /* gain for the front left and right channels */
+    float clev;            /* gain for the center channel */
+    float slev;            /* gain for the surround channel(s) */
+} dm_par_t;
+
+typedef struct downmix_s {
+    /* Module used and shortcuts (same names as the downmix list in modules.h) */
+    struct module_s * p_module;
+    void (*pf_downmix_3f_2r_to_2ch)(float *, dm_par_t * dm_par);
+    void (*pf_downmix_3f_1r_to_2ch)(float *, dm_par_t * dm_par);
+    void (*pf_downmix_2f_2r_to_2ch)(float *, dm_par_t * dm_par);
+    void (*pf_downmix_2f_1r_to_2ch)(float *, dm_par_t * dm_par);
+    void (*pf_downmix_3f_0r_to_2ch)(float *, dm_par_t * dm_par);
+    void (*pf_stream_sample_2ch_to_s16)(s16 *, float *left, float *right);
+    void (*pf_stream_sample_1ch_to_s16)(s16 *, float *center);
+} downmix_t;
+
diff --git a/include/ac3_imdct.h b/include/ac3_imdct.h
new file mode 100644
index 0000000000..4720653c15
--- /dev/null
+++ b/include/ac3_imdct.h
@@ -0,0 +1,68 @@
+/*****************************************************************************
+ * ac3_imdct.h : AC3 IMDCT types
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: ac3_imdct.h,v 1.3 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Michel Kaempf <maxx@via.ecp.fr>
+ *          Renaud Dartus <reno@videolan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+typedef struct complex_s {  /* Single-precision complex number */
+    float real;             /* real part */
+    float imag;             /* imaginary part */
+} complex_t;
+
+#define N 512              /* sizes buf[] and the xcos/xsin tables below */
+
+typedef struct imdct_s      /* Per-decoder IMDCT state plus the module entry points */
+{
+    complex_t buf[N/4];     /* N/4 = 128 complex values */
+
+    /* Delay buffer for time domain interleaving */
+    float delay[6][256];
+    float delay1[6][256];
+
+    /* Twiddle factors for IMDCT */
+    float xcos1[N/4];
+    float xsin1[N/4];
+    float xcos2[N/8];
+    float xsin2[N/8];
+   
+    /* Twiddle factor LUT: 7 pointers; presumably one per w_1..w_64 - confirm */
+    complex_t *w[7];
+    complex_t w_1[1];
+    complex_t w_2[2];
+    complex_t w_4[4];
+    complex_t w_8[8];
+    complex_t w_16[16];
+    complex_t w_32[32];
+    complex_t w_64[64];
+    /* 16-byte aligned table; NOTE(review): presumably for the SSE IMDCT - confirm */
+    float xcos_sin_sse[128 * 4] __attribute__((aligned(16)));
+    
+    /* Module used and shortcuts (same names as the imdct list in modules.h) */
+    struct module_s * p_module;
+    void (*pf_imdct_init) (struct imdct_s *);
+    //void (*pf_fft_64p) (complex_t *a);
+    void (*pf_imdct_256)(struct imdct_s *, float data[], float delay[]);
+    void (*pf_imdct_256_nol)(struct imdct_s *, float data[], float delay[]);
+    void (*pf_imdct_512)(struct imdct_s *, float data[], float delay[]);
+    void (*pf_imdct_512_nol)(struct imdct_s *, float data[], float delay[]);
+
+} imdct_t;
+
diff --git a/include/config.h.in b/include/config.h.in
index 81b73b587b..94c04da15e 100644
--- a/include/config.h.in
+++ b/include/config.h.in
@@ -264,6 +264,12 @@
 #define AOUT_SPDIF_VAR                  "vlc_spdif"
 #define AOUT_SPDIF_DEFAULT              0
 
+/* Environment variable containing the AC3 downmix method */
+#define DOWNMIX_METHOD_VAR              "vlc_downmix"
+
+/* Environment variable containing the AC3 IMDCT method */
+#define IMDCT_METHOD_VAR                "vlc_imdct"
+
 /* Volume */
 #define VOLUME_DEFAULT                  512
 #define VOLUME_STEP                     128
diff --git a/include/modules.h b/include/modules.h
index 212af24828..33c0cbbdc7 100644
--- a/include/modules.h
+++ b/include/modules.h
@@ -2,7 +2,7 @@
  * modules.h : Module management functions.
  *****************************************************************************
  * Copyright (C) 2001 VideoLAN
- * $Id: modules.h,v 1.23 2001/05/06 04:32:02 sam Exp $
+ * $Id: modules.h,v 1.24 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Samuel Hocevar <sam@zoy.org>
  *
@@ -65,16 +65,19 @@ typedef void *  module_handle_t;
 #define MODULE_CAPABILITY_DECAPS   1 <<  3  /* Decaps */
 #define MODULE_CAPABILITY_ADEC     1 <<  4  /* Audio decoder */
 #define MODULE_CAPABILITY_VDEC     1 <<  5  /* Video decoder */
-#define MODULE_CAPABILITY_MOTION   1 <<  6  /* Video decoder */
+#define MODULE_CAPABILITY_MOTION   1 <<  6  /* Motion compensation */
 #define MODULE_CAPABILITY_IDCT     1 <<  7  /* IDCT transformation */
 #define MODULE_CAPABILITY_AOUT     1 <<  8  /* Audio output */
 #define MODULE_CAPABILITY_VOUT     1 <<  9  /* Video output */
 #define MODULE_CAPABILITY_YUV      1 << 10  /* YUV colorspace conversion */
-#define MODULE_CAPABILITY_AFX      1 << 11  /* Audio effects */
-#define MODULE_CAPABILITY_VFX      1 << 12  /* Video effects */
+#define MODULE_CAPABILITY_IMDCT    1 << 11  /* IMDCT transformation */
+#define MODULE_CAPABILITY_DOWNMIX  1 << 12  /* AC3 downmix */
 
 /* FIXME: kludge */
 struct input_area_s;
+struct imdct_s;
+struct complex_s;
+struct dm_par_s;
 
 /* FIXME: not yet used */
 typedef struct probedata_s
@@ -190,6 +193,35 @@ typedef struct function_list_s
             void ( * pf_end )          ( struct vout_thread_s * );
         } yuv;
 
+        /* IMDCT plugin */
+        struct
+        {
+            void ( * pf_imdct_init )   ( struct imdct_s * );
+            void ( * pf_imdct_256 )    ( struct imdct_s *,
+                                         float data[], float delay[] );
+            void ( * pf_imdct_256_nol )( struct imdct_s *,
+                                         float data[], float delay[] );
+            void ( * pf_imdct_512 )    ( struct imdct_s *,
+                                         float data[], float delay[] );
+            void ( * pf_imdct_512_nol )( struct imdct_s *,
+                                         float data[], float delay[] );
+//            void ( * pf_fft_64p )      ( struct complex_s * );
+
+        } imdct;
+
+        /* AC3 downmix plugin */
+        struct
+        {
+            void ( * pf_downmix_3f_2r_to_2ch ) ( float *, struct dm_par_s * );
+            void ( * pf_downmix_3f_1r_to_2ch ) ( float *, struct dm_par_s * );
+            void ( * pf_downmix_2f_2r_to_2ch ) ( float *, struct dm_par_s * );
+            void ( * pf_downmix_2f_1r_to_2ch ) ( float *, struct dm_par_s * );
+            void ( * pf_downmix_3f_0r_to_2ch ) ( float *, struct dm_par_s * );
+            void ( * pf_stream_sample_2ch_to_s16 ) ( s16 *, float *, float * );
+            void ( * pf_stream_sample_1ch_to_s16 ) ( s16 *, float * );
+
+        } downmix;
+
     } functions;
 
 } function_list_t;
@@ -208,8 +240,8 @@ typedef struct module_functions_s
     function_list_t aout;
     function_list_t vout;
     function_list_t yuv;
-    function_list_t afx;
-    function_list_t vfx;
+    function_list_t imdct;
+    function_list_t downmix;
 
 } module_functions_t;
 
diff --git a/plugins/downmix/.cvsignore b/plugins/downmix/.cvsignore
new file mode 100644
index 0000000000..63e7180a26
--- /dev/null
+++ b/plugins/downmix/.cvsignore
@@ -0,0 +1 @@
+.dep
diff --git a/plugins/downmix/Makefile b/plugins/downmix/Makefile
new file mode 100644
index 0000000000..deb95734a8
--- /dev/null
+++ b/plugins/downmix/Makefile
@@ -0,0 +1,63 @@
+###############################################################################
+# vlc (VideoLAN Client) downmix module makefile
+# (c)2001 VideoLAN
+###############################################################################
+
+#
+# Objects: one PLUGIN_* list per module, plus a BUILTIN_* twin of each list
+# in which every object name gets a BUILTIN_<MODULE>_ prefix.
+# PLUGIN_C and ALL_OBJ aggregate the lists; they are not used in this file.
+PLUGIN_DOWNMIX = downmix.o ac3_downmix_c.o
+PLUGIN_DOWNMIXSSE = downmixsse.o ac3_downmix_sse.o
+PLUGIN_DOWNMIX3DN = downmix3dn.o ac3_downmix_3dn.o
+
+BUILTIN_DOWNMIX = $(PLUGIN_DOWNMIX:%.o=BUILTIN_DOWNMIX_%.o)
+BUILTIN_DOWNMIXSSE = $(PLUGIN_DOWNMIXSSE:%.o=BUILTIN_DOWNMIXSSE_%.o)
+BUILTIN_DOWNMIX3DN = $(PLUGIN_DOWNMIX3DN:%.o=BUILTIN_DOWNMIX3DN_%.o)
+
+PLUGIN_C = $(PLUGIN_DOWNMIX) $(PLUGIN_DOWNMIXSSE) $(PLUGIN_DOWNMIX3DN)
+ALL_OBJ = $(PLUGIN_C) $(BUILTIN_DOWNMIX) $(BUILTIN_DOWNMIXSSE) $(BUILTIN_DOWNMIX3DN)
+
+#
+# Virtual targets
+# (shared rules are pulled in from ../../Makefile.modules)
+
+include ../../Makefile.modules
+# Builtin objects: built from the same %.c files with -DBUILTIN and a fixed MODULE_NAME
+$(BUILTIN_DOWNMIX): BUILTIN_DOWNMIX_%.o: .dep/%.d
+$(BUILTIN_DOWNMIX): BUILTIN_DOWNMIX_%.o: %.c
+	$(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=downmix -c -o $@ $<
+
+$(BUILTIN_DOWNMIXSSE): BUILTIN_DOWNMIXSSE_%.o: .dep/%.d
+$(BUILTIN_DOWNMIXSSE): BUILTIN_DOWNMIXSSE_%.o: %.c
+	$(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=downmixsse -c -o $@ $<
+
+$(BUILTIN_DOWNMIX3DN): BUILTIN_DOWNMIX3DN_%.o: .dep/%.d
+$(BUILTIN_DOWNMIX3DN): BUILTIN_DOWNMIX3DN_%.o: %.c
+	$(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=downmix3dn -c -o $@ $<
+
+#
+# Real targets
+#
+# Each .so links the PLUGIN_* objects; each .a archives the BUILTIN_* objects
+../../lib/downmix.so: $(PLUGIN_DOWNMIX)
+	$(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS) 
+
+../../lib/downmix.a: $(BUILTIN_DOWNMIX)
+	ar r $@ $^
+	$(RANLIB) $@
+
+../../lib/downmixsse.so: $(PLUGIN_DOWNMIXSSE)
+	$(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS) 
+
+../../lib/downmixsse.a: $(BUILTIN_DOWNMIXSSE)
+	ar r $@ $^
+	$(RANLIB) $@
+
+../../lib/downmix3dn.so: $(PLUGIN_DOWNMIX3DN)
+	$(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS) 
+
+../../lib/downmix3dn.a: $(BUILTIN_DOWNMIX3DN)
+	ar r $@ $^
+	$(RANLIB) $@
+
diff --git a/plugins/downmix/ac3_downmix_3dn.c b/plugins/downmix/ac3_downmix_3dn.c
new file mode 100644
index 0000000000..fa8e7f0f7c
--- /dev/null
+++ b/plugins/downmix/ac3_downmix_3dn.c
@@ -0,0 +1,306 @@
+/*****************************************************************************
+ * ac3_downmix_3dn.c: accelerated 3D Now! ac3 downmix functions
+ *****************************************************************************
+ * Copyright (C) 1999, 2000, 2001 VideoLAN
+ * $Id: ac3_downmix_3dn.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME downmix3dn
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_downmix.h"
+
+void sqrt2_3dn (void) /* holds 0.7071068; stream_sample_1ch_to_s16 reads it by address */
+{   /* NOTE(review): any compiler-emitted prologue would precede the constant */
+    __asm__ (".float 0f0.7071068");   /* raw float emitted into the function body */
+}
+
+void _M( downmix_3f_2r_to_2ch ) (float * samples, dm_par_t * dm_par)
+{   /* 3DNow! in-place downmix: left/center/right/leftsur/rightsur -> left/right */
+    __asm__ __volatile__ (
+    "pushl %%ecx\n"
+    "movl  $128,  %%ecx\n"            /* loop counter: 128 passes, 2 floats each */
+    /* eax = samples, ebx = dm_par; each gain is copied into both register halves */
+    "movd    (%%ebx), %%mm5\n"        /* unit */
+    "punpckldq %%mm5, %%mm5\n"        /* unit | unit */
+
+    "movd    4(%%ebx), %%mm6\n"        /* clev */
+    "punpckldq %%mm6, %%mm6\n"        /* clev | clev */
+
+    "movd    8(%%ebx), %%mm7\n"        /* slev */
+    "punpckldq %%mm7, %%mm7\n"        /* slev | slev */
+    /* channels sit 1024 bytes apart, starting at samples */
+".loop:\n"
+    "movq    (%%eax),     %%mm0\n"   /* left */
+    "movq    2048(%%eax), %%mm1\n"   /* right */
+    "movq   1024(%%eax), %%mm2\n"    /* center */
+    "movq    3072(%%eax), %%mm3\n"    /* leftsur */
+    "movq    4096(%%eax), %%mm4\n"    /* rightsur */
+    "pfmul    %%mm5, %%mm0\n"         /* left *= unit */
+    "pfmul    %%mm5, %%mm1\n"         /* right *= unit */
+    "pfmul    %%mm6, %%mm2\n"         /* center *= clev */
+    "pfadd    %%mm2, %%mm0\n"         /* left += center */
+    "pfadd     %%mm2, %%mm1\n"        /* right += center */
+    "pfmul  %%mm7, %%mm3\n"           /* leftsur *= slev */
+    "pfmul    %%mm7, %%mm4\n"         /* rightsur *= slev */
+    "pfadd    %%mm3, %%mm0\n"         /* left += leftsur */
+    "pfadd    %%mm4, %%mm1\n"         /* right += rightsur */
+    /* results overwrite the first two channel slots */
+    "movq    %%mm0, (%%eax)\n"
+    "movq    %%mm1, 1024(%%eax)\n"
+
+    "addl    $8, %%eax\n"             /* advance by two floats */
+    "decl     %%ecx\n"
+    "jnz    .loop\n"
+    /* NOTE(review): no clobber list is declared for the mm registers or memory */
+    "popl   %%ecx\n"
+    "femms\n"                         /* reset MMX/3DNow! state */
+    : "=a" (samples)
+    : "a" (samples), "b" (dm_par));
+}
+
+void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{   /* 3DNow! in-place downmix: left/right/leftsur/rightsur -> left/right */
+    __asm__ __volatile__ (
+    "pushl %%ecx\n"
+    "movl  $128, %%ecx\n"       /* loop counter: 128 passes, 2 floats each */
+    /* eax = samples, ebx = dm_par; each gain is copied into both register halves */
+    "movd  (%%ebx), %%mm5\n"    /* unit */
+    "punpckldq %%mm5, %%mm5\n"  /* unit | unit */
+
+    "movd    8(%%ebx), %%mm7\n"    /* slev */
+    "punpckldq %%mm7, %%mm7\n"    /* slev | slev */
+    /* channels sit 1024 bytes apart, starting at samples */
+".loop3:\n"
+    "movq   (%%eax), %%mm0\n"       /* left */
+    "movq   1024(%%eax), %%mm1\n"   /* right */
+    "movq   2048(%%eax), %%mm3\n"    /* leftsur */
+    "movq   3072(%%eax), %%mm4\n"    /* rightsur */
+    "pfmul    %%mm5, %%mm0\n"        /* left *= unit */
+    "pfmul    %%mm5, %%mm1\n"        /* right *= unit */
+    "pfmul    %%mm7, %%mm3\n"        /* leftsur *= slev */
+    "pfmul    %%mm7, %%mm4\n"        /* rightsur *= slev */
+    "pfadd    %%mm3, %%mm0\n"        /* left += leftsur */
+    "pfadd    %%mm4, %%mm1\n"        /* right += rightsur */
+    /* results overwrite the first two channel slots */
+    "movq    %%mm0, (%%eax)\n"
+    "movq    %%mm1, 1024(%%eax)\n"
+
+    "addl    $8, %%eax\n"            /* advance by two floats */
+    "decl     %%ecx\n"
+    "jnz    .loop3\n"
+    /* NOTE(review): no clobber list is declared for the mm registers or memory */
+    "popl    %%ecx\n"
+    "femms\n"                        /* reset MMX/3DNow! state */
+    : "=a" (samples)
+    : "a" (samples), "b" (dm_par));
+}
+
+void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{   /* 3DNow! in-place downmix: left/center/right/sur -> left/right */
+    __asm__ __volatile__ (
+    /* eax = samples, ebx = dm_par; each gain is copied into both register halves */
+    "pushl    %%ecx\n"
+    "movl    $128, %%ecx\n"            /* loop counter: 128 passes, 2 floats each */
+
+    "movd    (%%ebx), %%mm5\n"        /* unit */
+    "punpckldq %%mm5, %%mm5\n"        /* unit | unit */
+
+    "movd    4(%%ebx), %%mm6\n"        /* clev */
+    "punpckldq %%mm6, %%mm6\n"        /* clev | clev */
+
+    "movd    8(%%ebx), %%mm7\n"        /* slev */
+    "punpckldq %%mm7, %%mm7\n"      /* slev | slev */
+    /* channels sit 1024 bytes apart, starting at samples */
+".loop4:\n"
+    "movq    (%%eax), %%mm0\n"       /* left */
+    "movq    2048(%%eax), %%mm1\n"   /* right */
+    "movq    1024(%%eax), %%mm2\n"    /* center */
+    "movq    3072(%%eax), %%mm3\n"    /* sur */
+    "pfmul    %%mm5, %%mm0\n"         /* left *= unit */
+    "pfmul    %%mm5, %%mm1\n"         /* right *= unit */
+    "pfmul    %%mm6, %%mm2\n"         /* center *= clev */
+    "pfadd    %%mm2, %%mm0\n"         /* left += center */
+    "pfmul    %%mm7, %%mm3\n"         /* sur *= slev */
+    "pfadd     %%mm2, %%mm1\n"        /* right += center */
+    "pfsub    %%mm3, %%mm0\n"         /* left -= sur (subtracted on the left) */
+    "pfadd    %%mm3, %%mm1\n"         /* right += sur */
+    /* results overwrite the first two channel slots */
+    "movq    %%mm0, (%%eax)\n"
+    "movq    %%mm1, 1024(%%eax)\n"
+
+    "addl    $8, %%eax\n"             /* advance by two floats */
+    "decl     %%ecx\n"
+    "jnz    .loop4\n"
+    /* NOTE(review): no clobber list is declared for the mm registers or memory */
+    "popl    %%ecx\n"
+    "femms\n"                         /* reset MMX/3DNow! state */
+    : "=a" (samples)
+    : "a" (samples), "b" (dm_par));
+}
+
+void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{   /* 3DNow! in-place downmix: left/right/sur -> left/right */
+    __asm__ __volatile__ (
+    "pushl    %%ecx\n"
+    "movl    $128, %%ecx\n"            /* loop counter: 128 passes, 2 floats each */
+    /* eax = samples, ebx = dm_par; each gain is copied into both register halves */
+    "movd    (%%ebx), %%mm5\n"        /* unit */
+    "punpckldq %%mm5, %%mm5\n"        /* unit | unit */
+
+    "movd    8(%%ebx), %%mm7\n"        /* slev */
+    "punpckldq %%mm7, %%mm7\n"      /* slev | slev */
+    /* channels sit 1024 bytes apart, starting at samples */
+".loop5:\n"
+    "movq    (%%eax), %%mm0\n"       /* left */
+    "movq    1024(%%eax), %%mm1\n"   /* right */
+    "movq    2048(%%eax), %%mm3\n"    /* sur */
+    "pfmul    %%mm5, %%mm0\n"         /* left *= unit */
+    "pfmul    %%mm5, %%mm1\n"         /* right *= unit */
+    "pfmul    %%mm7, %%mm3\n"         /* sur *= slev */
+    "pfsub    %%mm3, %%mm0\n"         /* left -= sur (subtracted on the left) */
+    "pfadd    %%mm3, %%mm1\n"         /* right += sur */
+    /* results overwrite the first two channel slots */
+    "movq    %%mm0, (%%eax)\n"
+    "movq    %%mm1, 1024(%%eax)\n"
+
+    "addl    $8, %%eax\n"             /* advance by two floats */
+    "decl     %%ecx\n"
+    "jnz    .loop5\n"
+    /* NOTE(review): no clobber list is declared for the mm registers or memory */
+    "popl    %%ecx\n"
+    "femms\n"                         /* reset MMX/3DNow! state */
+    : "=a" (samples)
+    : "a" (samples), "b" (dm_par));
+}
+
+void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{   /* 3DNow! in-place downmix: left/center/right -> left/right */
+    __asm__ __volatile__ (
+    "pushl    %%ecx\n"
+    "movl    $128, %%ecx\n"            /* loop counter: 128 passes, 2 floats each */
+    /* eax = samples, ebx = dm_par; each gain is copied into both register halves */
+    "movd    (%%ebx), %%mm5\n"        /* unit */
+    "punpckldq %%mm5, %%mm5\n"        /* unit | unit */
+
+    "movd    4(%%ebx), %%mm6\n"        /* clev */
+    "punpckldq %%mm6, %%mm6\n"      /* clev | clev */
+    /* channels sit 1024 bytes apart, starting at samples */
+".loop6:\n"
+    "movq    (%%eax), %%mm0\n"       /* left */
+    "movq    2048(%%eax), %%mm1\n"   /* right */
+    "movq   1024(%%eax), %%mm2\n"   /* center */
+    "pfmul    %%mm5, %%mm0\n"         /* left *= unit */
+    "pfmul    %%mm5, %%mm1\n"         /* right *= unit */
+    "pfmul    %%mm6, %%mm2\n"         /* center *= clev */
+    "pfadd    %%mm2, %%mm0\n"         /* left += center */
+    "pfadd     %%mm2, %%mm1\n"        /* right += center */
+    /* results overwrite the first two channel slots */
+    "movq    %%mm0, (%%eax)\n"
+    "movq    %%mm1, 1024(%%eax)\n"
+
+    "addl    $8, %%eax\n"             /* advance by two floats */
+    "decl     %%ecx\n"
+    "jnz    .loop6\n"
+    /* NOTE(review): no clobber list is declared for the mm registers or memory */
+    "popl    %%ecx\n"
+    "femms\n"                         /* reset MMX/3DNow! state */
+    : "=a" (samples)
+    : "a" (samples), "b" (dm_par));
+}
+
+void _M( stream_sample_1ch_to_s16 ) (s16 *s16_samples, float *left)
+{   /* 3DNow!: scale 256 floats to s16 and store each sample twice (c0 c0 c1 c1) */
+    __asm__ __volatile__ (
+    "pushl %%ecx\n"
+    "pushl %%edx\n"
+    /* eax = s16_samples (output), ebx = left (input), mm7 = scale factor */
+    "movl   $sqrt2_3dn, %%edx\n"     /* address of the 0.7071068 constant */
+    "movd  (%%edx), %%mm7\n"
+    "punpckldq %%mm7, %%mm7\n"   /* sqrt2 | sqrt2 */
+    "movl $128, %%ecx\n"         /* 128 passes, 2 input floats per pass */
+
+".loop2:\n"
+    "movq (%%ebx), %%mm0\n"        /* c1 | c0 */
+    "pfmul   %%mm7, %%mm0\n"       /* scale both samples */
+
+    "pf2id %%mm0, %%mm0\n"        /* c1 c0 --> mm0, int_32 */
+
+    "packssdw %%mm0, %%mm0\n"        /* c1 c0 c1 c0 --> mm0, int_16 */
+    "punpcklwd %%mm0, %%mm0\n"       /* c1 c1 c0 c0: duplicate each sample */
+    "movq %%mm0, (%%eax)\n"          /* 4 s16 values = 8 output bytes */
+    "addl $8, %%eax\n"
+    "addl $8, %%ebx\n"
+
+    "decl %%ecx\n"
+    "jnz .loop2\n"
+    /* NOTE(review): no clobber list is declared for the mm registers or memory */
+    "popl %%edx\n"
+    "popl %%ecx\n"
+    "femms\n"                        /* reset MMX/3DNow! state */
+    : "=a" (s16_samples), "=b" (left)
+    : "a" (s16_samples), "b" (left));
+}
+
+void _M( stream_sample_2ch_to_s16 ) (s16 *s16_samples, float *left, float *right)
+{
+    /* 3DNow!: interleave 256 left/right floats into s16 pairs (l0 r0 l1 r1 ...) */
+    __asm__ __volatile__ (
+    "pushl %%ecx\n"
+    "movl $128, %%ecx\n"        /* 128 passes, 2 samples per channel per pass */
+    /* eax = s16_samples (output), ebx = left, edx = right */
+".loop1:\n"
+    "movq  (%%ebx), %%mm0\n"    /* l1 | l0 */
+    "movq  (%%edx), %%mm1\n"    /* r1 | r0 */
+    "movq   %%mm0,  %%mm2\n"    /* l1 | l0 */
+    "punpckldq %%mm1, %%mm0\n"    /* r0 | l0 */
+    "punpckhdq %%mm1, %%mm2\n"    /* r1 | l1 */
+
+    "pf2id    %%mm0, %%mm0\n"    /* r0 l0 --> mm0, int_32 */
+    "pf2id    %%mm2, %%mm2\n"    /* r1 l1 --> mm2, int_32 */
+    
+    "packssdw %%mm2, %%mm0\n"    /* r1 l1 r0 l0 --> mm0, int_16 */
+
+    "movq %%mm0, (%%eax)\n"      /* all four s16 results: 8 output bytes */
+    /* mm2 is not stored: it holds int_32 data and would land past this output */
+    "addl $8, %%eax\n"
+    "addl $8, %%ebx\n"
+    "addl $8, %%edx\n"
+
+    "decl %%ecx\n"
+    "jnz .loop1\n"
+    /* NOTE(review): no clobber list is declared for the mm registers or memory */
+    "popl %%ecx\n"
+    "femms\n"                    /* reset MMX/3DNow! state */
+    : "=a" (s16_samples), "=b" (left), "=d" (right)
+    : "a" (s16_samples), "b" (left), "d" (right));
+    
+}
+
diff --git a/src/ac3_decoder/ac3_downmix_c.c b/plugins/downmix/ac3_downmix_c.c
similarity index 82%
rename from src/ac3_decoder/ac3_downmix_c.c
rename to plugins/downmix/ac3_downmix_c.c
index 08573af563..759933d6a7 100644
--- a/src/ac3_decoder/ac3_downmix_c.c
+++ b/plugins/downmix/ac3_downmix_c.c
@@ -1,8 +1,8 @@
 /*****************************************************************************
- * ac3_downmix_c.c: ac3 downmix functions
+ * ac3_downmix_c.c: ac3 downmix functions in C
  *****************************************************************************
  * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_downmix_c.c,v 1.8 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_downmix_c.c,v 1.1 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Renaud Dartus <reno@videolan.org>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -22,6 +22,12 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
 
+#define MODULE_NAME downmix
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
 #include "defs.h"
 
 #include <string.h>                                              /* memcpy() */
@@ -31,12 +37,9 @@
 #include "threads.h"
 #include "mtime.h"
 
-#include "stream_control.h"
-#include "input_ext-dec.h"
+#include "ac3_downmix.h"
 
-#include "ac3_decoder.h"
-
-void downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void _M( downmix_3f_2r_to_2ch ) (float *samples, dm_par_t *dm_par)
 {
     int i;
     float *left, *right, *center, *left_sur, *right_sur;
@@ -56,7 +59,7 @@ void downmix_3f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
     }
 }
 
-void downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t *dm_par)
 {
     int i;
     float *left, *right, *left_sur, *right_sur;
@@ -75,7 +78,7 @@ void downmix_2f_2r_to_2ch_c (float *samples, dm_par_t *dm_par)
     }
 }
 
-void downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t *dm_par)
 {
     int i;
     float *left, *right, *center, *right_sur;
@@ -95,7 +98,7 @@ void downmix_3f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
 }
 
 
-void downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t *dm_par)
 {
     int i;
     float *left, *right, *right_sur;
@@ -114,7 +117,7 @@ void downmix_2f_1r_to_2ch_c (float *samples, dm_par_t *dm_par)
 }
 
 
-void downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par)
+void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t *dm_par)
 {
     int i;
     float *left, *right, *center;
@@ -133,7 +136,7 @@ void downmix_3f_0r_to_2ch_c (float *samples, dm_par_t *dm_par)
 }
 
 
-void stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *right)
+void _M( stream_sample_2ch_to_s16 ) (s16 *out_buf, float *left, float *right)
 {
     int i;
     for (i=0; i < 256; i++) {
@@ -143,7 +146,7 @@ void stream_sample_2ch_to_s16_c (s16 *out_buf, float *left, float *right)
 }
 
 
-void stream_sample_1ch_to_s16_c (s16 *out_buf, float *center)
+void _M( stream_sample_1ch_to_s16 ) (s16 *out_buf, float *center)
 {
     int i;
     float tmp;
@@ -153,3 +156,4 @@ void stream_sample_1ch_to_s16_c (s16 *out_buf, float *center)
         *out_buf++ = tmp;
     }
 }
+
diff --git a/plugins/downmix/ac3_downmix_common.h b/plugins/downmix/ac3_downmix_common.h
new file mode 100644
index 0000000000..305cdb462f
--- /dev/null
+++ b/plugins/downmix/ac3_downmix_common.h
@@ -0,0 +1,32 @@
+/*****************************************************************************
+ * ac3_downmix_common.h: ac3 downmix functions headers
+ *****************************************************************************
+ * Copyright (C) 1999, 2000, 2001 VideoLAN
+ * $Id: ac3_downmix_common.h,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ *          Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+void _M( downmix_3f_2r_to_2ch )     ( float *, dm_par_t * ); /* 3 front + 2 rear */
+void _M( downmix_2f_2r_to_2ch )     ( float *, dm_par_t * ); /* 2 front + 2 rear */
+void _M( downmix_3f_1r_to_2ch )     ( float *, dm_par_t * ); /* 3 front + 1 rear */
+void _M( downmix_2f_1r_to_2ch )     ( float *, dm_par_t * ); /* 2 front + 1 rear */
+void _M( downmix_3f_0r_to_2ch )     ( float *, dm_par_t * ); /* 3 front, no rear */
+void _M( stream_sample_2ch_to_s16 ) ( s16 *, float *, float * ); /* out, left, right */
+void _M( stream_sample_1ch_to_s16 ) ( s16 *, float * );          /* out, single input */
+
diff --git a/plugins/downmix/ac3_downmix_sse.c b/plugins/downmix/ac3_downmix_sse.c
new file mode 100644
index 0000000000..ce7ebc653b
--- /dev/null
+++ b/plugins/downmix/ac3_downmix_sse.c
@@ -0,0 +1,315 @@
+/*****************************************************************************
+ * ac3_downmix_sse.c: accelerated SSE ac3 downmix functions
+ *****************************************************************************
+ * Copyright (C) 1999, 2000, 2001 VideoLAN
+ * $Id: ac3_downmix_sse.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ *          Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME downmixsse
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_downmix.h"
+
+void sqrt2_sse (void)   /* data holder for a float constant; not called in this file */
+{   /* NOTE(review): code reading the float through this symbol's address gets it only if no prologue is emitted here - confirm build flags */
+    __asm__ (".float 0f0.7071068");   /* raw 32-bit float 0.7071068, i.e. 1/sqrt(2), placed in the function body */
+}
+
+void _M( downmix_3f_2r_to_2ch ) (float * samples, dm_par_t * dm_par)
+{
+    /* left' = unit*left + clev*center + slev*leftsur, right' likewise with
+     * rightsur; 256 floats per channel, channels 1024 bytes apart. */
+    __asm__ __volatile__ (
+    "pushl %%ecx\n"
+    "movl  $64,  %%ecx\n"            /* loop counter: 4 floats per pass */
+
+    "movss    (%%ebx), %%xmm5\n"        /* unit */
+    "shufps    $0, %%xmm5, %%xmm5\n"    /* unit | unit | unit | unit */
+    "movss    4(%%ebx), %%xmm6\n"        /* clev */
+    "shufps    $0, %%xmm6, %%xmm6\n"    /* clev | clev | clev | clev */
+    "movss    8(%%ebx), %%xmm7\n"        /* slev */
+    "shufps    $0, %%xmm7, %%xmm7\n"    /* slev | slev | slev | slev */
+
+"1:\n"                                  /* numeric local label: no clash if the asm is duplicated */
+    "movups    (%%eax),     %%xmm0\n"  /* left */
+    "movups    2048(%%eax), %%xmm1\n"  /* right */
+    "movups 1024(%%eax), %%xmm2\n"    /* center */
+    "movups    3072(%%eax), %%xmm3\n"    /* leftsur */
+    "movups    4096(%%eax), %%xmm4\n"    /* rightsur */
+    "mulps    %%xmm5, %%xmm0\n"
+    "mulps    %%xmm5, %%xmm1\n"
+    "mulps    %%xmm6, %%xmm2\n"
+    "addps    %%xmm2, %%xmm0\n"
+    "addps     %%xmm2, %%xmm1\n"
+    "mulps    %%xmm7, %%xmm3\n"
+    "mulps    %%xmm7, %%xmm4\n"
+    "addps    %%xmm3, %%xmm0\n"
+    "addps    %%xmm4, %%xmm1\n"
+
+    "movups    %%xmm0, (%%eax)\n"       /* new left, in place */
+    "movups    %%xmm1, 1024(%%eax)\n"   /* new right, stored over the center slot */
+
+    "addl    $16, %%eax\n"
+    "decl     %%ecx\n"
+    "jnz    1b\n"
+
+    "popl   %%ecx\n"
+    : "=a" (samples)
+    : "a" (samples), "b" (dm_par) : "memory", "cc");  /* asm writes samples[] and flags */
+}
+
+void _M( downmix_2f_2r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{
+    /* left' = unit*left + slev*leftsur, right' = unit*right + slev*rightsur;
+     * 256 floats per channel, channels 1024 bytes apart. */
+    __asm__ __volatile__ (
+    "pushl %%ecx\n"
+    "movl  $64, %%ecx\n"            /* loop counter: 4 floats per pass */
+
+    "movss  (%%ebx), %%xmm5\n"        /* unit */
+    "shufps $0, %%xmm5, %%xmm5\n"   /* unit | unit | unit | unit */
+    "movss    8(%%ebx), %%xmm7\n"        /* slev */
+    "shufps    $0, %%xmm7, %%xmm7\n"    /* slev | slev | slev | slev */
+
+"1:\n"                                  /* numeric local label: no clash if the asm is duplicated */
+    "movups    (%%eax), %%xmm0\n"      /* left */
+    "movups    1024(%%eax), %%xmm1\n"  /* right */
+    "movups 2048(%%eax), %%xmm3\n"    /* leftsur */
+    "movups    3072(%%eax), %%xmm4\n"    /* rightsur */
+    "mulps    %%xmm5, %%xmm0\n"
+    "mulps    %%xmm5, %%xmm1\n"
+    "mulps    %%xmm7, %%xmm3\n"
+    "mulps    %%xmm7, %%xmm4\n"
+    "addps    %%xmm3, %%xmm0\n"
+    "addps    %%xmm4, %%xmm1\n"
+
+    "movups    %%xmm0, (%%eax)\n"       /* new left, in place */
+    "movups    %%xmm1, 1024(%%eax)\n"   /* new right, in place */
+
+    "addl    $16, %%eax\n"
+    "decl     %%ecx\n"
+    "jnz    1b\n"
+    "popl    %%ecx\n"
+    : "=a" (samples)
+    : "a" (samples), "b" (dm_par) : "memory", "cc");  /* asm writes samples[] and flags */
+}
+
+void _M( downmix_3f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{
+    /* left'  = unit*left  + clev*center - slev*sur
+     * right' = unit*right + clev*center + slev*sur
+     * for 256 floats per channel, channels 1024 bytes apart. */
+    __asm__ __volatile__ (
+    "pushl    %%ecx\n"
+    "movl    $64, %%ecx\n"            /* loop counter: 4 floats per pass */
+
+    "movss    (%%ebx), %%xmm5\n"        /* unit */
+    "shufps    $0, %%xmm5, %%xmm5\n"    /* unit | unit | unit | unit */
+    "movss    4(%%ebx), %%xmm6\n"        /* clev */
+    "shufps    $0, %%xmm6, %%xmm6\n"    /* clev | clev | clev | clev */
+    "movss    8(%%ebx), %%xmm7\n"        /* slev */
+    "shufps    $0, %%xmm7, %%xmm7\n"    /* slev | slev | slev | slev */
+
+"1:\n"                                  /* numeric local label: no clash if the asm is duplicated */
+    "movups    (%%eax), %%xmm0\n"      /* left */
+    "movups    2048(%%eax), %%xmm1\n"  /* right */
+    "movups    1024(%%eax), %%xmm2\n"    /* center */
+    "movups    3072(%%eax), %%xmm3\n"    /* sur */
+    "mulps    %%xmm5, %%xmm0\n"
+    "mulps    %%xmm5, %%xmm1\n"
+    "mulps    %%xmm6, %%xmm2\n"
+    "addps    %%xmm2, %%xmm0\n"
+    "mulps    %%xmm7, %%xmm3\n"
+    "addps     %%xmm2, %%xmm1\n"
+    "subps    %%xmm3, %%xmm0\n"         /* surround is subtracted on the left ... */
+    "addps    %%xmm3, %%xmm1\n"         /* ... and added on the right */
+
+    "movups    %%xmm0, (%%eax)\n"       /* new left, in place */
+    "movups    %%xmm1, 1024(%%eax)\n"   /* new right, stored over the center slot */
+
+    "addl    $16, %%eax\n"
+    "decl     %%ecx\n"
+    "jnz    1b\n"
+
+    "popl    %%ecx\n"
+    : "=a" (samples)
+    : "a" (samples), "b" (dm_par)
+    : "memory", "cc");                  /* asm writes samples[] and flags */
+}
+
+void _M( downmix_2f_1r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{
+    /* left' = unit*left - slev*sur, right' = unit*right + slev*sur;
+     * 256 floats per channel, channels 1024 bytes apart. */
+    __asm__ __volatile__ (
+    "pushl    %%ecx\n"
+    "movl    $64, %%ecx\n"            /* loop counter: 4 floats per pass */
+
+    "movss    (%%ebx), %%xmm5\n"        /* unit */
+    "shufps    $0, %%xmm5, %%xmm5\n"    /* unit | unit | unit | unit */
+    "movss    8(%%ebx), %%xmm7\n"        /* slev */
+    "shufps    $0, %%xmm7, %%xmm7\n"    /* slev | slev | slev | slev */
+
+"1:\n"                                  /* numeric local label: no clash if the asm is duplicated */
+    "movups    (%%eax), %%xmm0\n"      /* left */
+    "movups    1024(%%eax), %%xmm1\n"  /* right */
+    "movups    2048(%%eax), %%xmm3\n"    /* sur */
+    "mulps    %%xmm5, %%xmm0\n"
+    "mulps    %%xmm5, %%xmm1\n"
+    "mulps    %%xmm7, %%xmm3\n"
+    "subps    %%xmm3, %%xmm0\n"         /* surround is subtracted on the left ... */
+    "addps    %%xmm3, %%xmm1\n"         /* ... and added on the right */
+
+    "movups    %%xmm0, (%%eax)\n"       /* new left, in place */
+    "movups    %%xmm1, 1024(%%eax)\n"   /* new right, in place */
+
+    "addl    $16, %%eax\n"
+    "decl     %%ecx\n"
+    "jnz    1b\n"
+
+    "popl    %%ecx\n"
+    : "=a" (samples)
+    : "a" (samples), "b" (dm_par)
+    : "memory", "cc");                  /* asm writes samples[] and flags */
+}
+
+void _M( downmix_3f_0r_to_2ch ) (float *samples, dm_par_t * dm_par)
+{
+    /* left' = unit*left + clev*center, right' = unit*right + clev*center;
+     * 256 floats per channel, channels 1024 bytes apart. */
+    __asm__ __volatile__ (
+    "pushl    %%ecx\n"
+    "movl    $64, %%ecx\n"            /* loop counter: 4 floats per pass */
+
+    "movss    (%%ebx), %%xmm5\n"        /* unit */
+    "shufps    $0, %%xmm5, %%xmm5\n"    /* unit | unit | unit | unit */
+    "movss    4(%%ebx), %%xmm6\n"        /* clev */
+    "shufps    $0, %%xmm6, %%xmm6\n"    /* clev | clev | clev | clev */
+
+"1:\n"                                  /* numeric local label: no clash if the asm is duplicated */
+    "movups    (%%eax), %%xmm0\n"      /* left */
+    "movups    2048(%%eax), %%xmm1\n"  /* right */
+    "movups 1024(%%eax), %%xmm2\n"    /* center */
+    "mulps    %%xmm5, %%xmm0\n"
+    "mulps    %%xmm5, %%xmm1\n"
+    "mulps    %%xmm6, %%xmm2\n"
+    "addps    %%xmm2, %%xmm0\n"
+    "addps     %%xmm2, %%xmm1\n"
+
+    "movups    %%xmm0, (%%eax)\n"       /* new left, in place */
+    "movups    %%xmm1, 1024(%%eax)\n"   /* new right, stored over the center slot */
+
+    "addl    $16, %%eax\n"
+    "decl     %%ecx\n"
+    "jnz    1b\n"
+    "popl    %%ecx\n"
+    : "=a" (samples)
+    : "a" (samples), "b" (dm_par) : "memory", "cc");  /* asm writes samples[] and flags */
+}
+    
+void _M( stream_sample_1ch_to_s16 ) (s16 *s16_samples, float *left)
+{
+    /* Mono to interleaved stereo: each of the 256 input floats is scaled by
+     * 1/sqrt(2), packed to s16 with signed saturation and written twice. */
+    static const float f_scale = 0.7071068f;
+
+    __asm__ __volatile__ (
+    "movss %4, %%xmm7\n"              /* read the operand before the push moves %esp */
+    "shufps $0, %%xmm7, %%xmm7\n"   /* scale | scale | scale | scale */
+    "pushl %%ecx\n"
+    "movl $64, %%ecx\n"             /* loop counter: 4 floats per pass */
+
+"1:\n"                              /* numeric local label: no clash if the asm is duplicated */
+    "movups (%%ebx), %%xmm0\n"        /* c3 | c2 | c1 | c0 */
+    "mulps   %%xmm7, %%xmm0\n"
+    "movhlps %%xmm0, %%xmm2\n"        /* c3 | c2 */
+
+    "cvtps2pi %%xmm0, %%mm0\n"        /* c1 c0 --> mm0, int_32 */
+    "cvtps2pi %%xmm2, %%mm1\n"        /* c3 c2 --> mm1, int_32 */
+    "packssdw %%mm0, %%mm0\n"        /* c1 c0 c1 c0 --> mm0, int_16 */
+    "packssdw %%mm1, %%mm1\n"        /* c3 c2 c3 c2 --> mm1, int_16 */
+    "punpcklwd %%mm0, %%mm0\n"       /* c1 c1 c0 c0: same sample on both channels */
+    "punpcklwd %%mm1, %%mm1\n"       /* c3 c3 c2 c2 */
+
+    "movq %%mm0, (%%eax)\n"
+    "movq %%mm1, 8(%%eax)\n"
+    "addl $16, %%eax\n"
+    "addl $16, %%ebx\n"
+    "decl %%ecx\n"
+    "jnz 1b\n"
+
+    "popl %%ecx\n"
+    "emms\n"
+    : "=a" (s16_samples), "=b" (left)
+    : "a" (s16_samples), "b" (left), "m" (f_scale) : "memory", "cc");
+}
+
+void _M( stream_sample_2ch_to_s16 ) (s16 *s16_samples, float *left, float *right)
+{
+    /* Interleaves 256 left and 256 right floats into s16 pairs (l0 r0 l1 r1
+     * ...); values are converted unscaled and packed with signed saturation. */
+    __asm__ __volatile__ (
+    "pushl %%ecx\n"
+    "movl $64, %%ecx\n"            /* loop counter: 4 sample pairs per pass */
+
+"1:\n"                             /* numeric local label: no clash if the asm is duplicated */
+    "movups  (%%ebx), %%xmm0\n"    /* l3 | l2 | l1 | l0 */
+    "movups  (%%edx), %%xmm1\n"    /* r3 | r2 | r1 | r0 */
+    "movhlps  %%xmm0, %%xmm2\n"    /* l3 | l2 */
+    "movhlps  %%xmm1, %%xmm3\n"    /* r3 | r2 */
+    "unpcklps %%xmm1, %%xmm0\n"    /* r1 | l1 | r0 | l0 */
+    "unpcklps %%xmm3, %%xmm2\n"    /* r3 | l3 | r2 | l2 */
+
+    "cvtps2pi %%xmm0, %%mm0\n"    /* r0 l0 --> mm0, int_32 */
+    "movhlps  %%xmm0, %%xmm0\n"
+    "cvtps2pi %%xmm0, %%mm1\n"    /* r1 l1 --> mm1, int_32 */
+    "cvtps2pi %%xmm2, %%mm2\n"    /* r2 l2 --> mm2, int_32 */
+    "movhlps  %%xmm2, %%xmm2\n"
+    "cvtps2pi %%xmm2, %%mm3\n"    /* r3 l3 --> mm3, int_32 */
+
+    "packssdw %%mm1, %%mm0\n"    /* r1 l1 r0 l0 --> mm0, int_16 */
+    "packssdw %%mm3, %%mm2\n"    /* r3 l3 r2 l2 --> mm2, int_16 */
+
+    "movq %%mm0, (%%eax)\n"
+    "movq %%mm2, 8(%%eax)\n"
+    "addl $16, %%eax\n"
+    "addl $16, %%ebx\n"
+    "addl $16, %%edx\n"
+
+    "decl %%ecx\n"
+    "jnz 1b\n"
+
+    "popl %%ecx\n"
+    "emms\n"
+    : "=a" (s16_samples), "=b" (left), "=d" (right)
+    : "a" (s16_samples), "b" (left), "d" (right) : "memory", "cc");
+}
+
diff --git a/plugins/downmix/downmix.c b/plugins/downmix/downmix.c
new file mode 100644
index 0000000000..675424b887
--- /dev/null
+++ b/plugins/downmix/downmix.c
@@ -0,0 +1,149 @@
+/*****************************************************************************
+ * downmix.c : AC3 downmix module
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: downmix.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME downmix
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <stdlib.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_downmix.h"
+#include "ac3_downmix_common.h"
+
+#include "modules.h"
+
+/*****************************************************************************
+ * Local and extern prototypes.
+ *****************************************************************************/
+static void downmix_getfunctions( function_list_t * p_function_list );
+static int  downmix_Probe       ( probedata_t *p_data );
+
+/*****************************************************************************
+ * Build configuration tree.
+ *****************************************************************************/
+MODULE_CONFIG_START  /* presumably defines the p_config stored by MODULE_ACTIVATE - TODO confirm in the module headers */
+ADD_WINDOW( "Configuration for AC3 downmix module" )
+    ADD_COMMENT( "Ha, ha -- nothing to configure yet" )  /* only a comment entry: no option is declared */
+MODULE_CONFIG_END
+
+/*****************************************************************************
+ * InitModule: get the module structure and configuration.
+ *****************************************************************************
+ * We have to fill psz_name, psz_longname and psz_version. These variables
+ * will be strdup()ed later by the main application because the module can
+ * be unloaded later to save memory, and we want to be able to access this
+ * data even after the module has been unloaded.
+ *****************************************************************************/
+MODULE_INIT
+{
+    /* Advertise the downmix capability */
+    p_module->i_capabilities =
+        MODULE_CAPABILITY_DOWNMIX | MODULE_CAPABILITY_NULL;
+    /* Static identification strings */
+    p_module->psz_version  = VERSION;
+    p_module->psz_longname = "AC3 downmix module";
+    p_module->psz_name     = MODULE_STRING;
+    return 0;
+}
+
+/*****************************************************************************
+ * ActivateModule: set the module to a usable state.
+ *****************************************************************************
+ * This function fills the capability functions and the configuration
+ * structure. Once ActivateModule() has been called, the i_usage can
+ * be set to 0 and calls to NeedModule() be made to increment it. To unload
+ * the module, one has to wait until i_usage == 0 and call DeactivateModule().
+ *****************************************************************************/
+MODULE_ACTIVATE
+{
+    /* Function table; it is released again by the deactivation handler */
+    p_module->p_functions = malloc( sizeof( module_functions_t ) );
+    if( !p_module->p_functions )
+    {
+        return -1;
+    }
+
+    /* Attach the configuration, then fill in the downmix entry points */
+    p_module->p_config = p_config;
+    downmix_getfunctions( &p_module->p_functions->downmix );
+    return 0;
+}
+
+/*****************************************************************************
+ * DeactivateModule: make sure the module can be unloaded.
+ *****************************************************************************
+ * This function must only be called when i_usage == 0. If it successfully
+ * returns, i_usage can be set to -1 and the module unloaded. Be careful to
+ * lock usage_lock during the whole process.
+ *****************************************************************************/
+MODULE_DEACTIVATE
+{
+    free( p_module->p_functions );   /* table allocated by MODULE_ACTIVATE */
+    p_module->p_functions = NULL;    /* no dangling pointer; a repeated call becomes free( NULL ) */
+    return( 0 );
+}
+
+/* Following functions are local */
+
+/*****************************************************************************
+ * Functions exported as capabilities. They are declared as static so that
+ * we don't pollute the namespace too much.
+ *****************************************************************************/
+static void downmix_getfunctions( function_list_t * p_function_list )
+{
+#define DMX p_function_list->functions.downmix   /* shorthand, undefined again below */
+    DMX.pf_stream_sample_1ch_to_s16 = _M( stream_sample_1ch_to_s16 );
+    DMX.pf_stream_sample_2ch_to_s16 = _M( stream_sample_2ch_to_s16 );
+    DMX.pf_downmix_3f_0r_to_2ch = _M( downmix_3f_0r_to_2ch );
+    DMX.pf_downmix_2f_1r_to_2ch = _M( downmix_2f_1r_to_2ch );
+    DMX.pf_downmix_2f_2r_to_2ch = _M( downmix_2f_2r_to_2ch );
+    DMX.pf_downmix_3f_1r_to_2ch = _M( downmix_3f_1r_to_2ch );
+    DMX.pf_downmix_3f_2r_to_2ch = _M( downmix_3f_2r_to_2ch );
+#undef DMX
+    p_function_list->pf_probe = downmix_Probe;   /* scoring callback of this module */
+}
+
+/*****************************************************************************
+ * downmix_Probe: returns a preference score
+ *****************************************************************************/
+static int downmix_Probe( probedata_t *p_data )
+{
+    /* 999 when TestMethod() matches "downmix" (presumably an explicit
+     * request - to be confirmed); else 50. No CPU feature is tested here. */
+    if( !TestMethod( DOWNMIX_METHOD_VAR, "downmix" ) )
+    {
+        return 50;
+    }
+    return 999;
+}
+
diff --git a/plugins/downmix/downmix3dn.c b/plugins/downmix/downmix3dn.c
new file mode 100644
index 0000000000..f05a8a78dc
--- /dev/null
+++ b/plugins/downmix/downmix3dn.c
@@ -0,0 +1,154 @@
+/*****************************************************************************
+ * downmix3dn.c : accelerated 3D Now! AC3 downmix module
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: downmix3dn.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME downmix3dn
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <stdlib.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_downmix.h"
+#include "ac3_downmix_common.h"
+
+#include "modules.h"
+
+/*****************************************************************************
+ * Local and extern prototypes.
+ *****************************************************************************/
+static void downmix_getfunctions( function_list_t * p_function_list );
+static int  downmix_Probe       ( probedata_t *p_data );
+
+/*****************************************************************************
+ * Build configuration tree.
+ *****************************************************************************/
+MODULE_CONFIG_START  /* presumably defines the p_config stored by MODULE_ACTIVATE - TODO confirm in the module headers */
+ADD_WINDOW( "Configuration for AC3 downmix3dn module" )
+    ADD_COMMENT( "Ha, ha -- nothing to configure yet" )  /* only a comment entry: no option is declared */
+MODULE_CONFIG_END
+
+/*****************************************************************************
+ * InitModule: get the module structure and configuration.
+ *****************************************************************************
+ * We have to fill psz_name, psz_longname and psz_version. These variables
+ * will be strdup()ed later by the main application because the module can
+ * be unloaded later to save memory, and we want to be able to access this
+ * data even after the module has been unloaded.
+ *****************************************************************************/
+MODULE_INIT
+{
+    /* Advertise the downmix capability */
+    p_module->i_capabilities =
+        MODULE_CAPABILITY_DOWNMIX | MODULE_CAPABILITY_NULL;
+    /* Static identification strings */
+    p_module->psz_version  = VERSION;
+    p_module->psz_longname = "3D Now! AC3 downmix module";
+    p_module->psz_name     = MODULE_STRING;
+    return 0;
+}
+
+/*****************************************************************************
+ * ActivateModule: set the module to a usable state.
+ *****************************************************************************
+ * This function fills the capability functions and the configuration
+ * structure. Once ActivateModule() has been called, the i_usage can
+ * be set to 0 and calls to NeedModule() be made to increment it. To unload
+ * the module, one has to wait until i_usage == 0 and call DeactivateModule().
+ *****************************************************************************/
+MODULE_ACTIVATE
+{
+    /* Function table; it is released again by the deactivation handler */
+    p_module->p_functions = malloc( sizeof( module_functions_t ) );
+    if( !p_module->p_functions )
+    {
+        return -1;
+    }
+
+    /* Attach the configuration, then fill in the downmix entry points */
+    p_module->p_config = p_config;
+    downmix_getfunctions( &p_module->p_functions->downmix );
+    return 0;
+}
+
+/*****************************************************************************
+ * DeactivateModule: make sure the module can be unloaded.
+ *****************************************************************************
+ * This function must only be called when i_usage == 0. If it successfully
+ * returns, i_usage can be set to -1 and the module unloaded. Be careful to
+ * lock usage_lock during the whole process.
+ *****************************************************************************/
+MODULE_DEACTIVATE
+{
+    free( p_module->p_functions );   /* table allocated by MODULE_ACTIVATE */
+    p_module->p_functions = NULL;    /* no dangling pointer; a repeated call becomes free( NULL ) */
+    return( 0 );
+}
+
+/* Following functions are local */
+
+/*****************************************************************************
+ * Functions exported as capabilities. They are declared as static so that
+ * we don't pollute the namespace too much.
+ *****************************************************************************/
+static void downmix_getfunctions( function_list_t * p_function_list )
+{
+#define DMX p_function_list->functions.downmix   /* shorthand, undefined again below */
+    DMX.pf_stream_sample_1ch_to_s16 = _M( stream_sample_1ch_to_s16 );
+    DMX.pf_stream_sample_2ch_to_s16 = _M( stream_sample_2ch_to_s16 );
+    DMX.pf_downmix_3f_0r_to_2ch = _M( downmix_3f_0r_to_2ch );
+    DMX.pf_downmix_2f_1r_to_2ch = _M( downmix_2f_1r_to_2ch );
+    DMX.pf_downmix_2f_2r_to_2ch = _M( downmix_2f_2r_to_2ch );
+    DMX.pf_downmix_3f_1r_to_2ch = _M( downmix_3f_1r_to_2ch );
+    DMX.pf_downmix_3f_2r_to_2ch = _M( downmix_3f_2r_to_2ch );
+#undef DMX
+    p_function_list->pf_probe = downmix_Probe;   /* scoring callback of this module */
+}
+
+/*****************************************************************************
+ * downmix_Probe: returns a preference score
+ *****************************************************************************/
+static int downmix_Probe( probedata_t *p_data )
+{
+    /* 0 when TestCPU() reports no 3D Now! support; 999 when TestMethod()
+     * matches "downmix3dn" (presumably an explicit request - to be
+     * confirmed); 200 otherwise. */
+    int i_score = 0;
+
+    if( TestCPU( CPU_CAPABILITY_3DNOW ) )
+    {
+        i_score = TestMethod( DOWNMIX_METHOD_VAR, "downmix3dn" )
+                   ? 999 : 200;
+    }
+
+    return i_score;
+}
+
diff --git a/plugins/downmix/downmixsse.c b/plugins/downmix/downmixsse.c
new file mode 100644
index 0000000000..34d2c172b6
--- /dev/null
+++ b/plugins/downmix/downmixsse.c
@@ -0,0 +1,154 @@
+/*****************************************************************************
+ * downmixsse.c : accelerated SSE AC3 downmix module
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: downmixsse.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME downmixsse
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <stdlib.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_downmix.h"
+#include "ac3_downmix_common.h"
+
+#include "modules.h"
+
+/*****************************************************************************
+ * Local and extern prototypes.
+ *****************************************************************************/
+static void downmix_getfunctions( function_list_t * p_function_list );
+static int  downmix_Probe       ( probedata_t *p_data );
+
+/*****************************************************************************
+ * Build configuration tree.
+ *****************************************************************************/
+MODULE_CONFIG_START  /* presumably defines the p_config stored by MODULE_ACTIVATE - TODO confirm in the module headers */
+ADD_WINDOW( "Configuration for AC3 downmixsse module" )
+    ADD_COMMENT( "Ha, ha -- nothing to configure yet" )  /* only a comment entry: no option is declared */
+MODULE_CONFIG_END
+
+/*****************************************************************************
+ * InitModule: get the module structure and configuration.
+ *****************************************************************************
+ * We have to fill psz_name, psz_longname and psz_version. These variables
+ * will be strdup()ed later by the main application because the module can
+ * be unloaded later to save memory, and we want to be able to access this
+ * data even after the module has been unloaded.
+ *****************************************************************************/
+MODULE_INIT
+{
+    /* Advertise the downmix capability */
+    p_module->i_capabilities =
+        MODULE_CAPABILITY_DOWNMIX | MODULE_CAPABILITY_NULL;
+    /* Static identification strings */
+    p_module->psz_version  = VERSION;
+    p_module->psz_longname = "SSE AC3 downmix module";
+    p_module->psz_name     = MODULE_STRING;
+    return 0;
+}
+
+/*****************************************************************************
+ * ActivateModule: set the module to a usable state.
+ *****************************************************************************
+ * This function fills the capability functions and the configuration
+ * structure. Once ActivateModule() has been called, the i_usage can
+ * be set to 0 and calls to NeedModule() be made to increment it. To unload
+ * the module, one has to wait until i_usage == 0 and call DeactivateModule().
+ *****************************************************************************/
+MODULE_ACTIVATE
+{
+    /* Function table; it is released again by the deactivation handler */
+    p_module->p_functions = malloc( sizeof( module_functions_t ) );
+    if( !p_module->p_functions )
+    {
+        return -1;
+    }
+
+    /* Attach the configuration, then fill in the downmix entry points */
+    p_module->p_config = p_config;
+    downmix_getfunctions( &p_module->p_functions->downmix );
+    return 0;
+}
+
+/*****************************************************************************
+ * DeactivateModule: make sure the module can be unloaded.
+ *****************************************************************************
+ * This function must only be called when i_usage == 0. If it successfully
+ * returns, i_usage can be set to -1 and the module unloaded. Be careful to
+ * lock usage_lock during the whole process.
+ *****************************************************************************/
+MODULE_DEACTIVATE
+{
+    free( p_module->p_functions );   /* table allocated by MODULE_ACTIVATE */
+    p_module->p_functions = NULL;    /* no dangling pointer; a repeated call becomes free( NULL ) */
+    return( 0 );
+}
+
+/* Following functions are local */
+
+/*****************************************************************************
+ * Functions exported as capabilities. They are declared as static so that
+ * we don't pollute the namespace too much.
+ *****************************************************************************/
+static void downmix_getfunctions( function_list_t * p_function_list )
+{
+#define DMX p_function_list->functions.downmix   /* shorthand, undefined again below */
+    DMX.pf_stream_sample_1ch_to_s16 = _M( stream_sample_1ch_to_s16 );
+    DMX.pf_stream_sample_2ch_to_s16 = _M( stream_sample_2ch_to_s16 );
+    DMX.pf_downmix_3f_0r_to_2ch = _M( downmix_3f_0r_to_2ch );
+    DMX.pf_downmix_2f_1r_to_2ch = _M( downmix_2f_1r_to_2ch );
+    DMX.pf_downmix_2f_2r_to_2ch = _M( downmix_2f_2r_to_2ch );
+    DMX.pf_downmix_3f_1r_to_2ch = _M( downmix_3f_1r_to_2ch );
+    DMX.pf_downmix_3f_2r_to_2ch = _M( downmix_3f_2r_to_2ch );
+#undef DMX
+    p_function_list->pf_probe = downmix_Probe;   /* scoring callback of this module */
+}
+
+/*****************************************************************************
+ * downmix_Probe: returns a preference score
+ *****************************************************************************/
+static int downmix_Probe( probedata_t *p_data )
+{
+    /* 0 when TestCPU() reports no SSE support; 999 when TestMethod()
+     * matches "downmixsse" (presumably an explicit request - to be
+     * confirmed); 200 otherwise. */
+    int i_score = 0;
+
+    if( TestCPU( CPU_CAPABILITY_SSE ) )
+    {
+        i_score = TestMethod( DOWNMIX_METHOD_VAR, "downmixsse" )
+                   ? 999 : 200;
+    }
+
+    return i_score;
+}
+
diff --git a/plugins/idct/idctaltivec.c b/plugins/idct/idctaltivec.c
index d1b6f4ee2d..8a4e2ca8ef 100644
--- a/plugins/idct/idctaltivec.c
+++ b/plugins/idct/idctaltivec.c
@@ -2,7 +2,7 @@
  * idctaltivec.c : Altivec IDCT module
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: idctaltivec.c,v 1.5 2001/05/06 04:32:02 sam Exp $
+ * $Id: idctaltivec.c,v 1.6 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Christophe Massiot <massiot@via.ecp.fr>
  *
@@ -146,22 +146,18 @@ static void idct_getfunctions( function_list_t * p_function_list )
  *****************************************************************************/
 static int idct_Probe( probedata_t *p_data )
 {
-    if( TestCPU( CPU_CAPABILITY_ALTIVEC ) )
+    if( !TestCPU( CPU_CAPABILITY_ALTIVEC ) )
     {
-        if( TestMethod( IDCT_METHOD_VAR, "idctaltivec" ) )
-        {
-            return( 999 );
-        }
-        else
-        {
-            /* The Altivec iDCT is deactivated until it really works */
-            return( 0 /* 200 */ );
-        }
+        return( 0 );
     }
-    else
+
+    if( TestMethod( IDCT_METHOD_VAR, "idctaltivec" ) )
     {
-        return( 0 );
+        return( 999 );
     }
+
+    /* The Altivec iDCT is deactivated until it really works */
+    return( 0 /* 200 */ );
 }
 
 /*****************************************************************************
diff --git a/plugins/idct/idctmmx.c b/plugins/idct/idctmmx.c
index c0b22526c6..acc3702347 100644
--- a/plugins/idct/idctmmx.c
+++ b/plugins/idct/idctmmx.c
@@ -2,7 +2,7 @@
  * idctmmx.c : MMX IDCT module
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: idctmmx.c,v 1.10 2001/05/06 04:32:02 sam Exp $
+ * $Id: idctmmx.c,v 1.11 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
  *          Michel Lespinasse <walken@zoy.org>
@@ -151,21 +151,17 @@ static void idct_getfunctions( function_list_t * p_function_list )
  *****************************************************************************/
 static int idct_Probe( probedata_t *p_data )
 {
-    if( TestCPU( CPU_CAPABILITY_MMX ) )
+    if( !TestCPU( CPU_CAPABILITY_MMX ) )
     {
-        if( TestMethod( IDCT_METHOD_VAR, "idctmmx" ) )
-        {
-            return( 999 );
-        }
-        else
-        {
-            return( 150 );
-        }
+        return( 0 );
     }
-    else
+
+    if( TestMethod( IDCT_METHOD_VAR, "idctmmx" ) )
     {
-        return( 0 );
+        return( 999 );
     }
+
+    return( 150 );
 }
 
 /*****************************************************************************
diff --git a/plugins/idct/idctmmxext.c b/plugins/idct/idctmmxext.c
index f8a281d61c..e2c2e6688e 100644
--- a/plugins/idct/idctmmxext.c
+++ b/plugins/idct/idctmmxext.c
@@ -2,7 +2,7 @@
  * idctmmxext.c : MMX EXT IDCT module
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: idctmmxext.c,v 1.7 2001/05/06 04:32:02 sam Exp $
+ * $Id: idctmmxext.c,v 1.8 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
  *          Michel Lespinasse <walken@zoy.org>
@@ -151,21 +151,18 @@ static void idct_getfunctions( function_list_t * p_function_list )
  *****************************************************************************/
 static int idct_Probe( probedata_t *p_data )
 {
-    if( TestCPU( CPU_CAPABILITY_MMXEXT ) )
+    if( !TestCPU( CPU_CAPABILITY_MMXEXT ) )
     {
-        if( TestMethod( IDCT_METHOD_VAR, "idctmmxext" ) )
-        {
-            return( 999 );
-        }
-        else
-        {
-            return( 200 );
-        }
+        return( 0 );
     }
-    else
+    /* CPU test passed: 999 if TestMethod() succeeds, 200 otherwise */
+    if( TestMethod( IDCT_METHOD_VAR, "idctmmxext" ) )
     {
-        return( 0 );
+        return( 999 );
     }
+
+    return( 200 );
+
 }
 
 /*****************************************************************************
diff --git a/plugins/imdct/.cvsignore b/plugins/imdct/.cvsignore
new file mode 100644
index 0000000000..63e7180a26
--- /dev/null
+++ b/plugins/imdct/.cvsignore
@@ -0,0 +1 @@
+.dep
diff --git a/plugins/imdct/Makefile b/plugins/imdct/Makefile
new file mode 100644
index 0000000000..330287c9b6
--- /dev/null
+++ b/plugins/imdct/Makefile
@@ -0,0 +1,53 @@
+###############################################################################
+# vlc (VideoLAN Client) imdct module makefile
+# (c)2001 VideoLAN
+###############################################################################
+
+# Objects: the PLUGIN_* lists are linked into the .so targets below; the
+# BUILTIN_* lists name the same sources rebuilt with -DBUILTIN for the .a
+# archives. PLUGIN_IMDCTCOMMON is shared by the imdct and imdctsse builds.
+
+PLUGIN_IMDCT = imdct.o ac3_imdct_c.o ac3_srfft_c.o
+PLUGIN_IMDCTSSE = imdctsse.o ac3_imdct_sse.o ac3_srfft_sse.o
+PLUGIN_IMDCTCOMMON = ac3_imdct_common.o
+
+BUILTIN_IMDCT = $(PLUGIN_IMDCT:%.o=BUILTIN_IMDCT_%.o) \
+		$(PLUGIN_IMDCTCOMMON:%.o=BUILTIN_IMDCT_%.o)
+BUILTIN_IMDCTSSE = $(PLUGIN_IMDCTSSE:%.o=BUILTIN_IMDCTSSE_%.o) \
+		$(PLUGIN_IMDCTCOMMON:%.o=BUILTIN_IMDCTSSE_%.o)
+
+PLUGIN_C = $(PLUGIN_IMDCT) $(PLUGIN_IMDCTSSE) $(PLUGIN_IMDCTCOMMON)
+ALL_OBJ = $(PLUGIN_C) $(BUILTIN_IMDCT) $(BUILTIN_IMDCTSSE)
+
+# Virtual targets (shared definitions are pulled in from Makefile.modules).
+# The static pattern rules below compile each BUILTIN_* object from its .c
+# file with -DBUILTIN and its own MODULE_NAME; each also depends on .dep/%.d.
+
+include ../../Makefile.modules
+
+$(BUILTIN_IMDCT): BUILTIN_IMDCT_%.o: .dep/%.d
+$(BUILTIN_IMDCT): BUILTIN_IMDCT_%.o: %.c
+	$(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=imdct -c -o $@ $<
+
+$(BUILTIN_IMDCTSSE): BUILTIN_IMDCTSSE_%.o: .dep/%.d
+$(BUILTIN_IMDCTSSE): BUILTIN_IMDCTSSE_%.o: %.c
+	$(CC) $(CFLAGS) -DBUILTIN -DMODULE_NAME=imdctsse -c -o $@ $<
+
+# Real targets: one .so plugin and one .a archive per variant. imdct is
+# built from the *_c objects and imdctsse from the *_sse objects; both
+# variants also take ac3_imdct_common.o.
+
+../../lib/imdct.so: $(PLUGIN_IMDCT) $(PLUGIN_IMDCTCOMMON)
+	$(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS) 
+
+../../lib/imdct.a: $(BUILTIN_IMDCT)
+	ar r $@ $^
+	$(RANLIB) $@
+
+../../lib/imdctsse.so: $(PLUGIN_IMDCTSSE) $(PLUGIN_IMDCTCOMMON)
+	$(CC) $(PCFLAGS) -o $@ $^ $(PLCFLAGS) 
+
+../../lib/imdctsse.a: $(BUILTIN_IMDCTSSE)
+	ar r $@ $^
+	$(RANLIB) $@
+
diff --git a/plugins/imdct/ac3_imdct_c.c b/plugins/imdct/ac3_imdct_c.c
new file mode 100644
index 0000000000..3ebf16c9b3
--- /dev/null
+++ b/plugins/imdct/ac3_imdct_c.c
@@ -0,0 +1,262 @@
+/*****************************************************************************
+ * ac3_imdct_c.c: ac3 IMDCT in C
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: ac3_imdct_c.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ *          Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME imdct
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <string.h>                                              /* memcpy() */
+
+#include <math.h>
+#include <stdio.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+
+#include "ac3_imdct.h"
+#include "ac3_imdct_common.h"
+
+#ifndef M_PI
+#   define M_PI 3.14159265358979323846
+#endif
+
+void _M( fft_64p )  ( complex_t *x );
+void _M( fft_128p ) ( complex_t *x );
+
+static float window[] = {  /* 256 window coefficients, 0.00014 up to 1.0 */
+    0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
+    0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
+    0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
+    0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
+    0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
+    0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
+    0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
+    0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
+    0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
+    0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
+    0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
+    0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
+    0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
+    0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
+    0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
+    0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
+    0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
+    0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
+    0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
+    0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
+    0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
+    0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
+    0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
+    0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
+    0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
+    0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
+    0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
+    0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
+    0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
+    0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
+    0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
+    1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
+};
+
+static const int pm128[128] =  /* permutation of 0..127: fft_128p load order */
+{
+    0, 16, 32, 48, 64, 80,  96, 112,  8, 40, 72, 104, 24, 56,  88, 120,
+    4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44,  60, 76, 92, 108, 124,
+    2, 18, 34, 50, 66, 82,  98, 114, 10, 42, 74, 106, 26, 58,  90, 122,
+    6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62,  94, 126,
+    1, 17, 33, 49, 65, 81,  97, 113,  9, 41, 73, 105, 25, 57,  89, 121,
+    5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45,  61, 77, 93, 109, 125,
+    3, 19, 35, 51, 67, 83,  99, 115, 11, 43, 75, 107, 27, 59,  91, 123,
+    7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47,  63, 79, 95, 111, 127
+}; 
+
+static const int pm64[64] =  /* NOTE(review): not referenced in this file */
+{
+    0,  8, 16, 24, 32, 40, 48, 56,
+    4, 20, 36, 52, 12, 28, 44, 60,
+    2, 10, 18, 26, 34, 42, 50, 58,
+    6, 14, 22, 30, 38, 46, 54, 62,
+    1,  9, 17, 25, 33, 41, 49, 57,
+    5, 21, 37, 53, 13, 29, 45, 61,
+    3, 11, 19, 27, 35, 43, 51, 59,
+    7, 23, 39, 55, 15, 31, 47, 63
+};
+
+/* imdct_init: fills xcos1[] / xsin1[] with 128 cos/sin values times 181.019 */
+void _M( imdct_init ) (imdct_t * p_imdct)
+{
+    const float gain = 181.019;
+    int k;
+    for( k = 0; k < 128; k++ ) {
+        const double angle = 2.0f * M_PI * (8*k+1)/(8*N);
+        p_imdct->xcos1[k] = cos( angle ) * gain;
+        p_imdct->xsin1[k] = sin( angle ) * gain;
+    }
+}
+
+/*****************************************************************************
+ * imdct_do_512: 512-sample IMDCT computed in place on data[]. The windowed
+ * output is added to the content of delay[], then delay[] is refilled with
+ * the windowed trailing half of this block.
+ *****************************************************************************/
+void _M( imdct_do_512 ) (imdct_t * p_imdct, float data[], float delay[])
+{
+    complex_t *z = p_imdct->buf;
+    float re, im;
+    int k, n, src;
+
+    /* Pre-IFFT complex multiply plus conjugate; the 128 complex inputs
+     * are gathered from data[] in the order given by pm128[] */
+    for( k = 0; k < 128; k++ )
+    {
+        src = pm128[k];
+        re = data[256-2*src-1];
+        im = data[2*src];
+
+        z[k].real = (re * p_imdct->xcos1[src]) - (im * p_imdct->xsin1[src]);
+        z[k].imag = -1.0 * (im * p_imdct->xcos1[src] + re * p_imdct->xsin1[src]);
+    }
+
+    _M( fft_128p ) ( z );
+
+    /* Post-IFFT complex multiply plus conjugate, in natural order */
+    for( k = 0; k < 128; k++ )
+    {
+        re = z[k].real;
+        im = z[k].imag;
+        z[k].real = (re * p_imdct->xcos1[k]) + (im * p_imdct->xsin1[k]);
+        z[k].imag = (re * p_imdct->xsin1[k]) - (im * p_imdct->xcos1[k]);
+    }
+
+    /* Window the output and add the content of the delay line */
+    for( k = 0; k < 64; k++ )
+    {
+        n = 2 * k;
+        data[n]     = -z[64+k].imag  * window[n]     + delay[n];
+        data[n + 1] = z[64-k-1].real * window[n + 1] + delay[n + 1];
+    }
+
+    for( k = 0; k < 64; k++ )
+    {
+        n = 128 + 2 * k;
+        data[n]     = -z[k].real      * window[n]     + delay[n];
+        data[n + 1] = z[128-k-1].imag * window[n + 1] + delay[n + 1];
+    }
+
+    /* The trailing edge goes into the delay line; the window is now
+     * read backwards, starting from its last coefficient */
+    for( k = 0; k < 64; k++ )
+    {
+        n = 2 * k;
+        delay[n]     = -z[64+k].real   * window[255 - n];
+        delay[n + 1] =  z[64-k-1].imag * window[254 - n];
+    }
+
+    for( k = 0; k < 64; k++ )
+    {
+        n = 2 * k;
+        delay[128 + n] =  z[k].imag       * window[127 - n];
+        delay[129 + n] = -z[128-k-1].real * window[126 - n];
+    }
+}
+
+
+/*****************************************************************************
+ * imdct_do_512_nol: 512-sample IMDCT computed in place on data[], without
+ * overlap: the windowed output is stored as is, the content of delay[] is
+ * not added to it.
+ * delay[] is still refilled with the windowed trailing half of this block,
+ * exactly as imdct_do_512 does.
+ *****************************************************************************/
+void _M( imdct_do_512_nol ) (imdct_t * p_imdct, float data[], float delay[])
+{
+    complex_t *z = p_imdct->buf;
+    float re, im;
+    int k, n, src;
+
+    /* Pre-IFFT complex multiply plus conjugate; the 128 complex inputs
+     * are gathered from data[] in the order given by pm128[] */
+    for( k = 0; k < 128; k++ )
+    {
+        src = pm128[k];
+        re = data[256-2*src-1];
+        im = data[2*src];
+
+        z[k].real = (re * p_imdct->xcos1[src]) - (im * p_imdct->xsin1[src]);
+        z[k].imag = -1.0 * (im * p_imdct->xcos1[src] + re * p_imdct->xsin1[src]);
+    }
+
+    _M( fft_128p ) ( z );
+
+    /* Post-IFFT complex multiply plus conjugate, in natural order */
+    for( k = 0; k < 128; k++ )
+    {
+        re = z[k].real;
+        im = z[k].imag;
+
+        z[k].real = (re * p_imdct->xcos1[k]) + (im * p_imdct->xsin1[k]);
+        z[k].imag = (re * p_imdct->xsin1[k]) - (im * p_imdct->xcos1[k]);
+    }
+
+    /* Window and convert to a real valued signal, no overlap here:
+     * first half of the output block */
+    for( k = 0; k < 64; k++ )
+    {
+        n = 2 * k;
+        data[n]     = -z[64+k].imag  * window[n];
+        data[n + 1] = z[64-k-1].real * window[n + 1];
+    }
+
+    /* Second half of the output block */
+    for( k = 0; k < 64; k++ )
+    {
+        n = 128 + 2 * k;
+        data[n]     = -z[k].real      * window[n];
+        data[n + 1] = z[128-k-1].imag * window[n + 1];
+    }
+
+    /* The trailing edge goes into the delay line; the window is now
+     * read backwards, starting from its last coefficient */
+    for( k = 0; k < 64; k++ )
+    {
+        n = 2 * k;
+        delay[n]     = -z[64+k].real   * window[255 - n];
+        delay[n + 1] =  z[64-k-1].imag * window[254 - n];
+    }
+
+    /* Second half of the delay line */
+    for( k = 0; k < 64; k++ )
+    {
+        n = 2 * k;
+        delay[128 + n] =  z[k].imag       * window[127 - n];
+        delay[129 + n] = -z[128-k-1].real * window[126 - n];
+    }
+}
+
diff --git a/plugins/imdct/ac3_imdct_common.c b/plugins/imdct/ac3_imdct_common.c
new file mode 100644
index 0000000000..15ff1d9c49
--- /dev/null
+++ b/plugins/imdct/ac3_imdct_common.c
@@ -0,0 +1,267 @@
+/*****************************************************************************
+ * ac3_imdct_common.c: common ac3 IMDCT functions
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: ac3_imdct_common.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ *          Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+/* MODULE_NAME defined in Makefile together with -DBUILTIN */
+#ifdef BUILTIN
+#   include "modules_inner.h"
+#else
+#   define _M( foo ) foo
+#endif
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <string.h>                                              /* memcpy() */
+
+#include <math.h>
+#include <stdio.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+
+#include "ac3_imdct.h"
+
+#ifndef M_PI
+#   define M_PI 3.14159265358979323846
+#endif
+
+static float window[] = {  /* 256 window coefficients, 0.00014 up to 1.0 */
+    0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
+    0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
+    0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
+    0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
+    0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
+    0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
+    0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
+    0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
+    0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
+    0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
+    0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
+    0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
+    0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
+    0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
+    0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
+    0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
+    0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
+    0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
+    0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
+    0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
+    0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
+    0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
+    0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
+    0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
+    0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
+    0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
+    0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
+    0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
+    0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
+    0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
+    0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
+    1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
+};
+
+static const int pm128[128] =  /* NOTE(review): not referenced in this file */
+{
+    0, 16, 32, 48, 64, 80,  96, 112,  8, 40, 72, 104, 24, 56,  88, 120,
+    4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44,  60, 76, 92, 108, 124,
+    2, 18, 34, 50, 66, 82,  98, 114, 10, 42, 74, 106, 26, 58,  90, 122,
+    6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62,  94, 126,
+    1, 17, 33, 49, 65, 81,  97, 113,  9, 41, 73, 105, 25, 57,  89, 121,
+    5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45,  61, 77, 93, 109, 125,
+    3, 19, 35, 51, 67, 83,  99, 115, 11, 43, 75, 107, 27, 59,  91, 123,
+    7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47,  63, 79, 95, 111, 127
+}; 
+
+static const int pm64[64] =  /* permutation of 0..63: fft_64p load order */
+{
+    0,  8, 16, 24, 32, 40, 48, 56,
+    4, 20, 36, 52, 12, 28, 44, 60,
+    2, 10, 18, 26, 34, 42, 50, 58,
+    6, 14, 22, 30, 38, 46, 54, 62,
+    1,  9, 17, 25, 33, 41, 49, 57,
+    5, 21, 37, 53, 13, 29, 45, 61,
+    3, 11, 19, 27, 35, 43, 51, 59,
+    7, 23, 39, 55, 15, 31, 47, 63
+};
+
+/*****************************************************************************
+ * imdct_do_256: pair of 64-point transforms on the interleaved coefficients
+ * of data[]. The even-indexed coefficients give the block written back to
+ * data[] (added to the content of delay[]); the odd-indexed ones give the
+ * block stored in delay[] afterwards.
+ *****************************************************************************/
+void _M( imdct_do_256 ) (imdct_t * p_imdct, float data[],float delay[])
+{
+    complex_t *first = &p_imdct->buf[0];
+    complex_t *second = &p_imdct->buf[64];
+    float re, im;
+    int k, n, src, hi, lo;
+
+    /* Pre-IFFT complex multiply plus conjugate, inputs taken in the order
+     * given by pm64[] */
+    for( k = 0; k < 64; k++ )
+    {
+        src = pm64[k];
+        hi = 2 * (128-2*src-1);
+        lo = 2 * (2 * src);
+
+        /* Even-indexed coefficients: X1[k] = X[2*k] */
+        first[k].real =        data[hi] * p_imdct->xcos2[src] - data[lo] * p_imdct->xsin2[src];
+        first[k].imag = -1.0f*(data[lo] * p_imdct->xcos2[src] + data[hi] * p_imdct->xsin2[src]);
+        /* Odd-indexed coefficients: X2[k] = X[2*k+1] */
+        second[k].real =        data[hi + 1] * p_imdct->xcos2[src] - data[lo + 1] * p_imdct->xsin2[src];
+        second[k].imag = -1.0f*(data[lo + 1] * p_imdct->xcos2[src] + data[hi + 1] * p_imdct->xsin2[src]);
+    }
+
+    _M( fft_64p ) ( first );
+    _M( fft_64p ) ( second );
+
+    /* Post-IFFT complex multiply; the imaginary part is negated first */
+    for( k = 0; k < 64; k++ )
+    {
+        re =  first[k].real;
+        im = -first[k].imag;
+        first[k].real = (re * p_imdct->xcos2[k]) - (im * p_imdct->xsin2[k]);
+        first[k].imag = (re * p_imdct->xsin2[k]) + (im * p_imdct->xcos2[k]);
+
+        re =  second[k].real;
+        im = -second[k].imag;
+        second[k].real = (re * p_imdct->xcos2[k]) - (im * p_imdct->xsin2[k]);
+        second[k].imag = (re * p_imdct->xsin2[k]) + (im * p_imdct->xcos2[k]);
+    }
+
+    /* Window the first transform and add the content of delay[] */
+    for( k = 0; k < 64; k++ )
+    {
+        n = 2 * k;
+        data[n]     = -first[k].imag     * window[n]     + delay[n];
+        data[n + 1] = first[64-k-1].real * window[n + 1] + delay[n + 1];
+    }
+
+    for( k = 0; k < 64; k++ )
+    {
+        n = 128 + 2 * k;
+        data[n]     = -first[k].real     * window[n]     + delay[n];
+        data[n + 1] = first[64-k-1].imag * window[n + 1] + delay[n + 1];
+    }
+
+    /* The second transform refills delay[]; the window is read backwards */
+    for( k = 0; k < 64; k++ )
+    {
+        n = 2 * k;
+        delay[n]     = -second[k].real      * window[255 - n];
+        delay[n + 1] =  second[64-k-1].imag * window[254 - n];
+    }
+
+    for( k = 0; k < 64; k++ )
+    {
+        n = 2 * k;
+        delay[128 + n] =  second[k].imag      * window[127 - n];
+        delay[129 + n] = -second[64-k-1].real * window[126 - n];
+    }
+}
+
+
+/*****************************************************************************
+ * imdct_do_256_nol: same pair of 64-point transforms as imdct_do_256, but
+ * without overlap: the windowed output of the first transform is stored in
+ * data[] as is, the content of delay[] is never added to it. delay[] is
+ * still refilled from the second transform.
+ *****************************************************************************/
+void _M( imdct_do_256_nol ) (imdct_t * p_imdct, float data[], float delay[])
+{
+    complex_t *first = &p_imdct->buf[0];
+    complex_t *second = &p_imdct->buf[64];
+    float re, im;
+    int k, n, src, hi, lo;
+
+    /* Pre-IFFT complex multiply plus conjugate, inputs taken in the order
+     * given by pm64[] */
+    for( k = 0; k < 64; k++ )
+    {
+        src = pm64[k];
+        hi = 2 * (128-2*src-1);
+        lo = 2 * (2 * src);
+
+        /* Even-indexed coefficients: X1[k] = X[2*k] */
+        first[k].real =        data[hi] * p_imdct->xcos2[src] - data[lo] * p_imdct->xsin2[src];
+        first[k].imag = -1.0f*(data[lo] * p_imdct->xcos2[src] + data[hi] * p_imdct->xsin2[src]);
+        /* Odd-indexed coefficients: X2[k] = X[2*k+1] */
+        second[k].real =        data[hi + 1] * p_imdct->xcos2[src] - data[lo + 1] * p_imdct->xsin2[src];
+        second[k].imag = -1.0f*(data[lo + 1] * p_imdct->xcos2[src] + data[hi + 1] * p_imdct->xsin2[src]);
+    }
+
+    _M( fft_64p ) ( first );
+    _M( fft_64p ) ( second );
+
+    /* Post-IFFT complex multiply; the imaginary part is negated first */
+    for( k = 0; k < 64; k++ )
+    {
+        re =  first[k].real;
+        im = -first[k].imag;
+        first[k].real = (re * p_imdct->xcos2[k]) - (im * p_imdct->xsin2[k]);
+        first[k].imag = (re * p_imdct->xsin2[k]) + (im * p_imdct->xcos2[k]);
+
+        re =  second[k].real;
+        im = -second[k].imag;
+        second[k].real = (re * p_imdct->xcos2[k]) - (im * p_imdct->xsin2[k]);
+        second[k].imag = (re * p_imdct->xsin2[k]) + (im * p_imdct->xcos2[k]);
+    }
+
+    /* Window the first transform, no overlap: neither half of the output
+     * reads delay[] (matching imdct_do_512_nol) */
+    for( k = 0; k < 64; k++ )
+    {
+        n = 2 * k;
+        data[n]     = -first[k].imag     * window[n];
+        data[n + 1] = first[64-k-1].real * window[n + 1];
+    }
+
+    for( k = 0; k < 64; k++ )
+    {
+        n = 128 + 2 * k;
+        data[n]     = -first[k].real     * window[n];
+        data[n + 1] = first[64-k-1].imag * window[n + 1];
+    }
+
+    /* The second transform refills delay[]; the window is read backwards */
+    for( k = 0; k < 64; k++ )
+    {
+        n = 2 * k;
+        delay[n]     = -second[k].real      * window[255 - n];
+        delay[n + 1] =  second[64-k-1].imag * window[254 - n];
+    }
+
+    for( k = 0; k < 64; k++ )
+    {
+        n = 2 * k;
+        delay[128 + n] =  second[k].imag      * window[127 - n];
+        delay[129 + n] = -second[64-k-1].real * window[126 - n];
+    }
+}
+
diff --git a/src/ac3_decoder/ac3_imdct_c.h b/plugins/imdct/ac3_imdct_common.h
similarity index 69%
rename from src/ac3_decoder/ac3_imdct_c.h
rename to plugins/imdct/ac3_imdct_common.h
index 5863dc9d88..ce0a7ab6d8 100644
--- a/src/ac3_decoder/ac3_imdct_c.h
+++ b/plugins/imdct/ac3_imdct_common.h
@@ -1,8 +1,8 @@
 /*****************************************************************************
- * ac3_imdct_c.h: ac3 DCT
+ * ac3_imdct_common.h: common ac3 DCT headers
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_imdct_c.h,v 1.2 2001/04/30 21:10:25 reno Exp $
+ * $Id: ac3_imdct_common.h,v 1.1 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Renaud Dartus <reno@videolan.org>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -22,9 +22,9 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
 
-int  imdct_init_c (imdct_t * p_imdct);
-void imdct_do_256(imdct_t * p_imdct, float data[], float delay[]);
-void imdct_do_256_nol(imdct_t * p_imdct, float data[], float delay[]);
-void imdct_do_512_c(imdct_t * p_imdct, float data[], float delay[]);
-void imdct_do_512_nol_c(imdct_t * p_imdct, float data[], float delay[]);
+void _M( imdct_init )       ( imdct_t * p_imdct );
+void _M( imdct_do_256 )     ( imdct_t * p_imdct, float data[], float delay[] );
+void _M( imdct_do_256_nol ) ( imdct_t * p_imdct, float data[], float delay[] );
+void _M( imdct_do_512  )    ( imdct_t * p_imdct, float data[], float delay[] );
+void _M( imdct_do_512_nol ) ( imdct_t * p_imdct, float data[], float delay[] );
 
diff --git a/plugins/imdct/ac3_imdct_sse.c b/plugins/imdct/ac3_imdct_sse.c
new file mode 100644
index 0000000000..d426f55a66
--- /dev/null
+++ b/plugins/imdct/ac3_imdct_sse.c
@@ -0,0 +1,637 @@
+/*****************************************************************************
+ * ac3_imdct_sse.c: accelerated SSE ac3 DCT
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: ac3_imdct_sse.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ *          Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME imdctsse
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <math.h>
+#include <stdio.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+
+#include "ac3_imdct.h"
+#include "ac3_imdct_common.h"
+
+static const float window[] = {   /* 256 window coefficients, 0.00014 up to 1.0 */
+    0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
+    0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
+    0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
+    0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
+    0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
+    0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
+    0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
+    0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
+    0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
+    0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
+    0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
+    0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
+    0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
+    0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
+    0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
+    0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
+    0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
+    0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
+    0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
+    0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
+    0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
+    0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
+    0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
+    0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
+    0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
+    0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
+    0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
+    0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
+    0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
+    0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
+    0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
+    1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
+};
+
+static const int pm128[128] =   /* permutation of 0..127, passed as pmt to the pre-IFFT twiddle */
+{
+    0, 16, 32, 48, 64, 80,  96, 112,  8, 40, 72, 104, 24, 56,  88, 120,
+    4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44,  60, 76, 92, 108, 124,
+    2, 18, 34, 50, 66, 82,  98, 114, 10, 42, 74, 106, 26, 58,  90, 122,
+    6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62,  94, 126,
+    1, 17, 33, 49, 65, 81,  97, 113,  9, 41, 73, 105, 25, 57,  89, 121,
+    5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45,  61, 77, 93, 109, 125,
+    3, 19, 35, 51, 67, 83,  99, 115, 11, 43, 75, 107, 27, 59,  91, 123,
+    7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47,  63, 79, 95, 111, 127
+}; 
+
+void _M( fft_64p )  ( complex_t *x );   /* declared here but not called in this file */
+void _M( fft_128p ) ( complex_t *a );   /* run between the two twiddle passes below */
+/* SSE helpers private to this file; imdct_do_512 and imdct_do_512_nol chain them */
+static void imdct512_pre_ifft_twiddle_sse (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse);
+static void imdct512_post_ifft_twiddle_sse (complex_t *buf, float *xcos_sin_sse);
+static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, const float *window_prt, float *delay_prt);
+static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, const float *window_prt, float *delay_prt);
+
+/* imdct_init: fill xcos_sin_sse with 128 scaled ( cos, -sin, -sin, -cos ) groups */
+void _M( imdct_init ) (imdct_t * p_imdct)
+{
+    float *p_quad = p_imdct->xcos_sin_sse;      /* four floats per index */
+    const float f_scale = 181.019;
+    int i_idx;
+    for( i_idx = 0; i_idx < 128; i_idx++, p_quad += 4 )
+    {
+        float f_cos = cos(2.0f * M_PI * (8*i_idx+1)/(8*N)) * f_scale;
+        float f_sin = sin(2.0f * M_PI * (8*i_idx+1)/(8*N)) * f_scale;
+        p_quad[0] =  f_cos;
+        p_quad[1] = -f_sin;
+        p_quad[2] = -f_sin;
+        p_quad[3] = -f_cos;
+    }
+}
+
+void _M( imdct_do_512 ) (imdct_t * p_imdct, float data[], float delay[])
+{   /* pre-twiddle data into buf, run fft_128p on buf, post-twiddle, then window/delay */
+    imdct512_pre_ifft_twiddle_sse (pm128, p_imdct->buf, data, p_imdct->xcos_sin_sse);
+    _M( fft_128p ) ( p_imdct->buf );
+    imdct512_post_ifft_twiddle_sse (p_imdct->buf, p_imdct->xcos_sin_sse);
+    imdct512_window_delay_sse (p_imdct->buf, data, window, delay);
+}
+
+
+void _M( imdct_do_512_nol ) (imdct_t * p_imdct, float data[], float delay[])
+{   /* same three first steps as imdct_do_512; only the final window/delay helper differs */
+    imdct512_pre_ifft_twiddle_sse (pm128, p_imdct->buf, data, p_imdct->xcos_sin_sse);  
+    _M( fft_128p ) ( p_imdct->buf );
+    imdct512_post_ifft_twiddle_sse (p_imdct->buf, p_imdct->xcos_sin_sse);
+    imdct512_window_delay_nol_sse (p_imdct->buf, data, window, delay);
+}
+
+static void imdct512_pre_ifft_twiddle_sse (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse)
+{   /* 64 passes; each reads two pmt entries and stores 16 bytes of twiddled input to buf */
+    __asm__ __volatile__ (    
+    "pushl %%ebp\n"
+    "movl  %%esp, %%ebp\n"
+    "addl  $-4, %%esp\n" /* local variable, loop counter */
+    /* save every general register used below; they are popped in reverse order at the end */
+    "pushl %%eax\n"
+    "pushl %%ebx\n"
+    "pushl %%ecx\n"
+    "pushl %%edx\n"
+    "pushl %%edi\n"
+    "pushl %%esi\n"
+    /* NOTE(review): args are read at 8..20(%ebp) of this hand-made frame; presumably needs no compiler prologue and no inlining - confirm */
+    "movl  8(%%ebp), %%eax\n"     /* pmt */
+    "movl 12(%%ebp), %%ebx\n"    /* buf */
+    "movl 16(%%ebp), %%ecx\n"    /* data */
+    "movl 20(%%ebp), %%edx\n"     /* xcos_sin_sse */
+    "movl $64, -4(%%ebp)\n"
+    /* in the comments below, j is the pmt entry in esi and "j+1" the next pmt entry in edi */
+".loop:\n"
+    "movl  (%%eax), %%esi\n"
+    "movl 4(%%eax), %%edi\n"
+    "movss (%%ecx, %%esi, 8), %%xmm1\n" /* 2j */
+    "movss (%%ecx, %%edi, 8), %%xmm3\n" /* 2(j+1) */
+    /* doubling the index turns scale 8 into a 16-byte stride: one 4-float table group per entry */
+    "shll $1, %%esi\n"
+    "shll $1, %%edi\n"
+
+    "movups (%%edx, %%esi, 8), %%xmm0\n" /* -c_j | -s_j | -s_j | c_j */
+    "movups (%%edx, %%edi, 8), %%xmm2\n" /* -c_j+1 | -s_j+1 | -s_j+1 | c_j+1 */
+    /* negated doubled index: 1020 - 8*j bytes into data, i.e. float element 255-2j */
+    "negl %%esi\n"
+    "negl %%edi\n"
+
+    "movss 1020(%%ecx, %%esi, 4), %%xmm4\n" /* 255-2j */
+    "addl $8, %%eax\n"
+    "movss 1020(%%ecx, %%edi, 4), %%xmm5\n" /* 255-2(j+1) */
+
+    "shufps $0, %%xmm1, %%xmm4\n" /* 2j | 2j | 255-2j | 255-2j */
+    "shufps $0, %%xmm3, %%xmm5\n" /* 2(j+1) | 2(j+1) | 255-2(j+1) | 255-2(j+1) */
+    "mulps   %%xmm4, %%xmm0\n"
+    "mulps   %%xmm5, %%xmm2\n"
+    "movhlps %%xmm0, %%xmm1\n"
+    "movhlps %%xmm2, %%xmm3\n"
+    "addl    $16, %%ebx\n"
+    "addps   %%xmm1, %%xmm0\n"
+    "addps   %%xmm3, %%xmm2\n"
+    "movlhps %%xmm2, %%xmm0\n"
+    
+    "movups  %%xmm0, -16(%%ebx)\n"
+    "decl -4(%%ebp)\n"
+       "jnz .loop\n"
+
+    "popl %%esi\n"
+    "popl %%edi\n"
+    "popl %%edx\n"
+    "popl %%ecx\n"
+    "popl %%ebx\n"
+    "popl %%eax\n"
+
+    "addl $4, %%esp\n"
+    "popl %%ebp\n"
+    ::);
+}
+
+static void imdct512_post_ifft_twiddle_sse (complex_t *buf, float *xcos_sin_sse)
+{   /* Twiddles buf in place: 32 passes, each rewriting 32 bytes of buf using 64 bytes of table */
+    __asm__ __volatile__ ( 
+    "pushl %%ebx\n"
+    "movl $32, %%ebx\n"                 /* loop counter */
+    /* numeric local label: stays unique even if the compiler emits this body more than once */
+"1:\n"
+    "movups    (%%eax), %%xmm0\n"          /*  im1 | re1 | im0 | re0 */
+    /* NOTE(review): xmm0-xmm7 are used here without being listed as clobbers */
+    "movups  (%%ecx), %%xmm2\n"         /* -c | -s | -s | c */
+    "movhlps  %%xmm0, %%xmm1\n"         /* im1 | re1 */
+    "movups  16(%%ecx), %%xmm3\n"       /* -c1 | -s1 | -s1 | c1 */
+
+    "shufps $0x50, %%xmm0, %%xmm0\n"    /* im0 | im0 | re0 | re0 */
+    "shufps $0x50, %%xmm1, %%xmm1\n"    /* im1 | im1 | re1 | re1 */
+
+    "movups  16(%%eax), %%xmm4\n"       /* im3 | re3 | im2 | re2 */
+
+    "shufps $0x27, %%xmm2, %%xmm2\n"    /* c | -s | -s | -c */
+    "movhlps  %%xmm4, %%xmm5\n"         /* im3 | re3 */
+    "shufps $0x27, %%xmm3, %%xmm3\n"    /* c1 | -s1 | -s1 | -c1 */
+
+    "movups  32(%%ecx), %%xmm6\n"       /* -c2 | -s2 | -s2 | c2 */
+    "movups  48(%%ecx), %%xmm7\n"       /* -c3 | -s3 | -s3 | c3 */
+
+    "shufps $0x50, %%xmm4, %%xmm4\n"    /* im2 | im2 | re2 | re2 */
+    "shufps $0x50, %%xmm5, %%xmm5\n"    /* im3 | im3 | re3 | re3 */
+
+    "mulps %%xmm2, %%xmm0\n"
+    "mulps %%xmm3, %%xmm1\n"
+
+    "shufps $0x27, %%xmm6, %%xmm6\n"    /* c2 | -s2 | -s2 | -c2 */
+    "shufps $0x27, %%xmm7, %%xmm7\n"    /* c3 | -s3 | -s3 | -c3 */
+
+    "movhlps %%xmm0, %%xmm2\n"
+    "movhlps %%xmm1, %%xmm3\n"
+
+    "mulps %%xmm6, %%xmm4\n"
+    "mulps %%xmm7, %%xmm5\n"
+
+    "addps %%xmm2, %%xmm0\n"
+    "addps %%xmm3, %%xmm1\n"
+
+    "movhlps %%xmm4, %%xmm6\n"
+    "movhlps %%xmm5, %%xmm7\n"
+
+    "addps %%xmm6, %%xmm4\n"
+    "addps %%xmm7, %%xmm5\n"
+
+    "movlhps %%xmm1, %%xmm0\n"
+    "movlhps %%xmm5, %%xmm4\n"
+
+    "movups %%xmm0, (%%eax)\n"
+    "movups %%xmm4, 16(%%eax)\n"
+    "addl $64, %%ecx\n"
+    "addl $32, %%eax\n"
+    "decl %%ebx\n"
+    "jnz 1b\n"
+    /* eax and ecx are advanced above, so both are tied to outputs; buf is stored to, hence "memory" */
+    "popl %%ebx\n"
+    : "=a" (buf), "=c" (xcos_sin_sse)
+    : "0" (buf), "1" (xcos_sin_sse) : "memory", "cc" );
+}
+
+static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, const float *window_prt, float *delay_prt)
+{   /* Writes buf * window + delay into data (256 floats), then refills delay with buf * window */
+    __asm__ __volatile__ (
+    "pushl %%ebp\n"
+    "movl  %%esp, %%ebp\n"
+    
+    "pushl %%eax\n"
+    "pushl %%ebx\n"
+    "pushl %%ecx\n"
+    "pushl %%edx\n"
+    "pushl %%esi\n"
+    "pushl %%edi\n"
+    /* NOTE(review): args are read at 8..20(%ebp) of this hand-made frame; presumably needs no compiler prologue and no inlining - confirm */
+    "movl 20(%%ebp), %%ebx\n"   /* delay */
+    "movl 16(%%ebp), %%edx\n"   /* window */
+
+    "movl 8(%%ebp), %%eax\n"    /* buf */
+    "movl $16, %%ecx\n"         /* loop count */
+    "leal 516(%%eax), %%esi\n"  /* buf[64].im */
+    "leal 504(%%eax), %%edi\n"  /* buf[63].re */
+    "movl  12(%%ebp), %%eax\n"  /* data */
+    /* data[0..127]: 16 passes of 8 floats; esi walks up from buf[64].im, edi down from buf[63].re */
+".first_128_samples:\n"
+    "movss   (%%esi), %%xmm0\n"
+    "movss  8(%%esi), %%xmm2\n"
+    "movss   (%%edi), %%xmm1\n"
+    "movss -8(%%edi), %%xmm3\n"
+
+    "movlhps %%xmm2, %%xmm0\n"      /* 0.0 | im1 | 0.0 | im0 */
+    "movlhps %%xmm3, %%xmm1\n"      /* 0.0 | re1 | 0.0 | re0 */
+
+    "movups (%%edx), %%xmm4\n"      /* w3 | w2 | w1 | w0 */
+    "movups (%%ebx), %%xmm5\n"      /* d3 | d2 | d1 | d0 */
+    "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
+
+    "movss  16(%%esi), %%xmm6\n"    /* im2 */
+    "movss  24(%%esi), %%xmm7\n"    /* im3 */
+    "subps     %%xmm1, %%xmm0\n"    /* -re1 | im1 | -re0 | im0 */
+    "movss -16(%%edi), %%xmm2\n"    /* re2 */
+    "movss -24(%%edi), %%xmm3\n"    /* re3 */
+    "mulps     %%xmm4, %%xmm0\n"
+    "movlhps   %%xmm7, %%xmm6\n"    /* 0.0 | im3 | 0.0 | im2 */
+    "movlhps   %%xmm3, %%xmm2\n"    /* 0.0 | re3 | 0.0 | re2 */
+    "addps %%xmm5, %%xmm0\n"
+    "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
+    "movups 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
+    "movups 16(%%ebx), %%xmm5\n"    /* d7 | d6 | d5 | d4 */
+    "subps %%xmm2, %%xmm6\n"        /* -re3 | im3 | -re2 | im2 */
+    "addl $32, %%edx\n"
+    "movups %%xmm0, (%%eax)\n"
+    "addl $32, %%ebx\n"
+    "mulps %%xmm4, %%xmm6\n"
+    "addl $32, %%esi\n"
+    "addl $32, %%eax\n"
+    "addps %%xmm5, %%xmm6\n"
+    "addl $-32, %%edi\n"
+    "movups %%xmm6, -16(%%eax)\n"
+    "decl %%ecx\n"
+    "jnz .first_128_samples\n"
+    /* data[128..255]: same scheme; the data, window and delay pointers simply keep advancing */
+    "movl 8(%%ebp), %%esi\n"    /* buf[0].re */
+    "leal 1020(%%esi), %%edi\n" /* buf[127].im */
+    "movl $16, %%ecx\n"         /* loop count */
+    
+".second_128_samples:\n"
+    "movss   (%%esi), %%xmm0\n" /* buf[i].re */
+    "movss  8(%%esi), %%xmm2\n" /* re1 */
+    "movss   (%%edi), %%xmm1\n" /* buf[127-i].im */
+    "movss -8(%%edi), %%xmm3\n" /* im1 */
+
+    "movlhps %%xmm2, %%xmm0\n"  /* 0.0 | re1 | 0.0 | re0 */
+    "movlhps %%xmm3, %%xmm1\n"  /* 0.0 | im1 | 0.0 | im0 */
+
+    "movups (%%edx), %%xmm4\n"  /* w3 | w2 | w1 | w0 */
+    "movups (%%ebx), %%xmm5\n"  /* d3 | d2 | d1 | d0 */
+
+    "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
+    "movss  16(%%esi), %%xmm6\n"    /* re2 */
+    "movss  24(%%esi), %%xmm7\n"    /* re3 */
+    "movss -16(%%edi), %%xmm2\n"    /* im2 */
+    "movss -24(%%edi), %%xmm3\n"    /* im3 */
+    "subps   %%xmm1, %%xmm0\n"      /* -im1 | re1 | -im0 | re0 */
+    "movlhps %%xmm7, %%xmm6\n"      /* 0.0 | re3 | 0.0 | re2 */
+    "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | im3 | 0.0 | im2 */
+    "mulps   %%xmm4, %%xmm0\n"
+    "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
+    "movups 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
+    "addl $32, %%esi\n"
+    "subps %%xmm2, %%xmm6\n"        /* -im3 | re3 | -im2 | re2 */
+    "addps %%xmm5, %%xmm0\n"
+    "mulps %%xmm4, %%xmm6\n"
+    "addl $-32, %%edi\n"
+    "movups 16(%%ebx), %%xmm5\n"    /* d7 | d6 | d5 | d4 */
+    "movups %%xmm0, (%%eax)\n"
+    "addps %%xmm5, %%xmm6\n"
+    "addl $32, %%edx\n"
+    "addl $32, %%eax\n"
+    "addl $32, %%ebx\n"
+    "movups %%xmm6, -16(%%eax)\n"
+    "decl %%ecx\n"
+    "jnz .second_128_samples\n"
+    /* delay[0..127]: stores go to delay from here on, nothing is added; edx now steps down through the window */
+    "movl   8(%%ebp), %%eax\n"
+    "leal 512(%%eax), %%esi\n"  /* buf[64].re */
+    "leal 508(%%eax), %%edi\n"  /* buf[63].im */
+    "movl $16, %%ecx\n"         /* loop count */
+    "movl  20(%%ebp), %%eax\n"  /* delay */
+    /* NOTE(review): edx steps down 32 bytes per pass, yet each 16-byte load keeps its four coefficients ascending - confirm against the C code */
+".first_128_delay:\n"
+    "movss   (%%esi), %%xmm0\n"
+    "movss  8(%%esi), %%xmm2\n"
+    "movss   (%%edi), %%xmm1\n"
+    "movss -8(%%edi), %%xmm3\n"
+
+    "movlhps %%xmm2, %%xmm0\n"      /* 0.0 | re1 | 0.0 | re0 */
+    "movlhps %%xmm3, %%xmm1\n"      /* 0.0 | im1 | 0.0 | im0 */
+
+    "movups -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
+    "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
+    "movss  16(%%esi), %%xmm6\n"    /* re2 */
+    "movss  24(%%esi), %%xmm7\n"    /* re3 */
+    "movss -16(%%edi), %%xmm2\n"    /* im2 */
+    "movss -24(%%edi), %%xmm3\n"    /* im3 */
+    "subps     %%xmm1, %%xmm0\n"    /* -im1 | re1 | -im0 | re0 */
+    "addl $-32, %%edx\n"
+    "movlhps %%xmm7, %%xmm6\n"      /* 0.0 | re3 | 0.0 | re2 */
+    "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | im3 | 0.0 | im2 */
+    "mulps   %%xmm4, %%xmm0\n"
+    "movups (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
+    "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
+    "movups %%xmm0, (%%eax)\n"
+    "addl $32, %%esi\n"
+    "subps %%xmm2, %%xmm6\n"        /* -im3 | re3 | -im2 | re2 */
+    "addl $-32, %%edi\n"
+    "mulps %%xmm5, %%xmm6\n"
+    "addl $32, %%eax\n"
+    "movups %%xmm6, -16(%%eax)\n"
+    "decl %%ecx\n"
+    "jnz .first_128_delay\n"
+    /* delay[128..255]: operands of the subtraction are swapped here, giving re | -im pairs */
+    "movl    8(%%ebp), %%ebx\n"
+    "leal    4(%%ebx), %%esi\n" /* buf[0].im */
+    "leal 1016(%%ebx), %%edi\n" /* buf[127].re */
+    "movl $16, %%ecx\n"         /* loop count */
+    
+".second_128_delay:\n"
+    "movss   (%%esi), %%xmm0\n"
+    "movss  8(%%esi), %%xmm2\n"
+    "movss   (%%edi), %%xmm1\n"
+    "movss -8(%%edi), %%xmm3\n"
+
+    "movlhps %%xmm2, %%xmm0\n"      /* 0.0 | im1 | 0.0 | im0 */
+    "movlhps %%xmm3, %%xmm1\n"      /* 0.0 | re1 | 0.0 | re0 */
+
+    "movups -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
+    "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
+    "movss  16(%%esi), %%xmm6\n"    /* im2 */
+    "movss  24(%%esi), %%xmm7\n"    /* im3 */
+    "movss -16(%%edi), %%xmm2\n"    /* re2 */
+    "movss -24(%%edi), %%xmm3\n"    /* re3 */
+    "subps %%xmm0, %%xmm1\n"        /* re1 | -im1 | re0 | -im0 */
+    "addl $-32, %%edx\n"
+    "movlhps %%xmm7, %%xmm6\n"      /* 0.0 | im3 | 0.0 | im2 */
+    "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | re3 | 0.0 | re2 */
+    "mulps   %%xmm4, %%xmm1\n"
+    "movups (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
+    "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
+    "movups %%xmm1, (%%eax)\n"
+    "addl $32, %%esi\n"
+    "subps %%xmm6, %%xmm2\n"        /* re3 | -im3 | re2 | -im2 */
+    "addl $-32, %%edi\n"
+    "mulps %%xmm5, %%xmm2\n"
+    "addl $32, %%eax\n"
+    "movups %%xmm2, -16(%%eax)\n"
+    "decl %%ecx\n"
+    "jnz .second_128_delay\n"
+
+    "popl %%edi\n"
+    "popl %%esi\n"
+    "popl %%edx\n"
+    "popl %%ecx\n"
+    "popl %%ebx\n"
+    "popl %%eax\n"
+    
+    "leave\n"
+    ::);
+}
+
+static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, const float *window_prt, float *delay_prt)
+{   /* Like imdct512_window_delay_sse, but data gets buf * window only: the delay adds are commented out */
+    __asm__ __volatile__ (
+    "pushl %%ebp\n"
+    "movl  %%esp, %%ebp\n"
+    
+    "pushl %%eax\n"
+    "pushl %%ebx\n"
+    "pushl %%ecx\n"
+    "pushl %%edx\n"
+    "pushl %%esi\n"
+    "pushl %%edi\n"
+    /* NOTE(review): args are read at 8..20(%ebp) of this hand-made frame; presumably needs no compiler prologue and no inlining - confirm */
+    /* movl 20(%%ebp), %%ebx delay */
+    "movl 16(%%ebp), %%edx\n"   /* window */
+
+    "movl   8(%%ebp), %%eax\n"  /* buf */
+    "movl $16, %%ecx\n"         /* loop count */
+    "leal 516(%%eax), %%esi\n"  /* buf[64].im */
+    "leal 504(%%eax), %%edi\n"  /* buf[63].re */
+    "movl  12(%%ebp), %%eax\n"  /* data */
+    
+".first_128_sample:\n"
+    "movss   (%%esi), %%xmm0\n"
+    "movss  8(%%esi), %%xmm2\n"
+    "movss   (%%edi), %%xmm1\n"
+    "movss -8(%%edi), %%xmm3\n"
+    /* data[0..127]: 16 passes of 8 floats; esi walks up from buf[64].im, edi down from buf[63].re */
+    "movlhps %%xmm2, %%xmm0\n"      /* 0.0 | im1 | 0.0 | im0 */
+    "movlhps %%xmm3, %%xmm1\n"      /* 0.0 | re1 | 0.0 | re0 */
+
+    "movups (%%edx), %%xmm4\n"      /* w3 | w2 | w1 | w0 */
+    /* movups (%%ebx), %%xmm5 d3 | d2 | d1 | d0 */
+    "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
+
+    "movss  16(%%esi), %%xmm6\n"    /* im2 */
+    "movss  24(%%esi), %%xmm7\n"    /* im3 */
+    "subps     %%xmm1, %%xmm0\n"    /* -re1 | im1 | -re0 | im0 */
+    "movss -16(%%edi), %%xmm2\n"    /* re2 */
+    "movss -24(%%edi), %%xmm3\n"    /* re3 */
+    "mulps %%xmm4, %%xmm0\n"
+    "movlhps %%xmm7, %%xmm6\n"      /* 0.0 | im3 | 0.0 | im2 */
+    "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | re3 | 0.0 | re2 */
+    /* addps %%xmm5, %%xmm0 */
+    "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
+    "movups 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
+    /* movups 16(%%ebx), %%xmm5  d7 | d6 | d5 | d4 */
+    "subps %%xmm2, %%xmm6\n"        /* -re3 | im3 | -re2 | im2 */
+    "addl $32, %%edx\n"
+    "movups %%xmm0, (%%eax)\n"
+    /* addl $32, %%ebx */
+    "mulps %%xmm4, %%xmm6\n"
+    "addl $32, %%esi\n"
+    "addl $32, %%eax\n"
+    /* addps %%xmm5, %%xmm6 */
+    "addl $-32, %%edi\n"
+    "movups %%xmm6, -16(%%eax)\n"
+    "decl %%ecx\n"
+    "jnz .first_128_sample\n"
+    /* data[128..255]: same scheme; the data and window pointers simply keep advancing */
+    "movl    8(%%ebp), %%esi\n"     /* buf[0].re */
+    "leal 1020(%%esi), %%edi\n"     /* buf[127].im */
+    "movl $16, %%ecx\n"             /* loop count */
+    
+".second_128_sample:\n"
+    "movss   (%%esi), %%xmm0\n"     /* buf[i].re */
+    "movss  8(%%esi), %%xmm2\n"     /* re1 */
+    "movss   (%%edi), %%xmm1\n"     /* buf[127-i].im */
+    "movss -8(%%edi), %%xmm3\n"     /* im1 */
+
+    "movlhps %%xmm2, %%xmm0\n"      /* 0.0 | re1 | 0.0 | re0 */
+    "movlhps %%xmm3, %%xmm1\n"      /* 0.0 | im1 | 0.0 | im0 */
+    
+    "movups (%%edx), %%xmm4\n"      /* w3 | w2 | w1 | w0 */
+    /* movups (%%ebx), %%xmm5 d3 | d2 | d1 | d0 */
+
+    "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
+    "movss  16(%%esi), %%xmm6\n"    /* re2 */
+    "movss  24(%%esi), %%xmm7\n"    /* re3 */
+    "movss -16(%%edi), %%xmm2\n"    /* im2 */
+    "movss -24(%%edi), %%xmm3\n"    /* im3 */
+    "subps %%xmm1, %%xmm0\n"        /* -im1 | re1 | -im0 | re0 */
+    "movlhps %%xmm7, %%xmm6\n"      /* 0.0 | re3 | 0.0 | re2 */
+    "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | im3 | 0.0 | im2 */
+    "mulps %%xmm4, %%xmm0\n"
+    "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
+    "movups 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
+    "addl $32, %%esi\n"
+    "subps %%xmm2, %%xmm6\n"        /* -im3 | re3 | -im2 | re2 */
+    /* addps %%xmm5, %%xmm0 */
+    "mulps %%xmm4, %%xmm6\n"
+    "addl $-32, %%edi\n"
+    /* movups 16(%%ebx), %%xmm5  d7 | d6 | d5 | d4 */
+    "movups %%xmm0, (%%eax)\n"
+    /* addps %%xmm5, %%xmm6 */
+    "addl $32, %%edx\n"
+    "addl $32, %%eax\n"
+    /* addl $32, %%ebx */
+    "movups %%xmm6, -16(%%eax)\n"
+    "decl %%ecx\n"
+    "jnz .second_128_sample\n"
+    /* delay[0..127]: stores go to delay from here on; edx now steps down through the window */
+    "movl   8(%%ebp), %%eax\n"
+    "leal 512(%%eax), %%esi\n"  /* buf[64].re */
+    "leal 508(%%eax), %%edi\n"  /* buf[63].im */
+    "movl $16, %%ecx\n"         /* loop count */
+    "movl  20(%%ebp), %%eax\n"  /* delay */
+    /* NOTE(review): edx steps down 32 bytes per pass, yet each 16-byte load keeps its four coefficients ascending - confirm against the C code */
+".first_128_delays:\n"
+    "movss   (%%esi), %%xmm0\n"
+    "movss  8(%%esi), %%xmm2\n"
+    "movss   (%%edi), %%xmm1\n"
+    "movss -8(%%edi), %%xmm3\n"
+
+    "movlhps %%xmm2, %%xmm0\n"  /* 0.0 | re1 | 0.0 | re0 */
+    "movlhps %%xmm3, %%xmm1\n"  /* 0.0 | im1 | 0.0 | im0 */
+
+    "movups -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
+    "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
+    "movss  16(%%esi), %%xmm6\n"    /* re2 */
+    "movss  24(%%esi), %%xmm7\n"    /* re3 */
+    "movss -16(%%edi), %%xmm2\n"    /* im2 */
+    "movss -24(%%edi), %%xmm3\n"    /* im3 */
+    "subps %%xmm1, %%xmm0\n"        /* -im1 | re1 | -im0 | re0 */
+    "addl $-32, %%edx\n"
+    "movlhps %%xmm7, %%xmm6\n"      /* 0.0 | re3 | 0.0 | re2 */
+    "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | im3 | 0.0 | im2 */
+    "mulps %%xmm4, %%xmm0\n"
+    "movups (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
+    "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
+    "movups %%xmm0, (%%eax)\n"
+    "addl $32, %%esi\n"
+    "subps %%xmm2, %%xmm6\n"        /* -im3 | re3 | -im2 | re2 */
+    "addl $-32, %%edi\n"
+    "mulps %%xmm5, %%xmm6\n"
+    "addl $32, %%eax\n"
+    "movups %%xmm6, -16(%%eax)\n"
+    "decl %%ecx\n"
+    "jnz .first_128_delays\n"
+    /* delay[128..255]: operands of the subtraction are swapped here, giving re | -im pairs */
+    "movl    8(%%ebp), %%ebx\n"
+    "leal    4(%%ebx), %%esi\n" /* buf[0].im */
+    "leal 1016(%%ebx), %%edi\n" /* buf[127].re */
+    "movl $16, %%ecx\n"         /* loop count */
+    
+".second_128_delays:\n"
+    "movss   (%%esi), %%xmm0\n"
+    "movss  8(%%esi), %%xmm2\n"
+    "movss   (%%edi), %%xmm1\n"
+    "movss -8(%%edi), %%xmm3\n"
+
+    "movlhps %%xmm2, %%xmm0\n"  /* 0.0 | im1 | 0.0 | im0 */
+    "movlhps %%xmm3, %%xmm1\n"  /* 0.0 | re1 | 0.0 | re0 */
+
+    "movups -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
+    "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
+    "movss  16(%%esi), %%xmm6\n"    /* im2 */
+    "movss  24(%%esi), %%xmm7\n"    /* im3 */
+    "movss -16(%%edi), %%xmm2\n"    /* re2 */
+    "movss -24(%%edi), %%xmm3\n"    /* re3 */
+    "subps %%xmm0, %%xmm1\n"        /* re1 | -im1 | re0 | -im0 */
+    "addl $-32, %%edx\n"
+    "movlhps %%xmm7, %%xmm6\n"      /* 0.0 | im3 | 0.0 | im2 */
+    "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | re3 | 0.0 | re2 */
+    "mulps %%xmm4, %%xmm1\n"
+    "movups (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
+    "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
+    "movups %%xmm1, (%%eax)\n"
+    "addl $32, %%esi\n"
+    "subps %%xmm6, %%xmm2\n"        /* re3 | -im3 | re2 | -im2 */
+    "addl $-32, %%edi\n"
+    "mulps %%xmm5, %%xmm2\n"
+    "addl $32, %%eax\n"
+    "movups %%xmm2, -16(%%eax)\n"
+    "decl %%ecx\n"
+    "jnz .second_128_delays\n"
+
+    "popl %%edi\n"
+    "popl %%esi\n"
+    "popl %%edx\n"
+    "popl %%ecx\n"
+    "popl %%ebx\n"
+    "popl %%eax\n"
+    
+    "leave\n"
+    ::);
+}
diff --git a/src/ac3_decoder/ac3_srfft.h b/plugins/imdct/ac3_srfft.h
similarity index 99%
rename from src/ac3_decoder/ac3_srfft.h
rename to plugins/imdct/ac3_srfft.h
index c068b4dff5..27a2511676 100644
--- a/src/ac3_decoder/ac3_srfft.h
+++ b/plugins/imdct/ac3_srfft.h
@@ -1,8 +1,8 @@
 /*****************************************************************************
- * ac3_srfft.h: ac3 FFT
+ * ac3_srfft.h: ac3 FFT tables
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_srfft.h,v 1.3 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_srfft.h,v 1.1 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Renaud Dartus <reno@videolan.org>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -289,3 +289,4 @@ static const complex_t delta128_3[32] =
   a_i += v_i; \
   A13.imag = a_i; \
   }
+
diff --git a/src/ac3_decoder/ac3_srfft.c b/plugins/imdct/ac3_srfft_c.c
similarity index 93%
rename from src/ac3_decoder/ac3_srfft.c
rename to plugins/imdct/ac3_srfft_c.c
index ee165ffd1c..d3fdc58c70 100644
--- a/src/ac3_decoder/ac3_srfft.c
+++ b/plugins/imdct/ac3_srfft_c.c
@@ -1,8 +1,8 @@
 /*****************************************************************************
- * ac3_srfft.c: ac3 FFT
+ * ac3_srfft_c.c: ac3 FFT in C
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_srfft.c,v 1.4 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_srfft_c.c,v 1.1 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Renaud Dartus <reno@videolan.org>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -22,6 +22,12 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
 
+#define MODULE_NAME imdct
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
 #include "defs.h"
 
 #include <string.h>                                              /* memcpy() */
@@ -34,10 +40,7 @@
 #include "threads.h"
 #include "mtime.h"
 
-#include "stream_control.h"
-#include "input_ext-dec.h"
-
-#include "ac3_decoder.h"
+#include "ac3_imdct.h"
 #include "ac3_srfft.h"
 
 static void fft_8 (complex_t *x);
@@ -206,7 +209,7 @@ static void fft_8 (complex_t *x)
 
 
 static void fft_asmb(int k, complex_t *x, complex_t *wTB,
-	     const complex_t *d, const complex_t *d_3)
+                     const complex_t *d, const complex_t *d_3)
 {
   register complex_t  *x2k, *x3k, *x4k, *wB;
   register float a_r, a_i, a1_r, a1_i, u_r, u_i, v_r, v_i;
@@ -256,7 +259,7 @@ static void fft_asmb16(complex_t *x, complex_t *wTB)
 } 
 
 
-void fft_64p_c (complex_t *a)
+void _M( fft_64p ) ( complex_t *a )
 {
   fft_8(&a[0]); fft_4(&a[8]); fft_4(&a[12]);
   fft_asmb16(&a[0], &a[8]);
@@ -274,7 +277,7 @@ void fft_64p_c (complex_t *a)
 }
 
 
-void fft_128p_c (complex_t *a)
+void _M( fft_128p ) ( complex_t *a )
 {
   fft_8(&a[0]); fft_4(&a[8]); fft_4(&a[12]);
   fft_asmb16(&a[0], &a[8]);
@@ -310,3 +313,4 @@ void fft_128p_c (complex_t *a)
   /* fft_128(&a[0]); */
   fft_asmb(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
 }
+
diff --git a/plugins/imdct/ac3_srfft_sse.c b/plugins/imdct/ac3_srfft_sse.c
new file mode 100644
index 0000000000..2de563b57b
--- /dev/null
+++ b/plugins/imdct/ac3_srfft_sse.c
@@ -0,0 +1,372 @@
+/*****************************************************************************
+ * ac3_srfft_sse.c: accelerated SSE ac3 fft functions
+ *****************************************************************************
+ * Copyright (C) 1999, 2000, 2001 VideoLAN
+ * $Id: ac3_srfft_sse.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Renaud Dartus <reno@videolan.org>
+ *          Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME imdctsse
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include <stdio.h>
+
+#include "defs.h"
+
+#include <math.h>
+#include <stdio.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+
+#include "ac3_imdct.h"
+#include "ac3_srfft.h"
+
+void hsqrt2 (void);
+void C_1 (void);
+static void fft_4_sse (complex_t *x);
+static void fft_8_sse (complex_t *x);
+static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
+         const complex_t *d, const complex_t *d_3);
+
+void _M( fft_64p ) ( complex_t *a )    /* in-place 64-point complex FFT over a[0..63], assembled from the SSE kernels below */
+{
+    fft_8_sse(&a[0]); fft_4_sse(&a[8]); fft_4_sse(&a[12]);
+    fft_asmb_sse(2, &a[0], &a[8], &delta16[0], &delta16_3[0]);      /* fft_16(&a[0]) */
+  
+    fft_8_sse(&a[16]); fft_8_sse(&a[24]);
+    fft_asmb_sse(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);      /* fft_32(&a[0]) */
+
+    fft_8_sse(&a[32]); fft_4_sse(&a[40]); fft_4_sse(&a[44]);
+    fft_asmb_sse(2, &a[32], &a[40], &delta16[0], &delta16_3[0]);    /* fft_16(&a[32]) */
+
+    fft_8_sse(&a[48]); fft_4_sse(&a[56]); fft_4_sse(&a[60]);
+    fft_asmb_sse(2, &a[48], &a[56], &delta16[0], &delta16_3[0]);    /* fft_16(&a[48]) */
+
+    fft_asmb_sse(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);      /* fft_64(&a[0]) */
+}
+
+void _M( fft_128p ) ( complex_t *a )   /* in-place 128-point complex FFT over a[0..127], assembled from the SSE kernels below */
+{
+    fft_8_sse(&a[0]); fft_4_sse(&a[8]); fft_4_sse(&a[12]);
+    fft_asmb_sse(2, &a[0], &a[8], &delta16[0], &delta16_3[0]);      /* fft_16(&a[0]) */
+  
+    fft_8_sse(&a[16]); fft_8_sse(&a[24]);
+    fft_asmb_sse(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);      /* fft_32(&a[0]) */
+
+    fft_8_sse(&a[32]); fft_4_sse(&a[40]); fft_4_sse(&a[44]);
+    fft_asmb_sse(2, &a[32], &a[40], &delta16[0], &delta16_3[0]);    /* fft_16(&a[32]) */
+
+    fft_8_sse(&a[48]); fft_4_sse(&a[56]); fft_4_sse(&a[60]);
+    fft_asmb_sse(2, &a[48], &a[56], &delta16[0], &delta16_3[0]);    /* fft_16(&a[48]) */
+
+    fft_asmb_sse(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);      /* fft_64(&a[0]) */
+
+    fft_8_sse(&a[64]); fft_4_sse(&a[72]); fft_4_sse(&a[76]);
+    /* fft_16(&a[64]); */
+    fft_asmb_sse(2, &a[64], &a[72], &delta16[0], &delta16_3[0]);
+
+    fft_8_sse(&a[80]); fft_8_sse(&a[88]);
+  
+    /* fft_32(&a[64]); */
+    fft_asmb_sse(4, &a[64], &a[80],&delta32[0], &delta32_3[0]);
+
+    fft_8_sse(&a[96]); fft_4_sse(&a[104]); fft_4_sse(&a[108]);
+    /* fft_16(&a[96]); */
+    fft_asmb_sse(2, &a[96], &a[104], &delta16[0], &delta16_3[0]);
+
+    fft_8_sse(&a[112]); fft_8_sse(&a[120]);
+    /* fft_32(&a[96]); */
+    fft_asmb_sse(4, &a[96], &a[112], &delta32[0], &delta32_3[0]);
+  
+    /* fft_128(&a[0]); */
+    fft_asmb_sse(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
+}
+
+void hsqrt2 (void)     /* constant table, not a routine: fft_8_sse takes this symbol's address and loads 16 bytes from it */
+{                      /* NOTE(review): any compiler-emitted prologue would sit in front of the data -- confirm the build emits none */
+    __asm__ (
+     ".float 0f0.707106781188\n"    /* lane 0:  1/sqrt(2) */
+     ".float 0f0.707106781188\n"    /* lane 1:  1/sqrt(2) */
+     ".float 0f-0.707106781188\n"   /* lane 2: -1/sqrt(2) */
+     ".float 0f-0.707106781188\n"   /* lane 3: -1/sqrt(2) */
+     );
+}
+
+void C_1 (void)        /* constant table, not a routine: sign mask loaded by fft_8_sse and fft_asmb_sse via the symbol address */
+{                      /* NOTE(review): any compiler-emitted prologue would sit in front of the data -- confirm the build emits none */
+    __asm__ (
+     ".float 0f-1.0\n"      /* lane 0: negates the first float of a pair */
+     ".float 0f1.0\n"       /* lane 1: keeps the second float */
+     ".float 0f-1.0\n"      /* lane 2 */
+     ".float 0f1.0\n"       /* lane 3 */
+     );
+}
+
+static void fft_4_sse (complex_t *x)   /* in-place 4-point FFT of x[0..3]; lane comments read high | low */
+{
+    __asm__ __volatile__ (
+    "movups   (%%eax), %%xmm0\n"    /* x[1] | x[0] */
+    "movups 16(%%eax), %%xmm2\n"    /* x[3] | x[2] */
+    "movups  %%xmm0, %%xmm1\n"        /* x[1] | x[0] */
+    "addps   %%xmm2, %%xmm0\n"        /* x[1] + x[3] | x[0] + x[2] */
+    "subps   %%xmm2, %%xmm1\n"        /* x[1] - x[3] | x[0] - x[2] */
+    "xorps   %%xmm6, %%xmm6\n"
+    "movhlps %%xmm1, %%xmm4\n"        /* ? | x[1] - x[3] */
+    "movhlps %%xmm0, %%xmm3\n"        /* ? | x[1] + x[3] */
+    "subss   %%xmm4, %%xmm6\n"        /* 0 | -(x[1] - x[3]).re */
+    "movlhps %%xmm1, %%xmm0\n"        /* x[0] - x[2] | x[0] + x[2] */
+    "movlhps %%xmm6, %%xmm4\n"        /* 0 | -(x[1] - x[3]).re | (x[1] - x[3]).im | (x[1] - x[3]).re */
+    "movups  %%xmm0, %%xmm2\n"        /* x[0] - x[2] | x[0] + x[2] */
+    "shufps   $0x94, %%xmm4, %%xmm3\n" /* -i*(x[1] - x[3]) | x[1] + x[3] */
+    "addps   %%xmm3, %%xmm0\n"
+    "subps   %%xmm3, %%xmm2\n"
+    "movups  %%xmm0,   (%%eax)\n"
+    "movups  %%xmm2, 16(%%eax)\n"
+    : "=a" (x)
+    : "a" (x) : "memory" );     /* "memory": the asm loads and stores x[0..3], which the operand list alone does not tell the compiler */
+}
+
+static void fft_8_sse (complex_t *x)   /* in-place 8-point FFT of x[0..7]; lane comments read high | low */
+{
+    __asm__ __volatile__ (
+    "pushl   %%ebx\n"                 /* ebx is used as a scratch pointer to the constant tables and restored at the end */
+    
+    "movlps   (%%eax), %%xmm0\n"    /* x[0] */
+    "movlps 32(%%eax), %%xmm1\n"    /* x[4] */
+    "movhps 16(%%eax), %%xmm0\n"    /* x[2] | x[0] */
+    "movhps 48(%%eax), %%xmm1\n"    /* x[6] | x[4] */
+    "movups  %%xmm0, %%xmm2\n"        /* x[2] | x[0] */
+    "xorps   %%xmm3, %%xmm3\n"
+    "addps   %%xmm1, %%xmm0\n"        /* x[2] + x[6] | x[0] + x[4] */
+    "subps   %%xmm1, %%xmm2\n"        /* x[2] - x[6] | x[0] - x[4] */
+    "movhlps %%xmm0, %%xmm5\n"         /* x[2] + x[6] */
+    "movhlps %%xmm2, %%xmm4\n"      /* x[2] - x[6] */
+    "movlhps %%xmm2, %%xmm0\n"        /* x[0] - x[4] | x[0] + x[4] */
+    "subss   %%xmm4, %%xmm3\n"        /* 0 | 0 | 0 | -(x[2]-x[6]).re */
+    "movups  %%xmm0, %%xmm7\n"        /* x[0] - x[4] | x[0] + x[4] */
+    "movss   %%xmm3, %%xmm4\n"        /* (x[2]-x[6]).im | -(x[2]-x[6]).re -- movss replaces lane 0 only; a full-register copy would zero the .im lane */
+    "movlps 8(%%eax), %%xmm1\n"        /* x[1] */
+    "shufps   $0x14, %%xmm4, %%xmm5\n" /* -i*(x[2] - x[6]) | x[2] + x[6] */
+
+    "addps   %%xmm5, %%xmm0\n"        /* yt = x0-x4-i*(x2-x6) | x0+x4+x2+x6 */
+    "subps   %%xmm5, %%xmm7\n"        /* yb = x0-x4+i*(x2-x6) | x0+x4-x2-x6 */
+
+    "movhps 24(%%eax), %%xmm1\n"    /* x[3] | x[1] */
+    "movl   $hsqrt2, %%ebx\n"
+    "movlps 40(%%eax), %%xmm2\n"    /* x[5] */
+    "movhps 56(%%eax), %%xmm2\n"    /* x[7] | x[5] */
+    "movups  %%xmm1, %%xmm3\n"        /* x[3] | x[1] */
+    "addps   %%xmm2, %%xmm1\n"        /* x[3] + x[7] | x[1] + x[5] */
+    "subps   %%xmm2, %%xmm3\n"        /* x[3] - x[7] | x[1] - x[5] */
+    "movups (%%ebx), %%xmm4\n"        /* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */
+    "movups  %%xmm3, %%xmm6\n"        /* x[3] - x[7] | x[1] - x[5] */
+    "mulps   %%xmm4, %%xmm3\n"      /* -1/s2*(x[3] - x[7]) | 1/s2*(x[1] - x[5]) */
+    "shufps   $0xc8, %%xmm4, %%xmm4\n" /* -1/sqrt2 | 1/sqrt2 | -1/sqrt2 | 1/sqrt2 */
+    "shufps   $0xb1, %%xmm6, %%xmm6\n" /* (x3-x7).re|(x3-x7).im|(x1-x5).re|(x1-x5).im */
+    "mulps   %%xmm4, %%xmm6\n"      /* (x7-x3).re/s2|(x3-x7).im/s2|(x5-x1).re/s2|(x1-x5).im/s2 */
+    "addps   %%xmm3, %%xmm6\n"        /* (-1-i)/sqrt2 * (x[3]-x[7]) | (1-i)/sqrt2 * (x[1] - x[5]) */
+    "movhlps %%xmm1, %%xmm5\n"        /* x[3] + x[7] */
+    "movlhps %%xmm6, %%xmm1\n"        /* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
+    "shufps   $0xe4, %%xmm6, %%xmm5\n"    /* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */
+    "movups  %%xmm1, %%xmm3\n"        /* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
+    "movl      $C_1, %%ebx\n"
+    "addps   %%xmm5, %%xmm1\n"        /* u */
+    "subps   %%xmm5, %%xmm3\n"        /* v */
+    "movups  %%xmm0, %%xmm2\n"        /* copy of yt */
+    "movups  %%xmm7, %%xmm4\n"        /* copy of yb */
+    "movups (%%ebx), %%xmm5\n"        /* 1.0 | -1.0 | 1.0 | -1.0 */
+    "mulps   %%xmm5, %%xmm3\n"
+    "addps   %%xmm1, %%xmm0\n"        /* yt + u */
+    "subps   %%xmm1, %%xmm2\n"        /* yt - u */
+    "shufps   $0xb1, %%xmm3, %%xmm3\n" /* -i * v */
+    "movups  %%xmm0, (%%eax)\n"
+    "movups  %%xmm2, 32(%%eax)\n"
+    "addps   %%xmm3, %%xmm4\n"        /* yb - i*v */
+    "subps   %%xmm3, %%xmm7\n"        /* yb + i*v */
+    "movups  %%xmm4, 16(%%eax)\n"
+    "movups  %%xmm7, 48(%%eax)\n"
+
+    "popl    %%ebx\n"
+    : "=a" (x)
+    : "a" (x) : "memory" );     /* "memory": the asm loads and stores x[0..7], which the operand list alone does not tell the compiler */
+}
+
+    
+static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
+         const complex_t *d, const complex_t *d_3)   /* in-place combine pass: folds wTB[] (wT, and wB = wTB + 2k) into x[] using twiddle tables d[] and d_3[] */
+{   /* NOTE(review): the asm builds its own frame and fetches the arguments from 8..24(%ebp); this presumably relies on no compiler prologue and stack-passed arguments -- confirm build flags */
+    __asm__ __volatile__ (
+    "pushl %%ebp\n"
+    "movl %%esp, %%ebp\n"
+
+    "subl $4, %%esp\n"          /* one stack slot at -4(%ebp) for the loop counter */
+    
+    "pushl %%eax\n"
+    "pushl %%ebx\n"
+    "pushl %%ecx\n"
+    "pushl %%edx\n"
+    "pushl %%esi\n"
+    "pushl %%edi\n"
+
+    "movl  8(%%ebp), %%ecx\n"   /* k */
+    "movl 12(%%ebp), %%eax\n"   /* x */
+    "movl %%ecx, -4(%%ebp)\n"   /* k */
+    "movl 16(%%ebp), %%ebx\n"   /* wT */
+    "movl 20(%%ebp), %%edx\n"   /* d */
+    "movl 24(%%ebp), %%esi\n"   /* d3 */
+    "shll $4, %%ecx\n"          /* 16k */
+    "addl $8, %%edx\n"          /* edx -> d[1] */
+    "leal (%%eax, %%ecx, 2), %%edi\n"
+    "addl $8, %%esi\n"          /* esi -> d3[1] */
+    
+    /* TRANSZERO and TRANS */
+    "movups (%%eax), %%xmm0\n"      /* x[1] | x[0] */
+    "movups (%%ebx), %%xmm1\n"      /* wT[1] | wT[0] */
+    "movups (%%ebx, %%ecx), %%xmm2\n" /* wB[1] | wB[0] */
+    "movlps (%%edx), %%xmm3\n"      /* d[1] */
+    "movlps (%%esi), %%xmm4\n"      /* d3[1] */
+    "movhlps %%xmm1, %%xmm5\n"      /* wT[1] */
+    "movhlps %%xmm2, %%xmm6\n"      /* wB[1] */
+    "shufps $0x50, %%xmm3, %%xmm3\n" /* d[1].im | d[1].im | d[1].re | d[1].re */
+    "shufps $0x50, %%xmm4, %%xmm4\n" /* d3[1].im | d3[1].im | d3[1].re | d3[1].re */
+    "movlhps %%xmm5, %%xmm5\n"      /* wT[1] | wT[1] */
+    "movlhps %%xmm6, %%xmm6\n"      /* wB[1] | wB[1] */
+    "mulps   %%xmm3, %%xmm5\n"
+    "mulps   %%xmm4, %%xmm6\n"
+    "movhlps %%xmm5, %%xmm7\n"      /* wT[1].im * d[1].im | wT[1].re * d[1].im */
+    "movlhps %%xmm6, %%xmm5\n"      /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */
+    "shufps $0xb1, %%xmm6, %%xmm7\n" /* wB[1].re * d3[1].im | wB[1].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */
+    "movl $C_1, %%edi\n"
+    "movups (%%edi), %%xmm4\n"      /* 1.0 | -1.0 | 1.0 | -1.0 */
+    "mulps   %%xmm4, %%xmm7\n"
+    "addps   %%xmm7, %%xmm5\n"      /* wB[1] * d3[1] | wT[1] * d[1] */
+    "movlhps %%xmm5, %%xmm1\n"      /* d[1] * wT[1] | wT[0] */
+    "shufps  $0xe4, %%xmm5, %%xmm2\n" /* d3[1] * wB[1] | wB[0] */
+    "movups  %%xmm1, %%xmm3\n"      /* d[1] * wT[1] | wT[0] */
+    "leal   (%%eax, %%ecx, 2), %%edi\n"   /* edi was reused for $C_1 above; recompute x + 32k bytes */
+    "addps  %%xmm2, %%xmm1\n"       /* u */
+    "subps  %%xmm2, %%xmm3\n"       /* v */
+    "mulps  %%xmm4, %%xmm3\n"
+    "movups (%%eax, %%ecx), %%xmm5\n" /* xk[1] | xk[0] */
+    "shufps $0xb1, %%xmm3, %%xmm3\n"  /* -i * v */
+    "movups %%xmm0, %%xmm2\n"         /* x[1] | x[0] */
+    "movups %%xmm5, %%xmm6\n"         /* xk[1] | xk[0] */
+    "addps  %%xmm1, %%xmm0\n"
+    "subps  %%xmm1, %%xmm2\n"
+    "addps  %%xmm3, %%xmm5\n"
+    "subps  %%xmm3, %%xmm6\n"
+    "movups %%xmm0, (%%eax)\n"
+    "movups %%xmm2, (%%edi)\n"
+    "movups %%xmm5, (%%eax, %%ecx)\n"
+    "movups %%xmm6, (%%edi, %%ecx)\n"
+    "addl $16, %%eax\n"
+    "addl $16, %%ebx\n"
+    "addl  $8, %%edx\n"
+    "addl  $8, %%esi\n"
+    "decl -4(%%ebp)\n"              /* no branch here: the loop body always runs at least once, so k must be >= 2 (callers in this file pass 2, 4, 8, 16) */
+
+".loop:\n"                          /* NOTE(review): a named (non-numeric) asm label would be defined twice if this asm were ever emitted more than once */
+    "movups (%%ebx), %%xmm0\n"      /* wT[1] | wT[0] */
+    "movups (%%edx), %%xmm1\n"      /* d[1] | d[0] */
+
+    "movups (%%ebx, %%ecx), %%xmm4\n" /* wB[1] | wB[0] */
+    "movups (%%esi), %%xmm5\n"      /* d3[1] | d3[0] */
+
+    "movhlps %%xmm0, %%xmm2\n"      /* wT[1] */
+    "movhlps %%xmm1, %%xmm3\n"      /* d[1] */
+
+    "movhlps %%xmm4, %%xmm6\n"      /* wB[1] */
+    "movhlps %%xmm5, %%xmm7\n"      /* d3[1] */
+
+    "shufps $0x50, %%xmm1, %%xmm1\n" /* d[0].im | d[0].im | d[0].re | d[0].re */
+    "shufps $0x50, %%xmm3, %%xmm3\n" /* d[1].im | d[1].im | d[1].re | d[1].re */
+
+    "movlhps %%xmm0, %%xmm0\n"       /* wT[0] | wT[0] */
+    "shufps $0x50, %%xmm5, %%xmm5\n" /* d3[0].im | d3[0].im | d3[0].re | d3[0].re */
+    "movlhps %%xmm2, %%xmm2\n"       /* wT[1] | wT[1] */
+    "shufps $0x50, %%xmm7, %%xmm7\n" /* d3[1].im | d3[1].im | d3[1].re | d3[1].re */
+
+    "mulps   %%xmm1, %%xmm0\n"  /* d[0].im * wT[0].im | d[0].im * wT[0].re | d[0].re * wT[0].im | d[0].re * wT[0].re */
+    "mulps   %%xmm3, %%xmm2\n"  /* d[1].im * wT[1].im | d[1].im * wT[1].re | d[1].re * wT[1].im | d[1].re * wT[1].re */
+    "movlhps %%xmm4, %%xmm4\n"  /* wB[0] | wB[0] */
+    "movlhps %%xmm6, %%xmm6\n"  /* wB[1] | wB[1] */
+    
+    "movhlps %%xmm0, %%xmm1\n"  /* d[0].im * wT[0].im | d[0].im * wT[0].re */
+    "movlhps %%xmm2, %%xmm0\n"  /* d[1].re * wT[1].im | d[1].re * wT[1].re | d[0].re * wT[0].im | d[0].re * wT[0].re */
+    "mulps   %%xmm5, %%xmm4\n"  /* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */
+    "mulps   %%xmm7, %%xmm6\n"  /* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */
+    "shufps $0xb1, %%xmm2, %%xmm1\n"    /* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */
+    "movl $C_1, %%edi\n"
+    "movups (%%edi), %%xmm3\n"  /* 1.0 | -1.0 | 1.0 | -1.0 */
+
+    "movhlps %%xmm4, %%xmm5\n"  /* wB[0].im * d3[0].im | wB[0].re * d3[0].im */
+    "mulps   %%xmm3, %%xmm1\n"  /* d[1].im * wT[1].re | -d[1].im * wT[1].im | d[0].im * wT[0].re | -d[0].im * wT[0].im */
+    "movlhps %%xmm6, %%xmm4\n"  /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wB[0].im * d3[0].re | wB[0].re * d3[0].re */
+    "addps   %%xmm1, %%xmm0\n"  /* wT[1] * d[1] | wT[0] * d[0] */
+
+    "shufps $0xb1, %%xmm6, %%xmm5\n"    /* wB[1].re * d3[1].im | wB[1].im * d3[1].im | wB[0].re * d3[0].im | wB[0].im * d3[0].im */
+    "mulps   %%xmm3, %%xmm5\n"  /* wB[1].re * d3[1].im | -wB[1].im * d3[1].im | wB[0].re * d3[0].im | -wB[0].im * d3[0].im */
+    "addps   %%xmm5, %%xmm4\n"  /* wB[1] * d3[1] | wB[0] * d3[0] */
+
+    "movups %%xmm0, %%xmm1\n"   /* wT[1] * d[1] | wT[0] * d[0] */
+    "addps  %%xmm4, %%xmm0\n"   /* u */
+    "subps  %%xmm4, %%xmm1\n"   /* v */
+    "movups (%%eax), %%xmm6\n"  /* x[1] | x[0] */
+    "leal   (%%eax, %%ecx, 2), %%edi\n"   /* edi was reused for $C_1 above; recompute x + 32k bytes */
+    "mulps  %%xmm3, %%xmm1\n"
+    "addl $16, %%ebx\n"
+    "addl $16, %%esi\n"
+    "shufps $0xb1, %%xmm1, %%xmm1\n"    /* -i * v */
+    "movups (%%eax, %%ecx), %%xmm7\n"   /* xk[1] | xk[0] */
+    "movups %%xmm6, %%xmm2\n"
+    "movups %%xmm7, %%xmm4\n"
+    "addps  %%xmm0, %%xmm6\n"
+    "subps  %%xmm0, %%xmm2\n"
+    "movups %%xmm6, (%%eax)\n"
+    "movups %%xmm2, (%%edi)\n"
+    "addps  %%xmm1, %%xmm7\n"
+    "subps  %%xmm1, %%xmm4\n"
+    "addl $16, %%edx\n"
+    "movups %%xmm7, (%%eax, %%ecx)\n"
+    "movups %%xmm4, (%%edi, %%ecx)\n"
+
+    "addl $16, %%eax\n"
+    "decl -4(%%ebp)\n"
+    "jnz .loop\n"
+
+".end:\n"
+    "popl %%edi\n"
+    "popl %%esi\n"
+    "popl %%edx\n"
+    "popl %%ecx\n"
+    "popl %%ebx\n"
+    "popl %%eax\n"
+    
+    "addl $4, %%esp\n"
+
+    "leave\n"
+    ::: "memory");      /* "memory": the asm rewrites x[] through pointers the compiler cannot see in the (empty) operand lists */
+}
+
diff --git a/plugins/imdct/imdct.c b/plugins/imdct/imdct.c
new file mode 100644
index 0000000000..57424a0915
--- /dev/null
+++ b/plugins/imdct/imdct.c
@@ -0,0 +1,147 @@
+/*****************************************************************************
+ * imdct.c : IMDCT module
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: imdct.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME imdct
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <stdlib.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_imdct.h"
+#include "ac3_imdct_common.h"
+
+#include "modules.h"
+
+/*****************************************************************************
+ * Local and extern prototypes.
+ *****************************************************************************/
+static void imdct_getfunctions( function_list_t * p_function_list );
+static int  imdct_Probe       ( probedata_t *p_data );
+
+/*****************************************************************************
+ * Build configuration tree.
+ *****************************************************************************/
+MODULE_CONFIG_START
+ADD_WINDOW( "Configuration for IMDCT module" )
+    ADD_COMMENT( "Ha, ha -- nothing to configure yet" )    /* placeholder entry: this module declares no settings */
+MODULE_CONFIG_END
+
+/*****************************************************************************
+ * InitModule: get the module structure and configuration.
+ *****************************************************************************
+ * We have to fill psz_name, psz_longname and psz_version. These variables
+ * will be strdup()ed later by the main application because the module can
+ * be unloaded later to save memory, and we want to be able to access this
+ * data even after the module has been unloaded.
+ *****************************************************************************/
+MODULE_INIT
+{
+    p_module->psz_name = MODULE_STRING;             /* string literals only; per the header above the caller copies them */
+    p_module->psz_longname = "AC3 IMDCT module";
+    p_module->psz_version = VERSION;
+
+    p_module->i_capabilities = MODULE_CAPABILITY_NULL
+                                | MODULE_CAPABILITY_IMDCT;  /* the only real capability advertised is IMDCT */
+
+    return( 0 );                                    /* no failure path */
+}
+
+/*****************************************************************************
+ * ActivateModule: set the module to a usable state.
+ *****************************************************************************
+ * This function fills the capability functions and the configuration
+ * structure. Once ActivateModule() has been called, the i_usage can
+ * be set to 0 and calls to NeedModule() be made to increment it. To unload
+ * the module, one has to wait until i_usage == 0 and call DeactivateModule().
+ *****************************************************************************/
+MODULE_ACTIVATE
+{
+    p_module->p_functions = malloc( sizeof( module_functions_t ) );  /* released again in MODULE_DEACTIVATE */
+    if( p_module->p_functions == NULL )
+    {
+        return( -1 );                               /* out of memory */
+    }
+
+    imdct_getfunctions( &p_module->p_functions->imdct );  /* only the imdct entry is filled; the rest of the malloc'ed block is left as-is */
+
+    p_module->p_config = p_config;                  /* NOTE(review): p_config is presumably defined by the MODULE_CONFIG_* macros -- confirm */
+
+    return( 0 );
+}
+
+/*****************************************************************************
+ * DeactivateModule: make sure the module can be unloaded.
+ *****************************************************************************
+ * This function must only be called when i_usage == 0. If it successfully
+ * returns, i_usage can be set to -1 and the module unloaded. Be careful to
+ * lock usage_lock during the whole process.
+ *****************************************************************************/
+MODULE_DEACTIVATE
+{
+    free( p_module->p_functions );                  /* table allocated in MODULE_ACTIVATE; the pointer itself is not reset here */
+
+    return( 0 );                                    /* no failure path */
+}
+
+/* Following functions are local */
+
+/*****************************************************************************
+ * Functions exported as capabilities. They are declared as static so that
+ * we don't pollute the namespace too much.
+ *****************************************************************************/
+static void imdct_getfunctions( function_list_t * p_function_list )  /* fills the probe hook and the five IMDCT entry points */
+{
+    p_function_list->pf_probe = imdct_Probe;
+#define F p_function_list->functions.imdct          /* local shorthand, undefined again below */
+    F.pf_imdct_init    = _M( imdct_init );           /* NOTE(review): _M() presumably decorates the name per MODULE_NAME -- confirm in modules_inner.h */
+    F.pf_imdct_256     = _M( imdct_do_256 );
+    F.pf_imdct_256_nol = _M( imdct_do_256_nol );
+    F.pf_imdct_512     = _M( imdct_do_512 );
+    F.pf_imdct_512_nol = _M( imdct_do_512_nol );
+#undef F
+}
+
+/*****************************************************************************
+ * imdct_Probe: returns a preference score
+ *****************************************************************************/
+static int imdct_Probe( probedata_t *p_data )       /* p_data is not used */
+{
+    if( TestMethod( IMDCT_METHOD_VAR, "imdct" ) )   /* presumably true when this module was requested by name -- confirm TestMethod() */
+    {
+        return( 999 );                              /* top score */
+    }
+
+    /* This plugin always works */
+    return( 50 );                                   /* default score when not explicitly requested */
+}
+
diff --git a/plugins/imdct/imdctsse.c b/plugins/imdct/imdctsse.c
new file mode 100644
index 0000000000..9371a0a605
--- /dev/null
+++ b/plugins/imdct/imdctsse.c
@@ -0,0 +1,152 @@
+/*****************************************************************************
+ * imdctsse.c : accelerated SSE IMDCT module
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: imdctsse.c,v 1.1 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#define MODULE_NAME imdctsse
+#include "modules_inner.h"
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include "defs.h"
+
+#include <stdlib.h>
+
+#include "config.h"
+#include "common.h"
+#include "threads.h"
+#include "mtime.h"
+#include "tests.h"
+
+#include "ac3_imdct.h"
+#include "ac3_imdct_common.h"
+
+#include "modules.h"
+
+/*****************************************************************************
+ * Local and extern prototypes.
+ *****************************************************************************/
+static void imdct_getfunctions( function_list_t * p_function_list );
+static int  imdct_Probe       ( probedata_t *p_data );
+
+/*****************************************************************************
+ * Build configuration tree.
+ *****************************************************************************/
+MODULE_CONFIG_START
+ADD_WINDOW( "Configuration for IMDCT module" )
+    ADD_COMMENT( "Ha, ha -- nothing to configure yet" )    /* placeholder entry: this module declares no settings */
+MODULE_CONFIG_END
+
+/*****************************************************************************
+ * InitModule: get the module structure and configuration.
+ *****************************************************************************
+ * We have to fill psz_name, psz_longname and psz_version. These variables
+ * will be strdup()ed later by the main application because the module can
+ * be unloaded later to save memory, and we want to be able to access this
+ * data even after the module has been unloaded.
+ *****************************************************************************/
+MODULE_INIT
+{
+    p_module->psz_name = MODULE_STRING;             /* string literals only; per the header above the caller copies them */
+    p_module->psz_longname = "SSE AC3 IMDCT module";    /* distinct from the plain C module's "AC3 IMDCT module" */
+    p_module->psz_version = VERSION;
+
+    p_module->i_capabilities = MODULE_CAPABILITY_NULL
+                                | MODULE_CAPABILITY_IMDCT;  /* the only real capability advertised is IMDCT */
+
+    return( 0 );                                    /* no failure path */
+}
+
+/*****************************************************************************
+ * ActivateModule: set the module to a usable state.
+ *****************************************************************************
+ * This function fills the capability functions and the configuration
+ * structure. Once ActivateModule() has been called, the i_usage can
+ * be set to 0 and calls to NeedModule() be made to increment it. To unload
+ * the module, one has to wait until i_usage == 0 and call DeactivateModule().
+ *****************************************************************************/
+MODULE_ACTIVATE
+{
+    p_module->p_functions = malloc( sizeof( module_functions_t ) );  /* released again in MODULE_DEACTIVATE */
+    if( p_module->p_functions == NULL )
+    {
+        return( -1 );                               /* out of memory */
+    }
+
+    imdct_getfunctions( &p_module->p_functions->imdct );  /* only the imdct entry is filled; the rest of the malloc'ed block is left as-is */
+
+    p_module->p_config = p_config;                  /* NOTE(review): p_config is presumably defined by the MODULE_CONFIG_* macros -- confirm */
+
+    return( 0 );
+}
+
+/*****************************************************************************
+ * DeactivateModule: make sure the module can be unloaded.
+ *****************************************************************************
+ * This function must only be called when i_usage == 0. If it successfully
+ * returns, i_usage can be set to -1 and the module unloaded. Be careful to
+ * lock usage_lock during the whole process.
+ *****************************************************************************/
+MODULE_DEACTIVATE
+{
+    free( p_module->p_functions );                  /* table allocated in MODULE_ACTIVATE; the pointer itself is not reset here */
+
+    return( 0 );                                    /* no failure path */
+}
+
+/* Following functions are local */
+
+/*****************************************************************************
+ * Functions exported as capabilities. They are declared as static so that
+ * we don't pollute the namespace too much.
+ *****************************************************************************/
+static void imdct_getfunctions( function_list_t * p_function_list )  /* fills the probe hook and the five IMDCT entry points */
+{
+    p_function_list->pf_probe = imdct_Probe;
+#define F p_function_list->functions.imdct          /* local shorthand, undefined again below */
+    F.pf_imdct_init    = _M( imdct_init );           /* NOTE(review): _M() presumably decorates the name per MODULE_NAME -- confirm in modules_inner.h */
+    F.pf_imdct_256     = _M( imdct_do_256 );
+    F.pf_imdct_256_nol = _M( imdct_do_256_nol );
+    F.pf_imdct_512     = _M( imdct_do_512 );
+    F.pf_imdct_512_nol = _M( imdct_do_512_nol );
+#undef F
+}
+
+/*****************************************************************************
+ * imdct_Probe: returns a preference score
+ *****************************************************************************/
+static int imdct_Probe( probedata_t *p_data )       /* p_data is not used */
+{
+    if( !TestCPU( CPU_CAPABILITY_SSE ) )
+    {
+        return( 0 );                                /* no SSE: this module must never be selected */
+    }
+
+    if( TestMethod( IMDCT_METHOD_VAR, "imdctsse" ) )   /* the IMDCT selector, not the IDCT one */
+    {
+        return( 999 );                              /* top score */
+    }
+
+    /* SSE is available: score above the plain C imdct module (50) */
+    return( 200 );
+}
+
diff --git a/plugins/motion/motionmmx.c b/plugins/motion/motionmmx.c
index 96177f87de..724c2898e6 100644
--- a/plugins/motion/motionmmx.c
+++ b/plugins/motion/motionmmx.c
@@ -2,7 +2,7 @@
  * motionmmx.c : MMX motion compensation module for vlc
  *****************************************************************************
  * Copyright (C) 2000 VideoLAN
- * $Id: motionmmx.c,v 1.4 2001/04/15 04:19:57 sam Exp $
+ * $Id: motionmmx.c,v 1.5 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Christophe Massiot <massiot@via.ecp.fr>
  *
@@ -116,20 +116,16 @@ MODULE_DEACTIVATE
  *****************************************************************************/
 int _M( motion_Probe )( probedata_t *p_data )
 {
-    if( TestCPU( CPU_CAPABILITY_MMX ) )
+    if( !TestCPU( CPU_CAPABILITY_MMX ) )
     {
-        if( TestMethod( MOTION_METHOD_VAR, "motionmmx" ) )
-        {
-            return( 999 );
-        }
-        else
-        {
-            return( 150 );
-        }
+        return( 0 );
     }
-    else
+
+    if( TestMethod( MOTION_METHOD_VAR, "motionmmx" ) )
     {
-        return( 0 );
+        return( 999 );
     }
+
+    return( 150 );
 }
 
diff --git a/plugins/motion/motionmmxext.c b/plugins/motion/motionmmxext.c
index e2658ff8fa..702d5533dd 100644
--- a/plugins/motion/motionmmxext.c
+++ b/plugins/motion/motionmmxext.c
@@ -2,7 +2,7 @@
  * motionmmxext.c : MMX EXT motion compensation module for vlc
  *****************************************************************************
  * Copyright (C) 2000 VideoLAN
- * $Id: motionmmxext.c,v 1.4 2001/04/15 04:19:57 sam Exp $
+ * $Id: motionmmxext.c,v 1.5 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Christophe Massiot <massiot@via.ecp.fr>
  *
@@ -116,20 +116,16 @@ MODULE_DEACTIVATE
  *****************************************************************************/
 int _M( motion_Probe )( probedata_t *p_data )
 {
-    if( TestCPU( CPU_CAPABILITY_MMXEXT ) )
+    if( !TestCPU( CPU_CAPABILITY_MMXEXT ) )
     {
-        if( TestMethod( MOTION_METHOD_VAR, "motionmmxext" ) )
-        {
-            return( 999 );
-        }
-        else
-        {
-            return( 200 );
-        }
+        return( 0 );
     }
-    else
+
+    if( TestMethod( MOTION_METHOD_VAR, "motionmmxext" ) )
     {
-        return( 0 );
+        return( 999 );
     }
+
+    return( 200 );
 }
 
diff --git a/plugins/yuv/video_yuvmmx.c b/plugins/yuv/video_yuvmmx.c
index ea468c1982..782c3e361c 100644
--- a/plugins/yuv/video_yuvmmx.c
+++ b/plugins/yuv/video_yuvmmx.c
@@ -3,7 +3,7 @@
  * Provides functions to perform the YUV conversion.
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: video_yuvmmx.c,v 1.8 2001/04/15 04:19:58 sam Exp $
+ * $Id: video_yuvmmx.c,v 1.9 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Samuel Hocevar <sam@zoy.org>
  *
@@ -79,21 +79,17 @@ void _M( yuv_getfunctions )( function_list_t * p_function_list )
 static int yuv_Probe( probedata_t *p_data )
 {
     /* Test for MMX support in the CPU */
-    if( TestCPU( CPU_CAPABILITY_MMX ) )
+    if( !TestCPU( CPU_CAPABILITY_MMX ) )
     {
-        if( TestMethod( YUV_METHOD_VAR, "yuvmmx" ) )
-        {
-            return( 999 );
-        }
-        else
-        {
-            return( 100 );
-        }
+        return( 0 );
     }
-    else
+
+    if( TestMethod( YUV_METHOD_VAR, "yuvmmx" ) )
     {
-        return( 0 );
+        return( 999 );
     }
+
+    return( 100 );
 }
 
 /*****************************************************************************
diff --git a/src/ac3_decoder/ac3_bit_allocate.c b/src/ac3_decoder/ac3_bit_allocate.c
index bd59e691f6..2e1db36ab7 100644
--- a/src/ac3_decoder/ac3_bit_allocate.c
+++ b/src/ac3_decoder/ac3_bit_allocate.c
@@ -2,7 +2,7 @@
  * ac3_bit_allocate.c: ac3 allocation tables
  *****************************************************************************
  * Copyright (C) 2000 VideoLAN
- * $Id: ac3_bit_allocate.c,v 1.21 2001/05/14 15:58:03 reno Exp $
+ * $Id: ac3_bit_allocate.c,v 1.22 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -22,6 +22,10 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
 #include "defs.h"
 
 #include <string.h>                                              /* memcpy() */
@@ -31,12 +35,13 @@
 #include "threads.h"
 #include "mtime.h"
 
-#include "intf_msg.h"                        /* intf_DbgMsg(), intf_ErrMsg() */
-
 #include "stream_control.h"
 #include "input_ext-dec.h"
 
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
 #include "ac3_decoder.h"
+
 #include "ac3_internal.h"                                 /* DELTA_BIT_REUSE */
 
 
diff --git a/src/ac3_decoder/ac3_decoder.c b/src/ac3_decoder/ac3_decoder.c
index 5e3d17c1fb..c0bb86dfb4 100644
--- a/src/ac3_decoder/ac3_decoder.c
+++ b/src/ac3_decoder/ac3_decoder.c
@@ -2,7 +2,7 @@
  * ac3_decoder.c: core ac3 decoder
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_decoder.c,v 1.33 2001/05/14 15:58:03 reno Exp $
+ * $Id: ac3_decoder.c,v 1.34 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Michel Lespinasse <walken@zoy.org>
@@ -23,6 +23,9 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
 
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
 #include "defs.h"
 
 #include <string.h>                                              /* memcpy() */
@@ -39,8 +42,11 @@
 
 #include "audio_output.h"
 
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
 #include "ac3_decoder.h"
 #include "ac3_decoder_thread.h"                           /* ac3dec_thread_t */
+
 #include "ac3_internal.h"
 
 static const float cmixlev_lut[4] = { 0.707, 0.595, 0.500, 0.707 };
@@ -50,7 +56,6 @@ int ac3_init (ac3dec_t * p_ac3dec)
 {
     p_ac3dec->mantissa.lfsr_state = 1;          /* dither_gen initialization */
     imdct_init(&p_ac3dec->imdct);
-    downmix_init(&p_ac3dec->downmix);
     
     return 0;
 }
@@ -58,7 +63,7 @@ int ac3_init (ac3dec_t * p_ac3dec)
 int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
 {
     int i;
-    ac3dec_thread_t * p_ac3dec_t = (ac3dec_thread_t *) p_ac3dec->bit_stream.p_callback_arg;
+    ac3dec_thread_t * p_ac3thread = (ac3dec_thread_t *) p_ac3dec->bit_stream.p_callback_arg;
     
     if (parse_bsi (p_ac3dec))
     {
@@ -67,20 +72,20 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
         return 1;
     }
     
-	/* compute downmix parameters
-	 * downmix to tow channels for now */
-	p_ac3dec->dm_par.clev = 0.0;
+    /* compute downmix parameters
+     * downmix to two channels for now */
+    p_ac3dec->dm_par.clev = 0.0;
     p_ac3dec->dm_par.slev = 0.0; 
     p_ac3dec->dm_par.unit = 1.0;
-	if (p_ac3dec->bsi.acmod & 0x1)	/* have center */
-	    p_ac3dec->dm_par.clev = cmixlev_lut[p_ac3dec->bsi.cmixlev];
+    if (p_ac3dec->bsi.acmod & 0x1)    /* have center */
+        p_ac3dec->dm_par.clev = cmixlev_lut[p_ac3dec->bsi.cmixlev];
 
-	if (p_ac3dec->bsi.acmod & 0x4)	/* have surround channels */
-		p_ac3dec->dm_par.slev = smixlev_lut[p_ac3dec->bsi.surmixlev];
+    if (p_ac3dec->bsi.acmod & 0x4)    /* have surround channels */
+        p_ac3dec->dm_par.slev = smixlev_lut[p_ac3dec->bsi.surmixlev];
 
     p_ac3dec->dm_par.unit /= 1.0 + p_ac3dec->dm_par.clev + p_ac3dec->dm_par.slev;
-	p_ac3dec->dm_par.clev *= p_ac3dec->dm_par.unit;
-	p_ac3dec->dm_par.slev *= p_ac3dec->dm_par.unit;
+    p_ac3dec->dm_par.clev *= p_ac3dec->dm_par.unit;
+    p_ac3dec->dm_par.slev *= p_ac3dec->dm_par.unit;
 
     for (i = 0; i < 6; i++) {
         /* Initialize freq/time sample storage */
@@ -88,45 +93,50 @@ int ac3_decode_frame (ac3dec_t * p_ac3dec, s16 * buffer)
                 (p_ac3dec->bsi.nfchans + p_ac3dec->bsi.lfeon));
 
 
-        if ((p_ac3dec_t->p_fifo->b_die) && (p_ac3dec_t->p_fifo->b_error))
+        if( p_ac3thread->p_fifo->b_die || p_ac3thread->p_fifo->b_error )
         {        
             return 1;
         }
  
-        if (parse_audblk (p_ac3dec, i))
+        if( parse_audblk( p_ac3dec, i ) )
         {
-            intf_WarnMsg (3,"ac3dec warn: error during audioblock");
-            parse_auxdata (p_ac3dec);
+            intf_WarnMsg( 3, "ac3dec warning: error during audioblock" );
+            parse_auxdata( p_ac3dec );
             return 1;
         }
 
-        if ((p_ac3dec_t->p_fifo->b_die) && (p_ac3dec_t->p_fifo->b_error))
+        if( p_ac3thread->p_fifo->b_die || p_ac3thread->p_fifo->b_error )
         {        
             return 1;
         }
 
-        if (exponent_unpack (p_ac3dec))
+        if( exponent_unpack( p_ac3dec ) )
         {
-            intf_WarnMsg (3,"ac3dec warn: error during unpack");
-            parse_auxdata (p_ac3dec);
+            intf_WarnMsg( 3, "ac3dec warning: error during unpack" );
+            parse_auxdata( p_ac3dec );
             return 1;
         }
+
         bit_allocate (p_ac3dec);
         mantissa_unpack (p_ac3dec);
 
-        if ((p_ac3dec_t->p_fifo->b_die) && (p_ac3dec_t->p_fifo->b_error))
+        if( p_ac3thread->p_fifo->b_die || p_ac3thread->p_fifo->b_error )
         {        
             return 1;
         }
         
         if  (p_ac3dec->bsi.acmod == 0x2)
+        {
             rematrix (p_ac3dec);
+        }
+
         imdct (p_ac3dec, buffer);
 
-        buffer += 2*256;
+        buffer += 2 * 256;
     }
 
     parse_auxdata (p_ac3dec);
 
     return 0;
 }
+
diff --git a/src/ac3_decoder/ac3_decoder.h b/src/ac3_decoder/ac3_decoder.h
index 4de1c435f9..9237d12652 100644
--- a/src/ac3_decoder/ac3_decoder.h
+++ b/src/ac3_decoder/ac3_decoder.h
@@ -2,7 +2,7 @@
  * ac3_decoder.h : ac3 decoder interface
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_decoder.h,v 1.8 2001/05/14 15:58:03 reno Exp $
+ * $Id: ac3_decoder.h,v 1.9 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Renaud Dartus <reno@videolan.org>
@@ -352,63 +352,6 @@ typedef struct mantissa_s
     u16 lfsr_state;
 } mantissa_t;
 
-typedef struct complex_s {
-    float real;
-    float imag;
-} complex_t;
-
-#define N 512
-
-typedef struct imdct_s
-{
-    complex_t buf[N/4];
-
-    /* Delay buffer for time domain interleaving */
-    float delay[6][256];
-    float delay1[6][256];
-
-    /* Twiddle factors for IMDCT */
-    float xcos1[N/4];
-    float xsin1[N/4];
-    float xcos2[N/8];
-    float xsin2[N/8];
-   
-    /* Twiddle factor LUT */
-    complex_t *w[7];
-    complex_t w_1[1];
-    complex_t w_2[2];
-    complex_t w_4[4];
-    complex_t w_8[8];
-    complex_t w_16[16];
-    complex_t w_32[32];
-    complex_t w_64[64];
-
-    float xcos_sin_sse[128 * 4] __attribute__((aligned(16)));
-    
-    /* Functions */
-    void (*fft_64p) (complex_t *a);
-    
-    void (*imdct_do_512)(struct imdct_s * p_imdct, float data[], float delay[]);
-    void (*imdct_do_512_nol)(struct imdct_s * p_imdct, float data[], float delay[]);
-
-} imdct_t;
-
-typedef struct dm_par_s {
-    float unit;
-    float clev;
-    float slev;
-} dm_par_t;
-
-typedef struct downmix_s {
-    void (*downmix_3f_2r_to_2ch)(float *samples, dm_par_t * dm_par);
-    void (*downmix_3f_1r_to_2ch)(float *samples, dm_par_t * dm_par);
-    void (*downmix_2f_2r_to_2ch)(float *samples, dm_par_t * dm_par);
-    void (*downmix_2f_1r_to_2ch)(float *samples, dm_par_t * dm_par);
-    void (*downmix_3f_0r_to_2ch)(float *samples, dm_par_t * dm_par);
-    void (*stream_sample_2ch_to_s16)(s16 *s16_samples, float *left, float *right);
-    void (*stream_sample_1ch_to_s16)(s16 *s16_samples, float *center);
-} downmix_t;
-
 struct ac3dec_s
 {
     /*
@@ -436,3 +379,4 @@ struct ac3dec_s
     downmix_t           downmix;
 
 };
+
diff --git a/src/ac3_decoder/ac3_decoder_thread.c b/src/ac3_decoder/ac3_decoder_thread.c
index 6cac3615ac..17e86ac2bc 100644
--- a/src/ac3_decoder/ac3_decoder_thread.c
+++ b/src/ac3_decoder/ac3_decoder_thread.c
@@ -2,7 +2,7 @@
  * ac3_decoder_thread.c: ac3 decoder thread
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_decoder_thread.c,v 1.32 2001/05/06 04:32:02 sam Exp $
+ * $Id: ac3_decoder_thread.c,v 1.33 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Michel Lespinasse <walken@zoy.org>
  *
@@ -37,7 +37,6 @@
 
 #include <unistd.h>                                              /* getpid() */
 
-#include <stdio.h>                                           /* "intf_msg.h" */
 #include <stdlib.h>                                      /* malloc(), free() */
 #include <string.h>                                              /* memset() */
 
@@ -45,6 +44,7 @@
 #include "common.h"
 #include "threads.h"
 #include "mtime.h"
+#include "modules.h"
 
 #include "intf_msg.h"                        /* intf_DbgMsg(), intf_ErrMsg() */
 
@@ -53,6 +53,8 @@
 
 #include "audio_output.h"
 
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
 #include "ac3_decoder.h"
 #include "ac3_decoder_thread.h"
 
@@ -68,18 +70,17 @@ static void     EndThread               (ac3dec_thread_t * p_adec);
 static void     BitstreamCallback       ( bit_stream_t *p_bit_stream,
                                               boolean_t b_new_pes );
 
-
 /*****************************************************************************
  * ac3dec_CreateThread: creates an ac3 decoder thread
  *****************************************************************************/
 vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config )
 {
-    ac3dec_thread_t *   p_ac3dec_t;
+    ac3dec_thread_t *   p_ac3thread;
 
     intf_DbgMsg( "ac3dec debug: creating ac3 decoder thread" );
 
     /* Allocate the memory needed to store the thread's structure */
-    if((p_ac3dec_t = (ac3dec_thread_t *)malloc(sizeof(ac3dec_thread_t)))==NULL)
+    if((p_ac3thread = (ac3dec_thread_t *)malloc(sizeof(ac3dec_thread_t)))==NULL)
     {
         intf_ErrMsg ( "ac3dec error: not enough memory "
                       "for ac3dec_CreateThread() to create the new thread");
@@ -89,28 +90,77 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config )
     /*
      * Initialize the thread properties
      */
-    p_ac3dec_t->p_config = p_config;
-    p_ac3dec_t->p_fifo = p_config->decoder_config.p_decoder_fifo;
+    p_ac3thread->p_config = p_config;
+    p_ac3thread->p_fifo = p_config->decoder_config.p_decoder_fifo;
+
+    /*
+     * Choose the best downmix module
+     */
+#define DOWNMIX p_ac3thread->ac3_decoder.downmix
+    DOWNMIX.p_module = module_Need( MODULE_CAPABILITY_DOWNMIX, NULL );
+
+    if( DOWNMIX.p_module == NULL )
+    {
+        intf_ErrMsg( "ac3dec error: no suitable downmix module" );
+        free( p_ac3thread );
+        return( 0 );
+    }
+
+#define F DOWNMIX.p_module->p_functions->downmix.functions.downmix
+    DOWNMIX.pf_downmix_3f_2r_to_2ch     = F.pf_downmix_3f_2r_to_2ch;
+    DOWNMIX.pf_downmix_2f_2r_to_2ch     = F.pf_downmix_2f_2r_to_2ch;
+    DOWNMIX.pf_downmix_3f_1r_to_2ch     = F.pf_downmix_3f_1r_to_2ch;
+    DOWNMIX.pf_downmix_2f_1r_to_2ch     = F.pf_downmix_2f_1r_to_2ch;
+    DOWNMIX.pf_downmix_3f_0r_to_2ch     = F.pf_downmix_3f_0r_to_2ch;
+    DOWNMIX.pf_stream_sample_2ch_to_s16 = F.pf_stream_sample_2ch_to_s16;
+    DOWNMIX.pf_stream_sample_1ch_to_s16 = F.pf_stream_sample_1ch_to_s16;
+#undef F
+#undef DOWNMIX
+
+    /*
+     * Choose the best IMDCT module
+     */
+#define IMDCT p_ac3thread->ac3_decoder.imdct
+    IMDCT.p_module = module_Need( MODULE_CAPABILITY_IMDCT, NULL );
+
+    if( IMDCT.p_module == NULL )
+    {
+        intf_ErrMsg( "ac3dec error: no suitable IMDCT module" );
+        module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module );
+        free( p_ac3thread );
+        return( 0 );
+    }
+
+#define F IMDCT.p_module->p_functions->imdct.functions.imdct
+    IMDCT.pf_imdct_init    = F.pf_imdct_init;
+    IMDCT.pf_imdct_256     = F.pf_imdct_256;
+    IMDCT.pf_imdct_256_nol = F.pf_imdct_256_nol;
+    IMDCT.pf_imdct_512     = F.pf_imdct_512;
+    IMDCT.pf_imdct_512_nol = F.pf_imdct_512_nol;
+#undef F
+#undef IMDCT
 
     /* Initialize the ac3 decoder structures */
-    ac3_init (&p_ac3dec_t->ac3_decoder);
+    ac3_init (&p_ac3thread->ac3_decoder);
 
     /*
      * Initialize the output properties
      */
-    p_ac3dec_t->p_aout_fifo = NULL;
+    p_ac3thread->p_aout_fifo = NULL;
 
     /* Spawn the ac3 decoder thread */
-    if (vlc_thread_create(&p_ac3dec_t->thread_id, "ac3 decoder", 
-                (vlc_thread_func_t)RunThread, (void *)p_ac3dec_t))
+    if (vlc_thread_create(&p_ac3thread->thread_id, "ac3 decoder", 
+                (vlc_thread_func_t)RunThread, (void *)p_ac3thread))
     {
         intf_ErrMsg( "ac3dec error: can't spawn ac3 decoder thread" );
-        free (p_ac3dec_t);
+        module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module );
+        module_Unneed( p_ac3thread->ac3_decoder.imdct.p_module );
+        free (p_ac3thread);
         return 0;
     }
 
-    intf_DbgMsg ("ac3dec debug: ac3 decoder thread (%p) created", p_ac3dec_t);
-    return p_ac3dec_t->thread_id;
+    intf_DbgMsg ("ac3dec debug: ac3 decoder thread (%p) created", p_ac3thread);
+    return p_ac3thread->thread_id;
 }
 
 /* Following functions are local */
@@ -118,48 +168,48 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config )
 /*****************************************************************************
  * InitThread : initialize an ac3 decoder thread
  *****************************************************************************/
-static int InitThread (ac3dec_thread_t * p_ac3dec_t)
+static int InitThread (ac3dec_thread_t * p_ac3thread)
 {
-    intf_DbgMsg("ac3dec debug: initializing ac3 decoder thread %p",p_ac3dec_t);
+    intf_DbgMsg("ac3dec debug: initializing ac3 decoder thread %p",p_ac3thread);
 
-    p_ac3dec_t->p_config->decoder_config.pf_init_bit_stream(
-            &p_ac3dec_t->ac3_decoder.bit_stream,
-            p_ac3dec_t->p_config->decoder_config.p_decoder_fifo,
-            BitstreamCallback, (void *) p_ac3dec_t );
+    p_ac3thread->p_config->decoder_config.pf_init_bit_stream(
+            &p_ac3thread->ac3_decoder.bit_stream,
+            p_ac3thread->p_config->decoder_config.p_decoder_fifo,
+            BitstreamCallback, (void *) p_ac3thread );
 
     /* Creating the audio output fifo */
-    p_ac3dec_t->p_aout_fifo = aout_CreateFifo( AOUT_ADEC_STEREO_FIFO, 2, 0, 0,
+    p_ac3thread->p_aout_fifo = aout_CreateFifo( AOUT_ADEC_STEREO_FIFO, 2, 0, 0,
                                                AC3DEC_FRAME_SIZE, NULL  );
-    if ( p_ac3dec_t->p_aout_fifo == NULL )
+    if ( p_ac3thread->p_aout_fifo == NULL )
     {
         return -1;
     }
 
-    intf_DbgMsg("ac3dec debug: ac3 decoder thread %p initialized", p_ac3dec_t);
+    intf_DbgMsg("ac3dec debug: ac3 decoder thread %p initialized", p_ac3thread);
     return 0;
 }
 
 /*****************************************************************************
  * RunThread : ac3 decoder thread
  *****************************************************************************/
-static void RunThread (ac3dec_thread_t * p_ac3dec_t)
+static void RunThread (ac3dec_thread_t * p_ac3thread)
 {
     int sync;
 
-    intf_DbgMsg ("ac3dec debug: running ac3 decoder thread (%p) (pid == %i)", p_ac3dec_t, getpid());
+    intf_DbgMsg ("ac3dec debug: running ac3 decoder thread (%p) (pid == %i)", p_ac3thread, getpid());
 
     /* Initializing the ac3 decoder thread */
-    if (InitThread (p_ac3dec_t)) /* XXX?? */
+    if (InitThread (p_ac3thread)) /* XXX?? */
     {
-        p_ac3dec_t->p_fifo->b_error = 1;
+        p_ac3thread->p_fifo->b_error = 1;
     }
 
     sync = 0;
-    p_ac3dec_t->sync_ptr = 0;
+    p_ac3thread->sync_ptr = 0;
 
     /* ac3 decoder thread's main loop */
     /* FIXME : do we have enough room to store the decoded frames ?? */
-    while ((!p_ac3dec_t->p_fifo->b_die) && (!p_ac3dec_t->p_fifo->b_error))
+    while ((!p_ac3thread->p_fifo->b_die) && (!p_ac3thread->p_fifo->b_error))
     {
         s16 * buffer;
         ac3_sync_info_t sync_info;
@@ -167,122 +217,125 @@ static void RunThread (ac3dec_thread_t * p_ac3dec_t)
 
         if (!sync) {
             do {
-                GetBits(&p_ac3dec_t->ac3_decoder.bit_stream,8);
-            } while ((!p_ac3dec_t->sync_ptr) && (!p_ac3dec_t->p_fifo->b_die)
-                    && (!p_ac3dec_t->p_fifo->b_error));
+                GetBits(&p_ac3thread->ac3_decoder.bit_stream,8);
+            } while ((!p_ac3thread->sync_ptr) && (!p_ac3thread->p_fifo->b_die)
+                    && (!p_ac3thread->p_fifo->b_error));
             
-            ptr = p_ac3dec_t->sync_ptr;
+            ptr = p_ac3thread->sync_ptr;
 
-            while(ptr-- && (!p_ac3dec_t->p_fifo->b_die)
-                && (!p_ac3dec_t->p_fifo->b_error))
+            while(ptr-- && (!p_ac3thread->p_fifo->b_die)
+                && (!p_ac3thread->p_fifo->b_error))
             {
-                p_ac3dec_t->ac3_decoder.bit_stream.p_byte++;
+                p_ac3thread->ac3_decoder.bit_stream.p_byte++;
             }
                         
             /* we are in sync now */
             sync = 1;
         }
 
-        if (DECODER_FIFO_START(*p_ac3dec_t->p_fifo)->i_pts)
+        if (DECODER_FIFO_START(*p_ac3thread->p_fifo)->i_pts)
         {
-            p_ac3dec_t->p_aout_fifo->date[p_ac3dec_t->p_aout_fifo->l_end_frame] =
-                DECODER_FIFO_START(*p_ac3dec_t->p_fifo)->i_pts;
-            DECODER_FIFO_START(*p_ac3dec_t->p_fifo)->i_pts = 0;
+            p_ac3thread->p_aout_fifo->date[p_ac3thread->p_aout_fifo->l_end_frame] =
+                DECODER_FIFO_START(*p_ac3thread->p_fifo)->i_pts;
+            DECODER_FIFO_START(*p_ac3thread->p_fifo)->i_pts = 0;
         } else {
-            p_ac3dec_t->p_aout_fifo->date[p_ac3dec_t->p_aout_fifo->l_end_frame] =
+            p_ac3thread->p_aout_fifo->date[p_ac3thread->p_aout_fifo->l_end_frame] =
                 LAST_MDATE;
         }
     
-        if (ac3_sync_frame (&p_ac3dec_t->ac3_decoder, &sync_info))
+        if (ac3_sync_frame (&p_ac3thread->ac3_decoder, &sync_info))
         {
             sync = 0;
             goto bad_frame;
         }
 
-        p_ac3dec_t->p_aout_fifo->l_rate = sync_info.sample_rate;
+        p_ac3thread->p_aout_fifo->l_rate = sync_info.sample_rate;
 
-        buffer = ((s16 *)p_ac3dec_t->p_aout_fifo->buffer) + 
-            (p_ac3dec_t->p_aout_fifo->l_end_frame * AC3DEC_FRAME_SIZE);
+        buffer = ((s16 *)p_ac3thread->p_aout_fifo->buffer) + 
+            (p_ac3thread->p_aout_fifo->l_end_frame * AC3DEC_FRAME_SIZE);
 
-        if (ac3_decode_frame (&p_ac3dec_t->ac3_decoder, buffer))
+        if (ac3_decode_frame (&p_ac3thread->ac3_decoder, buffer))
         {
             sync = 0;
             goto bad_frame;
         }
         
-        vlc_mutex_lock (&p_ac3dec_t->p_aout_fifo->data_lock);
-        p_ac3dec_t->p_aout_fifo->l_end_frame = 
-            (p_ac3dec_t->p_aout_fifo->l_end_frame + 1) & AOUT_FIFO_SIZE;
-        vlc_cond_signal (&p_ac3dec_t->p_aout_fifo->data_wait);
-        vlc_mutex_unlock (&p_ac3dec_t->p_aout_fifo->data_lock);
+        vlc_mutex_lock (&p_ac3thread->p_aout_fifo->data_lock);
+        p_ac3thread->p_aout_fifo->l_end_frame = 
+            (p_ac3thread->p_aout_fifo->l_end_frame + 1) & AOUT_FIFO_SIZE;
+        vlc_cond_signal (&p_ac3thread->p_aout_fifo->data_wait);
+        vlc_mutex_unlock (&p_ac3thread->p_aout_fifo->data_lock);
 
         bad_frame:
-            RealignBits(&p_ac3dec_t->ac3_decoder.bit_stream);
+            RealignBits(&p_ac3thread->ac3_decoder.bit_stream);
     }
 
     /* If b_error is set, the ac3 decoder thread enters the error loop */
-    if (p_ac3dec_t->p_fifo->b_error)
+    if (p_ac3thread->p_fifo->b_error)
     {
-        ErrorThread (p_ac3dec_t);
+        ErrorThread (p_ac3thread);
     }
 
     /* End of the ac3 decoder thread */
-    EndThread (p_ac3dec_t);
+    EndThread (p_ac3thread);
 }
 
 /*****************************************************************************
  * ErrorThread : ac3 decoder's RunThread() error loop
  *****************************************************************************/
-static void ErrorThread (ac3dec_thread_t * p_ac3dec_t)
+static void ErrorThread (ac3dec_thread_t * p_ac3thread)
 {
     /* We take the lock, because we are going to read/write the start/end
      * indexes of the decoder fifo */
-    vlc_mutex_lock (&p_ac3dec_t->p_fifo->data_lock);
+    vlc_mutex_lock (&p_ac3thread->p_fifo->data_lock);
 
     /* Wait until a `die' order is sent */
-    while (!p_ac3dec_t->p_fifo->b_die)
+    while (!p_ac3thread->p_fifo->b_die)
     {
         /* Trash all received PES packets */
-        while (!DECODER_FIFO_ISEMPTY(*p_ac3dec_t->p_fifo))
+        while (!DECODER_FIFO_ISEMPTY(*p_ac3thread->p_fifo))
         {
-            p_ac3dec_t->p_fifo->pf_delete_pes(p_ac3dec_t->p_fifo->p_packets_mgt,
-                    DECODER_FIFO_START(*p_ac3dec_t->p_fifo));
-            DECODER_FIFO_INCSTART (*p_ac3dec_t->p_fifo);
+            p_ac3thread->p_fifo->pf_delete_pes(p_ac3thread->p_fifo->p_packets_mgt,
+                    DECODER_FIFO_START(*p_ac3thread->p_fifo));
+            DECODER_FIFO_INCSTART (*p_ac3thread->p_fifo);
         }
 
         /* Waiting for the input thread to put new PES packets in the fifo */
-        vlc_cond_wait (&p_ac3dec_t->p_fifo->data_wait,
-                       &p_ac3dec_t->p_fifo->data_lock);
+        vlc_cond_wait (&p_ac3thread->p_fifo->data_wait,
+                       &p_ac3thread->p_fifo->data_lock);
     }
 
     /* We can release the lock before leaving */
-    vlc_mutex_unlock (&p_ac3dec_t->p_fifo->data_lock);
+    vlc_mutex_unlock (&p_ac3thread->p_fifo->data_lock);
 }
 
 /*****************************************************************************
  * EndThread : ac3 decoder thread destruction
  *****************************************************************************/
-static void EndThread (ac3dec_thread_t * p_ac3dec_t)
+static void EndThread (ac3dec_thread_t * p_ac3thread)
 {
-    intf_DbgMsg ("ac3dec debug: destroying ac3 decoder thread %p", p_ac3dec_t);
+    intf_DbgMsg ("ac3dec debug: destroying ac3 decoder thread %p", p_ac3thread);
 
     /* If the audio output fifo was created, we destroy it */
-    if (p_ac3dec_t->p_aout_fifo != NULL)
+    if (p_ac3thread->p_aout_fifo != NULL)
     {
-        aout_DestroyFifo (p_ac3dec_t->p_aout_fifo);
+        aout_DestroyFifo (p_ac3thread->p_aout_fifo);
 
         /* Make sure the output thread leaves the NextFrame() function */
-        vlc_mutex_lock (&(p_ac3dec_t->p_aout_fifo->data_lock));
-        vlc_cond_signal (&(p_ac3dec_t->p_aout_fifo->data_wait));
-        vlc_mutex_unlock (&(p_ac3dec_t->p_aout_fifo->data_lock));
-        
+        vlc_mutex_lock (&(p_ac3thread->p_aout_fifo->data_lock));
+        vlc_cond_signal (&(p_ac3thread->p_aout_fifo->data_wait));
+        vlc_mutex_unlock (&(p_ac3thread->p_aout_fifo->data_lock));
     }
 
+    /* Unlock the modules */
+    module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module );
+    module_Unneed( p_ac3thread->ac3_decoder.imdct.p_module );
+
     /* Destroy descriptor */
-    free( p_ac3dec_t->p_config );
-    free( p_ac3dec_t );
+    free( p_ac3thread->p_config );
+    free( p_ac3thread );
 
-    intf_DbgMsg ("ac3dec debug: ac3 decoder thread %p destroyed", p_ac3dec_t);
+    intf_DbgMsg ("ac3dec debug: ac3 decoder thread %p destroyed", p_ac3thread);
 }
 
 /*****************************************************************************
@@ -294,7 +347,7 @@ static void BitstreamCallback ( bit_stream_t * p_bit_stream,
                                         boolean_t b_new_pes)
 {
 
-    ac3dec_thread_t *p_ac3dec_t=(ac3dec_thread_t *)p_bit_stream->p_callback_arg;
+    ac3dec_thread_t *p_ac3thread=(ac3dec_thread_t *)p_bit_stream->p_callback_arg;
 
     if( b_new_pes )
     {
@@ -303,7 +356,8 @@ static void BitstreamCallback ( bit_stream_t * p_bit_stream,
         ptr = *(p_bit_stream->p_byte + 1);
         ptr <<= 8;
         ptr |= *(p_bit_stream->p_byte + 2);
-        p_ac3dec_t->sync_ptr = ptr;
+        p_ac3thread->sync_ptr = ptr;
         p_bit_stream->p_byte += 3;                                                            
     }
 }
+
diff --git a/src/ac3_decoder/ac3_downmix.c b/src/ac3_decoder/ac3_downmix.c
deleted file mode 100644
index 20b0d4e86a..0000000000
--- a/src/ac3_decoder/ac3_downmix.c
+++ /dev/null
@@ -1,79 +0,0 @@
-/*****************************************************************************
- * ac3_downmix.c: ac3 downmix functions
- *****************************************************************************
- * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_downmix.c,v 1.23 2001/05/14 15:58:03 reno Exp $
- *
- * Authors: Michel Kaempf <maxx@via.ecp.fr>
- *          Aaron Holtzman <aholtzma@engr.uvic.ca>
- *          Renaud Dartus <reno@videolan.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
- *****************************************************************************/
-#include "defs.h"
-
-#include <string.h>                                              /* memcpy() */
-
-#include "config.h"
-#include "common.h"
-#include "threads.h"
-#include "mtime.h"
-
-#include "intf_msg.h"                        /* intf_DbgMsg(), intf_ErrMsg() */
-#include "tests.h"
-
-#include "stream_control.h"
-#include "input_ext-dec.h"
-
-#include "ac3_decoder.h"
-#include "ac3_downmix.h"
-
-void downmix_init (downmix_t * p_downmix)
-{
-#if 0
-    if ( TestCPU (CPU_CAPABILITY_SSE) )
-    {
-		intf_WarnMsg (1,"ac3dec: using MMX_SSE for downmix");
-		p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_sse;
-		p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_sse;
-		p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_sse;
-		p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_sse;
-		p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_sse;
-		p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_sse;
-    	p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_sse;
-    } 
-    else if ( TestCPU (CPU_CAPABILITY_3DNOW) )
-    {
-		intf_WarnMsg (1,"ac3dec: using MMX_3DNOW for downmix");
-		p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_3dn;
-		p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_3dn;
-		p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_3dn;
-		p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_3dn;
-		p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_3dn;
-		p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_3dn;
-    	p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_3dn;
-    } 
-    else
-#endif
-    {
-		p_downmix->downmix_3f_2r_to_2ch = downmix_3f_2r_to_2ch_c;
-		p_downmix->downmix_2f_2r_to_2ch = downmix_2f_2r_to_2ch_c;
-		p_downmix->downmix_3f_1r_to_2ch = downmix_3f_1r_to_2ch_c;
-		p_downmix->downmix_2f_1r_to_2ch = downmix_2f_1r_to_2ch_c;
-		p_downmix->downmix_3f_0r_to_2ch = downmix_3f_0r_to_2ch_c;
-		p_downmix->stream_sample_2ch_to_s16 = stream_sample_2ch_to_s16_c;
-		p_downmix->stream_sample_1ch_to_s16 = stream_sample_1ch_to_s16_c;
-    }
-}
diff --git a/src/ac3_decoder/ac3_downmix.h b/src/ac3_decoder/ac3_downmix.h
deleted file mode 100644
index 063beeb1dc..0000000000
--- a/src/ac3_decoder/ac3_downmix.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*****************************************************************************
- * ac3_downmix.h: ac3 downmix functions
- *****************************************************************************
- * Copyright (C) 2000, 2001 VideoLAN
- * $Id: ac3_downmix.h,v 1.7 2001/05/14 15:58:04 reno Exp $
- *
- * Authors: Renaud Dartus <reno@videolan.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
- *****************************************************************************/
-
-/* C functions */
-void downmix_3f_2r_to_2ch_c(float *samples, dm_par_t * dm_par);
-void downmix_3f_1r_to_2ch_c(float *samples, dm_par_t * dm_par);
-void downmix_2f_2r_to_2ch_c(float *samples, dm_par_t * dm_par);
-void downmix_2f_1r_to_2ch_c(float *samples, dm_par_t * dm_par);
-void downmix_3f_0r_to_2ch_c(float *samples, dm_par_t * dm_par);            
-void stream_sample_2ch_to_s16_c(s16 *s16_samples, float *left, float *right);
-void stream_sample_1ch_to_s16_c(s16 *s16_samples, float *center); 
-
-/* SSE functions */
-void downmix_3f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par);
-void downmix_3f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par);
-void downmix_2f_2r_to_2ch_sse(float *samples, dm_par_t * dm_par);
-void downmix_2f_1r_to_2ch_sse(float *samples, dm_par_t * dm_par);
-void downmix_3f_0r_to_2ch_sse(float *samples, dm_par_t * dm_par);            
-void stream_sample_2ch_to_s16_sse(s16 *s16_samples, float *left, float *right);
-void stream_sample_1ch_to_s16_sse(s16 *s16_samples, float *center);  
-
-/* 3DNow! functions */
-void downmix_3f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
-void downmix_3f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
-void downmix_2f_2r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
-void downmix_2f_1r_to_2ch_3dn(float *samples, dm_par_t * dm_par);
-void downmix_3f_0r_to_2ch_3dn(float *samples, dm_par_t * dm_par);            
-void stream_sample_2ch_to_s16_3dn(s16 *s16_samples, float *left, float *right);
-void stream_sample_1ch_to_s16_3dn(s16 *s16_samples, float *center);  
-
-
diff --git a/src/ac3_decoder/ac3_downmix_3dn.c b/src/ac3_decoder/ac3_downmix_3dn.c
deleted file mode 100644
index 3fb5b0ce35..0000000000
--- a/src/ac3_decoder/ac3_downmix_3dn.c
+++ /dev/null
@@ -1,295 +0,0 @@
-/*****************************************************************************
- * ac3_downmix_3dn.c: ac3 downmix functions
- *****************************************************************************
- * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_downmix_3dn.c,v 1.1 2001/05/14 15:58:04 reno Exp $
- *
- * Authors: Renaud Dartus <reno@videolan.org>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
- *****************************************************************************/
-
-#include "defs.h"
-
-#include "config.h"
-#include "common.h"
-#include "threads.h"
-#include "mtime.h"
-#include "tests.h"
-
-#include "stream_control.h"
-#include "input_ext-dec.h"
-#include "ac3_decoder.h"
-
-
-void downmix_3f_2r_to_2ch_3dn (float * samples, dm_par_t * dm_par)
-{
-    __asm__ __volatile__ (
-    "pushl %%ecx\n"
-	"movl  $128,  %%ecx\n"	        /* loop counter */
-
-	"movd	(%%ebx), %%mm5\n"	    /* unit */
-	"punpckldq %%mm5, %%mm5\n"	    /* unit | unit */
-
-	"movd	4(%%ebx), %%mm6\n"		/* clev */
-	"punpckldq %%mm6, %%mm6\n"	    /* clev | clev */
-
-	"movd	8(%%ebx), %%mm7\n"		/* slev */
-	"punpckldq %%mm7, %%mm7\n"	    /* slev | slev */
-
-".loop:\n"
-	"movq	(%%eax),     %%mm0\n"   /* left */
-	"movq	2048(%%eax), %%mm1\n"   /* right */
-	"movq   1024(%%eax), %%mm2\n"	/* center */
-	"movq	3072(%%eax), %%mm3\n"	/* leftsur */
-	"movq	4096(%%eax), %%mm4\n"	/* rightsur */
-	"pfmul	%%mm5, %%mm0\n"
-	"pfmul	%%mm5, %%mm1\n"
-	"pfmul	%%mm6, %%mm2\n"
-	"pfadd	%%mm2, %%mm0\n"
-	"pfadd 	%%mm2, %%mm1\n"
-	"pfmul  %%mm7, %%mm3\n"
-	"pfmul	%%mm7, %%mm4\n"
-	"pfadd	%%mm3, %%mm0\n"
-	"pfadd	%%mm4, %%mm1\n"
-
-	"movq	%%mm0, (%%eax)\n"
-	"movq	%%mm1, 1024(%%eax)\n"
-
-	"addl	$8, %%eax\n"
-	"decl 	%%ecx\n"
-	"jnz	.loop\n"
-    
-    "popl   %%ecx\n"
-    "femms\n"
-    : "=a" (samples)
-    : "a" (samples), "b" (dm_par));
-}
-
-void downmix_2f_2r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
-{
-    __asm__ __volatile__ (
-	"pushl %%ecx\n"
-	"movl  $128, %%ecx\n"       /* loop counter */
-
-	"movd  (%%ebx), %%mm5\n"	/* unit */
-	"punpckldq %%mm5, %%mm5\n"  /* unit | unit */
-
-	"movd	8(%%ebx), %%mm7\n"	/* slev */
-	"punpckldq %%mm7, %%mm7\n"	/* slev | slev */
-
-".loop3:\n"
-	"movq   (%%eax), %%mm0\n"       /* left */
-	"movq   1024(%%eax), %%mm1\n"   /* right */
-	"movq   2048(%%eax), %%mm3\n"	/* leftsur */
-	"movq   3072(%%eax), %%mm4\n"	/* rightsur */
-	"pfmul	%%mm5, %%mm0\n"
-	"pfmul	%%mm5, %%mm1\n"
-	"pfmul	%%mm7, %%mm3\n"
-	"pfmul	%%mm7, %%mm4\n"
-	"pfadd	%%mm3, %%mm0\n"
-	"pfadd	%%mm4, %%mm1\n"
-
-	"movq	%%mm0, (%%eax)\n"
-	"movq	%%mm1, 1024(%%eax)\n"
-
-	"addl	$8, %%eax\n"
-	"decl 	%%ecx\n"
-	"jnz	.loop3\n"
-
-	"popl	%%ecx\n"
-    "femms\n"
-    : "=a" (samples)
-    : "a" (samples), "b" (dm_par));
-}
-void downmix_3f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
-{
-    __asm__ __volatile__ (
-
-	"pushl	%%ecx\n"
-	"movl	$128, %%ecx\n"		    /* loop counter */
-
-	"movd	(%%ebx), %%mm5\n"	    /* unit */
-	"punpckldq %%mm5, %%mm5\n"	    /* unit | unit */
-
-	"movd	4(%%ebx), %%mm6\n"		/* clev */
-	"punpckldq %%mm6, %%mm6\n"	    /* clev | clev */
-
-    "movd	8(%%ebx), %%mm7\n"		/* slev */
-	"punpckldq %%mm7, %%mm7\n"  	/* slev | slev */
-
-".loop4:\n"
-	"movq	(%%eax), %%mm0\n"       /* left */
-	"movq	2048(%%eax), %%mm1\n"   /* right */
-	"movq	1024(%%eax), %%mm2\n"	/* center */
-    "movq	3072(%%eax), %%mm3\n"	/* sur */
-	"pfmul	%%mm5, %%mm0\n"
-	"pfmul	%%mm5, %%mm1\n"
-	"pfmul	%%mm6, %%mm2\n"
-	"pfadd	%%mm2, %%mm0\n"
-	"pfmul	%%mm7, %%mm3\n"
-	"pfadd 	%%mm2, %%mm1\n"
-	"pfsub	%%mm3, %%mm0\n"
-	"pfadd	%%mm3, %%mm1\n"
-
-	"movq	%%mm0, (%%eax)\n"
-	"movq	%%mm1, 1024(%%eax)\n"
-
-	"addl	$8, %%eax\n"
-	"decl 	%%ecx\n"
-	"jnz	.loop4\n"
-
-	"popl	%%ecx\n"
-    "femms\n"
-    : "=a" (samples)
-    : "a" (samples), "b" (dm_par));
-}
-void downmix_2f_1r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
-{
-    __asm__ __volatile__ (
-	"pushl	%%ecx\n"
-	"movl	$128, %%ecx\n"		    /* loop counter */
-
-	"movd	(%%ebx), %%mm5\n"	    /* unit */
-	"punpckldq %%mm5, %%mm5\n"	    /* unit | unit */
-
-	"movd	8(%%ebx), %%mm7\n"		/* slev */
-	"punpckldq %%mm7, %%mm7\n"  	/* slev | slev */
-
-".loop5:\n"
-	"movq	(%%eax), %%mm0\n"       /* left */
-	"movq	1024(%%eax), %%mm1\n"   /* right */
-	"movq	2048(%%eax), %%mm3\n"	/* sur */
-	"pfmul	%%mm5, %%mm0\n"
-	"pfmul	%%mm5, %%mm1\n"
-	"pfmul	%%mm7, %%mm3\n"
-	"pfsub	%%mm3, %%mm0\n"
-	"pfadd	%%mm3, %%mm1\n"
-
-	"movq	%%mm0, (%%eax)\n"
-	"movq	%%mm1, 1024(%%eax)\n"
-
-	"addl	$8, %%eax\n"
-	"decl 	%%ecx\n"
-	"jnz	.loop5\n"
-
-	"popl	%%ecx\n"
-    "femms\n"
-    : "=a" (samples)
-    : "a" (samples), "b" (dm_par));
-}
-
-void downmix_3f_0r_to_2ch_3dn (float *samples, dm_par_t * dm_par)
-{
-    __asm__ __volatile__ (
-	"pushl	%%ecx\n"
-	"movl	$128, %%ecx\n"		    /* loop counter */
-
-	"movd	(%%ebx), %%mm5\n"	    /* unit */
-	"punpckldq %%mm5, %%mm5\n"	    /* unit | unit */
-
-	"movd	4(%%ebx), %%mm6\n"		/* clev */
-	"punpckldq %%mm6, %%mm6\n"  	/* clev | clev */
-
-".loop6:\n"
-	"movq	(%%eax), %%mm0\n"       /*left */
-	"movq	2048(%%eax), %%mm1\n"   /* right */
-	"movq   1024(%%eax), %%mm2\n"   /* center */
-	"pfmul	%%mm5, %%mm0\n"
-	"pfmul	%%mm5, %%mm1\n"
-	"pfmul	%%mm6, %%mm2\n"
-	"pfadd	%%mm2, %%mm0\n"
-	"pfadd 	%%mm2, %%mm1\n"
-
-	"movq	%%mm0, (%%eax)\n"
-	"movq	%%mm1, 1024(%%eax)\n"
-
-	"addl	$8, %%eax\n"
-	"decl 	%%ecx\n"
-	"jnz	.loop6\n"
-
-	"popl	%%ecx\n"
-    "femms\n"
-    : "=a" (samples)
-    : "a" (samples), "b" (dm_par));
-}
-
-void stream_sample_1ch_to_s16_3dn (s16 *s16_samples, float *left)
-{
-    __asm__ __volatile__ (
-    "pushl %%ecx\n"
-    "pushl %%edx\n"
-
-	"movl   $sqrt2, %%edx\n"
-	"movd  (%%edx), %%mm7\n"
-    "punpckldq %%mm7, %%mm7\n"   /* sqrt2 | sqrt2 */
-	"movl $128, %%ecx\n"
-
-".loop2:\n"
-	"movq (%%ebx), %%mm0\n"	    /* c1 | c0 */
-	"pfmul   %%mm7, %%mm0\n"
-
-	"pf2id %%mm0, %%mm0\n"	    /* c1 c0 --> mm0, int_32 */
-
-	"packssdw %%mm0, %%mm0\n"	    /* c1 c1 c0 c0 --> mm0, int_16 */
-
-    "movq %%mm0, (%%eax)\n"
-	"addl $8, %%eax\n"
-	"addl $8, %%ebx\n"
-
-	"decl %%ecx\n"
-	"jnz .loop2\n"
-
-	"popl %%edx\n"
-	"popl %%ecx\n"
-	"femms\n"
-    : "=a" (s16_samples), "=b" (left)
-    : "a" (s16_samples), "b" (left));
-}
-
-void stream_sample_2ch_to_s16_3dn (s16 *s16_samples, float *left, float *right)
-{
-
-	__asm__ __volatile__ (
-    "pushl %%ecx\n"
-	"movl $128, %%ecx\n"
-
-".loop1:\n"
-	"movq  (%%ebx), %%mm0\n"	/* l1 | l0 */
-	"movq  (%%edx), %%mm1\n"	/* r1 | r0 */
-	"movq   %%mm0,  %%mm2\n"	/* l1 | l0 */
-	"punpckldq %%mm1, %%mm0\n"	/* r0 | l0 */
-	"punpckhdq %%mm1, %%mm2\n"	/* r1 | l1 */
-
-	"pf2id    %%mm0, %%mm0\n"	/* r0 l0 --> mm0, int_32 */
-	"pf2id    %%mm2, %%mm2\n"	/* r0 l0 --> mm0, int_32 */
-    
-    "packssdw %%mm2, %%mm0\n"	/* r1 l1 r0 l0 --> mm0, int_16 */
-
-	"movq %%mm0, (%%eax)\n"
-	"movq %%mm2, 8(%%eax)\n"
-	"addl $8, %%eax\n"
-	"addl $8, %%ebx\n"
-	"addl $8, %%edx\n"
-
-	"decl %%ecx\n"
-	"jnz .loop1\n"
-
-	"popl %%ecx\n"
-	"femms\n"
-    : "=a" (s16_samples), "=b" (left), "=d" (right)
-    : "a" (s16_samples), "b" (left), "d" (right));
-    
-}
diff --git a/src/ac3_decoder/ac3_downmix_sse.c b/src/ac3_decoder/ac3_downmix_sse.c
deleted file mode 100644
index 71a9b33faf..0000000000
--- a/src/ac3_decoder/ac3_downmix_sse.c
+++ /dev/null
@@ -1,308 +0,0 @@
-/*****************************************************************************
- * ac3_downmix_sse.c: ac3 downmix functions
- *****************************************************************************
- * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_downmix_sse.c,v 1.1 2001/05/14 15:58:04 reno Exp $
- *
- * Authors: Renaud Dartus <reno@videolan.org>
- *          Aaron Holtzman <aholtzma@engr.uvic.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
- *****************************************************************************/
-
-#include "defs.h"
-
-#include "config.h"
-#include "common.h"
-#include "threads.h"
-#include "mtime.h"
-#include "tests.h"
-
-#include "stream_control.h"
-#include "input_ext-dec.h"
-#include "ac3_decoder.h"
-
-
-void sqrt2 (void)
-{
-    __asm__ (".float 0f0.7071068");
-}
-
-void downmix_3f_2r_to_2ch_sse (float * samples, dm_par_t * dm_par)
-{
-    __asm__ __volatile__ (
-    "pushl %%ecx\n"
-	"movl  $64,  %%ecx\n"	        /* loop counter */
-
-	"movss	(%%ebx), %%xmm5\n"	    /* unit */
-	"shufps	$0, %%xmm5, %%xmm5\n"	/* unit | unit | unit | unit */
-
-	"movss	4(%%ebx), %%xmm6\n"		/* clev */
-	"shufps	$0, %%xmm6, %%xmm6\n"	/* clev | clev | clev | clev */
-
-	"movss	8(%%ebx), %%xmm7\n"		/* slev */
-	"shufps	$0, %%xmm7, %%xmm7\n"	/* slev | slev | slev | slev */
-
-".loop:\n"
-	"movups	(%%eax),     %%xmm0\n"  /* left */
-	"movups	2048(%%eax), %%xmm1\n"  /* right */
-	"movups 1024(%%eax), %%xmm2\n"	/* center */
-	"movups	3072(%%eax), %%xmm3\n"	/* leftsur */
-	"movups	4096(%%eax), %%xmm4\n"	/* rithgsur */
-	"mulps	%%xmm5, %%xmm0\n"
-	"mulps	%%xmm5, %%xmm1\n"
-	"mulps	%%xmm6, %%xmm2\n"
-	"addps	%%xmm2, %%xmm0\n"
-	"addps 	%%xmm2, %%xmm1\n"
-	"mulps	%%xmm7, %%xmm3\n"
-	"mulps	%%xmm7, %%xmm4\n"
-	"addps	%%xmm3, %%xmm0\n"
-	"addps	%%xmm4, %%xmm1\n"
-
-	"movups	%%xmm0, (%%eax)\n"
-	"movups	%%xmm1, 1024(%%eax)\n"
-
-	"addl	$16, %%eax\n"
-	"decl 	%%ecx\n"
-	"jnz	.loop\n"
-    
-    "popl   %%ecx\n"
-    : "=a" (samples)
-    : "a" (samples), "b" (dm_par));
-}
-
-void downmix_2f_2r_to_2ch_sse (float *samples, dm_par_t * dm_par)
-{
-    __asm__ __volatile__ (
-	"pushl %%ecx\n"
-	"movl  $64, %%ecx\n"            /* loop counter */
-
-	"movss  (%%ebx), %%xmm5\n"	    /* unit */
-	"shufps $0, %%xmm5, %%xmm5\n"   /* unit | unit | unit | unit */
-
-	"movss	8(%%ebx), %%xmm7\n"		/* slev */
-	"shufps	$0, %%xmm7, %%xmm7\n"	/* slev | slev | slev | slev */
-
-".loop3:\n"
-	"movups	(%%eax), %%xmm0\n"      /* left */
-	"movups	1024(%%eax), %%xmm1\n"  /* right */
-	"movups 2048(%%eax), %%xmm3\n"	/* leftsur */
-	"movups	3072(%%eax), %%xmm4\n"	/* rightsur */
-	"mulps	%%xmm5, %%xmm0\n"
-	"mulps	%%xmm5, %%xmm1\n"
-	"mulps	%%xmm7, %%xmm3\n"
-	"mulps	%%xmm7, %%xmm4\n"
-	"addps	%%xmm3, %%xmm0\n"
-	"addps	%%xmm4, %%xmm1\n"
-
-	"movups	%%xmm0, (%%eax)\n"
-	"movups	%%xmm1, 1024(%%eax)\n"
-
-	"addl	$16, %%eax\n"
-	"decl 	%%ecx\n"
-	"jnz	.loop3\n"
-
-	"popl	%%ecx\n"
-    : "=a" (samples)
-    : "a" (samples), "b" (dm_par));
-}
-void downmix_3f_1r_to_2ch_sse (float *samples, dm_par_t * dm_par)
-{
-    __asm__ __volatile__ (
-
-	"pushl	%%ecx\n"
-	"movl	$64, %%ecx\n"		    /* loop counter */
-
-	"movss	(%%ebx), %%xmm5\n"	    /* unit */
-	"shufps	$0, %%xmm5, %%xmm5\n"	/* unit | unit | unit | unit */
-
-	"movss	4(%%ebx), %%xmm6\n"		/* clev */
-	"shufps	$0, %%xmm6, %%xmm6\n"	/* clev | clev | clev | clev */
-
-	"movss	8(%%ebx), %%xmm7\n"		/* slev */
-	"shufps	$0, %%xmm7, %%xmm7\n"	/* slev | slev | slev | slev */
-
-".loop4:\n"
-	"movups	(%%eax), %%xmm0\n"      /* left */
-	"movups	2048(%%eax), %%xmm1\n"  /* right */
-	"movups	1024(%%eax), %%xmm2\n"	/* center */
-    "movups	3072(%%eax), %%xmm3\n"	/* sur */
-	"mulps	%%xmm5, %%xmm0\n"
-	"mulps	%%xmm5, %%xmm1\n"
-	"mulps	%%xmm6, %%xmm2\n"
-	"addps	%%xmm2, %%xmm0\n"
-	"mulps	%%xmm7, %%xmm3\n"
-	"addps 	%%xmm2, %%xmm1\n"
-	"subps	%%xmm3, %%xmm0\n"
-	"addps	%%xmm3, %%xmm1\n"
-
-	"movups	%%xmm0, (%%eax)\n"
-	"movups	%%xmm1, 1024(%%eax)\n"
-
-	"addl	$16, %%eax\n"
-	"decl 	%%ecx\n"
-	"jnz	.loop4\n"
-
-	"popl	%%ecx\n"
-    : "=a" (samples)
-    : "a" (samples), "b" (dm_par));
-
-}
-void downmix_2f_1r_to_2ch_sse (float *samples, dm_par_t * dm_par)
-{
-    __asm__ __volatile__ (
-	"pushl	%%ecx\n"
-	"movl	$64, %%ecx\n"		    /* loop counter */
-
-	"movss	(%%ebx), %%xmm5\n"	    /* unit */
-	"shufps	$0, %%xmm5, %%xmm5\n"	/* unit | unit | unit | unit */
-
-	"movss	8(%%ebx), %%xmm7\n"		/* slev */
-	"shufps	$0, %%xmm7, %%xmm7\n"	/* slev | slev | slev | slev */
-
-".loop5:\n"
-	"movups	(%%eax), %%xmm0\n"      /* left */
-	"movups	1024(%%eax), %%xmm1\n"  /* right */
-	"movups	2048(%%eax), %%xmm3\n"	/* sur */
-	"mulps	%%xmm5, %%xmm0\n"
-	"mulps	%%xmm5, %%xmm1\n"
-	"mulps	%%xmm7, %%xmm3\n"
-	"subps	%%xmm3, %%xmm0\n"
-	"addps	%%xmm3, %%xmm1\n"
-
-	"movups	%%xmm0, (%%eax)\n"
-	"movups	%%xmm1, 1024(%%eax)\n"
-
-	"addl	$16, %%eax\n"
-	"decl 	%%ecx\n"
-	"jnz	.loop5\n"
-
-	"popl	%%ecx\n"
-    : "=a" (samples)
-    : "a" (samples), "b" (dm_par));
-
-
-}
-void downmix_3f_0r_to_2ch_sse (float *samples, dm_par_t * dm_par)
-{
-    __asm__ __volatile__ (
-	"pushl	%%ecx\n"
-	"movl	$64, %%ecx\n"		    /* loop counter */
-
-	"movss	(%%ebx), %%xmm5\n"	    /* unit */
-	"shufps	$0, %%xmm5, %%xmm5\n"	/* unit | unit | unit | unit */
-
-	"movss	4(%%ebx), %%xmm6\n"		/* clev */
-	"shufps	$0, %%xmm6, %%xmm6\n"	/* clev | clev | clev | clev */
-
-".loop6:\n"
-	"movups	(%%eax), %%xmm0\n"      /*left */
-	"movups	2048(%%eax), %%xmm1\n"  /* right */
-	"movups 1024(%%eax), %%xmm2\n"	/* center */
-	"mulps	%%xmm5, %%xmm0\n"
-	"mulps	%%xmm5, %%xmm1\n"
-	"mulps	%%xmm6, %%xmm2\n"
-	"addps	%%xmm2, %%xmm0\n"
-	"addps 	%%xmm2, %%xmm1\n"
-
-	"movups	%%xmm0, (%%eax)\n"
-	"movups	%%xmm1, 1024(%%eax)\n"
-
-	"addl	$16, %%eax\n"
-	"decl 	%%ecx\n"
-	"jnz	.loop6\n"
-
-	"popl	%%ecx\n"
-    : "=a" (samples)
-    : "a" (samples), "b" (dm_par));
-}
-    
-void stream_sample_1ch_to_s16_sse (s16 *s16_samples, float *left)
-{
-    __asm__ __volatile__ (
-    "pushl %%ecx\n"
-    "pushl %%edx\n"
-
-	"movl   $sqrt2, %%edx\n"
-	"movss (%%edx), %%xmm7\n"
-    "shufps $0, %%xmm7, %%xmm7\n"   /* sqrt2 | sqrt2 | sqrt2 | sqrt2 */
-	"movl $64, %%ecx\n"
-
-".loop2:\n"
-	"movups (%%ebx), %%xmm0\n"	    /* c3 | c2 | c1 | c0 */
-	"mulps   %%xmm7, %%xmm0\n"
-	"movhlps %%xmm0, %%xmm2\n"	    /* c3 | c2 */
-
-	"cvtps2pi %%xmm0, %%mm0\n"	    /* c1 c0 --> mm0, int_32 */
-	"cvtps2pi %%xmm2, %%mm1\n"	    /* c3 c2 --> mm1, int_32 */
-
-	"packssdw %%mm0, %%mm0\n"	    /* c1 c1 c0 c0 --> mm0, int_16 */
-	"packssdw %%mm1, %%mm1\n"	    /* c3 c3 c2 c2 --> mm1, int_16 */
-
-    "movq %%mm0, (%%eax)\n"
-	"movq %%mm1, 8(%%eax)\n"
-	"addl $16, %%eax\n"
-	"addl $16, %%ebx\n"
-
-	"decl %%ecx\n"
-	"jnz .loop2\n"
-
-	"popl %%edx\n"
-	"popl %%ecx\n"
-	"emms\n"
-    : "=a" (s16_samples), "=b" (left)
-    : "a" (s16_samples), "b" (left));
-}
-
-void stream_sample_2ch_to_s16_sse (s16 *s16_samples, float *left, float *right)
-{
-
-	__asm__ __volatile__ (
-    "pushl %%ecx\n"
-	"movl $64, %%ecx\n"
-
-".loop1:\n"
-	"movups  (%%ebx), %%xmm0\n"	/* l3 | l2 | l1 | l0 */
-	"movups  (%%edx), %%xmm1\n"	/* r3 | r2 | r1 | r0 */
-	"movhlps  %%xmm0, %%xmm2\n"	/* l3 | l2 */
-	"movhlps  %%xmm1, %%xmm3\n"	/* r3 | r2 */
-	"unpcklps %%xmm1, %%xmm0\n"	/* r1 | l1 | r0 | l0 */
-	"unpcklps %%xmm3, %%xmm2\n"	/* r3 | l3 | r2 | l2 */
-
-	"cvtps2pi %%xmm0, %%mm0\n"	/* r0 l0 --> mm0, int_32 */
-	"movhlps  %%xmm0, %%xmm0\n"
-	"cvtps2pi %%xmm0, %%mm1\n"	/* r1 l1 --> mm1, int_32 */
-	"cvtps2pi %%xmm2, %%mm2\n"	/* r2 l2 --> mm2, int_32 */
-	"movhlps  %%xmm2, %%xmm2\n"
-	"cvtps2pi %%xmm2, %%mm3\n"	/* r3 l3 --> mm3, int_32 */
-    
-	"packssdw %%mm1, %%mm0\n"	/* r1 l1 r0 l0 --> mm0, int_16 */
-	"packssdw %%mm3, %%mm2\n"	/* r3 l3 r2 l2 --> mm2, int_16 */
-
-	"movq %%mm0, (%%eax)\n"
-	"movq %%mm2, 8(%%eax)\n"
-	"addl $16, %%eax\n"
-	"addl $16, %%ebx\n"
-	"addl $16, %%edx\n"
-
-	"decl %%ecx\n"
-	"jnz .loop1\n"
-
-	"popl %%ecx\n"
-	"emms\n"
-    : "=a" (s16_samples), "=b" (left), "=d" (right)
-    : "a" (s16_samples), "b" (left), "d" (right));
-    
-}
diff --git a/src/ac3_decoder/ac3_exponent.c b/src/ac3_decoder/ac3_exponent.c
index 96e3c704f6..f9bb5c4cee 100644
--- a/src/ac3_decoder/ac3_exponent.c
+++ b/src/ac3_decoder/ac3_exponent.c
@@ -2,7 +2,7 @@
  * ac3_exponent.c: ac3 exponent calculations
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_exponent.c,v 1.24 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_exponent.c,v 1.25 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Michel Lespinasse <walken@zoy.org>
@@ -22,6 +22,10 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
 #include "defs.h"
 
 #include <string.h>                                    /* memcpy(), memset() */
@@ -38,136 +42,13 @@
 
 #include "audio_output.h"
 
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
 #include "ac3_decoder.h"
 
 #include "ac3_internal.h"
 
-static const s16 exps_1[128] =
-{
-    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
-    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
-     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-     0, 0, 0
-};
-
-static const s16 exps_2[128] =
-{
-    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
-    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
-    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
-    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
-    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
-     0, 0, 0
-};
-
-static const s16 exps_3[128] =
-{
-    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
-    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
-    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
-    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
-    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
-     0, 0, 0
-};
-
-#define UNPACK_FBW 1 
-#define UNPACK_CPL 2 
-#define UNPACK_LFE 4
-
-static __inline__ int exp_unpack_ch (ac3dec_t * p_ac3dec, u16 type,
-                                     u16 expstr, u16 ngrps, u16 initial_exp,
-                                     u16 exps[], u16 * dest)
-{
-    u16 i,j;
-    s16 exp_acc;
-
-    if  (expstr == EXP_REUSE)
-    {
-        return 0;
-    }
-
-    /* Handle the initial absolute exponent */
-    exp_acc = initial_exp;
-    j = 0;
-
-    /* In the case of a fbw channel then the initial absolute values is
-     * also an exponent */
-    if (type != UNPACK_CPL)
-    {
-        dest[j++] = exp_acc;
-    }
-
-    /* Loop through the groups and fill the dest array appropriately */
-    switch (expstr)
-    {
-    case EXP_D15:        /* 1 */
-        for (i = 0; i < ngrps; i++)
-        {
-            if (exps[i] > 124)
-            {
-                intf_ErrMsg ( "ac3dec error: invalid exponent" );
-                return 1;
-            }
-            exp_acc += (exps_1[exps[i]] /*- 2*/);
-            dest[j++] = exp_acc;
-            exp_acc += (exps_2[exps[i]] /*- 2*/);
-            dest[j++] = exp_acc;
-            exp_acc += (exps_3[exps[i]] /*- 2*/);
-            dest[j++] = exp_acc;
-        }
-        break;
-
-    case EXP_D25:        /* 2 */
-        for (i = 0; i < ngrps; i++)
-        {
-            if (exps[i] > 124)
-            {
-                intf_ErrMsg ( "ac3dec error: invalid exponent" );
-                return 1;
-            }
-            exp_acc += (exps_1[exps[i]] /*- 2*/);
-            dest[j++] = exp_acc;
-            dest[j++] = exp_acc;
-            exp_acc += (exps_2[exps[i]] /*- 2*/);
-            dest[j++] = exp_acc;
-            dest[j++] = exp_acc;
-            exp_acc += (exps_3[exps[i]] /*- 2*/);
-            dest[j++] = exp_acc;
-            dest[j++] = exp_acc;
-        }
-        break;
-
-    case EXP_D45:        /* 3 */
-        for (i = 0; i < ngrps; i++)
-        {
-            if (exps[i] > 124)
-            {
-                intf_ErrMsg ( "ac3dec error: invalid exponent" );
-                return 1;
-            }
-            exp_acc += (exps_1[exps[i]] /*- 2*/);
-            dest[j++] = exp_acc;
-            dest[j++] = exp_acc;
-            dest[j++] = exp_acc;
-            dest[j++] = exp_acc;
-            exp_acc += (exps_2[exps[i]] /*- 2*/);
-            dest[j++] = exp_acc;
-            dest[j++] = exp_acc;
-            dest[j++] = exp_acc;
-            dest[j++] = exp_acc;
-            exp_acc += (exps_3[exps[i]] /*- 2*/);
-            dest[j++] = exp_acc;
-            dest[j++] = exp_acc;
-            dest[j++] = exp_acc;
-            dest[j++] = exp_acc;
-        }
-        break;
-    }
-
-    return 0;
-}
+#include "ac3_exponent.h"
 
 int exponent_unpack (ac3dec_t * p_ac3dec)
 {
diff --git a/src/ac3_decoder/ac3_exponent.h b/src/ac3_decoder/ac3_exponent.h
new file mode 100644
index 0000000000..cb8a514cb2
--- /dev/null
+++ b/src/ac3_decoder/ac3_exponent.h
@@ -0,0 +1,152 @@
+/*****************************************************************************
+ * ac3_exponent.h: ac3 exponent calculations
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ * $Id: ac3_exponent.h,v 1.5 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Michel Kaempf <maxx@via.ecp.fr>
+ *          Michel Lespinasse <walken@zoy.org>
+ *          Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+static const s16 exps_1[128] =  /* g / 25 - 2 for g <= 124, else 0 */
+{
+    -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
+    -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
+     0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+     0, 0, 0
+};
+
+static const s16 exps_2[128] =  /* (g / 5) % 5 - 2 for g <= 124, else 0 */
+{
+    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+    -2,-2,-2,-2,-2,-1,-1,-1,-1,-1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,
+     0, 0, 0
+};
+
+static const s16 exps_3[128] =  /* g % 5 - 2 for g <= 124, else 0 */
+{
+    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+    -2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,-2,-1, 0, 1, 2,
+     0, 0, 0
+};
+
+#define UNPACK_FBW 1     /* presumably a full-bandwidth channel -- confirm */
+#define UNPACK_CPL 2     /* exp_unpack_ch does not store initial_exp for it */
+#define UNPACK_LFE 4     /* presumably the LFE channel -- confirm */
+
+static __inline__ int exp_unpack_ch (ac3dec_t * p_ac3dec, u16 type,
+                                     u16 expstr, u16 ngrps, u16 initial_exp,
+                                     u16 exps[], u16 * dest)
+{   /* Expand ngrps groups of exps[] into absolute exponents in dest[]. */
+    u16 i,j;       /* i indexes exps[] groups, j indexes dest[] */
+    s16 exp_acc;   /* running absolute exponent */
+
+    if  (expstr == EXP_REUSE)
+    {
+        return 0;  /* EXP_REUSE: leave dest[] untouched */
+    }
+
+    /* Seed the accumulator with the initial absolute exponent */
+    exp_acc = initial_exp;
+    j = 0;
+
+    /* Every channel type except coupling stores the initial absolute
+     * exponent as its first output value */
+    if (type != UNPACK_CPL)
+    {
+        dest[j++] = exp_acc;
+    }
+
+    /* Per group: add 3 table deltas; expstr sets how many copies are stored */
+    switch (expstr)
+    {
+    case EXP_D15:        /* 1: each running value stored once */
+        for (i = 0; i < ngrps; i++)
+        {
+            if (exps[i] > 124)         /* only codes 0..124 are valid */
+            {
+                intf_ErrMsg ( "ac3dec error: invalid exponent" );
+                return 1;     /* dest[] may be partly filled */
+            }
+            exp_acc += (exps_1[exps[i]] /*- 2*/);
+            dest[j++] = exp_acc;
+            exp_acc += (exps_2[exps[i]] /*- 2*/);
+            dest[j++] = exp_acc;
+            exp_acc += (exps_3[exps[i]] /*- 2*/);
+            dest[j++] = exp_acc;
+        }
+        break;
+
+    case EXP_D25:        /* 2: each running value stored twice */
+        for (i = 0; i < ngrps; i++)
+        {
+            if (exps[i] > 124)         /* only codes 0..124 are valid */
+            {
+                intf_ErrMsg ( "ac3dec error: invalid exponent" );
+                return 1;     /* dest[] may be partly filled */
+            }
+            exp_acc += (exps_1[exps[i]] /*- 2*/);
+            dest[j++] = exp_acc;
+            dest[j++] = exp_acc;
+            exp_acc += (exps_2[exps[i]] /*- 2*/);
+            dest[j++] = exp_acc;
+            dest[j++] = exp_acc;
+            exp_acc += (exps_3[exps[i]] /*- 2*/);
+            dest[j++] = exp_acc;
+            dest[j++] = exp_acc;
+        }
+        break;
+
+    case EXP_D45:        /* 3: each running value stored four times */
+        for (i = 0; i < ngrps; i++)
+        {
+            if (exps[i] > 124)         /* only codes 0..124 are valid */
+            {
+                intf_ErrMsg ( "ac3dec error: invalid exponent" );
+                return 1;     /* dest[] may be partly filled */
+            }
+            exp_acc += (exps_1[exps[i]] /*- 2*/);
+            dest[j++] = exp_acc;
+            dest[j++] = exp_acc;
+            dest[j++] = exp_acc;
+            dest[j++] = exp_acc;
+            exp_acc += (exps_2[exps[i]] /*- 2*/);
+            dest[j++] = exp_acc;
+            dest[j++] = exp_acc;
+            dest[j++] = exp_acc;
+            dest[j++] = exp_acc;
+            exp_acc += (exps_3[exps[i]] /*- 2*/);
+            dest[j++] = exp_acc;
+            dest[j++] = exp_acc;
+            dest[j++] = exp_acc;
+            dest[j++] = exp_acc;
+        }
+        break;
+    }
+
+    return 0;      /* success; also reached for any unhandled expstr value */
+}
+
diff --git a/src/ac3_decoder/ac3_imdct.c b/src/ac3_decoder/ac3_imdct.c
index c52006b21f..639ac439f3 100644
--- a/src/ac3_decoder/ac3_imdct.c
+++ b/src/ac3_decoder/ac3_imdct.c
@@ -2,7 +2,7 @@
  * ac3_imdct.c: ac3 DCT
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_imdct.c,v 1.19 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_imdct.c,v 1.20 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -23,6 +23,9 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
 
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
 #include "defs.h"
 
 #include <string.h>                                              /* memcpy() */
@@ -38,235 +41,223 @@
 #include "stream_control.h"
 #include "input_ext-dec.h"
 
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
 #include "ac3_decoder.h"
 
-#include "ac3_imdct_c.h"                                     /* imdct_init_c */
-#include "ac3_imdct_sse.h"                                 /* imdct_init_sse */
-
-#include "tests.h"                                                /* TestCPU */
-
 #ifndef M_PI
 #   define M_PI 3.14159265358979323846
 #endif
 
-
 void imdct_init(imdct_t * p_imdct)
 {
-	int i;
-	float scale = 181.019;
-#if 0
-	if ( TestCPU (CPU_CAPABILITY_SSE) )
-    {
-        imdct_init_sse (p_imdct);
-    }
-    else
-#endif
-    {
-        imdct_init_c (p_imdct);
-    }
+    int i;                  /* index into the 64-entry xcos2/xsin2 tables */
+    float scale = 181.019;  /* gain folded into the twiddle factors below */
+
+    p_imdct->pf_imdct_init( p_imdct ); /* implementation-specific init, reached through the function pointer */
 
-	/* More twiddle factors to turn IFFT into IMDCT */
-	for (i=0; i < 64; i++) {
-		p_imdct->xcos2[i] = cos(2.0f * M_PI * (8*i+1)/(4*N)) * scale;
-		p_imdct->xsin2[i] = sin(2.0f * M_PI * (8*i+1)/(4*N)) * scale;
-	}
+    /* More twiddle factors to turn IFFT into IMDCT */
+    for (i=0; i < 64; i++) {
+        p_imdct->xcos2[i] = cos(2.0f * M_PI * (8*i+1)/(4*N)) * scale;
+        p_imdct->xsin2[i] = sin(2.0f * M_PI * (8*i+1)/(4*N)) * scale;
+    }
 }
 
 void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
 {
-	int   i;
-	int   doable = 0;
-	float *center=NULL, *left, *right, *left_sur, *right_sur;
-	float *delay_left, *delay_right;
-	float *delay1_left, *delay1_right, *delay1_center, *delay1_sr, *delay1_sl;
-	float right_tmp, left_tmp;
-	void (*do_imdct)(imdct_t * p_imdct, float data[], float delay[]);
+    int   i;
+    int   doable = 0;
+    float *center=NULL, *left, *right, *left_sur, *right_sur;
+    float *delay_left, *delay_right;
+    float *delay1_left, *delay1_right, *delay1_center, *delay1_sr, *delay1_sl;
+    float right_tmp, left_tmp;
+    void (*do_imdct)(imdct_t * p_imdct, float data[], float delay[]);
 
-	/* test if dm in frequency is doable */
-	if (!(doable = p_ac3dec->audblk.blksw[0]))
+    /* test if dm in frequency is doable */
+    if (!(doable = p_ac3dec->audblk.blksw[0]))
     {
-		do_imdct = p_ac3dec->imdct.imdct_do_512;
+        do_imdct = p_ac3dec->imdct.pf_imdct_512;
     }
-	else
+    else
     {
-		do_imdct = imdct_do_256; /* There is only a C function */
+        do_imdct = p_ac3dec->imdct.pf_imdct_256;
     }
 
-	/* downmix in the frequency domain if all the channels
-	 * use the same imdct */
-	for (i=0; i < p_ac3dec->bsi.nfchans; i++)
+    /* downmix in the frequency domain if all the channels
+     * use the same imdct */
+    for (i=0; i < p_ac3dec->bsi.nfchans; i++)
     {
-		if (doable != p_ac3dec->audblk.blksw[i])
+        if (doable != p_ac3dec->audblk.blksw[i])
         {
-			do_imdct = NULL;
-			break;
-		}
-	}
+            do_imdct = NULL;
+            break;
+        }
+    }
 
     if (do_imdct)
     {
-		/* dowmix first and imdct */
+        /* downmix first, then imdct */
         switch(p_ac3dec->bsi.acmod)
         {
-    		case 7:		/* 3/2 */
-    			p_ac3dec->downmix.downmix_3f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
-    			break;
-    		case 6:		/* 2/2 */
-    			p_ac3dec->downmix.downmix_2f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
-    			break;
-    		case 5:		/* 3/1 */
-    			p_ac3dec->downmix.downmix_3f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
-    			break;
-    		case 4:		/* 2/1 */
-    			p_ac3dec->downmix.downmix_2f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
-    			break;
-	    	case 3:		/* 3/0 */
-    			p_ac3dec->downmix.downmix_3f_0r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
-    			break;
-    		case 2:
-    			break;
-    		default:	/* 1/0 */
-//    			if (p_ac3dec->bsi.acmod == 1)
-    				center = p_ac3dec->samples[0];
-//    			else if (p_ac3dec->bsi.acmod == 0)
+            case 7:        /* 3/2 */
+                p_ac3dec->downmix.pf_downmix_3f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+                break;
+            case 6:        /* 2/2 */
+                p_ac3dec->downmix.pf_downmix_2f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+                break;
+            case 5:        /* 3/1 */
+                p_ac3dec->downmix.pf_downmix_3f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+                break;
+            case 4:        /* 2/1 */
+                p_ac3dec->downmix.pf_downmix_2f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+                break;
+            case 3:        /* 3/0 */
+                p_ac3dec->downmix.pf_downmix_3f_0r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+                break;
+            case 2:
+                break;
+            default:    /* 1/0 */
+//                if (p_ac3dec->bsi.acmod == 1)
+                    center = p_ac3dec->samples[0];
+//                else if (p_ac3dec->bsi.acmod == 0)
 //                  center = samples[ac3_config.dual_mono_ch_sel];
                 do_imdct(&p_ac3dec->imdct, center, p_ac3dec->imdct.delay[0]); /* no downmix*/
     
-    			p_ac3dec->downmix.stream_sample_1ch_to_s16 (buffer, center);
+                p_ac3dec->downmix.pf_stream_sample_1ch_to_s16 (buffer, center);
 
-        	    return;
+                return;
                 break;
         }
 
-		do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[0], p_ac3dec->imdct.delay[0]);
-		do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[1], p_ac3dec->imdct.delay[1]);
-		p_ac3dec->downmix.stream_sample_2ch_to_s16(buffer, p_ac3dec->samples[0], p_ac3dec->samples[1]);
+        do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[0], p_ac3dec->imdct.delay[0]);
+        do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[1], p_ac3dec->imdct.delay[1]);
+        p_ac3dec->downmix.pf_stream_sample_2ch_to_s16(buffer, p_ac3dec->samples[0], p_ac3dec->samples[1]);
 
-	} else {
+    } else {
         /* imdct and then downmix
-		 * delay and samples should be saved and mixed
-		 * fprintf(stderr, "time domain downmix\n"); */
-		for (i=0; i<p_ac3dec->bsi.nfchans; i++)
+         * delay and samples should be saved and mixed
+         * fprintf(stderr, "time domain downmix\n"); */
+        for (i=0; i<p_ac3dec->bsi.nfchans; i++)
         {
-			if (p_ac3dec->audblk.blksw[i])
+            if (p_ac3dec->audblk.blksw[i])
                 /* There is only a C function */
-				imdct_do_256_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]);
-			else
-				p_ac3dec->imdct.imdct_do_512_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]);
-		}
+                p_ac3dec->imdct.pf_imdct_256_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]);
+            else
+                p_ac3dec->imdct.pf_imdct_512_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]);
+        }
 
-		/* mix the sample, overlap */
-		switch(p_ac3dec->bsi.acmod)
+        /* mix the sample, overlap */
+        switch(p_ac3dec->bsi.acmod)
         {
-    		case 7:		/* 3/2 */
-    			left = p_ac3dec->samples[0];
-    			center = p_ac3dec->samples[1];
-    			right = p_ac3dec->samples[2];
-    			left_sur = p_ac3dec->samples[3];
-    			right_sur = p_ac3dec->samples[4];
-    			delay_left = p_ac3dec->imdct.delay[0];
-    			delay_right = p_ac3dec->imdct.delay[1];
-    			delay1_left = p_ac3dec->imdct.delay1[0];
-    			delay1_center = p_ac3dec->imdct.delay1[1];
-	    		delay1_right = p_ac3dec->imdct.delay1[2];
-            	delay1_sl = p_ac3dec->imdct.delay1[3];
-    			delay1_sr = p_ac3dec->imdct.delay1[4];
+            case 7:        /* 3/2 */
+                left = p_ac3dec->samples[0];
+                center = p_ac3dec->samples[1];
+                right = p_ac3dec->samples[2];
+                left_sur = p_ac3dec->samples[3];
+                right_sur = p_ac3dec->samples[4];
+                delay_left = p_ac3dec->imdct.delay[0];
+                delay_right = p_ac3dec->imdct.delay[1];
+                delay1_left = p_ac3dec->imdct.delay1[0];
+                delay1_center = p_ac3dec->imdct.delay1[1];
+                delay1_right = p_ac3dec->imdct.delay1[2];
+                delay1_sl = p_ac3dec->imdct.delay1[3];
+                delay1_sr = p_ac3dec->imdct.delay1[4];
     
-	    		for (i = 0; i < 256; i++) {
-    				left_tmp = p_ac3dec->dm_par.unit * *left++  + p_ac3dec->dm_par.clev * *center  + p_ac3dec->dm_par.slev * *left_sur++;
-    				right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *right_sur++;
-    				*buffer++ = (s16)(left_tmp + *delay_left);
-	    			*buffer++ = (s16)(right_tmp + *delay_right);
-    				*delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++  + p_ac3dec->dm_par.clev * *delay1_center  + p_ac3dec->dm_par.slev * *delay1_sl++;
-    				*delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *delay1_sr++;
-    			}
-    			break;
-    		case 6:		/* 2/2 */
-    			left = p_ac3dec->samples[0];
-    			right = p_ac3dec->samples[1];
-    			left_sur = p_ac3dec->samples[2];
-	    		right_sur = p_ac3dec->samples[3];
-    			delay_left = p_ac3dec->imdct.delay[0];
-    			delay_right = p_ac3dec->imdct.delay[1];
-	    		delay1_left = p_ac3dec->imdct.delay1[0];
-    			delay1_right = p_ac3dec->imdct.delay1[1];
-    			delay1_sl = p_ac3dec->imdct.delay1[2];
-    			delay1_sr = p_ac3dec->imdct.delay1[3];
+                for (i = 0; i < 256; i++) { /* mix, overlap-add with delay[], then refill delay[] from delay1[] only */
+                    left_tmp = p_ac3dec->dm_par.unit * *left++  + p_ac3dec->dm_par.clev * *center  + p_ac3dec->dm_par.slev * *left_sur++;
+                    right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *right_sur++;
+                    *buffer++ = (s16)(left_tmp + *delay_left);
+                    *buffer++ = (s16)(right_tmp + *delay_right);
+                    *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++  + p_ac3dec->dm_par.clev * *delay1_center  + p_ac3dec->dm_par.slev * *delay1_sl++;
+                    *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *delay1_center++ + p_ac3dec->dm_par.slev * *delay1_sr++; /* was *center++: advanced center twice per sample and never advanced delay1_center */
+                }
+                break;
+            case 6:        /* 2/2 */
+                left = p_ac3dec->samples[0];
+                right = p_ac3dec->samples[1];
+                left_sur = p_ac3dec->samples[2];
+                right_sur = p_ac3dec->samples[3];
+                delay_left = p_ac3dec->imdct.delay[0];
+                delay_right = p_ac3dec->imdct.delay[1];
+                delay1_left = p_ac3dec->imdct.delay1[0];
+                delay1_right = p_ac3dec->imdct.delay1[1];
+                delay1_sl = p_ac3dec->imdct.delay1[2];
+                delay1_sr = p_ac3dec->imdct.delay1[3];
     
-    			for (i = 0; i < 256; i++) {
-    				left_tmp = p_ac3dec->dm_par.unit * *left++  + p_ac3dec->dm_par.slev * *left_sur++;
-    				right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.slev * *right_sur++;
-    				*buffer++ = (s16)(left_tmp + *delay_left);
-    				*buffer++ = (s16)(right_tmp + *delay_right);
-    				*delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++  + p_ac3dec->dm_par.slev * *delay1_sl++;
-    				*delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.slev * *delay1_sr++;
-    			}
-    			break;
-    		case 5:		/* 3/1 */
-    			left = p_ac3dec->samples[0];
-    			center = p_ac3dec->samples[1];
-    			right = p_ac3dec->samples[2];
-    			right_sur = p_ac3dec->samples[3];
-    			delay_left = p_ac3dec->imdct.delay[0];
-    			delay_right = p_ac3dec->imdct.delay[1];
-    			delay1_left = p_ac3dec->imdct.delay1[0];
-    			delay1_center = p_ac3dec->imdct.delay1[1];
-    			delay1_right = p_ac3dec->imdct.delay1[2];
-    			delay1_sl = p_ac3dec->imdct.delay1[3];
+                for (i = 0; i < 256; i++) {
+                    left_tmp = p_ac3dec->dm_par.unit * *left++  + p_ac3dec->dm_par.slev * *left_sur++;
+                    right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.slev * *right_sur++;
+                    *buffer++ = (s16)(left_tmp + *delay_left);
+                    *buffer++ = (s16)(right_tmp + *delay_right);
+                    *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++  + p_ac3dec->dm_par.slev * *delay1_sl++;
+                    *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.slev * *delay1_sr++;
+                }
+                break;
+            case 5:        /* 3/1 */
+                left = p_ac3dec->samples[0];
+                center = p_ac3dec->samples[1];
+                right = p_ac3dec->samples[2];
+                right_sur = p_ac3dec->samples[3];
+                delay_left = p_ac3dec->imdct.delay[0];
+                delay_right = p_ac3dec->imdct.delay[1];
+                delay1_left = p_ac3dec->imdct.delay1[0];
+                delay1_center = p_ac3dec->imdct.delay1[1];
+                delay1_right = p_ac3dec->imdct.delay1[2];
+                delay1_sl = p_ac3dec->imdct.delay1[3];
     
-    			for (i = 0; i < 256; i++) {
-    				left_tmp = p_ac3dec->dm_par.unit * *left++  + p_ac3dec->dm_par.clev * *center  - p_ac3dec->dm_par.slev * *right_sur;
-    				right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *right_sur++;
-	    			*buffer++ = (s16)(left_tmp + *delay_left);
-    				*buffer++ = (s16)(right_tmp + *delay_right);
-    				*delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++  + p_ac3dec->dm_par.clev * *delay1_center  + p_ac3dec->dm_par.slev * *delay1_sl;
-    				*delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *delay1_sl++;
-	    		}
-    			break;
-    		case 4:		/* 2/1 */
-    			left = p_ac3dec->samples[0];
-    			right = p_ac3dec->samples[1];
-    			right_sur = p_ac3dec->samples[2];
-	    		delay_left = p_ac3dec->imdct.delay[0];
-    			delay_right = p_ac3dec->imdct.delay[1];
-    			delay1_left = p_ac3dec->imdct.delay1[0];
-    			delay1_right = p_ac3dec->imdct.delay1[1];
-    			delay1_sl = p_ac3dec->imdct.delay1[2];
+                for (i = 0; i < 256; i++) { /* mix, overlap-add with delay[], then refill delay[] from delay1[] only */
+                    left_tmp = p_ac3dec->dm_par.unit * *left++  + p_ac3dec->dm_par.clev * *center  - p_ac3dec->dm_par.slev * *right_sur;
+                    right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++ + p_ac3dec->dm_par.slev * *right_sur++;
+                    *buffer++ = (s16)(left_tmp + *delay_left);
+                    *buffer++ = (s16)(right_tmp + *delay_right);
+                    *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++  + p_ac3dec->dm_par.clev * *delay1_center  - p_ac3dec->dm_par.slev * *delay1_sl; /* same surround sign as left_tmp so both overlap-add halves use one mix */
+                    *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *delay1_center++ + p_ac3dec->dm_par.slev * *delay1_sl++; /* was *center++: advanced center twice per sample and never advanced delay1_center */
+                }
+                break;
+            case 4:        /* 2/1 */
+                left = p_ac3dec->samples[0];
+                right = p_ac3dec->samples[1];
+                right_sur = p_ac3dec->samples[2];
+                delay_left = p_ac3dec->imdct.delay[0];
+                delay_right = p_ac3dec->imdct.delay[1];
+                delay1_left = p_ac3dec->imdct.delay1[0];
+                delay1_right = p_ac3dec->imdct.delay1[1];
+                delay1_sl = p_ac3dec->imdct.delay1[2];
     
-        		for (i = 0; i < 256; i++) {
-    				left_tmp = p_ac3dec->dm_par.unit * *left++ - p_ac3dec->dm_par.slev * *right_sur;
-    				right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.slev * *right_sur++;
-	    			*buffer++ = (s16)(left_tmp + *delay_left);
-    				*buffer++ = (s16)(right_tmp + *delay_right);
-    				*delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ + p_ac3dec->dm_par.slev * *delay1_sl;
-    				*delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.slev * *delay1_sl++;
-    			}
-    			break;
-    		case 3:		/* 3/0 */
-    			left = p_ac3dec->samples[0];
-    			center = p_ac3dec->samples[1];
-    			right = p_ac3dec->samples[2];
-    			delay_left = p_ac3dec->imdct.delay[0];
-	    		delay_right = p_ac3dec->imdct.delay[1];
-    			delay1_left = p_ac3dec->imdct.delay1[0];
-    	   		delay1_center = p_ac3dec->imdct.delay1[1];
-		    	delay1_right = p_ac3dec->imdct.delay1[2];
+                for (i = 0; i < 256; i++) { /* mix, overlap-add with delay[], then refill delay[] from delay1[] */
+                    left_tmp = p_ac3dec->dm_par.unit * *left++ - p_ac3dec->dm_par.slev * *right_sur;
+                    right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.slev * *right_sur++;
+                    *buffer++ = (s16)(left_tmp + *delay_left);
+                    *buffer++ = (s16)(right_tmp + *delay_right);
+                    *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++ - p_ac3dec->dm_par.slev * *delay1_sl; /* same surround sign as left_tmp so both overlap-add halves use one mix */
+                    *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.slev * *delay1_sl++;
+                }
+                break;
+            case 3:        /* 3/0 */
+                left = p_ac3dec->samples[0];
+                center = p_ac3dec->samples[1];
+                right = p_ac3dec->samples[2];
+                delay_left = p_ac3dec->imdct.delay[0];
+                delay_right = p_ac3dec->imdct.delay[1];
+                delay1_left = p_ac3dec->imdct.delay1[0];
+                delay1_center = p_ac3dec->imdct.delay1[1];
+                delay1_right = p_ac3dec->imdct.delay1[2];
 
-    			for (i = 0; i < 256; i++) {
-    				left_tmp = p_ac3dec->dm_par.unit * *left++  + p_ac3dec->dm_par.clev * *center;
-    				right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++;
-    				*buffer++ = (s16)(left_tmp + *delay_left);
-    				*buffer++ = (s16)(right_tmp + *delay_right);
-	    			*delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++  + p_ac3dec->dm_par.clev * *delay1_center;
-    				*delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *center++;
-    			}
-    			break;
-    		case 2:		/* copy to output */
-    			for (i = 0; i < 256; i++) {
-    				*buffer++ = (s16)p_ac3dec->samples[0][i];
-	    			*buffer++ = (s16)p_ac3dec->samples[1][i];
-    			}
-    			break;
-		}
-	}
+                for (i = 0; i < 256; i++) { /* mix, overlap-add with delay[], then refill delay[] from delay1[] only */
+                    left_tmp = p_ac3dec->dm_par.unit * *left++  + p_ac3dec->dm_par.clev * *center;
+                    right_tmp= p_ac3dec->dm_par.unit * *right++ + p_ac3dec->dm_par.clev * *center++;
+                    *buffer++ = (s16)(left_tmp + *delay_left);
+                    *buffer++ = (s16)(right_tmp + *delay_right);
+                    *delay_left++ = p_ac3dec->dm_par.unit * *delay1_left++  + p_ac3dec->dm_par.clev * *delay1_center;
+                    *delay_right++ = p_ac3dec->dm_par.unit * *delay1_right++ + p_ac3dec->dm_par.clev * *delay1_center++; /* was *center++: advanced center twice per sample and never advanced delay1_center */
+                }
+                break;
+            case 2:        /* copy to output */
+                for (i = 0; i < 256; i++) {
+                    *buffer++ = (s16)p_ac3dec->samples[0][i];
+                    *buffer++ = (s16)p_ac3dec->samples[1][i];
+                }
+                break;
+        }
+    }
 }
diff --git a/src/ac3_decoder/ac3_imdct_c.c b/src/ac3_decoder/ac3_imdct_c.c
deleted file mode 100644
index c5011bc310..0000000000
--- a/src/ac3_decoder/ac3_imdct_c.c
+++ /dev/null
@@ -1,421 +0,0 @@
-/*****************************************************************************
- * ac3_imdct_c.c: ac3 DCT
- *****************************************************************************
- * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_imdct_c.c,v 1.3 2001/05/14 15:58:04 reno Exp $
- *
- * Authors: Renaud Dartus <reno@videolan.org>
- *          Aaron Holtzman <aholtzma@engr.uvic.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
- *****************************************************************************/
-
-#include "defs.h"
-
-#include <string.h>                                              /* memcpy() */
-
-#include <math.h>
-#include <stdio.h>
-
-#include "config.h"
-#include "common.h"
-#include "threads.h"
-#include "mtime.h"
-
-#include "stream_control.h"
-#include "input_ext-dec.h"
-
-#include "ac3_decoder.h"
-#include "ac3_imdct_c.h"
-
-#ifndef M_PI
-#   define M_PI 3.14159265358979323846
-#endif
-
-void fft_64p_c (complex_t *x);
-void fft_128p_c (complex_t *x);
-
-static float window[] = {
-	0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
-	0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
-	0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
-	0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
-	0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
-	0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
-	0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
-	0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
-	0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
-	0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
-	0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
-	0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
-	0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
-	0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
-	0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
-	0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
-	0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
-	0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
-	0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
-	0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
-	0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
-	0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
-	0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
-	0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
-	0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
-	0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
-	0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
-	0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
-	0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
-	0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
-	0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
-	1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
-};
-
-static const int pm128[128] =
-{
-	0, 16, 32, 48, 64, 80,  96, 112,  8, 40, 72, 104, 24, 56,  88, 120,
-	4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44,  60, 76, 92, 108, 124,
-	2, 18, 34, 50, 66, 82,  98, 114, 10, 42, 74, 106, 26, 58,  90, 122,
-	6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62,  94, 126,
-	1, 17, 33, 49, 65, 81,  97, 113,  9, 41, 73, 105, 25, 57,  89, 121,
-	5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45,  61, 77, 93, 109, 125,
-	3, 19, 35, 51, 67, 83,  99, 115, 11, 43, 75, 107, 27, 59,  91, 123,
-	7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47,  63, 79, 95, 111, 127
-}; 
-
-static const int pm64[64] =
-{
-	0,  8, 16, 24, 32, 40, 48, 56,
-	4, 20, 36, 52, 12, 28, 44, 60,
-	2, 10, 18, 26, 34, 42, 50, 58,
-	6, 14, 22, 30, 38, 46, 54, 62,
-	1,  9, 17, 25, 33, 41, 49, 57,
-	5, 21, 37, 53, 13, 29, 45, 61,
-	3, 11, 19, 27, 35, 43, 51, 59,
-	7, 23, 39, 55, 15, 31, 47, 63
-};
-
-int imdct_init_c (imdct_t * p_imdct)
-{
-	int i;
-	float scale = 181.019;
-
-	p_imdct->imdct_do_512 = imdct_do_512_c;
-	p_imdct->imdct_do_512_nol = imdct_do_512_nol_c;
-	p_imdct->fft_64p = fft_64p_c;
-
-	/* Twiddle factors to turn IFFT into IMDCT */
-         
-	for (i=0; i < 128; i++) {
-		p_imdct->xcos1[i] = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale; 
-		p_imdct->xsin1[i] = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
-	}
-
-	return 0;
-}
-
-void imdct_do_256 (imdct_t * p_imdct, float data[],float delay[])
-{
-	int i, j, k;
-	int p, q;
-
-	float tmp_a_i;
-	float tmp_a_r;
-
-	float *data_ptr;
-	float *delay_ptr;
-	float *window_ptr;
-
-	complex_t *buf1, *buf2;
-
-	buf1 = &p_imdct->buf[0];
-	buf2 = &p_imdct->buf[64];
-
-    /* Pre IFFT complex multiply plus IFFT complex conjugate */
-	for (k=0; k<64; k++) { 
-		/* X1[k] = X[2*k]
-		 * X2[k] = X[2*k+1]	*/
-
-		j = pm64[k];
-		p = 2 * (128-2*j-1);
-		q = 2 * (2 * j);
-
-		/* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
-		buf1[k].real =        data[p] * p_imdct->xcos2[j] - data[q] * p_imdct->xsin2[j];
-		buf1[k].imag = -1.0f*(data[q] * p_imdct->xcos2[j] + data[p] * p_imdct->xsin2[j]);
-		/* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
-		buf2[k].real =        data[p + 1] * p_imdct->xcos2[j] - data[q + 1] * p_imdct->xsin2[j];
-		buf2[k].imag = -1.0f*(data[q + 1] * p_imdct->xcos2[j] + data[p + 1] * p_imdct->xsin2[j]);
-	}
-
-	p_imdct->fft_64p(&buf1[0]);
-	p_imdct->fft_64p(&buf2[0]);
-
-	/* Post IFFT complex multiply */
-	for( i=0; i < 64; i++) {
-		tmp_a_r =  buf1[i].real;
-		tmp_a_i = -buf1[i].imag;
-		buf1[i].real = (tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
-		buf1[i].imag = (tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
-		tmp_a_r =  buf2[i].real;
-		tmp_a_i = -buf2[i].imag;
-		buf2[i].real = (tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
-		buf2[i].imag = (tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
-	}
-	
-	data_ptr = data;
-	delay_ptr = delay;
-	window_ptr = window;
-
-	/* Window and convert to real valued signal */
-	for(i=0; i< 64; i++) { 
-		*data_ptr++ = -buf1[i].imag     * *window_ptr++ + *delay_ptr++;
-		*data_ptr++ = buf1[64-i-1].real * *window_ptr++ + *delay_ptr++;
-	}
-
-	for(i=0; i< 64; i++) {
-		*data_ptr++ = -buf1[i].real     * *window_ptr++ + *delay_ptr++;
-		*data_ptr++ = buf1[64-i-1].imag * *window_ptr++ + *delay_ptr++;
-	}
-	
-	delay_ptr = delay;
-
-	for(i=0; i< 64; i++) {
-		*delay_ptr++ = -buf2[i].real      * *--window_ptr;
-		*delay_ptr++ =  buf2[64-i-1].imag * *--window_ptr;
-	}
-
-	for(i=0; i< 64; i++) {
-		*delay_ptr++ =  buf2[i].imag      * *--window_ptr;
-		*delay_ptr++ = -buf2[64-i-1].real * *--window_ptr;
-	}
-}
-
-
-void imdct_do_256_nol (imdct_t * p_imdct, float data[], float delay[])
-{
-	int i, j, k;
-	int p, q;
-
-	float tmp_a_i;
-	float tmp_a_r;
-
-	float *data_ptr;
-	float *delay_ptr;
-	float *window_ptr;
-
-	complex_t *buf1, *buf2;
-
-	buf1 = &p_imdct->buf[0];
-	buf2 = &p_imdct->buf[64];
-
-    /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
-	for(k=0; k<64; k++) {
-        /* X1[k] = X[2*k]
-        * X2[k] = X[2*k+1] */
-        j = pm64[k];
-        p = 2 * (128-2*j-1);
-        q = 2 * (2 * j);
-
-        /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
-        buf1[k].real =        data[p] * p_imdct->xcos2[j] - data[q] * p_imdct->xsin2[j];
-        buf1[k].imag = -1.0f*(data[q] * p_imdct->xcos2[j] + data[p] * p_imdct->xsin2[j]);
-        /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
-        buf2[k].real =        data[p + 1] * p_imdct->xcos2[j] - data[q + 1] * p_imdct->xsin2[j];
-        buf2[k].imag = -1.0f*(data[q + 1] * p_imdct->xcos2[j] + data[p + 1] * p_imdct->xsin2[j]);
-    }
-
-    p_imdct->fft_64p(&buf1[0]);
-    p_imdct->fft_64p(&buf2[0]);
-
-    /* Post IFFT complex multiply */
-    for( i=0; i < 64; i++) {
-        /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
-        tmp_a_r =  buf1[i].real;
-        tmp_a_i = -buf1[i].imag;
-        buf1[i].real =(tmp_a_r * p_imdct->xcos2[i])  -  (tmp_a_i  * p_imdct->xsin2[i]);
-        buf1[i].imag =(tmp_a_r * p_imdct->xsin2[i])  +  (tmp_a_i  * p_imdct->xcos2[i]);
-        /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
-        tmp_a_r =  buf2[i].real;
-        tmp_a_i = -buf2[i].imag;
-        buf2[i].real =(tmp_a_r * p_imdct->xcos2[i])  -  (tmp_a_i  * p_imdct->xsin2[i]);
-        buf2[i].imag =(tmp_a_r * p_imdct->xsin2[i])  +  (tmp_a_i  * p_imdct->xcos2[i]);
-    }
-      
-    data_ptr = data;
-    delay_ptr = delay;
-    window_ptr = window;
-
-    /* Window and convert to real valued signal, no overlap */
-    for(i=0; i< 64; i++) {
-        *data_ptr++ = -buf1[i].imag     * *window_ptr++;
-        *data_ptr++ = buf1[64-i-1].real * *window_ptr++;
-    }
-
-    for(i=0; i< 64; i++) {
-        *data_ptr++ = -buf1[i].real     * *window_ptr++ + *delay_ptr++;
-        *data_ptr++ = buf1[64-i-1].imag * *window_ptr++ + *delay_ptr++;
-    }
-
-    delay_ptr = delay;
-
-    for(i=0; i< 64; i++) {
-        *delay_ptr++ = -buf2[i].real      * *--window_ptr;
-        *delay_ptr++ =  buf2[64-i-1].imag * *--window_ptr;
-    }
-
-    for(i=0; i< 64; i++) {
-        *delay_ptr++ =  buf2[i].imag      * *--window_ptr;
-        *delay_ptr++ = -buf2[64-i-1].real * *--window_ptr;
-	}
-}
-
-void imdct_do_512_c (imdct_t * p_imdct, float data[], float delay[])
-{
-	int i, j;
-	float tmp_a_r, tmp_a_i;
-	float *data_ptr;
-	float *delay_ptr;
-	float *window_ptr;
-
-    /* 512 IMDCT with source and dest data in 'data'
-     * Pre IFFT complex multiply plus IFFT complex conjugate */
-
-    for( i=0; i < 128; i++) {
-		j = pm128[i];
-		/* a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
-		 * c = data[2*j] * xcos1[j];
-		 * b = data[256-2*j-1] * xsin1[j];
-		 * buf1[i].real = a - b + c;
-		 * buf1[i].imag = b + c; */
-		p_imdct->buf[i].real = (data[256-2*j-1] * p_imdct->xcos1[j]) - (data[2*j] * p_imdct->xsin1[j]);
-		p_imdct->buf[i].imag = -1.0 * (data[2*j] * p_imdct->xcos1[j] + data[256-2*j-1] * p_imdct->xsin1[j]);
-	}
-
-	fft_128p_c (&p_imdct->buf[0]);
-
-    /* Post IFFT complex multiply  plus IFFT complex conjugate */
-	for (i=0; i < 128; i++) {
-		tmp_a_r = p_imdct->buf[i].real;
-		tmp_a_i = p_imdct->buf[i].imag;
-		/* a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]);
-		 * b = tmp_a_r * xsin1[j];
-		 * c = tmp_a_i * xcos1[j];
-		 * buf[j].real = a - b + c;
-		 * buf[j].imag = b + c; */
-		p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i])  +  (tmp_a_i  * p_imdct->xsin1[i]);
-		p_imdct->buf[i].imag =(tmp_a_r * p_imdct->xsin1[i])  -  (tmp_a_i  * p_imdct->xcos1[i]);
-	}
-
-	data_ptr = data;
-	delay_ptr = delay;
-	window_ptr = window;
-
-    /* Window and convert to real valued signal */
-	for (i=0; i< 64; i++) {
-		*data_ptr++ = -p_imdct->buf[64+i].imag  * *window_ptr++ + *delay_ptr++;
-		*data_ptr++ = p_imdct->buf[64-i-1].real * *window_ptr++ + *delay_ptr++;
-	}
-
-	for(i=0; i< 64; i++) {
-		*data_ptr++ = -p_imdct->buf[i].real      * *window_ptr++ + *delay_ptr++;
-		*data_ptr++ = p_imdct->buf[128-i-1].imag * *window_ptr++ + *delay_ptr++;
-	}
-
-    /* The trailing edge of the window goes into the delay line */
-	delay_ptr = delay;
-
-	for(i=0; i< 64; i++) {
-		*delay_ptr++ = -p_imdct->buf[64+i].real   * *--window_ptr;
-		*delay_ptr++ =  p_imdct->buf[64-i-1].imag * *--window_ptr;
-	}
-
-	for(i=0; i<64; i++) {
-		*delay_ptr++ =  p_imdct->buf[i].imag       * *--window_ptr;
-		*delay_ptr++ = -p_imdct->buf[128-i-1].real * *--window_ptr;
-	}
-}
-
-
-void imdct_do_512_nol_c (imdct_t * p_imdct, float data[], float delay[])
-{
-	int i, j;
-
-	float tmp_a_i;
-	float tmp_a_r;
-
-	float *data_ptr;
-	float *delay_ptr;
-	float *window_ptr;
-
-    /* 512 IMDCT with source and dest data in 'data'
-	 * Pre IFFT complex multiply plus IFFT cmplx conjugate */
-
-    for( i=0; i < 128; i++) {
-    	/* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) */
-		j = pm128[i];
-    	/* a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
-    	 * c = data[2*j] * xcos1[j];
-    	 * b = data[256-2*j-1] * xsin1[j];
-    	 * buf1[i].real = a - b + c;
-         * buf1[i].imag = b + c; */
-		p_imdct->buf[i].real = (data[256-2*j-1] * p_imdct->xcos1[j]) - (data[2*j] * p_imdct->xsin1[j]);
-		p_imdct->buf[i].imag = -1.0 * (data[2*j] * p_imdct->xcos1[j] + data[256-2*j-1] * p_imdct->xsin1[j]);
-	}
-       
-	fft_128p_c (&p_imdct->buf[0]);
-
-    /* Post IFFT complex multiply  plus IFFT complex conjugate*/
-	for (i=0; i < 128; i++) {
-		/* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ;
-		 * int j1 = i; */
-		tmp_a_r = p_imdct->buf[i].real;
-		tmp_a_i = p_imdct->buf[i].imag;
-		/* a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]);
-		 * b = tmp_a_r * xsin1[j];
-		 * c = tmp_a_i * xcos1[j];
-		 * buf[j].real = a - b + c;
-		 * buf[j].imag = b + c; */
-		p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i]) + (tmp_a_i  * p_imdct->xsin1[i]);
-		p_imdct->buf[i].imag =(tmp_a_r * p_imdct->xsin1[i]) - (tmp_a_i  * p_imdct->xcos1[i]);
-	}
-       
-	data_ptr = data;
-	delay_ptr = delay;
-	window_ptr = window;
-
-	/* Window and convert to real valued signal, no overlap here*/
-	for (i=0; i< 64; i++) { 
-		*data_ptr++ = -p_imdct->buf[64+i].imag  * *window_ptr++; 
-		*data_ptr++ = p_imdct->buf[64-i-1].real * *window_ptr++; 
-	}
-
-	for(i=0; i< 64; i++) { 
-		*data_ptr++ = -p_imdct->buf[i].real      * *window_ptr++; 
-		*data_ptr++ = p_imdct->buf[128-i-1].imag * *window_ptr++; 
-	}
-       
-	/* The trailing edge of the window goes into the delay line */
-	delay_ptr = delay;
-
-	for(i=0; i< 64; i++) { 
-		*delay_ptr++ = -p_imdct->buf[64+i].real   * *--window_ptr; 
-		*delay_ptr++ =  p_imdct->buf[64-i-1].imag * *--window_ptr; 
-	}
-
-	for(i=0; i<64; i++) {
-		*delay_ptr++ =  p_imdct->buf[i].imag       * *--window_ptr; 
-		*delay_ptr++ = -p_imdct->buf[128-i-1].real * *--window_ptr; 
-	}
-}
diff --git a/src/ac3_decoder/ac3_imdct_sse.c b/src/ac3_decoder/ac3_imdct_sse.c
deleted file mode 100644
index 7146057586..0000000000
--- a/src/ac3_decoder/ac3_imdct_sse.c
+++ /dev/null
@@ -1,642 +0,0 @@
-/*****************************************************************************
- * ac3_imdct_sse.c: ac3 DCT
- *****************************************************************************
- * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_imdct_sse.c,v 1.1 2001/05/14 15:58:04 reno Exp $
- *
- * Authors: Renaud Dartus <reno@videolan.org>
- *          Aaron Holtzman <aholtzma@engr.uvic.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
- *****************************************************************************/
-
-#include "defs.h"
-
-#include <math.h>
-#include <stdio.h>
-
-#include "config.h"
-#include "common.h"
-#include "threads.h"
-#include "mtime.h"
-
-#include "intf_msg.h"                        /* intf_DbgMsg(), intf_ErrMsg() */
-
-#include "stream_control.h"
-#include "input_ext-dec.h"
-
-#include "ac3_decoder.h"
-
-#include "ac3_imdct_sse.h"
-
-static const float window[] = {
-	0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
-	0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
-	0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
-	0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
-	0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
-	0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
-	0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
-	0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
-	0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
-	0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
-	0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
-	0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
-	0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
-	0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
-	0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
-	0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
-	0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
-	0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
-	0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
-	0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
-	0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
-	0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
-	0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
-	0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
-	0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
-	0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
-	0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
-	0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
-	0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
-	0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
-	0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
-	1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
-};
-
-static const int pm128[128] =
-{
-	0, 16, 32, 48, 64, 80,  96, 112,  8, 40, 72, 104, 24, 56,  88, 120,
-	4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44,  60, 76, 92, 108, 124,
-	2, 18, 34, 50, 66, 82,  98, 114, 10, 42, 74, 106, 26, 58,  90, 122,
-	6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62,  94, 126,
-	1, 17, 33, 49, 65, 81,  97, 113,  9, 41, 73, 105, 25, 57,  89, 121,
-	5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45,  61, 77, 93, 109, 125,
-	3, 19, 35, 51, 67, 83,  99, 115, 11, 43, 75, 107, 27, 59,  91, 123,
-	7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47,  63, 79, 95, 111, 127
-}; 
-
-void fft_64p_sse (complex_t *x);
-void fft_128p_sse(complex_t *a);
-static void imdct512_pre_ifft_twiddle_sse (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse);
-static void imdct512_post_ifft_twiddle_sse (complex_t *buf, float *xcos_sin_sse);
-static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt);
-static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt);
-
-
-int imdct_init_sse (imdct_t * p_imdct)
-{
-	int i;
-	float scale = 181.019;
-
-	intf_WarnMsg (1, "ac3dec: using MMX_SSE for imdct");
-	p_imdct->imdct_do_512 = imdct_do_512_sse;
-	p_imdct->imdct_do_512_nol = imdct_do_512_nol_sse;
-	p_imdct->fft_64p = fft_64p_sse;
-
-	for (i=0; i < 128; i++)
-	{
-		float xcos_i = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
-		float xsin_i = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
-		p_imdct->xcos_sin_sse[i * 4]     = xcos_i;
-		p_imdct->xcos_sin_sse[i * 4 + 1] = -xsin_i;
-		p_imdct->xcos_sin_sse[i * 4 + 2] = -xsin_i;
-		p_imdct->xcos_sin_sse[i * 4 + 3] = -xcos_i;
-	}
-	return 0;
-}
-
-void imdct_do_512_sse (imdct_t * p_imdct, float data[], float delay[])
-{
-	imdct512_pre_ifft_twiddle_sse (pm128, p_imdct->buf, data, p_imdct->xcos_sin_sse);
-	fft_128p_sse (p_imdct->buf);
-	imdct512_post_ifft_twiddle_sse (p_imdct->buf, p_imdct->xcos_sin_sse);
-    imdct512_window_delay_sse (p_imdct->buf, data, window, delay);
-}
-
-
-void imdct_do_512_nol_sse (imdct_t * p_imdct, float data[], float delay[])
-{
-	imdct512_pre_ifft_twiddle_sse (pm128, p_imdct->buf, data, p_imdct->xcos_sin_sse);  
-	fft_128p_sse (p_imdct->buf);
-	imdct512_post_ifft_twiddle_sse (p_imdct->buf, p_imdct->xcos_sin_sse);
-    imdct512_window_delay_nol_sse (p_imdct->buf, data, window, delay);
-}
-
-static void imdct512_pre_ifft_twiddle_sse (const int *pmt, complex_t *buf, float *data, float *xcos_sin_sse)
-{
-    __asm__ __volatile__ (	
-	"pushl %%ebp\n"
-	"movl  %%esp, %%ebp\n"
-	"addl  $-4, %%esp\n" /* local variable, loop counter */
-	
-	"pushl %%eax\n"
-	"pushl %%ebx\n"
-	"pushl %%ecx\n"
-	"pushl %%edx\n"
-	"pushl %%edi\n"
-	"pushl %%esi\n"
-
-	"movl  8(%%ebp), %%eax\n" 	/* pmt */
-	"movl 12(%%ebp), %%ebx\n"	/* buf */
-	"movl 16(%%ebp), %%ecx\n"	/* data */
-	"movl 20(%%ebp), %%edx\n" 	/* xcos_sin_sse */
-	"movl $64, -4(%%ebp)\n"
-	
-".loop:\n"
-	"movl  (%%eax), %%esi\n"
-	"movl 4(%%eax), %%edi\n"
-	"movss (%%ecx, %%esi, 8), %%xmm1\n" /* 2j */
-	"movss (%%ecx, %%edi, 8), %%xmm3\n" /* 2(j+1) */
-
-	"shll $1, %%esi\n"
-	"shll $1, %%edi\n"
-
-	"movups (%%edx, %%esi, 8), %%xmm0\n" /* -c_j | -s_j | -s_j | c_j */
-	"movups (%%edx, %%edi, 8), %%xmm2\n" /* -c_j+1 | -s_j+1 | -s_j+1 | c_j+1 */
-
-	"negl %%esi\n"
-	"negl %%edi\n"
-
-	"movss 1020(%%ecx, %%esi, 4), %%xmm4\n" /* 255-2j */
-	"addl $8, %%eax\n"
-	"movss 1020(%%ecx, %%edi, 4), %%xmm5\n" /* 255-2(j+1) */
-
-	"shufps $0, %%xmm1, %%xmm4\n" /* 2j | 2j | 255-2j | 255-2j */
-	"shufps $0, %%xmm3, %%xmm5\n" /* 2(j+1) | 2(j+1) | 255-2(j+1) | 255-2(j+1) */
-	"mulps   %%xmm4, %%xmm0\n"
-	"mulps   %%xmm5, %%xmm2\n"
-	"movhlps %%xmm0, %%xmm1\n"
-	"movhlps %%xmm2, %%xmm3\n"
-	"addl    $16, %%ebx\n"
-	"addps   %%xmm1, %%xmm0\n"
-	"addps   %%xmm3, %%xmm2\n"
-	"movlhps %%xmm2, %%xmm0\n"
-    
-	"movups  %%xmm0, -16(%%ebx)\n"
-	"decl -4(%%ebp)\n"
-   	"jnz .loop\n"
-
-	"popl %%esi\n"
-	"popl %%edi\n"
-	"popl %%edx\n"
-	"popl %%ecx\n"
-	"popl %%ebx\n"
-	"popl %%eax\n"
-
-	"addl $4, %%esp\n"
-	"popl %%ebp\n"
-    ::);
-}
-
-static void imdct512_post_ifft_twiddle_sse (complex_t *buf, float *xcos_sin_sse)
-{
-    __asm__ __volatile__ ( 
-	"pushl %%ecx\n"
-	"movl $32, %%ecx\n"                 /* loop counter */
-
-".loop1:\n"
-	"movups	(%%eax), %%xmm0\n"          /*  im1 | re1 | im0 | re0 */
-
-	"movups  (%%ebx), %%xmm2\n"         /* -c | -s | -s | c */
-	"movhlps  %%xmm0, %%xmm1\n"         /* im1 | re1 */
-	"movups  16(%%ebx), %%xmm3\n"       /* -c1 | -s1 | -s1 | c1 */
-
-	"shufps $0x50, %%xmm0, %%xmm0\n"    /* im0 | im0 | re0 | re0 */
-	"shufps $0x50, %%xmm1, %%xmm1\n"    /* im1 | im1 | re1 | re1 */
-
-	"movups  16(%%eax), %%xmm4\n"       /* im3 | re3 | im2 | re2 */
-
-    "shufps $0x27, %%xmm2, %%xmm2\n"    /* c | -s | -s | -c */
-	"movhlps  %%xmm4, %%xmm5\n"         /* im3 | re3 */
-    "shufps $0x27, %%xmm3, %%xmm3\n"    /* c1 | -s1 | -s1 | -c1 */
-
-	"movups  32(%%ebx), %%xmm6\n"       /* -c2 | -s2 | -s2 | c2 */
-	"movups  48(%%ebx), %%xmm7\n"       /* -c3 | -s3 | -s3 | c3 */
-
-	"shufps $0x50, %%xmm4, %%xmm4\n"    /* im2 | im2 | re2 | re2 */
-	"shufps $0x50, %%xmm5, %%xmm5\n"    /* im3 | im3 | re3 | re3 */
-
-	"mulps %%xmm2, %%xmm0\n"
-	"mulps %%xmm3, %%xmm1\n"
-
-	"shufps $0x27, %%xmm6, %%xmm6\n"    /* c2 | -s2 | -s2 | -c2 */
-	"shufps $0x27, %%xmm7, %%xmm7\n"    /* c3 | -s3 | -s3 | -c3 */
-
-	"movhlps %%xmm0, %%xmm2\n"
-	"movhlps %%xmm1, %%xmm3\n"
-
-	"mulps %%xmm6, %%xmm4\n"
-	"mulps %%xmm7, %%xmm5\n"
-
-	"addps %%xmm2, %%xmm0\n"
-	"addps %%xmm3, %%xmm1\n"
-
-	"movhlps %%xmm4, %%xmm6\n"
-	"movhlps %%xmm5, %%xmm7\n"
-
-	"addps %%xmm6, %%xmm4\n"
-	"addps %%xmm7, %%xmm5\n"
-
-	"movlhps %%xmm1, %%xmm0\n"
-	"movlhps %%xmm5, %%xmm4\n"
-
-	"movups %%xmm0, (%%eax)\n"
-	"movups %%xmm4, 16(%%eax)\n"
-	"addl $64, %%ebx\n"
-	"addl $32, %%eax\n"
-	"decl %%ecx\n"
-	"jnz .loop1\n"
-
-	"popl %%ecx\n"
-    : "=a" (buf)
-    : "a" (buf), "b" (xcos_sin_sse) );
-}
-
-static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt)
-{
-    __asm__ __volatile__ (
-	"pushl %%ebp\n"
-	"movl  %%esp, %%ebp\n"
-	
-	"pushl %%eax\n"
-	"pushl %%ebx\n"
-	"pushl %%ecx\n"
-	"pushl %%edx\n"
-	"pushl %%esi\n"
-	"pushl %%edi\n"
-
-	"movl 20(%%ebp), %%ebx\n"   /* delay */
-	"movl 16(%%ebp), %%edx\n"   /* window */
-
-	"movl 8(%%ebp), %%eax\n"    /* buf */
-	"movl $16, %%ecx\n"         /* loop count */
-	"leal 516(%%eax), %%esi\n"  /* buf[64].im */
-	"leal 504(%%eax), %%edi\n"  /* buf[63].re */
-	"movl  12(%%ebp), %%eax\n"  /* data */
-
-".first_128_samples:\n"
-	"movss   (%%esi), %%xmm0\n"
-	"movss  8(%%esi), %%xmm2\n"
-	"movss   (%%edi), %%xmm1\n"
-	"movss -8(%%edi), %%xmm3\n"
-
-	"movlhps %%xmm2, %%xmm0\n"      /* 0.0 | im1 | 0.0 | im0 */
-	"movlhps %%xmm3, %%xmm1\n"      /* 0.0 | re1 | 0.0 | re0 */
-
-	"movups (%%edx), %%xmm4\n"      /* w3 | w2 | w1 | w0 */
-	"movups (%%ebx), %%xmm5\n"      /* d3 | d2 | d1 | d0 */
-	"shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
-
-	"movss  16(%%esi), %%xmm6\n"    /* im2 */
-	"movss  24(%%esi), %%xmm7\n"    /* im3 */
-	"subps     %%xmm1, %%xmm0\n"    /* -re1 | im1 | -re0 | im0 */
-	"movss -16(%%edi), %%xmm2\n"    /* re2 */
-	"movss -24(%%edi), %%xmm3\n"    /* re3 */
-	"mulps     %%xmm4, %%xmm0\n"
-	"movlhps   %%xmm7, %%xmm6\n"    /* 0.0 | im3 | 0.0 | im2 */
-	"movlhps   %%xmm3, %%xmm2\n"    /* 0.0 | re3 | 0.0 | re2 */
-	"addps %%xmm5, %%xmm0\n"
-	"shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
-	"movups 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
-	"movups 16(%%ebx), %%xmm5\n"    /* d7 | d6 | d5 | d4 */
-	"subps %%xmm2, %%xmm6\n"        /* -re3 | im3 | -re2 | im2 */
-	"addl $32, %%edx\n"
-	"movups %%xmm0, (%%eax)\n"
-	"addl $32, %%ebx\n"
-	"mulps %%xmm4, %%xmm6\n"
-	"addl $32, %%esi\n"
-	"addl $32, %%eax\n"
-	"addps %%xmm5, %%xmm6\n"
-    "addl $-32, %%edi\n"
-	"movups %%xmm6, -16(%%eax)\n"
-	"decl %%ecx\n"
-	"jnz .first_128_samples\n"
-
-	"movl 8(%%ebp), %%esi\n"    /* buf[0].re */
-	"leal 1020(%%esi), %%edi\n" /* buf[127].im */
-	"movl $16, %%ecx\n"         /* loop count */
-    
-".second_128_samples:\n"
-	"movss   (%%esi), %%xmm0\n" /* buf[i].re */
-	"movss  8(%%esi), %%xmm2\n" /* re1 */
-	"movss   (%%edi), %%xmm1\n" /* buf[127-i].im */
-	"movss -8(%%edi), %%xmm3\n" /* im1 */
-
-	"movlhps %%xmm2, %%xmm0\n"  /* 0.0 | re1 | 0.0 | re0 */
-	"movlhps %%xmm3, %%xmm1\n"  /* 0.0 | im1 | 0.0 | im1 */
-
-	"movups (%%edx), %%xmm4\n"  /* w3 | w2 | w1 | w0 */
-	"movups (%%ebx), %%xmm5\n"  /* d3 | d2 | d1 | d0 */
-
-	"shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
-	"movss  16(%%esi), %%xmm6\n"    /* re2 */
-	"movss  24(%%esi), %%xmm7\n"    /* re3 */
-	"movss -16(%%edi), %%xmm2\n"    /* im2 */
-	"movss -24(%%edi), %%xmm3\n"    /* im3 */
-	"subps   %%xmm1, %%xmm0\n"      /* -im1 | re1 | -im0 | re0 */
-	"movlhps %%xmm7, %%xmm6\n"      /* 0.0 | re3 | 0.0 | re2 */
-	"movlhps %%xmm3, %%xmm2\n"      /* 0.0 | im3 | 0.0 | im2 */
-	"mulps   %%xmm4, %%xmm0\n"
-	"shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
-	"movups 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
-	"addl $32, %%esi\n"
-	"subps %%xmm2, %%xmm6\n"        /* -im3 | re3 | -im2 | re2 */
-	"addps %%xmm5, %%xmm0\n"
-	"mulps %%xmm4, %%xmm6\n"
-	"addl $-32, %%edi\n"
-	"movups 16(%%ebx), %%xmm5\n"    /* d7 | d6 | d5 | d4 */
-	"movups %%xmm0, (%%eax)\n"
-	"addps %%xmm5, %%xmm6\n"
-	"addl $32, %%edx\n"
-	"addl $32, %%eax\n"
-	"addl $32, %%ebx\n"
-	"movups %%xmm6, -16(%%eax)\n"
-	"decl %%ecx\n"
-	"jnz .second_128_samples\n"
-
-	"movl   8(%%ebp), %%eax\n"
-	"leal 512(%%eax), %%esi\n"  /* buf[64].re */
-	"leal 508(%%eax), %%edi\n"  /* buf[63].im */
-	"movl $16, %%ecx\n"         /* loop count */
-	"movl  20(%%ebp), %%eax\n"  /* delay */
-
-".first_128_delay:\n"
-	"movss   (%%esi), %%xmm0\n"
-	"movss  8(%%esi), %%xmm2\n"
-	"movss   (%%edi), %%xmm1\n"
-	"movss -8(%%edi), %%xmm3\n"
-
-	"movlhps %%xmm2, %%xmm0\n"      /* 0.0 | re1 | 0.0 | re0 */
-	"movlhps %%xmm3, %%xmm1\n"      /* 0.0 | im1 | 0.0 | im0 */
-
-	"movups -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
-    "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
-	"movss  16(%%esi), %%xmm6\n"    /* re2 */
-	"movss  24(%%esi), %%xmm7\n"    /* re3 */
-	"movss -16(%%edi), %%xmm2\n"    /* im2 */
-	"movss -24(%%edi), %%xmm3\n"    /* im3 */
-	"subps     %%xmm1, %%xmm0\n"    /* -im1 | re1 | -im0 | re0 */
-	"addl $-32, %%edx\n"
-	"movlhps %%xmm7, %%xmm6\n"      /* 0.0 | re3 | 0.0 | re2 */
-	"movlhps %%xmm3, %%xmm2\n"      /* 0.0 | im3 | 0.0 | im2 */
-    "mulps   %%xmm4, %%xmm0\n"
-	"movups (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
-	"shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
-	"movups %%xmm0, (%%eax)\n"
-	"addl $32, %%esi\n"
-	"subps %%xmm2, %%xmm6\n"        /* -im3 | re3 | -im2 | re2 */
-	"addl $-32, %%edi\n"
-	"mulps %%xmm5, %%xmm6\n"
-	"addl $32, %%eax\n"
-	"movups %%xmm6, -16(%%eax)\n"
-	"decl %%ecx\n"
-	"jnz .first_128_delay\n"
-
-	"movl    8(%%ebp), %%ebx\n"
-	"leal    4(%%ebx), %%esi\n" /* buf[0].im */
-	"leal 1016(%%ebx), %%edi\n" /* buf[127].re */
-	"movl $16, %%ecx\n"         /* loop count */
-    
-".second_128_delay:\n"
-	"movss   (%%esi), %%xmm0\n"
-	"movss  8(%%esi), %%xmm2\n"
-	"movss   (%%edi), %%xmm1\n"
-	"movss -8(%%edi), %%xmm3\n"
-
-	"movlhps %%xmm2, %%xmm0\n"      /* 0.0 | im1 | 0.0 | im0 */
-	"movlhps %%xmm3, %%xmm1\n"      /* 0.0 | re1 | 0.0 | re0 */
-
-	"movups -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
-	"shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
-	"movss  16(%%esi), %%xmm6\n"    /* im2 */
-	"movss  24(%%esi), %%xmm7\n"    /* im3 */
-	"movss -16(%%edi), %%xmm2\n"    /* re2 */
-	"movss -24(%%edi), %%xmm3\n"    /* re3 */
-	"subps %%xmm0, %%xmm1\n"        /* re1 | -im1 | re0 | -im0 */
-	"addl $-32, %%edx\n"
-	"movlhps %%xmm7, %%xmm6\n"      /* 0.0 | im3 | 0.0 | im2 */
-	"movlhps %%xmm3, %%xmm2\n"      /* 0.0 | re3 | 0.0 | re2 */
-	"mulps   %%xmm4, %%xmm1\n"
-	"movups (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
-	"shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
-	"movups %%xmm1, (%%eax)\n"
-	"addl $32, %%esi\n"
-	"subps %%xmm6, %%xmm2\n"        /* re | -im3 | re | -im2 */
-	"addl $-32, %%edi\n"
-	"mulps %%xmm5, %%xmm2\n"
-	"addl $32, %%eax\n"
-	"movups %%xmm2, -16(%%eax)\n"
-	"decl %%ecx\n"
-	"jnz .second_128_delay\n"
-
-	"popl %%edi\n"
-	"popl %%esi\n"
-	"popl %%edx\n"
-	"popl %%ecx\n"
-	"popl %%ebx\n"
-	"popl %%eax\n"
-	
-	"leave\n"
-    ::);
-}
-
-static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, float *window_prt, float *delay_prt)
-{
-    __asm__ __volatile__ (
-	"pushl %%ebp\n"
-	"movl  %%esp, %%ebp\n"
-	
-	"pushl %%eax\n"
-	"pushl %%ebx\n"
-	"pushl %%ecx\n"
-	"pushl %%edx\n"
-	"pushl %%esi\n"
-	"pushl %%edi\n"
-
-	/* movl 20(%%ebp), %%ebx delay */
-	"movl 16(%%ebp), %%edx\n"   /* window */
-
-	"movl   8(%%ebp), %%eax\n"  /* buf */
-	"movl $16, %%ecx\n"         /* loop count */
-	"leal 516(%%eax), %%esi\n"  /* buf[64].im */
-	"leal 504(%%eax), %%edi\n"  /* buf[63].re */
-	"movl  12(%%ebp), %%eax\n"  /* data */
-    
-".first_128_sample:\n"
-	"movss   (%%esi), %%xmm0\n"
-	"movss  8(%%esi), %%xmm2\n"
-	"movss   (%%edi), %%xmm1\n"
-	"movss -8(%%edi), %%xmm3\n"
-
-	"movlhps %%xmm2, %%xmm0\n"      /* 0.0 | im1 | 0.0 | im0 */
-	"movlhps %%xmm3, %%xmm1\n"      /* 0.0 | re1 | 0.0 | re0 */
-
-	"movups (%%edx), %%xmm4\n"      /* w3 | w2 | w1 | w0 */
-    /* movups (%%ebx), %%xmm5 d3 | d2 | d1 | d0 */
-	"shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
-
-	"movss  16(%%esi), %%xmm6\n"    /* im2 */
-	"movss  24(%%esi), %%xmm7\n"    /* im3 */
-	"subps     %%xmm1, %%xmm0\n"    /* -re1 | im1 | -re0 | im0 */
-	"movss -16(%%edi), %%xmm2\n"    /* re2 */
-	"movss -24(%%edi), %%xmm3\n"    /* re3 */
-	"mulps %%xmm4, %%xmm0\n"
-	"movlhps %%xmm7, %%xmm6\n"      /* 0.0 | im3 | 0.0 | im2 */
-	"movlhps %%xmm3, %%xmm2\n"      /* 0.0 | re3 | 0.0 | re2 */
-	/* addps %%xmm5, %%xmm0 */
-	"shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
-	"movups 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
-	/* movups 16(%%ebx), %%xmm5  d7 | d6 | d5 | d4 */
-	"subps %%xmm2, %%xmm6\n"        /* -re3 | im3 | -re2 | im2 */
-    "addl $32, %%edx\n"
-	"movups %%xmm0, (%%eax)\n"
-	/* addl $32, %%ebx */
-	"mulps %%xmm4, %%xmm6\n"
-	"addl $32, %%esi\n"
-	"addl $32, %%eax\n"
-	/* addps %%xmm5, %%xmm6 */
-	"addl $-32, %%edi\n"
-	"movups %%xmm6, -16(%%eax)\n"
-	"decl %%ecx\n"
-	"jnz .first_128_sample\n"
-
-	"movl    8(%%ebp), %%esi\n"     /* buf[0].re */
-	"leal 1020(%%esi), %%edi\n"     /* buf[127].im */
-	"movl $16, %%ecx\n"             /* loop count */
-    
-".second_128_sample:\n"
-	"movss   (%%esi), %%xmm0\n"     /* buf[i].re */
-	"movss  8(%%esi), %%xmm2\n"     /* re1 */
-	"movss   (%%edi), %%xmm1\n"     /* buf[127-i].im */
-	"movss -8(%%edi), %%xmm3\n"     /* im1 */
-
-	"movlhps %%xmm2, %%xmm0\n"      /* 0.0 | re1 | 0.0 | re0 */
-	"movlhps %%xmm3, %%xmm1\n"      /* 0.0 | im1 | 0.0 | im1 */
-	
-	"movups (%%edx), %%xmm4\n"      /* w3 | w2 | w1 | w0 */
-	/* movups (%%ebx), %%xmm5 d3 | d2 | d1 | d0 */
-
-	"shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
-	"movss  16(%%esi), %%xmm6\n"    /* re2 */
-	"movss  24(%%esi), %%xmm7\n"    /* re3 */
-	"movss -16(%%edi), %%xmm2\n"    /* im2 */
-	"movss -24(%%edi), %%xmm3\n"    /* im3 */
-	"subps %%xmm1, %%xmm0\n"        /* -im1 | re1 | -im0 | re0 */
-	"movlhps %%xmm7, %%xmm6\n"      /* 0.0 | re3 | 0.0 | re2 */
-	"movlhps %%xmm3, %%xmm2\n"      /* 0.0 | im3 | 0.0 | im2 */
-	"mulps %%xmm4, %%xmm0\n"
-	"shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
-	"movups 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
-	"addl $32, %%esi\n"
-	"subps %%xmm2, %%xmm6\n"        /* -im3 | re3 | -im2 | re2 */
-	/* addps %%xmm5, %%xmm0 */
-	"mulps %%xmm4, %%xmm6\n"
-	"addl $-32, %%edi\n"
-	/* movups 16(%%ebx), %%xmm5  d7 | d6 | d5 | d4 */
-	"movups %%xmm0, (%%eax)\n"
-	/* addps %%xmm5, %%xmm6 */
-	"addl $32, %%edx\n"
-	"addl $32, %%eax\n"
-	/* addl $32, %%ebx */
-	"movups %%xmm6, -16(%%eax)\n"
-	"decl %%ecx\n"
-	"jnz .second_128_sample\n"
-
-	"movl   8(%%ebp), %%eax\n"
-	"leal 512(%%eax), %%esi\n"  /* buf[64].re */
-	"leal 508(%%eax), %%edi\n"  /* buf[63].im */
-	"movl $16, %%ecx\n"         /* loop count */
-	"movl  20(%%ebp), %%eax\n"  /* delay */
-    
-".first_128_delays:\n"
-	"movss   (%%esi), %%xmm0\n"
-	"movss  8(%%esi), %%xmm2\n"
-	"movss   (%%edi), %%xmm1\n"
-	"movss -8(%%edi), %%xmm3\n"
-
-	"movlhps %%xmm2, %%xmm0\n"  /* 0.0 | re1 | 0.0 | re0 */
-	"movlhps %%xmm3, %%xmm1\n"  /* 0.0 | im1 | 0.0 | im0 */
-
-	"movups -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
-	"shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
-	"movss  16(%%esi), %%xmm6\n"    /* re2 */
-	"movss  24(%%esi), %%xmm7\n"    /* re3 */
-	"movss -16(%%edi), %%xmm2\n"    /* im2 */
-	"movss -24(%%edi), %%xmm3\n"    /* im3 */
-	"subps %%xmm1, %%xmm0\n"        /* -im1 | re1 | -im0 | re0 */
-	"addl $-32, %%edx\n"
-	"movlhps %%xmm7, %%xmm6\n"      /* 0.0 | re3 | 0.0 | re2 */
-	"movlhps %%xmm3, %%xmm2\n"      /* 0.0 | im3 | 0.0 | im2 */
-	"mulps %%xmm4, %%xmm0\n"
-	"movups (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
-	"shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
-	"movups %%xmm0, (%%eax)\n"
-	"addl $32, %%esi\n"
-	"subps %%xmm2, %%xmm6\n"        /* -im3 | re3 | -im2 | re2 */
-	"addl $-32, %%edi\n"
-	"mulps %%xmm5, %%xmm6\n"
-	"addl $32, %%eax\n"
-	"movups %%xmm6, -16(%%eax)\n"
-	"decl %%ecx\n"
-	"jnz .first_128_delays\n"
-
-	"movl    8(%%ebp), %%ebx\n"
-	"leal    4(%%ebx), %%esi\n" /* buf[0].im */
-	"leal 1016(%%ebx), %%edi\n" /* buf[127].re */
-	"movl $16, %%ecx\n"         /* loop count */
-    
-".second_128_delays:\n"
-	"movss   (%%esi), %%xmm0\n"
-	"movss  8(%%esi), %%xmm2\n"
-	"movss   (%%edi), %%xmm1\n"
-	"movss -8(%%edi), %%xmm3\n"
-
-	"movlhps %%xmm2, %%xmm0\n"  /* 0.0 | im1 | 0.0 | im0 */
-	"movlhps %%xmm3, %%xmm1\n"  /* 0.0 | re1 | 0.0 | re0 */
-
-	"movups -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
-	"shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
-	"movss  16(%%esi), %%xmm6\n"    /* im2 */
-	"movss  24(%%esi), %%xmm7\n"    /* im3 */
-	"movss -16(%%edi), %%xmm2\n"    /* re2 */
-	"movss -24(%%edi), %%xmm3\n"    /* re3 */
-	"subps %%xmm0, %%xmm1\n"        /* re1 | -im1 | re0 | -im0 */
-	"addl $-32, %%edx\n"
-	"movlhps %%xmm7, %%xmm6\n"      /* 0.0 | im3 | 0.0 | im2 */
-	"movlhps %%xmm3, %%xmm2\n"      /* 0.0 | re3 | 0.0 | re2 */
-	"mulps %%xmm4, %%xmm1\n"
-	"movups (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
-	"shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
-	"movups %%xmm1, (%%eax)\n"
-	"addl $32, %%esi\n"
-	"subps %%xmm6, %%xmm2\n"        /* re | -im3 | re | -im2 */
-	"addl $-32, %%edi\n"
-	"mulps %%xmm5, %%xmm2\n"
-	"addl $32, %%eax\n"
-	"movups %%xmm2, -16(%%eax)\n"
-	"decl %%ecx\n"
-	"jnz .second_128_delays\n"
-
-	"popl %%edi\n"
-	"popl %%esi\n"
-	"popl %%edx\n"
-	"popl %%ecx\n"
-	"popl %%ebx\n"
-	"popl %%eax\n"
-	
-	"leave\n"
-    ::);
-}
diff --git a/src/ac3_decoder/ac3_imdct_sse.h b/src/ac3_decoder/ac3_imdct_sse.h
deleted file mode 100644
index 703f7ccd26..0000000000
--- a/src/ac3_decoder/ac3_imdct_sse.h
+++ /dev/null
@@ -1,3 +0,0 @@
-int  imdct_init_sse (imdct_t * p_imdct);
-void imdct_do_512_sse(imdct_t * p_imdct, float data[], float delay[]);
-void imdct_do_512_nol_sse(imdct_t * p_imdct, float data[], float delay[]);
diff --git a/src/ac3_decoder/ac3_internal.h b/src/ac3_decoder/ac3_internal.h
index b6e5bdca62..08419cf2cf 100644
--- a/src/ac3_decoder/ac3_internal.h
+++ b/src/ac3_decoder/ac3_internal.h
@@ -2,7 +2,7 @@
  * ac3_internals.h: needed by the ac3 decoder
  *****************************************************************************
  * Copyright (C) 2000 VideoLAN
- * $Id: ac3_internal.h,v 1.9 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_internal.h,v 1.10 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Michel Lespinasse <walken@zoy.org>
  *
@@ -36,9 +36,6 @@
 /* ac3_bit_allocate.c */
 void bit_allocate (ac3dec_t *);
 
-/* ac3_downmix.c */
-void downmix_init (downmix_t * p_downmix);
-
 /* ac3_exponent.c */
 int exponent_unpack (ac3dec_t *);
 
@@ -56,3 +53,4 @@ void parse_auxdata (ac3dec_t *);
 
 /* ac3_rematrix.c */
 void rematrix (ac3dec_t *);
+
diff --git a/src/ac3_decoder/ac3_mantissa.c b/src/ac3_decoder/ac3_mantissa.c
index b95e5e5952..2f829bfa24 100644
--- a/src/ac3_decoder/ac3_mantissa.c
+++ b/src/ac3_decoder/ac3_mantissa.c
@@ -2,7 +2,7 @@
  * ac3_mantissa.c: ac3 mantissa computation
  *****************************************************************************
  * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_mantissa.c,v 1.28 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_mantissa.c,v 1.29 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -23,6 +23,9 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
 
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
 #include "defs.h"
 
 #include <string.h>                                              /* memcpy() */
@@ -32,393 +35,18 @@
 #include "threads.h"
 #include "mtime.h"
 
+#include "intf_msg.h"
+
 #include "stream_control.h"
 #include "input_ext-dec.h"
 
 #include "audio_output.h"
 
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
 #include "ac3_decoder.h"
 
-#include "intf_msg.h"
-
-#define Q0 ((-2 << 15) / 3.0)
-#define Q1 (0)
-#define Q2 ((2 << 15) / 3.0)
-static const float q_1_0[ 32 ] =
-{
-    Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0,
-    Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1,
-    Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2,
-    0, 0, 0, 0, 0
-};
-static const float q_1_1[ 32 ] =
-{
-    Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
-    Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
-    Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
-    0, 0, 0, 0, 0
-};
-static const float q_1_2[ 32 ] =
-{
-    Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
-    Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
-    Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
-    0, 0, 0, 0, 0
-};
-#undef Q0
-#undef Q1
-#undef Q2
-
-#define Q0 ((-4 << 15) / 5.0)
-#define Q1 ((-2 << 15) / 5.0)
-#define Q2 (0)
-#define Q3 ((2 << 15) / 5.0)
-#define Q4 ((4 << 15) / 5.0)
-static const float q_2_0[ 128 ] =
-{
-    Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,
-    Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,
-    Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,
-    Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,
-    Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,
-    0, 0, 0
-};
-static const float q_2_1[ 128 ] =
-{
-    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
-    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
-    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
-    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
-    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
-    0, 0, 0
-};
-static const float q_2_2[ 128 ] =
-{
-    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
-    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
-    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
-    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
-    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
-    0, 0, 0
-};
-#undef Q0
-#undef Q1
-#undef Q2
-#undef Q3
-#undef Q4
-
-#define Q0 ((-10 << 15) / 11.0)
-#define Q1 ((-8 << 15) / 11.0)
-#define Q2 ((-6 << 15) / 11.0)
-#define Q3 ((-4 << 15) / 11.0)
-#define Q4 ((-2 << 15) / 11.0)
-#define Q5 (0)
-#define Q6 ((2 << 15) / 11.0)
-#define Q7 ((4 << 15) / 11.0)
-#define Q8 ((6 << 15) / 11.0)
-#define Q9 ((8 << 15) / 11.0)
-#define QA ((10 << 15) / 11.0)
-static const float q_4_0[ 128 ] =
-{
-    Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0,
-    Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1,
-    Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2,
-    Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3,
-    Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4,
-    Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5,
-    Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6,
-    Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7,
-    Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8,
-    Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9,
-    QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, QA,
-    0,  0,  0,  0,  0,  0,  0
-};
-static const float q_4_1[ 128 ] =
-{
-    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
-    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
-    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
-    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
-    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
-    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
-    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
-    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
-    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
-    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
-    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
-    0,  0,  0,  0,  0,  0,  0
-};
-#undef Q0
-#undef Q1
-#undef Q2
-#undef Q3
-#undef Q4
-#undef Q5
-#undef Q6
-#undef Q7
-#undef Q8
-#undef Q9
-#undef QA
-
-/* Lookup tables of 0.16 two's complement quantization values */
-
-static const float q_3[8] =
-{
-    (-6 << 15)/7.0, (-4 << 15)/7.0, (-2 << 15)/7.0,
-    0           , (2 << 15)/7.0, (4 << 15)/7.0,
-    (6 << 15)/7.0, 0
-};
-
-static const float q_5[16] =
-{
-    (-14 << 15)/15.0, (-12 << 15)/15.0, (-10 << 15)/15.0,
-    (-8 << 15)/15.0,  (-6 << 15)/15.0,  (-4 << 15)/15.0,
-    (-2 << 15)/15.0,  0            ,    (2 << 15)/15.0,
-    (4 << 15)/15.0,   (6 << 15)/15.0,   (8 << 15)/15.0,
-    (10 << 15)/15.0,  (12 << 15)/15.0,  (14 << 15)/15.0,
-    0
-};
-
-/* Conversion from bap to number of bits in the mantissas
- * zeros account for cases 0,1,2,4 which are special cased */
-static const u16 qnttztab[16] =
-{
-    0, 0, 0, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16
-};
-
-static const float scale_factor[25] =
-{
-    6.10351562500000000000000000e-05,
-    3.05175781250000000000000000e-05,
-    1.52587890625000000000000000e-05,
-    7.62939453125000000000000000e-06,
-    3.81469726562500000000000000e-06,
-    1.90734863281250000000000000e-06,
-    9.53674316406250000000000000e-07,
-    4.76837158203125000000000000e-07,
-    2.38418579101562500000000000e-07,
-    1.19209289550781250000000000e-07,
-    5.96046447753906250000000000e-08,
-    2.98023223876953125000000000e-08,
-    1.49011611938476562500000000e-08,
-    7.45058059692382812500000000e-09,
-    3.72529029846191406250000000e-09,
-    1.86264514923095703125000000e-09,
-    9.31322574615478515625000000e-10,
-    4.65661287307739257812500000e-10,
-    2.32830643653869628906250000e-10,
-    1.16415321826934814453125000e-10,
-    5.82076609134674072265625000e-11,
-    2.91038304567337036132812500e-11,
-    1.45519152283668518066406250e-11,
-    7.27595761418342590332031250e-12,
-    3.63797880709171295166015625e-12,
-};
-
-static const u16 dither_lut[256] =
-{
- 0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055,
- 0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb,
- 0x21cd, 0x81dc, 0xc1fe, 0x61ef, 0x41ba, 0xe1ab, 0xa189, 0x0198,
- 0xe123, 0x4132, 0x0110, 0xa101, 0x8154, 0x2145, 0x6167, 0xc176,
- 0x439a, 0xe38b, 0xa3a9, 0x03b8, 0x23ed, 0x83fc, 0xc3de, 0x63cf,
- 0x8374, 0x2365, 0x6347, 0xc356, 0xe303, 0x4312, 0x0330, 0xa321,
- 0x6257, 0xc246, 0x8264, 0x2275, 0x0220, 0xa231, 0xe213, 0x4202,
- 0xa2b9, 0x02a8, 0x428a, 0xe29b, 0xc2ce, 0x62df, 0x22fd, 0x82ec,
- 0x8734, 0x2725, 0x6707, 0xc716, 0xe743, 0x4752, 0x0770, 0xa761,
- 0x47da, 0xe7cb, 0xa7e9, 0x07f8, 0x27ad, 0x87bc, 0xc79e, 0x678f,
- 0xa6f9, 0x06e8, 0x46ca, 0xe6db, 0xc68e, 0x669f, 0x26bd, 0x86ac,
- 0x6617, 0xc606, 0x8624, 0x2635, 0x0660, 0xa671, 0xe653, 0x4642,
- 0xc4ae, 0x64bf, 0x249d, 0x848c, 0xa4d9, 0x04c8, 0x44ea, 0xe4fb,
- 0x0440, 0xa451, 0xe473, 0x4462, 0x6437, 0xc426, 0x8404, 0x2415,
- 0xe563, 0x4572, 0x0550, 0xa541, 0x8514, 0x2505, 0x6527, 0xc536,
- 0x258d, 0x859c, 0xc5be, 0x65af, 0x45fa, 0xe5eb, 0xa5c9, 0x05d8,
- 0xae79, 0x0e68, 0x4e4a, 0xee5b, 0xce0e, 0x6e1f, 0x2e3d, 0x8e2c,
- 0x6e97, 0xce86, 0x8ea4, 0x2eb5, 0x0ee0, 0xaef1, 0xeed3, 0x4ec2,
- 0x8fb4, 0x2fa5, 0x6f87, 0xcf96, 0xefc3, 0x4fd2, 0x0ff0, 0xafe1,
- 0x4f5a, 0xef4b, 0xaf69, 0x0f78, 0x2f2d, 0x8f3c, 0xcf1e, 0x6f0f,
- 0xede3, 0x4df2, 0x0dd0, 0xadc1, 0x8d94, 0x2d85, 0x6da7, 0xcdb6,
- 0x2d0d, 0x8d1c, 0xcd3e, 0x6d2f, 0x4d7a, 0xed6b, 0xad49, 0x0d58,
- 0xcc2e, 0x6c3f, 0x2c1d, 0x8c0c, 0xac59, 0x0c48, 0x4c6a, 0xec7b,
- 0x0cc0, 0xacd1, 0xecf3, 0x4ce2, 0x6cb7, 0xcca6, 0x8c84, 0x2c95,
- 0x294d, 0x895c, 0xc97e, 0x696f, 0x493a, 0xe92b, 0xa909, 0x0918,
- 0xe9a3, 0x49b2, 0x0990, 0xa981, 0x89d4, 0x29c5, 0x69e7, 0xc9f6,
- 0x0880, 0xa891, 0xe8b3, 0x48a2, 0x68f7, 0xc8e6, 0x88c4, 0x28d5,
- 0xc86e, 0x687f, 0x285d, 0x884c, 0xa819, 0x0808, 0x482a, 0xe83b,
- 0x6ad7, 0xcac6, 0x8ae4, 0x2af5, 0x0aa0, 0xaab1, 0xea93, 0x4a82,
- 0xaa39, 0x0a28, 0x4a0a, 0xea1b, 0xca4e, 0x6a5f, 0x2a7d, 0x8a6c,
- 0x4b1a, 0xeb0b, 0xab29, 0x0b38, 0x2b6d, 0x8b7c, 0xcb5e, 0x6b4f,
- 0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1
-};
-
-static __inline__ u16 dither_gen (mantissa_t * p_mantissa)
-{
-        s16 state;
-
-        state = dither_lut[p_mantissa->lfsr_state >> 8] ^ 
-                    (p_mantissa->lfsr_state << 8);
-        p_mantissa->lfsr_state = (u16) state;
-        return ( (state * (s32) (0.707106 * 256.0)) >> 8 );
-}
-
-
-/* Fetch an unpacked, left justified, and properly biased/dithered mantissa value */
-static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithflag,
-                                   u16 exp)
-{
-    u16 group_code = 0;
-
-    /* If the bap is 0-5 then we have special cases to take care of */
-    switch (bap)
-    {
-        case 0:
-            if (dithflag)
-            {
-                return ( dither_gen(&p_ac3dec->mantissa) * scale_factor[exp] );
-            }    
-            return (0);
-
-        case 1:
-            if (p_ac3dec->mantissa.q_1_pointer >= 0)
-            {
-                return (p_ac3dec->mantissa.q_1[p_ac3dec->mantissa.q_1_pointer--] *
-                        scale_factor[exp]);
-            }
-
-            p_ac3dec->total_bits_read += 5;
-            if ((group_code = GetBits (&p_ac3dec->bit_stream,5)) > 26)
-            {
-                intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (1)" );
-                return 0;
-            }
-    
-            p_ac3dec->mantissa.q_1[ 1 ] = q_1_1[ group_code ];
-            p_ac3dec->mantissa.q_1[ 0 ] = q_1_2[ group_code ];
-    
-            p_ac3dec->mantissa.q_1_pointer = 1;
-    
-            return (q_1_0[group_code] * scale_factor[exp]);
-    
-        case 2:
-            if (p_ac3dec->mantissa.q_2_pointer >= 0)
-            {
-                return (p_ac3dec->mantissa.q_2[p_ac3dec->mantissa.q_2_pointer--] *
-                        scale_factor[exp]);
-            }
-            
-            p_ac3dec->total_bits_read += 7;
-            if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 124)
-            {
-                intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (2)" );
-                return 0;
-            }
-
-            p_ac3dec->mantissa.q_2[ 1 ] = q_2_1[ group_code ];
-            p_ac3dec->mantissa.q_2[ 0 ] = q_2_2[ group_code ];
-
-            p_ac3dec->mantissa.q_2_pointer = 1;
-
-            return (q_2_0[group_code] * scale_factor[exp]);
-
-        case 3:
-            p_ac3dec->total_bits_read += 3;
-            if ((group_code = GetBits (&p_ac3dec->bit_stream,3)) > 6)
-            {
-                intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (3)" );
-                return 0;
-            }
-
-            return (q_3[group_code] * scale_factor[exp]);
-
-        case 4:
-            if (p_ac3dec->mantissa.q_4_pointer >= 0)
-            {
-                return (p_ac3dec->mantissa.q_4[p_ac3dec->mantissa.q_4_pointer--] *
-                        scale_factor[exp]);
-            }
-
-            p_ac3dec->total_bits_read += 7;
-            if ((group_code = GetBits (&p_ac3dec->bit_stream,7)) > 120)
-            {
-                intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (4)" );
-                return 0;
-            }
-
-            p_ac3dec->mantissa.q_4[ 0 ] = q_4_1[group_code];
-
-            p_ac3dec->mantissa.q_4_pointer = 0;
-
-            return (q_4_0[group_code] * scale_factor[exp]);
-
-        case 5:
-            p_ac3dec->total_bits_read += 4;
-            if ((group_code = GetBits (&p_ac3dec->bit_stream,4)) > 14)
-            {
-                intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (5)" );
-                return 0;
-            }
-
-            return (q_5[group_code] * scale_factor[exp]);
-
-        default:
-            group_code = GetBits (&p_ac3dec->bit_stream,qnttztab[bap]);
-            group_code <<= 16 - qnttztab[bap];
-            p_ac3dec->total_bits_read += qnttztab[bap];
-
-            return ((s16)(group_code) * scale_factor[exp]);
-    }
-}
-
-/* Uncouple the coupling channel into a fbw channel */
-static __inline__ void uncouple_channel (ac3dec_t * p_ac3dec, u32 ch)
-{
-    u32 bnd = 0;
-    u32 sub_bnd = 0;
-    u32 i,j;
-    float cpl_coord = 1.0;
-    u32 cpl_exp_tmp;
-    u32 cpl_mant_tmp;
-
-    for (i = p_ac3dec->audblk.cplstrtmant; i < p_ac3dec->audblk.cplendmant;)
-    {
-        if (!p_ac3dec->audblk.cplbndstrc[sub_bnd++])
-        {
-            cpl_exp_tmp = p_ac3dec->audblk.cplcoexp[ch][bnd] +
-                3 * p_ac3dec->audblk.mstrcplco[ch];
-            if (p_ac3dec->audblk.cplcoexp[ch][bnd] == 15)
-            {
-                cpl_mant_tmp = (p_ac3dec->audblk.cplcomant[ch][bnd]) << 11;
-            }
-            else
-            {
-                cpl_mant_tmp = ((0x10) | p_ac3dec->audblk.cplcomant[ch][bnd]) << 10;
-            }
-            cpl_coord = (cpl_mant_tmp) * scale_factor[cpl_exp_tmp] * 8.0f;
-
-            /* Invert the phase for the right channel if necessary */
-            if (p_ac3dec->bsi.acmod == 0x02 && p_ac3dec->audblk.phsflginu &&
-                    ch == 1 && p_ac3dec->audblk.phsflg[bnd])
-            {
-                cpl_coord *= -1;
-            }
-            bnd++;
-        }
-
-        for (j=0;j < 12; j++)
-        {
-            /* Get new dither values for each channel if necessary,
-             * so the channels are uncorrelated */
-            if (p_ac3dec->audblk.dithflag[ch] && !p_ac3dec->audblk.cpl_bap[i])
-            {
-                p_ac3dec->samples[ch][i] = cpl_coord * dither_gen(&p_ac3dec->mantissa) *
-                    scale_factor[p_ac3dec->audblk.cpl_exp[i]];
-            } else {
-                p_ac3dec->samples[ch][i]  = cpl_coord * p_ac3dec->audblk.cpl_flt[i];
-            }
-            i++;
-        }
-    }
-}
+#include "ac3_mantissa.h"
 
 void mantissa_unpack (ac3dec_t * p_ac3dec)
 {
diff --git a/src/ac3_decoder/ac3_mantissa.h b/src/ac3_decoder/ac3_mantissa.h
new file mode 100644
index 0000000000..687fb66eb9
--- /dev/null
+++ b/src/ac3_decoder/ac3_mantissa.h
@@ -0,0 +1,404 @@
+/*****************************************************************************
+ * ac3_mantissa.h: ac3 mantissa computation
+ *****************************************************************************
+ * Copyright (C) 1999, 2000, 2001 VideoLAN
+ * $Id: ac3_mantissa.h,v 1.4 2001/05/15 16:19:42 sam Exp $
+ *
+ * Authors: Michel Kaempf <maxx@via.ecp.fr>
+ *          Aaron Holtzman <aholtzma@engr.uvic.ca>
+ *          Renaud Dartus <reno@videolan.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ * 
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+#define Q0 (-(2 << 15) / 3.0)   /* -2/3 * 2^15; negated after the shift because << on a negative int is undefined */
+#define Q1 (0)                  /*  0 */
+#define Q2 ((2 << 15) / 3.0)    /* +2/3 * 2^15 */
+static const float q_1_0[ 32 ] =
+{
+    Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0,
+    Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1,
+    Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2,
+    0, 0, 0, 0, 0
+};
+static const float q_1_1[ 32 ] =
+{
+    Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
+    Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
+    Q0, Q0, Q0, Q1, Q1, Q1, Q2, Q2, Q2,
+    0, 0, 0, 0, 0
+};
+static const float q_1_2[ 32 ] =
+{
+    Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
+    Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
+    Q0, Q1, Q2, Q0, Q1, Q2, Q0, Q1, Q2,
+    0, 0, 0, 0, 0
+};
+#undef Q0
+#undef Q1
+#undef Q2
+
+#define Q0 (-(4 << 15) / 5.0)   /* -4/5 * 2^15; negated after the shift because << on a negative int is undefined */
+#define Q1 (-(2 << 15) / 5.0)   /* -2/5 * 2^15 */
+#define Q2 (0)                  /*  0 */
+#define Q3 ((2 << 15) / 5.0)    /* +2/5 * 2^15 */
+#define Q4 ((4 << 15) / 5.0)    /* +4/5 * 2^15 */
+static const float q_2_0[ 128 ] =
+{
+    Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,Q0,
+    Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,Q1,
+    Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,Q2,
+    Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,Q3,
+    Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,Q4,
+    0, 0, 0
+};
+static const float q_2_1[ 128 ] =
+{
+    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+    Q0,Q0,Q0,Q0,Q0,Q1,Q1,Q1,Q1,Q1,Q2,Q2,Q2,Q2,Q2,Q3,Q3,Q3,Q3,Q3,Q4,Q4,Q4,Q4,Q4,
+    0, 0, 0
+};
+static const float q_2_2[ 128 ] =
+{
+    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+    Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,Q0,Q1,Q2,Q3,Q4,
+    0, 0, 0
+};
+#undef Q0
+#undef Q1
+#undef Q2
+#undef Q3
+#undef Q4
+
+#define Q0 (-(10 << 15) / 11.0)  /* -10/11 * 2^15; negated after the shift because << on a negative int is undefined */
+#define Q1 (-(8 << 15) / 11.0)   /*  -8/11 * 2^15 */
+#define Q2 (-(6 << 15) / 11.0)   /*  -6/11 * 2^15 */
+#define Q3 (-(4 << 15) / 11.0)   /*  -4/11 * 2^15 */
+#define Q4 (-(2 << 15) / 11.0)   /*  -2/11 * 2^15 */
+#define Q5 (0)                   /*   0 */
+#define Q6 ((2 << 15) / 11.0)    /*  +2/11 * 2^15 */
+#define Q7 ((4 << 15) / 11.0)    /*  +4/11 * 2^15 */
+#define Q8 ((6 << 15) / 11.0)    /*  +6/11 * 2^15 */
+#define Q9 ((8 << 15) / 11.0)    /*  +8/11 * 2^15 */
+#define QA ((10 << 15) / 11.0)   /* +10/11 * 2^15 */
+static const float q_4_0[ 128 ] =
+{
+    Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0, Q0,
+    Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1, Q1,
+    Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2, Q2,
+    Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3, Q3,
+    Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4, Q4,
+    Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5, Q5,
+    Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6, Q6,
+    Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7, Q7,
+    Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8, Q8,
+    Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9, Q9,
+    QA, QA, QA, QA, QA, QA, QA, QA, QA, QA, QA,
+    0,  0,  0,  0,  0,  0,  0
+};
+static const float q_4_1[ 128 ] =
+{
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7, Q8, Q9, QA,
+    0,  0,  0,  0,  0,  0,  0
+};
+#undef Q0
+#undef Q1
+#undef Q2
+#undef Q3
+#undef Q4
+#undef Q5
+#undef Q6
+#undef Q7
+#undef Q8
+#undef Q9
+#undef QA
+
+/* Lookup tables of 0.16 two's complement quantization values */
+
+static const float q_3[8] =   /* levels k/7 * 2^15 for k = -6, -4, ..., 6, then one zero filler */
+{   /* negative levels are written -(n << 15): left-shifting a negative int is undefined */
+    -(6 << 15)/7.0, -(4 << 15)/7.0, -(2 << 15)/7.0,
+    0           , (2 << 15)/7.0, (4 << 15)/7.0,
+    (6 << 15)/7.0, 0
+};
+
+static const float q_5[16] =   /* levels k/15 * 2^15 for k = -14, -12, ..., 14, then one zero filler */
+{   /* negative levels are written -(n << 15): left-shifting a negative int is undefined */
+    -(14 << 15)/15.0, -(12 << 15)/15.0, -(10 << 15)/15.0,
+    -(8 << 15)/15.0,  -(6 << 15)/15.0,  -(4 << 15)/15.0,
+    -(2 << 15)/15.0,  0            ,    (2 << 15)/15.0,
+    (4 << 15)/15.0,   (6 << 15)/15.0,   (8 << 15)/15.0,
+    (10 << 15)/15.0,  (12 << 15)/15.0,  (14 << 15)/15.0,
+    0
+};
+
+/* Conversion from bap to number of bits in the mantissas
+ * zeros account for cases 0,1,2,4 which are special cased */
+static const u16 qnttztab[16] =
+{
+    0, 0, 0, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16
+};
+
+static const float scale_factor[25] =
+{
+    6.10351562500000000000000000e-05,
+    3.05175781250000000000000000e-05,
+    1.52587890625000000000000000e-05,
+    7.62939453125000000000000000e-06,
+    3.81469726562500000000000000e-06,
+    1.90734863281250000000000000e-06,
+    9.53674316406250000000000000e-07,
+    4.76837158203125000000000000e-07,
+    2.38418579101562500000000000e-07,
+    1.19209289550781250000000000e-07,
+    5.96046447753906250000000000e-08,
+    2.98023223876953125000000000e-08,
+    1.49011611938476562500000000e-08,
+    7.45058059692382812500000000e-09,
+    3.72529029846191406250000000e-09,
+    1.86264514923095703125000000e-09,
+    9.31322574615478515625000000e-10,
+    4.65661287307739257812500000e-10,
+    2.32830643653869628906250000e-10,
+    1.16415321826934814453125000e-10,
+    5.82076609134674072265625000e-11,
+    2.91038304567337036132812500e-11,
+    1.45519152283668518066406250e-11,
+    7.27595761418342590332031250e-12,
+    3.63797880709171295166015625e-12,
+};
+
+static const u16 dither_lut[256] =
+{
+ 0x0000, 0xa011, 0xe033, 0x4022, 0x6077, 0xc066, 0x8044, 0x2055,
+ 0xc0ee, 0x60ff, 0x20dd, 0x80cc, 0xa099, 0x0088, 0x40aa, 0xe0bb,
+ 0x21cd, 0x81dc, 0xc1fe, 0x61ef, 0x41ba, 0xe1ab, 0xa189, 0x0198,
+ 0xe123, 0x4132, 0x0110, 0xa101, 0x8154, 0x2145, 0x6167, 0xc176,
+ 0x439a, 0xe38b, 0xa3a9, 0x03b8, 0x23ed, 0x83fc, 0xc3de, 0x63cf,
+ 0x8374, 0x2365, 0x6347, 0xc356, 0xe303, 0x4312, 0x0330, 0xa321,
+ 0x6257, 0xc246, 0x8264, 0x2275, 0x0220, 0xa231, 0xe213, 0x4202,
+ 0xa2b9, 0x02a8, 0x428a, 0xe29b, 0xc2ce, 0x62df, 0x22fd, 0x82ec,
+ 0x8734, 0x2725, 0x6707, 0xc716, 0xe743, 0x4752, 0x0770, 0xa761,
+ 0x47da, 0xe7cb, 0xa7e9, 0x07f8, 0x27ad, 0x87bc, 0xc79e, 0x678f,
+ 0xa6f9, 0x06e8, 0x46ca, 0xe6db, 0xc68e, 0x669f, 0x26bd, 0x86ac,
+ 0x6617, 0xc606, 0x8624, 0x2635, 0x0660, 0xa671, 0xe653, 0x4642,
+ 0xc4ae, 0x64bf, 0x249d, 0x848c, 0xa4d9, 0x04c8, 0x44ea, 0xe4fb,
+ 0x0440, 0xa451, 0xe473, 0x4462, 0x6437, 0xc426, 0x8404, 0x2415,
+ 0xe563, 0x4572, 0x0550, 0xa541, 0x8514, 0x2505, 0x6527, 0xc536,
+ 0x258d, 0x859c, 0xc5be, 0x65af, 0x45fa, 0xe5eb, 0xa5c9, 0x05d8,
+ 0xae79, 0x0e68, 0x4e4a, 0xee5b, 0xce0e, 0x6e1f, 0x2e3d, 0x8e2c,
+ 0x6e97, 0xce86, 0x8ea4, 0x2eb5, 0x0ee0, 0xaef1, 0xeed3, 0x4ec2,
+ 0x8fb4, 0x2fa5, 0x6f87, 0xcf96, 0xefc3, 0x4fd2, 0x0ff0, 0xafe1,
+ 0x4f5a, 0xef4b, 0xaf69, 0x0f78, 0x2f2d, 0x8f3c, 0xcf1e, 0x6f0f,
+ 0xede3, 0x4df2, 0x0dd0, 0xadc1, 0x8d94, 0x2d85, 0x6da7, 0xcdb6,
+ 0x2d0d, 0x8d1c, 0xcd3e, 0x6d2f, 0x4d7a, 0xed6b, 0xad49, 0x0d58,
+ 0xcc2e, 0x6c3f, 0x2c1d, 0x8c0c, 0xac59, 0x0c48, 0x4c6a, 0xec7b,
+ 0x0cc0, 0xacd1, 0xecf3, 0x4ce2, 0x6cb7, 0xcca6, 0x8c84, 0x2c95,
+ 0x294d, 0x895c, 0xc97e, 0x696f, 0x493a, 0xe92b, 0xa909, 0x0918,
+ 0xe9a3, 0x49b2, 0x0990, 0xa981, 0x89d4, 0x29c5, 0x69e7, 0xc9f6,
+ 0x0880, 0xa891, 0xe8b3, 0x48a2, 0x68f7, 0xc8e6, 0x88c4, 0x28d5,
+ 0xc86e, 0x687f, 0x285d, 0x884c, 0xa819, 0x0808, 0x482a, 0xe83b,
+ 0x6ad7, 0xcac6, 0x8ae4, 0x2af5, 0x0aa0, 0xaab1, 0xea93, 0x4a82,
+ 0xaa39, 0x0a28, 0x4a0a, 0xea1b, 0xca4e, 0x6a5f, 0x2a7d, 0x8a6c,
+ 0x4b1a, 0xeb0b, 0xab29, 0x0b38, 0x2b6d, 0x8b7c, 0xcb5e, 0x6b4f,
+ 0x8bf4, 0x2be5, 0x6bc7, 0xcbd6, 0xeb83, 0x4b92, 0x0bb0, 0xaba1
+};
+
+/* Step the dither state (lfsr_state) and return the new state scaled by about 0.707 */
+static __inline__ s16 dither_gen (mantissa_t * p_mantissa)
+{
+        /* Returned as s16: callers multiply the result by a float, and a u16 return made negative values wrap positive */
+        s16 state = dither_lut[p_mantissa->lfsr_state >> 8] ^
+                    (p_mantissa->lfsr_state << 8);
+        p_mantissa->lfsr_state = (u16) state;
+        return ( (state * (s32) (0.707106 * 256.0)) >> 8 );
+}
+
+
+/* Read (or synthesize) the next mantissa for this bap and scale it by scale_factor[exp] */
+static __inline__ float coeff_get_float (ac3dec_t * p_ac3dec, u16 bap, u16 dithflag,
+                                   u16 exp)
+{
+    u16 code = 0;
+
+    /* baps 0 to 5 have dedicated paths; every other bap is a plain bit field */
+    switch (bap)
+    {
+        case 0:
+            /* no bits are consumed: the result is a dither value or zero */
+            if (!dithflag)
+            {
+                return (0);
+            }
+            return ( dither_gen(&p_ac3dec->mantissa) * scale_factor[exp] );
+
+        case 1:
+            if (p_ac3dec->mantissa.q_1_pointer < 0)
+            {
+                /* queue empty: one 5-bit code gives three values, two get queued */
+                p_ac3dec->total_bits_read += 5;
+                code = GetBits (&p_ac3dec->bit_stream,5);
+                if (code > 26)
+                {
+                    intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (1)" );
+                    return 0;
+                }
+                p_ac3dec->mantissa.q_1[ 1 ] = q_1_1[ code ];
+                p_ac3dec->mantissa.q_1[ 0 ] = q_1_2[ code ];
+                p_ac3dec->mantissa.q_1_pointer = 1;
+                return (q_1_0[code] * scale_factor[exp]);
+            }
+            return (p_ac3dec->mantissa.q_1[p_ac3dec->mantissa.q_1_pointer--] *
+                    scale_factor[exp]);
+
+        case 2:
+            if (p_ac3dec->mantissa.q_2_pointer < 0)
+            {
+                /* queue empty: one 7-bit code gives three values, two get queued */
+                p_ac3dec->total_bits_read += 7;
+                code = GetBits (&p_ac3dec->bit_stream,7);
+                if (code > 124)
+                {
+                    intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (2)" );
+                    return 0;
+                }
+                p_ac3dec->mantissa.q_2[ 1 ] = q_2_1[ code ];
+                p_ac3dec->mantissa.q_2[ 0 ] = q_2_2[ code ];
+                p_ac3dec->mantissa.q_2_pointer = 1;
+                return (q_2_0[code] * scale_factor[exp]);
+            }
+            return (p_ac3dec->mantissa.q_2[p_ac3dec->mantissa.q_2_pointer--] *
+                    scale_factor[exp]);
+
+        case 3:
+            /* one 3-bit code per value, looked up in q_3 */
+            p_ac3dec->total_bits_read += 3;
+            code = GetBits (&p_ac3dec->bit_stream,3);
+            if (code > 6)
+            {
+                intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (3)" );
+                return 0;
+            }
+            return (q_3[code] * scale_factor[exp]);
+
+        case 4:
+            if (p_ac3dec->mantissa.q_4_pointer < 0)
+            {
+                /* queue empty: one 7-bit code gives two values, one gets queued */
+                p_ac3dec->total_bits_read += 7;
+                code = GetBits (&p_ac3dec->bit_stream,7);
+                if (code > 120)
+                {
+                    intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (4)" );
+                    return 0;
+                }
+                p_ac3dec->mantissa.q_4[ 0 ] = q_4_1[code];
+                p_ac3dec->mantissa.q_4_pointer = 0;
+                return (q_4_0[code] * scale_factor[exp]);
+            }
+            return (p_ac3dec->mantissa.q_4[p_ac3dec->mantissa.q_4_pointer--] *
+                    scale_factor[exp]);
+
+        case 5:
+            /* one 4-bit code per value, looked up in q_5 */
+            p_ac3dec->total_bits_read += 4;
+            code = GetBits (&p_ac3dec->bit_stream,4);
+            if (code > 14)
+            {
+                intf_WarnMsg ( 3, "ac3dec warn: invalid mantissa (5)" );
+                return 0;
+            }
+            return (q_5[code] * scale_factor[exp]);
+
+        default:
+        {
+            /* qnttztab gives the field width; the field is left-justified in 16 bits and read as signed */
+            const u16 nbits = qnttztab[bap];
+            code = GetBits (&p_ac3dec->bit_stream,nbits);
+            code <<= 16 - nbits;
+            p_ac3dec->total_bits_read += nbits;
+            return ((s16)(code) * scale_factor[exp]);
+        }
+    }
+}
+
+/* Derive the samples of fbw channel ch from the coupling channel: every run of 12
+ * mantissas from cplstrtmant up to cplendmant is scaled by the coupling coordinate
+ * that is current for that run */
+static __inline__ void uncouple_channel (ac3dec_t * p_ac3dec, u32 ch)
+{
+    u32 band = 0;
+    u32 sub_band = 0;
+    u32 mant, stop;
+    u32 exp_sum, mant_val;
+    float cpl_coord = 1.0;
+
+    mant = p_ac3dec->audblk.cplstrtmant;
+    while (mant < p_ac3dec->audblk.cplendmant)
+    {
+        /* a cleared cplbndstrc flag starts a new band and a new coordinate;
+         * otherwise the coordinate computed for the previous sub-band is kept */
+        if (!p_ac3dec->audblk.cplbndstrc[sub_band++])
+        {
+            exp_sum = p_ac3dec->audblk.cplcoexp[ch][band] +
+                3 * p_ac3dec->audblk.mstrcplco[ch];
+            mant_val = (p_ac3dec->audblk.cplcoexp[ch][band] == 15)
+                ? (p_ac3dec->audblk.cplcomant[ch][band]) << 11
+                : ((0x10) | p_ac3dec->audblk.cplcomant[ch][band]) << 10;
+            cpl_coord = (mant_val) * scale_factor[exp_sum] * 8.0f;
+
+            /* flip the sign when acmod is 0x02, phsflginu is set, ch is 1 and this band's phsflg is set */
+            if (p_ac3dec->bsi.acmod == 0x02 && p_ac3dec->audblk.phsflginu &&
+                    ch == 1 && p_ac3dec->audblk.phsflg[band])
+            {
+                cpl_coord = -cpl_coord;
+            }
+            band++;
+        }
+
+        for (stop = mant + 12; mant < stop; mant++)
+        {
+            if (!p_ac3dec->audblk.dithflag[ch] || p_ac3dec->audblk.cpl_bap[mant])
+            {
+                p_ac3dec->samples[ch][mant] = cpl_coord * p_ac3dec->audblk.cpl_flt[mant];
+            }
+            else
+            {
+                /* zero-bap mantissa with dither enabled: draw a fresh dither value
+                 * here so that the channels stay uncorrelated */
+                p_ac3dec->samples[ch][mant] = cpl_coord * dither_gen(&p_ac3dec->mantissa) *
+                    scale_factor[p_ac3dec->audblk.cpl_exp[mant]];
+            }
+        }
+    }
+}
+
diff --git a/src/ac3_decoder/ac3_parse.c b/src/ac3_decoder/ac3_parse.c
index 41310da6be..0903da5804 100644
--- a/src/ac3_decoder/ac3_parse.c
+++ b/src/ac3_decoder/ac3_parse.c
@@ -2,7 +2,7 @@
  * ac3_parse.c: ac3 parsing procedures
  *****************************************************************************
  * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_parse.c,v 1.22 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_parse.c,v 1.23 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -23,6 +23,9 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
 
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
 #include "defs.h"
 
 #include <string.h>                                              /* memset() */
@@ -33,12 +36,15 @@
 #include "threads.h"
 #include "mtime.h"
 
+#include "intf_msg.h"
+
 #include "stream_control.h"
 #include "input_ext-dec.h"
 
 #include "audio_output.h"
 
-#include "intf_msg.h"
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
 #include "ac3_decoder.h"
 #include "ac3_decoder_thread.h"                           /* ac3dec_thread_t */
 
@@ -871,7 +877,6 @@ static void parse_audblk_stats (ac3dec_t * p_ac3dec)
     for(i=0;i<p_ac3dec->bsi.nfchans;i++)
             intf_ErrMsg ("%1d",p_ac3dec->audblk.blksw[i]);
     intf_ErrMsg ("]");
-
-    intf_ErrMsg ("\n");
 }
 #endif
+
diff --git a/src/ac3_decoder/ac3_rematrix.c b/src/ac3_decoder/ac3_rematrix.c
index 3189239b5b..d9aca1cb18 100644
--- a/src/ac3_decoder/ac3_rematrix.c
+++ b/src/ac3_decoder/ac3_rematrix.c
@@ -2,7 +2,7 @@
  * ac3_rematrix.c: ac3 audio rematrixing
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_rematrix.c,v 1.17 2001/05/14 15:58:04 reno Exp $
+ * $Id: ac3_rematrix.c,v 1.18 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -21,6 +21,10 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
 #include "defs.h"
 
 #include <string.h>                                              /* memcpy() */
@@ -33,6 +37,8 @@
 #include "stream_control.h"
 #include "input_ext-dec.h"
 
+#include "ac3_imdct.h"
+#include "ac3_downmix.h"
 #include "ac3_decoder.h"
 
 struct rematrix_band_s {
@@ -79,3 +85,4 @@ void rematrix (ac3dec_t * p_ac3dec)
         }
     }
 }
+
diff --git a/src/ac3_decoder/ac3_srfft_sse.c b/src/ac3_decoder/ac3_srfft_sse.c
deleted file mode 100644
index 8f5294631c..0000000000
--- a/src/ac3_decoder/ac3_srfft_sse.c
+++ /dev/null
@@ -1,369 +0,0 @@
-/*****************************************************************************
- * ac3_srfft_sse.c: ac3 fft functions
- *****************************************************************************
- * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_srfft_sse.c,v 1.1 2001/05/14 15:58:04 reno Exp $
- *
- * Authors: Renaud Dartus <reno@videolan.org>
- *          Aaron Holtzman <aholtzma@engr.uvic.ca>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- * 
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
- *****************************************************************************/
-
-#include <stdio.h>
-
-#include "defs.h"
-
-#include <math.h>
-#include <stdio.h>
-
-#include "config.h"
-#include "common.h"
-#include "threads.h"
-#include "mtime.h"
-
-#include "stream_control.h"
-#include "input_ext-dec.h"
-
-#include "ac3_decoder.h"
-#include "ac3_srfft.h"
-
-void hsqrt2 (void);
-void C_1 (void);
-static void fft_4_sse (complex_t *x);
-static void fft_8_sse (complex_t *x);
-static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
-	     const complex_t *d, const complex_t *d_3);
-
-void fft_64p_sse(complex_t *a)
-{
-	fft_8_sse(&a[0]); fft_4_sse(&a[8]); fft_4_sse(&a[12]);
-	fft_asmb_sse(2, &a[0], &a[8], &delta16[0], &delta16_3[0]);
-  
-	fft_8_sse(&a[16]), fft_8_sse(&a[24]);
-	fft_asmb_sse(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
-
-	fft_8_sse(&a[32]); fft_4_sse(&a[40]); fft_4_sse(&a[44]);
-	fft_asmb_sse(2, &a[32], &a[40], &delta16[0], &delta16_3[0]);
-
-	fft_8_sse(&a[48]); fft_4_sse(&a[56]); fft_4_sse(&a[60]);
-	fft_asmb_sse(2, &a[48], &a[56], &delta16[0], &delta16_3[0]);
-
-	fft_asmb_sse(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
-}
-
-
-void fft_128p_sse(complex_t *a)
-{
-	fft_8_sse(&a[0]); fft_4_sse(&a[8]); fft_4_sse(&a[12]);
-	fft_asmb_sse(2, &a[0], &a[8], &delta16[0], &delta16_3[0]);
-  
-	fft_8_sse(&a[16]), fft_8_sse(&a[24]);
-	fft_asmb_sse(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
-
-	fft_8_sse(&a[32]); fft_4_sse(&a[40]); fft_4_sse(&a[44]);
-	fft_asmb_sse(2, &a[32], &a[40], &delta16[0], &delta16_3[0]);
-
-	fft_8_sse(&a[48]); fft_4_sse(&a[56]); fft_4_sse(&a[60]);
-	fft_asmb_sse(2, &a[48], &a[56], &delta16[0], &delta16_3[0]);
-
-	fft_asmb_sse(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
-
-	fft_8_sse(&a[64]); fft_4_sse(&a[72]); fft_4_sse(&a[76]);
-	/* fft_16(&a[64]); */
-	fft_asmb_sse(2, &a[64], &a[72], &delta16[0], &delta16_3[0]);
-
-	fft_8_sse(&a[80]); fft_8_sse(&a[88]);
-  
-	/* fft_32(&a[64]); */
-	fft_asmb_sse(4, &a[64], &a[80],&delta32[0], &delta32_3[0]);
-
-	fft_8_sse(&a[96]); fft_4_sse(&a[104]), fft_4_sse(&a[108]);
-	/* fft_16(&a[96]); */
-	fft_asmb_sse(2, &a[96], &a[104], &delta16[0], &delta16_3[0]);
-
-	fft_8_sse(&a[112]), fft_8_sse(&a[120]);
-	/* fft_32(&a[96]); */
-	fft_asmb_sse(4, &a[96], &a[112], &delta32[0], &delta32_3[0]);
-  
-	/* fft_128(&a[0]); */
-	fft_asmb_sse(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
-}
-
-void hsqrt2 (void)
-{
-    __asm__ (
-     ".float 0f0.707106781188\n"
-	 ".float 0f0.707106781188\n"
-	 ".float 0f-0.707106781188\n"
-	 ".float 0f-0.707106781188\n"
-     );
-}
-
-void C_1 (void)
-{
-    __asm__ (
-     ".float 0f-1.0\n"
-	 ".float 0f1.0\n"
-	 ".float 0f-1.0\n"
-	 ".float 0f1.0\n"
-     );
-}
-
-static void fft_4_sse (complex_t *x)
-{
-    __asm__ __volatile__ (
-	"movups   (%%eax), %%xmm0\n"	/* x[1] | x[0] */
-	"movups 16(%%eax), %%xmm2\n"	/* x[3] | x[2] */
-	"movups  %%xmm0, %%xmm1\n"		/* x[1] | x[0] */
-	"addps   %%xmm2, %%xmm0\n"		/* x[1] + x[3] | x[0] + x[2] */
-	"subps   %%xmm2, %%xmm1\n"		/* x[1] - x[3] | x[0] - x[2] */
-	"xorps   %%xmm6, %%xmm6\n"
-	"movhlps %%xmm1, %%xmm4\n"		/* ? | x[1] - x[3] */
-	"movhlps %%xmm0, %%xmm3\n"		/* ? | x[1] + x[3] */
-	"subss   %%xmm4, %%xmm6\n"		/* 0 | -(x[1] - x[3]).re */
-	"movlhps %%xmm1, %%xmm0\n"		/* x[0] - x[2] | x[0] + x[2] */
-    "movlhps %%xmm6, %%xmm4\n"		/* 0 | -(x[1] - x[3]).re | (x[1] - x[3]).im | (x[3]-x[1]).re */
-	"movups  %%xmm0, %%xmm2\n"		/* x[0] - x[2] | x[0] + x[2] */
-	"shufps   $0x94, %%xmm4, %%xmm3\n" /* i*(x[1] - x[3]) | x[1] + x[3] */
-    "addps   %%xmm3, %%xmm0\n"
-	"subps   %%xmm3, %%xmm2\n"
-	"movups  %%xmm0,   (%%eax)\n"
-	"movups  %%xmm2, 16(%%eax)\n"
-    : "=a" (x)
-    : "a" (x) );
-}
-
-static void fft_8_sse (complex_t *x)
-{
-    __asm__ __volatile__ (
-	"pushl   %%ebx\n"
-    
-	"movlps   (%%eax), %%xmm0\n"	/* x[0] */
-	"movlps 32(%%eax), %%xmm1\n"	/* x[4] */
-	"movhps 16(%%eax), %%xmm0\n"	/* x[2] | x[0] */
-	"movhps 48(%%eax), %%xmm1\n"	/* x[6] | x[4] */
-	"movups  %%xmm0, %%xmm2\n"	    /* x[2] | x[0] */
-	"xorps   %%xmm3, %%xmm3\n"
-    "addps   %%xmm1, %%xmm0\n"	    /* x[2] + x[6] | x[0] + x[4] */
-	"subps   %%xmm1, %%xmm2\n"    	/* x[2] - x[6] | x[0] - x[4] */
-	"movhlps %%xmm0, %%xmm5\n" 		/* x[2] + x[6] */
-	"movhlps %%xmm2, %%xmm4\n"      /* x[2] - x[6] */
-    "movlhps %%xmm2, %%xmm0\n"	    /* x[0] - x[4] | x[0] + x[4] */
-	"subss   %%xmm4, %%xmm3\n"	    /* (x[2]-x[6]).im | -(x[2]-x[6]).re */
-	"movups  %%xmm0, %%xmm7\n"	    /* x[0] - x[4] | x[0] + x[4] */
-	"movups  %%xmm3, %%xmm4\n"	    /* (x[2]-x[6]).im | -(x[2]-x[6]).re */
-	"movlps 8(%%eax), %%xmm1\n"	    /* x[1] */
-	"shufps   $0x14, %%xmm4, %%xmm5\n" /* i*(x[2] - x[6]) | x[2] + x[6] */
-
-	"addps   %%xmm5, %%xmm0\n"		/* yt = i*(x2-x6)+x0-x4 | x2+x6+x0+x4 */
-	"subps   %%xmm5, %%xmm7\n"		/* yb = i*(x6-x2)+x0-x4 | -x6-x2+x0+x4 */
-
-	"movhps 24(%%eax), %%xmm1\n"	/* x[3] | x[1] */
-    "movl   $hsqrt2, %%ebx\n"
-	"movlps 40(%%eax), %%xmm2\n"	/* x[5] */
-	"movhps 56(%%eax), %%xmm2\n"	/* x[7] | x[5] */
-	"movups  %%xmm1, %%xmm3\n"		/* x[3] | x[1] */
-	"addps   %%xmm2, %%xmm1\n"		/* x[3] + x[7] | x[1] + x[5] */
-	"subps   %%xmm2, %%xmm3\n"		/* x[3] - x[7] | x[1] - x[5] */
-	"movups (%%ebx), %%xmm4\n"		/* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */
-	"movups  %%xmm3, %%xmm6\n"		/* x[3] - x[7] | x[1] - x[5] */
-	"mulps   %%xmm4, %%xmm3\n"      /* -1/s2*(x[3] - x[7]) | 1/s2*(x[1] - x[5]) */
-	"shufps   $0xc8, %%xmm4, %%xmm4\n" /* -1/sqrt2 | 1/sqrt2 | -1/sqrt2 | 1/sqrt2 */
-	"shufps   $0xb1, %%xmm6, %%xmm6\n" /* (x3-x7).re|(x3-x7).im|(x1-x5).re|(x1-x5).im */
-	"mulps   %%xmm4, %%xmm6\n"      /* (x7-x3).re/s2|(x3-x7).im/s2|(x5-x1).re/s2|(x1-x5).im/s2 */
-	"addps   %%xmm3, %%xmm6\n"		/* (-1-i)/sqrt2 * (x[3]-x[7]) | (1-i)/sqrt2 * (x[1] - x[5]) */
-	"movhlps %%xmm1, %%xmm5\n"		/* x[3] + x[7] */
-	"movlhps %%xmm6, %%xmm1\n"		/* (1+i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
-	"shufps   $0xe4, %%xmm6, %%xmm5\n"	/* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */
-	"movups  %%xmm1, %%xmm3\n"		/* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
-	"movl      $C_1, %%ebx\n"
-	"addps   %%xmm5, %%xmm1\n"		/* u */
-	"subps   %%xmm5, %%xmm3\n"		/* v */
-	"movups  %%xmm0, %%xmm2\n"		/* yb */
-	"movups  %%xmm7, %%xmm4\n"		/* yt */
-	"movups (%%ebx), %%xmm5\n"
-	"mulps   %%xmm5, %%xmm3\n"
-	"addps   %%xmm1, %%xmm0\n"		/* yt + u */
-	"subps   %%xmm1, %%xmm2\n"		/* yt - u */
-	"shufps   $0xb1, %%xmm3, %%xmm3\n" /* -i * v */
-	"movups  %%xmm0, (%%eax)\n"
-	"movups  %%xmm2, 32(%%eax)\n"
-	"addps   %%xmm3, %%xmm4\n"		/* yb - i*v */
-	"subps   %%xmm3, %%xmm7\n"		/* yb + i*v */
-	"movups  %%xmm4, 16(%%eax)\n"
-	"movups  %%xmm7, 48(%%eax)\n"
-
-	"popl    %%ebx\n"
-    : "=a" (x)
-    : "a" (x));
-}
-
-    
-static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
-	     const complex_t *d, const complex_t *d_3)
-{
-    __asm__ __volatile__ (
-	"pushl %%ebp\n"
-	"movl %%esp, %%ebp\n"
-
-	"subl $4, %%esp\n"
-	
-	"pushl %%eax\n"
-	"pushl %%ebx\n"
-	"pushl %%ecx\n"
-	"pushl %%edx\n"
-	"pushl %%esi\n"
-	"pushl %%edi\n"
-
-	"movl  8(%%ebp), %%ecx\n"   /* k */
-	"movl 12(%%ebp), %%eax\n"   /* x */
-	"movl %%ecx, -4(%%ebp)\n"   /* k */
-	"movl 16(%%ebp), %%ebx\n"   /* wT */
-	"movl 20(%%ebp), %%edx\n"   /* d */
-	"movl 24(%%ebp), %%esi\n"   /* d3 */
-	"shll $4, %%ecx\n"          /* 16k */
-	"addl $8, %%edx\n"
-	"leal (%%eax, %%ecx, 2), %%edi\n"
-	"addl $8, %%esi\n"
-	
-	/* TRANSZERO and TRANS */
-	"movups (%%eax), %%xmm0\n"      /* x[1] | x[0] */
-	"movups (%%ebx), %%xmm1\n"      /* wT[1] | wT[0] */
-	"movups (%%ebx, %%ecx), %%xmm2\n" /* wB[1] | wB[0] */
-	"movlps (%%edx), %%xmm3\n"      /* d */
-	"movlps (%%esi), %%xmm4\n"      /* d3 */
-	"movhlps %%xmm1, %%xmm5\n"      /* wT[1] */
-	"movhlps %%xmm2, %%xmm6\n"      /* wB[1] */
-	"shufps $0x50, %%xmm3, %%xmm3\n" /* d[1].im | d[1].im | d[1].re | d[1].re */
-	"shufps $0x50, %%xmm4, %%xmm4\n" /* d3[1].im | d3[1].im | d3[i].re | d3[i].re */
-	"movlhps %%xmm5, %%xmm5\n"      /* wT[1] | wT[1] */
-	"movlhps %%xmm6, %%xmm6\n"      /* wB[1] | wB[1] */
-	"mulps   %%xmm3, %%xmm5\n"
-	"mulps   %%xmm4, %%xmm6\n"
-	"movhlps %%xmm5, %%xmm7\n"      /* wT[1].im * d[1].im | wT[1].re * d[1].im */
-	"movlhps %%xmm6, %%xmm5\n"      /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */
-	"shufps $0xb1, %%xmm6, %%xmm7\n" /* wB[1].re * d3[1].im | wB[i].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */
-	"movl $C_1, %%edi\n"
-	"movups (%%edi), %%xmm4\n"
-	"mulps   %%xmm4, %%xmm7\n"
-	"addps   %%xmm7, %%xmm5\n"      /* wB[1] * d3[1] | wT[1] * d[1] */
-	"movlhps %%xmm5, %%xmm1\n"      /* d[1] * wT[1] | wT[0] */
-	"shufps  $0xe4, %%xmm5, %%xmm2\n" /* d3[1] * wB[1] | wB[0] */
-	"movups  %%xmm1, %%xmm3\n"      /* d[1] * wT[1] | wT[0] */
-	"leal   (%%eax, %%ecx, 2), %%edi\n"
-	"addps  %%xmm2, %%xmm1\n"       /* u */
-	"subps  %%xmm2, %%xmm3\n"       /* v */
-	"mulps  %%xmm4, %%xmm3\n"
-	"movups (%%eax, %%ecx), %%xmm5\n" /* xk[1] | xk[0] */
-	"shufps $0xb1, %%xmm3, %%xmm3\n"  /* -i * v */
-	"movups %%xmm0, %%xmm2\n"         /* x[1] | x[0] */
-	"movups %%xmm5, %%xmm6\n"         /* xk[1] | xk[0] */
-	"addps  %%xmm1, %%xmm0\n"
-	"subps  %%xmm1, %%xmm2\n"
-	"addps  %%xmm3, %%xmm5\n"
-	"subps  %%xmm3, %%xmm6\n"
-	"movups %%xmm0, (%%eax)\n"
-	"movups %%xmm2, (%%edi)\n"
-	"movups %%xmm5, (%%eax, %%ecx)\n"
-	"movups %%xmm6, (%%edi, %%ecx)\n"
-	"addl $16, %%eax\n"
-	"addl $16, %%ebx\n"
-	"addl  $8, %%edx\n"
-	"addl  $8, %%esi\n"
-	"decl -4(%%ebp)\n"
-
-".loop:\n"
-	"movups (%%ebx), %%xmm0\n"      /* wT[1] | wT[0] */
-	"movups (%%edx), %%xmm1\n"      /* d[1] | d[0] */
-
-	"movups (%%ebx, %%ecx), %%xmm4\n" /* wB[1] | wB[0] */
-	"movups (%%esi), %%xmm5\n"      /* d3[1] | d3[0] */
-
-	"movhlps %%xmm0, %%xmm2\n"      /* wT[1] */
-	"movhlps %%xmm1, %%xmm3\n"      /* d[1] */
-
-	"movhlps %%xmm4, %%xmm6\n"      /* wB[1] */
-	"movhlps %%xmm5, %%xmm7\n"      /* d3[1] */
-
-	"shufps $0x50, %%xmm1, %%xmm1\n" /* d[0].im | d[0].im | d[0].re | d[0].re */
-	"shufps $0x50, %%xmm3, %%xmm3\n" /* d[1].im | d[1].im | d[1].re | d[1].re */
-
-	"movlhps %%xmm0, %%xmm0\n"       /* wT[0] | wT[0] */
-	"shufps $0x50, %%xmm5, %%xmm5\n" /* d3[0].im | d3[0].im | d3[0].re | d3[0].re */
-	"movlhps %%xmm2, %%xmm2\n"       /* wT[1] | wT[1] */
-	"shufps $0x50, %%xmm7, %%xmm7\n" /* d3[1].im | d3[1].im | d3[1].re | d3[1].re */
-
-	"mulps   %%xmm1, %%xmm0\n"  /* d[0].im * wT[0].im | d[0].im * wT[0].re | d[0].re * wT[0].im | d[0].re * wT[0].re */
-	"mulps   %%xmm3, %%xmm2\n"  /* d[1].im * wT[1].im | d[1].im * wT[1].re | d[1].re * wT[1].im | d[1].re * wT[1].re */
-	"movlhps %%xmm4, %%xmm4\n"  /* wB[0] | wB[0] */
-	"movlhps %%xmm6, %%xmm6\n"  /* wB[1] | wB[1] */
-    
-	"movhlps %%xmm0, %%xmm1\n"  /* d[0].im * wT[0].im | d[0].im * wT[0].re */
-	"movlhps %%xmm2, %%xmm0\n"  /* d[1].re * wT[1].im | d[1].re * wT[1].re | d[0].re * wT[0].im | d[0].re * wT[0].re */
-	"mulps   %%xmm5, %%xmm4\n"  /* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */
-	"mulps   %%xmm7, %%xmm6\n"  /* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */
-	"shufps $0xb1, %%xmm2, %%xmm1\n"    /* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */
-	"movl $C_1, %%edi\n"
-	"movups (%%edi), %%xmm3\n"  /* 1.0 | -1.0 | 1.0 | -1.0 */
-
-	"movhlps %%xmm4, %%xmm5\n"  /* wB[0].im * d3[0].im | wB[0].re * d3[0].im */
-	"mulps   %%xmm3, %%xmm1\n"  /* d[1].im * wT[1].re | -d[1].im * wT[1].im | d[0].im * wT[0].re | -d[0].im * wT[0].im */
-	"movlhps %%xmm6, %%xmm4\n"  /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wB[0].im * d3[0].re | wB[0].im * d3[0].re */
-	"addps   %%xmm1, %%xmm0\n"  /* wT[1] * d[1] | wT[0] * d[0] */
-
-	"shufps $0xb1, %%xmm6, %%xmm5\n"    /* wB[1].re * d3[1].im | wB[1].im * d3[1].im | wB[0].re * d3[0].im | wB[0].im * d3[0].im */
-	"mulps   %%xmm3, %%xmm5\n"  /* wB[1].re * d3[1].im | -wB[1].im * d3[1].im | wB[0].re * d3[0].im | -wB[0].im * d3[0].im */
-	"addps   %%xmm5, %%xmm4\n"  /* wB[1] * d3[1] | wB[0] * d3[0] */
-
-	"movups %%xmm0, %%xmm1\n"   /* wT[1] * d[1] | wT[0] * d[0] */
-	"addps  %%xmm4, %%xmm0\n"   /* u */
-	"subps  %%xmm4, %%xmm1\n"   /* v */
-	"movups (%%eax), %%xmm6\n"  /* x[1] | x[0] */
-	"leal   (%%eax, %%ecx, 2), %%edi\n"
-	"mulps  %%xmm3, %%xmm1\n"
-	"addl $16, %%ebx\n"
-	"addl $16, %%esi\n"
-	"shufps $0xb1, %%xmm1, %%xmm1\n"    /* -i * v */
-	"movups (%%eax, %%ecx), %%xmm7\n"   /* xk[1] | xk[0] */
-	"movups %%xmm6, %%xmm2\n"
-	"movups %%xmm7, %%xmm4\n"
-	"addps  %%xmm0, %%xmm6\n"
-	"subps  %%xmm0, %%xmm2\n"
-	"movups %%xmm6, (%%eax)\n"
-	"movups %%xmm2, (%%edi)\n"
-	"addps  %%xmm1, %%xmm7\n"
-	"subps  %%xmm1, %%xmm4\n"
-	"addl $16, %%edx\n"
-	"movups %%xmm7, (%%eax, %%ecx)\n"
-	"movups %%xmm4, (%%edi, %%ecx)\n"
-
-	"addl $16, %%eax\n"
-	"decl -4(%%ebp)\n"
-	"jnz .loop\n"
-
-".end:\n"
-	"popl %%edi\n"
-	"popl %%esi\n"
-	"popl %%edx\n"
-	"popl %%ecx\n"
-	"popl %%ebx\n"
-	"popl %%eax\n"
-	
-	"addl $4, %%esp\n"
-
-    "leave\n"
-    ::);
-}
diff --git a/src/audio_output/aout_u8.c b/src/audio_output/aout_u8.c
index 7037471125..209bb235bc 100644
--- a/src/audio_output/aout_u8.c
+++ b/src/audio_output/aout_u8.c
@@ -2,7 +2,7 @@
  * aout_u8.c: 8 bit unsigned audio output functions
  *****************************************************************************
  * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: aout_u8.c,v 1.4 2001/05/06 04:32:02 sam Exp $
+ * $Id: aout_u8.c,v 1.5 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *
@@ -105,8 +105,6 @@ void aout_U8StereoThread( aout_thread_t * p_aout )
     int i_fifo;
     long l_buffer, l_buffer_limit, l_bytes;
 
-    intf_DbgMsg("adec debug: running audio output U8_S_thread (%p) (pid == %i)", p_aout, getpid());
-
     /* As the s32_buffer was created with calloc(), we don't have to set this
      * memory to zero and we can immediately jump into the thread's loop */
     while ( ! p_aout->b_die )
diff --git a/src/interface/main.c b/src/interface/main.c
index de99fcceb4..cd15fb856a 100644
--- a/src/interface/main.c
+++ b/src/interface/main.c
@@ -4,7 +4,7 @@
  * and spawn threads.
  *****************************************************************************
  * Copyright (C) 1998, 1999, 2000 VideoLAN
- * $Id: main.c,v 1.94 2001/05/14 15:58:04 reno Exp $
+ * $Id: main.c,v 1.95 2001/05/15 16:19:42 sam Exp $
  *
  * Authors: Vincent Seguin <seguin@via.ecp.fr>
  *          Samuel Hocevar <sam@zoy.org>
@@ -113,6 +113,8 @@
 #define OPT_MOTION              181
 #define OPT_IDCT                182
 #define OPT_YUV                 183
+#define OPT_DOWNMIX             184
+#define OPT_IMDCT               185
 
 #define OPT_SYNCHRO             190
 #define OPT_WARNING             191
@@ -143,6 +145,8 @@ static const struct option longopts[] =
     {   "stereo",           0,          0,      OPT_STEREO },
     {   "mono",             0,          0,      OPT_MONO },
     {   "spdif",            0,          0,      OPT_SPDIF },
+    {   "downmix",          1,          0,      OPT_DOWNMIX },
+    {   "imdct",            1,          0,      OPT_IMDCT },
 
     /* Video options */
     {   "novideo",          0,          0,      OPT_NOVIDEO },
@@ -559,6 +563,12 @@ static int GetConfiguration( int *pi_argc, char *ppsz_argv[], char *ppsz_env[] )
         case OPT_SPDIF:                                           /* --spdif */
             main_PutIntVariable( AOUT_SPDIF_VAR, 1 );
             break;
+        case OPT_DOWNMIX:                                       /* --downmix */
+            main_PutPszVariable( DOWNMIX_METHOD_VAR, optarg );
+            break;
+        case OPT_IMDCT:                                           /* --imdct */
+            main_PutPszVariable( IMDCT_METHOD_VAR, optarg );
+            break;
 
         /* Video options */
         case OPT_NOVIDEO:                                       /* --novideo */
@@ -712,6 +722,8 @@ static void Usage( int i_fashion )
           "\n  -A, --aout <module>            \taudio output method"
           "\n      --stereo, --mono           \tstereo/mono audio"
           "\n      --spdif                    \tAC3 pass-through mode"
+          "\n      --downmix <module>         \tAC3 downmix method"
+          "\n      --imdct <module>           \tAC3 IMDCT method"
           "\n"
           "\n      --novideo                  \tdisable video"
           "\n  -V, --vout <module>            \tvideo output method"
@@ -758,6 +770,8 @@ static void Usage( int i_fashion )
         "\n  " AOUT_DSP_VAR "=<filename>              \tdsp device path"
         "\n  " AOUT_STEREO_VAR "={1|0}                \tstereo or mono output"
         "\n  " AOUT_SPDIF_VAR "={1|0}                 \tAC3 pass-through mode"
+        "\n  " DOWNMIX_METHOD_VAR "=<method name>     \tAC3 downmix method"
+        "\n  " IMDCT_METHOD_VAR "=<method name>       \tAC3 IMDCT method"
         "\n  " AOUT_RATE_VAR "=<rate>             \toutput rate" );
 
     /* Video parameters */