]> git.sesse.net Git - vlc/commitdiff
* Use memalign for aligned data (instead of a greek malloc)
authorRenaud Dartus <reno@videolan.org>
Tue, 30 Oct 2001 19:34:53 +0000 (19:34 +0000)
committerRenaud Dartus <reno@videolan.org>
Tue, 30 Oct 2001 19:34:53 +0000 (19:34 +0000)
* Some optimization in imdct (all data are now aligned)
* SSE downmix now works for windows
* SSE imdct is deactivated for Windows (MINGW32 doesn't know how to align data)

15 files changed:
include/ac3_imdct.h
include/common.h
plugins/imdct/ac3_imdct_sse.c
plugins/imdct/ac3_retables.h
plugins/imdct/ac3_srfft.h
plugins/imdct/ac3_srfft_sse.c
plugins/imdct/imdctsse.c
src/ac3_decoder/ac3_decoder.c
src/ac3_decoder/ac3_decoder.h
src/ac3_decoder/ac3_decoder_thread.c
src/ac3_decoder/ac3_decoder_thread.h
src/ac3_decoder/ac3_imdct.c
src/ac3_decoder/ac3_mantissa.c
src/ac3_decoder/ac3_mantissa.h
src/ac3_decoder/ac3_rematrix.c

index 7a405a34638a77f3e3615852e92c92945361f195..877b05745bba67650f31a6d9b78781b3ee201dec 100644 (file)
@@ -2,7 +2,7 @@
  * ac3_imdct.h : AC3 IMDCT types
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_imdct.h,v 1.5 2001/07/08 23:15:11 reno Exp $
+ * $Id: ac3_imdct.h,v 1.6 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Renaud Dartus <reno@videolan.org>
@@ -31,30 +31,27 @@ typedef struct complex_s {
 
 typedef struct imdct_s
 {
-    complex_t buf[N/4] __attribute__ ((aligned(16)));
+    complex_t * buf;
 
     /* Delay buffer for time domain interleaving */
-    float delay[6][256] __attribute__ ((aligned(16)));
-    float delay1[6][256] __attribute__ ((aligned(16)));
+    float * delay;
+    float * delay1;
 
     /* Twiddle factors for IMDCT */
-    float xcos1[N/4] __attribute__ ((aligned(16)));
-    float xsin1[N/4] __attribute__ ((aligned(16)));
-    float xcos2[N/8] __attribute__ ((aligned(16)));
-    float xsin2[N/8] __attribute__ ((aligned(16)));
-    float xcos_sin_sse[128 * 4] __attribute__ ((aligned(16)));
+    float * xcos1;
+    float * xsin1;
+    float * xcos2;
+    float * xsin2;
+    float * xcos_sin_sse;
    
     /* Twiddle factor LUT */
-    complex_t w_1[1] __attribute__ ((aligned(16)));
-    float used_for_alignement1;
-    float used_for_alignement2;
-    complex_t w_2[2] __attribute__ ((aligned(16)));
-    complex_t w_4[4] __attribute__ ((aligned(16)));
-    complex_t w_8[8] __attribute__ ((aligned(16)));
-    complex_t w_16[16] __attribute__ ((aligned(16)));
-    complex_t w_32[32] __attribute__ ((aligned(16)));
-    complex_t w_64[64] __attribute__ ((aligned(16)));
-    complex_t *w[7] __attribute__ ((aligned(16)));
+    complex_t * w_2;
+    complex_t * w_4;
+    complex_t * w_8;
+    complex_t * w_16;
+    complex_t * w_32;
+    complex_t * w_64;
+    complex_t * w_1;
     
     /* Module used and shortcuts */
     struct module_s * p_module;
index ccae63e682c96c4e15e6672e99cc4e7878e5491b..b34e824f390bc77fa34072e5ea3a766fc9366839 100644 (file)
@@ -3,7 +3,7 @@
  * Collection of useful common types and macros definitions
  *****************************************************************************
  * Copyright (C) 1998, 1999, 2000 VideoLAN
- * $Id: common.h,v 1.42 2001/10/22 12:28:53 massiot Exp $
+ * $Id: common.h,v 1.43 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Samuel Hocevar <sam@via.ecp.fr>
  *          Vincent Seguin <seguin@via.ecp.fr>
@@ -209,9 +209,15 @@ struct pgrm_descriptor_s;
 #       include <unistd.h>
 #       define memalign(align,size) valloc(size)
 #   else
+#       if defined( __MINGW32__ )
+#           define memalign(align,size) ((void *)(((unsigned long)malloc((size)+(align)-1)+(align)-1)&~((unsigned long)(align)-1))) /* FIXME: returns an offset pointer; must not be passed to free() */
+#       else
         /* Assume malloc alignment is sufficient */
-#       define memalign(align,size) malloc(size)
+#           define memalign(align,size) malloc(size)
+#       endif
 #   endif
+
+    
 #endif
 
 /* win32, cl and icl support */
index f55817611db99e8ccfb70d300682f16a984f2ded..fe155204290be12297b9006737aefbad369ff072 100644 (file)
@@ -2,7 +2,7 @@
  * ac3_imdct_sse.c: accelerated SSE ac3 DCT
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_imdct_sse.c,v 1.5 2001/07/26 20:00:33 reno Exp $
+ * $Id: ac3_imdct_sse.c,v 1.6 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Renaud Dartus <reno@videolan.org>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -249,7 +249,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
     "movlhps %%xmm2, %%xmm0\n"      /* 0.0 | im1 | 0.0 | im0 */
     "movlhps %%xmm3, %%xmm1\n"      /* 0.0 | re1 | 0.0 | re0 */
 
-    "movups (%%edx), %%xmm4\n"      /* w3 | w2 | w1 | w0 */
+    "movaps (%%edx), %%xmm4\n"      /* w3 | w2 | w1 | w0 */
     "movaps (%%ecx), %%xmm5\n"      /* d3 | d2 | d1 | d0 */
     "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
 
@@ -263,7 +263,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
     "movlhps   %%xmm3, %%xmm2\n"    /* 0.0 | re3 | 0.0 | re2 */
     "addps  %%xmm5, %%xmm0\n"
     "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
-    "movups 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
+    "movaps 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
     "movaps 16(%%ecx), %%xmm5\n"    /* d7 | d6 | d5 | d4 */
     "subps  %%xmm2, %%xmm6\n"       /* -re3 | im3 | -re2 | im2 */
     "addl   $32, %%edx\n"
@@ -292,7 +292,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
     "movlhps %%xmm2, %%xmm0\n"  /* 0.0 | re1 | 0.0 | re0 */
     "movlhps %%xmm3, %%xmm1\n"  /* 0.0 | im1 | 0.0 | im1 */
 
-    "movups (%%edx), %%xmm4\n"  /* w3 | w2 | w1 | w0 */
+    "movaps (%%edx), %%xmm4\n"  /* w3 | w2 | w1 | w0 */
     "movaps (%%ecx), %%xmm5\n"  /* d3 | d2 | d1 | d0 */
 
     "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
@@ -305,7 +305,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
     "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | im3 | 0.0 | im2 */
     "mulps   %%xmm4, %%xmm0\n"
     "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
-    "movups 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
+    "movaps 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
     "addl   $32, %%esi\n"
     "subps  %%xmm2, %%xmm6\n"       /* -im3 | re3 | -im2 | re2 */
     "addps  %%xmm5, %%xmm0\n"
@@ -336,7 +336,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
     "movlhps %%xmm2, %%xmm0\n"      /* 0.0 | re1 | 0.0 | re0 */
     "movlhps %%xmm3, %%xmm1\n"      /* 0.0 | im1 | 0.0 | im0 */
 
-    "movups -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
+    "movaps -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
     "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
     "movss  16(%%esi), %%xmm6\n"    /* re2 */
     "movss  24(%%esi), %%xmm7\n"    /* re3 */
@@ -347,7 +347,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
     "movlhps %%xmm7, %%xmm6\n"      /* 0.0 | re3 | 0.0 | re2 */
     "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | im3 | 0.0 | im2 */
     "mulps   %%xmm4, %%xmm0\n"
-    "movups (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
+    "movaps (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
     "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
     "movaps %%xmm0, (%%ecx)\n"
     "addl   $32, %%esi\n"
@@ -373,7 +373,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
     "movlhps %%xmm2, %%xmm0\n"      /* 0.0 | im1 | 0.0 | im0 */
     "movlhps %%xmm3, %%xmm1\n"      /* 0.0 | re1 | 0.0 | re0 */
 
-    "movups -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
+    "movaps -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
     "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
     "movss  16(%%esi), %%xmm6\n"    /* im2 */
     "movss  24(%%esi), %%xmm7\n"    /* im3 */
@@ -384,7 +384,7 @@ static void imdct512_window_delay_sse (complex_t *buf, float *data_ptr, float *w
     "movlhps %%xmm7, %%xmm6\n"      /* 0.0 | im3 | 0.0 | im2 */
     "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | re3 | 0.0 | re2 */
     "mulps   %%xmm4, %%xmm1\n"
-    "movups (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
+    "movaps (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
     "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
     "movaps %%xmm1, (%%ecx)\n"
     "addl   $32, %%esi\n"
@@ -436,7 +436,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
     "movlhps %%xmm2, %%xmm0\n"      /* 0.0 | im1 | 0.0 | im0 */
     "movlhps %%xmm3, %%xmm1\n"      /* 0.0 | re1 | 0.0 | re0 */
 
-    "movups (%%edx), %%xmm4\n"      /* w3 | w2 | w1 | w0 */
+    "movaps (%%edx), %%xmm4\n"      /* w3 | w2 | w1 | w0 */
     "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
 
     "movss  16(%%esi), %%xmm6\n"    /* im2 */
@@ -448,7 +448,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
     "movlhps %%xmm7, %%xmm6\n"      /* 0.0 | im3 | 0.0 | im2 */
     "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | re3 | 0.0 | re2 */
     "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
-    "movups 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
+    "movaps 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
     "subps  %%xmm2, %%xmm6\n"       /* -re3 | im3 | -re2 | im2 */
     "addl   $32, %%edx\n"
     "movaps %%xmm0, (%%eax)\n"
@@ -474,7 +474,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
     "movlhps %%xmm2, %%xmm0\n"      /* 0.0 | re1 | 0.0 | re0 */
     "movlhps %%xmm3, %%xmm1\n"      /* 0.0 | im1 | 0.0 | im1 */
     
-    "movups (%%edx), %%xmm4\n"      /* w3 | w2 | w1 | w0 */
+    "movaps (%%edx), %%xmm4\n"      /* w3 | w2 | w1 | w0 */
 
     "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
     "movss  16(%%esi), %%xmm6\n"    /* re2 */
@@ -486,7 +486,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
     "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | im3 | 0.0 | im2 */
     "mulps   %%xmm4, %%xmm0\n"
     "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
-    "movups 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
+    "movaps 16(%%edx), %%xmm4\n"    /* w7 | w6 | w5 | w4 */
     "addl   $32, %%esi\n"
     "subps  %%xmm2, %%xmm6\n"       /* -im3 | re3 | -im2 | re2 */
     "mulps  %%xmm4, %%xmm6\n"
@@ -513,7 +513,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
     "movlhps %%xmm2, %%xmm0\n"  /* 0.0 | re1 | 0.0 | re0 */
     "movlhps %%xmm3, %%xmm1\n"  /* 0.0 | im1 | 0.0 | im0 */
 
-    "movups -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
+    "movaps -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
     "shufps $0xb1, %%xmm1, %%xmm1\n"/* im1 | 0.0 | im0 | 0.0 */
     "movss  16(%%esi), %%xmm6\n"    /* re2 */
     "movss  24(%%esi), %%xmm7\n"    /* re3 */
@@ -524,7 +524,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
     "movlhps %%xmm7, %%xmm6\n"      /* 0.0 | re3 | 0.0 | re2 */
     "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | im3 | 0.0 | im2 */
     "mulps   %%xmm4, %%xmm0\n"
-    "movups (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
+    "movaps (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
     "shufps $0xb1, %%xmm2, %%xmm2\n"/* im3 | 0.0 | im2 | 0.0 */
     "movaps %%xmm0, (%%ecx)\n"
     "addl   $32, %%esi\n"
@@ -550,7 +550,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
     "movlhps %%xmm2, %%xmm0\n"  /* 0.0 | im1 | 0.0 | im0 */
     "movlhps %%xmm3, %%xmm1\n"  /* 0.0 | re1 | 0.0 | re0 */
 
-    "movups -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
+    "movaps -16(%%edx), %%xmm4\n"   /* w3 | w2 | w1 | w0 */
     "shufps $0xb1, %%xmm1, %%xmm1\n"/* re1 | 0.0 | re0 | 0.0 */
     "movss  16(%%esi), %%xmm6\n"    /* im2 */
     "movss  24(%%esi), %%xmm7\n"    /* im3 */
@@ -561,7 +561,7 @@ static void imdct512_window_delay_nol_sse (complex_t *buf, float *data_ptr, floa
     "movlhps %%xmm7, %%xmm6\n"      /* 0.0 | im3 | 0.0 | im2 */
     "movlhps %%xmm3, %%xmm2\n"      /* 0.0 | re3 | 0.0 | re2 */
     "mulps   %%xmm4, %%xmm1\n"
-    "movups (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
+    "movaps (%%edx), %%xmm5\n"      /* w7 | w6 | w5 | w4 */
     "shufps $0xb1, %%xmm2, %%xmm2\n"/* re3 | 0.0 | re2 | 0.0 */
     "movaps %%xmm1, (%%ecx)\n"
     "addl   $32, %%esi\n"
index 50e1d1c55d7e68c407c94051bba9c3c02f28cbc2..2ce8282e96eba35831658fc7a8632ae1fba0a487 100644 (file)
@@ -2,7 +2,7 @@
  * ac3_retables.h: ac3 DCT tables
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_retables.h,v 1.1 2001/05/16 14:51:29 reno Exp $
+ * $Id: ac3_retables.h,v 1.2 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Renaud Dartus <reno@videolan.org>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -22,7 +22,7 @@
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
 
-static float window[] = {
+static float window[] ATTR_ALIGN(16) = {
     0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
     0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
     0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
index 27a2511676691c38f39a518534d1a95c88c7a564..248d946d32078a47f53a4de6447d7d0d95b70dfd 100644 (file)
@@ -2,7 +2,7 @@
  * ac3_srfft.h: ac3 FFT tables
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_srfft.h,v 1.1 2001/05/15 16:19:42 sam Exp $
+ * $Id: ac3_srfft.h,v 1.2 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Renaud Dartus <reno@videolan.org>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  *****************************************************************************/
 
-static const complex_t delta16[4] = 
+static const complex_t delta16[4] ATTR_ALIGN(16) =
  { {1.00000000000000,  0.00000000000000},
    {0.92387953251129, -0.38268343236509},
    {0.70710678118655, -0.70710678118655},
    {0.38268343236509, -0.92387953251129}};
 
-static const complex_t delta16_3[4] = 
+static const complex_t delta16_3[4] ATTR_ALIGN(16) =
  { {1.00000000000000,  0.00000000000000},
    {0.38268343236509, -0.92387953251129},
    {-0.70710678118655, -0.70710678118655},
    {-0.92387953251129, 0.38268343236509}};
 
-static const complex_t delta32[8] = 
+static const complex_t delta32[8] ATTR_ALIGN(16) =
  { {1.00000000000000,  0.00000000000000},
    {0.98078528040323, -0.19509032201613},
    {0.92387953251129, -0.38268343236509},
@@ -44,7 +44,7 @@ static const complex_t delta32[8] =
    {0.38268343236509, -0.92387953251129},
    {0.19509032201613, -0.98078528040323}};
 
-static const complex_t delta32_3[8] = 
+static const complex_t delta32_3[8] ATTR_ALIGN(16) =
  { {1.00000000000000,  0.00000000000000},
    {0.83146961230255, -0.55557023301960},
    {0.38268343236509, -0.92387953251129},
@@ -54,7 +54,7 @@ static const complex_t delta32_3[8] =
    {-0.92387953251129, 0.38268343236509},
    {-0.55557023301960, 0.83146961230255}};
 
-static const complex_t delta64[16] = 
+static const complex_t delta64[16] ATTR_ALIGN(16) =
  { {1.00000000000000,  0.00000000000000},
    {0.99518472667220, -0.09801714032956},
    {0.98078528040323, -0.19509032201613},
@@ -72,7 +72,7 @@ static const complex_t delta64[16] =
    {0.19509032201613, -0.98078528040323},
    {0.09801714032956, -0.99518472667220}};
 
-static const complex_t delta64_3[16] = 
+static const complex_t delta64_3[16] ATTR_ALIGN(16) =
  { {1.00000000000000,  0.00000000000000},
    {0.95694033573221, -0.29028467725446},
    {0.83146961230255, -0.55557023301960},
@@ -90,7 +90,7 @@ static const complex_t delta64_3[16] =
    {-0.55557023301960, 0.83146961230255},
    {-0.29028467725446, 0.95694033573221}};
 
-static const complex_t delta128[32] = 
+static const complex_t delta128[32] ATTR_ALIGN(16) =
  { {1.00000000000000,  0.00000000000000},
    {0.99879545620517, -0.04906767432742},
    {0.99518472667220, -0.09801714032956},
@@ -124,7 +124,7 @@ static const complex_t delta128[32] =
    {0.09801714032956, -0.99518472667220},
    {0.04906767432742, -0.99879545620517}};
 
-static const complex_t delta128_3[32] = 
+static const complex_t delta128_3[32] ATTR_ALIGN(16) =
  { {1.00000000000000,  0.00000000000000},
    {0.98917650996478, -0.14673047445536},
    {0.95694033573221, -0.29028467725446},
index 8f2443b6440cce68dcab0379327bbcfea7f6c657..29d6995495d415db521ab6e2443f95fbc325c64e 100644 (file)
@@ -2,7 +2,7 @@
  * ac3_srfft_sse.c: accelerated SSE ac3 fft functions
  *****************************************************************************
  * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_srfft_sse.c,v 1.5 2001/07/26 20:00:33 reno Exp $
+ * $Id: ac3_srfft_sse.c,v 1.6 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Renaud Dartus <reno@videolan.org>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -43,8 +43,6 @@
 #include "ac3_imdct.h"
 #include "ac3_srfft.h"
 
-void hsqrt2_sse (void) __asm__ ("hsqrt2_sse");
-void C_1_sse (void) __asm__ ("C_1_sse");
 static void fft_4_sse (complex_t *x);
 static void fft_8_sse (complex_t *x);
 static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
@@ -104,25 +102,11 @@ void _M( fft_128p ) ( complex_t *a )
     fft_asmb_sse(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
 }
 
-void hsqrt2_sse (void)
-{
-    __asm__ __volatile__ (
-    ".float 0f0.707106781188\n"
-    ".float 0f0.707106781188\n"
-    ".float 0f-0.707106781188\n"
-    ".float 0f-0.707106781188\n"
-    );
-}
+static float hsqrt2_sse[] ATTR_ALIGN(16) =
+    { 0.707106781188, 0.707106781188, -0.707106781188, -0.707106781188 };
 
-void C_1_sse (void)
-{
-    __asm__ __volatile__ (
-    ".float 0f-1.0\n"
-    ".float 0f1.0\n"
-    ".float 0f-1.0\n"
-    ".float 0f1.0\n"
-    );
-}
+static float C_1_sse[] ATTR_ALIGN(16) =
+    { -1.0, 1.0, -1.0, 1.0 };
 
 static void fft_4_sse (complex_t *x)
 {
@@ -153,7 +137,6 @@ static void fft_8_sse (complex_t *x)
 {
     __asm__ __volatile__ (
     ".align 16\n"
-    "pushl   %%ebx\n"
     
     "movlps   (%%eax), %%xmm0\n"    /* x[0] */
     "movlps 32(%%eax), %%xmm1\n"    /* x[4] */
@@ -176,13 +159,12 @@ static void fft_8_sse (complex_t *x)
     "subps   %%xmm5, %%xmm7\n"      /* yb = i*(x6-x2)+x0-x4 | -x6-x2+x0+x4 */
 
     "movhps 24(%%eax), %%xmm1\n"    /* x[3] | x[1] */
-    "movl   $hsqrt2_sse, %%ebx\n"
     "movlps 40(%%eax), %%xmm2\n"    /* x[5] */
     "movhps 56(%%eax), %%xmm2\n"    /* x[7] | x[5] */
     "movaps  %%xmm1, %%xmm3\n"      /* x[3] | x[1] */
     "addps   %%xmm2, %%xmm1\n"      /* x[3] + x[7] | x[1] + x[5] */
     "subps   %%xmm2, %%xmm3\n"      /* x[3] - x[7] | x[1] - x[5] */
-    "movups (%%ebx), %%xmm4\n"      /* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */
+    "movaps (%%ecx), %%xmm4\n"      /* -1/sqrt2 | -1/sqrt2 | 1/sqrt2 | 1/sqrt2 */
     "movaps  %%xmm3, %%xmm6\n"      /* x[3] - x[7] | x[1] - x[5] */
     "mulps   %%xmm4, %%xmm3\n"      /* -1/s2*(x[3] - x[7]) | 1/s2*(x[1] - x[5]) */
     "shufps   $0xc8, %%xmm4, %%xmm4\n" /* -1/sqrt2 | 1/sqrt2 | -1/sqrt2 | 1/sqrt2 */
@@ -193,12 +175,11 @@ static void fft_8_sse (complex_t *x)
     "movlhps %%xmm6, %%xmm1\n"      /* (1+i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
     "shufps   $0xe4, %%xmm6, %%xmm5\n" /* (-1-i)/sqrt2 * (x[3]-x[7]) | x[3]+x[7] */
     "movaps  %%xmm1, %%xmm3\n"      /* (1-i)/sqrt2 * (x[1]-x[5]) | x[1]+x[5] */
-    "movl  $C_1_sse, %%ebx\n"
     "addps   %%xmm5, %%xmm1\n"      /* u */
     "subps   %%xmm5, %%xmm3\n"      /* v */
     "movaps  %%xmm0, %%xmm2\n"      /* yb */
     "movaps  %%xmm7, %%xmm4\n"      /* yt */
-    "movups (%%ebx), %%xmm5\n"
+    "movaps (%%edx), %%xmm5\n"
     "mulps   %%xmm5, %%xmm3\n"
     "addps   %%xmm1, %%xmm0\n"      /* yt + u */
     "subps   %%xmm1, %%xmm2\n"      /* yt - u */
@@ -210,17 +191,16 @@ static void fft_8_sse (complex_t *x)
     "movaps  %%xmm4, 16(%%eax)\n"
     "movaps  %%xmm7, 48(%%eax)\n"
 
-    "popl    %%ebx\n"
     : "=a" (x)
-    : "a" (x));
+    : "a" (x), "c" (hsqrt2_sse), "d" (C_1_sse));
 }
 
-    
 static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
          const complex_t *d, const complex_t *d_3)
 {
     __asm__ __volatile__ (
     ".align 16\n"
+    "pushl %%esp\n"
     "pushl %%ebp\n"
     "movl %%esp, %%ebp\n"
 
@@ -256,14 +236,13 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
     "movhlps %%xmm5, %%xmm7\n"      /* wT[1].im * d[1].im | wT[1].re * d[1].im */
     "movlhps %%xmm6, %%xmm5\n"      /* wB[1].im * d3[1].re | wB[1].re * d3[1].re | wT[1].im * d[1].re | wT[1].re * d[1].re */
     "shufps $0xb1, %%xmm6, %%xmm7\n" /* wB[1].re * d3[1].im | wB[i].im * d3[1].im | wT[1].re * d[1].im | wT[1].im * d[1].im */
-    "movl  $C_1_sse, %%ebx\n"
     "movaps (%%ebx), %%xmm4\n"
     "mulps   %%xmm4, %%xmm7\n"
     "addps   %%xmm7, %%xmm5\n"      /* wB[1] * d3[1] | wT[1] * d[1] */
     "movlhps %%xmm5, %%xmm1\n"      /* d[1] * wT[1] | wT[0] */
     "shufps  $0xe4, %%xmm5, %%xmm2\n" /* d3[1] * wB[1] | wB[0] */
     "movaps  %%xmm1, %%xmm3\n"      /* d[1] * wT[1] | wT[0] */
-    "leal   (%%eax, %%ecx, 2), %%ebx\n"
+    "leal   (%%eax, %%ecx, 2), %%esp\n"
     "addps  %%xmm2, %%xmm1\n"       /* u */
     "subps  %%xmm2, %%xmm3\n"       /* v */
     "mulps  %%xmm4, %%xmm3\n"
@@ -276,9 +255,9 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
     "addps  %%xmm3, %%xmm5\n"
     "subps  %%xmm3, %%xmm6\n"
     "movaps %%xmm0, (%%eax)\n"
-    "movaps %%xmm2, (%%ebx)\n"
+    "movaps %%xmm2, (%%esp)\n"
     "movaps %%xmm5, (%%eax, %%ecx)\n"
-    "movaps %%xmm6, (%%ebx, %%ecx)\n"
+    "movaps %%xmm6, (%%esp, %%ecx)\n"
     "addl $16, %%eax\n"
     "addl $16, %%edi\n"
     "addl  $8, %%edx\n"
@@ -317,7 +296,6 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
     "mulps   %%xmm5, %%xmm4\n"  /* wB[0].im * d3[0].im | wB[0].re * d3[0].im | wB[0].im * d3[0].re | wB[0].re * d3[0].re */
     "mulps   %%xmm7, %%xmm6\n"  /* wB[1].im * d3[1].im | wB[1].re * d3[1].im | wB[1].im * d3[1].re | wB[1].re * d3[1].re */
     "shufps $0xb1, %%xmm2, %%xmm1\n" /* d[1].im * wT[1].re | d[1].im * wT[1].im | d[0].im * wT[0].re | d[0].im * wT[0].im */
-    "movl  $C_1_sse, %%ebx\n"
     "movaps (%%ebx), %%xmm3\n"  /* 1.0 | -1.0 | 1.0 | -1.0 */
 
     "movhlps %%xmm4, %%xmm5\n"  /* wB[0].im * d3[0].im | wB[0].re * d3[0].im */
@@ -333,7 +311,7 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
     "addps  %%xmm4, %%xmm0\n"   /* u */
     "subps  %%xmm4, %%xmm1\n"   /* v */
     "movaps (%%eax), %%xmm6\n"  /* x[1] | x[0] */
-    "leal   (%%eax, %%ecx, 2), %%ebx\n"
+    "leal   (%%eax, %%ecx, 2), %%esp\n"
     "mulps  %%xmm3, %%xmm1\n"
     "addl $16, %%edi\n"
     "addl $16, %%esi\n"
@@ -344,12 +322,12 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
     "addps  %%xmm0, %%xmm6\n"
     "subps  %%xmm0, %%xmm2\n"
     "movaps %%xmm6, (%%eax)\n"
-    "movaps %%xmm2, (%%ebx)\n"
+    "movaps %%xmm2, (%%esp)\n"
     "addps  %%xmm1, %%xmm7\n"
     "subps  %%xmm1, %%xmm4\n"
     "addl $16, %%edx\n"
     "movaps %%xmm7, (%%eax, %%ecx)\n"
-    "movaps %%xmm4, (%%ebx, %%ecx)\n"
+    "movaps %%xmm4, (%%esp, %%ecx)\n"
 
     "addl $16, %%eax\n"
     "decl -4(%%ebp)\n"
@@ -367,7 +345,7 @@ static void fft_asmb_sse (int k, complex_t *x, complex_t *wTB,
     "addl $4, %%esp\n"
 
     "leave\n"
+    "popl %%esp\n"
     : "=c" (k), "=a" (x), "=D" (wTB)
-    : "c" (k), "a" (x), "D" (wTB), "d" (d), "S" (d_3));
+    : "c" (k), "a" (x), "D" (wTB), "d" (d), "S" (d_3), "b" (C_1_sse) );
 }
-
index 0ad87c8956f5a61c084809ec2f384b368e438bd6..5d6c59b45e58f8ffe9ae303243a57e84d97730b1 100644 (file)
@@ -2,7 +2,7 @@
  * imdctsse.c : accelerated SSE IMDCT module
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: imdctsse.c,v 1.6 2001/07/11 02:01:04 sam Exp $
+ * $Id: imdctsse.c,v 1.7 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Gaël Hendryckx <jimmy@via.ecp.fr>
  *
@@ -93,6 +93,9 @@ static void imdct_getfunctions( function_list_t * p_function_list )
  *****************************************************************************/
 static int imdct_Probe( probedata_t *p_data )
 {
+#if defined ( __MINGW32__ )
+    return 0;
+#else
     if( !TestCPU( CPU_CAPABILITY_SSE ) )
     {
         return( 0 );
@@ -106,5 +109,6 @@ static int imdct_Probe( probedata_t *p_data )
 
     /* This plugin always works */
     return( 200 );
+#endif
 }
 
index c0bb86dfb49221df572ae5ff8d68dc7c20e795da..99db0767f4014027a201e2d28933397b1684f405 100644 (file)
@@ -2,7 +2,7 @@
  * ac3_decoder.c: core ac3 decoder
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_decoder.c,v 1.34 2001/05/15 16:19:42 sam Exp $
+ * $Id: ac3_decoder.c,v 1.35 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Michel Lespinasse <walken@zoy.org>
@@ -55,7 +55,7 @@ static const float smixlev_lut[4] = { 0.707, 0.500, 0.0  , 0.500 };
 int ac3_init (ac3dec_t * p_ac3dec)
 {
     p_ac3dec->mantissa.lfsr_state = 1;          /* dither_gen initialization */
-    imdct_init(&p_ac3dec->imdct);
+    imdct_init(p_ac3dec->imdct);
     
     return 0;
 }
index be0cef2a315588c22af82eb23d9488ba14a9fd36..71882df09b6df8571edbd505274c23b61055259f 100644 (file)
@@ -2,7 +2,7 @@
  * ac3_decoder.h : ac3 decoder interface
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_decoder.h,v 1.11 2001/07/08 23:15:11 reno Exp $
+ * $Id: ac3_decoder.h,v 1.12 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Renaud Dartus <reno@videolan.org>
@@ -354,8 +354,8 @@ typedef struct mantissa_s
 
 struct ac3dec_s
 {
-    float               samples[6][256] __attribute__ ((aligned(16)));
-    imdct_t             imdct __attribute__ ((aligned(16)));
+    float *             samples;
+    imdct_t *           imdct;
 
     /*
      * Input properties
index e8db6e9a126c2cd9be10dd76dc67e40076d5acf5..13d9bd08daea526172b88b1a3d315716fe4f15a1 100644 (file)
@@ -2,7 +2,7 @@
  * ac3_decoder_thread.c: ac3 decoder thread
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_decoder_thread.c,v 1.37 2001/09/05 16:46:10 massiot Exp $
+ * $Id: ac3_decoder_thread.c,v 1.38 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Michel Lespinasse <walken@zoy.org>
  *
@@ -78,42 +78,37 @@ static void     BitstreamCallback       ( bit_stream_t *p_bit_stream,
 vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config )
 {
     ac3dec_thread_t *   p_ac3thread;
-    ac3dec_thread_t *   p_ac3thread_temp;
 
     intf_DbgMsg( "ac3dec debug: creating ac3 decoder thread" );
 
     /* Allocate the memory needed to store the thread's structure */
-    p_ac3thread_temp = (ac3dec_thread_t *)malloc(sizeof(ac3dec_thread_t) + 15);
-    memset( p_ac3thread_temp, 0, sizeof(ac3dec_thread_t) + 15 );
+    p_ac3thread = (ac3dec_thread_t *)memalign(16, sizeof(ac3dec_thread_t));
 
-    /* We need to be 16 bytes aligned */
-    p_ac3thread = (ac3dec_thread_t *)(((unsigned long)p_ac3thread_temp + 15)
-                                     & ~0xFUL );
-    p_ac3thread->ac3thread = p_ac3thread_temp;
-    
     if(p_ac3thread == NULL)
     {
         intf_ErrMsg ( "ac3dec error: not enough memory "
                       "for ac3dec_CreateThread() to create the new thread");
         return 0;
     }
-    
+   
     /*
      * Initialize the thread properties
      */
     p_ac3thread->p_config = p_config;
     p_ac3thread->p_fifo = p_config->decoder_config.p_decoder_fifo;
+    p_ac3thread->ac3_decoder = memalign(16, sizeof(ac3dec_t));
 
     /*
      * Choose the best downmix module
      */
-#define DOWNMIX p_ac3thread->ac3_decoder.downmix
+#define DOWNMIX p_ac3thread->ac3_decoder->downmix
     DOWNMIX.p_module = module_Need( MODULE_CAPABILITY_DOWNMIX, NULL );
 
     if( DOWNMIX.p_module == NULL )
     {
         intf_ErrMsg( "ac3dec error: no suitable downmix module" );
-        free( p_ac3thread->ac3thread );
+        free( p_ac3thread->ac3_decoder );
+        free( p_ac3thread );
         return( 0 );
     }
 
@@ -131,28 +126,49 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config )
     /*
      * Choose the best IMDCT module
      */
-#define IMDCT p_ac3thread->ac3_decoder.imdct
-    IMDCT.p_module = module_Need( MODULE_CAPABILITY_IMDCT, NULL );
+    p_ac3thread->ac3_decoder->imdct = memalign(16, sizeof(imdct_t));
+    
+#define IMDCT p_ac3thread->ac3_decoder->imdct
+    IMDCT->p_module = module_Need( MODULE_CAPABILITY_IMDCT, NULL );
 
-    if( IMDCT.p_module == NULL )
+    if( IMDCT->p_module == NULL )
     {
         intf_ErrMsg( "ac3dec error: no suitable IMDCT module" );
-        module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module );
-        free( p_ac3thread->ac3thread );
+        module_Unneed( p_ac3thread->ac3_decoder->downmix.p_module );
+        free( p_ac3thread->ac3_decoder->imdct );
+        free( p_ac3thread->ac3_decoder );
+        free( p_ac3thread );
         return( 0 );
     }
 
-#define F IMDCT.p_module->p_functions->imdct.functions.imdct
-    IMDCT.pf_imdct_init    = F.pf_imdct_init;
-    IMDCT.pf_imdct_256     = F.pf_imdct_256;
-    IMDCT.pf_imdct_256_nol = F.pf_imdct_256_nol;
-    IMDCT.pf_imdct_512     = F.pf_imdct_512;
-    IMDCT.pf_imdct_512_nol = F.pf_imdct_512_nol;
+#define F IMDCT->p_module->p_functions->imdct.functions.imdct
+    IMDCT->pf_imdct_init    = F.pf_imdct_init;
+    IMDCT->pf_imdct_256     = F.pf_imdct_256;
+    IMDCT->pf_imdct_256_nol = F.pf_imdct_256_nol;
+    IMDCT->pf_imdct_512     = F.pf_imdct_512;
+    IMDCT->pf_imdct_512_nol = F.pf_imdct_512_nol;
 #undef F
 #undef IMDCT
 
     /* Initialize the ac3 decoder structures */
-    ac3_init (&p_ac3thread->ac3_decoder);
+    p_ac3thread->ac3_decoder->samples = memalign(16, 6 * 256 * sizeof(float));
+    p_ac3thread->ac3_decoder->imdct->buf = memalign(16, N/4 * sizeof(complex_t));
+    p_ac3thread->ac3_decoder->imdct->delay = memalign(16, 6 * 256 * sizeof(float));
+    p_ac3thread->ac3_decoder->imdct->delay1 = memalign(16, 6 * 256 * sizeof(float));
+    p_ac3thread->ac3_decoder->imdct->xcos1 = memalign(16, N/4 * sizeof(float));
+    p_ac3thread->ac3_decoder->imdct->xsin1 = memalign(16, N/4 * sizeof(float));
+    p_ac3thread->ac3_decoder->imdct->xcos2 = memalign(16, N/8 * sizeof(float));
+    p_ac3thread->ac3_decoder->imdct->xsin2 = memalign(16, N/8 * sizeof(float));
+    p_ac3thread->ac3_decoder->imdct->xcos_sin_sse = memalign(16, 128 * 4 * sizeof(float));
+    p_ac3thread->ac3_decoder->imdct->w_2 = memalign(16, 2 * sizeof(complex_t));
+    p_ac3thread->ac3_decoder->imdct->w_4 = memalign(16, 4 * sizeof(complex_t));
+    p_ac3thread->ac3_decoder->imdct->w_8 = memalign(16, 8 * sizeof(complex_t));
+    p_ac3thread->ac3_decoder->imdct->w_16 = memalign(16, 16 * sizeof(complex_t));
+    p_ac3thread->ac3_decoder->imdct->w_32 = memalign(16, 32 * sizeof(complex_t));
+    p_ac3thread->ac3_decoder->imdct->w_64 = memalign(16, 64 * sizeof(complex_t));
+    p_ac3thread->ac3_decoder->imdct->w_1 = memalign(16, sizeof(complex_t));
+
+    ac3_init (p_ac3thread->ac3_decoder);
 
     /*
      * Initialize the output properties
@@ -164,9 +180,27 @@ vlc_thread_t ac3dec_CreateThread( adec_config_t * p_config )
                 (vlc_thread_func_t)RunThread, (void *)p_ac3thread))
     {
         intf_ErrMsg( "ac3dec error: can't spawn ac3 decoder thread" );
-        module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module );
-        module_Unneed( p_ac3thread->ac3_decoder.imdct.p_module );
-        free (p_ac3thread->ac3thread);
+        module_Unneed( p_ac3thread->ac3_decoder->downmix.p_module );
+        module_Unneed( p_ac3thread->ac3_decoder->imdct->p_module );
+        free( p_ac3thread->ac3_decoder->imdct->w_1 );
+        free( p_ac3thread->ac3_decoder->imdct->w_64 );
+        free( p_ac3thread->ac3_decoder->imdct->w_32 );
+        free( p_ac3thread->ac3_decoder->imdct->w_16 );
+        free( p_ac3thread->ac3_decoder->imdct->w_8 );
+        free( p_ac3thread->ac3_decoder->imdct->w_4 );
+        free( p_ac3thread->ac3_decoder->imdct->w_2 );
+        free( p_ac3thread->ac3_decoder->imdct->xcos_sin_sse );
+        free( p_ac3thread->ac3_decoder->imdct->xsin2 );
+        free( p_ac3thread->ac3_decoder->imdct->xcos2 );
+        free( p_ac3thread->ac3_decoder->imdct->xsin1 );
+        free( p_ac3thread->ac3_decoder->imdct->xcos1 );
+        free( p_ac3thread->ac3_decoder->imdct->delay1 );
+        free( p_ac3thread->ac3_decoder->imdct->delay );
+        free( p_ac3thread->ac3_decoder->imdct->buf );
+        free( p_ac3thread->ac3_decoder->samples );
+        free( p_ac3thread->ac3_decoder->imdct );
+        free( p_ac3thread->ac3_decoder );
+        free( p_ac3thread );
         return 0;
     }
 
@@ -184,7 +218,7 @@ static int InitThread (ac3dec_thread_t * p_ac3thread)
     intf_DbgMsg("ac3dec debug: initializing ac3 decoder thread %p",p_ac3thread);
 
     p_ac3thread->p_config->decoder_config.pf_init_bit_stream(
-            &p_ac3thread->ac3_decoder.bit_stream,
+            &p_ac3thread->ac3_decoder->bit_stream,
             p_ac3thread->p_config->decoder_config.p_decoder_fifo,
             BitstreamCallback, (void *) p_ac3thread );
 
@@ -228,7 +262,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread)
 
         if (!sync) {
             do {
-                GetBits(&p_ac3thread->ac3_decoder.bit_stream,8);
+                GetBits(&p_ac3thread->ac3_decoder->bit_stream,8);
             } while ((!p_ac3thread->sync_ptr) && (!p_ac3thread->p_fifo->b_die)
                     && (!p_ac3thread->p_fifo->b_error));
             
@@ -237,7 +271,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread)
             while(ptr-- && (!p_ac3thread->p_fifo->b_die)
                 && (!p_ac3thread->p_fifo->b_error))
             {
-                p_ac3thread->ac3_decoder.bit_stream.p_byte++;
+                p_ac3thread->ac3_decoder->bit_stream.p_byte++;
             }
                         
             /* we are in sync now */
@@ -254,7 +288,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread)
                 LAST_MDATE;
         }
     
-        if (ac3_sync_frame (&p_ac3thread->ac3_decoder, &sync_info))
+        if (ac3_sync_frame (p_ac3thread->ac3_decoder, &sync_info))
         {
             sync = 0;
             goto bad_frame;
@@ -265,7 +299,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread)
         buffer = ((s16 *)p_ac3thread->p_aout_fifo->buffer) + 
             (p_ac3thread->p_aout_fifo->l_end_frame * AC3DEC_FRAME_SIZE);
 
-        if (ac3_decode_frame (&p_ac3thread->ac3_decoder, buffer))
+        if (ac3_decode_frame (p_ac3thread->ac3_decoder, buffer))
         {
             sync = 0;
             goto bad_frame;
@@ -278,7 +312,7 @@ static void RunThread (ac3dec_thread_t * p_ac3thread)
         vlc_mutex_unlock (&p_ac3thread->p_aout_fifo->data_lock);
 
         bad_frame:
-            RealignBits(&p_ac3thread->ac3_decoder.bit_stream);
+            RealignBits(&p_ac3thread->ac3_decoder->bit_stream);
     }
 
     /* If b_error is set, the ac3 decoder thread enters the error loop */
@@ -339,12 +373,30 @@ static void EndThread (ac3dec_thread_t * p_ac3thread)
     }
 
     /* Unlock the modules */
-    module_Unneed( p_ac3thread->ac3_decoder.downmix.p_module );
-    module_Unneed( p_ac3thread->ac3_decoder.imdct.p_module );
+    module_Unneed( p_ac3thread->ac3_decoder->downmix.p_module );
+    module_Unneed( p_ac3thread->ac3_decoder->imdct->p_module );
 
     /* Destroy descriptor */
+    free( p_ac3thread->ac3_decoder->imdct->w_1 );
+    free( p_ac3thread->ac3_decoder->imdct->w_64 );
+    free( p_ac3thread->ac3_decoder->imdct->w_32 );
+    free( p_ac3thread->ac3_decoder->imdct->w_16 );
+    free( p_ac3thread->ac3_decoder->imdct->w_8 );
+    free( p_ac3thread->ac3_decoder->imdct->w_4 );
+    free( p_ac3thread->ac3_decoder->imdct->w_2 );
+    free( p_ac3thread->ac3_decoder->imdct->xcos_sin_sse );
+    free( p_ac3thread->ac3_decoder->imdct->xsin2 );
+    free( p_ac3thread->ac3_decoder->imdct->xcos2 );
+    free( p_ac3thread->ac3_decoder->imdct->xsin1 );
+    free( p_ac3thread->ac3_decoder->imdct->xcos1 );
+    free( p_ac3thread->ac3_decoder->imdct->delay1 );
+    free( p_ac3thread->ac3_decoder->imdct->delay );
+    free( p_ac3thread->ac3_decoder->imdct->buf );
+    free( p_ac3thread->ac3_decoder->samples );
+    free( p_ac3thread->ac3_decoder->imdct );
+    free( p_ac3thread->ac3_decoder );
     free( p_ac3thread->p_config );
-    free( p_ac3thread->ac3thread );
+    free( p_ac3thread );
 
     intf_DbgMsg ("ac3dec debug: ac3 decoder thread %p destroyed", p_ac3thread);
 }
index 896ad88038704cfeef5ae5ad949768f84468719a..600a2555166295a77f4182f9c8c9acad61a36655 100644 (file)
@@ -2,7 +2,7 @@
  * ac3_decoder_thread.h : ac3 decoder thread interface
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_decoder_thread.h,v 1.9 2001/08/05 15:32:46 gbazin Exp $
+ * $Id: ac3_decoder_thread.h,v 1.10 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *
@@ -30,9 +30,7 @@ typedef struct ac3dec_thread_s
     /*
      * Decoder properties
      */
-    float used_for_alignement1;
-    float used_for_alignement2;
-    ac3dec_t            ac3_decoder __attribute__ ((aligned(16)));
+    ac3dec_t *          ac3_decoder;
     
     /*
      * Thread properties
@@ -51,8 +49,6 @@ typedef struct ac3dec_thread_s
      */
     aout_fifo_t *       p_aout_fifo; /* stores the decompressed audio frames */
 
-    struct ac3dec_thread_s *   ac3thread;            /* save the old pointer */
-    
 } ac3dec_thread_t;
 
 /*****************************************************************************
index 6ad5fbc72473d80f8097e6a7cc5052221091074d..01287bda4f1493fa298231412af300f0d0aeac33 100644 (file)
@@ -2,7 +2,7 @@
  * ac3_imdct.c: ac3 DCT
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_imdct.c,v 1.21 2001/05/16 14:51:29 reno Exp $
+ * $Id: ac3_imdct.c,v 1.22 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -76,11 +76,11 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
     /* test if dm in frequency is doable */
     if (!(doable = p_ac3dec->audblk.blksw[0]))
     {
-        do_imdct = p_ac3dec->imdct.pf_imdct_512;
+        do_imdct = p_ac3dec->imdct->pf_imdct_512;
     }
     else
     {
-        do_imdct = p_ac3dec->imdct.pf_imdct_256;
+        do_imdct = p_ac3dec->imdct->pf_imdct_256;
     }
 
     /* downmix in the frequency domain if all the channels
@@ -100,28 +100,28 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
         switch(p_ac3dec->bsi.acmod)
         {
             case 7:        /* 3/2 */
-                p_ac3dec->downmix.pf_downmix_3f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+                p_ac3dec->downmix.pf_downmix_3f_2r_to_2ch (p_ac3dec->samples, &p_ac3dec->dm_par);
                 break;
             case 6:        /* 2/2 */
-                p_ac3dec->downmix.pf_downmix_2f_2r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+                p_ac3dec->downmix.pf_downmix_2f_2r_to_2ch (p_ac3dec->samples, &p_ac3dec->dm_par);
                 break;
             case 5:        /* 3/1 */
-                p_ac3dec->downmix.pf_downmix_3f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+                p_ac3dec->downmix.pf_downmix_3f_1r_to_2ch (p_ac3dec->samples, &p_ac3dec->dm_par);
                 break;
             case 4:        /* 2/1 */
-                p_ac3dec->downmix.pf_downmix_2f_1r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+                p_ac3dec->downmix.pf_downmix_2f_1r_to_2ch (p_ac3dec->samples, &p_ac3dec->dm_par);
                 break;
             case 3:        /* 3/0 */
-                p_ac3dec->downmix.pf_downmix_3f_0r_to_2ch (p_ac3dec->samples[0], &p_ac3dec->dm_par);
+                p_ac3dec->downmix.pf_downmix_3f_0r_to_2ch (p_ac3dec->samples, &p_ac3dec->dm_par);
                 break;
             case 2:
                 break;
             default:    /* 1/0 */
 //                if (p_ac3dec->bsi.acmod == 1)
-                    center = p_ac3dec->samples[0];
+                    center = p_ac3dec->samples;
 //                else if (p_ac3dec->bsi.acmod == 0)
 //                  center = samples[ac3_config.dual_mono_ch_sel];
-                do_imdct(&p_ac3dec->imdct, center, p_ac3dec->imdct.delay[0]); /* no downmix*/
+                do_imdct(p_ac3dec->imdct, center, p_ac3dec->imdct->delay); /* no downmix*/
     
                 p_ac3dec->downmix.pf_stream_sample_1ch_to_s16 (buffer, center);
 
@@ -129,9 +129,9 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
                 break;
         }
 
-        do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[0], p_ac3dec->imdct.delay[0]);
-        do_imdct (&p_ac3dec->imdct, p_ac3dec->samples[1], p_ac3dec->imdct.delay[1]);
-        p_ac3dec->downmix.pf_stream_sample_2ch_to_s16(buffer, p_ac3dec->samples[0], p_ac3dec->samples[1]);
+        do_imdct (p_ac3dec->imdct, p_ac3dec->samples, p_ac3dec->imdct->delay);
+        do_imdct (p_ac3dec->imdct, p_ac3dec->samples+256, p_ac3dec->imdct->delay+256);
+        p_ac3dec->downmix.pf_stream_sample_2ch_to_s16(buffer, p_ac3dec->samples, p_ac3dec->samples+256);
 
     } else {
         /* imdct and then downmix
@@ -141,27 +141,27 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
         {
             if (p_ac3dec->audblk.blksw[i])
                 /* There is only a C function */
-                p_ac3dec->imdct.pf_imdct_256_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]);
+                p_ac3dec->imdct->pf_imdct_256_nol (p_ac3dec->imdct, p_ac3dec->samples+256*i, p_ac3dec->imdct->delay1+256*i);
             else
-                p_ac3dec->imdct.pf_imdct_512_nol (&p_ac3dec->imdct, p_ac3dec->samples[i], p_ac3dec->imdct.delay1[i]);
+                p_ac3dec->imdct->pf_imdct_512_nol (p_ac3dec->imdct, p_ac3dec->samples+256*i, p_ac3dec->imdct->delay1+256*i);
         }
 
         /* mix the sample, overlap */
         switch(p_ac3dec->bsi.acmod)
         {
             case 7:        /* 3/2 */
-                left = p_ac3dec->samples[0];
-                center = p_ac3dec->samples[1];
-                right = p_ac3dec->samples[2];
-                left_sur = p_ac3dec->samples[3];
-                right_sur = p_ac3dec->samples[4];
-                delay_left = p_ac3dec->imdct.delay[0];
-                delay_right = p_ac3dec->imdct.delay[1];
-                delay1_left = p_ac3dec->imdct.delay1[0];
-                delay1_center = p_ac3dec->imdct.delay1[1];
-                delay1_right = p_ac3dec->imdct.delay1[2];
-                delay1_sl = p_ac3dec->imdct.delay1[3];
-                delay1_sr = p_ac3dec->imdct.delay1[4];
+                left = p_ac3dec->samples;
+                center = p_ac3dec->samples+256;
+                right = p_ac3dec->samples+2*256;
+                left_sur = p_ac3dec->samples+3*256;
+                right_sur = p_ac3dec->samples+4*256;
+                delay_left = p_ac3dec->imdct->delay;
+                delay_right = p_ac3dec->imdct->delay+256;
+                delay1_left = p_ac3dec->imdct->delay1;
+                delay1_center = p_ac3dec->imdct->delay1+256;
+                delay1_right = p_ac3dec->imdct->delay1+2*256;
+                delay1_sl = p_ac3dec->imdct->delay1+3*256;
+                delay1_sr = p_ac3dec->imdct->delay1+4*256;
     
                 for (i = 0; i < 256; i++) {
                     left_tmp = p_ac3dec->dm_par.unit * *left++  + p_ac3dec->dm_par.clev * *center  + p_ac3dec->dm_par.slev * *left_sur++;
@@ -173,16 +173,16 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
                 }
                 break;
             case 6:        /* 2/2 */
-                left = p_ac3dec->samples[0];
-                right = p_ac3dec->samples[1];
-                left_sur = p_ac3dec->samples[2];
-                right_sur = p_ac3dec->samples[3];
-                delay_left = p_ac3dec->imdct.delay[0];
-                delay_right = p_ac3dec->imdct.delay[1];
-                delay1_left = p_ac3dec->imdct.delay1[0];
-                delay1_right = p_ac3dec->imdct.delay1[1];
-                delay1_sl = p_ac3dec->imdct.delay1[2];
-                delay1_sr = p_ac3dec->imdct.delay1[3];
+                left = p_ac3dec->samples;
+                right = p_ac3dec->samples+256;
+                left_sur = p_ac3dec->samples+2*256;
+                right_sur = p_ac3dec->samples+3*256;
+                delay_left = p_ac3dec->imdct->delay;
+                delay_right = p_ac3dec->imdct->delay+256;
+                delay1_left = p_ac3dec->imdct->delay1;
+                delay1_right = p_ac3dec->imdct->delay1+256;
+                delay1_sl = p_ac3dec->imdct->delay1+2*256;
+                delay1_sr = p_ac3dec->imdct->delay1+3*256;
     
                 for (i = 0; i < 256; i++) {
                     left_tmp = p_ac3dec->dm_par.unit * *left++  + p_ac3dec->dm_par.slev * *left_sur++;
@@ -194,16 +194,16 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
                 }
                 break;
             case 5:        /* 3/1 */
-                left = p_ac3dec->samples[0];
-                center = p_ac3dec->samples[1];
-                right = p_ac3dec->samples[2];
-                right_sur = p_ac3dec->samples[3];
-                delay_left = p_ac3dec->imdct.delay[0];
-                delay_right = p_ac3dec->imdct.delay[1];
-                delay1_left = p_ac3dec->imdct.delay1[0];
-                delay1_center = p_ac3dec->imdct.delay1[1];
-                delay1_right = p_ac3dec->imdct.delay1[2];
-                delay1_sl = p_ac3dec->imdct.delay1[3];
+                left = p_ac3dec->samples;
+                center = p_ac3dec->samples+256;
+                right = p_ac3dec->samples+2*256;
+                right_sur = p_ac3dec->samples+3*256;
+                delay_left = p_ac3dec->imdct->delay;
+                delay_right = p_ac3dec->imdct->delay+256;
+                delay1_left = p_ac3dec->imdct->delay1;
+                delay1_center = p_ac3dec->imdct->delay1+256;
+                delay1_right = p_ac3dec->imdct->delay1+2*256;
+                delay1_sl = p_ac3dec->imdct->delay1+3*256;
     
                 for (i = 0; i < 256; i++) {
                     left_tmp = p_ac3dec->dm_par.unit * *left++  + p_ac3dec->dm_par.clev * *center  - p_ac3dec->dm_par.slev * *right_sur;
@@ -215,14 +215,14 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
                 }
                 break;
             case 4:        /* 2/1 */
-                left = p_ac3dec->samples[0];
-                right = p_ac3dec->samples[1];
-                right_sur = p_ac3dec->samples[2];
-                delay_left = p_ac3dec->imdct.delay[0];
-                delay_right = p_ac3dec->imdct.delay[1];
-                delay1_left = p_ac3dec->imdct.delay1[0];
-                delay1_right = p_ac3dec->imdct.delay1[1];
-                delay1_sl = p_ac3dec->imdct.delay1[2];
+                left = p_ac3dec->samples;
+                right = p_ac3dec->samples+256;
+                right_sur = p_ac3dec->samples+2*256;
+                delay_left = p_ac3dec->imdct->delay;
+                delay_right = p_ac3dec->imdct->delay+256;
+                delay1_left = p_ac3dec->imdct->delay1;
+                delay1_right = p_ac3dec->imdct->delay1+256;
+                delay1_sl = p_ac3dec->imdct->delay1+2*256;
     
                 for (i = 0; i < 256; i++) {
                     left_tmp = p_ac3dec->dm_par.unit * *left++ - p_ac3dec->dm_par.slev * *right_sur;
@@ -234,14 +234,14 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
                 }
                 break;
             case 3:        /* 3/0 */
-                left = p_ac3dec->samples[0];
-                center = p_ac3dec->samples[1];
-                right = p_ac3dec->samples[2];
-                delay_left = p_ac3dec->imdct.delay[0];
-                delay_right = p_ac3dec->imdct.delay[1];
-                delay1_left = p_ac3dec->imdct.delay1[0];
-                   delay1_center = p_ac3dec->imdct.delay1[1];
-                delay1_right = p_ac3dec->imdct.delay1[2];
+                left = p_ac3dec->samples;
+                center = p_ac3dec->samples+256;
+                right = p_ac3dec->samples+2*256;
+                delay_left = p_ac3dec->imdct->delay;
+                delay_right = p_ac3dec->imdct->delay+256;
+                delay1_left = p_ac3dec->imdct->delay1;
+                delay1_center = p_ac3dec->imdct->delay1+256;
+                delay1_right = p_ac3dec->imdct->delay1+2*256;
 
                 for (i = 0; i < 256; i++) {
                     left_tmp = p_ac3dec->dm_par.unit * *left++  + p_ac3dec->dm_par.clev * *center;
@@ -254,8 +254,8 @@ void imdct (ac3dec_t * p_ac3dec, s16 * buffer)
                 break;
             case 2:        /* copy to output */
                 for (i = 0; i < 256; i++) {
-                    *buffer++ = (s16)p_ac3dec->samples[0][i];
-                    *buffer++ = (s16)p_ac3dec->samples[1][i];
+                    *buffer++ = (s16) *(p_ac3dec->samples+i);
+                    *buffer++ = (s16) *(p_ac3dec->samples+256+i);
                 }
                 break;
         }
index 2f829bfa24955062ab87a47ccee2bdb15bfcbdb9..ffc80f6258eb088a562f9b703baa85a75fef89cc 100644 (file)
@@ -2,7 +2,7 @@
  * ac3_mantissa.c: ac3 mantissa computation
  *****************************************************************************
  * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_mantissa.c,v 1.29 2001/05/15 16:19:42 sam Exp $
+ * $Id: ac3_mantissa.c,v 1.30 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -59,7 +59,7 @@ void mantissa_unpack (ac3dec_t * p_ac3dec)
 
     for (i=0; i< p_ac3dec->bsi.nfchans; i++) {
         for (j=0; j < p_ac3dec->audblk.endmant[i]; j++)
-            p_ac3dec->samples[i][j] = coeff_get_float(p_ac3dec, p_ac3dec->audblk.fbw_bap[i][j],
+            *(p_ac3dec->samples+i*256+j) = coeff_get_float(p_ac3dec, p_ac3dec->audblk.fbw_bap[i][j],
                     p_ac3dec->audblk.dithflag[i], p_ac3dec->audblk.fbw_exp[i][j]);
 
         if (p_ac3dec->audblk.cplinu && p_ac3dec->audblk.chincpl[i] && !(done_cpl)) {
@@ -85,7 +85,7 @@ void mantissa_unpack (ac3dec_t * p_ac3dec)
     if (p_ac3dec->bsi.lfeon) {
         /* There are always 7 mantissas for lfe, no dither for lfe */
         for (j=0; j < 7 ; j++)
-            p_ac3dec->samples[5][j] = coeff_get_float(p_ac3dec, p_ac3dec->audblk.lfe_bap[j],
+            *(p_ac3dec->samples+5*256+j) = coeff_get_float(p_ac3dec, p_ac3dec->audblk.lfe_bap[j],
                     0, p_ac3dec->audblk.lfe_exp[j]);
     }
 }
index 687fb66eb95e99343747922e73d25649f27a2da2..cb6a32ff8782c5c39a2b01fef6557d175552111b 100644 (file)
@@ -2,7 +2,7 @@
  * ac3_mantissa.h: ac3 mantissa computation
  *****************************************************************************
  * Copyright (C) 1999, 2000, 2001 VideoLAN
- * $Id: ac3_mantissa.h,v 1.4 2001/05/15 16:19:42 sam Exp $
+ * $Id: ac3_mantissa.h,v 1.5 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -392,10 +392,10 @@ static __inline__ void uncouple_channel (ac3dec_t * p_ac3dec, u32 ch)
              * so the channels are uncorrelated */
             if (p_ac3dec->audblk.dithflag[ch] && !p_ac3dec->audblk.cpl_bap[i])
             {
-                p_ac3dec->samples[ch][i] = cpl_coord * dither_gen(&p_ac3dec->mantissa) *
+                *(p_ac3dec->samples+ch*256+i) = cpl_coord * dither_gen(&p_ac3dec->mantissa) *
                     scale_factor[p_ac3dec->audblk.cpl_exp[i]];
             } else {
-                p_ac3dec->samples[ch][i]  = cpl_coord * p_ac3dec->audblk.cpl_flt[i];
+                *(p_ac3dec->samples+ch*256+i) = cpl_coord * p_ac3dec->audblk.cpl_flt[i];
             }
             i++;
         }
index d9aca1cb1884d81569f44402222bc863ee51d2eb..11be6a7c3bc6f8124a83ec5e2720e79acedcfe91 100644 (file)
@@ -2,7 +2,7 @@
  * ac3_rematrix.c: ac3 audio rematrixing
  *****************************************************************************
  * Copyright (C) 1999, 2000 VideoLAN
- * $Id: ac3_rematrix.c,v 1.18 2001/05/15 16:19:42 sam Exp $
+ * $Id: ac3_rematrix.c,v 1.19 2001/10/30 19:34:53 reno Exp $
  *
  * Authors: Michel Kaempf <maxx@via.ecp.fr>
  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
@@ -78,10 +78,10 @@ void rematrix (ac3dec_t * p_ac3dec)
         end = min_value(rematrix_band[i].end ,12 * p_ac3dec->audblk.cplbegf + 36);
 
         for (j=start;j < end; j++) {
-            left  = 0.5f * (p_ac3dec->samples[0][j] + p_ac3dec->samples[1][j]);
-            right = 0.5f * (p_ac3dec->samples[0][j] - p_ac3dec->samples[1][j]);
-            p_ac3dec->samples[0][j] = left;
-            p_ac3dec->samples[1][j] = right;
+            left  = 0.5f * ( *(p_ac3dec->samples+j) + *(p_ac3dec->samples+256+j) );
+            right = 0.5f * ( *(p_ac3dec->samples+j) - *(p_ac3dec->samples+256+j) );
+            *(p_ac3dec->samples+j) = left;
+            *(p_ac3dec->samples+256+j) = right;
         }
     }
 }