git.sesse.net Git - vlc/blob - src/ac3_decoder/ac3_imdct.c

   1 /*****************************************************************************
   2  * ac3_imdct.c: ac3 DCT
   3  *****************************************************************************
   4  * Copyright (C) 1999, 2000 VideoLAN
   5  *
   6  * Authors:
   7  *
   8  * This program is free software; you can redistribute it and/or modify
   9  * it under the terms of the GNU General Public License as published by
  10  * the Free Software Foundation; either version 2 of the License, or
  11  * (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  21  *****************************************************************************/
  22 #include "defs.h"
  23
  24 #include <math.h>
  25
  26 #include "int_types.h"
  27 #include "ac3_decoder.h"
  28 #include "ac3_internal.h"
  29
  30 void imdct_do_256(float x[],float y[],float delay[]);
  31 void imdct_do_512(float x[],float y[],float delay[]);
  32
  33 typedef struct complex_s {
  34     float real;
  35     float imag;
  36 } complex_t;
  37
  38 #define N 512
  39
  40 static complex_t buf[N/4];
  41
  42 /* 128 point bit-reverse LUT */
  43 static u8 bit_reverse_512[] = {
  44     0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
  45     0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
  46     0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
  47     0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
  48     0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72,
  49     0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
  50     0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76,
  51     0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
  52     0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71,
  53     0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
  54     0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75,
  55     0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
  56     0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73,
  57     0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
  58     0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77,
  59     0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f};
  60
  61 static u8 bit_reverse_256[] = {
  62     0x00, 0x20, 0x10, 0x30, 0x08, 0x28, 0x18, 0x38,
  63     0x04, 0x24, 0x14, 0x34, 0x0c, 0x2c, 0x1c, 0x3c,
  64     0x02, 0x22, 0x12, 0x32, 0x0a, 0x2a, 0x1a, 0x3a,
  65     0x06, 0x26, 0x16, 0x36, 0x0e, 0x2e, 0x1e, 0x3e,
  66     0x01, 0x21, 0x11, 0x31, 0x09, 0x29, 0x19, 0x39,
  67     0x05, 0x25, 0x15, 0x35, 0x0d, 0x2d, 0x1d, 0x3d,
  68     0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b,
  69     0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f};
  70
  71 /* Twiddle factor LUT */
  72 static complex_t *w[7];
  73 static complex_t w_1[1];
  74 static complex_t w_2[2];
  75 static complex_t w_4[4];
  76 static complex_t w_8[8];
  77 static complex_t w_16[16];
  78 static complex_t w_32[32];
  79 static complex_t w_64[64];
  80
  81 /* Twiddle factors for IMDCT */
  82 static float xcos1[N/4];
  83 static float xsin1[N/4];
  84 static float xcos2[N/8];
  85 static float xsin2[N/8];
  86
  87 /* Delay buffer for time domain interleaving */
  88 static float delay[6][256];
  89
  90 /* Windowing function for Modified DCT - Thank you acroread */
  91 static float window[] = {
  92     0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
  93     0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
  94     0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
  95     0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
  96     0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
  97     0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
  98     0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
  99     0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
 100     0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
 101     0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
 102     0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
 103     0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
 104     0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
 105     0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
 106     0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
 107     0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
 108     0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
 109     0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
 110     0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
 111     0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
 112     0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
 113     0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
 114     0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
 115     0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
 116     0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
 117     0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
 118     0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
 119     0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
 120     0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
 121     0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
 122     0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
 123     1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 };
 124
 125 static __inline__ void swap_cmplx(complex_t *a, complex_t *b)
 126 {
 127     complex_t tmp;
 128
 129     tmp = *a;
 130     *a = *b;
 131     *b = tmp;
 132 }
 133
 134 static __inline__ complex_t cmplx_mult(complex_t a, complex_t b)
 135 {
 136     complex_t ret;
 137
 138     ret.real = a.real * b.real - a.imag * b.imag;
 139     ret.imag = a.real * b.imag + a.imag * b.real;
 140
 141     return ret;
 142 }
 143
 144 static void imdct_init(void) __attribute__ ((__constructor__));
 145 static void imdct_init(void)
 146 {
 147     int i,k;
 148     complex_t angle_step;
 149     complex_t current_angle;
 150
 151     /* Twiddle factors to turn IFFT into IMDCT */
 152     for (i=0; i < N/4; i++) {
 153         xcos1[i] = -cos(2 * M_PI * (8*i+1)/(8*N)) ;
 154         xsin1[i] = -sin(2 * M_PI * (8*i+1)/(8*N)) ;
 155     }
 156
 157     /* More twiddle factors to turn IFFT into IMDCT */
 158     for (i=0; i < N/8; i++) {
 159         xcos2[i] = -cos(2 * M_PI * (8*i+1)/(4*N)) ;
 160         xsin2[i] = -sin(2 * M_PI * (8*i+1)/(4*N)) ;
 161     }
 162
 163     /* Canonical twiddle factors for FFT */
 164     w[0] = w_1;
 165     w[1] = w_2;
 166     w[2] = w_4;
 167     w[3] = w_8;
 168     w[4] = w_16;
 169     w[5] = w_32;
 170     w[6] = w_64;
 171
 172     for (i = 0; i < 7; i++) {
 173         angle_step.real = cos(-2.0f * M_PI / (1 << (i+1)));
 174         angle_step.imag = sin(-2.0f * M_PI / (1 << (i+1)));
 175
 176         current_angle.real = 1.0f;
 177         current_angle.imag = 0.0f;
 178
 179         for (k = 0; k < 1 << i; k++) {
 180             w[i][k] = current_angle;
 181             current_angle = cmplx_mult(current_angle,angle_step);
 182         }
 183     }
 184 }
 185
 186 void imdct (ac3dec_t * p_ac3dec)
 187 {
 188     int i;
 189
 190     for (i=0; i<p_ac3dec->bsi.nfchans;i++) {
 191         if (p_ac3dec->audblk.blksw[i])
 192             imdct_do_256(p_ac3dec->coeffs.fbw[i],p_ac3dec->samples.channel[i],delay[i]);
 193         else
 194             imdct_do_512(p_ac3dec->coeffs.fbw[i],p_ac3dec->samples.channel[i],delay[i]);
 195     }
 196
 197     /* XXX?? We don't bother with the IMDCT for the LFE as it's currently
 198      * unused. */
 199     //if (bsi->lfeon)
 200     //    imdct_do_512(coeffs->lfe,samples->channel[5],delay[5]);
 201 }
 202
 203 void
 204 imdct_do_512(float x[],float y[],float delay[])
 205 {
 206     int i,k;
 207     int p,q;
 208     int m;
 209     int two_m;
 210     int two_m_plus_one;
 211
 212     float tmp_a_i;
 213     float tmp_a_r;
 214     float tmp_b_i;
 215     float tmp_b_r;
 216
 217
 218     float *y_ptr;
 219     float *delay_ptr;
 220     float *window_ptr;
 221
 222     /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
 223     for (i=0; i < N/4; i++) {
 224         /* z[i] = (X[N/2-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
 225         buf[i].real =  (x[N/2-2*i-1] * xcos1[i])  - (x[2*i]       * xsin1[i]);
 226         buf[i].imag = -((x[2*i]       * xcos1[i])  + (x[N/2-2*i-1] * xsin1[i]));
 227     }
 228
 229     /* Bit reversed shuffling */
 230     for (i=0; i<N/4; i++) {
 231         k = bit_reverse_512[i];
 232         if (k < i)
 233             swap_cmplx(&buf[i],&buf[k]);
 234     }
 235
 236     /* FFT Merge */
 237     for (m=0; m < 7; m++) {
 238         two_m = (1 << m);
 239         two_m_plus_one = (1 << (m+1));
 240
 241         for (k = 0; k < two_m; k++) {
 242             for (i = 0; i < 128; i += two_m_plus_one) {
 243                 p = k + i;
 244                 q = p + two_m;
 245                 tmp_a_r = buf[p].real;
 246                 tmp_a_i = buf[p].imag;
 247                 tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
 248                 tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
 249                 buf[p].real = tmp_a_r + tmp_b_r;
 250                 buf[p].imag =  tmp_a_i + tmp_b_i;
 251                 buf[q].real = tmp_a_r - tmp_b_r;
 252                 buf[q].imag =  tmp_a_i - tmp_b_i;
 253             }
 254         }
 255     }
 256
 257     /* Post IFFT complex multiply  plus IFFT complex conjugate*/
 258     for (i=0; i < N/4; i++) {
 259         /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
 260         tmp_a_r =   buf[i].real;
 261         tmp_a_i = - buf[i].imag;
 262         buf[i].real =(tmp_a_r * xcos1[i])  - (tmp_a_i  * xsin1[i]);
 263         buf[i].imag =(tmp_a_r * xsin1[i])  + (tmp_a_i  * xcos1[i]);
 264     }
 265
 266     y_ptr = y;
 267     delay_ptr = delay;
 268     window_ptr = window;
 269     /* Window and convert to real valued signal */
 270     for (i=0; i<N/8; i++) {
 271         *y_ptr++   = 2.0f * (-buf[N/8+i].imag   * *window_ptr++ + *delay_ptr++);
 272         *y_ptr++   = 2.0f * (buf[N/8-i-1].real * *window_ptr++ + *delay_ptr++);
 273     }
 274
 275     for (i=0; i<N/8; i++) {
 276         *y_ptr++  = 2.0f * (-buf[i].real       * *window_ptr++ + *delay_ptr++);
 277         *y_ptr++  = 2.0f * (buf[N/4-i-1].imag * *window_ptr++ + *delay_ptr++);
 278     }
 279
 280     /* The trailing edge of the window goes into the delay line */
 281     delay_ptr = delay;
 282
 283     for (i=0; i<N/8; i++) {
 284         *delay_ptr++  = -buf[N/8+i].real   * *--window_ptr;
 285         *delay_ptr++  =  buf[N/8-i-1].imag * *--window_ptr;
 286     }
 287
 288     for (i=0; i<N/8; i++) {
 289         *delay_ptr++  =  buf[i].imag       * *--window_ptr;
 290         *delay_ptr++  = -buf[N/4-i-1].real * *--window_ptr;
 291     }
 292 }
 293
 294 void
 295 imdct_do_256(float x[],float y[],float delay[])
 296 {
 297     int i,k;
 298     int p,q;
 299     int m;
 300     int two_m;
 301     int two_m_plus_one;
 302
 303     float tmp_a_i;
 304     float tmp_a_r;
 305     float tmp_b_i;
 306     float tmp_b_r;
 307
 308     complex_t *buf_1, *buf_2;
 309
 310     buf_1 = &buf[0];
 311     buf_2 = &buf[64];
 312
 313     /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
 314     for (k=0; k<N/8; k++) {
 315         /* X1[k] = X[2*k]  */
 316         /* X2[k] = X[2*k+1]     */
 317
 318         p = 2 * (N/4-2*k-1);
 319         q = 2 * (2 * k);
 320
 321        /* Z1[k] = (X1[N/4-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
 322         buf_1[k].real =    x[p] * xcos2[k] - x[q] * xsin2[k];
 323         buf_1[k].imag = - (x[q] * xcos2[k] + x[p] * xsin2[k]);
 324        /* Z2[k] = (X2[N/4-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
 325         buf_2[k].real =    x[p + 1] * xcos2[k] - x[q + 1] * xsin2[k];
 326         buf_2[k].imag = - (x[q + 1] * xcos2[k] + x[p + 1] * xsin2[k]);
 327     }
 328
 329     /* IFFT Bit reversed shuffling */
 330     for (i=0; i<N/8; i++) {
 331         k = bit_reverse_256[i];
 332         if (k < i) {
 333             swap_cmplx(&buf_1[i],&buf_1[k]);
 334             swap_cmplx(&buf_2[i],&buf_2[k]);
 335         }
 336     }
 337
 338     /* FFT Merge */
 339     for (m=0; m < 6; m++) {
 340         two_m = (1 << m);
 341         two_m_plus_one = (1 << (m+1));
 342
 343         for (k = 0; k < two_m; k++) {
 344             for (i = 0; i < 64; i += two_m_plus_one) {
 345                 p = k + i;
 346                 q = p + two_m;
 347                 /* Do block 1 */
 348                 tmp_a_r = buf_1[p].real;
 349                 tmp_a_i = buf_1[p].imag;
 350                 tmp_b_r = buf_1[q].real * w[m][k].real - buf_1[q].imag * w[m][k].imag;
 351                 tmp_b_i = buf_1[q].imag * w[m][k].real + buf_1[q].real * w[m][k].imag;
 352                 buf_1[p].real = tmp_a_r + tmp_b_r;
 353                 buf_1[p].imag =  tmp_a_i + tmp_b_i;
 354                 buf_1[q].real = tmp_a_r - tmp_b_r;
 355                 buf_1[q].imag =  tmp_a_i - tmp_b_i;
 356
 357                 /* Do block 2 */
 358                 tmp_a_r = buf_2[p].real;
 359                 tmp_a_i = buf_2[p].imag;
 360                 tmp_b_r = buf_2[q].real * w[m][k].real - buf_2[q].imag * w[m][k].imag;
 361                 tmp_b_i = buf_2[q].imag * w[m][k].real + buf_2[q].real * w[m][k].imag;
 362                 buf_2[p].real = tmp_a_r + tmp_b_r;
 363                 buf_2[p].imag =  tmp_a_i + tmp_b_i;
 364                 buf_2[q].real = tmp_a_r - tmp_b_r;
 365                 buf_2[q].imag =  tmp_a_i - tmp_b_i;
 366             }
 367         }
 368     }
 369
 370     /* Post IFFT complex multiply */
 371     for (i=0; i < N/8; i++) {
 372         /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
 373         tmp_a_r =   buf_1[i].real;
 374         tmp_a_i = - buf_1[i].imag;
 375         buf_1[i].real =(tmp_a_r * xcos2[i])  - (tmp_a_i  * xsin2[i]);
 376         buf_1[i].imag =(tmp_a_r * xsin2[i])  + (tmp_a_i  * xcos2[i]);
 377         /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
 378         tmp_a_r =   buf_2[i].real;
 379         tmp_a_i = - buf_2[i].imag;
 380         buf_2[i].real =(tmp_a_r * xcos2[i])  - (tmp_a_i  * xsin2[i]);
 381         buf_2[i].imag =(tmp_a_r * xsin2[i])  + (tmp_a_i  * xcos2[i]);
 382     }
 383
 384     /* Window and convert to real valued signal */
 385     for (i=0; i<N/8; i++) {
 386         y[2*i]         = -buf_1[i].imag       * window[2*i];
 387         y[2*i+1]       =  buf_1[N/8-i-1].real * window[2*i+1];
 388         y[N/4+2*i]     = -buf_1[i].real       * window[N/4+2*i];
 389         y[N/4+2*i+1]   =  buf_1[N/8-i-1].imag * window[N/4+2*i+1];
 390         y[N/2+2*i]     = -buf_2[i].real       * window[N/2-2*i-1];
 391         y[N/2+2*i+1]   =  buf_2[N/8-i-1].imag * window[N/2-2*i-2];
 392         y[3*N/4+2*i]   =  buf_2[i].imag       * window[N/4-2*i-1];
 393         y[3*N/4+2*i+1] = -buf_2[N/8-i-1].real * window[N/4-2*i-2];
 394     }
 395
 396     /* Overlap and add */
 397     for (i=0; i<N/2; i++) {
 398         y[i] = 2 * (y[i] + delay[i]);
 399         delay[i] = y[N/2+i];
 400     }
 401 }