git.sesse.net Git - vlc/blob - src/ac3_decoder/ac3_imdct_c.c

   1 /*****************************************************************************
   2  * ac3_imdct_c.c: ac3 DCT
   3  *****************************************************************************
   4  * Copyright (C) 1999, 2000 VideoLAN
   5  * $Id: ac3_imdct_c.c,v 1.2 2001/05/06 04:32:02 sam Exp $
   6  *
   7  * Authors: Renaud Dartus <reno@videolan.org>
   8  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
   9  *
  10  * This program is free software; you can redistribute it and/or modify
  11  * it under the terms of the GNU General Public License as published by
  12  * the Free Software Foundation; either version 2 of the License, or
  13  * (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  23  *****************************************************************************/
  24
  25 #include "defs.h"
  26
  27 #include <string.h>                                              /* memcpy() */
  28
  29 #include <math.h>
  30 #include <stdio.h>
  31
  32 #include "config.h"
  33 #include "common.h"
  34 #include "threads.h"
  35 #include "mtime.h"
  36
  37 #include "stream_control.h"
  38 #include "input_ext-dec.h"
  39
  40 #include "ac3_decoder.h"
  41 #include "ac3_internal.h"
  42
  43 #ifndef M_PI
  44 #   define M_PI 3.14159265358979323846
  45 #endif
  46
  47 void fft_64p_c (complex_t *x);
  48 void fft_128p_c (complex_t *x);
  49 void imdct_do_512_c (imdct_t * p_imdct, float data[], float delay[]);
  50 void imdct_do_512_nol_c (imdct_t * p_imdct, float data[], float delay[]);
  51
  52
  53 static float window[] = {
  54         0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
  55         0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
  56         0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
  57         0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
  58         0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
  59         0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
  60         0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
  61         0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
  62         0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
  63         0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
  64         0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
  65         0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
  66         0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
  67         0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
  68         0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
  69         0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
  70         0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
  71         0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
  72         0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
  73         0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
  74         0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
  75         0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
  76         0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
  77         0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
  78         0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
  79         0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
  80         0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
  81         0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
  82         0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
  83         0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
  84         0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
  85         1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
  86 };
  87
  88 static const int pm128[128] =
  89 {
  90         0, 16, 32, 48, 64, 80,  96, 112,  8, 40, 72, 104, 24, 56,  88, 120,
  91         4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44,  60, 76, 92, 108, 124,
  92         2, 18, 34, 50, 66, 82,  98, 114, 10, 42, 74, 106, 26, 58,  90, 122,
  93         6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62,  94, 126,
  94         1, 17, 33, 49, 65, 81,  97, 113,  9, 41, 73, 105, 25, 57,  89, 121,
  95         5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45,  61, 77, 93, 109, 125,
  96         3, 19, 35, 51, 67, 83,  99, 115, 11, 43, 75, 107, 27, 59,  91, 123,
  97         7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47,  63, 79, 95, 111, 127
  98 };
  99
 100 static const int pm64[64] =
 101 {
 102         0,  8, 16, 24, 32, 40, 48, 56,
 103         4, 20, 36, 52, 12, 28, 44, 60,
 104         2, 10, 18, 26, 34, 42, 50, 58,
 105         6, 14, 22, 30, 38, 46, 54, 62,
 106         1,  9, 17, 25, 33, 41, 49, 57,
 107         5, 21, 37, 53, 13, 29, 45, 61,
 108         3, 11, 19, 27, 35, 43, 51, 59,
 109         7, 23, 39, 55, 15, 31, 47, 63
 110 };
 111
 112 int imdct_init_c (imdct_t * p_imdct)
 113 {
 114         int i;
 115         float scale = 255.99609372;
 116
 117         p_imdct->imdct_do_512 = imdct_do_512_c;
 118         p_imdct->imdct_do_512_nol = imdct_do_512_nol_c;
 119         p_imdct->fft_64p = fft_64p_c;
 120
 121         /* Twiddle factors to turn IFFT into IMDCT */
 122
 123         for (i=0; i < 128; i++) {
 124                 p_imdct->xcos1[i] = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
 125                 p_imdct->xsin1[i] = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
 126         }
 127
 128         return 0;
 129 }
 130
 131 void imdct_do_256 (imdct_t * p_imdct, float data[],float delay[])
 132 {
 133         int i, j, k;
 134         int p, q;
 135
 136         float tmp_a_i;
 137         float tmp_a_r;
 138
 139         float *data_ptr;
 140         float *delay_ptr;
 141         float *window_ptr;
 142
 143         complex_t *buf1, *buf2;
 144
 145         buf1 = &p_imdct->buf[0];
 146         buf2 = &p_imdct->buf[64];
 147
 148     /* Pre IFFT complex multiply plus IFFT complex conjugate */
 149         for (k=0; k<64; k++) {
 150                 /* X1[k] = X[2*k]
 151                  * X2[k] = X[2*k+1]     */
 152
 153                 j = pm64[k];
 154                 p = 2 * (128-2*j-1);
 155                 q = 2 * (2 * j);
 156
 157                 /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
 158                 buf1[k].real =        data[p] * p_imdct->xcos2[j] - data[q] * p_imdct->xsin2[j];
 159                 buf1[k].imag = -1.0f*(data[q] * p_imdct->xcos2[j] + data[p] * p_imdct->xsin2[j]);
 160                 /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
 161                 buf2[k].real =        data[p + 1] * p_imdct->xcos2[j] - data[q + 1] * p_imdct->xsin2[j];
 162                 buf2[k].imag = -1.0f*(data[q + 1] * p_imdct->xcos2[j] + data[p + 1] * p_imdct->xsin2[j]);
 163         }
 164
 165         p_imdct->fft_64p(&buf1[0]);
 166         p_imdct->fft_64p(&buf2[0]);
 167
 168         /* Post IFFT complex multiply */
 169         for( i=0; i < 64; i++) {
 170                 tmp_a_r =  buf1[i].real;
 171                 tmp_a_i = -buf1[i].imag;
 172                 buf1[i].real = (tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
 173                 buf1[i].imag = (tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
 174                 tmp_a_r =  buf2[i].real;
 175                 tmp_a_i = -buf2[i].imag;
 176                 buf2[i].real = (tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
 177                 buf2[i].imag = (tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
 178         }
 179
 180         data_ptr = data;
 181         delay_ptr = delay;
 182         window_ptr = window;
 183
 184         /* Window and convert to real valued signal */
 185         for(i=0; i< 64; i++) {
 186                 *data_ptr++ = -buf1[i].imag     * *window_ptr++ + *delay_ptr++;
 187                 *data_ptr++ = buf1[64-i-1].real * *window_ptr++ + *delay_ptr++;
 188         }
 189
 190         for(i=0; i< 64; i++) {
 191                 *data_ptr++ = -buf1[i].real     * *window_ptr++ + *delay_ptr++;
 192                 *data_ptr++ = buf1[64-i-1].imag * *window_ptr++ + *delay_ptr++;
 193         }
 194
 195         delay_ptr = delay;
 196
 197         for(i=0; i< 64; i++) {
 198                 *delay_ptr++ = -buf2[i].real      * *--window_ptr;
 199                 *delay_ptr++ =  buf2[64-i-1].imag * *--window_ptr;
 200         }
 201
 202         for(i=0; i< 64; i++) {
 203                 *delay_ptr++ =  buf2[i].imag      * *--window_ptr;
 204                 *delay_ptr++ = -buf2[64-i-1].real * *--window_ptr;
 205         }
 206 }
 207
 208
 209 void imdct_do_256_nol (imdct_t * p_imdct, float data[], float delay[])
 210 {
 211         int i, j, k;
 212         int p, q;
 213
 214         float tmp_a_i;
 215         float tmp_a_r;
 216
 217         float *data_ptr;
 218         float *delay_ptr;
 219         float *window_ptr;
 220
 221         complex_t *buf1, *buf2;
 222
 223         buf1 = &p_imdct->buf[0];
 224         buf2 = &p_imdct->buf[64];
 225
 226     /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
 227         for(k=0; k<64; k++) {
 228         /* X1[k] = X[2*k]
 229         * X2[k] = X[2*k+1] */
 230         j = pm64[k];
 231         p = 2 * (128-2*j-1);
 232         q = 2 * (2 * j);
 233
 234         /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
 235         buf1[k].real =        data[p] * p_imdct->xcos2[j] - data[q] * p_imdct->xsin2[j];
 236         buf1[k].imag = -1.0f*(data[q] * p_imdct->xcos2[j] + data[p] * p_imdct->xsin2[j]);
 237         /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
 238         buf2[k].real =        data[p + 1] * p_imdct->xcos2[j] - data[q + 1] * p_imdct->xsin2[j];
 239         buf2[k].imag = -1.0f*(data[q + 1] * p_imdct->xcos2[j] + data[p + 1] * p_imdct->xsin2[j]);
 240     }
 241
 242     p_imdct->fft_64p(&buf1[0]);
 243     p_imdct->fft_64p(&buf2[0]);
 244
 245     /* Post IFFT complex multiply */
 246     for( i=0; i < 64; i++) {
 247         /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
 248         tmp_a_r =  buf1[i].real;
 249         tmp_a_i = -buf1[i].imag;
 250         buf1[i].real =(tmp_a_r * p_imdct->xcos2[i])  -  (tmp_a_i  * p_imdct->xsin2[i]);
 251         buf1[i].imag =(tmp_a_r * p_imdct->xsin2[i])  +  (tmp_a_i  * p_imdct->xcos2[i]);
 252         /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
 253         tmp_a_r =  buf2[i].real;
 254         tmp_a_i = -buf2[i].imag;
 255         buf2[i].real =(tmp_a_r * p_imdct->xcos2[i])  -  (tmp_a_i  * p_imdct->xsin2[i]);
 256         buf2[i].imag =(tmp_a_r * p_imdct->xsin2[i])  +  (tmp_a_i  * p_imdct->xcos2[i]);
 257     }
 258
 259     data_ptr = data;
 260     delay_ptr = delay;
 261     window_ptr = window;
 262
 263     /* Window and convert to real valued signal, no overlap */
 264     for(i=0; i< 64; i++) {
 265         *data_ptr++ = -buf1[i].imag     * *window_ptr++;
 266         *data_ptr++ = buf1[64-i-1].real * *window_ptr++;
 267     }
 268
 269     for(i=0; i< 64; i++) {
 270         *data_ptr++ = -buf1[i].real     * *window_ptr++ + *delay_ptr++;
 271         *data_ptr++ = buf1[64-i-1].imag * *window_ptr++ + *delay_ptr++;
 272     }
 273
 274     delay_ptr = delay;
 275
 276     for(i=0; i< 64; i++) {
 277         *delay_ptr++ = -buf2[i].real      * *--window_ptr;
 278         *delay_ptr++ =  buf2[64-i-1].imag * *--window_ptr;
 279     }
 280
 281     for(i=0; i< 64; i++) {
 282         *delay_ptr++ =  buf2[i].imag      * *--window_ptr;
 283         *delay_ptr++ = -buf2[64-i-1].real * *--window_ptr;
 284         }
 285 }
 286
 287 void imdct_do_512_c (imdct_t * p_imdct, float data[], float delay[])
 288 {
 289         int i, j;
 290         float tmp_a_r, tmp_a_i;
 291         float *data_ptr;
 292         float *delay_ptr;
 293         float *window_ptr;
 294
 295     /* 512 IMDCT with source and dest data in 'data'
 296      * Pre IFFT complex multiply plus IFFT complex conjugate */
 297
 298     for( i=0; i < 128; i++) {
 299                 j = pm128[i];
 300                 /* a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
 301                  * c = data[2*j] * xcos1[j];
 302                  * b = data[256-2*j-1] * xsin1[j];
 303                  * buf1[i].real = a - b + c;
 304                  * buf1[i].imag = b + c; */
 305                 p_imdct->buf[i].real = (data[256-2*j-1] * p_imdct->xcos1[j]) - (data[2*j] * p_imdct->xsin1[j]);
 306                 p_imdct->buf[i].imag = -1.0 * (data[2*j] * p_imdct->xcos1[j] + data[256-2*j-1] * p_imdct->xsin1[j]);
 307         }
 308
 309         fft_128p_c (&p_imdct->buf[0]);
 310
 311     /* Post IFFT complex multiply  plus IFFT complex conjugate */
 312         for (i=0; i < 128; i++) {
 313                 tmp_a_r = p_imdct->buf[i].real;
 314                 tmp_a_i = p_imdct->buf[i].imag;
 315                 /* a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]);
 316                  * b = tmp_a_r * xsin1[j];
 317                  * c = tmp_a_i * xcos1[j];
 318                  * buf[j].real = a - b + c;
 319                  * buf[j].imag = b + c; */
 320                 p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i])  +  (tmp_a_i  * p_imdct->xsin1[i]);
 321                 p_imdct->buf[i].imag =(tmp_a_r * p_imdct->xsin1[i])  -  (tmp_a_i  * p_imdct->xcos1[i]);
 322         }
 323
 324         data_ptr = data;
 325         delay_ptr = delay;
 326         window_ptr = window;
 327
 328     /* Window and convert to real valued signal */
 329         for (i=0; i< 64; i++) {
 330                 *data_ptr++ = -p_imdct->buf[64+i].imag  * *window_ptr++ + *delay_ptr++;
 331                 *data_ptr++ = p_imdct->buf[64-i-1].real * *window_ptr++ + *delay_ptr++;
 332         }
 333
 334         for(i=0; i< 64; i++) {
 335                 *data_ptr++ = -p_imdct->buf[i].real      * *window_ptr++ + *delay_ptr++;
 336                 *data_ptr++ = p_imdct->buf[128-i-1].imag * *window_ptr++ + *delay_ptr++;
 337         }
 338
 339     /* The trailing edge of the window goes into the delay line */
 340         delay_ptr = delay;
 341
 342         for(i=0; i< 64; i++) {
 343                 *delay_ptr++ = -p_imdct->buf[64+i].real   * *--window_ptr;
 344                 *delay_ptr++ =  p_imdct->buf[64-i-1].imag * *--window_ptr;
 345         }
 346
 347         for(i=0; i<64; i++) {
 348                 *delay_ptr++ =  p_imdct->buf[i].imag       * *--window_ptr;
 349                 *delay_ptr++ = -p_imdct->buf[128-i-1].real * *--window_ptr;
 350         }
 351 }
 352
 353
 354 void imdct_do_512_nol_c (imdct_t * p_imdct, float data[], float delay[])
 355 {
 356         int i, j;
 357
 358         float tmp_a_i;
 359         float tmp_a_r;
 360
 361         float *data_ptr;
 362         float *delay_ptr;
 363         float *window_ptr;
 364
 365     /* 512 IMDCT with source and dest data in 'data'
 366          * Pre IFFT complex multiply plus IFFT cmplx conjugate */
 367
 368     for( i=0; i < 128; i++) {
 369         /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) */
 370                 j = pm128[i];
 371         /* a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
 372          * c = data[2*j] * xcos1[j];
 373          * b = data[256-2*j-1] * xsin1[j];
 374          * buf1[i].real = a - b + c;
 375          * buf1[i].imag = b + c; */
 376                 p_imdct->buf[i].real = (data[256-2*j-1] * p_imdct->xcos1[j]) - (data[2*j] * p_imdct->xsin1[j]);
 377                 p_imdct->buf[i].imag = -1.0 * (data[2*j] * p_imdct->xcos1[j] + data[256-2*j-1] * p_imdct->xsin1[j]);
 378         }
 379
 380         fft_128p_c (&p_imdct->buf[0]);
 381
 382     /* Post IFFT complex multiply  plus IFFT complex conjugate*/
 383         for (i=0; i < 128; i++) {
 384                 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ;
 385                  * int j1 = i; */
 386                 tmp_a_r = p_imdct->buf[i].real;
 387                 tmp_a_i = p_imdct->buf[i].imag;
 388                 /* a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]);
 389                  * b = tmp_a_r * xsin1[j];
 390                  * c = tmp_a_i * xcos1[j];
 391                  * buf[j].real = a - b + c;
 392                  * buf[j].imag = b + c; */
 393                 p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i]) + (tmp_a_i  * p_imdct->xsin1[i]);
 394                 p_imdct->buf[i].imag =(tmp_a_r * p_imdct->xsin1[i]) - (tmp_a_i  * p_imdct->xcos1[i]);
 395         }
 396
 397         data_ptr = data;
 398         delay_ptr = delay;
 399         window_ptr = window;
 400
 401         /* Window and convert to real valued signal, no overlap here*/
 402         for (i=0; i< 64; i++) {
 403                 *data_ptr++ = -p_imdct->buf[64+i].imag  * *window_ptr++;
 404                 *data_ptr++ = p_imdct->buf[64-i-1].real * *window_ptr++;
 405         }
 406
 407         for(i=0; i< 64; i++) {
 408                 *data_ptr++ = -p_imdct->buf[i].real      * *window_ptr++;
 409                 *data_ptr++ = p_imdct->buf[128-i-1].imag * *window_ptr++;
 410         }
 411
 412         /* The trailing edge of the window goes into the delay line */
 413         delay_ptr = delay;
 414
 415         for(i=0; i< 64; i++) {
 416                 *delay_ptr++ = -p_imdct->buf[64+i].real   * *--window_ptr;
 417                 *delay_ptr++ =  p_imdct->buf[64-i-1].imag * *--window_ptr;
 418         }
 419
 420         for(i=0; i<64; i++) {
 421                 *delay_ptr++ =  p_imdct->buf[i].imag       * *--window_ptr;
 422                 *delay_ptr++ = -p_imdct->buf[128-i-1].real * *--window_ptr;
 423         }
 424 }