git.sesse.net Git - vlc/blob - src/ac3_decoder/ac3_imdct_c.c

   1 /*****************************************************************************
   2  * ac3_imdct_c.c: ac3 DCT
   3  *****************************************************************************
   4  * Copyright (C) 1999, 2000 VideoLAN
   5  * $Id: ac3_imdct_c.c,v 1.3 2001/05/14 15:58:04 reno Exp $
   6  *
   7  * Authors: Renaud Dartus <reno@videolan.org>
   8  *          Aaron Holtzman <aholtzma@engr.uvic.ca>
   9  *
  10  * This program is free software; you can redistribute it and/or modify
  11  * it under the terms of the GNU General Public License as published by
  12  * the Free Software Foundation; either version 2 of the License, or
  13  * (at your option) any later version.
  14  *
  15  * This program is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  * GNU General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU General Public License
  21  * along with this program; if not, write to the Free Software
  22  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
  23  *****************************************************************************/
  24
  25 #include "defs.h"
  26
  27 #include <string.h>                                              /* memcpy() */
  28
  29 #include <math.h>
  30 #include <stdio.h>
  31
  32 #include "config.h"
  33 #include "common.h"
  34 #include "threads.h"
  35 #include "mtime.h"
  36
  37 #include "stream_control.h"
  38 #include "input_ext-dec.h"
  39
  40 #include "ac3_decoder.h"
  41 #include "ac3_imdct_c.h"
  42
  43 #ifndef M_PI
  44 #   define M_PI 3.14159265358979323846
  45 #endif
  46
  47 void fft_64p_c (complex_t *x);
  48 void fft_128p_c (complex_t *x);
  49
  50 static float window[] = {
  51         0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
  52         0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
  53         0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
  54         0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
  55         0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
  56         0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
  57         0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
  58         0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
  59         0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
  60         0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
  61         0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
  62         0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
  63         0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
  64         0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
  65         0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
  66         0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
  67         0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
  68         0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
  69         0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
  70         0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
  71         0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
  72         0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
  73         0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
  74         0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
  75         0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
  76         0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
  77         0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
  78         0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
  79         0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
  80         0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
  81         0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
  82         1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000
  83 };
  84
  85 static const int pm128[128] =
  86 {
  87         0, 16, 32, 48, 64, 80,  96, 112,  8, 40, 72, 104, 24, 56,  88, 120,
  88         4, 20, 36, 52, 68, 84, 100, 116, 12, 28, 44,  60, 76, 92, 108, 124,
  89         2, 18, 34, 50, 66, 82,  98, 114, 10, 42, 74, 106, 26, 58,  90, 122,
  90         6, 22, 38, 54, 70, 86, 102, 118, 14, 46, 78, 110, 30, 62,  94, 126,
  91         1, 17, 33, 49, 65, 81,  97, 113,  9, 41, 73, 105, 25, 57,  89, 121,
  92         5, 21, 37, 53, 69, 85, 101, 117, 13, 29, 45,  61, 77, 93, 109, 125,
  93         3, 19, 35, 51, 67, 83,  99, 115, 11, 43, 75, 107, 27, 59,  91, 123,
  94         7, 23, 39, 55, 71, 87, 103, 119, 15, 31, 47,  63, 79, 95, 111, 127
  95 };
  96
  97 static const int pm64[64] =
  98 {
  99         0,  8, 16, 24, 32, 40, 48, 56,
 100         4, 20, 36, 52, 12, 28, 44, 60,
 101         2, 10, 18, 26, 34, 42, 50, 58,
 102         6, 14, 22, 30, 38, 46, 54, 62,
 103         1,  9, 17, 25, 33, 41, 49, 57,
 104         5, 21, 37, 53, 13, 29, 45, 61,
 105         3, 11, 19, 27, 35, 43, 51, 59,
 106         7, 23, 39, 55, 15, 31, 47, 63
 107 };
 108
 109 int imdct_init_c (imdct_t * p_imdct)
 110 {
 111         int i;
 112         float scale = 181.019;
 113
 114         p_imdct->imdct_do_512 = imdct_do_512_c;
 115         p_imdct->imdct_do_512_nol = imdct_do_512_nol_c;
 116         p_imdct->fft_64p = fft_64p_c;
 117
 118         /* Twiddle factors to turn IFFT into IMDCT */
 119
 120         for (i=0; i < 128; i++) {
 121                 p_imdct->xcos1[i] = cos(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
 122                 p_imdct->xsin1[i] = sin(2.0f * M_PI * (8*i+1)/(8*N)) * scale;
 123         }
 124
 125         return 0;
 126 }
 127
 128 void imdct_do_256 (imdct_t * p_imdct, float data[],float delay[])
 129 {
 130         int i, j, k;
 131         int p, q;
 132
 133         float tmp_a_i;
 134         float tmp_a_r;
 135
 136         float *data_ptr;
 137         float *delay_ptr;
 138         float *window_ptr;
 139
 140         complex_t *buf1, *buf2;
 141
 142         buf1 = &p_imdct->buf[0];
 143         buf2 = &p_imdct->buf[64];
 144
 145     /* Pre IFFT complex multiply plus IFFT complex conjugate */
 146         for (k=0; k<64; k++) {
 147                 /* X1[k] = X[2*k]
 148                  * X2[k] = X[2*k+1]     */
 149
 150                 j = pm64[k];
 151                 p = 2 * (128-2*j-1);
 152                 q = 2 * (2 * j);
 153
 154                 /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
 155                 buf1[k].real =        data[p] * p_imdct->xcos2[j] - data[q] * p_imdct->xsin2[j];
 156                 buf1[k].imag = -1.0f*(data[q] * p_imdct->xcos2[j] + data[p] * p_imdct->xsin2[j]);
 157                 /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
 158                 buf2[k].real =        data[p + 1] * p_imdct->xcos2[j] - data[q + 1] * p_imdct->xsin2[j];
 159                 buf2[k].imag = -1.0f*(data[q + 1] * p_imdct->xcos2[j] + data[p + 1] * p_imdct->xsin2[j]);
 160         }
 161
 162         p_imdct->fft_64p(&buf1[0]);
 163         p_imdct->fft_64p(&buf2[0]);
 164
 165         /* Post IFFT complex multiply */
 166         for( i=0; i < 64; i++) {
 167                 tmp_a_r =  buf1[i].real;
 168                 tmp_a_i = -buf1[i].imag;
 169                 buf1[i].real = (tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
 170                 buf1[i].imag = (tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
 171                 tmp_a_r =  buf2[i].real;
 172                 tmp_a_i = -buf2[i].imag;
 173                 buf2[i].real = (tmp_a_r * p_imdct->xcos2[i]) - (tmp_a_i * p_imdct->xsin2[i]);
 174                 buf2[i].imag = (tmp_a_r * p_imdct->xsin2[i]) + (tmp_a_i * p_imdct->xcos2[i]);
 175         }
 176
 177         data_ptr = data;
 178         delay_ptr = delay;
 179         window_ptr = window;
 180
 181         /* Window and convert to real valued signal */
 182         for(i=0; i< 64; i++) {
 183                 *data_ptr++ = -buf1[i].imag     * *window_ptr++ + *delay_ptr++;
 184                 *data_ptr++ = buf1[64-i-1].real * *window_ptr++ + *delay_ptr++;
 185         }
 186
 187         for(i=0; i< 64; i++) {
 188                 *data_ptr++ = -buf1[i].real     * *window_ptr++ + *delay_ptr++;
 189                 *data_ptr++ = buf1[64-i-1].imag * *window_ptr++ + *delay_ptr++;
 190         }
 191
 192         delay_ptr = delay;
 193
 194         for(i=0; i< 64; i++) {
 195                 *delay_ptr++ = -buf2[i].real      * *--window_ptr;
 196                 *delay_ptr++ =  buf2[64-i-1].imag * *--window_ptr;
 197         }
 198
 199         for(i=0; i< 64; i++) {
 200                 *delay_ptr++ =  buf2[i].imag      * *--window_ptr;
 201                 *delay_ptr++ = -buf2[64-i-1].real * *--window_ptr;
 202         }
 203 }
 204
 205
 206 void imdct_do_256_nol (imdct_t * p_imdct, float data[], float delay[])
 207 {
 208         int i, j, k;
 209         int p, q;
 210
 211         float tmp_a_i;
 212         float tmp_a_r;
 213
 214         float *data_ptr;
 215         float *delay_ptr;
 216         float *window_ptr;
 217
 218         complex_t *buf1, *buf2;
 219
 220         buf1 = &p_imdct->buf[0];
 221         buf2 = &p_imdct->buf[64];
 222
 223     /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
 224         for(k=0; k<64; k++) {
 225         /* X1[k] = X[2*k]
 226         * X2[k] = X[2*k+1] */
 227         j = pm64[k];
 228         p = 2 * (128-2*j-1);
 229         q = 2 * (2 * j);
 230
 231         /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
 232         buf1[k].real =        data[p] * p_imdct->xcos2[j] - data[q] * p_imdct->xsin2[j];
 233         buf1[k].imag = -1.0f*(data[q] * p_imdct->xcos2[j] + data[p] * p_imdct->xsin2[j]);
 234         /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
 235         buf2[k].real =        data[p + 1] * p_imdct->xcos2[j] - data[q + 1] * p_imdct->xsin2[j];
 236         buf2[k].imag = -1.0f*(data[q + 1] * p_imdct->xcos2[j] + data[p + 1] * p_imdct->xsin2[j]);
 237     }
 238
 239     p_imdct->fft_64p(&buf1[0]);
 240     p_imdct->fft_64p(&buf2[0]);
 241
 242     /* Post IFFT complex multiply */
 243     for( i=0; i < 64; i++) {
 244         /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
 245         tmp_a_r =  buf1[i].real;
 246         tmp_a_i = -buf1[i].imag;
 247         buf1[i].real =(tmp_a_r * p_imdct->xcos2[i])  -  (tmp_a_i  * p_imdct->xsin2[i]);
 248         buf1[i].imag =(tmp_a_r * p_imdct->xsin2[i])  +  (tmp_a_i  * p_imdct->xcos2[i]);
 249         /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
 250         tmp_a_r =  buf2[i].real;
 251         tmp_a_i = -buf2[i].imag;
 252         buf2[i].real =(tmp_a_r * p_imdct->xcos2[i])  -  (tmp_a_i  * p_imdct->xsin2[i]);
 253         buf2[i].imag =(tmp_a_r * p_imdct->xsin2[i])  +  (tmp_a_i  * p_imdct->xcos2[i]);
 254     }
 255
 256     data_ptr = data;
 257     delay_ptr = delay;
 258     window_ptr = window;
 259
 260     /* Window and convert to real valued signal, no overlap */
 261     for(i=0; i< 64; i++) {
 262         *data_ptr++ = -buf1[i].imag     * *window_ptr++;
 263         *data_ptr++ = buf1[64-i-1].real * *window_ptr++;
 264     }
 265
 266     for(i=0; i< 64; i++) {
 267         *data_ptr++ = -buf1[i].real     * *window_ptr++ + *delay_ptr++;
 268         *data_ptr++ = buf1[64-i-1].imag * *window_ptr++ + *delay_ptr++;
 269     }
 270
 271     delay_ptr = delay;
 272
 273     for(i=0; i< 64; i++) {
 274         *delay_ptr++ = -buf2[i].real      * *--window_ptr;
 275         *delay_ptr++ =  buf2[64-i-1].imag * *--window_ptr;
 276     }
 277
 278     for(i=0; i< 64; i++) {
 279         *delay_ptr++ =  buf2[i].imag      * *--window_ptr;
 280         *delay_ptr++ = -buf2[64-i-1].real * *--window_ptr;
 281         }
 282 }
 283
 284 void imdct_do_512_c (imdct_t * p_imdct, float data[], float delay[])
 285 {
 286         int i, j;
 287         float tmp_a_r, tmp_a_i;
 288         float *data_ptr;
 289         float *delay_ptr;
 290         float *window_ptr;
 291
 292     /* 512 IMDCT with source and dest data in 'data'
 293      * Pre IFFT complex multiply plus IFFT complex conjugate */
 294
 295     for( i=0; i < 128; i++) {
 296                 j = pm128[i];
 297                 /* a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
 298                  * c = data[2*j] * xcos1[j];
 299                  * b = data[256-2*j-1] * xsin1[j];
 300                  * buf1[i].real = a - b + c;
 301                  * buf1[i].imag = b + c; */
 302                 p_imdct->buf[i].real = (data[256-2*j-1] * p_imdct->xcos1[j]) - (data[2*j] * p_imdct->xsin1[j]);
 303                 p_imdct->buf[i].imag = -1.0 * (data[2*j] * p_imdct->xcos1[j] + data[256-2*j-1] * p_imdct->xsin1[j]);
 304         }
 305
 306         fft_128p_c (&p_imdct->buf[0]);
 307
 308     /* Post IFFT complex multiply  plus IFFT complex conjugate */
 309         for (i=0; i < 128; i++) {
 310                 tmp_a_r = p_imdct->buf[i].real;
 311                 tmp_a_i = p_imdct->buf[i].imag;
 312                 /* a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]);
 313                  * b = tmp_a_r * xsin1[j];
 314                  * c = tmp_a_i * xcos1[j];
 315                  * buf[j].real = a - b + c;
 316                  * buf[j].imag = b + c; */
 317                 p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i])  +  (tmp_a_i  * p_imdct->xsin1[i]);
 318                 p_imdct->buf[i].imag =(tmp_a_r * p_imdct->xsin1[i])  -  (tmp_a_i  * p_imdct->xcos1[i]);
 319         }
 320
 321         data_ptr = data;
 322         delay_ptr = delay;
 323         window_ptr = window;
 324
 325     /* Window and convert to real valued signal */
 326         for (i=0; i< 64; i++) {
 327                 *data_ptr++ = -p_imdct->buf[64+i].imag  * *window_ptr++ + *delay_ptr++;
 328                 *data_ptr++ = p_imdct->buf[64-i-1].real * *window_ptr++ + *delay_ptr++;
 329         }
 330
 331         for(i=0; i< 64; i++) {
 332                 *data_ptr++ = -p_imdct->buf[i].real      * *window_ptr++ + *delay_ptr++;
 333                 *data_ptr++ = p_imdct->buf[128-i-1].imag * *window_ptr++ + *delay_ptr++;
 334         }
 335
 336     /* The trailing edge of the window goes into the delay line */
 337         delay_ptr = delay;
 338
 339         for(i=0; i< 64; i++) {
 340                 *delay_ptr++ = -p_imdct->buf[64+i].real   * *--window_ptr;
 341                 *delay_ptr++ =  p_imdct->buf[64-i-1].imag * *--window_ptr;
 342         }
 343
 344         for(i=0; i<64; i++) {
 345                 *delay_ptr++ =  p_imdct->buf[i].imag       * *--window_ptr;
 346                 *delay_ptr++ = -p_imdct->buf[128-i-1].real * *--window_ptr;
 347         }
 348 }
 349
 350
 351 void imdct_do_512_nol_c (imdct_t * p_imdct, float data[], float delay[])
 352 {
 353         int i, j;
 354
 355         float tmp_a_i;
 356         float tmp_a_r;
 357
 358         float *data_ptr;
 359         float *delay_ptr;
 360         float *window_ptr;
 361
 362     /* 512 IMDCT with source and dest data in 'data'
 363          * Pre IFFT complex multiply plus IFFT cmplx conjugate */
 364
 365     for( i=0; i < 128; i++) {
 366         /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) */
 367                 j = pm128[i];
 368         /* a = (data[256-2*j-1] - data[2*j]) * (xcos1[j] + xsin1[j]);
 369          * c = data[2*j] * xcos1[j];
 370          * b = data[256-2*j-1] * xsin1[j];
 371          * buf1[i].real = a - b + c;
 372          * buf1[i].imag = b + c; */
 373                 p_imdct->buf[i].real = (data[256-2*j-1] * p_imdct->xcos1[j]) - (data[2*j] * p_imdct->xsin1[j]);
 374                 p_imdct->buf[i].imag = -1.0 * (data[2*j] * p_imdct->xcos1[j] + data[256-2*j-1] * p_imdct->xsin1[j]);
 375         }
 376
 377         fft_128p_c (&p_imdct->buf[0]);
 378
 379     /* Post IFFT complex multiply  plus IFFT complex conjugate*/
 380         for (i=0; i < 128; i++) {
 381                 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ;
 382                  * int j1 = i; */
 383                 tmp_a_r = p_imdct->buf[i].real;
 384                 tmp_a_i = p_imdct->buf[i].imag;
 385                 /* a = (tmp_a_r - tmp_a_i) * (xcos1[j] + xsin1[j]);
 386                  * b = tmp_a_r * xsin1[j];
 387                  * c = tmp_a_i * xcos1[j];
 388                  * buf[j].real = a - b + c;
 389                  * buf[j].imag = b + c; */
 390                 p_imdct->buf[i].real =(tmp_a_r * p_imdct->xcos1[i]) + (tmp_a_i  * p_imdct->xsin1[i]);
 391                 p_imdct->buf[i].imag =(tmp_a_r * p_imdct->xsin1[i]) - (tmp_a_i  * p_imdct->xcos1[i]);
 392         }
 393
 394         data_ptr = data;
 395         delay_ptr = delay;
 396         window_ptr = window;
 397
 398         /* Window and convert to real valued signal, no overlap here*/
 399         for (i=0; i< 64; i++) {
 400                 *data_ptr++ = -p_imdct->buf[64+i].imag  * *window_ptr++;
 401                 *data_ptr++ = p_imdct->buf[64-i-1].real * *window_ptr++;
 402         }
 403
 404         for(i=0; i< 64; i++) {
 405                 *data_ptr++ = -p_imdct->buf[i].real      * *window_ptr++;
 406                 *data_ptr++ = p_imdct->buf[128-i-1].imag * *window_ptr++;
 407         }
 408
 409         /* The trailing edge of the window goes into the delay line */
 410         delay_ptr = delay;
 411
 412         for(i=0; i< 64; i++) {
 413                 *delay_ptr++ = -p_imdct->buf[64+i].real   * *--window_ptr;
 414                 *delay_ptr++ =  p_imdct->buf[64-i-1].imag * *--window_ptr;
 415         }
 416
 417         for(i=0; i<64; i++) {
 418                 *delay_ptr++ =  p_imdct->buf[i].imag       * *--window_ptr;
 419                 *delay_ptr++ = -p_imdct->buf[128-i-1].real * *--window_ptr;
 420         }
 421 }