git.sesse.net Git - ffmpeg/blob - libavcodec/dcadsp.c

   1 /*
   2  * Copyright (C) 2016 foo86
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include "libavutil/mem.h"
  22
  23 #include "dcadsp.h"
  24 #include "dcamath.h"
  25
  26 static void decode_hf_c(int32_t **dst,
  27                         const int32_t *vq_index,
  28                         const int8_t hf_vq[1024][32],
  29                         int32_t scale_factors[32][2],
  30                         ptrdiff_t sb_start, ptrdiff_t sb_end,
  31                         ptrdiff_t ofs, ptrdiff_t len)
  32 {
  33     int i, j;
  34
  35     for (i = sb_start; i < sb_end; i++) {
  36         const int8_t *coeff = hf_vq[vq_index[i]];
  37         int32_t scale = scale_factors[i][0];
  38         for (j = 0; j < len; j++)
  39             dst[i][j + ofs] = clip23(coeff[j] * scale + (1 << 3) >> 4);
  40     }
  41 }
  42
  43 static void decode_joint_c(int32_t **dst, int32_t **src,
  44                            const int32_t *scale_factors,
  45                            ptrdiff_t sb_start, ptrdiff_t sb_end,
  46                            ptrdiff_t ofs, ptrdiff_t len)
  47 {
  48     int i, j;
  49
  50     for (i = sb_start; i < sb_end; i++) {
  51         int32_t scale = scale_factors[i];
  52         for (j = 0; j < len; j++)
  53             dst[i][j + ofs] = clip23(mul17(src[i][j + ofs], scale));
  54     }
  55 }
  56
  57 static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
  58                             const float *filter_coeff, ptrdiff_t npcmblocks,
  59                             int dec_select)
  60 {
  61     // Select decimation factor
  62     int factor = 64 << dec_select;
  63     int ncoeffs = 8 >> dec_select;
  64     int nlfesamples = npcmblocks >> (dec_select + 1);
  65     int i, j, k;
  66
  67     for (i = 0; i < nlfesamples; i++) {
  68         // One decimated sample generates 64 or 128 interpolated ones
  69         for (j = 0; j < factor / 2; j++) {
  70             float a = 0;
  71             float b = 0;
  72
  73             for (k = 0; k < ncoeffs; k++) {
  74                 a += filter_coeff[      j * ncoeffs + k] * lfe_samples[-k];
  75                 b += filter_coeff[255 - j * ncoeffs - k] * lfe_samples[-k];
  76             }
  77
  78             pcm_samples[             j] = a;
  79             pcm_samples[factor / 2 + j] = b;
  80         }
  81
  82         lfe_samples++;
  83         pcm_samples += factor;
  84     }
  85 }
  86
  87 static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
  88                              const float *filter_coeff, ptrdiff_t npcmblocks)
  89 {
  90     lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 0);
  91 }
  92
  93 static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
  94                              const float *filter_coeff, ptrdiff_t npcmblocks)
  95 {
  96     lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 1);
  97 }
  98
  99 static void lfe_x96_float_c(float *dst, const float *src,
 100                             float *hist, ptrdiff_t len)
 101 {
 102     float prev = *hist;
 103     int i;
 104
 105     for (i = 0; i < len; i++) {
 106         float a = 0.25f * src[i] + 0.75f * prev;
 107         float b = 0.75f * src[i] + 0.25f * prev;
 108         prev = src[i];
 109         *dst++ = a;
 110         *dst++ = b;
 111     }
 112
 113     *hist = prev;
 114 }
 115
 116 static void sub_qmf32_float_c(SynthFilterContext *synth,
 117                               FFTContext *imdct,
 118                               float *pcm_samples,
 119                               int32_t **subband_samples_lo,
 120                               int32_t **subband_samples_hi,
 121                               float *hist1, int *offset, float *hist2,
 122                               const float *filter_coeff, ptrdiff_t npcmblocks,
 123                               float scale)
 124 {
 125     LOCAL_ALIGNED_32(float, input, [32]);
 126     int i, j;
 127
 128     for (j = 0; j < npcmblocks; j++) {
 129         // Load in one sample from each subband
 130         for (i = 0; i < 32; i++) {
 131             if ((i - 1) & 2)
 132                 input[i] = -subband_samples_lo[i][j];
 133             else
 134                 input[i] =  subband_samples_lo[i][j];
 135         }
 136
 137         // One subband sample generates 32 interpolated ones
 138         synth->synth_filter_float(imdct, hist1, offset,
 139                                   hist2, filter_coeff,
 140                                   pcm_samples, input, scale);
 141         pcm_samples += 32;
 142     }
 143 }
 144
 145 static void sub_qmf64_float_c(SynthFilterContext *synth,
 146                               FFTContext *imdct,
 147                               float *pcm_samples,
 148                               int32_t **subband_samples_lo,
 149                               int32_t **subband_samples_hi,
 150                               float *hist1, int *offset, float *hist2,
 151                               const float *filter_coeff, ptrdiff_t npcmblocks,
 152                               float scale)
 153 {
 154     LOCAL_ALIGNED_32(float, input, [64]);
 155     int i, j;
 156
 157     if (!subband_samples_hi)
 158         memset(&input[32], 0, sizeof(input[0]) * 32);
 159
 160     for (j = 0; j < npcmblocks; j++) {
 161         // Load in one sample from each subband
 162         if (subband_samples_hi) {
 163             // Full 64 subbands, first 32 are residual coded
 164             for (i =  0; i < 32; i++) {
 165                 if ((i - 1) & 2)
 166                     input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j];
 167                 else
 168                     input[i] =  subband_samples_lo[i][j] + subband_samples_hi[i][j];
 169             }
 170             for (i = 32; i < 64; i++) {
 171                 if ((i - 1) & 2)
 172                     input[i] = -subband_samples_hi[i][j];
 173                 else
 174                     input[i] =  subband_samples_hi[i][j];
 175             }
 176         } else {
 177             // Only first 32 subbands
 178             for (i =  0; i < 32; i++) {
 179                 if ((i - 1) & 2)
 180                     input[i] = -subband_samples_lo[i][j];
 181                 else
 182                     input[i] =  subband_samples_lo[i][j];
 183             }
 184         }
 185
 186         // One subband sample generates 64 interpolated ones
 187         synth->synth_filter_float_64(imdct, hist1, offset,
 188                                      hist2, filter_coeff,
 189                                      pcm_samples, input, scale);
 190         pcm_samples += 64;
 191     }
 192 }
 193
 194 static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
 195                             const int32_t *filter_coeff, ptrdiff_t npcmblocks)
 196 {
 197     // Select decimation factor
 198     int nlfesamples = npcmblocks >> 1;
 199     int i, j, k;
 200
 201     for (i = 0; i < nlfesamples; i++) {
 202         // One decimated sample generates 64 interpolated ones
 203         for (j = 0; j < 32; j++) {
 204             int64_t a = 0;
 205             int64_t b = 0;
 206
 207             for (k = 0; k < 8; k++) {
 208                 a += (int64_t)filter_coeff[      j * 8 + k] * lfe_samples[-k];
 209                 b += (int64_t)filter_coeff[255 - j * 8 - k] * lfe_samples[-k];
 210             }
 211
 212             pcm_samples[     j] = clip23(norm23(a));
 213             pcm_samples[32 + j] = clip23(norm23(b));
 214         }
 215
 216         lfe_samples++;
 217         pcm_samples += 64;
 218     }
 219 }
 220
 221 static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
 222                             int32_t *hist, ptrdiff_t len)
 223 {
 224     int32_t prev = *hist;
 225     int i;
 226
 227     for (i = 0; i < len; i++) {
 228         int64_t a = INT64_C(2097471) * src[i] + INT64_C(6291137) * prev;
 229         int64_t b = INT64_C(6291137) * src[i] + INT64_C(2097471) * prev;
 230         prev = src[i];
 231         *dst++ = clip23(norm23(a));
 232         *dst++ = clip23(norm23(b));
 233     }
 234
 235     *hist = prev;
 236 }
 237
 238 static void sub_qmf32_fixed_c(SynthFilterContext *synth,
 239                               DCADCTContext *imdct,
 240                               int32_t *pcm_samples,
 241                               int32_t **subband_samples_lo,
 242                               int32_t **subband_samples_hi,
 243                               int32_t *hist1, int *offset, int32_t *hist2,
 244                               const int32_t *filter_coeff, ptrdiff_t npcmblocks)
 245 {
 246     LOCAL_ALIGNED_32(int32_t, input, [32]);
 247     int i, j;
 248
 249     for (j = 0; j < npcmblocks; j++) {
 250         // Load in one sample from each subband
 251         for (i = 0; i < 32; i++)
 252             input[i] = subband_samples_lo[i][j];
 253
 254         // One subband sample generates 32 interpolated ones
 255         synth->synth_filter_fixed(imdct, hist1, offset,
 256                                   hist2, filter_coeff,
 257                                   pcm_samples, input);
 258         pcm_samples += 32;
 259     }
 260 }
 261
 262 static void sub_qmf64_fixed_c(SynthFilterContext *synth,
 263                               DCADCTContext *imdct,
 264                               int32_t *pcm_samples,
 265                               int32_t **subband_samples_lo,
 266                               int32_t **subband_samples_hi,
 267                               int32_t *hist1, int *offset, int32_t *hist2,
 268                               const int32_t *filter_coeff, ptrdiff_t npcmblocks)
 269 {
 270     LOCAL_ALIGNED_32(int32_t, input, [64]);
 271     int i, j;
 272
 273     if (!subband_samples_hi)
 274         memset(&input[32], 0, sizeof(input[0]) * 32);
 275
 276     for (j = 0; j < npcmblocks; j++) {
 277         // Load in one sample from each subband
 278         if (subband_samples_hi) {
 279             // Full 64 subbands, first 32 are residual coded
 280             for (i =  0; i < 32; i++)
 281                 input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
 282             for (i = 32; i < 64; i++)
 283                 input[i] = subband_samples_hi[i][j];
 284         } else {
 285             // Only first 32 subbands
 286             for (i =  0; i < 32; i++)
 287                 input[i] = subband_samples_lo[i][j];
 288         }
 289
 290         // One subband sample generates 64 interpolated ones
 291         synth->synth_filter_fixed_64(imdct, hist1, offset,
 292                                      hist2, filter_coeff,
 293                                      pcm_samples, input);
 294         pcm_samples += 64;
 295     }
 296 }
 297
 298 static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
 299 {
 300     int i;
 301
 302     for (i = 0; i < len; i++)
 303         dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3);
 304 }
 305
 306 static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
 307                            const int32_t *src, ptrdiff_t len)
 308 {
 309     int i;
 310
 311     for (i = 0; i < len; i++) {
 312         int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);
 313         dst1[i] -= cs;
 314         dst2[i] -= cs;
 315     }
 316 }
 317
 318 static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
 319 {
 320     int i;
 321
 322     for (i = 0; i < len; i++)
 323         dst[i] -= (unsigned)mul15(src[i], coeff);
 324 }
 325
 326 static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
 327 {
 328     int i;
 329
 330     for (i = 0; i < len; i++)
 331         dst[i] += mul15(src[i], coeff);
 332 }
 333
 334 static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
 335 {
 336     int i;
 337
 338     for (i = 0; i < len; i++)
 339         dst[i] = mul15(dst[i], scale);
 340 }
 341
 342 static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
 343 {
 344     int i;
 345
 346     for (i = 0; i < len; i++)
 347         dst[i] = mul16(dst[i], scale_inv);
 348 }
 349
 350 static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
 351 {
 352     int i;
 353
 354     for (i = 0; i < len; i++)
 355         dst[i] -= mul22(src[i], coeff);
 356 }
 357
 358 static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
 359 {
 360     int i;
 361
 362     for (i = 0; i < len; i++)
 363         dst[i] -= mul23(src[i], coeff);
 364 }
 365
 366 static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
 367                                   const int32_t *coeff, ptrdiff_t len)
 368 {
 369     int i;
 370
 371     filter0(src0, src1, coeff[0], len);
 372     filter0(src1, src0, coeff[1], len);
 373     filter0(src0, src1, coeff[2], len);
 374     filter0(src1, src0, coeff[3], len);
 375
 376     for (i = 0; i < 8; i++, src0--) {
 377         filter1(src0, src1, coeff[i +  4], len);
 378         filter1(src1, src0, coeff[i + 12], len);
 379         filter1(src0, src1, coeff[i +  4], len);
 380     }
 381
 382     for (i = 0; i < len; i++) {
 383         *dst++ = *src1++;
 384         *dst++ = *++src0;
 385     }
 386 }
 387
 388 static void lbr_bank_c(float output[32][4], float **input,
 389                        const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
 390 {
 391     float SW0 = coeff[0];
 392     float SW1 = coeff[1];
 393     float SW2 = coeff[2];
 394     float SW3 = coeff[3];
 395
 396     float C1  = coeff[4];
 397     float C2  = coeff[5];
 398     float C3  = coeff[6];
 399     float C4  = coeff[7];
 400
 401     float AL1 = coeff[8];
 402     float AL2 = coeff[9];
 403
 404     int i;
 405
 406     // Short window and 8 point forward MDCT
 407     for (i = 0; i < len; i++) {
 408         float *src = input[i] + ofs;
 409
 410         float a = src[-4] * SW0 - src[-1] * SW3;
 411         float b = src[-3] * SW1 - src[-2] * SW2;
 412         float c = src[ 2] * SW1 + src[ 1] * SW2;
 413         float d = src[ 3] * SW0 + src[ 0] * SW3;
 414
 415         output[i][0] = C1 * b - C2 * c + C4 * a - C3 * d;
 416         output[i][1] = C1 * d - C2 * a - C4 * b - C3 * c;
 417         output[i][2] = C3 * b + C2 * d - C4 * c + C1 * a;
 418         output[i][3] = C3 * a - C2 * b + C4 * d - C1 * c;
 419     }
 420
 421     // Aliasing cancellation for high frequencies
 422     for (i = 12; i < len - 1; i++) {
 423         float a = output[i  ][3] * AL1;
 424         float b = output[i+1][0] * AL1;
 425         output[i  ][3] += b - a;
 426         output[i+1][0] -= b + a;
 427         a = output[i  ][2] * AL2;
 428         b = output[i+1][1] * AL2;
 429         output[i  ][2] += b - a;
 430         output[i+1][1] -= b + a;
 431     }
 432 }
 433
 434 static void lfe_iir_c(float *output, const float *input,
 435                       const float iir[5][4], float hist[5][2],
 436                       ptrdiff_t factor)
 437 {
 438     float res, tmp;
 439     int i, j, k;
 440
 441     for (i = 0; i < 64; i++) {
 442         res = *input++;
 443
 444         for (j = 0; j < factor; j++) {
 445             for (k = 0; k < 5; k++) {
 446                 tmp = hist[k][0] * iir[k][0] + hist[k][1] * iir[k][1] + res;
 447                 res = hist[k][0] * iir[k][2] + hist[k][1] * iir[k][3] + tmp;
 448
 449                 hist[k][0] = hist[k][1];
 450                 hist[k][1] = tmp;
 451             }
 452
 453             *output++ = res;
 454             res = 0;
 455         }
 456     }
 457 }
 458
 459 av_cold void ff_dcadsp_init(DCADSPContext *s)
 460 {
 461     s->decode_hf     = decode_hf_c;
 462     s->decode_joint  = decode_joint_c;
 463
 464     s->lfe_fir_float[0] = lfe_fir0_float_c;
 465     s->lfe_fir_float[1] = lfe_fir1_float_c;
 466     s->lfe_x96_float    = lfe_x96_float_c;
 467     s->sub_qmf_float[0] = sub_qmf32_float_c;
 468     s->sub_qmf_float[1] = sub_qmf64_float_c;
 469
 470     s->lfe_fir_fixed    = lfe_fir_fixed_c;
 471     s->lfe_x96_fixed    = lfe_x96_fixed_c;
 472     s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;
 473     s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;
 474
 475     s->decor   = decor_c;
 476
 477     s->dmix_sub_xch   = dmix_sub_xch_c;
 478     s->dmix_sub       = dmix_sub_c;
 479     s->dmix_add       = dmix_add_c;
 480     s->dmix_scale     = dmix_scale_c;
 481     s->dmix_scale_inv = dmix_scale_inv_c;
 482
 483     s->assemble_freq_bands = assemble_freq_bands_c;
 484
 485     s->lbr_bank = lbr_bank_c;
 486     s->lfe_iir = lfe_iir_c;
 487
 488     if (ARCH_X86)
 489         ff_dcadsp_init_x86(s);
 490 }