git.sesse.net Git - ffmpeg/blob - libavcodec/dcadsp.c

   1 /*
   2  * Copyright (C) 2016 foo86
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include "libavutil/mem.h"
  22 #include "libavutil/mem_internal.h"
  23
  24 #include "dcadsp.h"
  25 #include "dcamath.h"
  26
  27 static void decode_hf_c(int32_t **dst,
  28                         const int32_t *vq_index,
  29                         const int8_t hf_vq[1024][32],
  30                         int32_t scale_factors[32][2],
  31                         ptrdiff_t sb_start, ptrdiff_t sb_end,
  32                         ptrdiff_t ofs, ptrdiff_t len)
  33 {
  34     int i, j;
  35
  36     for (i = sb_start; i < sb_end; i++) {
  37         const int8_t *coeff = hf_vq[vq_index[i]];
  38         int32_t scale = scale_factors[i][0];
  39         for (j = 0; j < len; j++)
  40             dst[i][j + ofs] = clip23(coeff[j] * scale + (1 << 3) >> 4);
  41     }
  42 }
  43
  44 static void decode_joint_c(int32_t **dst, int32_t **src,
  45                            const int32_t *scale_factors,
  46                            ptrdiff_t sb_start, ptrdiff_t sb_end,
  47                            ptrdiff_t ofs, ptrdiff_t len)
  48 {
  49     int i, j;
  50
  51     for (i = sb_start; i < sb_end; i++) {
  52         int32_t scale = scale_factors[i];
  53         for (j = 0; j < len; j++)
  54             dst[i][j + ofs] = clip23(mul17(src[i][j + ofs], scale));
  55     }
  56 }
  57
  58 static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
  59                             const float *filter_coeff, ptrdiff_t npcmblocks,
  60                             int dec_select)
  61 {
  62     // Select decimation factor
  63     int factor = 64 << dec_select;
  64     int ncoeffs = 8 >> dec_select;
  65     int nlfesamples = npcmblocks >> (dec_select + 1);
  66     int i, j, k;
  67
  68     for (i = 0; i < nlfesamples; i++) {
  69         // One decimated sample generates 64 or 128 interpolated ones
  70         for (j = 0; j < factor / 2; j++) {
  71             float a = 0;
  72             float b = 0;
  73
  74             for (k = 0; k < ncoeffs; k++) {
  75                 a += filter_coeff[      j * ncoeffs + k] * lfe_samples[-k];
  76                 b += filter_coeff[255 - j * ncoeffs - k] * lfe_samples[-k];
  77             }
  78
  79             pcm_samples[             j] = a;
  80             pcm_samples[factor / 2 + j] = b;
  81         }
  82
  83         lfe_samples++;
  84         pcm_samples += factor;
  85     }
  86 }
  87
  88 static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
  89                              const float *filter_coeff, ptrdiff_t npcmblocks)
  90 {
  91     lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 0);
  92 }
  93
  94 static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
  95                              const float *filter_coeff, ptrdiff_t npcmblocks)
  96 {
  97     lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks, 1);
  98 }
  99
 100 static void lfe_x96_float_c(float *dst, const float *src,
 101                             float *hist, ptrdiff_t len)
 102 {
 103     float prev = *hist;
 104     int i;
 105
 106     for (i = 0; i < len; i++) {
 107         float a = 0.25f * src[i] + 0.75f * prev;
 108         float b = 0.75f * src[i] + 0.25f * prev;
 109         prev = src[i];
 110         *dst++ = a;
 111         *dst++ = b;
 112     }
 113
 114     *hist = prev;
 115 }
 116
 117 static void sub_qmf32_float_c(SynthFilterContext *synth,
 118                               FFTContext *imdct,
 119                               float *pcm_samples,
 120                               int32_t **subband_samples_lo,
 121                               int32_t **subband_samples_hi,
 122                               float *hist1, int *offset, float *hist2,
 123                               const float *filter_coeff, ptrdiff_t npcmblocks,
 124                               float scale)
 125 {
 126     LOCAL_ALIGNED_32(float, input, [32]);
 127     int i, j;
 128
 129     for (j = 0; j < npcmblocks; j++) {
 130         // Load in one sample from each subband
 131         for (i = 0; i < 32; i++) {
 132             if ((i - 1) & 2)
 133                 input[i] = -subband_samples_lo[i][j];
 134             else
 135                 input[i] =  subband_samples_lo[i][j];
 136         }
 137
 138         // One subband sample generates 32 interpolated ones
 139         synth->synth_filter_float(imdct, hist1, offset,
 140                                   hist2, filter_coeff,
 141                                   pcm_samples, input, scale);
 142         pcm_samples += 32;
 143     }
 144 }
 145
 146 static void sub_qmf64_float_c(SynthFilterContext *synth,
 147                               FFTContext *imdct,
 148                               float *pcm_samples,
 149                               int32_t **subband_samples_lo,
 150                               int32_t **subband_samples_hi,
 151                               float *hist1, int *offset, float *hist2,
 152                               const float *filter_coeff, ptrdiff_t npcmblocks,
 153                               float scale)
 154 {
 155     LOCAL_ALIGNED_32(float, input, [64]);
 156     int i, j;
 157
 158     if (!subband_samples_hi)
 159         memset(&input[32], 0, sizeof(input[0]) * 32);
 160
 161     for (j = 0; j < npcmblocks; j++) {
 162         // Load in one sample from each subband
 163         if (subband_samples_hi) {
 164             // Full 64 subbands, first 32 are residual coded
 165             for (i =  0; i < 32; i++) {
 166                 if ((i - 1) & 2)
 167                     input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j];
 168                 else
 169                     input[i] =  subband_samples_lo[i][j] + subband_samples_hi[i][j];
 170             }
 171             for (i = 32; i < 64; i++) {
 172                 if ((i - 1) & 2)
 173                     input[i] = -subband_samples_hi[i][j];
 174                 else
 175                     input[i] =  subband_samples_hi[i][j];
 176             }
 177         } else {
 178             // Only first 32 subbands
 179             for (i =  0; i < 32; i++) {
 180                 if ((i - 1) & 2)
 181                     input[i] = -subband_samples_lo[i][j];
 182                 else
 183                     input[i] =  subband_samples_lo[i][j];
 184             }
 185         }
 186
 187         // One subband sample generates 64 interpolated ones
 188         synth->synth_filter_float_64(imdct, hist1, offset,
 189                                      hist2, filter_coeff,
 190                                      pcm_samples, input, scale);
 191         pcm_samples += 64;
 192     }
 193 }
 194
 195 static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
 196                             const int32_t *filter_coeff, ptrdiff_t npcmblocks)
 197 {
 198     // Select decimation factor
 199     int nlfesamples = npcmblocks >> 1;
 200     int i, j, k;
 201
 202     for (i = 0; i < nlfesamples; i++) {
 203         // One decimated sample generates 64 interpolated ones
 204         for (j = 0; j < 32; j++) {
 205             int64_t a = 0;
 206             int64_t b = 0;
 207
 208             for (k = 0; k < 8; k++) {
 209                 a += (int64_t)filter_coeff[      j * 8 + k] * lfe_samples[-k];
 210                 b += (int64_t)filter_coeff[255 - j * 8 - k] * lfe_samples[-k];
 211             }
 212
 213             pcm_samples[     j] = clip23(norm23(a));
 214             pcm_samples[32 + j] = clip23(norm23(b));
 215         }
 216
 217         lfe_samples++;
 218         pcm_samples += 64;
 219     }
 220 }
 221
 222 static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
 223                             int32_t *hist, ptrdiff_t len)
 224 {
 225     int32_t prev = *hist;
 226     int i;
 227
 228     for (i = 0; i < len; i++) {
 229         int64_t a = INT64_C(2097471) * src[i] + INT64_C(6291137) * prev;
 230         int64_t b = INT64_C(6291137) * src[i] + INT64_C(2097471) * prev;
 231         prev = src[i];
 232         *dst++ = clip23(norm23(a));
 233         *dst++ = clip23(norm23(b));
 234     }
 235
 236     *hist = prev;
 237 }
 238
 239 static void sub_qmf32_fixed_c(SynthFilterContext *synth,
 240                               DCADCTContext *imdct,
 241                               int32_t *pcm_samples,
 242                               int32_t **subband_samples_lo,
 243                               int32_t **subband_samples_hi,
 244                               int32_t *hist1, int *offset, int32_t *hist2,
 245                               const int32_t *filter_coeff, ptrdiff_t npcmblocks)
 246 {
 247     LOCAL_ALIGNED_32(int32_t, input, [32]);
 248     int i, j;
 249
 250     for (j = 0; j < npcmblocks; j++) {
 251         // Load in one sample from each subband
 252         for (i = 0; i < 32; i++)
 253             input[i] = subband_samples_lo[i][j];
 254
 255         // One subband sample generates 32 interpolated ones
 256         synth->synth_filter_fixed(imdct, hist1, offset,
 257                                   hist2, filter_coeff,
 258                                   pcm_samples, input);
 259         pcm_samples += 32;
 260     }
 261 }
 262
 263 static void sub_qmf64_fixed_c(SynthFilterContext *synth,
 264                               DCADCTContext *imdct,
 265                               int32_t *pcm_samples,
 266                               int32_t **subband_samples_lo,
 267                               int32_t **subband_samples_hi,
 268                               int32_t *hist1, int *offset, int32_t *hist2,
 269                               const int32_t *filter_coeff, ptrdiff_t npcmblocks)
 270 {
 271     LOCAL_ALIGNED_32(int32_t, input, [64]);
 272     int i, j;
 273
 274     if (!subband_samples_hi)
 275         memset(&input[32], 0, sizeof(input[0]) * 32);
 276
 277     for (j = 0; j < npcmblocks; j++) {
 278         // Load in one sample from each subband
 279         if (subband_samples_hi) {
 280             // Full 64 subbands, first 32 are residual coded
 281             for (i =  0; i < 32; i++)
 282                 input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
 283             for (i = 32; i < 64; i++)
 284                 input[i] = subband_samples_hi[i][j];
 285         } else {
 286             // Only first 32 subbands
 287             for (i =  0; i < 32; i++)
 288                 input[i] = subband_samples_lo[i][j];
 289         }
 290
 291         // One subband sample generates 64 interpolated ones
 292         synth->synth_filter_fixed_64(imdct, hist1, offset,
 293                                      hist2, filter_coeff,
 294                                      pcm_samples, input);
 295         pcm_samples += 64;
 296     }
 297 }
 298
 299 static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
 300 {
 301     int i;
 302
 303     for (i = 0; i < len; i++)
 304         dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3);
 305 }
 306
 307 static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
 308                            const int32_t *src, ptrdiff_t len)
 309 {
 310     int i;
 311
 312     for (i = 0; i < len; i++) {
 313         int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);
 314         dst1[i] -= cs;
 315         dst2[i] -= cs;
 316     }
 317 }
 318
 319 static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
 320 {
 321     int i;
 322
 323     for (i = 0; i < len; i++)
 324         dst[i] -= (unsigned)mul15(src[i], coeff);
 325 }
 326
 327 static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
 328 {
 329     int i;
 330
 331     for (i = 0; i < len; i++)
 332         dst[i] += (unsigned)mul15(src[i], coeff);
 333 }
 334
 335 static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
 336 {
 337     int i;
 338
 339     for (i = 0; i < len; i++)
 340         dst[i] = mul15(dst[i], scale);
 341 }
 342
 343 static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
 344 {
 345     int i;
 346
 347     for (i = 0; i < len; i++)
 348         dst[i] = mul16(dst[i], scale_inv);
 349 }
 350
 351 static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
 352 {
 353     int i;
 354
 355     for (i = 0; i < len; i++)
 356         dst[i] -= mul22(src[i], coeff);
 357 }
 358
 359 static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
 360 {
 361     int i;
 362
 363     for (i = 0; i < len; i++)
 364         dst[i] -= mul23(src[i], coeff);
 365 }
 366
 367 static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
 368                                   const int32_t *coeff, ptrdiff_t len)
 369 {
 370     int i;
 371
 372     filter0(src0, src1, coeff[0], len);
 373     filter0(src1, src0, coeff[1], len);
 374     filter0(src0, src1, coeff[2], len);
 375     filter0(src1, src0, coeff[3], len);
 376
 377     for (i = 0; i < 8; i++, src0--) {
 378         filter1(src0, src1, coeff[i +  4], len);
 379         filter1(src1, src0, coeff[i + 12], len);
 380         filter1(src0, src1, coeff[i +  4], len);
 381     }
 382
 383     for (i = 0; i < len; i++) {
 384         *dst++ = *src1++;
 385         *dst++ = *++src0;
 386     }
 387 }
 388
 389 static void lbr_bank_c(float output[32][4], float **input,
 390                        const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
 391 {
 392     float SW0 = coeff[0];
 393     float SW1 = coeff[1];
 394     float SW2 = coeff[2];
 395     float SW3 = coeff[3];
 396
 397     float C1  = coeff[4];
 398     float C2  = coeff[5];
 399     float C3  = coeff[6];
 400     float C4  = coeff[7];
 401
 402     float AL1 = coeff[8];
 403     float AL2 = coeff[9];
 404
 405     int i;
 406
 407     // Short window and 8 point forward MDCT
 408     for (i = 0; i < len; i++) {
 409         float *src = input[i] + ofs;
 410
 411         float a = src[-4] * SW0 - src[-1] * SW3;
 412         float b = src[-3] * SW1 - src[-2] * SW2;
 413         float c = src[ 2] * SW1 + src[ 1] * SW2;
 414         float d = src[ 3] * SW0 + src[ 0] * SW3;
 415
 416         output[i][0] = C1 * b - C2 * c + C4 * a - C3 * d;
 417         output[i][1] = C1 * d - C2 * a - C4 * b - C3 * c;
 418         output[i][2] = C3 * b + C2 * d - C4 * c + C1 * a;
 419         output[i][3] = C3 * a - C2 * b + C4 * d - C1 * c;
 420     }
 421
 422     // Aliasing cancellation for high frequencies
 423     for (i = 12; i < len - 1; i++) {
 424         float a = output[i  ][3] * AL1;
 425         float b = output[i+1][0] * AL1;
 426         output[i  ][3] += b - a;
 427         output[i+1][0] -= b + a;
 428         a = output[i  ][2] * AL2;
 429         b = output[i+1][1] * AL2;
 430         output[i  ][2] += b - a;
 431         output[i+1][1] -= b + a;
 432     }
 433 }
 434
 435 static void lfe_iir_c(float *output, const float *input,
 436                       const float iir[5][4], float hist[5][2],
 437                       ptrdiff_t factor)
 438 {
 439     float res, tmp;
 440     int i, j, k;
 441
 442     for (i = 0; i < 64; i++) {
 443         res = *input++;
 444
 445         for (j = 0; j < factor; j++) {
 446             for (k = 0; k < 5; k++) {
 447                 tmp = hist[k][0] * iir[k][0] + hist[k][1] * iir[k][1] + res;
 448                 res = hist[k][0] * iir[k][2] + hist[k][1] * iir[k][3] + tmp;
 449
 450                 hist[k][0] = hist[k][1];
 451                 hist[k][1] = tmp;
 452             }
 453
 454             *output++ = res;
 455             res = 0;
 456         }
 457     }
 458 }
 459
 460 av_cold void ff_dcadsp_init(DCADSPContext *s)
 461 {
 462     s->decode_hf     = decode_hf_c;
 463     s->decode_joint  = decode_joint_c;
 464
 465     s->lfe_fir_float[0] = lfe_fir0_float_c;
 466     s->lfe_fir_float[1] = lfe_fir1_float_c;
 467     s->lfe_x96_float    = lfe_x96_float_c;
 468     s->sub_qmf_float[0] = sub_qmf32_float_c;
 469     s->sub_qmf_float[1] = sub_qmf64_float_c;
 470
 471     s->lfe_fir_fixed    = lfe_fir_fixed_c;
 472     s->lfe_x96_fixed    = lfe_x96_fixed_c;
 473     s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;
 474     s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;
 475
 476     s->decor   = decor_c;
 477
 478     s->dmix_sub_xch   = dmix_sub_xch_c;
 479     s->dmix_sub       = dmix_sub_c;
 480     s->dmix_add       = dmix_add_c;
 481     s->dmix_scale     = dmix_scale_c;
 482     s->dmix_scale_inv = dmix_scale_inv_c;
 483
 484     s->assemble_freq_bands = assemble_freq_bands_c;
 485
 486     s->lbr_bank = lbr_bank_c;
 487     s->lfe_iir = lfe_iir_c;
 488
 489     if (ARCH_X86)
 490         ff_dcadsp_init_x86(s);
 491 }