git.sesse.net Git - ffmpeg/blob - libavcodec/ac3dsp.c

   1 /*
   2  * AC-3 DSP functions
   3  * Copyright (c) 2011 Justin Ruggles
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "libavutil/mem_internal.h"
  23
  24 #include "avcodec.h"
  25 #include "ac3.h"
  26 #include "ac3dsp.h"
  27 #include "mathops.h"
  28
  29 static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
  30 {
  31     int blk, i;
  32
  33     if (!num_reuse_blocks)
  34         return;
  35
  36     for (i = 0; i < nb_coefs; i++) {
  37         uint8_t min_exp = *exp;
  38         uint8_t *exp1 = exp + 256;
  39         for (blk = 0; blk < num_reuse_blocks; blk++) {
  40             uint8_t next_exp = *exp1;
  41             if (next_exp < min_exp)
  42                 min_exp = next_exp;
  43             exp1 += 256;
  44         }
  45         *exp++ = min_exp;
  46     }
  47 }
  48
  49 static int ac3_max_msb_abs_int16_c(const int16_t *src, int len)
  50 {
  51     int i, v = 0;
  52     for (i = 0; i < len; i++)
  53         v |= abs(src[i]);
  54     return v;
  55 }
  56
  57 static void ac3_lshift_int16_c(int16_t *src, unsigned int len,
  58                                unsigned int shift)
  59 {
  60     uint32_t *src32 = (uint32_t *)src;
  61     const uint32_t mask = ~(((1 << shift) - 1) << 16);
  62     int i;
  63     len >>= 1;
  64     for (i = 0; i < len; i += 8) {
  65         src32[i  ] = (src32[i  ] << shift) & mask;
  66         src32[i+1] = (src32[i+1] << shift) & mask;
  67         src32[i+2] = (src32[i+2] << shift) & mask;
  68         src32[i+3] = (src32[i+3] << shift) & mask;
  69         src32[i+4] = (src32[i+4] << shift) & mask;
  70         src32[i+5] = (src32[i+5] << shift) & mask;
  71         src32[i+6] = (src32[i+6] << shift) & mask;
  72         src32[i+7] = (src32[i+7] << shift) & mask;
  73     }
  74 }
  75
  76 static void ac3_rshift_int32_c(int32_t *src, unsigned int len,
  77                                unsigned int shift)
  78 {
  79     do {
  80         *src++ >>= shift;
  81         *src++ >>= shift;
  82         *src++ >>= shift;
  83         *src++ >>= shift;
  84         *src++ >>= shift;
  85         *src++ >>= shift;
  86         *src++ >>= shift;
  87         *src++ >>= shift;
  88         len -= 8;
  89     } while (len > 0);
  90 }
  91
  92 static void float_to_fixed24_c(int32_t *dst, const float *src, unsigned int len)
  93 {
  94     const float scale = 1 << 24;
  95     do {
  96         *dst++ = lrintf(*src++ * scale);
  97         *dst++ = lrintf(*src++ * scale);
  98         *dst++ = lrintf(*src++ * scale);
  99         *dst++ = lrintf(*src++ * scale);
 100         *dst++ = lrintf(*src++ * scale);
 101         *dst++ = lrintf(*src++ * scale);
 102         *dst++ = lrintf(*src++ * scale);
 103         *dst++ = lrintf(*src++ * scale);
 104         len -= 8;
 105     } while (len > 0);
 106 }
 107
 108 static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd,
 109                                      int start, int end,
 110                                      int snr_offset, int floor,
 111                                      const uint8_t *bap_tab, uint8_t *bap)
 112 {
 113     int bin, band, band_end;
 114
 115     /* special case, if snr offset is -960, set all bap's to zero */
 116     if (snr_offset == -960) {
 117         memset(bap, 0, AC3_MAX_COEFS);
 118         return;
 119     }
 120
 121     bin  = start;
 122     band = ff_ac3_bin_to_band_tab[start];
 123     do {
 124         int m = (FFMAX(mask[band] - snr_offset - floor, 0) & 0x1FE0) + floor;
 125         band_end = ff_ac3_band_start_tab[++band];
 126         band_end = FFMIN(band_end, end);
 127
 128         for (; bin < band_end; bin++) {
 129             int address = av_clip_uintp2((psd[bin] - m) >> 5, 6);
 130             bap[bin] = bap_tab[address];
 131         }
 132     } while (end > band_end);
 133 }
 134
 135 static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], uint8_t *bap,
 136                                     int len)
 137 {
 138     while (len-- > 0)
 139         mant_cnt[bap[len]]++;
 140 }
 141
/**
 * Number of bits used per mantissa, indexed by bap value.
 *
 * The entries for bap 1, 2 and 4 are 0: ac3_compute_mantissa_size_c() in this
 * file accounts for those as grouped mantissas and only reads this table for
 * bap 5 to 15.
 */
DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
    0,  0,  0,  3,  0,  4,  5,  6,  7,  8,  9, 10, 11, 12, 14, 16
};
 145
 146 static int ac3_compute_mantissa_size_c(uint16_t mant_cnt[6][16])
 147 {
 148     int blk, bap;
 149     int bits = 0;
 150
 151     for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
 152         // bap=1 : 3 mantissas in 5 bits
 153         bits += (mant_cnt[blk][1] / 3) * 5;
 154         // bap=2 : 3 mantissas in 7 bits
 155         // bap=4 : 2 mantissas in 7 bits
 156         bits += ((mant_cnt[blk][2] / 3) + (mant_cnt[blk][4] >> 1)) * 7;
 157         // bap=3 : 1 mantissa in 3 bits
 158         bits += mant_cnt[blk][3] * 3;
 159         // bap=5 to 15 : get bits per mantissa from table
 160         for (bap = 5; bap < 16; bap++)
 161             bits += mant_cnt[blk][bap] * ff_ac3_bap_bits[bap];
 162     }
 163     return bits;
 164 }
 165
 166 static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs)
 167 {
 168     int i;
 169
 170     for (i = 0; i < nb_coefs; i++) {
 171         int v = abs(coef[i]);
 172         exp[i] = v ? 23 - av_log2(v) : 24;
 173     }
 174 }
 175
 176 static void ac3_sum_square_butterfly_int32_c(int64_t sum[4],
 177                                              const int32_t *coef0,
 178                                              const int32_t *coef1,
 179                                              int len)
 180 {
 181     int i;
 182
 183     sum[0] = sum[1] = sum[2] = sum[3] = 0;
 184
 185     for (i = 0; i < len; i++) {
 186         int lt = coef0[i];
 187         int rt = coef1[i];
 188         int md = lt + rt;
 189         int sd = lt - rt;
 190         MAC64(sum[0], lt, lt);
 191         MAC64(sum[1], rt, rt);
 192         MAC64(sum[2], md, md);
 193         MAC64(sum[3], sd, sd);
 194     }
 195 }
 196
 197 static void ac3_sum_square_butterfly_float_c(float sum[4],
 198                                              const float *coef0,
 199                                              const float *coef1,
 200                                              int len)
 201 {
 202     int i;
 203
 204     sum[0] = sum[1] = sum[2] = sum[3] = 0;
 205
 206     for (i = 0; i < len; i++) {
 207         float lt = coef0[i];
 208         float rt = coef1[i];
 209         float md = lt + rt;
 210         float sd = lt - rt;
 211         sum[0] += lt * lt;
 212         sum[1] += rt * rt;
 213         sum[2] += md * md;
 214         sum[3] += sd * sd;
 215     }
 216 }
 217
 218 static void ac3_downmix_5_to_2_symmetric_c(float **samples, float **matrix,
 219                                            int len)
 220 {
 221     int i;
 222     float v0, v1;
 223     float front_mix    = matrix[0][0];
 224     float center_mix   = matrix[0][1];
 225     float surround_mix = matrix[0][3];
 226
 227     for (i = 0; i < len; i++) {
 228         v0 = samples[0][i] * front_mix  +
 229              samples[1][i] * center_mix +
 230              samples[3][i] * surround_mix;
 231
 232         v1 = samples[1][i] * center_mix +
 233              samples[2][i] * front_mix  +
 234              samples[4][i] * surround_mix;
 235
 236         samples[0][i] = v0;
 237         samples[1][i] = v1;
 238     }
 239 }
 240
 241 static void ac3_downmix_5_to_1_symmetric_c(float **samples, float **matrix,
 242                                            int len)
 243 {
 244     int i;
 245     float front_mix    = matrix[0][0];
 246     float center_mix   = matrix[0][1];
 247     float surround_mix = matrix[0][3];
 248
 249     for (i = 0; i < len; i++) {
 250         samples[0][i] = samples[0][i] * front_mix    +
 251                         samples[1][i] * center_mix   +
 252                         samples[2][i] * front_mix    +
 253                         samples[3][i] * surround_mix +
 254                         samples[4][i] * surround_mix;
 255     }
 256 }
 257
 258 static void ac3_downmix_c(float **samples, float **matrix,
 259                           int out_ch, int in_ch, int len)
 260 {
 261     int i, j;
 262     float v0, v1;
 263
 264     if (out_ch == 2) {
 265         for (i = 0; i < len; i++) {
 266             v0 = v1 = 0.0f;
 267             for (j = 0; j < in_ch; j++) {
 268                 v0 += samples[j][i] * matrix[0][j];
 269                 v1 += samples[j][i] * matrix[1][j];
 270             }
 271             samples[0][i] = v0;
 272             samples[1][i] = v1;
 273         }
 274     } else if (out_ch == 1) {
 275         for (i = 0; i < len; i++) {
 276             v0 = 0.0f;
 277             for (j = 0; j < in_ch; j++)
 278                 v0 += samples[j][i] * matrix[0][j];
 279             samples[0][i] = v0;
 280         }
 281     }
 282 }
 283
 284 static void ac3_downmix_5_to_2_symmetric_c_fixed(int32_t **samples, int16_t **matrix,
 285                                            int len)
 286 {
 287     int i;
 288     int64_t v0, v1;
 289     int16_t front_mix    = matrix[0][0];
 290     int16_t center_mix   = matrix[0][1];
 291     int16_t surround_mix = matrix[0][3];
 292
 293     for (i = 0; i < len; i++) {
 294         v0 = (int64_t)samples[0][i] * front_mix  +
 295              (int64_t)samples[1][i] * center_mix +
 296              (int64_t)samples[3][i] * surround_mix;
 297
 298         v1 = (int64_t)samples[1][i] * center_mix +
 299              (int64_t)samples[2][i] * front_mix  +
 300              (int64_t)samples[4][i] * surround_mix;
 301
 302         samples[0][i] = (v0+2048)>>12;
 303         samples[1][i] = (v1+2048)>>12;
 304     }
 305 }
 306
 307 static void ac3_downmix_5_to_1_symmetric_c_fixed(int32_t **samples, int16_t **matrix,
 308                                                  int len)
 309 {
 310     int i;
 311     int64_t v0;
 312     int16_t front_mix    = matrix[0][0];
 313     int16_t center_mix   = matrix[0][1];
 314     int16_t surround_mix = matrix[0][3];
 315
 316     for (i = 0; i < len; i++) {
 317         v0 = (int64_t)samples[0][i] * front_mix    +
 318              (int64_t)samples[1][i] * center_mix   +
 319              (int64_t)samples[2][i] * front_mix    +
 320              (int64_t)samples[3][i] * surround_mix +
 321              (int64_t)samples[4][i] * surround_mix;
 322
 323         samples[0][i] = (v0+2048)>>12;
 324     }
 325 }
 326
 327 static void ac3_downmix_c_fixed(int32_t **samples, int16_t **matrix,
 328                                 int out_ch, int in_ch, int len)
 329 {
 330     int i, j;
 331     int64_t v0, v1;
 332     if (out_ch == 2) {
 333         for (i = 0; i < len; i++) {
 334             v0 = v1 = 0;
 335             for (j = 0; j < in_ch; j++) {
 336                 v0 += (int64_t)samples[j][i] * matrix[0][j];
 337                 v1 += (int64_t)samples[j][i] * matrix[1][j];
 338             }
 339             samples[0][i] = (v0+2048)>>12;
 340             samples[1][i] = (v1+2048)>>12;
 341         }
 342     } else if (out_ch == 1) {
 343         for (i = 0; i < len; i++) {
 344             v0 = 0;
 345             for (j = 0; j < in_ch; j++)
 346                 v0 += (int64_t)samples[j][i] * matrix[0][j];
 347             samples[0][i] = (v0+2048)>>12;
 348         }
 349     }
 350 }
 351
 352 void ff_ac3dsp_downmix_fixed(AC3DSPContext *c, int32_t **samples, int16_t **matrix,
 353                              int out_ch, int in_ch, int len)
 354 {
 355     if (c->in_channels != in_ch || c->out_channels != out_ch) {
 356         c->in_channels  = in_ch;
 357         c->out_channels = out_ch;
 358         c->downmix_fixed = NULL;
 359
 360         if (in_ch == 5 && out_ch == 2 &&
 361             !(matrix[1][0] | matrix[0][2]  |
 362               matrix[1][3] | matrix[0][4]  |
 363              (matrix[0][1] ^ matrix[1][1]) |
 364              (matrix[0][0] ^ matrix[1][2]))) {
 365             c->downmix_fixed = ac3_downmix_5_to_2_symmetric_c_fixed;
 366         } else if (in_ch == 5 && out_ch == 1 &&
 367                    matrix[0][0] == matrix[0][2] &&
 368                    matrix[0][3] == matrix[0][4]) {
 369             c->downmix_fixed = ac3_downmix_5_to_1_symmetric_c_fixed;
 370         }
 371     }
 372
 373     if (c->downmix_fixed)
 374         c->downmix_fixed(samples, matrix, len);
 375     else
 376         ac3_downmix_c_fixed(samples, matrix, out_ch, in_ch, len);
 377 }
 378
 379 static void apply_window_int16_c(int16_t *output, const int16_t *input,
 380                                  const int16_t *window, unsigned int len)
 381 {
 382     int i;
 383     int len2 = len >> 1;
 384
 385     for (i = 0; i < len2; i++) {
 386         int16_t w       = window[i];
 387         output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15;
 388         output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
 389     }
 390 }
 391
 392 void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix,
 393                        int out_ch, int in_ch, int len)
 394 {
 395     if (c->in_channels != in_ch || c->out_channels != out_ch) {
 396         int **matrix_cmp = (int **)matrix;
 397
 398         c->in_channels  = in_ch;
 399         c->out_channels = out_ch;
 400         c->downmix      = NULL;
 401
 402         if (in_ch == 5 && out_ch == 2 &&
 403             !(matrix_cmp[1][0] | matrix_cmp[0][2]   |
 404               matrix_cmp[1][3] | matrix_cmp[0][4]   |
 405              (matrix_cmp[0][1] ^ matrix_cmp[1][1]) |
 406              (matrix_cmp[0][0] ^ matrix_cmp[1][2]))) {
 407             c->downmix = ac3_downmix_5_to_2_symmetric_c;
 408         } else if (in_ch == 5 && out_ch == 1 &&
 409                    matrix_cmp[0][0] == matrix_cmp[0][2] &&
 410                    matrix_cmp[0][3] == matrix_cmp[0][4]) {
 411             c->downmix = ac3_downmix_5_to_1_symmetric_c;
 412         }
 413
 414         if (ARCH_X86)
 415             ff_ac3dsp_set_downmix_x86(c);
 416     }
 417
 418     if (c->downmix)
 419         c->downmix(samples, matrix, len);
 420     else
 421         ac3_downmix_c(samples, matrix, out_ch, in_ch, len);
 422 }
 423
 424 av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
 425 {
 426     c->ac3_exponent_min = ac3_exponent_min_c;
 427     c->ac3_max_msb_abs_int16 = ac3_max_msb_abs_int16_c;
 428     c->ac3_lshift_int16 = ac3_lshift_int16_c;
 429     c->ac3_rshift_int32 = ac3_rshift_int32_c;
 430     c->float_to_fixed24 = float_to_fixed24_c;
 431     c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c;
 432     c->update_bap_counts = ac3_update_bap_counts_c;
 433     c->compute_mantissa_size = ac3_compute_mantissa_size_c;
 434     c->extract_exponents = ac3_extract_exponents_c;
 435     c->sum_square_butterfly_int32 = ac3_sum_square_butterfly_int32_c;
 436     c->sum_square_butterfly_float = ac3_sum_square_butterfly_float_c;
 437     c->in_channels           = 0;
 438     c->out_channels          = 0;
 439     c->downmix               = NULL;
 440     c->downmix_fixed         = NULL;
 441     c->apply_window_int16 = apply_window_int16_c;
 442
 443     if (ARCH_ARM)
 444         ff_ac3dsp_init_arm(c, bit_exact);
 445     if (ARCH_X86)
 446         ff_ac3dsp_init_x86(c, bit_exact);
 447     if (ARCH_MIPS)
 448         ff_ac3dsp_init_mips(c, bit_exact);
 449 }