git.sesse.net Git - ffmpeg/blob - libavcodec/aacdec.c

   1 /*
   2  * AAC decoder
   3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
   4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
   5  * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
   6  *
   7  * AAC LATM decoder
   8  * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
   9  * Copyright (c) 2010      Janne Grunau <janne-libav@jannau.net>
  10  *
  11  * This file is part of FFmpeg.
  12  *
  13  * FFmpeg is free software; you can redistribute it and/or
  14  * modify it under the terms of the GNU Lesser General Public
  15  * License as published by the Free Software Foundation; either
  16  * version 2.1 of the License, or (at your option) any later version.
  17  *
  18  * FFmpeg is distributed in the hope that it will be useful,
  19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21  * Lesser General Public License for more details.
  22  *
  23  * You should have received a copy of the GNU Lesser General Public
  24  * License along with FFmpeg; if not, write to the Free Software
  25  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  26  */
  27
  28 /**
  29  * @file
  30  * AAC decoder
  31  * @author Oded Shimon  ( ods15 ods15 dyndns org )
  32  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  33  */
  34
  35 #define FFT_FLOAT 1
  36 #define FFT_FIXED_32 0
  37 #define USE_FIXED 0
  38
  39 #include "libavutil/float_dsp.h"
  40 #include "libavutil/opt.h"
  41 #include "avcodec.h"
  42 #include "internal.h"
  43 #include "get_bits.h"
  44 #include "fft.h"
  45 #include "mdct15.h"
  46 #include "lpc.h"
  47 #include "kbdwin.h"
  48 #include "sinewin.h"
  49
  50 #include "aac.h"
  51 #include "aactab.h"
  52 #include "aacdectab.h"
  53 #include "adts_header.h"
  54 #include "cbrt_data.h"
  55 #include "sbr.h"
  56 #include "aacsbr.h"
  57 #include "mpeg4audio.h"
  58 #include "profiles.h"
  59 #include "libavutil/intfloat.h"
  60
  61 #include <errno.h>
  62 #include <math.h>
  63 #include <stdint.h>
  64 #include <string.h>
  65
  66 #if ARCH_ARM
  67 #   include "arm/aac.h"
  68 #elif ARCH_MIPS
  69 #   include "mips/aacdec_mips.h"
  70 #endif
  71
/*
 * File-local lookup tables of 120 and 960 INTFLOAT entries, declared through
 * DECLARE_ALIGNED with an alignment argument of 32. Nothing in the code
 * visible in this file fills or reads them directly.
 * NOTE(review): going by the names these are sine and Kaiser-Bessel-derived
 * windows for 960-sample frames (long = 960, short = 120), presumably set up
 * and used by the template included further down -- confirm.
 */
DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_120))[120];
DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(sine_960))[960];
DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_long_960))[960];
DECLARE_ALIGNED(32, static INTFLOAT, AAC_RENAME(aac_kbd_short_120))[120];
  76
  77 static av_always_inline void reset_predict_state(PredictorState *ps)
  78 {
  79     ps->r0   = 0.0f;
  80     ps->r1   = 0.0f;
  81     ps->cor0 = 0.0f;
  82     ps->cor1 = 0.0f;
  83     ps->var0 = 1.0f;
  84     ps->var1 = 1.0f;
  85 }
  86
  87 #ifndef VMUL2
  88 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
  89                            const float *scale)
  90 {
  91     float s = *scale;
  92     *dst++ = v[idx    & 15] * s;
  93     *dst++ = v[idx>>4 & 15] * s;
  94     return dst;
  95 }
  96 #endif
  97
  98 #ifndef VMUL4
  99 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
 100                            const float *scale)
 101 {
 102     float s = *scale;
 103     *dst++ = v[idx    & 3] * s;
 104     *dst++ = v[idx>>2 & 3] * s;
 105     *dst++ = v[idx>>4 & 3] * s;
 106     *dst++ = v[idx>>6 & 3] * s;
 107     return dst;
 108 }
 109 #endif
 110
 111 #ifndef VMUL2S
 112 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
 113                             unsigned sign, const float *scale)
 114 {
 115     union av_intfloat32 s0, s1;
 116
 117     s0.f = s1.f = *scale;
 118     s0.i ^= sign >> 1 << 31;
 119     s1.i ^= sign      << 31;
 120
 121     *dst++ = v[idx    & 15] * s0.f;
 122     *dst++ = v[idx>>4 & 15] * s1.f;
 123
 124     return dst;
 125 }
 126 #endif
 127
 128 #ifndef VMUL4S
 129 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
 130                             unsigned sign, const float *scale)
 131 {
 132     unsigned nz = idx >> 12;
 133     union av_intfloat32 s = { .f = *scale };
 134     union av_intfloat32 t;
 135
 136     t.i = s.i ^ (sign & 1U<<31);
 137     *dst++ = v[idx    & 3] * t.f;
 138
 139     sign <<= nz & 1; nz >>= 1;
 140     t.i = s.i ^ (sign & 1U<<31);
 141     *dst++ = v[idx>>2 & 3] * t.f;
 142
 143     sign <<= nz & 1; nz >>= 1;
 144     t.i = s.i ^ (sign & 1U<<31);
 145     *dst++ = v[idx>>4 & 3] * t.f;
 146
 147     sign <<= nz & 1;
 148     t.i = s.i ^ (sign & 1U<<31);
 149     *dst++ = v[idx>>6 & 3] * t.f;
 150
 151     return dst;
 152 }
 153 #endif
 154
 155 static av_always_inline float flt16_round(float pf)
 156 {
 157     union av_intfloat32 tmp;
 158     tmp.f = pf;
 159     tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
 160     return tmp.f;
 161 }
 162
 163 static av_always_inline float flt16_even(float pf)
 164 {
 165     union av_intfloat32 tmp;
 166     tmp.f = pf;
 167     tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
 168     return tmp.f;
 169 }
 170
 171 static av_always_inline float flt16_trunc(float pf)
 172 {
 173     union av_intfloat32 pun;
 174     pun.f = pf;
 175     pun.i &= 0xFFFF0000U;
 176     return pun.f;
 177 }
 178
 179 static av_always_inline void predict(PredictorState *ps, float *coef,
 180                                      int output_enable)
 181 {
 182     const float a     = 0.953125; // 61.0 / 64
 183     const float alpha = 0.90625;  // 29.0 / 32
 184     float e0, e1;
 185     float pv;
 186     float k1, k2;
 187     float   r0 = ps->r0,     r1 = ps->r1;
 188     float cor0 = ps->cor0, cor1 = ps->cor1;
 189     float var0 = ps->var0, var1 = ps->var1;
 190
 191     k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
 192     k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
 193
 194     pv = flt16_round(k1 * r0 + k2 * r1);
 195     if (output_enable)
 196         *coef += pv;
 197
 198     e0 = *coef;
 199     e1 = e0 - k1 * r0;
 200
 201     ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
 202     ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
 203     ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
 204     ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
 205
 206     ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
 207     ps->r0 = flt16_trunc(a * e0);
 208 }
 209
 210 /**
 211  * Apply dependent channel coupling (applied before IMDCT).
 212  *
 213  * @param   index   index into coupling gain array
 214  */
 215 static void apply_dependent_coupling(AACContext *ac,
 216                                      SingleChannelElement *target,
 217                                      ChannelElement *cce, int index)
 218 {
 219     IndividualChannelStream *ics = &cce->ch[0].ics;
 220     const uint16_t *offsets = ics->swb_offset;
 221     float *dest = target->coeffs;
 222     const float *src = cce->ch[0].coeffs;
 223     int g, i, group, k, idx = 0;
 224     if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
 225         av_log(ac->avctx, AV_LOG_ERROR,
 226                "Dependent coupling is not supported together with LTP\n");
 227         return;
 228     }
 229     for (g = 0; g < ics->num_window_groups; g++) {
 230         for (i = 0; i < ics->max_sfb; i++, idx++) {
 231             if (cce->ch[0].band_type[idx] != ZERO_BT) {
 232                 const float gain = cce->coup.gain[index][idx];
 233                 for (group = 0; group < ics->group_len[g]; group++) {
 234                     for (k = offsets[i]; k < offsets[i + 1]; k++) {
 235                         // FIXME: SIMDify
 236                         dest[group * 128 + k] += gain * src[group * 128 + k];
 237                     }
 238                 }
 239             }
 240         }
 241         dest += ics->group_len[g] * 128;
 242         src  += ics->group_len[g] * 128;
 243     }
 244 }
 245
 246 /**
 247  * Apply independent channel coupling (applied after IMDCT).
 248  *
 249  * @param   index   index into coupling gain array
 250  */
 251 static void apply_independent_coupling(AACContext *ac,
 252                                        SingleChannelElement *target,
 253                                        ChannelElement *cce, int index)
 254 {
 255     const float gain = cce->coup.gain[index][0];
 256     const float *src = cce->ch[0].ret;
 257     float *dest = target->ret;
 258     const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
 259
 260     ac->fdsp->vector_fmac_scalar(dest, src, gain, len);
 261 }
 262
 263 #include "aacdec_template.c"
 264
 265 #define LOAS_SYNC_WORD   0x2b7       ///< 11 bits LOAS sync word
 266
/**
 * Private decoder context of the LATM decoder: the AAC decoder context plus
 * the few stream-mux parameters the LATM parsing code needs between frames.
 */
struct LATMContext {
    AACContext aac_ctx;     ///< embedded AAC decoder context; kept as first member (NOTE(review): presumably so shared AAC callbacks can use priv_data as an AACContext -- confirm)
    int initialized;        ///< set to 1 once the stored extradata was decoded successfully, cleared when a new or changed config is seen

    // parser data
    int audio_mux_version_A; ///< 1-bit audioMuxVersionA flag; when non-zero the rest of the mux config and the payload length info are not parsed
    int frame_length_type;   ///< 3-bit frame length type; 0 = length coded per frame, 1 = fixed length taken from frame_length
    int frame_length;        ///< 9-bit value read from the mux config when frame_length_type is 1
};
 276
 277 static inline uint32_t latm_get_value(GetBitContext *b)
 278 {
 279     int length = get_bits(b, 2);
 280
 281     return get_bits_long(b, (length+1)*8);
 282 }
 283
 284 static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
 285                                              GetBitContext *gb, int asclen)
 286 {
 287     AACContext *ac        = &latmctx->aac_ctx;
 288     AVCodecContext *avctx = ac->avctx;
 289     MPEG4AudioConfig m4ac = { 0 };
 290     GetBitContext gbc;
 291     int config_start_bit  = get_bits_count(gb);
 292     int sync_extension    = 0;
 293     int bits_consumed, esize, i;
 294
 295     if (asclen > 0) {
 296         sync_extension = 1;
 297         asclen         = FFMIN(asclen, get_bits_left(gb));
 298         init_get_bits(&gbc, gb->buffer, config_start_bit + asclen);
 299         skip_bits_long(&gbc, config_start_bit);
 300     } else if (asclen == 0) {
 301         gbc = *gb;
 302     } else {
 303         return AVERROR_INVALIDDATA;
 304     }
 305
 306     if (get_bits_left(gb) <= 0)
 307         return AVERROR_INVALIDDATA;
 308
 309     bits_consumed = decode_audio_specific_config_gb(NULL, avctx, &m4ac,
 310                                                     &gbc, config_start_bit,
 311                                                     sync_extension);
 312
 313     if (bits_consumed < config_start_bit)
 314         return AVERROR_INVALIDDATA;
 315     bits_consumed -= config_start_bit;
 316
 317     if (asclen == 0)
 318       asclen = bits_consumed;
 319
 320     if (!latmctx->initialized ||
 321         ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
 322         ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
 323
 324         if (latmctx->initialized) {
 325             av_log(avctx, AV_LOG_INFO, "audio config changed (sample_rate=%d, chan_config=%d)\n", m4ac.sample_rate, m4ac.chan_config);
 326         } else {
 327             av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
 328         }
 329         latmctx->initialized = 0;
 330
 331         esize = (asclen + 7) / 8;
 332
 333         if (avctx->extradata_size < esize) {
 334             av_free(avctx->extradata);
 335             avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE);
 336             if (!avctx->extradata)
 337                 return AVERROR(ENOMEM);
 338         }
 339
 340         avctx->extradata_size = esize;
 341         gbc = *gb;
 342         for (i = 0; i < esize; i++) {
 343           avctx->extradata[i] = get_bits(&gbc, 8);
 344         }
 345         memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 346     }
 347     skip_bits_long(gb, asclen);
 348
 349     return 0;
 350 }
 351
 352 static int read_stream_mux_config(struct LATMContext *latmctx,
 353                                   GetBitContext *gb)
 354 {
 355     int ret, audio_mux_version = get_bits(gb, 1);
 356
 357     latmctx->audio_mux_version_A = 0;
 358     if (audio_mux_version)
 359         latmctx->audio_mux_version_A = get_bits(gb, 1);
 360
 361     if (!latmctx->audio_mux_version_A) {
 362
 363         if (audio_mux_version)
 364             latm_get_value(gb);                 // taraFullness
 365
 366         skip_bits(gb, 1);                       // allStreamSameTimeFraming
 367         skip_bits(gb, 6);                       // numSubFrames
 368         // numPrograms
 369         if (get_bits(gb, 4)) {                  // numPrograms
 370             avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs");
 371             return AVERROR_PATCHWELCOME;
 372         }
 373
 374         // for each program (which there is only one in DVB)
 375
 376         // for each layer (which there is only one in DVB)
 377         if (get_bits(gb, 3)) {                   // numLayer
 378             avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers");
 379             return AVERROR_PATCHWELCOME;
 380         }
 381
 382         // for all but first stream: use_same_config = get_bits(gb, 1);
 383         if (!audio_mux_version) {
 384             if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
 385                 return ret;
 386         } else {
 387             int ascLen = latm_get_value(gb);
 388             if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
 389                 return ret;
 390         }
 391
 392         latmctx->frame_length_type = get_bits(gb, 3);
 393         switch (latmctx->frame_length_type) {
 394         case 0:
 395             skip_bits(gb, 8);       // latmBufferFullness
 396             break;
 397         case 1:
 398             latmctx->frame_length = get_bits(gb, 9);
 399             break;
 400         case 3:
 401         case 4:
 402         case 5:
 403             skip_bits(gb, 6);       // CELP frame length table index
 404             break;
 405         case 6:
 406         case 7:
 407             skip_bits(gb, 1);       // HVXC frame length table index
 408             break;
 409         }
 410
 411         if (get_bits(gb, 1)) {                  // other data
 412             if (audio_mux_version) {
 413                 latm_get_value(gb);             // other_data_bits
 414             } else {
 415                 int esc;
 416                 do {
 417                     if (get_bits_left(gb) < 9)
 418                         return AVERROR_INVALIDDATA;
 419                     esc = get_bits(gb, 1);
 420                     skip_bits(gb, 8);
 421                 } while (esc);
 422             }
 423         }
 424
 425         if (get_bits(gb, 1))                     // crc present
 426             skip_bits(gb, 8);                    // config_crc
 427     }
 428
 429     return 0;
 430 }
 431
 432 static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
 433 {
 434     uint8_t tmp;
 435
 436     if (ctx->frame_length_type == 0) {
 437         int mux_slot_length = 0;
 438         do {
 439             if (get_bits_left(gb) < 8)
 440                 return AVERROR_INVALIDDATA;
 441             tmp = get_bits(gb, 8);
 442             mux_slot_length += tmp;
 443         } while (tmp == 255);
 444         return mux_slot_length;
 445     } else if (ctx->frame_length_type == 1) {
 446         return ctx->frame_length;
 447     } else if (ctx->frame_length_type == 3 ||
 448                ctx->frame_length_type == 5 ||
 449                ctx->frame_length_type == 7) {
 450         skip_bits(gb, 2);          // mux_slot_length_coded
 451     }
 452     return 0;
 453 }
 454
 455 static int read_audio_mux_element(struct LATMContext *latmctx,
 456                                   GetBitContext *gb)
 457 {
 458     int err;
 459     uint8_t use_same_mux = get_bits(gb, 1);
 460     if (!use_same_mux) {
 461         if ((err = read_stream_mux_config(latmctx, gb)) < 0)
 462             return err;
 463     } else if (!latmctx->aac_ctx.avctx->extradata) {
 464         av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
 465                "no decoder config found\n");
 466         return 1;
 467     }
 468     if (latmctx->audio_mux_version_A == 0) {
 469         int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
 470         if (mux_slot_length_bytes < 0 || mux_slot_length_bytes * 8LL > get_bits_left(gb)) {
 471             av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
 472             return AVERROR_INVALIDDATA;
 473         } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
 474             av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
 475                    "frame length mismatch %d << %d\n",
 476                    mux_slot_length_bytes * 8, get_bits_left(gb));
 477             return AVERROR_INVALIDDATA;
 478         }
 479     }
 480     return 0;
 481 }
 482
 483
 484 static int latm_decode_frame(AVCodecContext *avctx, void *out,
 485                              int *got_frame_ptr, AVPacket *avpkt)
 486 {
 487     struct LATMContext *latmctx = avctx->priv_data;
 488     int                 muxlength, err;
 489     GetBitContext       gb;
 490
 491     if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
 492         return err;
 493
 494     // check for LOAS sync word
 495     if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
 496         return AVERROR_INVALIDDATA;
 497
 498     muxlength = get_bits(&gb, 13) + 3;
 499     // not enough data, the parser should have sorted this out
 500     if (muxlength > avpkt->size)
 501         return AVERROR_INVALIDDATA;
 502
 503     if ((err = read_audio_mux_element(latmctx, &gb)))
 504         return (err < 0) ? err : avpkt->size;
 505
 506     if (!latmctx->initialized) {
 507         if (!avctx->extradata) {
 508             *got_frame_ptr = 0;
 509             return avpkt->size;
 510         } else {
 511             push_output_configuration(&latmctx->aac_ctx);
 512             if ((err = decode_audio_specific_config(
 513                     &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
 514                     avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
 515                 pop_output_configuration(&latmctx->aac_ctx);
 516                 return err;
 517             }
 518             latmctx->initialized = 1;
 519         }
 520     }
 521
 522     if (show_bits(&gb, 12) == 0xfff) {
 523         av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
 524                "ADTS header detected, probably as result of configuration "
 525                "misparsing\n");
 526         return AVERROR_INVALIDDATA;
 527     }
 528
 529     switch (latmctx->aac_ctx.oc[1].m4ac.object_type) {
 530     case AOT_ER_AAC_LC:
 531     case AOT_ER_AAC_LTP:
 532     case AOT_ER_AAC_LD:
 533     case AOT_ER_AAC_ELD:
 534         err = aac_decode_er_frame(avctx, out, got_frame_ptr, &gb);
 535         break;
 536     default:
 537         err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt);
 538     }
 539     if (err < 0)
 540         return err;
 541
 542     return muxlength;
 543 }
 544
 545 static av_cold int latm_decode_init(AVCodecContext *avctx)
 546 {
 547     struct LATMContext *latmctx = avctx->priv_data;
 548     int ret = aac_decode_init(avctx);
 549
 550     if (avctx->extradata_size > 0)
 551         latmctx->initialized = !ret;
 552
 553     return ret;
 554 }
 555
/**
 * Codec descriptor for the plain AAC decoder ("aac", AV_CODEC_ID_AAC).
 * It uses an AACContext as private data and advertises planar float
 * (AV_SAMPLE_FMT_FLTP) output. The init/close/decode/flush callbacks, the
 * channel layout list and the option class are not defined in the code
 * visible here.
 * NOTE(review): presumably they come from the included aacdec_template.c --
 * confirm.
 */
const AVCodec ff_aac_decoder = {
    .name            = "aac",
    .long_name       = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
    .type            = AVMEDIA_TYPE_AUDIO,
    .id              = AV_CODEC_ID_AAC,
    .priv_data_size  = sizeof(AACContext),
    .init            = aac_decode_init,
    .close           = aac_decode_close,
    .decode          = aac_decode_frame,
    .sample_fmts     = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
    },
    .capabilities    = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
    .channel_layouts = aac_channel_layout,
    .flush = flush,
    .priv_class      = &aac_decoder_class,
    .profiles        = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
};
 575
/*
    Codec descriptor for the AAC LATM decoder ("aac_latm",
    AV_CODEC_ID_AAC_LATM). It uses a struct LATMContext as private data,
    has its own init and decode entry points (latm_decode_init,
    latm_decode_frame) and shares close and flush with the plain AAC
    decoder. No .priv_class is set for it.

    Note: This decoder is intended to decode LATM streams transferred
    in MPEG transport streams which only contain one program.
    For more complex LATM demuxing, a separate LATM demuxer should be used.
*/
const AVCodec ff_aac_latm_decoder = {
    .name            = "aac_latm",
    .long_name       = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
    .type            = AVMEDIA_TYPE_AUDIO,
    .id              = AV_CODEC_ID_AAC_LATM,
    .priv_data_size  = sizeof(struct LATMContext),
    .init            = latm_decode_init,
    .close           = aac_decode_close,
    .decode          = latm_decode_frame,
    .sample_fmts     = (const enum AVSampleFormat[]) {
        AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
    },
    .capabilities    = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
    .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
    .channel_layouts = aac_channel_layout,
    .flush = flush,
    .profiles        = NULL_IF_CONFIG_SMALL(ff_aac_profiles),
};