git.sesse.net Git - ffmpeg/blob - libavcodec/aacdec.c

   1 /*
   2  * AAC decoder
   3  * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org )
   4  * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com )
   5  * Copyright (c) 2008-2013 Alex Converse <alex.converse@gmail.com>
   6  *
   7  * AAC LATM decoder
   8  * Copyright (c) 2008-2010 Paul Kendall <paul@kcbbs.gen.nz>
   9  * Copyright (c) 2010      Janne Grunau <janne-libav@jannau.net>
  10  *
  11  * This file is part of FFmpeg.
  12  *
  13  * FFmpeg is free software; you can redistribute it and/or
  14  * modify it under the terms of the GNU Lesser General Public
  15  * License as published by the Free Software Foundation; either
  16  * version 2.1 of the License, or (at your option) any later version.
  17  *
  18  * FFmpeg is distributed in the hope that it will be useful,
  19  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  20  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21  * Lesser General Public License for more details.
  22  *
  23  * You should have received a copy of the GNU Lesser General Public
  24  * License along with FFmpeg; if not, write to the Free Software
  25  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  26  */
  27
  28 /**
  29  * @file
  30  * AAC decoder
  31  * @author Oded Shimon  ( ods15 ods15 dyndns org )
  32  * @author Maxim Gavrilov ( maxim.gavrilov gmail com )
  33  */
  34
  35 #define FFT_FLOAT 1
  36 #define FFT_FIXED_32 0
  37 #define USE_FIXED 0
  38
  39 #include "libavutil/float_dsp.h"
  40 #include "libavutil/opt.h"
  41 #include "avcodec.h"
  42 #include "internal.h"
  43 #include "get_bits.h"
  44 #include "fft.h"
  45 #include "imdct15.h"
  46 #include "lpc.h"
  47 #include "kbdwin.h"
  48 #include "sinewin.h"
  49
  50 #include "aac.h"
  51 #include "aactab.h"
  52 #include "aacdectab.h"
  53 #include "cbrt_tablegen.h"
  54 #include "sbr.h"
  55 #include "aacsbr.h"
  56 #include "mpeg4audio.h"
  57 #include "aacadtsdec.h"
  58 #include "libavutil/intfloat.h"
  59
  60 #include <errno.h>
  61 #include <math.h>
  62 #include <stdint.h>
  63 #include <string.h>
  64
  65 #if ARCH_ARM
  66 #   include "arm/aac.h"
  67 #elif ARCH_MIPS
  68 #   include "mips/aacdec_mips.h"
  69 #endif
  70
  71 static av_always_inline void reset_predict_state(PredictorState *ps)
  72 {
  73     ps->r0   = 0.0f;
  74     ps->r1   = 0.0f;
  75     ps->cor0 = 0.0f;
  76     ps->cor1 = 0.0f;
  77     ps->var0 = 1.0f;
  78     ps->var1 = 1.0f;
  79 }
  80
  81 #ifndef VMUL2
  82 static inline float *VMUL2(float *dst, const float *v, unsigned idx,
  83                            const float *scale)
  84 {
  85     float s = *scale;
  86     *dst++ = v[idx    & 15] * s;
  87     *dst++ = v[idx>>4 & 15] * s;
  88     return dst;
  89 }
  90 #endif
  91
  92 #ifndef VMUL4
  93 static inline float *VMUL4(float *dst, const float *v, unsigned idx,
  94                            const float *scale)
  95 {
  96     float s = *scale;
  97     *dst++ = v[idx    & 3] * s;
  98     *dst++ = v[idx>>2 & 3] * s;
  99     *dst++ = v[idx>>4 & 3] * s;
 100     *dst++ = v[idx>>6 & 3] * s;
 101     return dst;
 102 }
 103 #endif
 104
 105 #ifndef VMUL2S
 106 static inline float *VMUL2S(float *dst, const float *v, unsigned idx,
 107                             unsigned sign, const float *scale)
 108 {
 109     union av_intfloat32 s0, s1;
 110
 111     s0.f = s1.f = *scale;
 112     s0.i ^= sign >> 1 << 31;
 113     s1.i ^= sign      << 31;
 114
 115     *dst++ = v[idx    & 15] * s0.f;
 116     *dst++ = v[idx>>4 & 15] * s1.f;
 117
 118     return dst;
 119 }
 120 #endif
 121
 122 #ifndef VMUL4S
 123 static inline float *VMUL4S(float *dst, const float *v, unsigned idx,
 124                             unsigned sign, const float *scale)
 125 {
 126     unsigned nz = idx >> 12;
 127     union av_intfloat32 s = { .f = *scale };
 128     union av_intfloat32 t;
 129
 130     t.i = s.i ^ (sign & 1U<<31);
 131     *dst++ = v[idx    & 3] * t.f;
 132
 133     sign <<= nz & 1; nz >>= 1;
 134     t.i = s.i ^ (sign & 1U<<31);
 135     *dst++ = v[idx>>2 & 3] * t.f;
 136
 137     sign <<= nz & 1; nz >>= 1;
 138     t.i = s.i ^ (sign & 1U<<31);
 139     *dst++ = v[idx>>4 & 3] * t.f;
 140
 141     sign <<= nz & 1;
 142     t.i = s.i ^ (sign & 1U<<31);
 143     *dst++ = v[idx>>6 & 3] * t.f;
 144
 145     return dst;
 146 }
 147 #endif
 148
 149 static av_always_inline float flt16_round(float pf)
 150 {
 151     union av_intfloat32 tmp;
 152     tmp.f = pf;
 153     tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U;
 154     return tmp.f;
 155 }
 156
 157 static av_always_inline float flt16_even(float pf)
 158 {
 159     union av_intfloat32 tmp;
 160     tmp.f = pf;
 161     tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U;
 162     return tmp.f;
 163 }
 164
 165 static av_always_inline float flt16_trunc(float pf)
 166 {
 167     union av_intfloat32 pun;
 168     pun.f = pf;
 169     pun.i &= 0xFFFF0000U;
 170     return pun.f;
 171 }
 172
 173 static av_always_inline void predict(PredictorState *ps, float *coef,
 174                                      int output_enable)
 175 {
 176     const float a     = 0.953125; // 61.0 / 64
 177     const float alpha = 0.90625;  // 29.0 / 32
 178     float e0, e1;
 179     float pv;
 180     float k1, k2;
 181     float   r0 = ps->r0,     r1 = ps->r1;
 182     float cor0 = ps->cor0, cor1 = ps->cor1;
 183     float var0 = ps->var0, var1 = ps->var1;
 184
 185     k1 = var0 > 1 ? cor0 * flt16_even(a / var0) : 0;
 186     k2 = var1 > 1 ? cor1 * flt16_even(a / var1) : 0;
 187
 188     pv = flt16_round(k1 * r0 + k2 * r1);
 189     if (output_enable)
 190         *coef += pv;
 191
 192     e0 = *coef;
 193     e1 = e0 - k1 * r0;
 194
 195     ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
 196     ps->var1 = flt16_trunc(alpha * var1 + 0.5f * (r1 * r1 + e1 * e1));
 197     ps->cor0 = flt16_trunc(alpha * cor0 + r0 * e0);
 198     ps->var0 = flt16_trunc(alpha * var0 + 0.5f * (r0 * r0 + e0 * e0));
 199
 200     ps->r1 = flt16_trunc(a * (r0 - k1 * e0));
 201     ps->r0 = flt16_trunc(a * e0);
 202 }
 203
 204 /**
 205  * Apply dependent channel coupling (applied before IMDCT).
 206  *
 207  * @param   index   index into coupling gain array
 208  */
 209 static void apply_dependent_coupling(AACContext *ac,
 210                                      SingleChannelElement *target,
 211                                      ChannelElement *cce, int index)
 212 {
 213     IndividualChannelStream *ics = &cce->ch[0].ics;
 214     const uint16_t *offsets = ics->swb_offset;
 215     float *dest = target->coeffs;
 216     const float *src = cce->ch[0].coeffs;
 217     int g, i, group, k, idx = 0;
 218     if (ac->oc[1].m4ac.object_type == AOT_AAC_LTP) {
 219         av_log(ac->avctx, AV_LOG_ERROR,
 220                "Dependent coupling is not supported together with LTP\n");
 221         return;
 222     }
 223     for (g = 0; g < ics->num_window_groups; g++) {
 224         for (i = 0; i < ics->max_sfb; i++, idx++) {
 225             if (cce->ch[0].band_type[idx] != ZERO_BT) {
 226                 const float gain = cce->coup.gain[index][idx];
 227                 for (group = 0; group < ics->group_len[g]; group++) {
 228                     for (k = offsets[i]; k < offsets[i + 1]; k++) {
 229                         // FIXME: SIMDify
 230                         dest[group * 128 + k] += gain * src[group * 128 + k];
 231                     }
 232                 }
 233             }
 234         }
 235         dest += ics->group_len[g] * 128;
 236         src  += ics->group_len[g] * 128;
 237     }
 238 }
 239
 240 /**
 241  * Apply independent channel coupling (applied after IMDCT).
 242  *
 243  * @param   index   index into coupling gain array
 244  */
 245 static void apply_independent_coupling(AACContext *ac,
 246                                        SingleChannelElement *target,
 247                                        ChannelElement *cce, int index)
 248 {
 249     int i;
 250     const float gain = cce->coup.gain[index][0];
 251     const float *src = cce->ch[0].ret;
 252     float *dest = target->ret;
 253     const int len = 1024 << (ac->oc[1].m4ac.sbr == 1);
 254
 255     for (i = 0; i < len; i++)
 256         dest[i] += gain * src[i];
 257 }
 258
 259 #include "aacdec_template.c"
 260
 261 #define LOAS_SYNC_WORD   0x2b7       ///< 11 bits LOAS sync word
 262
 263 struct LATMContext {
 264     AACContext aac_ctx;     ///< containing AACContext
 265     int initialized;        ///< initialized after a valid extradata was seen
 266
 267     // parser data
 268     int audio_mux_version_A; ///< LATM syntax version
 269     int frame_length_type;   ///< 0/1 variable/fixed frame length
 270     int frame_length;        ///< frame length for fixed frame length
 271 };
 272
 273 static inline uint32_t latm_get_value(GetBitContext *b)
 274 {
 275     int length = get_bits(b, 2);
 276
 277     return get_bits_long(b, (length+1)*8);
 278 }
 279
 280 static int latm_decode_audio_specific_config(struct LATMContext *latmctx,
 281                                              GetBitContext *gb, int asclen)
 282 {
 283     AACContext *ac        = &latmctx->aac_ctx;
 284     AVCodecContext *avctx = ac->avctx;
 285     MPEG4AudioConfig m4ac = { 0 };
 286     int config_start_bit  = get_bits_count(gb);
 287     int sync_extension    = 0;
 288     int bits_consumed, esize;
 289
 290     if (asclen) {
 291         sync_extension = 1;
 292         asclen         = FFMIN(asclen, get_bits_left(gb));
 293     } else
 294         asclen         = get_bits_left(gb);
 295
 296     if (config_start_bit % 8) {
 297         avpriv_request_sample(latmctx->aac_ctx.avctx,
 298                               "Non-byte-aligned audio-specific config");
 299         return AVERROR_PATCHWELCOME;
 300     }
 301     if (asclen <= 0)
 302         return AVERROR_INVALIDDATA;
 303     bits_consumed = decode_audio_specific_config(NULL, avctx, &m4ac,
 304                                          gb->buffer + (config_start_bit / 8),
 305                                          asclen, sync_extension);
 306
 307     if (bits_consumed < 0)
 308         return AVERROR_INVALIDDATA;
 309
 310     if (!latmctx->initialized ||
 311         ac->oc[1].m4ac.sample_rate != m4ac.sample_rate ||
 312         ac->oc[1].m4ac.chan_config != m4ac.chan_config) {
 313
 314         if(latmctx->initialized) {
 315             av_log(avctx, AV_LOG_INFO, "audio config changed\n");
 316         } else {
 317             av_log(avctx, AV_LOG_DEBUG, "initializing latmctx\n");
 318         }
 319         latmctx->initialized = 0;
 320
 321         esize = (bits_consumed+7) / 8;
 322
 323         if (avctx->extradata_size < esize) {
 324             av_free(avctx->extradata);
 325             avctx->extradata = av_malloc(esize + AV_INPUT_BUFFER_PADDING_SIZE);
 326             if (!avctx->extradata)
 327                 return AVERROR(ENOMEM);
 328         }
 329
 330         avctx->extradata_size = esize;
 331         memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize);
 332         memset(avctx->extradata+esize, 0, AV_INPUT_BUFFER_PADDING_SIZE);
 333     }
 334     skip_bits_long(gb, bits_consumed);
 335
 336     return bits_consumed;
 337 }
 338
 339 static int read_stream_mux_config(struct LATMContext *latmctx,
 340                                   GetBitContext *gb)
 341 {
 342     int ret, audio_mux_version = get_bits(gb, 1);
 343
 344     latmctx->audio_mux_version_A = 0;
 345     if (audio_mux_version)
 346         latmctx->audio_mux_version_A = get_bits(gb, 1);
 347
 348     if (!latmctx->audio_mux_version_A) {
 349
 350         if (audio_mux_version)
 351             latm_get_value(gb);                 // taraFullness
 352
 353         skip_bits(gb, 1);                       // allStreamSameTimeFraming
 354         skip_bits(gb, 6);                       // numSubFrames
 355         // numPrograms
 356         if (get_bits(gb, 4)) {                  // numPrograms
 357             avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple programs");
 358             return AVERROR_PATCHWELCOME;
 359         }
 360
 361         // for each program (which there is only one in DVB)
 362
 363         // for each layer (which there is only one in DVB)
 364         if (get_bits(gb, 3)) {                   // numLayer
 365             avpriv_request_sample(latmctx->aac_ctx.avctx, "Multiple layers");
 366             return AVERROR_PATCHWELCOME;
 367         }
 368
 369         // for all but first stream: use_same_config = get_bits(gb, 1);
 370         if (!audio_mux_version) {
 371             if ((ret = latm_decode_audio_specific_config(latmctx, gb, 0)) < 0)
 372                 return ret;
 373         } else {
 374             int ascLen = latm_get_value(gb);
 375             if ((ret = latm_decode_audio_specific_config(latmctx, gb, ascLen)) < 0)
 376                 return ret;
 377             ascLen -= ret;
 378             skip_bits_long(gb, ascLen);
 379         }
 380
 381         latmctx->frame_length_type = get_bits(gb, 3);
 382         switch (latmctx->frame_length_type) {
 383         case 0:
 384             skip_bits(gb, 8);       // latmBufferFullness
 385             break;
 386         case 1:
 387             latmctx->frame_length = get_bits(gb, 9);
 388             break;
 389         case 3:
 390         case 4:
 391         case 5:
 392             skip_bits(gb, 6);       // CELP frame length table index
 393             break;
 394         case 6:
 395         case 7:
 396             skip_bits(gb, 1);       // HVXC frame length table index
 397             break;
 398         }
 399
 400         if (get_bits(gb, 1)) {                  // other data
 401             if (audio_mux_version) {
 402                 latm_get_value(gb);             // other_data_bits
 403             } else {
 404                 int esc;
 405                 do {
 406                     esc = get_bits(gb, 1);
 407                     skip_bits(gb, 8);
 408                 } while (esc);
 409             }
 410         }
 411
 412         if (get_bits(gb, 1))                     // crc present
 413             skip_bits(gb, 8);                    // config_crc
 414     }
 415
 416     return 0;
 417 }
 418
 419 static int read_payload_length_info(struct LATMContext *ctx, GetBitContext *gb)
 420 {
 421     uint8_t tmp;
 422
 423     if (ctx->frame_length_type == 0) {
 424         int mux_slot_length = 0;
 425         do {
 426             tmp = get_bits(gb, 8);
 427             mux_slot_length += tmp;
 428         } while (tmp == 255);
 429         return mux_slot_length;
 430     } else if (ctx->frame_length_type == 1) {
 431         return ctx->frame_length;
 432     } else if (ctx->frame_length_type == 3 ||
 433                ctx->frame_length_type == 5 ||
 434                ctx->frame_length_type == 7) {
 435         skip_bits(gb, 2);          // mux_slot_length_coded
 436     }
 437     return 0;
 438 }
 439
 440 static int read_audio_mux_element(struct LATMContext *latmctx,
 441                                   GetBitContext *gb)
 442 {
 443     int err;
 444     uint8_t use_same_mux = get_bits(gb, 1);
 445     if (!use_same_mux) {
 446         if ((err = read_stream_mux_config(latmctx, gb)) < 0)
 447             return err;
 448     } else if (!latmctx->aac_ctx.avctx->extradata) {
 449         av_log(latmctx->aac_ctx.avctx, AV_LOG_DEBUG,
 450                "no decoder config found\n");
 451         return AVERROR(EAGAIN);
 452     }
 453     if (latmctx->audio_mux_version_A == 0) {
 454         int mux_slot_length_bytes = read_payload_length_info(latmctx, gb);
 455         if (mux_slot_length_bytes * 8 > get_bits_left(gb)) {
 456             av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR, "incomplete frame\n");
 457             return AVERROR_INVALIDDATA;
 458         } else if (mux_slot_length_bytes * 8 + 256 < get_bits_left(gb)) {
 459             av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
 460                    "frame length mismatch %d << %d\n",
 461                    mux_slot_length_bytes * 8, get_bits_left(gb));
 462             return AVERROR_INVALIDDATA;
 463         }
 464     }
 465     return 0;
 466 }
 467
 468
 469 static int latm_decode_frame(AVCodecContext *avctx, void *out,
 470                              int *got_frame_ptr, AVPacket *avpkt)
 471 {
 472     struct LATMContext *latmctx = avctx->priv_data;
 473     int                 muxlength, err;
 474     GetBitContext       gb;
 475
 476     if ((err = init_get_bits8(&gb, avpkt->data, avpkt->size)) < 0)
 477         return err;
 478
 479     // check for LOAS sync word
 480     if (get_bits(&gb, 11) != LOAS_SYNC_WORD)
 481         return AVERROR_INVALIDDATA;
 482
 483     muxlength = get_bits(&gb, 13) + 3;
 484     // not enough data, the parser should have sorted this out
 485     if (muxlength > avpkt->size)
 486         return AVERROR_INVALIDDATA;
 487
 488     if ((err = read_audio_mux_element(latmctx, &gb)) < 0)
 489         return err;
 490
 491     if (!latmctx->initialized) {
 492         if (!avctx->extradata) {
 493             *got_frame_ptr = 0;
 494             return avpkt->size;
 495         } else {
 496             push_output_configuration(&latmctx->aac_ctx);
 497             if ((err = decode_audio_specific_config(
 498                     &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.oc[1].m4ac,
 499                     avctx->extradata, avctx->extradata_size*8LL, 1)) < 0) {
 500                 pop_output_configuration(&latmctx->aac_ctx);
 501                 return err;
 502             }
 503             latmctx->initialized = 1;
 504         }
 505     }
 506
 507     if (show_bits(&gb, 12) == 0xfff) {
 508         av_log(latmctx->aac_ctx.avctx, AV_LOG_ERROR,
 509                "ADTS header detected, probably as result of configuration "
 510                "misparsing\n");
 511         return AVERROR_INVALIDDATA;
 512     }
 513
 514     switch (latmctx->aac_ctx.oc[1].m4ac.object_type) {
 515     case AOT_ER_AAC_LC:
 516     case AOT_ER_AAC_LTP:
 517     case AOT_ER_AAC_LD:
 518     case AOT_ER_AAC_ELD:
 519         err = aac_decode_er_frame(avctx, out, got_frame_ptr, &gb);
 520         break;
 521     default:
 522         err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb, avpkt);
 523     }
 524     if (err < 0)
 525         return err;
 526
 527     return muxlength;
 528 }
 529
 530 static av_cold int latm_decode_init(AVCodecContext *avctx)
 531 {
 532     struct LATMContext *latmctx = avctx->priv_data;
 533     int ret = aac_decode_init(avctx);
 534
 535     if (avctx->extradata_size > 0)
 536         latmctx->initialized = !ret;
 537
 538     return ret;
 539 }
 540
 541 AVCodec ff_aac_decoder = {
 542     .name            = "aac",
 543     .long_name       = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
 544     .type            = AVMEDIA_TYPE_AUDIO,
 545     .id              = AV_CODEC_ID_AAC,
 546     .priv_data_size  = sizeof(AACContext),
 547     .init            = aac_decode_init,
 548     .close           = aac_decode_close,
 549     .decode          = aac_decode_frame,
 550     .sample_fmts     = (const enum AVSampleFormat[]) {
 551         AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
 552     },
 553     .capabilities    = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
 554     .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE,
 555     .channel_layouts = aac_channel_layout,
 556     .flush = flush,
 557     .priv_class      = &aac_decoder_class,
 558     .profiles        = profiles,
 559 };
 560
 561 /*
 562     Note: This decoder filter is intended to decode LATM streams transferred
 563     in MPEG transport streams which only contain one program.
 564     To do a more complex LATM demuxing a separate LATM demuxer should be used.
 565 */
 566 AVCodec ff_aac_latm_decoder = {
 567     .name            = "aac_latm",
 568     .long_name       = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Coding LATM syntax)"),
 569     .type            = AVMEDIA_TYPE_AUDIO,
 570     .id              = AV_CODEC_ID_AAC_LATM,
 571     .priv_data_size  = sizeof(struct LATMContext),
 572     .init            = latm_decode_init,
 573     .close           = aac_decode_close,
 574     .decode          = latm_decode_frame,
 575     .sample_fmts     = (const enum AVSampleFormat[]) {
 576         AV_SAMPLE_FMT_FLTP, AV_SAMPLE_FMT_NONE
 577     },
 578     .capabilities    = AV_CODEC_CAP_CHANNEL_CONF | AV_CODEC_CAP_DR1,
 579     .caps_internal   = FF_CODEC_CAP_INIT_THREADSAFE,
 580     .channel_layouts = aac_channel_layout,
 581     .flush = flush,
 582     .profiles        = profiles,
 583 };