git.sesse.net Git - ffmpeg/blob - libavcodec/aacenc.c

   1 /*
   2  * AAC encoder
   3  * Copyright (C) 2008 Konstantin Shishkov
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * AAC encoder
  25  */
  26
  27 /***********************************
  28  *              TODOs:
  29  * add sane pulse detection
  30  * add temporal noise shaping
  31  ***********************************/
  32
  33 #include "libavutil/float_dsp.h"
  34 #include "libavutil/opt.h"
  35 #include "avcodec.h"
  36 #include "put_bits.h"
  37 #include "internal.h"
  38 #include "mpeg4audio.h"
  39 #include "kbdwin.h"
  40 #include "sinewin.h"
  41
  42 #include "aac.h"
  43 #include "aactab.h"
  44 #include "aacenc.h"
  45
  46 #include "psymodel.h"
  47
  /** Upper bound on the channel count accepted by this encoder's tables. */
  #define AAC_MAX_CHANNELS 6

  /**
   * Log an error and make the enclosing function return AVERROR(EINVAL)
   * when cond is true.
   *
   * The macro expects a variable named avctx to be in scope at the call site.
   * The body is wrapped in do { } while (0) so that the expansion is a single
   * statement and cannot capture a following "else".
   */
  #define ERROR_IF(cond, ...) \
      do { \
          if (cond) { \
              av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
              return AVERROR(EINVAL); \
          } \
      } while (0)

  /**
   * Log a warning when cond is true; execution continues afterwards.
   *
   * Like ERROR_IF, this relies on a variable named avctx at the call site and
   * expands to exactly one statement.
   */
  #define WARN_IF(cond, ...) \
      do { \
          if (cond) { \
              av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
          } \
      } while (0)
  60
  61
  /*
   * Scalefactor band widths (in coefficients) for 1024-coefficient windows.
   * There is one table per sample-rate class; the entries of every table add
   * up to 1024.
   */
  static const uint8_t swb_size_1024_96[] = {
       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
       8,  8,  8,  8,  8,
      12, 12, 12, 12, 12,
      16, 16,
      24, 28, 36, 44,
      64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
  };

  static const uint8_t swb_size_1024_64[] = {
       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
       8,  8,  8,  8,
      12, 12, 12,
      16, 16, 16,
      20, 24, 24, 28, 36,
      40, 40, 40, 40, 40, 40, 40, 40, 40,
      40, 40, 40, 40, 40, 40, 40, 40, 40
  };

  static const uint8_t swb_size_1024_48[] = {
       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
       8,  8,  8,  8,  8,  8,  8,
      12, 12, 12, 12,
      16, 16, 20, 20, 24, 24, 28, 28,
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      32, 32, 32, 32, 32, 32, 32, 32, 32,
      96
  };

  static const uint8_t swb_size_1024_32[] = {
       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
       8,  8,  8,  8,  8,  8,  8,
      12, 12, 12, 12,
      16, 16, 20, 20, 24, 24, 28, 28,
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
      32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
  };

  static const uint8_t swb_size_1024_24[] = {
       4,  4,  4,  4,  4,  4,  4,  4,  4,  4,  4,
       8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
      12, 12, 12, 12,
      16, 16, 16,
      20, 20, 24, 24, 28, 28,
      32, 36, 36, 40, 44, 48, 52, 52,
      64, 64, 64, 64, 64
  };

  static const uint8_t swb_size_1024_16[] = {
       8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
      12, 12, 12, 12, 12, 12, 12, 12, 12,
      16, 16, 16, 16,
      20, 20, 20,
      24, 24, 28, 28,
      32, 36, 40, 40, 44, 48, 52, 56, 60,
      64, 64, 64
  };

  static const uint8_t swb_size_1024_8[] = {
      12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
      16, 16, 16, 16, 16, 16, 16,
      20, 20, 20, 20,
      24, 24, 24,
      28, 28,
      32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
  };

  /*
   * Long-window band table selector. The encoder init code indexes it with
   * the position of the sample rate in avpriv_mpeg4audio_sample_rates.
   */
  static const uint8_t *swb_size_1024[] = {
      swb_size_1024_96, /* index  0 */
      swb_size_1024_96, /* index  1 */
      swb_size_1024_64, /* index  2 */
      swb_size_1024_48, /* index  3 */
      swb_size_1024_48, /* index  4 */
      swb_size_1024_32, /* index  5 */
      swb_size_1024_24, /* index  6 */
      swb_size_1024_24, /* index  7 */
      swb_size_1024_16, /* index  8 */
      swb_size_1024_16, /* index  9 */
      swb_size_1024_16, /* index 10 */
      swb_size_1024_8,  /* index 11 */
      swb_size_1024_8   /* index 12 */
  };
 112
 /*
  * Scalefactor band widths (in coefficients) for 128-coefficient windows.
  * The entries of every table add up to 128.
  */
 static const uint8_t swb_size_128_96[] = {
      4,  4,  4,  4,  4,  4,
      8,  8,  8,
     16, 28, 36
 };

 static const uint8_t swb_size_128_48[] = {
      4,  4,  4,  4,  4,
      8,  8,  8,
     12, 12, 12,
     16, 16, 16
 };

 static const uint8_t swb_size_128_24[] = {
      4,  4,  4,  4,  4,  4,  4,
      8,  8,  8,
     12, 12,
     16, 16,
     20
 };

 static const uint8_t swb_size_128_16[] = {
      4,  4,  4,  4,  4,  4,  4,  4,
      8,  8,
     12, 12,
     16,
     20, 20
 };

 static const uint8_t swb_size_128_8[] = {
      4,  4,  4,  4,  4,  4,  4,
      8,  8,  8,  8,
     12,
     16,
     20, 20
 };

 /*
  * Short-window band table selector, indexed the same way as the long-window
  * selector. Index 2 would be a dedicated swb_size_128_64 table, but that
  * table is a duplicate of swb_size_128_96, so the latter is reused.
  */
 static const uint8_t *swb_size_128[] = {
     swb_size_128_96, /* index  0 */
     swb_size_128_96, /* index  1 */
     swb_size_128_96, /* index  2 */
     swb_size_128_48, /* index  3 */
     swb_size_128_48, /* index  4 */
     swb_size_128_48, /* index  5 */
     swb_size_128_24, /* index  6 */
     swb_size_128_24, /* index  7 */
     swb_size_128_16, /* index  8 */
     swb_size_128_16, /* index  9 */
     swb_size_128_16, /* index 10 */
     swb_size_128_8,  /* index 11 */
     swb_size_128_8   /* index 12 */
 };
 142
 143 /** default channel configurations */
 144 static const uint8_t aac_chan_configs[6][5] = {
 145  {1, TYPE_SCE},                               // 1 channel  - single channel element
 146  {1, TYPE_CPE},                               // 2 channels - channel pair
 147  {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
 148  {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
 149  {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
 150  {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
 151 };
 152
 153 /**
 154  * Table to remap channels from libavcodec's default order to AAC order.
 155  */
 156 static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
 157     { 0 },
 158     { 0, 1 },
 159     { 2, 0, 1 },
 160     { 2, 0, 1, 3 },
 161     { 2, 0, 1, 3, 4 },
 162     { 2, 0, 1, 4, 5, 3 },
 163 };
 164
 165 /**
 166  * Make AAC audio config object.
 167  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
 168  */
 169 static void put_audio_specific_config(AVCodecContext *avctx)
 170 {
 171     PutBitContext pb;
 172     AACEncContext *s = avctx->priv_data;
 173
 174     init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
 175     put_bits(&pb, 5, 2); //object type - AAC-LC
 176     put_bits(&pb, 4, s->samplerate_index); //sample rate index
 177     put_bits(&pb, 4, s->channels);
 178     //GASpecificConfig
 179     put_bits(&pb, 1, 0); //frame length - 1024 samples
 180     put_bits(&pb, 1, 0); //does not depend on core coder
 181     put_bits(&pb, 1, 0); //is not extension
 182
 183     //Explicitly Mark SBR absent
 184     put_bits(&pb, 11, 0x2b7); //sync extension
 185     put_bits(&pb, 5,  AOT_SBR);
 186     put_bits(&pb, 1,  0);
 187     flush_put_bits(&pb);
 188 }
 189
 /*
  * Emit the signature of a windowing routine named apply_<kind>_window.
  * Every routine takes the float DSP context, the channel whose ret_buf
  * receives the windowed samples, and the input audio.
  */
 #define WINDOW_FUNC(kind)                                      \
 static void apply_ ## kind ## _window(AVFloatDSPContext *fdsp, \
                                       SingleChannelElement *sce, \
                                       const float *audio)
 194
 195 WINDOW_FUNC(only_long)
 196 {
 197     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 198     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 199     float *out = sce->ret_buf;
 200
 201     fdsp->vector_fmul        (out,        audio,        lwindow, 1024);
 202     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
 203 }
 204
 205 WINDOW_FUNC(long_start)
 206 {
 207     const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 208     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
 209     float *out = sce->ret_buf;
 210
 211     fdsp->vector_fmul(out, audio, lwindow, 1024);
 212     memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
 213     fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
 214     memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
 215 }
 216
 217 WINDOW_FUNC(long_stop)
 218 {
 219     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 220     const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
 221     float *out = sce->ret_buf;
 222
 223     memset(out, 0, sizeof(out[0]) * 448);
 224     fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
 225     memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
 226     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
 227 }
 228
 229 WINDOW_FUNC(eight_short)
 230 {
 231     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
 232     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
 233     const float *in = audio + 448;
 234     float *out = sce->ret_buf;
 235     int w;
 236
 237     for (w = 0; w < 8; w++) {
 238         fdsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
 239         out += 128;
 240         in  += 128;
 241         fdsp->vector_fmul_reverse(out, in, swindow, 128);
 242         out += 128;
 243     }
 244 }
 245
 246 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
 247                                      SingleChannelElement *sce,
 248                                      const float *audio) = {
 249     [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
 250     [LONG_START_SEQUENCE]  = apply_long_start_window,
 251     [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
 252     [LONG_STOP_SEQUENCE]   = apply_long_stop_window
 253 };
 254
 255 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
 256                                   float *audio)
 257 {
 258     int i;
 259     float *output = sce->ret_buf;
 260
 261     apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
 262
 263     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
 264         s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
 265     else
 266         for (i = 0; i < 1024; i += 128)
 267             s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
 268     memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
 269     memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
 270 }
 271
 272 /**
 273  * Encode ics_info element.
 274  * @see Table 4.6 (syntax of ics_info)
 275  */
 276 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
 277 {
 278     int w;
 279
 280     put_bits(&s->pb, 1, 0);                // ics_reserved bit
 281     put_bits(&s->pb, 2, info->window_sequence[0]);
 282     put_bits(&s->pb, 1, info->use_kb_window[0]);
 283     if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
 284         put_bits(&s->pb, 6, info->max_sfb);
 285         put_bits(&s->pb, 1, 0);            // no prediction
 286     } else {
 287         put_bits(&s->pb, 4, info->max_sfb);
 288         for (w = 1; w < 8; w++)
 289             put_bits(&s->pb, 1, !info->group_len[w]);
 290     }
 291 }
 292
 293 /**
 294  * Encode MS data.
 295  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
 296  */
 297 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
 298 {
 299     int i, w;
 300
 301     put_bits(pb, 2, cpe->ms_mode);
 302     if (cpe->ms_mode == 1)
 303         for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
 304             for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
 305                 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
 306 }
 307
 308 /**
 309  * Produce integer coefficients from scalefactors provided by the model.
 310  */
 311 static void adjust_frame_information(ChannelElement *cpe, int chans)
 312 {
 313     int i, w, w2, g, ch;
 314     int maxsfb, cmaxsfb;
 315     IndividualChannelStream *ics;
 316
 317     if (cpe->common_window) {
 318         ics = &cpe->ch[0].ics;
 319         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 320             for (w2 =  0; w2 < ics->group_len[w]; w2++) {
 321                 int start = (w+w2) * 128;
 322                 for (g = 0; g < ics->num_swb; g++) {
 323                     //apply Intensity stereo coeffs transformation
 324                     if (cpe->is_mask[w*16 + g]) {
 325                         int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
 326                         float scale = cpe->ch[0].is_ener[w*16+g];
 327                         for (i = 0; i < ics->swb_sizes[g]; i++) {
 328                             cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + p*cpe->ch[1].pcoeffs[start+i]) * scale;
 329                             cpe->ch[1].coeffs[start+i] = 0.0f;
 330                         }
 331                     } else if (cpe->ms_mask[w*16 + g] &&
 332                                cpe->ch[0].band_type[w*16 + g] < NOISE_BT &&
 333                                cpe->ch[1].band_type[w*16 + g] < NOISE_BT) {
 334                         for (i = 0; i < ics->swb_sizes[g]; i++) {
 335                             cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
 336                             cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
 337                         }
 338                     }
 339                     start += ics->swb_sizes[g];
 340                 }
 341             }
 342         }
 343     }
 344
 345     for (ch = 0; ch < chans; ch++) {
 346         IndividualChannelStream *ics = &cpe->ch[ch].ics;
 347         maxsfb = 0;
 348         cpe->ch[ch].pulse.num_pulse = 0;
 349         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 350             for (w2 =  0; w2 < ics->group_len[w]; w2++) {
 351                 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
 352                     ;
 353                 maxsfb = FFMAX(maxsfb, cmaxsfb);
 354             }
 355         }
 356         ics->max_sfb = maxsfb;
 357
 358         //adjust zero bands for window groups
 359         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 360             for (g = 0; g < ics->max_sfb; g++) {
 361                 i = 1;
 362                 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
 363                     if (!cpe->ch[ch].zeroes[w2*16 + g]) {
 364                         i = 0;
 365                         break;
 366                     }
 367                 }
 368                 cpe->ch[ch].zeroes[w*16 + g] = i;
 369             }
 370         }
 371     }
 372
 373     if (chans > 1 && cpe->common_window) {
 374         IndividualChannelStream *ics0 = &cpe->ch[0].ics;
 375         IndividualChannelStream *ics1 = &cpe->ch[1].ics;
 376         int msc = 0;
 377         ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
 378         ics1->max_sfb = ics0->max_sfb;
 379         for (w = 0; w < ics0->num_windows*16; w += 16)
 380             for (i = 0; i < ics0->max_sfb; i++)
 381                 if (cpe->ms_mask[w+i])
 382                     msc++;
 383         if (msc == 0 || ics0->max_sfb == 0)
 384             cpe->ms_mode = 0;
 385         else
 386             cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
 387     }
 388 }
 389
 390 /**
 391  * Encode scalefactor band coding type.
 392  */
 393 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
 394 {
 395     int w;
 396
 397     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
 398         s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
 399 }
 400
 401 /**
 402  * Encode scalefactors.
 403  */
 404 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
 405                                  SingleChannelElement *sce)
 406 {
 407     int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
 408     int off_is = 0, noise_flag = 1;
 409     int i, w;
 410
 411     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 412         for (i = 0; i < sce->ics.max_sfb; i++) {
 413             if (!sce->zeroes[w*16 + i]) {
 414                 if (sce->band_type[w*16 + i] == NOISE_BT) {
 415                     diff = sce->sf_idx[w*16 + i] - off_pns;
 416                     off_pns = sce->sf_idx[w*16 + i];
 417                     if (noise_flag-- > 0) {
 418                         put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
 419                         continue;
 420                     }
 421                 } else if (sce->band_type[w*16 + i] == INTENSITY_BT  ||
 422                            sce->band_type[w*16 + i] == INTENSITY_BT2) {
 423                     diff = sce->sf_idx[w*16 + i] - off_is;
 424                     off_is = sce->sf_idx[w*16 + i];
 425                 } else {
 426                     diff = sce->sf_idx[w*16 + i] - off_sf;
 427                     off_sf = sce->sf_idx[w*16 + i];
 428                 }
 429                 diff += SCALE_DIFF_ZERO;
 430                 av_assert0(diff >= 0 && diff <= 120);
 431                 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
 432             }
 433         }
 434     }
 435 }
 436
 437 /**
 438  * Encode pulse data.
 439  */
 440 static void encode_pulses(AACEncContext *s, Pulse *pulse)
 441 {
 442     int i;
 443
 444     put_bits(&s->pb, 1, !!pulse->num_pulse);
 445     if (!pulse->num_pulse)
 446         return;
 447
 448     put_bits(&s->pb, 2, pulse->num_pulse - 1);
 449     put_bits(&s->pb, 6, pulse->start);
 450     for (i = 0; i < pulse->num_pulse; i++) {
 451         put_bits(&s->pb, 5, pulse->pos[i]);
 452         put_bits(&s->pb, 4, pulse->amp[i]);
 453     }
 454 }
 455
 456 /**
 457  * Encode spectral coefficients processed by psychoacoustic model.
 458  */
 459 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
 460 {
 461     int start, i, w, w2;
 462
 463     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 464         start = 0;
 465         for (i = 0; i < sce->ics.max_sfb; i++) {
 466             if (sce->zeroes[w*16 + i]) {
 467                 start += sce->ics.swb_sizes[i];
 468                 continue;
 469             }
 470             for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
 471                 s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
 472                                                    sce->ics.swb_sizes[i],
 473                                                    sce->sf_idx[w*16 + i],
 474                                                    sce->band_type[w*16 + i],
 475                                                    s->lambda);
 476             start += sce->ics.swb_sizes[i];
 477         }
 478     }
 479 }
 480
 481 /**
 482  * Encode one channel of audio data.
 483  */
 484 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
 485                                      SingleChannelElement *sce,
 486                                      int common_window)
 487 {
 488     put_bits(&s->pb, 8, sce->sf_idx[0]);
 489     if (!common_window)
 490         put_ics_info(s, &sce->ics);
 491     encode_band_info(s, sce);
 492     encode_scale_factors(avctx, s, sce);
 493     encode_pulses(s, &sce->pulse);
 494     put_bits(&s->pb, 1, 0); //tns
 495     put_bits(&s->pb, 1, 0); //ssr
 496     encode_spectral_coeffs(s, sce);
 497     return 0;
 498 }
 499
 500 /**
 501  * Write some auxiliary information about the created AAC file.
 502  */
 503 static void put_bitstream_info(AACEncContext *s, const char *name)
 504 {
 505     int i, namelen, padbits;
 506
 507     namelen = strlen(name) + 2;
 508     put_bits(&s->pb, 3, TYPE_FIL);
 509     put_bits(&s->pb, 4, FFMIN(namelen, 15));
 510     if (namelen >= 15)
 511         put_bits(&s->pb, 8, namelen - 14);
 512     put_bits(&s->pb, 4, 0); //extension type - filler
 513     padbits = -put_bits_count(&s->pb) & 7;
 514     avpriv_align_put_bits(&s->pb);
 515     for (i = 0; i < namelen - 2; i++)
 516         put_bits(&s->pb, 8, name[i]);
 517     put_bits(&s->pb, 12 - padbits, 0);
 518 }
 519
 520 /*
 521  * Copy input samples.
 522  * Channels are reordered from libavcodec's default order to AAC order.
 523  */
 524 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
 525 {
 526     int ch;
 527     int end = 2048 + (frame ? frame->nb_samples : 0);
 528     const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
 529
 530     /* copy and remap input samples */
 531     for (ch = 0; ch < s->channels; ch++) {
 532         /* copy last 1024 samples of previous frame to the start of the current frame */
 533         memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
 534
 535         /* copy new samples and zero any remaining samples */
 536         if (frame) {
 537             memcpy(&s->planar_samples[ch][2048],
 538                    frame->extended_data[channel_map[ch]],
 539                    frame->nb_samples * sizeof(s->planar_samples[0][0]));
 540         }
 541         memset(&s->planar_samples[ch][end], 0,
 542                (3072 - end) * sizeof(s->planar_samples[0][0]));
 543     }
 544 }
 545
 546 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 547                             const AVFrame *frame, int *got_packet_ptr)
 548 {
 549     AACEncContext *s = avctx->priv_data;
 550     float **samples = s->planar_samples, *samples2, *la, *overlap;
 551     ChannelElement *cpe;
 552     int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0, is_mode = 0;
 553     int chan_el_counter[4];
 554     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
 555
 556     if (s->last_frame == 2)
 557         return 0;
 558
 559     /* add current frame to queue */
 560     if (frame) {
 561         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
 562             return ret;
 563     }
 564
 565     copy_input_samples(s, frame);
 566     if (s->psypp)
 567         ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
 568
 569     if (!avctx->frame_number)
 570         return 0;
 571
 572     start_ch = 0;
 573     for (i = 0; i < s->chan_map[0]; i++) {
 574         FFPsyWindowInfo* wi = windows + start_ch;
 575         tag      = s->chan_map[i+1];
 576         chans    = tag == TYPE_CPE ? 2 : 1;
 577         cpe      = &s->cpe[i];
 578         for (ch = 0; ch < chans; ch++) {
 579             IndividualChannelStream *ics = &cpe->ch[ch].ics;
 580             int cur_channel = start_ch + ch;
 581             overlap  = &samples[cur_channel][0];
 582             samples2 = overlap + 1024;
 583             la       = samples2 + (448+64);
 584             if (!frame)
 585                 la = NULL;
 586             if (tag == TYPE_LFE) {
 587                 wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
 588                 wi[ch].window_shape   = 0;
 589                 wi[ch].num_windows    = 1;
 590                 wi[ch].grouping[0]    = 1;
 591
 592                 /* Only the lowest 12 coefficients are used in a LFE channel.
 593                  * The expression below results in only the bottom 8 coefficients
 594                  * being used for 11.025kHz to 16kHz sample rates.
 595                  */
 596                 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
 597             } else {
 598                 wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
 599                                               ics->window_sequence[0]);
 600             }
 601             ics->window_sequence[1] = ics->window_sequence[0];
 602             ics->window_sequence[0] = wi[ch].window_type[0];
 603             ics->use_kb_window[1]   = ics->use_kb_window[0];
 604             ics->use_kb_window[0]   = wi[ch].window_shape;
 605             ics->num_windows        = wi[ch].num_windows;
 606             ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
 607             ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
 608             for (w = 0; w < ics->num_windows; w++)
 609                 ics->group_len[w] = wi[ch].grouping[w];
 610
 611             apply_window_and_mdct(s, &cpe->ch[ch], overlap);
 612             if (isnan(cpe->ch->coeffs[0])) {
 613                 av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
 614                 return AVERROR(EINVAL);
 615             }
 616         }
 617         start_ch += chans;
 618     }
 619     if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
 620         return ret;
 621     do {
 622         int frame_bits;
 623
 624         init_put_bits(&s->pb, avpkt->data, avpkt->size);
 625
 626         if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
 627             put_bitstream_info(s, LIBAVCODEC_IDENT);
 628         start_ch = 0;
 629         memset(chan_el_counter, 0, sizeof(chan_el_counter));
 630         for (i = 0; i < s->chan_map[0]; i++) {
 631             FFPsyWindowInfo* wi = windows + start_ch;
 632             const float *coeffs[2];
 633             tag      = s->chan_map[i+1];
 634             chans    = tag == TYPE_CPE ? 2 : 1;
 635             cpe      = &s->cpe[i];
 636             memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
 637             memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
 638             put_bits(&s->pb, 3, tag);
 639             put_bits(&s->pb, 4, chan_el_counter[tag]++);
 640             for (ch = 0; ch < chans; ch++)
 641                 coeffs[ch] = cpe->ch[ch].coeffs;
 642             s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
 643             for (ch = 0; ch < chans; ch++) {
 644                 s->cur_channel = start_ch + ch;
 645                 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
 646             }
 647             cpe->common_window = 0;
 648             if (chans > 1
 649                 && wi[0].window_type[0] == wi[1].window_type[0]
 650                 && wi[0].window_shape   == wi[1].window_shape) {
 651
 652                 cpe->common_window = 1;
 653                 for (w = 0; w < wi[0].num_windows; w++) {
 654                     if (wi[0].grouping[w] != wi[1].grouping[w]) {
 655                         cpe->common_window = 0;
 656                         break;
 657                     }
 658                 }
 659             }
 660             if (s->options.pns && s->coder->search_for_pns) {
 661                 for (ch = 0; ch < chans; ch++) {
 662                     s->cur_channel = start_ch + ch;
 663                     s->coder->search_for_pns(s, avctx, &cpe->ch[ch], s->lambda);
 664                 }
 665             }
 666             s->cur_channel = start_ch;
 667             if (s->options.stereo_mode && cpe->common_window) {
 668                 if (s->options.stereo_mode > 0) {
 669                     IndividualChannelStream *ics = &cpe->ch[0].ics;
 670                     for (w = 0; w < ics->num_windows; w += ics->group_len[w])
 671                         for (g = 0;  g < ics->num_swb; g++)
 672                             cpe->ms_mask[w*16+g] = 1;
 673                 } else if (s->coder->search_for_ms) {
 674                     s->coder->search_for_ms(s, cpe, s->lambda);
 675                 }
 676             }
 677             if (chans > 1 && s->options.intensity_stereo && s->coder->search_for_is) {
 678                 s->coder->search_for_is(s, avctx, cpe, s->lambda);
 679                 if (cpe->is_mode) is_mode = 1;
 680             }
 681             if (s->coder->set_special_band_scalefactors)
 682                 for (ch = 0; ch < chans; ch++)
 683                     s->coder->set_special_band_scalefactors(s, &cpe->ch[ch]);
 684             adjust_frame_information(cpe, chans);
 685             if (chans == 2) {
 686                 put_bits(&s->pb, 1, cpe->common_window);
 687                 if (cpe->common_window) {
 688                     put_ics_info(s, &cpe->ch[0].ics);
 689                     encode_ms_info(&s->pb, cpe);
 690                     if (cpe->ms_mode) ms_mode = 1;
 691                 }
 692             }
 693             for (ch = 0; ch < chans; ch++) {
 694                 s->cur_channel = start_ch + ch;
 695                 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
 696             }
 697             start_ch += chans;
 698         }
 699
 700         frame_bits = put_bits_count(&s->pb);
 701         if (frame_bits <= 6144 * s->channels - 3) {
 702             s->psy.bitres.bits = frame_bits / s->channels;
 703             break;
 704         }
 705         if (is_mode || ms_mode) {
 706             for (i = 0; i < s->chan_map[0]; i++) {
 707                 // Must restore coeffs
 708                 chans = tag == TYPE_CPE ? 2 : 1;
 709                 cpe = &s->cpe[i];
 710                 for (ch = 0; ch < chans; ch++)
 711                     memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
 712             }
 713         }
 714
 715         s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
 716
 717     } while (1);
 718
 719     put_bits(&s->pb, 3, TYPE_END);
 720     flush_put_bits(&s->pb);
 721     avctx->frame_bits = put_bits_count(&s->pb);
 722
 723     // rate control stuff
 724     if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
 725         float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
 726         s->lambda *= ratio;
 727         s->lambda = FFMIN(s->lambda, 65536.f);
 728     }
 729
 730     if (!frame)
 731         s->last_frame++;
 732
 733     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
 734                        &avpkt->duration);
 735
 736     avpkt->size = put_bits_count(&s->pb) >> 3;
 737     *got_packet_ptr = 1;
 738     return 0;
 739 }
 740
 741 static av_cold int aac_encode_end(AVCodecContext *avctx)
 742 {
 743     AACEncContext *s = avctx->priv_data;
 744
 745     ff_mdct_end(&s->mdct1024);
 746     ff_mdct_end(&s->mdct128);
 747     ff_psy_end(&s->psy);
 748     if (s->psypp)
 749         ff_psy_preprocess_end(s->psypp);
 750     av_freep(&s->buffer.samples);
 751     av_freep(&s->cpe);
 752     av_freep(&s->fdsp);
 753     ff_af_queue_close(&s->afq);
 754     return 0;
 755 }
 756
 757 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
 758 {
 759     int ret = 0;
 760
 761     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
 762     if (!s->fdsp)
 763         return AVERROR(ENOMEM);
 764
 765     // window init
 766     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
 767     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
 768     ff_init_ff_sine_windows(10);
 769     ff_init_ff_sine_windows(7);
 770
 771     if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
 772         return ret;
 773     if ((ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0)) < 0)
 774         return ret;
 775
 776     return 0;
 777 }
 778
 779 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
 780 {
 781     int ch;
 782     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
 783     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
 784     FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
 785
 786     for(ch = 0; ch < s->channels; ch++)
 787         s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
 788
 789     return 0;
 790 alloc_fail:
 791     return AVERROR(ENOMEM);
 792 }
 793
 794 static av_cold int aac_encode_init(AVCodecContext *avctx)
 795 {
 796     AACEncContext *s = avctx->priv_data;
 797     int i, ret = 0;
 798     const uint8_t *sizes[2];
 799     uint8_t grouping[AAC_MAX_CHANNELS];
 800     int lengths[2];
 801
 802     avctx->frame_size = 1024;
 803
 804     for (i = 0; i < 16; i++)
 805         if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
 806             break;
 807
 808     s->channels = avctx->channels;
 809
 810     ERROR_IF(i == 16
 811                 || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
 812                 || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
 813              "Unsupported sample rate %d\n", avctx->sample_rate);
 814     ERROR_IF(s->channels > AAC_MAX_CHANNELS,
 815              "Unsupported number of channels: %d\n", s->channels);
 816     ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
 817              "Unsupported profile %d\n", avctx->profile);
 818     WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
 819              "Too many bits per frame requested, clamping to max\n");
 820
 821     avctx->bit_rate = (int)FFMIN(
 822         6144 * s->channels / 1024.0 * avctx->sample_rate,
 823         avctx->bit_rate);
 824
 825     s->samplerate_index = i;
 826
 827     s->chan_map = aac_chan_configs[s->channels-1];
 828
 829     if ((ret = dsp_init(avctx, s)) < 0)
 830         goto fail;
 831
 832     if ((ret = alloc_buffers(avctx, s)) < 0)
 833         goto fail;
 834
 835     avctx->extradata_size = 5;
 836     put_audio_specific_config(avctx);
 837
 838     sizes[0]   = swb_size_1024[i];
 839     sizes[1]   = swb_size_128[i];
 840     lengths[0] = ff_aac_num_swb_1024[i];
 841     lengths[1] = ff_aac_num_swb_128[i];
 842     for (i = 0; i < s->chan_map[0]; i++)
 843         grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
 844     if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
 845                            s->chan_map[0], grouping)) < 0)
 846         goto fail;
 847     s->psypp = ff_psy_preprocess_init(avctx);
 848     s->coder = &ff_aac_coders[s->options.aac_coder];
 849
 850     if (HAVE_MIPSDSPR1)
 851         ff_aac_coder_init_mips(s);
 852
 853     s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
 854
 855     ff_aac_tableinit();
 856
 857     avctx->initial_padding = 1024;
 858     ff_af_queue_init(avctx, &s->afq);
 859
 860     return 0;
 861 fail:
 862     aac_encode_end(avctx);
 863     return ret;
 864 }
 865
 866 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
 867 static const AVOption aacenc_options[] = {
 868     {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
 869         {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 870         {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 871         {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 872     {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
 873         {"faac",     "FAAC-inspired method",      0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 874         {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 875         {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 876         {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 877     {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pns"},
 878         {"disable",  "Disable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
 879         {"enable",   "Enable perceptual noise substitution",  0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
 880     {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "intensity_stereo"},
 881         {"disable",  "Disable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
 882         {"enable",   "Enable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
 883     {NULL}
 884 };
 885
 886 static const AVClass aacenc_class = {
 887     "AAC encoder",
 888     av_default_item_name,
 889     aacenc_options,
 890     LIBAVUTIL_VERSION_INT,
 891 };
 892
 893 /* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
 894  * failures */
 895 static const int mpeg4audio_sample_rates[16] = {
 896     96000, 88200, 64000, 48000, 44100, 32000,
 897     24000, 22050, 16000, 12000, 11025, 8000, 7350
 898 };
 899
 900 AVCodec ff_aac_encoder = {
 901     .name           = "aac",
 902     .long_name      = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
 903     .type           = AVMEDIA_TYPE_AUDIO,
 904     .id             = AV_CODEC_ID_AAC,
 905     .priv_data_size = sizeof(AACEncContext),
 906     .init           = aac_encode_init,
 907     .encode2        = aac_encode_frame,
 908     .close          = aac_encode_end,
 909     .supported_samplerates = mpeg4audio_sample_rates,
 910     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
 911                       CODEC_CAP_EXPERIMENTAL,
 912     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
 913                                                      AV_SAMPLE_FMT_NONE },
 914     .priv_class     = &aacenc_class,
 915 };