git.sesse.net Git - ffmpeg/blob - libavcodec/aacenc.c

   1 /*
   2  * AAC encoder
   3  * Copyright (C) 2008 Konstantin Shishkov
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * AAC encoder
  25  */
  26
  27 /***********************************
  28  *              TODOs:
  29  * add sane pulse detection
  30  * add temporal noise shaping
  31  ***********************************/
  32
  33 #include "libavutil/float_dsp.h"
  34 #include "libavutil/opt.h"
  35 #include "avcodec.h"
  36 #include "put_bits.h"
  37 #include "internal.h"
  38 #include "mpeg4audio.h"
  39 #include "kbdwin.h"
  40 #include "sinewin.h"
  41
  42 #include "aac.h"
  43 #include "aactab.h"
  44 #include "aacenc.h"
  45
  46 #include "psymodel.h"
  47
  48 #define AAC_MAX_CHANNELS 6
  49
  50 #define ERROR_IF(cond, ...) \
  51     if (cond) { \
  52         av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
  53         return AVERROR(EINVAL); \
  54     }
  55
  56 #define WARN_IF(cond, ...) \
  57     if (cond) { \
  58         av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
  59     }
  60
  61 float ff_aac_pow34sf_tab[428];
  62
  63 static const uint8_t swb_size_1024_96[] = {
  64     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
  65     12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
  66     64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
  67 };
  68
  69 static const uint8_t swb_size_1024_64[] = {
  70     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
  71     12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
  72     40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
  73 };
  74
  75 static const uint8_t swb_size_1024_48[] = {
  76     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
  77     12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
  78     32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  79     96
  80 };
  81
  82 static const uint8_t swb_size_1024_32[] = {
  83     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
  84     12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
  85     32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
  86 };
  87
  88 static const uint8_t swb_size_1024_24[] = {
  89     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  90     12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
  91     32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
  92 };
  93
  94 static const uint8_t swb_size_1024_16[] = {
  95     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  96     12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
  97     32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
  98 };
  99
 100 static const uint8_t swb_size_1024_8[] = {
 101     12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
 102     16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
 103     32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
 104 };
 105
 106 static const uint8_t *swb_size_1024[] = {
 107     swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
 108     swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
 109     swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
 110     swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
 111     swb_size_1024_8
 112 };
 113
 114 static const uint8_t swb_size_128_96[] = {
 115     4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
 116 };
 117
 118 static const uint8_t swb_size_128_48[] = {
 119     4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
 120 };
 121
 122 static const uint8_t swb_size_128_24[] = {
 123     4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
 124 };
 125
 126 static const uint8_t swb_size_128_16[] = {
 127     4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
 128 };
 129
 130 static const uint8_t swb_size_128_8[] = {
 131     4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
 132 };
 133
 134 static const uint8_t *swb_size_128[] = {
 135     /* the last entry on the following row is swb_size_128_64 but is a
 136        duplicate of swb_size_128_96 */
 137     swb_size_128_96, swb_size_128_96, swb_size_128_96,
 138     swb_size_128_48, swb_size_128_48, swb_size_128_48,
 139     swb_size_128_24, swb_size_128_24, swb_size_128_16,
 140     swb_size_128_16, swb_size_128_16, swb_size_128_8,
 141     swb_size_128_8
 142 };
 143
 144 /** default channel configurations */
 145 static const uint8_t aac_chan_configs[6][5] = {
 146  {1, TYPE_SCE},                               // 1 channel  - single channel element
 147  {1, TYPE_CPE},                               // 2 channels - channel pair
 148  {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
 149  {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
 150  {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
 151  {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
 152 };
 153
 154 /**
 155  * Table to remap channels from libavcodec's default order to AAC order.
 156  */
 157 static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
 158     { 0 },
 159     { 0, 1 },
 160     { 2, 0, 1 },
 161     { 2, 0, 1, 3 },
 162     { 2, 0, 1, 3, 4 },
 163     { 2, 0, 1, 4, 5, 3 },
 164 };
 165
 166 /**
 167  * Make AAC audio config object.
 168  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
 169  */
 170 static void put_audio_specific_config(AVCodecContext *avctx)
 171 {
 172     PutBitContext pb;
 173     AACEncContext *s = avctx->priv_data;
 174
 175     init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
 176     put_bits(&pb, 5, 2); //object type - AAC-LC
 177     put_bits(&pb, 4, s->samplerate_index); //sample rate index
 178     put_bits(&pb, 4, s->channels);
 179     //GASpecificConfig
 180     put_bits(&pb, 1, 0); //frame length - 1024 samples
 181     put_bits(&pb, 1, 0); //does not depend on core coder
 182     put_bits(&pb, 1, 0); //is not extension
 183
 184     //Explicitly Mark SBR absent
 185     put_bits(&pb, 11, 0x2b7); //sync extension
 186     put_bits(&pb, 5,  AOT_SBR);
 187     put_bits(&pb, 1,  0);
 188     flush_put_bits(&pb);
 189 }
 190
 191 #define WINDOW_FUNC(type) \
 192 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
 193                                     SingleChannelElement *sce, \
 194                                     const float *audio)
 195
 196 WINDOW_FUNC(only_long)
 197 {
 198     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 199     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 200     float *out = sce->ret_buf;
 201
 202     fdsp->vector_fmul        (out,        audio,        lwindow, 1024);
 203     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
 204 }
 205
 206 WINDOW_FUNC(long_start)
 207 {
 208     const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 209     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
 210     float *out = sce->ret_buf;
 211
 212     fdsp->vector_fmul(out, audio, lwindow, 1024);
 213     memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
 214     fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
 215     memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
 216 }
 217
 218 WINDOW_FUNC(long_stop)
 219 {
 220     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 221     const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
 222     float *out = sce->ret_buf;
 223
 224     memset(out, 0, sizeof(out[0]) * 448);
 225     fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
 226     memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
 227     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
 228 }
 229
 230 WINDOW_FUNC(eight_short)
 231 {
 232     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
 233     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
 234     const float *in = audio + 448;
 235     float *out = sce->ret_buf;
 236     int w;
 237
 238     for (w = 0; w < 8; w++) {
 239         fdsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
 240         out += 128;
 241         in  += 128;
 242         fdsp->vector_fmul_reverse(out, in, swindow, 128);
 243         out += 128;
 244     }
 245 }
 246
 247 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
 248                                      SingleChannelElement *sce,
 249                                      const float *audio) = {
 250     [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
 251     [LONG_START_SEQUENCE]  = apply_long_start_window,
 252     [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
 253     [LONG_STOP_SEQUENCE]   = apply_long_stop_window
 254 };
 255
 256 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
 257                                   float *audio)
 258 {
 259     int i;
 260     float *output = sce->ret_buf;
 261
 262     apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
 263
 264     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
 265         s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
 266     else
 267         for (i = 0; i < 1024; i += 128)
 268             s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
 269     memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
 270     memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
 271 }
 272
 273 /**
 274  * Encode ics_info element.
 275  * @see Table 4.6 (syntax of ics_info)
 276  */
 277 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
 278 {
 279     int w;
 280
 281     put_bits(&s->pb, 1, 0);                // ics_reserved bit
 282     put_bits(&s->pb, 2, info->window_sequence[0]);
 283     put_bits(&s->pb, 1, info->use_kb_window[0]);
 284     if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
 285         put_bits(&s->pb, 6, info->max_sfb);
 286         put_bits(&s->pb, 1, 0);            // no prediction
 287     } else {
 288         put_bits(&s->pb, 4, info->max_sfb);
 289         for (w = 1; w < 8; w++)
 290             put_bits(&s->pb, 1, !info->group_len[w]);
 291     }
 292 }
 293
 294 /**
 295  * Encode MS data.
 296  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
 297  */
 298 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
 299 {
 300     int i, w;
 301
 302     put_bits(pb, 2, cpe->ms_mode);
 303     if (cpe->ms_mode == 1)
 304         for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
 305             for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
 306                 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
 307 }
 308
 309 /**
 310  * Produce integer coefficients from scalefactors provided by the model.
 311  */
 312 static void adjust_frame_information(ChannelElement *cpe, int chans)
 313 {
 314     int i, w, w2, g, ch;
 315     int maxsfb, cmaxsfb;
 316     IndividualChannelStream *ics;
 317
 318     if (cpe->common_window) {
 319         ics = &cpe->ch[0].ics;
 320         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 321             for (w2 =  0; w2 < ics->group_len[w]; w2++) {
 322                 int start = (w+w2) * 128;
 323                 for (g = 0; g < ics->num_swb; g++) {
 324                     //apply Intensity stereo coeffs transformation
 325                     if (cpe->is_mask[w*16 + g]) {
 326                         int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
 327                         float scale = cpe->ch[0].is_ener[w*16+g];
 328                         for (i = 0; i < ics->swb_sizes[g]; i++) {
 329                             cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + p*cpe->ch[1].pcoeffs[start+i]) * scale;
 330                             cpe->ch[1].coeffs[start+i] = 0.0f;
 331                         }
 332                     } else if (cpe->ms_mask[w*16 + g] &&
 333                                cpe->ch[0].band_type[w*16 + g] < NOISE_BT &&
 334                                cpe->ch[1].band_type[w*16 + g] < NOISE_BT) {
 335                         for (i = 0; i < ics->swb_sizes[g]; i++) {
 336                             cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
 337                             cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
 338                         }
 339                     }
 340                     start += ics->swb_sizes[g];
 341                 }
 342             }
 343         }
 344     }
 345
 346     for (ch = 0; ch < chans; ch++) {
 347         IndividualChannelStream *ics = &cpe->ch[ch].ics;
 348         maxsfb = 0;
 349         cpe->ch[ch].pulse.num_pulse = 0;
 350         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 351             for (w2 =  0; w2 < ics->group_len[w]; w2++) {
 352                 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
 353                     ;
 354                 maxsfb = FFMAX(maxsfb, cmaxsfb);
 355             }
 356         }
 357         ics->max_sfb = maxsfb;
 358
 359         //adjust zero bands for window groups
 360         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 361             for (g = 0; g < ics->max_sfb; g++) {
 362                 i = 1;
 363                 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
 364                     if (!cpe->ch[ch].zeroes[w2*16 + g]) {
 365                         i = 0;
 366                         break;
 367                     }
 368                 }
 369                 cpe->ch[ch].zeroes[w*16 + g] = i;
 370             }
 371         }
 372     }
 373
 374     if (chans > 1 && cpe->common_window) {
 375         IndividualChannelStream *ics0 = &cpe->ch[0].ics;
 376         IndividualChannelStream *ics1 = &cpe->ch[1].ics;
 377         int msc = 0;
 378         ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
 379         ics1->max_sfb = ics0->max_sfb;
 380         for (w = 0; w < ics0->num_windows*16; w += 16)
 381             for (i = 0; i < ics0->max_sfb; i++)
 382                 if (cpe->ms_mask[w+i])
 383                     msc++;
 384         if (msc == 0 || ics0->max_sfb == 0)
 385             cpe->ms_mode = 0;
 386         else
 387             cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
 388     }
 389 }
 390
 391 /**
 392  * Encode scalefactor band coding type.
 393  */
 394 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
 395 {
 396     int w;
 397
 398     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
 399         s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
 400 }
 401
 402 /**
 403  * Encode scalefactors.
 404  */
 405 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
 406                                  SingleChannelElement *sce)
 407 {
 408     int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
 409     int off_is = 0, noise_flag = 1;
 410     int i, w;
 411
 412     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 413         for (i = 0; i < sce->ics.max_sfb; i++) {
 414             if (!sce->zeroes[w*16 + i]) {
 415                 if (sce->band_type[w*16 + i] == NOISE_BT) {
 416                     diff = sce->sf_idx[w*16 + i] - off_pns;
 417                     off_pns = sce->sf_idx[w*16 + i];
 418                     if (noise_flag-- > 0) {
 419                         put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
 420                         continue;
 421                     }
 422                 } else if (sce->band_type[w*16 + i] == INTENSITY_BT  ||
 423                            sce->band_type[w*16 + i] == INTENSITY_BT2) {
 424                     diff = sce->sf_idx[w*16 + i] - off_is;
 425                     off_is = sce->sf_idx[w*16 + i];
 426                 } else {
 427                     diff = sce->sf_idx[w*16 + i] - off_sf;
 428                     off_sf = sce->sf_idx[w*16 + i];
 429                 }
 430                 diff += SCALE_DIFF_ZERO;
 431                 av_assert0(diff >= 0 && diff <= 120);
 432                 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
 433             }
 434         }
 435     }
 436 }
 437
 438 /**
 439  * Encode pulse data.
 440  */
 441 static void encode_pulses(AACEncContext *s, Pulse *pulse)
 442 {
 443     int i;
 444
 445     put_bits(&s->pb, 1, !!pulse->num_pulse);
 446     if (!pulse->num_pulse)
 447         return;
 448
 449     put_bits(&s->pb, 2, pulse->num_pulse - 1);
 450     put_bits(&s->pb, 6, pulse->start);
 451     for (i = 0; i < pulse->num_pulse; i++) {
 452         put_bits(&s->pb, 5, pulse->pos[i]);
 453         put_bits(&s->pb, 4, pulse->amp[i]);
 454     }
 455 }
 456
 457 /**
 458  * Encode spectral coefficients processed by psychoacoustic model.
 459  */
 460 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
 461 {
 462     int start, i, w, w2;
 463
 464     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 465         start = 0;
 466         for (i = 0; i < sce->ics.max_sfb; i++) {
 467             if (sce->zeroes[w*16 + i]) {
 468                 start += sce->ics.swb_sizes[i];
 469                 continue;
 470             }
 471             for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
 472                 s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
 473                                                    sce->ics.swb_sizes[i],
 474                                                    sce->sf_idx[w*16 + i],
 475                                                    sce->band_type[w*16 + i],
 476                                                    s->lambda);
 477             start += sce->ics.swb_sizes[i];
 478         }
 479     }
 480 }
 481
 482 /**
 483  * Encode one channel of audio data.
 484  */
 485 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
 486                                      SingleChannelElement *sce,
 487                                      int common_window)
 488 {
 489     put_bits(&s->pb, 8, sce->sf_idx[0]);
 490     if (!common_window)
 491         put_ics_info(s, &sce->ics);
 492     encode_band_info(s, sce);
 493     encode_scale_factors(avctx, s, sce);
 494     encode_pulses(s, &sce->pulse);
 495     put_bits(&s->pb, 1, 0); //tns
 496     put_bits(&s->pb, 1, 0); //ssr
 497     encode_spectral_coeffs(s, sce);
 498     return 0;
 499 }
 500
 501 /**
 502  * Write some auxiliary information about the created AAC file.
 503  */
 504 static void put_bitstream_info(AACEncContext *s, const char *name)
 505 {
 506     int i, namelen, padbits;
 507
 508     namelen = strlen(name) + 2;
 509     put_bits(&s->pb, 3, TYPE_FIL);
 510     put_bits(&s->pb, 4, FFMIN(namelen, 15));
 511     if (namelen >= 15)
 512         put_bits(&s->pb, 8, namelen - 14);
 513     put_bits(&s->pb, 4, 0); //extension type - filler
 514     padbits = -put_bits_count(&s->pb) & 7;
 515     avpriv_align_put_bits(&s->pb);
 516     for (i = 0; i < namelen - 2; i++)
 517         put_bits(&s->pb, 8, name[i]);
 518     put_bits(&s->pb, 12 - padbits, 0);
 519 }
 520
 521 /*
 522  * Copy input samples.
 523  * Channels are reordered from libavcodec's default order to AAC order.
 524  */
 525 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
 526 {
 527     int ch;
 528     int end = 2048 + (frame ? frame->nb_samples : 0);
 529     const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
 530
 531     /* copy and remap input samples */
 532     for (ch = 0; ch < s->channels; ch++) {
 533         /* copy last 1024 samples of previous frame to the start of the current frame */
 534         memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
 535
 536         /* copy new samples and zero any remaining samples */
 537         if (frame) {
 538             memcpy(&s->planar_samples[ch][2048],
 539                    frame->extended_data[channel_map[ch]],
 540                    frame->nb_samples * sizeof(s->planar_samples[0][0]));
 541         }
 542         memset(&s->planar_samples[ch][end], 0,
 543                (3072 - end) * sizeof(s->planar_samples[0][0]));
 544     }
 545 }
 546
 547 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 548                             const AVFrame *frame, int *got_packet_ptr)
 549 {
 550     AACEncContext *s = avctx->priv_data;
 551     float **samples = s->planar_samples, *samples2, *la, *overlap;
 552     ChannelElement *cpe;
 553     int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0, is_mode = 0;
 554     int chan_el_counter[4];
 555     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
 556
 557     if (s->last_frame == 2)
 558         return 0;
 559
 560     /* add current frame to queue */
 561     if (frame) {
 562         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
 563             return ret;
 564     }
 565
 566     copy_input_samples(s, frame);
 567     if (s->psypp)
 568         ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
 569
 570     if (!avctx->frame_number)
 571         return 0;
 572
 573     start_ch = 0;
 574     for (i = 0; i < s->chan_map[0]; i++) {
 575         FFPsyWindowInfo* wi = windows + start_ch;
 576         tag      = s->chan_map[i+1];
 577         chans    = tag == TYPE_CPE ? 2 : 1;
 578         cpe      = &s->cpe[i];
 579         for (ch = 0; ch < chans; ch++) {
 580             IndividualChannelStream *ics = &cpe->ch[ch].ics;
 581             int cur_channel = start_ch + ch;
 582             overlap  = &samples[cur_channel][0];
 583             samples2 = overlap + 1024;
 584             la       = samples2 + (448+64);
 585             if (!frame)
 586                 la = NULL;
 587             if (tag == TYPE_LFE) {
 588                 wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
 589                 wi[ch].window_shape   = 0;
 590                 wi[ch].num_windows    = 1;
 591                 wi[ch].grouping[0]    = 1;
 592
 593                 /* Only the lowest 12 coefficients are used in a LFE channel.
 594                  * The expression below results in only the bottom 8 coefficients
 595                  * being used for 11.025kHz to 16kHz sample rates.
 596                  */
 597                 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
 598             } else {
 599                 wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
 600                                               ics->window_sequence[0]);
 601             }
 602             ics->window_sequence[1] = ics->window_sequence[0];
 603             ics->window_sequence[0] = wi[ch].window_type[0];
 604             ics->use_kb_window[1]   = ics->use_kb_window[0];
 605             ics->use_kb_window[0]   = wi[ch].window_shape;
 606             ics->num_windows        = wi[ch].num_windows;
 607             ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
 608             ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
 609             for (w = 0; w < ics->num_windows; w++)
 610                 ics->group_len[w] = wi[ch].grouping[w];
 611
 612             apply_window_and_mdct(s, &cpe->ch[ch], overlap);
 613             if (isnan(cpe->ch->coeffs[0])) {
 614                 av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
 615                 return AVERROR(EINVAL);
 616             }
 617         }
 618         start_ch += chans;
 619     }
 620     if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
 621         return ret;
 622     do {
 623         int frame_bits;
 624
 625         init_put_bits(&s->pb, avpkt->data, avpkt->size);
 626
 627         if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
 628             put_bitstream_info(s, LIBAVCODEC_IDENT);
 629         start_ch = 0;
 630         memset(chan_el_counter, 0, sizeof(chan_el_counter));
 631         for (i = 0; i < s->chan_map[0]; i++) {
 632             FFPsyWindowInfo* wi = windows + start_ch;
 633             const float *coeffs[2];
 634             tag      = s->chan_map[i+1];
 635             chans    = tag == TYPE_CPE ? 2 : 1;
 636             cpe      = &s->cpe[i];
 637             memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
 638             memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
 639             put_bits(&s->pb, 3, tag);
 640             put_bits(&s->pb, 4, chan_el_counter[tag]++);
 641             for (ch = 0; ch < chans; ch++)
 642                 coeffs[ch] = cpe->ch[ch].coeffs;
 643             s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
 644             for (ch = 0; ch < chans; ch++) {
 645                 s->cur_channel = start_ch + ch;
 646                 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
 647             }
 648             cpe->common_window = 0;
 649             if (chans > 1
 650                 && wi[0].window_type[0] == wi[1].window_type[0]
 651                 && wi[0].window_shape   == wi[1].window_shape) {
 652
 653                 cpe->common_window = 1;
 654                 for (w = 0; w < wi[0].num_windows; w++) {
 655                     if (wi[0].grouping[w] != wi[1].grouping[w]) {
 656                         cpe->common_window = 0;
 657                         break;
 658                     }
 659                 }
 660             }
 661             if (s->options.pns && s->coder->search_for_pns) {
 662                 for (ch = 0; ch < chans; ch++) {
 663                     s->cur_channel = start_ch + ch;
 664                     s->coder->search_for_pns(s, avctx, &cpe->ch[ch], s->lambda);
 665                 }
 666             }
 667             s->cur_channel = start_ch;
 668             if (s->options.stereo_mode && cpe->common_window) {
 669                 if (s->options.stereo_mode > 0) {
 670                     IndividualChannelStream *ics = &cpe->ch[0].ics;
 671                     for (w = 0; w < ics->num_windows; w += ics->group_len[w])
 672                         for (g = 0;  g < ics->num_swb; g++)
 673                             cpe->ms_mask[w*16+g] = 1;
 674                 } else if (s->coder->search_for_ms) {
 675                     s->coder->search_for_ms(s, cpe, s->lambda);
 676                 }
 677             }
 678             if (chans > 1 && s->options.intensity_stereo && s->coder->search_for_is) {
 679                 s->coder->search_for_is(s, avctx, cpe, s->lambda);
 680                 if (cpe->is_mode) is_mode = 1;
 681             }
 682             if (s->coder->set_special_band_scalefactors)
 683                 for (ch = 0; ch < chans; ch++)
 684                     s->coder->set_special_band_scalefactors(s, &cpe->ch[ch]);
 685             adjust_frame_information(cpe, chans);
 686             if (chans == 2) {
 687                 put_bits(&s->pb, 1, cpe->common_window);
 688                 if (cpe->common_window) {
 689                     put_ics_info(s, &cpe->ch[0].ics);
 690                     encode_ms_info(&s->pb, cpe);
 691                     if (cpe->ms_mode) ms_mode = 1;
 692                 }
 693             }
 694             for (ch = 0; ch < chans; ch++) {
 695                 s->cur_channel = start_ch + ch;
 696                 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
 697             }
 698             start_ch += chans;
 699         }
 700
 701         frame_bits = put_bits_count(&s->pb);
 702         if (frame_bits <= 6144 * s->channels - 3) {
 703             s->psy.bitres.bits = frame_bits / s->channels;
 704             break;
 705         }
 706         if (is_mode || ms_mode) {
 707             for (i = 0; i < s->chan_map[0]; i++) {
 708                 // Must restore coeffs
 709                 chans = tag == TYPE_CPE ? 2 : 1;
 710                 cpe = &s->cpe[i];
 711                 for (ch = 0; ch < chans; ch++)
 712                     memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
 713             }
 714         }
 715
 716         s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
 717
 718     } while (1);
 719
 720     put_bits(&s->pb, 3, TYPE_END);
 721     flush_put_bits(&s->pb);
 722     avctx->frame_bits = put_bits_count(&s->pb);
 723
 724     // rate control stuff
 725     if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
 726         float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
 727         s->lambda *= ratio;
 728         s->lambda = FFMIN(s->lambda, 65536.f);
 729     }
 730
 731     if (!frame)
 732         s->last_frame++;
 733
 734     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
 735                        &avpkt->duration);
 736
 737     avpkt->size = put_bits_count(&s->pb) >> 3;
 738     *got_packet_ptr = 1;
 739     return 0;
 740 }
 741
 742 static av_cold int aac_encode_end(AVCodecContext *avctx)
 743 {
 744     AACEncContext *s = avctx->priv_data;
 745
 746     ff_mdct_end(&s->mdct1024);
 747     ff_mdct_end(&s->mdct128);
 748     ff_psy_end(&s->psy);
 749     if (s->psypp)
 750         ff_psy_preprocess_end(s->psypp);
 751     av_freep(&s->buffer.samples);
 752     av_freep(&s->cpe);
 753     av_freep(&s->fdsp);
 754     ff_af_queue_close(&s->afq);
 755     return 0;
 756 }
 757
 758 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
 759 {
 760     int ret = 0;
 761
 762     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
 763     if (!s->fdsp)
 764         return AVERROR(ENOMEM);
 765
 766     // window init
 767     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
 768     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
 769     ff_init_ff_sine_windows(10);
 770     ff_init_ff_sine_windows(7);
 771
 772     if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
 773         return ret;
 774     if ((ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0)) < 0)
 775         return ret;
 776
 777     return 0;
 778 }
 779
 780 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
 781 {
 782     int ch;
 783     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
 784     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
 785     FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
 786
 787     for(ch = 0; ch < s->channels; ch++)
 788         s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
 789
 790     return 0;
 791 alloc_fail:
 792     return AVERROR(ENOMEM);
 793 }
 794
 795 static av_cold int aac_encode_init(AVCodecContext *avctx)
 796 {
 797     AACEncContext *s = avctx->priv_data;
 798     int i, ret = 0;
 799     const uint8_t *sizes[2];
 800     uint8_t grouping[AAC_MAX_CHANNELS];
 801     int lengths[2];
 802
 803     avctx->frame_size = 1024;
 804
 805     for (i = 0; i < 16; i++)
 806         if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
 807             break;
 808
 809     s->channels = avctx->channels;
 810
 811     ERROR_IF(i == 16
 812                 || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
 813                 || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
 814              "Unsupported sample rate %d\n", avctx->sample_rate);
 815     ERROR_IF(s->channels > AAC_MAX_CHANNELS,
 816              "Unsupported number of channels: %d\n", s->channels);
 817     ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
 818              "Unsupported profile %d\n", avctx->profile);
 819     WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
 820              "Too many bits per frame requested, clamping to max\n");
 821
 822     avctx->bit_rate = (int)FFMIN(
 823         6144 * s->channels / 1024.0 * avctx->sample_rate,
 824         avctx->bit_rate);
 825
 826     s->samplerate_index = i;
 827
 828     s->chan_map = aac_chan_configs[s->channels-1];
 829
 830     if ((ret = dsp_init(avctx, s)) < 0)
 831         goto fail;
 832
 833     if ((ret = alloc_buffers(avctx, s)) < 0)
 834         goto fail;
 835
 836     avctx->extradata_size = 5;
 837     put_audio_specific_config(avctx);
 838
 839     sizes[0]   = swb_size_1024[i];
 840     sizes[1]   = swb_size_128[i];
 841     lengths[0] = ff_aac_num_swb_1024[i];
 842     lengths[1] = ff_aac_num_swb_128[i];
 843     for (i = 0; i < s->chan_map[0]; i++)
 844         grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
 845     if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
 846                            s->chan_map[0], grouping)) < 0)
 847         goto fail;
 848     s->psypp = ff_psy_preprocess_init(avctx);
 849     s->coder = &ff_aac_coders[s->options.aac_coder];
 850
 851     if (HAVE_MIPSDSPR1)
 852         ff_aac_coder_init_mips(s);
 853
 854     s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
 855
 856     ff_aac_tableinit();
 857
 858     for (i = 0; i < 428; i++)
 859         ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
 860
 861     avctx->initial_padding = 1024;
 862     ff_af_queue_init(avctx, &s->afq);
 863
 864     return 0;
 865 fail:
 866     aac_encode_end(avctx);
 867     return ret;
 868 }
 869
 870 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
 871 static const AVOption aacenc_options[] = {
 872     {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
 873         {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 874         {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 875         {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 876     {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
 877         {"faac",     "FAAC-inspired method",      0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 878         {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 879         {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 880         {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 881     {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pns"},
 882         {"disable",  "Disable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
 883         {"enable",   "Enable perceptual noise substitution",  0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
 884     {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "intensity_stereo"},
 885         {"disable",  "Disable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
 886         {"enable",   "Enable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
 887     {NULL}
 888 };
 889
 890 static const AVClass aacenc_class = {
 891     "AAC encoder",
 892     av_default_item_name,
 893     aacenc_options,
 894     LIBAVUTIL_VERSION_INT,
 895 };
 896
 897 /* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
 898  * failures */
 899 static const int mpeg4audio_sample_rates[16] = {
 900     96000, 88200, 64000, 48000, 44100, 32000,
 901     24000, 22050, 16000, 12000, 11025, 8000, 7350
 902 };
 903
 904 AVCodec ff_aac_encoder = {
 905     .name           = "aac",
 906     .long_name      = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
 907     .type           = AVMEDIA_TYPE_AUDIO,
 908     .id             = AV_CODEC_ID_AAC,
 909     .priv_data_size = sizeof(AACEncContext),
 910     .init           = aac_encode_init,
 911     .encode2        = aac_encode_frame,
 912     .close          = aac_encode_end,
 913     .supported_samplerates = mpeg4audio_sample_rates,
 914     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
 915                       CODEC_CAP_EXPERIMENTAL,
 916     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
 917                                                      AV_SAMPLE_FMT_NONE },
 918     .priv_class     = &aacenc_class,
 919 };