git.sesse.net Git - ffmpeg/blob - libavcodec/aacenc.c

   1 /*
   2  * AAC encoder
   3  * Copyright (C) 2008 Konstantin Shishkov
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * AAC encoder
  25  */
  26
  27 /***********************************
  28  *              TODOs:
  29  * add sane pulse detection
  30  * add temporal noise shaping
  31  ***********************************/
  32
  33 #include "libavutil/float_dsp.h"
  34 #include "libavutil/opt.h"
  35 #include "avcodec.h"
  36 #include "put_bits.h"
  37 #include "internal.h"
  38 #include "mpeg4audio.h"
  39 #include "kbdwin.h"
  40 #include "sinewin.h"
  41
  42 #include "aac.h"
  43 #include "aactab.h"
  44 #include "aacenc.h"
  45
  46 #include "psymodel.h"
  47
  48 #define AAC_MAX_CHANNELS 6
  49
  50 #define ERROR_IF(cond, ...) \
  51     if (cond) { \
  52         av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
  53         return AVERROR(EINVAL); \
  54     }
  55
  56 #define WARN_IF(cond, ...) \
  57     if (cond) { \
  58         av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
  59     }
  60
  61
  62 static const uint8_t swb_size_1024_96[] = {
  63     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
  64     12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
  65     64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
  66 };
  67
  68 static const uint8_t swb_size_1024_64[] = {
  69     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
  70     12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
  71     40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
  72 };
  73
  74 static const uint8_t swb_size_1024_48[] = {
  75     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
  76     12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
  77     32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  78     96
  79 };
  80
  81 static const uint8_t swb_size_1024_32[] = {
  82     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
  83     12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
  84     32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
  85 };
  86
  87 static const uint8_t swb_size_1024_24[] = {
  88     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  89     12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
  90     32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
  91 };
  92
  93 static const uint8_t swb_size_1024_16[] = {
  94     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  95     12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
  96     32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
  97 };
  98
  99 static const uint8_t swb_size_1024_8[] = {
 100     12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
 101     16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
 102     32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
 103 };
 104
 105 static const uint8_t *swb_size_1024[] = {
 106     swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
 107     swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
 108     swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
 109     swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
 110     swb_size_1024_8
 111 };
 112
 113 static const uint8_t swb_size_128_96[] = {
 114     4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
 115 };
 116
 117 static const uint8_t swb_size_128_48[] = {
 118     4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
 119 };
 120
 121 static const uint8_t swb_size_128_24[] = {
 122     4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
 123 };
 124
 125 static const uint8_t swb_size_128_16[] = {
 126     4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
 127 };
 128
 129 static const uint8_t swb_size_128_8[] = {
 130     4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
 131 };
 132
 133 static const uint8_t *swb_size_128[] = {
 134     /* the last entry on the following row is swb_size_128_64 but is a
 135        duplicate of swb_size_128_96 */
 136     swb_size_128_96, swb_size_128_96, swb_size_128_96,
 137     swb_size_128_48, swb_size_128_48, swb_size_128_48,
 138     swb_size_128_24, swb_size_128_24, swb_size_128_16,
 139     swb_size_128_16, swb_size_128_16, swb_size_128_8,
 140     swb_size_128_8
 141 };
 142
 143 /** default channel configurations */
 144 static const uint8_t aac_chan_configs[6][5] = {
 145  {1, TYPE_SCE},                               // 1 channel  - single channel element
 146  {1, TYPE_CPE},                               // 2 channels - channel pair
 147  {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
 148  {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
 149  {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
 150  {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
 151 };
 152
 153 /**
 154  * Table to remap channels from libavcodec's default order to AAC order.
 155  */
 156 static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
 157     { 0 },
 158     { 0, 1 },
 159     { 2, 0, 1 },
 160     { 2, 0, 1, 3 },
 161     { 2, 0, 1, 3, 4 },
 162     { 2, 0, 1, 4, 5, 3 },
 163 };
 164
 165 /**
 166  * Make AAC audio config object.
 167  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
 168  */
 169 static void put_audio_specific_config(AVCodecContext *avctx)
 170 {
 171     PutBitContext pb;
 172     AACEncContext *s = avctx->priv_data;
 173
 174     init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
 175     put_bits(&pb, 5, 2); //object type - AAC-LC
 176     put_bits(&pb, 4, s->samplerate_index); //sample rate index
 177     put_bits(&pb, 4, s->channels);
 178     //GASpecificConfig
 179     put_bits(&pb, 1, 0); //frame length - 1024 samples
 180     put_bits(&pb, 1, 0); //does not depend on core coder
 181     put_bits(&pb, 1, 0); //is not extension
 182
 183     //Explicitly Mark SBR absent
 184     put_bits(&pb, 11, 0x2b7); //sync extension
 185     put_bits(&pb, 5,  AOT_SBR);
 186     put_bits(&pb, 1,  0);
 187     flush_put_bits(&pb);
 188 }
 189
 190 #define WINDOW_FUNC(type) \
 191 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
 192                                     SingleChannelElement *sce, \
 193                                     const float *audio)
 194
 195 WINDOW_FUNC(only_long)
 196 {
 197     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 198     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 199     float *out = sce->ret_buf;
 200
 201     fdsp->vector_fmul        (out,        audio,        lwindow, 1024);
 202     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
 203 }
 204
 205 WINDOW_FUNC(long_start)
 206 {
 207     const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 208     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
 209     float *out = sce->ret_buf;
 210
 211     fdsp->vector_fmul(out, audio, lwindow, 1024);
 212     memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
 213     fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
 214     memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
 215 }
 216
 217 WINDOW_FUNC(long_stop)
 218 {
 219     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 220     const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
 221     float *out = sce->ret_buf;
 222
 223     memset(out, 0, sizeof(out[0]) * 448);
 224     fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
 225     memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
 226     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
 227 }
 228
 229 WINDOW_FUNC(eight_short)
 230 {
 231     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
 232     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
 233     const float *in = audio + 448;
 234     float *out = sce->ret_buf;
 235     int w;
 236
 237     for (w = 0; w < 8; w++) {
 238         fdsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
 239         out += 128;
 240         in  += 128;
 241         fdsp->vector_fmul_reverse(out, in, swindow, 128);
 242         out += 128;
 243     }
 244 }
 245
 246 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
 247                                      SingleChannelElement *sce,
 248                                      const float *audio) = {
 249     [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
 250     [LONG_START_SEQUENCE]  = apply_long_start_window,
 251     [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
 252     [LONG_STOP_SEQUENCE]   = apply_long_stop_window
 253 };
 254
 255 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
 256                                   float *audio)
 257 {
 258     int i;
 259     float *output = sce->ret_buf;
 260
 261     apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
 262
 263     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
 264         s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
 265     else
 266         for (i = 0; i < 1024; i += 128)
 267             s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
 268     memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
 269     memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
 270 }
 271
 272 /**
 273  * Encode ics_info element.
 274  * @see Table 4.6 (syntax of ics_info)
 275  */
 276 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
 277 {
 278     int w;
 279
 280     put_bits(&s->pb, 1, 0);                // ics_reserved bit
 281     put_bits(&s->pb, 2, info->window_sequence[0]);
 282     put_bits(&s->pb, 1, info->use_kb_window[0]);
 283     if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
 284         put_bits(&s->pb, 6, info->max_sfb);
 285         put_bits(&s->pb, 1, 0);            // no prediction
 286     } else {
 287         put_bits(&s->pb, 4, info->max_sfb);
 288         for (w = 1; w < 8; w++)
 289             put_bits(&s->pb, 1, !info->group_len[w]);
 290     }
 291 }
 292
 293 /**
 294  * Encode MS data.
 295  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
 296  */
 297 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
 298 {
 299     int i, w;
 300
 301     put_bits(pb, 2, cpe->ms_mode);
 302     if (cpe->ms_mode == 1)
 303         for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
 304             for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
 305                 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
 306 }
 307
 308 /**
 309  * Produce integer coefficients from scalefactors provided by the model.
 310  */
 311 static void adjust_frame_information(ChannelElement *cpe, int chans)
 312 {
 313     int i, w, w2, g, ch;
 314     int maxsfb, cmaxsfb;
 315     IndividualChannelStream *ics;
 316
 317     if (cpe->common_window) {
 318         ics = &cpe->ch[0].ics;
 319         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 320             for (w2 =  0; w2 < ics->group_len[w]; w2++) {
 321                 int start = (w+w2) * 128;
 322                 for (g = 0; g < ics->num_swb; g++) {
 323                     //apply Intensity stereo coeffs transformation
 324                     if (cpe->is_mask[w*16 + g]) {
 325                         int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
 326                         float scale = cpe->ch[0].is_ener[w*16+g];
 327                         for (i = 0; i < ics->swb_sizes[g]; i++) {
 328                             cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + p*cpe->ch[1].pcoeffs[start+i]) * scale;
 329                             cpe->ch[1].coeffs[start+i] = 0.0f;
 330                         }
 331                     } else if (cpe->ms_mask[w*16 + g] &&
 332                                cpe->ch[0].band_type[w*16 + g] < NOISE_BT &&
 333                                cpe->ch[1].band_type[w*16 + g] < NOISE_BT) {
 334                         for (i = 0; i < ics->swb_sizes[g]; i++) {
 335                             cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
 336                             cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
 337                         }
 338                     }
 339                     start += ics->swb_sizes[g];
 340                 }
 341             }
 342         }
 343     }
 344
 345     for (ch = 0; ch < chans; ch++) {
 346         IndividualChannelStream *ics = &cpe->ch[ch].ics;
 347         maxsfb = 0;
 348         cpe->ch[ch].pulse.num_pulse = 0;
 349         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 350             for (w2 =  0; w2 < ics->group_len[w]; w2++) {
 351                 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
 352                     ;
 353                 maxsfb = FFMAX(maxsfb, cmaxsfb);
 354             }
 355         }
 356         ics->max_sfb = maxsfb;
 357
 358         //adjust zero bands for window groups
 359         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 360             for (g = 0; g < ics->max_sfb; g++) {
 361                 i = 1;
 362                 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
 363                     if (!cpe->ch[ch].zeroes[w2*16 + g]) {
 364                         i = 0;
 365                         break;
 366                     }
 367                 }
 368                 cpe->ch[ch].zeroes[w*16 + g] = i;
 369             }
 370         }
 371     }
 372
 373     if (chans > 1 && cpe->common_window) {
 374         IndividualChannelStream *ics0 = &cpe->ch[0].ics;
 375         IndividualChannelStream *ics1 = &cpe->ch[1].ics;
 376         int msc = 0;
 377         ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
 378         ics1->max_sfb = ics0->max_sfb;
 379         for (w = 0; w < ics0->num_windows*16; w += 16)
 380             for (i = 0; i < ics0->max_sfb; i++)
 381                 if (cpe->ms_mask[w+i])
 382                     msc++;
 383         if (msc == 0 || ics0->max_sfb == 0)
 384             cpe->ms_mode = 0;
 385         else
 386             cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
 387     }
 388 }
 389
 390 /**
 391  * Encode scalefactor band coding type.
 392  */
 393 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
 394 {
 395     int w;
 396
 397     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
 398         s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
 399 }
 400
 401 /**
 402  * Encode scalefactors.
 403  */
 404 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
 405                                  SingleChannelElement *sce)
 406 {
 407     int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
 408     int off_is = 0, noise_flag = 1;
 409     int i, w;
 410
 411     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 412         for (i = 0; i < sce->ics.max_sfb; i++) {
 413             if (!sce->zeroes[w*16 + i]) {
 414                 if (sce->band_type[w*16 + i] == NOISE_BT) {
 415                     diff = sce->sf_idx[w*16 + i] - off_pns;
 416                     off_pns = sce->sf_idx[w*16 + i];
 417                     if (noise_flag-- > 0) {
 418                         put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
 419                         continue;
 420                     }
 421                 } else if (sce->band_type[w*16 + i] == INTENSITY_BT  ||
 422                            sce->band_type[w*16 + i] == INTENSITY_BT2) {
 423                     diff = sce->sf_idx[w*16 + i] - off_is;
 424                     off_is = sce->sf_idx[w*16 + i];
 425                 } else {
 426                     diff = sce->sf_idx[w*16 + i] - off_sf;
 427                     off_sf = sce->sf_idx[w*16 + i];
 428                 }
 429                 diff += SCALE_DIFF_ZERO;
 430                 av_assert0(diff >= 0 && diff <= 120);
 431                 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
 432             }
 433         }
 434     }
 435 }
 436
 437 /**
 438  * Encode pulse data.
 439  */
 440 static void encode_pulses(AACEncContext *s, Pulse *pulse)
 441 {
 442     int i;
 443
 444     put_bits(&s->pb, 1, !!pulse->num_pulse);
 445     if (!pulse->num_pulse)
 446         return;
 447
 448     put_bits(&s->pb, 2, pulse->num_pulse - 1);
 449     put_bits(&s->pb, 6, pulse->start);
 450     for (i = 0; i < pulse->num_pulse; i++) {
 451         put_bits(&s->pb, 5, pulse->pos[i]);
 452         put_bits(&s->pb, 4, pulse->amp[i]);
 453     }
 454 }
 455
 456 /**
 457  * Encode spectral coefficients processed by psychoacoustic model.
 458  */
 459 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
 460 {
 461     int start, i, w, w2;
 462
 463     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 464         start = 0;
 465         for (i = 0; i < sce->ics.max_sfb; i++) {
 466             if (sce->zeroes[w*16 + i]) {
 467                 start += sce->ics.swb_sizes[i];
 468                 continue;
 469             }
 470             for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
 471                 s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
 472                                                    sce->ics.swb_sizes[i],
 473                                                    sce->sf_idx[w*16 + i],
 474                                                    sce->band_type[w*16 + i],
 475                                                    s->lambda, sce->ics.window_clipping[w]);
 476             start += sce->ics.swb_sizes[i];
 477         }
 478     }
 479 }
 480
 481 /**
 482  * Downscale spectral coefficients for near-clipping windows to avoid artifacts
 483  */
 484 static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
 485 {
 486     int start, i, j, w;
 487
 488     if (sce->ics.clip_avoidance_factor < 1.0f) {
 489         for (w = 0; w < sce->ics.num_windows; w++) {
 490             start = 0;
 491             for (i = 0; i < sce->ics.max_sfb; i++) {
 492                 float *swb_coeffs = sce->coeffs + start + w*128;
 493                 for (j = 0; j < sce->ics.swb_sizes[i]; j++)
 494                     swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
 495                 start += sce->ics.swb_sizes[i];
 496             }
 497         }
 498     }
 499 }
 500
 501 /**
 502  * Encode one channel of audio data.
 503  */
 504 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
 505                                      SingleChannelElement *sce,
 506                                      int common_window)
 507 {
 508     put_bits(&s->pb, 8, sce->sf_idx[0]);
 509     if (!common_window)
 510         put_ics_info(s, &sce->ics);
 511     encode_band_info(s, sce);
 512     encode_scale_factors(avctx, s, sce);
 513     encode_pulses(s, &sce->pulse);
 514     put_bits(&s->pb, 1, 0); //tns
 515     put_bits(&s->pb, 1, 0); //ssr
 516     encode_spectral_coeffs(s, sce);
 517     return 0;
 518 }
 519
 520 /**
 521  * Write some auxiliary information about the created AAC file.
 522  */
 523 static void put_bitstream_info(AACEncContext *s, const char *name)
 524 {
 525     int i, namelen, padbits;
 526
 527     namelen = strlen(name) + 2;
 528     put_bits(&s->pb, 3, TYPE_FIL);
 529     put_bits(&s->pb, 4, FFMIN(namelen, 15));
 530     if (namelen >= 15)
 531         put_bits(&s->pb, 8, namelen - 14);
 532     put_bits(&s->pb, 4, 0); //extension type - filler
 533     padbits = -put_bits_count(&s->pb) & 7;
 534     avpriv_align_put_bits(&s->pb);
 535     for (i = 0; i < namelen - 2; i++)
 536         put_bits(&s->pb, 8, name[i]);
 537     put_bits(&s->pb, 12 - padbits, 0);
 538 }
 539
 540 /*
 541  * Copy input samples.
 542  * Channels are reordered from libavcodec's default order to AAC order.
 543  */
 544 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
 545 {
 546     int ch;
 547     int end = 2048 + (frame ? frame->nb_samples : 0);
 548     const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
 549
 550     /* copy and remap input samples */
 551     for (ch = 0; ch < s->channels; ch++) {
 552         /* copy last 1024 samples of previous frame to the start of the current frame */
 553         memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
 554
 555         /* copy new samples and zero any remaining samples */
 556         if (frame) {
 557             memcpy(&s->planar_samples[ch][2048],
 558                    frame->extended_data[channel_map[ch]],
 559                    frame->nb_samples * sizeof(s->planar_samples[0][0]));
 560         }
 561         memset(&s->planar_samples[ch][end], 0,
 562                (3072 - end) * sizeof(s->planar_samples[0][0]));
 563     }
 564 }
 565
 566 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 567                             const AVFrame *frame, int *got_packet_ptr)
 568 {
 569     AACEncContext *s = avctx->priv_data;
 570     float **samples = s->planar_samples, *samples2, *la, *overlap;
 571     ChannelElement *cpe;
 572     int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0, is_mode = 0;
 573     int chan_el_counter[4];
 574     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
 575
 576     if (s->last_frame == 2)
 577         return 0;
 578
 579     /* add current frame to queue */
 580     if (frame) {
 581         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
 582             return ret;
 583     }
 584
 585     copy_input_samples(s, frame);
 586     if (s->psypp)
 587         ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
 588
 589     if (!avctx->frame_number)
 590         return 0;
 591
 592     start_ch = 0;
 593     for (i = 0; i < s->chan_map[0]; i++) {
 594         FFPsyWindowInfo* wi = windows + start_ch;
 595         tag      = s->chan_map[i+1];
 596         chans    = tag == TYPE_CPE ? 2 : 1;
 597         cpe      = &s->cpe[i];
 598         for (ch = 0; ch < chans; ch++) {
 599             IndividualChannelStream *ics = &cpe->ch[ch].ics;
 600             int cur_channel = start_ch + ch;
 601             float clip_avoidance_factor;
 602             overlap  = &samples[cur_channel][0];
 603             samples2 = overlap + 1024;
 604             la       = samples2 + (448+64);
 605             if (!frame)
 606                 la = NULL;
 607             if (tag == TYPE_LFE) {
 608                 wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
 609                 wi[ch].window_shape   = 0;
 610                 wi[ch].num_windows    = 1;
 611                 wi[ch].grouping[0]    = 1;
 612
 613                 /* Only the lowest 12 coefficients are used in a LFE channel.
 614                  * The expression below results in only the bottom 8 coefficients
 615                  * being used for 11.025kHz to 16kHz sample rates.
 616                  */
 617                 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
 618             } else {
 619                 wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
 620                                               ics->window_sequence[0]);
 621             }
 622             ics->window_sequence[1] = ics->window_sequence[0];
 623             ics->window_sequence[0] = wi[ch].window_type[0];
 624             ics->use_kb_window[1]   = ics->use_kb_window[0];
 625             ics->use_kb_window[0]   = wi[ch].window_shape;
 626             ics->num_windows        = wi[ch].num_windows;
 627             ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
 628             ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
 629             clip_avoidance_factor = 0.0f;
 630             for (w = 0; w < ics->num_windows; w++)
 631                 ics->group_len[w] = wi[ch].grouping[w];
 632             for (w = 0; w < ics->num_windows; w++) {
 633                 if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
 634                     ics->window_clipping[w] = 1;
 635                     clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
 636                 } else {
 637                     ics->window_clipping[w] = 0;
 638                 }
 639             }
 640             if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
 641                 ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
 642             } else {
 643                 ics->clip_avoidance_factor = 1.0f;
 644             }
 645
 646             apply_window_and_mdct(s, &cpe->ch[ch], overlap);
 647             if (isnan(cpe->ch->coeffs[0])) {
 648                 av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
 649                 return AVERROR(EINVAL);
 650             }
 651             avoid_clipping(s, &cpe->ch[ch]);
 652         }
 653         start_ch += chans;
 654     }
 655     if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels, 0)) < 0)
 656         return ret;
 657     do {
 658         int frame_bits;
 659
 660         init_put_bits(&s->pb, avpkt->data, avpkt->size);
 661
 662         if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
 663             put_bitstream_info(s, LIBAVCODEC_IDENT);
 664         start_ch = 0;
 665         memset(chan_el_counter, 0, sizeof(chan_el_counter));
 666         for (i = 0; i < s->chan_map[0]; i++) {
 667             FFPsyWindowInfo* wi = windows + start_ch;
 668             const float *coeffs[2];
 669             tag      = s->chan_map[i+1];
 670             chans    = tag == TYPE_CPE ? 2 : 1;
 671             cpe      = &s->cpe[i];
 672             memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
 673             memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
 674             put_bits(&s->pb, 3, tag);
 675             put_bits(&s->pb, 4, chan_el_counter[tag]++);
 676             for (ch = 0; ch < chans; ch++)
 677                 coeffs[ch] = cpe->ch[ch].coeffs;
 678             s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
 679             for (ch = 0; ch < chans; ch++) {
 680                 s->cur_channel = start_ch + ch;
 681                 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
 682             }
 683             cpe->common_window = 0;
 684             if (chans > 1
 685                 && wi[0].window_type[0] == wi[1].window_type[0]
 686                 && wi[0].window_shape   == wi[1].window_shape) {
 687
 688                 cpe->common_window = 1;
 689                 for (w = 0; w < wi[0].num_windows; w++) {
 690                     if (wi[0].grouping[w] != wi[1].grouping[w]) {
 691                         cpe->common_window = 0;
 692                         break;
 693                     }
 694                 }
 695             }
 696             if (s->options.pns && s->coder->search_for_pns) {
 697                 for (ch = 0; ch < chans; ch++) {
 698                     s->cur_channel = start_ch + ch;
 699                     s->coder->search_for_pns(s, avctx, &cpe->ch[ch], s->lambda);
 700                 }
 701             }
 702             s->cur_channel = start_ch;
 703             if (s->options.stereo_mode && cpe->common_window) {
 704                 if (s->options.stereo_mode > 0) {
 705                     IndividualChannelStream *ics = &cpe->ch[0].ics;
 706                     for (w = 0; w < ics->num_windows; w += ics->group_len[w])
 707                         for (g = 0;  g < ics->num_swb; g++)
 708                             cpe->ms_mask[w*16+g] = 1;
 709                 } else if (s->coder->search_for_ms) {
 710                     s->coder->search_for_ms(s, cpe, s->lambda);
 711                 }
 712             }
 713             if (chans > 1 && s->options.intensity_stereo && s->coder->search_for_is) {
 714                 s->coder->search_for_is(s, avctx, cpe, s->lambda);
 715                 if (cpe->is_mode) is_mode = 1;
 716             }
 717             if (s->coder->set_special_band_scalefactors)
 718                 for (ch = 0; ch < chans; ch++)
 719                     s->coder->set_special_band_scalefactors(s, &cpe->ch[ch]);
 720             adjust_frame_information(cpe, chans);
 721             if (chans == 2) {
 722                 put_bits(&s->pb, 1, cpe->common_window);
 723                 if (cpe->common_window) {
 724                     put_ics_info(s, &cpe->ch[0].ics);
 725                     encode_ms_info(&s->pb, cpe);
 726                     if (cpe->ms_mode) ms_mode = 1;
 727                 }
 728             }
 729             for (ch = 0; ch < chans; ch++) {
 730                 s->cur_channel = start_ch + ch;
 731                 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
 732             }
 733             start_ch += chans;
 734         }
 735
 736         frame_bits = put_bits_count(&s->pb);
 737         if (frame_bits <= 6144 * s->channels - 3) {
 738             s->psy.bitres.bits = frame_bits / s->channels;
 739             break;
 740         }
 741         if (is_mode || ms_mode) {
 742             for (i = 0; i < s->chan_map[0]; i++) {
 743                 // Must restore coeffs
 744                 chans = tag == TYPE_CPE ? 2 : 1;
 745                 cpe = &s->cpe[i];
 746                 for (ch = 0; ch < chans; ch++)
 747                     memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
 748             }
 749         }
 750
 751         s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
 752
 753     } while (1);
 754
 755     put_bits(&s->pb, 3, TYPE_END);
 756     flush_put_bits(&s->pb);
 757     avctx->frame_bits = put_bits_count(&s->pb);
 758
 759     // rate control stuff
 760     if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
 761         float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
 762         s->lambda *= ratio;
 763         s->lambda = FFMIN(s->lambda, 65536.f);
 764     }
 765
 766     if (!frame)
 767         s->last_frame++;
 768
 769     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
 770                        &avpkt->duration);
 771
 772     avpkt->size = put_bits_count(&s->pb) >> 3;
 773     *got_packet_ptr = 1;
 774     return 0;
 775 }
 776
 777 static av_cold int aac_encode_end(AVCodecContext *avctx)
 778 {
 779     AACEncContext *s = avctx->priv_data;
 780
 781     ff_mdct_end(&s->mdct1024);
 782     ff_mdct_end(&s->mdct128);
 783     ff_psy_end(&s->psy);
 784     if (s->psypp)
 785         ff_psy_preprocess_end(s->psypp);
 786     av_freep(&s->buffer.samples);
 787     av_freep(&s->cpe);
 788     av_freep(&s->fdsp);
 789     ff_af_queue_close(&s->afq);
 790     return 0;
 791 }
 792
 793 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
 794 {
 795     int ret = 0;
 796
 797     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
 798     if (!s->fdsp)
 799         return AVERROR(ENOMEM);
 800
 801     // window init
 802     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
 803     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
 804     ff_init_ff_sine_windows(10);
 805     ff_init_ff_sine_windows(7);
 806
 807     if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
 808         return ret;
 809     if ((ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0)) < 0)
 810         return ret;
 811
 812     return 0;
 813 }
 814
 815 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
 816 {
 817     int ch;
 818     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
 819     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
 820     FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + AV_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
 821
 822     for(ch = 0; ch < s->channels; ch++)
 823         s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
 824
 825     return 0;
 826 alloc_fail:
 827     return AVERROR(ENOMEM);
 828 }
 829
 830 static av_cold int aac_encode_init(AVCodecContext *avctx)
 831 {
 832     AACEncContext *s = avctx->priv_data;
 833     int i, ret = 0;
 834     const uint8_t *sizes[2];
 835     uint8_t grouping[AAC_MAX_CHANNELS];
 836     int lengths[2];
 837
 838     avctx->frame_size = 1024;
 839
 840     for (i = 0; i < 16; i++)
 841         if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
 842             break;
 843
 844     s->channels = avctx->channels;
 845
 846     ERROR_IF(i == 16
 847                 || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
 848                 || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
 849              "Unsupported sample rate %d\n", avctx->sample_rate);
 850     ERROR_IF(s->channels > AAC_MAX_CHANNELS,
 851              "Unsupported number of channels: %d\n", s->channels);
 852     ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
 853              "Unsupported profile %d\n", avctx->profile);
 854     WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
 855              "Too many bits per frame requested, clamping to max\n");
 856
 857     avctx->bit_rate = (int)FFMIN(
 858         6144 * s->channels / 1024.0 * avctx->sample_rate,
 859         avctx->bit_rate);
 860
 861     s->samplerate_index = i;
 862
 863     s->chan_map = aac_chan_configs[s->channels-1];
 864
 865     if ((ret = dsp_init(avctx, s)) < 0)
 866         goto fail;
 867
 868     if ((ret = alloc_buffers(avctx, s)) < 0)
 869         goto fail;
 870
 871     avctx->extradata_size = 5;
 872     put_audio_specific_config(avctx);
 873
 874     sizes[0]   = swb_size_1024[i];
 875     sizes[1]   = swb_size_128[i];
 876     lengths[0] = ff_aac_num_swb_1024[i];
 877     lengths[1] = ff_aac_num_swb_128[i];
 878     for (i = 0; i < s->chan_map[0]; i++)
 879         grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
 880     if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
 881                            s->chan_map[0], grouping)) < 0)
 882         goto fail;
 883     s->psypp = ff_psy_preprocess_init(avctx);
 884     s->coder = &ff_aac_coders[s->options.aac_coder];
 885
 886     if (HAVE_MIPSDSPR1)
 887         ff_aac_coder_init_mips(s);
 888
 889     s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
 890
 891     ff_aac_tableinit();
 892
 893     avctx->initial_padding = 1024;
 894     ff_af_queue_init(avctx, &s->afq);
 895
 896     return 0;
 897 fail:
 898     aac_encode_end(avctx);
 899     return ret;
 900 }
 901
 902 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
 903 static const AVOption aacenc_options[] = {
 904     {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
 905         {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 906         {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 907         {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 908     {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
 909         {"faac",     "FAAC-inspired method",      0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 910         {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 911         {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 912         {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 913     {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pns"},
 914         {"disable",  "Disable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
 915         {"enable",   "Enable perceptual noise substitution",  0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
 916     {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "intensity_stereo"},
 917         {"disable",  "Disable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
 918         {"enable",   "Enable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
 919     {NULL}
 920 };
 921
 922 static const AVClass aacenc_class = {
 923     "AAC encoder",
 924     av_default_item_name,
 925     aacenc_options,
 926     LIBAVUTIL_VERSION_INT,
 927 };
 928
 929 /* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
 930  * failures */
 931 static const int mpeg4audio_sample_rates[16] = {
 932     96000, 88200, 64000, 48000, 44100, 32000,
 933     24000, 22050, 16000, 12000, 11025, 8000, 7350
 934 };
 935
 936 AVCodec ff_aac_encoder = {
 937     .name           = "aac",
 938     .long_name      = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
 939     .type           = AVMEDIA_TYPE_AUDIO,
 940     .id             = AV_CODEC_ID_AAC,
 941     .priv_data_size = sizeof(AACEncContext),
 942     .init           = aac_encode_init,
 943     .encode2        = aac_encode_frame,
 944     .close          = aac_encode_end,
 945     .supported_samplerates = mpeg4audio_sample_rates,
 946     .capabilities   = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY |
 947                       AV_CODEC_CAP_EXPERIMENTAL,
 948     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
 949                                                      AV_SAMPLE_FMT_NONE },
 950     .priv_class     = &aacenc_class,
 951 };