git.sesse.net Git - ffmpeg/blob - libavcodec/aacenc.c

   1 /*
   2  * AAC encoder
   3  * Copyright (C) 2008 Konstantin Shishkov
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * AAC encoder
  25  */
  26
  27 /***********************************
  28  *              TODOs:
  29  * add sane pulse detection
  30  * add temporal noise shaping
  31  ***********************************/
  32
  33 #include "libavutil/float_dsp.h"
  34 #include "libavutil/opt.h"
  35 #include "avcodec.h"
  36 #include "put_bits.h"
  37 #include "internal.h"
  38 #include "mpeg4audio.h"
  39 #include "kbdwin.h"
  40 #include "sinewin.h"
  41
  42 #include "aac.h"
  43 #include "aactab.h"
  44 #include "aacenc.h"
  45
  46 #include "psymodel.h"
  47
/** Largest channel count accepted by aac_encode_init(); also sizes the
 *  channel remapping table and the per-frame window info array. */
#define AAC_MAX_CHANNELS 6
  49
  50 #define ERROR_IF(cond, ...) \
  51     if (cond) { \
  52         av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
  53         return AVERROR(EINVAL); \
  54     }
  55
  56 #define WARN_IF(cond, ...) \
  57     if (cond) { \
  58         av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
  59     }
  60
/** Filled by aac_encode_init(): entry i is ff_aac_pow2sf_tab[i] raised to
 *  the power 3/4 (computed as sqrt(x * sqrt(x))). */
float ff_aac_pow34sf_tab[428];
  62
/*
 * Scalefactor band widths (in coefficients) used when a frame is coded with
 * a single window. One table per group of sample rates; the table to use is
 * picked through swb_size_1024[] below.
 */
static const uint8_t swb_size_1024_96[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_64[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
    12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
    40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
};

static const uint8_t swb_size_1024_48[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
    96
};

static const uint8_t swb_size_1024_32[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
};

static const uint8_t swb_size_1024_24[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
};

static const uint8_t swb_size_1024_16[] = {
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
    32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
};

static const uint8_t swb_size_1024_8[] = {
    12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
    16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
    32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
};

/**
 * Band width table for single-window frames, indexed by the sample rate
 * index determined in aac_encode_init(). Neighbouring sample rates share
 * a table, hence the repeated entries.
 */
static const uint8_t *swb_size_1024[] = {
    swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
    swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
    swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
    swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
    swb_size_1024_8
};
 113
/*
 * Scalefactor band widths (in coefficients) used when a frame is split into
 * eight windows. One table per group of sample rates; the table to use is
 * picked through swb_size_128[] below.
 */
static const uint8_t swb_size_128_96[] = {
    4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
};

static const uint8_t swb_size_128_48[] = {
    4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
};

static const uint8_t swb_size_128_24[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
};

static const uint8_t swb_size_128_16[] = {
    4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
};

static const uint8_t swb_size_128_8[] = {
    4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
};

/**
 * Band width table for eight-window frames, indexed by the sample rate
 * index determined in aac_encode_init().
 */
static const uint8_t *swb_size_128[] = {
    /* the last entry on the following row is swb_size_128_64 but is a
       duplicate of swb_size_128_96 */
    swb_size_128_96, swb_size_128_96, swb_size_128_96,
    swb_size_128_48, swb_size_128_48, swb_size_128_48,
    swb_size_128_24, swb_size_128_24, swb_size_128_16,
    swb_size_128_16, swb_size_128_16, swb_size_128_8,
    swb_size_128_8
};
 143
/**
 * Default channel configurations, indexed by (number of channels - 1).
 * The first byte of each row is the number of syntax elements that follow;
 * the remaining bytes are the element types in the order they are written.
 */
static const uint8_t aac_chan_configs[6][5] = {
 {1, TYPE_SCE},                               // 1 channel  - single channel element
 {1, TYPE_CPE},                               // 2 channels - channel pair
 {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
};
 153
/**
 * Table to remap channels from libavcodec's default order to AAC order.
 * Rows are indexed by (number of channels - 1); entry [ch] of a row is the
 * index of the input plane that copy_input_samples() reads for encoder
 * channel ch.
 */
static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
    { 0 },
    { 0, 1 },
    { 2, 0, 1 },
    { 2, 0, 1, 3 },
    { 2, 0, 1, 3, 4 },
    { 2, 0, 1, 4, 5, 3 },
};
 165
 166 /**
 167  * Make AAC audio config object.
 168  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
 169  */
 170 static void put_audio_specific_config(AVCodecContext *avctx)
 171 {
 172     PutBitContext pb;
 173     AACEncContext *s = avctx->priv_data;
 174
 175     init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
 176     put_bits(&pb, 5, 2); //object type - AAC-LC
 177     put_bits(&pb, 4, s->samplerate_index); //sample rate index
 178     put_bits(&pb, 4, s->channels);
 179     //GASpecificConfig
 180     put_bits(&pb, 1, 0); //frame length - 1024 samples
 181     put_bits(&pb, 1, 0); //does not depend on core coder
 182     put_bits(&pb, 1, 0); //is not extension
 183
 184     //Explicitly Mark SBR absent
 185     put_bits(&pb, 11, 0x2b7); //sync extension
 186     put_bits(&pb, 5,  AOT_SBR);
 187     put_bits(&pb, 1,  0);
 188     flush_put_bits(&pb);
 189 }
 190
/**
 * Emit the signature of a windowing function named apply_<type>_window().
 * Each such function takes the float DSP context, the channel whose
 * ret_buf receives the windowed samples, and the input samples.
 */
#define WINDOW_FUNC(type) \
static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
                                    SingleChannelElement *sce, \
                                    const float *audio)
 195
 196 WINDOW_FUNC(only_long)
 197 {
 198     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 199     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 200     float *out = sce->ret_buf;
 201
 202     fdsp->vector_fmul        (out,        audio,        lwindow, 1024);
 203     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
 204 }
 205
 206 WINDOW_FUNC(long_start)
 207 {
 208     const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 209     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
 210     float *out = sce->ret_buf;
 211
 212     fdsp->vector_fmul(out, audio, lwindow, 1024);
 213     memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
 214     fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
 215     memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
 216 }
 217
 218 WINDOW_FUNC(long_stop)
 219 {
 220     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 221     const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
 222     float *out = sce->ret_buf;
 223
 224     memset(out, 0, sizeof(out[0]) * 448);
 225     fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
 226     memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
 227     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
 228 }
 229
 230 WINDOW_FUNC(eight_short)
 231 {
 232     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
 233     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
 234     const float *in = audio + 448;
 235     float *out = sce->ret_buf;
 236     int w;
 237
 238     for (w = 0; w < 8; w++) {
 239         fdsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
 240         out += 128;
 241         in  += 128;
 242         fdsp->vector_fmul_reverse(out, in, swindow, 128);
 243         out += 128;
 244     }
 245 }
 246
/**
 * Windowing function for each window sequence, indexed by the sequence
 * value; apply_window_and_mdct() dispatches through this table.
 */
static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
                                     SingleChannelElement *sce,
                                     const float *audio) = {
    [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
    [LONG_START_SEQUENCE]  = apply_long_start_window,
    [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
    [LONG_STOP_SEQUENCE]   = apply_long_stop_window
};
 255
 256 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
 257                                   float *audio)
 258 {
 259     int i;
 260     float *output = sce->ret_buf;
 261
 262     apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
 263
 264     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
 265         s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
 266     else
 267         for (i = 0; i < 1024; i += 128)
 268             s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
 269     memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
 270     memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
 271 }
 272
 273 /**
 274  * Encode ics_info element.
 275  * @see Table 4.6 (syntax of ics_info)
 276  */
 277 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
 278 {
 279     int w;
 280
 281     put_bits(&s->pb, 1, 0);                // ics_reserved bit
 282     put_bits(&s->pb, 2, info->window_sequence[0]);
 283     put_bits(&s->pb, 1, info->use_kb_window[0]);
 284     if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
 285         put_bits(&s->pb, 6, info->max_sfb);
 286         put_bits(&s->pb, 1, 0);            // no prediction
 287     } else {
 288         put_bits(&s->pb, 4, info->max_sfb);
 289         for (w = 1; w < 8; w++)
 290             put_bits(&s->pb, 1, !info->group_len[w]);
 291     }
 292 }
 293
 294 /**
 295  * Encode MS data.
 296  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
 297  */
 298 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
 299 {
 300     int i, w;
 301
 302     put_bits(pb, 2, cpe->ms_mode);
 303     if (cpe->ms_mode == 1)
 304         for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
 305             for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
 306                 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
 307 }
 308
/**
 * Finalize per-element frame information before the bitstream is written.
 *
 * For every channel of the element this
 *  - clears the pulse count,
 *  - while handling channel 0 of a common-window pair, converts the bands
 *    flagged in ms_mask from left/right to mid/side, rebuilding coeffs of
 *    both channels from the saved pcoeffs,
 *  - derives max_sfb from the highest band that is not flagged as zero,
 *  - collapses the per-window zero flags into one flag per window group.
 * For a common-window pair it then makes max_sfb identical in both channels
 * and derives ms_mode from the number of set ms_mask entries.
 *
 * @param cpe   channel element to process
 * @param chans number of channels in the element
 */
static void adjust_frame_information(ChannelElement *cpe, int chans)
{
    int i, w, w2, g, ch;
    int start, maxsfb, cmaxsfb;

    for (ch = 0; ch < chans; ch++) {
        IndividualChannelStream *ics = &cpe->ch[ch].ics;
        start = 0;
        maxsfb = 0;
        cpe->ch[ch].pulse.num_pulse = 0;
        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
            for (w2 = 0; w2 < ics->group_len[w]; w2++) {
                // each window of the group starts 128 coefficients after the previous one
                start = (w+w2) * 128;
                for (g = 0; g < ics->num_swb; g++) {
                    //apply M/S
                    // done once per pair (ch == 0) and written to both channels:
                    // ch[0] becomes (L+R)/2, ch[1] becomes mid - R
                    if (cpe->common_window && !ch && cpe->ms_mask[w*16 + g]) {
                        for (i = 0; i < ics->swb_sizes[g]; i++) {
                            cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
                            cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
                        }
                    }
                    start += ics->swb_sizes[g];
                }
                // skip trailing bands flagged as zero; cmaxsfb ends up as the
                // number of bands up to and including the last non-zero one
                for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
                    ;
                maxsfb = FFMAX(maxsfb, cmaxsfb);
            }
        }
        ics->max_sfb = maxsfb;

        //adjust zero bands for window groups
        // a band of a group is flagged zero only if it is zero in every
        // window of that group; the result is stored at the group's first window
        for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
            for (g = 0; g < ics->max_sfb; g++) {
                i = 1;
                for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
                    if (!cpe->ch[ch].zeroes[w2*16 + g]) {
                        i = 0;
                        break;
                    }
                }
                cpe->ch[ch].zeroes[w*16 + g] = i;
            }
        }
    }

    if (chans > 1 && cpe->common_window) {
        IndividualChannelStream *ics0 = &cpe->ch[0].ics;
        IndividualChannelStream *ics1 = &cpe->ch[1].ics;
        int msc = 0;
        // both channels share one ics_info, so they must agree on max_sfb
        ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
        ics1->max_sfb = ics0->max_sfb;
        // count the set mask entries over all windows
        for (w = 0; w < ics0->num_windows*16; w += 16)
            for (i = 0; i < ics0->max_sfb; i++)
                if (cpe->ms_mask[w+i])
                    msc++;
        // ms_mode: 0 = no entry set, 1 = some entries set (mask is written
        // by encode_ms_info()), 2 = every entry set
        if (msc == 0 || ics0->max_sfb == 0)
            cpe->ms_mode = 0;
        else
            cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
    }
}
 373
 374 /**
 375  * Encode scalefactor band coding type.
 376  */
 377 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
 378 {
 379     int w;
 380
 381     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
 382         s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
 383 }
 384
 385 /**
 386  * Encode scalefactors.
 387  */
 388 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
 389                                  SingleChannelElement *sce)
 390 {
 391     int off = sce->sf_idx[0], diff;
 392     int i, w;
 393
 394     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 395         for (i = 0; i < sce->ics.max_sfb; i++) {
 396             if (!sce->zeroes[w*16 + i]) {
 397                 diff = sce->sf_idx[w*16 + i] - off + SCALE_DIFF_ZERO;
 398                 av_assert0(diff >= 0 && diff <= 120);
 399                 off = sce->sf_idx[w*16 + i];
 400                 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
 401             }
 402         }
 403     }
 404 }
 405
 406 /**
 407  * Encode pulse data.
 408  */
 409 static void encode_pulses(AACEncContext *s, Pulse *pulse)
 410 {
 411     int i;
 412
 413     put_bits(&s->pb, 1, !!pulse->num_pulse);
 414     if (!pulse->num_pulse)
 415         return;
 416
 417     put_bits(&s->pb, 2, pulse->num_pulse - 1);
 418     put_bits(&s->pb, 6, pulse->start);
 419     for (i = 0; i < pulse->num_pulse; i++) {
 420         put_bits(&s->pb, 5, pulse->pos[i]);
 421         put_bits(&s->pb, 4, pulse->amp[i]);
 422     }
 423 }
 424
 425 /**
 426  * Encode spectral coefficients processed by psychoacoustic model.
 427  */
 428 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
 429 {
 430     int start, i, w, w2;
 431
 432     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 433         start = 0;
 434         for (i = 0; i < sce->ics.max_sfb; i++) {
 435             if (sce->zeroes[w*16 + i]) {
 436                 start += sce->ics.swb_sizes[i];
 437                 continue;
 438             }
 439             for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
 440                 s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
 441                                                    sce->ics.swb_sizes[i],
 442                                                    sce->sf_idx[w*16 + i],
 443                                                    sce->band_type[w*16 + i],
 444                                                    s->lambda);
 445             start += sce->ics.swb_sizes[i];
 446         }
 447     }
 448 }
 449
 450 /**
 451  * Encode one channel of audio data.
 452  */
 453 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
 454                                      SingleChannelElement *sce,
 455                                      int common_window)
 456 {
 457     put_bits(&s->pb, 8, sce->sf_idx[0]);
 458     if (!common_window)
 459         put_ics_info(s, &sce->ics);
 460     encode_band_info(s, sce);
 461     encode_scale_factors(avctx, s, sce);
 462     encode_pulses(s, &sce->pulse);
 463     put_bits(&s->pb, 1, 0); //tns
 464     put_bits(&s->pb, 1, 0); //ssr
 465     encode_spectral_coeffs(s, sce);
 466     return 0;
 467 }
 468
 469 /**
 470  * Write some auxiliary information about the created AAC file.
 471  */
 472 static void put_bitstream_info(AACEncContext *s, const char *name)
 473 {
 474     int i, namelen, padbits;
 475
 476     namelen = strlen(name) + 2;
 477     put_bits(&s->pb, 3, TYPE_FIL);
 478     put_bits(&s->pb, 4, FFMIN(namelen, 15));
 479     if (namelen >= 15)
 480         put_bits(&s->pb, 8, namelen - 14);
 481     put_bits(&s->pb, 4, 0); //extension type - filler
 482     padbits = -put_bits_count(&s->pb) & 7;
 483     avpriv_align_put_bits(&s->pb);
 484     for (i = 0; i < namelen - 2; i++)
 485         put_bits(&s->pb, 8, name[i]);
 486     put_bits(&s->pb, 12 - padbits, 0);
 487 }
 488
 489 /*
 490  * Copy input samples.
 491  * Channels are reordered from libavcodec's default order to AAC order.
 492  */
 493 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
 494 {
 495     int ch;
 496     int end = 2048 + (frame ? frame->nb_samples : 0);
 497     const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
 498
 499     /* copy and remap input samples */
 500     for (ch = 0; ch < s->channels; ch++) {
 501         /* copy last 1024 samples of previous frame to the start of the current frame */
 502         memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
 503
 504         /* copy new samples and zero any remaining samples */
 505         if (frame) {
 506             memcpy(&s->planar_samples[ch][2048],
 507                    frame->extended_data[channel_map[ch]],
 508                    frame->nb_samples * sizeof(s->planar_samples[0][0]));
 509         }
 510         memset(&s->planar_samples[ch][end], 0,
 511                (3072 - end) * sizeof(s->planar_samples[0][0]));
 512     }
 513 }
 514
 515 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 516                             const AVFrame *frame, int *got_packet_ptr)
 517 {
 518     AACEncContext *s = avctx->priv_data;
 519     float **samples = s->planar_samples, *samples2, *la, *overlap;
 520     ChannelElement *cpe;
 521     int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0;
 522     int chan_el_counter[4];
 523     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
 524
 525     if (s->last_frame == 2)
 526         return 0;
 527
 528     /* add current frame to queue */
 529     if (frame) {
 530         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
 531             return ret;
 532     }
 533
 534     copy_input_samples(s, frame);
 535     if (s->psypp)
 536         ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
 537
 538     if (!avctx->frame_number)
 539         return 0;
 540
 541     start_ch = 0;
 542     for (i = 0; i < s->chan_map[0]; i++) {
 543         FFPsyWindowInfo* wi = windows + start_ch;
 544         tag      = s->chan_map[i+1];
 545         chans    = tag == TYPE_CPE ? 2 : 1;
 546         cpe      = &s->cpe[i];
 547         for (ch = 0; ch < chans; ch++) {
 548             IndividualChannelStream *ics = &cpe->ch[ch].ics;
 549             int cur_channel = start_ch + ch;
 550             overlap  = &samples[cur_channel][0];
 551             samples2 = overlap + 1024;
 552             la       = samples2 + (448+64);
 553             if (!frame)
 554                 la = NULL;
 555             if (tag == TYPE_LFE) {
 556                 wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
 557                 wi[ch].window_shape   = 0;
 558                 wi[ch].num_windows    = 1;
 559                 wi[ch].grouping[0]    = 1;
 560
 561                 /* Only the lowest 12 coefficients are used in a LFE channel.
 562                  * The expression below results in only the bottom 8 coefficients
 563                  * being used for 11.025kHz to 16kHz sample rates.
 564                  */
 565                 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
 566             } else {
 567                 wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
 568                                               ics->window_sequence[0]);
 569             }
 570             ics->window_sequence[1] = ics->window_sequence[0];
 571             ics->window_sequence[0] = wi[ch].window_type[0];
 572             ics->use_kb_window[1]   = ics->use_kb_window[0];
 573             ics->use_kb_window[0]   = wi[ch].window_shape;
 574             ics->num_windows        = wi[ch].num_windows;
 575             ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
 576             ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
 577             for (w = 0; w < ics->num_windows; w++)
 578                 ics->group_len[w] = wi[ch].grouping[w];
 579
 580             apply_window_and_mdct(s, &cpe->ch[ch], overlap);
 581             if (isnan(cpe->ch->coeffs[0])) {
 582                 av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
 583                 return AVERROR(EINVAL);
 584             }
 585         }
 586         start_ch += chans;
 587     }
 588     if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
 589         return ret;
 590     do {
 591         int frame_bits;
 592
 593         init_put_bits(&s->pb, avpkt->data, avpkt->size);
 594
 595         if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
 596             put_bitstream_info(s, LIBAVCODEC_IDENT);
 597         start_ch = 0;
 598         memset(chan_el_counter, 0, sizeof(chan_el_counter));
 599         for (i = 0; i < s->chan_map[0]; i++) {
 600             FFPsyWindowInfo* wi = windows + start_ch;
 601             const float *coeffs[2];
 602             tag      = s->chan_map[i+1];
 603             chans    = tag == TYPE_CPE ? 2 : 1;
 604             cpe      = &s->cpe[i];
 605             put_bits(&s->pb, 3, tag);
 606             put_bits(&s->pb, 4, chan_el_counter[tag]++);
 607             for (ch = 0; ch < chans; ch++)
 608                 coeffs[ch] = cpe->ch[ch].coeffs;
 609             s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
 610             for (ch = 0; ch < chans; ch++) {
 611                 s->cur_channel = start_ch + ch;
 612                 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
 613             }
 614             cpe->common_window = 0;
 615             if (chans > 1
 616                 && wi[0].window_type[0] == wi[1].window_type[0]
 617                 && wi[0].window_shape   == wi[1].window_shape) {
 618
 619                 cpe->common_window = 1;
 620                 for (w = 0; w < wi[0].num_windows; w++) {
 621                     if (wi[0].grouping[w] != wi[1].grouping[w]) {
 622                         cpe->common_window = 0;
 623                         break;
 624                     }
 625                 }
 626             }
 627             s->cur_channel = start_ch;
 628             if (s->options.stereo_mode && cpe->common_window) {
 629                 if (s->options.stereo_mode > 0) {
 630                     IndividualChannelStream *ics = &cpe->ch[0].ics;
 631                     for (w = 0; w < ics->num_windows; w += ics->group_len[w])
 632                         for (g = 0;  g < ics->num_swb; g++)
 633                             cpe->ms_mask[w*16+g] = 1;
 634                 } else if (s->coder->search_for_ms) {
 635                     s->coder->search_for_ms(s, cpe, s->lambda);
 636                 }
 637             }
 638             adjust_frame_information(cpe, chans);
 639             if (chans == 2) {
 640                 put_bits(&s->pb, 1, cpe->common_window);
 641                 if (cpe->common_window) {
 642                     put_ics_info(s, &cpe->ch[0].ics);
 643                     encode_ms_info(&s->pb, cpe);
 644                     if (cpe->ms_mode) ms_mode = 1;
 645                 }
 646             }
 647             for (ch = 0; ch < chans; ch++) {
 648                 s->cur_channel = start_ch + ch;
 649                 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
 650             }
 651             start_ch += chans;
 652         }
 653
 654         frame_bits = put_bits_count(&s->pb);
 655         if (frame_bits <= 6144 * s->channels - 3) {
 656             s->psy.bitres.bits = frame_bits / s->channels;
 657             break;
 658         }
 659         if (ms_mode) {
 660             for (i = 0; i < s->chan_map[0]; i++) {
 661                 // Must restore coeffs
 662                 chans = tag == TYPE_CPE ? 2 : 1;
 663                 cpe = &s->cpe[i];
 664                 for (ch = 0; ch < chans; ch++)
 665                     memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
 666             }
 667         }
 668
 669         s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
 670
 671     } while (1);
 672
 673     put_bits(&s->pb, 3, TYPE_END);
 674     flush_put_bits(&s->pb);
 675     avctx->frame_bits = put_bits_count(&s->pb);
 676
 677     // rate control stuff
 678     if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
 679         float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
 680         s->lambda *= ratio;
 681         s->lambda = FFMIN(s->lambda, 65536.f);
 682     }
 683
 684     if (!frame)
 685         s->last_frame++;
 686
 687     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
 688                        &avpkt->duration);
 689
 690     avpkt->size = put_bits_count(&s->pb) >> 3;
 691     *got_packet_ptr = 1;
 692     return 0;
 693 }
 694
 695 static av_cold int aac_encode_end(AVCodecContext *avctx)
 696 {
 697     AACEncContext *s = avctx->priv_data;
 698
 699     ff_mdct_end(&s->mdct1024);
 700     ff_mdct_end(&s->mdct128);
 701     ff_psy_end(&s->psy);
 702     if (s->psypp)
 703         ff_psy_preprocess_end(s->psypp);
 704     av_freep(&s->buffer.samples);
 705     av_freep(&s->cpe);
 706     av_freep(&s->fdsp);
 707     ff_af_queue_close(&s->afq);
 708     return 0;
 709 }
 710
 711 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
 712 {
 713     int ret = 0;
 714
 715     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
 716     if (!s->fdsp)
 717         return AVERROR(ENOMEM);
 718
 719     // window init
 720     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
 721     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
 722     ff_init_ff_sine_windows(10);
 723     ff_init_ff_sine_windows(7);
 724
 725     if (ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0))
 726         return ret;
 727     if (ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0))
 728         return ret;
 729
 730     return 0;
 731 }
 732
 733 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
 734 {
 735     int ch;
 736     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
 737     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
 738     FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
 739
 740     for(ch = 0; ch < s->channels; ch++)
 741         s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
 742
 743     return 0;
 744 alloc_fail:
 745     return AVERROR(ENOMEM);
 746 }
 747
 748 static av_cold int aac_encode_init(AVCodecContext *avctx)
 749 {
 750     AACEncContext *s = avctx->priv_data;
 751     int i, ret = 0;
 752     const uint8_t *sizes[2];
 753     uint8_t grouping[AAC_MAX_CHANNELS];
 754     int lengths[2];
 755
 756     avctx->frame_size = 1024;
 757
 758     for (i = 0; i < 16; i++)
 759         if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
 760             break;
 761
 762     s->channels = avctx->channels;
 763
 764     ERROR_IF(i == 16
 765                 || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
 766                 || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
 767              "Unsupported sample rate %d\n", avctx->sample_rate);
 768     ERROR_IF(s->channels > AAC_MAX_CHANNELS,
 769              "Unsupported number of channels: %d\n", s->channels);
 770     ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
 771              "Unsupported profile %d\n", avctx->profile);
 772     WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
 773              "Too many bits per frame requested, clamping to max\n");
 774
 775     avctx->bit_rate = (int)FFMIN(
 776         6144 * s->channels / 1024.0 * avctx->sample_rate,
 777         avctx->bit_rate);
 778
 779     s->samplerate_index = i;
 780
 781     s->chan_map = aac_chan_configs[s->channels-1];
 782
 783     if ((ret = dsp_init(avctx, s)) < 0)
 784         goto fail;
 785
 786     if ((ret = alloc_buffers(avctx, s)) < 0)
 787         goto fail;
 788
 789     avctx->extradata_size = 5;
 790     put_audio_specific_config(avctx);
 791
 792     sizes[0]   = swb_size_1024[i];
 793     sizes[1]   = swb_size_128[i];
 794     lengths[0] = ff_aac_num_swb_1024[i];
 795     lengths[1] = ff_aac_num_swb_128[i];
 796     for (i = 0; i < s->chan_map[0]; i++)
 797         grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
 798     if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
 799                            s->chan_map[0], grouping)) < 0)
 800         goto fail;
 801     s->psypp = ff_psy_preprocess_init(avctx);
 802     s->coder = &ff_aac_coders[s->options.aac_coder];
 803
 804     if (HAVE_MIPSDSPR1)
 805         ff_aac_coder_init_mips(s);
 806
 807     s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
 808
 809     ff_aac_tableinit();
 810
 811     for (i = 0; i < 428; i++)
 812         ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
 813
 814     avctx->initial_padding = 1024;
 815     ff_af_queue_init(avctx, &s->afq);
 816
 817     return 0;
 818 fail:
 819     aac_encode_end(avctx);
 820     return ret;
 821 }
 822
 823 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
 824 static const AVOption aacenc_options[] = {
 825     {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
 826         {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 827         {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 828         {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 829     {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
 830         {"faac",     "FAAC-inspired method",      0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 831         {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 832         {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 833         {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 834     {NULL}
 835 };
 836
 837 static const AVClass aacenc_class = {
 838     "AAC encoder",
 839     av_default_item_name,
 840     aacenc_options,
 841     LIBAVUTIL_VERSION_INT,
 842 };
 843
/* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
 * failures.
 * Only 13 of the 16 entries are given explicitly; the remaining ones are
 * zero-initialized. The array is advertised as the encoder's list of
 * supported sample rates. */
static const int mpeg4audio_sample_rates[16] = {
    96000, 88200, 64000, 48000, 44100, 32000,
    24000, 22050, 16000, 12000, 11025, 8000, 7350
};
 850
/**
 * Codec descriptor registering the encoder under the name "aac".
 * It wires up the init, encode and close callbacks defined in this file,
 * exposes the private options through aacenc_class and lists
 * AV_SAMPLE_FMT_FLTP as the only accepted sample format.
 */
AVCodec ff_aac_encoder = {
    .name           = "aac",
    .long_name      = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_AAC,
    .priv_data_size = sizeof(AACEncContext),
    .init           = aac_encode_init,
    .encode2        = aac_encode_frame,
    .close          = aac_encode_end,
    .supported_samplerates = mpeg4audio_sample_rates,
    .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
                      CODEC_CAP_EXPERIMENTAL,
    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
                                                     AV_SAMPLE_FMT_NONE },
    .priv_class     = &aacenc_class,
};