git.sesse.net Git - ffmpeg/blob - libavcodec/aacenc.c

   1 /*
   2  * AAC encoder
   3  * Copyright (C) 2008 Konstantin Shishkov
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * AAC encoder
  25  */
  26
  27 /***********************************
  28  *              TODOs:
  29  * add sane pulse detection
  30  * add temporal noise shaping
  31  ***********************************/
  32
  33 #include "libavutil/float_dsp.h"
  34 #include "libavutil/opt.h"
  35 #include "avcodec.h"
  36 #include "put_bits.h"
  37 #include "internal.h"
  38 #include "mpeg4audio.h"
  39 #include "kbdwin.h"
  40 #include "sinewin.h"
  41
  42 #include "aac.h"
  43 #include "aactab.h"
  44 #include "aacenc.h"
  45
  46 #include "psymodel.h"
  47
  48 #define AAC_MAX_CHANNELS 6
  49
  50 #define ERROR_IF(cond, ...) \
  51     if (cond) { \
  52         av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
  53         return AVERROR(EINVAL); \
  54     }
  55
  56 #define WARN_IF(cond, ...) \
  57     if (cond) { \
  58         av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
  59     }
  60
  61 float ff_aac_pow34sf_tab[428];
  62
  63 static const uint8_t swb_size_1024_96[] = {
  64     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
  65     12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
  66     64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
  67 };
  68
  69 static const uint8_t swb_size_1024_64[] = {
  70     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
  71     12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
  72     40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
  73 };
  74
  75 static const uint8_t swb_size_1024_48[] = {
  76     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
  77     12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
  78     32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
  79     96
  80 };
  81
  82 static const uint8_t swb_size_1024_32[] = {
  83     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
  84     12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
  85     32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
  86 };
  87
  88 static const uint8_t swb_size_1024_24[] = {
  89     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  90     12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
  91     32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
  92 };
  93
  94 static const uint8_t swb_size_1024_16[] = {
  95     8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
  96     12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
  97     32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
  98 };
  99
 100 static const uint8_t swb_size_1024_8[] = {
 101     12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
 102     16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
 103     32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
 104 };
 105
 106 static const uint8_t *swb_size_1024[] = {
 107     swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
 108     swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
 109     swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
 110     swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
 111     swb_size_1024_8
 112 };
 113
 114 static const uint8_t swb_size_128_96[] = {
 115     4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
 116 };
 117
 118 static const uint8_t swb_size_128_48[] = {
 119     4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
 120 };
 121
 122 static const uint8_t swb_size_128_24[] = {
 123     4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
 124 };
 125
 126 static const uint8_t swb_size_128_16[] = {
 127     4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
 128 };
 129
 130 static const uint8_t swb_size_128_8[] = {
 131     4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
 132 };
 133
 134 static const uint8_t *swb_size_128[] = {
 135     /* the last entry on the following row is swb_size_128_64 but is a
 136        duplicate of swb_size_128_96 */
 137     swb_size_128_96, swb_size_128_96, swb_size_128_96,
 138     swb_size_128_48, swb_size_128_48, swb_size_128_48,
 139     swb_size_128_24, swb_size_128_24, swb_size_128_16,
 140     swb_size_128_16, swb_size_128_16, swb_size_128_8,
 141     swb_size_128_8
 142 };
 143
 144 /** default channel configurations */
 145 static const uint8_t aac_chan_configs[6][5] = {
 146  {1, TYPE_SCE},                               // 1 channel  - single channel element
 147  {1, TYPE_CPE},                               // 2 channels - channel pair
 148  {2, TYPE_SCE, TYPE_CPE},                     // 3 channels - center + stereo
 149  {3, TYPE_SCE, TYPE_CPE, TYPE_SCE},           // 4 channels - front center + stereo + back center
 150  {3, TYPE_SCE, TYPE_CPE, TYPE_CPE},           // 5 channels - front center + stereo + back stereo
 151  {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
 152 };
 153
 154 /**
 155  * Table to remap channels from libavcodec's default order to AAC order.
 156  */
 157 static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
 158     { 0 },
 159     { 0, 1 },
 160     { 2, 0, 1 },
 161     { 2, 0, 1, 3 },
 162     { 2, 0, 1, 3, 4 },
 163     { 2, 0, 1, 4, 5, 3 },
 164 };
 165
 166 /**
 167  * Make AAC audio config object.
 168  * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
 169  */
 170 static void put_audio_specific_config(AVCodecContext *avctx)
 171 {
 172     PutBitContext pb;
 173     AACEncContext *s = avctx->priv_data;
 174
 175     init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
 176     put_bits(&pb, 5, 2); //object type - AAC-LC
 177     put_bits(&pb, 4, s->samplerate_index); //sample rate index
 178     put_bits(&pb, 4, s->channels);
 179     //GASpecificConfig
 180     put_bits(&pb, 1, 0); //frame length - 1024 samples
 181     put_bits(&pb, 1, 0); //does not depend on core coder
 182     put_bits(&pb, 1, 0); //is not extension
 183
 184     //Explicitly Mark SBR absent
 185     put_bits(&pb, 11, 0x2b7); //sync extension
 186     put_bits(&pb, 5,  AOT_SBR);
 187     put_bits(&pb, 1,  0);
 188     flush_put_bits(&pb);
 189 }
 190
 191 #define WINDOW_FUNC(type) \
 192 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
 193                                     SingleChannelElement *sce, \
 194                                     const float *audio)
 195
 196 WINDOW_FUNC(only_long)
 197 {
 198     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 199     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 200     float *out = sce->ret_buf;
 201
 202     fdsp->vector_fmul        (out,        audio,        lwindow, 1024);
 203     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
 204 }
 205
 206 WINDOW_FUNC(long_start)
 207 {
 208     const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 209     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
 210     float *out = sce->ret_buf;
 211
 212     fdsp->vector_fmul(out, audio, lwindow, 1024);
 213     memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
 214     fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
 215     memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
 216 }
 217
 218 WINDOW_FUNC(long_stop)
 219 {
 220     const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
 221     const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
 222     float *out = sce->ret_buf;
 223
 224     memset(out, 0, sizeof(out[0]) * 448);
 225     fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
 226     memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
 227     fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
 228 }
 229
 230 WINDOW_FUNC(eight_short)
 231 {
 232     const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
 233     const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
 234     const float *in = audio + 448;
 235     float *out = sce->ret_buf;
 236     int w;
 237
 238     for (w = 0; w < 8; w++) {
 239         fdsp->vector_fmul        (out, in, w ? pwindow : swindow, 128);
 240         out += 128;
 241         in  += 128;
 242         fdsp->vector_fmul_reverse(out, in, swindow, 128);
 243         out += 128;
 244     }
 245 }
 246
 247 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
 248                                      SingleChannelElement *sce,
 249                                      const float *audio) = {
 250     [ONLY_LONG_SEQUENCE]   = apply_only_long_window,
 251     [LONG_START_SEQUENCE]  = apply_long_start_window,
 252     [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
 253     [LONG_STOP_SEQUENCE]   = apply_long_stop_window
 254 };
 255
 256 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
 257                                   float *audio)
 258 {
 259     int i;
 260     float *output = sce->ret_buf;
 261
 262     apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
 263
 264     if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
 265         s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
 266     else
 267         for (i = 0; i < 1024; i += 128)
 268             s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
 269     memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
 270     memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
 271 }
 272
 273 /**
 274  * Encode ics_info element.
 275  * @see Table 4.6 (syntax of ics_info)
 276  */
 277 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
 278 {
 279     int w;
 280
 281     put_bits(&s->pb, 1, 0);                // ics_reserved bit
 282     put_bits(&s->pb, 2, info->window_sequence[0]);
 283     put_bits(&s->pb, 1, info->use_kb_window[0]);
 284     if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
 285         put_bits(&s->pb, 6, info->max_sfb);
 286         put_bits(&s->pb, 1, 0);            // no prediction
 287     } else {
 288         put_bits(&s->pb, 4, info->max_sfb);
 289         for (w = 1; w < 8; w++)
 290             put_bits(&s->pb, 1, !info->group_len[w]);
 291     }
 292 }
 293
 294 /**
 295  * Encode MS data.
 296  * @see 4.6.8.1 "Joint Coding - M/S Stereo"
 297  */
 298 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
 299 {
 300     int i, w;
 301
 302     put_bits(pb, 2, cpe->ms_mode);
 303     if (cpe->ms_mode == 1)
 304         for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
 305             for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
 306                 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
 307 }
 308
 309 /**
 310  * Produce integer coefficients from scalefactors provided by the model.
 311  */
 312 static void adjust_frame_information(ChannelElement *cpe, int chans)
 313 {
 314     int i, w, w2, g, ch;
 315     int start, maxsfb, cmaxsfb;
 316
 317     for (ch = 0; ch < chans; ch++) {
 318         IndividualChannelStream *ics = &cpe->ch[ch].ics;
 319         start = 0;
 320         maxsfb = 0;
 321         cpe->ch[ch].pulse.num_pulse = 0;
 322         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 323             for (w2 = 0; w2 < ics->group_len[w]; w2++) {
 324                 start = (w+w2) * 128;
 325                 for (g = 0; g < ics->num_swb; g++) {
 326                     //apply M/S
 327                     if (cpe->common_window && !ch && cpe->ms_mask[w*16 + g]) {
 328                         for (i = 0; i < ics->swb_sizes[g]; i++) {
 329                             cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
 330                             cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
 331                         }
 332                     }
 333                     start += ics->swb_sizes[g];
 334                 }
 335                 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
 336                     ;
 337                 maxsfb = FFMAX(maxsfb, cmaxsfb);
 338             }
 339         }
 340         ics->max_sfb = maxsfb;
 341
 342         //adjust zero bands for window groups
 343         for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
 344             for (g = 0; g < ics->max_sfb; g++) {
 345                 i = 1;
 346                 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
 347                     if (!cpe->ch[ch].zeroes[w2*16 + g]) {
 348                         i = 0;
 349                         break;
 350                     }
 351                 }
 352                 cpe->ch[ch].zeroes[w*16 + g] = i;
 353             }
 354         }
 355     }
 356
 357     if (chans > 1 && cpe->common_window) {
 358         IndividualChannelStream *ics0 = &cpe->ch[0].ics;
 359         IndividualChannelStream *ics1 = &cpe->ch[1].ics;
 360         int msc = 0;
 361         ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
 362         ics1->max_sfb = ics0->max_sfb;
 363         for (w = 0; w < ics0->num_windows*16; w += 16)
 364             for (i = 0; i < ics0->max_sfb; i++)
 365                 if (cpe->ms_mask[w+i])
 366                     msc++;
 367         if (msc == 0 || ics0->max_sfb == 0)
 368             cpe->ms_mode = 0;
 369         else
 370             cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
 371     }
 372 }
 373
 374 /**
 375  * Encode scalefactor band coding type.
 376  */
 377 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
 378 {
 379     int w;
 380
 381     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
 382         s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
 383 }
 384
 385 /**
 386  * Encode scalefactors.
 387  */
 388 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
 389                                  SingleChannelElement *sce)
 390 {
 391     int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
 392     int noise_flag = 1;
 393     int i, w;
 394
 395     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 396         for (i = 0; i < sce->ics.max_sfb; i++) {
 397             if (!sce->zeroes[w*16 + i]) {
 398                 if (sce->band_type[w*16 + i] == NOISE_BT) {
 399                     diff = sce->sf_idx[w*16 + i] - off_pns;
 400                     off_pns = sce->sf_idx[w*16 + i];
 401                     if (noise_flag-- > 0) {
 402                         put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
 403                         continue;
 404                     }
 405                 } else {
 406                     diff = sce->sf_idx[w*16 + i] - off_sf;
 407                     off_sf = sce->sf_idx[w*16 + i];
 408                 }
 409                 diff += SCALE_DIFF_ZERO;
 410                 av_assert0(diff >= 0 && diff <= 120);
 411                 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
 412             }
 413         }
 414     }
 415 }
 416
 417 /**
 418  * Encode pulse data.
 419  */
 420 static void encode_pulses(AACEncContext *s, Pulse *pulse)
 421 {
 422     int i;
 423
 424     put_bits(&s->pb, 1, !!pulse->num_pulse);
 425     if (!pulse->num_pulse)
 426         return;
 427
 428     put_bits(&s->pb, 2, pulse->num_pulse - 1);
 429     put_bits(&s->pb, 6, pulse->start);
 430     for (i = 0; i < pulse->num_pulse; i++) {
 431         put_bits(&s->pb, 5, pulse->pos[i]);
 432         put_bits(&s->pb, 4, pulse->amp[i]);
 433     }
 434 }
 435
 436 /**
 437  * Encode spectral coefficients processed by psychoacoustic model.
 438  */
 439 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
 440 {
 441     int start, i, w, w2;
 442
 443     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 444         start = 0;
 445         for (i = 0; i < sce->ics.max_sfb; i++) {
 446             if (sce->zeroes[w*16 + i]) {
 447                 start += sce->ics.swb_sizes[i];
 448                 continue;
 449             }
 450             for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
 451                 s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
 452                                                    sce->ics.swb_sizes[i],
 453                                                    sce->sf_idx[w*16 + i],
 454                                                    sce->band_type[w*16 + i],
 455                                                    s->lambda);
 456             start += sce->ics.swb_sizes[i];
 457         }
 458     }
 459 }
 460
 461 /**
 462  * Encode one channel of audio data.
 463  */
 464 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
 465                                      SingleChannelElement *sce,
 466                                      int common_window)
 467 {
 468     put_bits(&s->pb, 8, sce->sf_idx[0]);
 469     if (!common_window)
 470         put_ics_info(s, &sce->ics);
 471     encode_band_info(s, sce);
 472     encode_scale_factors(avctx, s, sce);
 473     encode_pulses(s, &sce->pulse);
 474     put_bits(&s->pb, 1, 0); //tns
 475     put_bits(&s->pb, 1, 0); //ssr
 476     encode_spectral_coeffs(s, sce);
 477     return 0;
 478 }
 479
 480 /**
 481  * Write some auxiliary information about the created AAC file.
 482  */
 483 static void put_bitstream_info(AACEncContext *s, const char *name)
 484 {
 485     int i, namelen, padbits;
 486
 487     namelen = strlen(name) + 2;
 488     put_bits(&s->pb, 3, TYPE_FIL);
 489     put_bits(&s->pb, 4, FFMIN(namelen, 15));
 490     if (namelen >= 15)
 491         put_bits(&s->pb, 8, namelen - 14);
 492     put_bits(&s->pb, 4, 0); //extension type - filler
 493     padbits = -put_bits_count(&s->pb) & 7;
 494     avpriv_align_put_bits(&s->pb);
 495     for (i = 0; i < namelen - 2; i++)
 496         put_bits(&s->pb, 8, name[i]);
 497     put_bits(&s->pb, 12 - padbits, 0);
 498 }
 499
 500 /*
 501  * Copy input samples.
 502  * Channels are reordered from libavcodec's default order to AAC order.
 503  */
 504 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
 505 {
 506     int ch;
 507     int end = 2048 + (frame ? frame->nb_samples : 0);
 508     const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
 509
 510     /* copy and remap input samples */
 511     for (ch = 0; ch < s->channels; ch++) {
 512         /* copy last 1024 samples of previous frame to the start of the current frame */
 513         memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
 514
 515         /* copy new samples and zero any remaining samples */
 516         if (frame) {
 517             memcpy(&s->planar_samples[ch][2048],
 518                    frame->extended_data[channel_map[ch]],
 519                    frame->nb_samples * sizeof(s->planar_samples[0][0]));
 520         }
 521         memset(&s->planar_samples[ch][end], 0,
 522                (3072 - end) * sizeof(s->planar_samples[0][0]));
 523     }
 524 }
 525
 526 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 527                             const AVFrame *frame, int *got_packet_ptr)
 528 {
 529     AACEncContext *s = avctx->priv_data;
 530     float **samples = s->planar_samples, *samples2, *la, *overlap;
 531     ChannelElement *cpe;
 532     int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0;
 533     int chan_el_counter[4];
 534     FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
 535
 536     if (s->last_frame == 2)
 537         return 0;
 538
 539     /* add current frame to queue */
 540     if (frame) {
 541         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
 542             return ret;
 543     }
 544
 545     copy_input_samples(s, frame);
 546     if (s->psypp)
 547         ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
 548
 549     if (!avctx->frame_number)
 550         return 0;
 551
 552     start_ch = 0;
 553     for (i = 0; i < s->chan_map[0]; i++) {
 554         FFPsyWindowInfo* wi = windows + start_ch;
 555         tag      = s->chan_map[i+1];
 556         chans    = tag == TYPE_CPE ? 2 : 1;
 557         cpe      = &s->cpe[i];
 558         for (ch = 0; ch < chans; ch++) {
 559             IndividualChannelStream *ics = &cpe->ch[ch].ics;
 560             int cur_channel = start_ch + ch;
 561             overlap  = &samples[cur_channel][0];
 562             samples2 = overlap + 1024;
 563             la       = samples2 + (448+64);
 564             if (!frame)
 565                 la = NULL;
 566             if (tag == TYPE_LFE) {
 567                 wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
 568                 wi[ch].window_shape   = 0;
 569                 wi[ch].num_windows    = 1;
 570                 wi[ch].grouping[0]    = 1;
 571
 572                 /* Only the lowest 12 coefficients are used in a LFE channel.
 573                  * The expression below results in only the bottom 8 coefficients
 574                  * being used for 11.025kHz to 16kHz sample rates.
 575                  */
 576                 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
 577             } else {
 578                 wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
 579                                               ics->window_sequence[0]);
 580             }
 581             ics->window_sequence[1] = ics->window_sequence[0];
 582             ics->window_sequence[0] = wi[ch].window_type[0];
 583             ics->use_kb_window[1]   = ics->use_kb_window[0];
 584             ics->use_kb_window[0]   = wi[ch].window_shape;
 585             ics->num_windows        = wi[ch].num_windows;
 586             ics->swb_sizes          = s->psy.bands    [ics->num_windows == 8];
 587             ics->num_swb            = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
 588             for (w = 0; w < ics->num_windows; w++)
 589                 ics->group_len[w] = wi[ch].grouping[w];
 590
 591             apply_window_and_mdct(s, &cpe->ch[ch], overlap);
 592             if (isnan(cpe->ch->coeffs[0])) {
 593                 av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
 594                 return AVERROR(EINVAL);
 595             }
 596         }
 597         start_ch += chans;
 598     }
 599     if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels)) < 0)
 600         return ret;
 601     do {
 602         int frame_bits;
 603
 604         init_put_bits(&s->pb, avpkt->data, avpkt->size);
 605
 606         if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & CODEC_FLAG_BITEXACT))
 607             put_bitstream_info(s, LIBAVCODEC_IDENT);
 608         start_ch = 0;
 609         memset(chan_el_counter, 0, sizeof(chan_el_counter));
 610         for (i = 0; i < s->chan_map[0]; i++) {
 611             FFPsyWindowInfo* wi = windows + start_ch;
 612             const float *coeffs[2];
 613             tag      = s->chan_map[i+1];
 614             chans    = tag == TYPE_CPE ? 2 : 1;
 615             cpe      = &s->cpe[i];
 616             put_bits(&s->pb, 3, tag);
 617             put_bits(&s->pb, 4, chan_el_counter[tag]++);
 618             for (ch = 0; ch < chans; ch++)
 619                 coeffs[ch] = cpe->ch[ch].coeffs;
 620             s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
 621             for (ch = 0; ch < chans; ch++) {
 622                 s->cur_channel = start_ch + ch;
 623                 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
 624             }
 625             cpe->common_window = 0;
 626             if (chans > 1
 627                 && wi[0].window_type[0] == wi[1].window_type[0]
 628                 && wi[0].window_shape   == wi[1].window_shape) {
 629
 630                 cpe->common_window = 1;
 631                 for (w = 0; w < wi[0].num_windows; w++) {
 632                     if (wi[0].grouping[w] != wi[1].grouping[w]) {
 633                         cpe->common_window = 0;
 634                         break;
 635                     }
 636                 }
 637             }
 638             s->cur_channel = start_ch;
 639             if (s->options.stereo_mode && cpe->common_window) {
 640                 if (s->options.stereo_mode > 0) {
 641                     IndividualChannelStream *ics = &cpe->ch[0].ics;
 642                     for (w = 0; w < ics->num_windows; w += ics->group_len[w])
 643                         for (g = 0;  g < ics->num_swb; g++)
 644                             cpe->ms_mask[w*16+g] = 1;
 645                 } else if (s->coder->search_for_ms) {
 646                     s->coder->search_for_ms(s, cpe, s->lambda);
 647                 }
 648             }
 649             adjust_frame_information(cpe, chans);
 650             if (chans == 2) {
 651                 put_bits(&s->pb, 1, cpe->common_window);
 652                 if (cpe->common_window) {
 653                     put_ics_info(s, &cpe->ch[0].ics);
 654                     encode_ms_info(&s->pb, cpe);
 655                     if (cpe->ms_mode) ms_mode = 1;
 656                 }
 657             }
 658             for (ch = 0; ch < chans; ch++) {
 659                 s->cur_channel = start_ch + ch;
 660                 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
 661             }
 662             start_ch += chans;
 663         }
 664
 665         frame_bits = put_bits_count(&s->pb);
 666         if (frame_bits <= 6144 * s->channels - 3) {
 667             s->psy.bitres.bits = frame_bits / s->channels;
 668             break;
 669         }
 670         if (ms_mode) {
 671             for (i = 0; i < s->chan_map[0]; i++) {
 672                 // Must restore coeffs
 673                 chans = tag == TYPE_CPE ? 2 : 1;
 674                 cpe = &s->cpe[i];
 675                 for (ch = 0; ch < chans; ch++)
 676                     memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
 677             }
 678         }
 679
 680         s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
 681
 682     } while (1);
 683
 684     put_bits(&s->pb, 3, TYPE_END);
 685     flush_put_bits(&s->pb);
 686     avctx->frame_bits = put_bits_count(&s->pb);
 687
 688     // rate control stuff
 689     if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
 690         float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
 691         s->lambda *= ratio;
 692         s->lambda = FFMIN(s->lambda, 65536.f);
 693     }
 694
 695     if (!frame)
 696         s->last_frame++;
 697
 698     ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
 699                        &avpkt->duration);
 700
 701     avpkt->size = put_bits_count(&s->pb) >> 3;
 702     *got_packet_ptr = 1;
 703     return 0;
 704 }
 705
 706 static av_cold int aac_encode_end(AVCodecContext *avctx)
 707 {
 708     AACEncContext *s = avctx->priv_data;
 709
 710     ff_mdct_end(&s->mdct1024);
 711     ff_mdct_end(&s->mdct128);
 712     ff_psy_end(&s->psy);
 713     if (s->psypp)
 714         ff_psy_preprocess_end(s->psypp);
 715     av_freep(&s->buffer.samples);
 716     av_freep(&s->cpe);
 717     av_freep(&s->fdsp);
 718     ff_af_queue_close(&s->afq);
 719     return 0;
 720 }
 721
 722 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
 723 {
 724     int ret = 0;
 725
 726     s->fdsp = avpriv_float_dsp_alloc(avctx->flags & CODEC_FLAG_BITEXACT);
 727     if (!s->fdsp)
 728         return AVERROR(ENOMEM);
 729
 730     // window init
 731     ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
 732     ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
 733     ff_init_ff_sine_windows(10);
 734     ff_init_ff_sine_windows(7);
 735
 736     if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
 737         return ret;
 738     if ((ret = ff_mdct_init(&s->mdct128,   8, 0, 32768.0)) < 0)
 739         return ret;
 740
 741     return 0;
 742 }
 743
 744 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
 745 {
 746     int ch;
 747     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
 748     FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
 749     FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + FF_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
 750
 751     for(ch = 0; ch < s->channels; ch++)
 752         s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
 753
 754     return 0;
 755 alloc_fail:
 756     return AVERROR(ENOMEM);
 757 }
 758
 759 static av_cold int aac_encode_init(AVCodecContext *avctx)
 760 {
 761     AACEncContext *s = avctx->priv_data;
 762     int i, ret = 0;
 763     const uint8_t *sizes[2];
 764     uint8_t grouping[AAC_MAX_CHANNELS];
 765     int lengths[2];
 766
 767     avctx->frame_size = 1024;
 768
 769     for (i = 0; i < 16; i++)
 770         if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
 771             break;
 772
 773     s->channels = avctx->channels;
 774
 775     ERROR_IF(i == 16
 776                 || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
 777                 || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
 778              "Unsupported sample rate %d\n", avctx->sample_rate);
 779     ERROR_IF(s->channels > AAC_MAX_CHANNELS,
 780              "Unsupported number of channels: %d\n", s->channels);
 781     ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
 782              "Unsupported profile %d\n", avctx->profile);
 783     WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
 784              "Too many bits per frame requested, clamping to max\n");
 785
 786     avctx->bit_rate = (int)FFMIN(
 787         6144 * s->channels / 1024.0 * avctx->sample_rate,
 788         avctx->bit_rate);
 789
 790     s->samplerate_index = i;
 791
 792     s->chan_map = aac_chan_configs[s->channels-1];
 793
 794     if ((ret = dsp_init(avctx, s)) < 0)
 795         goto fail;
 796
 797     if ((ret = alloc_buffers(avctx, s)) < 0)
 798         goto fail;
 799
 800     avctx->extradata_size = 5;
 801     put_audio_specific_config(avctx);
 802
 803     sizes[0]   = swb_size_1024[i];
 804     sizes[1]   = swb_size_128[i];
 805     lengths[0] = ff_aac_num_swb_1024[i];
 806     lengths[1] = ff_aac_num_swb_128[i];
 807     for (i = 0; i < s->chan_map[0]; i++)
 808         grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
 809     if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
 810                            s->chan_map[0], grouping)) < 0)
 811         goto fail;
 812     s->psypp = ff_psy_preprocess_init(avctx);
 813     s->coder = &ff_aac_coders[s->options.aac_coder];
 814
 815     if (HAVE_MIPSDSPR1)
 816         ff_aac_coder_init_mips(s);
 817
 818     s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
 819
 820     ff_aac_tableinit();
 821
 822     for (i = 0; i < 428; i++)
 823         ff_aac_pow34sf_tab[i] = sqrt(ff_aac_pow2sf_tab[i] * sqrt(ff_aac_pow2sf_tab[i]));
 824
 825     avctx->initial_padding = 1024;
 826     ff_af_queue_init(avctx, &s->afq);
 827
 828     return 0;
 829 fail:
 830     aac_encode_end(avctx);
 831     return ret;
 832 }
 833
 834 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
 835 static const AVOption aacenc_options[] = {
 836     {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
 837         {"auto",     "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 838         {"ms_off",   "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 839         {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
 840     {"aac_coder", "", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
 841         {"faac",     "FAAC-inspired method",      0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 842         {"anmr",     "ANMR method",               0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 843         {"twoloop",  "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 844         {"fast",     "Constant quantizer",        0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST},    INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
 845     {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pns"},
 846         {"disable",  "Disable PNS", 0, AV_OPT_TYPE_CONST, {.i64 =  0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
 847         {"enable",   "Enable PNS (Proof of concept)",  0, AV_OPT_TYPE_CONST, {.i64 =  1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
 848     {NULL}
 849 };
 850
 851 static const AVClass aacenc_class = {
 852     "AAC encoder",
 853     av_default_item_name,
 854     aacenc_options,
 855     LIBAVUTIL_VERSION_INT,
 856 };
 857
 858 /* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
 859  * failures */
 860 static const int mpeg4audio_sample_rates[16] = {
 861     96000, 88200, 64000, 48000, 44100, 32000,
 862     24000, 22050, 16000, 12000, 11025, 8000, 7350
 863 };
 864
 865 AVCodec ff_aac_encoder = {
 866     .name           = "aac",
 867     .long_name      = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
 868     .type           = AVMEDIA_TYPE_AUDIO,
 869     .id             = AV_CODEC_ID_AAC,
 870     .priv_data_size = sizeof(AACEncContext),
 871     .init           = aac_encode_init,
 872     .encode2        = aac_encode_frame,
 873     .close          = aac_encode_end,
 874     .supported_samplerates = mpeg4audio_sample_rates,
 875     .capabilities   = CODEC_CAP_SMALL_LAST_FRAME | CODEC_CAP_DELAY |
 876                       CODEC_CAP_EXPERIMENTAL,
 877     .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
 878                                                      AV_SAMPLE_FMT_NONE },
 879     .priv_class     = &aacenc_class,
 880 };