3 * Copyright (C) 2008 Konstantin Shishkov
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 /***********************************
29 * add sane pulse detection
30 * add temporal noise shaping
31 ***********************************/
33 #include "libavutil/float_dsp.h"
34 #include "libavutil/opt.h"
38 #include "mpeg4audio.h"
48 #define AAC_MAX_CHANNELS 6
/*
 * Validation helpers. Both log through av_log() and therefore need a
 * variable named avctx in scope at the expansion site.
 * ERROR_IF logs at AV_LOG_ERROR and makes the *calling* function return
 * AVERROR(EINVAL); WARN_IF only logs at AV_LOG_WARNING.
 * NOTE(review): the test on cond itself is not visible in this excerpt;
 * presumably both bodies are guarded by if (cond) - confirm.
 */
50 #define ERROR_IF(cond, ...) \
52 av_log(avctx, AV_LOG_ERROR, __VA_ARGS__); \
53 return AVERROR(EINVAL); \
56 #define WARN_IF(cond, ...) \
58 av_log(avctx, AV_LOG_WARNING, __VA_ARGS__); \
/*
 * Band-width tables selected through swb_size_1024[] below.
 * aac_encode_init() picks swb_size_1024[i], where i is the index of the
 * matching sample rate, and hands it to ff_psy_init() as sizes[0].
 * NOTE(review): the numeric suffix of each table presumably names the
 * sample rate in kHz it was designed for - confirm.
 */
62 static const uint8_t swb_size_1024_96[] = {
63 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8,
64 12, 12, 12, 12, 12, 16, 16, 24, 28, 36, 44,
65 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64
68 static const uint8_t swb_size_1024_64[] = {
69 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8,
70 12, 12, 12, 16, 16, 16, 20, 24, 24, 28, 36,
71 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40
74 static const uint8_t swb_size_1024_48[] = {
75 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
76 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
77 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32,
81 static const uint8_t swb_size_1024_32[] = {
82 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8,
83 12, 12, 12, 12, 16, 16, 20, 20, 24, 24, 28, 28,
84 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32
87 static const uint8_t swb_size_1024_24[] = {
88 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
89 12, 12, 12, 12, 16, 16, 16, 20, 20, 24, 24, 28, 28,
90 32, 36, 36, 40, 44, 48, 52, 52, 64, 64, 64, 64, 64
93 static const uint8_t swb_size_1024_16[] = {
94 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
95 12, 12, 12, 12, 12, 12, 12, 12, 12, 16, 16, 16, 16, 20, 20, 20, 24, 24, 28, 28,
96 32, 36, 40, 40, 44, 48, 52, 56, 60, 64, 64, 64
99 static const uint8_t swb_size_1024_8[] = {
100 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
101 16, 16, 16, 16, 16, 16, 16, 20, 20, 20, 20, 24, 24, 24, 28, 28,
102 32, 36, 36, 40, 44, 48, 52, 56, 60, 64, 80
/* Lookup by sample-rate index; several neighbouring indices share a table. */
105 static const uint8_t *swb_size_1024[] = {
106 swb_size_1024_96, swb_size_1024_96, swb_size_1024_64,
107 swb_size_1024_48, swb_size_1024_48, swb_size_1024_32,
108 swb_size_1024_24, swb_size_1024_24, swb_size_1024_16,
109 swb_size_1024_16, swb_size_1024_16, swb_size_1024_8,
/*
 * Band-width tables selected through swb_size_128[] below.
 * aac_encode_init() picks swb_size_128[i] for the matched sample-rate
 * index i and hands it to ff_psy_init() as sizes[1].
 */
113 static const uint8_t swb_size_128_96[] = {
114 4, 4, 4, 4, 4, 4, 8, 8, 8, 16, 28, 36
117 static const uint8_t swb_size_128_48[] = {
118 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 12, 16, 16, 16
121 static const uint8_t swb_size_128_24[] = {
122 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 20
125 static const uint8_t swb_size_128_16[] = {
126 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 12, 12, 16, 20, 20
129 static const uint8_t swb_size_128_8[] = {
130 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 12, 16, 20, 20
/* Lookup by sample-rate index, laid out in parallel with swb_size_1024[]. */
133 static const uint8_t *swb_size_128[] = {
134 /* the last entry on the following row is swb_size_128_64 but is a
135 duplicate of swb_size_128_96 */
136 swb_size_128_96, swb_size_128_96, swb_size_128_96,
137 swb_size_128_48, swb_size_128_48, swb_size_128_48,
138 swb_size_128_24, swb_size_128_24, swb_size_128_16,
139 swb_size_128_16, swb_size_128_16, swb_size_128_8,
143 /** default channel configurations */
/*
 * Row [n - 1] is used for an n-channel stream (see aac_encode_init()).
 * Byte 0 of a row is the number of syntactic elements; bytes 1.. are the
 * element types in the order they are written. The encoder reads them as
 * chan_map[0] and chan_map[i + 1] respectively.
 */
144 static const uint8_t aac_chan_configs[6][5] = {
145 {1, TYPE_SCE}, // 1 channel - single channel element
146 {1, TYPE_CPE}, // 2 channels - channel pair
147 {2, TYPE_SCE, TYPE_CPE}, // 3 channels - center + stereo
148 {3, TYPE_SCE, TYPE_CPE, TYPE_SCE}, // 4 channels - front center + stereo + back center
149 {3, TYPE_SCE, TYPE_CPE, TYPE_CPE}, // 5 channels - front center + stereo + back stereo
150 {4, TYPE_SCE, TYPE_CPE, TYPE_CPE, TYPE_LFE}, // 6 channels - front center + stereo + back stereo + LFE
154 * Table to remap channels from libavcodec's default order to AAC order.
/*
 * Row [channels - 1] is selected by copy_input_samples(); entry ch of that
 * row is the index into frame->extended_data that feeds encoder channel ch.
 * Only the 6-channel row is visible in this excerpt.
 */
156 static const uint8_t aac_chan_maps[AAC_MAX_CHANNELS][AAC_MAX_CHANNELS] = {
162 { 2, 0, 1, 4, 5, 3 },
166 * Make AAC audio config object.
167 * @see 1.6.2.1 "Syntax - AudioSpecificConfig"
/**
 * Serialise the stream configuration into avctx->extradata.
 *
 * The bit writer is initialised over avctx->extradata with
 * avctx->extradata_size bytes, so both must be set up by the caller
 * beforehand (aac_encode_init() does this).
 *
 * @param avctx codec context; priv_data supplies samplerate_index and
 *              channels
 */
169 static void put_audio_specific_config(AVCodecContext *avctx)
172 AACEncContext *s = avctx->priv_data;
174 init_put_bits(&pb, avctx->extradata, avctx->extradata_size);
175 put_bits(&pb, 5, 2); //object type - AAC-LC
176 put_bits(&pb, 4, s->samplerate_index); //sample rate index
177 put_bits(&pb, 4, s->channels); //channel count, 4 bits
179 put_bits(&pb, 1, 0); //frame length - 1024 samples
180 put_bits(&pb, 1, 0); //does not depend on core coder
181 put_bits(&pb, 1, 0); //is not extension
183 //Explicitly Mark SBR absent
184 put_bits(&pb, 11, 0x2b7); //sync extension
185 put_bits(&pb, 5, AOT_SBR);
/*
 * Expands to the signature of a static windowing function named
 * apply_<type>_window(); each use below is followed by the function body.
 * The parameter list continues past the lines visible in this excerpt.
 */
190 #define WINDOW_FUNC(type) \
191 static void apply_ ##type ##_window(AVFloatDSPContext *fdsp, \
192 SingleChannelElement *sce, \
/**
 * Window 2048 input samples into sce->ret_buf for the ONLY_LONG case.
 *
 * Samples [0, 1024) are multiplied by the window chosen by
 * use_kb_window[0]; samples [1024, 2048) go through vector_fmul_reverse()
 * with the window chosen by use_kb_window[1]. A set flag selects the KBD
 * table, a clear flag the sine table.
 */
195 WINDOW_FUNC(only_long)
197 const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
198 const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
199 float *out = sce->ret_buf;
201 fdsp->vector_fmul (out, audio, lwindow, 1024);
202 fdsp->vector_fmul_reverse(out + 1024, audio + 1024, pwindow, 1024);
/**
 * Window 2048 input samples into sce->ret_buf for the LONG_START case.
 *
 * Output layout:
 *   [0, 1024)     input times the long window (shape from use_kb_window[1])
 *   [1024, 1472)  input copied unchanged (448 samples)
 *   [1472, 1600)  input through vector_fmul_reverse() with the 128-point
 *                 short window (shape from use_kb_window[0])
 *   [1600, 2048)  zeroed (448 samples)
 */
205 WINDOW_FUNC(long_start)
207 const float *lwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
208 const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
209 float *out = sce->ret_buf;
211 fdsp->vector_fmul(out, audio, lwindow, 1024);
212 memcpy(out + 1024, audio + 1024, sizeof(out[0]) * 448);
213 fdsp->vector_fmul_reverse(out + 1024 + 448, audio + 1024 + 448, swindow, 128);
214 memset(out + 1024 + 576, 0, sizeof(out[0]) * 448);
/**
 * Window 2048 input samples into sce->ret_buf for the LONG_STOP case.
 *
 * Output layout:
 *   [0, 448)      zeroed
 *   [448, 576)    input times the 128-point short window (shape from
 *                 use_kb_window[1])
 *   [576, 1024)   input copied unchanged (448 samples)
 *   [1024, 2048)  input through vector_fmul_reverse() with the long window
 *                 (shape from use_kb_window[0])
 */
217 WINDOW_FUNC(long_stop)
219 const float *lwindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
220 const float *swindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
221 float *out = sce->ret_buf;
223 memset(out, 0, sizeof(out[0]) * 448);
224 fdsp->vector_fmul(out + 448, audio + 448, swindow, 128);
225 memcpy(out + 576, audio + 576, sizeof(out[0]) * 448);
226 fdsp->vector_fmul_reverse(out + 1024, audio + 1024, lwindow, 1024);
/**
 * Window the input into sce->ret_buf for the EIGHT_SHORT case.
 *
 * Reading starts 448 samples into audio. Each of the eight iterations
 * applies a 128-point window with vector_fmul() and then one with
 * vector_fmul_reverse(). The forward half uses swindow (use_kb_window[0])
 * for window 0 and pwindow (use_kb_window[1]) for windows 1..7; the
 * reversed half always uses swindow.
 * NOTE(review): the statements that advance in/out between the two calls
 * are not visible in this excerpt.
 */
229 WINDOW_FUNC(eight_short)
231 const float *swindow = sce->ics.use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
232 const float *pwindow = sce->ics.use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
233 const float *in = audio + 448;
234 float *out = sce->ret_buf;
237 for (w = 0; w < 8; w++) {
238 fdsp->vector_fmul (out, in, w ? pwindow : swindow, 128);
241 fdsp->vector_fmul_reverse(out, in, swindow, 128);
/*
 * Dispatch table from window-sequence value to windowing function.
 * apply_window_and_mdct() indexes it with sce->ics.window_sequence[0].
 */
246 static void (*const apply_window[4])(AVFloatDSPContext *fdsp,
247 SingleChannelElement *sce,
248 const float *audio) = {
249 [ONLY_LONG_SEQUENCE] = apply_only_long_window,
250 [LONG_START_SEQUENCE] = apply_long_start_window,
251 [EIGHT_SHORT_SEQUENCE] = apply_eight_short_window,
252 [LONG_STOP_SEQUENCE] = apply_long_stop_window
/**
 * Window one channel's samples and transform them into sce->coeffs.
 *
 * Steps, in order:
 *  1. window audio into sce->ret_buf using the function selected by
 *     window_sequence[0];
 *  2. run one mdct1024 transform, or - for the short-window loop below -
 *     mdct128 on each 256-sample slice of the windowed buffer, writing 128
 *     coefficients per slice;
 *  3. move audio[1024..2047] down to audio[0..1023];
 *  4. snapshot coeffs into sce->pcoeffs.
 *
 * NOTE(review): the branch keyword separating the long and short paths is
 * not visible in this excerpt; presumably the loop is the else branch.
 */
255 static void apply_window_and_mdct(AACEncContext *s, SingleChannelElement *sce,
259 float *output = sce->ret_buf;
261 apply_window[sce->ics.window_sequence[0]](s->fdsp, sce, audio);
263 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE)
264 s->mdct1024.mdct_calc(&s->mdct1024, sce->coeffs, output);
266 for (i = 0; i < 1024; i += 128)
267 s->mdct128.mdct_calc(&s->mdct128, sce->coeffs + i, output + i*2);
268 memcpy(audio, audio + 1024, sizeof(audio[0]) * 1024);
269 memcpy(sce->pcoeffs, sce->coeffs, sizeof(sce->pcoeffs));
273 * Encode ics_info element.
274 * @see Table 4.6 (syntax of ics_info)
/**
 * Write window sequence, window shape and max_sfb for one channel stream.
 *
 * For anything but EIGHT_SHORT, max_sfb takes 6 bits and is followed by a
 * single zero bit. The remaining lines write max_sfb in 4 bits and then
 * one bit per window 1..7, set when group_len[w] is zero.
 * NOTE(review): the else separating the two forms is not visible in this
 * excerpt; presumably the 4-bit form is the EIGHT_SHORT branch.
 *
 * @param s    encoder context; bits go to s->pb
 * @param info channel stream description to serialise
 */
276 static void put_ics_info(AACEncContext *s, IndividualChannelStream *info)
280 put_bits(&s->pb, 1, 0); // ics_reserved bit
281 put_bits(&s->pb, 2, info->window_sequence[0]);
282 put_bits(&s->pb, 1, info->use_kb_window[0]);
283 if (info->window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
284 put_bits(&s->pb, 6, info->max_sfb);
285 put_bits(&s->pb, 1, 0); // no prediction
287 put_bits(&s->pb, 4, info->max_sfb);
288 for (w = 1; w < 8; w++)
289 put_bits(&s->pb, 1, !info->group_len[w]);
295 * @see 4.6.8.1 "Joint Coding - M/S Stereo"
/**
 * Write the 2-bit ms_mode and, when it equals 1, the per-band mask.
 *
 * The mask is emitted group by group (stepping w by group_len[w]) for the
 * first max_sfb bands of channel 0, one bit per band, read from
 * ms_mask[w*16 + i].
 *
 * @param pb  destination bit writer
 * @param cpe channel element whose ms_mode / ms_mask are written
 */
297 static void encode_ms_info(PutBitContext *pb, ChannelElement *cpe)
301 put_bits(pb, 2, cpe->ms_mode);
302 if (cpe->ms_mode == 1)
303 for (w = 0; w < cpe->ch[0].ics.num_windows; w += cpe->ch[0].ics.group_len[w])
304 for (i = 0; i < cpe->ch[0].ics.max_sfb; i++)
305 put_bits(pb, 1, cpe->ms_mask[w*16 + i]);
309 * Apply intensity/mid-side stereo transforms to the coefficients and settle max_sfb, zero flags and ms_mode.
/**
 * Finalise per-element state after the coder searches have run.
 *
 * Three phases are visible:
 *  1. with a common window, rewrite coeffs from pcoeffs for bands flagged
 *     in is_mask (intensity stereo) or ms_mask (mid/side);
 *  2. per channel, clear the pulse count, derive max_sfb from the highest
 *     non-zero band and fold the zero flags per window group;
 *  3. for a pair with a common window, unify max_sfb across both channels
 *     and choose ms_mode from the mask population.
 *
 * @param cpe   channel element to update in place
 * @param chans number of channels in the element
 */
311 static void adjust_frame_information(ChannelElement *cpe, int chans)
315 IndividualChannelStream *ics;
317 if (cpe->common_window) {
318 ics = &cpe->ch[0].ics;
319 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
320 for (w2 = 0; w2 < ics->group_len[w]; w2++) {
321 int start = (w+w2) * 128;
322 for (g = 0; g < ics->num_swb; g++) {
323 //apply Intensity stereo coeffs transformation
324 if (cpe->is_mask[w*16 + g]) {
/* p becomes -1 or +1 depending on channel 1's band type.
 * NOTE(review): 14 presumably equals one of the INTENSITY_BT* constants - confirm. */
325 int p = -1 + 2 * (cpe->ch[1].band_type[w*16+g] - 14);
326 float scale = cpe->ch[0].is_ener[w*16+g];
327 for (i = 0; i < ics->swb_sizes[g]; i++) {
328 cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + p*cpe->ch[1].pcoeffs[start+i]) * scale;
329 cpe->ch[1].coeffs[start+i] = 0.0f;
/* Mid/side only when neither channel's band type is NOISE_BT or above. */
331 } else if (cpe->ms_mask[w*16 + g] &&
332 cpe->ch[0].band_type[w*16 + g] < NOISE_BT &&
333 cpe->ch[1].band_type[w*16 + g] < NOISE_BT) {
334 for (i = 0; i < ics->swb_sizes[g]; i++) {
/* ch0 = (L + R) / 2, then ch1 = ch0 - R, i.e. (L - R) / 2. */
335 cpe->ch[0].coeffs[start+i] = (cpe->ch[0].pcoeffs[start+i] + cpe->ch[1].pcoeffs[start+i]) * 0.5f;
336 cpe->ch[1].coeffs[start+i] = cpe->ch[0].coeffs[start+i] - cpe->ch[1].pcoeffs[start+i];
339 start += ics->swb_sizes[g];
345 for (ch = 0; ch < chans; ch++) {
346 IndividualChannelStream *ics = &cpe->ch[ch].ics;
348 cpe->ch[ch].pulse.num_pulse = 0;
349 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
/* NOTE(review): w2 is not used in the index expression below, so every
 * pass of this loop inspects the same row (w) - confirm this is intended. */
350 for (w2 = 0; w2 < ics->group_len[w]; w2++) {
351 for (cmaxsfb = ics->num_swb; cmaxsfb > 0 && cpe->ch[ch].zeroes[w*16+cmaxsfb-1]; cmaxsfb--)
353 maxsfb = FFMAX(maxsfb, cmaxsfb);
356 ics->max_sfb = maxsfb;
358 //adjust zero bands for window groups
359 for (w = 0; w < ics->num_windows; w += ics->group_len[w]) {
360 for (g = 0; g < ics->max_sfb; g++) {
362 for (w2 = w; w2 < w + ics->group_len[w]; w2++) {
363 if (!cpe->ch[ch].zeroes[w2*16 + g]) {
368 cpe->ch[ch].zeroes[w*16 + g] = i;
373 if (chans > 1 && cpe->common_window) {
374 IndividualChannelStream *ics0 = &cpe->ch[0].ics;
375 IndividualChannelStream *ics1 = &cpe->ch[1].ics;
/* Both channels of the pair end up with the larger max_sfb. */
377 ics0->max_sfb = FFMAX(ics0->max_sfb, ics1->max_sfb);
378 ics1->max_sfb = ics0->max_sfb;
379 for (w = 0; w < ics0->num_windows*16; w += 16)
380 for (i = 0; i < ics0->max_sfb; i++)
381 if (cpe->ms_mask[w+i])
383 if (msc == 0 || ics0->max_sfb == 0)
/* 1 while fewer than max_sfb * num_windows mask entries are set, else 2. */
386 cpe->ms_mode = msc < ics0->max_sfb * ics0->num_windows ? 1 : 2;
391 * Encode scalefactor band coding type.
/**
 * Delegate band-type coding to the active coder, once per window group.
 *
 * Calls s->coder->encode_window_bands_info() with the group's first window
 * index, its length and the current lambda.
 *
 * @param s   encoder context (coder, lambda)
 * @param sce channel to encode
 */
393 static void encode_band_info(AACEncContext *s, SingleChannelElement *sce)
397 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
398 s->coder->encode_window_bands_info(s, sce, w, sce->ics.group_len[w], s->lambda);
402 * Encode scalefactors.
/**
 * Write scalefactor indices as differences against running predictors.
 *
 * Three predictors are kept: off_sf (starts at sf_idx[0]), off_pns (starts
 * at sf_idx[0] - NOISE_OFFSET) and off_is (starts at 0). For every band not
 * flagged in zeroes[], the predictor matching the band type is used and
 * then updated to the band's sf_idx. The difference is biased by
 * SCALE_DIFF_ZERO, asserted to lie in [0, 120] and emitted through the
 * ff_aac_scalefactor_bits / ff_aac_scalefactor_code tables.
 *
 * The first NOISE_BT band is written raw in NOISE_PRE_BITS bits.
 * NOTE(review): the statement that skips the table-coded write for that
 * band is not visible in this excerpt.
 */
404 static void encode_scale_factors(AVCodecContext *avctx, AACEncContext *s,
405 SingleChannelElement *sce)
407 int diff, off_sf = sce->sf_idx[0], off_pns = sce->sf_idx[0] - NOISE_OFFSET;
408 int off_is = 0, noise_flag = 1;
411 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
412 for (i = 0; i < sce->ics.max_sfb; i++) {
413 if (!sce->zeroes[w*16 + i]) {
414 if (sce->band_type[w*16 + i] == NOISE_BT) {
415 diff = sce->sf_idx[w*16 + i] - off_pns;
416 off_pns = sce->sf_idx[w*16 + i];
/* True only the first time: noise_flag starts at 1 and is decremented on every test. */
417 if (noise_flag-- > 0) {
418 put_bits(&s->pb, NOISE_PRE_BITS, diff + NOISE_PRE);
421 } else if (sce->band_type[w*16 + i] == INTENSITY_BT ||
422 sce->band_type[w*16 + i] == INTENSITY_BT2) {
423 diff = sce->sf_idx[w*16 + i] - off_is;
424 off_is = sce->sf_idx[w*16 + i];
426 diff = sce->sf_idx[w*16 + i] - off_sf;
427 off_sf = sce->sf_idx[w*16 + i];
429 diff += SCALE_DIFF_ZERO;
430 av_assert0(diff >= 0 && diff <= 120);
431 put_bits(&s->pb, ff_aac_scalefactor_bits[diff], ff_aac_scalefactor_code[diff]);
/**
 * Write the pulse section for one channel.
 *
 * Layout: 1 presence bit; then, when pulses exist, num_pulse - 1 in 2 bits,
 * start in 6 bits and, per pulse, pos in 5 bits and amp in 4 bits.
 * NOTE(review): the early exit for num_pulse == 0 is not visible in this
 * excerpt; presumably the function returns right after the test.
 *
 * @param s     encoder context; bits go to s->pb
 * @param pulse pulse data to serialise
 */
440 static void encode_pulses(AACEncContext *s, Pulse *pulse)
444 put_bits(&s->pb, 1, !!pulse->num_pulse);
445 if (!pulse->num_pulse)
448 put_bits(&s->pb, 2, pulse->num_pulse - 1);
449 put_bits(&s->pb, 6, pulse->start);
450 for (i = 0; i < pulse->num_pulse; i++) {
451 put_bits(&s->pb, 5, pulse->pos[i]);
452 put_bits(&s->pb, 4, pulse->amp[i]);
457 * Encode spectral coefficients processed by psychoacoustic model.
/**
 * Quantise and write the spectral data of one channel.
 *
 * Walks window groups and, inside each, the first max_sfb bands. Bands
 * flagged in zeroes[] only advance the coefficient offset. Every other
 * band is passed to s->coder->quantize_and_encode_band() once per window
 * of the group, using the group's sf_idx, band_type and window_clipping
 * flag.
 * NOTE(review): the per-group initialisation of start is not visible in
 * this excerpt.
 */
459 static void encode_spectral_coeffs(AACEncContext *s, SingleChannelElement *sce)
463 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
465 for (i = 0; i < sce->ics.max_sfb; i++) {
466 if (sce->zeroes[w*16 + i]) {
467 start += sce->ics.swb_sizes[i];
470 for (w2 = w; w2 < w + sce->ics.group_len[w]; w2++)
471 s->coder->quantize_and_encode_band(s, &s->pb, sce->coeffs + start + w2*128,
472 sce->ics.swb_sizes[i],
473 sce->sf_idx[w*16 + i],
474 sce->band_type[w*16 + i],
475 s->lambda, sce->ics.window_clipping[w]);
476 start += sce->ics.swb_sizes[i];
482 * Downscale spectral coefficients for near-clipping windows to avoid artifacts
/**
 * Scale down a channel's coefficients by ics.clip_avoidance_factor.
 *
 * Does nothing unless the factor is below 1.0. Otherwise every coefficient
 * in the first max_sfb bands of every window is multiplied by the factor,
 * in place.
 *
 * @param s   encoder context (unused by the visible lines)
 * @param sce channel whose coeffs are scaled
 */
484 static void avoid_clipping(AACEncContext *s, SingleChannelElement *sce)
488 if (sce->ics.clip_avoidance_factor < 1.0f) {
489 for (w = 0; w < sce->ics.num_windows; w++) {
491 for (i = 0; i < sce->ics.max_sfb; i++) {
492 float *swb_coeffs = sce->coeffs + start + w*128;
493 for (j = 0; j < sce->ics.swb_sizes[i]; j++)
494 swb_coeffs[j] *= sce->ics.clip_avoidance_factor;
495 start += sce->ics.swb_sizes[i];
502 * Encode one channel of audio data.
/**
 * Write the complete bitstream section for one channel.
 *
 * Order of fields: sf_idx[0] in 8 bits, ics_info, band types,
 * scalefactors, pulse data, two zero flag bits (tns, ssr) and finally the
 * spectral coefficients.
 * NOTE(review): the caller passes cpe->common_window as a fourth argument;
 * the parameter and any test on it are not visible in this excerpt.
 * Presumably put_ics_info() is skipped when the window info was already
 * written for the pair - confirm.
 */
504 static int encode_individual_channel(AVCodecContext *avctx, AACEncContext *s,
505 SingleChannelElement *sce,
508 put_bits(&s->pb, 8, sce->sf_idx[0]);
510 put_ics_info(s, &sce->ics);
511 encode_band_info(s, sce);
512 encode_scale_factors(avctx, s, sce);
513 encode_pulses(s, &sce->pulse);
514 put_bits(&s->pb, 1, 0); //tns
515 put_bits(&s->pb, 1, 0); //ssr
516 encode_spectral_coeffs(s, sce);
521 * Write some auxiliary information about the created AAC file.
/**
 * Embed a text string in the stream as a TYPE_FIL element.
 *
 * namelen is strlen(name) + 2. The 4-bit count field holds
 * min(namelen, 15), and namelen - 14 is written in a further 8 bits.
 * NOTE(review): the condition guarding that 8-bit write is not visible in
 * this excerpt; presumably it only applies when namelen >= 15.
 *
 * After a 4-bit zero extension type the writer is byte-aligned, the
 * characters of name are written one byte each (without terminator), and
 * 12 - padbits zero bits close the element, padbits being the number of
 * bits the alignment consumed.
 *
 * @param s    encoder context; bits go to s->pb
 * @param name NUL-terminated string to embed
 */
523 static void put_bitstream_info(AACEncContext *s, const char *name)
525 int i, namelen, padbits;
527 namelen = strlen(name) + 2;
528 put_bits(&s->pb, 3, TYPE_FIL);
529 put_bits(&s->pb, 4, FFMIN(namelen, 15));
531 put_bits(&s->pb, 8, namelen - 14);
532 put_bits(&s->pb, 4, 0); //extension type - filler
/* Bits needed to reach the next byte boundary (0..7). */
533 padbits = -put_bits_count(&s->pb) & 7;
534 avpriv_align_put_bits(&s->pb);
535 for (i = 0; i < namelen - 2; i++)
536 put_bits(&s->pb, 8, name[i]);
537 put_bits(&s->pb, 12 - padbits, 0);
541 * Copy input samples.
542 * Channels are reordered from libavcodec's default order to AAC order.
/**
 * Shift each channel's sample buffer and append the new frame.
 *
 * Per channel: samples [2048, 3072) are moved to [1024, 2048), the new
 * frame's nb_samples values are copied to offset 2048 from the source
 * plane given by aac_chan_maps[channels - 1][ch], and everything from end
 * up to 3072 is zeroed.
 *
 * frame may be NULL; end is then 2048 and the whole [2048, 3072) range is
 * zeroed.
 * NOTE(review): the NULL test guarding the copy from frame is not visible
 * in this excerpt.
 *
 * @param s     encoder context owning planar_samples
 * @param frame input frame, or NULL
 */
544 static void copy_input_samples(AACEncContext *s, const AVFrame *frame)
547 int end = 2048 + (frame ? frame->nb_samples : 0);
548 const uint8_t *channel_map = aac_chan_maps[s->channels - 1];
550 /* copy and remap input samples */
551 for (ch = 0; ch < s->channels; ch++) {
552 /* copy last 1024 samples of previous frame to the start of the current frame */
553 memcpy(&s->planar_samples[ch][1024], &s->planar_samples[ch][2048], 1024 * sizeof(s->planar_samples[0][0]));
555 /* copy new samples and zero any remaining samples */
557 memcpy(&s->planar_samples[ch][2048],
558 frame->extended_data[channel_map[ch]],
559 frame->nb_samples * sizeof(s->planar_samples[0][0]));
561 memset(&s->planar_samples[ch][end], 0,
562 (3072 - end) * sizeof(s->planar_samples[0][0]));
/**
 * Encode one frame of audio into avpkt.
 *
 * Visible stages:
 *  1. queue the frame and copy/remap its samples into s->planar_samples;
 *  2. per element and channel: pick the window layout (fixed for LFE,
 *     otherwise from the psy model), record clipping information, window
 *     and transform the samples, then apply clip avoidance;
 *  3. allocate the packet and write every element: psy analysis, quantizer
 *     search, optional PNS / M/S / intensity-stereo searches, then the
 *     bitstream itself;
 *  4. accept the result when it fits in 6144 * channels - 3 bits, otherwise
 *     rescale lambda;
 *  5. write TYPE_END, update lambda for rate control and set packet
 *     size/timestamps.
 *
 * NOTE(review): the loop that repeats stage 3 after a lambda rescale, and
 * several early returns, are not visible in this excerpt.
 *
 * @return AVERROR(EINVAL) when the transformed input contains NaN; other
 *         return paths are not visible here
 */
566 static int aac_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
567 const AVFrame *frame, int *got_packet_ptr)
569 AACEncContext *s = avctx->priv_data;
570 float **samples = s->planar_samples, *samples2, *la, *overlap;
572 int i, ch, w, g, chans, tag, start_ch, ret, ms_mode = 0, is_mode = 0;
573 int chan_el_counter[4];
574 FFPsyWindowInfo windows[AAC_MAX_CHANNELS];
576 if (s->last_frame == 2)
579 /* add current frame to queue */
581 if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
585 copy_input_samples(s, frame);
587 ff_psy_preprocess(s->psypp, s->planar_samples, s->channels);
589 if (!avctx->frame_number)
// Stage 2: chan_map[0] is the element count, chan_map[i+1] the element type.
593 for (i = 0; i < s->chan_map[0]; i++) {
594 FFPsyWindowInfo* wi = windows + start_ch;
595 tag = s->chan_map[i+1];
596 chans = tag == TYPE_CPE ? 2 : 1;
598 for (ch = 0; ch < chans; ch++) {
599 IndividualChannelStream *ics = &cpe->ch[ch].ics;
600 int cur_channel = start_ch + ch;
601 float clip_avoidance_factor;
602 overlap = &samples[cur_channel][0];
603 samples2 = overlap + 1024;
604 la = samples2 + (448+64);
// LFE channels skip the psy model and always use one long window.
607 if (tag == TYPE_LFE) {
608 wi[ch].window_type[0] = ONLY_LONG_SEQUENCE;
609 wi[ch].window_shape = 0;
610 wi[ch].num_windows = 1;
611 wi[ch].grouping[0] = 1;
613 /* Only the lowest 12 coefficients are used in a LFE channel.
614 * The expression below results in only the bottom 8 coefficients
615 * being used for 11.025kHz to 16kHz sample rates.
617 ics->num_swb = s->samplerate_index >= 8 ? 1 : 3;
619 wi[ch] = s->psy.model->window(&s->psy, samples2, la, cur_channel,
620 ics->window_sequence[0]);
// Index [1] keeps the previous frame's value, [0] receives the new one.
622 ics->window_sequence[1] = ics->window_sequence[0];
623 ics->window_sequence[0] = wi[ch].window_type[0];
624 ics->use_kb_window[1] = ics->use_kb_window[0];
625 ics->use_kb_window[0] = wi[ch].window_shape;
626 ics->num_windows = wi[ch].num_windows;
627 ics->swb_sizes = s->psy.bands [ics->num_windows == 8];
628 ics->num_swb = tag == TYPE_LFE ? ics->num_swb : s->psy.num_bands[ics->num_windows == 8];
629 clip_avoidance_factor = 0.0f;
630 for (w = 0; w < ics->num_windows; w++)
631 ics->group_len[w] = wi[ch].grouping[w];
// Track the largest clipping value above the threshold across all windows.
632 for (w = 0; w < ics->num_windows; w++) {
633 if (wi[ch].clipping[w] > CLIP_AVOIDANCE_FACTOR) {
634 ics->window_clipping[w] = 1;
635 clip_avoidance_factor = FFMAX(clip_avoidance_factor, wi[ch].clipping[w]);
637 ics->window_clipping[w] = 0;
640 if (clip_avoidance_factor > CLIP_AVOIDANCE_FACTOR) {
641 ics->clip_avoidance_factor = CLIP_AVOIDANCE_FACTOR / clip_avoidance_factor;
643 ics->clip_avoidance_factor = 1.0f;
646 apply_window_and_mdct(s, &cpe->ch[ch], overlap);
// NOTE(review): this reads cpe->ch->coeffs[0], i.e. channel 0 of the element,
// although the transform above ran on cpe->ch[ch]; confirm whether
// cpe->ch[ch].coeffs[0] was intended.
647 if (isnan(cpe->ch->coeffs[0])) {
648 av_log(avctx, AV_LOG_ERROR, "Input contains NaN\n");
649 return AVERROR(EINVAL);
651 avoid_clipping(s, &cpe->ch[ch]);
// Stage 3: the packet is sized at 8192 bytes per channel.
655 if ((ret = ff_alloc_packet2(avctx, avpkt, 8192 * s->channels, 0)) < 0)
660 init_put_bits(&s->pb, avpkt->data, avpkt->size);
// Encoder ident is embedded when (frame_number & 0xFF) == 1, unless bitexact.
662 if ((avctx->frame_number & 0xFF)==1 && !(avctx->flags & AV_CODEC_FLAG_BITEXACT))
663 put_bitstream_info(s, LIBAVCODEC_IDENT);
665 memset(chan_el_counter, 0, sizeof(chan_el_counter));
666 for (i = 0; i < s->chan_map[0]; i++) {
667 FFPsyWindowInfo* wi = windows + start_ch;
668 const float *coeffs[2];
669 tag = s->chan_map[i+1];
670 chans = tag == TYPE_CPE ? 2 : 1;
672 memset(cpe->is_mask, 0, sizeof(cpe->is_mask));
673 memset(cpe->ms_mask, 0, sizeof(cpe->ms_mask));
// Element header: 3-bit type followed by a 4-bit per-type instance counter.
674 put_bits(&s->pb, 3, tag);
675 put_bits(&s->pb, 4, chan_el_counter[tag]++);
676 for (ch = 0; ch < chans; ch++)
677 coeffs[ch] = cpe->ch[ch].coeffs;
678 s->psy.model->analyze(&s->psy, start_ch, coeffs, wi);
679 for (ch = 0; ch < chans; ch++) {
680 s->cur_channel = start_ch + ch;
681 s->coder->search_for_quantizers(avctx, s, &cpe->ch[ch], s->lambda);
// A common window needs matching window type, shape and grouping on both channels.
683 cpe->common_window = 0;
685 && wi[0].window_type[0] == wi[1].window_type[0]
686 && wi[0].window_shape == wi[1].window_shape) {
688 cpe->common_window = 1;
689 for (w = 0; w < wi[0].num_windows; w++) {
690 if (wi[0].grouping[w] != wi[1].grouping[w]) {
691 cpe->common_window = 0;
696 if (s->options.pns && s->coder->search_for_pns) {
697 for (ch = 0; ch < chans; ch++) {
698 s->cur_channel = start_ch + ch;
699 s->coder->search_for_pns(s, avctx, &cpe->ch[ch]);
702 s->cur_channel = start_ch;
// stereo_mode > 0 forces the mask on for every band; a negative value
// defers to the coder's search_for_ms when it provides one.
703 if (s->options.stereo_mode && cpe->common_window) {
704 if (s->options.stereo_mode > 0) {
705 IndividualChannelStream *ics = &cpe->ch[0].ics;
706 for (w = 0; w < ics->num_windows; w += ics->group_len[w])
707 for (g = 0; g < ics->num_swb; g++)
708 cpe->ms_mask[w*16+g] = 1;
709 } else if (s->coder->search_for_ms) {
710 s->coder->search_for_ms(s, cpe);
713 if (chans > 1 && s->options.intensity_stereo && s->coder->search_for_is) {
714 s->coder->search_for_is(s, avctx, cpe);
715 if (cpe->is_mode) is_mode = 1;
717 if (s->coder->set_special_band_scalefactors)
718 for (ch = 0; ch < chans; ch++)
719 s->coder->set_special_band_scalefactors(s, &cpe->ch[ch]);
720 adjust_frame_information(cpe, chans);
722 put_bits(&s->pb, 1, cpe->common_window);
723 if (cpe->common_window) {
724 put_ics_info(s, &cpe->ch[0].ics);
725 encode_ms_info(&s->pb, cpe);
726 if (cpe->ms_mode) ms_mode = 1;
729 for (ch = 0; ch < chans; ch++) {
730 s->cur_channel = start_ch + ch;
731 encode_individual_channel(avctx, s, &cpe->ch[ch], cpe->common_window);
// Stage 4: accept the frame if it fits the per-channel bit budget.
736 frame_bits = put_bits_count(&s->pb);
737 if (frame_bits <= 6144 * s->channels - 3) {
738 s->psy.bitres.bits = frame_bits / s->channels;
// IS/MS rewrote coeffs in adjust_frame_information(); put the originals back.
741 if (is_mode || ms_mode) {
742 for (i = 0; i < s->chan_map[0]; i++) {
743 // Must restore coeffs
// NOTE(review): tag is not reassigned inside this loop in the lines visible
// here, so chans appears to reflect the last element handled by the
// encoding loop rather than element i - confirm.
744 chans = tag == TYPE_CPE ? 2 : 1;
746 for (ch = 0; ch < chans; ch++)
747 memcpy(cpe->ch[ch].coeffs, cpe->ch[ch].pcoeffs, sizeof(cpe->ch[ch].coeffs));
// Scale lambda by target bits per frame over bits actually produced.
751 s->lambda *= avctx->bit_rate * 1024.0f / avctx->sample_rate / frame_bits;
755 put_bits(&s->pb, 3, TYPE_END);
756 flush_put_bits(&s->pb);
757 avctx->frame_bits = put_bits_count(&s->pb);
759 // rate control stuff
760 if (!(avctx->flags & AV_CODEC_FLAG_QSCALE)) {
761 float ratio = avctx->bit_rate * 1024.0f / avctx->sample_rate / avctx->frame_bits;
763 s->lambda = FFMIN(s->lambda, 65536.f);
769 ff_af_queue_remove(&s->afq, avctx->frame_size, &avpkt->pts,
// Packet size in whole bytes.
772 avpkt->size = put_bits_count(&s->pb) >> 3;
// Release encoder resources: both MDCT contexts, the psy preprocessing
// context, the sample buffer and the audio frame queue.
// Also called by aac_encode_init() on its failure path.
777 static av_cold int aac_encode_end(AVCodecContext *avctx)
779 AACEncContext *s = avctx->priv_data;
781 ff_mdct_end(&s->mdct1024);
782 ff_mdct_end(&s->mdct128);
785 ff_psy_preprocess_end(s->psypp);
786 av_freep(&s->buffer.samples);
789 ff_af_queue_close(&s->afq);
// Set up the signal-processing state: the float DSP context, the KBD
// windows (1024 points with alpha 4.0, 128 points with alpha 6.0), the sine
// windows for indices 10 and 7, and the two MDCT contexts (both scaled by
// 32768.0).
// Returns AVERROR(ENOMEM) on the visible allocation-failure path.
793 static av_cold int dsp_init(AVCodecContext *avctx, AACEncContext *s)
797 s->fdsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
799 return AVERROR(ENOMEM);
802 ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
803 ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
804 ff_init_ff_sine_windows(10);
805 ff_init_ff_sine_windows(7);
807 if ((ret = ff_mdct_init(&s->mdct1024, 11, 0, 32768.0)) < 0)
809 if ((ret = ff_mdct_init(&s->mdct128, 8, 0, 32768.0)) < 0)
// Allocate the zero-initialised working buffers:
//  - one contiguous sample buffer of 3 * 1024 entries per channel,
//  - one ChannelElement per syntactic element (s->chan_map[0] of them),
//  - 5 bytes of extradata plus AV_INPUT_BUFFER_PADDING_SIZE.
// planar_samples[ch] is then pointed at channel ch's 3072-entry slice.
// Returns AVERROR(ENOMEM) on the alloc_fail path.
815 static av_cold int alloc_buffers(AVCodecContext *avctx, AACEncContext *s)
818 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->buffer.samples, s->channels, 3 * 1024 * sizeof(s->buffer.samples[0]), alloc_fail);
819 FF_ALLOCZ_ARRAY_OR_GOTO(avctx, s->cpe, s->chan_map[0], sizeof(ChannelElement), alloc_fail);
820 FF_ALLOCZ_OR_GOTO(avctx, avctx->extradata, 5 + AV_INPUT_BUFFER_PADDING_SIZE, alloc_fail);
822 for(ch = 0; ch < s->channels; ch++)
823 s->planar_samples[ch] = s->buffer.samples + 3 * 1024 * ch;
827 return AVERROR(ENOMEM);
// Initialise the encoder from the codec context.
//
// Visible steps: fix frame_size at 1024; find the index of
// avctx->sample_rate in avpriv_mpeg4audio_sample_rates; reject unsupported
// sample rates, channel counts above AAC_MAX_CHANNELS and profiles other
// than unknown / AAC-LC; warn about and clamp the bit rate to
// 6144 * channels bits per frame; select the channel configuration; set up
// DSP state and buffers; write the 5-byte extradata; initialise the psy
// model, the coder and the frame queue.
// NOTE(review): the break that ends the sample-rate search and the first
// operand of the sample-rate ERROR_IF are not visible in this excerpt.
830 static av_cold int aac_encode_init(AVCodecContext *avctx)
832 AACEncContext *s = avctx->priv_data;
834 const uint8_t *sizes[2];
835 uint8_t grouping[AAC_MAX_CHANNELS];
838 avctx->frame_size = 1024;
840 for (i = 0; i < 16; i++)
841 if (avctx->sample_rate == avpriv_mpeg4audio_sample_rates[i])
844 s->channels = avctx->channels;
// i must also be a valid index into both band-size pointer tables.
847 || i >= (sizeof(swb_size_1024) / sizeof(*swb_size_1024))
848 || i >= (sizeof(swb_size_128) / sizeof(*swb_size_128)),
849 "Unsupported sample rate %d\n", avctx->sample_rate);
850 ERROR_IF(s->channels > AAC_MAX_CHANNELS,
851 "Unsupported number of channels: %d\n", s->channels);
852 ERROR_IF(avctx->profile != FF_PROFILE_UNKNOWN && avctx->profile != FF_PROFILE_AAC_LOW,
853 "Unsupported profile %d\n", avctx->profile);
854 WARN_IF(1024.0 * avctx->bit_rate / avctx->sample_rate > 6144 * s->channels,
855 "Too many bits per frame requested, clamping to max\n");
857 avctx->bit_rate = (int)FFMIN(
858 6144 * s->channels / 1024.0 * avctx->sample_rate,
861 s->samplerate_index = i;
// Row channels - 1; only an upper bound on channels is checked in the lines visible above.
863 s->chan_map = aac_chan_configs[s->channels-1];
865 if ((ret = dsp_init(avctx, s)) < 0)
868 if ((ret = alloc_buffers(avctx, s)) < 0)
// extradata_size must be set before the config writer sizes its bit buffer.
871 avctx->extradata_size = 5;
872 put_audio_specific_config(avctx);
874 sizes[0] = swb_size_1024[i];
875 sizes[1] = swb_size_128[i];
876 lengths[0] = ff_aac_num_swb_1024[i];
877 lengths[1] = ff_aac_num_swb_128[i];
// i is reused as the element index from here on.
878 for (i = 0; i < s->chan_map[0]; i++)
879 grouping[i] = s->chan_map[i + 1] == TYPE_CPE;
880 if ((ret = ff_psy_init(&s->psy, avctx, 2, sizes, lengths,
881 s->chan_map[0], grouping)) < 0)
883 s->psypp = ff_psy_preprocess_init(avctx);
884 s->coder = &ff_aac_coders[s->options.aac_coder];
887 ff_aac_coder_init_mips(s);
// Starting lambda: global_quality when positive, otherwise 120.
889 s->lambda = avctx->global_quality > 0 ? avctx->global_quality : 120;
893 avctx->initial_padding = 1024;
894 ff_af_queue_init(avctx, &s->afq);
898 aac_encode_end(avctx);
// Private encoder options. Each AV_OPT_TYPE_INT entry names a field of
// AACEncContext.options and is followed by the named constants accepted
// for it (matched through the unit string in the last column).
//   stereo_mode: -1 auto, 0 ms_off (default), 1 ms_force
//   aac_coder:   faac, anmr, twoloop (default), fast
//   aac_pns:     0 disable (default), 1 enable
//   aac_is:      0 disable (default), 1 enable
902 #define AACENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
903 static const AVOption aacenc_options[] = {
904 {"stereo_mode", "Stereo coding method", offsetof(AACEncContext, options.stereo_mode), AV_OPT_TYPE_INT, {.i64 = 0}, -1, 1, AACENC_FLAGS, "stereo_mode"},
905 {"auto", "Selected by the Encoder", 0, AV_OPT_TYPE_CONST, {.i64 = -1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
906 {"ms_off", "Disable Mid/Side coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
907 {"ms_force", "Force Mid/Side for the whole frame if possible", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "stereo_mode"},
908 {"aac_coder", "Coding algorithm", offsetof(AACEncContext, options.aac_coder), AV_OPT_TYPE_INT, {.i64 = AAC_CODER_TWOLOOP}, 0, AAC_CODER_NB-1, AACENC_FLAGS, "aac_coder"},
909 {"faac", "FAAC-inspired method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAAC}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
910 {"anmr", "ANMR method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_ANMR}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
911 {"twoloop", "Two loop searching method", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_TWOLOOP}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
912 {"fast", "Constant quantizer", 0, AV_OPT_TYPE_CONST, {.i64 = AAC_CODER_FAST}, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_coder"},
913 {"aac_pns", "Perceptual Noise Substitution", offsetof(AACEncContext, options.pns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "aac_pns"},
914 {"disable", "Disable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 = 0 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
915 {"enable", "Enable perceptual noise substitution", 0, AV_OPT_TYPE_CONST, {.i64 = 1 }, INT_MIN, INT_MAX, AACENC_FLAGS, "aac_pns"},
916 {"aac_is", "Intensity stereo coding", offsetof(AACEncContext, options.intensity_stereo), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 1, AACENC_FLAGS, "intensity_stereo"},
917 {"disable", "Disable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 0}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
918 {"enable", "Enable intensity stereo coding", 0, AV_OPT_TYPE_CONST, {.i64 = 1}, INT_MIN, INT_MAX, AACENC_FLAGS, "intensity_stereo"},
// AVClass for the encoder's private context; referenced from
// ff_aac_encoder.priv_class. Initialisers are positional and only some of
// them are visible in this excerpt.
922 static const AVClass aacenc_class = {
924 av_default_item_name,
926 LIBAVUTIL_VERSION_INT,
929 /* duplicated from avpriv_mpeg4audio_sample_rates to avoid shared build
// Sample rates advertised through ff_aac_encoder.supported_samplerates.
// Thirteen rates are listed in a 16-entry array, so the remaining entries
// are zero-initialised.
931 static const int mpeg4audio_sample_rates[16] = {
932 96000, 88200, 64000, 48000, 44100, 32000,
933 24000, 22050, 16000, 12000, 11025, 8000, 7350
// Codec registration entry: wires the init / encode2 / close callbacks,
// the private context size and class, the supported sample rates and the
// single accepted sample format (planar float). The encoder is flagged as
// experimental, delayed and able to take a short last frame.
936 AVCodec ff_aac_encoder = {
938 .long_name = NULL_IF_CONFIG_SMALL("AAC (Advanced Audio Coding)"),
939 .type = AVMEDIA_TYPE_AUDIO,
940 .id = AV_CODEC_ID_AAC,
941 .priv_data_size = sizeof(AACEncContext),
942 .init = aac_encode_init,
943 .encode2 = aac_encode_frame,
944 .close = aac_encode_end,
945 .supported_samplerates = mpeg4audio_sample_rates,
946 .capabilities = AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY |
947 AV_CODEC_CAP_EXPERIMENTAL,
948 .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
949 AV_SAMPLE_FMT_NONE },
950 .priv_class = &aacenc_class,