#include "fft.h"
#include "fmtconvert.h"
#include "lpc.h"
+#include "kbdwin.h"
+#include "sinewin.h"
#include "aac.h"
#include "aactab.h"
* @return Returns error status. 0 - OK, !0 - error
*/
static av_cold int che_configure(AACContext *ac,
- enum ChannelPosition che_pos[4][MAX_ELEM_ID],
- int type, int id,
- int *channels)
+ enum ChannelPosition che_pos[4][MAX_ELEM_ID],
+ int type, int id, int *channels)
{
if (che_pos[type][id]) {
if (!ac->che[type][id] && !(ac->che[type][id] = av_mallocz(sizeof(ChannelElement))))
return AVERROR(ENOMEM);
- ff_aac_sbr_ctx_init(&ac->che[type][id]->sbr);
+ ff_aac_sbr_ctx_init(ac, &ac->che[type][id]->sbr);
if (type != TYPE_CCE) {
ac->output_data[(*channels)++] = ac->che[type][id]->ch[0].ret;
if (type == TYPE_CPE ||
* @return Returns error status. 0 - OK, !0 - error
*/
static av_cold int output_configure(AACContext *ac,
- enum ChannelPosition che_pos[4][MAX_ELEM_ID],
- enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
- int channel_config, enum OCStatus oc_type)
+ enum ChannelPosition che_pos[4][MAX_ELEM_ID],
+ enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
+ int channel_config, enum OCStatus oc_type)
{
AVCodecContext *avctx = ac->avctx;
int i, type, channels = 0, ret;
return ret;
}
- memset(ac->tag_che_map, 0, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
+ memset(ac->tag_che_map, 0, 4 * MAX_ELEM_ID * sizeof(ac->che[0][0]));
avctx->channel_layout = aac_channel_layout[channel_config - 1];
} else {
* @return Returns error status. 0 - OK, !0 - error
*/
static av_cold int set_default_channel_config(AVCodecContext *avctx,
- enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
- int channel_config)
+ enum ChannelPosition new_che_pos[4][MAX_ELEM_ID],
+ int channel_config)
{
if (channel_config < 1 || channel_config > 7) {
av_log(avctx, AV_LOG_ERROR, "invalid default channel configuration (%d)\n",
GetBitContext gb;
int i;
+ av_dlog(avctx, "extradata size %d\n", avctx->extradata_size);
+ for (i = 0; i < avctx->extradata_size; i++)
+ av_dlog(avctx, "%02x ", avctx->extradata[i]);
+ av_dlog(avctx, "\n");
+
init_get_bits(&gb, data, data_size * 8);
if ((i = ff_mpeg4audio_get_config(m4ac, data, data_size)) < 0)
return -1;
}
+ av_dlog(avctx, "AOT %d chan config %d sampling index %d (%d) SBR %d PS %d\n",
+ m4ac->object_type, m4ac->chan_config, m4ac->sampling_index,
+ m4ac->sample_rate, m4ac->sbr, m4ac->ps);
+
return get_bits_count(&gb);
}
reset_predict_state(&ps[i]);
}
+static int sample_rate_idx (int rate)
+{
+ if (92017 <= rate) return 0;
+ else if (75132 <= rate) return 1;
+ else if (55426 <= rate) return 2;
+ else if (46009 <= rate) return 3;
+ else if (37566 <= rate) return 4;
+ else if (27713 <= rate) return 5;
+ else if (23004 <= rate) return 6;
+ else if (18783 <= rate) return 7;
+ else if (13856 <= rate) return 8;
+ else if (11502 <= rate) return 9;
+ else if (9391 <= rate) return 10;
+ else return 11;
+}
+
static void reset_predictor_group(PredictorState *ps, int group_num)
{
int i;
static av_cold int aac_decode_init(AVCodecContext *avctx)
{
AACContext *ac = avctx->priv_data;
+ float output_scale_factor;
ac->avctx = avctx;
ac->m4ac.sample_rate = avctx->sample_rate;
avctx->extradata,
avctx->extradata_size) < 0)
return -1;
+ } else {
+ int sr, i;
+ enum ChannelPosition new_che_pos[4][MAX_ELEM_ID];
+
+ sr = sample_rate_idx(avctx->sample_rate);
+ ac->m4ac.sampling_index = sr;
+ ac->m4ac.channels = avctx->channels;
+ ac->m4ac.sbr = -1;
+ ac->m4ac.ps = -1;
+
+ for (i = 0; i < FF_ARRAY_ELEMS(ff_mpeg4audio_channels); i++)
+ if (ff_mpeg4audio_channels[i] == avctx->channels)
+ break;
+ if (i == FF_ARRAY_ELEMS(ff_mpeg4audio_channels)) {
+ i = 0;
+ }
+ ac->m4ac.chan_config = i;
+
+ if (ac->m4ac.chan_config) {
+ int ret = set_default_channel_config(avctx, new_che_pos, ac->m4ac.chan_config);
+ if (!ret)
+ output_configure(ac, ac->che_pos, new_che_pos, ac->m4ac.chan_config, OC_GLOBAL_HDR);
+ else if (avctx->error_recognition >= FF_ER_EXPLODE)
+ return AVERROR_INVALIDDATA;
+ }
}
- avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+ if (avctx->request_sample_fmt == AV_SAMPLE_FMT_FLT) {
+ avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
+ output_scale_factor = 1.0 / 32768.0;
+ } else {
+ avctx->sample_fmt = AV_SAMPLE_FMT_S16;
+ output_scale_factor = 1.0;
+ }
AAC_INIT_VLC_STATIC( 0, 304);
AAC_INIT_VLC_STATIC( 1, 270);
ac->random_state = 0x1f2e3d4c;
- // -1024 - Compensate wrong IMDCT method.
- // 60 - Required to scale values to the correct range [-32768,32767]
- // for float to int16 conversion. (1 << (60 / 4)) == 32768
- ac->sf_scale = 1. / -1024.;
- ac->sf_offset = 60;
-
ff_aac_tableinit();
INIT_VLC_STATIC(&vlc_scalefactors,7,FF_ARRAY_ELEMS(ff_aac_scalefactor_code),
ff_aac_scalefactor_code, sizeof(ff_aac_scalefactor_code[0]), sizeof(ff_aac_scalefactor_code[0]),
352);
- ff_mdct_init(&ac->mdct, 11, 1, 1.0);
- ff_mdct_init(&ac->mdct_small, 8, 1, 1.0);
- ff_mdct_init(&ac->mdct_ltp, 11, 0, 1.0);
+ ff_mdct_init(&ac->mdct, 11, 1, output_scale_factor/1024.0);
+ ff_mdct_init(&ac->mdct_small, 8, 1, output_scale_factor/128.0);
+ ff_mdct_init(&ac->mdct_ltp, 11, 0, -2.0/output_scale_factor);
// window initialization
ff_kbd_window_init(ff_aac_kbd_long_1024, 4.0, 1024);
ff_kbd_window_init(ff_aac_kbd_short_128, 6.0, 128);
int sfb;
ltp->lag = get_bits(gb, 11);
- ltp->coef = ltp_coef[get_bits(gb, 3)] * ac->sf_scale;
+ ltp->coef = ltp_coef[get_bits(gb, 3)];
for (sfb = 0; sfb < FFMIN(max_sfb, MAX_LTP_LONG_SFB); sfb++)
ltp->used[sfb] = get_bits1(gb);
}
enum BandType band_type[120],
int band_type_run_end[120])
{
- const int sf_offset = ac->sf_offset + (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE ? 12 : 0);
int g, i, idx = 0;
- int offset[3] = { global_gain, global_gain - 90, 100 };
+ int offset[3] = { global_gain, global_gain - 90, 0 };
+ int clipped_offset;
int noise_flag = 1;
static const char *sf_str[3] = { "Global gain", "Noise gain", "Intensity stereo position" };
for (g = 0; g < ics->num_window_groups; g++) {
} else if ((band_type[idx] == INTENSITY_BT) || (band_type[idx] == INTENSITY_BT2)) {
for (; i < run_end; i++, idx++) {
offset[2] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
- if (offset[2] > 255U) {
- av_log(ac->avctx, AV_LOG_ERROR,
- "%s (%d) out of range.\n", sf_str[2], offset[2]);
- return -1;
+ clipped_offset = av_clip(offset[2], -155, 100);
+ if (offset[2] != clipped_offset) {
+ av_log_ask_for_sample(ac->avctx, "Intensity stereo "
+ "position clipped (%d -> %d).\nIf you heard an "
+ "audible artifact, there may be a bug in the "
+ "decoder. ", offset[2], clipped_offset);
}
- sf[idx] = ff_aac_pow2sf_tab[-offset[2] + 300];
+ sf[idx] = ff_aac_pow2sf_tab[-clipped_offset + POW_SF2_ZERO];
}
} else if (band_type[idx] == NOISE_BT) {
for (; i < run_end; i++, idx++) {
offset[1] += get_bits(gb, 9) - 256;
else
offset[1] += get_vlc2(gb, vlc_scalefactors.table, 7, 3) - 60;
- if (offset[1] > 255U) {
- av_log(ac->avctx, AV_LOG_ERROR,
- "%s (%d) out of range.\n", sf_str[1], offset[1]);
- return -1;
+ clipped_offset = av_clip(offset[1], -100, 155);
+ if (offset[1] != clipped_offset) {
+ av_log_ask_for_sample(ac->avctx, "Noise gain clipped "
+ "(%d -> %d).\nIf you heard an audible "
+ "artifact, there may be a bug in the decoder. ",
+ offset[1], clipped_offset);
}
- sf[idx] = -ff_aac_pow2sf_tab[offset[1] + sf_offset + 100];
+ sf[idx] = -ff_aac_pow2sf_tab[clipped_offset + POW_SF2_ZERO];
}
} else {
for (; i < run_end; i++, idx++) {
"%s (%d) out of range.\n", sf_str[0], offset[0]);
return -1;
}
- sf[idx] = -ff_aac_pow2sf_tab[ offset[0] + sf_offset];
+ sf[idx] = -ff_aac_pow2sf_tab[offset[0] - 100 + POW_SF2_ZERO];
}
}
}
union float754 s = { .f = *scale };
union float754 t;
- t.i = s.i ^ (sign & 1<<31);
+ t.i = s.i ^ (sign & 1U<<31);
*dst++ = v[idx & 3] * t.f;
sign <<= nz & 1; nz >>= 1;
- t.i = s.i ^ (sign & 1<<31);
+ t.i = s.i ^ (sign & 1U<<31);
*dst++ = v[idx>>2 & 3] * t.f;
sign <<= nz & 1; nz >>= 1;
- t.i = s.i ^ (sign & 1<<31);
+ t.i = s.i ^ (sign & 1U<<31);
*dst++ = v[idx>>4 & 3] * t.f;
sign <<= nz & 1; nz >>= 1;
- t.i = s.i ^ (sign & 1<<31);
+ t.i = s.i ^ (sign & 1U<<31);
*dst++ = v[idx>>6 & 3] * t.f;
return dst;
b += 4;
n = (1 << b) + SHOW_UBITS(re, gb, b);
LAST_SKIP_BITS(re, gb, b);
- *icf++ = cbrt_tab[n] | (bits & 1<<31);
+ *icf++ = cbrt_tab[n] | (bits & 1U<<31);
bits <<= 1;
} else {
unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
- *icf++ = (bits & 1<<31) | v;
+ *icf++ = (bits & 1U<<31) | v;
bits <<= !!v;
}
cb_idx >>= 4;
}
static av_always_inline void predict(PredictorState *ps, float *coef,
- float sf_scale, float inv_sf_scale,
- int output_enable)
+ int output_enable)
{
const float a = 0.953125; // 61.0 / 64
const float alpha = 0.90625; // 29.0 / 32
pv = flt16_round(k1 * r0 + k2 * r1);
if (output_enable)
- *coef += pv * sf_scale;
+ *coef += pv;
- e0 = *coef * inv_sf_scale;
+ e0 = *coef;
e1 = e0 - k1 * r0;
ps->cor1 = flt16_trunc(alpha * cor1 + r1 * e1);
static void apply_prediction(AACContext *ac, SingleChannelElement *sce)
{
int sfb, k;
- float sf_scale = ac->sf_scale, inv_sf_scale = 1 / ac->sf_scale;
if (!sce->ics.predictor_initialized) {
reset_all_predictors(sce->predictor_state);
for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
predict(&sce->predictor_state[k], &sce->coeffs[k],
- sf_scale, inv_sf_scale,
sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
}
}
} else {
memset(in, 0, 448 * sizeof(float));
ac->dsp.vector_fmul(in + 448, in + 448, swindow_prev, 128);
- memcpy(in + 576, in + 576, 448 * sizeof(float));
}
if (ics->window_sequence[0] != LONG_START_SEQUENCE) {
ac->dsp.vector_fmul_reverse(in + 1024, in + 1024, lwindow, 1024);
} else {
- memcpy(in + 1024, in + 1024, 448 * sizeof(float));
ac->dsp.vector_fmul_reverse(in + 1024 + 448, in + 1024 + 448, swindow, 128);
memset(in + 1024 + 576, 0, 448 * sizeof(float));
}
saved_ltp[i + 512] = ac->buf_mdct[1023 - i] * lwindow[511 - i];
}
- memcpy(sce->ltp_state, &sce->ltp_state[1024], 1024 * sizeof(int16_t));
- ac->fmt_conv.float_to_int16(&(sce->ltp_state[1024]), sce->ret, 1024);
- ac->fmt_conv.float_to_int16(&(sce->ltp_state[2048]), saved_ltp, 1024);
+ memcpy(sce->ltp_state, sce->ltp_state+1024, 1024 * sizeof(*sce->ltp_state));
+ memcpy(sce->ltp_state+1024, sce->ret, 1024 * sizeof(*sce->ltp_state));
+ memcpy(sce->ltp_state+2048, saved_ltp, 1024 * sizeof(*sce->ltp_state));
}
/**
if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME))
return -7;
} else if (ac->output_configured != OC_LOCKED) {
+ ac->m4ac.chan_config = 0;
ac->output_configured = OC_NONE;
}
if (ac->output_configured != OC_LOCKED) {
ac->m4ac.sbr = -1;
ac->m4ac.ps = -1;
+ ac->m4ac.sample_rate = hdr_info.sample_rate;
+ ac->m4ac.sampling_index = hdr_info.sampling_index;
+ ac->m4ac.object_type = hdr_info.object_type;
}
- ac->m4ac.sample_rate = hdr_info.sample_rate;
- ac->m4ac.sampling_index = hdr_info.sampling_index;
- ac->m4ac.object_type = hdr_info.object_type;
if (!ac->avctx->sample_rate)
ac->avctx->sample_rate = hdr_info.sample_rate;
if (hdr_info.num_aac_frames == 1) {
ChannelElement *che = NULL, *che_prev = NULL;
enum RawDataBlockType elem_type, elem_type_prev = TYPE_END;
int err, elem_id, data_size_tmp;
- int samples = 0, multiplier;
+ int samples = 0, multiplier, audio_found = 0;
if (show_bits(gb, 12) == 0xfff) {
if (parse_adts_frame_header(ac, gb) < 0) {
case TYPE_SCE:
err = decode_ics(ac, &che->ch[0], gb, 0, 0);
+ audio_found = 1;
break;
case TYPE_CPE:
err = decode_cpe(ac, gb, che);
+ audio_found = 1;
break;
case TYPE_CCE:
case TYPE_LFE:
err = decode_ics(ac, &che->ch[0], gb, 0, 0);
+ audio_found = 1;
break;
case TYPE_DSE:
avctx->frame_size = samples;
}
- data_size_tmp = samples * avctx->channels * sizeof(int16_t);
+ data_size_tmp = samples * avctx->channels *
+ av_get_bytes_per_sample(avctx->sample_fmt);
if (*data_size < data_size_tmp) {
av_log(avctx, AV_LOG_ERROR,
"Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n",
}
*data_size = data_size_tmp;
- if (samples)
- ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data, samples, avctx->channels);
+ if (samples) {
+ if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT)
+ ac->fmt_conv.float_interleave(data, (const float **)ac->output_data,
+ samples, avctx->channels);
+ else
+ ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data,
+ samples, avctx->channels);
+ }
- if (ac->output_configured)
+ if (ac->output_configured && audio_found)
ac->output_configured = OC_LOCKED;
return 0;
GetBitContext *gb)
{
AVCodecContext *avctx = latmctx->aac_ctx.avctx;
+ MPEG4AudioConfig m4ac;
int config_start_bit = get_bits_count(gb);
int bits_consumed, esize;
return AVERROR_INVALIDDATA;
} else {
bits_consumed =
- decode_audio_specific_config(&latmctx->aac_ctx, avctx,
- &latmctx->aac_ctx.m4ac,
+ decode_audio_specific_config(NULL, avctx, &m4ac,
gb->buffer + (config_start_bit / 8),
get_bits_left(gb) / 8);
*out_size = 0;
return avpkt->size;
} else {
+ aac_decode_close(avctx);
if ((err = aac_decode_init(avctx)) < 0)
return err;
latmctx->initialized = 1;
AVCodec ff_aac_decoder = {
- "aac",
- AVMEDIA_TYPE_AUDIO,
- CODEC_ID_AAC,
- sizeof(AACContext),
- aac_decode_init,
- NULL,
- aac_decode_close,
- aac_decode_frame,
+ .name = "aac",
+ .type = AVMEDIA_TYPE_AUDIO,
+ .id = CODEC_ID_AAC,
+ .priv_data_size = sizeof(AACContext),
+ .init = aac_decode_init,
+ .close = aac_decode_close,
+ .decode = aac_decode_frame,
.long_name = NULL_IF_CONFIG_SMALL("Advanced Audio Coding"),
.sample_fmts = (const enum AVSampleFormat[]) {
- AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
+ AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
},
+ .capabilities = CODEC_CAP_CHANNEL_CONF,
.channel_layouts = aac_channel_layout,
};
.decode = latm_decode_frame,
.long_name = NULL_IF_CONFIG_SMALL("AAC LATM (Advanced Audio Codec LATM syntax)"),
.sample_fmts = (const enum AVSampleFormat[]) {
- AV_SAMPLE_FMT_S16,AV_SAMPLE_FMT_NONE
+ AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE
},
+ .capabilities = CODEC_CAP_CHANNEL_CONF,
.channel_layouts = aac_channel_layout,
};