X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Faacdec.c;h=77ee59f0abc783cdf2b2ac08ef396d50dce7ae41;hb=7a1723086a9cc72c2f3e1e682b002ed3d81990d3;hp=216692829456287763bc4895e877abae3698c28b;hpb=34465bbcb470f63a3e545ecbec0bd5619a27509b;p=ffmpeg diff --git a/libavcodec/aacdec.c b/libavcodec/aacdec.c index 21669282945..77ee59f0abc 100644 --- a/libavcodec/aacdec.c +++ b/libavcodec/aacdec.c @@ -98,6 +98,7 @@ #include "aacsbr.h" #include "mpeg4audio.h" #include "aacadtsdec.h" +#include "libavutil/intfloat.h" #include #include @@ -108,11 +109,6 @@ # include "arm/aac.h" #endif -union float754 { - float f; - uint32_t i; -}; - static VLC vlc_scalefactors; static VLC vlc_spectral[11]; @@ -167,6 +163,19 @@ static ChannelElement *get_che(AACContext *ac, int type, int elem_id) } } +static int count_channels(enum ChannelPosition che_pos[4][MAX_ELEM_ID]) +{ + int i, type, sum = 0; + for (i = 0; i < MAX_ELEM_ID; i++) { + for (type = 0; type < 4; type++) { + sum += (1 + (type == TYPE_CPE)) * + (che_pos[type][i] != AAC_CHANNEL_OFF && + che_pos[type][i] != AAC_CHANNEL_CC); + } + } + return sum; +} + /** * Check for the channel element in the current channel position configuration. * If it exists, make sure the appropriate element is allocated and map the @@ -262,6 +271,23 @@ static av_cold int output_configure(AACContext *ac, return 0; } +static void flush(AVCodecContext *avctx) +{ + AACContext *ac= avctx->priv_data; + int type, i, j; + + for (type = 3; type >= 0; type--) { + for (i = 0; i < MAX_ELEM_ID; i++) { + ChannelElement *che = ac->che[type][i]; + if (che) { + for (j = 0; j <= 1; j++) { + memset(che->ch[j].saved, 0, sizeof(che->ch[j].saved)); + } + } + } + } +} + /** * Decode an array of 4 bit element IDs, optionally interleaved with a stereo/mono switching bit. * @@ -424,6 +450,12 @@ static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx, if ((ret = set_default_channel_config(avctx, new_che_pos, channel_config))) return ret; } + + if (count_channels(new_che_pos) > 1) { + m4ac->ps = 0; + } else if (m4ac->sbr == 1 && m4ac->ps == -1) + m4ac->ps = 1; + if (ac && (ret = output_configure(ac, ac->che_pos, new_che_pos, channel_config, OC_GLOBAL_HDR))) return ret; @@ -454,15 +486,17 @@ static int decode_ga_specific_config(AACContext *ac, AVCodecContext *avctx, * @param ac pointer to AACContext, may be null * @param avctx pointer to AVCCodecContext, used for logging * @param m4ac pointer to MPEG4AudioConfig, used for parsing - * @param data pointer to AVCodecContext extradata - * @param data_size size of AVCCodecContext extradata + * @param data pointer to buffer holding an audio specific config + * @param bit_size size of audio specific config or data in bits + * @param sync_extension look for an appended sync extension * * @return Returns error status or number of consumed bits. <0 - error */ static int decode_audio_specific_config(AACContext *ac, AVCodecContext *avctx, MPEG4AudioConfig *m4ac, - const uint8_t *data, int data_size, int asclen) + const uint8_t *data, int bit_size, + int sync_extension) { GetBitContext gb; int i; @@ -472,16 +506,14 @@ static int decode_audio_specific_config(AACContext *ac, av_dlog(avctx, "%02x ", avctx->extradata[i]); av_dlog(avctx, "\n"); - init_get_bits(&gb, data, data_size * 8); + init_get_bits(&gb, data, bit_size); - if ((i = avpriv_mpeg4audio_get_config(m4ac, data, asclen/8)) < 0) + if ((i = avpriv_mpeg4audio_get_config(m4ac, data, bit_size, sync_extension)) < 0) return -1; if (m4ac->sampling_index > 12) { av_log(avctx, AV_LOG_ERROR, "invalid sampling rate index %d\n", m4ac->sampling_index); return -1; } - if (m4ac->sbr == 1 && m4ac->ps == -1) - m4ac->ps = 1; skip_bits_long(&gb, i); @@ -574,7 +606,7 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) if (avctx->extradata_size > 0) { if (decode_audio_specific_config(ac, ac->avctx, &ac->m4ac, avctx->extradata, - avctx->extradata_size, 8*avctx->extradata_size) < 0) + avctx->extradata_size*8, 1) < 0) return -1; } else { int sr, i; @@ -648,6 +680,9 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) cbrt_tableinit(); + avcodec_get_frame_defaults(&ac->frame); + avctx->coded_frame = &ac->frame; + return 0; } @@ -704,16 +739,13 @@ static void decode_ltp(AACContext *ac, LongTermPrediction *ltp, /** * Decode Individual Channel Stream info; reference: table 4.6. - * - * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information. */ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics, - GetBitContext *gb, int common_window) + GetBitContext *gb) { if (get_bits1(gb)) { av_log(ac->avctx, AV_LOG_ERROR, "Reserved bit set.\n"); - memset(ics, 0, sizeof(IndividualChannelStream)); - return -1; + return AVERROR_INVALIDDATA; } ics->window_sequence[1] = ics->window_sequence[0]; ics->window_sequence[0] = get_bits(gb, 2); @@ -748,13 +780,11 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics, if (ics->predictor_present) { if (ac->m4ac.object_type == AOT_AAC_MAIN) { if (decode_prediction(ac, ics, gb)) { - memset(ics, 0, sizeof(IndividualChannelStream)); - return -1; + return AVERROR_INVALIDDATA; } } else if (ac->m4ac.object_type == AOT_AAC_LC) { av_log(ac->avctx, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n"); - memset(ics, 0, sizeof(IndividualChannelStream)); - return -1; + return AVERROR_INVALIDDATA; } else { if ((ics->ltp.present = get_bits(gb, 1))) decode_ltp(ac, &ics->ltp, gb, ics->max_sfb); @@ -766,8 +796,7 @@ static int decode_ics_info(AACContext *ac, IndividualChannelStream *ics, av_log(ac->avctx, AV_LOG_ERROR, "Number of scalefactor bands in group (%d) exceeds limit (%d).\n", ics->max_sfb, ics->num_swb); - memset(ics, 0, sizeof(IndividualChannelStream)); - return -1; + return AVERROR_INVALIDDATA; } return 0; @@ -797,10 +826,10 @@ static int decode_band_types(AACContext *ac, enum BandType band_type[120], av_log(ac->avctx, AV_LOG_ERROR, "invalid band type\n"); return -1; } - while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1) + while ((sect_len_incr = get_bits(gb, bits)) == (1 << bits) - 1 && get_bits_left(gb) >= bits) sect_end += sect_len_incr; sect_end += sect_len_incr; - if (get_bits_left(gb) < 0) { + if (get_bits_left(gb) < 0 || sect_len_incr == (1 << bits) - 1) { av_log(ac->avctx, AV_LOG_ERROR, overread_err); return -1; } @@ -1001,7 +1030,7 @@ static inline float *VMUL4(float *dst, const float *v, unsigned idx, static inline float *VMUL2S(float *dst, const float *v, unsigned idx, unsigned sign, const float *scale) { - union float754 s0, s1; + union av_intfloat32 s0, s1; s0.f = s1.f = *scale; s0.i ^= sign >> 1 << 31; @@ -1019,8 +1048,8 @@ static inline float *VMUL4S(float *dst, const float *v, unsigned idx, unsigned sign, const float *scale) { unsigned nz = idx >> 12; - union float754 s = { .f = *scale }; - union float754 t; + union av_intfloat32 s = { .f = *scale }; + union av_intfloat32 t; t.i = s.i ^ (sign & 1U<<31); *dst++ = v[idx & 3] * t.f; @@ -1269,7 +1298,7 @@ static int decode_spectrum_and_dequant(AACContext *ac, float coef[1024], static av_always_inline float flt16_round(float pf) { - union float754 tmp; + union av_intfloat32 tmp; tmp.f = pf; tmp.i = (tmp.i + 0x00008000U) & 0xFFFF0000U; return tmp.f; @@ -1277,7 +1306,7 @@ static av_always_inline float flt16_round(float pf) static av_always_inline float flt16_even(float pf) { - union float754 tmp; + union av_intfloat32 tmp; tmp.f = pf; tmp.i = (tmp.i + 0x00007FFFU + (tmp.i & 0x00010000U >> 16)) & 0xFFFF0000U; return tmp.f; @@ -1285,7 +1314,7 @@ static av_always_inline float flt16_even(float pf) static av_always_inline float flt16_trunc(float pf) { - union float754 pun; + union av_intfloat32 pun; pun.f = pf; pun.i &= 0xFFFF0000U; return pun.f; @@ -1372,8 +1401,8 @@ static int decode_ics(AACContext *ac, SingleChannelElement *sce, global_gain = get_bits(gb, 8); if (!common_window && !scale_flag) { - if (decode_ics_info(ac, ics, gb, 0) < 0) - return -1; + if (decode_ics_info(ac, ics, gb) < 0) + return AVERROR_INVALIDDATA; } if (decode_band_types(ac, sce->band_type, sce->band_type_run_end, gb, ics) < 0) @@ -1489,8 +1518,8 @@ static int decode_cpe(AACContext *ac, GetBitContext *gb, ChannelElement *cpe) common_window = get_bits1(gb); if (common_window) { - if (decode_ics_info(ac, &cpe->ch[0].ics, gb, 1)) - return -1; + if (decode_ics_info(ac, &cpe->ch[0].ics, gb)) + return AVERROR_INVALIDDATA; i = cpe->ch[1].ics.use_kb_window[0]; cpe->ch[1].ics = cpe->ch[0].ics; cpe->ch[1].ics.use_kb_window[1] = i; @@ -2081,13 +2110,14 @@ static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb) size = avpriv_aac_parse_header(gb, &hdr_info); if (size > 0) { - if (hdr_info.chan_config && (hdr_info.chan_config!=ac->m4ac.chan_config || ac->m4ac.sample_rate!=hdr_info.sample_rate)) { + if (hdr_info.chan_config) { enum ChannelPosition new_che_pos[4][MAX_ELEM_ID]; memset(new_che_pos, 0, 4 * MAX_ELEM_ID * sizeof(new_che_pos[0][0])); ac->m4ac.chan_config = hdr_info.chan_config; if (set_default_channel_config(ac->avctx, new_che_pos, hdr_info.chan_config)) return -7; - if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, OC_TRIAL_FRAME)) + if (output_configure(ac, ac->che_pos, new_che_pos, hdr_info.chan_config, + FFMAX(ac->output_configured, OC_TRIAL_FRAME))) return -7; } else if (ac->output_configured != OC_LOCKED) { ac->m4ac.chan_config = 0; @@ -2102,24 +2132,25 @@ static int parse_adts_frame_header(AACContext *ac, GetBitContext *gb) } if (!ac->avctx->sample_rate) ac->avctx->sample_rate = hdr_info.sample_rate; - if (hdr_info.num_aac_frames == 1) { - if (!hdr_info.crc_absent) - skip_bits(gb, 16); - } else { + if (!ac->warned_num_aac_frames && hdr_info.num_aac_frames != 1) { + // This is 2 for "VLB " audio in NSV files. + // See samples/nsv/vlb_audio. av_log_missing_feature(ac->avctx, "More than one AAC RDB per ADTS frame is", 0); - return -1; + ac->warned_num_aac_frames = 1; } + if (!hdr_info.crc_absent) + skip_bits(gb, 16); } return size; } static int aac_decode_frame_int(AVCodecContext *avctx, void *data, - int *data_size, GetBitContext *gb) + int *got_frame_ptr, GetBitContext *gb) { AACContext *ac = avctx->priv_data; ChannelElement *che = NULL, *che_prev = NULL; enum RawDataBlockType elem_type, elem_type_prev = TYPE_END; - int err, elem_id, data_size_tmp; + int err, elem_id; int samples = 0, multiplier, audio_found = 0; if (show_bits(gb, 12) == 0xfff) { @@ -2187,10 +2218,11 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data, if ((err = decode_pce(avctx, &ac->m4ac, new_che_pos, gb))) break; if (ac->output_configured > OC_TRIAL_PCE) - av_log(avctx, AV_LOG_ERROR, - "Not evaluating a further program_config_element as this construct is dubious at best.\n"); - else - err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE); + av_log(avctx, AV_LOG_INFO, + "Evaluating a further program_config_element.\n"); + err = output_configure(ac, ac->che_pos, new_che_pos, 0, OC_TRIAL_PCE); + if (!err) + ac->m4ac.chan_config = 0; break; } @@ -2232,24 +2264,26 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data, avctx->frame_size = samples; } - data_size_tmp = samples * avctx->channels * - av_get_bytes_per_sample(avctx->sample_fmt); - if (*data_size < data_size_tmp) { - av_log(avctx, AV_LOG_ERROR, - "Output buffer too small (%d) or trying to output too many samples (%d) for this frame.\n", - *data_size, data_size_tmp); - return -1; - } - *data_size = data_size_tmp; - if (samples) { + /* get output buffer */ + ac->frame.nb_samples = samples; + if ((err = avctx->get_buffer(avctx, &ac->frame)) < 0) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return err; + } + if (avctx->sample_fmt == AV_SAMPLE_FMT_FLT) - ac->fmt_conv.float_interleave(data, (const float **)ac->output_data, + ac->fmt_conv.float_interleave((float *)ac->frame.data[0], + (const float **)ac->output_data, samples, avctx->channels); else - ac->fmt_conv.float_to_int16_interleave(data, (const float **)ac->output_data, + ac->fmt_conv.float_to_int16_interleave((int16_t *)ac->frame.data[0], + (const float **)ac->output_data, samples, avctx->channels); + + *(AVFrame *)data = ac->frame; } + *got_frame_ptr = !!samples; if (ac->output_configured && audio_found) ac->output_configured = OC_LOCKED; @@ -2258,18 +2292,37 @@ static int aac_decode_frame_int(AVCodecContext *avctx, void *data, } static int aac_decode_frame(AVCodecContext *avctx, void *data, - int *data_size, AVPacket *avpkt) + int *got_frame_ptr, AVPacket *avpkt) { + AACContext *ac = avctx->priv_data; const uint8_t *buf = avpkt->data; int buf_size = avpkt->size; GetBitContext gb; int buf_consumed; int buf_offset; int err; + int new_extradata_size; + const uint8_t *new_extradata = av_packet_get_side_data(avpkt, + AV_PKT_DATA_NEW_EXTRADATA, + &new_extradata_size); + + if (new_extradata) { + av_free(avctx->extradata); + avctx->extradata = av_mallocz(new_extradata_size + + FF_INPUT_BUFFER_PADDING_SIZE); + if (!avctx->extradata) + return AVERROR(ENOMEM); + avctx->extradata_size = new_extradata_size; + memcpy(avctx->extradata, new_extradata, new_extradata_size); + if (decode_audio_specific_config(ac, ac->avctx, &ac->m4ac, + avctx->extradata, + avctx->extradata_size*8, 1) < 0) + return AVERROR_INVALIDDATA; + } init_get_bits(&gb, buf, buf_size * 8); - if ((err = aac_decode_frame_int(avctx, data, data_size, &gb)) < 0) + if ((err = aac_decode_frame_int(avctx, data, got_frame_ptr, &gb)) < 0) return err; buf_consumed = (get_bits_count(&gb) + 7) >> 3; @@ -2322,30 +2375,42 @@ static inline uint32_t latm_get_value(GetBitContext *b) static int latm_decode_audio_specific_config(struct LATMContext *latmctx, GetBitContext *gb, int asclen) { - AVCodecContext *avctx = latmctx->aac_ctx.avctx; - AACContext *ac= &latmctx->aac_ctx; - MPEG4AudioConfig m4ac=ac->m4ac; - int config_start_bit = get_bits_count(gb); - int bits_consumed, esize; + AACContext *ac = &latmctx->aac_ctx; + AVCodecContext *avctx = ac->avctx; + MPEG4AudioConfig m4ac = {0}; + int config_start_bit = get_bits_count(gb); + int sync_extension = 0; + int bits_consumed, esize; + + if (asclen) { + sync_extension = 1; + asclen = FFMIN(asclen, get_bits_left(gb)); + } else + asclen = get_bits_left(gb); if (config_start_bit % 8) { av_log_missing_feature(latmctx->aac_ctx.avctx, "audio specific " "config not byte aligned.\n", 1); return AVERROR_INVALIDDATA; - } else { - bits_consumed = - decode_audio_specific_config(ac, avctx, &m4ac, + } + if (asclen <= 0) + return AVERROR_INVALIDDATA; + bits_consumed = decode_audio_specific_config(NULL, avctx, &m4ac, gb->buffer + (config_start_bit / 8), - get_bits_left(gb) / 8, asclen); + asclen, sync_extension); - if (bits_consumed < 0) - return AVERROR_INVALIDDATA; - if(ac->m4ac.sample_rate != m4ac.sample_rate || m4ac.chan_config != ac->m4ac.chan_config) - ac->m4ac= m4ac; + if (bits_consumed < 0) + return AVERROR_INVALIDDATA; + + if (ac->m4ac.sample_rate != m4ac.sample_rate || + ac->m4ac.chan_config != m4ac.chan_config) { + + av_log(avctx, AV_LOG_INFO, "audio config changed\n"); + latmctx->initialized = 0; esize = (bits_consumed+7) / 8; - if (avctx->extradata_size <= esize) { + if (avctx->extradata_size < esize) { av_free(avctx->extradata); avctx->extradata = av_malloc(esize + FF_INPUT_BUFFER_PADDING_SIZE); if (!avctx->extradata) @@ -2355,9 +2420,8 @@ static int latm_decode_audio_specific_config(struct LATMContext *latmctx, avctx->extradata_size = esize; memcpy(avctx->extradata, gb->buffer + (config_start_bit/8), esize); memset(avctx->extradata+esize, 0, FF_INPUT_BUFFER_PADDING_SIZE); - - skip_bits_long(gb, bits_consumed); } + skip_bits_long(gb, bits_consumed); return bits_consumed; } @@ -2494,8 +2558,8 @@ static int read_audio_mux_element(struct LATMContext *latmctx, } -static int latm_decode_frame(AVCodecContext *avctx, void *out, int *out_size, - AVPacket *avpkt) +static int latm_decode_frame(AVCodecContext *avctx, void *out, + int *got_frame_ptr, AVPacket *avpkt) { struct LATMContext *latmctx = avctx->priv_data; int muxlength, err; @@ -2517,12 +2581,12 @@ static int latm_decode_frame(AVCodecContext *avctx, void *out, int *out_size, if (!latmctx->initialized) { if (!avctx->extradata) { - *out_size = 0; + *got_frame_ptr = 0; return avpkt->size; } else { if ((err = decode_audio_specific_config( &latmctx->aac_ctx, avctx, &latmctx->aac_ctx.m4ac, - avctx->extradata, avctx->extradata_size, 8*avctx->extradata_size)) < 0) + avctx->extradata, avctx->extradata_size*8, 1)) < 0) return err; latmctx->initialized = 1; } @@ -2535,7 +2599,7 @@ static int latm_decode_frame(AVCodecContext *avctx, void *out, int *out_size, return AVERROR_INVALIDDATA; } - if ((err = aac_decode_frame_int(avctx, out, out_size, &gb)) < 0) + if ((err = aac_decode_frame_int(avctx, out, got_frame_ptr, &gb)) < 0) return err; return muxlength; @@ -2565,7 +2629,7 @@ AVCodec ff_aac_decoder = { .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, - .capabilities = CODEC_CAP_CHANNEL_CONF, + .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1, .channel_layouts = aac_channel_layout, }; @@ -2586,6 +2650,7 @@ AVCodec ff_aac_latm_decoder = { .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_FLT, AV_SAMPLE_FMT_S16, AV_SAMPLE_FMT_NONE }, - .capabilities = CODEC_CAP_CHANNEL_CONF, + .capabilities = CODEC_CAP_CHANNEL_CONF | CODEC_CAP_DR1, .channel_layouts = aac_channel_layout, + .flush = flush, };