X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Faac.h;h=1b6013e4589ffd7263e7f71ecc1a26d26c006282;hb=4466d0e84b3a105a02170ce8bc9e22820ff0152f;hp=eec0828533e63bf3fc1a855f34a95f989bad6039;hpb=d0ee50216d8f33f5c03a856112204cbd66009d92;p=ffmpeg diff --git a/libavcodec/aac.h b/libavcodec/aac.h index eec0828533e..1b6013e4589 100644 --- a/libavcodec/aac.h +++ b/libavcodec/aac.h @@ -3,25 +3,25 @@ * Copyright (c) 2005-2006 Oded Shimon ( ods15 ods15 dyndns org ) * Copyright (c) 2006-2007 Maxim Gavrilov ( maxim.gavrilov gmail com ) * - * This file is part of FFmpeg. + * This file is part of Libav. * - * FFmpeg is free software; you can redistribute it and/or + * Libav is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * - * FFmpeg is distributed in the hope that it will be useful, + * Libav is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software + * License along with Libav; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /** - * @file aac.h + * @file * AAC definitions and structures * @author Oded Shimon ( ods15 ods15 dyndns org ) * @author Maxim Gavrilov ( maxim.gavrilov gmail com ) @@ -32,50 +32,18 @@ #include "avcodec.h" #include "dsputil.h" +#include "fft.h" #include "mpeg4audio.h" +#include "sbr.h" +#include "fmtconvert.h" #include -#define AAC_INIT_VLC_STATIC(num, size) \ - INIT_VLC_STATIC(&vlc_spectral[num], 6, ff_aac_spectral_sizes[num], \ - ff_aac_spectral_bits[num], sizeof( ff_aac_spectral_bits[num][0]), sizeof( ff_aac_spectral_bits[num][0]), \ - ff_aac_spectral_codes[num], sizeof(ff_aac_spectral_codes[num][0]), sizeof(ff_aac_spectral_codes[num][0]), \ - size); - #define MAX_CHANNELS 64 #define MAX_ELEM_ID 16 #define TNS_MAX_ORDER 20 - -enum AudioObjectType { - AOT_NULL, - // Support? Name - AOT_AAC_MAIN, ///< Y Main - AOT_AAC_LC, ///< Y Low Complexity - AOT_AAC_SSR, ///< N (code in SoC repo) Scalable Sample Rate - AOT_AAC_LTP, ///< N (code in SoC repo) Long Term Prediction - AOT_SBR, ///< N (in progress) Spectral Band Replication - AOT_AAC_SCALABLE, ///< N Scalable - AOT_TWINVQ, ///< N Twin Vector Quantizer - AOT_CELP, ///< N Code Excited Linear Prediction - AOT_HVXC, ///< N Harmonic Vector eXcitation Coding - AOT_TTSI = 12, ///< N Text-To-Speech Interface - AOT_MAINSYNTH, ///< N Main Synthesis - AOT_WAVESYNTH, ///< N Wavetable Synthesis - AOT_MIDI, ///< N General MIDI - AOT_SAFX, ///< N Algorithmic Synthesis and Audio Effects - AOT_ER_AAC_LC, ///< N Error Resilient Low Complexity - AOT_ER_AAC_LTP = 19, ///< N Error Resilient Long Term Prediction - AOT_ER_AAC_SCALABLE, ///< N Error Resilient Scalable - AOT_ER_TWINVQ, ///< N Error Resilient Twin Vector Quantizer - AOT_ER_BSAC, ///< N Error Resilient Bit-Sliced Arithmetic Coding - AOT_ER_AAC_LD, ///< N Error Resilient Low Delay - AOT_ER_CELP, ///< N Error Resilient Code Excited Linear Prediction - AOT_ER_HVXC, ///< N Error Resilient Harmonic Vector eXcitation Coding - AOT_ER_HILN, ///< N Error Resilient Harmonic and Individual Lines plus Noise - AOT_ER_PARAM, ///< N Error Resilient Parametric - AOT_SSC, ///< N SinuSoidal Coding -}; +#define MAX_LTP_LONG_SFB 40 enum RawDataBlockType { TYPE_SCE, @@ -116,6 +84,7 @@ enum BandType { #define IS_CODEBOOK_UNSIGNED(x) ((x - 1) & 10) enum ChannelPosition { + AAC_CHANNEL_OFF = 0, AAC_CHANNEL_FRONT = 1, AAC_CHANNEL_SIDE = 2, AAC_CHANNEL_BACK = 3, @@ -132,6 +101,57 @@ enum CouplingPoint { AFTER_IMDCT = 3, }; +/** + * Output configuration status + */ +enum OCStatus { + OC_NONE, ///< Output unconfigured + OC_TRIAL_PCE, ///< Output configuration under trial specified by an inband PCE + OC_TRIAL_FRAME, ///< Output configuration under trial specified by a frame header + OC_GLOBAL_HDR, ///< Output configuration set in a global header but not yet locked + OC_LOCKED, ///< Output configuration locked in place +}; + +typedef struct { + MPEG4AudioConfig m4ac; + uint8_t layout_map[MAX_ELEM_ID*4][3]; + int layout_map_tags; + int channels; + uint64_t channel_layout; + enum OCStatus status; +} OutputConfiguration; + +/** + * Predictor State + */ +typedef struct { + float cor0; + float cor1; + float var0; + float var1; + float r0; + float r1; +} PredictorState; + +#define MAX_PREDICTORS 672 + +#define SCALE_DIV_512 36 ///< scalefactor difference that corresponds to scale difference in 512 times +#define SCALE_ONE_POS 140 ///< scalefactor index that corresponds to scale=1.0 +#define SCALE_MAX_POS 255 ///< scalefactor index maximum value +#define SCALE_MAX_DIFF 60 ///< maximum scalefactor difference allowed by standard +#define SCALE_DIFF_ZERO 60 ///< codebook index corresponding to zero scalefactor indices difference +#define POW_SF2_ZERO 200 ///< ff_aac_pow2sf_tab index corresponding to pow(2, 0); + +/** + * Long Term Prediction + */ +typedef struct { + int8_t present; + int16_t lag; + float coef; + int8_t used[MAX_LTP_LONG_SFB]; +} LongTermPrediction; + /** * Individual Channel Stream */ @@ -141,10 +161,16 @@ typedef struct { uint8_t use_kb_window[2]; ///< If set, use Kaiser-Bessel window, otherwise use a sinus window. int num_window_groups; uint8_t group_len[8]; + LongTermPrediction ltp; const uint16_t *swb_offset; ///< table of offsets to the lowest spectral coefficient of a scalefactor band, sfb, for a particular window + const uint8_t *swb_sizes; ///< table of scalefactor band sizes for a particular window int num_swb; ///< number of scalefactor window bands int num_windows; int tns_max_bands; + int predictor_present; + int predictor_initialized; + int predictor_reset_group; + uint8_t prediction_used[41]; } IndividualChannelStream; /** @@ -177,6 +203,7 @@ typedef struct { typedef struct { int num_pulse; + int start; int pos[4]; int amp[4]; } Pulse; @@ -201,12 +228,17 @@ typedef struct { typedef struct { IndividualChannelStream ics; TemporalNoiseShaping tns; - enum BandType band_type[120]; ///< band types - int band_type_run_end[120]; ///< band type run end points - float sf[120]; ///< scalefactors - DECLARE_ALIGNED_16(float, coeffs[1024]); ///< coefficients for IMDCT - DECLARE_ALIGNED_16(float, saved[512]); ///< overlap - DECLARE_ALIGNED_16(float, ret[1024]); ///< PCM output + Pulse pulse; + enum BandType band_type[128]; ///< band types + int band_type_run_end[120]; ///< band type run end points + float sf[120]; ///< scalefactors + int sf_idx[128]; ///< scalefactor indices (used by encoder) + uint8_t zeroes[128]; ///< band is not coded (used by encoder) + DECLARE_ALIGNED(32, float, coeffs)[1024]; ///< coefficients for IMDCT + DECLARE_ALIGNED(32, float, saved)[1024]; ///< overlap + DECLARE_ALIGNED(32, float, ret)[2048]; ///< PCM output + DECLARE_ALIGNED(16, float, ltp_state)[3072]; ///< time signal for LTP + PredictorState predictor_state[MAX_PREDICTORS]; } SingleChannelElement; /** @@ -214,61 +246,65 @@ typedef struct { */ typedef struct { // CPE specific - uint8_t ms_mask[120]; ///< Set if mid/side stereo is used for each scalefactor window band + int common_window; ///< Set if channels share a common 'IndividualChannelStream' in bitstream. + int ms_mode; ///< Signals mid/side stereo flags coding mode (used by encoder) + uint8_t ms_mask[128]; ///< Set if mid/side stereo is used for each scalefactor window band // shared SingleChannelElement ch[2]; // CCE specific ChannelCoupling coup; + SpectralBandReplication sbr; } ChannelElement; /** * main AAC context */ typedef struct { - AVCodecContext * avccontext; - - MPEG4AudioConfig m4ac; + AVCodecContext *avctx; + AVFrame frame; int is_saved; ///< Set if elements have stored overlap from previous frame. DynamicRangeControl che_drc; /** - * @defgroup elements + * @name Channel element related data * @{ */ - enum ChannelPosition che_pos[4][MAX_ELEM_ID]; /**< channel element channel mapping with the - * first index as the first 4 raw data block types - */ - ChannelElement * che[4][MAX_ELEM_ID]; + ChannelElement *che[4][MAX_ELEM_ID]; + ChannelElement *tag_che_map[4][MAX_ELEM_ID]; + int tags_mapped; /** @} */ /** - * @defgroup temporary aligned temporary buffers (We do not want to have these on the stack.) + * @name temporary aligned temporary buffers + * (We do not want to have these on the stack.) * @{ */ - DECLARE_ALIGNED_16(float, buf_mdct[1024]); + DECLARE_ALIGNED(32, float, buf_mdct)[1024]; /** @} */ /** - * @defgroup tables Computed / set up during initialization. + * @name Computed / set up during initialization * @{ */ - MDCTContext mdct; - MDCTContext mdct_small; + FFTContext mdct; + FFTContext mdct_small; + FFTContext mdct_ltp; DSPContext dsp; + FmtConvertContext fmt_conv; int random_state; /** @} */ /** - * @defgroup output Members used for output interleaving. + * @name Members used for output interleaving * @{ */ float *output_data[MAX_CHANNELS]; ///< Points to each element's 'ret' buffer (PCM output). - float add_bias; ///< offset for dsp.float_to_int16 - float sf_scale; ///< Pre-scale for correct IMDCT and dsp.float_to_int16. - int sf_offset; ///< offset into pow2sf_tab as appropriate for dsp.float_to_int16 /** @} */ + DECLARE_ALIGNED(32, float, temp)[128]; + + OutputConfiguration oc[2]; } AACContext; #endif /* AVCODEC_AAC_H */