X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fdca_xll.c;h=cd1af81dcc4ee12dd6262d203e3410478c566d93;hb=a236e4e8b840b57cc3975db21a5f54cda59753a9;hp=98fd4c8eaa16bfbabadbb665557f5c4a58bd3ddb;hpb=ed9cff9edf0f7f48d66e5700d154f79394f535a2;p=ffmpeg diff --git a/libavcodec/dca_xll.c b/libavcodec/dca_xll.c index 98fd4c8eaa1..cd1af81dcc4 100644 --- a/libavcodec/dca_xll.c +++ b/libavcodec/dca_xll.c @@ -1,8 +1,5 @@ /* - * DCA XLL extension - * - * Copyright (C) 2012 Paul B Mahol - * Copyright (C) 2014 Niels Möller + * Copyright (C) 2016 foo86 * * This file is part of FFmpeg. * @@ -21,727 +18,1482 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ -#include "libavutil/attributes.h" -#include "libavutil/common.h" -#include "libavutil/internal.h" - -#include "avcodec.h" -#include "dca.h" +#include "dcadec.h" #include "dcadata.h" -#include "get_bits.h" +#include "dcamath.h" +#include "dca_syncwords.h" #include "unary.h" -/* Sign as bit 0 */ -static inline int get_bits_sm(GetBitContext *s, unsigned n) +static int get_linear(GetBitContext *gb, int n) { - int x = get_bits(s, n); - if (x & 1) - return -(x >> 1) - 1; - else - return x >> 1; -} - -/* Return -1 on error. */ -static int32_t get_dmix_coeff(DCAContext *s, int inverse) -{ - unsigned code = get_bits(&s->gb, 9); - int32_t sign = (int32_t) (code >> 8) - 1; - unsigned idx = code & 0xff; - int inv_offset = FF_DCA_DMIXTABLE_SIZE -FF_DCA_INV_DMIXTABLE_SIZE; - if (idx >= FF_DCA_DMIXTABLE_SIZE) { - av_log(s->avctx, AV_LOG_ERROR, - "XLL: Invalid channel set downmix code %x\n", code); - return -1; - } else if (!inverse) { - return (ff_dca_dmixtable[idx] ^ sign) - sign; - } else if (idx < inv_offset) { - av_log(s->avctx, AV_LOG_ERROR, - "XLL: Invalid channel set inverse downmix code %x\n", code); - return -1; - } else { - return (ff_dca_inv_dmixtable[idx - inv_offset] ^ sign) - sign; - } + unsigned int v = get_bits_long(gb, n); + return (v >> 1) ^ -(v & 1); +} + +static int get_rice_un(GetBitContext *gb, int k) +{ + unsigned int v = get_unary(gb, 1, 128); + return (v << k) | get_bits_long(gb, k); } -static int32_t dca_get_dmix_coeff(DCAContext *s) +static int get_rice(GetBitContext *gb, int k) { - return get_dmix_coeff(s, 0); + unsigned int v = get_rice_un(gb, k); + return (v >> 1) ^ -(v & 1); } -static int32_t dca_get_inv_dmix_coeff(DCAContext *s) +static void get_array(GetBitContext *gb, int32_t *array, int size, int n) { - return get_dmix_coeff(s, 1); + int i; + + for (i = 0; i < size; i++) + array[i] = get_bits(gb, n); } -/* parse XLL header */ -int ff_dca_xll_decode_header(DCAContext *s) +static void get_linear_array(GetBitContext *gb, int32_t *array, int size, int n) { - int hdr_pos, hdr_size; - av_unused int version, frame_size; - int i, chset_index; + int i; - /* get bit position of sync header */ - hdr_pos = get_bits_count(&s->gb) - 32; + if (n == 0) + memset(array, 0, sizeof(*array) * size); + else for (i = 0; i < size; i++) + array[i] = get_linear(gb, n); +} - version = get_bits(&s->gb, 4) + 1; - hdr_size = get_bits(&s->gb, 8) + 1; +static void get_rice_array(GetBitContext *gb, int32_t *array, int size, int k) +{ + int i; - frame_size = get_bits_long(&s->gb, get_bits(&s->gb, 5) + 1) + 1; + for (i = 0; i < size; i++) + array[i] = get_rice(gb, k); +} - s->xll_channels = - s->xll_residual_channels = 0; - s->xll_nch_sets = get_bits(&s->gb, 4) + 1; - s->xll_segments = 1 << get_bits(&s->gb, 4); - s->xll_log_smpl_in_seg = get_bits(&s->gb, 4); - s->xll_smpl_in_seg = 1 << s->xll_log_smpl_in_seg; - s->xll_bits4seg_size = get_bits(&s->gb, 5) + 1; - s->xll_banddata_crc = get_bits(&s->gb, 2); - s->xll_scalable_lsb = get_bits1(&s->gb); - s->xll_bits4ch_mask = get_bits(&s->gb, 5) + 1; +static int parse_dmix_coeffs(DCAXllDecoder *s, DCAXllChSet *c) +{ + // Size of downmix coefficient matrix + int m = c->primary_chset ? ff_dca_dmix_primary_nch[c->dmix_type] : c->hier_ofs; + int i, j, *coeff_ptr = c->dmix_coeff; + + for (i = 0; i < m; i++) { + int code, sign, coeff, scale, scale_inv = 0; + unsigned int index; + + // Downmix scale (only for non-primary channel sets) + if (!c->primary_chset) { + code = get_bits(&s->gb, 9); + sign = (code >> 8) - 1; + index = (code & 0xff) - FF_DCA_DMIXTABLE_OFFSET; + if (index >= FF_DCA_INV_DMIXTABLE_SIZE) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL downmix scale index\n"); + return AVERROR_INVALIDDATA; + } + scale = ff_dca_dmixtable[index + FF_DCA_DMIXTABLE_OFFSET]; + scale_inv = ff_dca_inv_dmixtable[index]; + c->dmix_scale[i] = (scale ^ sign) - sign; + c->dmix_scale_inv[i] = (scale_inv ^ sign) - sign; + } - if (s->xll_scalable_lsb) { - s->xll_fixed_lsb_width = get_bits(&s->gb, 4); - if (s->xll_fixed_lsb_width) - av_log(s->avctx, AV_LOG_WARNING, - "XLL: fixed lsb width = %d, non-zero not supported.\n", - s->xll_fixed_lsb_width); + // Downmix coefficients + for (j = 0; j < c->nchannels; j++) { + code = get_bits(&s->gb, 9); + sign = (code >> 8) - 1; + index = code & 0xff; + if (index >= FF_DCA_DMIXTABLE_SIZE) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL downmix coefficient index\n"); + return AVERROR_INVALIDDATA; + } + coeff = ff_dca_dmixtable[index]; + if (!c->primary_chset) + // Multiply by |InvDmixScale| to get |UndoDmixScale| + coeff = mul16(scale_inv, coeff); + *coeff_ptr++ = (coeff ^ sign) - sign; + } } - /* skip to the end of the common header */ - i = get_bits_count(&s->gb); - if (hdr_pos + hdr_size * 8 > i) - skip_bits_long(&s->gb, hdr_pos + hdr_size * 8 - i); - for (chset_index = 0; chset_index < s->xll_nch_sets; chset_index++) { - XllChSetSubHeader *chset = &s->xll_chsets[chset_index]; - hdr_pos = get_bits_count(&s->gb); - hdr_size = get_bits(&s->gb, 10) + 1; + return 0; +} + +static int chs_parse_header(DCAXllDecoder *s, DCAXllChSet *c, DCAExssAsset *asset) +{ + int i, j, k, ret, band, header_size, header_pos = get_bits_count(&s->gb); + DCAXllChSet *p = &s->chset[0]; + DCAXllBand *b; - chset->channels = get_bits(&s->gb, 4) + 1; - chset->residual_encode = get_bits(&s->gb, chset->channels); - chset->bit_resolution = get_bits(&s->gb, 5) + 1; - chset->bit_width = get_bits(&s->gb, 5) + 1; - chset->sampling_frequency = ff_dca_sampling_freqs[get_bits(&s->gb, 4)]; - chset->samp_freq_interp = get_bits(&s->gb, 2); - chset->replacement_set = get_bits(&s->gb, 2); - if (chset->replacement_set) - chset->active_replace_set = get_bits(&s->gb, 1); + // Size of channel set sub-header + header_size = get_bits(&s->gb, 10) + 1; - if (s->one2one_map_chtospkr) { - chset->primary_ch_set = get_bits(&s->gb, 1); - chset->downmix_coeff_code_embedded = get_bits(&s->gb, 1); - if (chset->downmix_coeff_code_embedded) { - chset->downmix_embedded = get_bits(&s->gb, 1); - if (chset->primary_ch_set) { - chset->downmix_type = get_bits(&s->gb, 3); - if (chset->downmix_type > 6) { - av_log(s->avctx, AV_LOG_ERROR, - "XLL: Invalid channel set downmix type\n"); - return AVERROR_INVALIDDATA; - } - } + // Check CRC + if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL)) + && ff_dca_check_crc(&s->gb, header_pos, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL sub-header checksum\n"); + return AVERROR_INVALIDDATA; + } + + // Number of channels in the channel set + c->nchannels = get_bits(&s->gb, 4) + 1; + if (c->nchannels > DCA_XLL_CHANNELS_MAX) { + avpriv_request_sample(s->avctx, "%d XLL channels", c->nchannels); + return AVERROR_PATCHWELCOME; + } + + // Residual type + c->residual_encode = get_bits(&s->gb, c->nchannels); + + // PCM bit resolution + c->pcm_bit_res = get_bits(&s->gb, 5) + 1; + + // Storage unit width + c->storage_bit_res = get_bits(&s->gb, 5) + 1; + if (c->storage_bit_res != 16 && c->storage_bit_res != 24) { + avpriv_request_sample(s->avctx, "%d-bit XLL storage resolution", c->storage_bit_res); + return AVERROR_PATCHWELCOME; + } + + if (c->pcm_bit_res > c->storage_bit_res) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid PCM bit resolution for XLL channel set (%d > %d)\n", c->pcm_bit_res, c->storage_bit_res); + return AVERROR_INVALIDDATA; + } + + // Original sampling frequency + c->freq = ff_dca_sampling_freqs[get_bits(&s->gb, 4)]; + if (c->freq > 192000) { + avpriv_request_sample(s->avctx, "%d Hz XLL sampling frequency", c->freq); + return AVERROR_PATCHWELCOME; + } + + // Sampling frequency modifier + if (get_bits(&s->gb, 2)) { + avpriv_request_sample(s->avctx, "XLL sampling frequency modifier"); + return AVERROR_PATCHWELCOME; + } + + // Which replacement set this channel set is member of + if (get_bits(&s->gb, 2)) { + avpriv_request_sample(s->avctx, "XLL replacement set"); + return AVERROR_PATCHWELCOME; + } + + if (asset->one_to_one_map_ch_to_spkr) { + // Primary channel set flag + c->primary_chset = get_bits1(&s->gb); + if (c->primary_chset != (c == p)) { + av_log(s->avctx, AV_LOG_ERROR, "The first (and only) XLL channel set must be primary\n"); + return AVERROR_INVALIDDATA; + } + + // Downmix coefficients present in stream + c->dmix_coeffs_present = get_bits1(&s->gb); + + // Downmix already performed by encoder + c->dmix_embedded = c->dmix_coeffs_present && get_bits1(&s->gb); + + // Downmix type + if (c->dmix_coeffs_present && c->primary_chset) { + c->dmix_type = get_bits(&s->gb, 3); + if (c->dmix_type >= DCA_DMIX_TYPE_COUNT) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL primary channel set downmix type\n"); + return AVERROR_INVALIDDATA; } - chset->hier_chset = get_bits(&s->gb, 1); - - if (chset->downmix_coeff_code_embedded) { - /* nDownmixCoeffs is specified as N * M. For a primary - * channel set, it appears that N = number of - * channels, and M is the number of downmix channels. - * - * For a non-primary channel set, N is specified as - * number of channels + 1, and M is derived from the - * channel set hierarchy, and at least in simple cases - * M is the number of channels in preceding channel - * sets. */ - if (chset->primary_ch_set) { - static const char dmix_table[7] = { 1, 2, 2, 3, 3, 4, 4 }; - chset->downmix_ncoeffs = chset->channels * dmix_table[chset->downmix_type]; - } else - chset->downmix_ncoeffs = (chset->channels + 1) * s->xll_channels; - - if (chset->downmix_ncoeffs > DCA_XLL_DMIX_NCOEFFS_MAX) { - avpriv_request_sample(s->avctx, - "XLL: More than %d downmix coefficients", - DCA_XLL_DMIX_NCOEFFS_MAX); - return AVERROR_PATCHWELCOME; - } else if (chset->primary_ch_set) { - for (i = 0; i < chset->downmix_ncoeffs; i++) - if ((chset->downmix_coeffs[i] = dca_get_dmix_coeff(s)) == -1) - return AVERROR_INVALIDDATA; - } else { - unsigned c, r; - for (c = 0, i = 0; c < s->xll_channels; c++, i += chset->channels + 1) { - if ((chset->downmix_coeffs[i] = dca_get_inv_dmix_coeff(s)) == -1) - return AVERROR_INVALIDDATA; - for (r = 1; r <= chset->channels; r++) { - int32_t coeff = dca_get_dmix_coeff(s); - if (coeff == -1) - return AVERROR_INVALIDDATA; - chset->downmix_coeffs[i + r] = - (chset->downmix_coeffs[i] * (int64_t) coeff + (1 << 15)) >> 16; - } - } + } + + // Whether the channel set is part of a hierarchy + c->hier_chset = get_bits1(&s->gb); + if (!c->hier_chset && s->nchsets != 1) { + avpriv_request_sample(s->avctx, "XLL channel set outside of hierarchy"); + return AVERROR_PATCHWELCOME; + } + + // Downmix coefficients + if (c->dmix_coeffs_present && (ret = parse_dmix_coeffs(s, c)) < 0) + return ret; + + // Channel mask enabled + if (!get_bits1(&s->gb)) { + avpriv_request_sample(s->avctx, "Disabled XLL channel mask"); + return AVERROR_PATCHWELCOME; + } + + // Channel mask for set + c->ch_mask = get_bits_long(&s->gb, s->ch_mask_nbits); + if (av_popcount(c->ch_mask) != c->nchannels) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL channel mask\n"); + return AVERROR_INVALIDDATA; + } + + // Build the channel to speaker map + for (i = 0, j = 0; i < s->ch_mask_nbits; i++) + if (c->ch_mask & (1U << i)) + c->ch_remap[j++] = i; + } else { + // Mapping coeffs present flag + if (c->nchannels != 2 || s->nchsets != 1 || get_bits1(&s->gb)) { + avpriv_request_sample(s->avctx, "Custom XLL channel to speaker mapping"); + return AVERROR_PATCHWELCOME; + } + + // Setup for LtRt decoding + c->primary_chset = 1; + c->dmix_coeffs_present = 0; + c->dmix_embedded = 0; + c->hier_chset = 0; + c->ch_mask = DCA_SPEAKER_LAYOUT_STEREO; + c->ch_remap[0] = DCA_SPEAKER_L; + c->ch_remap[1] = DCA_SPEAKER_R; + } + + if (c->freq > 96000) { + // Extra frequency bands flag + if (get_bits1(&s->gb)) { + avpriv_request_sample(s->avctx, "Extra XLL frequency bands"); + return AVERROR_PATCHWELCOME; + } + c->nfreqbands = 2; + } else { + c->nfreqbands = 1; + } + + // Set the sampling frequency to that of the first frequency band. + // Frequency will be doubled again after bands assembly. + c->freq >>= c->nfreqbands - 1; + + // Verify that all channel sets have the same audio characteristics + if (c != p && (c->nfreqbands != p->nfreqbands || c->freq != p->freq + || c->pcm_bit_res != p->pcm_bit_res + || c->storage_bit_res != p->storage_bit_res)) { + avpriv_request_sample(s->avctx, "Different XLL audio characteristics"); + return AVERROR_PATCHWELCOME; + } + + // Determine number of bits to read bit allocation coding parameter + if (c->storage_bit_res > 16) + c->nabits = 5; + else if (c->storage_bit_res > 8) + c->nabits = 4; + else + c->nabits = 3; + + // Account for embedded downmix and decimator saturation + if ((s->nchsets > 1 || c->nfreqbands > 1) && c->nabits < 5) + c->nabits++; + + for (band = 0, b = c->bands; band < c->nfreqbands; band++, b++) { + // Pairwise channel decorrelation + if ((b->decor_enabled = get_bits1(&s->gb)) && c->nchannels > 1) { + int ch_nbits = av_ceil_log2(c->nchannels); + + // Original channel order + for (i = 0; i < c->nchannels; i++) { + b->orig_order[i] = get_bits(&s->gb, ch_nbits); + if (b->orig_order[i] >= c->nchannels) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL original channel order\n"); + return AVERROR_INVALIDDATA; } } - chset->ch_mask_enabled = get_bits(&s->gb, 1); - if (chset->ch_mask_enabled) - chset->ch_mask = get_bits(&s->gb, s->xll_bits4ch_mask); - else - /* Skip speaker configuration bits */ - skip_bits_long(&s->gb, 25 * chset->channels); + + // Pairwise channel coefficients + for (i = 0; i < c->nchannels / 2; i++) + b->decor_coeff[i] = get_bits1(&s->gb) ? get_linear(&s->gb, 7) : 0; } else { - chset->primary_ch_set = 1; - chset->downmix_coeff_code_embedded = 0; - /* Spec: NumChHierChSet = 0, NumDwnMixCodeCoeffs = 0, whatever that means. */ - chset->mapping_coeffs_present = get_bits(&s->gb, 1); - if (chset->mapping_coeffs_present) { - avpriv_report_missing_feature(s->avctx, "XLL: mapping coefficients"); - return AVERROR_PATCHWELCOME; - } + for (i = 0; i < c->nchannels; i++) + b->orig_order[i] = i; + for (i = 0; i < c->nchannels / 2; i++) + b->decor_coeff[i] = 0; } - if (chset->sampling_frequency > 96000) - chset->num_freq_bands = 2 * (1 + get_bits(&s->gb, 1)); - else - chset->num_freq_bands = 1; - if (chset->num_freq_bands > 1) { - avpriv_report_missing_feature(s->avctx, "XLL: num_freq_bands > 1"); - return AVERROR_PATCHWELCOME; + // Adaptive predictor order + b->highest_pred_order = 0; + for (i = 0; i < c->nchannels; i++) { + b->adapt_pred_order[i] = get_bits(&s->gb, 4); + if (b->adapt_pred_order[i] > b->highest_pred_order) + b->highest_pred_order = b->adapt_pred_order[i]; + } + if (b->highest_pred_order > s->nsegsamples) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL adaptive predicition order\n"); + return AVERROR_INVALIDDATA; } - if (get_bits(&s->gb, 1)) { /* pw_ch_decor_enabled */ - int bits = av_ceil_log2(chset->channels); - for (i = 0; i < chset->channels; i++) { - unsigned j = get_bits(&s->gb, bits); - if (j >= chset->channels) { - av_log(s->avctx, AV_LOG_ERROR, - "Original channel order value %u too large, only %d channels.\n", - j, chset->channels); + // Fixed predictor order + for (i = 0; i < c->nchannels; i++) + b->fixed_pred_order[i] = b->adapt_pred_order[i] ? 0 : get_bits(&s->gb, 2); + + // Adaptive predictor quantized reflection coefficients + for (i = 0; i < c->nchannels; i++) { + for (j = 0; j < b->adapt_pred_order[i]; j++) { + k = get_linear(&s->gb, 8); + if (k == -128) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL reflection coefficient index\n"); return AVERROR_INVALIDDATA; } - chset->orig_chan_order[0][i] = j; - chset->orig_chan_order_inv[0][j] = i; - } - for (i = 0; i < chset->channels / 2; i++) { - if (get_bits(&s->gb, 1)) /* bChPFlag */ - chset->pw_ch_pairs_coeffs[0][i] = get_bits_sm(&s->gb, 7); + if (k < 0) + b->adapt_refl_coeff[i][j] = -(int)ff_dca_xll_refl_coeff[-k]; else - chset->pw_ch_pairs_coeffs[0][i] = 0; + b->adapt_refl_coeff[i][j] = (int)ff_dca_xll_refl_coeff[ k]; } - } else { - for (i = 0; i < chset->channels; i++) - chset->orig_chan_order[0][i] = - chset->orig_chan_order_inv[0][i] = i; - for (i = 0; i < chset->channels / 2; i++) - chset->pw_ch_pairs_coeffs[0][i] = 0; - } - /* Adaptive prediction order */ - chset->adapt_order_max[0] = 0; - for (i = 0; i < chset->channels; i++) { - chset->adapt_order[0][i] = get_bits(&s->gb, 4); - if (chset->adapt_order_max[0] < chset->adapt_order[0][i]) - chset->adapt_order_max[0] = chset->adapt_order[0][i]; - } - /* Fixed prediction order, used in case the adaptive order - * above is zero */ - for (i = 0; i < chset->channels; i++) - chset->fixed_order[0][i] = - chset->adapt_order[0][i] ? 0 : get_bits(&s->gb, 2); - - for (i = 0; i < chset->channels; i++) { - unsigned j; - for (j = 0; j < chset->adapt_order[0][i]; j++) - chset->lpc_refl_coeffs_q_ind[0][i][j] = get_bits(&s->gb, 8); - } - - if (s->xll_scalable_lsb) { - chset->lsb_fsize[0] = get_bits(&s->gb, s->xll_bits4seg_size); - - for (i = 0; i < chset->channels; i++) - chset->scalable_lsbs[0][i] = get_bits(&s->gb, 4); - for (i = 0; i < chset->channels; i++) - chset->bit_width_adj_per_ch[0][i] = get_bits(&s->gb, 4); - } else { - memset(chset->scalable_lsbs[0], 0, - chset->channels * sizeof(chset->scalable_lsbs[0][0])); - memset(chset->bit_width_adj_per_ch[0], 0, - chset->channels * sizeof(chset->bit_width_adj_per_ch[0][0])); } - s->xll_channels += chset->channels; - s->xll_residual_channels += chset->channels - - av_popcount(chset->residual_encode); + // Downmix performed by encoder in extension frequency band + b->dmix_embedded = c->dmix_embedded && (band == 0 || get_bits1(&s->gb)); - /* FIXME: Parse header data for extra frequency bands. */ + // MSB/LSB split flag in extension frequency band + if ((band == 0 && s->scalable_lsbs) || (band != 0 && get_bits1(&s->gb))) { + // Size of LSB section in any segment + b->lsb_section_size = get_bits_long(&s->gb, s->seg_size_nbits); + if (b->lsb_section_size < 0 || b->lsb_section_size > s->frame_size) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid LSB section size\n"); + return AVERROR_INVALIDDATA; + } - /* Skip to end of channel set sub header. */ - i = get_bits_count(&s->gb); - if (hdr_pos + 8 * hdr_size < i) { - av_log(s->avctx, AV_LOG_ERROR, - "chset header too large, %d bits, should be <= %d bits\n", - i - hdr_pos, 8 * hdr_size); - return AVERROR_INVALIDDATA; + // Account for optional CRC bytes after LSB section + if (b->lsb_section_size && (s->band_crc_present > 2 || + (band == 0 && s->band_crc_present > 1))) + b->lsb_section_size += 2; + + // Number of bits to represent the samples in LSB part + for (i = 0; i < c->nchannels; i++) { + b->nscalablelsbs[i] = get_bits(&s->gb, 4); + if (b->nscalablelsbs[i] && !b->lsb_section_size) { + av_log(s->avctx, AV_LOG_ERROR, "LSB section missing with non-zero LSB width\n"); + return AVERROR_INVALIDDATA; + } + } + } else { + b->lsb_section_size = 0; + for (i = 0; i < c->nchannels; i++) + b->nscalablelsbs[i] = 0; } - if (hdr_pos + 8 * hdr_size > i) - skip_bits_long(&s->gb, hdr_pos + 8 * hdr_size - i); + + // Scalable resolution flag in extension frequency band + if ((band == 0 && s->scalable_lsbs) || (band != 0 && get_bits1(&s->gb))) { + // Number of bits discarded by authoring + for (i = 0; i < c->nchannels; i++) + b->bit_width_adjust[i] = get_bits(&s->gb, 4); + } else { + for (i = 0; i < c->nchannels; i++) + b->bit_width_adjust[i] = 0; + } + } + + // Reserved + // Byte align + // CRC16 of channel set sub-header + if (ff_dca_seek_bits(&s->gb, header_pos + header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL sub-header\n"); + return AVERROR_INVALIDDATA; } + return 0; } -/* parse XLL navigation table */ -int ff_dca_xll_decode_navi(DCAContext *s, int asset_end) +static int chs_alloc_msb_band_data(DCAXllDecoder *s, DCAXllChSet *c) { - int nbands, band, chset, seg, data_start; + int ndecisamples = c->nfreqbands > 1 ? DCA_XLL_DECI_HISTORY_MAX : 0; + int nchsamples = s->nframesamples + ndecisamples; + int i, j, nsamples = nchsamples * c->nchannels * c->nfreqbands; + int32_t *ptr; + + // Reallocate MSB sample buffer + av_fast_malloc(&c->sample_buffer[0], &c->sample_size[0], nsamples * sizeof(int32_t)); + if (!c->sample_buffer[0]) + return AVERROR(ENOMEM); - /* FIXME: Supports only a single frequency band */ - nbands = 1; + ptr = c->sample_buffer[0] + ndecisamples; + for (i = 0; i < c->nfreqbands; i++) { + for (j = 0; j < c->nchannels; j++) { + c->bands[i].msb_sample_buffer[j] = ptr; + ptr += nchsamples; + } + } - for (band = 0; band < nbands; band++) { - s->xll_navi.band_size[band] = 0; - for (seg = 0; seg < s->xll_segments; seg++) { - /* Note: The spec, ETSI TS 102 114 V1.4.1 (2012-09), says - * we should read a base value for segment_size from the - * stream, before reading the sizes of the channel sets. - * But that's apparently incorrect. */ - s->xll_navi.segment_size[band][seg] = 0; + return 0; +} - for (chset = 0; chset < s->xll_nch_sets; chset++) - if (band < s->xll_chsets[chset].num_freq_bands) { - s->xll_navi.chset_size[band][seg][chset] = - get_bits(&s->gb, s->xll_bits4seg_size) + 1; - s->xll_navi.segment_size[band][seg] += - s->xll_navi.chset_size[band][seg][chset]; - } - s->xll_navi.band_size[band] += s->xll_navi.segment_size[band][seg]; +static int chs_alloc_lsb_band_data(DCAXllDecoder *s, DCAXllChSet *c) +{ + int i, j, nsamples = 0; + int32_t *ptr; + + // Determine number of frequency bands that have MSB/LSB split + for (i = 0; i < c->nfreqbands; i++) + if (c->bands[i].lsb_section_size) + nsamples += s->nframesamples * c->nchannels; + if (!nsamples) + return 0; + + // Reallocate LSB sample buffer + av_fast_malloc(&c->sample_buffer[1], &c->sample_size[1], nsamples * sizeof(int32_t)); + if (!c->sample_buffer[1]) + return AVERROR(ENOMEM); + + ptr = c->sample_buffer[1]; + for (i = 0; i < c->nfreqbands; i++) { + if (c->bands[i].lsb_section_size) { + for (j = 0; j < c->nchannels; j++) { + c->bands[i].lsb_sample_buffer[j] = ptr; + ptr += s->nframesamples; + } + } else { + for (j = 0; j < c->nchannels; j++) + c->bands[i].lsb_sample_buffer[j] = NULL; } } - /* Align to 8 bits and skip 16-bit CRC. */ - skip_bits_long(&s->gb, 16 + ((-get_bits_count(&s->gb)) & 7)); - data_start = get_bits_count(&s->gb); - if (data_start + 8 * s->xll_navi.band_size[0] > asset_end) { - av_log(s->avctx, AV_LOG_ERROR, - "XLL: Data in NAVI table exceeds containing asset\n" - "start: %d (bit), size %u (bytes), end %d (bit), error %u\n", - data_start, s->xll_navi.band_size[0], asset_end, - data_start + 8 * s->xll_navi.band_size[0] - asset_end); - return AVERROR_INVALIDDATA; - } - init_get_bits(&s->xll_navi.gb, s->gb.buffer + data_start / 8, - 8 * s->xll_navi.band_size[0]); return 0; } -static void dca_xll_inv_adapt_pred(int *samples, int nsamples, unsigned order, - const int *prev, const uint8_t *q_ind) -{ - static const uint16_t table[0x81] = { - 0, 3070, 5110, 7140, 9156, 11154, 13132, 15085, - 17010, 18904, 20764, 22588, 24373, 26117, 27818, 29474, - 31085, 32648, 34164, 35631, 37049, 38418, 39738, 41008, - 42230, 43404, 44530, 45609, 46642, 47630, 48575, 49477, - 50337, 51157, 51937, 52681, 53387, 54059, 54697, 55302, - 55876, 56421, 56937, 57426, 57888, 58326, 58741, 59132, - 59502, 59852, 60182, 60494, 60789, 61066, 61328, 61576, - 61809, 62029, 62236, 62431, 62615, 62788, 62951, 63105, - 63250, 63386, 63514, 63635, 63749, 63855, 63956, 64051, - 64140, 64224, 64302, 64376, 64446, 64512, 64573, 64631, - 64686, 64737, 64785, 64830, 64873, 64913, 64950, 64986, - 65019, 65050, 65079, 65107, 65133, 65157, 65180, 65202, - 65222, 65241, 65259, 65275, 65291, 65306, 65320, 65333, - 65345, 65357, 65368, 65378, 65387, 65396, 65405, 65413, - 65420, 65427, 65434, 65440, 65446, 65451, 65456, 65461, - 65466, 65470, 65474, 65478, 65481, 65485, 65488, 65491, - 65535, /* Final value is for the -128 corner case, see below. */ - }; - int c[DCA_XLL_AORDER_MAX]; - int64_t s; - unsigned i, j; - - for (i = 0; i < order; i++) { - if (q_ind[i] & 1) - /* The index value 0xff corresponds to a lookup of entry 0x80 in - * the table, and no value is provided in the specification. */ - c[i] = -table[(q_ind[i] >> 1) + 1]; - else - c[i] = table[q_ind[i] >> 1]; - } - /* The description in the spec is a bit convoluted. We can convert - * the reflected values to direct values in place, using a - * sequence of reflections operating on two values. */ - for (i = 1; i < order; i++) { - /* i = 1: scale c[0] - * i = 2: reflect c[0] <-> c[1] - * i = 3: scale c[1], reflect c[0] <-> c[2] - * i = 4: reflect c[0] <-> c[3] reflect c[1] <-> c[2] - * ... */ - if (i & 1) - c[i / 2] += ((int64_t) c[i] * c[i / 2] + 0x8000) >> 16; - for (j = 0; j < i / 2; j++) { - int r0 = c[j]; - int r1 = c[i - j - 1]; - c[j] += ((int64_t) c[i] * r1 + 0x8000) >> 16; - c[i - j - 1] += ((int64_t) c[i] * r0 + 0x8000) >> 16; - } - } - /* Apply predictor. */ - /* NOTE: Processing samples in this order means that the - * predictor is applied to the newly reconstructed samples. */ - if (prev) { - for (i = 0; i < order; i++) { - for (j = s = 0; j < i; j++) - s += (int64_t) c[j] * samples[i - 1 - j]; - for (; j < order; j++) - s += (int64_t) c[j] * prev[DCA_XLL_AORDER_MAX + i - 1 - j]; - - samples[i] -= av_clip_intp2((s + 0x8000) >> 16, 24); - } - } - for (i = order; i < nsamples; i++) { - for (j = s = 0; j < order; j++) - s += (int64_t) c[j] * samples[i - 1 - j]; - - /* NOTE: Equations seem to imply addition, while the - * pseudocode seems to use subtraction.*/ - samples[i] -= av_clip_intp2((s + 0x8000) >> 16, 24); - } -} - -int ff_dca_xll_decode_audio(DCAContext *s, AVFrame *frame) -{ - /* FIXME: Decodes only the first frequency band. */ - int seg, chset_i; - - /* Coding parameters for each channel set. */ - struct coding_params { - int seg_type; - int rice_code_flag[16]; - int pancAuxABIT[16]; - int pancABIT0[16]; /* Not sure what this is */ - int pancABIT[16]; /* Not sure what this is */ - int nSamplPart0[16]; - } param_state[16]; - - GetBitContext *gb = &s->xll_navi.gb; - int *history; - - /* Layout: First the sample buffer for one segment per channel, - * followed by history buffers of DCA_XLL_AORDER_MAX samples for - * each channel. */ - av_fast_malloc(&s->xll_sample_buf, &s->xll_sample_buf_size, - (s->xll_smpl_in_seg + DCA_XLL_AORDER_MAX) * - s->xll_channels * sizeof(*s->xll_sample_buf)); - if (!s->xll_sample_buf) - return AVERROR(ENOMEM); +static int chs_parse_band_data(DCAXllDecoder *s, DCAXllChSet *c, int band, int seg, int band_data_end) +{ + DCAXllBand *b = &c->bands[band]; + int i, j, k; + + // Start unpacking MSB portion of the segment + if (!(seg && get_bits1(&s->gb))) { + // Unpack segment type + // 0 - distinct coding parameters for each channel + // 1 - common coding parameters for all channels + c->seg_common = get_bits1(&s->gb); + + // Determine number of coding parameters encoded in segment + k = c->seg_common ? 1 : c->nchannels; + + // Unpack Rice coding parameters + for (i = 0; i < k; i++) { + // Unpack Rice coding flag + // 0 - linear code, 1 - Rice code + c->rice_code_flag[i] = get_bits1(&s->gb); + if (!c->seg_common && c->rice_code_flag[i]) { + // Unpack Hybrid Rice coding flag + // 0 - Rice code, 1 - Hybrid Rice code + if (get_bits1(&s->gb)) + // Unpack binary code length for isolated samples + c->bitalloc_hybrid_linear[i] = get_bits(&s->gb, c->nabits) + 1; + else + // 0 indicates no Hybrid Rice coding + c->bitalloc_hybrid_linear[i] = 0; + } else { + // 0 indicates no Hybrid Rice coding + c->bitalloc_hybrid_linear[i] = 0; + } + } + + // Unpack coding parameters + for (i = 0; i < k; i++) { + if (seg == 0) { + // Unpack coding parameter for part A of segment 0 + c->bitalloc_part_a[i] = get_bits(&s->gb, c->nabits); - history = s->xll_sample_buf + s->xll_smpl_in_seg * s->xll_channels; - - for (seg = 0; seg < s->xll_segments; seg++) { - unsigned in_channel; - - for (chset_i = in_channel = 0; chset_i < s->xll_nch_sets; chset_i++) { - /* The spec isn't very explicit, but I think the NAVI sizes are in bytes. */ - int end_pos = get_bits_count(gb) + - 8 * s->xll_navi.chset_size[0][seg][chset_i]; - int i, j; - struct coding_params *params = ¶m_state[chset_i]; - /* I think this flag means that we should keep seg_type and - * other parameters from the previous segment. */ - int use_seg_state_code_param; - XllChSetSubHeader *chset = &s->xll_chsets[chset_i]; - if (in_channel >= s->avctx->channels) - /* FIXME: Could go directly to next segment */ - goto next_chset; - - if (s->avctx->sample_rate != chset->sampling_frequency) { - av_log(s->avctx, AV_LOG_WARNING, - "XLL: unexpected chset sample rate %d, expected %d\n", - chset->sampling_frequency, s->avctx->sample_rate); - goto next_chset; + // Adjust for the linear code + if (!c->rice_code_flag[i] && c->bitalloc_part_a[i]) + c->bitalloc_part_a[i]++; + + if (!c->seg_common) + c->nsamples_part_a[i] = b->adapt_pred_order[i]; + else + c->nsamples_part_a[i] = b->highest_pred_order; + } else { + c->bitalloc_part_a[i] = 0; + c->nsamples_part_a[i] = 0; } - if (seg != 0) - use_seg_state_code_param = get_bits(gb, 1); - else - use_seg_state_code_param = 0; - - if (!use_seg_state_code_param) { - int num_param_sets, i; - unsigned bits4ABIT; - - params->seg_type = get_bits(gb, 1); - num_param_sets = params->seg_type ? 1 : chset->channels; - - if (chset->bit_width > 16) { - bits4ABIT = 5; - } else { - if (chset->bit_width > 8) - bits4ABIT = 4; - else - bits4ABIT = 3; - if (s->xll_nch_sets > 1) - bits4ABIT++; + + // Unpack coding parameter for part B of segment + c->bitalloc_part_b[i] = get_bits(&s->gb, c->nabits); + + // Adjust for the linear code + if (!c->rice_code_flag[i] && c->bitalloc_part_b[i]) + c->bitalloc_part_b[i]++; + } + } + + // Unpack entropy codes + for (i = 0; i < c->nchannels; i++) { + int32_t *part_a, *part_b; + int nsamples_part_b; + + // Select index of coding parameters + k = c->seg_common ? 0 : i; + + // Slice the segment into parts A and B + part_a = b->msb_sample_buffer[i] + seg * s->nsegsamples; + part_b = part_a + c->nsamples_part_a[k]; + nsamples_part_b = s->nsegsamples - c->nsamples_part_a[k]; + + if (get_bits_left(&s->gb) < 0) + return AVERROR_INVALIDDATA; + + if (!c->rice_code_flag[k]) { + // Linear codes + // Unpack all residuals of part A of segment 0 + get_linear_array(&s->gb, part_a, c->nsamples_part_a[k], + c->bitalloc_part_a[k]); + + // Unpack all residuals of part B of segment 0 and others + get_linear_array(&s->gb, part_b, nsamples_part_b, + c->bitalloc_part_b[k]); + } else { + // Rice codes + // Unpack all residuals of part A of segment 0 + get_rice_array(&s->gb, part_a, c->nsamples_part_a[k], + c->bitalloc_part_a[k]); + + if (c->bitalloc_hybrid_linear[k]) { + // Hybrid Rice codes + // Unpack the number of isolated samples + int nisosamples = get_bits(&s->gb, s->nsegsamples_log2); + + // Set all locations to 0 + memset(part_b, 0, sizeof(*part_b) * nsamples_part_b); + + // Extract the locations of isolated samples and flag by -1 + for (j = 0; j < nisosamples; j++) { + int loc = get_bits(&s->gb, s->nsegsamples_log2); + if (loc >= nsamples_part_b) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid isolated sample location\n"); + return AVERROR_INVALIDDATA; + } + part_b[loc] = -1; } - for (i = 0; i < num_param_sets; i++) { - params->rice_code_flag[i] = get_bits(gb, 1); - if (!params->seg_type && params->rice_code_flag[i] && get_bits(gb, 1)) - params->pancAuxABIT[i] = get_bits(gb, bits4ABIT) + 1; + // Unpack all residuals of part B of segment 0 and others + for (j = 0; j < nsamples_part_b; j++) { + if (part_b[j]) + part_b[j] = get_linear(&s->gb, c->bitalloc_hybrid_linear[k]); else - params->pancAuxABIT[i] = 0; + part_b[j] = get_rice(&s->gb, c->bitalloc_part_b[k]); } + } else { + // Rice codes + // Unpack all residuals of part B of segment 0 and others + get_rice_array(&s->gb, part_b, nsamples_part_b, c->bitalloc_part_b[k]); + } + } + } - for (i = 0; i < num_param_sets; i++) { - if (!seg) { - /* Parameters for part 1 */ - params->pancABIT0[i] = get_bits(gb, bits4ABIT); - if (params->rice_code_flag[i] == 0 && params->pancABIT0[i] > 0) - /* For linear code */ - params->pancABIT0[i]++; - - /* NOTE: In the spec, not indexed by band??? */ - if (params->seg_type == 0) - params->nSamplPart0[i] = chset->adapt_order[0][i]; - else - params->nSamplPart0[i] = chset->adapt_order_max[0]; - } else - params->nSamplPart0[i] = 0; - - /* Parameters for part 2 */ - params->pancABIT[i] = get_bits(gb, bits4ABIT); - if (params->rice_code_flag[i] == 0 && params->pancABIT[i] > 0) - /* For linear code */ - params->pancABIT[i]++; - } + // Unpack decimator history for frequency band 1 + if (seg == 0 && band == 1) { + int nbits = get_bits(&s->gb, 5) + 1; + for (i = 0; i < c->nchannels; i++) + for (j = 1; j < DCA_XLL_DECI_HISTORY_MAX; j++) + c->deci_history[i][j] = get_sbits_long(&s->gb, nbits); + } + + // Start unpacking LSB portion of the segment + if (b->lsb_section_size) { + // Skip to the start of LSB portion + if (ff_dca_seek_bits(&s->gb, band_data_end - b->lsb_section_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL band data\n"); + return AVERROR_INVALIDDATA; + } + + // Unpack all LSB parts of residuals of this segment + for (i = 0; i < c->nchannels; i++) { + if (b->nscalablelsbs[i]) { + get_array(&s->gb, + b->lsb_sample_buffer[i] + seg * s->nsegsamples, + s->nsegsamples, b->nscalablelsbs[i]); } - for (i = 0; i < chset->channels; i++) { - int param_index = params->seg_type ? 0 : i; - int part0 = params->nSamplPart0[param_index]; - int bits = part0 ? params->pancABIT0[param_index] : 0; - int *sample_buf = s->xll_sample_buf + - (in_channel + i) * s->xll_smpl_in_seg; - - if (!params->rice_code_flag[param_index]) { - /* Linear code */ - if (bits) - for (j = 0; j < part0; j++) - sample_buf[j] = get_bits_sm(gb, bits); - else - memset(sample_buf, 0, part0 * sizeof(sample_buf[0])); + } + } - /* Second part */ - bits = params->pancABIT[param_index]; - if (bits) - for (j = part0; j < s->xll_smpl_in_seg; j++) - sample_buf[j] = get_bits_sm(gb, bits); - else - memset(sample_buf + part0, 0, - (s->xll_smpl_in_seg - part0) * sizeof(sample_buf[0])); - } else { - int aux_bits = params->pancAuxABIT[param_index]; - - for (j = 0; j < part0; j++) { - /* FIXME: Is this identical to Golomb code? */ - int t = get_unary(gb, 1, 33) << bits; - /* FIXME: Could move this test outside of the loop, for efficiency. */ - if (bits) - t |= get_bits(gb, bits); - sample_buf[j] = (t & 1) ? -(t >> 1) - 1 : (t >> 1); - } + // Skip to the end of band data + if (ff_dca_seek_bits(&s->gb, band_data_end)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL band data\n"); + return AVERROR_INVALIDDATA; + } - /* Second part */ - bits = params->pancABIT[param_index]; - - /* Follow the spec's suggestion of using the - * buffer also to store the hybrid-rice flags. */ - memset(sample_buf + part0, 0, - (s->xll_smpl_in_seg - part0) * sizeof(sample_buf[0])); - - if (aux_bits > 0) { - /* For hybrid rice encoding, some samples are linearly - * coded. According to the spec, "nBits4SamplLoci" bits - * are used for each index, but this value is not - * defined. I guess we should use log2(xll_smpl_in_seg) - * bits. */ - int count = get_bits(gb, s->xll_log_smpl_in_seg); - av_log(s->avctx, AV_LOG_DEBUG, "aux count %d (bits %d)\n", - count, s->xll_log_smpl_in_seg); - - for (j = 0; j < count; j++) - sample_buf[get_bits(gb, s->xll_log_smpl_in_seg)] = 1; - } - for (j = part0; j < s->xll_smpl_in_seg; j++) { - if (!sample_buf[j]) { - int t = get_unary(gb, 1, 33); - if (bits) - t = (t << bits) | get_bits(gb, bits); - sample_buf[j] = (t & 1) ? -(t >> 1) - 1 : (t >> 1); - } else - sample_buf[j] = get_bits_sm(gb, aux_bits); - } + return 0; +} + +static void av_cold chs_clear_band_data(DCAXllDecoder *s, DCAXllChSet *c, int band, int seg) +{ + DCAXllBand *b = &c->bands[band]; + int i, offset, nsamples; + + if (seg < 0) { + offset = 0; + nsamples = s->nframesamples; + } else { + offset = seg * s->nsegsamples; + nsamples = s->nsegsamples; + } + + for (i = 0; i < c->nchannels; i++) { + memset(b->msb_sample_buffer[i] + offset, 0, nsamples * sizeof(int32_t)); + if (b->lsb_section_size) + memset(b->lsb_sample_buffer[i] + offset, 0, nsamples * sizeof(int32_t)); + } + + if (seg <= 0 && band) + memset(c->deci_history, 0, sizeof(c->deci_history)); + + if (seg < 0) { + memset(b->nscalablelsbs, 0, sizeof(b->nscalablelsbs)); + memset(b->bit_width_adjust, 0, sizeof(b->bit_width_adjust)); + } +} + +static void chs_filter_band_data(DCAXllDecoder *s, DCAXllChSet *c, int band) +{ + DCAXllBand *b = &c->bands[band]; + int nsamples = s->nframesamples; + int i, j, k; + + // Inverse adaptive or fixed prediction + for (i = 0; i < c->nchannels; i++) { + int32_t *buf = b->msb_sample_buffer[i]; + int order = b->adapt_pred_order[i]; + if (order > 0) { + int coeff[DCA_XLL_ADAPT_PRED_ORDER_MAX]; + // Conversion from reflection coefficients to direct form coefficients + for (j = 0; j < order; j++) { + int rc = b->adapt_refl_coeff[i][j]; + for (k = 0; k < (j + 1) / 2; k++) { + int tmp1 = coeff[ k ]; + int tmp2 = coeff[j - k - 1]; + coeff[ k ] = tmp1 + mul16(rc, tmp2); + coeff[j - k - 1] = tmp2 + mul16(rc, tmp1); } + coeff[j] = rc; + } + // Inverse adaptive prediction + for (j = 0; j < nsamples - order; j++) { + int64_t err = 0; + for (k = 0; k < order; k++) + err += (int64_t)buf[j + k] * coeff[order - k - 1]; + buf[j + k] -= clip23(norm16(err)); } + } else { + // Inverse fixed coefficient prediction + for (j = 0; j < b->fixed_pred_order[i]; j++) + for (k = 1; k < nsamples; k++) + buf[k] += buf[k - 1]; + } + } - for (i = 0; i < chset->channels; i++) { - unsigned adapt_order = chset->adapt_order[0][i]; - int *sample_buf = s->xll_sample_buf + - (in_channel + i) * s->xll_smpl_in_seg; - int *prev = history + (in_channel + i) * DCA_XLL_AORDER_MAX; - - if (!adapt_order) { - unsigned order; - for (order = chset->fixed_order[0][i]; order > 0; order--) { - unsigned j; - for (j = 1; j < s->xll_smpl_in_seg; j++) - sample_buf[j] += sample_buf[j - 1]; - } - } else - /* Inverse adaptive prediction, in place. */ - dca_xll_inv_adapt_pred(sample_buf, s->xll_smpl_in_seg, - adapt_order, seg ? prev : NULL, - chset->lpc_refl_coeffs_q_ind[0][i]); - memcpy(prev, sample_buf + s->xll_smpl_in_seg - DCA_XLL_AORDER_MAX, - DCA_XLL_AORDER_MAX * sizeof(*prev)); + // Inverse pairwise channel decorrellation + if (b->decor_enabled) { + int32_t *tmp[DCA_XLL_CHANNELS_MAX]; + + for (i = 0; i < c->nchannels / 2; i++) { + int coeff = b->decor_coeff[i]; + if (coeff) { + s->dcadsp->decor(b->msb_sample_buffer[i * 2 + 1], + b->msb_sample_buffer[i * 2 ], + coeff, nsamples); } - for (i = 1; i < chset->channels; i += 2) { - int coeff = chset->pw_ch_pairs_coeffs[0][i / 2]; - if (coeff != 0) { - int *sample_buf = s->xll_sample_buf + - (in_channel + i) * s->xll_smpl_in_seg; - int *prev = sample_buf - s->xll_smpl_in_seg; - unsigned j; - for (j = 0; j < s->xll_smpl_in_seg; j++) - /* Shift is unspecified, but should apparently be 3. */ - sample_buf[j] += ((int64_t) coeff * prev[j] + 4) >> 3; - } + } + + // Reorder channel pointers to the original order + for (i = 0; i < c->nchannels; i++) + tmp[i] = b->msb_sample_buffer[i]; + + for (i = 0; i < c->nchannels; i++) + b->msb_sample_buffer[b->orig_order[i]] = tmp[i]; + } + + // Map output channel pointers for frequency band 0 + if (c->nfreqbands == 1) + for (i = 0; i < c->nchannels; i++) + s->output_samples[c->ch_remap[i]] = b->msb_sample_buffer[i]; +} + +static int chs_get_lsb_width(DCAXllDecoder *s, DCAXllChSet *c, int band, int ch) +{ + int adj = c->bands[band].bit_width_adjust[ch]; + int shift = c->bands[band].nscalablelsbs[ch]; + + if (s->fixed_lsb_width) + shift = s->fixed_lsb_width; + else if (shift && adj) + shift += adj - 1; + else + shift += adj; + + return shift; +} + +static void chs_assemble_msbs_lsbs(DCAXllDecoder *s, DCAXllChSet *c, int band) +{ + DCAXllBand *b = &c->bands[band]; + int n, ch, nsamples = s->nframesamples; + + for (ch = 0; ch < c->nchannels; ch++) { + int shift = chs_get_lsb_width(s, c, band, ch); + if (shift) { + int32_t *msb = b->msb_sample_buffer[ch]; + if (b->nscalablelsbs[ch]) { + int32_t *lsb = b->lsb_sample_buffer[ch]; + int adj = b->bit_width_adjust[ch]; + for (n = 0; n < nsamples; n++) + msb[n] = msb[n] * (1 << shift) + (lsb[n] << adj); + } else { + for (n = 0; n < nsamples; n++) + msb[n] = msb[n] * (1 << shift); } + } + } +} - if (s->xll_scalable_lsb) { - int lsb_start = end_pos - 8 * chset->lsb_fsize[0] - - 8 * (s->xll_banddata_crc & 2); - int done; - i = get_bits_count(gb); - if (i > lsb_start) { - av_log(s->avctx, AV_LOG_ERROR, - "chset data lsb exceeds NAVI size, end_pos %d, lsb_start %d, pos %d\n", - end_pos, lsb_start, i); - return AVERROR_INVALIDDATA; - } - if (i < lsb_start) - skip_bits_long(gb, lsb_start - i); - - for (i = done = 0; i < chset->channels; i++) { - int bits = chset->scalable_lsbs[0][i]; - if (bits > 0) { - /* The channel reordering is conceptually done - * before adding the lsb:s, so we need to do - * the inverse permutation here. */ - unsigned pi = chset->orig_chan_order_inv[0][i]; - int *sample_buf = s->xll_sample_buf + - (in_channel + pi) * s->xll_smpl_in_seg; - int adj = chset->bit_width_adj_per_ch[0][i]; - int msb_shift = bits; - unsigned j; - - if (adj > 0) - msb_shift += adj - 1; - - for (j = 0; j < s->xll_smpl_in_seg; j++) - sample_buf[j] = (sample_buf[j] << msb_shift) + - (get_bits(gb, bits) << adj); - - done += bits * s->xll_smpl_in_seg; +static int chs_assemble_freq_bands(DCAXllDecoder *s, DCAXllChSet *c) +{ + int ch, nsamples = s->nframesamples; + int32_t *ptr; + + av_assert1(c->nfreqbands > 1); + + // Reallocate frequency band assembly buffer + av_fast_malloc(&c->sample_buffer[2], &c->sample_size[2], + 2 * nsamples * c->nchannels * sizeof(int32_t)); + if (!c->sample_buffer[2]) + return AVERROR(ENOMEM); + + // Assemble frequency bands 0 and 1 + ptr = c->sample_buffer[2]; + for (ch = 0; ch < c->nchannels; ch++) { + int32_t *band0 = c->bands[0].msb_sample_buffer[ch]; + int32_t *band1 = c->bands[1].msb_sample_buffer[ch]; + + // Copy decimator history + memcpy(band0 - DCA_XLL_DECI_HISTORY_MAX, + c->deci_history[ch], sizeof(c->deci_history[0])); + + // Filter + s->dcadsp->assemble_freq_bands(ptr, band0, band1, + ff_dca_xll_band_coeff, + nsamples); + + // Remap output channel pointer to assembly buffer + s->output_samples[c->ch_remap[ch]] = ptr; + ptr += nsamples * 2; + } + + return 0; +} + +static int parse_common_header(DCAXllDecoder *s) +{ + int stream_ver, header_size, frame_size_nbits, nframesegs_log2; + + // XLL extension sync word + if (get_bits_long(&s->gb, 32) != DCA_SYNCWORD_XLL) { + av_log(s->avctx, AV_LOG_VERBOSE, "Invalid XLL sync word\n"); + return AVERROR(EAGAIN); + } + + // Version number + stream_ver = get_bits(&s->gb, 4) + 1; + if (stream_ver > 1) { + avpriv_request_sample(s->avctx, "XLL stream version %d", stream_ver); + return AVERROR_PATCHWELCOME; + } + + // Lossless frame header length + header_size = get_bits(&s->gb, 8) + 1; + + // Check CRC + if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL)) + && ff_dca_check_crc(&s->gb, 32, header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL common header checksum\n"); + return AVERROR_INVALIDDATA; + } + + // Number of bits used to read frame size + frame_size_nbits = get_bits(&s->gb, 5) + 1; + + // Number of bytes in a lossless frame + s->frame_size = get_bits_long(&s->gb, frame_size_nbits); + if (s->frame_size < 0 || s->frame_size >= DCA_XLL_PBR_BUFFER_MAX) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid XLL frame size (%d bytes)\n", s->frame_size); + return AVERROR_INVALIDDATA; + } + s->frame_size++; + + // Number of channels sets per frame + s->nchsets = get_bits(&s->gb, 4) + 1; + if (s->nchsets > DCA_XLL_CHSETS_MAX) { + avpriv_request_sample(s->avctx, "%d XLL channel sets", s->nchsets); + return AVERROR_PATCHWELCOME; + } + + // Number of segments per frame + nframesegs_log2 = get_bits(&s->gb, 4); + s->nframesegs = 1 << nframesegs_log2; + if (s->nframesegs > 1024) { + av_log(s->avctx, AV_LOG_ERROR, "Too many segments per XLL frame\n"); + return AVERROR_INVALIDDATA; + } + + // Samples in segment per one frequency band for the first channel set + // Maximum value is 256 for sampling frequencies <= 48 kHz + // Maximum value is 512 for sampling frequencies > 48 kHz + s->nsegsamples_log2 = get_bits(&s->gb, 4); + if (!s->nsegsamples_log2) { + av_log(s->avctx, AV_LOG_ERROR, "Too few samples per XLL segment\n"); + return AVERROR_INVALIDDATA; + } + s->nsegsamples = 1 << s->nsegsamples_log2; + if (s->nsegsamples > 512) { + av_log(s->avctx, AV_LOG_ERROR, "Too many samples per XLL segment\n"); + return AVERROR_INVALIDDATA; + } + + // Samples in frame per one frequency band for the first channel set + s->nframesamples_log2 = s->nsegsamples_log2 + nframesegs_log2; + s->nframesamples = 1 << s->nframesamples_log2; + if (s->nframesamples > 65536) { + av_log(s->avctx, AV_LOG_ERROR, "Too many samples per XLL frame\n"); + return AVERROR_INVALIDDATA; + } + + // Number of bits used to read segment size + s->seg_size_nbits = get_bits(&s->gb, 5) + 1; + + // Presence of CRC16 within each frequency band + // 0 - No CRC16 within band + // 1 - CRC16 placed at the end of MSB0 + // 2 - CRC16 placed at the end of MSB0 and LSB0 + // 3 - CRC16 placed at the end of MSB0 and LSB0 and other frequency bands + s->band_crc_present = get_bits(&s->gb, 2); + + // MSB/LSB split flag + s->scalable_lsbs = get_bits1(&s->gb); + + // Channel position mask + s->ch_mask_nbits = get_bits(&s->gb, 5) + 1; + + // Fixed LSB width + if (s->scalable_lsbs) + s->fixed_lsb_width = get_bits(&s->gb, 4); + else + s->fixed_lsb_width = 0; + + // Reserved + // Byte align + // Header CRC16 protection + if (ff_dca_seek_bits(&s->gb, header_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL common header\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} + +static int is_hier_dmix_chset(DCAXllChSet *c) +{ + return !c->primary_chset && c->dmix_embedded && c->hier_chset; +} + +static DCAXllChSet *find_next_hier_dmix_chset(DCAXllDecoder *s, DCAXllChSet *c) +{ + if (c->hier_chset) + while (++c < &s->chset[s->nchsets]) + if (is_hier_dmix_chset(c)) + return c; + + return NULL; +} + +static void prescale_down_mix(DCAXllChSet *c, DCAXllChSet *o) +{ + int i, j, *coeff_ptr = c->dmix_coeff; + + for (i = 0; i < c->hier_ofs; i++) { + int scale = o->dmix_scale[i]; + int scale_inv = o->dmix_scale_inv[i]; + c->dmix_scale[i] = mul15(c->dmix_scale[i], scale); + c->dmix_scale_inv[i] = mul16(c->dmix_scale_inv[i], scale_inv); + for (j = 0; j < c->nchannels; j++) { + int coeff = mul16(*coeff_ptr, scale_inv); + *coeff_ptr++ = mul15(coeff, o->dmix_scale[c->hier_ofs + j]); + } + } +} + +static int parse_sub_headers(DCAXllDecoder *s, DCAExssAsset *asset) +{ + DCAContext *dca = s->avctx->priv_data; + DCAXllChSet *c; + int i, ret; + + // Parse channel set headers + s->nfreqbands = 0; + s->nchannels = 0; + s->nreschsets = 0; + for (i = 0, c = s->chset; i < s->nchsets; i++, c++) { + c->hier_ofs = s->nchannels; + if ((ret = chs_parse_header(s, c, asset)) < 0) + return ret; + if (c->nfreqbands > s->nfreqbands) + s->nfreqbands = c->nfreqbands; + if (c->hier_chset) + s->nchannels += c->nchannels; + if (c->residual_encode != (1 << c->nchannels) - 1) + s->nreschsets++; + } + + // Pre-scale downmixing coefficients for all non-primary channel sets + for (i = s->nchsets - 1, c = &s->chset[i]; i > 0; i--, c--) { + if (is_hier_dmix_chset(c)) { + DCAXllChSet *o = find_next_hier_dmix_chset(s, c); + if (o) + prescale_down_mix(c, o); + } + } + + // Determine number of active channel sets to decode + switch (dca->request_channel_layout) { + case DCA_SPEAKER_LAYOUT_STEREO: + s->nactivechsets = 1; + break; + case DCA_SPEAKER_LAYOUT_5POINT0: + case DCA_SPEAKER_LAYOUT_5POINT1: + s->nactivechsets = (s->chset[0].nchannels < 5 && s->nchsets > 1) ? 2 : 1; + break; + default: + s->nactivechsets = s->nchsets; + break; + } + + return 0; +} + +static int parse_navi_table(DCAXllDecoder *s) +{ + int chs, seg, band, navi_nb, navi_pos, *navi_ptr; + DCAXllChSet *c; + + // Determine size of NAVI table + navi_nb = s->nfreqbands * s->nframesegs * s->nchsets; + if (navi_nb > 1024) { + av_log(s->avctx, AV_LOG_ERROR, "Too many NAVI entries (%d)\n", navi_nb); + return AVERROR_INVALIDDATA; + } + + // Reallocate NAVI table + av_fast_malloc(&s->navi, &s->navi_size, navi_nb * sizeof(*s->navi)); + if (!s->navi) + return AVERROR(ENOMEM); + + // Parse NAVI + navi_pos = get_bits_count(&s->gb); + navi_ptr = s->navi; + for (band = 0; band < s->nfreqbands; band++) { + for (seg = 0; seg < s->nframesegs; seg++) { + for (chs = 0, c = s->chset; chs < s->nchsets; chs++, c++) { + int size = 0; + if (c->nfreqbands > band) { + size = get_bits_long(&s->gb, s->seg_size_nbits); + if (size < 0 || size >= s->frame_size) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid NAVI segment size (%d bytes)\n", size); + return AVERROR_INVALIDDATA; } + size++; } - if (done > 8 * chset->lsb_fsize[0]) { - av_log(s->avctx, AV_LOG_ERROR, - "chset lsb exceeds lsb_size\n"); - return AVERROR_INVALIDDATA; - } + *navi_ptr++ = size; } + } + } + + // Byte align + // CRC16 + skip_bits(&s->gb, -get_bits_count(&s->gb) & 7); + skip_bits(&s->gb, 16); + + // Check CRC + if ((s->avctx->err_recognition & (AV_EF_CRCCHECK | AV_EF_CAREFUL)) + && ff_dca_check_crc(&s->gb, navi_pos, get_bits_count(&s->gb))) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid NAVI checksum\n"); + return AVERROR_INVALIDDATA; + } + + return 0; +} - /* Store output. */ - for (i = 0; i < chset->channels; i++) { - int *sample_buf = s->xll_sample_buf + - (in_channel + i) * s->xll_smpl_in_seg; - int shift = 1 - chset->bit_resolution; - int out_channel = chset->orig_chan_order[0][i]; - float *out; - - /* XLL uses the channel order C, L, R, and we want L, - * R, C. FIXME: Generalize. */ - if (chset->ch_mask_enabled && - (chset->ch_mask & 7) == 7 && out_channel < 3) - out_channel = out_channel ? out_channel - 1 : 2; - - out_channel += in_channel; - if (out_channel >= s->avctx->channels) - continue; - - out = (float *) frame->extended_data[out_channel]; - out += seg * s->xll_smpl_in_seg; - - /* NOTE: A one bit means residual encoding is *not* used. */ - if ((chset->residual_encode >> i) & 1) { - /* Replace channel samples. - * FIXME: Most likely not the right thing to do. */ - for (j = 0; j < s->xll_smpl_in_seg; j++) - out[j] = ldexpf(sample_buf[j], shift); - } else { - /* Add residual signal to core channel */ - for (j = 0; j < s->xll_smpl_in_seg; j++) - out[j] += ldexpf(sample_buf[j], shift); +static int parse_band_data(DCAXllDecoder *s) +{ + int ret, chs, seg, band, navi_pos, *navi_ptr; + DCAXllChSet *c; + + for (chs = 0, c = s->chset; chs < s->nactivechsets; chs++, c++) { + if ((ret = chs_alloc_msb_band_data(s, c)) < 0) + return ret; + if ((ret = chs_alloc_lsb_band_data(s, c)) < 0) + return ret; + } + + navi_pos = get_bits_count(&s->gb); + navi_ptr = s->navi; + for (band = 0; band < s->nfreqbands; band++) { + for (seg = 0; seg < s->nframesegs; seg++) { + for (chs = 0, c = s->chset; chs < s->nchsets; chs++, c++) { + if (c->nfreqbands > band) { + navi_pos += *navi_ptr * 8; + if (navi_pos > s->gb.size_in_bits) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid NAVI position\n"); + return AVERROR_INVALIDDATA; + } + if (chs < s->nactivechsets && + (ret = chs_parse_band_data(s, c, band, seg, navi_pos)) < 0) { + if (s->avctx->err_recognition & AV_EF_EXPLODE) + return ret; + chs_clear_band_data(s, c, band, seg); + } + s->gb.index = navi_pos; } + navi_ptr++; } + } + } - if (chset->downmix_coeff_code_embedded && - !chset->primary_ch_set && chset->hier_chset) { - /* Undo hierarchical downmix of earlier channels. */ - unsigned mix_channel; - for (mix_channel = 0; mix_channel < in_channel; mix_channel++) { - float *mix_buf; - const int *col; - float coeff; - unsigned row; - /* Similar channel reorder C, L, R vs L, R, C reorder. */ - if (chset->ch_mask_enabled && - (chset->ch_mask & 7) == 7 && mix_channel < 3) - mix_buf = (float *) frame->extended_data[mix_channel ? mix_channel - 1 : 2]; - else - mix_buf = (float *) frame->extended_data[mix_channel]; - - mix_buf += seg * s->xll_smpl_in_seg; - col = &chset->downmix_coeffs[mix_channel * (chset->channels + 1)]; - - /* Scale */ - coeff = ldexpf(col[0], -16); - for (j = 0; j < s->xll_smpl_in_seg; j++) - mix_buf[j] *= coeff; - - for (row = 0; - row < chset->channels && in_channel + row < s->avctx->channels; - row++) - if (col[row + 1]) { - const float *new_channel = - (const float *) frame->extended_data[in_channel + row]; - new_channel += seg * s->xll_smpl_in_seg; - coeff = ldexpf(col[row + 1], -15); - for (j = 0; j < s->xll_smpl_in_seg; j++) - mix_buf[j] -= coeff * new_channel[j]; - } + return 0; +} + +static int parse_frame(DCAXllDecoder *s, uint8_t *data, int size, DCAExssAsset *asset) +{ + int ret; + + if ((ret = init_get_bits8(&s->gb, data, size)) < 0) + return ret; + if ((ret = parse_common_header(s)) < 0) + return ret; + if ((ret = parse_sub_headers(s, asset)) < 0) + return ret; + if ((ret = parse_navi_table(s)) < 0) + return ret; + if ((ret = parse_band_data(s)) < 0) + return ret; + if (ff_dca_seek_bits(&s->gb, s->frame_size * 8)) { + av_log(s->avctx, AV_LOG_ERROR, "Read past end of XLL frame\n"); + return AVERROR_INVALIDDATA; + } + return ret; +} + +static void clear_pbr(DCAXllDecoder *s) +{ + s->pbr_length = 0; + s->pbr_delay = 0; +} + +static int copy_to_pbr(DCAXllDecoder *s, uint8_t *data, int size, int delay) +{ + if (size > DCA_XLL_PBR_BUFFER_MAX) + return AVERROR(ENOSPC); + + if (!s->pbr_buffer && !(s->pbr_buffer = av_malloc(DCA_XLL_PBR_BUFFER_MAX + DCA_BUFFER_PADDING_SIZE))) + return AVERROR(ENOMEM); + + memcpy(s->pbr_buffer, data, size); + s->pbr_length = size; + s->pbr_delay = delay; + return 0; +} + +static int parse_frame_no_pbr(DCAXllDecoder *s, uint8_t *data, int size, DCAExssAsset *asset) +{ + int ret = parse_frame(s, data, size, asset); + + // If XLL packet data didn't start with a sync word, we must have jumped + // right into the middle of PBR smoothing period + if (ret == AVERROR(EAGAIN) && asset->xll_sync_present && asset->xll_sync_offset < size) { + // Skip to the next sync word in this packet + data += asset->xll_sync_offset; + size -= asset->xll_sync_offset; + + // If decoding delay is set, put the frame into PBR buffer and return + // failure code. Higher level decoder is expected to switch to lossy + // core decoding or mute its output until decoding delay expires. + if (asset->xll_delay_nframes > 0) { + if ((ret = copy_to_pbr(s, data, size, asset->xll_delay_nframes)) < 0) + return ret; + return AVERROR(EAGAIN); + } + + // No decoding delay, just parse the frame in place + ret = parse_frame(s, data, size, asset); + } + + if (ret < 0) + return ret; + + if (s->frame_size > size) + return AVERROR(EINVAL); + + // If the XLL decoder didn't consume full packet, start PBR smoothing period + if (s->frame_size < size) + if ((ret = copy_to_pbr(s, data + s->frame_size, size - s->frame_size, 0)) < 0) + return ret; + + return 0; +} + +static int parse_frame_pbr(DCAXllDecoder *s, uint8_t *data, int size, DCAExssAsset *asset) +{ + int ret; + + if (size > DCA_XLL_PBR_BUFFER_MAX - s->pbr_length) { + ret = AVERROR(ENOSPC); + goto fail; + } + + memcpy(s->pbr_buffer + s->pbr_length, data, size); + s->pbr_length += size; + + // Respect decoding delay after synchronization error + if (s->pbr_delay > 0 && --s->pbr_delay) + return AVERROR(EAGAIN); + + if ((ret = parse_frame(s, s->pbr_buffer, s->pbr_length, asset)) < 0) + goto fail; + + if (s->frame_size > s->pbr_length) { + ret = AVERROR(EINVAL); + goto fail; + } + + if (s->frame_size == s->pbr_length) { + // End of PBR smoothing period + clear_pbr(s); + } else { + s->pbr_length -= s->frame_size; + memmove(s->pbr_buffer, s->pbr_buffer + s->frame_size, s->pbr_length); + } + + return 0; + +fail: + // For now, throw out all PBR state on failure. + // Perhaps we can be smarter and try to resync somehow. + clear_pbr(s); + return ret; +} + +int ff_dca_xll_parse(DCAXllDecoder *s, uint8_t *data, DCAExssAsset *asset) +{ + int ret; + + if (s->hd_stream_id != asset->hd_stream_id) { + clear_pbr(s); + s->hd_stream_id = asset->hd_stream_id; + } + + if (s->pbr_length) + ret = parse_frame_pbr(s, data + asset->xll_offset, asset->xll_size, asset); + else + ret = parse_frame_no_pbr(s, data + asset->xll_offset, asset->xll_size, asset); + + return ret; +} + +static void undo_down_mix(DCAXllDecoder *s, DCAXllChSet *o, int band) +{ + int i, j, k, nchannels = 0, *coeff_ptr = o->dmix_coeff; + DCAXllChSet *c; + + for (i = 0, c = s->chset; i < s->nactivechsets; i++, c++) { + if (!c->hier_chset) + continue; + + av_assert1(band < c->nfreqbands); + for (j = 0; j < c->nchannels; j++) { + for (k = 0; k < o->nchannels; k++) { + int coeff = *coeff_ptr++; + if (coeff) { + s->dcadsp->dmix_sub(c->bands[band].msb_sample_buffer[j], + o->bands[band].msb_sample_buffer[k], + coeff, s->nframesamples); + if (band) + s->dcadsp->dmix_sub(c->deci_history[j], + o->deci_history[k], + coeff, DCA_XLL_DECI_HISTORY_MAX); } } + } -next_chset: - in_channel += chset->channels; - /* Skip to next channel set using the NAVI info. */ - i = get_bits_count(gb); - if (i > end_pos) { - av_log(s->avctx, AV_LOG_ERROR, - "chset data exceeds NAVI size\n"); - return AVERROR_INVALIDDATA; + nchannels += c->nchannels; + if (nchannels >= o->hier_ofs) + break; + } +} + +static void scale_down_mix(DCAXllDecoder *s, DCAXllChSet *o, int band) +{ + int i, j, nchannels = 0; + DCAXllChSet *c; + + for (i = 0, c = s->chset; i < s->nactivechsets; i++, c++) { + if (!c->hier_chset) + continue; + + av_assert1(band < c->nfreqbands); + for (j = 0; j < c->nchannels; j++) { + int scale = o->dmix_scale[nchannels++]; + if (scale != (1 << 15)) { + s->dcadsp->dmix_scale(c->bands[band].msb_sample_buffer[j], + scale, s->nframesamples); + if (band) + s->dcadsp->dmix_scale(c->deci_history[j], + scale, DCA_XLL_DECI_HISTORY_MAX); } - if (i < end_pos) - skip_bits_long(gb, end_pos - i); + } + + if (nchannels >= o->hier_ofs) + break; + } +} + +// Clear all band data and replace non-residual encoded channels with lossy +// counterparts +static void av_cold force_lossy_output(DCAXllDecoder *s, DCAXllChSet *c) +{ + DCAContext *dca = s->avctx->priv_data; + int band, ch; + + for (band = 0; band < c->nfreqbands; band++) + chs_clear_band_data(s, c, band, -1); + + for (ch = 0; ch < c->nchannels; ch++) { + if (!(c->residual_encode & (1 << ch))) + continue; + if (ff_dca_core_map_spkr(&dca->core, c->ch_remap[ch]) < 0) + continue; + c->residual_encode &= ~(1 << ch); + } +} + +static int combine_residual_frame(DCAXllDecoder *s, DCAXllChSet *c) +{ + DCAContext *dca = s->avctx->priv_data; + int ch, nsamples = s->nframesamples; + DCAXllChSet *o; + + // Verify that core is compatible + if (!(dca->packet & DCA_PACKET_CORE)) { + av_log(s->avctx, AV_LOG_ERROR, "Residual encoded channels are present without core\n"); + return AVERROR(EINVAL); + } + + if (c->freq != dca->core.output_rate) { + av_log(s->avctx, AV_LOG_WARNING, "Sample rate mismatch between core (%d Hz) and XLL (%d Hz)\n", dca->core.output_rate, c->freq); + return AVERROR_INVALIDDATA; + } + + if (nsamples != dca->core.npcmsamples) { + av_log(s->avctx, AV_LOG_WARNING, "Number of samples per frame mismatch between core (%d) and XLL (%d)\n", dca->core.npcmsamples, nsamples); + return AVERROR_INVALIDDATA; + } + + // See if this channel set is downmixed and find the next channel set in + // hierarchy. If downmixed, undo core pre-scaling before combining with + // residual (residual is not scaled). + o = find_next_hier_dmix_chset(s, c); + + // Reduce core bit width and combine with residual + for (ch = 0; ch < c->nchannels; ch++) { + int n, spkr, shift, round; + int32_t *src, *dst; + + if (c->residual_encode & (1 << ch)) + continue; + + // Map this channel to core speaker + spkr = ff_dca_core_map_spkr(&dca->core, c->ch_remap[ch]); + if (spkr < 0) { + av_log(s->avctx, AV_LOG_WARNING, "Residual encoded channel (%d) references unavailable core channel\n", c->ch_remap[ch]); + return AVERROR_INVALIDDATA; + } + + // Account for LSB width + shift = 24 - c->pcm_bit_res + chs_get_lsb_width(s, c, 0, ch); + if (shift > 24) { + av_log(s->avctx, AV_LOG_WARNING, "Invalid core shift (%d bits)\n", shift); + return AVERROR_INVALIDDATA; + } + + round = shift > 0 ? 1 << (shift - 1) : 0; + + src = dca->core.output_samples[spkr]; + dst = c->bands[0].msb_sample_buffer[ch]; + if (o) { + // Undo embedded core downmix pre-scaling + int scale_inv = o->dmix_scale_inv[c->hier_ofs + ch]; + for (n = 0; n < nsamples; n++) + dst[n] += clip23((mul16(src[n], scale_inv) + round) >> shift); + } else { + // No downmix scaling + for (n = 0; n < nsamples; n++) + dst[n] += (src[n] + round) >> shift; } } + return 0; } + +int ff_dca_xll_filter_frame(DCAXllDecoder *s, AVFrame *frame) +{ + AVCodecContext *avctx = s->avctx; + DCAContext *dca = avctx->priv_data; + DCAExssAsset *asset = &dca->exss.assets[0]; + DCAXllChSet *p = &s->chset[0], *c; + enum AVMatrixEncoding matrix_encoding = AV_MATRIX_ENCODING_NONE; + int i, j, k, ret, shift, nsamples, request_mask; + int ch_remap[DCA_SPEAKER_COUNT]; + + // Force lossy downmixed output during recovery + if (dca->packet & DCA_PACKET_RECOVERY) { + for (i = 0, c = s->chset; i < s->nchsets; i++, c++) { + if (i < s->nactivechsets) + force_lossy_output(s, c); + + if (!c->primary_chset) + c->dmix_embedded = 0; + } + + s->scalable_lsbs = 0; + s->fixed_lsb_width = 0; + } + + // Filter frequency bands for active channel sets + s->output_mask = 0; + for (i = 0, c = s->chset; i < s->nactivechsets; i++, c++) { + chs_filter_band_data(s, c, 0); + + if (c->residual_encode != (1 << c->nchannels) - 1 + && (ret = combine_residual_frame(s, c)) < 0) + return ret; + + if (s->scalable_lsbs) + chs_assemble_msbs_lsbs(s, c, 0); + + if (c->nfreqbands > 1) { + chs_filter_band_data(s, c, 1); + chs_assemble_msbs_lsbs(s, c, 1); + } + + s->output_mask |= c->ch_mask; + } + + // Undo hierarchial downmix and/or apply scaling + for (i = 1, c = &s->chset[1]; i < s->nchsets; i++, c++) { + if (!is_hier_dmix_chset(c)) + continue; + + if (i >= s->nactivechsets) { + for (j = 0; j < c->nfreqbands; j++) + if (c->bands[j].dmix_embedded) + scale_down_mix(s, c, j); + break; + } + + for (j = 0; j < c->nfreqbands; j++) + if (c->bands[j].dmix_embedded) + undo_down_mix(s, c, j); + } + + // Assemble frequency bands for active channel sets + if (s->nfreqbands > 1) { + for (i = 0; i < s->nactivechsets; i++) + if ((ret = chs_assemble_freq_bands(s, &s->chset[i])) < 0) + return ret; + } + + // Normalize to regular 5.1 layout if downmixing + if (dca->request_channel_layout) { + if (s->output_mask & DCA_SPEAKER_MASK_Lss) { + s->output_samples[DCA_SPEAKER_Ls] = s->output_samples[DCA_SPEAKER_Lss]; + s->output_mask = (s->output_mask & ~DCA_SPEAKER_MASK_Lss) | DCA_SPEAKER_MASK_Ls; + } + if (s->output_mask & DCA_SPEAKER_MASK_Rss) { + s->output_samples[DCA_SPEAKER_Rs] = s->output_samples[DCA_SPEAKER_Rss]; + s->output_mask = (s->output_mask & ~DCA_SPEAKER_MASK_Rss) | DCA_SPEAKER_MASK_Rs; + } + } + + // Handle downmixing to stereo request + if (dca->request_channel_layout == DCA_SPEAKER_LAYOUT_STEREO + && DCA_HAS_STEREO(s->output_mask) && p->dmix_embedded + && (p->dmix_type == DCA_DMIX_TYPE_LoRo || + p->dmix_type == DCA_DMIX_TYPE_LtRt)) + request_mask = DCA_SPEAKER_LAYOUT_STEREO; + else + request_mask = s->output_mask; + if (!ff_dca_set_channel_layout(avctx, ch_remap, request_mask)) + return AVERROR(EINVAL); + + avctx->sample_rate = p->freq << (s->nfreqbands - 1); + + switch (p->storage_bit_res) { + case 16: + avctx->sample_fmt = AV_SAMPLE_FMT_S16P; + break; + case 24: + avctx->sample_fmt = AV_SAMPLE_FMT_S32P; + break; + default: + return AVERROR(EINVAL); + } + + avctx->bits_per_raw_sample = p->storage_bit_res; + avctx->profile = FF_PROFILE_DTS_HD_MA; + avctx->bit_rate = 0; + + frame->nb_samples = nsamples = s->nframesamples << (s->nfreqbands - 1); + if ((ret = ff_get_buffer(avctx, frame, 0)) < 0) + return ret; + + // Downmix primary channel set to stereo + if (request_mask != s->output_mask) { + ff_dca_downmix_to_stereo_fixed(s->dcadsp, s->output_samples, + p->dmix_coeff, nsamples, + s->output_mask); + } + + shift = p->storage_bit_res - p->pcm_bit_res; + for (i = 0; i < avctx->channels; i++) { + int32_t *samples = s->output_samples[ch_remap[i]]; + if (frame->format == AV_SAMPLE_FMT_S16P) { + int16_t *plane = (int16_t *)frame->extended_data[i]; + for (k = 0; k < nsamples; k++) + plane[k] = av_clip_int16(samples[k] * (1 << shift)); + } else { + int32_t *plane = (int32_t *)frame->extended_data[i]; + for (k = 0; k < nsamples; k++) + plane[k] = clip23(samples[k] * (1 << shift)) * (1 << 8); + } + } + + if (!asset->one_to_one_map_ch_to_spkr) { + if (asset->representation_type == DCA_REPR_TYPE_LtRt) + matrix_encoding = AV_MATRIX_ENCODING_DOLBY; + else if (asset->representation_type == DCA_REPR_TYPE_LhRh) + matrix_encoding = AV_MATRIX_ENCODING_DOLBYHEADPHONE; + } else if (request_mask != s->output_mask && p->dmix_type == DCA_DMIX_TYPE_LtRt) { + matrix_encoding = AV_MATRIX_ENCODING_DOLBY; + } + if ((ret = ff_side_data_update_matrix_encoding(frame, matrix_encoding)) < 0) + return ret; + + return 0; +} + +av_cold void ff_dca_xll_flush(DCAXllDecoder *s) +{ + clear_pbr(s); +} + +av_cold void ff_dca_xll_close(DCAXllDecoder *s) +{ + DCAXllChSet *c; + int i, j; + + for (i = 0, c = s->chset; i < DCA_XLL_CHSETS_MAX; i++, c++) { + for (j = 0; j < DCA_XLL_SAMPLE_BUFFERS_MAX; j++) { + av_freep(&c->sample_buffer[j]); + c->sample_size[j] = 0; + } + } + + av_freep(&s->navi); + s->navi_size = 0; + + av_freep(&s->pbr_buffer); + clear_pbr(s); +}