X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fdcaenc.c;h=c1d72bfd16b08e830f244e9843d19e2528566ec8;hb=752dd1952a7b68094f8b2d5da0ca0df94190cf0e;hp=3c5c33cda25648ce40ae8647085579d027572486;hpb=d168fe14e949b100f9552198339502331405d8ad;p=ffmpeg diff --git a/libavcodec/dcaenc.c b/libavcodec/dcaenc.c index 3c5c33cda25..c1d72bfd16b 100644 --- a/libavcodec/dcaenc.c +++ b/libavcodec/dcaenc.c @@ -25,8 +25,12 @@ #include "libavutil/channel_layout.h" #include "libavutil/common.h" #include "libavutil/ffmath.h" +#include "libavutil/opt.h" #include "avcodec.h" #include "dca.h" +#include "dcaadpcm.h" +#include "dcamath.h" +#include "dca_core.h" #include "dcadata.h" #include "dcaenc.h" #include "internal.h" @@ -44,8 +48,15 @@ #define SUBBAND_SAMPLES (SUBFRAMES * SUBSUBFRAMES * 8) #define AUBANDS 25 +typedef struct CompressionOptions { + int adpcm_mode; +} CompressionOptions; + typedef struct DCAEncContext { + AVClass *class; PutBitContext pb; + DCAADPCMEncContext adpcm_ctx; + CompressionOptions options; int frame_size; int frame_bits; int fullband_channels; @@ -61,10 +72,13 @@ typedef struct DCAEncContext { int32_t lfe_peak_cb; const int8_t *channel_order_tab; ///< channel reordering table, lfe and non lfe + int32_t prediction_mode[MAX_CHANNELS][DCAENC_SUBBANDS]; + int32_t adpcm_history[MAX_CHANNELS][DCAENC_SUBBANDS][DCA_ADPCM_COEFFS * 2]; int32_t history[MAX_CHANNELS][512]; /* This is a circular buffer */ - int32_t subband[MAX_CHANNELS][DCAENC_SUBBANDS][SUBBAND_SAMPLES]; + int32_t *subband[MAX_CHANNELS][DCAENC_SUBBANDS]; int32_t quantized[MAX_CHANNELS][DCAENC_SUBBANDS][SUBBAND_SAMPLES]; int32_t peak_cb[MAX_CHANNELS][DCAENC_SUBBANDS]; + int32_t diff_peak_cb[MAX_CHANNELS][DCAENC_SUBBANDS]; ///< expected peak of residual signal int32_t downsampled_lfe[DCA_LFE_SAMPLES]; int32_t masking_curve_cb[SUBSUBFRAMES][256]; int32_t bit_allocation_sel[MAX_CHANNELS]; @@ -77,6 +91,7 @@ typedef struct DCAEncContext { int32_t worst_quantization_noise; int32_t worst_noise_ever; int 
consumed_bits; + int consumed_adpcm_bits; ///< Number of bits to transmit ADPCM related info } DCAEncContext; static int32_t cos_table[2048]; @@ -107,18 +122,52 @@ static double gammafilter(int i, double f) return 20 * log10(h); } +static int subband_bufer_alloc(DCAEncContext *c) +{ + int ch, band; + int32_t *bufer = av_calloc(MAX_CHANNELS * DCAENC_SUBBANDS * + (SUBBAND_SAMPLES + DCA_ADPCM_COEFFS), + sizeof(int32_t)); + if (!bufer) + return -1; + + /* We need room for DCA_ADPCM_COEFFS samples from the previous frame + * to calculate the prediction coefficients for each subband */ + for (ch = 0; ch < MAX_CHANNELS; ch++) { + for (band = 0; band < DCAENC_SUBBANDS; band++) { + c->subband[ch][band] = bufer + + ch * DCAENC_SUBBANDS * (SUBBAND_SAMPLES + DCA_ADPCM_COEFFS) + + band * (SUBBAND_SAMPLES + DCA_ADPCM_COEFFS) + DCA_ADPCM_COEFFS; + } + } + return 0; +} + +static void subband_bufer_free(DCAEncContext *c) +{ + int32_t *bufer = c->subband[0][0] - DCA_ADPCM_COEFFS; + av_freep(&bufer); +} + static int encode_init(AVCodecContext *avctx) { DCAEncContext *c = avctx->priv_data; uint64_t layout = avctx->channel_layout; int i, j, min_frame_bits; + if (subband_bufer_alloc(c)) + return AVERROR(ENOMEM); + c->fullband_channels = c->channels = avctx->channels; c->lfe_channel = (avctx->channels == 3 || avctx->channels == 6); c->band_interpolation = band_interpolation[1]; c->band_spectrum = band_spectrum[1]; c->worst_quantization_noise = -2047; c->worst_noise_ever = -2047; + c->consumed_adpcm_bits = 0; + + if (ff_dcaadpcm_init(&c->adpcm_ctx)) + return AVERROR(ENOMEM); if (!layout) { av_log(avctx, AV_LOG_WARNING, "No channel layout specified. 
The " @@ -150,6 +199,12 @@ static int encode_init(AVCodecContext *avctx) } /* 6 - no Huffman */ c->bit_allocation_sel[i] = 6; + + for (j = 0; j < DCAENC_SUBBANDS; j++) { + /* -1 - no ADPCM */ + c->prediction_mode[i][j] = -1; + memset(c->adpcm_history[i][j], 0, sizeof(int32_t)*DCA_ADPCM_COEFFS); + } } for (i = 0; i < 9; i++) { @@ -238,6 +293,16 @@ static int encode_init(AVCodecContext *avctx) return 0; } +static av_cold int encode_close(AVCodecContext *avctx) +{ + if (avctx->priv_data) { + DCAEncContext *c = avctx->priv_data; + subband_bufer_free(c); + ff_dcaadpcm_free(&c->adpcm_ctx); + } + return 0; +} + static inline int32_t cos_t(int x) { return cos_table[x & 2047]; @@ -253,12 +318,6 @@ static inline int32_t half32(int32_t a) return (a + 1) >> 1; } -static inline int32_t mul32(int32_t a, int32_t b) -{ - int64_t r = (int64_t)a * b + 0x80000000ULL; - return r >> 32; -} - static void subband_transform(DCAEncContext *c, const int32_t *input) { int ch, subs, i, k, j; @@ -545,46 +604,70 @@ static void calc_masking(DCAEncContext *c, const int32_t *input) } } +static inline int32_t find_peak(const int32_t *in, int len) { + int sample; + int32_t m = 0; + for (sample = 0; sample < len; sample++) { + int32_t s = abs(in[sample]); + if (m < s) { + m = s; + } + } + return get_cb(m); +} + static void find_peaks(DCAEncContext *c) { int band, ch; - for (ch = 0; ch < c->fullband_channels; ch++) + for (ch = 0; ch < c->fullband_channels; ch++) { for (band = 0; band < 32; band++) { - int sample; - int32_t m = 0; - - for (sample = 0; sample < SUBBAND_SAMPLES; sample++) { - int32_t s = abs(c->subband[ch][band][sample]); - if (m < s) - m = s; - } - c->peak_cb[ch][band] = get_cb(m); + c->peak_cb[ch][band] = find_peak(c->subband[ch][band], SUBBAND_SAMPLES); } + } if (c->lfe_channel) { - int sample; - int32_t m = 0; + c->lfe_peak_cb = find_peak(c->downsampled_lfe, DCA_LFE_SAMPLES); + } +} - for (sample = 0; sample < DCA_LFE_SAMPLES; sample++) - if (m < abs(c->downsampled_lfe[sample])) - m 
= abs(c->downsampled_lfe[sample]); - c->lfe_peak_cb = get_cb(m); +static void adpcm_analysis(DCAEncContext *c) +{ + int ch, band; + int pred_vq_id; + int32_t *samples; + int32_t estimated_diff[SUBBAND_SAMPLES]; + + c->consumed_adpcm_bits = 0; + for (ch = 0; ch < c->fullband_channels; ch++) { + for (band = 0; band < 32; band++) { + samples = c->subband[ch][band] - DCA_ADPCM_COEFFS; + pred_vq_id = ff_dcaadpcm_subband_analysis(&c->adpcm_ctx, samples, SUBBAND_SAMPLES, estimated_diff); + if (pred_vq_id >= 0) { + c->prediction_mode[ch][band] = pred_vq_id; + c->consumed_adpcm_bits += 12; //12 bits to transmit prediction vq index + c->diff_peak_cb[ch][band] = find_peak(estimated_diff, 16); + } else { + c->prediction_mode[ch][band] = -1; + } + } } } static const int snr_fudge = 128; #define USED_1ABITS 1 -#define USED_NABITS 2 #define USED_26ABITS 4 -static int32_t quantize_value(int32_t value, softfloat quant) +static inline int32_t get_step_size(const DCAEncContext *c, int ch, int band) { - int32_t offset = 1 << (quant.e - 1); + int32_t step_size; - value = mul32(value, quant.m) + offset; - value = value >> quant.e; - return value; + if (c->bitrate_index == 3) + step_size = ff_dca_lossless_quant[c->abits[ch][band]]; + else + step_size = ff_dca_lossy_quant[c->abits[ch][band]]; + + return step_size; } static int calc_one_scale(int32_t peak_cb, int abits, softfloat *quant) @@ -619,14 +702,40 @@ static int calc_one_scale(int32_t peak_cb, int abits, softfloat *quant) return our_nscale; } -static void quantize_all(DCAEncContext *c) +static inline void quantize_adpcm_subband(DCAEncContext *c, int ch, int band) +{ + int32_t step_size; + int32_t diff_peak_cb = c->diff_peak_cb[ch][band]; + c->scale_factor[ch][band] = calc_one_scale(diff_peak_cb, + c->abits[ch][band], + &c->quant[ch][band]); + + step_size = get_step_size(c, ch, band); + ff_dcaadpcm_do_real(c->prediction_mode[ch][band], + c->quant[ch][band], ff_dca_scale_factor_quant7[c->scale_factor[ch][band]], step_size, + 
c->adpcm_history[ch][band], c->subband[ch][band], c->adpcm_history[ch][band]+4, c->quantized[ch][band], + SUBBAND_SAMPLES, cb_to_level[-diff_peak_cb]); +} + +static void quantize_adpcm(DCAEncContext *c) +{ + int band, ch; + + for (ch = 0; ch < c->fullband_channels; ch++) + for (band = 0; band < 32; band++) + if (c->prediction_mode[ch][band] >= 0) + quantize_adpcm_subband(c, ch, band); +} + +static void quantize_pcm(DCAEncContext *c) { int sample, band, ch; for (ch = 0; ch < c->fullband_channels; ch++) for (band = 0; band < 32; band++) - for (sample = 0; sample < SUBBAND_SAMPLES; sample++) - c->quantized[ch][band][sample] = quantize_value(c->subband[ch][band][sample], c->quant[ch][band]); + if (c->prediction_mode[ch][band] == -1) + for (sample = 0; sample < SUBBAND_SAMPLES; sample++) + c->quantized[ch][band][sample] = quantize_value(c->subband[ch][band][sample], c->quant[ch][band]); } static void accumulate_huff_bit_consumption(int abits, int32_t *quantized, uint32_t *result) @@ -684,7 +793,7 @@ static uint32_t set_best_abits_code(int abits[DCAENC_SUBBANDS], int bands, int32 /* Check do we have subband which cannot be encoded by Huffman tables */ for (i = 0; i < bands; i++) { - if (abits[i] > 12) { + if (abits[i] > 12 || abits[i] == 0) { *res = best_sel; return best_bits; } @@ -702,14 +811,15 @@ static uint32_t set_best_abits_code(int abits[DCAENC_SUBBANDS], int bands, int32 return best_bits; } -static int init_quantization_noise(DCAEncContext *c, int noise) +static int init_quantization_noise(DCAEncContext *c, int noise, int forbid_zero) { - int ch, band, ret = 0; + int ch, band, ret = USED_26ABITS | USED_1ABITS; uint32_t huff_bit_count_accum[MAX_CHANNELS][DCA_CODE_BOOKS][7]; uint32_t clc_bit_count_accum[MAX_CHANNELS][DCA_CODE_BOOKS]; uint32_t bits_counter = 0; c->consumed_bits = 132 + 333 * c->fullband_channels; + c->consumed_bits += c->consumed_adpcm_bits; if (c->lfe_channel) c->consumed_bits += 72; @@ -720,16 +830,19 @@ static int 
init_quantization_noise(DCAEncContext *c, int noise) if (snr_cb >= 1312) { c->abits[ch][band] = 26; - ret |= USED_26ABITS; + ret &= ~USED_1ABITS; } else if (snr_cb >= 222) { c->abits[ch][band] = 8 + mul32(snr_cb - 222, 69000000); - ret |= USED_NABITS; + ret &= ~(USED_26ABITS | USED_1ABITS); } else if (snr_cb >= 0) { c->abits[ch][band] = 2 + mul32(snr_cb, 106000000); - ret |= USED_NABITS; - } else { + ret &= ~(USED_26ABITS | USED_1ABITS); + } else if (forbid_zero || snr_cb >= -140) { c->abits[ch][band] = 1; - ret |= USED_1ABITS; + ret &= ~USED_26ABITS; + } else { + c->abits[ch][band] = 0; + ret &= ~(USED_26ABITS | USED_1ABITS); } } c->consumed_bits += set_best_abits_code(c->abits[ch], 32, &c->bit_allocation_sel[ch]); @@ -740,12 +853,15 @@ static int init_quantization_noise(DCAEncContext *c, int noise) /* TODO: May be cache scaled values */ for (ch = 0; ch < c->fullband_channels; ch++) { for (band = 0; band < 32; band++) { - c->scale_factor[ch][band] = calc_one_scale(c->peak_cb[ch][band], - c->abits[ch][band], - &c->quant[ch][band]); + if (c->prediction_mode[ch][band] == -1) { + c->scale_factor[ch][band] = calc_one_scale(c->peak_cb[ch][band], + c->abits[ch][band], + &c->quant[ch][band]); + } } } - quantize_all(c); + quantize_adpcm(c); + quantize_pcm(c); memset(huff_bit_count_accum, 0, MAX_CHANNELS * DCA_CODE_BOOKS * 7 * sizeof(uint32_t)); memset(clc_bit_count_accum, 0, MAX_CHANNELS * DCA_CODE_BOOKS * sizeof(uint32_t)); @@ -774,15 +890,19 @@ static void assign_bits(DCAEncContext *c) /* Find the bounds where the binary search should work */ int low, high, down; int used_abits = 0; - - init_quantization_noise(c, c->worst_quantization_noise); + int forbid_zero = 1; +restart: + init_quantization_noise(c, c->worst_quantization_noise, forbid_zero); low = high = c->worst_quantization_noise; if (c->consumed_bits > c->frame_bits) { while (c->consumed_bits > c->frame_bits) { - av_assert0(used_abits != USED_1ABITS); + if (used_abits == USED_1ABITS && forbid_zero) { + forbid_zero 
= 0; + goto restart; + } low = high; high += snr_fudge; - used_abits = init_quantization_noise(c, high); + used_abits = init_quantization_noise(c, high, forbid_zero); } } else { while (c->consumed_bits <= c->frame_bits) { @@ -790,17 +910,17 @@ static void assign_bits(DCAEncContext *c) if (used_abits == USED_26ABITS) goto out; /* The requested bitrate is too high, pad with zeros */ low -= snr_fudge; - used_abits = init_quantization_noise(c, low); + used_abits = init_quantization_noise(c, low, forbid_zero); } } /* Now do a binary search between low and high to see what fits */ for (down = snr_fudge >> 1; down; down >>= 1) { - init_quantization_noise(c, high - down); + init_quantization_noise(c, high - down, forbid_zero); if (c->consumed_bits <= c->frame_bits) high -= down; } - init_quantization_noise(c, high); + init_quantization_noise(c, high, forbid_zero); out: c->worst_quantization_noise = high; if (high > c->worst_noise_ever) @@ -819,6 +939,41 @@ static void shift_history(DCAEncContext *c, const int32_t *input) } } +static void fill_in_adpcm_bufer(DCAEncContext *c) +{ + int ch, band; + int32_t step_size; + /* We fill in the ADPCM work buffer for subbands which haven't been ADPCM coded + * in the current frame - we need this data if the subband of the next frame is + * ADPCM + */ + for (ch = 0; ch < c->channels; ch++) { + for (band = 0; band < 32; band++) { + int32_t *samples = c->subband[ch][band] - DCA_ADPCM_COEFFS; + if (c->prediction_mode[ch][band] == -1) { + step_size = get_step_size(c, ch, band); + + ff_dca_core_dequantize(c->adpcm_history[ch][band], + c->quantized[ch][band]+12, step_size, ff_dca_scale_factor_quant7[c->scale_factor[ch][band]], 0, 4); + } else { + AV_COPY128U(c->adpcm_history[ch][band], c->adpcm_history[ch][band]+4); + } + /* Copy dequantized values for LPC analysis. + * This reduces artifacts in case of extreme quantization, + * for example: in the current frame abits is 1 and there is no prediction flag, + * but the end of this frame is a sine-like signal. 
In this case, if the LPC analysis uses + * the original values, it will likely return a good prediction gain and set the prediction flag. + * But there is no proper value in the decoder history, so the result will likely be poor. + * The bitstream has a "Predictor history flag switch", but this flag disables history for all subbands + */ + samples[0] = c->adpcm_history[ch][band][0] << 7; + samples[1] = c->adpcm_history[ch][band][1] << 7; + samples[2] = c->adpcm_history[ch][band][2] << 7; + samples[3] = c->adpcm_history[ch][band][3] << 7; + } + } +} + static void calc_lfe_scales(DCAEncContext *c) { if (c->lfe_channel) @@ -1001,9 +1156,14 @@ static void put_subframe(DCAEncContext *c, int subframe) /* Prediction mode: no ADPCM, in each channel and subband */ for (ch = 0; ch < c->fullband_channels; ch++) for (band = 0; band < DCAENC_SUBBANDS; band++) - put_bits(&c->pb, 1, 0); + put_bits(&c->pb, 1, !(c->prediction_mode[ch][band] == -1)); + + /* Prediction VQ address */ + for (ch = 0; ch < c->fullband_channels; ch++) + for (band = 0; band < DCAENC_SUBBANDS; band++) + if (c->prediction_mode[ch][band] >= 0) + put_bits(&c->pb, 12, c->prediction_mode[ch][band]); - /* Prediction VQ address: not transmitted */ /* Bit allocation index */ for (ch = 0; ch < c->fullband_channels; ch++) { if (c->bit_allocation_sel[ch] == 6) { @@ -1019,13 +1179,15 @@ static void put_subframe(DCAEncContext *c, int subframe) /* Transition mode: none for each channel and subband */ for (ch = 0; ch < c->fullband_channels; ch++) for (band = 0; band < DCAENC_SUBBANDS; band++) - put_bits(&c->pb, 1, 0); /* codebook A4 */ + if (c->abits[ch][band]) + put_bits(&c->pb, 1, 0); /* codebook A4 */ } /* Scale factors */ for (ch = 0; ch < c->fullband_channels; ch++) for (band = 0; band < DCAENC_SUBBANDS; band++) - put_bits(&c->pb, 7, c->scale_factor[ch][band]); + if (c->abits[ch][band]) + put_bits(&c->pb, 7, c->scale_factor[ch][band]); /* Joint subband scale factor codebook select: not transmitted */ /* Scale factors for joint 
subband coding: not transmitted */ @@ -1045,6 +1207,7 @@ static void put_subframe(DCAEncContext *c, int subframe) for (ss = 0; ss < SUBSUBFRAMES ; ss++) for (ch = 0; ch < c->fullband_channels; ch++) for (band = 0; band < DCAENC_SUBBANDS; band++) + if (c->abits[ch][band]) put_subframe_samples(c, ss, band, ch); /* DSYNC */ @@ -1068,12 +1231,15 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt, lfe_downsample(c, samples); calc_masking(c, samples); + if (c->options.adpcm_mode) + adpcm_analysis(c); find_peaks(c); assign_bits(c); calc_lfe_scales(c); shift_history(c, samples); init_put_bits(&c->pb, avpkt->data, avpkt->size); + fill_in_adpcm_bufer(c); put_frame_header(c); put_primary_audio_header(c); for (i = 0; i < SUBFRAMES; i++) @@ -1092,6 +1258,20 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *avpkt, return 0; } +#define DCAENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM + +static const AVOption options[] = { + { "dca_adpcm", "Use ADPCM encoding", offsetof(DCAEncContext, options.adpcm_mode), AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, DCAENC_FLAGS }, + { NULL }, +}; + +static const AVClass dcaenc_class = { + .class_name = "DCA (DTS Coherent Acoustics)", + .item_name = av_default_item_name, + .option = options, + .version = LIBAVUTIL_VERSION_INT, +}; + static const AVCodecDefault defaults[] = { { "b", "1411200" }, { NULL }, @@ -1104,6 +1284,7 @@ AVCodec ff_dca_encoder = { .id = AV_CODEC_ID_DTS, .priv_data_size = sizeof(DCAEncContext), .init = encode_init, + .close = encode_close, .encode2 = encode_frame, .capabilities = AV_CODEC_CAP_EXPERIMENTAL, .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_S32, @@ -1116,4 +1297,5 @@ AVCodec ff_dca_encoder = { AV_CH_LAYOUT_5POINT1, 0 }, .defaults = defaults, + .priv_class = &dcaenc_class, };