X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fwmalosslessdec.c;h=ca62dc1cf98b8572e42936755ed074e39ae4c52e;hb=40c9d43086890df11389ceefd88f517471e41202;hp=99139eba78edab17e5e362f6be0db6b4f28fd4d1;hpb=d09793576760c3f6894007184875a158b7be2347;p=ffmpeg diff --git a/libavcodec/wmalosslessdec.c b/libavcodec/wmalosslessdec.c index 99139eba78e..ca62dc1cf98 100644 --- a/libavcodec/wmalosslessdec.c +++ b/libavcodec/wmalosslessdec.c @@ -31,6 +31,7 @@ #include "internal.h" #include "get_bits.h" #include "put_bits.h" +#include "lossless_audiodsp.h" #include "wma.h" #include "wma_common.h" @@ -46,6 +47,7 @@ #define WMALL_BLOCK_MAX_SIZE (1 << WMALL_BLOCK_MAX_BITS) ///< maximum block size #define WMALL_BLOCK_SIZES (WMALL_BLOCK_MAX_BITS - WMALL_BLOCK_MIN_BITS + 1) ///< possible block sizes +#define WMALL_COEFF_PAD_SIZE 16 ///< pad coef buffers with 0 for use with SIMD /** * @brief frame-specific decoder context for a single channel @@ -69,6 +71,7 @@ typedef struct WmallDecodeCtx { /* generic decoder variables */ AVCodecContext *avctx; AVFrame *frame; + LLAudDSPContext dsp; ///< accelerated DSP functions uint8_t frame_data[MAX_FRAMESIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< compressed frame data PutBitContext pb; ///< context for filling the frame_data buffer @@ -143,9 +146,9 @@ typedef struct WmallDecodeCtx { int scaling; int coefsend; int bitsend; - int16_t coefs[MAX_ORDER]; - int16_t lms_prevvalues[MAX_ORDER * 2]; - int16_t lms_updates[MAX_ORDER * 2]; + DECLARE_ALIGNED(16, int16_t, coefs)[MAX_ORDER + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)]; + DECLARE_ALIGNED(16, int16_t, lms_prevvalues)[MAX_ORDER * 2]; + DECLARE_ALIGNED(16, int16_t, lms_updates)[MAX_ORDER * 2]; int recent; } cdlms[WMALL_MAX_CHANNELS][9]; @@ -186,6 +189,7 @@ static av_cold int decode_init(AVCodecContext *avctx) } s->avctx = avctx; + ff_llauddsp_init(&s->dsp); init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE); if (avctx->extradata_size >= 18) { @@ -459,6 +463,13 @@ static int decode_cdlms(WmallDecodeCtx *s) s->cdlms[0][0].order = 0; return AVERROR_INVALIDDATA; } + if(s->cdlms[c][i].order & 8) { + static int warned; + if(!warned) + avpriv_request_sample(s->avctx, "CDLMS of order %d", + s->cdlms[c][i].order); + warned = 1; + } } for (i = 0; i < s->cdlms_ttl[c]; i++) @@ -484,6 +495,10 @@ static int decode_cdlms(WmallDecodeCtx *s) (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r; } } + + for (i = 0; i < s->cdlms_ttl[c]; i++) + memset(s->cdlms[c][i].coefs + s->cdlms[c][i].order, + 0, WMALL_COEFF_PAD_SIZE); } return 0; @@ -693,35 +708,11 @@ static void revert_mclms(WmallDecodeCtx *s, int tile_size) } } -static int lms_predict(WmallDecodeCtx *s, int ich, int ilms) -{ - int pred = 0, icoef; - int recent = s->cdlms[ich][ilms].recent; - - for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) - pred += s->cdlms[ich][ilms].coefs[icoef] * - s->cdlms[ich][ilms].lms_prevvalues[icoef + recent]; - - return pred; -} - -static void lms_update(WmallDecodeCtx *s, int ich, int ilms, - int input, int residue) +static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input) { - int icoef; int recent = s->cdlms[ich][ilms].recent; int range = 1 << s->bits_per_sample - 1; - if (residue < 0) { - for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) - s->cdlms[ich][ilms].coefs[icoef] -= - s->cdlms[ich][ilms].lms_updates[icoef + recent]; - } else if (residue > 0) { - for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++) - s->cdlms[ich][ilms].coefs[icoef] += - s->cdlms[ich][ilms].lms_updates[icoef + recent]; - } - if (recent) recent--; else { @@ -782,6 +773,9 @@ static void use_normal_update_speed(WmallDecodeCtx *s, int ich) s->update_speed[ich] = 8; } +/** Get sign of integer (1 for positive, -1 for negative and 0 for zero) */ +#define WMASIGN(x) ((x > 0) - (x < 0)) + static void revert_cdlms(WmallDecodeCtx *s, int ch, int coef_begin, int coef_end) { @@ -792,9 +786,15 @@ static void revert_cdlms(WmallDecodeCtx *s, int ch, for (icoef = coef_begin; icoef < coef_end; icoef++) { pred = 1 << (s->cdlms[ch][ilms].scaling - 1); residue = s->channel_residues[ch][icoef]; - pred += lms_predict(s, ch, ilms); + pred += s->dsp.scalarproduct_and_madd_int16(s->cdlms[ch][ilms].coefs, + s->cdlms[ch][ilms].lms_prevvalues + + s->cdlms[ch][ilms].recent, + s->cdlms[ch][ilms].lms_updates + + s->cdlms[ch][ilms].recent, + s->cdlms[ch][ilms].order, + WMASIGN(residue)); input = residue + (pred >> s->cdlms[ch][ilms].scaling); - lms_update(s, ch, ilms, input, residue); + lms_update(s, ch, ilms, input); s->channel_residues[ch][icoef] = input; } }