#include "internal.h"
#include "get_bits.h"
#include "put_bits.h"
+#include "lossless_audiodsp.h"
#include "wma.h"
#include "wma_common.h"
#define WMALL_BLOCK_MAX_SIZE (1 << WMALL_BLOCK_MAX_BITS) ///< maximum block size
#define WMALL_BLOCK_SIZES (WMALL_BLOCK_MAX_BITS - WMALL_BLOCK_MIN_BITS + 1) ///< possible block sizes
+#define WMALL_COEFF_PAD_SIZE 16 ///< pad coef buffers with 0 for use with SIMD
/**
* @brief frame-specific decoder context for a single channel
/* generic decoder variables */
AVCodecContext *avctx;
AVFrame *frame;
+ LLAudDSPContext dsp; ///< accelerated DSP functions
uint8_t frame_data[MAX_FRAMESIZE + FF_INPUT_BUFFER_PADDING_SIZE]; ///< compressed frame data
PutBitContext pb; ///< context for filling the frame_data buffer
int scaling;
int coefsend;
int bitsend;
- int16_t coefs[MAX_ORDER];
- int16_t lms_prevvalues[MAX_ORDER * 2];
- int16_t lms_updates[MAX_ORDER * 2];
+ DECLARE_ALIGNED(16, int16_t, coefs)[MAX_ORDER + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
+ DECLARE_ALIGNED(16, int16_t, lms_prevvalues)[MAX_ORDER * 2];
+ DECLARE_ALIGNED(16, int16_t, lms_updates)[MAX_ORDER * 2];
int recent;
} cdlms[WMALL_MAX_CHANNELS][9];
}
s->avctx = avctx;
+ ff_llauddsp_init(&s->dsp);
init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
if (avctx->extradata_size >= 18) {
s->cdlms[0][0].order = 0;
return AVERROR_INVALIDDATA;
}
+ if(s->cdlms[c][i].order & 8) {
+ static int warned;
+ if(!warned)
+ avpriv_request_sample(s->avctx, "CDLMS of order %d",
+ s->cdlms[c][i].order);
+ warned = 1;
+ }
}
for (i = 0; i < s->cdlms_ttl[c]; i++)
(get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r;
}
}
+
+ for (i = 0; i < s->cdlms_ttl[c]; i++)
+ memset(s->cdlms[c][i].coefs + s->cdlms[c][i].order,
+ 0, WMALL_COEFF_PAD_SIZE);
}
return 0;
}
}
-static int lms_predict(WmallDecodeCtx *s, int ich, int ilms)
-{
- int pred = 0, icoef;
- int recent = s->cdlms[ich][ilms].recent;
-
- for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
- pred += s->cdlms[ich][ilms].coefs[icoef] *
- s->cdlms[ich][ilms].lms_prevvalues[icoef + recent];
-
- return pred;
-}
-
-static void lms_update(WmallDecodeCtx *s, int ich, int ilms,
- int input, int residue)
+static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input)
{
- int icoef;
int recent = s->cdlms[ich][ilms].recent;
int range = 1 << s->bits_per_sample - 1;
- if (residue < 0) {
- for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
- s->cdlms[ich][ilms].coefs[icoef] -=
- s->cdlms[ich][ilms].lms_updates[icoef + recent];
- } else if (residue > 0) {
- for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
- s->cdlms[ich][ilms].coefs[icoef] +=
- s->cdlms[ich][ilms].lms_updates[icoef + recent];
- }
-
if (recent)
recent--;
else {
s->update_speed[ich] = 8;
}
+/** Get sign of integer (1 for positive, -1 for negative and 0 for zero) */
+#define WMASIGN(x) ((x > 0) - (x < 0))
+
static void revert_cdlms(WmallDecodeCtx *s, int ch,
int coef_begin, int coef_end)
{
for (icoef = coef_begin; icoef < coef_end; icoef++) {
pred = 1 << (s->cdlms[ch][ilms].scaling - 1);
residue = s->channel_residues[ch][icoef];
- pred += lms_predict(s, ch, ilms);
+ pred += s->dsp.scalarproduct_and_madd_int16(s->cdlms[ch][ilms].coefs,
+ s->cdlms[ch][ilms].lms_prevvalues
+ + s->cdlms[ch][ilms].recent,
+ s->cdlms[ch][ilms].lms_updates
+ + s->cdlms[ch][ilms].recent,
+ s->cdlms[ch][ilms].order,
+ WMASIGN(residue));
input = residue + (pred >> s->cdlms[ch][ilms].scaling);
- lms_update(s, ch, ilms, input, residue);
+ lms_update(s, ch, ilms, input);
s->channel_residues[ch][icoef] = input;
}
}