* @author Ronald S. Bultje <rsbultje@gmail.com>
*/
-#define UNCHECKED_BITSTREAM_READER 1
-
#include <math.h>
+
+#include "libavutil/channel_layout.h"
+#include "libavutil/float_dsp.h"
+#include "libavutil/mem.h"
+
#include "avcodec.h"
-#include "get_bits.h"
+#include "bitstream.h"
+#include "internal.h"
#include "put_bits.h"
#include "wmavoice_data.h"
-#include "celp_math.h"
#include "celp_filters.h"
#include "acelp_vectors.h"
#include "acelp_filters.h"
#include "lsp.h"
-#include "libavutil/lzo.h"
#include "dct.h"
#include "rdft.h"
#include "sinewin.h"
/**
* WMA Voice decoding context.
*/
-typedef struct {
+typedef struct WMAVoiceContext {
/**
* @name Global values specified in the stream header / extradata or used all over.
* @{
*/
- AVFrame frame;
- GetBitContext gb; ///< packet bitreader. During decoder init,
+ BitstreamContext bc; ///< packet bitreader. During decoder init,
///< it contains the extradata from the
///< demuxer. During decoding, it contains
///< packet data.
///< to #wmavoice_decode_packet() (since
///< they're part of the previous superframe)
- uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];
+ uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + AV_INPUT_BUFFER_PADDING_SIZE];
///< cache for superframe data split over
///< multiple packets
int sframe_cache_size; ///< set to >0 if we have data from an
/**
* Set up the variable bit mode (VBM) tree from container extradata.
- * @param gb bit I/O context.
- * The bit context (s->gb) should be loaded with byte 23-46 of the
+ * @param bc bit I/O context.
+ * The bit context (s->bc) should be loaded with byte 23-46 of the
* container extradata (i.e. the ones containing the VBM tree).
* @param vbm_tree pointer to array to which the decoded VBM tree will be
* written.
* @return 0 on success, <0 on error.
*/
-static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])
+static av_cold int decode_vbmtree(BitstreamContext *bc, int8_t vbm_tree[25])
+{
+ int cntr[8] = { 0 }, n, res;
+
+ memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
+ for (n = 0; n < 17; n++) {
+ res = bitstream_read(bc, 3);
+ if (cntr[res] > 3) // should be >= 3 + (res == 7))
+ return -1;
+ vbm_tree[res * 3 + cntr[res]++] = n;
+ }
+ return 0;
+}
+
+static av_cold void wmavoice_init_static_data(AVCodec *codec)
{
static const uint8_t bits[] = {
2, 2, 2, 4, 4, 4,
0x0ffc, 0x0ffd, 0x0ffe, // 1111111111+00/01/10
0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx
};
- int cntr[8] = { 0 }, n, res;
- memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25);
- for (n = 0; n < 17; n++) {
- res = get_bits(gb, 3);
- if (cntr[res] > 3) // should be >= 3 + (res == 7))
- return -1;
- vbm_tree[res * 3 + cntr[res]++] = n;
- }
INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),
bits, 1, 1, codes, 2, 2, 132);
- return 0;
}
/**
av_log(ctx, AV_LOG_ERROR,
"Invalid extradata size %d (should be 46)\n",
ctx->extradata_size);
- return -1;
+ return AVERROR_INVALIDDATA;
}
flags = AV_RL32(ctx->extradata + 18);
s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);
av_log(ctx, AV_LOG_ERROR,
"Invalid denoise filter strength %d (max=11)\n",
s->denoise_strength);
- return -1;
+ return AVERROR_INVALIDDATA;
}
s->denoise_tilt_corr = !!(flags & 0x40);
s->dc_level = (flags >> 7) & 0xF;
for (n = 0; n < s->lsps; n++)
s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);
- init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);
- if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {
+ bitstream_init8(&s->bc, ctx->extradata + 22, ctx->extradata_size - 22);
+ if (decode_vbmtree(&s->bc, s->vbm_tree) < 0) {
av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");
- return -1;
+ return AVERROR_INVALIDDATA;
}
s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;
pitch_range = s->max_pitch_val - s->min_pitch_val;
if (pitch_range <= 0) {
av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n");
- return -1;
+ return AVERROR_INVALIDDATA;
}
s->pitch_nbits = av_ceil_log2(pitch_range);
s->last_pitch_val = 40;
"Unsupported samplerate %d (min=%d, max=%d)\n",
ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz
- return -1;
+ return AVERROR(ENOSYS);
}
s->block_conv_table[0] = s->min_pitch_val;
s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;
if (s->block_delta_pitch_hrange <= 0) {
av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n");
- return -1;
+ return AVERROR_INVALIDDATA;
}
s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);
s->block_pitch_range = s->block_conv_table[2] +
2 * (s->block_conv_table[1] - 2 * s->min_pitch_val);
s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);
+ ctx->channels = 1;
+ ctx->channel_layout = AV_CH_LAYOUT_MONO;
ctx->sample_fmt = AV_SAMPLE_FMT_FLT;
- avcodec_get_frame_defaults(&s->frame);
- ctx->coded_frame = &s->frame;
-
return 0;
}
/* find best fitting point in history */
do {
- dot = ff_dot_productf(in, ptr, size);
+ dot = avpriv_scalarproduct_float_c(in, ptr, size);
if (dot > optimal_gain) {
optimal_gain = dot;
best_hist_ptr = ptr;
if (optimal_gain <= 0)
return -1;
- dot = ff_dot_productf(best_hist_ptr, best_hist_ptr, size);
+ dot = avpriv_scalarproduct_float_c(best_hist_ptr, best_hist_ptr, size);
if (dot <= 0) // would be 1.0
return -1;
{
float rh0, rh1;
- rh0 = 1.0 + ff_dot_productf(lpcs, lpcs, n_lpcs);
- rh1 = lpcs[0] + ff_dot_productf(lpcs, &lpcs[1], n_lpcs - 1);
+ rh0 = 1.0 + avpriv_scalarproduct_float_c(lpcs, lpcs, n_lpcs);
+ rh1 = lpcs[0] + avpriv_scalarproduct_float_c(lpcs, &lpcs[1], n_lpcs - 1);
return rh1 / rh0;
}
/* 70.57 =~ 1/log10(1.0331663) */
idx = (pwr * gain_mul - 0.0295) * 70.570526123;
- if (idx > 127) { // fallback if index falls outside table range
+ if (idx > 127) { // fall back if index falls outside table range
coeffs[n] = wmavoice_energy_table[127] *
powf(1.0331663, idx - 127);
} else
}
/* calculate the Hilbert transform of the gains, which we do (since this
- * is a sinus input) by doing a phase shift (in theory, H(sin())=cos()).
+ * is a sine input) by doing a phase shift (in theory, H(sin())=cos()).
* Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the
* "moment" of the LPCs in this filter. */
s->dct.dct_calc(&s->dct, lpcs);
-1.8 * tilt_factor(coeffs, remainder - 1),
coeffs, remainder);
}
- sq = (1.0 / 64.0) * sqrtf(1 / ff_dot_productf(coeffs, coeffs, remainder));
+ sq = (1.0 / 64.0) * sqrtf(1 / avpriv_scalarproduct_float_c(coeffs, coeffs,
+ remainder));
for (n = 0; n < remainder; n++)
coeffs[n] *= sq;
}
/**
* Parse 10 independently-coded LSPs.
*/
-static void dequant_lsp10i(GetBitContext *gb, double *lsps)
+static void dequant_lsp10i(BitstreamContext *bc, double *lsps)
{
static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };
static const double mul_lsf[4] = {
};
uint16_t v[4];
- v[0] = get_bits(gb, 8);
- v[1] = get_bits(gb, 6);
- v[2] = get_bits(gb, 5);
- v[3] = get_bits(gb, 5);
+ v[0] = bitstream_read(bc, 8);
+ v[1] = bitstream_read(bc, 6);
+ v[2] = bitstream_read(bc, 5);
+ v[3] = bitstream_read(bc, 5);
dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,
mul_lsf, base_lsf);
* Parse 10 independently-coded LSPs, and then derive the tables to
* generate LSPs for the other frames from them (residual coding).
*/
-static void dequant_lsp10r(GetBitContext *gb,
+static void dequant_lsp10r(BitstreamContext *bc,
double *i_lsps, const double *old,
double *a1, double *a2, int q_mode)
{
uint16_t interpol, v[3];
int n;
- dequant_lsp10i(gb, i_lsps);
+ dequant_lsp10i(bc, i_lsps);
- interpol = get_bits(gb, 5);
- v[0] = get_bits(gb, 7);
- v[1] = get_bits(gb, 6);
- v[2] = get_bits(gb, 6);
+ interpol = bitstream_read(bc, 5);
+ v[0] = bitstream_read(bc, 7);
+ v[1] = bitstream_read(bc, 6);
+ v[2] = bitstream_read(bc, 6);
for (n = 0; n < 10; n++) {
double delta = old[n] - i_lsps[n];
/**
* Parse 16 independently-coded LSPs.
*/
-static void dequant_lsp16i(GetBitContext *gb, double *lsps)
+static void dequant_lsp16i(BitstreamContext *bc, double *lsps)
{
static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };
static const double mul_lsf[5] = {
};
uint16_t v[5];
- v[0] = get_bits(gb, 8);
- v[1] = get_bits(gb, 6);
- v[2] = get_bits(gb, 7);
- v[3] = get_bits(gb, 6);
- v[4] = get_bits(gb, 7);
+ v[0] = bitstream_read(bc, 8);
+ v[1] = bitstream_read(bc, 6);
+ v[2] = bitstream_read(bc, 7);
+ v[3] = bitstream_read(bc, 6);
+ v[4] = bitstream_read(bc, 7);
dequant_lsps( lsps, 5, v, vec_sizes, 2,
wmavoice_dq_lsp16i1, mul_lsf, base_lsf);
* Parse 16 independently-coded LSPs, and then derive the tables to
* generate LSPs for the other frames from them (residual coding).
*/
-static void dequant_lsp16r(GetBitContext *gb,
+static void dequant_lsp16r(BitstreamContext *bc,
double *i_lsps, const double *old,
double *a1, double *a2, int q_mode)
{
uint16_t interpol, v[3];
int n;
- dequant_lsp16i(gb, i_lsps);
+ dequant_lsp16i(bc, i_lsps);
- interpol = get_bits(gb, 5);
- v[0] = get_bits(gb, 7);
- v[1] = get_bits(gb, 7);
- v[2] = get_bits(gb, 7);
+ interpol = bitstream_read(bc, 5);
+ v[0] = bitstream_read(bc, 7);
+ v[1] = bitstream_read(bc, 7);
+ v[2] = bitstream_read(bc, 7);
for (n = 0; n < 16; n++) {
double delta = old[n] - i_lsps[n];
* Parse the offset of the first pitch-adaptive window pulses, and
* the distribution of pulses between the two blocks in this frame.
* @param s WMA Voice decoding context private data
- * @param gb bit I/O context
+ * @param bc bit I/O context
* @param pitch pitch for each block in this frame
*/
-static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb,
+static void aw_parse_coords(WMAVoiceContext *s, BitstreamContext *bc,
const int *pitch)
{
static const int16_t start_offset[94] = {
/* position of pulse */
s->aw_idx_is_ext = 0;
- if ((bits = get_bits(gb, 6)) >= 54) {
+ if ((bits = bitstream_read(bc, 6)) >= 54) {
s->aw_idx_is_ext = 1;
- bits += (bits - 54) * 3 + get_bits(gb, 2);
+ bits += (bits - 54) * 3 + bitstream_read(bc, 2);
}
/* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count
/**
* Apply second set of pitch-adaptive window pulses.
* @param s WMA Voice decoding context private data
- * @param gb bit I/O context
+ * @param bc bit I/O context
* @param block_idx block index in frame [0, 1]
* @param fcb structure containing fixed codebook vector info
+ * @return -1 on error, 0 otherwise
*/
-static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb,
- int block_idx, AMRFixed *fcb)
+static int aw_pulse_set2(WMAVoiceContext *s, BitstreamContext *bc,
+ int block_idx, AMRFixed *fcb)
{
uint16_t use_mask_mem[9]; // only 5 are used, rest is padding
uint16_t *use_mask = use_mask_mem + 2;
}
/* find the 'aidx'th offset that is not excluded */
- aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
+ aidx = bitstream_read(bc, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);
for (n = 0; n <= aidx; pulse_start++) {
for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;
if (idx >= MAX_FRAMESIZE / 2) { // find from zero
else if (use_mask[2]) idx = 0x2F;
else if (use_mask[3]) idx = 0x3F;
else if (use_mask[4]) idx = 0x4F;
- else return;
+ else return -1;
idx -= av_log2_16bit(use_mask[idx >> 4]);
}
if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {
}
fcb->x[fcb->n] = start_off;
- fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;
+ fcb->y[fcb->n] = bitstream_read_bit(bc) ? -1.0 : 1.0;
fcb->n++;
/* set offset for next block, relative to start of that block */
n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;
s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;
+ return 0;
}
/**
* Apply first set of pitch-adaptive window pulses.
* @param s WMA Voice decoding context private data
- * @param gb bit I/O context
+ * @param bc bit I/O context
* @param block_idx block index in frame [0, 1]
* @param fcb storage location for fixed codebook pulse info
*/
-static void aw_pulse_set1(WMAVoiceContext *s, GetBitContext *gb,
+static void aw_pulse_set1(WMAVoiceContext *s, BitstreamContext *bc,
int block_idx, AMRFixed *fcb)
{
- int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
+ int val = bitstream_read(bc, 12 - 2 * (s->aw_idx_is_ext && !block_idx));
float v;
if (s->aw_n_pulses[block_idx] > 0) {
/**
* @}
*
- * Generate a random number from frame_cntr and block_idx, which will lief
+ * Generate a random number from frame_cntr and block_idx, which will live
* in the range [0, 1000 - block_size] (so it can be used as an index in a
* table of size 1000 of which you want to read block_size entries).
*
* Parse hardcoded signal for a single block.
* @note see #synth_block().
*/
-static void synth_block_hardcoded(WMAVoiceContext *s, GetBitContext *gb,
+static void synth_block_hardcoded(WMAVoiceContext *s, BitstreamContext *bc,
int block_idx, int size,
const struct frame_type_desc *frame_desc,
float *excitation)
r_idx = pRNG(s->frame_cntr, block_idx, size);
gain = s->silence_gain;
} else /* FCB_TYPE_HARDCODED */ {
- r_idx = get_bits(gb, 8);
- gain = wmavoice_gain_universal[get_bits(gb, 6)];
+ r_idx = bitstream_read(bc, 8);
+ gain = wmavoice_gain_universal[bitstream_read(bc, 6)];
}
/* Clear gain prediction parameters */
* Parse FCB/ACB signal for a single block.
* @note see #synth_block().
*/
-static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb,
+static void synth_block_fcb_acb(WMAVoiceContext *s, BitstreamContext *bc,
int block_idx, int size,
int block_pitch_sh2,
const struct frame_type_desc *frame_desc,
/* For the other frame types, this is where we apply the innovation
* (fixed) codebook pulses of the speech signal. */
if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
- aw_pulse_set1(s, gb, block_idx, &fcb);
- aw_pulse_set2(s, gb, block_idx, &fcb);
+ aw_pulse_set1(s, bc, block_idx, &fcb);
+ if (aw_pulse_set2(s, bc, block_idx, &fcb)) {
+ /* Conceal the block with silence and return.
+ * Skip the correct amount of bits to read the next
+ * block from the correct offset. */
+ int r_idx = pRNG(s->frame_cntr, block_idx, size);
+
+ for (n = 0; n < size; n++)
+ excitation[n] =
+ wmavoice_std_codebook[r_idx + n] * s->silence_gain;
+ bitstream_skip(bc, 7 + 1);
+ return;
+ }
} else /* FCB_TYPE_EXC_PULSES */ {
int offset_nbits = 5 - frame_desc->log_n_blocks;
float sign;
int pos1, pos2;
- sign = get_bits1(gb) ? 1.0 : -1.0;
- pos1 = get_bits(gb, offset_nbits);
+ sign = bitstream_read_bit(bc) ? 1.0 : -1.0;
+ pos1 = bitstream_read(bc, offset_nbits);
fcb.x[fcb.n] = n + 5 * pos1;
fcb.y[fcb.n++] = sign;
if (n < frame_desc->dbl_pulses) {
- pos2 = get_bits(gb, offset_nbits);
+ pos2 = bitstream_read(bc, offset_nbits);
fcb.x[fcb.n] = n + 5 * pos2;
fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;
}
/* Calculate gain for adaptive & fixed codebook signal.
* see ff_amr_set_fixed_gain(). */
- idx = get_bits(gb, 7);
- fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -
+ idx = bitstream_read(bc, 7);
+ fcb_gain = expf(avpriv_scalarproduct_float_c(s->gain_pred_err,
+ gain_coeff, 6) -
5.2409161640 + wmavoice_gain_codebook_fcb[idx]);
acb_gain = wmavoice_gain_codebook_acb[idx];
pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],
* @note we assume enough bits are available, caller should check.
*
* @param s WMA Voice decoding context private data
- * @param gb bit I/O context
+ * @param bc bit I/O context
* @param block_idx index of the to-be-read block
* @param size amount of samples to be read in this block
* @param block_pitch_sh2 pitch for this block << 2
* @param synth target memory for the speech synthesis filter output
* @return 0 on success, <0 on error.
*/
-static void synth_block(WMAVoiceContext *s, GetBitContext *gb,
+static void synth_block(WMAVoiceContext *s, BitstreamContext *bc,
int block_idx, int size,
int block_pitch_sh2,
const double *lsps, const double *prev_lsps,
int n;
if (frame_desc->acb_type == ACB_TYPE_NONE)
- synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);
+ synth_block_hardcoded(s, bc, block_idx, size, frame_desc, excitation);
else
- synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,
+ synth_block_fcb_acb(s, bc, block_idx, size, block_pitch_sh2,
frame_desc, excitation);
/* convert interpolated LSPs to LPCs */
* @note we assume enough bits are available, caller should check.
*
* @param ctx WMA Voice decoder context
- * @param gb bit I/O context (s->gb or one for cross-packet superframes)
+ * @param bc bit I/O context (s->bc or one for cross-packet superframes)
* @param frame_idx Frame number within superframe [0-2]
* @param samples pointer to output sample buffer, has space for at least 160
* samples
* @param synth target buffer for synthesized speech data
* @return 0 on success, <0 on error.
*/
-static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx,
- float *samples,
+static int synth_frame(AVCodecContext *ctx, BitstreamContext *bc,
+ int frame_idx, float *samples,
const double *lsps, const double *prev_lsps,
float *excitation, float *synth)
{
int pitch[MAX_BLOCKS], last_block_pitch;
/* Parse frame type ("frame header"), see frame_descs */
- int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples;
+ int bd_idx = s->vbm_tree[bitstream_read_vlc(bc, frame_type_vlc.table, 6, 3)], block_nsamples;
if (bd_idx < 0) {
av_log(ctx, AV_LOG_ERROR,
"Invalid frame type VLC code, skipping\n");
- return -1;
+ return AVERROR_INVALIDDATA;
}
block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;
* incrementing/decrementing prev_frame_pitch to cur_pitch_val. */
n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;
log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;
- cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);
+ cur_pitch_val = s->min_pitch_val + bitstream_read(bc, s->pitch_nbits);
cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);
if (s->last_acb_type == ACB_TYPE_NONE ||
20 * abs(cur_pitch_val - s->last_pitch_val) >
/* Global gain (if silence) and pitch-adaptive window coordinates */
switch (frame_descs[bd_idx].fcb_type) {
case FCB_TYPE_SILENCE:
- s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];
+ s->silence_gain = wmavoice_gain_silence[bitstream_read(bc, 8)];
break;
case FCB_TYPE_AW_PULSES:
- aw_parse_coords(s, gb, pitch);
+ aw_parse_coords(s, bc, pitch);
break;
}
/* Pitch is given per block. Per-block pitches are encoded as an
* absolute value for the first block, and then delta values
* relative to this value) for all subsequent blocks. The scale of
- * this pitch value is semi-logaritmic compared to its use in the
+ * this pitch value is semi-logarithmic compared to its use in the
* decoder, so we convert it to normal scale also. */
int block_pitch,
t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,
t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;
if (n == 0) {
- block_pitch = get_bits(gb, s->block_pitch_nbits);
+ block_pitch = bitstream_read(bc, s->block_pitch_nbits);
} else
block_pitch = last_block_pitch - s->block_delta_pitch_hrange +
- get_bits(gb, s->block_delta_pitch_nbits);
+ bitstream_read(bc, s->block_delta_pitch_nbits);
/* Convert last_ so that any next delta is within _range */
last_block_pitch = av_clip(block_pitch,
s->block_delta_pitch_hrange,
break;
}
- synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,
+ synth_block(s, bc, n, block_nsamples, bl_pitch_sh2,
lsps, prev_lsps, &frame_descs[bd_idx],
&excitation[n * block_nsamples],
&synth[n * block_nsamples]);
/**
* Test if there's enough bits to read 1 superframe.
*
- * @param orig_gb bit I/O context used for reading. This function
+ * @param orig_bc bit I/O context used for reading. This function
* does not modify the state of the bitreader; it
* only uses it to copy the current stream position
* @param s WMA Voice decoding context private data
- * @return -1 if unsupported, 1 on not enough bits or 0 if OK.
+ * @return < 0 on error, 1 on not enough bits or 0 if OK.
*/
-static int check_bits_for_superframe(GetBitContext *orig_gb,
+static int check_bits_for_superframe(BitstreamContext *orig_bc,
WMAVoiceContext *s)
{
- GetBitContext s_gb, *gb = &s_gb;
+ BitstreamContext s_bc, *bc = &s_bc;
int n, need_bits, bd_idx;
const struct frame_type_desc *frame_desc;
/* initialize a copy */
- init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);
- skip_bits_long(gb, get_bits_count(orig_gb));
- assert(get_bits_left(gb) == get_bits_left(orig_gb));
+ *bc = *orig_bc;
/* superframe header */
- if (get_bits_left(gb) < 14)
+ if (bitstream_bits_left(bc) < 14)
return 1;
- if (!get_bits1(gb))
- return -1; // WMAPro-in-WMAVoice superframe
- if (get_bits1(gb)) skip_bits(gb, 12); // number of samples in superframe
+ if (!bitstream_read_bit(bc))
+ return AVERROR(ENOSYS); // WMAPro-in-WMAVoice superframe
+ if (bitstream_read_bit(bc)) bitstream_skip(bc, 12); // number of samples in superframe
if (s->has_residual_lsps) { // residual LSPs (for all frames)
- if (get_bits_left(gb) < s->sframe_lsp_bitsize)
+ if (bitstream_bits_left(bc) < s->sframe_lsp_bitsize)
return 1;
- skip_bits_long(gb, s->sframe_lsp_bitsize);
+ bitstream_skip(bc, s->sframe_lsp_bitsize);
}
/* frames */
int aw_idx_is_ext = 0;
if (!s->has_residual_lsps) { // independent LSPs (per-frame)
- if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;
- skip_bits_long(gb, s->frame_lsp_bitsize);
+ if (bitstream_bits_left(bc) < s->frame_lsp_bitsize)
+ return 1;
+ bitstream_skip(bc, s->frame_lsp_bitsize);
}
- bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];
+ bd_idx = s->vbm_tree[bitstream_read_vlc(bc, frame_type_vlc.table, 6, 3)];
if (bd_idx < 0)
- return -1; // invalid frame type VLC code
+ return AVERROR_INVALIDDATA; // invalid frame type VLC code
frame_desc = &frame_descs[bd_idx];
if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {
- if (get_bits_left(gb) < s->pitch_nbits)
+ if (bitstream_bits_left(bc) < s->pitch_nbits)
return 1;
- skip_bits_long(gb, s->pitch_nbits);
+ bitstream_skip(bc, s->pitch_nbits);
}
if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {
- skip_bits(gb, 8);
+ bitstream_skip(bc, 8);
} else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {
- int tmp = get_bits(gb, 6);
+ int tmp = bitstream_read(bc, 6);
if (tmp >= 0x36) {
- skip_bits(gb, 2);
+ bitstream_skip(bc, 2);
aw_idx_is_ext = 1;
}
}
} else
need_bits = 0;
need_bits += frame_desc->frame_size;
- if (get_bits_left(gb) < need_bits)
+ if (bitstream_bits_left(bc) < need_bits)
return 1;
- skip_bits_long(gb, need_bits);
+ bitstream_skip(bc, need_bits);
}
return 0;
/**
* Synthesize output samples for a single superframe. If we have any data
* cached in s->sframe_cache, that will be used instead of whatever is loaded
- * in s->gb.
+ * in s->bc.
*
* WMA Voice superframes contain 3 frames, each containing 160 audio samples,
* to give a total of 480 samples per frame. See #synth_frame() for frame
* @return 0 on success, <0 on error or 1 if there was not enough data to
* fully parse the superframe
*/
-static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr)
+static int synth_superframe(AVCodecContext *ctx, AVFrame *frame,
+ int *got_frame_ptr)
{
WMAVoiceContext *s = ctx->priv_data;
- GetBitContext *gb = &s->gb, s_gb;
+ BitstreamContext *bc = &s->bc, s_bc;
int n, res, n_samples = 480;
double lsps[MAX_FRAMES][MAX_LSPS];
const double *mean_lsf = s->lsps == 16 ?
s->history_nsamples * sizeof(*excitation));
if (s->sframe_cache_size > 0) {
- gb = &s_gb;
- init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);
+ bc = &s_bc;
+ bitstream_init(bc, s->sframe_cache, s->sframe_cache_size);
s->sframe_cache_size = 0;
}
- if ((res = check_bits_for_superframe(gb, s)) == 1) {
+ if ((res = check_bits_for_superframe(bc, s)) == 1) {
*got_frame_ptr = 0;
return 1;
- }
+ } else if (res < 0)
+ return res;
/* First bit is speech/music bit, it differentiates between WMAVoice
* speech samples (the actual codec) and WMAVoice music samples, which
* are really WMAPro-in-WMAVoice-superframes. I've never seen those in
* the wild yet. */
- if (!get_bits1(gb)) {
- av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);
- return -1;
+ if (!bitstream_read_bit(bc)) {
+ avpriv_request_sample(ctx, "WMAPro-in-WMAVoice");
+ return AVERROR_PATCHWELCOME;
}
/* (optional) nr. of samples in superframe; always <= 480 and >= 0 */
- if (get_bits1(gb)) {
- if ((n_samples = get_bits(gb, 12)) > 480) {
+ if (bitstream_read_bit(bc)) {
+ if ((n_samples = bitstream_read(bc, 12)) > 480) {
av_log(ctx, AV_LOG_ERROR,
"Superframe encodes >480 samples (%d), not allowed\n",
n_samples);
- return -1;
+ return AVERROR_INVALIDDATA;
}
}
/* Parse LSPs, if global for the superframe (can also be per-frame). */
prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];
if (s->lsps == 10) {
- dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
+ dequant_lsp10r(bc, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
} else /* s->lsps == 16 */
- dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
+ dequant_lsp16r(bc, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);
for (n = 0; n < s->lsps; n++) {
lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);
}
/* get output buffer */
- s->frame.nb_samples = 480;
- if ((res = ctx->get_buffer(ctx, &s->frame)) < 0) {
+ frame->nb_samples = 480;
+ if ((res = ff_get_buffer(ctx, frame, 0)) < 0) {
av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n");
return res;
}
- s->frame.nb_samples = n_samples;
- samples = (float *)s->frame.data[0];
+ frame->nb_samples = n_samples;
+ samples = (float *)frame->data[0];
/* Parse frames, optionally preceded by per-frame (independent) LSPs. */
for (n = 0; n < 3; n++) {
int m;
if (s->lsps == 10) {
- dequant_lsp10i(gb, lsps[n]);
+ dequant_lsp10i(bc, lsps[n]);
} else /* s->lsps == 16 */
- dequant_lsp16i(gb, lsps[n]);
+ dequant_lsp16i(bc, lsps[n]);
for (m = 0; m < s->lsps; m++)
lsps[n][m] += mean_lsf[m];
stabilize_lsps(lsps[n], s->lsps);
}
- if ((res = synth_frame(ctx, gb, n,
+ if ((res = synth_frame(ctx, bc, n,
&samples[n * MAX_FRAMESIZE],
lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],
&excitation[s->history_nsamples + n * MAX_FRAMESIZE],
/* Statistics? FIXME - we don't check for length, a slight overrun
* will be caught by internal buffer padding, and anything else
* will be skipped, not read. */
- if (get_bits1(gb)) {
- res = get_bits(gb, 4);
- skip_bits(gb, 10 * (res + 1));
+ if (bitstream_read_bit(bc)) {
+ res = bitstream_read(bc, 4);
+ bitstream_skip(bc, 10 * (res + 1));
}
*got_frame_ptr = 1;
*/
static int parse_packet_header(WMAVoiceContext *s)
{
- GetBitContext *gb = &s->gb;
+ BitstreamContext *bc = &s->bc;
unsigned int res;
- if (get_bits_left(gb) < 11)
+ if (bitstream_bits_left(bc) < 11)
return 1;
- skip_bits(gb, 4); // packet sequence number
- s->has_residual_lsps = get_bits1(gb);
+ bitstream_skip(bc, 4); // packet sequence number
+ s->has_residual_lsps = bitstream_read_bit(bc);
do {
- res = get_bits(gb, 6); // number of superframes per packet
- // (minus first one if there is spillover)
- if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)
+ res = bitstream_read(bc, 6); // number of superframes per packet
+ // (minus first one if there is spillover)
+ if (bitstream_bits_left(bc) < 6 * (res == 0x3F) + s->spillover_bitsize)
return 1;
} while (res == 0x3F);
- s->spillover_nbits = get_bits(gb, s->spillover_bitsize);
+ s->spillover_nbits = bitstream_read(bc, s->spillover_bitsize);
return 0;
}
/**
- * Copy (unaligned) bits from gb/data/size to pb.
+ * Copy (unaligned) bits from bc/data/size to pb.
*
* @param pb target buffer to copy bits into
* @param data source buffer to copy bits from
* @param size size of the source data, in bytes
- * @param gb bit I/O context specifying the current position in the source.
+ * @param bc bit I/O context specifying the current position in the source.
* data. This function might use this to align the bit position to
* a whole-byte boundary before calling #avpriv_copy_bits() on aligned
* source data
*/
static void copy_bits(PutBitContext *pb,
const uint8_t *data, int size,
- GetBitContext *gb, int nbits)
+ BitstreamContext *bc, int nbits)
{
int rmn_bytes, rmn_bits;
- rmn_bits = rmn_bytes = get_bits_left(gb);
+ rmn_bits = rmn_bytes = bitstream_bits_left(bc);
if (rmn_bits < nbits)
return;
if (nbits > pb->size_in_bits - put_bits_count(pb))
return;
rmn_bits &= 7; rmn_bytes >>= 3;
if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)
- put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));
+ put_bits(pb, rmn_bits, bitstream_read(bc, rmn_bits));
avpriv_copy_bits(pb, data + size - rmn_bytes,
FFMIN(nbits - rmn_bits, rmn_bytes << 3));
}
int *got_frame_ptr, AVPacket *avpkt)
{
WMAVoiceContext *s = ctx->priv_data;
- GetBitContext *gb = &s->gb;
+ BitstreamContext *bc = &s->bc;
int size, res, pos;
/* Packets are sometimes a multiple of ctx->block_align, with a packet
*got_frame_ptr = 0;
return 0;
}
- init_get_bits(&s->gb, avpkt->data, size << 3);
+ bitstream_init8(&s->bc, avpkt->data, size);
/* size == ctx->block_align is used to indicate whether we are dealing with
* a new packet or a packet of which we already read the packet header
* continuing to parse new superframes in the current packet. */
if (s->spillover_nbits > 0) {
if (s->sframe_cache_size > 0) {
- int cnt = get_bits_count(gb);
- copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);
+ int cnt = bitstream_tell(bc);
+ copy_bits(&s->pb, avpkt->data, size, bc, s->spillover_nbits);
flush_put_bits(&s->pb);
s->sframe_cache_size += s->spillover_nbits;
- if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 &&
+ if ((res = synth_superframe(ctx, data, got_frame_ptr)) == 0 &&
*got_frame_ptr) {
cnt += s->spillover_nbits;
s->skip_bits_next = cnt & 7;
- *(AVFrame *)data = s->frame;
return cnt >> 3;
} else
- skip_bits_long (gb, s->spillover_nbits - cnt +
- get_bits_count(gb)); // resync
+ bitstream_skip (bc, s->spillover_nbits - cnt +
+ bitstream_tell(bc)); // resync
} else
- skip_bits_long(gb, s->spillover_nbits); // resync
+ bitstream_skip(bc, s->spillover_nbits); // resync
}
} else if (s->skip_bits_next)
- skip_bits(gb, s->skip_bits_next);
+ bitstream_skip(bc, s->skip_bits_next);
/* Try parsing superframes in current packet */
s->sframe_cache_size = 0;
s->skip_bits_next = 0;
- pos = get_bits_left(gb);
- if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) {
+ pos = bitstream_bits_left(bc);
+ if ((res = synth_superframe(ctx, data, got_frame_ptr)) < 0) {
return res;
} else if (*got_frame_ptr) {
- int cnt = get_bits_count(gb);
+ int cnt = bitstream_tell(bc);
s->skip_bits_next = cnt & 7;
- *(AVFrame *)data = s->frame;
return cnt >> 3;
} else if ((s->sframe_cache_size = pos) > 0) {
/* rewind bit reader to start of last (incomplete) superframe... */
- init_get_bits(gb, avpkt->data, size << 3);
- skip_bits_long(gb, (size << 3) - pos);
- assert(get_bits_left(gb) == pos);
+ bitstream_init8(bc, avpkt->data, size);
+ bitstream_skip(bc, (size << 3) - pos);
+ assert(bitstream_bits_left(bc) == pos);
/* ...and cache it for spillover in next packet */
init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);
- copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);
+ copy_bits(&s->pb, avpkt->data, size, bc, s->sframe_cache_size);
// FIXME bad - just copy bytes as whole and add use the
// skip_bits_next field
}
}
AVCodec ff_wmavoice_decoder = {
- .name = "wmavoice",
- .type = AVMEDIA_TYPE_AUDIO,
- .id = AV_CODEC_ID_WMAVOICE,
- .priv_data_size = sizeof(WMAVoiceContext),
- .init = wmavoice_decode_init,
- .close = wmavoice_decode_end,
- .decode = wmavoice_decode_packet,
- .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1,
- .flush = wmavoice_flush,
- .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
+ .name = "wmavoice",
+ .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),
+ .type = AVMEDIA_TYPE_AUDIO,
+ .id = AV_CODEC_ID_WMAVOICE,
+ .priv_data_size = sizeof(WMAVoiceContext),
+ .init = wmavoice_decode_init,
+ .init_static_data = wmavoice_init_static_data,
+ .close = wmavoice_decode_end,
+ .decode = wmavoice_decode_packet,
+ .capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1,
+ .flush = wmavoice_flush,
};