* AMR wideband decoder
* Copyright (c) 2010 Marcelo Galvao Povoa
*
- * This file is part of FFmpeg.
+ * This file is part of Libav.
*
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A particular PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
* AMR wideband decoder
*/
+#include "libavutil/common.h"
#include "libavutil/lfg.h"
#include "avcodec.h"
-#include "get_bits.h"
+#include "dsputil.h"
#include "lsp.h"
-#include "celp_math.h"
#include "celp_filters.h"
#include "acelp_filters.h"
#include "acelp_vectors.h"
#include "amrwbdata.h"
typedef struct {
+ AVFrame avframe; ///< AVFrame for decoded samples
AMRWBFrame frame; ///< AMRWB parameters decoded from bitstream
enum Mode fr_cur_mode; ///< mode index of current frame
uint8_t fr_quality; ///< frame quality index (FQI)
AMRWBContext *ctx = avctx->priv_data;
int i;
- avctx->sample_fmt = SAMPLE_FMT_FLT;
+ avctx->sample_fmt = AV_SAMPLE_FMT_FLT;
av_lfg_init(&ctx->prng, 1);
for (i = 0; i < 4; i++)
ctx->prediction_error[i] = MIN_ENERGY;
+ avcodec_get_frame_defaults(&ctx->avframe);
+ avctx->coded_frame = &ctx->avframe;
+
return 0;
}
/**
* Decode the frame header in the "MIME/storage" format. This format
- * is simpler and does not carry the auxiliary information of the frame
+ * is simpler and does not carry the auxiliary frame information.
*
* @param[in] ctx The Context
* @param[in] buf Pointer to the input buffer
*/
static int decode_mime_header(AMRWBContext *ctx, const uint8_t *buf)
{
- GetBitContext gb;
- init_get_bits(&gb, buf, 8);
-
/* Decode frame header (1st octet) */
- skip_bits(&gb, 1); // padding bit
- ctx->fr_cur_mode = get_bits(&gb, 4);
- ctx->fr_quality = get_bits1(&gb);
- skip_bits(&gb, 2); // padding bits
+ ctx->fr_cur_mode = buf[0] >> 3 & 0x0F;
+ ctx->fr_quality = (buf[0] & 0x4) == 0x4;
return 1;
}
/**
- * Decodes quantized ISF vectors using 36-bit indexes (6K60 mode only)
+ * Decode quantized ISF vectors using 36-bit indexes (6K60 mode only).
*
* @param[in] ind Array of 5 indexes
* @param[out] isf_q Buffer for isf_q[LP_ORDER]
}
/**
- * Decodes quantized ISF vectors using 46-bit indexes (except 6K60 mode)
+ * Decode quantized ISF vectors using 46-bit indexes (except 6K60 mode).
*
* @param[in] ind Array of 7 indexes
* @param[out] isf_q Buffer for isf_q[LP_ORDER]
}
/**
- * Apply mean and past ISF values using the prediction factor
- * Updates past ISF vector
+ * Apply mean and past ISF values using the prediction factor.
+ * Updates past ISF vector.
*
* @param[in,out] isf_q Current quantized ISF
* @param[in,out] isf_past Past quantized ISF
/**
* Interpolate the fourth ISP vector from current and past frames
- * to obtain a ISP vector for each subframe
+ * to obtain an ISP vector for each subframe.
*
* @param[in,out] isp_q ISPs for each subframe
* @param[in] isp4_past Past ISP for subframe 4
}
/**
- * Decode an adaptive codebook index into pitch lag (except 6k60, 8k85 modes)
- * Calculate integer lag and fractional lag always using 1/4 resolution
- * In 1st and 3rd subframes the index is relative to last subframe integer lag
+ * Decode an adaptive codebook index into pitch lag (except 6k60, 8k85 modes).
+ * Calculate integer lag and fractional lag always using 1/4 resolution.
+ * In 1st and 3rd subframes the index is relative to last subframe integer lag.
*
* @param[out] lag_int Decoded integer pitch lag
* @param[out] lag_frac Decoded fractional pitch lag
}
/**
- * Decode a adaptive codebook index into pitch lag for 8k85 and 6k60 modes
- * Description is analogous to decode_pitch_lag_high, but in 6k60 relative
- * index is used for all subframes except the first
+ * Decode an adaptive codebook index into pitch lag for 8k85 and 6k60 modes.
+ * The description is analogous to decode_pitch_lag_high, but in 6k60 the
+ * relative index is used for all subframes except the first.
*/
static void decode_pitch_lag_low(int *lag_int, int *lag_frac, int pitch_index,
uint8_t *base_lag_int, int subframe, enum Mode mode)
/**
* Find the pitch vector by interpolating the past excitation at the
- * pitch delay, which is obtained in this function
+ * pitch delay, which is obtained in this function.
*
* @param[in,out] ctx The context
* @param[in] amr_subframe Current subframe data
/**
* The next six functions decode_[i]p_track decode exactly i pulses
* positions and amplitudes (-1 or 1) in a subframe track using
- * an encoded pulse indexing (TS 26.190 section 5.8.2)
+ * an encoded pulse indexing (TS 26.190 section 5.8.2).
*
* The results are given in out[], in which a negative number means
- * amplitude -1 and vice versa (i.e., ampl(x) = x / abs(x) )
+ * amplitude -1 and vice versa (i.e., ampl(x) = x / abs(x) ).
*
* @param[out] out Output buffer (writes i elements)
* @param[in] code Pulse index (no. of bits varies, see below)
/**
* Decode the algebraic codebook index to pulse positions and signs,
- * then construct the algebraic codebook vector
+ * then construct the algebraic codebook vector.
*
* @param[out] fixed_vector Buffer for the fixed codebook excitation
* @param[in] pulse_hi MSBs part of the pulse index array (higher modes only)
}
/**
- * Decode pitch gain and fixed gain correction factor
+ * Decode pitch gain and fixed gain correction factor.
*
* @param[in] vq_gain Vector-quantized index for gains
* @param[in] mode Mode of the current frame
}
/**
- * Apply pitch sharpening filters to the fixed codebook vector
+ * Apply pitch sharpening filters to the fixed codebook vector.
*
* @param[in] ctx The context
* @param[in,out] fixed_vector Fixed codebook excitation
}
/**
- * Calculate the voicing factor (-1.0 = unvoiced to 1.0 = voiced)
+ * Calculate the voicing factor (-1.0 = unvoiced to 1.0 = voiced).
*
* @param[in] p_vector, f_vector Pitch and fixed excitation vectors
* @param[in] p_gain, f_gain Pitch and fixed gains
static float voice_factor(float *p_vector, float p_gain,
float *f_vector, float f_gain)
{
- double p_ener = (double) ff_dot_productf(p_vector, p_vector,
- AMRWB_SFR_SIZE) * p_gain * p_gain;
- double f_ener = (double) ff_dot_productf(f_vector, f_vector,
- AMRWB_SFR_SIZE) * f_gain * f_gain;
+ double p_ener = (double) ff_scalarproduct_float_c(p_vector, p_vector,
+ AMRWB_SFR_SIZE) *
+ p_gain * p_gain;
+ double f_ener = (double) ff_scalarproduct_float_c(f_vector, f_vector,
+ AMRWB_SFR_SIZE) *
+ f_gain * f_gain;
return (p_ener - f_ener) / (p_ener + f_ener);
}
/**
- * Reduce fixed vector sparseness by smoothing with one of three IR filters
- * Also known as "adaptive phase dispersion"
+ * Reduce fixed vector sparseness by smoothing with one of three IR filters,
+ * also known as "adaptive phase dispersion".
*
* @param[in] ctx The context
* @param[in,out] fixed_vector Unfiltered fixed vector
/**
* Calculate a stability factor {teta} based on distance between
- * current and past isf. A value of 1 shows maximum signal stability
+ * current and past isf. A value of 1 shows maximum signal stability.
*/
static float stability_factor(const float *isf, const float *isf_past)
{
/**
* Apply a non-linear fixed gain smoothing in order to reduce
- * fluctuation in the energy of excitation
+ * fluctuation in the energy of excitation.
*
* @param[in] fixed_gain Unsmoothed fixed gain
* @param[in,out] prev_tr_gain Previous threshold gain (updated)
}
/**
- * Filter the fixed_vector to emphasize the higher frequencies
+ * Filter the fixed_vector to emphasize the higher frequencies.
*
* @param[in,out] fixed_vector Fixed codebook vector
* @param[in] voice_fac Frame voicing factor
}
/**
- * Conduct 16th order linear predictive coding synthesis from excitation
+ * Conduct 16th order linear predictive coding synthesis from excitation.
*
* @param[in] ctx Pointer to the AMRWBContext
* @param[in] lpc Pointer to the LPC coefficients
/* emphasize pitch vector contribution in low bitrate modes */
if (ctx->pitch_gain[0] > 0.5 && ctx->fr_cur_mode <= MODE_8k85) {
int i;
- float energy = ff_dot_productf(excitation, excitation,
- AMRWB_SFR_SIZE);
+ float energy = ff_scalarproduct_float_c(excitation, excitation,
+ AMRWB_SFR_SIZE);
// XXX: Weird part in both ref code and spec. A unknown parameter
// {beta} seems to be identical to the current pitch gain
/**
* Upsample a signal by 5/4 ratio (from 12.8kHz to 16kHz) using
- * a FIR interpolation filter. Uses past data from before *in address
+ * a FIR interpolation filter. Uses past data from before *in address.
*
* @param[out] out Buffer for interpolated signal
* @param[in] in Current signal data (length 0.8*o_size)
i++;
for (k = 1; k < 5; k++) {
- out[i] = ff_dot_productf(in0 + int_part, upsample_fir[4 - frac_part],
- UPS_MEM_SIZE);
+ out[i] = ff_scalarproduct_float_c(in0 + int_part,
+ upsample_fir[4 - frac_part],
+ UPS_MEM_SIZE);
int_part++;
frac_part--;
i++;
/**
* Calculate the high-band gain based on encoded index (23k85 mode) or
- * on the low-band speech signal and the Voice Activity Detection flag
+ * on the low-band speech signal and the Voice Activity Detection flag.
*
* @param[in] ctx The context
* @param[in] synth LB speech synthesis at 12.8k
if (ctx->fr_cur_mode == MODE_23k85)
return qua_hb_gain[hb_idx] * (1.0f / (1 << 14));
- tilt = ff_dot_productf(synth, synth + 1, AMRWB_SFR_SIZE - 1) /
- ff_dot_productf(synth, synth, AMRWB_SFR_SIZE);
+ tilt = ff_scalarproduct_float_c(synth, synth + 1, AMRWB_SFR_SIZE - 1) /
+ ff_scalarproduct_float_c(synth, synth, AMRWB_SFR_SIZE);
/* return gain bounded by [0.1, 1.0] */
return av_clipf((1.0 - FFMAX(0.0, tilt)) * (1.25 - 0.25 * wsp), 0.1, 1.0);
/**
* Generate the high-band excitation with the same energy from the lower
- * one and scaled by the given gain
+ * one and scaled by the given gain.
*
* @param[in] ctx The context
* @param[out] hb_exc Buffer for the excitation
const float *synth_exc, float hb_gain)
{
int i;
- float energy = ff_dot_productf(synth_exc, synth_exc, AMRWB_SFR_SIZE);
+ float energy = ff_scalarproduct_float_c(synth_exc, synth_exc, AMRWB_SFR_SIZE);
/* Generate a white-noise excitation */
for (i = 0; i < AMRWB_SFR_SIZE_16k; i++)
}
/**
- * Calculate the auto-correlation for the ISF difference vector
+ * Calculate the auto-correlation for the ISF difference vector.
*/
static float auto_correlation(float *diff_isf, float mean, int lag)
{
/**
* Extrapolate a ISF vector to the 16kHz range (20th order LP)
- * used at mode 6k60 LP filter for the high frequency band
+ * used at mode 6k60 LP filter for the high frequency band.
*
- * @param[out] out Buffer for extrapolated isf
- * @param[in] isf Input isf vector
+ * @param[out] isf Buffer for extrapolated isf; contains LP_ORDER
+ * values on input
*/
-static void extrapolate_isf(float out[LP_ORDER_16k], float isf[LP_ORDER])
+static void extrapolate_isf(float isf[LP_ORDER_16k])
{
float diff_isf[LP_ORDER - 2], diff_mean;
float *diff_hi = diff_isf - LP_ORDER + 1; // diff array for extrapolated indexes
float est, scale;
int i, i_max_corr;
- memcpy(out, isf, (LP_ORDER - 1) * sizeof(float));
- out[LP_ORDER_16k - 1] = isf[LP_ORDER - 1];
+ isf[LP_ORDER_16k - 1] = isf[LP_ORDER - 1];
/* Calculate the difference vector */
for (i = 0; i < LP_ORDER - 2; i++)
i_max_corr++;
for (i = LP_ORDER - 1; i < LP_ORDER_16k - 1; i++)
- out[i] = isf[i - 1] + isf[i - 1 - i_max_corr]
+ isf[i] = isf[i - 1] + isf[i - 1 - i_max_corr]
- isf[i - 2 - i_max_corr];
/* Calculate an estimate for ISF(18) and scale ISF based on the error */
- est = 7965 + (out[2] - out[3] - out[4]) / 6.0;
- scale = 0.5 * (FFMIN(est, 7600) - out[LP_ORDER - 2]) /
- (out[LP_ORDER_16k - 2] - out[LP_ORDER - 2]);
+ est = 7965 + (isf[2] - isf[3] - isf[4]) / 6.0;
+ scale = 0.5 * (FFMIN(est, 7600) - isf[LP_ORDER - 2]) /
+ (isf[LP_ORDER_16k - 2] - isf[LP_ORDER - 2]);
for (i = LP_ORDER - 1; i < LP_ORDER_16k - 1; i++)
- diff_hi[i] = scale * (out[i] - out[i - 1]);
+ diff_hi[i] = scale * (isf[i] - isf[i - 1]);
/* Stability insurance */
for (i = LP_ORDER; i < LP_ORDER_16k - 1; i++)
}
for (i = LP_ORDER - 1; i < LP_ORDER_16k - 1; i++)
- out[i] = out[i - 1] + diff_hi[i] * (1.0f / (1 << 15));
+ isf[i] = isf[i - 1] + diff_hi[i] * (1.0f / (1 << 15));
/* Scale the ISF vector for 16000 Hz */
for (i = 0; i < LP_ORDER_16k - 1; i++)
- out[i] *= 0.8;
+ isf[i] *= 0.8;
}
/**
/**
* Conduct 20th order linear predictive coding synthesis for the high
- * frequency band excitation at 16kHz
+ * frequency band excitation at 16kHz.
*
* @param[in] ctx The context
* @param[in] subframe Current subframe index (0 to 3)
ff_weighted_vector_sumf(e_isf, isf_past, isf, isfp_inter[subframe],
1.0 - isfp_inter[subframe], LP_ORDER);
- extrapolate_isf(e_isf, e_isf);
+ extrapolate_isf(e_isf);
e_isf[LP_ORDER_16k - 1] *= 2.0;
ff_acelp_lsf2lspd(e_isp, e_isf, LP_ORDER_16k);
}
/**
- * Apply to high-band samples a 15th order filter
- * The filter characteristic depends on the given coefficients
+ * Apply a 15th order filter to high-band samples.
+ * The filter characteristic depends on the given coefficients.
*
* @param[out] out Buffer for filtered output
* @param[in] fir_coef Filter coefficients
}
/**
- * Update context state before the next subframe
+ * Update context state before the next subframe.
*/
static void update_sub_state(AMRWBContext *ctx)
{
LP_ORDER_16k * sizeof(float));
}
-static int amrwb_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
- AVPacket *avpkt)
+static int amrwb_decode_frame(AVCodecContext *avctx, void *data,
+ int *got_frame_ptr, AVPacket *avpkt)
{
AMRWBContext *ctx = avctx->priv_data;
AMRWBFrame *cf = &ctx->frame;
const uint8_t *buf = avpkt->data;
int buf_size = avpkt->size;
int expected_fr_size, header_size;
- float *buf_out = data;
+ float *buf_out;
float spare_vector[AMRWB_SFR_SIZE]; // extra stack space to hold result from anti-sparseness processing
float fixed_gain_factor; // fixed gain correction factor (gamma)
float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use
float hb_exc[AMRWB_SFR_SIZE_16k]; // excitation for the high frequency band
float hb_samples[AMRWB_SFR_SIZE_16k]; // filtered high-band samples from synthesis
float hb_gain;
- int sub, i;
+ int sub, i, ret;
+
+ /* get output buffer */
+ ctx->avframe.nb_samples = 4 * AMRWB_SFR_SIZE_16k;
+ if ((ret = avctx->get_buffer(avctx, &ctx->avframe)) < 0) {
+ av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+ return ret;
+ }
+ buf_out = (float *)ctx->avframe.data[0];
header_size = decode_mime_header(ctx, buf);
+ if (ctx->fr_cur_mode > MODE_SID) {
+ av_log(avctx, AV_LOG_ERROR,
+ "Invalid mode %d\n", ctx->fr_cur_mode);
+ return AVERROR_INVALIDDATA;
+ }
expected_fr_size = ((cf_sizes_wb[ctx->fr_cur_mode] + 7) >> 3) + 1;
if (buf_size < expected_fr_size) {
av_log(avctx, AV_LOG_ERROR,
"Frame too small (%d bytes). Truncated file?\n", buf_size);
- *data_size = 0;
- return buf_size;
+ *got_frame_ptr = 0;
+ return AVERROR_INVALIDDATA;
}
if (!ctx->fr_quality || ctx->fr_cur_mode > MODE_SID)
av_log(avctx, AV_LOG_ERROR, "Encountered a bad or corrupted frame\n");
- if (ctx->fr_cur_mode == MODE_SID) /* Comfort noise frame */
+ if (ctx->fr_cur_mode == MODE_SID) { /* Comfort noise frame */
av_log_missing_feature(avctx, "SID mode", 1);
-
- if (ctx->fr_cur_mode >= MODE_SID)
return -1;
+ }
ff_amr_bit_reorder((uint16_t *) &ctx->frame, sizeof(AMRWBFrame),
buf + header_size, amr_bit_orderings_by_mode[ctx->fr_cur_mode]);
ctx->fixed_gain[0] =
ff_amr_set_fixed_gain(fixed_gain_factor,
- ff_dot_productf(ctx->fixed_vector, ctx->fixed_vector,
- AMRWB_SFR_SIZE) / AMRWB_SFR_SIZE,
+ ff_scalarproduct_float_c(ctx->fixed_vector,
+ ctx->fixed_vector,
+ AMRWB_SFR_SIZE) /
+ AMRWB_SFR_SIZE,
ctx->prediction_error,
ENERGY_MEAN, energy_pred_fac);
memcpy(ctx->isp_sub4_past, ctx->isp[3], LP_ORDER * sizeof(ctx->isp[3][0]));
memcpy(ctx->isf_past_final, ctx->isf_cur, LP_ORDER * sizeof(float));
- /* report how many samples we got */
- *data_size = 4 * AMRWB_SFR_SIZE_16k * sizeof(float);
+ *got_frame_ptr = 1;
+ *(AVFrame *)data = ctx->avframe;
return expected_fr_size;
}
-AVCodec amrwb_decoder = {
+AVCodec ff_amrwb_decoder = {
.name = "amrwb",
- .type = CODEC_TYPE_AUDIO,
- .id = CODEC_ID_AMR_WB,
+ .type = AVMEDIA_TYPE_AUDIO,
+ .id = AV_CODEC_ID_AMR_WB,
.priv_data_size = sizeof(AMRWBContext),
.init = amrwb_decode_init,
.decode = amrwb_decode_frame,
- .long_name = NULL_IF_CONFIG_SMALL("Adaptive Multi-Rate WideBand"),
- .sample_fmts = (enum AVSampleFormat[]){SAMPLE_FMT_FLT,SAMPLE_FMT_NONE},
+ .capabilities = CODEC_CAP_DR1,
+ .long_name = NULL_IF_CONFIG_SMALL("AMR-WB (Adaptive Multi-Rate WideBand)"),
+ .sample_fmts = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLT,
+ AV_SAMPLE_FMT_NONE },
};