X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fwmavoice.c;h=e408166c48001539d030672291e4bcb500c39425;hb=3a651f599a18b023602370b67a77eb0efa309b20;hp=f81e8e9403bbfdbd5ac3231a3c9f508afecc6002;hpb=5a7288822f24b31e18d5fbe2c4086af6507878a7;p=ffmpeg diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c index f81e8e9403b..e408166c480 100644 --- a/libavcodec/wmavoice.c +++ b/libavcodec/wmavoice.c @@ -131,7 +131,7 @@ static const struct frame_type_desc { /** * WMA Voice decoding context. */ -typedef struct { +typedef struct WMAVoiceContext { /** * @name Global values specified in the stream header / extradata or used all over. * @{ @@ -304,6 +304,20 @@ typedef struct { * @return 0 on success, <0 on error. */ static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25]) +{ + int cntr[8] = { 0 }, n, res; + + memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25); + for (n = 0; n < 17; n++) { + res = get_bits(gb, 3); + if (cntr[res] > 3) // should be >= 3 + (res == 7)) + return -1; + vbm_tree[res * 3 + cntr[res]++] = n; + } + return 0; +} + +static av_cold void wmavoice_init_static_data(AVCodec *codec) { static const uint8_t bits[] = { 2, 2, 2, 4, 4, 4, @@ -320,18 +334,9 @@ static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25]) 0x0ffc, 0x0ffd, 0x0ffe, // 1111111111+00/01/10 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx }; - int cntr[8] = { 0 }, n, res; - memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25); - for (n = 0; n < 17; n++) { - res = get_bits(gb, 3); - if (cntr[res] > 3) // should be >= 3 + (res == 7)) - return -1; - vbm_tree[res * 3 + cntr[res]++] = n; - } INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits), bits, 1, 1, codes, 2, 2, 132); - return 0; } /** @@ -354,7 +359,7 @@ static av_cold int wmavoice_decode_init(AVCodecContext *ctx) av_log(ctx, AV_LOG_ERROR, "Invalid extradata size %d (should be 46)\n", ctx->extradata_size); - return -1; + return AVERROR_INVALIDDATA; } flags = AV_RL32(ctx->extradata + 18); s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align); @@ -377,7 +382,7 @@ static av_cold int wmavoice_decode_init(AVCodecContext *ctx) av_log(ctx, AV_LOG_ERROR, "Invalid denoise filter strength %d (max=11)\n", s->denoise_strength); - return -1; + return AVERROR_INVALIDDATA; } s->denoise_tilt_corr = !!(flags & 0x40); s->dc_level = (flags >> 7) & 0xF; @@ -399,7 +404,7 @@ static av_cold int wmavoice_decode_init(AVCodecContext *ctx) init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3); if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) { av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n"); - return -1; + return AVERROR_INVALIDDATA; } s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8; @@ -407,7 +412,7 @@ static av_cold int wmavoice_decode_init(AVCodecContext *ctx) pitch_range = s->max_pitch_val - s->min_pitch_val; if (pitch_range <= 0) { av_log(ctx, AV_LOG_ERROR, "Invalid pitch range; broken extradata?\n"); - return -1; + return AVERROR_INVALIDDATA; } s->pitch_nbits = av_ceil_log2(pitch_range); s->last_pitch_val = 40; @@ -422,7 +427,7 @@ static av_cold int wmavoice_decode_init(AVCodecContext *ctx) "Unsupported samplerate %d (min=%d, max=%d)\n", ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz - return -1; + return AVERROR(ENOSYS); } s->block_conv_table[0] = s->min_pitch_val; @@ -432,7 +437,7 @@ static av_cold int wmavoice_decode_init(AVCodecContext *ctx) s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF; if (s->block_delta_pitch_hrange <= 0) { av_log(ctx, AV_LOG_ERROR, "Invalid delta pitch hrange; broken extradata?\n"); - return -1; + return AVERROR_INVALIDDATA; } s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange); s->block_pitch_range = s->block_conv_table[2] + @@ -607,7 +612,7 @@ static void calc_input_response(WMAVoiceContext *s, float *lpcs, /* 70.57 =~ 1/log10(1.0331663) */ idx = (pwr * gain_mul - 0.0295) * 70.570526123; - if (idx > 127) { // fallback if index falls outside table range + if (idx > 127) { // fall back if index falls outside table range coeffs[n] = wmavoice_energy_table[127] * powf(1.0331663, idx - 127); } else @@ -615,7 +620,7 @@ static void calc_input_response(WMAVoiceContext *s, float *lpcs, } /* calculate the Hilbert transform of the gains, which we do (since this - * is a sinus input) by doing a phase shift (in theory, H(sin())=cos()). + * is a sine input) by doing a phase shift (in theory, H(sin())=cos()). * Hilbert_Transform(RDFT(x)) = Laplace_Transform(x), which calculates the * "moment" of the LPCs in this filter. */ s->dct.dct_calc(&s->dct, lpcs); @@ -1047,9 +1052,10 @@ static void aw_parse_coords(WMAVoiceContext *s, GetBitContext *gb, * @param gb bit I/O context * @param block_idx block index in frame [0, 1] * @param fcb structure containing fixed codebook vector info + * @return -1 on error, 0 otherwise */ -static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, - int block_idx, AMRFixed *fcb) +static int aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, + int block_idx, AMRFixed *fcb) { uint16_t use_mask_mem[9]; // only 5 are used, rest is padding uint16_t *use_mask = use_mask_mem + 2; @@ -1111,7 +1117,7 @@ static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, else if (use_mask[2]) idx = 0x2F; else if (use_mask[3]) idx = 0x3F; else if (use_mask[4]) idx = 0x4F; - else return; + else return -1; idx -= av_log2_16bit(use_mask[idx >> 4]); } if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) { @@ -1128,6 +1134,7 @@ static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, /* set offset for next block, relative to start of that block */ n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag; s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0; + return 0; } /** @@ -1290,7 +1297,18 @@ static void synth_block_fcb_acb(WMAVoiceContext *s, GetBitContext *gb, * (fixed) codebook pulses of the speech signal. */ if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) { aw_pulse_set1(s, gb, block_idx, &fcb); - aw_pulse_set2(s, gb, block_idx, &fcb); + if (aw_pulse_set2(s, gb, block_idx, &fcb)) { + /* Conceal the block with silence and return. + * Skip the correct amount of bits to read the next + * block from the correct offset. */ + int r_idx = pRNG(s->frame_cntr, block_idx, size); + + for (n = 0; n < size; n++) + excitation[n] = + wmavoice_std_codebook[r_idx + n] * s->silence_gain; + skip_bits(gb, 7 + 1); + return; + } } else /* FCB_TYPE_EXC_PULSES */ { int offset_nbits = 5 - frame_desc->log_n_blocks; @@ -1447,7 +1465,7 @@ static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx, if (bd_idx < 0) { av_log(ctx, AV_LOG_ERROR, "Invalid frame type VLC code, skipping\n"); - return -1; + return AVERROR_INVALIDDATA; } block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks; @@ -1644,7 +1662,7 @@ static void stabilize_lsps(double *lsps, int num) * does not modify the state of the bitreader; it * only uses it to copy the current stream position * @param s WMA Voice decoding context private data - * @return -1 if unsupported, 1 on not enough bits or 0 if OK. + * @return < 0 on error, 1 on not enough bits or 0 if OK. */ static int check_bits_for_superframe(GetBitContext *orig_gb, WMAVoiceContext *s) @@ -1662,7 +1680,7 @@ static int check_bits_for_superframe(GetBitContext *orig_gb, if (get_bits_left(gb) < 14) return 1; if (!get_bits1(gb)) - return -1; // WMAPro-in-WMAVoice superframe + return AVERROR(ENOSYS); // WMAPro-in-WMAVoice superframe if (get_bits1(gb)) skip_bits(gb, 12); // number of samples in superframe if (s->has_residual_lsps) { // residual LSPs (for all frames) if (get_bits_left(gb) < s->sframe_lsp_bitsize) @@ -1680,7 +1698,7 @@ static int check_bits_for_superframe(GetBitContext *orig_gb, } bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)]; if (bd_idx < 0) - return -1; // invalid frame type VLC code + return AVERROR_INVALIDDATA; // invalid frame type VLC code frame_desc = &frame_descs[bd_idx]; if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) { if (get_bits_left(gb) < s->pitch_nbits) @@ -1758,14 +1776,15 @@ static int synth_superframe(AVCodecContext *ctx, AVFrame *frame, if ((res = check_bits_for_superframe(gb, s)) == 1) { *got_frame_ptr = 0; return 1; - } + } else if (res < 0) + return res; /* First bit is speech/music bit, it differentiates between WMAVoice * speech samples (the actual codec) and WMAVoice music samples, which * are really WMAPro-in-WMAVoice-superframes. I've never seen those in * the wild yet. */ if (!get_bits1(gb)) { - av_log_missing_feature(ctx, "WMAPro-in-WMAVoice", 1); + avpriv_request_sample(ctx, "WMAPro-in-WMAVoice"); return AVERROR_PATCHWELCOME; } @@ -1775,7 +1794,7 @@ static int synth_superframe(AVCodecContext *ctx, AVFrame *frame, av_log(ctx, AV_LOG_ERROR, "Superframe encodes >480 samples (%d), not allowed\n", n_samples); - return -1; + return AVERROR_INVALIDDATA; } } /* Parse LSPs, if global for the superframe (can also be per-frame). */ @@ -1801,7 +1820,7 @@ static int synth_superframe(AVCodecContext *ctx, AVFrame *frame, /* get output buffer */ frame->nb_samples = 480; - if ((res = ff_get_buffer(ctx, frame)) < 0) { + if ((res = ff_get_buffer(ctx, frame, 0)) < 0) { av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n"); return res; } @@ -2046,14 +2065,15 @@ static av_cold void wmavoice_flush(AVCodecContext *ctx) } AVCodec ff_wmavoice_decoder = { - .name = "wmavoice", - .type = AVMEDIA_TYPE_AUDIO, - .id = AV_CODEC_ID_WMAVOICE, - .priv_data_size = sizeof(WMAVoiceContext), - .init = wmavoice_decode_init, - .close = wmavoice_decode_end, - .decode = wmavoice_decode_packet, - .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1, - .flush = wmavoice_flush, - .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"), + .name = "wmavoice", + .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"), + .type = AVMEDIA_TYPE_AUDIO, + .id = AV_CODEC_ID_WMAVOICE, + .priv_data_size = sizeof(WMAVoiceContext), + .init = wmavoice_decode_init, + .init_static_data = wmavoice_init_static_data, + .close = wmavoice_decode_end, + .decode = wmavoice_decode_packet, + .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1, + .flush = wmavoice_flush, };