X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fwmavoice.c;h=ef711141da1fc9dcf532c90269a109cedd6ea970;hb=005c80b645cc6ab5f30cbc3e10ab66a15dfd107e;hp=cff3f5fc7e17740418c4aa42f6e3e0e668ccbdd1;hpb=d99427cb8ba099375d8cce6df808d4acf045ab43;p=ffmpeg diff --git a/libavcodec/wmavoice.c b/libavcodec/wmavoice.c index cff3f5fc7e1..ef711141da1 100644 --- a/libavcodec/wmavoice.c +++ b/libavcodec/wmavoice.c @@ -25,6 +25,8 @@ * @author Ronald S. Bultje */ +#define UNCHECKED_BITSTREAM_READER 1 + #include #include "avcodec.h" #include "get_bits.h" @@ -131,6 +133,7 @@ typedef struct { * @name Global values specified in the stream header / extradata or used all over. * @{ */ + AVFrame frame; GetBitContext gb; ///< packet bitreader. During decoder init, ///< it contains the extradata from the ///< demuxer. During decoding, it contains @@ -188,7 +191,7 @@ typedef struct { * @{ */ int spillover_nbits; ///< number of bits of the previous packet's - ///< last superframe preceeding this + ///< last superframe preceding this ///< packet's first full superframe (useful ///< for re-synchronization also) int has_residual_lsps; ///< if set, superframes contain one set of @@ -315,10 +318,9 @@ static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25]) 0x0ffc, 0x0ffd, 0x0ffe, // 1111111111+00/01/10 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx }; - int cntr[8], n, res; + int cntr[8] = { 0 }, n, res; memset(vbm_tree, 0xff, sizeof(vbm_tree[0]) * 25); - memset(cntr, 0, sizeof(cntr)); for (n = 0; n < 17; n++) { res = get_bits(gb, 3); if (cntr[res] > 3) // should be >= 3 + (res == 7)) @@ -438,6 +440,9 @@ static av_cold int wmavoice_decode_init(AVCodecContext *ctx) ctx->sample_fmt = AV_SAMPLE_FMT_FLT; + avcodec_get_frame_defaults(&s->frame); + ctx->coded_frame = &s->frame; + return 0; } @@ -1085,7 +1090,7 @@ static void aw_pulse_set2(WMAVoiceContext *s, GetBitContext *gb, int excl_range = s->aw_pulse_range; // always 16 or 24 uint16_t *use_mask_ptr = &use_mask[idx >> 4]; int first_sh = 16 - (idx & 15); - *use_mask_ptr++ &= 0xFFFF << first_sh; + *use_mask_ptr++ &= 0xFFFFu << first_sh; excl_range -= first_sh; if (excl_range >= 16) { *use_mask_ptr++ = 0; @@ -1434,8 +1439,7 @@ static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx, int pitch[MAX_BLOCKS], last_block_pitch; /* Parse frame type ("frame header"), see frame_descs */ - int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], - block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks; + int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)], block_nsamples; if (bd_idx < 0) { av_log(ctx, AV_LOG_ERROR, @@ -1443,6 +1447,8 @@ static int synth_frame(AVCodecContext *ctx, GetBitContext *gb, int frame_idx, return -1; } + block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks; + /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */ if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) { /* Pitch is provided per frame, which is interpreted as the pitch of @@ -1719,14 +1725,10 @@ static int check_bits_for_superframe(GetBitContext *orig_gb, * (if less than 480), usually used to prevent blanks at track boundaries. * * @param ctx WMA Voice decoder context - * @param samples pointer to output buffer for voice samples - * @param data_size pointer containing the size of #samples on input, and the - * amount of #samples filled on output * @return 0 on success, <0 on error or 1 if there was not enough data to * fully parse the superframe */ -static int synth_superframe(AVCodecContext *ctx, - float *samples, int *data_size) +static int synth_superframe(AVCodecContext *ctx, int *got_frame_ptr) { WMAVoiceContext *s = ctx->priv_data; GetBitContext *gb = &s->gb, s_gb; @@ -1736,6 +1738,7 @@ static int synth_superframe(AVCodecContext *ctx, wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mode]; float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12]; float synth[MAX_LSPS + MAX_SFRAMESIZE]; + float *samples; memcpy(synth, s->synth_history, s->lsps * sizeof(*synth)); @@ -1748,7 +1751,10 @@ static int synth_superframe(AVCodecContext *ctx, s->sframe_cache_size = 0; } - if ((res = check_bits_for_superframe(gb, s)) == 1) return 1; + if ((res = check_bits_for_superframe(gb, s)) == 1) { + *got_frame_ptr = 0; + return 1; + } /* First bit is speech/music bit, it differentiates between WMAVoice * speech samples (the actual codec) and WMAVoice music samples, which @@ -1789,7 +1795,16 @@ static int synth_superframe(AVCodecContext *ctx, stabilize_lsps(lsps[n], s->lsps); } - /* Parse frames, optionally preceeded by per-frame (independent) LSPs. */ + /* get output buffer */ + s->frame.nb_samples = 480; + if ((res = ctx->get_buffer(ctx, &s->frame)) < 0) { + av_log(ctx, AV_LOG_ERROR, "get_buffer() failed\n"); + return res; + } + s->frame.nb_samples = n_samples; + samples = (float *)s->frame.data[0]; + + /* Parse frames, optionally preceded by per-frame (independent) LSPs. */ for (n = 0; n < 3; n++) { if (!s->has_residual_lsps) { int m; @@ -1808,8 +1823,10 @@ static int synth_superframe(AVCodecContext *ctx, &samples[n * MAX_FRAMESIZE], lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1], &excitation[s->history_nsamples + n * MAX_FRAMESIZE], - &synth[s->lsps + n * MAX_FRAMESIZE]))) + &synth[s->lsps + n * MAX_FRAMESIZE]))) { + *got_frame_ptr = 0; return res; + } } /* Statistics? FIXME - we don't check for length, a slight overrun @@ -1820,8 +1837,7 @@ static int synth_superframe(AVCodecContext *ctx, skip_bits(gb, 10 * (res + 1)); } - /* Specify nr. of output samples */ - *data_size = n_samples * sizeof(float); + *got_frame_ptr = 1; /* Update history */ memcpy(s->prev_lsps, lsps[2], @@ -1872,7 +1888,7 @@ static int parse_packet_header(WMAVoiceContext *s) * @param size size of the source data, in bytes * @param gb bit I/O context specifying the current position in the source. * data. This function might use this to align the bit position to - * a whole-byte boundary before calling #ff_copy_bits() on aligned + * a whole-byte boundary before calling #avpriv_copy_bits() on aligned * source data * @param nbits the amount of bits to copy from source to target * @@ -1893,7 +1909,7 @@ static void copy_bits(PutBitContext *pb, rmn_bits &= 7; rmn_bytes >>= 3; if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0) put_bits(pb, rmn_bits, get_bits(gb, rmn_bits)); - ff_copy_bits(pb, data + size - rmn_bytes, + avpriv_copy_bits(pb, data + size - rmn_bytes, FFMIN(nbits - rmn_bits, rmn_bytes << 3)); } @@ -1909,28 +1925,22 @@ static void copy_bits(PutBitContext *pb, * For more information about frames, see #synth_superframe(). */ static int wmavoice_decode_packet(AVCodecContext *ctx, void *data, - int *data_size, AVPacket *avpkt) + int *got_frame_ptr, AVPacket *avpkt) { WMAVoiceContext *s = ctx->priv_data; GetBitContext *gb = &s->gb; int size, res, pos; - if (*data_size < 480 * sizeof(float)) { - av_log(ctx, AV_LOG_ERROR, - "Output buffer too small (%d given - %zu needed)\n", - *data_size, 480 * sizeof(float)); - return -1; - } - *data_size = 0; - /* Packets are sometimes a multiple of ctx->block_align, with a packet * header at each ctx->block_align bytes. However, Libav's ASF demuxer * feeds us ASF packets, which may concatenate multiple "codec" packets * in a single "muxer" packet, so we artificially emulate that by * capping the packet size at ctx->block_align. */ for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align); - if (!size) + if (!size) { + *got_frame_ptr = 0; return 0; + } init_get_bits(&s->gb, avpkt->data, size << 3); /* size == ctx->block_align is used to indicate whether we are dealing with @@ -1949,10 +1959,11 @@ static int wmavoice_decode_packet(AVCodecContext *ctx, void *data, copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits); flush_put_bits(&s->pb); s->sframe_cache_size += s->spillover_nbits; - if ((res = synth_superframe(ctx, data, data_size)) == 0 && - *data_size > 0) { + if ((res = synth_superframe(ctx, got_frame_ptr)) == 0 && + *got_frame_ptr) { cnt += s->spillover_nbits; s->skip_bits_next = cnt & 7; + *(AVFrame *)data = s->frame; return cnt >> 3; } else skip_bits_long (gb, s->spillover_nbits - cnt + @@ -1967,11 +1978,12 @@ static int wmavoice_decode_packet(AVCodecContext *ctx, void *data, s->sframe_cache_size = 0; s->skip_bits_next = 0; pos = get_bits_left(gb); - if ((res = synth_superframe(ctx, data, data_size)) < 0) { + if ((res = synth_superframe(ctx, got_frame_ptr)) < 0) { return res; - } else if (*data_size > 0) { + } else if (*got_frame_ptr) { int cnt = get_bits_count(gb); s->skip_bits_next = cnt & 7; + *(AVFrame *)data = s->frame; return cnt >> 3; } else if ((s->sframe_cache_size = pos) > 0) { /* rewind bit reader to start of last (incomplete) superframe... */ @@ -2039,7 +2051,7 @@ AVCodec ff_wmavoice_decoder = { .init = wmavoice_decode_init, .close = wmavoice_decode_end, .decode = wmavoice_decode_packet, - .capabilities = CODEC_CAP_SUBFRAMES, - .flush = wmavoice_flush, - .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"), + .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_DR1, + .flush = wmavoice_flush, + .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"), };