2 * Windows Media Audio Lossless decoder
3 * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
4 * Copyright (c) 2008 - 2011 Sascha Sommer, Benjamin Larsson
5 * Copyright (c) 2011 Andreas Öman
6 * Copyright (c) 2011 - 2012 Mashiat Sarker Shakkhar
8 * This file is part of FFmpeg.
10 * FFmpeg is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
15 * FFmpeg is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with FFmpeg; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27 #include "libavutil/attributes.h"
28 #include "libavutil/avassert.h"
34 #include "lossless_audiodsp.h"
36 #include "wma_common.h"
/*
 * Compile-time limits used by the decoder. The block sizes are expressed as
 * log2 values; WMALL_BLOCK_MAX_SIZE and WMALL_BLOCK_SIZES are derived from
 * them. decode_init() asserts that samples_per_frame never exceeds
 * WMALL_BLOCK_MAX_SIZE, which is also the per-channel length of the
 * channel_residues buffer.
 * NOTE(review): MAX_ORDER, which sizes the cdlms buffers further down, is not
 * defined anywhere in this excerpt - confirm it is still defined in the file.
 */
38 /** current decoder limitations */
39 #define WMALL_MAX_CHANNELS 8 ///< max number of handled channels
40 #define MAX_SUBFRAMES 32 ///< max number of subframes per channel
41 #define MAX_BANDS 29 ///< max number of scale factor bands
42 #define MAX_FRAMESIZE 32768 ///< maximum compressed frame size (decode_init multiplies it by the channel count)
45 #define WMALL_BLOCK_MIN_BITS 6 ///< log2 of min block size
46 #define WMALL_BLOCK_MAX_BITS 14 ///< log2 of max block size
47 #define WMALL_BLOCK_MAX_SIZE (1 << WMALL_BLOCK_MAX_BITS) ///< maximum block size
48 #define WMALL_BLOCK_SIZES (WMALL_BLOCK_MAX_BITS - WMALL_BLOCK_MIN_BITS + 1) ///< possible block sizes
50 #define WMALL_COEFF_PAD_SIZE 16 ///< pad coef buffers with 0 for use with SIMD (a byte count: used as the memset length in decode_cdlms)
/*
 * Per-channel state, one instance per channel in WmallDecodeCtx.channel[].
 * The subframe layout (num_subframes, subframe_len[], subframe_offsets[]) is
 * rebuilt for every frame by decode_tilehdr(); cur_subframe and
 * decoded_samples are reset by decode_frame() and advanced by
 * decode_subframe().
 */
53 * @brief frame-specific decoder context for a single channel
55 typedef struct WmallChannelCtx {
56 int16_t prev_block_len; ///< length of the previous block
57 uint8_t transmit_coefs;
58 uint8_t num_subframes; ///< number of subframes of this channel in the current frame
59 uint16_t subframe_len[MAX_SUBFRAMES]; ///< subframe length in samples
60 uint16_t subframe_offsets[MAX_SUBFRAMES]; ///< subframe positions in the current frame
61 uint8_t cur_subframe; ///< current subframe number
62 uint16_t decoded_samples; ///< number of already processed samples
63 int quant_step; ///< quantization step for the current subframe
64 int transient_counter; ///< number of transient samples from the beginning of the transient zone
/*
 * Decoder private context (AVCodecContext.priv_data). It holds the stream
 * parameters parsed by decode_init(), the bit-reservoir and packet state,
 * the per-frame tiling state and the coefficient/history buffers of the
 * prediction filters (AC filter, multichannel LMS, cascaded LMS, LPC).
 * NOTE(review): the functions below also access members that are not declared
 * in this excerpt (e.g. frame, do_mclms, do_lpc, mclms_order, mclms_recent,
 * movave_scaling, quant_stepsize, seekable_tile, bV3RTM, lpc_order and the
 * order/scaling/coefsend/bitsend/recent fields of cdlms[][]) - confirm that
 * their declarations are still present in the file.
 */
68 * @brief main decoder context
70 typedef struct WmallDecodeCtx {
71 /* generic decoder variables */
72 AVCodecContext *avctx;
74 LLAudDSPContext dsp; ///< accelerated DSP functions
75 uint8_t *frame_data; ///< compressed frame data
76 int max_frame_size; ///< max bitstream size
77 PutBitContext pb; ///< context for filling the frame_data buffer
79 /* frame size dependent frame information (set during initialization) */
80 uint32_t decode_flags; ///< used compression features
81 int len_prefix; ///< frame is prefixed with its length
82 int dynamic_range_compression; ///< frame contains DRC data
83 uint8_t bits_per_sample; ///< sample size in bits, read from the extradata (decode_init accepts 16 and 24)
84 uint16_t samples_per_frame; ///< number of samples to output
85 uint16_t log2_frame_size;
86 int8_t num_channels; ///< number of channels in the stream (copied from AVCodecContext.channels)
87 int8_t lfe_channel; ///< lfe channel index
88 uint8_t max_num_subframes;
89 uint8_t subframe_len_bits; ///< number of bits used for the subframe length
90 uint8_t max_subframe_len_bit; ///< flag indicating that the subframe is of maximum size when the first subframe length bit is 1
91 uint16_t min_samples_per_subframe;
93 /* packet decode state */
94 GetBitContext pgb; ///< bitstream reader context for the packet
95 int next_packet_start; ///< start offset of the next WMA packet in the demuxer packet
96 uint8_t packet_offset; ///< offset to the frame in the packet
97 uint8_t packet_sequence_number; ///< current packet number
98 int num_saved_bits; ///< saved number of bits
99 int frame_offset; ///< frame offset in the bit reservoir
100 int subframe_offset; ///< subframe offset in the bit reservoir
101 uint8_t packet_loss; ///< set in case of bitstream error
102 uint8_t packet_done; ///< set when a packet is fully decoded
104 /* frame decode state */
105 uint32_t frame_num; ///< current frame number (not used for decoding)
106 GetBitContext gb; ///< bitstream reader context
107 int buf_bit_size; ///< buffer size in bits
108 int16_t *samples_16[WMALL_MAX_CHANNELS]; ///< current samplebuffer pointer (16-bit)
109 int32_t *samples_32[WMALL_MAX_CHANNELS]; ///< current samplebuffer pointer (24-bit)
110 uint8_t drc_gain; ///< gain for the DRC tool
111 int8_t skip_frame; ///< skip output step
112 int8_t parsed_all_subframes; ///< all subframes decoded?
114 /* subframe/block decode state */
115 int16_t subframe_len; ///< current subframe length
116 int8_t channels_for_cur_subframe; ///< number of channels that contain the subframe
117 int8_t channel_indexes_for_cur_subframe[WMALL_MAX_CHANNELS];
119 WmallChannelCtx channel[WMALL_MAX_CHANNELS]; ///< per channel data
121 // WMA Lossless-specific
/* per-tile tool flags, each read as a single bit in decode_subframe() */
123 uint8_t do_arith_coding;
124 uint8_t do_ac_filter;
125 uint8_t do_inter_ch_decorr;
/* AC filter parameters (decode_ac_filter) and per-channel history (revert_acfilter) */
129 int8_t acfilter_order;
130 int8_t acfilter_scaling;
131 int16_t acfilter_coeffs[16];
132 int acfilter_prevvalues[WMALL_MAX_CHANNELS][16];
/* multichannel LMS state: coefficients, history and sign updates */
135 int8_t mclms_scaling;
136 int16_t mclms_coeffs[WMALL_MAX_CHANNELS * WMALL_MAX_CHANNELS * 32];
137 int16_t mclms_coeffs_cur[WMALL_MAX_CHANNELS * WMALL_MAX_CHANNELS];
138 int32_t mclms_prevvalues[WMALL_MAX_CHANNELS * 2 * 32];
139 int32_t mclms_updates[WMALL_MAX_CHANNELS * 2 * 32];
/* cascaded LMS filters: up to 9 per channel, cdlms_ttl[] holds the count in use */
150 DECLARE_ALIGNED(16, int16_t, coefs)[MAX_ORDER + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
151 DECLARE_ALIGNED(16, int32_t, lms_prevvalues)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
152 DECLARE_ALIGNED(16, int16_t, lms_updates)[MAX_ORDER * 2 + WMALL_COEFF_PAD_SIZE/sizeof(int16_t)];
154 } cdlms[WMALL_MAX_CHANNELS][9];
156 int cdlms_ttl[WMALL_MAX_CHANNELS];
160 int is_channel_coded[WMALL_MAX_CHANNELS];
161 int update_speed[WMALL_MAX_CHANNELS];
163 int transient[WMALL_MAX_CHANNELS];
164 int transient_pos[WMALL_MAX_CHANNELS];
/* running sum used by decode_channel_residues() to derive the remainder bit count */
167 int ave_sum[WMALL_MAX_CHANNELS];
/* working buffer: residues are decoded into it and turned into samples in place */
169 int channel_residues[WMALL_MAX_CHANNELS][WMALL_BLOCK_MAX_SIZE];
171 int lpc_coefs[WMALL_MAX_CHANNELS][40];
/**
 * Sign of an integer expression: 1 when x is positive, -1 when x is
 * negative, 0 when x is zero. The argument is expanded twice, so it must
 * not have side effects.
 */
#define WMASIGN(x) (-((x) < 0) + ((x) > 0))
/*
 * Codec init callback. Allocates the frame_data bit reservoir, parses the
 * extradata (bits per sample, channel mask, decode flags), selects the
 * output sample format and derives the frame/subframe geometry.
 *
 * Error returns visible here: AVERROR(EINVAL) when block_align is unset,
 * AVERROR(ENOMEM) on allocation failure, AVERROR_INVALIDDATA for an unknown
 * bit depth, too many subframes or a negative channel count, and
 * AVERROR_PATCHWELCOME for short extradata or more than WMALL_MAX_CHANNELS
 * channels.
 * NOTE(review): the error returns after the av_mallocz() below do not free
 * frame_data in this function; whether a close callback releases it is not
 * visible in this excerpt - confirm.
 */
180 static av_cold int decode_init(AVCodecContext *avctx)
182 WmallDecodeCtx *s = avctx->priv_data;
183 uint8_t *edata_ptr = avctx->extradata;
184 unsigned int channel_mask;
185 int i, log2_max_num_subframes;
187 if (!avctx->block_align) {
188 av_log(avctx, AV_LOG_ERROR, "block_align is not set\n");
189 return AVERROR(EINVAL);
/* NOTE(review): avctx->channels sizes the buffer here, before the channel
 * count is range-checked near the end of this function. */
192 s->max_frame_size = MAX_FRAMESIZE * avctx->channels;
193 s->frame_data = av_mallocz(s->max_frame_size + AV_INPUT_BUFFER_PADDING_SIZE);
195 return AVERROR(ENOMEM);
198 ff_llauddsp_init(&s->dsp);
199 init_put_bits(&s->pb, s->frame_data, s->max_frame_size);
/* extradata fields used: bytes 0-1 bits per sample, bytes 2-5 channel mask,
 * bytes 14-15 decode flags (all little endian) */
201 if (avctx->extradata_size >= 18) {
202 s->decode_flags = AV_RL16(edata_ptr + 14);
203 channel_mask = AV_RL32(edata_ptr + 2);
204 s->bits_per_sample = AV_RL16(edata_ptr);
/* 16-bit streams are output as planar S16, 24-bit streams as planar S32 */
205 if (s->bits_per_sample == 16)
206 avctx->sample_fmt = AV_SAMPLE_FMT_S16P;
207 else if (s->bits_per_sample == 24) {
208 av_log(avctx, AV_LOG_WARNING, "Decoding audio at 24 bit-depth\n");
209 avctx->sample_fmt = AV_SAMPLE_FMT_S32P;
210 avctx->bits_per_raw_sample = 24;
212 av_log(avctx, AV_LOG_ERROR, "Unknown bit-depth: %"PRIu8"\n",
214 return AVERROR_INVALIDDATA;
216 /* dump the extradata */
217 for (i = 0; i < avctx->extradata_size; i++)
218 ff_dlog(avctx, "[%x] ", avctx->extradata[i]);
219 ff_dlog(avctx, "\n");
222 avpriv_request_sample(avctx, "Unsupported extradata size");
223 return AVERROR_PATCHWELCOME;
/* width in bits of the frame length prefix read by decode_frame() */
227 s->log2_frame_size = av_log2(avctx->block_align) + 4;
230 s->skip_frame = 1; /* skip first frame */
/* decode_flags bits used below: 0x40 length prefix, 0x38 log2 of the maximum
 * subframe count, 0x80 DRC data present, 0x100 bV3RTM */
232 s->len_prefix = s->decode_flags & 0x40;
235 s->samples_per_frame = 1 << ff_wma_get_frame_len_bits(avctx->sample_rate,
237 av_assert0(s->samples_per_frame <= WMALL_BLOCK_MAX_SIZE);
239 /* init previous block len */
240 for (i = 0; i < avctx->channels; i++)
241 s->channel[i].prev_block_len = s->samples_per_frame;
244 log2_max_num_subframes = (s->decode_flags & 0x38) >> 3;
245 s->max_num_subframes = 1 << log2_max_num_subframes;
246 s->max_subframe_len_bit = 0;
247 s->subframe_len_bits = av_log2(log2_max_num_subframes) + 1;
249 s->min_samples_per_subframe = s->samples_per_frame / s->max_num_subframes;
250 s->dynamic_range_compression = s->decode_flags & 0x80;
251 s->bV3RTM = s->decode_flags & 0x100;
253 if (s->max_num_subframes > MAX_SUBFRAMES) {
254 av_log(avctx, AV_LOG_ERROR, "invalid number of subframes %"PRIu8"\n",
255 s->max_num_subframes);
256 return AVERROR_INVALIDDATA;
259 s->num_channels = avctx->channels;
261 /* extract lfe channel position */
264 if (channel_mask & 8) {
266 for (mask = 1; mask < 16; mask <<= 1)
267 if (channel_mask & mask)
271 if (s->num_channels < 0) {
272 av_log(avctx, AV_LOG_ERROR, "invalid number of channels %"PRId8"\n",
274 return AVERROR_INVALIDDATA;
275 } else if (s->num_channels > WMALL_MAX_CHANNELS) {
276 avpriv_request_sample(avctx,
277 "More than %d channels", WMALL_MAX_CHANNELS);
278 return AVERROR_PATCHWELCOME;
/* output frame that decode_frame() fills through ff_get_buffer() */
281 s->frame = av_frame_alloc();
283 return AVERROR(ENOMEM);
285 avctx->channel_layout = channel_mask;
290 * @brief Decode the subframe length.
292 * @param offset sample offset in the frame
293 * @return decoded subframe length on success, < 0 in case of an error
295 static int decode_subframe_length(WmallDecodeCtx *s, int offset)
297 int frame_len_ratio, subframe_len, len;
299 /* no need to read from the bitstream when only one length is possible */
300 if (offset == s->samples_per_frame - s->min_samples_per_subframe)
301 return s->min_samples_per_subframe;
303 len = av_log2(s->max_num_subframes - 1) + 1;
304 frame_len_ratio = get_bits(&s->gb, len);
305 subframe_len = s->min_samples_per_subframe * (frame_len_ratio + 1);
307 /* sanity check the length */
308 if (subframe_len < s->min_samples_per_subframe ||
309 subframe_len > s->samples_per_frame) {
310 av_log(s->avctx, AV_LOG_ERROR, "broken frame: subframe_len %i\n",
312 return AVERROR_INVALIDDATA;
/*
 * Parses the tile header of a frame and fills, for every channel,
 * channel[c].num_subframes, subframe_len[] and subframe_offsets[].
 * Fails with AVERROR_INVALIDDATA on an empty subframe, an invalid subframe
 * length, too many subframes, or a channel whose subframes add up to more
 * than samples_per_frame.
 */
318 * @brief Decode how the data in the frame is split into subframes.
319 * Every WMA frame contains the encoded data for a fixed number of
320 * samples per channel. The data for every channel might be split
321 * into several subframes. This function will reconstruct the list of
322 * subframes for every channel.
324 * If the subframes are not evenly split, the algorithm estimates the
325 * channels with the lowest number of total samples.
326 * Afterwards, for each of these channels a bit is read from the
327 * bitstream that indicates if the channel contains a subframe with the
328 * next subframe size that is going to be read from the bitstream or not.
329 * If a channel contains such a subframe, the subframe size gets added to
330 * the channel's subframe list.
331 * The algorithm repeats these steps until the frame is properly divided
332 * between the individual channels.
335 * @return 0 on success, < 0 in case of an error
337 static int decode_tilehdr(WmallDecodeCtx *s)
339 uint16_t num_samples[WMALL_MAX_CHANNELS] = { 0 }; /* sum of samples for all currently known subframes of a channel */
340 uint8_t contains_subframe[WMALL_MAX_CHANNELS]; /* flag indicating if a channel contains the current subframe */
341 int channels_for_cur_subframe = s->num_channels; /* number of channels that contain the current subframe */
342 int fixed_channel_layout = 0; /* flag indicating that all channels use the same subframe offsets and sizes */
343 int min_channel_len = 0; /* smallest sum of samples (channels with this length will be processed first) */
346 /* reset tiling information */
347 for (c = 0; c < s->num_channels; c++)
348 s->channel[c].num_subframes = 0;
/* a single possible subframe, or the tile_aligned bit, means every channel
 * shares one layout and no per-channel flags are read */
350 tile_aligned = get_bits1(&s->gb);
351 if (s->max_num_subframes == 1 || tile_aligned)
352 fixed_channel_layout = 1;
354 /* loop until the frame data is split between the subframes */
356 int subframe_len, in_use = 0;
358 /* check which channels contain the subframe */
359 for (c = 0; c < s->num_channels; c++) {
360 if (num_samples[c] == min_channel_len) {
/* the flag is implied (no bit read) for a fixed layout, when only one channel
 * is left, or when only one minimum-sized subframe still fits */
361 if (fixed_channel_layout || channels_for_cur_subframe == 1 ||
362 (min_channel_len == s->samples_per_frame - s->min_samples_per_subframe)) {
363 contains_subframe[c] = 1;
365 contains_subframe[c] = get_bits1(&s->gb);
367 in_use |= contains_subframe[c];
369 contains_subframe[c] = 0;
373 av_log(s->avctx, AV_LOG_ERROR,
374 "Found empty subframe\n");
375 return AVERROR_INVALIDDATA;
378 /* get subframe length, subframe_len == 0 is not allowed */
379 if ((subframe_len = decode_subframe_length(s, min_channel_len)) <= 0)
380 return AVERROR_INVALIDDATA;
381 /* add subframes to the individual channels and find new min_channel_len */
382 min_channel_len += subframe_len;
383 for (c = 0; c < s->num_channels; c++) {
384 WmallChannelCtx *chan = &s->channel[c];
386 if (contains_subframe[c]) {
/* bound check before the write into subframe_len[] */
387 if (chan->num_subframes >= MAX_SUBFRAMES) {
388 av_log(s->avctx, AV_LOG_ERROR,
389 "broken frame: num subframes > 31\n");
390 return AVERROR_INVALIDDATA;
392 chan->subframe_len[chan->num_subframes] = subframe_len;
393 num_samples[c] += subframe_len;
394 ++chan->num_subframes;
395 if (num_samples[c] > s->samples_per_frame) {
396 av_log(s->avctx, AV_LOG_ERROR, "broken frame: "
397 "channel len(%"PRIu16") > samples_per_frame(%"PRIu16")\n",
398 num_samples[c], s->samples_per_frame);
399 return AVERROR_INVALIDDATA;
/* track the new minimum and how many channels share it */
401 } else if (num_samples[c] <= min_channel_len) {
402 if (num_samples[c] < min_channel_len) {
403 channels_for_cur_subframe = 0;
404 min_channel_len = num_samples[c];
406 ++channels_for_cur_subframe;
409 } while (min_channel_len < s->samples_per_frame);
/* turn the per-channel subframe lengths into start offsets */
411 for (c = 0; c < s->num_channels; c++) {
413 for (i = 0; i < s->channel[c].num_subframes; i++) {
414 s->channel[c].subframe_offsets[i] = offset;
415 offset += s->channel[c].subframe_len[i];
422 static void decode_ac_filter(WmallDecodeCtx *s)
425 s->acfilter_order = get_bits(&s->gb, 4) + 1;
426 s->acfilter_scaling = get_bits(&s->gb, 4);
428 for (i = 0; i < s->acfilter_order; i++)
429 s->acfilter_coeffs[i] = get_bitsz(&s->gb, s->acfilter_scaling) + 1;
/*
 * Reads the multichannel LMS parameters: a 4-bit order (stored as
 * (value + 1) * 2) and a 4-bit scaling. When the following flag bit is set,
 * the coefficient width send_coef_bits is read and then every entry of
 * mclms_coeffs (order * num_channels^2 values) and the lower-triangular part
 * of mclms_coeffs_cur (c < i) are read with that width.
 */
432 static void decode_mclms(WmallDecodeCtx *s)
434 s->mclms_order = (get_bits(&s->gb, 4) + 1) * 2;
435 s->mclms_scaling = get_bits(&s->gb, 4);
436 if (get_bits1(&s->gb)) {
437 int i, send_coef_bits;
/* NOTE(review): the statement controlled by the comparison below is not
 * visible in this excerpt; presumably it rounds cbits up - confirm. */
438 int cbits = av_log2(s->mclms_scaling + 1);
439 if (1 << cbits < s->mclms_scaling + 1)
442 send_coef_bits = get_bitsz(&s->gb, cbits) + 2;
444 for (i = 0; i < s->mclms_order * s->num_channels * s->num_channels; i++)
445 s->mclms_coeffs[i] = get_bits(&s->gb, send_coef_bits);
447 for (i = 0; i < s->num_channels; i++) {
449 for (c = 0; c < i; c++)
450 s->mclms_coeffs_cur[i * s->num_channels + c] = get_bits(&s->gb, send_coef_bits);
/*
 * Reads the cascaded LMS filter set of every channel: the number of filters
 * (3 bits, plus one), each filter's order (7 bits, stored as (value + 1) * 8)
 * and scaling (4 bits) and, when cdlms_send_coef is set, the transmitted
 * coefficients. Finally the WMALL_COEFF_PAD_SIZE bytes following each
 * filter's "order" coefficients are zeroed.
 * Returns AVERROR_INVALIDDATA when an order exceeds MAX_ORDER; in that case
 * cdlms[0][0].order is cleared, which is the value decode_subframe() tests
 * before decoding a non-raw tile.
 */
455 static int decode_cdlms(WmallDecodeCtx *s)
458 int cdlms_send_coef = get_bits1(&s->gb);
460 for (c = 0; c < s->num_channels; c++) {
461 s->cdlms_ttl[c] = get_bits(&s->gb, 3) + 1;
462 for (i = 0; i < s->cdlms_ttl[c]; i++) {
463 s->cdlms[c][i].order = (get_bits(&s->gb, 7) + 1) * 8;
464 if (s->cdlms[c][i].order > MAX_ORDER) {
465 av_log(s->avctx, AV_LOG_ERROR,
466 "Order[%d][%d] %d > max (%d), not supported\n",
467 c, i, s->cdlms[c][i].order, MAX_ORDER);
468 s->cdlms[0][0].order = 0;
469 return AVERROR_INVALIDDATA;
/* orders that are an odd multiple of 8 are singled out for 16-bit streams */
471 if(s->cdlms[c][i].order & 8 && s->bits_per_sample == 16) {
474 avpriv_request_sample(s->avctx, "CDLMS of order %d",
475 s->cdlms[c][i].order);
480 for (i = 0; i < s->cdlms_ttl[c]; i++)
481 s->cdlms[c][i].scaling = get_bits(&s->gb, 4);
483 if (cdlms_send_coef) {
484 for (i = 0; i < s->cdlms_ttl[c]; i++) {
485 int cbits, shift_l, shift_r, j;
/* number of transmitted coefficients, read with a width derived from the order */
486 cbits = av_log2(s->cdlms[c][i].order);
487 if ((1 << cbits) < s->cdlms[c][i].order)
489 s->cdlms[c][i].coefsend = get_bits(&s->gb, cbits) + 1;
/* width of each transmitted coefficient, derived from the scaling */
491 cbits = av_log2(s->cdlms[c][i].scaling + 1);
492 if ((1 << cbits) < s->cdlms[c][i].scaling + 1)
495 s->cdlms[c][i].bitsend = get_bitsz(&s->gb, cbits) + 2;
/* each coefficient is moved to the top of a 32-bit word and shifted back down */
496 shift_l = 32 - s->cdlms[c][i].bitsend;
497 shift_r = 32 - s->cdlms[c][i].scaling - 2;
498 for (j = 0; j < s->cdlms[c][i].coefsend; j++)
499 s->cdlms[c][i].coefs[j] =
500 (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r;
504 for (i = 0; i < s->cdlms_ttl[c]; i++)
505 memset(s->cdlms[c][i].coefs + s->cdlms[c][i].order,
506 0, WMALL_COEFF_PAD_SIZE);
/*
 * Decodes tile_size residues of channel ch into channel_residues[ch][].
 * First the transient flag/position are read and the channel's transient
 * counter updated. On a seekable tile the running sum ave_sum[ch] is
 * re-seeded from the bitstream and the first residue is read as a raw signed
 * value. Every remaining residue is coded as a quotient (counted from
 * consecutive 1 bits, with an explicitly coded extension) plus a remainder
 * whose width is derived from ave_sum[ch].
 * NOTE(review): several lines of the quotient loop are not visible in this
 * excerpt (e.g. what happens when the bitstream runs out) - confirm.
 */
512 static int decode_channel_residues(WmallDecodeCtx *s, int ch, int tile_size)
515 unsigned int ave_mean;
516 s->transient[ch] = get_bits1(&s->gb);
517 if (s->transient[ch]) {
518 s->transient_pos[ch] = get_bits(&s->gb, av_log2(tile_size));
519 if (s->transient_pos[ch])
520 s->transient[ch] = 0;
521 s->channel[ch].transient_counter =
522 FFMAX(s->channel[ch].transient_counter, s->samples_per_frame / 2);
523 } else if (s->channel[ch].transient_counter)
524 s->transient[ch] = 1;
/* seekable tile: re-seed the running sum from a coded mean */
526 if (s->seekable_tile) {
527 ave_mean = get_bits(&s->gb, s->bits_per_sample);
528 s->ave_sum[ch] = ave_mean << (s->movave_scaling + 1);
/* seekable tile: the first residue is stored raw, one bit wider when
 * inter-channel decorrelation is enabled */
531 if (s->seekable_tile) {
532 if (s->do_inter_ch_decorr)
533 s->channel_residues[ch][0] = get_sbits_long(&s->gb, s->bits_per_sample + 1);
535 s->channel_residues[ch][0] = get_sbits_long(&s->gb, s->bits_per_sample);
538 for (; i < tile_size; i++) {
539 int quo = 0, rem, rem_bits, residue;
540 while(get_bits1(&s->gb)) {
542 if (get_bits_left(&s->gb) <= 0)
/* extension of the quotient: a 5-bit width (plus one) followed by the value */
546 quo += get_bits_long(&s->gb, get_bits(&s->gb, 5) + 1);
548 ave_mean = (s->ave_sum[ch] + (1 << s->movave_scaling)) >> (s->movave_scaling + 1);
552 rem_bits = av_ceil_log2(ave_mean);
553 rem = get_bits_long(&s->gb, rem_bits);
554 residue = (quo << rem_bits) + rem;
/* update the running sum with the still unsigned-coded residue */
557 s->ave_sum[ch] = residue + s->ave_sum[ch] -
558 (s->ave_sum[ch] >> s->movave_scaling);
/* undo the sign folding: even codes map to value/2, odd codes to -(value+1)/2 */
560 residue = (residue >> 1) ^ -(residue & 1);
561 s->channel_residues[ch][i] = residue;
568 static void decode_lpc(WmallDecodeCtx *s)
571 s->lpc_order = get_bits(&s->gb, 5) + 1;
572 s->lpc_scaling = get_bits(&s->gb, 4);
573 s->lpc_intbits = get_bits(&s->gb, 3) + 1;
574 cbits = s->lpc_scaling + s->lpc_intbits;
575 for (ch = 0; ch < s->num_channels; ch++)
576 for (i = 0; i < s->lpc_order; i++)
577 s->lpc_coefs[ch][i] = get_sbits(&s->gb, cbits);
/*
 * Zeroes the coefficient and history buffers of all prediction filters: the
 * AC filter, the LPC coefficients, the multichannel LMS and, for every
 * channel, each cascaded LMS filter currently in use (cdlms_ttl[ich] of
 * them). Called by decode_subframe() at the start of a seekable tile.
 */
580 static void clear_codec_buffers(WmallDecodeCtx *s)
584 memset(s->acfilter_coeffs, 0, sizeof(s->acfilter_coeffs));
585 memset(s->acfilter_prevvalues, 0, sizeof(s->acfilter_prevvalues));
586 memset(s->lpc_coefs, 0, sizeof(s->lpc_coefs));
588 memset(s->mclms_coeffs, 0, sizeof(s->mclms_coeffs));
589 memset(s->mclms_coeffs_cur, 0, sizeof(s->mclms_coeffs_cur));
590 memset(s->mclms_prevvalues, 0, sizeof(s->mclms_prevvalues));
591 memset(s->mclms_updates, 0, sizeof(s->mclms_updates));
/* only filters within the current cdlms_ttl[] count are cleared */
593 for (ich = 0; ich < s->num_channels; ich++) {
594 for (ilms = 0; ilms < s->cdlms_ttl[ich]; ilms++) {
595 memset(s->cdlms[ich][ilms].coefs, 0,
596 sizeof(s->cdlms[ich][ilms].coefs));
597 memset(s->cdlms[ich][ilms].lms_prevvalues, 0,
598 sizeof(s->cdlms[ich][ilms].lms_prevvalues));
599 memset(s->cdlms[ich][ilms].lms_updates, 0,
600 sizeof(s->cdlms[ich][ilms].lms_updates));
607 * @brief Reset filter parameters and transient area at new seekable tile.
609 static void reset_codec(WmallDecodeCtx *s)
612 s->mclms_recent = s->mclms_order * s->num_channels;
613 for (ich = 0; ich < s->num_channels; ich++) {
614 for (ilms = 0; ilms < s->cdlms_ttl[ich]; ilms++)
615 s->cdlms[ich][ilms].recent = s->cdlms[ich][ilms].order;
616 /* first sample of a seekable subframe is considered as the starting of
617 a transient area which is samples_per_frame samples long */
618 s->channel[ich].transient_counter = s->samples_per_frame;
619 s->transient[ich] = 1;
620 s->transient_pos[ich] = 0;
/*
 * Adapts the multichannel LMS after sample icoef has been reconstructed.
 * For every channel the sign of (reconstructed value - prediction) decides
 * whether the stored sign updates are added to or subtracted from that
 * channel's coefficients. The reconstructed values (clipped) and their signs
 * are then written into the history buffers at mclms_recent. When the index
 * reaches 0, the histories are copied to the upper half of the buffers and
 * the index is reset to num_channels * order.
 * NOTE(review): the statement that moves mclms_recent and the clip bounds
 * are not visible in this excerpt - confirm.
 */
624 static void mclms_update(WmallDecodeCtx *s, int icoef, int *pred)
626 int i, j, ich, pred_error;
627 int order = s->mclms_order;
628 int num_channels = s->num_channels;
629 int range = 1 << (s->bits_per_sample - 1);
631 for (ich = 0; ich < num_channels; ich++) {
632 pred_error = s->channel_residues[ich][icoef] - pred[ich];
633 if (pred_error > 0) {
634 for (i = 0; i < order * num_channels; i++)
635 s->mclms_coeffs[i + ich * order * num_channels] +=
636 s->mclms_updates[s->mclms_recent + i];
/* coefficients applied to the already decoded channels of the same sample */
637 for (j = 0; j < ich; j++)
638 s->mclms_coeffs_cur[ich * num_channels + j] += WMASIGN(s->channel_residues[j][icoef]);
639 } else if (pred_error < 0) {
640 for (i = 0; i < order * num_channels; i++)
641 s->mclms_coeffs[i + ich * order * num_channels] -=
642 s->mclms_updates[s->mclms_recent + i];
643 for (j = 0; j < ich; j++)
644 s->mclms_coeffs_cur[ich * num_channels + j] -= WMASIGN(s->channel_residues[j][icoef]);
/* push the new samples into the history, last channel first */
648 for (ich = num_channels - 1; ich >= 0; ich--) {
650 s->mclms_prevvalues[s->mclms_recent] = av_clip(s->channel_residues[ich][icoef],
652 s->mclms_updates[s->mclms_recent] = WMASIGN(s->channel_residues[ich][icoef]);
/* history start reached: copy it to the upper half and restart from there */
655 if (s->mclms_recent == 0) {
656 memcpy(&s->mclms_prevvalues[order * num_channels],
658 sizeof(int32_t) * order * num_channels);
659 memcpy(&s->mclms_updates[order * num_channels],
661 sizeof(int32_t) * order * num_channels);
662 s->mclms_recent = num_channels * order;
666 static void mclms_predict(WmallDecodeCtx *s, int icoef, int *pred)
669 int order = s->mclms_order;
670 int num_channels = s->num_channels;
672 for (ich = 0; ich < num_channels; ich++) {
674 if (!s->is_channel_coded[ich])
676 for (i = 0; i < order * num_channels; i++)
677 pred[ich] += (uint32_t)s->mclms_prevvalues[i + s->mclms_recent] *
678 s->mclms_coeffs[i + order * num_channels * ich];
679 for (i = 0; i < ich; i++)
680 pred[ich] += (uint32_t)s->channel_residues[i][icoef] *
681 s->mclms_coeffs_cur[i + num_channels * ich];
682 pred[ich] += 1 << s->mclms_scaling - 1;
683 pred[ich] >>= s->mclms_scaling;
684 s->channel_residues[ich][icoef] += pred[ich];
688 static void revert_mclms(WmallDecodeCtx *s, int tile_size)
690 int icoef, pred[WMALL_MAX_CHANNELS] = { 0 };
691 for (icoef = 0; icoef < tile_size; icoef++) {
692 mclms_predict(s, icoef, pred);
693 mclms_update(s, icoef, pred);
/*
 * Switches channel ich to the high adaptation speed: the stored sign updates
 * (lms_updates) of its cascaded LMS filters are doubled and update_speed[ich]
 * is set to 16.
 * NOTE(review): the lines that choose between the "icoef + recent" and the
 * plain "icoef" loop, and the statement controlled by the "== 16" test, are
 * not visible in this excerpt - confirm.
 */
697 static void use_high_update_speed(WmallDecodeCtx *s, int ich)
699 int ilms, recent, icoef;
700 for (ilms = s->cdlms_ttl[ich] - 1; ilms >= 0; ilms--) {
701 recent = s->cdlms[ich][ilms].recent;
702 if (s->update_speed[ich] == 16)
705 for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
706 s->cdlms[ich][ilms].lms_updates[icoef + recent] *= 2;
708 for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
709 s->cdlms[ich][ilms].lms_updates[icoef] *= 2;
712 s->update_speed[ich] = 16;
/*
 * Switches channel ich back to the normal adaptation speed: the stored sign
 * updates (lms_updates) of its cascaded LMS filters are halved and
 * update_speed[ich] is set to 8.
 * NOTE(review): the lines that choose between the "icoef + recent" and the
 * plain "icoef" loop, and the statement controlled by the "== 8" test, are
 * not visible in this excerpt - confirm.
 */
715 static void use_normal_update_speed(WmallDecodeCtx *s, int ich)
717 int ilms, recent, icoef;
718 for (ilms = s->cdlms_ttl[ich] - 1; ilms >= 0; ilms--) {
719 recent = s->cdlms[ich][ilms].recent;
720 if (s->update_speed[ich] == 8)
723 for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
724 s->cdlms[ich][ilms].lms_updates[icoef + recent] /= 2;
726 for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
727 s->cdlms[ich][ilms].lms_updates[icoef] /= 2;
729 s->update_speed[ich] = 8;
/*
 * CD_LMS(bits, ROUND) generates two functions for one history sample width:
 *
 *  - lms_update<bits>(): stores the clipped input and its sign (scaled by
 *    update_speed[ich]) at the new "recent" position of filter ilms, decays
 *    two older update entries, and zeroes lms_updates from recent + order to
 *    the end of the array. When the history start is reached, the history
 *    and update buffers are first copied "order" elements up.
 *  - revert_cdlms<bits>(): walks the filters of channel ch from last to
 *    first; for each sample in [coef_begin, coef_end) it computes the
 *    prediction with the DSP scalarproduct_and_madd routine, adds the scaled
 *    prediction to the residue, adapts the filter and writes the result
 *    back to channel_residues.
 *
 * ROUND is the alignment applied to the filter order passed to the DSP
 * routine.
 * NOTE(review): the rounding term "1 << (scaling - 1)" has a negative shift
 * count when the 4-bit scaling read by decode_cdlms() is 0, which is
 * undefined behaviour - consider "(1 << scaling) >> 1".
 * NOTE(review): decode_subframe() calls revert_cdlms32(), but only the
 * 16-bit instantiation is visible in this excerpt - confirm the 32-bit one.
 */
732 #define CD_LMS(bits, ROUND) \
733 static void lms_update ## bits (WmallDecodeCtx *s, int ich, int ilms, int input) \
735 int recent = s->cdlms[ich][ilms].recent; \
736 int range = 1 << s->bits_per_sample - 1; \
737 int order = s->cdlms[ich][ilms].order; \
738 int ##bits##_t *prev = (int##bits##_t *)s->cdlms[ich][ilms].lms_prevvalues; \
743 memcpy(prev + order, prev, (bits/8) * order); \
744 memcpy(s->cdlms[ich][ilms].lms_updates + order, \
745 s->cdlms[ich][ilms].lms_updates, \
746 sizeof(*s->cdlms[ich][ilms].lms_updates) * order); \
747 recent = order - 1; \
750 prev[recent] = av_clip(input, -range, range - 1); \
751 s->cdlms[ich][ilms].lms_updates[recent] = WMASIGN(input) * s->update_speed[ich]; \
753 s->cdlms[ich][ilms].lms_updates[recent + (order >> 4)] >>= 2; \
754 s->cdlms[ich][ilms].lms_updates[recent + (order >> 3)] >>= 1; \
755 s->cdlms[ich][ilms].recent = recent; \
756 memset(s->cdlms[ich][ilms].lms_updates + recent + order, 0, \
757 sizeof(s->cdlms[ich][ilms].lms_updates) - \
758 sizeof(*s->cdlms[ich][ilms].lms_updates)*(recent+order)); \
761 static void revert_cdlms ## bits (WmallDecodeCtx *s, int ch, \
762 int coef_begin, int coef_end) \
764 int icoef, pred, ilms, num_lms, residue, input; \
766 num_lms = s->cdlms_ttl[ch]; \
767 for (ilms = num_lms - 1; ilms >= 0; ilms--) { \
768 for (icoef = coef_begin; icoef < coef_end; icoef++) { \
769 int##bits##_t *prevvalues = (int##bits##_t *)s->cdlms[ch][ilms].lms_prevvalues; \
770 pred = 1 << (s->cdlms[ch][ilms].scaling - 1); \
771 residue = s->channel_residues[ch][icoef]; \
772 pred += s->dsp.scalarproduct_and_madd_int## bits (s->cdlms[ch][ilms].coefs, \
773 prevvalues + s->cdlms[ch][ilms].recent, \
774 s->cdlms[ch][ilms].lms_updates + \
775 s->cdlms[ch][ilms].recent, \
776 FFALIGN(s->cdlms[ch][ilms].order, ROUND), \
778 input = residue + (pred >> s->cdlms[ch][ilms].scaling); \
779 lms_update ## bits(s, ch, ilms, input); \
780 s->channel_residues[ch][icoef] = input; \
783 if (bits <= 16) emms_c(); \
786 CD_LMS(16, WMALL_COEFF_PAD_SIZE)
789 static void revert_inter_ch_decorr(WmallDecodeCtx *s, int tile_size)
791 if (s->num_channels != 2)
793 else if (s->is_channel_coded[0] || s->is_channel_coded[1]) {
795 for (icoef = 0; icoef < tile_size; icoef++) {
796 s->channel_residues[0][icoef] -= s->channel_residues[1][icoef] >> 1;
797 s->channel_residues[1][icoef] += s->channel_residues[0][icoef];
/*
 * Undoes the AC filter on a tile. For every channel the first "order"
 * samples are predicted from a mix of the previous tile's tail (prevvalues)
 * and already reconstructed samples of this tile; the remaining samples are
 * predicted purely from the "order" preceding samples. Finally the last
 * "order" reconstructed samples are saved, newest first, in prevvalues for
 * the next tile.
 * NOTE(review): the lines that select between the two prediction sources and
 * that scale pred before it is added are not visible in this excerpt.
 * NOTE(review): the products in the first loop are formed in signed int,
 * unlike the (uint32_t) cast used in the second loop - confirm this is
 * intended.
 */
802 static void revert_acfilter(WmallDecodeCtx *s, int tile_size)
805 int16_t *filter_coeffs = s->acfilter_coeffs;
806 int scaling = s->acfilter_scaling;
807 int order = s->acfilter_order;
809 for (ich = 0; ich < s->num_channels; ich++) {
810 int *prevvalues = s->acfilter_prevvalues[ich];
811 for (i = 0; i < order; i++) {
813 for (j = 0; j < order; j++) {
815 pred += filter_coeffs[j] * prevvalues[j - i];
817 pred += s->channel_residues[ich][i - j - 1] * filter_coeffs[j];
820 s->channel_residues[ich][i] += pred;
822 for (i = order; i < tile_size; i++) {
824 for (j = 0; j < order; j++)
825 pred += (uint32_t)s->channel_residues[ich][i - j - 1] * filter_coeffs[j];
827 s->channel_residues[ich][i] += pred;
/* remember the tail of this tile, newest sample first */
829 for (j = 0; j < order; j++)
830 prevvalues[j] = s->channel_residues[ich][tile_size - j - 1];
/*
 * Decodes one subframe (tile). Steps visible here:
 *  1. pick the next block: the channel with the fewest decoded samples
 *     defines offset and subframe_len; every channel whose next subframe
 *     matches is listed in channel_indexes_for_cur_subframe[];
 *  2. read the tile header: seekable flag and, for seekable tiles, the tool
 *     flags and filter parameters; then the raw-PCM flag, per-channel coded
 *     flags, the LPC flag and the number of padding zero bits;
 *  3. fill channel_residues either with raw PCM samples or with decoded
 *     residues run through the cascaded LMS, and undo the optional
 *     multichannel LMS, inter-channel decorrelation, AC filter and
 *     quantisation step;
 *  4. write the samples to the 16- or 32-bit output pointers and advance
 *     cur_subframe of the participating channels.
 * Returns a negative AVERROR code on failure.
 * NOTE(review): many lines (braces, else branches, some calls) are missing
 * from this excerpt; the comments below describe only what is visible.
 */
834 static int decode_subframe(WmallDecodeCtx *s)
836 int offset = s->samples_per_frame;
837 int subframe_len = s->samples_per_frame;
838 int total_samples = s->samples_per_frame * s->num_channels;
839 int i, j, rawpcm_tile, padding_zeroes, res;
841 s->subframe_offset = get_bits_count(&s->gb);
843 /* reset channel context and find the next block offset and size
844 == the next block of the channel with the smallest number of
846 for (i = 0; i < s->num_channels; i++) {
847 if (offset > s->channel[i].decoded_samples) {
848 offset = s->channel[i].decoded_samples;
850 s->channel[i].subframe_len[s->channel[i].cur_subframe];
854 /* get a list of all channels that contain the estimated block */
855 s->channels_for_cur_subframe = 0;
856 for (i = 0; i < s->num_channels; i++) {
857 const int cur_subframe = s->channel[i].cur_subframe;
858 /* subtract already processed samples */
859 total_samples -= s->channel[i].decoded_samples;
861 /* and count if there are multiple subframes that match our profile */
862 if (offset == s->channel[i].decoded_samples &&
863 subframe_len == s->channel[i].subframe_len[cur_subframe]) {
864 total_samples -= s->channel[i].subframe_len[cur_subframe];
865 s->channel[i].decoded_samples +=
866 s->channel[i].subframe_len[cur_subframe];
867 s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
868 ++s->channels_for_cur_subframe;
872 /* check if the frame will be complete after processing the
875 s->parsed_all_subframes = 1;
/* a seekable tile restarts the predictors: buffers are cleared and the tool
 * flags and filter parameters are read again */
878 s->seekable_tile = get_bits1(&s->gb);
879 if (s->seekable_tile) {
880 clear_codec_buffers(s);
882 s->do_arith_coding = get_bits1(&s->gb);
883 if (s->do_arith_coding) {
884 avpriv_request_sample(s->avctx, "Arithmetic coding");
885 return AVERROR_PATCHWELCOME;
887 s->do_ac_filter = get_bits1(&s->gb);
888 s->do_inter_ch_decorr = get_bits1(&s->gb);
889 s->do_mclms = get_bits1(&s->gb);
897 if ((res = decode_cdlms(s)) < 0)
899 s->movave_scaling = get_bits(&s->gb, 3);
900 s->quant_stepsize = get_bits(&s->gb, 8) + 1;
905 rawpcm_tile = get_bits1(&s->gb);
/* cdlms[0][0].order is still 0 until a seekable tile has been parsed
 * successfully (decode_cdlms also clears it on error) */
907 if (!rawpcm_tile && !s->cdlms[0][0].order) {
908 av_log(s->avctx, AV_LOG_DEBUG,
909 "Waiting for seekable tile\n");
910 av_frame_unref(s->frame);
915 for (i = 0; i < s->num_channels; i++)
916 s->is_channel_coded[i] = 1;
919 for (i = 0; i < s->num_channels; i++)
920 s->is_channel_coded[i] = get_bits1(&s->gb);
924 s->do_lpc = get_bits1(&s->gb);
927 avpriv_request_sample(s->avctx, "Expect wrong output since "
928 "inverse LPC filter");
935 if (get_bits1(&s->gb))
936 padding_zeroes = get_bits(&s->gb, 5);
/* raw PCM tile: samples are stored directly with the padding bits removed */
941 int bits = s->bits_per_sample - padding_zeroes;
943 av_log(s->avctx, AV_LOG_ERROR,
944 "Invalid number of padding bits in raw PCM tile\n");
945 return AVERROR_INVALIDDATA;
947 ff_dlog(s->avctx, "RAWPCM %d bits per sample. "
948 "total %d bits, remain=%d\n", bits,
949 bits * s->num_channels * subframe_len, get_bits_count(&s->gb));
950 for (i = 0; i < s->num_channels; i++)
951 for (j = 0; j < subframe_len; j++)
952 s->channel_residues[i][j] = get_sbits_long(&s->gb, bits);
954 for (i = 0; i < s->num_channels; i++) {
955 if (s->is_channel_coded[i]) {
/* NOTE(review): the int return value of decode_channel_residues() is
 * discarded here - confirm that it cannot report an error that matters. */
956 decode_channel_residues(s, i, subframe_len);
957 if (s->seekable_tile)
958 use_high_update_speed(s, i);
960 use_normal_update_speed(s, i);
961 if (s->bits_per_sample > 16)
962 revert_cdlms32(s, i, 0, subframe_len);
964 revert_cdlms16(s, i, 0, subframe_len);
/* channels that are not coded in this tile produce silence */
966 memset(s->channel_residues[i], 0, sizeof(**s->channel_residues) * subframe_len);
971 revert_mclms(s, subframe_len);
972 if (s->do_inter_ch_decorr)
973 revert_inter_ch_decorr(s, subframe_len);
975 revert_acfilter(s, subframe_len);
978 if (s->quant_stepsize != 1)
979 for (i = 0; i < s->num_channels; i++)
980 for (j = 0; j < subframe_len; j++)
981 s->channel_residues[i][j] *= s->quant_stepsize;
984 /* Write to proper output buffer depending on bit-depth */
985 for (i = 0; i < s->channels_for_cur_subframe; i++) {
986 int c = s->channel_indexes_for_cur_subframe[i];
987 int subframe_len = s->channel[c].subframe_len[s->channel[c].cur_subframe];
/* NOTE(review): both stores below left-shift a value that may be negative,
 * which is undefined behaviour in C; a multiplication by a power of two
 * would avoid it. */
989 for (j = 0; j < subframe_len; j++) {
990 if (s->bits_per_sample == 16) {
991 *s->samples_16[c]++ = (int16_t) s->channel_residues[c][j] << padding_zeroes;
993 *s->samples_32[c]++ = s->channel_residues[c][j] << (padding_zeroes + 8);
998 /* handled one subframe */
999 for (i = 0; i < s->channels_for_cur_subframe; i++) {
1000 int c = s->channel_indexes_for_cur_subframe[i];
1001 if (s->channel[c].cur_subframe >= s->channel[c].num_subframes) {
1002 av_log(s->avctx, AV_LOG_ERROR, "broken subframe\n");
1003 return AVERROR_INVALIDDATA;
1005 ++s->channel[c].cur_subframe;
/*
 * Decodes one frame from the bit reservoir s->gb into s->frame: requests an
 * output buffer for samples_per_frame samples, points the per-channel
 * output cursors at it, parses the optional length prefix, the tile header,
 * the optional DRC gain and the optional start/end skip counts, decodes all
 * subframes and finally reads the trailer bit.
 * NOTE(review): several lines (error returns, packet_loss handling, the
 * final return) are missing from this excerpt; the comments below describe
 * only what is visible.
 */
1011 * @brief Decode one WMA frame.
1012 * @param s codec context
1013 * @return 0 if the trailer bit indicates that this is the last frame,
1014 * 1 if there are additional frames
1016 static int decode_frame(WmallDecodeCtx *s)
1018 GetBitContext* gb = &s->gb;
1019 int more_frames = 0, len = 0, i, ret;
1021 s->frame->nb_samples = s->samples_per_frame;
1022 if ((ret = ff_get_buffer(s->avctx, s->frame, 0)) < 0) {
1023 /* return an error if no frame could be decoded at all */
1025 s->frame->nb_samples = 0;
/* both cursor arrays point at the same planes; decode_subframe() uses the
 * one that matches bits_per_sample */
1028 for (i = 0; i < s->num_channels; i++) {
1029 s->samples_16[i] = (int16_t *)s->frame->extended_data[i];
1030 s->samples_32[i] = (int32_t *)s->frame->extended_data[i];
1033 /* get frame length */
1035 len = get_bits(gb, s->log2_frame_size);
1037 /* decode tile information */
1038 if ((ret = decode_tilehdr(s))) {
1040 av_frame_unref(s->frame);
1045 if (s->dynamic_range_compression)
1046 s->drc_gain = get_bits(gb, 8);
1048 /* no idea what these are for, might be the number of samples
1049 that need to be skipped at the beginning or end of a stream */
1050 if (get_bits1(gb)) {
1053 /* usually true for the first frame */
1054 if (get_bits1(gb)) {
1055 skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1056 ff_dlog(s->avctx, "start skip: %i\n", skip);
1059 /* sometimes true for the last frame */
1060 if (get_bits1(gb)) {
1061 skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1062 ff_dlog(s->avctx, "end skip: %i\n", skip);
/* only the end skip shortens the output frame here */
1063 s->frame->nb_samples -= skip;
1064 if (s->frame->nb_samples <= 0)
1065 return AVERROR_INVALIDDATA;
1070 /* reset subframe states */
1071 s->parsed_all_subframes = 0;
1072 for (i = 0; i < s->num_channels; i++) {
1073 s->channel[i].decoded_samples = 0;
1074 s->channel[i].cur_subframe = 0;
1077 /* decode all subframes */
1078 while (!s->parsed_all_subframes) {
/* channel 0's progress before the call is what is kept as the frame's sample
 * count when a subframe fails */
1079 int decoded_samples = s->channel[0].decoded_samples;
1080 if (decode_subframe(s) < 0) {
1082 if (s->frame->nb_samples)
1083 s->frame->nb_samples = decoded_samples;
1088 ff_dlog(s->avctx, "Frame done\n");
/* with a length prefix, the bits consumed are checked against the coded
 * frame length and the rest of the frame is skipped */
1092 if (s->len_prefix) {
1093 if (len != (get_bits_count(gb) - s->frame_offset) + 2) {
1094 /* FIXME: not sure if this is always an error */
1095 av_log(s->avctx, AV_LOG_ERROR,
1096 "frame[%"PRIu32"] would have to skip %i bits\n",
1098 len - (get_bits_count(gb) - s->frame_offset) - 1);
1103 /* skip the rest of the frame data */
1104 skip_bits_long(gb, len - (get_bits_count(gb) - s->frame_offset) - 1);
1107 /* decode trailer bit */
1108 more_frames = get_bits1(gb);
1114 * @brief Calculate remaining input buffer length.
1115 * @param s codec context
1116 * @param gb bitstream reader context
1117 * @return remaining size in bits
/*
 * Number of bits between the current read position of gb and s->buf_bit_size.
 * The difference is returned as-is and is not clamped.
 */
1119 static int remaining_bits(WmallDecodeCtx *s, GetBitContext *gb)
1121 return s->buf_bit_size - get_bits_count(gb);
1125 * @brief Fill the bit reservoir with a (partial) frame.
1126 * @param s codec context
1127 * @param gb bitstream reader context
1128 * @param len length of the partial frame
1129 * @param append decides whether to reset the buffer or not
/*
 * Copy len bits from gb into the bit reservoir s->frame_data (written through
 * s->pb), advance gb past them, and re-open s->gb on the reservoir.
 */
1131 static void save_bits(WmallDecodeCtx *s, GetBitContext* gb, int len,
1137 /* when the frame data does not need to be concatenated, the input buffer
1138 is reset and additional bits from the previous frame are copied
1139 and skipped later so that a fast byte copy is possible */
/* frame_offset is the sub-byte position (0..7) of gb; the reservoir is
 * restarted with that many leading bits already counted as saved */
1142 s->frame_offset = get_bits_count(gb) & 7;
1143 s->num_saved_bits = s->frame_offset;
1144 init_put_bits(&s->pb, s->frame_data, s->max_frame_size);
/* (bits + 8) >> 3 is floor(bits / 8) + 1 bytes */
1147 buflen = (s->num_saved_bits + len + 8) >> 3;
/* refuse a non-positive length or data that would not fit the reservoir */
1149 if (len <= 0 || buflen > s->max_frame_size) {
1150 avpriv_request_sample(s->avctx, "Too small input buffer");
1155 s->num_saved_bits += len;
/* bulk copy starting at the byte that contains gb's current position */
1157 avpriv_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3),
/* align is the bit count up to gb's next byte boundary (1..8), capped at len;
 * those bits are transferred individually before the bulk copy below */
1160 int align = 8 - (get_bits_count(gb) & 7);
1161 align = FFMIN(align, len);
1162 put_bits(&s->pb, align, get_bits(gb, align));
1164 avpriv_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3), len);
/* consume the copied bits from the source reader */
1166 skip_bits_long(gb, len);
1169 flush_put_bits(&tmp);
/* restart the frame reader on the reservoir and step over the leading
 * frame_offset bits */
1171 init_get_bits(&s->gb, s->frame_data, s->num_saved_bits);
1172 skip_bits(&s->gb, s->frame_offset);
/*
 * Packet-level entry point registered as the codec's .decode callback.
 * Parses the packet header when a new packet starts, feeds bits into the
 * reservoir via save_bits(), calls decode_frame(), and hands the result to the
 * caller through data / got_frame_ptr.
 * Returns AVERROR_INVALIDDATA while s->packet_loss is set; otherwise the number
 * of whole bytes read from the packet reader, or 0 for an empty packet.
 */
1175 static int decode_packet(AVCodecContext *avctx, void *data, int *got_frame_ptr,
1178 WmallDecodeCtx *s = avctx->priv_data;
1179 GetBitContext* gb = &s->pgb;
1180 const uint8_t* buf = avpkt->data;
1181 int buf_size = avpkt->size;
1182 int num_bits_prev_frame, packet_sequence_number, spliced_packet;
/* start with an empty frame so *got_frame_ptr is only set when this call
 * actually produces samples */
1184 s->frame->nb_samples = 0;
/* empty input packet while the reservoir still holds unread bits: decode from
 * the reservoir, and drop the saved bits once decode_frame() returns 0 */
1186 if (!buf_size && s->num_saved_bits > get_bits_count(&s->gb)) {
1188 if (!decode_frame(s))
1189 s->num_saved_bits = 0;
1190 } else if (s->packet_done || s->packet_loss) {
/* at most avctx->block_align bytes are treated as the current packet; the
 * number of bytes beyond that is remembered in next_packet_start */
1196 s->next_packet_start = buf_size - FFMIN(avctx->block_align, buf_size);
1197 buf_size = FFMIN(avctx->block_align, buf_size);
1198 s->buf_bit_size = buf_size << 3;
1200 /* parse packet header */
1201 init_get_bits(gb, buf, s->buf_bit_size);
1202 packet_sequence_number = get_bits(gb, 4);
1203 skip_bits(gb, 1); // Skip seekable_frame_in_packet, currently unused
1204 spliced_packet = get_bits1(gb);
1206 avpriv_request_sample(avctx, "Bitstream splicing");
1208 /* get number of bits that need to be added to the previous frame */
1209 num_bits_prev_frame = get_bits(gb, s->log2_frame_size);
1211 /* check for packet loss */
/* the 4-bit sequence number is expected to advance by one, modulo 16 */
1212 if (!s->packet_loss &&
1213 ((s->packet_sequence_number + 1) & 0xF) != packet_sequence_number) {
1215 av_log(avctx, AV_LOG_ERROR,
1216 "Packet loss detected! seq %"PRIx8" vs %x\n",
1217 s->packet_sequence_number, packet_sequence_number);
1219 s->packet_sequence_number = packet_sequence_number;
/* never take more previous-frame bits than the packet still contains */
1221 if (num_bits_prev_frame > 0) {
1222 int remaining_packet_bits = s->buf_bit_size - get_bits_count(gb);
1223 if (num_bits_prev_frame >= remaining_packet_bits) {
1224 num_bits_prev_frame = remaining_packet_bits;
1228 /* Append the previous frame data to the remaining data from the
1229 * previous packet to create a full frame. */
1230 save_bits(s, gb, num_bits_prev_frame, 1);
1232 /* decode the cross packet frame if it is valid */
1233 if (num_bits_prev_frame < remaining_packet_bits && !s->packet_loss)
/* no previous-frame bits announced, yet the reservoir holds bits beyond the
 * frame_offset padding: they are only logged */
1235 } else if (s->num_saved_bits - s->frame_offset) {
1236 ff_dlog(avctx, "ignoring %x previously saved bits\n",
1237 s->num_saved_bits - s->frame_offset);
1240 if (s->packet_loss) {
1241 /* Reset number of saved bits so that the decoder does not start
1242 * to decode incomplete frames in the s->len_prefix == 0 case. */
1243 s->num_saved_bits = 0;
1245 init_put_bits(&s->pb, s->frame_data, s->max_frame_size);
/* reopen the packet reader over the bytes before next_packet_start and step
 * over the sub-byte offset stored in packet_offset */
1251 s->buf_bit_size = (avpkt->size - s->next_packet_start) << 3;
1252 init_get_bits(gb, avpkt->data, s->buf_bit_size);
1253 skip_bits(gb, s->packet_offset);
/* length-prefixed frames: peek at the frame size without consuming it and
 * only proceed when it is non-zero and fits into the remaining packet bits */
1255 if (s->len_prefix && remaining_bits(s, gb) > s->log2_frame_size &&
1256 (frame_size = show_bits(gb, s->log2_frame_size)) &&
1257 frame_size <= remaining_bits(s, gb)) {
1258 save_bits(s, gb, frame_size, 0);
1259 s->packet_done = !decode_frame(s);
1260 } else if (!s->len_prefix
1261 && s->num_saved_bits > get_bits_count(&s->gb)) {
1262 /* when the frames do not have a length prefix, we don't know the
1263 * compressed length of the individual frames; however, we know what
1264 * part of a new packet belongs to the previous frame. Therefore we
1265 * save the incoming packet first, then we append the "previous
1266 * frame" data from the next packet so that we get a buffer that
1267 * only contains full frames */
1268 s->packet_done = !decode_frame(s);
1274 if (s->packet_done && !s->packet_loss &&
1275 remaining_bits(s, gb) > 0) {
1276 /* save the rest of the data so that it can be decoded
1277 * with the next packet */
1278 save_bits(s, gb, remaining_bits(s, gb), 0);
/* report a frame only if it carries samples, then pass ownership of the
 * frame contents to the caller-provided frame */
1281 *got_frame_ptr = s->frame->nb_samples > 0;
1282 av_frame_move_ref(data, s->frame);
/* remember the sub-byte read position for the next call */
1284 s->packet_offset = get_bits_count(gb) & 7;
1286 return (s->packet_loss) ? AVERROR_INVALIDDATA : buf_size ? get_bits_count(gb) >> 3 : 0;
/*
 * Reset the decoder's buffered state: clears the saved-bit count, the frame
 * offset and the next-packet start, empties the pending output frame and
 * restarts the reservoir writer at the beginning of s->frame_data.
 */
1289 static void flush(AVCodecContext *avctx)
1291 WmallDecodeCtx *s = avctx->priv_data;
1294 s->num_saved_bits = 0;
1295 s->frame_offset = 0;
1296 s->next_packet_start = 0;
/* NOTE(review): presumably marks the first cascaded LMS filter as not yet
 * configured -- confirm against the code that reads cdlms[0][0].order */
1297 s->cdlms[0][0].order = 0;
1298 s->frame->nb_samples = 0;
1299 init_put_bits(&s->pb, s->frame_data, s->max_frame_size);
/*
 * Codec close callback: frees the decoder's frame and the reservoir buffer
 * s->frame_data (av_freep also resets the pointer).
 */
1302 static av_cold int decode_close(AVCodecContext *avctx)
1304 WmallDecodeCtx *s = avctx->priv_data;
1306 av_frame_free(&s->frame);
1307 av_freep(&s->frame_data);
1312 AVCodec ff_wmalossless_decoder = {
1313 .name = "wmalossless",
1314 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Lossless"),
1315 .type = AVMEDIA_TYPE_AUDIO,
1316 .id = AV_CODEC_ID_WMALOSSLESS,
1317 .priv_data_size = sizeof(WmallDecodeCtx),
1318 .init = decode_init,
1319 .close = decode_close,
1320 .decode = decode_packet,
1322 .capabilities = AV_CODEC_CAP_SUBFRAMES | AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY,
1323 .sample_fmts = (const enum AVSampleFormat[]) { AV_SAMPLE_FMT_S16P,
1325 AV_SAMPLE_FMT_NONE },