git.sesse.net Git - ffmpeg/blob - libavcodec/wmalosslessdec.c

   1 /*
   2  * Wmall compatible decoder
   3  * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
   4  * Copyright (c) 2008 - 2011 Sascha Sommer, Benjamin Larsson
   5  * Copyright (c) 2011 Andreas Öman
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 /**
  25  * @file
  26  * @brief wmall decoder implementation
  27  * Wmall is an MDCT based codec comparable to wma standard or AAC.
  28  * The decoding therefore consists of the following steps:
  29  * - bitstream decoding
  30  * - reconstruction of per-channel data
  31  * - rescaling and inverse quantization
  32  * - IMDCT
  33  * - windowing and overlap-add
  34  *
  35  * The compressed wmall bitstream is split into individual packets.
  36  * Every such packet contains one or more wma frames.
  37  * The compressed frames may have a variable length and frames may
  38  * cross packet boundaries.
  39  * Common to all wmall frames is the number of samples that are stored in
  40  * a frame.
  41  * The number of samples and a few other decode flags are stored
  42  * as extradata that has to be passed to the decoder.
  43  *
  44  * The wmall frames themselves are again split into a variable number of
  45  * subframes. Every subframe contains the data for 2^N time domain samples
  46  * where N varies between 7 and 12.
  47  *
  48  * Example wmall bitstream (in samples):
  49  *
  50  * ||   packet 0           || packet 1 || packet 2      packets
  51  * ---------------------------------------------------
  52  * || frame 0      || frame 1       || frame 2    ||    frames
  53  * ---------------------------------------------------
  54  * ||   |      |   ||   |   |   |   ||            ||    subframes of channel 0
  55  * ---------------------------------------------------
  56  * ||      |   |   ||   |   |   |   ||            ||    subframes of channel 1
  57  * ---------------------------------------------------
  58  *
  59  * The frame layouts for the individual channels of a wma frame do not need
  60  * to be the same.
  61  *
  62  * However, if the offsets and lengths of several subframes of a frame are the
  63  * same, the subframes of the channels can be grouped.
  64  * Every group may then use special coding techniques like M/S stereo coding
  65  * to improve the compression ratio. These channel transformations do not
  66  * need to be applied to a whole subframe. Instead, they can also work on
  67  * individual scale factor bands (see below).
  68  * The coefficients that carry the audio signal in the frequency domain
  69  * are transmitted as huffman-coded vectors with 4, 2 and 1 elements.
  70  * In addition to that, the encoder can switch to a runlevel coding scheme
  71  * by transmitting subframe_length / 128 zero coefficients.
  72  *
  73  * Before the audio signal can be converted to the time domain, the
  74  * coefficients have to be rescaled and inverse quantized.
  75  * A subframe is therefore split into several scale factor bands that get
  76  * scaled individually.
  77  * Scale factors are submitted for every frame but they might be shared
  78  * between the subframes of a channel. Scale factors are initially DPCM-coded.
  79  * Once scale factors are shared, the differences are transmitted as runlevel
  80  * codes.
  81  * Every subframe length and offset combination in the frame layout shares a
  82  * common quantization factor that can be adjusted for every channel by a
  83  * modifier.
  84  * After the inverse quantization, the coefficients get processed by an IMDCT.
  85  * The resulting values are then windowed with a sine window and the first half
  86  * of the values are added to the second half of the output from the previous
  87  * subframe in order to reconstruct the output samples.
  88  */
  89
  90 #include "avcodec.h"
  91 #include "internal.h"
  92 #include "get_bits.h"
  93 #include "put_bits.h"
  94 #include "dsputil.h"
  95 #include "wma.h"
  96
  97 /** current decoder limitations */
  98 #define WMALL_MAX_CHANNELS    8                             ///< max number of handled channels
  99 #define MAX_SUBFRAMES  32                                    ///< max number of subframes per channel
 100 #define MAX_BANDS      29                                    ///< max number of scale factor bands
 101 #define MAX_FRAMESIZE  32768                                 ///< maximum compressed frame size
 102
 103 #define WMALL_BLOCK_MIN_BITS  6                                           ///< log2 of min block size
 104 #define WMALL_BLOCK_MAX_BITS 12                                           ///< log2 of max block size
 105 #define WMALL_BLOCK_MAX_SIZE (1 << WMALL_BLOCK_MAX_BITS)                 ///< maximum block size
 106 #define WMALL_BLOCK_SIZES    (WMALL_BLOCK_MAX_BITS - WMALL_BLOCK_MIN_BITS + 1) ///< possible block sizes
 107
 108
 109 #define VLCBITS            9
 110 #define SCALEVLCBITS       8
 111 #define VEC4MAXDEPTH    ((HUFF_VEC4_MAXBITS+VLCBITS-1)/VLCBITS)
 112 #define VEC2MAXDEPTH    ((HUFF_VEC2_MAXBITS+VLCBITS-1)/VLCBITS)
 113 #define VEC1MAXDEPTH    ((HUFF_VEC1_MAXBITS+VLCBITS-1)/VLCBITS)
 114 #define SCALEMAXDEPTH   ((HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS)
 115 #define SCALERLMAXDEPTH ((HUFF_SCALE_RL_MAXBITS+VLCBITS-1)/VLCBITS)
 116
 117 static float            sin64[33];        ///< sinus table for decorrelation
 118
 119 /**
 120  * @brief frame specific decoder context for a single channel
 121  */
 122 typedef struct {
 123     int16_t  prev_block_len;                          ///< length of the previous block
 124     uint8_t  transmit_coefs;
 125     uint8_t  num_subframes;
 126     uint16_t subframe_len[MAX_SUBFRAMES];             ///< subframe length in samples
 127     uint16_t subframe_offset[MAX_SUBFRAMES];          ///< subframe positions in the current frame
 128     uint8_t  cur_subframe;                            ///< current subframe number
 129     uint16_t decoded_samples;                         ///< number of already processed samples
 130     uint8_t  grouped;                                 ///< channel is part of a group
 131     int      quant_step;                              ///< quantization step for the current subframe
 132     int8_t   reuse_sf;                                ///< share scale factors between subframes
 133     int8_t   scale_factor_step;                       ///< scaling step for the current subframe
 134     int      max_scale_factor;                        ///< maximum scale factor for the current subframe
 135     int      saved_scale_factors[2][MAX_BANDS];       ///< resampled and (previously) transmitted scale factor values
 136     int8_t   scale_factor_idx;                        ///< index for the transmitted scale factor values (used for resampling)
 137     int*     scale_factors;                           ///< pointer to the scale factor values used for decoding
 138     uint8_t  table_idx;                               ///< index in sf_offsets for the scale factor reference block
 139     float*   coeffs;                                  ///< pointer to the subframe decode buffer
 140     uint16_t num_vec_coeffs;                          ///< number of vector coded coefficients
 141     DECLARE_ALIGNED(16, float, out)[WMALL_BLOCK_MAX_SIZE + WMALL_BLOCK_MAX_SIZE / 2]; ///< output buffer
 142     int      transient_counter;                       ///< number of transient samples from the beginning of transient zone
 143 } WmallChannelCtx;
 144
 145 /**
 146  * @brief channel group for channel transformations
 147  */
 148 typedef struct {
 149     uint8_t num_channels;                                     ///< number of channels in the group
 150     int8_t  transform;                                        ///< transform on / off
 151     int8_t  transform_band[MAX_BANDS];                        ///< controls if the transform is enabled for a certain band
 152     float   decorrelation_matrix[WMALL_MAX_CHANNELS*WMALL_MAX_CHANNELS];
 153     float*  channel_data[WMALL_MAX_CHANNELS];                ///< transformation coefficients
 154 } WmallChannelGrp;
 155
 156 /**
 157  * @brief main decoder context
 158  */
 159 typedef struct WmallDecodeCtx {
 160     /* generic decoder variables */
 161     AVCodecContext*  avctx;                         ///< codec context for av_log
 162     DSPContext       dsp;                           ///< accelerated DSP functions
 163     AVFrame          frame;
 164     uint8_t          frame_data[MAX_FRAMESIZE +
 165                       FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
 166     PutBitContext    pb;                            ///< context for filling the frame_data buffer
 167     FFTContext       mdct_ctx[WMALL_BLOCK_SIZES];  ///< MDCT context per block size
 168     DECLARE_ALIGNED(16, float, tmp)[WMALL_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
 169     float*           windows[WMALL_BLOCK_SIZES];   ///< windows for the different block sizes
 170
 171     /* frame size dependent frame information (set during initialization) */
 172     uint32_t         decode_flags;                  ///< used compression features
 173     uint8_t          len_prefix;                    ///< frame is prefixed with its length
 174     uint8_t          dynamic_range_compression;     ///< frame contains DRC data
 175     uint8_t          bits_per_sample;               ///< integer audio sample size for the unscaled IMDCT output (used to scale to [-1.0, 1.0])
 176     uint16_t         samples_per_frame;             ///< number of samples to output
 177     uint16_t         log2_frame_size;
 178     int8_t           num_channels;                  ///< number of channels in the stream (same as AVCodecContext.num_channels)
 179     int8_t           lfe_channel;                   ///< lfe channel index
 180     uint8_t          max_num_subframes;
 181     uint8_t          subframe_len_bits;             ///< number of bits used for the subframe length
 182     uint8_t          max_subframe_len_bit;          ///< flag indicating that the subframe is of maximum size when the first subframe length bit is 1
 183     uint16_t         min_samples_per_subframe;
 184     int8_t           num_sfb[WMALL_BLOCK_SIZES];   ///< scale factor bands per block size
 185     int16_t          sfb_offsets[WMALL_BLOCK_SIZES][MAX_BANDS];                    ///< scale factor band offsets (multiples of 4)
 186     int8_t           sf_offsets[WMALL_BLOCK_SIZES][WMALL_BLOCK_SIZES][MAX_BANDS]; ///< scale factor resample matrix
 187     int16_t          subwoofer_cutoffs[WMALL_BLOCK_SIZES]; ///< subwoofer cutoff values
 188
 189     /* packet decode state */
 190     GetBitContext    pgb;                           ///< bitstream reader context for the packet
 191     int              next_packet_start;             ///< start offset of the next wma packet in the demuxer packet
 192     uint8_t          packet_offset;                 ///< frame offset in the packet
 193     uint8_t          packet_sequence_number;        ///< current packet number
 194     int              num_saved_bits;                ///< saved number of bits
 195     int              frame_offset;                  ///< frame offset in the bit reservoir
 196     int              subframe_offset;               ///< subframe offset in the bit reservoir
 197     uint8_t          packet_loss;                   ///< set in case of bitstream error
 198     uint8_t          packet_done;                   ///< set when a packet is fully decoded
 199
 200     /* frame decode state */
 201     uint32_t         frame_num;                     ///< current frame number (not used for decoding)
 202     GetBitContext    gb;                            ///< bitstream reader context
 203     int              buf_bit_size;                  ///< buffer size in bits
 204     int16_t*         samples_16;                    ///< current samplebuffer pointer (16-bit)
 205     int16_t*         samples_16_end;                ///< maximum samplebuffer pointer
 206     int             *samples_32;                    ///< current samplebuffer pointer (24-bit)
 207     int             *samples_32_end;                ///< maximum samplebuffer pointer
 208     uint8_t          drc_gain;                      ///< gain for the DRC tool
 209     int8_t           skip_frame;                    ///< skip output step
 210     int8_t           parsed_all_subframes;          ///< all subframes decoded?
 211
 212     /* subframe/block decode state */
 213     int16_t          subframe_len;                  ///< current subframe length
 214     int8_t           channels_for_cur_subframe;     ///< number of channels that contain the subframe
 215     int8_t           channel_indexes_for_cur_subframe[WMALL_MAX_CHANNELS];
 216     int8_t           num_bands;                     ///< number of scale factor bands
 217     int8_t           transmit_num_vec_coeffs;       ///< number of vector coded coefficients is part of the bitstream
 218     int16_t*         cur_sfb_offsets;               ///< sfb offsets for the current block
 219     uint8_t          table_idx;                     ///< index for the num_sfb, sfb_offsets, sf_offsets and subwoofer_cutoffs tables
 220     int8_t           esc_len;                       ///< length of escaped coefficients
 221
 222     uint8_t          num_chgroups;                  ///< number of channel groups
 223     WmallChannelGrp chgroup[WMALL_MAX_CHANNELS];    ///< channel group information
 224
 225     WmallChannelCtx channel[WMALL_MAX_CHANNELS];    ///< per channel data
 226
 227     // WMA lossless
 228
 229     uint8_t do_arith_coding;
 230     uint8_t do_ac_filter;
 231     uint8_t do_inter_ch_decorr;
 232     uint8_t do_mclms;
 233     uint8_t do_lpc;
 234
 235     int8_t acfilter_order;
 236     int8_t acfilter_scaling;
 237     int64_t acfilter_coeffs[16];
 238     int acfilter_prevvalues[2][16];
 239
 240     int8_t mclms_order;
 241     int8_t mclms_scaling;
 242     int16_t mclms_coeffs[128];
 243     int16_t mclms_coeffs_cur[4];
 244     int mclms_prevvalues[64];   // FIXME: should be 32-bit / 16-bit depending on bit-depth
 245     int16_t mclms_updates[64];
 246     int mclms_recent;
 247
 248     int movave_scaling;
 249     int quant_stepsize;
 250
 251     struct {
 252     int order;
 253     int scaling;
 254     int coefsend;
 255     int bitsend;
 256     int16_t coefs[256];
 257     int lms_prevvalues[512];    // FIXME: see above
 258     int16_t lms_updates[512];   // and here too
 259     int recent;
 260     } cdlms[2][9];              /* XXX: Here, 2 is the max. no. of channels allowed,
 261                                         9 is the maximum no. of filters per channel.
 262                                         Question is, why 2 if WMALL_MAX_CHANNELS == 8 */
 263
 264
 265     int cdlms_ttl[2];
 266
 267     int bV3RTM;
 268
 269     int is_channel_coded[2];    // XXX: same question as above applies here too (and below)
 270     int update_speed[2];
 271
 272     int transient[2];
 273     int transient_pos[2];
 274     int seekable_tile;
 275
 276     int ave_sum[2];
 277
 278     int channel_residues[2][2048];
 279
 280
 281     int lpc_coefs[2][40];
 282     int lpc_order;
 283     int lpc_scaling;
 284     int lpc_intbits;
 285
 286     int channel_coeffs[2][2048]; // FIXME: should be 32-bit / 16-bit depending on bit-depth
 287
 288 } WmallDecodeCtx;
 289
 290
 291 #undef dprintf
 292 #define dprintf(pctx, ...) av_log(pctx, AV_LOG_DEBUG, __VA_ARGS__)
 293
 294
 295 static int num_logged_tiles = 0;
 296 static int num_logged_subframes = 0;
 297 static int num_lms_update_call = 0;
 298
 299 /**
 300  *@brief helper function to print the most important members of the context
 301  *@param s context
 302  */
 303 static void av_cold dump_context(WmallDecodeCtx *s)
 304 {
 305 #define PRINT(a, b)     av_log(s->avctx, AV_LOG_DEBUG, " %s = %d\n", a, b);
 306 #define PRINT_HEX(a, b) av_log(s->avctx, AV_LOG_DEBUG, " %s = %x\n", a, b);
 307
 308     PRINT("ed sample bit depth", s->bits_per_sample);
 309     PRINT_HEX("ed decode flags", s->decode_flags);
 310     PRINT("samples per frame",   s->samples_per_frame);
 311     PRINT("log2 frame size",     s->log2_frame_size);
 312     PRINT("max num subframes",   s->max_num_subframes);
 313     PRINT("len prefix",          s->len_prefix);
 314     PRINT("num channels",        s->num_channels);
 315 }
 316
 317 static void dump_int_buffer(uint8_t *buffer, int size, int length, int delimiter)
 318 {
 319     int i;
 320
 321     for (i=0 ; i<length ; i++) {
 322         if (!(i%delimiter))
 323             av_log(0, 0, "\n[%d] ", i);
 324         av_log(0, 0, "%d, ", *(int16_t *)(buffer + i * size));
 325     }
 326     av_log(0, 0, "\n");
 327 }
 328
 329 /**
 330  *@brief Uninitialize the decoder and free all resources.
 331  *@param avctx codec context
 332  *@return 0 on success, < 0 otherwise
 333  */
 334 static av_cold int decode_end(AVCodecContext *avctx)
 335 {
 336     WmallDecodeCtx *s = avctx->priv_data;
 337     int i;
 338
 339     for (i = 0; i < WMALL_BLOCK_SIZES; i++)
 340         ff_mdct_end(&s->mdct_ctx[i]);
 341
 342     return 0;
 343 }
 344
 345 /**
 346  *@brief Initialize the decoder.
 347  *@param avctx codec context
 348  *@return 0 on success, -1 otherwise
 349  */
 350 static av_cold int decode_init(AVCodecContext *avctx)
 351 {
 352     WmallDecodeCtx *s = avctx->priv_data;
 353     uint8_t *edata_ptr = avctx->extradata;
 354     unsigned int channel_mask;
 355     int i;
 356     int log2_max_num_subframes;
 357     int num_possible_block_sizes;
 358
 359     s->avctx = avctx;
 360     dsputil_init(&s->dsp, avctx);
 361     init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
 362
 363     if (avctx->extradata_size >= 18) {
 364         s->decode_flags    = AV_RL16(edata_ptr+14);
 365         channel_mask       = AV_RL32(edata_ptr+2);
 366         s->bits_per_sample = AV_RL16(edata_ptr);
 367         if (s->bits_per_sample == 16)
 368             avctx->sample_fmt = AV_SAMPLE_FMT_S16;
 369         else if (s->bits_per_sample == 24)
 370             avctx->sample_fmt = AV_SAMPLE_FMT_S32;
 371         else {
 372             av_log(avctx, AV_LOG_ERROR, "Unknown bit-depth: %d\n",
 373                    s->bits_per_sample);
 374             return AVERROR_INVALIDDATA;
 375         }
 376         /** dump the extradata */
 377         for (i = 0; i < avctx->extradata_size; i++)
 378             dprintf(avctx, "[%x] ", avctx->extradata[i]);
 379         dprintf(avctx, "\n");
 380
 381     } else {
 382         av_log_ask_for_sample(avctx, "Unknown extradata size\n");
 383         return AVERROR_INVALIDDATA;
 384     }
 385
 386     /** generic init */
 387     s->log2_frame_size = av_log2(avctx->block_align) + 4;
 388
 389     /** frame info */
 390     s->skip_frame  = 1; /* skip first frame */
 391     s->packet_loss = 1;
 392     s->len_prefix  = (s->decode_flags & 0x40);
 393
 394     /** get frame len */
 395     s->samples_per_frame = 1 << ff_wma_get_frame_len_bits(avctx->sample_rate,
 396                                                           3, s->decode_flags);
 397
 398     /** init previous block len */
 399     for (i = 0; i < avctx->channels; i++)
 400         s->channel[i].prev_block_len = s->samples_per_frame;
 401
 402     /** subframe info */
 403     log2_max_num_subframes  = ((s->decode_flags & 0x38) >> 3);
 404     s->max_num_subframes    = 1 << log2_max_num_subframes;
 405     s->max_subframe_len_bit = 0;
 406     s->subframe_len_bits    = av_log2(log2_max_num_subframes) + 1;
 407
 408     num_possible_block_sizes     = log2_max_num_subframes + 1;
 409     s->min_samples_per_subframe  = s->samples_per_frame / s->max_num_subframes;
 410     s->dynamic_range_compression = (s->decode_flags & 0x80);
 411
 412     s->bV3RTM = s->decode_flags & 0x100;
 413
 414     if (s->max_num_subframes > MAX_SUBFRAMES) {
 415         av_log(avctx, AV_LOG_ERROR, "invalid number of subframes %i\n",
 416                s->max_num_subframes);
 417         return AVERROR_INVALIDDATA;
 418     }
 419
 420     s->num_channels = avctx->channels;
 421
 422     /** extract lfe channel position */
 423     s->lfe_channel = -1;
 424
 425     if (channel_mask & 8) {
 426         unsigned int mask;
 427         for (mask = 1; mask < 16; mask <<= 1) {
 428             if (channel_mask & mask)
 429                 ++s->lfe_channel;
 430         }
 431     }
 432
 433     if (s->num_channels < 0) {
 434         av_log(avctx, AV_LOG_ERROR, "invalid number of channels %d\n", s->num_channels);
 435         return AVERROR_INVALIDDATA;
 436     } else if (s->num_channels > WMALL_MAX_CHANNELS) {
 437         av_log_ask_for_sample(avctx, "unsupported number of channels\n");
 438         return AVERROR_PATCHWELCOME;
 439     }
 440
 441     avcodec_get_frame_defaults(&s->frame);
 442     avctx->coded_frame = &s->frame;
 443
 444     avctx->channel_layout = channel_mask;
 445     return 0;
 446 }
 447
 448 /**
 449  *@brief Decode the subframe length.
 450  *@param s context
 451  *@param offset sample offset in the frame
 452  *@return decoded subframe length on success, < 0 in case of an error
 453  */
 454 static int decode_subframe_length(WmallDecodeCtx *s, int offset)
 455 {
 456     int frame_len_ratio;
 457     int subframe_len, len;
 458
 459     /** no need to read from the bitstream when only one length is possible */
 460     if (offset == s->samples_per_frame - s->min_samples_per_subframe)
 461         return s->min_samples_per_subframe;
 462
 463     len = av_log2(s->max_num_subframes - 1) + 1;
 464     frame_len_ratio = get_bits(&s->gb, len);
 465
 466     subframe_len = s->min_samples_per_subframe * (frame_len_ratio + 1);
 467
 468     /** sanity check the length */
 469     if (subframe_len < s->min_samples_per_subframe ||
 470         subframe_len > s->samples_per_frame) {
 471         av_log(s->avctx, AV_LOG_ERROR, "broken frame: subframe_len %i\n",
 472                subframe_len);
 473         return AVERROR_INVALIDDATA;
 474     }
 475     return subframe_len;
 476 }
 477
 478 /**
 479  *@brief Decode how the data in the frame is split into subframes.
 480  *       Every WMA frame contains the encoded data for a fixed number of
 481  *       samples per channel. The data for every channel might be split
 482  *       into several subframes. This function will reconstruct the list of
 483  *       subframes for every channel.
 484  *
 485  *       If the subframes are not evenly split, the algorithm estimates the
 486  *       channels with the lowest number of total samples.
 487  *       Afterwards, for each of these channels a bit is read from the
 488  *       bitstream that indicates if the channel contains a subframe with the
 489  *       next subframe size that is going to be read from the bitstream or not.
 490  *       If a channel contains such a subframe, the subframe size gets added to
 491  *       the channel's subframe list.
 492  *       The algorithm repeats these steps until the frame is properly divided
 493  *       between the individual channels.
 494  *
 495  *@param s context
 496  *@return 0 on success, < 0 in case of an error
 497  */
 498 static int decode_tilehdr(WmallDecodeCtx *s)
 499 {
 500     uint16_t num_samples[WMALL_MAX_CHANNELS];        /**< sum of samples for all currently known subframes of a channel */
 501     uint8_t  contains_subframe[WMALL_MAX_CHANNELS];  /**< flag indicating if a channel contains the current subframe */
 502     int channels_for_cur_subframe = s->num_channels;  /**< number of channels that contain the current subframe */
 503     int fixed_channel_layout = 0;                     /**< flag indicating that all channels use the same subfra2me offsets and sizes */
 504     int min_channel_len = 0;                          /**< smallest sum of samples (channels with this length will be processed first) */
 505     int c;
 506
 507     /* Should never consume more than 3073 bits (256 iterations for the
 508      * while loop when always the minimum amount of 128 samples is substracted
 509      * from missing samples in the 8 channel case).
 510      * 1 + BLOCK_MAX_SIZE * MAX_CHANNELS / BLOCK_MIN_SIZE * (MAX_CHANNELS  + 4)
 511      */
 512
 513     /** reset tiling information */
 514     for (c = 0; c < s->num_channels; c++)
 515         s->channel[c].num_subframes = 0;
 516
 517     memset(num_samples, 0, sizeof(num_samples));
 518
 519     if (s->max_num_subframes == 1 || get_bits1(&s->gb))
 520         fixed_channel_layout = 1;
 521
 522     /** loop until the frame data is split between the subframes */
 523     do {
 524         int subframe_len;
 525
 526         /** check which channels contain the subframe */
 527         for (c = 0; c < s->num_channels; c++) {
 528             if (num_samples[c] == min_channel_len) {
 529                 if (fixed_channel_layout || channels_for_cur_subframe == 1 ||
 530                    (min_channel_len == s->samples_per_frame - s->min_samples_per_subframe)) {
 531                     contains_subframe[c] = 1;
 532                 } else {
 533                     contains_subframe[c] = get_bits1(&s->gb);
 534                 }
 535             } else
 536                 contains_subframe[c] = 0;
 537         }
 538
 539         /** get subframe length, subframe_len == 0 is not allowed */
 540         if ((subframe_len = decode_subframe_length(s, min_channel_len)) <= 0)
 541             return AVERROR_INVALIDDATA;
 542         /** add subframes to the individual channels and find new min_channel_len */
 543         min_channel_len += subframe_len;
 544         for (c = 0; c < s->num_channels; c++) {
 545             WmallChannelCtx* chan = &s->channel[c];
 546
 547             if (contains_subframe[c]) {
 548                 if (chan->num_subframes >= MAX_SUBFRAMES) {
 549                     av_log(s->avctx, AV_LOG_ERROR,
 550                            "broken frame: num subframes > 31\n");
 551                     return AVERROR_INVALIDDATA;
 552                 }
 553                 chan->subframe_len[chan->num_subframes] = subframe_len;
 554                 num_samples[c] += subframe_len;
 555                 ++chan->num_subframes;
 556                 if (num_samples[c] > s->samples_per_frame) {
 557                     av_log(s->avctx, AV_LOG_ERROR, "broken frame: "
 558                            "channel len(%d) > samples_per_frame(%d)\n",
 559                            num_samples[c], s->samples_per_frame);
 560                     return AVERROR_INVALIDDATA;
 561                 }
 562             } else if (num_samples[c] <= min_channel_len) {
 563                 if (num_samples[c] < min_channel_len) {
 564                     channels_for_cur_subframe = 0;
 565                     min_channel_len = num_samples[c];
 566                 }
 567                 ++channels_for_cur_subframe;
 568             }
 569         }
 570     } while (min_channel_len < s->samples_per_frame);
 571
 572     for (c = 0; c < s->num_channels; c++) {
 573         int i;
 574         int offset = 0;
 575         for (i = 0; i < s->channel[c].num_subframes; i++) {
 576             s->channel[c].subframe_offset[i] = offset;
 577             offset += s->channel[c].subframe_len[i];
 578         }
 579     }
 580
 581     return 0;
 582 }
 583
 584
 585 static int my_log2(unsigned int i)
 586 {
 587     unsigned int iLog2 = 0;
 588     while ((i >> iLog2) > 1)
 589         iLog2++;
 590     return iLog2;
 591 }
 592
 593
 594 /**
 595  *
 596  */
 597 static void decode_ac_filter(WmallDecodeCtx *s)
 598 {
 599     int i;
 600     s->acfilter_order = get_bits(&s->gb, 4) + 1;
 601     s->acfilter_scaling = get_bits(&s->gb, 4);
 602
 603     for(i = 0; i < s->acfilter_order; i++) {
 604         s->acfilter_coeffs[i] = get_bits(&s->gb, s->acfilter_scaling) + 1;
 605     }
 606 }
 607
 608
 609 /**
 610  *
 611  */
 612 static void decode_mclms(WmallDecodeCtx *s)
 613 {
 614     s->mclms_order = (get_bits(&s->gb, 4) + 1) * 2;
 615     s->mclms_scaling = get_bits(&s->gb, 4);
 616     if(get_bits1(&s->gb)) {
 617         // mclms_send_coef
 618         int i;
 619         int send_coef_bits;
 620         int cbits = av_log2(s->mclms_scaling + 1);
 621         assert(cbits == my_log2(s->mclms_scaling + 1));
 622         if(1 << cbits < s->mclms_scaling + 1)
 623             cbits++;
 624
 625         send_coef_bits = (cbits ? get_bits(&s->gb, cbits) : 0) + 2;
 626
 627         for(i = 0; i < s->mclms_order * s->num_channels * s->num_channels; i++) {
 628             s->mclms_coeffs[i] = get_bits(&s->gb, send_coef_bits);
 629         }
 630
 631         for(i = 0; i < s->num_channels; i++) {
 632             int c;
 633             for(c = 0; c < i; c++) {
 634                 s->mclms_coeffs_cur[i * s->num_channels + c] = get_bits(&s->gb, send_coef_bits);
 635             }
 636         }
 637     }
 638 }
 639
 640
 641 /**
 642  *
 643  */
 644 static void decode_cdlms(WmallDecodeCtx *s)
 645 {
 646     int c, i;
 647     int cdlms_send_coef = get_bits1(&s->gb);
 648
 649     for(c = 0; c < s->num_channels; c++) {
 650         s->cdlms_ttl[c] = get_bits(&s->gb, 3) + 1;
 651         for(i = 0; i < s->cdlms_ttl[c]; i++) {
 652             s->cdlms[c][i].order = (get_bits(&s->gb, 7) + 1) * 8;
 653         }
 654
 655         for(i = 0; i < s->cdlms_ttl[c]; i++) {
 656             s->cdlms[c][i].scaling = get_bits(&s->gb, 4);
 657         }
 658
 659         if(cdlms_send_coef) {
 660             for(i = 0; i < s->cdlms_ttl[c]; i++) {
 661                 int cbits, shift_l, shift_r, j;
 662                 cbits = av_log2(s->cdlms[c][i].order);
 663                 if(1 << cbits < s->cdlms[c][i].order)
 664                     cbits++;
 665                 s->cdlms[c][i].coefsend = get_bits(&s->gb, cbits) + 1;
 666
 667                 cbits = av_log2(s->cdlms[c][i].scaling + 1);
 668                 if(1 << cbits < s->cdlms[c][i].scaling + 1)
 669                     cbits++;
 670
 671                 s->cdlms[c][i].bitsend = get_bits(&s->gb, cbits) + 2;
 672                 shift_l = 32 - s->cdlms[c][i].bitsend;
 673                 shift_r = 32 - 2 - s->cdlms[c][i].scaling;
 674                 for(j = 0; j < s->cdlms[c][i].coefsend; j++) {
 675                     s->cdlms[c][i].coefs[j] =
 676                         (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r;
 677                 }
 678             }
 679         }
 680     }
 681 }
 682
 683 /**
 684  *
 685  */
 686 static int decode_channel_residues(WmallDecodeCtx *s, int ch, int tile_size)
 687 {
 688     int i = 0;
 689     unsigned int ave_mean;
 690     s->transient[ch] = get_bits1(&s->gb);
 691     if(s->transient[ch]) {
 692             s->transient_pos[ch] = get_bits(&s->gb, av_log2(tile_size));
 693         if (s->transient_pos[ch])
 694                 s->transient[ch] = 0;
 695             s->channel[ch].transient_counter =
 696                 FFMAX(s->channel[ch].transient_counter, s->samples_per_frame / 2);
 697         } else if (s->channel[ch].transient_counter)
 698             s->transient[ch] = 1;
 699
 700     if(s->seekable_tile) {
 701         ave_mean = get_bits(&s->gb, s->bits_per_sample);
 702         s->ave_sum[ch] = ave_mean << (s->movave_scaling + 1);
 703 //        s->ave_sum[ch] *= 2;
 704     }
 705
 706     if(s->seekable_tile) {
 707         if(s->do_inter_ch_decorr)
 708             s->channel_residues[ch][0] = get_sbits(&s->gb, s->bits_per_sample + 1);
 709         else
 710             s->channel_residues[ch][0] = get_sbits(&s->gb, s->bits_per_sample);
 711         i++;
 712     }
 713     //av_log(0, 0, "%8d: ", num_logged_tiles++);
 714     for(; i < tile_size; i++) {
 715         int quo = 0, rem, rem_bits, residue;
 716         while(get_bits1(&s->gb)) {
 717             quo++;
 718             if (get_bits_left(&s->gb) <= 0)
 719                 return -1;
 720         }
 721         if(quo >= 32)
 722             quo += get_bits_long(&s->gb, get_bits(&s->gb, 5) + 1);
 723
 724     ave_mean = (s->ave_sum[ch] + (1 << s->movave_scaling)) >> (s->movave_scaling + 1);
 725     if (ave_mean <= 1)
 726         residue = quo;
 727     else
 728     {
 729         rem_bits = av_ceil_log2(ave_mean);
 730         rem = rem_bits ? get_bits(&s->gb, rem_bits) : 0;
 731         residue = (quo << rem_bits) + rem;
 732     }
 733
 734         s->ave_sum[ch] = residue + s->ave_sum[ch] - (s->ave_sum[ch] >> s->movave_scaling);
 735
 736         if(residue & 1)
 737             residue = -(residue >> 1) - 1;
 738         else
 739             residue = residue >> 1;
 740         s->channel_residues[ch][i] = residue;
 741     }
 742     //dump_int_buffer(s->channel_residues[ch], 4, tile_size, 16);
 743
 744     return 0;
 745
 746 }
 747
 748
 749 /**
 750  *
 751  */
 752 static void
 753 decode_lpc(WmallDecodeCtx *s)
 754 {
 755     int ch, i, cbits;
 756     s->lpc_order = get_bits(&s->gb, 5) + 1;
 757     s->lpc_scaling = get_bits(&s->gb, 4);
 758     s->lpc_intbits = get_bits(&s->gb, 3) + 1;
 759     cbits = s->lpc_scaling + s->lpc_intbits;
 760     for(ch = 0; ch < s->num_channels; ch++) {
 761         for(i = 0; i < s->lpc_order; i++) {
 762             s->lpc_coefs[ch][i] = get_sbits(&s->gb, cbits);
 763         }
 764     }
 765 }
 766
 767
 768 static void clear_codec_buffers(WmallDecodeCtx *s)
 769 {
 770     int ich, ilms;
 771
 772     memset(s->acfilter_coeffs    , 0, 16 * sizeof(int));
 773     memset(s->acfilter_prevvalues, 0, 16 * 2 * sizeof(int)); // may be wrong
 774     memset(s->lpc_coefs          , 0, 40 * 2 * sizeof(int));
 775
 776     memset(s->mclms_coeffs    , 0, 128 * sizeof(int16_t));
 777     memset(s->mclms_coeffs_cur, 0,   4 * sizeof(int16_t));
 778     memset(s->mclms_prevvalues, 0,  64 * sizeof(int));
 779     memset(s->mclms_updates   , 0,  64 * sizeof(int16_t));
 780
 781     for (ich = 0; ich < s->num_channels; ich++) {
 782         for (ilms = 0; ilms < s->cdlms_ttl[ich]; ilms++) {
 783             memset(s->cdlms[ich][ilms].coefs         , 0, 256 * sizeof(int16_t));
 784             memset(s->cdlms[ich][ilms].lms_prevvalues, 0, 512 * sizeof(int));
 785             memset(s->cdlms[ich][ilms].lms_updates   , 0, 512 * sizeof(int16_t));
 786         }
 787         s->ave_sum[ich] = 0;
 788     }
 789 }
 790
 791 /**
 792  *@brief Resets filter parameters and transient area at new seekable tile
 793  */
 794 static void reset_codec(WmallDecodeCtx *s)
 795 {
 796     int ich, ilms;
 797     s->mclms_recent = s->mclms_order * s->num_channels;
 798     for (ich = 0; ich < s->num_channels; ich++) {
 799         for (ilms = 0; ilms < s->cdlms_ttl[ich]; ilms++)
 800             s->cdlms[ich][ilms].recent = s->cdlms[ich][ilms].order;
 801         /* first sample of a seekable subframe is considered as the starting of
 802            a transient area which is samples_per_frame samples long */
 803         s->channel[ich].transient_counter = s->samples_per_frame;
 804         s->transient[ich] = 1;
 805         s->transient_pos[ich] = 0;
 806     }
 807 }
 808
 809
 810
 811 static void mclms_update(WmallDecodeCtx *s, int icoef, int *pred)
 812 {
 813     int i, j, ich;
 814     int pred_error;
 815     int order = s->mclms_order;
 816     int num_channels = s->num_channels;
 817     int range = 1 << (s->bits_per_sample - 1);
 818     //int bps = s->bits_per_sample > 16 ? 4 : 2; // bytes per sample
 819
 820     for (ich = 0; ich < num_channels; ich++) {
 821         pred_error = s->channel_residues[ich][icoef] - pred[ich];
 822         if (pred_error > 0) {
 823             for (i = 0; i < order * num_channels; i++)
 824                 s->mclms_coeffs[i + ich * order * num_channels] +=
 825                     s->mclms_updates[s->mclms_recent + i];
 826             for (j = 0; j < ich; j++) {
 827                 if (s->channel_residues[j][icoef] > 0)
 828                     s->mclms_coeffs_cur[ich * num_channels + j] += 1;
 829                 else if (s->channel_residues[j][icoef] < 0)
 830                     s->mclms_coeffs_cur[ich * num_channels + j] -= 1;
 831             }
 832         } else if (pred_error < 0) {
 833             for (i = 0; i < order * num_channels; i++)
 834                 s->mclms_coeffs[i + ich * order * num_channels] -=
 835                     s->mclms_updates[s->mclms_recent + i];
 836             for (j = 0; j < ich; j++) {
 837                 if (s->channel_residues[j][icoef] > 0)
 838                     s->mclms_coeffs_cur[ich * num_channels + j] -= 1;
 839                 else if (s->channel_residues[j][icoef] < 0)
 840                     s->mclms_coeffs_cur[ich * num_channels + j] += 1;
 841             }
 842         }
 843     }
 844
 845     for (ich = num_channels - 1; ich >= 0; ich--) {
 846         s->mclms_recent--;
 847         s->mclms_prevvalues[s->mclms_recent] = s->channel_residues[ich][icoef];
 848         if (s->channel_residues[ich][icoef] > range - 1)
 849             s->mclms_prevvalues[s->mclms_recent] = range - 1;
 850         else if (s->channel_residues[ich][icoef] < -range)
 851             s->mclms_prevvalues[s->mclms_recent] = -range;
 852
 853         s->mclms_updates[s->mclms_recent] = 0;
 854         if (s->channel_residues[ich][icoef] > 0)
 855             s->mclms_updates[s->mclms_recent] = 1;
 856         else if (s->channel_residues[ich][icoef] < 0)
 857             s->mclms_updates[s->mclms_recent] = -1;
 858     }
 859
 860     if (s->mclms_recent == 0) {
 861         memcpy(&s->mclms_prevvalues[order * num_channels],
 862                s->mclms_prevvalues,
 863                4 * order * num_channels);
 864         memcpy(&s->mclms_updates[order * num_channels],
 865                s->mclms_updates,
 866                2 * order * num_channels);
 867         s->mclms_recent = num_channels * order;
 868     }
 869 }
 870
 871 static void mclms_predict(WmallDecodeCtx *s, int icoef, int *pred)
 872 {
 873     int ich, i;
 874     int order = s->mclms_order;
 875     int num_channels = s->num_channels;
 876
 877     for (ich = 0; ich < num_channels; ich++) {
 878         if (!s->is_channel_coded[ich])
 879             continue;
 880         pred[ich] = 0;
 881         for (i = 0; i < order * num_channels; i++)
 882             pred[ich] += s->mclms_prevvalues[i + s->mclms_recent] *
 883                          s->mclms_coeffs[i + order * num_channels * ich];
 884         for (i = 0; i < ich; i++)
 885             pred[ich] += s->channel_residues[i][icoef] *
 886                          s->mclms_coeffs_cur[i + num_channels * ich];
 887         pred[ich] += 1 << s->mclms_scaling - 1;
 888         pred[ich] >>= s->mclms_scaling;
 889         s->channel_residues[ich][icoef] += pred[ich];
 890     }
 891 }
 892
 893 static void revert_mclms(WmallDecodeCtx *s, int tile_size)
 894 {
 895     int icoef, pred[WMALL_MAX_CHANNELS] = {0};
 896     for (icoef = 0; icoef < tile_size; icoef++) {
 897         mclms_predict(s, icoef, pred);
 898         mclms_update(s, icoef, pred);
 899     }
 900 }
 901
 902 static int lms_predict(WmallDecodeCtx *s, int ich, int ilms)
 903 {
 904     int pred = 0;
 905     int icoef;
 906     int recent = s->cdlms[ich][ilms].recent;
 907
 908     for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
 909         pred += s->cdlms[ich][ilms].coefs[icoef] *
 910                     s->cdlms[ich][ilms].lms_prevvalues[icoef + recent];
 911
 912     //pred += (1 << (s->cdlms[ich][ilms].scaling - 1));
 913     /* XXX: Table 29 has:
 914             iPred >= cdlms[iCh][ilms].scaling;
 915        seems to me like a missing > */
 916     //pred >>= s->cdlms[ich][ilms].scaling;
 917     return pred;
 918 }
 919
 920 static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input, int residue)
 921 {
 922     int icoef;
 923     int recent = s->cdlms[ich][ilms].recent;
 924     int range = 1 << s->bits_per_sample - 1;
 925     //int bps = s->bits_per_sample > 16 ? 4 : 2; // bytes per sample
 926
 927     if (residue < 0) {
 928         for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
 929             s->cdlms[ich][ilms].coefs[icoef] -=
 930                 s->cdlms[ich][ilms].lms_updates[icoef + recent];
 931     } else if (residue > 0) {
 932         for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
 933             s->cdlms[ich][ilms].coefs[icoef] +=
 934                 s->cdlms[ich][ilms].lms_updates[icoef + recent];    /* spec mistakenly
 935                                                                     dropped the recent */
 936     }
 937
 938     if (recent)
 939         recent--;
 940     else {
 941         /* XXX: This memcpy()s will probably fail if a fixed 32-bit buffer is used.
 942                 follow kshishkov's suggestion of using a union. */
 943         memcpy(&s->cdlms[ich][ilms].lms_prevvalues[s->cdlms[ich][ilms].order],
 944                s->cdlms[ich][ilms].lms_prevvalues,
 945                4 * s->cdlms[ich][ilms].order);
 946         memcpy(&s->cdlms[ich][ilms].lms_updates[s->cdlms[ich][ilms].order],
 947                s->cdlms[ich][ilms].lms_updates,
 948                2 * s->cdlms[ich][ilms].order);
 949         recent = s->cdlms[ich][ilms].order - 1;
 950     }
 951
 952     s->cdlms[ich][ilms].lms_prevvalues[recent] = av_clip(input, -range, range - 1);
 953     if (!input)
 954         s->cdlms[ich][ilms].lms_updates[recent] = 0;
 955     else if (input < 0)
 956         s->cdlms[ich][ilms].lms_updates[recent] = -s->update_speed[ich];
 957     else
 958         s->cdlms[ich][ilms].lms_updates[recent] = s->update_speed[ich];
 959
 960     /* XXX: spec says:
 961     cdlms[iCh][ilms].updates[iRecent + cdlms[iCh][ilms].order >> 4] >>= 2;
 962     lms_updates[iCh][ilms][iRecent + cdlms[iCh][ilms].order >> 3] >>= 1;
 963
 964         Questions is - are cdlms[iCh][ilms].updates[] and lms_updates[][][] two
 965         seperate buffers? Here I've assumed that the two are same which makes
 966         more sense to me.
 967     */
 968     s->cdlms[ich][ilms].lms_updates[recent + (s->cdlms[ich][ilms].order >> 4)] >>= 2;
 969     s->cdlms[ich][ilms].lms_updates[recent + (s->cdlms[ich][ilms].order >> 3)] >>= 1;
 970     s->cdlms[ich][ilms].recent = recent;
 971 }
 972
 973 static void use_high_update_speed(WmallDecodeCtx *s, int ich)
 974 {
 975     int ilms, recent, icoef;
 976     for (ilms = s->cdlms_ttl[ich] - 1; ilms >= 0; ilms--) {
 977         recent = s->cdlms[ich][ilms].recent;
 978         if (s->update_speed[ich] == 16)
 979             continue;
 980         if (s->bV3RTM) {
 981             for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
 982                 s->cdlms[ich][ilms].lms_updates[icoef + recent] *= 2;
 983         } else {
 984             for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
 985                 s->cdlms[ich][ilms].lms_updates[icoef] *= 2;
 986         }
 987     }
 988     s->update_speed[ich] = 16;
 989 }
 990
 991 static void use_normal_update_speed(WmallDecodeCtx *s, int ich)
 992 {
 993     int ilms, recent, icoef;
 994     for (ilms = s->cdlms_ttl[ich] - 1; ilms >= 0; ilms--) {
 995         recent = s->cdlms[ich][ilms].recent;
 996         if (s->update_speed[ich] == 8)
 997             continue;
 998         if (s->bV3RTM) {
 999             for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
1000                 s->cdlms[ich][ilms].lms_updates[icoef + recent] /= 2;
1001         } else {
1002             for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
1003                 s->cdlms[ich][ilms].lms_updates[icoef] /= 2;
1004         }
1005     }
1006     s->update_speed[ich] = 8;
1007 }
1008
1009 static void revert_cdlms(WmallDecodeCtx *s, int ch, int coef_begin, int coef_end)
1010 {
1011     int icoef;
1012     int pred;
1013     int ilms, num_lms;
1014     int residue, input;
1015
1016     num_lms = s->cdlms_ttl[ch];
1017     for (ilms = num_lms - 1; ilms >= 0; ilms--) {
1018         //s->cdlms[ch][ilms].recent = s->cdlms[ch][ilms].order;
1019         for (icoef = coef_begin; icoef < coef_end; icoef++) {
1020             pred = 1 << (s->cdlms[ch][ilms].scaling - 1);
1021             residue = s->channel_residues[ch][icoef];
1022             pred += lms_predict(s, ch, ilms);
1023             input = residue + (pred >> s->cdlms[ch][ilms].scaling);
1024             lms_update(s, ch, ilms, input, residue);
1025             s->channel_residues[ch][icoef] = input;
1026         }
1027     }
1028 }
1029
1030 static void revert_inter_ch_decorr(WmallDecodeCtx *s, int tile_size)
1031 {
1032     int icoef;
1033     if (s->num_channels != 2)
1034         return;
1035     else if (s->is_channel_coded[0] && s->is_channel_coded[1]) {
1036         for (icoef = 0; icoef < tile_size; icoef++) {
1037             s->channel_residues[0][icoef] -= s->channel_residues[1][icoef] >> 1;
1038             s->channel_residues[1][icoef] += s->channel_residues[0][icoef];
1039         }
1040     }
1041 }
1042
1043 static void revert_acfilter(WmallDecodeCtx *s, int tile_size)
1044 {
1045     int ich, icoef;
1046     int pred;
1047     int i, j;
1048     int64_t *filter_coeffs = s->acfilter_coeffs;
1049     int scaling = s->acfilter_scaling;
1050     int order = s->acfilter_order;
1051
1052     for (ich = 0; ich < s->num_channels; ich++) {
1053         int *prevvalues = s->acfilter_prevvalues[ich];
1054         for (i = 0; i < order; i++) {
1055             pred = 0;
1056             for (j = 0; j < order; j++) {
1057                 if (i <= j)
1058                     pred += filter_coeffs[j] * prevvalues[j - i];
1059                 else
1060                     pred += s->channel_residues[ich][i - j - 1] * filter_coeffs[j];
1061             }
1062             pred >>= scaling;
1063             s->channel_residues[ich][i] += pred;
1064         }
1065         for (i = order; i < tile_size; i++) {
1066             pred = 0;
1067             for (j = 0; j < order; j++)
1068                 pred += s->channel_residues[ich][i - j - 1] * filter_coeffs[j];
1069             pred >>= scaling;
1070             s->channel_residues[ich][i] += pred;
1071         }
1072         for (j = 0; j < order; j++)
1073             prevvalues[j] = s->channel_residues[ich][tile_size - j - 1];
1074     }
1075 }
1076
1077 /**
1078  *@brief Decode a single subframe (block).
1079  *@param s codec context
1080  *@return 0 on success, < 0 when decoding failed
1081  */
1082 static int decode_subframe(WmallDecodeCtx *s)
1083 {
1084     int offset = s->samples_per_frame;
1085     int subframe_len = s->samples_per_frame;
1086     int i, j;
1087     int total_samples   = s->samples_per_frame * s->num_channels;
1088     int rawpcm_tile;
1089     int padding_zeroes;
1090
1091     s->subframe_offset = get_bits_count(&s->gb);
1092
1093     /** reset channel context and find the next block offset and size
1094         == the next block of the channel with the smallest number of
1095         decoded samples
1096     */
1097     for (i = 0; i < s->num_channels; i++) {
1098         s->channel[i].grouped = 0;
1099         if (offset > s->channel[i].decoded_samples) {
1100             offset = s->channel[i].decoded_samples;
1101             subframe_len =
1102                 s->channel[i].subframe_len[s->channel[i].cur_subframe];
1103         }
1104     }
1105
1106     /** get a list of all channels that contain the estimated block */
1107     s->channels_for_cur_subframe = 0;
1108     for (i = 0; i < s->num_channels; i++) {
1109         const int cur_subframe = s->channel[i].cur_subframe;
1110         /** substract already processed samples */
1111         total_samples -= s->channel[i].decoded_samples;
1112
1113         /** and count if there are multiple subframes that match our profile */
1114         if (offset == s->channel[i].decoded_samples &&
1115             subframe_len == s->channel[i].subframe_len[cur_subframe]) {
1116             total_samples -= s->channel[i].subframe_len[cur_subframe];
1117             s->channel[i].decoded_samples +=
1118                 s->channel[i].subframe_len[cur_subframe];
1119             s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
1120             ++s->channels_for_cur_subframe;
1121         }
1122     }
1123
1124     /** check if the frame will be complete after processing the
1125         estimated block */
1126     if (!total_samples)
1127         s->parsed_all_subframes = 1;
1128
1129
1130     s->seekable_tile = get_bits1(&s->gb);
1131     if(s->seekable_tile) {
1132         clear_codec_buffers(s);
1133
1134         s->do_arith_coding    = get_bits1(&s->gb);
1135         if(s->do_arith_coding) {
1136             dprintf(s->avctx, "do_arith_coding == 1");
1137             abort();
1138         }
1139         s->do_ac_filter       = get_bits1(&s->gb);
1140         s->do_inter_ch_decorr = get_bits1(&s->gb);
1141         s->do_mclms           = get_bits1(&s->gb);
1142
1143         if(s->do_ac_filter)
1144             decode_ac_filter(s);
1145
1146         if(s->do_mclms)
1147             decode_mclms(s);
1148
1149         decode_cdlms(s);
1150         s->movave_scaling = get_bits(&s->gb, 3);
1151         s->quant_stepsize = get_bits(&s->gb, 8) + 1;
1152
1153             reset_codec(s);
1154     }
1155
1156     rawpcm_tile = get_bits1(&s->gb);
1157
1158     for(i = 0; i < s->num_channels; i++) {
1159         s->is_channel_coded[i] = 1;
1160     }
1161
1162     if(!rawpcm_tile) {
1163
1164         for(i = 0; i < s->num_channels; i++) {
1165             s->is_channel_coded[i] = get_bits1(&s->gb);
1166         }
1167
1168         if(s->bV3RTM) {
1169             // LPC
1170             s->do_lpc = get_bits1(&s->gb);
1171             if(s->do_lpc) {
1172                 decode_lpc(s);
1173             }
1174         } else {
1175             s->do_lpc = 0;
1176         }
1177     }
1178
1179
1180     if(get_bits1(&s->gb)) {
1181         padding_zeroes = get_bits(&s->gb, 5);
1182     } else {
1183         padding_zeroes = 0;
1184     }
1185
1186     if(rawpcm_tile) {
1187
1188         int bits = s->bits_per_sample - padding_zeroes;
1189         dprintf(s->avctx, "RAWPCM %d bits per sample. total %d bits, remain=%d\n", bits,
1190                 bits * s->num_channels * subframe_len, get_bits_count(&s->gb));
1191         for(i = 0; i < s->num_channels; i++) {
1192             for(j = 0; j < subframe_len; j++) {
1193                 s->channel_coeffs[i][j] = get_sbits(&s->gb, bits);
1194 //                dprintf(s->avctx, "PCM[%d][%d] = 0x%04x\n", i, j, s->channel_coeffs[i][j]);
1195             }
1196         }
1197     } else {
1198         for(i = 0; i < s->num_channels; i++)
1199             if(s->is_channel_coded[i]) {
1200             decode_channel_residues(s, i, subframe_len);
1201             if (s->seekable_tile)
1202                 use_high_update_speed(s, i);
1203             else
1204                 use_normal_update_speed(s, i);
1205             revert_cdlms(s, i, 0, subframe_len);
1206         }
1207     }
1208     if (s->do_mclms)
1209         revert_mclms(s, subframe_len);
1210     if (s->do_inter_ch_decorr)
1211         revert_inter_ch_decorr(s, subframe_len);
1212     if(s->do_ac_filter)
1213         revert_acfilter(s, subframe_len);
1214
1215     /* Dequantize */
1216     if (s->quant_stepsize != 1)
1217         for (i = 0; i < s->num_channels; i++)
1218             for (j = 0; j < subframe_len; j++)
1219                 s->channel_residues[i][j] *= s->quant_stepsize;
1220
1221     // Write to proper output buffer depending on bit-depth
1222     for (i = 0; i < subframe_len; i++)
1223         for (j = 0; j < s->num_channels; j++) {
1224             if (s->bits_per_sample == 16)
1225                 *s->samples_16++ = (int16_t) s->channel_residues[j][i];
1226             else
1227                 *s->samples_32++ = s->channel_residues[j][i];
1228         }
1229
1230     /** handled one subframe */
1231
1232     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1233         int c = s->channel_indexes_for_cur_subframe[i];
1234         if (s->channel[c].cur_subframe >= s->channel[c].num_subframes) {
1235             av_log(s->avctx, AV_LOG_ERROR, "broken subframe\n");
1236             return AVERROR_INVALIDDATA;
1237         }
1238         ++s->channel[c].cur_subframe;
1239     }
1240     num_logged_subframes++;
1241     return 0;
1242 }
1243
1244 /**
1245  *@brief Decode one WMA frame.
1246  *@param s codec context
1247  *@return 0 if the trailer bit indicates that this is the last frame,
1248  *        1 if there are additional frames
1249  */
1250 static int decode_frame(WmallDecodeCtx *s)
1251 {
1252     GetBitContext* gb = &s->gb;
1253     int more_frames = 0;
1254     int len = 0;
1255     int i, ret;
1256
1257     s->frame.nb_samples = s->samples_per_frame;
1258     if ((ret = s->avctx->get_buffer(s->avctx, &s->frame)) < 0) {
1259         /** return an error if no frame could be decoded at all */
1260         av_log(s->avctx, AV_LOG_ERROR,
1261                "not enough space for the output samples\n");
1262         s->packet_loss = 1;
1263         return 0;
1264     }
1265     s->samples_16 = (int16_t *)s->frame.data[0];
1266     s->samples_32 = (int32_t *)s->frame.data[0];
1267
1268     /** get frame length */
1269     if (s->len_prefix)
1270         len = get_bits(gb, s->log2_frame_size);
1271
1272     /** decode tile information */
1273     if (decode_tilehdr(s)) {
1274         s->packet_loss = 1;
1275         return 0;
1276     }
1277
1278     /** read drc info */
1279     if (s->dynamic_range_compression) {
1280         s->drc_gain = get_bits(gb, 8);
1281     }
1282
1283     /** no idea what these are for, might be the number of samples
1284         that need to be skipped at the beginning or end of a stream */
1285     if (get_bits1(gb)) {
1286         int skip;
1287
1288         /** usually true for the first frame */
1289         if (get_bits1(gb)) {
1290             skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1291             dprintf(s->avctx, "start skip: %i\n", skip);
1292         }
1293
1294         /** sometimes true for the last frame */
1295         if (get_bits1(gb)) {
1296             skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1297             dprintf(s->avctx, "end skip: %i\n", skip);
1298         }
1299
1300     }
1301
1302     /** reset subframe states */
1303     s->parsed_all_subframes = 0;
1304     for (i = 0; i < s->num_channels; i++) {
1305         s->channel[i].decoded_samples = 0;
1306         s->channel[i].cur_subframe    = 0;
1307         s->channel[i].reuse_sf        = 0;
1308     }
1309
1310     /** decode all subframes */
1311     while (!s->parsed_all_subframes) {
1312         if (decode_subframe(s) < 0) {
1313             s->packet_loss = 1;
1314             return 0;
1315         }
1316     }
1317
1318     dprintf(s->avctx, "Frame done\n");
1319
1320     if (s->skip_frame) {
1321         s->skip_frame = 0;
1322     }
1323
1324     if (s->len_prefix) {
1325         if (len != (get_bits_count(gb) - s->frame_offset) + 2) {
1326             /** FIXME: not sure if this is always an error */
1327             av_log(s->avctx, AV_LOG_ERROR,
1328                    "frame[%i] would have to skip %i bits\n", s->frame_num,
1329                    len - (get_bits_count(gb) - s->frame_offset) - 1);
1330             s->packet_loss = 1;
1331             return 0;
1332         }
1333
1334         /** skip the rest of the frame data */
1335         skip_bits_long(gb, len - (get_bits_count(gb) - s->frame_offset) - 1);
1336     } else {
1337 /*
1338         while (get_bits_count(gb) < s->num_saved_bits && get_bits1(gb) == 0) {
1339             dprintf(s->avctx, "skip1\n");
1340         }
1341 */
1342     }
1343
1344     /** decode trailer bit */
1345     more_frames = get_bits1(gb);
1346     ++s->frame_num;
1347     return more_frames;
1348 }
1349
1350 /**
1351  *@brief Calculate remaining input buffer length.
1352  *@param s codec context
1353  *@param gb bitstream reader context
1354  *@return remaining size in bits
1355  */
1356 static int remaining_bits(WmallDecodeCtx *s, GetBitContext *gb)
1357 {
1358     return s->buf_bit_size - get_bits_count(gb);
1359 }
1360
1361 /**
1362  *@brief Fill the bit reservoir with a (partial) frame.
1363  *@param s codec context
1364  *@param gb bitstream reader context
1365  *@param len length of the partial frame
1366  *@param append decides wether to reset the buffer or not
1367  */
1368 static void save_bits(WmallDecodeCtx *s, GetBitContext* gb, int len,
1369                       int append)
1370 {
1371     int buflen;
1372
1373     /** when the frame data does not need to be concatenated, the input buffer
1374         is resetted and additional bits from the previous frame are copyed
1375         and skipped later so that a fast byte copy is possible */
1376
1377     if (!append) {
1378         s->frame_offset = get_bits_count(gb) & 7;
1379         s->num_saved_bits = s->frame_offset;
1380         init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
1381     }
1382
1383     buflen = (s->num_saved_bits + len + 8) >> 3;
1384
1385     if (len <= 0 || buflen > MAX_FRAMESIZE) {
1386         av_log_ask_for_sample(s->avctx, "input buffer too small\n");
1387         s->packet_loss = 1;
1388         return;
1389     }
1390
1391     s->num_saved_bits += len;
1392     if (!append) {
1393         avpriv_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3),
1394                      s->num_saved_bits);
1395     } else {
1396         int align = 8 - (get_bits_count(gb) & 7);
1397         align = FFMIN(align, len);
1398         put_bits(&s->pb, align, get_bits(gb, align));
1399         len -= align;
1400         avpriv_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3), len);
1401     }
1402     skip_bits_long(gb, len);
1403
1404     {
1405         PutBitContext tmp = s->pb;
1406         flush_put_bits(&tmp);
1407     }
1408
1409     init_get_bits(&s->gb, s->frame_data, s->num_saved_bits);
1410     skip_bits(&s->gb, s->frame_offset);
1411 }
1412
1413 /**
1414  *@brief Decode a single WMA packet.
1415  *@param avctx codec context
1416  *@param data the output buffer
1417  *@param data_size number of bytes that were written to the output buffer
1418  *@param avpkt input packet
1419  *@return number of bytes that were read from the input buffer
1420  */
1421 static int decode_packet(AVCodecContext *avctx,
1422                          void *data, int *got_frame_ptr, AVPacket* avpkt)
1423 {
1424     WmallDecodeCtx *s = avctx->priv_data;
1425     GetBitContext* gb  = &s->pgb;
1426     const uint8_t* buf = avpkt->data;
1427     int buf_size       = avpkt->size;
1428     int num_bits_prev_frame;
1429     int packet_sequence_number;
1430     int seekable_frame_in_packet;
1431     int spliced_packet;
1432
1433     if (s->packet_done || s->packet_loss) {
1434         int seekable_frame_in_packet, spliced_packet;
1435         s->packet_done = 0;
1436
1437         /** sanity check for the buffer length */
1438         if (buf_size < avctx->block_align)
1439             return 0;
1440
1441         s->next_packet_start = buf_size - avctx->block_align;
1442         buf_size = avctx->block_align;
1443         s->buf_bit_size = buf_size << 3;
1444
1445         /** parse packet header */
1446         init_get_bits(gb, buf, s->buf_bit_size);
1447         packet_sequence_number = get_bits(gb, 4);
1448         seekable_frame_in_packet = get_bits1(gb);
1449         spliced_packet = get_bits1(gb);
1450
1451         /** get number of bits that need to be added to the previous frame */
1452         num_bits_prev_frame = get_bits(gb, s->log2_frame_size);
1453
1454         /** check for packet loss */
1455         if (!s->packet_loss &&
1456             ((s->packet_sequence_number + 1) & 0xF) != packet_sequence_number) {
1457             s->packet_loss = 1;
1458             av_log(avctx, AV_LOG_ERROR, "Packet loss detected! seq %x vs %x\n",
1459                    s->packet_sequence_number, packet_sequence_number);
1460         }
1461         s->packet_sequence_number = packet_sequence_number;
1462
1463         if (num_bits_prev_frame > 0) {
1464             int remaining_packet_bits = s->buf_bit_size - get_bits_count(gb);
1465             if (num_bits_prev_frame >= remaining_packet_bits) {
1466                 num_bits_prev_frame = remaining_packet_bits;
1467                 s->packet_done = 1;
1468             }
1469
1470             /** append the previous frame data to the remaining data from the
1471                 previous packet to create a full frame */
1472             save_bits(s, gb, num_bits_prev_frame, 1);
1473
1474             /** decode the cross packet frame if it is valid */
1475             if (!s->packet_loss)
1476                 decode_frame(s);
1477         } else if (s->num_saved_bits - s->frame_offset) {
1478             dprintf(avctx, "ignoring %x previously saved bits\n",
1479                     s->num_saved_bits - s->frame_offset);
1480         }
1481
1482         if (s->packet_loss) {
1483             /** reset number of saved bits so that the decoder
1484                 does not start to decode incomplete frames in the
1485                 s->len_prefix == 0 case */
1486             s->num_saved_bits = 0;
1487             s->packet_loss = 0;
1488         }
1489
1490     } else {
1491         int frame_size;
1492
1493         s->buf_bit_size = (avpkt->size - s->next_packet_start) << 3;
1494         init_get_bits(gb, avpkt->data, s->buf_bit_size);
1495         skip_bits(gb, s->packet_offset);
1496
1497         if (s->len_prefix && remaining_bits(s, gb) > s->log2_frame_size &&
1498             (frame_size = show_bits(gb, s->log2_frame_size)) &&
1499             frame_size <= remaining_bits(s, gb)) {
1500             save_bits(s, gb, frame_size, 0);
1501             s->packet_done = !decode_frame(s);
1502         } else if (!s->len_prefix
1503                    && s->num_saved_bits > get_bits_count(&s->gb)) {
1504             /** when the frames do not have a length prefix, we don't know
1505                 the compressed length of the individual frames
1506                 however, we know what part of a new packet belongs to the
1507                 previous frame
1508                 therefore we save the incoming packet first, then we append
1509                 the "previous frame" data from the next packet so that
1510                 we get a buffer that only contains full frames */
1511             s->packet_done = !decode_frame(s);
1512         } else {
1513             s->packet_done = 1;
1514         }
1515     }
1516
1517     if (s->packet_done && !s->packet_loss &&
1518         remaining_bits(s, gb) > 0) {
1519         /** save the rest of the data so that it can be decoded
1520             with the next packet */
1521         save_bits(s, gb, remaining_bits(s, gb), 0);
1522     }
1523
1524     *(AVFrame *)data = s->frame;
1525     *got_frame_ptr = 1;
1526     s->packet_offset = get_bits_count(gb) & 7;
1527
1528     return (s->packet_loss) ? AVERROR_INVALIDDATA : get_bits_count(gb) >> 3;
1529 }
1530
1531 /**
1532  *@brief Clear decoder buffers (for seeking).
1533  *@param avctx codec context
1534  */
1535 static void flush(AVCodecContext *avctx)
1536 {
1537     WmallDecodeCtx *s = avctx->priv_data;
1538     int i;
1539     /** reset output buffer as a part of it is used during the windowing of a
1540         new frame */
1541     for (i = 0; i < s->num_channels; i++)
1542         memset(s->channel[i].out, 0, s->samples_per_frame *
1543                sizeof(*s->channel[i].out));
1544     s->packet_loss = 1;
1545 }
1546
1547
1548 /**
1549  *@brief wmall decoder
1550  */
1551 AVCodec ff_wmalossless_decoder = {
1552     .name           = "wmalossless",
1553     .type           = AVMEDIA_TYPE_AUDIO,
1554     .id             = CODEC_ID_WMALOSSLESS,
1555     .priv_data_size = sizeof(WmallDecodeCtx),
1556     .init           = decode_init,
1557     .close          = decode_end,
1558     .decode         = decode_packet,
1559     .flush          = flush,
1560     .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_EXPERIMENTAL | CODEC_CAP_DR1,
1561     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Lossless"),
1562 };