git.sesse.net Git - ffmpeg/blob - libavcodec/wmalosslessdec.c

   1 /*
   2  * Wmall compatible decoder
   3  * Copyright (c) 2007 Baptiste Coudurier, Benjamin Larsson, Ulion
   4  * Copyright (c) 2008 - 2011 Sascha Sommer, Benjamin Larsson
   5  * Copyright (c) 2011 Andreas Öman
   6  *
   7  * This file is part of FFmpeg.
   8  *
   9  * FFmpeg is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * FFmpeg is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with FFmpeg; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 /**
  25  * @file
  26  * @brief wmall decoder implementation
  27  * Wmall is an MDCT based codec comparable to wma standard or AAC.
  28  * The decoding therefore consists of the following steps:
  29  * - bitstream decoding
  30  * - reconstruction of per-channel data
  31  * - rescaling and inverse quantization
  32  * - IMDCT
  33  * - windowing and overlap-add
  34  *
  35  * The compressed wmall bitstream is split into individual packets.
  36  * Every such packet contains one or more wma frames.
  37  * The compressed frames may have a variable length and frames may
  38  * cross packet boundaries.
  39  * Common to all wmall frames is the number of samples that are stored in
  40  * a frame.
  41  * The number of samples and a few other decode flags are stored
  42  * as extradata that has to be passed to the decoder.
  43  *
  44  * The wmall frames themselves are again split into a variable number of
  45  * subframes. Every subframe contains the data for 2^N time domain samples
  46  * where N varies between 7 and 12.
  47  *
  48  * Example wmall bitstream (in samples):
  49  *
  50  * ||   packet 0           || packet 1 || packet 2      packets
  51  * ---------------------------------------------------
  52  * || frame 0      || frame 1       || frame 2    ||    frames
  53  * ---------------------------------------------------
  54  * ||   |      |   ||   |   |   |   ||            ||    subframes of channel 0
  55  * ---------------------------------------------------
  56  * ||      |   |   ||   |   |   |   ||            ||    subframes of channel 1
  57  * ---------------------------------------------------
  58  *
  59  * The frame layouts for the individual channels of a wma frame do not need
  60  * to be the same.
  61  *
  62  * However, if the offsets and lengths of several subframes of a frame are the
  63  * same, the subframes of the channels can be grouped.
  64  * Every group may then use special coding techniques like M/S stereo coding
  65  * to improve the compression ratio. These channel transformations do not
  66  * need to be applied to a whole subframe. Instead, they can also work on
  67  * individual scale factor bands (see below).
  68  * The coefficients that carry the audio signal in the frequency domain
  69  * are transmitted as huffman-coded vectors with 4, 2 and 1 elements.
  70  * In addition to that, the encoder can switch to a runlevel coding scheme
  71  * by transmitting subframe_length / 128 zero coefficients.
  72  *
  73  * Before the audio signal can be converted to the time domain, the
  74  * coefficients have to be rescaled and inverse quantized.
  75  * A subframe is therefore split into several scale factor bands that get
  76  * scaled individually.
  77  * Scale factors are submitted for every frame but they might be shared
  78  * between the subframes of a channel. Scale factors are initially DPCM-coded.
  79  * Once scale factors are shared, the differences are transmitted as runlevel
  80  * codes.
  81  * Every subframe length and offset combination in the frame layout shares a
  82  * common quantization factor that can be adjusted for every channel by a
  83  * modifier.
  84  * After the inverse quantization, the coefficients get processed by an IMDCT.
  85  * The resulting values are then windowed with a sine window and the first half
  86  * of the values are added to the second half of the output from the previous
  87  * subframe in order to reconstruct the output samples.
  88  */
  89
  90 #include "avcodec.h"
  91 #include "internal.h"
  92 #include "get_bits.h"
  93 #include "put_bits.h"
  94 #include "dsputil.h"
  95 #include "wma.h"
  96
  97 /** current decoder limitations */
  98 #define WMALL_MAX_CHANNELS    8                             ///< max number of handled channels
  99 #define MAX_SUBFRAMES  32                                    ///< max number of subframes per channel
 100 #define MAX_BANDS      29                                    ///< max number of scale factor bands
 101 #define MAX_FRAMESIZE  32768                                 ///< maximum compressed frame size in bytes (size of the frame_data buffer)
 102
 103 #define WMALL_BLOCK_MIN_BITS  6                                           ///< log2 of min block size
 104 #define WMALL_BLOCK_MAX_BITS 12                                           ///< log2 of max block size
 105 #define WMALL_BLOCK_MAX_SIZE (1 << WMALL_BLOCK_MAX_BITS)                 ///< maximum block size (4096)
 106 #define WMALL_BLOCK_SIZES    (WMALL_BLOCK_MAX_BITS - WMALL_BLOCK_MIN_BITS + 1) ///< number of possible block sizes (7)
 107
 108
 109 #define VLCBITS            9
 110 #define SCALEVLCBITS       8
 111 #define VEC4MAXDEPTH    ((HUFF_VEC4_MAXBITS+VLCBITS-1)/VLCBITS)             ///< ceil(HUFF_VEC4_MAXBITS / VLCBITS)
 112 #define VEC2MAXDEPTH    ((HUFF_VEC2_MAXBITS+VLCBITS-1)/VLCBITS)             ///< ceil(HUFF_VEC2_MAXBITS / VLCBITS)
 113 #define VEC1MAXDEPTH    ((HUFF_VEC1_MAXBITS+VLCBITS-1)/VLCBITS)             ///< ceil(HUFF_VEC1_MAXBITS / VLCBITS)
 114 #define SCALEMAXDEPTH   ((HUFF_SCALE_MAXBITS+SCALEVLCBITS-1)/SCALEVLCBITS)  ///< ceil(HUFF_SCALE_MAXBITS / SCALEVLCBITS)
 115 #define SCALERLMAXDEPTH ((HUFF_SCALE_RL_MAXBITS+VLCBITS-1)/VLCBITS)         ///< ceil(HUFF_SCALE_RL_MAXBITS / VLCBITS)
 116
 117 static float            sin64[33];        ///< sine table for decorrelation
 118
 119 /**
 120  * @brief frame specific decoder context for a single channel
 121  */
 122 typedef struct {
 123     int16_t  prev_block_len;                          ///< length of the previous block (initialized to samples_per_frame by decode_init)
 124     uint8_t  transmit_coefs;                          ///< NOTE(review): not referenced in the visible part of the file - confirm purpose
 125     uint8_t  num_subframes;                           ///< number of subframes of this channel in the current frame (rebuilt by decode_tilehdr)
 126     uint16_t subframe_len[MAX_SUBFRAMES];             ///< subframe length in samples
 127     uint16_t subframe_offset[MAX_SUBFRAMES];          ///< subframe positions in the current frame
 128     uint8_t  cur_subframe;                            ///< current subframe number
 129     uint16_t decoded_samples;                         ///< number of already processed samples
 130     uint8_t  grouped;                                 ///< channel is part of a group
 131     int      quant_step;                              ///< quantization step for the current subframe
 132     int8_t   reuse_sf;                                ///< share scale factors between subframes
 133     int8_t   scale_factor_step;                       ///< scaling step for the current subframe
 134     int      max_scale_factor;                        ///< maximum scale factor for the current subframe
 135     int      saved_scale_factors[2][MAX_BANDS];       ///< resampled and (previously) transmitted scale factor values
 136     int8_t   scale_factor_idx;                        ///< index for the transmitted scale factor values (used for resampling)
 137     int*     scale_factors;                           ///< pointer to the scale factor values used for decoding
 138     uint8_t  table_idx;                               ///< index in sf_offsets for the scale factor reference block
 139     float*   coeffs;                                  ///< pointer to the subframe decode buffer
 140     uint16_t num_vec_coeffs;                          ///< number of vector coded coefficients
 141     DECLARE_ALIGNED(16, float, out)[WMALL_BLOCK_MAX_SIZE + WMALL_BLOCK_MAX_SIZE / 2]; ///< output buffer
 142     int      transient_counter;                       ///< number of transient samples from the beginning of transient zone
 143 } WmallChannelCtx;
 144
 145 /**
 146  * @brief channel group for channel transformations
 147  */
 148 typedef struct {
 149     uint8_t num_channels;                                     ///< number of channels in the group
 150     int8_t  transform;                                        ///< transform on / off
 151     int8_t  transform_band[MAX_BANDS];                        ///< controls if the transform is enabled for a certain band
 152     float   decorrelation_matrix[WMALL_MAX_CHANNELS*WMALL_MAX_CHANNELS]; ///< room for a WMALL_MAX_CHANNELS x WMALL_MAX_CHANNELS matrix
 153     float*  channel_data[WMALL_MAX_CHANNELS];                ///< transformation coefficients
 154 } WmallChannelGrp;
 155
 156 /**
 157  * @brief main decoder context
 158  */
 159 typedef struct WmallDecodeCtx {
 160     /* generic decoder variables */
 161     AVCodecContext*  avctx;                         ///< codec context for av_log
 162     DSPContext       dsp;                           ///< accelerated DSP functions
 163     AVFrame          frame;                         ///< frame that decode_init exposes through avctx->coded_frame
 164     uint8_t          frame_data[MAX_FRAMESIZE +
 165                       FF_INPUT_BUFFER_PADDING_SIZE];///< compressed frame data
 166     PutBitContext    pb;                            ///< context for filling the frame_data buffer
 167     FFTContext       mdct_ctx[WMALL_BLOCK_SIZES];  ///< MDCT context per block size
 168     DECLARE_ALIGNED(16, float, tmp)[WMALL_BLOCK_MAX_SIZE]; ///< IMDCT output buffer
 169     float*           windows[WMALL_BLOCK_SIZES];   ///< windows for the different block sizes
 170
 171     /* frame size dependent frame information (set during initialization) */
 172     uint32_t         decode_flags;                  ///< used compression features
 173     uint8_t          len_prefix;                    ///< frame is prefixed with its length
 174     uint8_t          dynamic_range_compression;     ///< frame contains DRC data
 175     uint8_t          bits_per_sample;               ///< integer audio sample size for the unscaled IMDCT output (used to scale to [-1.0, 1.0])
 176     uint16_t         samples_per_frame;             ///< number of samples to output
 177     uint16_t         log2_frame_size;               ///< av_log2(block_align) + 4 (see decode_init)
 178     int8_t           num_channels;                  ///< number of channels in the stream (same as AVCodecContext.channels)
 179     int8_t           lfe_channel;                   ///< lfe channel index
 180     uint8_t          max_num_subframes;             ///< 1 << (bits 3..5 of decode_flags)
 181     uint8_t          subframe_len_bits;             ///< number of bits used for the subframe length
 182     uint8_t          max_subframe_len_bit;          ///< flag indicating that the subframe is of maximum size when the first subframe length bit is 1
 183     uint16_t         min_samples_per_subframe;      ///< samples_per_frame / max_num_subframes
 184     int8_t           num_sfb[WMALL_BLOCK_SIZES];   ///< scale factor bands per block size
 185     int16_t          sfb_offsets[WMALL_BLOCK_SIZES][MAX_BANDS];                    ///< scale factor band offsets (multiples of 4)
 186     int8_t           sf_offsets[WMALL_BLOCK_SIZES][WMALL_BLOCK_SIZES][MAX_BANDS]; ///< scale factor resample matrix
 187     int16_t          subwoofer_cutoffs[WMALL_BLOCK_SIZES]; ///< subwoofer cutoff values
 188
 189     /* packet decode state */
 190     GetBitContext    pgb;                           ///< bitstream reader context for the packet
 191     int              next_packet_start;             ///< start offset of the next wma packet in the demuxer packet
 192     uint8_t          packet_offset;                 ///< frame offset in the packet
 193     uint8_t          packet_sequence_number;        ///< current packet number
 194     int              num_saved_bits;                ///< saved number of bits
 195     int              frame_offset;                  ///< frame offset in the bit reservoir
 196     int              subframe_offset;               ///< subframe offset in the bit reservoir
 197     uint8_t          packet_loss;                   ///< set in case of bitstream error
 198     uint8_t          packet_done;                   ///< set when a packet is fully decoded
 199
 200     /* frame decode state */
 201     uint32_t         frame_num;                     ///< current frame number (not used for decoding)
 202     GetBitContext    gb;                            ///< bitstream reader context
 203     int              buf_bit_size;                  ///< buffer size in bits
 204     int16_t*         samples_16;                    ///< current samplebuffer pointer (16-bit)
 205     int16_t*         samples_16_end;                ///< maximum samplebuffer pointer
 206     int             *samples_32;                    ///< current samplebuffer pointer (24-bit)
 207     int             *samples_32_end;                ///< maximum samplebuffer pointer
 208     uint8_t          drc_gain;                      ///< gain for the DRC tool
 209     int8_t           skip_frame;                    ///< skip output step
 210     int8_t           parsed_all_subframes;          ///< all subframes decoded?
 211
 212     /* subframe/block decode state */
 213     int16_t          subframe_len;                  ///< current subframe length
 214     int8_t           channels_for_cur_subframe;     ///< number of channels that contain the subframe
 215     int8_t           channel_indexes_for_cur_subframe[WMALL_MAX_CHANNELS];
 216     int8_t           num_bands;                     ///< number of scale factor bands
 217     int8_t           transmit_num_vec_coeffs;       ///< number of vector coded coefficients is part of the bitstream
 218     int16_t*         cur_sfb_offsets;               ///< sfb offsets for the current block
 219     uint8_t          table_idx;                     ///< index for the num_sfb, sfb_offsets, sf_offsets and subwoofer_cutoffs tables
 220     int8_t           esc_len;                       ///< length of escaped coefficients
 221
 222     uint8_t          num_chgroups;                  ///< number of channel groups
 223     WmallChannelGrp chgroup[WMALL_MAX_CHANNELS];    ///< channel group information
 224
 225     WmallChannelCtx channel[WMALL_MAX_CHANNELS];    ///< per channel data
 226
 227     // WMA lossless
 228
 229     uint8_t do_arith_coding;    // NOTE(review): the do_* flags are not written in the visible part of the file - presumably per-tile feature switches, confirm
 230     uint8_t do_ac_filter;
 231     uint8_t do_inter_ch_decorr; // when set, the first residue of a seekable tile is coded with one extra bit
 232     uint8_t do_mclms;
 233     uint8_t do_lpc;
 234
 235     int8_t acfilter_order;      // number of transmitted AC filter coefficients, 1..16 (decode_ac_filter)
 236     int8_t acfilter_scaling;    // bit width of each transmitted AC filter coefficient, 0..15
 237     int64_t acfilter_coeffs[16]; // transmitted value + 1
 238     int acfilter_prevvalues[2][16];
 239
 240     int8_t mclms_order;         // (4-bit field + 1) * 2, i.e. 2..32 (decode_mclms)
 241     int8_t mclms_scaling;       // 4-bit field
 242     int16_t mclms_coeffs[128];  // decode_mclms writes mclms_order * num_channels * num_channels entries
 243     int16_t mclms_coeffs_cur[4]; // written at [i * num_channels + c] for c < i
 244     int mclms_prevvalues[64];   // FIXME: should be 32-bit / 16-bit depending on bit-depth
 245     int16_t mclms_updates[64];
 246     int mclms_recent;
 247
 248     int movave_scaling;         // shift used for the moving average kept in ave_sum[]
 249     int quant_stepsize;
 250
 251     struct {
 252     int order;                  // (7-bit field + 1) * 8 (decode_cdlms)
 253     int scaling;                // 4-bit field
 254     int coefsend;               // number of coefficients transmitted for this filter
 255     int bitsend;                // number of bits per transmitted coefficient
 256     int16_t coefs[256];
 257     int lms_prevvalues[512];    // FIXME: see above
 258     int16_t lms_updates[512];   // and here too
 259     int recent;
 260     } cdlms[2][9];              /* XXX: Here, 2 is the max. no. of channels allowed,
 261                                         9 is the maximum no. of filters per channel.
 262                                         Question is, why 2 if WMALL_MAX_CHANNELS == 8 */
 263
 264
 265     int cdlms_ttl[2];           // number of cdlms filters in use per channel, 1..8 (decode_cdlms)
 266
 267     int bV3RTM;                 // decode_flags & 0x100
 268
 269     int is_channel_coded[2];    // XXX: same question as above applies here too (and below)
 270     int update_speed[2];
 271
 272     int transient[2];           // per-channel transient flag (decode_channel_residues)
 273     int transient_pos[2];       // transmitted transient position, log2(tile_size) bits wide
 274     int seekable_tile;          // when set, ave_sum[] and the first residue are transmitted explicitly
 275
 276     int ave_sum[2];             // running sum that selects the number of remainder bits per residue
 277
 278     int channel_residues[2][2048]; // decoded residues, one row per channel
 279
 280
 281     int lpc_coefs[2][40];
 282     int lpc_order;              // 5-bit field + 1 (decode_lpc)
 283     int lpc_scaling;            // 4-bit field
 284     int lpc_intbits;            // 3-bit field + 1
 285
 286     int channel_coeffs[2][2048]; // FIXME: should be 32-bit / 16-bit depending on bit-depth
 287
 288 } WmallDecodeCtx;
 289
 290
 291 #undef dprintf /* drop any earlier definition so the wrapper below is the one in effect */
 292 #define dprintf(pctx, ...) av_log(pctx, AV_LOG_DEBUG, __VA_ARGS__)
 293
 294
 295 static int num_logged_tiles = 0;     ///< debug counter; only referenced by commented-out logging in the visible code
 296 static int num_logged_subframes = 0; ///< presumably a debug counter as well - not used in the visible part, confirm
 297 static int num_lms_update_call = 0;  ///< presumably a debug counter as well - not used in the visible part, confirm
 298
 299 /**
 300  *@brief helper function to print the most important members of the context
 301  *@param s context
 302  */
 303 static void av_cold dump_context(WmallDecodeCtx *s)
 304 {
 305 #define PRINT(a, b)     av_log(s->avctx, AV_LOG_DEBUG, " %s = %d\n", a, b);
 306 #define PRINT_HEX(a, b) av_log(s->avctx, AV_LOG_DEBUG, " %s = %x\n", a, b);
 307
 308     PRINT("ed sample bit depth", s->bits_per_sample);
 309     PRINT_HEX("ed decode flags", s->decode_flags);
 310     PRINT("samples per frame",   s->samples_per_frame);
 311     PRINT("log2 frame size",     s->log2_frame_size);
 312     PRINT("max num subframes",   s->max_num_subframes);
 313     PRINT("len prefix",          s->len_prefix);
 314     PRINT("num channels",        s->num_channels);
 315 }
 316
 317 static void dump_int_buffer(uint8_t *buffer, int size, int length, int delimiter)
 318 {
 319     int i;
 320
 321     for (i=0 ; i<length ; i++) {
 322         if (!(i%delimiter))
 323             av_log(0, 0, "\n[%d] ", i);
 324         av_log(0, 0, "%d, ", *(int16_t *)(buffer + i * size));
 325     }
 326     av_log(0, 0, "\n");
 327 }
 328
 329 /**
 330  *@brief Uninitialize the decoder and free all resources.
 331  *@param avctx codec context
 332  *@return 0 on success, < 0 otherwise
 333  */
 334 static av_cold int decode_end(AVCodecContext *avctx)
 335 {
 336     WmallDecodeCtx *s = avctx->priv_data;
 337     int i;
 338
 339     for (i = 0; i < WMALL_BLOCK_SIZES; i++)
 340         ff_mdct_end(&s->mdct_ctx[i]);
 341
 342     return 0;
 343 }
 344
 345 /**
 346  *@brief Initialize the decoder.
 347  *@param avctx codec context
 348  *@return 0 on success, -1 otherwise
 349  */
 350 static av_cold int decode_init(AVCodecContext *avctx)
 351 {
 352     WmallDecodeCtx *s = avctx->priv_data;
 353     uint8_t *edata_ptr = avctx->extradata;
 354     unsigned int channel_mask;
 355     int i;
 356     int log2_max_num_subframes;
 357     int num_possible_block_sizes;
 358
 359     s->avctx = avctx;
 360     dsputil_init(&s->dsp, avctx);
 361     init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
 362
 363     if (avctx->extradata_size >= 18) {
 364         s->decode_flags    = AV_RL16(edata_ptr+14);
 365         channel_mask       = AV_RL32(edata_ptr+2);
 366         s->bits_per_sample = AV_RL16(edata_ptr);
 367         if (s->bits_per_sample == 16)
 368             avctx->sample_fmt = AV_SAMPLE_FMT_S16;
 369         else if (s->bits_per_sample == 24)
 370             avctx->sample_fmt = AV_SAMPLE_FMT_S32;
 371         else {
 372             av_log(avctx, AV_LOG_ERROR, "Unknown bit-depth: %d\n",
 373                    s->bits_per_sample);
 374             return AVERROR_INVALIDDATA;
 375         }
 376         /** dump the extradata */
 377         for (i = 0; i < avctx->extradata_size; i++)
 378             dprintf(avctx, "[%x] ", avctx->extradata[i]);
 379         dprintf(avctx, "\n");
 380
 381     } else {
 382         av_log_ask_for_sample(avctx, "Unknown extradata size\n");
 383         return AVERROR_INVALIDDATA;
 384     }
 385
 386     /** generic init */
 387     s->log2_frame_size = av_log2(avctx->block_align) + 4;
 388
 389     /** frame info */
 390     s->skip_frame  = 1; /* skip first frame */
 391     s->packet_loss = 1;
 392     s->len_prefix  = (s->decode_flags & 0x40);
 393
 394     /** get frame len */
 395     s->samples_per_frame = 1 << ff_wma_get_frame_len_bits(avctx->sample_rate,
 396                                                           3, s->decode_flags);
 397
 398     /** init previous block len */
 399     for (i = 0; i < avctx->channels; i++)
 400         s->channel[i].prev_block_len = s->samples_per_frame;
 401
 402     /** subframe info */
 403     log2_max_num_subframes  = ((s->decode_flags & 0x38) >> 3);
 404     s->max_num_subframes    = 1 << log2_max_num_subframes;
 405     s->max_subframe_len_bit = 0;
 406     s->subframe_len_bits    = av_log2(log2_max_num_subframes) + 1;
 407
 408     num_possible_block_sizes     = log2_max_num_subframes + 1;
 409     s->min_samples_per_subframe  = s->samples_per_frame / s->max_num_subframes;
 410     s->dynamic_range_compression = (s->decode_flags & 0x80);
 411
 412     s->bV3RTM = s->decode_flags & 0x100;
 413
 414     if (s->max_num_subframes > MAX_SUBFRAMES) {
 415         av_log(avctx, AV_LOG_ERROR, "invalid number of subframes %i\n",
 416                s->max_num_subframes);
 417         return AVERROR_INVALIDDATA;
 418     }
 419
 420     s->num_channels = avctx->channels;
 421
 422     /** extract lfe channel position */
 423     s->lfe_channel = -1;
 424
 425     if (channel_mask & 8) {
 426         unsigned int mask;
 427         for (mask = 1; mask < 16; mask <<= 1) {
 428             if (channel_mask & mask)
 429                 ++s->lfe_channel;
 430         }
 431     }
 432
 433     if (s->num_channels < 0) {
 434         av_log(avctx, AV_LOG_ERROR, "invalid number of channels %d\n", s->num_channels);
 435         return AVERROR_INVALIDDATA;
 436     } else if (s->num_channels > WMALL_MAX_CHANNELS) {
 437         av_log_ask_for_sample(avctx, "unsupported number of channels\n");
 438         return AVERROR_PATCHWELCOME;
 439     }
 440
 441     avcodec_get_frame_defaults(&s->frame);
 442     avctx->coded_frame = &s->frame;
 443
 444     avctx->channel_layout = channel_mask;
 445     return 0;
 446 }
 447
 448 /**
 449  *@brief Decode the subframe length.
 450  *@param s context
 451  *@param offset sample offset in the frame
 452  *@return decoded subframe length on success, < 0 in case of an error
 453  */
 454 static int decode_subframe_length(WmallDecodeCtx *s, int offset)
 455 {
 456     int frame_len_ratio;
 457     int subframe_len, len;
 458
 459     /** no need to read from the bitstream when only one length is possible */
 460     if (offset == s->samples_per_frame - s->min_samples_per_subframe)
 461         return s->min_samples_per_subframe;
 462
 463     len = av_log2(s->max_num_subframes - 1) + 1;
 464     frame_len_ratio = get_bits(&s->gb, len);
 465
 466     subframe_len = s->min_samples_per_subframe * (frame_len_ratio + 1);
 467
 468     /** sanity check the length */
 469     if (subframe_len < s->min_samples_per_subframe ||
 470         subframe_len > s->samples_per_frame) {
 471         av_log(s->avctx, AV_LOG_ERROR, "broken frame: subframe_len %i\n",
 472                subframe_len);
 473         return AVERROR_INVALIDDATA;
 474     }
 475     return subframe_len;
 476 }
 477
 478 /**
 479  *@brief Decode how the data in the frame is split into subframes.
 480  *       Every WMA frame contains the encoded data for a fixed number of
 481  *       samples per channel. The data for every channel might be split
 482  *       into several subframes. This function will reconstruct the list of
 483  *       subframes for every channel.
 484  *
 485  *       If the subframes are not evenly split, the algorithm estimates the
 486  *       channels with the lowest number of total samples.
 487  *       Afterwards, for each of these channels a bit is read from the
 488  *       bitstream that indicates if the channel contains a subframe with the
 489  *       next subframe size that is going to be read from the bitstream or not.
 490  *       If a channel contains such a subframe, the subframe size gets added to
 491  *       the channel's subframe list.
 492  *       The algorithm repeats these steps until the frame is properly divided
 493  *       between the individual channels.
 494  *
 495  *@param s context
 496  *@return 0 on success, < 0 in case of an error
 497  */
 498 static int decode_tilehdr(WmallDecodeCtx *s)
 499 {
 500     uint16_t num_samples[WMALL_MAX_CHANNELS];        /**< sum of samples for all currently known subframes of a channel */
 501     uint8_t  contains_subframe[WMALL_MAX_CHANNELS];  /**< flag indicating if a channel contains the current subframe */
 502     int channels_for_cur_subframe = s->num_channels;  /**< number of channels that contain the current subframe */
 503     int fixed_channel_layout = 0;                     /**< flag indicating that all channels use the same subfra2me offsets and sizes */
 504     int min_channel_len = 0;                          /**< smallest sum of samples (channels with this length will be processed first) */
 505     int c;
 506
 507     /* Should never consume more than 3073 bits (256 iterations for the
 508      * while loop when always the minimum amount of 128 samples is substracted
 509      * from missing samples in the 8 channel case).
 510      * 1 + BLOCK_MAX_SIZE * MAX_CHANNELS / BLOCK_MIN_SIZE * (MAX_CHANNELS  + 4)
 511      */
 512
 513     /** reset tiling information */
 514     for (c = 0; c < s->num_channels; c++)
 515         s->channel[c].num_subframes = 0;
 516
 517     memset(num_samples, 0, sizeof(num_samples));
 518
 519     if (s->max_num_subframes == 1 || get_bits1(&s->gb))
 520         fixed_channel_layout = 1;
 521
 522     /** loop until the frame data is split between the subframes */
 523     do {
 524         int subframe_len;
 525
 526         /** check which channels contain the subframe */
 527         for (c = 0; c < s->num_channels; c++) {
 528             if (num_samples[c] == min_channel_len) {
 529                 if (fixed_channel_layout || channels_for_cur_subframe == 1 ||
 530                    (min_channel_len == s->samples_per_frame - s->min_samples_per_subframe)) {
 531                     contains_subframe[c] = 1;
 532                 } else {
 533                     contains_subframe[c] = get_bits1(&s->gb);
 534                 }
 535             } else
 536                 contains_subframe[c] = 0;
 537         }
 538
 539         /** get subframe length, subframe_len == 0 is not allowed */
 540         if ((subframe_len = decode_subframe_length(s, min_channel_len)) <= 0)
 541             return AVERROR_INVALIDDATA;
 542         /** add subframes to the individual channels and find new min_channel_len */
 543         min_channel_len += subframe_len;
 544         for (c = 0; c < s->num_channels; c++) {
 545             WmallChannelCtx* chan = &s->channel[c];
 546
 547             if (contains_subframe[c]) {
 548                 if (chan->num_subframes >= MAX_SUBFRAMES) {
 549                     av_log(s->avctx, AV_LOG_ERROR,
 550                            "broken frame: num subframes > 31\n");
 551                     return AVERROR_INVALIDDATA;
 552                 }
 553                 chan->subframe_len[chan->num_subframes] = subframe_len;
 554                 num_samples[c] += subframe_len;
 555                 ++chan->num_subframes;
 556                 if (num_samples[c] > s->samples_per_frame) {
 557                     av_log(s->avctx, AV_LOG_ERROR, "broken frame: "
 558                            "channel len(%d) > samples_per_frame(%d)\n",
 559                            num_samples[c], s->samples_per_frame);
 560                     return AVERROR_INVALIDDATA;
 561                 }
 562             } else if (num_samples[c] <= min_channel_len) {
 563                 if (num_samples[c] < min_channel_len) {
 564                     channels_for_cur_subframe = 0;
 565                     min_channel_len = num_samples[c];
 566                 }
 567                 ++channels_for_cur_subframe;
 568             }
 569         }
 570     } while (min_channel_len < s->samples_per_frame);
 571
 572     for (c = 0; c < s->num_channels; c++) {
 573         int i;
 574         int offset = 0;
 575         for (i = 0; i < s->channel[c].num_subframes; i++) {
 576             s->channel[c].subframe_offset[i] = offset;
 577             offset += s->channel[c].subframe_len[i];
 578         }
 579     }
 580
 581     return 0;
 582 }
 583
 584
 585 static int my_log2(unsigned int i)
 586 {
 587     unsigned int iLog2 = 0;
 588     while ((i >> iLog2) > 1)
 589         iLog2++;
 590     return iLog2;
 591 }
 592
 593
 594 /**
 595  *
 596  */
 597 static void decode_ac_filter(WmallDecodeCtx *s)
 598 {
 599     int i;
 600     s->acfilter_order = get_bits(&s->gb, 4) + 1;
 601     s->acfilter_scaling = get_bits(&s->gb, 4);
 602
 603     for(i = 0; i < s->acfilter_order; i++) {
 604         s->acfilter_coeffs[i] = get_bits(&s->gb, s->acfilter_scaling) + 1;
 605     }
 606 }
 607
 608
 609 /**
 610  *
 611  */
 612 static void decode_mclms(WmallDecodeCtx *s)
 613 {
 614     s->mclms_order = (get_bits(&s->gb, 4) + 1) * 2;
 615     s->mclms_scaling = get_bits(&s->gb, 4);
 616     if(get_bits1(&s->gb)) {
 617         // mclms_send_coef
 618         int i;
 619         int send_coef_bits;
 620         int cbits = av_log2(s->mclms_scaling + 1);
 621         assert(cbits == my_log2(s->mclms_scaling + 1));
 622         if(1 << cbits < s->mclms_scaling + 1)
 623             cbits++;
 624
 625         send_coef_bits = (cbits ? get_bits(&s->gb, cbits) : 0) + 2;
 626
 627         for(i = 0; i < s->mclms_order * s->num_channels * s->num_channels; i++) {
 628             s->mclms_coeffs[i] = get_bits(&s->gb, send_coef_bits);
 629         }
 630
 631         for(i = 0; i < s->num_channels; i++) {
 632             int c;
 633             for(c = 0; c < i; c++) {
 634                 s->mclms_coeffs_cur[i * s->num_channels + c] = get_bits(&s->gb, send_coef_bits);
 635             }
 636         }
 637     }
 638 }
 639
 640
 641 /**
 642  *
 643  */
 644 static void decode_cdlms(WmallDecodeCtx *s)
 645 {
 646     int c, i;
 647     int cdlms_send_coef = get_bits1(&s->gb);
 648
 649     for(c = 0; c < s->num_channels; c++) {
 650         s->cdlms_ttl[c] = get_bits(&s->gb, 3) + 1;
 651         for(i = 0; i < s->cdlms_ttl[c]; i++) {
 652             s->cdlms[c][i].order = (get_bits(&s->gb, 7) + 1) * 8;
 653         }
 654
 655         for(i = 0; i < s->cdlms_ttl[c]; i++) {
 656             s->cdlms[c][i].scaling = get_bits(&s->gb, 4);
 657         }
 658
 659         if(cdlms_send_coef) {
 660             for(i = 0; i < s->cdlms_ttl[c]; i++) {
 661                 int cbits, shift_l, shift_r, j;
 662                 cbits = av_log2(s->cdlms[c][i].order);
 663                 if(1 << cbits < s->cdlms[c][i].order)
 664                     cbits++;
 665                 s->cdlms[c][i].coefsend = get_bits(&s->gb, cbits) + 1;
 666
 667                 cbits = av_log2(s->cdlms[c][i].scaling + 1);
 668                 if(1 << cbits < s->cdlms[c][i].scaling + 1)
 669                     cbits++;
 670
 671                 s->cdlms[c][i].bitsend = get_bits(&s->gb, cbits) + 2;
 672                 shift_l = 32 - s->cdlms[c][i].bitsend;
 673                 shift_r = 32 - 2 - s->cdlms[c][i].scaling;
 674                 for(j = 0; j < s->cdlms[c][i].coefsend; j++) {
 675                     s->cdlms[c][i].coefs[j] =
 676                         (get_bits(&s->gb, s->cdlms[c][i].bitsend) << shift_l) >> shift_r;
 677                 }
 678             }
 679         }
 680     }
 681 }
 682
 683 /**
 684  *
 685  */
 686 static int decode_channel_residues(WmallDecodeCtx *s, int ch, int tile_size)
 687 {
 688     int i = 0;
 689     unsigned int ave_mean;
 690     s->transient[ch] = get_bits1(&s->gb);
 691     if(s->transient[ch]) {
 692             s->transient_pos[ch] = get_bits(&s->gb, av_log2(tile_size));
 693         if (s->transient_pos[ch])
 694                 s->transient[ch] = 0;
 695             s->channel[ch].transient_counter =
 696                 FFMAX(s->channel[ch].transient_counter, s->samples_per_frame / 2);
 697         } else if (s->channel[ch].transient_counter)
 698             s->transient[ch] = 1;
 699
 700     if(s->seekable_tile) {
 701         ave_mean = get_bits(&s->gb, s->bits_per_sample);
 702         s->ave_sum[ch] = ave_mean << (s->movave_scaling + 1);
 703 //        s->ave_sum[ch] *= 2;
 704     }
 705
 706     if(s->seekable_tile) {
 707         if(s->do_inter_ch_decorr)
 708             s->channel_residues[ch][0] = get_sbits(&s->gb, s->bits_per_sample + 1);
 709         else
 710             s->channel_residues[ch][0] = get_sbits(&s->gb, s->bits_per_sample);
 711         i++;
 712     }
 713     //av_log(0, 0, "%8d: ", num_logged_tiles++);
 714     for(; i < tile_size; i++) {
 715         int quo = 0, rem, rem_bits, residue;
 716         while(get_bits1(&s->gb))
 717             quo++;
 718         if(quo >= 32)
 719             quo += get_bits_long(&s->gb, get_bits(&s->gb, 5) + 1);
 720
 721     ave_mean = (s->ave_sum[ch] + (1 << s->movave_scaling)) >> (s->movave_scaling + 1);
 722     if (ave_mean <= 1)
 723         residue = quo;
 724     else
 725     {
 726         rem_bits = av_ceil_log2(ave_mean);
 727         rem = rem_bits ? get_bits(&s->gb, rem_bits) : 0;
 728         residue = (quo << rem_bits) + rem;
 729     }
 730
 731         s->ave_sum[ch] = residue + s->ave_sum[ch] - (s->ave_sum[ch] >> s->movave_scaling);
 732
 733         if(residue & 1)
 734             residue = -(residue >> 1) - 1;
 735         else
 736             residue = residue >> 1;
 737         s->channel_residues[ch][i] = residue;
 738     }
 739     //dump_int_buffer(s->channel_residues[ch], 4, tile_size, 16);
 740
 741     return 0;
 742
 743 }
 744
 745
 746 /**
 747  *
 748  */
 749 static void
 750 decode_lpc(WmallDecodeCtx *s)
 751 {
 752     int ch, i, cbits;
 753     s->lpc_order = get_bits(&s->gb, 5) + 1;
 754     s->lpc_scaling = get_bits(&s->gb, 4);
 755     s->lpc_intbits = get_bits(&s->gb, 3) + 1;
 756     cbits = s->lpc_scaling + s->lpc_intbits;
 757     for(ch = 0; ch < s->num_channels; ch++) {
 758         for(i = 0; i < s->lpc_order; i++) {
 759             s->lpc_coefs[ch][i] = get_sbits(&s->gb, cbits);
 760         }
 761     }
 762 }
 763
 764
 765 static void clear_codec_buffers(WmallDecodeCtx *s)
 766 {
 767     int ich, ilms;
 768
 769     memset(s->acfilter_coeffs    , 0, 16 * sizeof(int));
 770     memset(s->acfilter_prevvalues, 0, 16 * 2 * sizeof(int)); // may be wrong
 771     memset(s->lpc_coefs          , 0, 40 * 2 * sizeof(int));
 772
 773     memset(s->mclms_coeffs    , 0, 128 * sizeof(int16_t));
 774     memset(s->mclms_coeffs_cur, 0,   4 * sizeof(int16_t));
 775     memset(s->mclms_prevvalues, 0,  64 * sizeof(int));
 776     memset(s->mclms_updates   , 0,  64 * sizeof(int16_t));
 777
 778     for (ich = 0; ich < s->num_channels; ich++) {
 779         for (ilms = 0; ilms < s->cdlms_ttl[ich]; ilms++) {
 780             memset(s->cdlms[ich][ilms].coefs         , 0, 256 * sizeof(int16_t));
 781             memset(s->cdlms[ich][ilms].lms_prevvalues, 0, 512 * sizeof(int));
 782             memset(s->cdlms[ich][ilms].lms_updates   , 0, 512 * sizeof(int16_t));
 783         }
 784         s->ave_sum[ich] = 0;
 785     }
 786 }
 787
 788 /**
 789  *@brief Resets filter parameters and transient area at new seekable tile
 790  */
 791 static void reset_codec(WmallDecodeCtx *s)
 792 {
 793     int ich, ilms;
 794     s->mclms_recent = s->mclms_order * s->num_channels;
 795     for (ich = 0; ich < s->num_channels; ich++) {
 796         for (ilms = 0; ilms < s->cdlms_ttl[ich]; ilms++)
 797             s->cdlms[ich][ilms].recent = s->cdlms[ich][ilms].order;
 798         /* first sample of a seekable subframe is considered as the starting of
 799            a transient area which is samples_per_frame samples long */
 800         s->channel[ich].transient_counter = s->samples_per_frame;
 801         s->transient[ich] = 1;
 802         s->transient_pos[ich] = 0;
 803     }
 804 }
 805
 806
 807
 808 static void mclms_update(WmallDecodeCtx *s, int icoef, int *pred)
 809 {
 810     int i, j, ich;
 811     int pred_error;
 812     int order = s->mclms_order;
 813     int num_channels = s->num_channels;
 814     int range = 1 << (s->bits_per_sample - 1);
 815     //int bps = s->bits_per_sample > 16 ? 4 : 2; // bytes per sample
 816
 817     for (ich = 0; ich < num_channels; ich++) {
 818         pred_error = s->channel_residues[ich][icoef] - pred[ich];
 819         if (pred_error > 0) {
 820             for (i = 0; i < order * num_channels; i++)
 821                 s->mclms_coeffs[i + ich * order * num_channels] +=
 822                     s->mclms_updates[s->mclms_recent + i];
 823             for (j = 0; j < ich; j++) {
 824                 if (s->channel_residues[j][icoef] > 0)
 825                     s->mclms_coeffs_cur[ich * num_channels + j] += 1;
 826                 else if (s->channel_residues[j][icoef] < 0)
 827                     s->mclms_coeffs_cur[ich * num_channels + j] -= 1;
 828             }
 829         } else if (pred_error < 0) {
 830             for (i = 0; i < order * num_channels; i++)
 831                 s->mclms_coeffs[i + ich * order * num_channels] -=
 832                     s->mclms_updates[s->mclms_recent + i];
 833             for (j = 0; j < ich; j++) {
 834                 if (s->channel_residues[j][icoef] > 0)
 835                     s->mclms_coeffs_cur[ich * num_channels + j] -= 1;
 836                 else if (s->channel_residues[j][icoef] < 0)
 837                     s->mclms_coeffs_cur[ich * num_channels + j] += 1;
 838             }
 839         }
 840     }
 841
 842     for (ich = num_channels - 1; ich >= 0; ich--) {
 843         s->mclms_recent--;
 844         s->mclms_prevvalues[s->mclms_recent] = s->channel_residues[ich][icoef];
 845         if (s->channel_residues[ich][icoef] > range - 1)
 846             s->mclms_prevvalues[s->mclms_recent] = range - 1;
 847         else if (s->channel_residues[ich][icoef] < -range)
 848             s->mclms_prevvalues[s->mclms_recent] = -range;
 849
 850         s->mclms_updates[s->mclms_recent] = 0;
 851         if (s->channel_residues[ich][icoef] > 0)
 852             s->mclms_updates[s->mclms_recent] = 1;
 853         else if (s->channel_residues[ich][icoef] < 0)
 854             s->mclms_updates[s->mclms_recent] = -1;
 855     }
 856
 857     if (s->mclms_recent == 0) {
 858         memcpy(&s->mclms_prevvalues[order * num_channels],
 859                s->mclms_prevvalues,
 860                4 * order * num_channels);
 861         memcpy(&s->mclms_updates[order * num_channels],
 862                s->mclms_updates,
 863                2 * order * num_channels);
 864         s->mclms_recent = num_channels * order;
 865     }
 866 }
 867
 868 static void mclms_predict(WmallDecodeCtx *s, int icoef, int *pred)
 869 {
 870     int ich, i;
 871     int order = s->mclms_order;
 872     int num_channels = s->num_channels;
 873
 874     for (ich = 0; ich < num_channels; ich++) {
 875         if (!s->is_channel_coded[ich])
 876             continue;
 877         pred[ich] = 0;
 878         for (i = 0; i < order * num_channels; i++)
 879             pred[ich] += s->mclms_prevvalues[i + s->mclms_recent] *
 880                          s->mclms_coeffs[i + order * num_channels * ich];
 881         for (i = 0; i < ich; i++)
 882             pred[ich] += s->channel_residues[i][icoef] *
 883                          s->mclms_coeffs_cur[i + num_channels * ich];
 884         pred[ich] += 1 << s->mclms_scaling - 1;
 885         pred[ich] >>= s->mclms_scaling;
 886         s->channel_residues[ich][icoef] += pred[ich];
 887     }
 888 }
 889
 890 static void revert_mclms(WmallDecodeCtx *s, int tile_size)
 891 {
 892     int icoef, pred[WMALL_MAX_CHANNELS] = {0};
 893     for (icoef = 0; icoef < tile_size; icoef++) {
 894         mclms_predict(s, icoef, pred);
 895         mclms_update(s, icoef, pred);
 896     }
 897 }
 898
 899 static int lms_predict(WmallDecodeCtx *s, int ich, int ilms)
 900 {
 901     int pred = 0;
 902     int icoef;
 903     int recent = s->cdlms[ich][ilms].recent;
 904
 905     for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
 906         pred += s->cdlms[ich][ilms].coefs[icoef] *
 907                     s->cdlms[ich][ilms].lms_prevvalues[icoef + recent];
 908
 909     //pred += (1 << (s->cdlms[ich][ilms].scaling - 1));
 910     /* XXX: Table 29 has:
 911             iPred >= cdlms[iCh][ilms].scaling;
 912        seems to me like a missing > */
 913     //pred >>= s->cdlms[ich][ilms].scaling;
 914     return pred;
 915 }
 916
 917 static void lms_update(WmallDecodeCtx *s, int ich, int ilms, int input, int residue)
 918 {
 919     int icoef;
 920     int recent = s->cdlms[ich][ilms].recent;
 921     int range = 1 << s->bits_per_sample - 1;
 922     //int bps = s->bits_per_sample > 16 ? 4 : 2; // bytes per sample
 923
 924     if (residue < 0) {
 925         for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
 926             s->cdlms[ich][ilms].coefs[icoef] -=
 927                 s->cdlms[ich][ilms].lms_updates[icoef + recent];
 928     } else if (residue > 0) {
 929         for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
 930             s->cdlms[ich][ilms].coefs[icoef] +=
 931                 s->cdlms[ich][ilms].lms_updates[icoef + recent];    /* spec mistakenly
 932                                                                     dropped the recent */
 933     }
 934
 935     if (recent)
 936         recent--;
 937     else {
 938         /* XXX: This memcpy()s will probably fail if a fixed 32-bit buffer is used.
 939                 follow kshishkov's suggestion of using a union. */
 940         memcpy(&s->cdlms[ich][ilms].lms_prevvalues[s->cdlms[ich][ilms].order],
 941                s->cdlms[ich][ilms].lms_prevvalues,
 942                4 * s->cdlms[ich][ilms].order);
 943         memcpy(&s->cdlms[ich][ilms].lms_updates[s->cdlms[ich][ilms].order],
 944                s->cdlms[ich][ilms].lms_updates,
 945                2 * s->cdlms[ich][ilms].order);
 946         recent = s->cdlms[ich][ilms].order - 1;
 947     }
 948
 949     s->cdlms[ich][ilms].lms_prevvalues[recent] = av_clip(input, -range, range - 1);
 950     if (!input)
 951         s->cdlms[ich][ilms].lms_updates[recent] = 0;
 952     else if (input < 0)
 953         s->cdlms[ich][ilms].lms_updates[recent] = -s->update_speed[ich];
 954     else
 955         s->cdlms[ich][ilms].lms_updates[recent] = s->update_speed[ich];
 956
 957     /* XXX: spec says:
 958     cdlms[iCh][ilms].updates[iRecent + cdlms[iCh][ilms].order >> 4] >>= 2;
 959     lms_updates[iCh][ilms][iRecent + cdlms[iCh][ilms].order >> 3] >>= 1;
 960
 961         Questions is - are cdlms[iCh][ilms].updates[] and lms_updates[][][] two
 962         seperate buffers? Here I've assumed that the two are same which makes
 963         more sense to me.
 964     */
 965     s->cdlms[ich][ilms].lms_updates[recent + (s->cdlms[ich][ilms].order >> 4)] >>= 2;
 966     s->cdlms[ich][ilms].lms_updates[recent + (s->cdlms[ich][ilms].order >> 3)] >>= 1;
 967     s->cdlms[ich][ilms].recent = recent;
 968 }
 969
 970 static void use_high_update_speed(WmallDecodeCtx *s, int ich)
 971 {
 972     int ilms, recent, icoef;
 973     for (ilms = s->cdlms_ttl[ich] - 1; ilms >= 0; ilms--) {
 974         recent = s->cdlms[ich][ilms].recent;
 975         if (s->update_speed[ich] == 16)
 976             continue;
 977         if (s->bV3RTM) {
 978             for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
 979                 s->cdlms[ich][ilms].lms_updates[icoef + recent] *= 2;
 980         } else {
 981             for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
 982                 s->cdlms[ich][ilms].lms_updates[icoef] *= 2;
 983         }
 984     }
 985     s->update_speed[ich] = 16;
 986 }
 987
 988 static void use_normal_update_speed(WmallDecodeCtx *s, int ich)
 989 {
 990     int ilms, recent, icoef;
 991     for (ilms = s->cdlms_ttl[ich] - 1; ilms >= 0; ilms--) {
 992         recent = s->cdlms[ich][ilms].recent;
 993         if (s->update_speed[ich] == 8)
 994             continue;
 995         if (s->bV3RTM) {
 996             for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
 997                 s->cdlms[ich][ilms].lms_updates[icoef + recent] /= 2;
 998         } else {
 999             for (icoef = 0; icoef < s->cdlms[ich][ilms].order; icoef++)
1000                 s->cdlms[ich][ilms].lms_updates[icoef] /= 2;
1001         }
1002     }
1003     s->update_speed[ich] = 8;
1004 }
1005
1006 static void revert_cdlms(WmallDecodeCtx *s, int ch, int coef_begin, int coef_end)
1007 {
1008     int icoef;
1009     int pred;
1010     int ilms, num_lms;
1011     int residue, input;
1012
1013     num_lms = s->cdlms_ttl[ch];
1014     for (ilms = num_lms - 1; ilms >= 0; ilms--) {
1015         //s->cdlms[ch][ilms].recent = s->cdlms[ch][ilms].order;
1016         for (icoef = coef_begin; icoef < coef_end; icoef++) {
1017             pred = 1 << (s->cdlms[ch][ilms].scaling - 1);
1018             residue = s->channel_residues[ch][icoef];
1019             pred += lms_predict(s, ch, ilms);
1020             input = residue + (pred >> s->cdlms[ch][ilms].scaling);
1021             lms_update(s, ch, ilms, input, residue);
1022             s->channel_residues[ch][icoef] = input;
1023         }
1024     }
1025 }
1026
1027 static void revert_inter_ch_decorr(WmallDecodeCtx *s, int tile_size)
1028 {
1029     int icoef;
1030     if (s->num_channels != 2)
1031         return;
1032     else if (s->is_channel_coded[0] && s->is_channel_coded[1]) {
1033         for (icoef = 0; icoef < tile_size; icoef++) {
1034             s->channel_residues[0][icoef] -= s->channel_residues[1][icoef] >> 1;
1035             s->channel_residues[1][icoef] += s->channel_residues[0][icoef];
1036         }
1037     }
1038 }
1039
1040 static void revert_acfilter(WmallDecodeCtx *s, int tile_size)
1041 {
1042     int ich, icoef;
1043     int pred;
1044     int i, j;
1045     int64_t *filter_coeffs = s->acfilter_coeffs;
1046     int scaling = s->acfilter_scaling;
1047     int order = s->acfilter_order;
1048
1049     for (ich = 0; ich < s->num_channels; ich++) {
1050         int *prevvalues = s->acfilter_prevvalues[ich];
1051         for (i = 0; i < order; i++) {
1052             pred = 0;
1053             for (j = 0; j < order; j++) {
1054                 if (i <= j)
1055                     pred += filter_coeffs[j] * prevvalues[j - i];
1056                 else
1057                     pred += s->channel_residues[ich][i - j - 1] * filter_coeffs[j];
1058             }
1059             pred >>= scaling;
1060             s->channel_residues[ich][i] += pred;
1061         }
1062         for (i = order; i < tile_size; i++) {
1063             pred = 0;
1064             for (j = 0; j < order; j++)
1065                 pred += s->channel_residues[ich][i - j - 1] * filter_coeffs[j];
1066             pred >>= scaling;
1067             s->channel_residues[ich][i] += pred;
1068         }
1069         for (j = 0; j < order; j++)
1070             prevvalues[j] = s->channel_residues[ich][tile_size - j - 1];
1071     }
1072 }
1073
1074 /**
1075  *@brief Decode a single subframe (block).
1076  *@param s codec context
1077  *@return 0 on success, < 0 when decoding failed
1078  */
1079 static int decode_subframe(WmallDecodeCtx *s)
1080 {
1081     int offset = s->samples_per_frame;
1082     int subframe_len = s->samples_per_frame;
1083     int i, j;
1084     int total_samples   = s->samples_per_frame * s->num_channels;
1085     int rawpcm_tile;
1086     int padding_zeroes;
1087
1088     s->subframe_offset = get_bits_count(&s->gb);
1089
1090     /** reset channel context and find the next block offset and size
1091         == the next block of the channel with the smallest number of
1092         decoded samples
1093     */
1094     for (i = 0; i < s->num_channels; i++) {
1095         s->channel[i].grouped = 0;
1096         if (offset > s->channel[i].decoded_samples) {
1097             offset = s->channel[i].decoded_samples;
1098             subframe_len =
1099                 s->channel[i].subframe_len[s->channel[i].cur_subframe];
1100         }
1101     }
1102
1103     /** get a list of all channels that contain the estimated block */
1104     s->channels_for_cur_subframe = 0;
1105     for (i = 0; i < s->num_channels; i++) {
1106         const int cur_subframe = s->channel[i].cur_subframe;
1107         /** substract already processed samples */
1108         total_samples -= s->channel[i].decoded_samples;
1109
1110         /** and count if there are multiple subframes that match our profile */
1111         if (offset == s->channel[i].decoded_samples &&
1112             subframe_len == s->channel[i].subframe_len[cur_subframe]) {
1113             total_samples -= s->channel[i].subframe_len[cur_subframe];
1114             s->channel[i].decoded_samples +=
1115                 s->channel[i].subframe_len[cur_subframe];
1116             s->channel_indexes_for_cur_subframe[s->channels_for_cur_subframe] = i;
1117             ++s->channels_for_cur_subframe;
1118         }
1119     }
1120
1121     /** check if the frame will be complete after processing the
1122         estimated block */
1123     if (!total_samples)
1124         s->parsed_all_subframes = 1;
1125
1126
1127     s->seekable_tile = get_bits1(&s->gb);
1128     if(s->seekable_tile) {
1129         clear_codec_buffers(s);
1130
1131         s->do_arith_coding    = get_bits1(&s->gb);
1132         if(s->do_arith_coding) {
1133             dprintf(s->avctx, "do_arith_coding == 1");
1134             abort();
1135         }
1136         s->do_ac_filter       = get_bits1(&s->gb);
1137         s->do_inter_ch_decorr = get_bits1(&s->gb);
1138         s->do_mclms           = get_bits1(&s->gb);
1139
1140         if(s->do_ac_filter)
1141             decode_ac_filter(s);
1142
1143         if(s->do_mclms)
1144             decode_mclms(s);
1145
1146         decode_cdlms(s);
1147         s->movave_scaling = get_bits(&s->gb, 3);
1148         s->quant_stepsize = get_bits(&s->gb, 8) + 1;
1149
1150             reset_codec(s);
1151     }
1152
1153     rawpcm_tile = get_bits1(&s->gb);
1154
1155     for(i = 0; i < s->num_channels; i++) {
1156         s->is_channel_coded[i] = 1;
1157     }
1158
1159     if(!rawpcm_tile) {
1160
1161         for(i = 0; i < s->num_channels; i++) {
1162             s->is_channel_coded[i] = get_bits1(&s->gb);
1163         }
1164
1165         if(s->bV3RTM) {
1166             // LPC
1167             s->do_lpc = get_bits1(&s->gb);
1168             if(s->do_lpc) {
1169                 decode_lpc(s);
1170             }
1171         } else {
1172             s->do_lpc = 0;
1173         }
1174     }
1175
1176
1177     if(get_bits1(&s->gb)) {
1178         padding_zeroes = get_bits(&s->gb, 5);
1179     } else {
1180         padding_zeroes = 0;
1181     }
1182
1183     if(rawpcm_tile) {
1184
1185         int bits = s->bits_per_sample - padding_zeroes;
1186         dprintf(s->avctx, "RAWPCM %d bits per sample. total %d bits, remain=%d\n", bits,
1187                 bits * s->num_channels * subframe_len, get_bits_count(&s->gb));
1188         for(i = 0; i < s->num_channels; i++) {
1189             for(j = 0; j < subframe_len; j++) {
1190                 s->channel_coeffs[i][j] = get_sbits(&s->gb, bits);
1191 //                dprintf(s->avctx, "PCM[%d][%d] = 0x%04x\n", i, j, s->channel_coeffs[i][j]);
1192             }
1193         }
1194     } else {
1195         for(i = 0; i < s->num_channels; i++)
1196             if(s->is_channel_coded[i]) {
1197             decode_channel_residues(s, i, subframe_len);
1198             if (s->seekable_tile)
1199                 use_high_update_speed(s, i);
1200             else
1201                 use_normal_update_speed(s, i);
1202             revert_cdlms(s, i, 0, subframe_len);
1203         }
1204     }
1205     if (s->do_mclms)
1206         revert_mclms(s, subframe_len);
1207     if (s->do_inter_ch_decorr)
1208         revert_inter_ch_decorr(s, subframe_len);
1209     if(s->do_ac_filter)
1210         revert_acfilter(s, subframe_len);
1211
1212     /* Dequantize */
1213     if (s->quant_stepsize != 1)
1214         for (i = 0; i < s->num_channels; i++)
1215             for (j = 0; j < subframe_len; j++)
1216                 s->channel_residues[i][j] *= s->quant_stepsize;
1217
1218     // Write to proper output buffer depending on bit-depth
1219     for (i = 0; i < subframe_len; i++)
1220         for (j = 0; j < s->num_channels; j++) {
1221             if (s->bits_per_sample == 16)
1222                 *s->samples_16++ = (int16_t) s->channel_residues[j][i];
1223             else
1224                 *s->samples_32++ = s->channel_residues[j][i];
1225         }
1226
1227     /** handled one subframe */
1228
1229     for (i = 0; i < s->channels_for_cur_subframe; i++) {
1230         int c = s->channel_indexes_for_cur_subframe[i];
1231         if (s->channel[c].cur_subframe >= s->channel[c].num_subframes) {
1232             av_log(s->avctx, AV_LOG_ERROR, "broken subframe\n");
1233             return AVERROR_INVALIDDATA;
1234         }
1235         ++s->channel[c].cur_subframe;
1236     }
1237     num_logged_subframes++;
1238     return 0;
1239 }
1240
1241 /**
1242  *@brief Decode one WMA frame.
1243  *@param s codec context
1244  *@return 0 if the trailer bit indicates that this is the last frame,
1245  *        1 if there are additional frames
1246  */
1247 static int decode_frame(WmallDecodeCtx *s)
1248 {
1249     GetBitContext* gb = &s->gb;
1250     int more_frames = 0;
1251     int len = 0;
1252     int i, ret;
1253
1254     s->frame.nb_samples = s->samples_per_frame;
1255     if ((ret = s->avctx->get_buffer(s->avctx, &s->frame)) < 0) {
1256         /** return an error if no frame could be decoded at all */
1257         av_log(s->avctx, AV_LOG_ERROR,
1258                "not enough space for the output samples\n");
1259         s->packet_loss = 1;
1260         return 0;
1261     }
1262     s->samples_16 = (int16_t *)s->frame.data[0];
1263     s->samples_32 = (int32_t *)s->frame.data[0];
1264
1265     /** get frame length */
1266     if (s->len_prefix)
1267         len = get_bits(gb, s->log2_frame_size);
1268
1269     /** decode tile information */
1270     if (decode_tilehdr(s)) {
1271         s->packet_loss = 1;
1272         return 0;
1273     }
1274
1275     /** read drc info */
1276     if (s->dynamic_range_compression) {
1277         s->drc_gain = get_bits(gb, 8);
1278     }
1279
1280     /** no idea what these are for, might be the number of samples
1281         that need to be skipped at the beginning or end of a stream */
1282     if (get_bits1(gb)) {
1283         int skip;
1284
1285         /** usually true for the first frame */
1286         if (get_bits1(gb)) {
1287             skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1288             dprintf(s->avctx, "start skip: %i\n", skip);
1289         }
1290
1291         /** sometimes true for the last frame */
1292         if (get_bits1(gb)) {
1293             skip = get_bits(gb, av_log2(s->samples_per_frame * 2));
1294             dprintf(s->avctx, "end skip: %i\n", skip);
1295         }
1296
1297     }
1298
1299     /** reset subframe states */
1300     s->parsed_all_subframes = 0;
1301     for (i = 0; i < s->num_channels; i++) {
1302         s->channel[i].decoded_samples = 0;
1303         s->channel[i].cur_subframe    = 0;
1304         s->channel[i].reuse_sf        = 0;
1305     }
1306
1307     /** decode all subframes */
1308     while (!s->parsed_all_subframes) {
1309         if (decode_subframe(s) < 0) {
1310             s->packet_loss = 1;
1311             return 0;
1312         }
1313     }
1314
1315     dprintf(s->avctx, "Frame done\n");
1316
1317     if (s->skip_frame) {
1318         s->skip_frame = 0;
1319     }
1320
1321     if (s->len_prefix) {
1322         if (len != (get_bits_count(gb) - s->frame_offset) + 2) {
1323             /** FIXME: not sure if this is always an error */
1324             av_log(s->avctx, AV_LOG_ERROR,
1325                    "frame[%i] would have to skip %i bits\n", s->frame_num,
1326                    len - (get_bits_count(gb) - s->frame_offset) - 1);
1327             s->packet_loss = 1;
1328             return 0;
1329         }
1330
1331         /** skip the rest of the frame data */
1332         skip_bits_long(gb, len - (get_bits_count(gb) - s->frame_offset) - 1);
1333     } else {
1334 /*
1335         while (get_bits_count(gb) < s->num_saved_bits && get_bits1(gb) == 0) {
1336             dprintf(s->avctx, "skip1\n");
1337         }
1338 */
1339     }
1340
1341     /** decode trailer bit */
1342     more_frames = get_bits1(gb);
1343     ++s->frame_num;
1344     return more_frames;
1345 }
1346
1347 /**
1348  *@brief Calculate remaining input buffer length.
1349  *@param s codec context
1350  *@param gb bitstream reader context
1351  *@return remaining size in bits
1352  */
1353 static int remaining_bits(WmallDecodeCtx *s, GetBitContext *gb)
1354 {
1355     return s->buf_bit_size - get_bits_count(gb);
1356 }
1357
1358 /**
1359  *@brief Fill the bit reservoir with a (partial) frame.
1360  *@param s codec context
1361  *@param gb bitstream reader context
1362  *@param len length of the partial frame
1363  *@param append decides wether to reset the buffer or not
1364  */
1365 static void save_bits(WmallDecodeCtx *s, GetBitContext* gb, int len,
1366                       int append)
1367 {
1368     int buflen;
1369
1370     /** when the frame data does not need to be concatenated, the input buffer
1371         is resetted and additional bits from the previous frame are copyed
1372         and skipped later so that a fast byte copy is possible */
1373
1374     if (!append) {
1375         s->frame_offset = get_bits_count(gb) & 7;
1376         s->num_saved_bits = s->frame_offset;
1377         init_put_bits(&s->pb, s->frame_data, MAX_FRAMESIZE);
1378     }
1379
1380     buflen = (s->num_saved_bits + len + 8) >> 3;
1381
1382     if (len <= 0 || buflen > MAX_FRAMESIZE) {
1383         av_log_ask_for_sample(s->avctx, "input buffer too small\n");
1384         s->packet_loss = 1;
1385         return;
1386     }
1387
1388     s->num_saved_bits += len;
1389     if (!append) {
1390         avpriv_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3),
1391                      s->num_saved_bits);
1392     } else {
1393         int align = 8 - (get_bits_count(gb) & 7);
1394         align = FFMIN(align, len);
1395         put_bits(&s->pb, align, get_bits(gb, align));
1396         len -= align;
1397         avpriv_copy_bits(&s->pb, gb->buffer + (get_bits_count(gb) >> 3), len);
1398     }
1399     skip_bits_long(gb, len);
1400
1401     {
1402         PutBitContext tmp = s->pb;
1403         flush_put_bits(&tmp);
1404     }
1405
1406     init_get_bits(&s->gb, s->frame_data, s->num_saved_bits);
1407     skip_bits(&s->gb, s->frame_offset);
1408 }
1409
1410 /**
1411  *@brief Decode a single WMA packet.
1412  *@param avctx codec context
1413  *@param data the output buffer
1414  *@param data_size number of bytes that were written to the output buffer
1415  *@param avpkt input packet
1416  *@return number of bytes that were read from the input buffer
1417  */
1418 static int decode_packet(AVCodecContext *avctx,
1419                          void *data, int *got_frame_ptr, AVPacket* avpkt)
1420 {
1421     WmallDecodeCtx *s = avctx->priv_data;
1422     GetBitContext* gb  = &s->pgb;
1423     const uint8_t* buf = avpkt->data;
1424     int buf_size       = avpkt->size;
1425     int num_bits_prev_frame;
1426     int packet_sequence_number;
1427     int seekable_frame_in_packet;
1428     int spliced_packet;
1429
1430     if (s->packet_done || s->packet_loss) {
1431         int seekable_frame_in_packet, spliced_packet;
1432         s->packet_done = 0;
1433
1434         /** sanity check for the buffer length */
1435         if (buf_size < avctx->block_align)
1436             return 0;
1437
1438         s->next_packet_start = buf_size - avctx->block_align;
1439         buf_size = avctx->block_align;
1440         s->buf_bit_size = buf_size << 3;
1441
1442         /** parse packet header */
1443         init_get_bits(gb, buf, s->buf_bit_size);
1444         packet_sequence_number = get_bits(gb, 4);
1445         seekable_frame_in_packet = get_bits1(gb);
1446         spliced_packet = get_bits1(gb);
1447
1448         /** get number of bits that need to be added to the previous frame */
1449         num_bits_prev_frame = get_bits(gb, s->log2_frame_size);
1450
1451         /** check for packet loss */
1452         if (!s->packet_loss &&
1453             ((s->packet_sequence_number + 1) & 0xF) != packet_sequence_number) {
1454             s->packet_loss = 1;
1455             av_log(avctx, AV_LOG_ERROR, "Packet loss detected! seq %x vs %x\n",
1456                    s->packet_sequence_number, packet_sequence_number);
1457         }
1458         s->packet_sequence_number = packet_sequence_number;
1459
1460         if (num_bits_prev_frame > 0) {
1461             int remaining_packet_bits = s->buf_bit_size - get_bits_count(gb);
1462             if (num_bits_prev_frame >= remaining_packet_bits) {
1463                 num_bits_prev_frame = remaining_packet_bits;
1464                 s->packet_done = 1;
1465             }
1466
1467             /** append the previous frame data to the remaining data from the
1468                 previous packet to create a full frame */
1469             save_bits(s, gb, num_bits_prev_frame, 1);
1470
1471             /** decode the cross packet frame if it is valid */
1472             if (!s->packet_loss)
1473                 decode_frame(s);
1474         } else if (s->num_saved_bits - s->frame_offset) {
1475             dprintf(avctx, "ignoring %x previously saved bits\n",
1476                     s->num_saved_bits - s->frame_offset);
1477         }
1478
1479         if (s->packet_loss) {
1480             /** reset number of saved bits so that the decoder
1481                 does not start to decode incomplete frames in the
1482                 s->len_prefix == 0 case */
1483             s->num_saved_bits = 0;
1484             s->packet_loss = 0;
1485         }
1486
1487     } else {
1488         int frame_size;
1489
1490         s->buf_bit_size = (avpkt->size - s->next_packet_start) << 3;
1491         init_get_bits(gb, avpkt->data, s->buf_bit_size);
1492         skip_bits(gb, s->packet_offset);
1493
1494         if (s->len_prefix && remaining_bits(s, gb) > s->log2_frame_size &&
1495             (frame_size = show_bits(gb, s->log2_frame_size)) &&
1496             frame_size <= remaining_bits(s, gb)) {
1497             save_bits(s, gb, frame_size, 0);
1498             s->packet_done = !decode_frame(s);
1499         } else if (!s->len_prefix
1500                    && s->num_saved_bits > get_bits_count(&s->gb)) {
1501             /** when the frames do not have a length prefix, we don't know
1502                 the compressed length of the individual frames
1503                 however, we know what part of a new packet belongs to the
1504                 previous frame
1505                 therefore we save the incoming packet first, then we append
1506                 the "previous frame" data from the next packet so that
1507                 we get a buffer that only contains full frames */
1508             s->packet_done = !decode_frame(s);
1509         } else {
1510             s->packet_done = 1;
1511         }
1512     }
1513
1514     if (s->packet_done && !s->packet_loss &&
1515         remaining_bits(s, gb) > 0) {
1516         /** save the rest of the data so that it can be decoded
1517             with the next packet */
1518         save_bits(s, gb, remaining_bits(s, gb), 0);
1519     }
1520
1521     *(AVFrame *)data = s->frame;
1522     *got_frame_ptr = 1;
1523     s->packet_offset = get_bits_count(gb) & 7;
1524
1525     return (s->packet_loss) ? AVERROR_INVALIDDATA : get_bits_count(gb) >> 3;
1526 }
1527
1528 /**
1529  *@brief Clear decoder buffers (for seeking).
1530  *@param avctx codec context
1531  */
1532 static void flush(AVCodecContext *avctx)
1533 {
1534     WmallDecodeCtx *s = avctx->priv_data;
1535     int i;
1536     /** reset output buffer as a part of it is used during the windowing of a
1537         new frame */
1538     for (i = 0; i < s->num_channels; i++)
1539         memset(s->channel[i].out, 0, s->samples_per_frame *
1540                sizeof(*s->channel[i].out));
1541     s->packet_loss = 1;
1542 }
1543
1544
1545 /**
1546  *@brief wmall decoder
1547  */
1548 AVCodec ff_wmalossless_decoder = {
1549     .name           = "wmalossless",
1550     .type           = AVMEDIA_TYPE_AUDIO,
1551     .id             = CODEC_ID_WMALOSSLESS,
1552     .priv_data_size = sizeof(WmallDecodeCtx),
1553     .init           = decode_init,
1554     .close          = decode_end,
1555     .decode         = decode_packet,
1556     .flush          = flush,
1557     .capabilities = CODEC_CAP_SUBFRAMES | CODEC_CAP_EXPERIMENTAL | CODEC_CAP_DR1,
1558     .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio 9 Lossless"),
1559 };