git.sesse.net Git - ffmpeg/blob - libavcodec/opus_celt.c

   1 /*
   2  * Copyright (c) 2012 Andrew D'Addesio
   3  * Copyright (c) 2013-2014 Mozilla Corporation
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * Opus CELT decoder
  25  */
  26
  27 #include <stdint.h>
  28
  29 #include "libavutil/float_dsp.h"
  30 #include "libavutil/libm.h"
  31
  32 #include "imdct15.h"
  33 #include "opus.h"
  34 #include "opustab.h"
  35
  36 enum CeltSpread {
  37     CELT_SPREAD_NONE,
  38     CELT_SPREAD_LIGHT,
  39     CELT_SPREAD_NORMAL,
  40     CELT_SPREAD_AGGRESSIVE
  41 };
  42
  43 typedef struct CeltFrame {
  44     float energy[CELT_MAX_BANDS];
  45     float prev_energy[2][CELT_MAX_BANDS];
  46
  47     uint8_t collapse_masks[CELT_MAX_BANDS];
  48
  49     /* buffer for mdct output + postfilter */
  50     DECLARE_ALIGNED(32, float, buf)[2048];
  51
  52     /* postfilter parameters */
  53     int pf_period_new;
  54     float pf_gains_new[3];
  55     int pf_period;
  56     float pf_gains[3];
  57     int pf_period_old;
  58     float pf_gains_old[3];
  59
  60     float deemph_coeff;
  61 } CeltFrame;
  62
  63 struct CeltContext {
  64     // constant values that do not change during context lifetime
  65     AVCodecContext    *avctx;
  66     IMDCT15Context    *imdct[4];
  67     AVFloatDSPContext  *dsp;
  68     int output_channels;
  69
  70     // values that have inter-frame effect and must be reset on flush
  71     CeltFrame frame[2];
  72     uint32_t seed;
  73     int flushed;
  74
  75     // values that only affect a single frame
  76     int coded_channels;
  77     int framebits;
  78     int duration;
  79
  80     /* number of iMDCT blocks in the frame */
  81     int blocks;
  82     /* size of each block */
  83     int blocksize;
  84
  85     int startband;
  86     int endband;
  87     int codedbands;
  88
  89     int anticollapse_bit;
  90
  91     int intensitystereo;
  92     int dualstereo;
  93     enum CeltSpread spread;
  94
  95     int remaining;
  96     int remaining2;
  97     int fine_bits    [CELT_MAX_BANDS];
  98     int fine_priority[CELT_MAX_BANDS];
  99     int pulses       [CELT_MAX_BANDS];
 100     int tf_change    [CELT_MAX_BANDS];
 101
 102     DECLARE_ALIGNED(32, float, coeffs)[2][CELT_MAX_FRAME_SIZE];
 103     DECLARE_ALIGNED(32, float, scratch)[22 * 8]; // MAX(ff_celt_freq_range) * 1<<CELT_MAX_LOG_BLOCKS
 104 };
 105
 106 static inline int16_t celt_cos(int16_t x)
 107 {
 108     x = (MUL16(x, x) + 4096) >> 13;
 109     x = (32767-x) + ROUND_MUL16(x, (-7651 + ROUND_MUL16(x, (8277 + ROUND_MUL16(-626, x)))));
 110     return 1+x;
 111 }
 112
 113 static inline int celt_log2tan(int isin, int icos)
 114 {
 115     int lc, ls;
 116     lc = opus_ilog(icos);
 117     ls = opus_ilog(isin);
 118     icos <<= 15 - lc;
 119     isin <<= 15 - ls;
 120     return (ls << 11) - (lc << 11) +
 121            ROUND_MUL16(isin, ROUND_MUL16(isin, -2597) + 7932) -
 122            ROUND_MUL16(icos, ROUND_MUL16(icos, -2597) + 7932);
 123 }
 124
 125 static inline uint32_t celt_rng(CeltContext *s)
 126 {
 127     s->seed = 1664525 * s->seed + 1013904223;
 128     return s->seed;
 129 }
 130
 131 static void celt_decode_coarse_energy(CeltContext *s, OpusRangeCoder *rc)
 132 {
 133     int i, j;
 134     float prev[2] = {0};
 135     float alpha, beta;
 136     const uint8_t *model;
 137
 138     /* use the 2D z-transform to apply prediction in both */
 139     /* the time domain (alpha) and the frequency domain (beta) */
 140
 141     if (opus_rc_tell(rc)+3 <= s->framebits && ff_opus_rc_dec_log(rc, 3)) {
 142         /* intra frame */
 143         alpha = 0;
 144         beta  = 1.0f - 4915.0f/32768.0f;
 145         model = ff_celt_coarse_energy_dist[s->duration][1];
 146     } else {
 147         alpha = ff_celt_alpha_coef[s->duration];
 148         beta  = 1.0f - ff_celt_beta_coef[s->duration];
 149         model = ff_celt_coarse_energy_dist[s->duration][0];
 150     }
 151
 152     for (i = 0; i < CELT_MAX_BANDS; i++) {
 153         for (j = 0; j < s->coded_channels; j++) {
 154             CeltFrame *frame = &s->frame[j];
 155             float value;
 156             int available;
 157
 158             if (i < s->startband || i >= s->endband) {
 159                 frame->energy[i] = 0.0;
 160                 continue;
 161             }
 162
 163             available = s->framebits - opus_rc_tell(rc);
 164             if (available >= 15) {
 165                 /* decode using a Laplace distribution */
 166                 int k = FFMIN(i, 20) << 1;
 167                 value = ff_opus_rc_dec_laplace(rc, model[k] << 7, model[k+1] << 6);
 168             } else if (available >= 2) {
 169                 int x = ff_opus_rc_dec_cdf(rc, ff_celt_model_energy_small);
 170                 value = (x>>1) ^ -(x&1);
 171             } else if (available >= 1) {
 172                 value = -(float)ff_opus_rc_dec_log(rc, 1);
 173             } else value = -1;
 174
 175             frame->energy[i] = FFMAX(-9.0f, frame->energy[i]) * alpha + prev[j] + value;
 176             prev[j] += beta * value;
 177         }
 178     }
 179 }
 180
 181 static void celt_decode_fine_energy(CeltContext *s, OpusRangeCoder *rc)
 182 {
 183     int i;
 184     for (i = s->startband; i < s->endband; i++) {
 185         int j;
 186         if (!s->fine_bits[i])
 187             continue;
 188
 189         for (j = 0; j < s->coded_channels; j++) {
 190             CeltFrame *frame = &s->frame[j];
 191             int q2;
 192             float offset;
 193             q2 = ff_opus_rc_get_raw(rc, s->fine_bits[i]);
 194             offset = (q2 + 0.5f) * (1 << (14 - s->fine_bits[i])) / 16384.0f - 0.5f;
 195             frame->energy[i] += offset;
 196         }
 197     }
 198 }
 199
 200 static void celt_decode_final_energy(CeltContext *s, OpusRangeCoder *rc,
 201                                      int bits_left)
 202 {
 203     int priority, i, j;
 204
 205     for (priority = 0; priority < 2; priority++) {
 206         for (i = s->startband; i < s->endband && bits_left >= s->coded_channels; i++) {
 207             if (s->fine_priority[i] != priority || s->fine_bits[i] >= CELT_MAX_FINE_BITS)
 208                 continue;
 209
 210             for (j = 0; j < s->coded_channels; j++) {
 211                 int q2;
 212                 float offset;
 213                 q2 = ff_opus_rc_get_raw(rc, 1);
 214                 offset = (q2 - 0.5f) * (1 << (14 - s->fine_bits[i] - 1)) / 16384.0f;
 215                 s->frame[j].energy[i] += offset;
 216                 bits_left--;
 217             }
 218         }
 219     }
 220 }
 221
 222 static void celt_decode_tf_changes(CeltContext *s, OpusRangeCoder *rc,
 223                                    int transient)
 224 {
 225     int i, diff = 0, tf_select = 0, tf_changed = 0, tf_select_bit;
 226     int consumed, bits = transient ? 2 : 4;
 227
 228     consumed = opus_rc_tell(rc);
 229     tf_select_bit = (s->duration != 0 && consumed+bits+1 <= s->framebits);
 230
 231     for (i = s->startband; i < s->endband; i++) {
 232         if (consumed+bits+tf_select_bit <= s->framebits) {
 233             diff ^= ff_opus_rc_dec_log(rc, bits);
 234             consumed = opus_rc_tell(rc);
 235             tf_changed |= diff;
 236         }
 237         s->tf_change[i] = diff;
 238         bits = transient ? 4 : 5;
 239     }
 240
 241     if (tf_select_bit && ff_celt_tf_select[s->duration][transient][0][tf_changed] !=
 242                          ff_celt_tf_select[s->duration][transient][1][tf_changed])
 243         tf_select = ff_opus_rc_dec_log(rc, 1);
 244
 245     for (i = s->startband; i < s->endband; i++) {
 246         s->tf_change[i] = ff_celt_tf_select[s->duration][transient][tf_select][s->tf_change[i]];
 247     }
 248 }
 249
 250 static void celt_decode_allocation(CeltContext *s, OpusRangeCoder *rc)
 251 {
 252     // approx. maximum bit allocation for each band before boost/trim
 253     int cap[CELT_MAX_BANDS];
 254     int boost[CELT_MAX_BANDS];
 255     int threshold[CELT_MAX_BANDS];
 256     int bits1[CELT_MAX_BANDS];
 257     int bits2[CELT_MAX_BANDS];
 258     int trim_offset[CELT_MAX_BANDS];
 259
 260     int skip_startband = s->startband;
 261     int dynalloc       = 6;
 262     int alloctrim      = 5;
 263     int extrabits      = 0;
 264
 265     int skip_bit            = 0;
 266     int intensitystereo_bit = 0;
 267     int dualstereo_bit      = 0;
 268
 269     int remaining, bandbits;
 270     int low, high, total, done;
 271     int totalbits;
 272     int consumed;
 273     int i, j;
 274
 275     consumed = opus_rc_tell(rc);
 276
 277     /* obtain spread flag */
 278     s->spread = CELT_SPREAD_NORMAL;
 279     if (consumed + 4 <= s->framebits)
 280         s->spread = ff_opus_rc_dec_cdf(rc, ff_celt_model_spread);
 281
 282     /* generate static allocation caps */
 283     for (i = 0; i < CELT_MAX_BANDS; i++) {
 284         cap[i] = (ff_celt_static_caps[s->duration][s->coded_channels - 1][i] + 64)
 285                  * ff_celt_freq_range[i] << (s->coded_channels - 1) << s->duration >> 2;
 286     }
 287
 288     /* obtain band boost */
 289     totalbits = s->framebits << 3; // convert to 1/8 bits
 290     consumed = opus_rc_tell_frac(rc);
 291     for (i = s->startband; i < s->endband; i++) {
 292         int quanta, band_dynalloc;
 293
 294         boost[i] = 0;
 295
 296         quanta = ff_celt_freq_range[i] << (s->coded_channels - 1) << s->duration;
 297         quanta = FFMIN(quanta << 3, FFMAX(6 << 3, quanta));
 298         band_dynalloc = dynalloc;
 299         while (consumed + (band_dynalloc<<3) < totalbits && boost[i] < cap[i]) {
 300             int add = ff_opus_rc_dec_log(rc, band_dynalloc);
 301             consumed = opus_rc_tell_frac(rc);
 302             if (!add)
 303                 break;
 304
 305             boost[i]     += quanta;
 306             totalbits    -= quanta;
 307             band_dynalloc = 1;
 308         }
 309         /* dynalloc is more likely to occur if it's already been used for earlier bands */
 310         if (boost[i])
 311             dynalloc = FFMAX(2, dynalloc - 1);
 312     }
 313
 314     /* obtain allocation trim */
 315     if (consumed + (6 << 3) <= totalbits)
 316         alloctrim = ff_opus_rc_dec_cdf(rc, ff_celt_model_alloc_trim);
 317
 318     /* anti-collapse bit reservation */
 319     totalbits = (s->framebits << 3) - opus_rc_tell_frac(rc) - 1;
 320     s->anticollapse_bit = 0;
 321     if (s->blocks > 1 && s->duration >= 2 &&
 322         totalbits >= ((s->duration + 2) << 3))
 323         s->anticollapse_bit = 1 << 3;
 324     totalbits -= s->anticollapse_bit;
 325
 326     /* band skip bit reservation */
 327     if (totalbits >= 1 << 3)
 328         skip_bit = 1 << 3;
 329     totalbits -= skip_bit;
 330
 331     /* intensity/dual stereo bit reservation */
 332     if (s->coded_channels == 2) {
 333         intensitystereo_bit = ff_celt_log2_frac[s->endband - s->startband];
 334         if (intensitystereo_bit <= totalbits) {
 335             totalbits -= intensitystereo_bit;
 336             if (totalbits >= 1 << 3) {
 337                 dualstereo_bit = 1 << 3;
 338                 totalbits -= 1 << 3;
 339             }
 340         } else
 341             intensitystereo_bit = 0;
 342     }
 343
 344     for (i = s->startband; i < s->endband; i++) {
 345         int trim     = alloctrim - 5 - s->duration;
 346         int band     = ff_celt_freq_range[i] * (s->endband - i - 1);
 347         int duration = s->duration + 3;
 348         int scale    = duration + s->coded_channels - 1;
 349
 350         /* PVQ minimum allocation threshold, below this value the band is
 351          * skipped */
 352         threshold[i] = FFMAX(3 * ff_celt_freq_range[i] << duration >> 4,
 353                              s->coded_channels << 3);
 354
 355         trim_offset[i] = trim * (band << scale) >> 6;
 356
 357         if (ff_celt_freq_range[i] << s->duration == 1)
 358             trim_offset[i] -= s->coded_channels << 3;
 359     }
 360
 361     /* bisection */
 362     low  = 1;
 363     high = CELT_VECTORS - 1;
 364     while (low <= high) {
 365         int center = (low + high) >> 1;
 366         done = total = 0;
 367
 368         for (i = s->endband - 1; i >= s->startband; i--) {
 369             bandbits = ff_celt_freq_range[i] * ff_celt_static_alloc[center][i]
 370                        << (s->coded_channels - 1) << s->duration >> 2;
 371
 372             if (bandbits)
 373                 bandbits = FFMAX(0, bandbits + trim_offset[i]);
 374             bandbits += boost[i];
 375
 376             if (bandbits >= threshold[i] || done) {
 377                 done = 1;
 378                 total += FFMIN(bandbits, cap[i]);
 379             } else if (bandbits >= s->coded_channels << 3)
 380                 total += s->coded_channels << 3;
 381         }
 382
 383         if (total > totalbits)
 384             high = center - 1;
 385         else
 386             low = center + 1;
 387     }
 388     high = low--;
 389
 390     for (i = s->startband; i < s->endband; i++) {
 391         bits1[i] = ff_celt_freq_range[i] * ff_celt_static_alloc[low][i]
 392                    << (s->coded_channels - 1) << s->duration >> 2;
 393         bits2[i] = high >= CELT_VECTORS ? cap[i] :
 394                    ff_celt_freq_range[i] * ff_celt_static_alloc[high][i]
 395                    << (s->coded_channels - 1) << s->duration >> 2;
 396
 397         if (bits1[i])
 398             bits1[i] = FFMAX(0, bits1[i] + trim_offset[i]);
 399         if (bits2[i])
 400             bits2[i] = FFMAX(0, bits2[i] + trim_offset[i]);
 401         if (low)
 402             bits1[i] += boost[i];
 403         bits2[i] += boost[i];
 404
 405         if (boost[i])
 406             skip_startband = i;
 407         bits2[i] = FFMAX(0, bits2[i] - bits1[i]);
 408     }
 409
 410     /* bisection */
 411     low  = 0;
 412     high = 1 << CELT_ALLOC_STEPS;
 413     for (i = 0; i < CELT_ALLOC_STEPS; i++) {
 414         int center = (low + high) >> 1;
 415         done = total = 0;
 416
 417         for (j = s->endband - 1; j >= s->startband; j--) {
 418             bandbits = bits1[j] + (center * bits2[j] >> CELT_ALLOC_STEPS);
 419
 420             if (bandbits >= threshold[j] || done) {
 421                 done = 1;
 422                 total += FFMIN(bandbits, cap[j]);
 423             } else if (bandbits >= s->coded_channels << 3)
 424                 total += s->coded_channels << 3;
 425         }
 426         if (total > totalbits)
 427             high = center;
 428         else
 429             low = center;
 430     }
 431
 432     done = total = 0;
 433     for (i = s->endband - 1; i >= s->startband; i--) {
 434         bandbits = bits1[i] + (low * bits2[i] >> CELT_ALLOC_STEPS);
 435
 436         if (bandbits >= threshold[i] || done)
 437             done = 1;
 438         else
 439             bandbits = (bandbits >= s->coded_channels << 3) ?
 440                        s->coded_channels << 3 : 0;
 441
 442         bandbits     = FFMIN(bandbits, cap[i]);
 443         s->pulses[i] = bandbits;
 444         total      += bandbits;
 445     }
 446
 447     /* band skipping */
 448     for (s->codedbands = s->endband; ; s->codedbands--) {
 449         int allocation;
 450         j = s->codedbands - 1;
 451
 452         if (j == skip_startband) {
 453             /* all remaining bands are not skipped */
 454             totalbits += skip_bit;
 455             break;
 456         }
 457
 458         /* determine the number of bits available for coding "do not skip" markers */
 459         remaining   = totalbits - total;
 460         bandbits    = remaining / (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[s->startband]);
 461         remaining  -= bandbits  * (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[s->startband]);
 462         allocation  = s->pulses[j] + bandbits * ff_celt_freq_range[j]
 463                       + FFMAX(0, remaining - (ff_celt_freq_bands[j] - ff_celt_freq_bands[s->startband]));
 464
 465         /* a "do not skip" marker is only coded if the allocation is
 466            above the chosen threshold */
 467         if (allocation >= FFMAX(threshold[j], (s->coded_channels + 1) <<3 )) {
 468             if (ff_opus_rc_dec_log(rc, 1))
 469                 break;
 470
 471             total      += 1 << 3;
 472             allocation -= 1 << 3;
 473         }
 474
 475         /* the band is skipped, so reclaim its bits */
 476         total -= s->pulses[j];
 477         if (intensitystereo_bit) {
 478             total -= intensitystereo_bit;
 479             intensitystereo_bit = ff_celt_log2_frac[j - s->startband];
 480             total += intensitystereo_bit;
 481         }
 482
 483         total += s->pulses[j] = (allocation >= s->coded_channels << 3) ?
 484                               s->coded_channels << 3 : 0;
 485     }
 486
 487     /* obtain stereo flags */
 488     s->intensitystereo = 0;
 489     s->dualstereo      = 0;
 490     if (intensitystereo_bit)
 491         s->intensitystereo = s->startband +
 492                           ff_opus_rc_dec_uint(rc, s->codedbands + 1 - s->startband);
 493     if (s->intensitystereo <= s->startband)
 494         totalbits += dualstereo_bit; /* no intensity stereo means no dual stereo */
 495     else if (dualstereo_bit)
 496         s->dualstereo = ff_opus_rc_dec_log(rc, 1);
 497
 498     /* supply the remaining bits in this frame to lower bands */
 499     remaining = totalbits - total;
 500     bandbits  = remaining / (ff_celt_freq_bands[s->codedbands] - ff_celt_freq_bands[s->startband]);
 501     remaining -= bandbits * (ff_celt_freq_bands[s->codedbands] - ff_celt_freq_bands[s->startband]);
 502     for (i = s->startband; i < s->codedbands; i++) {
 503         int bits = FFMIN(remaining, ff_celt_freq_range[i]);
 504
 505         s->pulses[i] += bits + bandbits * ff_celt_freq_range[i];
 506         remaining    -= bits;
 507     }
 508
 509     for (i = s->startband; i < s->codedbands; i++) {
 510         int N = ff_celt_freq_range[i] << s->duration;
 511         int prev_extra = extrabits;
 512         s->pulses[i] += extrabits;
 513
 514         if (N > 1) {
 515             int dof;        // degrees of freedom
 516             int temp;       // dof * channels * log(dof)
 517             int offset;     // fine energy quantization offset, i.e.
 518                             // extra bits assigned over the standard
 519                             // totalbits/dof
 520             int fine_bits, max_bits;
 521
 522             extrabits = FFMAX(0, s->pulses[i] - cap[i]);
 523             s->pulses[i] -= extrabits;
 524
 525             /* intensity stereo makes use of an extra degree of freedom */
 526             dof = N * s->coded_channels
 527                   + (s->coded_channels == 2 && N > 2 && !s->dualstereo && i < s->intensitystereo);
 528             temp = dof * (ff_celt_log_freq_range[i] + (s->duration<<3));
 529             offset = (temp >> 1) - dof * CELT_FINE_OFFSET;
 530             if (N == 2) /* dof=2 is the only case that doesn't fit the model */
 531                 offset += dof<<1;
 532
 533             /* grant an additional bias for the first and second pulses */
 534             if (s->pulses[i] + offset < 2 * (dof << 3))
 535                 offset += temp >> 2;
 536             else if (s->pulses[i] + offset < 3 * (dof << 3))
 537                 offset += temp >> 3;
 538
 539             fine_bits = (s->pulses[i] + offset + (dof << 2)) / (dof << 3);
 540             max_bits  = FFMIN((s->pulses[i]>>3) >> (s->coded_channels - 1),
 541                               CELT_MAX_FINE_BITS);
 542
 543             max_bits  = FFMAX(max_bits, 0);
 544
 545             s->fine_bits[i] = av_clip(fine_bits, 0, max_bits);
 546
 547             /* if fine_bits was rounded down or capped,
 548                give priority for the final fine energy pass */
 549             s->fine_priority[i] = (s->fine_bits[i] * (dof<<3) >= s->pulses[i] + offset);
 550
 551             /* the remaining bits are assigned to PVQ */
 552             s->pulses[i] -= s->fine_bits[i] << (s->coded_channels - 1) << 3;
 553         } else {
 554             /* all bits go to fine energy except for the sign bit */
 555             extrabits = FFMAX(0, s->pulses[i] - (s->coded_channels << 3));
 556             s->pulses[i] -= extrabits;
 557             s->fine_bits[i] = 0;
 558             s->fine_priority[i] = 1;
 559         }
 560
 561         /* hand back a limited number of extra fine energy bits to this band */
 562         if (extrabits > 0) {
 563             int fineextra = FFMIN(extrabits >> (s->coded_channels + 2),
 564                                   CELT_MAX_FINE_BITS - s->fine_bits[i]);
 565             s->fine_bits[i] += fineextra;
 566
 567             fineextra <<= s->coded_channels + 2;
 568             s->fine_priority[i] = (fineextra >= extrabits - prev_extra);
 569             extrabits -= fineextra;
 570         }
 571     }
 572     s->remaining = extrabits;
 573
 574     /* skipped bands dedicate all of their bits for fine energy */
 575     for (; i < s->endband; i++) {
 576         s->fine_bits[i]     = s->pulses[i] >> (s->coded_channels - 1) >> 3;
 577         s->pulses[i]        = 0;
 578         s->fine_priority[i] = s->fine_bits[i] < 1;
 579     }
 580 }
 581
 582 static inline int celt_bits2pulses(const uint8_t *cache, int bits)
 583 {
 584     // TODO: Find the size of cache and make it into an array in the parameters list
 585     int i, low = 0, high;
 586
 587     high = cache[0];
 588     bits--;
 589
 590     for (i = 0; i < 6; i++) {
 591         int center = (low + high + 1) >> 1;
 592         if (cache[center] >= bits)
 593             high = center;
 594         else
 595             low = center;
 596     }
 597
 598     return (bits - (low == 0 ? -1 : cache[low]) <= cache[high] - bits) ? low : high;
 599 }
 600
 601 static inline int celt_pulses2bits(const uint8_t *cache, int pulses)
 602 {
 603     // TODO: Find the size of cache and make it into an array in the parameters list
 604    return (pulses == 0) ? 0 : cache[pulses] + 1;
 605 }
 606
 607 static inline void celt_normalize_residual(const int * av_restrict iy, float * av_restrict X,
 608                                            int N, float g)
 609 {
 610     int i;
 611     for (i = 0; i < N; i++)
 612         X[i] = g * iy[i];
 613 }
 614
 615 static void celt_exp_rotation1(float *X, unsigned int len, unsigned int stride,
 616                                float c, float s)
 617 {
 618     float *Xptr;
 619     int i;
 620
 621     Xptr = X;
 622     for (i = 0; i < len - stride; i++) {
 623         float x1, x2;
 624         x1           = Xptr[0];
 625         x2           = Xptr[stride];
 626         Xptr[stride] = c * x2 + s * x1;
 627         *Xptr++      = c * x1 - s * x2;
 628     }
 629
 630     Xptr = &X[len - 2 * stride - 1];
 631     for (i = len - 2 * stride - 1; i >= 0; i--) {
 632         float x1, x2;
 633         x1           = Xptr[0];
 634         x2           = Xptr[stride];
 635         Xptr[stride] = c * x2 + s * x1;
 636         *Xptr--      = c * x1 - s * x2;
 637     }
 638 }
 639
 640 static inline void celt_exp_rotation(float *X, unsigned int len,
 641                                      unsigned int stride, unsigned int K,
 642                                      enum CeltSpread spread)
 643 {
 644     unsigned int stride2 = 0;
 645     float c, s;
 646     float gain, theta;
 647     int i;
 648
 649     if (2*K >= len || spread == CELT_SPREAD_NONE)
 650         return;
 651
 652     gain = (float)len / (len + (20 - 5*spread) * K);
 653     theta = M_PI * gain * gain / 4;
 654
 655     c = cos(theta);
 656     s = sin(theta);
 657
 658     if (len >= stride << 3) {
 659         stride2 = 1;
 660         /* This is just a simple (equivalent) way of computing sqrt(len/stride) with rounding.
 661         It's basically incrementing long as (stride2+0.5)^2 < len/stride. */
 662         while ((stride2 * stride2 + stride2) * stride + (stride >> 2) < len)
 663             stride2++;
 664     }
 665
 666     /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
 667     extract_collapse_mask().*/
 668     len /= stride;
 669     for (i = 0; i < stride; i++) {
 670         if (stride2)
 671             celt_exp_rotation1(X + i * len, len, stride2, s, c);
 672         celt_exp_rotation1(X + i * len, len, 1, c, s);
 673     }
 674 }
 675
 676 static inline unsigned int celt_extract_collapse_mask(const int *iy,
 677                                                       unsigned int N,
 678                                                       unsigned int B)
 679 {
 680     unsigned int collapse_mask;
 681     int N0;
 682     int i, j;
 683
 684     if (B <= 1)
 685         return 1;
 686
 687     /*NOTE: As a minor optimization, we could be passing around log2(B), not B, for both this and for
 688     exp_rotation().*/
 689     N0 = N/B;
 690     collapse_mask = 0;
 691     for (i = 0; i < B; i++)
 692         for (j = 0; j < N0; j++)
 693             collapse_mask |= (iy[i*N0+j]!=0)<<i;
 694     return collapse_mask;
 695 }
 696
 697 static inline void celt_renormalize_vector(float *X, int N, float gain)
 698 {
 699     int i;
 700     float g = 1e-15f;
 701     for (i = 0; i < N; i++)
 702         g += X[i] * X[i];
 703     g = gain / sqrtf(g);
 704
 705     for (i = 0; i < N; i++)
 706         X[i] *= g;
 707 }
 708
 709 static inline void celt_stereo_merge(float *X, float *Y, float mid, int N)
 710 {
 711     int i;
 712     float xp = 0, side = 0;
 713     float E[2];
 714     float mid2;
 715     float t, gain[2];
 716
 717     /* Compute the norm of X+Y and X-Y as |X|^2 + |Y|^2 +/- sum(xy) */
 718     for (i = 0; i < N; i++) {
 719         xp   += X[i] * Y[i];
 720         side += Y[i] * Y[i];
 721     }
 722
 723     /* Compensating for the mid normalization */
 724     xp *= mid;
 725     mid2 = mid;
 726     E[0] = mid2 * mid2 + side - 2 * xp;
 727     E[1] = mid2 * mid2 + side + 2 * xp;
 728     if (E[0] < 6e-4f || E[1] < 6e-4f) {
 729         for (i = 0; i < N; i++)
 730             Y[i] = X[i];
 731         return;
 732     }
 733
 734     t = E[0];
 735     gain[0] = 1.0f / sqrtf(t);
 736     t = E[1];
 737     gain[1] = 1.0f / sqrtf(t);
 738
 739     for (i = 0; i < N; i++) {
 740         float value[2];
 741         /* Apply mid scaling (side is already scaled) */
 742         value[0] = mid * X[i];
 743         value[1] = Y[i];
 744         X[i] = gain[0] * (value[0] - value[1]);
 745         Y[i] = gain[1] * (value[0] + value[1]);
 746     }
 747 }
 748
 749 static void celt_interleave_hadamard(float *tmp, float *X, int N0,
 750                                      int stride, int hadamard)
 751 {
 752     int i, j;
 753     int N = N0*stride;
 754
 755     if (hadamard) {
 756         const uint8_t *ordery = ff_celt_hadamard_ordery + stride - 2;
 757         for (i = 0; i < stride; i++)
 758             for (j = 0; j < N0; j++)
 759                 tmp[j*stride+i] = X[ordery[i]*N0+j];
 760     } else {
 761         for (i = 0; i < stride; i++)
 762             for (j = 0; j < N0; j++)
 763                 tmp[j*stride+i] = X[i*N0+j];
 764     }
 765
 766     for (i = 0; i < N; i++)
 767         X[i] = tmp[i];
 768 }
 769
 770 static void celt_deinterleave_hadamard(float *tmp, float *X, int N0,
 771                                        int stride, int hadamard)
 772 {
 773     int i, j;
 774     int N = N0*stride;
 775
 776     if (hadamard) {
 777         const uint8_t *ordery = ff_celt_hadamard_ordery + stride - 2;
 778         for (i = 0; i < stride; i++)
 779             for (j = 0; j < N0; j++)
 780                 tmp[ordery[i]*N0+j] = X[j*stride+i];
 781     } else {
 782         for (i = 0; i < stride; i++)
 783             for (j = 0; j < N0; j++)
 784                 tmp[i*N0+j] = X[j*stride+i];
 785     }
 786
 787     for (i = 0; i < N; i++)
 788         X[i] = tmp[i];
 789 }
 790
 791 static void celt_haar1(float *X, int N0, int stride)
 792 {
 793     int i, j;
 794     N0 >>= 1;
 795     for (i = 0; i < stride; i++) {
 796         for (j = 0; j < N0; j++) {
 797             float x0 = X[stride * (2 * j + 0) + i];
 798             float x1 = X[stride * (2 * j + 1) + i];
 799             X[stride * (2 * j + 0) + i] = (x0 + x1) * M_SQRT1_2;
 800             X[stride * (2 * j + 1) + i] = (x0 - x1) * M_SQRT1_2;
 801         }
 802     }
 803 }
 804
 805 static inline int celt_compute_qn(int N, int b, int offset, int pulse_cap,
 806                                   int dualstereo)
 807 {
 808     int qn, qb;
 809     int N2 = 2 * N - 1;
 810     if (dualstereo && N == 2)
 811         N2--;
 812
 813     /* The upper limit ensures that in a stereo split with itheta==16384, we'll
 814      * always have enough bits left over to code at least one pulse in the
 815      * side; otherwise it would collapse, since it doesn't get folded. */
 816     qb = FFMIN3(b - pulse_cap - (4 << 3), (b + N2 * offset) / N2, 8 << 3);
 817     qn = (qb < (1 << 3 >> 1)) ? 1 : ((ff_celt_qn_exp2[qb & 0x7] >> (14 - (qb >> 3))) + 1) >> 1 << 1;
 818     return qn;
 819 }
 820
 821 // this code was adapted from libopus
 822 static inline uint64_t celt_cwrsi(unsigned int N, unsigned int K, unsigned int i, int *y)
 823 {
 824     uint64_t norm = 0;
 825     uint32_t p;
 826     int s, val;
 827     int k0;
 828
 829     while (N > 2) {
 830         uint32_t q;
 831
 832         /*Lots of pulses case:*/
 833         if (K >= N) {
 834             const uint32_t *row = ff_celt_pvq_u_row[N];
 835
 836             /* Are the pulses in this dimension negative? */
 837             p  = row[K + 1];
 838             s  = -(i >= p);
 839             i -= p & s;
 840
 841             /*Count how many pulses were placed in this dimension.*/
 842             k0 = K;
 843             q = row[N];
 844             if (q > i) {
 845                 K = N;
 846                 do {
 847                     p = ff_celt_pvq_u_row[--K][N];
 848                 } while (p > i);
 849             } else
 850                 for (p = row[K]; p > i; p = row[K])
 851                     K--;
 852
 853             i    -= p;
 854             val   = (k0 - K + s) ^ s;
 855             norm += val * val;
 856             *y++  = val;
 857         } else { /*Lots of dimensions case:*/
 858             /*Are there any pulses in this dimension at all?*/
 859             p = ff_celt_pvq_u_row[K    ][N];
 860             q = ff_celt_pvq_u_row[K + 1][N];
 861
 862             if (p <= i && i < q) {
 863                 i -= p;
 864                 *y++ = 0;
 865             } else {
 866                 /*Are the pulses in this dimension negative?*/
 867                 s  = -(i >= q);
 868                 i -= q & s;
 869
 870                 /*Count how many pulses were placed in this dimension.*/
 871                 k0 = K;
 872                 do p = ff_celt_pvq_u_row[--K][N];
 873                 while (p > i);
 874
 875                 i    -= p;
 876                 val   = (k0 - K + s) ^ s;
 877                 norm += val * val;
 878                 *y++  = val;
 879             }
 880         }
 881         N--;
 882     }
 883
 884     /* N == 2 */
 885     p  = 2 * K + 1;
 886     s  = -(i >= p);
 887     i -= p & s;
 888     k0 = K;
 889     K  = (i + 1) / 2;
 890
 891     if (K)
 892         i -= 2 * K - 1;
 893
 894     val   = (k0 - K + s) ^ s;
 895     norm += val * val;
 896     *y++  = val;
 897
 898     /* N==1 */
 899     s     = -i;
 900     val   = (K + s) ^ s;
 901     norm += val * val;
 902     *y    = val;
 903
 904     return norm;
 905 }
 906
 907 static inline float celt_decode_pulses(OpusRangeCoder *rc, int *y, unsigned int N, unsigned int K)
 908 {
 909     unsigned int idx;
 910 #define CELT_PVQ_U(n, k) (ff_celt_pvq_u_row[FFMIN(n, k)][FFMAX(n, k)])
 911 #define CELT_PVQ_V(n, k) (CELT_PVQ_U(n, k) + CELT_PVQ_U(n, (k) + 1))
 912     idx = ff_opus_rc_dec_uint(rc, CELT_PVQ_V(N, K));
 913     return celt_cwrsi(N, K, idx, y);
 914 }
 915
 916 /** Decode pulse vector and combine the result with the pitch vector to produce
 917     the final normalised signal in the current band. */
 918 static inline unsigned int celt_alg_unquant(OpusRangeCoder *rc, float *X,
 919                                             unsigned int N, unsigned int K,
 920                                             enum CeltSpread spread,
 921                                             unsigned int blocks, float gain)
 922 {
 923     int y[176];
 924
 925     gain /= sqrtf(celt_decode_pulses(rc, y, N, K));
 926     celt_normalize_residual(y, X, N, gain);
 927     celt_exp_rotation(X, N, blocks, K, spread);
 928     return celt_extract_collapse_mask(y, N, blocks);
 929 }
 930
 931 static unsigned int celt_decode_band(CeltContext *s, OpusRangeCoder *rc,
 932                                      const int band, float *X, float *Y,
 933                                      int N, int b, unsigned int blocks,
 934                                      float *lowband, int duration,
 935                                      float *lowband_out, int level,
 936                                      float gain, float *lowband_scratch,
 937                                      int fill)
 938 {
 939     const uint8_t *cache;
 940     int dualstereo, split;
 941     int imid = 0, iside = 0;
 942     unsigned int N0 = N;
 943     int N_B;
 944     int N_B0;
 945     int B0 = blocks;
 946     int time_divide = 0;
 947     int recombine = 0;
 948     int inv = 0;
 949     float mid = 0, side = 0;
 950     int longblocks = (B0 == 1);
 951     unsigned int cm = 0;
 952
 953     N_B0 = N_B = N / blocks;
 954     split = dualstereo = (Y != NULL);
 955
 956     if (N == 1) {
 957         /* special case for one sample */
 958         int i;
 959         float *x = X;
 960         for (i = 0; i <= dualstereo; i++) {
 961             int sign = 0;
 962             if (s->remaining2 >= 1<<3) {
 963                 sign           = ff_opus_rc_get_raw(rc, 1);
 964                 s->remaining2 -= 1 << 3;
 965                 b             -= 1 << 3;
 966             }
 967             x[0] = sign ? -1.0f : 1.0f;
 968             x = Y;
 969         }
 970         if (lowband_out)
 971             lowband_out[0] = X[0];
 972         return 1;
 973     }
 974
 975     if (!dualstereo && level == 0) {
 976         int tf_change = s->tf_change[band];
 977         int k;
 978         if (tf_change > 0)
 979             recombine = tf_change;
 980         /* Band recombining to increase frequency resolution */
 981
 982         if (lowband &&
 983             (recombine || ((N_B & 1) == 0 && tf_change < 0) || B0 > 1)) {
 984             int j;
 985             for (j = 0; j < N; j++)
 986                 lowband_scratch[j] = lowband[j];
 987             lowband = lowband_scratch;
 988         }
 989
 990         for (k = 0; k < recombine; k++) {
 991             if (lowband)
 992                 celt_haar1(lowband, N >> k, 1 << k);
 993             fill = ff_celt_bit_interleave[fill & 0xF] | ff_celt_bit_interleave[fill >> 4] << 2;
 994         }
 995         blocks >>= recombine;
 996         N_B <<= recombine;
 997
 998         /* Increasing the time resolution */
 999         while ((N_B & 1) == 0 && tf_change < 0) {
1000             if (lowband)
1001                 celt_haar1(lowband, N_B, blocks);
1002             fill |= fill << blocks;
1003             blocks <<= 1;
1004             N_B >>= 1;
1005             time_divide++;
1006             tf_change++;
1007         }
1008         B0 = blocks;
1009         N_B0 = N_B;
1010
1011         /* Reorganize the samples in time order instead of frequency order */
1012         if (B0 > 1 && lowband)
1013             celt_deinterleave_hadamard(s->scratch, lowband, N_B >> recombine,
1014                                        B0 << recombine, longblocks);
1015     }
1016
1017     /* If we need 1.5 more bit than we can produce, split the band in two. */
1018     cache = ff_celt_cache_bits +
1019             ff_celt_cache_index[(duration + 1) * CELT_MAX_BANDS + band];
1020     if (!dualstereo && duration >= 0 && b > cache[cache[0]] + 12 && N > 2) {
1021         N >>= 1;
1022         Y = X + N;
1023         split = 1;
1024         duration -= 1;
1025         if (blocks == 1)
1026             fill = (fill & 1) | (fill << 1);
1027         blocks = (blocks + 1) >> 1;
1028     }
1029
1030     if (split) {
1031         int qn;
1032         int itheta = 0;
1033         int mbits, sbits, delta;
1034         int qalloc;
1035         int pulse_cap;
1036         int offset;
1037         int orig_fill;
1038         int tell;
1039
1040         /* Decide on the resolution to give to the split parameter theta */
1041         pulse_cap = ff_celt_log_freq_range[band] + duration * 8;
1042         offset = (pulse_cap >> 1) - (dualstereo && N == 2 ? CELT_QTHETA_OFFSET_TWOPHASE :
1043                                                           CELT_QTHETA_OFFSET);
1044         qn = (dualstereo && band >= s->intensitystereo) ? 1 :
1045              celt_compute_qn(N, b, offset, pulse_cap, dualstereo);
1046         tell = opus_rc_tell_frac(rc);
1047         if (qn != 1) {
1048             /* Entropy coding of the angle. We use a uniform pdf for the
1049             time split, a step for stereo, and a triangular one for the rest. */
1050             if (dualstereo && N > 2)
1051                 itheta = ff_opus_rc_dec_uint_step(rc, qn/2);
1052             else if (dualstereo || B0 > 1)
1053                 itheta = ff_opus_rc_dec_uint(rc, qn+1);
1054             else
1055                 itheta = ff_opus_rc_dec_uint_tri(rc, qn);
1056             itheta = itheta * 16384 / qn;
1057             /* NOTE: Renormalising X and Y *may* help fixed-point a bit at very high rate.
1058             Let's do that at higher complexity */
1059         } else if (dualstereo) {
1060             inv = (b > 2 << 3 && s->remaining2 > 2 << 3) ? ff_opus_rc_dec_log(rc, 2) : 0;
1061             itheta = 0;
1062         }
1063         qalloc = opus_rc_tell_frac(rc) - tell;
1064         b -= qalloc;
1065
1066         orig_fill = fill;
1067         if (itheta == 0) {
1068             imid = 32767;
1069             iside = 0;
1070             fill = av_mod_uintp2(fill, blocks);
1071             delta = -16384;
1072         } else if (itheta == 16384) {
1073             imid = 0;
1074             iside = 32767;
1075             fill &= ((1 << blocks) - 1) << blocks;
1076             delta = 16384;
1077         } else {
1078             imid = celt_cos(itheta);
1079             iside = celt_cos(16384-itheta);
1080             /* This is the mid vs side allocation that minimizes squared error
1081             in that band. */
1082             delta = ROUND_MUL16((N - 1) << 7, celt_log2tan(iside, imid));
1083         }
1084
1085         mid  = imid  / 32768.0f;
1086         side = iside / 32768.0f;
1087
1088         /* This is a special case for N=2 that only works for stereo and takes
1089         advantage of the fact that mid and side are orthogonal to encode
1090         the side with just one bit. */
1091         if (N == 2 && dualstereo) {
1092             int c;
1093             int sign = 0;
1094             float tmp;
1095             float *x2, *y2;
1096             mbits = b;
1097             /* Only need one bit for the side */
1098             sbits = (itheta != 0 && itheta != 16384) ? 1 << 3 : 0;
1099             mbits -= sbits;
1100             c = (itheta > 8192);
1101             s->remaining2 -= qalloc+sbits;
1102
1103             x2 = c ? Y : X;
1104             y2 = c ? X : Y;
1105             if (sbits)
1106                 sign = ff_opus_rc_get_raw(rc, 1);
1107             sign = 1 - 2 * sign;
1108             /* We use orig_fill here because we want to fold the side, but if
1109             itheta==16384, we'll have cleared the low bits of fill. */
1110             cm = celt_decode_band(s, rc, band, x2, NULL, N, mbits, blocks,
1111                                   lowband, duration, lowband_out, level, gain,
1112                                   lowband_scratch, orig_fill);
1113             /* We don't split N=2 bands, so cm is either 1 or 0 (for a fold-collapse),
1114             and there's no need to worry about mixing with the other channel. */
1115             y2[0] = -sign * x2[1];
1116             y2[1] =  sign * x2[0];
1117             X[0] *= mid;
1118             X[1] *= mid;
1119             Y[0] *= side;
1120             Y[1] *= side;
1121             tmp = X[0];
1122             X[0] = tmp - Y[0];
1123             Y[0] = tmp + Y[0];
1124             tmp = X[1];
1125             X[1] = tmp - Y[1];
1126             Y[1] = tmp + Y[1];
1127         } else {
1128             /* "Normal" split code */
1129             float *next_lowband2     = NULL;
1130             float *next_lowband_out1 = NULL;
1131             int next_level = 0;
1132             int rebalance;
1133
1134             /* Give more bits to low-energy MDCTs than they would
1135              * otherwise deserve */
1136             if (B0 > 1 && !dualstereo && (itheta & 0x3fff)) {
1137                 if (itheta > 8192)
1138                     /* Rough approximation for pre-echo masking */
1139                     delta -= delta >> (4 - duration);
1140                 else
1141                     /* Corresponds to a forward-masking slope of
1142                      * 1.5 dB per 10 ms */
1143                     delta = FFMIN(0, delta + (N << 3 >> (5 - duration)));
1144             }
1145             mbits = av_clip((b - delta) / 2, 0, b);
1146             sbits = b - mbits;
1147             s->remaining2 -= qalloc;
1148
1149             if (lowband && !dualstereo)
1150                 next_lowband2 = lowband + N; /* >32-bit split case */
1151
1152             /* Only stereo needs to pass on lowband_out.
1153              * Otherwise, it's handled at the end */
1154             if (dualstereo)
1155                 next_lowband_out1 = lowband_out;
1156             else
1157                 next_level = level + 1;
1158
1159             rebalance = s->remaining2;
1160             if (mbits >= sbits) {
1161                 /* In stereo mode, we do not apply a scaling to the mid
1162                  * because we need the normalized mid for folding later */
1163                 cm = celt_decode_band(s, rc, band, X, NULL, N, mbits, blocks,
1164                                       lowband, duration, next_lowband_out1,
1165                                       next_level, dualstereo ? 1.0f : (gain * mid),
1166                                       lowband_scratch, fill);
1167
1168                 rebalance = mbits - (rebalance - s->remaining2);
1169                 if (rebalance > 3 << 3 && itheta != 0)
1170                     sbits += rebalance - (3 << 3);
1171
1172                 /* For a stereo split, the high bits of fill are always zero,
1173                  * so no folding will be done to the side. */
1174                 cm |= celt_decode_band(s, rc, band, Y, NULL, N, sbits, blocks,
1175                                        next_lowband2, duration, NULL,
1176                                        next_level, gain * side, NULL,
1177                                        fill >> blocks) << ((B0 >> 1) & (dualstereo - 1));
1178             } else {
1179                 /* For a stereo split, the high bits of fill are always zero,
1180                  * so no folding will be done to the side. */
1181                 cm = celt_decode_band(s, rc, band, Y, NULL, N, sbits, blocks,
1182                                       next_lowband2, duration, NULL,
1183                                       next_level, gain * side, NULL,
1184                                       fill >> blocks) << ((B0 >> 1) & (dualstereo - 1));
1185
1186                 rebalance = sbits - (rebalance - s->remaining2);
1187                 if (rebalance > 3 << 3 && itheta != 16384)
1188                     mbits += rebalance - (3 << 3);
1189
1190                 /* In stereo mode, we do not apply a scaling to the mid because
1191                  * we need the normalized mid for folding later */
1192                 cm |= celt_decode_band(s, rc, band, X, NULL, N, mbits, blocks,
1193                                        lowband, duration, next_lowband_out1,
1194                                        next_level, dualstereo ? 1.0f : (gain * mid),
1195                                        lowband_scratch, fill);
1196             }
1197         }
1198     } else {
1199         /* This is the basic no-split case */
1200         unsigned int q         = celt_bits2pulses(cache, b);
1201         unsigned int curr_bits = celt_pulses2bits(cache, q);
1202         s->remaining2 -= curr_bits;
1203
1204         /* Ensures we can never bust the budget */
1205         while (s->remaining2 < 0 && q > 0) {
1206             s->remaining2 += curr_bits;
1207             curr_bits      = celt_pulses2bits(cache, --q);
1208             s->remaining2 -= curr_bits;
1209         }
1210
1211         if (q != 0) {
1212             /* Finally do the actual quantization */
1213             cm = celt_alg_unquant(rc, X, N, (q < 8) ? q : (8 + (q & 7)) << ((q >> 3) - 1),
1214                                   s->spread, blocks, gain);
1215         } else {
1216             /* If there's no pulse, fill the band anyway */
1217             int j;
1218             unsigned int cm_mask = (1 << blocks) - 1;
1219             fill &= cm_mask;
1220             if (!fill) {
1221                 for (j = 0; j < N; j++)
1222                     X[j] = 0.0f;
1223             } else {
1224                 if (!lowband) {
1225                     /* Noise */
1226                     for (j = 0; j < N; j++)
1227                         X[j] = (((int32_t)celt_rng(s)) >> 20);
1228                     cm = cm_mask;
1229                 } else {
1230                     /* Folded spectrum */
1231                     for (j = 0; j < N; j++) {
1232                         /* About 48 dB below the "normal" folding level */
1233                         X[j] = lowband[j] + (((celt_rng(s)) & 0x8000) ? 1.0f / 256 : -1.0f / 256);
1234                     }
1235                     cm = fill;
1236                 }
1237                 celt_renormalize_vector(X, N, gain);
1238             }
1239         }
1240     }
1241
1242     /* This code is used by the decoder and by the resynthesis-enabled encoder */
1243     if (dualstereo) {
1244         int j;
1245         if (N != 2)
1246             celt_stereo_merge(X, Y, mid, N);
1247         if (inv) {
1248             for (j = 0; j < N; j++)
1249                 Y[j] *= -1;
1250         }
1251     } else if (level == 0) {
1252         int k;
1253
1254         /* Undo the sample reorganization going from time order to frequency order */
1255         if (B0 > 1)
1256             celt_interleave_hadamard(s->scratch, X, N_B>>recombine,
1257                                      B0<<recombine, longblocks);
1258
1259         /* Undo time-freq changes that we did earlier */
1260         N_B = N_B0;
1261         blocks = B0;
1262         for (k = 0; k < time_divide; k++) {
1263             blocks >>= 1;
1264             N_B <<= 1;
1265             cm |= cm >> blocks;
1266             celt_haar1(X, N_B, blocks);
1267         }
1268
1269         for (k = 0; k < recombine; k++) {
1270             cm = ff_celt_bit_deinterleave[cm];
1271             celt_haar1(X, N0>>k, 1<<k);
1272         }
1273         blocks <<= recombine;
1274
1275         /* Scale output for later folding */
1276         if (lowband_out) {
1277             int j;
1278             float n = sqrtf(N0);
1279             for (j = 0; j < N0; j++)
1280                 lowband_out[j] = n * X[j];
1281         }
1282         cm = av_mod_uintp2(cm, blocks);
1283     }
1284     return cm;
1285 }
1286
1287 static void celt_denormalize(CeltContext *s, CeltFrame *frame, float *data)
1288 {
1289     int i, j;
1290
1291     for (i = s->startband; i < s->endband; i++) {
1292         float *dst = data + (ff_celt_freq_bands[i] << s->duration);
1293         float norm = exp2(frame->energy[i] + ff_celt_mean_energy[i]);
1294
1295         for (j = 0; j < ff_celt_freq_range[i] << s->duration; j++)
1296             dst[j] *= norm;
1297     }
1298 }
1299
1300 static void celt_postfilter_apply_transition(CeltFrame *frame, float *data)
1301 {
1302     const int T0 = frame->pf_period_old;
1303     const int T1 = frame->pf_period;
1304
1305     float g00, g01, g02;
1306     float g10, g11, g12;
1307
1308     float x0, x1, x2, x3, x4;
1309
1310     int i;
1311
1312     if (frame->pf_gains[0]     == 0.0 &&
1313         frame->pf_gains_old[0] == 0.0)
1314         return;
1315
1316     g00 = frame->pf_gains_old[0];
1317     g01 = frame->pf_gains_old[1];
1318     g02 = frame->pf_gains_old[2];
1319     g10 = frame->pf_gains[0];
1320     g11 = frame->pf_gains[1];
1321     g12 = frame->pf_gains[2];
1322
1323     x1 = data[-T1 + 1];
1324     x2 = data[-T1];
1325     x3 = data[-T1 - 1];
1326     x4 = data[-T1 - 2];
1327
1328     for (i = 0; i < CELT_OVERLAP; i++) {
1329         float w = ff_celt_window2[i];
1330         x0 = data[i - T1 + 2];
1331
1332         data[i] +=  (1.0 - w) * g00 * data[i - T0]                          +
1333                     (1.0 - w) * g01 * (data[i - T0 - 1] + data[i - T0 + 1]) +
1334                     (1.0 - w) * g02 * (data[i - T0 - 2] + data[i - T0 + 2]) +
1335                     w         * g10 * x2                                    +
1336                     w         * g11 * (x1 + x3)                             +
1337                     w         * g12 * (x0 + x4);
1338         x4 = x3;
1339         x3 = x2;
1340         x2 = x1;
1341         x1 = x0;
1342     }
1343 }
1344
1345 static void celt_postfilter_apply(CeltFrame *frame,
1346                                   float *data, int len)
1347 {
1348     const int T = frame->pf_period;
1349     float g0, g1, g2;
1350     float x0, x1, x2, x3, x4;
1351     int i;
1352
1353     if (frame->pf_gains[0] == 0.0 || len <= 0)
1354         return;
1355
1356     g0 = frame->pf_gains[0];
1357     g1 = frame->pf_gains[1];
1358     g2 = frame->pf_gains[2];
1359
1360     x4 = data[-T - 2];
1361     x3 = data[-T - 1];
1362     x2 = data[-T];
1363     x1 = data[-T + 1];
1364
1365     for (i = 0; i < len; i++) {
1366         x0 = data[i - T + 2];
1367         data[i] += g0 * x2        +
1368                    g1 * (x1 + x3) +
1369                    g2 * (x0 + x4);
1370         x4 = x3;
1371         x3 = x2;
1372         x2 = x1;
1373         x1 = x0;
1374     }
1375 }
1376
1377 static void celt_postfilter(CeltContext *s, CeltFrame *frame)
1378 {
1379     int len = s->blocksize * s->blocks;
1380
1381     celt_postfilter_apply_transition(frame, frame->buf + 1024);
1382
1383     frame->pf_period_old = frame->pf_period;
1384     memcpy(frame->pf_gains_old, frame->pf_gains, sizeof(frame->pf_gains));
1385
1386     frame->pf_period = frame->pf_period_new;
1387     memcpy(frame->pf_gains, frame->pf_gains_new, sizeof(frame->pf_gains));
1388
1389     if (len > CELT_OVERLAP) {
1390         celt_postfilter_apply_transition(frame, frame->buf + 1024 + CELT_OVERLAP);
1391         celt_postfilter_apply(frame, frame->buf + 1024 + 2 * CELT_OVERLAP,
1392                               len - 2 * CELT_OVERLAP);
1393
1394         frame->pf_period_old = frame->pf_period;
1395         memcpy(frame->pf_gains_old, frame->pf_gains, sizeof(frame->pf_gains));
1396     }
1397
1398     memmove(frame->buf, frame->buf + len, (1024 + CELT_OVERLAP / 2) * sizeof(float));
1399 }
1400
1401 static int parse_postfilter(CeltContext *s, OpusRangeCoder *rc, int consumed)
1402 {
1403     static const float postfilter_taps[3][3] = {
1404         { 0.3066406250f, 0.2170410156f, 0.1296386719f },
1405         { 0.4638671875f, 0.2680664062f, 0.0           },
1406         { 0.7998046875f, 0.1000976562f, 0.0           }
1407     };
1408     int i;
1409
1410     memset(s->frame[0].pf_gains_new, 0, sizeof(s->frame[0].pf_gains_new));
1411     memset(s->frame[1].pf_gains_new, 0, sizeof(s->frame[1].pf_gains_new));
1412
1413     if (s->startband == 0 && consumed + 16 <= s->framebits) {
1414         int has_postfilter = ff_opus_rc_dec_log(rc, 1);
1415         if (has_postfilter) {
1416             float gain;
1417             int tapset, octave, period;
1418
1419             octave = ff_opus_rc_dec_uint(rc, 6);
1420             period = (16 << octave) + ff_opus_rc_get_raw(rc, 4 + octave) - 1;
1421             gain   = 0.09375f * (ff_opus_rc_get_raw(rc, 3) + 1);
1422             tapset = (opus_rc_tell(rc) + 2 <= s->framebits) ?
1423                      ff_opus_rc_dec_cdf(rc, ff_celt_model_tapset) : 0;
1424
1425             for (i = 0; i < 2; i++) {
1426                 CeltFrame *frame = &s->frame[i];
1427
1428                 frame->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD);
1429                 frame->pf_gains_new[0] = gain * postfilter_taps[tapset][0];
1430                 frame->pf_gains_new[1] = gain * postfilter_taps[tapset][1];
1431                 frame->pf_gains_new[2] = gain * postfilter_taps[tapset][2];
1432             }
1433         }
1434
1435         consumed = opus_rc_tell(rc);
1436     }
1437
1438     return consumed;
1439 }
1440
1441 static void process_anticollapse(CeltContext *s, CeltFrame *frame, float *X)
1442 {
1443     int i, j, k;
1444
1445     for (i = s->startband; i < s->endband; i++) {
1446         int renormalize = 0;
1447         float *xptr;
1448         float prev[2];
1449         float Ediff, r;
1450         float thresh, sqrt_1;
1451         int depth;
1452
1453         /* depth in 1/8 bits */
1454         depth = (1 + s->pulses[i]) / (ff_celt_freq_range[i] << s->duration);
1455         thresh = exp2f(-1.0 - 0.125f * depth);
1456         sqrt_1 = 1.0f / sqrtf(ff_celt_freq_range[i] << s->duration);
1457
1458         xptr = X + (ff_celt_freq_bands[i] << s->duration);
1459
1460         prev[0] = frame->prev_energy[0][i];
1461         prev[1] = frame->prev_energy[1][i];
1462         if (s->coded_channels == 1) {
1463             CeltFrame *frame1 = &s->frame[1];
1464
1465             prev[0] = FFMAX(prev[0], frame1->prev_energy[0][i]);
1466             prev[1] = FFMAX(prev[1], frame1->prev_energy[1][i]);
1467         }
1468         Ediff = frame->energy[i] - FFMIN(prev[0], prev[1]);
1469         Ediff = FFMAX(0, Ediff);
1470
1471         /* r needs to be multiplied by 2 or 2*sqrt(2) depending on LM because
1472         short blocks don't have the same energy as long */
1473         r = exp2(1 - Ediff);
1474         if (s->duration == 3)
1475             r *= M_SQRT2;
1476         r = FFMIN(thresh, r) * sqrt_1;
1477         for (k = 0; k < 1 << s->duration; k++) {
1478             /* Detect collapse */
1479             if (!(frame->collapse_masks[i] & 1 << k)) {
1480                 /* Fill with noise */
1481                 for (j = 0; j < ff_celt_freq_range[i]; j++)
1482                     xptr[(j << s->duration) + k] = (celt_rng(s) & 0x8000) ? r : -r;
1483                 renormalize = 1;
1484             }
1485         }
1486
1487         /* We just added some energy, so we need to renormalize */
1488         if (renormalize)
1489             celt_renormalize_vector(xptr, ff_celt_freq_range[i] << s->duration, 1.0f);
1490     }
1491 }
1492
1493 static void celt_decode_bands(CeltContext *s, OpusRangeCoder *rc)
1494 {
1495     float lowband_scratch[8 * 22];
1496     float norm[2 * 8 * 100];
1497
1498     int totalbits = (s->framebits << 3) - s->anticollapse_bit;
1499
1500     int update_lowband = 1;
1501     int lowband_offset = 0;
1502
1503     int i, j;
1504
1505     memset(s->coeffs, 0, sizeof(s->coeffs));
1506
1507     for (i = s->startband; i < s->endband; i++) {
1508         int band_offset = ff_celt_freq_bands[i] << s->duration;
1509         int band_size   = ff_celt_freq_range[i] << s->duration;
1510         float *X = s->coeffs[0] + band_offset;
1511         float *Y = (s->coded_channels == 2) ? s->coeffs[1] + band_offset : NULL;
1512
1513         int consumed = opus_rc_tell_frac(rc);
1514         float *norm2 = norm + 8 * 100;
1515         int effective_lowband = -1;
1516         unsigned int cm[2];
1517         int b;
1518
1519         /* Compute how many bits we want to allocate to this band */
1520         if (i != s->startband)
1521             s->remaining -= consumed;
1522         s->remaining2 = totalbits - consumed - 1;
1523         if (i <= s->codedbands - 1) {
1524             int curr_balance = s->remaining / FFMIN(3, s->codedbands-i);
1525             b = av_clip_uintp2(FFMIN(s->remaining2 + 1, s->pulses[i] + curr_balance), 14);
1526         } else
1527             b = 0;
1528
1529         if (ff_celt_freq_bands[i] - ff_celt_freq_range[i] >= ff_celt_freq_bands[s->startband] &&
1530             (update_lowband || lowband_offset == 0))
1531             lowband_offset = i;
1532
1533         /* Get a conservative estimate of the collapse_mask's for the bands we're
1534         going to be folding from. */
1535         if (lowband_offset != 0 && (s->spread != CELT_SPREAD_AGGRESSIVE ||
1536                                     s->blocks > 1 || s->tf_change[i] < 0)) {
1537             int foldstart, foldend;
1538
1539             /* This ensures we never repeat spectral content within one band */
1540             effective_lowband = FFMAX(ff_celt_freq_bands[s->startband],
1541                                       ff_celt_freq_bands[lowband_offset] - ff_celt_freq_range[i]);
1542             foldstart = lowband_offset;
1543             while (ff_celt_freq_bands[--foldstart] > effective_lowband);
1544             foldend = lowband_offset - 1;
1545             while (ff_celt_freq_bands[++foldend] < effective_lowband + ff_celt_freq_range[i]);
1546
1547             cm[0] = cm[1] = 0;
1548             for (j = foldstart; j < foldend; j++) {
1549                 cm[0] |= s->frame[0].collapse_masks[j];
1550                 cm[1] |= s->frame[s->coded_channels - 1].collapse_masks[j];
1551             }
1552         } else
1553             /* Otherwise, we'll be using the LCG to fold, so all blocks will (almost
1554             always) be non-zero.*/
1555             cm[0] = cm[1] = (1 << s->blocks) - 1;
1556
1557         if (s->dualstereo && i == s->intensitystereo) {
1558             /* Switch off dual stereo to do intensity */
1559             s->dualstereo = 0;
1560             for (j = ff_celt_freq_bands[s->startband] << s->duration; j < band_offset; j++)
1561                 norm[j] = (norm[j] + norm2[j]) / 2;
1562         }
1563
1564         if (s->dualstereo) {
1565             cm[0] = celt_decode_band(s, rc, i, X, NULL, band_size, b / 2, s->blocks,
1566                                      effective_lowband != -1 ? norm + (effective_lowband << s->duration) : NULL, s->duration,
1567             norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]);
1568
1569             cm[1] = celt_decode_band(s, rc, i, Y, NULL, band_size, b/2, s->blocks,
1570                                      effective_lowband != -1 ? norm2 + (effective_lowband << s->duration) : NULL, s->duration,
1571             norm2 + band_offset, 0, 1.0f, lowband_scratch, cm[1]);
1572         } else {
1573             cm[0] = celt_decode_band(s, rc, i, X, Y, band_size, b, s->blocks,
1574             effective_lowband != -1 ? norm + (effective_lowband << s->duration) : NULL, s->duration,
1575             norm + band_offset, 0, 1.0f, lowband_scratch, cm[0]|cm[1]);
1576
1577             cm[1] = cm[0];
1578         }
1579
1580         s->frame[0].collapse_masks[i]                     = (uint8_t)cm[0];
1581         s->frame[s->coded_channels - 1].collapse_masks[i] = (uint8_t)cm[1];
1582         s->remaining += s->pulses[i] + consumed;
1583
1584         /* Update the folding position only as long as we have 1 bit/sample depth */
1585         update_lowband = (b > band_size << 3);
1586     }
1587 }
1588
1589 int ff_celt_decode_frame(CeltContext *s, OpusRangeCoder *rc,
1590                          float **output, int coded_channels, int frame_size,
1591                          int startband,  int endband)
1592 {
1593     int i, j;
1594
1595     int consumed;           // bits of entropy consumed thus far for this frame
1596     int silence = 0;
1597     int transient = 0;
1598     int anticollapse = 0;
1599     IMDCT15Context *imdct;
1600     float imdct_scale = 1.0;
1601
1602     if (coded_channels != 1 && coded_channels != 2) {
1603         av_log(s->avctx, AV_LOG_ERROR, "Invalid number of coded channels: %d\n",
1604                coded_channels);
1605         return AVERROR_INVALIDDATA;
1606     }
1607     if (startband < 0 || startband > endband || endband > CELT_MAX_BANDS) {
1608         av_log(s->avctx, AV_LOG_ERROR, "Invalid start/end band: %d %d\n",
1609                startband, endband);
1610         return AVERROR_INVALIDDATA;
1611     }
1612
1613     s->flushed        = 0;
1614     s->coded_channels = coded_channels;
1615     s->startband      = startband;
1616     s->endband        = endband;
1617     s->framebits      = rc->rb.bytes * 8;
1618
1619     s->duration = av_log2(frame_size / CELT_SHORT_BLOCKSIZE);
1620     if (s->duration > CELT_MAX_LOG_BLOCKS ||
1621         frame_size != CELT_SHORT_BLOCKSIZE * (1 << s->duration)) {
1622         av_log(s->avctx, AV_LOG_ERROR, "Invalid CELT frame size: %d\n",
1623                frame_size);
1624         return AVERROR_INVALIDDATA;
1625     }
1626
1627     if (!s->output_channels)
1628         s->output_channels = coded_channels;
1629
1630     memset(s->frame[0].collapse_masks, 0, sizeof(s->frame[0].collapse_masks));
1631     memset(s->frame[1].collapse_masks, 0, sizeof(s->frame[1].collapse_masks));
1632
1633     consumed = opus_rc_tell(rc);
1634
1635     /* obtain silence flag */
1636     if (consumed >= s->framebits)
1637         silence = 1;
1638     else if (consumed == 1)
1639         silence = ff_opus_rc_dec_log(rc, 15);
1640
1641
1642     if (silence) {
1643         consumed = s->framebits;
1644         rc->total_read_bits += s->framebits - opus_rc_tell(rc);
1645     }
1646
1647     /* obtain post-filter options */
1648     consumed = parse_postfilter(s, rc, consumed);
1649
1650     /* obtain transient flag */
1651     if (s->duration != 0 && consumed+3 <= s->framebits)
1652         transient = ff_opus_rc_dec_log(rc, 3);
1653
1654     s->blocks    = transient ? 1 << s->duration : 1;
1655     s->blocksize = frame_size / s->blocks;
1656
1657     imdct = s->imdct[transient ? 0 : s->duration];
1658
1659     if (coded_channels == 1) {
1660         for (i = 0; i < CELT_MAX_BANDS; i++)
1661             s->frame[0].energy[i] = FFMAX(s->frame[0].energy[i], s->frame[1].energy[i]);
1662     }
1663
1664     celt_decode_coarse_energy(s, rc);
1665     celt_decode_tf_changes   (s, rc, transient);
1666     celt_decode_allocation   (s, rc);
1667     celt_decode_fine_energy  (s, rc);
1668     celt_decode_bands        (s, rc);
1669
1670     if (s->anticollapse_bit)
1671         anticollapse = ff_opus_rc_get_raw(rc, 1);
1672
1673     celt_decode_final_energy(s, rc, s->framebits - opus_rc_tell(rc));
1674
1675     /* apply anti-collapse processing and denormalization to
1676      * each coded channel */
1677     for (i = 0; i < s->coded_channels; i++) {
1678         CeltFrame *frame = &s->frame[i];
1679
1680         if (anticollapse)
1681             process_anticollapse(s, frame, s->coeffs[i]);
1682
1683         celt_denormalize(s, frame, s->coeffs[i]);
1684     }
1685
1686     /* stereo -> mono downmix */
1687     if (s->output_channels < s->coded_channels) {
1688         s->dsp->vector_fmac_scalar(s->coeffs[0], s->coeffs[1], 1.0, FFALIGN(frame_size, 16));
1689         imdct_scale = 0.5;
1690     } else if (s->output_channels > s->coded_channels)
1691         memcpy(s->coeffs[1], s->coeffs[0], frame_size * sizeof(float));
1692
1693     if (silence) {
1694         for (i = 0; i < 2; i++) {
1695             CeltFrame *frame = &s->frame[i];
1696
1697             for (j = 0; j < FF_ARRAY_ELEMS(frame->energy); j++)
1698                 frame->energy[j] = CELT_ENERGY_SILENCE;
1699         }
1700         memset(s->coeffs, 0, sizeof(s->coeffs));
1701     }
1702
1703     /* transform and output for each output channel */
1704     for (i = 0; i < s->output_channels; i++) {
1705         CeltFrame *frame = &s->frame[i];
1706         float m = frame->deemph_coeff;
1707
1708         /* iMDCT and overlap-add */
1709         for (j = 0; j < s->blocks; j++) {
1710             float *dst  = frame->buf + 1024 + j * s->blocksize;
1711
1712             imdct->imdct_half(imdct, dst + CELT_OVERLAP / 2, s->coeffs[i] + j,
1713                               s->blocks, imdct_scale);
1714             s->dsp->vector_fmul_window(dst, dst, dst + CELT_OVERLAP / 2,
1715                                        ff_celt_window, CELT_OVERLAP / 2);
1716         }
1717
1718         /* postfilter */
1719         celt_postfilter(s, frame);
1720
1721         /* deemphasis and output scaling */
1722         for (j = 0; j < frame_size; j++) {
1723             float tmp = frame->buf[1024 - frame_size + j] + m;
1724             m = tmp * CELT_DEEMPH_COEFF;
1725             output[i][j] = tmp / 32768.;
1726         }
1727         frame->deemph_coeff = m;
1728     }
1729
1730     if (coded_channels == 1)
1731         memcpy(s->frame[1].energy, s->frame[0].energy, sizeof(s->frame[0].energy));
1732
1733     for (i = 0; i < 2; i++ ) {
1734         CeltFrame *frame = &s->frame[i];
1735
1736         if (!transient) {
1737             memcpy(frame->prev_energy[1], frame->prev_energy[0], sizeof(frame->prev_energy[0]));
1738             memcpy(frame->prev_energy[0], frame->energy,         sizeof(frame->prev_energy[0]));
1739         } else {
1740             for (j = 0; j < CELT_MAX_BANDS; j++)
1741                 frame->prev_energy[0][j] = FFMIN(frame->prev_energy[0][j], frame->energy[j]);
1742         }
1743
1744         for (j = 0; j < s->startband; j++) {
1745             frame->prev_energy[0][j] = CELT_ENERGY_SILENCE;
1746             frame->energy[j]         = 0.0;
1747         }
1748         for (j = s->endband; j < CELT_MAX_BANDS; j++) {
1749             frame->prev_energy[0][j] = CELT_ENERGY_SILENCE;
1750             frame->energy[j]         = 0.0;
1751         }
1752     }
1753
1754     s->seed = rc->range;
1755
1756     return 0;
1757 }
1758
1759 void ff_celt_flush(CeltContext *s)
1760 {
1761     int i, j;
1762
1763     if (s->flushed)
1764         return;
1765
1766     for (i = 0; i < 2; i++) {
1767         CeltFrame *frame = &s->frame[i];
1768
1769         for (j = 0; j < CELT_MAX_BANDS; j++)
1770             frame->prev_energy[0][j] = frame->prev_energy[1][j] = CELT_ENERGY_SILENCE;
1771
1772         memset(frame->energy, 0, sizeof(frame->energy));
1773         memset(frame->buf,    0, sizeof(frame->buf));
1774
1775         memset(frame->pf_gains,     0, sizeof(frame->pf_gains));
1776         memset(frame->pf_gains_old, 0, sizeof(frame->pf_gains_old));
1777         memset(frame->pf_gains_new, 0, sizeof(frame->pf_gains_new));
1778
1779         frame->deemph_coeff = 0.0;
1780     }
1781     s->seed = 0;
1782
1783     s->flushed = 1;
1784 }
1785
1786 void ff_celt_free(CeltContext **ps)
1787 {
1788     CeltContext *s = *ps;
1789     int i;
1790
1791     if (!s)
1792         return;
1793
1794     for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++)
1795         ff_imdct15_uninit(&s->imdct[i]);
1796
1797     av_freep(&s->dsp);
1798     av_freep(ps);
1799 }
1800
1801 int ff_celt_init(AVCodecContext *avctx, CeltContext **ps, int output_channels)
1802 {
1803     CeltContext *s;
1804     int i, ret;
1805
1806     if (output_channels != 1 && output_channels != 2) {
1807         av_log(avctx, AV_LOG_ERROR, "Invalid number of output channels: %d\n",
1808                output_channels);
1809         return AVERROR(EINVAL);
1810     }
1811
1812     s = av_mallocz(sizeof(*s));
1813     if (!s)
1814         return AVERROR(ENOMEM);
1815
1816     s->avctx           = avctx;
1817     s->output_channels = output_channels;
1818
1819     for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) {
1820         ret = ff_imdct15_init(&s->imdct[i], i + 3);
1821         if (ret < 0)
1822             goto fail;
1823     }
1824
1825     s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
1826     if (!s->dsp) {
1827         ret = AVERROR(ENOMEM);
1828         goto fail;
1829     }
1830
1831     ff_celt_flush(s);
1832
1833     *ps = s;
1834
1835     return 0;
1836 fail:
1837     ff_celt_free(&s);
1838     return ret;
1839 }