git.sesse.net Git - ffmpeg/blob - libavcodec/opusenc.c

   1 /*
   2  * Opus encoder
   3  * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 #include "opusenc.h"
  23 #include "opus_pvq.h"
  24 #include "opusenc_psy.h"
  25 #include "opustab.h"
  26
  27 #include "libavutil/float_dsp.h"
  28 #include "libavutil/opt.h"
  29 #include "internal.h"
  30 #include "bytestream.h"
  31 #include "audio_frame_queue.h"
  32
/* Private state of the Opus encoder; one instance is shared by every
 * function in this file. */
typedef struct OpusEncContext {
    AVClass *av_class;
    OpusEncOptions options;
    OpusPsyContext psyctx;              /* handed to the ff_opus_psy_celt_* calls */
    AVCodecContext *avctx;              /* avctx->frame_size is used as the subframe length */
    AudioFrameQueue afq;
    AVFloatDSPContext *dsp;             /* vector_fmul / vector_fmul_reverse used for windowing */
    MDCT15Context *mdct[CELT_BLOCK_NB]; /* indexed by CeltFrame.size; entry 0 is used for transient frames */
    CeltPVQ *pvq;
    struct FFBufQueue bufqueue;         /* input AVFrames consumed by celt_frame_setup_input() */

    uint8_t enc_id[64];
    int enc_id_bits;

    OpusPacketInfo packet;              /* framesize, mode, bandwidth and frame count of the current packet */

    int channels;

    CeltFrame *frame;                   /* array with one entry per frame in the packet */
    OpusRangeCoder *rc;                 /* array with one range coder per frame in the packet */

    /* Actual energy the decoder will have */
    float last_quantized_energy[OPUS_MAX_CHANNELS][CELT_MAX_BANDS];

    /* Work area for celt_frame_mdct(): the window is built at the start and
     * a temporary vector lives at offset 1920 */
    DECLARE_ALIGNED(32, float, scratch)[2048];
} OpusEncContext;
  59
  60 static void opus_write_extradata(AVCodecContext *avctx)
  61 {
  62     uint8_t *bs = avctx->extradata;
  63
  64     bytestream_put_buffer(&bs, "OpusHead", 8);
  65     bytestream_put_byte  (&bs, 0x1);
  66     bytestream_put_byte  (&bs, avctx->channels);
  67     bytestream_put_le16  (&bs, avctx->initial_padding);
  68     bytestream_put_le32  (&bs, avctx->sample_rate);
  69     bytestream_put_le16  (&bs, 0x0);
  70     bytestream_put_byte  (&bs, 0x0); /* Default layout */
  71 }
  72
  73 static int opus_gen_toc(OpusEncContext *s, uint8_t *toc, int *size, int *fsize_needed)
  74 {
  75     int i, tmp = 0x0, extended_toc = 0;
  76     static const int toc_cfg[][OPUS_MODE_NB][OPUS_BANDWITH_NB] = {
  77         /*  Silk                    Hybrid                  Celt                    Layer     */
  78         /*  NB  MB  WB SWB  FB      NB  MB  WB SWB  FB      NB  MB  WB SWB  FB      Bandwidth */
  79         { {  0,  0,  0,  0,  0 }, {  0,  0,  0,  0,  0 }, { 17,  0, 21, 25, 29 } }, /* 2.5 ms */
  80         { {  0,  0,  0,  0,  0 }, {  0,  0,  0,  0,  0 }, { 18,  0, 22, 26, 30 } }, /*   5 ms */
  81         { {  1,  5,  9,  0,  0 }, {  0,  0,  0, 13, 15 }, { 19,  0, 23, 27, 31 } }, /*  10 ms */
  82         { {  2,  6, 10,  0,  0 }, {  0,  0,  0, 14, 16 }, { 20,  0, 24, 28, 32 } }, /*  20 ms */
  83         { {  3,  7, 11,  0,  0 }, {  0,  0,  0,  0,  0 }, {  0,  0,  0,  0,  0 } }, /*  40 ms */
  84         { {  4,  8, 12,  0,  0 }, {  0,  0,  0,  0,  0 }, {  0,  0,  0,  0,  0 } }, /*  60 ms */
  85     };
  86     int cfg = toc_cfg[s->packet.framesize][s->packet.mode][s->packet.bandwidth];
  87     *fsize_needed = 0;
  88     if (!cfg)
  89         return 1;
  90     if (s->packet.frames == 2) {                                       /* 2 packets */
  91         if (s->frame[0].framebits == s->frame[1].framebits) {          /* same size */
  92             tmp = 0x1;
  93         } else {                                                  /* different size */
  94             tmp = 0x2;
  95             *fsize_needed = 1;                     /* put frame sizes in the packet */
  96         }
  97     } else if (s->packet.frames > 2) {
  98         tmp = 0x3;
  99         extended_toc = 1;
 100     }
 101     tmp |= (s->channels > 1) << 2;                                /* Stereo or mono */
 102     tmp |= (cfg - 1)         << 3;                           /* codec configuration */
 103     *toc++ = tmp;
 104     if (extended_toc) {
 105         for (i = 0; i < (s->packet.frames - 1); i++)
 106             *fsize_needed |= (s->frame[i].framebits != s->frame[i + 1].framebits);
 107         tmp = (*fsize_needed) << 7;                                /* vbr flag */
 108         tmp |= (0) << 6;                                       /* padding flag */
 109         tmp |= s->packet.frames;
 110         *toc++ = tmp;
 111     }
 112     *size = 1 + extended_toc;
 113     return 0;
 114 }
 115
 116 static void celt_frame_setup_input(OpusEncContext *s, CeltFrame *f)
 117 {
 118     int sf, ch;
 119     AVFrame *cur = NULL;
 120     const int subframesize = s->avctx->frame_size;
 121     int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
 122
 123     cur = ff_bufqueue_get(&s->bufqueue);
 124
 125     for (ch = 0; ch < f->channels; ch++) {
 126         CeltBlock *b = &f->block[ch];
 127         const void *input = cur->extended_data[ch];
 128         size_t bps = av_get_bytes_per_sample(cur->format);
 129         memcpy(b->overlap, input, bps*cur->nb_samples);
 130     }
 131
 132     av_frame_free(&cur);
 133
 134     for (sf = 0; sf < subframes; sf++) {
 135         if (sf != (subframes - 1))
 136             cur = ff_bufqueue_get(&s->bufqueue);
 137         else
 138             cur = ff_bufqueue_peek(&s->bufqueue, 0);
 139
 140         for (ch = 0; ch < f->channels; ch++) {
 141             CeltBlock *b = &f->block[ch];
 142             const void *input = cur->extended_data[ch];
 143             const size_t bps  = av_get_bytes_per_sample(cur->format);
 144             const size_t left = (subframesize - cur->nb_samples)*bps;
 145             const size_t len  = FFMIN(subframesize, cur->nb_samples)*bps;
 146             memcpy(&b->samples[sf*subframesize], input, len);
 147             memset(&b->samples[cur->nb_samples], 0, left);
 148         }
 149
 150         /* Last frame isn't popped off and freed yet - we need it for overlap */
 151         if (sf != (subframes - 1))
 152             av_frame_free(&cur);
 153     }
 154 }
 155
 156 /* Apply the pre emphasis filter */
 157 static void celt_apply_preemph_filter(OpusEncContext *s, CeltFrame *f)
 158 {
 159     int i, sf, ch;
 160     const int subframesize = s->avctx->frame_size;
 161     const int subframes = OPUS_BLOCK_SIZE(s->packet.framesize) / subframesize;
 162
 163     /* Filter overlap */
 164     for (ch = 0; ch < f->channels; ch++) {
 165         CeltBlock *b = &f->block[ch];
 166         float m = b->emph_coeff;
 167         for (i = 0; i < CELT_OVERLAP; i++) {
 168             float sample = b->overlap[i];
 169             b->overlap[i] = sample - m;
 170             m = sample * CELT_EMPH_COEFF;
 171         }
 172         b->emph_coeff = m;
 173     }
 174
 175     /* Filter the samples but do not update the last subframe's coeff - overlap ^^^ */
 176     for (sf = 0; sf < subframes; sf++) {
 177         for (ch = 0; ch < f->channels; ch++) {
 178             CeltBlock *b = &f->block[ch];
 179             float m = b->emph_coeff;
 180             for (i = 0; i < subframesize; i++) {
 181                 float sample = b->samples[sf*subframesize + i];
 182                 b->samples[sf*subframesize + i] = sample - m;
 183                 m = sample * CELT_EMPH_COEFF;
 184             }
 185             if (sf != (subframes - 1))
 186                 b->emph_coeff = m;
 187         }
 188     }
 189 }
 190
 191 /* Create the window and do the mdct */
 192 static void celt_frame_mdct(OpusEncContext *s, CeltFrame *f)
 193 {
 194     int i, j, t, ch;
 195     float *win = s->scratch, *temp = s->scratch + 1920;
 196
 197     if (f->transient) {
 198         for (ch = 0; ch < f->channels; ch++) {
 199             CeltBlock *b = &f->block[ch];
 200             float *src1 = b->overlap;
 201             for (t = 0; t < f->blocks; t++) {
 202                 float *src2 = &b->samples[CELT_OVERLAP*t];
 203                 s->dsp->vector_fmul(win, src1, ff_celt_window, 128);
 204                 s->dsp->vector_fmul_reverse(&win[CELT_OVERLAP], src2,
 205                                             ff_celt_window - 8, 128);
 206                 src1 = src2;
 207                 s->mdct[0]->mdct(s->mdct[0], b->coeffs + t, win, f->blocks);
 208             }
 209         }
 210     } else {
 211         int blk_len = OPUS_BLOCK_SIZE(f->size), wlen = OPUS_BLOCK_SIZE(f->size + 1);
 212         int rwin = blk_len - CELT_OVERLAP, lap_dst = (wlen - blk_len - CELT_OVERLAP) >> 1;
 213         memset(win, 0, wlen*sizeof(float));
 214         for (ch = 0; ch < f->channels; ch++) {
 215             CeltBlock *b = &f->block[ch];
 216
 217             /* Overlap */
 218             s->dsp->vector_fmul(temp, b->overlap, ff_celt_window, 128);
 219             memcpy(win + lap_dst, temp, CELT_OVERLAP*sizeof(float));
 220
 221             /* Samples, flat top window */
 222             memcpy(&win[lap_dst + CELT_OVERLAP], b->samples, rwin*sizeof(float));
 223
 224             /* Samples, windowed */
 225             s->dsp->vector_fmul_reverse(temp, b->samples + rwin,
 226                                         ff_celt_window - 8, 128);
 227             memcpy(win + lap_dst + blk_len, temp, CELT_OVERLAP*sizeof(float));
 228
 229             s->mdct[f->size]->mdct(s->mdct[f->size], b->coeffs, win, 1);
 230         }
 231     }
 232
 233     for (ch = 0; ch < f->channels; ch++) {
 234         CeltBlock *block = &f->block[ch];
 235         for (i = 0; i < CELT_MAX_BANDS; i++) {
 236             float ener = 0.0f;
 237             int band_offset = ff_celt_freq_bands[i] << f->size;
 238             int band_size   = ff_celt_freq_range[i] << f->size;
 239             float *coeffs   = &block->coeffs[band_offset];
 240
 241             for (j = 0; j < band_size; j++)
 242                 ener += coeffs[j]*coeffs[j];
 243
 244             block->lin_energy[i] = sqrtf(ener) + FLT_EPSILON;
 245             ener = 1.0f/block->lin_energy[i];
 246
 247             for (j = 0; j < band_size; j++)
 248                 coeffs[j] *= ener;
 249
 250             block->energy[i] = log2f(block->lin_energy[i]) - ff_celt_mean_energy[i];
 251
 252             /* CELT_ENERGY_SILENCE is what the decoder uses and its not -infinity */
 253             block->energy[i] = FFMAX(block->energy[i], CELT_ENERGY_SILENCE);
 254         }
 255     }
 256 }
 257
 258 static void celt_enc_tf(CeltFrame *f, OpusRangeCoder *rc)
 259 {
 260     int i, tf_select = 0, diff = 0, tf_changed = 0, tf_select_needed;
 261     int bits = f->transient ? 2 : 4;
 262
 263     tf_select_needed = ((f->size && (opus_rc_tell(rc) + bits + 1) <= f->framebits));
 264
 265     for (i = f->start_band; i < f->end_band; i++) {
 266         if ((opus_rc_tell(rc) + bits + tf_select_needed) <= f->framebits) {
 267             const int tbit = (diff ^ 1) == f->tf_change[i];
 268             ff_opus_rc_enc_log(rc, tbit, bits);
 269             diff ^= tbit;
 270             tf_changed |= diff;
 271         }
 272         bits = f->transient ? 4 : 5;
 273     }
 274
 275     if (tf_select_needed && ff_celt_tf_select[f->size][f->transient][0][tf_changed] !=
 276                             ff_celt_tf_select[f->size][f->transient][1][tf_changed]) {
 277         ff_opus_rc_enc_log(rc, f->tf_select, 1);
 278         tf_select = f->tf_select;
 279     }
 280
 281     for (i = f->start_band; i < f->end_band; i++)
 282         f->tf_change[i] = ff_celt_tf_select[f->size][f->transient][tf_select][f->tf_change[i]];
 283 }
 284
/*
 * Encoder-side bit allocation for one CELT frame.
 *
 * Writes to rc, in this order and each only when enough bits remain: the
 * spread decision, the per-band boost flags, the allocation trim, the band
 * skip markers and the intensity/dual stereo flags.
 *
 * Fills in f->caps, f->pulses, f->fine_bits, f->fine_priority,
 * f->coded_bands, f->anticollapse_needed and f->remaining; may also modify
 * f->spread and f->intensity_stereo.
 *
 * Budgets named *_8ths, and all per-band bit counts here, are in units of
 * 1/8 bit (note the "<< 3" conversions from whole bits).
 */
void ff_celt_enc_bitalloc(CeltFrame *f, OpusRangeCoder *rc)
{
    int i, j, low, high, total, done, bandbits, remaining, tbits_8ths;
    int skip_startband      = f->start_band;
    int skip_bit            = 0;
    int intensitystereo_bit = 0;
    int dualstereo_bit      = 0;
    int dynalloc            = 6;  /* cost in bits of a band's first boost flag */
    int extrabits           = 0;

    int *cap = f->caps;
    int boost[CELT_MAX_BANDS];
    int trim_offset[CELT_MAX_BANDS];
    int threshold[CELT_MAX_BANDS];
    int bits1[CELT_MAX_BANDS];
    int bits2[CELT_MAX_BANDS];

    /* Tell the spread to the decoder; if it does not fit, fall back to the
     * value used when nothing is coded */
    if (opus_rc_tell(rc) + 4 <= f->framebits)
        ff_opus_rc_enc_cdf(rc, f->spread, ff_celt_model_spread);
    else
        f->spread = CELT_SPREAD_NORMAL;

    /* Generate static allocation caps */
    for (i = 0; i < CELT_MAX_BANDS; i++) {
        cap[i] = (ff_celt_static_caps[f->size][f->channels - 1][i] + 64)
                 * ff_celt_freq_range[i] << (f->channels - 1) << f->size >> 2;
    }

    /* Band boosts: for each band, code a run of "boost" flags taken from
     * f->alloc_boost[i], terminated by a 0 flag or by running out of budget */
    tbits_8ths = f->framebits << 3;
    for (i = f->start_band; i < f->end_band; i++) {
        int quanta, b_dynalloc, boost_amount = f->alloc_boost[i];

        boost[i] = 0;

        /* Size of one boost step for this band, bounded to [6 bits, 8*quanta] */
        quanta = ff_celt_freq_range[i] << (f->channels - 1) << f->size;
        quanta = FFMIN(quanta << 3, FFMAX(6 << 3, quanta));
        b_dynalloc = dynalloc;

        while (opus_rc_tell_frac(rc) + (b_dynalloc << 3) < tbits_8ths && boost[i] < cap[i]) {
            int is_boost = boost_amount--;

            ff_opus_rc_enc_log(rc, is_boost, b_dynalloc);
            if (!is_boost)
                break;

            boost[i]   += quanta;
            tbits_8ths -= quanta;

            /* Every flag after the first one in a band costs a single bit */
            b_dynalloc = 1;
        }

        /* A boosted band makes the first flag of later bands cheaper, down to 2 bits */
        if (boost[i])
            dynalloc = FFMAX(2, dynalloc - 1);
    }

    /* Put allocation trim */
    /* NOTE(review): when the trim is not coded, f->alloc_trim is still used
     * unchanged in the trim offsets below - confirm the decoder assumes the
     * same value in that case */
    if (opus_rc_tell_frac(rc) + (6 << 3) <= tbits_8ths)
        ff_opus_rc_enc_cdf(rc, f->alloc_trim, ff_celt_model_alloc_trim);

    /* Anti-collapse bit reservation */
    tbits_8ths = (f->framebits << 3) - opus_rc_tell_frac(rc) - 1;
    f->anticollapse_needed = 0;
    if (f->transient && f->size >= 2 && tbits_8ths >= ((f->size + 2) << 3))
        f->anticollapse_needed = 1 << 3;
    tbits_8ths -= f->anticollapse_needed;

    /* Band skip bit reservation */
    if (tbits_8ths >= 1 << 3)
        skip_bit = 1 << 3;
    tbits_8ths -= skip_bit;

    /* Intensity/dual stereo bit reservation */
    if (f->channels == 2) {
        intensitystereo_bit = ff_celt_log2_frac[f->end_band - f->start_band];
        if (intensitystereo_bit <= tbits_8ths) {
            tbits_8ths -= intensitystereo_bit;
            if (tbits_8ths >= 1 << 3) {
                dualstereo_bit = 1 << 3;
                tbits_8ths -= 1 << 3;
            }
        } else {
            intensitystereo_bit = 0;
        }
    }

    /* Trim offsets */
    for (i = f->start_band; i < f->end_band; i++) {
        int trim     = f->alloc_trim - 5 - f->size;
        int band     = ff_celt_freq_range[i] * (f->end_band - i - 1);
        int duration = f->size + 3;
        int scale    = duration + f->channels - 1;

        /* PVQ minimum allocation threshold, below this value the band is
         * skipped */
        threshold[i] = FFMAX(3 * ff_celt_freq_range[i] << duration >> 4,
                             f->channels << 3);

        trim_offset[i] = trim * (band << scale) >> 6;

        /* Bands that are a single coefficient wide get a reduced offset */
        if (ff_celt_freq_range[i] << f->size == 1)
            trim_offset[i] -= f->channels << 3;
    }

    /* Coarse search: bisect over the rows of ff_celt_static_alloc for the
     * last row whose total allocation still fits in tbits_8ths */
    low  = 1;
    high = CELT_VECTORS - 1;
    while (low <= high) {
        int center = (low + high) >> 1;
        done = total = 0;

        /* Walk from the top band down; once one band reaches its threshold
         * every lower band is counted in full */
        for (i = f->end_band - 1; i >= f->start_band; i--) {
            bandbits = ff_celt_freq_range[i] * ff_celt_static_alloc[center][i]
                       << (f->channels - 1) << f->size >> 2;

            if (bandbits)
                bandbits = FFMAX(0, bandbits + trim_offset[i]);
            bandbits += boost[i];

            if (bandbits >= threshold[i] || done) {
                done = 1;
                total += FFMIN(bandbits, cap[i]);
            } else if (bandbits >= f->channels << 3)
                total += f->channels << 3;
        }

        if (total > tbits_8ths)
            high = center - 1;
        else
            low = center + 1;
    }
    /* low becomes the last row that fits, high the first one that does not */
    high = low--;

    /* Per-band allocation of the two bracketing rows: bits1 holds the low
     * row, bits2 ends up holding the difference (high row - low row) */
    for (i = f->start_band; i < f->end_band; i++) {
        bits1[i] = ff_celt_freq_range[i] * ff_celt_static_alloc[low][i]
                   << (f->channels - 1) << f->size >> 2;
        bits2[i] = high >= CELT_VECTORS ? cap[i] :
                   ff_celt_freq_range[i] * ff_celt_static_alloc[high][i]
                   << (f->channels - 1) << f->size >> 2;

        if (bits1[i])
            bits1[i] = FFMAX(0, bits1[i] + trim_offset[i]);
        if (bits2[i])
            bits2[i] = FFMAX(0, bits2[i] + trim_offset[i]);
        if (low)
            bits1[i] += boost[i];
        bits2[i] += boost[i];

        /* Band skipping below never goes past the highest boosted band */
        if (boost[i])
            skip_startband = i;
        bits2[i] = FFMAX(0, bits2[i] - bits1[i]);
    }

    /* Fine search: bisect the interpolation factor between the two rows,
     * with CELT_ALLOC_STEPS bits of resolution */
    low  = 0;
    high = 1 << CELT_ALLOC_STEPS;
    for (i = 0; i < CELT_ALLOC_STEPS; i++) {
        int center = (low + high) >> 1;
        done = total = 0;

        for (j = f->end_band - 1; j >= f->start_band; j--) {
            bandbits = bits1[j] + (center * bits2[j] >> CELT_ALLOC_STEPS);

            if (bandbits >= threshold[j] || done) {
                done = 1;
                total += FFMIN(bandbits, cap[j]);
            } else if (bandbits >= f->channels << 3)
                total += f->channels << 3;
        }
        if (total > tbits_8ths)
            high = center;
        else
            low = center;
    }

    /* Apply the chosen interpolation factor and record the per-band result
     * in f->pulses */
    done = total = 0;
    for (i = f->end_band - 1; i >= f->start_band; i--) {
        bandbits = bits1[i] + (low * bits2[i] >> CELT_ALLOC_STEPS);

        if (bandbits >= threshold[i] || done)
            done = 1;
        else
            bandbits = (bandbits >= f->channels << 3) ?
                       f->channels << 3 : 0;

        bandbits     = FFMIN(bandbits, cap[i]);
        f->pulses[i] = bandbits;
        total      += bandbits;
    }

    /* Band skipping: starting from the top band, decide band by band whether
     * it is skipped; f->coded_bands ends up one past the last coded band */
    for (f->coded_bands = f->end_band; ; f->coded_bands--) {
        int allocation;
        j = f->coded_bands - 1;

        if (j == skip_startband) {
            /* all remaining bands are not skipped */
            tbits_8ths += skip_bit;
            break;
        }

        /* determine the number of bits available for coding "do not skip" markers */
        remaining   = tbits_8ths - total;
        bandbits    = remaining / (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
        remaining  -= bandbits  * (ff_celt_freq_bands[j+1] - ff_celt_freq_bands[f->start_band]);
        allocation  = f->pulses[j] + bandbits * ff_celt_freq_range[j]
                      + FFMAX(0, remaining - (ff_celt_freq_bands[j] - ff_celt_freq_bands[f->start_band]));

        /* a "do not skip" marker is only coded if the allocation is
           above the chosen threshold */
        if (allocation >= FFMAX(threshold[j], (f->channels + 1) << 3)) {
            const int do_not_skip = f->coded_bands <= f->skip_band_floor;
            ff_opus_rc_enc_log(rc, do_not_skip, 1);
            if (do_not_skip)
                break;

            /* account for the marker bit that was just spent */
            total      += 1 << 3;
            allocation -= 1 << 3;
        }

        /* the band is skipped, so reclaim its bits */
        total -= f->pulses[j];
        if (intensitystereo_bit) {
            /* the intensity stereo reservation shrinks with the band count */
            total -= intensitystereo_bit;
            intensitystereo_bit = ff_celt_log2_frac[j - f->start_band];
            total += intensitystereo_bit;
        }

        /* a skipped band keeps at most one bit per channel */
        total += f->pulses[j] = (allocation >= f->channels << 3) ? f->channels << 3 : 0;
    }

    /* Encode stereo flags */
    if (intensitystereo_bit) {
        f->intensity_stereo = FFMIN(f->intensity_stereo, f->coded_bands);
        ff_opus_rc_enc_uint(rc, f->intensity_stereo, f->coded_bands + 1 - f->start_band);
    }
    if (f->intensity_stereo <= f->start_band)
        tbits_8ths += dualstereo_bit; /* no intensity stereo means no dual stereo */
    else if (dualstereo_bit)
        ff_opus_rc_enc_log(rc, f->dual_stereo, 1);

    /* Supply the remaining bits in this frame to lower bands */
    remaining = tbits_8ths - total;
    bandbits  = remaining / (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
    remaining -= bandbits * (ff_celt_freq_bands[f->coded_bands] - ff_celt_freq_bands[f->start_band]);
    for (i = f->start_band; i < f->coded_bands; i++) {
        int bits = FFMIN(remaining, ff_celt_freq_range[i]);

        f->pulses[i] += bits + bandbits * ff_celt_freq_range[i];
        remaining    -= bits;
    }

    /* Finally determine the allocation: split each coded band's bits between
     * fine energy and PVQ, carrying anything above the cap to the next band */
    for (i = f->start_band; i < f->coded_bands; i++) {
        int N = ff_celt_freq_range[i] << f->size;
        int prev_extra = extrabits;
        f->pulses[i] += extrabits;

        if (N > 1) {
            int dof;        // degrees of freedom
            int temp;       // dof * channels * log(dof)
            int offset;     // fine energy quantization offset, i.e.
                            // extra bits assigned over the standard
                            // totalbits/dof
            int fine_bits, max_bits;

            extrabits = FFMAX(0, f->pulses[i] - cap[i]);
            f->pulses[i] -= extrabits;

            /* intensity stereo makes use of an extra degree of freedom */
            dof = N * f->channels + (f->channels == 2 && N > 2 && !f->dual_stereo && i < f->intensity_stereo);
            temp = dof * (ff_celt_log_freq_range[i] + (f->size << 3));
            offset = (temp >> 1) - dof * CELT_FINE_OFFSET;
            if (N == 2) /* dof=2 is the only case that doesn't fit the model */
                offset += dof << 1;

            /* grant an additional bias for the first and second pulses */
            if (f->pulses[i] + offset < 2 * (dof << 3))
                offset += temp >> 2;
            else if (f->pulses[i] + offset < 3 * (dof << 3))
                offset += temp >> 3;

            fine_bits = (f->pulses[i] + offset + (dof << 2)) / (dof << 3);
            max_bits  = FFMIN((f->pulses[i] >> 3) >> (f->channels - 1), CELT_MAX_FINE_BITS);

            max_bits  = FFMAX(max_bits, 0);

            f->fine_bits[i] = av_clip(fine_bits, 0, max_bits);

            /* if fine_bits was rounded down or capped,
               give priority for the final fine energy pass */
            f->fine_priority[i] = (f->fine_bits[i] * (dof << 3) >= f->pulses[i] + offset);

            /* the remaining bits are assigned to PVQ */
            f->pulses[i] -= f->fine_bits[i] << (f->channels - 1) << 3;
        } else {
            /* all bits go to fine energy except for the sign bit */
            extrabits = FFMAX(0, f->pulses[i] - (f->channels << 3));
            f->pulses[i] -= extrabits;
            f->fine_bits[i] = 0;
            f->fine_priority[i] = 1;
        }

        /* hand back a limited number of extra fine energy bits to this band */
        if (extrabits > 0) {
            int fineextra = FFMIN(extrabits >> (f->channels + 2),
                                  CELT_MAX_FINE_BITS - f->fine_bits[i]);
            f->fine_bits[i] += fineextra;

            fineextra <<= f->channels + 2;
            f->fine_priority[i] = (fineextra >= extrabits - prev_extra);
            extrabits -= fineextra;
        }
    }
    /* whatever could not be placed is reported back through the frame */
    f->remaining = extrabits;

    /* skipped bands dedicate all of their bits for fine energy */
    for (; i < f->end_band; i++) {
        f->fine_bits[i]     = f->pulses[i] >> (f->channels - 1) >> 3;
        f->pulses[i]        = 0;
        f->fine_priority[i] = f->fine_bits[i] < 1;
    }
}
 611
 612 static void celt_enc_quant_pfilter(OpusRangeCoder *rc, CeltFrame *f)
 613 {
 614     float gain = f->pf_gain;
 615     int i, txval, octave = f->pf_octave, period = f->pf_period, tapset = f->pf_tapset;
 616
 617     ff_opus_rc_enc_log(rc, f->pfilter, 1);
 618     if (!f->pfilter)
 619         return;
 620
 621     /* Octave */
 622     txval = FFMIN(octave, 6);
 623     ff_opus_rc_enc_uint(rc, txval, 6);
 624     octave = txval;
 625     /* Period */
 626     txval = av_clip(period - (16 << octave) + 1, 0, (1 << (4 + octave)) - 1);
 627     ff_opus_rc_put_raw(rc, period, 4 + octave);
 628     period = txval + (16 << octave) - 1;
 629     /* Gain */
 630     txval = FFMIN(((int)(gain / 0.09375f)) - 1, 7);
 631     ff_opus_rc_put_raw(rc, txval, 3);
 632     gain   = 0.09375f * (txval + 1);
 633     /* Tapset */
 634     if ((opus_rc_tell(rc) + 2) <= f->framebits)
 635         ff_opus_rc_enc_cdf(rc, tapset, ff_celt_model_tapset);
 636     else
 637         tapset = 0;
 638     /* Finally create the coeffs */
 639     for (i = 0; i < 2; i++) {
 640         CeltBlock *block = &f->block[i];
 641
 642         block->pf_period_new = FFMAX(period, CELT_POSTFILTER_MINPERIOD);
 643         block->pf_gains_new[0] = gain * ff_celt_postfilter_taps[tapset][0];
 644         block->pf_gains_new[1] = gain * ff_celt_postfilter_taps[tapset][1];
 645         block->pf_gains_new[2] = gain * ff_celt_postfilter_taps[tapset][2];
 646     }
 647 }
 648
 649 static void exp_quant_coarse(OpusRangeCoder *rc, CeltFrame *f,
 650                              float last_energy[][CELT_MAX_BANDS], int intra)
 651 {
 652     int i, ch;
 653     float alpha, beta, prev[2] = { 0, 0 };
 654     const uint8_t *pmod = ff_celt_coarse_energy_dist[f->size][intra];
 655
 656     /* Inter is really just differential coding */
 657     if (opus_rc_tell(rc) + 3 <= f->framebits)
 658         ff_opus_rc_enc_log(rc, intra, 3);
 659     else
 660         intra = 0;
 661
 662     if (intra) {
 663         alpha = 0.0f;
 664         beta  = 1.0f - (4915.0f/32768.0f);
 665     } else {
 666         alpha = ff_celt_alpha_coef[f->size];
 667         beta  = ff_celt_beta_coef[f->size];
 668     }
 669
 670     for (i = f->start_band; i < f->end_band; i++) {
 671         for (ch = 0; ch < f->channels; ch++) {
 672             CeltBlock *block = &f->block[ch];
 673             const int left = f->framebits - opus_rc_tell(rc);
 674             const float last = FFMAX(-9.0f, last_energy[ch][i]);
 675             float diff = block->energy[i] - prev[ch] - last*alpha;
 676             int q_en = lrintf(diff);
 677             if (left >= 15) {
 678                 ff_opus_rc_enc_laplace(rc, &q_en, pmod[i << 1] << 7, pmod[(i << 1) + 1] << 6);
 679             } else if (left >= 2) {
 680                 q_en = av_clip(q_en, -1, 1);
 681                 ff_opus_rc_enc_cdf(rc, 2*q_en + 3*(q_en < 0), ff_celt_model_energy_small);
 682             } else if (left >= 1) {
 683                 q_en = av_clip(q_en, -1, 0);
 684                 ff_opus_rc_enc_log(rc, (q_en & 1), 1);
 685             } else q_en = -1;
 686
 687             block->error_energy[i] = q_en - diff;
 688             prev[ch] += beta * q_en;
 689         }
 690     }
 691 }
 692
 693 static void celt_quant_coarse(CeltFrame *f, OpusRangeCoder *rc,
 694                               float last_energy[][CELT_MAX_BANDS])
 695 {
 696     uint32_t inter, intra;
 697     OPUS_RC_CHECKPOINT_SPAWN(rc);
 698
 699     exp_quant_coarse(rc, f, last_energy, 1);
 700     intra = OPUS_RC_CHECKPOINT_BITS(rc);
 701
 702     OPUS_RC_CHECKPOINT_ROLLBACK(rc);
 703
 704     exp_quant_coarse(rc, f, last_energy, 0);
 705     inter = OPUS_RC_CHECKPOINT_BITS(rc);
 706
 707     if (inter > intra) { /* Unlikely */
 708         OPUS_RC_CHECKPOINT_ROLLBACK(rc);
 709         exp_quant_coarse(rc, f, last_energy, 1);
 710     }
 711 }
 712
 713 static void celt_quant_fine(CeltFrame *f, OpusRangeCoder *rc)
 714 {
 715     int i, ch;
 716     for (i = f->start_band; i < f->end_band; i++) {
 717         if (!f->fine_bits[i])
 718             continue;
 719         for (ch = 0; ch < f->channels; ch++) {
 720             CeltBlock *block = &f->block[ch];
 721             int quant, lim = (1 << f->fine_bits[i]);
 722             float offset, diff = 0.5f - block->error_energy[i];
 723             quant = av_clip(floor(diff*lim), 0, lim - 1);
 724             ff_opus_rc_put_raw(rc, quant, f->fine_bits[i]);
 725             offset = 0.5f - ((quant + 0.5f) * (1 << (14 - f->fine_bits[i])) / 16384.0f);
 726             block->error_energy[i] -= offset;
 727         }
 728     }
 729 }
 730
 731 static void celt_quant_final(OpusEncContext *s, OpusRangeCoder *rc, CeltFrame *f)
 732 {
 733     int i, ch, priority;
 734     for (priority = 0; priority < 2; priority++) {
 735         for (i = f->start_band; i < f->end_band && (f->framebits - opus_rc_tell(rc)) >= f->channels; i++) {
 736             if (f->fine_priority[i] != priority || f->fine_bits[i] >= CELT_MAX_FINE_BITS)
 737                 continue;
 738             for (ch = 0; ch < f->channels; ch++) {
 739                 CeltBlock *block = &f->block[ch];
 740                 const float err = block->error_energy[i];
 741                 const float offset = 0.5f * (1 << (14 - f->fine_bits[i] - 1)) / 16384.0f;
 742                 const int sign = FFABS(err + offset) < FFABS(err - offset);
 743                 ff_opus_rc_put_raw(rc, sign, 1);
 744                 block->error_energy[i] -= offset*(1 - 2*sign);
 745             }
 746         }
 747     }
 748 }
 749
/**
 * Encode a single CELT frame into the given range coder.
 *
 * The frame is first prepared by the psychoacoustic system and filled with
 * input samples. A silent frame only gets the silence flag (when the frame
 * has at least 16 bits) and resets the remembered band energies. Any other
 * frame is filtered, transformed, and then written in this order: silence
 * flag, pitch filter, transient flag, coarse energy, TF, bit allocation, fine
 * energy, band contents, anticollapse bit, final energy refinement.
 *
 * @param s     encoder context; last_quantized_energy is updated
 * @param rc    range coder receiving this frame's bitstream
 * @param f     frame state to encode
 * @param index index of the frame, passed through to the psychoacoustic system
 */
static void celt_encode_frame(OpusEncContext *s, OpusRangeCoder *rc,
                              CeltFrame *f, int index)
{
    int i, ch;

    ff_opus_rc_enc_init(rc);

    ff_opus_psy_celt_frame_init(&s->psyctx, f, index);

    celt_frame_setup_input(s, f);

    if (f->silence) {
        if (f->framebits >= 16)
            ff_opus_rc_enc_log(rc, 1, 15); /* Silence (if using explicit signalling) */
        /* A silent frame leaves no energy history for the next frame */
        for (ch = 0; ch < s->channels; ch++)
            memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
        return;
    }

    /* Filters */
    celt_apply_preemph_filter(s, f);
    /* NOTE(review): rc is re-initialized further down, so whatever is written
     * here is presumably discarded - confirm this block is only needed for
     * its side effects on the frame */
    if (f->pfilter) {
        ff_opus_rc_enc_log(rc, 0, 15);
        celt_enc_quant_pfilter(rc, f);
    }

    /* Transform */
    celt_frame_mdct(s, f);

    /* Need to handle transient/non-transient switches at any point during analysis */
    /* The transform is redone for as long as the psy system returns non-zero */
    while (ff_opus_psy_celt_frame_process(&s->psyctx, f, index))
        celt_frame_mdct(s, f);

    /* Start the actual bitstream of the frame from a fresh coder state */
    ff_opus_rc_enc_init(rc);

    /* Silence */
    ff_opus_rc_enc_log(rc, 0, 15);

    /* Pitch filter */
    /* Only coded when the frame starts at band 0 and 16 more bits fit */
    if (!f->start_band && opus_rc_tell(rc) + 16 <= f->framebits)
        celt_enc_quant_pfilter(rc, f);

    /* Transient flag */
    /* Only coded when f->size is non-zero and 3 more bits fit */
    if (f->size && opus_rc_tell(rc) + 3 <= f->framebits)
        ff_opus_rc_enc_log(rc, f->transient, 3);

    /* Main encoding */
    celt_quant_coarse   (f, rc, s->last_quantized_energy);
    celt_enc_tf         (f, rc);
    ff_celt_enc_bitalloc(f, rc);
    celt_quant_fine     (f, rc);
    ff_celt_quant_bands (f, rc);

    /* Anticollapse bit */
    if (f->anticollapse_needed)
        ff_opus_rc_put_raw(rc, f->anticollapse, 1);

    /* Final per-band energy adjustments from leftover bits */
    celt_quant_final(s, rc, f);

    /* Remember energy plus remaining quantization error of every band as the
     * reference handed to celt_quant_coarse() for the next frame */
    for (ch = 0; ch < f->channels; ch++) {
        CeltBlock *block = &f->block[ch];
        for (i = 0; i < CELT_MAX_BANDS; i++)
            s->last_quantized_energy[ch][i] = block->energy[i] + block->error_energy[i];
    }
}
 816
 817 static inline int write_opuslacing(uint8_t *dst, int v)
 818 {
 819     dst[0] = FFMIN(v - FFALIGN(v - 255, 4), v);
 820     dst[1] = v - dst[0] >> 2;
 821     return 1 + (v >= 252);
 822 }
 823
 824 static void opus_packet_assembler(OpusEncContext *s, AVPacket *avpkt)
 825 {
 826     int i, offset, fsize_needed;
 827
 828     /* Write toc */
 829     opus_gen_toc(s, avpkt->data, &offset, &fsize_needed);
 830
 831     /* Frame sizes if needed */
 832     if (fsize_needed) {
 833         for (i = 0; i < s->packet.frames - 1; i++) {
 834             offset += write_opuslacing(avpkt->data + offset,
 835                                        s->frame[i].framebits >> 3);
 836         }
 837     }
 838
 839     /* Packets */
 840     for (i = 0; i < s->packet.frames; i++) {
 841         ff_opus_rc_enc_end(&s->rc[i], avpkt->data + offset,
 842                            s->frame[i].framebits >> 3);
 843         offset += s->frame[i].framebits >> 3;
 844     }
 845
 846     avpkt->size = offset;
 847 }
 848
 849 /* Used as overlap for the first frame and padding for the last encoded packet */
 850 static AVFrame *spawn_empty_frame(OpusEncContext *s)
 851 {
 852     int i;
 853     AVFrame *f = av_frame_alloc();
 854     if (!f)
 855         return NULL;
 856     f->format         = s->avctx->sample_fmt;
 857     f->nb_samples     = s->avctx->frame_size;
 858     f->channel_layout = s->avctx->channel_layout;
 859     if (av_frame_get_buffer(f, 4)) {
 860         av_frame_free(&f);
 861         return NULL;
 862     }
 863     for (i = 0; i < s->channels; i++) {
 864         size_t bps = av_get_bytes_per_sample(f->format);
 865         memset(f->extended_data[i], 0, bps*f->nb_samples);
 866     }
 867     return f;
 868 }
 869
 870 static int opus_encode_frame(AVCodecContext *avctx, AVPacket *avpkt,
 871                              const AVFrame *frame, int *got_packet_ptr)
 872 {
 873     OpusEncContext *s = avctx->priv_data;
 874     int i, ret, frame_size, alloc_size = 0;
 875
 876     if (frame) { /* Add new frame to queue */
 877         if ((ret = ff_af_queue_add(&s->afq, frame)) < 0)
 878             return ret;
 879         ff_bufqueue_add(avctx, &s->bufqueue, av_frame_clone(frame));
 880     } else {
 881         ff_opus_psy_signal_eof(&s->psyctx);
 882         if (!s->afq.remaining_samples)
 883             return 0; /* We've been flushed and there's nothing left to encode */
 884     }
 885
 886     /* Run the psychoacoustic system */
 887     if (ff_opus_psy_process(&s->psyctx, &s->packet))
 888         return 0;
 889
 890     frame_size = OPUS_BLOCK_SIZE(s->packet.framesize);
 891
 892     if (!frame) {
 893         /* This can go negative, that's not a problem, we only pad if positive */
 894         int pad_empty = s->packet.frames*(frame_size/s->avctx->frame_size) - s->bufqueue.available + 1;
 895         /* Pad with empty 2.5 ms frames to whatever framesize was decided,
 896          * this should only happen at the very last flush frame. The frames
 897          * allocated here will be freed (because they have no other references)
 898          * after they get used by celt_frame_setup_input() */
 899         for (i = 0; i < pad_empty; i++) {
 900             AVFrame *empty = spawn_empty_frame(s);
 901             if (!empty)
 902                 return AVERROR(ENOMEM);
 903             ff_bufqueue_add(avctx, &s->bufqueue, empty);
 904         }
 905     }
 906
 907     for (i = 0; i < s->packet.frames; i++) {
 908         celt_encode_frame(s, &s->rc[i], &s->frame[i], i);
 909         alloc_size += s->frame[i].framebits >> 3;
 910     }
 911
 912     /* Worst case toc + the frame lengths if needed */
 913     alloc_size += 2 + s->packet.frames*2;
 914
 915     if ((ret = ff_alloc_packet2(avctx, avpkt, alloc_size, 0)) < 0)
 916         return ret;
 917
 918     /* Assemble packet */
 919     opus_packet_assembler(s, avpkt);
 920
 921     /* Update the psychoacoustic system */
 922     ff_opus_psy_postencode_update(&s->psyctx, s->frame, s->rc);
 923
 924     /* Remove samples from queue and skip if needed */
 925     ff_af_queue_remove(&s->afq, s->packet.frames*frame_size, &avpkt->pts, &avpkt->duration);
 926     if (s->packet.frames*frame_size > avpkt->duration) {
 927         uint8_t *side = av_packet_new_side_data(avpkt, AV_PKT_DATA_SKIP_SAMPLES, 10);
 928         if (!side)
 929             return AVERROR(ENOMEM);
 930         AV_WL32(&side[4], s->packet.frames*frame_size - avpkt->duration + 120);
 931     }
 932
 933     *got_packet_ptr = 1;
 934
 935     return 0;
 936 }
 937
 938 static av_cold int opus_encode_end(AVCodecContext *avctx)
 939 {
 940     int i;
 941     OpusEncContext *s = avctx->priv_data;
 942
 943     for (i = 0; i < CELT_BLOCK_NB; i++)
 944         ff_mdct15_uninit(&s->mdct[i]);
 945
 946     ff_celt_pvq_uninit(&s->pvq);
 947     av_freep(&s->dsp);
 948     av_freep(&s->frame);
 949     av_freep(&s->rc);
 950     ff_af_queue_close(&s->afq);
 951     ff_opus_psy_end(&s->psyctx);
 952     ff_bufqueue_discard_all(&s->bufqueue);
 953     av_freep(&avctx->extradata);
 954
 955     return 0;
 956 }
 957
 958 static av_cold int opus_encode_init(AVCodecContext *avctx)
 959 {
 960     int i, ch, ret, max_frames;
 961     OpusEncContext *s = avctx->priv_data;
 962
 963     s->avctx = avctx;
 964     s->channels = avctx->channels;
 965
 966     /* Opus allows us to change the framesize on each packet (and each packet may
 967      * have multiple frames in it) but we can't change the codec's frame size on
 968      * runtime, so fix it to the lowest possible number of samples and use a queue
 969      * to accumulate AVFrames until we have enough to encode whatever the encoder
 970      * decides is the best */
 971     avctx->frame_size = 120;
 972     /* Initial padding will change if SILK is ever supported */
 973     avctx->initial_padding = 120;
 974
 975     if (!avctx->bit_rate) {
 976         int coupled = ff_opus_default_coupled_streams[s->channels - 1];
 977         avctx->bit_rate = coupled*(96000) + (s->channels - coupled*2)*(48000);
 978     } else if (avctx->bit_rate < 6000 || avctx->bit_rate > 255000 * s->channels) {
 979         int64_t clipped_rate = av_clip(avctx->bit_rate, 6000, 255000 * s->channels);
 980         av_log(avctx, AV_LOG_ERROR, "Unsupported bitrate %"PRId64" kbps, clipping to %"PRId64" kbps\n",
 981                avctx->bit_rate/1000, clipped_rate/1000);
 982         avctx->bit_rate = clipped_rate;
 983     }
 984
 985     /* Extradata */
 986     avctx->extradata_size = 19;
 987     avctx->extradata = av_malloc(avctx->extradata_size + AV_INPUT_BUFFER_PADDING_SIZE);
 988     if (!avctx->extradata)
 989         return AVERROR(ENOMEM);
 990     opus_write_extradata(avctx);
 991
 992     ff_af_queue_init(avctx, &s->afq);
 993
 994     if ((ret = ff_celt_pvq_init(&s->pvq, 1)) < 0)
 995         return ret;
 996
 997     if (!(s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT)))
 998         return AVERROR(ENOMEM);
 999
1000     /* I have no idea why a base scaling factor of 68 works, could be the twiddles */
1001     for (i = 0; i < CELT_BLOCK_NB; i++)
1002         if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i))))
1003             return AVERROR(ENOMEM);
1004
1005     /* Zero out previous energy (matters for inter first frame) */
1006     for (ch = 0; ch < s->channels; ch++)
1007         memset(s->last_quantized_energy[ch], 0.0f, sizeof(float)*CELT_MAX_BANDS);
1008
1009     /* Allocate an empty frame to use as overlap for the first frame of audio */
1010     ff_bufqueue_add(avctx, &s->bufqueue, spawn_empty_frame(s));
1011     if (!ff_bufqueue_peek(&s->bufqueue, 0))
1012         return AVERROR(ENOMEM);
1013
1014     if ((ret = ff_opus_psy_init(&s->psyctx, s->avctx, &s->bufqueue, &s->options)))
1015         return ret;
1016
1017     /* Frame structs and range coder buffers */
1018     max_frames = ceilf(FFMIN(s->options.max_delay_ms, 120.0f)/2.5f);
1019     s->frame = av_malloc(max_frames*sizeof(CeltFrame));
1020     if (!s->frame)
1021         return AVERROR(ENOMEM);
1022     s->rc = av_malloc(max_frames*sizeof(OpusRangeCoder));
1023     if (!s->rc)
1024         return AVERROR(ENOMEM);
1025
1026     for (i = 0; i < max_frames; i++) {
1027         s->frame[i].dsp = s->dsp;
1028         s->frame[i].avctx = s->avctx;
1029         s->frame[i].seed = 0;
1030         s->frame[i].pvq = s->pvq;
1031         s->frame[i].apply_phase_inv = 1;
1032         s->frame[i].block[0].emph_coeff = s->frame[i].block[1].emph_coeff = 0.0f;
1033     }
1034
1035     return 0;
1036 }
1037
1038 #define OPUSENC_FLAGS AV_OPT_FLAG_ENCODING_PARAM | AV_OPT_FLAG_AUDIO_PARAM
1039 static const AVOption opusenc_options[] = {
1040     { "opus_delay", "Maximum delay in milliseconds", offsetof(OpusEncContext, options.max_delay_ms), AV_OPT_TYPE_FLOAT, { .dbl = OPUS_MAX_LOOKAHEAD }, 2.5f, OPUS_MAX_LOOKAHEAD, OPUSENC_FLAGS, "max_delay_ms" },
1041     { NULL },
1042 };
1043
/* AVClass of the encoder's private context; it exposes opusenc_options */
static const AVClass opusenc_class = {
    .class_name = "Opus encoder",
    .item_name  = av_default_item_name,
    .option     = opusenc_options,
    .version    = LIBAVUTIL_VERSION_INT,
};
1050
/* Codec-specific defaults for generic options.
 * "b" is set to 0 - presumably this is avctx->bit_rate, which
 * opus_encode_init() replaces with a channel-dependent rate when it is 0. */
static const AVCodecDefault opusenc_defaults[] = {
    { "b", "0" },
    { "compression_level", "10" },
    { NULL },
};
1056
/* Codec entry of the native Opus encoder: wires up the init/encode/close
 * callbacks defined in this file and lists the accepted input formats */
AVCodec ff_opus_encoder = {
    .name           = "opus",
    .long_name      = NULL_IF_CONFIG_SMALL("Opus"),
    .type           = AVMEDIA_TYPE_AUDIO,
    .id             = AV_CODEC_ID_OPUS,
    .defaults       = opusenc_defaults,
    .priv_class     = &opusenc_class,
    .priv_data_size = sizeof(OpusEncContext),
    .init           = opus_encode_init,
    .encode2        = opus_encode_frame,
    .close          = opus_encode_end,
    .caps_internal  = FF_CODEC_CAP_INIT_THREADSAFE | FF_CODEC_CAP_INIT_CLEANUP,
    .capabilities   = AV_CODEC_CAP_EXPERIMENTAL | AV_CODEC_CAP_SMALL_LAST_FRAME | AV_CODEC_CAP_DELAY,
    /* Input restrictions: 48 kHz, mono or stereo, planar float samples */
    .supported_samplerates = (const int []){ 48000, 0 },
    .channel_layouts = (const uint64_t []){ AV_CH_LAYOUT_MONO,
                                            AV_CH_LAYOUT_STEREO, 0 },
    .sample_fmts    = (const enum AVSampleFormat[]){ AV_SAMPLE_FMT_FLTP,
                                                     AV_SAMPLE_FMT_NONE },
};