3 * Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "opusenc_psy.h"
26 #include "libavutil/qsort.h"
28 /* Populate metrics without taking into consideration neighbouring steps */
29 static void step_collect_psy_metrics(OpusPsyContext *s, int index)
31 int silence = 0, ch, i, j;
32 OpusPsyStep *st = s->steps[index];
36 for (ch = 0; ch < s->avctx->channels; ch++) {
37 const int lap_size = (1 << s->bsize_analysis);
38 for (i = 1; i <= FFMIN(lap_size, index); i++) {
39 const int offset = i*120;
40 AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index - i);
41 memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float));
43 for (i = 0; i < lap_size; i++) {
44 const int offset = i*120 + lap_size;
45 AVFrame *cur = ff_bufqueue_peek(s->bufqueue, index + i);
46 memcpy(&s->scratch[offset], cur->extended_data[ch], cur->nb_samples*sizeof(float));
49 s->dsp->vector_fmul(s->scratch, s->scratch, s->window[s->bsize_analysis],
50 (OPUS_BLOCK_SIZE(s->bsize_analysis) << 1));
52 s->mdct[s->bsize_analysis]->mdct(s->mdct[s->bsize_analysis], st->coeffs[ch], s->scratch, 1);
54 for (i = 0; i < CELT_MAX_BANDS; i++)
55 st->bands[ch][i] = &st->coeffs[ch][ff_celt_freq_bands[i] << s->bsize_analysis];
58 for (ch = 0; ch < s->avctx->channels; ch++) {
59 for (i = 0; i < CELT_MAX_BANDS; i++) {
60 float avg_c_s, energy = 0.0f, dist_dev = 0.0f;
61 const int range = ff_celt_freq_range[i] << s->bsize_analysis;
62 const float *coeffs = st->bands[ch][i];
63 for (j = 0; j < range; j++)
64 energy += coeffs[j]*coeffs[j];
66 st->energy[ch][i] += sqrtf(energy);
67 silence |= !!st->energy[ch][i];
68 avg_c_s = energy / range;
70 for (j = 0; j < range; j++) {
71 const float c_s = coeffs[j]*coeffs[j];
72 dist_dev = (avg_c_s - c_s)*(avg_c_s - c_s);
75 st->tone[ch][i] += sqrtf(dist_dev);
79 st->silence = !silence;
81 if (s->avctx->channels > 1) {
82 for (i = 0; i < CELT_MAX_BANDS; i++) {
83 float incompat = 0.0f;
84 const float *coeffs1 = st->bands[0][i];
85 const float *coeffs2 = st->bands[1][i];
86 const int range = ff_celt_freq_range[i] << s->bsize_analysis;
87 for (j = 0; j < range; j++)
88 incompat += (coeffs1[j] - coeffs2[j])*(coeffs1[j] - coeffs2[j]);
89 st->stereo[i] = sqrtf(incompat);
93 for (ch = 0; ch < s->avctx->channels; ch++) {
94 for (i = 0; i < CELT_MAX_BANDS; i++) {
95 OpusBandExcitation *ex = &s->ex[ch][i];
96 float bp_e = bessel_filter(&s->bfilter_lo[ch][i], st->energy[ch][i]);
97 bp_e = bessel_filter(&s->bfilter_hi[ch][i], bp_e);
99 if (bp_e > ex->excitation) {
100 st->change_amp[ch][i] = bp_e - ex->excitation;
101 st->total_change += st->change_amp[ch][i];
102 ex->excitation = ex->excitation_init = bp_e;
103 ex->excitation_dist = 0.0f;
105 if (ex->excitation > 0.0f) {
106 ex->excitation -= av_clipf((1/expf(ex->excitation_dist)), ex->excitation_init/20, ex->excitation_init/1.09);
107 ex->excitation = FFMAX(ex->excitation, 0.0f);
108 ex->excitation_dist += 1.0f;
114 static void search_for_change_points(OpusPsyContext *s, float tgt_change,
115 int offset_s, int offset_e, int resolution,
119 float c_change = 0.0f;
120 if ((offset_e - offset_s) <= resolution)
122 for (i = offset_s; i < offset_e; i++) {
123 c_change += s->steps[i]->total_change;
124 if (c_change > tgt_change)
129 search_for_change_points(s, tgt_change / 2.0f, offset_s, i + 0, resolution, level + 1);
130 s->inflection_points[s->inflection_points_count++] = i;
131 search_for_change_points(s, tgt_change / 2.0f, i + 1, offset_e, resolution, level + 1);
134 static int flush_silent_frames(OpusPsyContext *s)
136 int fsize, silent_frames;
138 for (silent_frames = 0; silent_frames < s->buffered_steps; silent_frames++)
139 if (!s->steps[silent_frames]->silence)
141 if (--silent_frames < 0)
144 for (fsize = CELT_BLOCK_960; fsize > CELT_BLOCK_120; fsize--) {
145 if ((1 << fsize) > silent_frames)
147 s->p.frames = FFMIN(silent_frames / (1 << fsize), 48 >> fsize);
148 s->p.framesize = fsize;
155 /* Main function which decides frame size and frames per current packet */
156 static void psy_output_groups(OpusPsyContext *s)
158 int max_delay_samples = (s->options->max_delay_ms*s->avctx->sample_rate)/1000;
159 int max_bsize = FFMIN(OPUS_SAMPLES_TO_BLOCK_SIZE(max_delay_samples), CELT_BLOCK_960);
161 /* These don't change for now */
162 s->p.mode = OPUS_MODE_CELT;
163 s->p.bandwidth = OPUS_BANDWIDTH_FULLBAND;
165 /* Flush silent frames ASAP */
166 if (s->steps[0]->silence && flush_silent_frames(s))
169 s->p.framesize = FFMIN(max_bsize, CELT_BLOCK_960);
173 int ff_opus_psy_process(OpusPsyContext *s, OpusPacketInfo *p)
176 float total_energy_change = 0.0f;
178 if (s->buffered_steps < s->max_steps && !s->eof) {
179 const int awin = (1 << s->bsize_analysis);
180 if (++s->steps_to_process >= awin) {
181 step_collect_psy_metrics(s, s->buffered_steps - awin + 1);
182 s->steps_to_process = 0;
184 if ((++s->buffered_steps) < s->max_steps)
188 for (i = 0; i < s->buffered_steps; i++)
189 total_energy_change += s->steps[i]->total_change;
191 search_for_change_points(s, total_energy_change / 2.0f, 0,
192 s->buffered_steps, 1, 0);
194 psy_output_groups(s);
196 p->frames = s->p.frames;
197 p->framesize = s->p.framesize;
199 p->bandwidth = s->p.bandwidth;
204 void ff_opus_psy_celt_frame_init(OpusPsyContext *s, CeltFrame *f, int index)
206 int i, neighbouring_points = 0, start_offset = 0;
207 int radius = (1 << s->p.framesize), step_offset = radius*index;
210 f->start_band = (s->p.mode == OPUS_MODE_HYBRID) ? 17 : 0;
211 f->end_band = ff_celt_band_end[s->p.bandwidth];
212 f->channels = s->avctx->channels;
213 f->size = s->p.framesize;
215 for (i = 0; i < (1 << f->size); i++)
216 silence &= s->steps[index*(1 << f->size) + i]->silence;
218 f->silence = silence;
220 f->framebits = 0; /* Otherwise the silence flag eats up 16(!) bits */
224 for (i = 0; i < s->inflection_points_count; i++) {
225 if (s->inflection_points[i] >= step_offset) {
231 for (i = start_offset; i < FFMIN(radius, s->inflection_points_count - start_offset); i++) {
232 if (s->inflection_points[i] < (step_offset + radius)) {
233 neighbouring_points++;
237 /* Transient flagging */
238 f->transient = neighbouring_points > 0;
239 f->blocks = f->transient ? OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP : 1;
241 /* Some sane defaults */
248 /* More sane defaults */
252 f->skip_band_floor = f->end_band;
253 f->intensity_stereo = f->end_band;
255 f->spread = CELT_SPREAD_NORMAL;
256 memset(f->tf_change, 0, sizeof(int)*CELT_MAX_BANDS);
257 memset(f->alloc_boost, 0, sizeof(int)*CELT_MAX_BANDS);
260 static void celt_gauge_psy_weight(OpusPsyContext *s, OpusPsyStep **start,
264 int frame_size = OPUS_BLOCK_SIZE(s->p.framesize);
265 float rate, frame_bits = 0;
267 /* Used for the global ROTATE flag */
271 float band_score[CELT_MAX_BANDS] = { 0 };
272 float max_score = 1.0f;
274 /* Pass one - one loop around each band, computing unquant stuff */
275 for (i = 0; i < CELT_MAX_BANDS; i++) {
277 float tonal_contrib = 0.0f;
278 for (f = 0; f < (1 << s->p.framesize); f++) {
279 weight = start[f]->stereo[i];
280 for (ch = 0; ch < s->avctx->channels; ch++) {
281 weight += start[f]->change_amp[ch][i] + start[f]->tone[ch][i] + start[f]->energy[ch][i];
282 tonal_contrib += start[f]->tone[ch][i];
285 tonal += tonal_contrib;
286 band_score[i] = weight;
289 tonal /= (float)CELT_MAX_BANDS;
291 for (i = 0; i < CELT_MAX_BANDS; i++) {
292 if (band_score[i] > max_score)
293 max_score = band_score[i];
296 for (i = 0; i < CELT_MAX_BANDS; i++) {
297 f_out->alloc_boost[i] = (int)((band_score[i]/max_score)*3.0f);
298 frame_bits += band_score[i]*8.0f;
302 f_out->spread = av_clip_uintp2(lrintf(tonal), 2);
304 rate = ((float)s->avctx->bit_rate) + frame_bits*frame_size*16;
306 rate /= s->avctx->sample_rate/frame_size;
308 f_out->framebits = lrintf(rate);
309 f_out->framebits = FFMIN(f_out->framebits, OPUS_MAX_PACKET_SIZE*8);
310 f_out->framebits = FFALIGN(f_out->framebits, 8);
313 static int bands_dist(OpusPsyContext *s, CeltFrame *f, float *total_dist)
318 ff_opus_rc_enc_init(&dump);
319 ff_celt_enc_bitalloc(&dump, f);
321 for (i = 0; i < CELT_MAX_BANDS; i++) {
323 float dist = f->pvq->band_cost(f->pvq, f, &dump, i, &bits, s->lambda);
332 static void celt_search_for_dual_stereo(OpusPsyContext *s, CeltFrame *f)
336 bands_dist(s, f, &td1);
338 bands_dist(s, f, &td2);
340 f->dual_stereo = td2 < td1;
341 s->dual_stereo_used += td2 < td1;
344 static void celt_search_for_intensity(OpusPsyContext *s, CeltFrame *f)
346 int i, best_band = CELT_MAX_BANDS - 1;
347 float dist, best_dist = FLT_MAX;
349 /* TODO: fix, make some heuristic up here using the lambda value */
352 for (i = f->end_band; i >= end_band; i--) {
353 f->intensity_stereo = i;
354 bands_dist(s, f, &dist);
355 if (best_dist > dist) {
361 f->intensity_stereo = best_band;
362 s->avg_is_band = (s->avg_is_band + f->intensity_stereo)/2.0f;
365 static int celt_search_for_tf(OpusPsyContext *s, OpusPsyStep **start, CeltFrame *f)
367 int i, j, k, cway, config[2][CELT_MAX_BANDS] = { { 0 } };
368 float score[2] = { 0 };
370 for (cway = 0; cway < 2; cway++) {
372 int base = f->transient ? 120 : 960;
374 for (int i = 0; i < 2; i++) {
375 int c = ff_celt_tf_select[f->size][f->transient][cway][i];
376 mag[i] = c < 0 ? base >> FFABS(c) : base << FFABS(c);
379 for (i = 0; i < CELT_MAX_BANDS; i++) {
380 float iscore0 = 0.0f;
381 float iscore1 = 0.0f;
382 for (j = 0; j < (1 << f->size); j++) {
383 for (k = 0; k < s->avctx->channels; k++) {
384 iscore0 += start[j]->tone[k][i]*start[j]->change_amp[k][i]/mag[0];
385 iscore1 += start[j]->tone[k][i]*start[j]->change_amp[k][i]/mag[1];
388 config[cway][i] = FFABS(iscore0 - 1.0f) < FFABS(iscore1 - 1.0f);
389 score[cway] += config[cway][i] ? iscore1 : iscore0;
393 f->tf_select = score[0] < score[1];
394 memcpy(f->tf_change, config[f->tf_select], sizeof(int)*CELT_MAX_BANDS);
399 int ff_opus_psy_celt_frame_process(OpusPsyContext *s, CeltFrame *f, int index)
401 int start_transient_flag = f->transient;
402 OpusPsyStep **start = &s->steps[index * (1 << s->p.framesize)];
407 celt_gauge_psy_weight(s, start, f);
408 celt_search_for_intensity(s, f);
409 celt_search_for_dual_stereo(s, f);
410 celt_search_for_tf(s, start, f);
412 if (f->transient != start_transient_flag) {
413 f->blocks = f->transient ? OPUS_BLOCK_SIZE(s->p.framesize)/CELT_OVERLAP : 1;
414 s->redo_analysis = 1;
418 s->redo_analysis = 0;
423 void ff_opus_psy_postencode_update(OpusPsyContext *s, CeltFrame *f, OpusRangeCoder *rc)
425 int i, frame_size = OPUS_BLOCK_SIZE(s->p.framesize);
426 int steps_out = s->p.frames*(frame_size/120);
427 void *tmp[FF_BUFQUEUE_SIZE];
430 for (i = 0; i < steps_out; i++)
431 memset(s->steps[i], 0, sizeof(OpusPsyStep));
433 for (i = 0; i < s->max_steps; i++)
434 tmp[i] = s->steps[i];
436 for (i = 0; i < s->max_steps; i++) {
437 const int i_new = i - steps_out;
438 s->steps[i_new < 0 ? s->max_steps + i_new : i_new] = tmp[i];
441 for (i = steps_out; i < s->buffered_steps; i++)
442 s->steps[i]->index -= steps_out;
444 ideal_fbits = s->avctx->bit_rate/(s->avctx->sample_rate/frame_size);
446 for (i = 0; i < s->p.frames; i++) {
447 s->avg_is_band += f[i].intensity_stereo;
448 s->lambda *= ideal_fbits / f[i].framebits;
451 s->avg_is_band /= (s->p.frames + 1);
454 s->steps_to_process = 0;
455 s->buffered_steps -= steps_out;
456 s->total_packets_out += s->p.frames;
457 s->inflection_points_count = 0;
460 av_cold int ff_opus_psy_init(OpusPsyContext *s, AVCodecContext *avctx,
461 struct FFBufQueue *bufqueue, OpusEncOptions *options)
465 s->redo_analysis = 0;
467 s->options = options;
469 s->bufqueue = bufqueue;
470 s->max_steps = ceilf(s->options->max_delay_ms/2.5f);
471 s->bsize_analysis = CELT_BLOCK_960;
472 s->avg_is_band = CELT_MAX_BANDS - 1;
473 s->inflection_points_count = 0;
475 s->inflection_points = av_mallocz(sizeof(*s->inflection_points)*s->max_steps);
476 if (!s->inflection_points) {
477 ret = AVERROR(ENOMEM);
481 s->dsp = avpriv_float_dsp_alloc(avctx->flags & AV_CODEC_FLAG_BITEXACT);
483 ret = AVERROR(ENOMEM);
487 for (ch = 0; ch < s->avctx->channels; ch++) {
488 for (i = 0; i < CELT_MAX_BANDS; i++) {
489 bessel_init(&s->bfilter_hi[ch][i], 1.0f, 19.0f, 100.0f, 1);
490 bessel_init(&s->bfilter_lo[ch][i], 1.0f, 20.0f, 100.0f, 0);
494 for (i = 0; i < s->max_steps; i++) {
495 s->steps[i] = av_mallocz(sizeof(OpusPsyStep));
497 ret = AVERROR(ENOMEM);
502 for (i = 0; i < CELT_BLOCK_NB; i++) {
504 const int len = OPUS_BLOCK_SIZE(i);
505 s->window[i] = av_malloc(2*len*sizeof(float));
507 ret = AVERROR(ENOMEM);
510 generate_window_func(s->window[i], 2*len, WFUNC_SINE, &tmp);
511 if ((ret = ff_mdct15_init(&s->mdct[i], 0, i + 3, 68 << (CELT_BLOCK_NB - 1 - i))))
518 av_freep(&s->inflection_points);
521 for (i = 0; i < CELT_BLOCK_NB; i++) {
522 ff_mdct15_uninit(&s->mdct[i]);
523 av_freep(&s->window[i]);
526 for (i = 0; i < s->max_steps; i++)
527 av_freep(&s->steps[i]);
532 void ff_opus_psy_signal_eof(OpusPsyContext *s)
537 av_cold int ff_opus_psy_end(OpusPsyContext *s)
541 av_freep(&s->inflection_points);
544 for (i = 0; i < CELT_BLOCK_NB; i++) {
545 ff_mdct15_uninit(&s->mdct[i]);
546 av_freep(&s->window[i]);
549 for (i = 0; i < s->max_steps; i++)
550 av_freep(&s->steps[i]);
552 av_log(s->avctx, AV_LOG_INFO, "Average Intensity Stereo band: %0.1f\n", s->avg_is_band);
553 av_log(s->avctx, AV_LOG_INFO, "Dual Stereo used: %0.2f%%\n", ((float)s->dual_stereo_used/s->total_packets_out)*100.0f);