]> git.sesse.net Git - ffmpeg/blob - libavcodec/aaccoder_twoloop.h
Merge commit '998e1b8f521b73e1ed3a13caaabcf79eb401cf0d'
[ffmpeg] / libavcodec / aaccoder_twoloop.h
1 /*
2  * AAC encoder twoloop coder
3  * Copyright (C) 2008-2009 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * AAC encoder twoloop coder
25  * @author Konstantin Shishkov, Claudio Freire
26  */
27
28 /**
29  * This file contains a template for the twoloop coder function.
30  * The following functions from aacenc_quantization/util.h must be provided
31  * externally, as already included declarations. They're not included
32  * explicitly here to make it possible to provide alternative implementations:
33  *  - quantize_band_cost
34  *  - abs_pow34_v
35  *  - find_max_val
36  *  - find_min_book
37  *  - find_form_factor
38  */
39
40 #ifndef AVCODEC_AACCODER_TWOLOOP_H
41 #define AVCODEC_AACCODER_TWOLOOP_H
42
43 #include <float.h>
44 #include "libavutil/mathematics.h"
45 #include "mathops.h"
46 #include "avcodec.h"
47 #include "put_bits.h"
48 #include "aac.h"
49 #include "aacenc.h"
50 #include "aactab.h"
51 #include "aacenctab.h"
52
53 /** Frequency in Hz for lower limit of noise substitution **/
54 #define NOISE_LOW_LIMIT 4000
55
56 #define sclip(x) av_clip(x,60,218)
57
58 /* Reflects the cost to change codebooks */
59 static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
60 {
61     return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
62 }
63
64 /**
65  * two-loop quantizers search taken from ISO 13818-7 Appendix C
66  */
67 static void search_for_quantizers_twoloop(AVCodecContext *avctx,
68                                           AACEncContext *s,
69                                           SingleChannelElement *sce,
70                                           const float lambda)
71 {
72     int start = 0, i, w, w2, g, recomprd;
73     int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
74         / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
75         * (lambda / 120.f);
76     int refbits = destbits;
77     int toomanybits, toofewbits;
78     char nzs[128];
79     uint8_t nextband[128];
80     int maxsf[128], minsf[128];
81     float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
82     float maxvals[128], spread_thr_r[128];
83     float min_spread_thr_r, max_spread_thr_r;
84
85     /**
86      * rdlambda controls the maximum tolerated distortion. Twoloop
87      * will keep iterating until it fails to lower it or it reaches
88      * ulimit * rdlambda. Keeping it low increases quality on difficult
89      * signals, but lower it too much, and bits will be taken from weak
90      * signals, creating "holes". A balance is necessary.
91      * rdmax and rdmin specify the relative deviation from rdlambda
92      * allowed for tonality compensation
93      */
94     float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
95     const float nzslope = 1.5f;
96     float rdmin = 0.03125f;
97     float rdmax = 1.0f;
98
99     /**
100      * sfoffs controls an offset of optimum allocation that will be
101      * applied based on lambda. Keep it real and modest, the loop
102      * will take care of the rest, this just accelerates convergence
103      */
104     float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
105
106     int fflag, minscaler, maxscaler, nminscaler;
107     int its  = 0;
108     int maxits = 30;
109     int allz = 0;
110     int tbits;
111     int cutoff = 1024;
112     int pns_start_pos;
113     int prev;
114
115     /**
116      * zeroscale controls a multiplier of the threshold, if band energy
117      * is below this, a zero is forced. Keep it lower than 1, unless
118      * low lambda is used, because energy < threshold doesn't mean there's
119      * no audible signal outright, it's just energy. Also make it rise
120      * slower than rdlambda, as rdscale has due compensation with
121      * noisy band deprioritization below, whereas zeroing logic is rather dumb
122      */
123     float zeroscale;
124     if (lambda > 120.f) {
125         zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
126     } else {
127         zeroscale = 1.f;
128     }
129
130     if (s->psy.bitres.alloc >= 0) {
131         /**
132          * Psy granted us extra bits to use, from the reservoir;
133          * adjust for lambda, except for what psy already did
134          */
135         destbits = s->psy.bitres.alloc
136             * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
137     }
138
139     if (avctx->flags & CODEC_FLAG_QSCALE) {
140         /**
141          * Constant Q-scale doesn't compensate MS coding on its own
142          * No need to be overly precise, this only controls RD
143          * adjustment CB limits when going overboard
144          */
145         if (s->options.mid_side && s->cur_type == TYPE_CPE)
146             destbits *= 2;
147
148         /**
149          * When using a constant Q-scale, don't adjust bits, just use RD
150          * Don't let it go overboard, though... 8x psy target is enough
151          */
152         toomanybits = 5800;
153         toofewbits = destbits / 16;
154
155         /** Don't offset scalers, just RD */
156         sfoffs = sce->ics.num_windows - 1;
157         rdlambda = sqrtf(rdlambda);
158
159         /** search further */
160         maxits *= 2;
161     } else {
162         /* When using ABR, be strict, but a reasonable leeway is
163          * critical to allow RC to smoothly track desired bitrate
164          * without sudden quality drops that cause audible artifacts.
165          * Symmetry is also desirable, to avoid systematic bias.
166          */
167         toomanybits = destbits + destbits/8;
168         toofewbits = destbits - destbits/8;
169
170         sfoffs = 0;
171         rdlambda = sqrtf(rdlambda);
172     }
173
174     /** and zero out above cutoff frequency */
175     {
176         int wlen = 1024 / sce->ics.num_windows;
177         int bandwidth;
178
179         /**
180          * Scale, psy gives us constant quality, this LP only scales
181          * bitrate by lambda, so we save bits on subjectively unimportant HF
182          * rather than increase quantization noise. Adjust nominal bitrate
183          * to effective bitrate according to encoding parameters,
184          * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
185          */
186         float rate_bandwidth_multiplier = 1.5f;
187         int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
188             ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
189             : (avctx->bit_rate / avctx->channels);
190
191         /** Compensate for extensions that increase efficiency */
192         if (s->options.pns || s->options.intensity_stereo)
193             frame_bit_rate *= 1.15f;
194
195         if (avctx->cutoff > 0) {
196             bandwidth = avctx->cutoff;
197         } else {
198             bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
199             s->psy.cutoff = bandwidth;
200         }
201
202         cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
203         pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
204     }
205
206     /**
207      * for values above this the decoder might end up in an endless loop
208      * due to always having more bits than what can be encoded.
209      */
210     destbits = FFMIN(destbits, 5800);
211     toomanybits = FFMIN(toomanybits, 5800);
212     toofewbits = FFMIN(toofewbits, 5800);
213     /**
214      * XXX: some heuristic to determine initial quantizers will reduce search time
215      * determine zero bands and upper distortion limits
216      */
217     min_spread_thr_r = -1;
218     max_spread_thr_r = -1;
219     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
220         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
221             int nz = 0;
222             float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
223             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
224                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
225                 if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
226                     sce->zeroes[(w+w2)*16+g] = 1;
227                     continue;
228                 }
229                 nz = 1;
230             }
231             if (!nz) {
232                 uplim = 0.0f;
233             } else {
234                 nz = 0;
235                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
236                     FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
237                     if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
238                         continue;
239                     uplim += band->threshold;
240                     energy += band->energy;
241                     spread += band->spread;
242                     nz++;
243                 }
244             }
245             uplims[w*16+g] = uplim;
246             energies[w*16+g] = energy;
247             nzs[w*16+g] = nz;
248             sce->zeroes[w*16+g] = !nz;
249             allz |= nz;
250             if (nz && sce->can_pns[w*16+g]) {
251                 spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
252                 if (min_spread_thr_r < 0) {
253                     min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
254                 } else {
255                     min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
256                     max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
257                 }
258             }
259         }
260     }
261
262     /** Compute initial scalers */
263     minscaler = 65535;
264     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
265         for (g = 0;  g < sce->ics.num_swb; g++) {
266             if (sce->zeroes[w*16+g]) {
267                 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
268                 continue;
269             }
270             /**
271              * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
272              * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
273              * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
274              * more robust.
275              */
276             sce->sf_idx[w*16+g] = av_clip(
277                 SCALE_ONE_POS
278                     + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
279                     + sfoffs,
280                 60, SCALE_MAX_POS);
281             minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
282         }
283     }
284
285     /** Clip */
286     minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
287     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
288         for (g = 0;  g < sce->ics.num_swb; g++)
289             if (!sce->zeroes[w*16+g])
290                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
291
292     if (!allz)
293         return;
294     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
295     ff_quantize_band_cost_cache_init(s);
296
297     for (i = 0; i < sizeof(minsf) / sizeof(minsf[0]); ++i)
298         minsf[i] = 0;
299     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
300         start = w*128;
301         for (g = 0;  g < sce->ics.num_swb; g++) {
302             const float *scaled = s->scoefs + start;
303             int minsfidx;
304             maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
305             if (maxvals[w*16+g] > 0)
306                 minsfidx = coef2minsf(maxvals[w*16+g]);
307             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
308                 minsf[(w+w2)*16+g] = minsfidx;
309             start += sce->ics.swb_sizes[g];
310         }
311     }
312
313     /**
314      * Scale uplims to match rate distortion to quality
315      * by applying noisy band deprioritization and tonal band prioritization.
316      * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
317      * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
318      * rate distortion requirements.
319      */
320     memcpy(euplims, uplims, sizeof(euplims));
321     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
322         /** psy already prioritizes transients to some extent */
323         float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
324         start = w*128;
325         for (g = 0;  g < sce->ics.num_swb; g++) {
326             if (nzs[g] > 0) {
327                 float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
328                 float energy2uplim = find_form_factor(
329                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
330                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
331                     sce->coeffs + start,
332                     nzslope * cleanup_factor);
333                 energy2uplim *= de_psy_factor;
334                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
335                     /** In ABR, we need to prioritize less and let rate control do its thing */
336                     energy2uplim = sqrtf(energy2uplim);
337                 }
338                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
339                 uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
340                                   * sce->ics.group_len[w];
341
342                 energy2uplim = find_form_factor(
343                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
344                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
345                     sce->coeffs + start,
346                     2.0f);
347                 energy2uplim *= de_psy_factor;
348                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
349                     /** In ABR, we need to prioritize less and let rate control do its thing */
350                     energy2uplim = sqrtf(energy2uplim);
351                 }
352                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
353                 euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
354                     0.5f, 1.0f);
355             }
356             start += sce->ics.swb_sizes[g];
357         }
358     }
359
360     for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
361         maxsf[i] = SCALE_MAX_POS;
362
363     //perform two-loop search
364     //outer loop - improve quality
365     do {
366         //inner loop - quantize spectrum to fit into given number of bits
367         int overdist;
368         int qstep = its ? 1 : 32;
369         do {
370             int changed = 0;
371             prev = -1;
372             recomprd = 0;
373             tbits = 0;
374             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
375                 start = w*128;
376                 for (g = 0;  g < sce->ics.num_swb; g++) {
377                     const float *coefs = &sce->coeffs[start];
378                     const float *scaled = &s->scoefs[start];
379                     int bits = 0;
380                     int cb;
381                     float dist = 0.0f;
382                     float qenergy = 0.0f;
383
384                     if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
385                         start += sce->ics.swb_sizes[g];
386                         if (sce->can_pns[w*16+g]) {
387                             /** PNS isn't free */
388                             tbits += ff_pns_bits(sce, w, g);
389                         }
390                         continue;
391                     }
392                     cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
393                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
394                         int b;
395                         float sqenergy;
396                         dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
397                                                    scaled + w2*128,
398                                                    sce->ics.swb_sizes[g],
399                                                    sce->sf_idx[w*16+g],
400                                                    cb,
401                                                    1.0f,
402                                                    INFINITY,
403                                                    &b, &sqenergy,
404                                                    0);
405                         bits += b;
406                         qenergy += sqenergy;
407                     }
408                     dists[w*16+g] = dist - bits;
409                     qenergies[w*16+g] = qenergy;
410                     if (prev != -1) {
411                         int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
412                         bits += ff_aac_scalefactor_bits[sfdiff];
413                     }
414                     tbits += bits;
415                     start += sce->ics.swb_sizes[g];
416                     prev = sce->sf_idx[w*16+g];
417                 }
418             }
419             if (tbits > toomanybits) {
420                 recomprd = 1;
421                 for (i = 0; i < 128; i++) {
422                     if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
423                         int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
424                         int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
425                         if (new_sf != sce->sf_idx[i]) {
426                             sce->sf_idx[i] = new_sf;
427                             changed = 1;
428                         }
429                     }
430                 }
431             } else if (tbits < toofewbits) {
432                 recomprd = 1;
433                 for (i = 0; i < 128; i++) {
434                     if (sce->sf_idx[i] > SCALE_ONE_POS) {
435                         int new_sf = FFMAX3(minsf[i], SCALE_ONE_POS, sce->sf_idx[i] - qstep);
436                         if (new_sf != sce->sf_idx[i]) {
437                             sce->sf_idx[i] = new_sf;
438                             changed = 1;
439                         }
440                     }
441                 }
442             }
443             qstep >>= 1;
444             if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
445                 qstep = 1;
446         } while (qstep);
447
448         overdist = 1;
449         fflag = tbits < toofewbits;
450         for (i = 0; i < 2 && (overdist || recomprd); ++i) {
451             if (recomprd) {
452                 /** Must recompute distortion */
453                 prev = -1;
454                 tbits = 0;
455                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
456                     start = w*128;
457                     for (g = 0;  g < sce->ics.num_swb; g++) {
458                         const float *coefs = sce->coeffs + start;
459                         const float *scaled = s->scoefs + start;
460                         int bits = 0;
461                         int cb;
462                         float dist = 0.0f;
463                         float qenergy = 0.0f;
464
465                         if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
466                             start += sce->ics.swb_sizes[g];
467                             if (sce->can_pns[w*16+g]) {
468                                 /** PNS isn't free */
469                                 tbits += ff_pns_bits(sce, w, g);
470                             }
471                             continue;
472                         }
473                         cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
474                         for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
475                             int b;
476                             float sqenergy;
477                             dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
478                                                     scaled + w2*128,
479                                                     sce->ics.swb_sizes[g],
480                                                     sce->sf_idx[w*16+g],
481                                                     cb,
482                                                     1.0f,
483                                                     INFINITY,
484                                                     &b, &sqenergy,
485                                                     0);
486                             bits += b;
487                             qenergy += sqenergy;
488                         }
489                         dists[w*16+g] = dist - bits;
490                         qenergies[w*16+g] = qenergy;
491                         if (prev != -1) {
492                             int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
493                             bits += ff_aac_scalefactor_bits[sfdiff];
494                         }
495                         tbits += bits;
496                         start += sce->ics.swb_sizes[g];
497                         prev = sce->sf_idx[w*16+g];
498                     }
499                 }
500             }
501             if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) {
502                 float maxoverdist = 0.0f;
503                 float ovrfactor = 1.f+(maxits-its)*16.f/maxits;
504                 overdist = recomprd = 0;
505                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
506                     for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
507                         if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
508                             float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
509                             maxoverdist = FFMAX(maxoverdist, ovrdist);
510                             overdist++;
511                         }
512                     }
513                 }
514                 if (overdist) {
515                     /* We have overdistorted bands, trade for zeroes (that can be noise)
516                      * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
517                      */
518                     float minspread = max_spread_thr_r;
519                     float maxspread = min_spread_thr_r;
520                     float zspread;
521                     int zeroable = 0;
522                     int zeroed = 0;
523                     int maxzeroed, zloop;
524                     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
525                         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
526                             if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
527                                 minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
528                                 maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
529                                 zeroable++;
530                             }
531                         }
532                     }
533                     zspread = (maxspread-minspread) * 0.0125f + minspread;
534                     /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC,
535                      * and forces the hand of the later search_for_pns step.
536                      * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are,
537                      * and leave further PNSing to search_for_pns if worthwhile.
538                      */
539                     zspread = FFMIN3(min_spread_thr_r * 8.f, zspread,
540                         ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1));
541                     maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits)));
542                     for (zloop = 0; zloop < 2; zloop++) {
543                         /* Two passes: first distorted stuff - two birds in one shot and all that,
544                          * then anything viable. Viable means not zero, but either CB=zero-able
545                          * (too high SF), not SF <= 1 (that means we'd be operating at very high
546                          * quality, we don't want PNS when doing VHQ), PNS allowed, and within
547                          * the lowest ranking percentile.
548                          */
549                         float loopovrfactor = (zloop) ? 1.0f : ovrfactor;
550                         int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS;
551                         int mcb;
552                         for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
553                             if (sce->ics.swb_offset[g] < pns_start_pos)
554                                 continue;
555                             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
556                                 if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread
557                                     && sce->sf_idx[w*16+g] > loopminsf
558                                     && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]))
559                                         || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) {
560                                     sce->zeroes[w*16+g] = 1;
561                                     sce->band_type[w*16+g] = 0;
562                                     zeroed++;
563                                 }
564                             }
565                         }
566                     }
567                     if (zeroed)
568                         recomprd = fflag = 1;
569                 } else {
570                     overdist = 0;
571                 }
572             }
573         }
574
575         minscaler = SCALE_MAX_POS;
576         maxscaler = 0;
577         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
578             for (g = 0;  g < sce->ics.num_swb; g++) {
579                 if (!sce->zeroes[w*16+g]) {
580                     minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
581                     maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
582                 }
583             }
584         }
585
586         minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
587         prev = -1;
588         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
589             /** Start with big steps, end up fine-tuning */
590             int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
591             int edepth = depth+2;
592             float uplmax = its / (maxits*0.25f) + 1.0f;
593             uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
594             start = w * 128;
595             for (g = 0; g < sce->ics.num_swb; g++) {
596                 int prevsc = sce->sf_idx[w*16+g];
597                 if (prev < 0 && !sce->zeroes[w*16+g])
598                     prev = sce->sf_idx[0];
599                 if (!sce->zeroes[w*16+g]) {
600                     const float *coefs = sce->coeffs + start;
601                     const float *scaled = s->scoefs + start;
602                     int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
603                     int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF);
604                     int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF);
605                     if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > FFMAX(mindeltasf, minsf[w*16+g])) {
606                         /* Try to make sure there is some energy in every nonzero band
607                          * NOTE: This algorithm must be forcibly imbalanced, pushing harder
608                          *  on holes or more distorted bands at first, otherwise there's
609                          *  no net gain (since the next iteration will offset all bands
610                          *  on the opposite direction to compensate for extra bits)
611                          */
612                         for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) {
613                             int cb, bits;
614                             float dist, qenergy;
615                             int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
616                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
617                             dist = qenergy = 0.f;
618                             bits = 0;
619                             if (!cb) {
620                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
621                             } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
622                                 break;
623                             }
624                             /* !g is the DC band, it's important, since quantization error here
625                              * applies to less than a cycle, it creates horrible intermodulation
626                              * distortion if it doesn't stick to what psy requests
627                              */
628                             if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g])
629                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
630                             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
631                                 int b;
632                                 float sqenergy;
633                                 dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
634                                                         scaled + w2*128,
635                                                         sce->ics.swb_sizes[g],
636                                                         sce->sf_idx[w*16+g]-1,
637                                                         cb,
638                                                         1.0f,
639                                                         INFINITY,
640                                                         &b, &sqenergy,
641                                                         0);
642                                 bits += b;
643                                 qenergy += sqenergy;
644                             }
645                             sce->sf_idx[w*16+g]--;
646                             dists[w*16+g] = dist - bits;
647                             qenergies[w*16+g] = qenergy;
648                             if (mb && (sce->sf_idx[w*16+g] < mindeltasf || (
649                                     (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
650                                     && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
651                                 ) )) {
652                                 break;
653                             }
654                         }
655                     } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g])
656                             && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
657                             && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
658                         ) {
659                         /** Um... over target. Save bits for more important stuff. */
660                         for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) {
661                             int cb, bits;
662                             float dist, qenergy;
663                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
664                             if (cb > 0) {
665                                 dist = qenergy = 0.f;
666                                 bits = 0;
667                                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
668                                     int b;
669                                     float sqenergy;
670                                     dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
671                                                             scaled + w2*128,
672                                                             sce->ics.swb_sizes[g],
673                                                             sce->sf_idx[w*16+g]+1,
674                                                             cb,
675                                                             1.0f,
676                                                             INFINITY,
677                                                             &b, &sqenergy,
678                                                             0);
679                                     bits += b;
680                                     qenergy += sqenergy;
681                                 }
682                                 dist -= bits;
683                                 if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
684                                     sce->sf_idx[w*16+g]++;
685                                     dists[w*16+g] = dist;
686                                     qenergies[w*16+g] = qenergy;
687                                 } else {
688                                     break;
689                                 }
690                             } else {
691                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
692                                 break;
693                             }
694                         }
695                     }
696                     prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf);
697                     if (sce->sf_idx[w*16+g] != prevsc)
698                         fflag = 1;
699                     nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
700                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
701                 }
702                 start += sce->ics.swb_sizes[g];
703             }
704         }
705
706         /** SF difference limit violation risk. Must re-clamp. */
707         prev = -1;
708         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
709             for (g = 0; g < sce->ics.num_swb; g++) {
710                 if (!sce->zeroes[w*16+g]) {
711                     int prevsf = sce->sf_idx[w*16+g];
712                     if (prev < 0)
713                         prev = prevsf;
714                     sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF);
715                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
716                     prev = sce->sf_idx[w*16+g];
717                     if (!fflag && prevsf != sce->sf_idx[w*16+g])
718                         fflag = 1;
719                 }
720             }
721         }
722
723         its++;
724     } while (fflag && its < maxits);
725
726     /** Scout out next nonzero bands */
727     ff_init_nextband_map(sce, nextband);
728
729     prev = -1;
730     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
731         /** Make sure proper codebooks are set */
732         for (g = 0; g < sce->ics.num_swb; g++) {
733             if (!sce->zeroes[w*16+g]) {
734                 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
735                 if (sce->band_type[w*16+g] <= 0) {
736                     if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) {
737                         /** Cannot zero out, make sure it's not attempted */
738                         sce->band_type[w*16+g] = 1;
739                     } else {
740                         sce->zeroes[w*16+g] = 1;
741                         sce->band_type[w*16+g] = 0;
742                     }
743                 }
744             } else {
745                 sce->band_type[w*16+g] = 0;
746             }
747             /** Check that there are no SF delta range violations */
748             if (!sce->zeroes[w*16+g]) {
749                 if (prev != -1) {
750                     av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
751                     av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
752                 } else if (sce->zeroes[0]) {
753                     /** Set global gain to something useful */
754                     sce->sf_idx[0] = sce->sf_idx[w*16+g];
755                 }
756                 prev = sce->sf_idx[w*16+g];
757             }
758         }
759     }
760 }
761
762 #endif /* AVCODEC_AACCODER_TWOLOOP_H */