]> git.sesse.net Git - ffmpeg/blob - libavcodec/aaccoder_twoloop.h
Merge commit '4012fe1ee819edc7689e182189e66c5401fb4b41'
[ffmpeg] / libavcodec / aaccoder_twoloop.h
1 /*
2  * AAC encoder twoloop coder
3  * Copyright (C) 2008-2009 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * AAC encoder twoloop coder
25  * @author Konstantin Shishkov, Claudio Freire
26  */
27
28 /**
29  * This file contains a template for the twoloop coder function.
30  * It needs to be provided, externally, as an already included declaration,
31  * the following functions from aacenc_quantization/util.h. They're not included
32  * explicitly here to make it possible to provide alternative implementations:
33  *  - quantize_band_cost
34  *  - abs_pow34_v
35  *  - find_max_val
36  *  - find_min_book
37  *  - find_form_factor
38  */
39
40 #ifndef AVCODEC_AACCODER_TWOLOOP_H
41 #define AVCODEC_AACCODER_TWOLOOP_H
42
43 #include <float.h>
44 #include "libavutil/mathematics.h"
45 #include "mathops.h"
46 #include "avcodec.h"
47 #include "put_bits.h"
48 #include "aac.h"
49 #include "aacenc.h"
50 #include "aactab.h"
51 #include "aacenctab.h"
52
53 /** Frequency in Hz for lower limit of noise substitution **/
54 #define NOISE_LOW_LIMIT 4000
55
56 #define sclip(x) av_clip(x,60,218)
57
58 /* Reflects the cost to change codebooks */
59 static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
60 {
61     return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
62 }
63
64 /**
65  * two-loop quantizers search taken from ISO 13818-7 Appendix C
66  */
67 static void search_for_quantizers_twoloop(AVCodecContext *avctx,
68                                           AACEncContext *s,
69                                           SingleChannelElement *sce,
70                                           const float lambda)
71 {
72     int start = 0, i, w, w2, g, recomprd;
73     int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
74         / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
75         * (lambda / 120.f);
76     int refbits = destbits;
77     int toomanybits, toofewbits;
78     char nzs[128];
79     uint8_t nextband[128];
80     int maxsf[128], minsf[128];
81     float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
82     float maxvals[128], spread_thr_r[128];
83     float min_spread_thr_r, max_spread_thr_r;
84
85     /**
86      * rdlambda controls the maximum tolerated distortion. Twoloop
87      * will keep iterating until it fails to lower it or it reaches
88      * ulimit * rdlambda. Keeping it low increases quality on difficult
89      * signals, but lower it too much, and bits will be taken from weak
90      * signals, creating "holes". A balance is necesary.
91      * rdmax and rdmin specify the relative deviation from rdlambda
92      * allowed for tonality compensation
93      */
94     float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
95     const float nzslope = 1.5f;
96     float rdmin = 0.03125f;
97     float rdmax = 1.0f;
98
99     /**
100      * sfoffs controls an offset of optmium allocation that will be
101      * applied based on lambda. Keep it real and modest, the loop
102      * will take care of the rest, this just accelerates convergence
103      */
104     float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
105
106     int fflag, minscaler, maxscaler, nminscaler;
107     int its  = 0;
108     int maxits = 30;
109     int allz = 0;
110     int tbits;
111     int cutoff = 1024;
112     int pns_start_pos;
113     int prev;
114
115     /**
116      * zeroscale controls a multiplier of the threshold, if band energy
117      * is below this, a zero is forced. Keep it lower than 1, unless
118      * low lambda is used, because energy < threshold doesn't mean there's
119      * no audible signal outright, it's just energy. Also make it rise
120      * slower than rdlambda, as rdscale has due compensation with
121      * noisy band depriorization below, whereas zeroing logic is rather dumb
122      */
123     float zeroscale;
124     if (lambda > 120.f) {
125         zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
126     } else {
127         zeroscale = 1.f;
128     }
129
130     if (s->psy.bitres.alloc >= 0) {
131         /**
132          * Psy granted us extra bits to use, from the reservoire
133          * adjust for lambda except what psy already did
134          */
135         destbits = s->psy.bitres.alloc
136             * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
137     }
138
139     if (avctx->flags & CODEC_FLAG_QSCALE) {
140         /**
141          * Constant Q-scale doesn't compensate MS coding on its own
142          * No need to be overly precise, this only controls RD
143          * adjustment CB limits when going overboard
144          */
145         if (s->options.mid_side && s->cur_type == TYPE_CPE)
146             destbits *= 2;
147
148         /**
149          * When using a constant Q-scale, don't adjust bits, just use RD
150          * Don't let it go overboard, though... 8x psy target is enough
151          */
152         toomanybits = 5800;
153         toofewbits = destbits / 16;
154
155         /** Don't offset scalers, just RD */
156         sfoffs = sce->ics.num_windows - 1;
157         rdlambda = sqrtf(rdlambda);
158
159         /** search further */
160         maxits *= 2;
161     } else {
162         /* When using ABR, be strict, but a reasonable leeway is
163          * critical to allow RC to smoothly track desired bitrate
164          * without sudden quality drops that cause audible artifacts.
165          * Symmetry is also desirable, to avoid systematic bias.
166          */
167         toomanybits = destbits + destbits/8;
168         toofewbits = destbits - destbits/8;
169
170         sfoffs = 0;
171         rdlambda = sqrtf(rdlambda);
172     }
173
174     /** and zero out above cutoff frequency */
175     {
176         int wlen = 1024 / sce->ics.num_windows;
177         int bandwidth;
178
179         /**
180          * Scale, psy gives us constant quality, this LP only scales
181          * bitrate by lambda, so we save bits on subjectively unimportant HF
182          * rather than increase quantization noise. Adjust nominal bitrate
183          * to effective bitrate according to encoding parameters,
184          * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
185          */
186         float rate_bandwidth_multiplier = 1.5f;
187         int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
188             ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
189             : (avctx->bit_rate / avctx->channels);
190
191         /** Compensate for extensions that increase efficiency */
192         if (s->options.pns || s->options.intensity_stereo)
193             frame_bit_rate *= 1.15f;
194
195         if (avctx->cutoff > 0) {
196             bandwidth = avctx->cutoff;
197         } else {
198             bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
199             s->psy.cutoff = bandwidth;
200         }
201
202         cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
203         pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
204     }
205
206     /**
207      * for values above this the decoder might end up in an endless loop
208      * due to always having more bits than what can be encoded.
209      */
210     destbits = FFMIN(destbits, 5800);
211     toomanybits = FFMIN(toomanybits, 5800);
212     toofewbits = FFMIN(toofewbits, 5800);
213     /**
214      * XXX: some heuristic to determine initial quantizers will reduce search time
215      * determine zero bands and upper distortion limits
216      */
217     min_spread_thr_r = -1;
218     max_spread_thr_r = -1;
219     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
220         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
221             int nz = 0;
222             float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
223             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
224                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
225                 if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
226                     sce->zeroes[(w+w2)*16+g] = 1;
227                     continue;
228                 }
229                 nz = 1;
230             }
231             if (!nz) {
232                 uplim = 0.0f;
233             } else {
234                 nz = 0;
235                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
236                     FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
237                     if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
238                         continue;
239                     uplim += band->threshold;
240                     energy += band->energy;
241                     spread += band->spread;
242                     nz++;
243                 }
244             }
245             uplims[w*16+g] = uplim;
246             energies[w*16+g] = energy;
247             nzs[w*16+g] = nz;
248             sce->zeroes[w*16+g] = !nz;
249             allz |= nz;
250             if (nz && sce->can_pns[w*16+g]) {
251                 spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
252                 if (min_spread_thr_r < 0) {
253                     min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
254                 } else {
255                     min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
256                     max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
257                 }
258             }
259         }
260     }
261
262     /** Compute initial scalers */
263     minscaler = 65535;
264     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
265         for (g = 0;  g < sce->ics.num_swb; g++) {
266             if (sce->zeroes[w*16+g]) {
267                 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
268                 continue;
269             }
270             /**
271              * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
272              * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
273              * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
274              * more robust.
275              */
276             sce->sf_idx[w*16+g] = av_clip(
277                 SCALE_ONE_POS
278                     + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
279                     + sfoffs,
280                 60, SCALE_MAX_POS);
281             minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
282         }
283     }
284
285     /** Clip */
286     minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
287     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
288         for (g = 0;  g < sce->ics.num_swb; g++)
289             if (!sce->zeroes[w*16+g])
290                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
291
292     if (!allz)
293         return;
294     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
295     ff_quantize_band_cost_cache_init(s);
296
297     for (i = 0; i < sizeof(minsf) / sizeof(minsf[0]); ++i)
298         minsf[i] = 0;
299     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
300         start = w*128;
301         for (g = 0;  g < sce->ics.num_swb; g++) {
302             const float *scaled = s->scoefs + start;
303             int minsfidx;
304             maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
305             if (maxvals[w*16+g] > 0) {
306                 minsfidx = coef2minsf(maxvals[w*16+g]);
307                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
308                     minsf[(w+w2)*16+g] = minsfidx;
309             }
310             start += sce->ics.swb_sizes[g];
311         }
312     }
313
314     /**
315      * Scale uplims to match rate distortion to quality
316      * bu applying noisy band depriorization and tonal band priorization.
317      * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
318      * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
319      * rate distortion requirements.
320      */
321     memcpy(euplims, uplims, sizeof(euplims));
322     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
323         /** psy already priorizes transients to some extent */
324         float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
325         start = w*128;
326         for (g = 0;  g < sce->ics.num_swb; g++) {
327             if (nzs[g] > 0) {
328                 float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
329                 float energy2uplim = find_form_factor(
330                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
331                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
332                     sce->coeffs + start,
333                     nzslope * cleanup_factor);
334                 energy2uplim *= de_psy_factor;
335                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
336                     /** In ABR, we need to priorize less and let rate control do its thing */
337                     energy2uplim = sqrtf(energy2uplim);
338                 }
339                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
340                 uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
341                                   * sce->ics.group_len[w];
342
343                 energy2uplim = find_form_factor(
344                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
345                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
346                     sce->coeffs + start,
347                     2.0f);
348                 energy2uplim *= de_psy_factor;
349                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
350                     /** In ABR, we need to priorize less and let rate control do its thing */
351                     energy2uplim = sqrtf(energy2uplim);
352                 }
353                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
354                 euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
355                     0.5f, 1.0f);
356             }
357             start += sce->ics.swb_sizes[g];
358         }
359     }
360
361     for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
362         maxsf[i] = SCALE_MAX_POS;
363
364     //perform two-loop search
365     //outer loop - improve quality
366     do {
367         //inner loop - quantize spectrum to fit into given number of bits
368         int overdist;
369         int qstep = its ? 1 : 32;
370         do {
371             int changed = 0;
372             prev = -1;
373             recomprd = 0;
374             tbits = 0;
375             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
376                 start = w*128;
377                 for (g = 0;  g < sce->ics.num_swb; g++) {
378                     const float *coefs = &sce->coeffs[start];
379                     const float *scaled = &s->scoefs[start];
380                     int bits = 0;
381                     int cb;
382                     float dist = 0.0f;
383                     float qenergy = 0.0f;
384
385                     if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
386                         start += sce->ics.swb_sizes[g];
387                         if (sce->can_pns[w*16+g]) {
388                             /** PNS isn't free */
389                             tbits += ff_pns_bits(sce, w, g);
390                         }
391                         continue;
392                     }
393                     cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
394                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
395                         int b;
396                         float sqenergy;
397                         dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
398                                                    scaled + w2*128,
399                                                    sce->ics.swb_sizes[g],
400                                                    sce->sf_idx[w*16+g],
401                                                    cb,
402                                                    1.0f,
403                                                    INFINITY,
404                                                    &b, &sqenergy,
405                                                    0);
406                         bits += b;
407                         qenergy += sqenergy;
408                     }
409                     dists[w*16+g] = dist - bits;
410                     qenergies[w*16+g] = qenergy;
411                     if (prev != -1) {
412                         int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
413                         bits += ff_aac_scalefactor_bits[sfdiff];
414                     }
415                     tbits += bits;
416                     start += sce->ics.swb_sizes[g];
417                     prev = sce->sf_idx[w*16+g];
418                 }
419             }
420             if (tbits > toomanybits) {
421                 recomprd = 1;
422                 for (i = 0; i < 128; i++) {
423                     if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
424                         int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
425                         int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
426                         if (new_sf != sce->sf_idx[i]) {
427                             sce->sf_idx[i] = new_sf;
428                             changed = 1;
429                         }
430                     }
431                 }
432             } else if (tbits < toofewbits) {
433                 recomprd = 1;
434                 for (i = 0; i < 128; i++) {
435                     if (sce->sf_idx[i] > SCALE_ONE_POS) {
436                         int new_sf = FFMAX3(minsf[i], SCALE_ONE_POS, sce->sf_idx[i] - qstep);
437                         if (new_sf != sce->sf_idx[i]) {
438                             sce->sf_idx[i] = new_sf;
439                             changed = 1;
440                         }
441                     }
442                 }
443             }
444             qstep >>= 1;
445             if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
446                 qstep = 1;
447         } while (qstep);
448
449         overdist = 1;
450         fflag = tbits < toofewbits;
451         for (i = 0; i < 2 && (overdist || recomprd); ++i) {
452             if (recomprd) {
453                 /** Must recompute distortion */
454                 prev = -1;
455                 tbits = 0;
456                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
457                     start = w*128;
458                     for (g = 0;  g < sce->ics.num_swb; g++) {
459                         const float *coefs = sce->coeffs + start;
460                         const float *scaled = s->scoefs + start;
461                         int bits = 0;
462                         int cb;
463                         float dist = 0.0f;
464                         float qenergy = 0.0f;
465
466                         if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
467                             start += sce->ics.swb_sizes[g];
468                             if (sce->can_pns[w*16+g]) {
469                                 /** PNS isn't free */
470                                 tbits += ff_pns_bits(sce, w, g);
471                             }
472                             continue;
473                         }
474                         cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
475                         for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
476                             int b;
477                             float sqenergy;
478                             dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
479                                                     scaled + w2*128,
480                                                     sce->ics.swb_sizes[g],
481                                                     sce->sf_idx[w*16+g],
482                                                     cb,
483                                                     1.0f,
484                                                     INFINITY,
485                                                     &b, &sqenergy,
486                                                     0);
487                             bits += b;
488                             qenergy += sqenergy;
489                         }
490                         dists[w*16+g] = dist - bits;
491                         qenergies[w*16+g] = qenergy;
492                         if (prev != -1) {
493                             int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
494                             bits += ff_aac_scalefactor_bits[sfdiff];
495                         }
496                         tbits += bits;
497                         start += sce->ics.swb_sizes[g];
498                         prev = sce->sf_idx[w*16+g];
499                     }
500                 }
501             }
502             if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) {
503                 float maxoverdist = 0.0f;
504                 float ovrfactor = 1.f+(maxits-its)*16.f/maxits;
505                 overdist = recomprd = 0;
506                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
507                     for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
508                         if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
509                             float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
510                             maxoverdist = FFMAX(maxoverdist, ovrdist);
511                             overdist++;
512                         }
513                     }
514                 }
515                 if (overdist) {
516                     /* We have overdistorted bands, trade for zeroes (that can be noise)
517                      * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
518                      */
519                     float minspread = max_spread_thr_r;
520                     float maxspread = min_spread_thr_r;
521                     float zspread;
522                     int zeroable = 0;
523                     int zeroed = 0;
524                     int maxzeroed, zloop;
525                     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
526                         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
527                             if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
528                                 minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
529                                 maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
530                                 zeroable++;
531                             }
532                         }
533                     }
534                     zspread = (maxspread-minspread) * 0.0125f + minspread;
535                     /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC,
536                      * and forced the hand of the later search_for_pns step.
537                      * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are,
538                      * and leave further PNSing to search_for_pns if worthwhile.
539                      */
540                     zspread = FFMIN3(min_spread_thr_r * 8.f, zspread,
541                         ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1));
542                     maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits)));
543                     for (zloop = 0; zloop < 2; zloop++) {
544                         /* Two passes: first distorted stuff - two birds in one shot and all that,
545                          * then anything viable. Viable means not zero, but either CB=zero-able
546                          * (too high SF), not SF <= 1 (that means we'd be operating at very high
547                          * quality, we don't want PNS when doing VHQ), PNS allowed, and within
548                          * the lowest ranking percentile.
549                          */
550                         float loopovrfactor = (zloop) ? 1.0f : ovrfactor;
551                         int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS;
552                         int mcb;
553                         for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
554                             if (sce->ics.swb_offset[g] < pns_start_pos)
555                                 continue;
556                             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
557                                 if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread
558                                     && sce->sf_idx[w*16+g] > loopminsf
559                                     && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]))
560                                         || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) {
561                                     sce->zeroes[w*16+g] = 1;
562                                     sce->band_type[w*16+g] = 0;
563                                     zeroed++;
564                                 }
565                             }
566                         }
567                     }
568                     if (zeroed)
569                         recomprd = fflag = 1;
570                 } else {
571                     overdist = 0;
572                 }
573             }
574         }
575
576         minscaler = SCALE_MAX_POS;
577         maxscaler = 0;
578         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
579             for (g = 0;  g < sce->ics.num_swb; g++) {
580                 if (!sce->zeroes[w*16+g]) {
581                     minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
582                     maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
583                 }
584             }
585         }
586
587         minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
588         prev = -1;
589         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
590             /** Start with big steps, end up fine-tunning */
591             int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
592             int edepth = depth+2;
593             float uplmax = its / (maxits*0.25f) + 1.0f;
594             uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
595             start = w * 128;
596             for (g = 0; g < sce->ics.num_swb; g++) {
597                 int prevsc = sce->sf_idx[w*16+g];
598                 if (prev < 0 && !sce->zeroes[w*16+g])
599                     prev = sce->sf_idx[0];
600                 if (!sce->zeroes[w*16+g]) {
601                     const float *coefs = sce->coeffs + start;
602                     const float *scaled = s->scoefs + start;
603                     int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
604                     int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF);
605                     int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF);
606                     if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > FFMAX(mindeltasf, minsf[w*16+g])) {
607                         /* Try to make sure there is some energy in every nonzero band
608                          * NOTE: This algorithm must be forcibly imbalanced, pushing harder
609                          *  on holes or more distorted bands at first, otherwise there's
610                          *  no net gain (since the next iteration will offset all bands
611                          *  on the opposite direction to compensate for extra bits)
612                          */
613                         for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) {
614                             int cb, bits;
615                             float dist, qenergy;
616                             int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
617                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
618                             dist = qenergy = 0.f;
619                             bits = 0;
620                             if (!cb) {
621                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
622                             } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
623                                 break;
624                             }
625                             /* !g is the DC band, it's important, since quantization error here
626                              * applies to less than a cycle, it creates horrible intermodulation
627                              * distortion if it doesn't stick to what psy requests
628                              */
629                             if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g])
630                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
631                             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
632                                 int b;
633                                 float sqenergy;
634                                 dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
635                                                         scaled + w2*128,
636                                                         sce->ics.swb_sizes[g],
637                                                         sce->sf_idx[w*16+g]-1,
638                                                         cb,
639                                                         1.0f,
640                                                         INFINITY,
641                                                         &b, &sqenergy,
642                                                         0);
643                                 bits += b;
644                                 qenergy += sqenergy;
645                             }
646                             sce->sf_idx[w*16+g]--;
647                             dists[w*16+g] = dist - bits;
648                             qenergies[w*16+g] = qenergy;
649                             if (mb && (sce->sf_idx[w*16+g] < mindeltasf || (
650                                     (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
651                                     && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
652                                 ) )) {
653                                 break;
654                             }
655                         }
656                     } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g])
657                             && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
658                             && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
659                         ) {
660                         /** Um... over target. Save bits for more important stuff. */
661                         for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) {
662                             int cb, bits;
663                             float dist, qenergy;
664                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
665                             if (cb > 0) {
666                                 dist = qenergy = 0.f;
667                                 bits = 0;
668                                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
669                                     int b;
670                                     float sqenergy;
671                                     dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
672                                                             scaled + w2*128,
673                                                             sce->ics.swb_sizes[g],
674                                                             sce->sf_idx[w*16+g]+1,
675                                                             cb,
676                                                             1.0f,
677                                                             INFINITY,
678                                                             &b, &sqenergy,
679                                                             0);
680                                     bits += b;
681                                     qenergy += sqenergy;
682                                 }
683                                 dist -= bits;
684                                 if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
685                                     sce->sf_idx[w*16+g]++;
686                                     dists[w*16+g] = dist;
687                                     qenergies[w*16+g] = qenergy;
688                                 } else {
689                                     break;
690                                 }
691                             } else {
692                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
693                                 break;
694                             }
695                         }
696                     }
697                     prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf);
698                     if (sce->sf_idx[w*16+g] != prevsc)
699                         fflag = 1;
700                     nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
701                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
702                 }
703                 start += sce->ics.swb_sizes[g];
704             }
705         }
706
707         /** SF difference limit violation risk. Must re-clamp. */
708         prev = -1;
709         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
710             for (g = 0; g < sce->ics.num_swb; g++) {
711                 if (!sce->zeroes[w*16+g]) {
712                     int prevsf = sce->sf_idx[w*16+g];
713                     if (prev < 0)
714                         prev = prevsf;
715                     sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF);
716                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
717                     prev = sce->sf_idx[w*16+g];
718                     if (!fflag && prevsf != sce->sf_idx[w*16+g])
719                         fflag = 1;
720                 }
721             }
722         }
723
724         its++;
725     } while (fflag && its < maxits);
726
727     /** Scout out next nonzero bands */
728     ff_init_nextband_map(sce, nextband);
729
730     prev = -1;
731     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
732         /** Make sure proper codebooks are set */
733         for (g = 0; g < sce->ics.num_swb; g++) {
734             if (!sce->zeroes[w*16+g]) {
735                 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
736                 if (sce->band_type[w*16+g] <= 0) {
737                     if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) {
738                         /** Cannot zero out, make sure it's not attempted */
739                         sce->band_type[w*16+g] = 1;
740                     } else {
741                         sce->zeroes[w*16+g] = 1;
742                         sce->band_type[w*16+g] = 0;
743                     }
744                 }
745             } else {
746                 sce->band_type[w*16+g] = 0;
747             }
748             /** Check that there's no SF delta range violations */
749             if (!sce->zeroes[w*16+g]) {
750                 if (prev != -1) {
751                     av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
752                     av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
753                 } else if (sce->zeroes[0]) {
754                     /** Set global gain to something useful */
755                     sce->sf_idx[0] = sce->sf_idx[w*16+g];
756                 }
757                 prev = sce->sf_idx[w*16+g];
758             }
759         }
760     }
761 }
762
763 #endif /* AVCODEC_AACCODER_TWOLOOP_H */