]> git.sesse.net Git - ffmpeg/blob - libavcodec/aaccoder_twoloop.h
vp9: add superframe merging bitstream filter.
[ffmpeg] / libavcodec / aaccoder_twoloop.h
1 /*
2  * AAC encoder twoloop coder
3  * Copyright (C) 2008-2009 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * AAC encoder twoloop coder
25  * @author Konstantin Shishkov, Claudio Freire
26  */
27
28 /**
29  * This file contains a template for the twoloop coder function.
30  * It needs to be provided, externally, as an already included declaration,
31  * the following functions from aacenc_quantization/util.h. They're not included
32  * explicitly here to make it possible to provide alternative implementations:
33  *  - quantize_band_cost
34  *  - abs_pow34_v
35  *  - find_max_val
36  *  - find_min_book
37  *  - find_form_factor
38  */
39
40 #ifndef AVCODEC_AACCODER_TWOLOOP_H
41 #define AVCODEC_AACCODER_TWOLOOP_H
42
43 #include <float.h>
44 #include "libavutil/mathematics.h"
45 #include "mathops.h"
46 #include "avcodec.h"
47 #include "put_bits.h"
48 #include "aac.h"
49 #include "aacenc.h"
50 #include "aactab.h"
51 #include "aacenctab.h"
52
53 /** Frequency in Hz for lower limit of noise substitution **/
54 #define NOISE_LOW_LIMIT 4000
55
56 #define sclip(x) av_clip(x,60,218)
57
58 /* Reflects the cost to change codebooks */
59 static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
60 {
61     return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
62 }
63
64 /**
65  * two-loop quantizers search taken from ISO 13818-7 Appendix C
66  */
67 static void search_for_quantizers_twoloop(AVCodecContext *avctx,
68                                           AACEncContext *s,
69                                           SingleChannelElement *sce,
70                                           const float lambda)
71 {
72     int start = 0, i, w, w2, g, recomprd;
73     int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
74         / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
75         * (lambda / 120.f);
76     int refbits = destbits;
77     int toomanybits, toofewbits;
78     char nzs[128];
79     uint8_t nextband[128];
80     int maxsf[128];
81     float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
82     float maxvals[128], spread_thr_r[128];
83     float min_spread_thr_r, max_spread_thr_r;
84
85     /**
86      * rdlambda controls the maximum tolerated distortion. Twoloop
87      * will keep iterating until it fails to lower it or it reaches
88      * ulimit * rdlambda. Keeping it low increases quality on difficult
89      * signals, but lower it too much, and bits will be taken from weak
90      * signals, creating "holes". A balance is necessary.
91      * rdmax and rdmin specify the relative deviation from rdlambda
92      * allowed for tonality compensation
93      */
94     float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
95     const float nzslope = 1.5f;
96     float rdmin = 0.03125f;
97     float rdmax = 1.0f;
98
99     /**
100      * sfoffs controls an offset of optimum allocation that will be
101      * applied based on lambda. Keep it real and modest, the loop
102      * will take care of the rest, this just accelerates convergence
103      */
104     float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
105
106     int fflag, minscaler, maxscaler, nminscaler;
107     int its  = 0;
108     int maxits = 30;
109     int allz = 0;
110     int tbits;
111     int cutoff = 1024;
112     int pns_start_pos;
113     int prev;
114
115     /**
116      * zeroscale controls a multiplier of the threshold, if band energy
117      * is below this, a zero is forced. Keep it lower than 1, unless
118      * low lambda is used, because energy < threshold doesn't mean there's
119      * no audible signal outright, it's just energy. Also make it rise
120      * slower than rdlambda, as rdscale has due compensation with
121      * noisy band deprioritization below, whereas zeroing logic is rather dumb
122      */
123     float zeroscale;
124     if (lambda > 120.f) {
125         zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
126     } else {
127         zeroscale = 1.f;
128     }
129
130     if (s->psy.bitres.alloc >= 0) {
131         /**
132          * Psy granted us extra bits to use, from the reservoir
133          * adjust for lambda except what psy already did
134          */
135         destbits = s->psy.bitres.alloc
136             * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
137     }
138
139     if (avctx->flags & CODEC_FLAG_QSCALE) {
140         /**
141          * Constant Q-scale doesn't compensate MS coding on its own
142          * No need to be overly precise, this only controls RD
143          * adjustment CB limits when going overboard
144          */
145         if (s->options.mid_side && s->cur_type == TYPE_CPE)
146             destbits *= 2;
147
148         /**
149          * When using a constant Q-scale, don't adjust bits, just use RD
150          * Don't let it go overboard, though... 8x psy target is enough
151          */
152         toomanybits = 5800;
153         toofewbits = destbits / 16;
154
155         /** Don't offset scalers, just RD */
156         sfoffs = sce->ics.num_windows - 1;
157         rdlambda = sqrtf(rdlambda);
158
159         /** search further */
160         maxits *= 2;
161     } else {
162         /* When using ABR, be strict, but a reasonable leeway is
163          * critical to allow RC to smoothly track desired bitrate
164          * without sudden quality drops that cause audible artifacts.
165          * Symmetry is also desirable, to avoid systematic bias.
166          */
167         toomanybits = destbits + destbits/8;
168         toofewbits = destbits - destbits/8;
169
170         sfoffs = 0;
171         rdlambda = sqrtf(rdlambda);
172     }
173
174     /** and zero out above cutoff frequency */
175     {
176         int wlen = 1024 / sce->ics.num_windows;
177         int bandwidth;
178
179         /**
180          * Scale, psy gives us constant quality, this LP only scales
181          * bitrate by lambda, so we save bits on subjectively unimportant HF
182          * rather than increase quantization noise. Adjust nominal bitrate
183          * to effective bitrate according to encoding parameters,
184          * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
185          */
186         float rate_bandwidth_multiplier = 1.5f;
187         int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
188             ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
189             : (avctx->bit_rate / avctx->channels);
190
191         /** Compensate for extensions that increase efficiency */
192         if (s->options.pns || s->options.intensity_stereo)
193             frame_bit_rate *= 1.15f;
194
195         if (avctx->cutoff > 0) {
196             bandwidth = avctx->cutoff;
197         } else {
198             bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
199             s->psy.cutoff = bandwidth;
200         }
201
202         cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
203         pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
204     }
205
206     /**
207      * for values above this the decoder might end up in an endless loop
208      * due to always having more bits than what can be encoded.
209      */
210     destbits = FFMIN(destbits, 5800);
211     toomanybits = FFMIN(toomanybits, 5800);
212     toofewbits = FFMIN(toofewbits, 5800);
213     /**
214      * XXX: some heuristic to determine initial quantizers will reduce search time
215      * determine zero bands and upper distortion limits
216      */
217     min_spread_thr_r = -1;
218     max_spread_thr_r = -1;
219     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
220         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
221             int nz = 0;
222             float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
223             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
224                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
225                 if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
226                     sce->zeroes[(w+w2)*16+g] = 1;
227                     continue;
228                 }
229                 nz = 1;
230             }
231             if (!nz) {
232                 uplim = 0.0f;
233             } else {
234                 nz = 0;
235                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
236                     FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
237                     if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
238                         continue;
239                     uplim += band->threshold;
240                     energy += band->energy;
241                     spread += band->spread;
242                     nz++;
243                 }
244             }
245             uplims[w*16+g] = uplim;
246             energies[w*16+g] = energy;
247             nzs[w*16+g] = nz;
248             sce->zeroes[w*16+g] = !nz;
249             allz |= nz;
250             if (nz && sce->can_pns[w*16+g]) {
251                 spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
252                 if (min_spread_thr_r < 0) {
253                     min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
254                 } else {
255                     min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
256                     max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
257                 }
258             }
259         }
260     }
261
262     /** Compute initial scalers */
263     minscaler = 65535;
264     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
265         for (g = 0;  g < sce->ics.num_swb; g++) {
266             if (sce->zeroes[w*16+g]) {
267                 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
268                 continue;
269             }
270             /**
271              * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
272              * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
273              * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
274              * more robust.
275              */
276             sce->sf_idx[w*16+g] = av_clip(
277                 SCALE_ONE_POS
278                     + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
279                     + sfoffs,
280                 60, SCALE_MAX_POS);
281             minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
282         }
283     }
284
285     /** Clip */
286     minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
287     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
288         for (g = 0;  g < sce->ics.num_swb; g++)
289             if (!sce->zeroes[w*16+g])
290                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
291
292     if (!allz)
293         return;
294     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
295     ff_quantize_band_cost_cache_init(s);
296
297     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
298         start = w*128;
299         for (g = 0;  g < sce->ics.num_swb; g++) {
300             const float *scaled = s->scoefs + start;
301             maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
302             start += sce->ics.swb_sizes[g];
303         }
304     }
305
306     /**
307      * Scale uplims to match rate distortion to quality
308      * by applying noisy band deprioritization and tonal band prioritization.
309      * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
310      * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
311      * rate distortion requirements.
312      */
313     memcpy(euplims, uplims, sizeof(euplims));
314     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
315         /** psy already prioritizes transients to some extent */
316         float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
317         start = w*128;
318         for (g = 0;  g < sce->ics.num_swb; g++) {
319             if (nzs[g] > 0) {
320                 float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
321                 float energy2uplim = find_form_factor(
322                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
323                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
324                     sce->coeffs + start,
325                     nzslope * cleanup_factor);
326                 energy2uplim *= de_psy_factor;
327                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
328                     /** In ABR, we need to priorize less and let rate control do its thing */
329                     energy2uplim = sqrtf(energy2uplim);
330                 }
331                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
332                 uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
333                                   * sce->ics.group_len[w];
334
335                 energy2uplim = find_form_factor(
336                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
337                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
338                     sce->coeffs + start,
339                     2.0f);
340                 energy2uplim *= de_psy_factor;
341                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
342                     /** In ABR, we need to priorize less and let rate control do its thing */
343                     energy2uplim = sqrtf(energy2uplim);
344                 }
345                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
346                 euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
347                     0.5f, 1.0f);
348             }
349             start += sce->ics.swb_sizes[g];
350         }
351     }
352
353     for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
354         maxsf[i] = SCALE_MAX_POS;
355
356     //perform two-loop search
357     //outer loop - improve quality
358     do {
359         //inner loop - quantize spectrum to fit into given number of bits
360         int overdist;
361         int qstep = its ? 1 : 32;
362         do {
363             int changed = 0;
364             prev = -1;
365             recomprd = 0;
366             tbits = 0;
367             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
368                 start = w*128;
369                 for (g = 0;  g < sce->ics.num_swb; g++) {
370                     const float *coefs = &sce->coeffs[start];
371                     const float *scaled = &s->scoefs[start];
372                     int bits = 0;
373                     int cb;
374                     float dist = 0.0f;
375                     float qenergy = 0.0f;
376
377                     if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
378                         start += sce->ics.swb_sizes[g];
379                         if (sce->can_pns[w*16+g]) {
380                             /** PNS isn't free */
381                             tbits += ff_pns_bits(sce, w, g);
382                         }
383                         continue;
384                     }
385                     cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
386                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
387                         int b;
388                         float sqenergy;
389                         dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
390                                                    scaled + w2*128,
391                                                    sce->ics.swb_sizes[g],
392                                                    sce->sf_idx[w*16+g],
393                                                    cb,
394                                                    1.0f,
395                                                    INFINITY,
396                                                    &b, &sqenergy,
397                                                    0);
398                         bits += b;
399                         qenergy += sqenergy;
400                     }
401                     dists[w*16+g] = dist - bits;
402                     qenergies[w*16+g] = qenergy;
403                     if (prev != -1) {
404                         int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
405                         bits += ff_aac_scalefactor_bits[sfdiff];
406                     }
407                     tbits += bits;
408                     start += sce->ics.swb_sizes[g];
409                     prev = sce->sf_idx[w*16+g];
410                 }
411             }
412             if (tbits > toomanybits) {
413                 recomprd = 1;
414                 for (i = 0; i < 128; i++) {
415                     if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
416                         int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
417                         int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
418                         if (new_sf != sce->sf_idx[i]) {
419                             sce->sf_idx[i] = new_sf;
420                             changed = 1;
421                         }
422                     }
423                 }
424             } else if (tbits < toofewbits) {
425                 recomprd = 1;
426                 for (i = 0; i < 128; i++) {
427                     if (sce->sf_idx[i] > SCALE_ONE_POS) {
428                         int new_sf = FFMAX(SCALE_ONE_POS, sce->sf_idx[i] - qstep);
429                         if (new_sf != sce->sf_idx[i]) {
430                             sce->sf_idx[i] = new_sf;
431                             changed = 1;
432                         }
433                     }
434                 }
435             }
436             qstep >>= 1;
437             if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
438                 qstep = 1;
439         } while (qstep);
440
441         overdist = 1;
442         fflag = tbits < toofewbits;
443         for (i = 0; i < 2 && (overdist || recomprd); ++i) {
444             if (recomprd) {
445                 /** Must recompute distortion */
446                 prev = -1;
447                 tbits = 0;
448                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
449                     start = w*128;
450                     for (g = 0;  g < sce->ics.num_swb; g++) {
451                         const float *coefs = sce->coeffs + start;
452                         const float *scaled = s->scoefs + start;
453                         int bits = 0;
454                         int cb;
455                         float dist = 0.0f;
456                         float qenergy = 0.0f;
457
458                         if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
459                             start += sce->ics.swb_sizes[g];
460                             if (sce->can_pns[w*16+g]) {
461                                 /** PNS isn't free */
462                                 tbits += ff_pns_bits(sce, w, g);
463                             }
464                             continue;
465                         }
466                         cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
467                         for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
468                             int b;
469                             float sqenergy;
470                             dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
471                                                     scaled + w2*128,
472                                                     sce->ics.swb_sizes[g],
473                                                     sce->sf_idx[w*16+g],
474                                                     cb,
475                                                     1.0f,
476                                                     INFINITY,
477                                                     &b, &sqenergy,
478                                                     0);
479                             bits += b;
480                             qenergy += sqenergy;
481                         }
482                         dists[w*16+g] = dist - bits;
483                         qenergies[w*16+g] = qenergy;
484                         if (prev != -1) {
485                             int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
486                             bits += ff_aac_scalefactor_bits[sfdiff];
487                         }
488                         tbits += bits;
489                         start += sce->ics.swb_sizes[g];
490                         prev = sce->sf_idx[w*16+g];
491                     }
492                 }
493             }
494             if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) {
495                 float maxoverdist = 0.0f;
496                 float ovrfactor = 1.f+(maxits-its)*16.f/maxits;
497                 overdist = recomprd = 0;
498                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
499                     for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
500                         if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
501                             float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
502                             maxoverdist = FFMAX(maxoverdist, ovrdist);
503                             overdist++;
504                         }
505                     }
506                 }
507                 if (overdist) {
508                     /* We have overdistorted bands, trade for zeroes (that can be noise)
509                      * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
510                      */
511                     float minspread = max_spread_thr_r;
512                     float maxspread = min_spread_thr_r;
513                     float zspread;
514                     int zeroable = 0;
515                     int zeroed = 0;
516                     int maxzeroed, zloop;
517                     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
518                         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
519                             if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
520                                 minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
521                                 maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
522                                 zeroable++;
523                             }
524                         }
525                     }
526                     zspread = (maxspread-minspread) * 0.0125f + minspread;
527                     /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC,
528                      * and forced the hand of the later search_for_pns step.
529                      * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are,
530                      * and leave further PNSing to search_for_pns if worthwhile.
531                      */
532                     zspread = FFMIN3(min_spread_thr_r * 8.f, zspread,
533                         ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1));
534                     maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits)));
535                     for (zloop = 0; zloop < 2; zloop++) {
536                         /* Two passes: first distorted stuff - two birds in one shot and all that,
537                          * then anything viable. Viable means not zero, but either CB=zero-able
538                          * (too high SF), not SF <= 1 (that means we'd be operating at very high
539                          * quality, we don't want PNS when doing VHQ), PNS allowed, and within
540                          * the lowest ranking percentile.
541                          */
542                         float loopovrfactor = (zloop) ? 1.0f : ovrfactor;
543                         int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS;
544                         int mcb;
545                         for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
546                             if (sce->ics.swb_offset[g] < pns_start_pos)
547                                 continue;
548                             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
549                                 if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread
550                                     && sce->sf_idx[w*16+g] > loopminsf
551                                     && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]))
552                                         || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) {
553                                     sce->zeroes[w*16+g] = 1;
554                                     sce->band_type[w*16+g] = 0;
555                                     zeroed++;
556                                 }
557                             }
558                         }
559                     }
560                     if (zeroed)
561                         recomprd = fflag = 1;
562                 } else {
563                     overdist = 0;
564                 }
565             }
566         }
567
568         minscaler = SCALE_MAX_POS;
569         maxscaler = 0;
570         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
571             for (g = 0;  g < sce->ics.num_swb; g++) {
572                 if (!sce->zeroes[w*16+g]) {
573                     minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
574                     maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
575                 }
576             }
577         }
578
579         minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
580         prev = -1;
581         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
582             /** Start with big steps, end up fine-tuning */
583             int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
584             int edepth = depth+2;
585             float uplmax = its / (maxits*0.25f) + 1.0f;
586             uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
587             start = w * 128;
588             for (g = 0; g < sce->ics.num_swb; g++) {
589                 int prevsc = sce->sf_idx[w*16+g];
590                 if (prev < 0 && !sce->zeroes[w*16+g])
591                     prev = sce->sf_idx[0];
592                 if (!sce->zeroes[w*16+g]) {
593                     const float *coefs = sce->coeffs + start;
594                     const float *scaled = s->scoefs + start;
595                     int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
596                     int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF);
597                     int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF);
598                     if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > mindeltasf) {
599                         /* Try to make sure there is some energy in every nonzero band
600                          * NOTE: This algorithm must be forcibly imbalanced, pushing harder
601                          *  on holes or more distorted bands at first, otherwise there's
602                          *  no net gain (since the next iteration will offset all bands
603                          *  on the opposite direction to compensate for extra bits)
604                          */
605                         for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) {
606                             int cb, bits;
607                             float dist, qenergy;
608                             int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
609                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
610                             dist = qenergy = 0.f;
611                             bits = 0;
612                             if (!cb) {
613                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
614                             } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
615                                 break;
616                             }
617                             /* !g is the DC band, it's important, since quantization error here
618                              * applies to less than a cycle, it creates horrible intermodulation
619                              * distortion if it doesn't stick to what psy requests
620                              */
621                             if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g])
622                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
623                             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
624                                 int b;
625                                 float sqenergy;
626                                 dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
627                                                         scaled + w2*128,
628                                                         sce->ics.swb_sizes[g],
629                                                         sce->sf_idx[w*16+g]-1,
630                                                         cb,
631                                                         1.0f,
632                                                         INFINITY,
633                                                         &b, &sqenergy,
634                                                         0);
635                                 bits += b;
636                                 qenergy += sqenergy;
637                             }
638                             sce->sf_idx[w*16+g]--;
639                             dists[w*16+g] = dist - bits;
640                             qenergies[w*16+g] = qenergy;
641                             if (mb && (sce->sf_idx[w*16+g] < mindeltasf || (
642                                     (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
643                                     && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
644                                 ) )) {
645                                 break;
646                             }
647                         }
648                     } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g])
649                             && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
650                             && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
651                         ) {
652                         /** Um... over target. Save bits for more important stuff. */
653                         for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) {
654                             int cb, bits;
655                             float dist, qenergy;
656                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
657                             if (cb > 0) {
658                                 dist = qenergy = 0.f;
659                                 bits = 0;
660                                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
661                                     int b;
662                                     float sqenergy;
663                                     dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
664                                                             scaled + w2*128,
665                                                             sce->ics.swb_sizes[g],
666                                                             sce->sf_idx[w*16+g]+1,
667                                                             cb,
668                                                             1.0f,
669                                                             INFINITY,
670                                                             &b, &sqenergy,
671                                                             0);
672                                     bits += b;
673                                     qenergy += sqenergy;
674                                 }
675                                 dist -= bits;
676                                 if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
677                                     sce->sf_idx[w*16+g]++;
678                                     dists[w*16+g] = dist;
679                                     qenergies[w*16+g] = qenergy;
680                                 } else {
681                                     break;
682                                 }
683                             } else {
684                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
685                                 break;
686                             }
687                         }
688                     }
689                     prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf);
690                     if (sce->sf_idx[w*16+g] != prevsc)
691                         fflag = 1;
692                     nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
693                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
694                 }
695                 start += sce->ics.swb_sizes[g];
696             }
697         }
698
699         /** SF difference limit violation risk. Must re-clamp. */
700         prev = -1;
701         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
702             for (g = 0; g < sce->ics.num_swb; g++) {
703                 if (!sce->zeroes[w*16+g]) {
704                     int prevsf = sce->sf_idx[w*16+g];
705                     if (prev < 0)
706                         prev = prevsf;
707                     sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF);
708                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
709                     prev = sce->sf_idx[w*16+g];
710                     if (!fflag && prevsf != sce->sf_idx[w*16+g])
711                         fflag = 1;
712                 }
713             }
714         }
715
716         its++;
717     } while (fflag && its < maxits);
718
719     /** Scout out next nonzero bands */
720     ff_init_nextband_map(sce, nextband);
721
722     prev = -1;
723     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
724         /** Make sure proper codebooks are set */
725         for (g = 0; g < sce->ics.num_swb; g++) {
726             if (!sce->zeroes[w*16+g]) {
727                 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
728                 if (sce->band_type[w*16+g] <= 0) {
729                     if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) {
730                         /** Cannot zero out, make sure it's not attempted */
731                         sce->band_type[w*16+g] = 1;
732                     } else {
733                         sce->zeroes[w*16+g] = 1;
734                         sce->band_type[w*16+g] = 0;
735                     }
736                 }
737             } else {
738                 sce->band_type[w*16+g] = 0;
739             }
740             /** Check that there are no SF delta range violations */
741             if (!sce->zeroes[w*16+g]) {
742                 if (prev != -1) {
743                     av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
744                     av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
745                 } else if (sce->zeroes[0]) {
746                     /** Set global gain to something useful */
747                     sce->sf_idx[0] = sce->sf_idx[w*16+g];
748                 }
749                 prev = sce->sf_idx[w*16+g];
750             }
751         }
752     }
753 }
754
755 #endif /* AVCODEC_AACCODER_TWOLOOP_H */