]> git.sesse.net Git - ffmpeg/blob - libavcodec/aaccoder_twoloop.h
avcodec/hevc: Check entry_point_offsets
[ffmpeg] / libavcodec / aaccoder_twoloop.h
1 /*
2  * AAC encoder twoloop coder
3  * Copyright (C) 2008-2009 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * AAC encoder twoloop coder
25  * @author Konstantin Shishkov, Claudio Freire
26  */
27
28 /**
29  * This file contains a template for the twoloop coder function.
30  * It needs to be provided, externally, as an already included declaration,
31  * the following functions from aacenc_quantization/util.h. They're not included
32  * explicitly here to make it possible to provide alternative implementations:
33  *  - quantize_band_cost
34  *  - abs_pow34_v
35  *  - find_max_val
36  *  - find_min_book
37  *  - find_form_factor
38  */
39
40 #ifndef AVCODEC_AACCODER_TWOLOOP_H
41 #define AVCODEC_AACCODER_TWOLOOP_H
42
43 #include <float.h>
44 #include "libavutil/mathematics.h"
45 #include "mathops.h"
46 #include "avcodec.h"
47 #include "put_bits.h"
48 #include "aac.h"
49 #include "aacenc.h"
50 #include "aactab.h"
51 #include "aacenctab.h"
52 #include "aac_tablegen_decl.h"
53
/** Frequency in Hz for lower limit of noise substitution **/
#define NOISE_LOW_LIMIT 4000

/**
 * Clamp x to the closed range [60, 218], the same numeric bounds the
 * quantizer search in this file applies to scalefactor indices.
 */
#define sclip(x) av_clip((x), 60, 218)
58
59 /* Reflects the cost to change codebooks */
60 static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
61 {
62     return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
63 }
64
65 /**
66  * two-loop quantizers search taken from ISO 13818-7 Appendix C
67  */
68 static void search_for_quantizers_twoloop(AVCodecContext *avctx,
69                                           AACEncContext *s,
70                                           SingleChannelElement *sce,
71                                           const float lambda)
72 {
73     int start = 0, i, w, w2, g, recomprd;
74     int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
75         / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
76         * (lambda / 120.f);
77     int refbits = destbits;
78     int toomanybits, toofewbits;
79     char nzs[128];
80     int maxsf[128];
81     float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
82     float maxvals[128], spread_thr_r[128];
83     float min_spread_thr_r, max_spread_thr_r;
84
85     /**
86      * rdlambda controls the maximum tolerated distortion. Twoloop
87      * will keep iterating until it fails to lower it or it reaches
88      * ulimit * rdlambda. Keeping it low increases quality on difficult
89      * signals, but lower it too much, and bits will be taken from weak
90      * signals, creating "holes". A balance is necesary.
91      * rdmax and rdmin specify the relative deviation from rdlambda
92      * allowed for tonality compensation
93      */
94     float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
95     const float nzslope = 1.5f;
96     float rdmin = 0.03125f;
97     float rdmax = 1.0f;
98
99     /**
100      * sfoffs controls an offset of optmium allocation that will be
101      * applied based on lambda. Keep it real and modest, the loop
102      * will take care of the rest, this just accelerates convergence
103      */
104     float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
105
106     int fflag, minscaler, maxscaler, nminscaler, minrdsf;
107     int its  = 0;
108     int maxits = 30;
109     int allz = 0;
110     int tbits;
111     int cutoff = 1024;
112     int pns_start_pos;
113     int prev;
114
115     /**
116      * zeroscale controls a multiplier of the threshold, if band energy
117      * is below this, a zero is forced. Keep it lower than 1, unless
118      * low lambda is used, because energy < threshold doesn't mean there's
119      * no audible signal outright, it's just energy. Also make it rise
120      * slower than rdlambda, as rdscale has due compensation with
121      * noisy band depriorization below, whereas zeroing logic is rather dumb
122      */
123     float zeroscale;
124     if (lambda > 120.f) {
125         zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
126     } else {
127         zeroscale = 1.f;
128     }
129
130     if (s->psy.bitres.alloc >= 0) {
131         /**
132          * Psy granted us extra bits to use, from the reservoire
133          * adjust for lambda except what psy already did
134          */
135         destbits = s->psy.bitres.alloc
136             * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
137     }
138
139     if (avctx->flags & CODEC_FLAG_QSCALE) {
140         /**
141          * Constant Q-scale doesn't compensate MS coding on its own
142          * No need to be overly precise, this only controls RD
143          * adjustment CB limits when going overboard
144          */
145         if (s->options.mid_side && s->cur_type == TYPE_CPE)
146             destbits *= 2;
147
148         /**
149          * When using a constant Q-scale, don't adjust bits, just use RD
150          * Don't let it go overboard, though... 8x psy target is enough
151          */
152         toomanybits = 5800;
153         toofewbits = destbits / 16;
154
155         /** Don't offset scalers, just RD */
156         sfoffs = sce->ics.num_windows - 1;
157         rdlambda = sqrtf(rdlambda);
158
159         /** search further */
160         maxits *= 2;
161     } else {
162         /** When using ABR, be strict */
163         toomanybits = destbits + destbits/16;
164         toofewbits = destbits - destbits/4;
165
166         sfoffs = 0;
167         rdlambda = sqrtf(rdlambda);
168     }
169
170     /** and zero out above cutoff frequency */
171     {
172         int wlen = 1024 / sce->ics.num_windows;
173         int bandwidth;
174
175         /**
176          * Scale, psy gives us constant quality, this LP only scales
177          * bitrate by lambda, so we save bits on subjectively unimportant HF
178          * rather than increase quantization noise. Adjust nominal bitrate
179          * to effective bitrate according to encoding parameters,
180          * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
181          */
182         float rate_bandwidth_multiplier = 1.5f;
183         int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
184             ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
185             : (avctx->bit_rate / avctx->channels);
186
187         /** Compensate for extensions that increase efficiency */
188         if (s->options.pns || s->options.intensity_stereo)
189             frame_bit_rate *= 1.15f;
190
191         if (avctx->cutoff > 0) {
192             bandwidth = avctx->cutoff;
193         } else {
194             bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
195         }
196
197         cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
198         pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
199     }
200
201     /**
202      * for values above this the decoder might end up in an endless loop
203      * due to always having more bits than what can be encoded.
204      */
205     destbits = FFMIN(destbits, 5800);
206     toomanybits = FFMIN(toomanybits, 5800);
207     toofewbits = FFMIN(toofewbits, 5800);
208     /**
209      * XXX: some heuristic to determine initial quantizers will reduce search time
210      * determine zero bands and upper distortion limits
211      */
212     min_spread_thr_r = -1;
213     max_spread_thr_r = -1;
214     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
215         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
216             int nz = 0;
217             float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
218             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
219                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
220                 if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
221                     sce->zeroes[(w+w2)*16+g] = 1;
222                     continue;
223                 }
224                 nz = 1;
225             }
226             if (!nz) {
227                 uplim = 0.0f;
228             } else {
229                 nz = 0;
230                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
231                     FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
232                     if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
233                         continue;
234                     uplim += band->threshold;
235                     energy += band->energy;
236                     spread += band->spread;
237                     nz++;
238                 }
239             }
240             uplims[w*16+g] = uplim;
241             energies[w*16+g] = energy;
242             nzs[w*16+g] = nz;
243             sce->zeroes[w*16+g] = !nz;
244             allz |= nz;
245             if (nz) {
246                 spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
247                 if (min_spread_thr_r < 0) {
248                     min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
249                 } else {
250                     min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
251                     max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
252                 }
253             }
254         }
255     }
256
257     /** Compute initial scalers */
258     minscaler = 65535;
259     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
260         for (g = 0;  g < sce->ics.num_swb; g++) {
261             if (sce->zeroes[w*16+g]) {
262                 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
263                 continue;
264             }
265             /**
266              * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
267              * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
268              * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
269              * more robust.
270              */
271             sce->sf_idx[w*16+g] = av_clip(
272                 SCALE_ONE_POS
273                     + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
274                     + sfoffs,
275                 60, SCALE_MAX_POS);
276             minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
277         }
278     }
279
280     /** Clip */
281     minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
282     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
283         for (g = 0;  g < sce->ics.num_swb; g++)
284             if (!sce->zeroes[w*16+g])
285                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
286
287     if (!allz)
288         return;
289     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
290     ff_quantize_band_cost_cache_init(s);
291
292     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
293         start = w*128;
294         for (g = 0;  g < sce->ics.num_swb; g++) {
295             const float *scaled = s->scoefs + start;
296             maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
297             start += sce->ics.swb_sizes[g];
298         }
299     }
300
301     /**
302      * Scale uplims to match rate distortion to quality
303      * bu applying noisy band depriorization and tonal band priorization.
304      * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
305      * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
306      * rate distortion requirements.
307      */
308     memcpy(euplims, uplims, sizeof(euplims));
309     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
310         /** psy already priorizes transients to some extent */
311         float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
312         start = w*128;
313         for (g = 0;  g < sce->ics.num_swb; g++) {
314             if (nzs[g] > 0) {
315                 float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
316                 float energy2uplim = find_form_factor(
317                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
318                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
319                     sce->coeffs + start,
320                     nzslope * cleanup_factor);
321                 energy2uplim *= de_psy_factor;
322                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
323                     /** In ABR, we need to priorize less and let rate control do its thing */
324                     energy2uplim = sqrtf(energy2uplim);
325                 }
326                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
327                 uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
328                                   * sce->ics.group_len[w];
329
330                 energy2uplim = find_form_factor(
331                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
332                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
333                     sce->coeffs + start,
334                     2.0f);
335                 energy2uplim *= de_psy_factor;
336                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
337                     /** In ABR, we need to priorize less and let rate control do its thing */
338                     energy2uplim = sqrtf(energy2uplim);
339                 }
340                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
341                 euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
342                     0.5f, 1.0f);
343             }
344             start += sce->ics.swb_sizes[g];
345         }
346     }
347
348     for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
349         maxsf[i] = SCALE_MAX_POS;
350
351     //perform two-loop search
352     //outer loop - improve quality
353     do {
354         //inner loop - quantize spectrum to fit into given number of bits
355         int overdist;
356         int qstep = its ? 1 : 32;
357         do {
358             int changed = 0;
359             prev = -1;
360             recomprd = 0;
361             tbits = 0;
362             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
363                 start = w*128;
364                 for (g = 0;  g < sce->ics.num_swb; g++) {
365                     const float *coefs = &sce->coeffs[start];
366                     const float *scaled = &s->scoefs[start];
367                     int bits = 0;
368                     int cb;
369                     float dist = 0.0f;
370                     float qenergy = 0.0f;
371
372                     if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
373                         start += sce->ics.swb_sizes[g];
374                         if (sce->can_pns[w*16+g]) {
375                             /** PNS isn't free */
376                             tbits += ff_pns_bits(sce, w, g);
377                         }
378                         continue;
379                     }
380                     cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
381                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
382                         int b;
383                         float sqenergy;
384                         dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
385                                                    scaled + w2*128,
386                                                    sce->ics.swb_sizes[g],
387                                                    sce->sf_idx[w*16+g],
388                                                    cb,
389                                                    1.0f,
390                                                    INFINITY,
391                                                    &b, &sqenergy,
392                                                    0);
393                         bits += b;
394                         qenergy += sqenergy;
395                     }
396                     dists[w*16+g] = dist - bits;
397                     qenergies[w*16+g] = qenergy;
398                     if (prev != -1) {
399                         int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
400                         bits += ff_aac_scalefactor_bits[sfdiff];
401                     }
402                     tbits += bits;
403                     start += sce->ics.swb_sizes[g];
404                     prev = sce->sf_idx[w*16+g];
405                 }
406             }
407             if (tbits > toomanybits) {
408                 recomprd = 1;
409                 for (i = 0; i < 128; i++) {
410                     if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
411                         int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
412                         int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
413                         if (new_sf != sce->sf_idx[i]) {
414                             sce->sf_idx[i] = new_sf;
415                             changed = 1;
416                         }
417                     }
418                 }
419             } else if (tbits < toofewbits) {
420                 recomprd = 1;
421                 for (i = 0; i < 128; i++) {
422                     if (sce->sf_idx[i] > SCALE_ONE_POS) {
423                         int new_sf = FFMAX(SCALE_ONE_POS, sce->sf_idx[i] - qstep);
424                         if (new_sf != sce->sf_idx[i]) {
425                             sce->sf_idx[i] = new_sf;
426                             changed = 1;
427                         }
428                     }
429                 }
430             }
431             qstep >>= 1;
432             if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
433                 qstep = 1;
434         } while (qstep);
435
436         overdist = 1;
437         for (i = 0; i < 2 && (overdist || recomprd); ++i) {
438             if (recomprd) {
439                 /** Must recompute distortion */
440                 prev = -1;
441                 tbits = 0;
442                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
443                     start = w*128;
444                     for (g = 0;  g < sce->ics.num_swb; g++) {
445                         const float *coefs = sce->coeffs + start;
446                         const float *scaled = s->scoefs + start;
447                         int bits = 0;
448                         int cb;
449                         float dist = 0.0f;
450                         float qenergy = 0.0f;
451
452                         if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
453                             start += sce->ics.swb_sizes[g];
454                             if (sce->can_pns[w*16+g]) {
455                                 /** PNS isn't free */
456                                 tbits += ff_pns_bits(sce, w, g);
457                             }
458                             continue;
459                         }
460                         cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
461                         for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
462                             int b;
463                             float sqenergy;
464                             dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
465                                                     scaled + w2*128,
466                                                     sce->ics.swb_sizes[g],
467                                                     sce->sf_idx[w*16+g],
468                                                     cb,
469                                                     1.0f,
470                                                     INFINITY,
471                                                     &b, &sqenergy,
472                                                     0);
473                             bits += b;
474                             qenergy += sqenergy;
475                         }
476                         dists[w*16+g] = dist - bits;
477                         qenergies[w*16+g] = qenergy;
478                         if (prev != -1) {
479                             int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
480                             bits += ff_aac_scalefactor_bits[sfdiff];
481                         }
482                         tbits += bits;
483                         start += sce->ics.swb_sizes[g];
484                         prev = sce->sf_idx[w*16+g];
485                     }
486                 }
487             }
488             if (!i && s->options.pns && its > maxits/2) {
489                 float maxoverdist = 0.0f;
490                 overdist = recomprd = 0;
491                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
492                     float ovrfactor = 2.f+(maxits-its)*16.f/maxits;
493                     for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
494                         if (!sce->zeroes[w*16+g] && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
495                             float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
496                             maxoverdist = FFMAX(maxoverdist, ovrdist);
497                             overdist++;
498                         }
499                     }
500                 }
501                 if (overdist) {
502                     /* We have overdistorted bands, trade for zeroes (that can be noise)
503                      * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
504                      */
505                     float minspread = max_spread_thr_r;
506                     float maxspread = min_spread_thr_r;
507                     float zspread;
508                     int zeroable = 0;
509                     int zeroed = 0;
510                     int maxzeroed;
511                     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
512                         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
513                             if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
514                                 minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
515                                 maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
516                                 zeroable++;
517                             }
518                         }
519                     }
520                     zspread = (maxspread-minspread) * 0.0125f + minspread;
521                     zspread = FFMIN(maxoverdist, zspread);
522                     maxzeroed = zeroable * its / (2 * maxits);
523                     for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
524                         if (sce->ics.swb_offset[g] < pns_start_pos)
525                             continue;
526                         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
527                             if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread) {
528                                 sce->zeroes[w*16+g] = 1;
529                                 sce->band_type[w*16+g] = 0;
530                                 zeroed++;
531                             }
532                         }
533                     }
534                     if (zeroed)
535                         recomprd = 1;
536                 } else {
537                     overdist = 0;
538                 }
539             }
540         }
541
542         minscaler = SCALE_MAX_POS;
543         maxscaler = 0;
544         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
545             for (g = 0;  g < sce->ics.num_swb; g++) {
546                 if (!sce->zeroes[w*16+g]) {
547                     minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
548                     maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
549                 }
550             }
551         }
552
553         fflag = 0;
554         minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
555         minrdsf = FFMAX3(60, minscaler - 1, maxscaler - SCALE_MAX_DIFF - 1);
556         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
557             /** Start with big steps, end up fine-tunning */
558             int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
559             int edepth = depth+2;
560             float uplmax = its / (maxits*0.25f) + 1.0f;
561             uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
562             start = w * 128;
563             for (g = 0; g < sce->ics.num_swb; g++) {
564                 int prevsc = sce->sf_idx[w*16+g];
565                 int minrdsfboost = (sce->ics.num_windows > 1) ? av_clip(g-4, -2, 0) : av_clip(g-16, -4, 0);
566                 if (!sce->zeroes[w*16+g]) {
567                     const float *coefs = sce->coeffs + start;
568                     const float *scaled = s->scoefs + start;
569                     int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
570                     if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > minrdsf) {
571                         /* Try to make sure there is some energy in every nonzero band
572                          * NOTE: This algorithm must be forcibly imbalanced, pushing harder
573                          *  on holes or more distorted bands at first, otherwise there's
574                          *  no net gain (since the next iteration will offset all bands
575                          *  on the opposite direction to compensate for extra bits)
576                          */
577                         for (i = 0; i < edepth; ++i) {
578                             int cb, bits;
579                             float dist, qenergy;
580                             int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
581                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
582                             dist = qenergy = 0.f;
583                             bits = 0;
584                             if (!cb) {
585                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
586                             } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
587                                 break;
588                             }
589                             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
590                                 int b;
591                                 float sqenergy;
592                                 dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
593                                                         scaled + w2*128,
594                                                         sce->ics.swb_sizes[g],
595                                                         sce->sf_idx[w*16+g]-1,
596                                                         cb,
597                                                         1.0f,
598                                                         INFINITY,
599                                                         &b, &sqenergy,
600                                                         0);
601                                 bits += b;
602                                 qenergy += sqenergy;
603                             }
604                             sce->sf_idx[w*16+g]--;
605                             dists[w*16+g] = dist - bits;
606                             qenergies[w*16+g] = qenergy;
607                             if (mb && (sce->sf_idx[w*16+g] < (minrdsf+minrdsfboost) || (
608                                     (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
609                                     && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
610                                 ) )) {
611                                 break;
612                             }
613                         }
614                     } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < maxscaler
615                             && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
616                             && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
617                         ) {
618                         /** Um... over target. Save bits for more important stuff. */
619                         for (i = 0; i < depth; ++i) {
620                             int cb, bits;
621                             float dist, qenergy;
622                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
623                             if (cb > 0) {
624                                 dist = qenergy = 0.f;
625                                 bits = 0;
626                                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
627                                     int b;
628                                     float sqenergy;
629                                     dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
630                                                             scaled + w2*128,
631                                                             sce->ics.swb_sizes[g],
632                                                             sce->sf_idx[w*16+g]+1,
633                                                             cb,
634                                                             1.0f,
635                                                             INFINITY,
636                                                             &b, &sqenergy,
637                                                             0);
638                                     bits += b;
639                                     qenergy += sqenergy;
640                                 }
641                                 dist -= bits;
642                                 if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
643                                     sce->sf_idx[w*16+g]++;
644                                     dists[w*16+g] = dist;
645                                     qenergies[w*16+g] = qenergy;
646                                 } else {
647                                     break;
648                                 }
649                             } else {
650                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
651                                 break;
652                             }
653                         }
654                     }
655                 }
656                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minrdsf, minscaler + SCALE_MAX_DIFF);
657                 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], SCALE_MAX_POS - SCALE_DIV_512);
658                 if (sce->sf_idx[w*16+g] != prevsc)
659                     fflag = 1;
660                 nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
661                 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
662                 start += sce->ics.swb_sizes[g];
663             }
664         }
665         if (nminscaler < minscaler || sce->ics.num_windows > 1) {
666             /** SF difference limit violation risk. Must re-clamp. */
667             minscaler = nminscaler;
668             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
669                 for (g = 0; g < sce->ics.num_swb; g++) {
670                     sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
671                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
672                 }
673             }
674         }
675         its++;
676     } while (fflag && its < maxits);
677
678     prev = -1;
679     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
680         /** Make sure proper codebooks are set */
681         for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
682             if (!sce->zeroes[w*16+g]) {
683                 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
684                 if (sce->band_type[w*16+g] <= 0) {
685                     sce->zeroes[w*16+g] = 1;
686                     sce->band_type[w*16+g] = 0;
687                 }
688             } else {
689                 sce->band_type[w*16+g] = 0;
690             }
691             /** Check that there's no SF delta range violations */
692             if (!sce->zeroes[w*16+g]) {
693                 if (prev != -1) {
694                     av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
695                     av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
696                 } else if (sce->zeroes[0]) {
697                     /** Set global gain to something useful */
698                     sce->sf_idx[0] = sce->sf_idx[w*16+g];
699                 }
700                 prev = sce->sf_idx[w*16+g];
701             }
702         }
703     }
704 }
705
706 #endif /* AVCODEC_AACCODER_TWOLOOP_H */