git.sesse.net Git - ffmpeg/blob - libavcodec/aaccoder_twoloop.h

   1 /*
   2  * AAC encoder twoloop coder
   3  * Copyright (C) 2008-2009 Konstantin Shishkov
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * AAC encoder twoloop coder
  25  * @author Konstantin Shishkov, Claudio Freire
  26  */
  27
  28 /**
  29  * This file contains a template for the twoloop coder function.
  30  * It needs to be provided, externally, as an already included declaration,
  31  * the following functions from aacenc_quantization/util.h. They're not included
  32  * explicitly here to make it possible to provide alternative implementations:
  33  *  - quantize_band_cost
  34  *  - abs_pow34_v
  35  *  - find_max_val
  36  *  - find_min_book
  37  *  - find_form_factor
  38  */
  39
  40 #ifndef AVCODEC_AACCODER_TWOLOOP_H
  41 #define AVCODEC_AACCODER_TWOLOOP_H
  42
  43 #include <float.h>
  44 #include "libavutil/mathematics.h"
  45 #include "mathops.h"
  46 #include "avcodec.h"
  47 #include "put_bits.h"
  48 #include "aac.h"
  49 #include "aacenc.h"
  50 #include "aactab.h"
  51 #include "aacenctab.h"
  52
  53 /** Frequency in Hz for lower limit of noise substitution **/
  54 #define NOISE_LOW_LIMIT 4000
  55
  56 #define sclip(x) av_clip(x,60,218)
  57
  58 /* Reflects the cost to change codebooks */
  59 static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
  60 {
  61     return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
  62 }
  63
  64 /**
  65  * two-loop quantizers search taken from ISO 13818-7 Appendix C
  66  */
  67 static void search_for_quantizers_twoloop(AVCodecContext *avctx,
  68                                           AACEncContext *s,
  69                                           SingleChannelElement *sce,
  70                                           const float lambda)
  71 {
  72     int start = 0, i, w, w2, g, recomprd;
  73     int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
  74         / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
  75         * (lambda / 120.f);
  76     int refbits = destbits;
  77     int toomanybits, toofewbits;
  78     char nzs[128];
  79     uint8_t nextband[128];
  80     int maxsf[128], minsf[128];
  81     float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
  82     float maxvals[128], spread_thr_r[128];
  83     float min_spread_thr_r, max_spread_thr_r;
  84
  85     /**
  86      * rdlambda controls the maximum tolerated distortion. Twoloop
  87      * will keep iterating until it fails to lower it or it reaches
  88      * ulimit * rdlambda. Keeping it low increases quality on difficult
  89      * signals, but lower it too much, and bits will be taken from weak
  90      * signals, creating "holes". A balance is necesary.
  91      * rdmax and rdmin specify the relative deviation from rdlambda
  92      * allowed for tonality compensation
  93      */
  94     float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
  95     const float nzslope = 1.5f;
  96     float rdmin = 0.03125f;
  97     float rdmax = 1.0f;
  98
  99     /**
 100      * sfoffs controls an offset of optmium allocation that will be
 101      * applied based on lambda. Keep it real and modest, the loop
 102      * will take care of the rest, this just accelerates convergence
 103      */
 104     float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
 105
 106     int fflag, minscaler, maxscaler, nminscaler;
 107     int its  = 0;
 108     int maxits = 30;
 109     int allz = 0;
 110     int tbits;
 111     int cutoff = 1024;
 112     int pns_start_pos;
 113     int prev;
 114
 115     /**
 116      * zeroscale controls a multiplier of the threshold, if band energy
 117      * is below this, a zero is forced. Keep it lower than 1, unless
 118      * low lambda is used, because energy < threshold doesn't mean there's
 119      * no audible signal outright, it's just energy. Also make it rise
 120      * slower than rdlambda, as rdscale has due compensation with
 121      * noisy band depriorization below, whereas zeroing logic is rather dumb
 122      */
 123     float zeroscale;
 124     if (lambda > 120.f) {
 125         zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
 126     } else {
 127         zeroscale = 1.f;
 128     }
 129
 130     if (s->psy.bitres.alloc >= 0) {
 131         /**
 132          * Psy granted us extra bits to use, from the reservoire
 133          * adjust for lambda except what psy already did
 134          */
 135         destbits = s->psy.bitres.alloc
 136             * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
 137     }
 138
 139     if (avctx->flags & CODEC_FLAG_QSCALE) {
 140         /**
 141          * Constant Q-scale doesn't compensate MS coding on its own
 142          * No need to be overly precise, this only controls RD
 143          * adjustment CB limits when going overboard
 144          */
 145         if (s->options.mid_side && s->cur_type == TYPE_CPE)
 146             destbits *= 2;
 147
 148         /**
 149          * When using a constant Q-scale, don't adjust bits, just use RD
 150          * Don't let it go overboard, though... 8x psy target is enough
 151          */
 152         toomanybits = 5800;
 153         toofewbits = destbits / 16;
 154
 155         /** Don't offset scalers, just RD */
 156         sfoffs = sce->ics.num_windows - 1;
 157         rdlambda = sqrtf(rdlambda);
 158
 159         /** search further */
 160         maxits *= 2;
 161     } else {
 162         /* When using ABR, be strict, but a reasonable leeway is
 163          * critical to allow RC to smoothly track desired bitrate
 164          * without sudden quality drops that cause audible artifacts.
 165          * Symmetry is also desirable, to avoid systematic bias.
 166          */
 167         toomanybits = destbits + destbits/8;
 168         toofewbits = destbits - destbits/8;
 169
 170         sfoffs = 0;
 171         rdlambda = sqrtf(rdlambda);
 172     }
 173
 174     /** and zero out above cutoff frequency */
 175     {
 176         int wlen = 1024 / sce->ics.num_windows;
 177         int bandwidth;
 178
 179         /**
 180          * Scale, psy gives us constant quality, this LP only scales
 181          * bitrate by lambda, so we save bits on subjectively unimportant HF
 182          * rather than increase quantization noise. Adjust nominal bitrate
 183          * to effective bitrate according to encoding parameters,
 184          * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
 185          */
 186         float rate_bandwidth_multiplier = 1.5f;
 187         int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
 188             ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
 189             : (avctx->bit_rate / avctx->channels);
 190
 191         /** Compensate for extensions that increase efficiency */
 192         if (s->options.pns || s->options.intensity_stereo)
 193             frame_bit_rate *= 1.15f;
 194
 195         if (avctx->cutoff > 0) {
 196             bandwidth = avctx->cutoff;
 197         } else {
 198             bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
 199             s->psy.cutoff = bandwidth;
 200         }
 201
 202         cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
 203         pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
 204     }
 205
 206     /**
 207      * for values above this the decoder might end up in an endless loop
 208      * due to always having more bits than what can be encoded.
 209      */
 210     destbits = FFMIN(destbits, 5800);
 211     toomanybits = FFMIN(toomanybits, 5800);
 212     toofewbits = FFMIN(toofewbits, 5800);
 213     /**
 214      * XXX: some heuristic to determine initial quantizers will reduce search time
 215      * determine zero bands and upper distortion limits
 216      */
 217     min_spread_thr_r = -1;
 218     max_spread_thr_r = -1;
 219     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 220         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
 221             int nz = 0;
 222             float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
 223             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 224                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
 225                 if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
 226                     sce->zeroes[(w+w2)*16+g] = 1;
 227                     continue;
 228                 }
 229                 nz = 1;
 230             }
 231             if (!nz) {
 232                 uplim = 0.0f;
 233             } else {
 234                 nz = 0;
 235                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 236                     FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
 237                     if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
 238                         continue;
 239                     uplim += band->threshold;
 240                     energy += band->energy;
 241                     spread += band->spread;
 242                     nz++;
 243                 }
 244             }
 245             uplims[w*16+g] = uplim;
 246             energies[w*16+g] = energy;
 247             nzs[w*16+g] = nz;
 248             sce->zeroes[w*16+g] = !nz;
 249             allz |= nz;
 250             if (nz && sce->can_pns[w*16+g]) {
 251                 spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
 252                 if (min_spread_thr_r < 0) {
 253                     min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
 254                 } else {
 255                     min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
 256                     max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
 257                 }
 258             }
 259         }
 260     }
 261
 262     /** Compute initial scalers */
 263     minscaler = 65535;
 264     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 265         for (g = 0;  g < sce->ics.num_swb; g++) {
 266             if (sce->zeroes[w*16+g]) {
 267                 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
 268                 continue;
 269             }
 270             /**
 271              * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
 272              * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
 273              * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
 274              * more robust.
 275              */
 276             sce->sf_idx[w*16+g] = av_clip(
 277                 SCALE_ONE_POS
 278                     + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
 279                     + sfoffs,
 280                 60, SCALE_MAX_POS);
 281             minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
 282         }
 283     }
 284
 285     /** Clip */
 286     minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
 287     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
 288         for (g = 0;  g < sce->ics.num_swb; g++)
 289             if (!sce->zeroes[w*16+g])
 290                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
 291
 292     if (!allz)
 293         return;
 294     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
 295     ff_quantize_band_cost_cache_init(s);
 296
 297     for (i = 0; i < sizeof(minsf) / sizeof(minsf[0]); ++i)
 298         minsf[i] = 0;
 299     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 300         start = w*128;
 301         for (g = 0;  g < sce->ics.num_swb; g++) {
 302             const float *scaled = s->scoefs + start;
 303             int minsfidx;
 304             maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
 305             if (maxvals[w*16+g] > 0) {
 306                 minsfidx = coef2minsf(maxvals[w*16+g]);
 307                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
 308                     minsf[(w+w2)*16+g] = minsfidx;
 309             }
 310             start += sce->ics.swb_sizes[g];
 311         }
 312     }
 313
 314     /**
 315      * Scale uplims to match rate distortion to quality
 316      * bu applying noisy band depriorization and tonal band priorization.
 317      * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
 318      * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
 319      * rate distortion requirements.
 320      */
 321     memcpy(euplims, uplims, sizeof(euplims));
 322     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 323         /** psy already priorizes transients to some extent */
 324         float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
 325         start = w*128;
 326         for (g = 0;  g < sce->ics.num_swb; g++) {
 327             if (nzs[g] > 0) {
 328                 float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
 329                 float energy2uplim = find_form_factor(
 330                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
 331                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
 332                     sce->coeffs + start,
 333                     nzslope * cleanup_factor);
 334                 energy2uplim *= de_psy_factor;
 335                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
 336                     /** In ABR, we need to priorize less and let rate control do its thing */
 337                     energy2uplim = sqrtf(energy2uplim);
 338                 }
 339                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
 340                 uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
 341                                   * sce->ics.group_len[w];
 342
 343                 energy2uplim = find_form_factor(
 344                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
 345                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
 346                     sce->coeffs + start,
 347                     2.0f);
 348                 energy2uplim *= de_psy_factor;
 349                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
 350                     /** In ABR, we need to priorize less and let rate control do its thing */
 351                     energy2uplim = sqrtf(energy2uplim);
 352                 }
 353                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
 354                 euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
 355                     0.5f, 1.0f);
 356             }
 357             start += sce->ics.swb_sizes[g];
 358         }
 359     }
 360
 361     for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
 362         maxsf[i] = SCALE_MAX_POS;
 363
 364     //perform two-loop search
 365     //outer loop - improve quality
 366     do {
 367         //inner loop - quantize spectrum to fit into given number of bits
 368         int overdist;
 369         int qstep = its ? 1 : 32;
 370         do {
 371             int changed = 0;
 372             prev = -1;
 373             recomprd = 0;
 374             tbits = 0;
 375             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 376                 start = w*128;
 377                 for (g = 0;  g < sce->ics.num_swb; g++) {
 378                     const float *coefs = &sce->coeffs[start];
 379                     const float *scaled = &s->scoefs[start];
 380                     int bits = 0;
 381                     int cb;
 382                     float dist = 0.0f;
 383                     float qenergy = 0.0f;
 384
 385                     if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
 386                         start += sce->ics.swb_sizes[g];
 387                         if (sce->can_pns[w*16+g]) {
 388                             /** PNS isn't free */
 389                             tbits += ff_pns_bits(sce, w, g);
 390                         }
 391                         continue;
 392                     }
 393                     cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 394                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 395                         int b;
 396                         float sqenergy;
 397                         dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
 398                                                    scaled + w2*128,
 399                                                    sce->ics.swb_sizes[g],
 400                                                    sce->sf_idx[w*16+g],
 401                                                    cb,
 402                                                    1.0f,
 403                                                    INFINITY,
 404                                                    &b, &sqenergy,
 405                                                    0);
 406                         bits += b;
 407                         qenergy += sqenergy;
 408                     }
 409                     dists[w*16+g] = dist - bits;
 410                     qenergies[w*16+g] = qenergy;
 411                     if (prev != -1) {
 412                         int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
 413                         bits += ff_aac_scalefactor_bits[sfdiff];
 414                     }
 415                     tbits += bits;
 416                     start += sce->ics.swb_sizes[g];
 417                     prev = sce->sf_idx[w*16+g];
 418                 }
 419             }
 420             if (tbits > toomanybits) {
 421                 recomprd = 1;
 422                 for (i = 0; i < 128; i++) {
 423                     if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
 424                         int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
 425                         int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
 426                         if (new_sf != sce->sf_idx[i]) {
 427                             sce->sf_idx[i] = new_sf;
 428                             changed = 1;
 429                         }
 430                     }
 431                 }
 432             } else if (tbits < toofewbits) {
 433                 recomprd = 1;
 434                 for (i = 0; i < 128; i++) {
 435                     if (sce->sf_idx[i] > SCALE_ONE_POS) {
 436                         int new_sf = FFMAX3(minsf[i], SCALE_ONE_POS, sce->sf_idx[i] - qstep);
 437                         if (new_sf != sce->sf_idx[i]) {
 438                             sce->sf_idx[i] = new_sf;
 439                             changed = 1;
 440                         }
 441                     }
 442                 }
 443             }
 444             qstep >>= 1;
 445             if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
 446                 qstep = 1;
 447         } while (qstep);
 448
 449         overdist = 1;
 450         fflag = tbits < toofewbits;
 451         for (i = 0; i < 2 && (overdist || recomprd); ++i) {
 452             if (recomprd) {
 453                 /** Must recompute distortion */
 454                 prev = -1;
 455                 tbits = 0;
 456                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 457                     start = w*128;
 458                     for (g = 0;  g < sce->ics.num_swb; g++) {
 459                         const float *coefs = sce->coeffs + start;
 460                         const float *scaled = s->scoefs + start;
 461                         int bits = 0;
 462                         int cb;
 463                         float dist = 0.0f;
 464                         float qenergy = 0.0f;
 465
 466                         if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
 467                             start += sce->ics.swb_sizes[g];
 468                             if (sce->can_pns[w*16+g]) {
 469                                 /** PNS isn't free */
 470                                 tbits += ff_pns_bits(sce, w, g);
 471                             }
 472                             continue;
 473                         }
 474                         cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 475                         for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 476                             int b;
 477                             float sqenergy;
 478                             dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
 479                                                     scaled + w2*128,
 480                                                     sce->ics.swb_sizes[g],
 481                                                     sce->sf_idx[w*16+g],
 482                                                     cb,
 483                                                     1.0f,
 484                                                     INFINITY,
 485                                                     &b, &sqenergy,
 486                                                     0);
 487                             bits += b;
 488                             qenergy += sqenergy;
 489                         }
 490                         dists[w*16+g] = dist - bits;
 491                         qenergies[w*16+g] = qenergy;
 492                         if (prev != -1) {
 493                             int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
 494                             bits += ff_aac_scalefactor_bits[sfdiff];
 495                         }
 496                         tbits += bits;
 497                         start += sce->ics.swb_sizes[g];
 498                         prev = sce->sf_idx[w*16+g];
 499                     }
 500                 }
 501             }
 502             if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) {
 503                 float maxoverdist = 0.0f;
 504                 float ovrfactor = 1.f+(maxits-its)*16.f/maxits;
 505                 overdist = recomprd = 0;
 506                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 507                     for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
 508                         if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
 509                             float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
 510                             maxoverdist = FFMAX(maxoverdist, ovrdist);
 511                             overdist++;
 512                         }
 513                     }
 514                 }
 515                 if (overdist) {
 516                     /* We have overdistorted bands, trade for zeroes (that can be noise)
 517                      * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
 518                      */
 519                     float minspread = max_spread_thr_r;
 520                     float maxspread = min_spread_thr_r;
 521                     float zspread;
 522                     int zeroable = 0;
 523                     int zeroed = 0;
 524                     int maxzeroed, zloop;
 525                     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 526                         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
 527                             if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
 528                                 minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
 529                                 maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
 530                                 zeroable++;
 531                             }
 532                         }
 533                     }
 534                     zspread = (maxspread-minspread) * 0.0125f + minspread;
 535                     /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC,
 536                      * and forced the hand of the later search_for_pns step.
 537                      * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are,
 538                      * and leave further PNSing to search_for_pns if worthwhile.
 539                      */
 540                     zspread = FFMIN3(min_spread_thr_r * 8.f, zspread,
 541                         ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1));
 542                     maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits)));
 543                     for (zloop = 0; zloop < 2; zloop++) {
 544                         /* Two passes: first distorted stuff - two birds in one shot and all that,
 545                          * then anything viable. Viable means not zero, but either CB=zero-able
 546                          * (too high SF), not SF <= 1 (that means we'd be operating at very high
 547                          * quality, we don't want PNS when doing VHQ), PNS allowed, and within
 548                          * the lowest ranking percentile.
 549                          */
 550                         float loopovrfactor = (zloop) ? 1.0f : ovrfactor;
 551                         int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS;
 552                         int mcb;
 553                         for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
 554                             if (sce->ics.swb_offset[g] < pns_start_pos)
 555                                 continue;
 556                             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 557                                 if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread
 558                                     && sce->sf_idx[w*16+g] > loopminsf
 559                                     && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]))
 560                                         || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) {
 561                                     sce->zeroes[w*16+g] = 1;
 562                                     sce->band_type[w*16+g] = 0;
 563                                     zeroed++;
 564                                 }
 565                             }
 566                         }
 567                     }
 568                     if (zeroed)
 569                         recomprd = fflag = 1;
 570                 } else {
 571                     overdist = 0;
 572                 }
 573             }
 574         }
 575
 576         minscaler = SCALE_MAX_POS;
 577         maxscaler = 0;
 578         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 579             for (g = 0;  g < sce->ics.num_swb; g++) {
 580                 if (!sce->zeroes[w*16+g]) {
 581                     minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
 582                     maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
 583                 }
 584             }
 585         }
 586
 587         minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
 588         prev = -1;
 589         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 590             /** Start with big steps, end up fine-tunning */
 591             int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
 592             int edepth = depth+2;
 593             float uplmax = its / (maxits*0.25f) + 1.0f;
 594             uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
 595             start = w * 128;
 596             for (g = 0; g < sce->ics.num_swb; g++) {
 597                 int prevsc = sce->sf_idx[w*16+g];
 598                 if (prev < 0 && !sce->zeroes[w*16+g])
 599                     prev = sce->sf_idx[0];
 600                 if (!sce->zeroes[w*16+g]) {
 601                     const float *coefs = sce->coeffs + start;
 602                     const float *scaled = s->scoefs + start;
 603                     int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 604                     int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF);
 605                     int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF);
 606                     if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > FFMAX(mindeltasf, minsf[w*16+g])) {
 607                         /* Try to make sure there is some energy in every nonzero band
 608                          * NOTE: This algorithm must be forcibly imbalanced, pushing harder
 609                          *  on holes or more distorted bands at first, otherwise there's
 610                          *  no net gain (since the next iteration will offset all bands
 611                          *  on the opposite direction to compensate for extra bits)
 612                          */
 613                         for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) {
 614                             int cb, bits;
 615                             float dist, qenergy;
 616                             int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
 617                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 618                             dist = qenergy = 0.f;
 619                             bits = 0;
 620                             if (!cb) {
 621                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
 622                             } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
 623                                 break;
 624                             }
 625                             /* !g is the DC band, it's important, since quantization error here
 626                              * applies to less than a cycle, it creates horrible intermodulation
 627                              * distortion if it doesn't stick to what psy requests
 628                              */
 629                             if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g])
 630                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
 631                             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 632                                 int b;
 633                                 float sqenergy;
 634                                 dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
 635                                                         scaled + w2*128,
 636                                                         sce->ics.swb_sizes[g],
 637                                                         sce->sf_idx[w*16+g]-1,
 638                                                         cb,
 639                                                         1.0f,
 640                                                         INFINITY,
 641                                                         &b, &sqenergy,
 642                                                         0);
 643                                 bits += b;
 644                                 qenergy += sqenergy;
 645                             }
 646                             sce->sf_idx[w*16+g]--;
 647                             dists[w*16+g] = dist - bits;
 648                             qenergies[w*16+g] = qenergy;
 649                             if (mb && (sce->sf_idx[w*16+g] < mindeltasf || (
 650                                     (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
 651                                     && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
 652                                 ) )) {
 653                                 break;
 654                             }
 655                         }
 656                     } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g])
 657                             && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
 658                             && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
 659                         ) {
 660                         /** Um... over target. Save bits for more important stuff. */
 661                         for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) {
 662                             int cb, bits;
 663                             float dist, qenergy;
 664                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
 665                             if (cb > 0) {
 666                                 dist = qenergy = 0.f;
 667                                 bits = 0;
 668                                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 669                                     int b;
 670                                     float sqenergy;
 671                                     dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
 672                                                             scaled + w2*128,
 673                                                             sce->ics.swb_sizes[g],
 674                                                             sce->sf_idx[w*16+g]+1,
 675                                                             cb,
 676                                                             1.0f,
 677                                                             INFINITY,
 678                                                             &b, &sqenergy,
 679                                                             0);
 680                                     bits += b;
 681                                     qenergy += sqenergy;
 682                                 }
 683                                 dist -= bits;
 684                                 if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
 685                                     sce->sf_idx[w*16+g]++;
 686                                     dists[w*16+g] = dist;
 687                                     qenergies[w*16+g] = qenergy;
 688                                 } else {
 689                                     break;
 690                                 }
 691                             } else {
 692                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
 693                                 break;
 694                             }
 695                         }
 696                     }
 697                     prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf);
 698                     if (sce->sf_idx[w*16+g] != prevsc)
 699                         fflag = 1;
 700                     nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
 701                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 702                 }
 703                 start += sce->ics.swb_sizes[g];
 704             }
 705         }
 706
 707         /** SF difference limit violation risk. Must re-clamp. */
 708         prev = -1;
 709         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 710             for (g = 0; g < sce->ics.num_swb; g++) {
 711                 if (!sce->zeroes[w*16+g]) {
 712                     int prevsf = sce->sf_idx[w*16+g];
 713                     if (prev < 0)
 714                         prev = prevsf;
 715                     sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF);
 716                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 717                     prev = sce->sf_idx[w*16+g];
 718                     if (!fflag && prevsf != sce->sf_idx[w*16+g])
 719                         fflag = 1;
 720                 }
 721             }
 722         }
 723
 724         its++;
 725     } while (fflag && its < maxits);
 726
 727     /** Scout out next nonzero bands */
 728     ff_init_nextband_map(sce, nextband);
 729
 730     prev = -1;
 731     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 732         /** Make sure proper codebooks are set */
 733         for (g = 0; g < sce->ics.num_swb; g++) {
 734             if (!sce->zeroes[w*16+g]) {
 735                 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 736                 if (sce->band_type[w*16+g] <= 0) {
 737                     if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) {
 738                         /** Cannot zero out, make sure it's not attempted */
 739                         sce->band_type[w*16+g] = 1;
 740                     } else {
 741                         sce->zeroes[w*16+g] = 1;
 742                         sce->band_type[w*16+g] = 0;
 743                     }
 744                 }
 745             } else {
 746                 sce->band_type[w*16+g] = 0;
 747             }
 748             /** Check that there's no SF delta range violations */
 749             if (!sce->zeroes[w*16+g]) {
 750                 if (prev != -1) {
 751                     av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
 752                     av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
 753                 } else if (sce->zeroes[0]) {
 754                     /** Set global gain to something useful */
 755                     sce->sf_idx[0] = sce->sf_idx[w*16+g];
 756                 }
 757                 prev = sce->sf_idx[w*16+g];
 758             }
 759         }
 760     }
 761 }
 762
 763 #endif /* AVCODEC_AACCODER_TWOLOOP_H */