git.sesse.net Git - ffmpeg/blob - libavcodec/aaccoder_twoloop.h

   1 /*
   2  * AAC encoder twoloop coder
   3  * Copyright (C) 2008-2009 Konstantin Shishkov
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * AAC encoder twoloop coder
  25  * @author Konstantin Shishkov, Claudio Freire
  26  */
  27
  28 /**
  29  * This file contains a template for the twoloop coder function.
  30  * The following functions from aacenc_quantization/util.h must be provided
  31  * externally, as already included declarations. They're not included
  32  * explicitly here to make it possible to provide alternative implementations:
  33  *  - quantize_band_cost
  34  *  - abs_pow34_v
  35  *  - find_max_val
  36  *  - find_min_book
  37  *  - find_form_factor
  38  */
  39
  40 #ifndef AVCODEC_AACCODER_TWOLOOP_H
  41 #define AVCODEC_AACCODER_TWOLOOP_H
  42
  43 #include <float.h>
  44 #include "libavutil/mathematics.h"
  45 #include "mathops.h"
  46 #include "avcodec.h"
  47 #include "put_bits.h"
  48 #include "aac.h"
  49 #include "aacenc.h"
  50 #include "aactab.h"
  51 #include "aacenctab.h"
  52
  53 /** Frequency in Hz for lower limit of noise substitution **/
  54 #define NOISE_LOW_LIMIT 4000
  55
     /** Pass x through av_clip() with bounds 60 and 218 (presumably clamping a scalefactor index to that range — TODO confirm). */
  56 #define sclip(x) av_clip(x,60,218)
  57
  58 /* Reflects the cost to change codebooks */
  59 static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
  60 {
  61     return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
  62 }
  63
  64 /**
  65  * two-loop quantizers search taken from ISO 13818-7 Appendix C
  66  */
  67 static void search_for_quantizers_twoloop(AVCodecContext *avctx,
  68                                           AACEncContext *s,
  69                                           SingleChannelElement *sce,
  70                                           const float lambda)
  71 {
  72     int start = 0, i, w, w2, g, recomprd;
  73     int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
  74         / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
  75         * (lambda / 120.f);
  76     int refbits = destbits;
  77     int toomanybits, toofewbits;
  78     char nzs[128];
  79     uint8_t nextband[128];
  80     int maxsf[128];
  81     float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
  82     float maxvals[128], spread_thr_r[128];
  83     float min_spread_thr_r, max_spread_thr_r;
  84
  85     /**
  86      * rdlambda controls the maximum tolerated distortion. Twoloop
  87      * will keep iterating until it fails to lower it or it reaches
  88      * ulimit * rdlambda. Keeping it low increases quality on difficult
  89      * signals, but lower it too much, and bits will be taken from weak
  90      * signals, creating "holes". A balance is necessary.
  91      * rdmax and rdmin specify the relative deviation from rdlambda
  92      * allowed for tonality compensation
  93      */
  94     float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
  95     const float nzslope = 1.5f;
  96     float rdmin = 0.03125f;
  97     float rdmax = 1.0f;
  98
  99     /**
 100      * sfoffs controls an offset of optimum allocation that will be
 101      * applied based on lambda. Keep it real and modest, the loop
 102      * will take care of the rest, this just accelerates convergence
 103      */
 104     float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
 105
 106     int fflag, minscaler, maxscaler, nminscaler;
 107     int its  = 0;
 108     int maxits = 30;
 109     int allz = 0;
 110     int tbits;
 111     int cutoff = 1024;
 112     int pns_start_pos;
 113     int prev;
 114
 115     /**
 116      * zeroscale controls a multiplier of the threshold, if band energy
 117      * is below this, a zero is forced. Keep it lower than 1, unless
 118      * low lambda is used, because energy < threshold doesn't mean there's
 119      * no audible signal outright, it's just energy. Also make it rise
 120      * slower than rdlambda, as rdscale has due compensation with
 121      * noisy band deprioritization below, whereas zeroing logic is rather dumb
 122      */
 123     float zeroscale;
 124     if (lambda > 120.f) {
 125         zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
 126     } else {
 127         zeroscale = 1.f;
 128     }
 129
 130     if (s->psy.bitres.alloc >= 0) {
 131         /**
 132          * Psy granted us extra bits to use from the reservoir;
 133          * adjust for lambda, except for what psy already did
 134          */
 135         destbits = s->psy.bitres.alloc
 136             * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
 137     }
 138
 139     if (avctx->flags & CODEC_FLAG_QSCALE) {
 140         /**
 141          * Constant Q-scale doesn't compensate MS coding on its own
 142          * No need to be overly precise, this only controls RD
 143          * adjustment CB limits when going overboard
 144          */
 145         if (s->options.mid_side && s->cur_type == TYPE_CPE)
 146             destbits *= 2;
 147
 148         /**
 149          * When using a constant Q-scale, don't adjust bits, just use RD
 150          * Don't let it go overboard, though... 8x psy target is enough
 151          */
 152         toomanybits = 5800;
 153         toofewbits = destbits / 16;
 154
 155         /** Don't offset scalers, just RD */
 156         sfoffs = sce->ics.num_windows - 1;
 157         rdlambda = sqrtf(rdlambda);
 158
 159         /** search further */
 160         maxits *= 2;
 161     } else {
 162         /* When using ABR, be strict, but a reasonable leeway is
 163          * critical to allow RC to smoothly track desired bitrate
 164          * without sudden quality drops that cause audible artifacts.
 165          * Symmetry is also desirable, to avoid systematic bias.
 166          */
 167         toomanybits = destbits + destbits/8;
 168         toofewbits = destbits - destbits/8;
 169
 170         sfoffs = 0;
 171         rdlambda = sqrtf(rdlambda);
 172     }
 173
 174     /** and zero out above cutoff frequency */
 175     {
 176         int wlen = 1024 / sce->ics.num_windows;
 177         int bandwidth;
 178
 179         /**
 180          * Scale, psy gives us constant quality, this LP only scales
 181          * bitrate by lambda, so we save bits on subjectively unimportant HF
 182          * rather than increase quantization noise. Adjust nominal bitrate
 183          * to effective bitrate according to encoding parameters,
 184          * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
 185          */
 186         float rate_bandwidth_multiplier = 1.5f;
 187         int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
 188             ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
 189             : (avctx->bit_rate / avctx->channels);
 190
 191         /** Compensate for extensions that increase efficiency */
 192         if (s->options.pns || s->options.intensity_stereo)
 193             frame_bit_rate *= 1.15f;
 194
 195         if (avctx->cutoff > 0) {
 196             bandwidth = avctx->cutoff;
 197         } else {
 198             bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
 199             s->psy.cutoff = bandwidth;
 200         }
 201
 202         cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
 203         pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
 204     }
 205
 206     /**
 207      * for values above this the decoder might end up in an endless loop
 208      * due to always having more bits than what can be encoded.
 209      */
 210     destbits = FFMIN(destbits, 5800);
 211     toomanybits = FFMIN(toomanybits, 5800);
 212     toofewbits = FFMIN(toofewbits, 5800);
 213     /**
 214      * XXX: some heuristic to determine initial quantizers will reduce search time
 215      * determine zero bands and upper distortion limits
 216      */
 217     min_spread_thr_r = -1;
 218     max_spread_thr_r = -1;
 219     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 220         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
 221             int nz = 0;
 222             float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
 223             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 224                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
 225                 if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
 226                     sce->zeroes[(w+w2)*16+g] = 1;
 227                     continue;
 228                 }
 229                 nz = 1;
 230             }
 231             if (!nz) {
 232                 uplim = 0.0f;
 233             } else {
 234                 nz = 0;
 235                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 236                     FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
 237                     if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
 238                         continue;
 239                     uplim += band->threshold;
 240                     energy += band->energy;
 241                     spread += band->spread;
 242                     nz++;
 243                 }
 244             }
 245             uplims[w*16+g] = uplim;
 246             energies[w*16+g] = energy;
 247             nzs[w*16+g] = nz;
 248             sce->zeroes[w*16+g] = !nz;
 249             allz |= nz;
 250             if (nz && sce->can_pns[w*16+g]) {
 251                 spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
 252                 if (min_spread_thr_r < 0) {
 253                     min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
 254                 } else {
 255                     min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
 256                     max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
 257                 }
 258             }
 259         }
 260     }
 261
 262     /** Compute initial scalers */
 263     minscaler = 65535;
 264     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 265         for (g = 0;  g < sce->ics.num_swb; g++) {
 266             if (sce->zeroes[w*16+g]) {
 267                 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
 268                 continue;
 269             }
 270             /**
 271              * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
 272              * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
 273              * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
 274              * more robust.
 275              */
 276             sce->sf_idx[w*16+g] = av_clip(
 277                 SCALE_ONE_POS
 278                     + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
 279                     + sfoffs,
 280                 60, SCALE_MAX_POS);
 281             minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
 282         }
 283     }
 284
 285     /** Clip */
 286     minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
 287     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
 288         for (g = 0;  g < sce->ics.num_swb; g++)
 289             if (!sce->zeroes[w*16+g])
 290                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
 291
 292     if (!allz)
 293         return;
 294     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
 295     ff_quantize_band_cost_cache_init(s);
 296
 297     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 298         start = w*128;
 299         for (g = 0;  g < sce->ics.num_swb; g++) {
 300             const float *scaled = s->scoefs + start;
 301             maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
 302             start += sce->ics.swb_sizes[g];
 303         }
 304     }
 305
 306     /**
 307      * Scale uplims to match rate distortion to quality
 308      * by applying noisy band deprioritization and tonal band prioritization.
 309      * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
 310      * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
 311      * rate distortion requirements.
 312      */
 313     memcpy(euplims, uplims, sizeof(euplims));
 314     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 315         /** psy already prioritizes transients to some extent */
 316         float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
 317         start = w*128;
 318         for (g = 0;  g < sce->ics.num_swb; g++) {
 319             if (nzs[g] > 0) {
 320                 float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
 321                 float energy2uplim = find_form_factor(
 322                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
 323                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
 324                     sce->coeffs + start,
 325                     nzslope * cleanup_factor);
 326                 energy2uplim *= de_psy_factor;
 327                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
 328                     /** In ABR, we need to prioritize less and let rate control do its thing */
 329                     energy2uplim = sqrtf(energy2uplim);
 330                 }
 331                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
 332                 uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
 333                                   * sce->ics.group_len[w];
 334
 335                 energy2uplim = find_form_factor(
 336                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
 337                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
 338                     sce->coeffs + start,
 339                     2.0f);
 340                 energy2uplim *= de_psy_factor;
 341                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
 342                     /** In ABR, we need to prioritize less and let rate control do its thing */
 343                     energy2uplim = sqrtf(energy2uplim);
 344                 }
 345                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
 346                 euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
 347                     0.5f, 1.0f);
 348             }
 349             start += sce->ics.swb_sizes[g];
 350         }
 351     }
 352
 353     for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
 354         maxsf[i] = SCALE_MAX_POS;
 355
 356     //perform two-loop search
 357     //outer loop - improve quality
 358     do {
 359         //inner loop - quantize spectrum to fit into given number of bits
 360         int overdist;
 361         int qstep = its ? 1 : 32;
 362         do {
 363             int changed = 0;
 364             prev = -1;
 365             recomprd = 0;
 366             tbits = 0;
 367             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 368                 start = w*128;
 369                 for (g = 0;  g < sce->ics.num_swb; g++) {
 370                     const float *coefs = &sce->coeffs[start];
 371                     const float *scaled = &s->scoefs[start];
 372                     int bits = 0;
 373                     int cb;
 374                     float dist = 0.0f;
 375                     float qenergy = 0.0f;
 376
 377                     if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
 378                         start += sce->ics.swb_sizes[g];
 379                         if (sce->can_pns[w*16+g]) {
 380                             /** PNS isn't free */
 381                             tbits += ff_pns_bits(sce, w, g);
 382                         }
 383                         continue;
 384                     }
 385                     cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 386                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 387                         int b;
 388                         float sqenergy;
 389                         dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
 390                                                    scaled + w2*128,
 391                                                    sce->ics.swb_sizes[g],
 392                                                    sce->sf_idx[w*16+g],
 393                                                    cb,
 394                                                    1.0f,
 395                                                    INFINITY,
 396                                                    &b, &sqenergy,
 397                                                    0);
 398                         bits += b;
 399                         qenergy += sqenergy;
 400                     }
 401                     dists[w*16+g] = dist - bits;
 402                     qenergies[w*16+g] = qenergy;
 403                     if (prev != -1) {
 404                         int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
 405                         bits += ff_aac_scalefactor_bits[sfdiff];
 406                     }
 407                     tbits += bits;
 408                     start += sce->ics.swb_sizes[g];
 409                     prev = sce->sf_idx[w*16+g];
 410                 }
 411             }
 412             if (tbits > toomanybits) {
 413                 recomprd = 1;
 414                 for (i = 0; i < 128; i++) {
 415                     if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
 416                         int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
 417                         int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
 418                         if (new_sf != sce->sf_idx[i]) {
 419                             sce->sf_idx[i] = new_sf;
 420                             changed = 1;
 421                         }
 422                     }
 423                 }
 424             } else if (tbits < toofewbits) {
 425                 recomprd = 1;
 426                 for (i = 0; i < 128; i++) {
 427                     if (sce->sf_idx[i] > SCALE_ONE_POS) {
 428                         int new_sf = FFMAX(SCALE_ONE_POS, sce->sf_idx[i] - qstep);
 429                         if (new_sf != sce->sf_idx[i]) {
 430                             sce->sf_idx[i] = new_sf;
 431                             changed = 1;
 432                         }
 433                     }
 434                 }
 435             }
 436             qstep >>= 1;
 437             if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
 438                 qstep = 1;
 439         } while (qstep);
 440
 441         overdist = 1;
 442         fflag = tbits < toofewbits;
 443         for (i = 0; i < 2 && (overdist || recomprd); ++i) {
 444             if (recomprd) {
 445                 /** Must recompute distortion */
 446                 prev = -1;
 447                 tbits = 0;
 448                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 449                     start = w*128;
 450                     for (g = 0;  g < sce->ics.num_swb; g++) {
 451                         const float *coefs = sce->coeffs + start;
 452                         const float *scaled = s->scoefs + start;
 453                         int bits = 0;
 454                         int cb;
 455                         float dist = 0.0f;
 456                         float qenergy = 0.0f;
 457
 458                         if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
 459                             start += sce->ics.swb_sizes[g];
 460                             if (sce->can_pns[w*16+g]) {
 461                                 /** PNS isn't free */
 462                                 tbits += ff_pns_bits(sce, w, g);
 463                             }
 464                             continue;
 465                         }
 466                         cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 467                         for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 468                             int b;
 469                             float sqenergy;
 470                             dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
 471                                                     scaled + w2*128,
 472                                                     sce->ics.swb_sizes[g],
 473                                                     sce->sf_idx[w*16+g],
 474                                                     cb,
 475                                                     1.0f,
 476                                                     INFINITY,
 477                                                     &b, &sqenergy,
 478                                                     0);
 479                             bits += b;
 480                             qenergy += sqenergy;
 481                         }
 482                         dists[w*16+g] = dist - bits;
 483                         qenergies[w*16+g] = qenergy;
 484                         if (prev != -1) {
 485                             int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
 486                             bits += ff_aac_scalefactor_bits[sfdiff];
 487                         }
 488                         tbits += bits;
 489                         start += sce->ics.swb_sizes[g];
 490                         prev = sce->sf_idx[w*16+g];
 491                     }
 492                 }
 493             }
 494             if (!i && s->options.pns && its > maxits/2 && tbits > toofewbits) {
 495                 float maxoverdist = 0.0f;
 496                 float ovrfactor = 1.f+(maxits-its)*16.f/maxits;
 497                 overdist = recomprd = 0;
 498                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 499                     for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
 500                         if (!sce->zeroes[w*16+g] && sce->sf_idx[w*16+g] > SCALE_ONE_POS && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
 501                             float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
 502                             maxoverdist = FFMAX(maxoverdist, ovrdist);
 503                             overdist++;
 504                         }
 505                     }
 506                 }
 507                 if (overdist) {
 508                     /* We have overdistorted bands, trade for zeroes (that can be noise)
 509                      * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
 510                      */
 511                     float minspread = max_spread_thr_r;
 512                     float maxspread = min_spread_thr_r;
 513                     float zspread;
 514                     int zeroable = 0;
 515                     int zeroed = 0;
 516                     int maxzeroed, zloop;
 517                     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 518                         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
 519                             if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
 520                                 minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
 521                                 maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
 522                                 zeroable++;
 523                             }
 524                         }
 525                     }
 526                     zspread = (maxspread-minspread) * 0.0125f + minspread;
 527                     /* Don't PNS everything even if allowed. It suppresses bit starvation signals from RC,
 528                      * and forces the hand of the later search_for_pns step.
 529                      * Instead, PNS a fraction of the spread_thr_r range depending on how starved for bits we are,
 530                      * and leave further PNSing to search_for_pns if worthwhile.
 531                      */
 532                     zspread = FFMIN3(min_spread_thr_r * 8.f, zspread,
 533                         ((toomanybits - tbits) * min_spread_thr_r + (tbits - toofewbits) * max_spread_thr_r) / (toomanybits - toofewbits + 1));
 534                     maxzeroed = FFMIN(zeroable, FFMAX(1, (zeroable * its + maxits - 1) / (2 * maxits)));
 535                     for (zloop = 0; zloop < 2; zloop++) {
 536                         /* Two passes: first distorted stuff - two birds in one shot and all that,
 537                          * then anything viable. Viable means not zero, but either CB=zero-able
 538                          * (too high SF), not SF <= 1 (that means we'd be operating at very high
 539                          * quality, we don't want PNS when doing VHQ), PNS allowed, and within
 540                          * the lowest ranking percentile.
 541                          */
 542                         float loopovrfactor = (zloop) ? 1.0f : ovrfactor;
 543                         int loopminsf = (zloop) ? (SCALE_ONE_POS - SCALE_DIV_512) : SCALE_ONE_POS;
 544                         int mcb;
 545                         for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
 546                             if (sce->ics.swb_offset[g] < pns_start_pos)
 547                                 continue;
 548                             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 549                                 if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread
 550                                     && sce->sf_idx[w*16+g] > loopminsf
 551                                     && (dists[w*16+g] > loopovrfactor*uplims[w*16+g] || !(mcb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]))
 552                                         || (mcb <= 1 && dists[w*16+g] > FFMIN(uplims[w*16+g], euplims[w*16+g]))) ) {
 553                                     sce->zeroes[w*16+g] = 1;
 554                                     sce->band_type[w*16+g] = 0;
 555                                     zeroed++;
 556                                 }
 557                             }
 558                         }
 559                     }
 560                     if (zeroed)
 561                         recomprd = fflag = 1;
 562                 } else {
 563                     overdist = 0;
 564                 }
 565             }
 566         }
 567
 568         minscaler = SCALE_MAX_POS;
 569         maxscaler = 0;
 570         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 571             for (g = 0;  g < sce->ics.num_swb; g++) {
 572                 if (!sce->zeroes[w*16+g]) {
 573                     minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
 574                     maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
 575                 }
 576             }
 577         }
 578
 579         minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
 580         prev = -1;
 581         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 582             /** Start with big steps, end up fine-tuning */
 583             int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
 584             int edepth = depth+2;
 585             float uplmax = its / (maxits*0.25f) + 1.0f;
 586             uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
 587             start = w * 128;
 588             for (g = 0; g < sce->ics.num_swb; g++) {
 589                 int prevsc = sce->sf_idx[w*16+g];
 590                 if (prev < 0 && !sce->zeroes[w*16+g])
 591                     prev = sce->sf_idx[0];
 592                 if (!sce->zeroes[w*16+g]) {
 593                     const float *coefs = sce->coeffs + start;
 594                     const float *scaled = s->scoefs + start;
 595                     int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 596                     int mindeltasf = FFMAX(0, prev - SCALE_MAX_DIFF);
 597                     int maxdeltasf = FFMIN(SCALE_MAX_POS - SCALE_DIV_512, prev + SCALE_MAX_DIFF);
 598                     if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > mindeltasf) {
 599                         /* Try to make sure there is some energy in every nonzero band
 600                          * NOTE: This algorithm must be forcibly imbalanced, pushing harder
 601                          *  on holes or more distorted bands at first, otherwise there's
 602                          *  no net gain (since the next iteration will offset all bands
 603                          *  on the opposite direction to compensate for extra bits)
 604                          */
 605                         for (i = 0; i < edepth && sce->sf_idx[w*16+g] > mindeltasf; ++i) {
 606                             int cb, bits;
 607                             float dist, qenergy;
 608                             int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
 609                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 610                             dist = qenergy = 0.f;
 611                             bits = 0;
 612                             if (!cb) {
 613                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
 614                             } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
 615                                 break;
 616                             }
 617                             /* !g is the DC band, it's important, since quantization error here
 618                              * applies to less than a cycle, it creates horrible intermodulation
 619                              * distortion if it doesn't stick to what psy requests
 620                              */
 621                             if (!g && sce->ics.num_windows > 1 && dists[w*16+g] >= euplims[w*16+g])
 622                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
 623                             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 624                                 int b;
 625                                 float sqenergy;
 626                                 dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
 627                                                         scaled + w2*128,
 628                                                         sce->ics.swb_sizes[g],
 629                                                         sce->sf_idx[w*16+g]-1,
 630                                                         cb,
 631                                                         1.0f,
 632                                                         INFINITY,
 633                                                         &b, &sqenergy,
 634                                                         0);
 635                                 bits += b;
 636                                 qenergy += sqenergy;
 637                             }
 638                             sce->sf_idx[w*16+g]--;
 639                             dists[w*16+g] = dist - bits;
 640                             qenergies[w*16+g] = qenergy;
 641                             if (mb && (sce->sf_idx[w*16+g] < mindeltasf || (
 642                                     (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
 643                                     && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
 644                                 ) )) {
 645                                 break;
 646                             }
 647                         }
 648                     } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < FFMIN(maxdeltasf, maxsf[w*16+g])
 649                             && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
 650                             && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
 651                         ) {
 652                         /** Um... over target. Save bits for more important stuff. */
 653                         for (i = 0; i < depth && sce->sf_idx[w*16+g] < maxdeltasf; ++i) {
 654                             int cb, bits;
 655                             float dist, qenergy;
 656                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
 657                             if (cb > 0) {
 658                                 dist = qenergy = 0.f;
 659                                 bits = 0;
 660                                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 661                                     int b;
 662                                     float sqenergy;
 663                                     dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
 664                                                             scaled + w2*128,
 665                                                             sce->ics.swb_sizes[g],
 666                                                             sce->sf_idx[w*16+g]+1,
 667                                                             cb,
 668                                                             1.0f,
 669                                                             INFINITY,
 670                                                             &b, &sqenergy,
 671                                                             0);
 672                                     bits += b;
 673                                     qenergy += sqenergy;
 674                                 }
 675                                 dist -= bits;
 676                                 if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
 677                                     sce->sf_idx[w*16+g]++;
 678                                     dists[w*16+g] = dist;
 679                                     qenergies[w*16+g] = qenergy;
 680                                 } else {
 681                                     break;
 682                                 }
 683                             } else {
 684                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
 685                                 break;
 686                             }
 687                         }
 688                     }
 689                     prev = sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], mindeltasf, maxdeltasf);
 690                     if (sce->sf_idx[w*16+g] != prevsc)
 691                         fflag = 1;
 692                     nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
 693                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 694                 }
 695                 start += sce->ics.swb_sizes[g];
 696             }
 697         }
 698
 699         /** SF difference limit violation risk. Must re-clamp. */
 700         prev = -1;
 701         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 702             for (g = 0; g < sce->ics.num_swb; g++) {
 703                 if (!sce->zeroes[w*16+g]) {
 704                     int prevsf = sce->sf_idx[w*16+g];
 705                     if (prev < 0)
 706                         prev = prevsf;
 707                     sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], prev - SCALE_MAX_DIFF, prev + SCALE_MAX_DIFF);
 708                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 709                     prev = sce->sf_idx[w*16+g];
 710                     if (!fflag && prevsf != sce->sf_idx[w*16+g])
 711                         fflag = 1;
 712                 }
 713             }
 714         }
 715
 716         its++;
 717     } while (fflag && its < maxits);
 718
 719     /** Scout out next nonzero bands */
 720     ff_init_nextband_map(sce, nextband);
 721
 722     prev = -1;
 723     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 724         /** Make sure proper codebooks are set */
 725         for (g = 0; g < sce->ics.num_swb; g++) {
 726             if (!sce->zeroes[w*16+g]) {
 727                 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 728                 if (sce->band_type[w*16+g] <= 0) {
 729                     if (!ff_sfdelta_can_remove_band(sce, nextband, prev, w*16+g)) {
 730                         /** Cannot zero out, make sure it's not attempted */
 731                         sce->band_type[w*16+g] = 1;
 732                     } else {
 733                         sce->zeroes[w*16+g] = 1;
 734                         sce->band_type[w*16+g] = 0;
 735                     }
 736                 }
 737             } else {
 738                 sce->band_type[w*16+g] = 0;
 739             }
 740             /** Check that there are no SF delta range violations */
 741             if (!sce->zeroes[w*16+g]) {
 742                 if (prev != -1) {
 743                     av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
 744                     av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
 745                 } else if (sce->zeroes[0]) {
 746                     /** Set global gain to something useful */
 747                     sce->sf_idx[0] = sce->sf_idx[w*16+g];
 748                 }
 749                 prev = sce->sf_idx[w*16+g];
 750             }
 751         }
 752     }
 753 }
 754
 755 #endif /* AVCODEC_AACCODER_TWOLOOP_H */