git.sesse.net Git - ffmpeg/blob - libavcodec/aaccoder_twoloop.h

   1 /*
   2  * AAC encoder twoloop coder
   3  * Copyright (C) 2008-2009 Konstantin Shishkov
   4  *
   5  * This file is part of FFmpeg.
   6  *
   7  * FFmpeg is free software; you can redistribute it and/or
   8  * modify it under the terms of the GNU Lesser General Public
   9  * License as published by the Free Software Foundation; either
  10  * version 2.1 of the License, or (at your option) any later version.
  11  *
  12  * FFmpeg is distributed in the hope that it will be useful,
  13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15  * Lesser General Public License for more details.
  16  *
  17  * You should have received a copy of the GNU Lesser General Public
  18  * License along with FFmpeg; if not, write to the Free Software
  19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20  */
  21
  22 /**
  23  * @file
  24  * AAC encoder twoloop coder
  25  * @author Konstantin Shishkov, Claudio Freire
  26  */
  27
/**
 * This file contains a template for the twoloop coder function.
 * The following functions from aacenc_quantization/util.h must already be
 * declared by the file that includes this header. They are deliberately not
 * included here, so that alternative implementations can be provided:
 *  - quantize_band_cost
 *  - abs_pow34_v
 *  - find_max_val
 *  - find_min_book
 *  - find_form_factor
 */
  39
  40 #ifndef AVCODEC_AACCODER_TWOLOOP_H
  41 #define AVCODEC_AACCODER_TWOLOOP_H
  42
  43 #include <float.h>
  44 #include "libavutil/mathematics.h"
  45 #include "mathops.h"
  46 #include "avcodec.h"
  47 #include "put_bits.h"
  48 #include "aac.h"
  49 #include "aacenc.h"
  50 #include "aactab.h"
  51 #include "aacenctab.h"
  52
  53 /** Frequency in Hz for lower limit of noise substitution **/
  54 #define NOISE_LOW_LIMIT 4000
  55
  56 #define sclip(x) av_clip(x,60,218)
  57
  58 /* Reflects the cost to change codebooks */
  59 static inline int ff_pns_bits(SingleChannelElement *sce, int w, int g)
  60 {
  61     return (!g || !sce->zeroes[w*16+g-1] || !sce->can_pns[w*16+g-1]) ? 9 : 5;
  62 }
  63
  64 /**
  65  * two-loop quantizers search taken from ISO 13818-7 Appendix C
  66  */
  67 static void search_for_quantizers_twoloop(AVCodecContext *avctx,
  68                                           AACEncContext *s,
  69                                           SingleChannelElement *sce,
  70                                           const float lambda)
  71 {
  72     int start = 0, i, w, w2, g, recomprd;
  73     int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate
  74         / ((avctx->flags & CODEC_FLAG_QSCALE) ? 2.0f : avctx->channels)
  75         * (lambda / 120.f);
  76     int refbits = destbits;
  77     int toomanybits, toofewbits;
  78     char nzs[128];
  79     int maxsf[128];
  80     float dists[128] = { 0 }, qenergies[128] = { 0 }, uplims[128], euplims[128], energies[128];
  81     float maxvals[128], spread_thr_r[128];
  82     float min_spread_thr_r, max_spread_thr_r;
  83
  84     /**
  85      * rdlambda controls the maximum tolerated distortion. Twoloop
  86      * will keep iterating until it fails to lower it or it reaches
  87      * ulimit * rdlambda. Keeping it low increases quality on difficult
  88      * signals, but lower it too much, and bits will be taken from weak
  89      * signals, creating "holes". A balance is necesary.
  90      * rdmax and rdmin specify the relative deviation from rdlambda
  91      * allowed for tonality compensation
  92      */
  93     float rdlambda = av_clipf(2.0f * 120.f / lambda, 0.0625f, 16.0f);
  94     const float nzslope = 1.5f;
  95     float rdmin = 0.03125f;
  96     float rdmax = 1.0f;
  97
  98     /**
  99      * sfoffs controls an offset of optmium allocation that will be
 100      * applied based on lambda. Keep it real and modest, the loop
 101      * will take care of the rest, this just accelerates convergence
 102      */
 103     float sfoffs = av_clipf(log2f(120.0f / lambda) * 4.0f, -5, 10);
 104
 105     int fflag, minscaler, maxscaler, nminscaler, minrdsf;
 106     int its  = 0;
 107     int maxits = 30;
 108     int allz = 0;
 109     int tbits;
 110     int cutoff = 1024;
 111     int pns_start_pos;
 112     int prev;
 113
 114     /**
 115      * zeroscale controls a multiplier of the threshold, if band energy
 116      * is below this, a zero is forced. Keep it lower than 1, unless
 117      * low lambda is used, because energy < threshold doesn't mean there's
 118      * no audible signal outright, it's just energy. Also make it rise
 119      * slower than rdlambda, as rdscale has due compensation with
 120      * noisy band depriorization below, whereas zeroing logic is rather dumb
 121      */
 122     float zeroscale;
 123     if (lambda > 120.f) {
 124         zeroscale = av_clipf(powf(120.f / lambda, 0.25f), 0.0625f, 1.0f);
 125     } else {
 126         zeroscale = 1.f;
 127     }
 128
 129     if (s->psy.bitres.alloc >= 0) {
 130         /**
 131          * Psy granted us extra bits to use, from the reservoire
 132          * adjust for lambda except what psy already did
 133          */
 134         destbits = s->psy.bitres.alloc
 135             * (lambda / (avctx->global_quality ? avctx->global_quality : 120));
 136     }
 137
 138     if (avctx->flags & CODEC_FLAG_QSCALE) {
 139         /**
 140          * Constant Q-scale doesn't compensate MS coding on its own
 141          * No need to be overly precise, this only controls RD
 142          * adjustment CB limits when going overboard
 143          */
 144         if (s->options.mid_side && s->cur_type == TYPE_CPE)
 145             destbits *= 2;
 146
 147         /**
 148          * When using a constant Q-scale, don't adjust bits, just use RD
 149          * Don't let it go overboard, though... 8x psy target is enough
 150          */
 151         toomanybits = 5800;
 152         toofewbits = destbits / 16;
 153
 154         /** Don't offset scalers, just RD */
 155         sfoffs = sce->ics.num_windows - 1;
 156         rdlambda = sqrtf(rdlambda);
 157
 158         /** search further */
 159         maxits *= 2;
 160     } else {
 161         /** When using ABR, be strict */
 162         toomanybits = destbits + destbits/16;
 163         toofewbits = destbits - destbits/4;
 164
 165         sfoffs = 0;
 166         rdlambda = sqrtf(rdlambda);
 167     }
 168
 169     /** and zero out above cutoff frequency */
 170     {
 171         int wlen = 1024 / sce->ics.num_windows;
 172         int bandwidth;
 173
 174         /**
 175          * Scale, psy gives us constant quality, this LP only scales
 176          * bitrate by lambda, so we save bits on subjectively unimportant HF
 177          * rather than increase quantization noise. Adjust nominal bitrate
 178          * to effective bitrate according to encoding parameters,
 179          * AAC_CUTOFF_FROM_BITRATE is calibrated for effective bitrate.
 180          */
 181         float rate_bandwidth_multiplier = 1.5f;
 182         int frame_bit_rate = (avctx->flags & CODEC_FLAG_QSCALE)
 183             ? (refbits * rate_bandwidth_multiplier * avctx->sample_rate / 1024)
 184             : (avctx->bit_rate / avctx->channels);
 185
 186         /** Compensate for extensions that increase efficiency */
 187         if (s->options.pns || s->options.intensity_stereo)
 188             frame_bit_rate *= 1.15f;
 189
 190         if (avctx->cutoff > 0) {
 191             bandwidth = avctx->cutoff;
 192         } else {
 193             bandwidth = FFMAX(3000, AAC_CUTOFF_FROM_BITRATE(frame_bit_rate, 1, avctx->sample_rate));
 194         }
 195
 196         cutoff = bandwidth * 2 * wlen / avctx->sample_rate;
 197         pns_start_pos = NOISE_LOW_LIMIT * 2 * wlen / avctx->sample_rate;
 198     }
 199
 200     /**
 201      * for values above this the decoder might end up in an endless loop
 202      * due to always having more bits than what can be encoded.
 203      */
 204     destbits = FFMIN(destbits, 5800);
 205     toomanybits = FFMIN(toomanybits, 5800);
 206     toofewbits = FFMIN(toofewbits, 5800);
 207     /**
 208      * XXX: some heuristic to determine initial quantizers will reduce search time
 209      * determine zero bands and upper distortion limits
 210      */
 211     min_spread_thr_r = -1;
 212     max_spread_thr_r = -1;
 213     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 214         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
 215             int nz = 0;
 216             float uplim = 0.0f, energy = 0.0f, spread = 0.0f;
 217             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 218                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
 219                 if (start >= cutoff || band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f) {
 220                     sce->zeroes[(w+w2)*16+g] = 1;
 221                     continue;
 222                 }
 223                 nz = 1;
 224             }
 225             if (!nz) {
 226                 uplim = 0.0f;
 227             } else {
 228                 nz = 0;
 229                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 230                     FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
 231                     if (band->energy <= (band->threshold * zeroscale) || band->threshold == 0.0f)
 232                         continue;
 233                     uplim += band->threshold;
 234                     energy += band->energy;
 235                     spread += band->spread;
 236                     nz++;
 237                 }
 238             }
 239             uplims[w*16+g] = uplim;
 240             energies[w*16+g] = energy;
 241             nzs[w*16+g] = nz;
 242             sce->zeroes[w*16+g] = !nz;
 243             allz |= nz;
 244             if (nz) {
 245                 spread_thr_r[w*16+g] = energy * nz / (uplim * spread);
 246                 if (min_spread_thr_r < 0) {
 247                     min_spread_thr_r = max_spread_thr_r = spread_thr_r[w*16+g];
 248                 } else {
 249                     min_spread_thr_r = FFMIN(min_spread_thr_r, spread_thr_r[w*16+g]);
 250                     max_spread_thr_r = FFMAX(max_spread_thr_r, spread_thr_r[w*16+g]);
 251                 }
 252             }
 253         }
 254     }
 255
 256     /** Compute initial scalers */
 257     minscaler = 65535;
 258     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 259         for (g = 0;  g < sce->ics.num_swb; g++) {
 260             if (sce->zeroes[w*16+g]) {
 261                 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
 262                 continue;
 263             }
 264             /**
 265              * log2f-to-distortion ratio is, technically, 2 (1.5db = 4, but it's power vs level so it's 2).
 266              * But, as offsets are applied, low-frequency signals are too sensitive to the induced distortion,
 267              * so we make scaling more conservative by choosing a lower log2f-to-distortion ratio, and thus
 268              * more robust.
 269              */
 270             sce->sf_idx[w*16+g] = av_clip(
 271                 SCALE_ONE_POS
 272                     + 1.75*log2f(FFMAX(0.00125f,uplims[w*16+g]) / sce->ics.swb_sizes[g])
 273                     + sfoffs,
 274                 60, SCALE_MAX_POS);
 275             minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
 276         }
 277     }
 278
 279     /** Clip */
 280     minscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
 281     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
 282         for (g = 0;  g < sce->ics.num_swb; g++)
 283             if (!sce->zeroes[w*16+g])
 284                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF - 1);
 285
 286     if (!allz)
 287         return;
 288     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
 289     ff_quantize_band_cost_cache_init(s);
 290
 291     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 292         start = w*128;
 293         for (g = 0;  g < sce->ics.num_swb; g++) {
 294             const float *scaled = s->scoefs + start;
 295             maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
 296             start += sce->ics.swb_sizes[g];
 297         }
 298     }
 299
 300     /**
 301      * Scale uplims to match rate distortion to quality
 302      * bu applying noisy band depriorization and tonal band priorization.
 303      * Maxval-energy ratio gives us an idea of how noisy/tonal the band is.
 304      * If maxval^2 ~ energy, then that band is mostly noise, and we can relax
 305      * rate distortion requirements.
 306      */
 307     memcpy(euplims, uplims, sizeof(euplims));
 308     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 309         /** psy already priorizes transients to some extent */
 310         float de_psy_factor = (sce->ics.num_windows > 1) ? 8.0f / sce->ics.group_len[w] : 1.0f;
 311         start = w*128;
 312         for (g = 0;  g < sce->ics.num_swb; g++) {
 313             if (nzs[g] > 0) {
 314                 float cleanup_factor = ff_sqrf(av_clipf(start / (cutoff * 0.75f), 1.0f, 2.0f));
 315                 float energy2uplim = find_form_factor(
 316                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
 317                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
 318                     sce->coeffs + start,
 319                     nzslope * cleanup_factor);
 320                 energy2uplim *= de_psy_factor;
 321                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
 322                     /** In ABR, we need to priorize less and let rate control do its thing */
 323                     energy2uplim = sqrtf(energy2uplim);
 324                 }
 325                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
 326                 uplims[w*16+g] *= av_clipf(rdlambda * energy2uplim, rdmin, rdmax)
 327                                   * sce->ics.group_len[w];
 328
 329                 energy2uplim = find_form_factor(
 330                     sce->ics.group_len[w], sce->ics.swb_sizes[g],
 331                     uplims[w*16+g] / (nzs[g] * sce->ics.swb_sizes[w]),
 332                     sce->coeffs + start,
 333                     2.0f);
 334                 energy2uplim *= de_psy_factor;
 335                 if (!(avctx->flags & CODEC_FLAG_QSCALE)) {
 336                     /** In ABR, we need to priorize less and let rate control do its thing */
 337                     energy2uplim = sqrtf(energy2uplim);
 338                 }
 339                 energy2uplim = FFMAX(0.015625f, FFMIN(1.0f, energy2uplim));
 340                 euplims[w*16+g] *= av_clipf(rdlambda * energy2uplim * sce->ics.group_len[w],
 341                     0.5f, 1.0f);
 342             }
 343             start += sce->ics.swb_sizes[g];
 344         }
 345     }
 346
 347     for (i = 0; i < sizeof(maxsf) / sizeof(maxsf[0]); ++i)
 348         maxsf[i] = SCALE_MAX_POS;
 349
 350     //perform two-loop search
 351     //outer loop - improve quality
 352     do {
 353         //inner loop - quantize spectrum to fit into given number of bits
 354         int overdist;
 355         int qstep = its ? 1 : 32;
 356         do {
 357             int changed = 0;
 358             prev = -1;
 359             recomprd = 0;
 360             tbits = 0;
 361             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 362                 start = w*128;
 363                 for (g = 0;  g < sce->ics.num_swb; g++) {
 364                     const float *coefs = &sce->coeffs[start];
 365                     const float *scaled = &s->scoefs[start];
 366                     int bits = 0;
 367                     int cb;
 368                     float dist = 0.0f;
 369                     float qenergy = 0.0f;
 370
 371                     if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
 372                         start += sce->ics.swb_sizes[g];
 373                         if (sce->can_pns[w*16+g]) {
 374                             /** PNS isn't free */
 375                             tbits += ff_pns_bits(sce, w, g);
 376                         }
 377                         continue;
 378                     }
 379                     cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 380                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 381                         int b;
 382                         float sqenergy;
 383                         dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
 384                                                    scaled + w2*128,
 385                                                    sce->ics.swb_sizes[g],
 386                                                    sce->sf_idx[w*16+g],
 387                                                    cb,
 388                                                    1.0f,
 389                                                    INFINITY,
 390                                                    &b, &sqenergy,
 391                                                    0);
 392                         bits += b;
 393                         qenergy += sqenergy;
 394                     }
 395                     dists[w*16+g] = dist - bits;
 396                     qenergies[w*16+g] = qenergy;
 397                     if (prev != -1) {
 398                         int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
 399                         bits += ff_aac_scalefactor_bits[sfdiff];
 400                     }
 401                     tbits += bits;
 402                     start += sce->ics.swb_sizes[g];
 403                     prev = sce->sf_idx[w*16+g];
 404                 }
 405             }
 406             if (tbits > toomanybits) {
 407                 recomprd = 1;
 408                 for (i = 0; i < 128; i++) {
 409                     if (sce->sf_idx[i] < (SCALE_MAX_POS - SCALE_DIV_512)) {
 410                         int maxsf_i = (tbits > 5800) ? SCALE_MAX_POS : maxsf[i];
 411                         int new_sf = FFMIN(maxsf_i, sce->sf_idx[i] + qstep);
 412                         if (new_sf != sce->sf_idx[i]) {
 413                             sce->sf_idx[i] = new_sf;
 414                             changed = 1;
 415                         }
 416                     }
 417                 }
 418             } else if (tbits < toofewbits) {
 419                 recomprd = 1;
 420                 for (i = 0; i < 128; i++) {
 421                     if (sce->sf_idx[i] > SCALE_ONE_POS) {
 422                         int new_sf = FFMAX(SCALE_ONE_POS, sce->sf_idx[i] - qstep);
 423                         if (new_sf != sce->sf_idx[i]) {
 424                             sce->sf_idx[i] = new_sf;
 425                             changed = 1;
 426                         }
 427                     }
 428                 }
 429             }
 430             qstep >>= 1;
 431             if (!qstep && tbits > toomanybits && sce->sf_idx[0] < 217 && changed)
 432                 qstep = 1;
 433         } while (qstep);
 434
 435         overdist = 1;
 436         for (i = 0; i < 2 && (overdist || recomprd); ++i) {
 437             if (recomprd) {
 438                 /** Must recompute distortion */
 439                 prev = -1;
 440                 tbits = 0;
 441                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 442                     start = w*128;
 443                     for (g = 0;  g < sce->ics.num_swb; g++) {
 444                         const float *coefs = sce->coeffs + start;
 445                         const float *scaled = s->scoefs + start;
 446                         int bits = 0;
 447                         int cb;
 448                         float dist = 0.0f;
 449                         float qenergy = 0.0f;
 450
 451                         if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
 452                             start += sce->ics.swb_sizes[g];
 453                             if (sce->can_pns[w*16+g]) {
 454                                 /** PNS isn't free */
 455                                 tbits += ff_pns_bits(sce, w, g);
 456                             }
 457                             continue;
 458                         }
 459                         cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 460                         for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 461                             int b;
 462                             float sqenergy;
 463                             dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
 464                                                     scaled + w2*128,
 465                                                     sce->ics.swb_sizes[g],
 466                                                     sce->sf_idx[w*16+g],
 467                                                     cb,
 468                                                     1.0f,
 469                                                     INFINITY,
 470                                                     &b, &sqenergy,
 471                                                     0);
 472                             bits += b;
 473                             qenergy += sqenergy;
 474                         }
 475                         dists[w*16+g] = dist - bits;
 476                         qenergies[w*16+g] = qenergy;
 477                         if (prev != -1) {
 478                             int sfdiff = av_clip(sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO, 0, 2*SCALE_MAX_DIFF);
 479                             bits += ff_aac_scalefactor_bits[sfdiff];
 480                         }
 481                         tbits += bits;
 482                         start += sce->ics.swb_sizes[g];
 483                         prev = sce->sf_idx[w*16+g];
 484                     }
 485                 }
 486             }
 487             if (!i && s->options.pns && its > maxits/2) {
 488                 float maxoverdist = 0.0f;
 489                 overdist = recomprd = 0;
 490                 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 491                     float ovrfactor = 2.f+(maxits-its)*16.f/maxits;
 492                     for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
 493                         if (!sce->zeroes[w*16+g] && dists[w*16+g] > uplims[w*16+g]*ovrfactor) {
 494                             float ovrdist = dists[w*16+g] / FFMAX(uplims[w*16+g],euplims[w*16+g]);
 495                             maxoverdist = FFMAX(maxoverdist, ovrdist);
 496                             overdist++;
 497                         }
 498                     }
 499                 }
 500                 if (overdist) {
 501                     /* We have overdistorted bands, trade for zeroes (that can be noise)
 502                      * Zero the bands in the lowest 1.25% spread-energy-threshold ranking
 503                      */
 504                     float minspread = max_spread_thr_r;
 505                     float maxspread = min_spread_thr_r;
 506                     float zspread;
 507                     int zeroable = 0;
 508                     int zeroed = 0;
 509                     int maxzeroed;
 510                     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 511                         for (g = start = 0;  g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
 512                             if (start >= pns_start_pos && !sce->zeroes[w*16+g] && sce->can_pns[w*16+g]) {
 513                                 minspread = FFMIN(minspread, spread_thr_r[w*16+g]);
 514                                 maxspread = FFMAX(maxspread, spread_thr_r[w*16+g]);
 515                                 zeroable++;
 516                             }
 517                         }
 518                     }
 519                     zspread = (maxspread-minspread) * 0.0125f + minspread;
 520                     zspread = FFMIN(maxoverdist, zspread);
 521                     maxzeroed = zeroable * its / (2 * maxits);
 522                     for (g = sce->ics.num_swb-1; g > 0 && zeroed < maxzeroed; g--) {
 523                         if (sce->ics.swb_offset[g] < pns_start_pos)
 524                             continue;
 525                         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 526                             if (!sce->zeroes[w*16+g] && sce->can_pns[w*16+g] && spread_thr_r[w*16+g] <= zspread) {
 527                                 sce->zeroes[w*16+g] = 1;
 528                                 sce->band_type[w*16+g] = 0;
 529                                 zeroed++;
 530                             }
 531                         }
 532                     }
 533                     if (zeroed)
 534                         recomprd = 1;
 535                 } else {
 536                     overdist = 0;
 537                 }
 538             }
 539         }
 540
 541         minscaler = SCALE_MAX_POS;
 542         maxscaler = 0;
 543         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 544             for (g = 0;  g < sce->ics.num_swb; g++) {
 545                 if (!sce->zeroes[w*16+g]) {
 546                     minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
 547                     maxscaler = FFMAX(maxscaler, sce->sf_idx[w*16+g]);
 548                 }
 549             }
 550         }
 551
 552         fflag = 0;
 553         minscaler = nminscaler = av_clip(minscaler, SCALE_ONE_POS - SCALE_DIV_512, SCALE_MAX_POS - SCALE_DIV_512);
 554         minrdsf = FFMAX3(60, minscaler - 1, maxscaler - SCALE_MAX_DIFF - 1);
 555         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 556             /** Start with big steps, end up fine-tunning */
 557             int depth = (its > maxits/2) ? ((its > maxits*2/3) ? 1 : 3) : 10;
 558             int edepth = depth+2;
 559             float uplmax = its / (maxits*0.25f) + 1.0f;
 560             uplmax *= (tbits > destbits) ? FFMIN(2.0f, tbits / (float)FFMAX(1,destbits)) : 1.0f;
 561             start = w * 128;
 562             for (g = 0; g < sce->ics.num_swb; g++) {
 563                 int prevsc = sce->sf_idx[w*16+g];
 564                 int minrdsfboost = (sce->ics.num_windows > 1) ? av_clip(g-4, -2, 0) : av_clip(g-16, -4, 0);
 565                 if (!sce->zeroes[w*16+g]) {
 566                     const float *coefs = sce->coeffs + start;
 567                     const float *scaled = s->scoefs + start;
 568                     int cmb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 569                     if ((!cmb || dists[w*16+g] > uplims[w*16+g]) && sce->sf_idx[w*16+g] > minrdsf) {
 570                         /* Try to make sure there is some energy in every nonzero band
 571                          * NOTE: This algorithm must be forcibly imbalanced, pushing harder
 572                          *  on holes or more distorted bands at first, otherwise there's
 573                          *  no net gain (since the next iteration will offset all bands
 574                          *  on the opposite direction to compensate for extra bits)
 575                          */
 576                         for (i = 0; i < edepth; ++i) {
 577                             int cb, bits;
 578                             float dist, qenergy;
 579                             int mb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1);
 580                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 581                             dist = qenergy = 0.f;
 582                             bits = 0;
 583                             if (!cb) {
 584                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g]-1, maxsf[w*16+g]);
 585                             } else if (i >= depth && dists[w*16+g] < euplims[w*16+g]) {
 586                                 break;
 587                             }
 588                             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 589                                 int b;
 590                                 float sqenergy;
 591                                 dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
 592                                                         scaled + w2*128,
 593                                                         sce->ics.swb_sizes[g],
 594                                                         sce->sf_idx[w*16+g]-1,
 595                                                         cb,
 596                                                         1.0f,
 597                                                         INFINITY,
 598                                                         &b, &sqenergy,
 599                                                         0);
 600                                 bits += b;
 601                                 qenergy += sqenergy;
 602                             }
 603                             sce->sf_idx[w*16+g]--;
 604                             dists[w*16+g] = dist - bits;
 605                             qenergies[w*16+g] = qenergy;
 606                             if (mb && (sce->sf_idx[w*16+g] < (minrdsf+minrdsfboost) || (
 607                                     (dists[w*16+g] < FFMIN(uplmax*uplims[w*16+g], euplims[w*16+g]))
 608                                     && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
 609                                 ) )) {
 610                                 break;
 611                             }
 612                         }
 613                     } else if (tbits > toofewbits && sce->sf_idx[w*16+g] < maxscaler
 614                             && (dists[w*16+g] < FFMIN(euplims[w*16+g], uplims[w*16+g]))
 615                             && (fabsf(qenergies[w*16+g]-energies[w*16+g]) < euplims[w*16+g])
 616                         ) {
 617                         /** Um... over target. Save bits for more important stuff. */
 618                         for (i = 0; i < depth; ++i) {
 619                             int cb, bits;
 620                             float dist, qenergy;
 621                             cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]+1);
 622                             if (cb > 0) {
 623                                 dist = qenergy = 0.f;
 624                                 bits = 0;
 625                                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
 626                                     int b;
 627                                     float sqenergy;
 628                                     dist += quantize_band_cost_cached(s, w + w2, g, coefs + w2*128,
 629                                                             scaled + w2*128,
 630                                                             sce->ics.swb_sizes[g],
 631                                                             sce->sf_idx[w*16+g]+1,
 632                                                             cb,
 633                                                             1.0f,
 634                                                             INFINITY,
 635                                                             &b, &sqenergy,
 636                                                             0);
 637                                     bits += b;
 638                                     qenergy += sqenergy;
 639                                 }
 640                                 dist -= bits;
 641                                 if (dist < FFMIN(euplims[w*16+g], uplims[w*16+g])) {
 642                                     sce->sf_idx[w*16+g]++;
 643                                     dists[w*16+g] = dist;
 644                                     qenergies[w*16+g] = qenergy;
 645                                 } else {
 646                                     break;
 647                                 }
 648                             } else {
 649                                 maxsf[w*16+g] = FFMIN(sce->sf_idx[w*16+g], maxsf[w*16+g]);
 650                                 break;
 651                             }
 652                         }
 653                     }
 654                 }
 655                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minrdsf, minscaler + SCALE_MAX_DIFF);
 656                 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], SCALE_MAX_POS - SCALE_DIV_512);
 657                 if (sce->sf_idx[w*16+g] != prevsc)
 658                     fflag = 1;
 659                 nminscaler = FFMIN(nminscaler, sce->sf_idx[w*16+g]);
 660                 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 661                 start += sce->ics.swb_sizes[g];
 662             }
 663         }
 664         if (nminscaler < minscaler || sce->ics.num_windows > 1) {
 665             /** SF difference limit violation risk. Must re-clamp. */
 666             minscaler = nminscaler;
 667             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 668                 for (g = 0; g < sce->ics.num_swb; g++) {
 669                     sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
 670                     sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 671                 }
 672             }
 673         }
 674         its++;
 675     } while (fflag && its < maxits);
 676
 677     prev = -1;
 678     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
 679         /** Make sure proper codebooks are set */
 680         for (g = start = 0; g < sce->ics.num_swb; start += sce->ics.swb_sizes[g++]) {
 681             if (!sce->zeroes[w*16+g]) {
 682                 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
 683                 if (sce->band_type[w*16+g] <= 0) {
 684                     sce->zeroes[w*16+g] = 1;
 685                     sce->band_type[w*16+g] = 0;
 686                 }
 687             } else {
 688                 sce->band_type[w*16+g] = 0;
 689             }
 690             /** Check that there's no SF delta range violations */
 691             if (!sce->zeroes[w*16+g]) {
 692                 if (prev != -1) {
 693                     av_unused int sfdiff = sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO;
 694                     av_assert1(sfdiff >= 0 && sfdiff <= 2*SCALE_MAX_DIFF);
 695                 } else if (sce->zeroes[0]) {
 696                     /** Set global gain to something useful */
 697                     sce->sf_idx[0] = sce->sf_idx[w*16+g];
 698                 }
 699                 prev = sce->sf_idx[w*16+g];
 700             }
 701         }
 702     }
 703 }
 704
 705 #endif /* AVCODEC_AACCODER_TWOLOOP_H */