git.sesse.net Git - ffmpeg/blob - libavcodec/aaccoder.c
aacenc: Use exact values when quantizing, not fuzzy values.
[ffmpeg] / libavcodec / aaccoder.c
1 /*
2  * AAC coefficients encoder
3  * Copyright (C) 2008-2009 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * AAC coefficients encoder
25  */
26
27 /***********************************
28  *              TODOs:
29  * speedup quantizer selection
30  * add sane pulse detection
31  ***********************************/
32
33 #include "avcodec.h"
34 #include "put_bits.h"
35 #include "aac.h"
36 #include "aacenc.h"
37 #include "aactab.h"
38
/** bits needed to code codebook run value for long windows */
static const uint8_t run_value_bits_long[64] = {
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
};

/** bits needed to code codebook run value for short windows */
static const uint8_t run_value_bits_short[16] = {
    3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
};

/**
 * Run value bit-cost tables: entry 0 is the long window table, entry 1 the
 * short window table. Both the tables and this pointer array are read-only.
 */
static const uint8_t * const run_value_bits[2] = {
    run_value_bits_long, run_value_bits_short
};
55
56
57 /**
58  * Quantize one coefficient.
59  * @return absolute value of the quantized coefficient
60  * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
61  */
62 static av_always_inline int quant(float coef, const float Q)
63 {
64     float a = coef * Q;
65     return sqrtf(a * sqrtf(a)) + 0.4054;
66 }
67
/**
 * Quantize a run of coefficients.
 *
 * @param out       receives the quantized values
 * @param in        original coefficients, only inspected for their sign
 * @param scaled    values that are multiplied by Q34 to get the quantized
 *                  magnitude
 * @param size      number of coefficients
 * @param Q34       quantizer factor applied to the scaled values
 * @param is_signed if nonzero, outputs are negated where in[] is negative
 * @param maxval    upper clamp for the quantized magnitude
 */
static void quantize_bands(int *out, const float *in, const float *scaled,
                           int size, float Q34, int is_signed, int maxval)
{
    const double limit = (double)maxval;
    int n;

    for (n = 0; n < size; n++) {
        const double qc  = scaled[n] * Q34;
        const int level  = (int)FFMIN(qc + 0.4054, limit);

        out[n] = (is_signed && in[n] < 0.0f) ? -level : level;
    }
}
81
82 static void abs_pow34_v(float *out, const float *in, const int size)
83 {
84 #ifndef USE_REALLY_FULL_SEARCH
85     int i;
86     for (i = 0; i < size; i++) {
87         float a = fabsf(in[i]);
88         out[i] = sqrtf(a * sqrtf(a));
89     }
90 #endif /* USE_REALLY_FULL_SEARCH */
91 }
92
/** per-codebook radix used when packing quantized values into a codebook index */
static const uint8_t aac_cb_range[12] = {
    0,          /* codebook 0       */
    3, 3, 3, 3, /* codebooks 1 - 4  */
    9, 9,       /* codebooks 5 - 6  */
    8, 8,       /* codebooks 7 - 8  */
    13, 13,     /* codebooks 9 - 10 */
    17          /* codebook 11      */
};

/** per-codebook clamp for the quantized magnitude; also the index offset of signed codebooks */
static const uint8_t aac_cb_maxval[12] = {
    0,          /* codebook 0       */
    1, 1, 2, 2, /* codebooks 1 - 4  */
    4, 4,       /* codebooks 5 - 6  */
    7, 7,       /* codebooks 7 - 8  */
    12, 12,     /* codebooks 9 - 10 */
    16          /* codebook 11      */
};
95
96 /**
97  * Calculate rate distortion cost for quantizing with given codebook
98  *
99  * @return quantization distortion
100  */
101 static float quantize_and_encode_band_cost(struct AACEncContext *s,
102                                 PutBitContext *pb, const float *in,
103                                 const float *scaled, int size, int scale_idx,
104                                 int cb, const float lambda, const float uplim,
105                                 int *bits)
106 {
107     const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
108     const float  Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
109     const float CLIPPED_ESCAPE = 165140.0f*IQ;
110     int i, j, k;
111     float cost = 0;
112     const int dim = cb < FIRST_PAIR_BT ? 4 : 2;
113     int resbits = 0;
114     const float  Q34 = sqrtf(Q * sqrtf(Q));
115     const int range  = aac_cb_range[cb];
116     const int maxval = aac_cb_maxval[cb];
117     int off;
118
119     if (!cb) {
120         for (i = 0; i < size; i++)
121             cost += in[i]*in[i];
122         if (bits)
123             *bits = 0;
124         return cost * lambda;
125     }
126     if (!scaled) {
127         abs_pow34_v(s->scoefs, in, size);
128         scaled = s->scoefs;
129     }
130     quantize_bands(s->qcoefs, in, scaled, size, Q34, !IS_CODEBOOK_UNSIGNED(cb), maxval);
131     if (IS_CODEBOOK_UNSIGNED(cb)) {
132         off = 0;
133     } else {
134         off = maxval;
135     }
136     for (i = 0; i < size; i += dim) {
137         const float *vec;
138         int *quants = s->qcoefs + i;
139         int curidx = 0;
140         int curbits;
141         float rd = 0.0f;
142         for (j = 0; j < dim; j++) {
143             curidx *= range;
144             curidx += quants[j] + off;
145         }
146             curbits =  ff_aac_spectral_bits[cb-1][curidx];
147             vec     = &ff_aac_codebook_vectors[cb-1][curidx*dim];
148             if (IS_CODEBOOK_UNSIGNED(cb)) {
149                 for (k = 0; k < dim; k++) {
150                     float t = fabsf(in[i+k]);
151                     float di;
152                     if (vec[k] == 64.0f) { //FIXME: slow
153                         if (t >= CLIPPED_ESCAPE) {
154                             di = t - CLIPPED_ESCAPE;
155                             curbits += 21;
156                         } else {
157                             int c = av_clip(quant(t, Q), 0, 8191);
158                             di = t - c*cbrtf(c)*IQ;
159                             curbits += av_log2(c)*2 - 4 + 1;
160                         }
161                     } else {
162                         di = t - vec[k]*IQ;
163                     }
164                     if (vec[k] != 0.0f)
165                         curbits++;
166                     rd += di*di;
167                 }
168             } else {
169                 for (k = 0; k < dim; k++) {
170                     float di = in[i+k] - vec[k]*IQ;
171                     rd += di*di;
172                 }
173             }
174         cost    += rd * lambda + curbits;
175         resbits += curbits;
176         if (cost >= uplim)
177             return uplim;
178         if (pb) {
179         put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
180         if (IS_CODEBOOK_UNSIGNED(cb))
181             for (j = 0; j < dim; j++)
182                 if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
183                     put_bits(pb, 1, in[i+j] < 0.0f);
184         if (cb == ESC_BT) {
185             for (j = 0; j < 2; j++) {
186                 if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
187                     int coef = av_clip(quant(fabsf(in[i+j]), Q), 0, 8191);
188                     int len = av_log2(coef);
189
190                     put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
191                     put_bits(pb, len, coef & ((1 << len) - 1));
192                 }
193             }
194         }
195         }
196     }
197
198     if (bits)
199         *bits = resbits;
200     return cost;
201 }
202 static float quantize_band_cost(struct AACEncContext *s, const float *in,
203                                 const float *scaled, int size, int scale_idx,
204                                 int cb, const float lambda, const float uplim,
205                                 int *bits)
206 {
207     return quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx,
208                                          cb, lambda, uplim, bits);
209 }
210
211 static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
212                                      const float *in, int size, int scale_idx,
213                                      int cb, const float lambda)
214 {
215     quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
216                                   INFINITY, NULL);
217 }
218
/**
 * node of the path built by the optimal codebook search
 */
typedef struct BandCodingPath {
    int   prev_idx; ///< codebook of the previous path point, -1 at the path start
    float cost;     ///< cost accumulated along the path up to this point
    int   run;      ///< number of consecutive bands coded with this codebook
} BandCodingPath;
227
228 /**
229  * Encode band info for single window group bands.
230  */
231 static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
232                                      int win, int group_len, const float lambda)
233 {
234     BandCodingPath path[120][12];
235     int w, swb, cb, start, start2, size;
236     int i, j;
237     const int max_sfb  = sce->ics.max_sfb;
238     const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
239     const int run_esc  = (1 << run_bits) - 1;
240     int idx, ppos, count;
241     int stackrun[120], stackcb[120], stack_len;
242     float next_minrd = INFINITY;
243     int next_mincb = 0;
244
245     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
246     start = win*128;
247     for (cb = 0; cb < 12; cb++) {
248         path[0][cb].cost     = 0.0f;
249         path[0][cb].prev_idx = -1;
250         path[0][cb].run      = 0;
251     }
252     for (swb = 0; swb < max_sfb; swb++) {
253         start2 = start;
254         size = sce->ics.swb_sizes[swb];
255         if (sce->zeroes[win*16 + swb]) {
256             for (cb = 0; cb < 12; cb++) {
257                 path[swb+1][cb].prev_idx = cb;
258                 path[swb+1][cb].cost     = path[swb][cb].cost;
259                 path[swb+1][cb].run      = path[swb][cb].run + 1;
260             }
261         } else {
262             float minrd = next_minrd;
263             int mincb = next_mincb;
264             next_minrd = INFINITY;
265             next_mincb = 0;
266             for (cb = 0; cb < 12; cb++) {
267                 float cost_stay_here, cost_get_here;
268                 float rd = 0.0f;
269                 for (w = 0; w < group_len; w++) {
270                     FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb];
271                     rd += quantize_band_cost(s, sce->coeffs + start + w*128,
272                                              s->scoefs + start + w*128, size,
273                                              sce->sf_idx[(win+w)*16+swb], cb,
274                                              lambda / band->threshold, INFINITY, NULL);
275                 }
276                 cost_stay_here = path[swb][cb].cost + rd;
277                 cost_get_here  = minrd              + rd + run_bits + 4;
278                 if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
279                     != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
280                     cost_stay_here += run_bits;
281                 if (cost_get_here < cost_stay_here) {
282                     path[swb+1][cb].prev_idx = mincb;
283                     path[swb+1][cb].cost     = cost_get_here;
284                     path[swb+1][cb].run      = 1;
285                 } else {
286                     path[swb+1][cb].prev_idx = cb;
287                     path[swb+1][cb].cost     = cost_stay_here;
288                     path[swb+1][cb].run      = path[swb][cb].run + 1;
289                 }
290                 if (path[swb+1][cb].cost < next_minrd) {
291                     next_minrd = path[swb+1][cb].cost;
292                     next_mincb = cb;
293                 }
294             }
295         }
296         start += sce->ics.swb_sizes[swb];
297     }
298
299     //convert resulting path from backward-linked list
300     stack_len = 0;
301     idx       = 0;
302     for (cb = 1; cb < 12; cb++)
303         if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
304             idx = cb;
305     ppos = max_sfb;
306     while (ppos > 0) {
307         cb = idx;
308         stackrun[stack_len] = path[ppos][cb].run;
309         stackcb [stack_len] = cb;
310         idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
311         ppos -= path[ppos][cb].run;
312         stack_len++;
313     }
314     //perform actual band info encoding
315     start = 0;
316     for (i = stack_len - 1; i >= 0; i--) {
317         put_bits(&s->pb, 4, stackcb[i]);
318         count = stackrun[i];
319         memset(sce->zeroes + win*16 + start, !stackcb[i], count);
320         //XXX: memset when band_type is also uint8_t
321         for (j = 0; j < count; j++) {
322             sce->band_type[win*16 + start] =  stackcb[i];
323             start++;
324         }
325         while (count >= run_esc) {
326             put_bits(&s->pb, run_bits, run_esc);
327             count -= run_esc;
328         }
329         put_bits(&s->pb, run_bits, count);
330     }
331 }
332
/**
 * node of the scalefactor trellis search
 */
typedef struct TrellisPath {
    float cost;    ///< cost of the cheapest path reaching this node
    int   prev;    ///< state of the preceding node on that path
    int   min_val; ///< smallest state visited along that path
    int   max_val; ///< largest state visited along that path
} TrellisPath;

/** number of trellis stages */
#define TRELLIS_STAGES 121
/** number of states per trellis stage */
#define TRELLIS_STATES 256
342
343 static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
344                                        SingleChannelElement *sce,
345                                        const float lambda)
346 {
347     int q, w, w2, g, start = 0;
348     int i, j;
349     int idx;
350     TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
351     int bandaddr[TRELLIS_STAGES];
352     int minq;
353     float mincost;
354
355     for (i = 0; i < TRELLIS_STATES; i++) {
356         paths[0][i].cost    = 0.0f;
357         paths[0][i].prev    = -1;
358         paths[0][i].min_val = i;
359         paths[0][i].max_val = i;
360     }
361     for (j = 1; j < TRELLIS_STAGES; j++) {
362         for (i = 0; i < TRELLIS_STATES; i++) {
363             paths[j][i].cost    = INFINITY;
364             paths[j][i].prev    = -2;
365             paths[j][i].min_val = INT_MAX;
366             paths[j][i].max_val = 0;
367         }
368     }
369     idx = 1;
370     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
371     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
372         start = w*128;
373         for (g = 0; g < sce->ics.num_swb; g++) {
374             const float *coefs = sce->coeffs + start;
375             float qmin, qmax;
376             int nz = 0;
377
378             bandaddr[idx] = w * 16 + g;
379             qmin = INT_MAX;
380             qmax = 0.0f;
381             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
382                 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
383                 if (band->energy <= band->threshold || band->threshold == 0.0f) {
384                     sce->zeroes[(w+w2)*16+g] = 1;
385                     continue;
386                 }
387                 sce->zeroes[(w+w2)*16+g] = 0;
388                 nz = 1;
389                 for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
390                     float t = fabsf(coefs[w2*128+i]);
391                     if (t > 0.0f)
392                         qmin = FFMIN(qmin, t);
393                     qmax = FFMAX(qmax, t);
394                 }
395             }
396             if (nz) {
397                 int minscale, maxscale;
398                 float minrd = INFINITY;
399                 //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
400                 minscale = av_clip_uint8(log2(qmin)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
401                 //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
402                 maxscale = av_clip_uint8(log2(qmax)*4 +  6 + SCALE_ONE_POS - SCALE_DIV_512);
403                 for (q = minscale; q < maxscale; q++) {
404                     float dists[12], dist;
405                     memset(dists, 0, sizeof(dists));
406                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
407                         FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
408                         int cb;
409                         for (cb = 0; cb <= ESC_BT; cb++)
410                             dists[cb] += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
411                                                             q, cb, lambda / band->threshold, INFINITY, NULL);
412                     }
413                     dist = dists[0];
414                     for (i = 1; i <= ESC_BT; i++)
415                         dist = FFMIN(dist, dists[i]);
416                     minrd = FFMIN(minrd, dist);
417
418                     for (i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, TRELLIS_STATES); i++) {
419                         float cost;
420                         int minv, maxv;
421                         if (isinf(paths[idx - 1][i].cost))
422                             continue;
423                         cost = paths[idx - 1][i].cost + dist
424                                + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
425                         minv = FFMIN(paths[idx - 1][i].min_val, q);
426                         maxv = FFMAX(paths[idx - 1][i].max_val, q);
427                         if (cost < paths[idx][q].cost && maxv-minv < SCALE_MAX_DIFF) {
428                             paths[idx][q].cost    = cost;
429                             paths[idx][q].prev    = i;
430                             paths[idx][q].min_val = minv;
431                             paths[idx][q].max_val = maxv;
432                         }
433                     }
434                 }
435             } else {
436                 for (q = 0; q < TRELLIS_STATES; q++) {
437                     if (!isinf(paths[idx - 1][q].cost)) {
438                         paths[idx][q].cost = paths[idx - 1][q].cost + 1;
439                         paths[idx][q].prev = q;
440                         paths[idx][q].min_val = FFMIN(paths[idx - 1][q].min_val, q);
441                         paths[idx][q].max_val = FFMAX(paths[idx - 1][q].max_val, q);
442                         continue;
443                     }
444                     for (i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, TRELLIS_STATES); i++) {
445                         float cost;
446                         int minv, maxv;
447                         if (isinf(paths[idx - 1][i].cost))
448                             continue;
449                         cost = paths[idx - 1][i].cost + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
450                         minv = FFMIN(paths[idx - 1][i].min_val, q);
451                         maxv = FFMAX(paths[idx - 1][i].max_val, q);
452                         if (cost < paths[idx][q].cost && maxv-minv < SCALE_MAX_DIFF) {
453                             paths[idx][q].cost    = cost;
454                             paths[idx][q].prev    = i;
455                             paths[idx][q].min_val = minv;
456                             paths[idx][q].max_val = maxv;
457                         }
458                     }
459                 }
460             }
461             sce->zeroes[w*16+g] = !nz;
462             start += sce->ics.swb_sizes[g];
463             idx++;
464         }
465     }
466     idx--;
467     mincost = paths[idx][0].cost;
468     minq    = 0;
469     for (i = 1; i < TRELLIS_STATES; i++) {
470         if (paths[idx][i].cost < mincost) {
471             mincost = paths[idx][i].cost;
472             minq = i;
473         }
474     }
475     while (idx) {
476         sce->sf_idx[bandaddr[idx]] = minq;
477         minq = paths[idx][minq].prev;
478         idx--;
479     }
480     //set the same quantizers inside window groups
481     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
482         for (g = 0;  g < sce->ics.num_swb; g++)
483             for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
484                 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
485 }
486
487 /**
488  * two-loop quantizers search taken from ISO 13818-7 Appendix C
489  */
490 static void search_for_quantizers_twoloop(AVCodecContext *avctx,
491                                           AACEncContext *s,
492                                           SingleChannelElement *sce,
493                                           const float lambda)
494 {
495     int start = 0, i, w, w2, g;
496     int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
497     float dists[128], uplims[128];
498     int fflag, minscaler;
499     int its  = 0;
500     int allz = 0;
501     float minthr = INFINITY;
502
503     //XXX: some heuristic to determine initial quantizers will reduce search time
504     memset(dists, 0, sizeof(dists));
505     //determine zero bands and upper limits
506     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
507         for (g = 0;  g < sce->ics.num_swb; g++) {
508             int nz = 0;
509             float uplim = 0.0f;
510             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
511                 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
512                 uplim += band->threshold;
513                 if (band->energy <= band->threshold || band->threshold == 0.0f) {
514                     sce->zeroes[(w+w2)*16+g] = 1;
515                     continue;
516                 }
517                 nz = 1;
518             }
519             uplims[w*16+g] = uplim *512;
520             sce->zeroes[w*16+g] = !nz;
521             if (nz)
522                 minthr = FFMIN(minthr, uplim);
523             allz = FFMAX(allz, nz);
524         }
525     }
526     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
527         for (g = 0;  g < sce->ics.num_swb; g++) {
528             if (sce->zeroes[w*16+g]) {
529                 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
530                 continue;
531             }
532             sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2(uplims[w*16+g]/minthr)*4,59);
533         }
534     }
535
536     if (!allz)
537         return;
538     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
539     //perform two-loop search
540     //outer loop - improve quality
541     do {
542         int tbits, qstep;
543         minscaler = sce->sf_idx[0];
544         //inner loop - quantize spectrum to fit into given number of bits
545         qstep = its ? 1 : 32;
546         do {
547             int prev = -1;
548             tbits = 0;
549             fflag = 0;
550             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
551                 start = w*128;
552                 for (g = 0;  g < sce->ics.num_swb; g++) {
553                     const float *coefs = sce->coeffs + start;
554                     const float *scaled = s->scoefs + start;
555                     int bits = 0;
556                     int cb;
557                     float mindist = INFINITY;
558                     int minbits = 0;
559
560                     if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
561                         start += sce->ics.swb_sizes[g];
562                         continue;
563                     }
564                     minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
565                     {
566                         float dist = 0.0f;
567                         int bb = 0;
568                         float maxval = 0.0f;
569                         float Q = ff_aac_pow2sf_tab[200 - sce->sf_idx[w*16+g] + SCALE_ONE_POS - SCALE_DIV_512];
570                         float Q34 = sqrtf(Q * sqrtf(Q));
571                         int qmaxval;
572                         for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
573                             for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
574                                 maxval = fmaxf(maxval, scaled[w2*128+i]);
575                             }
576                         }
577                         qmaxval = maxval * Q34 + 0.4054;
578                         if      (qmaxval ==  0) cb = 0;
579                         else if (qmaxval ==  1) cb = 1;
580                         else if (qmaxval ==  2) cb = 3;
581                         else if (qmaxval <=  4) cb = 5;
582                         else if (qmaxval <=  7) cb = 7;
583                         else if (qmaxval <= 12) cb = 9;
584                         else                    cb = 11;
585                         for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
586                             int b;
587                             dist += quantize_band_cost(s, coefs + w2*128,
588                                                        scaled + w2*128,
589                                                        sce->ics.swb_sizes[g],
590                                                        sce->sf_idx[w*16+g],
591                                                        cb,
592                                                        lambda,
593                                                        INFINITY,
594                                                        &b);
595                             bb += b;
596                         }
597                             mindist = dist;
598                             minbits = bb;
599                     }
600                     dists[w*16+g] = (mindist - minbits) / lambda;
601                     bits = minbits;
602                     if (prev != -1) {
603                         bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
604                     }
605                     tbits += bits;
606                     start += sce->ics.swb_sizes[g];
607                     prev = sce->sf_idx[w*16+g];
608                 }
609             }
610             if (tbits > destbits) {
611                 for (i = 0; i < 128; i++)
612                     if (sce->sf_idx[i] < 218 - qstep)
613                         sce->sf_idx[i] += qstep;
614             } else {
615                 for (i = 0; i < 128; i++)
616                     if (sce->sf_idx[i] > 60 - qstep)
617                         sce->sf_idx[i] -= qstep;
618             }
619             qstep >>= 1;
620             if (!qstep && tbits > destbits*1.02)
621                 qstep = 1;
622             if (sce->sf_idx[0] >= 217)
623                 break;
624         } while (qstep);
625
626         fflag = 0;
627         minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
628         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
629             start = w*128;
630             for (g = 0; g < sce->ics.num_swb; g++) {
631                 int prevsc = sce->sf_idx[w*16+g];
632                 if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60)
633                     sce->sf_idx[w*16+g]--;
634                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
635                 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
636                 if (sce->sf_idx[w*16+g] != prevsc)
637                     fflag = 1;
638             }
639         }
640         its++;
641     } while (fflag && its < 10);
642 }
643
644 static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
645                                        SingleChannelElement *sce,
646                                        const float lambda)
647 {
648     int start = 0, i, w, w2, g;
649     float uplim[128], maxq[128];
650     int minq, maxsf;
651     float distfact = ((sce->ics.num_windows > 1) ? 85.80 : 147.84) / lambda;
652     int last = 0, lastband = 0, curband = 0;
653     float avg_energy = 0.0;
654     if (sce->ics.num_windows == 1) {
655         start = 0;
656         for (i = 0; i < 1024; i++) {
657             if (i - start >= sce->ics.swb_sizes[curband]) {
658                 start += sce->ics.swb_sizes[curband];
659                 curband++;
660             }
661             if (sce->coeffs[i]) {
662                 avg_energy += sce->coeffs[i] * sce->coeffs[i];
663                 last = i;
664                 lastband = curband;
665             }
666         }
667     } else {
668         for (w = 0; w < 8; w++) {
669             const float *coeffs = sce->coeffs + w*128;
670             start = 0;
671             for (i = 0; i < 128; i++) {
672                 if (i - start >= sce->ics.swb_sizes[curband]) {
673                     start += sce->ics.swb_sizes[curband];
674                     curband++;
675                 }
676                 if (coeffs[i]) {
677                     avg_energy += coeffs[i] * coeffs[i];
678                     last = FFMAX(last, i);
679                     lastband = FFMAX(lastband, curband);
680                 }
681             }
682         }
683     }
684     last++;
685     avg_energy /= last;
686     if (avg_energy == 0.0f) {
687         for (i = 0; i < FF_ARRAY_ELEMS(sce->sf_idx); i++)
688             sce->sf_idx[i] = SCALE_ONE_POS;
689         return;
690     }
691     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
692         start = w*128;
693         for (g = 0; g < sce->ics.num_swb; g++) {
694             float *coefs   = sce->coeffs + start;
695             const int size = sce->ics.swb_sizes[g];
696             int start2 = start, end2 = start + size, peakpos = start;
697             float maxval = -1, thr = 0.0f, t;
698             maxq[w*16+g] = 0.0f;
699             if (g > lastband) {
700                 maxq[w*16+g] = 0.0f;
701                 start += size;
702                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
703                     memset(coefs + w2*128, 0, sizeof(coefs[0])*size);
704                 continue;
705             }
706             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
707                 for (i = 0; i < size; i++) {
708                     float t = coefs[w2*128+i]*coefs[w2*128+i];
709                     maxq[w*16+g] = FFMAX(maxq[w*16+g], fabsf(coefs[w2*128 + i]));
710                     thr += t;
711                     if (sce->ics.num_windows == 1 && maxval < t) {
712                         maxval  = t;
713                         peakpos = start+i;
714                     }
715                 }
716             }
717             if (sce->ics.num_windows == 1) {
718                 start2 = FFMAX(peakpos - 2, start2);
719                 end2   = FFMIN(peakpos + 3, end2);
720             } else {
721                 start2 -= start;
722                 end2   -= start;
723             }
724             start += size;
725             thr = pow(thr / (avg_energy * (end2 - start2)), 0.3 + 0.1*(lastband - g) / lastband);
726             t   = 1.0 - (1.0 * start2 / last);
727             uplim[w*16+g] = distfact / (1.4 * thr + t*t*t + 0.075);
728         }
729     }
730     memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
731     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
732     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
733         start = w*128;
734         for (g = 0;  g < sce->ics.num_swb; g++) {
735             const float *coefs  = sce->coeffs + start;
736             const float *scaled = s->scoefs   + start;
737             const int size      = sce->ics.swb_sizes[g];
738             int scf, prev_scf, step;
739             int min_scf = -1, max_scf = 256;
740             float curdiff;
741             if (maxq[w*16+g] < 21.544) {
742                 sce->zeroes[w*16+g] = 1;
743                 start += size;
744                 continue;
745             }
746             sce->zeroes[w*16+g] = 0;
747             scf  = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2(1/maxq[w*16+g])*16/3, 60, 218);
748             step = 16;
749             for (;;) {
750                 float dist = 0.0f;
751                 int quant_max;
752
753                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
754                     int b;
755                     dist += quantize_band_cost(s, coefs + w2*128,
756                                                scaled + w2*128,
757                                                sce->ics.swb_sizes[g],
758                                                scf,
759                                                ESC_BT,
760                                                lambda,
761                                                INFINITY,
762                                                &b);
763                     dist -= b;
764                 }
765                 dist *= 1.0f / 512.0f / lambda;
766                 quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[200 - scf + SCALE_ONE_POS - SCALE_DIV_512]);
767                 if (quant_max >= 8191) { // too much, return to the previous quantizer
768                     sce->sf_idx[w*16+g] = prev_scf;
769                     break;
770                 }
771                 prev_scf = scf;
772                 curdiff = fabsf(dist - uplim[w*16+g]);
773                 if (curdiff <= 1.0f)
774                     step = 0;
775                 else
776                     step = log2(curdiff);
777                 if (dist > uplim[w*16+g])
778                     step = -step;
779                 scf += step;
780                 scf = av_clip_uint8(scf);
781                 step = scf - prev_scf;
782                 if (FFABS(step) <= 1 || (step > 0 && scf >= max_scf) || (step < 0 && scf <= min_scf)) {
783                     sce->sf_idx[w*16+g] = av_clip(scf, min_scf, max_scf);
784                     break;
785                 }
786                 if (step > 0)
787                     min_scf = prev_scf;
788                 else
789                     max_scf = prev_scf;
790             }
791             start += size;
792         }
793     }
794     minq = sce->sf_idx[0] ? sce->sf_idx[0] : INT_MAX;
795     for (i = 1; i < 128; i++) {
796         if (!sce->sf_idx[i])
797             sce->sf_idx[i] = sce->sf_idx[i-1];
798         else
799             minq = FFMIN(minq, sce->sf_idx[i]);
800     }
801     if (minq == INT_MAX)
802         minq = 0;
803     minq = FFMIN(minq, SCALE_MAX_POS);
804     maxsf = FFMIN(minq + SCALE_MAX_DIFF, SCALE_MAX_POS);
805     for (i = 126; i >= 0; i--) {
806         if (!sce->sf_idx[i])
807             sce->sf_idx[i] = sce->sf_idx[i+1];
808         sce->sf_idx[i] = av_clip(sce->sf_idx[i], minq, maxsf);
809     }
810 }
811
812 static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
813                                        SingleChannelElement *sce,
814                                        const float lambda)
815 {
816     int start = 0, i, w, w2, g;
817     int minq = 255;
818
819     memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
820     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
821         start = w*128;
822         for (g = 0; g < sce->ics.num_swb; g++) {
823             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
824                 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
825                 if (band->energy <= band->threshold) {
826                     sce->sf_idx[(w+w2)*16+g] = 218;
827                     sce->zeroes[(w+w2)*16+g] = 1;
828                 } else {
829                     sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2(band->threshold), 80, 218);
830                     sce->zeroes[(w+w2)*16+g] = 0;
831                 }
832                 minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
833             }
834         }
835     }
836     for (i = 0; i < 128; i++) {
837         sce->sf_idx[i] = 140;
838         //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
839     }
840     //set the same quantizers inside window groups
841     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
842         for (g = 0;  g < sce->ics.num_swb; g++)
843             for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
844                 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
845 }
846
847 static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
848                           const float lambda)
849 {
850     int start = 0, i, w, w2, g;
851     float M[128], S[128];
852     float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
853     SingleChannelElement *sce0 = &cpe->ch[0];
854     SingleChannelElement *sce1 = &cpe->ch[1];
855     if (!cpe->common_window)
856         return;
857     for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
858         for (g = 0;  g < sce0->ics.num_swb; g++) {
859             if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
860                 float dist1 = 0.0f, dist2 = 0.0f;
861                 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
862                     FFPsyBand *band0 = &s->psy.psy_bands[(s->cur_channel+0)*PSY_MAX_BANDS+(w+w2)*16+g];
863                     FFPsyBand *band1 = &s->psy.psy_bands[(s->cur_channel+1)*PSY_MAX_BANDS+(w+w2)*16+g];
864                     float minthr = FFMIN(band0->threshold, band1->threshold);
865                     float maxthr = FFMAX(band0->threshold, band1->threshold);
866                     for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
867                         M[i] = (sce0->coeffs[start+w2*128+i]
868                               + sce1->coeffs[start+w2*128+i]) * 0.5;
869                         S[i] =  sce0->coeffs[start+w2*128+i]
870                               - sce1->coeffs[start+w2*128+i];
871                     }
872                     abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
873                     abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
874                     abs_pow34_v(M34, M,                         sce0->ics.swb_sizes[g]);
875                     abs_pow34_v(S34, S,                         sce0->ics.swb_sizes[g]);
876                     dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
877                                                 L34,
878                                                 sce0->ics.swb_sizes[g],
879                                                 sce0->sf_idx[(w+w2)*16+g],
880                                                 sce0->band_type[(w+w2)*16+g],
881                                                 lambda / band0->threshold, INFINITY, NULL);
882                     dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
883                                                 R34,
884                                                 sce1->ics.swb_sizes[g],
885                                                 sce1->sf_idx[(w+w2)*16+g],
886                                                 sce1->band_type[(w+w2)*16+g],
887                                                 lambda / band1->threshold, INFINITY, NULL);
888                     dist2 += quantize_band_cost(s, M,
889                                                 M34,
890                                                 sce0->ics.swb_sizes[g],
891                                                 sce0->sf_idx[(w+w2)*16+g],
892                                                 sce0->band_type[(w+w2)*16+g],
893                                                 lambda / maxthr, INFINITY, NULL);
894                     dist2 += quantize_band_cost(s, S,
895                                                 S34,
896                                                 sce1->ics.swb_sizes[g],
897                                                 sce1->sf_idx[(w+w2)*16+g],
898                                                 sce1->band_type[(w+w2)*16+g],
899                                                 lambda / minthr, INFINITY, NULL);
900                 }
901                 cpe->ms_mask[w*16+g] = dist2 < dist1;
902             }
903             start += sce0->ics.swb_sizes[g];
904         }
905     }
906 }
907
908 AACCoefficientsEncoder ff_aac_coders[] = {
909     {
910         search_for_quantizers_faac,
911         encode_window_bands_info,
912         quantize_and_encode_band,
913         search_for_ms,
914     },
915     {
916         search_for_quantizers_anmr,
917         encode_window_bands_info,
918         quantize_and_encode_band,
919         search_for_ms,
920     },
921     {
922         search_for_quantizers_twoloop,
923         encode_window_bands_info,
924         quantize_and_encode_band,
925         search_for_ms,
926     },
927     {
928         search_for_quantizers_fast,
929         encode_window_bands_info,
930         quantize_and_encode_band,
931         search_for_ms,
932     },
933 };