]> git.sesse.net Git - ffmpeg/blob - libavcodec/aaccoder.c
fmaxf -> FFMAX to fix pre-C99 systems
[ffmpeg] / libavcodec / aaccoder.c
1 /*
2  * AAC coefficients encoder
3  * Copyright (C) 2008-2009 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * AAC coefficients encoder
25  */
26
27 /***********************************
28  *              TODOs:
29  * speedup quantizer selection
30  * add sane pulse detection
31  ***********************************/
32
33 #include "avcodec.h"
34 #include "put_bits.h"
35 #include "aac.h"
36 #include "aacenc.h"
37 #include "aactab.h"
38
39 /** bits needed to code codebook run value for long windows */
40 static const uint8_t run_value_bits_long[64] = {
41      5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
42      5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5, 10,
43     10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
44     10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
45 };
46
47 /** bits needed to code codebook run value for short windows */
48 static const uint8_t run_value_bits_short[16] = {
49     3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
50 };
51
52 static const uint8_t *run_value_bits[2] = {
53     run_value_bits_long, run_value_bits_short
54 };
55
56
57 /**
58  * Quantize one coefficient.
59  * @return absolute value of the quantized coefficient
60  * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
61  */
62 static av_always_inline int quant(float coef, const float Q)
63 {
64     float a = coef * Q;
65     return sqrtf(a * sqrtf(a)) + 0.4054;
66 }
67
68 static void quantize_bands(int *out, const float *in, const float *scaled,
69                            int size, float Q34, int is_signed, int maxval)
70 {
71     int i;
72     double qc;
73     for (i = 0; i < size; i++) {
74         qc = scaled[i] * Q34;
75         out[i] = (int)FFMIN(qc + 0.4054, (double)maxval);
76         if (is_signed && in[i] < 0.0f) {
77             out[i] = -out[i];
78         }
79     }
80 }
81
82 static void abs_pow34_v(float *out, const float *in, const int size)
83 {
84 #ifndef USE_REALLY_FULL_SEARCH
85     int i;
86     for (i = 0; i < size; i++) {
87         float a = fabsf(in[i]);
88         out[i] = sqrtf(a * sqrtf(a));
89     }
90 #endif /* USE_REALLY_FULL_SEARCH */
91 }
92
93 static const uint8_t aac_cb_range [12] = {0, 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17};
94 static const uint8_t aac_cb_maxval[12] = {0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16};
95
96 /**
97  * Calculate rate distortion cost for quantizing with given codebook
98  *
99  * @return quantization distortion
100  */
101 static float quantize_and_encode_band_cost(struct AACEncContext *s,
102                                 PutBitContext *pb, const float *in,
103                                 const float *scaled, int size, int scale_idx,
104                                 int cb, const float lambda, const float uplim,
105                                 int *bits)
106 {
107     const float IQ = ff_aac_pow2sf_tab[200 + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
108     const float  Q = ff_aac_pow2sf_tab[200 - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
109     const float CLIPPED_ESCAPE = 165140.0f*IQ;
110     int i, j, k;
111     float cost = 0;
112     const int dim = cb < FIRST_PAIR_BT ? 4 : 2;
113     int resbits = 0;
114     const float  Q34 = sqrtf(Q * sqrtf(Q));
115     const int range  = aac_cb_range[cb];
116     const int maxval = aac_cb_maxval[cb];
117     int off;
118
119     if (!cb) {
120         for (i = 0; i < size; i++)
121             cost += in[i]*in[i];
122         if (bits)
123             *bits = 0;
124         return cost * lambda;
125     }
126     if (!scaled) {
127         abs_pow34_v(s->scoefs, in, size);
128         scaled = s->scoefs;
129     }
130     quantize_bands(s->qcoefs, in, scaled, size, Q34, !IS_CODEBOOK_UNSIGNED(cb), maxval);
131     if (IS_CODEBOOK_UNSIGNED(cb)) {
132         off = 0;
133     } else {
134         off = maxval;
135     }
136     for (i = 0; i < size; i += dim) {
137         const float *vec;
138         int *quants = s->qcoefs + i;
139         int curidx = 0;
140         int curbits;
141         float rd = 0.0f;
142         for (j = 0; j < dim; j++) {
143             curidx *= range;
144             curidx += quants[j] + off;
145         }
146             curbits =  ff_aac_spectral_bits[cb-1][curidx];
147             vec     = &ff_aac_codebook_vectors[cb-1][curidx*dim];
148             if (IS_CODEBOOK_UNSIGNED(cb)) {
149                 for (k = 0; k < dim; k++) {
150                     float t = fabsf(in[i+k]);
151                     float di;
152                     if (vec[k] == 64.0f) { //FIXME: slow
153                         if (t >= CLIPPED_ESCAPE) {
154                             di = t - CLIPPED_ESCAPE;
155                             curbits += 21;
156                         } else {
157                             int c = av_clip(quant(t, Q), 0, 8191);
158                             di = t - c*cbrtf(c)*IQ;
159                             curbits += av_log2(c)*2 - 4 + 1;
160                         }
161                     } else {
162                         di = t - vec[k]*IQ;
163                     }
164                     if (vec[k] != 0.0f)
165                         curbits++;
166                     rd += di*di;
167                 }
168             } else {
169                 for (k = 0; k < dim; k++) {
170                     float di = in[i+k] - vec[k]*IQ;
171                     rd += di*di;
172                 }
173             }
174         cost    += rd * lambda + curbits;
175         resbits += curbits;
176         if (cost >= uplim)
177             return uplim;
178         if (pb) {
179         put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
180         if (IS_CODEBOOK_UNSIGNED(cb))
181             for (j = 0; j < dim; j++)
182                 if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
183                     put_bits(pb, 1, in[i+j] < 0.0f);
184         if (cb == ESC_BT) {
185             for (j = 0; j < 2; j++) {
186                 if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
187                     int coef = av_clip(quant(fabsf(in[i+j]), Q), 0, 8191);
188                     int len = av_log2(coef);
189
190                     put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
191                     put_bits(pb, len, coef & ((1 << len) - 1));
192                 }
193             }
194         }
195         }
196     }
197
198     if (bits)
199         *bits = resbits;
200     return cost;
201 }
202 static float quantize_band_cost(struct AACEncContext *s, const float *in,
203                                 const float *scaled, int size, int scale_idx,
204                                 int cb, const float lambda, const float uplim,
205                                 int *bits)
206 {
207     return quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx,
208                                          cb, lambda, uplim, bits);
209 }
210
211 static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
212                                      const float *in, int size, int scale_idx,
213                                      int cb, const float lambda)
214 {
215     quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
216                                   INFINITY, NULL);
217 }
218
219 /**
220  * structure used in optimal codebook search
221  */
222 typedef struct BandCodingPath {
223     int prev_idx; ///< pointer to the previous path point
224     float cost;   ///< path cost
225     int run;
226 } BandCodingPath;
227
228 /**
229  * Encode band info for single window group bands.
230  */
231 static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
232                                      int win, int group_len, const float lambda)
233 {
234     BandCodingPath path[120][12];
235     int w, swb, cb, start, start2, size;
236     int i, j;
237     const int max_sfb  = sce->ics.max_sfb;
238     const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
239     const int run_esc  = (1 << run_bits) - 1;
240     int idx, ppos, count;
241     int stackrun[120], stackcb[120], stack_len;
242     float next_minrd = INFINITY;
243     int next_mincb = 0;
244
245     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
246     start = win*128;
247     for (cb = 0; cb < 12; cb++) {
248         path[0][cb].cost     = 0.0f;
249         path[0][cb].prev_idx = -1;
250         path[0][cb].run      = 0;
251     }
252     for (swb = 0; swb < max_sfb; swb++) {
253         start2 = start;
254         size = sce->ics.swb_sizes[swb];
255         if (sce->zeroes[win*16 + swb]) {
256             for (cb = 0; cb < 12; cb++) {
257                 path[swb+1][cb].prev_idx = cb;
258                 path[swb+1][cb].cost     = path[swb][cb].cost;
259                 path[swb+1][cb].run      = path[swb][cb].run + 1;
260             }
261         } else {
262             float minrd = next_minrd;
263             int mincb = next_mincb;
264             next_minrd = INFINITY;
265             next_mincb = 0;
266             for (cb = 0; cb < 12; cb++) {
267                 float cost_stay_here, cost_get_here;
268                 float rd = 0.0f;
269                 for (w = 0; w < group_len; w++) {
270                     FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(win+w)*16+swb];
271                     rd += quantize_band_cost(s, sce->coeffs + start + w*128,
272                                              s->scoefs + start + w*128, size,
273                                              sce->sf_idx[(win+w)*16+swb], cb,
274                                              lambda / band->threshold, INFINITY, NULL);
275                 }
276                 cost_stay_here = path[swb][cb].cost + rd;
277                 cost_get_here  = minrd              + rd + run_bits + 4;
278                 if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
279                     != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
280                     cost_stay_here += run_bits;
281                 if (cost_get_here < cost_stay_here) {
282                     path[swb+1][cb].prev_idx = mincb;
283                     path[swb+1][cb].cost     = cost_get_here;
284                     path[swb+1][cb].run      = 1;
285                 } else {
286                     path[swb+1][cb].prev_idx = cb;
287                     path[swb+1][cb].cost     = cost_stay_here;
288                     path[swb+1][cb].run      = path[swb][cb].run + 1;
289                 }
290                 if (path[swb+1][cb].cost < next_minrd) {
291                     next_minrd = path[swb+1][cb].cost;
292                     next_mincb = cb;
293                 }
294             }
295         }
296         start += sce->ics.swb_sizes[swb];
297     }
298
299     //convert resulting path from backward-linked list
300     stack_len = 0;
301     idx       = 0;
302     for (cb = 1; cb < 12; cb++)
303         if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
304             idx = cb;
305     ppos = max_sfb;
306     while (ppos > 0) {
307         cb = idx;
308         stackrun[stack_len] = path[ppos][cb].run;
309         stackcb [stack_len] = cb;
310         idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
311         ppos -= path[ppos][cb].run;
312         stack_len++;
313     }
314     //perform actual band info encoding
315     start = 0;
316     for (i = stack_len - 1; i >= 0; i--) {
317         put_bits(&s->pb, 4, stackcb[i]);
318         count = stackrun[i];
319         memset(sce->zeroes + win*16 + start, !stackcb[i], count);
320         //XXX: memset when band_type is also uint8_t
321         for (j = 0; j < count; j++) {
322             sce->band_type[win*16 + start] =  stackcb[i];
323             start++;
324         }
325         while (count >= run_esc) {
326             put_bits(&s->pb, run_bits, run_esc);
327             count -= run_esc;
328         }
329         put_bits(&s->pb, run_bits, count);
330     }
331 }
332
333 static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
334                                   int win, int group_len, const float lambda)
335 {
336     BandCodingPath path[120][12];
337     int w, swb, cb, start, start2, size;
338     int i, j;
339     const int max_sfb  = sce->ics.max_sfb;
340     const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
341     const int run_esc  = (1 << run_bits) - 1;
342     int idx, ppos, count;
343     int stackrun[120], stackcb[120], stack_len;
344     float next_minrd = INFINITY;
345     int next_mincb = 0;
346
347     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
348     start = win*128;
349     for (cb = 0; cb < 12; cb++) {
350         path[0][cb].cost     = run_bits+4;
351         path[0][cb].prev_idx = -1;
352         path[0][cb].run      = 0;
353     }
354     for (swb = 0; swb < max_sfb; swb++) {
355         start2 = start;
356         size = sce->ics.swb_sizes[swb];
357         if (sce->zeroes[win*16 + swb]) {
358             for (cb = 0; cb < 12; cb++) {
359                 path[swb+1][cb].prev_idx = cb;
360                 path[swb+1][cb].cost     = path[swb][cb].cost;
361                 path[swb+1][cb].run      = path[swb][cb].run + 1;
362             }
363         } else {
364             float minrd = next_minrd;
365             int mincb = next_mincb;
366             int startcb = sce->band_type[win*16+swb];
367             next_minrd = INFINITY;
368             next_mincb = 0;
369             for (cb = 0; cb < startcb; cb++) {
370                 path[swb+1][cb].cost = 61450;
371                 path[swb+1][cb].prev_idx = -1;
372                 path[swb+1][cb].run = 0;
373             }
374             for (cb = startcb; cb < 12; cb++) {
375                 float cost_stay_here, cost_get_here;
376                 float rd = 0.0f;
377                 for (w = 0; w < group_len; w++) {
378                     rd += quantize_band_cost(s, sce->coeffs + start + w*128,
379                                              s->scoefs + start + w*128, size,
380                                              sce->sf_idx[(win+w)*16+swb], cb,
381                                              0, INFINITY, NULL);
382                 }
383                 cost_stay_here = path[swb][cb].cost + rd;
384                 cost_get_here  = minrd              + rd + run_bits + 4;
385                 if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
386                     != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
387                     cost_stay_here += run_bits;
388                 if (cost_get_here < cost_stay_here) {
389                     path[swb+1][cb].prev_idx = mincb;
390                     path[swb+1][cb].cost     = cost_get_here;
391                     path[swb+1][cb].run      = 1;
392                 } else {
393                     path[swb+1][cb].prev_idx = cb;
394                     path[swb+1][cb].cost     = cost_stay_here;
395                     path[swb+1][cb].run      = path[swb][cb].run + 1;
396                 }
397                 if (path[swb+1][cb].cost < next_minrd) {
398                     next_minrd = path[swb+1][cb].cost;
399                     next_mincb = cb;
400                 }
401             }
402         }
403         start += sce->ics.swb_sizes[swb];
404     }
405
406     //convert resulting path from backward-linked list
407     stack_len = 0;
408     idx       = 0;
409     for (cb = 1; cb < 12; cb++)
410         if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
411             idx = cb;
412     ppos = max_sfb;
413     while (ppos > 0) {
414         if (idx < 0) abort();
415         cb = idx;
416         stackrun[stack_len] = path[ppos][cb].run;
417         stackcb [stack_len] = cb;
418         idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
419         ppos -= path[ppos][cb].run;
420         stack_len++;
421     }
422     //perform actual band info encoding
423     start = 0;
424     for (i = stack_len - 1; i >= 0; i--) {
425         put_bits(&s->pb, 4, stackcb[i]);
426         count = stackrun[i];
427         memset(sce->zeroes + win*16 + start, !stackcb[i], count);
428         //XXX: memset when band_type is also uint8_t
429         for (j = 0; j < count; j++) {
430             sce->band_type[win*16 + start] =  stackcb[i];
431             start++;
432         }
433         while (count >= run_esc) {
434             put_bits(&s->pb, run_bits, run_esc);
435             count -= run_esc;
436         }
437         put_bits(&s->pb, run_bits, count);
438     }
439 }
440
441 typedef struct TrellisPath {
442     float cost;
443     int prev;
444     int min_val;
445     int max_val;
446 } TrellisPath;
447
448 #define TRELLIS_STAGES 121
449 #define TRELLIS_STATES 256
450
451 static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
452                                        SingleChannelElement *sce,
453                                        const float lambda)
454 {
455     int q, w, w2, g, start = 0;
456     int i, j;
457     int idx;
458     TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
459     int bandaddr[TRELLIS_STAGES];
460     int minq;
461     float mincost;
462
463     for (i = 0; i < TRELLIS_STATES; i++) {
464         paths[0][i].cost    = 0.0f;
465         paths[0][i].prev    = -1;
466         paths[0][i].min_val = i;
467         paths[0][i].max_val = i;
468     }
469     for (j = 1; j < TRELLIS_STAGES; j++) {
470         for (i = 0; i < TRELLIS_STATES; i++) {
471             paths[j][i].cost    = INFINITY;
472             paths[j][i].prev    = -2;
473             paths[j][i].min_val = INT_MAX;
474             paths[j][i].max_val = 0;
475         }
476     }
477     idx = 1;
478     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
479     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
480         start = w*128;
481         for (g = 0; g < sce->ics.num_swb; g++) {
482             const float *coefs = sce->coeffs + start;
483             float qmin, qmax;
484             int nz = 0;
485
486             bandaddr[idx] = w * 16 + g;
487             qmin = INT_MAX;
488             qmax = 0.0f;
489             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
490                 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
491                 if (band->energy <= band->threshold || band->threshold == 0.0f) {
492                     sce->zeroes[(w+w2)*16+g] = 1;
493                     continue;
494                 }
495                 sce->zeroes[(w+w2)*16+g] = 0;
496                 nz = 1;
497                 for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
498                     float t = fabsf(coefs[w2*128+i]);
499                     if (t > 0.0f)
500                         qmin = FFMIN(qmin, t);
501                     qmax = FFMAX(qmax, t);
502                 }
503             }
504             if (nz) {
505                 int minscale, maxscale;
506                 float minrd = INFINITY;
507                 //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
508                 minscale = av_clip_uint8(log2(qmin)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
509                 //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
510                 maxscale = av_clip_uint8(log2(qmax)*4 +  6 + SCALE_ONE_POS - SCALE_DIV_512);
511                 for (q = minscale; q < maxscale; q++) {
512                     float dists[12], dist;
513                     memset(dists, 0, sizeof(dists));
514                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
515                         FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
516                         int cb;
517                         for (cb = 0; cb <= ESC_BT; cb++)
518                             dists[cb] += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
519                                                             q, cb, lambda / band->threshold, INFINITY, NULL);
520                     }
521                     dist = dists[0];
522                     for (i = 1; i <= ESC_BT; i++)
523                         dist = FFMIN(dist, dists[i]);
524                     minrd = FFMIN(minrd, dist);
525
526                     for (i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, TRELLIS_STATES); i++) {
527                         float cost;
528                         int minv, maxv;
529                         if (isinf(paths[idx - 1][i].cost))
530                             continue;
531                         cost = paths[idx - 1][i].cost + dist
532                                + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
533                         minv = FFMIN(paths[idx - 1][i].min_val, q);
534                         maxv = FFMAX(paths[idx - 1][i].max_val, q);
535                         if (cost < paths[idx][q].cost && maxv-minv < SCALE_MAX_DIFF) {
536                             paths[idx][q].cost    = cost;
537                             paths[idx][q].prev    = i;
538                             paths[idx][q].min_val = minv;
539                             paths[idx][q].max_val = maxv;
540                         }
541                     }
542                 }
543             } else {
544                 for (q = 0; q < TRELLIS_STATES; q++) {
545                     if (!isinf(paths[idx - 1][q].cost)) {
546                         paths[idx][q].cost = paths[idx - 1][q].cost + 1;
547                         paths[idx][q].prev = q;
548                         paths[idx][q].min_val = FFMIN(paths[idx - 1][q].min_val, q);
549                         paths[idx][q].max_val = FFMAX(paths[idx - 1][q].max_val, q);
550                         continue;
551                     }
552                     for (i = FFMAX(q - SCALE_MAX_DIFF, 0); i < FFMIN(q + SCALE_MAX_DIFF, TRELLIS_STATES); i++) {
553                         float cost;
554                         int minv, maxv;
555                         if (isinf(paths[idx - 1][i].cost))
556                             continue;
557                         cost = paths[idx - 1][i].cost + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
558                         minv = FFMIN(paths[idx - 1][i].min_val, q);
559                         maxv = FFMAX(paths[idx - 1][i].max_val, q);
560                         if (cost < paths[idx][q].cost && maxv-minv < SCALE_MAX_DIFF) {
561                             paths[idx][q].cost    = cost;
562                             paths[idx][q].prev    = i;
563                             paths[idx][q].min_val = minv;
564                             paths[idx][q].max_val = maxv;
565                         }
566                     }
567                 }
568             }
569             sce->zeroes[w*16+g] = !nz;
570             start += sce->ics.swb_sizes[g];
571             idx++;
572         }
573     }
574     idx--;
575     mincost = paths[idx][0].cost;
576     minq    = 0;
577     for (i = 1; i < TRELLIS_STATES; i++) {
578         if (paths[idx][i].cost < mincost) {
579             mincost = paths[idx][i].cost;
580             minq = i;
581         }
582     }
583     while (idx) {
584         sce->sf_idx[bandaddr[idx]] = minq;
585         minq = paths[idx][minq].prev;
586         idx--;
587     }
588     //set the same quantizers inside window groups
589     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
590         for (g = 0;  g < sce->ics.num_swb; g++)
591             for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
592                 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
593 }
594
595 /**
596  * two-loop quantizers search taken from ISO 13818-7 Appendix C
597  */
598 static void search_for_quantizers_twoloop(AVCodecContext *avctx,
599                                           AACEncContext *s,
600                                           SingleChannelElement *sce,
601                                           const float lambda)
602 {
603     int start = 0, i, w, w2, g;
604     int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
605     float dists[128], uplims[128];
606     int fflag, minscaler;
607     int its  = 0;
608     int allz = 0;
609     float minthr = INFINITY;
610
611     //XXX: some heuristic to determine initial quantizers will reduce search time
612     memset(dists, 0, sizeof(dists));
613     //determine zero bands and upper limits
614     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
615         for (g = 0;  g < sce->ics.num_swb; g++) {
616             int nz = 0;
617             float uplim = 0.0f;
618             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
619                 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
620                 uplim += band->threshold;
621                 if (band->energy <= band->threshold || band->threshold == 0.0f) {
622                     sce->zeroes[(w+w2)*16+g] = 1;
623                     continue;
624                 }
625                 nz = 1;
626             }
627             uplims[w*16+g] = uplim *512;
628             sce->zeroes[w*16+g] = !nz;
629             if (nz)
630                 minthr = FFMIN(minthr, uplim);
631             allz = FFMAX(allz, nz);
632         }
633     }
634     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
635         for (g = 0;  g < sce->ics.num_swb; g++) {
636             if (sce->zeroes[w*16+g]) {
637                 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
638                 continue;
639             }
640             sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2(uplims[w*16+g]/minthr)*4,59);
641         }
642     }
643
644     if (!allz)
645         return;
646     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
647     //perform two-loop search
648     //outer loop - improve quality
649     do {
650         int tbits, qstep;
651         minscaler = sce->sf_idx[0];
652         //inner loop - quantize spectrum to fit into given number of bits
653         qstep = its ? 1 : 32;
654         do {
655             int prev = -1;
656             tbits = 0;
657             fflag = 0;
658             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
659                 start = w*128;
660                 for (g = 0;  g < sce->ics.num_swb; g++) {
661                     const float *coefs = sce->coeffs + start;
662                     const float *scaled = s->scoefs + start;
663                     int bits = 0;
664                     int cb;
665                     float mindist = INFINITY;
666                     int minbits = 0;
667
668                     if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
669                         start += sce->ics.swb_sizes[g];
670                         continue;
671                     }
672                     minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
673                     {
674                         float dist = 0.0f;
675                         int bb = 0;
676                         float maxval = 0.0f;
677                         float Q = ff_aac_pow2sf_tab[200 - sce->sf_idx[w*16+g] + SCALE_ONE_POS - SCALE_DIV_512];
678                         float Q34 = sqrtf(Q * sqrtf(Q));
679                         int qmaxval;
680                         for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
681                             for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
682                                 maxval = FFMAX(maxval, scaled[w2*128+i]);
683                             }
684                         }
685                         qmaxval = maxval * Q34 + 0.4054;
686                         if      (qmaxval ==  0) cb = 0;
687                         else if (qmaxval ==  1) cb = 1;
688                         else if (qmaxval ==  2) cb = 3;
689                         else if (qmaxval <=  4) cb = 5;
690                         else if (qmaxval <=  7) cb = 7;
691                         else if (qmaxval <= 12) cb = 9;
692                         else                    cb = 11;
693                         sce->band_type[w*16+g] = cb;
694                         for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
695                             int b;
696                             dist += quantize_band_cost(s, coefs + w2*128,
697                                                        scaled + w2*128,
698                                                        sce->ics.swb_sizes[g],
699                                                        sce->sf_idx[w*16+g],
700                                                        cb,
701                                                        lambda,
702                                                        INFINITY,
703                                                        &b);
704                             bb += b;
705                         }
706                             mindist = dist;
707                             minbits = bb;
708                     }
709                     dists[w*16+g] = (mindist - minbits) / lambda;
710                     bits = minbits;
711                     if (prev != -1) {
712                         bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
713                     }
714                     tbits += bits;
715                     start += sce->ics.swb_sizes[g];
716                     prev = sce->sf_idx[w*16+g];
717                 }
718             }
719             if (tbits > destbits) {
720                 for (i = 0; i < 128; i++)
721                     if (sce->sf_idx[i] < 218 - qstep)
722                         sce->sf_idx[i] += qstep;
723             } else {
724                 for (i = 0; i < 128; i++)
725                     if (sce->sf_idx[i] > 60 - qstep)
726                         sce->sf_idx[i] -= qstep;
727             }
728             qstep >>= 1;
729             if (!qstep && tbits > destbits*1.02)
730                 qstep = 1;
731             if (sce->sf_idx[0] >= 217)
732                 break;
733         } while (qstep);
734
735         fflag = 0;
736         minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
737         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
738             start = w*128;
739             for (g = 0; g < sce->ics.num_swb; g++) {
740                 int prevsc = sce->sf_idx[w*16+g];
741                 if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60)
742                     sce->sf_idx[w*16+g]--;
743                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
744                 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
745                 if (sce->sf_idx[w*16+g] != prevsc)
746                     fflag = 1;
747             }
748         }
749         its++;
750     } while (fflag && its < 10);
751 }
752
753 static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
754                                        SingleChannelElement *sce,
755                                        const float lambda)
756 {
757     int start = 0, i, w, w2, g;
758     float uplim[128], maxq[128];
759     int minq, maxsf;
760     float distfact = ((sce->ics.num_windows > 1) ? 85.80 : 147.84) / lambda;
761     int last = 0, lastband = 0, curband = 0;
762     float avg_energy = 0.0;
763     if (sce->ics.num_windows == 1) {
764         start = 0;
765         for (i = 0; i < 1024; i++) {
766             if (i - start >= sce->ics.swb_sizes[curband]) {
767                 start += sce->ics.swb_sizes[curband];
768                 curband++;
769             }
770             if (sce->coeffs[i]) {
771                 avg_energy += sce->coeffs[i] * sce->coeffs[i];
772                 last = i;
773                 lastband = curband;
774             }
775         }
776     } else {
777         for (w = 0; w < 8; w++) {
778             const float *coeffs = sce->coeffs + w*128;
779             start = 0;
780             for (i = 0; i < 128; i++) {
781                 if (i - start >= sce->ics.swb_sizes[curband]) {
782                     start += sce->ics.swb_sizes[curband];
783                     curband++;
784                 }
785                 if (coeffs[i]) {
786                     avg_energy += coeffs[i] * coeffs[i];
787                     last = FFMAX(last, i);
788                     lastband = FFMAX(lastband, curband);
789                 }
790             }
791         }
792     }
793     last++;
794     avg_energy /= last;
795     if (avg_energy == 0.0f) {
796         for (i = 0; i < FF_ARRAY_ELEMS(sce->sf_idx); i++)
797             sce->sf_idx[i] = SCALE_ONE_POS;
798         return;
799     }
800     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
801         start = w*128;
802         for (g = 0; g < sce->ics.num_swb; g++) {
803             float *coefs   = sce->coeffs + start;
804             const int size = sce->ics.swb_sizes[g];
805             int start2 = start, end2 = start + size, peakpos = start;
806             float maxval = -1, thr = 0.0f, t;
807             maxq[w*16+g] = 0.0f;
808             if (g > lastband) {
809                 maxq[w*16+g] = 0.0f;
810                 start += size;
811                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
812                     memset(coefs + w2*128, 0, sizeof(coefs[0])*size);
813                 continue;
814             }
815             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
816                 for (i = 0; i < size; i++) {
817                     float t = coefs[w2*128+i]*coefs[w2*128+i];
818                     maxq[w*16+g] = FFMAX(maxq[w*16+g], fabsf(coefs[w2*128 + i]));
819                     thr += t;
820                     if (sce->ics.num_windows == 1 && maxval < t) {
821                         maxval  = t;
822                         peakpos = start+i;
823                     }
824                 }
825             }
826             if (sce->ics.num_windows == 1) {
827                 start2 = FFMAX(peakpos - 2, start2);
828                 end2   = FFMIN(peakpos + 3, end2);
829             } else {
830                 start2 -= start;
831                 end2   -= start;
832             }
833             start += size;
834             thr = pow(thr / (avg_energy * (end2 - start2)), 0.3 + 0.1*(lastband - g) / lastband);
835             t   = 1.0 - (1.0 * start2 / last);
836             uplim[w*16+g] = distfact / (1.4 * thr + t*t*t + 0.075);
837         }
838     }
839     memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
840     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
841     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
842         start = w*128;
843         for (g = 0;  g < sce->ics.num_swb; g++) {
844             const float *coefs  = sce->coeffs + start;
845             const float *scaled = s->scoefs   + start;
846             const int size      = sce->ics.swb_sizes[g];
847             int scf, prev_scf, step;
848             int min_scf = -1, max_scf = 256;
849             float curdiff;
850             if (maxq[w*16+g] < 21.544) {
851                 sce->zeroes[w*16+g] = 1;
852                 start += size;
853                 continue;
854             }
855             sce->zeroes[w*16+g] = 0;
856             scf  = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2(1/maxq[w*16+g])*16/3, 60, 218);
857             step = 16;
858             for (;;) {
859                 float dist = 0.0f;
860                 int quant_max;
861
862                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
863                     int b;
864                     dist += quantize_band_cost(s, coefs + w2*128,
865                                                scaled + w2*128,
866                                                sce->ics.swb_sizes[g],
867                                                scf,
868                                                ESC_BT,
869                                                lambda,
870                                                INFINITY,
871                                                &b);
872                     dist -= b;
873                 }
874                 dist *= 1.0f / 512.0f / lambda;
875                 quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[200 - scf + SCALE_ONE_POS - SCALE_DIV_512]);
876                 if (quant_max >= 8191) { // too much, return to the previous quantizer
877                     sce->sf_idx[w*16+g] = prev_scf;
878                     break;
879                 }
880                 prev_scf = scf;
881                 curdiff = fabsf(dist - uplim[w*16+g]);
882                 if (curdiff <= 1.0f)
883                     step = 0;
884                 else
885                     step = log2(curdiff);
886                 if (dist > uplim[w*16+g])
887                     step = -step;
888                 scf += step;
889                 scf = av_clip_uint8(scf);
890                 step = scf - prev_scf;
891                 if (FFABS(step) <= 1 || (step > 0 && scf >= max_scf) || (step < 0 && scf <= min_scf)) {
892                     sce->sf_idx[w*16+g] = av_clip(scf, min_scf, max_scf);
893                     break;
894                 }
895                 if (step > 0)
896                     min_scf = prev_scf;
897                 else
898                     max_scf = prev_scf;
899             }
900             start += size;
901         }
902     }
903     minq = sce->sf_idx[0] ? sce->sf_idx[0] : INT_MAX;
904     for (i = 1; i < 128; i++) {
905         if (!sce->sf_idx[i])
906             sce->sf_idx[i] = sce->sf_idx[i-1];
907         else
908             minq = FFMIN(minq, sce->sf_idx[i]);
909     }
910     if (minq == INT_MAX)
911         minq = 0;
912     minq = FFMIN(minq, SCALE_MAX_POS);
913     maxsf = FFMIN(minq + SCALE_MAX_DIFF, SCALE_MAX_POS);
914     for (i = 126; i >= 0; i--) {
915         if (!sce->sf_idx[i])
916             sce->sf_idx[i] = sce->sf_idx[i+1];
917         sce->sf_idx[i] = av_clip(sce->sf_idx[i], minq, maxsf);
918     }
919 }
920
921 static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
922                                        SingleChannelElement *sce,
923                                        const float lambda)
924 {
925     int start = 0, i, w, w2, g;
926     int minq = 255;
927
928     memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
929     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
930         start = w*128;
931         for (g = 0; g < sce->ics.num_swb; g++) {
932             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
933                 FFPsyBand *band = &s->psy.psy_bands[s->cur_channel*PSY_MAX_BANDS+(w+w2)*16+g];
934                 if (band->energy <= band->threshold) {
935                     sce->sf_idx[(w+w2)*16+g] = 218;
936                     sce->zeroes[(w+w2)*16+g] = 1;
937                 } else {
938                     sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2(band->threshold), 80, 218);
939                     sce->zeroes[(w+w2)*16+g] = 0;
940                 }
941                 minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
942             }
943         }
944     }
945     for (i = 0; i < 128; i++) {
946         sce->sf_idx[i] = 140;
947         //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
948     }
949     //set the same quantizers inside window groups
950     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
951         for (g = 0;  g < sce->ics.num_swb; g++)
952             for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
953                 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
954 }
955
956 static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
957                           const float lambda)
958 {
959     int start = 0, i, w, w2, g;
960     float M[128], S[128];
961     float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
962     SingleChannelElement *sce0 = &cpe->ch[0];
963     SingleChannelElement *sce1 = &cpe->ch[1];
964     if (!cpe->common_window)
965         return;
966     for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
967         for (g = 0;  g < sce0->ics.num_swb; g++) {
968             if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
969                 float dist1 = 0.0f, dist2 = 0.0f;
970                 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
971                     FFPsyBand *band0 = &s->psy.psy_bands[(s->cur_channel+0)*PSY_MAX_BANDS+(w+w2)*16+g];
972                     FFPsyBand *band1 = &s->psy.psy_bands[(s->cur_channel+1)*PSY_MAX_BANDS+(w+w2)*16+g];
973                     float minthr = FFMIN(band0->threshold, band1->threshold);
974                     float maxthr = FFMAX(band0->threshold, band1->threshold);
975                     for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
976                         M[i] = (sce0->coeffs[start+w2*128+i]
977                               + sce1->coeffs[start+w2*128+i]) * 0.5;
978                         S[i] =  sce0->coeffs[start+w2*128+i]
979                               - sce1->coeffs[start+w2*128+i];
980                     }
981                     abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
982                     abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
983                     abs_pow34_v(M34, M,                         sce0->ics.swb_sizes[g]);
984                     abs_pow34_v(S34, S,                         sce0->ics.swb_sizes[g]);
985                     dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
986                                                 L34,
987                                                 sce0->ics.swb_sizes[g],
988                                                 sce0->sf_idx[(w+w2)*16+g],
989                                                 sce0->band_type[(w+w2)*16+g],
990                                                 lambda / band0->threshold, INFINITY, NULL);
991                     dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
992                                                 R34,
993                                                 sce1->ics.swb_sizes[g],
994                                                 sce1->sf_idx[(w+w2)*16+g],
995                                                 sce1->band_type[(w+w2)*16+g],
996                                                 lambda / band1->threshold, INFINITY, NULL);
997                     dist2 += quantize_band_cost(s, M,
998                                                 M34,
999                                                 sce0->ics.swb_sizes[g],
1000                                                 sce0->sf_idx[(w+w2)*16+g],
1001                                                 sce0->band_type[(w+w2)*16+g],
1002                                                 lambda / maxthr, INFINITY, NULL);
1003                     dist2 += quantize_band_cost(s, S,
1004                                                 S34,
1005                                                 sce1->ics.swb_sizes[g],
1006                                                 sce1->sf_idx[(w+w2)*16+g],
1007                                                 sce1->band_type[(w+w2)*16+g],
1008                                                 lambda / minthr, INFINITY, NULL);
1009                 }
1010                 cpe->ms_mask[w*16+g] = dist2 < dist1;
1011             }
1012             start += sce0->ics.swb_sizes[g];
1013         }
1014     }
1015 }
1016
1017 AACCoefficientsEncoder ff_aac_coders[] = {
1018     {
1019         search_for_quantizers_faac,
1020         encode_window_bands_info,
1021         quantize_and_encode_band,
1022         search_for_ms,
1023     },
1024     {
1025         search_for_quantizers_anmr,
1026         encode_window_bands_info,
1027         quantize_and_encode_band,
1028         search_for_ms,
1029     },
1030     {
1031         search_for_quantizers_twoloop,
1032         codebook_trellis_rate,
1033         quantize_and_encode_band,
1034         search_for_ms,
1035     },
1036     {
1037         search_for_quantizers_fast,
1038         encode_window_bands_info,
1039         quantize_and_encode_band,
1040         search_for_ms,
1041     },
1042 };