]> git.sesse.net Git - ffmpeg/blob - libavcodec/aaccoder.c
Merge commit '6f4cd33efb5a9ec75db1677d5f7846c60337129f'
[ffmpeg] / libavcodec / aaccoder.c
1 /*
2  * AAC coefficients encoder
3  * Copyright (C) 2008-2009 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * AAC coefficients encoder
25  */
26
27 /***********************************
28  *              TODOs:
29  * speedup quantizer selection
30  * add sane pulse detection
31  ***********************************/
32
33 #include "libavutil/libm.h" // brought forward to work around cygwin header breakage
34
35 #include <float.h>
36 #include "libavutil/mathematics.h"
37 #include "avcodec.h"
38 #include "put_bits.h"
39 #include "aac.h"
40 #include "aacenc.h"
41 #include "aactab.h"
42
43 /** Frequency in Hz for lower limit of noise substitution **/
44 #define NOISE_LOW_LIMIT 4000
45
46 /** Total number of usable codebooks **/
47 #define CB_TOT 12
48
49 /** Total number of codebooks, including special ones **/
50 #define CB_TOT_ALL 15
51
/**
 * Bits needed to code a codebook run of a given length, long windows.
 * Indexed by run length; the cost steps up by one 5-bit field at 31 and 63.
 */
static const uint8_t run_value_bits_long[64] = {
     5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5,  5,
     5,  5,  5,  5,  5,  5,  5, 10,
    10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 10,
    10, 10, 10, 10, 10, 10, 10, 15
};

/**
 * Bits needed to code a codebook run of a given length, short windows.
 * Indexed by run length; the cost steps up by one 3-bit field at 7 and 15.
 */
static const uint8_t run_value_bits_short[16] = {
    3, 3, 3, 3, 3, 3, 3, 6,
    6, 6, 6, 6, 6, 6, 6, 9
};

/** Run cost tables selected by window type: [0] long, [1] short. */
static const uint8_t * const run_value_bits[2] = {
    run_value_bits_long,
    run_value_bits_short
};
68
69 /** Map to convert values from BandCodingPath index to a codebook index **/
70 static const uint8_t aac_cb_out_map[CB_TOT_ALL]  = {0,1,2,3,4,5,6,7,8,9,10,11,13,14,15};
71 /** Inverse map to convert from codebooks to BandCodingPath indices **/
72 static const uint8_t aac_cb_in_map[CB_TOT_ALL+1] = {0,1,2,3,4,5,6,7,8,9,10,11,0,12,13,14};
73
74 /**
75  * Quantize one coefficient.
76  * @return absolute value of the quantized coefficient
77  * @see 3GPP TS26.403 5.6.2 "Scalefactor determination"
78  */
79 static av_always_inline int quant(float coef, const float Q)
80 {
81     float a = coef * Q;
82     return sqrtf(a * sqrtf(a)) + 0.4054;
83 }
84
/**
 * Quantize a run of coefficients whose magnitudes were already raised to
 * the power 3/4.
 *
 * @param out       receives the quantized values
 * @param in        original coefficients, consulted only for their sign
 * @param scaled    pre-scaled magnitudes of the coefficients
 * @param size      number of coefficients
 * @param Q34       quantizer scale applied to the pre-scaled magnitudes
 * @param is_signed when non-zero, negative inputs yield negative outputs
 * @param maxval    upper limit for the quantized magnitude
 */
static void quantize_bands(int *out, const float *in, const float *scaled,
                           int size, float Q34, int is_signed, int maxval)
{
    const double limit = (double)maxval;
    int k;

    for (k = 0; k < size; k++) {
        const double biased = (double)(scaled[k] * Q34) + 0.4054;
        int level = (int)FFMIN(biased, limit);

        if (is_signed && in[k] < 0.0f)
            level = -level;
        out[k] = level;
    }
}
98
99 static void abs_pow34_v(float *out, const float *in, const int size)
100 {
101 #ifndef USE_REALLY_FULL_SEARCH
102     int i;
103     for (i = 0; i < size; i++) {
104         float a = fabsf(in[i]);
105         out[i] = sqrtf(a * sqrtf(a));
106     }
107 #endif /* USE_REALLY_FULL_SEARCH */
108 }
109
/**
 * Per-codebook radix used to pack a vector of quantized coefficients into
 * a single codebook index.
 */
static const uint8_t aac_cb_range [12] = {
    0, 3, 3, 3, 3, 9, 9, 8, 8, 13, 13, 17
};

/**
 * Per-codebook limit for the quantized magnitude; for signed codebooks it
 * is also the offset added to each value before packing.
 */
static const uint8_t aac_cb_maxval[12] = {
    0, 1, 1, 2, 2, 4, 4, 7, 7, 12, 12, 16
};
112
113 /**
114  * Calculate rate distortion cost for quantizing with given codebook
115  *
116  * @return quantization distortion
117  */
118 static av_always_inline float quantize_and_encode_band_cost_template(
119                                 struct AACEncContext *s,
120                                 PutBitContext *pb, const float *in,
121                                 const float *scaled, int size, int scale_idx,
122                                 int cb, const float lambda, const float uplim,
123                                 int *bits, int BT_ZERO, int BT_UNSIGNED,
124                                 int BT_PAIR, int BT_ESC, int BT_NOISE, int BT_STEREO)
125 {
126     const int q_idx = POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512;
127     const float Q   = ff_aac_pow2sf_tab [q_idx];
128     const float Q34 = ff_aac_pow34sf_tab[q_idx];
129     const float IQ  = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
130     const float CLIPPED_ESCAPE = 165140.0f*IQ;
131     int i, j;
132     float cost = 0;
133     const int dim = BT_PAIR ? 2 : 4;
134     int resbits = 0;
135     int off;
136
137     if (BT_ZERO || BT_NOISE || BT_STEREO) {
138         for (i = 0; i < size; i++)
139             cost += in[i]*in[i];
140         if (bits)
141             *bits = 0;
142         return cost * lambda;
143     }
144     if (!scaled) {
145         abs_pow34_v(s->scoefs, in, size);
146         scaled = s->scoefs;
147     }
148     quantize_bands(s->qcoefs, in, scaled, size, Q34, !BT_UNSIGNED, aac_cb_maxval[cb]);
149     if (BT_UNSIGNED) {
150         off = 0;
151     } else {
152         off = aac_cb_maxval[cb];
153     }
154     for (i = 0; i < size; i += dim) {
155         const float *vec;
156         int *quants = s->qcoefs + i;
157         int curidx = 0;
158         int curbits;
159         float rd = 0.0f;
160         for (j = 0; j < dim; j++) {
161             curidx *= aac_cb_range[cb];
162             curidx += quants[j] + off;
163         }
164         curbits =  ff_aac_spectral_bits[cb-1][curidx];
165         vec     = &ff_aac_codebook_vectors[cb-1][curidx*dim];
166         if (BT_UNSIGNED) {
167             for (j = 0; j < dim; j++) {
168                 float t = fabsf(in[i+j]);
169                 float di;
170                 if (BT_ESC && vec[j] == 64.0f) { //FIXME: slow
171                     if (t >= CLIPPED_ESCAPE) {
172                         di = t - CLIPPED_ESCAPE;
173                         curbits += 21;
174                     } else {
175                         int c = av_clip_uintp2(quant(t, Q), 13);
176                         di = t - c*cbrtf(c)*IQ;
177                         curbits += av_log2(c)*2 - 4 + 1;
178                     }
179                 } else {
180                     di = t - vec[j]*IQ;
181                 }
182                 if (vec[j] != 0.0f)
183                     curbits++;
184                 rd += di*di;
185             }
186         } else {
187             for (j = 0; j < dim; j++) {
188                 float di = in[i+j] - vec[j]*IQ;
189                 rd += di*di;
190             }
191         }
192         cost    += rd * lambda + curbits;
193         resbits += curbits;
194         if (cost >= uplim)
195             return uplim;
196         if (pb) {
197             put_bits(pb, ff_aac_spectral_bits[cb-1][curidx], ff_aac_spectral_codes[cb-1][curidx]);
198             if (BT_UNSIGNED)
199                 for (j = 0; j < dim; j++)
200                     if (ff_aac_codebook_vectors[cb-1][curidx*dim+j] != 0.0f)
201                         put_bits(pb, 1, in[i+j] < 0.0f);
202             if (BT_ESC) {
203                 for (j = 0; j < 2; j++) {
204                     if (ff_aac_codebook_vectors[cb-1][curidx*2+j] == 64.0f) {
205                         int coef = av_clip_uintp2(quant(fabsf(in[i+j]), Q), 13);
206                         int len = av_log2(coef);
207
208                         put_bits(pb, len - 4 + 1, (1 << (len - 4 + 1)) - 2);
209                         put_sbits(pb, len, coef);
210                     }
211                 }
212             }
213         }
214     }
215
216     if (bits)
217         *bits = resbits;
218     return cost;
219 }
220
221 static float quantize_and_encode_band_cost_NONE(struct AACEncContext *s, PutBitContext *pb,
222                                                 const float *in, const float *scaled,
223                                                 int size, int scale_idx, int cb,
224                                                 const float lambda, const float uplim,
225                                                 int *bits) {
226     av_assert0(0);
227     return 0.0f;
228 }
229
230 #define QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NAME, BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO) \
231 static float quantize_and_encode_band_cost_ ## NAME(                                         \
232                                 struct AACEncContext *s,                                     \
233                                 PutBitContext *pb, const float *in,                          \
234                                 const float *scaled, int size, int scale_idx,                \
235                                 int cb, const float lambda, const float uplim,               \
236                                 int *bits) {                                                 \
237     return quantize_and_encode_band_cost_template(                                           \
238                                 s, pb, in, scaled, size, scale_idx,                          \
239                                 BT_ESC ? ESC_BT : cb, lambda, uplim, bits,                   \
240                                 BT_ZERO, BT_UNSIGNED, BT_PAIR, BT_ESC, BT_NOISE, BT_STEREO); \
241 }
242
243 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ZERO,  1, 0, 0, 0, 0, 0)
244 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SQUAD, 0, 0, 0, 0, 0, 0)
245 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UQUAD, 0, 1, 0, 0, 0, 0)
246 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(SPAIR, 0, 0, 1, 0, 0, 0)
247 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(UPAIR, 0, 1, 1, 0, 0, 0)
248 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(ESC,   0, 1, 1, 1, 0, 0)
249 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(NOISE, 0, 0, 0, 0, 1, 0)
250 QUANTIZE_AND_ENCODE_BAND_COST_FUNC(STEREO,0, 0, 0, 0, 0, 1)
251
252 static float (*const quantize_and_encode_band_cost_arr[])(
253                                 struct AACEncContext *s,
254                                 PutBitContext *pb, const float *in,
255                                 const float *scaled, int size, int scale_idx,
256                                 int cb, const float lambda, const float uplim,
257                                 int *bits) = {
258     quantize_and_encode_band_cost_ZERO,
259     quantize_and_encode_band_cost_SQUAD,
260     quantize_and_encode_band_cost_SQUAD,
261     quantize_and_encode_band_cost_UQUAD,
262     quantize_and_encode_band_cost_UQUAD,
263     quantize_and_encode_band_cost_SPAIR,
264     quantize_and_encode_band_cost_SPAIR,
265     quantize_and_encode_band_cost_UPAIR,
266     quantize_and_encode_band_cost_UPAIR,
267     quantize_and_encode_band_cost_UPAIR,
268     quantize_and_encode_band_cost_UPAIR,
269     quantize_and_encode_band_cost_ESC,
270     quantize_and_encode_band_cost_NONE,     /* CB 12 doesn't exist */
271     quantize_and_encode_band_cost_NOISE,
272     quantize_and_encode_band_cost_STEREO,
273     quantize_and_encode_band_cost_STEREO,
274 };
275
276 #define quantize_and_encode_band_cost(                                  \
277                                 s, pb, in, scaled, size, scale_idx, cb, \
278                                 lambda, uplim, bits)                    \
279     quantize_and_encode_band_cost_arr[cb](                              \
280                                 s, pb, in, scaled, size, scale_idx, cb, \
281                                 lambda, uplim, bits)
282
283 static float quantize_band_cost(struct AACEncContext *s, const float *in,
284                                 const float *scaled, int size, int scale_idx,
285                                 int cb, const float lambda, const float uplim,
286                                 int *bits)
287 {
288     return quantize_and_encode_band_cost(s, NULL, in, scaled, size, scale_idx,
289                                          cb, lambda, uplim, bits);
290 }
291
292 static void quantize_and_encode_band(struct AACEncContext *s, PutBitContext *pb,
293                                      const float *in, int size, int scale_idx,
294                                      int cb, const float lambda)
295 {
296     quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
297                                   INFINITY, NULL);
298 }
299
/**
 * Find the largest value of one band across the windows of a group.
 *
 * @param group_len number of windows to scan
 * @param swb_size  number of coefficients of the band in each window
 * @param scaled    first coefficient of the band in the first window;
 *                  successive windows are 128 coefficients apart
 * @return the largest value found, never less than 0
 */
static float find_max_val(int group_len, int swb_size, const float *scaled) {
    float peak = 0.0f;
    int win, k;

    for (win = 0; win < group_len; win++) {
        const float *row = scaled + win*128;

        for (k = 0; k < swb_size; k++)
            peak = FFMAX(peak, row[k]);
    }
    return peak;
}
310
311 static int find_min_book(float maxval, int sf) {
312     float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
313     float Q34 = sqrtf(Q * sqrtf(Q));
314     int qmaxval, cb;
315     qmaxval = maxval * Q34 + 0.4054f;
316     if      (qmaxval ==  0) cb = 0;
317     else if (qmaxval ==  1) cb = 1;
318     else if (qmaxval ==  2) cb = 3;
319     else if (qmaxval <=  4) cb = 5;
320     else if (qmaxval <=  7) cb = 7;
321     else if (qmaxval <= 12) cb = 9;
322     else                    cb = 11;
323     return cb;
324 }
325
/**
 * One node of the codebook search: the best way found so far of coding
 * all bands up to a given band when that band uses a given codebook.
 */
typedef struct BandCodingPath {
    int prev_idx; ///< path index used before the current run started, -1 if none
    float cost;   ///< accumulated cost of the path ending at this node
    int run;      ///< number of consecutive bands ending here that share the codebook
} BandCodingPath;
334
335 /**
336  * Encode band info for single window group bands.
337  */
338 static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
339                                      int win, int group_len, const float lambda)
340 {
341     BandCodingPath path[120][CB_TOT_ALL];
342     int w, swb, cb, start, size;
343     int i, j;
344     const int max_sfb  = sce->ics.max_sfb;
345     const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
346     const int run_esc  = (1 << run_bits) - 1;
347     int idx, ppos, count;
348     int stackrun[120], stackcb[120], stack_len;
349     float next_minrd = INFINITY;
350     int next_mincb = 0;
351
352     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
353     start = win*128;
354     for (cb = 0; cb < CB_TOT_ALL; cb++) {
355         path[0][cb].cost     = 0.0f;
356         path[0][cb].prev_idx = -1;
357         path[0][cb].run      = 0;
358     }
359     for (swb = 0; swb < max_sfb; swb++) {
360         size = sce->ics.swb_sizes[swb];
361         if (sce->zeroes[win*16 + swb]) {
362             for (cb = 0; cb < CB_TOT_ALL; cb++) {
363                 path[swb+1][cb].prev_idx = cb;
364                 path[swb+1][cb].cost     = path[swb][cb].cost;
365                 path[swb+1][cb].run      = path[swb][cb].run + 1;
366             }
367         } else {
368             float minrd = next_minrd;
369             int mincb = next_mincb;
370             next_minrd = INFINITY;
371             next_mincb = 0;
372             for (cb = 0; cb < CB_TOT_ALL; cb++) {
373                 float cost_stay_here, cost_get_here;
374                 float rd = 0.0f;
375                 if (cb >= 12 && sce->band_type[win*16+swb] < aac_cb_out_map[cb] ||
376                     cb  < aac_cb_in_map[sce->band_type[win*16+swb]] && sce->band_type[win*16+swb] > aac_cb_out_map[cb]) {
377                     path[swb+1][cb].prev_idx = -1;
378                     path[swb+1][cb].cost     = INFINITY;
379                     path[swb+1][cb].run      = path[swb][cb].run + 1;
380                     continue;
381                 }
382                 for (w = 0; w < group_len; w++) {
383                     FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
384                     rd += quantize_band_cost(s, sce->coeffs + start + w*128,
385                                              s->scoefs + start + w*128, size,
386                                              sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
387                                              lambda / band->threshold, INFINITY, NULL);
388                 }
389                 cost_stay_here = path[swb][cb].cost + rd;
390                 cost_get_here  = minrd              + rd + run_bits + 4;
391                 if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
392                     != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
393                     cost_stay_here += run_bits;
394                 if (cost_get_here < cost_stay_here) {
395                     path[swb+1][cb].prev_idx = mincb;
396                     path[swb+1][cb].cost     = cost_get_here;
397                     path[swb+1][cb].run      = 1;
398                 } else {
399                     path[swb+1][cb].prev_idx = cb;
400                     path[swb+1][cb].cost     = cost_stay_here;
401                     path[swb+1][cb].run      = path[swb][cb].run + 1;
402                 }
403                 if (path[swb+1][cb].cost < next_minrd) {
404                     next_minrd = path[swb+1][cb].cost;
405                     next_mincb = cb;
406                 }
407             }
408         }
409         start += sce->ics.swb_sizes[swb];
410     }
411
412     //convert resulting path from backward-linked list
413     stack_len = 0;
414     idx       = 0;
415     for (cb = 1; cb < CB_TOT_ALL; cb++)
416         if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
417             idx = cb;
418     ppos = max_sfb;
419     while (ppos > 0) {
420         av_assert1(idx >= 0);
421         cb = idx;
422         stackrun[stack_len] = path[ppos][cb].run;
423         stackcb [stack_len] = cb;
424         idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
425         ppos -= path[ppos][cb].run;
426         stack_len++;
427     }
428     //perform actual band info encoding
429     start = 0;
430     for (i = stack_len - 1; i >= 0; i--) {
431         cb = aac_cb_out_map[stackcb[i]];
432         put_bits(&s->pb, 4, cb);
433         count = stackrun[i];
434         memset(sce->zeroes + win*16 + start, !cb, count);
435         //XXX: memset when band_type is also uint8_t
436         for (j = 0; j < count; j++) {
437             sce->band_type[win*16 + start] = cb;
438             start++;
439         }
440         while (count >= run_esc) {
441             put_bits(&s->pb, run_bits, run_esc);
442             count -= run_esc;
443         }
444         put_bits(&s->pb, run_bits, count);
445     }
446 }
447
448 static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
449                                   int win, int group_len, const float lambda)
450 {
451     BandCodingPath path[120][CB_TOT_ALL];
452     int w, swb, cb, start, size;
453     int i, j;
454     const int max_sfb  = sce->ics.max_sfb;
455     const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
456     const int run_esc  = (1 << run_bits) - 1;
457     int idx, ppos, count;
458     int stackrun[120], stackcb[120], stack_len;
459     float next_minbits = INFINITY;
460     int next_mincb = 0;
461
462     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
463     start = win*128;
464     for (cb = 0; cb < CB_TOT_ALL; cb++) {
465         path[0][cb].cost     = run_bits+4;
466         path[0][cb].prev_idx = -1;
467         path[0][cb].run      = 0;
468     }
469     for (swb = 0; swb < max_sfb; swb++) {
470         size = sce->ics.swb_sizes[swb];
471         if (sce->zeroes[win*16 + swb]) {
472             float cost_stay_here = path[swb][0].cost;
473             float cost_get_here  = next_minbits + run_bits + 4;
474             if (   run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
475                 != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
476                 cost_stay_here += run_bits;
477             if (cost_get_here < cost_stay_here) {
478                 path[swb+1][0].prev_idx = next_mincb;
479                 path[swb+1][0].cost     = cost_get_here;
480                 path[swb+1][0].run      = 1;
481             } else {
482                 path[swb+1][0].prev_idx = 0;
483                 path[swb+1][0].cost     = cost_stay_here;
484                 path[swb+1][0].run      = path[swb][0].run + 1;
485             }
486             next_minbits = path[swb+1][0].cost;
487             next_mincb = 0;
488             for (cb = 1; cb < CB_TOT_ALL; cb++) {
489                 path[swb+1][cb].cost = 61450;
490                 path[swb+1][cb].prev_idx = -1;
491                 path[swb+1][cb].run = 0;
492             }
493         } else {
494             float minbits = next_minbits;
495             int mincb = next_mincb;
496             int startcb = sce->band_type[win*16+swb];
497             startcb = aac_cb_in_map[startcb];
498             next_minbits = INFINITY;
499             next_mincb = 0;
500             for (cb = 0; cb < startcb; cb++) {
501                 path[swb+1][cb].cost = 61450;
502                 path[swb+1][cb].prev_idx = -1;
503                 path[swb+1][cb].run = 0;
504             }
505             for (cb = startcb; cb < CB_TOT_ALL; cb++) {
506                 float cost_stay_here, cost_get_here;
507                 float bits = 0.0f;
508                 if (cb >= 12 && sce->band_type[win*16+swb] != aac_cb_out_map[cb]) {
509                     path[swb+1][cb].cost = 61450;
510                     path[swb+1][cb].prev_idx = -1;
511                     path[swb+1][cb].run = 0;
512                     continue;
513                 }
514                 for (w = 0; w < group_len; w++) {
515                     bits += quantize_band_cost(s, sce->coeffs + start + w*128,
516                                                s->scoefs + start + w*128, size,
517                                                sce->sf_idx[(win+w)*16+swb],
518                                                aac_cb_out_map[cb],
519                                                0, INFINITY, NULL);
520                 }
521                 cost_stay_here = path[swb][cb].cost + bits;
522                 cost_get_here  = minbits            + bits + run_bits + 4;
523                 if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
524                     != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
525                     cost_stay_here += run_bits;
526                 if (cost_get_here < cost_stay_here) {
527                     path[swb+1][cb].prev_idx = mincb;
528                     path[swb+1][cb].cost     = cost_get_here;
529                     path[swb+1][cb].run      = 1;
530                 } else {
531                     path[swb+1][cb].prev_idx = cb;
532                     path[swb+1][cb].cost     = cost_stay_here;
533                     path[swb+1][cb].run      = path[swb][cb].run + 1;
534                 }
535                 if (path[swb+1][cb].cost < next_minbits) {
536                     next_minbits = path[swb+1][cb].cost;
537                     next_mincb = cb;
538                 }
539             }
540         }
541         start += sce->ics.swb_sizes[swb];
542     }
543
544     //convert resulting path from backward-linked list
545     stack_len = 0;
546     idx       = 0;
547     for (cb = 1; cb < CB_TOT_ALL; cb++)
548         if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
549             idx = cb;
550     ppos = max_sfb;
551     while (ppos > 0) {
552         av_assert1(idx >= 0);
553         cb = idx;
554         stackrun[stack_len] = path[ppos][cb].run;
555         stackcb [stack_len] = cb;
556         idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
557         ppos -= path[ppos][cb].run;
558         stack_len++;
559     }
560     //perform actual band info encoding
561     start = 0;
562     for (i = stack_len - 1; i >= 0; i--) {
563         cb = aac_cb_out_map[stackcb[i]];
564         put_bits(&s->pb, 4, cb);
565         count = stackrun[i];
566         memset(sce->zeroes + win*16 + start, !cb, count);
567         //XXX: memset when band_type is also uint8_t
568         for (j = 0; j < count; j++) {
569             sce->band_type[win*16 + start] = cb;
570             start++;
571         }
572         while (count >= run_esc) {
573             put_bits(&s->pb, run_bits, run_esc);
574             count -= run_esc;
575         }
576         put_bits(&s->pb, run_bits, count);
577     }
578 }
579
580 /** Return the minimum scalefactor where the quantized coef does not clip. */
581 static av_always_inline uint8_t coef2minsf(float coef) {
582     return av_clip_uint8(log2f(coef)*4 - 69 + SCALE_ONE_POS - SCALE_DIV_512);
583 }
584
585 /** Return the maximum scalefactor where the quantized coef is not zero. */
586 static av_always_inline uint8_t coef2maxsf(float coef) {
587     return av_clip_uint8(log2f(coef)*4 +  6 + SCALE_ONE_POS - SCALE_DIV_512);
588 }
589
/**
 * One node of the scalefactor trellis: a (band, scalefactor state) pair.
 */
typedef struct TrellisPath {
    float cost; ///< accumulated cost of the best path reaching this node
    int prev;   ///< state chosen in the previous stage on that path
} TrellisPath;
594
595 #define TRELLIS_STAGES 121
596 #define TRELLIS_STATES (SCALE_MAX_DIFF+1)
597
598 static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce)
599 {
600     int w, g, start = 0;
601     int minscaler_n = sce->sf_idx[0], minscaler_i = sce->sf_idx[0];
602     int bands = 0;
603
604     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
605         start = 0;
606         for (g = 0;  g < sce->ics.num_swb; g++) {
607             if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
608                 sce->sf_idx[w*16+g] = av_clip(ceilf(log2f(sce->is_ener[w*16+g])*2), -155, 100);
609                 minscaler_i = FFMIN(minscaler_i, sce->sf_idx[w*16+g]);
610                 bands++;
611             } else if (sce->band_type[w*16+g] == NOISE_BT) {
612                 sce->sf_idx[w*16+g] = av_clip(4+log2f(sce->pns_ener[w*16+g])*2, -100, 155);
613                 minscaler_n = FFMIN(minscaler_n, sce->sf_idx[w*16+g]);
614                 bands++;
615             }
616             start += sce->ics.swb_sizes[g];
617         }
618     }
619
620     if (!bands)
621         return;
622
623     /* Clip the scalefactor indices */
624     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
625         for (g = 0;  g < sce->ics.num_swb; g++) {
626             if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
627                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler_i, minscaler_i + SCALE_MAX_DIFF);
628             } else if (sce->band_type[w*16+g] == NOISE_BT) {
629                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler_n, minscaler_n + SCALE_MAX_DIFF);
630             }
631         }
632     }
633 }
634
635 static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
636                                        SingleChannelElement *sce,
637                                        const float lambda)
638 {
639     int q, w, w2, g, start = 0;
640     int i, j;
641     int idx;
642     TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
643     int bandaddr[TRELLIS_STAGES];
644     int minq;
645     float mincost;
646     float q0f = FLT_MAX, q1f = 0.0f, qnrgf = 0.0f;
647     int q0, q1, qcnt = 0;
648
649     for (i = 0; i < 1024; i++) {
650         float t = fabsf(sce->coeffs[i]);
651         if (t > 0.0f) {
652             q0f = FFMIN(q0f, t);
653             q1f = FFMAX(q1f, t);
654             qnrgf += t*t;
655             qcnt++;
656         }
657     }
658
659     if (!qcnt) {
660         memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
661         memset(sce->zeroes, 1, sizeof(sce->zeroes));
662         return;
663     }
664
665     //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
666     q0 = coef2minsf(q0f);
667     //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
668     q1 = coef2maxsf(q1f);
669     if (q1 - q0 > 60) {
670         int q0low  = q0;
671         int q1high = q1;
672         //minimum scalefactor index is when maximum nonzero coefficient after quantizing is not clipped
673         int qnrg = av_clip_uint8(log2f(sqrtf(qnrgf/qcnt))*4 - 31 + SCALE_ONE_POS - SCALE_DIV_512);
674         q1 = qnrg + 30;
675         q0 = qnrg - 30;
676         if (q0 < q0low) {
677             q1 += q0low - q0;
678             q0  = q0low;
679         } else if (q1 > q1high) {
680             q0 -= q1 - q1high;
681             q1  = q1high;
682         }
683     }
684
685     for (i = 0; i < TRELLIS_STATES; i++) {
686         paths[0][i].cost    = 0.0f;
687         paths[0][i].prev    = -1;
688     }
689     for (j = 1; j < TRELLIS_STAGES; j++) {
690         for (i = 0; i < TRELLIS_STATES; i++) {
691             paths[j][i].cost    = INFINITY;
692             paths[j][i].prev    = -2;
693         }
694     }
695     idx = 1;
696     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
697     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
698         start = w*128;
699         for (g = 0; g < sce->ics.num_swb; g++) {
700             const float *coefs = sce->coeffs + start;
701             float qmin, qmax;
702             int nz = 0;
703
704             bandaddr[idx] = w * 16 + g;
705             qmin = INT_MAX;
706             qmax = 0.0f;
707             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
708                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
709                 if (band->energy <= band->threshold || band->threshold == 0.0f) {
710                     sce->zeroes[(w+w2)*16+g] = 1;
711                     continue;
712                 }
713                 sce->zeroes[(w+w2)*16+g] = 0;
714                 nz = 1;
715                 for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
716                     float t = fabsf(coefs[w2*128+i]);
717                     if (t > 0.0f)
718                         qmin = FFMIN(qmin, t);
719                     qmax = FFMAX(qmax, t);
720                 }
721             }
722             if (nz) {
723                 int minscale, maxscale;
724                 float minrd = INFINITY;
725                 float maxval;
726                 //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
727                 minscale = coef2minsf(qmin);
728                 //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
729                 maxscale = coef2maxsf(qmax);
730                 minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
731                 maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
732                 maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
733                 for (q = minscale; q < maxscale; q++) {
734                     float dist = 0;
735                     int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
736                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
737                         FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
738                         dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
739                                                    q + q0, cb, lambda / band->threshold, INFINITY, NULL);
740                     }
741                     minrd = FFMIN(minrd, dist);
742
743                     for (i = 0; i < q1 - q0; i++) {
744                         float cost;
745                         cost = paths[idx - 1][i].cost + dist
746                                + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
747                         if (cost < paths[idx][q].cost) {
748                             paths[idx][q].cost    = cost;
749                             paths[idx][q].prev    = i;
750                         }
751                     }
752                 }
753             } else {
754                 for (q = 0; q < q1 - q0; q++) {
755                     paths[idx][q].cost = paths[idx - 1][q].cost + 1;
756                     paths[idx][q].prev = q;
757                 }
758             }
759             sce->zeroes[w*16+g] = !nz;
760             start += sce->ics.swb_sizes[g];
761             idx++;
762         }
763     }
764     idx--;
765     mincost = paths[idx][0].cost;
766     minq    = 0;
767     for (i = 1; i < TRELLIS_STATES; i++) {
768         if (paths[idx][i].cost < mincost) {
769             mincost = paths[idx][i].cost;
770             minq = i;
771         }
772     }
773     while (idx) {
774         sce->sf_idx[bandaddr[idx]] = minq + q0;
775         minq = paths[idx][minq].prev;
776         idx--;
777     }
778     //set the same quantizers inside window groups
779     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
780         for (g = 0;  g < sce->ics.num_swb; g++)
781             for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
782                 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
783 }
784
785 /**
786  * two-loop quantizers search taken from ISO 13818-7 Appendix C
787  */
788 static void search_for_quantizers_twoloop(AVCodecContext *avctx,
789                                           AACEncContext *s,
790                                           SingleChannelElement *sce,
791                                           const float lambda)
792 {
793     int start = 0, i, w, w2, g;
794     int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f);
795     const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f;
796     float dists[128] = { 0 }, uplims[128] = { 0 };
797     float maxvals[128];
798     int noise_sf[128] = { 0 };
799     int fflag, minscaler, minscaler_n;
800     int its  = 0;
801     int allz = 0;
802     float minthr = INFINITY;
803
804     // for values above this the decoder might end up in an endless loop
805     // due to always having more bits than what can be encoded.
806     destbits = FFMIN(destbits, 5800);
807     //XXX: some heuristic to determine initial quantizers will reduce search time
808     //determine zero bands and upper limits
809     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
810         start = 0;
811         for (g = 0;  g < sce->ics.num_swb; g++) {
812             int nz = 0;
813             float uplim = 0.0f, energy = 0.0f;
814             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
815                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
816                 uplim += band->threshold;
817                 energy += band->energy;
818                 if (band->energy <= band->threshold || band->threshold == 0.0f) {
819                     sce->zeroes[(w+w2)*16+g] = 1;
820                     continue;
821                 }
822                 nz = 1;
823             }
824             uplims[w*16+g] = uplim *512;
825             if (s->options.pns && start*freq_mult > NOISE_LOW_LIMIT && energy < uplim * 1.2f) {
826                 noise_sf[w*16+g] = av_clip(4+FFMIN(log2f(energy)*2,255), -100, 155);
827                 sce->band_type[w*16+g] = NOISE_BT;
828                 nz= 1;
829             } else { /** Band type will be determined by the twoloop algorithm */
830                 sce->band_type[w*16+g] = 0;
831             }
832             sce->zeroes[w*16+g] = !nz;
833             if (nz)
834                 minthr = FFMIN(minthr, uplim);
835             allz |= nz;
836             start += sce->ics.swb_sizes[g];
837         }
838     }
839     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
840         for (g = 0;  g < sce->ics.num_swb; g++) {
841             if (sce->zeroes[w*16+g]) {
842                 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
843                 continue;
844             }
845             sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
846         }
847     }
848
849     if (!allz)
850         return;
851     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
852
853     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
854         start = w*128;
855         for (g = 0;  g < sce->ics.num_swb; g++) {
856             const float *scaled = s->scoefs + start;
857             maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
858             start += sce->ics.swb_sizes[g];
859         }
860     }
861
862     //perform two-loop search
863     //outer loop - improve quality
864     do {
865         int tbits, qstep;
866         minscaler = sce->sf_idx[0];
867         minscaler_n = sce->sf_idx[0];
868         //inner loop - quantize spectrum to fit into given number of bits
869         qstep = its ? 1 : 32;
870         do {
871             int prev = -1;
872             tbits = 0;
873             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
874                 start = w*128;
875                 for (g = 0;  g < sce->ics.num_swb; g++) {
876                     const float *coefs = sce->coeffs + start;
877                     const float *scaled = s->scoefs + start;
878                     int bits = 0;
879                     int cb;
880                     float dist = 0.0f;
881
882                     if (sce->band_type[w*16+g] == NOISE_BT) {
883                         minscaler_n = FFMIN(minscaler_n, noise_sf[w*16+g]);
884                         start += sce->ics.swb_sizes[g];
885                         continue;
886                     } else if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
887                         start += sce->ics.swb_sizes[g];
888                         continue;
889                     }
890                     minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
891                     cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
892                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
893                         int b;
894                         dist += quantize_band_cost(s, coefs + w2*128,
895                                                    scaled + w2*128,
896                                                    sce->ics.swb_sizes[g],
897                                                    sce->sf_idx[w*16+g],
898                                                    cb,
899                                                    1.0f,
900                                                    INFINITY,
901                                                    &b);
902                         bits += b;
903                     }
904                     dists[w*16+g] = dist - bits;
905                     if (prev != -1) {
906                         bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
907                     }
908                     tbits += bits;
909                     start += sce->ics.swb_sizes[g];
910                     prev = sce->sf_idx[w*16+g];
911                 }
912             }
913             if (tbits > destbits) {
914                 for (i = 0; i < 128; i++)
915                     if (sce->sf_idx[i] < 218 - qstep)
916                         sce->sf_idx[i] += qstep;
917             } else {
918                 for (i = 0; i < 128; i++)
919                     if (sce->sf_idx[i] > 60 - qstep)
920                         sce->sf_idx[i] -= qstep;
921             }
922             qstep >>= 1;
923             if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
924                 qstep = 1;
925         } while (qstep);
926
927         fflag = 0;
928         minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
929
930         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
931             for (g = 0; g < sce->ics.num_swb; g++)
932                 if (sce->band_type[w*16+g] == NOISE_BT)
933                     sce->sf_idx[w*16+g] = av_clip(noise_sf[w*16+g], minscaler_n, minscaler_n + SCALE_MAX_DIFF);
934
935         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
936             for (g = 0; g < sce->ics.num_swb; g++) {
937                 int prevsc = sce->sf_idx[w*16+g];
938                 if (sce->band_type[w*16+g] == NOISE_BT)
939                     continue;
940                 if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
941                     if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
942                         sce->sf_idx[w*16+g]--;
943                     else //Try to make sure there is some energy in every band
944                         sce->sf_idx[w*16+g]-=2;
945                 }
946                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
947                 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
948                 if (sce->sf_idx[w*16+g] != prevsc)
949                     fflag = 1;
950                 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
951             }
952         }
953         its++;
954     } while (fflag && its < 10);
955 }
956
957 static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
958                                        SingleChannelElement *sce,
959                                        const float lambda)
960 {
961     int start = 0, i, w, w2, g;
962     float uplim[128], maxq[128];
963     int minq, maxsf;
964     float distfact = ((sce->ics.num_windows > 1) ? 85.80 : 147.84) / lambda;
965     int last = 0, lastband = 0, curband = 0;
966     float avg_energy = 0.0;
967     if (sce->ics.num_windows == 1) {
968         start = 0;
969         for (i = 0; i < 1024; i++) {
970             if (i - start >= sce->ics.swb_sizes[curband]) {
971                 start += sce->ics.swb_sizes[curband];
972                 curband++;
973             }
974             if (sce->coeffs[i]) {
975                 avg_energy += sce->coeffs[i] * sce->coeffs[i];
976                 last = i;
977                 lastband = curband;
978             }
979         }
980     } else {
981         for (w = 0; w < 8; w++) {
982             const float *coeffs = sce->coeffs + w*128;
983             curband = start = 0;
984             for (i = 0; i < 128; i++) {
985                 if (i - start >= sce->ics.swb_sizes[curband]) {
986                     start += sce->ics.swb_sizes[curband];
987                     curband++;
988                 }
989                 if (coeffs[i]) {
990                     avg_energy += coeffs[i] * coeffs[i];
991                     last = FFMAX(last, i);
992                     lastband = FFMAX(lastband, curband);
993                 }
994             }
995         }
996     }
997     last++;
998     avg_energy /= last;
999     if (avg_energy == 0.0f) {
1000         for (i = 0; i < FF_ARRAY_ELEMS(sce->sf_idx); i++)
1001             sce->sf_idx[i] = SCALE_ONE_POS;
1002         return;
1003     }
1004     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
1005         start = w*128;
1006         for (g = 0; g < sce->ics.num_swb; g++) {
1007             float *coefs   = sce->coeffs + start;
1008             const int size = sce->ics.swb_sizes[g];
1009             int start2 = start, end2 = start + size, peakpos = start;
1010             float maxval = -1, thr = 0.0f, t;
1011             maxq[w*16+g] = 0.0f;
1012             if (g > lastband) {
1013                 maxq[w*16+g] = 0.0f;
1014                 start += size;
1015                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
1016                     memset(coefs + w2*128, 0, sizeof(coefs[0])*size);
1017                 continue;
1018             }
1019             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
1020                 for (i = 0; i < size; i++) {
1021                     float t = coefs[w2*128+i]*coefs[w2*128+i];
1022                     maxq[w*16+g] = FFMAX(maxq[w*16+g], fabsf(coefs[w2*128 + i]));
1023                     thr += t;
1024                     if (sce->ics.num_windows == 1 && maxval < t) {
1025                         maxval  = t;
1026                         peakpos = start+i;
1027                     }
1028                 }
1029             }
1030             if (sce->ics.num_windows == 1) {
1031                 start2 = FFMAX(peakpos - 2, start2);
1032                 end2   = FFMIN(peakpos + 3, end2);
1033             } else {
1034                 start2 -= start;
1035                 end2   -= start;
1036             }
1037             start += size;
1038             thr = pow(thr / (avg_energy * (end2 - start2)), 0.3 + 0.1*(lastband - g) / lastband);
1039             t   = 1.0 - (1.0 * start2 / last);
1040             uplim[w*16+g] = distfact / (1.4 * thr + t*t*t + 0.075);
1041         }
1042     }
1043     memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
1044     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
1045     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
1046         start = w*128;
1047         for (g = 0;  g < sce->ics.num_swb; g++) {
1048             const float *coefs  = sce->coeffs + start;
1049             const float *scaled = s->scoefs   + start;
1050             const int size      = sce->ics.swb_sizes[g];
1051             int scf, prev_scf, step;
1052             int min_scf = -1, max_scf = 256;
1053             float curdiff;
1054             if (maxq[w*16+g] < 21.544) {
1055                 sce->zeroes[w*16+g] = 1;
1056                 start += size;
1057                 continue;
1058             }
1059             sce->zeroes[w*16+g] = 0;
1060             scf  = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2f(1/maxq[w*16+g])*16/3, 60, 218);
1061             for (;;) {
1062                 float dist = 0.0f;
1063                 int quant_max;
1064
1065                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
1066                     int b;
1067                     dist += quantize_band_cost(s, coefs + w2*128,
1068                                                scaled + w2*128,
1069                                                sce->ics.swb_sizes[g],
1070                                                scf,
1071                                                ESC_BT,
1072                                                lambda,
1073                                                INFINITY,
1074                                                &b);
1075                     dist -= b;
1076                 }
1077                 dist *= 1.0f / 512.0f / lambda;
1078                 quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[POW_SF2_ZERO - scf + SCALE_ONE_POS - SCALE_DIV_512]);
1079                 if (quant_max >= 8191) { // too much, return to the previous quantizer
1080                     sce->sf_idx[w*16+g] = prev_scf;
1081                     break;
1082                 }
1083                 prev_scf = scf;
1084                 curdiff = fabsf(dist - uplim[w*16+g]);
1085                 if (curdiff <= 1.0f)
1086                     step = 0;
1087                 else
1088                     step = log2f(curdiff);
1089                 if (dist > uplim[w*16+g])
1090                     step = -step;
1091                 scf += step;
1092                 scf = av_clip_uint8(scf);
1093                 step = scf - prev_scf;
1094                 if (FFABS(step) <= 1 || (step > 0 && scf >= max_scf) || (step < 0 && scf <= min_scf)) {
1095                     sce->sf_idx[w*16+g] = av_clip(scf, min_scf, max_scf);
1096                     break;
1097                 }
1098                 if (step > 0)
1099                     min_scf = prev_scf;
1100                 else
1101                     max_scf = prev_scf;
1102             }
1103             start += size;
1104         }
1105     }
1106     minq = sce->sf_idx[0] ? sce->sf_idx[0] : INT_MAX;
1107     for (i = 1; i < 128; i++) {
1108         if (!sce->sf_idx[i])
1109             sce->sf_idx[i] = sce->sf_idx[i-1];
1110         else
1111             minq = FFMIN(minq, sce->sf_idx[i]);
1112     }
1113     if (minq == INT_MAX)
1114         minq = 0;
1115     minq = FFMIN(minq, SCALE_MAX_POS);
1116     maxsf = FFMIN(minq + SCALE_MAX_DIFF, SCALE_MAX_POS);
1117     for (i = 126; i >= 0; i--) {
1118         if (!sce->sf_idx[i])
1119             sce->sf_idx[i] = sce->sf_idx[i+1];
1120         sce->sf_idx[i] = av_clip(sce->sf_idx[i], minq, maxsf);
1121     }
1122 }
1123
1124 static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
1125                                        SingleChannelElement *sce,
1126                                        const float lambda)
1127 {
1128     int i, w, w2, g;
1129     int minq = 255;
1130
1131     memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
1132     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
1133         for (g = 0; g < sce->ics.num_swb; g++) {
1134             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
1135                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
1136                 if (band->energy <= band->threshold) {
1137                     sce->sf_idx[(w+w2)*16+g] = 218;
1138                     sce->zeroes[(w+w2)*16+g] = 1;
1139                 } else {
1140                     sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2f(band->threshold), 80, 218);
1141                     sce->zeroes[(w+w2)*16+g] = 0;
1142                 }
1143                 minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
1144             }
1145         }
1146     }
1147     for (i = 0; i < 128; i++) {
1148         sce->sf_idx[i] = 140;
1149         //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
1150     }
1151     //set the same quantizers inside window groups
1152     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
1153         for (g = 0;  g < sce->ics.num_swb; g++)
1154             for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
1155                 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
1156 }
1157
1158 static void search_for_ms(AACEncContext *s, ChannelElement *cpe,
1159                           const float lambda)
1160 {
1161     int start = 0, i, w, w2, g;
1162     float M[128], S[128];
1163     float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
1164     SingleChannelElement *sce0 = &cpe->ch[0];
1165     SingleChannelElement *sce1 = &cpe->ch[1];
1166     if (!cpe->common_window)
1167         return;
1168     for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
1169         for (g = 0;  g < sce0->ics.num_swb; g++) {
1170             if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
1171                 float dist1 = 0.0f, dist2 = 0.0f;
1172                 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
1173                     FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
1174                     FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
1175                     float minthr = FFMIN(band0->threshold, band1->threshold);
1176                     float maxthr = FFMAX(band0->threshold, band1->threshold);
1177                     for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
1178                         M[i] = (sce0->pcoeffs[start+w2*128+i]
1179                               + sce1->pcoeffs[start+w2*128+i]) * 0.5;
1180                         S[i] =  M[i]
1181                               - sce1->pcoeffs[start+w2*128+i];
1182                     }
1183                     abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
1184                     abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
1185                     abs_pow34_v(M34, M,                         sce0->ics.swb_sizes[g]);
1186                     abs_pow34_v(S34, S,                         sce0->ics.swb_sizes[g]);
1187                     dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
1188                                                 L34,
1189                                                 sce0->ics.swb_sizes[g],
1190                                                 sce0->sf_idx[(w+w2)*16+g],
1191                                                 sce0->band_type[(w+w2)*16+g],
1192                                                 lambda / band0->threshold, INFINITY, NULL);
1193                     dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
1194                                                 R34,
1195                                                 sce1->ics.swb_sizes[g],
1196                                                 sce1->sf_idx[(w+w2)*16+g],
1197                                                 sce1->band_type[(w+w2)*16+g],
1198                                                 lambda / band1->threshold, INFINITY, NULL);
1199                     dist2 += quantize_band_cost(s, M,
1200                                                 M34,
1201                                                 sce0->ics.swb_sizes[g],
1202                                                 sce0->sf_idx[(w+w2)*16+g],
1203                                                 sce0->band_type[(w+w2)*16+g],
1204                                                 lambda / maxthr, INFINITY, NULL);
1205                     dist2 += quantize_band_cost(s, S,
1206                                                 S34,
1207                                                 sce1->ics.swb_sizes[g],
1208                                                 sce1->sf_idx[(w+w2)*16+g],
1209                                                 sce1->band_type[(w+w2)*16+g],
1210                                                 lambda / minthr, INFINITY, NULL);
1211                 }
1212                 cpe->ms_mask[w*16+g] = dist2 < dist1;
1213             }
1214             start += sce0->ics.swb_sizes[g];
1215         }
1216     }
1217 }
1218
1219 AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
1220     [AAC_CODER_FAAC] = {
1221         search_for_quantizers_faac,
1222         encode_window_bands_info,
1223         quantize_and_encode_band,
1224         search_for_ms,
1225     },
1226     [AAC_CODER_ANMR] = {
1227         search_for_quantizers_anmr,
1228         encode_window_bands_info,
1229         quantize_and_encode_band,
1230         search_for_ms,
1231     },
1232     [AAC_CODER_TWOLOOP] = {
1233         search_for_quantizers_twoloop,
1234         codebook_trellis_rate,
1235         quantize_and_encode_band,
1236         search_for_ms,
1237     },
1238     [AAC_CODER_FAST] = {
1239         search_for_quantizers_fast,
1240         encode_window_bands_info,
1241         quantize_and_encode_band,
1242         search_for_ms,
1243     },
1244 };