]> git.sesse.net Git - ffmpeg/blob - libavcodec/aaccoder.c
aaccoder: respect cutoff when marking bands as PNS
[ffmpeg] / libavcodec / aaccoder.c
1 /*
2  * AAC coefficients encoder
3  * Copyright (C) 2008-2009 Konstantin Shishkov
4  *
5  * This file is part of FFmpeg.
6  *
7  * FFmpeg is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * FFmpeg is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with FFmpeg; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 /**
23  * @file
24  * AAC coefficients encoder
25  */
26
27 /***********************************
28  *              TODOs:
29  * speedup quantizer selection
30  * add sane pulse detection
31  ***********************************/
32
33 #include "libavutil/libm.h" // brought forward to work around cygwin header breakage
34
35 #include <float.h>
36 #include "libavutil/mathematics.h"
37 #include "avcodec.h"
38 #include "put_bits.h"
39 #include "aac.h"
40 #include "aacenc.h"
41 #include "aactab.h"
42 #include "aacenctab.h"
43 #include "aacenc_utils.h"
44 #include "aacenc_quantization.h"
45 #include "aac_tablegen_decl.h"
46
47 #include "aacenc_is.h"
48 #include "aacenc_tns.h"
49 #include "aacenc_pred.h"
50
51 /** Frequency in Hz for lower limit of noise substitution **/
52 #define NOISE_LOW_LIMIT 4000
53
54 /* Parameter of f(x) = a*(lambda/100), defines the maximum fourier spread
55  * beyond which no PNS is used (since the SFBs contain tone rather than noise) */
56 #define NOISE_SPREAD_THRESHOLD 0.5073f
57
58 /* Parameter of f(x) = a*(100/lambda), defines how much PNS is allowed to
59  * replace low energy non zero bands */
60 #define NOISE_LAMBDA_REPLACE 1.948f
61
62 /**
63  * structure used in optimal codebook search
64  */
65 typedef struct BandCodingPath {
66     int prev_idx; ///< pointer to the previous path point
67     float cost;   ///< path cost
68     int run;
69 } BandCodingPath;
70
71 /**
72  * Encode band info for single window group bands.
73  */
74 static void encode_window_bands_info(AACEncContext *s, SingleChannelElement *sce,
75                                      int win, int group_len, const float lambda)
76 {
77     BandCodingPath path[120][CB_TOT_ALL];
78     int w, swb, cb, start, size;
79     int i, j;
80     const int max_sfb  = sce->ics.max_sfb;
81     const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
82     const int run_esc  = (1 << run_bits) - 1;
83     int idx, ppos, count;
84     int stackrun[120], stackcb[120], stack_len;
85     float next_minrd = INFINITY;
86     int next_mincb = 0;
87
88     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
89     start = win*128;
90     for (cb = 0; cb < CB_TOT_ALL; cb++) {
91         path[0][cb].cost     = 0.0f;
92         path[0][cb].prev_idx = -1;
93         path[0][cb].run      = 0;
94     }
95     for (swb = 0; swb < max_sfb; swb++) {
96         size = sce->ics.swb_sizes[swb];
97         if (sce->zeroes[win*16 + swb]) {
98             for (cb = 0; cb < CB_TOT_ALL; cb++) {
99                 path[swb+1][cb].prev_idx = cb;
100                 path[swb+1][cb].cost     = path[swb][cb].cost;
101                 path[swb+1][cb].run      = path[swb][cb].run + 1;
102             }
103         } else {
104             float minrd = next_minrd;
105             int mincb = next_mincb;
106             next_minrd = INFINITY;
107             next_mincb = 0;
108             for (cb = 0; cb < CB_TOT_ALL; cb++) {
109                 float cost_stay_here, cost_get_here;
110                 float rd = 0.0f;
111                 if (cb >= 12 && sce->band_type[win*16+swb] < aac_cb_out_map[cb] ||
112                     cb  < aac_cb_in_map[sce->band_type[win*16+swb]] && sce->band_type[win*16+swb] > aac_cb_out_map[cb]) {
113                     path[swb+1][cb].prev_idx = -1;
114                     path[swb+1][cb].cost     = INFINITY;
115                     path[swb+1][cb].run      = path[swb][cb].run + 1;
116                     continue;
117                 }
118                 for (w = 0; w < group_len; w++) {
119                     FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(win+w)*16+swb];
120                     rd += quantize_band_cost(s, &sce->coeffs[start + w*128],
121                                              &s->scoefs[start + w*128], size,
122                                              sce->sf_idx[(win+w)*16+swb], aac_cb_out_map[cb],
123                                              lambda / band->threshold, INFINITY, NULL, 0);
124                 }
125                 cost_stay_here = path[swb][cb].cost + rd;
126                 cost_get_here  = minrd              + rd + run_bits + 4;
127                 if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
128                     != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
129                     cost_stay_here += run_bits;
130                 if (cost_get_here < cost_stay_here) {
131                     path[swb+1][cb].prev_idx = mincb;
132                     path[swb+1][cb].cost     = cost_get_here;
133                     path[swb+1][cb].run      = 1;
134                 } else {
135                     path[swb+1][cb].prev_idx = cb;
136                     path[swb+1][cb].cost     = cost_stay_here;
137                     path[swb+1][cb].run      = path[swb][cb].run + 1;
138                 }
139                 if (path[swb+1][cb].cost < next_minrd) {
140                     next_minrd = path[swb+1][cb].cost;
141                     next_mincb = cb;
142                 }
143             }
144         }
145         start += sce->ics.swb_sizes[swb];
146     }
147
148     //convert resulting path from backward-linked list
149     stack_len = 0;
150     idx       = 0;
151     for (cb = 1; cb < CB_TOT_ALL; cb++)
152         if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
153             idx = cb;
154     ppos = max_sfb;
155     while (ppos > 0) {
156         av_assert1(idx >= 0);
157         cb = idx;
158         stackrun[stack_len] = path[ppos][cb].run;
159         stackcb [stack_len] = cb;
160         idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
161         ppos -= path[ppos][cb].run;
162         stack_len++;
163     }
164     //perform actual band info encoding
165     start = 0;
166     for (i = stack_len - 1; i >= 0; i--) {
167         cb = aac_cb_out_map[stackcb[i]];
168         put_bits(&s->pb, 4, cb);
169         count = stackrun[i];
170         memset(sce->zeroes + win*16 + start, !cb, count);
171         //XXX: memset when band_type is also uint8_t
172         for (j = 0; j < count; j++) {
173             sce->band_type[win*16 + start] = cb;
174             start++;
175         }
176         while (count >= run_esc) {
177             put_bits(&s->pb, run_bits, run_esc);
178             count -= run_esc;
179         }
180         put_bits(&s->pb, run_bits, count);
181     }
182 }
183
184 static void codebook_trellis_rate(AACEncContext *s, SingleChannelElement *sce,
185                                   int win, int group_len, const float lambda)
186 {
187     BandCodingPath path[120][CB_TOT_ALL];
188     int w, swb, cb, start, size;
189     int i, j;
190     const int max_sfb  = sce->ics.max_sfb;
191     const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
192     const int run_esc  = (1 << run_bits) - 1;
193     int idx, ppos, count;
194     int stackrun[120], stackcb[120], stack_len;
195     float next_minbits = INFINITY;
196     int next_mincb = 0;
197
198     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
199     start = win*128;
200     for (cb = 0; cb < CB_TOT_ALL; cb++) {
201         path[0][cb].cost     = run_bits+4;
202         path[0][cb].prev_idx = -1;
203         path[0][cb].run      = 0;
204     }
205     for (swb = 0; swb < max_sfb; swb++) {
206         size = sce->ics.swb_sizes[swb];
207         if (sce->zeroes[win*16 + swb]) {
208             float cost_stay_here = path[swb][0].cost;
209             float cost_get_here  = next_minbits + run_bits + 4;
210             if (   run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
211                 != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
212                 cost_stay_here += run_bits;
213             if (cost_get_here < cost_stay_here) {
214                 path[swb+1][0].prev_idx = next_mincb;
215                 path[swb+1][0].cost     = cost_get_here;
216                 path[swb+1][0].run      = 1;
217             } else {
218                 path[swb+1][0].prev_idx = 0;
219                 path[swb+1][0].cost     = cost_stay_here;
220                 path[swb+1][0].run      = path[swb][0].run + 1;
221             }
222             next_minbits = path[swb+1][0].cost;
223             next_mincb = 0;
224             for (cb = 1; cb < CB_TOT_ALL; cb++) {
225                 path[swb+1][cb].cost = 61450;
226                 path[swb+1][cb].prev_idx = -1;
227                 path[swb+1][cb].run = 0;
228             }
229         } else {
230             float minbits = next_minbits;
231             int mincb = next_mincb;
232             int startcb = sce->band_type[win*16+swb];
233             startcb = aac_cb_in_map[startcb];
234             next_minbits = INFINITY;
235             next_mincb = 0;
236             for (cb = 0; cb < startcb; cb++) {
237                 path[swb+1][cb].cost = 61450;
238                 path[swb+1][cb].prev_idx = -1;
239                 path[swb+1][cb].run = 0;
240             }
241             for (cb = startcb; cb < CB_TOT_ALL; cb++) {
242                 float cost_stay_here, cost_get_here;
243                 float bits = 0.0f;
244                 if (cb >= 12 && sce->band_type[win*16+swb] != aac_cb_out_map[cb]) {
245                     path[swb+1][cb].cost = 61450;
246                     path[swb+1][cb].prev_idx = -1;
247                     path[swb+1][cb].run = 0;
248                     continue;
249                 }
250                 for (w = 0; w < group_len; w++) {
251                     bits += quantize_band_cost(s, &sce->coeffs[start + w*128],
252                                                &s->scoefs[start + w*128], size,
253                                                sce->sf_idx[win*16+swb],
254                                                aac_cb_out_map[cb],
255                                                0, INFINITY, NULL, 0);
256                 }
257                 cost_stay_here = path[swb][cb].cost + bits;
258                 cost_get_here  = minbits            + bits + run_bits + 4;
259                 if (   run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
260                     != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
261                     cost_stay_here += run_bits;
262                 if (cost_get_here < cost_stay_here) {
263                     path[swb+1][cb].prev_idx = mincb;
264                     path[swb+1][cb].cost     = cost_get_here;
265                     path[swb+1][cb].run      = 1;
266                 } else {
267                     path[swb+1][cb].prev_idx = cb;
268                     path[swb+1][cb].cost     = cost_stay_here;
269                     path[swb+1][cb].run      = path[swb][cb].run + 1;
270                 }
271                 if (path[swb+1][cb].cost < next_minbits) {
272                     next_minbits = path[swb+1][cb].cost;
273                     next_mincb = cb;
274                 }
275             }
276         }
277         start += sce->ics.swb_sizes[swb];
278     }
279
280     //convert resulting path from backward-linked list
281     stack_len = 0;
282     idx       = 0;
283     for (cb = 1; cb < CB_TOT_ALL; cb++)
284         if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
285             idx = cb;
286     ppos = max_sfb;
287     while (ppos > 0) {
288         av_assert1(idx >= 0);
289         cb = idx;
290         stackrun[stack_len] = path[ppos][cb].run;
291         stackcb [stack_len] = cb;
292         idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
293         ppos -= path[ppos][cb].run;
294         stack_len++;
295     }
296     //perform actual band info encoding
297     start = 0;
298     for (i = stack_len - 1; i >= 0; i--) {
299         cb = aac_cb_out_map[stackcb[i]];
300         put_bits(&s->pb, 4, cb);
301         count = stackrun[i];
302         memset(sce->zeroes + win*16 + start, !cb, count);
303         //XXX: memset when band_type is also uint8_t
304         for (j = 0; j < count; j++) {
305             sce->band_type[win*16 + start] = cb;
306             start++;
307         }
308         while (count >= run_esc) {
309             put_bits(&s->pb, run_bits, run_esc);
310             count -= run_esc;
311         }
312         put_bits(&s->pb, run_bits, count);
313     }
314 }
315
316 typedef struct TrellisPath {
317     float cost;
318     int prev;
319 } TrellisPath;
320
321 #define TRELLIS_STAGES 121
322 #define TRELLIS_STATES (SCALE_MAX_DIFF+1)
323
324 static void set_special_band_scalefactors(AACEncContext *s, SingleChannelElement *sce)
325 {
326     int w, g, start = 0;
327     int minscaler_n = sce->sf_idx[0], minscaler_i = sce->sf_idx[0];
328     int bands = 0;
329
330     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
331         start = 0;
332         for (g = 0;  g < sce->ics.num_swb; g++) {
333             if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
334                 sce->sf_idx[w*16+g] = av_clip(roundf(log2f(sce->is_ener[w*16+g])*2), -155, 100);
335                 minscaler_i = FFMIN(minscaler_i, sce->sf_idx[w*16+g]);
336                 bands++;
337             } else if (sce->band_type[w*16+g] == NOISE_BT) {
338                 sce->sf_idx[w*16+g] = av_clip(3+ceilf(log2f(sce->pns_ener[w*16+g])*2), -100, 155);
339                 minscaler_n = FFMIN(minscaler_n, sce->sf_idx[w*16+g]);
340                 bands++;
341             }
342             start += sce->ics.swb_sizes[g];
343         }
344     }
345
346     if (!bands)
347         return;
348
349     /* Clip the scalefactor indices */
350     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
351         for (g = 0;  g < sce->ics.num_swb; g++) {
352             if (sce->band_type[w*16+g] == INTENSITY_BT || sce->band_type[w*16+g] == INTENSITY_BT2) {
353                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler_i, minscaler_i + SCALE_MAX_DIFF);
354             } else if (sce->band_type[w*16+g] == NOISE_BT) {
355                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler_n, minscaler_n + SCALE_MAX_DIFF);
356             }
357         }
358     }
359 }
360
361 static void search_for_quantizers_anmr(AVCodecContext *avctx, AACEncContext *s,
362                                        SingleChannelElement *sce,
363                                        const float lambda)
364 {
365     int q, w, w2, g, start = 0;
366     int i, j;
367     int idx;
368     TrellisPath paths[TRELLIS_STAGES][TRELLIS_STATES];
369     int bandaddr[TRELLIS_STAGES];
370     int minq;
371     float mincost;
372     float q0f = FLT_MAX, q1f = 0.0f, qnrgf = 0.0f;
373     int q0, q1, qcnt = 0;
374
375     for (i = 0; i < 1024; i++) {
376         float t = fabsf(sce->coeffs[i]);
377         if (t > 0.0f) {
378             q0f = FFMIN(q0f, t);
379             q1f = FFMAX(q1f, t);
380             qnrgf += t*t;
381             qcnt++;
382         }
383     }
384
385     if (!qcnt) {
386         memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
387         memset(sce->zeroes, 1, sizeof(sce->zeroes));
388         return;
389     }
390
391     //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
392     q0 = coef2minsf(q0f);
393     //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
394     q1 = coef2maxsf(q1f);
395     if (q1 - q0 > 60) {
396         int q0low  = q0;
397         int q1high = q1;
398         //minimum scalefactor index is when maximum nonzero coefficient after quantizing is not clipped
399         int qnrg = av_clip_uint8(log2f(sqrtf(qnrgf/qcnt))*4 - 31 + SCALE_ONE_POS - SCALE_DIV_512);
400         q1 = qnrg + 30;
401         q0 = qnrg - 30;
402         if (q0 < q0low) {
403             q1 += q0low - q0;
404             q0  = q0low;
405         } else if (q1 > q1high) {
406             q0 -= q1 - q1high;
407             q1  = q1high;
408         }
409     }
410
411     for (i = 0; i < TRELLIS_STATES; i++) {
412         paths[0][i].cost    = 0.0f;
413         paths[0][i].prev    = -1;
414     }
415     for (j = 1; j < TRELLIS_STAGES; j++) {
416         for (i = 0; i < TRELLIS_STATES; i++) {
417             paths[j][i].cost    = INFINITY;
418             paths[j][i].prev    = -2;
419         }
420     }
421     idx = 1;
422     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
423     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
424         start = w*128;
425         for (g = 0; g < sce->ics.num_swb; g++) {
426             const float *coefs = &sce->coeffs[start];
427             float qmin, qmax;
428             int nz = 0;
429
430             bandaddr[idx] = w * 16 + g;
431             qmin = INT_MAX;
432             qmax = 0.0f;
433             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
434                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
435                 if (band->energy <= band->threshold || band->threshold == 0.0f) {
436                     sce->zeroes[(w+w2)*16+g] = 1;
437                     continue;
438                 }
439                 sce->zeroes[(w+w2)*16+g] = 0;
440                 nz = 1;
441                 for (i = 0; i < sce->ics.swb_sizes[g]; i++) {
442                     float t = fabsf(coefs[w2*128+i]);
443                     if (t > 0.0f)
444                         qmin = FFMIN(qmin, t);
445                     qmax = FFMAX(qmax, t);
446                 }
447             }
448             if (nz) {
449                 int minscale, maxscale;
450                 float minrd = INFINITY;
451                 float maxval;
452                 //minimum scalefactor index is when minimum nonzero coefficient after quantizing is not clipped
453                 minscale = coef2minsf(qmin);
454                 //maximum scalefactor index is when maximum coefficient after quantizing is still not zero
455                 maxscale = coef2maxsf(qmax);
456                 minscale = av_clip(minscale - q0, 0, TRELLIS_STATES - 1);
457                 maxscale = av_clip(maxscale - q0, 0, TRELLIS_STATES);
458                 maxval = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], s->scoefs+start);
459                 for (q = minscale; q < maxscale; q++) {
460                     float dist = 0;
461                     int cb = find_min_book(maxval, sce->sf_idx[w*16+g]);
462                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
463                         FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
464                         dist += quantize_band_cost(s, coefs + w2*128, s->scoefs + start + w2*128, sce->ics.swb_sizes[g],
465                                                    q + q0, cb, lambda / band->threshold, INFINITY, NULL, 0);
466                     }
467                     minrd = FFMIN(minrd, dist);
468
469                     for (i = 0; i < q1 - q0; i++) {
470                         float cost;
471                         cost = paths[idx - 1][i].cost + dist
472                                + ff_aac_scalefactor_bits[q - i + SCALE_DIFF_ZERO];
473                         if (cost < paths[idx][q].cost) {
474                             paths[idx][q].cost    = cost;
475                             paths[idx][q].prev    = i;
476                         }
477                     }
478                 }
479             } else {
480                 for (q = 0; q < q1 - q0; q++) {
481                     paths[idx][q].cost = paths[idx - 1][q].cost + 1;
482                     paths[idx][q].prev = q;
483                 }
484             }
485             sce->zeroes[w*16+g] = !nz;
486             start += sce->ics.swb_sizes[g];
487             idx++;
488         }
489     }
490     idx--;
491     mincost = paths[idx][0].cost;
492     minq    = 0;
493     for (i = 1; i < TRELLIS_STATES; i++) {
494         if (paths[idx][i].cost < mincost) {
495             mincost = paths[idx][i].cost;
496             minq = i;
497         }
498     }
499     while (idx) {
500         sce->sf_idx[bandaddr[idx]] = minq + q0;
501         minq = paths[idx][minq].prev;
502         idx--;
503     }
504     //set the same quantizers inside window groups
505     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
506         for (g = 0;  g < sce->ics.num_swb; g++)
507             for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
508                 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
509 }
510
511 /**
512  * two-loop quantizers search taken from ISO 13818-7 Appendix C
513  */
514 static void search_for_quantizers_twoloop(AVCodecContext *avctx,
515                                           AACEncContext *s,
516                                           SingleChannelElement *sce,
517                                           const float lambda)
518 {
519     int start = 0, i, w, w2, g;
520     int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f);
521     float dists[128] = { 0 }, uplims[128] = { 0 };
522     float maxvals[128];
523     int fflag, minscaler;
524     int its  = 0;
525     int allz = 0;
526     float minthr = INFINITY;
527
528     // for values above this the decoder might end up in an endless loop
529     // due to always having more bits than what can be encoded.
530     destbits = FFMIN(destbits, 5800);
531     //XXX: some heuristic to determine initial quantizers will reduce search time
532     //determine zero bands and upper limits
533     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
534         for (g = 0;  g < sce->ics.num_swb; g++) {
535             int nz = 0;
536             float uplim = 0.0f, energy = 0.0f;
537             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
538                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
539                 uplim  += band->threshold;
540                 energy += band->energy;
541                 if (band->energy <= band->threshold || band->threshold == 0.0f) {
542                     sce->zeroes[(w+w2)*16+g] = 1;
543                     continue;
544                 }
545                 nz = 1;
546             }
547             uplims[w*16+g] = uplim *512;
548             sce->zeroes[w*16+g] = !nz;
549             if (nz)
550                 minthr = FFMIN(minthr, uplim);
551             allz |= nz;
552         }
553     }
554     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
555         for (g = 0;  g < sce->ics.num_swb; g++) {
556             if (sce->zeroes[w*16+g]) {
557                 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
558                 continue;
559             }
560             sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
561         }
562     }
563
564     if (!allz)
565         return;
566     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
567
568     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
569         start = w*128;
570         for (g = 0;  g < sce->ics.num_swb; g++) {
571             const float *scaled = s->scoefs + start;
572             maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
573             start += sce->ics.swb_sizes[g];
574         }
575     }
576
577     //perform two-loop search
578     //outer loop - improve quality
579     do {
580         int tbits, qstep;
581         minscaler = sce->sf_idx[0];
582         //inner loop - quantize spectrum to fit into given number of bits
583         qstep = its ? 1 : 32;
584         do {
585             int prev = -1;
586             tbits = 0;
587             for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
588                 start = w*128;
589                 for (g = 0;  g < sce->ics.num_swb; g++) {
590                     const float *coefs = &sce->coeffs[start];
591                     const float *scaled = &s->scoefs[start];
592                     int bits = 0;
593                     int cb;
594                     float dist = 0.0f;
595
596                     if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
597                         start += sce->ics.swb_sizes[g];
598                         continue;
599                     }
600                     minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
601                     cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
602                     for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
603                         int b;
604                         dist += quantize_band_cost(s, coefs + w2*128,
605                                                    scaled + w2*128,
606                                                    sce->ics.swb_sizes[g],
607                                                    sce->sf_idx[w*16+g],
608                                                    cb,
609                                                    1.0f,
610                                                    INFINITY,
611                                                    &b,
612                                                    0);
613                         bits += b;
614                     }
615                     dists[w*16+g] = dist - bits;
616                     if (prev != -1) {
617                         bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
618                     }
619                     tbits += bits;
620                     start += sce->ics.swb_sizes[g];
621                     prev = sce->sf_idx[w*16+g];
622                 }
623             }
624             if (tbits > destbits) {
625                 for (i = 0; i < 128; i++)
626                     if (sce->sf_idx[i] < 218 - qstep)
627                         sce->sf_idx[i] += qstep;
628             } else {
629                 for (i = 0; i < 128; i++)
630                     if (sce->sf_idx[i] > 60 - qstep)
631                         sce->sf_idx[i] -= qstep;
632             }
633             qstep >>= 1;
634             if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
635                 qstep = 1;
636         } while (qstep);
637
638         fflag = 0;
639         minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
640
641         for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
642             for (g = 0; g < sce->ics.num_swb; g++) {
643                 int prevsc = sce->sf_idx[w*16+g];
644                 if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
645                     if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
646                         sce->sf_idx[w*16+g]--;
647                     else //Try to make sure there is some energy in every band
648                         sce->sf_idx[w*16+g]-=2;
649                 }
650                 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
651                 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
652                 if (sce->sf_idx[w*16+g] != prevsc)
653                     fflag = 1;
654                 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
655             }
656         }
657         its++;
658     } while (fflag && its < 10);
659 }
660
661 static void search_for_quantizers_faac(AVCodecContext *avctx, AACEncContext *s,
662                                        SingleChannelElement *sce,
663                                        const float lambda)
664 {
665     int start = 0, i, w, w2, g;
666     float uplim[128], maxq[128];
667     int minq, maxsf;
668     float distfact = ((sce->ics.num_windows > 1) ? 85.80 : 147.84) / lambda;
669     int last = 0, lastband = 0, curband = 0;
670     float avg_energy = 0.0;
671     if (sce->ics.num_windows == 1) {
672         start = 0;
673         for (i = 0; i < 1024; i++) {
674             if (i - start >= sce->ics.swb_sizes[curband]) {
675                 start += sce->ics.swb_sizes[curband];
676                 curband++;
677             }
678             if (sce->coeffs[i]) {
679                 avg_energy += sce->coeffs[i] * sce->coeffs[i];
680                 last = i;
681                 lastband = curband;
682             }
683         }
684     } else {
685         for (w = 0; w < 8; w++) {
686             const float *coeffs = &sce->coeffs[w*128];
687             curband = start = 0;
688             for (i = 0; i < 128; i++) {
689                 if (i - start >= sce->ics.swb_sizes[curband]) {
690                     start += sce->ics.swb_sizes[curband];
691                     curband++;
692                 }
693                 if (coeffs[i]) {
694                     avg_energy += coeffs[i] * coeffs[i];
695                     last = FFMAX(last, i);
696                     lastband = FFMAX(lastband, curband);
697                 }
698             }
699         }
700     }
701     last++;
702     avg_energy /= last;
703     if (avg_energy == 0.0f) {
704         for (i = 0; i < FF_ARRAY_ELEMS(sce->sf_idx); i++)
705             sce->sf_idx[i] = SCALE_ONE_POS;
706         return;
707     }
708     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
709         start = w*128;
710         for (g = 0; g < sce->ics.num_swb; g++) {
711             float *coefs   = &sce->coeffs[start];
712             const int size = sce->ics.swb_sizes[g];
713             int start2 = start, end2 = start + size, peakpos = start;
714             float maxval = -1, thr = 0.0f, t;
715             maxq[w*16+g] = 0.0f;
716             if (g > lastband) {
717                 maxq[w*16+g] = 0.0f;
718                 start += size;
719                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++)
720                     memset(coefs + w2*128, 0, sizeof(coefs[0])*size);
721                 continue;
722             }
723             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
724                 for (i = 0; i < size; i++) {
725                     float t = coefs[w2*128+i]*coefs[w2*128+i];
726                     maxq[w*16+g] = FFMAX(maxq[w*16+g], fabsf(coefs[w2*128 + i]));
727                     thr += t;
728                     if (sce->ics.num_windows == 1 && maxval < t) {
729                         maxval  = t;
730                         peakpos = start+i;
731                     }
732                 }
733             }
734             if (sce->ics.num_windows == 1) {
735                 start2 = FFMAX(peakpos - 2, start2);
736                 end2   = FFMIN(peakpos + 3, end2);
737             } else {
738                 start2 -= start;
739                 end2   -= start;
740             }
741             start += size;
742             thr = pow(thr / (avg_energy * (end2 - start2)), 0.3 + 0.1*(lastband - g) / lastband);
743             t   = 1.0 - (1.0 * start2 / last);
744             uplim[w*16+g] = distfact / (1.4 * thr + t*t*t + 0.075);
745         }
746     }
747     memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
748     abs_pow34_v(s->scoefs, sce->coeffs, 1024);
749     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
750         start = w*128;
751         for (g = 0;  g < sce->ics.num_swb; g++) {
752             const float *coefs  = &sce->coeffs[start];
753             const float *scaled = &s->scoefs[start];
754             const int size      = sce->ics.swb_sizes[g];
755             int scf, prev_scf, step;
756             int min_scf = -1, max_scf = 256;
757             float curdiff;
758             if (maxq[w*16+g] < 21.544) {
759                 sce->zeroes[w*16+g] = 1;
760                 start += size;
761                 continue;
762             }
763             sce->zeroes[w*16+g] = 0;
764             scf  = prev_scf = av_clip(SCALE_ONE_POS - SCALE_DIV_512 - log2f(1/maxq[w*16+g])*16/3, 60, 218);
765             for (;;) {
766                 float dist = 0.0f;
767                 int quant_max;
768
769                 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
770                     int b;
771                     dist += quantize_band_cost(s, coefs + w2*128,
772                                                scaled + w2*128,
773                                                sce->ics.swb_sizes[g],
774                                                scf,
775                                                ESC_BT,
776                                                lambda,
777                                                INFINITY,
778                                                &b,
779                                                0);
780                     dist -= b;
781                 }
782                 dist *= 1.0f / 512.0f / lambda;
783                 quant_max = quant(maxq[w*16+g], ff_aac_pow2sf_tab[POW_SF2_ZERO - scf + SCALE_ONE_POS - SCALE_DIV_512], ROUND_STANDARD);
784                 if (quant_max >= 8191) { // too much, return to the previous quantizer
785                     sce->sf_idx[w*16+g] = prev_scf;
786                     break;
787                 }
788                 prev_scf = scf;
789                 curdiff = fabsf(dist - uplim[w*16+g]);
790                 if (curdiff <= 1.0f)
791                     step = 0;
792                 else
793                     step = log2f(curdiff);
794                 if (dist > uplim[w*16+g])
795                     step = -step;
796                 scf += step;
797                 scf = av_clip_uint8(scf);
798                 step = scf - prev_scf;
799                 if (FFABS(step) <= 1 || (step > 0 && scf >= max_scf) || (step < 0 && scf <= min_scf)) {
800                     sce->sf_idx[w*16+g] = av_clip(scf, min_scf, max_scf);
801                     break;
802                 }
803                 if (step > 0)
804                     min_scf = prev_scf;
805                 else
806                     max_scf = prev_scf;
807             }
808             start += size;
809         }
810     }
811     minq = sce->sf_idx[0] ? sce->sf_idx[0] : INT_MAX;
812     for (i = 1; i < 128; i++) {
813         if (!sce->sf_idx[i])
814             sce->sf_idx[i] = sce->sf_idx[i-1];
815         else
816             minq = FFMIN(minq, sce->sf_idx[i]);
817     }
818     if (minq == INT_MAX)
819         minq = 0;
820     minq = FFMIN(minq, SCALE_MAX_POS);
821     maxsf = FFMIN(minq + SCALE_MAX_DIFF, SCALE_MAX_POS);
822     for (i = 126; i >= 0; i--) {
823         if (!sce->sf_idx[i])
824             sce->sf_idx[i] = sce->sf_idx[i+1];
825         sce->sf_idx[i] = av_clip(sce->sf_idx[i], minq, maxsf);
826     }
827 }
828
829 static void search_for_quantizers_fast(AVCodecContext *avctx, AACEncContext *s,
830                                        SingleChannelElement *sce,
831                                        const float lambda)
832 {
833     int i, w, w2, g;
834     int minq = 255;
835
836     memset(sce->sf_idx, 0, sizeof(sce->sf_idx));
837     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
838         for (g = 0; g < sce->ics.num_swb; g++) {
839             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
840                 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
841                 if (band->energy <= band->threshold) {
842                     sce->sf_idx[(w+w2)*16+g] = 218;
843                     sce->zeroes[(w+w2)*16+g] = 1;
844                 } else {
845                     sce->sf_idx[(w+w2)*16+g] = av_clip(SCALE_ONE_POS - SCALE_DIV_512 + log2f(band->threshold), 80, 218);
846                     sce->zeroes[(w+w2)*16+g] = 0;
847                 }
848                 minq = FFMIN(minq, sce->sf_idx[(w+w2)*16+g]);
849             }
850         }
851     }
852     for (i = 0; i < 128; i++) {
853         sce->sf_idx[i] = 140;
854         //av_clip(sce->sf_idx[i], minq, minq + SCALE_MAX_DIFF - 1);
855     }
856     //set the same quantizers inside window groups
857     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w])
858         for (g = 0;  g < sce->ics.num_swb; g++)
859             for (w2 = 1; w2 < sce->ics.group_len[w]; w2++)
860                 sce->sf_idx[(w+w2)*16+g] = sce->sf_idx[w*16+g];
861 }
862
863 static void search_for_pns(AACEncContext *s, AVCodecContext *avctx, SingleChannelElement *sce)
864 {
865     FFPsyBand *band;
866     int w, g, w2, i;
867     float *PNS = &s->scoefs[0*128], *PNS34 = &s->scoefs[1*128];
868     float *NOR34 = &s->scoefs[3*128];
869     const float lambda = s->lambda;
870     const float freq_mult = avctx->sample_rate/(1024.0f/sce->ics.num_windows)/2.0f;
871     const float thr_mult = NOISE_LAMBDA_REPLACE*(100.0f/lambda);
872     const float spread_threshold = NOISE_SPREAD_THRESHOLD*(lambda/100.f);
873
874     if (sce->ics.window_sequence[0] == EIGHT_SHORT_SEQUENCE)
875         return;
876
877     for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
878         for (g = 0;  g < sce->ics.num_swb; g++) {
879             int noise_sfi;
880             float dist1 = 0.0f, dist2 = 0.0f, noise_amp;
881             float pns_energy = 0.0f, energy_ratio, dist_thresh;
882             float sfb_energy = 0.0f, threshold = 0.0f, spread = 0.0f;
883             const int start = sce->ics.swb_offset[w*16+g];
884             const float freq = start*freq_mult;
885             const float freq_boost = FFMAX(0.88f*freq/NOISE_LOW_LIMIT, 1.0f);
886             if (freq < NOISE_LOW_LIMIT || avctx->cutoff && freq >= avctx->cutoff)
887                 continue;
888             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
889                 band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
890                 sfb_energy += band->energy;
891                 spread     += band->spread;
892                 threshold  += band->threshold;
893             }
894
895             /* Ramps down at ~8000Hz and loosens the dist threshold */
896             dist_thresh = FFMIN(2.5f*NOISE_LOW_LIMIT/freq, 1.27f);
897
898             if (sce->zeroes[w*16+g] || spread < spread_threshold ||
899                 sfb_energy > threshold*thr_mult*freq_boost) {
900                 sce->pns_ener[w*16+g] = sfb_energy;
901                 continue;
902             }
903
904             noise_sfi = av_clip(roundf(log2f(sfb_energy)*2), -100, 155); /* Quantize */
905             noise_amp = -ff_aac_pow2sf_tab[noise_sfi + POW_SF2_ZERO];    /* Dequantize */
906             for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
907                 float band_energy, scale;
908                 const int start_c = sce->ics.swb_offset[(w+w2)*16+g];
909                 band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
910                 for (i = 0; i < sce->ics.swb_sizes[g]; i++)
911                     PNS[i] = s->random_state = lcg_random(s->random_state);
912                 band_energy = s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
913                 scale = noise_amp/sqrtf(band_energy);
914                 s->fdsp->vector_fmul_scalar(PNS, PNS, scale, sce->ics.swb_sizes[g]);
915                 pns_energy += s->fdsp->scalarproduct_float(PNS, PNS, sce->ics.swb_sizes[g]);
916                 abs_pow34_v(NOR34, &sce->coeffs[start_c], sce->ics.swb_sizes[g]);
917                 abs_pow34_v(PNS34, PNS, sce->ics.swb_sizes[g]);
918                 dist1 += quantize_band_cost(s, &sce->coeffs[start_c],
919                                             NOR34,
920                                             sce->ics.swb_sizes[g],
921                                             sce->sf_idx[(w+w2)*16+g],
922                                             sce->band_alt[(w+w2)*16+g],
923                                             lambda/band->threshold, INFINITY, NULL, 0);
924                 dist2 += quantize_band_cost(s, PNS,
925                                             PNS34,
926                                             sce->ics.swb_sizes[g],
927                                             noise_sfi,
928                                             NOISE_BT,
929                                             lambda/band->threshold, INFINITY, NULL, 0);
930             }
931             energy_ratio = sfb_energy/pns_energy; /* Compensates for quantization error */
932             sce->pns_ener[w*16+g] = energy_ratio*sfb_energy;
933             if (energy_ratio > 0.85f && energy_ratio < 1.25f && dist1/dist2 > dist_thresh) {
934                 sce->band_type[w*16+g] = NOISE_BT;
935                 sce->zeroes[w*16+g] = 0;
936                 if (sce->band_type[w*16+g-1] != NOISE_BT && /* Prevent holes */
937                     sce->band_type[w*16+g-2] == NOISE_BT) {
938                     sce->band_type[w*16+g-1] = NOISE_BT;
939                     sce->zeroes[w*16+g-1] = 0;
940                 }
941             }
942         }
943     }
944 }
945
946 static void search_for_ms(AACEncContext *s, ChannelElement *cpe)
947 {
948     int start = 0, i, w, w2, g;
949     float M[128], S[128];
950     float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
951     const float lambda = s->lambda;
952     SingleChannelElement *sce0 = &cpe->ch[0];
953     SingleChannelElement *sce1 = &cpe->ch[1];
954     if (!cpe->common_window)
955         return;
956     for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
957         start = 0;
958         for (g = 0;  g < sce0->ics.num_swb; g++) {
959             if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
960                 float dist1 = 0.0f, dist2 = 0.0f;
961                 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
962                     FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
963                     FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
964                     float minthr = FFMIN(band0->threshold, band1->threshold);
965                     float maxthr = FFMAX(band0->threshold, band1->threshold);
966                     for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
967                         M[i] = (sce0->coeffs[start+(w+w2)*128+i]
968                               + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
969                         S[i] =  M[i]
970                               - sce1->coeffs[start+(w+w2)*128+i];
971                     }
972                     abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
973                     abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
974                     abs_pow34_v(M34, M,                         sce0->ics.swb_sizes[g]);
975                     abs_pow34_v(S34, S,                         sce0->ics.swb_sizes[g]);
976                     dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
977                                                 L34,
978                                                 sce0->ics.swb_sizes[g],
979                                                 sce0->sf_idx[(w+w2)*16+g],
980                                                 sce0->band_type[(w+w2)*16+g],
981                                                 lambda / band0->threshold, INFINITY, NULL, 0);
982                     dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
983                                                 R34,
984                                                 sce1->ics.swb_sizes[g],
985                                                 sce1->sf_idx[(w+w2)*16+g],
986                                                 sce1->band_type[(w+w2)*16+g],
987                                                 lambda / band1->threshold, INFINITY, NULL, 0);
988                     dist2 += quantize_band_cost(s, M,
989                                                 M34,
990                                                 sce0->ics.swb_sizes[g],
991                                                 sce0->sf_idx[(w+w2)*16+g],
992                                                 sce0->band_type[(w+w2)*16+g],
993                                                 lambda / maxthr, INFINITY, NULL, 0);
994                     dist2 += quantize_band_cost(s, S,
995                                                 S34,
996                                                 sce1->ics.swb_sizes[g],
997                                                 sce1->sf_idx[(w+w2)*16+g],
998                                                 sce1->band_type[(w+w2)*16+g],
999                                                 lambda / minthr, INFINITY, NULL, 0);
1000                 }
1001                 cpe->ms_mask[w*16+g] = dist2 < dist1;
1002             }
1003             start += sce0->ics.swb_sizes[g];
1004         }
1005     }
1006 }
1007
1008 AACCoefficientsEncoder ff_aac_coders[AAC_CODER_NB] = {
1009     [AAC_CODER_FAAC] = {
1010         search_for_quantizers_faac,
1011         encode_window_bands_info,
1012         quantize_and_encode_band,
1013         ff_aac_encode_tns_info,
1014         ff_aac_encode_main_pred,
1015         ff_aac_adjust_common_prediction,
1016         ff_aac_apply_main_pred,
1017         ff_aac_apply_tns,
1018         set_special_band_scalefactors,
1019         search_for_pns,
1020         ff_aac_search_for_tns,
1021         search_for_ms,
1022         ff_aac_search_for_is,
1023         ff_aac_search_for_pred,
1024     },
1025     [AAC_CODER_ANMR] = {
1026         search_for_quantizers_anmr,
1027         encode_window_bands_info,
1028         quantize_and_encode_band,
1029         ff_aac_encode_tns_info,
1030         ff_aac_encode_main_pred,
1031         ff_aac_adjust_common_prediction,
1032         ff_aac_apply_main_pred,
1033         ff_aac_apply_tns,
1034         set_special_band_scalefactors,
1035         search_for_pns,
1036         ff_aac_search_for_tns,
1037         search_for_ms,
1038         ff_aac_search_for_is,
1039         ff_aac_search_for_pred,
1040     },
1041     [AAC_CODER_TWOLOOP] = {
1042         search_for_quantizers_twoloop,
1043         codebook_trellis_rate,
1044         quantize_and_encode_band,
1045         ff_aac_encode_tns_info,
1046         ff_aac_encode_main_pred,
1047         ff_aac_adjust_common_prediction,
1048         ff_aac_apply_main_pred,
1049         ff_aac_apply_tns,
1050         set_special_band_scalefactors,
1051         search_for_pns,
1052         ff_aac_search_for_tns,
1053         search_for_ms,
1054         ff_aac_search_for_is,
1055         ff_aac_search_for_pred,
1056     },
1057     [AAC_CODER_FAST] = {
1058         search_for_quantizers_fast,
1059         encode_window_bands_info,
1060         quantize_and_encode_band,
1061         ff_aac_encode_tns_info,
1062         ff_aac_encode_main_pred,
1063         ff_aac_adjust_common_prediction,
1064         ff_aac_apply_main_pred,
1065         ff_aac_apply_tns,
1066         set_special_band_scalefactors,
1067         search_for_pns,
1068         ff_aac_search_for_tns,
1069         search_for_ms,
1070         ff_aac_search_for_is,
1071         ff_aac_search_for_pred,
1072     },
1073 };