3 * MIPS Technologies, Inc., California.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * Author: Stanislav Ocovaj (socovaj@mips.com)
30 * Szabolcs Pal (sabolc@mips.com)
32 * AAC coefficients encoder optimized for MIPS floating-point architecture
34 * This file is part of FFmpeg.
36 * FFmpeg is free software; you can redistribute it and/or
37 * modify it under the terms of the GNU Lesser General Public
38 * License as published by the Free Software Foundation; either
39 * version 2.1 of the License, or (at your option) any later version.
41 * FFmpeg is distributed in the hope that it will be useful,
42 * but WITHOUT ANY WARRANTY; without even the implied warranty of
43 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
44 * Lesser General Public License for more details.
46 * You should have received a copy of the GNU Lesser General Public
47 * License along with FFmpeg; if not, write to the Free Software
48 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
53 * Reference: libavcodec/aaccoder.c
56 #include "libavutil/libm.h"
59 #include "libavutil/mathematics.h"
60 #include "libavcodec/avcodec.h"
61 #include "libavcodec/put_bits.h"
62 #include "libavcodec/aac.h"
63 #include "libavcodec/aacenc.h"
64 #include "libavcodec/aacenctab.h"
65 #include "libavcodec/aactab.h"
68 typedef struct BandCodingPath {
/*
 * Lookup table with 81 (= 3^4) entries. Entry i equals the number of nonzero
 * digits of i when i is written as four base-3 digits, i.e. how many of four
 * components with values 0..2 are nonzero.
 * get_band_numbits_UQUAD_mips adds the looked-up value to the codeword
 * length; presumably this charges one sign bit per nonzero coefficient
 * (the index computation is not visible here -- confirm).
 */
74 static const uint8_t uquad_sign_bits[81] = {
75 0, 1, 1, 1, 2, 2, 1, 2, 2,
76 1, 2, 2, 2, 3, 3, 2, 3, 3,
77 1, 2, 2, 2, 3, 3, 2, 3, 3,
78 1, 2, 2, 2, 3, 3, 2, 3, 3,
79 2, 3, 3, 3, 4, 4, 3, 4, 4,
80 2, 3, 3, 3, 4, 4, 3, 4, 4,
81 1, 2, 2, 2, 3, 3, 2, 3, 3,
82 2, 3, 3, 3, 4, 4, 3, 4, 4,
83 2, 3, 3, 3, 4, 4, 3, 4, 4
/*
 * Lookup table with 64 (= 8 * 8) entries laid out as 8 rows of 8.
 * Entry i is 0, 1 or 2: the number of nonzero values among (i / 8, i % 8).
 * get_band_numbits_UPAIR7_mips adds the looked-up value to the codeword
 * length; presumably one sign bit per nonzero coefficient of a pair whose
 * magnitudes are in 0..7 (index computation not visible here -- confirm).
 */
86 static const uint8_t upair7_sign_bits[64] = {
87 0, 1, 1, 1, 1, 1, 1, 1,
88 1, 2, 2, 2, 2, 2, 2, 2,
89 1, 2, 2, 2, 2, 2, 2, 2,
90 1, 2, 2, 2, 2, 2, 2, 2,
91 1, 2, 2, 2, 2, 2, 2, 2,
92 1, 2, 2, 2, 2, 2, 2, 2,
93 1, 2, 2, 2, 2, 2, 2, 2,
94 1, 2, 2, 2, 2, 2, 2, 2,
/*
 * Lookup table with 169 (= 13 * 13) entries laid out as 13 rows of 13.
 * Entry i is 0, 1 or 2: the number of nonzero values among (i / 13, i % 13).
 * get_band_numbits_UPAIR12_mips adds the looked-up value to the codeword
 * length; presumably one sign bit per nonzero coefficient of a pair whose
 * magnitudes are in 0..12 (index computation not visible here -- confirm).
 */
97 static const uint8_t upair12_sign_bits[169] = {
98 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
99 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
103 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
104 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
110 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * Lookup table with 289 (= 17 * 17) entries laid out as 17 rows of 17.
 * Entry i is 0, 1 or 2: the number of nonzero values among (i / 17, i % 17).
 * No use of this table is visible in this part of the file; by analogy with
 * the other *_sign_bits tables it presumably supplies the sign-bit count for
 * a pair whose magnitudes are in 0..16 -- confirm at the point of use.
 */
113 static const uint8_t esc_sign_bits[289] = {
114 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
115 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
116 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
123 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
124 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
125 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
126 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
127 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
128 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
129 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * Bias added to a scaled coefficient before the float-to-int conversion in
 * the quantizer loops of this file ("scaled[i] * Q34 + ROUND_STANDARD").
 */
133 #define ROUND_STANDARD 0.4054f
/*
 * Smaller bias; quantize_and_encode_band_mips passes it as ROUNDING when its
 * rtz argument is nonzero.
 */
134 #define ROUND_TO_ZERO 0.1054f
/*
 * Vector helper taking an output array, a read-only input array and an
 * element count. Elements are processed four per iteration (the loop step
 * is 4, so size is presumably a multiple of 4 -- TODO confirm).
 * The name suggests out[i] = |in[i]|^(3/4); the arithmetic is not visible
 * in this part of the file, so treat that as an assumption.
 * The body is compiled only when USE_REALLY_FULL_SEARCH is not defined.
 */
136 static void abs_pow34_v(float *out, const float *in, const int size) {
137 #ifndef USE_REALLY_FULL_SEARCH
140 float ax, bx, cx, dx;
/* four elements per iteration */
142 for (i = 0; i < size; i += 4) {
163 #endif /* USE_REALLY_FULL_SEARCH */
/*
 * Scan group_len windows of scaled[], swb_size values each, with a stride of
 * 128 floats between consecutive windows, folding every value into maxval
 * with FFMAX.
 * The initial value of maxval and the return statement are not visible here;
 * presumably the function returns the largest value found -- confirm.
 */
166 static float find_max_val(int group_len, int swb_size, const float *scaled) {
169 for (w2 = 0; w2 < group_len; w2++) {
170 for (i = 0; i < swb_size; i++) {
/* window w2 starts at offset w2 * 128 */
171 maxval = FFMAX(maxval, scaled[w2*128+i]);
/*
 * Pick a codebook number from the largest scaled coefficient (maxval) and
 * the scalefactor index sf.
 *
 * Q is read from ff_aac_pow2sf_tab and Q34 = sqrtf(Q * sqrtf(Q)) is Q^(3/4).
 * qmaxval = maxval * Q34 + 0.4054f; the literal has the same value as
 * ROUND_STANDARD. The declaration of qmaxval is not visible here; the
 * equality tests below suggest an integer type -- confirm.
 *
 * Visible mapping of qmaxval to cb:
 *   0 -> 0, 1 -> 1, 2 -> 3, <= 4 -> 5, <= 7 -> 7, <= 12 -> 9.
 * The branch for larger values and the return statement are not visible.
 */
177 static int find_min_book(float maxval, int sf) {
178 float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
/* sqrt(Q * sqrt(Q)) == Q^(3/4) */
179 float Q34 = sqrtf(Q * sqrtf(Q));
181 qmaxval = maxval * Q34 + 0.4054f;
182 if (qmaxval == 0) cb = 0;
183 else if (qmaxval == 1) cb = 1;
184 else if (qmaxval == 2) cb = 3;
185 else if (qmaxval <= 4) cb = 5;
186 else if (qmaxval <= 7) cb = 7;
187 else if (qmaxval <= 12) cb = 9;
193 * Functions developed from template function and optimized for quantizing and encoding band
/*
 * Quantize-and-write handler stored at indices 1 and 2 of
 * quantize_and_encode_band_cost_arr.
 *
 * Visible steps, four coefficients per loop iteration:
 *  - abs_pow34_v() is called once with s->scoefs as output and in[] as input;
 *  - qc1..qc4 = scaled[i+N] * Q34 + ROUND_STANDARD, truncated by the
 *    conversion to int;
 *  - the inline asm reduces each qcN to 0 or 1 and negates it when the sign
 *    bit of the matching in[] word is set, so qcN ends up in {-1, 0, 1};
 *  - one codeword p_codes[curidx] of p_bits[curidx] bits is written to pb.
 *
 * p_bits / p_codes point at the tables for codebook cb (row cb-1).
 *
 * NOTE(review): the computation of curidx and any reassignment of `scaled`
 * are not visible in this part of the file. `out`, `lambda`, `uplim`, `bits`
 * and `ROUNDING` are not referenced by the visible lines -- confirm.
 */
195 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
196 PutBitContext *pb, const float *in, float *out,
197 const float *scaled, int size, int scale_idx,
198 int cb, const float lambda, const float uplim,
199 int *bits, const float ROUNDING)
201 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
203 int qc1, qc2, qc3, qc4;
205 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
206 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
208 abs_pow34_v(s->scoefs, in, size);
210 for (i = 0; i < size; i += 4) {
/* view the float inputs as raw 32-bit words so the asm can test sign bits */
212 int *in_int = (int *)&in[i];
213 int t0, t1, t2, t3, t4, t5, t6, t7;
215 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
216 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
217 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
218 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
222 ".set noreorder \n\t"
/* qcN = (0 < qcN): magnitude becomes 0 or 1 */
224 "slt %[qc1], $zero, %[qc1] \n\t"
225 "slt %[qc2], $zero, %[qc2] \n\t"
226 "slt %[qc3], $zero, %[qc3] \n\t"
227 "slt %[qc4], $zero, %[qc4] \n\t"
/* tN = raw word of in[i+N], then its top (sign) bit */
228 "lw %[t0], 0(%[in_int]) \n\t"
229 "lw %[t1], 4(%[in_int]) \n\t"
230 "lw %[t2], 8(%[in_int]) \n\t"
231 "lw %[t3], 12(%[in_int]) \n\t"
232 "srl %[t0], %[t0], 31 \n\t"
233 "srl %[t1], %[t1], 31 \n\t"
234 "srl %[t2], %[t2], 31 \n\t"
235 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qcN; movn then installs the negation where the sign bit is set */
236 "subu %[t4], $zero, %[qc1] \n\t"
237 "subu %[t5], $zero, %[qc2] \n\t"
238 "subu %[t6], $zero, %[qc3] \n\t"
239 "subu %[t7], $zero, %[qc4] \n\t"
240 "movn %[qc1], %[t4], %[t0] \n\t"
241 "movn %[qc2], %[t5], %[t1] \n\t"
242 "movn %[qc3], %[t6], %[t2] \n\t"
243 "movn %[qc4], %[t7], %[t3] \n\t"
247 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
248 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
249 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
250 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
251 : [in_int]"r"(in_int)
/* emit the codeword selected by curidx */
264 put_bits(pb, p_bits[curidx], p_codes[curidx]);
/*
 * Quantize-and-write handler stored at indices 3 and 4 of
 * quantize_and_encode_band_cost_arr.
 *
 * Visible steps, four coefficients per loop iteration:
 *  - qc1..qc4 = scaled[i+N] * Q34 + ROUND_STANDARD, truncated to int, then
 *    limited to at most 2 by the asm;
 *  - `sign` starts at 0 and, in coefficient order, gets one bit shifted in
 *    for every nonzero qcN; the bit is 1 when the in[] word is negative when
 *    read as a signed int;
 *  - `count` is the number of qcN greater than zero;
 *  - the codeword p_codes[curidx] is shifted left by count, the low count
 *    bits of sign are appended, and p_bits[curidx] + count bits are written
 *    with a single put_bits() call.
 *
 * NOTE(review): the computation of curidx, the declaration of v_bits and any
 * reassignment of `scaled` are not visible in this part of the file. `out`,
 * `lambda`, `uplim`, `bits` and `ROUNDING` are not referenced by the visible
 * lines -- confirm.
 */
268 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
269 PutBitContext *pb, const float *in, float *out,
270 const float *scaled, int size, int scale_idx,
271 int cb, const float lambda, const float uplim,
272 int *bits, const float ROUNDING)
274 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
276 int qc1, qc2, qc3, qc4;
278 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
279 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
281 abs_pow34_v(s->scoefs, in, size);
283 for (i = 0; i < size; i += 4) {
284 int curidx, sign, count;
/* view the float inputs as raw 32-bit words so the asm can test their sign */
285 int *in_int = (int *)&in[i];
287 unsigned int v_codes;
288 int t0, t1, t2, t3, t4;
290 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
291 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
292 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
293 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
297 ".set noreorder \n\t"
/* t4 = 2 (upper limit), sign = 0 */
299 "ori %[t4], $zero, 2 \n\t"
300 "ori %[sign], $zero, 0 \n\t"
/* qcN = 2 where 2 < qcN */
301 "slt %[t0], %[t4], %[qc1] \n\t"
302 "slt %[t1], %[t4], %[qc2] \n\t"
303 "slt %[t2], %[t4], %[qc3] \n\t"
304 "slt %[t3], %[t4], %[qc4] \n\t"
305 "movn %[qc1], %[t4], %[t0] \n\t"
306 "movn %[qc2], %[t4], %[t1] \n\t"
307 "movn %[qc3], %[t4], %[t2] \n\t"
308 "movn %[qc4], %[t4], %[t3] \n\t"
/* tN = (in[i+N] word < 0); sign gets a new low bit only when qcN != 0 */
309 "lw %[t0], 0(%[in_int]) \n\t"
310 "lw %[t1], 4(%[in_int]) \n\t"
311 "lw %[t2], 8(%[in_int]) \n\t"
312 "lw %[t3], 12(%[in_int]) \n\t"
313 "slt %[t0], %[t0], $zero \n\t"
314 "movn %[sign], %[t0], %[qc1] \n\t"
315 "slt %[t1], %[t1], $zero \n\t"
316 "slt %[t2], %[t2], $zero \n\t"
317 "slt %[t3], %[t3], $zero \n\t"
318 "sll %[t0], %[sign], 1 \n\t"
319 "or %[t0], %[t0], %[t1] \n\t"
320 "movn %[sign], %[t0], %[qc2] \n\t"
/* count accumulates (0 < qcN) over the four coefficients */
321 "slt %[t4], $zero, %[qc1] \n\t"
322 "slt %[t1], $zero, %[qc2] \n\t"
323 "slt %[count], $zero, %[qc3] \n\t"
324 "sll %[t0], %[sign], 1 \n\t"
325 "or %[t0], %[t0], %[t2] \n\t"
326 "movn %[sign], %[t0], %[qc3] \n\t"
327 "slt %[t2], $zero, %[qc4] \n\t"
328 "addu %[count], %[count], %[t4] \n\t"
329 "addu %[count], %[count], %[t1] \n\t"
330 "sll %[t0], %[sign], 1 \n\t"
331 "or %[t0], %[t0], %[t3] \n\t"
332 "movn %[sign], %[t0], %[qc4] \n\t"
333 "addu %[count], %[count], %[t2] \n\t"
337 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
338 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
339 [sign]"=&r"(sign), [count]"=&r"(count),
340 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
342 : [in_int]"r"(in_int)
/* codeword followed by the low `count` sign bits, written in one call */
354 v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
355 v_bits = p_bits[curidx] + count;
356 put_bits(pb, v_bits, v_codes);
/*
 * Quantize-and-write handler stored at indices 5 and 6 of
 * quantize_and_encode_band_cost_arr.
 *
 * Visible steps, four coefficients per loop iteration:
 *  - qc1..qc4 = scaled[i+N] * Q34 + ROUND_STANDARD, truncated to int, then
 *    limited to at most 4 by the asm;
 *  - each qcN is negated when the sign bit of the matching in[] word is set;
 *  - two codewords, selected by curidx and curidx2, are concatenated (the
 *    curidx codeword in the high bits) and written with one put_bits() call
 *    of p_bits[curidx] + p_bits[curidx2] bits.
 *
 * NOTE(review): the computation of curidx / curidx2, the declaration of
 * v_bits and any reassignment of `scaled` are not visible in this part of
 * the file. `out`, `lambda`, `uplim`, `bits` and `ROUNDING` are not
 * referenced by the visible lines -- confirm.
 */
360 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
361 PutBitContext *pb, const float *in, float *out,
362 const float *scaled, int size, int scale_idx,
363 int cb, const float lambda, const float uplim,
364 int *bits, const float ROUNDING)
366 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
368 int qc1, qc2, qc3, qc4;
370 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
371 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
373 abs_pow34_v(s->scoefs, in, size);
375 for (i = 0; i < size; i += 4) {
/* view the float inputs as raw 32-bit words so the asm can test sign bits */
377 int *in_int = (int *)&in[i];
379 unsigned int v_codes;
380 int t0, t1, t2, t3, t4, t5, t6, t7;
382 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
383 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
384 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
385 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
389 ".set noreorder \n\t"
/* qcN = 4 where 4 < qcN */
391 "ori %[t4], $zero, 4 \n\t"
392 "slt %[t0], %[t4], %[qc1] \n\t"
393 "slt %[t1], %[t4], %[qc2] \n\t"
394 "slt %[t2], %[t4], %[qc3] \n\t"
395 "slt %[t3], %[t4], %[qc4] \n\t"
396 "movn %[qc1], %[t4], %[t0] \n\t"
397 "movn %[qc2], %[t4], %[t1] \n\t"
398 "movn %[qc3], %[t4], %[t2] \n\t"
399 "movn %[qc4], %[t4], %[t3] \n\t"
/* tN = sign bit of in[i+N] */
400 "lw %[t0], 0(%[in_int]) \n\t"
401 "lw %[t1], 4(%[in_int]) \n\t"
402 "lw %[t2], 8(%[in_int]) \n\t"
403 "lw %[t3], 12(%[in_int]) \n\t"
404 "srl %[t0], %[t0], 31 \n\t"
405 "srl %[t1], %[t1], 31 \n\t"
406 "srl %[t2], %[t2], 31 \n\t"
407 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qcN; movn installs the negation where the sign bit is set */
408 "subu %[t4], $zero, %[qc1] \n\t"
409 "subu %[t5], $zero, %[qc2] \n\t"
410 "subu %[t6], $zero, %[qc3] \n\t"
411 "subu %[t7], $zero, %[qc4] \n\t"
412 "movn %[qc1], %[t4], %[t0] \n\t"
413 "movn %[qc2], %[t5], %[t1] \n\t"
414 "movn %[qc3], %[t6], %[t2] \n\t"
415 "movn %[qc4], %[t7], %[t3] \n\t"
419 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
420 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
421 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
422 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
423 : [in_int]"r"(in_int)
/* first codeword in the high bits, second codeword in the low bits */
433 v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
434 v_bits = p_bits[curidx] + p_bits[curidx2];
435 put_bits(pb, v_bits, v_codes);
/*
 * Quantize-and-write handler stored at indices 7 and 8 of
 * quantize_and_encode_band_cost_arr.
 *
 * Visible steps, four coefficients (two pairs) per loop iteration:
 *  - qc1..qc4 = scaled[i+N] * Q34 + ROUND_STANDARD, truncated to int, then
 *    limited to at most 7 by the asm;
 *  - sign1 / count1 are built from (qc1, qc2) and sign2 / count2 from
 *    (qc3, qc4): signK receives one bit per nonzero coefficient (1 when the
 *    in[] word is negative as a signed int), countK is the number of
 *    coefficients greater than zero;
 *  - two put_bits() calls follow, each writing p_codes[curidx] shifted left
 *    by countK with signK in the low bits, p_bits[curidx] + countK bits.
 *
 * NOTE(review): curidx is used for both writes; its computation (and
 * presumably its recomputation between the writes) is not visible in this
 * part of the file, nor is the declaration of v_bits or any reassignment of
 * `scaled`. `out`, `lambda`, `uplim`, `bits` and `ROUNDING` are not
 * referenced by the visible lines -- confirm.
 */
439 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
440 PutBitContext *pb, const float *in, float *out,
441 const float *scaled, int size, int scale_idx,
442 int cb, const float lambda, const float uplim,
443 int *bits, const float ROUNDING)
445 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
447 int qc1, qc2, qc3, qc4;
449 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
450 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
452 abs_pow34_v(s->scoefs, in, size);
454 for (i = 0; i < size; i += 4) {
455 int curidx, sign1, count1, sign2, count2;
/* view the float inputs as raw 32-bit words so the asm can test their sign */
456 int *in_int = (int *)&in[i];
458 unsigned int v_codes;
459 int t0, t1, t2, t3, t4;
461 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
462 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
463 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
464 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
468 ".set noreorder \n\t"
/* t4 = 7 (upper limit), both sign accumulators cleared */
470 "ori %[t4], $zero, 7 \n\t"
471 "ori %[sign1], $zero, 0 \n\t"
472 "ori %[sign2], $zero, 0 \n\t"
/* qcN = 7 where 7 < qcN */
473 "slt %[t0], %[t4], %[qc1] \n\t"
474 "slt %[t1], %[t4], %[qc2] \n\t"
475 "slt %[t2], %[t4], %[qc3] \n\t"
476 "slt %[t3], %[t4], %[qc4] \n\t"
477 "movn %[qc1], %[t4], %[t0] \n\t"
478 "movn %[qc2], %[t4], %[t1] \n\t"
479 "movn %[qc3], %[t4], %[t2] \n\t"
480 "movn %[qc4], %[t4], %[t3] \n\t"
/* tN = (in[i+N] word < 0); signK gets a new low bit only when qcN != 0 */
481 "lw %[t0], 0(%[in_int]) \n\t"
482 "lw %[t1], 4(%[in_int]) \n\t"
483 "lw %[t2], 8(%[in_int]) \n\t"
484 "lw %[t3], 12(%[in_int]) \n\t"
485 "slt %[t0], %[t0], $zero \n\t"
486 "movn %[sign1], %[t0], %[qc1] \n\t"
487 "slt %[t2], %[t2], $zero \n\t"
488 "movn %[sign2], %[t2], %[qc3] \n\t"
489 "slt %[t1], %[t1], $zero \n\t"
490 "sll %[t0], %[sign1], 1 \n\t"
491 "or %[t0], %[t0], %[t1] \n\t"
492 "movn %[sign1], %[t0], %[qc2] \n\t"
493 "slt %[t3], %[t3], $zero \n\t"
494 "sll %[t0], %[sign2], 1 \n\t"
495 "or %[t0], %[t0], %[t3] \n\t"
496 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countK = number of coefficients of pair K that are greater than zero */
497 "slt %[count1], $zero, %[qc1] \n\t"
498 "slt %[t1], $zero, %[qc2] \n\t"
499 "slt %[count2], $zero, %[qc3] \n\t"
500 "slt %[t2], $zero, %[qc4] \n\t"
501 "addu %[count1], %[count1], %[t1] \n\t"
502 "addu %[count2], %[count2], %[t2] \n\t"
506 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
507 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
508 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
509 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
510 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
512 : [in_int]"r"(in_int)
/*
 * NOTE(review): this clobber line names the hard registers t0..t4, while the
 * template above only uses %[...] operands. The other handlers visible in
 * this file show no such line -- confirm whether it is required.
 */
513 : "t0", "t1", "t2", "t3", "t4",
/* first pair: codeword followed by its sign bits */
520 v_codes = (p_codes[curidx] << count1) | sign1;
521 v_bits = p_bits[curidx] + count1;
522 put_bits(pb, v_bits, v_codes);
/* second pair */
527 v_codes = (p_codes[curidx] << count2) | sign2;
528 v_bits = p_bits[curidx] + count2;
529 put_bits(pb, v_bits, v_codes);
/*
 * Quantize-and-write handler stored at indices 9 and 10 of
 * quantize_and_encode_band_cost_arr.
 *
 * Visible steps, four coefficients (two pairs) per loop iteration:
 *  - qc1..qc4 = scaled[i+N] * Q34 + ROUND_STANDARD, truncated to int, then
 *    limited to at most 12 by the asm;
 *  - sign1 / count1 are built from (qc1, qc2) and sign2 / count2 from
 *    (qc3, qc4): signK receives one bit per nonzero coefficient (1 when the
 *    in[] word is negative as a signed int), countK is the number of
 *    coefficients greater than zero;
 *  - two put_bits() calls follow, each writing p_codes[curidx] shifted left
 *    by countK with signK in the low bits, p_bits[curidx] + countK bits.
 *
 * NOTE(review): curidx is used for both writes; its computation (and
 * presumably its recomputation between the writes) is not visible in this
 * part of the file, nor is the declaration of v_bits or any reassignment of
 * `scaled`. `out`, `lambda`, `uplim`, `bits` and `ROUNDING` are not
 * referenced by the visible lines -- confirm.
 */
533 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
534 PutBitContext *pb, const float *in, float *out,
535 const float *scaled, int size, int scale_idx,
536 int cb, const float lambda, const float uplim,
537 int *bits, const float ROUNDING)
539 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
541 int qc1, qc2, qc3, qc4;
543 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
544 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
546 abs_pow34_v(s->scoefs, in, size);
548 for (i = 0; i < size; i += 4) {
549 int curidx, sign1, count1, sign2, count2;
/* view the float inputs as raw 32-bit words so the asm can test their sign */
550 int *in_int = (int *)&in[i];
552 unsigned int v_codes;
553 int t0, t1, t2, t3, t4;
555 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
556 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
557 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
558 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
562 ".set noreorder \n\t"
/* t4 = 12 (upper limit), both sign accumulators cleared */
564 "ori %[t4], $zero, 12 \n\t"
565 "ori %[sign1], $zero, 0 \n\t"
566 "ori %[sign2], $zero, 0 \n\t"
/* qcN = 12 where 12 < qcN */
567 "slt %[t0], %[t4], %[qc1] \n\t"
568 "slt %[t1], %[t4], %[qc2] \n\t"
569 "slt %[t2], %[t4], %[qc3] \n\t"
570 "slt %[t3], %[t4], %[qc4] \n\t"
571 "movn %[qc1], %[t4], %[t0] \n\t"
572 "movn %[qc2], %[t4], %[t1] \n\t"
573 "movn %[qc3], %[t4], %[t2] \n\t"
574 "movn %[qc4], %[t4], %[t3] \n\t"
/* tN = (in[i+N] word < 0); signK gets a new low bit only when qcN != 0 */
575 "lw %[t0], 0(%[in_int]) \n\t"
576 "lw %[t1], 4(%[in_int]) \n\t"
577 "lw %[t2], 8(%[in_int]) \n\t"
578 "lw %[t3], 12(%[in_int]) \n\t"
579 "slt %[t0], %[t0], $zero \n\t"
580 "movn %[sign1], %[t0], %[qc1] \n\t"
581 "slt %[t2], %[t2], $zero \n\t"
582 "movn %[sign2], %[t2], %[qc3] \n\t"
583 "slt %[t1], %[t1], $zero \n\t"
584 "sll %[t0], %[sign1], 1 \n\t"
585 "or %[t0], %[t0], %[t1] \n\t"
586 "movn %[sign1], %[t0], %[qc2] \n\t"
587 "slt %[t3], %[t3], $zero \n\t"
588 "sll %[t0], %[sign2], 1 \n\t"
589 "or %[t0], %[t0], %[t3] \n\t"
590 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countK = number of coefficients of pair K that are greater than zero */
591 "slt %[count1], $zero, %[qc1] \n\t"
592 "slt %[t1], $zero, %[qc2] \n\t"
593 "slt %[count2], $zero, %[qc3] \n\t"
594 "slt %[t2], $zero, %[qc4] \n\t"
595 "addu %[count1], %[count1], %[t1] \n\t"
596 "addu %[count2], %[count2], %[t2] \n\t"
600 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
601 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
602 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
603 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
604 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
606 : [in_int]"r"(in_int)
/* first pair: codeword followed by its sign bits */
613 v_codes = (p_codes[curidx] << count1) | sign1;
614 v_bits = p_bits[curidx] + count1;
615 put_bits(pb, v_bits, v_codes);
/* second pair */
620 v_codes = (p_codes[curidx] << count2) | sign2;
621 v_bits = p_bits[curidx] + count2;
622 put_bits(pb, v_bits, v_codes);
/*
 * Quantize-and-write handler stored at index 11 of
 * quantize_and_encode_band_cost_arr.
 *
 * Unlike the other handlers visible in this file, the bias added before the
 * float-to-int conversion is the ROUNDING argument, not ROUND_STANDARD.
 *
 * Two loops over the band are visible; the condition selecting between them
 * is not visible in this part of the file.
 *
 * First loop, per four coefficients (two pairs):
 *  - qcN limited to at most 16; sign1/count1 built from (qc1, qc2) and
 *    sign2/count2 from (qc3, qc4) as one bit per nonzero coefficient;
 *  - two put_bits() calls write the codewords selected by curidx and
 *    curidx2, each followed by its sign bits.
 *
 * Second loop: same as the first, plus
 *  - c1..c4 are derived from the unclamped qcN with shll_s.w by 18 followed
 *    by srl by 18 (presumably a saturating cap at 13 bits, 8191 -- confirm
 *    against the MIPS DSP ASE definition of shll_s.w);
 *  - after each codeword, for every component whose entry in p_vectors is
 *    64.0f, an extra field of 2*len - 3 bits is written, where
 *    len = av_log2(cN): a prefix equal to (1 << (len - 3)) - 2 followed by
 *    the low len bits of cN.
 *
 * NOTE(review): the computation of curidx / curidx2, the declarations of
 * v_bits and c1..c4, and any reassignment of `scaled` are not visible here.
 * `out`, `lambda`, `uplim` and `bits` are not referenced by the visible
 * lines -- confirm.
 */
626 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
627 PutBitContext *pb, const float *in, float *out,
628 const float *scaled, int size, int scale_idx,
629 int cb, const float lambda, const float uplim,
630 int *bits, const float ROUNDING)
632 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
634 int qc1, qc2, qc3, qc4;
636 uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
637 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
638 float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
640 abs_pow34_v(s->scoefs, in, size);
/* first visible loop: no escape fields are written */
644 for (i = 0; i < size; i += 4) {
645 int curidx, curidx2, sign1, count1, sign2, count2;
/* view the float inputs as raw 32-bit words so the asm can test their sign */
646 int *in_int = (int *)&in[i];
648 unsigned int v_codes;
649 int t0, t1, t2, t3, t4;
651 qc1 = scaled[i ] * Q34 + ROUNDING;
652 qc2 = scaled[i+1] * Q34 + ROUNDING;
653 qc3 = scaled[i+2] * Q34 + ROUNDING;
654 qc4 = scaled[i+3] * Q34 + ROUNDING;
658 ".set noreorder \n\t"
/* t4 = 16 (upper limit), both sign accumulators cleared */
660 "ori %[t4], $zero, 16 \n\t"
661 "ori %[sign1], $zero, 0 \n\t"
662 "ori %[sign2], $zero, 0 \n\t"
/* qcN = 16 where 16 < qcN */
663 "slt %[t0], %[t4], %[qc1] \n\t"
664 "slt %[t1], %[t4], %[qc2] \n\t"
665 "slt %[t2], %[t4], %[qc3] \n\t"
666 "slt %[t3], %[t4], %[qc4] \n\t"
667 "movn %[qc1], %[t4], %[t0] \n\t"
668 "movn %[qc2], %[t4], %[t1] \n\t"
669 "movn %[qc3], %[t4], %[t2] \n\t"
670 "movn %[qc4], %[t4], %[t3] \n\t"
/* tN = (in[i+N] word < 0); signK gets a new low bit only when qcN != 0 */
671 "lw %[t0], 0(%[in_int]) \n\t"
672 "lw %[t1], 4(%[in_int]) \n\t"
673 "lw %[t2], 8(%[in_int]) \n\t"
674 "lw %[t3], 12(%[in_int]) \n\t"
675 "slt %[t0], %[t0], $zero \n\t"
676 "movn %[sign1], %[t0], %[qc1] \n\t"
677 "slt %[t2], %[t2], $zero \n\t"
678 "movn %[sign2], %[t2], %[qc3] \n\t"
679 "slt %[t1], %[t1], $zero \n\t"
680 "sll %[t0], %[sign1], 1 \n\t"
681 "or %[t0], %[t0], %[t1] \n\t"
682 "movn %[sign1], %[t0], %[qc2] \n\t"
683 "slt %[t3], %[t3], $zero \n\t"
684 "sll %[t0], %[sign2], 1 \n\t"
685 "or %[t0], %[t0], %[t3] \n\t"
686 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countK = number of coefficients of pair K that are greater than zero */
687 "slt %[count1], $zero, %[qc1] \n\t"
688 "slt %[t1], $zero, %[qc2] \n\t"
689 "slt %[count2], $zero, %[qc3] \n\t"
690 "slt %[t2], $zero, %[qc4] \n\t"
691 "addu %[count1], %[count1], %[t1] \n\t"
692 "addu %[count2], %[count2], %[t2] \n\t"
696 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
697 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
698 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
699 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
700 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
702 : [in_int]"r"(in_int)
/* first pair: codeword followed by its sign bits */
711 v_codes = (p_codes[curidx] << count1) | sign1;
712 v_bits = p_bits[curidx] + count1;
713 put_bits(pb, v_bits, v_codes);
/* second pair */
715 v_codes = (p_codes[curidx2] << count2) | sign2;
716 v_bits = p_bits[curidx2] + count2;
717 put_bits(pb, v_bits, v_codes);
/* second visible loop: additionally writes escape fields built from c1..c4 */
720 for (i = 0; i < size; i += 4) {
721 int curidx, curidx2, sign1, count1, sign2, count2;
722 int *in_int = (int *)&in[i];
724 unsigned int v_codes;
726 int t0, t1, t2, t3, t4;
728 qc1 = scaled[i ] * Q34 + ROUNDING;
729 qc2 = scaled[i+1] * Q34 + ROUNDING;
730 qc3 = scaled[i+2] * Q34 + ROUNDING;
731 qc4 = scaled[i+3] * Q34 + ROUNDING;
735 ".set noreorder \n\t"
737 "ori %[t4], $zero, 16 \n\t"
738 "ori %[sign1], $zero, 0 \n\t"
739 "ori %[sign2], $zero, 0 \n\t"
/* cN is taken from qcN before qcN is limited to 16 below */
740 "shll_s.w %[c1], %[qc1], 18 \n\t"
741 "shll_s.w %[c2], %[qc2], 18 \n\t"
742 "shll_s.w %[c3], %[qc3], 18 \n\t"
743 "shll_s.w %[c4], %[qc4], 18 \n\t"
744 "srl %[c1], %[c1], 18 \n\t"
745 "srl %[c2], %[c2], 18 \n\t"
746 "srl %[c3], %[c3], 18 \n\t"
747 "srl %[c4], %[c4], 18 \n\t"
/* qcN = 16 where 16 < qcN */
748 "slt %[t0], %[t4], %[qc1] \n\t"
749 "slt %[t1], %[t4], %[qc2] \n\t"
750 "slt %[t2], %[t4], %[qc3] \n\t"
751 "slt %[t3], %[t4], %[qc4] \n\t"
752 "movn %[qc1], %[t4], %[t0] \n\t"
753 "movn %[qc2], %[t4], %[t1] \n\t"
754 "movn %[qc3], %[t4], %[t2] \n\t"
755 "movn %[qc4], %[t4], %[t3] \n\t"
/* sign / count accumulation, identical to the first loop */
756 "lw %[t0], 0(%[in_int]) \n\t"
757 "lw %[t1], 4(%[in_int]) \n\t"
758 "lw %[t2], 8(%[in_int]) \n\t"
759 "lw %[t3], 12(%[in_int]) \n\t"
760 "slt %[t0], %[t0], $zero \n\t"
761 "movn %[sign1], %[t0], %[qc1] \n\t"
762 "slt %[t2], %[t2], $zero \n\t"
763 "movn %[sign2], %[t2], %[qc3] \n\t"
764 "slt %[t1], %[t1], $zero \n\t"
765 "sll %[t0], %[sign1], 1 \n\t"
766 "or %[t0], %[t0], %[t1] \n\t"
767 "movn %[sign1], %[t0], %[qc2] \n\t"
768 "slt %[t3], %[t3], $zero \n\t"
769 "sll %[t0], %[sign2], 1 \n\t"
770 "or %[t0], %[t0], %[t3] \n\t"
771 "movn %[sign2], %[t0], %[qc4] \n\t"
772 "slt %[count1], $zero, %[qc1] \n\t"
773 "slt %[t1], $zero, %[qc2] \n\t"
774 "slt %[count2], $zero, %[qc3] \n\t"
775 "slt %[t2], $zero, %[qc4] \n\t"
776 "addu %[count1], %[count1], %[t1] \n\t"
777 "addu %[count2], %[count2], %[t2] \n\t"
781 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
782 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
783 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
784 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
785 [c1]"=&r"(c1), [c2]"=&r"(c2),
786 [c3]"=&r"(c3), [c4]"=&r"(c4),
787 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
789 : [in_int]"r"(in_int)
/* first pair: codeword followed by its sign bits */
799 v_codes = (p_codes[curidx] << count1) | sign1;
800 v_bits = p_bits[curidx] + count1;
801 put_bits(pb, v_bits, v_codes);
/* escape field for the first component when its codebook vector entry is 64.0f */
803 if (p_vectors[curidx*2 ] == 64.0f) {
804 int len = av_log2(c1);
/* (len - 3)-bit prefix with value (1 << (len - 3)) - 2, then the low len bits of c1 */
805 v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
806 put_bits(pb, len * 2 - 3, v_codes);
/* same for the second component of the first pair */
808 if (p_vectors[curidx*2+1] == 64.0f) {
809 int len = av_log2(c2);
810 v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
811 put_bits(pb, len*2-3, v_codes);
/* second pair: codeword followed by its sign bits */
814 v_codes = (p_codes[curidx2] << count2) | sign2;
815 v_bits = p_bits[curidx2] + count2;
816 put_bits(pb, v_bits, v_codes);
/* escape fields for the two components of the second pair */
818 if (p_vectors[curidx2*2 ] == 64.0f) {
819 int len = av_log2(c3);
820 v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
821 put_bits(pb, len* 2 - 3, v_codes);
823 if (p_vectors[curidx2*2+1] == 64.0f) {
824 int len = av_log2(c4);
825 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
826 put_bits(pb, len * 2 - 3, v_codes);
/*
 * Handler stored at index 12 of quantize_and_encode_band_cost_arr, where the
 * table notes that codebook 12 does not exist. It shares the signature of the
 * other handlers so it can sit in the same table.
 * Its body is not visible in this part of the file.
 */
832 static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
833 PutBitContext *pb, const float *in, float *out,
834 const float *scaled, int size, int scale_idx,
835 int cb, const float lambda, const float uplim,
836 int *bits, const float ROUNDING) {
/*
 * Handler stored at indices 0, 13, 14 and 15 of
 * quantize_and_encode_band_cost_arr. It shares the signature of the other
 * handlers and walks the band four elements at a time.
 * The loop body is not visible in this part of the file.
 */
840 static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
841 PutBitContext *pb, const float *in, float *out,
842 const float *scaled, int size, int scale_idx,
843 int cb, const float lambda, const float uplim,
844 int *bits, const float ROUNDING) {
849 for (i = 0; i < size; i += 4) {
/*
 * Table of quantize-and-write handlers, indexed by codebook number cb
 * (the quantize_and_encode_band_cost macro indexes it with cb).
 * The sixteen entries visible here map:
 *   0        -> ZERO
 *   1, 2     -> SQUAD
 *   3, 4     -> UQUAD
 *   5, 6     -> SPAIR
 *   7, 8     -> UPAIR7
 *   9, 10    -> UPAIR12
 *   11       -> ESC
 *   12       -> NONE
 *   13 .. 15 -> ZERO
 */
858 static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
859 PutBitContext *pb, const float *in, float *out,
860 const float *scaled, int size, int scale_idx,
861 int cb, const float lambda, const float uplim,
862 int *bits, const float ROUNDING) = {
863 quantize_and_encode_band_cost_ZERO_mips,
864 quantize_and_encode_band_cost_SQUAD_mips,
865 quantize_and_encode_band_cost_SQUAD_mips,
866 quantize_and_encode_band_cost_UQUAD_mips,
867 quantize_and_encode_band_cost_UQUAD_mips,
868 quantize_and_encode_band_cost_SPAIR_mips,
869 quantize_and_encode_band_cost_SPAIR_mips,
870 quantize_and_encode_band_cost_UPAIR7_mips,
871 quantize_and_encode_band_cost_UPAIR7_mips,
872 quantize_and_encode_band_cost_UPAIR12_mips,
873 quantize_and_encode_band_cost_UPAIR12_mips,
874 quantize_and_encode_band_cost_ESC_mips,
875 quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
876 quantize_and_encode_band_cost_ZERO_mips,
877 quantize_and_encode_band_cost_ZERO_mips,
878 quantize_and_encode_band_cost_ZERO_mips,
/*
 * Forward all twelve arguments unchanged to the handler stored at
 * quantize_and_encode_band_cost_arr[cb]. cb is used directly as the array
 * index; the macro itself performs no range check.
 */
881 #define quantize_and_encode_band_cost( \
882 s, pb, in, out, scaled, size, scale_idx, cb, \
883 lambda, uplim, bits, ROUNDING) \
884 quantize_and_encode_band_cost_arr[cb]( \
885 s, pb, in, out, scaled, size, scale_idx, cb, \
886 lambda, uplim, bits, ROUNDING)
/*
 * Entry point that dispatches on cb through quantize_and_encode_band_cost.
 * It passes NULL for `scaled`, INFINITY for `uplim` and NULL for `bits`.
 * The rounding bias is ROUND_TO_ZERO when rtz is nonzero and ROUND_STANDARD
 * otherwise.
 *
 * NOTE(review): among the handlers visible in this file only the ESC handler
 * reads ROUNDING; the others add ROUND_STANDARD unconditionally, so rtz has
 * no visible effect for them -- confirm this is intended.
 */
888 static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
889 const float *in, float *out, int size, int scale_idx,
890 int cb, const float lambda, int rtz)
892 quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
893 INFINITY, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
897 * Functions developed from template function and optimized for getting the number of bits
899 static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
900 PutBitContext *pb, const float *in,
901 const float *scaled, int size, int scale_idx,
902 int cb, const float lambda, const float uplim,
908 static float get_band_numbits_NONE_mips(struct AACEncContext *s,
909 PutBitContext *pb, const float *in,
910 const float *scaled, int size, int scale_idx,
911 int cb, const float lambda, const float uplim,
/*
 * Bit-counting counterpart of quantize_and_encode_band_cost_SQUAD_mips.
 *
 * Per four coefficients the visible code quantizes exactly as the writer
 * does (qcN = scaled[i+N] * Q34 + ROUND_STANDARD truncated to int, reduced
 * to 0 or 1, negated when the sign bit of in[i+N] is set) and then adds
 * p_bits[curidx] to curbits instead of writing to a bitstream.
 *
 * NOTE(review): the computation of curidx, the declaration and
 * initialisation of curbits, the return statement and the last parameter of
 * the signature are not visible in this part of the file. `pb`, `lambda`
 * and `uplim` are not referenced by the visible lines -- confirm.
 */
918 static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
919 PutBitContext *pb, const float *in,
920 const float *scaled, int size, int scale_idx,
921 int cb, const float lambda, const float uplim,
924 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
926 int qc1, qc2, qc3, qc4;
929 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
931 for (i = 0; i < size; i += 4) {
/* view the float inputs as raw 32-bit words so the asm can test sign bits */
933 int *in_int = (int *)&in[i];
934 int t0, t1, t2, t3, t4, t5, t6, t7;
936 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
937 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
938 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
939 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
943 ".set noreorder \n\t"
/* qcN = (0 < qcN): magnitude becomes 0 or 1 */
945 "slt %[qc1], $zero, %[qc1] \n\t"
946 "slt %[qc2], $zero, %[qc2] \n\t"
947 "slt %[qc3], $zero, %[qc3] \n\t"
948 "slt %[qc4], $zero, %[qc4] \n\t"
/* tN = sign bit of in[i+N] */
949 "lw %[t0], 0(%[in_int]) \n\t"
950 "lw %[t1], 4(%[in_int]) \n\t"
951 "lw %[t2], 8(%[in_int]) \n\t"
952 "lw %[t3], 12(%[in_int]) \n\t"
953 "srl %[t0], %[t0], 31 \n\t"
954 "srl %[t1], %[t1], 31 \n\t"
955 "srl %[t2], %[t2], 31 \n\t"
956 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qcN; movn installs the negation where the sign bit is set */
957 "subu %[t4], $zero, %[qc1] \n\t"
958 "subu %[t5], $zero, %[qc2] \n\t"
959 "subu %[t6], $zero, %[qc3] \n\t"
960 "subu %[t7], $zero, %[qc4] \n\t"
961 "movn %[qc1], %[t4], %[t0] \n\t"
962 "movn %[qc2], %[t5], %[t1] \n\t"
963 "movn %[qc3], %[t6], %[t2] \n\t"
964 "movn %[qc4], %[t7], %[t3] \n\t"
968 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
969 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
970 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
971 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
972 : [in_int]"r"(in_int)
/* accumulate the codeword length for this group of four */
985 curbits += p_bits[curidx];
/*
 * Bit-counting counterpart of quantize_and_encode_band_cost_UQUAD_mips.
 *
 * Per four coefficients the visible code computes
 * qcN = scaled[i+N] * Q34 + ROUND_STANDARD (truncated to int), limits each
 * qcN to at most 2, and adds p_bits[curidx] plus uquad_sign_bits[curidx] to
 * curbits. The input signs are not examined in the visible lines.
 *
 * NOTE(review): the computation of curidx, the declaration and
 * initialisation of curbits, the return statement and the last parameter of
 * the signature are not visible in this part of the file. `pb`, `in`,
 * `lambda` and `uplim` are not referenced by the visible lines -- confirm.
 */
990 static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
991 PutBitContext *pb, const float *in,
992 const float *scaled, int size, int scale_idx,
993 int cb, const float lambda, const float uplim,
996 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
999 int qc1, qc2, qc3, qc4;
1001 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1003 for (i = 0; i < size; i += 4) {
1005 int t0, t1, t2, t3, t4;
1007 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1008 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1009 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1010 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1014 ".set noreorder \n\t"
/* qcN = 2 where 2 < qcN */
1016 "ori %[t4], $zero, 2 \n\t"
1017 "slt %[t0], %[t4], %[qc1] \n\t"
1018 "slt %[t1], %[t4], %[qc2] \n\t"
1019 "slt %[t2], %[t4], %[qc3] \n\t"
1020 "slt %[t3], %[t4], %[qc4] \n\t"
1021 "movn %[qc1], %[t4], %[t0] \n\t"
1022 "movn %[qc2], %[t4], %[t1] \n\t"
1023 "movn %[qc3], %[t4], %[t2] \n\t"
1024 "movn %[qc4], %[t4], %[t3] \n\t"
1028 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1029 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1030 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* codeword length plus the table-supplied sign-bit count */
1042 curbits += p_bits[curidx];
1043 curbits += uquad_sign_bits[curidx];
/*
 * Bit-counting counterpart of quantize_and_encode_band_cost_SPAIR_mips.
 *
 * Per four coefficients the visible code computes
 * qcN = scaled[i+N] * Q34 + ROUND_STANDARD (truncated to int), limits each
 * qcN to at most 4, negates it when the sign bit of in[i+N] is set, and adds
 * p_bits[curidx] + p_bits[curidx2] to curbits.
 *
 * NOTE(review): most of the index computation is not visible; only
 * "curidx2 += qc4 + 40" is. The constant 40 equals 4 * 9 + 4, which would
 * recentre a signed pair in -4..4 into a 9 x 9 table -- presumably; confirm.
 * The declaration and initialisation of curbits, the return statement and
 * the last parameter of the signature are also not visible. `pb`, `lambda`
 * and `uplim` are not referenced by the visible lines.
 */
1048 static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
1049 PutBitContext *pb, const float *in,
1050 const float *scaled, int size, int scale_idx,
1051 int cb, const float lambda, const float uplim,
1054 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1056 int qc1, qc2, qc3, qc4;
1059 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1061 for (i = 0; i < size; i += 4) {
1062 int curidx, curidx2;
/* view the float inputs as raw 32-bit words so the asm can test sign bits */
1063 int *in_int = (int *)&in[i];
1064 int t0, t1, t2, t3, t4, t5, t6, t7;
1066 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1067 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1068 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1069 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1073 ".set noreorder \n\t"
/* qcN = 4 where 4 < qcN */
1075 "ori %[t4], $zero, 4 \n\t"
1076 "slt %[t0], %[t4], %[qc1] \n\t"
1077 "slt %[t1], %[t4], %[qc2] \n\t"
1078 "slt %[t2], %[t4], %[qc3] \n\t"
1079 "slt %[t3], %[t4], %[qc4] \n\t"
1080 "movn %[qc1], %[t4], %[t0] \n\t"
1081 "movn %[qc2], %[t4], %[t1] \n\t"
1082 "movn %[qc3], %[t4], %[t2] \n\t"
1083 "movn %[qc4], %[t4], %[t3] \n\t"
/* tN = sign bit of in[i+N] */
1084 "lw %[t0], 0(%[in_int]) \n\t"
1085 "lw %[t1], 4(%[in_int]) \n\t"
1086 "lw %[t2], 8(%[in_int]) \n\t"
1087 "lw %[t3], 12(%[in_int]) \n\t"
1088 "srl %[t0], %[t0], 31 \n\t"
1089 "srl %[t1], %[t1], 31 \n\t"
1090 "srl %[t2], %[t2], 31 \n\t"
1091 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qcN; movn installs the negation where the sign bit is set */
1092 "subu %[t4], $zero, %[qc1] \n\t"
1093 "subu %[t5], $zero, %[qc2] \n\t"
1094 "subu %[t6], $zero, %[qc3] \n\t"
1095 "subu %[t7], $zero, %[qc4] \n\t"
1096 "movn %[qc1], %[t4], %[t0] \n\t"
1097 "movn %[qc2], %[t5], %[t1] \n\t"
1098 "movn %[qc3], %[t6], %[t2] \n\t"
1099 "movn %[qc4], %[t7], %[t3] \n\t"
1103 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1104 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1105 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1106 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1107 : [in_int]"r"(in_int)
1115 curidx2 += qc4 + 40;
/* lengths of the two codewords covering this group of four */
1117 curbits += p_bits[curidx] + p_bits[curidx2];
/*
 * Bit-counting counterpart of quantize_and_encode_band_cost_UPAIR7_mips.
 *
 * Per four coefficients the visible code computes
 * qcN = scaled[i+N] * Q34 + ROUND_STANDARD (truncated to int), limits each
 * qcN to at most 7, and adds to curbits a sum that includes p_bits[curidx],
 * upair7_sign_bits[curidx] and upair7_sign_bits[curidx2]. The input signs
 * are not examined in the visible lines.
 *
 * NOTE(review): the computation of curidx / curidx2, one term of the sum,
 * the declaration and initialisation of curbits, the return statement and
 * the last parameter of the signature are not visible in this part of the
 * file. `pb`, `in`, `lambda` and `uplim` are not referenced by the visible
 * lines -- confirm.
 */
1122 static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
1123 PutBitContext *pb, const float *in,
1124 const float *scaled, int size, int scale_idx,
1125 int cb, const float lambda, const float uplim,
1128 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1130 int qc1, qc2, qc3, qc4;
1133 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1135 for (i = 0; i < size; i += 4) {
1136 int curidx, curidx2;
1137 int t0, t1, t2, t3, t4;
1139 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1140 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1141 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1142 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1146 ".set noreorder \n\t"
/* qcN = 7 where 7 < qcN */
1148 "ori %[t4], $zero, 7 \n\t"
1149 "slt %[t0], %[t4], %[qc1] \n\t"
1150 "slt %[t1], %[t4], %[qc2] \n\t"
1151 "slt %[t2], %[t4], %[qc3] \n\t"
1152 "slt %[t3], %[t4], %[qc4] \n\t"
1153 "movn %[qc1], %[t4], %[t0] \n\t"
1154 "movn %[qc2], %[t4], %[t1] \n\t"
1155 "movn %[qc3], %[t4], %[t2] \n\t"
1156 "movn %[qc4], %[t4], %[t3] \n\t"
1160 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1161 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1162 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* codeword length plus table-supplied sign-bit counts for both pairs */
1172 curbits += p_bits[curidx] +
1173 upair7_sign_bits[curidx] +
1175 upair7_sign_bits[curidx2];
/*
 * Estimate the bit count of a band for the unsigned-pair codebooks whose
 * quantized magnitudes are limited to 12.
 *
 * Four coefficients are handled per iteration: scaled[i..i+3] are multiplied
 * by Q34, offset by ROUND_STANDARD and converted to int, then clamped to at
 * most 12 by the asm sequence. The p_bits[] and upair12_sign_bits[] entries
 * for the indices curidx and curidx2 are accumulated in curbits.
 *
 * NOTE(review): the loop steps by 4 and reads i..i+3, so size is presumably
 * always a multiple of 4 -- confirm against callers.
 */
1180 static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
1181 PutBitContext *pb, const float *in,
1182 const float *scaled, int size, int scale_idx,
1183 int cb, const float lambda, const float uplim,
1186 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1188 int qc1, qc2, qc3, qc4;
/* table selected by the codebook number; cb is used 1-based here */
1191 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1193 for (i = 0; i < size; i += 4) {
1194 int curidx, curidx2;
1195 int t0, t1, t2, t3, t4;
/* quantize four coefficients; the float-to-int assignment truncates */
1197 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1198 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1199 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1200 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1204 ".set noreorder \n\t"
/* t4 = 12, the largest magnitude kept */
1206 "ori %[t4], $zero, 12 \n\t"
/* tN = 1 when the corresponding qc exceeds t4 */
1207 "slt %[t0], %[t4], %[qc1] \n\t"
1208 "slt %[t1], %[t4], %[qc2] \n\t"
1209 "slt %[t2], %[t4], %[qc3] \n\t"
1210 "slt %[t3], %[t4], %[qc4] \n\t"
/* replace every qc flagged above with t4 */
1211 "movn %[qc1], %[t4], %[t0] \n\t"
1212 "movn %[qc2], %[t4], %[t1] \n\t"
1213 "movn %[qc3], %[t4], %[t2] \n\t"
1214 "movn %[qc4], %[t4], %[t3] \n\t"
1218 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1219 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1220 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* add the table entries for both indices to the running total */
1230 curbits += p_bits[curidx] +
1232 upair12_sign_bits[curidx] +
1233 upair12_sign_bits[curidx2];
/*
 * Estimate the bit count of a band for the escape codebook.
 *
 * Four coefficients are handled per iteration. Each quantized value qcN is
 * replaced by 16 when it exceeds 15, and cN receives a per-coefficient
 * extra bit count of (2 * floor(log2(c)) - 3) for those values and 0 for
 * all others. The p_bits[] and esc_sign_bits[] entries for curidx and
 * curidx2 are accumulated in curbits.
 *
 * NOTE(review): the loop steps by 4 and reads i..i+3, so size is presumably
 * always a multiple of 4 -- confirm against callers.
 */
1238 static float get_band_numbits_ESC_mips(struct AACEncContext *s,
1239 PutBitContext *pb, const float *in,
1240 const float *scaled, int size, int scale_idx,
1241 int cb, const float lambda, const float uplim,
1244 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1246 int qc1, qc2, qc3, qc4;
/* table selected by the codebook number; cb is used 1-based here */
1249 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1251 for (i = 0; i < size; i += 4) {
1252 int curidx, curidx2;
1253 int cond0, cond1, cond2, cond3;
/* quantize four coefficients; the float-to-int assignment truncates */
1257 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1258 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1259 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1260 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1264 ".set noreorder \n\t"
/* t4 = 15 (comparison threshold), t5 = 16 (value stored above it) */
1266 "ori %[t4], $zero, 15 \n\t"
1267 "ori %[t5], $zero, 16 \n\t"
/*
 * Saturating shift left by 18 followed by a logical shift right by 18.
 * NOTE(review): presumably this limits cN to 13 bits (8191) -- confirm
 * against the DSP ASE description of shll_s.w.
 */
1268 "shll_s.w %[c1], %[qc1], 18 \n\t"
1269 "shll_s.w %[c2], %[qc2], 18 \n\t"
1270 "shll_s.w %[c3], %[qc3], 18 \n\t"
1271 "shll_s.w %[c4], %[qc4], 18 \n\t"
1272 "srl %[c1], %[c1], 18 \n\t"
1273 "srl %[c2], %[c2], 18 \n\t"
1274 "srl %[c3], %[c3], 18 \n\t"
1275 "srl %[c4], %[c4], 18 \n\t"
/* condN = 1 when qcN is greater than 15 */
1276 "slt %[cond0], %[t4], %[qc1] \n\t"
1277 "slt %[cond1], %[t4], %[qc2] \n\t"
1278 "slt %[cond2], %[t4], %[qc3] \n\t"
1279 "slt %[cond3], %[t4], %[qc4] \n\t"
/* values above 15 are replaced by 16 */
1280 "movn %[qc1], %[t5], %[cond0] \n\t"
1281 "movn %[qc2], %[t5], %[cond1] \n\t"
1282 "movn %[qc3], %[t5], %[cond2] \n\t"
1283 "movn %[qc4], %[t5], %[cond3] \n\t"
/* cN = 31 - clz(cN), i.e. the index of the highest set bit */
1284 "ori %[t5], $zero, 31 \n\t"
1285 "clz %[c1], %[c1] \n\t"
1286 "clz %[c2], %[c2] \n\t"
1287 "clz %[c3], %[c3] \n\t"
1288 "clz %[c4], %[c4] \n\t"
1289 "subu %[c1], %[t5], %[c1] \n\t"
1290 "subu %[c2], %[t5], %[c2] \n\t"
1291 "subu %[c3], %[t5], %[c3] \n\t"
1292 "subu %[c4], %[t5], %[c4] \n\t"
/* cN = 2 * cN - 3 */
1293 "sll %[c1], %[c1], 1 \n\t"
1294 "sll %[c2], %[c2], 1 \n\t"
1295 "sll %[c3], %[c3], 1 \n\t"
1296 "sll %[c4], %[c4], 1 \n\t"
1297 "addiu %[c1], %[c1], -3 \n\t"
1298 "addiu %[c2], %[c2], -3 \n\t"
1299 "addiu %[c3], %[c3], -3 \n\t"
1300 "addiu %[c4], %[c4], -3 \n\t"
/* turn condN into an all-ones/zero mask and keep cN only where it is set */
1301 "subu %[cond0], $zero, %[cond0] \n\t"
1302 "subu %[cond1], $zero, %[cond1] \n\t"
1303 "subu %[cond2], $zero, %[cond2] \n\t"
1304 "subu %[cond3], $zero, %[cond3] \n\t"
1305 "and %[c1], %[c1], %[cond0] \n\t"
1306 "and %[c2], %[c2], %[cond1] \n\t"
1307 "and %[c3], %[c3], %[cond2] \n\t"
1308 "and %[c4], %[c4], %[cond3] \n\t"
1312 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1313 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1314 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
1315 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
1316 [c1]"=&r"(c1), [c2]"=&r"(c2),
1317 [c3]"=&r"(c3), [c4]"=&r"(c4),
1318 [t4]"=&r"(t4), [t5]"=&r"(t5)
/* add the table entries for both indices to the running total */
1327 curbits += p_bits[curidx];
1328 curbits += esc_sign_bits[curidx];
1329 curbits += p_bits[curidx2];
1330 curbits += esc_sign_bits[curidx2];
/*
 * Dispatch table of bit-count estimators, indexed by the codebook number cb
 * (see the get_band_numbits macro). Entry 0 and entries 13-15 use the ZERO
 * variant, entries 1-2 SQUAD, 3-4 UQUAD, 5-6 SPAIR, 7-8 UPAIR7, 9-10 UPAIR12,
 * 11 ESC and 12 the NONE placeholder.
 */
1340 static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
1341 PutBitContext *pb, const float *in,
1342 const float *scaled, int size, int scale_idx,
1343 int cb, const float lambda, const float uplim,
1345 get_band_numbits_ZERO_mips,
1346 get_band_numbits_SQUAD_mips,
1347 get_band_numbits_SQUAD_mips,
1348 get_band_numbits_UQUAD_mips,
1349 get_band_numbits_UQUAD_mips,
1350 get_band_numbits_SPAIR_mips,
1351 get_band_numbits_SPAIR_mips,
1352 get_band_numbits_UPAIR7_mips,
1353 get_band_numbits_UPAIR7_mips,
1354 get_band_numbits_UPAIR12_mips,
1355 get_band_numbits_UPAIR12_mips,
1356 get_band_numbits_ESC_mips,
1357 get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
1358 get_band_numbits_ZERO_mips,
1359 get_band_numbits_ZERO_mips,
1360 get_band_numbits_ZERO_mips,
/*
 * Call the bit-count estimator stored at get_band_numbits_arr[cb], forwarding
 * every argument unchanged. cb is used unchecked as the table index.
 */
1363 #define get_band_numbits( \
1364 s, pb, in, scaled, size, scale_idx, cb, \
1365 lambda, uplim, bits) \
1366 get_band_numbits_arr[cb]( \
1367 s, pb, in, scaled, size, scale_idx, cb, \
1368 lambda, uplim, bits)
/*
 * Return the result of the bit-count estimator selected by cb.
 * The call is forwarded through get_band_numbits with NULL passed as the
 * PutBitContext argument.
 */
1370 static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
1371 const float *scaled, int size, int scale_idx,
1372 int cb, const float lambda, const float uplim,
1375 return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
1379 * Functions developed from template function and optimized for getting the band cost
/*
 * Cost of a band for the ZERO table entries: the sum of the squared input
 * coefficients, multiplied by lambda.
 *
 * NOTE(review): the loop steps by 4 and reads in[i..i+3], so size is
 * presumably always a multiple of 4 -- confirm against callers.
 */
1382 static float get_band_cost_ZERO_mips(struct AACEncContext *s,
1383 PutBitContext *pb, const float *in,
1384 const float *scaled, int size, int scale_idx,
1385 int cb, const float lambda, const float uplim,
/* accumulate the energy of four coefficients per iteration */
1391 for (i = 0; i < size; i += 4) {
1392 cost += in[i ] * in[i ];
1393 cost += in[i+1] * in[i+1];
1394 cost += in[i+2] * in[i+2];
1395 cost += in[i+3] * in[i+3];
1399 return cost * lambda;
/*
 * Entry stored at index 12 of get_band_cost_arr, which the table annotates
 * with "cb 12 doesn't exist". It shares the signature of the other band cost
 * functions so that it can occupy a slot in that table.
 */
1402 static float get_band_cost_NONE_mips(struct AACEncContext *s,
1403 PutBitContext *pb, const float *in,
1404 const float *scaled, int size, int scale_idx,
1405 int cb, const float lambda, const float uplim,
/*
 * Rate-distortion cost of a band for the signed-quad table entries.
 *
 * Four coefficients are handled per iteration. Each quantized value is
 * reduced to 0 or 1 and then negated when the matching input coefficient has
 * its sign bit set. p_bits[curidx] is added to curbits, and the squared
 * differences between in[i..i+3] and the four codebook values at
 * p_codes[curidx*4] scaled by IQ are added to cost.
 *
 * Returns cost * lambda + curbits.
 *
 * NOTE(review): the loop steps by 4 and reads i..i+3, so size is presumably
 * always a multiple of 4 -- confirm against callers.
 */
1412 static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
1413 PutBitContext *pb, const float *in,
1414 const float *scaled, int size, int scale_idx,
1415 int cb, const float lambda, const float uplim,
1418 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1419 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1422 int qc1, qc2, qc3, qc4;
/* tables selected by the codebook number; cb is used 1-based here */
1425 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1426 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1428 for (i = 0; i < size; i += 4) {
/* in_int lets the asm read the raw bits of the floats to test their sign */
1431 int *in_int = (int *)&in[i];
1432 float *in_pos = (float *)&in[i];
1433 float di0, di1, di2, di3;
1434 int t0, t1, t2, t3, t4, t5, t6, t7;
/* quantize four coefficients; the float-to-int assignment truncates */
1436 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1437 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1438 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1439 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1443 ".set noreorder \n\t"
/* qcN = 1 when the quantized value is positive, otherwise 0 */
1445 "slt %[qc1], $zero, %[qc1] \n\t"
1446 "slt %[qc2], $zero, %[qc2] \n\t"
1447 "slt %[qc3], $zero, %[qc3] \n\t"
1448 "slt %[qc4], $zero, %[qc4] \n\t"
/* load the raw bits of in[i..i+3] and keep only their sign bits */
1449 "lw %[t0], 0(%[in_int]) \n\t"
1450 "lw %[t1], 4(%[in_int]) \n\t"
1451 "lw %[t2], 8(%[in_int]) \n\t"
1452 "lw %[t3], 12(%[in_int]) \n\t"
1453 "srl %[t0], %[t0], 31 \n\t"
1454 "srl %[t1], %[t1], 31 \n\t"
1455 "srl %[t2], %[t2], 31 \n\t"
1456 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 hold the negated qc values */
1457 "subu %[t4], $zero, %[qc1] \n\t"
1458 "subu %[t5], $zero, %[qc2] \n\t"
1459 "subu %[t6], $zero, %[qc3] \n\t"
1460 "subu %[t7], $zero, %[qc4] \n\t"
/* use the negated value where the input sign bit is set */
1461 "movn %[qc1], %[t4], %[t0] \n\t"
1462 "movn %[qc2], %[t5], %[t1] \n\t"
1463 "movn %[qc3], %[t6], %[t2] \n\t"
1464 "movn %[qc4], %[t7], %[t3] \n\t"
1468 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1469 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1470 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1471 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1472 : [in_int]"r"(in_int)
1485 curbits += p_bits[curidx];
/* each index owns four consecutive codebook values */
1486 vec = &p_codes[curidx*4];
1490 ".set noreorder \n\t"
/* even registers receive in[i..i+3], odd registers receive vec[0..3] */
1492 "lwc1 $f0, 0(%[in_pos]) \n\t"
1493 "lwc1 $f1, 0(%[vec]) \n\t"
1494 "lwc1 $f2, 4(%[in_pos]) \n\t"
1495 "lwc1 $f3, 4(%[vec]) \n\t"
1496 "lwc1 $f4, 8(%[in_pos]) \n\t"
1497 "lwc1 $f5, 8(%[vec]) \n\t"
1498 "lwc1 $f6, 12(%[in_pos]) \n\t"
1499 "lwc1 $f7, 12(%[vec]) \n\t"
/* diN = input - codebook value * IQ */
1500 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1501 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1502 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1503 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1507 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1508 [di2]"=&f"(di2), [di3]"=&f"(di3)
1509 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1511 : "$f0", "$f1", "$f2", "$f3",
1512 "$f4", "$f5", "$f6", "$f7",
/* accumulate the squared error of the four coefficients */
1516 cost += di0 * di0 + di1 * di1
1517 + di2 * di2 + di3 * di3;
1522 return cost * lambda + curbits;
/*
 * Rate-distortion cost of a band for the unsigned-quad table entries.
 *
 * Four coefficients are handled per iteration. Each quantized value is
 * clamped to at most 2. p_bits[curidx] and uquad_sign_bits[curidx] are added
 * to curbits, and the squared differences between |in[i..i+3]| and the four
 * codebook values at p_codes[curidx*4] scaled by IQ are added to cost.
 *
 * Returns cost * lambda + curbits.
 *
 * NOTE(review): the loop steps by 4 and reads i..i+3, so size is presumably
 * always a multiple of 4 -- confirm against callers.
 */
1525 static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
1526 PutBitContext *pb, const float *in,
1527 const float *scaled, int size, int scale_idx,
1528 int cb, const float lambda, const float uplim,
1531 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1532 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1536 int qc1, qc2, qc3, qc4;
/* tables selected by the codebook number; cb is used 1-based here */
1538 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1539 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1541 for (i = 0; i < size; i += 4) {
1544 float *in_pos = (float *)&in[i];
1545 float di0, di1, di2, di3;
1546 int t0, t1, t2, t3, t4;
/* quantize four coefficients; the float-to-int assignment truncates */
1548 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1549 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1550 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1551 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1555 ".set noreorder \n\t"
/* t4 = 2, the largest magnitude kept */
1557 "ori %[t4], $zero, 2 \n\t"
/* tN = 1 when the corresponding qc exceeds t4 */
1558 "slt %[t0], %[t4], %[qc1] \n\t"
1559 "slt %[t1], %[t4], %[qc2] \n\t"
1560 "slt %[t2], %[t4], %[qc3] \n\t"
1561 "slt %[t3], %[t4], %[qc4] \n\t"
/* replace every qc flagged above with t4 */
1562 "movn %[qc1], %[t4], %[t0] \n\t"
1563 "movn %[qc2], %[t4], %[t1] \n\t"
1564 "movn %[qc3], %[t4], %[t2] \n\t"
1565 "movn %[qc4], %[t4], %[t3] \n\t"
1569 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1570 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1571 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1583 curbits += p_bits[curidx];
1584 curbits += uquad_sign_bits[curidx];
/* each index owns four consecutive codebook values */
1585 vec = &p_codes[curidx*4];
1589 ".set noreorder \n\t"
/* diN = |in[i+N]| */
1591 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1592 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1593 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1594 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1595 "abs.s %[di0], %[di0] \n\t"
1596 "abs.s %[di1], %[di1] \n\t"
1597 "abs.s %[di2], %[di2] \n\t"
1598 "abs.s %[di3], %[di3] \n\t"
/* $f0..$f3 = vec[0..3] */
1599 "lwc1 $f0, 0(%[vec]) \n\t"
1600 "lwc1 $f1, 4(%[vec]) \n\t"
1601 "lwc1 $f2, 8(%[vec]) \n\t"
1602 "lwc1 $f3, 12(%[vec]) \n\t"
/* diN = |input| - codebook value * IQ */
1603 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1604 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1605 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1606 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1610 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1611 [di2]"=&f"(di2), [di3]"=&f"(di3)
1612 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1614 : "$f0", "$f1", "$f2", "$f3",
/* accumulate the squared error of the four coefficients */
1618 cost += di0 * di0 + di1 * di1
1619 + di2 * di2 + di3 * di3;
1624 return cost * lambda + curbits;
/*
 * Rate-distortion cost of a band for the signed-pair table entries.
 *
 * Four coefficients (two pairs) are handled per iteration. Each quantized
 * value is clamped to at most 4 and then negated when the matching input
 * coefficient has its sign bit set. p_bits[] for curidx and curidx2 is added
 * to curbits, and the squared differences between in[i..i+3] and the codebook
 * values at p_codes[curidx*2] / p_codes[curidx2*2] scaled by IQ are added to
 * cost.
 *
 * Returns cost * lambda + curbits.
 *
 * NOTE(review): the loop steps by 4 and reads i..i+3, so size is presumably
 * always a multiple of 4 -- confirm against callers.
 */
1627 static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
1628 PutBitContext *pb, const float *in,
1629 const float *scaled, int size, int scale_idx,
1630 int cb, const float lambda, const float uplim,
1633 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1634 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1637 int qc1, qc2, qc3, qc4;
/* tables selected by the codebook number; cb is used 1-based here */
1640 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1641 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1643 for (i = 0; i < size; i += 4) {
1644 const float *vec, *vec2;
1645 int curidx, curidx2;
/* in_int lets the asm read the raw bits of the floats to test their sign */
1646 int *in_int = (int *)&in[i];
1647 float *in_pos = (float *)&in[i];
1648 float di0, di1, di2, di3;
1649 int t0, t1, t2, t3, t4, t5, t6, t7;
/* quantize four coefficients; the float-to-int assignment truncates */
1651 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1652 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1653 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1654 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1658 ".set noreorder \n\t"
/* t4 = 4, the largest magnitude kept */
1660 "ori %[t4], $zero, 4 \n\t"
/* tN = 1 when the corresponding qc exceeds t4 */
1661 "slt %[t0], %[t4], %[qc1] \n\t"
1662 "slt %[t1], %[t4], %[qc2] \n\t"
1663 "slt %[t2], %[t4], %[qc3] \n\t"
1664 "slt %[t3], %[t4], %[qc4] \n\t"
/* replace every qc flagged above with t4 */
1665 "movn %[qc1], %[t4], %[t0] \n\t"
1666 "movn %[qc2], %[t4], %[t1] \n\t"
1667 "movn %[qc3], %[t4], %[t2] \n\t"
1668 "movn %[qc4], %[t4], %[t3] \n\t"
/* load the raw bits of in[i..i+3] and keep only their sign bits */
1669 "lw %[t0], 0(%[in_int]) \n\t"
1670 "lw %[t1], 4(%[in_int]) \n\t"
1671 "lw %[t2], 8(%[in_int]) \n\t"
1672 "lw %[t3], 12(%[in_int]) \n\t"
1673 "srl %[t0], %[t0], 31 \n\t"
1674 "srl %[t1], %[t1], 31 \n\t"
1675 "srl %[t2], %[t2], 31 \n\t"
1676 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 hold the negated qc values */
1677 "subu %[t4], $zero, %[qc1] \n\t"
1678 "subu %[t5], $zero, %[qc2] \n\t"
1679 "subu %[t6], $zero, %[qc3] \n\t"
1680 "subu %[t7], $zero, %[qc4] \n\t"
/* use the negated value where the input sign bit is set */
1681 "movn %[qc1], %[t4], %[t0] \n\t"
1682 "movn %[qc2], %[t5], %[t1] \n\t"
1683 "movn %[qc3], %[t6], %[t2] \n\t"
1684 "movn %[qc4], %[t7], %[t3] \n\t"
1688 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1689 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1690 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1691 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1692 : [in_int]"r"(in_int)
/* NOTE(review): 40 is presumably the offset that maps signed values onto
 * non-negative table indices -- confirm against the index computation. */
1700 curidx2 += qc4 + 40;
1702 curbits += p_bits[curidx];
1703 curbits += p_bits[curidx2];
/* each index owns two consecutive codebook values */
1705 vec = &p_codes[curidx*2];
1706 vec2 = &p_codes[curidx2*2];
1710 ".set noreorder \n\t"
/* even registers receive in[i..i+3], odd registers vec[0..1], vec2[0..1] */
1712 "lwc1 $f0, 0(%[in_pos]) \n\t"
1713 "lwc1 $f1, 0(%[vec]) \n\t"
1714 "lwc1 $f2, 4(%[in_pos]) \n\t"
1715 "lwc1 $f3, 4(%[vec]) \n\t"
1716 "lwc1 $f4, 8(%[in_pos]) \n\t"
1717 "lwc1 $f5, 0(%[vec2]) \n\t"
1718 "lwc1 $f6, 12(%[in_pos]) \n\t"
1719 "lwc1 $f7, 4(%[vec2]) \n\t"
/* diN = input - codebook value * IQ */
1720 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1721 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1722 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1723 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1727 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1728 [di2]"=&f"(di2), [di3]"=&f"(di3)
1729 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1730 [vec2]"r"(vec2), [IQ]"f"(IQ)
1731 : "$f0", "$f1", "$f2", "$f3",
1732 "$f4", "$f5", "$f6", "$f7",
/* accumulate the squared error of the four coefficients */
1736 cost += di0 * di0 + di1 * di1
1737 + di2 * di2 + di3 * di3;
1742 return cost * lambda + curbits;
/*
 * Rate-distortion cost of a band for the unsigned-pair table entries whose
 * quantized magnitudes are limited to 7.
 *
 * Four coefficients (two pairs) are handled per iteration. Each quantized
 * value is clamped to at most 7; the asm additionally packs the sign bits of
 * the inputs with a non-zero quantized value into sign1/sign2 and counts the
 * positive quantized values of each pair in count1/count2. p_bits[] and
 * upair7_sign_bits[] for curidx and curidx2 are added to curbits, and the
 * squared differences between |in[i..i+3]| and the codebook values at
 * p_codes[curidx*2] / p_codes[curidx2*2] scaled by IQ are added to cost.
 *
 * Returns cost * lambda + curbits.
 *
 * NOTE(review): the loop steps by 4 and reads i..i+3, so size is presumably
 * always a multiple of 4 -- confirm against callers.
 */
1745 static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
1746 PutBitContext *pb, const float *in,
1747 const float *scaled, int size, int scale_idx,
1748 int cb, const float lambda, const float uplim,
1751 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1752 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1755 int qc1, qc2, qc3, qc4;
/* tables selected by the codebook number; cb is used 1-based here */
1758 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1759 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1761 for (i = 0; i < size; i += 4) {
1762 const float *vec, *vec2;
1763 int curidx, curidx2, sign1, count1, sign2, count2;
/* in_int lets the asm read the raw bits of the floats to test their sign */
1764 int *in_int = (int *)&in[i];
1765 float *in_pos = (float *)&in[i];
1766 float di0, di1, di2, di3;
1767 int t0, t1, t2, t3, t4;
/* quantize four coefficients; the float-to-int assignment truncates */
1769 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1770 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1771 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1772 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1776 ".set noreorder \n\t"
/* t4 = 7 (clamp limit); start with empty packed sign words */
1778 "ori %[t4], $zero, 7 \n\t"
1779 "ori %[sign1], $zero, 0 \n\t"
1780 "ori %[sign2], $zero, 0 \n\t"
/* clamp every qc to at most t4 */
1781 "slt %[t0], %[t4], %[qc1] \n\t"
1782 "slt %[t1], %[t4], %[qc2] \n\t"
1783 "slt %[t2], %[t4], %[qc3] \n\t"
1784 "slt %[t3], %[t4], %[qc4] \n\t"
1785 "movn %[qc1], %[t4], %[t0] \n\t"
1786 "movn %[qc2], %[t4], %[t1] \n\t"
1787 "movn %[qc3], %[t4], %[t2] \n\t"
1788 "movn %[qc4], %[t4], %[t3] \n\t"
/* load the raw bits of in[i..i+3] */
1789 "lw %[t0], 0(%[in_int]) \n\t"
1790 "lw %[t1], 4(%[in_int]) \n\t"
1791 "lw %[t2], 8(%[in_int]) \n\t"
1792 "lw %[t3], 12(%[in_int]) \n\t"
/* first element of each pair: take its sign bit when its qc is non-zero */
1793 "slt %[t0], %[t0], $zero \n\t"
1794 "movn %[sign1], %[t0], %[qc1] \n\t"
1795 "slt %[t2], %[t2], $zero \n\t"
1796 "movn %[sign2], %[t2], %[qc3] \n\t"
/* second element: shift the word left and append its sign bit when its qc
 * is non-zero */
1797 "slt %[t1], %[t1], $zero \n\t"
1798 "sll %[t0], %[sign1], 1 \n\t"
1799 "or %[t0], %[t0], %[t1] \n\t"
1800 "movn %[sign1], %[t0], %[qc2] \n\t"
1801 "slt %[t3], %[t3], $zero \n\t"
1802 "sll %[t0], %[sign2], 1 \n\t"
1803 "or %[t0], %[t0], %[t3] \n\t"
1804 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countN = number of positive qc values in the pair */
1805 "slt %[count1], $zero, %[qc1] \n\t"
1806 "slt %[t1], $zero, %[qc2] \n\t"
1807 "slt %[count2], $zero, %[qc3] \n\t"
1808 "slt %[t2], $zero, %[qc4] \n\t"
1809 "addu %[count1], %[count1], %[t1] \n\t"
1810 "addu %[count2], %[count2], %[t2] \n\t"
1814 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1815 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1816 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1817 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1818 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1820 : [in_int]"r"(in_int)
1830 curbits += p_bits[curidx];
1831 curbits += upair7_sign_bits[curidx];
/* each index owns two consecutive codebook values */
1832 vec = &p_codes[curidx*2];
1834 curbits += p_bits[curidx2];
1835 curbits += upair7_sign_bits[curidx2];
1836 vec2 = &p_codes[curidx2*2];
1840 ".set noreorder \n\t"
/* diN = |in[i+N]| */
1842 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1843 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1844 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1845 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1846 "abs.s %[di0], %[di0] \n\t"
1847 "abs.s %[di1], %[di1] \n\t"
1848 "abs.s %[di2], %[di2] \n\t"
1849 "abs.s %[di3], %[di3] \n\t"
/* $f0/$f1 = vec[0..1], $f2/$f3 = vec2[0..1] */
1850 "lwc1 $f0, 0(%[vec]) \n\t"
1851 "lwc1 $f1, 4(%[vec]) \n\t"
1852 "lwc1 $f2, 0(%[vec2]) \n\t"
1853 "lwc1 $f3, 4(%[vec2]) \n\t"
/* diN = |input| - codebook value * IQ */
1854 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1855 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1856 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1857 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1861 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1862 [di2]"=&f"(di2), [di3]"=&f"(di3)
1863 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1864 [vec2]"r"(vec2), [IQ]"f"(IQ)
1865 : "$f0", "$f1", "$f2", "$f3",
/* accumulate the squared error of the four coefficients */
1869 cost += di0 * di0 + di1 * di1
1870 + di2 * di2 + di3 * di3;
1875 return cost * lambda + curbits;
/*
 * Rate-distortion cost of a band for the unsigned-pair table entries whose
 * quantized magnitudes are limited to 12.
 *
 * Four coefficients (two pairs) are handled per iteration. Each quantized
 * value is clamped to at most 12; the asm additionally packs the sign bits of
 * the inputs with a non-zero quantized value into sign1/sign2 and counts the
 * positive quantized values of each pair in count1/count2. p_bits[] and
 * upair12_sign_bits[] for curidx and curidx2 are added to curbits, and the
 * squared differences between |in[i..i+3]| and the codebook values at
 * p_codes[curidx*2] / p_codes[curidx2*2] scaled by IQ are added to cost.
 *
 * Returns cost * lambda + curbits.
 *
 * NOTE(review): the loop steps by 4 and reads i..i+3, so size is presumably
 * always a multiple of 4 -- confirm against callers.
 */
1878 static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
1879 PutBitContext *pb, const float *in,
1880 const float *scaled, int size, int scale_idx,
1881 int cb, const float lambda, const float uplim,
1884 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1885 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1888 int qc1, qc2, qc3, qc4;
/* tables selected by the codebook number; cb is used 1-based here */
1891 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1892 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1894 for (i = 0; i < size; i += 4) {
1895 const float *vec, *vec2;
1896 int curidx, curidx2;
1897 int sign1, count1, sign2, count2;
/* in_int lets the asm read the raw bits of the floats to test their sign */
1898 int *in_int = (int *)&in[i];
1899 float *in_pos = (float *)&in[i];
1900 float di0, di1, di2, di3;
1901 int t0, t1, t2, t3, t4;
/* quantize four coefficients; the float-to-int assignment truncates */
1903 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1904 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1905 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1906 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1910 ".set noreorder \n\t"
/* t4 = 12 (clamp limit); start with empty packed sign words */
1912 "ori %[t4], $zero, 12 \n\t"
1913 "ori %[sign1], $zero, 0 \n\t"
1914 "ori %[sign2], $zero, 0 \n\t"
/* clamp every qc to at most t4 */
1915 "slt %[t0], %[t4], %[qc1] \n\t"
1916 "slt %[t1], %[t4], %[qc2] \n\t"
1917 "slt %[t2], %[t4], %[qc3] \n\t"
1918 "slt %[t3], %[t4], %[qc4] \n\t"
1919 "movn %[qc1], %[t4], %[t0] \n\t"
1920 "movn %[qc2], %[t4], %[t1] \n\t"
1921 "movn %[qc3], %[t4], %[t2] \n\t"
1922 "movn %[qc4], %[t4], %[t3] \n\t"
/* load the raw bits of in[i..i+3] */
1923 "lw %[t0], 0(%[in_int]) \n\t"
1924 "lw %[t1], 4(%[in_int]) \n\t"
1925 "lw %[t2], 8(%[in_int]) \n\t"
1926 "lw %[t3], 12(%[in_int]) \n\t"
/* first element of each pair: take its sign bit when its qc is non-zero */
1927 "slt %[t0], %[t0], $zero \n\t"
1928 "movn %[sign1], %[t0], %[qc1] \n\t"
1929 "slt %[t2], %[t2], $zero \n\t"
1930 "movn %[sign2], %[t2], %[qc3] \n\t"
/* second element: shift the word left and append its sign bit when its qc
 * is non-zero */
1931 "slt %[t1], %[t1], $zero \n\t"
1932 "sll %[t0], %[sign1], 1 \n\t"
1933 "or %[t0], %[t0], %[t1] \n\t"
1934 "movn %[sign1], %[t0], %[qc2] \n\t"
1935 "slt %[t3], %[t3], $zero \n\t"
1936 "sll %[t0], %[sign2], 1 \n\t"
1937 "or %[t0], %[t0], %[t3] \n\t"
1938 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countN = number of positive qc values in the pair */
1939 "slt %[count1], $zero, %[qc1] \n\t"
1940 "slt %[t1], $zero, %[qc2] \n\t"
1941 "slt %[count2], $zero, %[qc3] \n\t"
1942 "slt %[t2], $zero, %[qc4] \n\t"
1943 "addu %[count1], %[count1], %[t1] \n\t"
1944 "addu %[count2], %[count2], %[t2] \n\t"
1948 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1949 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1950 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1951 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1952 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1954 : [in_int]"r"(in_int)
1964 curbits += p_bits[curidx];
1965 curbits += p_bits[curidx2];
1966 curbits += upair12_sign_bits[curidx];
1967 curbits += upair12_sign_bits[curidx2];
/* each index owns two consecutive codebook values */
1968 vec = &p_codes[curidx*2];
1969 vec2 = &p_codes[curidx2*2];
1973 ".set noreorder \n\t"
/* diN = |in[i+N]| */
1975 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1976 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1977 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1978 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1979 "abs.s %[di0], %[di0] \n\t"
1980 "abs.s %[di1], %[di1] \n\t"
1981 "abs.s %[di2], %[di2] \n\t"
1982 "abs.s %[di3], %[di3] \n\t"
/* $f0/$f1 = vec[0..1], $f2/$f3 = vec2[0..1] */
1983 "lwc1 $f0, 0(%[vec]) \n\t"
1984 "lwc1 $f1, 4(%[vec]) \n\t"
1985 "lwc1 $f2, 0(%[vec2]) \n\t"
1986 "lwc1 $f3, 4(%[vec2]) \n\t"
/* diN = |input| - codebook value * IQ */
1987 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1988 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1989 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1990 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1994 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1995 [di2]"=&f"(di2), [di3]"=&f"(di3)
1996 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1997 [vec2]"r"(vec2), [IQ]"f"(IQ)
1998 : "$f0", "$f1", "$f2", "$f3",
/* accumulate the squared error of the four coefficients */
2002 cost += di0 * di0 + di1 * di1
2003 + di2 * di2 + di3 * di3;
2008 return cost * lambda + curbits;
/*
 * Rate-distortion cost of a band for the escape table entry.
 *
 * Four coefficients (two pairs) are handled per iteration. Quantized values
 * above 15 are replaced by 16 and flagged in cond0..cond3; c1..c4 keep a
 * separately limited copy of the quantized value. curbits receives
 * p_bits[] and esc_sign_bits[] for curidx and curidx2 plus, for flagged
 * coefficients only, av_log2(c) * 2 - 3 extra bits. The distortion of each
 * coefficient is measured against CLIPPED_ESCAPE, c * cbrtf(c) * IQ or the
 * codebook value times IQ, depending on conditions only partly shown here.
 *
 * Returns cost * lambda + curbits.
 *
 * NOTE(review): the loop steps by 4 and reads i..i+3, so size is presumably
 * always a multiple of 4 -- confirm against callers.
 */
2011 static float get_band_cost_ESC_mips(struct AACEncContext *s,
2012 PutBitContext *pb, const float *in,
2013 const float *scaled, int size, int scale_idx,
2014 int cb, const float lambda, const float uplim,
2017 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2018 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
/* magnitude threshold at or above which the error is measured against the
 * threshold itself instead of a reconstructed value */
2019 const float CLIPPED_ESCAPE = 165140.0f * IQ;
2022 int qc1, qc2, qc3, qc4;
/* tables selected by the codebook number; cb is used 1-based here */
2025 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
2026 float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
2028 for (i = 0; i < size; i += 4) {
2029 const float *vec, *vec2;
2030 int curidx, curidx2;
2031 float t1, t2, t3, t4;
2032 float di1, di2, di3, di4;
2033 int cond0, cond1, cond2, cond3;
/* quantize four coefficients; the float-to-int assignment truncates */
2037 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
2038 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2039 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2040 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2044 ".set noreorder \n\t"
/* t6 = 15 (comparison threshold), t7 = 16 (value stored above it) */
2046 "ori %[t6], $zero, 15 \n\t"
2047 "ori %[t7], $zero, 16 \n\t"
/*
 * Saturating shift left by 18 followed by a logical shift right by 18.
 * NOTE(review): presumably this limits cN to 13 bits (8191) -- confirm
 * against the DSP ASE description of shll_s.w.
 */
2048 "shll_s.w %[c1], %[qc1], 18 \n\t"
2049 "shll_s.w %[c2], %[qc2], 18 \n\t"
2050 "shll_s.w %[c3], %[qc3], 18 \n\t"
2051 "shll_s.w %[c4], %[qc4], 18 \n\t"
2052 "srl %[c1], %[c1], 18 \n\t"
2053 "srl %[c2], %[c2], 18 \n\t"
2054 "srl %[c3], %[c3], 18 \n\t"
2055 "srl %[c4], %[c4], 18 \n\t"
/* condN = 1 when qcN is greater than 15 */
2056 "slt %[cond0], %[t6], %[qc1] \n\t"
2057 "slt %[cond1], %[t6], %[qc2] \n\t"
2058 "slt %[cond2], %[t6], %[qc3] \n\t"
2059 "slt %[cond3], %[t6], %[qc4] \n\t"
/* values above 15 are replaced by 16 */
2060 "movn %[qc1], %[t7], %[cond0] \n\t"
2061 "movn %[qc2], %[t7], %[cond1] \n\t"
2062 "movn %[qc3], %[t7], %[cond2] \n\t"
2063 "movn %[qc4], %[t7], %[cond3] \n\t"
2067 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2068 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2069 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
2070 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
2071 [c1]"=&r"(c1), [c2]"=&r"(c2),
2072 [c3]"=&r"(c3), [c4]"=&r"(c4),
2073 [t6]"=&r"(t6), [t7]"=&r"(t7)
2082 curbits += p_bits[curidx];
2083 curbits += esc_sign_bits[curidx];
/* each index owns two consecutive codebook values */
2084 vec = &p_codes[curidx*2];
2086 curbits += p_bits[curidx2];
2087 curbits += esc_sign_bits[curidx2];
2088 vec2 = &p_codes[curidx2*2];
/* -condN is all ones when the flag is set, so the extra bits are added
 * only for flagged coefficients */
2090 curbits += (av_log2(c1) * 2 - 3) & (-cond0);
2091 curbits += (av_log2(c2) * 2 - 3) & (-cond1);
2092 curbits += (av_log2(c3) * 2 - 3) & (-cond2);
2093 curbits += (av_log2(c4) * 2 - 3) & (-cond3);
/* magnitudes of the input coefficients */
2096 t2 = fabsf(in[i+1]);
2097 t3 = fabsf(in[i+2]);
2098 t4 = fabsf(in[i+3]);
/*
 * Per-coefficient error. NOTE(review): the second and third alternatives
 * are presumably chosen by the matching cond flag -- confirm against the
 * surrounding branches.
 */
2101 if (t1 >= CLIPPED_ESCAPE) {
2102 di1 = t1 - CLIPPED_ESCAPE;
2104 di1 = t1 - c1 * cbrtf(c1) * IQ;
2107 di1 = t1 - vec[0] * IQ;
2110 if (t2 >= CLIPPED_ESCAPE) {
2111 di2 = t2 - CLIPPED_ESCAPE;
2113 di2 = t2 - c2 * cbrtf(c2) * IQ;
2116 di2 = t2 - vec[1] * IQ;
2119 if (t3 >= CLIPPED_ESCAPE) {
2120 di3 = t3 - CLIPPED_ESCAPE;
2122 di3 = t3 - c3 * cbrtf(c3) * IQ;
2125 di3 = t3 - vec2[0] * IQ;
2128 if (t4 >= CLIPPED_ESCAPE) {
2129 di4 = t4 - CLIPPED_ESCAPE;
2131 di4 = t4 - c4 * cbrtf(c4) * IQ;
2134 di4 = t4 - vec2[1]*IQ;
/* accumulate the squared error of the four coefficients */
2136 cost += di1 * di1 + di2 * di2
2137 + di3 * di3 + di4 * di4;
2142 return cost * lambda + curbits;
/*
 * Dispatch table of band cost functions, indexed by the codebook number cb
 * (see the get_band_cost macro). Entry 0 and entries 13-15 use the ZERO
 * variant, entries 1-2 SQUAD, 3-4 UQUAD, 5-6 SPAIR, 7-8 UPAIR7, 9-10 UPAIR12,
 * 11 ESC and 12 the NONE placeholder.
 */
2145 static float (*const get_band_cost_arr[])(struct AACEncContext *s,
2146 PutBitContext *pb, const float *in,
2147 const float *scaled, int size, int scale_idx,
2148 int cb, const float lambda, const float uplim,
2150 get_band_cost_ZERO_mips,
2151 get_band_cost_SQUAD_mips,
2152 get_band_cost_SQUAD_mips,
2153 get_band_cost_UQUAD_mips,
2154 get_band_cost_UQUAD_mips,
2155 get_band_cost_SPAIR_mips,
2156 get_band_cost_SPAIR_mips,
2157 get_band_cost_UPAIR7_mips,
2158 get_band_cost_UPAIR7_mips,
2159 get_band_cost_UPAIR12_mips,
2160 get_band_cost_UPAIR12_mips,
2161 get_band_cost_ESC_mips,
2162 get_band_cost_NONE_mips, /* cb 12 doesn't exist */
2163 get_band_cost_ZERO_mips,
2164 get_band_cost_ZERO_mips,
2165 get_band_cost_ZERO_mips,
/*
 * Call the band cost function stored at get_band_cost_arr[cb], forwarding
 * every argument unchanged. cb is used unchecked as the table index.
 */
2168 #define get_band_cost( \
2169 s, pb, in, scaled, size, scale_idx, cb, \
2170 lambda, uplim, bits) \
2171 get_band_cost_arr[cb]( \
2172 s, pb, in, scaled, size, scale_idx, cb, \
2173 lambda, uplim, bits)
/*
 * Return the result of the band cost function selected by cb.
 * The call is forwarded through get_band_cost with NULL passed as the
 * PutBitContext argument.
 */
2175 static float quantize_band_cost(struct AACEncContext *s, const float *in,
2176 const float *scaled, int size, int scale_idx,
2177 int cb, const float lambda, const float uplim,
2180 return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
/*
 * Two-loop scalefactor search for one channel element.
 *
 * Visible steps:
 *  - derive a bit budget (destbits) from the codec bit rate, sample rate,
 *    channel count and lambda, capped at 5800;
 *  - derive per-band upper limits (uplims) and zero flags from the
 *    psychoacoustic band thresholds and energies;
 *  - seed sce->sf_idx[] from the ratio of each band limit to the smallest
 *    threshold;
 *  - repeatedly measure the bit usage / distortion with the current
 *    scalefactors, move all scalefactors by qstep towards the budget, then
 *    lower the scalefactor of bands whose distortion exceeds their limit;
 *  - stop when fflag is clear or its reaches 10.
 *
 * Results are written to sce->sf_idx[], sce->zeroes[] and sce->band_type[].
 */
2183 static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx,
2185 SingleChannelElement *sce,
2188 int start = 0, i, w, w2, g;
2189 int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels * (lambda / 120.f);
2190 float dists[128] = { 0 }, uplims[128] = { 0 };
2192 int fflag, minscaler;
2195 float minthr = INFINITY;
2197 // for values above this the decoder might end up in an endless loop
2198 // due to always having more bits than what can be encoded.
2199 destbits = FFMIN(destbits, 5800);
2200 //XXX: some heuristic to determine initial quantizers will reduce search time
2201 //determine zero bands and upper limits
2202 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2203 for (g = 0; g < sce->ics.num_swb; g++) {
2205 float uplim = 0.0f, energy = 0.0f;
/* sum threshold and energy over every window of the group */
2206 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
2207 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
2208 uplim += band->threshold;
2209 energy += band->energy;
/* a window band at or below its threshold, or with no threshold, is zeroed */
2210 if (band->energy <= band->threshold || band->threshold == 0.0f) {
2211 sce->zeroes[(w+w2)*16+g] = 1;
2216 uplims[w*16+g] = uplim *512;
/* the group band is zero unless nz was set for it */
2217 sce->zeroes[w*16+g] = !nz;
2219 minthr = FFMIN(minthr, uplim);
/* initial scalefactors: SCALE_ONE_POS for zero bands, otherwise offset by
 * 4 * log2(limit / smallest threshold), capped at 59 */
2223 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2224 for (g = 0; g < sce->ics.num_swb; g++) {
2225 if (sce->zeroes[w*16+g]) {
2226 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
2229 sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
/* fill s->scoefs from all 1024 coefficients via abs_pow34_v */
2235 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
/* record the value returned by find_max_val for every band */
2237 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2239 for (g = 0; g < sce->ics.num_swb; g++) {
2240 const float *scaled = s->scoefs + start;
2241 maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
2242 start += sce->ics.swb_sizes[g];
2246 //perform two-loop search
2247 //outer loop - improve quality
2250 minscaler = sce->sf_idx[0];
2251 //inner loop - quantize spectrum to fit into given number of bits
/* coarse steps of 32 on the first pass (its == 0), steps of 1 afterwards */
2252 qstep = its ? 1 : 32;
/* first variant of the measurement pass: bit counts only */
2259 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2261 for (g = 0; g < sce->ics.num_swb; g++) {
2262 const float *coefs = sce->coeffs + start;
2263 const float *scaled = s->scoefs + start;
/* zero bands and bands with sf_idx >= 218 are not measured */
2267 if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
2268 start += sce->ics.swb_sizes[g];
2271 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
2272 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
2273 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
2275 bits += quantize_band_cost_bits(s, coefs + w2*128,
2277 sce->ics.swb_sizes[g],
2278 sce->ics.swb_sizes[g],
2278 sce->sf_idx[w*16+g],
/* cost of coding the scalefactor as a difference from the previous one */
2285 bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
2288 start += sce->ics.swb_sizes[g];
2289 prev = sce->sf_idx[w*16+g];
/* second variant of the measurement pass: also records the distortion of
 * each band in dists[] */
2294 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2296 for (g = 0; g < sce->ics.num_swb; g++) {
2297 const float *coefs = sce->coeffs + start;
2298 const float *scaled = s->scoefs + start;
2303 if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
2304 start += sce->ics.swb_sizes[g];
2307 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
2308 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
2309 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
2311 dist += quantize_band_cost(s, coefs + w2*128,
2313 sce->ics.swb_sizes[g],
2314 sce->sf_idx[w*16+g],
/* the stored distortion excludes the bit count */
2321 dists[w*16+g] = dist - bits;
2323 bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
2326 start += sce->ics.swb_sizes[g];
2327 prev = sce->sf_idx[w*16+g];
/* over budget: raise every scalefactor that stays below 218 */
2331 if (tbits > destbits) {
2332 for (i = 0; i < 128; i++)
2333 if (sce->sf_idx[i] < 218 - qstep)
2334 sce->sf_idx[i] += qstep;
/* NOTE(review): presumably the under-budget branch -- lower every
 * scalefactor that stays above 60 */
2336 for (i = 0; i < 128; i++)
2337 if (sce->sf_idx[i] > 60 - qstep)
2338 sce->sf_idx[i] -= qstep;
2341 if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
2346 minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
/* lower the scalefactor of bands whose distortion exceeds their limit */
2348 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2349 for (g = 0; g < sce->ics.num_swb; g++) {
2350 int prevsc = sce->sf_idx[w*16+g];
2351 if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
2352 if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
2353 sce->sf_idx[w*16+g]--;
2354 else //Try to make sure there is some energy in every band
2355 sce->sf_idx[w*16+g]-=2;
/* keep the scalefactor within SCALE_MAX_DIFF of the minimum and at most 219 */
2357 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
2358 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
2359 if (sce->sf_idx[w*16+g] != prevsc)
2361 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
/* repeat while fflag is set, for at most 10 iterations */
2365 } while (fflag && its < 10);
/*
 * Decide per band whether mid/side coding is cheaper than left/right coding.
 *
 * For every band that is non-zero in both channels, the cost of coding the
 * two channels as they are (dist1) is compared with the cost of coding
 * M = (ch0 + ch1) * 0.5 and a side signal S formed by subtracting the second
 * channel's coefficients (dist2). cpe->ms_mask[w*16+g] is set to 1 when
 * dist2 < dist1 and to 0 otherwise. The function tests cpe->common_window
 * before doing any of this work.
 *
 * s->scoefs is used as scratch space for four 128-element arrays
 * (L34, R34, M34, S34).
 *
 * NOTE(review): M and S are built from coeffs[start + w2*128 + i], while the
 * abs_pow34_v and quantize_band_cost calls for the L/R data use
 * start + (w+w2)*128. Confirm which window offset is intended; the two only
 * agree when start already includes w*128.
 */
2368 static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
2370 int start = 0, i, w, w2, g;
2371 float M[128], S[128];
2372 float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
2373 const float lambda = s->lambda;
2374 SingleChannelElement *sce0 = &cpe->ch[0];
2375 SingleChannelElement *sce1 = &cpe->ch[1];
2376 if (!cpe->common_window)
2378 for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
2380 for (g = 0; g < sce0->ics.num_swb; g++) {
/* only bands that are coded in both channels are candidates */
2381 if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
2382 float dist1 = 0.0f, dist2 = 0.0f;
2383 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2384 FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
2385 FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
2386 float minthr = FFMIN(band0->threshold, band1->threshold);
2387 float maxthr = FFMAX(band0->threshold, band1->threshold);
/* build the mid and side signals, four coefficients per iteration */
2388 for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
2389 M[i ] = (sce0->coeffs[start+w2*128+i ]
2390 + sce1->coeffs[start+w2*128+i ]) * 0.5;
2391 M[i+1] = (sce0->coeffs[start+w2*128+i+1]
2392 + sce1->coeffs[start+w2*128+i+1]) * 0.5;
2393 M[i+2] = (sce0->coeffs[start+w2*128+i+2]
2394 + sce1->coeffs[start+w2*128+i+2]) * 0.5;
2395 M[i+3] = (sce0->coeffs[start+w2*128+i+3]
2396 + sce1->coeffs[start+w2*128+i+3]) * 0.5;
2399 - sce1->coeffs[start+w2*128+i ];
2401 - sce1->coeffs[start+w2*128+i+1];
2403 - sce1->coeffs[start+w2*128+i+2];
2405 - sce1->coeffs[start+w2*128+i+3];
/* fill the four scratch arrays from the L, R, M and S signals */
2407 abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2408 abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2409 abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2410 abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
/* dist1: cost of the two channels, each weighted by its own threshold */
2411 dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
2413 sce0->ics.swb_sizes[g],
2414 sce0->sf_idx[(w+w2)*16+g],
2415 sce0->band_type[(w+w2)*16+g],
2416 lambda / band0->threshold, INFINITY, NULL);
2417 dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
2419 sce1->ics.swb_sizes[g],
2420 sce1->sf_idx[(w+w2)*16+g],
2421 sce1->band_type[(w+w2)*16+g],
2422 lambda / band1->threshold, INFINITY, NULL);
/* dist2: cost of M weighted by the larger threshold and of S weighted by
 * the smaller one */
2423 dist2 += quantize_band_cost(s, M,
2425 sce0->ics.swb_sizes[g],
2426 sce0->sf_idx[(w+w2)*16+g],
2427 sce0->band_type[(w+w2)*16+g],
2428 lambda / maxthr, INFINITY, NULL);
2429 dist2 += quantize_band_cost(s, S,
2431 sce1->ics.swb_sizes[g],
2432 sce1->sf_idx[(w+w2)*16+g],
2433 sce1->band_type[(w+w2)*16+g],
2434 lambda / minthr, INFINITY, NULL);
/* choose mid/side when it is strictly cheaper */
2436 cpe->ms_mask[w*16+g] = dist2 < dist1;
2438 start += sce0->ics.swb_sizes[g];
2442 #endif /*HAVE_MIPSFPU */
/**
 * Choose a codebook for every scalefactor band of one window group by
 * minimising the accumulated bit count kept in path[][], then write the
 * resulting sections (4-bit codebook id followed by a run length) to
 * s->pb and record the choice in sce->zeroes[] / sce->band_type[].
 *
 * @param s         encoder context; s->scoefs receives the abs_pow34_v()
 *                  output for all 1024 coefficients, s->pb receives the bits
 * @param sce       channel whose bands are coded
 * @param win       index of the first window of the group
 * @param group_len number of windows in the group
 * @param lambda    not referenced in the lines visible in this excerpt
 */
2444 static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce,
2445 int win, int group_len, const float lambda)
/* path[swb][cb]: cost, predecessor and current run length of the best way
 * to have band swb-1 coded with codebook index cb. */
2447 BandCodingPath path[120][CB_TOT_ALL];
2448 int w, swb, cb, start, size;
2450 const int max_sfb = sce->ics.max_sfb;
/* Width of the run-length field: 5 bits with a single window, else 3. */
2451 const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
/* All-ones value of that field; emitted repeatedly for long runs below. */
2452 const int run_esc = (1 << run_bits) - 1;
2453 int idx, ppos, count;
2454 int stackrun[120], stackcb[120], stack_len;
2455 float next_minbits = INFINITY;
2458 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
/* Every codebook starts with the price of one section header
 * (4-bit codebook id + run field). */
2460 for (cb = 0; cb < CB_TOT_ALL; cb++) {
2461 path[0][cb].cost = run_bits+4;
2462 path[0][cb].prev_idx = -1;
2463 path[0][cb].run = 0;
2465 for (swb = 0; swb < max_sfb; swb++) {
2466 size = sce->ics.swb_sizes[swb];
/* Zeroed band: only codebook index 0 is a candidate. */
2467 if (sce->zeroes[win*16 + swb]) {
2468 float cost_stay_here = path[swb][0].cost;
2469 float cost_get_here = next_minbits + run_bits + 4;
/* Extending the run costs another run field when run_value_bits[] differs
 * between run and run+1. */
2470 if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
2471 != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
2472 cost_stay_here += run_bits;
/* Either open a new section coming from the cheapest previous codebook... */
2473 if (cost_get_here < cost_stay_here) {
2474 path[swb+1][0].prev_idx = next_mincb;
2475 path[swb+1][0].cost = cost_get_here;
2476 path[swb+1][0].run = 1;
/* ...or extend the current run of codebook 0. */
2478 path[swb+1][0].prev_idx = 0;
2479 path[swb+1][0].cost = cost_stay_here;
2480 path[swb+1][0].run = path[swb][0].run + 1;
2482 next_minbits = path[swb+1][0].cost;
/* All other codebooks are ruled out for this band.
 * NOTE(review): 61450 appears to act as an "unreachable" cost - confirm. */
2484 for (cb = 1; cb < CB_TOT_ALL; cb++) {
2485 path[swb+1][cb].cost = 61450;
2486 path[swb+1][cb].prev_idx = -1;
2487 path[swb+1][cb].run = 0;
/* Non-zero band: consider codebooks from the band's current one upwards. */
2490 float minbits = next_minbits;
2491 int mincb = next_mincb;
2492 int startcb = sce->band_type[win*16+swb];
/* Translate the band type into an index of the path[][cb] dimension. */
2493 startcb = aac_cb_in_map[startcb];
2494 next_minbits = INFINITY;
/* Codebook indices below startcb are ruled out. */
2496 for (cb = 0; cb < startcb; cb++) {
2497 path[swb+1][cb].cost = 61450;
2498 path[swb+1][cb].prev_idx = -1;
2499 path[swb+1][cb].run = 0;
2501 for (cb = startcb; cb < CB_TOT_ALL; cb++) {
2502 float cost_stay_here, cost_get_here;
/* Indices >= 12 are only allowed when they map back to the band type the
 * band already has. */
2504 if (cb >= 12 && sce->band_type[win*16+swb] != aac_cb_out_map[cb]) {
2505 path[swb+1][cb].cost = 61450;
2506 path[swb+1][cb].prev_idx = -1;
2507 path[swb+1][cb].run = 0;
/* Sum the bit cost of this band over every window of the group. */
2510 for (w = 0; w < group_len; w++) {
2511 bits += quantize_band_cost_bits(s, sce->coeffs + start + w*128,
2512 s->scoefs + start + w*128, size,
2513 sce->sf_idx[(win+w)*16+swb],
2517 cost_stay_here = path[swb][cb].cost + bits;
2518 cost_get_here = minbits + bits + run_bits + 4;
2519 if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
2520 != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
2521 cost_stay_here += run_bits;
/* Same choice as above: start a new section or extend the run of cb. */
2522 if (cost_get_here < cost_stay_here) {
2523 path[swb+1][cb].prev_idx = mincb;
2524 path[swb+1][cb].cost = cost_get_here;
2525 path[swb+1][cb].run = 1;
2527 path[swb+1][cb].prev_idx = cb;
2528 path[swb+1][cb].cost = cost_stay_here;
2529 path[swb+1][cb].run = path[swb][cb].run + 1;
/* Track the cheapest cost reaching band swb+1. */
2531 if (path[swb+1][cb].cost < next_minbits) {
2532 next_minbits = path[swb+1][cb].cost;
2537 start += sce->ics.swb_sizes[swb];
2540 //convert resulting path from backward-linked list
/* Find the cheapest codebook at the last band. */
2543 for (cb = 1; cb < CB_TOT_ALL; cb++)
2544 if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
2548 av_assert1(idx >= 0);
/* Record one section (run length, codebook), then jump to the predecessor
 * stored at the first band of that run and move ppos back by the run. */
2550 stackrun[stack_len] = path[ppos][cb].run;
2551 stackcb [stack_len] = cb;
2552 idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
2553 ppos -= path[ppos][cb].run;
2556 //perform actual band info encoding
/* The stack holds the sections last-to-first, so walk it in reverse. */
2558 for (i = stack_len - 1; i >= 0; i--) {
2559 cb = aac_cb_out_map[stackcb[i]];
2560 put_bits(&s->pb, 4, cb);
2561 count = stackrun[i];
/* Bands of a section coded with codebook 0 are flagged as zero. */
2562 memset(sce->zeroes + win*16 + start, !cb, count);
2563 //XXX: memset when band_type is also uint8_t
2564 for (j = 0; j < count; j++) {
2565 sce->band_type[win*16 + start] = cb;
/* Runs of at least run_esc are written as escape values followed by a
 * final run field. */
2568 while (count >= run_esc) {
2569 put_bits(&s->pb, run_bits, run_esc);
2572 put_bits(&s->pb, run_bits, count);
2575 #endif /* HAVE_INLINE_ASM */
/**
 * Install the MIPS-specific implementations defined in this file into the
 * coder selected for the encoder context.
 *
 * @param c encoder context; the function pointers of c->coder are
 *          overwritten in place
 */
2577 void ff_aac_coder_init_mips(AACEncContext *c) {
2579 AACCoefficientsEncoder *e = c->coder;
/* NOTE(review): the test that consumes 'option' is not visible in this
 * excerpt; presumably the overrides below apply to one coder only. */
2580 int option = c->options.aac_coder;
2583 e->quantize_and_encode_band = quantize_and_encode_band_mips;
2584 e->encode_window_bands_info = codebook_trellis_rate_mips;
/* The two search routines are only available when built with MIPS FPU
 * support (see the HAVE_MIPSFPU guard closed below). */
2586 e->search_for_quantizers = search_for_quantizers_twoloop_mips;
2587 e->search_for_ms = search_for_ms_mips;
2588 #endif /* HAVE_MIPSFPU */
2590 #endif /* HAVE_INLINE_ASM */