3 * MIPS Technologies, Inc., California.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * Author: Stanislav Ocovaj (socovaj@mips.com)
30 * Szabolcs Pal (sabolc@mips.com)
32 * AAC coefficients encoder optimized for MIPS floating-point architecture
34 * This file is part of FFmpeg.
36 * FFmpeg is free software; you can redistribute it and/or
37 * modify it under the terms of the GNU Lesser General Public
38 * License as published by the Free Software Foundation; either
39 * version 2.1 of the License, or (at your option) any later version.
41 * FFmpeg is distributed in the hope that it will be useful,
42 * but WITHOUT ANY WARRANTY; without even the implied warranty of
43 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
44 * Lesser General Public License for more details.
46 * You should have received a copy of the GNU Lesser General Public
47 * License along with FFmpeg; if not, write to the Free Software
48 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
53 * Reference: libavcodec/aaccoder.c
56 #include "libavutil/libm.h"
59 #include "libavutil/mathematics.h"
60 #include "libavcodec/avcodec.h"
61 #include "libavcodec/put_bits.h"
62 #include "libavcodec/aac.h"
63 #include "libavcodec/aacenc.h"
64 #include "libavcodec/aactab.h"
/* NOTE(review): only the opening line of BandCodingPath is present here; document the members against the full definition. */
67 typedef struct BandCodingPath {
/*
 * 64-entry lookup table, selected as run_value_bits[0].
 * Entries 0..30 hold 5, entries 31..62 hold 10 and entry 63 holds 15.
 * NOTE(review): presumably the bit cost of signalling a run length for long
 * windows - confirm against the code that indexes it.
 */
73 static const uint8_t run_value_bits_long[64] = {
74 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
75 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
76 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
77 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
/*
 * 16-entry lookup table, selected as run_value_bits[1].
 * Entries 0..6 hold 3, entries 7..14 hold 6 and entry 15 holds 9.
 * NOTE(review): presumably the short-window counterpart of
 * run_value_bits_long - confirm against the code that indexes it.
 */
80 static const uint8_t run_value_bits_short[16] = {
81 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
/* Selector over the two tables above: index 0 is the long table, index 1 the short table. */
84 static const uint8_t * const run_value_bits[2] = {
85 run_value_bits_long, run_value_bits_short
/*
 * 81-entry (3^4) table: entry i equals the number of non-zero digits in the
 * four-digit base-3 representation of i.
 * get_band_numbits_UQUAD_mips adds uquad_sign_bits[curidx] to its bit count
 * on top of the codeword length.
 */
88 static const uint8_t uquad_sign_bits[81] = {
89 0, 1, 1, 1, 2, 2, 1, 2, 2,
90 1, 2, 2, 2, 3, 3, 2, 3, 3,
91 1, 2, 2, 2, 3, 3, 2, 3, 3,
92 1, 2, 2, 2, 3, 3, 2, 3, 3,
93 2, 3, 3, 3, 4, 4, 3, 4, 4,
94 2, 3, 3, 3, 4, 4, 3, 4, 4,
95 1, 2, 2, 2, 3, 3, 2, 3, 3,
96 2, 3, 3, 3, 4, 4, 3, 4, 4,
97 2, 3, 3, 3, 4, 4, 3, 4, 4
/*
 * 64-entry (8 x 8) table: entry (row * 8 + col) equals
 * (row != 0) + (col != 0), i.e. how many of the two components are non-zero.
 * get_band_numbits_UPAIR7_mips adds upair7_sign_bits[curidx] to its bit count.
 */
100 static const uint8_t upair7_sign_bits[64] = {
101 0, 1, 1, 1, 1, 1, 1, 1,
102 1, 2, 2, 2, 2, 2, 2, 2,
103 1, 2, 2, 2, 2, 2, 2, 2,
104 1, 2, 2, 2, 2, 2, 2, 2,
105 1, 2, 2, 2, 2, 2, 2, 2,
106 1, 2, 2, 2, 2, 2, 2, 2,
107 1, 2, 2, 2, 2, 2, 2, 2,
108 1, 2, 2, 2, 2, 2, 2, 2,
/*
 * 169-entry (13 x 13) table: entry (row * 13 + col) equals
 * (row != 0) + (col != 0), i.e. how many of the two components are non-zero.
 * NOTE(review): presumably the sign-bit count for codebooks whose magnitudes
 * are clamped to 12 - confirm against the code that indexes it.
 */
111 static const uint8_t upair12_sign_bits[169] = {
112 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
115 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
116 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
123 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
124 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * 289-entry (17 x 17) table: entry (row * 17 + col) equals
 * (row != 0) + (col != 0), i.e. how many of the two components are non-zero.
 * NOTE(review): presumably the sign-bit count for the escape codebook, whose
 * magnitudes are clamped to 16 - confirm against the code that indexes it.
 */
127 static const uint8_t esc_sign_bits[289] = {
128 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
132 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
133 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
134 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
135 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
136 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
137 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
139 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
140 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
144 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * Rounding offsets added to a scaled coefficient before it is truncated to an
 * int (the qc1..qc4 assignments in the quantizers below).
 * quantize_and_encode_band_mips passes ROUND_TO_ZERO as the ROUNDING argument
 * when its rtz parameter is non-zero and ROUND_STANDARD otherwise.
 */
147 #define ROUND_STANDARD 0.4054f
148 #define ROUND_TO_ZERO 0.1054f
/*
 * Vector helper that reads `in` and writes `out`, stepping through `size`
 * elements four at a time. The body is compiled only when
 * USE_REALLY_FULL_SEARCH is not defined.
 * NOTE(review): the name suggests out[i] = |in[i]|^(3/4); the arithmetic is
 * not visible here - confirm. The stride of 4 suggests size is expected to be
 * a multiple of 4 - confirm with callers.
 */
150 static void abs_pow34_v(float *out, const float *in, const int size) {
151 #ifndef USE_REALLY_FULL_SEARCH
154 float ax, bx, cx, dx;
156 for (i = 0; i < size; i += 4) {
177 #endif /* USE_REALLY_FULL_SEARCH */
/*
 * Scans group_len windows of swb_size coefficients each and keeps the largest
 * value of scaled[] seen in maxval.
 * Consecutive windows are 128 floats apart in `scaled` (index w2*128 + i).
 * NOTE(review): the initial value of maxval and the return statement are not
 * visible here.
 */
180 static float find_max_val(int group_len, int swb_size, const float *scaled) {
183 for (w2 = 0; w2 < group_len; w2++) {
184 for (i = 0; i < swb_size; i++) {
185 maxval = FFMAX(maxval, scaled[w2*128+i]);
/*
 * Maps the largest quantized magnitude of a band to a codebook number.
 *
 * maxval is scaled by Q^(3/4), where Q comes from ff_aac_pow2sf_tab for the
 * scalefactor sf, and offset by 0.4054f before being stored in qmaxval.
 * Visible mapping: 0 -> cb 0, 1 -> cb 1, 2 -> cb 3, <=4 -> cb 5, <=7 -> cb 7,
 * <=12 -> cb 9.
 * NOTE(review): the declaration of qmaxval/cb, the branch for values above 12
 * and the return are not visible here. The literal 0.4054f equals
 * ROUND_STANDARD; consider using the macro.
 */
191 static int find_min_book(float maxval, int sf) {
192 float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
/* sqrtf(Q * sqrtf(Q)) == Q^(3/4) */
193 float Q34 = sqrtf(Q * sqrtf(Q));
195 qmaxval = maxval * Q34 + 0.4054f;
196 if (qmaxval == 0) cb = 0;
197 else if (qmaxval == 1) cb = 1;
198 else if (qmaxval == 2) cb = 3;
199 else if (qmaxval <= 4) cb = 5;
200 else if (qmaxval <= 7) cb = 7;
201 else if (qmaxval <= 12) cb = 9;
207 * Functions developed from template function and optimized for quantizing and encoding band
/*
 * Quantizes a band four coefficients at a time and writes one codeword per
 * group of four to pb, using the bit-length/code tables of codebook cb
 * (ff_aac_spectral_bits[cb-1] / ff_aac_spectral_codes[cb-1]).
 *
 * Shares its signature with every entry of quantize_and_encode_band_cost_arr.
 * The visible code does not use out, lambda, uplim, bits or ROUNDING
 * (rounding uses ROUND_STANDARD directly).
 * NOTE(review): the computation of curidx and any guard around the
 * abs_pow34_v call are not visible here.
 */
209 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
210 PutBitContext *pb, const float *in, float *out,
211 const float *scaled, int size, int scale_idx,
212 int cb, const float lambda, const float uplim,
213 int *bits, const float ROUNDING)
215 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
217 int qc1, qc2, qc3, qc4;
219 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
220 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
222 abs_pow34_v(s->scoefs, in, size);
224 for (i = 0; i < size; i += 4) {
/* in_int views the raw bit patterns of in[i..i+3] so the asm can read the sign bits */
226 int *in_int = (int *)&in[i];
227 int t0, t1, t2, t3, t4, t5, t6, t7;
/* float -> int conversion truncates after the rounding offset is added */
229 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
230 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
231 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
232 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
236 ".set noreorder \n\t"
/* qcN = (qcN > 0): clamp each magnitude to 0 or 1 */
238 "slt %[qc1], $zero, %[qc1] \n\t"
239 "slt %[qc2], $zero, %[qc2] \n\t"
240 "slt %[qc3], $zero, %[qc3] \n\t"
241 "slt %[qc4], $zero, %[qc4] \n\t"
/* tN = sign bit (bit 31) of the corresponding input float */
242 "lw %[t0], 0(%[in_int]) \n\t"
243 "lw %[t1], 4(%[in_int]) \n\t"
244 "lw %[t2], 8(%[in_int]) \n\t"
245 "lw %[t3], 12(%[in_int]) \n\t"
246 "srl %[t0], %[t0], 31 \n\t"
247 "srl %[t1], %[t1], 31 \n\t"
248 "srl %[t2], %[t2], 31 \n\t"
249 "srl %[t3], %[t3], 31 \n\t"
/* negate qcN when the input was negative */
250 "subu %[t4], $zero, %[qc1] \n\t"
251 "subu %[t5], $zero, %[qc2] \n\t"
252 "subu %[t6], $zero, %[qc3] \n\t"
253 "subu %[t7], $zero, %[qc4] \n\t"
254 "movn %[qc1], %[t4], %[t0] \n\t"
255 "movn %[qc2], %[t5], %[t1] \n\t"
256 "movn %[qc3], %[t6], %[t2] \n\t"
257 "movn %[qc4], %[t7], %[t3] \n\t"
261 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
262 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
263 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
264 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
265 : [in_int]"r"(in_int)
/* emit the codeword for this group of four */
278 put_bits(pb, p_bits[curidx], p_codes[curidx]);
/*
 * Quantizes a band four coefficients at a time with magnitudes clamped to 2,
 * then writes one codeword per group followed by one sign bit for each
 * non-zero coefficient, all in a single put_bits call.
 *
 * Shares its signature with every entry of quantize_and_encode_band_cost_arr.
 * The visible code does not use out, lambda, uplim, bits or ROUNDING
 * (rounding uses ROUND_STANDARD directly).
 * NOTE(review): the computation of curidx, the declaration of v_bits and any
 * guard around the abs_pow34_v call are not visible here.
 */
282 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
283 PutBitContext *pb, const float *in, float *out,
284 const float *scaled, int size, int scale_idx,
285 int cb, const float lambda, const float uplim,
286 int *bits, const float ROUNDING)
288 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
290 int qc1, qc2, qc3, qc4;
292 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
293 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
295 abs_pow34_v(s->scoefs, in, size);
297 for (i = 0; i < size; i += 4) {
298 int curidx, sign, count;
/* in_int views the raw bit patterns of in[i..i+3] so the asm can test their signs */
299 int *in_int = (int *)&in[i];
301 unsigned int v_codes;
302 int t0, t1, t2, t3, t4;
304 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
305 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
306 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
307 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
311 ".set noreorder \n\t"
/* t4 = 2 (clamp limit), sign = 0 */
313 "ori %[t4], $zero, 2 \n\t"
314 "ori %[sign], $zero, 0 \n\t"
/* qcN = min(qcN, 2) */
315 "slt %[t0], %[t4], %[qc1] \n\t"
316 "slt %[t1], %[t4], %[qc2] \n\t"
317 "slt %[t2], %[t4], %[qc3] \n\t"
318 "slt %[t3], %[t4], %[qc4] \n\t"
319 "movn %[qc1], %[t4], %[t0] \n\t"
320 "movn %[qc2], %[t4], %[t1] \n\t"
321 "movn %[qc3], %[t4], %[t2] \n\t"
322 "movn %[qc4], %[t4], %[t3] \n\t"
/*
 * tN = 1 when input N is negative (its bit pattern compares below zero).
 * For every non-zero qcN, in order: sign = (sign << 1) | tN.
 * count = number of non-zero qcN.
 */
323 "lw %[t0], 0(%[in_int]) \n\t"
324 "lw %[t1], 4(%[in_int]) \n\t"
325 "lw %[t2], 8(%[in_int]) \n\t"
326 "lw %[t3], 12(%[in_int]) \n\t"
327 "slt %[t0], %[t0], $zero \n\t"
328 "movn %[sign], %[t0], %[qc1] \n\t"
329 "slt %[t1], %[t1], $zero \n\t"
330 "slt %[t2], %[t2], $zero \n\t"
331 "slt %[t3], %[t3], $zero \n\t"
332 "sll %[t0], %[sign], 1 \n\t"
333 "or %[t0], %[t0], %[t1] \n\t"
334 "movn %[sign], %[t0], %[qc2] \n\t"
335 "slt %[t4], $zero, %[qc1] \n\t"
336 "slt %[t1], $zero, %[qc2] \n\t"
337 "slt %[count], $zero, %[qc3] \n\t"
338 "sll %[t0], %[sign], 1 \n\t"
339 "or %[t0], %[t0], %[t2] \n\t"
340 "movn %[sign], %[t0], %[qc3] \n\t"
341 "slt %[t2], $zero, %[qc4] \n\t"
342 "addu %[count], %[count], %[t4] \n\t"
343 "addu %[count], %[count], %[t1] \n\t"
344 "sll %[t0], %[sign], 1 \n\t"
345 "or %[t0], %[t0], %[t3] \n\t"
346 "movn %[sign], %[t0], %[qc4] \n\t"
347 "addu %[count], %[count], %[t2] \n\t"
351 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
352 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
353 [sign]"=&r"(sign), [count]"=&r"(count),
354 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
356 : [in_int]"r"(in_int)
/* codeword followed by the low `count` bits of sign, written as one field */
368 v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
369 v_bits = p_bits[curidx] + count;
370 put_bits(pb, v_bits, v_codes);
/*
 * Quantizes a band four coefficients at a time with magnitudes clamped to 4
 * and the input sign applied, then writes two codewords (indices curidx and
 * curidx2) concatenated into a single put_bits call.
 *
 * Shares its signature with every entry of quantize_and_encode_band_cost_arr.
 * The visible code does not use out, lambda, uplim, bits or ROUNDING
 * (rounding uses ROUND_STANDARD directly).
 * NOTE(review): the computation of curidx/curidx2, the declaration of v_bits
 * and any guard around the abs_pow34_v call are not visible here.
 */
374 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
375 PutBitContext *pb, const float *in, float *out,
376 const float *scaled, int size, int scale_idx,
377 int cb, const float lambda, const float uplim,
378 int *bits, const float ROUNDING)
380 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
382 int qc1, qc2, qc3, qc4;
384 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
385 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
387 abs_pow34_v(s->scoefs, in, size);
389 for (i = 0; i < size; i += 4) {
/* in_int views the raw bit patterns of in[i..i+3] so the asm can read the sign bits */
391 int *in_int = (int *)&in[i];
393 unsigned int v_codes;
394 int t0, t1, t2, t3, t4, t5, t6, t7;
396 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
397 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
398 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
399 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
403 ".set noreorder \n\t"
/* qcN = min(qcN, 4) */
405 "ori %[t4], $zero, 4 \n\t"
406 "slt %[t0], %[t4], %[qc1] \n\t"
407 "slt %[t1], %[t4], %[qc2] \n\t"
408 "slt %[t2], %[t4], %[qc3] \n\t"
409 "slt %[t3], %[t4], %[qc4] \n\t"
410 "movn %[qc1], %[t4], %[t0] \n\t"
411 "movn %[qc2], %[t4], %[t1] \n\t"
412 "movn %[qc3], %[t4], %[t2] \n\t"
413 "movn %[qc4], %[t4], %[t3] \n\t"
/* tN = sign bit (bit 31) of the corresponding input float */
414 "lw %[t0], 0(%[in_int]) \n\t"
415 "lw %[t1], 4(%[in_int]) \n\t"
416 "lw %[t2], 8(%[in_int]) \n\t"
417 "lw %[t3], 12(%[in_int]) \n\t"
418 "srl %[t0], %[t0], 31 \n\t"
419 "srl %[t1], %[t1], 31 \n\t"
420 "srl %[t2], %[t2], 31 \n\t"
421 "srl %[t3], %[t3], 31 \n\t"
/* negate qcN when the input was negative */
422 "subu %[t4], $zero, %[qc1] \n\t"
423 "subu %[t5], $zero, %[qc2] \n\t"
424 "subu %[t6], $zero, %[qc3] \n\t"
425 "subu %[t7], $zero, %[qc4] \n\t"
426 "movn %[qc1], %[t4], %[t0] \n\t"
427 "movn %[qc2], %[t5], %[t1] \n\t"
428 "movn %[qc3], %[t6], %[t2] \n\t"
429 "movn %[qc4], %[t7], %[t3] \n\t"
433 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
434 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
435 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
436 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
437 : [in_int]"r"(in_int)
/* first codeword in the high bits, second codeword in the low bits */
447 v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
448 v_bits = p_bits[curidx] + p_bits[curidx2];
449 put_bits(pb, v_bits, v_codes);
/*
 * Quantizes a band four coefficients at a time with magnitudes clamped to 7.
 * The four values are handled as two pairs, (qc1, qc2) and (qc3, qc4); each
 * pair is written with its own put_bits call as a codeword followed by one
 * sign bit per non-zero component.
 *
 * Shares its signature with every entry of quantize_and_encode_band_cost_arr.
 * The visible code does not use out, lambda, uplim, bits or ROUNDING
 * (rounding uses ROUND_STANDARD directly).
 * NOTE(review): the computation of curidx for each pair, the declaration of
 * v_bits and any guard around the abs_pow34_v call are not visible here.
 */
453 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
454 PutBitContext *pb, const float *in, float *out,
455 const float *scaled, int size, int scale_idx,
456 int cb, const float lambda, const float uplim,
457 int *bits, const float ROUNDING)
459 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
461 int qc1, qc2, qc3, qc4;
463 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
464 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
466 abs_pow34_v(s->scoefs, in, size);
468 for (i = 0; i < size; i += 4) {
469 int curidx, sign1, count1, sign2, count2;
/* in_int views the raw bit patterns of in[i..i+3] so the asm can test their signs */
470 int *in_int = (int *)&in[i];
472 unsigned int v_codes;
473 int t0, t1, t2, t3, t4;
475 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
476 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
477 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
478 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
482 ".set noreorder \n\t"
/* t4 = 7 (clamp limit), sign1 = sign2 = 0 */
484 "ori %[t4], $zero, 7 \n\t"
485 "ori %[sign1], $zero, 0 \n\t"
486 "ori %[sign2], $zero, 0 \n\t"
/* qcN = min(qcN, 7) */
487 "slt %[t0], %[t4], %[qc1] \n\t"
488 "slt %[t1], %[t4], %[qc2] \n\t"
489 "slt %[t2], %[t4], %[qc3] \n\t"
490 "slt %[t3], %[t4], %[qc4] \n\t"
491 "movn %[qc1], %[t4], %[t0] \n\t"
492 "movn %[qc2], %[t4], %[t1] \n\t"
493 "movn %[qc3], %[t4], %[t2] \n\t"
494 "movn %[qc4], %[t4], %[t3] \n\t"
/*
 * tN = 1 when input N is negative. Within each pair, for every non-zero
 * component in order: signK = (signK << 1) | tN.
 * countK = number of non-zero components in pair K.
 */
495 "lw %[t0], 0(%[in_int]) \n\t"
496 "lw %[t1], 4(%[in_int]) \n\t"
497 "lw %[t2], 8(%[in_int]) \n\t"
498 "lw %[t3], 12(%[in_int]) \n\t"
499 "slt %[t0], %[t0], $zero \n\t"
500 "movn %[sign1], %[t0], %[qc1] \n\t"
501 "slt %[t2], %[t2], $zero \n\t"
502 "movn %[sign2], %[t2], %[qc3] \n\t"
503 "slt %[t1], %[t1], $zero \n\t"
504 "sll %[t0], %[sign1], 1 \n\t"
505 "or %[t0], %[t0], %[t1] \n\t"
506 "movn %[sign1], %[t0], %[qc2] \n\t"
507 "slt %[t3], %[t3], $zero \n\t"
508 "sll %[t0], %[sign2], 1 \n\t"
509 "or %[t0], %[t0], %[t3] \n\t"
510 "movn %[sign2], %[t0], %[qc4] \n\t"
511 "slt %[count1], $zero, %[qc1] \n\t"
512 "slt %[t1], $zero, %[qc2] \n\t"
513 "slt %[count2], $zero, %[qc3] \n\t"
514 "slt %[t2], $zero, %[qc4] \n\t"
515 "addu %[count1], %[count1], %[t1] \n\t"
516 "addu %[count2], %[count2], %[t2] \n\t"
520 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
521 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
522 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
523 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
524 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
526 : [in_int]"r"(in_int)
/*
 * NOTE(review): t0..t3 are already compiler-allocated output operands above,
 * so naming the hardware registers t0..t4 as clobbers looks redundant and
 * only restricts register allocation - confirm and consider dropping.
 */
527 : "t0", "t1", "t2", "t3", "t4",
/* first pair: codeword followed by its sign bits */
534 v_codes = (p_codes[curidx] << count1) | sign1;
535 v_bits = p_bits[curidx] + count1;
536 put_bits(pb, v_bits, v_codes);
/* second pair: codeword followed by its sign bits */
541 v_codes = (p_codes[curidx] << count2) | sign2;
542 v_bits = p_bits[curidx] + count2;
543 put_bits(pb, v_bits, v_codes);
/*
 * Quantizes a band four coefficients at a time with magnitudes clamped to 12.
 * The four values are handled as two pairs, (qc1, qc2) and (qc3, qc4); each
 * pair is written with its own put_bits call as a codeword followed by one
 * sign bit per non-zero component.
 *
 * Shares its signature with every entry of quantize_and_encode_band_cost_arr.
 * The visible code does not use out, lambda, uplim, bits or ROUNDING
 * (rounding uses ROUND_STANDARD directly).
 * NOTE(review): the computation of curidx for each pair, the declaration of
 * v_bits and any guard around the abs_pow34_v call are not visible here.
 */
547 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
548 PutBitContext *pb, const float *in, float *out,
549 const float *scaled, int size, int scale_idx,
550 int cb, const float lambda, const float uplim,
551 int *bits, const float ROUNDING)
553 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
555 int qc1, qc2, qc3, qc4;
557 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
558 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
560 abs_pow34_v(s->scoefs, in, size);
562 for (i = 0; i < size; i += 4) {
563 int curidx, sign1, count1, sign2, count2;
/* in_int views the raw bit patterns of in[i..i+3] so the asm can test their signs */
564 int *in_int = (int *)&in[i];
566 unsigned int v_codes;
567 int t0, t1, t2, t3, t4;
569 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
570 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
571 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
572 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
576 ".set noreorder \n\t"
/* t4 = 12 (clamp limit), sign1 = sign2 = 0 */
578 "ori %[t4], $zero, 12 \n\t"
579 "ori %[sign1], $zero, 0 \n\t"
580 "ori %[sign2], $zero, 0 \n\t"
/* qcN = min(qcN, 12) */
581 "slt %[t0], %[t4], %[qc1] \n\t"
582 "slt %[t1], %[t4], %[qc2] \n\t"
583 "slt %[t2], %[t4], %[qc3] \n\t"
584 "slt %[t3], %[t4], %[qc4] \n\t"
585 "movn %[qc1], %[t4], %[t0] \n\t"
586 "movn %[qc2], %[t4], %[t1] \n\t"
587 "movn %[qc3], %[t4], %[t2] \n\t"
588 "movn %[qc4], %[t4], %[t3] \n\t"
/*
 * tN = 1 when input N is negative. Within each pair, for every non-zero
 * component in order: signK = (signK << 1) | tN.
 * countK = number of non-zero components in pair K.
 */
589 "lw %[t0], 0(%[in_int]) \n\t"
590 "lw %[t1], 4(%[in_int]) \n\t"
591 "lw %[t2], 8(%[in_int]) \n\t"
592 "lw %[t3], 12(%[in_int]) \n\t"
593 "slt %[t0], %[t0], $zero \n\t"
594 "movn %[sign1], %[t0], %[qc1] \n\t"
595 "slt %[t2], %[t2], $zero \n\t"
596 "movn %[sign2], %[t2], %[qc3] \n\t"
597 "slt %[t1], %[t1], $zero \n\t"
598 "sll %[t0], %[sign1], 1 \n\t"
599 "or %[t0], %[t0], %[t1] \n\t"
600 "movn %[sign1], %[t0], %[qc2] \n\t"
601 "slt %[t3], %[t3], $zero \n\t"
602 "sll %[t0], %[sign2], 1 \n\t"
603 "or %[t0], %[t0], %[t3] \n\t"
604 "movn %[sign2], %[t0], %[qc4] \n\t"
605 "slt %[count1], $zero, %[qc1] \n\t"
606 "slt %[t1], $zero, %[qc2] \n\t"
607 "slt %[count2], $zero, %[qc3] \n\t"
608 "slt %[t2], $zero, %[qc4] \n\t"
609 "addu %[count1], %[count1], %[t1] \n\t"
610 "addu %[count2], %[count2], %[t2] \n\t"
614 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
615 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
616 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
617 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
618 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
620 : [in_int]"r"(in_int)
/* first pair: codeword followed by its sign bits */
627 v_codes = (p_codes[curidx] << count1) | sign1;
628 v_bits = p_bits[curidx] + count1;
629 put_bits(pb, v_bits, v_codes);
/* second pair: codeword followed by its sign bits */
634 v_codes = (p_codes[curidx] << count2) | sign2;
635 v_bits = p_bits[curidx] + count2;
636 put_bits(pb, v_bits, v_codes);
/*
 * Quantizes a band four coefficients at a time for the escape codebook and
 * writes the result to pb. Magnitudes used for the codeword index are
 * clamped to 16; rounding uses the caller-supplied ROUNDING offset.
 *
 * Two loops are visible. The first writes, per pair, a codeword followed by
 * one sign bit per non-zero component. The second additionally keeps a copy
 * c1..c4 of each value taken before the clamp and, when the matching entry
 * of the codebook vector table equals 64.0f, appends an escape sequence
 * built from that copy.
 *
 * Shares its signature with every entry of quantize_and_encode_band_cost_arr.
 * The visible code does not use out, lambda, uplim or bits.
 * NOTE(review): the condition that selects between the two loops, the
 * computation of curidx/curidx2, the declarations of v_bits and c1..c4 and
 * any guard around the abs_pow34_v call are not visible here.
 */
640 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
641 PutBitContext *pb, const float *in, float *out,
642 const float *scaled, int size, int scale_idx,
643 int cb, const float lambda, const float uplim,
644 int *bits, const float ROUNDING)
646 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
648 int qc1, qc2, qc3, qc4;
650 uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
651 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
652 float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
654 abs_pow34_v(s->scoefs, in, size);
/* First loop: codeword plus sign bits only, no escape sequences */
658 for (i = 0; i < size; i += 4) {
659 int curidx, curidx2, sign1, count1, sign2, count2;
/* in_int views the raw bit patterns of in[i..i+3] so the asm can test their signs */
660 int *in_int = (int *)&in[i];
662 unsigned int v_codes;
663 int t0, t1, t2, t3, t4;
665 qc1 = scaled[i ] * Q34 + ROUNDING;
666 qc2 = scaled[i+1] * Q34 + ROUNDING;
667 qc3 = scaled[i+2] * Q34 + ROUNDING;
668 qc4 = scaled[i+3] * Q34 + ROUNDING;
672 ".set noreorder \n\t"
/* t4 = 16 (clamp limit), sign1 = sign2 = 0 */
674 "ori %[t4], $zero, 16 \n\t"
675 "ori %[sign1], $zero, 0 \n\t"
676 "ori %[sign2], $zero, 0 \n\t"
/* qcN = min(qcN, 16) */
677 "slt %[t0], %[t4], %[qc1] \n\t"
678 "slt %[t1], %[t4], %[qc2] \n\t"
679 "slt %[t2], %[t4], %[qc3] \n\t"
680 "slt %[t3], %[t4], %[qc4] \n\t"
681 "movn %[qc1], %[t4], %[t0] \n\t"
682 "movn %[qc2], %[t4], %[t1] \n\t"
683 "movn %[qc3], %[t4], %[t2] \n\t"
684 "movn %[qc4], %[t4], %[t3] \n\t"
/*
 * tN = 1 when input N is negative. Within each pair, for every non-zero
 * component in order: signK = (signK << 1) | tN.
 * countK = number of non-zero components in pair K.
 */
685 "lw %[t0], 0(%[in_int]) \n\t"
686 "lw %[t1], 4(%[in_int]) \n\t"
687 "lw %[t2], 8(%[in_int]) \n\t"
688 "lw %[t3], 12(%[in_int]) \n\t"
689 "slt %[t0], %[t0], $zero \n\t"
690 "movn %[sign1], %[t0], %[qc1] \n\t"
691 "slt %[t2], %[t2], $zero \n\t"
692 "movn %[sign2], %[t2], %[qc3] \n\t"
693 "slt %[t1], %[t1], $zero \n\t"
694 "sll %[t0], %[sign1], 1 \n\t"
695 "or %[t0], %[t0], %[t1] \n\t"
696 "movn %[sign1], %[t0], %[qc2] \n\t"
697 "slt %[t3], %[t3], $zero \n\t"
698 "sll %[t0], %[sign2], 1 \n\t"
699 "or %[t0], %[t0], %[t3] \n\t"
700 "movn %[sign2], %[t0], %[qc4] \n\t"
701 "slt %[count1], $zero, %[qc1] \n\t"
702 "slt %[t1], $zero, %[qc2] \n\t"
703 "slt %[count2], $zero, %[qc3] \n\t"
704 "slt %[t2], $zero, %[qc4] \n\t"
705 "addu %[count1], %[count1], %[t1] \n\t"
706 "addu %[count2], %[count2], %[t2] \n\t"
710 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
711 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
712 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
713 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
714 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
716 : [in_int]"r"(in_int)
/* first pair: codeword followed by its sign bits */
725 v_codes = (p_codes[curidx] << count1) | sign1;
726 v_bits = p_bits[curidx] + count1;
727 put_bits(pb, v_bits, v_codes);
/* second pair: codeword followed by its sign bits */
729 v_codes = (p_codes[curidx2] << count2) | sign2;
730 v_bits = p_bits[curidx2] + count2;
731 put_bits(pb, v_bits, v_codes);
/* Second loop: same as above, plus escape sequences for large values */
734 for (i = 0; i < size; i += 4) {
735 int curidx, curidx2, sign1, count1, sign2, count2;
736 int *in_int = (int *)&in[i];
738 unsigned int v_codes;
740 int t0, t1, t2, t3, t4;
742 qc1 = scaled[i ] * Q34 + ROUNDING;
743 qc2 = scaled[i+1] * Q34 + ROUNDING;
744 qc3 = scaled[i+2] * Q34 + ROUNDING;
745 qc4 = scaled[i+3] * Q34 + ROUNDING;
749 ".set noreorder \n\t"
751 "ori %[t4], $zero, 16 \n\t"
752 "ori %[sign1], $zero, 0 \n\t"
753 "ori %[sign2], $zero, 0 \n\t"
/*
 * cN = copy of qcN taken before the clamp to 16: shifted left by 18 with
 * shll_s.w, then logically right by 18.
 * NOTE(review): presumably shll_s.w saturates, so cN is limited to 13 bits
 * (8191) - confirm against the MIPS DSP ASE manual.
 */
754 "shll_s.w %[c1], %[qc1], 18 \n\t"
755 "shll_s.w %[c2], %[qc2], 18 \n\t"
756 "shll_s.w %[c3], %[qc3], 18 \n\t"
757 "shll_s.w %[c4], %[qc4], 18 \n\t"
758 "srl %[c1], %[c1], 18 \n\t"
759 "srl %[c2], %[c2], 18 \n\t"
760 "srl %[c3], %[c3], 18 \n\t"
761 "srl %[c4], %[c4], 18 \n\t"
/* qcN = min(qcN, 16) */
762 "slt %[t0], %[t4], %[qc1] \n\t"
763 "slt %[t1], %[t4], %[qc2] \n\t"
764 "slt %[t2], %[t4], %[qc3] \n\t"
765 "slt %[t3], %[t4], %[qc4] \n\t"
766 "movn %[qc1], %[t4], %[t0] \n\t"
767 "movn %[qc2], %[t4], %[t1] \n\t"
768 "movn %[qc3], %[t4], %[t2] \n\t"
769 "movn %[qc4], %[t4], %[t3] \n\t"
/* sign packing and non-zero counts, as in the first loop */
770 "lw %[t0], 0(%[in_int]) \n\t"
771 "lw %[t1], 4(%[in_int]) \n\t"
772 "lw %[t2], 8(%[in_int]) \n\t"
773 "lw %[t3], 12(%[in_int]) \n\t"
774 "slt %[t0], %[t0], $zero \n\t"
775 "movn %[sign1], %[t0], %[qc1] \n\t"
776 "slt %[t2], %[t2], $zero \n\t"
777 "movn %[sign2], %[t2], %[qc3] \n\t"
778 "slt %[t1], %[t1], $zero \n\t"
779 "sll %[t0], %[sign1], 1 \n\t"
780 "or %[t0], %[t0], %[t1] \n\t"
781 "movn %[sign1], %[t0], %[qc2] \n\t"
782 "slt %[t3], %[t3], $zero \n\t"
783 "sll %[t0], %[sign2], 1 \n\t"
784 "or %[t0], %[t0], %[t3] \n\t"
785 "movn %[sign2], %[t0], %[qc4] \n\t"
786 "slt %[count1], $zero, %[qc1] \n\t"
787 "slt %[t1], $zero, %[qc2] \n\t"
788 "slt %[count2], $zero, %[qc3] \n\t"
789 "slt %[t2], $zero, %[qc4] \n\t"
790 "addu %[count1], %[count1], %[t1] \n\t"
791 "addu %[count2], %[count2], %[t2] \n\t"
795 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
796 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
797 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
798 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
799 [c1]"=&r"(c1), [c2]"=&r"(c2),
800 [c3]"=&r"(c3), [c4]"=&r"(c4),
801 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
803 : [in_int]"r"(in_int)
/* first pair: codeword followed by its sign bits */
813 v_codes = (p_codes[curidx] << count1) | sign1;
814 v_bits = p_bits[curidx] + count1;
815 put_bits(pb, v_bits, v_codes);
/*
 * Escape sequence, emitted when the codebook vector component is 64.0f:
 * with len = av_log2(c), write (len - 4) one bits, a zero bit, then the low
 * len bits of c, for 2*len - 3 bits in total.
 */
817 if (p_vectors[curidx*2 ] == 64.0f) {
818 int len = av_log2(c1);
819 v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
820 put_bits(pb, len * 2 - 3, v_codes);
822 if (p_vectors[curidx*2+1] == 64.0f) {
823 int len = av_log2(c2);
824 v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
825 put_bits(pb, len*2-3, v_codes);
/* second pair: codeword followed by its sign bits, then its escapes */
828 v_codes = (p_codes[curidx2] << count2) | sign2;
829 v_bits = p_bits[curidx2] + count2;
830 put_bits(pb, v_bits, v_codes);
832 if (p_vectors[curidx2*2 ] == 64.0f) {
833 int len = av_log2(c3);
834 v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
835 put_bits(pb, len* 2 - 3, v_codes);
837 if (p_vectors[curidx2*2+1] == 64.0f) {
838 int len = av_log2(c4);
839 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
840 put_bits(pb, len * 2 - 3, v_codes);
/*
 * Dispatch-table entry used at index 12 of quantize_and_encode_band_cost_arr,
 * which that table annotates as a codebook that does not exist.
 * NOTE(review): the function body is not visible here.
 */
846 static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
847 PutBitContext *pb, const float *in, float *out,
848 const float *scaled, int size, int scale_idx,
849 int cb, const float lambda, const float uplim,
850 int *bits, const float ROUNDING) {
/*
 * Dispatch-table entry used at indices 0 and 13..15 of
 * quantize_and_encode_band_cost_arr. Iterates over the band in steps of four.
 * NOTE(review): the loop body is not visible here; presumably it zeroes the
 * output - confirm.
 */
854 static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
855 PutBitContext *pb, const float *in, float *out,
856 const float *scaled, int size, int scale_idx,
857 int cb, const float lambda, const float uplim,
858 int *bits, const float ROUNDING) {
863 for (i = 0; i < size; i += 4) {
/*
 * Dispatch table of quantize-and-encode routines, indexed by codebook number
 * cb (see the quantize_and_encode_band_cost macro). It has 16 entries:
 *   0        ZERO
 *   1, 2     SQUAD
 *   3, 4     UQUAD
 *   5, 6     SPAIR
 *   7, 8     UPAIR7
 *   9, 10    UPAIR12
 *   11       ESC
 *   12       NONE
 *   13..15   ZERO
 */
872 static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
873 PutBitContext *pb, const float *in, float *out,
874 const float *scaled, int size, int scale_idx,
875 int cb, const float lambda, const float uplim,
876 int *bits, const float ROUNDING) = {
877 quantize_and_encode_band_cost_ZERO_mips,
878 quantize_and_encode_band_cost_SQUAD_mips,
879 quantize_and_encode_band_cost_SQUAD_mips,
880 quantize_and_encode_band_cost_UQUAD_mips,
881 quantize_and_encode_band_cost_UQUAD_mips,
882 quantize_and_encode_band_cost_SPAIR_mips,
883 quantize_and_encode_band_cost_SPAIR_mips,
884 quantize_and_encode_band_cost_UPAIR7_mips,
885 quantize_and_encode_band_cost_UPAIR7_mips,
886 quantize_and_encode_band_cost_UPAIR12_mips,
887 quantize_and_encode_band_cost_UPAIR12_mips,
888 quantize_and_encode_band_cost_ESC_mips,
889 quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
890 quantize_and_encode_band_cost_ZERO_mips,
891 quantize_and_encode_band_cost_ZERO_mips,
892 quantize_and_encode_band_cost_ZERO_mips,
/*
 * Calls the routine stored at quantize_and_encode_band_cost_arr[cb] with all
 * twelve arguments forwarded unchanged. cb is used both as the table index
 * and as an argument, so it is evaluated twice.
 */
895 #define quantize_and_encode_band_cost( \
896 s, pb, in, out, scaled, size, scale_idx, cb, \
897 lambda, uplim, bits, ROUNDING) \
898 quantize_and_encode_band_cost_arr[cb]( \
899 s, pb, in, out, scaled, size, scale_idx, cb, \
900 lambda, uplim, bits, ROUNDING)
/*
 * Entry point that quantizes and encodes one band with codebook cb by
 * dispatching through quantize_and_encode_band_cost.
 *
 * It passes NULL for scaled and for bits, INFINITY for uplim, and selects the
 * rounding offset from rtz: ROUND_TO_ZERO when rtz is non-zero, otherwise
 * ROUND_STANDARD.
 * NOTE(review): scaled is NULL here while the visible quantizers index
 * scaled[i]; whatever redirects it to a valid buffer is not visible - confirm.
 */
902 static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
903 const float *in, float *out, int size, int scale_idx,
904 int cb, const float lambda, int rtz)
906 quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
907 INFINITY, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
911 * Functions developed from template function and optimized for getting the number of bits
/*
 * ZERO variant of the get_band_numbits_* family; returns a float.
 * NOTE(review): the remainder of the parameter list and the body are not
 * visible here.
 */
913 static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
914 PutBitContext *pb, const float *in,
915 const float *scaled, int size, int scale_idx,
916 int cb, const float lambda, const float uplim,
/*
 * NONE variant of the get_band_numbits_* family; returns a float.
 * NOTE(review): the remainder of the parameter list and the body are not
 * visible here.
 */
922 static float get_band_numbits_NONE_mips(struct AACEncContext *s,
923 PutBitContext *pb, const float *in,
924 const float *scaled, int size, int scale_idx,
925 int cb, const float lambda, const float uplim,
/*
 * Counts the bits needed to code a band, four coefficients at a time, by
 * summing the codeword lengths p_bits[curidx] from codebook cb into curbits.
 * Quantization matches the SQUAD encoder: magnitudes clamped to 0 or 1, with
 * the input sign applied. Nothing is written to pb in the visible code.
 * NOTE(review): the remainder of the parameter list, the computation of
 * curidx, the declaration of curbits and the return are not visible here.
 */
932 static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
933 PutBitContext *pb, const float *in,
934 const float *scaled, int size, int scale_idx,
935 int cb, const float lambda, const float uplim,
938 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
940 int qc1, qc2, qc3, qc4;
943 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
945 for (i = 0; i < size; i += 4) {
/* in_int views the raw bit patterns of in[i..i+3] so the asm can read the sign bits */
947 int *in_int = (int *)&in[i];
948 int t0, t1, t2, t3, t4, t5, t6, t7;
950 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
951 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
952 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
953 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
957 ".set noreorder \n\t"
/* qcN = (qcN > 0): clamp each magnitude to 0 or 1 */
959 "slt %[qc1], $zero, %[qc1] \n\t"
960 "slt %[qc2], $zero, %[qc2] \n\t"
961 "slt %[qc3], $zero, %[qc3] \n\t"
962 "slt %[qc4], $zero, %[qc4] \n\t"
/* tN = sign bit (bit 31) of the corresponding input float */
963 "lw %[t0], 0(%[in_int]) \n\t"
964 "lw %[t1], 4(%[in_int]) \n\t"
965 "lw %[t2], 8(%[in_int]) \n\t"
966 "lw %[t3], 12(%[in_int]) \n\t"
967 "srl %[t0], %[t0], 31 \n\t"
968 "srl %[t1], %[t1], 31 \n\t"
969 "srl %[t2], %[t2], 31 \n\t"
970 "srl %[t3], %[t3], 31 \n\t"
/* negate qcN when the input was negative */
971 "subu %[t4], $zero, %[qc1] \n\t"
972 "subu %[t5], $zero, %[qc2] \n\t"
973 "subu %[t6], $zero, %[qc3] \n\t"
974 "subu %[t7], $zero, %[qc4] \n\t"
975 "movn %[qc1], %[t4], %[t0] \n\t"
976 "movn %[qc2], %[t5], %[t1] \n\t"
977 "movn %[qc3], %[t6], %[t2] \n\t"
978 "movn %[qc4], %[t7], %[t3] \n\t"
982 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
983 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
984 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
985 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
986 : [in_int]"r"(in_int)
/* add this group's codeword length */
999 curbits += p_bits[curidx];
/*
 * Counts the bits needed to code a band, four coefficients at a time, with
 * magnitudes clamped to 2. For each group it adds the codeword length
 * p_bits[curidx] and the sign-bit count uquad_sign_bits[curidx] to curbits.
 * Nothing is written to pb in the visible code.
 * NOTE(review): the remainder of the parameter list, the computation of
 * curidx, the declaration of curbits and the return are not visible here.
 */
1004 static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
1005 PutBitContext *pb, const float *in,
1006 const float *scaled, int size, int scale_idx,
1007 int cb, const float lambda, const float uplim,
1010 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1013 int qc1, qc2, qc3, qc4;
1015 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1017 for (i = 0; i < size; i += 4) {
1019 int t0, t1, t2, t3, t4;
1021 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1022 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1023 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1024 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1028 ".set noreorder \n\t"
/* qcN = min(qcN, 2); signs are not needed for counting */
1030 "ori %[t4], $zero, 2 \n\t"
1031 "slt %[t0], %[t4], %[qc1] \n\t"
1032 "slt %[t1], %[t4], %[qc2] \n\t"
1033 "slt %[t2], %[t4], %[qc3] \n\t"
1034 "slt %[t3], %[t4], %[qc4] \n\t"
1035 "movn %[qc1], %[t4], %[t0] \n\t"
1036 "movn %[qc2], %[t4], %[t1] \n\t"
1037 "movn %[qc3], %[t4], %[t2] \n\t"
1038 "movn %[qc4], %[t4], %[t3] \n\t"
1042 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1043 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1044 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* codeword length plus one bit per non-zero coefficient */
1056 curbits += p_bits[curidx];
1057 curbits += uquad_sign_bits[curidx];
/*
 * Counts the bits needed to code a band, four coefficients at a time, with
 * magnitudes clamped to 4 and the input sign applied. Each group is treated
 * as two pairs, and the two codeword lengths p_bits[curidx] and
 * p_bits[curidx2] are added to curbits. Nothing is written to pb in the
 * visible code.
 * NOTE(review): the remainder of the parameter list, most of the
 * curidx/curidx2 computation, the declaration of curbits and the return are
 * not visible here.
 */
1062 static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
1063 PutBitContext *pb, const float *in,
1064 const float *scaled, int size, int scale_idx,
1065 int cb, const float lambda, const float uplim,
1068 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1070 int qc1, qc2, qc3, qc4;
1073 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1075 for (i = 0; i < size; i += 4) {
1076 int curidx, curidx2;
/* in_int views the raw bit patterns of in[i..i+3] so the asm can read the sign bits */
1077 int *in_int = (int *)&in[i];
1078 int t0, t1, t2, t3, t4, t5, t6, t7;
1080 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1081 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1082 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1083 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1087 ".set noreorder \n\t"
/* qcN = min(qcN, 4) */
1089 "ori %[t4], $zero, 4 \n\t"
1090 "slt %[t0], %[t4], %[qc1] \n\t"
1091 "slt %[t1], %[t4], %[qc2] \n\t"
1092 "slt %[t2], %[t4], %[qc3] \n\t"
1093 "slt %[t3], %[t4], %[qc4] \n\t"
1094 "movn %[qc1], %[t4], %[t0] \n\t"
1095 "movn %[qc2], %[t4], %[t1] \n\t"
1096 "movn %[qc3], %[t4], %[t2] \n\t"
1097 "movn %[qc4], %[t4], %[t3] \n\t"
/* tN = sign bit (bit 31) of the corresponding input float */
1098 "lw %[t0], 0(%[in_int]) \n\t"
1099 "lw %[t1], 4(%[in_int]) \n\t"
1100 "lw %[t2], 8(%[in_int]) \n\t"
1101 "lw %[t3], 12(%[in_int]) \n\t"
1102 "srl %[t0], %[t0], 31 \n\t"
1103 "srl %[t1], %[t1], 31 \n\t"
1104 "srl %[t2], %[t2], 31 \n\t"
1105 "srl %[t3], %[t3], 31 \n\t"
/* negate qcN when the input was negative */
1106 "subu %[t4], $zero, %[qc1] \n\t"
1107 "subu %[t5], $zero, %[qc2] \n\t"
1108 "subu %[t6], $zero, %[qc3] \n\t"
1109 "subu %[t7], $zero, %[qc4] \n\t"
1110 "movn %[qc1], %[t4], %[t0] \n\t"
1111 "movn %[qc2], %[t5], %[t1] \n\t"
1112 "movn %[qc3], %[t6], %[t2] \n\t"
1113 "movn %[qc4], %[t7], %[t3] \n\t"
1117 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1118 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1119 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1120 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1121 : [in_int]"r"(in_int)
/* NOTE(review): presumably 40 = 4*9 + 4 biases signed values in [-4, 4] into a 9 x 9 table index - confirm. */
1129 curidx2 += qc4 + 40;
/* add both pairs' codeword lengths */
1131 curbits += p_bits[curidx] + p_bits[curidx2];
/*
 * Counts the bits needed to code a band, four coefficients at a time, with
 * magnitudes clamped to 7. Each group is treated as two pairs (curidx and
 * curidx2); the visible accumulation adds the codeword length of the first
 * pair and the sign-bit counts upair7_sign_bits[] of both pairs to curbits.
 * Nothing is written to pb in the visible code.
 * NOTE(review): the remainder of the parameter list, the computation of
 * curidx/curidx2, one operand line of the accumulation, the declaration of
 * curbits and the return are not visible here.
 */
1136 static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
1137 PutBitContext *pb, const float *in,
1138 const float *scaled, int size, int scale_idx,
1139 int cb, const float lambda, const float uplim,
1142 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1144 int qc1, qc2, qc3, qc4;
1147 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1149 for (i = 0; i < size; i += 4) {
1150 int curidx, curidx2;
1151 int t0, t1, t2, t3, t4;
1153 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1154 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1155 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1156 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1160 ".set noreorder \n\t"
/* qcN = min(qcN, 7); signs are not needed for counting */
1162 "ori %[t4], $zero, 7 \n\t"
1163 "slt %[t0], %[t4], %[qc1] \n\t"
1164 "slt %[t1], %[t4], %[qc2] \n\t"
1165 "slt %[t2], %[t4], %[qc3] \n\t"
1166 "slt %[t3], %[t4], %[qc4] \n\t"
1167 "movn %[qc1], %[t4], %[t0] \n\t"
1168 "movn %[qc2], %[t4], %[t1] \n\t"
1169 "movn %[qc3], %[t4], %[t2] \n\t"
1170 "movn %[qc4], %[t4], %[t3] \n\t"
1174 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1175 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1176 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* codeword length plus one bit per non-zero component, for each pair */
1186 curbits += p_bits[curidx] +
1187 upair7_sign_bits[curidx] +
1189 upair7_sign_bits[curidx2];
/*
 * Bit-count estimate for a band coded with an unsigned-pair codebook whose
 * quantized magnitudes are limited to 12.
 *
 * Same scheme as the other unsigned-pair bit counters: four scaled[] values
 * per iteration are multiplied by Q34, offset by ROUND_STANDARD, truncated to
 * int and clamped to at most 12 by the slt/movn sequence. The pair indices
 * curidx/curidx2 select entries of ff_aac_spectral_bits[cb-1] and
 * upair12_sign_bits[] that are accumulated into curbits.
 *
 * s, pb, in, lambda and uplim are not referenced by the code visible here.
 *
 * NOTE(review): the computation of curidx/curidx2 and the return statement
 * are not visible next to this code; presumably curbits is returned -- confirm.
 */
1194 static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
1195 PutBitContext *pb, const float *in,
1196 const float *scaled, int size, int scale_idx,
1197 int cb, const float lambda, const float uplim,
1200 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1202 int qc1, qc2, qc3, qc4;
/* Bit-length table of the selected codebook (codebooks are numbered from 1). */
1205 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1207 for (i = 0; i < size; i += 4) {
1208 int curidx, curidx2;
1209 int t0, t1, t2, t3, t4;
/* Quantize four coefficients; the float-to-int conversion truncates. */
1211 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1212 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1213 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1214 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
/* t4 = 12; tN = (12 < qcN); movn copies t4 into qcN when tN != 0, i.e. qcN = min(qcN, 12). */
1218 ".set noreorder \n\t"
1220 "ori %[t4], $zero, 12 \n\t"
1221 "slt %[t0], %[t4], %[qc1] \n\t"
1222 "slt %[t1], %[t4], %[qc2] \n\t"
1223 "slt %[t2], %[t4], %[qc3] \n\t"
1224 "slt %[t3], %[t4], %[qc4] \n\t"
1225 "movn %[qc1], %[t4], %[t0] \n\t"
1226 "movn %[qc2], %[t4], %[t1] \n\t"
1227 "movn %[qc3], %[t4], %[t2] \n\t"
1228 "movn %[qc4], %[t4], %[t3] \n\t"
1232 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1233 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1234 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* Codeword length plus sign-bit count for the pair indices. */
1244 curbits += p_bits[curidx] +
1246 upair12_sign_bits[curidx] +
1247 upair12_sign_bits[curidx2];
/*
 * Bit-count estimate for a band coded with the escape codebook.
 *
 * Four scaled[] values per iteration are multiplied by Q34, offset by
 * ROUND_STANDARD and truncated to int. The assembly then, for each value:
 *   - keeps a copy cN limited by a saturating left shift by 18 followed by a
 *     logical right shift by 18,
 *   - sets condN = (qcN > 15) and replaces qcN by 16 when condN is set,
 *   - turns cN into ((31 - clz(cN)) * 2 - 3) masked with -condN, so it is
 *     non-zero only for values that exceeded 15.
 * After the assembly, ff_aac_spectral_bits[cb-1] and esc_sign_bits[] entries
 * for curidx/curidx2 are added to curbits.
 *
 * s, pb, in, lambda and uplim are not referenced by the code visible here.
 *
 * NOTE(review): the declarations of c1..c4/t4/t5, the computation of
 * curidx/curidx2, the place where c1..c4 are added to the total and the
 * return statement are not visible next to this code -- confirm against the
 * complete function. shll_s.w is a MIPS DSP ASE instruction.
 */
1252 static float get_band_numbits_ESC_mips(struct AACEncContext *s,
1253 PutBitContext *pb, const float *in,
1254 const float *scaled, int size, int scale_idx,
1255 int cb, const float lambda, const float uplim,
1258 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1260 int qc1, qc2, qc3, qc4;
/* Bit-length table of the selected codebook (codebooks are numbered from 1). */
1263 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1265 for (i = 0; i < size; i += 4) {
1266 int curidx, curidx2;
1267 int cond0, cond1, cond2, cond3;
/* Quantize four coefficients; the float-to-int conversion truncates. */
1271 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1272 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1273 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1274 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1278 ".set noreorder \n\t"
/* t4 = 15 (comparison threshold), t5 = 16 (replacement value). */
1280 "ori %[t4], $zero, 15 \n\t"
1281 "ori %[t5], $zero, 16 \n\t"
/* cN = saturating (qcN << 18), then >> 18: limits the copy of qcN. */
1282 "shll_s.w %[c1], %[qc1], 18 \n\t"
1283 "shll_s.w %[c2], %[qc2], 18 \n\t"
1284 "shll_s.w %[c3], %[qc3], 18 \n\t"
1285 "shll_s.w %[c4], %[qc4], 18 \n\t"
1286 "srl %[c1], %[c1], 18 \n\t"
1287 "srl %[c2], %[c2], 18 \n\t"
1288 "srl %[c3], %[c3], 18 \n\t"
1289 "srl %[c4], %[c4], 18 \n\t"
/* condN = (15 < qcN); qcN = condN ? 16 : qcN. */
1290 "slt %[cond0], %[t4], %[qc1] \n\t"
1291 "slt %[cond1], %[t4], %[qc2] \n\t"
1292 "slt %[cond2], %[t4], %[qc3] \n\t"
1293 "slt %[cond3], %[t4], %[qc4] \n\t"
1294 "movn %[qc1], %[t5], %[cond0] \n\t"
1295 "movn %[qc2], %[t5], %[cond1] \n\t"
1296 "movn %[qc3], %[t5], %[cond2] \n\t"
1297 "movn %[qc4], %[t5], %[cond3] \n\t"
/* cN = 31 - clz(cN): index of the highest set bit. */
1298 "ori %[t5], $zero, 31 \n\t"
1299 "clz %[c1], %[c1] \n\t"
1300 "clz %[c2], %[c2] \n\t"
1301 "clz %[c3], %[c3] \n\t"
1302 "clz %[c4], %[c4] \n\t"
1303 "subu %[c1], %[t5], %[c1] \n\t"
1304 "subu %[c2], %[t5], %[c2] \n\t"
1305 "subu %[c3], %[t5], %[c3] \n\t"
1306 "subu %[c4], %[t5], %[c4] \n\t"
/* cN = cN * 2 - 3. */
1307 "sll %[c1], %[c1], 1 \n\t"
1308 "sll %[c2], %[c2], 1 \n\t"
1309 "sll %[c3], %[c3], 1 \n\t"
1310 "sll %[c4], %[c4], 1 \n\t"
1311 "addiu %[c1], %[c1], -3 \n\t"
1312 "addiu %[c2], %[c2], -3 \n\t"
1313 "addiu %[c3], %[c3], -3 \n\t"
1314 "addiu %[c4], %[c4], -3 \n\t"
/* condN = -condN (0 or all ones); cN &= condN keeps cN only when qcN exceeded 15. */
1315 "subu %[cond0], $zero, %[cond0] \n\t"
1316 "subu %[cond1], $zero, %[cond1] \n\t"
1317 "subu %[cond2], $zero, %[cond2] \n\t"
1318 "subu %[cond3], $zero, %[cond3] \n\t"
1319 "and %[c1], %[c1], %[cond0] \n\t"
1320 "and %[c2], %[c2], %[cond1] \n\t"
1321 "and %[c3], %[c3], %[cond2] \n\t"
1322 "and %[c4], %[c4], %[cond3] \n\t"
1326 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1327 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1328 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
1329 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
1330 [c1]"=&r"(c1), [c2]"=&r"(c2),
1331 [c3]"=&r"(c3), [c4]"=&r"(c4),
1332 [t4]"=&r"(t4), [t5]"=&r"(t5)
/* Codeword length plus sign-bit count for both pair indices. */
1341 curbits += p_bits[curidx];
1342 curbits += esc_sign_bits[curidx];
1343 curbits += p_bits[curidx2];
1344 curbits += esc_sign_bits[curidx2];
/*
 * Dispatch table of bit-count routines, indexed by codebook number (cb) by the
 * get_band_numbits() macro. Entry 0 and entries 13-15 use the ZERO routine,
 * 1-2 SQUAD, 3-4 UQUAD, 5-6 SPAIR, 7-8 UPAIR7, 9-10 UPAIR12, 11 ESC and
 * 12 the NONE routine.
 */
1354 static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
1355 PutBitContext *pb, const float *in,
1356 const float *scaled, int size, int scale_idx,
1357 int cb, const float lambda, const float uplim,
1359 get_band_numbits_ZERO_mips,
1360 get_band_numbits_SQUAD_mips,
1361 get_band_numbits_SQUAD_mips,
1362 get_band_numbits_UQUAD_mips,
1363 get_band_numbits_UQUAD_mips,
1364 get_band_numbits_SPAIR_mips,
1365 get_band_numbits_SPAIR_mips,
1366 get_band_numbits_UPAIR7_mips,
1367 get_band_numbits_UPAIR7_mips,
1368 get_band_numbits_UPAIR12_mips,
1369 get_band_numbits_UPAIR12_mips,
1370 get_band_numbits_ESC_mips,
1371 get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
1372 get_band_numbits_ZERO_mips,
1373 get_band_numbits_ZERO_mips,
1374 get_band_numbits_ZERO_mips,
/*
 * Calls the bit-count routine registered for codebook cb in
 * get_band_numbits_arr, forwarding every argument unchanged.
 * cb is used directly as the table index and is not range-checked here.
 */
1377 #define get_band_numbits( \
1378 s, pb, in, scaled, size, scale_idx, cb, \
1379 lambda, uplim, bits) \
1380 get_band_numbits_arr[cb]( \
1381 s, pb, in, scaled, size, scale_idx, cb, \
1382 lambda, uplim, bits)
/*
 * Returns the result of the codebook-specific bit-count routine for one band.
 * Thin wrapper around get_band_numbits() that passes NULL as the
 * PutBitContext and forwards all other arguments.
 */
1384 static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
1385 const float *scaled, int size, int scale_idx,
1386 int cb, const float lambda, const float uplim,
1389 return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
1393 * Functions developed from template function and optimized for getting the band cost
/*
 * Cost of a band handled by the ZERO routine: the sum of the squares of all
 * in[] coefficients (accumulated four per iteration, so size is consumed in
 * steps of 4), multiplied by lambda.
 *
 * s, pb, scaled, scale_idx, cb and uplim are not referenced by the code
 * visible here.
 * NOTE(review): the declaration of cost and any handling of the bits output
 * are not visible next to this code -- confirm against the complete function.
 */
1396 static float get_band_cost_ZERO_mips(struct AACEncContext *s,
1397 PutBitContext *pb, const float *in,
1398 const float *scaled, int size, int scale_idx,
1399 int cb, const float lambda, const float uplim,
1405 for (i = 0; i < size; i += 4) {
1406 cost += in[i ] * in[i ];
1407 cost += in[i+1] * in[i+1];
1408 cost += in[i+2] * in[i+2];
1409 cost += in[i+3] * in[i+3];
1413 return cost * lambda;
/*
 * Cost routine registered for codebook 12 in get_band_cost_arr, where that
 * slot is annotated "cb 12 doesn't exist".
 * NOTE(review): the function body is not visible next to this signature, so
 * its return value is not documented here -- confirm against the complete
 * function.
 */
1416 static float get_band_cost_NONE_mips(struct AACEncContext *s,
1417 PutBitContext *pb, const float *in,
1418 const float *scaled, int size, int scale_idx,
1419 int cb, const float lambda, const float uplim,
/*
 * Rate-distortion cost of a band coded with a signed-quad codebook.
 *
 * Per group of four coefficients:
 *   - each scaled[] value is multiplied by Q34, offset by ROUND_STANDARD and
 *     truncated to int;
 *   - the first assembly block reduces each value to (value > 0) and negates
 *     it when the sign bit of the matching in[] word is set, giving -1/0/1;
 *   - ff_aac_spectral_bits[cb-1][curidx] is added to curbits and vec is
 *     pointed at entry curidx*4 of ff_aac_codebook_vectors[cb-1];
 *   - the second assembly block computes di = in - vec * IQ for the four
 *     coefficients (nmsub.s), and the squared differences are added to cost.
 *
 * Returns cost * lambda + curbits.
 * s, pb and uplim are not referenced by the code visible here.
 *
 * NOTE(review): the computation of curidx, the declarations of vec/cost/
 * curbits and the IQ input operand of the second asm statement are not visible
 * next to this code -- confirm against the complete function.
 */
1426 static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
1427 PutBitContext *pb, const float *in,
1428 const float *scaled, int size, int scale_idx,
1429 int cb, const float lambda, const float uplim,
/* Q34 scales towards quantized values, IQ scales codebook values back. */
1432 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1433 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1436 int qc1, qc2, qc3, qc4;
1439 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1440 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1442 for (i = 0; i < size; i += 4) {
/* in_int views the floats as raw words so the sign bit can be extracted. */
1445 int *in_int = (int *)&in[i];
1446 float *in_pos = (float *)&in[i];
1447 float di0, di1, di2, di3;
1448 int t0, t1, t2, t3, t4, t5, t6, t7;
1450 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1451 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1452 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1453 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1457 ".set noreorder \n\t"
/* qcN = (0 < qcN): magnitude reduced to 0 or 1. */
1459 "slt %[qc1], $zero, %[qc1] \n\t"
1460 "slt %[qc2], $zero, %[qc2] \n\t"
1461 "slt %[qc3], $zero, %[qc3] \n\t"
1462 "slt %[qc4], $zero, %[qc4] \n\t"
/* tN = sign bit of in[i+N]; qcN = -qcN when that bit is set. */
1463 "lw %[t0], 0(%[in_int]) \n\t"
1464 "lw %[t1], 4(%[in_int]) \n\t"
1465 "lw %[t2], 8(%[in_int]) \n\t"
1466 "lw %[t3], 12(%[in_int]) \n\t"
1467 "srl %[t0], %[t0], 31 \n\t"
1468 "srl %[t1], %[t1], 31 \n\t"
1469 "srl %[t2], %[t2], 31 \n\t"
1470 "srl %[t3], %[t3], 31 \n\t"
1471 "subu %[t4], $zero, %[qc1] \n\t"
1472 "subu %[t5], $zero, %[qc2] \n\t"
1473 "subu %[t6], $zero, %[qc3] \n\t"
1474 "subu %[t7], $zero, %[qc4] \n\t"
1475 "movn %[qc1], %[t4], %[t0] \n\t"
1476 "movn %[qc2], %[t5], %[t1] \n\t"
1477 "movn %[qc3], %[t6], %[t2] \n\t"
1478 "movn %[qc4], %[t7], %[t3] \n\t"
1482 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1483 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1484 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1485 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1486 : [in_int]"r"(in_int)
/* Four codebook values per index, hence the stride of 4. */
1499 curbits += p_bits[curidx];
1500 vec = &p_codes[curidx*4];
1504 ".set noreorder \n\t"
/* Load in[i..i+3] and vec[0..3] pairwise, then diN = in - vec * IQ. */
1506 "lwc1 $f0, 0(%[in_pos]) \n\t"
1507 "lwc1 $f1, 0(%[vec]) \n\t"
1508 "lwc1 $f2, 4(%[in_pos]) \n\t"
1509 "lwc1 $f3, 4(%[vec]) \n\t"
1510 "lwc1 $f4, 8(%[in_pos]) \n\t"
1511 "lwc1 $f5, 8(%[vec]) \n\t"
1512 "lwc1 $f6, 12(%[in_pos]) \n\t"
1513 "lwc1 $f7, 12(%[vec]) \n\t"
1514 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1515 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1516 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1517 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1521 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1522 [di2]"=&f"(di2), [di3]"=&f"(di3)
1523 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1525 : "$f0", "$f1", "$f2", "$f3",
1526 "$f4", "$f5", "$f6", "$f7",
/* Accumulate squared quantization error. */
1530 cost += di0 * di0 + di1 * di1
1531 + di2 * di2 + di3 * di3;
1536 return cost * lambda + curbits;
/*
 * Rate-distortion cost of a band coded with an unsigned-quad codebook.
 *
 * Per group of four coefficients:
 *   - each scaled[] value is multiplied by Q34, offset by ROUND_STANDARD,
 *     truncated to int and clamped to at most 2 (slt/movn);
 *   - ff_aac_spectral_bits[cb-1][curidx] and uquad_sign_bits[curidx] are
 *     added to curbits, and vec is pointed at entry curidx*4 of
 *     ff_aac_codebook_vectors[cb-1];
 *   - the second assembly block computes di = |in| - vec * IQ (abs.s followed
 *     by nmsub.s), and the squared differences are added to cost.
 *
 * Returns cost * lambda + curbits.
 * s, pb and uplim are not referenced by the code visible here.
 *
 * NOTE(review): the computation of curidx, the declarations of vec/cost/
 * curbits and the IQ input operand of the second asm statement are not visible
 * next to this code -- confirm against the complete function.
 */
1539 static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
1540 PutBitContext *pb, const float *in,
1541 const float *scaled, int size, int scale_idx,
1542 int cb, const float lambda, const float uplim,
/* Q34 scales towards quantized values, IQ scales codebook values back. */
1545 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1546 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1550 int qc1, qc2, qc3, qc4;
1552 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1553 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1555 for (i = 0; i < size; i += 4) {
1558 float *in_pos = (float *)&in[i];
1559 float di0, di1, di2, di3;
1560 int t0, t1, t2, t3, t4;
1562 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1563 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1564 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1565 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
/* t4 = 2; tN = (2 < qcN); movn copies t4 into qcN when tN != 0, i.e. qcN = min(qcN, 2). */
1569 ".set noreorder \n\t"
1571 "ori %[t4], $zero, 2 \n\t"
1572 "slt %[t0], %[t4], %[qc1] \n\t"
1573 "slt %[t1], %[t4], %[qc2] \n\t"
1574 "slt %[t2], %[t4], %[qc3] \n\t"
1575 "slt %[t3], %[t4], %[qc4] \n\t"
1576 "movn %[qc1], %[t4], %[t0] \n\t"
1577 "movn %[qc2], %[t4], %[t1] \n\t"
1578 "movn %[qc3], %[t4], %[t2] \n\t"
1579 "movn %[qc4], %[t4], %[t3] \n\t"
1583 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1584 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1585 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* Codeword length plus sign bits; four codebook values per index. */
1597 curbits += p_bits[curidx];
1598 curbits += uquad_sign_bits[curidx];
1599 vec = &p_codes[curidx*4];
1603 ".set noreorder \n\t"
/* diN = |in[i+N]|, then diN = diN - vec[N] * IQ. */
1605 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1606 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1607 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1608 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1609 "abs.s %[di0], %[di0] \n\t"
1610 "abs.s %[di1], %[di1] \n\t"
1611 "abs.s %[di2], %[di2] \n\t"
1612 "abs.s %[di3], %[di3] \n\t"
1613 "lwc1 $f0, 0(%[vec]) \n\t"
1614 "lwc1 $f1, 4(%[vec]) \n\t"
1615 "lwc1 $f2, 8(%[vec]) \n\t"
1616 "lwc1 $f3, 12(%[vec]) \n\t"
1617 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1618 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1619 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1620 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1624 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1625 [di2]"=&f"(di2), [di3]"=&f"(di3)
1626 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1628 : "$f0", "$f1", "$f2", "$f3",
/* Accumulate squared quantization error. */
1632 cost += di0 * di0 + di1 * di1
1633 + di2 * di2 + di3 * di3;
1638 return cost * lambda + curbits;
/*
 * Rate-distortion cost of a band coded with a signed-pair codebook.
 *
 * Per group of four coefficients (two pairs):
 *   - each scaled[] value is multiplied by Q34, offset by ROUND_STANDARD,
 *     truncated to int and clamped to at most 4;
 *   - the value is negated when the sign bit of the matching in[] word is set;
 *   - ff_aac_spectral_bits[cb-1] entries for curidx and curidx2 are added to
 *     curbits; vec/vec2 point at entries curidx*2 / curidx2*2 of
 *     ff_aac_codebook_vectors[cb-1];
 *   - the second assembly block computes di = in - codebook value * IQ
 *     (nmsub.s), using vec for the first pair and vec2 for the second; the
 *     squared differences are added to cost.
 *
 * Returns cost * lambda + curbits.
 * s, pb and uplim are not referenced by the code visible here.
 *
 * NOTE(review): only the final "curidx2 += qc4 + 40" step of the index
 * computation is visible next to this code; the remaining steps and the
 * declarations of cost/curbits are not -- confirm against the complete
 * function.
 */
1641 static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
1642 PutBitContext *pb, const float *in,
1643 const float *scaled, int size, int scale_idx,
1644 int cb, const float lambda, const float uplim,
/* Q34 scales towards quantized values, IQ scales codebook values back. */
1647 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1648 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1651 int qc1, qc2, qc3, qc4;
1654 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1655 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1657 for (i = 0; i < size; i += 4) {
1658 const float *vec, *vec2;
1659 int curidx, curidx2;
/* in_int views the floats as raw words so the sign bit can be extracted. */
1660 int *in_int = (int *)&in[i];
1661 float *in_pos = (float *)&in[i];
1662 float di0, di1, di2, di3;
1663 int t0, t1, t2, t3, t4, t5, t6, t7;
1665 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1666 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1667 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1668 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1672 ".set noreorder \n\t"
/* qcN = min(qcN, 4). */
1674 "ori %[t4], $zero, 4 \n\t"
1675 "slt %[t0], %[t4], %[qc1] \n\t"
1676 "slt %[t1], %[t4], %[qc2] \n\t"
1677 "slt %[t2], %[t4], %[qc3] \n\t"
1678 "slt %[t3], %[t4], %[qc4] \n\t"
1679 "movn %[qc1], %[t4], %[t0] \n\t"
1680 "movn %[qc2], %[t4], %[t1] \n\t"
1681 "movn %[qc3], %[t4], %[t2] \n\t"
1682 "movn %[qc4], %[t4], %[t3] \n\t"
/* tN = sign bit of in[i+N]; qcN = -qcN when that bit is set. */
1683 "lw %[t0], 0(%[in_int]) \n\t"
1684 "lw %[t1], 4(%[in_int]) \n\t"
1685 "lw %[t2], 8(%[in_int]) \n\t"
1686 "lw %[t3], 12(%[in_int]) \n\t"
1687 "srl %[t0], %[t0], 31 \n\t"
1688 "srl %[t1], %[t1], 31 \n\t"
1689 "srl %[t2], %[t2], 31 \n\t"
1690 "srl %[t3], %[t3], 31 \n\t"
1691 "subu %[t4], $zero, %[qc1] \n\t"
1692 "subu %[t5], $zero, %[qc2] \n\t"
1693 "subu %[t6], $zero, %[qc3] \n\t"
1694 "subu %[t7], $zero, %[qc4] \n\t"
1695 "movn %[qc1], %[t4], %[t0] \n\t"
1696 "movn %[qc2], %[t5], %[t1] \n\t"
1697 "movn %[qc3], %[t6], %[t2] \n\t"
1698 "movn %[qc4], %[t7], %[t3] \n\t"
1702 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1703 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1704 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1705 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1706 : [in_int]"r"(in_int)
1714 curidx2 += qc4 + 40;
1716 curbits += p_bits[curidx];
1717 curbits += p_bits[curidx2];
/* Two codebook values per index, hence the stride of 2. */
1719 vec = &p_codes[curidx*2];
1720 vec2 = &p_codes[curidx2*2];
1724 ".set noreorder \n\t"
/* First pair uses vec, second pair uses vec2; diN = in - codebook value * IQ. */
1726 "lwc1 $f0, 0(%[in_pos]) \n\t"
1727 "lwc1 $f1, 0(%[vec]) \n\t"
1728 "lwc1 $f2, 4(%[in_pos]) \n\t"
1729 "lwc1 $f3, 4(%[vec]) \n\t"
1730 "lwc1 $f4, 8(%[in_pos]) \n\t"
1731 "lwc1 $f5, 0(%[vec2]) \n\t"
1732 "lwc1 $f6, 12(%[in_pos]) \n\t"
1733 "lwc1 $f7, 4(%[vec2]) \n\t"
1734 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1735 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1736 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1737 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1741 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1742 [di2]"=&f"(di2), [di3]"=&f"(di3)
1743 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1744 [vec2]"r"(vec2), [IQ]"f"(IQ)
1745 : "$f0", "$f1", "$f2", "$f3",
1746 "$f4", "$f5", "$f6", "$f7",
/* Accumulate squared quantization error. */
1750 cost += di0 * di0 + di1 * di1
1751 + di2 * di2 + di3 * di3;
1756 return cost * lambda + curbits;
/*
 * Rate-distortion cost of a band coded with an unsigned-pair codebook whose
 * quantized magnitudes are limited to 7.
 *
 * Per group of four coefficients (two pairs):
 *   - each scaled[] value is multiplied by Q34, offset by ROUND_STANDARD,
 *     truncated to int and clamped to at most 7;
 *   - the first assembly block also builds, for each pair, a packed sign word
 *     (sign1/sign2: one bit per non-zero quantized value, taken from the sign
 *     of the matching in[] word) and a count of strictly positive quantized
 *     values (count1/count2);
 *   - ff_aac_spectral_bits[cb-1] and upair7_sign_bits[] entries for curidx and
 *     curidx2 are added to curbits; vec/vec2 point at entries curidx*2 /
 *     curidx2*2 of ff_aac_codebook_vectors[cb-1];
 *   - the second assembly block computes di = |in| - codebook value * IQ, and
 *     the squared differences are added to cost.
 *
 * Returns cost * lambda + curbits.
 * s, pb and uplim are not referenced by the code visible here.
 *
 * NOTE(review): the computation of curidx/curidx2, any use of sign1/sign2/
 * count1/count2 and the declarations of cost/curbits are not visible next to
 * this code -- confirm against the complete function.
 */
1759 static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
1760 PutBitContext *pb, const float *in,
1761 const float *scaled, int size, int scale_idx,
1762 int cb, const float lambda, const float uplim,
/* Q34 scales towards quantized values, IQ scales codebook values back. */
1765 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1766 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1769 int qc1, qc2, qc3, qc4;
1772 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1773 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1775 for (i = 0; i < size; i += 4) {
1776 const float *vec, *vec2;
1777 int curidx, curidx2, sign1, count1, sign2, count2;
/* in_int views the floats as raw words so their sign can be tested. */
1778 int *in_int = (int *)&in[i];
1779 float *in_pos = (float *)&in[i];
1780 float di0, di1, di2, di3;
1781 int t0, t1, t2, t3, t4;
1783 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1784 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1785 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1786 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1790 ".set noreorder \n\t"
/* t4 = 7 (clamp limit); sign1 = sign2 = 0. */
1792 "ori %[t4], $zero, 7 \n\t"
1793 "ori %[sign1], $zero, 0 \n\t"
1794 "ori %[sign2], $zero, 0 \n\t"
/* qcN = min(qcN, 7). */
1795 "slt %[t0], %[t4], %[qc1] \n\t"
1796 "slt %[t1], %[t4], %[qc2] \n\t"
1797 "slt %[t2], %[t4], %[qc3] \n\t"
1798 "slt %[t3], %[t4], %[qc4] \n\t"
1799 "movn %[qc1], %[t4], %[t0] \n\t"
1800 "movn %[qc2], %[t4], %[t1] \n\t"
1801 "movn %[qc3], %[t4], %[t2] \n\t"
1802 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load raw input words; (word < 0) gives the sign of each coefficient. */
1803 "lw %[t0], 0(%[in_int]) \n\t"
1804 "lw %[t1], 4(%[in_int]) \n\t"
1805 "lw %[t2], 8(%[in_int]) \n\t"
1806 "lw %[t3], 12(%[in_int]) \n\t"
/* signX takes the first element's sign only if its quantized value is non-zero. */
1807 "slt %[t0], %[t0], $zero \n\t"
1808 "movn %[sign1], %[t0], %[qc1] \n\t"
1809 "slt %[t2], %[t2], $zero \n\t"
1810 "movn %[sign2], %[t2], %[qc3] \n\t"
/* If the second element is non-zero, shift signX left and append its sign. */
1811 "slt %[t1], %[t1], $zero \n\t"
1812 "sll %[t0], %[sign1], 1 \n\t"
1813 "or %[t0], %[t0], %[t1] \n\t"
1814 "movn %[sign1], %[t0], %[qc2] \n\t"
1815 "slt %[t3], %[t3], $zero \n\t"
1816 "sll %[t0], %[sign2], 1 \n\t"
1817 "or %[t0], %[t0], %[t3] \n\t"
1818 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countX = number of strictly positive quantized values in the pair. */
1819 "slt %[count1], $zero, %[qc1] \n\t"
1820 "slt %[t1], $zero, %[qc2] \n\t"
1821 "slt %[count2], $zero, %[qc3] \n\t"
1822 "slt %[t2], $zero, %[qc4] \n\t"
1823 "addu %[count1], %[count1], %[t1] \n\t"
1824 "addu %[count2], %[count2], %[t2] \n\t"
1828 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1829 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1830 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1831 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1832 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1834 : [in_int]"r"(in_int)
/* Codeword length plus sign bits; two codebook values per index. */
1844 curbits += p_bits[curidx];
1845 curbits += upair7_sign_bits[curidx];
1846 vec = &p_codes[curidx*2];
1848 curbits += p_bits[curidx2];
1849 curbits += upair7_sign_bits[curidx2];
1850 vec2 = &p_codes[curidx2*2];
1854 ".set noreorder \n\t"
/* diN = |in[i+N]|, then diN = diN - codebook value * IQ. */
1856 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1857 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1858 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1859 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1860 "abs.s %[di0], %[di0] \n\t"
1861 "abs.s %[di1], %[di1] \n\t"
1862 "abs.s %[di2], %[di2] \n\t"
1863 "abs.s %[di3], %[di3] \n\t"
1864 "lwc1 $f0, 0(%[vec]) \n\t"
1865 "lwc1 $f1, 4(%[vec]) \n\t"
1866 "lwc1 $f2, 0(%[vec2]) \n\t"
1867 "lwc1 $f3, 4(%[vec2]) \n\t"
1868 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1869 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1870 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1871 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1875 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1876 [di2]"=&f"(di2), [di3]"=&f"(di3)
1877 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1878 [vec2]"r"(vec2), [IQ]"f"(IQ)
1879 : "$f0", "$f1", "$f2", "$f3",
/* Accumulate squared quantization error. */
1883 cost += di0 * di0 + di1 * di1
1884 + di2 * di2 + di3 * di3;
1889 return cost * lambda + curbits;
/*
 * Rate-distortion cost of a band coded with an unsigned-pair codebook whose
 * quantized magnitudes are limited to 12.
 *
 * Per group of four coefficients (two pairs):
 *   - each scaled[] value is multiplied by Q34, offset by ROUND_STANDARD,
 *     truncated to int and clamped to at most 12;
 *   - the first assembly block also builds, for each pair, a packed sign word
 *     (sign1/sign2: one bit per non-zero quantized value, taken from the sign
 *     of the matching in[] word) and a count of strictly positive quantized
 *     values (count1/count2);
 *   - ff_aac_spectral_bits[cb-1] and upair12_sign_bits[] entries for curidx
 *     and curidx2 are added to curbits; vec/vec2 point at entries curidx*2 /
 *     curidx2*2 of ff_aac_codebook_vectors[cb-1];
 *   - the second assembly block computes di = |in| - codebook value * IQ, and
 *     the squared differences are added to cost.
 *
 * Returns cost * lambda + curbits.
 * s, pb and uplim are not referenced by the code visible here.
 *
 * NOTE(review): the computation of curidx/curidx2, any use of sign1/sign2/
 * count1/count2 and the declarations of cost/curbits are not visible next to
 * this code -- confirm against the complete function.
 */
1892 static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
1893 PutBitContext *pb, const float *in,
1894 const float *scaled, int size, int scale_idx,
1895 int cb, const float lambda, const float uplim,
/* Q34 scales towards quantized values, IQ scales codebook values back. */
1898 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1899 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1902 int qc1, qc2, qc3, qc4;
1905 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1906 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1908 for (i = 0; i < size; i += 4) {
1909 const float *vec, *vec2;
1910 int curidx, curidx2;
1911 int sign1, count1, sign2, count2;
/* in_int views the floats as raw words so their sign can be tested. */
1912 int *in_int = (int *)&in[i];
1913 float *in_pos = (float *)&in[i];
1914 float di0, di1, di2, di3;
1915 int t0, t1, t2, t3, t4;
1917 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1918 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1919 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1920 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1924 ".set noreorder \n\t"
/* t4 = 12 (clamp limit); sign1 = sign2 = 0. */
1926 "ori %[t4], $zero, 12 \n\t"
1927 "ori %[sign1], $zero, 0 \n\t"
1928 "ori %[sign2], $zero, 0 \n\t"
/* qcN = min(qcN, 12). */
1929 "slt %[t0], %[t4], %[qc1] \n\t"
1930 "slt %[t1], %[t4], %[qc2] \n\t"
1931 "slt %[t2], %[t4], %[qc3] \n\t"
1932 "slt %[t3], %[t4], %[qc4] \n\t"
1933 "movn %[qc1], %[t4], %[t0] \n\t"
1934 "movn %[qc2], %[t4], %[t1] \n\t"
1935 "movn %[qc3], %[t4], %[t2] \n\t"
1936 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load raw input words; (word < 0) gives the sign of each coefficient. */
1937 "lw %[t0], 0(%[in_int]) \n\t"
1938 "lw %[t1], 4(%[in_int]) \n\t"
1939 "lw %[t2], 8(%[in_int]) \n\t"
1940 "lw %[t3], 12(%[in_int]) \n\t"
/* signX takes the first element's sign only if its quantized value is non-zero. */
1941 "slt %[t0], %[t0], $zero \n\t"
1942 "movn %[sign1], %[t0], %[qc1] \n\t"
1943 "slt %[t2], %[t2], $zero \n\t"
1944 "movn %[sign2], %[t2], %[qc3] \n\t"
/* If the second element is non-zero, shift signX left and append its sign. */
1945 "slt %[t1], %[t1], $zero \n\t"
1946 "sll %[t0], %[sign1], 1 \n\t"
1947 "or %[t0], %[t0], %[t1] \n\t"
1948 "movn %[sign1], %[t0], %[qc2] \n\t"
1949 "slt %[t3], %[t3], $zero \n\t"
1950 "sll %[t0], %[sign2], 1 \n\t"
1951 "or %[t0], %[t0], %[t3] \n\t"
1952 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countX = number of strictly positive quantized values in the pair. */
1953 "slt %[count1], $zero, %[qc1] \n\t"
1954 "slt %[t1], $zero, %[qc2] \n\t"
1955 "slt %[count2], $zero, %[qc3] \n\t"
1956 "slt %[t2], $zero, %[qc4] \n\t"
1957 "addu %[count1], %[count1], %[t1] \n\t"
1958 "addu %[count2], %[count2], %[t2] \n\t"
1962 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1963 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1964 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1965 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1966 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1968 : [in_int]"r"(in_int)
/* Codeword length plus sign bits; two codebook values per index. */
1978 curbits += p_bits[curidx];
1979 curbits += p_bits[curidx2];
1980 curbits += upair12_sign_bits[curidx];
1981 curbits += upair12_sign_bits[curidx2];
1982 vec = &p_codes[curidx*2];
1983 vec2 = &p_codes[curidx2*2];
1987 ".set noreorder \n\t"
/* diN = |in[i+N]|, then diN = diN - codebook value * IQ. */
1989 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1990 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1991 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1992 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1993 "abs.s %[di0], %[di0] \n\t"
1994 "abs.s %[di1], %[di1] \n\t"
1995 "abs.s %[di2], %[di2] \n\t"
1996 "abs.s %[di3], %[di3] \n\t"
1997 "lwc1 $f0, 0(%[vec]) \n\t"
1998 "lwc1 $f1, 4(%[vec]) \n\t"
1999 "lwc1 $f2, 0(%[vec2]) \n\t"
2000 "lwc1 $f3, 4(%[vec2]) \n\t"
2001 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
2002 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
2003 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
2004 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
2008 : [di0]"=&f"(di0), [di1]"=&f"(di1),
2009 [di2]"=&f"(di2), [di3]"=&f"(di3)
2010 : [in_pos]"r"(in_pos), [vec]"r"(vec),
2011 [vec2]"r"(vec2), [IQ]"f"(IQ)
2012 : "$f0", "$f1", "$f2", "$f3",
/* Accumulate squared quantization error. */
2016 cost += di0 * di0 + di1 * di1
2017 + di2 * di2 + di3 * di3;
2022 return cost * lambda + curbits;
/*
 * Rate-distortion cost of a band coded with the escape codebook.
 *
 * Per group of four coefficients (two pairs):
 *   - each scaled[] value is multiplied by Q34, offset by ROUND_STANDARD and
 *     truncated to int;
 *   - the assembly keeps a limited copy cN of each value (saturating shift
 *     left by 18, logical shift right by 18), sets condN = (qcN > 15) and
 *     replaces qcN by 16 when condN is set;
 *   - ff_aac_spectral_bits[cb-1] and esc_sign_bits[] entries for curidx and
 *     curidx2 are added to curbits, plus (av_log2(cN) * 2 - 3) for every value
 *     whose condN is set;
 *   - the error diN is |in| minus one of: CLIPPED_ESCAPE (when |in| is at
 *     least CLIPPED_ESCAPE), cN * cbrtf(cN) * IQ, or the codebook value times
 *     IQ; squared errors are added to cost.
 *
 * Returns cost * lambda + curbits.
 * s, pb and uplim are not referenced by the code visible here.
 *
 * NOTE(review): the computation of curidx/curidx2, the assignment of t1, the
 * conditions selecting between the cbrtf() and codebook branches (presumably
 * condN), and the declarations of c1..c4/t6/t7/cost/curbits are not visible
 * next to this code -- confirm against the complete function.
 * shll_s.w is a MIPS DSP ASE instruction.
 */
2025 static float get_band_cost_ESC_mips(struct AACEncContext *s,
2026 PutBitContext *pb, const float *in,
2027 const float *scaled, int size, int scale_idx,
2028 int cb, const float lambda, const float uplim,
/* Q34 scales towards quantized values, IQ scales codebook values back. */
2031 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2032 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
/* Magnitude at or above which the error is measured against this constant instead. */
2033 const float CLIPPED_ESCAPE = 165140.0f * IQ;
2036 int qc1, qc2, qc3, qc4;
2039 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
2040 float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
2042 for (i = 0; i < size; i += 4) {
2043 const float *vec, *vec2;
2044 int curidx, curidx2;
2045 float t1, t2, t3, t4;
2046 float di1, di2, di3, di4;
2047 int cond0, cond1, cond2, cond3;
2051 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
2052 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2053 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2054 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2058 ".set noreorder \n\t"
/* t6 = 15 (comparison threshold), t7 = 16 (replacement value). */
2060 "ori %[t6], $zero, 15 \n\t"
2061 "ori %[t7], $zero, 16 \n\t"
/* cN = saturating (qcN << 18), then >> 18: limits the copy of qcN. */
2062 "shll_s.w %[c1], %[qc1], 18 \n\t"
2063 "shll_s.w %[c2], %[qc2], 18 \n\t"
2064 "shll_s.w %[c3], %[qc3], 18 \n\t"
2065 "shll_s.w %[c4], %[qc4], 18 \n\t"
2066 "srl %[c1], %[c1], 18 \n\t"
2067 "srl %[c2], %[c2], 18 \n\t"
2068 "srl %[c3], %[c3], 18 \n\t"
2069 "srl %[c4], %[c4], 18 \n\t"
/* condN = (15 < qcN); qcN = condN ? 16 : qcN. */
2070 "slt %[cond0], %[t6], %[qc1] \n\t"
2071 "slt %[cond1], %[t6], %[qc2] \n\t"
2072 "slt %[cond2], %[t6], %[qc3] \n\t"
2073 "slt %[cond3], %[t6], %[qc4] \n\t"
2074 "movn %[qc1], %[t7], %[cond0] \n\t"
2075 "movn %[qc2], %[t7], %[cond1] \n\t"
2076 "movn %[qc3], %[t7], %[cond2] \n\t"
2077 "movn %[qc4], %[t7], %[cond3] \n\t"
2081 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2082 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2083 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
2084 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
2085 [c1]"=&r"(c1), [c2]"=&r"(c2),
2086 [c3]"=&r"(c3), [c4]"=&r"(c4),
2087 [t6]"=&r"(t6), [t7]"=&r"(t7)
/* Codeword length plus sign bits; two codebook values per index. */
2096 curbits += p_bits[curidx];
2097 curbits += esc_sign_bits[curidx];
2098 vec = &p_codes[curidx*2];
2100 curbits += p_bits[curidx2];
2101 curbits += esc_sign_bits[curidx2];
2102 vec2 = &p_codes[curidx2*2];
/* Extra bits for values above 15: -condN is all ones when condN == 1, else 0. */
2104 curbits += (av_log2(c1) * 2 - 3) & (-cond0);
2105 curbits += (av_log2(c2) * 2 - 3) & (-cond1);
2106 curbits += (av_log2(c3) * 2 - 3) & (-cond2);
2107 curbits += (av_log2(c4) * 2 - 3) & (-cond3);
2110 t2 = fabsf(in[i+1]);
2111 t3 = fabsf(in[i+2]);
2112 t4 = fabsf(in[i+3]);
/* Error against the clipped constant, the cN^(4/3) * IQ value, or the codebook value. */
2115 if (t1 >= CLIPPED_ESCAPE) {
2116 di1 = t1 - CLIPPED_ESCAPE;
2118 di1 = t1 - c1 * cbrtf(c1) * IQ;
2121 di1 = t1 - vec[0] * IQ;
2124 if (t2 >= CLIPPED_ESCAPE) {
2125 di2 = t2 - CLIPPED_ESCAPE;
2127 di2 = t2 - c2 * cbrtf(c2) * IQ;
2130 di2 = t2 - vec[1] * IQ;
2133 if (t3 >= CLIPPED_ESCAPE) {
2134 di3 = t3 - CLIPPED_ESCAPE;
2136 di3 = t3 - c3 * cbrtf(c3) * IQ;
2139 di3 = t3 - vec2[0] * IQ;
2142 if (t4 >= CLIPPED_ESCAPE) {
2143 di4 = t4 - CLIPPED_ESCAPE;
2145 di4 = t4 - c4 * cbrtf(c4) * IQ;
2148 di4 = t4 - vec2[1]*IQ;
/* Accumulate squared quantization error. */
2150 cost += di1 * di1 + di2 * di2
2151 + di3 * di3 + di4 * di4;
2156 return cost * lambda + curbits;
/*
 * Dispatch table of band-cost routines, indexed by codebook number (cb) by the
 * get_band_cost() macro. Entry 0 and entries 13-15 use the ZERO routine,
 * 1-2 SQUAD, 3-4 UQUAD, 5-6 SPAIR, 7-8 UPAIR7, 9-10 UPAIR12, 11 ESC and
 * 12 the NONE routine.
 */
2159 static float (*const get_band_cost_arr[])(struct AACEncContext *s,
2160 PutBitContext *pb, const float *in,
2161 const float *scaled, int size, int scale_idx,
2162 int cb, const float lambda, const float uplim,
2164 get_band_cost_ZERO_mips,
2165 get_band_cost_SQUAD_mips,
2166 get_band_cost_SQUAD_mips,
2167 get_band_cost_UQUAD_mips,
2168 get_band_cost_UQUAD_mips,
2169 get_band_cost_SPAIR_mips,
2170 get_band_cost_SPAIR_mips,
2171 get_band_cost_UPAIR7_mips,
2172 get_band_cost_UPAIR7_mips,
2173 get_band_cost_UPAIR12_mips,
2174 get_band_cost_UPAIR12_mips,
2175 get_band_cost_ESC_mips,
2176 get_band_cost_NONE_mips, /* cb 12 doesn't exist */
2177 get_band_cost_ZERO_mips,
2178 get_band_cost_ZERO_mips,
2179 get_band_cost_ZERO_mips,
/*
 * Calls the band-cost routine registered for codebook cb in
 * get_band_cost_arr, forwarding every argument unchanged.
 * cb is used directly as the table index and is not range-checked here.
 */
2182 #define get_band_cost( \
2183 s, pb, in, scaled, size, scale_idx, cb, \
2184 lambda, uplim, bits) \
2185 get_band_cost_arr[cb]( \
2186 s, pb, in, scaled, size, scale_idx, cb, \
2187 lambda, uplim, bits)
/*
 * Returns the result of the codebook-specific band-cost routine for one band.
 * Thin wrapper around get_band_cost() that passes NULL as the PutBitContext
 * and forwards all other arguments.
 */
2189 static float quantize_band_cost(struct AACEncContext *s, const float *in,
2190 const float *scaled, int size, int scale_idx,
2191 int cb, const float lambda, const float uplim,
2194 return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
/*
 * Scalefactor search for one channel (sce), writing sce->sf_idx[],
 * sce->zeroes[] and sce->band_type[].
 *
 * Stages that can be read from the code:
 *   1. The bit target destbits is derived from bit rate, sample rate and
 *      channel count and capped at 5800.
 *   2. For every band of every window group the psychoacoustic thresholds are
 *      summed into uplims[] (scaled by 512); a window's band is marked zero
 *      when its energy does not exceed its threshold or the threshold is 0.
 *      minthr tracks the smallest summed threshold.
 *   3. Initial scalefactors: SCALE_ONE_POS for zeroed bands, otherwise
 *      SCALE_ONE_POS + min(log2f(uplims/minthr) * 4, 59).
 *   4. abs_pow34_v() fills s->scoefs from all 1024 coefficients and
 *      find_max_val() records the per-band maximum in maxvals[].
 *   5. Iteration: band bits are counted with quantize_band_cost_bits() in one
 *      loop, band distortion is measured with quantize_band_cost() in another;
 *      all sf_idx are raised by qstep when tbits exceeds destbits, otherwise
 *      lowered. Afterwards bands whose dists[] exceed uplims[] get a smaller
 *      scalefactor, sf_idx is clipped to
 *      [minscaler, minscaler + SCALE_MAX_DIFF] and to at most 219, and
 *      band_type is chosen by find_min_book(). The outer loop repeats while
 *      fflag is set and its < 10.
 *
 * NOTE(review): several statements of this function are not visible next to
 * this code (remaining parameters, declarations of maxvals/qstep/its/tbits/
 * nz/uplim/prev/cb/bits/dist, the loop headers around the bit counting and
 * the update of qstep/fflag/its); the description of stage 5 is limited to
 * what the visible lines show -- confirm against the complete function.
 */
2197 static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx,
2199 SingleChannelElement *sce,
2202 int start = 0, i, w, w2, g;
/* Bit budget computed from bit rate, sample rate and channel count. */
2203 int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
2204 float dists[128] = { 0 }, uplims[128];
2206 int fflag, minscaler;
2209 float minthr = INFINITY;
2211 destbits = FFMIN(destbits, 5800);
/* Stage 2: gather thresholds per band and flag bands that need no coding. */
2212 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2213 for (g = 0; g < sce->ics.num_swb; g++) {
2216 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
2217 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
2218 uplim += band->threshold;
2219 if (band->energy <= band->threshold || band->threshold == 0.0f) {
2220 sce->zeroes[(w+w2)*16+g] = 1;
2225 uplims[w*16+g] = uplim *512;
2226 sce->zeroes[w*16+g] = !nz;
2228 minthr = FFMIN(minthr, uplim);
/* Stage 3: initial scalefactor guess per band. */
2232 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2233 for (g = 0; g < sce->ics.num_swb; g++) {
2234 if (sce->zeroes[w*16+g]) {
2235 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
2238 sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
/* Stage 4: scaled copy of all coefficients and per-band maxima. */
2244 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
2246 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2248 for (g = 0; g < sce->ics.num_swb; g++) {
2249 const float *scaled = s->scoefs + start;
2250 maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
2251 start += sce->ics.swb_sizes[g];
2257 minscaler = sce->sf_idx[0];
2258 qstep = its ? 1 : 32;
/* Stage 5a: bit counting only. */
2265 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2267 for (g = 0; g < sce->ics.num_swb; g++) {
2268 const float *coefs = sce->coeffs + start;
2269 const float *scaled = s->scoefs + start;
/* Zeroed bands and bands with sf_idx >= 218 are skipped. */
2273 if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
2274 start += sce->ics.swb_sizes[g];
2277 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
2278 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
2279 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
2281 bits += quantize_band_cost_bits(s, coefs + w2*128,
2283 sce->ics.swb_sizes[g],
2284 sce->sf_idx[w*16+g],
/* Cost of coding the scalefactor difference to the previous band. */
2291 bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
2294 start += sce->ics.swb_sizes[g];
2295 prev = sce->sf_idx[w*16+g];
/* Stage 5b: same traversal, but measuring distortion as well. */
2300 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2302 for (g = 0; g < sce->ics.num_swb; g++) {
2303 const float *coefs = sce->coeffs + start;
2304 const float *scaled = s->scoefs + start;
2309 if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
2310 start += sce->ics.swb_sizes[g];
2313 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
2314 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
2315 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
2317 dist += quantize_band_cost(s, coefs + w2*128,
2319 sce->ics.swb_sizes[g],
2320 sce->sf_idx[w*16+g],
/* Store the band cost with the bit count removed. */
2327 dists[w*16+g] = dist - bits;
2329 bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
2332 start += sce->ics.swb_sizes[g];
2333 prev = sce->sf_idx[w*16+g];
/* Too many bits: raise every scalefactor by qstep; otherwise lower them. */
2337 if (tbits > destbits) {
2338 for (i = 0; i < 128; i++)
2339 if (sce->sf_idx[i] < 218 - qstep)
2340 sce->sf_idx[i] += qstep;
2342 for (i = 0; i < 128; i++)
2343 if (sce->sf_idx[i] > 60 - qstep)
2344 sce->sf_idx[i] -= qstep;
2347 if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
/* Stage 5c: per-band correction where distortion exceeds the threshold. */
2352 minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
2353 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2354 for (g = 0; g < sce->ics.num_swb; g++) {
2355 int prevsc = sce->sf_idx[w*16+g];
2356 if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
2357 if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
2358 sce->sf_idx[w*16+g]--;
2360 sce->sf_idx[w*16+g]-=2;
2362 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
2363 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
2364 if (sce->sf_idx[w*16+g] != prevsc)
2366 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
2370 } while (fflag && its < 10);
/*
 * Decides per band whether mid/side coding is used for a channel pair and
 * stores the decision in cpe->ms_mask[].
 *
 * For every band in which neither channel is zeroed, and for every window of
 * the group, a mid signal M = (left + right) * 0.5 and a side signal S are
 * built, all four signals are passed through abs_pow34_v(), and
 * quantize_band_cost() is evaluated for left/right (summed into dist1, using
 * lambda divided by each channel's own threshold) and for mid/side (summed
 * into dist2, using lambda divided by the larger threshold for M and by the
 * smaller threshold for S). The mask bit is set when dist2 < dist1.
 *
 * s->scoefs is used as scratch space for the four 128-entry scaled buffers
 * L34, R34, M34 and S34.
 *
 * NOTE(review): the statement guarded by "!cpe->common_window" (presumably an
 * early return), the left-hand sides of the S[] assignments (presumably
 * S[i] = M[i] - right) and the scaled-buffer arguments of the
 * quantize_band_cost() calls are not visible next to this code -- confirm
 * against the complete function.
 */
2373 static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
2375 int start = 0, i, w, w2, g;
2376 float M[128], S[128];
2377 float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
2378 SingleChannelElement *sce0 = &cpe->ch[0];
2379 SingleChannelElement *sce1 = &cpe->ch[1];
2380 if (!cpe->common_window)
2382 for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
2383 for (g = 0; g < sce0->ics.num_swb; g++) {
/* Only bands that are coded in both channels are evaluated. */
2384 if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
2385 float dist1 = 0.0f, dist2 = 0.0f;
2386 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2387 FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
2388 FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
2389 float minthr = FFMIN(band0->threshold, band1->threshold);
2390 float maxthr = FFMAX(band0->threshold, band1->threshold);
/* Build mid and side signals, four coefficients per iteration. */
2391 for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
2392 M[i ] = (sce0->coeffs[start+w2*128+i ]
2393 + sce1->coeffs[start+w2*128+i ]) * 0.5;
2394 M[i+1] = (sce0->coeffs[start+w2*128+i+1]
2395 + sce1->coeffs[start+w2*128+i+1]) * 0.5;
2396 M[i+2] = (sce0->coeffs[start+w2*128+i+2]
2397 + sce1->coeffs[start+w2*128+i+2]) * 0.5;
2398 M[i+3] = (sce0->coeffs[start+w2*128+i+3]
2399 + sce1->coeffs[start+w2*128+i+3]) * 0.5;
2402 - sce1->coeffs[start+w2*128+i ];
2404 - sce1->coeffs[start+w2*128+i+1];
2406 - sce1->coeffs[start+w2*128+i+2];
2408 - sce1->coeffs[start+w2*128+i+3];
/* Scaled versions of left, right, mid and side for the cost routines. */
2410 abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
2411 abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
2412 abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2413 abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
/* dist1: cost of coding left and right separately. */
2414 dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
2416 sce0->ics.swb_sizes[g],
2417 sce0->sf_idx[(w+w2)*16+g],
2418 sce0->band_type[(w+w2)*16+g],
2419 s->lambda / band0->threshold, INFINITY, NULL);
2420 dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
2422 sce1->ics.swb_sizes[g],
2423 sce1->sf_idx[(w+w2)*16+g],
2424 sce1->band_type[(w+w2)*16+g],
2425 s->lambda / band1->threshold, INFINITY, NULL);
/* dist2: cost of coding mid and side instead. */
2426 dist2 += quantize_band_cost(s, M,
2428 sce0->ics.swb_sizes[g],
2429 sce0->sf_idx[(w+w2)*16+g],
2430 sce0->band_type[(w+w2)*16+g],
2431 s->lambda / maxthr, INFINITY, NULL);
2432 dist2 += quantize_band_cost(s, S,
2434 sce1->ics.swb_sizes[g],
2435 sce1->sf_idx[(w+w2)*16+g],
2436 sce1->band_type[(w+w2)*16+g],
2437 s->lambda / minthr, INFINITY, NULL);
/* Use mid/side for this band when it is cheaper. */
2439 cpe->ms_mask[w*16+g] = dist2 < dist1;
2441 start += sce0->ics.swb_sizes[g];
2445 #endif /*HAVE_MIPSFPU */
/**
 * Choose a codebook for every scalefactor band of one window group and
 * write the resulting section data to the bitstream.
 *
 * path[swb][cb] holds, for each band position and each of the 12 codebooks,
 * the cheapest cost found for ending there, together with the current run
 * length and a back-pointer. After the forward pass the cheapest final
 * entry is traced back, and the (codebook, run) sections are emitted with
 * put_bits() while sce->band_type[] and sce->zeroes[] are updated.
 *
 * @param s         encoder context (bit writer s->pb, scratch s->scoefs)
 * @param sce       channel element being coded
 * @param win       index of the group's first window; bands are addressed
 *                  as win*16 + swb
 * @param group_len number of windows in the group
 * @param lambda    not referenced in the lines visible here
 */
2447 static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce,
2448 int win, int group_len, const float lambda)
2450 BandCodingPath path[120][12];
2451 int w, swb, cb, start, size;
2453 const int max_sfb = sce->ics.max_sfb;
/* Width of the run-length field: 5 bits with a single window, 3 otherwise. */
2454 const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
/* All-ones run value; used as the escape code when runs are emitted below. */
2455 const int run_esc = (1 << run_bits) - 1;
2456 int idx, ppos, count;
/* Sections collected during backtracking (run length and codebook each). */
2457 int stackrun[120], stackcb[120], stack_len;
2458 float next_minbits = INFINITY;
/* Precompute into s->scoefs for all 1024 coefficients (abs_pow34_v is
 * defined elsewhere). */
2461 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
/* Seed position 0: every codebook starts at run_bits + 4 (matching the
 * 4-bit codebook and run_bits-wide run written at the end), with no
 * predecessor and an empty run. */
2463 for (cb = 0; cb < 12; cb++) {
2464 path[0][cb].cost = run_bits+4;
2465 path[0][cb].prev_idx = -1;
2466 path[0][cb].run = 0;
/* Forward pass over the bands. */
2468 for (swb = 0; swb < max_sfb; swb++) {
2469 size = sce->ics.swb_sizes[swb];
/* Band flagged as zero: only codebook 0 gets a real cost. */
2470 if (sce->zeroes[win*16 + swb]) {
/* Either extend the current codebook-0 run, or open a new section after
 * the cheapest entry of the previous position. */
2471 float cost_stay_here = path[swb][0].cost;
2472 float cost_get_here = next_minbits + run_bits + 4;
/* Extending costs another run_bits when the run_value_bits entry differs
 * between the current run length and run + 1. */
2473 if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
2474 != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
2475 cost_stay_here += run_bits;
2476 if (cost_get_here < cost_stay_here) {
2477 path[swb+1][0].prev_idx = next_mincb;
2478 path[swb+1][0].cost = cost_get_here;
2479 path[swb+1][0].run = 1;
2481 path[swb+1][0].prev_idx = 0;
2482 path[swb+1][0].cost = cost_stay_here;
2483 path[swb+1][0].run = path[swb][0].run + 1;
2485 next_minbits = path[swb+1][0].cost;
/* All other codebooks get cost 61450 and no predecessor here.
 * NOTE(review): presumably a "practically infinite" sentinel -- confirm. */
2487 for (cb = 1; cb < 12; cb++) {
2488 path[swb+1][cb].cost = 61450;
2489 path[swb+1][cb].prev_idx = -1;
2490 path[swb+1][cb].run = 0;
/* Snapshot the best cost/codebook reaching this band before next_minbits
 * is reset for the following position. */
2493 float minbits = next_minbits;
2494 int mincb = next_mincb;
/* Codebooks below the band type currently stored for this band are given
 * the 61450 cost. */
2495 int startcb = sce->band_type[win*16+swb];
2496 next_minbits = INFINITY;
2498 for (cb = 0; cb < startcb; cb++) {
2499 path[swb+1][cb].cost = 61450;
2500 path[swb+1][cb].prev_idx = -1;
2501 path[swb+1][cb].run = 0;
2503 for (cb = startcb; cb < 12; cb++) {
2504 float cost_stay_here, cost_get_here;
/* Bit cost of this band with codebook cb, summed over the group's windows. */
2506 for (w = 0; w < group_len; w++) {
2507 bits += quantize_band_cost_bits(s, sce->coeffs + start + w*128,
2508 s->scoefs + start + w*128, size,
2509 sce->sf_idx[(win+w)*16+swb], cb,
/* Continue the current run of cb, or start a new section (header cost
 * run_bits + 4) after the cheapest predecessor. */
2512 cost_stay_here = path[swb][cb].cost + bits;
2513 cost_get_here = minbits + bits + run_bits + 4;
2514 if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
2515 != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
2516 cost_stay_here += run_bits;
2517 if (cost_get_here < cost_stay_here) {
2518 path[swb+1][cb].prev_idx = mincb;
2519 path[swb+1][cb].cost = cost_get_here;
2520 path[swb+1][cb].run = 1;
2522 path[swb+1][cb].prev_idx = cb;
2523 path[swb+1][cb].cost = cost_stay_here;
2524 path[swb+1][cb].run = path[swb][cb].run + 1;
/* Track the cheapest entry at this position for the next band. */
2526 if (path[swb+1][cb].cost < next_minbits) {
2527 next_minbits = path[swb+1][cb].cost;
/* Advance the coefficient offset past this band. */
2532 start += sce->ics.swb_sizes[swb];
/* Look through codebooks 1..11 for a cheaper final cost than the current
 * candidate idx. */
2537 for (cb = 1; cb < 12; cb++)
2538 if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
/* Backtrack: push the (run, codebook) of each section, fetch the
 * predecessor stored at the section's first band, then step ppos back by
 * the run length. */
2542 av_assert1(idx >= 0);
2544 stackrun[stack_len] = path[ppos][cb].run;
2545 stackcb [stack_len] = cb;
2546 idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
2547 ppos -= path[ppos][cb].run;
/* Sections were pushed last-to-first, so walk the stack in reverse to emit
 * them in band order. */
2552 for (i = stack_len - 1; i >= 0; i--) {
/* 4-bit codebook index of the section. */
2553 put_bits(&s->pb, 4, stackcb[i]);
2554 count = stackrun[i];
/* The run's bands are flagged as zero exactly when codebook 0 was chosen. */
2555 memset(sce->zeroes + win*16 + start, !stackcb[i], count);
/* Record the chosen codebook for the bands of the run. */
2556 for (j = 0; j < count; j++) {
2557 sce->band_type[win*16 + start] = stackcb[i];
/* Run length: the escape value is written while count >= run_esc, then a
 * final run_bits-wide value. */
2560 while (count >= run_esc) {
2561 put_bits(&s->pb, run_bits, run_esc);
2564 put_bits(&s->pb, run_bits, count);
2567 #endif /* HAVE_INLINE_ASM */
/**
 * Install the MIPS-specific coefficient coder routines into the encoder's
 * coder table (c->coder).
 *
 * Only search_for_quantizers and search_for_ms are overridden here; the
 * quantize_and_encode_band and encode_window_bands_info overrides are
 * commented out.
 *
 * @param c encoder context whose coder callbacks are replaced
 */
2569 void ff_aac_coder_init_mips(AACEncContext *c) {
2571 AACCoefficientsEncoder *e = c->coder;
/* Configured aac_coder selection.
 * NOTE(review): the statement that uses it is not visible in this listing. */
2572 int option = c->options.aac_coder;
2575 // Disabled due to failure with fate-aac-pns-encode
2576 // e->quantize_and_encode_band = quantize_and_encode_band_mips;
2577 // e->encode_window_bands_info = codebook_trellis_rate_mips;
/* Replace the quantizer search and the M/S decision with the MIPS versions. */
2579 e->search_for_quantizers = search_for_quantizers_twoloop_mips;
2580 e->search_for_ms = search_for_ms_mips;
2581 #endif /* HAVE_MIPSFPU */
2583 #endif /* HAVE_INLINE_ASM */