3 * MIPS Technologies, Inc., California.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * Author: Stanislav Ocovaj (socovaj@mips.com)
30 * Szabolcs Pal (sabolc@mips.com)
32 * AAC coefficients encoder optimized for MIPS floating-point architecture
34 * This file is part of FFmpeg.
36 * FFmpeg is free software; you can redistribute it and/or
37 * modify it under the terms of the GNU Lesser General Public
38 * License as published by the Free Software Foundation; either
39 * version 2.1 of the License, or (at your option) any later version.
41 * FFmpeg is distributed in the hope that it will be useful,
42 * but WITHOUT ANY WARRANTY; without even the implied warranty of
43 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
44 * Lesser General Public License for more details.
46 * You should have received a copy of the GNU Lesser General Public
47 * License along with FFmpeg; if not, write to the Free Software
48 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
53 * Reference: libavcodec/aaccoder.c
56 #include "libavutil/libm.h"
59 #include "libavutil/mathematics.h"
60 #include "libavcodec/avcodec.h"
61 #include "libavcodec/put_bits.h"
62 #include "libavcodec/aac.h"
63 #include "libavcodec/aacenc.h"
64 #include "libavcodec/aactab.h"
/* Opening of the BandCodingPath structure; its members are not visible in this excerpt. */
67 typedef struct BandCodingPath {
/*
 * 64-entry bit-count table: entries 0..30 hold 5, entries 31..62 hold 10 and
 * entry 63 holds 15.
 * NOTE(review): presumably the number of bits needed to signal a run of the
 * given length for long windows -- confirm against the code that indexes it.
 */
73 static const uint8_t run_value_bits_long[64] = {
74 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
75 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 10,
76 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
77 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 15
/*
 * 16-entry bit-count table: entries 0..6 hold 3, entries 7..14 hold 6 and
 * entry 15 holds 9.
 * NOTE(review): presumably the short-window counterpart of
 * run_value_bits_long -- confirm against the code that indexes it.
 */
80 static const uint8_t run_value_bits_short[16] = {
81 3, 3, 3, 3, 3, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 9
/* Selector for the two tables above: element 0 is the long table, element 1 the short one. */
84 static const uint8_t * const run_value_bits[2] = {
85 run_value_bits_long, run_value_bits_short
/*
 * Sign-bit count for an unsigned quad index (0..80).
 * Each entry equals the number of non-zero digits in the base-3 representation
 * of its index, i.e. how many of the four packed values (each 0..2) are
 * non-zero. It is added to the codeword length when counting bits for the
 * unsigned-quad codebooks.
 */
88 static const uint8_t uquad_sign_bits[81] = {
89 0, 1, 1, 1, 2, 2, 1, 2, 2,
90 1, 2, 2, 2, 3, 3, 2, 3, 3,
91 1, 2, 2, 2, 3, 3, 2, 3, 3,
92 1, 2, 2, 2, 3, 3, 2, 3, 3,
93 2, 3, 3, 3, 4, 4, 3, 4, 4,
94 2, 3, 3, 3, 4, 4, 3, 4, 4,
95 1, 2, 2, 2, 3, 3, 2, 3, 3,
96 2, 3, 3, 3, 4, 4, 3, 4, 4,
97 2, 3, 3, 3, 4, 4, 3, 4, 4
/*
 * Sign-bit count for an unsigned pair index laid out as an 8 x 8 grid
 * (index = 8 * row + column). An entry is 0 when both row and column are 0,
 * 1 when exactly one of them is 0, and 2 otherwise.
 */
100 static const uint8_t upair7_sign_bits[64] = {
101 0, 1, 1, 1, 1, 1, 1, 1,
102 1, 2, 2, 2, 2, 2, 2, 2,
103 1, 2, 2, 2, 2, 2, 2, 2,
104 1, 2, 2, 2, 2, 2, 2, 2,
105 1, 2, 2, 2, 2, 2, 2, 2,
106 1, 2, 2, 2, 2, 2, 2, 2,
107 1, 2, 2, 2, 2, 2, 2, 2,
108 1, 2, 2, 2, 2, 2, 2, 2,
/*
 * Sign-bit count for an unsigned pair index laid out as a 13 x 13 grid
 * (index = 13 * row + column). An entry is 0 when both row and column are 0,
 * 1 when exactly one of them is 0, and 2 otherwise.
 */
111 static const uint8_t upair12_sign_bits[169] = {
112 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
113 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
115 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
116 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
123 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
124 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * Sign-bit count for a pair index laid out as a 17 x 17 grid
 * (index = 17 * row + column). An entry is 0 when both row and column are 0,
 * 1 when exactly one of them is 0, and 2 otherwise.
 * NOTE(review): going by the name this serves the escape codebook -- the code
 * that reads it is not visible in this excerpt.
 */
127 static const uint8_t esc_sign_bits[289] = {
128 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
129 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
132 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
133 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
134 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
135 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
136 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
137 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
138 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
139 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
140 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
141 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
142 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
143 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
144 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * Fill out[] from in[], walking the vectors four elements per iteration.
 * The whole body is compiled only when USE_REALLY_FULL_SEARCH is not defined.
 * NOTE(review): the name suggests out[i] = |in[i]|^(3/4); the loop body is not
 * visible in this excerpt, so confirm before relying on that.
 */
147 static void abs_pow34_v(float *out, const float *in, const int size) {
148 #ifndef USE_REALLY_FULL_SEARCH
/* Four per-iteration temporaries, one for each element handled in a step. */
151 float ax, bx, cx, dx;
/* size is consumed in steps of 4; presumably callers pass a multiple of 4 -- TODO confirm. */
153 for (i = 0; i < size; i += 4) {
174 #endif /* USE_REALLY_FULL_SEARCH */
/*
 * Scan a window group for its largest scaled value.
 *
 * For each of the group_len windows, the first swb_size entries are examined;
 * consecutive windows are 128 floats apart in scaled[]. The running maximum is
 * kept in maxval (its initial value and the return statement are on lines not
 * visible in this excerpt).
 */
177 static float find_max_val(int group_len, int swb_size, const float *scaled) {
180 for (w2 = 0; w2 < group_len; w2++) {
181 for (i = 0; i < swb_size; i++) {
182 maxval = FFMAX(maxval, scaled[w2*128+i]);
/*
 * Pick a codebook number from the largest quantized magnitude of a band.
 *
 * maxval is scaled by Q^(3/4), where Q is read from ff_aac_pow2sf_tab for the
 * scalefactor sf, biased by 0.4054 and stored in qmaxval. The visible ladder
 * maps qmaxval 0 -> 0, 1 -> 1, 2 -> 3, 3..4 -> 5, 5..7 -> 7 and 8..12 -> 9.
 * The branch for larger values and the return statement are not visible in
 * this excerpt.
 */
188 static int find_min_book(float maxval, int sf) {
189 float Q = ff_aac_pow2sf_tab[POW_SF2_ZERO - sf + SCALE_ONE_POS - SCALE_DIV_512];
/* sqrt(Q * sqrt(Q)) == Q^(3/4). */
190 float Q34 = sqrtf(Q * sqrtf(Q));
/* Same rounding offset (0.4054) as used by the band quantizers in this file. */
192 qmaxval = maxval * Q34 + 0.4054f;
193 if (qmaxval == 0) cb = 0;
194 else if (qmaxval == 1) cb = 1;
195 else if (qmaxval == 2) cb = 3;
196 else if (qmaxval <= 4) cb = 5;
197 else if (qmaxval <= 7) cb = 7;
198 else if (qmaxval <= 12) cb = 9;
204 * Functions developed from template function and optimized for quantizing and encoding band
/*
 * Quantize one band and write its codewords (signed-quad variant).
 *
 * Coefficients are processed four per iteration. Each scaled magnitude is
 * multiplied by Q34, biased by 0.4054 and truncated to int; the asm then
 * reduces it to 0 or 1 and negates it when the matching input word has its
 * sign bit set, so every qcN ends up as -1, 0 or 1. The codeword selected by
 * curidx is appended to pb.
 *
 * s         - encoder context; s->scoefs is filled by abs_pow34_v()
 * pb        - bit writer receiving the codewords
 * in        - input coefficients; their raw words are loaded by the asm to
 *             obtain the sign bits
 * scaled    - magnitudes to quantize
 * size      - number of coefficients, consumed in steps of 4
 * scale_idx - scalefactor index selecting Q34
 * cb        - codebook number; the tables of entry cb-1 are used
 * lambda, uplim - not referenced on the lines visible here
 *
 * NOTE(review): the only caller visible in this file passes scaled == NULL, so
 * scaled is presumably re-pointed at s->scoefs on a line not shown -- confirm.
 * NOTE(review): the computation of curidx is not visible in this excerpt.
 */
206 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
207 PutBitContext *pb, const float *in,
208 const float *scaled, int size, int scale_idx,
209 int cb, const float lambda, const float uplim,
212 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
214 int qc1, qc2, qc3, qc4;
/* Codeword lengths and codes of codebook cb; the casts drop the tables' const qualifier. */
216 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
217 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
219 abs_pow34_v(s->scoefs, in, size);
221 for (i = 0; i < size; i += 4) {
/* Integer view of in[i..i+3]; it is only handed to the asm, which loads the raw words. */
223 int *in_int = (int *)&in[i];
224 int t0, t1, t2, t3, t4, t5, t6, t7;
/* The float-to-int conversion truncates, so 0.4054 acts as the rounding offset. */
226 qc1 = scaled[i ] * Q34 + 0.4054f;
227 qc2 = scaled[i+1] * Q34 + 0.4054f;
228 qc3 = scaled[i+2] * Q34 + 0.4054f;
229 qc4 = scaled[i+3] * Q34 + 0.4054f;
233 ".set noreorder \n\t"
/* qcN = (0 < qcN): collapse every positive level to 1. */
235 "slt %[qc1], $zero, %[qc1] \n\t"
236 "slt %[qc2], $zero, %[qc2] \n\t"
237 "slt %[qc3], $zero, %[qc3] \n\t"
238 "slt %[qc4], $zero, %[qc4] \n\t"
/* Load the raw words of the four inputs and isolate bit 31 (the sign bit). */
239 "lw %[t0], 0(%[in_int]) \n\t"
240 "lw %[t1], 4(%[in_int]) \n\t"
241 "lw %[t2], 8(%[in_int]) \n\t"
242 "lw %[t3], 12(%[in_int]) \n\t"
243 "srl %[t0], %[t0], 31 \n\t"
244 "srl %[t1], %[t1], 31 \n\t"
245 "srl %[t2], %[t2], 31 \n\t"
246 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qcN; movn installs the negated value only where the sign bit is set. */
247 "subu %[t4], $zero, %[qc1] \n\t"
248 "subu %[t5], $zero, %[qc2] \n\t"
249 "subu %[t6], $zero, %[qc3] \n\t"
250 "subu %[t7], $zero, %[qc4] \n\t"
251 "movn %[qc1], %[t4], %[t0] \n\t"
252 "movn %[qc2], %[t5], %[t1] \n\t"
253 "movn %[qc3], %[t6], %[t2] \n\t"
254 "movn %[qc4], %[t7], %[t3] \n\t"
/* qc1..qc4 are read-write; every temporary is an early-clobber output. */
258 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
259 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
260 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
261 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
262 : [in_int]"r"(in_int)
/* Emit the codeword of the quad: p_bits[curidx] bits taken from p_codes[curidx]. */
275 put_bits(pb, p_bits[curidx], p_codes[curidx]);
/*
 * Quantize one band and write its codewords (unsigned-quad variant).
 *
 * Coefficients are processed four per iteration. Each scaled magnitude is
 * multiplied by Q34, biased by 0.4054, truncated to int and clamped to 2 by
 * the asm. The asm also gathers, in coefficient order, one sign bit for every
 * non-zero level (sign) and the number of non-zero levels (count). The
 * codeword for curidx is emitted followed by those count sign bits.
 *
 * s         - encoder context; s->scoefs is filled by abs_pow34_v()
 * pb        - bit writer receiving the codewords
 * in        - input coefficients; their raw words are loaded by the asm to
 *             obtain the signs
 * scaled    - magnitudes to quantize
 * size      - number of coefficients, consumed in steps of 4
 * scale_idx - scalefactor index selecting Q34
 * cb        - codebook number; the tables of entry cb-1 are used
 * lambda, uplim - not referenced on the lines visible here
 *
 * NOTE(review): the only caller visible in this file passes scaled == NULL, so
 * scaled is presumably re-pointed at s->scoefs on a line not shown -- confirm.
 * NOTE(review): the computation of curidx is not visible in this excerpt.
 */
279 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
280 PutBitContext *pb, const float *in,
281 const float *scaled, int size, int scale_idx,
282 int cb, const float lambda, const float uplim,
285 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
287 int qc1, qc2, qc3, qc4;
/* Codeword lengths and codes of codebook cb; the casts drop the tables' const qualifier. */
289 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
290 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
292 abs_pow34_v(s->scoefs, in, size);
294 for (i = 0; i < size; i += 4) {
295 int curidx, sign, count;
/* Integer view of in[i..i+3]; it is only handed to the asm, which loads the raw words. */
296 int *in_int = (int *)&in[i];
298 unsigned int v_codes;
299 int t0, t1, t2, t3, t4;
/* The float-to-int conversion truncates, so 0.4054 acts as the rounding offset. */
301 qc1 = scaled[i ] * Q34 + 0.4054f;
302 qc2 = scaled[i+1] * Q34 + 0.4054f;
303 qc3 = scaled[i+2] * Q34 + 0.4054f;
304 qc4 = scaled[i+3] * Q34 + 0.4054f;
308 ".set noreorder \n\t"
/* t4 = 2 (clamp limit), sign = 0. */
310 "ori %[t4], $zero, 2 \n\t"
311 "ori %[sign], $zero, 0 \n\t"
/* tN = (2 < qcN); movn then replaces qcN by 2 where that holds: qcN = min(qcN, 2). */
312 "slt %[t0], %[t4], %[qc1] \n\t"
313 "slt %[t1], %[t4], %[qc2] \n\t"
314 "slt %[t2], %[t4], %[qc3] \n\t"
315 "slt %[t3], %[t4], %[qc4] \n\t"
316 "movn %[qc1], %[t4], %[t0] \n\t"
317 "movn %[qc2], %[t4], %[t1] \n\t"
318 "movn %[qc3], %[t4], %[t2] \n\t"
319 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw input words; tN = (word < 0) is 1 when the sign bit is set. */
320 "lw %[t0], 0(%[in_int]) \n\t"
321 "lw %[t1], 4(%[in_int]) \n\t"
322 "lw %[t2], 8(%[in_int]) \n\t"
323 "lw %[t3], 12(%[in_int]) \n\t"
324 "slt %[t0], %[t0], $zero \n\t"
/* sign takes the first sign bit only if qc1 is non-zero. */
325 "movn %[sign], %[t0], %[qc1] \n\t"
326 "slt %[t1], %[t1], $zero \n\t"
327 "slt %[t2], %[t2], $zero \n\t"
328 "slt %[t3], %[t3], $zero \n\t"
/* For each further coefficient: sign = (sign << 1) | bit, applied only when its level is non-zero. */
329 "sll %[t0], %[sign], 1 \n\t"
330 "or %[t0], %[t0], %[t1] \n\t"
331 "movn %[sign], %[t0], %[qc2] \n\t"
/* t4 stops being the clamp constant here and is reused as the flag (0 < qc1). */
332 "slt %[t4], $zero, %[qc1] \n\t"
333 "slt %[t1], $zero, %[qc2] \n\t"
334 "slt %[count], $zero, %[qc3] \n\t"
335 "sll %[t0], %[sign], 1 \n\t"
336 "or %[t0], %[t0], %[t2] \n\t"
337 "movn %[sign], %[t0], %[qc3] \n\t"
338 "slt %[t2], $zero, %[qc4] \n\t"
/* count accumulates the four (0 < qcN) flags, i.e. the number of non-zero levels. */
339 "addu %[count], %[count], %[t4] \n\t"
340 "addu %[count], %[count], %[t1] \n\t"
341 "sll %[t0], %[sign], 1 \n\t"
342 "or %[t0], %[t0], %[t3] \n\t"
343 "movn %[sign], %[t0], %[qc4] \n\t"
344 "addu %[count], %[count], %[t2] \n\t"
/* qc1..qc4 are read-write; sign, count and the temporaries are early-clobber outputs. */
348 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
349 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
350 [sign]"=&r"(sign), [count]"=&r"(count),
351 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
353 : [in_int]"r"(in_int)
/* Append the sign bits below the codeword; sign is masked to its low count bits. */
365 v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
366 v_bits = p_bits[curidx] + count;
367 put_bits(pb, v_bits, v_codes);
/*
 * Quantize one band and write its codewords (signed-pair variant).
 *
 * Coefficients are processed four per iteration, i.e. two pairs. Each scaled
 * magnitude is multiplied by Q34, biased by 0.4054, truncated to int, clamped
 * to 4 and negated when the matching input word has its sign bit set. The
 * codewords of both pairs (curidx and curidx2) are concatenated and written
 * with a single put_bits() call.
 *
 * s         - encoder context; s->scoefs is filled by abs_pow34_v()
 * pb        - bit writer receiving the codewords
 * in        - input coefficients; their raw words are loaded by the asm to
 *             obtain the sign bits
 * scaled    - magnitudes to quantize
 * size      - number of coefficients, consumed in steps of 4
 * scale_idx - scalefactor index selecting Q34
 * cb        - codebook number; the tables of entry cb-1 are used
 * lambda, uplim - not referenced on the lines visible here
 *
 * NOTE(review): the only caller visible in this file passes scaled == NULL, so
 * scaled is presumably re-pointed at s->scoefs on a line not shown -- confirm.
 * NOTE(review): the computation of curidx and curidx2 is not visible here.
 */
371 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
372 PutBitContext *pb, const float *in,
373 const float *scaled, int size, int scale_idx,
374 int cb, const float lambda, const float uplim,
377 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
379 int qc1, qc2, qc3, qc4;
/* Codeword lengths and codes of codebook cb; the casts drop the tables' const qualifier. */
381 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
382 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
384 abs_pow34_v(s->scoefs, in, size);
386 for (i = 0; i < size; i += 4) {
/* Integer view of in[i..i+3]; it is only handed to the asm, which loads the raw words. */
388 int *in_int = (int *)&in[i];
390 unsigned int v_codes;
391 int t0, t1, t2, t3, t4, t5, t6, t7;
/* The float-to-int conversion truncates, so 0.4054 acts as the rounding offset. */
393 qc1 = scaled[i ] * Q34 + 0.4054f;
394 qc2 = scaled[i+1] * Q34 + 0.4054f;
395 qc3 = scaled[i+2] * Q34 + 0.4054f;
396 qc4 = scaled[i+3] * Q34 + 0.4054f;
400 ".set noreorder \n\t"
/* t4 = 4; tN = (4 < qcN); movn gives qcN = min(qcN, 4). */
402 "ori %[t4], $zero, 4 \n\t"
403 "slt %[t0], %[t4], %[qc1] \n\t"
404 "slt %[t1], %[t4], %[qc2] \n\t"
405 "slt %[t2], %[t4], %[qc3] \n\t"
406 "slt %[t3], %[t4], %[qc4] \n\t"
407 "movn %[qc1], %[t4], %[t0] \n\t"
408 "movn %[qc2], %[t4], %[t1] \n\t"
409 "movn %[qc3], %[t4], %[t2] \n\t"
410 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw words of the four inputs and isolate bit 31 (the sign bit). */
411 "lw %[t0], 0(%[in_int]) \n\t"
412 "lw %[t1], 4(%[in_int]) \n\t"
413 "lw %[t2], 8(%[in_int]) \n\t"
414 "lw %[t3], 12(%[in_int]) \n\t"
415 "srl %[t0], %[t0], 31 \n\t"
416 "srl %[t1], %[t1], 31 \n\t"
417 "srl %[t2], %[t2], 31 \n\t"
418 "srl %[t3], %[t3], 31 \n\t"
/* t4 is reused (the clamp is done): t4..t7 = -qcN, installed where the sign bit is set. */
419 "subu %[t4], $zero, %[qc1] \n\t"
420 "subu %[t5], $zero, %[qc2] \n\t"
421 "subu %[t6], $zero, %[qc3] \n\t"
422 "subu %[t7], $zero, %[qc4] \n\t"
423 "movn %[qc1], %[t4], %[t0] \n\t"
424 "movn %[qc2], %[t5], %[t1] \n\t"
425 "movn %[qc3], %[t6], %[t2] \n\t"
426 "movn %[qc4], %[t7], %[t3] \n\t"
/* qc1..qc4 are read-write; every temporary is an early-clobber output. */
430 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
431 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
432 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
433 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
434 : [in_int]"r"(in_int)
/* First pair's code goes in the high bits, second pair's code in the low p_bits[curidx2] bits. */
444 v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
445 v_bits = p_bits[curidx] + p_bits[curidx2];
446 put_bits(pb, v_bits, v_codes);
/*
 * Quantize one band and write its codewords (unsigned-pair variant, levels
 * clamped to 7).
 *
 * Coefficients are processed four per iteration, i.e. two pairs. Each scaled
 * magnitude is multiplied by Q34, biased by 0.4054, truncated to int and
 * clamped to 7 by the asm. For each pair the asm gathers one sign bit per
 * non-zero level (sign1 / sign2) and the number of non-zero levels
 * (count1 / count2). Each pair is written as its codeword followed by its
 * sign bits.
 *
 * s         - encoder context; s->scoefs is filled by abs_pow34_v()
 * pb        - bit writer receiving the codewords
 * in        - input coefficients; their raw words are loaded by the asm to
 *             obtain the signs
 * scaled    - magnitudes to quantize
 * size      - number of coefficients, consumed in steps of 4
 * scale_idx - scalefactor index selecting Q34
 * cb        - codebook number; the tables of entry cb-1 are used
 * lambda, uplim - not referenced on the lines visible here
 *
 * NOTE(review): the only caller visible in this file passes scaled == NULL, so
 * scaled is presumably re-pointed at s->scoefs on a line not shown -- confirm.
 * NOTE(review): curidx is assigned on lines not visible here; it is used for
 * both pairs, so it is presumably recomputed between the two put_bits() calls.
 */
450 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
451 PutBitContext *pb, const float *in,
452 const float *scaled, int size, int scale_idx,
453 int cb, const float lambda, const float uplim,
456 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
458 int qc1, qc2, qc3, qc4;
/* Codeword lengths and codes of codebook cb; the casts drop the tables' const qualifier. */
460 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
461 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
463 abs_pow34_v(s->scoefs, in, size);
465 for (i = 0; i < size; i += 4) {
466 int curidx, sign1, count1, sign2, count2;
/* Integer view of in[i..i+3]; it is only handed to the asm, which loads the raw words. */
467 int *in_int = (int *)&in[i];
469 unsigned int v_codes;
470 int t0, t1, t2, t3, t4;
/* The float-to-int conversion truncates, so 0.4054 acts as the rounding offset. */
472 qc1 = scaled[i ] * Q34 + 0.4054f;
473 qc2 = scaled[i+1] * Q34 + 0.4054f;
474 qc3 = scaled[i+2] * Q34 + 0.4054f;
475 qc4 = scaled[i+3] * Q34 + 0.4054f;
479 ".set noreorder \n\t"
/* t4 = 7 (clamp limit); both sign accumulators start at 0. */
481 "ori %[t4], $zero, 7 \n\t"
482 "ori %[sign1], $zero, 0 \n\t"
483 "ori %[sign2], $zero, 0 \n\t"
/* tN = (7 < qcN); movn gives qcN = min(qcN, 7). */
484 "slt %[t0], %[t4], %[qc1] \n\t"
485 "slt %[t1], %[t4], %[qc2] \n\t"
486 "slt %[t2], %[t4], %[qc3] \n\t"
487 "slt %[t3], %[t4], %[qc4] \n\t"
488 "movn %[qc1], %[t4], %[t0] \n\t"
489 "movn %[qc2], %[t4], %[t1] \n\t"
490 "movn %[qc3], %[t4], %[t2] \n\t"
491 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw input words; (word < 0) is 1 when the sign bit is set. */
492 "lw %[t0], 0(%[in_int]) \n\t"
493 "lw %[t1], 4(%[in_int]) \n\t"
494 "lw %[t2], 8(%[in_int]) \n\t"
495 "lw %[t3], 12(%[in_int]) \n\t"
/* First element of each pair: take its sign bit only if the level is non-zero. */
496 "slt %[t0], %[t0], $zero \n\t"
497 "movn %[sign1], %[t0], %[qc1] \n\t"
498 "slt %[t2], %[t2], $zero \n\t"
499 "movn %[sign2], %[t2], %[qc3] \n\t"
/* Second element: signN = (signN << 1) | bit, applied only when the level is non-zero. */
500 "slt %[t1], %[t1], $zero \n\t"
501 "sll %[t0], %[sign1], 1 \n\t"
502 "or %[t0], %[t0], %[t1] \n\t"
503 "movn %[sign1], %[t0], %[qc2] \n\t"
504 "slt %[t3], %[t3], $zero \n\t"
505 "sll %[t0], %[sign2], 1 \n\t"
506 "or %[t0], %[t0], %[t3] \n\t"
507 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countN = number of non-zero levels in pair N. */
508 "slt %[count1], $zero, %[qc1] \n\t"
509 "slt %[t1], $zero, %[qc2] \n\t"
510 "slt %[count2], $zero, %[qc3] \n\t"
511 "slt %[t2], $zero, %[qc4] \n\t"
512 "addu %[count1], %[count1], %[t1] \n\t"
513 "addu %[count2], %[count2], %[t2] \n\t"
/* qc1..qc4 are read-write; signs, counts and temporaries are early-clobber outputs. */
517 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
518 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
519 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
520 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
521 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
523 : [in_int]"r"(in_int)
/*
 * NOTE(review): this clobber list names the hard registers t0..t4, yet the
 * asm text above only uses compiler-allocated %[...] operands. The entries
 * look like a leftover; they only take five registers away from the
 * allocator. The sibling asm statements visible in this file show no such
 * list -- consider dropping these entries.
 */
524 : "t0", "t1", "t2", "t3", "t4",
/* First pair: codeword followed by its count1 sign bits. */
531 v_codes = (p_codes[curidx] << count1) | sign1;
532 v_bits = p_bits[curidx] + count1;
533 put_bits(pb, v_bits, v_codes);
/* Second pair: codeword followed by its count2 sign bits. */
538 v_codes = (p_codes[curidx] << count2) | sign2;
539 v_bits = p_bits[curidx] + count2;
540 put_bits(pb, v_bits, v_codes);
/*
 * Quantize one band and write its codewords (unsigned-pair variant, levels
 * clamped to 12).
 *
 * Coefficients are processed four per iteration, i.e. two pairs. Each scaled
 * magnitude is multiplied by Q34, biased by 0.4054, truncated to int and
 * clamped to 12 by the asm. For each pair the asm gathers one sign bit per
 * non-zero level (sign1 / sign2) and the number of non-zero levels
 * (count1 / count2). Each pair is written as its codeword followed by its
 * sign bits.
 *
 * s         - encoder context; s->scoefs is filled by abs_pow34_v()
 * pb        - bit writer receiving the codewords
 * in        - input coefficients; their raw words are loaded by the asm to
 *             obtain the signs
 * scaled    - magnitudes to quantize
 * size      - number of coefficients, consumed in steps of 4
 * scale_idx - scalefactor index selecting Q34
 * cb        - codebook number; the tables of entry cb-1 are used
 * lambda, uplim - not referenced on the lines visible here
 *
 * NOTE(review): the only caller visible in this file passes scaled == NULL, so
 * scaled is presumably re-pointed at s->scoefs on a line not shown -- confirm.
 * NOTE(review): curidx is assigned on lines not visible here; it is used for
 * both pairs, so it is presumably recomputed between the two put_bits() calls.
 */
544 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
545 PutBitContext *pb, const float *in,
546 const float *scaled, int size, int scale_idx,
547 int cb, const float lambda, const float uplim,
550 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
552 int qc1, qc2, qc3, qc4;
/* Codeword lengths and codes of codebook cb; the casts drop the tables' const qualifier. */
554 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
555 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
557 abs_pow34_v(s->scoefs, in, size);
559 for (i = 0; i < size; i += 4) {
560 int curidx, sign1, count1, sign2, count2;
/* Integer view of in[i..i+3]; it is only handed to the asm, which loads the raw words. */
561 int *in_int = (int *)&in[i];
563 unsigned int v_codes;
564 int t0, t1, t2, t3, t4;
/* The float-to-int conversion truncates, so 0.4054 acts as the rounding offset. */
566 qc1 = scaled[i ] * Q34 + 0.4054f;
567 qc2 = scaled[i+1] * Q34 + 0.4054f;
568 qc3 = scaled[i+2] * Q34 + 0.4054f;
569 qc4 = scaled[i+3] * Q34 + 0.4054f;
573 ".set noreorder \n\t"
/* t4 = 12 (clamp limit); both sign accumulators start at 0. */
575 "ori %[t4], $zero, 12 \n\t"
576 "ori %[sign1], $zero, 0 \n\t"
577 "ori %[sign2], $zero, 0 \n\t"
/* tN = (12 < qcN); movn gives qcN = min(qcN, 12). */
578 "slt %[t0], %[t4], %[qc1] \n\t"
579 "slt %[t1], %[t4], %[qc2] \n\t"
580 "slt %[t2], %[t4], %[qc3] \n\t"
581 "slt %[t3], %[t4], %[qc4] \n\t"
582 "movn %[qc1], %[t4], %[t0] \n\t"
583 "movn %[qc2], %[t4], %[t1] \n\t"
584 "movn %[qc3], %[t4], %[t2] \n\t"
585 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw input words; (word < 0) is 1 when the sign bit is set. */
586 "lw %[t0], 0(%[in_int]) \n\t"
587 "lw %[t1], 4(%[in_int]) \n\t"
588 "lw %[t2], 8(%[in_int]) \n\t"
589 "lw %[t3], 12(%[in_int]) \n\t"
/* First element of each pair: take its sign bit only if the level is non-zero. */
590 "slt %[t0], %[t0], $zero \n\t"
591 "movn %[sign1], %[t0], %[qc1] \n\t"
592 "slt %[t2], %[t2], $zero \n\t"
593 "movn %[sign2], %[t2], %[qc3] \n\t"
/* Second element: signN = (signN << 1) | bit, applied only when the level is non-zero. */
594 "slt %[t1], %[t1], $zero \n\t"
595 "sll %[t0], %[sign1], 1 \n\t"
596 "or %[t0], %[t0], %[t1] \n\t"
597 "movn %[sign1], %[t0], %[qc2] \n\t"
598 "slt %[t3], %[t3], $zero \n\t"
599 "sll %[t0], %[sign2], 1 \n\t"
600 "or %[t0], %[t0], %[t3] \n\t"
601 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countN = number of non-zero levels in pair N. */
602 "slt %[count1], $zero, %[qc1] \n\t"
603 "slt %[t1], $zero, %[qc2] \n\t"
604 "slt %[count2], $zero, %[qc3] \n\t"
605 "slt %[t2], $zero, %[qc4] \n\t"
606 "addu %[count1], %[count1], %[t1] \n\t"
607 "addu %[count2], %[count2], %[t2] \n\t"
/* qc1..qc4 are read-write; signs, counts and temporaries are early-clobber outputs. */
611 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
612 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
613 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
614 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
615 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
617 : [in_int]"r"(in_int)
/* First pair: codeword followed by its count1 sign bits. */
624 v_codes = (p_codes[curidx] << count1) | sign1;
625 v_bits = p_bits[curidx] + count1;
626 put_bits(pb, v_bits, v_codes);
/* Second pair: codeword followed by its count2 sign bits. */
631 v_codes = (p_codes[curidx] << count2) | sign2;
632 v_bits = p_bits[curidx] + count2;
633 put_bits(pb, v_bits, v_codes);
/*
 * Quantize one band and write its codewords (escape-codebook variant).
 *
 * The function contains two loops over the band, both handling four
 * coefficients (two pairs) per iteration:
 *  - the first clamps every level to 16 and writes, per pair, the codeword
 *    followed by the sign bits of the non-zero levels;
 *  - the second does the same and additionally keeps the levels in c1..c4
 *    (limited to 8191 by the saturating shift) so that an escape sequence can
 *    be written after a pair's codeword for each component whose codebook
 *    vector entry equals 64.0f.
 *
 * s         - encoder context; s->scoefs is filled by abs_pow34_v()
 * pb        - bit writer receiving the codewords
 * in        - input coefficients; their raw words are loaded by the asm to
 *             obtain the signs
 * scaled    - magnitudes to quantize
 * size      - number of coefficients, consumed in steps of 4
 * scale_idx - scalefactor index selecting Q34
 * cb        - codebook number; the tables of entry cb-1 are used
 * lambda, uplim - not referenced on the lines visible here
 *
 * NOTE(review): the condition choosing between the two loops is not visible
 * in this excerpt.
 * NOTE(review): the only caller visible in this file passes scaled == NULL, so
 * scaled is presumably re-pointed at s->scoefs on a line not shown -- confirm.
 * NOTE(review): the computation of curidx and curidx2 is not visible here.
 */
637 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
638 PutBitContext *pb, const float *in,
639 const float *scaled, int size, int scale_idx,
640 int cb, const float lambda, const float uplim,
643 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
645 int qc1, qc2, qc3, qc4;
/* Codebook tables of codebook cb; the casts drop the tables' const qualifier. */
647 uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
648 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
649 float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
651 abs_pow34_v(s->scoefs, in, size);
/* ---- First loop: no escape sequences are written. ---- */
655 for (i = 0; i < size; i += 4) {
656 int curidx, curidx2, sign1, count1, sign2, count2;
/* Integer view of in[i..i+3]; it is only handed to the asm, which loads the raw words. */
657 int *in_int = (int *)&in[i];
659 unsigned int v_codes;
660 int t0, t1, t2, t3, t4;
/* The float-to-int conversion truncates, so 0.4054 acts as the rounding offset. */
662 qc1 = scaled[i ] * Q34 + 0.4054f;
663 qc2 = scaled[i+1] * Q34 + 0.4054f;
664 qc3 = scaled[i+2] * Q34 + 0.4054f;
665 qc4 = scaled[i+3] * Q34 + 0.4054f;
669 ".set noreorder \n\t"
/* t4 = 16 (clamp limit); both sign accumulators start at 0. */
671 "ori %[t4], $zero, 16 \n\t"
672 "ori %[sign1], $zero, 0 \n\t"
673 "ori %[sign2], $zero, 0 \n\t"
/* tN = (16 < qcN); movn gives qcN = min(qcN, 16). */
674 "slt %[t0], %[t4], %[qc1] \n\t"
675 "slt %[t1], %[t4], %[qc2] \n\t"
676 "slt %[t2], %[t4], %[qc3] \n\t"
677 "slt %[t3], %[t4], %[qc4] \n\t"
678 "movn %[qc1], %[t4], %[t0] \n\t"
679 "movn %[qc2], %[t4], %[t1] \n\t"
680 "movn %[qc3], %[t4], %[t2] \n\t"
681 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw input words; (word < 0) is 1 when the sign bit is set. */
682 "lw %[t0], 0(%[in_int]) \n\t"
683 "lw %[t1], 4(%[in_int]) \n\t"
684 "lw %[t2], 8(%[in_int]) \n\t"
685 "lw %[t3], 12(%[in_int]) \n\t"
/* First element of each pair: take its sign bit only if the level is non-zero. */
686 "slt %[t0], %[t0], $zero \n\t"
687 "movn %[sign1], %[t0], %[qc1] \n\t"
688 "slt %[t2], %[t2], $zero \n\t"
689 "movn %[sign2], %[t2], %[qc3] \n\t"
/* Second element: signN = (signN << 1) | bit, applied only when the level is non-zero. */
690 "slt %[t1], %[t1], $zero \n\t"
691 "sll %[t0], %[sign1], 1 \n\t"
692 "or %[t0], %[t0], %[t1] \n\t"
693 "movn %[sign1], %[t0], %[qc2] \n\t"
694 "slt %[t3], %[t3], $zero \n\t"
695 "sll %[t0], %[sign2], 1 \n\t"
696 "or %[t0], %[t0], %[t3] \n\t"
697 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countN = number of non-zero levels in pair N. */
698 "slt %[count1], $zero, %[qc1] \n\t"
699 "slt %[t1], $zero, %[qc2] \n\t"
700 "slt %[count2], $zero, %[qc3] \n\t"
701 "slt %[t2], $zero, %[qc4] \n\t"
702 "addu %[count1], %[count1], %[t1] \n\t"
703 "addu %[count2], %[count2], %[t2] \n\t"
/* qc1..qc4 are read-write; signs, counts and temporaries are early-clobber outputs. */
707 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
708 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
709 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
710 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
711 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
713 : [in_int]"r"(in_int)
/* First pair: codeword followed by its count1 sign bits. */
722 v_codes = (p_codes[curidx] << count1) | sign1;
723 v_bits = p_bits[curidx] + count1;
724 put_bits(pb, v_bits, v_codes);
/* Second pair: codeword followed by its count2 sign bits. */
726 v_codes = (p_codes[curidx2] << count2) | sign2;
727 v_bits = p_bits[curidx2] + count2;
728 put_bits(pb, v_bits, v_codes);
/* ---- Second loop: same coding, plus escape sequences for large levels. ---- */
731 for (i = 0; i < size; i += 4) {
732 int curidx, curidx2, sign1, count1, sign2, count2;
/* Integer view of in[i..i+3]; it is only handed to the asm, which loads the raw words. */
733 int *in_int = (int *)&in[i];
735 unsigned int v_codes;
737 int t0, t1, t2, t3, t4;
/* The float-to-int conversion truncates, so 0.4054 acts as the rounding offset. */
739 qc1 = scaled[i ] * Q34 + 0.4054f;
740 qc2 = scaled[i+1] * Q34 + 0.4054f;
741 qc3 = scaled[i+2] * Q34 + 0.4054f;
742 qc4 = scaled[i+3] * Q34 + 0.4054f;
746 ".set noreorder \n\t"
/* t4 = 16 (clamp limit); both sign accumulators start at 0. */
748 "ori %[t4], $zero, 16 \n\t"
749 "ori %[sign1], $zero, 0 \n\t"
750 "ori %[sign2], $zero, 0 \n\t"
/*
 * cN = unclamped level limited to 13 bits: the saturating left shift by 18
 * followed by a logical right shift by 18 caps a non-negative value at 8191.
 * shll_s.w belongs to the MIPS DSP extension.
 */
751 "shll_s.w %[c1], %[qc1], 18 \n\t"
752 "shll_s.w %[c2], %[qc2], 18 \n\t"
753 "shll_s.w %[c3], %[qc3], 18 \n\t"
754 "shll_s.w %[c4], %[qc4], 18 \n\t"
755 "srl %[c1], %[c1], 18 \n\t"
756 "srl %[c2], %[c2], 18 \n\t"
757 "srl %[c3], %[c3], 18 \n\t"
758 "srl %[c4], %[c4], 18 \n\t"
/* tN = (16 < qcN); movn gives qcN = min(qcN, 16). */
759 "slt %[t0], %[t4], %[qc1] \n\t"
760 "slt %[t1], %[t4], %[qc2] \n\t"
761 "slt %[t2], %[t4], %[qc3] \n\t"
762 "slt %[t3], %[t4], %[qc4] \n\t"
763 "movn %[qc1], %[t4], %[t0] \n\t"
764 "movn %[qc2], %[t4], %[t1] \n\t"
765 "movn %[qc3], %[t4], %[t2] \n\t"
766 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw input words; (word < 0) is 1 when the sign bit is set. */
767 "lw %[t0], 0(%[in_int]) \n\t"
768 "lw %[t1], 4(%[in_int]) \n\t"
769 "lw %[t2], 8(%[in_int]) \n\t"
770 "lw %[t3], 12(%[in_int]) \n\t"
/* First element of each pair: take its sign bit only if the level is non-zero. */
771 "slt %[t0], %[t0], $zero \n\t"
772 "movn %[sign1], %[t0], %[qc1] \n\t"
773 "slt %[t2], %[t2], $zero \n\t"
774 "movn %[sign2], %[t2], %[qc3] \n\t"
/* Second element: signN = (signN << 1) | bit, applied only when the level is non-zero. */
775 "slt %[t1], %[t1], $zero \n\t"
776 "sll %[t0], %[sign1], 1 \n\t"
777 "or %[t0], %[t0], %[t1] \n\t"
778 "movn %[sign1], %[t0], %[qc2] \n\t"
779 "slt %[t3], %[t3], $zero \n\t"
780 "sll %[t0], %[sign2], 1 \n\t"
781 "or %[t0], %[t0], %[t3] \n\t"
782 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countN = number of non-zero levels in pair N. */
783 "slt %[count1], $zero, %[qc1] \n\t"
784 "slt %[t1], $zero, %[qc2] \n\t"
785 "slt %[count2], $zero, %[qc3] \n\t"
786 "slt %[t2], $zero, %[qc4] \n\t"
787 "addu %[count1], %[count1], %[t1] \n\t"
788 "addu %[count2], %[count2], %[t2] \n\t"
/* qc1..qc4 are read-write; signs, counts, c1..c4 and temporaries are early-clobber outputs. */
792 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
793 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
794 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
795 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
796 [c1]"=&r"(c1), [c2]"=&r"(c2),
797 [c3]"=&r"(c3), [c4]"=&r"(c4),
798 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
800 : [in_int]"r"(in_int)
/* First pair: codeword followed by its count1 sign bits. */
810 v_codes = (p_codes[curidx] << count1) | sign1;
811 v_bits = p_bits[curidx] + count1;
812 put_bits(pb, v_bits, v_codes);
/*
 * Escape sequence for a component: with len = av_log2(c), the value written
 * is (len - 4) one bits, a zero bit, then the low len bits of c, for a total
 * of 2 * len - 3 bits.
 * NOTE(review): a codebook vector entry of 64.0f appears to mark a component
 * that was clamped to 16 and needs an escape -- confirm against the table.
 */
814 if (p_vectors[curidx*2 ] == 64.0f) {
815 int len = av_log2(c1);
816 v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
817 put_bits(pb, len * 2 - 3, v_codes);
819 if (p_vectors[curidx*2+1] == 64.0f) {
820 int len = av_log2(c2);
821 v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
822 put_bits(pb, len*2-3, v_codes);
/* Second pair: codeword followed by its count2 sign bits, then its escapes. */
825 v_codes = (p_codes[curidx2] << count2) | sign2;
826 v_bits = p_bits[curidx2] + count2;
827 put_bits(pb, v_bits, v_codes);
829 if (p_vectors[curidx2*2 ] == 64.0f) {
830 int len = av_log2(c3);
831 v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
832 put_bits(pb, len* 2 - 3, v_codes);
834 if (p_vectors[curidx2*2+1] == 64.0f) {
835 int len = av_log2(c4);
836 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
837 put_bits(pb, len * 2 - 3, v_codes);
/*
 * Dispatch table of the band quantize-and-encode routines; it is indexed by
 * the codebook number through the quantize_and_encode_band_cost() macro.
 * Each coding variant is listed twice in a row, the escape variant once.
 * NOTE(review): part of the declaration and initializer is not visible in
 * this excerpt, so the exact index of each entry cannot be read off here.
 */
843 static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
844 PutBitContext *pb, const float *in,
845 const float *scaled, int size, int scale_idx,
846 int cb, const float lambda, const float uplim,
849 quantize_and_encode_band_cost_SQUAD_mips,
850 quantize_and_encode_band_cost_SQUAD_mips,
851 quantize_and_encode_band_cost_UQUAD_mips,
852 quantize_and_encode_band_cost_UQUAD_mips,
853 quantize_and_encode_band_cost_SPAIR_mips,
854 quantize_and_encode_band_cost_SPAIR_mips,
855 quantize_and_encode_band_cost_UPAIR7_mips,
856 quantize_and_encode_band_cost_UPAIR7_mips,
857 quantize_and_encode_band_cost_UPAIR12_mips,
858 quantize_and_encode_band_cost_UPAIR12_mips,
859 quantize_and_encode_band_cost_ESC_mips,
/*
 * Call the routine stored at quantize_and_encode_band_cost_arr[cb], passing
 * the macro arguments through.
 * NOTE(review): cb is expanded twice (as the table index and as an argument),
 * so it should be an expression without side effects.
 */
862 #define quantize_and_encode_band_cost( \
863 s, pb, in, scaled, size, scale_idx, cb, \
864 lambda, uplim, bits) \
865 quantize_and_encode_band_cost_arr[cb]( \
866 s, pb, in, scaled, size, scale_idx, cb, \
/*
 * Quantize and encode one band with codebook cb by dispatching through
 * quantize_and_encode_band_cost().
 *
 * NULL is passed for the scaled argument; the remaining arguments of the call
 * are on a line not visible in this excerpt.
 */
869 static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
870 const float *in, int size, int scale_idx,
871 int cb, const float lambda)
873 quantize_and_encode_band_cost(s, pb, in, NULL, size, scale_idx, cb, lambda,
878 * Functions developed from template function and optimized for getting the number of bits
/*
 * Bit-count routine sharing the parameter list of the other
 * get_band_numbits_*_mips() functions.
 * NOTE(review): the body is not visible in this excerpt; going by the name it
 * handles the zero codebook -- confirm.
 */
880 static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
881 PutBitContext *pb, const float *in,
882 const float *scaled, int size, int scale_idx,
883 int cb, const float lambda, const float uplim,
/*
 * Count the bits needed to code one band with a signed-quad codebook.
 *
 * Coefficients are processed four per iteration. Each scaled magnitude is
 * multiplied by Q34, biased by 0.4054 and truncated to int; the asm reduces
 * it to 0 or 1 and negates it when the matching input word has its sign bit
 * set. The codeword length of the resulting quad is added to curbits.
 *
 * in        - input coefficients; their raw words are loaded by the asm to
 *             obtain the sign bits
 * scaled    - magnitudes to quantize
 * size      - number of coefficients, consumed in steps of 4
 * scale_idx - scalefactor index selecting Q34
 * cb        - codebook number; the length table of entry cb-1 is used
 * s, pb, lambda, uplim - not referenced on the lines visible here
 *
 * NOTE(review): the computation of curidx and the return statement are not
 * visible in this excerpt.
 */
889 static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
890 PutBitContext *pb, const float *in,
891 const float *scaled, int size, int scale_idx,
892 int cb, const float lambda, const float uplim,
895 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
897 int qc1, qc2, qc3, qc4;
/* Codeword lengths of codebook cb; the cast drops the table's const qualifier. */
900 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
902 for (i = 0; i < size; i += 4) {
/* Integer view of in[i..i+3]; it is only handed to the asm, which loads the raw words. */
904 int *in_int = (int *)&in[i];
905 int t0, t1, t2, t3, t4, t5, t6, t7;
/* The float-to-int conversion truncates, so 0.4054 acts as the rounding offset. */
907 qc1 = scaled[i ] * Q34 + 0.4054f;
908 qc2 = scaled[i+1] * Q34 + 0.4054f;
909 qc3 = scaled[i+2] * Q34 + 0.4054f;
910 qc4 = scaled[i+3] * Q34 + 0.4054f;
914 ".set noreorder \n\t"
/* qcN = (0 < qcN): collapse every positive level to 1. */
916 "slt %[qc1], $zero, %[qc1] \n\t"
917 "slt %[qc2], $zero, %[qc2] \n\t"
918 "slt %[qc3], $zero, %[qc3] \n\t"
919 "slt %[qc4], $zero, %[qc4] \n\t"
/* Load the raw words of the four inputs and isolate bit 31 (the sign bit). */
920 "lw %[t0], 0(%[in_int]) \n\t"
921 "lw %[t1], 4(%[in_int]) \n\t"
922 "lw %[t2], 8(%[in_int]) \n\t"
923 "lw %[t3], 12(%[in_int]) \n\t"
924 "srl %[t0], %[t0], 31 \n\t"
925 "srl %[t1], %[t1], 31 \n\t"
926 "srl %[t2], %[t2], 31 \n\t"
927 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = -qcN; movn installs the negated value only where the sign bit is set. */
928 "subu %[t4], $zero, %[qc1] \n\t"
929 "subu %[t5], $zero, %[qc2] \n\t"
930 "subu %[t6], $zero, %[qc3] \n\t"
931 "subu %[t7], $zero, %[qc4] \n\t"
932 "movn %[qc1], %[t4], %[t0] \n\t"
933 "movn %[qc2], %[t5], %[t1] \n\t"
934 "movn %[qc3], %[t6], %[t2] \n\t"
935 "movn %[qc4], %[t7], %[t3] \n\t"
/* qc1..qc4 are read-write; every temporary is an early-clobber output. */
939 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
940 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
941 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
942 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
943 : [in_int]"r"(in_int)
/* Add the codeword length of this quad. */
956 curbits += p_bits[curidx];
/*
 * Count the bits needed to code one band with an unsigned-quad codebook.
 *
 * Coefficients are processed four per iteration. Each scaled magnitude is
 * multiplied by Q34, biased by 0.4054, truncated to int and clamped to 2 by
 * the asm. For every quad the codeword length plus the number of sign bits
 * looked up in uquad_sign_bits[] is added to curbits.
 *
 * scaled    - magnitudes to quantize
 * size      - number of coefficients, consumed in steps of 4
 * scale_idx - scalefactor index selecting Q34
 * cb        - codebook number; the length table of entry cb-1 is used
 * s, pb, in, lambda, uplim - not referenced on the lines visible here
 *
 * NOTE(review): the computation of curidx and the return statement are not
 * visible in this excerpt.
 */
961 static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
962 PutBitContext *pb, const float *in,
963 const float *scaled, int size, int scale_idx,
964 int cb, const float lambda, const float uplim,
967 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
970 int qc1, qc2, qc3, qc4;
/* Codeword lengths of codebook cb; the cast drops the table's const qualifier. */
972 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
974 for (i = 0; i < size; i += 4) {
976 int t0, t1, t2, t3, t4;
/* The float-to-int conversion truncates, so 0.4054 acts as the rounding offset. */
978 qc1 = scaled[i ] * Q34 + 0.4054f;
979 qc2 = scaled[i+1] * Q34 + 0.4054f;
980 qc3 = scaled[i+2] * Q34 + 0.4054f;
981 qc4 = scaled[i+3] * Q34 + 0.4054f;
985 ".set noreorder \n\t"
/* t4 = 2; tN = (2 < qcN); movn gives qcN = min(qcN, 2). */
987 "ori %[t4], $zero, 2 \n\t"
988 "slt %[t0], %[t4], %[qc1] \n\t"
989 "slt %[t1], %[t4], %[qc2] \n\t"
990 "slt %[t2], %[t4], %[qc3] \n\t"
991 "slt %[t3], %[t4], %[qc4] \n\t"
992 "movn %[qc1], %[t4], %[t0] \n\t"
993 "movn %[qc2], %[t4], %[t1] \n\t"
994 "movn %[qc3], %[t4], %[t2] \n\t"
995 "movn %[qc4], %[t4], %[t3] \n\t"
/* qc1..qc4 are read-write; the temporaries are early-clobber outputs. */
999 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1000 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1001 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* Codeword length plus one sign bit per non-zero level of the quad. */
1013 curbits += p_bits[curidx];
1014 curbits += uquad_sign_bits[curidx];
/*
 * Count the bits needed to code one band with a signed-pair codebook.
 *
 * Coefficients are processed four per iteration, i.e. two pairs. Each scaled
 * magnitude is multiplied by Q34, biased by 0.4054, truncated to int, clamped
 * to 4 and negated when the matching input word has its sign bit set. The
 * codeword lengths of both pairs are added to curbits.
 *
 * in        - input coefficients; their raw words are loaded by the asm to
 *             obtain the sign bits
 * scaled    - magnitudes to quantize
 * size      - number of coefficients, consumed in steps of 4
 * scale_idx - scalefactor index selecting Q34
 * cb        - codebook number; the length table of entry cb-1 is used
 * s, pb, lambda, uplim - not referenced on the lines visible here
 *
 * NOTE(review): most of the index computation and the return statement are
 * not visible in this excerpt.
 */
1019 static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
1020 PutBitContext *pb, const float *in,
1021 const float *scaled, int size, int scale_idx,
1022 int cb, const float lambda, const float uplim,
1025 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1027 int qc1, qc2, qc3, qc4;
/* Codeword lengths of codebook cb; the cast drops the table's const qualifier. */
1030 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1032 for (i = 0; i < size; i += 4) {
1033 int curidx, curidx2;
/* Integer view of in[i..i+3]; it is only handed to the asm, which loads the raw words. */
1034 int *in_int = (int *)&in[i];
1035 int t0, t1, t2, t3, t4, t5, t6, t7;
/* The float-to-int conversion truncates, so 0.4054 acts as the rounding offset. */
1037 qc1 = scaled[i ] * Q34 + 0.4054f;
1038 qc2 = scaled[i+1] * Q34 + 0.4054f;
1039 qc3 = scaled[i+2] * Q34 + 0.4054f;
1040 qc4 = scaled[i+3] * Q34 + 0.4054f;
1044 ".set noreorder \n\t"
/* t4 = 4; tN = (4 < qcN); movn gives qcN = min(qcN, 4). */
1046 "ori %[t4], $zero, 4 \n\t"
1047 "slt %[t0], %[t4], %[qc1] \n\t"
1048 "slt %[t1], %[t4], %[qc2] \n\t"
1049 "slt %[t2], %[t4], %[qc3] \n\t"
1050 "slt %[t3], %[t4], %[qc4] \n\t"
1051 "movn %[qc1], %[t4], %[t0] \n\t"
1052 "movn %[qc2], %[t4], %[t1] \n\t"
1053 "movn %[qc3], %[t4], %[t2] \n\t"
1054 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw words of the four inputs and isolate bit 31 (the sign bit). */
1055 "lw %[t0], 0(%[in_int]) \n\t"
1056 "lw %[t1], 4(%[in_int]) \n\t"
1057 "lw %[t2], 8(%[in_int]) \n\t"
1058 "lw %[t3], 12(%[in_int]) \n\t"
1059 "srl %[t0], %[t0], 31 \n\t"
1060 "srl %[t1], %[t1], 31 \n\t"
1061 "srl %[t2], %[t2], 31 \n\t"
1062 "srl %[t3], %[t3], 31 \n\t"
/* t4 is reused (the clamp is done): t4..t7 = -qcN, installed where the sign bit is set. */
1063 "subu %[t4], $zero, %[qc1] \n\t"
1064 "subu %[t5], $zero, %[qc2] \n\t"
1065 "subu %[t6], $zero, %[qc3] \n\t"
1066 "subu %[t7], $zero, %[qc4] \n\t"
1067 "movn %[qc1], %[t4], %[t0] \n\t"
1068 "movn %[qc2], %[t5], %[t1] \n\t"
1069 "movn %[qc3], %[t6], %[t2] \n\t"
1070 "movn %[qc4], %[t7], %[t3] \n\t"
/* qc1..qc4 are read-write; every temporary is an early-clobber output. */
1074 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1075 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1076 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1077 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1078 : [in_int]"r"(in_int)
/*
 * NOTE(review): the +40 presumably biases the signed pair index so it is
 * never negative (levels lie in -4..4 and 9 * 4 + 4 == 40); the first half
 * of the index computation is on a line not shown -- confirm.
 */
1086 curidx2 += qc4 + 40;
/* Add the codeword lengths of both pairs. */
1088 curbits += p_bits[curidx] + p_bits[curidx2];
/*
 * Count the bits needed to code one band with an unsigned-pair codebook whose
 * levels are clamped to 7.
 *
 * Coefficients are processed four per iteration, i.e. two pairs. Each scaled
 * magnitude is multiplied by Q34, biased by 0.4054, truncated to int and
 * clamped to 7 by the asm. For each pair the sign-bit count from
 * upair7_sign_bits[] is added to curbits together with codeword lengths from
 * p_bits[].
 *
 * scaled    - magnitudes to quantize
 * size      - number of coefficients, consumed in steps of 4
 * scale_idx - scalefactor index selecting Q34
 * cb        - codebook number; the length table of entry cb-1 is used
 * s, pb, in, lambda, uplim - not referenced on the lines visible here
 *
 * NOTE(review): the computation of curidx / curidx2, one term of the sum and
 * the return statement are not visible in this excerpt.
 */
1093 static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
1094 PutBitContext *pb, const float *in,
1095 const float *scaled, int size, int scale_idx,
1096 int cb, const float lambda, const float uplim,
1099 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1101 int qc1, qc2, qc3, qc4;
/* Codeword lengths of codebook cb; the cast drops the table's const qualifier. */
1104 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1106 for (i = 0; i < size; i += 4) {
1107 int curidx, curidx2;
1108 int t0, t1, t2, t3, t4;
/* The float-to-int conversion truncates, so 0.4054 acts as the rounding offset. */
1110 qc1 = scaled[i ] * Q34 + 0.4054f;
1111 qc2 = scaled[i+1] * Q34 + 0.4054f;
1112 qc3 = scaled[i+2] * Q34 + 0.4054f;
1113 qc4 = scaled[i+3] * Q34 + 0.4054f;
1117 ".set noreorder \n\t"
/* t4 = 7; tN = (7 < qcN); movn gives qcN = min(qcN, 7). */
1119 "ori %[t4], $zero, 7 \n\t"
1120 "slt %[t0], %[t4], %[qc1] \n\t"
1121 "slt %[t1], %[t4], %[qc2] \n\t"
1122 "slt %[t2], %[t4], %[qc3] \n\t"
1123 "slt %[t3], %[t4], %[qc4] \n\t"
1124 "movn %[qc1], %[t4], %[t0] \n\t"
1125 "movn %[qc2], %[t4], %[t1] \n\t"
1126 "movn %[qc3], %[t4], %[t2] \n\t"
1127 "movn %[qc4], %[t4], %[t3] \n\t"
/* qc1..qc4 are read-write; the temporaries are early-clobber outputs. */
1131 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1132 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1133 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* Codeword length and sign-bit counts of the pairs. */
1143 curbits += p_bits[curidx] +
1144 upair7_sign_bits[curidx] +
1146 upair7_sign_bits[curidx2];
/*
 * Count the bits needed to code one band with an unsigned-pair codebook whose
 * levels are clamped to 12.
 *
 * Coefficients are processed four per iteration, i.e. two pairs. Each scaled
 * magnitude is multiplied by Q34, biased by 0.4054, truncated to int and
 * clamped to 12 by the asm. For each pair the sign-bit count from
 * upair12_sign_bits[] is added to curbits together with codeword lengths from
 * p_bits[].
 *
 * scaled    - magnitudes to quantize
 * size      - number of coefficients, consumed in steps of 4
 * scale_idx - scalefactor index selecting Q34
 * cb        - codebook number; the length table of entry cb-1 is used
 * s, pb, in, lambda, uplim - not referenced on the lines visible here
 *
 * NOTE(review): the computation of curidx / curidx2, one term of the sum and
 * the return statement are not visible in this excerpt.
 */
1151 static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
1152 PutBitContext *pb, const float *in,
1153 const float *scaled, int size, int scale_idx,
1154 int cb, const float lambda, const float uplim,
1157 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1159 int qc1, qc2, qc3, qc4;
/* Codeword lengths of codebook cb; the cast drops the table's const qualifier. */
1162 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1164 for (i = 0; i < size; i += 4) {
1165 int curidx, curidx2;
1166 int t0, t1, t2, t3, t4;
/* The float-to-int conversion truncates, so 0.4054 acts as the rounding offset. */
1168 qc1 = scaled[i ] * Q34 + 0.4054f;
1169 qc2 = scaled[i+1] * Q34 + 0.4054f;
1170 qc3 = scaled[i+2] * Q34 + 0.4054f;
1171 qc4 = scaled[i+3] * Q34 + 0.4054f;
1175 ".set noreorder \n\t"
/* t4 = 12; tN = (12 < qcN); movn gives qcN = min(qcN, 12). */
1177 "ori %[t4], $zero, 12 \n\t"
1178 "slt %[t0], %[t4], %[qc1] \n\t"
1179 "slt %[t1], %[t4], %[qc2] \n\t"
1180 "slt %[t2], %[t4], %[qc3] \n\t"
1181 "slt %[t3], %[t4], %[qc4] \n\t"
1182 "movn %[qc1], %[t4], %[t0] \n\t"
1183 "movn %[qc2], %[t4], %[t1] \n\t"
1184 "movn %[qc3], %[t4], %[t2] \n\t"
1185 "movn %[qc4], %[t4], %[t3] \n\t"
/* qc1..qc4 are read-write; the temporaries are early-clobber outputs. */
1189 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1190 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1191 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* Codeword length and sign-bit counts of the pairs. */
1201 curbits += p_bits[curidx] +
1203 upair12_sign_bits[curidx] +
1204 upair12_sign_bits[curidx2];
/*
 * Bit-count estimator for the escape codebook.
 *
 * Four coefficients are quantized per iteration. Values above 15 are
 * replaced by the escape symbol 16 for the codebook lookup, and an
 * escape-sequence length of 2*floor(log2(c)) - 3 bits is computed in cN
 * for each escaped value (0 for non-escaped values). Codeword and sign-bit
 * lengths for both pair indices are then added to curbits.
 * NOTE(review): where c1..c4 are added to curbits, how curidx/curidx2 are
 * derived, and the return statement are not shown here - confirm against
 * the full source.
 */
1209 static float get_band_numbits_ESC_mips(struct AACEncContext *s,
1210 PutBitContext *pb, const float *in,
1211 const float *scaled, int size, int scale_idx,
1212 int cb, const float lambda, const float uplim,
/* Quantizer scale looked up from the scalefactor index. */
1215 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1217 int qc1, qc2, qc3, qc4;
/* Codeword-length table of the selected codebook (cb is 1-based here). */
1220 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1222 for (i = 0; i < size; i += 4) {
1223 int curidx, curidx2;
1224 int cond0, cond1, cond2, cond3;
/* Quantize: float -> int conversion truncates after adding the rounding bias. */
1228 qc1 = scaled[i ] * Q34 + 0.4054f;
1229 qc2 = scaled[i+1] * Q34 + 0.4054f;
1230 qc3 = scaled[i+2] * Q34 + 0.4054f;
1231 qc4 = scaled[i+3] * Q34 + 0.4054f;
1235 ".set noreorder \n\t"
/* t4 = 15 (largest directly coded value), t5 = 16 (escape symbol). */
1237 "ori %[t4], $zero, 15 \n\t"
1238 "ori %[t5], $zero, 16 \n\t"
/*
 * cN = qcN limited to 13 bits: a saturating left shift by 18 followed by
 * a logical right shift by 18 caps non-negative values at 0x1FFF.
 */
1239 "shll_s.w %[c1], %[qc1], 18 \n\t"
1240 "shll_s.w %[c2], %[qc2], 18 \n\t"
1241 "shll_s.w %[c3], %[qc3], 18 \n\t"
1242 "shll_s.w %[c4], %[qc4], 18 \n\t"
1243 "srl %[c1], %[c1], 18 \n\t"
1244 "srl %[c2], %[c2], 18 \n\t"
1245 "srl %[c3], %[c3], 18 \n\t"
1246 "srl %[c4], %[c4], 18 \n\t"
/* condN = (15 < qcN); escaped values are replaced by 16 for the lookup. */
1247 "slt %[cond0], %[t4], %[qc1] \n\t"
1248 "slt %[cond1], %[t4], %[qc2] \n\t"
1249 "slt %[cond2], %[t4], %[qc3] \n\t"
1250 "slt %[cond3], %[t4], %[qc4] \n\t"
1251 "movn %[qc1], %[t5], %[cond0] \n\t"
1252 "movn %[qc2], %[t5], %[cond1] \n\t"
1253 "movn %[qc3], %[t5], %[cond2] \n\t"
1254 "movn %[qc4], %[t5], %[cond3] \n\t"
/* cN = 31 - clz(cN), i.e. floor(log2(cN)); t5 is reused to hold 31. */
1255 "ori %[t5], $zero, 31 \n\t"
1256 "clz %[c1], %[c1] \n\t"
1257 "clz %[c2], %[c2] \n\t"
1258 "clz %[c3], %[c3] \n\t"
1259 "clz %[c4], %[c4] \n\t"
1260 "subu %[c1], %[t5], %[c1] \n\t"
1261 "subu %[c2], %[t5], %[c2] \n\t"
1262 "subu %[c3], %[t5], %[c3] \n\t"
1263 "subu %[c4], %[t5], %[c4] \n\t"
/* cN = 2 * cN - 3 */
1264 "sll %[c1], %[c1], 1 \n\t"
1265 "sll %[c2], %[c2], 1 \n\t"
1266 "sll %[c3], %[c3], 1 \n\t"
1267 "sll %[c4], %[c4], 1 \n\t"
1268 "addiu %[c1], %[c1], -3 \n\t"
1269 "addiu %[c2], %[c2], -3 \n\t"
1270 "addiu %[c3], %[c3], -3 \n\t"
1271 "addiu %[c4], %[c4], -3 \n\t"
/* Turn condN (0/1) into a 0/all-ones mask and zero cN for non-escaped values. */
1272 "subu %[cond0], $zero, %[cond0] \n\t"
1273 "subu %[cond1], $zero, %[cond1] \n\t"
1274 "subu %[cond2], $zero, %[cond2] \n\t"
1275 "subu %[cond3], $zero, %[cond3] \n\t"
1276 "and %[c1], %[c1], %[cond0] \n\t"
1277 "and %[c2], %[c2], %[cond1] \n\t"
1278 "and %[c3], %[c3], %[cond2] \n\t"
1279 "and %[c4], %[c4], %[cond3] \n\t"
1283 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1284 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1285 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
1286 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
1287 [c1]"=&r"(c1), [c2]"=&r"(c2),
1288 [c3]"=&r"(c3), [c4]"=&r"(c4),
1289 [t4]"=&r"(t4), [t5]"=&r"(t5)
/* Codeword length and sign-bit count for both pair indices. */
1298 curbits += p_bits[curidx];
1299 curbits += esc_sign_bits[curidx];
1300 curbits += p_bits[curidx2];
1301 curbits += esc_sign_bits[curidx2];
/*
 * Table of bit-count estimators, indexed by codebook number (the
 * get_band_numbits macro indexes it with cb). Index 0 is the zero
 * codebook; each following pair of indices shares one estimator, and the
 * last visible entry (index 11) is the escape codebook.
 */
1311 static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
1312 PutBitContext *pb, const float *in,
1313 const float *scaled, int size, int scale_idx,
1314 int cb, const float lambda, const float uplim,
1316 get_band_numbits_ZERO_mips,
1317 get_band_numbits_SQUAD_mips,
1318 get_band_numbits_SQUAD_mips,
1319 get_band_numbits_UQUAD_mips,
1320 get_band_numbits_UQUAD_mips,
1321 get_band_numbits_SPAIR_mips,
1322 get_band_numbits_SPAIR_mips,
1323 get_band_numbits_UPAIR7_mips,
1324 get_band_numbits_UPAIR7_mips,
1325 get_band_numbits_UPAIR12_mips,
1326 get_band_numbits_UPAIR12_mips,
1327 get_band_numbits_ESC_mips,
/*
 * Dispatch to the bit-count estimator registered for codebook cb in
 * get_band_numbits_arr, forwarding all arguments unchanged. cb is used
 * directly as the table index, so it must be a valid index into that table.
 */
1330 #define get_band_numbits( \
1331 s, pb, in, scaled, size, scale_idx, cb, \
1332 lambda, uplim, bits) \
1333 get_band_numbits_arr[cb]( \
1334 s, pb, in, scaled, size, scale_idx, cb, \
1335 lambda, uplim, bits)
/*
 * Convenience wrapper around get_band_numbits: selects the estimator for
 * codebook cb and calls it with a NULL PutBitContext, so only a bit count
 * is computed and nothing is written to a bitstream by this call.
 */
1337 static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
1338 const float *scaled, int size, int scale_idx,
1339 int cb, const float lambda, const float uplim,
1342 return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
1346 * Functions developed from template function and optimized for getting the band cost
/*
 * Cost of coding a band with the zero codebook: the sum of the squared
 * input coefficients, weighted by lambda. The loop is unrolled by four,
 * so size is consumed in steps of 4 coefficients.
 */
1349 static float get_band_cost_ZERO_mips(struct AACEncContext *s,
1350 PutBitContext *pb, const float *in,
1351 const float *scaled, int size, int scale_idx,
1352 int cb, const float lambda, const float uplim,
1358 for (i = 0; i < size; i += 4) {
/* Accumulate the energy of four coefficients. */
1359 cost += in[i ] * in[i ];
1360 cost += in[i+1] * in[i+1];
1361 cost += in[i+2] * in[i+2];
1362 cost += in[i+3] * in[i+3];
1366 return cost * lambda;
/*
 * Rate-distortion cost for the signed-quad codebooks.
 *
 * For every group of four coefficients the quantized values are reduced to
 * -1, 0 or +1 (magnitude test plus the sign of the input), the codeword
 * length is added to curbits, and the squared error between the input and
 * the dequantized codebook vector (vec * IQ) is added to cost.
 * Returns cost * lambda + curbits.
 * NOTE(review): the derivation of curidx is not shown here - confirm
 * against the full source.
 */
1369 static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
1370 PutBitContext *pb, const float *in,
1371 const float *scaled, int size, int scale_idx,
1372 int cb, const float lambda, const float uplim,
/* Q34 scales for quantization, IQ scales codebook values back for the error. */
1375 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1376 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1379 int qc1, qc2, qc3, qc4;
/* Codeword lengths and codebook vectors of the selected codebook (cb is 1-based). */
1382 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1383 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1385 for (i = 0; i < size; i += 4) {
/* Two views of the same four inputs: raw words (for sign bits) and floats. */
1388 int *in_int = (int *)&in[i];
1389 float *in_pos = (float *)&in[i];
1390 float di0, di1, di2, di3;
1391 int t0, t1, t2, t3, t4, t5, t6, t7;
/* Quantize: float -> int conversion truncates after adding the rounding bias. */
1393 qc1 = scaled[i ] * Q34 + 0.4054f;
1394 qc2 = scaled[i+1] * Q34 + 0.4054f;
1395 qc3 = scaled[i+2] * Q34 + 0.4054f;
1396 qc4 = scaled[i+3] * Q34 + 0.4054f;
1400 ".set noreorder \n\t"
/* qcN = (0 < qcN): collapse each quantized value to 0 or 1. */
1402 "slt %[qc1], $zero, %[qc1] \n\t"
1403 "slt %[qc2], $zero, %[qc2] \n\t"
1404 "slt %[qc3], $zero, %[qc3] \n\t"
1405 "slt %[qc4], $zero, %[qc4] \n\t"
/* Load the raw bit patterns of the four inputs and isolate their sign bits. */
1406 "lw %[t0], 0(%[in_int]) \n\t"
1407 "lw %[t1], 4(%[in_int]) \n\t"
1408 "lw %[t2], 8(%[in_int]) \n\t"
1409 "lw %[t3], 12(%[in_int]) \n\t"
1410 "srl %[t0], %[t0], 31 \n\t"
1411 "srl %[t1], %[t1], 31 \n\t"
1412 "srl %[t2], %[t2], 31 \n\t"
1413 "srl %[t3], %[t3], 31 \n\t"
/* Negate qcN where the corresponding input has its sign bit set. */
1414 "subu %[t4], $zero, %[qc1] \n\t"
1415 "subu %[t5], $zero, %[qc2] \n\t"
1416 "subu %[t6], $zero, %[qc3] \n\t"
1417 "subu %[t7], $zero, %[qc4] \n\t"
1418 "movn %[qc1], %[t4], %[t0] \n\t"
1419 "movn %[qc2], %[t5], %[t1] \n\t"
1420 "movn %[qc3], %[t6], %[t2] \n\t"
1421 "movn %[qc4], %[t7], %[t3] \n\t"
1425 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1426 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1427 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1428 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1429 : [in_int]"r"(in_int)
/* Codeword length, and the 4-element codebook vector for this index. */
1442 curbits += p_bits[curidx];
1443 vec = &p_codes[curidx*4];
1447 ".set noreorder \n\t"
/* diN = in[N] - vec[N] * IQ (nmsub.s yields fr - fs * ft). */
1449 "lwc1 $f0, 0(%[in_pos]) \n\t"
1450 "lwc1 $f1, 0(%[vec]) \n\t"
1451 "lwc1 $f2, 4(%[in_pos]) \n\t"
1452 "lwc1 $f3, 4(%[vec]) \n\t"
1453 "lwc1 $f4, 8(%[in_pos]) \n\t"
1454 "lwc1 $f5, 8(%[vec]) \n\t"
1455 "lwc1 $f6, 12(%[in_pos]) \n\t"
1456 "lwc1 $f7, 12(%[vec]) \n\t"
1457 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1458 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1459 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1460 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1464 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1465 [di2]"=&f"(di2), [di3]"=&f"(di3)
1466 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1468 : "$f0", "$f1", "$f2", "$f3",
1469 "$f4", "$f5", "$f6", "$f7",
/* Accumulate the squared quantization error of the four coefficients. */
1473 cost += di0 * di0 + di1 * di1
1474 + di2 * di2 + di3 * di3;
1479 return cost * lambda + curbits;
/*
 * Rate-distortion cost for the unsigned-quad codebooks.
 *
 * For every group of four coefficients the quantized magnitudes are
 * clamped to 2, the codeword length and sign-bit count are added to
 * curbits, and the squared error between |in| and the dequantized codebook
 * vector (vec * IQ) is added to cost.
 * Returns cost * lambda + curbits.
 * NOTE(review): the derivation of curidx is not shown here - confirm
 * against the full source.
 */
1482 static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
1483 PutBitContext *pb, const float *in,
1484 const float *scaled, int size, int scale_idx,
1485 int cb, const float lambda, const float uplim,
/* Q34 scales for quantization, IQ scales codebook values back for the error. */
1488 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1489 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1493 int qc1, qc2, qc3, qc4;
/* Codeword lengths and codebook vectors of the selected codebook (cb is 1-based). */
1495 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1496 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1498 for (i = 0; i < size; i += 4) {
1501 float *in_pos = (float *)&in[i];
1502 float di0, di1, di2, di3;
1503 int t0, t1, t2, t3, t4;
/* Quantize: float -> int conversion truncates after adding the rounding bias. */
1505 qc1 = scaled[i ] * Q34 + 0.4054f;
1506 qc2 = scaled[i+1] * Q34 + 0.4054f;
1507 qc3 = scaled[i+2] * Q34 + 0.4054f;
1508 qc4 = scaled[i+3] * Q34 + 0.4054f;
1512 ".set noreorder \n\t"
/* t4 = 2; tN = (2 < qcN); qcN = 2 where tN is non-zero, i.e. qcN = min(qcN, 2). */
1514 "ori %[t4], $zero, 2 \n\t"
1515 "slt %[t0], %[t4], %[qc1] \n\t"
1516 "slt %[t1], %[t4], %[qc2] \n\t"
1517 "slt %[t2], %[t4], %[qc3] \n\t"
1518 "slt %[t3], %[t4], %[qc4] \n\t"
1519 "movn %[qc1], %[t4], %[t0] \n\t"
1520 "movn %[qc2], %[t4], %[t1] \n\t"
1521 "movn %[qc3], %[t4], %[t2] \n\t"
1522 "movn %[qc4], %[t4], %[t3] \n\t"
1526 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1527 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1528 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* Codeword length, sign-bit count, and the 4-element codebook vector. */
1540 curbits += p_bits[curidx];
1541 curbits += uquad_sign_bits[curidx];
1542 vec = &p_codes[curidx*4];
1546 ".set noreorder \n\t"
/* diN = |in[N]| - vec[N] * IQ (nmsub.s yields fr - fs * ft). */
1548 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1549 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1550 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1551 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1552 "abs.s %[di0], %[di0] \n\t"
1553 "abs.s %[di1], %[di1] \n\t"
1554 "abs.s %[di2], %[di2] \n\t"
1555 "abs.s %[di3], %[di3] \n\t"
1556 "lwc1 $f0, 0(%[vec]) \n\t"
1557 "lwc1 $f1, 4(%[vec]) \n\t"
1558 "lwc1 $f2, 8(%[vec]) \n\t"
1559 "lwc1 $f3, 12(%[vec]) \n\t"
1560 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1561 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1562 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1563 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1567 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1568 [di2]"=&f"(di2), [di3]"=&f"(di3)
1569 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1571 : "$f0", "$f1", "$f2", "$f3",
/* Accumulate the squared quantization error of the four coefficients. */
1575 cost += di0 * di0 + di1 * di1
1576 + di2 * di2 + di3 * di3;
1581 return cost * lambda + curbits;
/*
 * Rate-distortion cost for the signed-pair codebooks.
 *
 * Four coefficients (two pairs) are handled per iteration: quantized
 * magnitudes are clamped to 4 and given the sign of the input, the two
 * codeword lengths are added to curbits, and the squared error between the
 * input and the dequantized codebook vectors (vec/vec2 * IQ) is added to
 * cost. Returns cost * lambda + curbits.
 * NOTE(review): only the final step of the curidx2 computation is shown
 * here; the full derivation of curidx/curidx2 must be confirmed against
 * the full source.
 */
1584 static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
1585 PutBitContext *pb, const float *in,
1586 const float *scaled, int size, int scale_idx,
1587 int cb, const float lambda, const float uplim,
/* Q34 scales for quantization, IQ scales codebook values back for the error. */
1590 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1591 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1594 int qc1, qc2, qc3, qc4;
/* Codeword lengths and codebook vectors of the selected codebook (cb is 1-based). */
1597 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1598 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1600 for (i = 0; i < size; i += 4) {
1601 const float *vec, *vec2;
1602 int curidx, curidx2;
/* Two views of the same four inputs: raw words (for sign bits) and floats. */
1603 int *in_int = (int *)&in[i];
1604 float *in_pos = (float *)&in[i];
1605 float di0, di1, di2, di3;
1606 int t0, t1, t2, t3, t4, t5, t6, t7;
/* Quantize: float -> int conversion truncates after adding the rounding bias. */
1608 qc1 = scaled[i ] * Q34 + 0.4054f;
1609 qc2 = scaled[i+1] * Q34 + 0.4054f;
1610 qc3 = scaled[i+2] * Q34 + 0.4054f;
1611 qc4 = scaled[i+3] * Q34 + 0.4054f;
1615 ".set noreorder \n\t"
/* t4 = 4; tN = (4 < qcN); qcN = 4 where tN is non-zero, i.e. qcN = min(qcN, 4). */
1617 "ori %[t4], $zero, 4 \n\t"
1618 "slt %[t0], %[t4], %[qc1] \n\t"
1619 "slt %[t1], %[t4], %[qc2] \n\t"
1620 "slt %[t2], %[t4], %[qc3] \n\t"
1621 "slt %[t3], %[t4], %[qc4] \n\t"
1622 "movn %[qc1], %[t4], %[t0] \n\t"
1623 "movn %[qc2], %[t4], %[t1] \n\t"
1624 "movn %[qc3], %[t4], %[t2] \n\t"
1625 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw bit patterns of the four inputs and isolate their sign bits. */
1626 "lw %[t0], 0(%[in_int]) \n\t"
1627 "lw %[t1], 4(%[in_int]) \n\t"
1628 "lw %[t2], 8(%[in_int]) \n\t"
1629 "lw %[t3], 12(%[in_int]) \n\t"
1630 "srl %[t0], %[t0], 31 \n\t"
1631 "srl %[t1], %[t1], 31 \n\t"
1632 "srl %[t2], %[t2], 31 \n\t"
1633 "srl %[t3], %[t3], 31 \n\t"
/* Negate qcN where the corresponding input has its sign bit set. */
1634 "subu %[t4], $zero, %[qc1] \n\t"
1635 "subu %[t5], $zero, %[qc2] \n\t"
1636 "subu %[t6], $zero, %[qc3] \n\t"
1637 "subu %[t7], $zero, %[qc4] \n\t"
1638 "movn %[qc1], %[t4], %[t0] \n\t"
1639 "movn %[qc2], %[t5], %[t1] \n\t"
1640 "movn %[qc3], %[t6], %[t2] \n\t"
1641 "movn %[qc4], %[t7], %[t3] \n\t"
1645 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1646 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1647 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1648 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1649 : [in_int]"r"(in_int)
/* Final step of the second pair's index: add qc4 plus an offset of 40. */
1657 curidx2 += qc4 + 40;
/* Codeword lengths of both pairs. */
1659 curbits += p_bits[curidx];
1660 curbits += p_bits[curidx2];
/* Each codebook entry holds two floats, hence the factor 2. */
1662 vec = &p_codes[curidx*2];
1663 vec2 = &p_codes[curidx2*2];
1667 ".set noreorder \n\t"
/* diN = in[N] - codebook value * IQ (nmsub.s yields fr - fs * ft). */
1669 "lwc1 $f0, 0(%[in_pos]) \n\t"
1670 "lwc1 $f1, 0(%[vec]) \n\t"
1671 "lwc1 $f2, 4(%[in_pos]) \n\t"
1672 "lwc1 $f3, 4(%[vec]) \n\t"
1673 "lwc1 $f4, 8(%[in_pos]) \n\t"
1674 "lwc1 $f5, 0(%[vec2]) \n\t"
1675 "lwc1 $f6, 12(%[in_pos]) \n\t"
1676 "lwc1 $f7, 4(%[vec2]) \n\t"
1677 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1678 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1679 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1680 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1684 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1685 [di2]"=&f"(di2), [di3]"=&f"(di3)
1686 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1687 [vec2]"r"(vec2), [IQ]"f"(IQ)
1688 : "$f0", "$f1", "$f2", "$f3",
1689 "$f4", "$f5", "$f6", "$f7",
/* Accumulate the squared quantization error of the four coefficients. */
1693 cost += di0 * di0 + di1 * di1
1694 + di2 * di2 + di3 * di3;
1699 return cost * lambda + curbits;
/*
 * Rate-distortion cost for the unsigned-pair codebooks handled with a
 * maximum quantized magnitude of 7.
 *
 * Four coefficients (two pairs) are handled per iteration: quantized
 * magnitudes are clamped to 7, per-pair sign words and non-zero counts are
 * derived, codeword and sign-bit lengths are added to curbits, and the
 * squared error between |in| and the dequantized codebook vectors
 * (vec/vec2 * IQ) is added to cost. Returns cost * lambda + curbits.
 * NOTE(review): the derivation of curidx/curidx2 and any use of
 * sign1/sign2/count1/count2 after the asm block are not shown here -
 * confirm against the full source.
 */
1702 static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
1703 PutBitContext *pb, const float *in,
1704 const float *scaled, int size, int scale_idx,
1705 int cb, const float lambda, const float uplim,
/* Q34 scales for quantization, IQ scales codebook values back for the error. */
1708 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1709 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1712 int qc1, qc2, qc3, qc4;
/* Codeword lengths and codebook vectors of the selected codebook (cb is 1-based). */
1715 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1716 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1718 for (i = 0; i < size; i += 4) {
1719 const float *vec, *vec2;
1720 int curidx, curidx2, sign1, count1, sign2, count2;
/* Two views of the same four inputs: raw words (for sign tests) and floats. */
1721 int *in_int = (int *)&in[i];
1722 float *in_pos = (float *)&in[i];
1723 float di0, di1, di2, di3;
1724 int t0, t1, t2, t3, t4;
/* Quantize: float -> int conversion truncates after adding the rounding bias. */
1726 qc1 = scaled[i ] * Q34 + 0.4054f;
1727 qc2 = scaled[i+1] * Q34 + 0.4054f;
1728 qc3 = scaled[i+2] * Q34 + 0.4054f;
1729 qc4 = scaled[i+3] * Q34 + 0.4054f;
1733 ".set noreorder \n\t"
/* t4 = 7 (clamp limit); both sign words start at 0. */
1735 "ori %[t4], $zero, 7 \n\t"
1736 "ori %[sign1], $zero, 0 \n\t"
1737 "ori %[sign2], $zero, 0 \n\t"
/* qcN = min(qcN, 7) */
1738 "slt %[t0], %[t4], %[qc1] \n\t"
1739 "slt %[t1], %[t4], %[qc2] \n\t"
1740 "slt %[t2], %[t4], %[qc3] \n\t"
1741 "slt %[t3], %[t4], %[qc4] \n\t"
1742 "movn %[qc1], %[t4], %[t0] \n\t"
1743 "movn %[qc2], %[t4], %[t1] \n\t"
1744 "movn %[qc3], %[t4], %[t2] \n\t"
1745 "movn %[qc4], %[t4], %[t3] \n\t"
/* Raw bit patterns of the inputs; "word < 0" is true when the sign bit is set. */
1746 "lw %[t0], 0(%[in_int]) \n\t"
1747 "lw %[t1], 4(%[in_int]) \n\t"
1748 "lw %[t2], 8(%[in_int]) \n\t"
1749 "lw %[t3], 12(%[in_int]) \n\t"
/* The first element of each pair contributes its sign bit only if quantized non-zero. */
1750 "slt %[t0], %[t0], $zero \n\t"
1751 "movn %[sign1], %[t0], %[qc1] \n\t"
1752 "slt %[t2], %[t2], $zero \n\t"
1753 "movn %[sign2], %[t2], %[qc3] \n\t"
/* If the second element is non-zero: signN = (signN << 1) | its sign bit. */
1754 "slt %[t1], %[t1], $zero \n\t"
1755 "sll %[t0], %[sign1], 1 \n\t"
1756 "or %[t0], %[t0], %[t1] \n\t"
1757 "movn %[sign1], %[t0], %[qc2] \n\t"
1758 "slt %[t3], %[t3], $zero \n\t"
1759 "sll %[t0], %[sign2], 1 \n\t"
1760 "or %[t0], %[t0], %[t3] \n\t"
1761 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countN = number of positive (non-zero) quantized values in the pair. */
1762 "slt %[count1], $zero, %[qc1] \n\t"
1763 "slt %[t1], $zero, %[qc2] \n\t"
1764 "slt %[count2], $zero, %[qc3] \n\t"
1765 "slt %[t2], $zero, %[qc4] \n\t"
1766 "addu %[count1], %[count1], %[t1] \n\t"
1767 "addu %[count2], %[count2], %[t2] \n\t"
1771 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1772 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1773 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1774 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1775 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1777 : [in_int]"r"(in_int)
/* First pair: codeword length, sign bits, and its 2-element codebook vector. */
1787 curbits += p_bits[curidx];
1788 curbits += upair7_sign_bits[curidx];
1789 vec = &p_codes[curidx*2];
/* Second pair: same three steps. */
1791 curbits += p_bits[curidx2];
1792 curbits += upair7_sign_bits[curidx2];
1793 vec2 = &p_codes[curidx2*2];
1797 ".set noreorder \n\t"
/* diN = |in[N]| - codebook value * IQ (nmsub.s yields fr - fs * ft). */
1799 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1800 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1801 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1802 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1803 "abs.s %[di0], %[di0] \n\t"
1804 "abs.s %[di1], %[di1] \n\t"
1805 "abs.s %[di2], %[di2] \n\t"
1806 "abs.s %[di3], %[di3] \n\t"
1807 "lwc1 $f0, 0(%[vec]) \n\t"
1808 "lwc1 $f1, 4(%[vec]) \n\t"
1809 "lwc1 $f2, 0(%[vec2]) \n\t"
1810 "lwc1 $f3, 4(%[vec2]) \n\t"
1811 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1812 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1813 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1814 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1818 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1819 [di2]"=&f"(di2), [di3]"=&f"(di3)
1820 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1821 [vec2]"r"(vec2), [IQ]"f"(IQ)
1822 : "$f0", "$f1", "$f2", "$f3",
/* Accumulate the squared quantization error of the four coefficients. */
1826 cost += di0 * di0 + di1 * di1
1827 + di2 * di2 + di3 * di3;
1832 return cost * lambda + curbits;
/*
 * Rate-distortion cost for the unsigned-pair codebooks handled with a
 * maximum quantized magnitude of 12.
 *
 * Four coefficients (two pairs) are handled per iteration: quantized
 * magnitudes are clamped to 12, per-pair sign words and non-zero counts
 * are derived, codeword and sign-bit lengths are added to curbits, and the
 * squared error between |in| and the dequantized codebook vectors
 * (vec/vec2 * IQ) is added to cost. Returns cost * lambda + curbits.
 * NOTE(review): the derivation of curidx/curidx2 and any use of
 * sign1/sign2/count1/count2 after the asm block are not shown here -
 * confirm against the full source.
 */
1835 static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
1836 PutBitContext *pb, const float *in,
1837 const float *scaled, int size, int scale_idx,
1838 int cb, const float lambda, const float uplim,
/* Q34 scales for quantization, IQ scales codebook values back for the error. */
1841 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1842 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1845 int qc1, qc2, qc3, qc4;
/* Codeword lengths and codebook vectors of the selected codebook (cb is 1-based). */
1848 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1849 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1851 for (i = 0; i < size; i += 4) {
1852 const float *vec, *vec2;
1853 int curidx, curidx2;
1854 int sign1, count1, sign2, count2;
/* Two views of the same four inputs: raw words (for sign tests) and floats. */
1855 int *in_int = (int *)&in[i];
1856 float *in_pos = (float *)&in[i];
1857 float di0, di1, di2, di3;
1858 int t0, t1, t2, t3, t4;
/* Quantize: float -> int conversion truncates after adding the rounding bias. */
1860 qc1 = scaled[i ] * Q34 + 0.4054f;
1861 qc2 = scaled[i+1] * Q34 + 0.4054f;
1862 qc3 = scaled[i+2] * Q34 + 0.4054f;
1863 qc4 = scaled[i+3] * Q34 + 0.4054f;
1867 ".set noreorder \n\t"
/* t4 = 12 (clamp limit); both sign words start at 0. */
1869 "ori %[t4], $zero, 12 \n\t"
1870 "ori %[sign1], $zero, 0 \n\t"
1871 "ori %[sign2], $zero, 0 \n\t"
/* qcN = min(qcN, 12) */
1872 "slt %[t0], %[t4], %[qc1] \n\t"
1873 "slt %[t1], %[t4], %[qc2] \n\t"
1874 "slt %[t2], %[t4], %[qc3] \n\t"
1875 "slt %[t3], %[t4], %[qc4] \n\t"
1876 "movn %[qc1], %[t4], %[t0] \n\t"
1877 "movn %[qc2], %[t4], %[t1] \n\t"
1878 "movn %[qc3], %[t4], %[t2] \n\t"
1879 "movn %[qc4], %[t4], %[t3] \n\t"
/* Raw bit patterns of the inputs; "word < 0" is true when the sign bit is set. */
1880 "lw %[t0], 0(%[in_int]) \n\t"
1881 "lw %[t1], 4(%[in_int]) \n\t"
1882 "lw %[t2], 8(%[in_int]) \n\t"
1883 "lw %[t3], 12(%[in_int]) \n\t"
/* The first element of each pair contributes its sign bit only if quantized non-zero. */
1884 "slt %[t0], %[t0], $zero \n\t"
1885 "movn %[sign1], %[t0], %[qc1] \n\t"
1886 "slt %[t2], %[t2], $zero \n\t"
1887 "movn %[sign2], %[t2], %[qc3] \n\t"
/* If the second element is non-zero: signN = (signN << 1) | its sign bit. */
1888 "slt %[t1], %[t1], $zero \n\t"
1889 "sll %[t0], %[sign1], 1 \n\t"
1890 "or %[t0], %[t0], %[t1] \n\t"
1891 "movn %[sign1], %[t0], %[qc2] \n\t"
1892 "slt %[t3], %[t3], $zero \n\t"
1893 "sll %[t0], %[sign2], 1 \n\t"
1894 "or %[t0], %[t0], %[t3] \n\t"
1895 "movn %[sign2], %[t0], %[qc4] \n\t"
/* countN = number of positive (non-zero) quantized values in the pair. */
1896 "slt %[count1], $zero, %[qc1] \n\t"
1897 "slt %[t1], $zero, %[qc2] \n\t"
1898 "slt %[count2], $zero, %[qc3] \n\t"
1899 "slt %[t2], $zero, %[qc4] \n\t"
1900 "addu %[count1], %[count1], %[t1] \n\t"
1901 "addu %[count2], %[count2], %[t2] \n\t"
1905 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1906 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1907 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1908 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1909 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1911 : [in_int]"r"(in_int)
/* Codeword lengths and sign-bit counts of both pairs, then their 2-element vectors. */
1921 curbits += p_bits[curidx];
1922 curbits += p_bits[curidx2];
1923 curbits += upair12_sign_bits[curidx];
1924 curbits += upair12_sign_bits[curidx2];
1925 vec = &p_codes[curidx*2];
1926 vec2 = &p_codes[curidx2*2];
1930 ".set noreorder \n\t"
/* diN = |in[N]| - codebook value * IQ (nmsub.s yields fr - fs * ft). */
1932 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1933 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1934 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1935 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1936 "abs.s %[di0], %[di0] \n\t"
1937 "abs.s %[di1], %[di1] \n\t"
1938 "abs.s %[di2], %[di2] \n\t"
1939 "abs.s %[di3], %[di3] \n\t"
1940 "lwc1 $f0, 0(%[vec]) \n\t"
1941 "lwc1 $f1, 4(%[vec]) \n\t"
1942 "lwc1 $f2, 0(%[vec2]) \n\t"
1943 "lwc1 $f3, 4(%[vec2]) \n\t"
1944 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1945 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1946 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1947 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1951 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1952 [di2]"=&f"(di2), [di3]"=&f"(di3)
1953 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1954 [vec2]"r"(vec2), [IQ]"f"(IQ)
1955 : "$f0", "$f1", "$f2", "$f3",
/* Accumulate the squared quantization error of the four coefficients. */
1959 cost += di0 * di0 + di1 * di1
1960 + di2 * di2 + di3 * di3;
1965 return cost * lambda + curbits;
/*
 * Rate-distortion cost for the escape codebook.
 *
 * Four coefficients (two pairs) are handled per iteration. Quantized
 * values above 15 are replaced by the escape symbol 16 for the codebook
 * lookup while cN keeps the value limited to 13 bits. curbits receives the
 * codeword length, the sign bits and, for escaped values, an escape length
 * of 2*av_log2(cN) - 3 bits. The distortion uses |in| against either the
 * codebook value, cN^(4/3) * IQ, or CLIPPED_ESCAPE.
 * Returns cost * lambda + curbits.
 * NOTE(review): the derivation of curidx/curidx2 and the conditions
 * selecting between the escape and codebook reconstruction branches are
 * not shown here - confirm against the full source.
 */
1968 static float get_band_cost_ESC_mips(struct AACEncContext *s,
1969 PutBitContext *pb, const float *in,
1970 const float *scaled, int size, int scale_idx,
1971 int cb, const float lambda, const float uplim,
/* Q34 scales for quantization, IQ scales reconstructed values back for the error. */
1974 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1975 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
/* Magnitude threshold above which the reconstruction is pinned to this value. */
1976 const float CLIPPED_ESCAPE = 165140.0f * IQ;
1979 int qc1, qc2, qc3, qc4;
/* Codeword lengths and codebook vectors of the selected codebook (cb is 1-based). */
1982 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1983 float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
1985 for (i = 0; i < size; i += 4) {
1986 const float *vec, *vec2;
1987 int curidx, curidx2;
1988 float t1, t2, t3, t4;
1989 float di1, di2, di3, di4;
1990 int cond0, cond1, cond2, cond3;
/* Quantize: float -> int conversion truncates after adding the rounding bias. */
1994 qc1 = scaled[i ] * Q34 + 0.4054f;
1995 qc2 = scaled[i+1] * Q34 + 0.4054f;
1996 qc3 = scaled[i+2] * Q34 + 0.4054f;
1997 qc4 = scaled[i+3] * Q34 + 0.4054f;
2001 ".set noreorder \n\t"
/* t6 = 15 (largest directly coded value), t7 = 16 (escape symbol). */
2003 "ori %[t6], $zero, 15 \n\t"
2004 "ori %[t7], $zero, 16 \n\t"
/*
 * cN = qcN limited to 13 bits: a saturating left shift by 18 followed by
 * a logical right shift by 18 caps non-negative values at 0x1FFF.
 */
2005 "shll_s.w %[c1], %[qc1], 18 \n\t"
2006 "shll_s.w %[c2], %[qc2], 18 \n\t"
2007 "shll_s.w %[c3], %[qc3], 18 \n\t"
2008 "shll_s.w %[c4], %[qc4], 18 \n\t"
2009 "srl %[c1], %[c1], 18 \n\t"
2010 "srl %[c2], %[c2], 18 \n\t"
2011 "srl %[c3], %[c3], 18 \n\t"
2012 "srl %[c4], %[c4], 18 \n\t"
/* condN = (15 < qcN); escaped values are replaced by 16 for the lookup. */
2013 "slt %[cond0], %[t6], %[qc1] \n\t"
2014 "slt %[cond1], %[t6], %[qc2] \n\t"
2015 "slt %[cond2], %[t6], %[qc3] \n\t"
2016 "slt %[cond3], %[t6], %[qc4] \n\t"
2017 "movn %[qc1], %[t7], %[cond0] \n\t"
2018 "movn %[qc2], %[t7], %[cond1] \n\t"
2019 "movn %[qc3], %[t7], %[cond2] \n\t"
2020 "movn %[qc4], %[t7], %[cond3] \n\t"
2024 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2025 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2026 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
2027 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
2028 [c1]"=&r"(c1), [c2]"=&r"(c2),
2029 [c3]"=&r"(c3), [c4]"=&r"(c4),
2030 [t6]"=&r"(t6), [t7]"=&r"(t7)
/* First pair: codeword length, sign bits, and its 2-element codebook vector. */
2039 curbits += p_bits[curidx];
2040 curbits += esc_sign_bits[curidx];
2041 vec = &p_codes[curidx*2];
/* Second pair: same three steps. */
2043 curbits += p_bits[curidx2];
2044 curbits += esc_sign_bits[curidx2];
2045 vec2 = &p_codes[curidx2*2];
/* Escape-sequence length; -condN is 0 or all ones, so it masks non-escaped values to 0. */
2047 curbits += (av_log2(c1) * 2 - 3) & (-cond0);
2048 curbits += (av_log2(c2) * 2 - 3) & (-cond1);
2049 curbits += (av_log2(c3) * 2 - 3) & (-cond2);
2050 curbits += (av_log2(c4) * 2 - 3) & (-cond3);
/* Magnitudes of the inputs used for the distortion below. */
2053 t2 = fabsf(in[i+1]);
2054 t3 = fabsf(in[i+2]);
2055 t4 = fabsf(in[i+3]);
/*
 * Per-coefficient error: clipped reconstruction for very large inputs,
 * c * cbrtf(c) * IQ (c^(4/3) scaled) for escape values, otherwise the
 * codebook value scaled by IQ.
 */
2058 if (t1 >= CLIPPED_ESCAPE) {
2059 di1 = t1 - CLIPPED_ESCAPE;
2061 di1 = t1 - c1 * cbrtf(c1) * IQ;
2064 di1 = t1 - vec[0] * IQ;
2067 if (t2 >= CLIPPED_ESCAPE) {
2068 di2 = t2 - CLIPPED_ESCAPE;
2070 di2 = t2 - c2 * cbrtf(c2) * IQ;
2073 di2 = t2 - vec[1] * IQ;
2076 if (t3 >= CLIPPED_ESCAPE) {
2077 di3 = t3 - CLIPPED_ESCAPE;
2079 di3 = t3 - c3 * cbrtf(c3) * IQ;
2082 di3 = t3 - vec2[0] * IQ;
2085 if (t4 >= CLIPPED_ESCAPE) {
2086 di4 = t4 - CLIPPED_ESCAPE;
2088 di4 = t4 - c4 * cbrtf(c4) * IQ;
2091 di4 = t4 - vec2[1]*IQ;
/* Accumulate the squared quantization error of the four coefficients. */
2093 cost += di1 * di1 + di2 * di2
2094 + di3 * di3 + di4 * di4;
2099 return cost * lambda + curbits;
/*
 * Table of rate-distortion cost functions, indexed by codebook number (the
 * get_band_cost macro indexes it with cb). Index 0 is the zero codebook;
 * each following pair of indices shares one function, and the last visible
 * entry (index 11) is the escape codebook.
 */
2102 static float (*const get_band_cost_arr[])(struct AACEncContext *s,
2103 PutBitContext *pb, const float *in,
2104 const float *scaled, int size, int scale_idx,
2105 int cb, const float lambda, const float uplim,
2107 get_band_cost_ZERO_mips,
2108 get_band_cost_SQUAD_mips,
2109 get_band_cost_SQUAD_mips,
2110 get_band_cost_UQUAD_mips,
2111 get_band_cost_UQUAD_mips,
2112 get_band_cost_SPAIR_mips,
2113 get_band_cost_SPAIR_mips,
2114 get_band_cost_UPAIR7_mips,
2115 get_band_cost_UPAIR7_mips,
2116 get_band_cost_UPAIR12_mips,
2117 get_band_cost_UPAIR12_mips,
2118 get_band_cost_ESC_mips,
/*
 * Dispatch to the cost function registered for codebook cb in
 * get_band_cost_arr, forwarding all arguments unchanged. cb is used
 * directly as the table index, so it must be a valid index into that table.
 */
2121 #define get_band_cost( \
2122 s, pb, in, scaled, size, scale_idx, cb, \
2123 lambda, uplim, bits) \
2124 get_band_cost_arr[cb]( \
2125 s, pb, in, scaled, size, scale_idx, cb, \
2126 lambda, uplim, bits)
/*
 * Convenience wrapper around get_band_cost: selects the cost function for
 * codebook cb and calls it with a NULL PutBitContext, so only the cost is
 * computed and nothing is written to a bitstream by this call.
 */
2128 static float quantize_band_cost(struct AACEncContext *s, const float *in,
2129 const float *scaled, int size, int scale_idx,
2130 int cb, const float lambda, const float uplim,
2133 return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
/*
 * Scalefactor search for one channel.
 *
 * Derives a per-channel bit budget, initialises the scalefactor index of
 * every band from the psychoacoustic thresholds, then repeatedly adjusts
 * sce->sf_idx: globally by qstep according to whether the estimated bit
 * count exceeds the budget, and per band where the measured distortion
 * exceeds the band's allowed limit. The chosen codebook per band is stored
 * in sce->band_type. The outer loop ends when no scalefactor changed
 * (fflag == 0) or its reaches 10.
 * NOTE(review): several statements (loop headers, counters, the condition
 * selecting between the bits-only and distortion passes) are not shown
 * here - confirm against the full source.
 */
2136 static void search_for_quantizers_twoloop_mips(AVCodecContext *avctx,
2138 SingleChannelElement *sce,
2141 int start = 0, i, w, w2, g;
/* Bit budget per channel, from the bit rate, sample rate and channel count. */
2142 int destbits = avctx->bit_rate * 1024.0 / avctx->sample_rate / avctx->channels;
2143 float dists[128] = { 0 }, uplims[128];
2145 int fflag, minscaler;
2148 float minthr = INFINITY;
/* The budget is capped at 5800 bits. */
2150 destbits = FFMIN(destbits, 5800);
/*
 * Pass 1: per band, sum the thresholds over the window group, flag
 * windows whose energy does not exceed the threshold (or whose threshold
 * is 0) as zero, and track the smallest summed threshold.
 */
2151 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2152 for (g = 0; g < sce->ics.num_swb; g++) {
2155 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
2156 FFPsyBand *band = &s->psy.ch[s->cur_channel].psy_bands[(w+w2)*16+g];
2157 uplim += band->threshold;
2158 if (band->energy <= band->threshold || band->threshold == 0.0f) {
2159 sce->zeroes[(w+w2)*16+g] = 1;
/* Allowed distortion for the band is the summed threshold times 512. */
2164 uplims[w*16+g] = uplim *512;
2165 sce->zeroes[w*16+g] = !nz;
2167 minthr = FFMIN(minthr, uplim);
/*
 * Pass 2: initial scalefactors. Zeroed bands get SCALE_ONE_POS; others
 * are offset by 4*log2(uplims/minthr), limited to 59.
 */
2171 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2172 for (g = 0; g < sce->ics.num_swb; g++) {
2173 if (sce->zeroes[w*16+g]) {
2174 sce->sf_idx[w*16+g] = SCALE_ONE_POS;
2177 sce->sf_idx[w*16+g] = SCALE_ONE_POS + FFMIN(log2f(uplims[w*16+g]/minthr)*4,59);
/* Fill s->scoefs from all 1024 coefficients via abs_pow34_v. */
2183 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
/* Per-band maximum of the scaled coefficients, used for codebook selection. */
2185 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2187 for (g = 0; g < sce->ics.num_swb; g++) {
2188 const float *scaled = s->scoefs + start;
2189 maxvals[w*16+g] = find_max_val(sce->ics.group_len[w], sce->ics.swb_sizes[g], scaled);
2190 start += sce->ics.swb_sizes[g];
/* First iteration starts with a coarse step of 32, later ones with 1. */
2196 minscaler = sce->sf_idx[0];
2197 qstep = its ? 1 : 32;
/* Bits-only traversal: estimate the bit count with the current scalefactors. */
2204 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2206 for (g = 0; g < sce->ics.num_swb; g++) {
2207 const float *coefs = sce->coeffs + start;
2208 const float *scaled = s->scoefs + start;
/* Zeroed bands and bands with sf_idx >= 218 are skipped. */
2212 if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
2213 start += sce->ics.swb_sizes[g];
2216 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
2217 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
2218 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
2220 bits += quantize_band_cost_bits(s, coefs + w2*128,
2222 sce->ics.swb_sizes[g],
2223 sce->sf_idx[w*16+g],
/* Cost of coding the scalefactor as a difference to the previous one. */
2230 bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
2233 start += sce->ics.swb_sizes[g];
2234 prev = sce->sf_idx[w*16+g];
/* Same traversal using the full cost function, also recording per-band distortion. */
2239 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2241 for (g = 0; g < sce->ics.num_swb; g++) {
2242 const float *coefs = sce->coeffs + start;
2243 const float *scaled = s->scoefs + start;
2248 if (sce->zeroes[w*16+g] || sce->sf_idx[w*16+g] >= 218) {
2249 start += sce->ics.swb_sizes[g];
2252 minscaler = FFMIN(minscaler, sce->sf_idx[w*16+g]);
2253 cb = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
2254 for (w2 = 0; w2 < sce->ics.group_len[w]; w2++) {
2256 dist += quantize_band_cost(s, coefs + w2*128,
2258 sce->ics.swb_sizes[g],
2259 sce->sf_idx[w*16+g],
/* Distortion only: the bit count is subtracted from the combined cost. */
2266 dists[w*16+g] = dist - bits;
2268 bits += ff_aac_scalefactor_bits[sce->sf_idx[w*16+g] - prev + SCALE_DIFF_ZERO];
2271 start += sce->ics.swb_sizes[g];
2272 prev = sce->sf_idx[w*16+g];
/* Over budget: raise scalefactors by qstep; otherwise lower them by qstep. */
2276 if (tbits > destbits) {
2277 for (i = 0; i < 128; i++)
2278 if (sce->sf_idx[i] < 218 - qstep)
2279 sce->sf_idx[i] += qstep;
2281 for (i = 0; i < 128; i++)
2282 if (sce->sf_idx[i] > 60 - qstep)
2283 sce->sf_idx[i] -= qstep;
/* Step exhausted but still more than 2% over budget and sf_idx[0] below 217. */
2286 if (!qstep && tbits > destbits*1.02 && sce->sf_idx[0] < 217)
/* Per-band refinement: lower the scalefactor where distortion exceeds its limit. */
2291 minscaler = av_clip(minscaler, 60, 255 - SCALE_MAX_DIFF);
2292 for (w = 0; w < sce->ics.num_windows; w += sce->ics.group_len[w]) {
2293 for (g = 0; g < sce->ics.num_swb; g++) {
2294 int prevsc = sce->sf_idx[w*16+g];
2295 if (dists[w*16+g] > uplims[w*16+g] && sce->sf_idx[w*16+g] > 60) {
2296 if (find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]-1))
2297 sce->sf_idx[w*16+g]--;
2299 sce->sf_idx[w*16+g]-=2;
/* Keep scalefactors within SCALE_MAX_DIFF of the minimum and at most 219. */
2301 sce->sf_idx[w*16+g] = av_clip(sce->sf_idx[w*16+g], minscaler, minscaler + SCALE_MAX_DIFF);
2302 sce->sf_idx[w*16+g] = FFMIN(sce->sf_idx[w*16+g], 219);
2303 if (sce->sf_idx[w*16+g] != prevsc)
/* Record the codebook chosen for the final scalefactor of the band. */
2305 sce->band_type[w*16+g] = find_min_book(maxvals[w*16+g], sce->sf_idx[w*16+g]);
2309 } while (fflag && its < 10);
/*
 * Mid/side decision for a channel pair.
 *
 * For every band where neither channel is zeroed, the summed
 * quantize_band_cost of coding the two channels as they are (dist1) is
 * compared with that of coding the mid and side signals (dist2), and
 * cpe->ms_mask is set for the band when dist2 is smaller. The loop over
 * band coefficients is unrolled by four.
 * NOTE(review): the left-hand sides of the S[] assignments and the action
 * taken when common_window is not set are not shown here - confirm against
 * the full source.
 */
2312 static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe,
2315 int start = 0, i, w, w2, g;
2316 float M[128], S[128];
/* Four 128-float scratch areas inside s->scoefs for the scaled L, R, M and S data. */
2317 float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
2318 SingleChannelElement *sce0 = &cpe->ch[0];
2319 SingleChannelElement *sce1 = &cpe->ch[1];
2320 if (!cpe->common_window)
2322 for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
2323 for (g = 0; g < sce0->ics.num_swb; g++) {
/* Only bands that are coded in both channels are candidates. */
2324 if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
2325 float dist1 = 0.0f, dist2 = 0.0f;
2326 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2327 FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
2328 FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
2329 float minthr = FFMIN(band0->threshold, band1->threshold);
2330 float maxthr = FFMAX(band0->threshold, band1->threshold);
2331 for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
/* Mid signal: average of the two channels. */
2332 M[i ] = (sce0->coeffs[start+w2*128+i ]
2333 + sce1->coeffs[start+w2*128+i ]) * 0.5;
2334 M[i+1] = (sce0->coeffs[start+w2*128+i+1]
2335 + sce1->coeffs[start+w2*128+i+1]) * 0.5;
2336 M[i+2] = (sce0->coeffs[start+w2*128+i+2]
2337 + sce1->coeffs[start+w2*128+i+2]) * 0.5;
2338 M[i+3] = (sce0->coeffs[start+w2*128+i+3]
2339 + sce1->coeffs[start+w2*128+i+3]) * 0.5;
/* Tails of four assignments (presumably to S[]) subtracting the second channel. */
2342 - sce1->coeffs[start+w2*128+i ];
2344 - sce1->coeffs[start+w2*128+i+1];
2346 - sce1->coeffs[start+w2*128+i+2];
2348 - sce1->coeffs[start+w2*128+i+3];
/* Prepare the scaled inputs for all four signals via abs_pow34_v. */
2350 abs_pow34_v(L34, sce0->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
2351 abs_pow34_v(R34, sce1->coeffs+start+w2*128, sce0->ics.swb_sizes[g]);
2352 abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2353 abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
/* dist1: cost of the two channels as-is, each weighted by its own threshold. */
2354 dist1 += quantize_band_cost(s, sce0->coeffs + start + w2*128,
2356 sce0->ics.swb_sizes[g],
2357 sce0->sf_idx[(w+w2)*16+g],
2358 sce0->band_type[(w+w2)*16+g],
2359 lambda / band0->threshold, INFINITY, NULL);
2360 dist1 += quantize_band_cost(s, sce1->coeffs + start + w2*128,
2362 sce1->ics.swb_sizes[g],
2363 sce1->sf_idx[(w+w2)*16+g],
2364 sce1->band_type[(w+w2)*16+g],
2365 lambda / band1->threshold, INFINITY, NULL);
/* dist2: cost of mid (weighted by the larger threshold) and side (the smaller). */
2366 dist2 += quantize_band_cost(s, M,
2368 sce0->ics.swb_sizes[g],
2369 sce0->sf_idx[(w+w2)*16+g],
2370 sce0->band_type[(w+w2)*16+g],
2371 lambda / maxthr, INFINITY, NULL);
2372 dist2 += quantize_band_cost(s, S,
2374 sce1->ics.swb_sizes[g],
2375 sce1->sf_idx[(w+w2)*16+g],
2376 sce1->band_type[(w+w2)*16+g],
2377 lambda / minthr, INFINITY, NULL);
/* Use mid/side for this band only when it is strictly cheaper. */
2379 cpe->ms_mask[w*16+g] = dist2 < dist1;
2381 start += sce0->ics.swb_sizes[g];
2385 #endif /*HAVE_MIPSFPU */
2387 static void codebook_trellis_rate_mips(AACEncContext *s, SingleChannelElement *sce,
2388 int win, int group_len, const float lambda)
2390 BandCodingPath path[120][12];
2391 int w, swb, cb, start, size;
2393 const int max_sfb = sce->ics.max_sfb;
2394 const int run_bits = sce->ics.num_windows == 1 ? 5 : 3;
2395 const int run_esc = (1 << run_bits) - 1;
2396 int idx, ppos, count;
2397 int stackrun[120], stackcb[120], stack_len;
2398 float next_minbits = INFINITY;
2401 abs_pow34_v(s->scoefs, sce->coeffs, 1024);
2403 for (cb = 0; cb < 12; cb++) {
2404 path[0][cb].cost = run_bits+4;
2405 path[0][cb].prev_idx = -1;
2406 path[0][cb].run = 0;
2408 for (swb = 0; swb < max_sfb; swb++) {
2409 size = sce->ics.swb_sizes[swb];
2410 if (sce->zeroes[win*16 + swb]) {
2411 float cost_stay_here = path[swb][0].cost;
2412 float cost_get_here = next_minbits + run_bits + 4;
2413 if ( run_value_bits[sce->ics.num_windows == 8][path[swb][0].run]
2414 != run_value_bits[sce->ics.num_windows == 8][path[swb][0].run+1])
2415 cost_stay_here += run_bits;
2416 if (cost_get_here < cost_stay_here) {
2417 path[swb+1][0].prev_idx = next_mincb;
2418 path[swb+1][0].cost = cost_get_here;
2419 path[swb+1][0].run = 1;
2421 path[swb+1][0].prev_idx = 0;
2422 path[swb+1][0].cost = cost_stay_here;
2423 path[swb+1][0].run = path[swb][0].run + 1;
2425 next_minbits = path[swb+1][0].cost;
2427 for (cb = 1; cb < 12; cb++) {
2428 path[swb+1][cb].cost = 61450;
2429 path[swb+1][cb].prev_idx = -1;
2430 path[swb+1][cb].run = 0;
2433 float minbits = next_minbits;
2434 int mincb = next_mincb;
2435 int startcb = sce->band_type[win*16+swb];
2436 next_minbits = INFINITY;
2438 for (cb = 0; cb < startcb; cb++) {
2439 path[swb+1][cb].cost = 61450;
2440 path[swb+1][cb].prev_idx = -1;
2441 path[swb+1][cb].run = 0;
2443 for (cb = startcb; cb < 12; cb++) {
2444 float cost_stay_here, cost_get_here;
2446 for (w = 0; w < group_len; w++) {
2447 bits += quantize_band_cost_bits(s, sce->coeffs + start + w*128,
2448 s->scoefs + start + w*128, size,
2449 sce->sf_idx[(win+w)*16+swb], cb,
2452 cost_stay_here = path[swb][cb].cost + bits;
2453 cost_get_here = minbits + bits + run_bits + 4;
2454 if ( run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run]
2455 != run_value_bits[sce->ics.num_windows == 8][path[swb][cb].run+1])
2456 cost_stay_here += run_bits;
2457 if (cost_get_here < cost_stay_here) {
2458 path[swb+1][cb].prev_idx = mincb;
2459 path[swb+1][cb].cost = cost_get_here;
2460 path[swb+1][cb].run = 1;
2462 path[swb+1][cb].prev_idx = cb;
2463 path[swb+1][cb].cost = cost_stay_here;
2464 path[swb+1][cb].run = path[swb][cb].run + 1;
2466 if (path[swb+1][cb].cost < next_minbits) {
2467 next_minbits = path[swb+1][cb].cost;
2472 start += sce->ics.swb_sizes[swb];
2477 for (cb = 1; cb < 12; cb++)
2478 if (path[max_sfb][cb].cost < path[max_sfb][idx].cost)
2482 av_assert1(idx >= 0);
2484 stackrun[stack_len] = path[ppos][cb].run;
2485 stackcb [stack_len] = cb;
2486 idx = path[ppos-path[ppos][cb].run+1][cb].prev_idx;
2487 ppos -= path[ppos][cb].run;
2492 for (i = stack_len - 1; i >= 0; i--) {
2493 put_bits(&s->pb, 4, stackcb[i]);
2494 count = stackrun[i];
2495 memset(sce->zeroes + win*16 + start, !stackcb[i], count);
2496 for (j = 0; j < count; j++) {
2497 sce->band_type[win*16 + start] = stackcb[i];
2500 while (count >= run_esc) {
2501 put_bits(&s->pb, run_bits, run_esc);
2504 put_bits(&s->pb, run_bits, count);
2507 #endif /* HAVE_INLINE_ASM */
2509 void ff_aac_coder_init_mips(AACEncContext *c) {
2511 AACCoefficientsEncoder *e = c->coder;
2512 int option = c->options.aac_coder;
2515 e->quantize_and_encode_band = quantize_and_encode_band_mips;
2516 e->encode_window_bands_info = codebook_trellis_rate_mips;
2518 e->search_for_quantizers = search_for_quantizers_twoloop_mips;
2519 e->search_for_ms = search_for_ms_mips;
2520 #endif /* HAVE_MIPSFPU */
2522 #endif /* HAVE_INLINE_ASM */