3 * MIPS Technologies, Inc., California.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * Author: Stanislav Ocovaj (socovaj@mips.com)
30 * Szabolcs Pal (sabolc@mips.com)
32 * AAC coefficients encoder optimized for MIPS floating-point architecture
34 * This file is part of FFmpeg.
36 * FFmpeg is free software; you can redistribute it and/or
37 * modify it under the terms of the GNU Lesser General Public
38 * License as published by the Free Software Foundation; either
39 * version 2.1 of the License, or (at your option) any later version.
41 * FFmpeg is distributed in the hope that it will be useful,
42 * but WITHOUT ANY WARRANTY; without even the implied warranty of
43 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
44 * Lesser General Public License for more details.
46 * You should have received a copy of the GNU Lesser General Public
47 * License along with FFmpeg; if not, write to the Free Software
48 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
53 * Reference: libavcodec/aaccoder.c
56 #include "libavutil/libm.h"
59 #include "libavutil/mathematics.h"
60 #include "libavcodec/avcodec.h"
61 #include "libavcodec/put_bits.h"
62 #include "libavcodec/aac.h"
63 #include "libavcodec/aacenc.h"
64 #include "libavcodec/aactab.h"
65 #include "libavcodec/aacenctab.h"
66 #include "libavcodec/aacenc_utils.h"
69 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
70 typedef struct BandCodingPath {
/*
 * Extra bit count per codeword index for the unsigned-quad codebooks.
 * The 81 entries follow a base-3 pattern: each value is the number of
 * non-zero base-3 digits of the index (0..4). get_band_numbits_UQUAD_mips
 * adds this value to the code length taken from p_bits[].
 */
76 static const uint8_t uquad_sign_bits[81] = {
77 0, 1, 1, 1, 2, 2, 1, 2, 2,
78 1, 2, 2, 2, 3, 3, 2, 3, 3,
79 1, 2, 2, 2, 3, 3, 2, 3, 3,
80 1, 2, 2, 2, 3, 3, 2, 3, 3,
81 2, 3, 3, 3, 4, 4, 3, 4, 4,
82 2, 3, 3, 3, 4, 4, 3, 4, 4,
83 1, 2, 2, 2, 3, 3, 2, 3, 3,
84 2, 3, 3, 3, 4, 4, 3, 4, 4,
85 2, 3, 3, 3, 4, 4, 3, 4, 4
/*
 * 8 x 8 table: each value is the number of non-zero digits (0..2) when the
 * index is read as two base-8 digits.
 * NOTE(review): presumably the sign-bit count for the UPAIR7 codebooks; the
 * code that reads this table is not visible in this section - confirm.
 */
88 static const uint8_t upair7_sign_bits[64] = {
89 0, 1, 1, 1, 1, 1, 1, 1,
90 1, 2, 2, 2, 2, 2, 2, 2,
91 1, 2, 2, 2, 2, 2, 2, 2,
92 1, 2, 2, 2, 2, 2, 2, 2,
93 1, 2, 2, 2, 2, 2, 2, 2,
94 1, 2, 2, 2, 2, 2, 2, 2,
95 1, 2, 2, 2, 2, 2, 2, 2,
96 1, 2, 2, 2, 2, 2, 2, 2,
/*
 * 13 x 13 table: each value is the number of non-zero digits (0..2) when the
 * index is read as two base-13 digits.
 * NOTE(review): presumably the sign-bit count for the UPAIR12 codebooks; the
 * code that reads this table is not visible in this section - confirm.
 */
99 static const uint8_t upair12_sign_bits[169] = {
100 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
101 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
103 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
104 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
110 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
111 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
112 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * 17 x 17 table: each value is the number of non-zero digits (0..2) when the
 * index is read as two base-17 digits.
 * NOTE(review): presumably the sign-bit count for the escape codebook; the
 * code that reads this table is not visible in this section - confirm.
 */
115 static const uint8_t esc_sign_bits[289] = {
116 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
123 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
124 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
125 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
126 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
127 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
128 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
129 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
132 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
136 * Functions developed from template function and optimized for quantizing and encoding band
/*
 * Quantize and write a band, four coefficients per iteration, producing
 * signed quantized values in {-1, 0, +1}.
 *
 * Visible behaviour: each scaled magnitude is reduced to 0 or 1, given the
 * sign of the matching input, one code word per group of four is written
 * with put_bits(), and squared e1..e4 values are summed into qenergy.
 * NOTE(review): the embedded original line numbers have gaps, so parts of
 * the body are not shown here; the comments cover only the visible lines.
 */
138 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
139 PutBitContext *pb, const float *in, float *out,
140 const float *scaled, int size, int scale_idx,
141 int cb, const float lambda, const float uplim,
142 int *bits, float *energy, const float ROUNDING)
/* Multipliers looked up from scale_idx; Q34 is applied to scaled[] below. */
144 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
145 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
147 int qc1, qc2, qc3, qc4;
148 float qenergy = 0.0f;
/* Code-length, code-word and vector tables for codebook cb (stored at cb-1). */
150 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
151 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
152 float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
/* NOTE(review): presumably stores |in|^(3/4) into s->scoefs - confirm against abs_pow34_v. */
154 abs_pow34_v(s->scoefs, in, size);
156 for (i = 0; i < size; i += 4) {
/* Integer view of in[] so the asm can inspect bit 31 of each value. */
158 int *in_int = (int *)&in[i];
159 int t0, t1, t2, t3, t4, t5, t6, t7;
/* Float-to-int assignment truncates; ROUND_STANDARD is used here, not the ROUNDING argument. */
162 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
163 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
164 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
165 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
169 ".set noreorder \n\t"
/* qc = (0 < qc): collapse each quantized value to 0 or 1. */
171 "slt %[qc1], $zero, %[qc1] \n\t"
172 "slt %[qc2], $zero, %[qc2] \n\t"
173 "slt %[qc3], $zero, %[qc3] \n\t"
174 "slt %[qc4], $zero, %[qc4] \n\t"
/* Load the raw input words and keep only bit 31 (1 when the sign bit is set). */
175 "lw %[t0], 0(%[in_int]) \n\t"
176 "lw %[t1], 4(%[in_int]) \n\t"
177 "lw %[t2], 8(%[in_int]) \n\t"
178 "lw %[t3], 12(%[in_int]) \n\t"
179 "srl %[t0], %[t0], 31 \n\t"
180 "srl %[t1], %[t1], 31 \n\t"
181 "srl %[t2], %[t2], 31 \n\t"
182 "srl %[t3], %[t3], 31 \n\t"
/* Replace qc with -qc wherever the matching input had its sign bit set. */
183 "subu %[t4], $zero, %[qc1] \n\t"
184 "subu %[t5], $zero, %[qc2] \n\t"
185 "subu %[t6], $zero, %[qc3] \n\t"
186 "subu %[t7], $zero, %[qc4] \n\t"
187 "movn %[qc1], %[t4], %[t0] \n\t"
188 "movn %[qc2], %[t5], %[t1] \n\t"
189 "movn %[qc3], %[t6], %[t2] \n\t"
190 "movn %[qc4], %[t7], %[t3] \n\t"
/* qc1..qc4 are read-write operands, t0..t7 early-clobber scratch, in_int the only input. */
194 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
195 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
196 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
197 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
198 : [in_int]"r"(in_int)
/* Write the p_bits[curidx]-bit code word selected by curidx. */
211 put_bits(pb, p_bits[curidx], p_codes[curidx]);
/* The vector table holds four floats per index. */
215 vec = &p_vec[curidx*4];
/* Accumulate the sum of squares of e1..e4. */
227 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize and write a band, four coefficients per iteration, using
 * unsigned magnitudes capped at 2 plus explicit sign bits.
 *
 * Visible behaviour: magnitudes are clamped to 2, one sign bit is collected
 * for every non-zero magnitude, the code word and the sign bits are written
 * with a single put_bits() call, and the reconstructed values (table vector
 * times IQ, carrying the input sign) feed the qenergy sum.
 * NOTE(review): the embedded original line numbers have gaps, so parts of
 * the body are not shown here; the comments cover only the visible lines.
 */
234 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
235 PutBitContext *pb, const float *in, float *out,
236 const float *scaled, int size, int scale_idx,
237 int cb, const float lambda, const float uplim,
238 int *bits, float *energy, const float ROUNDING)
/* Multipliers looked up from scale_idx: Q34 scales the input, IQ scales the table vectors. */
240 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
241 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
243 int qc1, qc2, qc3, qc4;
244 float qenergy = 0.0f;
/* Code-length, code-word and vector tables for codebook cb (stored at cb-1). */
246 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
247 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
248 float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
/* NOTE(review): presumably stores |in|^(3/4) into s->scoefs - confirm against abs_pow34_v. */
250 abs_pow34_v(s->scoefs, in, size);
252 for (i = 0; i < size; i += 4) {
253 int curidx, sign, count;
/* Integer view of in[] so the asm can test the sign of each raw word. */
254 int *in_int = (int *)&in[i];
256 unsigned int v_codes;
257 int t0, t1, t2, t3, t4;
/* Float-to-int assignment truncates; ROUND_STANDARD is used here, not the ROUNDING argument. */
260 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
261 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
262 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
263 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
267 ".set noreorder \n\t"
/* t4 = 2 (the cap), sign = 0. */
269 "ori %[t4], $zero, 2 \n\t"
270 "ori %[sign], $zero, 0 \n\t"
/* Clamp: qc = 2 wherever 2 < qc. */
271 "slt %[t0], %[t4], %[qc1] \n\t"
272 "slt %[t1], %[t4], %[qc2] \n\t"
273 "slt %[t2], %[t4], %[qc3] \n\t"
274 "slt %[t3], %[t4], %[qc4] \n\t"
275 "movn %[qc1], %[t4], %[t0] \n\t"
276 "movn %[qc2], %[t4], %[t1] \n\t"
277 "movn %[qc3], %[t4], %[t2] \n\t"
278 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw input words; each t becomes 1 when the word is negative as an integer. */
279 "lw %[t0], 0(%[in_int]) \n\t"
280 "lw %[t1], 4(%[in_int]) \n\t"
281 "lw %[t2], 8(%[in_int]) \n\t"
282 "lw %[t3], 12(%[in_int]) \n\t"
283 "slt %[t0], %[t0], $zero \n\t"
/* For every non-zero qc, in order: sign = (sign << 1) | sign bit of that input. */
284 "movn %[sign], %[t0], %[qc1] \n\t"
285 "slt %[t1], %[t1], $zero \n\t"
286 "slt %[t2], %[t2], $zero \n\t"
287 "slt %[t3], %[t3], $zero \n\t"
288 "sll %[t0], %[sign], 1 \n\t"
289 "or %[t0], %[t0], %[t1] \n\t"
290 "movn %[sign], %[t0], %[qc2] \n\t"
/* Interleaved with the sign packing: count = number of non-zero qc values. */
291 "slt %[t4], $zero, %[qc1] \n\t"
292 "slt %[t1], $zero, %[qc2] \n\t"
293 "slt %[count], $zero, %[qc3] \n\t"
294 "sll %[t0], %[sign], 1 \n\t"
295 "or %[t0], %[t0], %[t2] \n\t"
296 "movn %[sign], %[t0], %[qc3] \n\t"
297 "slt %[t2], $zero, %[qc4] \n\t"
298 "addu %[count], %[count], %[t4] \n\t"
299 "addu %[count], %[count], %[t1] \n\t"
300 "sll %[t0], %[sign], 1 \n\t"
301 "or %[t0], %[t0], %[t3] \n\t"
302 "movn %[sign], %[t0], %[qc4] \n\t"
303 "addu %[count], %[count], %[t2] \n\t"
/* qc1..qc4 are read-write; sign, count and the t registers are early-clobber outputs. */
307 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
308 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
309 [sign]"=&r"(sign), [count]"=&r"(count),
310 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
312 : [in_int]"r"(in_int)
/* Append the low `count` sign bits after the code word and write both together. */
324 v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
325 v_bits = p_bits[curidx] + count;
326 put_bits(pb, v_bits, v_codes);
/* Four floats per index; each value is scaled by IQ and takes the sign of its input. */
330 vec = &p_vec[curidx*4];
331 e1 = copysignf(vec[0] * IQ, in[i+0]);
332 e2 = copysignf(vec[1] * IQ, in[i+1]);
333 e3 = copysignf(vec[2] * IQ, in[i+2]);
334 e4 = copysignf(vec[3] * IQ, in[i+3]);
/* Accumulate the sum of squares of e1..e4. */
342 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize and write a band as signed pairs, four coefficients (two pairs)
 * per iteration.
 *
 * Visible behaviour: magnitudes are clamped to 4 and given the sign of the
 * matching input, the code words of both pairs are concatenated and written
 * with one put_bits() call, and squared e1..e4 values are summed into qenergy.
 * NOTE(review): the embedded original line numbers have gaps, so parts of
 * the body are not shown here; the comments cover only the visible lines.
 */
349 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
350 PutBitContext *pb, const float *in, float *out,
351 const float *scaled, int size, int scale_idx,
352 int cb, const float lambda, const float uplim,
353 int *bits, float *energy, const float ROUNDING)
/* Multipliers looked up from scale_idx; Q34 is applied to scaled[] below. */
355 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
356 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
358 int qc1, qc2, qc3, qc4;
359 float qenergy = 0.0f;
/* Code-length, code-word and vector tables for codebook cb (stored at cb-1). */
361 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
362 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
363 float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
/* NOTE(review): presumably stores |in|^(3/4) into s->scoefs - confirm against abs_pow34_v. */
365 abs_pow34_v(s->scoefs, in, size);
367 for (i = 0; i < size; i += 4) {
/* Integer view of in[] so the asm can inspect bit 31 of each value. */
369 int *in_int = (int *)&in[i];
371 unsigned int v_codes;
372 int t0, t1, t2, t3, t4, t5, t6, t7;
373 const float *vec1, *vec2;
/* Float-to-int assignment truncates; ROUND_STANDARD is used here, not the ROUNDING argument. */
375 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
376 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
377 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
378 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
382 ".set noreorder \n\t"
/* Clamp: qc = 4 wherever 4 < qc. */
384 "ori %[t4], $zero, 4 \n\t"
385 "slt %[t0], %[t4], %[qc1] \n\t"
386 "slt %[t1], %[t4], %[qc2] \n\t"
387 "slt %[t2], %[t4], %[qc3] \n\t"
388 "slt %[t3], %[t4], %[qc4] \n\t"
389 "movn %[qc1], %[t4], %[t0] \n\t"
390 "movn %[qc2], %[t4], %[t1] \n\t"
391 "movn %[qc3], %[t4], %[t2] \n\t"
392 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw input words and keep only bit 31 (1 when the sign bit is set). */
393 "lw %[t0], 0(%[in_int]) \n\t"
394 "lw %[t1], 4(%[in_int]) \n\t"
395 "lw %[t2], 8(%[in_int]) \n\t"
396 "lw %[t3], 12(%[in_int]) \n\t"
397 "srl %[t0], %[t0], 31 \n\t"
398 "srl %[t1], %[t1], 31 \n\t"
399 "srl %[t2], %[t2], 31 \n\t"
400 "srl %[t3], %[t3], 31 \n\t"
/* Replace qc with -qc wherever the matching input had its sign bit set (t4 is reused as scratch). */
401 "subu %[t4], $zero, %[qc1] \n\t"
402 "subu %[t5], $zero, %[qc2] \n\t"
403 "subu %[t6], $zero, %[qc3] \n\t"
404 "subu %[t7], $zero, %[qc4] \n\t"
405 "movn %[qc1], %[t4], %[t0] \n\t"
406 "movn %[qc2], %[t5], %[t1] \n\t"
407 "movn %[qc3], %[t6], %[t2] \n\t"
408 "movn %[qc4], %[t7], %[t3] \n\t"
/* qc1..qc4 are read-write operands, t0..t7 early-clobber scratch, in_int the only input. */
412 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
413 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
414 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
415 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
416 : [in_int]"r"(in_int)
/* Concatenate both code words: the first is shifted up by the length of the second. */
426 v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
427 v_bits = p_bits[curidx] + p_bits[curidx2];
428 put_bits(pb, v_bits, v_codes);
/* The vector table holds two floats per index. */
432 vec1 = &p_vec[curidx*2 ];
433 vec2 = &p_vec[curidx2*2];
/* Accumulate the sum of squares of e1..e4. */
445 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize and write a band as unsigned pairs with magnitudes capped at 7,
 * four coefficients (two pairs) per iteration.
 *
 * Visible behaviour: magnitudes are clamped to 7, sign bits are collected
 * separately for each pair, each pair's code word plus sign bits is written
 * with its own put_bits() call, and the reconstructed values (table vector
 * times IQ, carrying the input sign) feed the qenergy sum.
 * NOTE(review): the embedded original line numbers have gaps, so parts of
 * the body are not shown here; the comments cover only the visible lines.
 */
452 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
453 PutBitContext *pb, const float *in, float *out,
454 const float *scaled, int size, int scale_idx,
455 int cb, const float lambda, const float uplim,
456 int *bits, float *energy, const float ROUNDING)
/* Multipliers looked up from scale_idx: Q34 scales the input, IQ scales the table vectors. */
458 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
459 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
461 int qc1, qc2, qc3, qc4;
462 float qenergy = 0.0f;
/* Code-length, code-word and vector tables for codebook cb (stored at cb-1). */
464 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
465 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
466 float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
/* NOTE(review): presumably stores |in|^(3/4) into s->scoefs - confirm against abs_pow34_v. */
468 abs_pow34_v(s->scoefs, in, size);
470 for (i = 0; i < size; i += 4) {
471 int curidx1, curidx2, sign1, count1, sign2, count2;
/* Integer view of in[] so the asm can test the sign of each raw word. */
472 int *in_int = (int *)&in[i];
474 unsigned int v_codes;
475 int t0, t1, t2, t3, t4;
476 const float *vec1, *vec2;
/* Float-to-int assignment truncates; ROUND_STANDARD is used here, not the ROUNDING argument. */
478 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
479 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
480 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
481 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
485 ".set noreorder \n\t"
/* t4 = 7 (the cap), sign1 = sign2 = 0. */
487 "ori %[t4], $zero, 7 \n\t"
488 "ori %[sign1], $zero, 0 \n\t"
489 "ori %[sign2], $zero, 0 \n\t"
/* Clamp: qc = 7 wherever 7 < qc. */
490 "slt %[t0], %[t4], %[qc1] \n\t"
491 "slt %[t1], %[t4], %[qc2] \n\t"
492 "slt %[t2], %[t4], %[qc3] \n\t"
493 "slt %[t3], %[t4], %[qc4] \n\t"
494 "movn %[qc1], %[t4], %[t0] \n\t"
495 "movn %[qc2], %[t4], %[t1] \n\t"
496 "movn %[qc3], %[t4], %[t2] \n\t"
497 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw input words; each is turned into a 0/1 sign flag below. */
498 "lw %[t0], 0(%[in_int]) \n\t"
499 "lw %[t1], 4(%[in_int]) \n\t"
500 "lw %[t2], 8(%[in_int]) \n\t"
501 "lw %[t3], 12(%[in_int]) \n\t"
/* sign1 packs the sign flags of non-zero qc1/qc2, sign2 those of qc3/qc4. */
502 "slt %[t0], %[t0], $zero \n\t"
503 "movn %[sign1], %[t0], %[qc1] \n\t"
504 "slt %[t2], %[t2], $zero \n\t"
505 "movn %[sign2], %[t2], %[qc3] \n\t"
506 "slt %[t1], %[t1], $zero \n\t"
507 "sll %[t0], %[sign1], 1 \n\t"
508 "or %[t0], %[t0], %[t1] \n\t"
509 "movn %[sign1], %[t0], %[qc2] \n\t"
510 "slt %[t3], %[t3], $zero \n\t"
511 "sll %[t0], %[sign2], 1 \n\t"
512 "or %[t0], %[t0], %[t3] \n\t"
513 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 / count2 = number of non-zero values in the first / second pair. */
514 "slt %[count1], $zero, %[qc1] \n\t"
515 "slt %[t1], $zero, %[qc2] \n\t"
516 "slt %[count2], $zero, %[qc3] \n\t"
517 "slt %[t2], $zero, %[qc4] \n\t"
518 "addu %[count1], %[count1], %[t1] \n\t"
519 "addu %[count2], %[count2], %[t2] \n\t"
/* qc1..qc4 are read-write; sign, count and the t registers are early-clobber outputs. */
523 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
524 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
525 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
526 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
527 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
529 : [in_int]"r"(in_int)
/*
 * NOTE(review): this clobber list names the physical registers t0..t4, but the
 * visible asm only uses compiler-allocated named operands; the sibling
 * functions in this file show no such list. Likely unnecessary - confirm.
 */
530 : "t0", "t1", "t2", "t3", "t4",
/* First pair: code word followed by its count1 sign bits. */
537 v_codes = (p_codes[curidx1] << count1) | sign1;
538 v_bits = p_bits[curidx1] + count1;
539 put_bits(pb, v_bits, v_codes);
/* Second pair: code word followed by its count2 sign bits. */
544 v_codes = (p_codes[curidx2] << count2) | sign2;
545 v_bits = p_bits[curidx2] + count2;
546 put_bits(pb, v_bits, v_codes);
/* Two floats per index; each value is scaled by IQ and takes the sign of its input. */
550 vec1 = &p_vec[curidx1*2];
551 vec2 = &p_vec[curidx2*2];
552 e1 = copysignf(vec1[0] * IQ, in[i+0]);
553 e2 = copysignf(vec1[1] * IQ, in[i+1]);
554 e3 = copysignf(vec2[0] * IQ, in[i+2]);
555 e4 = copysignf(vec2[1] * IQ, in[i+3]);
/* Accumulate the sum of squares of e1..e4. */
563 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize and write a band as unsigned pairs with magnitudes capped at 12,
 * four coefficients (two pairs) per iteration.
 *
 * Visible behaviour: magnitudes are clamped to 12, sign bits are collected
 * separately for each pair, each pair's code word plus sign bits is written
 * with its own put_bits() call, and the reconstructed values (table vector
 * times IQ, carrying the input sign) feed the qenergy sum.
 * NOTE(review): the embedded original line numbers have gaps, so parts of
 * the body are not shown here; the comments cover only the visible lines.
 */
570 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
571 PutBitContext *pb, const float *in, float *out,
572 const float *scaled, int size, int scale_idx,
573 int cb, const float lambda, const float uplim,
574 int *bits, float *energy, const float ROUNDING)
/* Multipliers looked up from scale_idx: Q34 scales the input, IQ scales the table vectors. */
576 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
577 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
579 int qc1, qc2, qc3, qc4;
580 float qenergy = 0.0f;
/* Code-length, code-word and vector tables for codebook cb (stored at cb-1). */
582 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
583 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
584 float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
/* NOTE(review): presumably stores |in|^(3/4) into s->scoefs - confirm against abs_pow34_v. */
586 abs_pow34_v(s->scoefs, in, size);
588 for (i = 0; i < size; i += 4) {
589 int curidx1, curidx2, sign1, count1, sign2, count2;
/* Integer view of in[] so the asm can test the sign of each raw word. */
590 int *in_int = (int *)&in[i];
592 unsigned int v_codes;
593 int t0, t1, t2, t3, t4;
594 const float *vec1, *vec2;
/* Float-to-int assignment truncates; ROUND_STANDARD is used here, not the ROUNDING argument. */
596 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
597 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
598 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
599 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
603 ".set noreorder \n\t"
/* t4 = 12 (the cap), sign1 = sign2 = 0. */
605 "ori %[t4], $zero, 12 \n\t"
606 "ori %[sign1], $zero, 0 \n\t"
607 "ori %[sign2], $zero, 0 \n\t"
/* Clamp: qc = 12 wherever 12 < qc. */
608 "slt %[t0], %[t4], %[qc1] \n\t"
609 "slt %[t1], %[t4], %[qc2] \n\t"
610 "slt %[t2], %[t4], %[qc3] \n\t"
611 "slt %[t3], %[t4], %[qc4] \n\t"
612 "movn %[qc1], %[t4], %[t0] \n\t"
613 "movn %[qc2], %[t4], %[t1] \n\t"
614 "movn %[qc3], %[t4], %[t2] \n\t"
615 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw input words; each is turned into a 0/1 sign flag below. */
616 "lw %[t0], 0(%[in_int]) \n\t"
617 "lw %[t1], 4(%[in_int]) \n\t"
618 "lw %[t2], 8(%[in_int]) \n\t"
619 "lw %[t3], 12(%[in_int]) \n\t"
/* sign1 packs the sign flags of non-zero qc1/qc2, sign2 those of qc3/qc4. */
620 "slt %[t0], %[t0], $zero \n\t"
621 "movn %[sign1], %[t0], %[qc1] \n\t"
622 "slt %[t2], %[t2], $zero \n\t"
623 "movn %[sign2], %[t2], %[qc3] \n\t"
624 "slt %[t1], %[t1], $zero \n\t"
625 "sll %[t0], %[sign1], 1 \n\t"
626 "or %[t0], %[t0], %[t1] \n\t"
627 "movn %[sign1], %[t0], %[qc2] \n\t"
628 "slt %[t3], %[t3], $zero \n\t"
629 "sll %[t0], %[sign2], 1 \n\t"
630 "or %[t0], %[t0], %[t3] \n\t"
631 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 / count2 = number of non-zero values in the first / second pair. */
632 "slt %[count1], $zero, %[qc1] \n\t"
633 "slt %[t1], $zero, %[qc2] \n\t"
634 "slt %[count2], $zero, %[qc3] \n\t"
635 "slt %[t2], $zero, %[qc4] \n\t"
636 "addu %[count1], %[count1], %[t1] \n\t"
637 "addu %[count2], %[count2], %[t2] \n\t"
/* qc1..qc4 are read-write; sign, count and the t registers are early-clobber outputs. */
641 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
642 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
643 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
644 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
645 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
647 : [in_int]"r"(in_int)
/* First pair: code word followed by its count1 sign bits. */
654 v_codes = (p_codes[curidx1] << count1) | sign1;
655 v_bits = p_bits[curidx1] + count1;
656 put_bits(pb, v_bits, v_codes);
/* Second pair: code word followed by its count2 sign bits. */
661 v_codes = (p_codes[curidx2] << count2) | sign2;
662 v_bits = p_bits[curidx2] + count2;
663 put_bits(pb, v_bits, v_codes);
/* Two floats per index; each value is scaled by IQ and takes the sign of its input. */
667 vec1 = &p_vec[curidx1*2];
668 vec2 = &p_vec[curidx2*2];
669 e1 = copysignf(vec1[0] * IQ, in[i+0]);
670 e2 = copysignf(vec1[1] * IQ, in[i+1]);
671 e3 = copysignf(vec2[0] * IQ, in[i+2]);
672 e4 = copysignf(vec2[1] * IQ, in[i+3]);
/* Accumulate the sum of squares of e1..e4. */
680 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize and write a band as unsigned pairs with magnitudes capped at 16,
 * with an additional escape sequence in the second loop.
 *
 * Two loops over the band are visible. The first writes a code word plus
 * sign bits per pair and reconstructs values from the vector table. The
 * second also keeps unclamped copies c1..c4 and writes an extra escape code
 * for every value whose vector-table entry equals 64.0f.
 * NOTE(review): the condition selecting between the two loops is not shown
 * (the embedded original line numbers have gaps); the comments cover only
 * the visible lines.
 */
687 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
688 PutBitContext *pb, const float *in, float *out,
689 const float *scaled, int size, int scale_idx,
690 int cb, const float lambda, const float uplim,
691 int *bits, float *energy, const float ROUNDING)
/* Multipliers looked up from scale_idx: Q34 scales the input, IQ scales the reconstruction. */
693 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
694 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
696 int qc1, qc2, qc3, qc4;
697 float qenergy = 0.0f;
/* Code-length, code-word and vector tables for codebook cb (stored at cb-1). */
699 uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
700 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
701 float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
/* NOTE(review): presumably stores |in|^(3/4) into s->scoefs - confirm against abs_pow34_v. */
703 abs_pow34_v(s->scoefs, in, size);
/* First loop: no escape handling. */
707 for (i = 0; i < size; i += 4) {
708 int curidx, curidx2, sign1, count1, sign2, count2;
/* Integer view of in[] so the asm can test the sign of each raw word. */
709 int *in_int = (int *)&in[i];
711 unsigned int v_codes;
712 int t0, t1, t2, t3, t4;
713 const float *vec1, *vec2;
/* Unlike the other quantizers in this file, the caller-supplied ROUNDING is used here. */
715 qc1 = scaled[i ] * Q34 + ROUNDING;
716 qc2 = scaled[i+1] * Q34 + ROUNDING;
717 qc3 = scaled[i+2] * Q34 + ROUNDING;
718 qc4 = scaled[i+3] * Q34 + ROUNDING;
722 ".set noreorder \n\t"
/* t4 = 16 (the cap), sign1 = sign2 = 0. */
724 "ori %[t4], $zero, 16 \n\t"
725 "ori %[sign1], $zero, 0 \n\t"
726 "ori %[sign2], $zero, 0 \n\t"
/* Clamp: qc = 16 wherever 16 < qc. */
727 "slt %[t0], %[t4], %[qc1] \n\t"
728 "slt %[t1], %[t4], %[qc2] \n\t"
729 "slt %[t2], %[t4], %[qc3] \n\t"
730 "slt %[t3], %[t4], %[qc4] \n\t"
731 "movn %[qc1], %[t4], %[t0] \n\t"
732 "movn %[qc2], %[t4], %[t1] \n\t"
733 "movn %[qc3], %[t4], %[t2] \n\t"
734 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw input words; each is turned into a 0/1 sign flag below. */
735 "lw %[t0], 0(%[in_int]) \n\t"
736 "lw %[t1], 4(%[in_int]) \n\t"
737 "lw %[t2], 8(%[in_int]) \n\t"
738 "lw %[t3], 12(%[in_int]) \n\t"
/* sign1 packs the sign flags of non-zero qc1/qc2, sign2 those of qc3/qc4. */
739 "slt %[t0], %[t0], $zero \n\t"
740 "movn %[sign1], %[t0], %[qc1] \n\t"
741 "slt %[t2], %[t2], $zero \n\t"
742 "movn %[sign2], %[t2], %[qc3] \n\t"
743 "slt %[t1], %[t1], $zero \n\t"
744 "sll %[t0], %[sign1], 1 \n\t"
745 "or %[t0], %[t0], %[t1] \n\t"
746 "movn %[sign1], %[t0], %[qc2] \n\t"
747 "slt %[t3], %[t3], $zero \n\t"
748 "sll %[t0], %[sign2], 1 \n\t"
749 "or %[t0], %[t0], %[t3] \n\t"
750 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 / count2 = number of non-zero values in the first / second pair. */
751 "slt %[count1], $zero, %[qc1] \n\t"
752 "slt %[t1], $zero, %[qc2] \n\t"
753 "slt %[count2], $zero, %[qc3] \n\t"
754 "slt %[t2], $zero, %[qc4] \n\t"
755 "addu %[count1], %[count1], %[t1] \n\t"
756 "addu %[count2], %[count2], %[t2] \n\t"
/* qc1..qc4 are read-write; sign, count and the t registers are early-clobber outputs. */
760 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
761 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
762 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
763 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
764 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
766 : [in_int]"r"(in_int)
/* First pair: code word followed by its count1 sign bits. */
775 v_codes = (p_codes[curidx] << count1) | sign1;
776 v_bits = p_bits[curidx] + count1;
777 put_bits(pb, v_bits, v_codes);
/* Second pair: code word followed by its count2 sign bits. */
779 v_codes = (p_codes[curidx2] << count2) | sign2;
780 v_bits = p_bits[curidx2] + count2;
781 put_bits(pb, v_bits, v_codes);
/* Two floats per index; each value is scaled by IQ and takes the sign of its input. */
785 vec1 = &p_vectors[curidx*2 ];
786 vec2 = &p_vectors[curidx2*2];
787 e1 = copysignf(vec1[0] * IQ, in[i+0]);
788 e2 = copysignf(vec1[1] * IQ, in[i+1]);
789 e3 = copysignf(vec2[0] * IQ, in[i+2]);
790 e4 = copysignf(vec2[1] * IQ, in[i+3]);
/* Accumulate the sum of squares of e1..e4. */
798 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/* Second loop: same pair coding, plus escape sequences built from c1..c4. */
802 for (i = 0; i < size; i += 4) {
803 int curidx, curidx2, sign1, count1, sign2, count2;
804 int *in_int = (int *)&in[i];
806 unsigned int v_codes;
808 int t0, t1, t2, t3, t4;
810 qc1 = scaled[i ] * Q34 + ROUNDING;
811 qc2 = scaled[i+1] * Q34 + ROUNDING;
812 qc3 = scaled[i+2] * Q34 + ROUNDING;
813 qc4 = scaled[i+3] * Q34 + ROUNDING;
817 ".set noreorder \n\t"
819 "ori %[t4], $zero, 16 \n\t"
820 "ori %[sign1], $zero, 0 \n\t"
821 "ori %[sign2], $zero, 0 \n\t"
/*
 * c = qc shifted left by 18 with saturation, then shifted right by 18,
 * taken before qc is clamped to 16.
 * NOTE(review): for non-negative qc this appears to give min(qc, 8191) -
 * confirm against the MIPS DSP ASE description of shll_s.w.
 */
822 "shll_s.w %[c1], %[qc1], 18 \n\t"
823 "shll_s.w %[c2], %[qc2], 18 \n\t"
824 "shll_s.w %[c3], %[qc3], 18 \n\t"
825 "shll_s.w %[c4], %[qc4], 18 \n\t"
826 "srl %[c1], %[c1], 18 \n\t"
827 "srl %[c2], %[c2], 18 \n\t"
828 "srl %[c3], %[c3], 18 \n\t"
829 "srl %[c4], %[c4], 18 \n\t"
/* Clamp: qc = 16 wherever 16 < qc. */
830 "slt %[t0], %[t4], %[qc1] \n\t"
831 "slt %[t1], %[t4], %[qc2] \n\t"
832 "slt %[t2], %[t4], %[qc3] \n\t"
833 "slt %[t3], %[t4], %[qc4] \n\t"
834 "movn %[qc1], %[t4], %[t0] \n\t"
835 "movn %[qc2], %[t4], %[t1] \n\t"
836 "movn %[qc3], %[t4], %[t2] \n\t"
837 "movn %[qc4], %[t4], %[t3] \n\t"
/* Sign packing and non-zero counting, identical to the first loop. */
838 "lw %[t0], 0(%[in_int]) \n\t"
839 "lw %[t1], 4(%[in_int]) \n\t"
840 "lw %[t2], 8(%[in_int]) \n\t"
841 "lw %[t3], 12(%[in_int]) \n\t"
842 "slt %[t0], %[t0], $zero \n\t"
843 "movn %[sign1], %[t0], %[qc1] \n\t"
844 "slt %[t2], %[t2], $zero \n\t"
845 "movn %[sign2], %[t2], %[qc3] \n\t"
846 "slt %[t1], %[t1], $zero \n\t"
847 "sll %[t0], %[sign1], 1 \n\t"
848 "or %[t0], %[t0], %[t1] \n\t"
849 "movn %[sign1], %[t0], %[qc2] \n\t"
850 "slt %[t3], %[t3], $zero \n\t"
851 "sll %[t0], %[sign2], 1 \n\t"
852 "or %[t0], %[t0], %[t3] \n\t"
853 "movn %[sign2], %[t0], %[qc4] \n\t"
854 "slt %[count1], $zero, %[qc1] \n\t"
855 "slt %[t1], $zero, %[qc2] \n\t"
856 "slt %[count2], $zero, %[qc3] \n\t"
857 "slt %[t2], $zero, %[qc4] \n\t"
858 "addu %[count1], %[count1], %[t1] \n\t"
859 "addu %[count2], %[count2], %[t2] \n\t"
/* Same operands as the first loop, plus c1..c4 as early-clobber outputs. */
863 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
864 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
865 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
866 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
867 [c1]"=&r"(c1), [c2]"=&r"(c2),
868 [c3]"=&r"(c3), [c4]"=&r"(c4),
869 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
871 : [in_int]"r"(in_int)
/* First pair: code word followed by its count1 sign bits. */
881 v_codes = (p_codes[curidx] << count1) | sign1;
882 v_bits = p_bits[curidx] + count1;
883 put_bits(pb, v_bits, v_codes);
/*
 * A vector-table entry of 64.0f triggers an escape code for that value:
 * with len = av_log2(c), the prefix (1 << (len - 3)) - 2 is followed by the
 * low len bits of c, written as len * 2 - 3 bits in total.
 */
885 if (p_vectors[curidx*2 ] == 64.0f) {
886 int len = av_log2(c1);
887 v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
888 put_bits(pb, len * 2 - 3, v_codes);
890 if (p_vectors[curidx*2+1] == 64.0f) {
891 int len = av_log2(c2);
892 v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
893 put_bits(pb, len*2-3, v_codes);
/* Second pair: code word and sign bits, then the same escape handling for c3 / c4. */
896 v_codes = (p_codes[curidx2] << count2) | sign2;
897 v_bits = p_bits[curidx2] + count2;
898 put_bits(pb, v_bits, v_codes);
900 if (p_vectors[curidx2*2 ] == 64.0f) {
901 int len = av_log2(c3);
902 v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
903 put_bits(pb, len* 2 - 3, v_codes);
905 if (p_vectors[curidx2*2+1] == 64.0f) {
906 int len = av_log2(c4);
907 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
908 put_bits(pb, len * 2 - 3, v_codes);
/* Reconstruct from c directly: c * cbrt(c) (i.e. c^(4/3)) times IQ, with the input sign. */
912 float e1, e2, e3, e4;
913 e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]);
914 e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]);
915 e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]);
916 e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]);
/* Accumulate the sum of squares of e1..e4. */
924 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Entry used at index 12 of quantize_and_encode_band_cost_arr (the codebook
 * that, per the table's own comment, does not exist).
 * NOTE(review): only the signature is present in this listing; the body is
 * not shown, so its behaviour cannot be documented from here.
 */
932 static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
933 PutBitContext *pb, const float *in, float *out,
934 const float *scaled, int size, int scale_idx,
935 int cb, const float lambda, const float uplim,
936 int *bits, float *energy, const float ROUNDING) {
/*
 * Entry used at indices 0 and 13..15 of quantize_and_encode_band_cost_arr.
 * NOTE(review): only the signature and a loop header stepping through the
 * band four elements at a time are present in this listing; what the loop
 * body does is not shown.
 */
940 static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
941 PutBitContext *pb, const float *in, float *out,
942 const float *scaled, int size, int scale_idx,
943 int cb, const float lambda, const float uplim,
944 int *bits, float *energy, const float ROUNDING) {
949 for (i = 0; i < size; i += 4) {
/*
 * Dispatch table of quantize-and-encode routines, indexed by codebook number
 * (the quantize_and_encode_band_cost macro indexes it with cb).
 * Sixteen entries: 0 -> ZERO, 1-2 -> SQUAD, 3-4 -> UQUAD, 5-6 -> SPAIR,
 * 7-8 -> UPAIR7, 9-10 -> UPAIR12, 11 -> ESC, 12 -> NONE, 13-15 -> ZERO.
 */
960 static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
961 PutBitContext *pb, const float *in, float *out,
962 const float *scaled, int size, int scale_idx,
963 int cb, const float lambda, const float uplim,
964 int *bits, float *energy, const float ROUNDING) = {
965 quantize_and_encode_band_cost_ZERO_mips,
966 quantize_and_encode_band_cost_SQUAD_mips,
967 quantize_and_encode_band_cost_SQUAD_mips,
968 quantize_and_encode_band_cost_UQUAD_mips,
969 quantize_and_encode_band_cost_UQUAD_mips,
970 quantize_and_encode_band_cost_SPAIR_mips,
971 quantize_and_encode_band_cost_SPAIR_mips,
972 quantize_and_encode_band_cost_UPAIR7_mips,
973 quantize_and_encode_band_cost_UPAIR7_mips,
974 quantize_and_encode_band_cost_UPAIR12_mips,
975 quantize_and_encode_band_cost_UPAIR12_mips,
976 quantize_and_encode_band_cost_ESC_mips,
977 quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
978 quantize_and_encode_band_cost_ZERO_mips,
979 quantize_and_encode_band_cost_ZERO_mips,
980 quantize_and_encode_band_cost_ZERO_mips,
/*
 * Calls the routine registered for codebook cb in
 * quantize_and_encode_band_cost_arr, forwarding every argument unchanged.
 * cb appears twice in the expansion (as the table index and as an argument),
 * so the expression passed for it is evaluated twice and must be free of
 * side effects. No range check is applied to cb here.
 */
983 #define quantize_and_encode_band_cost( \
984 s, pb, in, out, scaled, size, scale_idx, cb, \
985 lambda, uplim, bits, energy, ROUNDING) \
986 quantize_and_encode_band_cost_arr[cb]( \
987 s, pb, in, out, scaled, size, scale_idx, cb, \
988 lambda, uplim, bits, energy, ROUNDING)
/*
 * Encode-only wrapper: dispatches to the per-codebook routine with no
 * pre-scaled input (scaled = NULL), no cost limit (uplim = INFINITY) and no
 * bits / energy outputs (both NULL). rtz selects ROUND_TO_ZERO when non-zero,
 * otherwise ROUND_STANDARD, as the rounding constant.
 */
990 static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
991 const float *in, float *out, int size, int scale_idx,
992 int cb, const float lambda, int rtz)
994 quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
995 INFINITY, NULL, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
999 * Functions developed from template function and optimized for getting the number of bits
/*
 * Bit-count routine for the ZERO case, returning float.
 * NOTE(review): only the start of the signature is present in this listing;
 * the remaining parameters and the body are not shown.
 */
1001 static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
1002 PutBitContext *pb, const float *in,
1003 const float *scaled, int size, int scale_idx,
1004 int cb, const float lambda, const float uplim,
/*
 * Bit-count routine for the NONE case, returning float.
 * NOTE(review): only the start of the signature is present in this listing;
 * the remaining parameters and the body are not shown.
 */
1010 static float get_band_numbits_NONE_mips(struct AACEncContext *s,
1011 PutBitContext *pb, const float *in,
1012 const float *scaled, int size, int scale_idx,
1013 int cb, const float lambda, const float uplim,
/*
 * Count the code bits needed for a band, four coefficients per iteration,
 * with quantized values in {-1, 0, +1}.
 *
 * Visible behaviour: the same quantize / sign steps as the SQUAD encoder are
 * performed, but instead of writing bits the code length p_bits[curidx] is
 * added to curbits.
 * NOTE(review): the embedded original line numbers have gaps (the tail of the
 * signature, the curidx computation and the return are not shown); the
 * comments cover only the visible lines.
 */
1020 static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
1021 PutBitContext *pb, const float *in,
1022 const float *scaled, int size, int scale_idx,
1023 int cb, const float lambda, const float uplim,
/* Multiplier looked up from scale_idx and applied to scaled[] below. */
1026 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1028 int qc1, qc2, qc3, qc4;
/* Code-length table for codebook cb (stored at cb-1). */
1031 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1033 for (i = 0; i < size; i += 4) {
/* Integer view of in[] so the asm can inspect bit 31 of each value. */
1035 int *in_int = (int *)&in[i];
1036 int t0, t1, t2, t3, t4, t5, t6, t7;
/* Float-to-int assignment truncates. */
1038 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1039 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1040 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1041 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1045 ".set noreorder \n\t"
/* qc = (0 < qc): collapse each quantized value to 0 or 1. */
1047 "slt %[qc1], $zero, %[qc1] \n\t"
1048 "slt %[qc2], $zero, %[qc2] \n\t"
1049 "slt %[qc3], $zero, %[qc3] \n\t"
1050 "slt %[qc4], $zero, %[qc4] \n\t"
/* Load the raw input words and keep only bit 31 (1 when the sign bit is set). */
1051 "lw %[t0], 0(%[in_int]) \n\t"
1052 "lw %[t1], 4(%[in_int]) \n\t"
1053 "lw %[t2], 8(%[in_int]) \n\t"
1054 "lw %[t3], 12(%[in_int]) \n\t"
1055 "srl %[t0], %[t0], 31 \n\t"
1056 "srl %[t1], %[t1], 31 \n\t"
1057 "srl %[t2], %[t2], 31 \n\t"
1058 "srl %[t3], %[t3], 31 \n\t"
/* Replace qc with -qc wherever the matching input had its sign bit set. */
1059 "subu %[t4], $zero, %[qc1] \n\t"
1060 "subu %[t5], $zero, %[qc2] \n\t"
1061 "subu %[t6], $zero, %[qc3] \n\t"
1062 "subu %[t7], $zero, %[qc4] \n\t"
1063 "movn %[qc1], %[t4], %[t0] \n\t"
1064 "movn %[qc2], %[t5], %[t1] \n\t"
1065 "movn %[qc3], %[t6], %[t2] \n\t"
1066 "movn %[qc4], %[t7], %[t3] \n\t"
/* qc1..qc4 are read-write operands, t0..t7 early-clobber scratch, in_int the only input. */
1070 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1071 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1072 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1073 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1074 : [in_int]"r"(in_int)
/* Add the code length for this group of four. */
1087 curbits += p_bits[curidx];
/*
 * Count the code bits needed for a band, four coefficients per iteration,
 * with unsigned magnitudes capped at 2.
 *
 * Visible behaviour: magnitudes are clamped to 2, then both the code length
 * p_bits[curidx] and the per-index value from uquad_sign_bits[] are added
 * to curbits.
 * NOTE(review): the embedded original line numbers have gaps (the tail of the
 * signature, the curidx computation and the return are not shown); the
 * comments cover only the visible lines.
 */
1092 static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
1093 PutBitContext *pb, const float *in,
1094 const float *scaled, int size, int scale_idx,
1095 int cb, const float lambda, const float uplim,
/* Multiplier looked up from scale_idx and applied to scaled[] below. */
1098 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1101 int qc1, qc2, qc3, qc4;
/* Code-length table for codebook cb (stored at cb-1). */
1103 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1105 for (i = 0; i < size; i += 4) {
1107 int t0, t1, t2, t3, t4;
/* Float-to-int assignment truncates. */
1109 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1110 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1111 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1112 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1116 ".set noreorder \n\t"
/* Clamp: qc = 2 wherever 2 < qc. No sign handling is needed for the bit count. */
1118 "ori %[t4], $zero, 2 \n\t"
1119 "slt %[t0], %[t4], %[qc1] \n\t"
1120 "slt %[t1], %[t4], %[qc2] \n\t"
1121 "slt %[t2], %[t4], %[qc3] \n\t"
1122 "slt %[t3], %[t4], %[qc4] \n\t"
1123 "movn %[qc1], %[t4], %[t0] \n\t"
1124 "movn %[qc2], %[t4], %[t1] \n\t"
1125 "movn %[qc3], %[t4], %[t2] \n\t"
1126 "movn %[qc4], %[t4], %[t3] \n\t"
/* qc1..qc4 are read-write operands; the t registers are early-clobber scratch. */
1130 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1131 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1132 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* Code length plus the table value for this index (one per non-zero base-3 digit). */
1144 curbits += p_bits[curidx];
1145 curbits += uquad_sign_bits[curidx];
/*
 * Count the code bits needed for a band coded as signed pairs, four
 * coefficients (two pairs) per iteration.
 *
 * Visible behaviour: magnitudes are clamped to 4 and given the sign of the
 * matching input, then the code lengths of both pair indices are added
 * to curbits.
 * NOTE(review): the embedded original line numbers have gaps (the tail of the
 * signature, most of the index computation and the return are not shown);
 * the comments cover only the visible lines.
 */
1150 static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
1151 PutBitContext *pb, const float *in,
1152 const float *scaled, int size, int scale_idx,
1153 int cb, const float lambda, const float uplim,
/* Multiplier looked up from scale_idx and applied to scaled[] below. */
1156 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1158 int qc1, qc2, qc3, qc4;
/* Code-length table for codebook cb (stored at cb-1). */
1161 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1163 for (i = 0; i < size; i += 4) {
1164 int curidx, curidx2;
/* Integer view of in[] so the asm can inspect bit 31 of each value. */
1165 int *in_int = (int *)&in[i];
1166 int t0, t1, t2, t3, t4, t5, t6, t7;
/* Float-to-int assignment truncates. */
1168 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1169 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1170 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1171 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1175 ".set noreorder \n\t"
/* Clamp: qc = 4 wherever 4 < qc. */
1177 "ori %[t4], $zero, 4 \n\t"
1178 "slt %[t0], %[t4], %[qc1] \n\t"
1179 "slt %[t1], %[t4], %[qc2] \n\t"
1180 "slt %[t2], %[t4], %[qc3] \n\t"
1181 "slt %[t3], %[t4], %[qc4] \n\t"
1182 "movn %[qc1], %[t4], %[t0] \n\t"
1183 "movn %[qc2], %[t4], %[t1] \n\t"
1184 "movn %[qc3], %[t4], %[t2] \n\t"
1185 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw input words and keep only bit 31 (1 when the sign bit is set). */
1186 "lw %[t0], 0(%[in_int]) \n\t"
1187 "lw %[t1], 4(%[in_int]) \n\t"
1188 "lw %[t2], 8(%[in_int]) \n\t"
1189 "lw %[t3], 12(%[in_int]) \n\t"
1190 "srl %[t0], %[t0], 31 \n\t"
1191 "srl %[t1], %[t1], 31 \n\t"
1192 "srl %[t2], %[t2], 31 \n\t"
1193 "srl %[t3], %[t3], 31 \n\t"
/* Replace qc with -qc wherever the matching input had its sign bit set (t4 is reused as scratch). */
1194 "subu %[t4], $zero, %[qc1] \n\t"
1195 "subu %[t5], $zero, %[qc2] \n\t"
1196 "subu %[t6], $zero, %[qc3] \n\t"
1197 "subu %[t7], $zero, %[qc4] \n\t"
1198 "movn %[qc1], %[t4], %[t0] \n\t"
1199 "movn %[qc2], %[t5], %[t1] \n\t"
1200 "movn %[qc3], %[t6], %[t2] \n\t"
1201 "movn %[qc4], %[t7], %[t3] \n\t"
/* qc1..qc4 are read-write operands, t0..t7 early-clobber scratch, in_int the only input. */
1205 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1206 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1207 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1208 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1209 : [in_int]"r"(in_int)
/* NOTE(review): the +40 presumably re-centres the signed pair index (4*9 + 4); the preceding index arithmetic is not shown - confirm. */
1217 curidx2 += qc4 + 40;
/* Add the code lengths of both pairs. */
1219 curbits += p_bits[curidx] + p_bits[curidx2];
/**
 * Bit-count estimate for a band; installed in slots 7 and 8 of
 * get_band_numbits_arr.
 *
 * Four coefficients are handled per iteration. Each scaled[] value is
 * multiplied by Q34, offset by ROUND_STANDARD and truncated to int; the
 * inline assembly limits every quantized value to at most 7. The cost is
 * accumulated from p_bits[] and upair7_sign_bits[] using the indices curidx
 * and curidx2.
 */
1224 static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
1225 PutBitContext *pb, const float *in,
1226 const float *scaled, int size, int scale_idx,
1227 int cb, const float lambda, const float uplim,
/* Quantizer multiplier selected by scale_idx. */
1230 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1232 int qc1, qc2, qc3, qc4;
/* Per-index bit costs of the selected codebook (table row is cb-1). */
1235 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1237 for (i = 0; i < size; i += 4) {
1238 int curidx, curidx2;
1239 int t0, t1, t2, t3, t4;
/* Float-to-int assignment truncates toward zero. */
1241 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1242 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1243 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1244 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1248 ".set noreorder \n\t"
/* t4 = 7, the upper bound for the quantized values. */
1250 "ori %[t4], $zero, 7 \n\t"
/* tN = 1 when qcN is greater than the bound. */
1251 "slt %[t0], %[t4], %[qc1] \n\t"
1252 "slt %[t1], %[t4], %[qc2] \n\t"
1253 "slt %[t2], %[t4], %[qc3] \n\t"
1254 "slt %[t3], %[t4], %[qc4] \n\t"
/* Overwrite qcN with the bound wherever tN is non-zero. */
1255 "movn %[qc1], %[t4], %[t0] \n\t"
1256 "movn %[qc2], %[t4], %[t1] \n\t"
1257 "movn %[qc3], %[t4], %[t2] \n\t"
1258 "movn %[qc4], %[t4], %[t3] \n\t"
1262 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1263 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1264 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* Codeword cost plus sign-bit cost, summed in a single expression. */
1274 curbits += p_bits[curidx] +
1275 upair7_sign_bits[curidx] +
1277 upair7_sign_bits[curidx2];
/**
 * Bit-count estimate for a band; installed in slots 9 and 10 of
 * get_band_numbits_arr.
 *
 * Four coefficients are handled per iteration. Each scaled[] value is
 * multiplied by Q34, offset by ROUND_STANDARD and truncated to int; the
 * inline assembly limits every quantized value to at most 12. The cost is
 * accumulated from p_bits[] and upair12_sign_bits[] using the indices curidx
 * and curidx2.
 */
1282 static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
1283 PutBitContext *pb, const float *in,
1284 const float *scaled, int size, int scale_idx,
1285 int cb, const float lambda, const float uplim,
/* Quantizer multiplier selected by scale_idx. */
1288 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1290 int qc1, qc2, qc3, qc4;
/* Per-index bit costs of the selected codebook (table row is cb-1). */
1293 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1295 for (i = 0; i < size; i += 4) {
1296 int curidx, curidx2;
1297 int t0, t1, t2, t3, t4;
/* Float-to-int assignment truncates toward zero. */
1299 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1300 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1301 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1302 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1306 ".set noreorder \n\t"
/* t4 = 12, the upper bound for the quantized values. */
1308 "ori %[t4], $zero, 12 \n\t"
/* tN = 1 when qcN is greater than the bound. */
1309 "slt %[t0], %[t4], %[qc1] \n\t"
1310 "slt %[t1], %[t4], %[qc2] \n\t"
1311 "slt %[t2], %[t4], %[qc3] \n\t"
1312 "slt %[t3], %[t4], %[qc4] \n\t"
/* Overwrite qcN with the bound wherever tN is non-zero. */
1313 "movn %[qc1], %[t4], %[t0] \n\t"
1314 "movn %[qc2], %[t4], %[t1] \n\t"
1315 "movn %[qc3], %[t4], %[t2] \n\t"
1316 "movn %[qc4], %[t4], %[t3] \n\t"
1320 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1321 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1322 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* Codeword cost plus sign-bit cost, summed in a single expression. */
1332 curbits += p_bits[curidx] +
1334 upair12_sign_bits[curidx] +
1335 upair12_sign_bits[curidx2];
/**
 * Bit-count estimate for a band; installed in slot 11 of
 * get_band_numbits_arr.
 *
 * Four coefficients are handled per iteration. For each quantized value qcN
 * the assembly keeps a capped copy cN, sets condN when qcN is above 15,
 * replaces such qcN by 16, and turns cN into
 * (2 * floor(log2(cN)) - 3) masked by condN, i.e. zero unless condN is set.
 * The visible accumulation adds p_bits[] and esc_sign_bits[] for curidx and
 * curidx2.
 */
1340 static float get_band_numbits_ESC_mips(struct AACEncContext *s,
1341 PutBitContext *pb, const float *in,
1342 const float *scaled, int size, int scale_idx,
1343 int cb, const float lambda, const float uplim,
/* Quantizer multiplier selected by scale_idx. */
1346 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1348 int qc1, qc2, qc3, qc4;
/* Per-index bit costs of the selected codebook (table row is cb-1). */
1351 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1353 for (i = 0; i < size; i += 4) {
1354 int curidx, curidx2;
1355 int cond0, cond1, cond2, cond3;
/* Float-to-int assignment truncates toward zero. */
1359 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1360 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1361 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1362 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1366 ".set noreorder \n\t"
/* t4 = 15 is the comparison threshold, t5 = 16 the replacement value. */
1368 "ori %[t4], $zero, 15 \n\t"
1369 "ori %[t5], $zero, 16 \n\t"
/*
 * Saturating left shift by 18 followed by a logical right shift by 18:
 * for non-negative qcN this leaves cN = qcN capped at 13 bits (8191).
 */
1370 "shll_s.w %[c1], %[qc1], 18 \n\t"
1371 "shll_s.w %[c2], %[qc2], 18 \n\t"
1372 "shll_s.w %[c3], %[qc3], 18 \n\t"
1373 "shll_s.w %[c4], %[qc4], 18 \n\t"
1374 "srl %[c1], %[c1], 18 \n\t"
1375 "srl %[c2], %[c2], 18 \n\t"
1376 "srl %[c3], %[c3], 18 \n\t"
1377 "srl %[c4], %[c4], 18 \n\t"
/* condN = 1 when qcN is greater than 15. */
1378 "slt %[cond0], %[t4], %[qc1] \n\t"
1379 "slt %[cond1], %[t4], %[qc2] \n\t"
1380 "slt %[cond2], %[t4], %[qc3] \n\t"
1381 "slt %[cond3], %[t4], %[qc4] \n\t"
/* Replace qcN by 16 wherever condN is set. */
1382 "movn %[qc1], %[t5], %[cond0] \n\t"
1383 "movn %[qc2], %[t5], %[cond1] \n\t"
1384 "movn %[qc3], %[t5], %[cond2] \n\t"
1385 "movn %[qc4], %[t5], %[cond3] \n\t"
/* t5 is reused: 31 - clz(cN) gives the index of the highest set bit of cN. */
1386 "ori %[t5], $zero, 31 \n\t"
1387 "clz %[c1], %[c1] \n\t"
1388 "clz %[c2], %[c2] \n\t"
1389 "clz %[c3], %[c3] \n\t"
1390 "clz %[c4], %[c4] \n\t"
1391 "subu %[c1], %[t5], %[c1] \n\t"
1392 "subu %[c2], %[t5], %[c2] \n\t"
1393 "subu %[c3], %[t5], %[c3] \n\t"
1394 "subu %[c4], %[t5], %[c4] \n\t"
/* cN = 2 * cN - 3. */
1395 "sll %[c1], %[c1], 1 \n\t"
1396 "sll %[c2], %[c2], 1 \n\t"
1397 "sll %[c3], %[c3], 1 \n\t"
1398 "sll %[c4], %[c4], 1 \n\t"
1399 "addiu %[c1], %[c1], -3 \n\t"
1400 "addiu %[c2], %[c2], -3 \n\t"
1401 "addiu %[c3], %[c3], -3 \n\t"
1402 "addiu %[c4], %[c4], -3 \n\t"
/* Turn condN (0 or 1) into an all-zeros / all-ones mask. */
1403 "subu %[cond0], $zero, %[cond0] \n\t"
1404 "subu %[cond1], $zero, %[cond1] \n\t"
1405 "subu %[cond2], $zero, %[cond2] \n\t"
1406 "subu %[cond3], $zero, %[cond3] \n\t"
/* Keep cN only where condN was set; otherwise it becomes 0. */
1407 "and %[c1], %[c1], %[cond0] \n\t"
1408 "and %[c2], %[c2], %[cond1] \n\t"
1409 "and %[c3], %[c3], %[cond2] \n\t"
1410 "and %[c4], %[c4], %[cond3] \n\t"
1414 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1415 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1416 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
1417 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
1418 [c1]"=&r"(c1), [c2]"=&r"(c2),
1419 [c3]"=&r"(c3), [c4]"=&r"(c4),
1420 [t4]"=&r"(t4), [t5]"=&r"(t5)
/* Codeword cost plus sign-bit cost for both indices. */
1429 curbits += p_bits[curidx];
1430 curbits += esc_sign_bits[curidx];
1431 curbits += p_bits[curidx2];
1432 curbits += esc_sign_bits[curidx2];
/*
 * Dispatch table of bit-count estimators. The get_band_numbits() macro
 * indexes it with the codebook number cb, so the order of the entries below
 * is significant.
 */
1442 static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
1443 PutBitContext *pb, const float *in,
1444 const float *scaled, int size, int scale_idx,
1445 int cb, const float lambda, const float uplim,
1447 get_band_numbits_ZERO_mips,
1448 get_band_numbits_SQUAD_mips,
1449 get_band_numbits_SQUAD_mips,
1450 get_band_numbits_UQUAD_mips,
1451 get_band_numbits_UQUAD_mips,
1452 get_band_numbits_SPAIR_mips,
1453 get_band_numbits_SPAIR_mips,
1454 get_band_numbits_UPAIR7_mips,
1455 get_band_numbits_UPAIR7_mips,
1456 get_band_numbits_UPAIR12_mips,
1457 get_band_numbits_UPAIR12_mips,
1458 get_band_numbits_ESC_mips,
1459 get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
1460 get_band_numbits_ZERO_mips,
1461 get_band_numbits_ZERO_mips,
1462 get_band_numbits_ZERO_mips,
/*
 * Call the bit-count estimator registered for codebook cb, forwarding all
 * arguments unchanged. cb is used both as the table index and as an argument,
 * so it is evaluated twice.
 */
1465 #define get_band_numbits( \
1466 s, pb, in, scaled, size, scale_idx, cb, \
1467 lambda, uplim, bits) \
1468 get_band_numbits_arr[cb]( \
1469 s, pb, in, scaled, size, scale_idx, cb, \
1470 lambda, uplim, bits)
/**
 * Return the bit-count estimate for a band by dispatching on cb through
 * get_band_numbits().
 *
 * A NULL PutBitContext is passed to the estimator. The energy and rtz
 * parameters are accepted but not forwarded.
 */
1472 static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
1473 const float *scaled, int size, int scale_idx,
1474 int cb, const float lambda, const float uplim,
1475 int *bits, float *energy, int rtz)
1477 return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
1481 * Functions developed from template function and optimized for getting the band cost
/**
 * Band cost for the entries of get_band_cost_arr that use this function
 * (slot 0 and slots 13-15).
 *
 * The cost is the sum of the squares of the input coefficients, accumulated
 * four at a time and scaled by lambda. No quantization is performed.
 *
 * @return cost * lambda
 */
1484 static float get_band_cost_ZERO_mips(struct AACEncContext *s,
1485 PutBitContext *pb, const float *in,
1486 const float *scaled, int size, int scale_idx,
1487 int cb, const float lambda, const float uplim,
1488 int *bits, float *energy)
/* Manually unrolled by four; size is consumed in steps of 4. */
1493 for (i = 0; i < size; i += 4) {
1494 cost += in[i ] * in[i ];
1495 cost += in[i+1] * in[i+1];
1496 cost += in[i+2] * in[i+2];
1497 cost += in[i+3] * in[i+3];
1503 return cost * lambda;
/*
 * Entry used for slot 12 of get_band_cost_arr, which that table marks as a
 * codebook that does not exist. It shares the signature of the other
 * get_band_cost_*_mips functions so it can sit in the same table.
 */
1506 static float get_band_cost_NONE_mips(struct AACEncContext *s,
1507 PutBitContext *pb, const float *in,
1508 const float *scaled, int size, int scale_idx,
1509 int cb, const float lambda, const float uplim,
1510 int *bits, float *energy)
/**
 * Rate-distortion cost of a band; installed in slots 1 and 2 of
 * get_band_cost_arr.
 *
 * Per group of four coefficients: the quantized values are reduced to 0 or 1
 * (1 when the value is positive), negated where the input sign bit is set,
 * and used to select a four-element codebook vector. The distortion is the
 * squared difference between the input and the vector scaled by IQ.
 *
 * @param energy receives qenergy * IQ * IQ, where qenergy is the sum of the
 *               squared codebook vector elements
 * @return cost * lambda + curbits
 */
1516 static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
1517 PutBitContext *pb, const float *in,
1518 const float *scaled, int size, int scale_idx,
1519 int cb, const float lambda, const float uplim,
1520 int *bits, float *energy)
/* Q34 scales into the quantized domain, IQ scales codebook values back. */
1522 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1523 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1526 float qenergy = 0.0f;
1527 int qc1, qc2, qc3, qc4;
/* Bit costs and codebook vectors of the selected codebook (row cb-1). */
1530 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1531 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1533 for (i = 0; i < size; i += 4) {
/* Integer view of the inputs for sign extraction, float view for the FPU. */
1536 int *in_int = (int *)&in[i];
1537 float *in_pos = (float *)&in[i];
1538 float di0, di1, di2, di3;
1539 int t0, t1, t2, t3, t4, t5, t6, t7;
/* Float-to-int assignment truncates toward zero. */
1541 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1542 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1543 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1544 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1548 ".set noreorder \n\t"
/* qcN = (0 < qcN): every positive value becomes 1, everything else 0. */
1550 "slt %[qc1], $zero, %[qc1] \n\t"
1551 "slt %[qc2], $zero, %[qc2] \n\t"
1552 "slt %[qc3], $zero, %[qc3] \n\t"
1553 "slt %[qc4], $zero, %[qc4] \n\t"
/* Load the raw 32-bit words of the four input floats. */
1554 "lw %[t0], 0(%[in_int]) \n\t"
1555 "lw %[t1], 4(%[in_int]) \n\t"
1556 "lw %[t2], 8(%[in_int]) \n\t"
1557 "lw %[t3], 12(%[in_int]) \n\t"
/* Keep only bit 31 of each word (the float sign bit) as 0 or 1. */
1558 "srl %[t0], %[t0], 31 \n\t"
1559 "srl %[t1], %[t1], 31 \n\t"
1560 "srl %[t2], %[t2], 31 \n\t"
1561 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = negated quantized values. */
1562 "subu %[t4], $zero, %[qc1] \n\t"
1563 "subu %[t5], $zero, %[qc2] \n\t"
1564 "subu %[t6], $zero, %[qc3] \n\t"
1565 "subu %[t7], $zero, %[qc4] \n\t"
/* Select the negated value where the input sign bit was set. */
1566 "movn %[qc1], %[t4], %[t0] \n\t"
1567 "movn %[qc2], %[t5], %[t1] \n\t"
1568 "movn %[qc3], %[t6], %[t2] \n\t"
1569 "movn %[qc4], %[t7], %[t3] \n\t"
1573 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1574 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1575 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1576 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1577 : [in_int]"r"(in_int)
/* Codeword cost and the four-element vector selected by curidx. */
1590 curbits += p_bits[curidx];
1591 vec = &p_codes[curidx*4];
1593 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1594 + vec[2]*vec[2] + vec[3]*vec[3];
1598 ".set noreorder \n\t"
/* Interleaved loads: even registers hold inputs, odd registers hold vector elements. */
1600 "lwc1 $f0, 0(%[in_pos]) \n\t"
1601 "lwc1 $f1, 0(%[vec]) \n\t"
1602 "lwc1 $f2, 4(%[in_pos]) \n\t"
1603 "lwc1 $f3, 4(%[vec]) \n\t"
1604 "lwc1 $f4, 8(%[in_pos]) \n\t"
1605 "lwc1 $f5, 8(%[vec]) \n\t"
1606 "lwc1 $f6, 12(%[in_pos]) \n\t"
1607 "lwc1 $f7, 12(%[vec]) \n\t"
/* diN = in[N] - vec[N] * IQ (negated multiply-subtract). */
1608 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1609 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1610 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1611 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1615 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1616 [di2]"=&f"(di2), [di3]"=&f"(di3)
1617 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1619 : "$f0", "$f1", "$f2", "$f3",
1620 "$f4", "$f5", "$f6", "$f7",
/* Accumulate the squared error of the group. */
1624 cost += di0 * di0 + di1 * di1
1625 + di2 * di2 + di3 * di3;
1631 *energy = qenergy * (IQ*IQ);
1632 return cost * lambda + curbits;
/**
 * Rate-distortion cost of a band; installed in slots 3 and 4 of
 * get_band_cost_arr.
 *
 * Per group of four coefficients: the quantized values are limited to at
 * most 2 and used to select a four-element codebook vector. The distortion
 * is the squared difference between the absolute input values and the
 * vector scaled by IQ; the bit cost includes uquad_sign_bits[].
 *
 * @param energy receives qenergy * IQ * IQ, where qenergy is the sum of the
 *               squared codebook vector elements
 * @return cost * lambda + curbits
 */
1635 static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
1636 PutBitContext *pb, const float *in,
1637 const float *scaled, int size, int scale_idx,
1638 int cb, const float lambda, const float uplim,
1639 int *bits, float *energy)
/* Q34 scales into the quantized domain, IQ scales codebook values back. */
1641 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1642 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1645 float qenergy = 0.0f;
1647 int qc1, qc2, qc3, qc4;
/* Bit costs and codebook vectors of the selected codebook (row cb-1). */
1649 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1650 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1652 for (i = 0; i < size; i += 4) {
1655 float *in_pos = (float *)&in[i];
1656 float di0, di1, di2, di3;
1657 int t0, t1, t2, t3, t4;
/* Float-to-int assignment truncates toward zero. */
1659 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1660 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1661 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1662 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1666 ".set noreorder \n\t"
/* t4 = 2, the upper bound for the quantized values. */
1668 "ori %[t4], $zero, 2 \n\t"
/* tN = 1 when qcN is greater than the bound. */
1669 "slt %[t0], %[t4], %[qc1] \n\t"
1670 "slt %[t1], %[t4], %[qc2] \n\t"
1671 "slt %[t2], %[t4], %[qc3] \n\t"
1672 "slt %[t3], %[t4], %[qc4] \n\t"
/* Overwrite qcN with the bound wherever tN is non-zero. */
1673 "movn %[qc1], %[t4], %[t0] \n\t"
1674 "movn %[qc2], %[t4], %[t1] \n\t"
1675 "movn %[qc3], %[t4], %[t2] \n\t"
1676 "movn %[qc4], %[t4], %[t3] \n\t"
1680 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1681 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1682 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* Codeword cost, sign-bit cost and the four-element vector for curidx. */
1694 curbits += p_bits[curidx];
1695 curbits += uquad_sign_bits[curidx];
1696 vec = &p_codes[curidx*4];
1698 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1699 + vec[2]*vec[2] + vec[3]*vec[3];
1703 ".set noreorder \n\t"
/* diN = |in[N]|: load the inputs and clear their signs. */
1705 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1706 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1707 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1708 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1709 "abs.s %[di0], %[di0] \n\t"
1710 "abs.s %[di1], %[di1] \n\t"
1711 "abs.s %[di2], %[di2] \n\t"
1712 "abs.s %[di3], %[di3] \n\t"
/* $f0..$f3 = the four codebook vector elements. */
1713 "lwc1 $f0, 0(%[vec]) \n\t"
1714 "lwc1 $f1, 4(%[vec]) \n\t"
1715 "lwc1 $f2, 8(%[vec]) \n\t"
1716 "lwc1 $f3, 12(%[vec]) \n\t"
/* diN = |in[N]| - vec[N] * IQ (negated multiply-subtract). */
1717 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1718 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1719 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1720 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1724 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1725 [di2]"=&f"(di2), [di3]"=&f"(di3)
1726 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1728 : "$f0", "$f1", "$f2", "$f3",
/* Accumulate the squared error of the group. */
1732 cost += di0 * di0 + di1 * di1
1733 + di2 * di2 + di3 * di3;
1739 *energy = qenergy * (IQ*IQ);
1740 return cost * lambda + curbits;
/**
 * Rate-distortion cost of a band; installed in slots 5 and 6 of
 * get_band_cost_arr.
 *
 * Per group of four coefficients: the quantized magnitudes are limited to at
 * most 4, negated where the input sign bit is set, and used to select two
 * two-element codebook vectors (vec and vec2). The distortion is the squared
 * difference between the input and the vectors scaled by IQ.
 *
 * @param energy receives qenergy * IQ * IQ, where qenergy is the sum of the
 *               squared codebook vector elements
 * @return cost * lambda + curbits
 */
1743 static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
1744 PutBitContext *pb, const float *in,
1745 const float *scaled, int size, int scale_idx,
1746 int cb, const float lambda, const float uplim,
1747 int *bits, float *energy)
/* Q34 scales into the quantized domain, IQ scales codebook values back. */
1749 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1750 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1753 float qenergy = 0.0f;
1754 int qc1, qc2, qc3, qc4;
/* Bit costs and codebook vectors of the selected codebook (row cb-1). */
1757 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1758 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1760 for (i = 0; i < size; i += 4) {
1761 const float *vec, *vec2;
1762 int curidx, curidx2;
/* Integer view of the inputs for sign extraction, float view for the FPU. */
1763 int *in_int = (int *)&in[i];
1764 float *in_pos = (float *)&in[i];
1765 float di0, di1, di2, di3;
1766 int t0, t1, t2, t3, t4, t5, t6, t7;
/* Float-to-int assignment truncates toward zero. */
1768 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1769 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1770 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1771 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1775 ".set noreorder \n\t"
/* t4 = 4, the upper bound for the quantized magnitudes. */
1777 "ori %[t4], $zero, 4 \n\t"
/* tN = 1 when qcN is greater than the bound. */
1778 "slt %[t0], %[t4], %[qc1] \n\t"
1779 "slt %[t1], %[t4], %[qc2] \n\t"
1780 "slt %[t2], %[t4], %[qc3] \n\t"
1781 "slt %[t3], %[t4], %[qc4] \n\t"
/* Overwrite qcN with the bound wherever tN is non-zero. */
1782 "movn %[qc1], %[t4], %[t0] \n\t"
1783 "movn %[qc2], %[t4], %[t1] \n\t"
1784 "movn %[qc3], %[t4], %[t2] \n\t"
1785 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw 32-bit words of the four input floats. */
1786 "lw %[t0], 0(%[in_int]) \n\t"
1787 "lw %[t1], 4(%[in_int]) \n\t"
1788 "lw %[t2], 8(%[in_int]) \n\t"
1789 "lw %[t3], 12(%[in_int]) \n\t"
/* Keep only bit 31 of each word (the float sign bit) as 0 or 1. */
1790 "srl %[t0], %[t0], 31 \n\t"
1791 "srl %[t1], %[t1], 31 \n\t"
1792 "srl %[t2], %[t2], 31 \n\t"
1793 "srl %[t3], %[t3], 31 \n\t"
/* t4..t7 = negated quantized values. */
1794 "subu %[t4], $zero, %[qc1] \n\t"
1795 "subu %[t5], $zero, %[qc2] \n\t"
1796 "subu %[t6], $zero, %[qc3] \n\t"
1797 "subu %[t7], $zero, %[qc4] \n\t"
/* Select the negated value where the input sign bit was set. */
1798 "movn %[qc1], %[t4], %[t0] \n\t"
1799 "movn %[qc2], %[t5], %[t1] \n\t"
1800 "movn %[qc3], %[t6], %[t2] \n\t"
1801 "movn %[qc4], %[t7], %[t3] \n\t"
1805 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1806 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1807 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1808 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1809 : [in_int]"r"(in_int)
/* Fold the (possibly negative) qc4 into the second index with a +40 bias. */
1817 curidx2 += qc4 + 40;
1819 curbits += p_bits[curidx];
1820 curbits += p_bits[curidx2];
/* Each index selects a two-element vector. */
1822 vec = &p_codes[curidx*2];
1823 vec2 = &p_codes[curidx2*2];
1825 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1826 + vec2[0]*vec2[0] + vec2[1]*vec2[1];
1830 ".set noreorder \n\t"
/* Even registers hold inputs; odd registers hold vec[0..1] then vec2[0..1]. */
1832 "lwc1 $f0, 0(%[in_pos]) \n\t"
1833 "lwc1 $f1, 0(%[vec]) \n\t"
1834 "lwc1 $f2, 4(%[in_pos]) \n\t"
1835 "lwc1 $f3, 4(%[vec]) \n\t"
1836 "lwc1 $f4, 8(%[in_pos]) \n\t"
1837 "lwc1 $f5, 0(%[vec2]) \n\t"
1838 "lwc1 $f6, 12(%[in_pos]) \n\t"
1839 "lwc1 $f7, 4(%[vec2]) \n\t"
/* diN = input - codebook value * IQ (negated multiply-subtract). */
1840 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1841 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1842 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1843 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1847 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1848 [di2]"=&f"(di2), [di3]"=&f"(di3)
1849 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1850 [vec2]"r"(vec2), [IQ]"f"(IQ)
1851 : "$f0", "$f1", "$f2", "$f3",
1852 "$f4", "$f5", "$f6", "$f7",
/* Accumulate the squared error of the group. */
1856 cost += di0 * di0 + di1 * di1
1857 + di2 * di2 + di3 * di3;
1863 *energy = qenergy * (IQ*IQ);
1864 return cost * lambda + curbits;
/**
 * Rate-distortion cost of a band; installed in slots 7 and 8 of
 * get_band_cost_arr.
 *
 * Per group of four coefficients: the quantized values are limited to at
 * most 7 and used to select two two-element codebook vectors (vec and vec2).
 * The assembly also packs the input sign bits of the non-zero quantized
 * values into sign1/sign2 and counts the positive quantized values in
 * count1/count2. The distortion is the squared difference between the
 * absolute input values and the vectors scaled by IQ.
 *
 * @param energy receives qenergy * IQ * IQ, where qenergy is the sum of the
 *               squared codebook vector elements
 * @return cost * lambda + curbits
 */
1867 static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
1868 PutBitContext *pb, const float *in,
1869 const float *scaled, int size, int scale_idx,
1870 int cb, const float lambda, const float uplim,
1871 int *bits, float *energy)
/* Q34 scales into the quantized domain, IQ scales codebook values back. */
1873 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1874 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1877 float qenergy = 0.0f;
1878 int qc1, qc2, qc3, qc4;
/* Bit costs and codebook vectors of the selected codebook (row cb-1). */
1881 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1882 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1884 for (i = 0; i < size; i += 4) {
1885 const float *vec, *vec2;
1886 int curidx, curidx2, sign1, count1, sign2, count2;
/* Integer view of the inputs for sign extraction, float view for the FPU. */
1887 int *in_int = (int *)&in[i];
1888 float *in_pos = (float *)&in[i];
1889 float di0, di1, di2, di3;
1890 int t0, t1, t2, t3, t4;
/* Float-to-int assignment truncates toward zero. */
1892 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1893 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1894 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1895 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1899 ".set noreorder \n\t"
/* t4 = 7 (upper bound); sign accumulators start at 0. */
1901 "ori %[t4], $zero, 7 \n\t"
1902 "ori %[sign1], $zero, 0 \n\t"
1903 "ori %[sign2], $zero, 0 \n\t"
/* tN = 1 when qcN is greater than the bound. */
1904 "slt %[t0], %[t4], %[qc1] \n\t"
1905 "slt %[t1], %[t4], %[qc2] \n\t"
1906 "slt %[t2], %[t4], %[qc3] \n\t"
1907 "slt %[t3], %[t4], %[qc4] \n\t"
/* Overwrite qcN with the bound wherever tN is non-zero. */
1908 "movn %[qc1], %[t4], %[t0] \n\t"
1909 "movn %[qc2], %[t4], %[t1] \n\t"
1910 "movn %[qc3], %[t4], %[t2] \n\t"
1911 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw 32-bit words of the four input floats. */
1912 "lw %[t0], 0(%[in_int]) \n\t"
1913 "lw %[t1], 4(%[in_int]) \n\t"
1914 "lw %[t2], 8(%[in_int]) \n\t"
1915 "lw %[t3], 12(%[in_int]) \n\t"
/*
 * Signed "word < 0" tests yield the float sign bit. sign1 takes the sign of
 * input 0 if qc1 != 0, then is shifted left and gets the sign of input 1
 * appended if qc2 != 0; sign2 does the same for inputs 2/3 and qc3/qc4.
 */
1916 "slt %[t0], %[t0], $zero \n\t"
1917 "movn %[sign1], %[t0], %[qc1] \n\t"
1918 "slt %[t2], %[t2], $zero \n\t"
1919 "movn %[sign2], %[t2], %[qc3] \n\t"
1920 "slt %[t1], %[t1], $zero \n\t"
1921 "sll %[t0], %[sign1], 1 \n\t"
1922 "or %[t0], %[t0], %[t1] \n\t"
1923 "movn %[sign1], %[t0], %[qc2] \n\t"
1924 "slt %[t3], %[t3], $zero \n\t"
1925 "sll %[t0], %[sign2], 1 \n\t"
1926 "or %[t0], %[t0], %[t3] \n\t"
1927 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 = (qc1 > 0) + (qc2 > 0); count2 = (qc3 > 0) + (qc4 > 0). */
1928 "slt %[count1], $zero, %[qc1] \n\t"
1929 "slt %[t1], $zero, %[qc2] \n\t"
1930 "slt %[count2], $zero, %[qc3] \n\t"
1931 "slt %[t2], $zero, %[qc4] \n\t"
1932 "addu %[count1], %[count1], %[t1] \n\t"
1933 "addu %[count2], %[count2], %[t2] \n\t"
1937 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1938 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1939 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1940 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1941 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1943 : [in_int]"r"(in_int)
/* Codeword cost, sign-bit cost and two-element vector for each index. */
1953 curbits += p_bits[curidx];
1954 curbits += upair7_sign_bits[curidx];
1955 vec = &p_codes[curidx*2];
1957 curbits += p_bits[curidx2];
1958 curbits += upair7_sign_bits[curidx2];
1959 vec2 = &p_codes[curidx2*2];
1961 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1962 + vec2[0]*vec2[0] + vec2[1]*vec2[1];
1966 ".set noreorder \n\t"
/* diN = |in[N]|: load the inputs and clear their signs. */
1968 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1969 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1970 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1971 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1972 "abs.s %[di0], %[di0] \n\t"
1973 "abs.s %[di1], %[di1] \n\t"
1974 "abs.s %[di2], %[di2] \n\t"
1975 "abs.s %[di3], %[di3] \n\t"
/* $f0/$f1 = vec[0..1], $f2/$f3 = vec2[0..1]. */
1976 "lwc1 $f0, 0(%[vec]) \n\t"
1977 "lwc1 $f1, 4(%[vec]) \n\t"
1978 "lwc1 $f2, 0(%[vec2]) \n\t"
1979 "lwc1 $f3, 4(%[vec2]) \n\t"
/* diN = |in[N]| - codebook value * IQ (negated multiply-subtract). */
1980 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1981 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1982 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1983 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1987 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1988 [di2]"=&f"(di2), [di3]"=&f"(di3)
1989 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1990 [vec2]"r"(vec2), [IQ]"f"(IQ)
1991 : "$f0", "$f1", "$f2", "$f3",
/* Accumulate the squared error of the group. */
1995 cost += di0 * di0 + di1 * di1
1996 + di2 * di2 + di3 * di3;
2002 *energy = qenergy * (IQ*IQ);
2003 return cost * lambda + curbits;
/**
 * Rate-distortion cost of a band; installed in slots 9 and 10 of
 * get_band_cost_arr.
 *
 * Per group of four coefficients: the quantized values are limited to at
 * most 12 and used to select two two-element codebook vectors (vec and
 * vec2). The assembly also packs the input sign bits of the non-zero
 * quantized values into sign1/sign2 and counts the positive quantized values
 * in count1/count2. The distortion is the squared difference between the
 * absolute input values and the vectors scaled by IQ.
 *
 * @param energy receives qenergy * IQ * IQ, where qenergy is the sum of the
 *               squared codebook vector elements
 * @return cost * lambda + curbits
 */
2006 static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
2007 PutBitContext *pb, const float *in,
2008 const float *scaled, int size, int scale_idx,
2009 int cb, const float lambda, const float uplim,
2010 int *bits, float *energy)
/* Q34 scales into the quantized domain, IQ scales codebook values back. */
2012 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2013 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
2016 float qenergy = 0.0f;
2017 int qc1, qc2, qc3, qc4;
/* Bit costs and codebook vectors of the selected codebook (row cb-1). */
2020 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
2021 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
2023 for (i = 0; i < size; i += 4) {
2024 const float *vec, *vec2;
2025 int curidx, curidx2;
2026 int sign1, count1, sign2, count2;
/* Integer view of the inputs for sign extraction, float view for the FPU. */
2027 int *in_int = (int *)&in[i];
2028 float *in_pos = (float *)&in[i];
2029 float di0, di1, di2, di3;
2030 int t0, t1, t2, t3, t4;
/* Float-to-int assignment truncates toward zero. */
2032 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
2033 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2034 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2035 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2039 ".set noreorder \n\t"
/* t4 = 12 (upper bound); sign accumulators start at 0. */
2041 "ori %[t4], $zero, 12 \n\t"
2042 "ori %[sign1], $zero, 0 \n\t"
2043 "ori %[sign2], $zero, 0 \n\t"
/* tN = 1 when qcN is greater than the bound. */
2044 "slt %[t0], %[t4], %[qc1] \n\t"
2045 "slt %[t1], %[t4], %[qc2] \n\t"
2046 "slt %[t2], %[t4], %[qc3] \n\t"
2047 "slt %[t3], %[t4], %[qc4] \n\t"
/* Overwrite qcN with the bound wherever tN is non-zero. */
2048 "movn %[qc1], %[t4], %[t0] \n\t"
2049 "movn %[qc2], %[t4], %[t1] \n\t"
2050 "movn %[qc3], %[t4], %[t2] \n\t"
2051 "movn %[qc4], %[t4], %[t3] \n\t"
/* Load the raw 32-bit words of the four input floats. */
2052 "lw %[t0], 0(%[in_int]) \n\t"
2053 "lw %[t1], 4(%[in_int]) \n\t"
2054 "lw %[t2], 8(%[in_int]) \n\t"
2055 "lw %[t3], 12(%[in_int]) \n\t"
/*
 * Signed "word < 0" tests yield the float sign bit. sign1 takes the sign of
 * input 0 if qc1 != 0, then is shifted left and gets the sign of input 1
 * appended if qc2 != 0; sign2 does the same for inputs 2/3 and qc3/qc4.
 */
2056 "slt %[t0], %[t0], $zero \n\t"
2057 "movn %[sign1], %[t0], %[qc1] \n\t"
2058 "slt %[t2], %[t2], $zero \n\t"
2059 "movn %[sign2], %[t2], %[qc3] \n\t"
2060 "slt %[t1], %[t1], $zero \n\t"
2061 "sll %[t0], %[sign1], 1 \n\t"
2062 "or %[t0], %[t0], %[t1] \n\t"
2063 "movn %[sign1], %[t0], %[qc2] \n\t"
2064 "slt %[t3], %[t3], $zero \n\t"
2065 "sll %[t0], %[sign2], 1 \n\t"
2066 "or %[t0], %[t0], %[t3] \n\t"
2067 "movn %[sign2], %[t0], %[qc4] \n\t"
/* count1 = (qc1 > 0) + (qc2 > 0); count2 = (qc3 > 0) + (qc4 > 0). */
2068 "slt %[count1], $zero, %[qc1] \n\t"
2069 "slt %[t1], $zero, %[qc2] \n\t"
2070 "slt %[count2], $zero, %[qc3] \n\t"
2071 "slt %[t2], $zero, %[qc4] \n\t"
2072 "addu %[count1], %[count1], %[t1] \n\t"
2073 "addu %[count2], %[count2], %[t2] \n\t"
2077 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2078 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2079 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
2080 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
2081 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
2083 : [in_int]"r"(in_int)
/* Codeword cost, sign-bit cost and two-element vector for each index. */
2093 curbits += p_bits[curidx];
2094 curbits += p_bits[curidx2];
2095 curbits += upair12_sign_bits[curidx];
2096 curbits += upair12_sign_bits[curidx2];
2097 vec = &p_codes[curidx*2];
2098 vec2 = &p_codes[curidx2*2];
2100 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
2101 + vec2[0]*vec2[0] + vec2[1]*vec2[1];
2105 ".set noreorder \n\t"
/* diN = |in[N]|: load the inputs and clear their signs. */
2107 "lwc1 %[di0], 0(%[in_pos]) \n\t"
2108 "lwc1 %[di1], 4(%[in_pos]) \n\t"
2109 "lwc1 %[di2], 8(%[in_pos]) \n\t"
2110 "lwc1 %[di3], 12(%[in_pos]) \n\t"
2111 "abs.s %[di0], %[di0] \n\t"
2112 "abs.s %[di1], %[di1] \n\t"
2113 "abs.s %[di2], %[di2] \n\t"
2114 "abs.s %[di3], %[di3] \n\t"
/* $f0/$f1 = vec[0..1], $f2/$f3 = vec2[0..1]. */
2115 "lwc1 $f0, 0(%[vec]) \n\t"
2116 "lwc1 $f1, 4(%[vec]) \n\t"
2117 "lwc1 $f2, 0(%[vec2]) \n\t"
2118 "lwc1 $f3, 4(%[vec2]) \n\t"
/* diN = |in[N]| - codebook value * IQ (negated multiply-subtract). */
2119 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
2120 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
2121 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
2122 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
2126 : [di0]"=&f"(di0), [di1]"=&f"(di1),
2127 [di2]"=&f"(di2), [di3]"=&f"(di3)
2128 : [in_pos]"r"(in_pos), [vec]"r"(vec),
2129 [vec2]"r"(vec2), [IQ]"f"(IQ)
2130 : "$f0", "$f1", "$f2", "$f3",
/* Accumulate the squared error of the group. */
2134 cost += di0 * di0 + di1 * di1
2135 + di2 * di2 + di3 * di3;
2141 *energy = qenergy * (IQ*IQ);
2142 return cost * lambda + curbits;
/**
 * Rate-distortion cost of a band; installed in slot 11 of
 * get_band_cost_arr.
 *
 * Per group of four coefficients: the assembly keeps a capped copy cN of
 * each quantized value, sets condN when the value is above 15 and replaces
 * such values by 16. The bit cost adds the codeword and sign-bit table
 * entries plus (2 * av_log2(cN) - 3) extra bits for every coefficient with
 * condN set. The distortion of each coefficient is measured against
 * CLIPPED_ESCAPE, against cN * cbrtf(cN) * IQ, or against the codebook vector
 * element scaled by IQ.
 *
 * NOTE(review): unlike the other get_band_cost_*_mips functions, no store to
 * *energy is visible here although qenergy is accumulated - confirm whether
 * callers that pass a non-NULL energy can reach this function.
 *
 * @return cost * lambda + curbits
 */
2145 static float get_band_cost_ESC_mips(struct AACEncContext *s,
2146 PutBitContext *pb, const float *in,
2147 const float *scaled, int size, int scale_idx,
2148 int cb, const float lambda, const float uplim,
2149 int *bits, float *energy)
/* Q34 scales into the quantized domain, IQ scales codebook values back. */
2151 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2152 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
/* Magnitude at or above which an input is treated as clipped. */
2153 const float CLIPPED_ESCAPE = 165140.0f * IQ;
2156 float qenergy = 0.0f;
2157 int qc1, qc2, qc3, qc4;
/* Bit costs and codebook vectors of the selected codebook (row cb-1). */
2160 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
2161 float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
2163 for (i = 0; i < size; i += 4) {
2164 const float *vec, *vec2;
2165 int curidx, curidx2;
2166 float t1, t2, t3, t4, V;
2167 float di1, di2, di3, di4;
2168 int cond0, cond1, cond2, cond3;
/* Float-to-int assignment truncates toward zero. */
2172 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
2173 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2174 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2175 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2179 ".set noreorder \n\t"
/* t6 = 15 is the comparison threshold, t7 = 16 the replacement value. */
2181 "ori %[t6], $zero, 15 \n\t"
2182 "ori %[t7], $zero, 16 \n\t"
/*
 * Saturating left shift by 18 followed by a logical right shift by 18:
 * for non-negative qcN this leaves cN = qcN capped at 13 bits (8191).
 */
2183 "shll_s.w %[c1], %[qc1], 18 \n\t"
2184 "shll_s.w %[c2], %[qc2], 18 \n\t"
2185 "shll_s.w %[c3], %[qc3], 18 \n\t"
2186 "shll_s.w %[c4], %[qc4], 18 \n\t"
2187 "srl %[c1], %[c1], 18 \n\t"
2188 "srl %[c2], %[c2], 18 \n\t"
2189 "srl %[c3], %[c3], 18 \n\t"
2190 "srl %[c4], %[c4], 18 \n\t"
/* condN = 1 when qcN is greater than 15. */
2191 "slt %[cond0], %[t6], %[qc1] \n\t"
2192 "slt %[cond1], %[t6], %[qc2] \n\t"
2193 "slt %[cond2], %[t6], %[qc3] \n\t"
2194 "slt %[cond3], %[t6], %[qc4] \n\t"
/* Replace qcN by 16 wherever condN is set. */
2195 "movn %[qc1], %[t7], %[cond0] \n\t"
2196 "movn %[qc2], %[t7], %[cond1] \n\t"
2197 "movn %[qc3], %[t7], %[cond2] \n\t"
2198 "movn %[qc4], %[t7], %[cond3] \n\t"
2202 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2203 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2204 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
2205 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
2206 [c1]"=&r"(c1), [c2]"=&r"(c2),
2207 [c3]"=&r"(c3), [c4]"=&r"(c4),
2208 [t6]"=&r"(t6), [t7]"=&r"(t7)
/* Codeword cost, sign-bit cost and two-element vector for each index. */
2217 curbits += p_bits[curidx];
2218 curbits += esc_sign_bits[curidx];
2219 vec = &p_codes[curidx*2];
2221 curbits += p_bits[curidx2];
2222 curbits += esc_sign_bits[curidx2];
2223 vec2 = &p_codes[curidx2*2];
/* -condN is an all-ones mask when condN == 1, so the term is added only then. */
2225 curbits += (av_log2(c1) * 2 - 3) & (-cond0);
2226 curbits += (av_log2(c2) * 2 - 3) & (-cond1);
2227 curbits += (av_log2(c3) * 2 - 3) & (-cond2);
2228 curbits += (av_log2(c4) * 2 - 3) & (-cond3);
/* Work on input magnitudes from here on. */
2231 t2 = fabsf(in[i+1]);
2232 t3 = fabsf(in[i+2]);
2233 t4 = fabsf(in[i+3]);
/* Coefficient 1: clipped case, c1-based reconstruction, or codebook value. */
2236 if (t1 >= CLIPPED_ESCAPE) {
2237 di1 = t1 - CLIPPED_ESCAPE;
2238 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2240 di1 = t1 - (V = c1 * cbrtf(c1) * IQ);
2244 di1 = t1 - (V = vec[0] * IQ);
/* Coefficient 2: same three cases using c2 and vec[1]. */
2249 if (t2 >= CLIPPED_ESCAPE) {
2250 di2 = t2 - CLIPPED_ESCAPE;
2251 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2253 di2 = t2 - (V = c2 * cbrtf(c2) * IQ);
2257 di2 = t2 - (V = vec[1] * IQ);
/* Coefficient 3: same three cases using c3 and vec2[0]. */
2262 if (t3 >= CLIPPED_ESCAPE) {
2263 di3 = t3 - CLIPPED_ESCAPE;
2264 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2266 di3 = t3 - (V = c3 * cbrtf(c3) * IQ);
2270 di3 = t3 - (V = vec2[0] * IQ);
/* Coefficient 4: same three cases using c4 and vec2[1]. */
2275 if (t4 >= CLIPPED_ESCAPE) {
2276 di4 = t4 - CLIPPED_ESCAPE;
2277 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2279 di4 = t4 - (V = c4 * cbrtf(c4) * IQ);
2283 di4 = t4 - (V = vec2[1]*IQ);
/* Accumulate the squared error of the group. */
2287 cost += di1 * di1 + di2 * di2
2288 + di3 * di3 + di4 * di4;
2293 return cost * lambda + curbits;
/*
 * Dispatch table of band-cost functions. The get_band_cost() macro indexes
 * it with the codebook number cb, so the order of the entries below is
 * significant.
 */
2296 static float (*const get_band_cost_arr[])(struct AACEncContext *s,
2297 PutBitContext *pb, const float *in,
2298 const float *scaled, int size, int scale_idx,
2299 int cb, const float lambda, const float uplim,
2300 int *bits, float *energy) = {
2301 get_band_cost_ZERO_mips,
2302 get_band_cost_SQUAD_mips,
2303 get_band_cost_SQUAD_mips,
2304 get_band_cost_UQUAD_mips,
2305 get_band_cost_UQUAD_mips,
2306 get_band_cost_SPAIR_mips,
2307 get_band_cost_SPAIR_mips,
2308 get_band_cost_UPAIR7_mips,
2309 get_band_cost_UPAIR7_mips,
2310 get_band_cost_UPAIR12_mips,
2311 get_band_cost_UPAIR12_mips,
2312 get_band_cost_ESC_mips,
2313 get_band_cost_NONE_mips, /* cb 12 doesn't exist */
2314 get_band_cost_ZERO_mips,
2315 get_band_cost_ZERO_mips,
2316 get_band_cost_ZERO_mips,
/*
 * Call the band-cost function registered for codebook cb, forwarding all
 * arguments unchanged. cb is used both as the table index and as an argument,
 * so it is evaluated twice.
 */
2319 #define get_band_cost( \
2320 s, pb, in, scaled, size, scale_idx, cb, \
2321 lambda, uplim, bits, energy) \
2322 get_band_cost_arr[cb]( \
2323 s, pb, in, scaled, size, scale_idx, cb, \
2324 lambda, uplim, bits, energy)
/**
 * Return the rate-distortion cost of a band by dispatching on cb through
 * get_band_cost().
 *
 * A NULL PutBitContext is passed to the cost function. The rtz parameter is
 * accepted but not forwarded.
 */
2326 static float quantize_band_cost(struct AACEncContext *s, const float *in,
2327 const float *scaled, int size, int scale_idx,
2328 int cb, const float lambda, const float uplim,
2329 int *bits, float *energy, int rtz)
2331 return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy);
2334 #include "libavcodec/aacenc_quantization_misc.h"
2336 #include "libavcodec/aaccoder_twoloop.h"
/**
 * Choose, for each scalefactor band of a channel pair, whether to set the
 * mid/side flag in cpe->ms_mask.
 *
 * For every band in which neither channel is zeroed and is_mask is clear,
 * the summed quantize_band_cost() of the two channels' own coefficients
 * (dist1) is compared against the summed cost of their mid and side
 * combinations (dist2), trying sid_sf_boost values 0..3 for the side
 * scalefactor index. When the flag ends up set and neither band is
 * NOISE_BT, the sf_idx and band_type entries of both channels are
 * overwritten with the mid/side values.
 *
 * @param s    encoder context; s->lambda, s->psy and s->cur_channel are
 *             read, and four 128-float regions of s->scoefs are used as
 *             scratch buffers
 * @param cpe  channel pair element; common_window and is_mask are read,
 *             ms_mask and the sf_idx/band_type entries of cpe->ch[0] and
 *             cpe->ch[1] are written
 */
2338 static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
2340 int start = 0, i, w, w2, g, sid_sf_boost, prev_mid, prev_side;
2341 uint8_t nextband0[128], nextband1[128];
/* Per-band mid and side coefficients, rebuilt for each window. */
2342 float M[128], S[128];
/* Four consecutive 128-float slices of s->scoefs, filled by abs_pow34_v()
 * (presumably |x|^(3/4), going by the helper's name -- confirm). */
2343 float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
2344 const float lambda = s->lambda;
/* lambda / 120, capped at 1.0; only used in the side-signal cost weight. */
2345 const float mslambda = FFMIN(1.0f, lambda / 120.f);
2346 SingleChannelElement *sce0 = &cpe->ch[0];
2347 SingleChannelElement *sce1 = &cpe->ch[1];
2348 if (!cpe->common_window)
2351 /** Scout out next nonzero bands */
2352 ff_init_nextband_map(sce0, nextband0);
2353 ff_init_nextband_map(sce1, nextband1);
/* Seed the "previous scalefactor" trackers from each channel's first entry. */
2355 prev_mid = sce0->sf_idx[0];
2356 prev_side = sce1->sf_idx[0];
/* w steps by group_len[w], i.e. one iteration per window group. */
2357 for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
2359 for (g = 0; g < sce0->ics.num_swb; g++) {
/* Band-dependent factor from bval2bmax(); used below only to scale the
 * side-signal cost weight. */
2360 float bmax = bval2bmax(g * 17.0f / sce0->ics.num_swb) / 0.0045f;
/* Bands without is_mask start with the M/S flag cleared. */
2361 if (!cpe->is_mask[w*16+g])
2362 cpe->ms_mask[w*16+g] = 0;
/* Only evaluate M/S where both channels are non-zero and is_mask is clear. */
2363 if (!sce0->zeroes[w*16+g] && !sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g]) {
/* Peak abs_pow34 values over every window of the group; declared outside
 * the w2 loop so they accumulate across windows. */
2364 float Mmax = 0.0f, Smax = 0.0f;
2366 /* Must compute mid/side SF and book for the whole window group */
2367 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2368 for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
/* Mid is the average of the two channels' coefficients. */
2369 M[i] = (sce0->coeffs[start+(w+w2)*128+i]
2370 + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
2372 - sce1->coeffs[start+(w+w2)*128+i];
2374 abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2375 abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
2376 for (i = 0; i < sce0->ics.swb_sizes[g]; i++ ) {
2377 Mmax = FFMAX(Mmax, M34[i]);
2378 Smax = FFMAX(Smax, S34[i]);
/* Try progressively lower side scalefactor indices (3 steps per boost). */
2382 for (sid_sf_boost = 0; sid_sf_boost < 4; sid_sf_boost++) {
/* dist1 accumulates the left/right cost, dist2 the mid/side cost. */
2383 float dist1 = 0.0f, dist2 = 0.0f;
/* Both candidate indices start from the smaller of the two channels'
 * current scalefactor indices and are clipped to
 * [0, SCALE_MAX_POS - SCALE_DIV_512]. */
2389 minidx = FFMIN(sce0->sf_idx[w*16+g], sce1->sf_idx[w*16+g]);
2390 mididx = av_clip(minidx, 0, SCALE_MAX_POS - SCALE_DIV_512);
2391 sididx = av_clip(minidx - sid_sf_boost * 3, 0, SCALE_MAX_POS - SCALE_DIV_512);
/* With both bands non-noise, the candidate indices must pass
 * ff_sfdelta_can_replace() for both channels. */
2392 if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT
2393 && ( !ff_sfdelta_can_replace(sce0, nextband0, prev_mid, mididx, w*16+g)
2394 || !ff_sfdelta_can_replace(sce1, nextband1, prev_side, sididx, w*16+g))) {
2395 /* scalefactor range violation, bad stuff, will decrease quality unacceptably */
/* Codebooks are chosen from the group-wide peaks and the candidate indices. */
2399 midcb = find_min_book(Mmax, mididx);
2400 sidcb = find_min_book(Smax, sididx);
2402 /* No CB can be zero */
2403 midcb = FFMAX(1,midcb);
2404 sidcb = FFMAX(1,sidcb);
2406 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
/* Psychoacoustic band data for this channel and the next one. */
2407 FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
2408 FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
/* Mid/side costs are weighted by the smaller of the two thresholds. */
2409 float minthr = FFMIN(band0->threshold, band1->threshold);
2411 for (i = 0; i < sce0->ics.swb_sizes[g]; i++) {
2412 M[i] = (sce0->coeffs[start+(w+w2)*128+i]
2413 + sce1->coeffs[start+(w+w2)*128+i]) * 0.5;
2415 - sce1->coeffs[start+(w+w2)*128+i];
2418 abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2419 abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2420 abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2421 abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
/* Cost of channel 0 with its current scalefactor index and band type. */
2422 dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
2424 sce0->ics.swb_sizes[g],
2425 sce0->sf_idx[w*16+g],
2426 sce0->band_type[w*16+g],
2427 lambda / band0->threshold, INFINITY, &b1, NULL, 0);
/* Cost of channel 1 with its current scalefactor index and band type. */
2428 dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
2430 sce1->ics.swb_sizes[g],
2431 sce1->sf_idx[w*16+g],
2432 sce1->band_type[w*16+g],
2433 lambda / band1->threshold, INFINITY, &b2, NULL, 0);
/* Cost of the mid signal, weighted by lambda / minthr. */
2434 dist2 += quantize_band_cost(s, M,
2436 sce0->ics.swb_sizes[g],
2439 lambda / minthr, INFINITY, &b3, NULL, 0);
/* Cost of the side signal, weighted by mslambda / (minthr * bmax). */
2440 dist2 += quantize_band_cost(s, S,
2442 sce1->ics.swb_sizes[g],
2445 mslambda / (minthr * bmax), INFINITY, &b4, NULL, 0);
/* M/S is selected only when it is no worse in distortion and B1 < B0.
 * NOTE(review): the accumulation of B0/B1 is not shown in this excerpt;
 * presumably they total the bit counts b1+b2 and b3+b4 -- confirm. */
2451 cpe->ms_mask[w*16+g] = dist2 <= dist1 && B1 < B0;
2452 if (cpe->ms_mask[w*16+g]) {
/* Neither band is noise: commit the mid/side indices and codebooks. */
2453 if (sce0->band_type[w*16+g] != NOISE_BT && sce1->band_type[w*16+g] != NOISE_BT) {
2454 sce0->sf_idx[w*16+g] = mididx;
2455 sce1->sf_idx[w*16+g] = sididx;
2456 sce0->band_type[w*16+g] = midcb;
2457 sce1->band_type[w*16+g] = sidcb;
/* Exactly one of the two bands is noise: drop the flag again. */
2458 } else if ((sce0->band_type[w*16+g] != NOISE_BT) ^ (sce1->band_type[w*16+g] != NOISE_BT)) {
2459 /* ms_mask unneeded, and it confuses some decoders */
2460 cpe->ms_mask[w*16+g] = 0;
2463 } else if (B1 > B0) {
2464 /* More boost won't fix this */
/* Remember the latest scalefactor index of each channel for the next
 * ff_sfdelta_can_replace() check; only bands that are not zeroed and have
 * band_type below RESERVED_BT count (channel 1 also requires is_mask clear). */
2469 if (!sce0->zeroes[w*16+g] && sce0->band_type[w*16+g] < RESERVED_BT)
2470 prev_mid = sce0->sf_idx[w*16+g];
2471 if (!sce1->zeroes[w*16+g] && !cpe->is_mask[w*16+g] && sce1->band_type[w*16+g] < RESERVED_BT)
2472 prev_side = sce1->sf_idx[w*16+g];
/* Advance the coefficient offset past this band. */
2473 start += sce0->ics.swb_sizes[g];
2477 #endif /* HAVE_MIPSFPU */
2479 #include "libavcodec/aaccoder_trellis.h"
2481 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
2482 #endif /* HAVE_INLINE_ASM */
2484 void ff_aac_coder_init_mips(AACEncContext *c) {
2486 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
2487 AACCoefficientsEncoder *e = c->coder;
2488 int option = c->options.coder;
2491 e->quantize_and_encode_band = quantize_and_encode_band_mips;
2492 e->encode_window_bands_info = codebook_trellis_rate;
2494 e->search_for_quantizers = search_for_quantizers_twoloop;
2495 #endif /* HAVE_MIPSFPU */
2498 e->search_for_ms = search_for_ms_mips;
2499 #endif /* HAVE_MIPSFPU */
2500 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
2501 #endif /* HAVE_INLINE_ASM */