3 * MIPS Technologies, Inc., California.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 * contributors may be used to endorse or promote products derived from
15 * this software without specific prior written permission.
17 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * Author: Stanislav Ocovaj (socovaj@mips.com)
30 * Szabolcs Pal (sabolc@mips.com)
32 * AAC coefficients encoder optimized for MIPS floating-point architecture
34 * This file is part of FFmpeg.
36 * FFmpeg is free software; you can redistribute it and/or
37 * modify it under the terms of the GNU Lesser General Public
38 * License as published by the Free Software Foundation; either
39 * version 2.1 of the License, or (at your option) any later version.
41 * FFmpeg is distributed in the hope that it will be useful,
42 * but WITHOUT ANY WARRANTY; without even the implied warranty of
43 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
44 * Lesser General Public License for more details.
46 * You should have received a copy of the GNU Lesser General Public
47 * License along with FFmpeg; if not, write to the Free Software
48 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
53 * Reference: libavcodec/aaccoder.c
56 #include "libavutil/libm.h"
59 #include "libavutil/mathematics.h"
60 #include "libavcodec/avcodec.h"
61 #include "libavcodec/put_bits.h"
62 #include "libavcodec/aac.h"
63 #include "libavcodec/aacenc.h"
64 #include "libavcodec/aactab.h"
65 #include "libavcodec/aacenctab.h"
66 #include "libavcodec/aacenc_utils.h"
69 typedef struct BandCodingPath {
/*
 * Sign-bit count per unsigned-quad codebook index. The table has 81 (3^4)
 * entries; reading the index as four base-3 digits, each entry is the number
 * of non-zero digits, i.e. how many of the four quantized values are
 * non-zero. The UQUAD bit-count routine adds this value to the codeword
 * length.
 */
75 static const uint8_t uquad_sign_bits[81] = {
76 0, 1, 1, 1, 2, 2, 1, 2, 2,
77 1, 2, 2, 2, 3, 3, 2, 3, 3,
78 1, 2, 2, 2, 3, 3, 2, 3, 3,
79 1, 2, 2, 2, 3, 3, 2, 3, 3,
80 2, 3, 3, 3, 4, 4, 3, 4, 4,
81 2, 3, 3, 3, 4, 4, 3, 4, 4,
82 1, 2, 2, 2, 3, 3, 2, 3, 3,
83 2, 3, 3, 3, 4, 4, 3, 4, 4,
84 2, 3, 3, 3, 4, 4, 3, 4, 4
/*
 * Sign-bit count per pair index for an 8x8 (64-entry) table: row and column
 * each range over 0..7 and the entry is the number of non-zero members of the
 * pair (0, 1 or 2).
 * NOTE(review): presumably consumed by the UPAIR7 bit-count routine - confirm
 * against its body.
 */
87 static const uint8_t upair7_sign_bits[64] = {
88 0, 1, 1, 1, 1, 1, 1, 1,
89 1, 2, 2, 2, 2, 2, 2, 2,
90 1, 2, 2, 2, 2, 2, 2, 2,
91 1, 2, 2, 2, 2, 2, 2, 2,
92 1, 2, 2, 2, 2, 2, 2, 2,
93 1, 2, 2, 2, 2, 2, 2, 2,
94 1, 2, 2, 2, 2, 2, 2, 2,
95 1, 2, 2, 2, 2, 2, 2, 2,
/*
 * Sign-bit count per pair index for a 13x13 (169-entry) table: row and column
 * each range over 0..12 and the entry is the number of non-zero members of
 * the pair (0, 1 or 2).
 * NOTE(review): presumably consumed by a UPAIR12 bit-count routine - confirm.
 */
98 static const uint8_t upair12_sign_bits[169] = {
99 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
100 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
103 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
104 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
109 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
110 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
111 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
/*
 * Sign-bit count per pair index for a 17x17 (289-entry) table: row and column
 * each range over 0..16 and the entry is the number of non-zero members of
 * the pair (0, 1 or 2).
 * NOTE(review): presumably consumed by an ESC bit-count routine - confirm.
 */
114 static const uint8_t esc_sign_bits[289] = {
115 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
116 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
117 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
118 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
119 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
120 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
121 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
122 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
123 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
124 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
125 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
126 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
127 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
128 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
129 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
130 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
131 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
135 * Functions developed from template function and optimized for quantizing and encoding band
/*
 * Quantize one band and write its codewords, four coefficients per loop
 * iteration. Installed in slots 1 and 2 of quantize_and_encode_band_cost_arr,
 * i.e. selected when cb is 1 or 2.
 *
 * s         - encoder context; s->scoefs is filled by abs_pow34_v() from in
 * pb        - bit writer receiving the codewords
 * in        - input coefficients; also re-read as raw 32-bit words so the
 *             assembly can test their sign bits
 * scaled    - values that are multiplied by Q34 and quantized
 *             NOTE(review): quantize_and_encode_band_mips() passes NULL here,
 *             so this is presumably re-pointed at s->scoefs before the loop -
 *             confirm
 * size      - number of coefficients; the loop advances by 4
 * scale_idx - selects Q34 and IQ from the pow34 / pow2 scale-factor tables
 * cb        - codebook number; the bits/codes/vectors tables use row cb-1
 * ROUNDING  - not used by the visible quantization lines, which always add
 *             ROUND_STANDARD
 * out, lambda, uplim, bits, energy - NOTE(review): not referenced on the
 *             lines reviewed here; qenergy is presumably stored through
 *             energy - confirm
 */
137 static void quantize_and_encode_band_cost_SQUAD_mips(struct AACEncContext *s,
138 PutBitContext *pb, const float *in, float *out,
139 const float *scaled, int size, int scale_idx,
140 int cb, const float lambda, const float uplim,
141 int *bits, float *energy, const float ROUNDING)
/* Quantization scale (Q34) and the matching inverse scale (IQ) for scale_idx. */
143 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
144 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
146 int qc1, qc2, qc3, qc4;
147 float qenergy = 0.0f;
/* Per-codebook rows: codeword lengths, codewords and codebook vectors. */
149 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
150 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
151 float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
153 abs_pow34_v(s->scoefs, in, size);
155 for (i = 0; i < size; i += 4) {
157 int *in_int = (int *)&in[i];
158 int t0, t1, t2, t3, t4, t5, t6, t7;
/*
 * Scale by Q34, add the rounding bias and truncate to int. The assembly that
 * follows reduces each qc to 0 or 1 (slt against $zero), takes the sign bit
 * of each input word (srl by 31) and negates qc where that bit is set
 * (subu + movn), so every qc ends up in {-1, 0, 1}.
 */
161 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
162 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
163 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
164 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
168 ".set noreorder \n\t"
170 "slt %[qc1], $zero, %[qc1] \n\t"
171 "slt %[qc2], $zero, %[qc2] \n\t"
172 "slt %[qc3], $zero, %[qc3] \n\t"
173 "slt %[qc4], $zero, %[qc4] \n\t"
174 "lw %[t0], 0(%[in_int]) \n\t"
175 "lw %[t1], 4(%[in_int]) \n\t"
176 "lw %[t2], 8(%[in_int]) \n\t"
177 "lw %[t3], 12(%[in_int]) \n\t"
178 "srl %[t0], %[t0], 31 \n\t"
179 "srl %[t1], %[t1], 31 \n\t"
180 "srl %[t2], %[t2], 31 \n\t"
181 "srl %[t3], %[t3], 31 \n\t"
182 "subu %[t4], $zero, %[qc1] \n\t"
183 "subu %[t5], $zero, %[qc2] \n\t"
184 "subu %[t6], $zero, %[qc3] \n\t"
185 "subu %[t7], $zero, %[qc4] \n\t"
186 "movn %[qc1], %[t4], %[t0] \n\t"
187 "movn %[qc2], %[t5], %[t1] \n\t"
188 "movn %[qc3], %[t6], %[t2] \n\t"
189 "movn %[qc4], %[t7], %[t3] \n\t"
193 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
194 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
195 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
196 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
197 : [in_int]"r"(in_int)
/* One codeword covers the whole quad; signs are part of the codebook index. */
210 put_bits(pb, p_bits[curidx], p_codes[curidx]);
/* Codebook vectors are stored four floats per index. */
214 vec = &p_vec[curidx*4];
/* Running sum of squares of the four values e1..e4. */
226 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize one band and write its codewords for the unsigned-quad case, four
 * coefficients per loop iteration. Installed in slots 3 and 4 of
 * quantize_and_encode_band_cost_arr, i.e. selected when cb is 3 or 4.
 *
 * Magnitudes are capped at 2 and looked up as one codeword per quad; the
 * signs of the non-zero values are appended as raw bits after the codeword.
 *
 * s         - encoder context; s->scoefs is filled by abs_pow34_v() from in
 * pb        - bit writer receiving codeword plus sign bits
 * in        - input coefficients; supplies the signs
 * scaled    - values that are multiplied by Q34 and quantized
 *             NOTE(review): the public wrapper passes NULL, so this is
 *             presumably re-pointed at s->scoefs before the loop - confirm
 * size      - number of coefficients; the loop advances by 4
 * scale_idx - selects Q34 and IQ from the pow34 / pow2 scale-factor tables
 * cb        - codebook number; the bits/codes/vectors tables use row cb-1
 * ROUNDING  - not used by the visible quantization lines, which always add
 *             ROUND_STANDARD
 * out, lambda, uplim, bits, energy - NOTE(review): not referenced on the
 *             lines reviewed here - confirm how they are consumed
 */
233 static void quantize_and_encode_band_cost_UQUAD_mips(struct AACEncContext *s,
234 PutBitContext *pb, const float *in, float *out,
235 const float *scaled, int size, int scale_idx,
236 int cb, const float lambda, const float uplim,
237 int *bits, float *energy, const float ROUNDING)
/* Quantization scale (Q34) and the matching inverse scale (IQ) for scale_idx. */
239 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
240 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
242 int qc1, qc2, qc3, qc4;
243 float qenergy = 0.0f;
/* Per-codebook rows: codeword lengths, codewords and codebook vectors. */
245 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
246 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
247 float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
249 abs_pow34_v(s->scoefs, in, size);
251 for (i = 0; i < size; i += 4) {
252 int curidx, sign, count;
253 int *in_int = (int *)&in[i];
255 unsigned int v_codes;
256 int t0, t1, t2, t3, t4;
/*
 * Scale by Q34, add the rounding bias and truncate to int. The assembly that
 * follows:
 *  - caps each qc at 2 (slt against the constant in t4, then movn);
 *  - turns each input word into a 0/1 "is negative" flag (slt against $zero);
 *  - builds sign by shifting in one flag for every qc that is non-zero, first
 *    coefficient in the most significant position;
 *  - sets count to the number of qc values greater than zero.
 */
259 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
260 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
261 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
262 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
266 ".set noreorder \n\t"
268 "ori %[t4], $zero, 2 \n\t"
269 "ori %[sign], $zero, 0 \n\t"
270 "slt %[t0], %[t4], %[qc1] \n\t"
271 "slt %[t1], %[t4], %[qc2] \n\t"
272 "slt %[t2], %[t4], %[qc3] \n\t"
273 "slt %[t3], %[t4], %[qc4] \n\t"
274 "movn %[qc1], %[t4], %[t0] \n\t"
275 "movn %[qc2], %[t4], %[t1] \n\t"
276 "movn %[qc3], %[t4], %[t2] \n\t"
277 "movn %[qc4], %[t4], %[t3] \n\t"
278 "lw %[t0], 0(%[in_int]) \n\t"
279 "lw %[t1], 4(%[in_int]) \n\t"
280 "lw %[t2], 8(%[in_int]) \n\t"
281 "lw %[t3], 12(%[in_int]) \n\t"
282 "slt %[t0], %[t0], $zero \n\t"
283 "movn %[sign], %[t0], %[qc1] \n\t"
284 "slt %[t1], %[t1], $zero \n\t"
285 "slt %[t2], %[t2], $zero \n\t"
286 "slt %[t3], %[t3], $zero \n\t"
287 "sll %[t0], %[sign], 1 \n\t"
288 "or %[t0], %[t0], %[t1] \n\t"
289 "movn %[sign], %[t0], %[qc2] \n\t"
290 "slt %[t4], $zero, %[qc1] \n\t"
291 "slt %[t1], $zero, %[qc2] \n\t"
292 "slt %[count], $zero, %[qc3] \n\t"
293 "sll %[t0], %[sign], 1 \n\t"
294 "or %[t0], %[t0], %[t2] \n\t"
295 "movn %[sign], %[t0], %[qc3] \n\t"
296 "slt %[t2], $zero, %[qc4] \n\t"
297 "addu %[count], %[count], %[t4] \n\t"
298 "addu %[count], %[count], %[t1] \n\t"
299 "sll %[t0], %[sign], 1 \n\t"
300 "or %[t0], %[t0], %[t3] \n\t"
301 "movn %[sign], %[t0], %[qc4] \n\t"
302 "addu %[count], %[count], %[t2] \n\t"
306 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
307 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
308 [sign]"=&r"(sign), [count]"=&r"(count),
309 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
311 : [in_int]"r"(in_int)
/*
 * Append the sign bits below the codeword: the codeword is shifted up by
 * count and the low count bits of sign are OR-ed in, giving
 * p_bits[curidx] + count bits in total.
 */
323 v_codes = (p_codes[curidx] << count) | (sign & ((1 << count) - 1));
324 v_bits = p_bits[curidx] + count;
325 put_bits(pb, v_bits, v_codes);
/* Rebuild the four values: codebook magnitude times IQ, sign copied from in. */
329 vec = &p_vec[curidx*4];
330 e1 = copysignf(vec[0] * IQ, in[i+0]);
331 e2 = copysignf(vec[1] * IQ, in[i+1]);
332 e3 = copysignf(vec[2] * IQ, in[i+2]);
333 e4 = copysignf(vec[3] * IQ, in[i+3]);
/* Running sum of squares of the rebuilt values. */
341 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize one band and write its codewords for the signed-pair case. Each
 * loop iteration handles four coefficients as two pairs. Installed in slots 5
 * and 6 of quantize_and_encode_band_cost_arr, i.e. selected when cb is 5 or 6.
 *
 * s         - encoder context; s->scoefs is filled by abs_pow34_v() from in
 * pb        - bit writer; both pair codewords go out in a single put_bits()
 * in        - input coefficients; also re-read as raw 32-bit words so the
 *             assembly can test their sign bits
 * scaled    - values that are multiplied by Q34 and quantized
 *             NOTE(review): the public wrapper passes NULL, so this is
 *             presumably re-pointed at s->scoefs before the loop - confirm
 * size      - number of coefficients; the loop advances by 4
 * scale_idx - selects Q34 and IQ from the pow34 / pow2 scale-factor tables
 * cb        - codebook number; the bits/codes/vectors tables use row cb-1
 * ROUNDING  - not used by the visible quantization lines, which always add
 *             ROUND_STANDARD
 * out, lambda, uplim, bits, energy - NOTE(review): not referenced on the
 *             lines reviewed here - confirm how they are consumed
 */
348 static void quantize_and_encode_band_cost_SPAIR_mips(struct AACEncContext *s,
349 PutBitContext *pb, const float *in, float *out,
350 const float *scaled, int size, int scale_idx,
351 int cb, const float lambda, const float uplim,
352 int *bits, float *energy, const float ROUNDING)
/* Quantization scale (Q34) and the matching inverse scale (IQ) for scale_idx. */
354 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
355 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
357 int qc1, qc2, qc3, qc4;
358 float qenergy = 0.0f;
/* Per-codebook rows: codeword lengths, codewords and codebook vectors. */
360 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
361 uint16_t *p_codes = (uint16_t *)ff_aac_spectral_codes[cb-1];
362 float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
364 abs_pow34_v(s->scoefs, in, size);
366 for (i = 0; i < size; i += 4) {
368 int *in_int = (int *)&in[i];
370 unsigned int v_codes;
371 int t0, t1, t2, t3, t4, t5, t6, t7;
372 const float *vec1, *vec2;
/*
 * Scale by Q34, add the rounding bias and truncate to int. The assembly that
 * follows caps each qc at 4 (slt against the constant in t4, then movn),
 * takes the sign bit of each input word (srl by 31) and negates qc where that
 * bit is set (subu + movn).
 */
374 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
375 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
376 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
377 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
381 ".set noreorder \n\t"
383 "ori %[t4], $zero, 4 \n\t"
384 "slt %[t0], %[t4], %[qc1] \n\t"
385 "slt %[t1], %[t4], %[qc2] \n\t"
386 "slt %[t2], %[t4], %[qc3] \n\t"
387 "slt %[t3], %[t4], %[qc4] \n\t"
388 "movn %[qc1], %[t4], %[t0] \n\t"
389 "movn %[qc2], %[t4], %[t1] \n\t"
390 "movn %[qc3], %[t4], %[t2] \n\t"
391 "movn %[qc4], %[t4], %[t3] \n\t"
392 "lw %[t0], 0(%[in_int]) \n\t"
393 "lw %[t1], 4(%[in_int]) \n\t"
394 "lw %[t2], 8(%[in_int]) \n\t"
395 "lw %[t3], 12(%[in_int]) \n\t"
396 "srl %[t0], %[t0], 31 \n\t"
397 "srl %[t1], %[t1], 31 \n\t"
398 "srl %[t2], %[t2], 31 \n\t"
399 "srl %[t3], %[t3], 31 \n\t"
400 "subu %[t4], $zero, %[qc1] \n\t"
401 "subu %[t5], $zero, %[qc2] \n\t"
402 "subu %[t6], $zero, %[qc3] \n\t"
403 "subu %[t7], $zero, %[qc4] \n\t"
404 "movn %[qc1], %[t4], %[t0] \n\t"
405 "movn %[qc2], %[t5], %[t1] \n\t"
406 "movn %[qc3], %[t6], %[t2] \n\t"
407 "movn %[qc4], %[t7], %[t3] \n\t"
411 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
412 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
413 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
414 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
415 : [in_int]"r"(in_int)
/*
 * Concatenate the two pair codewords (first pair in the upper bits) so that a
 * single put_bits() call writes both.
 */
425 v_codes = (p_codes[curidx] << p_bits[curidx2]) | (p_codes[curidx2]);
426 v_bits = p_bits[curidx] + p_bits[curidx2];
427 put_bits(pb, v_bits, v_codes);
/* Codebook vectors are stored two floats per pair index. */
431 vec1 = &p_vec[curidx*2 ];
432 vec2 = &p_vec[curidx2*2];
/* Running sum of squares of the four values e1..e4. */
444 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize one band and write its codewords for the unsigned-pair case with
 * magnitudes capped at 7. Each loop iteration handles four coefficients as
 * two pairs. Installed in slots 7 and 8 of quantize_and_encode_band_cost_arr,
 * i.e. selected when cb is 7 or 8.
 *
 * s         - encoder context; s->scoefs is filled by abs_pow34_v() from in
 * pb        - bit writer; one put_bits() per pair (codeword plus sign bits)
 * in        - input coefficients; supplies the signs
 * scaled    - values that are multiplied by Q34 and quantized
 *             NOTE(review): the public wrapper passes NULL, so this is
 *             presumably re-pointed at s->scoefs before the loop - confirm
 * size      - number of coefficients; the loop advances by 4
 * scale_idx - selects Q34 and IQ from the pow34 / pow2 scale-factor tables
 * cb        - codebook number; the bits/codes/vectors tables use row cb-1
 * ROUNDING  - not used by the visible quantization lines, which always add
 *             ROUND_STANDARD
 * out, lambda, uplim, bits, energy - NOTE(review): not referenced on the
 *             lines reviewed here - confirm how they are consumed
 *
 * NOTE(review): the asm clobber list names "t0".."t4". In GCC extended asm
 * those strings name machine registers, not the C temporaries t0..t4, which
 * are already passed as named operands. The entry therefore only withholds
 * registers from the allocator. No comparable line is visible in the
 * otherwise parallel UPAIR12 and ESC routines; looks like a leftover -
 * confirm and remove.
 */
451 static void quantize_and_encode_band_cost_UPAIR7_mips(struct AACEncContext *s,
452 PutBitContext *pb, const float *in, float *out,
453 const float *scaled, int size, int scale_idx,
454 int cb, const float lambda, const float uplim,
455 int *bits, float *energy, const float ROUNDING)
/* Quantization scale (Q34) and the matching inverse scale (IQ) for scale_idx. */
457 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
458 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
460 int qc1, qc2, qc3, qc4;
461 float qenergy = 0.0f;
/* Per-codebook rows: codeword lengths, codewords and codebook vectors. */
463 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
464 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
465 float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
467 abs_pow34_v(s->scoefs, in, size);
469 for (i = 0; i < size; i += 4) {
470 int curidx1, curidx2, sign1, count1, sign2, count2;
471 int *in_int = (int *)&in[i];
473 unsigned int v_codes;
474 int t0, t1, t2, t3, t4;
475 const float *vec1, *vec2;
/*
 * Scale by Q34, add the rounding bias and truncate to int. The assembly that
 * follows:
 *  - caps each qc at 7 (slt against the constant in t4, then movn);
 *  - turns each input word into a 0/1 "is negative" flag (slt against $zero);
 *  - builds sign1 from qc1/qc2 and sign2 from qc3/qc4, shifting in one flag
 *    for every qc that is non-zero, first member in the higher bit;
 *  - sets count1 / count2 to the number of qc values greater than zero in
 *    each pair.
 */
477 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
478 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
479 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
480 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
484 ".set noreorder \n\t"
486 "ori %[t4], $zero, 7 \n\t"
487 "ori %[sign1], $zero, 0 \n\t"
488 "ori %[sign2], $zero, 0 \n\t"
489 "slt %[t0], %[t4], %[qc1] \n\t"
490 "slt %[t1], %[t4], %[qc2] \n\t"
491 "slt %[t2], %[t4], %[qc3] \n\t"
492 "slt %[t3], %[t4], %[qc4] \n\t"
493 "movn %[qc1], %[t4], %[t0] \n\t"
494 "movn %[qc2], %[t4], %[t1] \n\t"
495 "movn %[qc3], %[t4], %[t2] \n\t"
496 "movn %[qc4], %[t4], %[t3] \n\t"
497 "lw %[t0], 0(%[in_int]) \n\t"
498 "lw %[t1], 4(%[in_int]) \n\t"
499 "lw %[t2], 8(%[in_int]) \n\t"
500 "lw %[t3], 12(%[in_int]) \n\t"
501 "slt %[t0], %[t0], $zero \n\t"
502 "movn %[sign1], %[t0], %[qc1] \n\t"
503 "slt %[t2], %[t2], $zero \n\t"
504 "movn %[sign2], %[t2], %[qc3] \n\t"
505 "slt %[t1], %[t1], $zero \n\t"
506 "sll %[t0], %[sign1], 1 \n\t"
507 "or %[t0], %[t0], %[t1] \n\t"
508 "movn %[sign1], %[t0], %[qc2] \n\t"
509 "slt %[t3], %[t3], $zero \n\t"
510 "sll %[t0], %[sign2], 1 \n\t"
511 "or %[t0], %[t0], %[t3] \n\t"
512 "movn %[sign2], %[t0], %[qc4] \n\t"
513 "slt %[count1], $zero, %[qc1] \n\t"
514 "slt %[t1], $zero, %[qc2] \n\t"
515 "slt %[count2], $zero, %[qc3] \n\t"
516 "slt %[t2], $zero, %[qc4] \n\t"
517 "addu %[count1], %[count1], %[t1] \n\t"
518 "addu %[count2], %[count2], %[t2] \n\t"
522 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
523 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
524 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
525 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
526 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
528 : [in_int]"r"(in_int)
529 : "t0", "t1", "t2", "t3", "t4",
/* First pair: codeword shifted up by count1 with the sign bits below it. */
536 v_codes = (p_codes[curidx1] << count1) | sign1;
537 v_bits = p_bits[curidx1] + count1;
538 put_bits(pb, v_bits, v_codes);
/* Second pair, same layout. */
543 v_codes = (p_codes[curidx2] << count2) | sign2;
544 v_bits = p_bits[curidx2] + count2;
545 put_bits(pb, v_bits, v_codes);
/* Rebuild the four values: codebook magnitude times IQ, sign copied from in. */
549 vec1 = &p_vec[curidx1*2];
550 vec2 = &p_vec[curidx2*2];
551 e1 = copysignf(vec1[0] * IQ, in[i+0]);
552 e2 = copysignf(vec1[1] * IQ, in[i+1]);
553 e3 = copysignf(vec2[0] * IQ, in[i+2]);
554 e4 = copysignf(vec2[1] * IQ, in[i+3]);
/* Running sum of squares of the rebuilt values. */
562 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize one band and write its codewords for the unsigned-pair case with
 * magnitudes capped at 12. Each loop iteration handles four coefficients as
 * two pairs. Installed in slots 9 and 10 of
 * quantize_and_encode_band_cost_arr, i.e. selected when cb is 9 or 10.
 *
 * s         - encoder context; s->scoefs is filled by abs_pow34_v() from in
 * pb        - bit writer; one put_bits() per pair (codeword plus sign bits)
 * in        - input coefficients; supplies the signs
 * scaled    - values that are multiplied by Q34 and quantized
 *             NOTE(review): the public wrapper passes NULL, so this is
 *             presumably re-pointed at s->scoefs before the loop - confirm
 * size      - number of coefficients; the loop advances by 4
 * scale_idx - selects Q34 and IQ from the pow34 / pow2 scale-factor tables
 * cb        - codebook number; the bits/codes/vectors tables use row cb-1
 * ROUNDING  - not used by the visible quantization lines, which always add
 *             ROUND_STANDARD
 * out, lambda, uplim, bits, energy - NOTE(review): not referenced on the
 *             lines reviewed here - confirm how they are consumed
 */
569 static void quantize_and_encode_band_cost_UPAIR12_mips(struct AACEncContext *s,
570 PutBitContext *pb, const float *in, float *out,
571 const float *scaled, int size, int scale_idx,
572 int cb, const float lambda, const float uplim,
573 int *bits, float *energy, const float ROUNDING)
/* Quantization scale (Q34) and the matching inverse scale (IQ) for scale_idx. */
575 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
576 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
578 int qc1, qc2, qc3, qc4;
579 float qenergy = 0.0f;
/* Per-codebook rows: codeword lengths, codewords and codebook vectors. */
581 uint8_t *p_bits = (uint8_t*) ff_aac_spectral_bits[cb-1];
582 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
583 float *p_vec = (float *)ff_aac_codebook_vectors[cb-1];
585 abs_pow34_v(s->scoefs, in, size);
587 for (i = 0; i < size; i += 4) {
588 int curidx1, curidx2, sign1, count1, sign2, count2;
589 int *in_int = (int *)&in[i];
591 unsigned int v_codes;
592 int t0, t1, t2, t3, t4;
593 const float *vec1, *vec2;
/*
 * Scale by Q34, add the rounding bias and truncate to int. The assembly that
 * follows:
 *  - caps each qc at 12 (slt against the constant in t4, then movn);
 *  - turns each input word into a 0/1 "is negative" flag (slt against $zero);
 *  - builds sign1 from qc1/qc2 and sign2 from qc3/qc4, shifting in one flag
 *    for every qc that is non-zero, first member in the higher bit;
 *  - sets count1 / count2 to the number of qc values greater than zero in
 *    each pair.
 */
595 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
596 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
597 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
598 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
602 ".set noreorder \n\t"
604 "ori %[t4], $zero, 12 \n\t"
605 "ori %[sign1], $zero, 0 \n\t"
606 "ori %[sign2], $zero, 0 \n\t"
607 "slt %[t0], %[t4], %[qc1] \n\t"
608 "slt %[t1], %[t4], %[qc2] \n\t"
609 "slt %[t2], %[t4], %[qc3] \n\t"
610 "slt %[t3], %[t4], %[qc4] \n\t"
611 "movn %[qc1], %[t4], %[t0] \n\t"
612 "movn %[qc2], %[t4], %[t1] \n\t"
613 "movn %[qc3], %[t4], %[t2] \n\t"
614 "movn %[qc4], %[t4], %[t3] \n\t"
615 "lw %[t0], 0(%[in_int]) \n\t"
616 "lw %[t1], 4(%[in_int]) \n\t"
617 "lw %[t2], 8(%[in_int]) \n\t"
618 "lw %[t3], 12(%[in_int]) \n\t"
619 "slt %[t0], %[t0], $zero \n\t"
620 "movn %[sign1], %[t0], %[qc1] \n\t"
621 "slt %[t2], %[t2], $zero \n\t"
622 "movn %[sign2], %[t2], %[qc3] \n\t"
623 "slt %[t1], %[t1], $zero \n\t"
624 "sll %[t0], %[sign1], 1 \n\t"
625 "or %[t0], %[t0], %[t1] \n\t"
626 "movn %[sign1], %[t0], %[qc2] \n\t"
627 "slt %[t3], %[t3], $zero \n\t"
628 "sll %[t0], %[sign2], 1 \n\t"
629 "or %[t0], %[t0], %[t3] \n\t"
630 "movn %[sign2], %[t0], %[qc4] \n\t"
631 "slt %[count1], $zero, %[qc1] \n\t"
632 "slt %[t1], $zero, %[qc2] \n\t"
633 "slt %[count2], $zero, %[qc3] \n\t"
634 "slt %[t2], $zero, %[qc4] \n\t"
635 "addu %[count1], %[count1], %[t1] \n\t"
636 "addu %[count2], %[count2], %[t2] \n\t"
640 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
641 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
642 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
643 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
644 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
646 : [in_int]"r"(in_int)
/* First pair: codeword shifted up by count1 with the sign bits below it. */
653 v_codes = (p_codes[curidx1] << count1) | sign1;
654 v_bits = p_bits[curidx1] + count1;
655 put_bits(pb, v_bits, v_codes);
/* Second pair, same layout. */
660 v_codes = (p_codes[curidx2] << count2) | sign2;
661 v_bits = p_bits[curidx2] + count2;
662 put_bits(pb, v_bits, v_codes);
/* Rebuild the four values: codebook magnitude times IQ, sign copied from in. */
666 vec1 = &p_vec[curidx1*2];
667 vec2 = &p_vec[curidx2*2];
668 e1 = copysignf(vec1[0] * IQ, in[i+0]);
669 e2 = copysignf(vec1[1] * IQ, in[i+1]);
670 e3 = copysignf(vec2[0] * IQ, in[i+2]);
671 e4 = copysignf(vec2[1] * IQ, in[i+3]);
/* Running sum of squares of the rebuilt values. */
679 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Quantize one band and write its codewords for the escape-capable pair
 * codebook. Installed in slot 11 of quantize_and_encode_band_cost_arr, i.e.
 * selected when cb is 11.
 *
 * The body contains two loop variants over the band. The first writes only
 * codeword plus sign bits; the second additionally keeps the uncapped
 * magnitudes (c1..c4) and writes an escape sequence for every component whose
 * codebook vector entry equals 64.0f.
 * NOTE(review): the condition choosing between the two loops is not on the
 * lines reviewed here - confirm.
 *
 * s         - encoder context; s->scoefs is filled by abs_pow34_v() from in
 * pb        - bit writer
 * in        - input coefficients; supplies the signs
 * scaled    - values that are multiplied by Q34 and quantized
 *             NOTE(review): the public wrapper passes NULL, so this is
 *             presumably re-pointed at s->scoefs before the loops - confirm
 * size      - number of coefficients; both loops advance by 4
 * scale_idx - selects Q34 and IQ from the pow34 / pow2 scale-factor tables
 * cb        - codebook number; the bits/codes/vectors tables use row cb-1
 * ROUNDING  - rounding bias added before truncation (this routine uses the
 *             caller-supplied value rather than ROUND_STANDARD)
 * out, lambda, uplim, bits, energy - NOTE(review): not referenced on the
 *             lines reviewed here - confirm how they are consumed
 */
686 static void quantize_and_encode_band_cost_ESC_mips(struct AACEncContext *s,
687 PutBitContext *pb, const float *in, float *out,
688 const float *scaled, int size, int scale_idx,
689 int cb, const float lambda, const float uplim,
690 int *bits, float *energy, const float ROUNDING)
/* Quantization scale (Q34) and the matching inverse scale (IQ) for scale_idx. */
692 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
693 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
695 int qc1, qc2, qc3, qc4;
696 float qenergy = 0.0f;
/* Per-codebook rows: codeword lengths, codewords and codebook vectors. */
698 uint8_t *p_bits = (uint8_t* )ff_aac_spectral_bits[cb-1];
699 uint16_t *p_codes = (uint16_t*)ff_aac_spectral_codes[cb-1];
700 float *p_vectors = (float* )ff_aac_codebook_vectors[cb-1];
702 abs_pow34_v(s->scoefs, in, size);
/* Loop variant 1: codeword plus sign bits only, no escape sequences. */
706 for (i = 0; i < size; i += 4) {
707 int curidx, curidx2, sign1, count1, sign2, count2;
708 int *in_int = (int *)&in[i];
710 unsigned int v_codes;
711 int t0, t1, t2, t3, t4;
712 const float *vec1, *vec2;
/*
 * Scale by Q34, add ROUNDING and truncate to int. The assembly that follows
 * caps each qc at 16, derives a 0/1 "is negative" flag per input word, packs
 * the flags of non-zero values into sign1 (qc1/qc2) and sign2 (qc3/qc4), and
 * counts the values greater than zero per pair in count1 / count2.
 */
714 qc1 = scaled[i ] * Q34 + ROUNDING;
715 qc2 = scaled[i+1] * Q34 + ROUNDING;
716 qc3 = scaled[i+2] * Q34 + ROUNDING;
717 qc4 = scaled[i+3] * Q34 + ROUNDING;
721 ".set noreorder \n\t"
723 "ori %[t4], $zero, 16 \n\t"
724 "ori %[sign1], $zero, 0 \n\t"
725 "ori %[sign2], $zero, 0 \n\t"
726 "slt %[t0], %[t4], %[qc1] \n\t"
727 "slt %[t1], %[t4], %[qc2] \n\t"
728 "slt %[t2], %[t4], %[qc3] \n\t"
729 "slt %[t3], %[t4], %[qc4] \n\t"
730 "movn %[qc1], %[t4], %[t0] \n\t"
731 "movn %[qc2], %[t4], %[t1] \n\t"
732 "movn %[qc3], %[t4], %[t2] \n\t"
733 "movn %[qc4], %[t4], %[t3] \n\t"
734 "lw %[t0], 0(%[in_int]) \n\t"
735 "lw %[t1], 4(%[in_int]) \n\t"
736 "lw %[t2], 8(%[in_int]) \n\t"
737 "lw %[t3], 12(%[in_int]) \n\t"
738 "slt %[t0], %[t0], $zero \n\t"
739 "movn %[sign1], %[t0], %[qc1] \n\t"
740 "slt %[t2], %[t2], $zero \n\t"
741 "movn %[sign2], %[t2], %[qc3] \n\t"
742 "slt %[t1], %[t1], $zero \n\t"
743 "sll %[t0], %[sign1], 1 \n\t"
744 "or %[t0], %[t0], %[t1] \n\t"
745 "movn %[sign1], %[t0], %[qc2] \n\t"
746 "slt %[t3], %[t3], $zero \n\t"
747 "sll %[t0], %[sign2], 1 \n\t"
748 "or %[t0], %[t0], %[t3] \n\t"
749 "movn %[sign2], %[t0], %[qc4] \n\t"
750 "slt %[count1], $zero, %[qc1] \n\t"
751 "slt %[t1], $zero, %[qc2] \n\t"
752 "slt %[count2], $zero, %[qc3] \n\t"
753 "slt %[t2], $zero, %[qc4] \n\t"
754 "addu %[count1], %[count1], %[t1] \n\t"
755 "addu %[count2], %[count2], %[t2] \n\t"
759 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
760 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
761 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
762 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
763 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
765 : [in_int]"r"(in_int)
/* First pair: codeword shifted up by count1 with the sign bits below it. */
774 v_codes = (p_codes[curidx] << count1) | sign1;
775 v_bits = p_bits[curidx] + count1;
776 put_bits(pb, v_bits, v_codes);
/* Second pair, same layout. */
778 v_codes = (p_codes[curidx2] << count2) | sign2;
779 v_bits = p_bits[curidx2] + count2;
780 put_bits(pb, v_bits, v_codes);
/* Rebuild the four values: codebook magnitude times IQ, sign copied from in. */
784 vec1 = &p_vectors[curidx*2 ];
785 vec2 = &p_vectors[curidx2*2];
786 e1 = copysignf(vec1[0] * IQ, in[i+0]);
787 e2 = copysignf(vec1[1] * IQ, in[i+1]);
788 e3 = copysignf(vec2[0] * IQ, in[i+2]);
789 e4 = copysignf(vec2[1] * IQ, in[i+3]);
/* Running sum of squares of the rebuilt values. */
797 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/* Loop variant 2: as above, plus escape sequences for large magnitudes. */
801 for (i = 0; i < size; i += 4) {
802 int curidx, curidx2, sign1, count1, sign2, count2;
803 int *in_int = (int *)&in[i];
805 unsigned int v_codes;
807 int t0, t1, t2, t3, t4;
/*
 * Scale by Q34, add ROUNDING and truncate to int. Before qc is capped at 16
 * the assembly copies it into c1..c4 via shll_s.w by 18 followed by srl by
 * 18, which bounds the kept magnitude to the bits that survive the round
 * trip.
 * NOTE(review): shll_s.w is a DSP-ASE instruction, presumably a saturating
 * shift that makes this a clip rather than a wrap - confirm against the ISA
 * manual.
 * The rest of the sequence matches loop variant 1.
 */
809 qc1 = scaled[i ] * Q34 + ROUNDING;
810 qc2 = scaled[i+1] * Q34 + ROUNDING;
811 qc3 = scaled[i+2] * Q34 + ROUNDING;
812 qc4 = scaled[i+3] * Q34 + ROUNDING;
816 ".set noreorder \n\t"
818 "ori %[t4], $zero, 16 \n\t"
819 "ori %[sign1], $zero, 0 \n\t"
820 "ori %[sign2], $zero, 0 \n\t"
821 "shll_s.w %[c1], %[qc1], 18 \n\t"
822 "shll_s.w %[c2], %[qc2], 18 \n\t"
823 "shll_s.w %[c3], %[qc3], 18 \n\t"
824 "shll_s.w %[c4], %[qc4], 18 \n\t"
825 "srl %[c1], %[c1], 18 \n\t"
826 "srl %[c2], %[c2], 18 \n\t"
827 "srl %[c3], %[c3], 18 \n\t"
828 "srl %[c4], %[c4], 18 \n\t"
829 "slt %[t0], %[t4], %[qc1] \n\t"
830 "slt %[t1], %[t4], %[qc2] \n\t"
831 "slt %[t2], %[t4], %[qc3] \n\t"
832 "slt %[t3], %[t4], %[qc4] \n\t"
833 "movn %[qc1], %[t4], %[t0] \n\t"
834 "movn %[qc2], %[t4], %[t1] \n\t"
835 "movn %[qc3], %[t4], %[t2] \n\t"
836 "movn %[qc4], %[t4], %[t3] \n\t"
837 "lw %[t0], 0(%[in_int]) \n\t"
838 "lw %[t1], 4(%[in_int]) \n\t"
839 "lw %[t2], 8(%[in_int]) \n\t"
840 "lw %[t3], 12(%[in_int]) \n\t"
841 "slt %[t0], %[t0], $zero \n\t"
842 "movn %[sign1], %[t0], %[qc1] \n\t"
843 "slt %[t2], %[t2], $zero \n\t"
844 "movn %[sign2], %[t2], %[qc3] \n\t"
845 "slt %[t1], %[t1], $zero \n\t"
846 "sll %[t0], %[sign1], 1 \n\t"
847 "or %[t0], %[t0], %[t1] \n\t"
848 "movn %[sign1], %[t0], %[qc2] \n\t"
849 "slt %[t3], %[t3], $zero \n\t"
850 "sll %[t0], %[sign2], 1 \n\t"
851 "or %[t0], %[t0], %[t3] \n\t"
852 "movn %[sign2], %[t0], %[qc4] \n\t"
853 "slt %[count1], $zero, %[qc1] \n\t"
854 "slt %[t1], $zero, %[qc2] \n\t"
855 "slt %[count2], $zero, %[qc3] \n\t"
856 "slt %[t2], $zero, %[qc4] \n\t"
857 "addu %[count1], %[count1], %[t1] \n\t"
858 "addu %[count2], %[count2], %[t2] \n\t"
862 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
863 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
864 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
865 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
866 [c1]"=&r"(c1), [c2]"=&r"(c2),
867 [c3]"=&r"(c3), [c4]"=&r"(c4),
868 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
870 : [in_int]"r"(in_int)
/* First pair: codeword shifted up by count1 with the sign bits below it. */
880 v_codes = (p_codes[curidx] << count1) | sign1;
881 v_bits = p_bits[curidx] + count1;
882 put_bits(pb, v_bits, v_codes);
/*
 * A codebook vector component equal to 64.0f marks an escaped value. The
 * escape sequence is a (len-3)-bit prefix of ones terminated by a zero,
 * followed by the low len bits of the magnitude: 2*len-3 bits in total, with
 * len = av_log2(magnitude).
 */
884 if (p_vectors[curidx*2 ] == 64.0f) {
885 int len = av_log2(c1);
886 v_codes = (((1 << (len - 3)) - 2) << len) | (c1 & ((1 << len) - 1));
887 put_bits(pb, len * 2 - 3, v_codes);
889 if (p_vectors[curidx*2+1] == 64.0f) {
890 int len = av_log2(c2);
891 v_codes = (((1 << (len - 3)) - 2) << len) | (c2 & ((1 << len) - 1));
892 put_bits(pb, len*2-3, v_codes);
/* Second pair: codeword plus sign bits, then its escape sequences. */
895 v_codes = (p_codes[curidx2] << count2) | sign2;
896 v_bits = p_bits[curidx2] + count2;
897 put_bits(pb, v_bits, v_codes);
899 if (p_vectors[curidx2*2 ] == 64.0f) {
900 int len = av_log2(c3);
901 v_codes = (((1 << (len - 3)) - 2) << len) | (c3 & ((1 << len) - 1));
902 put_bits(pb, len* 2 - 3, v_codes);
904 if (p_vectors[curidx2*2+1] == 64.0f) {
905 int len = av_log2(c4);
906 v_codes = (((1 << (len - 3)) - 2) << len) | (c4 & ((1 << len) - 1));
907 put_bits(pb, len * 2 - 3, v_codes);
/*
 * Rebuild the values from the kept magnitudes: c * cbrtf(c) is c^(4/3),
 * scaled by IQ, with the sign copied from in.
 */
911 float e1, e2, e3, e4;
912 e1 = copysignf(c1 * cbrtf(c1) * IQ, in[i+0]);
913 e2 = copysignf(c2 * cbrtf(c2) * IQ, in[i+1]);
914 e3 = copysignf(c3 * cbrtf(c3) * IQ, in[i+2]);
915 e4 = copysignf(c4 * cbrtf(c4) * IQ, in[i+3]);
/* Running sum of squares of the rebuilt values. */
923 qenergy += (e1*e1 + e2*e2) + (e3*e3 + e4*e4);
/*
 * Handler installed in slot 12 of quantize_and_encode_band_cost_arr; the
 * table annotates that slot with "cb 12 doesn't exist". Shares the common
 * handler signature so that it can sit in the same dispatch table.
 * NOTE(review): presumably a trap for an invalid codebook - confirm against
 * the body.
 */
931 static void quantize_and_encode_band_cost_NONE_mips(struct AACEncContext *s,
932 PutBitContext *pb, const float *in, float *out,
933 const float *scaled, int size, int scale_idx,
934 int cb, const float lambda, const float uplim,
935 int *bits, float *energy, const float ROUNDING) {
/*
 * Handler installed in slots 0, 13, 14 and 15 of
 * quantize_and_encode_band_cost_arr. Shares the common handler signature and
 * walks the band four coefficients at a time.
 * NOTE(review): presumably clears the output for bands that carry no
 * spectral codewords - confirm against the loop body.
 */
939 static void quantize_and_encode_band_cost_ZERO_mips(struct AACEncContext *s,
940 PutBitContext *pb, const float *in, float *out,
941 const float *scaled, int size, int scale_idx,
942 int cb, const float lambda, const float uplim,
943 int *bits, float *energy, const float ROUNDING) {
948 for (i = 0; i < size; i += 4) {
/*
 * Dispatch table of quantize-and-encode handlers, indexed directly by the
 * codebook number cb (16 slots, 0..15):
 *   0       ZERO
 *   1, 2    SQUAD
 *   3, 4    UQUAD
 *   5, 6    SPAIR
 *   7, 8    UPAIR7
 *   9, 10   UPAIR12
 *   11      ESC
 *   12      NONE
 *   13..15  ZERO
 * Every entry has the same signature, so callers can invoke any slot with an
 * identical argument list.
 */
959 static void (*const quantize_and_encode_band_cost_arr[])(struct AACEncContext *s,
960 PutBitContext *pb, const float *in, float *out,
961 const float *scaled, int size, int scale_idx,
962 int cb, const float lambda, const float uplim,
963 int *bits, float *energy, const float ROUNDING) = {
964 quantize_and_encode_band_cost_ZERO_mips,
965 quantize_and_encode_band_cost_SQUAD_mips,
966 quantize_and_encode_band_cost_SQUAD_mips,
967 quantize_and_encode_band_cost_UQUAD_mips,
968 quantize_and_encode_band_cost_UQUAD_mips,
969 quantize_and_encode_band_cost_SPAIR_mips,
970 quantize_and_encode_band_cost_SPAIR_mips,
971 quantize_and_encode_band_cost_UPAIR7_mips,
972 quantize_and_encode_band_cost_UPAIR7_mips,
973 quantize_and_encode_band_cost_UPAIR12_mips,
974 quantize_and_encode_band_cost_UPAIR12_mips,
975 quantize_and_encode_band_cost_ESC_mips,
976 quantize_and_encode_band_cost_NONE_mips, /* cb 12 doesn't exist */
977 quantize_and_encode_band_cost_ZERO_mips,
978 quantize_and_encode_band_cost_ZERO_mips,
979 quantize_and_encode_band_cost_ZERO_mips,
/*
 * Call the handler registered for codebook cb in
 * quantize_and_encode_band_cost_arr, forwarding every argument unchanged.
 * cb is used both as the table index and as an argument, so it is evaluated
 * twice; pass an expression without side effects.
 */
982 #define quantize_and_encode_band_cost( \
983 s, pb, in, out, scaled, size, scale_idx, cb, \
984 lambda, uplim, bits, energy, ROUNDING) \
985 quantize_and_encode_band_cost_arr[cb]( \
986 s, pb, in, out, scaled, size, scale_idx, cb, \
987 lambda, uplim, bits, energy, ROUNDING)
/*
 * Encode one band with the handler for codebook cb, without cost reporting.
 *
 * Forwards to quantize_and_encode_band_cost() with scaled = NULL,
 * uplim = INFINITY and bits = energy = NULL. The rounding bias is
 * ROUND_TO_ZERO when rtz is non-zero and ROUND_STANDARD otherwise.
 *
 * s, pb, in, out, size, scale_idx, cb, lambda - passed through unchanged
 * rtz - non-zero selects round-to-zero quantization
 */
989 static void quantize_and_encode_band_mips(struct AACEncContext *s, PutBitContext *pb,
990 const float *in, float *out, int size, int scale_idx,
991 int cb, const float lambda, int rtz)
993 quantize_and_encode_band_cost(s, pb, in, out, NULL, size, scale_idx, cb, lambda,
994 INFINITY, NULL, NULL, (rtz) ? ROUND_TO_ZERO : ROUND_STANDARD);
998 * Functions developed from template function and optimized for getting the number of bits
/*
 * Bit-count routine for the ZERO case; returns a float and takes the same
 * leading arguments as the other get_band_numbits_* routines.
 * NOTE(review): presumably reports zero bits - confirm against the body.
 */
1000 static float get_band_numbits_ZERO_mips(struct AACEncContext *s,
1001 PutBitContext *pb, const float *in,
1002 const float *scaled, int size, int scale_idx,
1003 int cb, const float lambda, const float uplim,
/*
 * Bit-count routine for the NONE case; returns a float and takes the same
 * leading arguments as the other get_band_numbits_* routines.
 * NOTE(review): presumably the counterpart of the non-existent codebook 12
 * handler - confirm against the body.
 */
1009 static float get_band_numbits_NONE_mips(struct AACEncContext *s,
1010 PutBitContext *pb, const float *in,
1011 const float *scaled, int size, int scale_idx,
1012 int cb, const float lambda, const float uplim,
/*
 * Count the bits needed to code one band with a signed-quad codebook, without
 * writing anything: the quantization mirrors
 * quantize_and_encode_band_cost_SQUAD_mips and the codeword lengths are
 * summed into curbits.
 *
 * in        - input coefficients; re-read as raw 32-bit words for their signs
 * scaled    - values that are multiplied by Q34 and quantized
 * size      - number of coefficients; the loop advances by 4
 * scale_idx - selects Q34 from the pow34 scale-factor table
 * cb        - codebook number; the bit-length table uses row cb-1
 * s, pb, lambda, uplim - NOTE(review): not referenced on the lines reviewed
 *             here
 */
1019 static float get_band_numbits_SQUAD_mips(struct AACEncContext *s,
1020 PutBitContext *pb, const float *in,
1021 const float *scaled, int size, int scale_idx,
1022 int cb, const float lambda, const float uplim,
1025 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1027 int qc1, qc2, qc3, qc4;
1030 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1032 for (i = 0; i < size; i += 4) {
1034 int *in_int = (int *)&in[i];
1035 int t0, t1, t2, t3, t4, t5, t6, t7;
/*
 * Scale by Q34, add the rounding bias and truncate to int. The assembly that
 * follows reduces each qc to 0 or 1, takes the sign bit of each input word
 * and negates qc where that bit is set, so every qc ends up in {-1, 0, 1}.
 */
1037 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1038 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1039 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1040 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1044 ".set noreorder \n\t"
1046 "slt %[qc1], $zero, %[qc1] \n\t"
1047 "slt %[qc2], $zero, %[qc2] \n\t"
1048 "slt %[qc3], $zero, %[qc3] \n\t"
1049 "slt %[qc4], $zero, %[qc4] \n\t"
1050 "lw %[t0], 0(%[in_int]) \n\t"
1051 "lw %[t1], 4(%[in_int]) \n\t"
1052 "lw %[t2], 8(%[in_int]) \n\t"
1053 "lw %[t3], 12(%[in_int]) \n\t"
1054 "srl %[t0], %[t0], 31 \n\t"
1055 "srl %[t1], %[t1], 31 \n\t"
1056 "srl %[t2], %[t2], 31 \n\t"
1057 "srl %[t3], %[t3], 31 \n\t"
1058 "subu %[t4], $zero, %[qc1] \n\t"
1059 "subu %[t5], $zero, %[qc2] \n\t"
1060 "subu %[t6], $zero, %[qc3] \n\t"
1061 "subu %[t7], $zero, %[qc4] \n\t"
1062 "movn %[qc1], %[t4], %[t0] \n\t"
1063 "movn %[qc2], %[t5], %[t1] \n\t"
1064 "movn %[qc3], %[t6], %[t2] \n\t"
1065 "movn %[qc4], %[t7], %[t3] \n\t"
1069 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1070 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1071 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1072 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1073 : [in_int]"r"(in_int)
/* Signed codebook: the codeword length is the whole cost of this quad. */
1086 curbits += p_bits[curidx];
/*
 * Count the bits needed to code one band with an unsigned-quad codebook,
 * without writing anything. Magnitudes are capped at 2; the cost per quad is
 * the codeword length plus one sign bit per non-zero value, taken from
 * uquad_sign_bits.
 *
 * scaled    - values that are multiplied by Q34 and quantized
 * size      - number of coefficients; the loop advances by 4
 * scale_idx - selects Q34 from the pow34 scale-factor table
 * cb        - codebook number; the bit-length table uses row cb-1
 * s, pb, in, lambda, uplim - NOTE(review): not referenced on the lines
 *             reviewed here (the signs themselves are not needed to count
 *             bits)
 */
1091 static float get_band_numbits_UQUAD_mips(struct AACEncContext *s,
1092 PutBitContext *pb, const float *in,
1093 const float *scaled, int size, int scale_idx,
1094 int cb, const float lambda, const float uplim,
1097 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1100 int qc1, qc2, qc3, qc4;
1102 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1104 for (i = 0; i < size; i += 4) {
1106 int t0, t1, t2, t3, t4;
/*
 * Scale by Q34, add the rounding bias and truncate to int; the assembly that
 * follows caps each qc at 2 (slt against the constant in t4, then movn).
 */
1108 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1109 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1110 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1111 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1115 ".set noreorder \n\t"
1117 "ori %[t4], $zero, 2 \n\t"
1118 "slt %[t0], %[t4], %[qc1] \n\t"
1119 "slt %[t1], %[t4], %[qc2] \n\t"
1120 "slt %[t2], %[t4], %[qc3] \n\t"
1121 "slt %[t3], %[t4], %[qc4] \n\t"
1122 "movn %[qc1], %[t4], %[t0] \n\t"
1123 "movn %[qc2], %[t4], %[t1] \n\t"
1124 "movn %[qc3], %[t4], %[t2] \n\t"
1125 "movn %[qc4], %[t4], %[t3] \n\t"
1129 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1130 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1131 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* Codeword length plus the sign bits for the non-zero values of this quad. */
1143 curbits += p_bits[curidx];
1144 curbits += uquad_sign_bits[curidx];
/*
 * Count the bits needed to code one band with a signed-pair codebook, without
 * writing anything. Each loop iteration quantizes four coefficients as two
 * pairs and adds both codeword lengths to curbits.
 *
 * in        - input coefficients; re-read as raw 32-bit words for their signs
 * scaled    - values that are multiplied by Q34 and quantized
 * size      - number of coefficients; the loop advances by 4
 * scale_idx - selects Q34 from the pow34 scale-factor table
 * cb        - codebook number; the bit-length table uses row cb-1
 * s, pb, lambda, uplim - NOTE(review): not referenced on the lines reviewed
 *             here
 */
1149 static float get_band_numbits_SPAIR_mips(struct AACEncContext *s,
1150 PutBitContext *pb, const float *in,
1151 const float *scaled, int size, int scale_idx,
1152 int cb, const float lambda, const float uplim,
1155 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1157 int qc1, qc2, qc3, qc4;
1160 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1162 for (i = 0; i < size; i += 4) {
1163 int curidx, curidx2;
1164 int *in_int = (int *)&in[i];
1165 int t0, t1, t2, t3, t4, t5, t6, t7;
/*
 * Scale by Q34, add the rounding bias and truncate to int. The assembly that
 * follows caps each qc at 4, takes the sign bit of each input word and
 * negates qc where that bit is set.
 */
1167 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1168 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1169 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1170 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1174 ".set noreorder \n\t"
1176 "ori %[t4], $zero, 4 \n\t"
1177 "slt %[t0], %[t4], %[qc1] \n\t"
1178 "slt %[t1], %[t4], %[qc2] \n\t"
1179 "slt %[t2], %[t4], %[qc3] \n\t"
1180 "slt %[t3], %[t4], %[qc4] \n\t"
1181 "movn %[qc1], %[t4], %[t0] \n\t"
1182 "movn %[qc2], %[t4], %[t1] \n\t"
1183 "movn %[qc3], %[t4], %[t2] \n\t"
1184 "movn %[qc4], %[t4], %[t3] \n\t"
1185 "lw %[t0], 0(%[in_int]) \n\t"
1186 "lw %[t1], 4(%[in_int]) \n\t"
1187 "lw %[t2], 8(%[in_int]) \n\t"
1188 "lw %[t3], 12(%[in_int]) \n\t"
1189 "srl %[t0], %[t0], 31 \n\t"
1190 "srl %[t1], %[t1], 31 \n\t"
1191 "srl %[t2], %[t2], 31 \n\t"
1192 "srl %[t3], %[t3], 31 \n\t"
1193 "subu %[t4], $zero, %[qc1] \n\t"
1194 "subu %[t5], $zero, %[qc2] \n\t"
1195 "subu %[t6], $zero, %[qc3] \n\t"
1196 "subu %[t7], $zero, %[qc4] \n\t"
1197 "movn %[qc1], %[t4], %[t0] \n\t"
1198 "movn %[qc2], %[t5], %[t1] \n\t"
1199 "movn %[qc3], %[t6], %[t2] \n\t"
1200 "movn %[qc4], %[t7], %[t3] \n\t"
1204 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1205 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1206 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1207 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1208 : [in_int]"r"(in_int)
/*
 * The +40 bias keeps the table index non-negative for signed qc values.
 * NOTE(review): 40 equals 4*9 + 4, which would centre a 9-wide pair index for
 * values in -4..4 - confirm against the line that seeds curidx2.
 */
1216 curidx2 += qc4 + 40;
/* Signed codebook: the two codeword lengths are the whole cost. */
1218 curbits += p_bits[curidx] + p_bits[curidx2];
/**
 * Bit-count estimate for the UPAIR7 code path (entries 7 and 8 of
 * get_band_numbits_arr).
 *
 * Per iteration: quantizes four coefficients, limits each to 7 and adds
 * p_bits[] and upair7_sign_bits[] entries to curbits.
 *
 * NOTE(review): the end of the parameter list, the declarations of i/curbits,
 * the curidx/curidx2 computation, part of the curbits expression and the
 * return statement are on lines that are not present in this listing.
 */
1223 static float get_band_numbits_UPAIR7_mips(struct AACEncContext *s,
1224 PutBitContext *pb, const float *in,
1225 const float *scaled, int size, int scale_idx,
1226 int cb, const float lambda, const float uplim,
/* Multiplier applied to scaled[] before rounding; selected by scale_idx. */
1229 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1231 int qc1, qc2, qc3, qc4;
1234 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1236 for (i = 0; i < size; i += 4) {
1237 int curidx, curidx2;
1238 int t0, t1, t2, t3, t4;
/* The float result is truncated by the assignment to int. */
1240 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1241 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1242 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1243 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1247 ".set noreorder \n\t"
/* qcN = min(qcN, 7): slt flags 7 < qcN, movn then writes 7 into qcN. */
1249 "ori %[t4], $zero, 7 \n\t"
1250 "slt %[t0], %[t4], %[qc1] \n\t"
1251 "slt %[t1], %[t4], %[qc2] \n\t"
1252 "slt %[t2], %[t4], %[qc3] \n\t"
1253 "slt %[t3], %[t4], %[qc4] \n\t"
1254 "movn %[qc1], %[t4], %[t0] \n\t"
1255 "movn %[qc2], %[t4], %[t1] \n\t"
1256 "movn %[qc3], %[t4], %[t2] \n\t"
1257 "movn %[qc4], %[t4], %[t3] \n\t"
/* qc1..qc4 are read-write operands; t0..t3 are early-clobber scratch outputs. */
1261 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1262 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1263 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* Sum of table entries for both pairs; one term of this expression is on a
 * line missing from this listing. */
1273 curbits += p_bits[curidx] +
1274 upair7_sign_bits[curidx] +
1276 upair7_sign_bits[curidx2];
/**
 * Bit-count estimate for the UPAIR12 code path (entries 9 and 10 of
 * get_band_numbits_arr).
 *
 * Same structure as the UPAIR7 variant, with the quantized values limited
 * to 12 and sign-bit counts taken from upair12_sign_bits[].
 *
 * NOTE(review): the end of the parameter list, the declarations of i/curbits,
 * the curidx/curidx2 computation, part of the curbits expression and the
 * return statement are on lines that are not present in this listing.
 */
1281 static float get_band_numbits_UPAIR12_mips(struct AACEncContext *s,
1282 PutBitContext *pb, const float *in,
1283 const float *scaled, int size, int scale_idx,
1284 int cb, const float lambda, const float uplim,
/* Multiplier applied to scaled[] before rounding; selected by scale_idx. */
1287 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1289 int qc1, qc2, qc3, qc4;
1292 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1294 for (i = 0; i < size; i += 4) {
1295 int curidx, curidx2;
1296 int t0, t1, t2, t3, t4;
/* The float result is truncated by the assignment to int. */
1298 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1299 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1300 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1301 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1305 ".set noreorder \n\t"
/* qcN = min(qcN, 12): slt flags 12 < qcN, movn then writes 12 into qcN. */
1307 "ori %[t4], $zero, 12 \n\t"
1308 "slt %[t0], %[t4], %[qc1] \n\t"
1309 "slt %[t1], %[t4], %[qc2] \n\t"
1310 "slt %[t2], %[t4], %[qc3] \n\t"
1311 "slt %[t3], %[t4], %[qc4] \n\t"
1312 "movn %[qc1], %[t4], %[t0] \n\t"
1313 "movn %[qc2], %[t4], %[t1] \n\t"
1314 "movn %[qc3], %[t4], %[t2] \n\t"
1315 "movn %[qc4], %[t4], %[t3] \n\t"
/* qc1..qc4 are read-write operands; t0..t3 are early-clobber scratch outputs. */
1319 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1320 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1321 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
/* Sum of table entries for both pairs; one term of this expression is on a
 * line missing from this listing. */
1331 curbits += p_bits[curidx] +
1333 upair12_sign_bits[curidx] +
1334 upair12_sign_bits[curidx2];
/**
 * Bit-count estimate for the ESC code path (entry 11 of
 * get_band_numbits_arr).
 *
 * Per iteration the asm block produces, for each of four coefficients:
 *   - condN: 1 when the quantized value exceeds 15, then negated to a mask;
 *   - qcN:   the quantized value, replaced by 16 when condN is set;
 *   - cN:    (2 * floor(log2(clipped value)) - 3) masked by condN, i.e. zero
 *            unless the value exceeded 15.
 * The visible C code then adds p_bits[] and esc_sign_bits[] entries for both
 * pairs to curbits.
 *
 * NOTE(review): the declarations of c1..c4/t4/t5/i/curbits, the index
 * computation, any addition of c1..c4 to curbits, and the return statement
 * are on lines that are not present in this listing.
 */
1339 static float get_band_numbits_ESC_mips(struct AACEncContext *s,
1340 PutBitContext *pb, const float *in,
1341 const float *scaled, int size, int scale_idx,
1342 int cb, const float lambda, const float uplim,
/* Multiplier applied to scaled[] before rounding; selected by scale_idx. */
1345 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1347 int qc1, qc2, qc3, qc4;
1350 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1352 for (i = 0; i < size; i += 4) {
1353 int curidx, curidx2;
1354 int cond0, cond1, cond2, cond3;
/* The float result is truncated by the assignment to int. */
1358 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1359 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1360 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1361 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1365 ".set noreorder \n\t"
1367 "ori %[t4], $zero, 15 \n\t"
1368 "ori %[t5], $zero, 16 \n\t"
/* Saturating shift left by 18 followed by a logical shift right by 18:
 * non-negative values above 8191 come out as 8191, smaller ones unchanged. */
1369 "shll_s.w %[c1], %[qc1], 18 \n\t"
1370 "shll_s.w %[c2], %[qc2], 18 \n\t"
1371 "shll_s.w %[c3], %[qc3], 18 \n\t"
1372 "shll_s.w %[c4], %[qc4], 18 \n\t"
1373 "srl %[c1], %[c1], 18 \n\t"
1374 "srl %[c2], %[c2], 18 \n\t"
1375 "srl %[c3], %[c3], 18 \n\t"
1376 "srl %[c4], %[c4], 18 \n\t"
/* condN = (15 < qcN); when set, qcN is replaced by 16. */
1377 "slt %[cond0], %[t4], %[qc1] \n\t"
1378 "slt %[cond1], %[t4], %[qc2] \n\t"
1379 "slt %[cond2], %[t4], %[qc3] \n\t"
1380 "slt %[cond3], %[t4], %[qc4] \n\t"
1381 "movn %[qc1], %[t5], %[cond0] \n\t"
1382 "movn %[qc2], %[t5], %[cond1] \n\t"
1383 "movn %[qc3], %[t5], %[cond2] \n\t"
1384 "movn %[qc4], %[t5], %[cond3] \n\t"
/* cN = 31 - clz(cN): index of the highest set bit of the clipped value. */
1385 "ori %[t5], $zero, 31 \n\t"
1386 "clz %[c1], %[c1] \n\t"
1387 "clz %[c2], %[c2] \n\t"
1388 "clz %[c3], %[c3] \n\t"
1389 "clz %[c4], %[c4] \n\t"
1390 "subu %[c1], %[t5], %[c1] \n\t"
1391 "subu %[c2], %[t5], %[c2] \n\t"
1392 "subu %[c3], %[t5], %[c3] \n\t"
1393 "subu %[c4], %[t5], %[c4] \n\t"
/* cN = 2 * cN - 3. */
1394 "sll %[c1], %[c1], 1 \n\t"
1395 "sll %[c2], %[c2], 1 \n\t"
1396 "sll %[c3], %[c3], 1 \n\t"
1397 "sll %[c4], %[c4], 1 \n\t"
1398 "addiu %[c1], %[c1], -3 \n\t"
1399 "addiu %[c2], %[c2], -3 \n\t"
1400 "addiu %[c3], %[c3], -3 \n\t"
1401 "addiu %[c4], %[c4], -3 \n\t"
/* condN = -condN turns the 0/1 flag into a 0/all-ones mask; cN &= condN
 * zeroes the count for values that did not exceed 15. */
1402 "subu %[cond0], $zero, %[cond0] \n\t"
1403 "subu %[cond1], $zero, %[cond1] \n\t"
1404 "subu %[cond2], $zero, %[cond2] \n\t"
1405 "subu %[cond3], $zero, %[cond3] \n\t"
1406 "and %[c1], %[c1], %[cond0] \n\t"
1407 "and %[c2], %[c2], %[cond1] \n\t"
1408 "and %[c3], %[c3], %[cond2] \n\t"
1409 "and %[c4], %[c4], %[cond3] \n\t"
/* qc1..qc4 are read-write; everything else is an early-clobber output. */
1413 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1414 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1415 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
1416 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
1417 [c1]"=&r"(c1), [c2]"=&r"(c2),
1418 [c3]"=&r"(c3), [c4]"=&r"(c4),
1419 [t4]"=&r"(t4), [t5]"=&r"(t5)
/* Table entries for the first pair (curidx) and second pair (curidx2). */
1428 curbits += p_bits[curidx];
1429 curbits += esc_sign_bits[curidx];
1430 curbits += p_bits[curidx2];
1431 curbits += esc_sign_bits[curidx2];
/**
 * Dispatch table of bit-count estimators, indexed by codebook number (see the
 * get_band_numbits() macro, which indexes it with cb). The listing shows 16
 * entries: index 0 and 13..15 use the ZERO variant, 1..11 use the per-codebook
 * variants in pairs, and index 12 uses the NONE variant.
 *
 * NOTE(review): the end of the function-pointer parameter list and the
 * closing of the initializer are on lines not present in this listing.
 */
1441 static float (*const get_band_numbits_arr[])(struct AACEncContext *s,
1442 PutBitContext *pb, const float *in,
1443 const float *scaled, int size, int scale_idx,
1444 int cb, const float lambda, const float uplim,
1446 get_band_numbits_ZERO_mips,
1447 get_band_numbits_SQUAD_mips,
1448 get_band_numbits_SQUAD_mips,
1449 get_band_numbits_UQUAD_mips,
1450 get_band_numbits_UQUAD_mips,
1451 get_band_numbits_SPAIR_mips,
1452 get_band_numbits_SPAIR_mips,
1453 get_band_numbits_UPAIR7_mips,
1454 get_band_numbits_UPAIR7_mips,
1455 get_band_numbits_UPAIR12_mips,
1456 get_band_numbits_UPAIR12_mips,
1457 get_band_numbits_ESC_mips,
1458 get_band_numbits_NONE_mips, /* cb 12 doesn't exist */
1459 get_band_numbits_ZERO_mips,
1460 get_band_numbits_ZERO_mips,
1461 get_band_numbits_ZERO_mips,
/*
 * Calls the estimator stored at get_band_numbits_arr[cb], forwarding every
 * argument unchanged. cb is used both as the table index and as an argument,
 * so it is evaluated twice; it is not range-checked here.
 */
1464 #define get_band_numbits( \
1465 s, pb, in, scaled, size, scale_idx, cb, \
1466 lambda, uplim, bits) \
1467 get_band_numbits_arr[cb]( \
1468 s, pb, in, scaled, size, scale_idx, cb, \
1469 lambda, uplim, bits)
/**
 * Thin wrapper around get_band_numbits(): passes NULL for the PutBitContext
 * and forwards the remaining arguments. The energy and rtz parameters are
 * accepted but not passed on by the visible call.
 *
 * @return whatever the selected per-codebook estimator returns
 */
1471 static float quantize_band_cost_bits(struct AACEncContext *s, const float *in,
1472 const float *scaled, int size, int scale_idx,
1473 int cb, const float lambda, const float uplim,
1474 int *bits, float *energy, int rtz)
1476 return get_band_numbits(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits);
1480 * Functions developed from template function and optimized for getting the band cost
/**
 * Cost of a band on the ZERO path (entries 0 and 13..15 of
 * get_band_cost_arr): the sum of in[k]^2 over the band, accumulated four
 * coefficients per iteration, multiplied by lambda.
 *
 * NOTE(review): the declarations of i/cost and any stores through bits/energy
 * are on lines that are not present in this listing.
 */
1483 static float get_band_cost_ZERO_mips(struct AACEncContext *s,
1484 PutBitContext *pb, const float *in,
1485 const float *scaled, int size, int scale_idx,
1486 int cb, const float lambda, const float uplim,
1487 int *bits, float *energy)
/* size is consumed in steps of four; in[] is read at i..i+3. */
1492 for (i = 0; i < size; i += 4) {
1493 cost += in[i ] * in[i ];
1494 cost += in[i+1] * in[i+1];
1495 cost += in[i+2] * in[i+2];
1496 cost += in[i+3] * in[i+3];
1502 return cost * lambda;
/**
 * Placeholder cost function stored at index 12 of get_band_cost_arr, where
 * the table comment says "cb 12 doesn't exist".
 *
 * NOTE(review): the body is on lines not present in this listing, so what it
 * returns cannot be stated from here.
 */
1505 static float get_band_cost_NONE_mips(struct AACEncContext *s,
1506 PutBitContext *pb, const float *in,
1507 const float *scaled, int size, int scale_idx,
1508 int cb, const float lambda, const float uplim,
1509 int *bits, float *energy)
/**
 * Rate-distortion cost for the SQUAD code path (entries 1 and 2 of
 * get_band_cost_arr).
 *
 * Per group of four coefficients: quantize, reduce each value to 0/1 and
 * apply the sign of in[], look up the codeword length (p_bits) and the
 * codebook vector (p_codes), then accumulate
 *   - curbits: codeword lengths,
 *   - qenergy: squared codebook vector entries,
 *   - cost:    squared error between in[] and vec[] * IQ.
 *
 * @return cost * lambda + curbits
 *
 * NOTE(review): the declarations of i/cost/curbits/curidx/vec, the curidx
 * computation, and any guards around the stores through bits/energy are on
 * lines not present in this listing. search_for_ms_mips() passes NULL for
 * both pointers, so the visible *energy store is presumably guarded.
 */
1515 static float get_band_cost_SQUAD_mips(struct AACEncContext *s,
1516 PutBitContext *pb, const float *in,
1517 const float *scaled, int size, int scale_idx,
1518 int cb, const float lambda, const float uplim,
1519 int *bits, float *energy)
/* Q34 scales scaled[] before rounding; IQ scales codebook values back for
 * comparison with in[]. Both are selected by scale_idx. */
1521 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1522 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1525 float qenergy = 0.0f;
1526 int qc1, qc2, qc3, qc4;
1529 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1530 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1532 for (i = 0; i < size; i += 4) {
/* Two views of the same four inputs: raw words for sign extraction,
 * floats for the distortion computation. */
1535 int *in_int = (int *)&in[i];
1536 float *in_pos = (float *)&in[i];
1537 float di0, di1, di2, di3;
1538 int t0, t1, t2, t3, t4, t5, t6, t7;
/* The float result is truncated by the assignment to int. */
1540 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1541 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1542 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1543 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1547 ".set noreorder \n\t"
/* qcN = (0 < qcN): any positive value becomes 1, everything else 0. */
1549 "slt %[qc1], $zero, %[qc1] \n\t"
1550 "slt %[qc2], $zero, %[qc2] \n\t"
1551 "slt %[qc3], $zero, %[qc3] \n\t"
1552 "slt %[qc4], $zero, %[qc4] \n\t"
/* Load the raw input words and keep only bit 31 (the float sign). */
1553 "lw %[t0], 0(%[in_int]) \n\t"
1554 "lw %[t1], 4(%[in_int]) \n\t"
1555 "lw %[t2], 8(%[in_int]) \n\t"
1556 "lw %[t3], 12(%[in_int]) \n\t"
1557 "srl %[t0], %[t0], 31 \n\t"
1558 "srl %[t1], %[t1], 31 \n\t"
1559 "srl %[t2], %[t2], 31 \n\t"
1560 "srl %[t3], %[t3], 31 \n\t"
/* Negate qcN when the matching input is negative. */
1561 "subu %[t4], $zero, %[qc1] \n\t"
1562 "subu %[t5], $zero, %[qc2] \n\t"
1563 "subu %[t6], $zero, %[qc3] \n\t"
1564 "subu %[t7], $zero, %[qc4] \n\t"
1565 "movn %[qc1], %[t4], %[t0] \n\t"
1566 "movn %[qc2], %[t5], %[t1] \n\t"
1567 "movn %[qc3], %[t6], %[t2] \n\t"
1568 "movn %[qc4], %[t7], %[t3] \n\t"
1572 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1573 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1574 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1575 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1576 : [in_int]"r"(in_int)
1589 curbits += p_bits[curidx];
/* Four floats per codebook entry. */
1590 vec = &p_codes[curidx*4];
1592 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1593 + vec[2]*vec[2] + vec[3]*vec[3];
1597 ".set noreorder \n\t"
/* Even $f registers take in[i..i+3], odd ones take vec[0..3]. */
1599 "lwc1 $f0, 0(%[in_pos]) \n\t"
1600 "lwc1 $f1, 0(%[vec]) \n\t"
1601 "lwc1 $f2, 4(%[in_pos]) \n\t"
1602 "lwc1 $f3, 4(%[vec]) \n\t"
1603 "lwc1 $f4, 8(%[in_pos]) \n\t"
1604 "lwc1 $f5, 8(%[vec]) \n\t"
1605 "lwc1 $f6, 12(%[in_pos]) \n\t"
1606 "lwc1 $f7, 12(%[vec]) \n\t"
/* nmsub.s fd, fr, fs, ft computes fr - fs * ft, so diN = in - vec * IQ. */
1607 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1608 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1609 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1610 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1614 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1615 [di2]"=&f"(di2), [di3]"=&f"(di3)
1616 : [in_pos]"r"(in_pos), [vec]"r"(vec),
/* Hard-coded FPU registers used above are declared as clobbers. */
1618 : "$f0", "$f1", "$f2", "$f3",
1619 "$f4", "$f5", "$f6", "$f7",
/* Squared error for this group. */
1623 cost += di0 * di0 + di1 * di1
1624 + di2 * di2 + di3 * di3;
/* qenergy was accumulated on unscaled codebook values; scale it by IQ^2. */
1630 *energy = qenergy * (IQ*IQ);
1631 return cost * lambda + curbits;
/**
 * Rate-distortion cost for the UQUAD code path (entries 3 and 4 of
 * get_band_cost_arr).
 *
 * Per group of four coefficients: quantize, limit each value to 2, look up
 * the codeword length, sign-bit count and codebook vector, then accumulate
 *   - curbits: p_bits[] + uquad_sign_bits[],
 *   - qenergy: squared codebook vector entries,
 *   - cost:    squared error between |in[]| and vec[] * IQ.
 *
 * @return cost * lambda + curbits
 *
 * NOTE(review): the declarations of i/cost/curbits/curidx/vec, the curidx
 * computation, and any guards around the stores through bits/energy are on
 * lines not present in this listing.
 */
1634 static float get_band_cost_UQUAD_mips(struct AACEncContext *s,
1635 PutBitContext *pb, const float *in,
1636 const float *scaled, int size, int scale_idx,
1637 int cb, const float lambda, const float uplim,
1638 int *bits, float *energy)
/* Q34 scales scaled[] before rounding; IQ scales codebook values back. */
1640 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1641 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1644 float qenergy = 0.0f;
1646 int qc1, qc2, qc3, qc4;
1648 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
1649 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1651 for (i = 0; i < size; i += 4) {
1654 float *in_pos = (float *)&in[i];
1655 float di0, di1, di2, di3;
1656 int t0, t1, t2, t3, t4;
/* The float result is truncated by the assignment to int. */
1658 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1659 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1660 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1661 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1665 ".set noreorder \n\t"
/* qcN = min(qcN, 2): slt flags 2 < qcN, movn then writes 2 into qcN. */
1667 "ori %[t4], $zero, 2 \n\t"
1668 "slt %[t0], %[t4], %[qc1] \n\t"
1669 "slt %[t1], %[t4], %[qc2] \n\t"
1670 "slt %[t2], %[t4], %[qc3] \n\t"
1671 "slt %[t3], %[t4], %[qc4] \n\t"
1672 "movn %[qc1], %[t4], %[t0] \n\t"
1673 "movn %[qc2], %[t4], %[t1] \n\t"
1674 "movn %[qc3], %[t4], %[t2] \n\t"
1675 "movn %[qc4], %[t4], %[t3] \n\t"
1679 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1680 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1681 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1693 curbits += p_bits[curidx];
1694 curbits += uquad_sign_bits[curidx];
/* Four floats per codebook entry. */
1695 vec = &p_codes[curidx*4];
1697 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1698 + vec[2]*vec[2] + vec[3]*vec[3];
1702 ".set noreorder \n\t"
/* diN = |in[i+N]|: load, then clear the sign with abs.s. */
1704 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1705 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1706 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1707 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1708 "abs.s %[di0], %[di0] \n\t"
1709 "abs.s %[di1], %[di1] \n\t"
1710 "abs.s %[di2], %[di2] \n\t"
1711 "abs.s %[di3], %[di3] \n\t"
/* $f0..$f3 = vec[0..3]. */
1712 "lwc1 $f0, 0(%[vec]) \n\t"
1713 "lwc1 $f1, 4(%[vec]) \n\t"
1714 "lwc1 $f2, 8(%[vec]) \n\t"
1715 "lwc1 $f3, 12(%[vec]) \n\t"
/* nmsub.s fd, fr, fs, ft computes fr - fs * ft, so diN = |in| - vec * IQ. */
1716 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1717 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1718 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1719 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1723 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1724 [di2]"=&f"(di2), [di3]"=&f"(di3)
1725 : [in_pos]"r"(in_pos), [vec]"r"(vec),
/* Hard-coded FPU registers used above are declared as clobbers. */
1727 : "$f0", "$f1", "$f2", "$f3",
/* Squared error for this group. */
1731 cost += di0 * di0 + di1 * di1
1732 + di2 * di2 + di3 * di3;
/* qenergy was accumulated on unscaled codebook values; scale it by IQ^2. */
1738 *energy = qenergy * (IQ*IQ);
1739 return cost * lambda + curbits;
/**
 * Rate-distortion cost for the SPAIR code path (entries 5 and 6 of
 * get_band_cost_arr).
 *
 * Each iteration handles four coefficients as two pairs: quantize, limit each
 * value to 4, apply the sign of in[], look up a codeword length and a
 * two-float codebook vector per pair, then accumulate
 *   - curbits: p_bits[curidx] + p_bits[curidx2],
 *   - qenergy: squared codebook vector entries,
 *   - cost:    squared error between in[] and the vectors scaled by IQ.
 *
 * @return cost * lambda + curbits
 *
 * NOTE(review): the declarations of i/cost/curbits, most of the
 * curidx/curidx2 computation, and any guards around the stores through
 * bits/energy are on lines not present in this listing.
 */
1742 static float get_band_cost_SPAIR_mips(struct AACEncContext *s,
1743 PutBitContext *pb, const float *in,
1744 const float *scaled, int size, int scale_idx,
1745 int cb, const float lambda, const float uplim,
1746 int *bits, float *energy)
/* Q34 scales scaled[] before rounding; IQ scales codebook values back. */
1748 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1749 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1752 float qenergy = 0.0f;
1753 int qc1, qc2, qc3, qc4;
1756 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1757 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1759 for (i = 0; i < size; i += 4) {
1760 const float *vec, *vec2;
1761 int curidx, curidx2;
/* Two views of the same four inputs: raw words for sign extraction,
 * floats for the distortion computation. */
1762 int *in_int = (int *)&in[i];
1763 float *in_pos = (float *)&in[i];
1764 float di0, di1, di2, di3;
1765 int t0, t1, t2, t3, t4, t5, t6, t7;
/* The float result is truncated by the assignment to int. */
1767 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1768 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1769 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1770 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1774 ".set noreorder \n\t"
/* Step 1: qcN = min(qcN, 4). */
1776 "ori %[t4], $zero, 4 \n\t"
1777 "slt %[t0], %[t4], %[qc1] \n\t"
1778 "slt %[t1], %[t4], %[qc2] \n\t"
1779 "slt %[t2], %[t4], %[qc3] \n\t"
1780 "slt %[t3], %[t4], %[qc4] \n\t"
1781 "movn %[qc1], %[t4], %[t0] \n\t"
1782 "movn %[qc2], %[t4], %[t1] \n\t"
1783 "movn %[qc3], %[t4], %[t2] \n\t"
1784 "movn %[qc4], %[t4], %[t3] \n\t"
/* Step 2: load the raw input words and keep only bit 31 (the float sign). */
1785 "lw %[t0], 0(%[in_int]) \n\t"
1786 "lw %[t1], 4(%[in_int]) \n\t"
1787 "lw %[t2], 8(%[in_int]) \n\t"
1788 "lw %[t3], 12(%[in_int]) \n\t"
1789 "srl %[t0], %[t0], 31 \n\t"
1790 "srl %[t1], %[t1], 31 \n\t"
1791 "srl %[t2], %[t2], 31 \n\t"
1792 "srl %[t3], %[t3], 31 \n\t"
/* Step 3: negate qcN when the matching input is negative. */
1793 "subu %[t4], $zero, %[qc1] \n\t"
1794 "subu %[t5], $zero, %[qc2] \n\t"
1795 "subu %[t6], $zero, %[qc3] \n\t"
1796 "subu %[t7], $zero, %[qc4] \n\t"
1797 "movn %[qc1], %[t4], %[t0] \n\t"
1798 "movn %[qc2], %[t5], %[t1] \n\t"
1799 "movn %[qc3], %[t6], %[t2] \n\t"
1800 "movn %[qc4], %[t7], %[t3] \n\t"
1804 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1805 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1806 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1807 [t4]"=&r"(t4), [t5]"=&r"(t5), [t6]"=&r"(t6), [t7]"=&r"(t7)
1808 : [in_int]"r"(in_int)
/* NOTE(review): the constant 40 biases the signed index; the rest of the
 * index formula is not visible here. */
1816 curidx2 += qc4 + 40;
1818 curbits += p_bits[curidx];
1819 curbits += p_bits[curidx2];
/* Two floats per codebook entry. */
1821 vec = &p_codes[curidx*2];
1822 vec2 = &p_codes[curidx2*2];
1824 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1825 + vec2[0]*vec2[0] + vec2[1]*vec2[1];
1829 ".set noreorder \n\t"
/* Even $f registers take in[i..i+3]; odd ones take vec[0..1], vec2[0..1]. */
1831 "lwc1 $f0, 0(%[in_pos]) \n\t"
1832 "lwc1 $f1, 0(%[vec]) \n\t"
1833 "lwc1 $f2, 4(%[in_pos]) \n\t"
1834 "lwc1 $f3, 4(%[vec]) \n\t"
1835 "lwc1 $f4, 8(%[in_pos]) \n\t"
1836 "lwc1 $f5, 0(%[vec2]) \n\t"
1837 "lwc1 $f6, 12(%[in_pos]) \n\t"
1838 "lwc1 $f7, 4(%[vec2]) \n\t"
/* nmsub.s fd, fr, fs, ft computes fr - fs * ft, so diN = in - vec * IQ. */
1839 "nmsub.s %[di0], $f0, $f1, %[IQ] \n\t"
1840 "nmsub.s %[di1], $f2, $f3, %[IQ] \n\t"
1841 "nmsub.s %[di2], $f4, $f5, %[IQ] \n\t"
1842 "nmsub.s %[di3], $f6, $f7, %[IQ] \n\t"
1846 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1847 [di2]"=&f"(di2), [di3]"=&f"(di3)
1848 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1849 [vec2]"r"(vec2), [IQ]"f"(IQ)
/* Hard-coded FPU registers used above are declared as clobbers. */
1850 : "$f0", "$f1", "$f2", "$f3",
1851 "$f4", "$f5", "$f6", "$f7",
/* Squared error for this group. */
1855 cost += di0 * di0 + di1 * di1
1856 + di2 * di2 + di3 * di3;
/* qenergy was accumulated on unscaled codebook values; scale it by IQ^2. */
1862 *energy = qenergy * (IQ*IQ);
1863 return cost * lambda + curbits;
/**
 * Rate-distortion cost for the UPAIR7 code path (entries 7 and 8 of
 * get_band_cost_arr).
 *
 * Each iteration handles four coefficients as two pairs: quantize, limit each
 * value to 7, look up codeword length, sign-bit count and a two-float
 * codebook vector per pair, then accumulate
 *   - curbits: p_bits[] + upair7_sign_bits[] for both pairs,
 *   - qenergy: squared codebook vector entries,
 *   - cost:    squared error between |in[]| and the vectors scaled by IQ.
 * The first asm block also derives sign1/sign2 (packed sign bits of the
 * non-zero values of each pair) and count1/count2 (number of positive values
 * per pair).
 *
 * @return cost * lambda + curbits
 *
 * NOTE(review): the declarations of i/cost/curbits, the curidx/curidx2
 * computation, any use of sign1/sign2/count1/count2, and any guards around
 * the stores through bits/energy are on lines not present in this listing.
 */
1866 static float get_band_cost_UPAIR7_mips(struct AACEncContext *s,
1867 PutBitContext *pb, const float *in,
1868 const float *scaled, int size, int scale_idx,
1869 int cb, const float lambda, const float uplim,
1870 int *bits, float *energy)
/* Q34 scales scaled[] before rounding; IQ scales codebook values back. */
1872 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
1873 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
1876 float qenergy = 0.0f;
1877 int qc1, qc2, qc3, qc4;
1880 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
1881 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
1883 for (i = 0; i < size; i += 4) {
1884 const float *vec, *vec2;
1885 int curidx, curidx2, sign1, count1, sign2, count2;
/* Two views of the same four inputs: raw words for sign extraction,
 * floats for the distortion computation. */
1886 int *in_int = (int *)&in[i];
1887 float *in_pos = (float *)&in[i];
1888 float di0, di1, di2, di3;
1889 int t0, t1, t2, t3, t4;
/* The float result is truncated by the assignment to int. */
1891 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
1892 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
1893 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
1894 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
1898 ".set noreorder \n\t"
/* Step 1: t4 = 7, clear both sign accumulators, then qcN = min(qcN, 7). */
1900 "ori %[t4], $zero, 7 \n\t"
1901 "ori %[sign1], $zero, 0 \n\t"
1902 "ori %[sign2], $zero, 0 \n\t"
1903 "slt %[t0], %[t4], %[qc1] \n\t"
1904 "slt %[t1], %[t4], %[qc2] \n\t"
1905 "slt %[t2], %[t4], %[qc3] \n\t"
1906 "slt %[t3], %[t4], %[qc4] \n\t"
1907 "movn %[qc1], %[t4], %[t0] \n\t"
1908 "movn %[qc2], %[t4], %[t1] \n\t"
1909 "movn %[qc3], %[t4], %[t2] \n\t"
1910 "movn %[qc4], %[t4], %[t3] \n\t"
/* Step 2: load the raw input words; slt against $zero yields 1 for a set
 * sign bit. A sign is recorded only when the matching qcN is non-zero
 * (movn); the second value of a pair shifts the first sign left by one. */
1911 "lw %[t0], 0(%[in_int]) \n\t"
1912 "lw %[t1], 4(%[in_int]) \n\t"
1913 "lw %[t2], 8(%[in_int]) \n\t"
1914 "lw %[t3], 12(%[in_int]) \n\t"
1915 "slt %[t0], %[t0], $zero \n\t"
1916 "movn %[sign1], %[t0], %[qc1] \n\t"
1917 "slt %[t2], %[t2], $zero \n\t"
1918 "movn %[sign2], %[t2], %[qc3] \n\t"
1919 "slt %[t1], %[t1], $zero \n\t"
1920 "sll %[t0], %[sign1], 1 \n\t"
1921 "or %[t0], %[t0], %[t1] \n\t"
1922 "movn %[sign1], %[t0], %[qc2] \n\t"
1923 "slt %[t3], %[t3], $zero \n\t"
1924 "sll %[t0], %[sign2], 1 \n\t"
1925 "or %[t0], %[t0], %[t3] \n\t"
1926 "movn %[sign2], %[t0], %[qc4] \n\t"
/* Step 3: countN = number of values in the pair that are greater than zero. */
1927 "slt %[count1], $zero, %[qc1] \n\t"
1928 "slt %[t1], $zero, %[qc2] \n\t"
1929 "slt %[count2], $zero, %[qc3] \n\t"
1930 "slt %[t2], $zero, %[qc4] \n\t"
1931 "addu %[count1], %[count1], %[t1] \n\t"
1932 "addu %[count2], %[count2], %[t2] \n\t"
1936 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
1937 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
1938 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
1939 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
1940 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
1942 : [in_int]"r"(in_int)
/* First pair: codeword length, sign-bit count, two-float codebook vector. */
1952 curbits += p_bits[curidx];
1953 curbits += upair7_sign_bits[curidx];
1954 vec = &p_codes[curidx*2];
/* Second pair. */
1956 curbits += p_bits[curidx2];
1957 curbits += upair7_sign_bits[curidx2];
1958 vec2 = &p_codes[curidx2*2];
1960 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
1961 + vec2[0]*vec2[0] + vec2[1]*vec2[1];
1965 ".set noreorder \n\t"
/* diN = |in[i+N]|: load, then clear the sign with abs.s. */
1967 "lwc1 %[di0], 0(%[in_pos]) \n\t"
1968 "lwc1 %[di1], 4(%[in_pos]) \n\t"
1969 "lwc1 %[di2], 8(%[in_pos]) \n\t"
1970 "lwc1 %[di3], 12(%[in_pos]) \n\t"
1971 "abs.s %[di0], %[di0] \n\t"
1972 "abs.s %[di1], %[di1] \n\t"
1973 "abs.s %[di2], %[di2] \n\t"
1974 "abs.s %[di3], %[di3] \n\t"
/* $f0,$f1 = vec[0..1]; $f2,$f3 = vec2[0..1]. */
1975 "lwc1 $f0, 0(%[vec]) \n\t"
1976 "lwc1 $f1, 4(%[vec]) \n\t"
1977 "lwc1 $f2, 0(%[vec2]) \n\t"
1978 "lwc1 $f3, 4(%[vec2]) \n\t"
/* nmsub.s fd, fr, fs, ft computes fr - fs * ft, so diN = |in| - vec * IQ. */
1979 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
1980 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
1981 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
1982 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
1986 : [di0]"=&f"(di0), [di1]"=&f"(di1),
1987 [di2]"=&f"(di2), [di3]"=&f"(di3)
1988 : [in_pos]"r"(in_pos), [vec]"r"(vec),
1989 [vec2]"r"(vec2), [IQ]"f"(IQ)
/* Hard-coded FPU registers used above are declared as clobbers. */
1990 : "$f0", "$f1", "$f2", "$f3",
/* Squared error for this group. */
1994 cost += di0 * di0 + di1 * di1
1995 + di2 * di2 + di3 * di3;
/* qenergy was accumulated on unscaled codebook values; scale it by IQ^2. */
2001 *energy = qenergy * (IQ*IQ);
2002 return cost * lambda + curbits;
/**
 * Rate-distortion cost for the UPAIR12 code path (entries 9 and 10 of
 * get_band_cost_arr).
 *
 * Same structure as the UPAIR7 cost function, with quantized values limited
 * to 12 and sign-bit counts taken from upair12_sign_bits[]. Accumulates
 *   - curbits: p_bits[] + upair12_sign_bits[] for both pairs,
 *   - qenergy: squared codebook vector entries,
 *   - cost:    squared error between |in[]| and the vectors scaled by IQ.
 *
 * @return cost * lambda + curbits
 *
 * NOTE(review): the declarations of i/cost/curbits, the curidx/curidx2
 * computation, any use of sign1/sign2/count1/count2, and any guards around
 * the stores through bits/energy are on lines not present in this listing.
 */
2005 static float get_band_cost_UPAIR12_mips(struct AACEncContext *s,
2006 PutBitContext *pb, const float *in,
2007 const float *scaled, int size, int scale_idx,
2008 int cb, const float lambda, const float uplim,
2009 int *bits, float *energy)
/* Q34 scales scaled[] before rounding; IQ scales codebook values back. */
2011 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2012 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
2015 float qenergy = 0.0f;
2016 int qc1, qc2, qc3, qc4;
2019 uint8_t *p_bits = (uint8_t *)ff_aac_spectral_bits[cb-1];
2020 float *p_codes = (float *)ff_aac_codebook_vectors[cb-1];
2022 for (i = 0; i < size; i += 4) {
2023 const float *vec, *vec2;
2024 int curidx, curidx2;
2025 int sign1, count1, sign2, count2;
/* Two views of the same four inputs: raw words for sign extraction,
 * floats for the distortion computation. */
2026 int *in_int = (int *)&in[i];
2027 float *in_pos = (float *)&in[i];
2028 float di0, di1, di2, di3;
2029 int t0, t1, t2, t3, t4;
/* The float result is truncated by the assignment to int. */
2031 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
2032 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2033 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2034 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2038 ".set noreorder \n\t"
/* Step 1: t4 = 12, clear both sign accumulators, then qcN = min(qcN, 12). */
2040 "ori %[t4], $zero, 12 \n\t"
2041 "ori %[sign1], $zero, 0 \n\t"
2042 "ori %[sign2], $zero, 0 \n\t"
2043 "slt %[t0], %[t4], %[qc1] \n\t"
2044 "slt %[t1], %[t4], %[qc2] \n\t"
2045 "slt %[t2], %[t4], %[qc3] \n\t"
2046 "slt %[t3], %[t4], %[qc4] \n\t"
2047 "movn %[qc1], %[t4], %[t0] \n\t"
2048 "movn %[qc2], %[t4], %[t1] \n\t"
2049 "movn %[qc3], %[t4], %[t2] \n\t"
2050 "movn %[qc4], %[t4], %[t3] \n\t"
/* Step 2: load the raw input words; slt against $zero yields 1 for a set
 * sign bit. A sign is recorded only when the matching qcN is non-zero
 * (movn); the second value of a pair shifts the first sign left by one. */
2051 "lw %[t0], 0(%[in_int]) \n\t"
2052 "lw %[t1], 4(%[in_int]) \n\t"
2053 "lw %[t2], 8(%[in_int]) \n\t"
2054 "lw %[t3], 12(%[in_int]) \n\t"
2055 "slt %[t0], %[t0], $zero \n\t"
2056 "movn %[sign1], %[t0], %[qc1] \n\t"
2057 "slt %[t2], %[t2], $zero \n\t"
2058 "movn %[sign2], %[t2], %[qc3] \n\t"
2059 "slt %[t1], %[t1], $zero \n\t"
2060 "sll %[t0], %[sign1], 1 \n\t"
2061 "or %[t0], %[t0], %[t1] \n\t"
2062 "movn %[sign1], %[t0], %[qc2] \n\t"
2063 "slt %[t3], %[t3], $zero \n\t"
2064 "sll %[t0], %[sign2], 1 \n\t"
2065 "or %[t0], %[t0], %[t3] \n\t"
2066 "movn %[sign2], %[t0], %[qc4] \n\t"
/* Step 3: countN = number of values in the pair that are greater than zero. */
2067 "slt %[count1], $zero, %[qc1] \n\t"
2068 "slt %[t1], $zero, %[qc2] \n\t"
2069 "slt %[count2], $zero, %[qc3] \n\t"
2070 "slt %[t2], $zero, %[qc4] \n\t"
2071 "addu %[count1], %[count1], %[t1] \n\t"
2072 "addu %[count2], %[count2], %[t2] \n\t"
2076 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2077 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2078 [sign1]"=&r"(sign1), [count1]"=&r"(count1),
2079 [sign2]"=&r"(sign2), [count2]"=&r"(count2),
2080 [t0]"=&r"(t0), [t1]"=&r"(t1), [t2]"=&r"(t2), [t3]"=&r"(t3),
2082 : [in_int]"r"(in_int)
/* Codeword lengths and sign-bit counts for both pairs. */
2092 curbits += p_bits[curidx];
2093 curbits += p_bits[curidx2];
2094 curbits += upair12_sign_bits[curidx];
2095 curbits += upair12_sign_bits[curidx2];
/* Two floats per codebook entry. */
2096 vec = &p_codes[curidx*2];
2097 vec2 = &p_codes[curidx2*2];
2099 qenergy += vec[0]*vec[0] + vec[1]*vec[1]
2100 + vec2[0]*vec2[0] + vec2[1]*vec2[1];
2104 ".set noreorder \n\t"
/* diN = |in[i+N]|: load, then clear the sign with abs.s. */
2106 "lwc1 %[di0], 0(%[in_pos]) \n\t"
2107 "lwc1 %[di1], 4(%[in_pos]) \n\t"
2108 "lwc1 %[di2], 8(%[in_pos]) \n\t"
2109 "lwc1 %[di3], 12(%[in_pos]) \n\t"
2110 "abs.s %[di0], %[di0] \n\t"
2111 "abs.s %[di1], %[di1] \n\t"
2112 "abs.s %[di2], %[di2] \n\t"
2113 "abs.s %[di3], %[di3] \n\t"
/* $f0,$f1 = vec[0..1]; $f2,$f3 = vec2[0..1]. */
2114 "lwc1 $f0, 0(%[vec]) \n\t"
2115 "lwc1 $f1, 4(%[vec]) \n\t"
2116 "lwc1 $f2, 0(%[vec2]) \n\t"
2117 "lwc1 $f3, 4(%[vec2]) \n\t"
/* nmsub.s fd, fr, fs, ft computes fr - fs * ft, so diN = |in| - vec * IQ. */
2118 "nmsub.s %[di0], %[di0], $f0, %[IQ] \n\t"
2119 "nmsub.s %[di1], %[di1], $f1, %[IQ] \n\t"
2120 "nmsub.s %[di2], %[di2], $f2, %[IQ] \n\t"
2121 "nmsub.s %[di3], %[di3], $f3, %[IQ] \n\t"
2125 : [di0]"=&f"(di0), [di1]"=&f"(di1),
2126 [di2]"=&f"(di2), [di3]"=&f"(di3)
2127 : [in_pos]"r"(in_pos), [vec]"r"(vec),
2128 [vec2]"r"(vec2), [IQ]"f"(IQ)
/* Hard-coded FPU registers used above are declared as clobbers. */
2129 : "$f0", "$f1", "$f2", "$f3",
/* Squared error for this group. */
2133 cost += di0 * di0 + di1 * di1
2134 + di2 * di2 + di3 * di3;
/* qenergy was accumulated on unscaled codebook values; scale it by IQ^2. */
2140 *energy = qenergy * (IQ*IQ);
2141 return cost * lambda + curbits;
/**
 * Rate-distortion cost for the ESC code path (entry 11 of
 * get_band_cost_arr).
 *
 * Per group of four coefficients:
 *   - the asm block computes cN (quantized value clipped at 8191 for
 *     non-negative input), condN (1 when the quantized value exceeds 15) and
 *     replaces qcN by 16 when condN is set;
 *   - curbits gains p_bits[] and esc_sign_bits[] for both pairs plus, for
 *     each value with condN set, av_log2(cN) * 2 - 3;
 *   - each |in[]| is compared with CLIPPED_ESCAPE and a distortion term
 *     diN is formed against CLIPPED_ESCAPE, cN * cbrtf(cN) * IQ, or the
 *     codebook vector entry times IQ.
 *
 * @return cost * lambda + curbits
 *
 * NOTE(review): the declarations of c1..c4/t6/t7/i/cost/curbits, the index
 * computation, the assignment of t1, the else/escape conditions selecting
 * between the three distortion forms, and the qenergy updates for the
 * non-clipped branches are on lines not present in this listing.
 * NOTE(review): no store through energy is visible in this function, unlike
 * the sibling cost functions, although qenergy is accumulated — confirm
 * whether that is intended.
 */
2144 static float get_band_cost_ESC_mips(struct AACEncContext *s,
2145 PutBitContext *pb, const float *in,
2146 const float *scaled, int size, int scale_idx,
2147 int cb, const float lambda, const float uplim,
2148 int *bits, float *energy)
/* Q34 scales scaled[] before rounding; IQ scales codebook values back. */
2150 const float Q34 = ff_aac_pow34sf_tab[POW_SF2_ZERO - scale_idx + SCALE_ONE_POS - SCALE_DIV_512];
2151 const float IQ = ff_aac_pow2sf_tab [POW_SF2_ZERO + scale_idx - SCALE_ONE_POS + SCALE_DIV_512];
/* Threshold on |in[]| above which the distortion is measured against this
 * constant; 165140 is presumably 8191^(4/3) — TODO confirm. */
2152 const float CLIPPED_ESCAPE = 165140.0f * IQ;
2155 float qenergy = 0.0f;
2156 int qc1, qc2, qc3, qc4;
2159 uint8_t *p_bits = (uint8_t*)ff_aac_spectral_bits[cb-1];
2160 float *p_codes = (float* )ff_aac_codebook_vectors[cb-1];
2162 for (i = 0; i < size; i += 4) {
2163 const float *vec, *vec2;
2164 int curidx, curidx2;
/* Here t1..t4 are floats holding |in[]|, not asm scratch registers. */
2165 float t1, t2, t3, t4, V;
2166 float di1, di2, di3, di4;
2167 int cond0, cond1, cond2, cond3;
/* The float result is truncated by the assignment to int. */
2171 qc1 = scaled[i ] * Q34 + ROUND_STANDARD;
2172 qc2 = scaled[i+1] * Q34 + ROUND_STANDARD;
2173 qc3 = scaled[i+2] * Q34 + ROUND_STANDARD;
2174 qc4 = scaled[i+3] * Q34 + ROUND_STANDARD;
2178 ".set noreorder \n\t"
2180 "ori %[t6], $zero, 15 \n\t"
2181 "ori %[t7], $zero, 16 \n\t"
/* Saturating shift left by 18 followed by a logical shift right by 18:
 * non-negative values above 8191 come out as 8191, smaller ones unchanged. */
2182 "shll_s.w %[c1], %[qc1], 18 \n\t"
2183 "shll_s.w %[c2], %[qc2], 18 \n\t"
2184 "shll_s.w %[c3], %[qc3], 18 \n\t"
2185 "shll_s.w %[c4], %[qc4], 18 \n\t"
2186 "srl %[c1], %[c1], 18 \n\t"
2187 "srl %[c2], %[c2], 18 \n\t"
2188 "srl %[c3], %[c3], 18 \n\t"
2189 "srl %[c4], %[c4], 18 \n\t"
/* condN = (15 < qcN); when set, qcN is replaced by 16. */
2190 "slt %[cond0], %[t6], %[qc1] \n\t"
2191 "slt %[cond1], %[t6], %[qc2] \n\t"
2192 "slt %[cond2], %[t6], %[qc3] \n\t"
2193 "slt %[cond3], %[t6], %[qc4] \n\t"
2194 "movn %[qc1], %[t7], %[cond0] \n\t"
2195 "movn %[qc2], %[t7], %[cond1] \n\t"
2196 "movn %[qc3], %[t7], %[cond2] \n\t"
2197 "movn %[qc4], %[t7], %[cond3] \n\t"
/* qc1..qc4 are read-write; everything else is an early-clobber output. */
2201 : [qc1]"+r"(qc1), [qc2]"+r"(qc2),
2202 [qc3]"+r"(qc3), [qc4]"+r"(qc4),
2203 [cond0]"=&r"(cond0), [cond1]"=&r"(cond1),
2204 [cond2]"=&r"(cond2), [cond3]"=&r"(cond3),
2205 [c1]"=&r"(c1), [c2]"=&r"(c2),
2206 [c3]"=&r"(c3), [c4]"=&r"(c4),
2207 [t6]"=&r"(t6), [t7]"=&r"(t7)
/* First pair: codeword length, sign-bit count, two-float codebook vector. */
2216 curbits += p_bits[curidx];
2217 curbits += esc_sign_bits[curidx];
2218 vec = &p_codes[curidx*2];
/* Second pair. */
2220 curbits += p_bits[curidx2];
2221 curbits += esc_sign_bits[curidx2];
2222 vec2 = &p_codes[curidx2*2];
/* -condN is 0 or all-ones, so the extra bit count is added only for values
 * that exceeded 15. */
2224 curbits += (av_log2(c1) * 2 - 3) & (-cond0);
2225 curbits += (av_log2(c2) * 2 - 3) & (-cond1);
2226 curbits += (av_log2(c3) * 2 - 3) & (-cond2);
2227 curbits += (av_log2(c4) * 2 - 3) & (-cond3);
2230 t2 = fabsf(in[i+1]);
2231 t3 = fabsf(in[i+2]);
2232 t4 = fabsf(in[i+3]);
/* Coefficient 1: clipped case first; the two assignments after it belong to
 * branches whose conditions are not visible in this listing. */
2235 if (t1 >= CLIPPED_ESCAPE) {
2236 di1 = t1 - CLIPPED_ESCAPE;
2237 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2239 di1 = t1 - (V = c1 * cbrtf(c1) * IQ);
2243 di1 = t1 - (V = vec[0] * IQ);
/* Coefficient 2: same three-way choice, using c2 and vec[1]. */
2248 if (t2 >= CLIPPED_ESCAPE) {
2249 di2 = t2 - CLIPPED_ESCAPE;
2250 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2252 di2 = t2 - (V = c2 * cbrtf(c2) * IQ);
2256 di2 = t2 - (V = vec[1] * IQ);
/* Coefficient 3: same three-way choice, using c3 and vec2[0]. */
2261 if (t3 >= CLIPPED_ESCAPE) {
2262 di3 = t3 - CLIPPED_ESCAPE;
2263 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2265 di3 = t3 - (V = c3 * cbrtf(c3) * IQ);
2269 di3 = t3 - (V = vec2[0] * IQ);
/* Coefficient 4: same three-way choice, using c4 and vec2[1]. */
2274 if (t4 >= CLIPPED_ESCAPE) {
2275 di4 = t4 - CLIPPED_ESCAPE;
2276 qenergy += CLIPPED_ESCAPE*CLIPPED_ESCAPE;
2278 di4 = t4 - (V = c4 * cbrtf(c4) * IQ);
2282 di4 = t4 - (V = vec2[1]*IQ);
/* Squared error for this group. */
2286 cost += di1 * di1 + di2 * di2
2287 + di3 * di3 + di4 * di4;
2292 return cost * lambda + curbits;
/**
 * Dispatch table of rate-distortion cost functions, indexed by codebook
 * number (see the get_band_cost() macro, which indexes it with cb). The
 * listing shows 16 entries: index 0 and 13..15 use the ZERO variant, 1..11
 * use the per-codebook variants in pairs, and index 12 uses the NONE variant.
 *
 * NOTE(review): the closing of the initializer is on a line not present in
 * this listing.
 */
2295 static float (*const get_band_cost_arr[])(struct AACEncContext *s,
2296 PutBitContext *pb, const float *in,
2297 const float *scaled, int size, int scale_idx,
2298 int cb, const float lambda, const float uplim,
2299 int *bits, float *energy) = {
2300 get_band_cost_ZERO_mips,
2301 get_band_cost_SQUAD_mips,
2302 get_band_cost_SQUAD_mips,
2303 get_band_cost_UQUAD_mips,
2304 get_band_cost_UQUAD_mips,
2305 get_band_cost_SPAIR_mips,
2306 get_band_cost_SPAIR_mips,
2307 get_band_cost_UPAIR7_mips,
2308 get_band_cost_UPAIR7_mips,
2309 get_band_cost_UPAIR12_mips,
2310 get_band_cost_UPAIR12_mips,
2311 get_band_cost_ESC_mips,
2312 get_band_cost_NONE_mips, /* cb 12 doesn't exist */
2313 get_band_cost_ZERO_mips,
2314 get_band_cost_ZERO_mips,
2315 get_band_cost_ZERO_mips,
/*
 * Calls the cost function stored at get_band_cost_arr[cb], forwarding every
 * argument unchanged. cb is used both as the table index and as an argument,
 * so it is evaluated twice; it is not range-checked here.
 */
2318 #define get_band_cost( \
2319 s, pb, in, scaled, size, scale_idx, cb, \
2320 lambda, uplim, bits, energy) \
2321 get_band_cost_arr[cb]( \
2322 s, pb, in, scaled, size, scale_idx, cb, \
2323 lambda, uplim, bits, energy)
/**
 * Thin wrapper around get_band_cost(): passes NULL for the PutBitContext and
 * forwards the remaining arguments, including bits and energy. The rtz
 * parameter is accepted but not passed on by the visible call.
 *
 * @return whatever the selected per-codebook cost function returns
 */
2325 static float quantize_band_cost(struct AACEncContext *s, const float *in,
2326 const float *scaled, int size, int scale_idx,
2327 int cb, const float lambda, const float uplim,
2328 int *bits, float *energy, int rtz)
2330 return get_band_cost(s, NULL, in, scaled, size, scale_idx, cb, lambda, uplim, bits, energy);
2333 #include "libavcodec/aacenc_quantization_misc.h"
2335 #include "libavcodec/aaccoder_twoloop.h"
/**
 * Decides, per window group w and band g, whether to set
 * cpe->ms_mask[w*16+g].
 *
 * For every band where neither channel is flagged in zeroes[], the function
 * sums over the windows of the group:
 *   dist1 = cost of channel 0 + cost of channel 1 as stored,
 *   dist2 = cost of M (half the sum of both channels) + cost of S (a
 *           difference of the channels),
 * each obtained from quantize_band_cost() with lambda divided by a
 * psychoacoustic threshold. The mask is set when dist2 < dist1.
 *
 * NOTE(review): the statement executed when common_window is not set, the
 * handling of start at the top of each window group, the left-hand sides of
 * the S[] assignments, the "scaled" argument of each quantize_band_cost()
 * call and all closing braces are on lines not present in this listing.
 */
2337 static void search_for_ms_mips(AACEncContext *s, ChannelElement *cpe)
2339 int start = 0, i, w, w2, g;
/* Scratch buffers for one band of M and S coefficients. */
2340 float M[128], S[128];
/* Four consecutive 128-float regions of s->scoefs receive the abs_pow34_v() outputs. */
2341 float *L34 = s->scoefs, *R34 = s->scoefs + 128, *M34 = s->scoefs + 128*2, *S34 = s->scoefs + 128*3;
2342 const float lambda = s->lambda;
2343 SingleChannelElement *sce0 = &cpe->ch[0];
2344 SingleChannelElement *sce1 = &cpe->ch[1];
2345 if (!cpe->common_window)
/* Step through window groups; w advances by the length of each group. */
2347 for (w = 0; w < sce0->ics.num_windows; w += sce0->ics.group_len[w]) {
2349 for (g = 0; g < sce0->ics.num_swb; g++) {
2350 if (!cpe->ch[0].zeroes[w*16+g] && !cpe->ch[1].zeroes[w*16+g]) {
2351 float dist1 = 0.0f, dist2 = 0.0f;
2352 for (w2 = 0; w2 < sce0->ics.group_len[w]; w2++) {
2353 FFPsyBand *band0 = &s->psy.ch[s->cur_channel+0].psy_bands[(w+w2)*16+g];
2354 FFPsyBand *band1 = &s->psy.ch[s->cur_channel+1].psy_bands[(w+w2)*16+g];
2355 float minthr = FFMIN(band0->threshold, band1->threshold);
2356 float maxthr = FFMAX(band0->threshold, band1->threshold);
/* Build M (and S) four coefficients at a time.
 * NOTE(review): these reads index coeffs with start+w2*128, while the
 * abs_pow34_v() and quantize_band_cost() calls below use
 * start+(w+w2)*128; for w != 0 the two address different windows —
 * confirm which one is intended. */
2357 for (i = 0; i < sce0->ics.swb_sizes[g]; i+=4) {
2358 M[i ] = (sce0->coeffs[start+w2*128+i ]
2359 + sce1->coeffs[start+w2*128+i ]) * 0.5;
2360 M[i+1] = (sce0->coeffs[start+w2*128+i+1]
2361 + sce1->coeffs[start+w2*128+i+1]) * 0.5;
2362 M[i+2] = (sce0->coeffs[start+w2*128+i+2]
2363 + sce1->coeffs[start+w2*128+i+2]) * 0.5;
2364 M[i+3] = (sce0->coeffs[start+w2*128+i+3]
2365 + sce1->coeffs[start+w2*128+i+3]) * 0.5;
2368 - sce1->coeffs[start+w2*128+i ];
2370 - sce1->coeffs[start+w2*128+i+1];
2372 - sce1->coeffs[start+w2*128+i+2];
2374 - sce1->coeffs[start+w2*128+i+3];
/* Fill L34/R34/M34/S34 from the four coefficient sets. */
2376 abs_pow34_v(L34, sce0->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2377 abs_pow34_v(R34, sce1->coeffs+start+(w+w2)*128, sce0->ics.swb_sizes[g]);
2378 abs_pow34_v(M34, M, sce0->ics.swb_sizes[g]);
2379 abs_pow34_v(S34, S, sce0->ics.swb_sizes[g]);
/* dist1: channels coded as stored, each weighted by its own band threshold.
 * bits and energy are passed as NULL. */
2380 dist1 += quantize_band_cost(s, &sce0->coeffs[start + (w+w2)*128],
2382 sce0->ics.swb_sizes[g],
2383 sce0->sf_idx[(w+w2)*16+g],
2384 sce0->band_type[(w+w2)*16+g],
2385 lambda / band0->threshold, INFINITY, NULL, NULL, 0);
2386 dist1 += quantize_band_cost(s, &sce1->coeffs[start + (w+w2)*128],
2388 sce1->ics.swb_sizes[g],
2389 sce1->sf_idx[(w+w2)*16+g],
2390 sce1->band_type[(w+w2)*16+g],
2391 lambda / band1->threshold, INFINITY, NULL, NULL, 0);
/* dist2: M uses channel 0's scalefactor/codebook with the larger threshold,
 * S uses channel 1's with the smaller threshold. */
2392 dist2 += quantize_band_cost(s, M,
2394 sce0->ics.swb_sizes[g],
2395 sce0->sf_idx[(w+w2)*16+g],
2396 sce0->band_type[(w+w2)*16+g],
2397 lambda / maxthr, INFINITY, NULL, NULL, 0);
2398 dist2 += quantize_band_cost(s, S,
2400 sce1->ics.swb_sizes[g],
2401 sce1->sf_idx[(w+w2)*16+g],
2402 sce1->band_type[(w+w2)*16+g],
2403 lambda / minthr, INFINITY, NULL, NULL, 0);
/* Set the mask when the M/S total is strictly cheaper. */
2405 cpe->ms_mask[w*16+g] = dist2 < dist1;
/* Advance to the first coefficient of the next band. */
2407 start += sce0->ics.swb_sizes[g];
2411 #endif /*HAVE_MIPSFPU */
2413 #include "libavcodec/aaccoder_trellis.h"
2415 #endif /* HAVE_INLINE_ASM */
/**
 * Installs the MIPS implementations into the coefficient encoder referenced
 * by c->coder: quantize_and_encode_band, encode_window_bands_info,
 * search_for_quantizers and search_for_ms are overwritten with the functions
 * defined or included in this file.
 *
 * NOTE(review): the opening #if guards matching the #endif lines below, and
 * any condition that uses `option`, are on lines not present in this
 * listing, so which assignments are conditional cannot be stated from here.
 */
2417 void ff_aac_coder_init_mips(AACEncContext *c) {
2419 AACCoefficientsEncoder *e = c->coder;
/* Coder selection read from the encoder options; its use is not visible here. */
2420 int option = c->options.coder;
2423 e->quantize_and_encode_band = quantize_and_encode_band_mips;
2424 e->encode_window_bands_info = codebook_trellis_rate;
2426 e->search_for_quantizers = search_for_quantizers_twoloop;
2427 #endif /* HAVE_MIPSFPU */
2430 e->search_for_ms = search_for_ms_mips;
2431 #endif /* HAVE_MIPSFPU */
2432 #endif /* HAVE_INLINE_ASM */