]> git.sesse.net Git - ffmpeg/blob - libavcodec/mips/aacsbr_mips.c
Merge commit '7e5bde93a1e7641e1622814dafac0be3f413d79b'
[ffmpeg] / libavcodec / mips / aacsbr_mips.c
1 /*
2  * Copyright (c) 2012
3  *      MIPS Technologies, Inc., California.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14  *    contributors may be used to endorse or promote products derived from
15  *    this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * Authors:  Djordje Pesut   (djordje@mips.com)
30  *           Mirjana Vulin   (mvulin@mips.com)
31  *
32  * This file is part of FFmpeg.
33  *
34  * FFmpeg is free software; you can redistribute it and/or
35  * modify it under the terms of the GNU Lesser General Public
36  * License as published by the Free Software Foundation; either
37  * version 2.1 of the License, or (at your option) any later version.
38  *
39  * FFmpeg is distributed in the hope that it will be useful,
40  * but WITHOUT ANY WARRANTY; without even the implied warranty of
41  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
42  * Lesser General Public License for more details.
43  *
44  * You should have received a copy of the GNU Lesser General Public
45  * License along with FFmpeg; if not, write to the Free Software
46  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
47  */
48
/**
 * @file
 * MIPS inline-assembly versions of AAC SBR functions.
 * Reference: libavcodec/aacsbr.c
 */
53
54 #include "libavcodec/aac.h"
55 #include "libavcodec/aacsbr.h"
56 #include "libavutil/mips/asmdefs.h"
57
58 #define ENVELOPE_ADJUSTMENT_OFFSET 2
59
60 #if HAVE_INLINE_ASM
61 static int sbr_lf_gen_mips(AACContext *ac, SpectralBandReplication *sbr,
62                       float X_low[32][40][2], const float W[2][32][32][2],
63                       int buf_idx)
64 {
65     int i, k;
66     int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
67     float *p_x_low = &X_low[0][8][0];
68     float *p_w = (float*)&W[buf_idx][0][0][0];
69     float *p_x1_low = &X_low[0][0][0];
70     float *p_w1 = (float*)&W[1-buf_idx][24][0][0];
71
72     float *loop_end=p_x1_low + 2560;
73
74     /* loop unrolled 8 times */
75     __asm__ volatile (
76     "1:                                                 \n\t"
77         "sw     $0,            0(%[p_x1_low])           \n\t"
78         "sw     $0,            4(%[p_x1_low])           \n\t"
79         "sw     $0,            8(%[p_x1_low])           \n\t"
80         "sw     $0,            12(%[p_x1_low])          \n\t"
81         "sw     $0,            16(%[p_x1_low])          \n\t"
82         "sw     $0,            20(%[p_x1_low])          \n\t"
83         "sw     $0,            24(%[p_x1_low])          \n\t"
84         "sw     $0,            28(%[p_x1_low])          \n\t"
85         PTR_ADDIU "%[p_x1_low],%[p_x1_low],      32     \n\t"
86         "bne    %[p_x1_low],   %[loop_end],      1b     \n\t"
87         PTR_ADDIU "%[p_x1_low],%[p_x1_low],      -10240 \n\t"
88
89         : [p_x1_low]"+r"(p_x1_low)
90         : [loop_end]"r"(loop_end)
91         : "memory"
92     );
93
94     for (k = 0; k < sbr->kx[1]; k++) {
95         for (i = 0; i < 32; i+=4) {
96             /* loop unrolled 4 times */
97             __asm__ volatile (
98                 "lw     %[temp0],   0(%[p_w])               \n\t"
99                 "lw     %[temp1],   4(%[p_w])               \n\t"
100                 "lw     %[temp2],   256(%[p_w])             \n\t"
101                 "lw     %[temp3],   260(%[p_w])             \n\t"
102                 "lw     %[temp4],   512(%[p_w])             \n\t"
103                 "lw     %[temp5],   516(%[p_w])             \n\t"
104                 "lw     %[temp6],   768(%[p_w])             \n\t"
105                 "lw     %[temp7],   772(%[p_w])             \n\t"
106                 "sw     %[temp0],   0(%[p_x_low])           \n\t"
107                 "sw     %[temp1],   4(%[p_x_low])           \n\t"
108                 "sw     %[temp2],   8(%[p_x_low])           \n\t"
109                 "sw     %[temp3],   12(%[p_x_low])          \n\t"
110                 "sw     %[temp4],   16(%[p_x_low])          \n\t"
111                 "sw     %[temp5],   20(%[p_x_low])          \n\t"
112                 "sw     %[temp6],   24(%[p_x_low])          \n\t"
113                 "sw     %[temp7],   28(%[p_x_low])          \n\t"
114                 PTR_ADDIU "%[p_x_low], %[p_x_low],  32      \n\t"
115                 PTR_ADDIU "%[p_w],     %[p_w],      1024    \n\t"
116
117                 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
118                   [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
119                   [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
120                   [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
121                   [p_w]"+r"(p_w), [p_x_low]"+r"(p_x_low)
122                 :
123                 : "memory"
124             );
125         }
126         p_x_low += 16;
127         p_w -= 2046;
128     }
129
130     for (k = 0; k < sbr->kx[0]; k++) {
131         for (i = 0; i < 2; i++) {
132
133             /* loop unrolled 4 times */
134             __asm__ volatile (
135                 "lw     %[temp0],    0(%[p_w1])             \n\t"
136                 "lw     %[temp1],    4(%[p_w1])             \n\t"
137                 "lw     %[temp2],    256(%[p_w1])           \n\t"
138                 "lw     %[temp3],    260(%[p_w1])           \n\t"
139                 "lw     %[temp4],    512(%[p_w1])           \n\t"
140                 "lw     %[temp5],    516(%[p_w1])           \n\t"
141                 "lw     %[temp6],    768(%[p_w1])           \n\t"
142                 "lw     %[temp7],    772(%[p_w1])           \n\t"
143                 "sw     %[temp0],    0(%[p_x1_low])         \n\t"
144                 "sw     %[temp1],    4(%[p_x1_low])         \n\t"
145                 "sw     %[temp2],    8(%[p_x1_low])         \n\t"
146                 "sw     %[temp3],    12(%[p_x1_low])        \n\t"
147                 "sw     %[temp4],    16(%[p_x1_low])        \n\t"
148                 "sw     %[temp5],    20(%[p_x1_low])        \n\t"
149                 "sw     %[temp6],    24(%[p_x1_low])        \n\t"
150                 "sw     %[temp7],    28(%[p_x1_low])        \n\t"
151                 PTR_ADDIU "%[p_x1_low], %[p_x1_low], 32     \n\t"
152                 PTR_ADDIU "%[p_w1],     %[p_w1],     1024   \n\t"
153
154                 : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
155                   [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
156                   [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
157                   [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
158                   [p_w1]"+r"(p_w1), [p_x1_low]"+r"(p_x1_low)
159                 :
160                 : "memory"
161             );
162         }
163         p_x1_low += 64;
164         p_w1 -= 510;
165     }
166     return 0;
167 }
168
169 static int sbr_x_gen_mips(SpectralBandReplication *sbr, float X[2][38][64],
170                      const float Y0[38][64][2], const float Y1[38][64][2],
171                      const float X_low[32][40][2], int ch)
172 {
173     int k, i;
174     const int i_f = 32;
175     int temp0, temp1, temp2, temp3;
176     const float *X_low1, *Y01, *Y11;
177     float *x1=&X[0][0][0];
178     float *j=x1+4864;
179     const int i_Temp = FFMAX(2*sbr->data[ch].t_env_num_env_old - i_f, 0);
180
181     /* loop unrolled 8 times */
182     __asm__ volatile (
183     "1:                                       \n\t"
184         "sw     $0,      0(%[x1])             \n\t"
185         "sw     $0,      4(%[x1])             \n\t"
186         "sw     $0,      8(%[x1])             \n\t"
187         "sw     $0,      12(%[x1])            \n\t"
188         "sw     $0,      16(%[x1])            \n\t"
189         "sw     $0,      20(%[x1])            \n\t"
190         "sw     $0,      24(%[x1])            \n\t"
191         "sw     $0,      28(%[x1])            \n\t"
192         PTR_ADDIU "%[x1],%[x1],      32       \n\t"
193         "bne    %[x1],   %[j],       1b       \n\t"
194         PTR_ADDIU "%[x1],%[x1],      -19456   \n\t"
195
196         : [x1]"+r"(x1)
197         : [j]"r"(j)
198         : "memory"
199     );
200
201     if (i_Temp != 0) {
202
203         X_low1=&X_low[0][2][0];
204
205         for (k = 0; k < sbr->kx[0]; k++) {
206
207             __asm__ volatile (
208                 "move    %[i],        $zero                  \n\t"
209             "2:                                              \n\t"
210                 "lw      %[temp0],    0(%[X_low1])           \n\t"
211                 "lw      %[temp1],    4(%[X_low1])           \n\t"
212                 "sw      %[temp0],    0(%[x1])               \n\t"
213                 "sw      %[temp1],    9728(%[x1])            \n\t"
214                 PTR_ADDIU "%[x1],     %[x1],         256     \n\t"
215                 PTR_ADDIU "%[X_low1], %[X_low1],     8       \n\t"
216                 "addiu   %[i],        %[i],          1       \n\t"
217                 "bne     %[i],        %[i_Temp],     2b      \n\t"
218
219                 : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
220                   [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
221                 : [i_Temp]"r"(i_Temp)
222                 : "memory"
223             );
224             x1-=(i_Temp<<6)-1;
225             X_low1-=(i_Temp<<1)-80;
226         }
227
228         x1=&X[0][0][k];
229         Y01=(float*)&Y0[32][k][0];
230
231         for (; k < sbr->kx[0] + sbr->m[0]; k++) {
232             __asm__ volatile (
233                 "move    %[i],       $zero               \n\t"
234             "3:                                          \n\t"
235                 "lw      %[temp0],   0(%[Y01])           \n\t"
236                 "lw      %[temp1],   4(%[Y01])           \n\t"
237                 "sw      %[temp0],   0(%[x1])            \n\t"
238                 "sw      %[temp1],   9728(%[x1])         \n\t"
239                 PTR_ADDIU "%[x1],    %[x1],      256     \n\t"
240                 PTR_ADDIU "%[Y01],   %[Y01],     512     \n\t"
241                 "addiu   %[i],       %[i],       1       \n\t"
242                 "bne     %[i],       %[i_Temp],  3b      \n\t"
243
244                 : [x1]"+r"(x1), [Y01]"+r"(Y01), [i]"=&r"(i),
245                   [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
246                 : [i_Temp]"r"(i_Temp)
247                 : "memory"
248             );
249             x1 -=(i_Temp<<6)-1;
250             Y01 -=(i_Temp<<7)-2;
251         }
252     }
253
254     x1=&X[0][i_Temp][0];
255     X_low1=&X_low[0][i_Temp+2][0];
256     temp3=38;
257
258     for (k = 0; k < sbr->kx[1]; k++) {
259
260         __asm__ volatile (
261             "move    %[i],       %[i_Temp]              \n\t"
262         "4:                                             \n\t"
263             "lw      %[temp0],   0(%[X_low1])           \n\t"
264             "lw      %[temp1],   4(%[X_low1])           \n\t"
265             "sw      %[temp0],   0(%[x1])               \n\t"
266             "sw      %[temp1],   9728(%[x1])            \n\t"
267             PTR_ADDIU "%[x1],    %[x1],         256     \n\t"
268             PTR_ADDIU "%[X_low1],%[X_low1],     8       \n\t"
269             "addiu   %[i],       %[i],          1       \n\t"
270             "bne     %[i],       %[temp3],      4b      \n\t"
271
272             : [x1]"+r"(x1), [X_low1]"+r"(X_low1), [i]"=&r"(i),
273               [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
274               [temp2]"=&r"(temp2)
275             : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3)
276             : "memory"
277         );
278         x1 -= ((38-i_Temp)<<6)-1;
279         X_low1 -= ((38-i_Temp)<<1)- 80;
280     }
281
282     x1=&X[0][i_Temp][k];
283     Y11=&Y1[i_Temp][k][0];
284     temp2=32;
285
286     for (; k < sbr->kx[1] + sbr->m[1]; k++) {
287
288         __asm__ volatile (
289            "move    %[i],       %[i_Temp]               \n\t"
290         "5:                                             \n\t"
291            "lw      %[temp0],   0(%[Y11])               \n\t"
292            "lw      %[temp1],   4(%[Y11])               \n\t"
293            "sw      %[temp0],   0(%[x1])                \n\t"
294            "sw      %[temp1],   9728(%[x1])             \n\t"
295            PTR_ADDIU "%[x1],    %[x1],          256     \n\t"
296            PTR_ADDIU "%[Y11],   %[Y11],         512     \n\t"
297            "addiu   %[i],       %[i],           1       \n\t"
298            "bne     %[i],       %[temp2],       5b      \n\t"
299
300            : [x1]"+r"(x1), [Y11]"+r"(Y11), [i]"=&r"(i),
301              [temp0]"=&r"(temp0), [temp1]"=&r"(temp1)
302            : [i_Temp]"r"(i_Temp), [temp3]"r"(temp3),
303              [temp2]"r"(temp2)
304            : "memory"
305         );
306
307         x1 -= ((32-i_Temp)<<6)-1;
308         Y11 -= ((32-i_Temp)<<7)-2;
309    }
310       return 0;
311 }
312
313 #if HAVE_MIPSFPU
314 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
315 static void sbr_hf_assemble_mips(float Y1[38][64][2],
316                             const float X_high[64][40][2],
317                             SpectralBandReplication *sbr, SBRData *ch_data,
318                             const int e_a[2])
319 {
320     int e, i, j, m;
321     const int h_SL = 4 * !sbr->bs_smoothing_mode;
322     const int kx = sbr->kx[1];
323     const int m_max = sbr->m[1];
324     static const float h_smooth[5] = {
325         0.33333333333333,
326         0.30150283239582,
327         0.21816949906249,
328         0.11516383427084,
329         0.03183050093751,
330     };
331
332     float (*g_temp)[48] = ch_data->g_temp, (*q_temp)[48] = ch_data->q_temp;
333     int indexnoise = ch_data->f_indexnoise;
334     int indexsine  = ch_data->f_indexsine;
335     float *g_temp1, *q_temp1, *pok, *pok1;
336     float temp1, temp2, temp3, temp4;
337     int size = m_max;
338
339     if (sbr->reset) {
340         for (i = 0; i < h_SL; i++) {
341             memcpy(g_temp[i + 2*ch_data->t_env[0]], sbr->gain[0], m_max * sizeof(sbr->gain[0][0]));
342             memcpy(q_temp[i + 2*ch_data->t_env[0]], sbr->q_m[0],  m_max * sizeof(sbr->q_m[0][0]));
343         }
344     } else if (h_SL) {
345         memcpy(g_temp[2*ch_data->t_env[0]], g_temp[2*ch_data->t_env_num_env_old], 4*sizeof(g_temp[0]));
346         memcpy(q_temp[2*ch_data->t_env[0]], q_temp[2*ch_data->t_env_num_env_old], 4*sizeof(q_temp[0]));
347     }
348
349     for (e = 0; e < ch_data->bs_num_env; e++) {
350         for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
351             g_temp1 = g_temp[h_SL + i];
352             pok = sbr->gain[e];
353             q_temp1 = q_temp[h_SL + i];
354             pok1 = sbr->q_m[e];
355
356             /* loop unrolled 4 times */
357             for (j=0; j<(size>>2); j++) {
358                 __asm__ volatile (
359                     "lw      %[temp1],   0(%[pok])               \n\t"
360                     "lw      %[temp2],   4(%[pok])               \n\t"
361                     "lw      %[temp3],   8(%[pok])               \n\t"
362                     "lw      %[temp4],   12(%[pok])              \n\t"
363                     "sw      %[temp1],   0(%[g_temp1])           \n\t"
364                     "sw      %[temp2],   4(%[g_temp1])           \n\t"
365                     "sw      %[temp3],   8(%[g_temp1])           \n\t"
366                     "sw      %[temp4],   12(%[g_temp1])          \n\t"
367                     "lw      %[temp1],   0(%[pok1])              \n\t"
368                     "lw      %[temp2],   4(%[pok1])              \n\t"
369                     "lw      %[temp3],   8(%[pok1])              \n\t"
370                     "lw      %[temp4],   12(%[pok1])             \n\t"
371                     "sw      %[temp1],   0(%[q_temp1])           \n\t"
372                     "sw      %[temp2],   4(%[q_temp1])           \n\t"
373                     "sw      %[temp3],   8(%[q_temp1])           \n\t"
374                     "sw      %[temp4],   12(%[q_temp1])          \n\t"
375                     PTR_ADDIU "%[pok],     %[pok],         16    \n\t"
376                     PTR_ADDIU "%[g_temp1], %[g_temp1],     16    \n\t"
377                     PTR_ADDIU "%[pok1],    %[pok1],        16    \n\t"
378                     PTR_ADDIU "%[q_temp1], %[q_temp1],     16    \n\t"
379
380                     : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
381                       [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
382                       [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
383                       [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
384                     :
385                     : "memory"
386                 );
387             }
388
389             for (j=0; j<(size&3); j++) {
390                 __asm__ volatile (
391                     "lw      %[temp1],   0(%[pok])              \n\t"
392                     "lw      %[temp2],   0(%[pok1])             \n\t"
393                     "sw      %[temp1],   0(%[g_temp1])          \n\t"
394                     "sw      %[temp2],   0(%[q_temp1])          \n\t"
395                     PTR_ADDIU "%[pok],     %[pok],        4     \n\t"
396                     PTR_ADDIU "%[g_temp1], %[g_temp1],    4     \n\t"
397                     PTR_ADDIU "%[pok1],    %[pok1],       4     \n\t"
398                     PTR_ADDIU "%[q_temp1], %[q_temp1],    4     \n\t"
399
400                     : [temp1]"=&r"(temp1), [temp2]"=&r"(temp2),
401                       [temp3]"=&r"(temp3), [temp4]"=&r"(temp4),
402                       [pok]"+r"(pok), [g_temp1]"+r"(g_temp1),
403                       [pok1]"+r"(pok1), [q_temp1]"+r"(q_temp1)
404                     :
405                     : "memory"
406                 );
407             }
408         }
409     }
410
411     for (e = 0; e < ch_data->bs_num_env; e++) {
412         for (i = 2 * ch_data->t_env[e]; i < 2 * ch_data->t_env[e + 1]; i++) {
413             LOCAL_ALIGNED_16(float, g_filt_tab, [48]);
414             LOCAL_ALIGNED_16(float, q_filt_tab, [48]);
415             float *g_filt, *q_filt;
416
417             if (h_SL && e != e_a[0] && e != e_a[1]) {
418                 g_filt = g_filt_tab;
419                 q_filt = q_filt_tab;
420
421                 for (m = 0; m < m_max; m++) {
422                     const int idx1 = i + h_SL;
423                     g_filt[m] = 0.0f;
424                     q_filt[m] = 0.0f;
425
426                     for (j = 0; j <= h_SL; j++) {
427                         g_filt[m] += g_temp[idx1 - j][m] * h_smooth[j];
428                         q_filt[m] += q_temp[idx1 - j][m] * h_smooth[j];
429                     }
430                 }
431             } else {
432                 g_filt = g_temp[i + h_SL];
433                 q_filt = q_temp[i];
434             }
435
436             sbr->dsp.hf_g_filt(Y1[i] + kx, X_high + kx, g_filt, m_max,
437                                i + ENVELOPE_ADJUSTMENT_OFFSET);
438
439             if (e != e_a[0] && e != e_a[1]) {
440                 sbr->dsp.hf_apply_noise[indexsine](Y1[i] + kx, sbr->s_m[e],
441                                                    q_filt, indexnoise,
442                                                    kx, m_max);
443             } else {
444                 int idx = indexsine&1;
445                 int A = (1-((indexsine+(kx & 1))&2));
446                 int B = (A^(-idx)) + idx;
447                 float *out = &Y1[i][kx][idx];
448                 float *in  = sbr->s_m[e];
449                 float temp0, temp1, temp2, temp3, temp4, temp5;
450                 float A_f = (float)A;
451                 float B_f = (float)B;
452
453                 for (m = 0; m+1 < m_max; m+=2) {
454
455                     temp2 = out[0];
456                     temp3 = out[2];
457
458                     __asm__ volatile(
459                         "lwc1    %[temp0],  0(%[in])                     \n\t"
460                         "lwc1    %[temp1],  4(%[in])                     \n\t"
461                         "madd.s  %[temp4],  %[temp2],  %[temp0], %[A_f]  \n\t"
462                         "madd.s  %[temp5],  %[temp3],  %[temp1], %[B_f]  \n\t"
463                         "swc1    %[temp4],  0(%[out])                    \n\t"
464                         "swc1    %[temp5],  8(%[out])                    \n\t"
465                         PTR_ADDIU "%[in],   %[in],     8                 \n\t"
466                         PTR_ADDIU "%[out],  %[out],    16                \n\t"
467
468                         : [temp0]"=&f" (temp0), [temp1]"=&f"(temp1),
469                           [temp4]"=&f" (temp4), [temp5]"=&f"(temp5),
470                           [in]"+r"(in), [out]"+r"(out)
471                         : [A_f]"f"(A_f), [B_f]"f"(B_f), [temp2]"f"(temp2),
472                           [temp3]"f"(temp3)
473                         : "memory"
474                     );
475                 }
476                 if(m_max&1)
477                     out[2*m  ] += in[m  ] * A;
478             }
479             indexnoise = (indexnoise + m_max) & 0x1ff;
480             indexsine = (indexsine + 1) & 3;
481         }
482     }
483     ch_data->f_indexnoise = indexnoise;
484     ch_data->f_indexsine  = indexsine;
485 }
486
487 static void sbr_hf_inverse_filter_mips(SBRDSPContext *dsp,
488                                   float (*alpha0)[2], float (*alpha1)[2],
489                                   const float X_low[32][40][2], int k0)
490 {
491     int k;
492     float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, c;
493     float *phi1, *alpha_1, *alpha_0, res1, res2, temp_real, temp_im;
494
495     c = 1.000001f;
496
497     for (k = 0; k < k0; k++) {
498         LOCAL_ALIGNED_16(float, phi, [3], [2][2]);
499         float dk;
500         phi1 = &phi[0][0][0];
501         alpha_1 = &alpha1[k][0];
502         alpha_0 = &alpha0[k][0];
503         dsp->autocorrelate(X_low[k], phi);
504
505         __asm__ volatile (
506             "lwc1    %[temp0],  40(%[phi1])                       \n\t"
507             "lwc1    %[temp1],  16(%[phi1])                       \n\t"
508             "lwc1    %[temp2],  24(%[phi1])                       \n\t"
509             "lwc1    %[temp3],  28(%[phi1])                       \n\t"
510             "mul.s   %[dk],     %[temp0],    %[temp1]             \n\t"
511             "lwc1    %[temp4],  0(%[phi1])                        \n\t"
512             "mul.s   %[res2],   %[temp2],    %[temp2]             \n\t"
513             "lwc1    %[temp5],  4(%[phi1])                        \n\t"
514             "madd.s  %[res2],   %[res2],     %[temp3],  %[temp3]  \n\t"
515             "lwc1    %[temp6],  8(%[phi1])                        \n\t"
516             "div.s   %[res2],   %[res2],     %[c]                 \n\t"
517             "lwc1    %[temp0],  12(%[phi1])                       \n\t"
518             "sub.s   %[dk],     %[dk],       %[res2]              \n\t"
519
520             : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
521               [temp3]"=&f"(temp3), [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
522               [temp6]"=&f"(temp6), [res2]"=&f"(res2), [dk]"=&f"(dk)
523             : [phi1]"r"(phi1), [c]"f"(c)
524             : "memory"
525         );
526
527         if (!dk) {
528             alpha_1[0] = 0;
529             alpha_1[1] = 0;
530         } else {
531             __asm__ volatile (
532                 "mul.s   %[temp_real], %[temp4],     %[temp2]            \n\t"
533                 "nmsub.s %[temp_real], %[temp_real], %[temp5], %[temp3]  \n\t"
534                 "nmsub.s %[temp_real], %[temp_real], %[temp6], %[temp1]  \n\t"
535                 "mul.s   %[temp_im],   %[temp4],     %[temp3]            \n\t"
536                 "madd.s  %[temp_im],   %[temp_im],   %[temp5], %[temp2]  \n\t"
537                 "nmsub.s %[temp_im],   %[temp_im],   %[temp0], %[temp1]  \n\t"
538                 "div.s   %[temp_real], %[temp_real], %[dk]               \n\t"
539                 "div.s   %[temp_im],   %[temp_im],   %[dk]               \n\t"
540                 "swc1    %[temp_real], 0(%[alpha_1])                     \n\t"
541                 "swc1    %[temp_im],   4(%[alpha_1])                     \n\t"
542
543                 : [temp_real]"=&f" (temp_real), [temp_im]"=&f"(temp_im)
544                 : [phi1]"r"(phi1), [temp0]"f"(temp0), [temp1]"f"(temp1),
545                   [temp2]"f"(temp2), [temp3]"f"(temp3), [temp4]"f"(temp4),
546                   [temp5]"f"(temp5), [temp6]"f"(temp6),
547                   [alpha_1]"r"(alpha_1), [dk]"f"(dk)
548                 : "memory"
549             );
550         }
551
552         if (!phi1[4]) {
553             alpha_0[0] = 0;
554             alpha_0[1] = 0;
555         } else {
556             __asm__ volatile (
557                 "lwc1    %[temp6],     0(%[alpha_1])                     \n\t"
558                 "lwc1    %[temp7],     4(%[alpha_1])                     \n\t"
559                 "mul.s   %[temp_real], %[temp6],     %[temp2]            \n\t"
560                 "add.s   %[temp_real], %[temp_real], %[temp4]            \n\t"
561                 "madd.s  %[temp_real], %[temp_real], %[temp7], %[temp3]  \n\t"
562                 "mul.s   %[temp_im],   %[temp7],     %[temp2]            \n\t"
563                 "add.s   %[temp_im],   %[temp_im],   %[temp5]            \n\t"
564                 "nmsub.s %[temp_im],   %[temp_im],   %[temp6], %[temp3]  \n\t"
565                 "div.s   %[temp_real], %[temp_real], %[temp1]            \n\t"
566                 "div.s   %[temp_im],   %[temp_im],   %[temp1]            \n\t"
567                 "neg.s   %[temp_real], %[temp_real]                      \n\t"
568                 "neg.s   %[temp_im],   %[temp_im]                        \n\t"
569                 "swc1    %[temp_real], 0(%[alpha_0])                     \n\t"
570                 "swc1    %[temp_im],   4(%[alpha_0])                     \n\t"
571
572                 : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
573                   [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
574                   [res1]"=&f"(res1), [res2]"=&f"(res2)
575                 : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0),
576                   [temp0]"f"(temp0), [temp1]"f"(temp1), [temp2]"f"(temp2),
577                   [temp3]"f"(temp3), [temp4]"f"(temp4), [temp5]"f"(temp5)
578                 : "memory"
579             );
580         }
581
582         __asm__ volatile (
583             "lwc1    %[temp1],      0(%[alpha_1])                           \n\t"
584             "lwc1    %[temp2],      4(%[alpha_1])                           \n\t"
585             "lwc1    %[temp_real],  0(%[alpha_0])                           \n\t"
586             "lwc1    %[temp_im],    4(%[alpha_0])                           \n\t"
587             "mul.s   %[res1],       %[temp1],      %[temp1]                 \n\t"
588             "madd.s  %[res1],       %[res1],       %[temp2],    %[temp2]    \n\t"
589             "mul.s   %[res2],       %[temp_real],  %[temp_real]             \n\t"
590             "madd.s  %[res2],       %[res2],       %[temp_im],  %[temp_im]  \n\t"
591
592             : [temp_real]"=&f"(temp_real), [temp_im]"=&f"(temp_im),
593               [temp1]"=&f"(temp1), [temp2]"=&f"(temp2),
594               [res1]"=&f"(res1), [res2]"=&f"(res2)
595             : [alpha_1]"r"(alpha_1), [alpha_0]"r"(alpha_0)
596             : "memory"
597         );
598
599         if (res1 >= 16.0f || res2 >= 16.0f) {
600             alpha_1[0] = 0;
601             alpha_1[1] = 0;
602             alpha_0[0] = 0;
603             alpha_0[1] = 0;
604         }
605     }
606 }
607 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
608 #endif /* HAVE_MIPSFPU */
609 #endif /* HAVE_INLINE_ASM */
610
611 void ff_aacsbr_func_ptr_init_mips(AACSBRContext *c)
612 {
613 #if HAVE_INLINE_ASM
614     c->sbr_lf_gen            = sbr_lf_gen_mips;
615     c->sbr_x_gen             = sbr_x_gen_mips;
616 #if HAVE_MIPSFPU
617 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
618     c->sbr_hf_inverse_filter = sbr_hf_inverse_filter_mips;
619     c->sbr_hf_assemble       = sbr_hf_assemble_mips;
620 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
621 #endif /* HAVE_MIPSFPU */
622 #endif /* HAVE_INLINE_ASM */
623 }