git.sesse.net Git - ffmpeg/blob - libavcodec/mips/aacpsy_mips.h

   1 /*
   2  * Copyright (c) 2012
   3  *      MIPS Technologies, Inc., California.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
  14  *    contributors may be used to endorse or promote products derived from
  15  *    this software without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  *
  29  * Author:  Bojan Zivkovic   (bojan@mips.com)
  30  *
  31  * AAC encoder psychoacoustic model routines optimized
  32  * for MIPS floating-point architecture
  33  *
  34  * This file is part of FFmpeg.
  35  *
  36  * FFmpeg is free software; you can redistribute it and/or
  37  * modify it under the terms of the GNU Lesser General Public
  38  * License as published by the Free Software Foundation; either
  39  * version 2.1 of the License, or (at your option) any later version.
  40  *
  41  * FFmpeg is distributed in the hope that it will be useful,
  42  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  43  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  44  * Lesser General Public License for more details.
  45  *
  46  * You should have received a copy of the GNU Lesser General Public
  47  * License along with FFmpeg; if not, write to the Free Software
  48  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  49  */
  50
  51 /**
  52  * @file
  53  * Reference: libavcodec/aacpsy.c
  54  */
  55
  56 #ifndef AVCODEC_MIPS_AACPSY_MIPS_H
  57 #define AVCODEC_MIPS_AACPSY_MIPS_H
  58
  59 #include "libavutil/mips/asmdefs.h"
  60
  61 #if HAVE_INLINE_ASM && HAVE_MIPSFPU && ( PSY_LAME_FIR_LEN == 21 )
  62 #if !HAVE_MIPS32R6 && !HAVE_MIPS64R6
  63 static void calc_thr_3gpp_mips(const FFPsyWindowInfo *wi, const int num_bands,
  64                                AacPsyChannel *pch, const uint8_t *band_sizes,
  65                                const float *coefs, const int cutoff)
  66 {
  67     int i, w, g;
  68     int start = 0, wstart = 0;
  69     for (w = 0; w < wi->num_windows*16; w += 16) {
  70         wstart = 0;
  71         for (g = 0; g < num_bands; g++) {
  72             AacPsyBand *band = &pch->band[w+g];
  73
  74             float form_factor = 0.0f;
  75             float Temp;
  76             band->energy = 0.0f;
  77             if (wstart < cutoff) {
  78                 for (i = 0; i < band_sizes[g]; i+=4) {
  79                     float a, b, c, d;
  80                     float ax, bx, cx, dx;
  81                     float *cf = (float *)&coefs[start+i];
  82
  83                     __asm__ volatile (
  84                         "lwc1   %[a],   0(%[cf])                \n\t"
  85                         "lwc1   %[b],   4(%[cf])                \n\t"
  86                         "lwc1   %[c],   8(%[cf])                \n\t"
  87                         "lwc1   %[d],   12(%[cf])               \n\t"
  88                         "abs.s  %[a],   %[a]                    \n\t"
  89                         "abs.s  %[b],   %[b]                    \n\t"
  90                         "abs.s  %[c],   %[c]                    \n\t"
  91                         "abs.s  %[d],   %[d]                    \n\t"
  92                         "sqrt.s %[ax],  %[a]                    \n\t"
  93                         "sqrt.s %[bx],  %[b]                    \n\t"
  94                         "sqrt.s %[cx],  %[c]                    \n\t"
  95                         "sqrt.s %[dx],  %[d]                    \n\t"
  96                         "madd.s %[e],   %[e],   %[a],   %[a]    \n\t"
  97                         "madd.s %[e],   %[e],   %[b],   %[b]    \n\t"
  98                         "madd.s %[e],   %[e],   %[c],   %[c]    \n\t"
  99                         "madd.s %[e],   %[e],   %[d],   %[d]    \n\t"
 100                         "add.s  %[f],   %[f],   %[ax]           \n\t"
 101                         "add.s  %[f],   %[f],   %[bx]           \n\t"
 102                         "add.s  %[f],   %[f],   %[cx]           \n\t"
 103                         "add.s  %[f],   %[f],   %[dx]           \n\t"
 104
 105                         : [a]"=&f"(a), [b]"=&f"(b),
 106                           [c]"=&f"(c), [d]"=&f"(d),
 107                           [e]"+f"(band->energy), [f]"+f"(form_factor),
 108                           [ax]"=&f"(ax), [bx]"=&f"(bx),
 109                           [cx]"=&f"(cx), [dx]"=&f"(dx)
 110                         : [cf]"r"(cf)
 111                         : "memory"
 112                     );
 113                 }
 114             }
 115
 116             Temp = sqrtf((float)band_sizes[g] / band->energy);
 117             band->thr      = band->energy * 0.001258925f;
 118             band->nz_lines = form_factor * sqrtf(Temp);
 119             start += band_sizes[g];
 120             wstart += band_sizes[g];
 121         }
 122     }
 123 }
 124
 125 static void psy_hp_filter_mips(const float *firbuf, float *hpfsmpl, const float * psy_fir_coeffs)
 126 {
 127     float sum1, sum2, sum3, sum4;
 128     float *fb = (float*)firbuf;
 129     float *fb_end = fb + AAC_BLOCK_SIZE_LONG;
 130     float *hp = hpfsmpl;
 131
 132     float coeff0 = psy_fir_coeffs[1];
 133     float coeff1 = psy_fir_coeffs[3];
 134     float coeff2 = psy_fir_coeffs[5];
 135     float coeff3 = psy_fir_coeffs[7];
 136     float coeff4 = psy_fir_coeffs[9];
 137
 138     float f1 = 32768.0;
 139     __asm__ volatile (
 140         ".set push                                          \n\t"
 141         ".set noreorder                                     \n\t"
 142
 143         "1:                                                 \n\t"
 144         "lwc1   $f0,        40(%[fb])                       \n\t"
 145         "lwc1   $f1,        4(%[fb])                        \n\t"
 146         "lwc1   $f2,        80(%[fb])                       \n\t"
 147         "lwc1   $f3,        44(%[fb])                       \n\t"
 148         "lwc1   $f4,        8(%[fb])                        \n\t"
 149         "madd.s %[sum1],    $f0,        $f1,    %[coeff0]   \n\t"
 150         "lwc1   $f5,        84(%[fb])                       \n\t"
 151         "lwc1   $f6,        48(%[fb])                       \n\t"
 152         "madd.s %[sum2],    $f3,        $f4,    %[coeff0]   \n\t"
 153         "lwc1   $f7,        12(%[fb])                       \n\t"
 154         "madd.s %[sum1],    %[sum1],    $f2,    %[coeff0]   \n\t"
 155         "lwc1   $f8,        88(%[fb])                       \n\t"
 156         "lwc1   $f9,        52(%[fb])                       \n\t"
 157         "madd.s %[sum2],    %[sum2],    $f5,    %[coeff0]   \n\t"
 158         "madd.s %[sum3],    $f6,        $f7,    %[coeff0]   \n\t"
 159         "lwc1   $f10,       16(%[fb])                       \n\t"
 160         "lwc1   $f11,       92(%[fb])                       \n\t"
 161         "madd.s %[sum1],    %[sum1],    $f7,    %[coeff1]   \n\t"
 162         "lwc1   $f1,        72(%[fb])                       \n\t"
 163         "madd.s %[sum3],    %[sum3],    $f8,    %[coeff0]   \n\t"
 164         "madd.s %[sum4],    $f9,        $f10,   %[coeff0]   \n\t"
 165         "madd.s %[sum2],    %[sum2],    $f10,   %[coeff1]   \n\t"
 166         "madd.s %[sum1],    %[sum1],    $f1,    %[coeff1]   \n\t"
 167         "lwc1   $f4,        76(%[fb])                       \n\t"
 168         "lwc1   $f8,        20(%[fb])                       \n\t"
 169         "madd.s %[sum4],    %[sum4],    $f11,   %[coeff0]   \n\t"
 170         "lwc1   $f11,       24(%[fb])                       \n\t"
 171         "madd.s %[sum2],    %[sum2],    $f4,    %[coeff1]   \n\t"
 172         "madd.s %[sum1],    %[sum1],    $f8,    %[coeff2]   \n\t"
 173         "madd.s %[sum3],    %[sum3],    $f8,    %[coeff1]   \n\t"
 174         "madd.s %[sum4],    %[sum4],    $f11,   %[coeff1]   \n\t"
 175         "lwc1   $f7,        64(%[fb])                       \n\t"
 176         "madd.s %[sum2],    %[sum2],    $f11,   %[coeff2]   \n\t"
 177         "lwc1   $f10,       68(%[fb])                       \n\t"
 178         "madd.s %[sum3],    %[sum3],    $f2,    %[coeff1]   \n\t"
 179         "madd.s %[sum4],    %[sum4],    $f5,    %[coeff1]   \n\t"
 180         "madd.s %[sum1],    %[sum1],    $f7,    %[coeff2]   \n\t"
 181         "madd.s %[sum2],    %[sum2],    $f10,   %[coeff2]   \n\t"
 182         "lwc1   $f2,        28(%[fb])                       \n\t"
 183         "lwc1   $f5,        32(%[fb])                       \n\t"
 184         "lwc1   $f8,        56(%[fb])                       \n\t"
 185         "lwc1   $f11,       60(%[fb])                       \n\t"
 186         "madd.s %[sum3],    %[sum3],    $f2,    %[coeff2]   \n\t"
 187         "madd.s %[sum4],    %[sum4],    $f5,    %[coeff2]   \n\t"
 188         "madd.s %[sum1],    %[sum1],    $f2,    %[coeff3]   \n\t"
 189         "madd.s %[sum2],    %[sum2],    $f5,    %[coeff3]   \n\t"
 190         "madd.s %[sum3],    %[sum3],    $f1,    %[coeff2]   \n\t"
 191         "madd.s %[sum4],    %[sum4],    $f4,    %[coeff2]   \n\t"
 192         "madd.s %[sum1],    %[sum1],    $f8,    %[coeff3]   \n\t"
 193         "madd.s %[sum2],    %[sum2],    $f11,   %[coeff3]   \n\t"
 194         "lwc1   $f1,        36(%[fb])                       \n\t"
 195         PTR_ADDIU "%[fb],   %[fb],      16                  \n\t"
 196         "madd.s %[sum4],    %[sum4],    $f0,    %[coeff3]   \n\t"
 197         "madd.s %[sum3],    %[sum3],    $f1,    %[coeff3]   \n\t"
 198         "madd.s %[sum1],    %[sum1],    $f1,    %[coeff4]   \n\t"
 199         "madd.s %[sum2],    %[sum2],    $f0,    %[coeff4]   \n\t"
 200         "madd.s %[sum4],    %[sum4],    $f10,   %[coeff3]   \n\t"
 201         "madd.s %[sum3],    %[sum3],    $f7,    %[coeff3]   \n\t"
 202         "madd.s %[sum1],    %[sum1],    $f6,    %[coeff4]   \n\t"
 203         "madd.s %[sum2],    %[sum2],    $f9,    %[coeff4]   \n\t"
 204         "madd.s %[sum4],    %[sum4],    $f6,    %[coeff4]   \n\t"
 205         "madd.s %[sum3],    %[sum3],    $f3,    %[coeff4]   \n\t"
 206         "mul.s  %[sum1],    %[sum1],    %[f1]               \n\t"
 207         "mul.s  %[sum2],    %[sum2],    %[f1]               \n\t"
 208         "madd.s %[sum4],    %[sum4],    $f11,   %[coeff4]   \n\t"
 209         "madd.s %[sum3],    %[sum3],    $f8,    %[coeff4]   \n\t"
 210         "swc1   %[sum1],    0(%[hp])                        \n\t"
 211         "swc1   %[sum2],    4(%[hp])                        \n\t"
 212         "mul.s  %[sum4],    %[sum4],    %[f1]               \n\t"
 213         "mul.s  %[sum3],    %[sum3],    %[f1]               \n\t"
 214         "swc1   %[sum4],    12(%[hp])                       \n\t"
 215         "swc1   %[sum3],    8(%[hp])                        \n\t"
 216         "bne    %[fb],      %[fb_end],  1b                  \n\t"
 217         PTR_ADDIU "%[hp],   %[hp],      16                  \n\t"
 218
 219         ".set pop                                           \n\t"
 220
 221         : [sum1]"=&f"(sum1), [sum2]"=&f"(sum2),
 222           [sum3]"=&f"(sum3), [sum4]"=&f"(sum4),
 223           [fb]"+r"(fb), [hp]"+r"(hp)
 224         : [coeff0]"f"(coeff0), [coeff1]"f"(coeff1),
 225           [coeff2]"f"(coeff2), [coeff3]"f"(coeff3),
 226           [coeff4]"f"(coeff4), [fb_end]"r"(fb_end), [f1]"f"(f1)
 227         : "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6",
 228           "$f7", "$f8", "$f9", "$f10", "$f11",
 229           "memory"
 230     );
 231 }
 232
/* Map the generic routine names onto the MIPS implementations defined in this
 * header. Only active inside the surrounding feature checks; presumably the
 * including file (see the @file reference) then calls these names and skips
 * its own C versions -- confirm at the include site. */
 233 #define calc_thr_3gpp calc_thr_3gpp_mips
 234 #define psy_hp_filter psy_hp_filter_mips
 235
 236 #endif /* !HAVE_MIPS32R6 && !HAVE_MIPS64R6 */
 237 #endif /* HAVE_INLINE_ASM && HAVE_MIPSFPU */
 238 #endif /* AVCODEC_MIPS_AACPSY_MIPS_H */