git.sesse.net Git - ffmpeg/blob - libavcodec/ppc/fft_vsx.c

   1 /*
   2  * FFT  transform, optimized with VSX built-in functions
   3  * Copyright (c) 2014 Rong Yan
   4  *
   5  * This algorithm (though not any of the implementation details) is
   6  * based on libdjbfft by D. J. Bernstein.
   7  *
   8  * This file is part of FFmpeg.
   9  *
  10  * FFmpeg is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * FFmpeg is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with FFmpeg; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25
  26 #include "config.h"
  27 #include "libavutil/cpu.h"
  28 #include "libavutil/ppc/types_altivec.h"
  29 #include "libavutil/ppc/util_altivec.h"
  30 #include "libavcodec/fft.h"
  31 #include "libavcodec/fft-internal.h"
  32 #include "fft_vsx.h"
  33
  34 #if HAVE_VSX
  35
  36 static void fft32_vsx_interleave(FFTComplex *z)
  37 {
  38     fft16_vsx_interleave(z);
  39     fft8_vsx_interleave(z+16);
  40     fft8_vsx_interleave(z+24);
  41     pass_vsx_interleave(z,ff_cos_32,4);
  42 }
  43
  44 static void fft64_vsx_interleave(FFTComplex *z)
  45 {
  46     fft32_vsx_interleave(z);
  47     fft16_vsx_interleave(z+32);
  48     fft16_vsx_interleave(z+48);
  49     pass_vsx_interleave(z,ff_cos_64, 8);
  50 }
  51 static void fft128_vsx_interleave(FFTComplex *z)
  52 {
  53     fft64_vsx_interleave(z);
  54     fft32_vsx_interleave(z+64);
  55     fft32_vsx_interleave(z+96);
  56     pass_vsx_interleave(z,ff_cos_128,16);
  57 }
  58 static void fft256_vsx_interleave(FFTComplex *z)
  59 {
  60     fft128_vsx_interleave(z);
  61     fft64_vsx_interleave(z+128);
  62     fft64_vsx_interleave(z+192);
  63     pass_vsx_interleave(z,ff_cos_256,32);
  64 }
  65 static void fft512_vsx_interleave(FFTComplex *z)
  66 {
  67     fft256_vsx_interleave(z);
  68     fft128_vsx_interleave(z+256);
  69     fft128_vsx_interleave(z+384);
  70     pass_vsx_interleave(z,ff_cos_512,64);
  71 }
  72 static void fft1024_vsx_interleave(FFTComplex *z)
  73 {
  74     fft512_vsx_interleave(z);
  75     fft256_vsx_interleave(z+512);
  76     fft256_vsx_interleave(z+768);
  77     pass_vsx_interleave(z,ff_cos_1024,128);
  78
  79 }
  80 static void fft2048_vsx_interleave(FFTComplex *z)
  81 {
  82     fft1024_vsx_interleave(z);
  83     fft512_vsx_interleave(z+1024);
  84     fft512_vsx_interleave(z+1536);
  85     pass_vsx_interleave(z,ff_cos_2048,256);
  86 }
  87 static void fft4096_vsx_interleave(FFTComplex *z)
  88 {
  89     fft2048_vsx_interleave(z);
  90     fft1024_vsx_interleave(z+2048);
  91     fft1024_vsx_interleave(z+3072);
  92     pass_vsx_interleave(z,ff_cos_4096, 512);
  93 }
  94 static void fft8192_vsx_interleave(FFTComplex *z)
  95 {
  96     fft4096_vsx_interleave(z);
  97     fft2048_vsx_interleave(z+4096);
  98     fft2048_vsx_interleave(z+6144);
  99     pass_vsx_interleave(z,ff_cos_8192,1024);
 100 }
 101 static void fft16384_vsx_interleave(FFTComplex *z)
 102 {
 103     fft8192_vsx_interleave(z);
 104     fft4096_vsx_interleave(z+8192);
 105     fft4096_vsx_interleave(z+12288);
 106     pass_vsx_interleave(z,ff_cos_16384,2048);
 107 }
 108 static void fft32768_vsx_interleave(FFTComplex *z)
 109 {
 110     fft16384_vsx_interleave(z);
 111     fft8192_vsx_interleave(z+16384);
 112     fft8192_vsx_interleave(z+24576);
 113     pass_vsx_interleave(z,ff_cos_32768,4096);
 114 }
 115 static void fft65536_vsx_interleave(FFTComplex *z)
 116 {
 117     fft32768_vsx_interleave(z);
 118     fft16384_vsx_interleave(z+32768);
 119     fft16384_vsx_interleave(z+49152);
 120     pass_vsx_interleave(z,ff_cos_65536,8192);
 121 }
 122
 123 static void fft32_vsx(FFTComplex *z)
 124 {
 125     fft16_vsx(z);
 126     fft8_vsx(z+16);
 127     fft8_vsx(z+24);
 128     pass_vsx(z,ff_cos_32,4);
 129 }
 130
 131 static void fft64_vsx(FFTComplex *z)
 132 {
 133     fft32_vsx(z);
 134     fft16_vsx(z+32);
 135     fft16_vsx(z+48);
 136     pass_vsx(z,ff_cos_64, 8);
 137 }
 138 static void fft128_vsx(FFTComplex *z)
 139 {
 140     fft64_vsx(z);
 141     fft32_vsx(z+64);
 142     fft32_vsx(z+96);
 143     pass_vsx(z,ff_cos_128,16);
 144 }
 145 static void fft256_vsx(FFTComplex *z)
 146 {
 147     fft128_vsx(z);
 148     fft64_vsx(z+128);
 149     fft64_vsx(z+192);
 150     pass_vsx(z,ff_cos_256,32);
 151 }
 152 static void fft512_vsx(FFTComplex *z)
 153 {
 154     fft256_vsx(z);
 155     fft128_vsx(z+256);
 156     fft128_vsx(z+384);
 157     pass_vsx(z,ff_cos_512,64);
 158 }
 159 static void fft1024_vsx(FFTComplex *z)
 160 {
 161     fft512_vsx(z);
 162     fft256_vsx(z+512);
 163     fft256_vsx(z+768);
 164     pass_vsx(z,ff_cos_1024,128);
 165
 166 }
 167 static void fft2048_vsx(FFTComplex *z)
 168 {
 169     fft1024_vsx(z);
 170     fft512_vsx(z+1024);
 171     fft512_vsx(z+1536);
 172     pass_vsx(z,ff_cos_2048,256);
 173 }
 174 static void fft4096_vsx(FFTComplex *z)
 175 {
 176     fft2048_vsx(z);
 177     fft1024_vsx(z+2048);
 178     fft1024_vsx(z+3072);
 179     pass_vsx(z,ff_cos_4096, 512);
 180 }
 181 static void fft8192_vsx(FFTComplex *z)
 182 {
 183     fft4096_vsx(z);
 184     fft2048_vsx(z+4096);
 185     fft2048_vsx(z+6144);
 186     pass_vsx(z,ff_cos_8192,1024);
 187 }
 188 static void fft16384_vsx(FFTComplex *z)
 189 {
 190     fft8192_vsx(z);
 191     fft4096_vsx(z+8192);
 192     fft4096_vsx(z+12288);
 193     pass_vsx(z,ff_cos_16384,2048);
 194 }
 195 static void fft32768_vsx(FFTComplex *z)
 196 {
 197     fft16384_vsx(z);
 198     fft8192_vsx(z+16384);
 199     fft8192_vsx(z+24576);
 200     pass_vsx(z,ff_cos_32768,4096);
 201 }
 202 static void fft65536_vsx(FFTComplex *z)
 203 {
 204     fft32768_vsx(z);
 205     fft16384_vsx(z+32768);
 206     fft16384_vsx(z+49152);
 207     pass_vsx(z,ff_cos_65536,8192);
 208 }
 209
 210 static void (* const fft_dispatch_vsx[])(FFTComplex*) = {
 211     fft4_vsx, fft8_vsx, fft16_vsx, fft32_vsx, fft64_vsx, fft128_vsx, fft256_vsx, fft512_vsx, fft1024_vsx,
 212     fft2048_vsx, fft4096_vsx, fft8192_vsx, fft16384_vsx, fft32768_vsx, fft65536_vsx,
 213 };
 214 static void (* const fft_dispatch_vsx_interleave[])(FFTComplex*) = {
 215     fft4_vsx_interleave, fft8_vsx_interleave, fft16_vsx_interleave, fft32_vsx_interleave, fft64_vsx_interleave,
 216     fft128_vsx_interleave, fft256_vsx_interleave, fft512_vsx_interleave, fft1024_vsx_interleave,
 217     fft2048_vsx_interleave, fft4096_vsx_interleave, fft8192_vsx_interleave, fft16384_vsx_interleave, fft32768_vsx_interleave, fft65536_vsx_interleave,
 218 };
 219 void ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z)
 220 {
 221      fft_dispatch_vsx_interleave[s->nbits-2](z);
 222 }
 223 void ff_fft_calc_vsx(FFTContext *s, FFTComplex *z)
 224 {
 225      fft_dispatch_vsx[s->nbits-2](z);
 226 }
 227 #endif /* HAVE_VSX */