#include <stdlib.h>
#include <string.h>
#include "libavutil/mathematics.h"
+#include "libavutil/thread.h"
#include "fft.h"
#include "fft-internal.h"
#if FFT_FIXED_32
#include "fft_table.h"
+
+static void av_cold fft_lut_init(void)
+{
+ int n = 0;
+ ff_fft_lut_init(ff_fft_offsets_lut, 0, 1 << 17, &n);
+}
+
#else /* FFT_FIXED_32 */
/* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */
COSTABLE(32768);
COSTABLE(65536);
COSTABLE(131072);
+
+static av_cold void init_ff_cos_tabs(int index)
+{
+ int i;
+ int m = 1<<index;
+ double freq = 2*M_PI/m;
+ FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
+ for(i=0; i<=m/4; i++)
+ tab[i] = FIX15(cos(i*freq));
+ for(i=1; i<m/4; i++)
+ tab[m/2-i] = tab[i];
+}
+
+typedef struct CosTabsInitOnce {
+ void (*func)(void);
+ AVOnce control;
+} CosTabsInitOnce;
+
+#define INIT_FF_COS_TABS_FUNC(index, size) \
+static av_cold void init_ff_cos_tabs_ ## size (void)\
+{ \
+ init_ff_cos_tabs(index); \
+}
+
+INIT_FF_COS_TABS_FUNC(4, 16)
+INIT_FF_COS_TABS_FUNC(5, 32)
+INIT_FF_COS_TABS_FUNC(6, 64)
+INIT_FF_COS_TABS_FUNC(7, 128)
+INIT_FF_COS_TABS_FUNC(8, 256)
+INIT_FF_COS_TABS_FUNC(9, 512)
+INIT_FF_COS_TABS_FUNC(10, 1024)
+INIT_FF_COS_TABS_FUNC(11, 2048)
+INIT_FF_COS_TABS_FUNC(12, 4096)
+INIT_FF_COS_TABS_FUNC(13, 8192)
+INIT_FF_COS_TABS_FUNC(14, 16384)
+INIT_FF_COS_TABS_FUNC(15, 32768)
+INIT_FF_COS_TABS_FUNC(16, 65536)
+INIT_FF_COS_TABS_FUNC(17, 131072)
+
+static CosTabsInitOnce cos_tabs_init_once[] = {
+ { NULL },
+ { NULL },
+ { NULL },
+ { NULL },
+ { init_ff_cos_tabs_16, AV_ONCE_INIT },
+ { init_ff_cos_tabs_32, AV_ONCE_INIT },
+ { init_ff_cos_tabs_64, AV_ONCE_INIT },
+ { init_ff_cos_tabs_128, AV_ONCE_INIT },
+ { init_ff_cos_tabs_256, AV_ONCE_INIT },
+ { init_ff_cos_tabs_512, AV_ONCE_INIT },
+ { init_ff_cos_tabs_1024, AV_ONCE_INIT },
+ { init_ff_cos_tabs_2048, AV_ONCE_INIT },
+ { init_ff_cos_tabs_4096, AV_ONCE_INIT },
+ { init_ff_cos_tabs_8192, AV_ONCE_INIT },
+ { init_ff_cos_tabs_16384, AV_ONCE_INIT },
+ { init_ff_cos_tabs_32768, AV_ONCE_INIT },
+ { init_ff_cos_tabs_65536, AV_ONCE_INIT },
+ { init_ff_cos_tabs_131072, AV_ONCE_INIT },
+};
+
#endif
COSTABLE_CONST FFTSample * const FFT_NAME(ff_cos_tabs)[] = {
NULL, NULL, NULL, NULL,
av_cold void ff_init_ff_cos_tabs(int index)
{
#if (!CONFIG_HARDCODED_TABLES) && (!FFT_FIXED_32)
- int i;
- int m = 1<<index;
- double freq = 2*M_PI/m;
- FFTSample *tab = FFT_NAME(ff_cos_tabs)[index];
- for(i=0; i<=m/4; i++)
- tab[i] = FIX15(cos(i*freq));
- for(i=1; i<m/4; i++)
- tab[m/2-i] = tab[i];
+ ff_thread_once(&cos_tabs_init_once[index].control, cos_tabs_init_once[index].func);
#endif
}
#if FFT_FIXED_32
{
- int n=0;
- ff_fft_lut_init(ff_fft_offsets_lut, 0, 1 << 17, &n);
+ static AVOnce control = AV_ONCE_INIT;
+ ff_thread_once(&control, fft_lut_init);
}
#else /* FFT_FIXED_32 */
#if FFT_FLOAT
int nbits, i, n, num_transforms, offset, step;
int n4, n2, n34;
- FFTSample tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
+ unsigned tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
FFTComplex *tmpz;
const int fft_size = (1 << s->nbits);
int64_t accu;
offset = ff_fft_offsets_lut[n] << 2;
tmpz = z + offset;
- tmp1 = tmpz[0].re + tmpz[1].re;
- tmp5 = tmpz[2].re + tmpz[3].re;
- tmp2 = tmpz[0].im + tmpz[1].im;
- tmp6 = tmpz[2].im + tmpz[3].im;
- tmp3 = tmpz[0].re - tmpz[1].re;
- tmp8 = tmpz[2].im - tmpz[3].im;
- tmp4 = tmpz[0].im - tmpz[1].im;
- tmp7 = tmpz[2].re - tmpz[3].re;
+ tmp1 = tmpz[0].re + (unsigned)tmpz[1].re;
+ tmp5 = tmpz[2].re + (unsigned)tmpz[3].re;
+ tmp2 = tmpz[0].im + (unsigned)tmpz[1].im;
+ tmp6 = tmpz[2].im + (unsigned)tmpz[3].im;
+ tmp3 = tmpz[0].re - (unsigned)tmpz[1].re;
+ tmp8 = tmpz[2].im - (unsigned)tmpz[3].im;
+ tmp4 = tmpz[0].im - (unsigned)tmpz[1].im;
+ tmp7 = tmpz[2].re - (unsigned)tmpz[3].re;
tmpz[0].re = tmp1 + tmp5;
tmpz[2].re = tmp1 - tmp5;
offset = ff_fft_offsets_lut[n] << 3;
tmpz = z + offset;
- tmp1 = tmpz[4].re + tmpz[5].re;
- tmp3 = tmpz[6].re + tmpz[7].re;
- tmp2 = tmpz[4].im + tmpz[5].im;
- tmp4 = tmpz[6].im + tmpz[7].im;
+ tmp1 = tmpz[4].re + (unsigned)tmpz[5].re;
+ tmp3 = tmpz[6].re + (unsigned)tmpz[7].re;
+ tmp2 = tmpz[4].im + (unsigned)tmpz[5].im;
+ tmp4 = tmpz[6].im + (unsigned)tmpz[7].im;
tmp5 = tmp1 + tmp3;
tmp7 = tmp1 - tmp3;
tmp6 = tmp2 + tmp4;
tmp8 = tmp2 - tmp4;
- tmp1 = tmpz[4].re - tmpz[5].re;
- tmp2 = tmpz[4].im - tmpz[5].im;
- tmp3 = tmpz[6].re - tmpz[7].re;
- tmp4 = tmpz[6].im - tmpz[7].im;
+ tmp1 = tmpz[4].re - (unsigned)tmpz[5].re;
+ tmp2 = tmpz[4].im - (unsigned)tmpz[5].im;
+ tmp3 = tmpz[6].re - (unsigned)tmpz[7].re;
+ tmp4 = tmpz[6].im - (unsigned)tmpz[7].im;
tmpz[4].re = tmpz[0].re - tmp5;
tmpz[0].re = tmpz[0].re + tmp5;
tmpz[6].im = tmpz[2].im + tmp7;
tmpz[2].im = tmpz[2].im - tmp7;
- accu = (int64_t)Q31(M_SQRT1_2)*(tmp1 + tmp2);
+ accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp1 + tmp2);
tmp5 = (int32_t)((accu + 0x40000000) >> 31);
- accu = (int64_t)Q31(M_SQRT1_2)*(tmp3 - tmp4);
+ accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp3 - tmp4);
tmp7 = (int32_t)((accu + 0x40000000) >> 31);
- accu = (int64_t)Q31(M_SQRT1_2)*(tmp2 - tmp1);
+ accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp2 - tmp1);
tmp6 = (int32_t)((accu + 0x40000000) >> 31);
- accu = (int64_t)Q31(M_SQRT1_2)*(tmp3 + tmp4);
+ accu = (int64_t)Q31(M_SQRT1_2)*(int)(tmp3 + tmp4);
tmp8 = (int32_t)((accu + 0x40000000) >> 31);
tmp1 = tmp5 + tmp7;
tmp3 = tmp5 - tmp7;
offset = ff_fft_offsets_lut[n] << nbits;
tmpz = z + offset;
- tmp5 = tmpz[ n2].re + tmpz[n34].re;
- tmp1 = tmpz[ n2].re - tmpz[n34].re;
- tmp6 = tmpz[ n2].im + tmpz[n34].im;
- tmp2 = tmpz[ n2].im - tmpz[n34].im;
+ tmp5 = tmpz[ n2].re + (unsigned)tmpz[n34].re;
+ tmp1 = tmpz[ n2].re - (unsigned)tmpz[n34].re;
+ tmp6 = tmpz[ n2].im + (unsigned)tmpz[n34].im;
+ tmp2 = tmpz[ n2].im - (unsigned)tmpz[n34].im;
tmpz[ n2].re = tmpz[ 0].re - tmp5;
tmpz[ 0].re = tmpz[ 0].re + tmp5;