#ifdef TX_FLOAT
#define TX_NAME(x) x ## _float
+#define SCALE_TYPE float
typedef float FFTSample;
typedef AVComplexFloat FFTComplex;
#elif defined(TX_DOUBLE)
#define TX_NAME(x) x ## _double
+#define SCALE_TYPE double
typedef double FFTSample;
typedef AVComplexDouble FFTComplex;
+#elif defined(TX_INT32)
+#define TX_NAME(x) x ## _int32
+#define SCALE_TYPE float
+typedef int32_t FFTSample;
+typedef AVComplexInt32 FFTComplex;
#else
typedef void FFTComplex;
#endif
#if defined(TX_FLOAT) || defined(TX_DOUBLE)
-#define BF(x, y, a, b) do { \
- x = (a) - (b); \
- y = (a) + (b); \
- } while (0)
+
+#define MUL(x, y) ((x)*(y))
#define CMUL(dre, dim, are, aim, bre, bim) do { \
(dre) = (are) * (bre) - (aim) * (bim); \
(dim) = (are) * (bim) + (aim) * (bre); \
} while (0)
+
+#define SMUL(dre, dim, are, aim, bre, bim) do { \
+ (dre) = (are) * (bre) - (aim) * (bim); \
+ (dim) = (are) * (bim) - (aim) * (bre); \
+ } while (0)
+
+#define RESCALE(x) (x)
+
+#define FOLD(a, b) ((a) + (b))
+
+#elif defined(TX_INT32)
+
+#define MUL(x, y) ((int32_t)(((int64_t)(x) * (int64_t)(y) + 0x40000000) >> 31))
+
+/* Properly rounds the result */
+#define CMUL(dre, dim, are, aim, bre, bim) do { \
+ int64_t accu; \
+ (accu) = (int64_t)(bre) * (are); \
+ (accu) -= (int64_t)(bim) * (aim); \
+ (dre) = (int)(((accu) + 0x40000000) >> 31); \
+ (accu) = (int64_t)(bim) * (are); \
+ (accu) += (int64_t)(bre) * (aim); \
+ (dim) = (int)(((accu) + 0x40000000) >> 31); \
+ } while (0)
+
+#define SMUL(dre, dim, are, aim, bre, bim) do { \
+ int64_t accu; \
+ (accu) = (int64_t)(bre) * (are); \
+ (accu) -= (int64_t)(bim) * (aim); \
+ (dre) = (int)(((accu) + 0x40000000) >> 31); \
+ (accu) = (int64_t)(bim) * (are); \
+ (accu) -= (int64_t)(bre) * (aim); \
+ (dim) = (int)(((accu) + 0x40000000) >> 31); \
+ } while (0)
+
+#define RESCALE(x) (lrintf((x) * 2147483648.0))
+
+#define FOLD(x, y) ((int)((x) + (unsigned)(y) + 32) >> 6)
+
#endif
+#define BF(x, y, a, b) do { \
+ x = (a) - (b); \
+ y = (a) + (b); \
+ } while (0)
+
#define CMUL3(c, a, b) \
CMUL((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
};
/* Shared functions */
+int ff_tx_type_is_mdct(enum AVTXType type);
int ff_tx_gen_compound_mapping(AVTXContext *s);
int ff_tx_gen_ptwo_revtab(AVTXContext *s);
int ff_tx_init_mdct_fft_double(AVTXContext *s, av_tx_fn *tx,
enum AVTXType type, int inv, int len,
const void *scale, uint64_t flags);
+int ff_tx_init_mdct_fft_int32(AVTXContext *s, av_tx_fn *tx,
+ enum AVTXType type, int inv, int len,
+ const void *scale, uint64_t flags);
typedef struct CosTabsInitOnce {
void (*func)(void);
double freq = 2*M_PI/m;
FFTSample *tab = cos_tabs[index];
for(int i = 0; i <= m/4; i++)
- tab[i] = cos(i*freq);
+ tab[i] = RESCALE(cos(i*freq));
for(int i = 1; i < m/4; i++)
tab[m/2 - i] = tab[i];
}
static av_cold void ff_init_53_tabs(void)
{
- TX_NAME(ff_cos_53)[0] = (FFTComplex){ cos(2 * M_PI / 12), cos(2 * M_PI / 12) };
- TX_NAME(ff_cos_53)[1] = (FFTComplex){ 0.5, 0.5 };
- TX_NAME(ff_cos_53)[2] = (FFTComplex){ cos(2 * M_PI / 5), sin(2 * M_PI / 5) };
- TX_NAME(ff_cos_53)[3] = (FFTComplex){ cos(2 * M_PI / 10), sin(2 * M_PI / 10) };
+ TX_NAME(ff_cos_53)[0] = (FFTComplex){ RESCALE(cos(2 * M_PI / 12)), RESCALE(cos(2 * M_PI / 12)) };
+ TX_NAME(ff_cos_53)[1] = (FFTComplex){ RESCALE(cos(2 * M_PI / 6)), RESCALE(cos(2 * M_PI / 6)) };
+ TX_NAME(ff_cos_53)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI / 5)), RESCALE(sin(2 * M_PI / 5)) };
+ TX_NAME(ff_cos_53)[3] = (FFTComplex){ RESCALE(cos(2 * M_PI / 10)), RESCALE(sin(2 * M_PI / 10)) };
}
static CosTabsInitOnce cos_tabs_init_once[] = {
{
FFTComplex tmp[2];
- tmp[0].re = in[1].im - in[2].im;
- tmp[0].im = in[1].re - in[2].re;
- tmp[1].re = in[1].re + in[2].re;
- tmp[1].im = in[1].im + in[2].im;
+ BF(tmp[0].re, tmp[1].im, in[1].im, in[2].im);
+ BF(tmp[0].im, tmp[1].re, in[1].re, in[2].re);
out[0*stride].re = in[0].re + tmp[1].re;
out[0*stride].im = in[0].im + tmp[1].im;
- tmp[0].re *= TX_NAME(ff_cos_53)[0].re;
- tmp[0].im *= TX_NAME(ff_cos_53)[0].im;
- tmp[1].re *= TX_NAME(ff_cos_53)[1].re;
- tmp[1].im *= TX_NAME(ff_cos_53)[1].re;
+ tmp[0].re = MUL(TX_NAME(ff_cos_53)[0].re, tmp[0].re);
+ tmp[0].im = MUL(TX_NAME(ff_cos_53)[0].im, tmp[0].im);
+ tmp[1].re = MUL(TX_NAME(ff_cos_53)[1].re, tmp[1].re);
+ tmp[1].im = MUL(TX_NAME(ff_cos_53)[1].re, tmp[1].im);
out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
}
-#define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
-static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
- ptrdiff_t stride) \
-{ \
- FFTComplex z0[4], t[6]; \
- \
- t[0].re = in[1].re + in[4].re; \
- t[0].im = in[1].im + in[4].im; \
- t[1].im = in[1].re - in[4].re; \
- t[1].re = in[1].im - in[4].im; \
- t[2].re = in[2].re + in[3].re; \
- t[2].im = in[2].im + in[3].im; \
- t[3].im = in[2].re - in[3].re; \
- t[3].re = in[2].im - in[3].im; \
- \
- out[D0*stride].re = in[0].re + in[1].re + in[2].re + \
- in[3].re + in[4].re; \
- out[D0*stride].im = in[0].im + in[1].im + in[2].im + \
- in[3].im + in[4].im; \
- \
- t[4].re = TX_NAME(ff_cos_53)[2].re * t[2].re; \
- t[4].im = TX_NAME(ff_cos_53)[2].re * t[2].im; \
- t[4].re -= TX_NAME(ff_cos_53)[3].re * t[0].re; \
- t[4].im -= TX_NAME(ff_cos_53)[3].re * t[0].im; \
- t[0].re = TX_NAME(ff_cos_53)[2].re * t[0].re; \
- t[0].im = TX_NAME(ff_cos_53)[2].re * t[0].im; \
- t[0].re -= TX_NAME(ff_cos_53)[3].re * t[2].re; \
- t[0].im -= TX_NAME(ff_cos_53)[3].re * t[2].im; \
- t[5].re = TX_NAME(ff_cos_53)[2].im * t[3].re; \
- t[5].im = TX_NAME(ff_cos_53)[2].im * t[3].im; \
- t[5].re -= TX_NAME(ff_cos_53)[3].im * t[1].re; \
- t[5].im -= TX_NAME(ff_cos_53)[3].im * t[1].im; \
- t[1].re = TX_NAME(ff_cos_53)[2].im * t[1].re; \
- t[1].im = TX_NAME(ff_cos_53)[2].im * t[1].im; \
- t[1].re += TX_NAME(ff_cos_53)[3].im * t[3].re; \
- t[1].im += TX_NAME(ff_cos_53)[3].im * t[3].im; \
- \
- z0[0].re = t[0].re - t[1].re; \
- z0[0].im = t[0].im - t[1].im; \
- z0[1].re = t[4].re + t[5].re; \
- z0[1].im = t[4].im + t[5].im; \
- \
- z0[2].re = t[4].re - t[5].re; \
- z0[2].im = t[4].im - t[5].im; \
- z0[3].re = t[0].re + t[1].re; \
- z0[3].im = t[0].im + t[1].im; \
- \
- out[D1*stride].re = in[0].re + z0[3].re; \
- out[D1*stride].im = in[0].im + z0[0].im; \
- out[D2*stride].re = in[0].re + z0[2].re; \
- out[D2*stride].im = in[0].im + z0[1].im; \
- out[D3*stride].re = in[0].re + z0[1].re; \
- out[D3*stride].im = in[0].im + z0[2].im; \
- out[D4*stride].re = in[0].re + z0[0].re; \
- out[D4*stride].im = in[0].im + z0[3].im; \
+#define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
+static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
+ ptrdiff_t stride) \
+{ \
+ FFTComplex z0[4], t[6]; \
+ \
+ BF(t[1].im, t[0].re, in[1].re, in[4].re); \
+ BF(t[1].re, t[0].im, in[1].im, in[4].im); \
+ BF(t[3].im, t[2].re, in[2].re, in[3].re); \
+ BF(t[3].re, t[2].im, in[2].im, in[3].im); \
+ \
+ out[D0*stride].re = in[0].re + in[1].re + in[2].re + in[3].re + in[4].re; \
+ out[D0*stride].im = in[0].im + in[1].im + in[2].im + in[3].im + in[4].im; \
+ \
+ SMUL(t[4].re, t[0].re, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].re, t[0].re); \
+ SMUL(t[4].im, t[0].im, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].im, t[0].im); \
+ CMUL(t[5].re, t[1].re, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].re, t[1].re); \
+ CMUL(t[5].im, t[1].im, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].im, t[1].im); \
+ \
+ BF(z0[0].re, z0[3].re, t[0].re, t[1].re); \
+ BF(z0[0].im, z0[3].im, t[0].im, t[1].im); \
+ BF(z0[2].re, z0[1].re, t[4].re, t[5].re); \
+ BF(z0[2].im, z0[1].im, t[4].im, t[5].im); \
+ \
+ out[D1*stride].re = in[0].re + z0[3].re; \
+ out[D1*stride].im = in[0].im + z0[0].im; \
+ out[D2*stride].re = in[0].re + z0[2].re; \
+ out[D2*stride].im = in[0].im + z0[1].im; \
+ out[D3*stride].re = in[0].re + z0[1].re; \
+ out[D3*stride].im = in[0].im + z0[2].im; \
+ out[D4*stride].re = in[0].re + z0[0].re; \
+ out[D4*stride].im = in[0].im + z0[3].im; \
}
DECL_FFT5(fft5, 0, 1, 2, 3, 4)
BF(t6, z[7].im, z[6].im, -z[7].im);
BUTTERFLIES(z[0],z[2],z[4],z[6]);
- TRANSFORM(z[1],z[3],z[5],z[7],M_SQRT1_2,M_SQRT1_2);
+ TRANSFORM(z[1],z[3],z[5],z[7],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
}
static void fft16(FFTComplex *z)
fft4(z+12);
TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
- TRANSFORM(z[2],z[6],z[10],z[14],M_SQRT1_2,M_SQRT1_2);
+ TRANSFORM(z[2],z[6],z[10],z[14],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
}
for (int j = 0; j < N; j++) { \
const int k = in_map[i*N + j]; \
if (k < len4) { \
- tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k]; \
- tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k]; \
+ tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]); \
+ tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]); \
} else { \
- tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k]; \
- tmp.im = src[-len4 + k] - src[1*len3 - 1 - k]; \
+ tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); \
+ tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); \
} \
CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
exp[k >> 1].re, exp[k >> 1].im); \
for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */
const int k = 2*i;
if (k < len4) {
- tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k];
- tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k];
+ tmp.re = FOLD(-src[ len4 + k], src[1*len4 - 1 - k]);
+ tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
} else {
- tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k];
- tmp.im = src[-len4 + k] - src[1*len3 - 1 - k];
+ tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
+ tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
}
CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
exp[i].re, exp[i].im);
scale = sqrt(fabs(scale));
for (int i = 0; i < len4; i++) {
const double alpha = M_PI_2 * (i + theta) / len4;
- s->exptab[i].re = cos(alpha) * scale;
- s->exptab[i].im = sin(alpha) * scale;
+ s->exptab[i].re = RESCALE(cos(alpha) * scale);
+ s->exptab[i].im = RESCALE(sin(alpha) * scale);
}
return 0;
enum AVTXType type, int inv, int len,
const void *scale, uint64_t flags)
{
- const int is_mdct = type == AV_TX_FLOAT_MDCT || type == AV_TX_DOUBLE_MDCT;
+ const int is_mdct = ff_tx_type_is_mdct(type);
int err, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) + 1);
if (is_mdct)
}
if (is_mdct)
- return gen_mdct_exptab(s, n*m, *((FFTSample *)scale));
+ return gen_mdct_exptab(s, n*m, *((SCALE_TYPE *)scale));
return 0;
}