X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fx86%2Fdcadsp_init.c;h=0e84b8c0e9592a3fec9fe6bb3f3e06965a602060;hb=64672098361361cd15d37e36f747ab44de5b80ca;hp=adb454fa020e78acdee23a158f4ad0787bc15870;hpb=38336f0fb10a0a1dbcbbf371d0ab90520cee29c0;p=ffmpeg diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index adb454fa020..0e84b8c0e95 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -23,9 +23,17 @@ #include "libavutil/x86/cpu.h" #include "libavcodec/dcadsp.h" -void ff_int8x8_fmul_int32_sse(float *dst, const int8_t *src, int scale); -void ff_int8x8_fmul_int32_sse2(float *dst, const int8_t *src, int scale); -void ff_int8x8_fmul_int32_sse4(float *dst, const int8_t *src, int scale); +void ff_decode_hf_sse(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS], + const int8_t hf_vq[1024][32], intptr_t vq_offset, + int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end); +void ff_decode_hf_sse2(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS], + const int8_t hf_vq[1024][32], intptr_t vq_offset, + int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end); +void ff_decode_hf_sse4(float dst[DCA_SUBBANDS][8], const int vq_num[DCA_SUBBANDS], + const int8_t hf_vq[1024][32], intptr_t vq_offset, + int scale[DCA_SUBBANDS][2], intptr_t start, intptr_t end); +void ff_dca_lfe_fir0_sse(float *out, const float *in, const float *coefs); +void ff_dca_lfe_fir1_sse(float *out, const float *in, const float *coefs); av_cold void ff_dcadsp_init_x86(DCADSPContext *s) { @@ -33,15 +41,56 @@ av_cold void ff_dcadsp_init_x86(DCADSPContext *s) if (EXTERNAL_SSE(cpu_flags)) { #if ARCH_X86_32 - s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse; + s->decode_hf = ff_decode_hf_sse; #endif + s->lfe_fir[0] = ff_dca_lfe_fir0_sse; + s->lfe_fir[1] = ff_dca_lfe_fir1_sse; } if (EXTERNAL_SSE2(cpu_flags)) { - s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse2; + s->decode_hf = ff_decode_hf_sse2; } if (EXTERNAL_SSE4(cpu_flags)) { - s->int8x8_fmul_int32 = ff_int8x8_fmul_int32_sse4; + s->decode_hf = ff_decode_hf_sse4; + } +} + + +#define SYNTH_FILTER_FUNC(opt) \ +void ff_synth_filter_inner_##opt(float *synth_buf_ptr, float synth_buf2[32], \ + const float window[512], \ + float out[32], intptr_t offset, float scale); \ +static void synth_filter_##opt(FFTContext *imdct, \ + float *synth_buf_ptr, int *synth_buf_offset, \ + float synth_buf2[32], const float window[512], \ + float out[32], const float in[32], float scale) \ +{ \ + float *synth_buf= synth_buf_ptr + *synth_buf_offset; \ + \ + imdct->imdct_half(imdct, synth_buf, in); \ + \ + ff_synth_filter_inner_##opt(synth_buf, synth_buf2, window, \ + out, *synth_buf_offset, scale); \ + \ + *synth_buf_offset = (*synth_buf_offset - 32) & 511; \ +} \ + +#if ARCH_X86_32 +SYNTH_FILTER_FUNC(sse) +#endif +SYNTH_FILTER_FUNC(sse2) + +av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) +{ + int cpu_flags = av_get_cpu_flags(); + +#if ARCH_X86_32 + if (EXTERNAL_SSE(cpu_flags)) { + s->synth_filter_float = synth_filter_sse; + } +#endif + if (EXTERNAL_SSE2(cpu_flags)) { + s->synth_filter_float = synth_filter_sse2; } }