git.sesse.net Git - ffmpeg/blobdiff - libswresample/x86/swresample_x86.c
Merge commit '1470ce21cec5ee26e106e2a884c26bbf84e5aaea'
[ffmpeg] / libswresample / x86 / swresample_x86.c
index 6cd6073e9c09b8d51842334ad2e06e9d3a37f72e..74e05e6c06c68b6f647bd23ab63622a7e4cf1a73 100644 (file)
@@ -49,7 +49,7 @@ void swri_audio_convert_init_x86(struct AudioConvert *ac,
     }
 
 MULTI_CAPS_FUNC(AV_CPU_FLAG_MMX, mmx)
-MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse)
+MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE2, sse2)
 
     if(mm_flags & AV_CPU_FLAG_MMX) {
         if(channels == 6) {
@@ -58,28 +58,6 @@ MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse)
         }
     }
 
-    if(mm_flags & AV_CPU_FLAG_SSE) {
-        if(channels == 2) {
-            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
-                ac->simd_f =  ff_pack_2ch_int32_to_int32_a_sse;
-            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S16P)
-                ac->simd_f =  ff_pack_2ch_int16_to_int16_a_sse;
-            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16P)
-                ac->simd_f =  ff_pack_2ch_int16_to_int32_a_sse;
-            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32P)
-                ac->simd_f =  ff_pack_2ch_int32_to_int16_a_sse;
-
-            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
-                ac->simd_f =  ff_unpack_2ch_int32_to_int32_a_sse;
-            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
-                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_sse;
-            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
-                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_sse;
-            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S32)
-                ac->simd_f =  ff_unpack_2ch_int32_to_int16_a_sse;
-        }
-    }
-
     if(mm_flags & AV_CPU_FLAG_SSE2) {
         if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
             ac->simd_f =  ff_int32_to_float_a_sse2;
@@ -91,6 +69,24 @@ MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse)
             ac->simd_f =  ff_float_to_int16_a_sse2;
 
         if(channels == 2) {
+            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_2ch_int32_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S16P)
+                ac->simd_f =  ff_pack_2ch_int16_to_int16_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16P)
+                ac->simd_f =  ff_pack_2ch_int16_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32P)
+                ac->simd_f =  ff_pack_2ch_int32_to_int16_a_sse2;
+
+            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
+                ac->simd_f =  ff_unpack_2ch_int32_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
+                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
+                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_sse2;
+            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S32)
+                ac->simd_f =  ff_unpack_2ch_int32_to_int16_a_sse2;
+
             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
                 ac->simd_f =  ff_pack_2ch_int32_to_float_a_sse2;
             if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
@@ -142,3 +138,58 @@ MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse)
         }
     }
 }
+
+/*
+ * Declare the ff_mix_1_1_a_<sample>_<isa> and ff_mix_2_1_a_<sample>_<isa>
+ * mixing kernels for one sample type / instruction-set combination.
+ */
+#define D(sample, isa)                                      \
+    mix_1_1_func_type ff_mix_1_1_a_ ## sample ## _ ## isa;  \
+    mix_2_1_func_type ff_mix_2_1_a_ ## sample ## _ ## isa;
+
+D(float, sse)
+D(float, avx)
+D(int16, mmx)
+D(int16, sse2)
+
+
+/**
+ * Pick the x86 SIMD mixing kernels for the rematrix step and build the
+ * coefficient table stored in s->native_simd_matrix.
+ *
+ * Only two intermediate formats are handled:
+ *  - AV_SAMPLE_FMT_S16P: the MMX kernels, replaced by the SSE2 ones when
+ *    SSE2 is reported. Every coefficient becomes a pair of int16_t values
+ *    (scaled coefficient, 15 - shift).
+ *  - AV_SAMPLE_FMT_FLTP: the SSE kernels, replaced by the AVX ones when AVX
+ *    is compiled in and reported. The table is a plain copy of
+ *    s->native_matrix.
+ * For any other format both kernel pointers stay NULL and the table is not
+ * touched.
+ *
+ * If the table cannot be allocated, s->native_simd_matrix is NULL and both
+ * kernel pointers are left NULL, so a kernel is never installed without its
+ * table.
+ *
+ * NOTE(review): a previous s->native_simd_matrix is overwritten without
+ * being freed here; confirm the caller releases it before re-initialising.
+ *
+ * @param s resampling context; in_ch_layout, out_ch_layout, midbuf.fmt and
+ *          native_matrix are read; mix_1_1_simd, mix_2_1_simd and
+ *          native_simd_matrix are written
+ */
+void swri_rematrix_init_x86(struct SwrContext *s){
+    int mm_flags = av_get_cpu_flags();
+    int nb_in  = av_get_channel_layout_nb_channels(s->in_ch_layout);
+    int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout);
+    int num    = nb_in * nb_out;
+    int i,j;
+
+    s->mix_1_1_simd = NULL;
+    s->mix_2_1_simd = NULL;
+
+    if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
+        /* Two int16_t entries per coefficient: value and shift. */
+        s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t));
+        /* Allocation failed: keep the kernel pointers NULL and bail out
+         * instead of writing through a NULL table below. */
+        if (!s->native_simd_matrix)
+            return;
+
+        if(mm_flags & AV_CPU_FLAG_MMX) {
+            s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx;
+            s->mix_2_1_simd = ff_mix_2_1_a_int16_mmx;
+        }
+        /* Checked second so that SSE2 overrides MMX when both are set. */
+        if(mm_flags & AV_CPU_FLAG_SSE2) {
+            s->mix_1_1_simd = ff_mix_1_1_a_int16_sse2;
+            s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2;
+        }
+
+        for(i=0; i<nb_out; i++){
+            /* sh first holds the largest absolute coefficient of this
+             * output row, then the right shift derived from it
+             * (presumably so the scaled value fits in int16_t). */
+            int sh = 0;
+            for(j=0; j<nb_in; j++)
+                sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j]));
+            sh = FFMAX(av_log2(sh) - 14, 0);
+            for(j=0; j<nb_in; j++) {
+                ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh;
+                /* (1<<sh>>1) is half of the dropped range: round, then shift. */
+                ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] =
+                    ((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh;
+            }
+        }
+    } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
+        s->native_simd_matrix = av_mallocz(num * sizeof(float));
+        /* Allocation failed: keep the kernel pointers NULL and skip the
+         * copy into a NULL destination. */
+        if (!s->native_simd_matrix)
+            return;
+
+        if(mm_flags & AV_CPU_FLAG_SSE) {
+            s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
+            s->mix_2_1_simd = ff_mix_2_1_a_float_sse;
+        }
+        /* Checked second so that AVX overrides SSE when available. */
+        if(HAVE_AVX && (mm_flags & AV_CPU_FLAG_AVX)) {
+            s->mix_1_1_simd = ff_mix_1_1_a_float_avx;
+            s->mix_2_1_simd = ff_mix_2_1_a_float_avx;
+        }
+
+        memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float));
+    }
+}