X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libswresample%2Faudioconvert.c;h=c53135677bc948380a71ccca0a35f50f3242bd08;hb=bd1d975cd098062a15da85f1384883e8fb6c526f;hp=96e5083c8792c3811c5be2dadab791d38c06da9d;hpb=be2e211dce2964f922e5dee54a7569ebd6efb0da;p=ffmpeg

diff --git a/libswresample/audioconvert.c b/libswresample/audioconvert.c
index 96e5083c879..c53135677bc 100644
--- a/libswresample/audioconvert.c
+++ b/libswresample/audioconvert.c
@@ -32,41 +32,39 @@
 #include "audioconvert.h"
 
 
-typedef void (conv_func_type)(uint8_t *po, const uint8_t *pi, int is, int os, uint8_t *end);
-
-struct AudioConvert {
-    int channels;
-    conv_func_type *conv_f;
-    const int *ch_map;
-    uint8_t silence[8]; ///< silence input sample
-};
-
 #define CONV_FUNC_NAME(dst_fmt, src_fmt) conv_ ## src_fmt ## _to_ ## dst_fmt
 
 //FIXME rounding ?
 #define CONV_FUNC(ofmt, otype, ifmt, expr)\
 static void CONV_FUNC_NAME(ofmt, ifmt)(uint8_t *po, const uint8_t *pi, int is, int os, uint8_t *end)\
 {\
-    do{\
+    uint8_t *end2 = end - 3*os;\
+    while(po < end2){\
+        *(otype*)po = expr; pi += is; po += os;\
+        *(otype*)po = expr; pi += is; po += os;\
+        *(otype*)po = expr; pi += is; po += os;\
         *(otype*)po = expr; pi += is; po += os;\
-    }while(po < end);\
+    }\
+    while(po < end){\
+        *(otype*)po = expr; pi += is; po += os;\
+    }\
 }
 
 //FIXME put things below under ifdefs so we do not waste space for cases no codec will need
 CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_U8 ,  *(const uint8_t*)pi)
 CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)<<8)
 CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)<<24)
-CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0f/ (1<<7)))
 CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_U8 , (*(const uint8_t*)pi - 0x80)*(1.0 / (1<<7)))
 CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S16, (*(const int16_t*)pi>>8) + 0x80)
 CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S16,  *(const int16_t*)pi)
 CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S16,  *(const int16_t*)pi<<16)
-CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S16,  *(const int16_t*)pi*(1.0 / (1<<15)))
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S16,  *(const int16_t*)pi*(1.0f/ (1<<15)))
 CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S16,  *(const int16_t*)pi*(1.0 / (1<<15)))
 CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_S32, (*(const int32_t*)pi>>24) + 0x80)
 CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_S32,  *(const int32_t*)pi>>16)
 CONV_FUNC(AV_SAMPLE_FMT_S32, int32_t, AV_SAMPLE_FMT_S32,  *(const int32_t*)pi)
-CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S32,  *(const int32_t*)pi*(1.0 / (1U<<31)))
+CONV_FUNC(AV_SAMPLE_FMT_FLT, float  , AV_SAMPLE_FMT_S32,  *(const int32_t*)pi*(1.0f/ (1U<<31)))
 CONV_FUNC(AV_SAMPLE_FMT_DBL, double , AV_SAMPLE_FMT_S32,  *(const int32_t*)pi*(1.0 / (1U<<31)))
 CONV_FUNC(AV_SAMPLE_FMT_U8 , uint8_t, AV_SAMPLE_FMT_FLT, av_clip_uint8(  lrintf(*(const float*)pi * (1<<7)) + 0x80))
 CONV_FUNC(AV_SAMPLE_FMT_S16, int16_t, AV_SAMPLE_FMT_FLT, av_clip_int16(  lrintf(*(const float*)pi * (1<<15))))
@@ -109,24 +107,55 @@ static conv_func_type * const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAM
     FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL),
 };
 
+static void cpy1(uint8_t **dst, const uint8_t **src, int len){
+    memcpy(*dst, *src, len);
+}
+static void cpy2(uint8_t **dst, const uint8_t **src, int len){
+    memcpy(*dst, *src, 2*len);
+}
+static void cpy4(uint8_t **dst, const uint8_t **src, int len){
+    memcpy(*dst, *src, 4*len);
+}
+static void cpy8(uint8_t **dst, const uint8_t **src, int len){
+    memcpy(*dst, *src, 8*len);
+}
+
 AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt,
                                        enum AVSampleFormat in_fmt,
                                        int channels, const int *ch_map,
                                        int flags)
 {
     AudioConvert *ctx;
-    conv_func_type *f = fmt_pair_to_conv_functions[out_fmt + AV_SAMPLE_FMT_NB*in_fmt];
+    conv_func_type *f = fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt) + AV_SAMPLE_FMT_NB*av_get_packed_sample_fmt(in_fmt)];
 
     if (!f)
         return NULL;
     ctx = av_mallocz(sizeof(*ctx));
     if (!ctx)
         return NULL;
+
+    if(channels == 1){
+         in_fmt = av_get_planar_sample_fmt( in_fmt);
+        out_fmt = av_get_planar_sample_fmt(out_fmt);
+    }
+
     ctx->channels = channels;
     ctx->conv_f   = f;
     ctx->ch_map   = ch_map;
-    if (in_fmt == AV_SAMPLE_FMT_U8)
+    if (in_fmt == AV_SAMPLE_FMT_U8 || in_fmt == AV_SAMPLE_FMT_U8P)
         memset(ctx->silence, 0x80, sizeof(ctx->silence));
+
+    if(out_fmt == in_fmt && !ch_map) {
+        switch(av_get_bytes_per_sample(in_fmt)){
+            case 1:ctx->simd_f = cpy1; break;
+            case 2:ctx->simd_f = cpy2; break;
+            case 4:ctx->simd_f = cpy4; break;
+            case 8:ctx->simd_f = cpy8; break;
+        }
+    }
+
+    if(HAVE_YASM && HAVE_MMX) swri_audio_convert_init_x86(ctx, out_fmt, in_fmt, channels);
+
     return ctx;
 }
 
@@ -138,21 +167,40 @@ void swri_audio_convert_free(AudioConvert **ctx)
 int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len)
 {
     int ch;
+    int off=0;
+    const int os= (out->planar ? 1 :out->ch_count) *out->bps;
 
     av_assert0(ctx->channels == out->ch_count);
 
     //FIXME optimize common cases
 
+    if(ctx->simd_f && !ctx->ch_map){
+        off = len/16 * 16;
+        av_assert1(off>=0);
+        av_assert1(off<=len);
+        if(off>0){
+            if(out->planar == in->planar){
+                int planes = out->planar ? out->ch_count : 1;
+                for(ch=0; ch<planes; ch++){
+                    ctx->simd_f(out->ch+ch, in->ch+ch, off * (out->planar ? 1 :out->ch_count));
+                }
+            }else{
+                ctx->simd_f(out->ch, in->ch, off);
+            }
+        }
+        if(off == len)
+            return 0;
+    }
+
     for(ch=0; ch<ctx->channels; ch++){
         const int ich= ctx->ch_map ? ctx->ch_map[ch] : ch;
         const int is= ich < 0 ? 0 : (in->planar ? 1 : in->ch_count) * in->bps;
-        const int os= (out->planar ? 1 :out->ch_count) *out->bps;
         const uint8_t *pi= ich < 0 ? ctx->silence : in->ch[ich];
         uint8_t       *po= out->ch[ch];
         uint8_t *end= po + os*len;
         if(!po)
             continue;
-        ctx->conv_f(po, pi, is, os, end);
+        ctx->conv_f(po+off*os, pi+off*is, is, os, end);
     }
     return 0;
 }