swr: fix silence buffer for planar U8

[ffmpeg] / libswresample / audioconvert.c
diff --git a/libswresample/audioconvert.c b/libswresample/audioconvert.c

index 7598bba05d944eb8c81fb5dbb02e7bcf96156b19..c53135677bc948380a71ccca0a35f50f3242bd08 100644 (file)
--- a/libswresample/audioconvert.c
+++ b/libswresample/audioconvert.c
@@ -32,15 +32,6 @@
  #include "audioconvert.h"
  
  
-typedef void (conv_func_type)(uint8_t *po, const uint8_t *pi, int is, int os, uint8_t *end);
-
-struct AudioConvert {
-    int channels;
-    conv_func_type *conv_f;
-    const int *ch_map;
-    uint8_t silence[8]; ///< silence input sample
-};
-
  #define CONV_FUNC_NAME(dst_fmt, src_fmt) conv_ ## src_fmt ## _to_ ## dst_fmt
  
  //FIXME rounding ?
@@ -116,24 +107,55 @@ static conv_func_type * const fmt_pair_to_conv_functions[AV_SAMPLE_FMT_NB*AV_SAM
      FMT_PAIR_FUNC(AV_SAMPLE_FMT_DBL, AV_SAMPLE_FMT_DBL),
  };
  
+static void cpy1(uint8_t **dst, const uint8_t **src, int len){ // copy len 1-byte samples from src[0] to dst[0]
+    memcpy(*dst, *src, len);
+}
+static void cpy2(uint8_t **dst, const uint8_t **src, int len){ // copy len 2-byte samples from src[0] to dst[0]
+    memcpy(*dst, *src, 2*len);
+}
+static void cpy4(uint8_t **dst, const uint8_t **src, int len){ // copy len 4-byte samples from src[0] to dst[0]
+    memcpy(*dst, *src, 4*len);
+}
+static void cpy8(uint8_t **dst, const uint8_t **src, int len){ // copy len 8-byte samples from src[0] to dst[0]
+    memcpy(*dst, *src, 8*len);
+}
+
  AudioConvert *swri_audio_convert_alloc(enum AVSampleFormat out_fmt,
                                         enum AVSampleFormat in_fmt,
                                         int channels, const int *ch_map,
                                         int flags)
  {
      AudioConvert *ctx;
-    conv_func_type *f = fmt_pair_to_conv_functions[out_fmt + AV_SAMPLE_FMT_NB*in_fmt];
+    conv_func_type *f = fmt_pair_to_conv_functions[av_get_packed_sample_fmt(out_fmt) + AV_SAMPLE_FMT_NB*av_get_packed_sample_fmt(in_fmt)]; // table is indexed by the packed variants of both formats
  
      if (!f)
          return NULL;
      ctx = av_mallocz(sizeof(*ctx));
      if (!ctx)
          return NULL;
+
+    if(channels == 1){ // with a single channel, treat both formats as their planar variants
+         in_fmt = av_get_planar_sample_fmt( in_fmt);
+        out_fmt = av_get_planar_sample_fmt(out_fmt);
+    }
+
      ctx->channels = channels;
      ctx->conv_f   = f;
      ctx->ch_map   = ch_map;
-    if (in_fmt == AV_SAMPLE_FMT_U8)
+    if (in_fmt == AV_SAMPLE_FMT_U8 || in_fmt == AV_SAMPLE_FMT_U8P) // unsigned 8-bit input, packed or planar: fill the silence sample with 0x80 bytes
          memset(ctx->silence, 0x80, sizeof(ctx->silence));
+
+    if(out_fmt == in_fmt && !ch_map) { // identical formats and no channel map: use a plain copy as simd_f
+        switch(av_get_bytes_per_sample(in_fmt)){ // pick the copy helper matching the sample size; other sizes leave simd_f unset
+            case 1:ctx->simd_f = cpy1; break;
+            case 2:ctx->simd_f = cpy2; break;
+            case 4:ctx->simd_f = cpy4; break;
+            case 8:ctx->simd_f = cpy8; break;
+        }
+    }
+
+    if(HAVE_YASM && HAVE_MMX) swri_audio_convert_init_x86(ctx, out_fmt, in_fmt, channels); // NOTE(review): defined elsewhere; presumably installs x86-specific simd_f routines - confirm
+
      return ctx;
  }
  
@@ -145,21 +167,40 @@ void swri_audio_convert_free(AudioConvert **ctx)
  int swri_audio_convert(AudioConvert *ctx, AudioData *out, AudioData *in, int len)
  {
      int ch;
+    int off=0; // number of leading samples already handled by simd_f
+    const int os= (out->planar ? 1 :out->ch_count) *out->bps; // output stride handed to conv_f: 1 (planar) or ch_count (packed) times bps
  
      av_assert0(ctx->channels == out->ch_count);
  
      //FIXME optimize common cases
  
+    if(ctx->simd_f && !ctx->ch_map){ // fast path, only taken when no channel map is set
+        off = len/16 * 16; // round len down to a multiple of 16; the remainder is left to conv_f below
+        av_assert1(off>=0);
+        av_assert1(off<=len);
+        if(off>0){
+            if(out->planar == in->planar){ // same layout on both sides: process plane by plane
+                int planes = out->planar ? out->ch_count : 1; // non-planar data is handled as a single plane
+                for(ch=0; ch<planes; ch++){
+                    ctx->simd_f(out->ch+ch, in->ch+ch, off * (out->planar ? 1 :out->ch_count)); // non-planar: off samples for each of ch_count channels
+                }
+            }else{ // layouts differ: simd_f receives the full channel pointer arrays
+                ctx->simd_f(out->ch, in->ch, off);
+            }
+        }
+        if(off == len) // nothing left for the per-channel loop
+            return 0;
+    }
+
      for(ch=0; ch<ctx->channels; ch++){
          const int ich= ctx->ch_map ? ctx->ch_map[ch] : ch;
          const int is= ich < 0 ? 0 : (in->planar ? 1 : in->ch_count) * in->bps;
-        const int os= (out->planar ? 1 :out->ch_count) *out->bps;
          const uint8_t *pi= ich < 0 ? ctx->silence : in->ch[ich];
          uint8_t       *po= out->ch[ch];
          uint8_t *end= po + os*len;
          if(!po)
              continue;
-        ctx->conv_f(po, pi, is, os, end);
+        ctx->conv_f(po+off*os, pi+off*is, is, os, end); // start after the off samples done above; end is still po + os*len
      }
      return 0;
  }