avcodec_decode_audio: do not trust the channel layout, use the channel count.

[ffmpeg] / libswresample / swresample.c
diff --git a/libswresample/swresample.c b/libswresample/swresample.c

index f01927f2933412c21ef67027da5f9e96c2ef4fcd..ad33467e2a3aaab39752aa5aa6d4b83c5ddab70e 100644 (file)
--- a/libswresample/swresample.c
+++ b/libswresample/swresample.c
@@ -1,5 +1,5 @@
  /*
- * Copyright (C) 2011-2012 Michael Niedermayer (michaelni@gmx.at)
+ * Copyright (C) 2011-2013 Michael Niedermayer (michaelni@gmx.at)
   *
   * This file is part of libswresample
   *
@@ -73,12 +73,19 @@ static const AVOption options[]={
  {"swr_flags"            , "set flags"                   , OFFSET(flags          ), AV_OPT_TYPE_FLAGS, {.i64=0                     }, 0      , UINT_MAX  , PARAM, "flags"},
  {"res"                  , "force resampling"            , 0                      , AV_OPT_TYPE_CONST, {.i64=SWR_FLAG_RESAMPLE     }, INT_MIN, INT_MAX   , PARAM, "flags"},
  
-{"dither_scale"         , "set dither scale"            , OFFSET(dither_scale   ), AV_OPT_TYPE_FLOAT, {.dbl=1                     }, 0      , INT_MAX   , PARAM},
+{"dither_scale"         , "set dither scale"            , OFFSET(dither.scale   ), AV_OPT_TYPE_FLOAT, {.dbl=1                     }, 0      , INT_MAX   , PARAM},
  
-{"dither_method"        , "set dither method"           , OFFSET(dither_method  ), AV_OPT_TYPE_INT  , {.i64=0                     }, 0      , SWR_DITHER_NB-1, PARAM, "dither_method"},
+{"dither_method"        , "set dither method"           , OFFSET(dither.method  ), AV_OPT_TYPE_INT  , {.i64=0                     }, 0      , SWR_DITHER_NB-1, PARAM, "dither_method"},
  {"rectangular"          , "select rectangular dither"   , 0                      , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_RECTANGULAR}, INT_MIN, INT_MAX   , PARAM, "dither_method"},
  {"triangular"           , "select triangular dither"    , 0                      , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_TRIANGULAR }, INT_MIN, INT_MAX   , PARAM, "dither_method"},
-{"triangular_hp"        , "select triangular dither with high pass" , 0                 , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_TRIANGULAR_HIGHPASS }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"triangular_hp"        , "select triangular dither with high pass" , 0          , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_TRIANGULAR_HIGHPASS }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"lipshitz"             , "select lipshitz noise shaping dither" , 0             , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_LIPSHITZ}, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"shibata"              , "select shibata noise shaping dither" , 0              , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"low_shibata"          , "select low shibata noise shaping dither" , 0          , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_LOW_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"high_shibata"         , "select high shibata noise shaping dither" , 0         , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_HIGH_SHIBATA }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"f_weighted"           , "select f-weighted noise shaping dither" , 0           , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_F_WEIGHTED }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"modified_e_weighted"  , "select modified-e-weighted noise shaping dither" , 0  , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_MODIFIED_E_WEIGHTED }, INT_MIN, INT_MAX, PARAM, "dither_method"},
+{"improved_e_weighted"  , "select improved-e-weighted noise shaping dither" , 0  , AV_OPT_TYPE_CONST, {.i64=SWR_DITHER_NS_IMPROVED_E_WEIGHTED }, INT_MIN, INT_MAX, PARAM, "dither_method"},
  
  {"filter_size"          , "set swr resampling filter size", OFFSET(filter_size)  , AV_OPT_TYPE_INT  , {.i64=32                    }, 0      , INT_MAX   , PARAM },
  {"phase_shift"          , "set swr resampling phase shift", OFFSET(phase_shift)  , AV_OPT_TYPE_INT  , {.i64=10                    }, 0      , 30        , PARAM },
@@ -210,7 +217,10 @@ av_cold void swr_free(SwrContext **ss){
          free_temp(&s->midbuf);
          free_temp(&s->preout);
          free_temp(&s->in_buffer);
-        free_temp(&s->dither);
+        free_temp(&s->silence);
+        free_temp(&s->drop_temp);
+        free_temp(&s->dither.noise);
+        free_temp(&s->dither.temp);
          swri_audio_convert_free(&s-> in_convert);
          swri_audio_convert_free(&s->out_convert);
          swri_audio_convert_free(&s->full_convert);
@@ -223,6 +233,7 @@ av_cold void swr_free(SwrContext **ss){
  }
  
  av_cold int swr_init(struct SwrContext *s){
+    int ret;
      s->in_buffer_index= 0;
      s->in_buffer_count= 0;
      s->resample_in_constraint= 0;
@@ -230,7 +241,10 @@ av_cold int swr_init(struct SwrContext *s){
      free_temp(&s->midbuf);
      free_temp(&s->preout);
      free_temp(&s->in_buffer);
-    free_temp(&s->dither);
+    free_temp(&s->silence);
+    free_temp(&s->drop_temp);
+    free_temp(&s->dither.noise);
+    free_temp(&s->dither.temp);
      memset(s->in.ch, 0, sizeof(s->in.ch));
      memset(s->out.ch, 0, sizeof(s->out.ch));
      swri_audio_convert_free(&s-> in_convert);
@@ -347,8 +361,10 @@ av_assert0(s->out.ch_count);
      s->resample_first= RSC*s->out.ch_count/s->in.ch_count - RSC < s->out_sample_rate/(float)s-> in_sample_rate - 1.0;
  
      s->in_buffer= s->in;
+    s->silence  = s->in;
+    s->drop_temp= s->out;
  
-    if(!s->resample && !s->rematrix && !s->channel_map && !s->dither_method){
+    if(!s->resample && !s->rematrix && !s->channel_map && !s->dither.method){
          s->full_convert = swri_audio_convert_alloc(s->out_sample_fmt,
                                                     s-> in_sample_fmt, s-> in.ch_count, NULL, 0);
          return 0;
@@ -359,6 +375,8 @@ av_assert0(s->out.ch_count);
      s->out_convert= swri_audio_convert_alloc(s->out_sample_fmt,
                                               s->int_sample_fmt, s->out.ch_count, NULL, 0);
  
+    if (!s->in_convert || !s->out_convert)
+        return AVERROR(ENOMEM);
  
      s->postin= s->in;
      s->preout= s->out;
@@ -384,9 +402,10 @@ av_assert0(s->out.ch_count);
          set_audiodata_fmt(&s->in_buffer, s->int_sample_fmt);
      }
  
-    s->dither = s->preout;
+    if ((ret = swri_dither_init(s, s->out_sample_fmt, s->int_sample_fmt)) < 0)
+        return ret;
  
-    if(s->rematrix || s->dither_method)
+    if(s->rematrix || s->dither.method)
          return swri_rematrix_init(s);
  
      return 0;
@@ -631,28 +650,54 @@ static int swr_convert_internal(struct SwrContext *s, AudioData *out, int out_co
      }
  
      if(preout != out && out_count){
-        if(s->dither_method){
+        AudioData *conv_src = preout;
+        if(s->dither.method){
              int ch;
              int dither_count= FFMAX(out_count, 1<<16);
-            av_assert0(preout != in);
  
-            if((ret=swri_realloc_audio(&s->dither, dither_count))<0)
+            if (preout == in) {
+                conv_src = &s->dither.temp;
+                if((ret=swri_realloc_audio(&s->dither.temp, dither_count))<0)
+                    return ret;
+            }
+
+            if((ret=swri_realloc_audio(&s->dither.noise, dither_count))<0)
                  return ret;
              if(ret)
-                for(ch=0; ch<s->dither.ch_count; ch++)
-                    swri_get_dither(s, s->dither.ch[ch], s->dither.count, 12345678913579<<ch, s->out_sample_fmt, s->int_sample_fmt);
-            av_assert0(s->dither.ch_count == preout->ch_count);
-
-            if(s->dither_pos + out_count > s->dither.count)
-                s->dither_pos = 0;
-
-            for(ch=0; ch<preout->ch_count; ch++)
-                s->mix_2_1_f(preout->ch[ch], preout->ch[ch], s->dither.ch[ch] + s->dither.bps * s->dither_pos, s->native_one, 0, 0, out_count);
-
-            s->dither_pos += out_count;
+                for(ch=0; ch<s->dither.noise.ch_count; ch++)
+                    swri_get_dither(s, s->dither.noise.ch[ch], s->dither.noise.count, 12345678913579<<ch, s->dither.noise.fmt);
+            av_assert0(s->dither.noise.ch_count == preout->ch_count);
+
+            if(s->dither.noise_pos + out_count > s->dither.noise.count)
+                s->dither.noise_pos = 0;
+
+            if (s->dither.method < SWR_DITHER_NS){
+                if (s->mix_2_1_simd) {
+                    int len1= out_count&~15;
+                    int off = len1 * preout->bps;
+
+                    if(len1)
+                        for(ch=0; ch<preout->ch_count; ch++)
+                            s->mix_2_1_simd(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_one, 0, 0, len1);
+                    if(out_count != len1)
+                        for(ch=0; ch<preout->ch_count; ch++)
+                            s->mix_2_1_f(conv_src->ch[ch] + off, preout->ch[ch] + off, s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos + off + len1, s->native_one, 0, 0, out_count - len1);
+                } else {
+                    for(ch=0; ch<preout->ch_count; ch++)
+                        s->mix_2_1_f(conv_src->ch[ch], preout->ch[ch], s->dither.noise.ch[ch] + s->dither.noise.bps * s->dither.noise_pos, s->native_one, 0, 0, out_count);
+                }
+            } else {
+                switch(s->int_sample_fmt) {
+                case AV_SAMPLE_FMT_S16P :swri_noise_shaping_int16(s, conv_src, preout, &s->dither.noise, out_count); break;
+                case AV_SAMPLE_FMT_S32P :swri_noise_shaping_int32(s, conv_src, preout, &s->dither.noise, out_count); break;
+                case AV_SAMPLE_FMT_FLTP :swri_noise_shaping_float(s, conv_src, preout, &s->dither.noise, out_count); break;
+                case AV_SAMPLE_FMT_DBLP :swri_noise_shaping_double(s,conv_src, preout, &s->dither.noise, out_count); break;
+                }
+            }
+            s->dither.noise_pos += out_count;
          }
  //FIXME packed doesnt need more than 1 chan here!
-        swri_audio_convert(s->out_convert, out, preout, out_count);
+        swri_audio_convert(s->out_convert, out, conv_src, out_count);
      }
      return out_count;
  }
@@ -662,26 +707,25 @@ int swr_convert(struct SwrContext *s, uint8_t *out_arg[SWR_CH_MAX], int out_coun
      AudioData * in= &s->in;
      AudioData *out= &s->out;
  
-    if(s->drop_output > 0){
+    while(s->drop_output > 0){
          int ret;
-        AudioData tmp = s->out;
          uint8_t *tmp_arg[SWR_CH_MAX];
-        tmp.count = 0;
-        tmp.data  = NULL;
-        if((ret=swri_realloc_audio(&tmp, s->drop_output))<0)
+#define MAX_DROP_STEP 16384
+        if((ret=swri_realloc_audio(&s->drop_temp, FFMIN(s->drop_output, MAX_DROP_STEP)))<0)
              return ret;
  
-        reversefill_audiodata(&tmp, tmp_arg);
+        reversefill_audiodata(&s->drop_temp, tmp_arg);
          s->drop_output *= -1; //FIXME find a less hackish solution
-        ret = swr_convert(s, tmp_arg, -s->drop_output, in_arg, in_count); //FIXME optimize but this is as good as never called so maybe it doesnt matter
+        ret = swr_convert(s, tmp_arg, FFMIN(-s->drop_output, MAX_DROP_STEP), in_arg, in_count); //FIXME optimize but this is as good as never called so maybe it doesnt matter
          s->drop_output *= -1;
-        if(ret>0)
+        in_count = 0;
+        if(ret>0) {
              s->drop_output -= ret;
+            continue;
+        }
  
-        av_freep(&tmp.data);
          if(s->drop_output || !out_arg)
              return 0;
-        in_count = 0;
      }
  
      if(!in_arg){
@@ -769,26 +813,29 @@ int swr_drop_output(struct SwrContext *s, int count){
  
  int swr_inject_silence(struct SwrContext *s, int count){
      int ret, i;
-    AudioData silence = s->in;
      uint8_t *tmp_arg[SWR_CH_MAX];
  
      if(count <= 0)
          return 0;
  
-    silence.count = 0;
-    silence.data  = NULL;
-    if((ret=swri_realloc_audio(&silence, count))<0)
+#define MAX_SILENCE_STEP 16384
+    while (count > MAX_SILENCE_STEP) {
+        if ((ret = swr_inject_silence(s, MAX_SILENCE_STEP)) < 0)
+            return ret;
+        count -= MAX_SILENCE_STEP;
+    }
+
+    if((ret=swri_realloc_audio(&s->silence, count))<0)
          return ret;
  
-    if(silence.planar) for(i=0; i<silence.ch_count; i++) {
-        memset(silence.ch[i], silence.bps==1 ? 0x80 : 0, count*silence.bps);
+    if(s->silence.planar) for(i=0; i<s->silence.ch_count; i++) {
+        memset(s->silence.ch[i], s->silence.bps==1 ? 0x80 : 0, count*s->silence.bps);
      } else
-        memset(silence.ch[0], silence.bps==1 ? 0x80 : 0, count*silence.bps*silence.ch_count);
+        memset(s->silence.ch[0], s->silence.bps==1 ? 0x80 : 0, count*s->silence.bps*s->silence.ch_count);
  
-    reversefill_audiodata(&silence, tmp_arg);
+    reversefill_audiodata(&s->silence, tmp_arg);
      av_log(s, AV_LOG_VERBOSE, "adding %d audio samples of silence\n", count);
      ret = swr_convert(s, NULL, 0, (const uint8_t**)tmp_arg, count);
-    av_freep(&silence.data);
      return ret;
  }
  
@@ -826,7 +873,7 @@ int64_t swr_next_pts(struct SwrContext *s, int64_t pts){
      if(s->min_compensation >= FLT_MAX) {
          return (s->outpts = pts - swr_get_delay(s, s->in_sample_rate * (int64_t)s->out_sample_rate));
      } else {
-        int64_t delta = pts - swr_get_delay(s, s->in_sample_rate * (int64_t)s->out_sample_rate) - s->outpts;
+        int64_t delta = pts - swr_get_delay(s, s->in_sample_rate * (int64_t)s->out_sample_rate) - s->outpts + s->drop_output*(int64_t)s->in_sample_rate;
          double fdelta = delta /(double)(s->in_sample_rate * (int64_t)s->out_sample_rate);
  
          if(fabs(fdelta) > s->min_compensation) {