Fix decoding audio with planar formats.

[mlt] / src / modules / avformat / producer_avformat.c
diff --git a/src/modules/avformat/producer_avformat.c b/src/modules/avformat/producer_avformat.c

index 2419238cd04e4c00cf47549a3b7947ee7ea7450a..2e00f8699ea8cf125efffe1cecb5628850e64367 100644 (file)
--- a/src/modules/avformat/producer_avformat.c
+++ b/src/modules/avformat/producer_avformat.c
@@ -79,7 +79,7 @@ const char *avcodec_get_sample_fmt_name(int sample_fmt);
  #define POSITION_INITIAL (-2)
  #define POSITION_INVALID (-1)
  
-#define MAX_AUDIO_STREAMS (10)
+#define MAX_AUDIO_STREAMS (32)
  #define MAX_VDPAU_SURFACES (10)
  
  struct producer_avformat_s
@@ -117,6 +117,7 @@ struct producer_avformat_s
         double resample_factor;
         mlt_cache image_cache;
         int colorspace;
+       int full_luma;
         pthread_mutex_t video_mutex;
         pthread_mutex_t audio_mutex;
         mlt_deque apackets;
@@ -1219,7 +1220,7 @@ static void set_luma_transfer( struct SwsContext *context, int colorspace, int u
  #endif
  }
  
-static mlt_image_format pick_format( enum PixelFormat pix_fmt )
+static mlt_image_format pick_pix_format( enum PixelFormat pix_fmt )
  {
         switch ( pix_fmt )
         {
@@ -1245,11 +1246,35 @@ static mlt_image_format pick_format( enum PixelFormat pix_fmt )
         }
  }
  
-static void convert_image( AVFrame *frame, uint8_t *buffer, int pix_fmt,
-       mlt_image_format *format, int width, int height, int colorspace, uint8_t **alpha )
+static mlt_audio_format pick_audio_format( enum AVSampleFormat sample_fmt )
+{ // Returns the mlt_audio_format this producer reports for the decoder sample format sample_fmt.
+       switch ( sample_fmt )
+       {
+       // interleaved - each maps to the MLT format with the same sample type
+       case AV_SAMPLE_FMT_S16:
+               return mlt_audio_s16;
+       case AV_SAMPLE_FMT_S32:
+               return mlt_audio_s32le;
+       case AV_SAMPLE_FMT_FLT:
+               return mlt_audio_f32le;
+       // planar - this producer converts planar to interleaved, so each reports the same format as its interleaved counterpart
+#if LIBAVUTIL_VERSION_INT >= ((51<<16)+(17<<8)+0)
+       case AV_SAMPLE_FMT_S16P: // NOTE(review): the planar cases are compiled only under the libavutil version guard above - presumably the first version defining them; confirm
+               return mlt_audio_s16;
+       case AV_SAMPLE_FMT_S32P:
+               return mlt_audio_s32le;
+       case AV_SAMPLE_FMT_FLTP:
+               return mlt_audio_f32le;
+#endif
+       default: // any sample format without a case above
+               return mlt_audio_none;
+       }
+}
+
+static void convert_image( producer_avformat self, AVFrame *frame, uint8_t *buffer, int pix_fmt,
+       mlt_image_format *format, int width, int height, uint8_t **alpha )
  {
  #ifdef SWSCALE
-       int full_range = -1;
         int flags = SWS_BICUBIC | SWS_ACCURATE_RND;
  
  #ifdef USE_MMX
@@ -1289,7 +1314,7 @@ static void convert_image( AVFrame *frame, uint8_t *buffer, int pix_fmt,
                 output.linesize[0] = width;
                 output.linesize[1] = width >> 1;
                 output.linesize[2] = width >> 1;
-               set_luma_transfer( context, colorspace, full_range );
+               set_luma_transfer( context, self->colorspace, -1 );
                 sws_scale( context, (const uint8_t* const*) frame->data, frame->linesize, 0, height,
                         output.data, output.linesize);
                 sws_freeContext( context );
@@ -1300,7 +1325,7 @@ static void convert_image( AVFrame *frame, uint8_t *buffer, int pix_fmt,
                         width, height, PIX_FMT_RGB24, flags | SWS_FULL_CHR_H_INT, NULL, NULL, NULL);
                 AVPicture output;
                 avpicture_fill( &output, buffer, PIX_FMT_RGB24, width, height );
-               set_luma_transfer( context, colorspace, full_range );
+               set_luma_transfer( context, self->colorspace, self->full_luma );
                 sws_scale( context, (const uint8_t* const*) frame->data, frame->linesize, 0, height,
                         output.data, output.linesize);
                 sws_freeContext( context );
@@ -1311,7 +1336,7 @@ static void convert_image( AVFrame *frame, uint8_t *buffer, int pix_fmt,
                         width, height, PIX_FMT_RGBA, flags | SWS_FULL_CHR_H_INT, NULL, NULL, NULL);
                 AVPicture output;
                 avpicture_fill( &output, buffer, PIX_FMT_RGBA, width, height );
-               set_luma_transfer( context, colorspace, full_range );
+               set_luma_transfer( context, self->colorspace, self->full_luma );
                 sws_scale( context, (const uint8_t* const*) frame->data, frame->linesize, 0, height,
                         output.data, output.linesize);
                 sws_freeContext( context );
@@ -1322,7 +1347,7 @@ static void convert_image( AVFrame *frame, uint8_t *buffer, int pix_fmt,
                         width, height, PIX_FMT_YUYV422, flags | SWS_FULL_CHR_H_INP, NULL, NULL, NULL);
                 AVPicture output;
                 avpicture_fill( &output, buffer, PIX_FMT_YUYV422, width, height );
-               set_luma_transfer( context, colorspace, full_range );
+               set_luma_transfer( context, self->colorspace, -1 );
                 sws_scale( context, (const uint8_t* const*) frame->data, frame->linesize, 0, height,
                         output.data, output.linesize);
                 sws_freeContext( context );
@@ -1502,7 +1527,7 @@ static int producer_get_image( mlt_frame frame, uint8_t **buffer, mlt_image_form
                         codec_context->pix_fmt == PIX_FMT_RGBA ||
                         codec_context->pix_fmt == PIX_FMT_ABGR ||
                         codec_context->pix_fmt == PIX_FMT_BGRA )
-               *format = pick_format( codec_context->pix_fmt );
+               *format = pick_pix_format( codec_context->pix_fmt );
  
         // Duplicate the last image if necessary
         if ( self->av_frame && self->av_frame->linesize[0]
@@ -1524,13 +1549,13 @@ static int producer_get_image( mlt_frame frame, uint8_t **buffer, mlt_image_form
                                 picture.linesize[0] = codec_context->width;
                                 picture.linesize[1] = codec_context->width / 2;
                                 picture.linesize[2] = codec_context->width / 2;
-                               convert_image( (AVFrame*) &picture, *buffer,
-                                       PIX_FMT_YUV420P, format, *width, *height, self->colorspace, &alpha );
+                               convert_image( self, (AVFrame*) &picture, *buffer,
+                                       PIX_FMT_YUV420P, format, *width, *height, &alpha );
                         }
                         else
  #endif
-                       convert_image( self->av_frame, *buffer, codec_context->pix_fmt,
-                               format, *width, *height, self->colorspace, &alpha );
+                       convert_image( self, self->av_frame, *buffer, codec_context->pix_fmt,
+                               format, *width, *height, &alpha );
                         got_picture = 1;
                 }
         }
@@ -1687,8 +1712,8 @@ static int producer_get_image( mlt_frame frame, uint8_t **buffer, mlt_image_form
                                                         VdpStatus status = vdp_surface_get_bits( render->surface, dest_format, planes, pitches );
                                                         if ( status == VDP_STATUS_OK )
                                                         {
-                                                               convert_image( self->av_frame, *buffer, PIX_FMT_YUV420P,
-                                                                       format, *width, *height, self->colorspace, &alpha );
+                                                               convert_image( self, self->av_frame, *buffer, PIX_FMT_YUV420P,
+                                                                       format, *width, *height, &alpha );
                                                         }
                                                         else
                                                         {
@@ -1704,8 +1729,8 @@ static int producer_get_image( mlt_frame frame, uint8_t **buffer, mlt_image_form
                                         }
                                         else
  #endif
-                                       convert_image( self->av_frame, *buffer, codec_context->pix_fmt,
-                                               format, *width, *height, self->colorspace, &alpha );
+                                       convert_image( self, self->av_frame, *buffer, codec_context->pix_fmt,
+                                               format, *width, *height, &alpha );
                                         self->top_field_first |= self->av_frame->top_field_first;
                                         self->current_position = int_position;
                                 }
@@ -1753,13 +1778,13 @@ static int producer_get_image( mlt_frame frame, uint8_t **buffer, mlt_image_form
                                 picture.linesize[0] = codec_context->width;
                                 picture.linesize[1] = codec_context->width / 2;
                                 picture.linesize[2] = codec_context->width / 2;
-                               convert_image( (AVFrame*) &picture, *buffer,
-                                       PIX_FMT_YUV420P, format, *width, *height, self->colorspace, &alpha );
+                               convert_image( self, (AVFrame*) &picture, *buffer,
+                                       PIX_FMT_YUV420P, format, *width, *height, &alpha );
                         }
                         else
  #endif
-                       convert_image( self->av_frame, *buffer, codec_context->pix_fmt,
-                               format, *width, *height, self->colorspace, &alpha );
+                       convert_image( self, self->av_frame, *buffer, codec_context->pix_fmt,
+                               format, *width, *height, &alpha );
                         got_picture = 1;
                 }
         }
@@ -1789,6 +1814,11 @@ exit_get_image:
         mlt_properties_set_int( properties, "meta.media.progressive", mlt_properties_get_int( frame_properties, "progressive" ) );
         mlt_service_unlock( MLT_PRODUCER_SERVICE( producer ) );
  
+       // If we already have RGB, then the full range processing either happened already
+       // or does not apply (RGB source).
+       if ( *format == mlt_image_rgb24 || *format == mlt_image_rgb24a || *format == mlt_image_opengl )
+               mlt_properties_set( frame_properties, "force_full_luma", NULL );
+
         return !got_picture;
  }
  
@@ -1978,6 +2008,15 @@ static int video_codec_init( producer_avformat self, int index, mlt_properties p
  #endif
                 // Let apps get chosen colorspace
                 mlt_properties_set_int( properties, "meta.media.colorspace", self->colorspace );
+
+               self->full_luma = -1;
+#if LIBAVCODEC_VERSION_INT >= ((52<<16)+(72<<8)+2)
+               mlt_log_debug( MLT_PRODUCER_SERVICE(self->parent), "color_range %d\n", codec_context->color_range );
+               if ( codec_context->color_range == AVCOL_RANGE_JPEG )
+                       self->full_luma = 1;
+#endif
+               if ( mlt_properties_get( properties, "set.force_full_luma" ) )
+                       self->full_luma = mlt_properties_get_int( properties, "set.force_full_luma" );
         }
         return self->video_codec && self->video_index > -1;
  }
@@ -2116,6 +2155,19 @@ static int sample_bytes( AVCodecContext *context )
  #endif
  }
  
+static void planar_to_interleaved( uint8_t *dest, uint8_t *src, int samples, int channels, int bytes_per_sample )
+{ // Copies audio stored one channel after another in src into dest with the channels interleaved per sample; writes samples * channels * bytes_per_sample bytes to dest.
+       int s, c; // s: sample index within a channel, c: channel index
+       for ( s = 0; s < samples; s++ )
+       { // emit one group per sample index: channel 0, channel 1, ...
+               for ( c = 0; c < channels; c++ )
+               {
+                       memcpy( dest, src + ( c * samples + s ) * bytes_per_sample, bytes_per_sample ); // channel c is read as a contiguous run of `samples` samples starting at sample offset c * samples in src
+                       dest += bytes_per_sample; // advance to the next interleaved output slot
+               }
+       }
+}
+
  static int decode_audio( producer_avformat self, int *ignore, AVPacket pkt, int channels, int samples, double timecode, double fps )
  {
         // Fetch the audio_format
@@ -2183,8 +2235,21 @@ static int decode_audio( producer_avformat self, int *ignore, AVPacket pkt, int
                         }
                         else
                         {
-                               // Straight copy to audio buffer
-                               memcpy( &audio_buffer[ audio_used * codec_context->channels * sizeof_sample ], decode_buffer, data_size );
+                               uint8_t *source = decode_buffer;
+                               uint8_t *dest = &audio_buffer[ audio_used * codec_context->channels * sizeof_sample ];
+                               switch ( codec_context->sample_fmt )
+                               {
+#if LIBAVUTIL_VERSION_INT >= ((51<<16)+(17<<8)+0)
+                               case AV_SAMPLE_FMT_S16P:
+                               case AV_SAMPLE_FMT_S32P:
+                               case AV_SAMPLE_FMT_FLTP:
+                                       planar_to_interleaved( dest, source, convert_samples, codec_context->channels, sizeof_sample );
+                                       break;
+#endif
+                               default:
+                                       // Straight copy to audio buffer
+                                       memcpy( dest, decode_buffer, data_size );
+                               }
                                 audio_used += convert_samples;
                         }
  
@@ -2272,7 +2337,7 @@ static int producer_get_audio( mlt_frame frame, void **buffer, mlt_audio_format
         if ( self->audio_index == INT_MAX )
         {
                 index = 0;
-               index_max = context->nb_streams;
+               index_max = FFMIN( MAX_AUDIO_STREAMS, context->nb_streams );
                 *channels = self->total_channels;
                 *samples = *samples * FFMAX( self->max_frequency, *frequency ) / *frequency;
                 *frequency = FFMAX( self->max_frequency, *frequency );
@@ -2286,6 +2351,7 @@ static int producer_get_audio( mlt_frame frame, void **buffer, mlt_audio_format
  
                 if ( codec_context && !self->audio_buffer[ index ] )
                 {
+#if LIBAVCODEC_VERSION_INT < ((54<<16)+(26<<8)+0)
                         // Check for resample and create if necessary
                         if ( codec_context->channels <= 2 )
                         {
@@ -2308,6 +2374,7 @@ static int producer_get_audio( mlt_frame frame, void **buffer, mlt_audio_format
  #endif
                         }
                         else
+#endif
                         {
                                 codec_context->request_channels = self->audio_index == INT_MAX ? codec_context->channels : *channels;
                                 sizeof_sample = sample_bytes( codec_context );
@@ -2351,7 +2418,7 @@ static int producer_get_audio( mlt_frame frame, void **buffer, mlt_audio_format
                         {
                                 // Check if there is enough audio for all streams
                                 got_audio = 1;
-                               for ( index = 0; got_audio && index < context->nb_streams; index++ )
+                               for ( index = 0; got_audio && index < index_max; index++ )
                                         if ( ( self->audio_codec[ index ] && self->audio_used[ index ] < *samples ) || ignore[ index ] )
                                                 got_audio = 0;
                                 if ( got_audio )
@@ -2403,20 +2470,16 @@ static int producer_get_audio( mlt_frame frame, void **buffer, mlt_audio_format
                         index = self->audio_index;
                         *channels = self->audio_codec[ index ]->channels;
                         *frequency = self->audio_codec[ index ]->sample_rate;
-                       *format = self->audio_codec[ index ]->sample_fmt == AV_SAMPLE_FMT_S32 ? mlt_audio_s32le
-                               : self->audio_codec[ index ]->sample_fmt == AV_SAMPLE_FMT_FLT ? mlt_audio_f32le
-                               : mlt_audio_s16;
+                       *format = pick_audio_format( self->audio_codec[ index ]->sample_fmt );
                         sizeof_sample = sample_bytes( self->audio_codec[ index ] );
                 }
                 else if ( self->audio_index == INT_MAX )
                 {
-                       // This only works if all audio tracks have the same sample format.
                         for ( index = 0; index < index_max; index++ )
                                 if ( self->audio_codec[ index ] && !self->audio_resample[ index ] )
                                 {
-                                       *format = self->audio_codec[ index ]->sample_fmt == AV_SAMPLE_FMT_S32 ? mlt_audio_s32le
-                                               : self->audio_codec[ index ]->sample_fmt == AV_SAMPLE_FMT_FLT ? mlt_audio_f32le
-                                               : mlt_audio_s16;
+                                       // XXX: This only works if all audio tracks have the same sample format.
+                                       *format = pick_audio_format( self->audio_codec[ index ]->sample_fmt );
                                         sizeof_sample = sample_bytes( self->audio_codec[ index ] );
                                         break;
                                 }