Fix muxing vorbis into webm/mkv with libavcodec < 55.

diff --git a/src/modules/avformat/consumer_avformat.c b/src/modules/avformat/consumer_avformat.c
index bb791365210301b0bc2cdb3c73f8ef8535cf8927..33b3a93f983e05597ae0830be5ae0503cfc41924 100644
--- a/src/modules/avformat/consumer_avformat.c
+++ b/src/modules/avformat/consumer_avformat.c
@@ -62,6 +62,8 @@
 #define AV_CODEC_ID_NONE      CODEC_ID_NONE
 #define AV_CODEC_ID_AC3       CODEC_ID_AC3
 #define AV_CODEC_ID_VORBIS    CODEC_ID_VORBIS
+#define AV_CODEC_ID_RAWVIDEO  CODEC_ID_RAWVIDEO
+#define AV_CODEC_ID_MJPEG     CODEC_ID_MJPEG
 #endif
 
 #define MAX_AUDIO_STREAMS (8)
@@ -236,7 +238,7 @@ static void property_changed( mlt_properties owner, mlt_consumer self, char *nam
                mlt_properties_set_int( properties, "display_aspect_den", rational.den );
 
                // Now compute the sample aspect ratio
-               rational = av_d2q( ar * height / width, 255 );
+               rational = av_d2q( ar * height / FFMAX(width, 1), 255 );
 
                // Update the profile and properties as well since this is an alias
                // for mlt properties that correspond to profile settings
@@ -441,6 +443,21 @@ static void apply_properties( void *obj, mlt_properties properties, int flags )
        }
 }
 
+static enum PixelFormat pick_pix_fmt( mlt_image_format img_fmt )
+{
+       switch ( img_fmt )
+       {
+       case mlt_image_rgb24:
+               return PIX_FMT_RGB24;
+       case mlt_image_rgb24a:
+               return PIX_FMT_RGBA;
+       case mlt_image_yuv420p:
+               return PIX_FMT_YUV420P;
+       default:
+               return PIX_FMT_YUYV422;
+       }
+}
+
 static int get_mlt_audio_format( int av_sample_fmt )
 {
        switch ( av_sample_fmt )
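
Note: pick_pix_fmt() maps the consumer's working mlt_image_format onto the equivalent FFmpeg pixel format, defaulting to PIX_FMT_YUYV422 for MLT's native packed 4:2:2. It is used further down both to allocate the intermediate AVFrame and as the source format for sws_getContext(), so the two always agree. A minimal sketch (codec_pix_fmt stands in for the encoder's pixel format):

    /* Illustrative: the intermediate frame and the scaler share one source format. */
    enum PixelFormat src_fmt = pick_pix_fmt( img_fmt );      /* e.g. PIX_FMT_YUV420P */
    AVFrame *tmp = alloc_picture( src_fmt, width, height );  /* helper defined in this file */
    struct SwsContext *sws = sws_getContext( width, height, src_fmt,
        width, height, codec_pix_fmt, SWS_BICUBIC, NULL, NULL, NULL );
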
@@ -1062,13 +1079,14 @@ static inline long time_difference( struct timeval *time1 )
 static int mlt_write(void *h, uint8_t *buf, int size)
 {
        mlt_properties properties = (mlt_properties) h;
-       mlt_events_fire( properties, "avformat-write", buf, size, NULL );
+       mlt_events_fire( properties, "avformat-write", buf, &size, NULL );
        return 0;
 }
 
 static void write_transmitter( mlt_listener listener, mlt_properties owner, mlt_service service, void **args )
 {
-       listener( owner, service, (uint8_t*) args[0], (int) args[1] );
+       int *p_size = (int*) args[1];
+       listener( owner, service, (uint8_t*) args[0], *p_size );
 }
 
 /** The main thread - the argument is simply the consumer.
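
Note: the previous code smuggled the int size through the void** event arguments with an int/pointer cast, which is not portable on 64-bit platforms; the event now passes the address of size and write_transmitter dereferences it. A minimal sketch of a listener for the "avformat-write" event (the listener name and the registration call are illustrative, not part of this patch):

    /* Illustrative listener: receives each chunk the muxer writes via the custom AVIO callback. */
    static void on_avformat_write( mlt_properties owner, mlt_service service,
                                   uint8_t *buf, int size )
    {
        /* forward the muxed bytes somewhere, e.g. a socket or ring buffer */
        (void) owner; (void) service; (void) buf; (void) size;
    }

    /* Registration, assuming 'consumer' is the avformat consumer:
     * mlt_events_listen( MLT_CONSUMER_PROPERTIES( consumer ), consumer,
     *                    "avformat-write", (mlt_listener) on_avformat_write );
     */
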
@@ -1126,14 +1144,33 @@ static void *consumer_thread( void *arg )
        mlt_deque queue = mlt_properties_get_data( properties, "frame_queue", NULL );
        sample_fifo fifo = mlt_properties_get_data( properties, "sample_fifo", NULL );
 
-       // Need two av pictures for converting
-       AVFrame *converted_avframe = NULL;
-       AVFrame *audio_avframe = NULL;
-       AVFrame *video_avframe = alloc_picture( PIX_FMT_YUYV422, width, height );
-
        // For receiving images from an mlt_frame
        uint8_t *image;
        mlt_image_format img_fmt = mlt_image_yuv422;
+       // Get the image format to use for rendering threads
+       const char* img_fmt_name = mlt_properties_get( properties, "mlt_image_format" );
+       if ( img_fmt_name )
+       {
+               if ( !strcmp( img_fmt_name, "rgb24" ) )
+                       img_fmt = mlt_image_rgb24;
+               else if ( !strcmp( img_fmt_name, "rgb24a" ) )
+                       img_fmt = mlt_image_rgb24a;
+               else if ( !strcmp( img_fmt_name, "yuv420p" ) )
+                       img_fmt = mlt_image_yuv420p;
+       }
+       else if ( mlt_properties_get( properties, "pix_fmt" ) )
+       {
+               img_fmt_name = mlt_properties_get( properties, "pix_fmt" );
+               if ( !strcmp( img_fmt_name, "rgba" ) ||
+                    !strcmp( img_fmt_name, "argb" ) ||
+                    !strcmp( img_fmt_name, "bgra" ) )
+                       img_fmt = mlt_image_rgb24a;
+       }
+
+       // Need two av pictures for converting
+       AVFrame *converted_avframe = NULL;
+       AVFrame *audio_avframe = NULL;
+       AVFrame *video_avframe = alloc_picture( pick_pix_fmt( img_fmt ), width, height );
 
        // For receiving audio samples back from the fifo
        uint8_t *audio_buf_1 = av_malloc( AUDIO_ENCODE_BUFFER_SIZE );
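
Note: instead of always pulling packed yuv422 from producers, the consumer now honours an optional mlt_image_format property (rgb24, rgb24a, or yuv420p); when it is absent but pix_fmt names an alpha-capable format (rgba, argb, bgra), rgb24a is inferred so alpha survives the pipeline. The intermediate AVFrame is then allocated in the matching FFmpeg pixel format, avoiding a needless round trip through 4:2:2. An illustrative usage sketch from application code (assuming an existing mlt_profile 'profile'; file name and codec choice are illustrative):

    /* Illustrative: ask the avformat consumer to work in planar 4:2:0. */
    mlt_consumer consumer = mlt_factory_consumer( profile, "avformat", "output.webm" );
    mlt_properties_set( MLT_CONSUMER_PROPERTIES( consumer ), "mlt_image_format", "yuv420p" );
    mlt_properties_set( MLT_CONSUMER_PROPERTIES( consumer ), "vcodec", "libvpx" );
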
@@ -1141,7 +1178,7 @@ static void *consumer_thread( void *arg )
        int count = 0;
 
        // Allocate the context
-       AVFormatContext *oc = avformat_alloc_context( );
+       AVFormatContext *oc = NULL;
 
        // Streams
        AVStream *video_st = NULL;
@@ -1172,6 +1209,7 @@ static void *consumer_thread( void *arg )
        char key[27];
        mlt_properties frame_meta_properties = mlt_properties_new();
        int error_count = 0;
+       int64_t synth_audio_pts = 0;
 
        // Initialize audio_st
        int i = MAX_AUDIO_STREAMS;
@@ -1194,6 +1232,22 @@ static void *consumer_thread( void *arg )
        if ( filename == NULL || !strcmp( filename, "" ) )
                filename = "pipe:";
 
+#if LIBAVUTIL_VERSION_INT >= ((53<<16)+(2<<8)+0)
+       avformat_alloc_output_context2( &oc, fmt, format, filename );
+#else
+       oc = avformat_alloc_context( );
+       oc->oformat = fmt;
+       snprintf( oc->filename, sizeof(oc->filename), "%s", filename );
+
+       if ( oc->oformat && oc->oformat->priv_class && !oc->priv_data && oc->oformat->priv_data_size ) {
+               oc->priv_data = av_mallocz( oc->oformat->priv_data_size );
+               if ( oc->priv_data ) {
+                       *(const AVClass**)oc->priv_data = oc->oformat->priv_class;
+                       av_opt_set_defaults( oc->priv_data );
+               }
+       }
+#endif
+
        // Get the codec ids selected
        audio_codec_id = fmt->audio_codec;
        video_codec_id = fmt->video_codec;
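
Note: on newer FFmpeg/Libav, avformat_alloc_output_context2() allocates the muxing context, binds the requested output format (or guesses it from the format name or filename), copies the filename, and initialises the muxer's private options to their defaults. The #else branch reproduces that last step by hand so format-private AVOptions can still be applied on older releases. The modern call, roughly (file name illustrative):

    /* Illustrative: allocate and pre-configure the muxing context in one call.
     * A NULL format name lets the file extension select the muxer. */
    AVFormatContext *oc = NULL;
    if ( avformat_alloc_output_context2( &oc, NULL, NULL, "output.webm" ) < 0 || !oc )
        mlt_log_error( MLT_CONSUMER_SERVICE( consumer ), "failed to allocate output context\n" );
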
@@ -1213,7 +1267,7 @@ static void *consumer_thread( void *arg )
                                acodec = mlt_properties_get( properties, "_acodec" );
                                audio_codec = avcodec_find_encoder_by_name( acodec );
                        }
-                       else if ( !strcmp( acodec, "aac" ) )
+                       else if ( !strcmp( acodec, "aac" ) || !strcmp( acodec, "vorbis" ) )
                        {
                                mlt_properties_set( properties, "astrict", "experimental" );
                        }
@@ -1272,9 +1326,6 @@ static void *consumer_thread( void *arg )
                }
        }
 
-       oc->oformat = fmt;
-       snprintf( oc->filename, sizeof(oc->filename), "%s", filename );
-
        // Get a frame now, so we can set some AVOptions from properties.
        frame = mlt_consumer_rt_frame( consumer );
 
@@ -1442,7 +1493,7 @@ static void *consumer_thread( void *arg )
        if ( video_st )
                converted_avframe = alloc_picture( video_st->codec->pix_fmt, width, height );
 
-#if LIBAVCODEC_VERSION_MAJOR >= 55
+#if LIBAVCODEC_VERSION_MAJOR >= 54
        // Allocate audio AVFrame
        if ( audio_st[0] )
        {
@@ -1575,8 +1626,13 @@ static void *consumer_thread( void *arg )
                                                        else if ( codec->sample_fmt == AV_SAMPLE_FMT_U8P )
                                                                p = interleaved_to_planar( samples, channels, p, sizeof( uint8_t ) );
 #endif
-#if LIBAVCODEC_VERSION_MAJOR >= 55
+#if LIBAVCODEC_VERSION_MAJOR >= 54
                                                        audio_avframe->nb_samples = FFMAX( samples, audio_input_nb_samples );
+#if LIBAVCODEC_VERSION_MAJOR >= 55
+                                                       if ( audio_codec_id == AV_CODEC_ID_VORBIS )
+                                                               audio_avframe->pts = synth_audio_pts;
+                                                       synth_audio_pts += audio_avframe->nb_samples;
+#endif
                                                        avcodec_fill_audio_frame( audio_avframe, codec->channels, codec->sample_fmt,
                                                                (const uint8_t*) p, AUDIO_ENCODE_BUFFER_SIZE, 0 );
                                                        int got_packet = 0;
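
Note: the surrounding guards drop from libavcodec 55 to 54 so that version 54 also takes the AVFrame-based avcodec_encode_audio2() path, and on 55+ a pts is synthesised for Vorbis because that encoder/Matroska muxer combination rejects frames without monotonically increasing timestamps. synth_audio_pts is the int64_t counter declared earlier in this patch; the pattern, roughly ('nb_samples' stands for the samples per channel in this frame):

    /* Illustrative: stamp each audio AVFrame with a running sample count. */
    audio_avframe->nb_samples = nb_samples;
    audio_avframe->pts        = synth_audio_pts;      /* in 1/sample_rate units */
    synth_audio_pts          += audio_avframe->nb_samples;
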
@@ -1662,8 +1718,13 @@ static void *consumer_thread( void *arg )
                                                                        dest_offset += current_channels;
                                                                }
                                                        }
-#if LIBAVCODEC_VERSION_MAJOR >= 55
+#if LIBAVCODEC_VERSION_MAJOR >= 54
                                                        audio_avframe->nb_samples = FFMAX( samples, audio_input_nb_samples );
+#if LIBAVCODEC_VERSION_MAJOR >= 55
+                                                       if ( audio_codec_id == AV_CODEC_ID_VORBIS )
+                                                               audio_avframe->pts = synth_audio_pts;
+                                                       synth_audio_pts += audio_avframe->nb_samples;
+#endif
                                                        avcodec_fill_audio_frame( audio_avframe, codec->channels, codec->sample_fmt,
                                                                (const uint8_t*) audio_buf_2, AUDIO_ENCODE_BUFFER_SIZE, 0 );
                                                        int got_packet = 0;
@@ -1699,6 +1760,8 @@ static void *consumer_thread( void *arg )
                                                                goto on_fatal_error;
                                                        }
                                                        error_count = 0;
+                                                       mlt_log_debug( MLT_CONSUMER_SERVICE( consumer ), "audio stream %d pkt pts %"PRId64" frame_size %d stream pts %"PRId64"\n",
+                                                               stream->index, pkt.pts, codec->frame_size, stream->pts.val );
                                                }
                                                else if ( pkt.size < 0 )
                                                {
@@ -1707,7 +1770,6 @@ static void *consumer_thread( void *arg )
                                                                goto on_fatal_error;
                                                }
 
-                                               mlt_log_debug( MLT_CONSUMER_SERVICE( consumer ), " frame_size %d\n", codec->frame_size );
                                                if ( i == 0 )
                                                {
                                                        audio_pts = (double)stream->pts.val * av_q2d( stream->time_base );
@@ -1725,29 +1787,35 @@ static void *consumer_thread( void *arg )
                                if ( mlt_deque_count( queue ) )
                                {
                                        int ret = 0;
-                                       AVCodecContext *c;
+                                       AVCodecContext *c = video_st->codec;
 
                                        frame = mlt_deque_pop_front( queue );
                                        frame_properties = MLT_FRAME_PROPERTIES( frame );
 
-                                       c = video_st->codec;
-                                       
                                        if ( mlt_properties_get_int( frame_properties, "rendered" ) )
                                        {
                                                int i = 0;
                                                uint8_t *p;
                                                uint8_t *q;
+                                               int stride = mlt_image_format_size( img_fmt, width, 0, NULL );
 
                                                mlt_frame_get_image( frame, &image, &img_fmt, &img_width, &img_height, 0 );
-
                                                q = image;
 
                                                // Convert the mlt frame to an AVPicture
-                                               for ( i = 0; i < height; i ++ )
+                                               if ( img_fmt == mlt_image_yuv420p )
                                                {
-                                                       p = video_avframe->data[ 0 ] + i * video_avframe->linesize[ 0 ];
-                                                       memcpy( p, q, width * 2 );
-                                                       q += width * 2;
+                                                       memcpy( video_avframe->data[0], q, video_avframe->linesize[0] );
+                                                       q += stride;
+                                                       memcpy( video_avframe->data[1], q, video_avframe->linesize[1] );
+                                                       q += stride / 4;
+                                                       memcpy( video_avframe->data[2], q, video_avframe->linesize[2] );
+                                               }
+                                               else for ( i = 0; i < height; i ++ )
+                                               {
+                                                       p = video_avframe->data[0] + i * video_avframe->linesize[0];
+                                                       memcpy( p, q, stride );
+                                                       q += stride;
                                                }
 
                                                // Do the colour space conversion
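
Note: stride is mlt_image_format_size( img_fmt, width, 0, NULL ), i.e. the bytes in one packed row of the source image, so the packed formats (yuv422, rgb24, rgb24a) are still copied row by row while honouring the AVFrame's linesize padding; yuv420p is planar with quarter-size chroma planes stored back to back and needs per-plane handling. As a general sketch of such a planar copy (illustrative, not the exact code above):

    /* Illustrative: MLT lays out yuv420p as Y (w*h) followed by U and V
     * (each w/2 x h/2) with no row padding; AVFrame planes may be padded. */
    const uint8_t *src = image;
    int plane_w[3] = { width, width / 2, width / 2 };
    int plane_h[3] = { height, height / 2, height / 2 };
    int plane, y;
    for ( plane = 0; plane < 3; plane++ )
        for ( y = 0; y < plane_h[plane]; y++ )
        {
            memcpy( video_avframe->data[plane] + y * video_avframe->linesize[plane],
                    src, plane_w[plane] );
            src += plane_w[plane];
        }
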
@@ -1758,8 +1826,8 @@ static void *consumer_thread( void *arg )
 #ifdef USE_SSE
                                                flags |= SWS_CPU_CAPS_MMX2;
 #endif
-                                               struct SwsContext *context = sws_getContext( width, height, PIX_FMT_YUYV422,
-                                                       width, height, video_st->codec->pix_fmt, flags, NULL, NULL, NULL);
+                                               struct SwsContext *context = sws_getContext( width, height, pick_pix_fmt( img_fmt ),
+                                                       width, height, c->pix_fmt, flags, NULL, NULL, NULL);
                                                sws_scale( context, (const uint8_t* const*) video_avframe->data, video_avframe->linesize, 0, height,
                                                        converted_avframe->data, converted_avframe->linesize);
                                                sws_freeContext( context );
@@ -1767,7 +1835,11 @@ static void *consumer_thread( void *arg )
                                                mlt_events_fire( properties, "consumer-frame-show", frame, NULL );
 
                                                // Apply the alpha if applicable
-                                               if ( video_st->codec->pix_fmt == PIX_FMT_RGB32 )
+                                               if ( !mlt_properties_get( properties, "mlt_image_format" ) ||
+                                                    strcmp( mlt_properties_get( properties, "mlt_image_format" ), "rgb24a" ) )
+                                               if ( c->pix_fmt == PIX_FMT_RGBA ||
+                                                    c->pix_fmt == PIX_FMT_ARGB ||
+                                                    c->pix_fmt == PIX_FMT_BGRA )
                                                {
                                                        uint8_t *alpha = mlt_frame_get_alpha_mask( frame );
                                                        register int n;
@@ -1821,16 +1893,29 @@ static void *consumer_thread( void *arg )
                                        {
                                                AVPacket pkt;
                                                av_init_packet( &pkt );
-                                               pkt.data = video_outbuf;
-                                               pkt.size = video_outbuf_size;
+                                               if ( c->codec->id == AV_CODEC_ID_RAWVIDEO ) {
+                                                       pkt.data = NULL;
+                                                       pkt.size = 0;
+                                               } else {
+                                                       pkt.data = video_outbuf;
+                                                       pkt.size = video_outbuf_size;
+                                               }
 
                                                // Set the quality
                                                converted_avframe->quality = c->global_quality;
+                                               converted_avframe->pts = frame_count;
 
                                                // Set frame interlace hints
                                                converted_avframe->interlaced_frame = !mlt_properties_get_int( frame_properties, "progressive" );
                                                converted_avframe->top_field_first = mlt_properties_get_int( frame_properties, "top_field_first" );
-                                               converted_avframe->pts = frame_count;
+#if LIBAVCODEC_VERSION_INT >= ((53<<16)+(61<<8)+100)
+                                               if ( mlt_properties_get_int( frame_properties, "progressive" ) )
+                                                       c->field_order = AV_FIELD_PROGRESSIVE;
+                                               else if ( c->codec_id == AV_CODEC_ID_MJPEG )
+                                                       c->field_order = (mlt_properties_get_int( frame_properties, "top_field_first" )) ? AV_FIELD_TT : AV_FIELD_BB;
+                                               else
+                                                       c->field_order = (mlt_properties_get_int( frame_properties, "top_field_first" )) ? AV_FIELD_TB : AV_FIELD_BT;
+#endif
 
                                                // Encode the image
 #if LIBAVCODEC_VERSION_MAJOR >= 55
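
Note: for AV_CODEC_ID_RAWVIDEO the packet is left with data = NULL and size = 0 so that the new encode API allocates a packet large enough for the uncompressed frame, which may not fit in the fixed-size video_outbuf. The frame's interlacing hints are also propagated to the codec context's field_order (with a separate TT/BB mapping for MJPEG versus TB/BT for other codecs). The encode call behind the LIBAVCODEC_VERSION_MAJOR >= 55 guard follows the usual avcodec_encode_video2() pattern, roughly:

    /* Illustrative encode-and-write pattern for libavcodec >= 55 (error handling trimmed). */
    int got_packet = 0;
    if ( avcodec_encode_video2( c, &pkt, converted_avframe, &got_packet ) == 0 && got_packet )
    {
        if ( pkt.pts != AV_NOPTS_VALUE )
            pkt.pts = av_rescale_q( pkt.pts, c->time_base, video_st->time_base );
        pkt.stream_index = video_st->index;
        av_interleaved_write_frame( oc, &pkt );
    }
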
@@ -1929,10 +2014,11 @@ static void *consumer_thread( void *arg )
                        pkt.data = audio_outbuf;
                        pkt.size = 0;
 
-                       if ( fifo &&
-                               ( channels * audio_input_nb_samples < sample_fifo_used( fifo ) / sample_bytes ) )
+                       if ( fifo && sample_fifo_used( fifo ) > 0 )
                        {
-                               sample_fifo_fetch( fifo, audio_buf_1, channels * audio_input_nb_samples * sample_bytes );
+                               // Drain the MLT FIFO
+                               int samples = FFMIN( FFMIN( channels * audio_input_nb_samples, sample_fifo_used( fifo ) / sample_bytes ), AUDIO_ENCODE_BUFFER_SIZE );
+                               sample_fifo_fetch( fifo, audio_buf_1, samples * sample_bytes );
                                void* p = audio_buf_1;
 #if LIBAVUTIL_VERSION_INT >= ((51<<16)+(17<<8)+0)
                                if ( c->sample_fmt == AV_SAMPLE_FMT_FLTP )
@@ -1944,9 +2030,14 @@ static void *consumer_thread( void *arg )
                                else if ( c->sample_fmt == AV_SAMPLE_FMT_U8P )
                                        p = interleaved_to_planar( audio_input_nb_samples, channels, p, sizeof( uint8_t ) );
 #endif
-#if LIBAVCODEC_VERSION_MAJOR >= 55
+#if LIBAVCODEC_VERSION_MAJOR >= 54
                                pkt.size = audio_outbuf_size;
-                               audio_avframe->nb_samples = audio_input_nb_samples;
+                               audio_avframe->nb_samples = FFMAX( samples / channels, audio_input_nb_samples );
+#if LIBAVCODEC_VERSION_MAJOR >= 55
+                               if ( audio_codec_id == AV_CODEC_ID_VORBIS )
+                                       audio_avframe->pts = synth_audio_pts;
+                               synth_audio_pts += audio_avframe->nb_samples;
+#endif
                                avcodec_fill_audio_frame( audio_avframe, c->channels, c->sample_fmt,
                                        (const uint8_t*) p, AUDIO_ENCODE_BUFFER_SIZE, 0 );
                                int got_packet = 0;
@@ -1956,32 +2047,37 @@ static void *consumer_thread( void *arg )
                                else if ( !got_packet )
                                        pkt.size = 0;
 #else
-                               c->frame_size = audio_input_nb_samples;
+                               c->frame_size = FFMAX( samples / channels, audio_input_nb_samples );
                                pkt.size = avcodec_encode_audio( c, audio_outbuf, audio_outbuf_size, p );
 #endif
 #if LIBAVUTIL_VERSION_INT >= ((51<<16)+(17<<8)+0)
                                if ( p != audio_buf_1 )
                                        mlt_pool_release( p );
 #endif
+                               mlt_log_debug( MLT_CONSUMER_SERVICE( consumer ), "flushing audio size %d\n", pkt.size );
                        }
-                       if ( pkt.size <= 0 ) {
-#if LIBAVCODEC_VERSION_MAJOR >= 55
-                               pkt.size = audio_outbuf_size;
-                               int got_packet = 0;
-                               int ret = avcodec_encode_audio2( c, &pkt, NULL, &got_packet );
-                               if ( ret < 0 )
-                                       pkt.size = ret;
-                               else if ( !got_packet )
-                                       pkt.size = 0;
+                       else
+                       {
+                               // Drain the codec
+                               if ( pkt.size <= 0 ) {
+#if LIBAVCODEC_VERSION_MAJOR >= 54
+                                       pkt.size = audio_outbuf_size;
+                                       int got_packet = 0;
+                                       int ret = avcodec_encode_audio2( c, &pkt, NULL, &got_packet );
+                                       if ( ret < 0 )
+                                               pkt.size = ret;
+                                       else if ( !got_packet )
+                                               pkt.size = 0;
 #else
-                               pkt.size = avcodec_encode_audio( c, audio_outbuf, audio_outbuf_size, NULL );
-                               pkt.pts = c->coded_frame? c->coded_frame->pts : AV_NOPTS_VALUE;
-                               pkt.flags |= PKT_FLAG_KEY;
+                                       pkt.size = avcodec_encode_audio( c, audio_outbuf, audio_outbuf_size, NULL );
+                                       pkt.pts = c->coded_frame? c->coded_frame->pts : AV_NOPTS_VALUE;
+                                       pkt.flags |= PKT_FLAG_KEY;
 #endif
+                               }
+                               mlt_log_debug( MLT_CONSUMER_SERVICE( consumer ), "flushing audio size %d\n", pkt.size );
+                               if ( pkt.size <= 0 )
+                                       break;
                        }
-                       mlt_log_debug( MLT_CONSUMER_SERVICE( consumer ), "flushing audio size %d\n", pkt.size );
-                       if ( pkt.size <= 0 )
-                               break;
 
                        // Write the compressed frame in the media file
                        if ( pkt.pts != AV_NOPTS_VALUE )
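
Note: the audio flush now has two stages: first whatever remains in the MLT sample FIFO is fetched and encoded as a final frame, then the encoder's internal delay is drained by feeding it NULL frames until no further packets come out (the break above). The drain stage follows the standard pattern, roughly:

    /* Illustrative encoder drain loop for libavcodec >= 54: feed NULL frames
     * until the codec has no more buffered packets to return. */
    for ( ;; )
    {
        AVPacket pkt;
        av_init_packet( &pkt );
        pkt.data = NULL;
        pkt.size = 0;
        int got_packet = 0;
        if ( avcodec_encode_audio2( c, &pkt, NULL, &got_packet ) < 0 || !got_packet )
            break;
        pkt.stream_index = audio_st[0]->index;
        if ( av_interleaved_write_frame( oc, &pkt ) != 0 )
            break;
    }
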
@@ -1995,9 +2091,8 @@ static void *consumer_thread( void *arg )
                        pkt.stream_index = audio_st[0]->index;
                        if ( av_interleaved_write_frame( oc, &pkt ) != 0 )
                        {
-                               mlt_log_fatal( MLT_CONSUMER_SERVICE( consumer ), "error writing flushed audio frame\n" );
-                               mlt_events_fire( properties, "consumer-fatal-error", NULL );
-                               goto on_fatal_error;
+                               mlt_log_warning( MLT_CONSUMER_SERVICE( consumer ), "error writing flushed audio frame\n" );
+                               break;
                        }
                }
 
@@ -2007,8 +2102,13 @@ static void *consumer_thread( void *arg )
                        AVCodecContext *c = video_st->codec;
                        AVPacket pkt;
                        av_init_packet( &pkt );
-                       pkt.data = video_outbuf;
-                       pkt.size = video_outbuf_size;
+                       if ( c->codec->id == AV_CODEC_ID_RAWVIDEO ) {
+                               pkt.data = NULL;
+                               pkt.size = 0;
+                       } else {
+                               pkt.data = video_outbuf;
+                               pkt.size = video_outbuf_size;
+                       }
 
                        // Encode the image
 #if LIBAVCODEC_VERSION_MAJOR >= 55
@@ -2119,6 +2219,18 @@ on_fatal_error:
                free( full );
                free( cwd );
                remove( "x264_2pass.log.temp" );
+
+               // Recent versions of libavcodec/x264 support passlogfile and need cleanup if specified.
+               if ( !mlt_properties_get( properties, "_logfilename" ) &&
+                     mlt_properties_get( properties, "passlogfile" ) )
+               {
+                       file = mlt_properties_get( properties, "passlogfile" );
+                       remove( file );
+                       full = malloc( strlen( file ) + strlen( ".mbtree" ) + 1 );
+                       sprintf( full, "%s.mbtree", file );
+                       remove( full );
+                       free( full );
+               }
        }
 
        while ( ( frame = mlt_deque_pop_back( queue ) ) )
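
Note: when two-pass encoding is driven through the passlogfile property (supported by recent libavcodec/x264) rather than MLT's internal _logfilename handling, the consumer now also removes that log and its .mbtree companion during cleanup, mirroring the x264_2pass.log handling above. Illustrative two-pass setup via consumer properties (the log path is hypothetical):

    /* Illustrative: two-pass x264 encode through the avformat consumer. */
    mlt_properties props = MLT_CONSUMER_PROPERTIES( consumer );
    mlt_properties_set( props, "vcodec", "libx264" );
    mlt_properties_set( props, "passlogfile", "/tmp/mlt_x264_pass.log" ); /* hypothetical path */
    mlt_properties_set_int( props, "pass", 1 );  /* run again with pass=2 for the second pass */
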