Fix muxing vorbis into webm/mkv with libavcodec < 55.

diff --git a/src/modules/avformat/consumer_avformat.c b/src/modules/avformat/consumer_avformat.c
index bb791365210301b0bc2cdb3c73f8ef8535cf8927..33b3a93f983e05597ae0830be5ae0503cfc41924 100644
--- a/src/modules/avformat/consumer_avformat.c
+++ b/src/modules/avformat/consumer_avformat.c
@@ -62,6 +62,8 @@
 #define AV_CODEC_ID_NONE      CODEC_ID_NONE
 #define AV_CODEC_ID_AC3       CODEC_ID_AC3
 #define AV_CODEC_ID_VORBIS    CODEC_ID_VORBIS
+#define AV_CODEC_ID_RAWVIDEO  CODEC_ID_RAWVIDEO
+#define AV_CODEC_ID_MJPEG     CODEC_ID_MJPEG
 #endif
 
 #define MAX_AUDIO_STREAMS (8)
@@ -236,7 +238,7 @@ static void property_changed( mlt_properties owner, mlt_consumer self, char *nam
                mlt_properties_set_int( properties, "display_aspect_den", rational.den );
 
                // Now compute the sample aspect ratio
-               rational = av_d2q( ar * height / width, 255 );
+               rational = av_d2q( ar * height / FFMAX(width, 1), 255 );
 
                // Update the profile and properties as well since this is an alias
                // for mlt properties that correspond to profile settings
@@ -441,6 +443,21 @@ static void apply_properties( void *obj, mlt_properties properties, int flags )
        }
 }
 
+static enum PixelFormat pick_pix_fmt( mlt_image_format img_fmt )
+{
+       switch ( img_fmt )
+       {
+       case mlt_image_rgb24:
+               return PIX_FMT_RGB24;
+       case mlt_image_rgb24a:
+               return PIX_FMT_RGBA;
+       case mlt_image_yuv420p:
+               return PIX_FMT_YUV420P;
+       default:
+               return PIX_FMT_YUYV422;
+       }
+}
+
 static int get_mlt_audio_format( int av_sample_fmt )
 {
        switch ( av_sample_fmt )
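
Note: pick_pix_fmt() maps the consumer's working mlt_image_format onto the equivalent FFmpeg pixel format, defaulting to PIX_FMT_YUYV422 for MLT's native packed 4:2:2. It is used further down both to allocate the intermediate AVFrame and as the source format for sws_getContext(), so the two always agree. A minimal sketch (codec_pix_fmt stands in for the encoder's pixel format):

    /* Illustrative: the intermediate frame and the scaler share one source format. */
    enum PixelFormat src_fmt = pick_pix_fmt( img_fmt );      /* e.g. PIX_FMT_YUV420P */
    AVFrame *tmp = alloc_picture( src_fmt, width, height );  /* helper defined in this file */
    struct SwsContext *sws = sws_getContext( width, height, src_fmt,
        width, height, codec_pix_fmt, SWS_BICUBIC, NULL, NULL, NULL );
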
@@ -1062,13 +1079,14 @@ static inline long time_difference( struct timeval *time1 )
 static int mlt_write(void *h, uint8_t *buf, int size)
 {
        mlt_properties properties = (mlt_properties) h;
-       mlt_events_fire( properties, "avformat-write", buf, size, NULL );
+       mlt_events_fire( properties, "avformat-write", buf, &size, NULL );
        return 0;
 }
 
 static void write_transmitter( mlt_listener listener, mlt_properties owner, mlt_service service, void **args )
 {
-       listener( owner, service, (uint8_t*) args[0], (int) args[1] );
+       int *p_size = (int*) args[1];
+       listener( owner, service, (uint8_t*) args[0], *p_size );
 }
 
 /** The main thread - the argument is simply the consumer.
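
Note: the previous code smuggled the int size through the void** event arguments with an int/pointer cast, which is not portable on 64-bit platforms; the event now passes the address of size and write_transmitter dereferences it. A minimal sketch of a listener for the "avformat-write" event (the listener name and the registration call are illustrative, not part of this patch):

    /* Illustrative listener: receives each chunk the muxer writes via the custom AVIO callback. */
    static void on_avformat_write( mlt_properties owner, mlt_service service,
                                   uint8_t *buf, int size )
    {
        /* forward the muxed bytes somewhere, e.g. a socket or ring buffer */
        (void) owner; (void) service; (void) buf; (void) size;
    }

    /* Registration, assuming 'consumer' is the avformat consumer:
     * mlt_events_listen( MLT_CONSUMER_PROPERTIES( consumer ), consumer,
     *                    "avformat-write", (mlt_listener) on_avformat_write );
     */
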
@@ -1126,14 +1144,33 @@ static void *consumer_thread( void *arg )
        mlt_deque queue = mlt_properties_get_data( properties, "frame_queue", NULL );
        sample_fifo fifo = mlt_properties_get_data( properties, "sample_fifo", NULL );
 
-       // Need two av pictures for converting
-       AVFrame *converted_avframe = NULL;
-       AVFrame *audio_avframe = NULL;
-       AVFrame *video_avframe = alloc_picture( PIX_FMT_YUYV422, width, height );
-
        // For receiving images from an mlt_frame
        uint8_t *image;
        mlt_image_format img_fmt = mlt_image_yuv422;
+       // Get the image format to use for rendering threads
+       const char* img_fmt_name = mlt_properties_get( properties, "mlt_image_format" );
+       if ( img_fmt_name )
+       {
+               if ( !strcmp( img_fmt_name, "rgb24" ) )
+                       img_fmt = mlt_image_rgb24;
+               else if ( !strcmp( img_fmt_name, "rgb24a" ) )
+                       img_fmt = mlt_image_rgb24a;
+               else if ( !strcmp( img_fmt_name, "yuv420p" ) )
+                       img_fmt = mlt_image_yuv420p;
+       }
+       else if ( mlt_properties_get( properties, "pix_fmt" ) )
+       {
+               img_fmt_name = mlt_properties_get( properties, "pix_fmt" );
+               if ( !strcmp( img_fmt_name, "rgba" ) ||
+                    !strcmp( img_fmt_name, "argb" ) ||
+                    !strcmp( img_fmt_name, "bgra" ) )
+                       img_fmt = mlt_image_rgb24a;
+       }
+
+       // Need two av pictures for converting
+       AVFrame *converted_avframe = NULL;
+       AVFrame *audio_avframe = NULL;
+       AVFrame *video_avframe = alloc_picture( pick_pix_fmt( img_fmt ), width, height );
 
        // For receiving audio samples back from the fifo
        uint8_t *audio_buf_1 = av_malloc( AUDIO_ENCODE_BUFFER_SIZE );
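
Note: instead of always pulling packed yuv422 from producers, the consumer now honours an optional mlt_image_format property (rgb24, rgb24a, or yuv420p); when it is absent but pix_fmt names an alpha-capable format (rgba, argb, bgra), rgb24a is inferred so alpha survives the pipeline. The intermediate AVFrame is then allocated in the matching FFmpeg pixel format, avoiding a needless round trip through 4:2:2. An illustrative usage sketch from application code (assuming an existing mlt_profile 'profile'; file name and codec choice are illustrative):

    /* Illustrative: ask the avformat consumer to work in planar 4:2:0. */
    mlt_consumer consumer = mlt_factory_consumer( profile, "avformat", "output.webm" );
    mlt_properties_set( MLT_CONSUMER_PROPERTIES( consumer ), "mlt_image_format", "yuv420p" );
    mlt_properties_set( MLT_CONSUMER_PROPERTIES( consumer ), "vcodec", "libvpx" );
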
@@ -1141,7 +1178,7 @@ static void *consumer_thread( void *arg )
        int count = 0;
 
        // Allocate the context
-       AVFormatContext *oc = avformat_alloc_context( );
+       AVFormatContext *oc = NULL;
 
        // Streams
        AVStream *video_st = NULL;
@@ -1172,6 +1209,7 @@ static void *consumer_thread( void *arg )
        char key[27];
        mlt_properties frame_meta_properties = mlt_properties_new();
        int error_count = 0;
+       int64_t synth_audio_pts = 0;
 
        // Initialize audio_st
        int i = MAX_AUDIO_STREAMS;
@@ -1194,6 +1232,22 @@ static void *consumer_thread( void *arg )
        if ( filename == NULL || !strcmp( filename, "" ) )
                filename = "pipe:";
 
+#if LIBAVUTIL_VERSION_INT >= ((53<<16)+(2<<8)+0)
+       avformat_alloc_output_context2( &oc, fmt, format, filename );
+#else
+       oc = avformat_alloc_context( );
+       oc->oformat = fmt;
+       snprintf( oc->filename, sizeof(oc->filename), "%s", filename );
+
+       if ( oc->oformat && oc->oformat->priv_class && !oc->priv_data && oc->oformat->priv_data_size ) {
+               oc->priv_data = av_mallocz( oc->oformat->priv_data_size );
+               if ( oc->priv_data ) {
+                       *(const AVClass**)oc->priv_data = oc->oformat->priv_class;
+                       av_opt_set_defaults( oc->priv_data );
+               }
+       }
+#endif
+
        // Get the codec ids selected
        audio_codec_id = fmt->audio_codec;
        video_codec_id = fmt->video_codec;
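
Note: on newer FFmpeg/Libav, avformat_alloc_output_context2() allocates the muxing context, binds the requested output format (or guesses it from the format name or filename), copies the filename, and initialises the muxer's private options to their defaults. The #else branch reproduces that last step by hand so format-private AVOptions can still be applied on older releases. The modern call, roughly (file name illustrative):

    /* Illustrative: allocate and pre-configure the muxing context in one call.
     * A NULL format name lets the file extension select the muxer. */
    AVFormatContext *oc = NULL;
    if ( avformat_alloc_output_context2( &oc, NULL, NULL, "output.webm" ) < 0 || !oc )
        mlt_log_error( MLT_CONSUMER_SERVICE( consumer ), "failed to allocate output context\n" );
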
@@ -1213,7 +1267,7 @@ static void *consumer_thread( void *arg )
                                acodec = mlt_properties_get( properties, "_acodec" );
                                audio_codec = avcodec_find_encoder_by_name( acodec );
                        }
-                       else if ( !strcmp( acodec, "aac" ) )
+                       else if ( !strcmp( acodec, "aac" ) || !strcmp( acodec, "vorbis" ) )
                        {
                                mlt_properties_set( properties, "astrict", "experimental" );
                        }
@@ -1272,9 +1326,6 @@ static void *consumer_thread( void *arg )
                }
        }
 
-       oc->oformat = fmt;
-       snprintf( oc->filename, sizeof(oc->filename), "%s", filename );
-
        // Get a frame now, so we can set some AVOptions from properties.
        frame = mlt_consumer_rt_frame( consumer );
 
@@ -1442,7 +1493,7 @@ static void *consumer_thread( void *arg )
        if ( video_st )
                converted_avframe = alloc_picture( video_st->codec->pix_fmt, width, height );
 
-#if LIBAVCODEC_VERSION_MAJOR >= 55
+#if LIBAVCODEC_VERSION_MAJOR >= 54
        // Allocate audio AVFrame
        if ( audio_st[0] )
        {
@@ -1575,8 +1626,13 @@ static void *consumer_thread( void *arg )
                                                        else if ( codec->sample_fmt == AV_SAMPLE_FMT_U8P )
                                                                p = interleaved_to_planar( samples, channels, p, sizeof( uint8_t ) );
 #endif
-#if LIBAVCODEC_VERSION_MAJOR >= 55
+#if LIBAVCODEC_VERSION_MAJOR >= 54
                                                        audio_avframe->nb_samples = FFMAX( samples, audio_input_nb_samples );
+#if LIBAVCODEC_VERSION_MAJOR >= 55
+                                                       if ( audio_codec_id == AV_CODEC_ID_VORBIS )
+                                                               audio_avframe->pts = synth_audio_pts;
+                                                       synth_audio_pts += audio_avframe->nb_samples;
+#endif
                                                        avcodec_fill_audio_frame( audio_avframe, codec->channels, codec->sample_fmt,
                                                                (const uint8_t*) p, AUDIO_ENCODE_BUFFER_SIZE, 0 );
                                                        int got_packet = 0;
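
Note: the surrounding guards drop from libavcodec 55 to 54 so that version 54 also takes the AVFrame-based avcodec_encode_audio2() path, and on 55+ a pts is synthesised for Vorbis because that encoder/Matroska muxer combination rejects frames without monotonically increasing timestamps. synth_audio_pts is the int64_t counter declared earlier in this patch; the pattern, roughly ('nb_samples' stands for the samples per channel in this frame):

    /* Illustrative: stamp each audio AVFrame with a running sample count. */
    audio_avframe->nb_samples = nb_samples;
    audio_avframe->pts        = synth_audio_pts;      /* in 1/sample_rate units */
    synth_audio_pts          += audio_avframe->nb_samples;
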
@@ -1662,8 +1718,13 @@ static void *consumer_thread( void *arg )
                                                                        dest_offset += current_channels;
                                                                }
                                                        }
-#if LIBAVCODEC_VERSION_MAJOR >= 55
+#if LIBAVCODEC_VERSION_MAJOR >= 54
                                                        audio_avframe->nb_samples = FFMAX( samples, audio_input_nb_samples );
+#if LIBAVCODEC_VERSION_MAJOR >= 55
+                                                       if ( audio_codec_id == AV_CODEC_ID_VORBIS )
+                                                               audio_avframe->pts = synth_audio_pts;
+                                                       synth_audio_pts += audio_avframe->nb_samples;
+#endif
                                                        avcodec_fill_audio_frame( audio_avframe, codec->channels, codec->sample_fmt,
                                                                (const uint8_t*) audio_buf_2, AUDIO_ENCODE_BUFFER_SIZE, 0 );
                                                        int got_packet = 0;
@@ -1699,6 +1760,8 @@ static void *consumer_thread( void *arg )
                                                                goto on_fatal_error;
                                                        }
                                                        error_count = 0;
+                                                       mlt_log_debug( MLT_CONSUMER_SERVICE( consumer ), "audio stream %d pkt pts %"PRId64" frame_size %d stream pts %"PRId64"\n",
+                                                               stream->index, pkt.pts, codec->frame_size, stream->pts.val );
                                                }
                                                else if ( pkt.size < 0 )
                                                {
@@ -1707,7 +1770,6 @@ static void *consumer_thread( void *arg )
                                                                goto on_fatal_error;
                                                }
 
-                                               mlt_log_debug( MLT_CONSUMER_SERVICE( consumer ), " frame_size %d\n", codec->frame_size );
                                                if ( i == 0 )
                                                {
                                                        audio_pts = (double)stream->pts.val * av_q2d( stream->time_base );
@@ -1725,29 +1787,35 @@ static void *consumer_thread( void *arg )
                                if ( mlt_deque_count( queue ) )
                                {
                                        int ret = 0;
-                                       AVCodecContext *c;
+                                       AVCodecContext *c = video_st->codec;
 
                                        frame = mlt_deque_pop_front( queue );
                                        frame_properties = MLT_FRAME_PROPERTIES( frame );
 
-                                       c = video_st->codec;
-                                       
                                        if ( mlt_properties_get_int( frame_properties, "rendered" ) )
                                        {
                                                int i = 0;
                                                uint8_t *p;
                                                uint8_t *q;
+                                               int stride = mlt_image_format_size( img_fmt, width, 0, NULL );
 
                                                mlt_frame_get_image( frame, &image, &img_fmt, &img_width, &img_height, 0 );
-
                                                q = image;
 
                                                // Convert the mlt frame to an AVPicture
-                                               for ( i = 0; i < height; i ++ )
+                                               if ( img_fmt == mlt_image_yuv420p )
                                                {
-                                                       p = video_avframe->data[ 0 ] + i * video_avframe->linesize[ 0 ];
-                                                       memcpy( p, q, width * 2 );
-                                                       q += width * 2;
+                                                       memcpy( video_avframe->data[0], q, video_avframe->linesize[0] );
+                                                       q += stride;
+                                                       memcpy( video_avframe->data[1], q, video_avframe->linesize[1] );
+                                                       q += stride / 4;
+                                                       memcpy( video_avframe->data[2], q, video_avframe->linesize[2] );
+                                               }
+                                               else for ( i = 0; i < height; i ++ )
+                                               {
+                                                       p = video_avframe->data[0] + i * video_avframe->linesize[0];
+                                                       memcpy( p, q, stride );
+                                                       q += stride;
                                                }
 
                                                // Do the colour space conversion
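
Note: stride is mlt_image_format_size( img_fmt, width, 0, NULL ), i.e. the bytes in one packed row of the source image, so the packed formats (yuv422, rgb24, rgb24a) are still copied row by row while honouring the AVFrame's linesize padding; yuv420p is planar with quarter-size chroma planes stored back to back and needs per-plane handling. As a general sketch of such a planar copy (illustrative, not the exact code above):

    /* Illustrative: MLT lays out yuv420p as Y (w*h) followed by U and V
     * (each w/2 x h/2) with no row padding; AVFrame planes may be padded. */
    const uint8_t *src = image;
    int plane_w[3] = { width, width / 2, width / 2 };
    int plane_h[3] = { height, height / 2, height / 2 };
    int plane, y;
    for ( plane = 0; plane < 3; plane++ )
        for ( y = 0; y < plane_h[plane]; y++ )
        {
            memcpy( video_avframe->data[plane] + y * video_avframe->linesize[plane],
                    src, plane_w[plane] );
            src += plane_w[plane];
        }
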
@@ -1758,8 +1826,8 @@ static void *consumer_thread( void *arg )
 #ifdef USE_SSE
                                                flags |= SWS_CPU_CAPS_MMX2;
 #endif
-                                               struct SwsContext *context = sws_getContext( width, height, PIX_FMT_YUYV422,
-                                                       width, height, video_st->codec->pix_fmt, flags, NULL, NULL, NULL);
+                                               struct SwsContext *context = sws_getContext( width, height, pick_pix_fmt( img_fmt ),
+                                                       width, height, c->pix_fmt, flags, NULL, NULL, NULL);
                                                sws_scale( context, (const uint8_t* const*) video_avframe->data, video_avframe->linesize, 0, height,
                                                        converted_avframe->data, converted_avframe->linesize);
                                                sws_freeContext( context );
@@ -1767,7 +1835,11 @@ static void *consumer_thread( void *arg )
                                                mlt_events_fire( properties, "consumer-frame-show", frame, NULL );
 
                                                // Apply the alpha if applicable
-                                               if ( video_st->codec->pix_fmt == PIX_FMT_RGB32 )
+                                               if ( !mlt_properties_get( properties, "mlt_image_format" ) ||
+                                                    strcmp( mlt_properties_get( properties, "mlt_image_format" ), "rgb24a" ) )
+                                               if ( c->pix_fmt == PIX_FMT_RGBA ||
+                                                    c->pix_fmt == PIX_FMT_ARGB ||
+                                                    c->pix_fmt == PIX_FMT_BGRA )
                                                {
                                                        uint8_t *alpha = mlt_frame_get_alpha_mask( frame );
                                                        register int n;
@@ -1821,16 +1893,29 @@ static void *consumer_thread( void *arg )
                                        {
                                                AVPacket pkt;
                                                av_init_packet( &pkt );
-                                               pkt.data = video_outbuf;
-                                               pkt.size = video_outbuf_size;
+                                               if ( c->codec->id == AV_CODEC_ID_RAWVIDEO ) {
+                                                       pkt.data = NULL;
+                                                       pkt.size = 0;
+                                               } else {
+                                                       pkt.data = video_outbuf;
+                                                       pkt.size = video_outbuf_size;
+                                               }
 
                                                // Set the quality
                                                converted_avframe->quality = c->global_quality;
+                                               converted_avframe->pts = frame_count;
 
                                                // Set frame interlace hints
                                                converted_avframe->interlaced_frame = !mlt_properties_get_int( frame_properties, "progressive" );
                                                converted_avframe->top_field_first = mlt_properties_get_int( frame_properties, "top_field_first" );
-                                               converted_avframe->pts = frame_count;
+#if LIBAVCODEC_VERSION_INT >= ((53<<16)+(61<<8)+100)
+                                               if ( mlt_properties_get_int( frame_properties, "progressive" ) )
+                                                       c->field_order = AV_FIELD_PROGRESSIVE;
+                                               else if ( c->codec_id == AV_CODEC_ID_MJPEG )
+                                                       c->field_order = (mlt_properties_get_int( frame_properties, "top_field_first" )) ? AV_FIELD_TT : AV_FIELD_BB;
+                                               else
+                                                       c->field_order = (mlt_properties_get_int( frame_properties, "top_field_first" )) ? AV_FIELD_TB : AV_FIELD_BT;
+#endif
 
                                                // Encode the image
 #if LIBAVCODEC_VERSION_MAJOR >= 55
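
Note: for AV_CODEC_ID_RAWVIDEO the packet is left with data = NULL and size = 0 so that the new encode API allocates a packet large enough for the uncompressed frame, which may not fit in the fixed-size video_outbuf. The frame's interlacing hints are also propagated to the codec context's field_order (with a separate TT/BB mapping for MJPEG versus TB/BT for other codecs). The encode call behind the LIBAVCODEC_VERSION_MAJOR >= 55 guard follows the usual avcodec_encode_video2() pattern, roughly:

    /* Illustrative encode-and-write pattern for libavcodec >= 55 (error handling trimmed). */
    int got_packet = 0;
    if ( avcodec_encode_video2( c, &pkt, converted_avframe, &got_packet ) == 0 && got_packet )
    {
        if ( pkt.pts != AV_NOPTS_VALUE )
            pkt.pts = av_rescale_q( pkt.pts, c->time_base, video_st->time_base );
        pkt.stream_index = video_st->index;
        av_interleaved_write_frame( oc, &pkt );
    }
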
@@ -1929,10 +2014,11 @@ static void *consumer_thread( void *arg )
                        pkt.data = audio_outbuf;
                        pkt.size = 0;
 
-                       if ( fifo &&
-                               ( channels * audio_input_nb_samples < sample_fifo_used( fifo ) / sample_bytes ) )
+                       if ( fifo && sample_fifo_used( fifo ) > 0 )
                        {
-                               sample_fifo_fetch( fifo, audio_buf_1, channels * audio_input_nb_samples * sample_bytes );
+                               // Drain the MLT FIFO
+                               int samples = FFMIN( FFMIN( channels * audio_input_nb_samples, sample_fifo_used( fifo ) / sample_bytes ), AUDIO_ENCODE_BUFFER_SIZE );
+                               sample_fifo_fetch( fifo, audio_buf_1, samples * sample_bytes );
                                void* p = audio_buf_1;
 #if LIBAVUTIL_VERSION_INT >= ((51<<16)+(17<<8)+0)
                                if ( c->sample_fmt == AV_SAMPLE_FMT_FLTP )
@@ -1944,9 +2030,14 @@ static void *consumer_thread( void *arg )
                                else if ( c->sample_fmt == AV_SAMPLE_FMT_U8P )
                                        p = interleaved_to_planar( audio_input_nb_samples, channels, p, sizeof( uint8_t ) );
 #endif
-#if LIBAVCODEC_VERSION_MAJOR >= 55
+#if LIBAVCODEC_VERSION_MAJOR >= 54
                                pkt.size = audio_outbuf_size;
-                               audio_avframe->nb_samples = audio_input_nb_samples;
+                               audio_avframe->nb_samples = FFMAX( samples / channels, audio_input_nb_samples );
+#if LIBAVCODEC_VERSION_MAJOR >= 55
+                               if ( audio_codec_id == AV_CODEC_ID_VORBIS )
+                                       audio_avframe->pts = synth_audio_pts;
+                               synth_audio_pts += audio_avframe->nb_samples;
+#endif
                                avcodec_fill_audio_frame( audio_avframe, c->channels, c->sample_fmt,
                                        (const uint8_t*) p, AUDIO_ENCODE_BUFFER_SIZE, 0 );
                                int got_packet = 0;
@@ -1956,32 +2047,37 @@ static void *consumer_thread( void *arg )
                                else if ( !got_packet )
                                        pkt.size = 0;
 #else
-                               c->frame_size = audio_input_nb_samples;
+                               c->frame_size = FFMAX( samples / channels, audio_input_nb_samples );
                                pkt.size = avcodec_encode_audio( c, audio_outbuf, audio_outbuf_size, p );
 #endif
 #if LIBAVUTIL_VERSION_INT >= ((51<<16)+(17<<8)+0)
                                if ( p != audio_buf_1 )
                                        mlt_pool_release( p );
 #endif
+                               mlt_log_debug( MLT_CONSUMER_SERVICE( consumer ), "flushing audio size %d\n", pkt.size );
                        }
-                       if ( pkt.size <= 0 ) {
-#if LIBAVCODEC_VERSION_MAJOR >= 55
-                               pkt.size = audio_outbuf_size;
-                               int got_packet = 0;
-                               int ret = avcodec_encode_audio2( c, &pkt, NULL, &got_packet );
-                               if ( ret < 0 )
-                                       pkt.size = ret;
-                               else if ( !got_packet )
-                                       pkt.size = 0;
+                       else
+                       {
+                               // Drain the codec
+                               if ( pkt.size <= 0 ) {
+#if LIBAVCODEC_VERSION_MAJOR >= 54
+                                       pkt.size = audio_outbuf_size;
+                                       int got_packet = 0;
+                                       int ret = avcodec_encode_audio2( c, &pkt, NULL, &got_packet );
+                                       if ( ret < 0 )
+                                               pkt.size = ret;
+                                       else if ( !got_packet )
+                                               pkt.size = 0;
 #else
-                               pkt.size = avcodec_encode_audio( c, audio_outbuf, audio_outbuf_size, NULL );
-                               pkt.pts = c->coded_frame? c->coded_frame->pts : AV_NOPTS_VALUE;
-                               pkt.flags |= PKT_FLAG_KEY;
+                                       pkt.size = avcodec_encode_audio( c, audio_outbuf, audio_outbuf_size, NULL );
+                                       pkt.pts = c->coded_frame? c->coded_frame->pts : AV_NOPTS_VALUE;
+                                       pkt.flags |= PKT_FLAG_KEY;
 #endif
+                               }
+                               mlt_log_debug( MLT_CONSUMER_SERVICE( consumer ), "flushing audio size %d\n", pkt.size );
+                               if ( pkt.size <= 0 )
+                                       break;
                        }
-                       mlt_log_debug( MLT_CONSUMER_SERVICE( consumer ), "flushing audio size %d\n", pkt.size );
-                       if ( pkt.size <= 0 )
-                               break;
 
                        // Write the compressed frame in the media file
                        if ( pkt.pts != AV_NOPTS_VALUE )
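
Note: the audio flush now has two stages: first whatever remains in the MLT sample FIFO is fetched and encoded as a final frame, then the encoder's internal delay is drained by feeding it NULL frames until no further packets come out (the break above). The drain stage follows the standard pattern, roughly:

    /* Illustrative encoder drain loop for libavcodec >= 54: feed NULL frames
     * until the codec has no more buffered packets to return. */
    for ( ;; )
    {
        AVPacket pkt;
        av_init_packet( &pkt );
        pkt.data = NULL;
        pkt.size = 0;
        int got_packet = 0;
        if ( avcodec_encode_audio2( c, &pkt, NULL, &got_packet ) < 0 || !got_packet )
            break;
        pkt.stream_index = audio_st[0]->index;
        if ( av_interleaved_write_frame( oc, &pkt ) != 0 )
            break;
    }
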
@@ -1995,9 +2091,8 @@ static void *consumer_thread( void *arg )
                        pkt.stream_index = audio_st[0]->index;
                        if ( av_interleaved_write_frame( oc, &pkt ) != 0 )
                        {
-                               mlt_log_fatal( MLT_CONSUMER_SERVICE( consumer ), "error writing flushed audio frame\n" );
-                               mlt_events_fire( properties, "consumer-fatal-error", NULL );
-                               goto on_fatal_error;
+                               mlt_log_warning( MLT_CONSUMER_SERVICE( consumer ), "error writing flushed audio frame\n" );
+                               break;
                        }
                }
 
@@ -2007,8 +2102,13 @@ static void *consumer_thread( void *arg )
                        AVCodecContext *c = video_st->codec;
                        AVPacket pkt;
                        av_init_packet( &pkt );
-                       pkt.data = video_outbuf;
-                       pkt.size = video_outbuf_size;
+                       if ( c->codec->id == AV_CODEC_ID_RAWVIDEO ) {
+                               pkt.data = NULL;
+                               pkt.size = 0;
+                       } else {
+                               pkt.data = video_outbuf;
+                               pkt.size = video_outbuf_size;
+                       }
 
                        // Encode the image
 #if LIBAVCODEC_VERSION_MAJOR >= 55
@@ -2119,6 +2219,18 @@ on_fatal_error:
                free( full );
                free( cwd );
                remove( "x264_2pass.log.temp" );
+
+               // Recent versions of libavcodec/x264 support passlogfile and need cleanup if specified.
+               if ( !mlt_properties_get( properties, "_logfilename" ) &&
+                     mlt_properties_get( properties, "passlogfile" ) )
+               {
+                       file = mlt_properties_get( properties, "passlogfile" );
+                       remove( file );
+                       full = malloc( strlen( file ) + strlen( ".mbtree" ) + 1 );
+                       sprintf( full, "%s.mbtree", file );
+                       remove( full );
+                       free( full );
+               }
        }
 
        while ( ( frame = mlt_deque_pop_back( queue ) ) )
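
Note: when two-pass encoding is driven through the passlogfile property (supported by recent libavcodec/x264) rather than MLT's internal _logfilename handling, the consumer now also removes that log and its .mbtree companion during cleanup, mirroring the x264_2pass.log handling above. Illustrative two-pass setup via consumer properties (the log path is hypothetical):

    /* Illustrative: two-pass x264 encode through the avformat consumer. */
    mlt_properties props = MLT_CONSUMER_PROPERTIES( consumer );
    mlt_properties_set( props, "vcodec", "libx264" );
    mlt_properties_set( props, "passlogfile", "/tmp/mlt_x264_pass.log" ); /* hypothetical path */
    mlt_properties_set_int( props, "pass", 1 );  /* run again with pass=2 for the second pass */
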