git.sesse.net Git - ffmpeg/blobdiff - avconv.c
Dxtory capture format decoder
[ffmpeg] / avconv.c
index 409f2bad3d2468a4de3db073d3b125c23b56b2eb..3b1500b01d57e900e6a33afe526fb0adebb636c3 100644 (file)
--- a/avconv.c
+++ b/avconv.c
@@ -113,7 +113,7 @@ static int video_sync_method= -1;
 static int audio_sync_method= 0;
 static float audio_drift_threshold= 0.1;
 static int copy_ts= 0;
-static int copy_tb;
+static int copy_tb = 1;
 static int opt_shortest = 0;
 static char *vstats_filename;
 static FILE *vstats_file;
@@ -137,8 +137,6 @@ static uint8_t *audio_buf;
 static uint8_t *audio_out;
 static unsigned int allocated_audio_out_size, allocated_audio_buf_size;
 
-static void *samples;
-
 #define DEFAULT_PASS_LOGFILENAME_PREFIX "av2pass"
 
 typedef struct InputStream {
@@ -147,6 +145,8 @@ typedef struct InputStream {
     int discard;             /* true if stream data should be discarded */
     int decoding_needed;     /* true if the packets must be decoded in 'raw_fifo' */
     AVCodec *dec;
+    AVFrame *decoded_frame;
+    AVFrame *filtered_frame;
 
     int64_t       start;     /* time when read started */
     int64_t       next_pts;  /* synthetic pts for cases where pkt.pts
@@ -525,8 +525,11 @@ void exit_program(int ret)
     for(i=0;i<nb_input_files;i++) {
         av_close_input_file(input_files[i].ctx);
     }
-    for (i = 0; i < nb_input_streams; i++)
+    for (i = 0; i < nb_input_streams; i++) {
+        av_freep(&input_streams[i].decoded_frame);
+        av_freep(&input_streams[i].filtered_frame);
         av_dict_free(&input_streams[i].opts);
+    }
 
     if (vstats_file)
         fclose(vstats_file);
@@ -541,7 +544,6 @@ void exit_program(int ret)
     av_free(audio_buf);
     av_free(audio_out);
     allocated_audio_buf_size= allocated_audio_out_size= 0;
-    av_free(samples);
 
 #if CONFIG_AVFILTER
     avfilter_uninit();
@@ -554,7 +556,7 @@ void exit_program(int ret)
         exit (255);
     }
 
-    exit(ret); /* not all OS-es handle main() return value */
+    exit(ret);
 }
 
 static void assert_avoptions(AVDictionary *m)
@@ -673,11 +675,11 @@ static void choose_pixel_fmt(AVStream *st, AVCodec *codec)
                 p= (const enum PixelFormat[]){PIX_FMT_YUVJ420P, PIX_FMT_YUVJ422P, PIX_FMT_YUVJ444P, PIX_FMT_YUV420P, PIX_FMT_YUV422P, PIX_FMT_YUV444P, PIX_FMT_BGRA, PIX_FMT_NONE};
             }
         }
-        for(; *p!=-1; p++){
+        for (; *p != PIX_FMT_NONE; p++) {
             if(*p == st->codec->pix_fmt)
                 break;
         }
-        if (*p == -1) {
+        if (*p == PIX_FMT_NONE) {
             if(st->codec->pix_fmt != PIX_FMT_NONE)
                 av_log(NULL, AV_LOG_WARNING,
                         "Incompatible pixel format '%s' for codec '%s', auto-selecting format '%s'\n",
@@ -729,14 +731,19 @@ static void write_frame(AVFormatContext *s, AVPacket *pkt, AVCodecContext *avctx
     }
 }
 
-static void do_audio_out(AVFormatContext *s,
-                         OutputStream *ost,
-                         InputStream *ist,
-                         unsigned char *buf, int size)
+static void generate_silence(uint8_t* buf, enum AVSampleFormat sample_fmt, size_t size)
+{
+    /* AV_SAMPLE_FMT_U8 buffers are filled with 0x80, every other format with 0x00 */
+    const int silence_byte = (sample_fmt == AV_SAMPLE_FMT_U8) ? 0x80 : 0x00;
+
+    memset(buf, silence_byte, size);
+}
+
+static void do_audio_out(AVFormatContext *s, OutputStream *ost,
+                         InputStream *ist, AVFrame *decoded_frame)
 {
     uint8_t *buftmp;
     int64_t audio_out_size, audio_buf_size;
-    int64_t allocated_for_size= size;
 
     int size_out, frame_bytes, ret, resample_changed;
     AVCodecContext *enc= ost->st->codec;
@@ -744,6 +751,9 @@ static void do_audio_out(AVFormatContext *s,
     int osize = av_get_bytes_per_sample(enc->sample_fmt);
     int isize = av_get_bytes_per_sample(dec->sample_fmt);
     const int coded_bps = av_get_bits_per_sample(enc->codec->id);
+    uint8_t *buf = decoded_frame->data[0];
+    int size     = decoded_frame->nb_samples * dec->channels * isize;
+    int64_t allocated_for_size = size;
 
 need_realloc:
     audio_buf_size= (allocated_for_size + isize*dec->channels - 1) / (isize*dec->channels);
@@ -829,9 +839,9 @@ need_realloc:
 
     if(audio_sync_method){
         double delta = get_sync_ipts(ost) * enc->sample_rate - ost->sync_opts
-                - av_fifo_size(ost->fifo)/(enc->channels * 2);
-        double idelta= delta*dec->sample_rate / enc->sample_rate;
-        int byte_delta= ((int)idelta)*2*dec->channels;
+                - av_fifo_size(ost->fifo)/(enc->channels * osize);
+        int idelta = delta * dec->sample_rate / enc->sample_rate;
+        int byte_delta = idelta * isize * dec->channels;
 
         //FIXME resample delay
         if(fabs(delta) > 50){
@@ -840,7 +850,8 @@ need_realloc:
                     byte_delta= FFMAX(byte_delta, -size);
                     size += byte_delta;
                     buf  -= byte_delta;
-                    av_log(NULL, AV_LOG_VERBOSE, "discarding %d audio samples\n", (int)-delta);
+                    av_log(NULL, AV_LOG_VERBOSE, "discarding %d audio samples\n",
+                           -byte_delta / (isize * dec->channels));
                     if(!size)
                         return;
                     ist->is_start=0;
@@ -854,11 +865,11 @@ need_realloc:
                     }
                     ist->is_start=0;
 
-                    memset(input_tmp, 0, byte_delta);
+                    generate_silence(input_tmp, dec->sample_fmt, byte_delta);
                     memcpy(input_tmp + byte_delta, buf, size);
                     buf= input_tmp;
                     size += byte_delta;
-                    av_log(NULL, AV_LOG_VERBOSE, "adding %d audio samples of silence\n", (int)delta);
+                    av_log(NULL, AV_LOG_VERBOSE, "adding %d audio samples of silence\n", idelta);
                 }
             }else if(audio_sync_method>1){
                 int comp= av_clip(delta, -audio_sync_method, audio_sync_method);
@@ -871,7 +882,7 @@ need_realloc:
         }
     }else
         ost->sync_opts= lrintf(get_sync_ipts(ost) * enc->sample_rate)
-                        - av_fifo_size(ost->fifo)/(enc->channels * 2); //FIXME wrong
+                        - av_fifo_size(ost->fifo)/(enc->channels * osize); //FIXME wrong
 
     if (ost->audio_resample) {
         buftmp = audio_buf;
@@ -1160,7 +1171,8 @@ static void do_video_out(AVFormatContext *s,
 
     format_video_sync = video_sync_method;
     if (format_video_sync < 0)
-        format_video_sync = (s->oformat->flags & AVFMT_VARIABLE_FPS) ? 2 : 1;
+        format_video_sync = (s->oformat->flags & AVFMT_NOTIMESTAMPS) ? 0 :
+                            (s->oformat->flags & AVFMT_VARIABLE_FPS) ? 2 : 1;
 
     if (format_video_sync) {
         double vdelta = sync_ipts - ost->sync_opts;
@@ -1197,7 +1209,8 @@ static void do_video_out(AVFormatContext *s,
         av_init_packet(&pkt);
         pkt.stream_index= ost->index;
 
-        if (s->oformat->flags & AVFMT_RAWPICTURE) {
+        if (s->oformat->flags & AVFMT_RAWPICTURE &&
+            enc->codec->id == CODEC_ID_RAWVIDEO) {
             /* raw pictures are written as AVPicture structure to
                avoid any copies. We support temporarily the older
                method. */
@@ -1435,14 +1448,6 @@ static void print_report(OutputFile *output_files,
     }
 }
 
-static void generate_silence(uint8_t* buf, enum AVSampleFormat sample_fmt, size_t size)
-{
-    int fill_char = 0x00;
-    if (sample_fmt == AV_SAMPLE_FMT_U8)
-        fill_char = 0x80;
-    memset(buf, fill_char, size);
-}
-
 static void flush_encoders(OutputStream *ost_table, int nb_ostreams)
 {
     int i, ret;
@@ -1457,7 +1462,7 @@ static void flush_encoders(OutputStream *ost_table, int nb_ostreams)
 
         if (ost->st->codec->codec_type == AVMEDIA_TYPE_AUDIO && enc->frame_size <=1)
             continue;
-        if (ost->st->codec->codec_type == AVMEDIA_TYPE_VIDEO && (os->oformat->flags & AVFMT_RAWPICTURE))
+        if (ost->st->codec->codec_type == AVMEDIA_TYPE_VIDEO && (os->oformat->flags & AVFMT_RAWPICTURE) && enc->codec->id == CODEC_ID_RAWVIDEO)
             continue;
 
         for(;;) {
@@ -1617,41 +1622,42 @@ static void rate_emu_sleep(InputStream *ist)
 
 static int transcode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
 {
-    static unsigned int samples_size = 0;
+    AVFrame *decoded_frame;
+    AVCodecContext *avctx = ist->st->codec;
     int bps = av_get_bytes_per_sample(ist->st->codec->sample_fmt);
-    uint8_t *decoded_data_buf  = NULL;
-    int      decoded_data_size = 0;
     int i, ret;
 
-    if (pkt && samples_size < FFMAX(pkt->size * bps, AVCODEC_MAX_AUDIO_FRAME_SIZE)) {
-        av_free(samples);
-        samples_size = FFMAX(pkt->size * bps, AVCODEC_MAX_AUDIO_FRAME_SIZE);
-        samples      = av_malloc(samples_size);
-    }
-    decoded_data_size = samples_size;
+    if (!ist->decoded_frame && !(ist->decoded_frame = avcodec_alloc_frame()))
+        return AVERROR(ENOMEM);
+    else
+        avcodec_get_frame_defaults(ist->decoded_frame);
+    decoded_frame = ist->decoded_frame;
 
-    ret = avcodec_decode_audio3(ist->st->codec, samples, &decoded_data_size,
-                                pkt);
-    if (ret < 0)
+    ret = avcodec_decode_audio4(avctx, decoded_frame, got_output, pkt);
+    if (ret < 0) {
         return ret;
-    pkt->data   += ret;
-    pkt->size   -= ret;
-    *got_output  = decoded_data_size > 0;
+    }
 
-    /* Some bug in mpeg audio decoder gives */
-    /* decoded_data_size < 0, it seems they are overflows */
     if (!*got_output) {
         /* no audio frame */
-        return 0;
+        return ret;
     }
 
-    decoded_data_buf = (uint8_t *)samples;
-    ist->next_pts   += ((int64_t)AV_TIME_BASE/bps * decoded_data_size) /
-                       (ist->st->codec->sample_rate * ist->st->codec->channels);
+    /* if the decoder provides a pts, use it instead of the last packet pts.
+       the decoder could be delaying output by a packet or more. */
+    if (decoded_frame->pts != AV_NOPTS_VALUE)
+        ist->next_pts = decoded_frame->pts;
+
+    /* increment next_pts to use for the case where the input stream does not
+       have timestamps or there are multiple frames in the packet */
+    ist->next_pts += ((int64_t)AV_TIME_BASE * decoded_frame->nb_samples) /
+                     avctx->sample_rate;
 
     // preprocess audio (volume)
     if (audio_volume != 256) {
-        switch (ist->st->codec->sample_fmt) {
+        int decoded_data_size = decoded_frame->nb_samples * avctx->channels * bps;
+        void *samples = decoded_frame->data[0];
+        switch (avctx->sample_fmt) {
         case AV_SAMPLE_FMT_U8:
         {
             uint8_t *volp = samples;
@@ -1712,10 +1718,10 @@ static int transcode_audio(InputStream *ist, AVPacket *pkt, int *got_output)
 
         if (!check_output_constraints(ist, ost) || !ost->encoding_needed)
             continue;
-        do_audio_out(output_files[ost->file_index].ctx, ost, ist,
-                     decoded_data_buf, decoded_data_size);
+        do_audio_out(output_files[ost->file_index].ctx, ost, ist, decoded_frame);
     }
-    return 0;
+
+    return ret;
 }
 
 static int transcode_video(InputStream *ist, AVPacket *pkt, int *got_output, int64_t *pkt_pts)
@@ -1728,8 +1734,11 @@ static int transcode_video(InputStream *ist, AVPacket *pkt, int *got_output, int
     int frame_available = 1;
 #endif
 
-    if (!(decoded_frame = avcodec_alloc_frame()))
+    if (!ist->decoded_frame && !(ist->decoded_frame = avcodec_alloc_frame()))
         return AVERROR(ENOMEM);
+    else
+        avcodec_get_frame_defaults(ist->decoded_frame);
+    decoded_frame = ist->decoded_frame;
     pkt->pts  = *pkt_pts;
     pkt->dts  = ist->pts;
     *pkt_pts  = AV_NOPTS_VALUE;
@@ -1737,17 +1746,18 @@ static int transcode_video(InputStream *ist, AVPacket *pkt, int *got_output, int
     ret = avcodec_decode_video2(ist->st->codec,
                                 decoded_frame, got_output, pkt);
     if (ret < 0)
-        goto fail;
+        return ret;
 
     quality = same_quant ? decoded_frame->quality : 0;
     if (!*got_output) {
         /* no picture yet */
-        av_freep(&decoded_frame);
-        return 0;
+        return ret;
     }
     ist->next_pts = ist->pts = guess_correct_pts(&ist->pts_ctx, decoded_frame->pkt_pts,
                                                  decoded_frame->pkt_dts);
-    if (ist->st->codec->time_base.num != 0) {
+    if (pkt->duration)
+        ist->next_pts += av_rescale_q(pkt->duration, ist->st->time_base, AV_TIME_BASE_Q);
+    else if (ist->st->codec->time_base.num != 0) {
         int ticks      = ist->st->parser ? ist->st->parser->repeat_pict + 1 :
                                            ist->st->codec->ticks_per_frame;
         ist->next_pts += ((int64_t)AV_TIME_BASE *
@@ -1774,10 +1784,12 @@ static int transcode_video(InputStream *ist, AVPacket *pkt, int *got_output, int
             else
                 sar = ist->st->codec->sample_aspect_ratio;
             av_vsrc_buffer_add_frame(ost->input_video_filter, decoded_frame, ist->pts, sar);
-            if (!(filtered_frame = avcodec_alloc_frame())) {
-                ret = AVERROR(ENOMEM);
-                goto fail;
-            }
+            if (!ist->filtered_frame && !(ist->filtered_frame = avcodec_alloc_frame())) {
+                av_free(buffer_to_free);
+                return AVERROR(ENOMEM);
+            } else
+                avcodec_get_frame_defaults(ist->filtered_frame);
+            filtered_frame = ist->filtered_frame;
             frame_available = avfilter_poll_frame(ost->output_video_filter->inputs[0]);
         }
         while (frame_available) {
@@ -1801,32 +1813,50 @@ static int transcode_video(InputStream *ist, AVPacket *pkt, int *got_output, int
             if (ost->picref)
                 avfilter_unref_buffer(ost->picref);
         }
-        av_freep(&filtered_frame);
 #endif
     }
 
-fail:
     av_free(buffer_to_free);
-    av_freep(&decoded_frame);
+    return ret;
+}
+
+static int transcode_subtitles(InputStream *ist, AVPacket *pkt, int *got_output)
+{
+    /* Decode one subtitle from pkt and pass it to each output stream that
+     * passes check_output_constraints() and needs encoding. */
+    AVSubtitle sub;
+    int idx;
+    int ret = avcodec_decode_subtitle2(ist->st->codec, &sub, got_output, pkt);
+
+    /* decoder error, or nothing decoded: hand the result back unchanged */
+    if (ret < 0 || !*got_output)
+        return ret;
+
+    rate_emu_sleep(ist);
+
+    for (idx = 0; idx < nb_output_streams; idx++) {
+        OutputStream *ost = &output_streams[idx];
+
+        if (check_output_constraints(ist, ost) && ost->encoding_needed)
+            do_subtitle_out(output_files[ost->file_index].ctx, ost, ist, &sub,
+                            pkt->pts);
+    }
+    avsubtitle_free(&sub);
     return ret;
 }
 
 /* pkt = NULL means EOF (needed to flush decoder buffers) */
-static int output_packet(InputStream *ist, int ist_index,
+static int output_packet(InputStream *ist,
                          OutputStream *ost_table, int nb_ostreams,
                          const AVPacket *pkt)
 {
-    AVFormatContext *os;
-    OutputStream *ost;
-    int ret = 0, i;
+    int i;
     int got_output;
-    AVSubtitle subtitle, *subtitle_to_free;
     int64_t pkt_pts = AV_NOPTS_VALUE;
-
     AVPacket avpkt;
 
-    if(ist->next_pts == AV_NOPTS_VALUE)
-        ist->next_pts= ist->pts;
+    if (ist->next_pts == AV_NOPTS_VALUE)
+        ist->next_pts = ist->pts;
 
     if (pkt == NULL) {
         /* EOF handling */
@@ -1845,88 +1875,47 @@ static int output_packet(InputStream *ist, int ist_index,
 
     //while we have more to decode or while the decoder did output something on EOF
     while (ist->decoding_needed && (avpkt.size > 0 || (!pkt && got_output))) {
+        int ret = 0;
     handle_eof:
-        ist->pts= ist->next_pts;
 
-        if(avpkt.size && avpkt.size != pkt->size)
+        ist->pts = ist->next_pts;
+
+        if (avpkt.size && avpkt.size != pkt->size) {
             av_log(NULL, ist->showed_multi_packet_warning ? AV_LOG_VERBOSE : AV_LOG_WARNING,
                    "Multiple frames in a packet from stream %d\n", pkt->stream_index);
-            ist->showed_multi_packet_warning=1;
-
-        // XXX temporary hack, will be turned to a switch() once all codec
-        // types are split out
-        if (ist->st->codec->codec_type == AVMEDIA_TYPE_AUDIO) {
-            ret = transcode_audio(ist, &avpkt, &got_output);
-            if (ret < 0)
-                return ret;
-            continue;
-        } else if (ist->st->codec->codec_type == AVMEDIA_TYPE_VIDEO) {
-            ret = transcode_video(ist, &avpkt, &got_output, &pkt_pts);
-            if (ret < 0)
-                return ret;
-            if (!got_output)
-                goto discard_packet;
-            continue;
+            ist->showed_multi_packet_warning = 1;
         }
 
-        /* decode the packet if needed */
-        subtitle_to_free = NULL;
         switch(ist->st->codec->codec_type) {
+        case AVMEDIA_TYPE_AUDIO:
+            ret = transcode_audio    (ist, &avpkt, &got_output);
+            break;
+        case AVMEDIA_TYPE_VIDEO:
+            ret = transcode_video    (ist, &avpkt, &got_output, &pkt_pts);
+            break;
         case AVMEDIA_TYPE_SUBTITLE:
-            ret = avcodec_decode_subtitle2(ist->st->codec,
-                                           &subtitle, &got_output, &avpkt);
-            if (ret < 0)
-                return ret;
-            if (!got_output) {
-                goto discard_packet;
-            }
-            subtitle_to_free = &subtitle;
-            avpkt.size = 0;
+            ret = transcode_subtitles(ist, &avpkt, &got_output);
             break;
         default:
             return -1;
         }
 
-        /* frame rate emulation */
-        rate_emu_sleep(ist);
-
-        /* if output time reached then transcode raw format,
-           encode packets and output them */
-        for (i = 0; i < nb_ostreams; i++) {
-            ost = &ost_table[i];
-
-            if (!check_output_constraints(ist, ost) || !ost->encoding_needed)
-                continue;
-
-                os = output_files[ost->file_index].ctx;
-
-                /* set the input output pts pairs */
-                //ost->sync_ipts = (double)(ist->pts + input_files[ist->file_index].ts_offset - start_time)/ AV_TIME_BASE;
-
-                av_assert0(ist->decoding_needed);
-                switch(ost->st->codec->codec_type) {
-                case AVMEDIA_TYPE_SUBTITLE:
-                    do_subtitle_out(os, ost, ist, &subtitle,
-                                    pkt->pts);
-                    break;
-                default:
-                    abort();
-                }
-            }
-
-        /* XXX: allocate the subtitles in the codec ? */
-        if (subtitle_to_free) {
-            avsubtitle_free(subtitle_to_free);
-            subtitle_to_free = NULL;
-        }
         if (ret < 0)
             return ret;
+        // touch data and size only if not EOF
+        if (pkt) {
+            avpkt.data += ret;
+            avpkt.size -= ret;
+        }
+        if (!got_output) {
+            continue;
+        }
     }
- discard_packet:
 
     /* handle stream copy */
     if (!ist->decoding_needed) {
         rate_emu_sleep(ist);
+        ist->pts = ist->next_pts;
         switch (ist->st->codec->codec_type) {
         case AVMEDIA_TYPE_AUDIO:
             ist->next_pts += ((int64_t)AV_TIME_BASE * ist->st->codec->frame_size) /
@@ -1943,7 +1932,7 @@ static int output_packet(InputStream *ist, int ist_index,
         }
     }
     for (i = 0; pkt && i < nb_ostreams; i++) {
-        ost = &ost_table[i];
+        OutputStream *ost = &ost_table[i];
 
         if (!check_output_constraints(ist, ost) || ost->encoding_needed)
             continue;
@@ -2085,9 +2074,7 @@ static int transcode_init(OutputFile *output_files,
             }
             memcpy(codec->extradata, icodec->extradata, icodec->extradata_size);
             codec->extradata_size = icodec->extradata_size;
-            if (!copy_tb &&
-                av_q2d(icodec->time_base)*icodec->ticks_per_frame > av_q2d(ist->st->time_base) &&
-                av_q2d(ist->st->time_base) < 1.0/500) {
+            if (!copy_tb) {
                 codec->time_base      = icodec->time_base;
                 codec->time_base.num *= icodec->ticks_per_frame;
                 av_reduce(&codec->time_base.num, &codec->time_base.den,
@@ -2538,7 +2525,7 @@ static int transcode(OutputFile *output_files,
         }
 
         //fprintf(stderr,"read #%d.%d size=%d\n", ist->file_index, ist->st->index, pkt.size);
-        if (output_packet(ist, ist_index, output_streams, nb_output_streams, &pkt) < 0) {
+        if (output_packet(ist, output_streams, nb_output_streams, &pkt) < 0) {
 
             av_log(NULL, AV_LOG_ERROR, "Error while decoding stream #%d:%d\n",
                    ist->file_index, ist->st->index);
@@ -2559,7 +2546,7 @@ static int transcode(OutputFile *output_files,
     for (i = 0; i < nb_input_streams; i++) {
         ist = &input_streams[i];
         if (ist->decoding_needed) {
-            output_packet(ist, i, output_streams, nb_output_streams, NULL);
+            output_packet(ist, output_streams, nb_output_streams, NULL);
         }
     }
     flush_encoders(output_streams, nb_output_streams);