struct timeval requires #include <sys/time.h>

[ffmpeg] / ffmpeg.c
diff --git a/ffmpeg.c b/ffmpeg.c

index bcd9358b46543577baa8ae173c1644623c63e81b..07c3ac41f48a68f1e268a5a135ea676389308cc7 100644 (file)
--- a/ffmpeg.c
+++ b/ffmpeg.c
@@ -19,6 +19,9 @@
   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
   */
  
+/* needed for usleep() */
+#define _XOPEN_SOURCE 500
+
  #include "config.h"
  #include <ctype.h>
  #include <string.h>
@@ -27,11 +30,13 @@
  #include <errno.h>
  #include <signal.h>
  #include <limits.h>
+#include <unistd.h>
  #include "libavformat/avformat.h"
  #include "libavdevice/avdevice.h"
  #include "libswscale/swscale.h"
  #include "libavformat/framehook.h"
  #include "libavcodec/opt.h"
+#include "libavcodec/audioconvert.h"
  #include "libavutil/fifo.h"
  #include "libavutil/avstring.h"
  #include "libavformat/os_support.h"
@@ -44,7 +49,6 @@
  #endif
  
  #if defined(HAVE_TERMIOS_H)
-#include <unistd.h>
  #include <fcntl.h>
  #include <sys/ioctl.h>
  #include <sys/time.h>
@@ -85,12 +89,13 @@ static const OptionDef options[];
  
  static AVFormatContext *input_files[MAX_FILES];
  static int64_t input_files_ts_offset[MAX_FILES];
+static double input_files_ts_scale[MAX_FILES][MAX_STREAMS];
  static int nb_input_files = 0;
  
  static AVFormatContext *output_files[MAX_FILES];
  static int nb_output_files = 0;
  
-static AVStreamMap stream_maps[MAX_FILES];
+static AVStreamMap stream_maps[MAX_FILES*MAX_STREAMS];
  static int nb_stream_maps;
  
  static AVMetaDataMap meta_data_maps[MAX_FILES];
@@ -102,6 +107,7 @@ static int frame_width  = 0;
  static int frame_height = 0;
  static float frame_aspect_ratio = 0;
  static enum PixelFormat frame_pix_fmt = PIX_FMT_NONE;
+static enum SampleFormat audio_sample_fmt = SAMPLE_FMT_NONE;
  static int frame_padtop  = 0;
  static int frame_padbottom = 0;
  static int frame_padleft  = 0;
@@ -112,7 +118,7 @@ static int frame_bottomBand = 0;
  static int frame_leftBand  = 0;
  static int frame_rightBand = 0;
  static int max_frames[4] = {INT_MAX, INT_MAX, INT_MAX, INT_MAX};
-static AVRational frame_rate = (AVRational) {0,0};
+static AVRational frame_rate;
  static float video_qscale = 0;
  static uint16_t *intra_matrix = NULL;
  static uint16_t *inter_matrix = NULL;
@@ -128,7 +134,6 @@ static char *video_codec_name = NULL;
  static int video_codec_tag = 0;
  static int same_quality = 0;
  static int do_deinterlace = 0;
-static int strict = 0;
  static int top_field_first = -1;
  static int me_threshold = 0;
  static int intra_dc_precision = 8;
@@ -153,7 +158,7 @@ static char *subtitle_language = NULL;
  static float mux_preload= 0.5;
  static float mux_max_delay= 0.7;
  
-static int64_t recording_time = 0;
+static int64_t recording_time = INT64_MAX;
  static int64_t start_time = 0;
  static int64_t rec_timestamp = 0;
  static int64_t input_ts_offset = 0;
@@ -173,7 +178,7 @@ static char *pass_logfilename = NULL;
  static int audio_stream_copy = 0;
  static int video_stream_copy = 0;
  static int subtitle_stream_copy = 0;
-static int video_sync_method= 1;
+static int video_sync_method= -1;
  static int audio_sync_method= 0;
  static float audio_drift_threshold= 0.1;
  static int copy_ts= 0;
@@ -255,6 +260,8 @@ typedef struct AVOutputStream {
      /* audio only */
      int audio_resample;
      ReSampleContext *resample; /* for audio resampling */
+    int reformat_pair;
+    AVAudioConvert *reformat_ctx;
      AVFifoBuffer fifo;     /* for compression: one audio fifo per codec */
      FILE *logfile;
  } AVOutputStream;
@@ -511,6 +518,7 @@ static void do_audio_out(AVFormatContext *s,
      uint8_t *buftmp;
      static uint8_t *audio_buf = NULL;
      static uint8_t *audio_out = NULL;
+    static uint8_t *audio_out2 = NULL;
      const int audio_out_size= 4*MAX_AUDIO_PACKET_SIZE;
  
      int size_out, frame_bytes, ret;
@@ -539,6 +547,26 @@ static void do_audio_out(AVFormatContext *s,
          }
      }
  
+#define MAKE_SFMT_PAIR(a,b) ((a)+SAMPLE_FMT_NB*(b))
+    if (dec->sample_fmt!=enc->sample_fmt &&
+        MAKE_SFMT_PAIR(enc->sample_fmt,dec->sample_fmt)!=ost->reformat_pair) {
+        if (!audio_out2)
+            audio_out2 = av_malloc(audio_out_size);
+        if (!audio_out2)
+            av_exit(1);
+        if (ost->reformat_ctx)
+            av_audio_convert_free(ost->reformat_ctx);
+        ost->reformat_ctx = av_audio_convert_alloc(enc->sample_fmt, 1,
+                                                   dec->sample_fmt, 1, NULL, 0);
+        if (!ost->reformat_ctx) {
+            fprintf(stderr, "Cannot convert %s sample format to %s sample format\n",
+                avcodec_get_sample_fmt_name(dec->sample_fmt),
+                avcodec_get_sample_fmt_name(enc->sample_fmt));
+            av_exit(1);
+        }
+        ost->reformat_pair=MAKE_SFMT_PAIR(enc->sample_fmt,dec->sample_fmt);
+    }
+
      if(audio_sync_method){
          double delta = get_sync_ipts(ost) * enc->sample_rate - ost->sync_opts
                  - av_fifo_size(&ost->fifo)/(ost->st->codec->channels * 2);
@@ -597,6 +625,22 @@ static void do_audio_out(AVFormatContext *s,
          size_out = size;
      }
  
+    if (dec->sample_fmt!=enc->sample_fmt) {
+        const void *ibuf[6]= {buftmp};
+        void *obuf[6]= {audio_out2};
+        int istride[6]= {av_get_bits_per_sample_format(dec->sample_fmt)/8};
+        int ostride[6]= {av_get_bits_per_sample_format(enc->sample_fmt)/8};
+        int len= size_out/istride[0];
+        if (av_audio_convert(ost->reformat_ctx, obuf, ostride, ibuf, istride, len)<0) {
+            printf("av_audio_convert() failed\n");
+            return;
+        }
+        buftmp = audio_out2;
+        /* FIXME: existing code assumes that size_out equals framesize*channels*2;
+                  remove this legacy cruft */
+        size_out = len*2;
+    }
+
      /* now encode as many frames as possible */
      if (enc->frame_size > 1) {
          /* output resampled raw samples */
@@ -639,6 +683,7 @@ static void do_audio_out(AVFormatContext *s,
          case CODEC_ID_PCM_S32BE:
          case CODEC_ID_PCM_U32LE:
          case CODEC_ID_PCM_U32BE:
+        case CODEC_ID_PCM_F32BE:
              size_out = size_out << 1;
              break;
          case CODEC_ID_PCM_S24LE:
@@ -797,7 +842,7 @@ static void do_video_out(AVFormatContext *s,
  
      *frame_size = 0;
  
-    if(video_sync_method){
+    if(video_sync_method>0 || (video_sync_method && av_q2d(enc->time_base) > 0.001)){
          double vdelta;
          vdelta = get_sync_ipts(ost) / av_q2d(enc->time_base) - ost->sync_opts;
          //FIXME set to 0.5 after we fix some dts/pts bugs like in avidec.c
@@ -927,6 +972,7 @@ static void do_video_out(AVFormatContext *s,
                      pkt.flags |= PKT_FLAG_KEY;
                  write_frame(s, &pkt, ost->st->codec, bitstream_filters[ost->file_index][pkt.stream_index]);
                  *frame_size = ret;
+                video_size += ret;
                  //fprintf(stderr,"\nFrame: %3d %3d size: %5d type: %d",
                  //        enc->frame_number-1, enc->real_pict_num, ret,
                  //        enc->pict_type);
@@ -1139,7 +1185,9 @@ static int output_packet(AVInputStream *ist, int ist_index,
  
      len = pkt->size;
      ptr = pkt->data;
-    while (len > 0) {
+
+    //while we have more to decode or while the decoder did output something on EOF
+    while (len > 0 || (!pkt && ist->next_pts != ist->pts)) {
      handle_eof:
          ist->pts= ist->next_pts;
  
@@ -1153,8 +1201,11 @@ static int output_packet(AVInputStream *ist, int ist_index,
          if (ist->decoding_needed) {
              switch(ist->st->codec->codec_type) {
              case CODEC_TYPE_AUDIO:{
-                if(pkt)
-                    samples= av_fast_realloc(samples, &samples_size, FFMAX(pkt->size*sizeof(*samples), AVCODEC_MAX_AUDIO_FRAME_SIZE));
+                if(pkt && samples_size < FFMAX(pkt->size*sizeof(*samples), AVCODEC_MAX_AUDIO_FRAME_SIZE)) {
+                    samples_size = FFMAX(pkt->size*sizeof(*samples), AVCODEC_MAX_AUDIO_FRAME_SIZE);
+                    av_free(samples);
+                    samples= av_malloc(samples_size);
+                }
                  data_size= samples_size;
                      /* XXX: could avoid copy if PCM 16 bits with same
                         endianness as CPU */
@@ -1298,7 +1349,6 @@ static int output_packet(AVInputStream *ist, int ist_index,
                              break;
                          case CODEC_TYPE_VIDEO:
                              do_video_out(os, ost, ist, &picture, &frame_size);
-                            video_size += frame_size;
                              if (vstats_filename && frame_size)
                                  do_video_stats(os, ost, frame_size);
                              break;
@@ -1594,6 +1644,8 @@ static int av_encode(AVFormatContext **output_files,
  
                  /* Sanity check that the stream types match */
                  if (ist_table[ost->source_index]->st->codec->codec_type != ost->st->codec->codec_type) {
+                    int i= ost->file_index;
+                    dump_format(output_files[i], i, output_files[i]->filename, 1);
                      fprintf(stderr, "Codec type mismatch for mapping #%d.%d -> #%d.%d\n",
                          stream_maps[n-1].file_index, stream_maps[n-1].stream_index,
                          ost->file_index, ost->index);
@@ -1686,6 +1738,10 @@ static int av_encode(AVFormatContext **output_files,
                  codec->time_base = ist->st->time_base;
              switch(codec->codec_type) {
              case CODEC_TYPE_AUDIO:
+                if(audio_volume != 256) {
+                    fprintf(stderr,"-acodec copy and -vol are incompatible (frames are not decoded)\n");
+                    av_exit(1);
+                }
                  codec->sample_rate = icodec->sample_rate;
                  codec->channels = icodec->channels;
                  codec->frame_size = icodec->frame_size;
@@ -1715,6 +1771,7 @@ static int av_encode(AVFormatContext **output_files,
              case CODEC_TYPE_AUDIO:
                  if (av_fifo_init(&ost->fifo, 1024))
                      goto fail;
+                ost->reformat_pair = MAKE_SFMT_PAIR(SAMPLE_FMT_NONE,SAMPLE_FMT_NONE);
                  ost->audio_resample = codec->sample_rate != icodec->sample_rate || audio_sync_method > 1;
                  icodec->request_channels = codec->channels;
                  ist->decoding_needed = 1;
@@ -2013,7 +2070,7 @@ static int av_encode(AVFormatContext **output_files,
          }
  
          /* finish if recording time exhausted */
-        if (recording_time > 0 && opts_min >= (recording_time / 1000000.0))
+        if (opts_min >= (recording_time / 1000000.0))
              break;
  
          /* finish if limit size exhausted */
@@ -2047,6 +2104,13 @@ static int av_encode(AVFormatContext **output_files,
          if (pkt.pts != AV_NOPTS_VALUE)
              pkt.pts += av_rescale_q(input_files_ts_offset[ist->file_index], AV_TIME_BASE_Q, ist->st->time_base);
  
+        if(input_files_ts_scale[file_index][pkt.stream_index]){
+            if(pkt.pts != AV_NOPTS_VALUE)
+                pkt.pts *= input_files_ts_scale[file_index][pkt.stream_index];
+            if(pkt.dts != AV_NOPTS_VALUE)
+                pkt.dts *= input_files_ts_scale[file_index][pkt.stream_index];
+        }
+
  //        fprintf(stderr, "next:%"PRId64" dts:%"PRId64" off:%"PRId64" %d\n", ist->next_pts, pkt.dts, input_files_ts_offset[ist->file_index], ist->st->codec->codec_type);
          if (pkt.dts != AV_NOPTS_VALUE && ist->next_pts != AV_NOPTS_VALUE) {
              int64_t pkt_dts= av_rescale_q(pkt.dts, ist->st->time_base, AV_TIME_BASE_Q);
@@ -2144,6 +2208,8 @@ static int av_encode(AVFormatContext **output_files,
                      sws_freeContext(ost->img_resample_ctx);
                  if (ost->resample)
                      audio_resample_close(ost->resample);
+                if (ost->reformat_ctx)
+                    av_audio_convert_free(ost->reformat_ctx);
                  av_free(ost);
              }
          }
@@ -2222,7 +2288,7 @@ static int opt_default(const char *opt, const char *arg){
  
  //    av_log(NULL, AV_LOG_ERROR, "%s:%s: %f 0x%0X\n", opt, arg, av_get_double(avctx_opts, opt, NULL), (int)av_get_int(avctx_opts, opt, NULL));
  
-    //FIXME we should always use avctx_opts, ... for storing options so there wont be any need to keep track of whats set over this
+    //FIXME we should always use avctx_opts, ... for storing options so there will not be any need to keep track of what is set over this
      opt_names= av_realloc(opt_names, sizeof(void*)*(opt_name_count+1));
      opt_names[opt_name_count++]= o->name;
  
@@ -2439,13 +2505,13 @@ static void opt_frame_pad_right(const char *arg)
      }
  }
  
-static void list_pix_fmts(void)
+static void list_fmts(void (*get_fmt_string)(char *buf, int buf_size, int fmt), int nb_fmts)
  {
      int i;
-    char pix_fmt_str[128];
-    for (i=-1; i < PIX_FMT_NB; i++) {
-        avcodec_pix_fmt_string (pix_fmt_str, sizeof(pix_fmt_str), i);
-        fprintf(stdout, "%s\n", pix_fmt_str);
+    char fmt_str[128];
+    for (i=-1; i < nb_fmts; i++) {
+        get_fmt_string (fmt_str, sizeof(fmt_str), i);
+        fprintf(stdout, "%s\n", fmt_str);
      }
  }
  
@@ -2454,7 +2520,7 @@ static void opt_frame_pix_fmt(const char *arg)
      if (strcmp(arg, "list"))
          frame_pix_fmt = avcodec_get_pix_fmt(arg);
      else {
-        list_pix_fmts();
+        list_fmts(avcodec_pix_fmt_string, PIX_FMT_NB);
          av_exit(0);
      }
  }
@@ -2493,11 +2559,6 @@ static void opt_qscale(const char *arg)
      }
  }
  
-static void opt_strict(const char *arg)
-{
-    strict= atoi(arg);
-}
-
  static void opt_top_field_first(const char *arg)
  {
      top_field_first= atoi(arg);
@@ -2513,6 +2574,16 @@ static int opt_thread_count(const char *opt, const char *arg)
      return 0;
  }
  
+static void opt_audio_sample_fmt(const char *arg)
+{
+    if (strcmp(arg, "list"))
+        audio_sample_fmt = avcodec_get_sample_fmt(arg);
+    else {
+        list_fmts(avcodec_sample_fmt_string, SAMPLE_FMT_NB);
+        av_exit(0);
+    }
+}
+
  static int opt_audio_rate(const char *opt, const char *arg)
  {
      audio_sample_rate = parse_number_or_die(opt, arg, OPT_INT64, 0, INT_MAX);
@@ -2640,6 +2711,23 @@ static void opt_map_meta_data(const char *arg)
      m->in_file = strtol(p, &p, 0);
  }
  
+static void opt_input_ts_scale(const char *arg)
+{
+    unsigned int stream;
+    double scale;
+    char *p;
+
+    stream = strtol(arg, &p, 0);
+    if (*p)
+        p++;
+    scale= strtod(p, &p);
+
+    if(stream >= MAX_STREAMS)
+        av_exit(1);
+
+    input_files_ts_scale[nb_input_files][stream]= scale;
+}
+
  static int opt_recording_time(const char *opt, const char *arg)
  {
      recording_time = parse_time_or_die(opt, arg, 1);
@@ -2723,6 +2811,7 @@ static void opt_input_file(const char *filename)
      ap->width = frame_width + frame_padleft + frame_padright;
      ap->height = frame_height + frame_padtop + frame_padbottom;
      ap->pix_fmt = frame_pix_fmt;
+   // ap->sample_fmt = audio_sample_fmt; //FIXME:not implemented in libavformat
      ap->channel = video_channel;
      ap->standard = video_standard;
      ap->video_codec_id = find_codec_or_die(video_codec_name, CODEC_TYPE_VIDEO, 0);
@@ -2787,6 +2876,7 @@ static void opt_input_file(const char *filename)
              //fprintf(stderr, "\nInput Audio channels: %d", enc->channels);
              audio_channels = enc->channels;
              audio_sample_rate = enc->sample_rate;
+            audio_sample_fmt = enc->sample_fmt;
              if(audio_disable)
                  ic->streams[i]->discard= AVDISCARD_ALL;
              break;
@@ -3016,7 +3106,6 @@ static void new_video_stream(AVFormatContext *oc)
              video_enc->rc_initial_buffer_occupancy = video_enc->rc_buffer_size*3/4;
          video_enc->me_threshold= me_threshold;
          video_enc->intra_dc_precision= intra_dc_precision - 8;
-        video_enc->strict_std_compliance = strict;
  
          if (do_psnr)
              video_enc->flags|= CODEC_FLAG_PSNR;
@@ -3058,7 +3147,6 @@ static void new_audio_stream(AVFormatContext *oc)
  
      audio_enc = st->codec;
      audio_enc->codec_type = CODEC_TYPE_AUDIO;
-    audio_enc->strict_std_compliance = strict;
  
      if(audio_codec_tag)
          audio_enc->codec_tag= audio_codec_tag;
@@ -3071,6 +3159,7 @@ static void new_audio_stream(AVFormatContext *oc)
          st->stream_copy = 1;
          audio_enc->channels = audio_channels;
      } else {
+        AVCodec *codec;
          codec_id = av_guess_codec(oc->oformat, NULL, oc->filename, NULL, CODEC_TYPE_AUDIO);
  
          set_context_opts(audio_enc, avctx_opts[CODEC_TYPE_AUDIO], AV_OPT_FLAG_AUDIO_PARAM | AV_OPT_FLAG_ENCODING_PARAM);
@@ -3078,6 +3167,7 @@ static void new_audio_stream(AVFormatContext *oc)
          if (audio_codec_name)
              codec_id = find_codec_or_die(audio_codec_name, CODEC_TYPE_AUDIO, 1);
          audio_enc->codec_id = codec_id;
+        codec = avcodec_find_encoder(codec_id);
  
          if (audio_qscale > QSCALE_NONE) {
              audio_enc->flags |= CODEC_FLAG_QSCALE;
@@ -3085,6 +3175,17 @@ static void new_audio_stream(AVFormatContext *oc)
          }
          audio_enc->thread_count = thread_count;
          audio_enc->channels = audio_channels;
+        audio_enc->sample_fmt = audio_sample_fmt;
+
+        if(codec && codec->sample_fmts){
+            const enum SampleFormat *p= codec->sample_fmts;
+            for(; *p!=-1; p++){
+                if(*p == audio_enc->sample_fmt)
+                    break;
+            }
+            if(*p == -1)
+                audio_enc->sample_fmt = codec->sample_fmts[0];
+        }
      }
      audio_enc->sample_rate = audio_sample_rate;
      audio_enc->time_base= (AVRational){1, audio_sample_rate};
@@ -3669,6 +3770,7 @@ static const OptionDef options[] = {
      { "fs", HAS_ARG | OPT_INT64, {(void*)&limit_filesize}, "set the limit file size in bytes", "limit_size" }, //
      { "ss", OPT_FUNC2 | HAS_ARG, {(void*)opt_start_time}, "set the start time offset", "time_off" },
      { "itsoffset", OPT_FUNC2 | HAS_ARG, {(void*)opt_input_ts_offset}, "set the input ts offset", "time_off" },
+    { "itsscale", HAS_ARG, {(void*)opt_input_ts_scale}, "set the input ts scale", "stream:scale" },
      { "title", HAS_ARG | OPT_STRING, {(void*)&str_title}, "set the title", "string" },
      { "timestamp", OPT_FUNC2 | HAS_ARG, {(void*)&opt_rec_timestamp}, "set the timestamp", "time" },
      { "author", HAS_ARG | OPT_STRING, {(void*)&str_author}, "set the author", "string" },
@@ -3722,7 +3824,6 @@ static const OptionDef options[] = {
      { "rc_override", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_video_rc_override_string}, "rate control override for specific intervals", "override" },
      { "vcodec", HAS_ARG | OPT_VIDEO, {(void*)opt_video_codec}, "force video codec ('copy' to copy stream)", "codec" },
      { "me_threshold", HAS_ARG | OPT_FUNC2 | OPT_EXPERT | OPT_VIDEO, {(void*)opt_me_threshold}, "motion estimaton threshold",  "threshold" },
-    { "strict", HAS_ARG | OPT_EXPERT | OPT_VIDEO, {(void*)opt_strict}, "how strictly to follow the standards", "strictness" },
      { "sameq", OPT_BOOL | OPT_VIDEO, {(void*)&same_quality},
        "use same video quality as source (implies VBR)" },
      { "pass", HAS_ARG | OPT_VIDEO, {(void*)&opt_pass}, "select the pass number (1 or 2)", "n" },
@@ -3755,6 +3856,7 @@ static const OptionDef options[] = {
      { "vol", OPT_INT | HAS_ARG | OPT_AUDIO, {(void*)&audio_volume}, "change audio volume (256=normal)" , "volume" }, //
      { "newaudio", OPT_AUDIO, {(void*)opt_new_audio_stream}, "add a new audio stream to the current output stream" },
      { "alang", HAS_ARG | OPT_STRING | OPT_AUDIO, {(void *)&audio_language}, "set the ISO 639 language code (3 letters) of the current audio stream" , "code" },
+    { "sample_fmt", HAS_ARG | OPT_EXPERT | OPT_AUDIO, {(void*)opt_audio_sample_fmt}, "set sample format, 'list' as argument shows all the sample formats supported", "format" },
  
      /* subtitle options */
      { "sn", OPT_BOOL | OPT_SUBTITLE, {(void*)&subtitle_disable}, "disable subtitle" },