]> git.sesse.net Git - nageru/blobdiff - ffmpeg_capture.cpp
Initial check-in of Kaeru, a simple transcoder based on Nageru code.
[nageru] / ffmpeg_capture.cpp
index 97363a7a82a3ab686717ed3542eaa9a021537dc3..48a395e27610edf725d4386ee557f561eb16cd8f 100644 (file)
@@ -31,6 +31,7 @@ extern "C" {
 #include "ffmpeg_util.h"
 #include "flags.h"
 #include "image_input.h"
+#include "timebase.h"
 
 #define FRAME_SIZE (8 << 20)  // 8 MB.
 
@@ -71,6 +72,9 @@ AVPixelFormat decide_dst_format(AVPixelFormat src_format, bmusb::PixelFormat dst
        if (dst_format_type == bmusb::PixelFormat_8BitBGRA) {
                return AV_PIX_FMT_BGRA;
        }
+       if (dst_format_type == FFmpegCapture::PixelFormat_NV12) {
+               return AV_PIX_FMT_NV12;
+       }
 
        assert(dst_format_type == bmusb::PixelFormat_8BitYCbCrPlanar);
 
@@ -313,7 +317,7 @@ void FFmpegCapture::send_disconnected_frame()
                video_frame.len = width * height * 4;
                memset(video_frame.data, 0, video_frame.len);
 
-               frame_callback(timecode++,
+               frame_callback(-1, AVRational{1, TIMEBASE}, timecode++,
                        video_frame, /*video_offset=*/0, video_format,
                        FrameAllocator::Frame(), /*audio_offset=*/0, AudioFormat());
        }
@@ -350,6 +354,8 @@ bool FFmpegCapture::play_video(const string &pathname)
                return false;
        }
 
+       int audio_stream_index = find_stream_index(format_ctx.get(), AVMEDIA_TYPE_AUDIO);
+
        const AVCodecParameters *codecpar = format_ctx->streams[video_stream_index]->codecpar;
        video_timebase = format_ctx->streams[video_stream_index]->time_base;
        AVCodecContextWithDeleter codec_ctx = avcodec_alloc_context3_unique(nullptr);
@@ -378,7 +384,7 @@ bool FFmpegCapture::play_video(const string &pathname)
                }
 
                bool error;
-               AVFrameWithDeleter frame = decode_frame(format_ctx.get(), codec_ctx.get(), pathname, video_stream_index, &error);
+               AVFrameWithDeleter frame = decode_frame(format_ctx.get(), codec_ctx.get(), pathname, video_stream_index, audio_stream_index, &error);
                if (error) {
                        return false;
                }
@@ -411,15 +417,20 @@ bool FFmpegCapture::play_video(const string &pathname)
                 audio_format.num_channels = 8;
 
                for ( ;; ) {
+                       if (last_pts == 0 && pts_origin == 0) {
+                               pts_origin = frame->pts;        
+                       }
                        next_frame_start = compute_frame_start(frame->pts, pts_origin, video_timebase, start, rate);
                        video_frame.received_timestamp = next_frame_start;
                        bool finished_wakeup = producer_thread_should_quit.sleep_until(next_frame_start);
                        if (finished_wakeup) {
-                               frame_callback(timecode++,
+                               frame_callback(frame->pts, video_timebase, timecode++,
                                        video_frame, 0, video_format,
                                        audio_frame, 0, audio_format);
                                break;
                        } else {
+                               if (producer_thread_should_quit.should_quit()) break;
+
                                bool rewound = false;
                                if (process_queued_commands(format_ctx.get(), pathname, last_modified, &rewound)) {
                                        return true;
@@ -429,8 +440,15 @@ bool FFmpegCapture::play_video(const string &pathname)
                                        video_frame_allocator->release_frame(video_frame);
                                        break;
                                }
-                               // OK, we didn't, so probably a rate change. We'll recalculate next_frame_start
-                               // in the next run.
+                               // OK, we didn't, so probably a rate change. Recalculate next_frame_start,
+                               // but if it's now in the past, we'll reset the origin, so that we don't
+                               // generate a huge backlog of frames that we need to run through quickly.
+                               next_frame_start = compute_frame_start(frame->pts, pts_origin, video_timebase, start, rate);
+                               steady_clock::time_point now = steady_clock::now();
+                               if (next_frame_start < now) {
+                                       pts_origin = frame->pts;
+                                       start = next_frame_start = now;
+                               }
                        }
                }
                last_pts = frame->pts;
@@ -482,7 +500,7 @@ bool FFmpegCapture::process_queued_commands(AVFormatContext *format_ctx, const s
        return false;
 }
 
-AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *codec_ctx, const std::string &pathname, int video_stream_index, bool *error)
+AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *codec_ctx, const std::string &pathname, int video_stream_index, int audio_stream_index, bool *error)
 {
        *error = false;
 
@@ -498,6 +516,9 @@ AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCo
                pkt.data = nullptr;
                pkt.size = 0;
                if (av_read_frame(format_ctx, &pkt) == 0) {
+                       if (pkt.stream_index == audio_stream_index && audio_callback != nullptr) {
+                               audio_callback(&pkt, format_ctx->streams[audio_stream_index]->time_base);
+                       }
                        if (pkt.stream_index != video_stream_index) {
                                // Not a video packet; audio (if any) was already forwarded
                                // to audio_callback above, so just skip decoding it here.
                                continue;
@@ -535,6 +556,8 @@ VideoFormat FFmpegCapture::construct_video_format(const AVFrame *frame, AVRation
        video_format.height = height;
        if (pixel_format == bmusb::PixelFormat_8BitBGRA) {
                video_format.stride = width * 4;
+       } else if (pixel_format == FFmpegCapture::PixelFormat_NV12) {
+               video_format.stride = width;
        } else {
                assert(pixel_format == bmusb::PixelFormat_8BitYCbCrPlanar);
                video_format.stride = width;
@@ -585,6 +608,17 @@ FrameAllocator::Frame FFmpegCapture::make_video_frame(const AVFrame *frame, cons
                pic_data[0] = video_frame.data;
                linesizes[0] = width * 4;
                video_frame.len = (width * 4) * height;
+       } else if (pixel_format == PixelFormat_NV12) {
+               pic_data[0] = video_frame.data;
+               linesizes[0] = width;
+
+               pic_data[1] = pic_data[0] + width * height;
+               linesizes[1] = width;
+
+               video_frame.len = (width * 2) * height;
+
+               const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(sws_dst_format);
+               current_frame_ycbcr_format = decode_ycbcr_format(desc, frame);
        } else {
                assert(pixel_format == bmusb::PixelFormat_8BitYCbCrPlanar);
                const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(sws_dst_format);