git.sesse.net Git - nageru/commitdiff
Add support to FFmpegCapture to decode the audio.
author    Steinar H. Gunderson <sgunderson@bigfoot.com>
          Sat, 8 Jul 2017 11:45:13 +0000 (13:45 +0200)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
          Sat, 8 Jul 2017 11:45:13 +0000 (13:45 +0200)
ffmpeg_capture.cpp
ffmpeg_capture.h

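The patch below opens a second AVCodecContext for the audio stream next to the
existing video one, using the usual avcodec_parameters_to_context() /
avcodec_open2() sequence. As a rough, self-contained illustration of that
pattern (not part of the commit: plain FFmpeg calls and av_find_best_stream()
stand in for Nageru's unique_ptr wrappers and its find_stream_index() helper,
and error cleanup is abbreviated), a sketch might look like this:

extern "C" {
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
}
#include <cstdio>

// Open separate decoders for the best video and audio streams of <pathname>.
// With FFmpeg older than 4.0, av_register_all() must have been called first.
bool open_decoders(const char *pathname, AVFormatContext **format_ctx,
                   AVCodecContext **video_ctx, AVCodecContext **audio_ctx)
{
	if (avformat_open_input(format_ctx, pathname, nullptr, nullptr) < 0 ||
	    avformat_find_stream_info(*format_ctx, nullptr) < 0) {
		fprintf(stderr, "%s: Could not open file\n", pathname);
		return false;
	}

	int video_stream = av_find_best_stream(*format_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, nullptr, 0);
	int audio_stream = av_find_best_stream(*format_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, nullptr, 0);
	if (video_stream < 0) {
		fprintf(stderr, "%s: No video stream found\n", pathname);
		return false;
	}

	// Video decoder (required).
	const AVCodecParameters *vpar = (*format_ctx)->streams[video_stream]->codecpar;
	AVCodec *vcodec = avcodec_find_decoder(vpar->codec_id);
	if (vcodec == nullptr) {
		fprintf(stderr, "%s: Cannot find video decoder\n", pathname);
		return false;
	}
	*video_ctx = avcodec_alloc_context3(vcodec);
	if (avcodec_parameters_to_context(*video_ctx, vpar) < 0 ||
	    avcodec_open2(*video_ctx, vcodec, nullptr) < 0) {
		fprintf(stderr, "%s: Cannot open video decoder\n", pathname);
		return false;
	}

	// Audio decoder (optional; the file may have no audio at all).
	*audio_ctx = nullptr;
	if (audio_stream >= 0) {
		const AVCodecParameters *apar = (*format_ctx)->streams[audio_stream]->codecpar;
		AVCodec *acodec = avcodec_find_decoder(apar->codec_id);
		if (acodec == nullptr) {
			fprintf(stderr, "%s: Cannot find audio decoder\n", pathname);
			return false;
		}
		*audio_ctx = avcodec_alloc_context3(acodec);
		if (avcodec_parameters_to_context(*audio_ctx, apar) < 0 ||
		    avcodec_open2(*audio_ctx, acodec, nullptr) < 0) {
			fprintf(stderr, "%s: Cannot open audio decoder\n", pathname);
			return false;
		}
	}
	return true;
}
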
index 48a395e27610edf725d4386ee557f561eb16cd8f..5e6104f82c628be57cf543ee06599ac639ff3f6a 100644 (file)
@@ -18,6 +18,7 @@ extern "C" {
 #include <libavutil/imgutils.h>
 #include <libavutil/mem.h>
 #include <libavutil/pixfmt.h>
+#include <libavutil/opt.h>
 #include <libswscale/swscale.h>
 }
 
@@ -217,6 +218,7 @@ FFmpegCapture::~FFmpegCapture()
        if (has_dequeue_callbacks) {
                dequeue_cleanup_callback();
        }
+       avresample_free(&resampler);
 }
 
 void FFmpegCapture::configure_card()
@@ -356,24 +358,47 @@ bool FFmpegCapture::play_video(const string &pathname)
 
        int audio_stream_index = find_stream_index(format_ctx.get(), AVMEDIA_TYPE_AUDIO);
 
-       const AVCodecParameters *codecpar = format_ctx->streams[video_stream_index]->codecpar;
+       // Open video decoder.
+       const AVCodecParameters *video_codecpar = format_ctx->streams[video_stream_index]->codecpar;
+       AVCodec *video_codec = avcodec_find_decoder(video_codecpar->codec_id);
        video_timebase = format_ctx->streams[video_stream_index]->time_base;
-       AVCodecContextWithDeleter codec_ctx = avcodec_alloc_context3_unique(nullptr);
-       if (avcodec_parameters_to_context(codec_ctx.get(), codecpar) < 0) {
-               fprintf(stderr, "%s: Cannot fill codec parameters\n", pathname.c_str());
+       AVCodecContextWithDeleter video_codec_ctx = avcodec_alloc_context3_unique(nullptr);
+       if (avcodec_parameters_to_context(video_codec_ctx.get(), video_codecpar) < 0) {
+               fprintf(stderr, "%s: Cannot fill video codec parameters\n", pathname.c_str());
                return false;
        }
-       AVCodec *codec = avcodec_find_decoder(codecpar->codec_id);
-       if (codec == nullptr) {
-               fprintf(stderr, "%s: Cannot find decoder\n", pathname.c_str());
+       if (video_codec == nullptr) {
+               fprintf(stderr, "%s: Cannot find video decoder\n", pathname.c_str());
                return false;
        }
-       if (avcodec_open2(codec_ctx.get(), codec, nullptr) < 0) {
-               fprintf(stderr, "%s: Cannot open decoder\n", pathname.c_str());
+       if (avcodec_open2(video_codec_ctx.get(), video_codec, nullptr) < 0) {
+               fprintf(stderr, "%s: Cannot open video decoder\n", pathname.c_str());
                return false;
        }
-       unique_ptr<AVCodecContext, decltype(avcodec_close)*> codec_ctx_cleanup(
-               codec_ctx.get(), avcodec_close);
+       unique_ptr<AVCodecContext, decltype(avcodec_close)*> video_codec_ctx_cleanup(
+               video_codec_ctx.get(), avcodec_close);
+
+       // Open audio decoder, if we have audio.
+       AVCodecContextWithDeleter audio_codec_ctx = avcodec_alloc_context3_unique(nullptr);
+       if (audio_stream_index != -1) {
+               const AVCodecParameters *audio_codecpar = format_ctx->streams[audio_stream_index]->codecpar;
+               audio_timebase = format_ctx->streams[audio_stream_index]->time_base;
+               if (avcodec_parameters_to_context(audio_codec_ctx.get(), audio_codecpar) < 0) {
+                       fprintf(stderr, "%s: Cannot fill audio codec parameters\n", pathname.c_str());
+                       return false;
+               }
+               AVCodec *audio_codec = avcodec_find_decoder(audio_codecpar->codec_id);
+               if (audio_codec == nullptr) {
+                       fprintf(stderr, "%s: Cannot find audio decoder\n", pathname.c_str());
+                       return false;
+               }
+               if (avcodec_open2(audio_codec_ctx.get(), audio_codec, nullptr) < 0) {
+                       fprintf(stderr, "%s: Cannot open audio decoder\n", pathname.c_str());
+                       return false;
+               }
+       }
+       unique_ptr<AVCodecContext, decltype(avcodec_close)*> audio_codec_ctx_cleanup(
+               audio_codec_ctx.get(), avcodec_close);
 
        internal_rewind();
 
@@ -382,9 +407,12 @@ bool FFmpegCapture::play_video(const string &pathname)
                if (process_queued_commands(format_ctx.get(), pathname, last_modified, /*rewound=*/nullptr)) {
                        return true;
                }
+               FrameAllocator::Frame audio_frame = audio_frame_allocator->alloc_frame();
+               AudioFormat audio_format;
 
                bool error;
-               AVFrameWithDeleter frame = decode_frame(format_ctx.get(), codec_ctx.get(), pathname, video_stream_index, audio_stream_index, &error);
+               AVFrameWithDeleter frame = decode_frame(format_ctx.get(), video_codec_ctx.get(), audio_codec_ctx.get(),
+                       pathname, video_stream_index, audio_stream_index, &audio_frame, &audio_format, &error);
                if (error) {
                        return false;
                }
@@ -411,11 +439,6 @@ bool FFmpegCapture::play_video(const string &pathname)
                        return false;
                }
 
-               FrameAllocator::Frame audio_frame;
-               AudioFormat audio_format;
-                audio_format.bits_per_sample = 32;
-                audio_format.num_channels = 8;
-
                for ( ;; ) {
                        if (last_pts == 0 && pts_origin == 0) {
                                pts_origin = frame->pts;        
@@ -500,13 +523,18 @@ bool FFmpegCapture::process_queued_commands(AVFormatContext *format_ctx, const s
        return false;
 }
 
-AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *codec_ctx, const std::string &pathname, int video_stream_index, int audio_stream_index, bool *error)
+namespace {
+
+}  // namespace
+
+AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx, const std::string &pathname, int video_stream_index, int audio_stream_index, FrameAllocator::Frame *audio_frame, AudioFormat *audio_format, bool *error)
 {
        *error = false;
 
        // Read packets until we have a frame or there are none left.
        bool frame_finished = false;
-       AVFrameWithDeleter frame = av_frame_alloc_unique();
+       AVFrameWithDeleter audio_avframe = av_frame_alloc_unique();
+       AVFrameWithDeleter video_avframe = av_frame_alloc_unique();
        bool eof = false;
        do {
                AVPacket pkt;
@@ -519,36 +547,125 @@ AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCo
                        if (pkt.stream_index == audio_stream_index && audio_callback != nullptr) {
                                audio_callback(&pkt, format_ctx->streams[audio_stream_index]->time_base);
                        }
-                       if (pkt.stream_index != video_stream_index) {
-                               // Ignore audio for now.
-                               continue;
-                       }
-                       if (avcodec_send_packet(codec_ctx, &pkt) < 0) {
-                               fprintf(stderr, "%s: Cannot send packet to codec.\n", pathname.c_str());
-                               *error = true;
-                               return AVFrameWithDeleter(nullptr);
+                       if (pkt.stream_index == video_stream_index) {
+                               if (avcodec_send_packet(video_codec_ctx, &pkt) < 0) {
+                                       fprintf(stderr, "%s: Cannot send packet to video codec.\n", pathname.c_str());
+                                       *error = true;
+                                       return AVFrameWithDeleter(nullptr);
+                               }
+                       } else if (pkt.stream_index == audio_stream_index) {
+                               if (avcodec_send_packet(audio_codec_ctx, &pkt) < 0) {
+                                       fprintf(stderr, "%s: Cannot send packet to audio codec.\n", pathname.c_str());
+                                       *error = true;
+                                       return AVFrameWithDeleter(nullptr);
+                               }
                        }
                } else {
                        eof = true;  // Or error, but ignore that for the time being.
                }
 
-               int err = avcodec_receive_frame(codec_ctx, frame.get());
+               // Decode audio, if any.
+               int err = avcodec_receive_frame(audio_codec_ctx, audio_avframe.get());
+               if (err == 0) {
+                       convert_audio(audio_avframe.get(), audio_frame, audio_format);
+               } else if (err != AVERROR(EAGAIN)) {
+                       fprintf(stderr, "%s: Cannot receive frame from audio codec.\n", pathname.c_str());
+                       *error = true;
+                       return AVFrameWithDeleter(nullptr);
+               }
+
+               // Decode video, if we have a frame.
+               err = avcodec_receive_frame(video_codec_ctx, video_avframe.get());
                if (err == 0) {
                        frame_finished = true;
                        break;
                } else if (err != AVERROR(EAGAIN)) {
-                       fprintf(stderr, "%s: Cannot receive frame from codec.\n", pathname.c_str());
+                       fprintf(stderr, "%s: Cannot receive frame from video codec.\n", pathname.c_str());
                        *error = true;
                        return AVFrameWithDeleter(nullptr);
                }
        } while (!eof);
 
        if (frame_finished)
-               return frame;
+               return video_avframe;
        else
                return AVFrameWithDeleter(nullptr);
 }
 
+void FFmpegCapture::convert_audio(const AVFrame *audio_avframe, FrameAllocator::Frame *audio_frame, AudioFormat *audio_format)
+{
+       // Decide on a format. If there already is one in this audio frame,
+       // we're pretty much forced to use it. If not, we try to find an exact match.
+       // If that still doesn't work, we default to 32-bit signed chunked
+       // (float would be nice, but there's really no way to signal that yet).
+       AVSampleFormat dst_format;
+       if (audio_format->bits_per_sample == 0) {
+               switch (audio_avframe->format) {
+               case AV_SAMPLE_FMT_S16:
+               case AV_SAMPLE_FMT_S16P:
+                       audio_format->bits_per_sample = 16;
+                       dst_format = AV_SAMPLE_FMT_S16;
+                       break;
+               case AV_SAMPLE_FMT_S32:
+               case AV_SAMPLE_FMT_S32P:
+               default:
+                       audio_format->bits_per_sample = 32;
+                       dst_format = AV_SAMPLE_FMT_S32;
+                       break;
+               }
+       } else if (audio_format->bits_per_sample == 16) {
+               dst_format = AV_SAMPLE_FMT_S16;
+       } else if (audio_format->bits_per_sample == 32) {
+               dst_format = AV_SAMPLE_FMT_S32;
+       } else {
+               assert(false);
+       }
+       audio_format->num_channels = 2;
+
+       if (resampler == nullptr ||
+           audio_avframe->format != last_src_format ||
+           dst_format != last_dst_format ||
+           av_frame_get_channel_layout(audio_avframe) != last_channel_layout ||
+           av_frame_get_sample_rate(audio_avframe) != last_sample_rate) {
+               avresample_free(&resampler);
+               resampler = avresample_alloc_context();
+               if (resampler == nullptr) {
+                       fprintf(stderr, "Allocating resampler failed.\n");
+                       exit(1);
+               }
+
+               av_opt_set_int(resampler, "in_channel_layout",  av_frame_get_channel_layout(audio_avframe), 0);
+               av_opt_set_int(resampler, "out_channel_layout", AV_CH_LAYOUT_STEREO,                        0);
+               av_opt_set_int(resampler, "in_sample_rate",     av_frame_get_sample_rate(audio_avframe),    0);
+               av_opt_set_int(resampler, "out_sample_rate",    OUTPUT_FREQUENCY,                           0);
+               av_opt_set_int(resampler, "in_sample_fmt",      audio_avframe->format,                      0);
+               av_opt_set_int(resampler, "out_sample_fmt",     dst_format,                                 0);
+
+               if (avresample_open(resampler) < 0) {
+                       fprintf(stderr, "Could not open resample context.\n");
+                       exit(1);
+               }
+
+               last_src_format = AVSampleFormat(audio_avframe->format);
+               last_dst_format = dst_format;
+               last_channel_layout = av_frame_get_channel_layout(audio_avframe);
+               last_sample_rate = av_frame_get_sample_rate(audio_avframe);
+       }
+
+       size_t bytes_per_sample = (audio_format->bits_per_sample / 8) * 2;
+       size_t num_samples_room = (audio_frame->size - audio_frame->len) / bytes_per_sample;
+
+       uint8_t *data = audio_frame->data + audio_frame->len;
+       int out_samples = avresample_convert(resampler, &data, 0, num_samples_room,
+               audio_avframe->data, audio_avframe->linesize[0], audio_avframe->nb_samples);
+       if (out_samples < 0) {
+                fprintf(stderr, "Audio conversion failed.\n");
+                exit(1);
+        }
+
+       audio_frame->len += out_samples * bytes_per_sample;
+}
+
 VideoFormat FFmpegCapture::construct_video_format(const AVFrame *frame, AVRational video_timebase)
 {
        VideoFormat video_format;
index afca641e951e20031bf37bf2f9ed2d0cad9413a6..eb377f7fe039b42ba2f029aa8e7ea42117f59252 100644 (file)
@@ -16,7 +16,8 @@
 // but it would require some more plumbing, and it would also fail if the file
 // changes parameters midway, which is allowed in some formats.
 //
-// There is currently no audio support.
+// You can get out the audio either as decoded or in raw form (Kaeru uses this).
+// However, the rest of Nageru can't really use the audio for anything yet.
 
 #include <assert.h>
 #include <stdint.h>
@@ -31,6 +32,7 @@
 #include <movit/ycbcr.h>
 
 extern "C" {
+#include <libavresample/avresample.h>
 #include <libavutil/pixfmt.h>
 #include <libavutil/rational.h>
 }
@@ -194,7 +196,10 @@ private:
        bool process_queued_commands(AVFormatContext *format_ctx, const std::string &pathname, timespec last_modified, bool *rewound);
 
        // Returns nullptr if no frame was decoded (e.g. EOF).
-       AVFrameWithDeleter decode_frame(AVFormatContext *format_ctx, AVCodecContext *codec_ctx, const std::string &pathname, int video_stream_index, int audio_stream_index, bool *error);
+       AVFrameWithDeleter decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx,
+                                       const std::string &pathname, int video_stream_index, int audio_stream_index,
+                                       bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format, bool *error);
+       void convert_audio(const AVFrame *audio_avframe, bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format);
 
        bmusb::VideoFormat construct_video_format(const AVFrame *frame, AVRational video_timebase);
        bmusb::FrameAllocator::Frame make_video_frame(const AVFrame *frame, const std::string &pathname, bool *error);
@@ -222,7 +227,7 @@ private:
        SwsContextWithDeleter sws_ctx;
        int sws_last_width = -1, sws_last_height = -1, sws_last_src_format = -1;
        AVPixelFormat sws_dst_format = AVPixelFormat(-1);  // In practice, always initialized.
-       AVRational video_timebase;
+       AVRational video_timebase, audio_timebase;
 
        QuittableSleeper producer_thread_should_quit;
        std::thread producer_thread;
@@ -236,6 +241,13 @@ private:
                double new_rate;  // For CHANGE_RATE.
        };
        std::vector<QueuedCommand> command_queue;  // Protected by <queue_mu>.
+
+       // Audio resampler.
+       AVAudioResampleContext *resampler = nullptr;
+       AVSampleFormat last_src_format, last_dst_format;
+       int64_t last_channel_layout;
+       int last_sample_rate;
+
 };
 
 #endif  // !defined(_FFMPEG_CAPTURE_H)
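
The new convert_audio() in the .cpp diff above configures a libavresample
context through av_opt_set_int() and pushes decoded frames through
avresample_convert(). As a standalone illustration of that conversion (not
part of the commit: it hard-codes 48 kHz where Nageru uses its
OUTPUT_FREQUENCY constant, returns a freshly allocated buffer instead of
writing into a bmusb FrameAllocator::Frame, and skips the format/layout
caching), a sketch could look like the following. libavresample has since
been deprecated in favor of libswresample, but it is what this commit targets.

extern "C" {
#include <libavresample/avresample.h>
#include <libavutil/channel_layout.h>
#include <libavutil/frame.h>
#include <libavutil/mathematics.h>
#include <libavutil/opt.h>
#include <libavutil/samplefmt.h>
}
#include <cstdint>
#include <cstdio>
#include <vector>

// Convert one decoded audio AVFrame to interleaved stereo S32 at 48 kHz.
// Returns an empty vector on failure.
std::vector<int32_t> to_stereo_s32(const AVFrame *frame, int out_sample_rate = 48000)
{
	AVAudioResampleContext *resampler = avresample_alloc_context();
	av_opt_set_int(resampler, "in_channel_layout",  av_frame_get_channel_layout(frame), 0);
	av_opt_set_int(resampler, "out_channel_layout", AV_CH_LAYOUT_STEREO,                0);
	av_opt_set_int(resampler, "in_sample_rate",     av_frame_get_sample_rate(frame),    0);
	av_opt_set_int(resampler, "out_sample_rate",    out_sample_rate,                    0);
	av_opt_set_int(resampler, "in_sample_fmt",      frame->format,                      0);
	av_opt_set_int(resampler, "out_sample_fmt",     AV_SAMPLE_FMT_S32,                  0);
	if (avresample_open(resampler) < 0) {
		fprintf(stderr, "Could not open resample context.\n");
		avresample_free(&resampler);
		return {};
	}

	// Worst case: all input samples rescaled to the output rate, plus a
	// little slack for resampler delay.
	int max_out_samples = int(av_rescale_rnd(frame->nb_samples, out_sample_rate,
		av_frame_get_sample_rate(frame), AV_ROUND_UP)) + 32;
	std::vector<int32_t> out(max_out_samples * 2);  // Two channels, interleaved.
	uint8_t *out_data = reinterpret_cast<uint8_t *>(out.data());

	int out_samples = avresample_convert(resampler, &out_data, 0, max_out_samples,
		frame->data, frame->linesize[0], frame->nb_samples);
	avresample_free(&resampler);
	if (out_samples < 0) {
		fprintf(stderr, "Audio conversion failed.\n");
		return {};
	}
	out.resize(out_samples * 2);
	return out;
}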