git.sesse.net Git - nageru/commitdiff
Add support for transcoding the audio in Kaeru (on by default).
author    Steinar H. Gunderson <sgunderson@bigfoot.com>
          Sat, 8 Jul 2017 16:43:39 +0000 (18:43 +0200)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
          Sat, 8 Jul 2017 16:43:39 +0000 (18:43 +0200)
ffmpeg_capture.cpp
ffmpeg_capture.h
flags.cpp
flags.h
kaeru.cpp

index 5e6104f82c628be57cf543ee06599ac639ff3f6a..3de7d5efa056e77708c5b0bd7925c4dbfea1ead4 100644 (file)
--- a/ffmpeg_capture.cpp
+++ b/ffmpeg_capture.cpp
@@ -228,7 +228,8 @@ void FFmpegCapture::configure_card()
                set_video_frame_allocator(owned_video_frame_allocator.get());
        }
        if (audio_frame_allocator == nullptr) {
-               owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
+               // Audio can come out in pretty large chunks, so increase from the default 1 MB.
+               owned_audio_frame_allocator.reset(new MallocFrameAllocator(1 << 20, NUM_QUEUED_AUDIO_FRAMES));
                set_audio_frame_allocator(owned_audio_frame_allocator.get());
        }
 }
@@ -319,7 +320,7 @@ void FFmpegCapture::send_disconnected_frame()
                video_frame.len = width * height * 4;
                memset(video_frame.data, 0, video_frame.len);
 
-               frame_callback(-1, AVRational{1, TIMEBASE}, timecode++,
+               frame_callback(-1, AVRational{1, TIMEBASE}, -1, AVRational{1, TIMEBASE}, timecode++,
                        video_frame, /*video_offset=*/0, video_format,
                        FrameAllocator::Frame(), /*audio_offset=*/0, AudioFormat());
        }
@@ -410,9 +411,10 @@ bool FFmpegCapture::play_video(const string &pathname)
                FrameAllocator::Frame audio_frame = audio_frame_allocator->alloc_frame();
                AudioFormat audio_format;
 
+               int64_t audio_pts;
                bool error;
                AVFrameWithDeleter frame = decode_frame(format_ctx.get(), video_codec_ctx.get(), audio_codec_ctx.get(),
-                       pathname, video_stream_index, audio_stream_index, &audio_frame, &audio_format, &error);
+                       pathname, video_stream_index, audio_stream_index, &audio_frame, &audio_format, &audio_pts, &error);
                if (error) {
                        return false;
                }
@@ -447,7 +449,10 @@ bool FFmpegCapture::play_video(const string &pathname)
                        video_frame.received_timestamp = next_frame_start;
                        bool finished_wakeup = producer_thread_should_quit.sleep_until(next_frame_start);
                        if (finished_wakeup) {
-                               frame_callback(frame->pts, video_timebase, timecode++,
+                               if (audio_frame.len > 0) {
+                                       assert(audio_pts != -1);
+                               }
+                               frame_callback(frame->pts, video_timebase, audio_pts, audio_timebase, timecode++,
                                        video_frame, 0, video_format,
                                        audio_frame, 0, audio_format);
                                break;
@@ -527,7 +532,9 @@ namespace {
 
 }  // namespace
 
-AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx, const std::string &pathname, int video_stream_index, int audio_stream_index, FrameAllocator::Frame *audio_frame, AudioFormat *audio_format, bool *error)
+AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx,
+       const std::string &pathname, int video_stream_index, int audio_stream_index,
+       FrameAllocator::Frame *audio_frame, AudioFormat *audio_format, int64_t *audio_pts, bool *error)
 {
        *error = false;
 
@@ -536,6 +543,7 @@ AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCo
        AVFrameWithDeleter audio_avframe = av_frame_alloc_unique();
        AVFrameWithDeleter video_avframe = av_frame_alloc_unique();
        bool eof = false;
+       *audio_pts = -1;
        do {
                AVPacket pkt;
                unique_ptr<AVPacket, decltype(av_packet_unref)*> pkt_cleanup(
@@ -554,6 +562,9 @@ AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCo
                                        return AVFrameWithDeleter(nullptr);
                                }
                        } else if (pkt.stream_index == audio_stream_index) {
+                               if (*audio_pts == -1) {
+                                       *audio_pts = pkt.pts;
+                               }
                                if (avcodec_send_packet(audio_codec_ctx, &pkt) < 0) {
                                        fprintf(stderr, "%s: Cannot send packet to audio codec.\n", pathname.c_str());
                                        *error = true;
@@ -565,17 +576,23 @@ AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCo
                }
 
                // Decode audio, if any.
-               int err = avcodec_receive_frame(audio_codec_ctx, audio_avframe.get());
-               if (err == 0) {
-                       convert_audio(audio_avframe.get(), audio_frame, audio_format);
-               } else if (err != AVERROR(EAGAIN)) {
-                       fprintf(stderr, "%s: Cannot receive frame from audio codec.\n", pathname.c_str());
-                       *error = true;
-                       return AVFrameWithDeleter(nullptr);
+               if (*audio_pts != -1) {
+                       for ( ;; ) {
+                               int err = avcodec_receive_frame(audio_codec_ctx, audio_avframe.get());
+                               if (err == 0) {
+                                       convert_audio(audio_avframe.get(), audio_frame, audio_format);
+                               } else if (err == AVERROR(EAGAIN)) {
+                                       break;
+                               } else {
+                                       fprintf(stderr, "%s: Cannot receive frame from audio codec.\n", pathname.c_str());
+                                       *error = true;
+                                       return AVFrameWithDeleter(nullptr);
+                               }
+                       }
                }
 
                // Decode video, if we have a frame.
-               err = avcodec_receive_frame(video_codec_ctx, video_avframe.get());
+               int err = avcodec_receive_frame(video_codec_ctx, video_avframe.get());
                if (err == 0) {
                        frame_finished = true;
                        break;
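
The decode loop now records the pts of the first audio packet fed to the decoder and, once one has been seen, keeps calling avcodec_receive_frame() until it returns AVERROR(EAGAIN), so several decoded audio frames can land in the same audio buffer before the next video frame is emitted. Below is a minimal sketch of that send/drain pattern against plain libavcodec; decode_audio_packet() and on_audio() are hypothetical names standing in for the packet loop and convert_audio() above.

// Sketch of the send/drain pattern, against plain libavcodec.
// decode_audio_packet() and on_audio() are illustrative names only;
// the real code appends into a FrameAllocator::Frame via convert_audio().
extern "C" {
#include <libavcodec/avcodec.h>
}
#include <cstdio>
#include <functional>

bool decode_audio_packet(AVCodecContext *audio_codec_ctx, const AVPacket *pkt,
                         AVFrame *audio_avframe,
                         const std::function<void(const AVFrame *)> &on_audio)
{
	if (avcodec_send_packet(audio_codec_ctx, pkt) < 0) {
		fprintf(stderr, "Cannot send packet to audio codec.\n");
		return false;
	}
	// One packet can yield zero, one or several frames, so drain until EAGAIN.
	for ( ;; ) {
		int err = avcodec_receive_frame(audio_codec_ctx, audio_avframe);
		if (err == AVERROR(EAGAIN)) {
			return true;  // Decoder wants more input; not an error.
		}
		if (err < 0) {
			fprintf(stderr, "Cannot receive frame from audio codec.\n");
			return false;
		}
		on_audio(audio_avframe);
	}
}
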
index eb377f7fe039b42ba2f029aa8e7ea42117f59252..c50771549b60670fa323eb67da7b1c8dfa93876c 100644 (file)
--- a/ffmpeg_capture.h
+++ b/ffmpeg_capture.h
@@ -106,7 +106,8 @@ public:
 
        // FFmpegCapture-specific overload of set_frame_callback that also gives
        // the raw original pts from the video. Negative pts means a dummy frame.
-       typedef std::function<void(int64_t pts, AVRational timebase, uint16_t timecode,
+       typedef std::function<void(int64_t video_pts, AVRational video_timebase, int64_t audio_pts, AVRational audio_timebase,
+                                  uint16_t timecode,
                                   bmusb::FrameAllocator::Frame video_frame, size_t video_offset, bmusb::VideoFormat video_format,
                                   bmusb::FrameAllocator::Frame audio_frame, size_t audio_offset, bmusb::AudioFormat audio_format)>
                frame_callback_t;
@@ -119,13 +120,13 @@ public:
        {
                frame_callback = std::bind(
                        callback,
-                       std::placeholders::_3,
-                       std::placeholders::_4,
                        std::placeholders::_5,
                        std::placeholders::_6,
                        std::placeholders::_7,
                        std::placeholders::_8,
-                       std::placeholders::_9);
+                       std::placeholders::_9,
+                       std::placeholders::_10,
+                       std::placeholders::_11);
        }
 
        // FFmpegCapture-specific callback that gives the raw audio.
@@ -198,7 +199,7 @@ private:
        // Returns nullptr if no frame was decoded (e.g. EOF).
        AVFrameWithDeleter decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx,
                                        const std::string &pathname, int video_stream_index, int audio_stream_index,
-                                       bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format, bool *error);
+                                       bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format, int64_t *audio_pts, bool *error);
        void convert_audio(const AVFrame *audio_avframe, bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format);
 
        bmusb::VideoFormat construct_video_format(const AVFrame *frame, AVRational video_timebase);
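
set_frame_callback() keeps its old seven-argument bmusb-style overload, so the std::bind adapter above simply skips the four new pts/timebase parameters; that is why the placeholders shift from _3.._9 to _5.._11. A small self-contained illustration of the same adapter trick, with the payload reduced to plain ints (all names here are made up for the example):

#include <cstdint>
#include <cstdio>
#include <functional>

// Wide callback: pts/timebase for video and audio, then the "old" payload.
using wide_cb_t = std::function<void(int64_t video_pts, int video_tb,
                                     int64_t audio_pts, int audio_tb,
                                     uint16_t timecode, int video_data, int audio_data)>;
// Narrow callback: only the payload the old bmusb-style API exposes.
using narrow_cb_t = std::function<void(uint16_t timecode, int video_data, int audio_data)>;

// Adapt a narrow callback so it can be stored where a wide one is expected:
// drop the first four arguments by binding only placeholders _5.._7.
wide_cb_t adapt(const narrow_cb_t &callback)
{
	using namespace std::placeholders;
	return std::bind(callback, _5, _6, _7);
}

int main()
{
	wide_cb_t cb = adapt([](uint16_t timecode, int video_data, int audio_data) {
		printf("timecode=%d video=%d audio=%d\n", timecode, video_data, audio_data);
	});
	cb(/*video_pts=*/1, /*video_tb=*/90000, /*audio_pts=*/2, /*audio_tb=*/48000,
	   /*timecode=*/7, /*video_data=*/3, /*audio_data=*/4);
}
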
index 773750d961afb5ec735939345c9d8e0f382e9740..c65903abacda4fc7b98911f27dd165569b1bfcd8 100644 (file)
--- a/flags.cpp
+++ b/flags.cpp
@@ -33,6 +33,7 @@ enum LongOption {
        OPTION_HTTP_COARSE_TIMEBASE,
        OPTION_HTTP_AUDIO_CODEC,
        OPTION_HTTP_AUDIO_BITRATE,
+       OPTION_NO_TRANSCODE_AUDIO,
        OPTION_FLAT_AUDIO,
        OPTION_GAIN_STAGING,
        OPTION_DISABLE_LOCUT,
@@ -107,6 +108,10 @@ void usage(Program program)
        fprintf(stderr, "      --http-audio-bitrate=KBITS  audio codec bit rate to use for HTTP streams\n");
        fprintf(stderr, "                                  (default is %d, ignored unless --http-audio-codec is set)\n",
                DEFAULT_AUDIO_OUTPUT_BIT_RATE / 1000);
+       if (program == PROGRAM_KAERU) {
+               fprintf(stderr, "      --no-transcode-audio        copy encoded audio raw from the source stream\n");
+               fprintf(stderr, "                                    (requires --http-audio-codec= to be set)\n");
+       }
        fprintf(stderr, "      --http-coarse-timebase      use less timebase for HTTP (recommended for muxers\n");
        fprintf(stderr, "                                  that handle large pts poorly, like e.g. MP4)\n");
        if (program == PROGRAM_NAGERU) {
@@ -182,6 +187,7 @@ void parse_flags(Program program, int argc, char * const argv[])
                { "http-coarse-timebase", no_argument, 0, OPTION_HTTP_COARSE_TIMEBASE },
                { "http-audio-codec", required_argument, 0, OPTION_HTTP_AUDIO_CODEC },
                { "http-audio-bitrate", required_argument, 0, OPTION_HTTP_AUDIO_BITRATE },
+               { "no-transcode-audio", no_argument, 0, OPTION_NO_TRANSCODE_AUDIO },
                { "flat-audio", no_argument, 0, OPTION_FLAT_AUDIO },
                { "gain-staging", required_argument, 0, OPTION_GAIN_STAGING },
                { "disable-locut", no_argument, 0, OPTION_DISABLE_LOCUT },
@@ -288,6 +294,9 @@ void parse_flags(Program program, int argc, char * const argv[])
                case OPTION_HTTP_AUDIO_BITRATE:
                        global_flags.stream_audio_codec_bitrate = atoi(optarg) * 1000;
                        break;
+               case OPTION_NO_TRANSCODE_AUDIO:
+                       global_flags.transcode_audio = false;
+                       break;
                case OPTION_HTTP_X264_VIDEO:
                        global_flags.x264_video_to_http = true;
                        break;
@@ -476,6 +485,11 @@ void parse_flags(Program program, int argc, char * const argv[])
                fprintf(stderr, "ERROR: --output-card points to a nonexistant card\n");
                exit(1);
        }
+       if (!global_flags.transcode_audio && global_flags.stream_audio_codec_name.empty()) {
+               fprintf(stderr, "ERROR: If not transcoding audio, you must specify ahead-of-time what audio codec is in use\n");
+               fprintf(stderr, "       (using --http-audio-codec).\n");
+               exit(1);
+       }
        if (global_flags.x264_speedcontrol) {
                if (!global_flags.x264_preset.empty() && global_flags.x264_preset != "faster") {
                        fprintf(stderr, "WARNING: --x264-preset is overridden by --x264-speedcontrol (implicitly uses \"faster\" as base preset)\n");
diff --git a/flags.h b/flags.h
index 87b05b1f5379be2cdad3204b81d7af094d148bd2..e6bf08ecf576c4612d14e10f52dc4c671d63844e 100644 (file)
--- a/flags.h
+++ b/flags.h
@@ -59,6 +59,7 @@ struct Flags {
        bool ten_bit_input = false;
        bool ten_bit_output = false;  // Implies x264_video_to_disk == true and x264_bit_depth == 10.
        YCbCrInterpretation ycbcr_interpretation[MAX_VIDEO_CARDS];
+       bool transcode_audio = true;  // Kaeru only.
        int x264_bit_depth = 8;  // Not user-settable.
        bool use_zerocopy = false;  // Not user-settable.
        bool can_disable_srgb_decoder = false;  // Not user-settable.
index bb1b08fe8840f0b3a1179ed2c81cf2811d7b40ce..a82ee51494fa9c29d45d548f9c01cf4dd2a4c132 100644 (file)
--- a/kaeru.cpp
+++ b/kaeru.cpp
@@ -64,15 +64,45 @@ unique_ptr<Mux> create_mux(HTTPD *httpd, AVOutputFormat *oformat, X264Encoder *x
        return mux;
 }
 
-void video_frame_callback(FFmpegCapture *video, X264Encoder *x264_encoder, int64_t pts, AVRational timebase, uint16_t timecode,
+void video_frame_callback(FFmpegCapture *video, X264Encoder *x264_encoder, AudioEncoder *audio_encoder,
+                          int64_t video_pts, AVRational video_timebase,
+                          int64_t audio_pts, AVRational audio_timebase,
+                          uint16_t timecode,
                          FrameAllocator::Frame video_frame, size_t video_offset, VideoFormat video_format,
                          FrameAllocator::Frame audio_frame, size_t audio_offset, AudioFormat audio_format)
 {
-       if (pts >= 0 && video_frame.len > 0) {
-               pts = av_rescale_q(pts, timebase, AVRational{ 1, TIMEBASE });
+       if (video_pts >= 0 && video_frame.len > 0) {
+               video_pts = av_rescale_q(video_pts, video_timebase, AVRational{ 1, TIMEBASE });
                int64_t frame_duration = TIMEBASE * video_format.frame_rate_nom / video_format.frame_rate_den;
-               x264_encoder->add_frame(pts, frame_duration, video->get_current_frame_ycbcr_format().luma_coefficients, video_frame.data + video_offset, ReceivedTimestamps());
+               x264_encoder->add_frame(video_pts, frame_duration, video->get_current_frame_ycbcr_format().luma_coefficients, video_frame.data + video_offset, ReceivedTimestamps());
        }
+       if (audio_frame.len > 0) {
+               // FFmpegCapture takes care of this for us.
+               assert(audio_format.num_channels == 2);
+               assert(audio_format.sample_rate == OUTPUT_FREQUENCY);
+
+               // TODO: Reduce some duplication against AudioMixer here.
+               size_t num_samples = audio_frame.len / (audio_format.bits_per_sample / 8);
+               vector<float> float_samples;
+               float_samples.resize(num_samples);
+               if (audio_format.bits_per_sample == 16) {
+                       const int16_t *src = (const int16_t *)audio_frame.data;
+                       float *dst = &float_samples[0];
+                       for (size_t i = 0; i < num_samples; ++i) {
+                               *dst++ = le16toh(*src++) * (1.0f / 32768.0f);
+                       }
+               } else if (audio_format.bits_per_sample == 32) {
+                       const int32_t *src = (const int32_t *)audio_frame.data;
+                       float *dst = &float_samples[0];
+                       for (size_t i = 0; i < num_samples; ++i) {
+                               *dst++ = le32toh(*src++) * (1.0f / 2147483648.0f);
+                       }
+               } else {
+                       assert(false);
+               }
+               audio_pts = av_rescale_q(audio_pts, audio_timebase, AVRational{ 1, TIMEBASE });
+               audio_encoder->encode_audio(float_samples, audio_pts);
+       }
 
        if (video_frame.owner) {
                video_frame.owner->release_frame(video_frame);
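
With transcoding enabled, the callback has to hand AudioEncoder float samples, so the interleaved 16- or 32-bit little-endian PCM delivered by FFmpegCapture is scaled into [-1, 1) before encode_audio() is called, and audio_pts is rescaled to the global TIMEBASE just like the video pts. A self-contained sketch of the 16-bit half of that normalization; pcm16_to_float() is a hypothetical helper, and the real code additionally handles 32-bit input and goes through le16toh() first:

#include <cstdint>
#include <cstdio>
#include <vector>

// Convert interleaved signed 16-bit PCM to float in [-1.0, 1.0).
// Assumes host-endian input; the real code applies le16toh() per sample.
std::vector<float> pcm16_to_float(const int16_t *src, size_t num_samples)
{
	std::vector<float> out(num_samples);
	for (size_t i = 0; i < num_samples; ++i) {
		out[i] = src[i] * (1.0f / 32768.0f);
	}
	return out;
}

int main()
{
	const int16_t samples[] = { 0, 16384, -32768, 32767 };
	std::vector<float> f = pcm16_to_float(samples, 4);
	for (float v : f) {
		printf("%f\n", v);  // 0.0, 0.5, -1.0, ~0.99997
	}
}
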
@@ -104,20 +134,27 @@ int main(int argc, char *argv[])
        assert(oformat != nullptr);
 
        unique_ptr<AudioEncoder> audio_encoder;
-       if (global_flags.stream_audio_codec_name.empty()) {
-               audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
-       } else {
-               audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
+       if (global_flags.transcode_audio) {
+               if (global_flags.stream_audio_codec_name.empty()) {
+                       audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
+               } else {
+                       audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
+               }
        }
 
        X264Encoder x264_encoder(oformat);
        unique_ptr<Mux> http_mux = create_mux(&httpd, oformat, &x264_encoder, audio_encoder.get());
+       if (global_flags.transcode_audio) {
+               audio_encoder->add_mux(http_mux.get());
+       }
        x264_encoder.add_mux(http_mux.get());
 
        FFmpegCapture video(argv[optind], global_flags.width, global_flags.height);
        video.set_pixel_format(FFmpegCapture::PixelFormat_NV12);
-       video.set_frame_callback(bind(video_frame_callback, &video, &x264_encoder, _1, _2, _3, _4, _5, _6, _7, _8, _9));
-       video.set_audio_callback(bind(audio_frame_callback, http_mux.get(), _1, _2));
+       video.set_frame_callback(bind(video_frame_callback, &video, &x264_encoder, audio_encoder.get(), _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11));
+       if (!global_flags.transcode_audio) {
+               video.set_audio_callback(bind(audio_frame_callback, http_mux.get(), _1, _2));
+       }
        video.configure_card();
        video.start_bm_capture();
        video.change_rate(2.0);  // Be sure never to really fall behind, but also don't dump huge amounts of stuff onto x264.