From b0ce4383b7d64760bbfccf4e0e769b293f0db0cd Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson"
Date: Sat, 8 Jul 2017 18:43:39 +0200
Subject: [PATCH] Add support for transcoding the audio in Kaeru (on by default).

---
 ffmpeg_capture.cpp | 43 +++++++++++++++++++++++-----------
 ffmpeg_capture.h   | 11 +++++----
 flags.cpp          | 14 ++++++++++++
 flags.h            |  1 +
 kaeru.cpp          | 57 ++++++++++++++++++++++++++++++++++++++--------
 5 files changed, 98 insertions(+), 28 deletions(-)

diff --git a/ffmpeg_capture.cpp b/ffmpeg_capture.cpp
index 5e6104f..3de7d5e 100644
--- a/ffmpeg_capture.cpp
+++ b/ffmpeg_capture.cpp
@@ -228,7 +228,8 @@ void FFmpegCapture::configure_card()
 		set_video_frame_allocator(owned_video_frame_allocator.get());
 	}
 	if (audio_frame_allocator == nullptr) {
-		owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
+		// Audio can come out in pretty large chunks, so increase the buffer from 64 kB to 1 MB.
+		owned_audio_frame_allocator.reset(new MallocFrameAllocator(1 << 20, NUM_QUEUED_AUDIO_FRAMES));
 		set_audio_frame_allocator(owned_audio_frame_allocator.get());
 	}
 }
@@ -319,7 +320,7 @@ void FFmpegCapture::send_disconnected_frame()
 		video_frame.len = width * height * 4;
 		memset(video_frame.data, 0, video_frame.len);
 
-		frame_callback(-1, AVRational{1, TIMEBASE}, timecode++,
+		frame_callback(-1, AVRational{1, TIMEBASE}, -1, AVRational{1, TIMEBASE}, timecode++,
 			video_frame, /*video_offset=*/0, video_format,
 			FrameAllocator::Frame(), /*audio_offset=*/0, AudioFormat());
 	}
@@ -410,9 +411,10 @@ bool FFmpegCapture::play_video(const string &pathname)
 
 		FrameAllocator::Frame audio_frame = audio_frame_allocator->alloc_frame();
 		AudioFormat audio_format;
+		int64_t audio_pts;
 		bool error;
 		AVFrameWithDeleter frame = decode_frame(format_ctx.get(), video_codec_ctx.get(), audio_codec_ctx.get(),
-			pathname, video_stream_index, audio_stream_index, &audio_frame, &audio_format, &error);
+			pathname, video_stream_index, audio_stream_index, &audio_frame, &audio_format, &audio_pts, &error);
 		if (error) {
 			return false;
 		}
@@ -447,7 +449,10 @@ bool FFmpegCapture::play_video(const string &pathname)
 			video_frame.received_timestamp = next_frame_start;
 			bool finished_wakeup = producer_thread_should_quit.sleep_until(next_frame_start);
 			if (finished_wakeup) {
-				frame_callback(frame->pts, video_timebase, timecode++,
+				if (audio_frame.len > 0) {
+					assert(audio_pts != -1);
+				}
+				frame_callback(frame->pts, video_timebase, audio_pts, audio_timebase, timecode++,
 					video_frame, 0, video_format,
 					audio_frame, 0, audio_format);
 				break;
@@ -527,7 +532,9 @@ namespace {
 
 }  // namespace
 
-AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx, const std::string &pathname, int video_stream_index, int audio_stream_index, FrameAllocator::Frame *audio_frame, AudioFormat *audio_format, bool *error)
+AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx,
+                                               const std::string &pathname, int video_stream_index, int audio_stream_index,
+                                               FrameAllocator::Frame *audio_frame, AudioFormat *audio_format, int64_t *audio_pts, bool *error)
 {
 	*error = false;
 
@@ -536,6 +543,7 @@ AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCo
 	AVFrameWithDeleter audio_avframe = av_frame_alloc_unique();
 	AVFrameWithDeleter video_avframe = av_frame_alloc_unique();
 	bool eof = false;
+	*audio_pts = -1;
 	do {
 		AVPacket pkt;
 		unique_ptr pkt_cleanup(
@@ -554,6 +562,9 @@ AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCo
 				return AVFrameWithDeleter(nullptr);
 			}
 		} else if (pkt.stream_index == audio_stream_index) {
+			if (*audio_pts == -1) {
+				*audio_pts = pkt.pts;
+			}
 			if (avcodec_send_packet(audio_codec_ctx, &pkt) < 0) {
 				fprintf(stderr, "%s: Cannot send packet to audio codec.\n", pathname.c_str());
 				*error = true;
@@ -565,17 +576,23 @@ AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCo
 		}
 
 		// Decode audio, if any.
-		int err = avcodec_receive_frame(audio_codec_ctx, audio_avframe.get());
-		if (err == 0) {
-			convert_audio(audio_avframe.get(), audio_frame, audio_format);
-		} else if (err != AVERROR(EAGAIN)) {
-			fprintf(stderr, "%s: Cannot receive frame from audio codec.\n", pathname.c_str());
-			*error = true;
-			return AVFrameWithDeleter(nullptr);
+		if (*audio_pts != -1) {
+			for ( ;; ) {
+				int err = avcodec_receive_frame(audio_codec_ctx, audio_avframe.get());
+				if (err == 0) {
+					convert_audio(audio_avframe.get(), audio_frame, audio_format);
+				} else if (err == AVERROR(EAGAIN)) {
+					break;
+				} else {
+					fprintf(stderr, "%s: Cannot receive frame from audio codec.\n", pathname.c_str());
+					*error = true;
+					return AVFrameWithDeleter(nullptr);
+				}
+			}
 		}
 
 		// Decode video, if we have a frame.
-		err = avcodec_receive_frame(video_codec_ctx, video_avframe.get());
+		int err = avcodec_receive_frame(video_codec_ctx, video_avframe.get());
 		if (err == 0) {
 			frame_finished = true;
 			break;
diff --git a/ffmpeg_capture.h b/ffmpeg_capture.h
index eb377f7..c507715 100644
--- a/ffmpeg_capture.h
+++ b/ffmpeg_capture.h
@@ -106,7 +106,8 @@ public:
 
 	// FFmpegCapture-specific overload of set_frame_callback that also gives
 	// the raw original pts from the video. Negative pts means a dummy frame.
-	typedef std::function frame_callback_t;
@@ -119,13 +120,13 @@ public:
 	{
 		frame_callback = std::bind(
 			callback,
-			std::placeholders::_3,
-			std::placeholders::_4,
 			std::placeholders::_5,
 			std::placeholders::_6,
 			std::placeholders::_7,
 			std::placeholders::_8,
-			std::placeholders::_9);
+			std::placeholders::_9,
+			std::placeholders::_10,
+			std::placeholders::_11);
 	}
 
 	// FFmpegCapture-specific callback that gives the raw audio.
@@ -198,7 +199,7 @@ private:
 	// Returns nullptr if no frame was decoded (e.g. EOF).
 	AVFrameWithDeleter decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx,
 		const std::string &pathname, int video_stream_index, int audio_stream_index,
-		bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format, bool *error);
+		bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format, int64_t *audio_pts, bool *error);
 	void convert_audio(const AVFrame *audio_avframe, bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format);
 
 	bmusb::VideoFormat construct_video_format(const AVFrame *frame, AVRational video_timebase);
diff --git a/flags.cpp b/flags.cpp
index 773750d..c65903a 100644
--- a/flags.cpp
+++ b/flags.cpp
@@ -33,6 +33,7 @@ enum LongOption {
 	OPTION_HTTP_COARSE_TIMEBASE,
 	OPTION_HTTP_AUDIO_CODEC,
 	OPTION_HTTP_AUDIO_BITRATE,
+	OPTION_NO_TRANSCODE_AUDIO,
 	OPTION_FLAT_AUDIO,
 	OPTION_GAIN_STAGING,
 	OPTION_DISABLE_LOCUT,
@@ -107,6 +108,10 @@ void usage(Program program)
 	fprintf(stderr, "      --http-audio-bitrate=KBITS      audio codec bit rate to use for HTTP streams\n");
 	fprintf(stderr, "                                        (default is %d, ignored unless --http-audio-codec is set)\n",
 		DEFAULT_AUDIO_OUTPUT_BIT_RATE / 1000);
+	if (program == PROGRAM_KAERU) {
+		fprintf(stderr, "      --no-transcode-audio            copy encoded audio raw from the source stream\n");
+		fprintf(stderr, "                                        (requires --http-audio-codec= to be set)\n");
+	}
 	fprintf(stderr, "      --http-coarse-timebase          use less timebase for HTTP (recommended for muxers\n");
 	fprintf(stderr, "                                        that handle large pts poorly, like e.g. MP4)\n");
 	if (program == PROGRAM_NAGERU) {
@@ -182,6 +187,7 @@ void parse_flags(Program program, int argc, char * const argv[])
 		{ "http-coarse-timebase", no_argument, 0, OPTION_HTTP_COARSE_TIMEBASE },
 		{ "http-audio-codec", required_argument, 0, OPTION_HTTP_AUDIO_CODEC },
 		{ "http-audio-bitrate", required_argument, 0, OPTION_HTTP_AUDIO_BITRATE },
+		{ "no-transcode-audio", no_argument, 0, OPTION_NO_TRANSCODE_AUDIO },
 		{ "flat-audio", no_argument, 0, OPTION_FLAT_AUDIO },
 		{ "gain-staging", required_argument, 0, OPTION_GAIN_STAGING },
 		{ "disable-locut", no_argument, 0, OPTION_DISABLE_LOCUT },
@@ -288,6 +294,9 @@ void parse_flags(Program program, int argc, char * const argv[])
 		case OPTION_HTTP_AUDIO_BITRATE:
 			global_flags.stream_audio_codec_bitrate = atoi(optarg) * 1000;
 			break;
+		case OPTION_NO_TRANSCODE_AUDIO:
+			global_flags.transcode_audio = false;
+			break;
 		case OPTION_HTTP_X264_VIDEO:
 			global_flags.x264_video_to_http = true;
 			break;
@@ -476,6 +485,11 @@ void parse_flags(Program program, int argc, char * const argv[])
 		fprintf(stderr, "ERROR: --output-card points to a nonexistant card\n");
 		exit(1);
 	}
+	if (!global_flags.transcode_audio && global_flags.stream_audio_codec_name.empty()) {
+		fprintf(stderr, "ERROR: If not transcoding audio, you must specify ahead of time what audio codec is in use\n");
+		fprintf(stderr, "       (using --http-audio-codec).\n");
+		exit(1);
+	}
 	if (global_flags.x264_speedcontrol) {
 		if (!global_flags.x264_preset.empty() && global_flags.x264_preset != "faster") {
 			fprintf(stderr, "WARNING: --x264-preset is overridden by --x264-speedcontrol (implicitly uses \"faster\" as base preset)\n");
diff --git a/flags.h b/flags.h
index 87b05b1..e6bf08e 100644
--- a/flags.h
+++ b/flags.h
@@ -59,6 +59,7 @@ struct Flags {
 	bool ten_bit_input = false;
 	bool ten_bit_output = false;  // Implies x264_video_to_disk == true and x264_bit_depth == 10.
 	YCbCrInterpretation ycbcr_interpretation[MAX_VIDEO_CARDS];
+	bool transcode_audio = true;  // Kaeru only.
 	int x264_bit_depth = 8;  // Not user-settable.
 	bool use_zerocopy = false;  // Not user-settable.
 	bool can_disable_srgb_decoder = false;  // Not user-settable.
diff --git a/kaeru.cpp b/kaeru.cpp
index bb1b08f..a82ee51 100644
--- a/kaeru.cpp
+++ b/kaeru.cpp
@@ -64,15 +64,45 @@ unique_ptr<Mux> create_mux(HTTPD *httpd, AVOutputFormat *oformat, X264Encoder *x
 	return mux;
 }
 
-void video_frame_callback(FFmpegCapture *video, X264Encoder *x264_encoder, int64_t pts, AVRational timebase, uint16_t timecode,
+void video_frame_callback(FFmpegCapture *video, X264Encoder *x264_encoder, AudioEncoder *audio_encoder,
+                          int64_t video_pts, AVRational video_timebase,
+                          int64_t audio_pts, AVRational audio_timebase,
+                          uint16_t timecode,
 	FrameAllocator::Frame video_frame, size_t video_offset, VideoFormat video_format,
 	FrameAllocator::Frame audio_frame, size_t audio_offset, AudioFormat audio_format)
 {
-	if (pts >= 0 && video_frame.len > 0) {
-		pts = av_rescale_q(pts, timebase, AVRational{ 1, TIMEBASE });
+	if (video_pts >= 0 && video_frame.len > 0) {
+		video_pts = av_rescale_q(video_pts, video_timebase, AVRational{ 1, TIMEBASE });
 		int64_t frame_duration = TIMEBASE * video_format.frame_rate_nom / video_format.frame_rate_den;
-		x264_encoder->add_frame(pts, frame_duration, video->get_current_frame_ycbcr_format().luma_coefficients, video_frame.data + video_offset, ReceivedTimestamps());
+		x264_encoder->add_frame(video_pts, frame_duration, video->get_current_frame_ycbcr_format().luma_coefficients, video_frame.data + video_offset, ReceivedTimestamps());
 	}
+	if (audio_frame.len > 0) {
+		// FFmpegCapture takes care of this for us.
+		assert(audio_format.num_channels == 2);
+		assert(audio_format.sample_rate == OUTPUT_FREQUENCY);
+
+		// TODO: Reduce some duplication against AudioMixer here.
+		size_t num_samples = audio_frame.len / (audio_format.bits_per_sample / 8);
+		vector<float> float_samples;
+		float_samples.resize(num_samples);
+		if (audio_format.bits_per_sample == 16) {
+			const int16_t *src = (const int16_t *)audio_frame.data;
+			float *dst = &float_samples[0];
+			for (size_t i = 0; i < num_samples; ++i) {
+				*dst++ = le16toh(*src++) * (1.0f / 32768.0f);
+			}
+		} else if (audio_format.bits_per_sample == 32) {
+			const int32_t *src = (const int32_t *)audio_frame.data;
+			float *dst = &float_samples[0];
+			for (size_t i = 0; i < num_samples; ++i) {
+				*dst++ = le32toh(*src++) * (1.0f / 2147483648.0f);
+			}
+		} else {
+			assert(false);
+		}
+		audio_pts = av_rescale_q(audio_pts, audio_timebase, AVRational{ 1, TIMEBASE });
+		audio_encoder->encode_audio(float_samples, audio_pts);
+	}
 
 	if (video_frame.owner) {
 		video_frame.owner->release_frame(video_frame);
@@ -104,20 +134,27 @@ int main(int argc, char *argv[])
 	assert(oformat != nullptr);
 
 	unique_ptr<AudioEncoder> audio_encoder;
-	if (global_flags.stream_audio_codec_name.empty()) {
-		audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
-	} else {
-		audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
+	if (global_flags.transcode_audio) {
+		if (global_flags.stream_audio_codec_name.empty()) {
+			audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
+		} else {
+			audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
+		}
 	}
 
 	X264Encoder x264_encoder(oformat);
 	unique_ptr<Mux> http_mux = create_mux(&httpd, oformat, &x264_encoder, audio_encoder.get());
+	if (global_flags.transcode_audio) {
+		audio_encoder->add_mux(http_mux.get());
+	}
 	x264_encoder.add_mux(http_mux.get());
 
 	FFmpegCapture video(argv[optind], global_flags.width, global_flags.height);
 	video.set_pixel_format(FFmpegCapture::PixelFormat_NV12);
-	video.set_frame_callback(bind(video_frame_callback, &video, &x264_encoder, _1, _2, _3, _4, _5, _6, _7, _8, _9));
-	video.set_audio_callback(bind(audio_frame_callback, http_mux.get(), _1, _2));
+	video.set_frame_callback(bind(video_frame_callback, &video, &x264_encoder, audio_encoder.get(), _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11));
+	if (!global_flags.transcode_audio) {
+		video.set_audio_callback(bind(audio_frame_callback, http_mux.get(), _1, _2));
+	}
 	video.configure_card();
 	video.start_bm_capture();
 	video.change_rate(2.0);  // Be sure never to really fall behind, but also don't dump huge amounts of stuff onto x264.
-- 
2.39.2
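
Note (appended below the patch, not part of it): the core of the new audio path in kaeru.cpp is a plain PCM-to-float conversion before the samples are handed to AudioEncoder::encode_audio(). The standalone sketch below shows just that normalization for the 16-bit case, assuming little-endian input as in the patch; the function name s16le_to_float and the sample values in main() are invented for illustration and do not appear in Nageru.

#include <endian.h>
#include <stdint.h>
#include <stdio.h>
#include <vector>

// Convert interleaved signed 16-bit little-endian PCM to float,
// mirroring the scaling used in video_frame_callback() above.
std::vector<float> s16le_to_float(const int16_t *src, size_t num_samples)
{
	std::vector<float> out(num_samples);
	for (size_t i = 0; i < num_samples; ++i) {
		// 1/32768 maps INT16_MIN..INT16_MAX onto [-1.0, 1.0).
		out[i] = le16toh(src[i]) * (1.0f / 32768.0f);
	}
	return out;
}

int main()
{
	const int16_t samples[] = { 0, 16384, -32768, 32767 };
	for (float f : s16le_to_float(samples, 4)) {
		printf("%f\n", f);
	}
}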