git.sesse.net Git - nageru/commitdiff
Add support for transcoding the audio in Kaeru (on by default).
author    Steinar H. Gunderson <sgunderson@bigfoot.com>
          Sat, 8 Jul 2017 16:43:39 +0000 (18:43 +0200)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
          Sat, 8 Jul 2017 16:43:39 +0000 (18:43 +0200)
ffmpeg_capture.cpp
ffmpeg_capture.h
flags.cpp
flags.h
kaeru.cpp

index 5e6104f82c628be57cf543ee06599ac639ff3f6a..3de7d5efa056e77708c5b0bd7925c4dbfea1ead4 100644 (file)
--- a/ffmpeg_capture.cpp
+++ b/ffmpeg_capture.cpp
@@ -228,7 +228,8 @@ void FFmpegCapture::configure_card()
                set_video_frame_allocator(owned_video_frame_allocator.get());
        }
        if (audio_frame_allocator == nullptr) {
-               owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
+               // Audio can come out in pretty large chunks, so increase from the default 1 MB.
+               owned_audio_frame_allocator.reset(new MallocFrameAllocator(1 << 20, NUM_QUEUED_AUDIO_FRAMES));
                set_audio_frame_allocator(owned_audio_frame_allocator.get());
        }
 }
@@ -319,7 +320,7 @@ void FFmpegCapture::send_disconnected_frame()
                video_frame.len = width * height * 4;
                memset(video_frame.data, 0, video_frame.len);
 
-               frame_callback(-1, AVRational{1, TIMEBASE}, timecode++,
+               frame_callback(-1, AVRational{1, TIMEBASE}, -1, AVRational{1, TIMEBASE}, timecode++,
                        video_frame, /*video_offset=*/0, video_format,
                        FrameAllocator::Frame(), /*audio_offset=*/0, AudioFormat());
        }
@@ -410,9 +411,10 @@ bool FFmpegCapture::play_video(const string &pathname)
                FrameAllocator::Frame audio_frame = audio_frame_allocator->alloc_frame();
                AudioFormat audio_format;
 
+               int64_t audio_pts;
                bool error;
                AVFrameWithDeleter frame = decode_frame(format_ctx.get(), video_codec_ctx.get(), audio_codec_ctx.get(),
-                       pathname, video_stream_index, audio_stream_index, &audio_frame, &audio_format, &error);
+                       pathname, video_stream_index, audio_stream_index, &audio_frame, &audio_format, &audio_pts, &error);
                if (error) {
                        return false;
                }
@@ -447,7 +449,10 @@ bool FFmpegCapture::play_video(const string &pathname)
                        video_frame.received_timestamp = next_frame_start;
                        bool finished_wakeup = producer_thread_should_quit.sleep_until(next_frame_start);
                        if (finished_wakeup) {
-                               frame_callback(frame->pts, video_timebase, timecode++,
+                               if (audio_frame.len > 0) {
+                                       assert(audio_pts != -1);
+                               }
+                               frame_callback(frame->pts, video_timebase, audio_pts, audio_timebase, timecode++,
                                        video_frame, 0, video_format,
                                        audio_frame, 0, audio_format);
                                break;
@@ -527,7 +532,9 @@ namespace {
 
 }  // namespace
 
-AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx, const std::string &pathname, int video_stream_index, int audio_stream_index, FrameAllocator::Frame *audio_frame, AudioFormat *audio_format, bool *error)
+AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx,
+       const std::string &pathname, int video_stream_index, int audio_stream_index,
+       FrameAllocator::Frame *audio_frame, AudioFormat *audio_format, int64_t *audio_pts, bool *error)
 {
        *error = false;
 
@@ -536,6 +543,7 @@ AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCo
        AVFrameWithDeleter audio_avframe = av_frame_alloc_unique();
        AVFrameWithDeleter video_avframe = av_frame_alloc_unique();
        bool eof = false;
+       *audio_pts = -1;
        do {
                AVPacket pkt;
                unique_ptr<AVPacket, decltype(av_packet_unref)*> pkt_cleanup(
@@ -554,6 +562,9 @@ AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCo
                                        return AVFrameWithDeleter(nullptr);
                                }
                        } else if (pkt.stream_index == audio_stream_index) {
+                               if (*audio_pts == -1) {
+                                       *audio_pts = pkt.pts;
+                               }
                                if (avcodec_send_packet(audio_codec_ctx, &pkt) < 0) {
                                        fprintf(stderr, "%s: Cannot send packet to audio codec.\n", pathname.c_str());
                                        *error = true;
@@ -565,17 +576,23 @@ AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCo
                }
 
                // Decode audio, if any.
-               int err = avcodec_receive_frame(audio_codec_ctx, audio_avframe.get());
-               if (err == 0) {
-                       convert_audio(audio_avframe.get(), audio_frame, audio_format);
-               } else if (err != AVERROR(EAGAIN)) {
-                       fprintf(stderr, "%s: Cannot receive frame from audio codec.\n", pathname.c_str());
-                       *error = true;
-                       return AVFrameWithDeleter(nullptr);
+               if (*audio_pts != -1) {
+                       for ( ;; ) {
+                               int err = avcodec_receive_frame(audio_codec_ctx, audio_avframe.get());
+                               if (err == 0) {
+                                       convert_audio(audio_avframe.get(), audio_frame, audio_format);
+                               } else if (err == AVERROR(EAGAIN)) {
+                                       break;
+                               } else {
+                                       fprintf(stderr, "%s: Cannot receive frame from audio codec.\n", pathname.c_str());
+                                       *error = true;
+                                       return AVFrameWithDeleter(nullptr);
+                               }
+                       }
                }
 
                // Decode video, if we have a frame.
-               err = avcodec_receive_frame(video_codec_ctx, video_avframe.get());
+               int err = avcodec_receive_frame(video_codec_ctx, video_avframe.get());
                if (err == 0) {
                        frame_finished = true;
                        break;
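
The decode loop now records the pts of the first audio packet fed to the decoder and, once one has been seen, keeps calling avcodec_receive_frame() until it returns AVERROR(EAGAIN), so several decoded audio frames can land in the same audio buffer before the next video frame is emitted. Below is a minimal sketch of that send/drain pattern against plain libavcodec; decode_audio_packet() and on_audio() are hypothetical names standing in for the packet loop and convert_audio() above.

// Sketch of the send/drain pattern, against plain libavcodec.
// decode_audio_packet() and on_audio() are illustrative names only;
// the real code appends into a FrameAllocator::Frame via convert_audio().
extern "C" {
#include <libavcodec/avcodec.h>
}
#include <cstdio>
#include <functional>

bool decode_audio_packet(AVCodecContext *audio_codec_ctx, const AVPacket *pkt,
                         AVFrame *audio_avframe,
                         const std::function<void(const AVFrame *)> &on_audio)
{
	if (avcodec_send_packet(audio_codec_ctx, pkt) < 0) {
		fprintf(stderr, "Cannot send packet to audio codec.\n");
		return false;
	}
	// One packet can yield zero, one or several frames, so drain until EAGAIN.
	for ( ;; ) {
		int err = avcodec_receive_frame(audio_codec_ctx, audio_avframe);
		if (err == AVERROR(EAGAIN)) {
			return true;  // Decoder wants more input; not an error.
		}
		if (err < 0) {
			fprintf(stderr, "Cannot receive frame from audio codec.\n");
			return false;
		}
		on_audio(audio_avframe);
	}
}
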
index eb377f7fe039b42ba2f029aa8e7ea42117f59252..c50771549b60670fa323eb67da7b1c8dfa93876c 100644 (file)
--- a/ffmpeg_capture.h
+++ b/ffmpeg_capture.h
@@ -106,7 +106,8 @@ public:
 
        // FFmpegCapture-specific overload of set_frame_callback that also gives
        // the raw original pts from the video. Negative pts means a dummy frame.
-       typedef std::function<void(int64_t pts, AVRational timebase, uint16_t timecode,
+       typedef std::function<void(int64_t video_pts, AVRational video_timebase, int64_t audio_pts, AVRational audio_timebase,
+                                  uint16_t timecode,
                                   bmusb::FrameAllocator::Frame video_frame, size_t video_offset, bmusb::VideoFormat video_format,
                                   bmusb::FrameAllocator::Frame audio_frame, size_t audio_offset, bmusb::AudioFormat audio_format)>
                frame_callback_t;
@@ -119,13 +120,13 @@ public:
        {
                frame_callback = std::bind(
                        callback,
-                       std::placeholders::_3,
-                       std::placeholders::_4,
                        std::placeholders::_5,
                        std::placeholders::_6,
                        std::placeholders::_7,
                        std::placeholders::_8,
-                       std::placeholders::_9);
+                       std::placeholders::_9,
+                       std::placeholders::_10,
+                       std::placeholders::_11);
        }
 
        // FFmpegCapture-specific callback that gives the raw audio.
@@ -198,7 +199,7 @@ private:
        // Returns nullptr if no frame was decoded (e.g. EOF).
        AVFrameWithDeleter decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx,
                                        const std::string &pathname, int video_stream_index, int audio_stream_index,
-                                       bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format, bool *error);
+                                       bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format, int64_t *audio_pts, bool *error);
        void convert_audio(const AVFrame *audio_avframe, bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format);
 
        bmusb::VideoFormat construct_video_format(const AVFrame *frame, AVRational video_timebase);
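
set_frame_callback() keeps its old seven-argument bmusb-style overload, so the std::bind adapter above simply skips the four new pts/timebase parameters; that is why the placeholders shift from _3.._9 to _5.._11. A small self-contained illustration of the same adapter trick, with the payload reduced to plain ints (all names here are made up for the example):

#include <cstdint>
#include <cstdio>
#include <functional>

// Wide callback: pts/timebase for video and audio, then the "old" payload.
using wide_cb_t = std::function<void(int64_t video_pts, int video_tb,
                                     int64_t audio_pts, int audio_tb,
                                     uint16_t timecode, int video_data, int audio_data)>;
// Narrow callback: only the payload the old bmusb-style API exposes.
using narrow_cb_t = std::function<void(uint16_t timecode, int video_data, int audio_data)>;

// Adapt a narrow callback so it can be stored where a wide one is expected:
// drop the first four arguments by binding only placeholders _5.._7.
wide_cb_t adapt(const narrow_cb_t &callback)
{
	using namespace std::placeholders;
	return std::bind(callback, _5, _6, _7);
}

int main()
{
	wide_cb_t cb = adapt([](uint16_t timecode, int video_data, int audio_data) {
		printf("timecode=%d video=%d audio=%d\n", timecode, video_data, audio_data);
	});
	cb(/*video_pts=*/1, /*video_tb=*/90000, /*audio_pts=*/2, /*audio_tb=*/48000,
	   /*timecode=*/7, /*video_data=*/3, /*audio_data=*/4);
}
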
index 773750d961afb5ec735939345c9d8e0f382e9740..c65903abacda4fc7b98911f27dd165569b1bfcd8 100644 (file)
--- a/flags.cpp
+++ b/flags.cpp
@@ -33,6 +33,7 @@ enum LongOption {
        OPTION_HTTP_COARSE_TIMEBASE,
        OPTION_HTTP_AUDIO_CODEC,
        OPTION_HTTP_AUDIO_BITRATE,
+       OPTION_NO_TRANSCODE_AUDIO,
        OPTION_FLAT_AUDIO,
        OPTION_GAIN_STAGING,
        OPTION_DISABLE_LOCUT,
@@ -107,6 +108,10 @@ void usage(Program program)
        fprintf(stderr, "      --http-audio-bitrate=KBITS  audio codec bit rate to use for HTTP streams\n");
        fprintf(stderr, "                                  (default is %d, ignored unless --http-audio-codec is set)\n",
                DEFAULT_AUDIO_OUTPUT_BIT_RATE / 1000);
+       if (program == PROGRAM_KAERU) {
+               fprintf(stderr, "      --no-transcode-audio        copy encoded audio raw from the source stream\n");
+               fprintf(stderr, "                                    (requires --http-audio-codec= to be set)\n");
+       }
        fprintf(stderr, "      --http-coarse-timebase      use less timebase for HTTP (recommended for muxers\n");
        fprintf(stderr, "                                  that handle large pts poorly, like e.g. MP4)\n");
        if (program == PROGRAM_NAGERU) {
@@ -182,6 +187,7 @@ void parse_flags(Program program, int argc, char * const argv[])
                { "http-coarse-timebase", no_argument, 0, OPTION_HTTP_COARSE_TIMEBASE },
                { "http-audio-codec", required_argument, 0, OPTION_HTTP_AUDIO_CODEC },
                { "http-audio-bitrate", required_argument, 0, OPTION_HTTP_AUDIO_BITRATE },
+               { "no-transcode-audio", no_argument, 0, OPTION_NO_TRANSCODE_AUDIO },
                { "flat-audio", no_argument, 0, OPTION_FLAT_AUDIO },
                { "gain-staging", required_argument, 0, OPTION_GAIN_STAGING },
                { "disable-locut", no_argument, 0, OPTION_DISABLE_LOCUT },
@@ -288,6 +294,9 @@ void parse_flags(Program program, int argc, char * const argv[])
                case OPTION_HTTP_AUDIO_BITRATE:
                        global_flags.stream_audio_codec_bitrate = atoi(optarg) * 1000;
                        break;
+               case OPTION_NO_TRANSCODE_AUDIO:
+                       global_flags.transcode_audio = false;
+                       break;
                case OPTION_HTTP_X264_VIDEO:
                        global_flags.x264_video_to_http = true;
                        break;
@@ -476,6 +485,11 @@ void parse_flags(Program program, int argc, char * const argv[])
                fprintf(stderr, "ERROR: --output-card points to a nonexistant card\n");
                exit(1);
        }
+       if (!global_flags.transcode_audio && global_flags.stream_audio_codec_name.empty()) {
+               fprintf(stderr, "ERROR: If not transcoding audio, you must specify ahead-of-time what audio codec is in use\n");
+               fprintf(stderr, "       (using --http-audio-codec).\n");
+               exit(1);
+       }
        if (global_flags.x264_speedcontrol) {
                if (!global_flags.x264_preset.empty() && global_flags.x264_preset != "faster") {
                        fprintf(stderr, "WARNING: --x264-preset is overridden by --x264-speedcontrol (implicitly uses \"faster\" as base preset)\n");
diff --git a/flags.h b/flags.h
index 87b05b1f5379be2cdad3204b81d7af094d148bd2..e6bf08ecf576c4612d14e10f52dc4c671d63844e 100644 (file)
--- a/flags.h
+++ b/flags.h
@@ -59,6 +59,7 @@ struct Flags {
        bool ten_bit_input = false;
        bool ten_bit_output = false;  // Implies x264_video_to_disk == true and x264_bit_depth == 10.
        YCbCrInterpretation ycbcr_interpretation[MAX_VIDEO_CARDS];
+       bool transcode_audio = true;  // Kaeru only.
        int x264_bit_depth = 8;  // Not user-settable.
        bool use_zerocopy = false;  // Not user-settable.
        bool can_disable_srgb_decoder = false;  // Not user-settable.
index bb1b08fe8840f0b3a1179ed2c81cf2811d7b40ce..a82ee51494fa9c29d45d548f9c01cf4dd2a4c132 100644 (file)
--- a/kaeru.cpp
+++ b/kaeru.cpp
@@ -64,15 +64,45 @@ unique_ptr<Mux> create_mux(HTTPD *httpd, AVOutputFormat *oformat, X264Encoder *x
        return mux;
 }
 
-void video_frame_callback(FFmpegCapture *video, X264Encoder *x264_encoder, int64_t pts, AVRational timebase, uint16_t timecode,
+void video_frame_callback(FFmpegCapture *video, X264Encoder *x264_encoder, AudioEncoder *audio_encoder,
+                          int64_t video_pts, AVRational video_timebase,
+                          int64_t audio_pts, AVRational audio_timebase,
+                          uint16_t timecode,
                          FrameAllocator::Frame video_frame, size_t video_offset, VideoFormat video_format,
                          FrameAllocator::Frame audio_frame, size_t audio_offset, AudioFormat audio_format)
 {
-       if (pts >= 0 && video_frame.len > 0) {
-               pts = av_rescale_q(pts, timebase, AVRational{ 1, TIMEBASE });
+       if (video_pts >= 0 && video_frame.len > 0) {
+               video_pts = av_rescale_q(video_pts, video_timebase, AVRational{ 1, TIMEBASE });
                int64_t frame_duration = TIMEBASE * video_format.frame_rate_nom / video_format.frame_rate_den;
-               x264_encoder->add_frame(pts, frame_duration, video->get_current_frame_ycbcr_format().luma_coefficients, video_frame.data + video_offset, ReceivedTimestamps());
+               x264_encoder->add_frame(video_pts, frame_duration, video->get_current_frame_ycbcr_format().luma_coefficients, video_frame.data + video_offset, ReceivedTimestamps());
        }
+       if (audio_frame.len > 0) {
+               // FFmpegCapture takes care of this for us.
+               assert(audio_format.num_channels == 2);
+               assert(audio_format.sample_rate == OUTPUT_FREQUENCY);
+
+               // TODO: Reduce some duplication against AudioMixer here.
+               size_t num_samples = audio_frame.len / (audio_format.bits_per_sample / 8);
+               vector<float> float_samples;
+               float_samples.resize(num_samples);
+               if (audio_format.bits_per_sample == 16) {
+                       const int16_t *src = (const int16_t *)audio_frame.data;
+                       float *dst = &float_samples[0];
+                       for (size_t i = 0; i < num_samples; ++i) {
+                               *dst++ = le16toh(*src++) * (1.0f / 32768.0f);
+                       }
+               } else if (audio_format.bits_per_sample == 32) {
+                       const int32_t *src = (const int32_t *)audio_frame.data;
+                       float *dst = &float_samples[0];
+                       for (size_t i = 0; i < num_samples; ++i) {
+                               *dst++ = le32toh(*src++) * (1.0f / 2147483648.0f);
+                       }
+               } else {
+                       assert(false);
+               }
+               audio_pts = av_rescale_q(audio_pts, audio_timebase, AVRational{ 1, TIMEBASE });
+               audio_encoder->encode_audio(float_samples, audio_pts);
+       }
 
        if (video_frame.owner) {
                video_frame.owner->release_frame(video_frame);
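
With transcoding enabled, the callback has to hand AudioEncoder float samples, so the interleaved 16- or 32-bit little-endian PCM delivered by FFmpegCapture is scaled into [-1, 1) before encode_audio() is called, and audio_pts is rescaled to the global TIMEBASE just like the video pts. A self-contained sketch of the 16-bit half of that normalization; pcm16_to_float() is a hypothetical helper, and the real code additionally handles 32-bit input and goes through le16toh() first:

#include <cstdint>
#include <cstdio>
#include <vector>

// Convert interleaved signed 16-bit PCM to float in [-1.0, 1.0).
// Assumes host-endian input; the real code applies le16toh() per sample.
std::vector<float> pcm16_to_float(const int16_t *src, size_t num_samples)
{
	std::vector<float> out(num_samples);
	for (size_t i = 0; i < num_samples; ++i) {
		out[i] = src[i] * (1.0f / 32768.0f);
	}
	return out;
}

int main()
{
	const int16_t samples[] = { 0, 16384, -32768, 32767 };
	std::vector<float> f = pcm16_to_float(samples, 4);
	for (float v : f) {
		printf("%f\n", v);  // 0.0, 0.5, -1.0, ~0.99997
	}
}
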
@@ -104,20 +134,27 @@ int main(int argc, char *argv[])
        assert(oformat != nullptr);
 
        unique_ptr<AudioEncoder> audio_encoder;
-       if (global_flags.stream_audio_codec_name.empty()) {
-               audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
-       } else {
-               audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
+       if (global_flags.transcode_audio) {
+               if (global_flags.stream_audio_codec_name.empty()) {
+                       audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
+               } else {
+                       audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
+               }
        }
 
        X264Encoder x264_encoder(oformat);
        unique_ptr<Mux> http_mux = create_mux(&httpd, oformat, &x264_encoder, audio_encoder.get());
+       if (global_flags.transcode_audio) {
+               audio_encoder->add_mux(http_mux.get());
+       }
        x264_encoder.add_mux(http_mux.get());
 
        FFmpegCapture video(argv[optind], global_flags.width, global_flags.height);
        video.set_pixel_format(FFmpegCapture::PixelFormat_NV12);
-       video.set_frame_callback(bind(video_frame_callback, &video, &x264_encoder, _1, _2, _3, _4, _5, _6, _7, _8, _9));
-       video.set_audio_callback(bind(audio_frame_callback, http_mux.get(), _1, _2));
+       video.set_frame_callback(bind(video_frame_callback, &video, &x264_encoder, audio_encoder.get(), _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11));
+       if (!global_flags.transcode_audio) {
+               video.set_audio_callback(bind(audio_frame_callback, http_mux.get(), _1, _2));
+       }
        video.configure_card();
        video.start_bm_capture();
        video.change_rate(2.0);  // Be sure never to really fall behind, but also don't dump huge amounts of stuff onto x264.