From: Steinar H. Gunderson
Date: Fri, 7 Jul 2017 16:02:04 +0000 (+0200)
Subject: Initial check-in of Kaeru, a simple transcoder based on Nageru code.
X-Git-Tag: 1.6.1~6
X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=ad1641ad3ab50ecd17af2e1c2d980f26e6adf0bb;p=nageru

Initial check-in of Kaeru, a simple transcoder based on Nageru code.
---

diff --git a/Makefile b/Makefile
index a8f35f6..e924f15 100644
--- a/Makefile
+++ b/Makefile
@@ -30,9 +30,12 @@ OBJS += quicksync_encoder.o x264_encoder.o x264_dynamic.o x264_speed_control.o v
 # DeckLink
 OBJS += decklink_capture.o decklink_util.o decklink_output.o decklink/DeckLinkAPIDispatch.o
 
+KAERU_OBJS = kaeru.o x264_encoder.o mux.o metrics.o flags.o audio_encoder.o x264_speed_control.o print_latency.o x264_dynamic.o ffmpeg_raii.o ffmpeg_capture.o ffmpeg_util.o httpd.o metacube2.o
+
 # bmusb
 ifeq ($(EMBEDDED_BMUSB),yes)
   OBJS += bmusb/bmusb.o bmusb/fake_capture.o
+  KAERU_OBJS += bmusb/bmusb.o
 endif
 
 # FFmpeg input
@@ -58,6 +61,8 @@ all: nageru benchmark_audio_mixer
 
 nageru: $(OBJS)
 	$(CXX) -o $@ $^ $(LDFLAGS) $(LDLIBS)
+kaeru: $(KAERU_OBJS)
+	$(CXX) -o $@ $^ $(LDFLAGS) $(LDLIBS)
 benchmark_audio_mixer: $(BM_OBJS)
 	$(CXX) -o $@ $^ $(LDFLAGS) $(LDLIBS)
 
@@ -73,11 +78,11 @@ mainwindow.o: midi_mapping.pb.h
 midi_mapper.o: midi_mapping.pb.h
 midi_mapping_dialog.o: ui_midi_mapping.h midi_mapping.pb.h
 
-DEPS=$(OBJS:.o=.d) $(BM_OBJS:.o=.d)
+DEPS=$(OBJS:.o=.d) $(BM_OBJS:.o=.d) $(KAERU_OBJS:.o=.d)
 
 -include $(DEPS)
 
 clean:
-	$(RM) $(OBJS) $(BM_OBJS) $(DEPS) nageru benchmark_audio_mixer ui_aboutdialog.h ui_analyzer.h ui_mainwindow.h ui_display.h ui_about.h ui_audio_miniview.h ui_audio_expanded_view.h ui_input_mapping.h ui_midi_mapping.h chain-*.frag *.dot *.pb.cc *.pb.h $(OBJS_WITH_MOC:.o=.moc.cpp) ellipsis_label.moc.cpp clickable_label.moc.cpp
+	$(RM) $(OBJS) $(BM_OBJS) $(KAERU_OBJS) $(DEPS) nageru benchmark_audio_mixer ui_aboutdialog.h ui_analyzer.h ui_mainwindow.h ui_display.h ui_about.h ui_audio_miniview.h ui_audio_expanded_view.h ui_input_mapping.h ui_midi_mapping.h chain-*.frag *.dot *.pb.cc *.pb.h $(OBJS_WITH_MOC:.o=.moc.cpp) ellipsis_label.moc.cpp clickable_label.moc.cpp
 
 PREFIX=/usr/local
 install:

diff --git a/bmusb b/bmusb
index 32043c9..6a012a4 160000
--- a/bmusb
+++ b/bmusb
@@ -1 +1 @@
-Subproject commit 32043c95d3b9f8cb97d6d28b9996fa1bec2ce11b
+Subproject commit 6a012a41c5422092cdac1f18a9019f37c0b85368

diff --git a/ffmpeg_capture.cpp b/ffmpeg_capture.cpp
index be27e6f..48a395e 100644
--- a/ffmpeg_capture.cpp
+++ b/ffmpeg_capture.cpp
@@ -31,6 +31,7 @@ extern "C" {
 #include "ffmpeg_util.h"
 #include "flags.h"
 #include "image_input.h"
+#include "timebase.h"
 
 #define FRAME_SIZE (8 << 20)  // 8 MB.
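The timebase.h include above pulls Nageru's global TIMEBASE constant into the capture code; the hunks that follow pass every pts together with the AVRational timebase it is expressed in, so downstream code can rescale everything into one common clock. A minimal sketch of that conversion, assuming a 1/90000 input timebase (typical for MPEG-TS; the value is only an example):

    extern "C" {
    #include <libavutil/mathematics.h>
    #include <libavutil/rational.h>
    }
    #include "timebase.h"

    // Rescale a demuxer pts into Nageru's global timebase. Illustration only;
    // the real conversion happens in kaeru.cpp's video_frame_callback() below.
    int64_t to_global_pts(int64_t stream_pts, AVRational stream_timebase)
    {
        return av_rescale_q(stream_pts, stream_timebase, AVRational{1, TIMEBASE});
    }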
@@ -71,6 +72,9 @@ AVPixelFormat decide_dst_format(AVPixelFormat src_format, bmusb::PixelFormat dst
 	if (dst_format_type == bmusb::PixelFormat_8BitBGRA) {
 		return AV_PIX_FMT_BGRA;
 	}
+	if (dst_format_type == FFmpegCapture::PixelFormat_NV12) {
+		return AV_PIX_FMT_NV12;
+	}
 
 	assert(dst_format_type == bmusb::PixelFormat_8BitYCbCrPlanar);
 
@@ -313,7 +317,7 @@ void FFmpegCapture::send_disconnected_frame()
 		video_frame.len = width * height * 4;
 		memset(video_frame.data, 0, video_frame.len);
 
-		frame_callback(timecode++,
+		frame_callback(-1, AVRational{1, TIMEBASE}, timecode++,
 			video_frame, /*video_offset=*/0, video_format,
 			FrameAllocator::Frame(), /*audio_offset=*/0, AudioFormat());
 	}
@@ -350,6 +354,8 @@ bool FFmpegCapture::play_video(const string &pathname)
 		return false;
 	}
 
+	int audio_stream_index = find_stream_index(format_ctx.get(), AVMEDIA_TYPE_AUDIO);
+
 	const AVCodecParameters *codecpar = format_ctx->streams[video_stream_index]->codecpar;
 	video_timebase = format_ctx->streams[video_stream_index]->time_base;
 	AVCodecContextWithDeleter codec_ctx = avcodec_alloc_context3_unique(nullptr);
@@ -378,7 +384,7 @@ bool FFmpegCapture::play_video(const string &pathname)
 		}
 
 		bool error;
-		AVFrameWithDeleter frame = decode_frame(format_ctx.get(), codec_ctx.get(), pathname, video_stream_index, &error);
+		AVFrameWithDeleter frame = decode_frame(format_ctx.get(), codec_ctx.get(), pathname, video_stream_index, audio_stream_index, &error);
 		if (error) {
 			return false;
 		}
@@ -418,7 +424,7 @@ bool FFmpegCapture::play_video(const string &pathname)
 			video_frame.received_timestamp = next_frame_start;
 			bool finished_wakeup = producer_thread_should_quit.sleep_until(next_frame_start);
 			if (finished_wakeup) {
-				frame_callback(timecode++,
+				frame_callback(frame->pts, video_timebase, timecode++,
 					video_frame, 0, video_format,
 					audio_frame, 0, audio_format);
 				break;
@@ -494,7 +500,7 @@ bool FFmpegCapture::process_queued_commands(AVFormatContext *format_ctx, const s
 	return false;
 }
 
-AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *codec_ctx, const std::string &pathname, int video_stream_index, bool *error)
+AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *codec_ctx, const std::string &pathname, int video_stream_index, int audio_stream_index, bool *error)
 {
 	*error = false;
 
@@ -510,6 +516,9 @@ AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCo
 		pkt.data = nullptr;
 		pkt.size = 0;
 		if (av_read_frame(format_ctx, &pkt) == 0) {
+			if (pkt.stream_index == audio_stream_index && audio_callback != nullptr) {
+				audio_callback(&pkt, format_ctx->streams[audio_stream_index]->time_base);
+			}
 			if (pkt.stream_index != video_stream_index) {
 				// Ignore audio for now.
 				continue;
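With this change, decode_frame() sees every demuxed packet before the video-only filter: packets whose stream_index matches the audio stream are handed to audio_callback together with that stream's timebase, so compressed audio can be passed through without ever being decoded. The same dispatch pattern in isolation, as a sketch (demux_loop and audio_handler are illustrative names, not Nageru API):

    extern "C" {
    #include <libavformat/avformat.h>
    }
    #include <functional>

    // Route demuxed packets by stream index; compressed audio is forwarded
    // as-is, everything else is dropped here. Mirrors the dispatch added to
    // FFmpegCapture::decode_frame(); the handler is a placeholder.
    void demux_loop(AVFormatContext *ctx, int video_idx, int audio_idx,
                    std::function<void(const AVPacket *, AVRational)> audio_handler)
    {
        AVPacket pkt;
        av_init_packet(&pkt);
        pkt.data = nullptr;
        pkt.size = 0;
        while (av_read_frame(ctx, &pkt) == 0) {
            if (pkt.stream_index == audio_idx && audio_handler != nullptr) {
                audio_handler(&pkt, ctx->streams[audio_idx]->time_base);
            }
            // Video packets would go to the decoder at this point.
            av_packet_unref(&pkt);
        }
    }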
@@ -547,6 +556,8 @@ VideoFormat FFmpegCapture::construct_video_format(const AVFrame *frame, AVRation
 	video_format.height = height;
 	if (pixel_format == bmusb::PixelFormat_8BitBGRA) {
 		video_format.stride = width * 4;
+	} else if (pixel_format == FFmpegCapture::PixelFormat_NV12) {
+		video_format.stride = width;
 	} else {
 		assert(pixel_format == bmusb::PixelFormat_8BitYCbCrPlanar);
 		video_format.stride = width;
@@ -597,6 +608,17 @@ FrameAllocator::Frame FFmpegCapture::make_video_frame(const AVFrame *frame, cons
 		pic_data[0] = video_frame.data;
 		linesizes[0] = width * 4;
 		video_frame.len = (width * 4) * height;
+	} else if (pixel_format == PixelFormat_NV12) {
+		pic_data[0] = video_frame.data;
+		linesizes[0] = width;
+
+		pic_data[1] = pic_data[0] + width * height;
+		linesizes[1] = width;
+
+		video_frame.len = (width * 2) * height;
+
+		const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(sws_dst_format);
+		current_frame_ycbcr_format = decode_ycbcr_format(desc, frame);
 	} else {
 		assert(pixel_format == bmusb::PixelFormat_8BitYCbCrPlanar);
 		const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(sws_dst_format);
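For reference, NV12 is a full-resolution luma plane followed by a single half-height plane of interleaved Cb/Cr pairs, so both planes have a stride of width and the chroma plane starts width * height bytes in; a tightly packed frame needs width * height * 3 / 2 bytes, so the (width * 2) * height reservation above leaves headroom. The same layout arithmetic as a small sketch:

    #include <cstddef>
    #include <cstdint>

    // Plane layout of a contiguous NV12 frame, as set up in
    // make_video_frame() above: luma first, then interleaved Cb/Cr.
    struct NV12View {
        uint8_t *y;     // width x height, stride = width
        uint8_t *cbcr;  // width x (height / 2), stride = width (Cb,Cr pairs)
    };

    NV12View nv12_planes(uint8_t *base, size_t width, size_t height)
    {
        NV12View v;
        v.y = base;
        v.cbcr = base + width * height;  // tightly packed size: width * height * 3 / 2
        return v;
    }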
diff --git a/ffmpeg_capture.h b/ffmpeg_capture.h
index 85e436e..afca641 100644
--- a/ffmpeg_capture.h
+++ b/ffmpeg_capture.h
@@ -42,6 +42,7 @@ extern "C" {
 struct AVFormatContext;
 struct AVFrame;
 struct AVRational;
+struct AVPacket;
 
 class FFmpegCapture : public bmusb::CaptureInterface
 {
@@ -101,11 +102,37 @@ public:
 		return audio_frame_allocator;
 	}
 
-	void set_frame_callback(bmusb::frame_callback_t callback) override
+	// FFmpegCapture-specific overload of set_frame_callback that also gives
+	// the raw original pts from the video. Negative pts means a dummy frame.
+	typedef std::function<void(int64_t pts, AVRational timebase, uint16_t timecode,
+	                           bmusb::FrameAllocator::Frame video_frame, size_t video_offset, bmusb::VideoFormat video_format,
+	                           bmusb::FrameAllocator::Frame audio_frame, size_t audio_offset, bmusb::AudioFormat audio_format)>
+		frame_callback_t;
+	void set_frame_callback(frame_callback_t callback)
 	{
 		frame_callback = callback;
 	}
+	void set_frame_callback(bmusb::frame_callback_t callback) override
+	{
+		frame_callback = std::bind(
+			callback,
+			std::placeholders::_3,
+			std::placeholders::_4,
+			std::placeholders::_5,
+			std::placeholders::_6,
+			std::placeholders::_7,
+			std::placeholders::_8,
+			std::placeholders::_9);
+	}
+
+	// FFmpegCapture-specific callback that gives the raw audio.
+	typedef std::function<void(const AVPacket *pkt, AVRational timebase)> audio_callback_t;
+	void set_audio_callback(audio_callback_t callback)
+	{
+		audio_callback = callback;
+	}
+
 	// Used to get precise information about the Y'CbCr format used
 	// for a given frame. Only valid to call during the frame callback,
 	// and only when receiving a frame with pixel format PixelFormat_8BitYCbCrPlanar.
@@ -135,8 +162,9 @@ public:
 	void set_video_mode(uint32_t video_mode_id) override {}  // Ignore.
 	uint32_t get_current_video_mode() const override { return 0; }
 
+	static constexpr bmusb::PixelFormat PixelFormat_NV12 = static_cast<bmusb::PixelFormat>(100);  // In the private range.
 	std::set<bmusb::PixelFormat> get_available_pixel_formats() const override {
-		return std::set<bmusb::PixelFormat>{ bmusb::PixelFormat_8BitBGRA, bmusb::PixelFormat_8BitYCbCrPlanar };
+		return std::set<bmusb::PixelFormat>{ bmusb::PixelFormat_8BitBGRA, bmusb::PixelFormat_8BitYCbCrPlanar, PixelFormat_NV12 };
 	}
 	void set_pixel_format(bmusb::PixelFormat pixel_format) override {
 		this->pixel_format = pixel_format;
@@ -166,7 +194,7 @@ private:
 	bool process_queued_commands(AVFormatContext *format_ctx, const std::string &pathname, timespec last_modified, bool *rewound);
 
 	// Returns nullptr if no frame was decoded (e.g. EOF).
-	AVFrameWithDeleter decode_frame(AVFormatContext *format_ctx, AVCodecContext *codec_ctx, const std::string &pathname, int video_stream_index, bool *error);
+	AVFrameWithDeleter decode_frame(AVFormatContext *format_ctx, AVCodecContext *codec_ctx, const std::string &pathname, int video_stream_index, int audio_stream_index, bool *error);
 	bmusb::VideoFormat construct_video_format(const AVFrame *frame, AVRational video_timebase);
 	bmusb::FrameAllocator::Frame make_video_frame(const AVFrame *frame, const std::string &pathname, bool *error);
 
@@ -188,7 +216,8 @@ private:
 	bmusb::FrameAllocator *audio_frame_allocator = nullptr;
 	std::unique_ptr<bmusb::FrameAllocator> owned_video_frame_allocator;
 	std::unique_ptr<bmusb::FrameAllocator> owned_audio_frame_allocator;
-	bmusb::frame_callback_t frame_callback = nullptr;
+	frame_callback_t frame_callback = nullptr;
+	audio_callback_t audio_callback = nullptr;
 
 	SwsContextWithDeleter sws_ctx;
 	int sws_last_width = -1, sws_last_height = -1, sws_last_src_format = -1;
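The header now has two set_frame_callback() overloads: the bmusb-style one is adapted via std::bind, where placeholders _3 through _9 select the seven legacy arguments and silently drop the new pts and timebase, while new code can take the extended signature directly. A sketch of subscribing with lambdas instead of std::bind (register_callbacks is an illustrative name; the lambda bodies are stubs, and real consumers must hand the frames back to their FrameAllocator owners, as kaeru.cpp below does):

    #include "ffmpeg_capture.h"

    // Hook up the pts-aware video callback and the raw-audio callback.
    void register_callbacks(FFmpegCapture *video)
    {
        video->set_frame_callback(
            [](int64_t pts, AVRational timebase, uint16_t timecode,
               bmusb::FrameAllocator::Frame video_frame, size_t video_offset, bmusb::VideoFormat video_format,
               bmusb::FrameAllocator::Frame audio_frame, size_t audio_offset, bmusb::AudioFormat audio_format) {
                // pts < 0 signals a dummy (disconnected) frame.
                // Remember to release video_frame/audio_frame to their owners.
            });
        video->set_audio_callback(
            [](const AVPacket *pkt, AVRational timebase) {
                // Compressed audio packet, in the stream's own timebase.
            });
    }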
recording\n"); + fprintf(stderr, " -v, --va-display=SPEC VA-API device for H.264 encoding\n"); + fprintf(stderr, " ($DISPLAY spec or /dev/dri/render* path)\n"); + fprintf(stderr, " -m, --map-signal=SIGNAL,CARD set a default card mapping (can be given multiple times)\n"); + fprintf(stderr, " -M, --input-mapping=FILE start with the given audio input mapping (implies --multichannel)\n"); + fprintf(stderr, " --multichannel start in multichannel audio mapping mode\n"); + fprintf(stderr, " --midi-mapping=FILE start with the given MIDI controller mapping (implies --multichannel)\n"); + fprintf(stderr, " --fake-cards-audio make fake (disconnected) cards output a simple tone\n"); + fprintf(stderr, " --http-uncompressed-video send uncompressed NV12 video to HTTP clients\n"); + fprintf(stderr, " --http-x264-video send x264-compressed video to HTTP clients\n"); + fprintf(stderr, " --record-x264-video store x264-compressed video to disk (implies --http-x264-video,\n"); + fprintf(stderr, " removes the need for working VA-API encoding)\n"); + } fprintf(stderr, " --x264-preset x264 quality preset (default " X264_DEFAULT_PRESET ")\n"); fprintf(stderr, " --x264-tune x264 tuning (default " X264_DEFAULT_TUNE ", can be blank)\n"); fprintf(stderr, " --x264-speedcontrol try to match x264 preset to available CPU speed\n"); @@ -103,45 +109,47 @@ void usage() DEFAULT_AUDIO_OUTPUT_BIT_RATE / 1000); fprintf(stderr, " --http-coarse-timebase use less timebase for HTTP (recommended for muxers\n"); fprintf(stderr, " that handle large pts poorly, like e.g. MP4)\n"); - fprintf(stderr, " --flat-audio start with most audio processing turned off\n"); - fprintf(stderr, " (can be overridden by e.g. --enable-limiter)\n"); - fprintf(stderr, " --gain-staging=DB set initial gain staging to the given value\n"); - fprintf(stderr, " (--disable-gain-staging-auto)\n"); - fprintf(stderr, " --disable-locut turn off locut filter (also --enable)\n"); - fprintf(stderr, " --disable-gain-staging-auto turn off automatic gain staging (also --enable)\n"); - fprintf(stderr, " --disable-compressor turn off regular compressor (also --enable)\n"); - fprintf(stderr, " --disable-limiter turn off limiter (also --enable)\n"); - fprintf(stderr, " --disable-makeup-gain-auto turn off auto-adjustment of final makeup gain (also --enable)\n"); - fprintf(stderr, " --disable-alsa-output disable audio monitoring via ALSA\n"); - fprintf(stderr, " --no-flush-pbos do not explicitly signal texture data uploads\n"); - fprintf(stderr, " (will give display corruption, but makes it\n"); - fprintf(stderr, " possible to run with apitrace in real time)\n"); - fprintf(stderr, " --print-video-latency print out measurements of video latency on stdout\n"); - fprintf(stderr, " --max-input-queue-frames=FRAMES never keep more than FRAMES frames for each card\n"); - fprintf(stderr, " (default 6, minimum 1)\n"); - fprintf(stderr, " --audio-queue-length-ms=MS length of audio resampling queue (default 100.0)\n"); - fprintf(stderr, " --output-ycbcr-coefficients={rec601,rec709,auto}\n"); - fprintf(stderr, " Y'CbCr coefficient standard of output (default auto)\n"); - fprintf(stderr, " auto is rec601, unless --output-card is used\n"); - fprintf(stderr, " and a Rec. 
@@ -103,45 +109,47 @@
 	        DEFAULT_AUDIO_OUTPUT_BIT_RATE / 1000);
 	fprintf(stderr, "      --http-coarse-timebase      use less timebase for HTTP (recommended for muxers\n");
 	fprintf(stderr, "                                    that handle large pts poorly, like e.g. MP4)\n");
-	fprintf(stderr, "      --flat-audio                start with most audio processing turned off\n");
-	fprintf(stderr, "                                    (can be overridden by e.g. --enable-limiter)\n");
-	fprintf(stderr, "      --gain-staging=DB           set initial gain staging to the given value\n");
-	fprintf(stderr, "                                    (--disable-gain-staging-auto)\n");
-	fprintf(stderr, "      --disable-locut             turn off locut filter (also --enable)\n");
-	fprintf(stderr, "      --disable-gain-staging-auto turn off automatic gain staging (also --enable)\n");
-	fprintf(stderr, "      --disable-compressor        turn off regular compressor (also --enable)\n");
-	fprintf(stderr, "      --disable-limiter           turn off limiter (also --enable)\n");
-	fprintf(stderr, "      --disable-makeup-gain-auto  turn off auto-adjustment of final makeup gain (also --enable)\n");
-	fprintf(stderr, "      --disable-alsa-output       disable audio monitoring via ALSA\n");
-	fprintf(stderr, "      --no-flush-pbos             do not explicitly signal texture data uploads\n");
-	fprintf(stderr, "                                    (will give display corruption, but makes it\n");
-	fprintf(stderr, "                                    possible to run with apitrace in real time)\n");
-	fprintf(stderr, "      --print-video-latency       print out measurements of video latency on stdout\n");
-	fprintf(stderr, "      --max-input-queue-frames=FRAMES  never keep more than FRAMES frames for each card\n");
-	fprintf(stderr, "                                    (default 6, minimum 1)\n");
-	fprintf(stderr, "      --audio-queue-length-ms=MS  length of audio resampling queue (default 100.0)\n");
-	fprintf(stderr, "      --output-ycbcr-coefficients={rec601,rec709,auto}\n");
-	fprintf(stderr, "                                  Y'CbCr coefficient standard of output (default auto)\n");
-	fprintf(stderr, "                                    auto is rec601, unless --output-card is used\n");
-	fprintf(stderr, "                                    and a Rec. 709 mode (typically HD modes) is in use\n");
-	fprintf(stderr, "      --output-buffer-frames=NUM  number of frames in output buffer for --output-card,\n");
-	fprintf(stderr, "                                    can be fractional (default 6.0); note also\n");
-	fprintf(stderr, "                                    the audio queue can't be much longer than this\n");
-	fprintf(stderr, "      --output-slop-frames=NUM    if more less than this number of frames behind for\n");
-	fprintf(stderr, "                                    --output-card, try to submit anyway instead of\n");
-	fprintf(stderr, "                                    dropping the frame (default 0.5)\n");
-	fprintf(stderr, "      --timecode-stream           show timestamp and timecode in stream\n");
-	fprintf(stderr, "      --timecode-stdout           show timestamp and timecode on standard output\n");
-	fprintf(stderr, "      --10-bit-input              use 10-bit video input (requires compute shaders)\n");
-	fprintf(stderr, "      --10-bit-output             use 10-bit video output (requires compute shaders,\n");
-	fprintf(stderr, "                                    implies --record-x264-video)\n");
-	fprintf(stderr, "      --input-ycbcr-interpretation=CARD,{rec601,rec709,auto}[,{limited,full}]\n");
-	fprintf(stderr, "                                  Y'CbCr coefficient standard of card CARD (default auto)\n");
-	fprintf(stderr, "                                    auto is rec601 for SD, rec709 for HD, always limited\n");
-	fprintf(stderr, "                                    limited means standard 0-240/0-235 input range (for 8-bit)\n");
+	if (program == PROGRAM_NAGERU) {
+		fprintf(stderr, "      --flat-audio                start with most audio processing turned off\n");
+		fprintf(stderr, "                                    (can be overridden by e.g. --enable-limiter)\n");
+		fprintf(stderr, "      --gain-staging=DB           set initial gain staging to the given value\n");
+		fprintf(stderr, "                                    (--disable-gain-staging-auto)\n");
+		fprintf(stderr, "      --disable-locut             turn off locut filter (also --enable)\n");
+		fprintf(stderr, "      --disable-gain-staging-auto turn off automatic gain staging (also --enable)\n");
+		fprintf(stderr, "      --disable-compressor        turn off regular compressor (also --enable)\n");
+		fprintf(stderr, "      --disable-limiter           turn off limiter (also --enable)\n");
+		fprintf(stderr, "      --disable-makeup-gain-auto  turn off auto-adjustment of final makeup gain (also --enable)\n");
+		fprintf(stderr, "      --disable-alsa-output       disable audio monitoring via ALSA\n");
+		fprintf(stderr, "      --no-flush-pbos             do not explicitly signal texture data uploads\n");
+		fprintf(stderr, "                                    (will give display corruption, but makes it\n");
+		fprintf(stderr, "                                    possible to run with apitrace in real time)\n");
+		fprintf(stderr, "      --print-video-latency       print out measurements of video latency on stdout\n");
+		fprintf(stderr, "      --max-input-queue-frames=FRAMES  never keep more than FRAMES frames for each card\n");
+		fprintf(stderr, "                                    (default 6, minimum 1)\n");
+		fprintf(stderr, "      --audio-queue-length-ms=MS  length of audio resampling queue (default 100.0)\n");
+		fprintf(stderr, "      --output-ycbcr-coefficients={rec601,rec709,auto}\n");
+		fprintf(stderr, "                                  Y'CbCr coefficient standard of output (default auto)\n");
+		fprintf(stderr, "                                    auto is rec601, unless --output-card is used\n");
+		fprintf(stderr, "                                    and a Rec. 709 mode (typically HD modes) is in use\n");
+		fprintf(stderr, "      --output-buffer-frames=NUM  number of frames in output buffer for --output-card,\n");
+		fprintf(stderr, "                                    can be fractional (default 6.0); note also\n");
+		fprintf(stderr, "                                    the audio queue can't be much longer than this\n");
+		fprintf(stderr, "      --output-slop-frames=NUM    if more less than this number of frames behind for\n");
+		fprintf(stderr, "                                    --output-card, try to submit anyway instead of\n");
+		fprintf(stderr, "                                    dropping the frame (default 0.5)\n");
+		fprintf(stderr, "      --timecode-stream           show timestamp and timecode in stream\n");
+		fprintf(stderr, "      --timecode-stdout           show timestamp and timecode on standard output\n");
+		fprintf(stderr, "      --10-bit-input              use 10-bit video input (requires compute shaders)\n");
+		fprintf(stderr, "      --10-bit-output             use 10-bit video output (requires compute shaders,\n");
+		fprintf(stderr, "                                    implies --record-x264-video)\n");
+		fprintf(stderr, "      --input-ycbcr-interpretation=CARD,{rec601,rec709,auto}[,{limited,full}]\n");
+		fprintf(stderr, "                                  Y'CbCr coefficient standard of card CARD (default auto)\n");
+		fprintf(stderr, "                                    auto is rec601 for SD, rec709 for HD, always limited\n");
+		fprintf(stderr, "                                    limited means standard 0-240/0-235 input range (for 8-bit)\n");
+	}
 }
 
-void parse_flags(int argc, char * const argv[])
+void parse_flags(Program program, int argc, char * const argv[])
 {
 	static const option long_options[] = {
 		{ "help", no_argument, 0, OPTION_HELP },
@@ -444,12 +452,12 @@ void parse_flags(int argc, char * const argv[])
 			break;
 		}
 		case OPTION_HELP:
-			usage();
+			usage(program);
 			exit(0);
 		default:
 			fprintf(stderr, "Unknown option '%s'\n", argv[option_index]);
 			fprintf(stderr, "\n");
-			usage();
+			usage(program);
 			exit(1);
 		}
 	}
diff --git a/flags.h b/flags.h
index f26d4db..87b05b1 100644
--- a/flags.h
+++ b/flags.h
@@ -65,6 +65,11 @@ struct Flags {
 };
 extern Flags global_flags;
 
-void parse_flags(int argc, char * const argv[]);
+enum Program {
+	PROGRAM_NAGERU,
+	PROGRAM_KAERU
+};
+void usage(Program program);
+void parse_flags(Program program, int argc, char * const argv[]);
 
 #endif  // !defined(_FLAGS_H)
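flags.h now exports the Program selector, so both binaries parse the same flag table but print their own usage text; main.cpp and kaeru.cpp (below) differ only in the constant they pass. The shared entry-point pattern, as a minimal sketch (the body is a stub):

    #include "flags.h"

    // Shared entry-point pattern: parse the common flag table under the
    // right program identity, then read the results out of global_flags.
    int main(int argc, char *argv[])
    {
        parse_flags(PROGRAM_KAERU, argc, argv);  // or PROGRAM_NAGERU
        // Parsed values are published in the global Flags instance,
        // e.g. global_flags.width and global_flags.height.
        return 0;
    }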
diff --git a/kaeru.cpp b/kaeru.cpp
new file mode 100644
index 0000000..bb1b08f
--- /dev/null
+++ b/kaeru.cpp
@@ -0,0 +1,130 @@
+// Kaeru (変える), a simple transcoder intended for use with Nageru.
+// This is experimental code, not yet supported.
+
+#include "audio_encoder.h"
+#include "defs.h"
+#include "flags.h"
+#include "ffmpeg_capture.h"
+#include "mixer.h"
+#include "mux.h"
+#include "timebase.h"
+#include "x264_encoder.h"
+
+#include
+#include
+#include
+#include
+
+using namespace bmusb;
+using namespace movit;
+using namespace std;
+using namespace std::placeholders;
+
+Mixer *global_mixer = nullptr;
+
+int write_packet(void *opaque, uint8_t *buf, int buf_size, AVIODataMarkerType type, int64_t time)
+{
+	static bool seen_sync_markers = false;
+	static string stream_mux_header;
+	HTTPD *httpd = (HTTPD *)opaque;
+
+	if (type == AVIO_DATA_MARKER_SYNC_POINT || type == AVIO_DATA_MARKER_BOUNDARY_POINT) {
+		seen_sync_markers = true;
+	} else if (type == AVIO_DATA_MARKER_UNKNOWN && !seen_sync_markers) {
+		// We don't know if this is a keyframe or not (the muxer could
+		// avoid marking it), so we just have to make the best of it.
+		type = AVIO_DATA_MARKER_SYNC_POINT;
+	}
+
+	if (type == AVIO_DATA_MARKER_HEADER) {
+		stream_mux_header.append((char *)buf, buf_size);
+		httpd->set_header(stream_mux_header);
+	} else {
+		httpd->add_data((char *)buf, buf_size, type == AVIO_DATA_MARKER_SYNC_POINT);
+	}
+	return buf_size;
+}
+
+unique_ptr<Mux> create_mux(HTTPD *httpd, AVOutputFormat *oformat, X264Encoder *x264_encoder, AudioEncoder *audio_encoder)
+{
+	AVFormatContext *avctx = avformat_alloc_context();
+	avctx->oformat = oformat;
+
+	uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
+	avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, httpd, nullptr, nullptr, nullptr);
+	avctx->pb->write_data_type = &write_packet;
+	avctx->pb->ignore_boundary_point = 1;
+
+	string video_extradata = x264_encoder->get_global_headers();
+
+	unique_ptr<Mux> mux;
+	int time_base = global_flags.stream_coarse_timebase ? COARSE_TIMEBASE : TIMEBASE;
+	mux.reset(new Mux(avctx, global_flags.width, global_flags.height, Mux::CODEC_H264, video_extradata, audio_encoder->get_codec_parameters().get(), time_base,
+		/*write_callback=*/nullptr, Mux::WRITE_FOREGROUND, {}));
+	return mux;
+}
+
+void video_frame_callback(FFmpegCapture *video, X264Encoder *x264_encoder, int64_t pts, AVRational timebase, uint16_t timecode,
+                          FrameAllocator::Frame video_frame, size_t video_offset, VideoFormat video_format,
+                          FrameAllocator::Frame audio_frame, size_t audio_offset, AudioFormat audio_format)
+{
+	if (pts >= 0 && video_frame.len > 0) {
+		pts = av_rescale_q(pts, timebase, AVRational{ 1, TIMEBASE });
+		int64_t frame_duration = TIMEBASE * video_format.frame_rate_nom / video_format.frame_rate_den;
+		x264_encoder->add_frame(pts, frame_duration, video->get_current_frame_ycbcr_format().luma_coefficients, video_frame.data + video_offset, ReceivedTimestamps());
+	}
+
+	if (video_frame.owner) {
+		video_frame.owner->release_frame(video_frame);
+	}
+	if (audio_frame.owner) {
+		audio_frame.owner->release_frame(audio_frame);
+	}
+}
+
+void audio_frame_callback(Mux *mux, const AVPacket *pkt, AVRational timebase)
+{
+	mux->add_packet(*pkt, pkt->pts, pkt->dts == AV_NOPTS_VALUE ? pkt->pts : pkt->dts, timebase);
+}
+
+int main(int argc, char *argv[])
+{
+	parse_flags(PROGRAM_KAERU, argc, argv);
+	if (optind + 1 != argc) {
+		usage(PROGRAM_KAERU);
+		exit(1);
+	}
+
+	av_register_all();
+	avformat_network_init();
+
+	HTTPD httpd;
+
+	AVOutputFormat *oformat = av_guess_format(global_flags.stream_mux_name.c_str(), nullptr, nullptr);
+	assert(oformat != nullptr);
+
+	unique_ptr<AudioEncoder> audio_encoder;
+	if (global_flags.stream_audio_codec_name.empty()) {
+		audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
+	} else {
+		audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
+	}
+
+	X264Encoder x264_encoder(oformat);
+	unique_ptr<Mux> http_mux = create_mux(&httpd, oformat, &x264_encoder, audio_encoder.get());
+	x264_encoder.add_mux(http_mux.get());
+
+	FFmpegCapture video(argv[optind], global_flags.width, global_flags.height);
+	video.set_pixel_format(FFmpegCapture::PixelFormat_NV12);
+	video.set_frame_callback(bind(video_frame_callback, &video, &x264_encoder, _1, _2, _3, _4, _5, _6, _7, _8, _9));
+	video.set_audio_callback(bind(audio_frame_callback, http_mux.get(), _1, _2));
+	video.configure_card();
+	video.start_bm_capture();
+	video.change_rate(2.0);  // Be sure never to really fall behind, but also don't dump huge amounts of stuff onto x264.
+
+	httpd.start(9095);
+
+	for ( ;; ) {
+		sleep(3600);
+	}
+}
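Kaeru's mux never touches a file: create_mux() gives the muxer a custom AVIOContext whose write_data_type callback (write_packet() above) receives every muxed chunk tagged as header, sync point, or unknown, which is what lets HTTPD start new clients at a keyframe. A stripped-down version of that wiring against FFmpeg's public API, with a byte-counting callback standing in for HTTPD (make_custom_io_muxer and count_bytes are illustrative names):

    extern "C" {
    #include <libavformat/avformat.h>
    #include <libavformat/avio.h>
    }

    // Every muxed chunk lands here, tagged with its AVIODataMarkerType;
    // this stub just counts bytes.
    static int count_bytes(void *opaque, uint8_t *buf, int buf_size,
                           AVIODataMarkerType type, int64_t time)
    {
        *(int64_t *)opaque += buf_size;
        return buf_size;
    }

    AVFormatContext *make_custom_io_muxer(AVOutputFormat *oformat, int64_t *counter, int bufsize)
    {
        AVFormatContext *avctx = avformat_alloc_context();
        avctx->oformat = oformat;
        uint8_t *buf = (uint8_t *)av_malloc(bufsize);
        avctx->pb = avio_alloc_context(buf, bufsize, /*write_flag=*/1, counter,
                                       nullptr, nullptr, nullptr);
        avctx->pb->write_data_type = count_bytes;
        return avctx;
    }

Once such an AVFormatContext is handed to Mux, as create_mux() does, everything the muxer flushes goes to the callback instead of to disk.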
diff --git a/main.cpp b/main.cpp
index dcaf2a6..fb67e0e 100644
--- a/main.cpp
+++ b/main.cpp
@@ -21,7 +21,7 @@ extern "C" {
 
 int main(int argc, char *argv[])
 {
-	parse_flags(argc, argv);
+	parse_flags(PROGRAM_NAGERU, argc, argv);
 
 	if (global_flags.va_display.empty() ||
 	    global_flags.va_display[0] != '/') {

diff --git a/mux.cpp b/mux.cpp
index 5c6a150..f52f795 100644
--- a/mux.cpp
+++ b/mux.cpp
@@ -145,7 +145,7 @@ Mux::~Mux()
 	avformat_free_context(avctx);
 }
 
-void Mux::add_packet(const AVPacket &pkt, int64_t pts, int64_t dts)
+void Mux::add_packet(const AVPacket &pkt, int64_t pts, int64_t dts, AVRational timebase)
 {
 	AVPacket pkt_copy;
 	if (av_copy_packet(&pkt_copy, &pkt) < 0) {
@@ -153,13 +153,13 @@ void Mux::add_packet(const AVPacket &pkt, int64_t pts, int64_t dts)
 		exit(1);
 	}
 	if (pkt.stream_index == 0) {
-		pkt_copy.pts = av_rescale_q(pts, AVRational{1, TIMEBASE}, avstream_video->time_base);
-		pkt_copy.dts = av_rescale_q(dts, AVRational{1, TIMEBASE}, avstream_video->time_base);
-		pkt_copy.duration = av_rescale_q(pkt.duration, AVRational{1, TIMEBASE}, avstream_video->time_base);
+		pkt_copy.pts = av_rescale_q(pts, timebase, avstream_video->time_base);
+		pkt_copy.dts = av_rescale_q(dts, timebase, avstream_video->time_base);
+		pkt_copy.duration = av_rescale_q(pkt.duration, timebase, avstream_video->time_base);
 	} else if (pkt.stream_index == 1) {
-		pkt_copy.pts = av_rescale_q(pts, AVRational{1, TIMEBASE}, avstream_audio->time_base);
-		pkt_copy.dts = av_rescale_q(dts, AVRational{1, TIMEBASE}, avstream_audio->time_base);
-		pkt_copy.duration = av_rescale_q(pkt.duration, AVRational{1, TIMEBASE}, avstream_audio->time_base);
+		pkt_copy.pts = av_rescale_q(pts, timebase, avstream_audio->time_base);
+		pkt_copy.dts = av_rescale_q(dts, timebase, avstream_audio->time_base);
+		pkt_copy.duration = av_rescale_q(pkt.duration, timebase, avstream_audio->time_base);
 	} else {
 		assert(false);
 	}

diff --git a/mux.h b/mux.h
index 5bf3e41..e6193e0 100644
--- a/mux.h
+++ b/mux.h
@@ -18,6 +18,8 @@ extern "C" {
 #include
 #include
 
+#include "timebase.h"
+
 struct MuxMetrics {
 	// “written” will usually be equal video + audio + mux overhead,
 	// except that there could be buffered packets that count in audio or video
@@ -60,7 +62,7 @@ public:
 	// will be added to.
 	Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const std::string &video_extradata, const AVCodecParameters *audio_codecpar, int time_base, std::function<void(int64_t)> write_callback, WriteStrategy write_strategy, const std::vector<MuxMetrics *> &metrics);
 	~Mux();
-	void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts);
+	void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts, AVRational timebase = { 1, TIMEBASE });
 
 	// As long as the mux is plugged, it will not actually write anything to disk,
 	// just queue the packets. Once it is unplugged, the packets are reordered by pts