From 3a1d58dee269fdca9cab7f482e4e8695b3fb2ffd Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Mon, 3 Apr 2023 22:09:07 +0200
Subject: [PATCH] Add a LD_PRELOAD-able library to force Metacube output from
 FFmpeg.

This hooks just the right amount of functions to add Metacube output
to arbitrary FFmpeg programs, but is obviously very brittle.
(Native FFmpeg support would be better, but a patch did not
go through when I tried a while back.) It is only lightly tested.
Documentation in the README and cubemap.config.sample.
---
 Makefile.in            |  13 +-
 README                 |   9 ++
 configure.ac           |   8 +
 cubemap.config.sample  |  14 ++
 ffmpeg_metacube_hack.c | 356 +++++++++++++++++++++++++++++++++++++++++
 5 files changed, 399 insertions(+), 1 deletion(-)
 create mode 100644 ffmpeg_metacube_hack.c

diff --git a/Makefile.in b/Makefile.in
index b4d8456..5f9f053 100644
--- a/Makefile.in
+++ b/Makefile.in
@@ -5,12 +5,17 @@ PROTOC=protoc
 CPPFLAGS=@CPPFLAGS@
 CPPFLAGS += -Itlse -DWITH_KTLS -DNO_TLS_LEGACY_SUPPORT -DNO_SSL_COMPATIBLE_INTERFACE -DLTM_DESC -DTLS_REEXPORTABLE -DNO_TLS_WITH_CHACHA20_POLY1305
 CXXFLAGS=-Wall @CXXFLAGS@ @protobuf_CFLAGS@ @libsystemd_CFLAGS@ @libtomcrypt_CFLAGS@ -pthread
+CFLAGS=-Wall @CFLAGS@ @libavformat_CFLAGS@ @libavutil_CFLAGS@
 LDFLAGS=@LDFLAGS@ -pthread
 LIBS=@LIBS@ @protobuf_LIBS@ @libsystemd_LIBS@ @libtomcrypt_LIBS@
 
 OBJS=main.o client.o server.o stream.o udpstream.o serverpool.o input.o input_stats.o httpinput.o udpinput.o parse.o config.o acceptor.o stats.o accesslog.o thread.o util.o log.o metacube2.o sa_compare.o timespec.o state.pb.o tlse/tlse.o
 
+ifeq (@have_ffmpeg@,yes)
+all: cubemap ffmpeg_metacube_hack.so
+else
 all: cubemap
+endif
 
 %.pb.cc %.pb.h : %.proto
 	$(PROTOC) --cpp_out=. $<
@@ -23,16 +28,19 @@ all: cubemap
 	$(CXX) -MMD -MP $(CPPFLAGS) $(CXXFLAGS) -o $@ -c $<
 cubemap: $(OBJS)
 	$(CXX) -o cubemap $(OBJS) $(LIBS) $(LDFLAGS)
+ffmpeg_metacube_hack.so: ffmpeg_metacube_hack.c
+	$(CC) -shared $(CPPFLAGS) $(CFLAGS) -fPIC -o $@ $< -ldl
 
 DEPS=$(OBJS:.o=.d)
 -include $(DEPS)
 
 clean:
-	$(RM) cubemap $(OBJS) $(DEPS) state.pb.h state.pb.cc
+	$(RM) cubemap $(OBJS) $(DEPS) state.pb.h state.pb.cc ffmpeg_metacube_hack.so
 
 PREFIX=@prefix@
 SYSCONFDIR=@sysconfdir@
 LOCALSTATEDIR=@localstatedir@
+LIBDIR=@libdir@
 install:
 	$(INSTALL) -m 755 -o root -g root -d \
 		$(DESTDIR)$(PREFIX)/bin \
@@ -44,6 +52,9 @@ install:
 		$(DESTDIR)/lib/systemd/system
 	$(INSTALL) -m 755 -o root -g root cubemap $(DESTDIR)$(PREFIX)/bin/cubemap
 	$(INSTALL) -m 755 -o root -g root munin/cubemap munin/cubemap_input $(DESTDIR)$(PREFIX)/share/munin/plugins/
+ifeq (@have_ffmpeg@,yes)
+	$(INSTALL) -m 755 -o root -g root ffmpeg_metacube_hack.so $(DESTDIR)$(LIBDIR)/
+endif
 	gzip -c cubemap.1 > $(DESTDIR)$(PREFIX)/share/man/man1/cubemap.1.gz
 	sed \
 		-e "s,@prefix[@],$(PREFIX),g" \
diff --git a/README b/README
index e9520d3..15913a1 100644
--- a/README
+++ b/README
@@ -43,6 +43,15 @@ compile and start cubemap.
 Nageru, my free video mixer, can also produce Metacube streams natively.
 See the manual at https://nageru.sesse.net/doc/ for more information.
 
+If you feel very adventurous, you can use LD_PRELOAD to load
+ffmpeg_metacube_hack.so into an FFmpeg-using binary. (This is
+experimental; native Metacube support is vastly preferred.)
+For instance, here's one way you can use the ffmpeg(1) binary
+to serve your webcam to Cubemap:
+
+  LD_PRELOAD=ffmpeg_metacube_hack.so ffmpeg -i /dev/video0 -f mpegts -listen 1 'http://[::]:9095'
+
+
 To upgrade cubemap (after you've compiled a new binary), or to pick up new
 config:
 
diff --git a/configure.ac b/configure.ac
index 3155873..189e45f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -10,6 +10,14 @@ PKG_CHECK_MODULES([libsystemd], [libsystemd])
 PKG_CHECK_MODULES([protobuf], [protobuf])
 PKG_CHECK_MODULES([libtomcrypt], [libtomcrypt])
 
+have_ffmpeg=yes
+PKG_CHECK_MODULES([libavformat], [libavformat], [true], [have_ffmpeg=no])
+PKG_CHECK_MODULES([libavutil], [libavutil], [true], [have_ffmpeg=no])
+AC_SUBST([have_ffmpeg])
+
+AS_IF([test "x$have_ffmpeg" = "xno" ],
+  [AC_MSG_WARN([FFmpeg libraries not found. ffmpeg_metacube_hack.so will not be built.])])
+
 CXXFLAGS="$CXXFLAGS -std=gnu++11"
 
 AC_CONFIG_FILES([Makefile])
diff --git a/cubemap.config.sample b/cubemap.config.sample
index 51c46d3..967059c 100644
--- a/cubemap.config.sample
+++ b/cubemap.config.sample
@@ -81,6 +81,20 @@ stream /test.ts src=http://gruessi.zrh.sesse.net:4013/test.ts src_encoding=raw
 # to be some reasonable fraction of your fragment length.
 stream /stream.mp4 src=http://gruessi.zrh.sesse.net:9095/test.mp4.metacube hls_playlist=/stream.m3u8 hls_frag_duration=6 backlog_size=20971520 hls_backlog_margin=1048576 allow_origin=*
 
+# An example of using pipe: to read from a program that generates Metacube
+# on standard output (or you can use src_encoding=raw for MPEG-TS).
+# This shows how to use FFmpeg, combined with an LD_PRELOAD hack
+# (the .so file is built with Cubemap if you have the FFmpeg development
+# libraries installed), to receive MPEG-TS over SRT and remux it into MP4.
+# Note that using ffmpeg_metacube_hack.so is experimental and not recommended
+# if you can avoid it.
+#
+# Only one instance of the command will be started at a time (it will drive a
+# stream just like an HTTP input will). Such subprocesses will survive
+# a Cubemap reload, but can of course themselves not be live-reloaded
+# like Cubemap can. Standard error will be passed through unchanged.
+stream /srt.mp4 src=pipe:"LD_PRELOAD=ffmpeg_metacube_hack.so ffmpeg -loglevel warning -i 'srt://[::]:9710?mode=listener' -c:v copy -c:a copy -f mp4 -movflags empty_moov+frag_keyframe+default_base_moof+skip_trailer -frag_duration 125000 -bsf:a aac_adtstoasc -"
+
 # UDP input. TS is the most common container to use over UDP (you cannot
 # take any arbitrary container and expect it to work).
 # backlog_size=<number of bytes> overrides the backlog, which is normally 10 MB.
diff --git a/ffmpeg_metacube_hack.c b/ffmpeg_metacube_hack.c
new file mode 100644
index 0000000..4398cd6
--- /dev/null
+++ b/ffmpeg_metacube_hack.c
@@ -0,0 +1,356 @@
+// A shared library that you can LD_PRELOAD into an FFmpeg-using process
+// (most likely ffmpeg(1)) to make it output Metacube. This is obviously
+// pretty hacky, since it needs to override various FFmpeg functions,
+// so there are few guarantees here. It is written in C to avoid pulling
+// in the C++ runtime into FFmpeg's C-only world.
+//
+// You should not link to this library. It does not have ABI stability.
+// It is licensed the same as the rest of Cubemap.
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <dlfcn.h>
+#include <libavformat/avformat.h>
+#include <libavformat/avio.h>
+#include <libavutil/avassert.h>
+#include <libavutil/crc.h>
+#include <libavutil/error.h>
+#include <libavutil/intreadwrite.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/signal.h>
+#include "metacube2.h"
+
+static pthread_once_t metacube2_crc_once_control = PTHREAD_ONCE_INIT;
+static AVCRC metacube2_crc_table[257];
+
+// We need to store some extra information for each context,
+// so this is where we do it. The "opaque" field in the AVIOContext
+// points to this struct, but we can also look it up by the AVIOContext
+// pointer by scanning through the singly linked list starting with
+// first_extra_data.
+struct ContextExtraData {
+	struct ContextExtraData *next;	// NULL for last entry.
+	AVIOContext *ctx;	// The context we are associating data with.
+	bool seen_sync_point;	// Set once a SYNC_POINT block has been written.
+	void *old_opaque;	// ctx->opaque as it was when we hooked the context.
+	int (*old_write_data_type)(void *opaque, uint8_t * buf,
+				   int buf_size,
+				   enum AVIODataMarkerType type,
+				   int64_t time);
+
+	// Used during avformat_write_header(), to combine adjacent header blocks
+	// into one (in particular, the MP4 mux has an unneeded avio_flush()
+	// halfway throughout).
+	bool in_header;	// True while the real avformat_write_header() runs.
+	int64_t header_first_time;	// Timestamp of the first buffered block.
+	uint8_t *buffered_header;	// Heap buffer; NULL when nothing is held.
+	size_t buffered_header_bytes;	// Number of valid bytes in the buffer.
+};
+static struct ContextExtraData *first_extra_data = NULL;
+
+// Look up ContextExtraData for the given context, creating a new one if needed.
+// A new entry starts out zeroed, except that old_opaque is the context's
+// current opaque. Aborts (via av_assert0) if memory cannot be allocated.
+static struct ContextExtraData *get_extra_data(AVIOContext * ctx)
+{
+	for (struct ContextExtraData * ed = first_extra_data; ed != NULL;
+	     ed = ed->next) {
+		if (ed->ctx == ctx) {
+			return ed;
+		}
+	}
+	// calloc() leaves every field not set below as zero/false/NULL.
+	struct ContextExtraData *ed = calloc(1, sizeof(*ed));
+	av_assert0(ed != NULL);
+	ed->ctx = ctx;
+	// So that restoring old_opaque is harmless on a never-hooked context.
+	ed->old_opaque = (ctx != NULL) ? ctx->opaque : NULL;
+
+	ed->next = first_extra_data;
+	first_extra_data = ed;
+	return ed;
+}
+
+// Unlink and free the ContextExtraData entry for ctx, if there is one
+// (presumably called right before the context itself goes away).
+// The list is walked through a pointer to each link, so the head pointer
+// and the interior next pointers are handled by the same code.
+static void free_extra_data(AVIOContext * ctx)
+{
+	struct ContextExtraData **link = &first_extra_data;
+	while (*link != NULL) {
+		struct ContextExtraData *cur = *link;
+		if (cur->ctx != ctx) {
+			// Not the one we want; move on to the next link.
+			link = &cur->next;
+			continue;
+		}
+
+		// Found it. Splice it out of the list; only the first
+		// match is removed, so we are done afterwards.
+		*link = cur->next;
+		free(cur);
+		return;
+	}
+}
+
+static void metacube2_crc_init_table_once(void)
+{	// Fills metacube2_crc_table; av_assert0 fires if av_crc_init() fails.
+	av_assert0(av_crc_init
+		   (metacube2_crc_table, 0, 16, 0x8fdb,
+		    sizeof(metacube2_crc_table)) >= 0);
+}
+
+static uint16_t metacube2_compute_crc_ff(const struct
+					 metacube2_block_header *hdr)
+{
+	// Checksummed span: starts at hdr->size, covers size plus flags.
+	const uint8_t *span = (uint8_t *) & hdr->size;
+	const int span_len = sizeof(hdr->size) + sizeof(hdr->flags);
+
+	// Build the lookup table on first use only.
+	pthread_once(&metacube2_crc_once_control,
+		     metacube2_crc_init_table_once);
+
+	// The Metacube2 spec starts its CRC at 0x1234, but the pycrc-derived
+	// code it uses finalizes differently from av_crc(). Running 0x1234
+	// through that finalization yields 0x394a; byte-swapping both the seed
+	// (hence 0x4a39) and the result bridges the two conventions.
+	uint16_t swapped = av_crc(metacube2_crc_table, 0x4a39, span, span_len);
+	return av_bswap16(swapped);
+}
+
+// Pass a block on to whichever write callback the context had before we
+// hooked it: old_write_data_type if one was set, otherwise write_packet.
+static int call_old_write(struct ContextExtraData *ed, uint8_t * buf,
+			  int buf_size, enum AVIODataMarkerType type,
+			  int64_t time)
+{
+	if (ed->old_write_data_type) {
+		return ed->old_write_data_type(ed->old_opaque, buf, buf_size,
+					       type, time);
+	}
+	return ed->ctx->write_packet(ed->old_opaque, buf, buf_size);
+}
+
+// Our replacement for the context's write_data_type callback; opaque is the
+// context's ContextExtraData. While ed->in_header is set, data is only
+// appended to ed->buffered_header. Otherwise, a Metacube2 block header is
+// put in front and both go to the original callback (as one write if that
+// fits in max_packet_size, else as two).
+// Normally returns the number of payload bytes accepted (not counting the
+// Metacube2 header), or a negative error code.
+static int write_packet(void *opaque, uint8_t * buf, int buf_size,
+			enum AVIODataMarkerType type, int64_t time)
+{
+	if (buf_size < 0) {
+		return AVERROR(EINVAL);
+	}
+
+	struct ContextExtraData *ed = (struct ContextExtraData *) opaque;
+
+	if (ed->in_header) {
+		if (ed->buffered_header_bytes == 0) {
+			ed->header_first_time = time;
+		}
+		if (buf_size == 0) {
+			// Nothing to append, and realloc(p, 0) is best avoided.
+			return 0;
+		}
+
+		size_t new_buffered_header_bytes =
+		    ed->buffered_header_bytes + buf_size;
+		if (new_buffered_header_bytes < ed->buffered_header_bytes) {
+			return AVERROR(ENOMEM);
+		}
+		// Only replace the old pointer once realloc() has succeeded, so
+		// a failure neither leaks nor loses what is already buffered.
+		uint8_t *grown = realloc(ed->buffered_header,
+					 new_buffered_header_bytes);
+		if (grown == NULL) {
+			return AVERROR(ENOMEM);
+		}
+		ed->buffered_header = grown;
+
+		memcpy(ed->buffered_header + ed->buffered_header_bytes,
+		       buf, buf_size);
+		ed->buffered_header_bytes = new_buffered_header_bytes;
+		return buf_size;
+	}
+	// Find block size if we add a Metacube2 header in front.
+	unsigned new_buf_size =
+	    (unsigned) buf_size + sizeof(struct metacube2_block_header);
+	if (new_buf_size < (unsigned) buf_size
+	    || new_buf_size > (unsigned) INT_MAX) {
+		// Overflow.
+		return AVERROR(EINVAL);
+	}
+	// Fill in the header.
+	struct metacube2_block_header hdr;
+	int flags = 0;
+	if (type == AVIO_DATA_MARKER_SYNC_POINT) {
+		ed->seen_sync_point = true;
+	} else if (type == AVIO_DATA_MARKER_HEADER) {
+		// NOTE: If there are multiple blocks marked METACUBE_FLAGS_HEADER,
+		// only the last one will count. Blocks written while in_header
+		// is set are merged via ed->buffered_header above, but header
+		// blocks arriving at any other time are not.
+		flags |= METACUBE_FLAGS_HEADER;
+	} else if (ed->seen_sync_point) {
+		flags |= METACUBE_FLAGS_NOT_SUITABLE_FOR_STREAM_START;
+	}
+
+	memcpy(hdr.sync, METACUBE2_SYNC, sizeof(hdr.sync));
+	AV_WB32(&hdr.size, buf_size);
+	AV_WB16(&hdr.flags, flags);
+	AV_WB16(&hdr.csum, metacube2_compute_crc_ff(&hdr));
+
+	// The context gets its original opaque back during the downstream call.
+	// Every path below must reach "done" so that ours is put back; else the
+	// next call here would misread old_opaque as a ContextExtraData.
+	int ret;
+	ed->ctx->opaque = ed->old_opaque;
+	// NOTE(review): this compares as unsigned, so max_packet_size == 0
+	// always takes the split path -- confirm that this is intended.
+	if (new_buf_size < ed->ctx->max_packet_size) {
+		// Combine the two packets. (This is what we normally want.)
+		// So we allocate a new block, with a Metacube2 header in front.
+		uint8_t *buf_with_hdr = malloc(new_buf_size);
+		if (buf_with_hdr == NULL) {
+			ret = AVERROR(ENOMEM);
+			goto done;
+		}
+		memcpy(buf_with_hdr, &hdr, sizeof(hdr));
+		memcpy(buf_with_hdr + sizeof(hdr), buf, buf_size);
+		ret = call_old_write(ed, buf_with_hdr, new_buf_size, type, time);
+		free(buf_with_hdr);
+		// Report payload bytes only, hiding our header from the caller.
+		if (ret >= 0 && (size_t) ret >= sizeof(hdr)) {
+			ret -= sizeof(hdr);
+		}
+	} else {
+		// Send separately. This will split a header block if it's really large,
+		// which we don't want, but that's how things are.
+		ret = call_old_write(ed, (uint8_t *) & hdr, sizeof(hdr), type, time);
+		if (ret >= 0 && (size_t) ret != sizeof(hdr)) {
+			ret = AVERROR(EIO);
+		}
+		if (ret >= 0) {
+			ret = call_old_write(ed, buf, buf_size, type, time);
+		}
+	}
+done:
+	ed->ctx->opaque = ed;
+	return ret;
+}
+
+// Actual hooked functions below.
+
+int avformat_write_header(AVFormatContext * ctx, AVDictionary ** options)
+{
+	// Hook: installs write_packet on ctx->pb, then runs the real function.
+	int (*original_func)(AVFormatContext * ctx, AVDictionary ** options) =
+	    dlsym(RTLD_NEXT, "avformat_write_header");
+	if (ctx == NULL || ctx->pb == NULL) {	// No AVIOContext to hook.
+		return (*original_func) (ctx, options);
+	}
+	pthread_once(&metacube2_crc_once_control, metacube2_crc_init_table_once);
+	struct ContextExtraData *ed = get_extra_data(ctx->pb);
+	if (ctx->pb->write_data_type != write_packet) {
+		// Not hooked yet; hooking twice would make write_packet recurse.
+		ed->old_opaque = ctx->pb->opaque;
+		ed->old_write_data_type = ctx->pb->write_data_type;
+		ctx->pb->opaque = ed;
+		ctx->pb->write_data_type = write_packet;
+	}
+	ctx->pb->seek = NULL;
+	ctx->pb->seekable = 0;
+	if (ed->old_write_data_type == NULL) {
+		ctx->pb->ignore_boundary_point = 1;
+	}
+
+	ed->in_header = true;
+	int ret = (*original_func) (ctx, options);
+	ed->in_header = false;
+
+	if (ed->buffered_header_bytes > 0) {
+		size_t n = ed->buffered_header_bytes;
+		int hdr_ret = n > INT_MAX ? AVERROR(EINVAL) :
+		    write_packet(ed, ed->buffered_header, (int) n,
+				 AVIO_DATA_MARKER_HEADER, ed->header_first_time);
+		free(ed->buffered_header);
+		ed->buffered_header = NULL;
+		ed->buffered_header_bytes = 0;
+		if (hdr_ret < 0 || (size_t) hdr_ret < n) {
+			return hdr_ret < 0 ? hdr_ret : AVERROR(EIO);
+		}
+	}
+
+	return ret;
+}
+
+void avformat_free_context(AVFormatContext * ctx)
+{
+	if (ctx == NULL) {
+		return;
+	}
+	// NOTE(review): a still-hooked ctx->pb would keep a dangling opaque; confirm.
+	free_extra_data(ctx->pb);
+	void (*original_func)(AVFormatContext * ctx) =
+	    dlsym(RTLD_NEXT, "avformat_free_context");
+	(*original_func) (ctx);	// No "return <expr>;" in a void function (ISO C).
+}
+
+// Hook so that we can restore opaque instead of ours being freed by the caller.
+int avio_close(AVIOContext * ctx)
+{
+	if (ctx == NULL) {
+		return 0;
+	}
+	if (ctx->write_data_type == write_packet) {	// Only if we hooked it.
+		avio_flush(ctx);	// Pending data must still pass through our hook.
+		ctx->opaque = get_extra_data(ctx)->old_opaque;
+	}
+	free_extra_data(ctx);
+	int (*original_func)(AVIOContext * ctx) = dlsym(RTLD_NEXT, "avio_close");
+	return (*original_func) (ctx);
+}
+
+// Same as FFmpeg's own avio_closep(). We need our own copy because the
+// avio_close() call made from inside FFmpeg's version cannot be hooked,
+// whereas the call below goes through the avio_close() in this file.
+int avio_closep(AVIOContext ** s)
+{
+	const int status = avio_close(*s);
+	*s = NULL;
+	return status;
+}
+
+
+int avio_open2(AVIOContext ** s, const char *filename, int flags,
+	       const AVIOInterruptCB * int_cb, AVDictionary ** options)
+{
+	// Resolve the real avio_open2() that this definition shadows.
+	int (*real_open2)(AVIOContext **, const char *, int,
+			  const AVIOInterruptCB *, AVDictionary **) =
+	    dlsym(RTLD_NEXT, "avio_open2");
+	// The options, if any, are destroyed on entry, so we can add new ones
+	// pretty freely.
+	AVDictionaryEntry *listen_opt = NULL;
+	if (options != NULL && *options != NULL) {
+		listen_opt = av_dict_get(*options, "listen", NULL,
+					 AV_DICT_MATCH_CASE);
+	}
+	const bool listening =
+	    listen_opt != NULL && atoi(listen_opt->value) != 0;
+	if (listening) {
+		// If -listen is set, we'll want to add a header, too.
+		av_dict_set(options, "headers",
+			    "Content-encoding: metacube\r\n", AV_DICT_APPEND);
+	}
+	return (*real_open2) (s, filename, flags, int_cb, options);
+}
-- 
2.39.2