From: Steinar H. Gunderson
Date: Mon, 4 Apr 2016 19:22:26 +0000 (+0200)
Subject: Add support for uncompressed video instead of H.264 (while still storing H.264 to...
X-Git-Tag: 1.2.0~9
X-Git-Url: https://git.sesse.net/?p=nageru;a=commitdiff_plain;h=bb8cf44c90b78921e6c6b2f62e232addff78b5ed

Add support for uncompressed video instead of H.264 (while still storing H.264 to file).

Note that microhttpd and VLC are really inefficient at handling large amounts of data,
so this is not actually a CPU win right now.
---

diff --git a/flags.cpp b/flags.cpp
index 21e71e9..c06be70 100644
--- a/flags.cpp
+++ b/flags.cpp
@@ -11,6 +11,7 @@ void parse_flags(int argc, char * const argv[])
 	static const option long_options[] = {
 		{ "num-cards", required_argument, 0, 'c' },
 		{ "va-display", required_argument, 0, 1000 },
+		{ "http-uncompressed-video", no_argument, 0, 1001 },
 		{ 0, 0, 0, 0 }
 	};
 	for ( ;; ) {
@@ -27,6 +28,9 @@ void parse_flags(int argc, char * const argv[])
 		case 1000:
 			global_flags.va_display = optarg;
 			break;
+		case 1001:
+			global_flags.uncompressed_video_to_http = true;
+			break;
 		default:
 			fprintf(stderr, "Unknown option '%s'\n", argv[option_index]);
 			exit(1);
diff --git a/flags.h b/flags.h
index 52a9a50..2a65c24 100644
--- a/flags.h
+++ b/flags.h
@@ -6,6 +6,7 @@
 struct Flags {
 	int num_cards = 2;
 	std::string va_display;
+	bool uncompressed_video_to_http = false;
 };
 
 extern Flags global_flags;
diff --git a/h264encode.cpp b/h264encode.cpp
index 876e956..fbeeba7 100644
--- a/h264encode.cpp
+++ b/h264encode.cpp
@@ -38,6 +38,7 @@ extern "C" {
 
 #include "context.h"
 #include "defs.h"
+#include "flags.h"
 #include "httpd.h"
 #include "timebase.h"
 
@@ -113,6 +114,82 @@ typedef struct __bitstream bitstream;
 
 using namespace std;
 
+// H.264 video comes out in encoding order (e.g. with two B-frames:
+// 0, 3, 1, 2, 6, 4, 5, etc.), but uncompressed video needs to
+// come in the right order. Since we do everything, including waiting
+// for the frames to come out of OpenGL, in encoding order, we need
+// a reordering buffer for uncompressed frames so that they come out
+// correctly. We go the super-lazy way of not making it understand
+// anything about the true order (which introduces some extra latency,
+// though); we know that for N B-frames we need at most (N-1) frames
+// in the reorder buffer, and can just sort on that.
+//
+// The class also deals with keeping a freelist as needed.
+class FrameReorderer {
+public:
+	FrameReorderer(unsigned queue_length, int width, int height);
+
+	// Returns the next frame to insert with its pts, if any. Otherwise -1 and nullptr.
+	// Does _not_ take ownership of data; a copy is taken if needed.
+	// The returned pointer is valid until the next call to reorder_frame, or destruction.
+	// As a special case, if queue_length == 0, will just return pts and data (no reordering needed).
+	pair<int64_t, const uint8_t *> reorder_frame(int64_t pts, const uint8_t *data);
+
+	// The same as reorder_frame, but without inserting anything. Used to empty the queue.
+	pair<int64_t, const uint8_t *> get_first_frame();
+
+	bool empty() const { return frames.empty(); }
+
+private:
+	unsigned queue_length;
+	int width, height;
+
+	priority_queue<pair<int64_t, uint8_t *>> frames;
+	stack<uint8_t *> freelist;  // Includes the last value returned from reorder_frame.
+
+	// Owns all the pointers. Normally, freelist and frames could do this themselves,
+	// except priority_queue doesn't work well with movable-only types.
+	vector<unique_ptr<uint8_t[]>> owner;
+};
+
+FrameReorderer::FrameReorderer(unsigned queue_length, int width, int height)
+	: queue_length(queue_length), width(width), height(height)
+{
+	for (unsigned i = 0; i < queue_length; ++i) {
+		owner.emplace_back(new uint8_t[width * height * 2]);
+		freelist.push(owner.back().get());
+	}
+}
+
+pair<int64_t, const uint8_t *> FrameReorderer::reorder_frame(int64_t pts, const uint8_t *data)
+{
+	if (queue_length == 0) {
+		return make_pair(pts, data);
+	}
+
+	assert(!freelist.empty());
+	uint8_t *storage = freelist.top();
+	freelist.pop();
+	memcpy(storage, data, width * height * 2);
+	frames.emplace(-pts, storage);  // Invert pts to get smallest first.
+
+	if (frames.size() >= queue_length) {
+		return get_first_frame();
+	} else {
+		return make_pair(-1, nullptr);
+	}
+}
+
+pair<int64_t, const uint8_t *> FrameReorderer::get_first_frame()
+{
+	assert(!frames.empty());
+	pair<int64_t, uint8_t *> storage = frames.top();
+	frames.pop();
+	int64_t pts = storage.first;
+	freelist.push(storage.second);
+	return make_pair(-pts, storage.second);  // Re-invert pts (see reorder_frame()).
+}
+
 class H264EncoderImpl {
 public:
 	H264EncoderImpl(QSurface *surface, const string &va_display, int width, int height, HTTPD *httpd);
@@ -137,6 +214,7 @@ private:
 	void encode_thread_func();
 	void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts);
+	void add_packet_for_uncompressed_frame(int64_t pts, const uint8_t *data);
 	void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, int frame_type, int64_t pts, int64_t dts);
 	void storage_task_thread();
@@ -188,6 +266,7 @@ private:
 	AVCodecContext *context_audio;
 	AVFrame *audio_frame = nullptr;
 	HTTPD *httpd;
+	unique_ptr<FrameReorderer> reorderer;
 
 	Display *x11_display = nullptr;
 
@@ -856,6 +935,10 @@ VADisplay H264EncoderImpl::va_open_display(const string &va_display)
 			return NULL;
 		}
 		use_zerocopy = true;
+		if (global_flags.uncompressed_video_to_http) {
+			fprintf(stderr, "Disabling zerocopy H.264 encoding due to --http-uncompressed-video.\n");
+			use_zerocopy = false;
+		}
 		return vaGetDisplay(x11_display);
 	} else if (va_display[0] != '/') {
 		x11_display = XOpenDisplay(va_display.c_str());
@@ -864,6 +947,10 @@ VADisplay H264EncoderImpl::va_open_display(const string &va_display)
 			return NULL;
 		}
 		use_zerocopy = true;
+		if (global_flags.uncompressed_video_to_http) {
+			fprintf(stderr, "Disabling zerocopy H.264 encoding due to --http-uncompressed-video.\n");
+			use_zerocopy = false;
+		}
 		return vaGetDisplay(x11_display);
 	} else {
 		drm_fd = open(va_display.c_str(), O_RDWR);
@@ -1524,7 +1611,8 @@ void H264EncoderImpl::save_codeddata(storage_task task)
 			pkt.flags = 0;
 		}
 		//pkt.duration = 1;
-		httpd->add_packet(pkt, task.pts + global_delay, task.dts + global_delay);
+		httpd->add_packet(pkt, task.pts + global_delay, task.dts + global_delay,
+			global_flags.uncompressed_video_to_http ? HTTPD::DESTINATION_FILE_ONLY : HTTPD::DESTINATION_FILE_AND_HTTP);
 	}
 	// Encode and add all audio frames up to and including the pts of this video frame.
 	for ( ;; ) {
@@ -1569,7 +1657,7 @@ void H264EncoderImpl::save_codeddata(storage_task task)
 		avcodec_encode_audio2(context_audio, &pkt, audio_frame, &got_output);
 		if (got_output) {
 			pkt.stream_index = 1;
-			httpd->add_packet(pkt, audio_pts + global_delay, audio_pts + global_delay);
+			httpd->add_packet(pkt, audio_pts + global_delay, audio_pts + global_delay, HTTPD::DESTINATION_FILE_AND_HTTP);
 		}
 		// TODO: Delayed frames.
 		av_frame_unref(audio_frame);
@@ -1672,6 +1760,10 @@ H264EncoderImpl::H264EncoderImpl(QSurface *surface, const string &va_display, in
 
 	//print_input();
 
+	if (global_flags.uncompressed_video_to_http) {
+		reorderer.reset(new FrameReorderer(ip_period - 1, frame_width, frame_height));
+	}
+
 	init_va(va_display);
 	setup_encode();
 
@@ -1917,6 +2009,26 @@ void H264EncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num, int g
 		encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts);
 		last_dts = dts;
 	}
+
+	if (global_flags.uncompressed_video_to_http) {
+		// Add frames left in reorderer.
+		while (!reorderer->empty()) {
+			pair<int64_t, const uint8_t *> output_frame = reorderer->get_first_frame();
+			add_packet_for_uncompressed_frame(output_frame.first, output_frame.second);
+		}
+	}
+}
+
+void H264EncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, const uint8_t *data)
+{
+	AVPacket pkt;
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.buf = nullptr;
+	pkt.data = const_cast<uint8_t *>(data);
+	pkt.size = frame_width * frame_height * 2;
+	pkt.stream_index = 0;
+	pkt.flags = AV_PKT_FLAG_KEY;
+	httpd->add_packet(pkt, pts, pts, HTTPD::DESTINATION_HTTP_ONLY);
 }
 
 namespace {
@@ -1970,6 +2082,15 @@ void H264EncoderImpl::encode_frame(H264EncoderImpl::PendingFrame frame, int enco
 
 		va_status = vaUnmapBuffer(va_dpy, surf->surface_image.buf);
 		CHECK_VASTATUS(va_status, "vaUnmapBuffer");
+
+		if (global_flags.uncompressed_video_to_http) {
+			// Add uncompressed video. (Note that pts == dts here.)
+			const int64_t global_delay = int64_t(ip_period - 1) * (TIMEBASE / MAX_FPS);  // Needs to match audio.
+			pair<int64_t, const uint8_t *> output_frame = reorderer->reorder_frame(pts + global_delay, reinterpret_cast<const uint8_t *>(surf->y_ptr));
+			if (output_frame.second != nullptr) {
+				add_packet_for_uncompressed_frame(output_frame.first, output_frame.second);
+			}
+		}
 	}
 
 	va_status = vaDestroyImage(va_dpy, surf->surface_image.image_id);
diff --git a/httpd.cpp b/httpd.cpp
index 401509d..d9bc022 100644
--- a/httpd.cpp
+++ b/httpd.cpp
@@ -19,6 +19,7 @@ extern "C" {
 
 #include "httpd.h"
 #include "defs.h"
+#include "flags.h"
 #include "timebase.h"
 
 struct MHD_Connection;
@@ -41,13 +42,15 @@ void HTTPD::start(int port)
 		MHD_OPTION_END);
 }
 
-void HTTPD::add_packet(const AVPacket &pkt, int64_t pts, int64_t dts)
+void HTTPD::add_packet(const AVPacket &pkt, int64_t pts, int64_t dts, PacketDestination destination)
 {
 	unique_lock<mutex> lock(streams_mutex);
-	for (Stream *stream : streams) {
-		stream->add_packet(pkt, pts, dts);
+	if (destination != DESTINATION_FILE_ONLY) {
+		for (Stream *stream : streams) {
+			stream->add_packet(pkt, pts, dts);
+		}
 	}
-	if (file_mux) {
+	if (file_mux && destination != DESTINATION_HTTP_ONLY) {
 		file_mux->add_packet(pkt, pts, dts);
 	}
 }
@@ -67,7 +70,7 @@ void HTTPD::open_output_file(const string &filename)
 		exit(1);
 	}
 
-	file_mux.reset(new Mux(avctx, width, height));
+	file_mux.reset(new Mux(avctx, width, height, Mux::CODEC_H264));
 }
 
 void HTTPD::close_output_file()
@@ -135,10 +138,10 @@ void HTTPD::request_completed(struct MHD_Connection *connection, void **con_cls,
 	}
 }
 
-HTTPD::Mux::Mux(AVFormatContext *avctx, int width, int height)
+HTTPD::Mux::Mux(AVFormatContext *avctx, int width, int height, Codec codec)
 	: avctx(avctx)
 {
-	AVCodec *codec_video = avcodec_find_encoder(AV_CODEC_ID_H264);
+	AVCodec *codec_video = avcodec_find_encoder((codec == CODEC_H264) ? AV_CODEC_ID_H264 : AV_CODEC_ID_RAWVIDEO);
 	avstream_video = avformat_new_stream(avctx, codec_video);
 	if (avstream_video == nullptr) {
 		fprintf(stderr, "avformat_new_stream() failed\n");
@@ -146,7 +149,13 @@ HTTPD::Mux::Mux(AVFormatContext *avctx, int width, int height)
 	}
 	avstream_video->time_base = AVRational{1, TIMEBASE};
 	avstream_video->codec->codec_type = AVMEDIA_TYPE_VIDEO;
-	avstream_video->codec->codec_id = AV_CODEC_ID_H264;
+	if (codec == CODEC_H264) {
+		avstream_video->codec->codec_id = AV_CODEC_ID_H264;
+	} else {
+		assert(codec == CODEC_NV12);
+		avstream_video->codec->codec_id = AV_CODEC_ID_RAWVIDEO;
+		avstream_video->codec->codec_tag = avcodec_pix_fmt_to_codec_tag(AV_PIX_FMT_NV12);
+	}
 	avstream_video->codec->width = width;
 	avstream_video->codec->height = height;
 	avstream_video->codec->time_base = AVRational{1, TIMEBASE};
@@ -236,9 +245,17 @@ HTTPD::Stream::Stream(AVOutputFormat *oformat, int width, int height)
 	avctx->oformat = oformat;
 	uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
 	avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, this, nullptr, &HTTPD::Stream::write_packet_thunk, nullptr);
+
+	Mux::Codec codec;
+	if (global_flags.uncompressed_video_to_http) {
+		codec = Mux::CODEC_NV12;
+	} else {
+		codec = Mux::CODEC_H264;
+	}
+
 	avctx->flags = AVFMT_FLAG_CUSTOM_IO;
 
-	mux.reset(new Mux(avctx, width, height));
+	mux.reset(new Mux(avctx, width, height, codec));
 }
 
 ssize_t HTTPD::Stream::reader_callback_thunk(void *cls, uint64_t pos, char *buf, size_t max)
diff --git a/httpd.h b/httpd.h
index 88aeecc..f5bf55f 100644
--- a/httpd.h
+++ b/httpd.h
@@ -29,9 +29,15 @@ extern "C" {
 
 class HTTPD {
 public:
+	enum PacketDestination {
+		DESTINATION_FILE_ONLY,
+		DESTINATION_HTTP_ONLY,
+		DESTINATION_FILE_AND_HTTP
+	};
+
 	HTTPD(int width, int height);
 	void start(int port);
-	void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts);
+	void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts, PacketDestination destination);
 
 	// You can only have one going at the same time.
 	void open_output_file(const std::string &filename);
@@ -56,7 +62,12 @@ private:
 	class Mux {
 	public:
-		Mux(AVFormatContext *avctx, int width, int height);  // Takes ownership of avctx.
+		enum Codec {
+			CODEC_H264,
+			CODEC_NV12,  // Uncompressed 4:2:0.
+		};
+
+		Mux(AVFormatContext *avctx, int width, int height, Codec codec);  // Takes ownership of avctx.
 		~Mux();
 		void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts);
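
The FrameReorderer comment in h264encode.cpp leans on one trick worth spelling out: std::priority_queue is a max-heap, so pts values are stored negated and re-negated on extraction, which makes the earliest pts pop first. The following standalone sketch (illustrative only, not part of the commit; the queue_length value and the pts sequence are assumed for the example) runs the same push/pop discipline on bare pts values:

// Standalone illustration of the reordering trick used by FrameReorderer.
// Holding queue_length entries before emitting turns encoding order back into
// presentation order; negating pts makes the max-heap behave like a min-heap.
#include <cstdint>
#include <cstdio>
#include <queue>
#include <vector>

int main()
{
	const unsigned queue_length = 2;  // assumed: ip_period - 1 with two B-frames
	std::priority_queue<int64_t> frames;

	// pts values arriving in encoding order (same example as the comment above).
	for (int64_t pts : std::vector<int64_t>{ 0, 3, 1, 2, 6, 4, 5 }) {
		frames.push(-pts);  // Invert pts to get smallest first.
		if (frames.size() >= queue_length) {
			printf("output pts %lld\n", (long long)-frames.top());
			frames.pop();
		}
	}
	// Drain what is left at the end, like encode_remaining_frames_as_p() does.
	while (!frames.empty()) {
		printf("output pts %lld\n", (long long)-frames.top());
		frames.pop();
	}
	// Prints 0, 1, 2, 3, 4, 5, 6 -- presentation order.
}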
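
Similarly, the codec_tag line added to HTTPD::Mux::Mux is what tells players which raw pixel layout the stream carries; AV_CODEC_ID_RAWVIDEO alone does not say. A minimal sketch of that stream setup with the same era's FFmpeg API follows (illustrative only; the helper name add_raw_nv12_stream is made up, error handling is minimal, and the deprecated AVStream::codec fields are used to match the surrounding code):

// Illustration only: declare a raw NV12 video stream on an existing AVFormatContext,
// mirroring the CODEC_NV12 branch added to HTTPD::Mux::Mux in this commit.
extern "C" {
#include <libavformat/avformat.h>
}

static AVStream *add_raw_nv12_stream(AVFormatContext *avctx, int width, int height, AVRational time_base)
{
	AVStream *st = avformat_new_stream(avctx, avcodec_find_encoder(AV_CODEC_ID_RAWVIDEO));
	if (st == nullptr) {
		return nullptr;
	}
	st->time_base = time_base;
	st->codec->codec_type = AVMEDIA_TYPE_VIDEO;
	st->codec->codec_id = AV_CODEC_ID_RAWVIDEO;
	// The container has no other way to know the pixel format of raw video,
	// so the NV12 FourCC is stored as the codec tag.
	st->codec->codec_tag = avcodec_pix_fmt_to_codec_tag(AV_PIX_FMT_NV12);
	st->codec->width = width;
	st->codec->height = height;
	st->codec->time_base = time_base;
	return st;
}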