From 33b86069c755119da2a35af63fbd580ca9abfa7c Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Thu, 28 Mar 2019 18:57:57 +0100
Subject: [PATCH] Make Futatabi accept and record the audio, although it cannot
 use it for anything yet.

---
 futatabi/db.cpp            |  7 +++++
 futatabi/frame.proto       |  2 ++
 futatabi/frame_on_disk.cpp |  3 +++
 futatabi/frame_on_disk.h   |  7 +++--
 futatabi/main.cpp          | 52 +++++++++++++++++++++++++++++++++-----
 5 files changed, 62 insertions(+), 9 deletions(-)
diff --git a/futatabi/db.cpp b/futatabi/db.cpp
index 6764f84..762c2fd 100644
--- a/futatabi/db.cpp
+++ b/futatabi/db.cpp
@@ -244,6 +244,11 @@ vector<DB::FrameOnDiskAndStreamIdx> DB::load_frame_file(const string &filename,
 			frame.frame.pts = stream.pts(i);
 			frame.frame.offset = stream.offset(i);
 			frame.frame.size = stream.file_size(i);
+			if (i < stream.audio_size_size()) {
+				frame.frame.audio_size = stream.audio_size(i);
+			} else {
+				frame.frame.audio_size = 0;
+			}
 			frames.push_back(frame);
 		}
 	}
@@ -274,6 +279,7 @@ void DB::store_frame_file(const string &filename, size_t size, const vector<Fram
 		stream->mutable_pts()->Reserve(frames.size());
 		stream->mutable_offset()->Reserve(frames.size());
 		stream->mutable_file_size()->Reserve(frames.size());
+		stream->mutable_audio_size()->Reserve(frames.size());
 		for (const FrameOnDiskAndStreamIdx &frame : frames) {
 			if (frame.stream_idx != stream_idx) {
 				continue;
@@ -281,6 +287,7 @@ void DB::store_frame_file(const string &filename, size_t size, const vector<Fram
 			stream->add_pts(frame.frame.pts);
 			stream->add_offset(frame.frame.offset);
 			stream->add_file_size(frame.frame.size);
+			stream->add_audio_size(frame.frame.audio_size);  // Read back by load_frame_file().
 		}
 	}
 	string serialized;
diff --git a/futatabi/frame.proto b/futatabi/frame.proto
index c8807fd..a0cc392 100644
--- a/futatabi/frame.proto
+++ b/futatabi/frame.proto
@@ -11,6 +11,7 @@ message FrameHeaderProto {
 	int32 stream_idx = 1;
 	int64 pts = 2;
 	int64 file_size = 3;  // In bytes of compressed frame. TODO: rename to size.
+	int32 audio_size = 4;  // In bytes of uncompressed 32-bit 48kHz stereo PCM. Can be zero.
 }
 
 message StreamContentsProto {
@@ -18,6 +19,7 @@ message StreamContentsProto {
 	repeated int64 pts = 2 [packed=true];
 	repeated int64 file_size = 3 [packed=true];
 	repeated int64 offset = 4 [packed=true];
+	repeated int32 audio_size = 5 [packed=true];
 }
 
 message FileContentsProto {
diff --git a/futatabi/frame_on_disk.cpp b/futatabi/frame_on_disk.cpp
index 971cafd..9a87094 100644
--- a/futatabi/frame_on_disk.cpp
+++ b/futatabi/frame_on_disk.cpp
@@ -76,12 +76,15 @@ string FrameReader::read_frame(FrameOnDisk frame)
 		++metric_frame_opened_files;
 	}
 
+	// TODO: Read the audio.
+
 	string str;
 	str.resize(frame.size);
 	off_t offset = 0;
 	while (offset < frame.size) {
 		int ret = pread(fd, &str[offset], frame.size - offset, frame.offset + offset);
 		if (ret <= 0) {
+			string filename = frame_filenames[frame.filename_idx];
 			perror("pread");
 			abort();
 		}
diff --git a/futatabi/frame_on_disk.h b/futatabi/frame_on_disk.h
index 6d1d397..dbe1211 100644
--- a/futatabi/frame_on_disk.h
+++ b/futatabi/frame_on_disk.h
@@ -14,7 +14,9 @@ struct FrameOnDisk {
 	int64_t pts = -1;  // -1 means empty.
 	off_t offset;
 	unsigned filename_idx;
-	uint32_t size;  // Not using size_t saves a few bytes; we can have so many frames.
+	uint32_t size;  // Not using size_t saves a few bytes; we can have so many frames. TODO: Not anymore due to audio_size.
+	uint32_t audio_size;
+	// Unfortunately, 32 bits wasted in padding here.
 };
 extern std::vector<FrameOnDisk> frames[MAX_STREAMS];  // Under frame_mu.
 extern std::vector<std::string> frame_filenames;  // Under frame_mu.
@@ -24,7 +26,8 @@ static bool inline operator==(const FrameOnDisk &a, const FrameOnDisk &b)
 	return a.pts == b.pts &&
 		a.offset == b.offset &&
 		a.filename_idx == b.filename_idx &&
-		a.size == b.size;
+		a.size == b.size &&
+		a.audio_size == b.audio_size;
 }
 
 // A helper class to read frames from disk. It caches the file descriptor
diff --git a/futatabi/main.cpp b/futatabi/main.cpp
index a4b1e42..19cd5fb 100644
--- a/futatabi/main.cpp
+++ b/futatabi/main.cpp
@@ -75,7 +75,7 @@ Summary metric_received_frame_size_bytes;
 
 namespace {
 
-FrameOnDisk write_frame(int stream_idx, int64_t pts, const uint8_t *data, size_t size, DB *db)
+FrameOnDisk write_frame(int stream_idx, int64_t pts, const uint8_t *data, size_t size, vector<uint32_t> audio, DB *db)
 {
 	if (open_frame_files.count(stream_idx) == 0) {
 		char filename[256];
@@ -105,6 +105,7 @@ FrameOnDisk write_frame(int stream_idx, int64_t pts, const uint8_t *data, size_t
 	hdr.set_stream_idx(stream_idx);
 	hdr.set_pts(pts);
 	hdr.set_file_size(size);
+	hdr.set_audio_size(audio.size() * sizeof(audio[0]));
 
 	string serialized;
 	if (!hdr.SerializeToString(&serialized)) {
@@ -130,6 +131,12 @@ FrameOnDisk write_frame(int stream_idx, int64_t pts, const uint8_t *data, size_t
 		perror("fwrite");
 		abort();
 	}
+	// Audio bytes follow the frame on disk; fail the same way as the fwrite() above.
+	if (!audio.empty() &&
+	    fwrite(audio.data(), hdr.audio_size(), 1, file.fp) != 1) {
+		perror("fwrite");
+		abort();
+	}
 	fflush(file.fp);  // No fsync(), though. We can accept losing a few frames.
 	global_disk_space_estimator->report_write(filename, 8 + sizeof(len) + serialized.size() + size, pts);
 
@@ -138,6 +145,7 @@ FrameOnDisk write_frame(int stream_idx, int64_t pts, const uint8_t *data, size_t
 	frame.filename_idx = filename_idx;
 	frame.offset = offset;
 	frame.size = size;
+	frame.audio_size = audio.size() * sizeof(audio[0]);
 
 	{
 		lock_guard<mutex> lock(frame_mu);
@@ -371,9 +379,10 @@ void load_frame_file(const char *filename, const string &basename, unsigned file
 		}
 		frame.filename_idx = filename_idx;
 		frame.size = hdr.file_size();
+		frame.audio_size = hdr.audio_size();
 
-		if (frame.offset + frame.size > file_len ||
-		    fseek(fp, frame.offset + frame.size, SEEK_SET) == -1) {
+		if (frame.offset + frame.size + frame.audio_size > file_len ||
+		    fseek(fp, frame.offset + frame.size + frame.audio_size, SEEK_SET) == -1) {
 			fprintf(stderr, "WARNING: %s: Could not seek past frame (probably truncated).\n", filename);
 			break;
 		}
@@ -500,8 +509,22 @@ void record_thread_func()
 			continue;
 		}
 
-		int64_t last_pts = -1;
+		// Match audio to video streams, sequentially (video < MAX_STREAMS only; it indexes pending_audio[]).
+		vector<int> video_stream_idx, audio_stream_idx;
+		for (unsigned i = 0; i < format_ctx->nb_streams; ++i) {
+			if (i < MAX_STREAMS && format_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO) {
+				video_stream_idx.push_back(i);
+			} else if (format_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
+				audio_stream_idx.push_back(i);
+			}
+		}
+		unordered_map<int, int> audio_stream_to_video_stream_idx;
+		for (size_t i = 0; i < min(video_stream_idx.size(), audio_stream_idx.size()); ++i) {
+			audio_stream_to_video_stream_idx[audio_stream_idx[i]] = video_stream_idx[i];
+		}
 
+		vector<uint32_t> pending_audio[MAX_STREAMS];
+		int64_t last_pts = -1;
 		while (!should_quit.load()) {
 			AVPacket pkt;
 			unique_ptr<AVPacket, decltype(av_packet_unref) *> pkt_cleanup(
@@ -515,8 +538,23 @@ void record_thread_func()
 			if (av_read_frame(format_ctx.get(), &pkt) != 0) {
 				break;
 			}
+
+			AVStream *stream = format_ctx->streams[pkt.stream_index];
+			// Stash audio under its paired video stream; it is handed to write_frame() with that stream's next frame.
+			const auto audio_it = audio_stream_to_video_stream_idx.find(pkt.stream_index);
+			if (stream->codecpar->codec_type == AVMEDIA_TYPE_AUDIO && audio_it != audio_stream_to_video_stream_idx.end()) {
+				if ((pkt.size % (sizeof(uint32_t) * 2)) != 0) {
+					fprintf(stderr, "Audio stream %d had a packet of strange length %d, ignoring.\n",
+						pkt.stream_index, pkt.size);
+				} else {
+					// TODO: Endianness?
+					const uint32_t *begin = reinterpret_cast<const uint32_t *>(pkt.data);
+					pending_audio[audio_it->second].assign(begin, begin + pkt.size / sizeof(uint32_t));
+				}
+			}
+
 			if (pkt.stream_index >= MAX_STREAMS ||
-			    format_ctx->streams[pkt.stream_index]->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
+			    stream->codecpar->codec_type != AVMEDIA_TYPE_VIDEO) {
 				continue;
 			}
 
@@ -524,7 +562,7 @@ void record_thread_func()
 			metric_received_frame_size_bytes.count_event(pkt.size);
 
 			// Convert pts to our own timebase.
-			AVRational stream_timebase = format_ctx->streams[pkt.stream_index]->time_base;
+			AVRational stream_timebase = stream->time_base;
 			int64_t pts = av_rescale_q(pkt.pts, stream_timebase, AVRational{ 1, TIMEBASE });
 
 			// Translate offset into our stream.
@@ -535,7 +573,7 @@ void record_thread_func()
 
 			//fprintf(stderr, "Got a frame from camera %d, pts = %ld, size = %d\n",
 			//      pkt.stream_index, pts, pkt.size);
-			FrameOnDisk frame = write_frame(pkt.stream_index, pts, pkt.data, pkt.size, &db);
+			FrameOnDisk frame = write_frame(pkt.stream_index, pts, pkt.data, pkt.size, move(pending_audio[pkt.stream_index]), &db);
 
 			post_to_main_thread([pkt, frame] {
 				global_mainwindow->display_frame(pkt.stream_index, frame);
-- 
2.39.2