From e15251d2787cb8e6b677af801de6180e55171763 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Tue, 11 Feb 2020 22:42:53 +0100
Subject: [PATCH] Make Futatabi fades apply white balance.

When there are two different video streams involved, they will often
have different white points, so we can't just echo the Exif data back;
we need to apply it when converting, and then get back a result in
standard sRGB. It's not entirely correct since we still run with
crushed whites/blacks, but it's good enough.

This completes Nageru/Futatabi white balance round trip support.
---
 futatabi/exif_parser.cpp     | 99 ++++++++++++++++++++++++++++++++++++
 futatabi/exif_parser.h       | 16 ++++++
 futatabi/video_stream.cpp    | 11 +++-
 futatabi/ycbcr_converter.cpp | 52 ++++++++++++-------
 futatabi/ycbcr_converter.h   |  8 ++-
 meson.build                  |  3 +-
 6 files changed, 167 insertions(+), 22 deletions(-)
 create mode 100644 futatabi/exif_parser.cpp
 create mode 100644 futatabi/exif_parser.h
diff --git a/futatabi/exif_parser.cpp b/futatabi/exif_parser.cpp
new file mode 100644
index 0000000..316d3c4
--- /dev/null
+++ b/futatabi/exif_parser.cpp
@@ -0,0 +1,99 @@
+#include "exif_parser.h"
+
+#include <movit/colorspace_conversion_effect.h>
+#include <stdint.h>
+#include <Eigen/Core>
+#include <Eigen/LU>
+
+using namespace Eigen;
+using namespace movit;
+using namespace std;
+
+// Assembles a 32-bit unsigned integer from four bytes stored big-endian.
+uint32_t read32be(const uint8_t *data)
+{
+	const uint32_t upper = (uint32_t(data[0]) << 8) | uint32_t(data[1]);
+	const uint32_t lower = (uint32_t(data[2]) << 8) | uint32_t(data[3]);
+	return (upper << 16) | lower;
+}
+
+uint16_t read16be(const uint8_t *data)
+{
+	return uint16_t((unsigned(data[0]) << 8) + data[1]);  // Big-endian: high byte first.
+}
+
+// Converts the Exif WhitePoint (xy chromaticity) in <exif> to RGB with sRGB
+// primaries; returns (1.0, 1.0, 1.0) if the data is empty or unparseable.
+RGBTriplet get_neutral_color(const string &exif)
+{
+	if (exif.empty()) {
+		return {1.0f, 1.0f, 1.0f};
+	}
+
+	const uint8_t *data = reinterpret_cast<const uint8_t *>(exif.data());
+
+	// Very rudimentary Exif parser; we really only care about what Nageru
+	// sends us (MJPEGEncoder::init_jpeg_422()), but it would be nice to have
+	// a little bit of future-proofing, just in case.
+	if (exif.size() < 14 || memcmp(data, "Exif\0\0MM\0\x2a", 10) != 0) {
+		fprintf(stderr, "WARNING: Truncated or malformed Exif header, ignoring.\n");
+		return {1.0f, 1.0f, 1.0f};
+	}
+
+	// We only care about the first IFD. Offsets are relative to the MM,
+	// which is six bytes in; the < 14 test also catches 32-bit wraparound.
+	uint32_t ifd_offset = read32be(data + 10) + 6;
+	if (ifd_offset < 14 || ifd_offset >= exif.size()) {
+		fprintf(stderr, "WARNING: Truncated or malformed Exif IFD, ignoring.\n");
+		return {1.0f, 1.0f, 1.0f};
+	}
+
+	// Skip over number of tags (16 bits); if the white point is not the first one,
+	// we're bailing anyway.
+	if (ifd_offset + 2 > exif.size() || ifd_offset + 2 < ifd_offset) {
+		fprintf(stderr, "WARNING: Exif IFD has no room for number of tags, ignoring.\n");
+		return {1.0f, 1.0f, 1.0f};
+	}
+	if (ifd_offset + 4 > exif.size() || ifd_offset + 4 < ifd_offset) {
+		fprintf(stderr, "WARNING: Exif IFD has no room for tag, ignoring.\n");
+		return {1.0f, 1.0f, 1.0f};
+	}
+	if (read16be(data + ifd_offset + 2) != 0x13e) {  // WhitePoint.
+		fprintf(stderr, "WARNING: Unexpected first Exif tag, ignoring.\n");
+		return {1.0f, 1.0f, 1.0f};
+	}
+	if (ifd_offset + 14 > exif.size() || ifd_offset + 14 < ifd_offset) {
+		fprintf(stderr, "WARNING: WhitePoint Exif tag was truncated, ignoring.\n");
+		return {1.0f, 1.0f, 1.0f};
+	}
+
+	// Just assume we're rational type and two values. As for the IFD offset,
+	// < 14 rejects wraparound and anything pointing back into the header.
+	uint32_t white_point_offset = read32be(data + ifd_offset + 10) + 6;
+	if (white_point_offset < 14 || white_point_offset >= exif.size()) {
+		fprintf(stderr, "WARNING: WhitePoint Exif tag was out of bounds, ignoring.\n");
+		return {1.0f, 1.0f, 1.0f};
+	}
+	if (exif.size() - white_point_offset < 16) {  // Subtraction form cannot overflow.
+		fprintf(stderr, "WARNING: WhitePoint Exif tag was truncated, ignoring.\n");
+		return {1.0f, 1.0f, 1.0f};
+	}
+
+	uint32_t x_nom = read32be(data + white_point_offset);
+	uint32_t x_den = read32be(data + white_point_offset + 4);
+	uint32_t y_nom = read32be(data + white_point_offset + 8);
+	uint32_t y_den = read32be(data + white_point_offset + 12);
+	if (x_den == 0 || y_den == 0) {
+		// Dividing would yield inf/NaN and poison the white balance downstream.
+		fprintf(stderr, "WARNING: WhitePoint Exif tag has a zero denominator, ignoring.\n");
+		return {1.0f, 1.0f, 1.0f};
+	}
+
+	double x = double(x_nom) / x_den;
+	double y = double(y_nom) / y_den;
+	double z = 1.0 - x - y;
+	Matrix3d rgb_to_xyz_matrix = movit::ColorspaceConversionEffect::get_xyz_matrix(COLORSPACE_sRGB);
+	Vector3d rgb = rgb_to_xyz_matrix.inverse() * Vector3d(x, y, z);
+	return RGBTriplet(rgb[0], rgb[1], rgb[2]);
+}
+
diff --git a/futatabi/exif_parser.h b/futatabi/exif_parser.h
new file mode 100644
index 0000000..09700b1
--- /dev/null
+++ b/futatabi/exif_parser.h
@@ -0,0 +1,16 @@
+#ifndef _EXIF_PARSER_H
+#define _EXIF_PARSER_H
+
+#include <movit/effect.h>
+#include <string>
+
+class Frame;
+
+// Try to parse the WhitePoint tag in the given Exif data.
+// If the string is empty, or the tag is corrupted, or if it was
+// just more complicated than our makeshift parser could deal with,
+// returns (1.0, 1.0, 1.0), giving a regular D65 white point.
+movit::RGBTriplet get_neutral_color(const std::string &exif);
+
+#endif  // !defined(_EXIF_PARSER_H)
+
diff --git a/futatabi/video_stream.cpp b/futatabi/video_stream.cpp
index 9a120b5..d7d9fd0 100644
--- a/futatabi/video_stream.cpp
+++ b/futatabi/video_stream.cpp
@@ -6,6 +6,7 @@ extern "C" {
 }
 
 #include "chroma_subsampler.h"
+#include "exif_parser.h"
 #include "flags.h"
 #include "flow.h"
 #include "jpeg_frame_view.h"
@@ -22,6 +23,7 @@ extern "C" {
 #include <jpeglib.h>
 #include <unistd.h>
 
+using namespace movit;
 using namespace std;
 using namespace std::chrono;
 
@@ -526,12 +528,16 @@ void VideoStream::schedule_interpolated_frame(steady_clock::time_point local_pts
 		shared_ptr<Frame> frame2 = decode_jpeg_with_cache(secondary_frame, DECODE_IF_NOT_IN_CACHE, &frame_reader, &did_decode);
 
 		// Then fade against it, putting it into the fade Y' and CbCr textures.
-		ycbcr_semiplanar_converter->prepare_chain_for_fade_from_texture(qf.output_tex, global_flags.width, global_flags.height, frame2, fade_alpha)->render_to_fbo(resources->fade_fbo, global_flags.width, global_flags.height);
+		RGBTriplet neutral_color = get_neutral_color(qf.exif_data);
+		ycbcr_semiplanar_converter->prepare_chain_for_fade_from_texture(qf.output_tex, neutral_color, global_flags.width, global_flags.height, frame2, fade_alpha)->render_to_fbo(resources->fade_fbo, global_flags.width, global_flags.height);
 
 		// Subsample and split Cb/Cr.
 		chroma_subsampler->subsample_chroma(resources->fade_cbcr_output_tex, global_flags.width, global_flags.height, resources->cb_tex, resources->cr_tex);
 
 		interpolate_no_split->release_texture(qf.output_tex);
+
+		// We already applied the white balance, so don't have the client redo it.
+		qf.exif_data.clear();
 	} else {
 		tie(qf.output_tex, qf.cbcr_tex) = interpolate->exec(resources->input_tex, resources->gray_tex, flow_tex, global_flags.width, global_flags.height, alpha);
 		check_error();
@@ -710,9 +716,10 @@ void VideoStream::encode_thread_func()
 			glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
 
 			shared_ptr<Frame> frame = frame_from_pbo(qf.resources->pbo_contents, global_flags.width, global_flags.height);
+			assert(frame->exif_data.empty());
 
 			// Now JPEG encode it, and send it on to the stream.
-			string jpeg = encode_jpeg(frame->y.get(), frame->cb.get(), frame->cr.get(), global_flags.width, global_flags.height, move(frame->exif_data));
+			string jpeg = encode_jpeg(frame->y.get(), frame->cb.get(), frame->cr.get(), global_flags.width, global_flags.height, /*exif_data=*/"");
 
 			AVPacket pkt;
 			av_init_packet(&pkt);
diff --git a/futatabi/ycbcr_converter.cpp b/futatabi/ycbcr_converter.cpp
index ef402a5..2d2f32f 100644
--- a/futatabi/ycbcr_converter.cpp
+++ b/futatabi/ycbcr_converter.cpp
@@ -1,13 +1,16 @@
 #include "ycbcr_converter.h"
 
+#include "exif_parser.h"
 #include "flags.h"
 #include "jpeg_frame.h"
 
+#include <array>
 #include <movit/mix_effect.h>
+#include <movit/white_balance_effect.h>
 #include <movit/ycbcr_input.h>
 
-using namespace std;
 using namespace movit;
+using namespace std;
 
 namespace {
 
@@ -53,12 +56,12 @@ YCbCrConverter::YCbCrConverter(YCbCrConverter::OutputMode output_mode, ResourceP
 	// sources with no conversion, so we ought to have had false here.
 	// However, in the off chance that we're actually getting real MJPEG,
 	// we don't want to crush its blacks (or whites) by clamping. All of
-	// our processing is fades, so if we're in limited-range input, we'll
-	// stay in limited-range output. (Fading between limited-range and
-	// full-range sources will be broken, of course.) There will be some
-	// slight confusion in the parts of the algorithms dealing with RGB,
-	// but they're small and we'll manage.
-	ycbcr_format.full_range = true;
+	// our processing is fades or other linear operations, so if we're in
+	// limited-range input, we'll stay in limited-range output. (Fading
+	// between limited-range and full-range sources will be broken,
+	// of course.) There will be some slight confusion in the parts of the
+	// algorithms dealing with RGB, but they're small and we'll manage.
+	ycbcr_format.full_range = false;
 
 	YCbCrFormat ycbcr_output_format = ycbcr_format;
 	ycbcr_output_format.chroma_subsampling_x = 1;
@@ -77,19 +80,23 @@ YCbCrConverter::YCbCrConverter(YCbCrConverter::OutputMode output_mode, ResourceP
 	semiplanar_chain->set_dither_bits(8);
 	semiplanar_chain->finalize();
 
-	// Fade chains.
+	// Fade chains. These include white balance adjustments.
 	for (bool first_input_is_semiplanar : { false, true }) {
 		for (bool second_input_is_semiplanar : { false, true }) {
 			FadeChain &fade_chain = fade_chains[first_input_is_semiplanar][second_input_is_semiplanar];
 			fade_chain.chain.reset(new EffectChain(global_flags.width, global_flags.height, resource_pool));
-			fade_chain.input[0] = (movit::YCbCrInput *)fade_chain.chain->add_input(
+			fade_chain.input[0] = (YCbCrInput *)fade_chain.chain->add_input(
 				new YCbCrInput(inout_format, ycbcr_format, global_flags.width, global_flags.height,
 				               first_input_is_semiplanar ? YCBCR_INPUT_SPLIT_Y_AND_CBCR : YCBCR_INPUT_PLANAR));
-			fade_chain.input[1] = (movit::YCbCrInput *)fade_chain.chain->add_input(
+			fade_chain.input[1] = (YCbCrInput *)fade_chain.chain->add_input(
 				new YCbCrInput(inout_format, ycbcr_format, global_flags.width, global_flags.height,
 				               second_input_is_semiplanar ? YCBCR_INPUT_SPLIT_Y_AND_CBCR : YCBCR_INPUT_PLANAR));
-			fade_chain.mix_effect = (movit::MixEffect *)fade_chain.chain->add_effect(
-				new MixEffect, fade_chain.input[0], fade_chain.input[1]);
+			fade_chain.wb_effect[0] = (WhiteBalanceEffect *)fade_chain.chain->add_effect(
+				new WhiteBalanceEffect, fade_chain.input[0]);
+			fade_chain.wb_effect[1] = (WhiteBalanceEffect *)fade_chain.chain->add_effect(
+				new WhiteBalanceEffect, fade_chain.input[1]);
+			fade_chain.mix_effect = (MixEffect *)fade_chain.chain->add_effect(
+				new MixEffect, fade_chain.wb_effect[0], fade_chain.wb_effect[1]);
 			setup_outputs(output_mode, inout_format, ycbcr_output_format, fade_chain.chain.get());
 			fade_chain.chain->set_dither_bits(8);
 			fade_chain.chain->finalize();
@@ -103,17 +110,21 @@ YCbCrConverter::YCbCrConverter(YCbCrConverter::OutputMode output_mode, ResourceP
 		fade_chain.chain.reset(new EffectChain(global_flags.width, global_flags.height, resource_pool));
 
 		ycbcr_format.chroma_subsampling_x = 1;
-		fade_chain.input[0] = (movit::YCbCrInput *)fade_chain.chain->add_input(
+		fade_chain.input[0] = (YCbCrInput *)fade_chain.chain->add_input(
 			new YCbCrInput(inout_format, ycbcr_format, global_flags.width, global_flags.height,
 			               YCBCR_INPUT_INTERLEAVED));
 
 		ycbcr_format.chroma_subsampling_x = 2;
-		fade_chain.input[1] = (movit::YCbCrInput *)fade_chain.chain->add_input(
+		fade_chain.input[1] = (YCbCrInput *)fade_chain.chain->add_input(
 			new YCbCrInput(inout_format, ycbcr_format, global_flags.width, global_flags.height,
 			               second_input_is_semiplanar ? YCBCR_INPUT_SPLIT_Y_AND_CBCR : YCBCR_INPUT_PLANAR));
 
-		fade_chain.mix_effect = (movit::MixEffect *)fade_chain.chain->add_effect(
-			new MixEffect, fade_chain.input[0], fade_chain.input[1]);
+		fade_chain.wb_effect[0] = (WhiteBalanceEffect *)fade_chain.chain->add_effect(
+			new WhiteBalanceEffect, fade_chain.input[0]);
+		fade_chain.wb_effect[1] = (WhiteBalanceEffect *)fade_chain.chain->add_effect(
+			new WhiteBalanceEffect, fade_chain.input[1]);
+		fade_chain.mix_effect = (MixEffect *)fade_chain.chain->add_effect(
+			new MixEffect, fade_chain.wb_effect[0], fade_chain.wb_effect[1]);
 		setup_outputs(output_mode, inout_format, ycbcr_output_format, fade_chain.chain.get());
 		fade_chain.chain->set_dither_bits(8);
 		fade_chain.chain->finalize();
@@ -138,11 +149,15 @@ EffectChain *YCbCrConverter::prepare_chain_for_fade(shared_ptr<Frame> frame, sha
 	setup_input_for_frame(secondary_frame, ycbcr_format, fade_chain.input[1]);
 	bool ok = fade_chain.mix_effect->set_float("strength_first", 1.0f - fade_alpha);
 	ok |= fade_chain.mix_effect->set_float("strength_second", fade_alpha);
+	RGBTriplet neutral_color0 = get_neutral_color(frame->exif_data);
+	RGBTriplet neutral_color1 = get_neutral_color(secondary_frame->exif_data);
+	ok |= fade_chain.wb_effect[0]->set_vec3("neutral_color", (float *)&neutral_color0);
+	ok |= fade_chain.wb_effect[1]->set_vec3("neutral_color", (float *)&neutral_color1);
 	assert(ok);
 	return fade_chain.chain.get();
 }
 
-EffectChain *YCbCrConverter::prepare_chain_for_fade_from_texture(GLuint tex, unsigned width, unsigned height, std::shared_ptr<Frame> secondary_frame, float fade_alpha)
+EffectChain *YCbCrConverter::prepare_chain_for_fade_from_texture(GLuint tex, RGBTriplet tex_neutral_color, unsigned width, unsigned height, std::shared_ptr<Frame> secondary_frame, float fade_alpha)
 {
 	const FadeChain &fade_chain = interleaved_fade_chains[secondary_frame->is_semiplanar];
 	{
@@ -163,6 +178,9 @@ EffectChain *YCbCrConverter::prepare_chain_for_fade_from_texture(GLuint tex, uns
 	setup_input_for_frame(secondary_frame, ycbcr_format, fade_chain.input[1]);
 	bool ok = fade_chain.mix_effect->set_float("strength_first", 1.0f - fade_alpha);
 	ok |= fade_chain.mix_effect->set_float("strength_second", fade_alpha);
+	RGBTriplet neutral_color1 = get_neutral_color(secondary_frame->exif_data);
+	ok |= fade_chain.wb_effect[0]->set_vec3("neutral_color", (float *)&tex_neutral_color);
+	ok |= fade_chain.wb_effect[1]->set_vec3("neutral_color", (float *)&neutral_color1);
 	assert(ok);
 	return fade_chain.chain.get();
 }
diff --git a/futatabi/ycbcr_converter.h b/futatabi/ycbcr_converter.h
index 4618f1e..c2ad2fa 100644
--- a/futatabi/ycbcr_converter.h
+++ b/futatabi/ycbcr_converter.h
@@ -3,6 +3,7 @@
 
 #include <epoxy/gl.h>
 #include <memory>
+#include <movit/effect.h>
 #include <movit/ycbcr_input.h>
 
 namespace movit {
@@ -10,6 +11,7 @@ namespace movit {
 class EffectChain;
 class MixEffect;
 class ResourcePool;
+class WhiteBalanceEffect;
 struct YCbCrFormat;
 
 }  // namespace movit
@@ -25,12 +27,13 @@ public:
 	};
 	YCbCrConverter(OutputMode output_mode, movit::ResourcePool *resource_pool);
 
-	// Returns the appropriate chain for rendering.
+	// Returns the appropriate chain for rendering. Fades apply white balance,
+	// straight-up conversion does not.
 	movit::EffectChain *prepare_chain_for_conversion(std::shared_ptr<Frame> frame);
 	movit::EffectChain *prepare_chain_for_fade(std::shared_ptr<Frame> frame, std::shared_ptr<Frame> secondary_frame, float fade_alpha);
 
 	// <tex> must be interleaved Y'CbCr.
-	movit::EffectChain *prepare_chain_for_fade_from_texture(GLuint tex, unsigned width, unsigned height, std::shared_ptr<Frame> secondary_frame, float fade_alpha);
+	movit::EffectChain *prepare_chain_for_fade_from_texture(GLuint tex, movit::RGBTriplet neutral_color, unsigned width, unsigned height, std::shared_ptr<Frame> secondary_frame, float fade_alpha);
 
 private:
 	movit::YCbCrFormat ycbcr_format;
@@ -45,6 +48,7 @@ private:
 	struct FadeChain {
 		std::unique_ptr<movit::EffectChain> chain;
 		movit::YCbCrInput *input[2];
+		movit::WhiteBalanceEffect *wb_effect[2];
 		movit::MixEffect *mix_effect;
 	};
 	FadeChain fade_chains[2][2];
diff --git a/meson.build b/meson.build
index b23f6d9..d705c7f 100644
--- a/meson.build
+++ b/meson.build
@@ -290,6 +290,7 @@ futatabi_srcs += ['futatabi/main.cpp', 'futatabi/player.cpp', 'futatabi/video_st
 futatabi_srcs += ['futatabi/vaapi_jpeg_decoder.cpp', 'futatabi/db.cpp', 'futatabi/ycbcr_converter.cpp', 'futatabi/flags.cpp']
 futatabi_srcs += ['futatabi/mainwindow.cpp', 'futatabi/jpeg_frame_view.cpp', 'futatabi/clip_list.cpp', 'futatabi/frame_on_disk.cpp']
 futatabi_srcs += ['futatabi/export.cpp', 'futatabi/midi_mapper.cpp', 'futatabi/midi_mapping_dialog.cpp']
+futatabi_srcs += ['futatabi/exif_parser.cpp']
 futatabi_srcs += moc_files
 futatabi_srcs += proto_generated
 
@@ -307,7 +308,7 @@ futatabi_shader_srcs = bin2h_gen.process(shaders)
 futatabi_srcs += futatabi_shader_srcs
 
 executable('futatabi', futatabi_srcs,
-	dependencies: [shareddep, qt5deps, libjpegdep, movitdep, libmicrohttpddep, protobufdep, sqlite3dep, vax11dep, vadrmdep, x11dep, threaddep, libavformatdep, libavcodecdep, libavutildep, libswscaledep],
+	dependencies: [shareddep, qt5deps, libjpegdep, movitdep, libmicrohttpddep, protobufdep, sqlite3dep, vax11dep, vadrmdep, x11dep, threaddep, libavformatdep, libavcodecdep, libavutildep, libswscaledep, eigendep],
 	link_with: shared,
 	include_directories: [include_directories('futatabi')],
 	install: true)
-- 
2.39.2