From 919101c59390dbbe380af7cc77102819e515a632 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Sun, 2 Jul 2017 10:10:11 +0200
Subject: [PATCH] =?utf8?q?Inline=20combine=5Ftwo=5Fsamples=20(and=20remove?=
 =?utf8?q?=20an=20obsolete=20assert).=20Helps=2013=E2=80=9314%=20on=20Resa?=
 =?utf8?q?mpleEffect::calculate=5Ftexture.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

---
 util.cpp | 53 -----------------------------------------------------
 util.h   | 42 +++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 41 insertions(+), 54 deletions(-)
diff --git a/util.cpp b/util.cpp
index 54c815c..4016644 100644
--- a/util.cpp
+++ b/util.cpp
@@ -219,59 +219,6 @@ string output_glsl_vec3(const string &name, float x, float y, float z)
 	return ss.str();
 }
 
-template<class DestFloat>
-void combine_two_samples(float w1, float w2, float pos1, float pos1_pos2_diff, float inv_pos1_pos2_diff, float num_subtexels, float inv_num_subtexels,
-                         DestFloat *offset, DestFloat *total_weight, float *sum_sq_error)
-{
-	assert(movit_initialized);
-	assert(w1 * w2 >= 0.0f);  // Should not have differing signs.
-	float z;  // Normalized 0..1 between pos1 and pos2.
-	if (fabs(w1 + w2) < 1e-6) {
-		z = 0.5f;
-	} else {
-		z = w2 / (w1 + w2);
-	}
-
-	// Round to the desired precision. Note that this might take z outside the 0..1 range.
-	*offset = from_fp32<DestFloat>(pos1 + z * pos1_pos2_diff);
-	z = (to_fp32(*offset) - pos1) * inv_pos1_pos2_diff;
-
-	// Round to the minimum number of bits we have measured earlier.
-	// The card will do this for us anyway, but if we know what the real z
-	// is, we can pick a better total_weight below.
-	z = lrintf(z * num_subtexels) * inv_num_subtexels;
-	
-	// Choose total weight w so that we minimize total squared error
-	// for the effective weights:
-	//
-	//   e = (w(1-z) - a)² + (wz - b)²
-	//
-	// Differentiating by w and setting equal to zero:
-	//
-	//   2(w(1-z) - a)(1-z) + 2(wz - b)z = 0
-	//   w(1-z)² - a(1-z) + wz² - bz = 0
-	//   w((1-z)² + z²) = a(1-z) + bz
-	//   w = (a(1-z) + bz) / ((1-z)² + z²)
-	//
-	// If z had infinite precision, this would simply reduce to w = w1 + w2.
-	*total_weight = from_fp32<DestFloat>((w1 + z * (w2 - w1)) / (z * z + (1 - z) * (1 - z)));
-
-	if (sum_sq_error != NULL) {
-		float err1 = to_fp32(*total_weight) * (1 - z) - w1;
-		float err2 = to_fp32(*total_weight) * z - w2;
-		*sum_sq_error = err1 * err1 + err2 * err2;
-	}
-}
-
-// Explicit instantiations.
-template
-void combine_two_samples<float>(float w1, float w2, float pos1, float pos1_pos2_diff, float inv_pos1_pos2_diff, float num_subtexels, float inv_num_subtexels,
-                                float *offset, float *total_weight, float *sum_sq_error);
-
-template
-void combine_two_samples<fp16_int_t>(float w1, float w2, float pos1, float pos1_pos2_diff, float inv_pos1_pos2_diff, float num_subtexels, float inv_num_subtexels,
-                                     fp16_int_t *offset, fp16_int_t *total_weight, float *sum_sq_error);
-
 GLuint generate_vbo(GLint size, GLenum type, GLsizeiptr data_size, const GLvoid *data)
 {
 	GLuint vbo;
diff --git a/util.h b/util.h
index 577140b..feefea5 100644
--- a/util.h
+++ b/util.h
@@ -9,6 +9,7 @@
 #include <Eigen/Core>
 #include <string>
 #include "defs.h"
+#include "fp16.h"
 
 #define BUFFER_OFFSET(i) ((char *)NULL + (i))
 
@@ -74,7 +75,46 @@ enum CombineRoundingBehavior {
 // and sum_sq_error.
 template<class DestFloat>
 void combine_two_samples(float w1, float w2, float pos1, float pos1_pos2_diff, float inv_pos1_pos2_diff, float num_subtexels, float inv_num_subtexels,
-                         DestFloat *offset, DestFloat *total_weight, float *sum_sq_error);
+                         DestFloat *offset, DestFloat *total_weight, float *sum_sq_error)
+{
+	assert(w1 * w2 >= 0.0f);  // Should not have differing signs.
+	float z;  // Normalized 0..1 between pos1 and pos2.
+	if (fabs(w1 + w2) < 1e-6) {  // Avoid dividing by (nearly) zero below.
+		z = 0.5f;
+	} else {
+		z = w2 / (w1 + w2);
+	}
+
+	// Round to the desired precision. Note that this might take z outside the 0..1 range.
+	*offset = from_fp32<DestFloat>(pos1 + z * pos1_pos2_diff);
+	z = (to_fp32(*offset) - pos1) * inv_pos1_pos2_diff;
+
+	// Round to the minimum number of bits we have measured earlier.
+	// The card will do this for us anyway, but if we know what the real z
+	// is, we can pick a better total_weight below.
+	z = lrintf(z * num_subtexels) * inv_num_subtexels;
+
+	// Choose total weight w so that we minimize total squared error
+	// for the effective weights (a = w1 and b = w2 are the wanted weights):
+	//
+	//   e = (w(1-z) - a)² + (wz - b)²
+	//
+	// Differentiating by w and setting equal to zero:
+	//
+	//   2(w(1-z) - a)(1-z) + 2(wz - b)z = 0
+	//   w(1-z)² - a(1-z) + wz² - bz = 0
+	//   w((1-z)² + z²) = a(1-z) + bz
+	//   w = (a(1-z) + bz) / ((1-z)² + z²)
+	//
+	// If z had infinite precision, this would simply reduce to w = w1 + w2.
+	*total_weight = from_fp32<DestFloat>((w1 + z * (w2 - w1)) / (z * z + (1 - z) * (1 - z)));
+
+	if (sum_sq_error != NULL) {  // Optional output; skipped when the caller passes NULL.
+		float err1 = to_fp32(*total_weight) * (1 - z) - w1;
+		float err2 = to_fp32(*total_weight) * z - w2;
+		*sum_sq_error = err1 * err1 + err2 * err2;
+	}
+}
 
 // Create a VBO with the given data. Returns the VBO number.
 GLuint generate_vbo(GLint size, GLenum type, GLsizeiptr data_size, const GLvoid *data);
-- 
2.39.2