From 0a3c9320e21fc211f0c61a4bda1c6932920c6883 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Wed, 3 Oct 2012 16:30:34 +0200
Subject: [PATCH] Flesh out the blur code a little. It actually blurs now,
 although it is not as pretty as I would like it.

---
 blur_effect.cpp  | 157 +++++++++++++++++++++++++++++++++++++++--------
 blur_effect.frag |  28 +++------
 2 files changed, 139 insertions(+), 46 deletions(-)

diff --git a/blur_effect.cpp b/blur_effect.cpp
index 17044f1..fd81aa8 100644
--- a/blur_effect.cpp
+++ b/blur_effect.cpp
@@ -8,6 +8,9 @@
 #include "blur_effect.h"
 #include "util.h"
 
+// Must match blur_effect.frag.
+#define NUM_TAPS 16
+
 BlurEffect::BlurEffect()
 	: radius(3.0f),
 	  direction(HORIZONTAL)
@@ -25,16 +28,27 @@ void BlurEffect::set_uniforms(GLuint glsl_program_num, const std::string &prefix
 {
 	Effect::set_uniforms(glsl_program_num, prefix, sampler_num);
 
-	// We only have 15 taps to work with, and we want that to reach out to about 2.5*sigma.
-	// Bump up the mipmap levels (giving us box blurs) until we have what we need.
+	int base_texture_size, texture_size;
+	if (direction == HORIZONTAL) {
+		base_texture_size = texture_size = 1280;  // FIXME
+	} else if (direction == VERTICAL) {
+		base_texture_size = texture_size = 720;  // FIXME
+	} else {
+		assert(false);
+	}
+
+	// We only have 16 taps to work with on each side, and we want that to
+	// reach out to about 2.5*sigma.  Bump up the mipmap levels (giving us
+	// box blurs) until we have what we need.
+	//
+	// FIXME: we really need to pick the same mipmap level for both horizontal and vertical!
 	unsigned base_mipmap_level = 0;
 	float adjusted_radius = radius;
-	float pixel_size = 1.0f;
-	while (adjusted_radius * 2.5f > 7.0f) {
+	while (texture_size > 1 && adjusted_radius * 2.5f > NUM_TAPS / 2) {
 		++base_mipmap_level;
-		adjusted_radius *= 0.5f;
-		pixel_size *= 2.0f;
-	}	
+		texture_size /= 2;  // Rounding down.
+		adjusted_radius = radius * float(texture_size) / float(base_texture_size);
+	}
 
 	// In the second pass, we do the same, but don't sample from a mipmap;
 	// that would re-blur the other direction in an ugly fashion, and we already
@@ -47,32 +61,121 @@ void BlurEffect::set_uniforms(GLuint glsl_program_num, const std::string &prefix
 	}
 
 	glActiveTexture(GL_TEXTURE0);
+	check_error();
 	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, base_mipmap_level);
 	check_error();
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, base_mipmap_level);
+	check_error();
 
-	// FIXME
-	if (direction == HORIZONTAL) {
-		float ps[] = { pixel_size / 1280.0f, 0.0f };
-		set_uniform_vec2(glsl_program_num, prefix, "pixel_offset", ps);
-	} else if (direction == VERTICAL) {
-		float ps[] = { 0.0f, pixel_size / 720.0f };
-		set_uniform_vec2(glsl_program_num, prefix, "pixel_offset", ps);
+	// Compute the weights; they will be symmetrical, so we only compute
+	// the right side.
+	float weight[NUM_TAPS + 1];
+	if (radius < 1e-3) {
+		weight[0] = 1.0f;
+		for (unsigned i = 1; i < NUM_TAPS + 1; ++i) {
+			weight[i] = 0.0f;
+		}
 	} else {
-		assert(false);
+		float sum = 0.0f;
+		for (unsigned i = 0; i < NUM_TAPS + 1; ++i) {
+			float z = i / adjusted_radius;
+
+			// Gaussian blur is a common, but maybe not the prettiest choice;
+			// it can feel a bit too blurry in the fine detail and too little
+			// long-tail. This is a simple logistic distribution, which has
+			// a narrower peak but longer tails.
+			weight[i] = 1.0f / (cosh(z) * cosh(z));
+
+			if (i == 0) {
+				sum += weight[i];
+			} else {
+				sum += 2.0f * weight[i];
+			}
+		}
+		for (unsigned i = 0; i < NUM_TAPS + 1; ++i) {
+			weight[i] /= sum;
+		}
 	}
 
-	// Simple Gaussian weights for now.
-	float weight[15], total = 0.0f;
-	for (unsigned i = 0; i < 15; ++i) {
-		float z = (i - 7.0f) / adjusted_radius;
-		weight[i] = exp(-(z*z));
-		total += weight[i];
+#if 0
+	// NOTE: This is currently broken.
+
+	// Since the GPU gives us bilinear sampling for free, we can get two
+	// samples for the price of one (for every but the center sample,
+	// in which case this trick doesn't buy us anything). Simply sample
+	// between the two pixel centers, and we can do with fewer weights.
+	// (This is right even in the vertical pass where we don't actually
+	// sample between the pixels, because we have linear interpolation
+	// there too.)
+	//
+	// We pack the parameters into a float4: The relative sample coordinates
+	// in (x,y), and the weight in z. w is unused.
+	float samples[4 * (NUM_TAPS / 2 + 1)];
+
+	// Center sample.
+	samples[4 * 0 + 0] = 0.0f;
+	samples[4 * 0 + 1] = 0.0f;
+	samples[4 * 0 + 2] = weight[0];
+	samples[4 * 0 + 3] = 0.0f;
+
+	// All other samples.
+	for (unsigned i = 1; i < NUM_TAPS / 2 + 1; ++i) {
+		unsigned base_pos = i * 2 - 1;
+		float w1 = weight[base_pos];
+		float w2 = weight[base_pos + 1];
+
+		float offset, total_weight;
+		if (w1 + w2 < 1e-6) {
+			offset = 0.5f;
+			total_weight = 0.0f;
+		} else {
+			offset = w2 / (w1 + w2);
+			total_weight = w1 + w2;
+		}
+#if 0
+		// hack for easier visualization
+		offset = 0.5f;
+		total_weight = 8.0f;
+#endif
+		float x = 0.0f, y = 0.0f;
+
+		if (direction == HORIZONTAL) {
+			x = (base_pos + offset) / (float)texture_size;
+		} else if (direction == VERTICAL) {
+			y = (base_pos + offset) / (float)texture_size;
+		} else {
+			assert(false);
+		}
+
+		samples[4 * i + 0] = x;
+		samples[4 * i + 1] = y;
+		samples[4 * i + 2] = total_weight;
+		samples[4 * i + 3] = 0.0f;
 	}
-	printf("[mip level %d] ", base_mipmap_level);
-	for (unsigned i = 0; i < 15; ++i) {
-		weight[i] /= total;
-		printf("%f ", weight[i]);
+
+	set_uniform_vec4_array(glsl_program_num, prefix, "samples", samples, NUM_TAPS / 2 + 1);
+#else
+	// Boring, at-whole-pixels sampling.
+	float samples[4 * (NUM_TAPS + 1)];  // NUM_TAPS side taps plus the center tap.
+
+	// One vec4 per tap: offset in (x,y), weight in z, w unused. Tap 0 is the center.
+	for (unsigned i = 0; i < NUM_TAPS + 1; ++i) {
+		float x = 0.0f, y = 0.0f;
+
+		if (direction == HORIZONTAL) {
+			x = i / (float)texture_size;
+		} else if (direction == VERTICAL) {
+			y = i / (float)texture_size;
+		} else {
+			assert(false);
+		}
+
+		samples[4 * i + 0] = x;
+		samples[4 * i + 1] = y;
+		samples[4 * i + 2] = weight[i];
+		samples[4 * i + 3] = 0.0f;
+	}
+
+	set_uniform_vec4_array(glsl_program_num, prefix, "samples", samples, NUM_TAPS + 1);
+#endif
 }
diff --git a/blur_effect.frag b/blur_effect.frag
index 8c0a01d..7e5a424 100644
--- a/blur_effect.frag
+++ b/blur_effect.frag
@@ -1,24 +1,14 @@
 // A simple unidirectional blur.
 
-uniform vec2 PREFIX(pixel_offset);
-uniform float PREFIX(weight)[15];
+#define NUM_TAPS 16
+
+uniform vec4 PREFIX(samples)[NUM_TAPS + 1];
 
 vec4 FUNCNAME(vec2 tc) {
-	vec4 x = LAST_INPUT(tc);
-	return
-		vec4(PREFIX(weight)[ 0]) * LAST_INPUT(tc - 7.0 * PREFIX(pixel_offset)) +
-		vec4(PREFIX(weight)[ 1]) * LAST_INPUT(tc - 6.0 * PREFIX(pixel_offset)) +
-		vec4(PREFIX(weight)[ 2]) * LAST_INPUT(tc - 5.0 * PREFIX(pixel_offset)) +
-		vec4(PREFIX(weight)[ 3]) * LAST_INPUT(tc - 4.0 * PREFIX(pixel_offset)) +
-		vec4(PREFIX(weight)[ 4]) * LAST_INPUT(tc - 3.0 * PREFIX(pixel_offset)) +
-		vec4(PREFIX(weight)[ 5]) * LAST_INPUT(tc - 2.0 * PREFIX(pixel_offset)) +
-		vec4(PREFIX(weight)[ 6]) * LAST_INPUT(tc -       PREFIX(pixel_offset)) +
-		vec4(PREFIX(weight)[ 7]) * LAST_INPUT(tc) +
-		vec4(PREFIX(weight)[ 8]) * LAST_INPUT(tc +       PREFIX(pixel_offset)) +
-		vec4(PREFIX(weight)[ 9]) * LAST_INPUT(tc + 2.0 * PREFIX(pixel_offset)) +
-		vec4(PREFIX(weight)[10]) * LAST_INPUT(tc + 3.0 * PREFIX(pixel_offset)) +
-		vec4(PREFIX(weight)[11]) * LAST_INPUT(tc + 4.0 * PREFIX(pixel_offset)) +
-		vec4(PREFIX(weight)[12]) * LAST_INPUT(tc + 5.0 * PREFIX(pixel_offset)) +
-		vec4(PREFIX(weight)[13]) * LAST_INPUT(tc + 6.0 * PREFIX(pixel_offset)) +
-		vec4(PREFIX(weight)[14]) * LAST_INPUT(tc + 7.0 * PREFIX(pixel_offset));
+	vec4 sum = vec4(PREFIX(samples)[0].z) * LAST_INPUT(tc);  // Center tap.
+	for (int i = 1; i < NUM_TAPS + 1; ++i) {
+		vec4 tap = PREFIX(samples)[i];  // (x,y) = offset, z = weight. Not called "sample": keyword in GLSL 4.00+.
+		sum += vec4(tap.z) * (LAST_INPUT(tc - tap.xy) + LAST_INPUT(tc + tap.xy));  // Symmetric pair.
+	}
+	return sum;
 }
-- 
2.39.2