From b0f2d8f7604bfb4c8a9824a3a022d32ef26e12cc Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Fri, 5 Oct 2012 12:03:49 +0200
Subject: [PATCH] Actually implement multiple inputs to phases. Surprising
 amounts of stuff needed...

---
 effect_chain.cpp | 289 +++++++++++++++++++++++++++++++++++------------
 effect_chain.h   |  24 ++--
 header.frag      |   8 --
 3 files changed, 234 insertions(+), 87 deletions(-)
diff --git a/effect_chain.cpp b/effect_chain.cpp
index fc20ae1..1244ecb 100644
--- a/effect_chain.cpp
+++ b/effect_chain.cpp
@@ -7,6 +7,9 @@
 #include <GL/gl.h>
 #include <GL/glext.h>
 
+#include <algorithm>
+#include <set>
+
 #include "util.h"
 #include "effect_chain.h"
 #include "gamma_expansion_effect.h"
@@ -31,6 +34,7 @@ void EffectChain::add_input(const ImageFormat &format)
 	input_format = format;
 	output_color_space.insert(std::make_pair(static_cast<Effect *>(NULL), format.color_space));
 	output_gamma_curve.insert(std::make_pair(static_cast<Effect *>(NULL), format.gamma_curve));
+	effect_ids.insert(std::make_pair(static_cast<Effect *>(NULL), "src_image"));
 }
 
 void EffectChain::add_output(const ImageFormat &format)
@@ -40,12 +44,19 @@ void EffectChain::add_output(const ImageFormat &format)
 
 void EffectChain::add_effect_raw(Effect *effect, const std::vector<Effect *> &inputs)
 {
+	char effect_id[256];  // Receives the generated "effN" identifier for this effect.
+	sprintf(effect_id, "eff%u", (unsigned)effects.size());  // N = number of effects added before this one.
+
 	effects.push_back(effect);
+	effect_ids.insert(std::make_pair(effect, effect_id));  // Looked up again when shader code is generated.
 	assert(inputs.size() == effect->num_inputs());
 	for (unsigned i = 0; i < inputs.size(); ++i) {
-		outgoing_links.insert(std::make_pair(inputs[i], effect));
-		incoming_links.insert(std::make_pair(effect, inputs[i]));
+		if (inputs[i] != NULL) {  // NULL is registered by add_input() under the id "src_image".
+			assert(std::find(effects.begin(), effects.end(), inputs[i]) != effects.end());  // Any other input must already be in the chain.
+		}
+		outgoing_links[inputs[i]].push_back(effect);  // Forward edge: input -> effect that consumes it.
 	}
+	incoming_links.insert(std::make_pair(effect, inputs));  // Backward edges, in the same order as "inputs".
 	last_added_effect = effect;
 }
 
@@ -169,26 +180,82 @@ std::string replace_prefix(const std::string &text, const std::string &prefix)
 	return output;
 }
 
-EffectChain::Phase EffectChain::compile_glsl_program(unsigned start_index, unsigned end_index)
+EffectChain::Phase EffectChain::compile_glsl_program(const std::vector<Effect *> &inputs, const std::vector<Effect *> &effects)
 {
+	assert(!inputs.empty());
+	assert(!effects.empty());
+
+	// Build one fragment shader computing "effects" in order. First find the true
+	// inputs of the phase: those we need somehow but don't calculate ourselves.
+	std::set<Effect *> effect_set(effects.begin(), effects.end());
+	std::set<Effect *> input_set(inputs.begin(), inputs.end());  // Also removes duplicates.
+	std::vector<Effect *> true_inputs;
+	std::set_difference(input_set.begin(), input_set.end(),
+		effect_set.begin(), effect_set.end(),
+		std::back_inserter(true_inputs));
+
 	bool input_needs_mipmaps = false;
 	std::string frag_shader = read_file("header.frag");
-	for (unsigned i = start_index; i < end_index; ++i) {
-		char effect_id[256];
-		sprintf(effect_id, "eff%d", i);
+
+	// Create a sampler uniform and a read function for each texture input that we need.
+	for (unsigned i = 0; i < true_inputs.size(); ++i) {
+		Effect *effect = true_inputs[i];
+		assert(effect_ids.count(effect) != 0);
+		std::string effect_id = effect_ids[effect];
+
+		frag_shader += std::string("uniform sampler2D tex_") + effect_id + ";\n";
+		frag_shader += std::string("vec4 ") + effect_id + "(vec2 tc) {\n";
+		if (effect == NULL) {
+			// OpenGL's origin is bottom-left, but most graphics software assumes
+			// a top-left origin. Thus, for inputs that come from the user,
+			// we flip the y coordinate. However, for FBOs, the origin
+			// is all correct, so don't do anything.
+			frag_shader += "\ttc.y = 1.0 - tc.y;\n";  // No "f" suffix: GLSL 1.10 does not accept it.
+		}
+		frag_shader += "\treturn texture2D(tex_" + effect_id + ", tc);\n";
+		frag_shader += "}\n";
+		frag_shader += "\n";
+	}
+
+	std::string last_effect_id;  // Id of the final effect; becomes INPUT for the footer.
+	for (unsigned i = 0; i < effects.size(); ++i) {
+		Effect *effect = effects[i];
+		assert(effect != NULL);
+		assert(effect_ids.count(effect) != 0);
+		std::string effect_id = effect_ids[effect];
+		last_effect_id = effect_id;
+
+		if (incoming_links[effect].size() == 1) {  // Single input: plain INPUT macro.
+			frag_shader += std::string("#define INPUT ") + effect_ids[incoming_links[effect][0]] + "\n";
+		} else {  // Otherwise: INPUT1, INPUT2, ... in input order.
+			for (unsigned j = 0; j < incoming_links[effect].size(); ++j) {
+				char buf[256];
+				sprintf(buf, "#define INPUT%u %s\n", j + 1, effect_ids[incoming_links[effect][j]].c_str());
+				frag_shader += buf;
+			}
+		}
 	
 		frag_shader += "\n";
 		frag_shader += std::string("#define FUNCNAME ") + effect_id + "\n";
-		frag_shader += replace_prefix(effects[i]->output_convenience_uniforms(), effect_id);
-		frag_shader += replace_prefix(effects[i]->output_fragment_shader(), effect_id);
+		frag_shader += replace_prefix(effect->output_convenience_uniforms(), effect_id);
+		frag_shader += replace_prefix(effect->output_fragment_shader(), effect_id);
 		frag_shader += "#undef PREFIX\n";
 		frag_shader += "#undef FUNCNAME\n";
-		frag_shader += "#undef INPUT\n";
-		frag_shader += std::string("#define INPUT ") + effect_id + "\n";
+		if (incoming_links[effect].size() == 1) {  // Undo exactly the macros defined above.
+			frag_shader += "#undef INPUT\n";
+		} else {
+			for (unsigned j = 0; j < incoming_links[effect].size(); ++j) {
+				char buf[256];
+				sprintf(buf, "#undef INPUT%u\n", j + 1);
+				frag_shader += buf;
+			}
+		}
 		frag_shader += "\n";
 
-		input_needs_mipmaps |= effects[i]->needs_mipmaps();
+		input_needs_mipmaps |= effect->needs_mipmaps();
 	}
+	assert(!last_effect_id.empty());
+	frag_shader += std::string("#define INPUT ") + last_effect_id + "\n";  // footer.frag is appended with INPUT = last effect.
 	frag_shader.append(read_file("footer.frag"));
 	printf("%s\n", frag_shader.c_str());
 	
@@ -205,12 +272,92 @@ EffectChain::Phase EffectChain::compile_glsl_program(unsigned start_index, unsig
 	Phase phase;
 	phase.glsl_program_num = glsl_program_num;
 	phase.input_needs_mipmaps = input_needs_mipmaps;
-	phase.start = start_index;
-	phase.end = end_index;
+	phase.inputs = true_inputs;
+	phase.effects = effects;
 
 	return phase;
 }
 
+// Construct GLSL programs, starting at the given effect (NULL = the chain
+// input) and following the chain from there. A phase ends at an effect that
+// needs a texture bounce, at one that is used by multiple other effects, and
+// of course at the end. Effects handled so far go in *completed_effects.
+void EffectChain::construct_glsl_programs(Effect *start, std::set<Effect *> *completed_effects)
+{
+	if (completed_effects->count(start) != 0) {
+		// This has already been done for us.
+		return;
+	}
+
+	std::vector<Effect *> this_phase_inputs;  // Also includes all intermediates; these will be filtered away later.
+	std::vector<Effect *> this_phase_effects;
+	Effect *node = start;
+	for ( ;; ) {  // Termination condition within loop.
+		if (node == NULL) {
+			this_phase_inputs.push_back(node);  // The source image is only ever an input, never an effect.
+		} else {
+			// Check that we have all the inputs we need for this effect.
+			// If not, we end the phase here right away; the other side
+			// of the input chain will eventually come and pick the effect up.
+			assert(incoming_links.count(node) != 0);
+			std::vector<Effect *> deps = incoming_links[node];
+			assert(!deps.empty());
+			bool have_all_deps = true;
+			for (unsigned i = 0; i < deps.size(); ++i) {
+				if (completed_effects->count(deps[i]) == 0) {
+					have_all_deps = false;
+					break;
+				}
+			}
+
+			if (!have_all_deps) {
+				if (!this_phase_effects.empty()) {  // Nothing to compile if the phase never got started.
+					phases.push_back(compile_glsl_program(this_phase_inputs, this_phase_effects));
+				}
+				return;
+			}
+			this_phase_inputs.insert(this_phase_inputs.end(), deps.begin(), deps.end());
+			this_phase_effects.push_back(node);
+		}
+		completed_effects->insert(node);
+
+		// Find all the effects that use this one as a direct input.
+		if (outgoing_links.count(node) == 0) {
+			// End of the line; output.
+			phases.push_back(compile_glsl_program(this_phase_inputs, this_phase_effects));
+			return;
+		}
+
+		std::vector<Effect *> next = outgoing_links[node];
+		assert(!next.empty());
+		if (next.size() > 1) {
+			// More than one effect uses this as the input.
+			// The easiest thing to do (and probably also the safest
+			// performance-wise in most cases) is to bounce it to a texture
+			// and then let the next passes read from that.
+			if (node != NULL) {  // The source image is already a texture; no phase needed for it.
+				phases.push_back(compile_glsl_program(this_phase_inputs, this_phase_effects));
+			}
+
+			// Start phases for all the effects that need us (in arbitrary order).
+			for (unsigned i = 0; i < next.size(); ++i) {
+				construct_glsl_programs(next[i], completed_effects);
+			}
+			return;
+		}
+
+		// OK, only one effect uses this as the input. Keep iterating, but if it
+		// requires a texture bounce, start a new phase for it first -- unless the
+		// current phase has no effects yet, since then there is nothing to compile.
+		node = next[0];
+		if (node->needs_texture_bounce() && !this_phase_effects.empty()) {
+			phases.push_back(compile_glsl_program(this_phase_inputs, this_phase_effects));
+			this_phase_inputs.clear();
+			this_phase_effects.clear();
+		}
+	}
+}
+
 void EffectChain::finalize()
 {
 	// Add normalizers to get the output format right.
@@ -234,29 +381,24 @@ void EffectChain::finalize()
 		current_gamma_curve = output_format.gamma_curve;
 	}
 
-	// Construct the GLSL programs. We end a program every time we come
-	// to an effect marked as "needs many samples" (ie. "please let me
-	// sample directly from a texture, with no arithmetic in-between"),
-	// and of course at the end.
-	unsigned start = 0;
-	for (unsigned i = 0; i < effects.size(); ++i) {
-		if (effects[i]->needs_texture_bounce() && i != start) {
-			phases.push_back(compile_glsl_program(start, i));
-			start = i;
-		}
-	}
-	phases.push_back(compile_glsl_program(start, effects.size()));
+	// Construct all needed GLSL programs, starting at the input.
+	std::set<Effect *> completed_effects;
+	construct_glsl_programs(NULL, &completed_effects);
 
 	// If we have more than one phase, we need intermediate render-to-texture.
 	// Construct an FBO, and then as many textures as we need.
+	// We choose the simplest option of having one texture per output,
+	// since otherwise this turns into an (albeit simple)
+	// register allocation problem.
 	if (phases.size() > 1) {
 		glGenFramebuffers(1, &fbo);
 
-		unsigned num_textures = std::max<int>(phases.size() - 1, 2);
-		glGenTextures(num_textures, temp_textures);
-
-		for (unsigned i = 0; i < num_textures; ++i) {
-			glBindTexture(GL_TEXTURE_2D, temp_textures[i]);
+		for (unsigned i = 0; i < phases.size() - 1; ++i) {
+			Effect *output_effect = phases[i].effects.back();
+			GLuint temp_texture;
+			glGenTextures(1, &temp_texture);
+			check_error();
+			glBindTexture(GL_TEXTURE_2D, temp_texture);
 			check_error();
 			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
 			check_error();
@@ -264,6 +406,7 @@ void EffectChain::finalize()
 			check_error();
 			glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F_ARB, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL);
 			check_error();
+			effect_output_textures.insert(std::make_pair(output_effect, temp_texture));
 		}
 	}
 	
@@ -359,78 +502,82 @@ void EffectChain::render_to_screen(unsigned char *src)
 		check_error();
 	}
 
+	std::set<Effect *> generated_mipmaps;
+	generated_mipmaps.insert(NULL);  // Already done further up.
+
 	for (unsigned phase = 0; phase < phases.size(); ++phase) {
-		// Set up inputs and outputs for this phase.
-		glActiveTexture(GL_TEXTURE0);
-		if (phase == 0) {
-			// First phase reads from the input texture (which is already bound).
-		} else {
-			glBindTexture(GL_TEXTURE_2D, temp_textures[(phase + 1) % 2]);
-			check_error();
-		}
-		if (phases[phase].input_needs_mipmaps) {
-			if (phase != 0) {
-				// For phase 0, it's done further up.
-				glGenerateMipmap(GL_TEXTURE_2D);
+		glUseProgram(phases[phase].glsl_program_num);
+		check_error();
+
+		// Set up inputs for this phase.
+		assert(!phases[phase].inputs.empty());
+		for (unsigned sampler = 0; sampler < phases[phase].inputs.size(); ++sampler) {
+			glActiveTexture(GL_TEXTURE0 + sampler);
+			Effect *input = phases[phase].inputs[sampler];
+			if (input == NULL) {
+				glBindTexture(GL_TEXTURE_2D, source_image_num);
+				check_error();
+			} else {
+				assert(effect_output_textures.count(input) != 0);
+				glBindTexture(GL_TEXTURE_2D, effect_output_textures[input]);
 				check_error();
 			}
-			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST);
-			check_error();
-		} else {
-			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+			if (phases[phase].input_needs_mipmaps) {
+				if (generated_mipmaps.count(input) == 0) {
+					glGenerateMipmap(GL_TEXTURE_2D);
+					check_error();
+					generated_mipmaps.insert(input);
+				}
+				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR_MIPMAP_NEAREST);
+				check_error();
+			} else {
+				glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+				check_error();
+			}
+
+			assert(effect_ids.count(input));
+			std::string texture_name = std::string("tex_") + effect_ids[input];
+			glUniform1i(glGetUniformLocation(phases[phase].glsl_program_num, texture_name.c_str()), sampler);
 			check_error();
 		}
 
+		// And now the output.
 		if (phase == phases.size() - 1) {
 			// Last phase goes directly to the screen.
 			glBindFramebuffer(GL_FRAMEBUFFER, 0);
 			check_error();
 		} else {
+			Effect *last_effect = phases[phase].effects.back();
+			assert(effect_output_textures.count(last_effect) != 0);
 			glFramebufferTexture2D(
 				GL_FRAMEBUFFER,
 			        GL_COLOR_ATTACHMENT0,
 				GL_TEXTURE_2D,
-				temp_textures[phase % 2],
+				effect_output_textures[last_effect],
 				0);
 			check_error();
 		}
 
-		// We have baked an upside-down transform into the quad coordinates,
-		// since the typical graphics program will have the origin at the upper-left,
-		// while OpenGL uses lower-left. In the next ones, however, the origin
-		// is all right, and we need to reverse that.
-		if (phase == 1) {
-			glTranslatef(0.0f, 1.0f, 0.0f);
-			glScalef(1.0f, -1.0f, 1.0f);
-		}
-
 		// Give the required parameters to all the effects.
-		glUseProgram(phases[phase].glsl_program_num);
-		check_error();
-
-		glUniform1i(glGetUniformLocation(phases[phase].glsl_program_num, "input_tex"), 0);
-		check_error();
-
-		unsigned sampler_num = 1;
-		for (unsigned i = phases[phase].start; i < phases[phase].end; ++i) {
-			char effect_id[256];
-			sprintf(effect_id, "eff%d", i);
-			effects[i]->set_uniforms(phases[phase].glsl_program_num, effect_id, &sampler_num);
+		unsigned sampler_num = phases[phase].inputs.size();
+		for (unsigned i = 0; i < phases[phase].effects.size(); ++i) {
+			Effect *effect = phases[phase].effects[i];
+			effect->set_uniforms(phases[phase].glsl_program_num, effect_ids[effect], &sampler_num);
 		}
 
 		// Now draw!
 		glBegin(GL_QUADS);
 
-		glTexCoord2f(0.0f, 1.0f);
+		glTexCoord2f(0.0f, 0.0f);
 		glVertex2f(0.0f, 0.0f);
 
-		glTexCoord2f(1.0f, 1.0f);
+		glTexCoord2f(1.0f, 0.0f);
 		glVertex2f(1.0f, 0.0f);
 
-		glTexCoord2f(1.0f, 0.0f);
+		glTexCoord2f(1.0f, 1.0f);
 		glVertex2f(1.0f, 1.0f);
 
-		glTexCoord2f(0.0f, 0.0f);
+		glTexCoord2f(0.0f, 1.0f);
 		glVertex2f(0.0f, 1.0f);
 
 		glEnd();
diff --git a/effect_chain.h b/effect_chain.h
index f521e9c..dc52d3e 100644
--- a/effect_chain.h
+++ b/effect_chain.h
@@ -1,6 +1,7 @@
 #ifndef _EFFECT_CHAIN_H
 #define _EFFECT_CHAIN_H 1
 
+#include <set>
 #include <vector>
 
 #include "effect.h"
@@ -78,28 +79,35 @@ private:
 	struct Phase {
 		GLint glsl_program_num;
 		bool input_needs_mipmaps;
-		unsigned start, end;
+		std::vector<Effect *> inputs;  // Effects whose output textures this phase samples; NULL is the source image.
+		std::vector<Effect *> effects;  // Effects computed by this phase, in order; the last one is the phase's output.
 	};
 
 	Effect *normalize_to_linear_gamma(Effect *input);
 	Effect *normalize_to_srgb(Effect *input);
 
-	// Create a GLSL program computing effects [start, end>.
-	Phase compile_glsl_program(unsigned start_index, unsigned end_index);
+	void draw_vertex(float x, float y, const std::vector<Effect *> &inputs);
+
+	// Create a GLSL program computing the given effects in order.
+	Phase compile_glsl_program(const std::vector<Effect *> &inputs, const std::vector<Effect *> &effects);
+
+	// Create all GLSL programs needed to compute the given effect, and all outputs
+	// that depend on it (whenever possible).
+	void construct_glsl_programs(Effect *start, std::set<Effect *> *completed_effects);
 
 	unsigned width, height;
 	ImageFormat input_format, output_format;
-	std::vector<Effect *> effects;
-	std::multimap<Effect *, Effect *> outgoing_links;
-	std::multimap<Effect *, Effect *> incoming_links;
+	std::vector<Effect *> effects, unexpanded_effects;
+	std::map<Effect *, std::string> effect_ids;
+	std::map<Effect *, GLuint> effect_output_textures;
+	std::map<Effect *, std::vector<Effect *> > outgoing_links;
+	std::map<Effect *, std::vector<Effect *> > incoming_links;
 	Effect *last_added_effect;
 
 	GLuint source_image_num;
 	bool use_srgb_texture_format;
 
 	GLuint fbo;
-	GLuint temp_textures[2];
-
 	std::vector<Phase> phases;
 
 	GLenum format, bytes_per_pixel;
diff --git a/header.frag b/header.frag
index 539eb8d..86c5730 100644
--- a/header.frag
+++ b/header.frag
@@ -1,9 +1 @@
-uniform sampler2D input_tex;
 varying vec2 tc;
-
-vec4 read_input(vec2 tc)
-{
-	return texture2D(input_tex, tc.st);
-}
-
-#define INPUT read_input
-- 
2.39.2