From: Steinar H. Gunderson <sgunderson@bigfoot.com>
Date: Tue, 18 Mar 2014 23:20:55 +0000 (+0100)
Subject: Merge branch 'master' into epoxy
X-Git-Tag: 1.1~12^2~28
X-Git-Url: https://git.sesse.net/?p=movit;a=commitdiff_plain;h=ee7863d9cdd683dd4df9d6463d98dc59182c54fe;hp=-c

Merge branch 'master' into epoxy

Conflicts:
	Makefile.in
	README
	movit.pc.in
---

ee7863d9cdd683dd4df9d6463d98dc59182c54fe
diff --combined Makefile.in
index ee3626b,445f5b7..77ced6c
--- a/Makefile.in
+++ b/Makefile.in
@@@ -1,5 -1,14 +1,14 @@@
  GTEST_DIR ?= /usr/src/gtest
  
+ # This will be upgraded for each release, although not necessarily for every git commit.
+ # See http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
+ # for the rules on how this is changed. This does not really mean that Movit will
+ # strive towards having a rock-stable ABI, but at least the soversion will increase
+ # whenever it breaks, so that you will not have silent failures, and distribution package
+ # management can run its course.
+ movit_ltversion = 1:3:0
+ movit_version = 1.0.3
+ 
  prefix = @prefix@
  exec_prefix = @exec_prefix@
  includedir = @includedir@
@@@ -8,17 -17,15 +17,19 @@@ datarootdir = @datarootdir
  datadir = @datadir@
  top_builddir = @top_builddir@
  with_demo_app = @with_demo_app@
 +with_SDL2 = @with_SDL2@
  with_coverage = @with_coverage@
  
  CC=@CC@
  CXX=@CXX@
- CXXFLAGS=-Wall @CXXFLAGS@ -I$(GTEST_DIR)/include @SDL2_CFLAGS@ @SDL_CFLAGS@ @Eigen3_CFLAGS@ @epoxy_CFLAGS@ @FFTW3_CFLAGS@
 -CXXFLAGS=-Wall @CXXFLAGS@ -I$(GTEST_DIR)/include @Eigen3_CFLAGS@ @GLEW_CFLAGS@ @FFTW3_CFLAGS@
++CXXFLAGS=-Wall @CXXFLAGS@ -I$(GTEST_DIR)/include @Eigen3_CFLAGS@ @epoxy_CFLAGS@ @FFTW3_CFLAGS@
 +ifeq ($(with_SDL2),yes)
 +CXXFLAGS += -DHAVE_SDL2
 +endif
- LDFLAGS=@epoxy_LIBS@ @SDL2_LIBS@ @SDL_LIBS@ @FFTW3_LIBS@ -lpthread
+ LDFLAGS=@LDFLAGS@
 -LDLIBS=@GLEW_LIBS@ @FFTW3_LIBS@ -lpthread
 -TEST_LDLIBS=@GLEW_LIBS@ @SDL_LIBS@ -lpthread
 -DEMO_LDLIBS=@SDL_image_LIBS@ -lrt -lpthread @libpng_LIBS@ @FFTW3_LIBS@
++LDLIBS=@epoxy_LIBS@ @FFTW3_LIBS@ -lpthread
++TEST_LDLIBS=@epoxy_LIBS@ @SDL2_LIBS@ @SDL_LIBS@ -lpthread
 +DEMO_LDLIBS=@SDL2_image_LIBS@ @SDL_image_LIBS@ -lrt -lpthread @libpng_LIBS@ @FFTW3_LIBS@
  SHELL=@SHELL@
  LIBTOOL=@LIBTOOL@ --tag=CXX
  RANLIB=ranlib
@@@ -86,7 -93,8 +97,8 @@@ all: dem
  endif
  
  # Google Test and other test library functions.
- TEST_OBJS = gtest-all.o gtest_sdl_main.o test_util.o
+ OWN_TEST_OBJS = gtest_sdl_main.o test_util.o
+ TEST_OBJS = gtest-all.o $(OWN_TEST_OBJS)
  
  gtest-all.o: $(GTEST_DIR)/src/gtest-all.cc
  	$(CXX) -MMD $(CPPFLAGS) -I$(GTEST_DIR) $(CXXFLAGS) -c $< -o $@
@@@ -95,8 -103,9 +107,9 @@@ gtest_sdl_main.o: gtest_sdl_main.cp
  
  # Unit tests.
  $(TESTS): %: %.o $(TEST_OBJS) libmovit.la
- 	$(LIBTOOL) --mode=link $(CXX) $(LDFLAGS) -o $@ $^ $(LDLIBS)
+ 	$(LIBTOOL) --mode=link $(CXX) $(LDFLAGS) -o $@ $^ $(TEST_LDLIBS)
  
+ OWN_OBJS=$(DEMO_OBJS) $(LIB_OBJS) $(OWN_TEST_OBJS) $(TESTS:=.o)
  OBJS=$(DEMO_OBJS) $(LIB_OBJS) $(TEST_OBJS) $(TESTS:=.o)
  
  # A small demo program.
@@@ -105,7 -114,7 +118,7 @@@ demo: libmovit.la $(DEMO_OBJS
  
  # The library itself.
  libmovit.la: $(LIB_OBJS:.o=.lo)
- 	$(LIBTOOL) --mode=link $(CXX) $(LDFLAGS) -rpath $(libdir) -o $@ $^ $(LDLIBS)
+ 	$(LIBTOOL) --mode=link $(CXX) $(LDFLAGS) -rpath $(libdir) -version-info $(movit_ltversion) -o $@ $^ $(LDLIBS)
  
  %.lo: %.cpp
  	$(LIBTOOL) --mode=compile $(CXX) -MMD -MP $(CPPFLAGS) $(CXXFLAGS) -o $@ -c $<
@@@ -159,15 -168,30 +172,30 @@@ SHADERS += texture1d.fra
  
  # These purposefully do not exist.
  MISSING_SHADERS = diffusion_effect.frag glow_effect.frag unsharp_mask_effect.frag resize_effect.frag
+ MISSING_SHADERS += fft_convolution_effect.frag fft_input.frag
  SHADERS := $(filter-out $(MISSING_SHADERS),$(SHADERS))
  
  install: libmovit.la
- 	$(LIBTOOL) --mode=install $(INSTALL) -m 0644 libmovit.la $(libdir)/
- 	$(MKDIR) -p $(includedir)/movit/
- 	$(INSTALL) -m 0644 $(HDRS) $(includedir)/movit/
- 	$(MKDIR) -p $(datadir)/movit/
- 	$(INSTALL) -m 0644 $(SHADERS) $(datadir)/movit/
- 	$(MKDIR) -p $(libdir)/pkgconfig/
- 	$(INSTALL) -m 644 movit.pc $(libdir)/pkgconfig/
- 
- .PHONY: coverage clean distclean check all install
+ 	$(MKDIR) -p $(DESTDIR)$(libdir)/
+ 	$(LIBTOOL) --mode=install $(INSTALL) -m 0644 libmovit.la $(DESTDIR)$(libdir)/
+ 	$(MKDIR) -p $(DESTDIR)$(includedir)/movit/
+ 	$(INSTALL) -m 0644 $(HDRS) $(DESTDIR)$(includedir)/movit/
+ 	$(MKDIR) -p $(DESTDIR)$(datadir)/movit/
+ 	$(INSTALL) -m 0644 $(SHADERS) $(DESTDIR)$(datadir)/movit/
+ 	$(MKDIR) -p $(DESTDIR)$(libdir)/pkgconfig/
+ 	$(INSTALL) -m 644 movit.pc $(DESTDIR)$(libdir)/pkgconfig/
+ 
+ DISTDIR=movit-$(movit_version)
+ OTHER_DIST_FILES=add.frag autogen.sh blue.frag configure.ac d65.h identity.frag invert_effect.frag Makefile.in mipmap_needing_effect.frag movit.pc.in README NEWS test_util.h widgets.h
+ 
+ dist:
+ 	$(MKDIR) $(DISTDIR)
+ 	cp $(OWN_OBJS:.o=.cpp) $(DISTDIR)/
+ 	cp $(HDRS) $(DISTDIR)/
+ 	cp $(SHADERS) $(DISTDIR)/
+ 	cp $(OTHER_DIST_FILES) $(DISTDIR)/
+ 	( cd $(DISTDIR) && aclocal && libtoolize --install --copy && autoconf && $(RM) -r autom4te.cache/ )
+ 	tar zcvvf ../$(DISTDIR).tar.gz $(DISTDIR)
+ 	$(RM) -r $(DISTDIR)
+ 
+ .PHONY: coverage clean distclean check all install dist
diff --combined README
index 0537f68,f7017bd..74be5e5
--- a/README
+++ b/README
@@@ -9,7 -9,7 +9,7 @@@ Movit is the Modern Video Toolkit, notw
  called “modern” usually isn't, and it's really not a toolkit.
  
  Movit aims to be a _high-quality_, _high-performance_, _open-source_
- library for video filters. It is currently in alpha stage.
+ library for video filters.
  
  
  TL;DR, please give me download link and system demands
@@@ -21,13 -21,12 +21,13 @@@ OK, you nee
    works fine on Linux and OS X, and Movit is not very POSIX-bound.)
  * GNU Make.
  * A GPU capable of running GLSL fragment shaders,
 -  process floating-point textures, and a few other things. If your machine
 -  is less than five years old _and you have the appropriate drivers_,
 -  you're home free.
 +  processing floating-point textures, and a few other things (all are
 +  part of OpenGL 3.0 or newer, although most OpenGL 2.0 cards also
 +  have what's needed through extensions). If your machine is less than five
 +  years old _and you have the appropriate drivers_, you're home free.
- * The [Eigen 3] and [Google Test] libraries. (The library itself
-   depends only on the former, but you probably want to run the unit tests.)
+ * The [Eigen 3], [FFTW3] and [Google Test] libraries. (The library itself
+   does not depend on the latter, but you probably want to run the unit tests.)
 -* The [GLEW] library, for dealing with OpenGL extensions on various
 +* The [epoxy] library, for dealing with OpenGL extensions on various
    platforms.
  
  Movit has been tested with Intel GPUs with the Mesa drivers
@@@ -55,8 -54,7 +55,8 @@@ all research-grade problems, and Movit 
  TL;DR, but I am interested in a programming example instead
  ===========================================================
  
 -Assuming you have an OpenGL context already set up:
 +Assuming you have an OpenGL context already set up (currently you need
 +a classic OpenGL context; a GL 3.2+ core context won't do):
  
  <code>
    using namespace movit;
diff --combined blur_effect.cpp
index b0327e2,d43a7ca..903737a
--- a/blur_effect.cpp
+++ b/blur_effect.cpp
@@@ -1,4 -1,4 +1,4 @@@
 -#include <GL/glew.h>
 +#include <epoxy/gl.h>
  #include <assert.h>
  #include <math.h>
  #include <algorithm>
@@@ -108,7 -108,9 +108,9 @@@ SingleBlurPassEffect::SingleBlurPassEff
  
  string SingleBlurPassEffect::output_fragment_shader()
  {
- 	return read_file("blur_effect.frag");
+ 	char buf[256];
+ 	sprintf(buf, "#define DIRECTION_VERTICAL %d\n", (direction == VERTICAL));
+ 	return buf + read_file("blur_effect.frag");
  }
  
  void SingleBlurPassEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, unsigned *sampler_num)
@@@ -159,13 -161,11 +161,11 @@@
  	//
  	// We pack the parameters into a float2: The relative sample coordinate
  	// along the blur direction in x, and the weight in y.
- 	float samples[4 * (NUM_TAPS / 2 + 1)];
+ 	float samples[2 * (NUM_TAPS / 2 + 1)];
  
  	// Center sample.
- 	samples[4 * 0 + 0] = 0.0f;
- 	samples[4 * 0 + 1] = 0.0f;
- 	samples[4 * 0 + 2] = weight[0];
- 	samples[4 * 0 + 3] = 0.0f;
+ 	samples[2 * 0 + 0] = 0.0f;
+ 	samples[2 * 0 + 1] = weight[0];
  
  	// All other samples.
  	for (unsigned i = 1; i < NUM_TAPS / 2 + 1; ++i) {
@@@ -176,23 -176,18 +176,18 @@@
  		float offset, total_weight;
  		combine_two_samples(w1, w2, &offset, &total_weight, NULL);
  
- 		float x = 0.0f, y = 0.0f;
- 
  		if (direction == HORIZONTAL) {
- 			x = (base_pos + offset) / (float)width;
+ 			samples[2 * i + 0] = (base_pos + offset) / (float)width;
  		} else if (direction == VERTICAL) {
- 			y = (base_pos + offset) / (float)height;
+ 			samples[2 * i + 0] = (base_pos + offset) / (float)height;
  		} else {
  			assert(false);
  		}
  
- 		samples[4 * i + 0] = x;
- 		samples[4 * i + 1] = y;
- 		samples[4 * i + 2] = total_weight;
- 		samples[4 * i + 3] = 0.0f;
+ 		samples[2 * i + 1] = total_weight;
  	}
  
- 	set_uniform_vec4_array(glsl_program_num, prefix, "samples", samples, NUM_TAPS / 2 + 1);
+ 	set_uniform_vec2_array(glsl_program_num, prefix, "samples", samples, NUM_TAPS / 2 + 1);
  }
  
  void SingleBlurPassEffect::clear_gl_state()
diff --combined complex_modulate_effect.cpp
index 656de3a,6af589c..9483f6b
--- a/complex_modulate_effect.cpp
+++ b/complex_modulate_effect.cpp
@@@ -1,4 -1,4 +1,4 @@@
 -#include <GL/glew.h>
 +#include <epoxy/gl.h>
  
  #include "complex_modulate_effect.h"
  #include "effect_chain.h"
@@@ -25,7 -25,7 +25,7 @@@ void ComplexModulateEffect::set_gl_stat
  {
  	Effect::set_gl_state(glsl_program_num, prefix, sampler_num);
  
- 	float num_repeats[] = { num_repeats_x, num_repeats_y };
+ 	float num_repeats[] = { float(num_repeats_x), float(num_repeats_y) };
  	set_uniform_vec2(glsl_program_num, prefix, "num_repeats", num_repeats);
  
  	// Set the secondary input to repeat (and nearest while we're at it).
diff --combined configure.ac
index 1007ce2,93e168b..823740b
--- a/configure.ac
+++ b/configure.ac
@@@ -1,33 -1,26 +1,35 @@@
  AC_CONFIG_MACRO_DIR([m4])
  AC_INIT(movit, git)
  LT_INIT
+ PKG_PROG_PKG_CONFIG
  
  AC_CONFIG_SRCDIR(effect.cpp)
+ AC_CONFIG_AUX_DIR(.)
  
  AC_PROG_CC
  AC_PROG_CXX
  PKG_CHECK_MODULES([Eigen3], [eigen3])
 -PKG_CHECK_MODULES([GLEW], [glew])
 +PKG_CHECK_MODULES([epoxy], [epoxy])
  PKG_CHECK_MODULES([FFTW3], [fftw3])
  
 -# Needed for unit tests and the demo app.
 -PKG_CHECK_MODULES([SDL], [sdl])
 +# Needed for unit tests and the demo app. We prefer SDL2 if possible,
 +# but can also use classic SDL.
 +with_SDL2=no
 +with_demo_app=yes
 +PKG_CHECK_MODULES([SDL2], [sdl2], [with_SDL2=yes], [
 +  PKG_CHECK_MODULES([SDL], [sdl])
 +])
  
  # These are only needed for the demo app.
 -with_demo_app=yes
 -PKG_CHECK_MODULES([SDL_image], [SDL_image], [], [with_demo_app=no; AC_MSG_WARN([SDL_image not found, demo program will not be built])])
 +if test $with_SDL2 = "yes"; then
 +  PKG_CHECK_MODULES([SDL2_image], [SDL2_image], [], [with_demo_app=no; AC_MSG_WARN([SDL2_image not found, demo program will not be built])])
 +else
 +  PKG_CHECK_MODULES([SDL_image], [SDL_image], [], [with_demo_app=no; AC_MSG_WARN([SDL_image not found, demo program will not be built])])
 +fi
  PKG_CHECK_MODULES([libpng], [libpng12], [], [with_demo_app=no; AC_MSG_WARN([libpng12 not found, demo program will not be built])])
  
  AC_SUBST([with_demo_app])
 +AC_SUBST([with_SDL2])
  
  with_coverage=no
  AC_ARG_ENABLE([coverage], [  --enable-coverage       build with information needed to compute test coverage], [with_coverage=yes])
diff --combined effect_chain.cpp
index ef44024,487d7b7..7f6c943
--- a/effect_chain.cpp
+++ b/effect_chain.cpp
@@@ -1,6 -1,6 +1,6 @@@
  #define GL_GLEXT_PROTOTYPES 1
  
 -#include <GL/glew.h>
 +#include <epoxy/gl.h>
  #include <assert.h>
  #include <locale.h>
  #include <math.h>
@@@ -53,12 -53,26 +53,26 @@@ EffectChain::~EffectChain(
  		delete nodes[i];
  	}
  	for (unsigned i = 0; i < phases.size(); ++i) {
+ 		glBindVertexArray(phases[i]->vao);
+ 		check_error();
+ 
+ 		cleanup_vertex_attribute(phases[i]->glsl_program_num, "position", phases[i]->position_vbo);
+ 		cleanup_vertex_attribute(phases[i]->glsl_program_num, "texcoord", phases[i]->texcoord_vbo);
+ 
+ 		glBindVertexArray(0);
+ 		check_error();
+ 
  		resource_pool->release_glsl_program(phases[i]->glsl_program_num);
  		delete phases[i];
  	}
  	if (owns_resource_pool) {
  		delete resource_pool;
  	}
+ 	for (map<void *, GLuint>::const_iterator fbo_it = fbos.begin();
+ 	     fbo_it != fbos.end(); ++fbo_it) {
+ 		glDeleteFramebuffers(1, &fbo_it->second);
+ 		check_error();
+ 	}
  }
  
  Input *EffectChain::add_input(Input *input)
@@@ -227,24 -241,13 +241,13 @@@ string replace_prefix(const string &tex
  	return output;
  }
  
- Phase *EffectChain::compile_glsl_program(
- 	const vector<Node *> &inputs,
- 	const vector<Node *> &effects)
+ void EffectChain::compile_glsl_program(Phase *phase)
  {
- 	Phase *phase = new Phase;
- 	assert(!effects.empty());
- 
- 	// Deduplicate the inputs.
- 	vector<Node *> true_inputs = inputs;
- 	sort(true_inputs.begin(), true_inputs.end());
- 	true_inputs.erase(unique(true_inputs.begin(), true_inputs.end()), true_inputs.end());
- 
- 	bool input_needs_mipmaps = false;
  	string frag_shader = read_file("header.frag");
  
  	// Create functions for all the texture inputs that we need.
- 	for (unsigned i = 0; i < true_inputs.size(); ++i) {
- 		Node *input = true_inputs[i];
+ 	for (unsigned i = 0; i < phase->inputs.size(); ++i) {
+ 		Node *input = phase->inputs[i]->output_node;
  		char effect_id[256];
  		sprintf(effect_id, "in%u", i);
  		phase->effect_ids.insert(make_pair(input, effect_id));
@@@ -256,10 -259,8 +259,8 @@@
  		frag_shader += "\n";
  	}
  
- 	vector<Node *> sorted_effects = topological_sort(effects);
- 
- 	for (unsigned i = 0; i < sorted_effects.size(); ++i) {
- 		Node *node = sorted_effects[i];
+ 	for (unsigned i = 0; i < phase->effects.size(); ++i) {
+ 		Node *node = phase->effects[i];
  		char effect_id[256];
  		sprintf(effect_id, "eff%u", i);
  		phase->effect_ids.insert(make_pair(node, effect_id));
@@@ -290,24 -291,32 +291,32 @@@
  			}
  		}
  		frag_shader += "\n";
- 
- 		input_needs_mipmaps |= node->effect->needs_mipmaps();
  	}
- 	for (unsigned i = 0; i < sorted_effects.size(); ++i) {
- 		Node *node = sorted_effects[i];
- 		if (node->effect->num_inputs() == 0) {
- 			CHECK(node->effect->set_int("needs_mipmaps", input_needs_mipmaps));
- 		}
- 	}
- 	frag_shader += string("#define INPUT ") + phase->effect_ids[sorted_effects.back()] + "\n";
+ 	frag_shader += string("#define INPUT ") + phase->effect_ids[phase->effects.back()] + "\n";
  	frag_shader.append(read_file("footer.frag"));
  
  	phase->glsl_program_num = resource_pool->compile_glsl_program(read_file("vs.vert"), frag_shader);
- 	phase->input_needs_mipmaps = input_needs_mipmaps;
- 	phase->inputs = true_inputs;
- 	phase->effects = sorted_effects;
  
- 	return phase;
+ 	// Prepare the geometry for the fullscreen quad used in this phase.
+ 	// (We have separate VAOs per shader, since the bindings can in theory
+ 	// be different.)
+ 	float vertices[] = {
+ 		0.0f, 1.0f,
+ 		0.0f, 0.0f,
+ 		1.0f, 1.0f,
+ 		1.0f, 0.0f
+ 	};
+ 
+ 	glGenVertexArrays(1, &phase->vao);
+ 	check_error();
+ 	glBindVertexArray(phase->vao);
+ 	check_error();
+ 
+ 	phase->position_vbo = fill_vertex_attribute(phase->glsl_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices);
+ 	phase->texcoord_vbo = fill_vertex_attribute(phase->glsl_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices);  // Same as vertices.
+ 
+ 	glBindVertexArray(0);
+ 	check_error();
  }
  
  // Construct GLSL programs, starting at the given effect and following
@@@ -317,130 -326,118 +326,118 @@@
  // and of course at the end.
  //
  // We follow a quite simple depth-first search from the output, although
- // without any explicit recursion.
- void EffectChain::construct_glsl_programs(Node *output)
+ // without recursing explicitly within each phase.
+ Phase *EffectChain::construct_phase(Node *output, map<Node *, Phase *> *completed_effects)
  {
- 	// Which effects have already been completed?
- 	// We need to keep track of it, as an effect with multiple outputs
- 	// could otherwise be calculated multiple times.
- 	set<Node *> completed_effects;
+ 	if (completed_effects->count(output)) {
+ 		return (*completed_effects)[output];
+ 	}
  
- 	// Effects in the current phase, as well as inputs (outputs from other phases
- 	// that we depend on). Note that since we start iterating from the end,
- 	// the effect list will be in the reverse order.
- 	vector<Node *> this_phase_inputs;
- 	vector<Node *> this_phase_effects;
+ 	Phase *phase = new Phase;
+ 	phase->output_node = output;
  
  	// Effects that we have yet to calculate, but that we know should
  	// be in the current phase.
  	stack<Node *> effects_todo_this_phase;
- 
- 	// Effects that we have yet to calculate, but that come from other phases.
- 	// We delay these until we have this phase done in its entirety,
- 	// at which point we pick any of them and start a new phase from that.
- 	stack<Node *> effects_todo_other_phases;
- 
  	effects_todo_this_phase.push(output);
  
- 	for ( ;; ) {  // Termination condition within loop.
- 		if (!effects_todo_this_phase.empty()) {
- 			// OK, we have more to do this phase.
- 			Node *node = effects_todo_this_phase.top();
- 			effects_todo_this_phase.pop();
- 
- 			// This should currently only happen for effects that are inputs
- 			// (either true inputs or phase outputs). We special-case inputs,
- 			// and then deduplicate phase outputs in compile_glsl_program().
- 			if (node->effect->num_inputs() == 0) {
- 				if (find(this_phase_effects.begin(), this_phase_effects.end(), node) != this_phase_effects.end()) {
- 					continue;
- 				}
- 			} else {
- 				assert(completed_effects.count(node) == 0);
+ 	while (!effects_todo_this_phase.empty()) {
+ 		Node *node = effects_todo_this_phase.top();
+ 		effects_todo_this_phase.pop();
+ 
+ 		// This should currently only happen for effects that are inputs
+ 		// (either true inputs or phase outputs). We special-case inputs,
+ 		// and then deduplicate phase outputs below.
+ 		if (node->effect->num_inputs() == 0) {
+ 			if (find(phase->effects.begin(), phase->effects.end(), node) != phase->effects.end()) {
+ 				continue;
  			}
+ 		} else {
+ 			assert(completed_effects->count(node) == 0);
+ 		}
  
- 			this_phase_effects.push_back(node);
- 			completed_effects.insert(node);
+ 		phase->effects.push_back(node);
  
- 			// Find all the dependencies of this effect, and add them to the stack.
- 			vector<Node *> deps = node->incoming_links;
- 			assert(node->effect->num_inputs() == deps.size());
- 			for (unsigned i = 0; i < deps.size(); ++i) {
- 				bool start_new_phase = false;
+ 		// Find all the dependencies of this effect, and add them to the stack.
+ 		vector<Node *> deps = node->incoming_links;
+ 		assert(node->effect->num_inputs() == deps.size());
+ 		for (unsigned i = 0; i < deps.size(); ++i) {
+ 			bool start_new_phase = false;
  
- 				if (node->effect->needs_texture_bounce() &&
- 				    !deps[i]->effect->is_single_texture()) {
- 					start_new_phase = true;
- 				}
+ 			if (node->effect->needs_texture_bounce() &&
+ 			    !deps[i]->effect->is_single_texture()) {
+ 				start_new_phase = true;
+ 			}
  
- 				if (deps[i]->outgoing_links.size() > 1) {
- 					if (!deps[i]->effect->is_single_texture()) {
- 						// More than one effect uses this as the input,
- 						// and it is not a texture itself.
- 						// The easiest thing to do (and probably also the safest
- 						// performance-wise in most cases) is to bounce it to a texture
- 						// and then let the next passes read from that.
- 						start_new_phase = true;
- 					} else {
- 						assert(deps[i]->effect->num_inputs() == 0);
- 
- 						// For textures, we try to be slightly more clever;
- 						// if none of our outputs need a bounce, we don't bounce
- 						// but instead simply use the effect many times.
- 						//
- 						// Strictly speaking, we could bounce it for some outputs
- 						// and use it directly for others, but the processing becomes
- 						// somewhat simpler if the effect is only used in one such way.
- 						for (unsigned j = 0; j < deps[i]->outgoing_links.size(); ++j) {
- 							Node *rdep = deps[i]->outgoing_links[j];
- 							start_new_phase |= rdep->effect->needs_texture_bounce();
- 						}
+ 			if (deps[i]->outgoing_links.size() > 1) {
+ 				if (!deps[i]->effect->is_single_texture()) {
+ 					// More than one effect uses this as the input,
+ 					// and it is not a texture itself.
+ 					// The easiest thing to do (and probably also the safest
+ 					// performance-wise in most cases) is to bounce it to a texture
+ 					// and then let the next passes read from that.
+ 					start_new_phase = true;
+ 				} else {
+ 					assert(deps[i]->effect->num_inputs() == 0);
+ 
+ 					// For textures, we try to be slightly more clever;
+ 					// if none of our outputs need a bounce, we don't bounce
+ 					// but instead simply use the effect many times.
+ 					//
+ 					// Strictly speaking, we could bounce it for some outputs
+ 					// and use it directly for others, but the processing becomes
+ 					// somewhat simpler if the effect is only used in one such way.
+ 					for (unsigned j = 0; j < deps[i]->outgoing_links.size(); ++j) {
+ 						Node *rdep = deps[i]->outgoing_links[j];
+ 						start_new_phase |= rdep->effect->needs_texture_bounce();
  					}
  				}
+ 			}
  
- 				if (deps[i]->effect->changes_output_size()) {
- 					start_new_phase = true;
- 				}
+ 			if (deps[i]->effect->changes_output_size()) {
+ 				start_new_phase = true;
+ 			}
  
- 				if (start_new_phase) {
- 					effects_todo_other_phases.push(deps[i]);
- 					this_phase_inputs.push_back(deps[i]);
- 				} else {
- 					effects_todo_this_phase.push(deps[i]);
- 				}
+ 			if (start_new_phase) {
+ 				phase->inputs.push_back(construct_phase(deps[i], completed_effects));
+ 			} else {
+ 				effects_todo_this_phase.push(deps[i]);
  			}
- 			continue;
  		}
+ 	}
  
- 		// No more effects to do this phase. Take all the ones we have,
- 		// and create a GLSL program for it.
- 		if (!this_phase_effects.empty()) {
- 			reverse(this_phase_effects.begin(), this_phase_effects.end());
- 			phases.push_back(compile_glsl_program(this_phase_inputs, this_phase_effects));
- 			this_phase_effects.back()->phase = phases.back();
- 			this_phase_inputs.clear();
- 			this_phase_effects.clear();
- 		}
- 		assert(this_phase_inputs.empty());
- 		assert(this_phase_effects.empty());
+ 	// No more effects to do this phase. Take all the ones we have,
+ 	// and create a GLSL program for it.
+ 	assert(!phase->effects.empty());
  
- 		// If we have no effects left, exit.
- 		if (effects_todo_other_phases.empty()) {
- 			break;
- 		}
+ 	// Deduplicate the inputs.
+ 	sort(phase->inputs.begin(), phase->inputs.end());
+ 	phase->inputs.erase(unique(phase->inputs.begin(), phase->inputs.end()), phase->inputs.end());
  
- 		Node *node = effects_todo_other_phases.top();
- 		effects_todo_other_phases.pop();
+ 	// We added the effects from the output and back, but we need to output
+ 	// them in topological sort order in the shader.
+ 	phase->effects = topological_sort(phase->effects);
  
- 		if (completed_effects.count(node) == 0) {
- 			// Start a new phase, calculating from this effect.
- 			effects_todo_this_phase.push(node);
+ 	// Figure out if we need mipmaps or not, and if so, tell the inputs that.
+ 	phase->input_needs_mipmaps = false;
+ 	for (unsigned i = 0; i < phase->effects.size(); ++i) {
+ 		Node *node = phase->effects[i];
+ 		phase->input_needs_mipmaps |= node->effect->needs_mipmaps();
+ 	}
+ 	for (unsigned i = 0; i < phase->effects.size(); ++i) {
+ 		Node *node = phase->effects[i];
+ 		if (node->effect->num_inputs() == 0) {
+ 			CHECK(node->effect->set_int("needs_mipmaps", phase->input_needs_mipmaps));
  		}
  	}
  
- 	// Finally, since the phases are found from the output but must be executed
- 	// from the input(s), reverse them, too.
- 	reverse(phases.begin(), phases.end());
+ 	// Actually make the shader for this phase.
+ 	compile_glsl_program(phase);
+ 
+ 	assert(completed_effects->count(output) == 0);
+ 	completed_effects->insert(make_pair(output, phase));
+ 	phases.push_back(phase);
+ 	return phase;
  }
  
  void EffectChain::output_dot(const char *filename)
@@@ -621,11 -618,11 +618,11 @@@ void EffectChain::inform_input_sizes(Ph
  		}
  	}
  	for (unsigned i = 0; i < phase->inputs.size(); ++i) {
- 		Node *input = phase->inputs[i];
- 		input->output_width = input->phase->virtual_output_width;
- 		input->output_height = input->phase->virtual_output_height;
- 		assert(input->output_width != 0);
- 		assert(input->output_height != 0);
+ 		Phase *input = phase->inputs[i];
+ 		input->output_node->output_width = input->virtual_output_width;
+ 		input->output_node->output_height = input->virtual_output_height;
+ 		assert(input->output_node->output_width != 0);
+ 		assert(input->output_node->output_height != 0);
  	}
  
  	// Now propagate from the inputs towards the end, and inform as we go.
@@@ -676,14 -673,14 +673,14 @@@ void EffectChain::find_output_size(Phas
  	bool all_inputs_same_size = true;
  
  	for (unsigned i = 0; i < phase->inputs.size(); ++i) {
- 		Node *input = phase->inputs[i];
- 		assert(input->phase->output_width != 0);
- 		assert(input->phase->output_height != 0);
+ 		Phase *input = phase->inputs[i];
+ 		assert(input->output_width != 0);
+ 		assert(input->output_height != 0);
  		if (output_width == 0 && output_height == 0) {
- 			output_width = input->phase->virtual_output_width;
- 			output_height = input->phase->virtual_output_height;
- 		} else if (output_width != input->phase->virtual_output_width ||
- 		           output_height != input->phase->virtual_output_height) {
+ 			output_width = input->virtual_output_width;
+ 			output_height = input->virtual_output_height;
+ 		} else if (output_width != input->virtual_output_width ||
+ 		           output_height != input->virtual_output_height) {
  			all_inputs_same_size = false;
  		}
  	}
@@@ -715,10 -712,10 +712,10 @@@
  	output_width = 0;
  	output_height = 0;
  	for (unsigned i = 0; i < phase->inputs.size(); ++i) {
- 		Node *input = phase->inputs[i];
- 		assert(input->phase->output_width != 0);
- 		assert(input->phase->output_height != 0);
- 		size_rectangle_to_fit(input->phase->output_width, input->phase->output_height, &output_width, &output_height);
+ 		Phase *input = phase->inputs[i];
+ 		assert(input->output_width != 0);
+ 		assert(input->output_height != 0);
+ 		size_rectangle_to_fit(input->output_width, input->output_height, &output_width, &output_height);
  	}
  	for (unsigned i = 0; i < phase->effects.size(); ++i) {
  		Effect *effect = phase->effects[i]->effect;
@@@ -1422,7 -1419,11 +1419,11 @@@ void EffectChain::finalize(
  	output_dot("step18-final.dot");
  	
  	// Construct all needed GLSL programs, starting at the output.
- 	construct_glsl_programs(find_output_node());
+ 	// We need to keep track of which effects have already been computed,
+ 	// as an effect with multiple users could otherwise be calculated
+ 	// multiple times.
+ 	map<Node *, Phase *> completed_effects;
+ 	construct_phase(find_output_node(), &completed_effects);
  
  	output_dot("step19-split-to-phases.dot");
  
@@@ -1439,6 -1440,7 +1440,7 @@@ void EffectChain::render_to_fbo(GLuint 
  	// Save original viewport.
  	GLuint x = 0, y = 0;
  	GLuint fbo = 0;
+ 	void *context = get_gl_context_identifier();
  
  	if (width == 0 && height == 0) {
  		GLint viewport[4];
@@@ -1458,13 -1460,18 +1460,18 @@@
  	check_error();
  
  	if (phases.size() > 1) {
- 		glGenFramebuffers(1, &fbo);
- 		check_error();
+ 		if (fbos.count(context) == 0) {
+ 			glGenFramebuffers(1, &fbo);
+ 			check_error();
+ 			fbos.insert(make_pair(context, fbo));
+ 		} else {
+ 			fbo = fbos[context];
+ 		}
  		glBindFramebuffer(GL_FRAMEBUFFER, fbo);
  		check_error();
  	}
  
- 	set<Node *> generated_mipmaps;
+ 	set<Phase *> generated_mipmaps;
  
  	// We choose the simplest option of having one texture per output,
  	// since otherwise this turns into an (albeit simple) register allocation problem.
@@@ -1488,9 -1495,9 +1495,9 @@@
  		// Set up RTT inputs for this phase.
  		for (unsigned sampler = 0; sampler < phases[phase]->inputs.size(); ++sampler) {
  			glActiveTexture(GL_TEXTURE0 + sampler);
- 			Node *input = phases[phase]->inputs[sampler];
- 			input->bound_sampler_num = sampler;
- 			glBindTexture(GL_TEXTURE_2D, output_textures[input->phase]);
+ 			Phase *input = phases[phase]->inputs[sampler];
+ 			input->output_node->bound_sampler_num = sampler;
+ 			glBindTexture(GL_TEXTURE_2D, output_textures[input]);
  			check_error();
  			if (phases[phase]->input_needs_mipmaps) {
  				if (generated_mipmaps.count(input) == 0) {
@@@ -1509,7 -1516,7 +1516,7 @@@
  			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
  			check_error();
  
- 			string texture_name = string("tex_") + phases[phase]->effect_ids[input];
+ 			string texture_name = string("tex_") + phases[phase]->effect_ids[input->output_node];
  			glUniform1i(glGetUniformLocation(glsl_program_num, texture_name.c_str()), sampler);
  			check_error();
  		}
@@@ -1555,39 -1562,15 +1562,15 @@@
  			}
  		}
  
- 		// Now draw!
- 		float vertices[] = {
- 			0.0f, 1.0f,
- 			0.0f, 0.0f,
- 			1.0f, 1.0f,
- 			1.0f, 0.0f
- 		};
- 
- 		GLuint vao;
- 		glGenVertexArrays(1, &vao);
- 		check_error();
- 		glBindVertexArray(vao);
+ 		glBindVertexArray(phases[phase]->vao);
  		check_error();
- 
- 		GLuint position_vbo = fill_vertex_attribute(glsl_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices);
- 		GLuint texcoord_vbo = fill_vertex_attribute(glsl_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices);  // Same as vertices.
- 
  		glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
  		check_error();
  
- 		cleanup_vertex_attribute(glsl_program_num, "position", position_vbo);
- 		cleanup_vertex_attribute(glsl_program_num, "texcoord", texcoord_vbo);
- 
- 		glUseProgram(0);
- 		check_error();
- 
  		for (unsigned i = 0; i < phases[phase]->effects.size(); ++i) {
  			Node *node = phases[phase]->effects[i];
  			node->effect->clear_gl_state();
  		}
- 
- 		glDeleteVertexArrays(1, &vao);
- 		check_error();
  	}
  
  	for (map<Phase *, GLuint>::const_iterator texture_it = output_textures.begin();
@@@ -1598,11 -1581,10 +1581,10 @@@
  
  	glBindFramebuffer(GL_FRAMEBUFFER, 0);
  	check_error();
- 
- 	if (fbo != 0) {
- 		glDeleteFramebuffers(1, &fbo);
- 		check_error();
- 	}
+ 	glBindVertexArray(0);
+ 	check_error();
+ 	glUseProgram(0);
+ 	check_error();
  }
  
  }  // namespace movit
diff --combined effect_chain.h
index 02906fe,9051195..1ecee63
--- a/effect_chain.h
+++ b/effect_chain.h
@@@ -17,7 -17,7 +17,7 @@@
  // the EffectChain holds textures and other OpenGL objects that are tied to the
  // context.
  
 -#include <GL/glew.h>
 +#include <epoxy/gl.h>
  #include <stdio.h>
  #include <map>
  #include <set>
@@@ -66,11 -66,6 +66,6 @@@ private
  	// they will be equal.
  	unsigned output_width, output_height;
  
- 	// If output goes to RTT, which phase it is in (otherwise unset).
- 	// This is a bit ugly; we should probably fix so that Phase takes other
- 	// phases as inputs, instead of Node.
- 	Phase *phase;
- 
  	// If the effect has is_single_texture(), or if the output went to RTT
  	// and that texture has been bound to a sampler, the sampler number
  	// will be stored here.
@@@ -90,19 -85,27 +85,27 @@@
  
  // A rendering phase; a single GLSL program rendering a single quad.
  struct Phase {
+ 	Node *output_node;
+ 
  	GLuint glsl_program_num;  // Owned by the resource_pool.
  	bool input_needs_mipmaps;
  
  	// Inputs are only inputs from other phases (ie., those that come from RTT);
- 	// input textures are not counted here.
- 	std::vector<Node *> inputs;
- 
+ 	// input textures are counted as part of <effects>.
+ 	std::vector<Phase *> inputs;
  	std::vector<Node *> effects;  // In order.
  	unsigned output_width, output_height, virtual_output_width, virtual_output_height;
  
  	// Identifier used to create unique variables in GLSL.
  	// Unique per-phase to increase cacheability of compiled shaders.
  	std::map<Node *, std::string> effect_ids;
+ 
+ 	// The geometry needed to draw this quad, bound to the vertex array
+ 	// object. (Seemingly it's actually a win not to upload geometry every
+ 	// frame, even for something as small as a quad, due to fewer state
+ 	// changes.)
+ 	GLuint vao;
+ 	GLuint position_vbo, texcoord_vbo;
  };
  
  class EffectChain {
@@@ -224,13 -227,13 +227,13 @@@ private
  	// output gamma different from GAMMA_LINEAR.
  	void find_all_nonlinear_inputs(Node *effect, std::vector<Node *> *nonlinear_inputs);
  
- 	// Create a GLSL program computing the given effects in order.
- 	Phase *compile_glsl_program(const std::vector<Node *> &inputs,
- 	                            const std::vector<Node *> &effects);
+ 	// Create a GLSL program computing the effects for this phase in order.
+ 	void compile_glsl_program(Phase *phase);
  
  	// Create all GLSL programs needed to compute the given effect, and all outputs
- 	// that depends on it (whenever possible).
- 	void construct_glsl_programs(Node *output);
+ 	// that depend on it (whenever possible). Returns the phase that has <output>
+ 	// as the last effect. Also pushes all phases in order onto <phases>.
+ 	Phase *construct_phase(Node *output, std::map<Node *, Phase *> *completed_effects);
  
  	// Output the current graph to the given file in a Graphviz-compatible format;
  	// only useful for debugging.
@@@ -284,6 -287,7 +287,7 @@@
  	std::map<Effect *, Node *> node_map;
  	Effect *dither_effect;
  
+ 	std::map<void *, GLuint> fbos;  // One for each OpenGL context.
  	std::vector<Input *> inputs;  // Also contained in nodes.
  	std::vector<Phase *> phases;
  
diff --combined effect_util.cpp
index b671620,d485d46..dbeb48b
--- a/effect_util.cpp
+++ b/effect_util.cpp
@@@ -1,4 -1,4 +1,4 @@@
 -#include <GL/glew.h>
 +#include <epoxy/gl.h>
  #include <Eigen/Core>
  #include <stddef.h>
  #include <string>
@@@ -69,6 -69,17 +69,17 @@@ void set_uniform_vec4(GLuint glsl_progr
  	check_error();
  }
  
+ void set_uniform_vec2_array(GLuint glsl_program_num, const string &prefix, const string &key, const float *values, size_t num_values)
+ {
+ 	GLint location = get_uniform_location(glsl_program_num, prefix, key);
+ 	if (location == -1) {
+ 		return;
+ 	}
+ 	check_error();
+ 	glUniform2fv(location, num_values, values);
+ 	check_error();
+ }
+ 
  void set_uniform_vec4_array(GLuint glsl_program_num, const string &prefix, const string &key, const float *values, size_t num_values)
  {
  	GLint location = get_uniform_location(glsl_program_num, prefix, key);
diff --combined effect_util.h
index da0f95a,5420ce5..a1588ef
--- a/effect_util.h
+++ b/effect_util.h
@@@ -4,7 -4,7 +4,7 @@@
  // Utilities that are often useful for implementing Effect instances,
  // but don't need to be included from effect.h.
  
 -#include <GL/glew.h>
 +#include <epoxy/gl.h>
  #include <assert.h>
  #include <stddef.h>
  #include <Eigen/Core>
@@@ -26,6 -26,7 +26,7 @@@ void set_uniform_float(GLuint glsl_prog
  void set_uniform_vec2(GLuint glsl_program_num, const std::string &prefix, const std::string &key, const float *values);
  void set_uniform_vec3(GLuint glsl_program_num, const std::string &prefix, const std::string &key, const float *values);
  void set_uniform_vec4(GLuint glsl_program_num, const std::string &prefix, const std::string &key, const float *values);
+ void set_uniform_vec2_array(GLuint glsl_program_num, const std::string &prefix, const std::string &key, const float *values, size_t num_values);
  void set_uniform_vec4_array(GLuint glsl_program_num, const std::string &prefix, const std::string &key, const float *values, size_t num_values);
  void set_uniform_mat3(GLuint glsl_program_num, const std::string &prefix, const std::string &key, const Eigen::Matrix3d &matrix);
  
diff --combined movit.pc.in
index 6069a06,b11ba15..c57c031
--- a/movit.pc.in
+++ b/movit.pc.in
@@@ -13,5 -13,6 +13,6 @@@ Description: Movit is a library for hig
  Version: git
  Requires:
  Conflicts:
- Libs: -lmovit @epoxy_LIBS@
- Cflags: -I${includedir}/movit @Eigen3_CFLAGS@ @epoxy_CFLAGS@
+ Libs: -lmovit
 -Libs.private: @GLEW_LIBS@ @FFTW3_LIBS@
 -Cflags: -I${includedir}/movit @Eigen3_CFLAGS@ @GLEW_CFLAGS@ @FFTW3_CFLAGS@
++Libs.private: @epoxy_LIBS@ @FFTW3_LIBS@
++Cflags: -I${includedir}/movit @Eigen3_CFLAGS@ @epoxy_CFLAGS@ @FFTW3_CFLAGS@
diff --combined util.cpp
index ac6a218,1e66c30..cce3d54
--- a/util.cpp
+++ b/util.cpp
@@@ -1,4 -1,4 +1,4 @@@
 -#include <GL/glew.h>
 +#include <epoxy/gl.h>
  #include <assert.h>
  #include <math.h>
  #include <stdio.h>
@@@ -9,6 -9,14 +9,14 @@@
  #include "init.h"
  #include "util.h"
  
+ #if defined(__DARWIN__)
+ #include <OpenGL/OpenGL.h>
+ #elif defined(WIN32)
 -#include <GL/wglew.h>
++#include <epoxy/wgl.h>
+ #else
 -#include <GL/glxew.h>
++#include <epoxy/glx.h>
+ #endif
+ 
  using namespace std;
  
  namespace movit {
@@@ -233,4 -241,15 +241,15 @@@ unsigned next_power_of_two(unsigned v
  	return v;
  }
  
+ void *get_gl_context_identifier()
+ {
+ #if defined(__DARWIN__)
+ 	return (void *)CGLGetCurrentContext();
+ #elif defined(WIN32)
+ 	return (void *)wglGetCurrentContext();
+ #else
+ 	return (void *)glXGetCurrentContext();
+ #endif
+ }
+ 
  }  // namespace movit
diff --combined util.h
index a3e262f,1ecdc00..6def56d
--- a/util.h
+++ b/util.h
@@@ -3,7 -3,7 +3,7 @@@
  
  // Various utilities.
  
 -#include <GL/glew.h>
 +#include <epoxy/gl.h>
  #include <stdio.h>
  #include <stdlib.h>
  #include <Eigen/Core>
@@@ -56,6 -56,12 +56,12 @@@ void cleanup_vertex_attribute(GLuint gl
  // If v is not already a power of two, return the first higher power of two.
  unsigned next_power_of_two(unsigned v);
  
+ // Get a pointer that represents the current OpenGL context, in a cross-platform way.
+ // This is not intended for anything but identification (i.e., so you can associate
+ // different FBOs with different contexts); you should probably not try to cast it
+ // back into anything you intend to pass into OpenGL.
+ void *get_gl_context_identifier();
+ 
  }  // namespace movit
  
  #ifdef NDEBUG