git.sesse.net Git - movit/blobdiff - effect_chain.cpp
Use UBOs instead of glUniform. Work in progress; no clear wins seen yet.
[movit] / effect_chain.cpp
index af0710e46a2ce6c82cd40cf9eaba7c57ed9d7a9e..f800d1ea40ce62e8dadab19940a5b3a228c1afd2 100644 (file)
@@ -1,5 +1,3 @@
-#define GL_GLEXT_PROTOTYPES 1
-
 #include <epoxy/gl.h>
 #include <assert.h>
 #include <math.h>
@@ -34,12 +32,13 @@ using namespace std;
 
 namespace movit {
 
-EffectChain::EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool)
+EffectChain::EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool, GLenum intermediate_format)
        : aspect_nom(aspect_nom),
          aspect_denom(aspect_denom),
          output_color_rgba(false),
          output_color_ycbcr(false),
          dither_effect(NULL),
+         intermediate_format(intermediate_format),
          num_dither_bits(0),
          output_origin(OUTPUT_ORIGIN_BOTTOM_LEFT),
          finalized(false),
@@ -51,6 +50,14 @@ EffectChain::EffectChain(float aspect_nom, float aspect_denom, ResourcePool *res
        } else {
                owns_resource_pool = false;
        }
+
+       // Generate a VBO holding the vertex data shared by the position and texture coordinate attributes.
+       float vertices[] = {
+               0.0f, 2.0f,
+               0.0f, 0.0f,
+               2.0f, 0.0f
+       };
+       vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices);
 }
 
 EffectChain::~EffectChain()
@@ -66,6 +73,8 @@ EffectChain::~EffectChain()
        if (owns_resource_pool) {
                delete resource_pool;
        }
+       glDeleteBuffers(1, &vbo);
+       check_error();
 }
 
 Input *EffectChain::add_input(Input *input)
@@ -184,6 +193,13 @@ GLenum EffectChain::get_input_sampler(Node *node, unsigned input_num) const
        return GL_TEXTURE0 + node->incoming_links[input_num]->bound_sampler_num;
 }
 
+// Reports whether input number <input_num> of <node> has a usable sampler:
+// the linked input's bound_sampler_num must be non-negative and less than 8.
+// <input_num> must be a valid index into node->incoming_links (asserted).
+// NOTE(review): the result is a truth value (0 or 1) even though the declared
+// return type is GLenum; presumably bool was intended -- confirm against the
+// declaration in the header before changing it.
+GLenum EffectChain::has_input_sampler(Node *node, unsigned input_num) const
+{
+       assert(input_num < node->incoming_links.size());
+       if (node->incoming_links[input_num]->bound_sampler_num < 0) {
+               // Negative sampler number: no usable sampler.
+               return false;
+       }
+       return node->incoming_links[input_num]->bound_sampler_num < 8;
+}
+
 void EffectChain::find_all_nonlinear_inputs(Node *node, vector<Node *> *nonlinear_inputs)
 {
        if (node->output_gamma_curve == GAMMA_LINEAR &&
@@ -259,6 +275,7 @@ template<class T>
 void extract_uniform_declarations(const vector<Uniform<T> > &effect_uniforms,
                                   const string &type_specifier,
                                   const string &effect_id,
+                                  bool in_ubo_block,
                                   vector<Uniform<T> > *phase_uniforms,
                                   string *glsl_string)
 {
@@ -266,8 +283,10 @@ void extract_uniform_declarations(const vector<Uniform<T> > &effect_uniforms,
                phase_uniforms->push_back(effect_uniforms[i]);
                phase_uniforms->back().prefix = effect_id;
 
-               *glsl_string += string("uniform ") + type_specifier + " " + effect_id
-                       + "_" + effect_uniforms[i].name + ";\n";
+               if (!in_ubo_block) {
+                       *glsl_string += "uniform ";
+               }
+               *glsl_string += type_specifier + " " + effect_id + "_" + effect_uniforms[i].name + ";\n";
        }
 }
 
@@ -275,6 +294,7 @@ template<class T>
 void extract_uniform_array_declarations(const vector<Uniform<T> > &effect_uniforms,
                                         const string &type_specifier,
                                         const string &effect_id,
+                                        bool in_ubo_block,
                                         vector<Uniform<T> > *phase_uniforms,
                                         string *glsl_string)
 {
@@ -282,8 +302,12 @@ void extract_uniform_array_declarations(const vector<Uniform<T> > &effect_unifor
                phase_uniforms->push_back(effect_uniforms[i]);
                phase_uniforms->back().prefix = effect_id;
 
+               if (!in_ubo_block) {
+                       *glsl_string += "uniform ";
+               }
+
                char buf[256];
-               snprintf(buf, sizeof(buf), "uniform %s %s_%s[%d];\n",
+               snprintf(buf, sizeof(buf), "%s %s_%s[%d];\n",
                        type_specifier.c_str(), effect_id.c_str(),
                        effect_uniforms[i].name.c_str(),
                        int(effect_uniforms[i].num_values));
@@ -297,6 +321,7 @@ void collect_uniform_locations(GLuint glsl_program_num, vector<Uniform<T> > *pha
        for (unsigned i = 0; i < phase_uniforms->size(); ++i) {
                Uniform<T> &uniform = (*phase_uniforms)[i];
                uniform.location = get_uniform_location(glsl_program_num, uniform.prefix, uniform.name);
+               get_uniform_offset_and_size(glsl_program_num, uniform.prefix, uniform.name, &uniform.ubo_offset, &uniform.ubo_num_elem);
        }
 }
 
@@ -369,16 +394,23 @@ void EffectChain::compile_glsl_program(Phase *phase)
        frag_shader += string("#define INPUT ") + phase->effect_ids[phase->effects.back()] + "\n";
 
        // If we're the last phase, add the right #defines for Y'CbCr multi-output as needed.
+       vector<string> frag_shader_outputs;  // In order.
        if (phase->output_node->outgoing_links.empty() && output_color_ycbcr) {
                switch (output_ycbcr_splitting) {
                case YCBCR_OUTPUT_INTERLEAVED:
                        // No #defines set.
+                       frag_shader_outputs.push_back("FragColor");
                        break;
                case YCBCR_OUTPUT_SPLIT_Y_AND_CBCR:
                        frag_shader += "#define YCBCR_OUTPUT_SPLIT_Y_AND_CBCR 1\n";
+                       frag_shader_outputs.push_back("Y");
+                       frag_shader_outputs.push_back("Chroma");
                        break;
                case YCBCR_OUTPUT_PLANAR:
                        frag_shader += "#define YCBCR_OUTPUT_PLANAR 1\n";
+                       frag_shader_outputs.push_back("Y");
+                       frag_shader_outputs.push_back("Cb");
+                       frag_shader_outputs.push_back("Cr");
                        break;
                default:
                        assert(false);
@@ -389,6 +421,7 @@ void EffectChain::compile_glsl_program(Phase *phase)
                        // output needs to see it (YCbCrConversionEffect and DitherEffect
                        // do, too).
                        frag_shader_header += "#define YCBCR_ALSO_OUTPUT_RGBA 1\n";
+                       frag_shader_outputs.push_back("RGBA");
                }
        }
        frag_shader.append(read_file("footer.frag"));
@@ -398,22 +431,34 @@ void EffectChain::compile_glsl_program(Phase *phase)
        // before in the output source, since output_fragment_shader() is allowed
        // to register new uniforms (e.g. arrays that are of unknown length until
        // finalization time).
-       // TODO: Make a uniform block for platforms that support it.
        string frag_shader_uniforms = "";
        for (unsigned i = 0; i < phase->effects.size(); ++i) {
+               const bool in_ubo_block = true;  // TODO: Check for the extension.
                Node *node = phase->effects[i];
                Effect *effect = node->effect;
                const string effect_id = phase->effect_ids[node];
-               extract_uniform_declarations(effect->uniforms_sampler2d, "sampler2D", effect_id, &phase->uniforms_sampler2d, &frag_shader_uniforms);
-               extract_uniform_declarations(effect->uniforms_bool, "bool", effect_id, &phase->uniforms_bool, &frag_shader_uniforms);
-               extract_uniform_declarations(effect->uniforms_int, "int", effect_id, &phase->uniforms_int, &frag_shader_uniforms);
-               extract_uniform_declarations(effect->uniforms_float, "float", effect_id, &phase->uniforms_float, &frag_shader_uniforms);
-               extract_uniform_declarations(effect->uniforms_vec2, "vec2", effect_id, &phase->uniforms_vec2, &frag_shader_uniforms);
-               extract_uniform_declarations(effect->uniforms_vec3, "vec3", effect_id, &phase->uniforms_vec3, &frag_shader_uniforms);
-               extract_uniform_declarations(effect->uniforms_vec4, "vec4", effect_id, &phase->uniforms_vec4, &frag_shader_uniforms);
-               extract_uniform_array_declarations(effect->uniforms_vec2_array, "vec2", effect_id, &phase->uniforms_vec2, &frag_shader_uniforms);
-               extract_uniform_array_declarations(effect->uniforms_vec4_array, "vec4", effect_id, &phase->uniforms_vec4, &frag_shader_uniforms);
-               extract_uniform_declarations(effect->uniforms_mat3, "mat3", effect_id, &phase->uniforms_mat3, &frag_shader_uniforms);
+               extract_uniform_declarations(effect->uniforms_bool, "bool", effect_id, in_ubo_block, &phase->uniforms_bool, &frag_shader_uniforms);
+               extract_uniform_declarations(effect->uniforms_int, "int", effect_id, in_ubo_block, &phase->uniforms_int, &frag_shader_uniforms);
+               extract_uniform_declarations(effect->uniforms_float, "float", effect_id, in_ubo_block, &phase->uniforms_float, &frag_shader_uniforms);
+               extract_uniform_declarations(effect->uniforms_vec2, "vec2", effect_id, in_ubo_block, &phase->uniforms_vec2, &frag_shader_uniforms);
+               extract_uniform_declarations(effect->uniforms_vec3, "vec3", effect_id, in_ubo_block, &phase->uniforms_vec3, &frag_shader_uniforms);
+               extract_uniform_declarations(effect->uniforms_vec4, "vec4", effect_id, in_ubo_block, &phase->uniforms_vec4, &frag_shader_uniforms);
+               extract_uniform_array_declarations(effect->uniforms_float_array, "float", effect_id, in_ubo_block, &phase->uniforms_float, &frag_shader_uniforms);
+               extract_uniform_array_declarations(effect->uniforms_vec2_array, "vec2", effect_id, in_ubo_block, &phase->uniforms_vec2, &frag_shader_uniforms);
+               extract_uniform_array_declarations(effect->uniforms_vec3_array, "vec3", effect_id, in_ubo_block, &phase->uniforms_vec3, &frag_shader_uniforms);
+               extract_uniform_array_declarations(effect->uniforms_vec4_array, "vec4", effect_id, in_ubo_block, &phase->uniforms_vec4, &frag_shader_uniforms);
+               extract_uniform_declarations(effect->uniforms_mat3, "mat3", effect_id, in_ubo_block, &phase->uniforms_mat3, &frag_shader_uniforms);
+       }
+       if (!frag_shader_uniforms.empty()) {
+               frag_shader_uniforms = "layout(packed) uniform MovitUniforms {\n" + frag_shader_uniforms + "};\n";
+       }
+
+       // Samplers must be outside the UBO block.
+       for (unsigned i = 0; i < phase->effects.size(); ++i) {
+               Node *node = phase->effects[i];
+               Effect *effect = node->effect;
+               const string effect_id = phase->effect_ids[node];
+               extract_uniform_declarations(effect->uniforms_sampler2d, "sampler2D", effect_id, /*in_ubo_block=*/false, &phase->uniforms_sampler2d, &frag_shader_uniforms);
        }
 
        frag_shader = frag_shader_header + frag_shader_uniforms + frag_shader;
@@ -430,7 +475,43 @@ void EffectChain::compile_glsl_program(Phase *phase)
                vert_shader[pos + needle.size() - 1] = '1';
        }
 
-       phase->glsl_program_num = resource_pool->compile_glsl_program(vert_shader, frag_shader);
+       phase->glsl_program_num = resource_pool->compile_glsl_program(vert_shader, frag_shader, frag_shader_outputs);
+       GLint position_attribute_index = glGetAttribLocation(phase->glsl_program_num, "position");
+       GLint texcoord_attribute_index = glGetAttribLocation(phase->glsl_program_num, "texcoord");
+       if (position_attribute_index != -1) {
+               phase->attribute_indexes.insert(position_attribute_index);
+       }
+       if (texcoord_attribute_index != -1) {
+               phase->attribute_indexes.insert(texcoord_attribute_index);
+       }
+
+       // Create an UBO for holding the uniforms. This UBO will be updated each frame.
+       // TODO: Delete the block on destruction.
+       phase->uniform_block_index = glGetUniformBlockIndex(phase->glsl_program_num, "MovitUniforms");
+       if (phase->uniform_block_index != GL_INVALID_INDEX) {
+               glGenBuffers(1, &phase->ubo);
+               check_error();
+               GLsizei block_size;
+               glGetActiveUniformBlockiv(
+                       phase->glsl_program_num, phase->uniform_block_index,
+                       GL_UNIFORM_BLOCK_DATA_SIZE, &block_size);
+               check_error();
+               phase->ubo_data.resize(block_size);
+
+               glBindBuffer(GL_UNIFORM_BUFFER, phase->ubo);
+               check_error();
+               glBufferData(GL_UNIFORM_BUFFER, block_size, NULL, GL_DYNAMIC_DRAW);
+               check_error();
+
+               // Associate the uniform block with binding point 0,
+               // and attach the UBO to that binding point.
+               glUniformBlockBinding(phase->glsl_program_num, phase->uniform_block_index, 0);
+               check_error();
+               glBindBufferBase(GL_UNIFORM_BUFFER, 0, phase->ubo);
+               check_error();
+       } else {
+               phase->ubo = GL_INVALID_INDEX;
+       }
 
        // Collect the resulting location numbers for each uniform.
        collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_sampler2d);
@@ -441,6 +522,9 @@ void EffectChain::compile_glsl_program(Phase *phase)
        collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_vec3);
        collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_vec4);
        collect_uniform_locations(phase->glsl_program_num, &phase->uniforms_mat3);
+
+       glBindBuffer(GL_UNIFORM_BUFFER, 0);
+       check_error();
 }
 
 // Construct GLSL programs, starting at the given effect and following
@@ -500,7 +584,8 @@ Phase *EffectChain::construct_phase(Node *output, map<Node *, Phase *> *complete
                        bool start_new_phase = false;
 
                        if (node->effect->needs_texture_bounce() &&
-                           !deps[i]->effect->is_single_texture()) {
+                           !deps[i]->effect->is_single_texture() &&
+                           !deps[i]->effect->override_disable_bounce()) {
                                start_new_phase = true;
                        }
 
@@ -571,9 +656,17 @@ Phase *EffectChain::construct_phase(Node *output, map<Node *, Phase *> *complete
        // and create a GLSL program for it.
        assert(!phase->effects.empty());
 
-       // Deduplicate the inputs.
-       sort(phase->inputs.begin(), phase->inputs.end());
-       phase->inputs.erase(unique(phase->inputs.begin(), phase->inputs.end()), phase->inputs.end());
+       // Deduplicate the inputs, but don't change the ordering e.g. by sorting;
+       // that would be nondeterministic and thus reduce cacheability.
+       // TODO: Make this even more deterministic.
+       vector<Phase *> dedup_inputs;
+       set<Phase *> seen_inputs;
+       for (size_t i = 0; i < phase->inputs.size(); ++i) {
+               if (seen_inputs.insert(phase->inputs[i]).second) {
+                       dedup_inputs.push_back(phase->inputs[i]);
+               }
+       }
+       swap(phase->inputs, dedup_inputs);
 
        // Allocate samplers for each input.
        phase->input_samplers.resize(phase->inputs.size());
@@ -608,9 +701,8 @@ Phase *EffectChain::construct_phase(Node *output, map<Node *, Phase *> *complete
        // Actually make the shader for this phase.
        compile_glsl_program(phase);
 
-       // Initialize timer objects.
+       // Initialize timers.
        if (movit_timer_queries_supported) {
-               glGenQueries(1, &phase->timer_query_object);
                phase->time_elapsed_ns = 0;
                phase->num_measured_iterations = 0;
        }
@@ -1646,6 +1738,14 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
 {
        assert(finalized);
 
+       // This needs to be set anew, in case we are coming from a different context
+       // from when we initialized.
+       check_error();
+       glDisable(GL_DITHER);
+       check_error();
+       glEnable(GL_FRAMEBUFFER_SRGB);
+       check_error();
+
        // Save original viewport.
        GLuint x = 0, y = 0;
 
@@ -1659,6 +1759,7 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
        }
 
        // Basic state.
+       check_error();
        glDisable(GL_BLEND);
        check_error();
        glDisable(GL_DEPTH_TEST);
@@ -1666,22 +1767,16 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
        glDepthMask(GL_FALSE);
        check_error();
 
-       // Generate a VAO. All the phases should have exactly the same vertex attributes,
-       // so it's safe to reuse this.
-       float vertices[] = {
-               0.0f, 2.0f,
-               0.0f, 0.0f,
-               2.0f, 0.0f
-       };
-
+       // Generate a VAO that will be used during the entire execution,
+       // and bind the VBO, since it contains all the data.
        GLuint vao;
        glGenVertexArrays(1, &vao);
        check_error();
        glBindVertexArray(vao);
        check_error();
-
-       GLuint position_vbo = fill_vertex_attribute(phases[0]->glsl_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices);
-       GLuint texcoord_vbo = fill_vertex_attribute(phases[0]->glsl_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices);  // Same as vertices.
+       glBindBuffer(GL_ARRAY_BUFFER, vbo);
+       check_error();
+       set<GLint> bound_attribute_indices;
 
        set<Phase *> generated_mipmaps;
 
@@ -1693,7 +1788,15 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
                Phase *phase = phases[phase_num];
 
                if (do_phase_timing) {
-                       glBeginQuery(GL_TIME_ELAPSED, phase->timer_query_object);
+                       GLuint timer_query_object;
+                       if (phase->timer_query_objects_free.empty()) {
+                               glGenQueries(1, &timer_query_object);
+                       } else {
+                               timer_query_object = phase->timer_query_objects_free.front();
+                               phase->timer_query_objects_free.pop_front();
+                       }
+                       glBeginQuery(GL_TIME_ELAPSED, timer_query_object);
+                       phase->timer_query_objects_running.push_back(timer_query_object);
                }
                if (phase_num == phases.size() - 1) {
                        // Last phase goes to the output the user specified.
@@ -1707,7 +1810,7 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
                                CHECK(dither_effect->set_int("output_height", height));
                        }
                }
-               execute_phase(phase, phase_num == phases.size() - 1, &output_textures, &generated_mipmaps);
+               execute_phase(phase, phase_num == phases.size() - 1, &bound_attribute_indices, &output_textures, &generated_mipmaps);
                if (do_phase_timing) {
                        glEndQuery(GL_TIME_ELAPSED);
                }
@@ -1724,9 +1827,10 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
        glUseProgram(0);
        check_error();
 
-       cleanup_vertex_attribute(phases[0]->glsl_program_num, "position", position_vbo);
-       cleanup_vertex_attribute(phases[0]->glsl_program_num, "texcoord", texcoord_vbo);
-
+       glBindBuffer(GL_ARRAY_BUFFER, 0);
+       check_error();
+       glBindVertexArray(0);
+       check_error();
        glDeleteVertexArrays(1, &vao);
        check_error();
 
@@ -1734,14 +1838,22 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
                // Get back the timer queries.
                for (unsigned phase_num = 0; phase_num < phases.size(); ++phase_num) {
                        Phase *phase = phases[phase_num];
-                       GLint available = 0;
-                       while (!available) {
-                               glGetQueryObjectiv(phase->timer_query_object, GL_QUERY_RESULT_AVAILABLE, &available);
+                       for (std::list<GLuint>::iterator timer_it = phase->timer_query_objects_running.begin();
+                            timer_it != phase->timer_query_objects_running.end(); ) {
+                               GLint timer_query_object = *timer_it;
+                               GLint available;
+                               glGetQueryObjectiv(timer_query_object, GL_QUERY_RESULT_AVAILABLE, &available);
+                               if (available) {
+                                       GLuint64 time_elapsed;
+                                       glGetQueryObjectui64v(timer_query_object, GL_QUERY_RESULT, &time_elapsed);
+                                       phase->time_elapsed_ns += time_elapsed;
+                                       ++phase->num_measured_iterations;
+                                       phase->timer_query_objects_free.push_back(timer_query_object);
+                                       phase->timer_query_objects_running.erase(timer_it++);
+                               } else {
+                                       ++timer_it;
+                               }
                        }
-                       GLuint64 time_elapsed;
-                       glGetQueryObjectui64v(phase->timer_query_object, GL_QUERY_RESULT, &time_elapsed);
-                       phase->time_elapsed_ns += time_elapsed;
-                       ++phase->num_measured_iterations;
                }
        }
 }
@@ -1782,7 +1894,10 @@ void EffectChain::print_phase_timing()
        printf("Total:   %5.1f ms\n", total_time_ms);
 }
 
-void EffectChain::execute_phase(Phase *phase, bool last_phase, map<Phase *, GLuint> *output_textures, set<Phase *> *generated_mipmaps)
+void EffectChain::execute_phase(Phase *phase, bool last_phase,
+                                set<GLint> *bound_attribute_indices,
+                                map<Phase *, GLuint> *output_textures,
+                                set<Phase *> *generated_mipmaps)
 {
        GLuint fbo = 0;
 
@@ -1791,13 +1906,11 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, map<Phase *, GLui
        if (!last_phase) {
                find_output_size(phase);
 
-               GLuint tex_num = resource_pool->create_2d_texture(GL_RGBA16F, phase->output_width, phase->output_height);
+               GLuint tex_num = resource_pool->create_2d_texture(intermediate_format, phase->output_width, phase->output_height);
                output_textures->insert(make_pair(phase, tex_num));
        }
 
-       const GLuint glsl_program_num = phase->glsl_program_num;
-       check_error();
-       glUseProgram(glsl_program_num);
+       glUseProgram(phase->glsl_program_num);
        check_error();
 
        // Set up RTT inputs for this phase.
@@ -1828,7 +1941,7 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, map<Phase *, GLui
        for (unsigned i = 0; i < phase->effects.size(); ++i) {
                Node *node = phase->effects[i];
                unsigned old_sampler_num = sampler_num;
-               node->effect->set_gl_state(glsl_program_num, phase->effect_ids[node], &sampler_num);
+               node->effect->set_gl_state(phase->glsl_program_num, phase->effect_ids[node], &sampler_num);
                check_error();
 
                if (node->effect->is_single_texture()) {
@@ -1843,12 +1956,34 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, map<Phase *, GLui
        // from there.
        setup_uniforms(phase);
 
-       glDrawArrays(GL_TRIANGLES, 0, 3);
-       check_error();
+       // Clean up old attributes if they are no longer needed.
+       for (set<GLint>::iterator attr_it = bound_attribute_indices->begin();
+            attr_it != bound_attribute_indices->end(); ) {
+               if (phase->attribute_indexes.count(*attr_it) == 0) {
+                       glDisableVertexAttribArray(*attr_it);
+                       check_error();
+                       bound_attribute_indices->erase(attr_it++);
+               } else {
+                       ++attr_it;
+               }
+       }
 
-       glUseProgram(0);
-       check_error();
+       // Set up the new attributes, if needed.
+       for (set<GLint>::iterator attr_it = phase->attribute_indexes.begin();
+            attr_it != phase->attribute_indexes.end();
+            ++attr_it) {
+               if (bound_attribute_indices->count(*attr_it) == 0) {
+                       glEnableVertexAttribArray(*attr_it);
+                       check_error();
+                       glVertexAttribPointer(*attr_it, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
+                       check_error();
+                       bound_attribute_indices->insert(*attr_it);
+               }
+       }
 
+       glDrawArrays(GL_TRIANGLES, 0, 3);
+       check_error();
+       
        for (unsigned i = 0; i < phase->effects.size(); ++i) {
                Node *node = phase->effects[i];
                node->effect->clear_gl_state();
@@ -1861,54 +1996,75 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, map<Phase *, GLui
 
 void EffectChain::setup_uniforms(Phase *phase)
 {
-       // TODO: Use UBO blocks.
+       char *ubo_data = phase->ubo_data.empty() ? NULL : &phase->ubo_data[0];
+
        for (size_t i = 0; i < phase->uniforms_sampler2d.size(); ++i) {
                const Uniform<int> &uniform = phase->uniforms_sampler2d[i];
-               if (uniform.location != -1) {
+               if (uniform.location != GL_INVALID_INDEX) {
                        glUniform1iv(uniform.location, uniform.num_values, uniform.value);
                }
+               assert(uniform.ubo_offset == -1);  // Samplers don't go into UBOs.
        }
        for (size_t i = 0; i < phase->uniforms_bool.size(); ++i) {
                const Uniform<bool> &uniform = phase->uniforms_bool[i];
                assert(uniform.num_values == 1);
-               if (uniform.location != -1) {
+               if (uniform.location != GL_INVALID_INDEX) {
                        glUniform1i(uniform.location, *uniform.value);
                }
+               if (uniform.ubo_offset != -1) {
+                       GLint int_val = *uniform.value;
+                       memcpy(ubo_data + uniform.ubo_offset, &int_val, sizeof(int_val));
+               }
        }
        for (size_t i = 0; i < phase->uniforms_int.size(); ++i) {
                const Uniform<int> &uniform = phase->uniforms_int[i];
-               if (uniform.location != -1) {
+               if (uniform.location != GL_INVALID_INDEX) {
                        glUniform1iv(uniform.location, uniform.num_values, uniform.value);
                }
+               if (uniform.ubo_offset != -1) {
+                       memcpy(ubo_data + uniform.ubo_offset, uniform.value, uniform.ubo_num_elem * sizeof(*uniform.value));
+               }
        }
        for (size_t i = 0; i < phase->uniforms_float.size(); ++i) {
                const Uniform<float> &uniform = phase->uniforms_float[i];
-               if (uniform.location != -1) {
+               if (uniform.location != GL_INVALID_INDEX) {
                        glUniform1fv(uniform.location, uniform.num_values, uniform.value);
                }
+               if (uniform.ubo_offset != -1) {
+                       memcpy(ubo_data + uniform.ubo_offset, uniform.value, uniform.ubo_num_elem * sizeof(*uniform.value));
+               }
        }
        for (size_t i = 0; i < phase->uniforms_vec2.size(); ++i) {
                const Uniform<float> &uniform = phase->uniforms_vec2[i];
-               if (uniform.location != -1) {
-                       glUniform2fv(uniform.location, uniform.num_values, uniform.value);
+               if (uniform.location != GL_INVALID_INDEX) {
+                       glUniform2fv(uniform.location, uniform.ubo_num_elem, uniform.value);
+               }
+               if (uniform.ubo_offset != -1) {
+                       memcpy(ubo_data + uniform.ubo_offset, uniform.value, uniform.ubo_num_elem * 2 * sizeof(*uniform.value));
                }
        }
        for (size_t i = 0; i < phase->uniforms_vec3.size(); ++i) {
                const Uniform<float> &uniform = phase->uniforms_vec3[i];
-               if (uniform.location != -1) {
-                       glUniform3fv(uniform.location, uniform.num_values, uniform.value);
+               if (uniform.location != GL_INVALID_INDEX) {
+                       glUniform3fv(uniform.location, uniform.ubo_num_elem, uniform.value);
+               }
+               if (uniform.ubo_offset != -1) {
+                       memcpy(ubo_data + uniform.ubo_offset, uniform.value, uniform.ubo_num_elem * 3 * sizeof(*uniform.value));
                }
        }
        for (size_t i = 0; i < phase->uniforms_vec4.size(); ++i) {
                const Uniform<float> &uniform = phase->uniforms_vec4[i];
-               if (uniform.location != -1) {
-                       glUniform4fv(uniform.location, uniform.num_values, uniform.value);
+               if (uniform.location != GL_INVALID_INDEX) {
+                       glUniform4fv(uniform.location, uniform.ubo_num_elem, uniform.value);
+               }
+               if (uniform.ubo_offset != -1) {
+                       memcpy(ubo_data + uniform.ubo_offset, uniform.value, uniform.ubo_num_elem * 4 * sizeof(*uniform.value));
                }
        }
        for (size_t i = 0; i < phase->uniforms_mat3.size(); ++i) {
                const Uniform<Matrix3d> &uniform = phase->uniforms_mat3[i];
-               assert(uniform.num_values == 1);
-               if (uniform.location != -1) {
+               assert(uniform.ubo_num_elem == 1);
+               if (uniform.location != GL_INVALID_INDEX) {
                        // Convert to float (GLSL has no double matrices).
                        float matrixf[9];
                        for (unsigned y = 0; y < 3; ++y) {
@@ -1918,6 +2074,16 @@ void EffectChain::setup_uniforms(Phase *phase)
                        }
                        glUniformMatrix3fv(uniform.location, 1, GL_FALSE, matrixf);
                }
+               if (uniform.ubo_offset != -1) {
+                       // TODO
+                       assert(false);
+               }
+       }
+
+       if (phase->ubo != GL_INVALID_INDEX) {
+               // TODO: Do we want to demand DSA for this?
+               glNamedBufferSubData(phase->ubo, 0, phase->ubo_data.size(), ubo_data);
+               return;
        }
 }