Merge branch 'epoxy'

author Steinar H. Gunderson <sgunderson@bigfoot.com>
Fri, 28 Mar 2014 20:15:05 +0000 (21:15 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Fri, 28 Mar 2014 20:17:41 +0000 (21:17 +0100)
diff --combined Makefile.in

index d028c082e3514b250dcf4532f3b5d044d9af29d3,50431c2e5c2a4e144c47a2026e58ed209eeaed5c..05a80cae3c6e3fedb42d6c39d622cc0475fd0101
--- 1/Makefile.in
--- 2/Makefile.in
+++ b/Makefile.in
@@@ -6,8 -6,8 +6,8 @@@ GTEST_DIR ?= /usr/src/gtes
   # strive towards having a rock-stable ABI, but at least the soversion will increase
   # whenever it breaks, so that you will not have silent failures, and distribution package
   # management can run its course.
- -movit_ltversion = 1:3:0
- -movit_version = 1.0.3
+ +movit_ltversion = 2:0:0
+ +movit_version = 1.1
   
   prefix = @prefix@
   exec_prefix = @exec_prefix@
@@@ -17,15 -17,19 +17,19 @@@ datarootdir = @datarootdir
   datadir = @datadir@
   top_builddir = @top_builddir@
   with_demo_app = @with_demo_app@
+ with_SDL2 = @with_SDL2@
   with_coverage = @with_coverage@
   
   CC=@CC@
   CXX=@CXX@
- CXXFLAGS=-Wall @CXXFLAGS@ -I$(GTEST_DIR)/include @Eigen3_CFLAGS@ @GLEW_CFLAGS@ @FFTW3_CFLAGS@
+ CXXFLAGS=-Wall @CXXFLAGS@ -I$(GTEST_DIR)/include @SDL2_CFLAGS@ @SDL_CFLAGS@ @Eigen3_CFLAGS@ @epoxy_CFLAGS@ @FFTW3_CFLAGS@
+ ifeq ($(with_SDL2),yes)
+ CXXFLAGS += -DHAVE_SDL2
+ endif
   LDFLAGS=@LDFLAGS@
- LDLIBS=@GLEW_LIBS@ @FFTW3_LIBS@ -lpthread
- TEST_LDLIBS=@GLEW_LIBS@ @SDL_LIBS@ -lpthread
- DEMO_LDLIBS=@SDL_image_LIBS@ -lrt -lpthread @libpng_LIBS@ @FFTW3_LIBS@
+ LDLIBS=@epoxy_LIBS@ @FFTW3_LIBS@ -lpthread
+ TEST_LDLIBS=@epoxy_LIBS@ @SDL2_LIBS@ @SDL_LIBS@ -lpthread
+ DEMO_LDLIBS=@SDL2_image_LIBS@ @SDL_image_LIBS@ -lrt -lpthread @libpng_LIBS@ @FFTW3_LIBS@
   SHELL=@SHELL@
   LIBTOOL=@LIBTOOL@ --tag=CXX
   RANLIB=ranlib
@@@ -159,12 -163,14 +163,14 @@@ HDRS = effect_chain.h effect_util.h eff
   HDRS += $(INPUTS:=.h)
   HDRS += $(EFFECTS:=.h)
   
- SHADERS = vs.vert header.frag footer.frag
+ SHADERS = vs.vert vs.130.vert vs.300es.vert
+ SHADERS += header.frag header.130.frag header.300es.frag
+ SHADERS += footer.frag footer.130.frag footer.300es.frag
+ SHADERS += texture1d.frag texture1d.130.frag texture1d.300es.frag
   SHADERS += $(INPUTS:=.frag)
   SHADERS += $(EFFECTS:=.frag)
   SHADERS += highlight_cutoff_effect.frag
   SHADERS += overlay_matte_effect.frag
- SHADERS += texture1d.frag
   
   # These purposefully do not exist.
   MISSING_SHADERS = diffusion_effect.frag glow_effect.frag unsharp_mask_effect.frag resize_effect.frag
diff --combined effect_chain.cpp

index c6f1e89af7b2cbef25919ff922f9bc6c1bc2258c,7bce60fa21c1e076c3082d69ff2e409cc342750a..0a01bd356e6066261910a60e4ee2af2813d428f5
--- 1/effect_chain.cpp
--- 2/effect_chain.cpp
+++ b/effect_chain.cpp
@@@ -1,6 -1,6 +1,6 @@@
   #define GL_GLEXT_PROTOTYPES 1
   
- #include <GL/glew.h>
+ #include <epoxy/gl.h>
   #include <assert.h>
   #include <locale.h>
   #include <math.h>
@@@ -53,6 -53,15 +53,6 @@@ EffectChain::~EffectChain(
                 delete nodes[i];
         }
         for (unsigned i = 0; i < phases.size(); ++i) {
- -              glBindVertexArray(phases[i]->vao);
- -              check_error();
- -
- -              cleanup_vertex_attribute(phases[i]->glsl_program_num, "position", phases[i]->position_vbo);
- -              cleanup_vertex_attribute(phases[i]->glsl_program_num, "texcoord", phases[i]->texcoord_vbo);
- -
- -              glBindVertexArray(0);
- -              check_error();
- -
                 resource_pool->release_glsl_program(phases[i]->glsl_program_num);
                 delete phases[i];
         }
@@@ -229,7 -238,7 +229,7 @@@ string replace_prefix(const string &tex
   
   void EffectChain::compile_glsl_program(Phase *phase)
   {
-       string frag_shader = read_file("header.frag");
+       string frag_shader = read_version_dependent_file("header", "frag");
   
         // Create functions for all the texture inputs that we need.
         for (unsigned i = 0; i < phase->inputs.size(); ++i) {
@@@ -240,7 -249,7 +240,7 @@@
         
                 frag_shader += string("uniform sampler2D tex_") + effect_id + ";\n";
                 frag_shader += string("vec4 ") + effect_id + "(vec2 tc) {\n";
-               frag_shader += "\treturn texture2D(tex_" + string(effect_id) + ", tc);\n";
+               frag_shader += "\treturn tex2D(tex_" + string(effect_id) + ", tc);\n";
                 frag_shader += "}\n";
                 frag_shader += "\n";
         }
@@@ -279,9 -288,31 +279,10 @@@
                 frag_shader += "\n";
         }
         frag_shader += string("#define INPUT ") + phase->effect_ids[phase->effects.back()] + "\n";
-       frag_shader.append(read_file("footer.frag"));
+       frag_shader.append(read_version_dependent_file("footer", "frag"));
   
-       phase->glsl_program_num = resource_pool->compile_glsl_program(read_file("vs.vert"), frag_shader);
+       string vert_shader = read_version_dependent_file("vs", "vert");
+       phase->glsl_program_num = resource_pool->compile_glsl_program(vert_shader, frag_shader);
- -
- -      // Prepare the geometry for the fullscreen quad used in this phase.
- -      // (We have separate VAOs per shader, since the bindings can in theory
- -      // be different.)
- -      float vertices[] = {
- -              0.0f, 1.0f,
- -              0.0f, 0.0f,
- -              1.0f, 1.0f,
- -              1.0f, 0.0f
- -      };
- -
- -      glGenVertexArrays(1, &phase->vao);
- -      check_error();
- -      glBindVertexArray(phase->vao);
- -      check_error();
- -
- -      phase->position_vbo = fill_vertex_attribute(phase->glsl_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices);
- -      phase->texcoord_vbo = fill_vertex_attribute(phase->glsl_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices);  // Same as vertices.
- -
- -      glBindVertexArray(0);
- -      check_error();
   }
   
   // Construct GLSL programs, starting at the given effect and following
@@@ -1454,6 -1485,8 +1455,6 @@@ void EffectChain::render_to_fbo(GLuint 
   
         glBindFramebuffer(GL_FRAMEBUFFER, 0);
         check_error();
- -      glBindVertexArray(0);
- -      check_error();
         glUseProgram(0);
         check_error();
   }
@@@ -1493,7 -1526,8 +1494,7 @@@ void EffectChain::execute_phase(Phase *
   
         // And now the output. (Already set up for us if it is the last phase.)
         if (!last_phase) {
- -              void *context = get_gl_context_identifier();
- -              fbo = resource_pool->create_fbo(context, (*output_textures)[phase]);
+ +              fbo = resource_pool->create_fbo((*output_textures)[phase]);
                 glBindFramebuffer(GL_FRAMEBUFFER, fbo);
                 glViewport(0, 0, phase->output_width, phase->output_height);
         }
@@@ -1514,32 -1548,11 +1515,32 @@@
                 }
         }
   
- -      glBindVertexArray(phase->vao);
+ +      // Now draw!
+ +      float vertices[] = {
+ +              0.0f, 1.0f,
+ +              0.0f, 0.0f,
+ +              1.0f, 1.0f,
+ +              1.0f, 0.0f
+ +      };
+ +
+ +      GLuint vao;
+ +      glGenVertexArrays(1, &vao);
+ +      check_error();
+ +      glBindVertexArray(vao);
         check_error();
+ +
+ +      GLuint position_vbo = fill_vertex_attribute(glsl_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices);
+ +      GLuint texcoord_vbo = fill_vertex_attribute(glsl_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices);  // Same as vertices.
+ +
         glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
         check_error();
   
+ +      cleanup_vertex_attribute(glsl_program_num, "position", position_vbo);
+ +      cleanup_vertex_attribute(glsl_program_num, "texcoord", texcoord_vbo);
+ +      
+ +      glUseProgram(0);
+ +      check_error();
+ +
         for (unsigned i = 0; i < phase->effects.size(); ++i) {
                 Node *node = phase->effects[i];
                 node->effect->clear_gl_state();
@@@ -1548,9 -1561,6 +1549,9 @@@
         if (!last_phase) {
                 resource_pool->release_fbo(fbo);
         }
+ +
+ +      glDeleteVertexArrays(1, &vao);
+ +      check_error();
   }
   
   void EffectChain::setup_rtt_sampler(GLuint glsl_program_num, int sampler_num, const string &effect_id, bool use_mipmaps)
diff --combined effect_chain.h

index 2b959bda68bfc2f5502f743c9ccb9435e79f9794,e7d99d54821928c1248f6456c461ba1f63525eed..593232cf01e036f47728ae1fd3d2c756bf6d3d85
--- 1/effect_chain.h
--- 2/effect_chain.h
+++ b/effect_chain.h
@@@ -16,12 -16,8 +16,12 @@@
   // but if so, the threads' contexts need to be set up to share resources, since
   // the EffectChain holds textures and other OpenGL objects that are tied to the
   // context.
+ +//
+ +// Memory management (only relevant if you use multiple contexts):
+ +// See corresponding comment in resource_pool.h. This holds even if you don't
+ +// allocate your own ResourcePool, but let EffectChain hold its own.
   
- #include <GL/glew.h>
+ #include <epoxy/gl.h>
   #include <stdio.h>
   #include <map>
   #include <set>
@@@ -103,6 -99,13 +103,6 @@@ struct Phase 
         // Identifier used to create unique variables in GLSL.
         // Unique per-phase to increase cacheability of compiled shaders.
         std::map<Node *, std::string> effect_ids;
- -
- -      // The geometry needed to draw this quad, bound to the vertex array
- -      // object. (Seemingly it's actually a win not to upload geometry every
- -      // frame, even for something as small as a quad, due to fewer state
- -      // changes.)
- -      GLuint vao;
- -      GLuint position_vbo, texcoord_vbo;
   };
   
   class EffectChain {
diff --combined fft_pass_effect.cpp

index b46e5e589fc9bb4f548dafc234745c801506beb4,ee0b983ae92002875d5b8e86538e21b09846991b..48e2677576735c5577f3989705cee61a55f4adc4
--- 1/fft_pass_effect.cpp
--- 2/fft_pass_effect.cpp
+++ b/fft_pass_effect.cpp
@@@ -1,4 -1,4 +1,4 @@@
- #include <GL/glew.h>
+ #include <epoxy/gl.h>
   #include <math.h>
   
   #include "effect_chain.h"
@@@ -14,12 -14,7 +14,12 @@@ namespace movit 
   FFTPassEffect::FFTPassEffect()
         : input_width(1280),
           input_height(720),
- -        direction(HORIZONTAL)
+ +        direction(HORIZONTAL),
+ +        last_fft_size(-1),
+ +        last_direction(INVALID),
+ +        last_pass_number(-1),
+ +        last_inverse(-1),
+ +        last_input_size(-1)
   {
         register_int("fft_size", &fft_size);
         register_int("direction", (int *)&direction);
@@@ -44,6 -39,8 +44,6 @@@ void FFTPassEffect::set_gl_state(GLuin
   {
         Effect::set_gl_state(glsl_program_num, prefix, sampler_num);
   
- -      int input_size = (direction == VERTICAL) ? input_height : input_width;
- -
         // This is needed because it counteracts the precision issues we get
         // because we sample the input texture with normalized coordinates
         // (especially when the repeat count along the axis is not a power of
@@@ -57,43 -54,6 +57,43 @@@
         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
         check_error();
   
+ +      // Because of the memory layout (see below) and because we use offsets,
+ +      // the support texture values for many consecutive values will be
+ +      // the same. Thus, we can store a smaller texture (giving a small
+ +      // performance boost) and just sample it with NEAREST. Also, this
+ +      // counteracts any precision issues we might get from linear
+ +      // interpolation.
+ +      glActiveTexture(GL_TEXTURE0 + *sampler_num);
+ +      check_error();
+ +      glBindTexture(GL_TEXTURE_2D, tex);
+ +      check_error();
+ +      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ +      check_error();
+ +      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ +      check_error();
+ +      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
+ +      check_error();
+ +
+ +      int input_size = (direction == VERTICAL) ? input_height : input_width;
+ +      if (last_fft_size != fft_size ||
+ +          last_direction != direction ||
+ +          last_pass_number != pass_number ||
+ +          last_inverse != inverse ||
+ +          last_input_size != input_size) {
+ +              generate_support_texture();
+ +      }
+ +
+ +      set_uniform_int(glsl_program_num, prefix, "support_tex", *sampler_num);
+ +      ++*sampler_num;
+ +
+ +      assert(input_size % fft_size == 0);
+ +      set_uniform_float(glsl_program_num, prefix, "num_repeats", input_size / fft_size);
+ +}
+ +
+ +void FFTPassEffect::generate_support_texture()
+ +{
+ +      int input_size = (direction == VERTICAL) ? input_height : input_width;
+ +
         // The memory layout follows figure 5.2 on page 25 of
         // http://gpuwave.sesse.net/gpuwave.pdf -- it can be a bit confusing
         // at first, but is classically explained more or less as follows:
@@@ -119,8 -79,8 +119,8 @@@
         // bit, so the stride is 8, and so on.
   
         assert((fft_size & (fft_size - 1)) == 0);  // Must be power of two.
- -      fp16_int_t *tmp = new fp16_int_t[fft_size * 4];
         int subfft_size = 1 << pass_number;
+ +      fp16_int_t *tmp = new fp16_int_t[subfft_size * 4];
         double mulfac;
         if (inverse) {
                 mulfac = 2.0 * M_PI;
@@@ -131,8 -91,9 +131,8 @@@
         assert((fft_size & (fft_size - 1)) == 0);  // Must be power of two.
         assert(fft_size % subfft_size == 0);
         int stride = fft_size / subfft_size;
- -      for (int i = 0; i < fft_size; ++i) {
- -              int k = i / stride;         // Element number within this sub-FFT.
- -              int offset = i % stride;    // Sub-FFT number.
+ +      for (int i = 0; i < subfft_size; i++) {
+ +              int k = i;
                 double twiddle_real, twiddle_imag;
   
                 if (k < subfft_size / 2) {
@@@ -155,22 -116,33 +155,22 @@@
                 // for using offsets and not direct coordinates as in GPUwave
                 // is that we can have multiple FFTs along the same line,
                 // and want to reuse the support texture by repeating it.
- -              int base = k * stride * 2 + offset;
+ +              int base = k * stride * 2;
                 int support_texture_index = i;
                 int src1 = base;
                 int src2 = base + stride;
+ +              double sign = 1.0;
                 if (direction == FFTPassEffect::VERTICAL) {
                         // Compensate for OpenGL's bottom-left convention.
- -                      support_texture_index = fft_size - support_texture_index - 1;
- -                      src1 = fft_size - src1 - 1;
- -                      src2 = fft_size - src2 - 1;
+ +                      support_texture_index = subfft_size - support_texture_index - 1;
+ +                      sign = -1.0;
                 }
- -              tmp[support_texture_index * 4 + 0] = fp64_to_fp16((src1 - support_texture_index) / double(input_size));
- -              tmp[support_texture_index * 4 + 1] = fp64_to_fp16((src2 - support_texture_index) / double(input_size));
+ +              tmp[support_texture_index * 4 + 0] = fp64_to_fp16(sign * (src1 - i * stride) / double(input_size));
+ +              tmp[support_texture_index * 4 + 1] = fp64_to_fp16(sign * (src2 - i * stride) / double(input_size));
                 tmp[support_texture_index * 4 + 2] = fp64_to_fp16(twiddle_real);
                 tmp[support_texture_index * 4 + 3] = fp64_to_fp16(twiddle_imag);
         }
   
- -      glActiveTexture(GL_TEXTURE0 + *sampler_num);
- -      check_error();
- -      glBindTexture(GL_TEXTURE_2D, tex);
- -      check_error();
- -      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- -      check_error();
- -      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- -      check_error();
- -      glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
- -      check_error();
- -
         // Supposedly FFTs are very sensitive to inaccuracies in the twiddle factors,
         // at least according to a paper by Schatzman (see gpuwave.pdf reference [30]
         // for the full reference); however, practical testing indicates that it's
@@@ -180,16 -152,16 +180,16 @@@
         // which gives a nice speed boost.
         //
         // Note that the source coordinates become somewhat less accurate too, though.
- -      glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, fft_size, 1, 0, GL_RGBA, GL_HALF_FLOAT, tmp);
+ +      glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, subfft_size, 1, 0, GL_RGBA, GL_HALF_FLOAT, tmp);
         check_error();
   
         delete[] tmp;
   
- -      set_uniform_int(glsl_program_num, prefix, "support_tex", *sampler_num);
- -      ++*sampler_num;
- -
- -      assert(input_size % fft_size == 0);
- -      set_uniform_float(glsl_program_num, prefix, "num_repeats", input_size / fft_size);
+ +      last_fft_size = fft_size;
+ +      last_direction = direction;
+ +      last_pass_number = pass_number;
+ +      last_inverse = inverse;
+ +      last_input_size = input_size;
   }
   
   }  // namespace movit
diff --combined fft_pass_effect.frag

index fcffba786facade26a2ad2983126675528107708,607f86ab0149dab1d97e7aefeb77454b193e8fa9..eb26f33442ff8baed342d89cfcbbc7f217175b67
--- 1/fft_pass_effect.frag
--- 2/fft_pass_effect.frag
+++ b/fft_pass_effect.frag
@@@ -6,11 -6,11 +6,11 @@@ uniform sampler2D PREFIX(support_tex)
   
   vec4 FUNCNAME(vec2 tc) {
   #if DIRECTION_VERTICAL
-       vec4 support = texture2D(PREFIX(support_tex), vec2(tc.y * PREFIX(num_repeats), 0.0));
+       vec4 support = tex2D(PREFIX(support_tex), vec2(tc.y * PREFIX(num_repeats), 0.0));
           vec4 c1 = INPUT(vec2(tc.x, tc.y + support.x));
           vec4 c2 = INPUT(vec2(tc.x, tc.y + support.y));
   #else
-       vec4 support = texture2D(PREFIX(support_tex), vec2(tc.x * PREFIX(num_repeats), 0.0));
+       vec4 support = tex2D(PREFIX(support_tex), vec2(tc.x * PREFIX(num_repeats), 0.0));
           vec4 c1 = INPUT(vec2(tc.x + support.x, tc.y));
           vec4 c2 = INPUT(vec2(tc.x + support.y, tc.y));
   #endif
@@@ -22,5 -22,3 +22,5 @@@
         // where * is complex multiplication.
         return c1 + support.z * c2 + support.w * vec4(-c2.y, c2.x, -c2.w, c2.z);
   }
+ +
+ +#undef DIRECTION_VERTICAL
diff --combined fft_pass_effect.h

index 7689cf1e60688fb60d1c620baffb6ba1fac1dab9,90e88bc1fddd94650c1deae08011e9c2d0e5395e..fbf4511e222cd6a725a5ffa2e597ae5c5927f292
--- 1/fft_pass_effect.h
--- 2/fft_pass_effect.h
+++ b/fft_pass_effect.h
@@@ -50,7 -50,7 +50,7 @@@
   // scaling), and as fp16 has quite limited range at times, this can be relevant
   // on some GPUs for larger sizes.
   
- #include <GL/glew.h>
+ #include <epoxy/gl.h>
   #include <assert.h>
   #include <stdio.h>
   #include <string>
@@@ -101,25 -101,16 +101,25 @@@ public
   
         virtual void inform_added(EffectChain *chain) { this->chain = chain; }
         
- -      enum Direction { HORIZONTAL = 0, VERTICAL = 1 };
+ +      enum Direction { INVALID = -1, HORIZONTAL = 0, VERTICAL = 1 };
   
   private:
+ +      void generate_support_texture();
+ +
         EffectChain *chain;
         int input_width, input_height;
         GLuint tex;
+ +
         int fft_size;
         Direction direction;
         int pass_number;  // From 1..n.
         int inverse;  // 0 = forward (FFT), 1 = reverse (IFFT).
+ +
+ +      int last_fft_size;
+ +      Direction last_direction;
+ +      int last_pass_number;
+ +      int last_inverse;
+ +      int last_input_size;
   };
   
   }  // namespace movit
diff --combined fft_pass_effect_test.cpp

index 7eafbd9da5bc99b875efb82576b4a8af9fade1ea,015847ad72838c880eb107ac9b71559ddc494b14..bce91cec4809809b76a5483cc91422660b2e7f93
--- 1/fft_pass_effect_test.cpp
--- 2/fft_pass_effect_test.cpp
+++ b/fft_pass_effect_test.cpp
@@@ -3,11 -3,11 +3,11 @@@
   #include <math.h>
   #include <stdlib.h>
   #include <string.h>
+ #include <epoxy/gl.h>
+ #include <gtest/gtest.h>
   
   #include "effect_chain.h"
   #include "fft_pass_effect.h"
- #include "glew.h"
- #include "gtest/gtest.h"
   #include "image_format.h"
   #include "multiply_effect.h"
   #include "test_util.h"
@@@ -126,7 -126,7 +126,7 @@@ TEST(FFTPassEffectTest, SingleFrequency
   
   TEST(FFTPassEffectTest, Repeat) {
         srand(12345);
- -      for (int fft_size = 2; fft_size < 512; fft_size *= 2) {
+ +      for (int fft_size = 2; fft_size <= 128; fft_size *= 2) {
                 const int num_repeats = 31;  // Prime, to make things more challenging.
                 float data[num_repeats * fft_size * 4];
                 float expected_data[num_repeats * fft_size * 4], out_data[num_repeats * fft_size * 4];
diff --combined luma_mix_effect_test.cpp

index 07829cc2c025017ed89277cdaab860230bc4e97a,4a506f5aaf4942d631aba27c4b18d99f3089d16f..2b6de1572909a216434eb6e14ce5b76635733c85
--- 1/luma_mix_effect_test.cpp
--- 2/luma_mix_effect_test.cpp
+++ b/luma_mix_effect_test.cpp
@@@ -1,6 -1,6 +1,6 @@@
   // Unit tests for LumaMixEffect.
   
- #include <GL/glew.h>
+ #include <epoxy/gl.h>
   
   #include "effect_chain.h"
   #include "gtest/gtest.h"
@@@ -96,44 -96,4 +96,44 @@@ TEST(LumaMixEffectTest, SoftWipeHalfWay
         expect_equal(expected_data, out_data, 2, 2);
   }
   
+ +TEST(LumaMixEffectTest, Inverse) {
+ +      float data_a[] = {
+ +              0.0f, 0.25f,
+ +              0.75f, 1.0f,
+ +      };
+ +      float data_b[] = {
+ +              1.0f, 0.5f,
+ +              0.65f, 0.6f,
+ +      };
+ +      float data_luma[] = {
+ +              0.0f, 0.25f,
+ +              0.5f, 0.75f,
+ +      };
+ +
+ +      EffectChainTester tester(data_a, 2, 2, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR);
+ +      Effect *input1 = tester.get_chain()->last_added_effect();
+ +      Effect *input2 = tester.add_input(data_b, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR);
+ +      Effect *input3 = tester.add_input(data_luma, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR);
+ +
+ +      Effect *luma_mix_effect = tester.get_chain()->add_effect(new LumaMixEffect(), input1, input2, input3);
+ +      ASSERT_TRUE(luma_mix_effect->set_float("transition_width", 100000.0f));
+ +      ASSERT_TRUE(luma_mix_effect->set_int("inverse", 1));
+ +
+ +      // Inverse is not the same as reverse, so progress=0 should behave identically
+ +      // as HardWipe, ie. everything should be from A.
+ +      float out_data[4];
+ +      ASSERT_TRUE(luma_mix_effect->set_float("progress", 0.0f));
+ +      tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
+ +      expect_equal(data_a, out_data, 2, 2);
+ +
+ +      // Lower two from A, the rest from B.
+ +      float expected_data_049[] = {
+ +              1.0f, 0.5f,
+ +              0.75f, 1.0f,
+ +      };
+ +      ASSERT_TRUE(luma_mix_effect->set_float("progress", 0.49f));
+ +      tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
+ +      expect_equal(expected_data_049, out_data, 2, 2);
+ +}
+ +
   }  // namespace movit
diff --combined resource_pool.cpp

index 19c26a093f989b5f18d21d626a0ed95c8bbbc7d4,2298c3c7342d5ee7b264c17800aedb311319e1a1..5abc1b28b81729671bd0e4758ae9803d53c096f9
--- 1/resource_pool.cpp
--- 2/resource_pool.cpp
+++ b/resource_pool.cpp
@@@ -6,8 -6,8 +6,8 @@@
   #include <map>
   #include <string>
   #include <utility>
+ #include <epoxy/gl.h>
   
- #include "glew.h"
   #include "init.h"
   #include "resource_pool.h"
   #include "util.h"
@@@ -52,29 -52,15 +52,29 @@@ ResourcePool::~ResourcePool(
         assert(texture_formats.empty());
         assert(texture_freelist_bytes == 0);
   
- -      for (list<GLuint>::const_iterator freelist_it = fbo_freelist.begin();
- -           freelist_it != fbo_freelist.end();
- -           ++freelist_it) {
- -              GLuint free_fbo_num = *freelist_it;
- -              assert(fbo_formats.count(free_fbo_num) != 0);
- -              fbo_formats.erase(free_fbo_num);
- -              glDeleteFramebuffers(1, &free_fbo_num);
- -              check_error();
+ +      void *context = get_gl_context_identifier();
+ +      cleanup_unlinked_fbos(context);
+ +
+ +      for (map<void *, std::list<GLuint> >::iterator context_it = fbo_freelist.begin();
+ +           context_it != fbo_freelist.end();
+ +           ++context_it) {
+ +              if (context_it->first != context) {
+ +                      // If this does not hold, the client should have called clean_context() earlier.
+ +                      assert(context_it->second.empty());
+ +                      continue;
+ +              }
+ +              for (list<GLuint>::const_iterator freelist_it = context_it->second.begin();
+ +                   freelist_it != context_it->second.end();
+ +                   ++freelist_it) {
+ +                      pair<void *, GLuint> key(context, *freelist_it);
+ +                      GLuint free_fbo_num = *freelist_it;
+ +                      assert(fbo_formats.count(key) != 0);
+ +                      fbo_formats.erase(key);
+ +                      glDeleteFramebuffers(1, &free_fbo_num);
+ +                      check_error();
+ +              }
         }
+ +
         assert(fbo_formats.empty());
   }
   
@@@ -214,16 -200,19 +214,19 @@@ GLuint ResourcePool::create_2d_texture(
         case GL_SRGB8_ALPHA8:
                 format = GL_RGBA;
                 break;
-       case GL_RGB32F_ARB:
-       case GL_RGB16F_ARB:
+       case GL_RGB32F:
+       case GL_RGB16F:
         case GL_RGB8:
         case GL_SRGB8:
                 format = GL_RGB;
                 break;
         case GL_RG32F:
         case GL_RG16F:
+       case GL_RG8:
                 format = GL_RG;
                 break;
+       case GL_R32F:
+       case GL_R16F:
         case GL_R8:
                 format = GL_RED;
                 break;
@@@ -232,12 -221,39 +235,39 @@@
                 assert(false);
         }
   
+       // Same with type; GLES is stricter than desktop OpenGL here.
+       GLenum type;
+       switch (internal_format) {
+       case GL_RGBA32F_ARB:
+       case GL_RGBA16F_ARB:
+       case GL_RGB32F:
+       case GL_RGB16F:
+       case GL_RG32F:
+       case GL_RG16F:
+       case GL_R32F:
+       case GL_R16F:
+               type = GL_FLOAT;
+               break;
+       case GL_SRGB8_ALPHA8:
+       case GL_SRGB8:
+       case GL_RGBA8:
+       case GL_RGB8:
+       case GL_RG8:
+       case GL_R8:
+               type = GL_UNSIGNED_BYTE;
+               break;
+       default:
+               // TODO: Add more here as needed.
+               assert(false);
+       }
+ 
+ 
         GLuint texture_num;
         glGenTextures(1, &texture_num);
         check_error();
         glBindTexture(GL_TEXTURE_2D, texture_num);
         check_error();
-       glTexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, GL_UNSIGNED_BYTE, NULL);
+       glTexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, type, NULL);
         check_error();
         glBindTexture(GL_TEXTURE_2D, 0);
         check_error();
@@@ -269,40 -285,38 +299,40 @@@ void ResourcePool::release_2d_texture(G
                 glDeleteTextures(1, &free_texture_num);
                 check_error();
   
- -              // Delete any FBO related to this texture.
- -              for (list<GLuint>::iterator fbo_freelist_it = fbo_freelist.begin();
- -                   fbo_freelist_it != fbo_freelist.end(); ) {
- -                      GLuint fbo_num = *fbo_freelist_it;
- -                      map<GLuint, FBO>::const_iterator format_it = fbo_formats.find(fbo_num);
- -                      assert(format_it != fbo_formats.end());
+ +              // Unlink any lingering FBO related to this texture. We might
+ +              // not be in the right context, so don't delete it right away;
+ +              // the cleanup in release_fbo() (which calls cleanup_unlinked_fbos())
+ +              // will take care of actually doing that later.
+ +              for (map<pair<void *, GLuint>, FBO>::iterator format_it = fbo_formats.begin();
+ +                   format_it != fbo_formats.end();
+ +                   ++format_it) {
                         if (format_it->second.texture_num == free_texture_num) {
- -                              glDeleteFramebuffers(1, &fbo_num);
- -                              fbo_freelist.erase(fbo_freelist_it++);
- -                      } else {
- -                              ++fbo_freelist_it;
+ +                              format_it->second.texture_num = 0;
                         }
                 }
         }
         pthread_mutex_unlock(&lock);
   }
   
- -GLuint ResourcePool::create_fbo(void *context, GLuint texture_num)
+ +GLuint ResourcePool::create_fbo(GLuint texture_num)
   {
+ +      void *context = get_gl_context_identifier();
+ +
         pthread_mutex_lock(&lock);
- -      // See if there's an FBO on the freelist we can use.
- -      for (list<GLuint>::iterator freelist_it = fbo_freelist.begin();
- -           freelist_it != fbo_freelist.end();
- -           ++freelist_it) {
- -              GLuint fbo_num = *freelist_it;
- -              map<GLuint, FBO>::const_iterator format_it = fbo_formats.find(fbo_num);
- -              assert(format_it != fbo_formats.end());
- -              if (format_it->second.context == context &&
- -                  format_it->second.texture_num == texture_num) {
- -                      fbo_freelist.erase(freelist_it);
- -                      pthread_mutex_unlock(&lock);
- -                      return fbo_num;
+ +      if (fbo_freelist.count(context) != 0) {
+ +              // See if there's an FBO on the freelist we can use.
+ +              for (list<GLuint>::iterator freelist_it = fbo_freelist[context].begin();
+ +                   freelist_it != fbo_freelist[context].end();
+ +                   ++freelist_it) {
+ +                      GLuint fbo_num = *freelist_it;
+ +                      map<pair<void *, GLuint>, FBO>::const_iterator format_it =
+ +                              fbo_formats.find(make_pair(context, fbo_num));
+ +                      assert(format_it != fbo_formats.end());
+ +                      if (format_it->second.texture_num == texture_num) {
+ +                              fbo_freelist[context].erase(freelist_it);
+ +                              pthread_mutex_unlock(&lock);
+ +                              return fbo_num;
+ +                      }
                 }
         }
   
@@@ -325,10 -339,10 +355,10 @@@
         check_error();
   
         FBO fbo_format;
- -      fbo_format.context = context;
         fbo_format.texture_num = texture_num;
- -      assert(fbo_formats.count(fbo_num) == 0);
- -      fbo_formats.insert(make_pair(fbo_num, fbo_format));
+ +      pair<void *, GLuint> key(context, fbo_num);
+ +      assert(fbo_formats.count(key) == 0);
+ +      fbo_formats.insert(make_pair(key, fbo_format));
   
         pthread_mutex_unlock(&lock);
         return fbo_num;
@@@ -336,58 -350,19 +366,58 @@@
   
   void ResourcePool::release_fbo(GLuint fbo_num)
   {
+ +      void *context = get_gl_context_identifier();
+ +
         pthread_mutex_lock(&lock);
- -      fbo_freelist.push_front(fbo_num);
- -      assert(fbo_formats.count(fbo_num) != 0);
- -
- -      while (fbo_freelist.size() > fbo_freelist_max_length) {
- -              GLuint free_fbo_num = fbo_freelist.front();
- -              fbo_freelist.pop_front();
- -              assert(fbo_formats.count(free_fbo_num) != 0);
- -              fbo_formats.erase(free_fbo_num);
+ +      fbo_freelist[context].push_front(fbo_num);
+ +      assert(fbo_formats.count(make_pair(context, fbo_num)) != 0);
+ +
+ +      // Now that we're in this context, free up any FBOs that are connected
+ +      // to deleted textures (in release_2d_texture).
+ +      cleanup_unlinked_fbos(context);
+ +
+ +      shrink_fbo_freelist(context, fbo_freelist_max_length);
+ +      pthread_mutex_unlock(&lock);
+ +}
+ +
+ +void ResourcePool::clean_context()
+ +{
+ +      void *context = get_gl_context_identifier();
+ +
+ +      // Currently, we only need to worry about FBOs, as they are the only
+ +      // non-shareable resource we hold.
+ +      shrink_fbo_freelist(context, 0);
+ +      fbo_freelist.erase(context);
+ +}
+ +
+ +void ResourcePool::cleanup_unlinked_fbos(void *context)
+ +{
+ +      for (list<GLuint>::iterator freelist_it = fbo_freelist[context].begin();
+ +           freelist_it != fbo_freelist[context].end(); ) {
+ +              GLuint fbo_num = *freelist_it;
+ +              pair<void *, GLuint> key(context, fbo_num);
+ +              assert(fbo_formats.count(key) != 0);
+ +              if (fbo_formats[key].texture_num == 0) {
+ +                      glDeleteFramebuffers(1, &fbo_num);
+ +                      check_error();
+ +                      fbo_freelist[context].erase(freelist_it++);
+ +              } else {
+ +                      freelist_it++;
+ +              }
+ +      }
+ +}
+ +
+ +void ResourcePool::shrink_fbo_freelist(void *context, size_t max_length)
+ +{
+ +      while (fbo_freelist[context].size() > max_length) {
+ +              GLuint free_fbo_num = fbo_freelist[context].back();
+ +              pair<void *, GLuint> key(context, free_fbo_num);
+ +              fbo_freelist[context].pop_back();
+ +              assert(fbo_formats.count(key) != 0);
+ +              fbo_formats.erase(key);
                 glDeleteFramebuffers(1, &free_fbo_num);
                 check_error();
         }
- -      pthread_mutex_unlock(&lock);
   }
   
   size_t ResourcePool::estimate_texture_size(const Texture2D &texture_format)
@@@ -401,16 -376,16 +431,16 @@@
         case GL_RGBA16F_ARB:
                 bytes_per_pixel = 8;
                 break;
-       case GL_RGBA8:
-       case GL_SRGB8_ALPHA8:
-               bytes_per_pixel = 4;
-               break;
         case GL_RGB32F_ARB:
                 bytes_per_pixel = 12;
                 break;
         case GL_RGB16F_ARB:
                 bytes_per_pixel = 6;
                 break;
+       case GL_RGBA8:
+       case GL_SRGB8_ALPHA8:
+               bytes_per_pixel = 4;
+               break;
         case GL_RGB8:
         case GL_SRGB8:
                 bytes_per_pixel = 3;
@@@ -421,6 -396,12 +451,12 @@@
         case GL_RG16F:
                 bytes_per_pixel = 4;
                 break;
+       case GL_R32F:
+               bytes_per_pixel = 4;
+               break;
+       case GL_R16F:
+               bytes_per_pixel = 2;
+               break;
         case GL_R8:
                 bytes_per_pixel = 1;
                 break;
diff --combined resource_pool.h

index aad95c959ad235dc7cdc504f82e066b717322279,7029fbfd5e795cbd405a099ddd0b1450ccf187c2..a331f4f1708c823a076216c0023a81cf099bd802
--- 1/resource_pool.h
--- 2/resource_pool.h
+++ b/resource_pool.h
@@@ -15,14 -15,8 +15,14 @@@
   // Thread-safety: All functions except the constructor and destructor can be
   // safely called from multiple threads at the same time, provided they have
   // separate (but sharing) OpenGL contexts.
+ +//
+ +// Memory management (only relevant if you use multiple contexts): Some objects,
+ +// like FBOs, are not shareable across contexts, and can only be deleted from
+ +// the context they were created in. Thus, you will need to tell the
+ +// ResourcePool explicitly if you delete a context, or they will leak (and the
+ +// ResourcePool destructor will assert-fail). See clean_context().
   
- #include <GL/glew.h>
+ #include <epoxy/gl.h>
   #include <pthread.h>
   #include <stddef.h>
   #include <list>
@@@ -47,7 -41,7 +47,7 @@@ public
         // twice this estimate or more.
         ResourcePool(size_t program_freelist_max_length = 100,
                      size_t texture_freelist_max_bytes = 100 << 20,  // 100 MB.
- -                   size_t fbo_freelist_max_length = 100);
+ +                   size_t fbo_freelist_max_length = 100);  // Per context.
         ~ResourcePool();
   
         // All remaining functions are intended for calls from EffectChain only.
@@@ -69,36 -63,21 +69,36 @@@
         // Allocate an FBO with the given texture bound as a framebuffer attachment,
         // or fetch a previously used one if possible. Unbinds GL_FRAMEBUFFER afterwards.
         // Keeps ownership of the FBO; you must call release_fbo() instead of deleting
- -      // it when you no longer want it. You can get an appropriate context
- -      // pointer from get_gl_context_identifier().
+ +      // it when you no longer want it.
         //
         // NOTE: In principle, the FBO doesn't have a resolution or pixel format;
         // you can bind almost whatever texture you want to it. However, changing
         // textures can have an adverse effect on performance due to validation,
         // in particular on NVidia cards. Also, keep in mind that FBOs are not
- -      // shareable across contexts.
- -      GLuint create_fbo(void *context, GLuint texture_num);
+ +      // shareable across contexts, so you must have the context that's supposed
+ +      // to own the FBO current when you create or release it.
+ +      GLuint create_fbo(GLuint texture_num);
         void release_fbo(GLuint fbo_num);
   
+ +      // Informs the ResourcePool that the current context is going away soon,
+ +      // and that any resources held for it in the freelist should be deleted.
+ +      //
+ +      // You do not need to do this for the last context; the regular destructor
+ +      // will take care of that. This means that if you only ever use one
+ +      // thread/context, you never need to call this function.
+ +      void clean_context();
+ +
   private:
         // Delete the given program and both its shaders.
         void delete_program(GLuint program_num);
   
+ +      // Deletes all FBOs for the given context that belong to deleted textures.
+ +      void cleanup_unlinked_fbos(void *context);
+ +
+ +      // Remove FBOs off the end of the freelist for <context>, until it
+ +      // is no more than <max_length> elements long.
+ +      void shrink_fbo_freelist(void *context, size_t max_length);
+ +
         // Protects all the other elements in the class.
         pthread_mutex_t lock;
   
@@@ -139,18 -118,19 +139,18 @@@
         size_t texture_freelist_bytes;
   
         struct FBO {
- -              void *context;
- -              GLuint texture_num;
+ +              GLuint texture_num;  // 0 means associated to a texture that has since been deleted.
         };
   
- -      // A mapping from FBO number to format details. This is filled if the
- -      // FBO is given out to a client or on the freelist, but not if it is
- -      // deleted from the freelist.
- -      std::map<GLuint, FBO> fbo_formats;
+ +      // For each context, a mapping from FBO number to format details. This is
+ +      // filled if the FBO is given out to a client or on the freelist, but
+ +      // not if it is deleted from the freelist.
+ +      std::map<std::pair<void *, GLuint>, FBO> fbo_formats;
   
- -      // A list of all FBOs that are release but not freed (most recently freed
- -      // first). Once this reaches <fbo_freelist_max_length>, the last element
- -      // will be deleted.
- -      std::list<GLuint> fbo_freelist;
+ +      // For each context, a list of all FBOs that are released but not freed
+ +      // (most recently freed first). Once this reaches <fbo_freelist_max_length>,
+ +      // the last element will be deleted.
+ +      std::map<void *, std::list<GLuint> > fbo_freelist;
   
         // See the caveats at the constructor.
         static size_t estimate_texture_size(const Texture2D &texture_format);
author	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Fri, 28 Mar 2014 20:15:05 +0000 (21:15 +0100)
committer	Steinar H. Gunderson <sgunderson@bigfoot.com>
	Fri, 28 Mar 2014 20:17:41 +0000 (21:17 +0100)
		1	2
Makefile.in	patch \|	diff1 \|	diff2 \|	blob \| history
effect_chain.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
effect_chain.h	patch \|	diff1 \|	diff2 \|	blob \| history
fft_pass_effect.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
fft_pass_effect.frag	patch \|	diff1 \|	diff2 \|	blob \| history
fft_pass_effect.h	patch \|	diff1 \|	diff2 \|	blob \| history
fft_pass_effect_test.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
luma_mix_effect_test.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
resource_pool.cpp	patch \|	diff1 \|	diff2 \|	blob \| history
resource_pool.h	patch \|	diff1 \|	diff2 \|	blob \| history