# strive towards having a rock-stable ABI, but at least the soversion will increase
# whenever it breaks, so that you will not have silent failures, and distribution package
# management can run its course.
-movit_ltversion = 1:3:0
-movit_version = 1.0.3
+movit_ltversion = 2:0:0
+movit_version = 1.1
prefix = @prefix@
exec_prefix = @exec_prefix@
datadir = @datadir@
top_builddir = @top_builddir@
with_demo_app = @with_demo_app@
+ with_SDL2 = @with_SDL2@
with_coverage = @with_coverage@
CC=@CC@
CXX=@CXX@
- CXXFLAGS=-Wall @CXXFLAGS@ -I$(GTEST_DIR)/include @Eigen3_CFLAGS@ @GLEW_CFLAGS@ @FFTW3_CFLAGS@
+ CXXFLAGS=-Wall @CXXFLAGS@ -I$(GTEST_DIR)/include @SDL2_CFLAGS@ @SDL_CFLAGS@ @Eigen3_CFLAGS@ @epoxy_CFLAGS@ @FFTW3_CFLAGS@
+ ifeq ($(with_SDL2),yes)
+ CXXFLAGS += -DHAVE_SDL2
+ endif
LDFLAGS=@LDFLAGS@
- LDLIBS=@GLEW_LIBS@ @FFTW3_LIBS@ -lpthread
- TEST_LDLIBS=@GLEW_LIBS@ @SDL_LIBS@ -lpthread
- DEMO_LDLIBS=@SDL_image_LIBS@ -lrt -lpthread @libpng_LIBS@ @FFTW3_LIBS@
+ LDLIBS=@epoxy_LIBS@ @FFTW3_LIBS@ -lpthread
+ TEST_LDLIBS=@epoxy_LIBS@ @SDL2_LIBS@ @SDL_LIBS@ -lpthread
+ DEMO_LDLIBS=@SDL2_image_LIBS@ @SDL_image_LIBS@ -lrt -lpthread @libpng_LIBS@ @FFTW3_LIBS@
SHELL=@SHELL@
LIBTOOL=@LIBTOOL@ --tag=CXX
RANLIB=ranlib
HDRS += $(INPUTS:=.h)
HDRS += $(EFFECTS:=.h)
- SHADERS = vs.vert header.frag footer.frag
+ SHADERS = vs.vert vs.130.vert vs.300es.vert
+ SHADERS += header.frag header.130.frag header.300es.frag
+ SHADERS += footer.frag footer.130.frag footer.300es.frag
+ SHADERS += texture1d.frag texture1d.130.frag texture1d.300es.frag
SHADERS += $(INPUTS:=.frag)
SHADERS += $(EFFECTS:=.frag)
SHADERS += highlight_cutoff_effect.frag
SHADERS += overlay_matte_effect.frag
- SHADERS += texture1d.frag
# These purposefully do not exist.
MISSING_SHADERS = diffusion_effect.frag glow_effect.frag unsharp_mask_effect.frag resize_effect.frag
#define GL_GLEXT_PROTOTYPES 1
- #include <GL/glew.h>
+ #include <epoxy/gl.h>
#include <assert.h>
#include <locale.h>
#include <math.h>
delete nodes[i];
}
for (unsigned i = 0; i < phases.size(); ++i) {
- glBindVertexArray(phases[i]->vao);
- check_error();
-
- cleanup_vertex_attribute(phases[i]->glsl_program_num, "position", phases[i]->position_vbo);
- cleanup_vertex_attribute(phases[i]->glsl_program_num, "texcoord", phases[i]->texcoord_vbo);
-
- glBindVertexArray(0);
- check_error();
-
resource_pool->release_glsl_program(phases[i]->glsl_program_num);
delete phases[i];
}
void EffectChain::compile_glsl_program(Phase *phase)
{
- string frag_shader = read_file("header.frag");
+ string frag_shader = read_version_dependent_file("header", "frag");
// Create functions for all the texture inputs that we need.
for (unsigned i = 0; i < phase->inputs.size(); ++i) {
frag_shader += string("uniform sampler2D tex_") + effect_id + ";\n";
frag_shader += string("vec4 ") + effect_id + "(vec2 tc) {\n";
- frag_shader += "\treturn texture2D(tex_" + string(effect_id) + ", tc);\n";
+ frag_shader += "\treturn tex2D(tex_" + string(effect_id) + ", tc);\n";
frag_shader += "}\n";
frag_shader += "\n";
}
frag_shader += "\n";
}
frag_shader += string("#define INPUT ") + phase->effect_ids[phase->effects.back()] + "\n";
- frag_shader.append(read_file("footer.frag"));
+ frag_shader.append(read_version_dependent_file("footer", "frag"));
- phase->glsl_program_num = resource_pool->compile_glsl_program(read_file("vs.vert"), frag_shader);
+ string vert_shader = read_version_dependent_file("vs", "vert");
+ phase->glsl_program_num = resource_pool->compile_glsl_program(vert_shader, frag_shader);
-
- // Prepare the geometry for the fullscreen quad used in this phase.
- // (We have separate VAOs per shader, since the bindings can in theory
- // be different.)
- float vertices[] = {
- 0.0f, 1.0f,
- 0.0f, 0.0f,
- 1.0f, 1.0f,
- 1.0f, 0.0f
- };
-
- glGenVertexArrays(1, &phase->vao);
- check_error();
- glBindVertexArray(phase->vao);
- check_error();
-
- phase->position_vbo = fill_vertex_attribute(phase->glsl_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices);
- phase->texcoord_vbo = fill_vertex_attribute(phase->glsl_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices); // Same as vertices.
-
- glBindVertexArray(0);
- check_error();
}
// Construct GLSL programs, starting at the given effect and following
glBindFramebuffer(GL_FRAMEBUFFER, 0);
check_error();
- glBindVertexArray(0);
- check_error();
glUseProgram(0);
check_error();
}
// And now the output. (Already set up for us if it is the last phase.)
if (!last_phase) {
- void *context = get_gl_context_identifier();
- fbo = resource_pool->create_fbo(context, (*output_textures)[phase]);
+ fbo = resource_pool->create_fbo((*output_textures)[phase]);
glBindFramebuffer(GL_FRAMEBUFFER, fbo);
glViewport(0, 0, phase->output_width, phase->output_height);
}
}
}
- glBindVertexArray(phase->vao);
+ // Now draw!
+ float vertices[] = {
+ 0.0f, 1.0f,
+ 0.0f, 0.0f,
+ 1.0f, 1.0f,
+ 1.0f, 0.0f
+ };
+
+ GLuint vao;
+ glGenVertexArrays(1, &vao);
+ check_error();
+ glBindVertexArray(vao);
check_error();
+
+ GLuint position_vbo = fill_vertex_attribute(glsl_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices);
+ GLuint texcoord_vbo = fill_vertex_attribute(glsl_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices); // Same as vertices.
+
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
check_error();
+ cleanup_vertex_attribute(glsl_program_num, "position", position_vbo);
+ cleanup_vertex_attribute(glsl_program_num, "texcoord", texcoord_vbo);
+
+ glUseProgram(0);
+ check_error();
+
for (unsigned i = 0; i < phase->effects.size(); ++i) {
Node *node = phase->effects[i];
node->effect->clear_gl_state();
if (!last_phase) {
resource_pool->release_fbo(fbo);
}
+
+ glDeleteVertexArrays(1, &vao);
+ check_error();
}
void EffectChain::setup_rtt_sampler(GLuint glsl_program_num, int sampler_num, const string &effect_id, bool use_mipmaps)
// but if so, the threads' contexts need to be set up to share resources, since
// the EffectChain holds textures and other OpenGL objects that are tied to the
// context.
+//
+// Memory management (only relevant if you use multiple contexts):
+// See corresponding comment in resource_pool.h. This holds even if you don't
+// allocate your own ResourcePool, but let EffectChain hold its own.
- #include <GL/glew.h>
+ #include <epoxy/gl.h>
#include <stdio.h>
#include <map>
#include <set>
// Identifier used to create unique variables in GLSL.
// Unique per-phase to increase cacheability of compiled shaders.
std::map<Node *, std::string> effect_ids;
-
- // The geometry needed to draw this quad, bound to the vertex array
- // object. (Seemingly it's actually a win not to upload geometry every
- // frame, even for something as small as a quad, due to fewer state
- // changes.)
- GLuint vao;
- GLuint position_vbo, texcoord_vbo;
};
class EffectChain {
- #include <GL/glew.h>
+ #include <epoxy/gl.h>
#include <math.h>
#include "effect_chain.h"
FFTPassEffect::FFTPassEffect()
: input_width(1280),
input_height(720),
- direction(HORIZONTAL)
+ direction(HORIZONTAL),
+ last_fft_size(-1),
+ last_direction(INVALID),
+ last_pass_number(-1),
+ last_inverse(-1),
+ last_input_size(-1)
{
register_int("fft_size", &fft_size);
register_int("direction", (int *)&direction);
{
Effect::set_gl_state(glsl_program_num, prefix, sampler_num);
- int input_size = (direction == VERTICAL) ? input_height : input_width;
-
// This is needed because it counteracts the precision issues we get
// because we sample the input texture with normalized coordinates
// (especially when the repeat count along the axis is not a power of
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
check_error();
+ // Because of the memory layout (see below) and because we use offsets,
+ // the support texture values for many consecutive values will be
+ // the same. Thus, we can store a smaller texture (giving a small
+ // performance boost) and just sample it with NEAREST. Also, this
+ // counteracts any precision issues we might get from linear
+ // interpolation.
+ glActiveTexture(GL_TEXTURE0 + *sampler_num);
+ check_error();
+ glBindTexture(GL_TEXTURE_2D, tex);
+ check_error();
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
+ check_error();
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
+ check_error();
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
+ check_error();
+
+ int input_size = (direction == VERTICAL) ? input_height : input_width;
+ if (last_fft_size != fft_size ||
+ last_direction != direction ||
+ last_pass_number != pass_number ||
+ last_inverse != inverse ||
+ last_input_size != input_size) {
+ generate_support_texture();
+ }
+
+ set_uniform_int(glsl_program_num, prefix, "support_tex", *sampler_num);
+ ++*sampler_num;
+
+ assert(input_size % fft_size == 0);
+ set_uniform_float(glsl_program_num, prefix, "num_repeats", input_size / fft_size);
+}
+
+void FFTPassEffect::generate_support_texture()
+{
+ int input_size = (direction == VERTICAL) ? input_height : input_width;
+
// The memory layout follows figure 5.2 on page 25 of
// http://gpuwave.sesse.net/gpuwave.pdf -- it can be a bit confusing
// at first, but is classically explained more or less as follows:
// bit, so the stride is 8, and so on.
assert((fft_size & (fft_size - 1)) == 0); // Must be power of two.
- fp16_int_t *tmp = new fp16_int_t[fft_size * 4];
int subfft_size = 1 << pass_number;
+ fp16_int_t *tmp = new fp16_int_t[subfft_size * 4];
double mulfac;
if (inverse) {
mulfac = 2.0 * M_PI;
assert((fft_size & (fft_size - 1)) == 0); // Must be power of two.
assert(fft_size % subfft_size == 0);
int stride = fft_size / subfft_size;
- for (int i = 0; i < fft_size; ++i) {
- int k = i / stride; // Element number within this sub-FFT.
- int offset = i % stride; // Sub-FFT number.
+ for (int i = 0; i < subfft_size; i++) {
+ int k = i;
double twiddle_real, twiddle_imag;
if (k < subfft_size / 2) {
// for using offsets and not direct coordinates as in GPUwave
// is that we can have multiple FFTs along the same line,
// and want to reuse the support texture by repeating it.
- int base = k * stride * 2 + offset;
+ int base = k * stride * 2;
int support_texture_index = i;
int src1 = base;
int src2 = base + stride;
+ double sign = 1.0;
if (direction == FFTPassEffect::VERTICAL) {
// Compensate for OpenGL's bottom-left convention.
- support_texture_index = fft_size - support_texture_index - 1;
- src1 = fft_size - src1 - 1;
- src2 = fft_size - src2 - 1;
+ support_texture_index = subfft_size - support_texture_index - 1;
+ sign = -1.0;
}
- tmp[support_texture_index * 4 + 0] = fp64_to_fp16((src1 - support_texture_index) / double(input_size));
- tmp[support_texture_index * 4 + 1] = fp64_to_fp16((src2 - support_texture_index) / double(input_size));
+ tmp[support_texture_index * 4 + 0] = fp64_to_fp16(sign * (src1 - i * stride) / double(input_size));
+ tmp[support_texture_index * 4 + 1] = fp64_to_fp16(sign * (src2 - i * stride) / double(input_size));
tmp[support_texture_index * 4 + 2] = fp64_to_fp16(twiddle_real);
tmp[support_texture_index * 4 + 3] = fp64_to_fp16(twiddle_imag);
}
- glActiveTexture(GL_TEXTURE0 + *sampler_num);
- check_error();
- glBindTexture(GL_TEXTURE_2D, tex);
- check_error();
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
- check_error();
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
- check_error();
- glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT);
- check_error();
-
// Supposedly FFTs are very sensitive to inaccuracies in the twiddle factors,
// at least according to a paper by Schatzman (see gpuwave.pdf reference [30]
// for the full reference); however, practical testing indicates that it's
// which gives a nice speed boost.
//
// Note that the source coordinates become somewhat less accurate too, though.
- glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, fft_size, 1, 0, GL_RGBA, GL_HALF_FLOAT, tmp);
+ glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, subfft_size, 1, 0, GL_RGBA, GL_HALF_FLOAT, tmp);
check_error();
delete[] tmp;
- set_uniform_int(glsl_program_num, prefix, "support_tex", *sampler_num);
- ++*sampler_num;
-
- assert(input_size % fft_size == 0);
- set_uniform_float(glsl_program_num, prefix, "num_repeats", input_size / fft_size);
+ last_fft_size = fft_size;
+ last_direction = direction;
+ last_pass_number = pass_number;
+ last_inverse = inverse;
+ last_input_size = input_size;
}
} // namespace movit
vec4 FUNCNAME(vec2 tc) {
#if DIRECTION_VERTICAL
- vec4 support = texture2D(PREFIX(support_tex), vec2(tc.y * PREFIX(num_repeats), 0.0));
+ vec4 support = tex2D(PREFIX(support_tex), vec2(tc.y * PREFIX(num_repeats), 0.0));
vec4 c1 = INPUT(vec2(tc.x, tc.y + support.x));
vec4 c2 = INPUT(vec2(tc.x, tc.y + support.y));
#else
- vec4 support = texture2D(PREFIX(support_tex), vec2(tc.x * PREFIX(num_repeats), 0.0));
+ vec4 support = tex2D(PREFIX(support_tex), vec2(tc.x * PREFIX(num_repeats), 0.0));
vec4 c1 = INPUT(vec2(tc.x + support.x, tc.y));
vec4 c2 = INPUT(vec2(tc.x + support.y, tc.y));
#endif
// where * is complex multiplication.
return c1 + support.z * c2 + support.w * vec4(-c2.y, c2.x, -c2.w, c2.z);
}
+
+#undef DIRECTION_VERTICAL
// scaling), and as fp16 has quite limited range at times, this can be relevant
// on some GPUs for larger sizes.
- #include <GL/glew.h>
+ #include <epoxy/gl.h>
#include <assert.h>
#include <stdio.h>
#include <string>
virtual void inform_added(EffectChain *chain) { this->chain = chain; }
- enum Direction { HORIZONTAL = 0, VERTICAL = 1 };
+ enum Direction { INVALID = -1, HORIZONTAL = 0, VERTICAL = 1 };
private:
+ void generate_support_texture();
+
EffectChain *chain;
int input_width, input_height;
GLuint tex;
+
int fft_size;
Direction direction;
int pass_number; // From 1..n.
int inverse; // 0 = forward (FFT), 1 = reverse (IFFT).
+
+ int last_fft_size;
+ Direction last_direction;
+ int last_pass_number;
+ int last_inverse;
+ int last_input_size;
};
} // namespace movit
#include <math.h>
#include <stdlib.h>
#include <string.h>
+ #include <epoxy/gl.h>
+ #include <gtest/gtest.h>
#include "effect_chain.h"
#include "fft_pass_effect.h"
- #include "glew.h"
- #include "gtest/gtest.h"
#include "image_format.h"
#include "multiply_effect.h"
#include "test_util.h"
TEST(FFTPassEffectTest, Repeat) {
srand(12345);
- for (int fft_size = 2; fft_size < 512; fft_size *= 2) {
+ for (int fft_size = 2; fft_size <= 128; fft_size *= 2) {
const int num_repeats = 31; // Prime, to make things more challenging.
float data[num_repeats * fft_size * 4];
float expected_data[num_repeats * fft_size * 4], out_data[num_repeats * fft_size * 4];
// Unit tests for LumaMixEffect.
- #include <GL/glew.h>
+ #include <epoxy/gl.h>
#include "effect_chain.h"
#include "gtest/gtest.h"
expect_equal(expected_data, out_data, 2, 2);
}
+TEST(LumaMixEffectTest, Inverse) {  // Exercises LumaMixEffect with "inverse" set to 1.
+ float data_a[] = {
+ 0.0f, 0.25f,
+ 0.75f, 1.0f,
+ };
+ float data_b[] = {
+ 1.0f, 0.5f,
+ 0.65f, 0.6f,
+ };
+ float data_luma[] = {
+ 0.0f, 0.25f,
+ 0.5f, 0.75f,
+ };
+
+ EffectChainTester tester(data_a, 2, 2, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR);
+ Effect *input1 = tester.get_chain()->last_added_effect();
+ Effect *input2 = tester.add_input(data_b, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR);
+ Effect *input3 = tester.add_input(data_luma, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR);
+
+ Effect *luma_mix_effect = tester.get_chain()->add_effect(new LumaMixEffect(), input1, input2, input3);
+ ASSERT_TRUE(luma_mix_effect->set_float("transition_width", 100000.0f));
+ ASSERT_TRUE(luma_mix_effect->set_int("inverse", 1));
+
+ // Inverse is not the same as reverse, so progress=0 should behave identically
+ // to HardWipe, i.e., everything should be from A.
+ float out_data[4];
+ ASSERT_TRUE(luma_mix_effect->set_float("progress", 0.0f));
+ tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
+ expect_equal(data_a, out_data, 2, 2);
+
+ // Second row (last two values) from A, the first row from B.
+ float expected_data_049[] = {
+ 1.0f, 0.5f,
+ 0.75f, 1.0f,
+ };
+ ASSERT_TRUE(luma_mix_effect->set_float("progress", 0.49f));
+ tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
+ expect_equal(expected_data_049, out_data, 2, 2);
+}
+
} // namespace movit
#include <map>
#include <string>
#include <utility>
+ #include <epoxy/gl.h>
- #include "glew.h"
#include "init.h"
#include "resource_pool.h"
#include "util.h"
assert(texture_formats.empty());
assert(texture_freelist_bytes == 0);
- for (list<GLuint>::const_iterator freelist_it = fbo_freelist.begin();
- freelist_it != fbo_freelist.end();
- ++freelist_it) {
- GLuint free_fbo_num = *freelist_it;
- assert(fbo_formats.count(free_fbo_num) != 0);
- fbo_formats.erase(free_fbo_num);
- glDeleteFramebuffers(1, &free_fbo_num);
- check_error();
+ void *context = get_gl_context_identifier();
+ cleanup_unlinked_fbos(context);
+
+ for (map<void *, std::list<GLuint> >::iterator context_it = fbo_freelist.begin();
+ context_it != fbo_freelist.end();
+ ++context_it) {
+ if (context_it->first != context) {
+ // If this does not hold, the client should have called clean_context() earlier.
+ assert(context_it->second.empty());
+ continue;
+ }
+ for (list<GLuint>::const_iterator freelist_it = context_it->second.begin();
+ freelist_it != context_it->second.end();
+ ++freelist_it) {
+ pair<void *, GLuint> key(context, *freelist_it);
+ GLuint free_fbo_num = *freelist_it;
+ assert(fbo_formats.count(key) != 0);
+ fbo_formats.erase(key);
+ glDeleteFramebuffers(1, &free_fbo_num);
+ check_error();
+ }
}
+
assert(fbo_formats.empty());
}
case GL_SRGB8_ALPHA8:
format = GL_RGBA;
break;
- case GL_RGB32F_ARB:
- case GL_RGB16F_ARB:
+ case GL_RGB32F:
+ case GL_RGB16F:
case GL_RGB8:
case GL_SRGB8:
format = GL_RGB;
break;
case GL_RG32F:
case GL_RG16F:
+ case GL_RG8:
format = GL_RG;
break;
+ case GL_R32F:
+ case GL_R16F:
case GL_R8:
format = GL_RED;
break;
assert(false);
}
+ // Same with type; GLES is stricter than desktop OpenGL here.
+ GLenum type;
+ switch (internal_format) {
+ case GL_RGBA32F_ARB:
+ case GL_RGBA16F_ARB:
+ case GL_RGB32F:
+ case GL_RGB16F:
+ case GL_RG32F:
+ case GL_RG16F:
+ case GL_R32F:
+ case GL_R16F:
+ type = GL_FLOAT;
+ break;
+ case GL_SRGB8_ALPHA8:
+ case GL_SRGB8:
+ case GL_RGBA8:
+ case GL_RGB8:
+ case GL_RG8:
+ case GL_R8:
+ type = GL_UNSIGNED_BYTE;
+ break;
+ default:
+ // TODO: Add more here as needed.
+ assert(false);
+ }
+
+
GLuint texture_num;
glGenTextures(1, &texture_num);
check_error();
glBindTexture(GL_TEXTURE_2D, texture_num);
check_error();
- glTexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, GL_UNSIGNED_BYTE, NULL);
+ glTexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, type, NULL);
check_error();
glBindTexture(GL_TEXTURE_2D, 0);
check_error();
glDeleteTextures(1, &free_texture_num);
check_error();
- // Delete any FBO related to this texture.
- for (list<GLuint>::iterator fbo_freelist_it = fbo_freelist.begin();
- fbo_freelist_it != fbo_freelist.end(); ) {
- GLuint fbo_num = *fbo_freelist_it;
- map<GLuint, FBO>::const_iterator format_it = fbo_formats.find(fbo_num);
- assert(format_it != fbo_formats.end());
+ // Unlink any lingering FBO related to this texture. We might
+ // not be in the right context, so don't delete it right away;
+ // the cleanup in release_fbo() (which calls cleanup_unlinked_fbos())
+ // will take care of actually doing that later.
+ for (map<pair<void *, GLuint>, FBO>::iterator format_it = fbo_formats.begin();
+ format_it != fbo_formats.end();
+ ++format_it) {
if (format_it->second.texture_num == free_texture_num) {
- glDeleteFramebuffers(1, &fbo_num);
- fbo_freelist.erase(fbo_freelist_it++);
- } else {
- ++fbo_freelist_it;
+ format_it->second.texture_num = 0;
}
}
}
pthread_mutex_unlock(&lock);
}
-GLuint ResourcePool::create_fbo(void *context, GLuint texture_num)
+GLuint ResourcePool::create_fbo(GLuint texture_num)
{
+ void *context = get_gl_context_identifier();
+
pthread_mutex_lock(&lock);
- // See if there's an FBO on the freelist we can use.
- for (list<GLuint>::iterator freelist_it = fbo_freelist.begin();
- freelist_it != fbo_freelist.end();
- ++freelist_it) {
- GLuint fbo_num = *freelist_it;
- map<GLuint, FBO>::const_iterator format_it = fbo_formats.find(fbo_num);
- assert(format_it != fbo_formats.end());
- if (format_it->second.context == context &&
- format_it->second.texture_num == texture_num) {
- fbo_freelist.erase(freelist_it);
- pthread_mutex_unlock(&lock);
- return fbo_num;
+ if (fbo_freelist.count(context) != 0) {
+ // See if there's an FBO on the freelist we can use.
+ for (list<GLuint>::iterator freelist_it = fbo_freelist[context].begin();
+ freelist_it != fbo_freelist[context].end();
+ ++freelist_it) {
+ GLuint fbo_num = *freelist_it;
+ map<pair<void *, GLuint>, FBO>::const_iterator format_it =
+ fbo_formats.find(make_pair(context, fbo_num));
+ assert(format_it != fbo_formats.end());
+ if (format_it->second.texture_num == texture_num) {
+ fbo_freelist[context].erase(freelist_it);
+ pthread_mutex_unlock(&lock);
+ return fbo_num;
+ }
}
}
check_error();
FBO fbo_format;
- fbo_format.context = context;
fbo_format.texture_num = texture_num;
- assert(fbo_formats.count(fbo_num) == 0);
- fbo_formats.insert(make_pair(fbo_num, fbo_format));
+ pair<void *, GLuint> key(context, fbo_num);
+ assert(fbo_formats.count(key) == 0);
+ fbo_formats.insert(make_pair(key, fbo_format));
pthread_mutex_unlock(&lock);
return fbo_num;
void ResourcePool::release_fbo(GLuint fbo_num)
{
+ void *context = get_gl_context_identifier();
+
pthread_mutex_lock(&lock);
- fbo_freelist.push_front(fbo_num);
- assert(fbo_formats.count(fbo_num) != 0);
-
- while (fbo_freelist.size() > fbo_freelist_max_length) {
- GLuint free_fbo_num = fbo_freelist.front();
- fbo_freelist.pop_front();
- assert(fbo_formats.count(free_fbo_num) != 0);
- fbo_formats.erase(free_fbo_num);
+ fbo_freelist[context].push_front(fbo_num);
+ assert(fbo_formats.count(make_pair(context, fbo_num)) != 0);
+
+ // Now that we're in this context, free up any FBOs that are connected
+ // to deleted textures (in release_2d_texture).
+ cleanup_unlinked_fbos(context);
+
+ shrink_fbo_freelist(context, fbo_freelist_max_length);
+ pthread_mutex_unlock(&lock);
+}
+
+// Deletes every freelisted FBO held for the OpenGL context that is current
+// on the calling thread, and then forgets that context's freelist entry.
+// Intended to be called just before a context goes away.
+void ResourcePool::clean_context()
+{
+	void *context = get_gl_context_identifier();
+
+	// fbo_freelist and fbo_formats are shared with other threads, so take
+	// the same lock release_fbo() holds around shrink_fbo_freelist().
+	// (shrink_fbo_freelist() does not lock by itself, so this cannot
+	// self-deadlock.)
+	pthread_mutex_lock(&lock);
+
+	// Currently, we only need to worry about FBOs, as they are the only
+	// non-shareable resource we hold.
+	shrink_fbo_freelist(context, 0);
+	fbo_freelist.erase(context);
+
+	pthread_mutex_unlock(&lock);
+}
+
+// Deletes every FBO on <context>'s freelist whose texture has been unlinked
+// (texture_num == 0, as set by release_2d_texture()), removing both the
+// freelist entry and the matching fbo_formats entry. <context> is expected
+// to be the current context, since the FBOs are deleted right here.
+// Does not take the lock itself; release_fbo() calls it with the lock held.
+void ResourcePool::cleanup_unlinked_fbos(void *context)
+{
+	list<GLuint> &freelist = fbo_freelist[context];
+	for (list<GLuint>::iterator freelist_it = freelist.begin();
+	     freelist_it != freelist.end(); ) {
+		GLuint fbo_num = *freelist_it;
+		pair<void *, GLuint> key(context, fbo_num);
+		map<pair<void *, GLuint>, FBO>::iterator format_it = fbo_formats.find(key);
+		assert(format_it != fbo_formats.end());
+		if (format_it->second.texture_num == 0) {
+			glDeleteFramebuffers(1, &fbo_num);
+			check_error();
+			// Drop the bookkeeping entry together with the FBO. Leaving it
+			// behind would trip assert(fbo_formats.empty()) in the
+			// destructor, and the uniqueness assert in create_fbo() if the
+			// driver hands out the same FBO name again.
+			fbo_formats.erase(format_it);
+			freelist_it = freelist.erase(freelist_it);
+		} else {
+			++freelist_it;
+		}
+	}
+}
+
+void ResourcePool::shrink_fbo_freelist(void *context, size_t max_length)  // Trims <context>'s FBO freelist down to at most <max_length> entries.
+{
+ while (fbo_freelist[context].size() > max_length) {
+ GLuint free_fbo_num = fbo_freelist[context].back();  // release_fbo() pushes at the front, so the back is the least recently released FBO.
+ pair<void *, GLuint> key(context, free_fbo_num);
+ fbo_freelist[context].pop_back();
+ assert(fbo_formats.count(key) != 0);
+ fbo_formats.erase(key);  // Forget the bookkeeping entry before the FBO itself is deleted below.
glDeleteFramebuffers(1, &free_fbo_num);
check_error();
}
- pthread_mutex_unlock(&lock);
}
size_t ResourcePool::estimate_texture_size(const Texture2D &texture_format)
case GL_RGBA16F_ARB:
bytes_per_pixel = 8;
break;
- case GL_RGBA8:
- case GL_SRGB8_ALPHA8:
- bytes_per_pixel = 4;
- break;
case GL_RGB32F_ARB:
bytes_per_pixel = 12;
break;
case GL_RGB16F_ARB:
bytes_per_pixel = 6;
break;
+ case GL_RGBA8:
+ case GL_SRGB8_ALPHA8:
+ bytes_per_pixel = 4;
+ break;
case GL_RGB8:
case GL_SRGB8:
bytes_per_pixel = 3;
case GL_RG16F:
bytes_per_pixel = 4;
break;
+ case GL_R32F:
+ bytes_per_pixel = 4;
+ break;
+ case GL_R16F:
+ bytes_per_pixel = 2;
+ break;
case GL_R8:
bytes_per_pixel = 1;
break;
// Thread-safety: All functions except the constructor and destructor can be
// safely called from multiple threads at the same time, provided they have
// separate (but sharing) OpenGL contexts.
+//
+// Memory management (only relevant if you use multiple contexts): Some objects,
+// like FBOs, are not shareable across contexts, and can only be deleted from
+// the context they were created in. Thus, you will need to tell the
+// ResourcePool explicitly if you delete a context, or they will leak (and the
+// ResourcePool destructor will assert-fail). See clean_context().
- #include <GL/glew.h>
+ #include <epoxy/gl.h>
#include <pthread.h>
#include <stddef.h>
#include <list>
// twice this estimate or more.
ResourcePool(size_t program_freelist_max_length = 100,
size_t texture_freelist_max_bytes = 100 << 20, // 100 MB.
- size_t fbo_freelist_max_length = 100);
+ size_t fbo_freelist_max_length = 100); // Per context.
~ResourcePool();
// All remaining functions are intended for calls from EffectChain only.
// Allocate an FBO with the the given texture bound as a framebuffer attachment,
// or fetch a previous used if possible. Unbinds GL_FRAMEBUFFER afterwards.
// Keeps ownership of the FBO; you must call release_fbo() of deleting
- // it when you no longer want it. You can get an appropriate context
- // pointer from get_gl_context_identifier().
+ // it when you no longer want it.
//
// NOTE: In principle, the FBO doesn't have a resolution or pixel format;
// you can bind almost whatever texture you want to it. However, changing
// textures can have an adverse effect on performance due to validation,
// in particular on NVidia cards. Also, keep in mind that FBOs are not
- // shareable across contexts.
- GLuint create_fbo(void *context, GLuint texture_num);
+ // shareable across contexts, so you must have the context that's supposed
+ // to own the FBO current when you create or release it.
+ GLuint create_fbo(GLuint texture_num);
void release_fbo(GLuint fbo_num);
+ // Informs the ResourcePool that the current context is going away soon,
+ // and that any resources held for it in the freelist should be deleted.
+ //
+ // You do not need to do this for the last context; the regular destructor
+ // will take care of that. This means that if you only ever use one
+ // thread/context, you never need to call this function.
+ void clean_context();
+
private:
// Delete the given program and both its shaders.
void delete_program(GLuint program_num);
+ // Deletes all FBOs for the given context that belong to deleted textures.
+ void cleanup_unlinked_fbos(void *context);
+
+ // Remove FBOs off the end of the freelist for <context>, until it
+ // is no more than <max_length> elements long.
+ void shrink_fbo_freelist(void *context, size_t max_length);
+
// Protects all the other elements in the class.
pthread_mutex_t lock;
size_t texture_freelist_bytes;
struct FBO {
- void *context;
- GLuint texture_num;
+ GLuint texture_num; // 0 means the texture this FBO was attached to has since been deleted.
};
- // A mapping from FBO number to format details. This is filled if the
- // FBO is given out to a client or on the freelist, but not if it is
- // deleted from the freelist.
- std::map<GLuint, FBO> fbo_formats;
+ // For each context, a mapping from FBO number to format details. This is
+ // filled if the FBO is given out to a client or on the freelist, but
+ // not if it is deleted from the freelist.
+ std::map<std::pair<void *, GLuint>, FBO> fbo_formats;
- // A list of all FBOs that are release but not freed (most recently freed
- // first). Once this reaches <fbo_freelist_max_length>, the last element
- // will be deleted.
- std::list<GLuint> fbo_freelist;
+ // For each context, a list of all FBOs that are released but not freed
+ // (most recently freed first). Once this reaches <fbo_freelist_max_length>,
+ // the last element will be deleted.
+ std::map<void *, std::list<GLuint> > fbo_freelist;
// See the caveats at the constructor.
static size_t estimate_texture_size(const Texture2D &texture_format);