From: Steinar H. Gunderson Date: Fri, 28 Mar 2014 20:15:05 +0000 (+0100) Subject: Merge branch 'epoxy' X-Git-Tag: 1.1~12 X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=7ea0b3a5be9bafaa2d1fa5a17ce285a725ce132b;hp=-c;p=movit Merge branch 'epoxy' Conflicts: effect_chain.cpp resource_pool.cpp --- 7ea0b3a5be9bafaa2d1fa5a17ce285a725ce132b diff --combined Makefile.in index d028c08,50431c2..05a80ca --- a/Makefile.in +++ b/Makefile.in @@@ -6,8 -6,8 +6,8 @@@ GTEST_DIR ?= /usr/src/gtes # strive towards having a rock-stable ABI, but at least the soversion will increase # whenever it breaks, so that you will not have silent failures, and distribution package # management can run its course. -movit_ltversion = 1:3:0 -movit_version = 1.0.3 +movit_ltversion = 2:0:0 +movit_version = 1.1 prefix = @prefix@ exec_prefix = @exec_prefix@ @@@ -17,15 -17,19 +17,19 @@@ datarootdir = @datarootdir datadir = @datadir@ top_builddir = @top_builddir@ with_demo_app = @with_demo_app@ + with_SDL2 = @with_SDL2@ with_coverage = @with_coverage@ CC=@CC@ CXX=@CXX@ - CXXFLAGS=-Wall @CXXFLAGS@ -I$(GTEST_DIR)/include @Eigen3_CFLAGS@ @GLEW_CFLAGS@ @FFTW3_CFLAGS@ + CXXFLAGS=-Wall @CXXFLAGS@ -I$(GTEST_DIR)/include @SDL2_CFLAGS@ @SDL_CFLAGS@ @Eigen3_CFLAGS@ @epoxy_CFLAGS@ @FFTW3_CFLAGS@ + ifeq ($(with_SDL2),yes) + CXXFLAGS += -DHAVE_SDL2 + endif LDFLAGS=@LDFLAGS@ - LDLIBS=@GLEW_LIBS@ @FFTW3_LIBS@ -lpthread - TEST_LDLIBS=@GLEW_LIBS@ @SDL_LIBS@ -lpthread - DEMO_LDLIBS=@SDL_image_LIBS@ -lrt -lpthread @libpng_LIBS@ @FFTW3_LIBS@ + LDLIBS=@epoxy_LIBS@ @FFTW3_LIBS@ -lpthread + TEST_LDLIBS=@epoxy_LIBS@ @SDL2_LIBS@ @SDL_LIBS@ -lpthread + DEMO_LDLIBS=@SDL2_image_LIBS@ @SDL_image_LIBS@ -lrt -lpthread @libpng_LIBS@ @FFTW3_LIBS@ SHELL=@SHELL@ LIBTOOL=@LIBTOOL@ --tag=CXX RANLIB=ranlib @@@ -159,12 -163,14 +163,14 @@@ HDRS = effect_chain.h effect_util.h eff HDRS += $(INPUTS:=.h) HDRS += $(EFFECTS:=.h) - SHADERS = vs.vert header.frag footer.frag + SHADERS = vs.vert vs.130.vert vs.300es.vert + SHADERS += header.frag 
header.130.frag header.300es.frag + SHADERS += footer.frag footer.130.frag footer.300es.frag + SHADERS += texture1d.frag texture1d.130.frag texture1d.300es.frag SHADERS += $(INPUTS:=.frag) SHADERS += $(EFFECTS:=.frag) SHADERS += highlight_cutoff_effect.frag SHADERS += overlay_matte_effect.frag - SHADERS += texture1d.frag # These purposefully do not exist. MISSING_SHADERS = diffusion_effect.frag glow_effect.frag unsharp_mask_effect.frag resize_effect.frag diff --combined effect_chain.cpp index c6f1e89,7bce60f..0a01bd3 --- a/effect_chain.cpp +++ b/effect_chain.cpp @@@ -1,6 -1,6 +1,6 @@@ #define GL_GLEXT_PROTOTYPES 1 - #include + #include #include #include #include @@@ -53,6 -53,15 +53,6 @@@ EffectChain::~EffectChain( delete nodes[i]; } for (unsigned i = 0; i < phases.size(); ++i) { - glBindVertexArray(phases[i]->vao); - check_error(); - - cleanup_vertex_attribute(phases[i]->glsl_program_num, "position", phases[i]->position_vbo); - cleanup_vertex_attribute(phases[i]->glsl_program_num, "texcoord", phases[i]->texcoord_vbo); - - glBindVertexArray(0); - check_error(); - resource_pool->release_glsl_program(phases[i]->glsl_program_num); delete phases[i]; } @@@ -229,7 -238,7 +229,7 @@@ string replace_prefix(const string &tex void EffectChain::compile_glsl_program(Phase *phase) { - string frag_shader = read_file("header.frag"); + string frag_shader = read_version_dependent_file("header", "frag"); // Create functions for all the texture inputs that we need. 
for (unsigned i = 0; i < phase->inputs.size(); ++i) { @@@ -240,7 -249,7 +240,7 @@@ frag_shader += string("uniform sampler2D tex_") + effect_id + ";\n"; frag_shader += string("vec4 ") + effect_id + "(vec2 tc) {\n"; - frag_shader += "\treturn texture2D(tex_" + string(effect_id) + ", tc);\n"; + frag_shader += "\treturn tex2D(tex_" + string(effect_id) + ", tc);\n"; frag_shader += "}\n"; frag_shader += "\n"; } @@@ -279,9 -288,31 +279,10 @@@ frag_shader += "\n"; } frag_shader += string("#define INPUT ") + phase->effect_ids[phase->effects.back()] + "\n"; - frag_shader.append(read_file("footer.frag")); + frag_shader.append(read_version_dependent_file("footer", "frag")); - phase->glsl_program_num = resource_pool->compile_glsl_program(read_file("vs.vert"), frag_shader); + string vert_shader = read_version_dependent_file("vs", "vert"); + phase->glsl_program_num = resource_pool->compile_glsl_program(vert_shader, frag_shader); - - // Prepare the geometry for the fullscreen quad used in this phase. - // (We have separate VAOs per shader, since the bindings can in theory - // be different.) - float vertices[] = { - 0.0f, 1.0f, - 0.0f, 0.0f, - 1.0f, 1.0f, - 1.0f, 0.0f - }; - - glGenVertexArrays(1, &phase->vao); - check_error(); - glBindVertexArray(phase->vao); - check_error(); - - phase->position_vbo = fill_vertex_attribute(phase->glsl_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices); - phase->texcoord_vbo = fill_vertex_attribute(phase->glsl_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices); // Same as vertices. - - glBindVertexArray(0); - check_error(); } // Construct GLSL programs, starting at the given effect and following @@@ -1454,6 -1485,8 +1455,6 @@@ void EffectChain::render_to_fbo(GLuint glBindFramebuffer(GL_FRAMEBUFFER, 0); check_error(); - glBindVertexArray(0); - check_error(); glUseProgram(0); check_error(); } @@@ -1493,7 -1526,8 +1494,7 @@@ void EffectChain::execute_phase(Phase * // And now the output. 
(Already set up for us if it is the last phase.) if (!last_phase) { - void *context = get_gl_context_identifier(); - fbo = resource_pool->create_fbo(context, (*output_textures)[phase]); + fbo = resource_pool->create_fbo((*output_textures)[phase]); glBindFramebuffer(GL_FRAMEBUFFER, fbo); glViewport(0, 0, phase->output_width, phase->output_height); } @@@ -1514,32 -1548,11 +1515,32 @@@ } } - glBindVertexArray(phase->vao); + // Now draw! + float vertices[] = { + 0.0f, 1.0f, + 0.0f, 0.0f, + 1.0f, 1.0f, + 1.0f, 0.0f + }; + + GLuint vao; + glGenVertexArrays(1, &vao); + check_error(); + glBindVertexArray(vao); check_error(); + + GLuint position_vbo = fill_vertex_attribute(glsl_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices); + GLuint texcoord_vbo = fill_vertex_attribute(glsl_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices); // Same as vertices. + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); check_error(); + cleanup_vertex_attribute(glsl_program_num, "position", position_vbo); + cleanup_vertex_attribute(glsl_program_num, "texcoord", texcoord_vbo); + + glUseProgram(0); + check_error(); + for (unsigned i = 0; i < phase->effects.size(); ++i) { Node *node = phase->effects[i]; node->effect->clear_gl_state(); @@@ -1548,9 -1561,6 +1549,9 @@@ if (!last_phase) { resource_pool->release_fbo(fbo); } + + glDeleteVertexArrays(1, &vao); + check_error(); } void EffectChain::setup_rtt_sampler(GLuint glsl_program_num, int sampler_num, const string &effect_id, bool use_mipmaps) diff --combined effect_chain.h index 2b959bd,e7d99d5..593232c --- a/effect_chain.h +++ b/effect_chain.h @@@ -16,12 -16,8 +16,12 @@@ // but if so, the threads' contexts need to be set up to share resources, since // the EffectChain holds textures and other OpenGL objects that are tied to the // context. +// +// Memory management (only relevant if you use multiple contexts): +// See corresponding comment in resource_pool.h. 
This holds even if you don't +// allocate your own ResourcePool, but let EffectChain hold its own. - #include + #include #include #include #include @@@ -103,6 -99,13 +103,6 @@@ struct Phase // Identifier used to create unique variables in GLSL. // Unique per-phase to increase cacheability of compiled shaders. std::map effect_ids; - - // The geometry needed to draw this quad, bound to the vertex array - // object. (Seemingly it's actually a win not to upload geometry every - // frame, even for something as small as a quad, due to fewer state - // changes.) - GLuint vao; - GLuint position_vbo, texcoord_vbo; }; class EffectChain { diff --combined fft_pass_effect.cpp index b46e5e5,ee0b983..48e2677 --- a/fft_pass_effect.cpp +++ b/fft_pass_effect.cpp @@@ -1,4 -1,4 +1,4 @@@ - #include + #include #include #include "effect_chain.h" @@@ -14,12 -14,7 +14,12 @@@ namespace movit FFTPassEffect::FFTPassEffect() : input_width(1280), input_height(720), - direction(HORIZONTAL) + direction(HORIZONTAL), + last_fft_size(-1), + last_direction(INVALID), + last_pass_number(-1), + last_inverse(-1), + last_input_size(-1) { register_int("fft_size", &fft_size); register_int("direction", (int *)&direction); @@@ -44,6 -39,8 +44,6 @@@ void FFTPassEffect::set_gl_state(GLuin { Effect::set_gl_state(glsl_program_num, prefix, sampler_num); - int input_size = (direction == VERTICAL) ? input_height : input_width; - // This is needed because it counteracts the precision issues we get // because we sample the input texture with normalized coordinates // (especially when the repeat count along the axis is not a power of @@@ -57,43 -54,6 +57,43 @@@ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); check_error(); + // Because of the memory layout (see below) and because we use offsets, + // the support texture values for many consecutive values will be + // the same. Thus, we can store a smaller texture (giving a small + // performance boost) and just sample it with NEAREST. 
Also, this + // counteracts any precision issues we might get from linear + // interpolation. + glActiveTexture(GL_TEXTURE0 + *sampler_num); + check_error(); + glBindTexture(GL_TEXTURE_2D, tex); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + check_error(); + + int input_size = (direction == VERTICAL) ? input_height : input_width; + if (last_fft_size != fft_size || + last_direction != direction || + last_pass_number != pass_number || + last_inverse != inverse || + last_input_size != input_size) { + generate_support_texture(); + } + + set_uniform_int(glsl_program_num, prefix, "support_tex", *sampler_num); + ++*sampler_num; + + assert(input_size % fft_size == 0); + set_uniform_float(glsl_program_num, prefix, "num_repeats", input_size / fft_size); +} + +void FFTPassEffect::generate_support_texture() +{ + int input_size = (direction == VERTICAL) ? input_height : input_width; + // The memory layout follows figure 5.2 on page 25 of // http://gpuwave.sesse.net/gpuwave.pdf -- it can be a bit confusing // at first, but is classically explained more or less as follows: @@@ -119,8 -79,8 +119,8 @@@ // bit, so the stride is 8, and so on. assert((fft_size & (fft_size - 1)) == 0); // Must be power of two. - fp16_int_t *tmp = new fp16_int_t[fft_size * 4]; int subfft_size = 1 << pass_number; + fp16_int_t *tmp = new fp16_int_t[subfft_size * 4]; double mulfac; if (inverse) { mulfac = 2.0 * M_PI; @@@ -131,8 -91,9 +131,8 @@@ assert((fft_size & (fft_size - 1)) == 0); // Must be power of two. assert(fft_size % subfft_size == 0); int stride = fft_size / subfft_size; - for (int i = 0; i < fft_size; ++i) { - int k = i / stride; // Element number within this sub-FFT. - int offset = i % stride; // Sub-FFT number. 
+ for (int i = 0; i < subfft_size; i++) { + int k = i; double twiddle_real, twiddle_imag; if (k < subfft_size / 2) { @@@ -155,22 -116,33 +155,22 @@@ // for using offsets and not direct coordinates as in GPUwave // is that we can have multiple FFTs along the same line, // and want to reuse the support texture by repeating it. - int base = k * stride * 2 + offset; + int base = k * stride * 2; int support_texture_index = i; int src1 = base; int src2 = base + stride; + double sign = 1.0; if (direction == FFTPassEffect::VERTICAL) { // Compensate for OpenGL's bottom-left convention. - support_texture_index = fft_size - support_texture_index - 1; - src1 = fft_size - src1 - 1; - src2 = fft_size - src2 - 1; + support_texture_index = subfft_size - support_texture_index - 1; + sign = -1.0; } - tmp[support_texture_index * 4 + 0] = fp64_to_fp16((src1 - support_texture_index) / double(input_size)); - tmp[support_texture_index * 4 + 1] = fp64_to_fp16((src2 - support_texture_index) / double(input_size)); + tmp[support_texture_index * 4 + 0] = fp64_to_fp16(sign * (src1 - i * stride) / double(input_size)); + tmp[support_texture_index * 4 + 1] = fp64_to_fp16(sign * (src2 - i * stride) / double(input_size)); tmp[support_texture_index * 4 + 2] = fp64_to_fp16(twiddle_real); tmp[support_texture_index * 4 + 3] = fp64_to_fp16(twiddle_imag); } - glActiveTexture(GL_TEXTURE0 + *sampler_num); - check_error(); - glBindTexture(GL_TEXTURE_2D, tex); - check_error(); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - check_error(); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - check_error(); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); - check_error(); - // Supposedly FFTs are very sensitive to inaccuracies in the twiddle factors, // at least according to a paper by Schatzman (see gpuwave.pdf reference [30] // for the full reference); however, practical testing indicates that it's @@@ -180,16 -152,16 +180,16 @@@ // which gives a nice 
speed boost. // // Note that the source coordinates become somewhat less accurate too, though. - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, fft_size, 1, 0, GL_RGBA, GL_HALF_FLOAT, tmp); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, subfft_size, 1, 0, GL_RGBA, GL_HALF_FLOAT, tmp); check_error(); delete[] tmp; - set_uniform_int(glsl_program_num, prefix, "support_tex", *sampler_num); - ++*sampler_num; - - assert(input_size % fft_size == 0); - set_uniform_float(glsl_program_num, prefix, "num_repeats", input_size / fft_size); + last_fft_size = fft_size; + last_direction = direction; + last_pass_number = pass_number; + last_inverse = inverse; + last_input_size = input_size; } } // namespace movit diff --combined fft_pass_effect.frag index fcffba7,607f86a..eb26f33 --- a/fft_pass_effect.frag +++ b/fft_pass_effect.frag @@@ -6,11 -6,11 +6,11 @@@ uniform sampler2D PREFIX(support_tex) vec4 FUNCNAME(vec2 tc) { #if DIRECTION_VERTICAL - vec4 support = texture2D(PREFIX(support_tex), vec2(tc.y * PREFIX(num_repeats), 0.0)); + vec4 support = tex2D(PREFIX(support_tex), vec2(tc.y * PREFIX(num_repeats), 0.0)); vec4 c1 = INPUT(vec2(tc.x, tc.y + support.x)); vec4 c2 = INPUT(vec2(tc.x, tc.y + support.y)); #else - vec4 support = texture2D(PREFIX(support_tex), vec2(tc.x * PREFIX(num_repeats), 0.0)); + vec4 support = tex2D(PREFIX(support_tex), vec2(tc.x * PREFIX(num_repeats), 0.0)); vec4 c1 = INPUT(vec2(tc.x + support.x, tc.y)); vec4 c2 = INPUT(vec2(tc.x + support.y, tc.y)); #endif @@@ -22,5 -22,3 +22,5 @@@ // where * is complex multiplication. return c1 + support.z * c2 + support.w * vec4(-c2.y, c2.x, -c2.w, c2.z); } + +#undef DIRECTION_VERTICAL diff --combined fft_pass_effect.h index 7689cf1,90e88bc..fbf4511 --- a/fft_pass_effect.h +++ b/fft_pass_effect.h @@@ -50,7 -50,7 +50,7 @@@ // scaling), and as fp16 has quite limited range at times, this can be relevant // on some GPUs for larger sizes. 
- #include + #include #include #include #include @@@ -101,25 -101,16 +101,25 @@@ public virtual void inform_added(EffectChain *chain) { this->chain = chain; } - enum Direction { HORIZONTAL = 0, VERTICAL = 1 }; + enum Direction { INVALID = -1, HORIZONTAL = 0, VERTICAL = 1 }; private: + void generate_support_texture(); + EffectChain *chain; int input_width, input_height; GLuint tex; + int fft_size; Direction direction; int pass_number; // From 1..n. int inverse; // 0 = forward (FFT), 1 = reverse (IFFT). + + int last_fft_size; + Direction last_direction; + int last_pass_number; + int last_inverse; + int last_input_size; }; } // namespace movit diff --combined fft_pass_effect_test.cpp index 7eafbd9,015847a..bce91ce --- a/fft_pass_effect_test.cpp +++ b/fft_pass_effect_test.cpp @@@ -3,11 -3,11 +3,11 @@@ #include #include #include + #include + #include #include "effect_chain.h" #include "fft_pass_effect.h" - #include "glew.h" - #include "gtest/gtest.h" #include "image_format.h" #include "multiply_effect.h" #include "test_util.h" @@@ -126,7 -126,7 +126,7 @@@ TEST(FFTPassEffectTest, SingleFrequency TEST(FFTPassEffectTest, Repeat) { srand(12345); - for (int fft_size = 2; fft_size < 512; fft_size *= 2) { + for (int fft_size = 2; fft_size <= 128; fft_size *= 2) { const int num_repeats = 31; // Prime, to make things more challenging. float data[num_repeats * fft_size * 4]; float expected_data[num_repeats * fft_size * 4], out_data[num_repeats * fft_size * 4]; diff --combined luma_mix_effect_test.cpp index 07829cc,4a506f5..2b6de15 --- a/luma_mix_effect_test.cpp +++ b/luma_mix_effect_test.cpp @@@ -1,6 -1,6 +1,6 @@@ // Unit tests for LumaMixEffect. 
- #include + #include #include "effect_chain.h" #include "gtest/gtest.h" @@@ -96,44 -96,4 +96,44 @@@ TEST(LumaMixEffectTest, SoftWipeHalfWay expect_equal(expected_data, out_data, 2, 2); } +TEST(LumaMixEffectTest, Inverse) { + float data_a[] = { + 0.0f, 0.25f, + 0.75f, 1.0f, + }; + float data_b[] = { + 1.0f, 0.5f, + 0.65f, 0.6f, + }; + float data_luma[] = { + 0.0f, 0.25f, + 0.5f, 0.75f, + }; + + EffectChainTester tester(data_a, 2, 2, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR); + Effect *input1 = tester.get_chain()->last_added_effect(); + Effect *input2 = tester.add_input(data_b, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR); + Effect *input3 = tester.add_input(data_luma, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR); + + Effect *luma_mix_effect = tester.get_chain()->add_effect(new LumaMixEffect(), input1, input2, input3); + ASSERT_TRUE(luma_mix_effect->set_float("transition_width", 100000.0f)); + ASSERT_TRUE(luma_mix_effect->set_int("inverse", 1)); + + // Inverse is not the same as reverse, so progress=0 should behave identically + // as HardWipe, ie. everything should be from A. + float out_data[4]; + ASSERT_TRUE(luma_mix_effect->set_float("progress", 0.0f)); + tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR); + expect_equal(data_a, out_data, 2, 2); + + // Lower two from A, the rest from B. 
+ float expected_data_049[] = { + 1.0f, 0.5f, + 0.75f, 1.0f, + }; + ASSERT_TRUE(luma_mix_effect->set_float("progress", 0.49f)); + tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR); + expect_equal(expected_data_049, out_data, 2, 2); +} + } // namespace movit diff --combined resource_pool.cpp index 19c26a0,2298c3c..5abc1b2 --- a/resource_pool.cpp +++ b/resource_pool.cpp @@@ -6,8 -6,8 +6,8 @@@ #include #include #include + #include - #include "glew.h" #include "init.h" #include "resource_pool.h" #include "util.h" @@@ -52,29 -52,15 +52,29 @@@ ResourcePool::~ResourcePool( assert(texture_formats.empty()); assert(texture_freelist_bytes == 0); - for (list::const_iterator freelist_it = fbo_freelist.begin(); - freelist_it != fbo_freelist.end(); - ++freelist_it) { - GLuint free_fbo_num = *freelist_it; - assert(fbo_formats.count(free_fbo_num) != 0); - fbo_formats.erase(free_fbo_num); - glDeleteFramebuffers(1, &free_fbo_num); - check_error(); + void *context = get_gl_context_identifier(); + cleanup_unlinked_fbos(context); + + for (map >::iterator context_it = fbo_freelist.begin(); + context_it != fbo_freelist.end(); + ++context_it) { + if (context_it->first != context) { + // If this does not hold, the client should have called clean_context() earlier. 
+ assert(context_it->second.empty()); + continue; + } + for (list::const_iterator freelist_it = context_it->second.begin(); + freelist_it != context_it->second.end(); + ++freelist_it) { + pair key(context, *freelist_it); + GLuint free_fbo_num = *freelist_it; + assert(fbo_formats.count(key) != 0); + fbo_formats.erase(key); + glDeleteFramebuffers(1, &free_fbo_num); + check_error(); + } } + assert(fbo_formats.empty()); } @@@ -214,16 -200,19 +214,19 @@@ GLuint ResourcePool::create_2d_texture( case GL_SRGB8_ALPHA8: format = GL_RGBA; break; - case GL_RGB32F_ARB: - case GL_RGB16F_ARB: + case GL_RGB32F: + case GL_RGB16F: case GL_RGB8: case GL_SRGB8: format = GL_RGB; break; case GL_RG32F: case GL_RG16F: + case GL_RG8: format = GL_RG; break; + case GL_R32F: + case GL_R16F: case GL_R8: format = GL_RED; break; @@@ -232,12 -221,39 +235,39 @@@ assert(false); } + // Same with type; GLES is stricter than desktop OpenGL here. + GLenum type; + switch (internal_format) { + case GL_RGBA32F_ARB: + case GL_RGBA16F_ARB: + case GL_RGB32F: + case GL_RGB16F: + case GL_RG32F: + case GL_RG16F: + case GL_R32F: + case GL_R16F: + type = GL_FLOAT; + break; + case GL_SRGB8_ALPHA8: + case GL_SRGB8: + case GL_RGBA8: + case GL_RGB8: + case GL_RG8: + case GL_R8: + type = GL_UNSIGNED_BYTE; + break; + default: + // TODO: Add more here as needed. + assert(false); + } + + GLuint texture_num; glGenTextures(1, &texture_num); check_error(); glBindTexture(GL_TEXTURE_2D, texture_num); check_error(); - glTexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, GL_UNSIGNED_BYTE, NULL); + glTexImage2D(GL_TEXTURE_2D, 0, internal_format, width, height, 0, format, type, NULL); check_error(); glBindTexture(GL_TEXTURE_2D, 0); check_error(); @@@ -269,40 -285,38 +299,40 @@@ void ResourcePool::release_2d_texture(G glDeleteTextures(1, &free_texture_num); check_error(); - // Delete any FBO related to this texture. 
- for (list::iterator fbo_freelist_it = fbo_freelist.begin(); - fbo_freelist_it != fbo_freelist.end(); ) { - GLuint fbo_num = *fbo_freelist_it; - map::const_iterator format_it = fbo_formats.find(fbo_num); - assert(format_it != fbo_formats.end()); + // Unlink any lingering FBO related to this texture. We might + // not be in the right context, so don't delete it right away; + // the cleanup in release_fbo() (which calls cleanup_unlinked_fbos()) + // will take care of actually doing that later. + for (map, FBO>::iterator format_it = fbo_formats.begin(); + format_it != fbo_formats.end(); + ++format_it) { if (format_it->second.texture_num == free_texture_num) { - glDeleteFramebuffers(1, &fbo_num); - fbo_freelist.erase(fbo_freelist_it++); - } else { - ++fbo_freelist_it; + format_it->second.texture_num = 0; } } } pthread_mutex_unlock(&lock); } -GLuint ResourcePool::create_fbo(void *context, GLuint texture_num) +GLuint ResourcePool::create_fbo(GLuint texture_num) { + void *context = get_gl_context_identifier(); + pthread_mutex_lock(&lock); - // See if there's an FBO on the freelist we can use. - for (list::iterator freelist_it = fbo_freelist.begin(); - freelist_it != fbo_freelist.end(); - ++freelist_it) { - GLuint fbo_num = *freelist_it; - map::const_iterator format_it = fbo_formats.find(fbo_num); - assert(format_it != fbo_formats.end()); - if (format_it->second.context == context && - format_it->second.texture_num == texture_num) { - fbo_freelist.erase(freelist_it); - pthread_mutex_unlock(&lock); - return fbo_num; + if (fbo_freelist.count(context) != 0) { + // See if there's an FBO on the freelist we can use. 
+ for (list::iterator freelist_it = fbo_freelist[context].begin(); + freelist_it != fbo_freelist[context].end(); + ++freelist_it) { + GLuint fbo_num = *freelist_it; + map, FBO>::const_iterator format_it = + fbo_formats.find(make_pair(context, fbo_num)); + assert(format_it != fbo_formats.end()); + if (format_it->second.texture_num == texture_num) { + fbo_freelist[context].erase(freelist_it); + pthread_mutex_unlock(&lock); + return fbo_num; + } } } @@@ -325,10 -339,10 +355,10 @@@ check_error(); FBO fbo_format; - fbo_format.context = context; fbo_format.texture_num = texture_num; - assert(fbo_formats.count(fbo_num) == 0); - fbo_formats.insert(make_pair(fbo_num, fbo_format)); + pair key(context, fbo_num); + assert(fbo_formats.count(key) == 0); + fbo_formats.insert(make_pair(key, fbo_format)); pthread_mutex_unlock(&lock); return fbo_num; @@@ -336,58 -350,19 +366,58 @@@ void ResourcePool::release_fbo(GLuint fbo_num) { + void *context = get_gl_context_identifier(); + pthread_mutex_lock(&lock); - fbo_freelist.push_front(fbo_num); - assert(fbo_formats.count(fbo_num) != 0); - - while (fbo_freelist.size() > fbo_freelist_max_length) { - GLuint free_fbo_num = fbo_freelist.front(); - fbo_freelist.pop_front(); - assert(fbo_formats.count(free_fbo_num) != 0); - fbo_formats.erase(free_fbo_num); + fbo_freelist[context].push_front(fbo_num); + assert(fbo_formats.count(make_pair(context, fbo_num)) != 0); + + // Now that we're in this context, free up any FBOs that are connected + // to deleted textures (in release_2d_texture). + cleanup_unlinked_fbos(context); + + shrink_fbo_freelist(context, fbo_freelist_max_length); + pthread_mutex_unlock(&lock); +} + +void ResourcePool::clean_context() +{ + void *context = get_gl_context_identifier(); + + // Currently, we only need to worry about FBOs, as they are the only + // non-shareable resource we hold. 
+ shrink_fbo_freelist(context, 0); + fbo_freelist.erase(context); +} + +void ResourcePool::cleanup_unlinked_fbos(void *context) +{ + for (list::iterator freelist_it = fbo_freelist[context].begin(); + freelist_it != fbo_freelist[context].end(); ) { + GLuint fbo_num = *freelist_it; + pair key(context, fbo_num); + assert(fbo_formats.count(key) != 0); + if (fbo_formats[key].texture_num == 0) { + glDeleteFramebuffers(1, &fbo_num); + check_error(); + fbo_freelist[context].erase(freelist_it++); + } else { + freelist_it++; + } + } +} + +void ResourcePool::shrink_fbo_freelist(void *context, size_t max_length) +{ + while (fbo_freelist[context].size() > max_length) { + GLuint free_fbo_num = fbo_freelist[context].back(); + pair key(context, free_fbo_num); + fbo_freelist[context].pop_back(); + assert(fbo_formats.count(key) != 0); + fbo_formats.erase(key); glDeleteFramebuffers(1, &free_fbo_num); check_error(); } - pthread_mutex_unlock(&lock); } size_t ResourcePool::estimate_texture_size(const Texture2D &texture_format) @@@ -401,16 -376,16 +431,16 @@@ case GL_RGBA16F_ARB: bytes_per_pixel = 8; break; - case GL_RGBA8: - case GL_SRGB8_ALPHA8: - bytes_per_pixel = 4; - break; case GL_RGB32F_ARB: bytes_per_pixel = 12; break; case GL_RGB16F_ARB: bytes_per_pixel = 6; break; + case GL_RGBA8: + case GL_SRGB8_ALPHA8: + bytes_per_pixel = 4; + break; case GL_RGB8: case GL_SRGB8: bytes_per_pixel = 3; @@@ -421,6 -396,12 +451,12 @@@ case GL_RG16F: bytes_per_pixel = 4; break; + case GL_R32F: + bytes_per_pixel = 4; + break; + case GL_R16F: + bytes_per_pixel = 2; + break; case GL_R8: bytes_per_pixel = 1; break; diff --combined resource_pool.h index aad95c9,7029fbf..a331f4f --- a/resource_pool.h +++ b/resource_pool.h @@@ -15,14 -15,8 +15,14 @@@ // Thread-safety: All functions except the constructor and destructor can be // safely called from multiple threads at the same time, provided they have // separate (but sharing) OpenGL contexts. 
+// +// Memory management (only relevant if you use multiple contexts): Some objects, +// like FBOs, are not shareable across contexts, and can only be deleted from +// the context they were created in. Thus, you will need to tell the +// ResourcePool explicitly if you delete a context, or they will leak (and the +// ResourcePool destructor will assert-fail). See clean_context(). - #include + #include #include #include #include @@@ -47,7 -41,7 +47,7 @@@ public // twice this estimate or more. ResourcePool(size_t program_freelist_max_length = 100, size_t texture_freelist_max_bytes = 100 << 20, // 100 MB. - size_t fbo_freelist_max_length = 100); + size_t fbo_freelist_max_length = 100); // Per context. ~ResourcePool(); // All remaining functions are intended for calls from EffectChain only. @@@ -69,36 -63,21 +69,36 @@@ // Allocate an FBO with the the given texture bound as a framebuffer attachment, // or fetch a previous used if possible. Unbinds GL_FRAMEBUFFER afterwards. // Keeps ownership of the FBO; you must call release_fbo() of deleting - // it when you no longer want it. You can get an appropriate context - // pointer from get_gl_context_identifier(). + // it when you no longer want it. // // NOTE: In principle, the FBO doesn't have a resolution or pixel format; // you can bind almost whatever texture you want to it. However, changing // textures can have an adverse effect on performance due to validation, // in particular on NVidia cards. Also, keep in mind that FBOs are not - // shareable across contexts. - GLuint create_fbo(void *context, GLuint texture_num); + // shareable across contexts, so you must have the context that's supposed + // to own the FBO current when you create or release it. + GLuint create_fbo(GLuint texture_num); void release_fbo(GLuint fbo_num); + // Informs the ResourcePool that the current context is going away soon, + // and that any resources held for it in the freelist should be deleted. 
+ // + // You do not need to do this for the last context; the regular destructor + // will take care of that. This means that if you only ever use one + // thread/context, you never need to call this function. + void clean_context(); + private: // Delete the given program and both its shaders. void delete_program(GLuint program_num); + // Deletes all FBOs for the given context that belong to deleted textures. + void cleanup_unlinked_fbos(void *context); + + // Remove FBOs off the end of the freelist for <context>, until it + // is no more than <max_length> elements long. + void shrink_fbo_freelist(void *context, size_t max_length); + // Protects all the other elements in the class. pthread_mutex_t lock; @@@ -139,18 -118,19 +139,18 @@@ size_t texture_freelist_bytes; struct FBO { - void *context; - GLuint texture_num; + GLuint texture_num; // 0 means associated to a texture that has since been deleted. }; - // A mapping from FBO number to format details. This is filled if the - // FBO is given out to a client or on the freelist, but not if it is - // deleted from the freelist. - std::map fbo_formats; + // For each context, a mapping from FBO number to format details. This is + // filled if the FBO is given out to a client or on the freelist, but + // not if it is deleted from the freelist. + std::map, FBO> fbo_formats; - // A list of all FBOs that are release but not freed (most recently freed - // first). Once this reaches <fbo_freelist_max_length>, the last element - // will be deleted. - std::list fbo_freelist; + // For each context, a list of all FBOs that are released but not freed + // (most recently freed first). Once this reaches <fbo_freelist_max_length>, + // the last element will be deleted. + std::map > fbo_freelist; // See the caveats at the constructor. static size_t estimate_texture_size(const Texture2D &texture_format);