From: Steinar H. Gunderson Date: Fri, 28 Mar 2014 20:15:05 +0000 (+0100) Subject: Merge branch 'epoxy' X-Git-Tag: 1.1~12 X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=7ea0b3a5be9bafaa2d1fa5a17ce285a725ce132b;hp=d2977599b2482c4832b135fa230e8dfb1cf493df;p=movit Merge branch 'epoxy' Conflicts: effect_chain.cpp resource_pool.cpp --- diff --git a/Makefile.in b/Makefile.in index 50431c2..05a80ca 100644 --- a/Makefile.in +++ b/Makefile.in @@ -6,8 +6,8 @@ GTEST_DIR ?= /usr/src/gtest # strive towards having a rock-stable ABI, but at least the soversion will increase # whenever it breaks, so that you will not have silent failures, and distribution package # management can run its course. -movit_ltversion = 1:3:0 -movit_version = 1.0.3 +movit_ltversion = 2:0:0 +movit_version = 1.1 prefix = @prefix@ exec_prefix = @exec_prefix@ diff --git a/blur_effect.frag b/blur_effect.frag index 8853854..afe9f2e 100644 --- a/blur_effect.frag +++ b/blur_effect.frag @@ -22,3 +22,5 @@ vec4 FUNCNAME(vec2 tc) { } return sum; } + +#undef DIRECTION_VERTICAL diff --git a/effect_chain.cpp b/effect_chain.cpp index 7bce60f..0a01bd3 100644 --- a/effect_chain.cpp +++ b/effect_chain.cpp @@ -53,15 +53,6 @@ EffectChain::~EffectChain() delete nodes[i]; } for (unsigned i = 0; i < phases.size(); ++i) { - glBindVertexArray(phases[i]->vao); - check_error(); - - cleanup_vertex_attribute(phases[i]->glsl_program_num, "position", phases[i]->position_vbo); - cleanup_vertex_attribute(phases[i]->glsl_program_num, "texcoord", phases[i]->texcoord_vbo); - - glBindVertexArray(0); - check_error(); - resource_pool->release_glsl_program(phases[i]->glsl_program_num); delete phases[i]; } @@ -292,27 +283,6 @@ void EffectChain::compile_glsl_program(Phase *phase) string vert_shader = read_version_dependent_file("vs", "vert"); phase->glsl_program_num = resource_pool->compile_glsl_program(vert_shader, frag_shader); - - // Prepare the geometry for the fullscreen quad used in this phase. - // (We have separate VAOs per shader, since the bindings can in theory - // be different.) - float vertices[] = { - 0.0f, 1.0f, - 0.0f, 0.0f, - 1.0f, 1.0f, - 1.0f, 0.0f - }; - - glGenVertexArrays(1, &phase->vao); - check_error(); - glBindVertexArray(phase->vao); - check_error(); - - phase->position_vbo = fill_vertex_attribute(phase->glsl_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices); - phase->texcoord_vbo = fill_vertex_attribute(phase->glsl_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices); // Same as vertices. - - glBindVertexArray(0); - check_error(); } // Construct GLSL programs, starting at the given effect and following @@ -1485,8 +1455,6 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height glBindFramebuffer(GL_FRAMEBUFFER, 0); check_error(); - glBindVertexArray(0); - check_error(); glUseProgram(0); check_error(); } @@ -1526,8 +1494,7 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, mapcreate_fbo(context, (*output_textures)[phase]); + fbo = resource_pool->create_fbo((*output_textures)[phase]); glBindFramebuffer(GL_FRAMEBUFFER, fbo); glViewport(0, 0, phase->output_width, phase->output_height); } @@ -1548,11 +1515,32 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, mapvao); + // Now draw! + float vertices[] = { + 0.0f, 1.0f, + 0.0f, 0.0f, + 1.0f, 1.0f, + 1.0f, 0.0f + }; + + GLuint vao; + glGenVertexArrays(1, &vao); + check_error(); + glBindVertexArray(vao); check_error(); + + GLuint position_vbo = fill_vertex_attribute(glsl_program_num, "position", 2, GL_FLOAT, sizeof(vertices), vertices); + GLuint texcoord_vbo = fill_vertex_attribute(glsl_program_num, "texcoord", 2, GL_FLOAT, sizeof(vertices), vertices); // Same as vertices. + glDrawArrays(GL_TRIANGLE_STRIP, 0, 4); check_error(); + cleanup_vertex_attribute(glsl_program_num, "position", position_vbo); + cleanup_vertex_attribute(glsl_program_num, "texcoord", texcoord_vbo); + + glUseProgram(0); + check_error(); + for (unsigned i = 0; i < phase->effects.size(); ++i) { Node *node = phase->effects[i]; node->effect->clear_gl_state(); @@ -1561,6 +1549,9 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase, maprelease_fbo(fbo); } + + glDeleteVertexArrays(1, &vao); + check_error(); } void EffectChain::setup_rtt_sampler(GLuint glsl_program_num, int sampler_num, const string &effect_id, bool use_mipmaps) diff --git a/effect_chain.h b/effect_chain.h index e7d99d5..593232c 100644 --- a/effect_chain.h +++ b/effect_chain.h @@ -16,6 +16,10 @@ // but if so, the threads' contexts need to be set up to share resources, since // the EffectChain holds textures and other OpenGL objects that are tied to the // context. +// +// Memory management (only relevant if you use multiple contexts): +// See corresponding comment in resource_pool.h. This holds even if you don't +// allocate your own ResourcePool, but let EffectChain hold its own. #include #include @@ -99,13 +103,6 @@ struct Phase { // Identifier used to create unique variables in GLSL. // Unique per-phase to increase cacheability of compiled shaders. std::map effect_ids; - - // The geometry needed to draw this quad, bound to the vertex array - // object. (Seemingly it's actually a win not to upload geometry every - // frame, even for something as small as a quad, due to fewer state - // changes.) - GLuint vao; - GLuint position_vbo, texcoord_vbo; }; class EffectChain { diff --git a/fft_pass_effect.cpp b/fft_pass_effect.cpp index ee0b983..48e2677 100644 --- a/fft_pass_effect.cpp +++ b/fft_pass_effect.cpp @@ -14,7 +14,12 @@ namespace movit { FFTPassEffect::FFTPassEffect() : input_width(1280), input_height(720), - direction(HORIZONTAL) + direction(HORIZONTAL), + last_fft_size(-1), + last_direction(INVALID), + last_pass_number(-1), + last_inverse(-1), + last_input_size(-1) { register_int("fft_size", &fft_size); register_int("direction", (int *)&direction); @@ -39,8 +44,6 @@ void FFTPassEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, { Effect::set_gl_state(glsl_program_num, prefix, sampler_num); - int input_size = (direction == VERTICAL) ? input_height : input_width; - // This is needed because it counteracts the precision issues we get // because we sample the input texture with normalized coordinates // (especially when the repeat count along the axis is not a power of @@ -54,6 +57,43 @@ void FFTPassEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); check_error(); + // Because of the memory layout (see below) and because we use offsets, + // the support texture values for many consecutive values will be + // the same. Thus, we can store a smaller texture (giving a small + // performance boost) and just sample it with NEAREST. Also, this + // counteracts any precision issues we might get from linear + // interpolation. + glActiveTexture(GL_TEXTURE0 + *sampler_num); + check_error(); + glBindTexture(GL_TEXTURE_2D, tex); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); + check_error(); + + int input_size = (direction == VERTICAL) ? input_height : input_width; + if (last_fft_size != fft_size || + last_direction != direction || + last_pass_number != pass_number || + last_inverse != inverse || + last_input_size != input_size) { + generate_support_texture(); + } + + set_uniform_int(glsl_program_num, prefix, "support_tex", *sampler_num); + ++*sampler_num; + + assert(input_size % fft_size == 0); + set_uniform_float(glsl_program_num, prefix, "num_repeats", input_size / fft_size); +} + +void FFTPassEffect::generate_support_texture() +{ + int input_size = (direction == VERTICAL) ? input_height : input_width; + // The memory layout follows figure 5.2 on page 25 of // http://gpuwave.sesse.net/gpuwave.pdf -- it can be a bit confusing // at first, but is classically explained more or less as follows: @@ -79,8 +119,8 @@ void FFTPassEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, // bit, so the stride is 8, and so on. assert((fft_size & (fft_size - 1)) == 0); // Must be power of two. - fp16_int_t *tmp = new fp16_int_t[fft_size * 4]; int subfft_size = 1 << pass_number; + fp16_int_t *tmp = new fp16_int_t[subfft_size * 4]; double mulfac; if (inverse) { mulfac = 2.0 * M_PI; @@ -91,9 +131,8 @@ void FFTPassEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, assert((fft_size & (fft_size - 1)) == 0); // Must be power of two. assert(fft_size % subfft_size == 0); int stride = fft_size / subfft_size; - for (int i = 0; i < fft_size; ++i) { - int k = i / stride; // Element number within this sub-FFT. - int offset = i % stride; // Sub-FFT number. + for (int i = 0; i < subfft_size; i++) { + int k = i; double twiddle_real, twiddle_imag; if (k < subfft_size / 2) { @@ -116,33 +155,22 @@ void FFTPassEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, // for using offsets and not direct coordinates as in GPUwave // is that we can have multiple FFTs along the same line, // and want to reuse the support texture by repeating it. - int base = k * stride * 2 + offset; + int base = k * stride * 2; int support_texture_index = i; int src1 = base; int src2 = base + stride; + double sign = 1.0; if (direction == FFTPassEffect::VERTICAL) { // Compensate for OpenGL's bottom-left convention. - support_texture_index = fft_size - support_texture_index - 1; - src1 = fft_size - src1 - 1; - src2 = fft_size - src2 - 1; + support_texture_index = subfft_size - support_texture_index - 1; + sign = -1.0; } - tmp[support_texture_index * 4 + 0] = fp64_to_fp16((src1 - support_texture_index) / double(input_size)); - tmp[support_texture_index * 4 + 1] = fp64_to_fp16((src2 - support_texture_index) / double(input_size)); + tmp[support_texture_index * 4 + 0] = fp64_to_fp16(sign * (src1 - i * stride) / double(input_size)); + tmp[support_texture_index * 4 + 1] = fp64_to_fp16(sign * (src2 - i * stride) / double(input_size)); tmp[support_texture_index * 4 + 2] = fp64_to_fp16(twiddle_real); tmp[support_texture_index * 4 + 3] = fp64_to_fp16(twiddle_imag); } - glActiveTexture(GL_TEXTURE0 + *sampler_num); - check_error(); - glBindTexture(GL_TEXTURE_2D, tex); - check_error(); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); - check_error(); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); - check_error(); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_REPEAT); - check_error(); - // Supposedly FFTs are very sensitive to inaccuracies in the twiddle factors, // at least according to a paper by Schatzman (see gpuwave.pdf reference [30] // for the full reference); however, practical testing indicates that it's @@ -152,16 +180,16 @@ void FFTPassEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, // which gives a nice speed boost. // // Note that the source coordinates become somewhat less accurate too, though. - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, fft_size, 1, 0, GL_RGBA, GL_HALF_FLOAT, tmp); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA16F, subfft_size, 1, 0, GL_RGBA, GL_HALF_FLOAT, tmp); check_error(); delete[] tmp; - set_uniform_int(glsl_program_num, prefix, "support_tex", *sampler_num); - ++*sampler_num; - - assert(input_size % fft_size == 0); - set_uniform_float(glsl_program_num, prefix, "num_repeats", input_size / fft_size); + last_fft_size = fft_size; + last_direction = direction; + last_pass_number = pass_number; + last_inverse = inverse; + last_input_size = input_size; } } // namespace movit diff --git a/fft_pass_effect.frag b/fft_pass_effect.frag index 607f86a..eb26f33 100644 --- a/fft_pass_effect.frag +++ b/fft_pass_effect.frag @@ -22,3 +22,5 @@ vec4 FUNCNAME(vec2 tc) { // where * is complex multiplication. return c1 + support.z * c2 + support.w * vec4(-c2.y, c2.x, -c2.w, c2.z); } + +#undef DIRECTION_VERTICAL diff --git a/fft_pass_effect.h b/fft_pass_effect.h index 90e88bc..fbf4511 100644 --- a/fft_pass_effect.h +++ b/fft_pass_effect.h @@ -101,16 +101,25 @@ public: virtual void inform_added(EffectChain *chain) { this->chain = chain; } - enum Direction { HORIZONTAL = 0, VERTICAL = 1 }; + enum Direction { INVALID = -1, HORIZONTAL = 0, VERTICAL = 1 }; private: + void generate_support_texture(); + EffectChain *chain; int input_width, input_height; GLuint tex; + int fft_size; Direction direction; int pass_number; // From 1..n. int inverse; // 0 = forward (FFT), 1 = reverse (IFFT). + + int last_fft_size; + Direction last_direction; + int last_pass_number; + int last_inverse; + int last_input_size; }; } // namespace movit diff --git a/fft_pass_effect_test.cpp b/fft_pass_effect_test.cpp index 015847a..bce91ce 100644 --- a/fft_pass_effect_test.cpp +++ b/fft_pass_effect_test.cpp @@ -126,7 +126,7 @@ TEST(FFTPassEffectTest, SingleFrequency) { TEST(FFTPassEffectTest, Repeat) { srand(12345); - for (int fft_size = 2; fft_size < 512; fft_size *= 2) { + for (int fft_size = 2; fft_size <= 128; fft_size *= 2) { const int num_repeats = 31; // Prime, to make things more challenging. float data[num_repeats * fft_size * 4]; float expected_data[num_repeats * fft_size * 4], out_data[num_repeats * fft_size * 4]; diff --git a/luma_mix_effect.cpp b/luma_mix_effect.cpp index e34d87f..92c599e 100644 --- a/luma_mix_effect.cpp +++ b/luma_mix_effect.cpp @@ -7,10 +7,11 @@ using namespace std; namespace movit { LumaMixEffect::LumaMixEffect() - : transition_width(1.0f), progress(0.5f) + : transition_width(1.0f), progress(0.5f), inverse(0) { register_float("transition_width", &transition_width); register_float("progress", &progress); + register_int("inverse", &inverse); } string LumaMixEffect::output_fragment_shader() @@ -22,6 +23,7 @@ void LumaMixEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, { Effect::set_gl_state(glsl_program_num, prefix, sampler_num); set_uniform_float(glsl_program_num, prefix, "progress_mul_w_plus_one", progress * (transition_width + 1.0)); + set_uniform_int(glsl_program_num, prefix, "inverse", inverse); } } // namespace movit diff --git a/luma_mix_effect.frag b/luma_mix_effect.frag index bf0833e..c8a05c1 100644 --- a/luma_mix_effect.frag +++ b/luma_mix_effect.frag @@ -1,4 +1,5 @@ uniform float PREFIX(progress_mul_w_plus_one); +uniform bool PREFIX(inverse); vec4 FUNCNAME(vec2 tc) { vec4 first = INPUT1(tc); @@ -36,8 +37,11 @@ vec4 FUNCNAME(vec2 tc) { // So clearly, it should move (w+1) units to the right, and apart from that // just stay a simple mapping. float w = PREFIX(transition_width); - float luma = INPUT3(tc).x * w; - float m = clamp((luma - w) + PREFIX(progress_mul_w_plus_one), 0.0, 1.0); + float luma = INPUT3(tc).x; + if (PREFIX(inverse)) { + luma = 1.0 - luma; + } + float m = clamp((luma * w - w) + PREFIX(progress_mul_w_plus_one), 0.0, 1.0); return mix(first, second, m); } diff --git a/luma_mix_effect.h b/luma_mix_effect.h index 8bd3c50..ce890df 100644 --- a/luma_mix_effect.h +++ b/luma_mix_effect.h @@ -2,8 +2,10 @@ #define _MOVIT_LUMA_MIX_EFFECT_H 1 // Fade between two images based on a third monochrome one; lighter pixels -// will be faded before darker pixels. This allows a wide range of different -// video wipes implemented using a single effect. +// will be faded before darker pixels (unless the inverse flag is set, +// in which case darker pixels will be faded before lighter pixels). +// This allows a wide range of different video wipes implemented using +// a single effect. // // Note that despite the name, the third input's _red_ channel is what's used // for transitions; there is no luma calculation done. If you need that, @@ -28,6 +30,7 @@ public: private: float transition_width, progress; + int inverse; // 0 or 1. }; } // namespace movit diff --git a/luma_mix_effect_test.cpp b/luma_mix_effect_test.cpp index 4a506f5..2b6de15 100644 --- a/luma_mix_effect_test.cpp +++ b/luma_mix_effect_test.cpp @@ -96,4 +96,44 @@ TEST(LumaMixEffectTest, SoftWipeHalfWayThrough) { expect_equal(expected_data, out_data, 2, 2); } +TEST(LumaMixEffectTest, Inverse) { + float data_a[] = { + 0.0f, 0.25f, + 0.75f, 1.0f, + }; + float data_b[] = { + 1.0f, 0.5f, + 0.65f, 0.6f, + }; + float data_luma[] = { + 0.0f, 0.25f, + 0.5f, 0.75f, + }; + + EffectChainTester tester(data_a, 2, 2, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR); + Effect *input1 = tester.get_chain()->last_added_effect(); + Effect *input2 = tester.add_input(data_b, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR); + Effect *input3 = tester.add_input(data_luma, FORMAT_GRAYSCALE, COLORSPACE_sRGB, GAMMA_LINEAR); + + Effect *luma_mix_effect = tester.get_chain()->add_effect(new LumaMixEffect(), input1, input2, input3); + ASSERT_TRUE(luma_mix_effect->set_float("transition_width", 100000.0f)); + ASSERT_TRUE(luma_mix_effect->set_int("inverse", 1)); + + // Inverse is not the same as reverse, so progress=0 should behave identically + // as HardWipe, ie. everything should be from A. + float out_data[4]; + ASSERT_TRUE(luma_mix_effect->set_float("progress", 0.0f)); + tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR); + expect_equal(data_a, out_data, 2, 2); + + // Lower two from A, the rest from B. + float expected_data_049[] = { + 1.0f, 0.5f, + 0.75f, 1.0f, + }; + ASSERT_TRUE(luma_mix_effect->set_float("progress", 0.49f)); + tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR); + expect_equal(expected_data_049, out_data, 2, 2); +} + } // namespace movit diff --git a/resource_pool.cpp b/resource_pool.cpp index 2298c3c..5abc1b2 100644 --- a/resource_pool.cpp +++ b/resource_pool.cpp @@ -52,15 +52,29 @@ ResourcePool::~ResourcePool() assert(texture_formats.empty()); assert(texture_freelist_bytes == 0); - for (list::const_iterator freelist_it = fbo_freelist.begin(); - freelist_it != fbo_freelist.end(); - ++freelist_it) { - GLuint free_fbo_num = *freelist_it; - assert(fbo_formats.count(free_fbo_num) != 0); - fbo_formats.erase(free_fbo_num); - glDeleteFramebuffers(1, &free_fbo_num); - check_error(); + void *context = get_gl_context_identifier(); + cleanup_unlinked_fbos(context); + + for (map >::iterator context_it = fbo_freelist.begin(); + context_it != fbo_freelist.end(); + ++context_it) { + if (context_it->first != context) { + // If this does not hold, the client should have called clean_context() earlier. + assert(context_it->second.empty()); + continue; + } + for (list::const_iterator freelist_it = context_it->second.begin(); + freelist_it != context_it->second.end(); + ++freelist_it) { + pair key(context, *freelist_it); + GLuint free_fbo_num = *freelist_it; + assert(fbo_formats.count(key) != 0); + fbo_formats.erase(key); + glDeleteFramebuffers(1, &free_fbo_num); + check_error(); + } } + assert(fbo_formats.empty()); } @@ -285,38 +299,40 @@ void ResourcePool::release_2d_texture(GLuint texture_num) glDeleteTextures(1, &free_texture_num); check_error(); - // Delete any FBO related to this texture. - for (list::iterator fbo_freelist_it = fbo_freelist.begin(); - fbo_freelist_it != fbo_freelist.end(); ) { - GLuint fbo_num = *fbo_freelist_it; - map::const_iterator format_it = fbo_formats.find(fbo_num); - assert(format_it != fbo_formats.end()); + // Unlink any lingering FBO related to this texture. We might + // not be in the right context, so don't delete it right away; + // the cleanup in release_fbo() (which calls cleanup_unlinked_fbos()) + // will take care of actually doing that later. + for (map, FBO>::iterator format_it = fbo_formats.begin(); + format_it != fbo_formats.end(); + ++format_it) { if (format_it->second.texture_num == free_texture_num) { - glDeleteFramebuffers(1, &fbo_num); - fbo_freelist.erase(fbo_freelist_it++); - } else { - ++fbo_freelist_it; + format_it->second.texture_num = 0; } } } pthread_mutex_unlock(&lock); } -GLuint ResourcePool::create_fbo(void *context, GLuint texture_num) +GLuint ResourcePool::create_fbo(GLuint texture_num) { + void *context = get_gl_context_identifier(); + pthread_mutex_lock(&lock); - // See if there's an FBO on the freelist we can use. - for (list::iterator freelist_it = fbo_freelist.begin(); - freelist_it != fbo_freelist.end(); - ++freelist_it) { - GLuint fbo_num = *freelist_it; - map::const_iterator format_it = fbo_formats.find(fbo_num); - assert(format_it != fbo_formats.end()); - if (format_it->second.context == context && - format_it->second.texture_num == texture_num) { - fbo_freelist.erase(freelist_it); - pthread_mutex_unlock(&lock); - return fbo_num; + if (fbo_freelist.count(context) != 0) { + // See if there's an FBO on the freelist we can use. + for (list::iterator freelist_it = fbo_freelist[context].begin(); + freelist_it != fbo_freelist[context].end(); + ++freelist_it) { + GLuint fbo_num = *freelist_it; + map, FBO>::const_iterator format_it = + fbo_formats.find(make_pair(context, fbo_num)); + assert(format_it != fbo_formats.end()); + if (format_it->second.texture_num == texture_num) { + fbo_freelist[context].erase(freelist_it); + pthread_mutex_unlock(&lock); + return fbo_num; + } } } @@ -339,10 +355,10 @@ GLuint ResourcePool::create_fbo(void *context, GLuint texture_num) check_error(); FBO fbo_format; - fbo_format.context = context; fbo_format.texture_num = texture_num; - assert(fbo_formats.count(fbo_num) == 0); - fbo_formats.insert(make_pair(fbo_num, fbo_format)); + pair key(context, fbo_num); + assert(fbo_formats.count(key) == 0); + fbo_formats.insert(make_pair(key, fbo_format)); pthread_mutex_unlock(&lock); return fbo_num; @@ -350,19 +366,58 @@ GLuint ResourcePool::create_fbo(void *context, GLuint texture_num) void ResourcePool::release_fbo(GLuint fbo_num) { + void *context = get_gl_context_identifier(); + pthread_mutex_lock(&lock); - fbo_freelist.push_front(fbo_num); - assert(fbo_formats.count(fbo_num) != 0); - - while (fbo_freelist.size() > fbo_freelist_max_length) { - GLuint free_fbo_num = fbo_freelist.front(); - fbo_freelist.pop_front(); - assert(fbo_formats.count(free_fbo_num) != 0); - fbo_formats.erase(free_fbo_num); + fbo_freelist[context].push_front(fbo_num); + assert(fbo_formats.count(make_pair(context, fbo_num)) != 0); + + // Now that we're in this context, free up any FBOs that are connected + // to deleted textures (in release_2d_texture). + cleanup_unlinked_fbos(context); + + shrink_fbo_freelist(context, fbo_freelist_max_length); + pthread_mutex_unlock(&lock); +} + +void ResourcePool::clean_context() +{ + void *context = get_gl_context_identifier(); + + // Currently, we only need to worry about FBOs, as they are the only + // non-shareable resource we hold. + shrink_fbo_freelist(context, 0); + fbo_freelist.erase(context); +} + +void ResourcePool::cleanup_unlinked_fbos(void *context) +{ + for (list::iterator freelist_it = fbo_freelist[context].begin(); + freelist_it != fbo_freelist[context].end(); ) { + GLuint fbo_num = *freelist_it; + pair key(context, fbo_num); + assert(fbo_formats.count(key) != 0); + if (fbo_formats[key].texture_num == 0) { + glDeleteFramebuffers(1, &fbo_num); + check_error(); + fbo_freelist[context].erase(freelist_it++); + } else { + freelist_it++; + } + } +} + +void ResourcePool::shrink_fbo_freelist(void *context, size_t max_length) +{ + while (fbo_freelist[context].size() > max_length) { + GLuint free_fbo_num = fbo_freelist[context].back(); + pair key(context, free_fbo_num); + fbo_freelist[context].pop_back(); + assert(fbo_formats.count(key) != 0); + fbo_formats.erase(key); glDeleteFramebuffers(1, &free_fbo_num); check_error(); } - pthread_mutex_unlock(&lock); } size_t ResourcePool::estimate_texture_size(const Texture2D &texture_format) diff --git a/resource_pool.h b/resource_pool.h index 7029fbf..a331f4f 100644 --- a/resource_pool.h +++ b/resource_pool.h @@ -15,6 +15,12 @@ // Thread-safety: All functions except the constructor and destructor can be // safely called from multiple threads at the same time, provided they have // separate (but sharing) OpenGL contexts. +// +// Memory management (only relevant if you use multiple contexts): Some objects, +// like FBOs, are not shareable across contexts, and can only be deleted from +// the context they were created in. Thus, you will need to tell the +// ResourcePool explicitly if you delete a context, or they will leak (and the +// ResourcePool destructor will assert-fail). See clean_context(). #include #include @@ -41,7 +47,7 @@ public: // twice this estimate or more. ResourcePool(size_t program_freelist_max_length = 100, size_t texture_freelist_max_bytes = 100 << 20, // 100 MB. - size_t fbo_freelist_max_length = 100); + size_t fbo_freelist_max_length = 100); // Per context. ~ResourcePool(); // All remaining functions are intended for calls from EffectChain only. @@ -63,21 +69,36 @@ public: // Allocate an FBO with the the given texture bound as a framebuffer attachment, // or fetch a previous used if possible. Unbinds GL_FRAMEBUFFER afterwards. // Keeps ownership of the FBO; you must call release_fbo() of deleting - // it when you no longer want it. You can get an appropriate context - // pointer from get_gl_context_identifier(). + // it when you no longer want it. // // NOTE: In principle, the FBO doesn't have a resolution or pixel format; // you can bind almost whatever texture you want to it. However, changing // textures can have an adverse effect on performance due to validation, // in particular on NVidia cards. Also, keep in mind that FBOs are not - // shareable across contexts. - GLuint create_fbo(void *context, GLuint texture_num); + // shareable across contexts, so you must have the context that's supposed + // to own the FBO current when you create or release it. + GLuint create_fbo(GLuint texture_num); void release_fbo(GLuint fbo_num); + // Informs the ResourcePool that the current context is going away soon, + // and that any resources held for it in the freelist should be deleted. + // + // You do not need to do this for the last context; the regular destructor + // will take care of that. This means that if you only ever use one + // thread/context, you never need to call this function. + void clean_context(); + private: // Delete the given program and both its shaders. void delete_program(GLuint program_num); + // Deletes all FBOs for the given context that belong to deleted textures. + void cleanup_unlinked_fbos(void *context); + + // Remove FBOs off the end of the freelist for , until it + // is no more than elements long. + void shrink_fbo_freelist(void *context, size_t max_length); + // Protects all the other elements in the class. pthread_mutex_t lock; @@ -118,19 +139,18 @@ private: size_t texture_freelist_bytes; struct FBO { - void *context; - GLuint texture_num; + GLuint texture_num; // 0 means associated to a texture that has since been deleted. }; - // A mapping from FBO number to format details. This is filled if the - // FBO is given out to a client or on the freelist, but not if it is - // deleted from the freelist. - std::map fbo_formats; + // For each context, a mapping from FBO number to format details. This is + // filled if the FBO is given out to a client or on the freelist, but + // not if it is deleted from the freelist. + std::map, FBO> fbo_formats; - // A list of all FBOs that are release but not freed (most recently freed - // first). Once this reaches , the last element - // will be deleted. - std::list fbo_freelist; + // For each context, a list of all FBOs that are released but not freed + // (most recently freed first). Once this reaches , + // the last element will be deleted. + std::map > fbo_freelist; // See the caveats at the constructor. static size_t estimate_texture_size(const Texture2D &texture_format); diff --git a/slice_effect.frag b/slice_effect.frag index 59e3cc1..e77c9fa 100644 --- a/slice_effect.frag +++ b/slice_effect.frag @@ -25,3 +25,5 @@ vec4 FUNCNAME(vec2 tc) { return INPUT(vec2(input_coord, tc.y)); #endif } + +#undef DIRECTION_VERTICAL