From 45955bca8c14dd8cac4fa922e45fb0f8be507d58 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Wed, 19 Mar 2014 00:12:34 +0100 Subject: [PATCH] Reduce the amount of arithmetic in the BlurEffect shader a bit. MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit We did additions and subtractions with zero, which is sort of a waste on scalar architectures. Helps ever so slightly on the demo app on my NVidia card (3–4%). --- blur_effect.cpp | 25 ++++++++++--------------- blur_effect.frag | 20 +++++++++++++++----- effect_util.cpp | 11 +++++++++++ effect_util.h | 1 + 4 files changed, 37 insertions(+), 20 deletions(-) diff --git a/blur_effect.cpp b/blur_effect.cpp index 9397ca4..d43a7ca 100644 --- a/blur_effect.cpp +++ b/blur_effect.cpp @@ -108,7 +108,9 @@ SingleBlurPassEffect::SingleBlurPassEffect(BlurEffect *parent) string SingleBlurPassEffect::output_fragment_shader() { - return read_file("blur_effect.frag"); + char buf[256]; + sprintf(buf, "#define DIRECTION_VERTICAL %d\n", (direction == VERTICAL)); + return buf + read_file("blur_effect.frag"); } void SingleBlurPassEffect::set_gl_state(GLuint glsl_program_num, const string &prefix, unsigned *sampler_num) @@ -159,13 +161,11 @@ void SingleBlurPassEffect::set_gl_state(GLuint glsl_program_num, const string &p // // We pack the parameters into a float4: The relative sample coordinates // in (x,y), and the weight in z. w is unused. - float samples[4 * (NUM_TAPS / 2 + 1)]; + float samples[2 * (NUM_TAPS / 2 + 1)]; // Center sample. - samples[4 * 0 + 0] = 0.0f; - samples[4 * 0 + 1] = 0.0f; - samples[4 * 0 + 2] = weight[0]; - samples[4 * 0 + 3] = 0.0f; + samples[2 * 0 + 0] = 0.0f; + samples[2 * 0 + 1] = weight[0]; // All other samples. 
for (unsigned i = 1; i < NUM_TAPS / 2 + 1; ++i) { @@ -176,23 +176,18 @@ void SingleBlurPassEffect::set_gl_state(GLuint glsl_program_num, const string &p float offset, total_weight; combine_two_samples(w1, w2, &offset, &total_weight, NULL); - float x = 0.0f, y = 0.0f; - if (direction == HORIZONTAL) { - x = (base_pos + offset) / (float)width; + samples[2 * i + 0] = (base_pos + offset) / (float)width; } else if (direction == VERTICAL) { - y = (base_pos + offset) / (float)height; + samples[2 * i + 0] = (base_pos + offset) / (float)height; } else { assert(false); } - samples[4 * i + 0] = x; - samples[4 * i + 1] = y; - samples[4 * i + 2] = total_weight; - samples[4 * i + 3] = 0.0f; + samples[2 * i + 1] = total_weight; } - set_uniform_vec4_array(glsl_program_num, prefix, "samples", samples, NUM_TAPS / 2 + 1); + set_uniform_vec2_array(glsl_program_num, prefix, "samples", samples, NUM_TAPS / 2 + 1); } void SingleBlurPassEffect::clear_gl_state() diff --git a/blur_effect.frag b/blur_effect.frag index c6e4cf5..8853854 100644 --- a/blur_effect.frag +++ b/blur_effect.frag @@ -1,14 +1,24 @@ -// A simple unidirectional blur. +// A simple unidirectional blur. +// DIRECTION_VERTICAL will be #defined to 1 if we are doing a vertical blur, +// 0 otherwise. 
#define NUM_TAPS 16 -uniform vec4 PREFIX(samples)[NUM_TAPS + 1]; +uniform vec2 PREFIX(samples)[NUM_TAPS + 1]; vec4 FUNCNAME(vec2 tc) { - vec4 sum = vec4(PREFIX(samples)[0].z) * INPUT(tc); + vec4 sum = vec4(PREFIX(samples)[0].y) * INPUT(tc); for (int i = 1; i < NUM_TAPS + 1; ++i) { - vec4 sample = PREFIX(samples)[i]; - sum += vec4(sample.z) * (INPUT(tc - sample.xy) + INPUT(tc + sample.xy)); + vec2 sample = PREFIX(samples)[i]; + vec2 sample1_tc = tc, sample2_tc = tc; +#if DIRECTION_VERTICAL + sample1_tc.y -= sample.x; + sample2_tc.y += sample.x; +#else + sample1_tc.x -= sample.x; + sample2_tc.x += sample.x; +#endif + sum += vec4(sample.y) * (INPUT(sample1_tc) + INPUT(sample2_tc)); } return sum; } diff --git a/effect_util.cpp b/effect_util.cpp index 35a94a7..d485d46 100644 --- a/effect_util.cpp +++ b/effect_util.cpp @@ -69,6 +69,17 @@ void set_uniform_vec4(GLuint glsl_program_num, const string &prefix, const strin check_error(); } +void set_uniform_vec2_array(GLuint glsl_program_num, const string &prefix, const string &key, const float *values, size_t num_values) +{ + GLint location = get_uniform_location(glsl_program_num, prefix, key); + if (location == -1) { + return; + } + check_error(); + glUniform2fv(location, num_values, values); + check_error(); +} + void set_uniform_vec4_array(GLuint glsl_program_num, const string &prefix, const string &key, const float *values, size_t num_values) { GLint location = get_uniform_location(glsl_program_num, prefix, key); diff --git a/effect_util.h b/effect_util.h index 65bd315..5420ce5 100644 --- a/effect_util.h +++ b/effect_util.h @@ -26,6 +26,7 @@ void set_uniform_float(GLuint glsl_program_num, const std::string &prefix, const void set_uniform_vec2(GLuint glsl_program_num, const std::string &prefix, const std::string &key, const float *values); void set_uniform_vec3(GLuint glsl_program_num, const std::string &prefix, const std::string &key, const float *values); void set_uniform_vec4(GLuint glsl_program_num, const std::string 
&prefix, const std::string &key, const float *values); +void set_uniform_vec2_array(GLuint glsl_program_num, const std::string &prefix, const std::string &key, const float *values, size_t num_values); void set_uniform_vec4_array(GLuint glsl_program_num, const std::string &prefix, const std::string &key, const float *values, size_t num_values); void set_uniform_mat3(GLuint glsl_program_num, const std::string &prefix, const std::string &key, const Eigen::Matrix3d &matrix); -- 2.39.2