From: Steinar H. Gunderson Date: Sun, 19 Jan 2014 22:53:18 +0000 (+0100) Subject: Round explicitly after dithering, for GPUs that don't do it properly themselves. X-Git-Tag: 1.0~78 X-Git-Url: https://git.sesse.net/?p=movit;a=commitdiff_plain;h=271fa61d1251144b5558555ec9873e9f24a13a70;ds=sidebyside Round explicitly after dithering, for GPUs that don't do it properly themselves. This was causing unit test failures in the DitherEffect unit test both on ATI and nVidia GPUs; Intel also rounds somewhat inaccurately, but much, much better, so the extra code won't be activated for them. I think this might be driver-dependent, but we will detect it correctly in any case. --- diff --git a/demo.cpp b/demo.cpp index cd9fe9f..a425566 100644 --- a/demo.cpp +++ b/demo.cpp @@ -180,6 +180,8 @@ int main(int argc, char **argv) init_movit(".", MOVIT_DEBUG_ON); printf("GPU texture subpixel precision: about %.1f bits\n", log2(1.0f / movit_texel_subpixel_precision)); + printf("Wrongly rounded x+0.48 or x+0.52 values: %d/510\n", + movit_num_wrongly_rounded); unsigned img_w, img_h; unsigned char *src_img = load_image(argc > 1 ? argv[1] : "blg_wheels_woman_1.jpg", &img_w, &img_h); diff --git a/dither_effect.cpp b/dither_effect.cpp index 1554356..4643d07 100644 --- a/dither_effect.cpp +++ b/dither_effect.cpp @@ -4,6 +4,7 @@ #include "dither_effect.h" #include "effect_util.h" +#include "init.h" #include "util.h" namespace { @@ -40,7 +41,9 @@ DitherEffect::~DitherEffect() std::string DitherEffect::output_fragment_shader() { - return read_file("dither_effect.frag"); + char buf[256]; + sprintf(buf, "#define NEED_EXPLICIT_ROUND %d\n", (movit_num_wrongly_rounded > 0)); + return buf + read_file("dither_effect.frag"); } void DitherEffect::update_texture(GLuint glsl_program_num, const std::string &prefix, unsigned *sampler_num) @@ -110,4 +113,9 @@ void DitherEffect::set_gl_state(GLuint glsl_program_num, const std::string &pref // we don't have to worry about it. 
float tc_scale[] = { float(width) / float(texture_width), float(height) / float(texture_height) }; set_uniform_vec2(glsl_program_num, prefix, "tc_scale", tc_scale); + + // Used if the shader needs to do explicit rounding. + int round_fac = (1 << num_bits) - 1; + set_uniform_float(glsl_program_num, prefix, "round_fac", round_fac); + set_uniform_float(glsl_program_num, prefix, "inv_round_fac", 1.0f / round_fac); } diff --git a/dither_effect.frag b/dither_effect.frag index 930bd94..f9c6ad1 100644 --- a/dither_effect.frag +++ b/dither_effect.frag @@ -1,10 +1,18 @@ uniform sampler2D PREFIX(dither_tex); uniform vec2 PREFIX(tc_scale); +uniform float PREFIX(round_fac), PREFIX(inv_round_fac); vec4 FUNCNAME(vec2 tc) { // We also choose to dither alpha, just in case. // Maybe it should in theory have a separate dither, - // but I doubt it matters much. We currently don't - // really handle alpha in any case. - return INPUT(tc) + texture2D(PREFIX(dither_tex), tc * PREFIX(tc_scale)).xxxx; + // but I doubt it matters much. + vec4 result = INPUT(tc) + texture2D(PREFIX(dither_tex), tc * PREFIX(tc_scale)).xxxx; + + // NEED_EXPLICIT_ROUND will be #defined to 1 if the GPU has inaccurate + // fp32 -> int8 framebuffer rounding, and 0 otherwise. +#if NEED_EXPLICIT_ROUND + result = round(result * vec4(PREFIX(round_fac))) * vec4(PREFIX(inv_round_fac)); +#endif + + return result; } diff --git a/init.cpp b/init.cpp index f6dd904..396cd8b 100644 --- a/init.cpp +++ b/init.cpp @@ -11,6 +11,7 @@ bool movit_initialized = false; MovitDebugLevel movit_debug_level = MOVIT_DEBUG_ON; float movit_texel_subpixel_precision; bool movit_srgb_textures_supported; +int movit_num_wrongly_rounded; // The rules for objects with nontrivial constructors in static scope // are somewhat convoluted, and easy to mess up. We simply have a @@ -131,6 +132,122 @@ void measure_texel_subpixel_precision() check_error(); } +void measure_roundoff_problems() +{ + // Generate a destination texture to render to, and an FBO. 
+ GLuint dst_texnum, fbo; + + glGenTextures(1, &dst_texnum); + check_error(); + glBindTexture(GL_TEXTURE_2D, dst_texnum); + check_error(); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 512, 1, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + check_error(); + + glGenFramebuffers(1, &fbo); + check_error(); + glBindFramebuffer(GL_FRAMEBUFFER, fbo); + check_error(); + glFramebufferTexture2D( + GL_FRAMEBUFFER, + GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, + dst_texnum, + 0); + check_error(); + + // Now generate a texture where every value except the last should be + // rounded up to the next one. However, there are cards (in highly + // common use) that can't do this right, for unknown reasons. + GLuint src_texnum; + float texdata[512]; + for (int i = 0; i < 256; ++i) { + texdata[i * 2 + 0] = (i + 0.48) / 255.0; + texdata[i * 2 + 1] = (i + 0.52) / 255.0; + } + glGenTextures(1, &src_texnum); + check_error(); + glBindTexture(GL_TEXTURE_1D, src_texnum); + check_error(); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + check_error(); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + check_error(); + glTexImage1D(GL_TEXTURE_1D, 0, GL_LUMINANCE32F_ARB, 512, 0, GL_LUMINANCE, GL_FLOAT, texdata); + check_error(); + glEnable(GL_TEXTURE_1D); + check_error(); + + // Basic state. + glDisable(GL_BLEND); + check_error(); + glDisable(GL_DEPTH_TEST); + check_error(); + glDepthMask(GL_FALSE); + check_error(); + + glViewport(0, 0, 512, 1); + + glMatrixMode(GL_PROJECTION); + glLoadIdentity(); + glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0); + + glMatrixMode(GL_MODELVIEW); + glLoadIdentity(); + check_error(); + + // Draw the texture stretched over a long quad, interpolating it out. 
+ glBegin(GL_QUADS); + + glTexCoord1f(0.0f); + glVertex2f(0.0f, 0.0f); + + glTexCoord1f(1.0f); + glVertex2f(1.0f, 0.0f); + + glTexCoord1f(1.0f); + glVertex2f(1.0f, 1.0f); + + glTexCoord1f(0.0f); + glVertex2f(0.0f, 1.0f); + + glEnd(); + check_error(); + + glDisable(GL_TEXTURE_1D); + check_error(); + + // Now read the data back and see what the card did. (Ignore the last value.) + // (We only look at the red channel; the others will surely be the same.) + unsigned char out_data[512]; + glReadPixels(0, 0, 512, 1, GL_RED, GL_UNSIGNED_BYTE, out_data); + check_error(); + + int wrongly_rounded = 0; + for (unsigned i = 0; i < 255; ++i) { + if (out_data[i * 2 + 0] != i) { + ++wrongly_rounded; + } + if (out_data[i * 2 + 1] != i + 1) { + ++wrongly_rounded; + } + } + + movit_num_wrongly_rounded = wrongly_rounded; + + // Clean up. + glBindTexture(GL_TEXTURE_1D, 0); + check_error(); + glBindFramebuffer(GL_FRAMEBUFFER, 0); + check_error(); + glDeleteFramebuffers(1, &fbo); + check_error(); + glDeleteTextures(1, &dst_texnum); + check_error(); + glDeleteTextures(1, &src_texnum); + check_error(); +} + void check_extensions() { // We fundamentally need FBOs and floating-point textures. @@ -174,6 +291,7 @@ void init_movit(const std::string& data_directory, MovitDebugLevel debug_level) glPixelStorei(GL_UNPACK_ALIGNMENT, 1); measure_texel_subpixel_precision(); + measure_roundoff_problems(); check_extensions(); movit_initialized = true; diff --git a/init.h b/init.h index 53a4018..9b4deff 100644 --- a/init.h +++ b/init.h @@ -44,6 +44,17 @@ extern MovitDebugLevel movit_debug_level; // We currently don't bother to test for more than 1024 levels. extern float movit_texel_subpixel_precision; +// Some GPUs use very inaccurate fixed-function circuits for rounding +// floating-point values to 8-bit outputs, leading to absurdities like +// the roundoff point between 128 and 129 being 128.62 instead of 128.6. 
+// We test, for every integer, x+0.48 and x+0.52 and check that they +// round the right way (giving some leeway, but not a lot); the number +// of errors is stored here. +// +// If this value is above 0, and you have enabled dithering, we will +// round off explicitly at the very end of the shader. +extern int movit_num_wrongly_rounded; + // Whether the GPU in use supports GL_EXT_texture_sRGB. extern bool movit_srgb_textures_supported;