From: Steinar H. Gunderson <sgunderson@bigfoot.com>
Date: Tue, 23 Feb 2016 21:48:26 +0000 (+0100)
Subject: Merge branch '1.3.x-release'
X-Git-Tag: 1.4.0~17
X-Git-Url: https://git.sesse.net/?p=movit;a=commitdiff_plain;h=9c6b86affb570a2e2d18c6da795c359da393f9a3;hp=244117563f4c74686ba8f47d1942850e601545f2

Merge branch '1.3.x-release'
---

diff --git a/README b/README
index 8e47db7..c7b129a 100644
--- a/README
+++ b/README
@@ -20,12 +20,8 @@ OK, you need
 * A C++98 compiler. GCC will do. (I haven't tried Windows, but it
   works fine on Linux and OS X, and Movit is not very POSIX-bound.)
 * GNU Make.
-* A GPU capable of running GLSL fragment shaders,
-  processing floating-point textures, and a few other things (all are
-  part of OpenGL 3.0 or newer, although most OpenGL 2.0 cards also
-  have what's needed through extensions). If your machine is less than five
-  years old _and you have the appropriate drivers_, you're home free.
-  GLES3 (for mobile devices) will also work.
+* A GPU capable of running OpenGL 3.0 or newer. GLES3 (for mobile devices)
+  will also work.
 * The [Eigen 3], [FFTW3] and [Google Test] libraries. (The library itself
   does not depend on the latter, but you probably want to run the unit tests.)
 * The [epoxy] library, for dealing with OpenGL extensions on various
diff --git a/effect_chain.cpp b/effect_chain.cpp
index 5e43474..fa7340d 100644
--- a/effect_chain.cpp
+++ b/effect_chain.cpp
@@ -32,12 +32,13 @@ using namespace std;
 
 namespace movit {
 
-EffectChain::EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool)
+EffectChain::EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool, GLenum intermediate_format)
 	: aspect_nom(aspect_nom),
 	  aspect_denom(aspect_denom),
 	  output_color_rgba(false),
 	  output_color_ycbcr(false),
 	  dither_effect(NULL),
+	  intermediate_format(intermediate_format),
 	  num_dither_bits(0),
 	  output_origin(OUTPUT_ORIGIN_BOTTOM_LEFT),
 	  finalized(false),
@@ -650,9 +651,8 @@ Phase *EffectChain::construct_phase(Node *output, map<Node *, Phase *> *complete
 	// Actually make the shader for this phase.
 	compile_glsl_program(phase);
 
-	// Initialize timer objects.
+	// Initialize timers.
 	if (movit_timer_queries_supported) {
-		glGenQueries(1, &phase->timer_query_object);
 		phase->time_elapsed_ns = 0;
 		phase->num_measured_iterations = 0;
 	}
@@ -1693,6 +1693,8 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
 	check_error();
 	glDisable(GL_DITHER);
 	check_error();
+	glEnable(GL_FRAMEBUFFER_SRGB);
+	check_error();
 
 	// Save original viewport.
 	GLuint x = 0, y = 0;
@@ -1736,7 +1738,15 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
 		Phase *phase = phases[phase_num];
 
 		if (do_phase_timing) {
-			glBeginQuery(GL_TIME_ELAPSED, phase->timer_query_object);
+			GLuint timer_query_object;
+			if (phase->timer_query_objects_free.empty()) {
+				glGenQueries(1, &timer_query_object);
+			} else {
+				timer_query_object = phase->timer_query_objects_free.front();
+				phase->timer_query_objects_free.pop_front();
+			}
+			glBeginQuery(GL_TIME_ELAPSED, timer_query_object);
+			phase->timer_query_objects_running.push_back(timer_query_object);
 		}
 		if (phase_num == phases.size() - 1) {
 			// Last phase goes to the output the user specified.
@@ -1778,14 +1788,22 @@ void EffectChain::render_to_fbo(GLuint dest_fbo, unsigned width, unsigned height
 		// Get back the timer queries.
 		for (unsigned phase_num = 0; phase_num < phases.size(); ++phase_num) {
 			Phase *phase = phases[phase_num];
-			GLint available = 0;
-			while (!available) {
-				glGetQueryObjectiv(phase->timer_query_object, GL_QUERY_RESULT_AVAILABLE, &available);
+			for (std::list<GLuint>::iterator timer_it = phase->timer_query_objects_running.begin();
+			     timer_it != phase->timer_query_objects_running.end(); ) {
+				GLuint timer_query_object = *timer_it;  // Query names are GLuint, matching the list's element type.
+				GLint available = 0;  // Treated as "not ready" if the call below errors out and leaves it untouched.
+				glGetQueryObjectiv(timer_query_object, GL_QUERY_RESULT_AVAILABLE, &available);
+				if (available) {  // Result is ready: accumulate it and recycle the query object.
+					GLuint64 time_elapsed;
+					glGetQueryObjectui64v(timer_query_object, GL_QUERY_RESULT, &time_elapsed);
+					phase->time_elapsed_ns += time_elapsed;
+					++phase->num_measured_iterations;
+					phase->timer_query_objects_free.push_back(timer_query_object);
+					phase->timer_query_objects_running.erase(timer_it++);  // Advance before erase so the iterator stays valid.
+				} else {  // Not ready yet: leave it in the running list without blocking.
+					++timer_it;
+				}
 			}
-			GLuint64 time_elapsed;
-			glGetQueryObjectui64v(phase->timer_query_object, GL_QUERY_RESULT, &time_elapsed);
-			phase->time_elapsed_ns += time_elapsed;
-			++phase->num_measured_iterations;
 		}
 	}
 }
@@ -1838,7 +1856,7 @@ void EffectChain::execute_phase(Phase *phase, bool last_phase,
 	if (!last_phase) {
 		find_output_size(phase);
 
-		GLuint tex_num = resource_pool->create_2d_texture(GL_RGBA16F, phase->output_width, phase->output_height);
+		GLuint tex_num = resource_pool->create_2d_texture(intermediate_format, phase->output_width, phase->output_height);
 		output_textures->insert(make_pair(phase, tex_num));
 	}
 
diff --git a/effect_chain.h b/effect_chain.h
index 70dc256..718d5ff 100644
--- a/effect_chain.h
+++ b/effect_chain.h
@@ -23,6 +23,7 @@
 
 #include <epoxy/gl.h>
 #include <stdio.h>
+#include <list>
 #include <map>
 #include <set>
 #include <string>
@@ -179,7 +180,8 @@ struct Phase {
 	std::vector<Uniform<Eigen::Matrix3d> > uniforms_mat3;
 
 	// For measurement of GPU time used.
-	GLuint timer_query_object;
+	std::list<GLuint> timer_query_objects_running;
+	std::list<GLuint> timer_query_objects_free;
 	uint64_t time_elapsed_ns;
 	uint64_t num_measured_iterations;
 };
@@ -192,7 +194,7 @@ public:
 	// will create its own that is not shared with anything else. Does not take
 	// ownership of the passed-in ResourcePool, but will naturally take ownership
 	// of its own internal one if created.
-	EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool = NULL);
+	EffectChain(float aspect_nom, float aspect_denom, ResourcePool *resource_pool = NULL, GLenum intermediate_format = GL_RGBA16F);
 	~EffectChain();
 
 	// User API:
@@ -436,6 +438,7 @@ private:
 	std::vector<Input *> inputs;  // Also contained in nodes.
 	std::vector<Phase *> phases;
 
+	GLenum intermediate_format;
 	unsigned num_dither_bits;
 	OutputOrigin output_origin;
 	bool finalized;
diff --git a/effect_chain_test.cpp b/effect_chain_test.cpp
index 6ab874d..adf832a 100644
--- a/effect_chain_test.cpp
+++ b/effect_chain_test.cpp
@@ -1303,5 +1303,20 @@ TEST(EffectChainTest, StringStreamLocalesWork) {
 	free(saved_locale);
 }
 
+TEST(EffectChainTest, sRGBIntermediate) {  // Runs a two-effect chain with GL_SRGB8 passed as intermediate_format.
+	float data[] = {
+		0.0f, 0.5f, 0.0f, 1.0f,
+	};
+	float out_data[4];
+	EffectChainTester tester(data, 1, 1, FORMAT_RGBA_PREMULTIPLIED_ALPHA, COLORSPACE_sRGB, GAMMA_LINEAR, GL_RGBA16F_ARB, GL_SRGB8);  // Last two arguments: framebuffer_format, intermediate_format.
+	tester.get_chain()->add_effect(new IdentityEffect());
+	tester.get_chain()->add_effect(new BouncingIdentityEffect());  // NOTE(review): presumably forces a pass through an intermediate texture -- confirm.
+	tester.run(out_data, GL_RGBA, COLORSPACE_sRGB, GAMMA_LINEAR);
+
+	EXPECT_GE(fabs(out_data[1] - data[1]), 1e-3)  // Lower bound: the output must differ measurably from the input.
+	    << "Expected sRGB not to be able to represent 0.5 exactly (got " << out_data[1] << ")";
+	EXPECT_LT(fabs(out_data[1] - data[1]), 0.1f)  // Upper bound: the output must still be close to the input.
+	    << "Expected sRGB to be able to represent 0.5 approximately (got " << out_data[1] << ")";
+}
 
 }  // namespace movit
diff --git a/flat_input.h b/flat_input.h
index d8b62b7..b337788 100644
--- a/flat_input.h
+++ b/flat_input.h
@@ -9,7 +9,6 @@
 #include "effect_chain.h"
 #include "fp16.h"
 #include "image_format.h"
-#include "init.h"
 #include "input.h"
 
 namespace movit {
@@ -30,8 +29,7 @@ public:
 		// support for single-channel sRGB decoding, but it's not supported
 		// on GLES, and we're already actively rewriting single-channel inputs
 		// to GL_RED (even on desktop), so we stick to 3- and 4-channel inputs.
-		return (movit_srgb_textures_supported &&
-		        type == GL_UNSIGNED_BYTE &&
+		return (type == GL_UNSIGNED_BYTE &&
 			(pixel_format == FORMAT_RGB ||
 			 pixel_format == FORMAT_RGBA_POSTMULTIPLIED_ALPHA) &&
 		        (image_format.gamma_curve == GAMMA_LINEAR ||
diff --git a/init.cpp b/init.cpp
index 2bade83..d312cd8 100644
--- a/init.cpp
+++ b/init.cpp
@@ -15,7 +15,6 @@ namespace movit {
 bool movit_initialized = false;
 MovitDebugLevel movit_debug_level = MOVIT_DEBUG_ON;
 float movit_texel_subpixel_precision;
-bool movit_srgb_textures_supported;
 bool movit_timer_queries_supported;
 int movit_num_wrongly_rounded;
 MovitShaderModel movit_shader_model;
@@ -282,33 +281,6 @@ void measure_roundoff_problems()
 	check_error();
 }
 
-struct RequiredExtension {
-	int min_equivalent_gl_version;
-	const char extension_name[64];
-};
-const RequiredExtension required_extensions[] = {
-	// We fundamentally need FBOs and floating-point textures.
-	// FBOs are covered by OpenGL 1.5, and are not an extension there.
-	// Floating-point textures are part of OpenGL 3.0 and newer.
-	{ 15, "GL_ARB_framebuffer_object" },
-	{ 30, "GL_ARB_texture_float" },
-
-	// We assume that we can use non-power-of-two textures without restrictions.
-	{ 20, "GL_ARB_texture_non_power_of_two" },
-
-	// We also need GLSL fragment shaders.
-	{ 20, "GL_ARB_fragment_shader" },
-	{ 20, "GL_ARB_shading_language_100" },
-
-	// FlatInput and YCbCrInput uses PBOs. (They could in theory do without,
-	// but no modern card would really not provide it.)
-	{ 21, "GL_ARB_pixel_buffer_object" },
-
-	// ResampleEffect uses RG textures to encode a two-component LUT.
-	// We also need GL_R several places, for single-channel input.
-	{ 30, "GL_ARB_texture_rg" },
-};
-
 bool check_extensions()
 {
 	// GLES generally doesn't use extensions as actively as desktop OpenGL.
@@ -316,7 +288,6 @@ bool check_extensions()
 	// we need.
 	if (!epoxy_is_desktop_gl()) {
 		if (epoxy_gl_version() >= 30) {
-			movit_srgb_textures_supported = true;
 			return true;
 		} else {
 			fprintf(stderr, "Movit system requirements: GLES version %.1f is too old (GLES 3.0 needed).\n",
@@ -326,31 +297,13 @@ bool check_extensions()
 		}
 	}
 
-	// Check all extensions, and output errors for the ones that we are missing.
-	bool all_ok = true;
-	int gl_version = epoxy_gl_version();
-
-	for (unsigned i = 0; i < sizeof(required_extensions) / sizeof(required_extensions[0]); ++i) {
-		if (gl_version < required_extensions[i].min_equivalent_gl_version &&
-		    !epoxy_has_gl_extension(required_extensions[i].extension_name)) {
-			fprintf(stderr, "Movit system requirements: Needs extension '%s' or at least OpenGL version %.1f (has version %.1f)\n",
-				required_extensions[i].extension_name,
-				0.1f * required_extensions[i].min_equivalent_gl_version,
-				0.1f * gl_version);
-			all_ok = false;
-		}
-	}
-
-	if (!all_ok) {
+	if (epoxy_gl_version() < 30) {
+		fprintf(stderr, "Movit system requirements: OpenGL version %.1f is too old (OpenGL 3.0 needed).\n",
+			0.1f * epoxy_gl_version());
 		fprintf(stderr, "Movit initialization failed.\n");
 		return false;
 	}
 
-	// sRGB texture decode would be nice, but are not mandatory
-	// (GammaExpansionEffect can do the same thing if needed).
-	movit_srgb_textures_supported =
-		(epoxy_gl_version() >= 21 || epoxy_has_gl_extension("GL_EXT_texture_sRGB"));
-
 	// The user can specify that they want a timing report for each
 	// phase in an effect chain. However, that depends on this extension;
 	// without it, we do cannot even create the query objects.
diff --git a/init.h b/init.h
index 7e1e130..a644435 100644
--- a/init.h
+++ b/init.h
@@ -50,22 +50,19 @@ extern float movit_texel_subpixel_precision;
 
 // Some GPUs use very inaccurate fixed-function circuits for rounding
 // floating-point values to 8-bit outputs, leading to absurdities like
-// the roundoff point between 128 and 129 being 128.62 instead of 128.6.
+// the roundoff point between 128 and 129 being 128.62 instead of 128.5.
 // We test, for every integer, x+0.48 and x+0.52 and check that they
 // round the right way (giving some leeway, but not a lot); the number
 // of errors are stored here.
 //
-// If this value is above 0, the extension GL_EXT_gpu_shader4 is available
-// (giving round()) and you have enabled dithering, we will round off
-// explicitly at the very end of the shader.
+// If this value is above 0, we will round off explicitly at the very end
+// of the shader. Note the following limitations:
 //
-// Note: I don't know of any cards that round off wrong (well, outside
-// our tolerance) and do not have this extension.
+//   - The measurement is done on linear 8-bit, not any sRGB format,
+//     10-bit output, or the like.
+//   - This only covers the final pass; intermediates are not covered
+//     (only relevant if you use e.g. GL_SRGB8 intermediates).
 extern int movit_num_wrongly_rounded;
-extern bool movit_shader_rounding_supported;
-
-// Whether the GPU in use supports GL_EXT_texture_sRGB.
-extern bool movit_srgb_textures_supported;
 
 // Whether the OpenGL driver (or GPU) in use supports GL_ARB_timer_query.
 extern bool movit_timer_queries_supported;
diff --git a/test_util.cpp b/test_util.cpp
index ada84dc..b377309 100644
--- a/test_util.cpp
+++ b/test_util.cpp
@@ -46,8 +46,14 @@ void vertical_flip(T *data, unsigned width, unsigned height)
 
 EffectChainTester::EffectChainTester(const float *data, unsigned width, unsigned height,
                                      MovitPixelFormat pixel_format, Colorspace color_space, GammaCurve gamma_curve,
-                                     GLenum framebuffer_format)
-	: chain(width, height, get_static_pool()), width(width), height(height), framebuffer_format(framebuffer_format), output_added(false), finalized(false)
+                                     GLenum framebuffer_format,
+                                     GLenum intermediate_format)
+	: chain(width, height, get_static_pool(), intermediate_format),
+	  width(width),
+	  height(height),
+	  framebuffer_format(framebuffer_format),
+	  output_added(false),
+	  finalized(false)
 {
 	CHECK(init_movit(".", MOVIT_DEBUG_OFF));
 
diff --git a/test_util.h b/test_util.h
index 1f9e47f..69efe35 100644
--- a/test_util.h
+++ b/test_util.h
@@ -15,7 +15,8 @@ public:
 	                  MovitPixelFormat pixel_format = FORMAT_GRAYSCALE,
 	                  Colorspace color_space = COLORSPACE_sRGB,
 	                  GammaCurve gamma_curve = GAMMA_LINEAR,
-	                  GLenum framebuffer_format = GL_RGBA16F_ARB);
+	                  GLenum framebuffer_format = GL_RGBA16F_ARB,
+	                  GLenum intermediate_format = GL_RGBA16F_ARB);
 	~EffectChainTester();
 	
 	EffectChain *get_chain() { return &chain; }
diff --git a/version.h b/version.h
index 556c4af..3a6600d 100644
--- a/version.h
+++ b/version.h
@@ -5,6 +5,6 @@
 // changes, even within git versions. There is no specific version
 // documentation outside the regular changelogs, though.
 
-#define MOVIT_VERSION 18
+#define MOVIT_VERSION 19
 
 #endif // !defined(_MOVIT_VERSION_H)