Use ryg's much faster fp16 conversion code.

[movit] / util.h
diff --git a/util.h b/util.h

index a89d3a2adf4be608e296b2357e0b79f6104b3151..8cabaf7c3eafe8be36928521bb324545521325ba 100644 (file)
--- a/util.h
+++ b/util.h
@@ -38,6 +38,11 @@ void print_3x3_matrix(const Eigen::Matrix3d &m);
  // Output a GLSL 3x3 matrix declaration.
  std::string output_glsl_mat3(const std::string &name, const Eigen::Matrix3d &m);
  
+// Output GLSL declarations for a float scalar, a vec2 and a vec3, respectively.
+std::string output_glsl_float(const std::string &name, float x);
+std::string output_glsl_vec2(const std::string &name, float x, float y);
+std::string output_glsl_vec3(const std::string &name, float x, float y, float z);
+
  // Calculate a / b, rounding up. Does not handle overflow correctly.
  unsigned div_round_up(unsigned a, unsigned b);
  
@@ -49,8 +54,11 @@ enum CombineRoundingBehavior {
  // Calculate where to sample, and with what weight, if one wants to use
  // the GPU's bilinear hardware to sample w1 * x[pos1] + w2 * x[pos2],
  // where pos1 and pos2 must be normalized coordinates describing neighboring
-// pixels in the mipmap level at which you sample, and the total number of
-// pixels (in given mipmap level) is <size>.
+// texels in the mipmap level at which you sample. <num_subtexels> is the
+// number of distinct accessible subtexels in the given mipmap level,
+// calculated as num_texels / movit_texel_subpixel_precision. It is a float
+// for performance reasons, even though it is expected to be a whole number.
+// <inv_num_subtexels> is simply its reciprocal (1 / num_subtexels).
  //
  // Note that since the GPU might have limited precision in its linear
  // interpolation, the effective weights might be different from the ones you
@@ -62,8 +70,9 @@ enum CombineRoundingBehavior {
  // is COMBINE_ROUND_TO_FP16, the coordinate is assumed to be stored as a
  // rounded fp16 value. This enables more precise calculation of total_weight
  // and sum_sq_error.
-void combine_two_samples(float w1, float w2, float pos1, float pos2, unsigned size, CombineRoundingBehavior rounding_behavior,
-                         float *offset, float *total_weight, float *sum_sq_error);
+template<class DestFloat>
+void combine_two_samples(float w1, float w2, float pos1, float pos2, float num_subtexels, float inv_num_subtexels,
+                         DestFloat *offset, DestFloat *total_weight, float *sum_sq_error);
  
  // Create a VBO with the given data, and bind it to the vertex attribute
  // with name <attribute_name>. Returns the VBO number.