Make all fp16 routines work with fp32 as input instead of fp64, since that is what...

[movit] / util.cpp
diff --git a/util.cpp b/util.cpp

index d50039993bafc08853fe1278803f053212521f71..29428aad3e09ed0a1a66a660125433a7db6a3313 100644 (file)
--- a/util.cpp
+++ b/util.cpp
@@ -233,8 +233,8 @@ void combine_two_samples(float w1, float w2, float pos1, float pos2, float num_s
         }
  
         // Round to the desired precision. Note that this might take z outside the 0..1 range.
-       *offset = from_fp64<DestFloat>(pos1 + z * (pos2 - pos1));
-       z = (to_fp64(*offset) - pos1) / (pos2 - pos1);
+       *offset = from_fp32<DestFloat>(pos1 + z * (pos2 - pos1));
+       z = (to_fp32(*offset) - pos1) / (pos2 - pos1);
  
         // Round to the minimum number of bits we have measured earlier.
         // The card will do this for us anyway, but if we know what the real z
@@ -254,11 +254,11 @@ void combine_two_samples(float w1, float w2, float pos1, float pos2, float num_s
         //   w = (a(1-z) + bz) / ((1-z)² + z²)
         //
         // If z had infinite precision, this would simply reduce to w = w1 + w2.
-       *total_weight = from_fp64<DestFloat>((w1 + z * (w2 - w1)) / (z * z + (1 - z) * (1 - z)));
+       *total_weight = from_fp32<DestFloat>((w1 + z * (w2 - w1)) / (z * z + (1 - z) * (1 - z)));
  
         if (sum_sq_error != NULL) {
-               float err1 = to_fp64(*total_weight) * (1 - z) - w1;
-               float err2 = to_fp64(*total_weight) * z - w2;
+               float err1 = to_fp32(*total_weight) * (1 - z) - w1;
+               float err2 = to_fp32(*total_weight) * z - w2;
                 *sum_sq_error = err1 * err1 + err2 * err2;
         }
  }