Compute version of ResampleEffect.

[movit] / resample_effect_test.cpp
diff --git a/resample_effect_test.cpp b/resample_effect_test.cpp

index b47bd577d3740452a28715131834d26570b052de..95fa108095ed8fd51bcecb0e4d467288d20083fa 100644 (file)
--- a/resample_effect_test.cpp
+++ b/resample_effect_test.cpp
@@ -8,6 +8,7 @@
  
  #include "effect_chain.h"
  #include "flat_input.h"
+#include "fp16.h"
  #include "image_format.h"
  #include "init.h"
  #include "resample_effect.h"
@@ -35,7 +36,16 @@ float lanczos(float x, float a)
  
  }  // namespace
  
-TEST(ResampleEffectTest, IdentityTransformDoesNothing) {
+class ResampleEffectTest : public testing::TestWithParam<string> {  // Fixture for the TEST_P cases in this file; its string parameter is compared against "fragment" in the constructor.
+protected:
+       ResampleEffectTest() : disabler(GetParam() == "fragment") {}  // The disabler receives true only when the parameter is "fragment".
+       bool should_skip() { return disabler.should_skip(); }  // Forwards to the disabler. NOTE(review): no test body visible in this diff calls it -- confirm whether tests should return early when it is true.
+
+private:
+       DisableComputeShadersTemporarily disabler;  // Presumably keeps compute shaders off while the fixture is alive -- confirm against its definition.
+};
+
+TEST_P(ResampleEffectTest, IdentityTransformDoesNothing) {
         const int size = 4;
  
         float data[size * size] = {
@@ -55,7 +65,7 @@ TEST(ResampleEffectTest, IdentityTransformDoesNothing) {
         expect_equal(data, out_data, size, size);
  }
  
-TEST(ResampleEffectTest, UpscaleByTwoGetsCorrectPixelCenters) {
+TEST_P(ResampleEffectTest, UpscaleByTwoGetsCorrectPixelCenters) {
         const int size = 5;
  
         float data[size * size] = {
@@ -93,7 +103,7 @@ TEST(ResampleEffectTest, UpscaleByTwoGetsCorrectPixelCenters) {
         expect_equal(expected_data, out_data, size * 2, size * 2);
  }
  
-TEST(ResampleEffectTest, DownscaleByTwoGetsCorrectPixelCenters) {
+TEST_P(ResampleEffectTest, DownscaleByTwoGetsCorrectPixelCenters) {
         const int size = 5;
  
         // This isn't a perfect dot, since the Lanczos filter has a slight
@@ -135,7 +145,7 @@ TEST(ResampleEffectTest, DownscaleByTwoGetsCorrectPixelCenters) {
         expect_equal(expected_data, out_data, size, size);
  }
  
-TEST(ResampleEffectTest, UpscaleByThreeGetsCorrectPixelCenters) {
+TEST_P(ResampleEffectTest, UpscaleByThreeGetsCorrectPixelCenters) {
         const int size = 5;
  
         float data[size * size] = {
@@ -163,8 +173,13 @@ TEST(ResampleEffectTest, UpscaleByThreeGetsCorrectPixelCenters) {
         tester.run(out_data, GL_RED, COLORSPACE_sRGB, GAMMA_LINEAR);
  
         // We only bother checking that the middle pixel is still correct,
-       // and that symmetry holds.
-       EXPECT_FLOAT_EQ(1.0, out_data[7 * (size * 3) + 7]);
+       // and that symmetry holds. Note that the middle weight in practice
+       // becomes something like 0.99999 due to the normalization
+       // (some supposedly zero weights become 1e-6 or so), and then after
+       // squaring, the error compounds. Ironically, less texture precision
+       // here will give a more accurate result, since the weight can get
+       // rounded towards 1.0.
+       EXPECT_NEAR(1.0, out_data[7 * (size * 3) + 7], 1e-3);
         for (unsigned y = 0; y < size * 3; ++y) {
                 for (unsigned x = 0; x < size * 3; ++x) {
                         EXPECT_NEAR(out_data[y * (size * 3) + x], out_data[(size * 3 - y - 1) * (size * 3) + x], 1e-6);
@@ -173,7 +188,7 @@ TEST(ResampleEffectTest, UpscaleByThreeGetsCorrectPixelCenters) {
         }
  }
  
-TEST(ResampleEffectTest, HeavyResampleGetsSumRight) {
+TEST_P(ResampleEffectTest, HeavyResampleGetsSumRight) {
         // Do only one resample pass, more specifically the last one, which goes to
         // our fp32 output. This allows us to analyze the precision without intermediate
         // fp16 rounding.
@@ -213,7 +228,7 @@ TEST(ResampleEffectTest, HeavyResampleGetsSumRight) {
         expect_equal(expected_data, out_data, dwidth, dheight, 0.12 / 1023.0);
  }
  
-TEST(ResampleEffectTest, ReadWholePixelFromLeft) {
+TEST_P(ResampleEffectTest, ReadWholePixelFromLeft) {
         const int size = 5;
  
         float data[size * size] = {
@@ -242,7 +257,7 @@ TEST(ResampleEffectTest, ReadWholePixelFromLeft) {
         expect_equal(expected_data, out_data, size, size);
  }
  
-TEST(ResampleEffectTest, ReadQuarterPixelFromLeft) {
+TEST_P(ResampleEffectTest, ReadQuarterPixelFromLeft) {
         const int size = 5;
  
         float data[size * size] = {
@@ -277,7 +292,7 @@ TEST(ResampleEffectTest, ReadQuarterPixelFromLeft) {
         expect_equal(expected_data, out_data, size, size);
  }
  
-TEST(ResampleEffectTest, ReadQuarterPixelFromTop) {
+TEST_P(ResampleEffectTest, ReadQuarterPixelFromTop) {
         const int width = 3;
         const int height = 5;
  
@@ -309,7 +324,7 @@ TEST(ResampleEffectTest, ReadQuarterPixelFromTop) {
         expect_equal(expected_data, out_data, width, height);
  }
  
-TEST(ResampleEffectTest, ReadHalfPixelFromLeftAndScale) {
+TEST_P(ResampleEffectTest, ReadHalfPixelFromLeftAndScale) {
         const int src_width = 4;
         const int dst_width = 8;
  
@@ -346,7 +361,7 @@ TEST(ResampleEffectTest, ReadHalfPixelFromLeftAndScale) {
         expect_equal(expected_data, out_data, dst_width, 1, 1.5f / 255.0f, 0.4f / 255.0f);
  }
  
-TEST(ResampleEffectTest, Zoom) {
+TEST_P(ResampleEffectTest, Zoom) {
         const int width = 5;
         const int height = 3;
  
@@ -372,7 +387,7 @@ TEST(ResampleEffectTest, Zoom) {
         expect_equal(expected_data, out_data, width, height);
  }
  
-TEST(ResampleEffectTest, VerticalZoomFromTop) {
+TEST_P(ResampleEffectTest, VerticalZoomFromTop) {
         const int width = 5;
         const int height = 5;
  
@@ -406,7 +421,7 @@ TEST(ResampleEffectTest, VerticalZoomFromTop) {
         expect_equal(expected_data, out_data, width, height);
  }
  
-TEST(ResampleEffectTest, Precision) {
+TEST_P(ResampleEffectTest, Precision) {
         const int size = 1920;  // Difficult non-power-of-two size.
         const int offset = 5;
  
@@ -430,7 +445,13 @@ TEST(ResampleEffectTest, Precision) {
         expect_equal(expected_data, out_data, size, 1);
  }
  
+INSTANTIATE_TEST_CASE_P(ResampleEffectTest,
+                        ResampleEffectTest,
+                        testing::Values("fragment", "compute"));  // Each TEST_P of the fixture is instantiated once per value; the fixture constructor checks for "fragment".
+
  #ifdef HAVE_BENCHMARK
+template<> inline uint8_t from_fp32<uint8_t>(float x) { return lrintf(x * 255.0f); }  // uint8_t specialization: scales x by 255 and rounds with lrintf(); the long result is narrowed implicitly, so x is assumed to lie in [0, 1] (the benchmark feeds it rand() / (RAND_MAX + 1.0)).
+
  template<class T>
  void BM_ResampleEffect(benchmark::State &state, GammaCurve gamma_curve, GLenum output_format, const std::string &shader_type)
  {
@@ -444,7 +465,7 @@ void BM_ResampleEffect(benchmark::State &state, GammaCurve gamma_curve, GLenum o
         unique_ptr<T[]> out_data(new T[out_width * out_height * 4]);
  
         for (unsigned i = 0; i < in_width * in_height * 4; ++i) {
-               data[i] = rand();
+               data[i] = from_fp32<T>(rand() / (RAND_MAX + 1.0));
         }
  
         EffectChainTester tester(nullptr, out_width, out_height, FORMAT_BGRA_POSTMULTIPLIED_ALPHA, COLORSPACE_sRGB, gamma_curve, output_format);
@@ -457,9 +478,9 @@ void BM_ResampleEffect(benchmark::State &state, GammaCurve gamma_curve, GLenum o
         tester.benchmark(state, out_data.get(), GL_BGRA, COLORSPACE_sRGB, gamma_curve, OUTPUT_ALPHA_FORMAT_PREMULTIPLIED);
  }
  
-void BM_ResampleEffectFloat(benchmark::State &state, GammaCurve gamma_curve, const std::string &shader_type)
+void BM_ResampleEffectHalf(benchmark::State &state, GammaCurve gamma_curve, const std::string &shader_type)
  {
-       BM_ResampleEffect<float>(state, gamma_curve, GL_RGBA16F, shader_type);
+       BM_ResampleEffect<fp16_int_t>(state, gamma_curve, GL_RGBA16F, shader_type);  // Element type is now fp16_int_t instead of float; the GL_RGBA16F output format is unchanged.
  }
  
  void BM_ResampleEffectInt8(benchmark::State &state, GammaCurve gamma_curve, const std::string &shader_type)
@@ -468,11 +489,33 @@ void BM_ResampleEffectInt8(benchmark::State &state, GammaCurve gamma_curve, cons
  }
  
  BENCHMARK_CAPTURE(BM_ResampleEffectInt8, Int8Upscale, GAMMA_REC_709, "fragment")->Args({640, 360, 1280, 720})->Args({320, 180, 1280, 720})->Args({321, 181, 1280, 720})->UseRealTime()->Unit(benchmark::kMicrosecond);
-BENCHMARK_CAPTURE(BM_ResampleEffectFloat, Float32Upscale, GAMMA_LINEAR, "fragment")->Args({640, 360, 1280, 720})->Args({320, 180, 1280, 720})->Args({321, 181, 1280, 720})->UseRealTime()->Unit(benchmark::kMicrosecond);
+BENCHMARK_CAPTURE(BM_ResampleEffectHalf, Float16Upscale, GAMMA_LINEAR, "fragment")->Args({640, 360, 1280, 720})->Args({320, 180, 1280, 720})->Args({321, 181, 1280, 720})->UseRealTime()->Unit(benchmark::kMicrosecond);
  BENCHMARK_CAPTURE(BM_ResampleEffectInt8, Int8Downscale, GAMMA_REC_709, "fragment")->Args({1280, 720, 640, 360})->Args({1280, 720, 320, 180})->Args({1280, 720, 321, 181})->UseRealTime()->Unit(benchmark::kMicrosecond);
-BENCHMARK_CAPTURE(BM_ResampleEffectFloat, Float32Downscale, GAMMA_LINEAR, "fragment")->Args({1280, 720, 640, 360})->Args({1280, 720, 320, 180})->Args({1280, 720, 321, 181})->UseRealTime()->Unit(benchmark::kMicrosecond);
+BENCHMARK_CAPTURE(BM_ResampleEffectHalf, Float16Downscale, GAMMA_LINEAR, "fragment")->Args({1280, 720, 640, 360})->Args({1280, 720, 320, 180})->Args({1280, 720, 321, 181})->UseRealTime()->Unit(benchmark::kMicrosecond);
+BENCHMARK_CAPTURE(BM_ResampleEffectInt8, Int8UpscaleCompute, GAMMA_REC_709, "compute")->Args({640, 360, 1280, 720})->Args({320, 180, 1280, 720})->Args({321, 181, 1280, 720})->UseRealTime()->Unit(benchmark::kMicrosecond);
+BENCHMARK_CAPTURE(BM_ResampleEffectHalf, Float16UpscaleCompute, GAMMA_LINEAR, "compute")->Args({640, 360, 1280, 720})->Args({320, 180, 1280, 720})->Args({321, 181, 1280, 720})->UseRealTime()->Unit(benchmark::kMicrosecond);
+BENCHMARK_CAPTURE(BM_ResampleEffectInt8, Int8DownscaleCompute, GAMMA_REC_709, "compute")->Args({1280, 720, 640, 360})->Args({1280, 720, 320, 180})->Args({1280, 720, 321, 181})->UseRealTime()->Unit(benchmark::kMicrosecond);
+BENCHMARK_CAPTURE(BM_ResampleEffectHalf, Float16DownscaleCompute, GAMMA_LINEAR, "compute")->Args({1280, 720, 640, 360})->Args({1280, 720, 320, 180})->Args({1280, 720, 321, 181})->UseRealTime()->Unit(benchmark::kMicrosecond);
+
+void BM_ComputeBilinearScalingWeights(benchmark::State &state)  // Times calculate_bilinear_scaling_weights() with ALLOW_FP16_AND_FP32 for a fixed 1280 -> 35 size pair.
+{
+       constexpr unsigned src_size = 1280;
+       constexpr unsigned dst_size = 35;
+       int old_precision = movit_texel_subpixel_precision;  // Saved so the global can be put back before returning.
+       movit_texel_subpixel_precision = 64;  // To get consistent results across GPUs; this is a CPU test.
+
+       // One iteration warmup to make sure the Lanczos table is computed.
+       calculate_bilinear_scaling_weights(src_size, dst_size, 0.999f, 0.0f, BilinearFormatConstraints::ALLOW_FP16_AND_FP32);
+
+       for (auto _ : state) {
+               ScalingWeights weights = calculate_bilinear_scaling_weights(src_size, dst_size, 0.999f, 0.0f, BilinearFormatConstraints::ALLOW_FP16_AND_FP32);  // Result is never read; only the call itself is being timed.
+       }
+
+       movit_texel_subpixel_precision = old_precision;  // Restore the global overridden above.
+}
+BENCHMARK(BM_ComputeBilinearScalingWeights)->Unit(benchmark::kMicrosecond);
  
-void BM_ComputeScalingWeights(benchmark::State &state)
+void BM_ComputeBilinearScalingWeightsNoFP16(benchmark::State &state)
  {
         constexpr unsigned src_size = 1280;
         constexpr unsigned dst_size = 35;
@@ -480,15 +523,29 @@ void BM_ComputeScalingWeights(benchmark::State &state)
         movit_texel_subpixel_precision = 64;  // To get consistent results across GPUs; this is a CPU test.
  
         // One iteration warmup to make sure the Lanczos table is computed.
-       calculate_scaling_weights(src_size, dst_size, 0.999f, 0.0f);
+       calculate_bilinear_scaling_weights(src_size, dst_size, 0.999f, 0.0f, BilinearFormatConstraints::ALLOW_FP32_ONLY);
  
         for (auto _ : state) {
-               ScalingWeights weights = calculate_scaling_weights(src_size, dst_size, 0.999f, 0.0f);
+               ScalingWeights weights = calculate_bilinear_scaling_weights(src_size, dst_size, 0.999f, 0.0f, BilinearFormatConstraints::ALLOW_FP32_ONLY);
         }
  
         movit_texel_subpixel_precision = old_precision;
  }
-BENCHMARK(BM_ComputeScalingWeights)->Unit(benchmark::kMicrosecond);
+BENCHMARK(BM_ComputeBilinearScalingWeightsNoFP16)->Unit(benchmark::kMicrosecond);
+
+void BM_ComputeRawScalingWeights(benchmark::State &state)  // Times calculate_raw_scaling_weights() for a fixed 1280 -> 35 size pair; unlike the bilinear benchmarks it does not touch movit_texel_subpixel_precision.
+{
+       constexpr unsigned src_size = 1280;
+       constexpr unsigned dst_size = 35;
+
+       // One iteration warmup to make sure the Lanczos table is computed.
+       calculate_raw_scaling_weights(src_size, dst_size, 0.999f, 0.0f);
+
+       for (auto _ : state) {
+               ScalingWeights weights = calculate_raw_scaling_weights(src_size, dst_size, 0.999f, 0.0f);  // Result is never read; only the call itself is being timed.
+       }
+}
+BENCHMARK(BM_ComputeRawScalingWeights)->Unit(benchmark::kMicrosecond);
  
  #endif