+ ScalingWeights weights = calculate_bilinear_scaling_weights(src_size, dst_size, zoom, offset, BilinearFormatConstraints::ALLOW_FP16_AND_FP32);
+ src_bilinear_samples = weights.src_bilinear_samples;
+ num_loops = weights.num_loops;
+ slice_height = 1.0f / weights.num_loops;
+
+ // Encode as a two-component texture. Note the GL_REPEAT.
+ glActiveTexture(GL_TEXTURE0 + *sampler_num);
+ check_error();
+ glBindTexture(GL_TEXTURE_2D, tex.get_texnum());
+ check_error();
+
+ GLenum type, internal_format;
+ void *pixels;
+ assert((weights.bilinear_weights_fp16 == nullptr) != (weights.bilinear_weights_fp32 == nullptr));
+ if (weights.bilinear_weights_fp32 != nullptr) {
+ type = GL_FLOAT;
+ internal_format = GL_RG32F;
+ pixels = weights.bilinear_weights_fp32.get();
+ } else {
+ type = GL_HALF_FLOAT;
+ internal_format = GL_RG16F;
+ pixels = weights.bilinear_weights_fp16.get();
+ }
+
+ tex.update(weights.src_bilinear_samples, weights.dst_samples, internal_format, GL_RG, type, pixels);
+}
+
+// Constructs the compute-shader resampler belonging to the given parent.
+//
+// Sets default input dimensions (1280x720), zero offsets and unit zoom,
+// registers every settable parameter and shader uniform, and makes sure the
+// Lanczos table has been initialized exactly once (via call_once).
+//
+// The last_* members start out as -1 / NaN, i.e. values no real parameter
+// takes (NaN never compares equal to anything) -- presumably so that the
+// first comparison against the current parameters registers as a change.
+//
+// NOTE(review): output_width and output_height are registered below but are
+// not initialized in this list; presumably they are set (or default-initialized)
+// elsewhere before first use -- confirm against the class declaration.
+ResampleComputeEffect::ResampleComputeEffect(ResampleEffect *parent)
+	: parent(parent),
+	  input_width(1280),
+	  input_height(720),
+	  offset_x(0.0),
+	  offset_y(0.0),
+	  zoom_x(1.0),
+	  zoom_y(1.0),
+	  last_input_width(-1),
+	  last_input_height(-1),
+	  last_output_width(-1),
+	  last_output_height(-1),
+	  // NAN rather than 0.0 / 0.0: a literal floating-point division by zero
+	  // is formally undefined and is rejected outright by some compilers.
+	  last_offset_x(NAN),
+	  last_offset_y(NAN),
+	  last_zoom_x(NAN),
+	  last_zoom_y(NAN)
+{
+	// Parameters settable from the outside.
+	register_int("input_width", &input_width);
+	register_int("input_height", &input_height);
+	register_int("output_width", &output_width);
+	register_int("output_height", &output_height);
+	register_float("offset_x", &offset_x);
+	register_float("offset_y", &offset_y);
+	register_float("zoom_x", &zoom_x);
+	register_float("zoom_y", &zoom_y);
+
+	// Uniforms exposed to the shader.
+	register_uniform_sampler2d("sample_tex_horizontal", &uniform_sample_tex_horizontal);
+	register_uniform_sampler2d("sample_tex_vertical", &uniform_sample_tex_vertical);
+	register_uniform_int("num_horizontal_samples", &uniform_num_horizontal_samples);
+	register_uniform_int("num_vertical_samples", &uniform_num_vertical_samples);
+	register_uniform_int("vertical_int_radius", &uniform_vertical_int_radius);
+	register_uniform_float("inv_vertical_scaling_factor", &uniform_inv_vertical_scaling_factor);
+	register_uniform_int("output_samples_per_block", &uniform_output_samples_per_block);
+	register_uniform_int("num_horizontal_filters", &uniform_num_horizontal_filters);
+	register_uniform_int("num_vertical_filters", &uniform_num_vertical_filters);
+	register_uniform_float("slice_height", &uniform_slice_height);
+	register_uniform_float("horizontal_whole_pixel_offset", &uniform_horizontal_whole_pixel_offset);
+	register_uniform_int("vertical_whole_pixel_offset", &uniform_vertical_whole_pixel_offset);
+	register_uniform_float("inv_input_height", &uniform_inv_input_height);
+	register_uniform_float("input_texcoord_y_adjust", &uniform_input_texcoord_y_adjust);
+
+	call_once(lanczos_table_init_done, init_lanczos_table);
+}
+
+// No explicit cleanup is performed here; only the implicit member
+// destructors run.
+ResampleComputeEffect::~ResampleComputeEffect() = default;
+
+// Returns the shader source for this effect, exactly as read from
+// resample_effect.comp. Nothing is prepended to the file contents.
+string ResampleComputeEffect::output_fragment_shader()
+{
+	return read_file("resample_effect.comp");
+}
+
+// Recomputes the filter weights for both directions, uploads them to the two
+// weight textures, and refreshes the members/uniforms derived from them.
+//
+// Horizontal scaling is done first by the compute shader and uses exactly the
+// same two-component texture format as the two-pass version (see the comments
+// on ResampleComputeEffect). For vertical scaling, the shader calculates the
+// offset values itself, so only a one-component texture holding the weights
+// for each filter is stored.
+//
+// glsl_program_num and prefix are not used by this function. The horizontal
+// texture is bound to texture unit *sampler_num and the vertical one to the
+// unit right after it; *sampler_num itself is not modified here.
+void ResampleComputeEffect::update_texture(GLuint glsl_program_num, const string &prefix, unsigned *sampler_num)
+{
+	const ScalingWeights horizontal = calculate_bilinear_scaling_weights(input_width, output_width, zoom_x, offset_x, BilinearFormatConstraints::ALLOW_FP32_ONLY);
+	const ScalingWeights vertical = calculate_raw_scaling_weights(input_height, output_height, zoom_y, offset_y);
+
+	// State derived from the horizontal weights.
+	src_horizontal_bilinear_samples = horizontal.src_bilinear_samples;
+	uniform_num_horizontal_filters = horizontal.dst_samples;
+	slice_height = 1.0f / horizontal.num_loops;
+
+	// State derived from the vertical weights.
+	src_vertical_samples = vertical.src_bilinear_samples;
+	uniform_num_vertical_filters = vertical.dst_samples;
+	uniform_vertical_int_radius = vertical.int_radius;
+	vertical_scaling_factor = vertical.scaling_factor;
+	uniform_inv_vertical_scaling_factor = 1.0f / vertical.scaling_factor;
+
+	// Horizontal weights: encoded as a two-component texture. Note the GL_REPEAT.
+	const GLenum horizontal_unit = GL_TEXTURE0 + *sampler_num;
+	glActiveTexture(horizontal_unit);
+	check_error();
+	glBindTexture(GL_TEXTURE_2D, tex_horiz.get_texnum());
+	check_error();
+	tex_horiz.update(horizontal.src_bilinear_samples, horizontal.dst_samples, GL_RG32F, GL_RG, GL_FLOAT, horizontal.bilinear_weights_fp32.get());
+
+	// Vertical weights go on the next texture unit.
+	glActiveTexture(horizontal_unit + 1);
+	check_error();
+	glBindTexture(GL_TEXTURE_2D, tex_vert.get_texnum());
+	check_error();
+
+	// Keeping the vertical weights in fp16 rather than fp32 saves a few
+	// percent on NVIDIA and doesn't seem to hurt quality any. (The horizontal
+	// weights are a different story: their offsets can get large and are
+	// fairly accuracy-sensitive. They are also loaded only once per
+	// workgroup, at the very beginning.)
+	tex_vert.update(vertical.src_bilinear_samples, vertical.dst_samples, GL_R16F, GL_RED, GL_HALF_FLOAT, vertical.raw_weights.get());
+
+	// Work out how many output samples each compute shader block produces:
+	// the filter radius is subtracted on both sides of the block's input
+	// samples, and the remainder is scaled by the vertical scaling factor.
+	// Never go below one output sample per block.
+	// (The 128 presumably mirrors the block size in resample_effect.comp -- confirm.)
+	const int usable_inputs = 128 - 2 * uniform_vertical_int_radius;
+	const int outputs_per_block = int(floor(usable_inputs * vertical_scaling_factor));
+	uniform_output_samples_per_block = (outputs_per_block < 1) ? 1 : outputs_per_block;
+}
+
+namespace {
+
+ScalingWeights calculate_scaling_weights(unsigned src_size, unsigned dst_size, float zoom, float offset)
+{
+ // Only needed if run from outside ResampleEffect.
+ call_once(lanczos_table_init_done, init_lanczos_table);