]> git.sesse.net Git - nageru/blobdiff - chroma_subsampler.cpp
Release Nageru 1.7.2.
[nageru] / chroma_subsampler.cpp
index a9e535592634c53d9931e964fffecb6f9b85cf88..96adef1eedb9d360aea76136a17abafa760e8f29 100644 (file)
@@ -1,4 +1,5 @@
 #include "chroma_subsampler.h"
+#include "v210_converter.h"
 
 #include <vector>
 
@@ -101,6 +102,10 @@ ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool)
                "} \n";
        cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs);
        check_error();
+       cbcr_chroma_offset_0_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_0");
+       check_error();
+       cbcr_chroma_offset_1_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_1");
+       check_error();
 
        cbcr_texture_sampler_uniform = glGetUniformLocation(cbcr_program_num, "cbcr_tex");
        check_error();
@@ -152,6 +157,14 @@ ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool)
 
        uyvy_program_num = resource_pool->compile_glsl_program(uyvy_vert_shader, uyvy_frag_shader, frag_shader_outputs);
        check_error();
+       uyvy_luma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_0");
+       check_error();
+       uyvy_luma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_1");
+       check_error();
+       uyvy_chroma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_0");
+       check_error();
+       uyvy_chroma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_1");
+       check_error();
 
        uyvy_y_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "y_tex");
        check_error();
@@ -170,6 +183,78 @@ ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool)
        };
        vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices);
        check_error();
+
+       // v210 compute shader.
+       if (v210Converter::has_hardware_support()) {
+               string v210_shader_src = R"(#version 150
+#extension GL_ARB_compute_shader : enable
+#extension GL_ARB_shader_image_load_store : enable
+layout(local_size_x=2, local_size_y=16) in;
+layout(r16) uniform restrict readonly image2D in_y;
+uniform sampler2D in_cbcr;  // Of type RG16.
+layout(rgb10_a2) uniform restrict writeonly image2D outbuf;
+uniform float inv_width, inv_height;
+
+void main()
+{
+       int xb = int(gl_GlobalInvocationID.x);  // X block number.
+       int y = int(gl_GlobalInvocationID.y);  // Y (actual line).
+       float yf = (gl_GlobalInvocationID.y + 0.5f) * inv_height;  // Y float coordinate.
+
+       // Load and scale CbCr values, sampling in-between the texels to get
+       // to (left/4 + center/2 + right/4).
+       vec2 pix_cbcr[3];
+       for (int i = 0; i < 3; ++i) {
+               vec2 a = texture(in_cbcr, vec2((xb * 6 + i * 2) * inv_width, yf)).xy;
+               vec2 b = texture(in_cbcr, vec2((xb * 6 + i * 2 + 1) * inv_width, yf)).xy;
+               pix_cbcr[i] = (a + b) * (0.5 * 65535.0 / 1023.0);
+       }
+
+       // Load and scale the Y values. Note that we use integer coordinates here,
+       // so we don't need to offset by 0.5.
+       float pix_y[6];
+       for (int i = 0; i < 6; ++i) {
+               pix_y[i] = imageLoad(in_y, ivec2(xb * 6 + i, y)).x * (65535.0 / 1023.0);
+       }
+
+       imageStore(outbuf, ivec2(xb * 4 + 0, y), vec4(pix_cbcr[0].x, pix_y[0],      pix_cbcr[0].y, 1.0));
+       imageStore(outbuf, ivec2(xb * 4 + 1, y), vec4(pix_y[1],      pix_cbcr[1].x, pix_y[2],      1.0));
+       imageStore(outbuf, ivec2(xb * 4 + 2, y), vec4(pix_cbcr[1].y, pix_y[3],      pix_cbcr[2].x, 1.0));
+       imageStore(outbuf, ivec2(xb * 4 + 3, y), vec4(pix_y[4],      pix_cbcr[2].y, pix_y[5],      1.0));
+}
+)";
+               GLuint shader_num = movit::compile_shader(v210_shader_src, GL_COMPUTE_SHADER);
+               check_error();
+               v210_program_num = glCreateProgram();
+               check_error();
+               glAttachShader(v210_program_num, shader_num);
+               check_error();
+               glLinkProgram(v210_program_num);
+               check_error();
+
+               GLint success;
+               glGetProgramiv(v210_program_num, GL_LINK_STATUS, &success);
+               check_error();
+               if (success == GL_FALSE) {
+                       GLchar error_log[1024] = {0};
+                       glGetProgramInfoLog(v210_program_num, 1024, nullptr, error_log);
+                       fprintf(stderr, "Error linking program: %s\n", error_log);
+                       exit(1);
+               }
+
+               v210_in_y_pos = glGetUniformLocation(v210_program_num, "in_y");
+               check_error();
+               v210_in_cbcr_pos = glGetUniformLocation(v210_program_num, "in_cbcr");
+               check_error();
+               v210_outbuf_pos = glGetUniformLocation(v210_program_num, "outbuf");
+               check_error();
+               v210_inv_width_pos = glGetUniformLocation(v210_program_num, "inv_width");
+               check_error();
+               v210_inv_height_pos = glGetUniformLocation(v210_program_num, "inv_height");
+               check_error();
+       } else {
+               v210_program_num = 0;
+       }
 }
 
 ChromaSubsampler::~ChromaSubsampler()
@@ -180,14 +265,15 @@ ChromaSubsampler::~ChromaSubsampler()
        check_error();
        glDeleteBuffers(1, &vbo);
        check_error();
+       if (v210_program_num != 0) {
+               glDeleteProgram(v210_program_num);
+               check_error();
+       }
 }
 
 void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex, GLuint dst2_tex)
 {
-       GLuint vao;
-       glGenVertexArrays(1, &vao);
-       check_error();
-
+       GLuint vao = resource_pool->create_vec2_vao({ cbcr_position_attribute_index, cbcr_texcoord_attribute_index }, vbo);
        glBindVertexArray(vao);
        check_error();
 
@@ -216,45 +302,30 @@ void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigne
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        check_error();
 
-       float chroma_offset_0[] = { -1.0f / width, 0.0f };
-       float chroma_offset_1[] = { -0.0f / width, 0.0f };
-       set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_0", chroma_offset_0);
-       set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_1", chroma_offset_1);
-
-       glUniform1i(cbcr_texture_sampler_uniform, 0);
-
-       glBindBuffer(GL_ARRAY_BUFFER, vbo);
+       glUniform2f(cbcr_chroma_offset_0_location, -1.0f / width, 0.0f);
        check_error();
-
-       for (GLint attr_index : { cbcr_position_attribute_index, cbcr_texcoord_attribute_index }) {
-               glEnableVertexAttribArray(attr_index);
-               check_error();
-               glVertexAttribPointer(attr_index, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
-               check_error();
-       }
+       glUniform2f(cbcr_chroma_offset_1_location, -0.0f / width, 0.0f);
+       check_error();
+       glUniform1i(cbcr_texture_sampler_uniform, 0);
 
        glDrawArrays(GL_TRIANGLES, 0, 3);
        check_error();
 
-       for (GLint attr_index : { cbcr_position_attribute_index, cbcr_texcoord_attribute_index }) {
-               glDisableVertexAttribArray(attr_index);
-               check_error();
-       }
-
        glUseProgram(0);
        check_error();
        glBindFramebuffer(GL_FRAMEBUFFER, 0);
        check_error();
+       glBindVertexArray(0);
+       check_error();
 
        resource_pool->release_fbo(fbo);
-       glDeleteVertexArrays(1, &vao);
-       check_error();
+       resource_pool->release_vec2_vao(vao);
 }
 
 void ChromaSubsampler::create_uyvy(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
 {
-       GLuint vao;
-       glGenVertexArrays(1, &vao);
+       GLuint vao = resource_pool->create_vec2_vao({ cbcr_position_attribute_index, cbcr_texcoord_attribute_index }, vbo);
+       glBindVertexArray(vao);
        check_error();
 
        glBindVertexArray(vao);
@@ -295,42 +366,87 @@ void ChromaSubsampler::create_uyvy(GLuint y_tex, GLuint cbcr_tex, unsigned width
        glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
        check_error();
 
-       float y_offset_0[] = { -0.5f / width, 0.0f };
-       float y_offset_1[] = {  0.5f / width, 0.0f };
-       float cbcr_offset0[] = { -1.0f / width, 0.0f };
-       float cbcr_offset1[] = { -0.0f / width, 0.0f };
-       set_uniform_vec2(uyvy_program_num, "foo", "luma_offset_0", y_offset_0);
-       set_uniform_vec2(uyvy_program_num, "foo", "luma_offset_1", y_offset_1);
-       set_uniform_vec2(uyvy_program_num, "foo", "chroma_offset_0", cbcr_offset0);
-       set_uniform_vec2(uyvy_program_num, "foo", "chroma_offset_1", cbcr_offset1);
+       glUniform2f(uyvy_luma_offset_0_location, -0.5f / width, 0.0f);
+       check_error();
+       glUniform2f(uyvy_luma_offset_1_location,  0.5f / width, 0.0f);
+       check_error();
+       glUniform2f(uyvy_chroma_offset_0_location, -1.0f / width, 0.0f);
+       check_error();
+       glUniform2f(uyvy_chroma_offset_1_location, -0.0f / width, 0.0f);
+       check_error();
 
        glBindBuffer(GL_ARRAY_BUFFER, vbo);
        check_error();
 
-       for (GLint attr_index : { uyvy_position_attribute_index, uyvy_texcoord_attribute_index }) {
-               if (attr_index == -1) continue;
-               glEnableVertexAttribArray(attr_index);
-               check_error();
-               glVertexAttribPointer(attr_index, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
-               check_error();
-       }
-
        glDrawArrays(GL_TRIANGLES, 0, 3);
        check_error();
 
-       for (GLint attr_index : { uyvy_position_attribute_index, uyvy_texcoord_attribute_index }) {
-               if (attr_index == -1) continue;
-               glDisableVertexAttribArray(attr_index);
-               check_error();
-       }
-
        glActiveTexture(GL_TEXTURE0);
        check_error();
        glUseProgram(0);
        check_error();
        glBindFramebuffer(GL_FRAMEBUFFER, 0);
        check_error();
+       glBindVertexArray(0);
+       check_error();
 
        resource_pool->release_fbo(fbo);
-       glDeleteVertexArrays(1, &vao);
+       resource_pool->release_vec2_vao(vao);
+}
+
+void ChromaSubsampler::create_v210(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
+{
+       assert(v210_program_num != 0);
+
+       glUseProgram(v210_program_num);
+       check_error();
+
+       glUniform1i(v210_in_y_pos, 0);
+       check_error();
+       glUniform1i(v210_in_cbcr_pos, 1);
+       check_error();
+       glUniform1i(v210_outbuf_pos, 2);
+       check_error();
+       glUniform1f(v210_inv_width_pos, 1.0 / width);
+       check_error();
+       glUniform1f(v210_inv_height_pos, 1.0 / height);
+       check_error();
+
+       glActiveTexture(GL_TEXTURE0);
+       check_error();
+       glBindTexture(GL_TEXTURE_2D, y_tex);  // We don't actually need to bind it, but we need to set the state.
+       check_error();
+       glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+       check_error();
+       glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+       check_error();
+       glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+       check_error();
+       glBindImageTexture(0, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16);  // This is the real bind.
+       check_error();
+
+       glActiveTexture(GL_TEXTURE1);
+       check_error();
+       glBindTexture(GL_TEXTURE_2D, cbcr_tex);
+       check_error();
+       glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+       check_error();
+       glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+       check_error();
+       glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+       check_error();
+
+       glBindImageTexture(2, dst_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGB10_A2);
+       check_error();
+
+       // Actually run the shader. We use workgroups of size 2x16 threads, and each thread
+       // processes 6x1 input pixels, so round up to number of 12x16 pixel blocks.
+       glDispatchCompute((width + 11) / 12, (height + 15) / 16, 1);
+
+       glBindTexture(GL_TEXTURE_2D, 0);
+       check_error();
+       glActiveTexture(GL_TEXTURE0);
+       check_error();
+       glUseProgram(0);
+       check_error();
 }