X-Git-Url: https://git.sesse.net/?p=nageru;a=blobdiff_plain;f=chroma_subsampler.cpp;h=96adef1eedb9d360aea76136a17abafa760e8f29;hp=a9e535592634c53d9931e964fffecb6f9b85cf88;hb=703e00da89118df9be0354dda621bed023e6030e;hpb=336009fd7baf47b4ad71adf8d7ead8a526045788 diff --git a/chroma_subsampler.cpp b/chroma_subsampler.cpp index a9e5355..96adef1 100644 --- a/chroma_subsampler.cpp +++ b/chroma_subsampler.cpp @@ -1,4 +1,5 @@ #include "chroma_subsampler.h" +#include "v210_converter.h" #include @@ -101,6 +102,10 @@ ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool) "} \n"; cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs); check_error(); + cbcr_chroma_offset_0_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_0"); + check_error(); + cbcr_chroma_offset_1_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_1"); + check_error(); cbcr_texture_sampler_uniform = glGetUniformLocation(cbcr_program_num, "cbcr_tex"); check_error(); @@ -152,6 +157,14 @@ ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool) uyvy_program_num = resource_pool->compile_glsl_program(uyvy_vert_shader, uyvy_frag_shader, frag_shader_outputs); check_error(); + uyvy_luma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_0"); + check_error(); + uyvy_luma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_1"); + check_error(); + uyvy_chroma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_0"); + check_error(); + uyvy_chroma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_1"); + check_error(); uyvy_y_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "y_tex"); check_error(); @@ -170,6 +183,78 @@ ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool) }; vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices); check_error(); + + // v210 compute 
shader. + if (v210Converter::has_hardware_support()) { + string v210_shader_src = R"(#version 150 +#extension GL_ARB_compute_shader : enable +#extension GL_ARB_shader_image_load_store : enable +layout(local_size_x=2, local_size_y=16) in; +layout(r16) uniform restrict readonly image2D in_y; +uniform sampler2D in_cbcr; // Of type RG16. +layout(rgb10_a2) uniform restrict writeonly image2D outbuf; +uniform float inv_width, inv_height; + +void main() +{ + int xb = int(gl_GlobalInvocationID.x); // X block number. + int y = int(gl_GlobalInvocationID.y); // Y (actual line). + float yf = (gl_GlobalInvocationID.y + 0.5f) * inv_height; // Y float coordinate. + + // Load and scale CbCr values, sampling in-between the texels to get + // to (left/4 + center/2 + right/4). + vec2 pix_cbcr[3]; + for (int i = 0; i < 3; ++i) { + vec2 a = texture(in_cbcr, vec2((xb * 6 + i * 2) * inv_width, yf)).xy; + vec2 b = texture(in_cbcr, vec2((xb * 6 + i * 2 + 1) * inv_width, yf)).xy; + pix_cbcr[i] = (a + b) * (0.5 * 65535.0 / 1023.0); + } + + // Load and scale the Y values. Note that we use integer coordinates here, + // so we don't need to offset by 0.5. 
+ float pix_y[6]; + for (int i = 0; i < 6; ++i) { + pix_y[i] = imageLoad(in_y, ivec2(xb * 6 + i, y)).x * (65535.0 / 1023.0); + } + + imageStore(outbuf, ivec2(xb * 4 + 0, y), vec4(pix_cbcr[0].x, pix_y[0], pix_cbcr[0].y, 1.0)); + imageStore(outbuf, ivec2(xb * 4 + 1, y), vec4(pix_y[1], pix_cbcr[1].x, pix_y[2], 1.0)); + imageStore(outbuf, ivec2(xb * 4 + 2, y), vec4(pix_cbcr[1].y, pix_y[3], pix_cbcr[2].x, 1.0)); + imageStore(outbuf, ivec2(xb * 4 + 3, y), vec4(pix_y[4], pix_cbcr[2].y, pix_y[5], 1.0)); +} +)"; + GLuint shader_num = movit::compile_shader(v210_shader_src, GL_COMPUTE_SHADER); + check_error(); + v210_program_num = glCreateProgram(); + check_error(); + glAttachShader(v210_program_num, shader_num); + check_error(); + glLinkProgram(v210_program_num); + check_error(); + + GLint success; + glGetProgramiv(v210_program_num, GL_LINK_STATUS, &success); + check_error(); + if (success == GL_FALSE) { + GLchar error_log[1024] = {0}; + glGetProgramInfoLog(v210_program_num, 1024, nullptr, error_log); + fprintf(stderr, "Error linking program: %s\n", error_log); + exit(1); + } + + v210_in_y_pos = glGetUniformLocation(v210_program_num, "in_y"); + check_error(); + v210_in_cbcr_pos = glGetUniformLocation(v210_program_num, "in_cbcr"); + check_error(); + v210_outbuf_pos = glGetUniformLocation(v210_program_num, "outbuf"); + check_error(); + v210_inv_width_pos = glGetUniformLocation(v210_program_num, "inv_width"); + check_error(); + v210_inv_height_pos = glGetUniformLocation(v210_program_num, "inv_height"); + check_error(); + } else { + v210_program_num = 0; + } } ChromaSubsampler::~ChromaSubsampler() @@ -180,14 +265,15 @@ ChromaSubsampler::~ChromaSubsampler() check_error(); glDeleteBuffers(1, &vbo); check_error(); + if (v210_program_num != 0) { + glDeleteProgram(v210_program_num); + check_error(); + } } void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex, GLuint dst2_tex) { - GLuint vao; - glGenVertexArrays(1, &vao); - 
check_error(); - + GLuint vao = resource_pool->create_vec2_vao({ cbcr_position_attribute_index, cbcr_texcoord_attribute_index }, vbo); glBindVertexArray(vao); check_error(); @@ -216,45 +302,30 @@ void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigne glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); check_error(); - float chroma_offset_0[] = { -1.0f / width, 0.0f }; - float chroma_offset_1[] = { -0.0f / width, 0.0f }; - set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_0", chroma_offset_0); - set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_1", chroma_offset_1); - - glUniform1i(cbcr_texture_sampler_uniform, 0); - - glBindBuffer(GL_ARRAY_BUFFER, vbo); + glUniform2f(cbcr_chroma_offset_0_location, -1.0f / width, 0.0f); check_error(); - - for (GLint attr_index : { cbcr_position_attribute_index, cbcr_texcoord_attribute_index }) { - glEnableVertexAttribArray(attr_index); - check_error(); - glVertexAttribPointer(attr_index, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - check_error(); - } + glUniform2f(cbcr_chroma_offset_1_location, -0.0f / width, 0.0f); + check_error(); + glUniform1i(cbcr_texture_sampler_uniform, 0); glDrawArrays(GL_TRIANGLES, 0, 3); check_error(); - for (GLint attr_index : { cbcr_position_attribute_index, cbcr_texcoord_attribute_index }) { - glDisableVertexAttribArray(attr_index); - check_error(); - } - glUseProgram(0); check_error(); glBindFramebuffer(GL_FRAMEBUFFER, 0); check_error(); + glBindVertexArray(0); + check_error(); resource_pool->release_fbo(fbo); - glDeleteVertexArrays(1, &vao); - check_error(); + resource_pool->release_vec2_vao(vao); } void ChromaSubsampler::create_uyvy(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex) { - GLuint vao; - glGenVertexArrays(1, &vao); + GLuint vao = resource_pool->create_vec2_vao({ cbcr_position_attribute_index, cbcr_texcoord_attribute_index }, vbo); + glBindVertexArray(vao); check_error(); glBindVertexArray(vao); @@ 
-295,42 +366,87 @@ void ChromaSubsampler::create_uyvy(GLuint y_tex, GLuint cbcr_tex, unsigned width glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); check_error(); - float y_offset_0[] = { -0.5f / width, 0.0f }; - float y_offset_1[] = { 0.5f / width, 0.0f }; - float cbcr_offset0[] = { -1.0f / width, 0.0f }; - float cbcr_offset1[] = { -0.0f / width, 0.0f }; - set_uniform_vec2(uyvy_program_num, "foo", "luma_offset_0", y_offset_0); - set_uniform_vec2(uyvy_program_num, "foo", "luma_offset_1", y_offset_1); - set_uniform_vec2(uyvy_program_num, "foo", "chroma_offset_0", cbcr_offset0); - set_uniform_vec2(uyvy_program_num, "foo", "chroma_offset_1", cbcr_offset1); + glUniform2f(uyvy_luma_offset_0_location, -0.5f / width, 0.0f); + check_error(); + glUniform2f(uyvy_luma_offset_1_location, 0.5f / width, 0.0f); + check_error(); + glUniform2f(uyvy_chroma_offset_0_location, -1.0f / width, 0.0f); + check_error(); + glUniform2f(uyvy_chroma_offset_1_location, -0.0f / width, 0.0f); + check_error(); glBindBuffer(GL_ARRAY_BUFFER, vbo); check_error(); - for (GLint attr_index : { uyvy_position_attribute_index, uyvy_texcoord_attribute_index }) { - if (attr_index == -1) continue; - glEnableVertexAttribArray(attr_index); - check_error(); - glVertexAttribPointer(attr_index, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); - check_error(); - } - glDrawArrays(GL_TRIANGLES, 0, 3); check_error(); - for (GLint attr_index : { uyvy_position_attribute_index, uyvy_texcoord_attribute_index }) { - if (attr_index == -1) continue; - glDisableVertexAttribArray(attr_index); - check_error(); - } - glActiveTexture(GL_TEXTURE0); check_error(); glUseProgram(0); check_error(); glBindFramebuffer(GL_FRAMEBUFFER, 0); check_error(); + glBindVertexArray(0); + check_error(); resource_pool->release_fbo(fbo); - glDeleteVertexArrays(1, &vao); + resource_pool->release_vec2_vao(vao); +} + +void ChromaSubsampler::create_v210(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex) +{ + 
assert(v210_program_num != 0); + + glUseProgram(v210_program_num); + check_error(); + + glUniform1i(v210_in_y_pos, 0); + check_error(); + glUniform1i(v210_in_cbcr_pos, 1); + check_error(); + glUniform1i(v210_outbuf_pos, 2); + check_error(); + glUniform1f(v210_inv_width_pos, 1.0 / width); + check_error(); + glUniform1f(v210_inv_height_pos, 1.0 / height); + check_error(); + + glActiveTexture(GL_TEXTURE0); + check_error(); + glBindTexture(GL_TEXTURE_2D, y_tex); // We don't actually need to bind it, but we need to set the state. + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + check_error(); + glBindImageTexture(0, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16); // This is the real bind. + check_error(); + + glActiveTexture(GL_TEXTURE1); + check_error(); + glBindTexture(GL_TEXTURE_2D, cbcr_tex); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + check_error(); + + glBindImageTexture(2, dst_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGB10_A2); + check_error(); + + // Actually run the shader. We use workgroups of size 2x16 threads, and each thread + // processes 6x1 input pixels, so round up to number of 12x16 pixel blocks. + glDispatchCompute((width + 11) / 12, (height + 15) / 16, 1); + + glBindTexture(GL_TEXTURE_2D, 0); + check_error(); + glActiveTexture(GL_TEXTURE0); + check_error(); + glUseProgram(0); + check_error(); }