From: Steinar H. Gunderson Date: Mon, 3 Dec 2018 21:48:21 +0000 (+0100) Subject: Move most Nageru embedded shaders to the read_file() system. X-Git-Tag: 1.8.0~65 X-Git-Url: https://git.sesse.net/?p=nageru;a=commitdiff_plain;h=ebd973909f6985622c1e77753291f84919f5fdf5 Move most Nageru embedded shaders to the read_file() system. --- diff --git a/nageru/cbcr_subsample.frag b/nageru/cbcr_subsample.frag new file mode 100644 index 0000000..3714f99 --- /dev/null +++ b/nageru/cbcr_subsample.frag @@ -0,0 +1,9 @@ +#version 130 + +in vec2 tc0, tc1; +uniform sampler2D cbcr_tex; +out vec4 FragColor, FragColor2; +void main() { + FragColor = 0.5 * (texture(cbcr_tex, tc0) + texture(cbcr_tex, tc1)); + FragColor2 = FragColor; +} diff --git a/nageru/cbcr_subsample.vert b/nageru/cbcr_subsample.vert new file mode 100644 index 0000000..982cb87 --- /dev/null +++ b/nageru/cbcr_subsample.vert @@ -0,0 +1,21 @@ +#version 130 + +in vec2 position; +in vec2 texcoord; +out vec2 tc0, tc1; +uniform vec2 foo_chroma_offset_0; +uniform vec2 foo_chroma_offset_1; + +void main() +{ + // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: + // + // 2.000 0.000 0.000 -1.000 + // 0.000 2.000 0.000 -1.000 + // 0.000 0.000 -2.000 -1.000 + // 0.000 0.000 0.000 1.000 + gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); + vec2 flipped_tc = texcoord; + tc0 = flipped_tc + foo_chroma_offset_0; + tc1 = flipped_tc + foo_chroma_offset_1; +}; diff --git a/nageru/chroma_subsampler.cpp b/nageru/chroma_subsampler.cpp index 96adef1..14cb4d2 100644 --- a/nageru/chroma_subsampler.cpp +++ b/nageru/chroma_subsampler.cpp @@ -7,6 +7,9 @@ #include #include +#include "embedded_files.h" +#include "shared/read_file.h" + using namespace movit; using namespace std; @@ -69,37 +72,8 @@ ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool) // See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7. // Cb/Cr shader. 
- string cbcr_vert_shader = - "#version 130 \n" - " \n" - "in vec2 position; \n" - "in vec2 texcoord; \n" - "out vec2 tc0, tc1; \n" - "uniform vec2 foo_chroma_offset_0; \n" - "uniform vec2 foo_chroma_offset_1; \n" - " \n" - "void main() \n" - "{ \n" - " // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n" - " // \n" - " // 2.000 0.000 0.000 -1.000 \n" - " // 0.000 2.000 0.000 -1.000 \n" - " // 0.000 0.000 -2.000 -1.000 \n" - " // 0.000 0.000 0.000 1.000 \n" - " gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n" - " vec2 flipped_tc = texcoord; \n" - " tc0 = flipped_tc + foo_chroma_offset_0; \n" - " tc1 = flipped_tc + foo_chroma_offset_1; \n" - "} \n"; - string cbcr_frag_shader = - "#version 130 \n" - "in vec2 tc0, tc1; \n" - "uniform sampler2D cbcr_tex; \n" - "out vec4 FragColor, FragColor2; \n" - "void main() { \n" - " FragColor = 0.5 * (texture(cbcr_tex, tc0) + texture(cbcr_tex, tc1)); \n" - " FragColor2 = FragColor; \n" - "} \n"; + string cbcr_vert_shader = read_file("cbcr_subsample.vert", _binary_cbcr_subsample_vert_data, _binary_cbcr_subsample_vert_size); + string cbcr_frag_shader = read_file("cbcr_subsample.frag", _binary_cbcr_subsample_frag_data, _binary_cbcr_subsample_frag_size); cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs); check_error(); cbcr_chroma_offset_0_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_0"); @@ -115,45 +89,8 @@ ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool) check_error(); // Same, for UYVY conversion. 
- string uyvy_vert_shader = - "#version 130 \n" - " \n" - "in vec2 position; \n" - "in vec2 texcoord; \n" - "out vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; \n" - "uniform vec2 foo_luma_offset_0; \n" - "uniform vec2 foo_luma_offset_1; \n" - "uniform vec2 foo_chroma_offset_0; \n" - "uniform vec2 foo_chroma_offset_1; \n" - " \n" - "void main() \n" - "{ \n" - " // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n" - " // \n" - " // 2.000 0.000 0.000 -1.000 \n" - " // 0.000 2.000 0.000 -1.000 \n" - " // 0.000 0.000 -2.000 -1.000 \n" - " // 0.000 0.000 0.000 1.000 \n" - " gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n" - " vec2 flipped_tc = texcoord; \n" - " y_tc0 = flipped_tc + foo_luma_offset_0; \n" - " y_tc1 = flipped_tc + foo_luma_offset_1; \n" - " cbcr_tc0 = flipped_tc + foo_chroma_offset_0; \n" - " cbcr_tc1 = flipped_tc + foo_chroma_offset_1; \n" - "} \n"; - string uyvy_frag_shader = - "#version 130 \n" - "in vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; \n" - "uniform sampler2D y_tex, cbcr_tex; \n" - "out vec4 FragColor; \n" - "void main() { \n" - " float y0 = texture(y_tex, y_tc0).r; \n" - " float y1 = texture(y_tex, y_tc1).r; \n" - " vec2 cbcr0 = texture(cbcr_tex, cbcr_tc0).rg; \n" - " vec2 cbcr1 = texture(cbcr_tex, cbcr_tc1).rg; \n" - " vec2 cbcr = 0.5 * (cbcr0 + cbcr1); \n" - " FragColor = vec4(cbcr.g, y0, cbcr.r, y1); \n" - "} \n"; + string uyvy_vert_shader = read_file("uyvy_subsample.vert", _binary_uyvy_subsample_vert_data, _binary_uyvy_subsample_vert_size); + string uyvy_frag_shader = read_file("uyvy_subsample.frag", _binary_uyvy_subsample_frag_data, _binary_uyvy_subsample_frag_size); uyvy_program_num = resource_pool->compile_glsl_program(uyvy_vert_shader, uyvy_frag_shader, frag_shader_outputs); check_error(); @@ -186,43 +123,7 @@ ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool) // v210 compute shader. 
if (v210Converter::has_hardware_support()) { - string v210_shader_src = R"(#version 150 -#extension GL_ARB_compute_shader : enable -#extension GL_ARB_shader_image_load_store : enable -layout(local_size_x=2, local_size_y=16) in; -layout(r16) uniform restrict readonly image2D in_y; -uniform sampler2D in_cbcr; // Of type RG16. -layout(rgb10_a2) uniform restrict writeonly image2D outbuf; -uniform float inv_width, inv_height; - -void main() -{ - int xb = int(gl_GlobalInvocationID.x); // X block number. - int y = int(gl_GlobalInvocationID.y); // Y (actual line). - float yf = (gl_GlobalInvocationID.y + 0.5f) * inv_height; // Y float coordinate. - - // Load and scale CbCr values, sampling in-between the texels to get - // to (left/4 + center/2 + right/4). - vec2 pix_cbcr[3]; - for (int i = 0; i < 3; ++i) { - vec2 a = texture(in_cbcr, vec2((xb * 6 + i * 2) * inv_width, yf)).xy; - vec2 b = texture(in_cbcr, vec2((xb * 6 + i * 2 + 1) * inv_width, yf)).xy; - pix_cbcr[i] = (a + b) * (0.5 * 65535.0 / 1023.0); - } - - // Load and scale the Y values. Note that we use integer coordinates here, - // so we don't need to offset by 0.5. 
- float pix_y[6]; - for (int i = 0; i < 6; ++i) { - pix_y[i] = imageLoad(in_y, ivec2(xb * 6 + i, y)).x * (65535.0 / 1023.0); - } - - imageStore(outbuf, ivec2(xb * 4 + 0, y), vec4(pix_cbcr[0].x, pix_y[0], pix_cbcr[0].y, 1.0)); - imageStore(outbuf, ivec2(xb * 4 + 1, y), vec4(pix_y[1], pix_cbcr[1].x, pix_y[2], 1.0)); - imageStore(outbuf, ivec2(xb * 4 + 2, y), vec4(pix_cbcr[1].y, pix_y[3], pix_cbcr[2].x, 1.0)); - imageStore(outbuf, ivec2(xb * 4 + 3, y), vec4(pix_y[4], pix_cbcr[2].y, pix_y[5], 1.0)); -} -)"; + string v210_shader_src = read_file("v210_subsample.comp", _binary_v210_subsample_comp_data, _binary_v210_subsample_comp_size); GLuint shader_num = movit::compile_shader(v210_shader_src, GL_COMPUTE_SHADER); check_error(); v210_program_num = glCreateProgram(); diff --git a/nageru/embedded_files.h b/nageru/embedded_files.h new file mode 100644 index 0000000..cbc14e3 --- /dev/null +++ b/nageru/embedded_files.h @@ -0,0 +1,28 @@ +#ifndef _EMBEDDED_FILES_H +#define _EMBEDDED_FILES_H 1 + +// Files that are embedded into the binary as part of the build process. +// They are used as a backup if the files are not available on disk +// (which is typically the case if the program is installed, as opposed to +// being run during development). 
+ +#include + +extern const unsigned char *_binary_cbcr_subsample_vert_data; +extern const size_t _binary_cbcr_subsample_vert_size; +extern const unsigned char *_binary_cbcr_subsample_frag_data; +extern const size_t _binary_cbcr_subsample_frag_size; +extern const unsigned char *_binary_uyvy_subsample_vert_data; +extern const size_t _binary_uyvy_subsample_vert_size; +extern const unsigned char *_binary_uyvy_subsample_frag_data; +extern const size_t _binary_uyvy_subsample_frag_size; +extern const unsigned char *_binary_v210_subsample_comp_data; +extern const size_t _binary_v210_subsample_comp_size; +extern const unsigned char *_binary_timecode_vert_data; +extern const size_t _binary_timecode_vert_size; +extern const unsigned char *_binary_timecode_frag_data; +extern const size_t _binary_timecode_frag_size; +extern const unsigned char *_binary_timecode_10bit_frag_data; +extern const size_t _binary_timecode_10bit_frag_size; + +#endif // !defined(_EMBEDDED_FILES_H) diff --git a/nageru/meson.build b/nageru/meson.build index 58a8feb..3c74a79 100644 --- a/nageru/meson.build +++ b/nageru/meson.build @@ -184,6 +184,15 @@ endif srcs += qt_files srcs += proto_generated +# Shaders needed at runtime. +shaders = ['cbcr_subsample.vert', 'cbcr_subsample.frag', 'uyvy_subsample.vert', 'uyvy_subsample.frag', 'v210_subsample.comp', 'timecode.vert', 'timecode.frag', 'timecode_10bit.frag'] +foreach shader : shaders + run_command('ln', '-s', join_paths(meson.current_source_dir(), shader), meson.current_build_dir()) +endforeach + +shader_srcs = bin2h_gen.process(shaders) +srcs += shader_srcs + # Everything except main.cpp. (We do this because if you specify a .cpp file in # both Nageru and Kaeru, it gets compiled twice. In the older Makefiles, Kaeru # depended on a smaller set of objects.) 
diff --git a/nageru/timecode.frag b/nageru/timecode.frag new file mode 100644 index 0000000..01333de --- /dev/null +++ b/nageru/timecode.frag @@ -0,0 +1,13 @@ +#version 130 + +in vec2 tc0; +uniform sampler2D tex; +out vec4 Y, CbCr, YCbCr; + +void main() { + vec4 gray = texture(tex, tc0);; + gray.r = gray.r * ((235.0-16.0)/255.0) + 16.0/255.0; // Limited-range Y'CbCr. + CbCr = vec4(128.0/255.0, 128.0/255.0, 0.0, 1.0);; + Y = gray.rrra; + YCbCr = vec4(Y.r, CbCr.r, CbCr.g, CbCr.a); +} diff --git a/nageru/timecode.vert b/nageru/timecode.vert new file mode 100644 index 0000000..ab4d42e --- /dev/null +++ b/nageru/timecode.vert @@ -0,0 +1,17 @@ +#version 130 + +in vec2 position; +in vec2 texcoord; +out vec2 tc0; + +void main() +{ + // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: + // + // 2.000 0.000 0.000 -1.000 + // 0.000 2.000 0.000 -1.000 + // 0.000 0.000 -2.000 -1.000 + // 0.000 0.000 0.000 1.000 + gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); + tc0 = texcoord; +} diff --git a/nageru/timecode_10bit.frag b/nageru/timecode_10bit.frag new file mode 100644 index 0000000..d9b7e69 --- /dev/null +++ b/nageru/timecode_10bit.frag @@ -0,0 +1,13 @@ +#version 130 + +in vec2 tc0; +uniform sampler2D tex; +out vec4 Y, CbCr, YCbCr; + +void main() { + vec4 gray = texture(tex, tc0);; + gray.r = gray.r * ((940.0-16.0)/65535.0) + 16.0/65535.0; // Limited-range Y'CbCr. 
+ CbCr = vec4(512.0/65535.0, 512.0/65535.0, 0.0, 1.0);; + Y = gray.rrra; + YCbCr = vec4(Y.r, CbCr.r, CbCr.g, CbCr.a); +} diff --git a/nageru/timecode_renderer.cpp b/nageru/timecode_renderer.cpp index a923acd..2ada19a 100644 --- a/nageru/timecode_renderer.cpp +++ b/nageru/timecode_renderer.cpp @@ -14,6 +14,8 @@ #include #include "flags.h" +#include "embedded_files.h" +#include "shared/read_file.h" using namespace std; using namespace movit; @@ -21,44 +23,13 @@ using namespace movit; TimecodeRenderer::TimecodeRenderer(movit::ResourcePool *resource_pool, unsigned display_width, unsigned display_height) : resource_pool(resource_pool), display_width(display_width), display_height(display_height), height(28) { - string vert_shader = - "#version 130 \n" - " \n" - "in vec2 position; \n" - "in vec2 texcoord; \n" - "out vec2 tc0; \n" - " \n" - "void main() \n" - "{ \n" - " // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n" - " // \n" - " // 2.000 0.000 0.000 -1.000 \n" - " // 0.000 2.000 0.000 -1.000 \n" - " // 0.000 0.000 -2.000 -1.000 \n" - " // 0.000 0.000 0.000 1.000 \n" - " gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n" - " tc0 = texcoord; \n" - "} \n"; - string frag_shader = - "#version 130 \n" - "in vec2 tc0; \n" - "uniform sampler2D tex; \n" - "out vec4 Y, CbCr, YCbCr; \n" - "void main() { \n" - " vec4 gray = texture(tex, tc0); \n"; + string vert_shader = read_file("timecode.vert", _binary_timecode_vert_data, _binary_timecode_vert_size); + string frag_shader; if (global_flags.ten_bit_output) { - frag_shader += - " gray.r = gray.r * ((940.0-16.0)/65535.0) + 16.0/65535.0; \n" // Limited-range Y'CbCr. - " CbCr = vec4(512.0/65535.0, 512.0/65535.0, 0.0, 1.0); \n"; + frag_shader = read_file("timecode_10bit.frag", _binary_timecode_10bit_frag_data, _binary_timecode_10bit_frag_size); } else { - frag_shader += - " gray.r = gray.r * ((235.0-16.0)/255.0) + 16.0/255.0; \n" // Limited-range Y'CbCr. 
- " CbCr = vec4(128.0/255.0, 128.0/255.0, 0.0, 1.0); \n"; + frag_shader = read_file("timecode.frag", _binary_timecode_frag_data, _binary_timecode_frag_size); } - frag_shader += - " Y = gray.rrra; \n" - " YCbCr = vec4(Y.r, CbCr.r, CbCr.g, CbCr.a); \n" - "} \n"; vector frag_shader_outputs; program_num = resource_pool->compile_glsl_program(vert_shader, frag_shader, frag_shader_outputs); diff --git a/nageru/uyvy_subsample.frag b/nageru/uyvy_subsample.frag new file mode 100644 index 0000000..80e05b0 --- /dev/null +++ b/nageru/uyvy_subsample.frag @@ -0,0 +1,13 @@ +#version 130 + +in vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; +uniform sampler2D y_tex, cbcr_tex; +out vec4 FragColor; +void main() { + float y0 = texture(y_tex, y_tc0).r; + float y1 = texture(y_tex, y_tc1).r; + vec2 cbcr0 = texture(cbcr_tex, cbcr_tc0).rg; + vec2 cbcr1 = texture(cbcr_tex, cbcr_tc1).rg; + vec2 cbcr = 0.5 * (cbcr0 + cbcr1); + FragColor = vec4(cbcr.g, y0, cbcr.r, y1); +}; diff --git a/nageru/uyvy_subsample.vert b/nageru/uyvy_subsample.vert new file mode 100644 index 0000000..04c1e82 --- /dev/null +++ b/nageru/uyvy_subsample.vert @@ -0,0 +1,25 @@ +#version 130 + +in vec2 position; +in vec2 texcoord; +out vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; +uniform vec2 foo_luma_offset_0; +uniform vec2 foo_luma_offset_1; +uniform vec2 foo_chroma_offset_0; +uniform vec2 foo_chroma_offset_1; + +void main() +{ + // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: + // + // 2.000 0.000 0.000 -1.000 + // 0.000 2.000 0.000 -1.000 + // 0.000 0.000 -2.000 -1.000 + // 0.000 0.000 0.000 1.000 + gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); + vec2 flipped_tc = texcoord; + y_tc0 = flipped_tc + foo_luma_offset_0; + y_tc1 = flipped_tc + foo_luma_offset_1; + cbcr_tc0 = flipped_tc + foo_chroma_offset_0; + cbcr_tc1 = flipped_tc + foo_chroma_offset_1; +}; diff --git a/nageru/v210_subsample.comp b/nageru/v210_subsample.comp new file mode 100644 index 0000000..fccbbc9 --- /dev/null +++ 
b/nageru/v210_subsample.comp @@ -0,0 +1,37 @@ +#version 150 +#extension GL_ARB_compute_shader : enable +#extension GL_ARB_shader_image_load_store : enable + +layout(local_size_x=2, local_size_y=16) in; +layout(r16) uniform restrict readonly image2D in_y; +uniform sampler2D in_cbcr; // Of type RG16. +layout(rgb10_a2) uniform restrict writeonly image2D outbuf; +uniform float inv_width, inv_height; + +void main() +{ + int xb = int(gl_GlobalInvocationID.x); // X block number. + int y = int(gl_GlobalInvocationID.y); // Y (actual line). + float yf = (gl_GlobalInvocationID.y + 0.5f) * inv_height; // Y float coordinate. + + // Load and scale CbCr values, sampling in-between the texels to get + // to (left/4 + center/2 + right/4). + vec2 pix_cbcr[3]; + for (int i = 0; i < 3; ++i) { + vec2 a = texture(in_cbcr, vec2((xb * 6 + i * 2) * inv_width, yf)).xy; + vec2 b = texture(in_cbcr, vec2((xb * 6 + i * 2 + 1) * inv_width, yf)).xy; + pix_cbcr[i] = (a + b) * (0.5 * 65535.0 / 1023.0); + } + + // Load and scale the Y values. Note that we use integer coordinates here, + // so we don't need to offset by 0.5. 
// NOTE: in this flattened patch, the lines that hold shared/read_file.cpp also
// hold the end of the nageru/v210_subsample.comp hunk and the whole
// shared/read_file.h hunk. Those two are kept below as comments, unchanged
// apart from restored line breaks, so that no patch content is lost. Only
// shared/read_file.cpp is rewritten.
//
// ---- end of the nageru/v210_subsample.comp hunk ----
// + float pix_y[6];
// + for (int i = 0; i < 6; ++i) {
// + pix_y[i] = imageLoad(in_y, ivec2(xb * 6 + i, y)).x * (65535.0 / 1023.0);
// + }
// +
// + imageStore(outbuf, ivec2(xb * 4 + 0, y), vec4(pix_cbcr[0].x, pix_y[0], pix_cbcr[0].y, 1.0));
// + imageStore(outbuf, ivec2(xb * 4 + 1, y), vec4(pix_y[1], pix_cbcr[1].x, pix_y[2], 1.0));
// + imageStore(outbuf, ivec2(xb * 4 + 2, y), vec4(pix_cbcr[1].y, pix_y[3], pix_cbcr[2].x, 1.0));
// + imageStore(outbuf, ivec2(xb * 4 + 3, y), vec4(pix_y[4], pix_cbcr[2].y, pix_y[5], 1.0));
// +}
//
// ---- original patch header of shared/read_file.cpp ----
// diff --git a/shared/read_file.cpp b/shared/read_file.cpp
// new file mode 100644
// index 0000000..2310303
// --- /dev/null
// +++ b/shared/read_file.cpp
// @@ -0,0 +1,53 @@
//
// In the patch this file starts with `#include "shared/read_file.h"`, the
// header that declares read_file() with its default arguments.

#include <stdio.h>
#include <stdlib.h>

#include <string>

using namespace std;

// Return the contents of <filename> as a string.
//
// The on-disk file is preferred, since that makes it possible to work on
// shaders without recompiling all the time. If it cannot be opened and
// <start> is non-null, the <size> bytes starting at <start> (the copy that
// was compiled into the binary) are returned instead.
//
// This function does not return on failure: if the file cannot be opened and
// no fallback was given, or if seeking or reading fails, a message is written
// to stderr and the process exits with status 1.
string read_file(const string &filename, const unsigned char *start, const size_t size)
{
	// Binary mode, so that the size reported by ftell() is exactly the number
	// of bytes fread() will deliver (it also matches the raw embedded copy).
	FILE *fp = fopen(filename.c_str(), "rb");
	if (fp == nullptr) {
		// Fall back to the version we compiled in.
		if (start != nullptr) {
			const char *embedded = reinterpret_cast<const char *>(start);
			return string(embedded, embedded + size);
		}

		perror(filename.c_str());
		exit(1);
	}

	if (fseek(fp, 0, SEEK_END) == -1) {
		perror("fseek(SEEK_END)");
		exit(1);
	}

	// ftell() returns a long and reports failure as -1; check it before the
	// value is used as a length.
	const long disk_size = ftell(fp);
	if (disk_size == -1) {
		perror("ftell");
		exit(1);
	}

	if (fseek(fp, 0, SEEK_SET) == -1) {
		perror("fseek(SEEK_SET)");
		exit(1);
	}

	const size_t bytes = static_cast<size_t>(disk_size);
	string str;
	str.resize(bytes);

	// An empty file is valid and needs no read at all; fread() with a
	// zero-sized element would return 0 and look like a failure.
	if (bytes > 0 && fread(&str[0], bytes, 1, fp) != 1) {
		// fread() returns an element count, never a negative value, so
		// ferror() is what tells an I/O error apart from a short read.
		if (ferror(fp)) {
			perror("fread");
		} else {
			fprintf(stderr, "Short read when trying to read %ld bytes from %s\n",
				disk_size, filename.c_str());
		}
		exit(1);
	}
	fclose(fp);

	return str;
}

// ---- shared/read_file.h hunk ----
// diff --git a/shared/read_file.h b/shared/read_file.h
// new file mode 100644
// index 0000000..27022ab
// --- /dev/null
// +++ b/shared/read_file.h
// @@ -0,0 +1,14 @@
// +#ifndef _READ_FILE_H
// +#define _READ_FILE_H 1
// +
// +#include
// +
// +#include
// +
// +// Read the contents of and return it as a string.
// +// If the file does not exist, which is typical outside of development,
// +// return the given memory area instead (presumably created by bin2h).
// +
// +std::string read_file(const std::string &filename, const unsigned char *start = nullptr, const size_t size = 0);
// +
// +#endif