1 #include "chroma_subsampler.h"
2 #include "v210_converter.h"
6 #include <movit/effect_util.h>
7 #include <movit/resource_pool.h>
8 #include <movit/util.h>
10 #include "embedded_files.h"
11 #include "shared/read_file.h"
13 using namespace movit;
// Sets up everything the conversion methods need:
//   - compiles the CbCr (NV12) and UYVY subsampling programs through
//     resource_pool and caches their uniform and attribute locations,
//   - creates the vertex buffer both programs draw from,
//   - builds the v210 compute program when v210Converter reports hardware
//     support.
//
// resource_pool is stored in the member of the same name; the destructor and
// the conversion methods keep using it.
16 ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool)
17 : resource_pool(resource_pool)
// Handed unchanged to both compile_glsl_program() calls below.
19 vector<string> frag_shader_outputs;
21 // Set up stuff for NV12 conversion.
23 // Note: Due to the horizontally co-sited chroma/luma samples in H.264
24 // (chroma position is left for horizontal and center for vertical),
25 // we need to be a bit careful in our subsampling. A diagram will make
26 // this clearer, showing some luma and chroma samples:
41 // Clearly, the rightmost chroma sample here needs to be equivalent to
42 // b/4 + c/2 + d/4. (We could also implement more sophisticated filters,
43 // of course, but as long as the upsampling is not going to be equally
44 // sophisticated, it's probably not worth it.) If we sample once with
45 // no mipmapping, we get just c, i.e., no actual filtering in the
46 // horizontal direction. (For the vertical direction, we can just
47 // sample in the middle to get the right filtering.) One could imagine
48 // we could use mipmapping (assuming we can create mipmaps cheaply),
49 // but then, what we'd get is this:
64 // which ends up sampling equally from a and b, which clearly isn't right. Instead,
65 // we need to do two (non-mipmapped) chroma samples, both hitting exactly in-between
68 // Sampling in-between b and c gives us the sample (b+c)/2, and similarly for c and d.
69 // Taking the average of these gives us (b+c)/4 + (c+d)/4 = b/4 + c/2 + d/4, which is
70 // exactly what we want.
72 // See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7.
// Shader sources are read via read_file(), which is given both a file name
// and the embedded copy of the same shader (data pointer + size).
75 string cbcr_vert_shader = read_file("cbcr_subsample.vert", _binary_cbcr_subsample_vert_data, _binary_cbcr_subsample_vert_size);
76 string cbcr_frag_shader = read_file("cbcr_subsample.frag", _binary_cbcr_subsample_frag_data, _binary_cbcr_subsample_frag_size);
77 cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs);
// NOTE(review): "foo" is presumably the name prefix that movit's
// get_uniform_location() combines with the key -- confirm against
// movit/effect_util.h and the shader source.
79 cbcr_chroma_offset_0_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_0");
81 cbcr_chroma_offset_1_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_1");
// Sampler uniform and vertex attribute locations, looked up by their plain names.
84 cbcr_texture_sampler_uniform = glGetUniformLocation(cbcr_program_num, "cbcr_tex");
86 cbcr_position_attribute_index = glGetAttribLocation(cbcr_program_num, "position");
88 cbcr_texcoord_attribute_index = glGetAttribLocation(cbcr_program_num, "texcoord");
91 // Same, for UYVY conversion.
92 string uyvy_vert_shader = read_file("uyvy_subsample.vert", _binary_uyvy_subsample_vert_data, _binary_uyvy_subsample_vert_size);
93 string uyvy_frag_shader = read_file("uyvy_subsample.frag", _binary_uyvy_subsample_frag_data, _binary_uyvy_subsample_frag_size);
95 uyvy_program_num = resource_pool->compile_glsl_program(uyvy_vert_shader, uyvy_frag_shader, frag_shader_outputs);
// The UYVY program takes two luma offsets in addition to the two chroma offsets.
97 uyvy_luma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_0");
99 uyvy_luma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_1");
101 uyvy_chroma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_0");
103 uyvy_chroma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_1");
106 uyvy_y_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "y_tex");
108 uyvy_cbcr_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "cbcr_tex");
// These locations belong to the UYVY program; they are queried separately
// from the CbCr ones above.
110 uyvy_position_attribute_index = glGetAttribLocation(uyvy_program_num, "position");
112 uyvy_texcoord_attribute_index = glGetAttribLocation(uyvy_program_num, "texcoord");
115 // Shared between the two.
// This VBO is what subsample_chroma() and create_uyvy() pass to
// create_vec2_vao(). (2, GL_FLOAT presumably means two floats per vertex --
// confirm against movit's generate_vbo().)
121 vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices);
124 // v210 compute shader.
125 if (v210Converter::has_hardware_support()) {
126 string v210_shader_src = read_file("v210_subsample.comp", _binary_v210_subsample_comp_data, _binary_v210_subsample_comp_size);
// Unlike the two programs above, this one is compiled and linked by hand
// rather than through the resource pool; the destructor deletes it with
// glDeleteProgram() accordingly.
127 GLuint shader_num = movit::compile_shader(v210_shader_src, GL_COMPUTE_SHADER);
129 v210_program_num = glCreateProgram();
131 glAttachShader(v210_program_num, shader_num);
133 glLinkProgram(v210_program_num);
// Check the link result; on failure, print the info log (at most 1024 bytes)
// to stderr.
137 glGetProgramiv(v210_program_num, GL_LINK_STATUS, &success);
139 if (success == GL_FALSE) {
140 GLchar error_log[1024] = {0};
141 glGetProgramInfoLog(v210_program_num, 1024, nullptr, error_log);
142 fprintf(stderr, "Error linking program: %s\n", error_log);
// Uniform locations used by create_v210().
146 v210_in_y_pos = glGetUniformLocation(v210_program_num, "in_y");
148 v210_in_cbcr_pos = glGetUniformLocation(v210_program_num, "in_cbcr");
150 v210_outbuf_pos = glGetUniformLocation(v210_program_num, "outbuf");
152 v210_inv_width_pos = glGetUniformLocation(v210_program_num, "inv_width");
154 v210_inv_height_pos = glGetUniformLocation(v210_program_num, "inv_height");
// 0 means "no v210 program": the destructor skips glDeleteProgram() and
// create_v210() asserts on it. NOTE(review): presumably this is the branch
// taken without hardware support -- confirm against the full file.
157 v210_program_num = 0;
// Gives back what the constructor acquired: the two GLSL programs are
// returned to the resource pool, the shared VBO is deleted, and the v210
// compute program is deleted if one exists.
161 ChromaSubsampler::~ChromaSubsampler()
163 resource_pool->release_glsl_program(cbcr_program_num);
165 resource_pool->release_glsl_program(uyvy_program_num);
167 glDeleteBuffers(1, &vbo);
// v210_program_num is 0 when no compute program was built; it was created
// with glCreateProgram(), so it is deleted directly rather than via the pool.
169 if (v210_program_num != 0) {
170 glDeleteProgram(v210_program_num);
// Draws cbcr_tex through the CbCr subsampling program into an FBO built on
// dst_tex (and, in one of the two code paths, dst2_tex as well).
//
//   cbcr_tex       source chroma texture; bound to texture unit 0. Its
//                  min filter and S/T wrap modes are overwritten here.
//   width, height  used to size the viewport (width/2 x height/2) and to
//                  scale the horizontal chroma offsets.
//   dst_tex        render target.
//   dst2_tex       second render target, used by the two-texture FBO path.
//
// On return, framebuffer 0 and vertex array 0 are bound, and the FBO and VAO
// have been released back to the resource pool.
175 void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex, GLuint dst2_tex)
177 GLuint vao = resource_pool->create_vec2_vao({ cbcr_position_attribute_index, cbcr_texcoord_attribute_index }, vbo);
178 glBindVertexArray(vao);
// Either a one-attachment or a two-attachment FBO. NOTE(review): the
// condition choosing between the two is not visible in this listing --
// presumably it depends on whether dst2_tex was supplied.
184 fbo = resource_pool->create_fbo(dst_tex);
186 fbo = resource_pool->create_fbo(dst_tex, dst2_tex);
188 glBindFramebuffer(GL_FRAMEBUFFER, fbo);
// Output is half size in both directions.
189 glViewport(0, 0, width/2, height/2);
192 glUseProgram(cbcr_program_num);
195 glActiveTexture(GL_TEXTURE0);
197 glBindTexture(GL_TEXTURE_2D, cbcr_tex);
// Linear filtering without mipmaps, clamped at the edges.
199 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
201 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
203 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// Horizontal offsets for the two chroma taps: -1/width and zero
// (-0.0f / width is just a zero offset). Presumably 1/width is one source
// texel in normalized coordinates -- confirm that width is cbcr_tex's width.
206 glUniform2f(cbcr_chroma_offset_0_location, -1.0f / width, 0.0f);
208 glUniform2f(cbcr_chroma_offset_1_location, -0.0f / width, 0.0f);
// The sampler reads from texture unit 0, where cbcr_tex was bound above.
210 glUniform1i(cbcr_texture_sampler_uniform, 0);
// One triangle, three vertices from the shared VBO.
212 glDrawArrays(GL_TRIANGLES, 0, 3);
// Restore default bindings, then hand the pooled objects back.
217 glBindFramebuffer(GL_FRAMEBUFFER, 0);
219 glBindVertexArray(0);
222 resource_pool->release_fbo(fbo);
223 resource_pool->release_vec2_vao(vao);
// Combines the luma texture y_tex and the chroma texture cbcr_tex into
// dst_tex by drawing with the UYVY program.
//
//   y_tex          luma source; bound to texture unit 0.
//   cbcr_tex       chroma source; bound to texture unit 1.
//   width, height  used to size the viewport (width/2 x height) and to scale
//                  the horizontal sampling offsets.
//   dst_tex        render target.
//
// Both source textures get GL_LINEAR minification and clamp-to-edge wrapping;
// these are texture parameters, so the textures keep them after the call.
//
// On return, framebuffer 0 and vertex array 0 are bound, texture unit 0 is
// active, and the FBO and VAO have been released back to the resource pool.
// vbo is left bound to GL_ARRAY_BUFFER.
226 void ChromaSubsampler::create_uyvy(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
// The VAO must be built from the attribute locations of the program that
// actually draws. The UYVY program's locations are queried separately in the
// constructor and are not guaranteed to match the CbCr program's.
228 GLuint vao = resource_pool->create_vec2_vao({ uyvy_position_attribute_index, uyvy_texcoord_attribute_index }, vbo);
229 glBindVertexArray(vao);
235 GLuint fbo = resource_pool->create_fbo(dst_tex);
236 glBindFramebuffer(GL_FRAMEBUFFER, fbo);
// Half width, full height.
237 glViewport(0, 0, width/2, height);
240 glUseProgram(uyvy_program_num);
// Samplers: luma on texture unit 0, chroma on texture unit 1.
243 glUniform1i(uyvy_y_texture_sampler_uniform, 0);
245 glUniform1i(uyvy_cbcr_texture_sampler_uniform, 1);
248 glActiveTexture(GL_TEXTURE0);
250 glBindTexture(GL_TEXTURE_2D, y_tex);
252 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
254 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
256 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
259 glActiveTexture(GL_TEXTURE1);
261 glBindTexture(GL_TEXTURE_2D, cbcr_tex);
263 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
265 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
267 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// Two luma taps, half a unit of 1/width to either side.
270 glUniform2f(uyvy_luma_offset_0_location, -0.5f / width, 0.0f);
272 glUniform2f(uyvy_luma_offset_1_location, 0.5f / width, 0.0f);
// Two chroma taps, at -1/width and zero -- the same pair of offsets
// subsample_chroma() uses (-0.0f / width is just a zero offset).
274 glUniform2f(uyvy_chroma_offset_0_location, -1.0f / width, 0.0f);
276 glUniform2f(uyvy_chroma_offset_1_location, -0.0f / width, 0.0f);
279 glBindBuffer(GL_ARRAY_BUFFER, vbo);
// One triangle, three vertices from the shared VBO.
282 glDrawArrays(GL_TRIANGLES, 0, 3);
// Leave texture unit 0 active and restore the default bindings before
// handing the pooled objects back.
285 glActiveTexture(GL_TEXTURE0);
289 glBindFramebuffer(GL_FRAMEBUFFER, 0);
291 glBindVertexArray(0);
294 resource_pool->release_fbo(fbo);
295 resource_pool->release_vec2_vao(vao);
// Runs the v210 compute program over y_tex and cbcr_tex, writing to dst_tex.
//
//   y_tex          luma source; bound as a read-only GL_R16 image on image
//                  unit 0.
//   cbcr_tex       chroma source; bound as an ordinary texture on texture
//                  unit 1.
//   width, height  passed to the shader as 1/width and 1/height, and used to
//                  size the dispatch.
//   dst_tex        output; bound as a write-only GL_RGB10_A2 image on image
//                  unit 2.
//
// Requires that the constructor built the v210 program (asserted). Both
// source textures get GL_LINEAR minification and clamp-to-edge wrapping set
// on them. On return, texture unit 1 has no 2D texture bound and texture
// unit 0 is active.
298 void ChromaSubsampler::create_v210(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
// v210_program_num is 0 when no compute program is available.
300 assert(v210_program_num != 0);
302 glUseProgram(v210_program_num);
// Unit numbers for the three resources; they match the bindings made below
// (image unit 0, texture unit 1, image unit 2).
305 glUniform1i(v210_in_y_pos, 0);
307 glUniform1i(v210_in_cbcr_pos, 1);
309 glUniform1i(v210_outbuf_pos, 2);
// Reciprocals of the dimensions, computed in double and passed as float.
311 glUniform1f(v210_inv_width_pos, 1.0 / width);
313 glUniform1f(v210_inv_height_pos, 1.0 / height);
316 glActiveTexture(GL_TEXTURE0);
318 glBindTexture(GL_TEXTURE_2D, y_tex); // We don't actually need to bind it, but we need to set the state.
320 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
322 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
324 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
326 glBindImageTexture(0, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16); // This is the real bind.
329 glActiveTexture(GL_TEXTURE1);
331 glBindTexture(GL_TEXTURE_2D, cbcr_tex);
333 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
335 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
337 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// Output image: mip level 0, non-layered, write-only.
340 glBindImageTexture(2, dst_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGB10_A2);
343 // Actually run the shader. We use workgroups of size 2x16 threads, and each thread
344 // processes 6x1 input pixels, so round up to number of 12x16 pixel blocks.
345 glDispatchCompute((width + 11) / 12, (height + 15) / 16, 1);
// GL_TEXTURE1 is still the active unit here, so this unbinds cbcr_tex; then
// the active unit is reset to 0.
347 glBindTexture(GL_TEXTURE_2D, 0);
349 glActiveTexture(GL_TEXTURE0);