1 #include "chroma_subsampler.h"
2 #include "v210_converter.h"
9 #include <movit/effect_util.h>
10 #include <movit/resource_pool.h>
11 #include <movit/util.h>
13 #include "embedded_files.h"
14 #include "shared/read_file.h"
16 using namespace movit;
// Constructs the subsampler: compiles the Cb/Cr (NV12) and UYVY subsampling
// programs through the given ResourcePool, caches their uniform and attribute
// locations, creates the vertex buffer used for drawing, and, if
// v210Converter::has_hardware_support() says so, builds the v210 compute program.
//
// resource_pool: pool used to compile the GLSL programs here; the same pointer
//   is kept in the member of the same name for later use.
19 ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool)
20 : resource_pool(resource_pool)
// Passed to both compile_glsl_program() calls below; no entries are added to it
// in the lines visible here.
22 vector<string> frag_shader_outputs;
24 // Set up stuff for NV12 conversion.
26 // Note: Due to the horizontally co-sited chroma/luma samples in H.264
27 // (chroma position is left for horizontal and center for vertical),
28 // we need to be a bit careful in our subsampling. A diagram will make
29 // this clearer, showing some luma and chroma samples:
// NOTE(review): the diagrams this comment refers to are not present in this
// view of the file; a, b, c and d below presumably denote horizontally
// adjacent source samples -- confirm against the full file.
44 // Clearly, the rightmost chroma sample here needs to be equivalent to
45 // b/4 + c/2 + d/4. (We could also implement more sophisticated filters,
46 // of course, but as long as the upsampling is not going to be equally
47 // sophisticated, it's probably not worth it.) If we sample once with
48 // no mipmapping, we get just c, i.e., no actual filtering in the
49 // horizontal direction. (For the vertical direction, we can just
50 // sample in the middle to get the right filtering.) One could imagine
51 // we could use mipmapping (assuming we can create mipmaps cheaply),
52 // but then, what we'd get is this:
67 // which ends up sampling equally from a and b, which clearly isn't right. Instead,
68 // we need to do two (non-mipmapped) chroma samples, both hitting exactly in-between
71 // Sampling in-between b and c gives us the sample (b+c)/2, and similarly for c and d.
72 // Taking the average of these gives us (b+c)/4 + (c+d)/4 = b/4 + c/2 + d/4, which is
73 // exactly what we want.
75 // See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7.
// Fetch the vertex and fragment shader sources via read_file() (file name plus
// the embedded data/size symbols) and compile them through the resource pool.
78 string cbcr_vert_shader = read_file("cbcr_subsample.vert", _binary_cbcr_subsample_vert_data, _binary_cbcr_subsample_vert_size);
79 string cbcr_frag_shader = read_file("cbcr_subsample.frag", _binary_cbcr_subsample_frag_data, _binary_cbcr_subsample_frag_size);
80 cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs);
// Cache the locations of the two chroma sampling offsets.
// NOTE(review): presumably Movit joins the "foo" prefix and the key into the
// actual uniform name -- confirm against movit/util.h.
82 cbcr_chroma_offset_0_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_0");
84 cbcr_chroma_offset_1_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_1");
// Sampler uniform and vertex attribute locations, looked up by name.
87 cbcr_texture_sampler_uniform = glGetUniformLocation(cbcr_program_num, "cbcr_tex");
89 cbcr_position_attribute_index = glGetAttribLocation(cbcr_program_num, "position");
91 cbcr_texcoord_attribute_index = glGetAttribLocation(cbcr_program_num, "texcoord");
94 // Same, for UYVY conversion.
95 string uyvy_vert_shader = read_file("uyvy_subsample.vert", _binary_uyvy_subsample_vert_data, _binary_uyvy_subsample_vert_size);
96 string uyvy_frag_shader = read_file("uyvy_subsample.frag", _binary_uyvy_subsample_frag_data, _binary_uyvy_subsample_frag_size);
98 uyvy_program_num = resource_pool->compile_glsl_program(uyvy_vert_shader, uyvy_frag_shader, frag_shader_outputs);
// The UYVY program has two luma offsets in addition to the two chroma offsets.
100 uyvy_luma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_0");
102 uyvy_luma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_1");
104 uyvy_chroma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_0");
106 uyvy_chroma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_1");
// Separate samplers for the luma and chroma inputs.
109 uyvy_y_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "y_tex");
111 uyvy_cbcr_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "cbcr_tex");
// Attribute locations are queried per program; these belong to the UYVY program.
113 uyvy_position_attribute_index = glGetAttribLocation(uyvy_program_num, "position");
115 uyvy_texcoord_attribute_index = glGetAttribLocation(uyvy_program_num, "texcoord");
118 // Shared between the two.
// NOTE(review): the definition of `vertices` is not visible in this view.
124 vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices);
127 // v210 compute shader.
128 if (v210Converter::has_hardware_support()) {
129 string v210_shader_src = read_file("v210_subsample.comp", _binary_v210_subsample_comp_data, _binary_v210_subsample_comp_size);
130 GLuint shader_num = movit::compile_shader(v210_shader_src, GL_COMPUTE_SHADER);
// This program is created and linked by hand instead of going through
// resource_pool->compile_glsl_program().
// NOTE(review): no glDeleteShader() for shader_num is visible in this view.
132 v210_program_num = glCreateProgram();
134 glAttachShader(v210_program_num, shader_num);
136 glLinkProgram(v210_program_num);
// Query the link result; the declaration of `success` is not visible in this view.
140 glGetProgramiv(v210_program_num, GL_LINK_STATUS, &success);
142 if (success == GL_FALSE) {
// Fetch up to 1024 bytes of the link log and print it to stderr.
// NOTE(review): whatever follows the message (abort, return, ...) is not visible here.
143 GLchar error_log[1024] = {0};
144 glGetProgramInfoLog(v210_program_num, 1024, nullptr, error_log);
145 fprintf(stderr, "Error linking program: %s\n", error_log);
// Cache the uniform locations that create_v210() sets before dispatching.
149 v210_in_y_pos = glGetUniformLocation(v210_program_num, "in_y");
151 v210_in_cbcr_pos = glGetUniformLocation(v210_program_num, "in_cbcr");
153 v210_outbuf_pos = glGetUniformLocation(v210_program_num, "outbuf");
155 v210_inv_width_pos = glGetUniformLocation(v210_program_num, "inv_width");
157 v210_inv_height_pos = glGetUniformLocation(v210_program_num, "inv_height");
// Presumably the branch taken when there is no hardware support (the else is
// not visible here). 0 is the "no v210 program" marker that the destructor
// and create_v210() test for.
160 v210_program_num = 0;
// Releases what the constructor set up: hands the two pooled GLSL programs
// back to the resource pool, deletes the vertex buffer, and deletes the v210
// program if one exists.
164 ChromaSubsampler::~ChromaSubsampler()
166 resource_pool->release_glsl_program(cbcr_program_num);
168 resource_pool->release_glsl_program(uyvy_program_num);
170 glDeleteBuffers(1, &vbo);
// The v210 program was created with glCreateProgram() rather than through the
// pool, so it is deleted directly; 0 means it was never created.
172 if (v210_program_num != 0) {
173 glDeleteProgram(v210_program_num);
// Renders cbcr_tex through the Cb/Cr subsampling program into a framebuffer
// built from dst_tex (or from dst_tex and dst2_tex), using a viewport of
// width/2 x height/2.
//
// cbcr_tex: source chroma texture; GL_LINEAR minification and clamp-to-edge
//   wrapping are set on it as a side effect.
// width, height: used for the viewport size and the horizontal sampling offsets.
// dst_tex, dst2_tex: render targets handed to resource_pool->create_fbo().
//
// The framebuffer and vertex array bindings are reset to 0 afterwards, and the
// temporary FBO and VAO are returned to the resource pool.
178 void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex, GLuint dst2_tex)
// VAO over the shared vbo, keyed on the attribute locations of the cbcr program.
180 GLuint vao = resource_pool->create_vec2_vao({ cbcr_position_attribute_index, cbcr_texcoord_attribute_index }, vbo);
181 glBindVertexArray(vao);
// NOTE(review): the declaration of `fbo` and the condition choosing between
// the one-target and two-target FBO are not visible in this view; presumably
// it depends on whether dst2_tex was supplied -- confirm against the full file.
187 fbo = resource_pool->create_fbo(dst_tex);
189 fbo = resource_pool->create_fbo(dst_tex, dst2_tex);
191 glBindFramebuffer(GL_FRAMEBUFFER, fbo);
// The viewport is half the given size in each dimension (integer division).
192 glViewport(0, 0, width/2, height/2);
195 glUseProgram(cbcr_program_num);
// Bind the source to texture unit 0 with linear filtering and edge clamping.
198 glActiveTexture(GL_TEXTURE0);
200 glBindTexture(GL_TEXTURE_2D, cbcr_tex);
202 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
204 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
206 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// Two horizontal taps: one a full source texel to the left, one with no offset.
// Presumably the shader averages them to get the b/4 + c/2 + d/4 filter
// described in the constructor's comment -- the shader is not visible here.
209 glUniform2f(cbcr_chroma_offset_0_location, -1.0f / width, 0.0f);
211 glUniform2f(cbcr_chroma_offset_1_location, -0.0f / width, 0.0f);
// The sampler reads from texture unit 0.
213 glUniform1i(cbcr_texture_sampler_uniform, 0);
// Draw three vertices from the shared vbo as one triangle.
215 glDrawArrays(GL_TRIANGLES, 0, 3);
// Reset the bindings and return the temporary objects to the pool.
220 glBindFramebuffer(GL_FRAMEBUFFER, 0);
222 glBindVertexArray(0);
225 resource_pool->release_fbo(fbo);
226 resource_pool->release_vec2_vao(vao);
229 void ChromaSubsampler::create_uyvy(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
231 GLuint vao = resource_pool->create_vec2_vao({ cbcr_position_attribute_index, cbcr_texcoord_attribute_index }, vbo);
232 glBindVertexArray(vao);
235 glBindVertexArray(vao);
238 GLuint fbo = resource_pool->create_fbo(dst_tex);
239 glBindFramebuffer(GL_FRAMEBUFFER, fbo);
240 glViewport(0, 0, width/2, height);
243 glUseProgram(uyvy_program_num);
246 glUniform1i(uyvy_y_texture_sampler_uniform, 0);
248 glUniform1i(uyvy_cbcr_texture_sampler_uniform, 1);
251 glActiveTexture(GL_TEXTURE0);
253 glBindTexture(GL_TEXTURE_2D, y_tex);
255 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
257 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
259 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
262 glActiveTexture(GL_TEXTURE1);
264 glBindTexture(GL_TEXTURE_2D, cbcr_tex);
266 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
268 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
270 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
273 glUniform2f(uyvy_luma_offset_0_location, -0.5f / width, 0.0f);
275 glUniform2f(uyvy_luma_offset_1_location, 0.5f / width, 0.0f);
277 glUniform2f(uyvy_chroma_offset_0_location, -1.0f / width, 0.0f);
279 glUniform2f(uyvy_chroma_offset_1_location, -0.0f / width, 0.0f);
282 glBindBuffer(GL_ARRAY_BUFFER, vbo);
285 glDrawArrays(GL_TRIANGLES, 0, 3);
288 glActiveTexture(GL_TEXTURE0);
292 glBindFramebuffer(GL_FRAMEBUFFER, 0);
294 glBindVertexArray(0);
297 resource_pool->release_fbo(fbo);
298 resource_pool->release_vec2_vao(vao);
// Runs the v210 compute program over y_tex and cbcr_tex, writing into dst_tex.
//
// y_tex: luma source, bound as a read-only GL_R16 image on image unit 0.
// cbcr_tex: chroma source, bound as a regular texture on texture unit 1.
// width, height: used for the inv_width/inv_height uniforms and for the
//   number of workgroups dispatched.
// dst_tex: output, bound as a write-only GL_RGB10_A2 image on image unit 2.
//
// Requires that the v210 program was built in the constructor (asserted).
// GL_LINEAR minification and clamp-to-edge wrapping are set on both source
// textures as a side effect.
301 void ChromaSubsampler::create_v210(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
// v210_program_num is 0 when the constructor did not build the compute program.
303 assert(v210_program_num != 0);
305 glUseProgram(v210_program_num);
// Unit numbers for the three resources; they match the units bound below
// (0 for luma, 1 for chroma, 2 for the output image).
308 glUniform1i(v210_in_y_pos, 0);
310 glUniform1i(v210_in_cbcr_pos, 1);
312 glUniform1i(v210_outbuf_pos, 2);
// Reciprocals of the dimensions, computed in double and passed as float.
314 glUniform1f(v210_inv_width_pos, 1.0 / width);
316 glUniform1f(v210_inv_height_pos, 1.0 / height);
319 glActiveTexture(GL_TEXTURE0);
321 glBindTexture(GL_TEXTURE_2D, y_tex); // We don't actually need to bind it, but we need to set the state.
323 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
325 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
327 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
329 glBindImageTexture(0, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16); // This is the real bind.
// Chroma is bound as an ordinary texture on unit 1.
332 glActiveTexture(GL_TEXTURE1);
334 glBindTexture(GL_TEXTURE_2D, cbcr_tex);
336 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
338 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
340 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// Output image: mip level 0, not layered, write-only.
343 glBindImageTexture(2, dst_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGB10_A2);
346 // Actually run the shader. We use workgroups of size 2x16 threads, and each thread
347 // processes 6x1 input pixels, so round up to number of 12x16 pixel blocks.
348 glDispatchCompute((width + 11) / 12, (height + 15) / 16, 1);
// Texture unit 1 is still the active unit here, so this unbinds cbcr_tex from
// it; unit 0 is then made active again.
// NOTE(review): no glMemoryBarrier() after the dispatch is visible in this
// view -- confirm that consumers of dst_tex synchronize as needed.
350 glBindTexture(GL_TEXTURE_2D, 0);
352 glActiveTexture(GL_TEXTURE0);