1 #include "chroma_subsampler.h"
2 #include "v210_converter.h"
6 #include <movit/effect_util.h>
7 #include <movit/resource_pool.h>
8 #include <movit/util.h>
10 using namespace movit;
// Constructor. Compiles the GLSL programs used by this class and caches their
// uniform/attribute locations:
//  - the CbCr program (used by subsample_chroma()),
//  - the UYVY program (used by create_uyvy()),
//  - the v210 compute program (used by create_v210()), but only when
//    v210Converter::has_hardware_support() returns true; otherwise
//    v210_program_num is set to 0.
// It also creates the vertex buffer that the two fragment-shader paths share.
// The given resource_pool is stored and used for program compilation here.
13 ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool)
14 : resource_pool(resource_pool)
16 vector<string> frag_shader_outputs;
18 // Set up stuff for NV12 conversion.
20 // Note: Due to the horizontally co-sited chroma/luma samples in H.264
21 // (chroma position is left for horizontal and center for vertical),
22 // we need to be a bit careful in our subsampling. A diagram will make
23 // this clearer, showing some luma and chroma samples:
38 // Clearly, the rightmost chroma sample here needs to be equivalent to
39 // b/4 + c/2 + d/4. (We could also implement more sophisticated filters,
40 // of course, but as long as the upsampling is not going to be equally
41 // sophisticated, it's probably not worth it.) If we sample once with
42 // no mipmapping, we get just c, i.e., no actual filtering in the
43 // horizontal direction. (For the vertical direction, we can just
44 // sample in the middle to get the right filtering.) One could imagine
45 // we could use mipmapping (assuming we can create mipmaps cheaply),
46 // but then, what we'd get is this:
61 // which ends up sampling equally from a and b, which clearly isn't right. Instead,
62 // we need to do two (non-mipmapped) chroma samples, both hitting exactly in-between
65 // Sampling in-between b and c gives us the sample (b+c)/2, and similarly for c and d.
66 // Taking the average of these gives us (b+c)/4 + (c+d)/4 = b/4 + c/2 + d/4, which is
67 // exactly what we want.
69 // See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7.
// Vertex shader: maps position from [0,1] to clip space and emits two texture
// coordinates, each the incoming texcoord plus one of the two offset uniforms.
72 string cbcr_vert_shader =
75 "in vec2 position; \n"
76 "in vec2 texcoord; \n"
77 "out vec2 tc0, tc1; \n"
78 "uniform vec2 foo_chroma_offset_0; \n"
79 "uniform vec2 foo_chroma_offset_1; \n"
83 " // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n"
85 " // 2.000 0.000 0.000 -1.000 \n"
86 " // 0.000 2.000 0.000 -1.000 \n"
87 " // 0.000 0.000 -2.000 -1.000 \n"
88 " // 0.000 0.000 0.000 1.000 \n"
89 " gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n"
90 " vec2 flipped_tc = texcoord; \n"
91 " tc0 = flipped_tc + foo_chroma_offset_0; \n"
92 " tc1 = flipped_tc + foo_chroma_offset_1; \n"
// Fragment shader: averages the two samples and writes the same value to both
// outputs (FragColor and FragColor2).
94 string cbcr_frag_shader =
96 "in vec2 tc0, tc1; \n"
97 "uniform sampler2D cbcr_tex; \n"
98 "out vec4 FragColor, FragColor2; \n"
100 " FragColor = 0.5 * (texture(cbcr_tex, tc0) + texture(cbcr_tex, tc1)); \n"
101 " FragColor2 = FragColor; \n"
// Compile through the resource pool, then cache the locations used at draw time.
103 cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs);
106 cbcr_texture_sampler_uniform = glGetUniformLocation(cbcr_program_num, "cbcr_tex");
108 cbcr_position_attribute_index = glGetAttribLocation(cbcr_program_num, "position");
110 cbcr_texcoord_attribute_index = glGetAttribLocation(cbcr_program_num, "texcoord");
113 // Same, for UYVY conversion.
// This vertex shader additionally emits two luma texture coordinates, offset
// by the two luma offset uniforms.
114 string uyvy_vert_shader =
117 "in vec2 position; \n"
118 "in vec2 texcoord; \n"
119 "out vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; \n"
120 "uniform vec2 foo_luma_offset_0; \n"
121 "uniform vec2 foo_luma_offset_1; \n"
122 "uniform vec2 foo_chroma_offset_0; \n"
123 "uniform vec2 foo_chroma_offset_1; \n"
127 " // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n"
129 " // 2.000 0.000 0.000 -1.000 \n"
130 " // 0.000 2.000 0.000 -1.000 \n"
131 " // 0.000 0.000 -2.000 -1.000 \n"
132 " // 0.000 0.000 0.000 1.000 \n"
133 " gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n"
134 " vec2 flipped_tc = texcoord; \n"
135 " y_tc0 = flipped_tc + foo_luma_offset_0; \n"
136 " y_tc1 = flipped_tc + foo_luma_offset_1; \n"
137 " cbcr_tc0 = flipped_tc + foo_chroma_offset_0; \n"
138 " cbcr_tc1 = flipped_tc + foo_chroma_offset_1; \n"
// Fragment shader: two luma samples plus the average of two chroma samples,
// packed into one four-component output texel.
140 string uyvy_frag_shader =
142 "in vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; \n"
143 "uniform sampler2D y_tex, cbcr_tex; \n"
144 "out vec4 FragColor; \n"
146 " float y0 = texture(y_tex, y_tc0).r; \n"
147 " float y1 = texture(y_tex, y_tc1).r; \n"
148 " vec2 cbcr0 = texture(cbcr_tex, cbcr_tc0).rg; \n"
149 " vec2 cbcr1 = texture(cbcr_tex, cbcr_tc1).rg; \n"
150 " vec2 cbcr = 0.5 * (cbcr0 + cbcr1); \n"
151 " FragColor = vec4(cbcr.g, y0, cbcr.r, y1); \n"
154 uyvy_program_num = resource_pool->compile_glsl_program(uyvy_vert_shader, uyvy_frag_shader, frag_shader_outputs);
157 uyvy_y_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "y_tex");
159 uyvy_cbcr_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "cbcr_tex");
161 uyvy_position_attribute_index = glGetAttribLocation(uyvy_program_num, "position");
163 uyvy_texcoord_attribute_index = glGetAttribLocation(uyvy_program_num, "texcoord");
166 // Shared between the two.
172 vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices);
175 // v210 compute shader.
176 if (v210Converter::has_hardware_support()) {
177 string v210_shader_src = R"(#version 150
178 #extension GL_ARB_compute_shader : enable
179 #extension GL_ARB_shader_image_load_store : enable
180 layout(local_size_x=2, local_size_y=16) in;
181 layout(r16) uniform restrict readonly image2D in_y;
182 uniform sampler2D in_cbcr; // Of type RG16.
183 layout(rgb10_a2) uniform restrict writeonly image2D outbuf;
184 uniform float inv_width, inv_height;
188 int xb = int(gl_GlobalInvocationID.x); // X block number.
189 int y = int(gl_GlobalInvocationID.y); // Y (actual line).
190 float yf = (gl_GlobalInvocationID.y + 0.5f) * inv_height; // Y float coordinate.
192 // Load and scale CbCr values, sampling in-between the texels to get
193 // to (left/4 + center/2 + right/4).
195 for (int i = 0; i < 3; ++i) {
196 vec2 a = texture(in_cbcr, vec2((xb * 6 + i * 2) * inv_width, yf)).xy;
197 vec2 b = texture(in_cbcr, vec2((xb * 6 + i * 2 + 1) * inv_width, yf)).xy;
198 pix_cbcr[i] = (a + b) * (0.5 * 65535.0 / 1023.0);
201 // Load and scale the Y values. Note that we use integer coordinates here,
202 // so we don't need to offset by 0.5.
204 for (int i = 0; i < 6; ++i) {
205 pix_y[i] = imageLoad(in_y, ivec2(xb * 6 + i, y)).x * (65535.0 / 1023.0);
208 imageStore(outbuf, ivec2(xb * 4 + 0, y), vec4(pix_cbcr[0].x, pix_y[0], pix_cbcr[0].y, 1.0));
209 imageStore(outbuf, ivec2(xb * 4 + 1, y), vec4(pix_y[1], pix_cbcr[1].x, pix_y[2], 1.0));
210 imageStore(outbuf, ivec2(xb * 4 + 2, y), vec4(pix_cbcr[1].y, pix_y[3], pix_cbcr[2].x, 1.0));
211 imageStore(outbuf, ivec2(xb * 4 + 3, y), vec4(pix_y[4], pix_cbcr[2].y, pix_y[5], 1.0));
214 GLuint shader_num = movit::compile_shader(v210_shader_src, GL_COMPUTE_SHADER);
216 v210_program_num = glCreateProgram();
218 glAttachShader(v210_program_num, shader_num);
220 glLinkProgram(v210_program_num);
224 glGetProgramiv(v210_program_num, GL_LINK_STATUS, &success);
226 if (success == GL_FALSE) {
227 GLchar error_log[1024] = {0};
228 glGetProgramInfoLog(v210_program_num, 1024, NULL, error_log);
229 fprintf(stderr, "Error linking program: %s\n", error_log);
233 v210_in_y_pos = glGetUniformLocation(v210_program_num, "in_y");
235 v210_in_cbcr_pos = glGetUniformLocation(v210_program_num, "in_cbcr");
237 v210_outbuf_pos = glGetUniformLocation(v210_program_num, "outbuf");
239 v210_inv_width_pos = glGetUniformLocation(v210_program_num, "inv_width");
241 v210_inv_height_pos = glGetUniformLocation(v210_program_num, "inv_height");
244 v210_program_num = 0;
// Destructor. Hands the CbCr and UYVY programs back to the resource pool,
// deletes the shared vertex buffer, and deletes the v210 compute program
// if one was created (v210_program_num is 0 when it was not).
248 ChromaSubsampler::~ChromaSubsampler()
250 resource_pool->release_glsl_program(cbcr_program_num);
252 resource_pool->release_glsl_program(uyvy_program_num);
254 glDeleteBuffers(1, &vbo);
// The v210 program was created with glCreateProgram() directly rather than
// through the resource pool, so it is deleted directly as well.
256 if (v210_program_num != 0) {
257 glDeleteProgram(v210_program_num);
// Renders cbcr_tex through the CbCr program into a framebuffer whose viewport
// is width/2 x height/2.
//
//   cbcr_tex  source texture; bound to texture unit 0 with linear minification
//             and clamp-to-edge wrapping.
//   width     used for the viewport (width/2) and for the horizontal sample
//             offsets (multiples of 1/width).
//   height    used for the viewport (height/2).
//   dst_tex   render target passed to create_fbo().
//   dst2_tex  second render target. Two create_fbo() calls are visible below,
//             one with dst_tex only and one with both textures; the condition
//             that selects between them is not shown here.
//
// A temporary VAO and the FBO are created per call and released at the end;
// the framebuffer binding is reset to 0 afterwards.
262 void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex, GLuint dst2_tex)
265 glGenVertexArrays(1, &vao);
268 glBindVertexArray(vao);
274 fbo = resource_pool->create_fbo(dst_tex);
276 fbo = resource_pool->create_fbo(dst_tex, dst2_tex);
278 glBindFramebuffer(GL_FRAMEBUFFER, fbo);
279 glViewport(0, 0, width/2, height/2);
282 glUseProgram(cbcr_program_num);
285 glActiveTexture(GL_TEXTURE0);
287 glBindTexture(GL_TEXTURE_2D, cbcr_tex);
289 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
291 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
293 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// The two sample positions are texcoord - 1/width and texcoord + 0 (written as
// -0.0f / width); the fragment shader averages the two fetches.
296 float chroma_offset_0[] = { -1.0f / width, 0.0f };
297 float chroma_offset_1[] = { -0.0f / width, 0.0f };
298 set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_0", chroma_offset_0);
299 set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_1", chroma_offset_1);
// The sampler reads from texture unit 0, where cbcr_tex was bound above.
301 glUniform1i(cbcr_texture_sampler_uniform, 0);
303 glBindBuffer(GL_ARRAY_BUFFER, vbo);
// Both attributes read from the same VBO at offset 0, so position and texcoord
// receive identical values.
// NOTE(review): unlike create_uyvy(), these loops do not skip an attribute
// index of -1 -- confirm both attributes are always active in this program.
306 for (GLint attr_index : { cbcr_position_attribute_index, cbcr_texcoord_attribute_index }) {
307 glEnableVertexAttribArray(attr_index);
309 glVertexAttribPointer(attr_index, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
// Three vertices drawn as a single triangle.
313 glDrawArrays(GL_TRIANGLES, 0, 3);
316 for (GLint attr_index : { cbcr_position_attribute_index, cbcr_texcoord_attribute_index }) {
317 glDisableVertexAttribArray(attr_index);
323 glBindFramebuffer(GL_FRAMEBUFFER, 0);
326 resource_pool->release_fbo(fbo);
327 glDeleteVertexArrays(1, &vao);
// Renders y_tex and cbcr_tex through the UYVY program into dst_tex, using a
// viewport of width/2 x height (each output texel carries two luma samples
// and one averaged chroma pair, per the UYVY fragment shader).
//
//   y_tex     luma texture; bound to texture unit 0.
//   cbcr_tex  chroma texture; bound to texture unit 1.
//   width     used for the viewport (width/2) and the horizontal offsets.
//   height    used for the viewport height.
//   dst_tex   render target passed to create_fbo().
//
// A temporary VAO and the FBO are created per call and released at the end;
// the active texture unit is reset to GL_TEXTURE0 and the framebuffer
// binding to 0.
331 void ChromaSubsampler::create_uyvy(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
334 glGenVertexArrays(1, &vao);
337 glBindVertexArray(vao);
340 GLuint fbo = resource_pool->create_fbo(dst_tex);
341 glBindFramebuffer(GL_FRAMEBUFFER, fbo);
342 glViewport(0, 0, width/2, height);
345 glUseProgram(uyvy_program_num);
// Sampler-to-unit assignment: y_tex sampler -> unit 0, cbcr_tex sampler -> unit 1.
348 glUniform1i(uyvy_y_texture_sampler_uniform, 0);
350 glUniform1i(uyvy_cbcr_texture_sampler_uniform, 1);
353 glActiveTexture(GL_TEXTURE0);
355 glBindTexture(GL_TEXTURE_2D, y_tex);
357 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
359 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
361 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
364 glActiveTexture(GL_TEXTURE1);
366 glBindTexture(GL_TEXTURE_2D, cbcr_tex);
368 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
370 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
372 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
// Luma is fetched at texcoord -/+ 0.5/width; chroma at texcoord - 1/width and
// at texcoord itself (the same chroma offsets subsample_chroma() uses).
375 float y_offset_0[] = { -0.5f / width, 0.0f };
376 float y_offset_1[] = { 0.5f / width, 0.0f };
377 float cbcr_offset0[] = { -1.0f / width, 0.0f };
378 float cbcr_offset1[] = { -0.0f / width, 0.0f };
379 set_uniform_vec2(uyvy_program_num, "foo", "luma_offset_0", y_offset_0);
380 set_uniform_vec2(uyvy_program_num, "foo", "luma_offset_1", y_offset_1);
381 set_uniform_vec2(uyvy_program_num, "foo", "chroma_offset_0", cbcr_offset0);
382 set_uniform_vec2(uyvy_program_num, "foo", "chroma_offset_1", cbcr_offset1);
384 glBindBuffer(GL_ARRAY_BUFFER, vbo);
// Both attributes read from the same VBO at offset 0. An index of -1 (what
// glGetAttribLocation returns for a name that is not an active attribute)
// is skipped.
387 for (GLint attr_index : { uyvy_position_attribute_index, uyvy_texcoord_attribute_index }) {
388 if (attr_index == -1) continue;
389 glEnableVertexAttribArray(attr_index);
391 glVertexAttribPointer(attr_index, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
// Three vertices drawn as a single triangle.
395 glDrawArrays(GL_TRIANGLES, 0, 3);
398 for (GLint attr_index : { uyvy_position_attribute_index, uyvy_texcoord_attribute_index }) {
399 if (attr_index == -1) continue;
400 glDisableVertexAttribArray(attr_index);
// Restore the default active texture unit and framebuffer binding.
404 glActiveTexture(GL_TEXTURE0);
408 glBindFramebuffer(GL_FRAMEBUFFER, 0);
411 resource_pool->release_fbo(fbo);
412 glDeleteVertexArrays(1, &vao);
// Runs the v210 compute program, reading luma from y_tex (as an image) and
// chroma from cbcr_tex (as a sampled texture), and writing to dst_tex
// (as an image).
//
//   y_tex     bound as a read-only GL_R16 image on image unit 0.
//   cbcr_tex  bound as a 2D texture on texture unit 1.
//   width     sets the inv_width uniform (1/width) and the X dispatch size.
//   height    sets the inv_height uniform (1/height) and the Y dispatch size.
//   dst_tex   bound as a write-only GL_RGB10_A2 image on image unit 2.
//
// Requires that the compute program was created in the constructor; this is
// asserted on entry.
415 void ChromaSubsampler::create_v210(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
417 assert(v210_program_num != 0);
419 glUseProgram(v210_program_num);
// Unit assignment: in_y -> image unit 0, in_cbcr -> texture unit 1,
// outbuf -> image unit 2.
422 glUniform1i(v210_in_y_pos, 0);
424 glUniform1i(v210_in_cbcr_pos, 1);
426 glUniform1i(v210_outbuf_pos, 2);
428 glUniform1f(v210_inv_width_pos, 1.0 / width);
430 glUniform1f(v210_inv_height_pos, 1.0 / height);
433 glActiveTexture(GL_TEXTURE0);
435 glBindTexture(GL_TEXTURE_2D, y_tex); // We don't actually need to bind it, but we need to set the state.
437 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
439 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
441 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
443 glBindImageTexture(0, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16); // This is the real bind.
446 glActiveTexture(GL_TEXTURE1);
448 glBindTexture(GL_TEXTURE_2D, cbcr_tex);
450 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
452 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
454 glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
457 glBindImageTexture(2, dst_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGB10_A2);
460 // Actually run the shader. We use workgroups of size 2x16 threads, and each thread
461 // processes 6x1 input pixels, so round up to number of 12x16 pixel blocks.
462 glDispatchCompute((width + 11) / 12, (height + 15) / 16, 1);
// Unbind the chroma texture from unit 1 (still the active unit here), then
// make unit 0 the active unit again.
464 glBindTexture(GL_TEXTURE_2D, 0);
466 glActiveTexture(GL_TEXTURE0);