]> git.sesse.net Git - nageru/blob - chroma_subsampler.cpp
Support 4:2:2 v210 (10-bit) output for DeckLink.
[nageru] / chroma_subsampler.cpp
1 #include "chroma_subsampler.h"
2 #include "v210_converter.h"
3
4 #include <vector>
5
6 #include <movit/effect_util.h>
7 #include <movit/resource_pool.h>
8 #include <movit/util.h>
9
10 using namespace movit;
11 using namespace std;
12
13 ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool)
14         : resource_pool(resource_pool)
15 {
16         vector<string> frag_shader_outputs;
17
18         // Set up stuff for NV12 conversion.
19         //
20         // Note: Due to the horizontally co-sited chroma/luma samples in H.264
21         // (chrome position is left for horizontal and center for vertical),
22         // we need to be a bit careful in our subsampling. A diagram will make
23         // this clearer, showing some luma and chroma samples:
24         //
25         //     a   b   c   d
26         //   +---+---+---+---+
27         //   |   |   |   |   |
28         //   | Y | Y | Y | Y |
29         //   |   |   |   |   |
30         //   +---+---+---+---+
31         //
32         // +-------+-------+
33         // |       |       |
34         // |   C   |   C   |
35         // |       |       |
36         // +-------+-------+
37         //
38         // Clearly, the rightmost chroma sample here needs to be equivalent to
39         // b/4 + c/2 + d/4. (We could also implement more sophisticated filters,
40         // of course, but as long as the upsampling is not going to be equally
41         // sophisticated, it's probably not worth it.) If we sample once with
42         // no mipmapping, we get just c, ie., no actual filtering in the
43         // horizontal direction. (For the vertical direction, we can just
44         // sample in the middle to get the right filtering.) One could imagine
45         // we could use mipmapping (assuming we can create mipmaps cheaply),
46         // but then, what we'd get is this:
47         //
48         //    (a+b)/2 (c+d)/2
49         //   +-------+-------+
50         //   |       |       |
51         //   |   Y   |   Y   |
52         //   |       |       |
53         //   +-------+-------+
54         //
55         // +-------+-------+
56         // |       |       |
57         // |   C   |   C   |
58         // |       |       |
59         // +-------+-------+
60         //
61         // which ends up sampling equally from a and b, which clearly isn't right. Instead,
62         // we need to do two (non-mipmapped) chroma samples, both hitting exactly in-between
63         // source pixels.
64         //
65         // Sampling in-between b and c gives us the sample (b+c)/2, and similarly for c and d.
66         // Taking the average of these gives of (b+c)/4 + (c+d)/4 = b/4 + c/2 + d/4, which is
67         // exactly what we want.
68         //
69         // See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7.
70
71         // Cb/Cr shader.
72         string cbcr_vert_shader =
73                 "#version 130 \n"
74                 " \n"
75                 "in vec2 position; \n"
76                 "in vec2 texcoord; \n"
77                 "out vec2 tc0, tc1; \n"
78                 "uniform vec2 foo_chroma_offset_0; \n"
79                 "uniform vec2 foo_chroma_offset_1; \n"
80                 " \n"
81                 "void main() \n"
82                 "{ \n"
83                 "    // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n"
84                 "    // \n"
85                 "    //   2.000  0.000  0.000 -1.000 \n"
86                 "    //   0.000  2.000  0.000 -1.000 \n"
87                 "    //   0.000  0.000 -2.000 -1.000 \n"
88                 "    //   0.000  0.000  0.000  1.000 \n"
89                 "    gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n"
90                 "    vec2 flipped_tc = texcoord; \n"
91                 "    tc0 = flipped_tc + foo_chroma_offset_0; \n"
92                 "    tc1 = flipped_tc + foo_chroma_offset_1; \n"
93                 "} \n";
94         string cbcr_frag_shader =
95                 "#version 130 \n"
96                 "in vec2 tc0, tc1; \n"
97                 "uniform sampler2D cbcr_tex; \n"
98                 "out vec4 FragColor, FragColor2; \n"
99                 "void main() { \n"
100                 "    FragColor = 0.5 * (texture(cbcr_tex, tc0) + texture(cbcr_tex, tc1)); \n"
101                 "    FragColor2 = FragColor; \n"
102                 "} \n";
103         cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs);
104         check_error();
105
106         cbcr_texture_sampler_uniform = glGetUniformLocation(cbcr_program_num, "cbcr_tex");
107         check_error();
108         cbcr_position_attribute_index = glGetAttribLocation(cbcr_program_num, "position");
109         check_error();
110         cbcr_texcoord_attribute_index = glGetAttribLocation(cbcr_program_num, "texcoord");
111         check_error();
112
113         // Same, for UYVY conversion.
114         string uyvy_vert_shader =
115                 "#version 130 \n"
116                 " \n"
117                 "in vec2 position; \n"
118                 "in vec2 texcoord; \n"
119                 "out vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; \n"
120                 "uniform vec2 foo_luma_offset_0; \n"
121                 "uniform vec2 foo_luma_offset_1; \n"
122                 "uniform vec2 foo_chroma_offset_0; \n"
123                 "uniform vec2 foo_chroma_offset_1; \n"
124                 " \n"
125                 "void main() \n"
126                 "{ \n"
127                 "    // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n"
128                 "    // \n"
129                 "    //   2.000  0.000  0.000 -1.000 \n"
130                 "    //   0.000  2.000  0.000 -1.000 \n"
131                 "    //   0.000  0.000 -2.000 -1.000 \n"
132                 "    //   0.000  0.000  0.000  1.000 \n"
133                 "    gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n"
134                 "    vec2 flipped_tc = texcoord; \n"
135                 "    y_tc0 = flipped_tc + foo_luma_offset_0; \n"
136                 "    y_tc1 = flipped_tc + foo_luma_offset_1; \n"
137                 "    cbcr_tc0 = flipped_tc + foo_chroma_offset_0; \n"
138                 "    cbcr_tc1 = flipped_tc + foo_chroma_offset_1; \n"
139                 "} \n";
140         string uyvy_frag_shader =
141                 "#version 130 \n"
142                 "in vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; \n"
143                 "uniform sampler2D y_tex, cbcr_tex; \n"
144                 "out vec4 FragColor; \n"
145                 "void main() { \n"
146                 "    float y0 = texture(y_tex, y_tc0).r; \n"
147                 "    float y1 = texture(y_tex, y_tc1).r; \n"
148                 "    vec2 cbcr0 = texture(cbcr_tex, cbcr_tc0).rg; \n"
149                 "    vec2 cbcr1 = texture(cbcr_tex, cbcr_tc1).rg; \n"
150                 "    vec2 cbcr = 0.5 * (cbcr0 + cbcr1); \n"
151                 "    FragColor = vec4(cbcr.g, y0, cbcr.r, y1); \n"
152                 "} \n";
153
154         uyvy_program_num = resource_pool->compile_glsl_program(uyvy_vert_shader, uyvy_frag_shader, frag_shader_outputs);
155         check_error();
156
157         uyvy_y_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "y_tex");
158         check_error();
159         uyvy_cbcr_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "cbcr_tex");
160         check_error();
161         uyvy_position_attribute_index = glGetAttribLocation(uyvy_program_num, "position");
162         check_error();
163         uyvy_texcoord_attribute_index = glGetAttribLocation(uyvy_program_num, "texcoord");
164         check_error();
165
166         // Shared between the two.
167         float vertices[] = {
168                 0.0f, 2.0f,
169                 0.0f, 0.0f,
170                 2.0f, 0.0f
171         };
172         vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices);
173         check_error();
174
175         // v210 compute shader.
176         if (v210Converter::has_hardware_support()) {
177                 string v210_shader_src = R"(#version 150
178 #extension GL_ARB_compute_shader : enable
179 #extension GL_ARB_shader_image_load_store : enable
180 layout(local_size_x=2, local_size_y=16) in;
181 layout(r16) uniform restrict readonly image2D in_y;
182 uniform sampler2D in_cbcr;  // Of type RG16.
183 layout(rgb10_a2) uniform restrict writeonly image2D outbuf;
184 uniform float inv_width, inv_height;
185
186 void main()
187 {
188         int xb = int(gl_GlobalInvocationID.x);  // X block number.
189         int y = int(gl_GlobalInvocationID.y);  // Y (actual line).
190         float yf = (gl_GlobalInvocationID.y + 0.5f) * inv_height;  // Y float coordinate.
191
192         // Load and scale CbCr values, sampling in-between the texels to get
193         // to (left/4 + center/2 + right/4).
194         vec2 pix_cbcr[3];
195         for (int i = 0; i < 3; ++i) {
196                 vec2 a = texture(in_cbcr, vec2((xb * 6 + i * 2) * inv_width, yf)).xy;
197                 vec2 b = texture(in_cbcr, vec2((xb * 6 + i * 2 + 1) * inv_width, yf)).xy;
198                 pix_cbcr[i] = (a + b) * (0.5 * 65535.0 / 1023.0);
199         }
200
201         // Load and scale the Y values. Note that we use integer coordinates here,
202         // so we don't need to offset by 0.5.
203         float pix_y[6];
204         for (int i = 0; i < 6; ++i) {
205                 pix_y[i] = imageLoad(in_y, ivec2(xb * 6 + i, y)).x * (65535.0 / 1023.0);
206         }
207
208         imageStore(outbuf, ivec2(xb * 4 + 0, y), vec4(pix_cbcr[0].x, pix_y[0],      pix_cbcr[0].y, 1.0));
209         imageStore(outbuf, ivec2(xb * 4 + 1, y), vec4(pix_y[1],      pix_cbcr[1].x, pix_y[2],      1.0));
210         imageStore(outbuf, ivec2(xb * 4 + 2, y), vec4(pix_cbcr[1].y, pix_y[3],      pix_cbcr[2].x, 1.0));
211         imageStore(outbuf, ivec2(xb * 4 + 3, y), vec4(pix_y[4],      pix_cbcr[2].y, pix_y[5],      1.0));
212 }
213 )";
214                 GLuint shader_num = movit::compile_shader(v210_shader_src, GL_COMPUTE_SHADER);
215                 check_error();
216                 v210_program_num = glCreateProgram();
217                 check_error();
218                 glAttachShader(v210_program_num, shader_num);
219                 check_error();
220                 glLinkProgram(v210_program_num);
221                 check_error();
222
223                 GLint success;
224                 glGetProgramiv(v210_program_num, GL_LINK_STATUS, &success);
225                 check_error();
226                 if (success == GL_FALSE) {
227                         GLchar error_log[1024] = {0};
228                         glGetProgramInfoLog(v210_program_num, 1024, NULL, error_log);
229                         fprintf(stderr, "Error linking program: %s\n", error_log);
230                         exit(1);
231                 }
232
233                 v210_in_y_pos = glGetUniformLocation(v210_program_num, "in_y");
234                 check_error();
235                 v210_in_cbcr_pos = glGetUniformLocation(v210_program_num, "in_cbcr");
236                 check_error();
237                 v210_outbuf_pos = glGetUniformLocation(v210_program_num, "outbuf");
238                 check_error();
239                 v210_inv_width_pos = glGetUniformLocation(v210_program_num, "inv_width");
240                 check_error();
241                 v210_inv_height_pos = glGetUniformLocation(v210_program_num, "inv_height");
242                 check_error();
243         } else {
244                 v210_program_num = 0;
245         }
246 }
247
248 ChromaSubsampler::~ChromaSubsampler()
249 {
250         resource_pool->release_glsl_program(cbcr_program_num);
251         check_error();
252         resource_pool->release_glsl_program(uyvy_program_num);
253         check_error();
254         glDeleteBuffers(1, &vbo);
255         check_error();
256         if (v210_program_num != 0) {
257                 glDeleteProgram(v210_program_num);
258                 check_error();
259         }
260 }
261
262 void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex, GLuint dst2_tex)
263 {
264         GLuint vao;
265         glGenVertexArrays(1, &vao);
266         check_error();
267
268         glBindVertexArray(vao);
269         check_error();
270
271         // Extract Cb/Cr.
272         GLuint fbo;
273         if (dst2_tex <= 0) {
274                 fbo = resource_pool->create_fbo(dst_tex);
275         } else {
276                 fbo = resource_pool->create_fbo(dst_tex, dst2_tex);
277         }
278         glBindFramebuffer(GL_FRAMEBUFFER, fbo);
279         glViewport(0, 0, width/2, height/2);
280         check_error();
281
282         glUseProgram(cbcr_program_num);
283         check_error();
284
285         glActiveTexture(GL_TEXTURE0);
286         check_error();
287         glBindTexture(GL_TEXTURE_2D, cbcr_tex);
288         check_error();
289         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
290         check_error();
291         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
292         check_error();
293         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
294         check_error();
295
296         float chroma_offset_0[] = { -1.0f / width, 0.0f };
297         float chroma_offset_1[] = { -0.0f / width, 0.0f };
298         set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_0", chroma_offset_0);
299         set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_1", chroma_offset_1);
300
301         glUniform1i(cbcr_texture_sampler_uniform, 0);
302
303         glBindBuffer(GL_ARRAY_BUFFER, vbo);
304         check_error();
305
306         for (GLint attr_index : { cbcr_position_attribute_index, cbcr_texcoord_attribute_index }) {
307                 glEnableVertexAttribArray(attr_index);
308                 check_error();
309                 glVertexAttribPointer(attr_index, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
310                 check_error();
311         }
312
313         glDrawArrays(GL_TRIANGLES, 0, 3);
314         check_error();
315
316         for (GLint attr_index : { cbcr_position_attribute_index, cbcr_texcoord_attribute_index }) {
317                 glDisableVertexAttribArray(attr_index);
318                 check_error();
319         }
320
321         glUseProgram(0);
322         check_error();
323         glBindFramebuffer(GL_FRAMEBUFFER, 0);
324         check_error();
325
326         resource_pool->release_fbo(fbo);
327         glDeleteVertexArrays(1, &vao);
328         check_error();
329 }
330
331 void ChromaSubsampler::create_uyvy(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
332 {
333         GLuint vao;
334         glGenVertexArrays(1, &vao);
335         check_error();
336
337         glBindVertexArray(vao);
338         check_error();
339
340         GLuint fbo = resource_pool->create_fbo(dst_tex);
341         glBindFramebuffer(GL_FRAMEBUFFER, fbo);
342         glViewport(0, 0, width/2, height);
343         check_error();
344
345         glUseProgram(uyvy_program_num);
346         check_error();
347
348         glUniform1i(uyvy_y_texture_sampler_uniform, 0);
349         check_error();
350         glUniform1i(uyvy_cbcr_texture_sampler_uniform, 1);
351         check_error();
352
353         glActiveTexture(GL_TEXTURE0);
354         check_error();
355         glBindTexture(GL_TEXTURE_2D, y_tex);
356         check_error();
357         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
358         check_error();
359         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
360         check_error();
361         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
362         check_error();
363
364         glActiveTexture(GL_TEXTURE1);
365         check_error();
366         glBindTexture(GL_TEXTURE_2D, cbcr_tex);
367         check_error();
368         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
369         check_error();
370         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
371         check_error();
372         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
373         check_error();
374
375         float y_offset_0[] = { -0.5f / width, 0.0f };
376         float y_offset_1[] = {  0.5f / width, 0.0f };
377         float cbcr_offset0[] = { -1.0f / width, 0.0f };
378         float cbcr_offset1[] = { -0.0f / width, 0.0f };
379         set_uniform_vec2(uyvy_program_num, "foo", "luma_offset_0", y_offset_0);
380         set_uniform_vec2(uyvy_program_num, "foo", "luma_offset_1", y_offset_1);
381         set_uniform_vec2(uyvy_program_num, "foo", "chroma_offset_0", cbcr_offset0);
382         set_uniform_vec2(uyvy_program_num, "foo", "chroma_offset_1", cbcr_offset1);
383
384         glBindBuffer(GL_ARRAY_BUFFER, vbo);
385         check_error();
386
387         for (GLint attr_index : { uyvy_position_attribute_index, uyvy_texcoord_attribute_index }) {
388                 if (attr_index == -1) continue;
389                 glEnableVertexAttribArray(attr_index);
390                 check_error();
391                 glVertexAttribPointer(attr_index, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
392                 check_error();
393         }
394
395         glDrawArrays(GL_TRIANGLES, 0, 3);
396         check_error();
397
398         for (GLint attr_index : { uyvy_position_attribute_index, uyvy_texcoord_attribute_index }) {
399                 if (attr_index == -1) continue;
400                 glDisableVertexAttribArray(attr_index);
401                 check_error();
402         }
403
404         glActiveTexture(GL_TEXTURE0);
405         check_error();
406         glUseProgram(0);
407         check_error();
408         glBindFramebuffer(GL_FRAMEBUFFER, 0);
409         check_error();
410
411         resource_pool->release_fbo(fbo);
412         glDeleteVertexArrays(1, &vao);
413 }
414
415 void ChromaSubsampler::create_v210(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
416 {
417         assert(v210_program_num != 0);
418
419         glUseProgram(v210_program_num);
420         check_error();
421
422         glUniform1i(v210_in_y_pos, 0);
423         check_error();
424         glUniform1i(v210_in_cbcr_pos, 1);
425         check_error();
426         glUniform1i(v210_outbuf_pos, 2);
427         check_error();
428         glUniform1f(v210_inv_width_pos, 1.0 / width);
429         check_error();
430         glUniform1f(v210_inv_height_pos, 1.0 / height);
431         check_error();
432
433         glActiveTexture(GL_TEXTURE0);
434         check_error();
435         glBindTexture(GL_TEXTURE_2D, y_tex);  // We don't actually need to bind it, but we need to set the state.
436         check_error();
437         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
438         check_error();
439         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
440         check_error();
441         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
442         check_error();
443         glBindImageTexture(0, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16);  // This is the real bind.
444         check_error();
445
446         glActiveTexture(GL_TEXTURE1);
447         check_error();
448         glBindTexture(GL_TEXTURE_2D, cbcr_tex);
449         check_error();
450         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
451         check_error();
452         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
453         check_error();
454         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
455         check_error();
456
457         glBindImageTexture(2, dst_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGB10_A2);
458         check_error();
459
460         // Actually run the shader. We use workgroups of size 2x16 threadst , and each thread
461         // processes 6x1 input pixels, so round up to number of 12x16 pixel blocks.
462         glDispatchCompute((width + 11) / 12, (height + 15) / 16, 1);
463
464         glBindTexture(GL_TEXTURE_2D, 0);
465         check_error();
466         glActiveTexture(GL_TEXTURE0);
467         check_error();
468         glUseProgram(0);
469         check_error();
470 }