]> git.sesse.net Git - nageru/blob - chroma_subsampler.cpp
Support audio-only FFmpeg inputs. Somewhat wonky, though.
[nageru] / chroma_subsampler.cpp
1 #include "chroma_subsampler.h"
2 #include "v210_converter.h"
3
4 #include <vector>
5
6 #include <movit/effect_util.h>
7 #include <movit/resource_pool.h>
8 #include <movit/util.h>
9
10 using namespace movit;
11 using namespace std;
12
13 ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool)
14         : resource_pool(resource_pool)
15 {
16         vector<string> frag_shader_outputs;
17
18         // Set up stuff for NV12 conversion.
19         //
20         // Note: Due to the horizontally co-sited chroma/luma samples in H.264
21         // (chrome position is left for horizontal and center for vertical),
22         // we need to be a bit careful in our subsampling. A diagram will make
23         // this clearer, showing some luma and chroma samples:
24         //
25         //     a   b   c   d
26         //   +---+---+---+---+
27         //   |   |   |   |   |
28         //   | Y | Y | Y | Y |
29         //   |   |   |   |   |
30         //   +---+---+---+---+
31         //
32         // +-------+-------+
33         // |       |       |
34         // |   C   |   C   |
35         // |       |       |
36         // +-------+-------+
37         //
38         // Clearly, the rightmost chroma sample here needs to be equivalent to
39         // b/4 + c/2 + d/4. (We could also implement more sophisticated filters,
40         // of course, but as long as the upsampling is not going to be equally
41         // sophisticated, it's probably not worth it.) If we sample once with
42         // no mipmapping, we get just c, ie., no actual filtering in the
43         // horizontal direction. (For the vertical direction, we can just
44         // sample in the middle to get the right filtering.) One could imagine
45         // we could use mipmapping (assuming we can create mipmaps cheaply),
46         // but then, what we'd get is this:
47         //
48         //    (a+b)/2 (c+d)/2
49         //   +-------+-------+
50         //   |       |       |
51         //   |   Y   |   Y   |
52         //   |       |       |
53         //   +-------+-------+
54         //
55         // +-------+-------+
56         // |       |       |
57         // |   C   |   C   |
58         // |       |       |
59         // +-------+-------+
60         //
61         // which ends up sampling equally from a and b, which clearly isn't right. Instead,
62         // we need to do two (non-mipmapped) chroma samples, both hitting exactly in-between
63         // source pixels.
64         //
65         // Sampling in-between b and c gives us the sample (b+c)/2, and similarly for c and d.
66         // Taking the average of these gives of (b+c)/4 + (c+d)/4 = b/4 + c/2 + d/4, which is
67         // exactly what we want.
68         //
69         // See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7.
70
71         // Cb/Cr shader.
72         string cbcr_vert_shader =
73                 "#version 130 \n"
74                 " \n"
75                 "in vec2 position; \n"
76                 "in vec2 texcoord; \n"
77                 "out vec2 tc0, tc1; \n"
78                 "uniform vec2 foo_chroma_offset_0; \n"
79                 "uniform vec2 foo_chroma_offset_1; \n"
80                 " \n"
81                 "void main() \n"
82                 "{ \n"
83                 "    // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n"
84                 "    // \n"
85                 "    //   2.000  0.000  0.000 -1.000 \n"
86                 "    //   0.000  2.000  0.000 -1.000 \n"
87                 "    //   0.000  0.000 -2.000 -1.000 \n"
88                 "    //   0.000  0.000  0.000  1.000 \n"
89                 "    gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n"
90                 "    vec2 flipped_tc = texcoord; \n"
91                 "    tc0 = flipped_tc + foo_chroma_offset_0; \n"
92                 "    tc1 = flipped_tc + foo_chroma_offset_1; \n"
93                 "} \n";
94         string cbcr_frag_shader =
95                 "#version 130 \n"
96                 "in vec2 tc0, tc1; \n"
97                 "uniform sampler2D cbcr_tex; \n"
98                 "out vec4 FragColor, FragColor2; \n"
99                 "void main() { \n"
100                 "    FragColor = 0.5 * (texture(cbcr_tex, tc0) + texture(cbcr_tex, tc1)); \n"
101                 "    FragColor2 = FragColor; \n"
102                 "} \n";
103         cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs);
104         check_error();
105         cbcr_chroma_offset_0_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_0");
106         check_error();
107         cbcr_chroma_offset_1_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_1");
108         check_error();
109
110         cbcr_texture_sampler_uniform = glGetUniformLocation(cbcr_program_num, "cbcr_tex");
111         check_error();
112         cbcr_position_attribute_index = glGetAttribLocation(cbcr_program_num, "position");
113         check_error();
114         cbcr_texcoord_attribute_index = glGetAttribLocation(cbcr_program_num, "texcoord");
115         check_error();
116
117         // Same, for UYVY conversion.
118         string uyvy_vert_shader =
119                 "#version 130 \n"
120                 " \n"
121                 "in vec2 position; \n"
122                 "in vec2 texcoord; \n"
123                 "out vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; \n"
124                 "uniform vec2 foo_luma_offset_0; \n"
125                 "uniform vec2 foo_luma_offset_1; \n"
126                 "uniform vec2 foo_chroma_offset_0; \n"
127                 "uniform vec2 foo_chroma_offset_1; \n"
128                 " \n"
129                 "void main() \n"
130                 "{ \n"
131                 "    // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n"
132                 "    // \n"
133                 "    //   2.000  0.000  0.000 -1.000 \n"
134                 "    //   0.000  2.000  0.000 -1.000 \n"
135                 "    //   0.000  0.000 -2.000 -1.000 \n"
136                 "    //   0.000  0.000  0.000  1.000 \n"
137                 "    gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n"
138                 "    vec2 flipped_tc = texcoord; \n"
139                 "    y_tc0 = flipped_tc + foo_luma_offset_0; \n"
140                 "    y_tc1 = flipped_tc + foo_luma_offset_1; \n"
141                 "    cbcr_tc0 = flipped_tc + foo_chroma_offset_0; \n"
142                 "    cbcr_tc1 = flipped_tc + foo_chroma_offset_1; \n"
143                 "} \n";
144         string uyvy_frag_shader =
145                 "#version 130 \n"
146                 "in vec2 y_tc0, y_tc1, cbcr_tc0, cbcr_tc1; \n"
147                 "uniform sampler2D y_tex, cbcr_tex; \n"
148                 "out vec4 FragColor; \n"
149                 "void main() { \n"
150                 "    float y0 = texture(y_tex, y_tc0).r; \n"
151                 "    float y1 = texture(y_tex, y_tc1).r; \n"
152                 "    vec2 cbcr0 = texture(cbcr_tex, cbcr_tc0).rg; \n"
153                 "    vec2 cbcr1 = texture(cbcr_tex, cbcr_tc1).rg; \n"
154                 "    vec2 cbcr = 0.5 * (cbcr0 + cbcr1); \n"
155                 "    FragColor = vec4(cbcr.g, y0, cbcr.r, y1); \n"
156                 "} \n";
157
158         uyvy_program_num = resource_pool->compile_glsl_program(uyvy_vert_shader, uyvy_frag_shader, frag_shader_outputs);
159         check_error();
160         uyvy_luma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_0");
161         check_error();
162         uyvy_luma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_1");
163         check_error();
164         uyvy_chroma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_0");
165         check_error();
166         uyvy_chroma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_1");
167         check_error();
168
169         uyvy_y_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "y_tex");
170         check_error();
171         uyvy_cbcr_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "cbcr_tex");
172         check_error();
173         uyvy_position_attribute_index = glGetAttribLocation(uyvy_program_num, "position");
174         check_error();
175         uyvy_texcoord_attribute_index = glGetAttribLocation(uyvy_program_num, "texcoord");
176         check_error();
177
178         // Shared between the two.
179         float vertices[] = {
180                 0.0f, 2.0f,
181                 0.0f, 0.0f,
182                 2.0f, 0.0f
183         };
184         vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices);
185         check_error();
186
187         // v210 compute shader.
188         if (v210Converter::has_hardware_support()) {
189                 string v210_shader_src = R"(#version 150
190 #extension GL_ARB_compute_shader : enable
191 #extension GL_ARB_shader_image_load_store : enable
192 layout(local_size_x=2, local_size_y=16) in;
193 layout(r16) uniform restrict readonly image2D in_y;
194 uniform sampler2D in_cbcr;  // Of type RG16.
195 layout(rgb10_a2) uniform restrict writeonly image2D outbuf;
196 uniform float inv_width, inv_height;
197
198 void main()
199 {
200         int xb = int(gl_GlobalInvocationID.x);  // X block number.
201         int y = int(gl_GlobalInvocationID.y);  // Y (actual line).
202         float yf = (gl_GlobalInvocationID.y + 0.5f) * inv_height;  // Y float coordinate.
203
204         // Load and scale CbCr values, sampling in-between the texels to get
205         // to (left/4 + center/2 + right/4).
206         vec2 pix_cbcr[3];
207         for (int i = 0; i < 3; ++i) {
208                 vec2 a = texture(in_cbcr, vec2((xb * 6 + i * 2) * inv_width, yf)).xy;
209                 vec2 b = texture(in_cbcr, vec2((xb * 6 + i * 2 + 1) * inv_width, yf)).xy;
210                 pix_cbcr[i] = (a + b) * (0.5 * 65535.0 / 1023.0);
211         }
212
213         // Load and scale the Y values. Note that we use integer coordinates here,
214         // so we don't need to offset by 0.5.
215         float pix_y[6];
216         for (int i = 0; i < 6; ++i) {
217                 pix_y[i] = imageLoad(in_y, ivec2(xb * 6 + i, y)).x * (65535.0 / 1023.0);
218         }
219
220         imageStore(outbuf, ivec2(xb * 4 + 0, y), vec4(pix_cbcr[0].x, pix_y[0],      pix_cbcr[0].y, 1.0));
221         imageStore(outbuf, ivec2(xb * 4 + 1, y), vec4(pix_y[1],      pix_cbcr[1].x, pix_y[2],      1.0));
222         imageStore(outbuf, ivec2(xb * 4 + 2, y), vec4(pix_cbcr[1].y, pix_y[3],      pix_cbcr[2].x, 1.0));
223         imageStore(outbuf, ivec2(xb * 4 + 3, y), vec4(pix_y[4],      pix_cbcr[2].y, pix_y[5],      1.0));
224 }
225 )";
226                 GLuint shader_num = movit::compile_shader(v210_shader_src, GL_COMPUTE_SHADER);
227                 check_error();
228                 v210_program_num = glCreateProgram();
229                 check_error();
230                 glAttachShader(v210_program_num, shader_num);
231                 check_error();
232                 glLinkProgram(v210_program_num);
233                 check_error();
234
235                 GLint success;
236                 glGetProgramiv(v210_program_num, GL_LINK_STATUS, &success);
237                 check_error();
238                 if (success == GL_FALSE) {
239                         GLchar error_log[1024] = {0};
240                         glGetProgramInfoLog(v210_program_num, 1024, nullptr, error_log);
241                         fprintf(stderr, "Error linking program: %s\n", error_log);
242                         exit(1);
243                 }
244
245                 v210_in_y_pos = glGetUniformLocation(v210_program_num, "in_y");
246                 check_error();
247                 v210_in_cbcr_pos = glGetUniformLocation(v210_program_num, "in_cbcr");
248                 check_error();
249                 v210_outbuf_pos = glGetUniformLocation(v210_program_num, "outbuf");
250                 check_error();
251                 v210_inv_width_pos = glGetUniformLocation(v210_program_num, "inv_width");
252                 check_error();
253                 v210_inv_height_pos = glGetUniformLocation(v210_program_num, "inv_height");
254                 check_error();
255         } else {
256                 v210_program_num = 0;
257         }
258 }
259
260 ChromaSubsampler::~ChromaSubsampler()
261 {
262         resource_pool->release_glsl_program(cbcr_program_num);
263         check_error();
264         resource_pool->release_glsl_program(uyvy_program_num);
265         check_error();
266         glDeleteBuffers(1, &vbo);
267         check_error();
268         if (v210_program_num != 0) {
269                 glDeleteProgram(v210_program_num);
270                 check_error();
271         }
272 }
273
274 void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex, GLuint dst2_tex)
275 {
276         GLuint vao = resource_pool->create_vec2_vao({ cbcr_position_attribute_index, cbcr_texcoord_attribute_index }, vbo);
277         glBindVertexArray(vao);
278         check_error();
279
280         // Extract Cb/Cr.
281         GLuint fbo;
282         if (dst2_tex <= 0) {
283                 fbo = resource_pool->create_fbo(dst_tex);
284         } else {
285                 fbo = resource_pool->create_fbo(dst_tex, dst2_tex);
286         }
287         glBindFramebuffer(GL_FRAMEBUFFER, fbo);
288         glViewport(0, 0, width/2, height/2);
289         check_error();
290
291         glUseProgram(cbcr_program_num);
292         check_error();
293
294         glActiveTexture(GL_TEXTURE0);
295         check_error();
296         glBindTexture(GL_TEXTURE_2D, cbcr_tex);
297         check_error();
298         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
299         check_error();
300         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
301         check_error();
302         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
303         check_error();
304
305         glUniform2f(cbcr_chroma_offset_0_location, -1.0f / width, 0.0f);
306         check_error();
307         glUniform2f(cbcr_chroma_offset_1_location, -0.0f / width, 0.0f);
308         check_error();
309         glUniform1i(cbcr_texture_sampler_uniform, 0);
310
311         glDrawArrays(GL_TRIANGLES, 0, 3);
312         check_error();
313
314         glUseProgram(0);
315         check_error();
316         glBindFramebuffer(GL_FRAMEBUFFER, 0);
317         check_error();
318         glBindVertexArray(0);
319         check_error();
320
321         resource_pool->release_fbo(fbo);
322         resource_pool->release_vec2_vao(vao);
323 }
324
325 void ChromaSubsampler::create_uyvy(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
326 {
327         GLuint vao = resource_pool->create_vec2_vao({ cbcr_position_attribute_index, cbcr_texcoord_attribute_index }, vbo);
328         glBindVertexArray(vao);
329         check_error();
330
331         glBindVertexArray(vao);
332         check_error();
333
334         GLuint fbo = resource_pool->create_fbo(dst_tex);
335         glBindFramebuffer(GL_FRAMEBUFFER, fbo);
336         glViewport(0, 0, width/2, height);
337         check_error();
338
339         glUseProgram(uyvy_program_num);
340         check_error();
341
342         glUniform1i(uyvy_y_texture_sampler_uniform, 0);
343         check_error();
344         glUniform1i(uyvy_cbcr_texture_sampler_uniform, 1);
345         check_error();
346
347         glActiveTexture(GL_TEXTURE0);
348         check_error();
349         glBindTexture(GL_TEXTURE_2D, y_tex);
350         check_error();
351         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
352         check_error();
353         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
354         check_error();
355         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
356         check_error();
357
358         glActiveTexture(GL_TEXTURE1);
359         check_error();
360         glBindTexture(GL_TEXTURE_2D, cbcr_tex);
361         check_error();
362         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
363         check_error();
364         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
365         check_error();
366         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
367         check_error();
368
369         glUniform2f(uyvy_luma_offset_0_location, -0.5f / width, 0.0f);
370         check_error();
371         glUniform2f(uyvy_luma_offset_1_location,  0.5f / width, 0.0f);
372         check_error();
373         glUniform2f(uyvy_chroma_offset_0_location, -1.0f / width, 0.0f);
374         check_error();
375         glUniform2f(uyvy_chroma_offset_1_location, -0.0f / width, 0.0f);
376         check_error();
377
378         glBindBuffer(GL_ARRAY_BUFFER, vbo);
379         check_error();
380
381         glDrawArrays(GL_TRIANGLES, 0, 3);
382         check_error();
383
384         glActiveTexture(GL_TEXTURE0);
385         check_error();
386         glUseProgram(0);
387         check_error();
388         glBindFramebuffer(GL_FRAMEBUFFER, 0);
389         check_error();
390         glBindVertexArray(0);
391         check_error();
392
393         resource_pool->release_fbo(fbo);
394         resource_pool->release_vec2_vao(vao);
395 }
396
397 void ChromaSubsampler::create_v210(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
398 {
399         assert(v210_program_num != 0);
400
401         glUseProgram(v210_program_num);
402         check_error();
403
404         glUniform1i(v210_in_y_pos, 0);
405         check_error();
406         glUniform1i(v210_in_cbcr_pos, 1);
407         check_error();
408         glUniform1i(v210_outbuf_pos, 2);
409         check_error();
410         glUniform1f(v210_inv_width_pos, 1.0 / width);
411         check_error();
412         glUniform1f(v210_inv_height_pos, 1.0 / height);
413         check_error();
414
415         glActiveTexture(GL_TEXTURE0);
416         check_error();
417         glBindTexture(GL_TEXTURE_2D, y_tex);  // We don't actually need to bind it, but we need to set the state.
418         check_error();
419         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
420         check_error();
421         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
422         check_error();
423         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
424         check_error();
425         glBindImageTexture(0, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16);  // This is the real bind.
426         check_error();
427
428         glActiveTexture(GL_TEXTURE1);
429         check_error();
430         glBindTexture(GL_TEXTURE_2D, cbcr_tex);
431         check_error();
432         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
433         check_error();
434         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
435         check_error();
436         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
437         check_error();
438
439         glBindImageTexture(2, dst_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGB10_A2);
440         check_error();
441
442         // Actually run the shader. We use workgroups of size 2x16 threadst , and each thread
443         // processes 6x1 input pixels, so round up to number of 12x16 pixel blocks.
444         glDispatchCompute((width + 11) / 12, (height + 15) / 16, 1);
445
446         glBindTexture(GL_TEXTURE_2D, 0);
447         check_error();
448         glActiveTexture(GL_TEXTURE0);
449         check_error();
450         glUseProgram(0);
451         check_error();
452 }