]> git.sesse.net Git - nageru/blob - nageru/chroma_subsampler.cpp
Fix a comment typo.
[nageru] / nageru / chroma_subsampler.cpp
1 #include "chroma_subsampler.h"
2 #include "v210_converter.h"
3
4 #include <vector>
5
6 #include <movit/effect_util.h>
7 #include <movit/resource_pool.h>
8 #include <movit/util.h>
9
10 #include "embedded_files.h"
11 #include "shared/read_file.h"
12
13 using namespace movit;
14 using namespace std;
15
16 ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool)
17         : resource_pool(resource_pool)
18 {
19         vector<string> frag_shader_outputs;
20
21         // Set up stuff for NV12 conversion.
22         //
23         // Note: Due to the horizontally co-sited chroma/luma samples in H.264
24         // (chrome position is left for horizontal and center for vertical),
25         // we need to be a bit careful in our subsampling. A diagram will make
26         // this clearer, showing some luma and chroma samples:
27         //
28         //     a   b   c   d
29         //   +---+---+---+---+
30         //   |   |   |   |   |
31         //   | Y | Y | Y | Y |
32         //   |   |   |   |   |
33         //   +---+---+---+---+
34         //
35         // +-------+-------+
36         // |       |       |
37         // |   C   |   C   |
38         // |       |       |
39         // +-------+-------+
40         //
41         // Clearly, the rightmost chroma sample here needs to be equivalent to
42         // b/4 + c/2 + d/4. (We could also implement more sophisticated filters,
43         // of course, but as long as the upsampling is not going to be equally
44         // sophisticated, it's probably not worth it.) If we sample once with
45         // no mipmapping, we get just c, ie., no actual filtering in the
46         // horizontal direction. (For the vertical direction, we can just
47         // sample in the middle to get the right filtering.) One could imagine
48         // we could use mipmapping (assuming we can create mipmaps cheaply),
49         // but then, what we'd get is this:
50         //
51         //    (a+b)/2 (c+d)/2
52         //   +-------+-------+
53         //   |       |       |
54         //   |   Y   |   Y   |
55         //   |       |       |
56         //   +-------+-------+
57         //
58         // +-------+-------+
59         // |       |       |
60         // |   C   |   C   |
61         // |       |       |
62         // +-------+-------+
63         //
64         // which ends up sampling equally from a and b, which clearly isn't right. Instead,
65         // we need to do two (non-mipmapped) chroma samples, both hitting exactly in-between
66         // source pixels.
67         //
68         // Sampling in-between b and c gives us the sample (b+c)/2, and similarly for c and d.
69         // Taking the average of these gives of (b+c)/4 + (c+d)/4 = b/4 + c/2 + d/4, which is
70         // exactly what we want.
71         //
72         // See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7.
73
74         // Cb/Cr shader.
75         string cbcr_vert_shader = read_file("cbcr_subsample.vert", _binary_cbcr_subsample_vert_data, _binary_cbcr_subsample_vert_size);
76         string cbcr_frag_shader = read_file("cbcr_subsample.frag", _binary_cbcr_subsample_frag_data, _binary_cbcr_subsample_frag_size);
77         cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs);
78         check_error();
79         cbcr_chroma_offset_0_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_0");
80         check_error();
81         cbcr_chroma_offset_1_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_1");
82         check_error();
83
84         cbcr_texture_sampler_uniform = glGetUniformLocation(cbcr_program_num, "cbcr_tex");
85         check_error();
86         cbcr_position_attribute_index = glGetAttribLocation(cbcr_program_num, "position");
87         check_error();
88         cbcr_texcoord_attribute_index = glGetAttribLocation(cbcr_program_num, "texcoord");
89         check_error();
90
91         // Same, for UYVY conversion.
92         string uyvy_vert_shader = read_file("uyvy_subsample.vert", _binary_uyvy_subsample_vert_data, _binary_uyvy_subsample_vert_size);
93         string uyvy_frag_shader = read_file("uyvy_subsample.frag", _binary_uyvy_subsample_frag_data, _binary_uyvy_subsample_frag_size);
94
95         uyvy_program_num = resource_pool->compile_glsl_program(uyvy_vert_shader, uyvy_frag_shader, frag_shader_outputs);
96         check_error();
97         uyvy_luma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_0");
98         check_error();
99         uyvy_luma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_1");
100         check_error();
101         uyvy_chroma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_0");
102         check_error();
103         uyvy_chroma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_1");
104         check_error();
105
106         uyvy_y_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "y_tex");
107         check_error();
108         uyvy_cbcr_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "cbcr_tex");
109         check_error();
110         uyvy_position_attribute_index = glGetAttribLocation(uyvy_program_num, "position");
111         check_error();
112         uyvy_texcoord_attribute_index = glGetAttribLocation(uyvy_program_num, "texcoord");
113         check_error();
114
115         // Shared between the two.
116         float vertices[] = {
117                 0.0f, 2.0f,
118                 0.0f, 0.0f,
119                 2.0f, 0.0f
120         };
121         vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices);
122         check_error();
123
124         // v210 compute shader.
125         if (v210Converter::has_hardware_support()) {
126                 string v210_shader_src = read_file("v210_subsample.comp", _binary_v210_subsample_comp_data, _binary_v210_subsample_comp_size);
127                 GLuint shader_num = movit::compile_shader(v210_shader_src, GL_COMPUTE_SHADER);
128                 check_error();
129                 v210_program_num = glCreateProgram();
130                 check_error();
131                 glAttachShader(v210_program_num, shader_num);
132                 check_error();
133                 glLinkProgram(v210_program_num);
134                 check_error();
135
136                 GLint success;
137                 glGetProgramiv(v210_program_num, GL_LINK_STATUS, &success);
138                 check_error();
139                 if (success == GL_FALSE) {
140                         GLchar error_log[1024] = {0};
141                         glGetProgramInfoLog(v210_program_num, 1024, nullptr, error_log);
142                         fprintf(stderr, "Error linking program: %s\n", error_log);
143                         abort();
144                 }
145
146                 v210_in_y_pos = glGetUniformLocation(v210_program_num, "in_y");
147                 check_error();
148                 v210_in_cbcr_pos = glGetUniformLocation(v210_program_num, "in_cbcr");
149                 check_error();
150                 v210_outbuf_pos = glGetUniformLocation(v210_program_num, "outbuf");
151                 check_error();
152                 v210_inv_width_pos = glGetUniformLocation(v210_program_num, "inv_width");
153                 check_error();
154                 v210_inv_height_pos = glGetUniformLocation(v210_program_num, "inv_height");
155                 check_error();
156         } else {
157                 v210_program_num = 0;
158         }
159 }
160
161 ChromaSubsampler::~ChromaSubsampler()
162 {
163         resource_pool->release_glsl_program(cbcr_program_num);
164         check_error();
165         resource_pool->release_glsl_program(uyvy_program_num);
166         check_error();
167         glDeleteBuffers(1, &vbo);
168         check_error();
169         if (v210_program_num != 0) {
170                 glDeleteProgram(v210_program_num);
171                 check_error();
172         }
173 }
174
175 void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex, GLuint dst2_tex)
176 {
177         GLuint vao = resource_pool->create_vec2_vao({ cbcr_position_attribute_index, cbcr_texcoord_attribute_index }, vbo);
178         glBindVertexArray(vao);
179         check_error();
180
181         // Extract Cb/Cr.
182         GLuint fbo;
183         if (dst2_tex <= 0) {
184                 fbo = resource_pool->create_fbo(dst_tex);
185         } else {
186                 fbo = resource_pool->create_fbo(dst_tex, dst2_tex);
187         }
188         glBindFramebuffer(GL_FRAMEBUFFER, fbo);
189         glViewport(0, 0, width/2, height/2);
190         check_error();
191
192         glUseProgram(cbcr_program_num);
193         check_error();
194
195         glActiveTexture(GL_TEXTURE0);
196         check_error();
197         glBindTexture(GL_TEXTURE_2D, cbcr_tex);
198         check_error();
199         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
200         check_error();
201         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
202         check_error();
203         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
204         check_error();
205
206         glUniform2f(cbcr_chroma_offset_0_location, -1.0f / width, 0.0f);
207         check_error();
208         glUniform2f(cbcr_chroma_offset_1_location, -0.0f / width, 0.0f);
209         check_error();
210         glUniform1i(cbcr_texture_sampler_uniform, 0);
211
212         glDrawArrays(GL_TRIANGLES, 0, 3);
213         check_error();
214
215         glUseProgram(0);
216         check_error();
217         glBindFramebuffer(GL_FRAMEBUFFER, 0);
218         check_error();
219         glBindVertexArray(0);
220         check_error();
221
222         resource_pool->release_fbo(fbo);
223         resource_pool->release_vec2_vao(vao);
224 }
225
226 void ChromaSubsampler::create_uyvy(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
227 {
228         GLuint vao = resource_pool->create_vec2_vao({ cbcr_position_attribute_index, cbcr_texcoord_attribute_index }, vbo);
229         glBindVertexArray(vao);
230         check_error();
231
232         glBindVertexArray(vao);
233         check_error();
234
235         GLuint fbo = resource_pool->create_fbo(dst_tex);
236         glBindFramebuffer(GL_FRAMEBUFFER, fbo);
237         glViewport(0, 0, width/2, height);
238         check_error();
239
240         glUseProgram(uyvy_program_num);
241         check_error();
242
243         glUniform1i(uyvy_y_texture_sampler_uniform, 0);
244         check_error();
245         glUniform1i(uyvy_cbcr_texture_sampler_uniform, 1);
246         check_error();
247
248         glActiveTexture(GL_TEXTURE0);
249         check_error();
250         glBindTexture(GL_TEXTURE_2D, y_tex);
251         check_error();
252         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
253         check_error();
254         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
255         check_error();
256         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
257         check_error();
258
259         glActiveTexture(GL_TEXTURE1);
260         check_error();
261         glBindTexture(GL_TEXTURE_2D, cbcr_tex);
262         check_error();
263         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
264         check_error();
265         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
266         check_error();
267         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
268         check_error();
269
270         glUniform2f(uyvy_luma_offset_0_location, -0.5f / width, 0.0f);
271         check_error();
272         glUniform2f(uyvy_luma_offset_1_location,  0.5f / width, 0.0f);
273         check_error();
274         glUniform2f(uyvy_chroma_offset_0_location, -1.0f / width, 0.0f);
275         check_error();
276         glUniform2f(uyvy_chroma_offset_1_location, -0.0f / width, 0.0f);
277         check_error();
278
279         glBindBuffer(GL_ARRAY_BUFFER, vbo);
280         check_error();
281
282         glDrawArrays(GL_TRIANGLES, 0, 3);
283         check_error();
284
285         glActiveTexture(GL_TEXTURE0);
286         check_error();
287         glUseProgram(0);
288         check_error();
289         glBindFramebuffer(GL_FRAMEBUFFER, 0);
290         check_error();
291         glBindVertexArray(0);
292         check_error();
293
294         resource_pool->release_fbo(fbo);
295         resource_pool->release_vec2_vao(vao);
296 }
297
298 void ChromaSubsampler::create_v210(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
299 {
300         assert(v210_program_num != 0);
301
302         glUseProgram(v210_program_num);
303         check_error();
304
305         glUniform1i(v210_in_y_pos, 0);
306         check_error();
307         glUniform1i(v210_in_cbcr_pos, 1);
308         check_error();
309         glUniform1i(v210_outbuf_pos, 2);
310         check_error();
311         glUniform1f(v210_inv_width_pos, 1.0 / width);
312         check_error();
313         glUniform1f(v210_inv_height_pos, 1.0 / height);
314         check_error();
315
316         glActiveTexture(GL_TEXTURE0);
317         check_error();
318         glBindTexture(GL_TEXTURE_2D, y_tex);  // We don't actually need to bind it, but we need to set the state.
319         check_error();
320         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
321         check_error();
322         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
323         check_error();
324         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
325         check_error();
326         glBindImageTexture(0, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16);  // This is the real bind.
327         check_error();
328
329         glActiveTexture(GL_TEXTURE1);
330         check_error();
331         glBindTexture(GL_TEXTURE_2D, cbcr_tex);
332         check_error();
333         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
334         check_error();
335         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
336         check_error();
337         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
338         check_error();
339
340         glBindImageTexture(2, dst_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGB10_A2);
341         check_error();
342
343         // Actually run the shader. We use workgroups of size 2x16 threadst , and each thread
344         // processes 6x1 input pixels, so round up to number of 12x16 pixel blocks.
345         glDispatchCompute((width + 11) / 12, (height + 15) / 16, 1);
346
347         glBindTexture(GL_TEXTURE_2D, 0);
348         check_error();
349         glActiveTexture(GL_TEXTURE0);
350         check_error();
351         glUseProgram(0);
352         check_error();
353 }