]> git.sesse.net Git - nageru/blob - nageru/chroma_subsampler.cpp
Fix a dangling reference (found by GCC 14).
[nageru] / nageru / chroma_subsampler.cpp
1 #include "chroma_subsampler.h"
2 #include "v210_converter.h"
3
4 #include <string>
5 #include <vector>
6
7 #include <assert.h>
8 #include <stdio.h>
9 #include <movit/effect_util.h>
10 #include <movit/resource_pool.h>
11 #include <movit/util.h>
12
13 #include "embedded_files.h"
14 #include "shared/read_file.h"
15
16 using namespace movit;
17 using namespace std;
18
19 ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool)
20         : resource_pool(resource_pool)
21 {
22         vector<string> frag_shader_outputs;
23
24         // Set up stuff for NV12 conversion.
25         //
26         // Note: Due to the horizontally co-sited chroma/luma samples in H.264
27         // (chrome position is left for horizontal and center for vertical),
28         // we need to be a bit careful in our subsampling. A diagram will make
29         // this clearer, showing some luma and chroma samples:
30         //
31         //     a   b   c   d
32         //   +---+---+---+---+
33         //   |   |   |   |   |
34         //   | Y | Y | Y | Y |
35         //   |   |   |   |   |
36         //   +---+---+---+---+
37         //
38         // +-------+-------+
39         // |       |       |
40         // |   C   |   C   |
41         // |       |       |
42         // +-------+-------+
43         //
44         // Clearly, the rightmost chroma sample here needs to be equivalent to
45         // b/4 + c/2 + d/4. (We could also implement more sophisticated filters,
46         // of course, but as long as the upsampling is not going to be equally
47         // sophisticated, it's probably not worth it.) If we sample once with
48         // no mipmapping, we get just c, ie., no actual filtering in the
49         // horizontal direction. (For the vertical direction, we can just
50         // sample in the middle to get the right filtering.) One could imagine
51         // we could use mipmapping (assuming we can create mipmaps cheaply),
52         // but then, what we'd get is this:
53         //
54         //    (a+b)/2 (c+d)/2
55         //   +-------+-------+
56         //   |       |       |
57         //   |   Y   |   Y   |
58         //   |       |       |
59         //   +-------+-------+
60         //
61         // +-------+-------+
62         // |       |       |
63         // |   C   |   C   |
64         // |       |       |
65         // +-------+-------+
66         //
67         // which ends up sampling equally from a and b, which clearly isn't right. Instead,
68         // we need to do two (non-mipmapped) chroma samples, both hitting exactly in-between
69         // source pixels.
70         //
71         // Sampling in-between b and c gives us the sample (b+c)/2, and similarly for c and d.
72         // Taking the average of these gives of (b+c)/4 + (c+d)/4 = b/4 + c/2 + d/4, which is
73         // exactly what we want.
74         //
75         // See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7.
76
77         // Cb/Cr shader.
78         string cbcr_vert_shader = read_file("cbcr_subsample.vert", _binary_cbcr_subsample_vert_data, _binary_cbcr_subsample_vert_size);
79         string cbcr_frag_shader = read_file("cbcr_subsample.frag", _binary_cbcr_subsample_frag_data, _binary_cbcr_subsample_frag_size);
80         cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs);
81         check_error();
82         cbcr_chroma_offset_0_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_0");
83         check_error();
84         cbcr_chroma_offset_1_location = get_uniform_location(cbcr_program_num, "foo", "chroma_offset_1");
85         check_error();
86
87         cbcr_texture_sampler_uniform = glGetUniformLocation(cbcr_program_num, "cbcr_tex");
88         check_error();
89         cbcr_position_attribute_index = glGetAttribLocation(cbcr_program_num, "position");
90         check_error();
91         cbcr_texcoord_attribute_index = glGetAttribLocation(cbcr_program_num, "texcoord");
92         check_error();
93
94         // Same, for UYVY conversion.
95         string uyvy_vert_shader = read_file("uyvy_subsample.vert", _binary_uyvy_subsample_vert_data, _binary_uyvy_subsample_vert_size);
96         string uyvy_frag_shader = read_file("uyvy_subsample.frag", _binary_uyvy_subsample_frag_data, _binary_uyvy_subsample_frag_size);
97
98         uyvy_program_num = resource_pool->compile_glsl_program(uyvy_vert_shader, uyvy_frag_shader, frag_shader_outputs);
99         check_error();
100         uyvy_luma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_0");
101         check_error();
102         uyvy_luma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "luma_offset_1");
103         check_error();
104         uyvy_chroma_offset_0_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_0");
105         check_error();
106         uyvy_chroma_offset_1_location = get_uniform_location(uyvy_program_num, "foo", "chroma_offset_1");
107         check_error();
108
109         uyvy_y_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "y_tex");
110         check_error();
111         uyvy_cbcr_texture_sampler_uniform = glGetUniformLocation(uyvy_program_num, "cbcr_tex");
112         check_error();
113         uyvy_position_attribute_index = glGetAttribLocation(uyvy_program_num, "position");
114         check_error();
115         uyvy_texcoord_attribute_index = glGetAttribLocation(uyvy_program_num, "texcoord");
116         check_error();
117
118         // Shared between the two.
119         float vertices[] = {
120                 0.0f, 2.0f,
121                 0.0f, 0.0f,
122                 2.0f, 0.0f
123         };
124         vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices);
125         check_error();
126
127         // v210 compute shader.
128         if (v210Converter::has_hardware_support()) {
129                 string v210_shader_src = read_file("v210_subsample.comp", _binary_v210_subsample_comp_data, _binary_v210_subsample_comp_size);
130                 GLuint shader_num = movit::compile_shader(v210_shader_src, GL_COMPUTE_SHADER);
131                 check_error();
132                 v210_program_num = glCreateProgram();
133                 check_error();
134                 glAttachShader(v210_program_num, shader_num);
135                 check_error();
136                 glLinkProgram(v210_program_num);
137                 check_error();
138
139                 GLint success;
140                 glGetProgramiv(v210_program_num, GL_LINK_STATUS, &success);
141                 check_error();
142                 if (success == GL_FALSE) {
143                         GLchar error_log[1024] = {0};
144                         glGetProgramInfoLog(v210_program_num, 1024, nullptr, error_log);
145                         fprintf(stderr, "Error linking program: %s\n", error_log);
146                         abort();
147                 }
148
149                 v210_in_y_pos = glGetUniformLocation(v210_program_num, "in_y");
150                 check_error();
151                 v210_in_cbcr_pos = glGetUniformLocation(v210_program_num, "in_cbcr");
152                 check_error();
153                 v210_outbuf_pos = glGetUniformLocation(v210_program_num, "outbuf");
154                 check_error();
155                 v210_inv_width_pos = glGetUniformLocation(v210_program_num, "inv_width");
156                 check_error();
157                 v210_inv_height_pos = glGetUniformLocation(v210_program_num, "inv_height");
158                 check_error();
159         } else {
160                 v210_program_num = 0;
161         }
162 }
163
164 ChromaSubsampler::~ChromaSubsampler()
165 {
166         resource_pool->release_glsl_program(cbcr_program_num);
167         check_error();
168         resource_pool->release_glsl_program(uyvy_program_num);
169         check_error();
170         glDeleteBuffers(1, &vbo);
171         check_error();
172         if (v210_program_num != 0) {
173                 glDeleteProgram(v210_program_num);
174                 check_error();
175         }
176 }
177
178 void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex, GLuint dst2_tex)
179 {
180         GLuint vao = resource_pool->create_vec2_vao({ cbcr_position_attribute_index, cbcr_texcoord_attribute_index }, vbo);
181         glBindVertexArray(vao);
182         check_error();
183
184         // Extract Cb/Cr.
185         GLuint fbo;
186         if (dst2_tex <= 0) {
187                 fbo = resource_pool->create_fbo(dst_tex);
188         } else {
189                 fbo = resource_pool->create_fbo(dst_tex, dst2_tex);
190         }
191         glBindFramebuffer(GL_FRAMEBUFFER, fbo);
192         glViewport(0, 0, width/2, height/2);
193         check_error();
194
195         glUseProgram(cbcr_program_num);
196         check_error();
197
198         glActiveTexture(GL_TEXTURE0);
199         check_error();
200         glBindTexture(GL_TEXTURE_2D, cbcr_tex);
201         check_error();
202         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
203         check_error();
204         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
205         check_error();
206         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
207         check_error();
208
209         glUniform2f(cbcr_chroma_offset_0_location, -1.0f / width, 0.0f);
210         check_error();
211         glUniform2f(cbcr_chroma_offset_1_location, -0.0f / width, 0.0f);
212         check_error();
213         glUniform1i(cbcr_texture_sampler_uniform, 0);
214
215         glDrawArrays(GL_TRIANGLES, 0, 3);
216         check_error();
217
218         glUseProgram(0);
219         check_error();
220         glBindFramebuffer(GL_FRAMEBUFFER, 0);
221         check_error();
222         glBindVertexArray(0);
223         check_error();
224
225         resource_pool->release_fbo(fbo);
226         resource_pool->release_vec2_vao(vao);
227 }
228
229 void ChromaSubsampler::create_uyvy(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
230 {
231         GLuint vao = resource_pool->create_vec2_vao({ cbcr_position_attribute_index, cbcr_texcoord_attribute_index }, vbo);
232         glBindVertexArray(vao);
233         check_error();
234
235         glBindVertexArray(vao);
236         check_error();
237
238         GLuint fbo = resource_pool->create_fbo(dst_tex);
239         glBindFramebuffer(GL_FRAMEBUFFER, fbo);
240         glViewport(0, 0, width/2, height);
241         check_error();
242
243         glUseProgram(uyvy_program_num);
244         check_error();
245
246         glUniform1i(uyvy_y_texture_sampler_uniform, 0);
247         check_error();
248         glUniform1i(uyvy_cbcr_texture_sampler_uniform, 1);
249         check_error();
250
251         glActiveTexture(GL_TEXTURE0);
252         check_error();
253         glBindTexture(GL_TEXTURE_2D, y_tex);
254         check_error();
255         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
256         check_error();
257         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
258         check_error();
259         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
260         check_error();
261
262         glActiveTexture(GL_TEXTURE1);
263         check_error();
264         glBindTexture(GL_TEXTURE_2D, cbcr_tex);
265         check_error();
266         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
267         check_error();
268         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
269         check_error();
270         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
271         check_error();
272
273         glUniform2f(uyvy_luma_offset_0_location, -0.5f / width, 0.0f);
274         check_error();
275         glUniform2f(uyvy_luma_offset_1_location,  0.5f / width, 0.0f);
276         check_error();
277         glUniform2f(uyvy_chroma_offset_0_location, -1.0f / width, 0.0f);
278         check_error();
279         glUniform2f(uyvy_chroma_offset_1_location, -0.0f / width, 0.0f);
280         check_error();
281
282         glBindBuffer(GL_ARRAY_BUFFER, vbo);
283         check_error();
284
285         glDrawArrays(GL_TRIANGLES, 0, 3);
286         check_error();
287
288         glActiveTexture(GL_TEXTURE0);
289         check_error();
290         glUseProgram(0);
291         check_error();
292         glBindFramebuffer(GL_FRAMEBUFFER, 0);
293         check_error();
294         glBindVertexArray(0);
295         check_error();
296
297         resource_pool->release_fbo(fbo);
298         resource_pool->release_vec2_vao(vao);
299 }
300
301 void ChromaSubsampler::create_v210(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
302 {
303         assert(v210_program_num != 0);
304
305         glUseProgram(v210_program_num);
306         check_error();
307
308         glUniform1i(v210_in_y_pos, 0);
309         check_error();
310         glUniform1i(v210_in_cbcr_pos, 1);
311         check_error();
312         glUniform1i(v210_outbuf_pos, 2);
313         check_error();
314         glUniform1f(v210_inv_width_pos, 1.0 / width);
315         check_error();
316         glUniform1f(v210_inv_height_pos, 1.0 / height);
317         check_error();
318
319         glActiveTexture(GL_TEXTURE0);
320         check_error();
321         glBindTexture(GL_TEXTURE_2D, y_tex);  // We don't actually need to bind it, but we need to set the state.
322         check_error();
323         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
324         check_error();
325         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
326         check_error();
327         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
328         check_error();
329         glBindImageTexture(0, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16);  // This is the real bind.
330         check_error();
331
332         glActiveTexture(GL_TEXTURE1);
333         check_error();
334         glBindTexture(GL_TEXTURE_2D, cbcr_tex);
335         check_error();
336         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
337         check_error();
338         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
339         check_error();
340         glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
341         check_error();
342
343         glBindImageTexture(2, dst_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGB10_A2);
344         check_error();
345
346         // Actually run the shader. We use workgroups of size 2x16 threadst , and each thread
347         // processes 6x1 input pixels, so round up to number of 12x16 pixel blocks.
348         glDispatchCompute((width + 11) / 12, (height + 15) / 16, 1);
349
350         glBindTexture(GL_TEXTURE_2D, 0);
351         check_error();
352         glActiveTexture(GL_TEXTURE0);
353         check_error();
354         glUseProgram(0);
355         check_error();
356 }