From 72ab6704497f1b561588a9a0034cb223ab6425d0 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Mon, 9 Jan 2017 23:11:06 +0100 Subject: [PATCH] Move chroma subsampling into its own class. This was starting to get a bit large to be part of Mixer, and with 4:2:2 coming up, it makes even more sense to split it out. --- Makefile | 2 +- chroma_subsampler.cpp | 188 ++++++++++++++++++++++++++++++++++++++++++ chroma_subsampler.h | 31 +++++++ mixer.cpp | 167 +------------------------------------ mixer.h | 9 +- theme.cpp | 2 +- 6 files changed, 228 insertions(+), 171 deletions(-) create mode 100644 chroma_subsampler.cpp create mode 100644 chroma_subsampler.h diff --git a/Makefile b/Makefile index 0380d0d..b2f0906 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ OBJS += midi_mapper.o midi_mapping.pb.o # Mixer objects AUDIO_MIXER_OBJS = audio_mixer.o alsa_input.o alsa_pool.o ebu_r128_proc.o stereocompressor.o resampling_queue.o flags.o correlation_measurer.o filter.o input_mapping.o state.pb.o -OBJS += mixer.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o httpd.o flags.o image_input.o alsa_output.o disk_space_estimator.o print_latency.o $(AUDIO_MIXER_OBJS) +OBJS += chroma_subsampler.o mixer.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o httpd.o flags.o image_input.o alsa_output.o disk_space_estimator.o print_latency.o $(AUDIO_MIXER_OBJS) # Streaming and encoding objects OBJS += quicksync_encoder.o x264_encoder.o x264_speed_control.o video_encoder.o metacube2.o mux.o audio_encoder.o ffmpeg_raii.o diff --git a/chroma_subsampler.cpp b/chroma_subsampler.cpp new file mode 100644 index 0000000..b75facd --- /dev/null +++ b/chroma_subsampler.cpp @@ -0,0 +1,188 @@ +#include "chroma_subsampler.h" + +#include + +#include +#include +#include + +using namespace movit; +using namespace std; + +ChromaSubsampler::ChromaSubsampler(ResourcePool *resource_pool) + : resource_pool(resource_pool) +{ + // Set up stuff for NV12 conversion. 
+ // + // Note: Due to the horizontally co-sited chroma/luma samples in H.264 + // (chroma position is left for horizontal and center for vertical), + // we need to be a bit careful in our subsampling. A diagram will make + // this clearer, showing some luma and chroma samples: + // + // a b c d + // +---+---+---+---+ + // | | | | | + // | Y | Y | Y | Y | + // | | | | | + // +---+---+---+---+ + // + // +-------+-------+ + // | | | + // | C | C | + // | | | + // +-------+-------+ + // + // Clearly, the rightmost chroma sample here needs to be equivalent to + // b/4 + c/2 + d/4. (We could also implement more sophisticated filters, + // of course, but as long as the upsampling is not going to be equally + // sophisticated, it's probably not worth it.) If we sample once with + // no mipmapping, we get just c, i.e., no actual filtering in the + // horizontal direction. (For the vertical direction, we can just + // sample in the middle to get the right filtering.) One could imagine + // we could use mipmapping (assuming we can create mipmaps cheaply), + // but then, what we'd get is this: + // + // (a+b)/2 (c+d)/2 + // +-------+-------+ + // | | | + // | Y | Y | + // | | | + // +-------+-------+ + // + // +-------+-------+ + // | | | + // | C | C | + // | | | + // +-------+-------+ + // + // which ends up sampling equally from a and b, which clearly isn't right. Instead, + // we need to do two (non-mipmapped) chroma samples, both hitting exactly in-between + // source pixels. + // + // Sampling in-between b and c gives us the sample (b+c)/2, and similarly for c and d. + // Taking the average of these gives us (b+c)/4 + (c+d)/4 = b/4 + c/2 + d/4, which is + // exactly what we want. + // + // See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7. + + // Cb/Cr shader. 
+ string cbcr_vert_shader = + "#version 130 \n" + " \n" + "in vec2 position; \n" + "in vec2 texcoord; \n" + "out vec2 tc0, tc1; \n" + "uniform vec2 foo_chroma_offset_0; \n" + "uniform vec2 foo_chroma_offset_1; \n" + " \n" + "void main() \n" + "{ \n" + " // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n" + " // \n" + " // 2.000 0.000 0.000 -1.000 \n" + " // 0.000 2.000 0.000 -1.000 \n" + " // 0.000 0.000 -2.000 -1.000 \n" + " // 0.000 0.000 0.000 1.000 \n" + " gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n" + " vec2 flipped_tc = texcoord; \n" + " tc0 = flipped_tc + foo_chroma_offset_0; \n" + " tc1 = flipped_tc + foo_chroma_offset_1; \n" + "} \n"; + string cbcr_frag_shader = + "#version 130 \n" + "in vec2 tc0, tc1; \n" + "uniform sampler2D cbcr_tex; \n" + "out vec4 FragColor; \n" + "void main() { \n" + " FragColor = 0.5 * (texture(cbcr_tex, tc0) + texture(cbcr_tex, tc1)); \n" + "} \n"; + vector frag_shader_outputs; + cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs); + check_error(); + + float vertices[] = { + 0.0f, 2.0f, + 0.0f, 0.0f, + 2.0f, 0.0f + }; + cbcr_vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices); + check_error(); + cbcr_texture_sampler_uniform = glGetUniformLocation(cbcr_program_num, "cbcr_tex"); + check_error(); + cbcr_position_attribute_index = glGetAttribLocation(cbcr_program_num, "position"); + check_error(); + cbcr_texcoord_attribute_index = glGetAttribLocation(cbcr_program_num, "texcoord"); + check_error(); +} + +ChromaSubsampler::~ChromaSubsampler() +{ + resource_pool->release_glsl_program(cbcr_program_num); + check_error(); + glDeleteBuffers(1, &cbcr_vbo); + check_error(); +} + +void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex) +{ + GLuint vao; + glGenVertexArrays(1, &vao); + check_error(); + + glBindVertexArray(vao); + check_error(); + + // Extract Cb/Cr. 
+ GLuint fbo = resource_pool->create_fbo(dst_tex); + glBindFramebuffer(GL_FRAMEBUFFER, fbo); + glViewport(0, 0, width/2, height/2); + check_error(); + + glUseProgram(cbcr_program_num); + check_error(); + + glActiveTexture(GL_TEXTURE0); + check_error(); + glBindTexture(GL_TEXTURE_2D, cbcr_tex); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + check_error(); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + check_error(); + + float chroma_offset_0[] = { -1.0f / width, 0.0f }; + float chroma_offset_1[] = { -0.0f / width, 0.0f }; + set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_0", chroma_offset_0); + set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_1", chroma_offset_1); + + glUniform1i(cbcr_texture_sampler_uniform, 0); + + glBindBuffer(GL_ARRAY_BUFFER, cbcr_vbo); + check_error(); + + for (GLint attr_index : { cbcr_position_attribute_index, cbcr_texcoord_attribute_index }) { + glEnableVertexAttribArray(attr_index); + check_error(); + glVertexAttribPointer(attr_index, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0)); + check_error(); + } + + glDrawArrays(GL_TRIANGLES, 0, 3); + check_error(); + + for (GLint attr_index : { cbcr_position_attribute_index, cbcr_texcoord_attribute_index }) { + glDisableVertexAttribArray(attr_index); + check_error(); + } + + glUseProgram(0); + check_error(); + glBindFramebuffer(GL_FRAMEBUFFER, 0); + check_error(); + + resource_pool->release_fbo(fbo); + glDeleteVertexArrays(1, &vao); + check_error(); +} diff --git a/chroma_subsampler.h b/chroma_subsampler.h new file mode 100644 index 0000000..ea2b781 --- /dev/null +++ b/chroma_subsampler.h @@ -0,0 +1,31 @@ +#ifndef _CHROMA_SUBSAMPLER_H +#define _CHROMA_SUBSAMPLER_H 1 + +#include + +namespace movit { + +class ResourcePool; + +} // namespace movit + +class ChromaSubsampler { +public: + ChromaSubsampler(movit::ResourcePool 
*resource_pool); + ~ChromaSubsampler(); + + // Subsamples chroma (packed Cb and Cr) 2x2 to yield chroma suitable for + // NV12 (semiplanar 4:2:0). Chroma positioning is left/center (H.264 convention). + // width and height are the dimensions (in pixels) of the input texture. + void subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex); + +private: + movit::ResourcePool *resource_pool; + + GLuint cbcr_program_num; // Owned by . + GLuint cbcr_texture_sampler_uniform; + GLuint cbcr_vbo; // Holds position and texcoord data. + GLuint cbcr_position_attribute_index, cbcr_texcoord_attribute_index; +}; + +#endif // !defined(_CHROMA_SUBSAMPLER_H) diff --git a/mixer.cpp b/mixer.cpp index 875fc6f..d544b0b 100644 --- a/mixer.cpp +++ b/mixer.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include #include #include @@ -33,6 +32,7 @@ #include "alsa_output.h" #include "bmusb/bmusb.h" #include "bmusb/fake_capture.h" +#include "chroma_subsampler.h" #include "context.h" #include "decklink_capture.h" #include "defs.h" @@ -190,102 +190,7 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards) cards[card_index].queue_length_policy.reset(card_index); } - // Set up stuff for NV12 conversion. - // - // Note: Due to the horizontally co-sited chroma/luma samples in H.264 - // (chrome position is left for horizontal and center for vertical), - // we need to be a bit careful in our subsampling. A diagram will make - // this clearer, showing some luma and chroma samples: - // - // a b c d - // +---+---+---+---+ - // | | | | | - // | Y | Y | Y | Y | - // | | | | | - // +---+---+---+---+ - // - // +-------+-------+ - // | | | - // | C | C | - // | | | - // +-------+-------+ - // - // Clearly, the rightmost chroma sample here needs to be equivalent to - // b/4 + c/2 + d/4. (We could also implement more sophisticated filters, - // of course, but as long as the upsampling is not going to be equally - // sophisticated, it's probably not worth it.) 
If we sample once with - // no mipmapping, we get just c, ie., no actual filtering in the - // horizontal direction. (For the vertical direction, we can just - // sample in the middle to get the right filtering.) One could imagine - // we could use mipmapping (assuming we can create mipmaps cheaply), - // but then, what we'd get is this: - // - // (a+b)/2 (c+d)/2 - // +-------+-------+ - // | | | - // | Y | Y | - // | | | - // +-------+-------+ - // - // +-------+-------+ - // | | | - // | C | C | - // | | | - // +-------+-------+ - // - // which ends up sampling equally from a and b, which clearly isn't right. Instead, - // we need to do two (non-mipmapped) chroma samples, both hitting exactly in-between - // source pixels. - // - // Sampling in-between b and c gives us the sample (b+c)/2, and similarly for c and d. - // Taking the average of these gives of (b+c)/4 + (c+d)/4 = b/4 + c/2 + d/4, which is - // exactly what we want. - // - // See also http://www.poynton.com/PDFs/Merging_RGB_and_422.pdf, pages 6–7. - - // Cb/Cr shader. 
- string cbcr_vert_shader = - "#version 130 \n" - " \n" - "in vec2 position; \n" - "in vec2 texcoord; \n" - "out vec2 tc0, tc1; \n" - "uniform vec2 foo_chroma_offset_0; \n" - "uniform vec2 foo_chroma_offset_1; \n" - " \n" - "void main() \n" - "{ \n" - " // The result of glOrtho(0.0, 1.0, 0.0, 1.0, 0.0, 1.0) is: \n" - " // \n" - " // 2.000 0.000 0.000 -1.000 \n" - " // 0.000 2.000 0.000 -1.000 \n" - " // 0.000 0.000 -2.000 -1.000 \n" - " // 0.000 0.000 0.000 1.000 \n" - " gl_Position = vec4(2.0 * position.x - 1.0, 2.0 * position.y - 1.0, -1.0, 1.0); \n" - " vec2 flipped_tc = texcoord; \n" - " tc0 = flipped_tc + foo_chroma_offset_0; \n" - " tc1 = flipped_tc + foo_chroma_offset_1; \n" - "} \n"; - string cbcr_frag_shader = - "#version 130 \n" - "in vec2 tc0, tc1; \n" - "uniform sampler2D cbcr_tex; \n" - "out vec4 FragColor; \n" - "void main() { \n" - " FragColor = 0.5 * (texture(cbcr_tex, tc0) + texture(cbcr_tex, tc1)); \n" - "} \n"; - vector frag_shader_outputs; - cbcr_program_num = resource_pool->compile_glsl_program(cbcr_vert_shader, cbcr_frag_shader, frag_shader_outputs); - - float vertices[] = { - 0.0f, 2.0f, - 0.0f, 0.0f, - 2.0f, 0.0f - }; - cbcr_vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices); - cbcr_texture_sampler_uniform = glGetUniformLocation(cbcr_program_num, "cbcr_tex"); - cbcr_position_attribute_index = glGetAttribLocation(cbcr_program_num, "position"); - cbcr_texcoord_attribute_index = glGetAttribLocation(cbcr_program_num, "texcoord"); + chroma_subsampler.reset(new ChromaSubsampler(resource_pool.get())); if (global_flags.enable_alsa_output) { alsa.reset(new ALSAOutput(OUTPUT_FREQUENCY, /*num_channels=*/2)); @@ -294,8 +199,6 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards) Mixer::~Mixer() { - resource_pool->release_glsl_program(cbcr_program_num); - glDeleteBuffers(1, &cbcr_vbo); BMUSBCapture::stop_bm_thread(); for (unsigned card_index = 0; card_index < num_cards; ++card_index) { @@ -877,7 +780,7 @@ void 
Mixer::render_one_frame(int64_t duration) chain->render_to_fbo(fbo, global_flags.width, global_flags.height); resource_pool->release_fbo(fbo); - subsample_chroma(cbcr_full_tex, cbcr_tex); + chroma_subsampler->subsample_chroma(cbcr_full_tex, global_flags.width, global_flags.height, cbcr_tex); resource_pool->release_2d_texture(cbcr_full_tex); // Set the right state for rgba_tex. @@ -941,73 +844,11 @@ void Mixer::audio_thread_func() if (alsa) { alsa->write(samples_out); } + decklink_output->send_audio(task.pts_int, samples_out); video_encoder->add_audio(task.pts_int, move(samples_out)); } } -void Mixer::subsample_chroma(GLuint src_tex, GLuint dst_tex) -{ - GLuint vao; - glGenVertexArrays(1, &vao); - check_error(); - - glBindVertexArray(vao); - check_error(); - - // Extract Cb/Cr. - GLuint fbo = resource_pool->create_fbo(dst_tex); - glBindFramebuffer(GL_FRAMEBUFFER, fbo); - glViewport(0, 0, global_flags.width/2, global_flags.height/2); - check_error(); - - glUseProgram(cbcr_program_num); - check_error(); - - glActiveTexture(GL_TEXTURE0); - check_error(); - glBindTexture(GL_TEXTURE_2D, src_tex); - check_error(); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - check_error(); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); - check_error(); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - check_error(); - - float chroma_offset_0[] = { -1.0f / global_flags.width, 0.0f }; - float chroma_offset_1[] = { -0.0f / global_flags.width, 0.0f }; - set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_0", chroma_offset_0); - set_uniform_vec2(cbcr_program_num, "foo", "chroma_offset_1", chroma_offset_1); - - glUniform1i(cbcr_texture_sampler_uniform, 0); - - glBindBuffer(GL_ARRAY_BUFFER, cbcr_vbo); - check_error(); - - for (GLint attr_index : { cbcr_position_attribute_index, cbcr_texcoord_attribute_index }) { - glEnableVertexAttribArray(attr_index); - check_error(); - glVertexAttribPointer(attr_index, 2, GL_FLOAT, 
GL_FALSE, 0, BUFFER_OFFSET(0)); - check_error(); - } - - glDrawArrays(GL_TRIANGLES, 0, 3); - check_error(); - - for (GLint attr_index : { cbcr_position_attribute_index, cbcr_texcoord_attribute_index }) { - glDisableVertexAttribArray(attr_index); - check_error(); - } - - glUseProgram(0); - check_error(); - glBindFramebuffer(GL_FRAMEBUFFER, 0); - check_error(); - - resource_pool->release_fbo(fbo); - glDeleteVertexArrays(1, &vao); -} - void Mixer::release_display_frame(DisplayFrame *frame) { for (GLuint texnum : frame->temp_textures) { diff --git a/mixer.h b/mixer.h index 28b190b..0e1d295 100644 --- a/mixer.h +++ b/mixer.h @@ -37,7 +37,9 @@ #include "video_encoder.h" class ALSAOutput; +class ChromaSubsampler; class QSurface; +class QSurfaceFormat; namespace movit { class Effect; @@ -45,7 +47,6 @@ class EffectChain; class FlatInput; class ResourcePool; } // namespace movit -class QSurfaceFormat; // For any card that's not the master (where we pick out the frames as they // come, as fast as we can process), there's going to be a queue. The question @@ -296,7 +297,6 @@ private: void schedule_audio_resampling_tasks(unsigned dropped_frames, int num_samples_per_frame, int length_per_frame); void render_one_frame(int64_t duration); void audio_thread_func(); - void subsample_chroma(GLuint src_tex, GLuint dst_dst); void release_display_frame(DisplayFrame *frame); double pts() { return double(pts_int) / TIMEBASE; } @@ -309,10 +309,7 @@ private: std::atomic audio_source_channel{0}; std::atomic master_clock_channel{0}; std::unique_ptr display_chain; - GLuint cbcr_program_num; // Owned by . - GLuint cbcr_texture_sampler_uniform; - GLuint cbcr_vbo; // Holds position and texcoord data. - GLuint cbcr_position_attribute_index, cbcr_texcoord_attribute_index; + std::unique_ptr chroma_subsampler; std::unique_ptr video_encoder; // Effects part of . Owned by . 
diff --git a/theme.cpp b/theme.cpp index afe511c..ef63576 100644 --- a/theme.cpp +++ b/theme.cpp @@ -262,7 +262,7 @@ int EffectChain_finalize(lua_State* L) if (is_main_chain) { YCbCrFormat output_ycbcr_format; // We actually output 4:2:0 in the end, but chroma subsampling - // happens in a pass not run by Movit (see Mixer::subsample_chroma()). + // happens in a pass not run by Movit (see ChromaSubsampler::subsample_chroma()). output_ycbcr_format.chroma_subsampling_x = 1; output_ycbcr_format.chroma_subsampling_y = 1; -- 2.39.2