#include "chroma_subsampler.h"
+#include "v210_converter.h"
#include <vector>
};
vbo = generate_vbo(2, GL_FLOAT, sizeof(vertices), vertices);
check_error();
+
+ // v210 compute shader.
+ if (v210Converter::has_hardware_support()) {
+ string v210_shader_src = R"(#version 150
+#extension GL_ARB_compute_shader : enable
+#extension GL_ARB_shader_image_load_store : enable
+layout(local_size_x=2, local_size_y=16) in;
+layout(r16) uniform restrict readonly image2D in_y;
+uniform sampler2D in_cbcr; // Of type RG16.
+layout(rgb10_a2) uniform restrict writeonly image2D outbuf;
+uniform float inv_width, inv_height;
+
+void main()
+{
+ int xb = int(gl_GlobalInvocationID.x); // X block number.
+ int y = int(gl_GlobalInvocationID.y); // Y (actual line).
+ float yf = (gl_GlobalInvocationID.y + 0.5f) * inv_height; // Y float coordinate.
+
+ // Load and scale CbCr values, sampling in-between the texels to get
+ // to (left/4 + center/2 + right/4).
+ vec2 pix_cbcr[3];
+ for (int i = 0; i < 3; ++i) {
+ vec2 a = texture(in_cbcr, vec2((xb * 6 + i * 2) * inv_width, yf)).xy;
+ vec2 b = texture(in_cbcr, vec2((xb * 6 + i * 2 + 1) * inv_width, yf)).xy;
+ pix_cbcr[i] = (a + b) * (0.5 * 65535.0 / 1023.0);
+ }
+
+ // Load and scale the Y values. Note that we use integer coordinates here,
+ // so we don't need to offset by 0.5.
+ float pix_y[6];
+ for (int i = 0; i < 6; ++i) {
+ pix_y[i] = imageLoad(in_y, ivec2(xb * 6 + i, y)).x * (65535.0 / 1023.0);
+ }
+
+ imageStore(outbuf, ivec2(xb * 4 + 0, y), vec4(pix_cbcr[0].x, pix_y[0], pix_cbcr[0].y, 1.0));
+ imageStore(outbuf, ivec2(xb * 4 + 1, y), vec4(pix_y[1], pix_cbcr[1].x, pix_y[2], 1.0));
+ imageStore(outbuf, ivec2(xb * 4 + 2, y), vec4(pix_cbcr[1].y, pix_y[3], pix_cbcr[2].x, 1.0));
+ imageStore(outbuf, ivec2(xb * 4 + 3, y), vec4(pix_y[4], pix_cbcr[2].y, pix_y[5], 1.0));
+}
+)";
+ GLuint shader_num = movit::compile_shader(v210_shader_src, GL_COMPUTE_SHADER);
+ check_error();
+ v210_program_num = glCreateProgram();
+ check_error();
+ glAttachShader(v210_program_num, shader_num);
+ check_error();
+ glLinkProgram(v210_program_num);
+ check_error();
+
+ GLint success;
+ glGetProgramiv(v210_program_num, GL_LINK_STATUS, &success);
+ check_error();
+ if (success == GL_FALSE) {
+ GLchar error_log[1024] = {0};
+ glGetProgramInfoLog(v210_program_num, 1024, NULL, error_log);
+ fprintf(stderr, "Error linking program: %s\n", error_log);
+ exit(1);
+ }
+
+ v210_in_y_pos = glGetUniformLocation(v210_program_num, "in_y");
+ check_error();
+ v210_in_cbcr_pos = glGetUniformLocation(v210_program_num, "in_cbcr");
+ check_error();
+ v210_outbuf_pos = glGetUniformLocation(v210_program_num, "outbuf");
+ check_error();
+ v210_inv_width_pos = glGetUniformLocation(v210_program_num, "inv_width");
+ check_error();
+ v210_inv_height_pos = glGetUniformLocation(v210_program_num, "inv_height");
+ check_error();
+ } else {
+ v210_program_num = 0;
+ }
}
ChromaSubsampler::~ChromaSubsampler()
check_error();
glDeleteBuffers(1, &vbo);
check_error();
+ if (v210_program_num != 0) {
+ glDeleteProgram(v210_program_num);
+ check_error();
+ }
}
void ChromaSubsampler::subsample_chroma(GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex, GLuint dst2_tex)
resource_pool->release_fbo(fbo);
glDeleteVertexArrays(1, &vao);
}
+
+// Interleaves 10-bit luma and chroma (both packed into 16-bit textures)
+// into a v210 image in <dst_tex>, using the compute shader compiled in the
+// constructor. width/height are the dimensions of the _input_ textures.
+void ChromaSubsampler::create_v210(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex)
+{
+	assert(v210_program_num != 0);
+
+	glUseProgram(v210_program_num);
+	check_error();
+
+	// Texture unit 0 = luma (as image), 1 = chroma (as sampler),
+	// image unit 2 = v210 output.
+	glUniform1i(v210_in_y_pos, 0);
+	check_error();
+	glUniform1i(v210_in_cbcr_pos, 1);
+	check_error();
+	glUniform1i(v210_outbuf_pos, 2);
+	check_error();
+	glUniform1f(v210_inv_width_pos, 1.0 / width);
+	check_error();
+	glUniform1f(v210_inv_height_pos, 1.0 / height);
+	check_error();
+
+	glActiveTexture(GL_TEXTURE0);
+	check_error();
+	glBindTexture(GL_TEXTURE_2D, y_tex);  // We don't actually need to bind it, but we need to set the state.
+	check_error();
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+	check_error();
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+	check_error();
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+	check_error();
+	glBindImageTexture(0, y_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16);  // This is the real bind.
+	check_error();
+
+	glActiveTexture(GL_TEXTURE1);
+	check_error();
+	glBindTexture(GL_TEXTURE_2D, cbcr_tex);
+	check_error();
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+	check_error();
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+	check_error();
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+	check_error();
+
+	glBindImageTexture(2, dst_tex, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_RGB10_A2);
+	check_error();
+
+	// Actually run the shader. We use workgroups of size 2x16 threads, and each thread
+	// processes 6x1 input pixels, so round up to number of 12x16 pixel blocks.
+	glDispatchCompute((width + 11) / 12, (height + 15) / 16, 1);
+	check_error();
+
+	// The image stores above are incoherent memory accesses, so make the
+	// writes visible before anyone reads the texture back (e.g. glGetTexImage).
+	glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT);
+	check_error();
+
+	glBindTexture(GL_TEXTURE_2D, 0);
+	check_error();
+	glActiveTexture(GL_TEXTURE0);
+	check_error();
+	glUseProgram(0);
+	check_error();
+}
// width and height are the dimensions (in pixels) of the input textures.
void create_uyvy(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex);
+	// Subsamples and interleaves luma and chroma to give 10-bit 4:2:2
+	// packed Y'CbCr (v210); see v210_converter.h for more information on
+	// the format. Luma and chroma are assumed to be 10-bit data packed
+	// into 16-bit textures. Chroma positioning is left (H.264 convention).
+	// width and height are the dimensions (in pixels) of the input textures.
+	// Requires compute shaders; check v210Converter::has_hardware_support().
+ void create_v210(GLuint y_tex, GLuint cbcr_tex, unsigned width, unsigned height, GLuint dst_tex);
+
private:
movit::ResourcePool *resource_pool;
GLuint uyvy_program_num; // Owned by <resource_pool>.
GLuint uyvy_y_texture_sampler_uniform, uyvy_cbcr_texture_sampler_uniform;
GLuint uyvy_position_attribute_index, uyvy_texcoord_attribute_index;
+
+ GLuint v210_program_num; // Compute shader, so owned by ourselves. Can be 0.
+ GLuint v210_in_y_pos, v210_in_cbcr_pos, v210_outbuf_pos;
+ GLuint v210_inv_width_pos, v210_inv_height_pos;
};
#endif // !defined(_CHROMA_SUBSAMPLER_H)
#include "print_latency.h"
#include "resource_pool.h"
#include "timebase.h"
+#include "v210_converter.h"
using namespace movit;
using namespace std;
BMDDisplayModeSupport support;
IDeckLinkDisplayMode *display_mode;
- if (output->DoesSupportVideoMode(mode, bmdFormat8BitYUV, bmdVideoOutputFlagDefault,
+ BMDPixelFormat pixel_format = global_flags.ten_bit_output ? bmdFormat10BitYUV : bmdFormat8BitYUV;
+ if (output->DoesSupportVideoMode(mode, pixel_format, bmdVideoOutputFlagDefault,
&support, &display_mode) != S_OK) {
fprintf(stderr, "Couldn't ask for format support\n");
exit(1);
}
unique_ptr<Frame> frame = move(get_frame());
- chroma_subsampler->create_uyvy(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
+ if (global_flags.ten_bit_output) {
+ chroma_subsampler->create_v210(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
+ } else {
+ chroma_subsampler->create_uyvy(y_tex, cbcr_tex, width, height, frame->uyvy_tex);
+ }
// Download the UYVY texture to the PBO.
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
glBindBuffer(GL_PIXEL_PACK_BUFFER, frame->pbo);
check_error();
- glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
- check_error();
- glGetTexImage(GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, BUFFER_OFFSET(0));
- check_error();
+ if (global_flags.ten_bit_output) {
+ glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
+ check_error();
+ glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_INT_2_10_10_10_REV, BUFFER_OFFSET(0));
+ check_error();
+ } else {
+ glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
+ check_error();
+ glGetTexImage(GL_TEXTURE_2D, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, BUFFER_OFFSET(0));
+ check_error();
+ }
glBindTexture(GL_TEXTURE_2D, 0);
check_error();
unique_ptr<Frame> frame(new Frame);
- frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGBA8, width / 2, height);
+ size_t stride;
+ if (global_flags.ten_bit_output) {
+ stride = v210Converter::get_v210_stride(width);
+ GLint v210_width = stride / sizeof(uint32_t);
+ frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGB10_A2, v210_width, height);
+
+ // We need valid texture state, or NVIDIA won't allow us to write to the texture.
+ glBindTexture(GL_TEXTURE_2D, frame->uyvy_tex);
+ check_error();
+ glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+ check_error();
+ } else {
+ stride = width * 2;
+ frame->uyvy_tex = resource_pool->create_2d_texture(GL_RGBA8, width / 2, height);
+ }
glGenBuffers(1, &frame->pbo);
check_error();
glBindBuffer(GL_PIXEL_PACK_BUFFER, frame->pbo);
check_error();
- glBufferStorage(GL_PIXEL_PACK_BUFFER, width * height * 2, NULL, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+ glBufferStorage(GL_PIXEL_PACK_BUFFER, stride * height, NULL, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
check_error();
- frame->uyvy_ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, width * height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+ frame->uyvy_ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, stride * height, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
check_error();
- frame->uyvy_ptr_local.reset(new uint8_t[width * height * 2]);
+ frame->uyvy_ptr_local.reset(new uint8_t[stride * height]);
frame->resource_pool = resource_pool;
return frame;
check_error();
frame->fence.reset();
- memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, width * height * 2);
+ if (global_flags.ten_bit_output) {
+ memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, v210Converter::get_v210_stride(width) * height);
+ } else {
+ memcpy(frame->uyvy_ptr_local.get(), frame->uyvy_ptr, width * height * 2);
+ }
// Release any input frames we needed to render this frame.
frame->input_frames.clear();
long DeckLinkOutput::Frame::GetRowBytes()
{
- return global_flags.width * 2;
+ if (global_flags.ten_bit_output) {
+ return v210Converter::get_v210_stride(global_flags.width);
+ } else {
+ return global_flags.width * 2;
+ }
}
BMDPixelFormat DeckLinkOutput::Frame::GetPixelFormat()
{
- return bmdFormat8BitYUV;
+ if (global_flags.ten_bit_output) {
+ return bmdFormat10BitYUV;
+ } else {
+ return bmdFormat8BitYUV;
+ }
}
BMDFrameFlags DeckLinkOutput::Frame::GetFlags()
movit::ResourcePool *resource_pool;
// These members are persistently allocated, and reused when the frame object is.
- GLuint uyvy_tex; // Owned by <resource_pool>.
+ GLuint uyvy_tex; // Owned by <resource_pool>. Can also hold v210 data.
GLuint pbo;
uint8_t *uyvy_ptr; // Persistent mapping into the PBO.
OPTION_X264_BITRATE,
OPTION_X264_VBV_BUFSIZE,
OPTION_X264_VBV_MAX_BITRATE,
- OPTION_X264_10_BIT,
OPTION_X264_PARAM,
OPTION_HTTP_MUX,
OPTION_HTTP_COARSE_TIMEBASE,
OPTION_TIMECODE_STREAM,
OPTION_TIMECODE_STDOUT,
OPTION_10_BIT_INPUT,
+ OPTION_10_BIT_OUTPUT,
};
void usage()
fprintf(stderr, " default: same as --x264-bitrate, that is, one-second VBV)\n");
fprintf(stderr, " --x264-vbv-max-bitrate x264 local max bitrate (in kilobit/sec per --vbv-bufsize,\n");
fprintf(stderr, " 0 = no limit, default: same as --x264-bitrate, i.e., CBR)\n");
- fprintf(stderr, " --x264-10-bit enable 10-bit x264 encoding\n");
fprintf(stderr, " --x264-param=NAME[,VALUE] set any x264 parameter, for fine tuning\n");
fprintf(stderr, " --http-mux=NAME mux to use for HTTP streams (default " DEFAULT_STREAM_MUX_NAME ")\n");
fprintf(stderr, " --http-audio-codec=NAME audio codec to use for HTTP streams\n");
fprintf(stderr, " --timecode-stream show timestamp and timecode in stream\n");
fprintf(stderr, " --timecode-stdout show timestamp and timecode on standard output\n");
fprintf(stderr, " --10-bit-input use 10-bit video input (requires compute shaders)\n");
+ fprintf(stderr, " --10-bit-output use 10-bit video output (requires compute shaders,\n");
+ fprintf(stderr, " implies --record-x264-video)\n");
}
void parse_flags(int argc, char * const argv[])
{ "x264-bitrate", required_argument, 0, OPTION_X264_BITRATE },
{ "x264-vbv-bufsize", required_argument, 0, OPTION_X264_VBV_BUFSIZE },
{ "x264-vbv-max-bitrate", required_argument, 0, OPTION_X264_VBV_MAX_BITRATE },
- { "x264-10-bit", no_argument, 0, OPTION_X264_10_BIT },
{ "x264-param", required_argument, 0, OPTION_X264_PARAM },
{ "http-mux", required_argument, 0, OPTION_HTTP_MUX },
{ "http-coarse-timebase", no_argument, 0, OPTION_HTTP_COARSE_TIMEBASE },
{ "timecode-stream", no_argument, 0, OPTION_TIMECODE_STREAM },
{ "timecode-stdout", no_argument, 0, OPTION_TIMECODE_STDOUT },
{ "10-bit-input", no_argument, 0, OPTION_10_BIT_INPUT },
+ { "10-bit-output", no_argument, 0, OPTION_10_BIT_OUTPUT },
{ 0, 0, 0, 0 }
};
vector<string> theme_dirs;
case OPTION_X264_VBV_BUFSIZE:
global_flags.x264_vbv_buffer_size = atoi(optarg);
break;
- case OPTION_X264_10_BIT:
- global_flags.x264_bit_depth = 10;
- break;
case OPTION_X264_VBV_MAX_BITRATE:
global_flags.x264_vbv_max_bitrate = atoi(optarg);
break;
case OPTION_10_BIT_INPUT:
global_flags.ten_bit_input = true;
break;
+ case OPTION_10_BIT_OUTPUT:
+ global_flags.ten_bit_output = true;
+ global_flags.x264_video_to_disk = true;
+ global_flags.x264_video_to_http = true;
+ global_flags.x264_bit_depth = 10;
+ break;
case OPTION_HELP:
usage();
exit(0);
int x264_bitrate = DEFAULT_X264_OUTPUT_BIT_RATE; // In kilobit/sec.
int x264_vbv_max_bitrate = -1; // In kilobits. 0 = no limit, -1 = same as <x264_bitrate> (CBR).
int x264_vbv_buffer_size = -1; // In kilobits. 0 = one-frame VBV, -1 = same as <x264_bitrate> (one-second VBV).
- int x264_bit_depth = 8;
std::vector<std::string> x264_extra_param; // In “key[,value]” format.
bool enable_alsa_output = true;
std::map<int, int> default_stream_mapping;
bool display_timecode_in_stream = false;
bool display_timecode_on_stdout = false;
bool ten_bit_input = false;
+ bool ten_bit_output = false; // Implies x264_video_to_disk == true and x264_bit_depth == 10.
+ int x264_bit_depth = 8; // Not user-settable.
};
extern Flags global_flags;
v210_converter->precompile_shader(3840);
v210_converter->precompile_shader(4096);
}
+	if (global_flags.ten_bit_output) {
+		// The v210 packer is a compute shader, so fail early with a clear
+		// message instead of crashing at first use. (The flag is spelled
+		// --10-bit-output; the message must match what the user typed.)
+		if (!v210Converter::has_hardware_support()) {
+			fprintf(stderr, "ERROR: --10-bit-output requires support for OpenGL compute shaders\n");
+			fprintf(stderr, "       (OpenGL 4.3, or GL_ARB_compute_shader + GL_ARB_shader_image_load_store).\n");
+			exit(1);
+		}
+	}
timecode_renderer.reset(new TimecodeRenderer(resource_pool.get(), global_flags.width, global_flags.height));
display_timecode_in_stream = global_flags.display_timecode_in_stream;