#version 450 core
in vec3 tc;
+
+#ifdef SPLIT_YCBCR_OUTPUT
+// Pin the two outputs to fixed color numbers instead of leaving the
+// assignment to the linker; the host code passes the Y texture first and
+// the CbCr texture second when setting up the render targets.
+layout(location = 0) out float Y;
+layout(location = 1) out vec2 CbCr;
+#else
out vec4 rgba;
+#endif
uniform sampler2DArray image_tex;
uniform sampler2D flow_tex;
// Same for d1.
float d1 = (1.0f - alpha) * length(size * (texture(flow_tex, vec2(tc.xy + (1.0f - alpha) * flow)).xy - flow));
+ vec4 result;
if (max(d0, d1) < 3.0f) { // Arbitrary constant, not all that tuned. The UW paper says 1.0 is fine for ground truth.
// Both are visible, so blend.
- rgba = I_0 + alpha * (I_1 - I_0);
+ result = I_0 + alpha * (I_1 - I_0);
} else if (d0 < d1) {
- rgba = I_0;
+ result = I_0;
} else {
- rgba = I_1;
+ result = I_1;
}
+#ifdef SPLIT_YCBCR_OUTPUT
+ Y = result.r;
+ CbCr = result.gb;
+#else
+ rgba = result;
+#endif
}
glDisable(GL_DEPTH_TEST);
}
-Blend::Blend()
+Blend::Blend(bool split_ycbcr_output)
+ : split_ycbcr_output(split_ycbcr_output)
{
+ string frag_shader = read_file("blend.frag");
+ if (split_ycbcr_output) {
+ // Insert the SPLIT_YCBCR_OUTPUT define right after the first line of the shader (the #version line), which has to stay first.
+ size_t offset = frag_shader.find('\n');
+ assert(offset != string::npos);
+ frag_shader = frag_shader.substr(0, offset + 1) + "#define SPLIT_YCBCR_OUTPUT 1\n" + frag_shader.substr(offset + 1);
+ }
+
blend_vs_obj = compile_shader(read_file("vs.vert"), GL_VERTEX_SHADER);
- blend_fs_obj = compile_shader(read_file("blend.frag"), GL_FRAGMENT_SHADER);
+ blend_fs_obj = compile_shader(frag_shader, GL_FRAGMENT_SHADER);
blend_program = link_program(blend_vs_obj, blend_fs_obj);
uniform_image_tex = glGetUniformLocation(blend_program, "image_tex");
uniform_flow_consistency_tolerance = glGetUniformLocation(blend_program, "flow_consistency_tolerance");
}
-void Blend::exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, int level_width, int level_height, float alpha)
+void Blend::exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, GLuint output2_tex, int level_width, int level_height, float alpha)
{
glUseProgram(blend_program);
bind_sampler(blend_program, uniform_image_tex, 0, image_tex, linear_sampler);
glProgramUniform1f(blend_program, uniform_alpha, alpha);
glViewport(0, 0, level_width, level_height);
- fbos.render_to(output_tex);
+ if (split_ycbcr_output) {
+ fbos_split.render_to(output_tex, output2_tex);
+ } else {
+ fbos.render_to(output_tex);
+ }
glDisable(GL_BLEND); // A bit ironic, perhaps.
glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
}
-Interpolate::Interpolate(int width, int height, const OperatingPoint &op)
- : width(width), height(height), flow_level(op.finest_level), op(op), splat(op) {
+Interpolate::Interpolate(int width, int height, const OperatingPoint &op, bool split_ycbcr_output)
+ : width(width),
+ height(height),
+ flow_level(op.finest_level),
+ op(op),
+ split_ycbcr_output(split_ycbcr_output),
+ splat(op),
+ blend(split_ycbcr_output) {
// Set up the vertex data that will be shared between all passes.
float vertices[] = {
0.0f, 1.0f,
glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
}
-GLuint Interpolate::exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha)
+pair<GLuint, GLuint> Interpolate::exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha)
{
GPUTimers timers;
pool.release_texture(temp_tex[2]);
pool.release_renderbuffer(depth_rb);
- GLuint output_tex = pool.get_texture(GL_RGBA8, width, height);
- {
- ScopedTimer timer("Blend", &total_timer);
- blend.exec(image_tex, flow_tex, output_tex, width, height, alpha);
+ GLuint output_tex, output2_tex = 0;
+ if (split_ycbcr_output) {
+ output_tex = pool.get_texture(GL_R8, width, height);
+ output2_tex = pool.get_texture(GL_RG8, width, height);
+ {
+ ScopedTimer timer("Blend", &total_timer);
+ blend.exec(image_tex, flow_tex, output_tex, output2_tex, width, height, alpha);
+ }
+ } else {
+ output_tex = pool.get_texture(GL_RGBA8, width, height);
+ {
+ ScopedTimer timer("Blend", &total_timer);
+ blend.exec(image_tex, flow_tex, output_tex, 0, width, height, alpha);
+ }
}
pool.release_texture(flow_tex);
total_timer.end();
timers.print();
}
- return output_tex;
+ return make_pair(output_tex, output2_tex);
}
GLuint TexturePool::get_texture(GLenum format, GLuint width, GLuint height, GLuint num_layers)
class Blend {
public:
- Blend();
- void exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, int width, int height, float alpha);
+ Blend(bool split_ycbcr_output);
+
+ // output2_tex is only used if split_ycbcr_output was true.
+ void exec(GLuint image_tex, GLuint flow_tex, GLuint output_tex, GLuint output2_tex, int width, int height, float alpha);
private:
+ bool split_ycbcr_output;
PersistentFBOSet<1> fbos;
+ PersistentFBOSet<2> fbos_split;
GLuint blend_vs_obj;
GLuint blend_fs_obj;
GLuint blend_program;
class Interpolate {
public:
- Interpolate(int width, int height, const OperatingPoint &op);
+ Interpolate(int width, int height, const OperatingPoint &op, bool split_ycbcr_output);
- // Returns a texture that must be released with release_texture()
- // after use. image_tex must be a two-layer RGBA8 texture with mipmaps
- // (unless flow_level == 0).
- GLuint exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha);
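+ // Returns a pair of textures; every nonzero one must be released with
+ // release_texture() after use. If split_ycbcr_output is true, .first is
+ // the Y plane (GL_R8) and .second is the CbCr plane (GL_RG8); otherwise
+ // .first is an RGBA8 texture and .second is 0. image_tex must be a
+ // two-layer RGBA8 texture with mipmaps (unless flow_level == 0).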
+ std::pair<GLuint, GLuint> exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha);
void release_texture(GLuint tex) {
pool.release_texture(tex);
GLuint vertex_vbo, vao;
TexturePool pool;
const OperatingPoint op;
+ const bool split_ycbcr_output;
Splat splat;
HoleFill hole_fill;
}
DISComputeFlow compute_flow(width1, height1, op);
GrayscaleConversion gray;
- Interpolate interpolate(width1, height1, op);
+ Interpolate interpolate(width1, height1, op, /*split_ycbcr_output=*/false);
GLuint tex_gray;
glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex_gray);
in_warmup = true;
for (int i = 0; i < 10; ++i) {
GLuint bidirectional_flow_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
- GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, 0.5f);
+ GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, 0.5f).first;
compute_flow.release_texture(bidirectional_flow_tex);
interpolate.release_texture(interpolated_tex);
}
snprintf(ppm_filename, sizeof(ppm_filename), "interp%04d.ppm", frameno);
float alpha = frameno / 60.0f;
- GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, alpha);
+ GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, alpha).first;
schedule_read<RGBAType>(interpolated_tex, width1, height1, filename0, filename1, "", ppm_filename);
interpolate.release_texture(interpolated_tex);
};
static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");
-vector<uint8_t> encode_jpeg(const uint8_t *pixel_data, unsigned width, unsigned height)
+vector<uint8_t> encode_jpeg(const uint8_t *y_data, const uint8_t *cbcr_data, unsigned width, unsigned height)
{
VectorDestinationManager dest;
cinfo.CCIR601_sampling = true; // Seems to be mostly ignored by libjpeg, though.
jpeg_start_compress(&cinfo, true);
- // TODO: Subsample and deinterleave on the GPU.
-
- unique_ptr<uint8_t[]> ydata(new uint8_t[width * 8]);
+ // TODO: Subsample on the GPU.
unique_ptr<uint8_t[]> cbdata(new uint8_t[(width/2) * 8]);
unique_ptr<uint8_t[]> crdata(new uint8_t[(width/2) * 8]);
JSAMPROW yptr[8], cbptr[8], crptr[8];
JSAMPARRAY data[3] = { yptr, cbptr, crptr };
for (unsigned yy = 0; yy < 8; ++yy) {
- yptr[yy] = ydata.get() + yy * width;
cbptr[yy] = cbdata.get() + yy * (width / 2);
crptr[yy] = crdata.get() + yy * (width / 2);
}
for (unsigned y = 0; y < height; y += 8) {
- uint8_t *yptr = ydata.get();
uint8_t *cbptr = cbdata.get();
uint8_t *crptr = crdata.get();
for (unsigned yy = 0; yy < 8; ++yy) {
- const uint8_t *sptr = &pixel_data[(height - y - yy - 1) * width * 4];
+ yptr[yy] = const_cast<JSAMPROW>(&y_data[(height - y - yy - 1) * width]);
+ const uint8_t *sptr = &cbcr_data[(height - y - yy - 1) * width * 2];
for (unsigned x = 0; x < width; x += 2) {
- *yptr++ = sptr[0];
- *yptr++ = sptr[4];
- *cbptr++ = (sptr[1] + sptr[5]) / 2;
- *crptr++ = (sptr[2] + sptr[6]) / 2;
- sptr += 8;
+ *cbptr++ = (sptr[0] + sptr[2]) / 2;
+ *crptr++ = (sptr[1] + sptr[3]) / 2;
+ sptr += 4;
}
}
check_error();
compute_flow.reset(new DISComputeFlow(width, height, operating_point3));
- interpolate.reset(new Interpolate(width, height, operating_point3));
+ interpolate.reset(new Interpolate(width, height, operating_point3, /*split_ycbcr_output=*/true));
check_error();
}
// Compute the interpolated frame.
qf.flow_tex = compute_flow->exec(resources.gray_tex, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
check_error();
- qf.output_tex = interpolate->exec(resources.input_tex, resources.gray_tex, qf.flow_tex, 1280, 720, alpha);
+ tie(qf.output_tex, qf.output2_tex) = interpolate->exec(resources.input_tex, resources.gray_tex, qf.flow_tex, 1280, 720, alpha);
check_error();
// We could have released qf.flow_tex here, but to make sure we don't cause a stall
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
glBindBuffer(GL_PIXEL_PACK_BUFFER, resources.pbo);
check_error();
- glGetTextureImage(qf.output_tex, 0, GL_RGBA, GL_UNSIGNED_BYTE, 1280 * 720 * 4, nullptr);
+ glGetTextureImage(qf.output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 4, BUFFER_OFFSET(0));
+ check_error();
+ glGetTextureImage(qf.output2_tex, 0, GL_RG, GL_UNSIGNED_BYTE, 1280 * 720 * 3, BUFFER_OFFSET(1280 * 720));
check_error();
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
} else if (qf.type == QueuedFrame::INTERPOLATED) {
glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
- vector<uint8_t> jpeg = encode_jpeg((const uint8_t *)qf.resources.pbo_contents, 1280, 720);
+ vector<uint8_t> jpeg = encode_jpeg(
+ (const uint8_t *)qf.resources.pbo_contents,
+ (const uint8_t *)qf.resources.pbo_contents + 1280 * 720,
+ 1280, 720);
compute_flow->release_texture(qf.flow_tex);
interpolate->release_texture(qf.output_tex);
+ interpolate->release_texture(qf.output2_tex);
AVPacket pkt;
av_init_packet(&pkt);
float alpha;
InterpolatedFrameResources resources;
RefCountedGLsync fence; // Set when the interpolated image is read back to the CPU.
- GLuint flow_tex, output_tex; // Released in the receiving thread; not really used for anything else.
+ GLuint flow_tex, output_tex, output2_tex; // Released in the receiving thread; not really used for anything else.
};
std::deque<QueuedFrame> frame_queue; // Under <queue_lock>.
std::mutex queue_lock;