git.sesse.net Git - nageru/commitdiff
Do the interpolation in Y'CbCr instead of RGBA; saves some conversions back and forth...
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 16 Sep 2018 13:35:29 +0000 (15:35 +0200)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Sun, 16 Sep 2018 15:36:01 +0000 (17:36 +0200)
flow.cpp
flow.h
flow_main.cpp
splat.frag
video_stream.cpp
video_stream.h

diff --git a/flow.cpp b/flow.cpp
index ece0d518bcfd9a7650ba0d522d598e9e4d71d052..9f816e6f30171ac1c005594fed142b974e6bd0c1 100644 (file)
--- a/flow.cpp
+++ b/flow.cpp
@@ -778,16 +778,16 @@ Splat::Splat(const OperatingPoint &op)
 
        uniform_splat_size = glGetUniformLocation(splat_program, "splat_size");
        uniform_alpha = glGetUniformLocation(splat_program, "alpha");
-       uniform_image_tex = glGetUniformLocation(splat_program, "image_tex");
+       uniform_gray_tex = glGetUniformLocation(splat_program, "gray_tex");
        uniform_flow_tex = glGetUniformLocation(splat_program, "flow_tex");
        uniform_inv_flow_size = glGetUniformLocation(splat_program, "inv_flow_size");
 }
 
-void Splat::exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha)
+void Splat::exec(GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha)
 {
        glUseProgram(splat_program);
 
-       bind_sampler(splat_program, uniform_image_tex, 0, image_tex, linear_sampler);
+       bind_sampler(splat_program, uniform_gray_tex, 0, gray_tex, linear_sampler);
        bind_sampler(splat_program, uniform_flow_tex, 1, bidirectional_flow_tex, nearest_sampler);
 
        glProgramUniform2f(splat_program, uniform_splat_size, op.splat_size / width, op.splat_size / height);
@@ -961,7 +961,7 @@ Interpolate::Interpolate(int width, int height, const OperatingPoint &op)
        glVertexAttribPointer(position_attrib, 2, GL_FLOAT, GL_FALSE, 0, BUFFER_OFFSET(0));
 }
 
-GLuint Interpolate::exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha)
+GLuint Interpolate::exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha)
 {
        GPUTimers timers;
 
@@ -973,7 +973,7 @@ GLuint Interpolate::exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint
        // Pick out the right level to test splatting results on.
        GLuint tex_view;
        glGenTextures(1, &tex_view);
-       glTextureView(tex_view, GL_TEXTURE_2D_ARRAY, image_tex, GL_RGBA8, flow_level, 1, 0, 2);
+       glTextureView(tex_view, GL_TEXTURE_2D_ARRAY, gray_tex, GL_R8, flow_level, 1, 0, 2);
 
        int flow_width = width >> flow_level;
        int flow_height = height >> flow_level;
diff --git a/flow.h b/flow.h
index f268316bd6882e7ea0f3d1a4dd305af6a368b5d3..31111f5bd42b17f1aa20453ac24694e744d9af79 100644 (file)
--- a/flow.h
+++ b/flow.h
@@ -453,7 +453,7 @@ public:
        Splat(const OperatingPoint &op);
 
        // alpha is the time of the interpolated frame (0..1).
-       void exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha);
+       void exec(GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint flow_tex, GLuint depth_rb, int width, int height, float alpha);
 
 private:
        const OperatingPoint op;
@@ -464,7 +464,7 @@ private:
        GLuint splat_program;
 
        GLuint uniform_splat_size, uniform_alpha;
-       GLuint uniform_image_tex, uniform_flow_tex;
+       GLuint uniform_gray_tex, uniform_flow_tex;
        GLuint uniform_inv_flow_size;
 };
 
@@ -542,7 +542,7 @@ public:
        // Returns a texture that must be released with release_texture()
        // after use. image_tex must be a two-layer RGBA8 texture with mipmaps
        // (unless flow_level == 0).
-       GLuint exec(GLuint image_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha);
+       GLuint exec(GLuint image_tex, GLuint gray_tex, GLuint bidirectional_flow_tex, GLuint width, GLuint height, float alpha);
 
        void release_texture(GLuint tex) {
                pool.release_texture(tex);
diff --git a/flow_main.cpp b/flow_main.cpp
index 6b6308c1bd17b75e4f5b2b8f3b8b1f86d5008bfe..d8cf4773f59e6c07d2b5b6a031b2b5103d7e7ff3 100644 (file)
--- a/flow_main.cpp
+++ b/flow_main.cpp
@@ -391,7 +391,7 @@ void interpolate_image(int argc, char **argv, int optind)
                in_warmup = true;
                for (int i = 0; i < 10; ++i) {
                        GLuint bidirectional_flow_tex = compute_flow.exec(tex_gray, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
-                       GLuint interpolated_tex = interpolate.exec(image_tex, bidirectional_flow_tex, width1, height1, 0.5f);
+                       GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, 0.5f);
                        compute_flow.release_texture(bidirectional_flow_tex);
                        interpolate.release_texture(interpolated_tex);
                }
@@ -405,7 +405,7 @@ void interpolate_image(int argc, char **argv, int optind)
                snprintf(ppm_filename, sizeof(ppm_filename), "interp%04d.ppm", frameno);
 
                float alpha = frameno / 60.0f;
-               GLuint interpolated_tex = interpolate.exec(image_tex, bidirectional_flow_tex, width1, height1, alpha);
+               GLuint interpolated_tex = interpolate.exec(image_tex, tex_gray, bidirectional_flow_tex, width1, height1, alpha);
 
                schedule_read<RGBAType>(interpolated_tex, width1, height1, filename0, filename1, "", ppm_filename);
                interpolate.release_texture(interpolated_tex);
diff --git a/splat.frag b/splat.frag
index 83fd68333751a9df65b00956b920ac3ae8fb97ba..6e873bc75c959dcf9713f22a5fc51e771895086f 100644 (file)
--- a/splat.frag
+++ b/splat.frag
@@ -4,17 +4,15 @@ in vec2 image_pos;
 flat in vec2 flow, I_0_check_offset, I_1_check_offset;
 out vec2 out_flow;
 
-uniform sampler2DArray image_tex;
+uniform sampler2DArray gray_tex;
 
 void main()
 {
        out_flow = flow;
 
        // TODO: Check if we are sampling out-of-image.
-       // TODO: See whether using intensity values gives equally good results
-       // as RGB, since the rest of our pipeline is intensity.
-       vec3 I_0 = texture(image_tex, vec3(image_pos + I_0_check_offset, 0)).rgb;
-       vec3 I_1 = texture(image_tex, vec3(image_pos + I_1_check_offset, 1)).rgb;
-       vec3 diff = abs(I_1 - I_0);
-       gl_FragDepth = 0.125 * (diff.x + diff.y + diff.z);  // Make sure we stay well under the 1.0 maximum.
+       float I_0 = texture(gray_tex, vec3(image_pos + I_0_check_offset, 0)).r;
+       float I_1 = texture(gray_tex, vec3(image_pos + I_1_check_offset, 1)).r;
+       float diff = abs(I_1 - I_0);
+       gl_FragDepth = 0.125 * diff.x;  // Make sure we stay well under the 1.0 maximum.
 }
diff --git a/video_stream.cpp b/video_stream.cpp
index 2951ff10ced84b0ae8f9fa735f930b7ee4075032..8d485ad02b20e79ab3c2a2ef578c7395835d2e9b 100644 (file)
--- a/video_stream.cpp
+++ b/video_stream.cpp
@@ -116,28 +116,45 @@ vector<uint8_t> encode_jpeg(const uint8_t *pixel_data, unsigned width, unsigned
 
        cinfo.image_width = width;
        cinfo.image_height = height;
-       cinfo.input_components = 3;
+       cinfo.raw_data_in = true;
+       jpeg_set_colorspace(&cinfo, JCS_YCbCr);
        cinfo.comp_info[0].h_samp_factor = 2;
        cinfo.comp_info[0].v_samp_factor = 1;
        cinfo.comp_info[1].h_samp_factor = 1;
        cinfo.comp_info[1].v_samp_factor = 1;
        cinfo.comp_info[2].h_samp_factor = 1;
        cinfo.comp_info[2].v_samp_factor = 1;
-       // cinfo.CCIR601_sampling = true;  // TODO: Subsample ourselves.
+       cinfo.CCIR601_sampling = true;  // Seems to be mostly ignored by libjpeg, though.
        jpeg_start_compress(&cinfo, true);
 
-       unique_ptr<uint8_t[]> row(new uint8_t[width * 3]);
-       JSAMPROW row_pointer[1] = { row.get() };
-       for (unsigned y = 0; y < height; ++y) {
-               const uint8_t *sptr = &pixel_data[(height - cinfo.next_scanline - 1) * width * 4];
-               uint8_t *dptr = row.get();
-               for (unsigned x = 0; x < width; ++x) {
-                       *dptr++ = *sptr++;
-                       *dptr++ = *sptr++;
-                       *dptr++ = *sptr++;
-                       ++sptr;
+       // TODO: Subsample and deinterleave on the GPU.
+
+       unique_ptr<uint8_t[]> ydata(new uint8_t[width * 8]);
+       unique_ptr<uint8_t[]> cbdata(new uint8_t[(width/2) * 8]);
+       unique_ptr<uint8_t[]> crdata(new uint8_t[(width/2) * 8]);
+       JSAMPROW yptr[8], cbptr[8], crptr[8];
+       JSAMPARRAY data[3] = { yptr, cbptr, crptr };
+       for (unsigned yy = 0; yy < 8; ++yy) {
+               yptr[yy] = ydata.get() + yy * width;
+               cbptr[yy] = cbdata.get() + yy * (width / 2);
+               crptr[yy] = crdata.get() + yy * (width / 2);
+       }
+       for (unsigned y = 0; y < height; y += 8) {
+               uint8_t *yptr = ydata.get();
+               uint8_t *cbptr = cbdata.get();
+               uint8_t *crptr = crdata.get();
+               for (unsigned yy = 0; yy < 8; ++yy) {
+                       const uint8_t *sptr = &pixel_data[(height - y - yy - 1) * width * 4];
+                       for (unsigned x = 0; x < width; x += 2) {
+                               *yptr++ = sptr[0];
+                               *yptr++ = sptr[4];
+                               *cbptr++ = (sptr[1] + sptr[5]) / 2;
+                               *crptr++ = (sptr[2] + sptr[6]) / 2;
+                               sptr += 8;
+                       }
                }
-               (void) jpeg_write_scanlines(&cinfo, row_pointer, 1);
+
+               jpeg_write_raw_data(&cinfo, data, /*num_lines=*/8);
        }
 
        jpeg_finish_compress(&cinfo);
@@ -165,12 +182,21 @@ VideoStream::VideoStream()
        ycbcr_format.cr_y_position = 0.5f;
        ycbcr_input = (movit::YCbCrInput *)ycbcr_convert_chain->add_input(new YCbCrInput(image_format, ycbcr_format, 1280, 720));
 
+       YCbCrFormat ycbcr_output_format = ycbcr_format;
+       ycbcr_output_format.chroma_subsampling_x = 1;
+
        ImageFormat inout_format;
        inout_format.color_space = COLORSPACE_sRGB;
        inout_format.gamma_curve = GAMMA_sRGB;
 
        check_error();
-       ycbcr_convert_chain->add_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
+
+       // One full Y'CbCr texture (for interpolation), one that's just Y (throwing away the
+       // Cb and Cr channels). The second copy is sort of redundant, but it's the easiest way
+       // of getting the gray data into a layered texture.
+       ycbcr_convert_chain->add_ycbcr_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_output_format);
+       check_error();
+       ycbcr_convert_chain->add_ycbcr_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_output_format);
        check_error();
        ycbcr_convert_chain->set_dither_bits(8);
        check_error();
@@ -197,13 +223,17 @@ VideoStream::VideoStream()
 
                glNamedFramebufferTextureLayer(resource.input_fbos[0], GL_COLOR_ATTACHMENT0, input_tex[i], 0, 0);
                check_error();
+               glNamedFramebufferTextureLayer(resource.input_fbos[0], GL_COLOR_ATTACHMENT1, gray_tex[i], 0, 0);
+               check_error();
                glNamedFramebufferTextureLayer(resource.input_fbos[1], GL_COLOR_ATTACHMENT0, input_tex[i], 0, 1);
                check_error();
+               glNamedFramebufferTextureLayer(resource.input_fbos[1], GL_COLOR_ATTACHMENT1, gray_tex[i], 0, 1);
+               check_error();
 
-               GLuint buf = GL_COLOR_ATTACHMENT0;
-               glNamedFramebufferDrawBuffers(resource.input_fbos[0], 1, &buf);
+               GLuint bufs[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 };
+               glNamedFramebufferDrawBuffers(resource.input_fbos[0], 2, bufs);
                check_error();
-               glNamedFramebufferDrawBuffers(resource.input_fbos[1], 1, &buf);
+               glNamedFramebufferDrawBuffers(resource.input_fbos[1], 2, bufs);
                check_error();
 
                glCreateBuffers(1, &resource.pbo);
@@ -217,7 +247,6 @@ VideoStream::VideoStream()
        check_error();
 
        compute_flow.reset(new DISComputeFlow(width, height, operating_point3));
-       gray.reset(new GrayscaleConversion);  // NOTE: Must come after DISComputeFlow, since it sets up the VBO!
        interpolate.reset(new Interpolate(width, height, operating_point3));
        check_error();
 }
@@ -315,17 +344,14 @@ void VideoStream::schedule_interpolated_frame(int64_t output_pts, unsigned strea
        }
 
        glGenerateTextureMipmap(resources.input_tex);
-
-       // Compute the interpolated frame.
-       check_error();
-       gray->exec(resources.input_tex, resources.gray_tex, 1280, 720, /*num_layers=*/2);
        check_error();
        glGenerateTextureMipmap(resources.gray_tex);
        check_error();
+
+       // Compute the interpolated frame.
        qf.flow_tex = compute_flow->exec(resources.gray_tex, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
        check_error();
-
-       qf.output_tex = interpolate->exec(resources.input_tex, qf.flow_tex, 1280, 720, alpha);
+       qf.output_tex = interpolate->exec(resources.input_tex, resources.gray_tex, qf.flow_tex, 1280, 720, alpha);
        check_error();
 
        // We could have released qf.flow_tex here, but to make sure we don't cause a stall
diff --git a/video_stream.h b/video_stream.h
index 364512331ee10dbbb6843fdea3afa1bde4bd3b81..925cace4c62f5465c7bfb4d21bd0e64aca17a3c3 100644 (file)
--- a/video_stream.h
+++ b/video_stream.h
@@ -20,7 +20,6 @@ extern "C" {
 #include "ref_counted_gl_sync.h"
 
 class DISComputeFlow;
-class GrayscaleConversion;
 class Interpolate;
 class Mux;
 class QSurface;
@@ -78,12 +77,15 @@ private:
        bool seen_sync_markers = false;
 
        QSurface *gl_surface;
-       std::unique_ptr<movit::EffectChain> ycbcr_convert_chain;  // TODO: Have a separate version with resample, for scaling?
+
+       // Effectively only converts from 4:2:2 to 4:4:4.
+       // TODO: Have a separate version with ResampleEffect, for scaling?
+       std::unique_ptr<movit::EffectChain> ycbcr_convert_chain;
+
        movit::YCbCrInput *ycbcr_input;
        movit::YCbCrFormat ycbcr_format;
 
        // Frame interpolation.
-       std::unique_ptr<GrayscaleConversion> gray;
        std::unique_ptr<DISComputeFlow> compute_flow;
        std::unique_ptr<Interpolate> interpolate;
 };