Change from operating point 3 to 2 (more laptop-friendly debugging).
diff --git a/video_stream.cpp b/video_stream.cpp
index c525aee6140ee96974f40deb9d0c349f01e10d88..9b0e3a8bed006c6e2deb5ee14f786380e87950f3 100644
--- a/video_stream.cpp
+++ b/video_stream.cpp
@@ -98,7 +98,7 @@ struct VectorDestinationManager {
 };
 static_assert(std::is_standard_layout<VectorDestinationManager>::value, "");
 
-vector<uint8_t> encode_jpeg(const uint8_t *pixel_data, unsigned width, unsigned height)
+vector<uint8_t> encode_jpeg(const uint8_t *y_data, const uint8_t *cbcr_data, unsigned width, unsigned height)
 {
        VectorDestinationManager dest;
 
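The static_assert above is what makes the destination manager work at all: the usual libjpeg pattern embeds a jpeg_destination_mgr as the first member and casts the pointer libjpeg hands to the callbacks back into the wrapper type, which is only guaranteed to be safe for standard-layout types. A minimal sketch of that pattern, assuming a vector-backed destination (names other than the libjpeg API are illustrative, not nageru's actual layout):

#include <jpeglib.h>
#include <cstdint>
#include <type_traits>
#include <vector>

struct MiniVectorDest {
	jpeg_destination_mgr pub;  // Must be the first member for the casts below.
	std::vector<uint8_t> *out;
	uint8_t buf[4096];

	static void init_destination(j_compress_ptr cinfo) {
		auto *self = reinterpret_cast<MiniVectorDest *>(cinfo->dest);
		self->pub.next_output_byte = self->buf;
		self->pub.free_in_buffer = sizeof(self->buf);
	}
	static boolean empty_output_buffer(j_compress_ptr cinfo) {
		// libjpeg calls this when buf is completely full.
		auto *self = reinterpret_cast<MiniVectorDest *>(cinfo->dest);
		self->out->insert(self->out->end(), self->buf, self->buf + sizeof(self->buf));
		self->pub.next_output_byte = self->buf;
		self->pub.free_in_buffer = sizeof(self->buf);
		return TRUE;
	}
	static void term_destination(j_compress_ptr cinfo) {
		// Flush whatever is left at the end of compression.
		auto *self = reinterpret_cast<MiniVectorDest *>(cinfo->dest);
		self->out->insert(self->out->end(), self->buf, self->pub.next_output_byte);
	}
};
static_assert(std::is_standard_layout<MiniVectorDest>::value, "");
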
@@ -116,28 +116,40 @@ vector<uint8_t> encode_jpeg(const uint8_t *pixel_data, unsigned width, unsigned
 
        cinfo.image_width = width;
        cinfo.image_height = height;
-       cinfo.input_components = 3;
+       cinfo.raw_data_in = true;
+       jpeg_set_colorspace(&cinfo, JCS_YCbCr);
        cinfo.comp_info[0].h_samp_factor = 2;
        cinfo.comp_info[0].v_samp_factor = 1;
        cinfo.comp_info[1].h_samp_factor = 1;
        cinfo.comp_info[1].v_samp_factor = 1;
        cinfo.comp_info[2].h_samp_factor = 1;
        cinfo.comp_info[2].v_samp_factor = 1;
-       // cinfo.CCIR601_sampling = true;  // TODO: Subsample ourselves.
+       cinfo.CCIR601_sampling = true;  // Seems to be mostly ignored by libjpeg, though.
        jpeg_start_compress(&cinfo, true);
 
-       unique_ptr<uint8_t[]> row(new uint8_t[width * 3]);
-       JSAMPROW row_pointer[1] = { row.get() };
-       for (unsigned y = 0; y < height; ++y) {
-               const uint8_t *sptr = &pixel_data[(height - cinfo.next_scanline - 1) * width * 4];
-               uint8_t *dptr = row.get();
-               for (unsigned x = 0; x < width; ++x) {
-                       *dptr++ = *sptr++;
-                       *dptr++ = *sptr++;
-                       *dptr++ = *sptr++;
-                       ++sptr;
+       // TODO: Subsample on the GPU.
+       unique_ptr<uint8_t[]> cbdata(new uint8_t[(width/2) * 8]);
+       unique_ptr<uint8_t[]> crdata(new uint8_t[(width/2) * 8]);
+       JSAMPROW yptr[8], cbptr[8], crptr[8];
+       JSAMPARRAY data[3] = { yptr, cbptr, crptr };
+       for (unsigned yy = 0; yy < 8; ++yy) {
+               cbptr[yy] = cbdata.get() + yy * (width / 2);
+               crptr[yy] = crdata.get() + yy * (width / 2);
+       }
+       for (unsigned y = 0; y < height; y += 8) {
+               uint8_t *cb_out = cbdata.get();
+               uint8_t *cr_out = crdata.get();
+               for (unsigned yy = 0; yy < 8; ++yy) {
+                       yptr[yy] = const_cast<JSAMPROW>(&y_data[(height - y - yy - 1) * width]);
+                       const uint8_t *sptr = &cbcr_data[(height - y - yy - 1) * width * 2];
+                       for (unsigned x = 0; x < width; x += 2) {
+                               *cb_out++ = (sptr[0] + sptr[2]) / 2;
+                               *cr_out++ = (sptr[1] + sptr[3]) / 2;
+                               sptr += 4;
+                       }
                }
-               (void) jpeg_write_scanlines(&cinfo, row_pointer, 1);
+
+               jpeg_write_raw_data(&cinfo, data, /*num_lines=*/8);
        }
 
        jpeg_finish_compress(&cinfo);
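
With raw_data_in set, libjpeg skips its own color conversion and downsampling, so the caller must supply planar row groups that already match the sampling factors: for this 4:2:2 setup (h_samp 2/1/1, v_samp 1/1/1), each jpeg_write_raw_data() call consumes DCTSIZE = 8 full-width Y rows plus 8 half-width Cb and Cr rows, which is exactly what the loop above assembles. The horizontal averaging can be read in isolation; a sketch of just that repacking step (the function name is illustrative):

#include <cstdint>

// Turn one row of interleaved Cb/Cr pairs (2 bytes per pixel) into width/2
// planar Cb and Cr samples by averaging horizontally adjacent pixels (4:2:2).
void subsample_cbcr_row(const uint8_t *cbcr_row, unsigned width,
                        uint8_t *cb_out, uint8_t *cr_out)
{
	for (unsigned x = 0; x < width; x += 2) {
		*cb_out++ = (cbcr_row[0] + cbcr_row[2]) / 2;
		*cr_out++ = (cbcr_row[1] + cbcr_row[3]) / 2;
		cbcr_row += 4;
	}
}
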
@@ -155,7 +167,7 @@ VideoStream::VideoStream()
        image_format.color_space = COLORSPACE_sRGB;
        image_format.gamma_curve = GAMMA_sRGB;
        ycbcr_format.luma_coefficients = YCBCR_REC_709;
-       ycbcr_format.full_range = false;
+       ycbcr_format.full_range = true;  // JPEG.
        ycbcr_format.num_levels = 256;
        ycbcr_format.chroma_subsampling_x = 2;
        ycbcr_format.chroma_subsampling_y = 1;
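
Setting full_range = true tells movit to produce Y'CbCr over the whole 0..255 code range, which is what JPEG/JFIF stores, rather than the Rec. 709 video range (Y' in 16..235, Cb/Cr in 16..240). Purely as an illustration of what the flag controls (movit handles the equivalent internally from the flag), expanding video-range luma to full range looks like this:

#include <algorithm>
#include <cstdint>

// Expands video-range ("limited") 8-bit luma to full range; with
// full_range = true, no such expansion is needed downstream.
uint8_t limited_to_full_luma(uint8_t y)
{
	return (uint8_t)std::clamp((int(y) - 16) * 255 / 219, 0, 255);
}
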
@@ -165,12 +177,21 @@ VideoStream::VideoStream()
        ycbcr_format.cr_y_position = 0.5f;
        ycbcr_input = (movit::YCbCrInput *)ycbcr_convert_chain->add_input(new YCbCrInput(image_format, ycbcr_format, 1280, 720));
 
+       YCbCrFormat ycbcr_output_format = ycbcr_format;
+       ycbcr_output_format.chroma_subsampling_x = 1;
+
        ImageFormat inout_format;
        inout_format.color_space = COLORSPACE_sRGB;
        inout_format.gamma_curve = GAMMA_sRGB;
 
        check_error();
-       ycbcr_convert_chain->add_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
+
+       // One full Y'CbCr texture (for interpolation), one that's just Y (throwing away the
+       // Cb and Cr channels). The second copy is sort of redundant, but it's the easiest way
+       // of getting the gray data into a layered texture.
+       ycbcr_convert_chain->add_ycbcr_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_output_format);
+       check_error();
+       ycbcr_convert_chain->add_ycbcr_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, ycbcr_output_format);
        check_error();
        ycbcr_convert_chain->set_dither_bits(8);
        check_error();
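
The reason two identical Y'CbCr outputs are not actually redundant lies in what they get attached to below: the second output is rendered into a GL_R8 texture (gray_tex), so Cb and Cr are discarded on write and only the luma layer survives for the optical-flow code. A sketch of such an allocation, assuming GL_R8 and a full mipmap chain for the flow pyramids (not the actual allocation code, which is outside this hunk):

#include <algorithm>
#include <cmath>
#include <epoxy/gl.h>

// Allocates a luma-only layered texture. Because the internal format is
// GL_R8, attaching it as a color attachment keeps only the first (Y')
// channel of the shader's Y'CbCr output; Cb and Cr are dropped on write.
GLuint create_gray_tex(unsigned width, unsigned height, unsigned num_layers)
{
	int levels = 1 + int(std::floor(std::log2(std::max(width, height))));
	GLuint tex;
	glCreateTextures(GL_TEXTURE_2D_ARRAY, 1, &tex);
	glTextureStorage3D(tex, levels, GL_R8, width, height, num_layers);
	return tex;
}
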
@@ -197,13 +218,17 @@ VideoStream::VideoStream()
 
                glNamedFramebufferTextureLayer(resource.input_fbos[0], GL_COLOR_ATTACHMENT0, input_tex[i], 0, 0);
                check_error();
+               glNamedFramebufferTextureLayer(resource.input_fbos[0], GL_COLOR_ATTACHMENT1, gray_tex[i], 0, 0);
+               check_error();
                glNamedFramebufferTextureLayer(resource.input_fbos[1], GL_COLOR_ATTACHMENT0, input_tex[i], 0, 1);
                check_error();
+               glNamedFramebufferTextureLayer(resource.input_fbos[1], GL_COLOR_ATTACHMENT1, gray_tex[i], 0, 1);
+               check_error();
 
-               GLuint buf = GL_COLOR_ATTACHMENT0;
-               glNamedFramebufferDrawBuffers(resource.input_fbos[0], 1, &buf);
+               GLuint bufs[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 };
+               glNamedFramebufferDrawBuffers(resource.input_fbos[0], 2, bufs);
                check_error();
-               glNamedFramebufferDrawBuffers(resource.input_fbos[1], 1, &buf);
+               glNamedFramebufferDrawBuffers(resource.input_fbos[1], 2, bufs);
                check_error();
 
                glCreateBuffers(1, &resource.pbo);
@@ -216,9 +241,8 @@ VideoStream::VideoStream()
 
        check_error();
 
-       compute_flow.reset(new DISComputeFlow(width, height, operating_point3));
-       gray.reset(new GrayscaleConversion);  // NOTE: Must come after DISComputeFlow, since it sets up the VBO!
-       interpolate.reset(new Interpolate(width, height, operating_point3));
+       compute_flow.reset(new DISComputeFlow(width, height, operating_point2));
+       interpolate.reset(new Interpolate(width, height, operating_point2, /*split_ycbcr_output=*/true));
        check_error();
 }
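
To summarize the attachment pattern above: each FBO renders into one layer of the two layered textures, writing both color attachments in a single pass (multiple render targets). A self-contained sketch with illustrative names:

#include <epoxy/gl.h>

// One FBO per layer: full Y'CbCr goes to attachment 0, luma-only to
// attachment 1, both written by the same draw call.
GLuint make_layer_fbo(GLuint ycbcr_tex, GLuint gray_tex, int layer)
{
	GLuint fbo;
	glCreateFramebuffers(1, &fbo);
	glNamedFramebufferTextureLayer(fbo, GL_COLOR_ATTACHMENT0, ycbcr_tex, /*level=*/0, layer);
	glNamedFramebufferTextureLayer(fbo, GL_COLOR_ATTACHMENT1, gray_tex, /*level=*/0, layer);
	const GLenum bufs[] = { GL_COLOR_ATTACHMENT0, GL_COLOR_ATTACHMENT1 };
	glNamedFramebufferDrawBuffers(fbo, 2, bufs);
	return fbo;
}
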
 
@@ -255,6 +279,8 @@ void VideoStream::stop()
 
 void VideoStream::schedule_original_frame(int64_t output_pts, unsigned stream_idx, int64_t input_pts)
 {
+       fprintf(stderr, "output_pts=%ld  original      input_pts=%ld\n", output_pts, input_pts);
+
        QueuedFrame qf;
        qf.type = QueuedFrame::ORIGINAL;
        qf.output_pts = output_pts;
@@ -268,6 +294,8 @@ void VideoStream::schedule_original_frame(int64_t output_pts, unsigned stream_id
 
 void VideoStream::schedule_interpolated_frame(int64_t output_pts, unsigned stream_idx, int64_t input_first_pts, int64_t input_second_pts, float alpha)
 {
+       fprintf(stderr, "output_pts=%ld  interpolated  input_pts1=%ld input_pts2=%ld alpha=%.3f\n", output_pts, input_first_pts, input_second_pts, alpha);
+
        // Get the temporary OpenGL resources we need for doing the interpolation.
        InterpolatedFrameResources resources;
        {
@@ -291,7 +319,11 @@ void VideoStream::schedule_interpolated_frame(int64_t output_pts, unsigned strea
        // Convert frame0 and frame1 to OpenGL textures.
        // TODO: Deduplicate against JPEGFrameView::setDecodedFrame?
        for (size_t frame_no = 0; frame_no < 2; ++frame_no) {
-               shared_ptr<Frame> frame = decode_jpeg(filename_for_frame(stream_idx, frame_no == 1 ? input_second_pts : input_first_pts));
+               JPEGID jpeg_id;
+               jpeg_id.stream_idx = stream_idx;
+               jpeg_id.pts = frame_no == 1 ? input_second_pts : input_first_pts;
+               bool did_decode;
+               shared_ptr<Frame> frame = decode_jpeg_with_cache(jpeg_id, DECODE_IF_NOT_IN_CACHE, &did_decode);
                ycbcr_format.chroma_subsampling_x = frame->chroma_subsampling_x;
                ycbcr_format.chroma_subsampling_y = frame->chroma_subsampling_y;
                ycbcr_input->change_ycbcr_format(ycbcr_format);
@@ -307,24 +339,27 @@ void VideoStream::schedule_interpolated_frame(int64_t output_pts, unsigned strea
        }
 
        glGenerateTextureMipmap(resources.input_tex);
-
-       // Compute the interpolated frame.
-       check_error();
-       gray->exec(resources.input_tex, resources.gray_tex, 1280, 720, /*num_layers=*/2);
        check_error();
        glGenerateTextureMipmap(resources.gray_tex);
        check_error();
-       GLuint flow_tex = compute_flow->exec(resources.gray_tex, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
-       check_error();
 
-       qf.output_tex = interpolate->exec(resources.input_tex, flow_tex, 1280, 720, alpha);
+       // Compute the interpolated frame.
+       qf.flow_tex = compute_flow->exec(resources.gray_tex, DISComputeFlow::FORWARD_AND_BACKWARD, DISComputeFlow::DO_NOT_RESIZE_FLOW);
+       check_error();
+       tie(qf.output_tex, qf.output2_tex) = interpolate->exec(resources.input_tex, resources.gray_tex, qf.flow_tex, 1280, 720, alpha);
        check_error();
 
+       // We could have released qf.flow_tex here, but to make sure we don't cause a stall
+       // when trying to reuse it for the next frame, we can just as well hold on to it
+       // and release it only when the readback is done.
+
        // Read it down (asynchronously) to the CPU.
        glPixelStorei(GL_PACK_ROW_LENGTH, 0);
        glBindBuffer(GL_PIXEL_PACK_BUFFER, resources.pbo);
        check_error();
-       glGetTextureImage(qf.output_tex, 0, GL_RGBA, GL_UNSIGNED_BYTE, 1280 * 720 * 4, nullptr);
+       glGetTextureImage(qf.output_tex, 0, GL_RED, GL_UNSIGNED_BYTE, 1280 * 720 * 4, BUFFER_OFFSET(0));
+       check_error();
+       glGetTextureImage(qf.output2_tex, 0, GL_RG, GL_UNSIGNED_BYTE, 1280 * 720 * 3, BUFFER_OFFSET(1280 * 720));
        check_error();
        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
 
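The readback packs the planar Y image first and the interleaved CbCr data right after it in the same PBO; further down (outside this hunk), a fence is inserted so the encode thread knows when the GPU copies have completed. A sketch of the pattern, assuming a persistently mapped PBO whose CPU pointer stays valid:

#include <cstdint>
#include <epoxy/gl.h>

// Queue asynchronous copies of a GL_R8 luma texture and a GL_RG8 chroma
// texture into the bound PBO, then fence. Nothing here blocks; the consumer
// waits on the returned sync object before touching the mapped memory.
GLsync read_back_y_cbcr(GLuint pbo, GLuint y_tex, GLuint cbcr_tex,
                        unsigned width, unsigned height)
{
	glBindBuffer(GL_PIXEL_PACK_BUFFER, pbo);
	glGetTextureImage(y_tex, 0, GL_RED, GL_UNSIGNED_BYTE,
	                  width * height, (void *)(uintptr_t)0);
	glGetTextureImage(cbcr_tex, 0, GL_RG, GL_UNSIGNED_BYTE,
	                  width * height * 2, (void *)(uintptr_t)(width * height));
	glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
	return glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
}
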
@@ -341,6 +376,7 @@ void VideoStream::schedule_interpolated_frame(int64_t output_pts, unsigned strea
 
 void VideoStream::encode_thread_func()
 {
+       pthread_setname_np(pthread_self(), "VideoStream");
        QSurface *surface = create_surface();
        QOpenGLContext *context = create_context(surface);
        bool ok = make_current(context, surface);
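
pthread_setname_np() is Linux-specific and caps names at 15 characters plus the NUL terminator ("VideoStream" fits); in return, the thread shows up by name in top -H, ps -L and gdb's "info threads". A trivial sketch, including reading the name back:

#include <cstdio>
#include <pthread.h>

// Linux/glibc-specific (needs _GNU_SOURCE). Names the calling thread and
// reads the name back; 16 bytes is the documented minimum buffer size.
void name_current_thread(const char *name)
{
	pthread_setname_np(pthread_self(), name);  // <= 15 chars + NUL
	char buf[16];
	pthread_getname_np(pthread_self(), buf, sizeof(buf));
	printf("thread name is now: %s\n", buf);
}
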
@@ -372,7 +408,13 @@ void VideoStream::encode_thread_func()
                } else if (qf.type == QueuedFrame::INTERPOLATED) {
                        glClientWaitSync(qf.fence.get(), /*flags=*/0, GL_TIMEOUT_IGNORED);
 
-                       vector<uint8_t> jpeg = encode_jpeg((const uint8_t *)qf.resources.pbo_contents, 1280, 720);
+                       vector<uint8_t> jpeg = encode_jpeg(
+                               (const uint8_t *)qf.resources.pbo_contents,
+                               (const uint8_t *)qf.resources.pbo_contents + 1280 * 720,
+                               1280, 720);
+                       compute_flow->release_texture(qf.flow_tex);
+                       interpolate->release_texture(qf.output_tex);
+                       interpolate->release_texture(qf.output2_tex);
 
                        AVPacket pkt;
                        av_init_packet(&pkt);
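
release_texture() on both DISComputeFlow and Interpolate implies a pool that recycles GL textures between frames instead of deleting them, and the comment earlier in this diff about holding on to qf.flow_tex until after readback is precisely about not recycling a texture the GPU may still be using. A minimal sketch of such a pool (illustrative; the real implementation lives in the flow code):

#include <mutex>
#include <vector>
#include <epoxy/gl.h>

// Returned textures go on a free list rather than being destroyed, so the
// next frame can reuse them without a fresh allocation.
class TexturePool {
public:
	void release_texture(GLuint tex) {
		std::lock_guard<std::mutex> lock(mu);
		free_list.push_back(tex);
	}
	// Returns 0 if the pool is empty; the caller then allocates a new texture.
	GLuint get_texture() {
		std::lock_guard<std::mutex> lock(mu);
		if (free_list.empty()) return 0;
		GLuint tex = free_list.back();
		free_list.pop_back();
		return tex;
	}
private:
	std::mutex mu;
	std::vector<GLuint> free_list;
};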