diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp
index 2e8633d..67e9668 100644
--- a/quicksync_encoder.cpp
+++ b/quicksync_encoder.cpp
@@ -1,5 +1,6 @@
 #include "quicksync_encoder.h"
 
+#include <movit/image_format.h>
 #include <movit/resource_pool.h>  // Must be above the Xlib includes.
 #include <movit/util.h>
 
@@ -55,6 +56,7 @@ extern "C" {
 #include "timebase.h"
 #include "x264_encoder.h"
 
+using namespace movit;
 using namespace std;
 using namespace std::chrono;
 using namespace std::placeholders;
@@ -62,6 +64,18 @@ using namespace std::placeholders;
 class QOpenGLContext;
 class QSurface;
 
+namespace {
+
+// These need to survive several QuickSyncEncoderImpl instances,
+// so they are outside.
+once_flag quick_sync_metrics_inited;
+LatencyHistogram mixer_latency_histogram, qs_latency_histogram;
+MuxMetrics current_file_mux_metrics, total_mux_metrics;
+std::atomic<double> metric_current_file_start_time_seconds{0.0 / 0.0};
+std::atomic<int64_t> metric_quick_sync_stalled_frames{0};
+
+}  // namespace
+
 #define CHECK_VASTATUS(va_status, func)                                 \
 	if (va_status != VA_STATUS_SUCCESS) {                           \
 		fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
@@ -104,14 +118,6 @@ static constexpr unsigned int MaxFrameNum = (2<<16);
 static constexpr unsigned int MaxPicOrderCntLsb = (2<<8);
 static constexpr unsigned int Log2MaxFrameNum = 16;
 static constexpr unsigned int Log2MaxPicOrderCntLsb = 8;
-static constexpr int rc_default_modes[] = {  // Priority list of modes.
-	VA_RC_VBR,
-	VA_RC_CQP,
-	VA_RC_VBR_CONSTRAINED,
-	VA_RC_CBR,
-	VA_RC_VCM,
-	VA_RC_NONE,
-};
 
 using namespace std;
 
@@ -259,7 +265,7 @@ static void nal_header(bitstream *bs, int nal_ref_idc, int nal_unit_type)
 	bitstream_put_ui(bs, nal_unit_type, 5);
 }
 
-void QuickSyncEncoderImpl::sps_rbsp(bitstream *bs)
+void QuickSyncEncoderImpl::sps_rbsp(YCbCrLumaCoefficients ycbcr_coefficients, bitstream *bs)
 {
 	int profile_idc = PROFILE_IDC_BASELINE;
 
@@ -330,10 +336,11 @@ void QuickSyncEncoderImpl::sps_rbsp(bitstream *bs)
 			bitstream_put_ui(bs, 1, 1); /* colour_description_present_flag */
 			{
 				bitstream_put_ui(bs, 1, 8);  /* colour_primaries (1 = BT.709) */
-				bitstream_put_ui(bs, 2, 8);  /* transfer_characteristics (2 = unspecified, since we use sRGB) */
-				if (global_flags.ycbcr_rec709_coefficients) {
+				bitstream_put_ui(bs, 13, 8);  /* transfer_characteristics (13 = sRGB) */
+				if (ycbcr_coefficients == YCBCR_REC_709) {
 					bitstream_put_ui(bs, 1, 8);  /* matrix_coefficients (1 = BT.709) */
 				} else {
+					assert(ycbcr_coefficients == YCBCR_REC_601);
 					bitstream_put_ui(bs, 6, 8);  /* matrix_coefficients (6 = BT.601/SMPTE 170M) */
 				}
 			}
@@ -515,14 +522,14 @@ int QuickSyncEncoderImpl::build_packed_pic_buffer(unsigned char **header_buffer)
 }
 
 int
-QuickSyncEncoderImpl::build_packed_seq_buffer(unsigned char **header_buffer)
+QuickSyncEncoderImpl::build_packed_seq_buffer(YCbCrLumaCoefficients ycbcr_coefficients, unsigned char **header_buffer)
 {
 	bitstream bs;
 
 	bitstream_start(&bs);
 	nal_start_code_prefix(&bs);
 	nal_header(&bs, NAL_REF_IDC_HIGH, NAL_SPS);
-	sps_rbsp(&bs);
+	sps_rbsp(ycbcr_coefficients, &bs);
 	bitstream_end(&bs);
 
 	*header_buffer = (unsigned char *)bs.buffer;
@@ -699,29 +706,12 @@ void encoding2display_order(
 	}
 }
 
-static const char *rc_to_string(int rc_mode)
-{
-	switch (rc_mode) {
-	case VA_RC_NONE:
-		return "NONE";
-	case VA_RC_CBR:
-		return "CBR";
-	case VA_RC_VBR:
-		return "VBR";
-	case VA_RC_VCM:
-		return "VCM";
-	case VA_RC_CQP:
-		return "CQP";
-	case VA_RC_VBR_CONSTRAINED:
-		return "VBR_CONSTRAINED";
-	default:
-		return "Unknown";
-	}
-}
-
 void QuickSyncEncoderImpl::enable_zerocopy_if_possible()
 {
-	if (global_flags.uncompressed_video_to_http) {
+	if (global_flags.x264_video_to_disk) {
+		// Quick Sync is entirely disabled.
+		use_zerocopy = false;
+	} else if (global_flags.uncompressed_video_to_http) {
 		fprintf(stderr, "Disabling zerocopy H.264 encoding due to --http-uncompressed-video.\n");
 		use_zerocopy = false;
 	} else if (global_flags.x264_video_to_http) {
@@ -730,6 +720,7 @@ void QuickSyncEncoderImpl::enable_zerocopy_if_possible()
 	} else {
 		use_zerocopy = true;
 	}
+	global_flags.use_zerocopy = use_zerocopy;
 }
 
 VADisplay QuickSyncEncoderImpl::va_open_display(const string &va_display)
@@ -740,7 +731,6 @@ VADisplay QuickSyncEncoderImpl::va_open_display(const string &va_display)
 			fprintf(stderr, "error: can't connect to X server!\n");
 			return NULL;
 		}
-		enable_zerocopy_if_possible();
 		return vaGetDisplay(x11_display);
 	} else if (va_display[0] != '/') {
 		x11_display = XOpenDisplay(va_display.c_str());
@@ -748,7 +738,6 @@ VADisplay QuickSyncEncoderImpl::va_open_display(const string &va_display)
 			fprintf(stderr, "error: can't connect to X server!\n");
 			return NULL;
 		}
-		enable_zerocopy_if_possible();
 		return vaGetDisplay(x11_display);
 	} else {
 		drm_fd = open(va_display.c_str(), O_RDWR);
@@ -862,23 +851,13 @@ int QuickSyncEncoderImpl::init_va(const string &va_display)
 	}
 
 	if (attrib[VAConfigAttribRateControl].value != VA_ATTRIB_NOT_SUPPORTED) {
-		int tmp = attrib[VAConfigAttribRateControl].value;
-
-		if (rc_mode == -1 || !(rc_mode & tmp)) {
-			if (rc_mode != -1) {
-				printf("Warning: Don't support the specified RateControl mode: %s!!!, switch to ", rc_to_string(rc_mode));
-			}
-
-			for (i = 0; i < sizeof(rc_default_modes) / sizeof(rc_default_modes[0]); i++) {
-				if (rc_default_modes[i] & tmp) {
-					rc_mode = rc_default_modes[i];
-					break;
-				}
-			}
+		if (!(attrib[VAConfigAttribRateControl].value & VA_RC_CQP)) {
+			fprintf(stderr, "ERROR: VA-API encoder does not support CQP mode.\n");
+			exit(1);
 		}
 
 		config_attrib[config_attrib_num].type = VAConfigAttribRateControl;
-		config_attrib[config_attrib_num].value = rc_mode;
+		config_attrib[config_attrib_num].value = VA_RC_CQP;
 		config_attrib_num++;
 	}
 
@@ -926,88 +905,87 @@ int QuickSyncEncoderImpl::init_va(const string &va_display)
 
 int QuickSyncEncoderImpl::setup_encode()
 {
-	VAStatus va_status;
-	VASurfaceID *tmp_surfaceid;
-	int codedbuf_size, i;
-	VASurfaceID src_surface[SURFACE_NUM];
-	VASurfaceID ref_surface[SURFACE_NUM];
-
-	va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice,
-			&config_attrib[0], config_attrib_num, &config_id);
-	CHECK_VASTATUS(va_status, "vaCreateConfig");
-
-	/* create source surfaces */
-	va_status = vaCreateSurfaces(va_dpy,
-			VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
-			&src_surface[0], SURFACE_NUM,
-			NULL, 0);
-	CHECK_VASTATUS(va_status, "vaCreateSurfaces");
-
-	/* create reference surfaces */
-	va_status = vaCreateSurfaces(va_dpy,
-			VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
-			&ref_surface[0], SURFACE_NUM,
-			NULL, 0);
-	CHECK_VASTATUS(va_status, "vaCreateSurfaces");
-
-	tmp_surfaceid = (VASurfaceID *)calloc(2 * SURFACE_NUM, sizeof(VASurfaceID));
-	memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID));
-	memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID));
-
-	/* Create a context for this encode pipe */
-	va_status = vaCreateContext(va_dpy, config_id,
-			frame_width_mbaligned, frame_height_mbaligned,
-			VA_PROGRESSIVE,
-			tmp_surfaceid, 2 * SURFACE_NUM,
-			&context_id);
-	CHECK_VASTATUS(va_status, "vaCreateContext");
-	free(tmp_surfaceid);
-
-	codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16);
-
-	for (i = 0; i < SURFACE_NUM; i++) {
-		/* create coded buffer once for all
-		 * other VA buffers which won't be used again after vaRenderPicture.
-		 * so APP can always vaCreateBuffer for every frame
-		 * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture
-		 * so VA won't maintain the coded buffer
-		 */
-		va_status = vaCreateBuffer(va_dpy, context_id, VAEncCodedBufferType,
-				codedbuf_size, 1, NULL, &gl_surfaces[i].coded_buf);
-		CHECK_VASTATUS(va_status, "vaCreateBuffer");
-	}
+	if (!global_flags.x264_video_to_disk) {
+		VAStatus va_status;
+		VASurfaceID *tmp_surfaceid;
+		int codedbuf_size;
+		VASurfaceID src_surface[SURFACE_NUM];
+		VASurfaceID ref_surface[SURFACE_NUM];
+
+		va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice,
+				&config_attrib[0], config_attrib_num, &config_id);
+		CHECK_VASTATUS(va_status, "vaCreateConfig");
+
+		/* create source surfaces */
+		va_status = vaCreateSurfaces(va_dpy,
+				VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
+				&src_surface[0], SURFACE_NUM,
+				NULL, 0);
+		CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+		/* create reference surfaces */
+		va_status = vaCreateSurfaces(va_dpy,
+				VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
+				&ref_surface[0], SURFACE_NUM,
+				NULL, 0);
+		CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+		tmp_surfaceid = (VASurfaceID *)calloc(2 * SURFACE_NUM, sizeof(VASurfaceID));
+		memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID));
+		memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID));
+
+		for (int i = 0; i < SURFACE_NUM; i++) {
+			gl_surfaces[i].src_surface = src_surface[i];
+			gl_surfaces[i].ref_surface = ref_surface[i];
+		}
 
-	/* create OpenGL objects */
-	//glGenFramebuffers(SURFACE_NUM, fbos);
-	
-	for (i = 0; i < SURFACE_NUM; i++) {
-		if (use_zerocopy) {
-			gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
-			gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
-		} else {
-			gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height);
-			gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2);
-
-			// Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
-			// buffers, due to potentially differing pitch.
- glGenBuffers(1, &gl_surfaces[i].pbo); - glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo); - glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); - uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); - gl_surfaces[i].y_offset = 0; - gl_surfaces[i].cbcr_offset = frame_width * frame_height; - gl_surfaces[i].y_ptr = ptr + gl_surfaces[i].y_offset; - gl_surfaces[i].cbcr_ptr = ptr + gl_surfaces[i].cbcr_offset; - glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); - } - } + /* Create a context for this encode pipe */ + va_status = vaCreateContext(va_dpy, config_id, + frame_width_mbaligned, frame_height_mbaligned, + VA_PROGRESSIVE, + tmp_surfaceid, 2 * SURFACE_NUM, + &context_id); + CHECK_VASTATUS(va_status, "vaCreateContext"); + free(tmp_surfaceid); + + codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16); + + for (int i = 0; i < SURFACE_NUM; i++) { + /* create coded buffer once for all + * other VA buffers which won't be used again after vaRenderPicture. + * so APP can always vaCreateBuffer for every frame + * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture + * so VA won't maintain the coded buffer + */ + va_status = vaCreateBuffer(va_dpy, context_id, VAEncCodedBufferType, + codedbuf_size, 1, NULL, &gl_surfaces[i].coded_buf); + CHECK_VASTATUS(va_status, "vaCreateBuffer"); + } + } - for (i = 0; i < SURFACE_NUM; i++) { - gl_surfaces[i].src_surface = src_surface[i]; - gl_surfaces[i].ref_surface = ref_surface[i]; - } - - return 0; + /* create OpenGL objects */ + for (int i = 0; i < SURFACE_NUM; i++) { + if (use_zerocopy) { + gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1); + gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1); + } else { + size_t bytes_per_pixel = (global_flags.x264_bit_depth > 8) ? 2 : 1; + + // Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API + // buffers, due to potentially differing pitch. 
+ glGenBuffers(1, &gl_surfaces[i].pbo); + glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo); + glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2 * bytes_per_pixel, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT); + uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2 * bytes_per_pixel, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT); + gl_surfaces[i].y_offset = 0; + gl_surfaces[i].cbcr_offset = frame_width * frame_height * bytes_per_pixel; + gl_surfaces[i].y_ptr = ptr + gl_surfaces[i].y_offset; + gl_surfaces[i].cbcr_ptr = ptr + gl_surfaces[i].cbcr_offset; + glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); + } + } + + return 0; } // Given a list like 1 9 3 0 2 8 4 and a pivot element 3, will produce @@ -1220,7 +1198,7 @@ int QuickSyncEncoderImpl::render_picture(GLSurface *surf, int frame_type, int di return 0; } -int QuickSyncEncoderImpl::render_packedsequence() +int QuickSyncEncoderImpl::render_packedsequence(YCbCrLumaCoefficients ycbcr_coefficients) { VAEncPackedHeaderParameterBuffer packedheader_param_buffer; VABufferID packedseq_para_bufid, packedseq_data_bufid, render_id[2]; @@ -1228,7 +1206,7 @@ int QuickSyncEncoderImpl::render_packedsequence() unsigned char *packedseq_buffer = NULL; VAStatus va_status; - length_in_bits = build_packed_seq_buffer(&packedseq_buffer); + length_in_bits = build_packed_seq_buffer(ycbcr_coefficients, &packedseq_buffer); packedheader_param_buffer.type = VAEncPackedHeaderSequence; @@ -1409,8 +1387,8 @@ void QuickSyncEncoderImpl::save_codeddata(GLSurface *surf, storage_task task) vaUnmapBuffer(va_dpy, surf->coded_buf); static int frameno = 0; - print_latency("Current QuickSync latency (video inputs → disk mux):", - task.received_ts, (task.frame_type == FRAME_B), &frameno); + print_latency("Current Quick Sync latency (video inputs → disk mux):", + task.received_ts, (task.frame_type == FRAME_B), &frameno, &qs_latency_histogram); { // Add video. 
@@ -1504,14 +1482,15 @@
 	}
 
 	for (unsigned i = 0; i < SURFACE_NUM; i++) {
-		if (!use_zerocopy) {
+		if (use_zerocopy) {
+			resource_pool->release_2d_texture(gl_surfaces[i].y_tex);
+			resource_pool->release_2d_texture(gl_surfaces[i].cbcr_tex);
+		} else {
 			glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
 			glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
 			glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
 			glDeleteBuffers(1, &gl_surfaces[i].pbo);
 		}
-		resource_pool->release_2d_texture(gl_surfaces[i].y_tex);
-		resource_pool->release_2d_texture(gl_surfaces[i].cbcr_tex);
 	}
 
 	has_released_gl_resources = true;
@@ -1526,7 +1505,7 @@ int QuickSyncEncoderImpl::deinit_va()
 	return 0;
 }
 
-QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
+QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
 	: current_storage_frame(0), resource_pool(resource_pool), surface(surface), x264_encoder(x264_encoder), frame_width(width), frame_height(height), disk_space_estimator(disk_space_estimator)
 {
 	file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
@@ -1538,23 +1517,36 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::R
 
 	//print_input();
 
-	if (global_flags.x264_video_to_http) {
+	if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
 		assert(x264_encoder != nullptr);
 	} else {
 		assert(x264_encoder == nullptr);
 	}
 
-	init_va(va_display);
+	enable_zerocopy_if_possible();
+	if (!global_flags.x264_video_to_disk) {
+		init_va(va_display);
+	}
 	setup_encode();
 
-	memset(&seq_param, 0, sizeof(seq_param));
-	memset(&pic_param, 0, sizeof(pic_param));
-	memset(&slice_param, 0, sizeof(slice_param));
+	if (!global_flags.x264_video_to_disk) {
+		memset(&seq_param, 0, sizeof(seq_param));
+		memset(&pic_param, 0, sizeof(pic_param));
+		memset(&slice_param, 0, sizeof(slice_param));
+	}
+
+	call_once(quick_sync_metrics_inited, [](){
+		mixer_latency_histogram.init("mixer");
+		qs_latency_histogram.init("quick_sync");
+		current_file_mux_metrics.init({{ "destination", "current_file" }});
+		total_mux_metrics.init({{ "destination", "files_total" }});
+		global_metrics.add("current_file_start_time_seconds", &metric_current_file_start_time_seconds, Metrics::TYPE_GAUGE);
+		global_metrics.add("quick_sync_stalled_frames", &metric_quick_sync_stalled_frames);
+	});
 
 	storage_thread = thread(&QuickSyncEncoderImpl::storage_task_thread, this);
 
 	encode_thread = thread([this]{
-		//SDL_GL_MakeCurrent(window, context);
 		QOpenGLContext *context = create_context(this->surface);
 		eglBindAPI(EGL_OPENGL_API);
 		if (!make_current(context, this->surface)) {
@@ -1595,7 +1587,12 @@ void QuickSyncEncoderImpl::release_gl_surface(size_t display_frame_num)
 	}
 }
 
-bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
+bool QuickSyncEncoderImpl::is_zerocopy() const
+{
+	return use_zerocopy;
+}
+
+bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
 {
assert(!is_shutdown); GLSurface *surf = nullptr; @@ -1606,6 +1603,7 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, const vect if (surf == nullptr) { fprintf(stderr, "Warning: No free slots for frame %d, rendering has to wait for H.264 encoder\n", current_storage_frame); + ++metric_quick_sync_stalled_frames; storage_task_queue_changed.wait(lock, [this, &surf]{ if (storage_thread_should_quit) return true; @@ -1618,64 +1616,72 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, const vect surface_for_frame[current_storage_frame] = surf; } - *y_tex = surf->y_tex; - *cbcr_tex = surf->cbcr_tex; - - VAStatus va_status = vaDeriveImage(va_dpy, surf->src_surface, &surf->surface_image); - CHECK_VASTATUS(va_status, "vaDeriveImage"); - if (use_zerocopy) { - VABufferInfo buf_info; - buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME; // or VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM? - va_status = vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info); - CHECK_VASTATUS(va_status, "vaAcquireBufferHandle"); - - // Create Y image. - surf->y_egl_image = EGL_NO_IMAGE_KHR; - EGLint y_attribs[] = { - EGL_WIDTH, frame_width, - EGL_HEIGHT, frame_height, - EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '), - EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle), - EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]), - EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]), - EGL_NONE - }; - - surf->y_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs); - assert(surf->y_egl_image != EGL_NO_IMAGE_KHR); - - // Associate Y image to a texture. - glBindTexture(GL_TEXTURE_2D, *y_tex); - glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->y_egl_image); - - // Create CbCr image. - surf->cbcr_egl_image = EGL_NO_IMAGE_KHR; - EGLint cbcr_attribs[] = { - EGL_WIDTH, frame_width, - EGL_HEIGHT, frame_height, - EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'), - EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle), - EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]), - EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[1]), - EGL_NONE - }; - - surf->cbcr_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, cbcr_attribs); - assert(surf->cbcr_egl_image != EGL_NO_IMAGE_KHR); - - // Associate CbCr image to a texture. - glBindTexture(GL_TEXTURE_2D, *cbcr_tex); - glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->cbcr_egl_image); + *y_tex = surf->y_tex; + *cbcr_tex = surf->cbcr_tex; + } else { + surf->y_tex = *y_tex; + surf->cbcr_tex = *cbcr_tex; + } + + if (!global_flags.x264_video_to_disk) { + VAStatus va_status = vaDeriveImage(va_dpy, surf->src_surface, &surf->surface_image); + CHECK_VASTATUS(va_status, "vaDeriveImage"); + + if (use_zerocopy) { + VABufferInfo buf_info; + buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME; // or VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM? + va_status = vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info); + CHECK_VASTATUS(va_status, "vaAcquireBufferHandle"); + + // Create Y image. 
+			surf->y_egl_image = EGL_NO_IMAGE_KHR;
+			EGLint y_attribs[] = {
+				EGL_WIDTH, frame_width,
+				EGL_HEIGHT, frame_height,
+				EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '),
+				EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
+				EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]),
+				EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]),
+				EGL_NONE
+			};
+
+			surf->y_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs);
+			assert(surf->y_egl_image != EGL_NO_IMAGE_KHR);
+
+			// Associate Y image to a texture.
+			glBindTexture(GL_TEXTURE_2D, *y_tex);
+			glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->y_egl_image);
+
+			// Create CbCr image.
+			surf->cbcr_egl_image = EGL_NO_IMAGE_KHR;
+			EGLint cbcr_attribs[] = {
+				EGL_WIDTH, frame_width / 2,
+				EGL_HEIGHT, frame_height / 2,
+				EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'),
+				EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
+				EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]),
+				EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[1]),
+				EGL_NONE
+			};
+
+			surf->cbcr_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, cbcr_attribs);
+			assert(surf->cbcr_egl_image != EGL_NO_IMAGE_KHR);
+
+			// Associate CbCr image to a texture.
+			glBindTexture(GL_TEXTURE_2D, *cbcr_tex);
+			glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->cbcr_egl_image);
+		}
 	}
 
-	current_video_frame = PendingFrame{ {}, input_frames, pts, duration };
+	current_video_frame = PendingFrame{ {}, input_frames, pts, duration, ycbcr_coefficients };
 
 	return true;
 }
 
 void QuickSyncEncoderImpl::add_audio(int64_t pts, vector<float> audio)
 {
+	lock_guard<mutex> lock(file_audio_encoder_mutex);
 	assert(!is_shutdown);
 	file_audio_encoder->encode_audio(audio, pts + global_delay());
 }
@@ -1685,6 +1691,7 @@ RefCountedGLsync QuickSyncEncoderImpl::end_frame()
 	assert(!is_shutdown);
 
 	if (!use_zerocopy) {
+		GLenum type = global_flags.x264_bit_depth > 8 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE;
 		GLSurface *surf;
 		{
 			unique_lock<mutex> lock(storage_task_queue_mutex);
@@ -1700,14 +1707,17 @@ RefCountedGLsync QuickSyncEncoderImpl::end_frame()
 
 		glBindTexture(GL_TEXTURE_2D, surf->y_tex);
 		check_error();
-		glGetTexImage(GL_TEXTURE_2D, 0, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(surf->y_offset));
+		glGetTexImage(GL_TEXTURE_2D, 0, GL_RED, type, BUFFER_OFFSET(surf->y_offset));
 		check_error();
 
 		glBindTexture(GL_TEXTURE_2D, surf->cbcr_tex);
 		check_error();
-		glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(surf->cbcr_offset));
+		glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, type, BUFFER_OFFSET(surf->cbcr_offset));
 		check_error();
 
+		// We don't own these; the caller does.
+		surf->y_tex = surf->cbcr_tex = 0;
+
 		glBindTexture(GL_TEXTURE_2D, 0);
 		check_error();
 		glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -1753,14 +1763,24 @@ void QuickSyncEncoderImpl::shutdown()
 	storage_thread.join();
 
 	// Encode any leftover audio in the queues, and also any delayed frames.
-	file_audio_encoder->encode_last_audio();
+	{
+		lock_guard<mutex> lock(file_audio_encoder_mutex);
+		file_audio_encoder->encode_last_audio();
+	}
 
-	release_encode();
-	deinit_va();
-	file_mux.reset();
+	if (!global_flags.x264_video_to_disk) {
+		release_encode();
+		deinit_va();
+	}
 	is_shutdown = true;
 }
 
+void QuickSyncEncoderImpl::close_file()
+{
+	file_mux.reset();
+	metric_current_file_start_time_seconds = 0.0 / 0.0;
+}
+
 void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
 {
 	AVFormatContext *avctx = avformat_alloc_context();
@@ -1776,10 +1796,26 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
 		exit(1);
 	}
 
-	string video_extradata = "";  // FIXME: See other comment about global headers.
-	AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
-	file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
-		std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1)));
+	string video_extradata;  // FIXME: See other comment about global headers.
+	if (global_flags.x264_video_to_disk) {
+		video_extradata = x264_encoder->get_global_headers();
+	}
+
+	current_file_mux_metrics.reset();
+
+	{
+		lock_guard<mutex> lock(file_audio_encoder_mutex);
+		AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
+		file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
+			std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1),
+			Mux::WRITE_BACKGROUND,
+			{ &current_file_mux_metrics, &total_mux_metrics }));
+	}
+	metric_current_file_start_time_seconds = get_timestamp_for_metrics();
+
+	if (global_flags.x264_video_to_disk) {
+		x264_encoder->add_mux(file_mux.get());
+	}
 }
 
 void QuickSyncEncoderImpl::encode_thread_func()
@@ -1815,6 +1851,13 @@ void QuickSyncEncoderImpl::encode_thread_func()
 		// Pass the frame on to x264 (or uncompressed to HTTP) as needed.
 		// Note that this implicitly waits for the frame to be done rendering.
 		pass_frame(frame, display_frame_num, frame.pts, frame.duration);
+
+		if (global_flags.x264_video_to_disk) {
+			unique_lock<mutex> lock(storage_task_queue_mutex);
+			release_gl_surface(display_frame_num);
+			continue;
+		}
+
 		reorder_buffer[display_frame_num] = move(frame);
 
 		// Now encode as many QuickSync frames as we can using the frames we have available.
@@ -1850,7 +1893,7 @@
 			}
 			last_dts = dts;
 
-			encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration);
+			encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration, frame.ycbcr_coefficients);
 			++quicksync_encoding_frame_num;
 		}
 	}
@@ -1868,7 +1911,7 @@ void QuickSyncEncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num,
 		PendingFrame frame = move(pending_frame.second);
 		int64_t dts = last_dts + (TIMEBASE / MAX_FPS);
 		printf("Finalizing encode: Encoding leftover frame %d as P-frame instead of B-frame.\n", display_frame_num);
-		encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration);
+		encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration, frame.ycbcr_coefficients);
 		last_dts = dts;
 	}
 }
@@ -1908,15 +1951,19 @@ void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame,
 	// Wait for the GPU to be done with the frame.
 	GLenum sync_status;
 	do {
-		sync_status = glClientWaitSync(frame.fence.get(), 0, 1000000000);
+		sync_status = glClientWaitSync(frame.fence.get(), 0, 0);
 		check_error();
+		if (sync_status == GL_TIMEOUT_EXPIRED) {
+			// NVIDIA likes to busy-wait; yield instead.
+			this_thread::sleep_for(milliseconds(1));
+		}
 	} while (sync_status == GL_TIMEOUT_EXPIRED);
 	assert(sync_status != GL_WAIT_FAILED);
 
 	ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames);
 	static int frameno = 0;
 	print_latency("Current mixer latency (video inputs → ready for encode):",
-		received_ts, false, &frameno);
+		received_ts, false, &frameno, &mixer_latency_histogram);
 
 	// Release back any input frames we needed to render this frame.
 	frame.input_frames.clear();
@@ -1930,13 +1977,13 @@ void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame,
 	uint8_t *data = reinterpret_cast<uint8_t *>(surf->y_ptr);
 	if (global_flags.uncompressed_video_to_http) {
 		add_packet_for_uncompressed_frame(pts, duration, data);
-	} else if (global_flags.x264_video_to_http) {
-		x264_encoder->add_frame(pts, duration, data, received_ts);
+	} else if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
+		x264_encoder->add_frame(pts, duration, frame.ycbcr_coefficients, data, received_ts);
 	}
 }
 
 void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
-                                        int frame_type, int64_t pts, int64_t dts, int64_t duration)
+                                        int frame_type, int64_t pts, int64_t dts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients)
 {
 	const ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames);
 
@@ -1980,10 +2027,14 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
 		// FIXME: If the mux wants global headers, we should not put the
 		// SPS/PPS before each IDR frame, but rather put it into the
 		// codec extradata (formatted differently?).
+		//
+		// NOTE: If we change ycbcr_coefficients, it will not take effect
+		// before the next IDR frame. This is acceptable, as it should only
+		// happen on a mode change, which is rare.
 		render_sequence();
 		render_picture(surf, frame_type, display_frame_num, gop_start_display_frame_num);
 		if (h264_packedheader) {
-			render_packedsequence();
+			render_packedsequence(ycbcr_coefficients);
 			render_packedpicture();
 		}
 	} else {
@@ -2018,13 +2069,14 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame
 	tmp.pts = pts;
 	tmp.dts = dts;
 	tmp.duration = duration;
+	tmp.ycbcr_coefficients = ycbcr_coefficients;
 	tmp.received_ts = received_ts;
 	tmp.ref_display_frame_numbers = move(ref_display_frame_numbers);
 	storage_task_enqueue(move(tmp));
 }
 
 // Proxy object.
-QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
+QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
 	: impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, oformat, x264_encoder, disk_space_estimator)) {}
 
 // Must be defined here because unique_ptr<> destructor needs to know the impl.
@@ -2035,9 +2087,14 @@ void QuickSyncEncoder::add_audio(int64_t pts, vector<float> audio)
 {
 	impl->add_audio(pts, audio);
 }
 
-bool QuickSyncEncoder::begin_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
+bool QuickSyncEncoder::is_zerocopy() const
+{
+	return impl->is_zerocopy();
+}
+
+bool QuickSyncEncoder::begin_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
 {
-	return impl->begin_frame(pts, duration, input_frames, y_tex, cbcr_tex);
+	return impl->begin_frame(pts, duration, ycbcr_coefficients, input_frames, y_tex, cbcr_tex);
 }
 
 RefCountedGLsync QuickSyncEncoder::end_frame()
@@ -2050,6 +2107,11 @@ void QuickSyncEncoder::shutdown()
 	impl->shutdown();
 }
 
+void QuickSyncEncoder::close_file()
+{
+	impl->shutdown();
+}
+
 void QuickSyncEncoder::set_stream_mux(Mux *mux)
 {
 	impl->set_stream_mux(mux);