From bd5b2de9a277b87c75d71d94bd8c5095ab14ecf7 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson"
Date: Mon, 13 Mar 2017 19:05:37 +0100
Subject: [PATCH] Add support for recording the x264 video to disk.

This makes recording entirely independent of Quick Sync Video (or VA-API,
if you wish). There's no way of running two separate x264 encodes, though;
you get the same encode as for the stream.
---
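A note on the flag semantics before the diff: --record-x264-video does not add a
second encoder; it reuses the stream's x264 encode and also writes it to the
recording file, so the option handler simply sets both flags. A minimal sketch of
the invariant this establishes (illustrative only, not part of the patch; the
struct mirrors the two booleans from flags.h):

	#include <cassert>

	struct Flags {
		bool x264_video_to_http = false;
		bool x264_video_to_disk = false;  // Implies x264_video_to_http == true.
	};
	Flags global_flags;

	// What OPTION_RECORD_X264_VIDEO does, and the invariant the rest of the
	// code may assume once parse_flags() has run:
	void set_record_x264_video()
	{
		global_flags.x264_video_to_disk = true;
		global_flags.x264_video_to_http = true;
		assert(!global_flags.x264_video_to_disk || global_flags.x264_video_to_http);
	}
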
 flags.cpp                |   8 +
 flags.h                  |   1 +
 quicksync_encoder.cpp    | 313 ++++++++++++++++++++++-----------------
 quicksync_encoder.h      |  10 +-
 quicksync_encoder_impl.h |   8 +-
 video_encoder.cpp        |  28 +++-
 video_encoder.h          |   1 +
 x264_encoder.cpp         |   6 +-
 x264_encoder.h           |   5 +-
 9 files changed, 228 insertions(+), 152 deletions(-)

diff --git a/flags.cpp b/flags.cpp
index 88a2c45..62028df 100644
--- a/flags.cpp
+++ b/flags.cpp
@@ -19,6 +19,7 @@ enum LongOption {
 	OPTION_FAKE_CARDS_AUDIO,
 	OPTION_HTTP_UNCOMPRESSED_VIDEO,
 	OPTION_HTTP_X264_VIDEO,
+	OPTION_RECORD_X264_VIDEO,
 	OPTION_X264_PRESET,
 	OPTION_X264_TUNE,
 	OPTION_X264_SPEEDCONTROL,
@@ -76,6 +77,8 @@ void usage()
 	fprintf(stderr, "      --fake-cards-audio        make fake (disconnected) cards output a simple tone\n");
 	fprintf(stderr, "      --http-uncompressed-video send uncompressed NV12 video to HTTP clients\n");
 	fprintf(stderr, "      --http-x264-video         send x264-compressed video to HTTP clients\n");
+	fprintf(stderr, "      --record-x264-video       store x264-compressed video to disk (implies --http-x264-video,\n");
+	fprintf(stderr, "                                  removes the need for working VA-API encoding)\n");
 	fprintf(stderr, "      --x264-preset             x264 quality preset (default " X264_DEFAULT_PRESET ")\n");
 	fprintf(stderr, "      --x264-tune               x264 tuning (default " X264_DEFAULT_TUNE ", can be blank)\n");
 	fprintf(stderr, "      --x264-speedcontrol       try to match x264 preset to available CPU speed\n");
@@ -145,6 +148,7 @@ void parse_flags(int argc, char * const argv[])
 		{ "fake-cards-audio", no_argument, 0, OPTION_FAKE_CARDS_AUDIO },
 		{ "http-uncompressed-video", no_argument, 0, OPTION_HTTP_UNCOMPRESSED_VIDEO },
 		{ "http-x264-video", no_argument, 0, OPTION_HTTP_X264_VIDEO },
+		{ "record-x264-video", no_argument, 0, OPTION_RECORD_X264_VIDEO },
 		{ "x264-preset", required_argument, 0, OPTION_X264_PRESET },
 		{ "x264-tune", required_argument, 0, OPTION_X264_TUNE },
 		{ "x264-speedcontrol", no_argument, 0, OPTION_X264_SPEEDCONTROL },
@@ -261,6 +265,10 @@ void parse_flags(int argc, char * const argv[])
 		case OPTION_HTTP_X264_VIDEO:
 			global_flags.x264_video_to_http = true;
 			break;
+		case OPTION_RECORD_X264_VIDEO:
+			global_flags.x264_video_to_disk = true;
+			global_flags.x264_video_to_http = true;
+			break;
 		case OPTION_X264_PRESET:
 			global_flags.x264_preset = optarg;
 			break;
diff --git a/flags.h b/flags.h
index 78b1f1f..97d2fe6 100644
--- a/flags.h
+++ b/flags.h
@@ -14,6 +14,7 @@ struct Flags {
 	bool fake_cards_audio = false;
 	bool uncompressed_video_to_http = false;
 	bool x264_video_to_http = false;
+	bool x264_video_to_disk = false;  // Disables Quick Sync entirely. Implies x264_video_to_http == true.
 	std::vector<std::string> theme_dirs { ".", "/usr/local/share/nageru" };
 	std::string theme_filename = "theme.lua";
 	bool locut_enabled = true;
diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp
index d49a483..bd6b4c2 100644
--- a/quicksync_encoder.cpp
+++ b/quicksync_encoder.cpp
@@ -724,7 +724,10 @@ static const char *rc_to_string(int rc_mode)
 
 void QuickSyncEncoderImpl::enable_zerocopy_if_possible()
 {
-	if (global_flags.uncompressed_video_to_http) {
+	if (global_flags.x264_video_to_disk) {
+		// Quick Sync is entirely disabled.
+		use_zerocopy = false;
+	} else if (global_flags.uncompressed_video_to_http) {
 		fprintf(stderr, "Disabling zerocopy H.264 encoding due to --http-uncompressed-video.\n");
 		use_zerocopy = false;
 	} else if (global_flags.x264_video_to_http) {
@@ -743,7 +746,6 @@ VADisplay QuickSyncEncoderImpl::va_open_display(const string &va_display)
 			fprintf(stderr, "error: can't connect to X server!\n");
 			return NULL;
 		}
-		enable_zerocopy_if_possible();
 		return vaGetDisplay(x11_display);
 	} else if (va_display[0] != '/') {
 		x11_display = XOpenDisplay(va_display.c_str());
@@ -751,7 +753,6 @@ VADisplay QuickSyncEncoderImpl::va_open_display(const string &va_display)
 			fprintf(stderr, "error: can't connect to X server!\n");
 			return NULL;
 		}
-		enable_zerocopy_if_possible();
 		return vaGetDisplay(x11_display);
 	} else {
 		drm_fd = open(va_display.c_str(), O_RDWR);
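The two deletions above move the zerocopy decision out of va_open_display(): with
--record-x264-video, va_open_display() is never called, so the decision has to
happen unconditionally in the constructor (see the constructor hunk further down).
Condensed into a free function for clarity (a sketch, assuming the Flags struct
from flags.h; in the patch this is a member function that also prints a diagnostic
per disabled case):

	// Zerocopy means the rendered frame goes straight into a VA-API surface.
	// Any consumer that needs the pixels in CPU memory rules it out.
	bool should_use_zerocopy(const Flags &flags)
	{
		if (flags.x264_video_to_disk) return false;          // Quick Sync entirely disabled.
		if (flags.uncompressed_video_to_http) return false;  // HTTP clients get raw NV12 from CPU memory.
		if (flags.x264_video_to_http) return false;          // x264 also reads frames from CPU memory.
		return true;                                         // Frames can stay on the GPU.
	}
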
@@ -929,88 +930,88 @@ int QuickSyncEncoderImpl::init_va(const string &va_display)
 
 int QuickSyncEncoderImpl::setup_encode()
 {
-	VAStatus va_status;
-	VASurfaceID *tmp_surfaceid;
-	int codedbuf_size, i;
-	VASurfaceID src_surface[SURFACE_NUM];
-	VASurfaceID ref_surface[SURFACE_NUM];
-
-	va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice,
-			&config_attrib[0], config_attrib_num, &config_id);
-	CHECK_VASTATUS(va_status, "vaCreateConfig");
-
-	/* create source surfaces */
-	va_status = vaCreateSurfaces(va_dpy,
-			VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
-			&src_surface[0], SURFACE_NUM,
-			NULL, 0);
-	CHECK_VASTATUS(va_status, "vaCreateSurfaces");
-
-	/* create reference surfaces */
-	va_status = vaCreateSurfaces(va_dpy,
-			VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
-			&ref_surface[0], SURFACE_NUM,
-			NULL, 0);
-	CHECK_VASTATUS(va_status, "vaCreateSurfaces");
-
-	tmp_surfaceid = (VASurfaceID *)calloc(2 * SURFACE_NUM, sizeof(VASurfaceID));
-	memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID));
-	memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID));
-
-	/* Create a context for this encode pipe */
-	va_status = vaCreateContext(va_dpy, config_id,
-			frame_width_mbaligned, frame_height_mbaligned,
-			VA_PROGRESSIVE,
-			tmp_surfaceid, 2 * SURFACE_NUM,
-			&context_id);
-	CHECK_VASTATUS(va_status, "vaCreateContext");
-	free(tmp_surfaceid);
-
-	codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16);
-
-	for (i = 0; i < SURFACE_NUM; i++) {
-		/* create coded buffer once for all
-		 * other VA buffers which won't be used again after vaRenderPicture.
-		 * so APP can always vaCreateBuffer for every frame
-		 * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture
-		 * so VA won't maintain the coded buffer
-		 */
-		va_status = vaCreateBuffer(va_dpy, context_id, VAEncCodedBufferType,
-				codedbuf_size, 1, NULL, &gl_surfaces[i].coded_buf);
-		CHECK_VASTATUS(va_status, "vaCreateBuffer");
-	}
+	if (!global_flags.x264_video_to_disk) {
+		VAStatus va_status;
+		VASurfaceID *tmp_surfaceid;
+		int codedbuf_size;
+		VASurfaceID src_surface[SURFACE_NUM];
+		VASurfaceID ref_surface[SURFACE_NUM];
+
+		va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice,
+				&config_attrib[0], config_attrib_num, &config_id);
+		CHECK_VASTATUS(va_status, "vaCreateConfig");
+
+		/* create source surfaces */
+		va_status = vaCreateSurfaces(va_dpy,
+				VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
+				&src_surface[0], SURFACE_NUM,
+				NULL, 0);
+		CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+		/* create reference surfaces */
+		va_status = vaCreateSurfaces(va_dpy,
+				VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
+				&ref_surface[0], SURFACE_NUM,
+				NULL, 0);
+		CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+		tmp_surfaceid = (VASurfaceID *)calloc(2 * SURFACE_NUM, sizeof(VASurfaceID));
+		memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID));
+		memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID));
+
+		for (int i = 0; i < SURFACE_NUM; i++) {
+			gl_surfaces[i].src_surface = src_surface[i];
+			gl_surfaces[i].ref_surface = ref_surface[i];
+		}
 
-	/* create OpenGL objects */
-	//glGenFramebuffers(SURFACE_NUM, fbos);
-
-	for (i = 0; i < SURFACE_NUM; i++) {
-		if (use_zerocopy) {
-			gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
-			gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
-		} else {
-			gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height);
-			gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2);
-
-			// Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
-			// buffers, due to potentially differing pitch.
-			glGenBuffers(1, &gl_surfaces[i].pbo);
-			glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
-			glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
-			uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
-			gl_surfaces[i].y_offset = 0;
-			gl_surfaces[i].cbcr_offset = frame_width * frame_height;
-			gl_surfaces[i].y_ptr = ptr + gl_surfaces[i].y_offset;
-			gl_surfaces[i].cbcr_ptr = ptr + gl_surfaces[i].cbcr_offset;
-			glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
-		}
-	}
+		/* Create a context for this encode pipe */
+		va_status = vaCreateContext(va_dpy, config_id,
+				frame_width_mbaligned, frame_height_mbaligned,
+				VA_PROGRESSIVE,
+				tmp_surfaceid, 2 * SURFACE_NUM,
+				&context_id);
+		CHECK_VASTATUS(va_status, "vaCreateContext");
+		free(tmp_surfaceid);
+
+		codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16);
+
+		for (int i = 0; i < SURFACE_NUM; i++) {
+			/* create coded buffer once for all
+			 * other VA buffers which won't be used again after vaRenderPicture.
+			 * so APP can always vaCreateBuffer for every frame
+			 * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture
+			 * so VA won't maintain the coded buffer
+			 */
+			va_status = vaCreateBuffer(va_dpy, context_id, VAEncCodedBufferType,
+					codedbuf_size, 1, NULL, &gl_surfaces[i].coded_buf);
+			CHECK_VASTATUS(va_status, "vaCreateBuffer");
+		}
+	}
 
-	for (i = 0; i < SURFACE_NUM; i++) {
-		gl_surfaces[i].src_surface = src_surface[i];
-		gl_surfaces[i].ref_surface = ref_surface[i];
-	}
-
-	return 0;
+	/* create OpenGL objects */
+	for (int i = 0; i < SURFACE_NUM; i++) {
+		if (use_zerocopy) {
+			gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
+			gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
+		} else {
+			gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height);
+			gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2);
+
+			// Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
+			// buffers, due to potentially differing pitch.
+			glGenBuffers(1, &gl_surfaces[i].pbo);
+			glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
+			glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
+			uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+			gl_surfaces[i].y_offset = 0;
+			gl_surfaces[i].cbcr_offset = frame_width * frame_height;
+			gl_surfaces[i].y_ptr = ptr + gl_surfaces[i].y_offset;
+			gl_surfaces[i].cbcr_ptr = ptr + gl_surfaces[i].cbcr_offset;
+			glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+		}
+	}
+
+	return 0;
 }
 
 // Given a list like 1 9 3 0 2 8 4 and a pivot element 3, will produce
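For scale: the coded-buffer sizing kept in the hunk above allocates 400 bytes per
16x16 macroblock of the mb-aligned frame. A quick check of what that means for a
typical 720p configuration (plain arithmetic, not patch code):

	#include <cstdio>

	int main()
	{
		// 1280 and 720 are already multiples of 16, so mb-alignment is a no-op here.
		const int frame_width_mbaligned = 1280, frame_height_mbaligned = 720;
		const int codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16 * 16);
		// 1280 * 720 * 400 / 256 = 1,440,000 bytes, i.e. roughly 1.4 MB per surface,
		// comfortably larger than any plausible compressed 720p frame.
		printf("codedbuf_size = %d bytes\n", codedbuf_size);
	}
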
@@ -1541,23 +1542,27 @@ QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, Resource
 
 	//print_input();
 
-	if (global_flags.x264_video_to_http) {
+	if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
 		assert(x264_encoder != nullptr);
 	} else {
 		assert(x264_encoder == nullptr);
 	}
 
-	init_va(va_display);
+	enable_zerocopy_if_possible();
+	if (!global_flags.x264_video_to_disk) {
+		init_va(va_display);
+	}
 	setup_encode();
 
-	memset(&seq_param, 0, sizeof(seq_param));
-	memset(&pic_param, 0, sizeof(pic_param));
-	memset(&slice_param, 0, sizeof(slice_param));
+	if (!global_flags.x264_video_to_disk) {
+		memset(&seq_param, 0, sizeof(seq_param));
+		memset(&pic_param, 0, sizeof(pic_param));
+		memset(&slice_param, 0, sizeof(slice_param));
+	}
 
 	storage_thread = thread(&QuickSyncEncoderImpl::storage_task_thread, this);
 
 	encode_thread = thread([this]{
-		//SDL_GL_MakeCurrent(window, context);
 		QOpenGLContext *context = create_context(this->surface);
 		eglBindAPI(EGL_OPENGL_API);
 		if (!make_current(context, this->surface)) {
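This constructor hunk is the heart of the change: the zerocopy decision now runs
before any VA-API work, and VA-API initialization is skipped entirely when
recording through x264. In outline (condensed from the hunk above; member and
flag names as in the patch):

	// Construction order with --record-x264-video:
	enable_zerocopy_if_possible();            // Always runs now, before any VA-API work.
	if (!global_flags.x264_video_to_disk) {
		init_va(va_display);              // VA-API is touched only when Quick Sync is used.
	}
	setup_encode();                           // GL textures/PBOs always; VA surfaces only for Quick Sync.
	if (!global_flags.x264_video_to_disk) {
		memset(&seq_param, 0, sizeof(seq_param));      // The VA parameter blocks are
		memset(&pic_param, 0, sizeof(pic_param));      // meaningless without VA-API,
		memset(&slice_param, 0, sizeof(slice_param));  // so they are skipped too.
	}
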
@@ -1624,52 +1629,54 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaC
 	*y_tex = surf->y_tex;
 	*cbcr_tex = surf->cbcr_tex;
 
-	VAStatus va_status = vaDeriveImage(va_dpy, surf->src_surface, &surf->surface_image);
-	CHECK_VASTATUS(va_status, "vaDeriveImage");
-
-	if (use_zerocopy) {
-		VABufferInfo buf_info;
-		buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;  // or VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM?
-		va_status = vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info);
-		CHECK_VASTATUS(va_status, "vaAcquireBufferHandle");
-
-		// Create Y image.
-		surf->y_egl_image = EGL_NO_IMAGE_KHR;
-		EGLint y_attribs[] = {
-			EGL_WIDTH, frame_width,
-			EGL_HEIGHT, frame_height,
-			EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '),
-			EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
-			EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]),
-			EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]),
-			EGL_NONE
-		};
-
-		surf->y_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs);
-		assert(surf->y_egl_image != EGL_NO_IMAGE_KHR);
-
-		// Associate Y image to a texture.
-		glBindTexture(GL_TEXTURE_2D, *y_tex);
-		glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->y_egl_image);
-
-		// Create CbCr image.
-		surf->cbcr_egl_image = EGL_NO_IMAGE_KHR;
-		EGLint cbcr_attribs[] = {
-			EGL_WIDTH, frame_width,
-			EGL_HEIGHT, frame_height,
-			EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'),
-			EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
-			EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]),
-			EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[1]),
-			EGL_NONE
-		};
-
-		surf->cbcr_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, cbcr_attribs);
-		assert(surf->cbcr_egl_image != EGL_NO_IMAGE_KHR);
-
-		// Associate CbCr image to a texture.
-		glBindTexture(GL_TEXTURE_2D, *cbcr_tex);
-		glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->cbcr_egl_image);
+	if (!global_flags.x264_video_to_disk) {
+		VAStatus va_status = vaDeriveImage(va_dpy, surf->src_surface, &surf->surface_image);
+		CHECK_VASTATUS(va_status, "vaDeriveImage");
+
+		if (use_zerocopy) {
+			VABufferInfo buf_info;
+			buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;  // or VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM?
+			va_status = vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info);
+			CHECK_VASTATUS(va_status, "vaAcquireBufferHandle");
+
+			// Create Y image.
+			surf->y_egl_image = EGL_NO_IMAGE_KHR;
+			EGLint y_attribs[] = {
+				EGL_WIDTH, frame_width,
+				EGL_HEIGHT, frame_height,
+				EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '),
+				EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
+				EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]),
+				EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]),
+				EGL_NONE
+			};
+
+			surf->y_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs);
+			assert(surf->y_egl_image != EGL_NO_IMAGE_KHR);
+
+			// Associate Y image to a texture.
+			glBindTexture(GL_TEXTURE_2D, *y_tex);
+			glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->y_egl_image);
+
+			// Create CbCr image.
+			surf->cbcr_egl_image = EGL_NO_IMAGE_KHR;
+			EGLint cbcr_attribs[] = {
+				EGL_WIDTH, frame_width,
+				EGL_HEIGHT, frame_height,
+				EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'),
+				EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
+				EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]),
+				EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[1]),
+				EGL_NONE
+			};
+
+			surf->cbcr_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, cbcr_attribs);
+			assert(surf->cbcr_egl_image != EGL_NO_IMAGE_KHR);
+
+			// Associate CbCr image to a texture.
+			glBindTexture(GL_TEXTURE_2D, *cbcr_tex);
+			glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->cbcr_egl_image);
+		}
 	}
 
 	current_video_frame = PendingFrame{ {}, input_frames, pts, duration, ycbcr_coefficients };
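The zerocopy path above is the standard dma-buf import dance: derive a VAImage from
the VA surface, acquire its buffer handle, wrap each plane in an EGLImage, and bind
that to a GL texture so rendering lands directly in VA-API memory. Reduced to one
plane, the pattern is (a fragment, not a full program; it assumes a current EGL
context plus the same headers the patch uses, EGL/eglext.h and libdrm's
drm_fourcc.h for fourcc_code, and error handling is elided):

	// Y plane only: an 8-bit single-channel dma-buf plane becomes a GL_TEXTURE_2D.
	VABufferInfo buf_info;
	buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME;
	vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info);

	const EGLint y_attribs[] = {
		EGL_WIDTH, frame_width,
		EGL_HEIGHT, frame_height,
		EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '),  // one 8-bit channel
		EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
		EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]),
		EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]),
		EGL_NONE
	};
	EGLImageKHR image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT,
	                                      EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs);
	glBindTexture(GL_TEXTURE_2D, *y_tex);
	glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, image);  // The texture now aliases the VA surface.
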
@@ -1758,12 +1765,18 @@ void QuickSyncEncoderImpl::shutdown()
 	// Encode any leftover audio in the queues, and also any delayed frames.
 	file_audio_encoder->encode_last_audio();
 
-	release_encode();
-	deinit_va();
-	file_mux.reset();
+	if (!global_flags.x264_video_to_disk) {
+		release_encode();
+		deinit_va();
+	}
 	is_shutdown = true;
 }
 
+void QuickSyncEncoderImpl::close_file()
+{
+	file_mux.reset();
+}
+
 void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
 {
 	AVFormatContext *avctx = avformat_alloc_context();
@@ -1779,10 +1792,18 @@ void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
 		exit(1);
 	}
 
-	string video_extradata = "";  // FIXME: See other comment about global headers.
+	string video_extradata;  // FIXME: See other comment about global headers.
+	if (global_flags.x264_video_to_disk) {
+		video_extradata = x264_encoder->get_global_headers();
+	}
+
 	AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
 	file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
 		std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1)));
+
+	if (global_flags.x264_video_to_disk) {
+		x264_encoder->add_mux(file_mux.get());
+	}
 }
 
 void QuickSyncEncoderImpl::encode_thread_func()
@@ -1818,6 +1839,13 @@ void QuickSyncEncoderImpl::encode_thread_func()
 			// Pass the frame on to x264 (or uncompressed to HTTP) as needed.
 			// Note that this implicitly waits for the frame to be done rendering.
 			pass_frame(frame, display_frame_num, frame.pts, frame.duration);
+
+			if (global_flags.x264_video_to_disk) {
+				unique_lock<mutex> lock(storage_task_queue_mutex);
+				release_gl_surface(display_frame_num);
+				continue;
+			}
+
 			reorder_buffer[display_frame_num] = move(frame);
 
 			// Now encode as many QuickSync frames as we can using the frames we have available.
@@ -1933,7 +1961,7 @@ void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame,
 	uint8_t *data = reinterpret_cast<uint8_t *>(surf->y_ptr);
 	if (global_flags.uncompressed_video_to_http) {
 		add_packet_for_uncompressed_frame(pts, duration, data);
-	} else if (global_flags.x264_video_to_http) {
+	} else if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
 		x264_encoder->add_frame(pts, duration, frame.ycbcr_coefficients, data, received_ts);
 	}
 }
@@ -2058,6 +2086,11 @@ void QuickSyncEncoder::shutdown()
 	impl->shutdown();
 }
 
+void QuickSyncEncoder::close_file()
+{
+	impl->close_file();
+}
+
 void QuickSyncEncoder::set_stream_mux(Mux *mux)
 {
 	impl->set_stream_mux(mux);
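The shutdown()/close_file() split above exists because, with --record-x264-video,
the x264 encoder still holds delayed (reordered) frames when the QuickSync encoder
stops accepting input; the file mux has to stay open until x264 has flushed into
it. The intended teardown order, as VideoEncoder uses it later in this patch:

	// Teardown with --record-x264-video (see ~VideoEncoder and do_cut below):
	quicksync_encoder->shutdown();    // Stop feeding frames; flush audio. File mux still open.
	x264_encoder.reset(nullptr);      // Destructor drains x264's delayed frames into the file mux.
	quicksync_encoder->close_file();  // Only now is it safe to write the trailer and close.
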
diff --git a/quicksync_encoder.h b/quicksync_encoder.h
index a247ee8..eebabbd 100644
--- a/quicksync_encoder.h
+++ b/quicksync_encoder.h
@@ -1,5 +1,10 @@
-// Hardware H.264 encoding via VAAPI. Heavily modified based on example
-// code by Intel. Intel's original copyright and license is reproduced below:
+// Hardware H.264 encoding via VAAPI. Also orchestrates the H.264 encoding
+// in general; this is unfortunate, and probably needs a cleanup. In particular,
+// even if you don't actually use Quick Sync for anything, this class
+// (or actually, QuickSyncEncoderImpl) still takes on a pretty central role.
+//
+// Heavily modified based on example code by Intel. Intel's original copyright
+// and license is reproduced below:
 //
 // Copyright (c) 2007-2013 Intel Corporation. All Rights Reserved.
 //
@@ -64,6 +69,7 @@ public:
 	bool begin_frame(int64_t pts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients, const std::vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex);
 	RefCountedGLsync end_frame();
 	void shutdown();  // Blocking. Does not require an OpenGL context.
+	void close_file();  // Does not require an OpenGL context. Must be run after shutdown.
 	void release_gl_resources();  // Requires an OpenGL context. Must be run after shutdown.
 	int64_t global_delay() const;  // So we never get negative dts.
diff --git a/quicksync_encoder_impl.h b/quicksync_encoder_impl.h
index 679f2a2..1846132 100644
--- a/quicksync_encoder_impl.h
+++ b/quicksync_encoder_impl.h
@@ -39,6 +39,7 @@ public:
 	bool begin_frame(int64_t pts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients, const std::vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex);
 	RefCountedGLsync end_frame();
 	void shutdown();
+	void close_file();
 	void release_gl_resources();
 	void set_stream_mux(Mux *mux)
 	{
@@ -67,13 +68,14 @@ private:
 		movit::YCbCrLumaCoefficients ycbcr_coefficients;
 	};
 	struct GLSurface {
+		GLuint y_tex, cbcr_tex;
+
+		// Only if x264_video_to_disk == false.
 		VASurfaceID src_surface, ref_surface;
 		VABufferID coded_buf;
-
 		VAImage surface_image;
-		GLuint y_tex, cbcr_tex;
 
-		// Only if use_zerocopy == true.
+		// Only if use_zerocopy == true (which implies x264_video_to_disk == false).
 		EGLImage y_egl_image, cbcr_egl_image;
 
 		// Only if use_zerocopy == false.
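The regrouped GLSurface above encodes a small lattice of invariants: the GL
textures always exist; the VA-API members are only valid when Quick Sync is in
use; and the EGL images additionally require zerocopy, which itself requires
Quick Sync. Spelled out (a sketch, not patch code; flag and member names as in
the patch):

	// Which GLSurface members are valid in which mode:
	//
	//                        y_tex/cbcr_tex   VA members   EGL images   PBO members
	//   zerocopy (QSV)            yes            yes          yes           no
	//   readback (QSV)            yes            yes          no            yes
	//   --record-x264-video       yes            no           no            yes
	bool va_members_valid  = !global_flags.x264_video_to_disk;
	bool egl_images_valid  = use_zerocopy;   // implies va_members_valid
	bool pbo_members_valid = !use_zerocopy;
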
diff --git a/video_encoder.cpp b/video_encoder.cpp
index fe0ecb1..b7e36bd 100644
--- a/video_encoder.cpp
+++ b/video_encoder.cpp
@@ -57,7 +57,7 @@ VideoEncoder::VideoEncoder(ResourcePool *resource_pool, QSurface *surface, const
 	} else {
 		stream_audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
 	}
-	if (global_flags.x264_video_to_http) {
+	if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
 		x264_encoder.reset(new X264Encoder(oformat));
 	}
 
@@ -68,12 +68,15 @@ VideoEncoder::VideoEncoder(ResourcePool *resource_pool, QSurface *surface, const
 	stream_audio_encoder->add_mux(stream_mux.get());
 	quicksync_encoder->set_stream_mux(stream_mux.get());
 	if (global_flags.x264_video_to_http) {
-		x264_encoder->set_mux(stream_mux.get());
+		x264_encoder->add_mux(stream_mux.get());
 	}
 }
 
 VideoEncoder::~VideoEncoder()
 {
+	quicksync_encoder->shutdown();
+	x264_encoder.reset(nullptr);
+	quicksync_encoder->close_file();
 	quicksync_encoder.reset(nullptr);
 	while (quicksync_encoders_in_shutdown.load() > 0) {
 		usleep(10000);
@@ -94,8 +97,14 @@ void VideoEncoder::do_cut(int frame)
 	stream_mux->plug();
 	lock_guard<mutex> lock(qs_mu);
 	QuickSyncEncoder *old_encoder = quicksync_encoder.release();  // When we go C++14, we can use move capture instead.
-	thread([old_encoder, this]{
+	X264Encoder *old_x264_encoder = nullptr;
+	if (global_flags.x264_video_to_disk) {
+		old_x264_encoder = x264_encoder.release();
+	}
+	thread([old_encoder, old_x264_encoder, this]{
 		old_encoder->shutdown();
+		delete old_x264_encoder;
+		old_encoder->close_file();
 		stream_mux->unplug();
 
 		// We cannot delete the encoder here, as this thread has no OpenGL context.
@@ -104,12 +113,23 @@ void VideoEncoder::do_cut(int frame)
 		qs_needing_cleanup.emplace_back(old_encoder);
 	}).detach();
 
+	if (global_flags.x264_video_to_disk) {
+		x264_encoder.reset(new X264Encoder(oformat));
+		if (global_flags.x264_video_to_http) {
+			x264_encoder->add_mux(stream_mux.get());
+		}
+		if (overriding_bitrate != 0) {
+			x264_encoder->change_bitrate(overriding_bitrate);
+		}
+	}
+
 	quicksync_encoder.reset(new QuickSyncEncoder(filename, resource_pool, surface, va_display, width, height, oformat, x264_encoder.get(), disk_space_estimator));
 	quicksync_encoder->set_stream_mux(stream_mux.get());
 }
 
 void VideoEncoder::change_x264_bitrate(unsigned rate_kbit)
 {
+	overriding_bitrate = rate_kbit;
 	x264_encoder->change_bitrate(rate_kbit);
 }
 
@@ -153,7 +173,7 @@ void VideoEncoder::open_output_stream()
 	avctx->flags = AVFMT_FLAG_CUSTOM_IO;
 
 	string video_extradata;
-	if (global_flags.x264_video_to_http) {
+	if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
 		video_extradata = x264_encoder->get_global_headers();
 	}
 
diff --git a/video_encoder.h b/video_encoder.h
index 8578462..e1518ae 100644
--- a/video_encoder.h
+++ b/video_encoder.h
@@ -83,6 +83,7 @@ private:
 	std::string stream_mux_header;
 
 	std::atomic<int> quicksync_encoders_in_shutdown{0};
+	std::atomic<int> overriding_bitrate{0};
 
 	// Encoders that are shutdown, but need to call release_gl_resources()
 	// (or be deleted) from some thread with an OpenGL context.
diff --git a/x264_encoder.cpp b/x264_encoder.cpp
index 7d81d55..8e3b567 100644
--- a/x264_encoder.cpp
+++ b/x264_encoder.cpp
@@ -61,6 +61,8 @@ X264Encoder::~X264Encoder()
 
 void X264Encoder::add_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const uint8_t *data, const ReceivedTimestamps &received_ts)
 {
+	assert(!should_quit);
+
 	QueuedFrame qf;
 	qf.pts = pts;
 	qf.duration = duration;
@@ -354,5 +356,7 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf)
 	}
 	pkt.duration = reinterpret_cast<intptr_t>(pic.opaque);
 
-	mux->add_packet(pkt, pic.i_pts, pic.i_dts);
+	for (Mux *mux : muxes) {
+		mux->add_packet(pkt, pic.i_pts, pic.i_dts);
+	}
 }
diff --git a/x264_encoder.h b/x264_encoder.h
index 9b6f74d..34cf702 100644
--- a/x264_encoder.h
+++ b/x264_encoder.h
@@ -24,6 +24,7 @@
 #include
 #include
 #include
+#include <vector>
 
 extern "C" {
 #include
@@ -45,7 +46,7 @@ public:
 	~X264Encoder();
 
 	// Must be called before first frame. Does not take ownership.
-	void set_mux(Mux *mux) { this->mux = mux; }
+	void add_mux(Mux *mux) { muxes.push_back(mux); }
 
 	// <data> is taken to be raw NV12 data of WIDTHxHEIGHT resolution.
 	// Does not block.
@@ -78,7 +79,7 @@ private:
 	// pool.
 	std::unique_ptr<uint8_t[]> frame_pool;
 
-	Mux *mux = nullptr;
+	std::vector<Mux *> muxes;
 
 	bool wants_global_headers;
 	std::string global_headers;
-- 
2.39.2
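
Finally, the X264Encoder change from set_mux() to add_mux() is a plain fan-out:
one encode, N muxes (the HTTP stream mux, the file mux, or both). A standalone
sketch of the pattern, with a hypothetical Mux stand-in for Nageru's class (only
add_packet() is assumed; PacketFanout is an illustrative name, not patch code):

	#include <cstdint>
	#include <cstdio>
	#include <vector>

	extern "C" {
	#include <libavcodec/avcodec.h>
	}

	// Hypothetical stand-in for Nageru's Mux; only add_packet() matters here.
	struct Mux {
		void add_packet(const AVPacket &pkt, int64_t pts, int64_t dts)
		{
			printf("packet: size=%d pts=%ld dts=%ld\n", pkt.size, long(pts), long(dts));
		}
	};

	class PacketFanout {
	public:
		// Like X264Encoder::add_mux(): must be called before the first frame,
		// since the vector is not mutex-guarded against the encoder thread.
		void add_mux(Mux *mux) { muxes.push_back(mux); }

		void deliver(const AVPacket &pkt, int64_t pts, int64_t dts)
		{
			for (Mux *mux : muxes) {          // Same packet to every registered mux,
				mux->add_packet(pkt, pts, dts);  // exactly as in encode_frame() above.
			}
		}

	private:
		std::vector<Mux *> muxes;
	};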