#include "quicksync_encoder.h"
+#include <movit/image_format.h>
#include <movit/resource_pool.h> // Must be above the Xlib includes.
#include <movit/util.h>
#include "timebase.h"
#include "x264_encoder.h"
+using namespace movit;
using namespace std;
using namespace std::chrono;
using namespace std::placeholders;
class QOpenGLContext;
class QSurface;
+namespace {
+
+// These need to survive several QuickSyncEncoderImpl instances,
+// so they are kept outside the class.
+once_flag quick_sync_metrics_inited;
+LatencyHistogram mixer_latency_histogram, qs_latency_histogram;
+MuxMetrics current_file_mux_metrics, total_mux_metrics;
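+// 0.0 / 0.0 is NaN, signaling that no file is currently open.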
+std::atomic<double> metric_current_file_start_time_seconds{0.0 / 0.0};
+std::atomic<int64_t> metric_quick_sync_stalled_frames{0};
+
+} // namespace
+
#define CHECK_VASTATUS(va_status, func) \
	if (va_status != VA_STATUS_SUCCESS) { \
		fprintf(stderr, "%s:%d (%s) failed with %d\n", __func__, __LINE__, func, va_status); \
		exit(1); \
	}
static constexpr unsigned int MaxPicOrderCntLsb = (2<<8);
static constexpr unsigned int Log2MaxFrameNum = 16;
static constexpr unsigned int Log2MaxPicOrderCntLsb = 8;
-static constexpr int rc_default_modes[] = { // Priority list of modes.
- VA_RC_VBR,
- VA_RC_CQP,
- VA_RC_VBR_CONSTRAINED,
- VA_RC_CBR,
- VA_RC_VCM,
- VA_RC_NONE,
-};
bitstream_put_ui(bs, nal_unit_type, 5);
}
-void QuickSyncEncoderImpl::sps_rbsp(bitstream *bs)
+void QuickSyncEncoderImpl::sps_rbsp(YCbCrLumaCoefficients ycbcr_coefficients, bitstream *bs)
{
int profile_idc = PROFILE_IDC_BASELINE;
bitstream_put_ui(bs, 1, 1); /* colour_description_present_flag */
{
bitstream_put_ui(bs, 1, 8); /* colour_primaries (1 = BT.709) */
- bitstream_put_ui(bs, 2, 8); /* transfer_characteristics (2 = unspecified, since we use sRGB) */
- if (global_flags.ycbcr_rec709_coefficients) {
+ bitstream_put_ui(bs, 13, 8); /* transfer_characteristics (13 = sRGB) */
+ if (ycbcr_coefficients == YCBCR_REC_709) {
bitstream_put_ui(bs, 1, 8); /* matrix_coefficients (1 = BT.709) */
} else {
+ assert(ycbcr_coefficients == YCBCR_REC_601);
bitstream_put_ui(bs, 6, 8); /* matrix_coefficients (6 = BT.601/SMPTE 170M) */
}
}
}
int
-QuickSyncEncoderImpl::build_packed_seq_buffer(unsigned char **header_buffer)
+QuickSyncEncoderImpl::build_packed_seq_buffer(YCbCrLumaCoefficients ycbcr_coefficients, unsigned char **header_buffer)
{
bitstream bs;
bitstream_start(&bs);
nal_start_code_prefix(&bs);
nal_header(&bs, NAL_REF_IDC_HIGH, NAL_SPS);
- sps_rbsp(&bs);
+ sps_rbsp(ycbcr_coefficients, &bs);
bitstream_end(&bs);
	*header_buffer = (unsigned char *)bs.buffer;
	return bs.bit_offset;
}
-static const char *rc_to_string(int rc_mode)
-{
- switch (rc_mode) {
- case VA_RC_NONE:
- return "NONE";
- case VA_RC_CBR:
- return "CBR";
- case VA_RC_VBR:
- return "VBR";
- case VA_RC_VCM:
- return "VCM";
- case VA_RC_CQP:
- return "CQP";
- case VA_RC_VBR_CONSTRAINED:
- return "VBR_CONSTRAINED";
- default:
- return "Unknown";
- }
-}
-
void QuickSyncEncoderImpl::enable_zerocopy_if_possible()
{
- if (global_flags.uncompressed_video_to_http) {
+ if (global_flags.x264_video_to_disk) {
+ // Quick Sync is entirely disabled.
+ use_zerocopy = false;
+ } else if (global_flags.uncompressed_video_to_http) {
fprintf(stderr, "Disabling zerocopy H.264 encoding due to --http-uncompressed-video.\n");
use_zerocopy = false;
} else if (global_flags.x264_video_to_http) {
} else {
use_zerocopy = true;
}
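+	// Make the final decision visible to the rest of the program.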
+ global_flags.use_zerocopy = use_zerocopy;
}
VADisplay QuickSyncEncoderImpl::va_open_display(const string &va_display)
fprintf(stderr, "error: can't connect to X server!\n");
return NULL;
}
- enable_zerocopy_if_possible();
return vaGetDisplay(x11_display);
} else if (va_display[0] != '/') {
x11_display = XOpenDisplay(va_display.c_str());
fprintf(stderr, "error: can't connect to X server!\n");
return NULL;
}
- enable_zerocopy_if_possible();
return vaGetDisplay(x11_display);
} else {
drm_fd = open(va_display.c_str(), O_RDWR);
}
if (attrib[VAConfigAttribRateControl].value != VA_ATTRIB_NOT_SUPPORTED) {
- int tmp = attrib[VAConfigAttribRateControl].value;
-
- if (rc_mode == -1 || !(rc_mode & tmp)) {
- if (rc_mode != -1) {
- printf("Warning: Don't support the specified RateControl mode: %s!!!, switch to ", rc_to_string(rc_mode));
- }
-
- for (i = 0; i < sizeof(rc_default_modes) / sizeof(rc_default_modes[0]); i++) {
- if (rc_default_modes[i] & tmp) {
- rc_mode = rc_default_modes[i];
- break;
- }
- }
+ if (!(attrib[VAConfigAttribRateControl].value & VA_RC_CQP)) {
+ fprintf(stderr, "ERROR: VA-API encoder does not support CQP mode.\n");
+ exit(1);
}
config_attrib[config_attrib_num].type = VAConfigAttribRateControl;
- config_attrib[config_attrib_num].value = rc_mode;
+ config_attrib[config_attrib_num].value = VA_RC_CQP;
config_attrib_num++;
}
int QuickSyncEncoderImpl::setup_encode()
{
- VAStatus va_status;
- VASurfaceID *tmp_surfaceid;
- int codedbuf_size, i;
- VASurfaceID src_surface[SURFACE_NUM];
- VASurfaceID ref_surface[SURFACE_NUM];
-
- va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice,
- &config_attrib[0], config_attrib_num, &config_id);
- CHECK_VASTATUS(va_status, "vaCreateConfig");
-
- /* create source surfaces */
- va_status = vaCreateSurfaces(va_dpy,
- VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
- &src_surface[0], SURFACE_NUM,
- NULL, 0);
- CHECK_VASTATUS(va_status, "vaCreateSurfaces");
-
- /* create reference surfaces */
- va_status = vaCreateSurfaces(va_dpy,
- VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
- &ref_surface[0], SURFACE_NUM,
- NULL, 0);
- CHECK_VASTATUS(va_status, "vaCreateSurfaces");
-
- tmp_surfaceid = (VASurfaceID *)calloc(2 * SURFACE_NUM, sizeof(VASurfaceID));
- memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID));
- memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID));
-
- /* Create a context for this encode pipe */
- va_status = vaCreateContext(va_dpy, config_id,
- frame_width_mbaligned, frame_height_mbaligned,
- VA_PROGRESSIVE,
- tmp_surfaceid, 2 * SURFACE_NUM,
- &context_id);
- CHECK_VASTATUS(va_status, "vaCreateContext");
- free(tmp_surfaceid);
-
- codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16);
-
- for (i = 0; i < SURFACE_NUM; i++) {
- /* create coded buffer once for all
- * other VA buffers which won't be used again after vaRenderPicture.
- * so APP can always vaCreateBuffer for every frame
- * but coded buffer need to be mapped and accessed after vaRenderPicture/vaEndPicture
- * so VA won't maintain the coded buffer
- */
- va_status = vaCreateBuffer(va_dpy, context_id, VAEncCodedBufferType,
- codedbuf_size, 1, NULL, &gl_surfaces[i].coded_buf);
- CHECK_VASTATUS(va_status, "vaCreateBuffer");
- }
+ if (!global_flags.x264_video_to_disk) {
+ VAStatus va_status;
+ VASurfaceID *tmp_surfaceid;
+ int codedbuf_size;
+ VASurfaceID src_surface[SURFACE_NUM];
+ VASurfaceID ref_surface[SURFACE_NUM];
+
+ va_status = vaCreateConfig(va_dpy, h264_profile, VAEntrypointEncSlice,
+ &config_attrib[0], config_attrib_num, &config_id);
+ CHECK_VASTATUS(va_status, "vaCreateConfig");
+
+ /* create source surfaces */
+ va_status = vaCreateSurfaces(va_dpy,
+ VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
+ &src_surface[0], SURFACE_NUM,
+ NULL, 0);
+ CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+ /* create reference surfaces */
+ va_status = vaCreateSurfaces(va_dpy,
+ VA_RT_FORMAT_YUV420, frame_width_mbaligned, frame_height_mbaligned,
+ &ref_surface[0], SURFACE_NUM,
+ NULL, 0);
+ CHECK_VASTATUS(va_status, "vaCreateSurfaces");
+
+ tmp_surfaceid = (VASurfaceID *)calloc(2 * SURFACE_NUM, sizeof(VASurfaceID));
+ memcpy(tmp_surfaceid, src_surface, SURFACE_NUM * sizeof(VASurfaceID));
+ memcpy(tmp_surfaceid + SURFACE_NUM, ref_surface, SURFACE_NUM * sizeof(VASurfaceID));
+
+ for (int i = 0; i < SURFACE_NUM; i++) {
+ gl_surfaces[i].src_surface = src_surface[i];
+ gl_surfaces[i].ref_surface = ref_surface[i];
+ }
- /* create OpenGL objects */
- //glGenFramebuffers(SURFACE_NUM, fbos);
-
- for (i = 0; i < SURFACE_NUM; i++) {
- if (use_zerocopy) {
- gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
- gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
- } else {
- gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height);
- gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2);
-
- // Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
- // buffers, due to potentially differing pitch.
- glGenBuffers(1, &gl_surfaces[i].pbo);
- glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
- glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
- uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
- gl_surfaces[i].y_offset = 0;
- gl_surfaces[i].cbcr_offset = frame_width * frame_height;
- gl_surfaces[i].y_ptr = ptr + gl_surfaces[i].y_offset;
- gl_surfaces[i].cbcr_ptr = ptr + gl_surfaces[i].cbcr_offset;
- glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
- }
- }
+ /* Create a context for this encode pipe */
+ va_status = vaCreateContext(va_dpy, config_id,
+ frame_width_mbaligned, frame_height_mbaligned,
+ VA_PROGRESSIVE,
+ tmp_surfaceid, 2 * SURFACE_NUM,
+ &context_id);
+ CHECK_VASTATUS(va_status, "vaCreateContext");
+ free(tmp_surfaceid);
+
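+		// Budget roughly 400 bytes of coded output per 16x16 macroblock.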
+ codedbuf_size = (frame_width_mbaligned * frame_height_mbaligned * 400) / (16*16);
+
+ for (int i = 0; i < SURFACE_NUM; i++) {
+			/* Create the coded buffer once and for all. The other VA buffers
+			 * are not used again after vaRenderPicture, so the app can simply
+			 * vaCreateBuffer them anew for every frame; the coded buffer,
+			 * however, must be mapped and read back after
+			 * vaRenderPicture/vaEndPicture, so VA will not maintain it for us.
+			 */
+ va_status = vaCreateBuffer(va_dpy, context_id, VAEncCodedBufferType,
+ codedbuf_size, 1, NULL, &gl_surfaces[i].coded_buf);
+ CHECK_VASTATUS(va_status, "vaCreateBuffer");
+ }
+ }
- for (i = 0; i < SURFACE_NUM; i++) {
- gl_surfaces[i].src_surface = src_surface[i];
- gl_surfaces[i].ref_surface = ref_surface[i];
- }
-
- return 0;
+ /* create OpenGL objects */
+ for (int i = 0; i < SURFACE_NUM; i++) {
+ if (use_zerocopy) {
+ gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
+ gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
+ } else {
+ size_t bytes_per_pixel = (global_flags.x264_bit_depth > 8) ? 2 : 1;
+
+ // Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
+ // buffers, due to potentially differing pitch.
+ glGenBuffers(1, &gl_surfaces[i].pbo);
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
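+			// The planes need only 1.5 bytes per pixel (full-size Y plus
+			// half-size interleaved CbCr), so two bytes per pixel leaves headroom.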
+ glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2 * bytes_per_pixel, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
+ uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2 * bytes_per_pixel, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+ gl_surfaces[i].y_offset = 0;
+ gl_surfaces[i].cbcr_offset = frame_width * frame_height * bytes_per_pixel;
+ gl_surfaces[i].y_ptr = ptr + gl_surfaces[i].y_offset;
+ gl_surfaces[i].cbcr_ptr = ptr + gl_surfaces[i].cbcr_offset;
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+ }
+ }
+
+ return 0;
}
// Given a list like 1 9 3 0 2 8 4 and a pivot element 3, will produce
return 0;
}
-int QuickSyncEncoderImpl::render_packedsequence()
+int QuickSyncEncoderImpl::render_packedsequence(YCbCrLumaCoefficients ycbcr_coefficients)
{
VAEncPackedHeaderParameterBuffer packedheader_param_buffer;
VABufferID packedseq_para_bufid, packedseq_data_bufid, render_id[2];
unsigned char *packedseq_buffer = NULL;
VAStatus va_status;
- length_in_bits = build_packed_seq_buffer(&packedseq_buffer);
+ length_in_bits = build_packed_seq_buffer(ycbcr_coefficients, &packedseq_buffer);
packedheader_param_buffer.type = VAEncPackedHeaderSequence;
vaUnmapBuffer(va_dpy, surf->coded_buf);
static int frameno = 0;
- print_latency("Current QuickSync latency (video inputs → disk mux):",
- task.received_ts, (task.frame_type == FRAME_B), &frameno);
+ print_latency("Current Quick Sync latency (video inputs → disk mux):",
+ task.received_ts, (task.frame_type == FRAME_B), &frameno, &qs_latency_histogram);
{
// Add video.
}
for (unsigned i = 0; i < SURFACE_NUM; i++) {
- if (!use_zerocopy) {
+ if (use_zerocopy) {
+ resource_pool->release_2d_texture(gl_surfaces[i].y_tex);
+ resource_pool->release_2d_texture(gl_surfaces[i].cbcr_tex);
+ } else {
glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
glDeleteBuffers(1, &gl_surfaces[i].pbo);
}
- resource_pool->release_2d_texture(gl_surfaces[i].y_tex);
- resource_pool->release_2d_texture(gl_surfaces[i].cbcr_tex);
}
has_released_gl_resources = true;
return 0;
}
-QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
+QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
: current_storage_frame(0), resource_pool(resource_pool), surface(surface), x264_encoder(x264_encoder), frame_width(width), frame_height(height), disk_space_estimator(disk_space_estimator)
{
file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
//print_input();
- if (global_flags.x264_video_to_http) {
+ if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
assert(x264_encoder != nullptr);
} else {
assert(x264_encoder == nullptr);
}
- init_va(va_display);
+ enable_zerocopy_if_possible();
+ if (!global_flags.x264_video_to_disk) {
+ init_va(va_display);
+ }
setup_encode();
- memset(&seq_param, 0, sizeof(seq_param));
- memset(&pic_param, 0, sizeof(pic_param));
- memset(&slice_param, 0, sizeof(slice_param));
+ if (!global_flags.x264_video_to_disk) {
+ memset(&seq_param, 0, sizeof(seq_param));
+ memset(&pic_param, 0, sizeof(pic_param));
+ memset(&slice_param, 0, sizeof(slice_param));
+ }
+
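+	// Initialize the shared metrics exactly once, regardless of how many
+	// instances get created over time.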
+ call_once(quick_sync_metrics_inited, [](){
+ mixer_latency_histogram.init("mixer");
+ qs_latency_histogram.init("quick_sync");
+ current_file_mux_metrics.init({{ "destination", "current_file" }});
+ total_mux_metrics.init({{ "destination", "files_total" }});
+ global_metrics.add("current_file_start_time_seconds", &metric_current_file_start_time_seconds, Metrics::TYPE_GAUGE);
+ global_metrics.add("quick_sync_stalled_frames", &metric_quick_sync_stalled_frames);
+ });
storage_thread = thread(&QuickSyncEncoderImpl::storage_task_thread, this);
encode_thread = thread([this]{
- //SDL_GL_MakeCurrent(window, context);
QOpenGLContext *context = create_context(this->surface);
eglBindAPI(EGL_OPENGL_API);
if (!make_current(context, this->surface)) {
}
}
-bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
+bool QuickSyncEncoderImpl::is_zerocopy() const
+{
+ return use_zerocopy;
+}
+
+bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
{
assert(!is_shutdown);
GLSurface *surf = nullptr;
if (surf == nullptr) {
fprintf(stderr, "Warning: No free slots for frame %d, rendering has to wait for H.264 encoder\n",
current_storage_frame);
+ ++metric_quick_sync_stalled_frames;
storage_task_queue_changed.wait(lock, [this, &surf]{
if (storage_thread_should_quit)
return true;
surface_for_frame[current_storage_frame] = surf;
}
- *y_tex = surf->y_tex;
- *cbcr_tex = surf->cbcr_tex;
-
- VAStatus va_status = vaDeriveImage(va_dpy, surf->src_surface, &surf->surface_image);
- CHECK_VASTATUS(va_status, "vaDeriveImage");
-
if (use_zerocopy) {
- VABufferInfo buf_info;
- buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME; // or VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM?
- va_status = vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info);
- CHECK_VASTATUS(va_status, "vaAcquireBufferHandle");
-
- // Create Y image.
- surf->y_egl_image = EGL_NO_IMAGE_KHR;
- EGLint y_attribs[] = {
- EGL_WIDTH, frame_width,
- EGL_HEIGHT, frame_height,
- EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '),
- EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
- EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]),
- EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]),
- EGL_NONE
- };
-
- surf->y_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs);
- assert(surf->y_egl_image != EGL_NO_IMAGE_KHR);
-
- // Associate Y image to a texture.
- glBindTexture(GL_TEXTURE_2D, *y_tex);
- glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->y_egl_image);
-
- // Create CbCr image.
- surf->cbcr_egl_image = EGL_NO_IMAGE_KHR;
- EGLint cbcr_attribs[] = {
- EGL_WIDTH, frame_width,
- EGL_HEIGHT, frame_height,
- EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'),
- EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
- EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]),
- EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[1]),
- EGL_NONE
- };
-
- surf->cbcr_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, cbcr_attribs);
- assert(surf->cbcr_egl_image != EGL_NO_IMAGE_KHR);
-
- // Associate CbCr image to a texture.
- glBindTexture(GL_TEXTURE_2D, *cbcr_tex);
- glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->cbcr_egl_image);
+ *y_tex = surf->y_tex;
+ *cbcr_tex = surf->cbcr_tex;
+ } else {
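+		// Non-zerocopy: the caller owns the textures; remember them so
+		// end_frame() can read them back.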
+ surf->y_tex = *y_tex;
+ surf->cbcr_tex = *cbcr_tex;
+ }
+
+ if (!global_flags.x264_video_to_disk) {
+ VAStatus va_status = vaDeriveImage(va_dpy, surf->src_surface, &surf->surface_image);
+ CHECK_VASTATUS(va_status, "vaDeriveImage");
+
+ if (use_zerocopy) {
+ VABufferInfo buf_info;
+ buf_info.mem_type = VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME; // or VA_SURFACE_ATTRIB_MEM_TYPE_KERNEL_DRM?
+ va_status = vaAcquireBufferHandle(va_dpy, surf->surface_image.buf, &buf_info);
+ CHECK_VASTATUS(va_status, "vaAcquireBufferHandle");
+
+ // Create Y image.
+ surf->y_egl_image = EGL_NO_IMAGE_KHR;
+ EGLint y_attribs[] = {
+ EGL_WIDTH, frame_width,
+ EGL_HEIGHT, frame_height,
+ EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('R', '8', ' ', ' '),
+ EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
+ EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[0]),
+ EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[0]),
+ EGL_NONE
+ };
+
+ surf->y_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, y_attribs);
+ assert(surf->y_egl_image != EGL_NO_IMAGE_KHR);
+
+ // Associate Y image to a texture.
+ glBindTexture(GL_TEXTURE_2D, *y_tex);
+ glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->y_egl_image);
+
+ // Create CbCr image.
+ surf->cbcr_egl_image = EGL_NO_IMAGE_KHR;
+ EGLint cbcr_attribs[] = {
+ EGL_WIDTH, frame_width / 2,
+ EGL_HEIGHT, frame_height / 2,
+ EGL_LINUX_DRM_FOURCC_EXT, fourcc_code('G', 'R', '8', '8'),
+ EGL_DMA_BUF_PLANE0_FD_EXT, EGLint(buf_info.handle),
+ EGL_DMA_BUF_PLANE0_OFFSET_EXT, EGLint(surf->surface_image.offsets[1]),
+ EGL_DMA_BUF_PLANE0_PITCH_EXT, EGLint(surf->surface_image.pitches[1]),
+ EGL_NONE
+ };
+
+ surf->cbcr_egl_image = eglCreateImageKHR(eglGetCurrentDisplay(), EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, cbcr_attribs);
+ assert(surf->cbcr_egl_image != EGL_NO_IMAGE_KHR);
+
+ // Associate CbCr image to a texture.
+ glBindTexture(GL_TEXTURE_2D, *cbcr_tex);
+ glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->cbcr_egl_image);
+ }
}
- current_video_frame = PendingFrame{ {}, input_frames, pts, duration };
+ current_video_frame = PendingFrame{ {}, input_frames, pts, duration, ycbcr_coefficients };
return true;
}
void QuickSyncEncoderImpl::add_audio(int64_t pts, vector<float> audio)
{
+ lock_guard<mutex> lock(file_audio_encoder_mutex);
assert(!is_shutdown);
file_audio_encoder->encode_audio(audio, pts + global_delay());
}
assert(!is_shutdown);
if (!use_zerocopy) {
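+	// At bit depths above 8, read back 16-bit samples instead of 8-bit ones.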
+ GLenum type = global_flags.x264_bit_depth > 8 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE;
GLSurface *surf;
{
unique_lock<mutex> lock(storage_task_queue_mutex);
glBindTexture(GL_TEXTURE_2D, surf->y_tex);
check_error();
- glGetTexImage(GL_TEXTURE_2D, 0, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(surf->y_offset));
+ glGetTexImage(GL_TEXTURE_2D, 0, GL_RED, type, BUFFER_OFFSET(surf->y_offset));
check_error();
glBindTexture(GL_TEXTURE_2D, surf->cbcr_tex);
check_error();
- glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(surf->cbcr_offset));
+ glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, type, BUFFER_OFFSET(surf->cbcr_offset));
check_error();
+ // We don't own these; the caller does.
+ surf->y_tex = surf->cbcr_tex = 0;
+
glBindTexture(GL_TEXTURE_2D, 0);
check_error();
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
storage_thread.join();
// Encode any leftover audio in the queues, and also any delayed frames.
- file_audio_encoder->encode_last_audio();
+ {
+ lock_guard<mutex> lock(file_audio_encoder_mutex);
+ file_audio_encoder->encode_last_audio();
+ }
- release_encode();
- deinit_va();
- file_mux.reset();
+ if (!global_flags.x264_video_to_disk) {
+ release_encode();
+ deinit_va();
+ }
is_shutdown = true;
}
+void QuickSyncEncoderImpl::close_file()
+{
+ file_mux.reset();
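+	// Reset to NaN: no file is open anymore.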
+ metric_current_file_start_time_seconds = 0.0 / 0.0;
+}
+
void QuickSyncEncoderImpl::open_output_file(const std::string &filename)
{
AVFormatContext *avctx = avformat_alloc_context();
exit(1);
}
- string video_extradata = ""; // FIXME: See other comment about global headers.
- AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
- file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
- std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1)));
+ string video_extradata; // FIXME: See other comment about global headers.
+ if (global_flags.x264_video_to_disk) {
+ video_extradata = x264_encoder->get_global_headers();
+ }
+
+ current_file_mux_metrics.reset();
+
+ {
+ lock_guard<mutex> lock(file_audio_encoder_mutex);
+ AVCodecParametersWithDeleter audio_codecpar = file_audio_encoder->get_codec_parameters();
+ file_mux.reset(new Mux(avctx, frame_width, frame_height, Mux::CODEC_H264, video_extradata, audio_codecpar.get(), TIMEBASE,
+ std::bind(&DiskSpaceEstimator::report_write, disk_space_estimator, filename, _1),
+ Mux::WRITE_BACKGROUND,
+		                       { &current_file_mux_metrics, &total_mux_metrics }));
+ }
+ metric_current_file_start_time_seconds = get_timestamp_for_metrics();
+
+ if (global_flags.x264_video_to_disk) {
+ x264_encoder->add_mux(file_mux.get());
+ }
}
void QuickSyncEncoderImpl::encode_thread_func()
// Pass the frame on to x264 (or uncompressed to HTTP) as needed.
// Note that this implicitly waits for the frame to be done rendering.
pass_frame(frame, display_frame_num, frame.pts, frame.duration);
+
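+		// When x264 encodes to disk, Quick Sync never sees the frame;
+		// release its surface and move on.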
+ if (global_flags.x264_video_to_disk) {
+ unique_lock<mutex> lock(storage_task_queue_mutex);
+ release_gl_surface(display_frame_num);
+ continue;
+ }
+
reorder_buffer[display_frame_num] = move(frame);
// Now encode as many QuickSync frames as we can using the frames we have available.
}
last_dts = dts;
- encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration);
+ encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration, frame.ycbcr_coefficients);
++quicksync_encoding_frame_num;
}
}
PendingFrame frame = move(pending_frame.second);
int64_t dts = last_dts + (TIMEBASE / MAX_FPS);
printf("Finalizing encode: Encoding leftover frame %d as P-frame instead of B-frame.\n", display_frame_num);
- encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration);
+ encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration, frame.ycbcr_coefficients);
last_dts = dts;
}
}
// Wait for the GPU to be done with the frame.
GLenum sync_status;
do {
- sync_status = glClientWaitSync(frame.fence.get(), 0, 1000000000);
+ sync_status = glClientWaitSync(frame.fence.get(), 0, 0);
check_error();
+ if (sync_status == GL_TIMEOUT_EXPIRED) {
+ // NVIDIA likes to busy-wait; yield instead.
+ this_thread::sleep_for(milliseconds(1));
+ }
} while (sync_status == GL_TIMEOUT_EXPIRED);
assert(sync_status != GL_WAIT_FAILED);
ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames);
static int frameno = 0;
print_latency("Current mixer latency (video inputs → ready for encode):",
- received_ts, false, &frameno);
+ received_ts, false, &frameno, &mixer_latency_histogram);
// Release back any input frames we needed to render this frame.
frame.input_frames.clear();
uint8_t *data = reinterpret_cast<uint8_t *>(surf->y_ptr);
if (global_flags.uncompressed_video_to_http) {
add_packet_for_uncompressed_frame(pts, duration, data);
- } else if (global_flags.x264_video_to_http) {
- x264_encoder->add_frame(pts, duration, data, received_ts);
+ } else if (global_flags.x264_video_to_http || global_flags.x264_video_to_disk) {
+ x264_encoder->add_frame(pts, duration, frame.ycbcr_coefficients, data, received_ts);
}
}
void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
- int frame_type, int64_t pts, int64_t dts, int64_t duration)
+ int frame_type, int64_t pts, int64_t dts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients)
{
const ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames);
// FIXME: If the mux wants global headers, we should not put the
// SPS/PPS before each IDR frame, but rather put it into the
// codec extradata (formatted differently?).
+ //
+ // NOTE: If we change ycbcr_coefficients, it will not take effect
+ // before the next IDR frame. This is acceptable, as it should only
+ // happen on a mode change, which is rare.
render_sequence();
render_picture(surf, frame_type, display_frame_num, gop_start_display_frame_num);
if (h264_packedheader) {
- render_packedsequence();
+ render_packedsequence(ycbcr_coefficients);
render_packedpicture();
}
} else {
tmp.pts = pts;
tmp.dts = dts;
tmp.duration = duration;
+ tmp.ycbcr_coefficients = ycbcr_coefficients;
tmp.received_ts = received_ts;
tmp.ref_display_frame_numbers = move(ref_display_frame_numbers);
storage_task_enqueue(move(tmp));
}
// Proxy object.
-QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
+QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator)
: impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, oformat, x264_encoder, disk_space_estimator)) {}
// Must be defined here because unique_ptr<> destructor needs to know the impl.
impl->add_audio(pts, audio);
}
-bool QuickSyncEncoder::begin_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
+bool QuickSyncEncoder::is_zerocopy() const
+{
+ return impl->is_zerocopy();
+}
+
+bool QuickSyncEncoder::begin_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const vector<RefCountedFrame> &input_frames, GLuint *y_tex, GLuint *cbcr_tex)
{
- return impl->begin_frame(pts, duration, input_frames, y_tex, cbcr_tex);
+ return impl->begin_frame(pts, duration, ycbcr_coefficients, input_frames, y_tex, cbcr_tex);
}
RefCountedGLsync QuickSyncEncoder::end_frame()
impl->shutdown();
}
+void QuickSyncEncoder::close_file()
+{
+	impl->close_file();
+}
+
void QuickSyncEncoder::set_stream_mux(Mux *mux)
{
impl->set_stream_mux(mux);