//#include "sysdeps.h"
#include "h264encode.h"
+#include <movit/util.h>
#include <EGL/eglplatform.h>
#include <X11/X.h>
#include <X11/Xlib.h>
exit(1); \
}
+#define BUFFER_OFFSET(i) ((char *)NULL + (i))
+
//#include "loadsurface.h"
#define NAL_REF_IDC_NONE 0
#define BITSTREAM_ALLOCATE_STEPPING 4096
#define SURFACE_NUM 16 /* 16 surfaces for source YUV */
+#define MAX_NUM_REF1 16 // Seemingly a hardware-fixed value, not related to SURFACE_NUM
+#define MAX_NUM_REF2 32 // Seemingly a hardware-fixed value, not related to SURFACE_NUM
static constexpr unsigned int MaxFrameNum = (2<<16);
static constexpr unsigned int MaxPicOrderCntLsb = (2<<8);
EGLImage y_egl_image, cbcr_egl_image;
// Only if use_zerocopy == false.
- RefCountedGLsync readback_done_fence;
GLuint pbo;
uint8_t *y_ptr, *cbcr_ptr;
+ size_t y_offset, cbcr_offset;
};
GLSurface gl_surfaces[SURFACE_NUM];
VAEncPictureParameterBufferH264 pic_param;
VAEncSliceParameterBufferH264 slice_param;
VAPictureH264 CurrentCurrPic;
- VAPictureH264 ReferenceFrames[16], RefPicList0_P[32], RefPicList0_B[32], RefPicList1_B[32];
+ VAPictureH264 ReferenceFrames[MAX_NUM_REF1], RefPicList0_P[MAX_NUM_REF2], RefPicList0_B[MAX_NUM_REF2], RefPicList1_B[MAX_NUM_REF2];
// Static quality settings.
static constexpr unsigned int frame_bitrate = 15000000 / 60; // Doesn't really matter; only initial_qp does.
if (support_encode == 0) {
printf("Can't find VAEntrypointEncSlice for H264 profiles. If you are using a non-Intel GPU\n");
- printf("but have one in your system, try launching Nageru with --va-display /dev/dri/card0\n");
+ printf("but have one in your system, try launching Nageru with --va-display /dev/dri/renderD128\n");
printf("to use VA-API against DRM instead of X11.\n");
exit(1);
} else {
// buffers, due to potentially differing pitch.
glGenBuffers(1, &gl_surfaces[i].pbo);
glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
- glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+ glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
- gl_surfaces[i].y_ptr = ptr;
- gl_surfaces[i].cbcr_ptr = ptr + frame_width * frame_height;
+ gl_surfaces[i].y_offset = 0;
+ gl_surfaces[i].cbcr_offset = frame_width * frame_height;
+ gl_surfaces[i].y_ptr = ptr + gl_surfaces[i].y_offset;
+ gl_surfaces[i].cbcr_ptr = ptr + gl_surfaces[i].cbcr_offset;
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
}
}
CurrentCurrPic = pic_param.CurrPic;
memcpy(pic_param.ReferenceFrames, ReferenceFrames, numShortTerm*sizeof(VAPictureH264));
- for (i = numShortTerm; i < SURFACE_NUM; i++) {
+ for (i = numShortTerm; i < MAX_NUM_REF1; i++) {
pic_param.ReferenceFrames[i].picture_id = VA_INVALID_SURFACE;
pic_param.ReferenceFrames[i].flags = VA_PICTURE_H264_INVALID;
}
int refpiclist0_max = h264_maxref & 0xffff;
memcpy(slice_param.RefPicList0, RefPicList0_P, refpiclist0_max*sizeof(VAPictureH264));
- for (i = refpiclist0_max; i < 32; i++) {
+ for (i = refpiclist0_max; i < MAX_NUM_REF2; i++) {
slice_param.RefPicList0[i].picture_id = VA_INVALID_SURFACE;
slice_param.RefPicList0[i].flags = VA_PICTURE_H264_INVALID;
}
int refpiclist1_max = (h264_maxref >> 16) & 0xffff;
memcpy(slice_param.RefPicList0, RefPicList0_B, refpiclist0_max*sizeof(VAPictureH264));
- for (i = refpiclist0_max; i < 32; i++) {
+ for (i = refpiclist0_max; i < MAX_NUM_REF2; i++) {
slice_param.RefPicList0[i].picture_id = VA_INVALID_SURFACE;
slice_param.RefPicList0[i].flags = VA_PICTURE_H264_INVALID;
}
memcpy(slice_param.RefPicList1, RefPicList1_B, refpiclist1_max*sizeof(VAPictureH264));
- for (i = refpiclist1_max; i < 32; i++) {
+ for (i = refpiclist1_max; i < MAX_NUM_REF2; i++) {
slice_param.RefPicList1[i].picture_id = VA_INVALID_SURFACE;
slice_param.RefPicList1[i].flags = VA_PICTURE_H264_INVALID;
}
pending_audio_frames.erase(it);
}
- AVFrame *frame = avcodec_alloc_frame();
+ AVFrame *frame = av_frame_alloc();
frame->nb_samples = audio.size() / 2;
frame->format = AV_SAMPLE_FMT_S32;
frame->channel_layout = AV_CH_LAYOUT_STEREO;
httpd->add_packet(pkt, audio_pts + global_delay, audio_pts + global_delay);
}
// TODO: Delayed frames.
- avcodec_free_frame(&frame);
+ av_frame_free(&frame);  // Frees the AVFrame from av_frame_alloc(); av_frame_unref() only drops its buffers and would leak the struct.
av_free_packet(&pkt);
if (audio_pts == task.pts) break;
}
{
// Wait until this frame slot is done encoding.
unique_lock<mutex> lock(storage_task_queue_mutex);
+ if (srcsurface_status[current_storage_frame % SURFACE_NUM] != SRC_SURFACE_FREE) {
+ fprintf(stderr, "Warning: Slot %d (for frame %d) is still encoding, rendering has to wait for H.264 encoder\n",
+ current_storage_frame % SURFACE_NUM, current_storage_frame);
+ }
storage_task_queue_changed.wait(lock, [this]{ return storage_thread_should_quit || (srcsurface_status[current_storage_frame % SURFACE_NUM] == SRC_SURFACE_FREE); });
if (storage_thread_should_quit) return false;
}
if (!use_zerocopy) {
GLSurface *surf = &gl_surfaces[current_storage_frame % SURFACE_NUM];
+
glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+ check_error();
+
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, surf->pbo);
+ check_error();
glBindTexture(GL_TEXTURE_2D, surf->y_tex);
- glGetTexImage(GL_TEXTURE_2D, 0, GL_RED, GL_UNSIGNED_BYTE, surf->y_ptr);
+ check_error();
+ glGetTexImage(GL_TEXTURE_2D, 0, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(surf->y_offset));
+ check_error();
glBindTexture(GL_TEXTURE_2D, surf->cbcr_tex);
- glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, GL_UNSIGNED_BYTE, surf->cbcr_ptr);
+ check_error();
+ glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(surf->cbcr_offset));
+ check_error();
glBindTexture(GL_TEXTURE_2D, 0);
+ check_error();
+ glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+ check_error();
- glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+ glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+ check_error();
fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
+ check_error();
}
{
int frame_type, int64_t pts, int64_t dts)
{
// Wait for the GPU to be done with the frame.
- glClientWaitSync(frame.fence.get(), 0, 0);
+ GLenum sync_status;
+ do {
+ sync_status = glClientWaitSync(frame.fence.get(), 0, 1000000000);
+ check_error();
+ } while (sync_status == GL_TIMEOUT_EXPIRED);
+ assert(sync_status != GL_WAIT_FAILED);
// Release back any input frames we needed to render this frame.
frame.input_frames.clear();
unsigned char *surface_p = nullptr;
vaMapBuffer(va_dpy, surf->surface_image.buf, (void **)&surface_p);
- unsigned char *y_ptr = (unsigned char *)surface_p;
- memcpy_with_pitch(y_ptr, surf->y_ptr, frame_width, surf->surface_image.pitches[0], frame_height);
+ unsigned char *va_y_ptr = (unsigned char *)surface_p + surf->surface_image.offsets[0];
+ memcpy_with_pitch(va_y_ptr, surf->y_ptr, frame_width, surf->surface_image.pitches[0], frame_height);
- unsigned char *cbcr_ptr = (unsigned char *)surface_p + surf->surface_image.offsets[1];
- memcpy_with_pitch(cbcr_ptr, surf->cbcr_ptr, (frame_width / 2) * sizeof(uint16_t), surf->surface_image.pitches[1], frame_height / 2);
+ unsigned char *va_cbcr_ptr = (unsigned char *)surface_p + surf->surface_image.offsets[1];
+ memcpy_with_pitch(va_cbcr_ptr, surf->cbcr_ptr, (frame_width / 2) * sizeof(uint16_t), surf->surface_image.pitches[1], frame_height / 2);
va_status = vaUnmapBuffer(va_dpy, surf->surface_image.buf);
CHECK_VASTATUS(va_status, "vaUnmapBuffer");