git.sesse.net Git - nageru/blobdiff - h264encode.cpp
Fix an issue in the H264 encoder where a surface could be rendered to while it was...
[nageru] / h264encode.cpp
index ae6d3e18f40054c0ceb4fd6da56c2ebf36b671d2..6a402f45765e4932f1c0dcaf57208554d19f6969 100644 (file)
@@ -1,6 +1,7 @@
 //#include "sysdeps.h"
 #include "h264encode.h"
 
+#include <movit/util.h>
 #include <EGL/eglplatform.h>
 #include <X11/X.h>
 #include <X11/Xlib.h>
@@ -83,6 +84,8 @@ class QSurface;
    
 #define BITSTREAM_ALLOCATE_STEPPING     4096
 #define SURFACE_NUM 16 /* 16 surfaces for source YUV */
+#define MAX_NUM_REF1 16 // Seemingly a hardware-fixed value, not related to SURFACE_NUM
+#define MAX_NUM_REF2 32 // Seemingly a hardware-fixed value, not related to SURFACE_NUM
 
 static constexpr unsigned int MaxFrameNum = (2<<16);
 static constexpr unsigned int MaxPicOrderCntLsb = (2<<8);
@@ -204,7 +207,6 @@ private:
                EGLImage y_egl_image, cbcr_egl_image;
 
                // Only if use_zerocopy == false.
-               RefCountedGLsync readback_done_fence;
                GLuint pbo;
                uint8_t *y_ptr, *cbcr_ptr;
                size_t y_offset, cbcr_offset;
@@ -217,7 +219,7 @@ private:
        VAEncPictureParameterBufferH264 pic_param;
        VAEncSliceParameterBufferH264 slice_param;
        VAPictureH264 CurrentCurrPic;
-       VAPictureH264 ReferenceFrames[16], RefPicList0_P[32], RefPicList0_B[32], RefPicList1_B[32];
+       VAPictureH264 ReferenceFrames[MAX_NUM_REF1], RefPicList0_P[MAX_NUM_REF2], RefPicList0_B[MAX_NUM_REF2], RefPicList1_B[MAX_NUM_REF2];
 
        // Static quality settings.
        static constexpr unsigned int frame_bitrate = 15000000 / 60;  // Doesn't really matter; only initial_qp does.
@@ -924,7 +926,7 @@ int H264EncoderImpl::init_va(const string &va_display)
     
     if (support_encode == 0) {
         printf("Can't find VAEntrypointEncSlice for H264 profiles. If you are using a non-Intel GPU\n");
-        printf("but have one in your system, try launching Nageru with --va-display /dev/dri/card0\n");
+        printf("but have one in your system, try launching Nageru with --va-display /dev/dri/renderD128\n");
         printf("to use VA-API against DRM instead of X11.\n");
         exit(1);
     } else {
@@ -1109,10 +1111,12 @@ int H264EncoderImpl::setup_encode()
             // buffers, due to potentially differing pitch.
             glGenBuffers(1, &gl_surfaces[i].pbo);
             glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
-            glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2, nullptr, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+            glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
             uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
-            gl_surfaces[i].y_ptr = ptr;
-            gl_surfaces[i].cbcr_ptr = ptr + frame_width * frame_height;
+            gl_surfaces[i].y_offset = 0;
+            gl_surfaces[i].cbcr_offset = frame_width * frame_height;
+            gl_surfaces[i].y_ptr = ptr + gl_surfaces[i].y_offset;
+            gl_surfaces[i].cbcr_ptr = ptr + gl_surfaces[i].cbcr_offset;
             glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
         }
     }
@@ -1296,7 +1300,7 @@ int H264EncoderImpl::render_picture(int frame_type, int display_frame_num, int g
     CurrentCurrPic = pic_param.CurrPic;
 
     memcpy(pic_param.ReferenceFrames, ReferenceFrames, numShortTerm*sizeof(VAPictureH264));
-    for (i = numShortTerm; i < SURFACE_NUM; i++) {
+    for (i = numShortTerm; i < MAX_NUM_REF1; i++) {
         pic_param.ReferenceFrames[i].picture_id = VA_INVALID_SURFACE;
         pic_param.ReferenceFrames[i].flags = VA_PICTURE_H264_INVALID;
     }
@@ -1446,7 +1450,7 @@ int H264EncoderImpl::render_slice(int encoding_frame_num, int display_frame_num,
         int refpiclist0_max = h264_maxref & 0xffff;
         memcpy(slice_param.RefPicList0, RefPicList0_P, refpiclist0_max*sizeof(VAPictureH264));
 
-        for (i = refpiclist0_max; i < 32; i++) {
+        for (i = refpiclist0_max; i < MAX_NUM_REF2; i++) {
             slice_param.RefPicList0[i].picture_id = VA_INVALID_SURFACE;
             slice_param.RefPicList0[i].flags = VA_PICTURE_H264_INVALID;
         }
@@ -1455,13 +1459,13 @@ int H264EncoderImpl::render_slice(int encoding_frame_num, int display_frame_num,
         int refpiclist1_max = (h264_maxref >> 16) & 0xffff;
 
         memcpy(slice_param.RefPicList0, RefPicList0_B, refpiclist0_max*sizeof(VAPictureH264));
-        for (i = refpiclist0_max; i < 32; i++) {
+        for (i = refpiclist0_max; i < MAX_NUM_REF2; i++) {
             slice_param.RefPicList0[i].picture_id = VA_INVALID_SURFACE;
             slice_param.RefPicList0[i].flags = VA_PICTURE_H264_INVALID;
         }
 
         memcpy(slice_param.RefPicList1, RefPicList1_B, refpiclist1_max*sizeof(VAPictureH264));
-        for (i = refpiclist1_max; i < 32; i++) {
+        for (i = refpiclist1_max; i < MAX_NUM_REF2; i++) {
             slice_param.RefPicList1[i].picture_id = VA_INVALID_SURFACE;
             slice_param.RefPicList1[i].flags = VA_PICTURE_H264_INVALID;
         }
@@ -1536,7 +1540,7 @@ void H264EncoderImpl::save_codeddata(storage_task task)
              pending_audio_frames.erase(it); 
         }
 
-        AVFrame *frame = avcodec_alloc_frame();
+        AVFrame *frame = av_frame_alloc();
         frame->nb_samples = audio.size() / 2;
         frame->format = AV_SAMPLE_FMT_S32;
         frame->channel_layout = AV_CH_LAYOUT_STEREO;
@@ -1568,7 +1572,7 @@ void H264EncoderImpl::save_codeddata(storage_task task)
             httpd->add_packet(pkt, audio_pts + global_delay, audio_pts + global_delay);
         }
         // TODO: Delayed frames.
-        avcodec_free_frame(&frame);
+        av_frame_free(&frame);  // Frees the AVFrame from av_frame_alloc(); av_frame_unref() would only drop its buffers and leak the struct.
         av_free_packet(&pkt);
         if (audio_pts == task.pts) break;
     }
@@ -1599,7 +1603,6 @@ void H264EncoderImpl::storage_task_enqueue(storage_task task)
 {
        unique_lock<mutex> lock(storage_task_queue_mutex);
        storage_task_queue.push(move(task));
-       srcsurface_status[task.display_order % SURFACE_NUM] = SRC_SURFACE_IN_ENCODING;
        storage_task_queue_changed.notify_all();
 }
 
@@ -1723,7 +1726,12 @@ bool H264EncoderImpl::begin_frame(GLuint *y_tex, GLuint *cbcr_tex)
        {
                // Wait until this frame slot is done encoding.
                unique_lock<mutex> lock(storage_task_queue_mutex);
+               if (srcsurface_status[current_storage_frame % SURFACE_NUM] != SRC_SURFACE_FREE) {
+                       fprintf(stderr, "Warning: Slot %d (for frame %d) is still encoding, rendering has to wait for H.264 encoder\n",
+                               current_storage_frame % SURFACE_NUM, current_storage_frame);
+               }
                storage_task_queue_changed.wait(lock, [this]{ return storage_thread_should_quit || (srcsurface_status[current_storage_frame % SURFACE_NUM] == SRC_SURFACE_FREE); });
+               srcsurface_status[current_storage_frame % SURFACE_NUM] = SRC_SURFACE_IN_ENCODING;
                if (storage_thread_should_quit) return false;
        }
 
@@ -1799,21 +1807,32 @@ void H264EncoderImpl::end_frame(RefCountedGLsync fence, int64_t pts, const vecto
 
        if (!use_zerocopy) {
                GLSurface *surf = &gl_surfaces[current_storage_frame % SURFACE_NUM];
+
                glPixelStorei(GL_PACK_ROW_LENGTH, 0);
+               check_error();
 
                glBindBuffer(GL_PIXEL_PACK_BUFFER, surf->pbo);
+               check_error();
 
                glBindTexture(GL_TEXTURE_2D, surf->y_tex);
+               check_error();
                glGetTexImage(GL_TEXTURE_2D, 0, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(surf->y_offset));
+               check_error();
 
                glBindTexture(GL_TEXTURE_2D, surf->cbcr_tex);
+               check_error();
                glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(surf->cbcr_offset));
+               check_error();
 
                glBindTexture(GL_TEXTURE_2D, 0);
+               check_error();
                glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
+               check_error();
 
-               glMemoryBarrier(GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+               glMemoryBarrier(GL_TEXTURE_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
+               check_error();
                fence = RefCountedGLsync(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
+               check_error();
        }
 
        {
@@ -1936,7 +1955,12 @@ void H264EncoderImpl::encode_frame(H264EncoderImpl::PendingFrame frame, int enco
                                    int frame_type, int64_t pts, int64_t dts)
 {
        // Wait for the GPU to be done with the frame.
-       glClientWaitSync(frame.fence.get(), 0, 0);
+       GLenum sync_status;
+       do {
+               sync_status = glClientWaitSync(frame.fence.get(), 0, 1000000000);
+               check_error();
+       } while (sync_status == GL_TIMEOUT_EXPIRED);
+       assert(sync_status != GL_WAIT_FAILED);
 
        // Release back any input frames we needed to render this frame.
        frame.input_frames.clear();
@@ -1953,13 +1977,11 @@ void H264EncoderImpl::encode_frame(H264EncoderImpl::PendingFrame frame, int enco
                unsigned char *surface_p = nullptr;
                vaMapBuffer(va_dpy, surf->surface_image.buf, (void **)&surface_p);
 
-               unsigned char *y_ptr = (unsigned char *)surface_p;
-               memcpy_with_pitch(y_ptr, surf->y_ptr, frame_width, surf->surface_image.pitches[0], frame_height);
-               surf->y_offset = 0;
+               unsigned char *va_y_ptr = (unsigned char *)surface_p + surf->surface_image.offsets[0];
+               memcpy_with_pitch(va_y_ptr, surf->y_ptr, frame_width, surf->surface_image.pitches[0], frame_height);
 
-               unsigned char *cbcr_ptr = (unsigned char *)surface_p + frame_width * frame_height;
-               surf->cbcr_offset = frame_width * frame_height;
-               memcpy_with_pitch(cbcr_ptr, surf->cbcr_ptr, (frame_width / 2) * sizeof(uint16_t), surf->surface_image.pitches[1], frame_height / 2);
+               unsigned char *va_cbcr_ptr = (unsigned char *)surface_p + surf->surface_image.offsets[1];
+               memcpy_with_pitch(va_cbcr_ptr, surf->cbcr_ptr, (frame_width / 2) * sizeof(uint16_t), surf->surface_image.pitches[1], frame_height / 2);
 
                va_status = vaUnmapBuffer(va_dpy, surf->surface_image.buf);
                CHECK_VASTATUS(va_status, "vaUnmapBuffer");