Release Nageru 1.0.0, with some documentation updates.
diff --git a/h264encode.cpp b/h264encode.cpp
index c233f39b738101140589182d9e738dedb467f861..78e34474b80e07a397c41dbaf3a0cf70d1807a3d 100644
--- a/h264encode.cpp
+++ b/h264encode.cpp
@@ -6,11 +6,14 @@
 #include <X11/Xlib.h>
 #include <assert.h>
 #include <epoxy/egl.h>
+extern "C" {
 #include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
 #include <libavutil/channel_layout.h>
 #include <libavutil/frame.h>
 #include <libavutil/rational.h>
 #include <libavutil/samplefmt.h>
+}
 #include <libdrm/drm_fourcc.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <va/va_drmcommon.h>
 #include <va/va_enc_h264.h>
 #include <va/va_x11.h>
+#include <algorithm>
 #include <condition_variable>
 #include <cstdint>
+#include <map>
 #include <memory>
 #include <mutex>
 #include <queue>
@@ -73,57 +78,13 @@ class QSurface;
 #define PROFILE_IDC_HIGH        100
    
 #define BITSTREAM_ALLOCATE_STEPPING     4096
-
 #define SURFACE_NUM 16 /* 16 surfaces for source YUV */
-static  VADisplay va_dpy;
-static  VAProfile h264_profile = (VAProfile)~0;
-static  VAConfigAttrib config_attrib[VAConfigAttribTypeMax];
-static  int config_attrib_num = 0, enc_packed_header_idx;
-
-struct GLSurface {
-       VASurfaceID src_surface, ref_surface;
-       VABufferID coded_buf;
-
-       VAImage surface_image;
-       GLuint y_tex, cbcr_tex;
-       EGLImage y_egl_image, cbcr_egl_image;
-};
-GLSurface gl_surfaces[SURFACE_NUM];
-
-static  VAConfigID config_id;
-static  VAContextID context_id;
-static  VAEncSequenceParameterBufferH264 seq_param;
-static  VAEncPictureParameterBufferH264 pic_param;
-static  VAEncSliceParameterBufferH264 slice_param;
-static  VAPictureH264 CurrentCurrPic;
-static  VAPictureH264 ReferenceFrames[16], RefPicList0_P[32], RefPicList0_B[32], RefPicList1_B[32];
-
-static  unsigned int MaxFrameNum = (2<<16);
-static  unsigned int MaxPicOrderCntLsb = (2<<8);
-static  unsigned int Log2MaxFrameNum = 16;
-static  unsigned int Log2MaxPicOrderCntLsb = 8;
-
-static  unsigned int num_ref_frames = 2;
-static  unsigned int numShortTerm = 0;
-static  int constraint_set_flag = 0;
-static  int h264_packedheader = 0; /* support pack header? */
-static  int h264_maxref = (1<<16|1);
-static  int h264_entropy_mode = 1; /* cabac */
-
-static  int frame_width = 176;
-static  int frame_height = 144;
-static  int frame_width_mbaligned;
-static  int frame_height_mbaligned;
-static  unsigned int frame_bitrate = 0;
-static  double frame_size = 0;
-static  int initial_qp = 15;
-//static  int initial_qp = 28;
-static  int minimal_qp = 0;
-static  int intra_period = 30;
-static  int intra_idr_period = MAX_FPS;  // About a second; more at lower frame rates. Not ideal.
-static  int ip_period = 3;
-static  int rc_mode = -1;
-static  int rc_default_modes[] = {
+
+static constexpr unsigned int MaxFrameNum = (2<<16);
+static constexpr unsigned int MaxPicOrderCntLsb = (2<<8);
+static constexpr unsigned int Log2MaxFrameNum = 16;
+static constexpr unsigned int Log2MaxPicOrderCntLsb = 8;
+static constexpr int rc_default_modes[] = {  // Priority list of modes.
     VA_RC_VBR,
     VA_RC_CQP,
     VA_RC_VBR_CONSTRAINED,
@@ -131,10 +92,6 @@ static  int rc_default_modes[] = {
     VA_RC_VCM,
     VA_RC_NONE,
 };
-static  unsigned int current_frame_num = 0;
-
-static  int misc_priv_type = 0;
-static  int misc_priv_value = 0;
 
 /* thread to save coded data */
 #define SRC_SURFACE_FREE        0
@@ -149,6 +106,132 @@ typedef struct __bitstream bitstream;
 
 using namespace std;
 
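+// All the state and logic for the encoder. The public H264Encoder class at
+// the bottom of this file is only a thin proxy that forwards to this one.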
+class H264EncoderImpl {
+public:
+       H264EncoderImpl(QSurface *surface, int width, int height, HTTPD *httpd);
+       ~H264EncoderImpl();
+       void add_audio(int64_t pts, vector<float> audio);
+       bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
+       void end_frame(RefCountedGLsync fence, int64_t pts, const vector<RefCountedFrame> &input_frames);
+       void shutdown();
+
+private:
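+       // Data for a frame that the encode thread hands off to the storage
+       // thread, which reads back the coded buffer and outputs the result.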
+       struct storage_task {
+               unsigned long long display_order;
+               int frame_type;
+               vector<float> audio;
+               int64_t pts, dts;
+       };
+       struct PendingFrame {
+               RefCountedGLsync fence;
+               vector<RefCountedFrame> input_frames;
+               int64_t pts;
+       };
+
+       void encode_thread_func();
+       void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts);
+       void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
+                         int frame_type, int64_t pts, int64_t dts);
+       void storage_task_thread();
+       void storage_task_enqueue(storage_task task);
+       void save_codeddata(storage_task task);
+       int render_packedsequence();
+       int render_packedpicture();
+       void render_packedslice();
+       int render_sequence();
+       int render_picture(int frame_type, int display_frame_num, int gop_start_display_frame_num);
+       void sps_rbsp(bitstream *bs);
+       void pps_rbsp(bitstream *bs);
+       int build_packed_pic_buffer(unsigned char **header_buffer);
+       int render_slice(int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, int frame_type);
+       void slice_header(bitstream *bs);
+       int build_packed_seq_buffer(unsigned char **header_buffer);
+       int build_packed_slice_buffer(unsigned char **header_buffer);
+       int init_va();
+       int deinit_va();
+       VADisplay va_open_display(void);
+       void va_close_display(VADisplay va_dpy);
+       int setup_encode();
+       int release_encode();
+       void update_ReferenceFrames(int frame_type);
+       int update_RefPicList(int frame_type);
+
+       bool is_shutdown = false;
+
+       thread encode_thread, storage_thread;
+
+       mutex storage_task_queue_mutex;
+       condition_variable storage_task_queue_changed;
+       int srcsurface_status[SURFACE_NUM];  // protected by storage_task_queue_mutex
+       queue<storage_task> storage_task_queue;  // protected by storage_task_queue_mutex
+       bool storage_thread_should_quit = false;  // protected by storage_task_queue_mutex
+
+       mutex frame_queue_mutex;
+       condition_variable frame_queue_nonempty;
+       bool encode_thread_should_quit = false;  // under frame_queue_mutex
+
+       int current_storage_frame;
+
+       map<int, PendingFrame> pending_video_frames;  // under frame_queue_mutex
+       map<int64_t, vector<float>> pending_audio_frames;  // under frame_queue_mutex
+       QSurface *surface;
+
+       AVCodecContext *context_audio;
+       HTTPD *httpd;
+
+       Display *x11_display;
+       Window x11_window;
+
+       // Encoder parameters
+       VADisplay va_dpy;
+       VAProfile h264_profile = (VAProfile)~0;
+       VAConfigAttrib config_attrib[VAConfigAttribTypeMax];
+       int config_attrib_num = 0, enc_packed_header_idx;
+
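+       // Everything tied to one frame slot: the VA-API surfaces and coded-data
+       // buffer, plus the GL textures/EGL images the frame is rendered into.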
+       struct GLSurface {
+               VASurfaceID src_surface, ref_surface;
+               VABufferID coded_buf;
+
+               VAImage surface_image;
+               GLuint y_tex, cbcr_tex;
+               EGLImage y_egl_image, cbcr_egl_image;
+       };
+       GLSurface gl_surfaces[SURFACE_NUM];
+
+       VAConfigID config_id;
+       VAContextID context_id;
+       VAEncSequenceParameterBufferH264 seq_param;
+       VAEncPictureParameterBufferH264 pic_param;
+       VAEncSliceParameterBufferH264 slice_param;
+       VAPictureH264 CurrentCurrPic;
+       VAPictureH264 ReferenceFrames[16], RefPicList0_P[32], RefPicList0_B[32], RefPicList1_B[32];
+
+       // Static quality settings.
+       static constexpr unsigned int frame_bitrate = 15000000 / 60;  // Doesn't really matter; only initial_qp does.
+       static constexpr unsigned int num_ref_frames = 2;
+       static constexpr int initial_qp = 15;
+       static constexpr int minimal_qp = 0;
+       static constexpr int intra_period = 30;
+       static constexpr int intra_idr_period = MAX_FPS;  // About a second; more at lower frame rates. Not ideal.
+
+       // Quality settings that are meant to be static, but might be overridden
+       // by the profile.
+       int constraint_set_flag = 0;
+       int h264_packedheader = 0; /* supports packed headers? */
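+       // Presumably packed like VAConfigAttribEncMaxRefFrames: low 16 bits are
+       // the maximum list 0 references, high 16 bits the maximum list 1 references.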
+       int h264_maxref = (1<<16|1);
+       int h264_entropy_mode = 1; /* cabac */
+       int ip_period = 3;
+
+       int rc_mode = -1;
+       unsigned int current_frame_num = 0;
+       unsigned int numShortTerm = 0;
+
+       int frame_width;
+       int frame_height;
+       int frame_width_mbaligned;
+       int frame_height_mbaligned;
+};
+
 // Supposedly vaRenderPicture() is supposed to destroy the buffer implicitly,
 // but if we don't delete it here, we get leaks. The GStreamer implementation
 // does the same.
@@ -158,7 +241,7 @@ static void render_picture_and_delete(VADisplay dpy, VAContextID context, VABuff
     CHECK_VASTATUS(va_status, "vaRenderPicture");
 
     for (int i = 0; i < num_buffers; ++i) {
-        va_status = vaDestroyBuffer(va_dpy, buffers[i]);
+        va_status = vaDestroyBuffer(dpy, buffers[i]);
         CHECK_VASTATUS(va_status, "vaDestroyBuffer");
     }
 }
@@ -210,7 +293,11 @@ bitstream_put_ui(bitstream *bs, unsigned int val, int size_in_bits)
         bs->buffer[pos] = (bs->buffer[pos] << size_in_bits | val);
     } else {
         size_in_bits -= bit_left;
-        bs->buffer[pos] = (bs->buffer[pos] << bit_left) | (val >> size_in_bits);
+        if (bit_left >= 32) {
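+            // bit_left can be 32 here (the current word is still empty), and
+            // shifting a 32-bit value by 32 is undefined behavior, so skip the shift.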
+            bs->buffer[pos] = (val >> size_in_bits);
+        } else {
+            bs->buffer[pos] = (bs->buffer[pos] << bit_left) | (val >> size_in_bits);
+        }
         bs->buffer[pos] = va_swap32(bs->buffer[pos]);
 
         if (pos + 1 == bs->max_size_in_dword) {
@@ -289,7 +376,7 @@ static void nal_header(bitstream *bs, int nal_ref_idc, int nal_unit_type)
     bitstream_put_ui(bs, nal_unit_type, 5);
 }
 
-static void sps_rbsp(bitstream *bs)
+void H264EncoderImpl::sps_rbsp(bitstream *bs)
 {
     int profile_idc = PROFILE_IDC_BASELINE;
 
@@ -397,7 +484,7 @@ static void sps_rbsp(bitstream *bs)
 }
 
 
-static void pps_rbsp(bitstream *bs)
+void H264EncoderImpl::pps_rbsp(bitstream *bs)
 {
     bitstream_put_ue(bs, pic_param.pic_parameter_set_id);      /* pic_parameter_set_id */
     bitstream_put_ue(bs, pic_param.seq_parameter_set_id);      /* seq_parameter_set_id */
@@ -430,7 +517,7 @@ static void pps_rbsp(bitstream *bs)
     rbsp_trailing_bits(bs);
 }
 
-static void slice_header(bitstream *bs)
+void H264EncoderImpl::slice_header(bitstream *bs)
 {
     int first_mb_in_slice = slice_param.macroblock_address;
 
@@ -525,8 +612,7 @@ static void slice_header(bitstream *bs)
     }
 }
 
-static int
-build_packed_pic_buffer(unsigned char **header_buffer)
+int H264EncoderImpl::build_packed_pic_buffer(unsigned char **header_buffer)
 {
     bitstream bs;
 
@@ -540,8 +626,8 @@ build_packed_pic_buffer(unsigned char **header_buffer)
     return bs.bit_offset;
 }
 
-static int
-build_packed_seq_buffer(unsigned char **header_buffer)
+int H264EncoderImpl::build_packed_seq_buffer(unsigned char **header_buffer)
 {
     bitstream bs;
 
@@ -555,7 +641,7 @@ build_packed_seq_buffer(unsigned char **header_buffer)
     return bs.bit_offset;
 }
 
-static int build_packed_slice_buffer(unsigned char **header_buffer)
+int H264EncoderImpl::build_packed_slice_buffer(unsigned char **header_buffer)
 {
     bitstream bs;
     int is_idr = !!pic_param.pic_fields.bits.idr_pic_flag;
@@ -725,7 +811,7 @@ void encoding2display_order(
 }
 
 
-static const char *rc_to_string(int rcmode)
+static const char *rc_to_string(int rc_mode)
 {
     switch (rc_mode) {
     case VA_RC_NONE:
@@ -745,160 +831,7 @@ static const char *rc_to_string(int rcmode)
     }
 }
 
-#if 0
-static int process_cmdline(int argc, char *argv[])
-{
-    char c;
-    const struct option long_opts[] = {
-        {"help", no_argument, NULL, 0 },
-        {"bitrate", required_argument, NULL, 1 },
-        {"minqp", required_argument, NULL, 2 },
-        {"initialqp", required_argument, NULL, 3 },
-        {"intra_period", required_argument, NULL, 4 },
-        {"idr_period", required_argument, NULL, 5 },
-        {"ip_period", required_argument, NULL, 6 },
-        {"rcmode", required_argument, NULL, 7 },
-        {"srcyuv", required_argument, NULL, 9 },
-        {"recyuv", required_argument, NULL, 10 },
-        {"fourcc", required_argument, NULL, 11 },
-        {"syncmode", no_argument, NULL, 12 },
-        {"enablePSNR", no_argument, NULL, 13 },
-        {"prit", required_argument, NULL, 14 },
-        {"priv", required_argument, NULL, 15 },
-        {"framecount", required_argument, NULL, 16 },
-        {"entropy", required_argument, NULL, 17 },
-        {"profile", required_argument, NULL, 18 },
-        {NULL, no_argument, NULL, 0 }};
-    int long_index;
-    
-    while ((c =getopt_long_only(argc, argv, "w:h:n:f:o:?", long_opts, &long_index)) != EOF) {
-        switch (c) {
-        case 'w':
-            frame_width = atoi(optarg);
-            break;
-        case 'h':
-            frame_height = atoi(optarg);
-            break;
-        case 'n':
-        case 'f':
-            frame_rate = atoi(optarg);
-            break;
-        case 'o':
-            coded_fn = strdup(optarg);
-            break;
-        case 0:
-            print_help();
-            exit(0);
-        case 1:
-            frame_bitrate = atoi(optarg);
-            break;
-        case 2:
-            minimal_qp = atoi(optarg);
-            break;
-        case 3:
-            initial_qp = atoi(optarg);
-            break;
-        case 4:
-            intra_period = atoi(optarg);
-            break;
-        case 5:
-            intra_idr_period = atoi(optarg);
-            break;
-        case 6:
-            ip_period = atoi(optarg);
-            break;
-        case 7:
-            rc_mode = string_to_rc(optarg);
-            if (rc_mode < 0) {
-                print_help();
-                exit(1);
-            }
-            break;
-        case 9:
-            srcyuv_fn = strdup(optarg);
-            break;
-        case 11:
-            srcyuv_fourcc = string_to_fourcc(optarg);
-            if (srcyuv_fourcc <= 0) {
-                print_help();
-                exit(1);
-            }
-            break;
-        case 13:
-            calc_psnr = 1;
-            break;
-        case 14:
-            misc_priv_type = strtol(optarg, NULL, 0);
-            break;
-        case 15:
-            misc_priv_value = strtol(optarg, NULL, 0);
-            break;
-        case 17:
-            h264_entropy_mode = atoi(optarg) ? 1: 0;
-            break;
-        case 18:
-            if (strncmp(optarg, "BP", 2) == 0)
-                h264_profile = VAProfileH264Baseline;
-            else if (strncmp(optarg, "MP", 2) == 0)
-                h264_profile = VAProfileH264Main;
-            else if (strncmp(optarg, "HP", 2) == 0)
-                h264_profile = VAProfileH264High;
-            else
-                h264_profile = (VAProfile)0;
-            break;
-        case ':':
-        case '?':
-            print_help();
-            exit(0);
-        }
-    }
-
-    if (ip_period < 1) {
-       printf(" ip_period must be greater than 0\n");
-        exit(0);
-    }
-    if (intra_period != 1 && intra_period % ip_period != 0) {
-       printf(" intra_period must be a multiplier of ip_period\n");
-        exit(0);        
-    }
-    if (intra_period != 0 && intra_idr_period % intra_period != 0) {
-       printf(" intra_idr_period must be a multiplier of intra_period\n");
-        exit(0);        
-    }
-
-    if (frame_bitrate == 0)
-        frame_bitrate = frame_width * frame_height * 12 * MAX_FPS / 50;
-        
-    if (coded_fn == NULL) {
-        struct stat buf;
-        if (stat("/tmp", &buf) == 0)
-            coded_fn = strdup("/tmp/test.264");
-        else if (stat("/sdcard", &buf) == 0)
-            coded_fn = strdup("/sdcard/test.264");
-        else
-            coded_fn = strdup("./test.264");
-    }
-    
-
-    frame_width_mbaligned = (frame_width + 15) & (~15);
-    frame_height_mbaligned = (frame_height + 15) & (~15);
-    if (frame_width != frame_width_mbaligned ||
-        frame_height != frame_height_mbaligned) {
-        printf("Source frame is %dx%d and will code clip to %dx%d with crop\n",
-               frame_width, frame_height,
-               frame_width_mbaligned, frame_height_mbaligned
-               );
-    }
-    
-    return 0;
-}
-#endif
-
-static Display *x11_display;
-static Window   x11_window;
-
-VADisplay
-va_open_display(void)
+VADisplay H264EncoderImpl::va_open_display(void)
 {
     x11_display = XOpenDisplay(NULL);
     if (!x11_display) {
@@ -908,8 +841,7 @@ va_open_display(void)
     return vaGetDisplay(x11_display);
 }
 
-void
-va_close_display(VADisplay va_dpy)
+void H264EncoderImpl::va_close_display(VADisplay va_dpy)
 {
     if (!x11_display)
         return;
@@ -923,7 +855,7 @@ va_close_display(VADisplay va_dpy)
     x11_display = NULL;
 }
 
-static int init_va(void)
+int H264EncoderImpl::init_va()
 {
     VAProfile profile_list[]={VAProfileH264High, VAProfileH264Main, VAProfileH264Baseline, VAProfileH264ConstrainedBaseline};
     VAEntrypoint *entrypoints;
@@ -1073,7 +1005,7 @@ static int init_va(void)
     return 0;
 }
 
-static int setup_encode()
+int H264EncoderImpl::setup_encode()
 {
     VAStatus va_status;
     VASurfaceID *tmp_surfaceid;
@@ -1142,71 +1074,18 @@ static int setup_encode()
     return 0;
 }
 
-
-
-#define partition(ref, field, key, ascending)   \
-    while (i <= j) {                            \
-        if (ascending) {                        \
-            while (ref[i].field < key)          \
-                i++;                            \
-            while (ref[j].field > key)          \
-                j--;                            \
-        } else {                                \
-            while (ref[i].field > key)          \
-                i++;                            \
-            while (ref[j].field < key)          \
-                j--;                            \
-        }                                       \
-        if (i <= j) {                           \
-            tmp = ref[i];                       \
-            ref[i] = ref[j];                    \
-            ref[j] = tmp;                       \
-            i++;                                \
-            j--;                                \
-        }                                       \
-    }                                           \
-
-static void sort_one(VAPictureH264 ref[], int left, int right,
-                     int ascending, int frame_idx)
-{
-    int i = left, j = right;
-    unsigned int key;
-    VAPictureH264 tmp;
-
-    if (frame_idx) {
-        key = ref[(left + right) / 2].frame_idx;
-        partition(ref, frame_idx, key, ascending);
-    } else {
-        key = ref[(left + right) / 2].TopFieldOrderCnt;
-        partition(ref, TopFieldOrderCnt, (signed int)key, ascending);
-    }
-    
-    /* recursion */
-    if (left < j)
-        sort_one(ref, left, j, ascending, frame_idx);
-    
-    if (i < right)
-        sort_one(ref, i, right, ascending, frame_idx);
-}
-
-static void sort_two(VAPictureH264 ref[], int left, int right, unsigned int key, unsigned int frame_idx,
-                     int partition_ascending, int list0_ascending, int list1_ascending)
+// Given a list like 1 9 3 0 2 8 4 and a pivot element 3, will produce
+//
+//   2 1 0 [3] 4 8 9
+template<class T, class C>
+static void sort_two(T *begin, T *end, const T &pivot, const C &less_than)
 {
-    int i = left, j = right;
-    VAPictureH264 tmp;
-
-    if (frame_idx) {
-        partition(ref, frame_idx, key, partition_ascending);
-    } else {
-        partition(ref, TopFieldOrderCnt, (signed int)key, partition_ascending);
-    }
-    
-
-    sort_one(ref, left, i-1, list0_ascending, frame_idx);
-    sort_one(ref, j+1, right, list1_ascending, frame_idx);
+       T *middle = partition(begin, end, [&](const T &elem) { return less_than(elem, pivot); });
+       sort(begin, middle, [&](const T &a, const T &b) { return less_than(b, a); });
+       sort(middle, end, less_than);
 }
 
-static void update_ReferenceFrames(int frame_type)
+void H264EncoderImpl::update_ReferenceFrames(int frame_type)
 {
     int i;
     
@@ -1227,34 +1106,38 @@ static void update_ReferenceFrames(int frame_type)
 }
 
 
-static int update_RefPicList(int frame_type)
+int H264EncoderImpl::update_RefPicList(int frame_type)
 {
-    unsigned int current_poc = CurrentCurrPic.TopFieldOrderCnt;
+    const auto descending_by_frame_idx = [](const VAPictureH264 &a, const VAPictureH264 &b) {
+        return a.frame_idx > b.frame_idx;
+    };
+    const auto ascending_by_top_field_order_cnt = [](const VAPictureH264 &a, const VAPictureH264 &b) {
+        return a.TopFieldOrderCnt < b.TopFieldOrderCnt;
+    };
+    const auto descending_by_top_field_order_cnt = [](const VAPictureH264 &a, const VAPictureH264 &b) {
+        return a.TopFieldOrderCnt > b.TopFieldOrderCnt;
+    };
     
     if (frame_type == FRAME_P) {
         memcpy(RefPicList0_P, ReferenceFrames, numShortTerm * sizeof(VAPictureH264));
-        sort_one(RefPicList0_P, 0, numShortTerm-1, 0, 1);
-    }
-    
-    if (frame_type == FRAME_B) {
+        sort(&RefPicList0_P[0], &RefPicList0_P[numShortTerm], descending_by_frame_idx);
+    } else if (frame_type == FRAME_B) {
         memcpy(RefPicList0_B, ReferenceFrames, numShortTerm * sizeof(VAPictureH264));
-        sort_two(RefPicList0_B, 0, numShortTerm-1, current_poc, 0,
-                 1, 0, 1);
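+        // List 0: the frames before the current one (lower TopFieldOrderCnt),
+        // closest first, followed by the ones after it.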
+        sort_two(&RefPicList0_B[0], &RefPicList0_B[numShortTerm], CurrentCurrPic, ascending_by_top_field_order_cnt);
 
         memcpy(RefPicList1_B, ReferenceFrames, numShortTerm * sizeof(VAPictureH264));
-        sort_two(RefPicList1_B, 0, numShortTerm-1, current_poc, 0,
-                 0, 1, 0);
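+        // List 1: the other way around; the frames after the current one come first.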
+        sort_two(&RefPicList1_B[0], &RefPicList1_B[numShortTerm], CurrentCurrPic, descending_by_top_field_order_cnt);
     }
     
     return 0;
 }
 
 
-static int render_sequence(void)
+int H264EncoderImpl::render_sequence()
 {
-    VABufferID seq_param_buf, rc_param_buf, misc_param_tmpbuf, render_id[2];
+    VABufferID seq_param_buf, rc_param_buf, render_id[2];
     VAStatus va_status;
-    VAEncMiscParameterBuffer *misc_param, *misc_param_tmp;
+    VAEncMiscParameterBuffer *misc_param;
     VAEncMiscParameterRateControl *misc_rate_ctrl;
     
     seq_param.level_idc = 41 /*SH_LEVEL_3*/;
@@ -1312,20 +1195,6 @@ static int render_sequence(void)
     render_id[1] = rc_param_buf;
     
     render_picture_and_delete(va_dpy, context_id, &render_id[0], 2);
-
-    if (misc_priv_type != 0) {
-        va_status = vaCreateBuffer(va_dpy, context_id,
-                                   VAEncMiscParameterBufferType,
-                                   sizeof(VAEncMiscParameterBuffer),
-                                   1, NULL, &misc_param_tmpbuf);
-        CHECK_VASTATUS(va_status, "vaCreateBuffer");
-        vaMapBuffer(va_dpy, misc_param_tmpbuf, (void **)&misc_param_tmp);
-        misc_param_tmp->type = (VAEncMiscParameterType)misc_priv_type;
-        misc_param_tmp->data[0] = misc_priv_value;
-        vaUnmapBuffer(va_dpy, misc_param_tmpbuf);
-    
-        render_picture_and_delete(va_dpy, context_id, &misc_param_tmpbuf, 1);
-    }
     
     return 0;
 }
@@ -1362,7 +1231,7 @@ static int calc_poc(int pic_order_cnt_lsb, int frame_type)
     return TopFieldOrderCnt;
 }
 
-static int render_picture(int frame_type, int display_frame_num, int gop_start_display_frame_num)
+int H264EncoderImpl::render_picture(int frame_type, int display_frame_num, int gop_start_display_frame_num)
 {
     VABufferID pic_param_buf;
     VAStatus va_status;
@@ -1399,7 +1268,7 @@ static int render_picture(int frame_type, int display_frame_num, int gop_start_d
     return 0;
 }
 
-static int render_packedsequence(void)
+int H264EncoderImpl::render_packedsequence()
 {
     VAEncPackedHeaderParameterBuffer packedheader_param_buffer;
     VABufferID packedseq_para_bufid, packedseq_data_bufid, render_id[2];
@@ -1437,7 +1306,7 @@ static int render_packedsequence(void)
 }
 
 
-static int render_packedpicture(void)
+int H264EncoderImpl::render_packedpicture()
 {
     VAEncPackedHeaderParameterBuffer packedheader_param_buffer;
     VABufferID packedpic_para_bufid, packedpic_data_bufid, render_id[2];
@@ -1473,7 +1342,7 @@ static int render_packedpicture(void)
     return 0;
 }
 
-static void render_packedslice()
+void H264EncoderImpl::render_packedslice()
 {
     VAEncPackedHeaderParameterBuffer packedheader_param_buffer;
     VABufferID packedslice_para_bufid, packedslice_data_bufid, render_id[2];
@@ -1507,7 +1376,7 @@ static void render_packedslice()
     free(packedslice_buffer);
 }
 
-static int render_slice(int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, int frame_type)
+int H264EncoderImpl::render_slice(int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, int frame_type)
 {
     VABufferID slice_param_buf;
     VAStatus va_status;
@@ -1568,23 +1437,20 @@ static int render_slice(int encoding_frame_num, int display_frame_num, int gop_s
 
 
 
-void H264Encoder::save_codeddata(storage_task task)
+void H264EncoderImpl::save_codeddata(storage_task task)
 {    
     VACodedBufferSegment *buf_list = NULL;
     VAStatus va_status;
-    unsigned int coded_size = 0;
 
     string data;
 
-    const int64_t global_delay = (ip_period - 1) * (TIMEBASE / MAX_FPS);  // So we never get negative dts.
+    const int64_t global_delay = int64_t(ip_period - 1) * (TIMEBASE / MAX_FPS);  // So we never get negative dts.
 
     va_status = vaMapBuffer(va_dpy, gl_surfaces[task.display_order % SURFACE_NUM].coded_buf, (void **)(&buf_list));
     CHECK_VASTATUS(va_status, "vaMapBuffer");
     while (buf_list != NULL) {
         data.append(reinterpret_cast<const char *>(buf_list->buf), buf_list->size);
         buf_list = (VACodedBufferSegment *) buf_list->next;
-
-        frame_size += coded_size;
     }
     vaUnmapBuffer(va_dpy, gl_surfaces[task.display_order % SURFACE_NUM].coded_buf);
 
@@ -1610,8 +1476,8 @@ void H264Encoder::save_codeddata(storage_task task)
         vector<float> audio;
         {
              unique_lock<mutex> lock(frame_queue_mutex);
-             frame_queue_nonempty.wait(lock, [this]{ return copy_thread_should_quit || !pending_audio_frames.empty(); });
-             if (copy_thread_should_quit) return;
+             frame_queue_nonempty.wait(lock, [this]{ return storage_thread_should_quit || !pending_audio_frames.empty(); });
+             if (storage_thread_should_quit && pending_audio_frames.empty()) return;
              auto it = pending_audio_frames.begin();
              if (it->first > task.pts) break;
              audio_pts = it->first;
@@ -1673,13 +1539,12 @@ void H264Encoder::save_codeddata(storage_task task)
             break;
     }
     printf("%08lld", encode_order);
-    printf("(%06d bytes coded)", coded_size);
 #endif
 }
 
 
 // Puts a frame that has been sent off for encoding onto the queue for the storage thread to pick up.
-void H264Encoder::storage_task_enqueue(storage_task task)
+void H264EncoderImpl::storage_task_enqueue(storage_task task)
 {
        unique_lock<mutex> lock(storage_task_queue_mutex);
        storage_task_queue.push(move(task));
@@ -1687,7 +1552,7 @@ void H264Encoder::storage_task_enqueue(storage_task task)
        storage_task_queue_changed.notify_all();
 }
 
-void H264Encoder::storage_task_thread()
+void H264EncoderImpl::storage_task_thread()
 {
        for ( ;; ) {
                storage_task current;
@@ -1695,7 +1560,7 @@ void H264Encoder::storage_task_thread()
                        // wait until there's an encoded frame  
                        unique_lock<mutex> lock(storage_task_queue_mutex);
                        storage_task_queue_changed.wait(lock, [this]{ return storage_thread_should_quit || !storage_task_queue.empty(); });
-                       if (storage_thread_should_quit) return;
+                       if (storage_thread_should_quit && storage_task_queue.empty()) return;
                        current = move(storage_task_queue.front());
                        storage_task_queue.pop();
                }
@@ -1715,7 +1580,7 @@ void H264Encoder::storage_task_thread()
        }
 }
 
-static int release_encode()
+int H264EncoderImpl::release_encode()
 {
     int i;
     
@@ -1731,7 +1596,7 @@ static int release_encode()
     return 0;
 }
 
-static int deinit_va()
+int H264EncoderImpl::deinit_va()
 { 
     vaTerminate(va_dpy);
 
@@ -1741,7 +1606,7 @@ static int deinit_va()
 }
 
 
-H264Encoder::H264Encoder(QSurface *surface, int width, int height, HTTPD *httpd)
+H264EncoderImpl::H264EncoderImpl(QSurface *surface, int width, int height, HTTPD *httpd)
        : current_storage_frame(0), surface(surface), httpd(httpd)
 {
        AVCodec *codec_audio = avcodec_find_encoder(AUDIO_OUTPUT_CODEC);
@@ -1761,7 +1626,6 @@ H264Encoder::H264Encoder(QSurface *surface, int width, int height, HTTPD *httpd)
        frame_height = height;
        frame_width_mbaligned = (frame_width + 15) & (~15);
        frame_height_mbaligned = (frame_height + 15) & (~15);
-        frame_bitrate = 15000000;  // / 60;
 
        //print_input();
 
@@ -1775,9 +1639,9 @@ H264Encoder::H264Encoder(QSurface *surface, int width, int height, HTTPD *httpd)
        memset(&pic_param, 0, sizeof(pic_param));
        memset(&slice_param, 0, sizeof(slice_param));
 
-       storage_thread = thread(&H264Encoder::storage_task_thread, this);
+       storage_thread = thread(&H264EncoderImpl::storage_task_thread, this);
 
-       copy_thread = thread([this]{
+       encode_thread = thread([this]{
                //SDL_GL_MakeCurrent(window, context);
                QOpenGLContext *context = create_context(this->surface);
                eglBindAPI(EGL_OPENGL_API);
@@ -1786,31 +1650,18 @@ H264Encoder::H264Encoder(QSurface *surface, int width, int height, HTTPD *httpd)
                                eglGetError());
                        exit(1);
                }
-               copy_thread_func();
+               encode_thread_func();
        });
 }
 
-H264Encoder::~H264Encoder()
+H264EncoderImpl::~H264EncoderImpl()
 {
-       {
-               unique_lock<mutex> lock(storage_task_queue_mutex);
-               storage_thread_should_quit = true;
-               storage_task_queue_changed.notify_all();
-       }
-       {
-               unique_lock<mutex> lock(frame_queue_mutex);
-               copy_thread_should_quit = true;
-               frame_queue_nonempty.notify_all();
-       }
-       storage_thread.join();
-       copy_thread.join();
-
-       release_encode();
-       deinit_va();
+       shutdown();
 }
 
-bool H264Encoder::begin_frame(GLuint *y_tex, GLuint *cbcr_tex)
+bool H264EncoderImpl::begin_frame(GLuint *y_tex, GLuint *cbcr_tex)
 {
+       assert(!is_shutdown);
        {
                // Wait until this frame slot is done encoding.
                unique_lock<mutex> lock(storage_task_queue_mutex);
@@ -1873,8 +1724,9 @@ bool H264Encoder::begin_frame(GLuint *y_tex, GLuint *cbcr_tex)
        return true;
 }
 
-void H264Encoder::add_audio(int64_t pts, vector<float> audio)
+void H264EncoderImpl::add_audio(int64_t pts, vector<float> audio)
 {
+       assert(!is_shutdown);
        {
                unique_lock<mutex> lock(frame_queue_mutex);
                pending_audio_frames[pts] = move(audio);
@@ -1882,8 +1734,9 @@ void H264Encoder::add_audio(int64_t pts, vector<float> audio)
        frame_queue_nonempty.notify_all();
 }
 
-void H264Encoder::end_frame(RefCountedGLsync fence, int64_t pts, const vector<RefCountedFrame> &input_frames)
+void H264EncoderImpl::end_frame(RefCountedGLsync fence, int64_t pts, const vector<RefCountedFrame> &input_frames)
 {
+       assert(!is_shutdown);
        {
                unique_lock<mutex> lock(frame_queue_mutex);
                pending_video_frames[current_storage_frame] = PendingFrame{ fence, input_frames, pts };
@@ -1892,7 +1745,32 @@ void H264Encoder::end_frame(RefCountedGLsync fence, int64_t pts, const vector<Re
        frame_queue_nonempty.notify_all();
 }
 
-void H264Encoder::copy_thread_func()
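+// Stops the encoder: first the encode thread is drained and joined (it may still
+// produce storage tasks for its remaining frames), then the storage thread, and
+// finally the VA-API state is torn down. Idempotent; also called from the destructor.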
+void H264EncoderImpl::shutdown()
+{
+       if (is_shutdown) {
+               return;
+       }
+
+       {
+               unique_lock<mutex> lock(frame_queue_mutex);
+               encode_thread_should_quit = true;
+               frame_queue_nonempty.notify_all();
+       }
+       encode_thread.join();
+       {
+               unique_lock<mutex> lock(storage_task_queue_mutex);
+               storage_thread_should_quit = true;
+               frame_queue_nonempty.notify_all();
+               storage_task_queue_changed.notify_all();
+       }
+       storage_thread.join();
+
+       release_encode();
+       deinit_va();
+       is_shutdown = true;
+}
+
+void H264EncoderImpl::encode_thread_func()
 {
        int64_t last_dts = -1;
        int gop_start_display_frame_num = 0;
@@ -1911,9 +1789,15 @@ void H264Encoder::copy_thread_func()
                {
                        unique_lock<mutex> lock(frame_queue_mutex);
                        frame_queue_nonempty.wait(lock, [this, display_frame_num]{
-                               return copy_thread_should_quit || pending_video_frames.count(display_frame_num) != 0;
+                               return encode_thread_should_quit || pending_video_frames.count(display_frame_num) != 0;
                        });
-                       if (copy_thread_should_quit) {
+                       if (encode_thread_should_quit && pending_video_frames.count(display_frame_num) == 0) {
+                               // We have queued frames that were supposed to be B-frames,
+                               // but there will be no P-frame to encode them against. Encode them all
+                               // as P-frames instead. Note that this happens under the mutex,
+                               // but nobody else uses it at this point, since we're shutting down,
+                               // so there's no contention.
+                               encode_remaining_frames_as_p(encoding_frame_num, gop_start_display_frame_num, last_dts);
                                return;
                        } else {
                                frame = move(pending_video_frames[display_frame_num]);
@@ -1935,8 +1819,25 @@ void H264Encoder::copy_thread_func()
        }
 }
 
-void H264Encoder::encode_frame(H264Encoder::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
-                               int frame_type, int64_t pts, int64_t dts)
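+// Called during shutdown: any frames still queued were meant to be B-frames,
+// but there is no future P-frame left to encode them against, so they are
+// encoded as P-frames instead.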
+void H264EncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts)
+{
+       if (pending_video_frames.empty()) {
+               return;
+       }
+
+       for (auto &pending_frame : pending_video_frames) {
+               int display_frame_num = pending_frame.first;
+               assert(display_frame_num > 0);
+               PendingFrame frame = move(pending_frame.second);
+               int64_t dts = last_dts + (TIMEBASE / MAX_FPS);
+               printf("Finalizing encode: Encoding leftover frame %d as P-frame instead of B-frame.\n", display_frame_num);
+               encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts);
+               last_dts = dts;
+       }
+}
+
+void H264EncoderImpl::encode_frame(H264EncoderImpl::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
+                                   int frame_type, int64_t pts, int64_t dts)
 {
        // Wait for the GPU to be done with the frame.
        glClientWaitSync(frame.fence.get(), 0, 0);
@@ -1986,3 +1887,32 @@ void H264Encoder::encode_frame(H264Encoder::PendingFrame frame, int encoding_fra
 
        update_ReferenceFrames(frame_type);
 }
+
+// Proxy object.
+H264Encoder::H264Encoder(QSurface *surface, int width, int height, HTTPD *httpd)
+       : impl(new H264EncoderImpl(surface, width, height, httpd)) {}
+
+// Must be defined here because unique_ptr<> destructor needs to know the impl.
+H264Encoder::~H264Encoder() {}
+
+void H264Encoder::add_audio(int64_t pts, vector<float> audio)
+{
+       impl->add_audio(pts, audio);
+}
+
+bool H264Encoder::begin_frame(GLuint *y_tex, GLuint *cbcr_tex)
+{
+       return impl->begin_frame(y_tex, cbcr_tex);
+}
+
+void H264Encoder::end_frame(RefCountedGLsync fence, int64_t pts, const vector<RefCountedFrame> &input_frames)
+{
+       impl->end_frame(fence, pts, input_frames);
+}
+
+void H264Encoder::shutdown()
+{
+       impl->shutdown();
+}
+
+// Real class.