X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=nageru%2Fquicksync_encoder.cpp;h=0d0fbec7d23f42155c2ae5165148976dbd745644;hb=e35786415c00652b3436dab5174c2504d314a219;hp=75b13b899a0cee05e109ef5b53764f82ebeb767b;hpb=5d6ba6daaffcddb3aaf1e3c39f80ab4447c0e074;p=nageru

diff --git a/nageru/quicksync_encoder.cpp b/nageru/quicksync_encoder.cpp
index 75b13b8..0d0fbec 100644
--- a/nageru/quicksync_encoder.cpp
+++ b/nageru/quicksync_encoder.cpp
@@ -592,51 +592,6 @@ int QuickSyncEncoderImpl::build_packed_slice_buffer(unsigned char **header_buffe
                                            {IDR(PBB)(PBB)}.
 */
 
-// General pts/dts strategy:
-//
-// Getting pts and dts right with variable frame rate (VFR) and B-frames can be a
-// bit tricky. We assume first of all that the frame rate never goes _above_
-// MAX_FPS, which gives us a frame period N. The decoder can always decode
-// in at least this speed, as long at dts <= pts (the frame is not attempted
-// presented before it is decoded). Furthermore, we never have longer chains of
-// B-frames than a fixed constant C. (In a B-frame chain, we say that the base
-// I/P-frame has order O=0, the B-frame depending on it directly has order O=1,
-// etc. The last frame in the chain, which no B-frames depend on, is the “tip”
-// frame, with an order O <= C.)
-//
-// Many strategies are possible, but we establish these rules:
-//
-//  - Tip frames have dts = pts - (C-O)*N.
-//  - Non-tip frames have dts = dts_last + N.
-//
-// An example, with C=2 and N=10 and the data flow showed with arrows:
-//
-//        I  B  P  B  B  P
-//   pts: 30 40 50 60 70 80
-//        ↓  ↓     ↓
-//   dts: 10 30 20 60 50←40
-//         |  |  ↑        ↑
-//         `--|--'        |
-//             `----------'
-//
-// To show that this works fine also with irregular spacings, let's say that
-// the third frame is delayed a bit (something earlier was dropped). Now the
-// situation looks like this:
-//
-//        I  B  P  B  B   P
-//   pts: 30 40 80 90 100 110
-//        ↓  ↓     ↓
-//   dts: 10 30 20 90 50←40
-//         |  |  ↑        ↑
-//         `--|--'        |
-//             `----------'
-//
-// The resetting on every tip frame makes sure dts never ends up lagging a lot
-// behind pts, and the subtraction of (C-O)*N makes sure pts <= dts.
-//
-// In the output of this function, if <dts_lag> is >= 0, it means to reset the
-// dts from the current pts minus <dts_lag>, while if it's -1, the frame is not
-// a tip frame and should be given a dts based on the previous one.
 #define FRAME_P 0
 #define FRAME_B 1
 #define FRAME_I 2
@@ -645,12 +600,10 @@ void encoding2display_order(
     int encoding_order, int intra_period,
     int intra_idr_period, int ip_period,
     int *displaying_order,
-    int *frame_type, int *pts_lag)
+    int *frame_type)
 {
     int encoding_order_gop = 0;
 
-    *pts_lag = 0;
-
     if (intra_period == 1) { /* all are I/IDR frames */
         *displaying_order = encoding_order;
         if (intra_idr_period == 0)
@@ -682,20 +635,13 @@ void encoding2display_order(
 
     // We have B-frames. Sequence is like IDR (PBB)(PBB)(IBB)(PBB).
     encoding_order_gop = (intra_idr_period == 0) ? encoding_order : (encoding_order % (intra_idr_period + 1));
-    *pts_lag = -1;  // Most frames are not tip frames.
          
     if (encoding_order_gop == 0) { /* the first frame */
         *frame_type = FRAME_IDR;
         *displaying_order = encoding_order;
-        // IDR frames are a special case; I honestly can't find the logic behind
-        // why this is the right thing, but it seems to line up nicely in practice :-)
-        *pts_lag = TIMEBASE / MAX_FPS;
     } else if (((encoding_order_gop - 1) % ip_period) != 0) { /* B frames */
         *frame_type = FRAME_B;
         *displaying_order = encoding_order - 1;
-        if ((encoding_order_gop % ip_period) == 0) {
-            *pts_lag = 0;  // Last B-frame.
-        }
     } else if (intra_period != 0 && /* have I frames */
                encoding_order_gop >= 2 &&
                ((encoding_order_gop - 1) / ip_period % (intra_period / ip_period)) == 0) {
@@ -707,6 +653,72 @@ void encoding2display_order(
     }
 }
 
+// General pts/dts strategy:
+//
+// Getting pts and dts right with variable frame rate (VFR) and B-frames can be a
+// bit tricky. This strategy roughly matches what x264 seems to do: We take in
+// the pts as the frames are encoded, and reuse that as dts in the same order,
+// slightly offset.
+//
+// If we don't have B-frames (only I and P), this means pts == dts always.
+// This is the simple case. Now consider the case with a single B-frame:
+//
+//        I  B  P  B  P
+//   pts: 30 40 50 60 70
+//
+// Since we always inherently encode P-frames before B-frames, this means that
+// we see them in this order, which we can _almost_ use for dts:
+//
+//   dts: 30 50 40 70 60
+//
+// the only problem here is that for the B-frames, pts < dts. We solve this by
+// priming the queue at the very start with some made-up dts:
+//
+//        I  B  P  B  P
+//   pts: 30 40 50 60 70
+//   dts: xx 30 50 40 70 60
+//
+// Now we have all the desirable properties: pts >= dts, successive dts delta
+// is never larger than the decoder can figure out (assuming, of course,
+// the pts has that property), and there's minimal lag between pts and dts.
+// For the made-up dts, we assume 1/60 sec per frame, which should generally
+// be reasonable. dts can go negative, but this is corrected using global_delay()
+// by delaying both pts and dts (although we probably don't need to).
+//
+// If there's more than one B-frame possible, we simply insert more of them
+// (here shown with some irregular spacing, assuming B-frames don't depend
+// on each other and simply go back-to-front):
+//
+//        I  B  B  B  P  B  B  B  P
+//   pts: 30 40 55 60 65 66 67 68 80
+//   dts: xx yy zz 30 65 60 55 40 80 68 67 66
+class DTSReorderer {  // Queue that hands back previously pushed pts values as dts, delayed by num_b_frames primed entries.
+public:
+	DTSReorderer(int num_b_frames) : num_b_frames(num_b_frames) {}  // num_b_frames: number of made-up dts values to prime with (0 disables priming).
+
+	void push_pts(int64_t pts)  // Queue one pts; on the very first call, the made-up dts values are queued ahead of it.
+	{
+		if (buf.empty() && num_b_frames > 0) {  // First frame: nothing queued yet, so prime the queue.
+			int64_t base_dts = pts - num_b_frames * (TIMEBASE / TYPICAL_FPS);  // Earliest made-up dts: num_b_frames typical frame periods before the first pts.
+			for (int i = 0; i < num_b_frames; ++i) {
+				buf.push(base_dts + i * (TIMEBASE / TYPICAL_FPS));  // Made-up values are spaced one typical frame period apart, ending one period before pts.
+			}
+		}
+		buf.push(pts);  // The real pts always goes in behind any primed values.
+	}
+
+	int64_t pop_dts()  // Remove and return the oldest queued value, to be used as a dts.
+	{
+		assert(!buf.empty());  // Popping without a prior push is a caller bug.
+		int64_t dts = buf.front();
+		buf.pop();
+		return dts;
+	}
+
+private:
+	const int num_b_frames;  // Count of primed entries; fixed at construction.
+	queue<int64_t> buf;  // Pending values, oldest at the front (presumably std::queue via a using-declaration elsewhere in the file).
+};
 
 void QuickSyncEncoderImpl::enable_zerocopy_if_possible()
 {
@@ -1340,7 +1352,7 @@ void QuickSyncEncoderImpl::save_codeddata(GLSurface *surf, storage_task task)
 		}
 		if (!global_flags.x264_video_to_http &&
 		    !global_flags.av1_video_to_http) {
-			stream_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
+			http_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
 		}
 	}
 }
@@ -1757,6 +1769,8 @@ void QuickSyncEncoderImpl::encode_thread_func()
 {
 	pthread_setname_np(pthread_self(), "QS_Encode");
 
+	DTSReorderer dts_reorder_buf(ip_period - 1);
+
 	int64_t last_dts = -1;
 	int gop_start_display_frame_num = 0;
 	for (int display_frame_num = 0; ; ++display_frame_num) {
@@ -1783,6 +1797,8 @@ void QuickSyncEncoderImpl::encode_thread_func()
 			}
 		}
 
+		dts_reorder_buf.push_pts(frame.pts);
+
 		// Pass the frame on to x264 (or uncompressed to HTTP) as needed.
 		// Note that this implicitly waits for the frame to be done rendering.
 		pass_frame(frame, display_frame_num, frame.pts, frame.duration);
@@ -1798,10 +1814,9 @@ void QuickSyncEncoderImpl::encode_thread_func()
 		// Now encode as many QuickSync frames as we can using the frames we have available.
 		// (It could be zero, or it could be multiple.) FIXME: make a function.
 		for ( ;; ) {
-			int pts_lag;
 			int frame_type, quicksync_display_frame_num;
 			encoding2display_order(quicksync_encoding_frame_num, intra_period, intra_idr_period, ip_period,
-			                       &quicksync_display_frame_num, &frame_type, &pts_lag);
+			                       &quicksync_display_frame_num, &frame_type);
 			if (!reorder_buffer.count(quicksync_display_frame_num)) {
 				break;
 			}
@@ -1821,14 +1836,7 @@ void QuickSyncEncoderImpl::encode_thread_func()
 				gop_start_display_frame_num = quicksync_display_frame_num;
 			}
 
-			// Determine the dts of this frame.
-			int64_t dts;
-			if (pts_lag == -1) {
-				assert(last_dts != -1);
-				dts = last_dts + (TIMEBASE / MAX_FPS);
-			} else {
-				dts = frame.pts - pts_lag;
-			}
+			const int64_t dts = dts_reorder_buf.pop_dts();
 			last_dts = dts;
 
 			encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration, frame.ycbcr_coefficients);
@@ -1847,7 +1855,7 @@ void QuickSyncEncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num,
 		int display_frame_num = pending_frame.first;
 		assert(display_frame_num > 0);
 		PendingFrame frame = move(pending_frame.second);
-		int64_t dts = last_dts + (TIMEBASE / MAX_FPS);
+		int64_t dts = last_dts + (TIMEBASE / TYPICAL_FPS);
 		printf("Finalizing encode: Encoding leftover frame %d as P-frame instead of B-frame.\n", display_frame_num);
 		encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration, frame.ycbcr_coefficients);
 		last_dts = dts;
@@ -2037,9 +2045,9 @@ void QuickSyncEncoder::close_file()
 	impl->shutdown();
 }
 
-void QuickSyncEncoder::set_stream_mux(Mux *mux)
+void QuickSyncEncoder::set_http_mux(Mux *mux)  // Thin forwarder: passes the mux pointer on to the implementation object.
 {
-	impl->set_stream_mux(mux);
+	impl->set_http_mux(mux);
 }
 
 int64_t QuickSyncEncoder::global_delay() const {