X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=nageru%2Fquicksync_encoder.cpp;h=0d0fbec7d23f42155c2ae5165148976dbd745644;hb=e35786415c00652b3436dab5174c2504d314a219;hp=75b13b899a0cee05e109ef5b53764f82ebeb767b;hpb=5d6ba6daaffcddb3aaf1e3c39f80ab4447c0e074;p=nageru diff --git a/nageru/quicksync_encoder.cpp b/nageru/quicksync_encoder.cpp index 75b13b8..0d0fbec 100644 --- a/nageru/quicksync_encoder.cpp +++ b/nageru/quicksync_encoder.cpp @@ -592,51 +592,6 @@ int QuickSyncEncoderImpl::build_packed_slice_buffer(unsigned char **header_buffe {IDR(PBB)(PBB)}. */ -// General pts/dts strategy: -// -// Getting pts and dts right with variable frame rate (VFR) and B-frames can be a -// bit tricky. We assume first of all that the frame rate never goes _above_ -// MAX_FPS, which gives us a frame period N. The decoder can always decode -// in at least this speed, as long at dts <= pts (the frame is not attempted -// presented before it is decoded). Furthermore, we never have longer chains of -// B-frames than a fixed constant C. (In a B-frame chain, we say that the base -// I/P-frame has order O=0, the B-frame depending on it directly has order O=1, -// etc. The last frame in the chain, which no B-frames depend on, is the “tip” -// frame, with an order O <= C.) -// -// Many strategies are possible, but we establish these rules: -// -// - Tip frames have dts = pts - (C-O)*N. -// - Non-tip frames have dts = dts_last + N. -// -// An example, with C=2 and N=10 and the data flow showed with arrows: -// -// I B P B B P -// pts: 30 40 50 60 70 80 -// ↓ ↓ ↓ -// dts: 10 30 20 60 50←40 -// | | ↑ ↑ -// `--|--' | -// `----------' -// -// To show that this works fine also with irregular spacings, let's say that -// the third frame is delayed a bit (something earlier was dropped). 
Now the -// situation looks like this: -// -// I B P B B P -// pts: 30 40 80 90 100 110 -// ↓ ↓ ↓ -// dts: 10 30 20 90 50←40 -// | | ↑ ↑ -// `--|--' | -// `----------' -// -// The resetting on every tip frame makes sure dts never ends up lagging a lot -// behind pts, and the subtraction of (C-O)*N makes sure pts <= dts. -// -// In the output of this function, if <pts_lag> is >= 0, it means to reset the -// dts from the current pts minus <pts_lag>, while if it's -1, the frame is not -// a tip frame and should be given a dts based on the previous one. #define FRAME_P 0 #define FRAME_B 1 #define FRAME_I 2 @@ -645,12 +600,10 @@ void encoding2display_order( int encoding_order, int intra_period, int intra_idr_period, int ip_period, int *displaying_order, - int *frame_type, int *pts_lag) + int *frame_type) { int encoding_order_gop = 0; - *pts_lag = 0; - if (intra_period == 1) { /* all are I/IDR frames */ *displaying_order = encoding_order; if (intra_idr_period == 0) @@ -682,20 +635,13 @@ void encoding2display_order( // We have B-frames. Sequence is like IDR (PBB)(PBB)(IBB)(PBB). encoding_order_gop = (intra_idr_period == 0) ? encoding_order : (encoding_order % (intra_idr_period + 1)); - *pts_lag = -1; // Most frames are not tip frames. if (encoding_order_gop == 0) { /* the first frame */ *frame_type = FRAME_IDR; *displaying_order = encoding_order; - // IDR frames are a special case; I honestly can't find the logic behind - // why this is the right thing, but it seems to line up nicely in practice :-) - *pts_lag = TIMEBASE / MAX_FPS; } else if (((encoding_order_gop - 1) % ip_period) != 0) { /* B frames */ *frame_type = FRAME_B; *displaying_order = encoding_order - 1; - if ((encoding_order_gop % ip_period) == 0) { - *pts_lag = 0; // Last B-frame. 
- } } else if (intra_period != 0 && /* have I frames */ encoding_order_gop >= 2 && ((encoding_order_gop - 1) / ip_period % (intra_period / ip_period)) == 0) { @@ -707,6 +653,72 @@ void encoding2display_order( } } +// General pts/dts strategy: +// +// Getting pts and dts right with variable frame rate (VFR) and B-frames can be a +// bit tricky. This strategy roughly matches what x264 seems to do: We take in +// the pts as the frames are encoded, and reuse that as dts in the same order, +// slightly offset. +// +// If we don't have B-frames (only I and P), this means pts == dts always. +// This is the simple case. Now consider the case with a single B-frame: +// +// I B P B P +// pts: 30 40 50 60 70 +// +// Since we always inherently encode P-frames before B-frames, this means that +// we see them in this order, which we can _almost_ use for dts: +// +// dts: 30 50 40 70 60 +// +// the only problem here is that for the B-frames, pts < dts. We solve this by +// priming the queue at the very start with some made-up dts: +// +// I B P B P +// pts: 30 40 50 60 70 +// dts: xx 30 50 40 70 60 +// +// Now we have all the desirable properties: pts >= dts, successive dts delta +// is never larger than the decoder can figure out (assuming, of course, +// the pts has that property), and there's minimal lag between pts and dts. +// For the made-up dts, we assume 1/60 sec per frame, which should generally +// be reasonable. dts can go negative, but this is corrected using global_delay() +// by delaying both pts and dts (although we probably don't need to). 
+// +// If there's more than one B-frame possible, we simply insert more of them +// (here shown with some irregular spacing, assuming B-frames don't depend +// on each other and simply go back-to-front): +// +// I B B B P B B B P +// pts: 30 40 55 60 65 66 67 68 80 +// dts: xx yy zz 30 65 60 55 40 80 68 67 66 +class DTSReorderer { +public: + DTSReorderer(int num_b_frames) : num_b_frames(num_b_frames) {} + + void push_pts(int64_t pts) + { + if (buf.empty() && num_b_frames > 0) { // First frame. + int64_t base_dts = pts - num_b_frames * (TIMEBASE / TYPICAL_FPS); + for (int i = 0; i < num_b_frames; ++i) { + buf.push(base_dts + i * (TIMEBASE / TYPICAL_FPS)); + } + } + buf.push(pts); + } + + int64_t pop_dts() + { + assert(!buf.empty()); + int64_t dts = buf.front(); + buf.pop(); + return dts; + } + +private: + const int num_b_frames; + queue<int64_t> buf; +}; void QuickSyncEncoderImpl::enable_zerocopy_if_possible() { @@ -1340,7 +1352,7 @@ void QuickSyncEncoderImpl::save_codeddata(GLSurface *surf, storage_task task) } if (!global_flags.x264_video_to_http && !global_flags.av1_video_to_http) { - stream_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay()); + http_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay()); } } } @@ -1757,6 +1769,8 @@ void QuickSyncEncoderImpl::encode_thread_func() { pthread_setname_np(pthread_self(), "QS_Encode"); + DTSReorderer dts_reorder_buf(ip_period - 1); + int64_t last_dts = -1; int gop_start_display_frame_num = 0; for (int display_frame_num = 0; ; ++display_frame_num) { @@ -1783,6 +1797,8 @@ void QuickSyncEncoderImpl::encode_thread_func() } } + dts_reorder_buf.push_pts(frame.pts); + // Pass the frame on to x264 (or uncompressed to HTTP) as needed. // Note that this implicitly waits for the frame to be done rendering. 
pass_frame(frame, display_frame_num, frame.pts, frame.duration); @@ -1798,10 +1814,9 @@ void QuickSyncEncoderImpl::encode_thread_func() // Now encode as many QuickSync frames as we can using the frames we have available. // (It could be zero, or it could be multiple.) FIXME: make a function. for ( ;; ) { - int pts_lag; int frame_type, quicksync_display_frame_num; encoding2display_order(quicksync_encoding_frame_num, intra_period, intra_idr_period, ip_period, - &quicksync_display_frame_num, &frame_type, &pts_lag); + &quicksync_display_frame_num, &frame_type); if (!reorder_buffer.count(quicksync_display_frame_num)) { break; } @@ -1821,14 +1836,7 @@ void QuickSyncEncoderImpl::encode_thread_func() gop_start_display_frame_num = quicksync_display_frame_num; } - // Determine the dts of this frame. - int64_t dts; - if (pts_lag == -1) { - assert(last_dts != -1); - dts = last_dts + (TIMEBASE / MAX_FPS); - } else { - dts = frame.pts - pts_lag; - } + const int64_t dts = dts_reorder_buf.pop_dts(); last_dts = dts; encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration, frame.ycbcr_coefficients); @@ -1847,7 +1855,7 @@ void QuickSyncEncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num, int display_frame_num = pending_frame.first; assert(display_frame_num > 0); PendingFrame frame = move(pending_frame.second); - int64_t dts = last_dts + (TIMEBASE / MAX_FPS); + int64_t dts = last_dts + (TIMEBASE / TYPICAL_FPS); printf("Finalizing encode: Encoding leftover frame %d as P-frame instead of B-frame.\n", display_frame_num); encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration, frame.ycbcr_coefficients); last_dts = dts; @@ -2037,9 +2045,9 @@ void QuickSyncEncoder::close_file() impl->shutdown(); } -void QuickSyncEncoder::set_stream_mux(Mux *mux) +void QuickSyncEncoder::set_http_mux(Mux *mux) { - 
impl->set_stream_mux(mux); + impl->set_http_mux(mux); } int64_t QuickSyncEncoder::global_delay() const {