From: Steinar H. Gunderson Date: Tue, 19 Apr 2016 21:46:26 +0000 (+0200) Subject: Set duration for all video frames. X-Git-Tag: 1.3.0~68 X-Git-Url: https://git.sesse.net/?p=nageru;a=commitdiff_plain;h=d4ffc0e71f0908d3ecc2e88b8675cfe4e93febe4 Set duration for all video frames. Generally the muxer will ignore these, but there's one specific case where it cannot: Since we flush before keyframes, the mov (MP4) mux will not be able to set a proper duration based on the next frame (usually it just does next_dts - dts), and thus guesses that it will be the same as the previous one. If we dropped a frame between those two, the duration of said last frame will be wrong -- and the keyframe (starting in the next fragment) will get the wrong pts, possibly seemingly even going backwards. If we lose a frame between the last frame and the keyframe, the pts of the keyframe will still be a bit wonky, but according to wbs (who wrote the mux), it's much better like so. --- diff --git a/h264encode.cpp b/h264encode.cpp index 5f4ace9..3380ba4 100644 --- a/h264encode.cpp +++ b/h264encode.cpp @@ -133,14 +133,22 @@ class FrameReorderer { public: FrameReorderer(unsigned queue_length, int width, int height); + struct Frame { + int64_t pts, duration; + uint8_t *data; + + // Invert to get the smallest pts first. + bool operator< (const Frame &other) const { return pts > other.pts; } + }; + // Returns the next frame to insert with its pts, if any. Otherwise -1 and nullptr. // Does _not_ take ownership of data; a copy is taken if needed. // The returned pointer is valid until the next call to reorder_frame, or destruction. // As a special case, if queue_length == 0, will just return pts and data (no reordering needed). - pair reorder_frame(int64_t pts, const uint8_t *data); + Frame reorder_frame(int64_t pts, int64_t duration, uint8_t *data); // The same as reorder_frame, but without inserting anything. Used to empty the queue. - pair get_first_frame(); + Frame get_first_frame(); bool empty() const { return frames.empty(); } @@ -148,7 +156,7 @@ private: unsigned queue_length; int width, height; - priority_queue> frames; + priority_queue frames; stack freelist; // Includes the last value returned from reorder_frame. // Owns all the pointers. Normally, freelist and frames could do this themselves, @@ -165,33 +173,32 @@ FrameReorderer::FrameReorderer(unsigned queue_length, int width, int height) } } -pair FrameReorderer::reorder_frame(int64_t pts, const uint8_t *data) +FrameReorderer::Frame FrameReorderer::reorder_frame(int64_t pts, int64_t duration, uint8_t *data) { if (queue_length == 0) { - return make_pair(pts, data); + return Frame{pts, duration, data}; } assert(!freelist.empty()); uint8_t *storage = freelist.top(); freelist.pop(); memcpy(storage, data, width * height * 2); - frames.emplace(-pts, storage); // Invert pts to get smallest first. + frames.push(Frame{pts, duration, storage}); if (frames.size() >= queue_length) { return get_first_frame(); } else { - return make_pair(-1, nullptr); + return Frame{-1, -1, nullptr}; } } -pair FrameReorderer::get_first_frame() +FrameReorderer::Frame FrameReorderer::get_first_frame() { assert(!frames.empty()); - pair storage = frames.top(); + Frame storage = frames.top(); frames.pop(); - int64_t pts = storage.first; - freelist.push(storage.second); - return make_pair(-pts, storage.second); // Re-invert pts (see reorder_frame()). + freelist.push(storage.data); + return storage; } class H264EncoderImpl : public KeyFrameSignalReceiver { @@ -200,7 +207,7 @@ public: ~H264EncoderImpl(); void add_audio(int64_t pts, vector audio); bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex); - RefCountedGLsync end_frame(int64_t pts, const vector &input_frames); + RefCountedGLsync end_frame(int64_t pts, int64_t duration, const vector &input_frames); void shutdown(); void open_output_file(const std::string &filename); void close_output_file(); @@ -214,12 +221,12 @@ private: unsigned long long display_order; int frame_type; vector audio; - int64_t pts, dts; + int64_t pts, dts, duration; }; struct PendingFrame { RefCountedGLsync fence; vector input_frames; - int64_t pts; + int64_t pts, duration; }; // So we never get negative dts. @@ -229,9 +236,9 @@ private: void encode_thread_func(); void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts); - void add_packet_for_uncompressed_frame(int64_t pts, const uint8_t *data); + void add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data); void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, - int frame_type, int64_t pts, int64_t dts); + int frame_type, int64_t pts, int64_t dts, int64_t duration); void storage_task_thread(); void encode_audio(const vector &audio, vector *audio_queue, @@ -1671,7 +1678,7 @@ void H264EncoderImpl::save_codeddata(storage_task task) } else { pkt.flags = 0; } - //pkt.duration = 1; + pkt.duration = task.duration; if (file_mux) { file_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay()); } @@ -2079,7 +2086,7 @@ void H264EncoderImpl::add_audio(int64_t pts, vector audio) frame_queue_nonempty.notify_all(); } -RefCountedGLsync H264EncoderImpl::end_frame(int64_t pts, const vector &input_frames) +RefCountedGLsync H264EncoderImpl::end_frame(int64_t pts, int64_t duration, const vector &input_frames) { assert(!is_shutdown); @@ -2118,7 +2125,7 @@ RefCountedGLsync H264EncoderImpl::end_frame(int64_t pts, const vector lock(frame_queue_mutex); - pending_video_frames[current_storage_frame] = PendingFrame{ fence, input_frames, pts }; + pending_video_frames[current_storage_frame] = PendingFrame{ fence, input_frames, pts, duration }; ++current_storage_frame; } frame_queue_nonempty.notify_all(); @@ -2285,7 +2292,7 @@ void H264EncoderImpl::encode_thread_func() } last_dts = dts; - encode_frame(frame, encoding_frame_num, display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts); + encode_frame(frame, encoding_frame_num, display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration); } } @@ -2301,7 +2308,7 @@ void H264EncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num, int g PendingFrame frame = move(pending_frame.second); int64_t dts = last_dts + (TIMEBASE / MAX_FPS); printf("Finalizing encode: Encoding leftover frame %d as P-frame instead of B-frame.\n", display_frame_num); - encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts); + encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration); last_dts = dts; } @@ -2309,12 +2316,12 @@ void H264EncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num, int g global_flags.x264_video_to_http) { // Add frames left in reorderer. while (!reorderer->empty()) { - pair output_frame = reorderer->get_first_frame(); + FrameReorderer::Frame output_frame = reorderer->get_first_frame(); if (global_flags.uncompressed_video_to_http) { - add_packet_for_uncompressed_frame(output_frame.first, output_frame.second); + add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data); } else { assert(global_flags.x264_video_to_http); - x264_encoder->add_frame(output_frame.first, output_frame.second); + x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data); } } } @@ -2346,7 +2353,7 @@ void H264EncoderImpl::encode_remaining_audio() } } -void H264EncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, const uint8_t *data) +void H264EncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data) { AVPacket pkt; memset(&pkt, 0, sizeof(pkt)); @@ -2355,6 +2362,7 @@ void H264EncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, const uint8 pkt.size = frame_width * frame_height * 2; pkt.stream_index = 0; pkt.flags = AV_PKT_FLAG_KEY; + pkt.duration = duration; stream_mux->add_packet(pkt, pts, pts); } @@ -2376,7 +2384,7 @@ void memcpy_with_pitch(uint8_t *dst, const uint8_t *src, size_t src_width, size_ } // namespace void H264EncoderImpl::encode_frame(H264EncoderImpl::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, - int frame_type, int64_t pts, int64_t dts) + int frame_type, int64_t pts, int64_t dts, int64_t duration) { // Wait for the GPU to be done with the frame. GLenum sync_status; @@ -2414,13 +2422,13 @@ void H264EncoderImpl::encode_frame(H264EncoderImpl::PendingFrame frame, int enco global_flags.x264_video_to_http) { // Add uncompressed video. (Note that pts == dts here.) // Delay needs to match audio. - pair output_frame = reorderer->reorder_frame(pts + global_delay(), reinterpret_cast(surf->y_ptr)); - if (output_frame.second != nullptr) { + FrameReorderer::Frame output_frame = reorderer->reorder_frame(pts + global_delay(), duration, reinterpret_cast(surf->y_ptr)); + if (output_frame.data != nullptr) { if (global_flags.uncompressed_video_to_http) { - add_packet_for_uncompressed_frame(output_frame.first, output_frame.second); + add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data); } else { assert(global_flags.x264_video_to_http); - x264_encoder->add_frame(output_frame.first, output_frame.second); + x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data); } } } @@ -2457,6 +2465,7 @@ void H264EncoderImpl::encode_frame(H264EncoderImpl::PendingFrame frame, int enco tmp.frame_type = frame_type; tmp.pts = pts; tmp.dts = dts; + tmp.duration = duration; storage_task_enqueue(move(tmp)); update_ReferenceFrames(frame_type); @@ -2479,9 +2488,9 @@ bool H264Encoder::begin_frame(GLuint *y_tex, GLuint *cbcr_tex) return impl->begin_frame(y_tex, cbcr_tex); } -RefCountedGLsync H264Encoder::end_frame(int64_t pts, const vector &input_frames) +RefCountedGLsync H264Encoder::end_frame(int64_t pts, int64_t duration, const vector &input_frames) { - return impl->end_frame(pts, input_frames); + return impl->end_frame(pts, duration, input_frames); } void H264Encoder::shutdown() diff --git a/h264encode.h b/h264encode.h index 527074e..aeeabb8 100644 --- a/h264encode.h +++ b/h264encode.h @@ -50,7 +50,7 @@ public: void add_audio(int64_t pts, std::vector audio); bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex); - RefCountedGLsync end_frame(int64_t pts, const std::vector &input_frames); + RefCountedGLsync end_frame(int64_t pts, int64_t duration, const std::vector &input_frames); void shutdown(); // Blocking. // You can only have one going at the same time. diff --git a/mixer.cpp b/mixer.cpp index 3d4cccb..3134dd1 100644 --- a/mixer.cpp +++ b/mixer.cpp @@ -676,9 +676,10 @@ void Mixer::thread_func() } } - render_one_frame(); + int64_t duration = new_frames[master_card_index].length; + render_one_frame(duration); ++frame; - pts_int += new_frames[master_card_index].length; + pts_int += duration; clock_gettime(CLOCK_MONOTONIC, &now); double elapsed = now.tv_sec - start.tv_sec + @@ -774,7 +775,7 @@ void Mixer::schedule_audio_resampling_tasks(unsigned dropped_frames, int num_sam } } -void Mixer::render_one_frame() +void Mixer::render_one_frame(int64_t duration) { // Get the main chain from the theme, and set its state immediately. Theme::Chain theme_main_chain = theme->get_chain(0, pts(), WIDTH, HEIGHT, input_state); @@ -805,7 +806,7 @@ void Mixer::render_one_frame() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); const int64_t av_delay = TIMEBASE / 10; // Corresponds to the fixed delay in resampling_queue.h. TODO: Make less hard-coded. - RefCountedGLsync fence = h264_encoder->end_frame(pts_int + av_delay, theme_main_chain.input_frames); + RefCountedGLsync fence = h264_encoder->end_frame(pts_int + av_delay, duration, theme_main_chain.input_frames); // The live frame just shows the RGBA texture we just rendered. // It owns rgba_tex now. diff --git a/mixer.h b/mixer.h index 6f04c67..c7d7e18 100644 --- a/mixer.h +++ b/mixer.h @@ -356,7 +356,7 @@ private: void place_rectangle(movit::Effect *resample_effect, movit::Effect *padding_effect, float x0, float y0, float x1, float y1); void thread_func(); void schedule_audio_resampling_tasks(unsigned dropped_frames, int num_samples_per_frame, int length_per_frame); - void render_one_frame(); + void render_one_frame(int64_t duration); void send_audio_level_callback(); void audio_thread_func(); void process_audio_one_frame(int64_t frame_pts_int, int num_samples); diff --git a/x264encode.cpp b/x264encode.cpp index 7812793..25b4423 100644 --- a/x264encode.cpp +++ b/x264encode.cpp @@ -30,10 +30,11 @@ X264Encoder::~X264Encoder() encoder_thread.join(); } -void X264Encoder::add_frame(int64_t pts, const uint8_t *data) +void X264Encoder::add_frame(int64_t pts, int64_t duration, const uint8_t *data) { QueuedFrame qf; qf.pts = pts; + qf.duration = duration; { lock_guard lock(mu); @@ -113,6 +114,7 @@ void X264Encoder::encoder_thread_func() queued_frames.pop(); } else { qf.pts = -1; + qf.duration = -1; qf.data = nullptr; } @@ -149,6 +151,7 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf) pic.img.i_stride[0] = WIDTH; pic.img.plane[1] = qf.data + WIDTH * HEIGHT; pic.img.i_stride[1] = WIDTH / 2 * sizeof(uint16_t); + pic.opaque = reinterpret_cast(intptr_t(qf.duration)); x264_encoder_encode(x264, &nal, &num_nal, &pic, &pic); } else { @@ -181,6 +184,7 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf) } else { pkt.flags = 0; } + pkt.duration = reinterpret_cast(pic.opaque); mux->add_packet(pkt, pic.i_pts, pic.i_dts); -} +} diff --git a/x264encode.h b/x264encode.h index 6f0d6a5..e146cd2 100644 --- a/x264encode.h +++ b/x264encode.h @@ -41,11 +41,11 @@ public: // is taken to be raw NV12 data of WIDTHxHEIGHT resolution. // Does not block. - void add_frame(int64_t pts, const uint8_t *data); + void add_frame(int64_t pts, int64_t duration, const uint8_t *data); private: struct QueuedFrame { - int64_t pts; + int64_t pts, duration; uint8_t *data; }; void encoder_thread_func();