From 02083dca89292d6ba95642cf8957f1cee8826313 Mon Sep 17 00:00:00 2001
From: "Steinar H. Gunderson" <sgunderson@bigfoot.com>
Date: Wed, 14 Oct 2015 01:26:44 +0200
Subject: [PATCH] Rework entire pts handling.

Now handles dropped video frames (with audio) correctly (by just increasing
pts and letting everything else stay the same), and also inserts silence
whenever needed. There are some new issues with dts going above pts when
this happens, but overall, this is infinitely better than what we had.
---
 bmusb                 |   2 +-
 h264encode.cpp        |  32 ++++----
 h264encode.h          |   5 +-
 mixer.cpp             | 179 ++++++++++++++++++++++++++++++++----------
 mixer.h               |  12 ++-
 ref_counted_frame.cpp |   6 +-
 resampler.cpp         |  11 ++-
 resampler.h           |   4 +-
 8 files changed, 182 insertions(+), 69 deletions(-)

diff --git a/bmusb b/bmusb
index 6fd0f2d..e6f8f80 160000
--- a/bmusb
+++ b/bmusb
@@ -1 +1 @@
-Subproject commit 6fd0f2d883caf80001dc04e324ac4036ae396e06
+Subproject commit e6f8f805a7224fedf90ba90ef5a3ca83d21b6c6b
diff --git a/h264encode.cpp b/h264encode.cpp
index 7e34b5c..558bf4f 100644
--- a/h264encode.cpp
+++ b/h264encode.cpp
@@ -1588,10 +1588,10 @@ int H264Encoder::save_codeddata(storage_task task)
 	}
 	vaUnmapBuffer(va_dpy, gl_surfaces[task.display_order % SURFACE_NUM].coded_buf);
 
-	const int64_t pts_dts_delay = (ip_period - 1) * (TIMEBASE / frame_rate);
-	const int64_t av_delay = TIMEBASE / 30;  // Corresponds to the fixed delay in resampler.h. TODO: Make less hard-coded.
+	const int64_t pts_dts_delay = (ip_period - 1) * (TIMEBASE / frame_rate);  // FIXME: Wrong for variable frame rate.
+	const int64_t av_delay = TIMEBASE / 10;  // Corresponds to the fixed delay in resampler.h. TODO: Make less hard-coded.
+	int64_t pts, dts;
 	{
-		int64_t pts, dts;
 		unique_lock<mutex> lock(frame_queue_mutex);
 		assert(timestamps.count(task.display_order));
@@ -1619,21 +1619,16 @@ int H264Encoder::save_codeddata(storage_task task)
 	// Encode and add all audio frames up to and including the pts of this video frame.
 	// (They can never be queued to us after the video frame they belong to, only before.)
 	for ( ;; ) {
-		int display_order;
-		int64_t pts;
+		int64_t audio_pts;
 		std::vector<float> audio;
 		{
 			unique_lock<mutex> lock(frame_queue_mutex);
 			if (pending_audio_frames.empty()) break;
 			auto it = pending_audio_frames.begin();
-			if (it->first > int(task.display_order)) break;
-			display_order = it->first;
+			if (it->first > int(pts)) break;
+			audio_pts = it->first;
 			audio = move(it->second);
 			pending_audio_frames.erase(it);
-
-			auto pts_it = timestamps.find(display_order);
-			assert(pts_it != timestamps.end());
-			pts = pts_it->second;
 		}
 		AVFrame *frame = avcodec_alloc_frame();
 		frame->nb_samples = audio.size() / 2;
@@ -1654,7 +1649,7 @@ int H264Encoder::save_codeddata(storage_task task)
 		int got_output;
 		avcodec_encode_audio2(avstream_audio->codec, &pkt, frame, &got_output);
 		if (got_output) {
-			pkt.pts = av_rescale_q(pts + pts_dts_delay, AVRational{1, TIMEBASE}, avstream_audio->time_base);
+			pkt.pts = av_rescale_q(audio_pts + pts_dts_delay, AVRational{1, TIMEBASE}, avstream_audio->time_base);
 			pkt.dts = pkt.pts;
 			pkt.stream_index = 1;
 			av_interleaved_write_frame(avctx, &pkt);
@@ -1946,12 +1941,21 @@ bool H264Encoder::begin_frame(GLuint *y_tex, GLuint *cbcr_tex)
 	return true;
 }
 
-void H264Encoder::end_frame(RefCountedGLsync fence, int64_t pts, std::vector<float> audio, const std::vector<RefCountedFrame> &input_frames)
+void H264Encoder::add_audio(int64_t pts, std::vector<float> audio)
+{
+	{
+		unique_lock<mutex> lock(frame_queue_mutex);
+		pending_audio_frames[pts] = move(audio);
+	}
+	frame_queue_nonempty.notify_one();
+}
+
+void H264Encoder::end_frame(RefCountedGLsync fence, int64_t pts, const std::vector<RefCountedFrame> &input_frames)
 {
 	{
 		unique_lock<mutex> lock(frame_queue_mutex);
 		pending_video_frames[current_storage_frame] = PendingFrame{ fence, input_frames };
-		pending_audio_frames[current_storage_frame] = move(audio);
 		timestamps[current_storage_frame] = pts;
 		++current_storage_frame;
 	}
diff --git a/h264encode.h b/h264encode.h
index 1eb09b4..ec49fb7 100644
--- a/h264encode.h
+++ b/h264encode.h
@@ -66,8 +66,9 @@ public:
 	};
 	void
 #endif
+	void add_audio(int64_t pts, std::vector<float> audio);  // Needs to come before end_frame() of same pts.
 	bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
-	void end_frame(RefCountedGLsync fence, int64_t pts, std::vector<float> audio, const std::vector<RefCountedFrame> &input_frames);
+	void end_frame(RefCountedGLsync fence, int64_t pts, const std::vector<RefCountedFrame> &input_frames);
 
 private:
 	struct storage_task {
@@ -103,7 +104,7 @@ private:
 		std::vector<RefCountedFrame> input_frames;
 	};
 	std::map<int, PendingFrame> pending_video_frames;  // under frame_queue_mutex
-	std::map<int, std::vector<float>> pending_audio_frames;  // under frame_queue_mutex
+	std::map<int64_t, std::vector<float>> pending_audio_frames;  // under frame_queue_mutex
 	std::map<int, int64_t> timestamps;  // under frame_queue_mutex
 
 	QSurface *surface;
diff --git a/mixer.cpp b/mixer.cpp
index 9470e41..4a758c0 100644
--- a/mixer.cpp
+++ b/mixer.cpp
@@ -119,7 +119,7 @@ Mixer::Mixer(const QSurfaceFormat &format)
 		[this]{
 			resource_pool->clean_context();
 		});
-	card->resampler = new Resampler(48000.0, 48000.0, 2);
+	card->resampler.reset(new Resampler(48000.0, 48000.0, 2));
 	card->usb->configure_card();
 }
 
@@ -160,31 +160,109 @@ Mixer::~Mixer()
 	}
 }
 
+namespace {
+
+int unwrap_timecode(uint16_t current_wrapped, int last)
+{
+	uint16_t last_wrapped = last & 0xffff;
+	if (current_wrapped > last_wrapped) {
+		return (last & ~0xffff) | current_wrapped;
+	} else {
+		return 0x10000 + ((last & ~0xffff) | current_wrapped);
+	}
+}
+
+}  // namespace
+
 void Mixer::bm_frame(int card_index, uint16_t timecode,
                      FrameAllocator::Frame video_frame, size_t video_offset, uint16_t video_format,
                      FrameAllocator::Frame audio_frame, size_t audio_offset, uint16_t audio_format)
 {
 	CaptureCard *card = &cards[card_index];
 
-	if (video_frame.len - video_offset != 1280 * 750 * 2) {
-		printf("dropping frame with wrong length (%ld)\n", video_frame.len - video_offset);
-		card->usb->get_video_frame_allocator()->release_frame(video_frame);
-		card->usb->get_audio_frame_allocator()->release_frame(audio_frame);
-		return;
-	}
 	if (audio_frame.len - audio_offset > 30000) {
-		printf("dropping frame with implausible audio length (%ld)\n", audio_frame.len - audio_offset);
-		card->usb->get_video_frame_allocator()->release_frame(video_frame);
-		card->usb->get_audio_frame_allocator()->release_frame(audio_frame);
+		printf("Card %d: Dropping frame with implausible audio length (len=%d, offset=%d) [timecode=0x%04x video_len=%d video_offset=%d video_format=%x]\n",
+			card_index, int(audio_frame.len), int(audio_offset),
+			timecode, int(video_frame.len), int(video_offset), video_format);
+		if (video_frame.owner) {
+			video_frame.owner->release_frame(video_frame);
+		}
+		if (audio_frame.owner) {
+			audio_frame.owner->release_frame(audio_frame);
+		}
 		return;
 	}
 
+	// Convert the audio to stereo fp32 and add it.
+	size_t num_samples = (audio_frame.len - audio_offset) / 8 / 3;
+	vector<float> audio;
+	audio.resize(num_samples * 2);
+	convert_fixed24_to_fp32(&audio[0], 2, audio_frame.data + audio_offset, 8, num_samples);
+
+	int unwrapped_timecode = timecode;
+	int dropped_frames = 0;
+	if (card->last_timecode != -1) {
+		unwrapped_timecode = unwrap_timecode(unwrapped_timecode, card->last_timecode);
+		dropped_frames = unwrapped_timecode - card->last_timecode - 1;
+	}
+	card->last_timecode = unwrapped_timecode;
+
+	// Add the audio.
+	{
+		unique_lock<mutex> lock(card->audio_mutex);
+
+		if (dropped_frames > 60 * 2) {
+			fprintf(stderr, "Card %d lost more than two seconds (or time code jumping around), resetting resampler\n",
+				card_index);
+			card->resampler.reset(new Resampler(48000.0, 48000.0, 2));
+		} else if (dropped_frames > 0) {
+			// Insert silence as needed.
+ fprintf(stderr, "Card %d dropped %d frame(s) (before timecode 0x%04x), inserting silence.\n", + card_index, dropped_frames, timecode); + vector silence; + silence.resize((48000 / 60) * 2); + for (int i = 0; i < dropped_frames; ++i) { + card->resampler->add_input_samples((unwrapped_timecode - dropped_frames + i) / 60.0, silence.data(), (48000 / 60)); + } + } + card->resampler->add_input_samples(unwrapped_timecode / 60.0, audio.data(), num_samples); + } + + // Done with the audio, so release it. + if (audio_frame.owner) { + audio_frame.owner->release_frame(audio_frame); + } + { // Wait until the previous frame was consumed. unique_lock lock(bmusb_mutex); card->new_data_ready_changed.wait(lock, [card]{ return !card->new_data_ready || card->should_quit; }); if (card->should_quit) return; } + + if (video_frame.len - video_offset != 1280 * 750 * 2) { + if (video_frame.len != 0) { + printf("Card %d: Dropping video frame with wrong length (%ld)\n", + card_index, video_frame.len - video_offset); + } + if (video_frame.owner) { + video_frame.owner->release_frame(video_frame); + } + + // Still send on the information that we _had_ a frame, even though it's corrupted, + // so that pts can go up accordingly. + { + unique_lock lock(bmusb_mutex); + card->new_data_ready = true; + card->new_frame = RefCountedFrame(FrameAllocator::Frame()); + card->new_data_ready_fence = nullptr; + card->dropped_frames = dropped_frames; + card->new_data_ready_changed.notify_all(); + } + return; + } + const PBOFrameAllocator::Userdata *userdata = (const PBOFrameAllocator::Userdata *)video_frame.userdata; GLuint pbo = userdata->pbo; check_error(); @@ -210,23 +288,14 @@ void Mixer::bm_frame(int card_index, uint16_t timecode, check_error(); assert(fence != nullptr); - // Convert the audio to stereo fp32 and store it next to the video. - size_t num_samples = (audio_frame.len - audio_offset) / 8 / 3; - vector audio; - audio.resize(num_samples * 2); - convert_fixed24_to_fp32(&audio[0], 2, audio_frame.data + audio_offset, 8, num_samples); - { unique_lock lock(bmusb_mutex); card->new_data_ready = true; card->new_frame = RefCountedFrame(video_frame); card->new_data_ready_fence = fence; - card->new_frame_audio = move(audio); + card->dropped_frames = dropped_frames; card->new_data_ready_changed.notify_all(); } - - // Video frame will be released when last user of card->new_frame goes out of scope. - card->usb->get_audio_frame_allocator()->release_frame(audio_frame); } void Mixer::thread_func() @@ -241,9 +310,10 @@ void Mixer::thread_func() struct timespec start, now; clock_gettime(CLOCK_MONOTONIC, &start); - while (!should_quit) { - ++frame; + int frame = 0; + int dropped_frames = 0; + while (!should_quit) { CaptureCard card_copy[NUM_CARDS]; { @@ -260,14 +330,46 @@ void Mixer::thread_func() card_copy[card_index].new_frame = card->new_frame; card_copy[card_index].new_data_ready_fence = card->new_data_ready_fence; card_copy[card_index].new_frame_audio = move(card->new_frame_audio); + card_copy[card_index].dropped_frames = card->dropped_frames; card->new_data_ready = false; card->new_data_ready_changed.notify_all(); } } + // Resample the audio as needed, including from previously dropped frames. + vector samples_out; + // TODO: Allow using audio from the other card(s) as well. 
+		for (unsigned frame_num = 0; frame_num < card_copy[0].dropped_frames + 1; ++frame_num) {
+			for (unsigned card_index = 0; card_index < NUM_CARDS; ++card_index) {
+				samples_out.resize((48000 / 60) * 2);
+				{
+					unique_lock<mutex> lock(cards[card_index].audio_mutex);
+					if (!cards[card_index].resampler->get_output_samples(pts(), &samples_out[0], 48000 / 60)) {
+						printf("Card %d reported previous underrun.\n", card_index);
+					}
+				}
+				if (card_index == 0) {
+					h264_encoder->add_audio(pts_int, move(samples_out));
+				}
+			}
+			if (frame_num != card_copy[0].dropped_frames) {
+				// For dropped frames, increase the pts.
+				++dropped_frames;
+				pts_int += TIMEBASE / 60;
+			}
+		}
+
+		// If the first card is reporting a corrupted or otherwise dropped frame,
+		// just increase the pts (skipping over this frame) and don't try to compute anything new.
+		if (card_copy[0].new_frame->len == 0) {
+			++dropped_frames;
+			pts_int += TIMEBASE / 60;
+			continue;
+		}
+
 		for (int card_index = 0; card_index < NUM_CARDS; ++card_index) {
 			CaptureCard *card = &card_copy[card_index];
-			if (!card->new_data_ready)
+			if (!card->new_data_ready || card->new_frame->len == 0)
 				continue;
 
 			assert(card->new_frame != nullptr);
@@ -276,17 +378,18 @@ void Mixer::thread_func()
 
 			// The new texture might still be uploaded,
 			// tell the GPU to wait until it's there.
-			if (card->new_data_ready_fence)
+			if (card->new_data_ready_fence) {
 				glWaitSync(card->new_data_ready_fence, /*flags=*/0, GL_TIMEOUT_IGNORED);
-			check_error();
-			glDeleteSync(card->new_data_ready_fence);
-			check_error();
+				check_error();
+				glDeleteSync(card->new_data_ready_fence);
+				check_error();
+			}
 			const PBOFrameAllocator::Userdata *userdata = (const PBOFrameAllocator::Userdata *)card->new_frame->userdata;
 			theme->set_input_textures(card_index, userdata->tex_y, userdata->tex_cbcr);
 		}
 
 		// Get the main chain from the theme, and set its state immediately.
-		pair<EffectChain *, function<void()>> theme_main_chain = theme->get_chain(0, frame / 60.0f, WIDTH, HEIGHT);
+		pair<EffectChain *, function<void()>> theme_main_chain = theme->get_chain(0, pts(), WIDTH, HEIGHT);
 		EffectChain *chain = theme_main_chain.first;
 		theme_main_chain.second();
@@ -314,14 +417,6 @@ void Mixer::thread_func()
 		RefCountedGLsync fence(GL_SYNC_GPU_COMMANDS_COMPLETE, /*flags=*/0);
 		check_error();
 
-		// Resample the audio as needed.
-		// TODO: Allow using audio from the other card(s) as well.
-		double pts = frame / 60.0;
-		cards[0].resampler->add_input_samples(pts, card_copy[0].new_frame_audio.data(), card_copy[0].new_frame_audio.size() / 2);
-		vector<float> samples_out;
-		samples_out.resize((48000 / 60) * 2);
-		cards[0].resampler->get_output_samples(pts, &samples_out[0], 48000 / 60);
-
 		// Make sure the H.264 gets a reference to all the
 		// input frames needed, so that they are not released back
 		// until the rendering is done.
 		vector<RefCountedFrame> input_frames;
 		for (int card_index = 0; card_index < NUM_CARDS; ++card_index) {
 			input_frames.push_back(bmusb_current_rendering_frame[card_index]);
 		}
-		h264_encoder->end_frame(fence, frame * (TIMEBASE / 60), move(samples_out), input_frames);
+		h264_encoder->end_frame(fence, pts_int, input_frames);
+		++frame;
+		pts_int += TIMEBASE / 60;
 
 		// The live frame just shows the RGBA texture we just rendered.
 		// It owns rgba_tex now.
@@ -346,7 +443,7 @@ void Mixer::thread_func()
 		// Set up preview and any additional channels.
 		for (int i = 1; i < theme->get_num_channels() + 2; ++i) {
 			DisplayFrame display_frame;
-			pair<EffectChain *, function<void()>> chain = theme->get_chain(i, frame / 60.0f, WIDTH, HEIGHT);  // FIXME: dimensions
+			pair<EffectChain *, function<void()>> chain = theme->get_chain(i, pts(), WIDTH, HEIGHT);  // FIXME: dimensions
 			display_frame.chain = chain.first;
 			display_frame.setup_chain = chain.second;
 			display_frame.ready_fence = fence;
@@ -359,8 +456,8 @@ void Mixer::thread_func()
 		double elapsed = now.tv_sec - start.tv_sec +
 			1e-9 * (now.tv_nsec - start.tv_nsec);
 		if (frame % 100 == 0) {
-			printf("%d frames in %.3f seconds = %.1f fps (%.1f ms/frame)\n",
-				frame, elapsed, frame / elapsed,
+			printf("%d frames (%d dropped) in %.3f seconds = %.1f fps (%.1f ms/frame)\n",
+				frame, dropped_frames, elapsed, frame / elapsed,
 				1e3 * elapsed / frame);
 			// chain->print_phase_timing();
 		}
@@ -458,7 +555,7 @@ void Mixer::quit()
 
 void Mixer::transition_clicked(int transition_num)
 {
-	theme->transition_clicked(transition_num, frame / 60.0);
+	theme->transition_clicked(transition_num, pts());
 }
 
 void Mixer::channel_clicked(int preview_num)
diff --git a/mixer.h b/mixer.h
index 6098222..45311aa 100644
--- a/mixer.h
+++ b/mixer.h
@@ -16,6 +16,7 @@
 #include "ref_counted_gl_sync.h"
 #include "theme.h"
 #include "resampler.h"
+#include "timebase.h"
 
 #define NUM_CARDS 2
 
@@ -80,7 +81,7 @@ public:
 	std::vector<std::string> get_transition_names()
 	{
-		return theme->get_transition_names(frame / 60.0);
+		return theme->get_transition_names(pts());
 	}
 
 private:
@@ -91,6 +92,7 @@ private:
 	void thread_func();
 	void subsample_chroma(GLuint src_tex, GLuint dst_dst);
 	void release_display_frame(DisplayFrame *frame);
+	double pts() { return double(pts_int) / TIMEBASE; }
 
 	QSurface *mixer_surface, *h264_encoder_surface;
 	std::unique_ptr<movit::ResourcePool> resource_pool;
@@ -102,7 +104,7 @@ private:
 	// Effects part of <display_chain>. Owned by <resource_pool>.
 	movit::FlatInput *display_input;
 
-	int frame = 0;
+	int64_t pts_int = 0;  // In TIMEBASE units.
 
 	std::mutex bmusb_mutex;
 	struct CaptureCard {
@@ -119,7 +121,11 @@ private:
 		GLsync new_data_ready_fence;  // Whether new_frame is ready for rendering.
 		std::vector<float> new_frame_audio;
 		std::condition_variable new_data_ready_changed;  // Set whenever new_data_ready is changed.
-		Resampler *resampler = nullptr;
+		unsigned dropped_frames = 0;  // Before new_frame.
+
+		std::mutex audio_mutex;
+		std::unique_ptr<Resampler> resampler;  // Under audio_mutex.
+		int last_timecode = -1;  // Unwrapped.
 	};
 	CaptureCard cards[NUM_CARDS];  // protected by <bmusb_mutex>
 
diff --git a/ref_counted_frame.cpp b/ref_counted_frame.cpp
index 725291e..0b754ed 100644
--- a/ref_counted_frame.cpp
+++ b/ref_counted_frame.cpp
@@ -2,6 +2,8 @@
 
 void release_refcounted_frame(FrameAllocator::Frame *frame)
 {
-	frame->owner->release_frame(*frame);
-	delete frame;
+	if (frame->owner) {
+		frame->owner->release_frame(*frame);
+	}
+	delete frame;
 }
diff --git a/resampler.cpp b/resampler.cpp
index ee0ab87..ddcc229 100644
--- a/resampler.cpp
+++ b/resampler.cpp
@@ -55,7 +55,7 @@ void Resampler::add_input_samples(double pts, const float *samples, ssize_t num_samples)
 	}
 }
 
-void Resampler::get_output_samples(double pts, float *samples, ssize_t num_samples)
+bool Resampler::get_output_samples(double pts, float *samples, ssize_t num_samples)
 {
 	double last_output_len;
 	if (first_output) {
@@ -88,8 +88,10 @@ void Resampler::get_output_samples(double pts, float *samples, ssize_t num_samples)
 	// Compute loop filter coefficients for the two filters. We need to compute them
 	// every time, since they depend on the number of samples the user asked for.
 	//
-	// The loop bandwidth starts at 1.0 Hz, then goes down to 0.05 Hz after four seconds.
-	double loop_bandwidth_hz = (k_a0 < 4 * freq_in) ? 1.0 : 0.05;
+	// The loop bandwidth is at 0.02 Hz; we trust the initial estimate quite well,
+	// and our jitter is pretty large since none of the threads involved run at
+	// real-time priority.
+	double loop_bandwidth_hz = 0.02;
 
 	// Set filters. The first filter is much wider than the second one (20x as wide).
 	double w = (2.0 * M_PI) * loop_bandwidth_hz * num_samples / freq_out;
@@ -116,7 +118,7 @@ void Resampler::get_output_samples(double pts, float *samples, ssize_t num_samples)
 			fprintf(stderr, "PANIC: Out of input samples to resample, still need %d output samples!\n",
 				int(vresampler.out_count));
 			memset(vresampler.out_data, 0, vresampler.out_count * sizeof(float));
-			break;
+			return false;
 		}
 
 		float inbuf[1024];
@@ -137,4 +139,5 @@ void Resampler::get_output_samples(double pts, float *samples, ssize_t num_samples)
 		total_consumed_samples += consumed_samples;
 		buffer.erase(buffer.begin(), buffer.begin() + consumed_samples * num_channels);
 	}
+	return true;
 }
diff --git a/resampler.h b/resampler.h
index aa7af3d..85fea83 100644
--- a/resampler.h
+++ b/resampler.h
@@ -50,7 +50,7 @@ public:
 	// Note: pts is always in seconds.
 	void add_input_samples(double pts, const float *samples, ssize_t num_samples);
-	void get_output_samples(double pts, float *samples, ssize_t num_samples);
+	bool get_output_samples(double pts, float *samples, ssize_t num_samples);  // Returns false if underrun.
 
 private:
 	void init_loop_filter(double bandwidth_hz);
@@ -80,7 +80,7 @@ private:
 	// How much delay we are expected to have, in input samples.
 	// If actual delay drifts too much away from this, we will start
 	// changing the resampling ratio to compensate.
-	double expected_delay = 1600.0;
+	double expected_delay = 4800.0;
 
 	// Input samples not yet fed into the resampler.
 	// TODO: Use a circular buffer instead, for efficiency.
-- 
2.39.2
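For reference, a minimal standalone sketch of the timecode unwrapping scheme this
patch adds to mixer.cpp: the card delivers a 16-bit wrapping timecode, the mixer
keeps the last unwrapped value, and any gap in the sequence counts as dropped
frames. The helper mirrors the patch's unwrap_timecode(); the test values in
main() are made up for illustration.

#include <cassert>
#include <cstdint>
#include <cstdio>

int unwrap_timecode(uint16_t current_wrapped, int last)
{
	uint16_t last_wrapped = last & 0xffff;
	if (current_wrapped > last_wrapped) {
		// No wraparound; keep the high bits from <last>.
		return (last & ~0xffff) | current_wrapped;
	} else {
		// The 16-bit counter wrapped around (or stood still); bump the epoch.
		return 0x10000 + ((last & ~0xffff) | current_wrapped);
	}
}

int main()
{
	int last = 0xfffe;  // Just before the counter wraps.
	int unwrapped = unwrap_timecode(/*current_wrapped=*/0x0001, last);
	assert(unwrapped == 0x10001);

	// Frames 0xffff and 0x0000 never arrived, so two frames were dropped.
	int dropped_frames = unwrapped - last - 1;
	printf("unwrapped=0x%x dropped=%d\n", unwrapped, dropped_frames);
	assert(dropped_frames == 2);
	return 0;
}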
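The two delay constants touched here are meant to stay in sync: expected_delay in
resampler.h is measured in 48 kHz input samples, while av_delay in h264encode.cpp
is in TIMEBASE units, and 4800 samples = 0.1 s = TIMEBASE / 10 — hence av_delay
moving from TIMEBASE / 30 to TIMEBASE / 10 together with the 1600 → 4800 bump.
A quick sketch of the arithmetic; the TIMEBASE value below is a placeholder (the
real one lives in timebase.h, which this patch does not touch), and the check
holds for any TIMEBASE divisible by 10.

#include <cassert>
#include <cstdint>

int main()
{
	const int TIMEBASE = 120000;           // Placeholder; see timebase.h for the real value.
	const double expected_delay = 4800.0;  // In input samples, from resampler.h.
	const double freq_in = 48000.0;        // Input sample rate.

	const double delay_seconds = expected_delay / freq_in;  // 4800 / 48000 = 0.1 s.
	const int64_t av_delay = TIMEBASE / 10;                 // From h264encode.cpp.

	// TIMEBASE / 10 units is exactly 0.1 seconds, matching the resampler's fixed delay.
	assert(double(av_delay) / TIMEBASE == delay_seconds);
	return 0;
}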
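Finally, the pts scheme from the commit message, reduced to its bookkeeping:
every frame slot, dropped or not, advances pts by TIMEBASE / 60 and receives one
frame's worth of audio (silence on the capture side when frames were lost), so
audio and video cannot drift apart. The add_audio()/end_frame() calls below are
stubs standing in for the H264Encoder methods of the same names, and TIMEBASE is
again a placeholder.

#include <cstdint>
#include <cstdio>
#include <vector>

constexpr int TIMEBASE = 120000;  // Placeholder; see timebase.h.
constexpr int FPS = 60;

// Stubs standing in for H264Encoder::add_audio() and H264Encoder::end_frame().
void add_audio(int64_t pts, std::vector<float> samples) { (void)pts; (void)samples; }
void end_frame(int64_t pts) { (void)pts; }

int main()
{
	int64_t pts_int = 0;
	const unsigned dropped_frames = 2;  // As derived from the timecode gap.

	// Audio-only slots for the dropped frames first, each advancing pts...
	for (unsigned i = 0; i < dropped_frames; ++i) {
		add_audio(pts_int, std::vector<float>((48000 / FPS) * 2));  // One frame of stereo silence.
		pts_int += TIMEBASE / FPS;
	}

	// ...then the frame that actually arrived, audio and video sharing its pts.
	add_audio(pts_int, std::vector<float>((48000 / FPS) * 2));
	end_frame(pts_int);
	pts_int += TIMEBASE / FPS;

	printf("next pts: %lld\n", (long long)pts_int);  // 3 * (TIMEBASE / 60).
	return 0;
}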