Generally the muxer will ignore these, but there's one specific case
where it cannot: Since we flush before keyframes, the mov (MP4) mux
will not be able to set a proper duration based on the next frame
(usually it just does next_dts - dts), and thus guesses that it will be
the same as the previous one. If we dropped a frame between those two,
the duration of said last frame will be wrong -- and the keyframe
(starting in the next fragment) will get the wrong pts, possibly
even appearing to go backwards.
If we lose a frame between the last frame and the keyframe, the pts
of the keyframe will still be a bit wonky, but according to wbs
(who wrote the mux), it is much better this way.
public:
FrameReorderer(unsigned queue_length, int width, int height);
public:
FrameReorderer(unsigned queue_length, int width, int height);
+ struct Frame {
+ int64_t pts, duration;
+ uint8_t *data;
+
+ // Invert to get the smallest pts first.
+ bool operator< (const Frame &other) const { return pts > other.pts; }
+ };
+
// Returns the next frame to insert with its pts, if any. Otherwise -1 and nullptr.
// Does _not_ take ownership of data; a copy is taken if needed.
// The returned pointer is valid until the next call to reorder_frame, or destruction.
// As a special case, if queue_length == 0, will just return pts and data (no reordering needed).
// Returns the next frame to insert with its pts, if any. Otherwise -1 and nullptr.
// Does _not_ take ownership of data; a copy is taken if needed.
// The returned pointer is valid until the next call to reorder_frame, or destruction.
// As a special case, if queue_length == 0, will just return pts and data (no reordering needed).
- pair<int64_t, const uint8_t *> reorder_frame(int64_t pts, const uint8_t *data);
+ Frame reorder_frame(int64_t pts, int64_t duration, uint8_t *data);
// The same as reorder_frame, but without inserting anything. Used to empty the queue.
// The same as reorder_frame, but without inserting anything. Used to empty the queue.
- pair<int64_t, const uint8_t *> get_first_frame();
+ Frame get_first_frame();
bool empty() const { return frames.empty(); }
bool empty() const { return frames.empty(); }
unsigned queue_length;
int width, height;
unsigned queue_length;
int width, height;
- priority_queue<pair<int64_t, uint8_t *>> frames;
+ priority_queue<Frame> frames;
stack<uint8_t *> freelist; // Includes the last value returned from reorder_frame.
// Owns all the pointers. Normally, freelist and frames could do this themselves,
stack<uint8_t *> freelist; // Includes the last value returned from reorder_frame.
// Owns all the pointers. Normally, freelist and frames could do this themselves,
-pair<int64_t, const uint8_t *> FrameReorderer::reorder_frame(int64_t pts, const uint8_t *data)
+FrameReorderer::Frame FrameReorderer::reorder_frame(int64_t pts, int64_t duration, uint8_t *data)
{
if (queue_length == 0) {
{
if (queue_length == 0) {
- return make_pair(pts, data);
+ return Frame{pts, duration, data};
}
assert(!freelist.empty());
uint8_t *storage = freelist.top();
freelist.pop();
memcpy(storage, data, width * height * 2);
}
assert(!freelist.empty());
uint8_t *storage = freelist.top();
freelist.pop();
memcpy(storage, data, width * height * 2);
- frames.emplace(-pts, storage); // Invert pts to get smallest first.
+ frames.push(Frame{pts, duration, storage});
if (frames.size() >= queue_length) {
return get_first_frame();
} else {
if (frames.size() >= queue_length) {
return get_first_frame();
} else {
- return make_pair(-1, nullptr);
+ return Frame{-1, -1, nullptr};
-pair<int64_t, const uint8_t *> FrameReorderer::get_first_frame()
+FrameReorderer::Frame FrameReorderer::get_first_frame()
{
assert(!frames.empty());
{
assert(!frames.empty());
- pair<int64_t, uint8_t *> storage = frames.top();
+ Frame storage = frames.top();
- int64_t pts = storage.first;
- freelist.push(storage.second);
- return make_pair(-pts, storage.second); // Re-invert pts (see reorder_frame()).
+ freelist.push(storage.data);
+ return storage;
}
class H264EncoderImpl : public KeyFrameSignalReceiver {
}
class H264EncoderImpl : public KeyFrameSignalReceiver {
~H264EncoderImpl();
void add_audio(int64_t pts, vector<float> audio);
bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
~H264EncoderImpl();
void add_audio(int64_t pts, vector<float> audio);
bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
- RefCountedGLsync end_frame(int64_t pts, const vector<RefCountedFrame> &input_frames);
+ RefCountedGLsync end_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames);
void shutdown();
void open_output_file(const std::string &filename);
void close_output_file();
void shutdown();
void open_output_file(const std::string &filename);
void close_output_file();
unsigned long long display_order;
int frame_type;
vector<float> audio;
unsigned long long display_order;
int frame_type;
vector<float> audio;
+ int64_t pts, dts, duration;
};
struct PendingFrame {
RefCountedGLsync fence;
vector<RefCountedFrame> input_frames;
};
struct PendingFrame {
RefCountedGLsync fence;
vector<RefCountedFrame> input_frames;
};
// So we never get negative dts.
};
// So we never get negative dts.
void encode_thread_func();
void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts);
void encode_thread_func();
void encode_remaining_frames_as_p(int encoding_frame_num, int gop_start_display_frame_num, int64_t last_dts);
- void add_packet_for_uncompressed_frame(int64_t pts, const uint8_t *data);
+ void add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data);
void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
- int frame_type, int64_t pts, int64_t dts);
+ int frame_type, int64_t pts, int64_t dts, int64_t duration);
void storage_task_thread();
void encode_audio(const vector<float> &audio,
vector<float> *audio_queue,
void storage_task_thread();
void encode_audio(const vector<float> &audio,
vector<float> *audio_queue,
} else {
pkt.flags = 0;
}
} else {
pkt.flags = 0;
}
+ pkt.duration = task.duration;
if (file_mux) {
file_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
}
if (file_mux) {
file_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay());
}
frame_queue_nonempty.notify_all();
}
frame_queue_nonempty.notify_all();
}
-RefCountedGLsync H264EncoderImpl::end_frame(int64_t pts, const vector<RefCountedFrame> &input_frames)
+RefCountedGLsync H264EncoderImpl::end_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames)
{
unique_lock<mutex> lock(frame_queue_mutex);
{
unique_lock<mutex> lock(frame_queue_mutex);
- pending_video_frames[current_storage_frame] = PendingFrame{ fence, input_frames, pts };
+ pending_video_frames[current_storage_frame] = PendingFrame{ fence, input_frames, pts, duration };
++current_storage_frame;
}
frame_queue_nonempty.notify_all();
++current_storage_frame;
}
frame_queue_nonempty.notify_all();
- encode_frame(frame, encoding_frame_num, display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts);
+ encode_frame(frame, encoding_frame_num, display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration);
PendingFrame frame = move(pending_frame.second);
int64_t dts = last_dts + (TIMEBASE / MAX_FPS);
printf("Finalizing encode: Encoding leftover frame %d as P-frame instead of B-frame.\n", display_frame_num);
PendingFrame frame = move(pending_frame.second);
int64_t dts = last_dts + (TIMEBASE / MAX_FPS);
printf("Finalizing encode: Encoding leftover frame %d as P-frame instead of B-frame.\n", display_frame_num);
- encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts);
+ encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration);
global_flags.x264_video_to_http) {
// Add frames left in reorderer.
while (!reorderer->empty()) {
global_flags.x264_video_to_http) {
// Add frames left in reorderer.
while (!reorderer->empty()) {
- pair<int64_t, const uint8_t *> output_frame = reorderer->get_first_frame();
+ FrameReorderer::Frame output_frame = reorderer->get_first_frame();
if (global_flags.uncompressed_video_to_http) {
if (global_flags.uncompressed_video_to_http) {
- add_packet_for_uncompressed_frame(output_frame.first, output_frame.second);
+ add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data);
} else {
assert(global_flags.x264_video_to_http);
} else {
assert(global_flags.x264_video_to_http);
- x264_encoder->add_frame(output_frame.first, output_frame.second);
+ x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data);
-void H264EncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, const uint8_t *data)
+void H264EncoderImpl::add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, const uint8_t *data)
{
AVPacket pkt;
memset(&pkt, 0, sizeof(pkt));
{
AVPacket pkt;
memset(&pkt, 0, sizeof(pkt));
pkt.size = frame_width * frame_height * 2;
pkt.stream_index = 0;
pkt.flags = AV_PKT_FLAG_KEY;
pkt.size = frame_width * frame_height * 2;
pkt.stream_index = 0;
pkt.flags = AV_PKT_FLAG_KEY;
+ pkt.duration = duration;
stream_mux->add_packet(pkt, pts, pts);
}
stream_mux->add_packet(pkt, pts, pts);
}
} // namespace
void H264EncoderImpl::encode_frame(H264EncoderImpl::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
} // namespace
void H264EncoderImpl::encode_frame(H264EncoderImpl::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num,
- int frame_type, int64_t pts, int64_t dts)
+ int frame_type, int64_t pts, int64_t dts, int64_t duration)
{
// Wait for the GPU to be done with the frame.
GLenum sync_status;
{
// Wait for the GPU to be done with the frame.
GLenum sync_status;
global_flags.x264_video_to_http) {
// Add uncompressed video. (Note that pts == dts here.)
// Delay needs to match audio.
global_flags.x264_video_to_http) {
// Add uncompressed video. (Note that pts == dts here.)
// Delay needs to match audio.
- pair<int64_t, const uint8_t *> output_frame = reorderer->reorder_frame(pts + global_delay(), reinterpret_cast<uint8_t *>(surf->y_ptr));
- if (output_frame.second != nullptr) {
+ FrameReorderer::Frame output_frame = reorderer->reorder_frame(pts + global_delay(), duration, reinterpret_cast<uint8_t *>(surf->y_ptr));
+ if (output_frame.data != nullptr) {
if (global_flags.uncompressed_video_to_http) {
if (global_flags.uncompressed_video_to_http) {
- add_packet_for_uncompressed_frame(output_frame.first, output_frame.second);
+ add_packet_for_uncompressed_frame(output_frame.pts, output_frame.duration, output_frame.data);
} else {
assert(global_flags.x264_video_to_http);
} else {
assert(global_flags.x264_video_to_http);
- x264_encoder->add_frame(output_frame.first, output_frame.second);
+ x264_encoder->add_frame(output_frame.pts, output_frame.duration, output_frame.data);
tmp.frame_type = frame_type;
tmp.pts = pts;
tmp.dts = dts;
tmp.frame_type = frame_type;
tmp.pts = pts;
tmp.dts = dts;
+ tmp.duration = duration;
storage_task_enqueue(move(tmp));
update_ReferenceFrames(frame_type);
storage_task_enqueue(move(tmp));
update_ReferenceFrames(frame_type);
return impl->begin_frame(y_tex, cbcr_tex);
}
return impl->begin_frame(y_tex, cbcr_tex);
}
-RefCountedGLsync H264Encoder::end_frame(int64_t pts, const vector<RefCountedFrame> &input_frames)
+RefCountedGLsync H264Encoder::end_frame(int64_t pts, int64_t duration, const vector<RefCountedFrame> &input_frames)
- return impl->end_frame(pts, input_frames);
+ return impl->end_frame(pts, duration, input_frames);
}
void H264Encoder::shutdown()
}
void H264Encoder::shutdown()
void add_audio(int64_t pts, std::vector<float> audio);
bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
void add_audio(int64_t pts, std::vector<float> audio);
bool begin_frame(GLuint *y_tex, GLuint *cbcr_tex);
- RefCountedGLsync end_frame(int64_t pts, const std::vector<RefCountedFrame> &input_frames);
+ RefCountedGLsync end_frame(int64_t pts, int64_t duration, const std::vector<RefCountedFrame> &input_frames);
void shutdown(); // Blocking.
// You can only have one going at the same time.
void shutdown(); // Blocking.
// You can only have one going at the same time.
+ int64_t duration = new_frames[master_card_index].length;
+ render_one_frame(duration);
- pts_int += new_frames[master_card_index].length;
clock_gettime(CLOCK_MONOTONIC, &now);
double elapsed = now.tv_sec - start.tv_sec +
clock_gettime(CLOCK_MONOTONIC, &now);
double elapsed = now.tv_sec - start.tv_sec +
-void Mixer::render_one_frame()
+void Mixer::render_one_frame(int64_t duration)
{
// Get the main chain from the theme, and set its state immediately.
Theme::Chain theme_main_chain = theme->get_chain(0, pts(), WIDTH, HEIGHT, input_state);
{
// Get the main chain from the theme, and set its state immediately.
Theme::Chain theme_main_chain = theme->get_chain(0, pts(), WIDTH, HEIGHT, input_state);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
const int64_t av_delay = TIMEBASE / 10; // Corresponds to the fixed delay in resampling_queue.h. TODO: Make less hard-coded.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
const int64_t av_delay = TIMEBASE / 10; // Corresponds to the fixed delay in resampling_queue.h. TODO: Make less hard-coded.
- RefCountedGLsync fence = h264_encoder->end_frame(pts_int + av_delay, theme_main_chain.input_frames);
+ RefCountedGLsync fence = h264_encoder->end_frame(pts_int + av_delay, duration, theme_main_chain.input_frames);
// The live frame just shows the RGBA texture we just rendered.
// It owns rgba_tex now.
// The live frame just shows the RGBA texture we just rendered.
// It owns rgba_tex now.
void place_rectangle(movit::Effect *resample_effect, movit::Effect *padding_effect, float x0, float y0, float x1, float y1);
void thread_func();
void schedule_audio_resampling_tasks(unsigned dropped_frames, int num_samples_per_frame, int length_per_frame);
void place_rectangle(movit::Effect *resample_effect, movit::Effect *padding_effect, float x0, float y0, float x1, float y1);
void thread_func();
void schedule_audio_resampling_tasks(unsigned dropped_frames, int num_samples_per_frame, int length_per_frame);
- void render_one_frame();
+ void render_one_frame(int64_t duration);
void send_audio_level_callback();
void audio_thread_func();
void process_audio_one_frame(int64_t frame_pts_int, int num_samples);
void send_audio_level_callback();
void audio_thread_func();
void process_audio_one_frame(int64_t frame_pts_int, int num_samples);
-void X264Encoder::add_frame(int64_t pts, const uint8_t *data)
+void X264Encoder::add_frame(int64_t pts, int64_t duration, const uint8_t *data)
{
QueuedFrame qf;
qf.pts = pts;
{
QueuedFrame qf;
qf.pts = pts;
+ qf.duration = duration;
{
lock_guard<mutex> lock(mu);
{
lock_guard<mutex> lock(mu);
queued_frames.pop();
} else {
qf.pts = -1;
queued_frames.pop();
} else {
qf.pts = -1;
pic.img.i_stride[0] = WIDTH;
pic.img.plane[1] = qf.data + WIDTH * HEIGHT;
pic.img.i_stride[1] = WIDTH / 2 * sizeof(uint16_t);
pic.img.i_stride[0] = WIDTH;
pic.img.plane[1] = qf.data + WIDTH * HEIGHT;
pic.img.i_stride[1] = WIDTH / 2 * sizeof(uint16_t);
+ pic.opaque = reinterpret_cast<void *>(intptr_t(qf.duration));
x264_encoder_encode(x264, &nal, &num_nal, &pic, &pic);
} else {
x264_encoder_encode(x264, &nal, &num_nal, &pic, &pic);
} else {
} else {
pkt.flags = 0;
}
} else {
pkt.flags = 0;
}
+ pkt.duration = reinterpret_cast<intptr_t>(pic.opaque);
mux->add_packet(pkt, pic.i_pts, pic.i_dts);
mux->add_packet(pkt, pic.i_pts, pic.i_dts);
// <data> is taken to be raw NV12 data of WIDTHxHEIGHT resolution.
// Does not block.
// <data> is taken to be raw NV12 data of WIDTHxHEIGHT resolution.
// Does not block.
- void add_frame(int64_t pts, const uint8_t *data);
+ void add_frame(int64_t pts, int64_t duration, const uint8_t *data);
private:
struct QueuedFrame {
private:
struct QueuedFrame {
uint8_t *data;
};
void encoder_thread_func();
uint8_t *data;
};
void encoder_thread_func();