- string jpeg = readers[first_frame_stream_idx].read_frame(first_frame);
- int64_t scaled_pts = av_rescale_q(first_frame.pts, AVRational{1, TIMEBASE},
- video_streams[first_frame_stream_idx]->time_base);
- buffered_jpegs.emplace_back(BufferedJPEG{ scaled_pts, first_frame_stream_idx, std::move(jpeg) });
- if (buffered_jpegs.size() >= 1000) {
- if (!write_buffered_jpegs(avctx, buffered_jpegs)) {
+
+ FrameReader::Frame frame = readers[first_frame_stream_idx].read_frame(first_frame, /*read_video=*/true, /*read_audio=*/true);
+
+ // Queue audio for writing. (Before video, since that's what we expect on input.)
+ if (!frame.audio.empty()) {
+ unsigned audio_stream_idx = first_frame_stream_idx + video_streams.size();
+ int64_t scaled_audio_pts = av_rescale_q(first_frame.pts, AVRational{ 1, TIMEBASE },
+ audio_streams[first_frame_stream_idx]->time_base);
+ buffered_frames.emplace_back(BufferedFrame{ scaled_audio_pts, audio_stream_idx, std::move(frame.audio) });
+ }
+
+ // Queue video for writing.
+ unsigned video_stream_idx = first_frame_stream_idx;
+ int64_t scaled_video_pts = av_rescale_q(first_frame.pts, AVRational{ 1, TIMEBASE },
+ video_streams[first_frame_stream_idx]->time_base);
+ buffered_frames.emplace_back(BufferedFrame{ scaled_video_pts, video_stream_idx, std::move(frame.video) });
+
+ // Flush to disk if required.
+ if (buffered_frames.size() >= 1000) {
+ if (!write_buffered_frames(avctx, buffered_frames)) {