}
int video_stream_index = find_stream_index(format_ctx.get(), AVMEDIA_TYPE_VIDEO);
- if (video_stream_index == -1) {
- fprintf(stderr, "%s: No video stream found\n", pathname.c_str());
- return false;
- }
-
int audio_stream_index = find_stream_index(format_ctx.get(), AVMEDIA_TYPE_AUDIO);
- // Open video decoder.
- const AVCodecParameters *video_codecpar = format_ctx->streams[video_stream_index]->codecpar;
- AVCodec *video_codec = avcodec_find_decoder(video_codecpar->codec_id);
- video_timebase = format_ctx->streams[video_stream_index]->time_base;
- AVCodecContextWithDeleter video_codec_ctx = avcodec_alloc_context3_unique(nullptr);
- if (avcodec_parameters_to_context(video_codec_ctx.get(), video_codecpar) < 0) {
- fprintf(stderr, "%s: Cannot fill video codec parameters\n", pathname.c_str());
+ if (video_stream_index == -1 && audio_stream_index == -1) {
+ fprintf(stderr, "%s: No audio nor video stream found\n", pathname.c_str());
return false;
}
- if (video_codec == nullptr) {
- fprintf(stderr, "%s: Cannot find video decoder\n", pathname.c_str());
- return false;
- }
- if (avcodec_open2(video_codec_ctx.get(), video_codec, nullptr) < 0) {
- fprintf(stderr, "%s: Cannot open video decoder\n", pathname.c_str());
- return false;
+ if (video_stream_index == -1) {
+ fprintf(stderr, "%s: No video stream found, assuming audio-only.\n", pathname.c_str());
+ }
+ const bool audio_only_stream = (video_stream_index == -1);
+
+ // Open video decoder, if we have video.
+ AVCodecContextWithDeleter video_codec_ctx;
+ if (video_stream_index != -1) {
+ const AVCodecParameters *video_codecpar = format_ctx->streams[video_stream_index]->codecpar;
+ AVCodec *video_codec = avcodec_find_decoder(video_codecpar->codec_id);
+ video_timebase = format_ctx->streams[video_stream_index]->time_base;
+ video_codec_ctx = avcodec_alloc_context3_unique(nullptr);
+ if (avcodec_parameters_to_context(video_codec_ctx.get(), video_codecpar) < 0) {
+ fprintf(stderr, "%s: Cannot fill video codec parameters\n", pathname.c_str());
+ return false;
+ }
+ if (video_codec == nullptr) {
+ fprintf(stderr, "%s: Cannot find video decoder\n", pathname.c_str());
+ return false;
+ }
+ if (avcodec_open2(video_codec_ctx.get(), video_codec, nullptr) < 0) {
+ fprintf(stderr, "%s: Cannot open video decoder\n", pathname.c_str());
+ return false;
+ }
}
unique_ptr<AVCodecContext, decltype(avcodec_close)*> video_codec_ctx_cleanup(
video_codec_ctx.get(), avcodec_close);
if (error) {
return false;
}
- if (frame == nullptr) {
+ if (frame == nullptr && !(audio_only_stream && audio_frame->len > 0)) {
// EOF. Loop back to the start if we can.
if (av_seek_frame(format_ctx.get(), /*stream_index=*/-1, /*timestamp=*/0, /*flags=*/0) < 0) {
fprintf(stderr, "%s: Rewind failed, not looping.\n", pathname.c_str());
continue;
}
- VideoFormat video_format = construct_video_format(frame.get(), video_timebase);
- UniqueFrame video_frame = make_video_frame(frame.get(), pathname, &error);
- if (error) {
- return false;
+ VideoFormat video_format;
+ UniqueFrame video_frame;
+ if (!audio_only_stream) {
+ video_format = construct_video_format(frame.get(), video_timebase);
+ video_frame = make_video_frame(frame.get(), pathname, &error);
+ if (error) {
+ return false;
+ }
}
- for ( ;; ) {
+ int64_t frame_pts = audio_only_stream ? audio_pts : frame->pts;
+ AVRational timebase = audio_only_stream ? audio_timebase : video_timebase;
+ for ( ;; ) { // Try sending the frame in a loop as long as we get interrupted (then break).
if (last_pts == 0 && pts_origin == 0) {
- pts_origin = frame->pts;
+ pts_origin = frame_pts;
}
- next_frame_start = compute_frame_start(frame->pts, pts_origin, video_timebase, start, rate);
- if (first_frame && last_frame_was_connected) {
- // If reconnect took more than one second, this is probably a live feed,
- // and we should reset the resampler. (Or the rate is really, really low,
- // in which case a reset on the first frame is fine anyway.)
- if (duration<double>(next_frame_start - last_frame).count() >= 1.0) {
- last_frame_was_connected = false;
+ next_frame_start = compute_frame_start(frame_pts, pts_origin, timebase, start, rate);
+ if (audio_only_stream) {
+ audio_frame->received_timestamp = next_frame_start;
+ } else {
+ if (first_frame && last_frame_was_connected) {
+ // If reconnect took more than one second, this is probably a live feed,
+ // and we should reset the resampler. (Or the rate is really, really low,
+ // in which case a reset on the first frame is fine anyway.)
+ if (duration<double>(next_frame_start - last_frame).count() >= 1.0) {
+ last_frame_was_connected = false;
+ }
+ }
+ video_frame->received_timestamp = next_frame_start;
+
+ // The easiest way to get all the rate conversions etc. right is to move the
+ // audio PTS into the video PTS timebase and go from there. (We'll get some
+ // rounding issues, but they should not be a big problem.)
+ int64_t audio_pts_as_video_pts = av_rescale_q(audio_pts, audio_timebase, video_timebase);
+ audio_frame->received_timestamp = compute_frame_start(audio_pts_as_video_pts, pts_origin, video_timebase, start, rate);
+
+ if (audio_frame->len != 0) {
+ // The received timestamps in Nageru are measured after we've just received the frame.
+ // However, pts (especially audio pts) is at the _beginning_ of the frame.
+ // If we have locked audio, the distinction doesn't really matter, as pts is
+ // on a relative scale and a fixed offset is fine. But if we don't, we will have
+ // a different number of samples each time, which will cause huge audio jitter
+ // and throw off the resampler.
+ //
+ // In a sense, we should have compensated by adding the frame and audio lengths
+ // to video_frame->received_timestamp and audio_frame->received_timestamp respectively,
+ // but that would mean extra waiting in sleep_until(). All we need is that they
+ // are correct relative to each other, though (and to the other frames we send),
+ // so just align the end of the audio frame, and we're fine.
+ size_t num_samples = (audio_frame->len * 8) / audio_format.bits_per_sample / audio_format.num_channels;
+ double offset = double(num_samples) / OUTPUT_FREQUENCY -
+ double(video_format.frame_rate_den) / video_format.frame_rate_nom;
+ audio_frame->received_timestamp += duration_cast<steady_clock::duration>(duration<double>(offset));
}
- }
- video_frame->received_timestamp = next_frame_start;
-
- // The easiest way to get all the rate conversions etc. right is to move the
- // audio PTS into the video PTS timebase and go from there. (We'll get some
- // rounding issues, but they should not be a big problem.)
- int64_t audio_pts_as_video_pts = av_rescale_q(audio_pts, audio_timebase, video_timebase);
- audio_frame->received_timestamp = compute_frame_start(audio_pts_as_video_pts, pts_origin, video_timebase, start, rate);
-
- if (audio_frame->len != 0) {
- // The received timestamps in Nageru are measured after we've just received the frame.
- // However, pts (especially audio pts) is at the _beginning_ of the frame.
- // If we have locked audio, the distinction doesn't really matter, as pts is
- // on a relative scale and a fixed offset is fine. But if we don't, we will have
- // a different number of samples each time, which will cause huge audio jitter
- // and throw off the resampler.
- //
- // In a sense, we should have compensated by adding the frame and audio lengths
- // to video_frame->received_timestamp and audio_frame->received_timestamp respectively,
- // but that would mean extra waiting in sleep_until(). All we need is that they
- // are correct relative to each other, though (and to the other frames we send),
- // so just align the end of the audio frame, and we're fine.
- size_t num_samples = (audio_frame->len * 8) / audio_format.bits_per_sample / audio_format.num_channels;
- double offset = double(num_samples) / OUTPUT_FREQUENCY -
- double(video_format.frame_rate_den) / video_format.frame_rate_nom;
- audio_frame->received_timestamp += duration_cast<steady_clock::duration>(duration<double>(offset));
}
steady_clock::time_point now = steady_clock::now();
fprintf(stderr, "%s: Playback %.0f ms behind, resetting time scale\n",
pathname.c_str(),
1e3 * duration<double>(now - next_frame_start).count());
- pts_origin = frame->pts;
+ pts_origin = frame_pts;
start = next_frame_start = now;
timecode += MAX_FPS * 2 + 1;
}
// audio discontinuity.)
timecode += MAX_FPS * 2 + 1;
}
- frame_callback(frame->pts, video_timebase, audio_pts, audio_timebase, timecode++,
+ frame_callback(frame_pts, video_timebase, audio_pts, audio_timebase, timecode++,
video_frame.get_and_release(), 0, video_format,
audio_frame.get_and_release(), 0, audio_format);
first_frame = false;
}
}
}
- last_pts = frame->pts;
+ last_pts = frame_pts;
}
return true;
}
}
}
- // Decode video, if we have a frame.
- int err = avcodec_receive_frame(video_codec_ctx, video_avframe.get());
- if (err == 0) {
- frame_finished = true;
- break;
- } else if (err != AVERROR(EAGAIN)) {
- fprintf(stderr, "%s: Cannot receive frame from video codec.\n", pathname.c_str());
- *error = true;
+ if (video_codec_ctx != nullptr) {
+ // Decode video, if we have a frame.
+ int err = avcodec_receive_frame(video_codec_ctx, video_avframe.get());
+ if (err == 0) {
+ frame_finished = true;
+ break;
+ } else if (err != AVERROR(EAGAIN)) {
+ fprintf(stderr, "%s: Cannot receive frame from video codec.\n", pathname.c_str());
+ *error = true;
+ return AVFrameWithDeleter(nullptr);
+ }
+ } else {
return AVFrameWithDeleter(nullptr);
}
} while (!eof);
}
av_opt_set_int(resampler, "in_channel_layout", channel_layout, 0);
- av_opt_set_int(resampler, "out_channel_layout", AV_CH_LAYOUT_STEREO, 0);
+ av_opt_set_int(resampler, "out_channel_layout", AV_CH_LAYOUT_STEREO_DOWNMIX, 0);
av_opt_set_int(resampler, "in_sample_rate", av_frame_get_sample_rate(audio_avframe), 0);
av_opt_set_int(resampler, "out_sample_rate", OUTPUT_FREQUENCY, 0);
av_opt_set_int(resampler, "in_sample_fmt", audio_avframe->format, 0);