set_video_frame_allocator(owned_video_frame_allocator.get());
}
if (audio_frame_allocator == nullptr) {
- owned_audio_frame_allocator.reset(new MallocFrameAllocator(65536, NUM_QUEUED_AUDIO_FRAMES));
+ // Audio can come out in pretty large chunks, so increase the buffer from the old 64 kB to 1 MB.
+ owned_audio_frame_allocator.reset(new MallocFrameAllocator(1 << 20, NUM_QUEUED_AUDIO_FRAMES));
set_audio_frame_allocator(owned_audio_frame_allocator.get());
}
}
video_frame.len = width * height * 4;
memset(video_frame.data, 0, video_frame.len);
- frame_callback(-1, AVRational{1, TIMEBASE}, timecode++,
+ frame_callback(-1, AVRational{1, TIMEBASE}, -1, AVRational{1, TIMEBASE}, timecode++,
video_frame, /*video_offset=*/0, video_format,
FrameAllocator::Frame(), /*audio_offset=*/0, AudioFormat());
}
FrameAllocator::Frame audio_frame = audio_frame_allocator->alloc_frame();
AudioFormat audio_format;
+ int64_t audio_pts;
bool error;
AVFrameWithDeleter frame = decode_frame(format_ctx.get(), video_codec_ctx.get(), audio_codec_ctx.get(),
- pathname, video_stream_index, audio_stream_index, &audio_frame, &audio_format, &error);
+ pathname, video_stream_index, audio_stream_index, &audio_frame, &audio_format, &audio_pts, &error);
if (error) {
return false;
}
video_frame.received_timestamp = next_frame_start;
bool finished_wakeup = producer_thread_should_quit.sleep_until(next_frame_start);
if (finished_wakeup) {
- frame_callback(frame->pts, video_timebase, timecode++,
+ if (audio_frame.len > 0) {
+ assert(audio_pts != -1);
+ }
+ frame_callback(frame->pts, video_timebase, audio_pts, audio_timebase, timecode++,
video_frame, 0, video_format,
audio_frame, 0, audio_format);
break;
} // namespace
-AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx, const std::string &pathname, int video_stream_index, int audio_stream_index, FrameAllocator::Frame *audio_frame, AudioFormat *audio_format, bool *error)
+AVFrameWithDeleter FFmpegCapture::decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx,
+ const std::string &pathname, int video_stream_index, int audio_stream_index,
+ FrameAllocator::Frame *audio_frame, AudioFormat *audio_format, int64_t *audio_pts, bool *error)
{
*error = false;
AVFrameWithDeleter audio_avframe = av_frame_alloc_unique();
AVFrameWithDeleter video_avframe = av_frame_alloc_unique();
bool eof = false;
+ *audio_pts = -1;
do {
AVPacket pkt;
unique_ptr<AVPacket, decltype(av_packet_unref)*> pkt_cleanup(
return AVFrameWithDeleter(nullptr);
}
} else if (pkt.stream_index == audio_stream_index) {
+ if (*audio_pts == -1) {
+ *audio_pts = pkt.pts;
+ }
if (avcodec_send_packet(audio_codec_ctx, &pkt) < 0) {
fprintf(stderr, "%s: Cannot send packet to audio codec.\n", pathname.c_str());
*error = true;
}
// Decode audio, if any.
- int err = avcodec_receive_frame(audio_codec_ctx, audio_avframe.get());
- if (err == 0) {
- convert_audio(audio_avframe.get(), audio_frame, audio_format);
- } else if (err != AVERROR(EAGAIN)) {
- fprintf(stderr, "%s: Cannot receive frame from audio codec.\n", pathname.c_str());
- *error = true;
- return AVFrameWithDeleter(nullptr);
+ if (*audio_pts != -1) {
+ for ( ;; ) {
+ int err = avcodec_receive_frame(audio_codec_ctx, audio_avframe.get());
+ if (err == 0) {
+ convert_audio(audio_avframe.get(), audio_frame, audio_format);
+ } else if (err == AVERROR(EAGAIN)) {
+ break;
+ } else {
+ fprintf(stderr, "%s: Cannot receive frame from audio codec.\n", pathname.c_str());
+ *error = true;
+ return AVFrameWithDeleter(nullptr);
+ }
+ }
}
// Decode video, if we have a frame.
- err = avcodec_receive_frame(video_codec_ctx, video_avframe.get());
+ int err = avcodec_receive_frame(video_codec_ctx, video_avframe.get());
if (err == 0) {
frame_finished = true;
break;
// FFmpegCapture-specific overload of set_frame_callback that also gives
// the raw original pts from the video. Negative pts means a dummy frame.
- typedef std::function<void(int64_t pts, AVRational timebase, uint16_t timecode,
+ typedef std::function<void(int64_t video_pts, AVRational video_timebase, int64_t audio_pts, AVRational audio_timebase,
+ uint16_t timecode,
bmusb::FrameAllocator::Frame video_frame, size_t video_offset, bmusb::VideoFormat video_format,
bmusb::FrameAllocator::Frame audio_frame, size_t audio_offset, bmusb::AudioFormat audio_format)>
frame_callback_t;
{
frame_callback = std::bind(
callback,
- std::placeholders::_3,
- std::placeholders::_4,
std::placeholders::_5,
std::placeholders::_6,
std::placeholders::_7,
std::placeholders::_8,
- std::placeholders::_9);
+ std::placeholders::_9,
+ std::placeholders::_10,
+ std::placeholders::_11);
}
// FFmpegCapture-specific callback that gives the raw audio.
// Returns nullptr if no frame was decoded (e.g. EOF).
AVFrameWithDeleter decode_frame(AVFormatContext *format_ctx, AVCodecContext *video_codec_ctx, AVCodecContext *audio_codec_ctx,
const std::string &pathname, int video_stream_index, int audio_stream_index,
- bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format, bool *error);
+ bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format, int64_t *audio_pts, bool *error);
void convert_audio(const AVFrame *audio_avframe, bmusb::FrameAllocator::Frame *audio_frame, bmusb::AudioFormat *audio_format);
bmusb::VideoFormat construct_video_format(const AVFrame *frame, AVRational video_timebase);
OPTION_HTTP_COARSE_TIMEBASE,
OPTION_HTTP_AUDIO_CODEC,
OPTION_HTTP_AUDIO_BITRATE,
+ OPTION_NO_TRANSCODE_AUDIO,
OPTION_FLAT_AUDIO,
OPTION_GAIN_STAGING,
OPTION_DISABLE_LOCUT,
fprintf(stderr, " --http-audio-bitrate=KBITS audio codec bit rate to use for HTTP streams\n");
fprintf(stderr, " (default is %d, ignored unless --http-audio-codec is set)\n",
DEFAULT_AUDIO_OUTPUT_BIT_RATE / 1000);
+ if (program == PROGRAM_KAERU) {
+ fprintf(stderr, " --no-transcode-audio copy encoded audio raw from the source stream\n");
+ fprintf(stderr, " (requires --http-audio-codec= to be set)\n");
+ }
fprintf(stderr, " --http-coarse-timebase use less timebase for HTTP (recommended for muxers\n");
fprintf(stderr, " that handle large pts poorly, like e.g. MP4)\n");
if (program == PROGRAM_NAGERU) {
{ "http-coarse-timebase", no_argument, 0, OPTION_HTTP_COARSE_TIMEBASE },
{ "http-audio-codec", required_argument, 0, OPTION_HTTP_AUDIO_CODEC },
{ "http-audio-bitrate", required_argument, 0, OPTION_HTTP_AUDIO_BITRATE },
+ { "no-transcode-audio", no_argument, 0, OPTION_NO_TRANSCODE_AUDIO },
{ "flat-audio", no_argument, 0, OPTION_FLAT_AUDIO },
{ "gain-staging", required_argument, 0, OPTION_GAIN_STAGING },
{ "disable-locut", no_argument, 0, OPTION_DISABLE_LOCUT },
case OPTION_HTTP_AUDIO_BITRATE:
global_flags.stream_audio_codec_bitrate = atoi(optarg) * 1000;
break;
+ case OPTION_NO_TRANSCODE_AUDIO:
+ global_flags.transcode_audio = false;
+ break;
case OPTION_HTTP_X264_VIDEO:
global_flags.x264_video_to_http = true;
break;
fprintf(stderr, "ERROR: --output-card points to a nonexistant card\n");
exit(1);
}
+ if (!global_flags.transcode_audio && global_flags.stream_audio_codec_name.empty()) {
+ fprintf(stderr, "ERROR: If not transcoding audio, you must specify ahead-of-time what audio codec is in use\n");
+ fprintf(stderr, " (using --http-audio-codec).\n");
+ exit(1);
+ }
if (global_flags.x264_speedcontrol) {
if (!global_flags.x264_preset.empty() && global_flags.x264_preset != "faster") {
fprintf(stderr, "WARNING: --x264-preset is overridden by --x264-speedcontrol (implicitly uses \"faster\" as base preset)\n");
bool ten_bit_input = false;
bool ten_bit_output = false; // Implies x264_video_to_disk == true and x264_bit_depth == 10.
YCbCrInterpretation ycbcr_interpretation[MAX_VIDEO_CARDS];
+ bool transcode_audio = true; // Kaeru only.
int x264_bit_depth = 8; // Not user-settable.
bool use_zerocopy = false; // Not user-settable.
bool can_disable_srgb_decoder = false; // Not user-settable.
return mux;
}
-void video_frame_callback(FFmpegCapture *video, X264Encoder *x264_encoder, int64_t pts, AVRational timebase, uint16_t timecode,
+void video_frame_callback(FFmpegCapture *video, X264Encoder *x264_encoder, AudioEncoder *audio_encoder,
+ int64_t video_pts, AVRational video_timebase,
+ int64_t audio_pts, AVRational audio_timebase,
+ uint16_t timecode,
FrameAllocator::Frame video_frame, size_t video_offset, VideoFormat video_format,
FrameAllocator::Frame audio_frame, size_t audio_offset, AudioFormat audio_format)
{
- if (pts >= 0 && video_frame.len > 0) {
- pts = av_rescale_q(pts, timebase, AVRational{ 1, TIMEBASE });
+ if (video_pts >= 0 && video_frame.len > 0) {
+ video_pts = av_rescale_q(video_pts, video_timebase, AVRational{ 1, TIMEBASE });
int64_t frame_duration = TIMEBASE * video_format.frame_rate_nom / video_format.frame_rate_den;
- x264_encoder->add_frame(pts, frame_duration, video->get_current_frame_ycbcr_format().luma_coefficients, video_frame.data + video_offset, ReceivedTimestamps());
+ x264_encoder->add_frame(video_pts, frame_duration, video->get_current_frame_ycbcr_format().luma_coefficients, video_frame.data + video_offset, ReceivedTimestamps());
}
+ if (audio_frame.len > 0) {
+ // FFmpegCapture takes care of this for us.
+ assert(audio_format.num_channels == 2);
+ assert(audio_format.sample_rate == OUTPUT_FREQUENCY);
+
+ // TODO: Reduce some duplication against AudioMixer here.
+ size_t num_samples = audio_frame.len / (audio_format.bits_per_sample / 8);
+ vector<float> float_samples;
+ float_samples.resize(num_samples);
+ if (audio_format.bits_per_sample == 16) {
+ const int16_t *src = (const int16_t *)audio_frame.data;
+ float *dst = &float_samples[0];
+ for (size_t i = 0; i < num_samples; ++i) {
+ *dst++ = le16toh(*src++) * (1.0f / 32768.0f);
+ }
+ } else if (audio_format.bits_per_sample == 32) {
+ const int32_t *src = (const int32_t *)audio_frame.data;
+ float *dst = &float_samples[0];
+ for (size_t i = 0; i < num_samples; ++i) {
+ *dst++ = le32toh(*src++) * (1.0f / 2147483648.0f);
+ }
+ } else {
+ assert(false);
+ }
+ audio_pts = av_rescale_q(audio_pts, audio_timebase, AVRational{ 1, TIMEBASE });
+ audio_encoder->encode_audio(float_samples, audio_pts);
+ }
if (video_frame.owner) {
video_frame.owner->release_frame(video_frame);
assert(oformat != nullptr);
unique_ptr<AudioEncoder> audio_encoder;
- if (global_flags.stream_audio_codec_name.empty()) {
- audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
- } else {
- audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
+ if (global_flags.transcode_audio) {
+ if (global_flags.stream_audio_codec_name.empty()) {
+ audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat));
+ } else {
+ audio_encoder.reset(new AudioEncoder(global_flags.stream_audio_codec_name, global_flags.stream_audio_codec_bitrate, oformat));
+ }
}
X264Encoder x264_encoder(oformat);
unique_ptr<Mux> http_mux = create_mux(&httpd, oformat, &x264_encoder, audio_encoder.get());
+ if (global_flags.transcode_audio) {
+ audio_encoder->add_mux(http_mux.get());
+ }
x264_encoder.add_mux(http_mux.get());
FFmpegCapture video(argv[optind], global_flags.width, global_flags.height);
video.set_pixel_format(FFmpegCapture::PixelFormat_NV12);
- video.set_frame_callback(bind(video_frame_callback, &video, &x264_encoder, _1, _2, _3, _4, _5, _6, _7, _8, _9));
- video.set_audio_callback(bind(audio_frame_callback, http_mux.get(), _1, _2));
+ video.set_frame_callback(bind(video_frame_callback, &video, &x264_encoder, audio_encoder.get(), _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11));
+ if (!global_flags.transcode_audio) {
+ video.set_audio_callback(bind(audio_frame_callback, http_mux.get(), _1, _2));
+ }
video.configure_card();
video.start_bm_capture();
video.change_rate(2.0); // Be sure never to really fall behind, but also don't dump huge amounts of stuff onto x264.