}
}
// Extract channel <in_channel> of interleaved 16-bit little-endian signed PCM
// from <src> and store it as 32-bit signed PCM into channel <out_channel> of
// the interleaved buffer <dst>. The low bits of each output sample are filled
// by repeating the input's value bits, so full scale maps to full scale.
void convert_fixed16_to_fixed32(int32_t *dst, size_t out_channel, size_t out_num_channels,
                                const uint8_t *src, size_t in_channel, size_t in_num_channels,
                                size_t num_samples)
{
	assert(in_channel < in_num_channels);
	assert(out_channel < out_num_channels);

	// Position both cursors at the first sample of the wanted channels.
	const uint8_t *in_ptr = src + in_channel * 2;
	int32_t *out_ptr = dst + out_channel;

	for (size_t sample_idx = 0; sample_idx < num_samples; ++sample_idx) {
		uint32_t s = uint32_t(uint16_t(le16toh(*(int16_t *)in_ptr))) << 16;

		// Keep the sign bit in place, repeat the other 15 bits as far as they go.
		*out_ptr = s | ((s & 0x7fffffff) >> 15) | ((s & 0x7fffffff) >> 30);

		in_ptr += 2 * in_num_channels;
		out_ptr += out_num_channels;
	}
}
+
void convert_fixed24_to_fp32(float *dst, size_t out_channel, size_t out_num_channels,
const uint8_t *src, size_t in_channel, size_t in_num_channels,
size_t num_samples)
}
}
// Extract channel <in_channel> of interleaved 24-bit little-endian signed PCM
// from <src> and store it as 32-bit signed PCM into channel <out_channel> of
// the interleaved buffer <dst>. The low 8 bits of each output sample are
// filled by repeating the input's value bits, so full scale maps to full scale.
void convert_fixed24_to_fixed32(int32_t *dst, size_t out_channel, size_t out_num_channels,
                                const uint8_t *src, size_t in_channel, size_t in_num_channels,
                                size_t num_samples)
{
	assert(in_channel < in_num_channels);
	assert(out_channel < out_num_channels);

	// Position both cursors at the first sample of the wanted channels.
	const uint8_t *in_ptr = src + in_channel * 3;
	int32_t *out_ptr = dst + out_channel;

	for (size_t sample_idx = 0; sample_idx < num_samples; ++sample_idx) {
		// Assemble the three little-endian bytes into the top 24 bits.
		uint32_t s = (uint32_t(in_ptr[0]) << 8) |
		             (uint32_t(in_ptr[1]) << 16) |
		             (uint32_t(in_ptr[2]) << 24);

		// Keep the sign bit in place, repeat the other 23 bits as far as they go.
		*out_ptr = s | ((s & 0x7fffffff) >> 23);

		in_ptr += 3 * in_num_channels;
		out_ptr += out_num_channels;
	}
}
+
void convert_fixed32_to_fp32(float *dst, size_t out_channel, size_t out_num_channels,
const uint8_t *src, size_t in_channel, size_t in_num_channels,
size_t num_samples)
}
}
// Extract channel <in_channel> of interleaved 32-bit little-endian signed PCM
// from <src> into channel <out_channel> of the interleaved buffer <dst>.
// No bit-depth conversion is needed; this is basically just a reinterleave.
void convert_fixed32_to_fixed32(int32_t *dst, size_t out_channel, size_t out_num_channels,
                                const uint8_t *src, size_t in_channel, size_t in_num_channels,
                                size_t num_samples)
{
	assert(in_channel < in_num_channels);
	assert(out_channel < out_num_channels);

	// Position both cursors at the first sample of the wanted channels.
	const uint8_t *in_ptr = src + in_channel * 4;
	int32_t *out_ptr = dst + out_channel;

	for (size_t sample_idx = 0; sample_idx < num_samples; ++sample_idx) {
		*out_ptr = le32toh(*(int32_t *)in_ptr);

		in_ptr += 4 * in_num_channels;
		out_ptr += out_num_channels;
	}
}
+
float find_peak_plain(const float *samples, size_t num_samples) __attribute__((unused));
float find_peak_plain(const float *samples, size_t num_samples)
return true;
}
+vector<int32_t> convert_audio_to_fixed32(const uint8_t *data, unsigned num_samples, bmusb::AudioFormat audio_format, unsigned num_channels)
+{
+ vector<int32_t> audio;
+
+ if (num_channels > audio_format.num_channels) {
+ audio.resize(num_samples * num_channels, 0);
+ } else {
+ audio.resize(num_samples * num_channels);
+ }
+ for (unsigned channel_index = 0; channel_index < num_channels && channel_index < audio_format.num_channels; ++channel_index) {
+ switch (audio_format.bits_per_sample) {
+ case 0:
+ assert(num_samples == 0);
+ break;
+ case 16:
+ convert_fixed16_to_fixed32(&audio[0], channel_index, num_channels, data, channel_index, audio_format.num_channels, num_samples);
+ break;
+ case 24:
+ convert_fixed24_to_fixed32(&audio[0], channel_index, num_channels, data, channel_index, audio_format.num_channels, num_samples);
+ break;
+ case 32:
+ convert_fixed32_to_fixed32(&audio[0], channel_index, num_channels, data, channel_index, audio_format.num_channels, num_samples);
+ break;
+ default:
+ fprintf(stderr, "Cannot handle audio with %u bits per sample\n", audio_format.bits_per_sample);
+ assert(false);
+ }
+ }
+
+ return audio;
+}
+
bool AudioMixer::add_silence(DeviceSpec device_spec, unsigned samples_per_frame, unsigned num_frames)
{
AudioDevice *device = find_audio_device(device_spec);
struct AudioFormat;
} // namespace bmusb
+// Convert the given audio from {16,24,32}-bit M-channel to 32-bit N-channel PCM.
+// Assumes little-endian and chunky, signed PCM throughout.
+std::vector<int32_t> convert_audio_to_fixed32(const uint8_t *data, unsigned num_samples, bmusb::AudioFormat audio_format, unsigned num_destination_channels);
+
enum EQBand {
EQ_BAND_BASS = 0,
EQ_BAND_MID,
if (num_samples > 0) {
audio_mixer->add_audio(device, audio_frame.data + audio_offset, num_samples, audio_format, audio_frame.received_timestamp);
+
+ // Audio for the MJPEG stream. We don't resample; audio that's not in 48 kHz
+ // just gets dropped for now.
+ //
+ // Only bother doing MJPEG encoding if there are any connected clients
+ // that want the stream.
+ if (httpd.get_num_connected_multicam_clients() > 0) {
+ vector<int32_t> converted_samples = convert_audio_to_fixed32(audio_frame.data + audio_offset, num_samples, audio_format, 2);
+ lock_guard<mutex> lock(card_mutex);
+ if (card->new_raw_audio.empty()) {
+ card->new_raw_audio = move(converted_samples);
+ } else {
+ // For raw audio, we don't really synchronize audio and video;
+ // we just put the audio in frame by frame, and if a video frame is
+ // dropped, we still keep the audio, which means it will be added
+ // to the beginning of the next frame. It would probably be better
+ // to move the audio pts earlier to show this, but most players can
+ // live with some jitter, and in a lot of ways, it's much nicer for
+ // Futatabi to have all audio locked to a video frame.
+ card->new_raw_audio.insert(card->new_raw_audio.end(), converted_samples.begin(), converted_samples.end());
+
+ // Truncate to one second, just to be sure we don't have infinite buildup in case of weirdness.
+ if (card->new_raw_audio.size() > OUTPUT_FREQUENCY * 2) {
+ size_t excess_samples = card->new_raw_audio.size() - OUTPUT_FREQUENCY * 2;
+ card->new_raw_audio.erase(card->new_raw_audio.begin(), card->new_raw_audio.begin() + excess_samples);
+ }
+ }
+ }
}
// Done with the audio, so release it.
assert(master_card_index < num_cards + num_video_inputs);
}
- OutputFrameInfo output_frame_info = get_one_frame_from_each_card(master_card_index, master_card_is_output, new_frames, has_new_frame);
+ vector<int32_t> raw_audio[MAX_VIDEO_CARDS]; // For MJPEG encoding.
+ OutputFrameInfo output_frame_info = get_one_frame_from_each_card(master_card_index, master_card_is_output, new_frames, has_new_frame, raw_audio);
schedule_audio_resampling_tasks(output_frame_info.dropped_frames, output_frame_info.num_samples, output_frame_info.frame_duration, output_frame_info.is_preroll, output_frame_info.frame_timestamp);
stats_dropped_frames += output_frame_info.dropped_frames;
if (new_frame->frame->data_copy != nullptr) {
int mjpeg_card_index = mjpeg_encoder->get_mjpeg_stream_for_card(card_index);
if (mjpeg_card_index != -1) {
- mjpeg_encoder->upload_frame(pts_int, mjpeg_card_index, new_frame->frame, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset);
+ mjpeg_encoder->upload_frame(pts_int, mjpeg_card_index, new_frame->frame, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset, move(raw_audio[card_index]));
}
}
}
return make_pair(theme->get_channel_color(channel_idx), "text/plain");
}
-Mixer::OutputFrameInfo Mixer::get_one_frame_from_each_card(unsigned master_card_index, bool master_card_is_output, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS])
+Mixer::OutputFrameInfo Mixer::get_one_frame_from_each_card(unsigned master_card_index, bool master_card_is_output, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS], vector<int32_t> raw_audio[MAX_VIDEO_CARDS])
{
OutputFrameInfo output_frame_info;
start:
card->new_frames.pop_front();
card->new_frames_changed.notify_all();
}
+
+ raw_audio[card_index] = move(card->new_raw_audio);
}
if (!master_card_is_output) {
};
std::deque<NewFrame> new_frames;
std::condition_variable new_frames_changed; // Set whenever new_frames is changed.
-
QueueLengthPolicy queue_length_policy; // Refers to the "new_frames" queue.
+ std::vector<int32_t> new_raw_audio;
+
int last_timecode = -1; // Unwrapped.
JitterHistory jitter_history;
bool is_preroll;
std::chrono::steady_clock::time_point frame_timestamp;
};
- OutputFrameInfo get_one_frame_from_each_card(unsigned master_card_index, bool master_card_is_output, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS]);
+ OutputFrameInfo get_one_frame_from_each_card(unsigned master_card_index, bool master_card_is_output, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS], std::vector<int32_t> raw_audio[MAX_VIDEO_CARDS]);
InputState input_state;
// a situation with only one video stream (and possibly one audio stream)
// with known width/height, and we don't need the extra functionality it provides.
avctx.reset(avformat_alloc_context());
- avctx->oformat = av_guess_format("mp4", nullptr, nullptr);
+ avctx->oformat = av_guess_format("nut", nullptr, nullptr);
uint8_t *buf = (uint8_t *)av_malloc(MUX_BUFFER_SIZE);
avctx->pb = avio_alloc_context(buf, MUX_BUFFER_SIZE, 1, this, nullptr, nullptr, nullptr);
fprintf(stderr, "avformat_new_stream() failed\n");
abort();
}
- stream->time_base = AVRational{ 1, TIMEBASE };
+
+ // FFmpeg is very picky about having audio at 1/48000 timebase,
+ // no matter what we write. Even though we'd prefer our usual 1/120000,
+ // put the video on the same one, so that we can have locked audio.
+ stream->time_base = AVRational{ 1, OUTPUT_FREQUENCY };
stream->codecpar->codec_type = AVMEDIA_TYPE_VIDEO;
stream->codecpar->codec_id = AV_CODEC_ID_MJPEG;
stream->codecpar->chroma_location = AVCHROMA_LOC_LEFT;
stream->codecpar->field_order = AV_FIELD_PROGRESSIVE;
}
+ for (unsigned card_idx = 0; card_idx < global_flags.card_to_mjpeg_stream_export.size(); ++card_idx) {
+ AVStream *stream = avformat_new_stream(avctx.get(), nullptr);
+ if (stream == nullptr) {
+ fprintf(stderr, "avformat_new_stream() failed\n");
+ abort();
+ }
+ stream->time_base = AVRational{ 1, OUTPUT_FREQUENCY };
+ stream->codecpar->codec_type = AVMEDIA_TYPE_AUDIO;
+ stream->codecpar->codec_id = AV_CODEC_ID_PCM_S32LE;
+ stream->codecpar->channel_layout = AV_CH_LAYOUT_STEREO;
+ stream->codecpar->channels = 2;
+ stream->codecpar->sample_rate = OUTPUT_FREQUENCY;
+ }
AVDictionary *options = NULL;
vector<pair<string, string>> opts = MUX_OPTS;
return va_dpy;
}
-void MJPEGEncoder::upload_frame(int64_t pts, unsigned card_index, RefCountedFrame frame, const bmusb::VideoFormat &video_format, size_t y_offset, size_t cbcr_offset)
+void MJPEGEncoder::upload_frame(int64_t pts, unsigned card_index, RefCountedFrame frame, const bmusb::VideoFormat &video_format, size_t y_offset, size_t cbcr_offset, vector<int32_t> audio)
{
PBOFrameAllocator::Userdata *userdata = (PBOFrameAllocator::Userdata *)frame->userdata;
if (video_format.width == 0 || video_format.height == 0) {
return;
}
++metric_mjpeg_overrun_submitted;
- frames_to_be_encoded.push(QueuedFrame{ pts, card_index, frame, video_format, y_offset, cbcr_offset });
+ frames_to_be_encoded.push(QueuedFrame{ pts, card_index, frame, video_format, y_offset, cbcr_offset, move(audio) });
any_frames_to_be_encoded.notify_all();
}
// Will call back in the receiver thread.
encode_jpeg_va(move(qf));
} else {
+ // Write audio before video, since Futatabi expects it.
+ if (qf.audio.size() > 0) {
+ write_audio_packet(qf.pts, qf.card_index, qf.audio);
+ }
+
// Encode synchronously, in the same thread.
vector<uint8_t> jpeg = encode_jpeg_libjpeg(qf);
write_mjpeg_packet(qf.pts, qf.card_index, jpeg.data(), jpeg.size());
pkt.flags = AV_PKT_FLAG_KEY;
AVRational time_base = avctx->streams[pkt.stream_index]->time_base;
pkt.pts = pkt.dts = av_rescale_q(pts, AVRational{ 1, TIMEBASE }, time_base);
+ pkt.duration = 0;
+
+ if (av_write_frame(avctx.get(), &pkt) < 0) {
+ fprintf(stderr, "av_write_frame() failed\n");
+ abort();
+ }
+}
+
+// Mux one frame's worth of raw PCM (interleaved stereo s32le) for the given
+// card into the MJPEG stream. The audio stream indices come after the video
+// streams, hence the card_to_mjpeg_stream_export.size() offset.
+// NOTE(review): pkt.data points into <audio> without copying; assumes
+// av_write_frame() consumes the packet before <audio> goes away — confirm.
+void MJPEGEncoder::write_audio_packet(int64_t pts, unsigned card_index, const vector<int32_t> &audio)
+{
+	AVPacket pkt;
+	memset(&pkt, 0, sizeof(pkt));
+	pkt.buf = nullptr;
+	pkt.data = reinterpret_cast<uint8_t *>(const_cast<int32_t *>(&audio[0]));
+	pkt.size = audio.size() * sizeof(audio[0]);
+	// Audio streams are laid out after all the per-card video streams.
+	pkt.stream_index = card_index + global_flags.card_to_mjpeg_stream_export.size();
+	pkt.flags = AV_PKT_FLAG_KEY;
+	AVRational time_base = avctx->streams[pkt.stream_index]->time_base;
+	// Incoming pts is on the global 1/TIMEBASE clock; rescale to the stream's.
+	pkt.pts = pkt.dts = av_rescale_q(pts, AVRational{ 1, TIMEBASE }, time_base);
+	// Two int32 samples (left + right) per stereo sample.
+	size_t num_stereo_samples = audio.size() / 2;
+	pkt.duration = av_rescale_q(num_stereo_samples, AVRational{ 1, OUTPUT_FREQUENCY }, time_base);
if (av_write_frame(avctx.get(), &pkt) < 0) {
fprintf(stderr, "av_write_frame() failed\n");
frames_encoding.pop();
}
+ // Write audio before video, since Futatabi expects it.
+ if (qf.audio.size() > 0) {
+ write_audio_packet(qf.pts, qf.card_index, qf.audio);
+ }
+
VAStatus va_status = vaSyncSurface(va_dpy->va_dpy, qf.resources.surface);
CHECK_VASTATUS(va_status, "vaSyncSurface");
MJPEGEncoder(HTTPD *httpd, const std::string &va_display);
~MJPEGEncoder();
void stop();
- void upload_frame(int64_t pts, unsigned card_index, RefCountedFrame frame, const bmusb::VideoFormat &video_format, size_t y_offset, size_t cbcr_offset);
+ void upload_frame(int64_t pts, unsigned card_index, RefCountedFrame frame, const bmusb::VideoFormat &video_format, size_t y_offset, size_t cbcr_offset, std::vector<int32_t> audio);
bool using_vaapi() const { return va_dpy != nullptr; }
// Returns -1 for inactive (ie., don't encode frames for this card right now).
RefCountedFrame frame;
bmusb::VideoFormat video_format;
size_t y_offset, cbcr_offset;
+ std::vector<int32_t> audio;
// Only for frames in the process of being encoded by VA-API.
VAResources resources;
void encode_jpeg_va(QueuedFrame &&qf);
std::vector<uint8_t> encode_jpeg_libjpeg(const QueuedFrame &qf);
void write_mjpeg_packet(int64_t pts, unsigned card_index, const uint8_t *jpeg, size_t jpeg_size);
+ void write_audio_packet(int64_t pts, unsigned card_index, const std::vector<int32_t> &audio);
void init_jpeg_422(unsigned width, unsigned height, VectorDestinationManager *dest, jpeg_compress_struct *cinfo);
std::vector<uint8_t> get_jpeg_header(unsigned width, unsigned height, jpeg_compress_struct *cinfo);