if (num_samples > 0) {
audio_mixer->add_audio(device, audio_frame.data + audio_offset, num_samples, audio_format, audio_frame.received_timestamp);
+
+ // Audio for the MJPEG stream. We don't resample; audio that's not in 48 kHz
+ // just gets dropped for now.
+ //
+ // Only bother doing MJPEG encoding if there are any connected clients
+ // that want the stream.
+ if (httpd.get_num_connected_multicam_clients() > 0) {
+ vector<int32_t> converted_samples = convert_audio_to_fixed32(audio_frame.data + audio_offset, num_samples, audio_format, 2);
+ lock_guard<mutex> lock(card_mutex);
+ if (card->new_raw_audio.empty()) {
+ card->new_raw_audio = move(converted_samples);
+ } else {
+ // For raw audio, we don't really synchronize audio and video;
+ // we just put the audio in frame by frame, and if a video frame is
+ // dropped, we still keep the audio, which means it will be added
+ // to the beginning of the next frame. It would probably be better
+ // to move the audio pts earlier to show this, but most players can
+ // live with some jitter, and in a lot of ways, it's much nicer for
+ // Futatabi to have all audio locked to a video frame.
+ card->new_raw_audio.insert(card->new_raw_audio.end(), converted_samples.begin(), converted_samples.end());
+
+ // Truncate to one second (OUTPUT_FREQUENCY * 2 values), dropping the oldest audio first, just to be sure we don't have infinite buildup in case of weirdness.
+ if (card->new_raw_audio.size() > OUTPUT_FREQUENCY * 2) {
+ size_t excess_samples = card->new_raw_audio.size() - OUTPUT_FREQUENCY * 2;
+ card->new_raw_audio.erase(card->new_raw_audio.begin(), card->new_raw_audio.begin() + excess_samples);
+ }
+ }
+ }
}
// Done with the audio, so release it.
assert(master_card_index < num_cards + num_video_inputs);
}
- OutputFrameInfo output_frame_info = get_one_frame_from_each_card(master_card_index, master_card_is_output, new_frames, has_new_frame);
+ vector<int32_t> raw_audio[MAX_VIDEO_CARDS]; // For MJPEG encoding.
+ OutputFrameInfo output_frame_info = get_one_frame_from_each_card(master_card_index, master_card_is_output, new_frames, has_new_frame, raw_audio);
schedule_audio_resampling_tasks(output_frame_info.dropped_frames, output_frame_info.num_samples, output_frame_info.frame_duration, output_frame_info.is_preroll, output_frame_info.frame_timestamp);
stats_dropped_frames += output_frame_info.dropped_frames;
if (new_frame->frame->data_copy != nullptr) {
int mjpeg_card_index = mjpeg_encoder->get_mjpeg_stream_for_card(card_index);
if (mjpeg_card_index != -1) {
- mjpeg_encoder->upload_frame(pts_int, mjpeg_card_index, new_frame->frame, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset);
+ mjpeg_encoder->upload_frame(pts_int, mjpeg_card_index, new_frame->frame, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset, move(raw_audio[card_index]));
}
}
}
return make_pair(theme->get_channel_color(channel_idx), "text/plain");
}
-Mixer::OutputFrameInfo Mixer::get_one_frame_from_each_card(unsigned master_card_index, bool master_card_is_output, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS])
+Mixer::OutputFrameInfo Mixer::get_one_frame_from_each_card(unsigned master_card_index, bool master_card_is_output, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS], vector<int32_t> raw_audio[MAX_VIDEO_CARDS])
{
OutputFrameInfo output_frame_info;
start:
card->new_frames.pop_front();
card->new_frames_changed.notify_all();
}
+
+ raw_audio[card_index] = move(card->new_raw_audio);
}
if (!master_card_is_output) {