Include the raw audio in the MJPEG output.

[nageru] / nageru / mixer.cpp
diff --git a/nageru/mixer.cpp b/nageru/mixer.cpp

index c91857e62fbab09b3677fcebba41402235feabc6..5f38a673581ccbca276faf9a5d9992dafe06d2f5 100644 (file)
--- a/nageru/mixer.cpp
+++ b/nageru/mixer.cpp
@@ -785,6 +785,34 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
  
         if (num_samples > 0) {
                 audio_mixer->add_audio(device, audio_frame.data + audio_offset, num_samples, audio_format, audio_frame.received_timestamp);
+
+               // Audio for the MJPEG stream. We don't resample; audio that's not in 48 kHz
+               // just gets dropped for now.
+               //
+               // Only bother doing MJPEG encoding if there are any connected clients
+               // that want the stream.
+               if (httpd.get_num_connected_multicam_clients() > 0) {
+                       vector<int32_t> converted_samples = convert_audio_to_fixed32(audio_frame.data + audio_offset, num_samples, audio_format, 2);
+                       lock_guard<mutex> lock(card_mutex);
+                       if (card->new_raw_audio.empty()) {
+                               card->new_raw_audio = move(converted_samples);
+                       } else {
+                               // For raw audio, we don't really synchronize audio and video;
+                               // we just put the audio in frame by frame, and if a video frame is
+                               // dropped, we still keep the audio, which means it will be added
+                               // to the beginning of the next frame. It would probably be better
+                               // to move the audio pts earlier to show this, but most players can
+                               // live with some jitter, and in a lot of ways, it's much nicer for
+                               // Futatabi to have all audio locked to a video frame.
+                               card->new_raw_audio.insert(card->new_raw_audio.end(), converted_samples.begin(), converted_samples.end());
+
+                               // Truncate to the most recent one second (discarding the oldest samples), just to be sure we don't have infinite buildup in case of weirdness.
+                               if (card->new_raw_audio.size() > OUTPUT_FREQUENCY * 2) {
+                                       size_t excess_samples = card->new_raw_audio.size() - OUTPUT_FREQUENCY * 2;
+                                       card->new_raw_audio.erase(card->new_raw_audio.begin(), card->new_raw_audio.begin() + excess_samples);
+                               }
+                       }
+               }
         }
  
         // Done with the audio, so release it.
@@ -1038,7 +1066,8 @@ void Mixer::thread_func()
                         assert(master_card_index < num_cards + num_video_inputs);
                 }
  
-               OutputFrameInfo output_frame_info = get_one_frame_from_each_card(master_card_index, master_card_is_output, new_frames, has_new_frame);
+               vector<int32_t> raw_audio[MAX_VIDEO_CARDS];  // For MJPEG encoding.
+               OutputFrameInfo output_frame_info = get_one_frame_from_each_card(master_card_index, master_card_is_output, new_frames, has_new_frame, raw_audio);
                 schedule_audio_resampling_tasks(output_frame_info.dropped_frames, output_frame_info.num_samples, output_frame_info.frame_duration, output_frame_info.is_preroll, output_frame_info.frame_timestamp);
                 stats_dropped_frames += output_frame_info.dropped_frames;
  
@@ -1084,7 +1113,7 @@ void Mixer::thread_func()
                         if (new_frame->frame->data_copy != nullptr) {
                                 int mjpeg_card_index = mjpeg_encoder->get_mjpeg_stream_for_card(card_index);
                                 if (mjpeg_card_index != -1) {
-                                       mjpeg_encoder->upload_frame(pts_int, mjpeg_card_index, new_frame->frame, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset);
+                                       mjpeg_encoder->upload_frame(pts_int, mjpeg_card_index, new_frame->frame, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset, move(raw_audio[card_index]));
                                 }
                         }
                 }
@@ -1198,7 +1227,7 @@ pair<string, string> Mixer::get_channel_color_http(unsigned channel_idx)
         return make_pair(theme->get_channel_color(channel_idx), "text/plain");
  }
  
-Mixer::OutputFrameInfo Mixer::get_one_frame_from_each_card(unsigned master_card_index, bool master_card_is_output, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS])
+Mixer::OutputFrameInfo Mixer::get_one_frame_from_each_card(unsigned master_card_index, bool master_card_is_output, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS], vector<int32_t> raw_audio[MAX_VIDEO_CARDS])
  {
         OutputFrameInfo output_frame_info;
  start:
@@ -1246,6 +1275,8 @@ start:
                         card->new_frames.pop_front();
                         card->new_frames_changed.notify_all();
                 }
+
+               raw_audio[card_index] = move(card->new_raw_audio);
         }
  
         if (!master_card_is_output) {