Heed the Exif white point when playing back (MJPEG) video.

[nageru] / nageru / mixer.cpp
diff --git a/nageru/mixer.cpp b/nageru/mixer.cpp

index d1b52ef2dc54b4a9db8ad84e755bbadc4ac76411..9a86682daa91ca19b2325e3858093f55e80a0a27 100644 (file)
--- a/nageru/mixer.cpp
+++ b/nageru/mixer.cpp
@@ -4,6 +4,7 @@
  
  #include <assert.h>
  #include <epoxy/egl.h>
+#include <movit/effect.h>
  #include <movit/effect_chain.h>
  #include <movit/effect_util.h>
  #include <movit/flat_input.h>
@@ -44,6 +45,7 @@
  #include "shared/disk_space_estimator.h"
  #include "ffmpeg_capture.h"
  #include "flags.h"
+#include "image_input.h"
  #include "input_mapping.h"
  #include "shared/metrics.h"
  #include "mjpeg_encoder.h"
@@ -152,11 +154,8 @@ void ensure_texture_resolution(PBOFrameAllocator::Userdata *userdata, unsigned f
                 case PixelFormat_8BitBGRA:
                         glBindTexture(GL_TEXTURE_2D, userdata->tex_rgba[field]);
                         check_error();
-                       if (global_flags.can_disable_srgb_decoder) {  // See the comments in tweaked_inputs.h.
-                               glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr);
-                       } else {
-                               glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr);
-                       }
+                       // NOTE: sRGB may be disabled by sRGBSwitchingFlatInput.
+                       glTexImage2D(GL_TEXTURE_2D, 0, GL_SRGB8_ALPHA8, width, height, 0, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, nullptr);
                         check_error();
                         break;
                 default:
@@ -306,16 +305,18 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
           num_cards(num_cards),
           mixer_surface(create_surface(format)),
           h264_encoder_surface(create_surface(format)),
-         decklink_output_surface(create_surface(format))
+         decklink_output_surface(create_surface(format)),
+         image_update_surface(create_surface(format))
  {
         memcpy(ycbcr_interpretation, global_flags.ycbcr_interpretation, sizeof(ycbcr_interpretation));
         CHECK(init_movit(MOVIT_SHADER_DIR, MOVIT_DEBUG_OFF));
         check_error();
  
-       // This nearly always should be true.
-       global_flags.can_disable_srgb_decoder =
-               epoxy_has_gl_extension("GL_EXT_texture_sRGB_decode") &&
-               epoxy_has_gl_extension("GL_ARB_sampler_objects");
+       if (!epoxy_has_gl_extension("GL_EXT_texture_sRGB_decode") ||
+           !epoxy_has_gl_extension("GL_ARB_sampler_objects")) {
+               fprintf(stderr, "Nageru requires GL_EXT_texture_sRGB_decode and GL_ARB_sampler_objects to run.\n");
+               exit(1);
+       }
  
         // Since we allow non-bouncing 4:2:2 YCbCrInputs, effective subpixel precision
         // will be halved when sampling them, and we need to compensate here.
@@ -347,6 +348,11 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
         ycbcr_format.cb_y_position = 0.5f;
         ycbcr_format.cr_y_position = 0.5f;
  
+       // Initialize the neutral colors to sane values.
+       for (unsigned i = 0; i < MAX_VIDEO_CARDS; ++i) {
+               last_received_neutral_color[i] = RGBTriplet(1.0f, 1.0f, 1.0f);
+       }
+
         // Display chain; shows the live output produced by the main chain (or rather, a copy of it).
         display_chain.reset(new EffectChain(global_flags.width, global_flags.height, resource_pool.get()));
         check_error();
@@ -370,10 +376,10 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
         audio_mixer.reset(new AudioMixer(num_cards, video_inputs.size()));
  
         httpd.add_endpoint("/channels", bind(&Mixer::get_channels_json, this), HTTPD::ALLOW_ALL_ORIGINS);
-       for (int channel_idx = 2; channel_idx < theme->get_num_channels(); ++channel_idx) {
+       for (int channel_idx = 0; channel_idx < theme->get_num_channels(); ++channel_idx) {
                 char url[256];
-               snprintf(url, sizeof(url), "/channels/%d/color", channel_idx);
-               httpd.add_endpoint(url, bind(&Mixer::get_channel_color_http, this, unsigned(channel_idx)), HTTPD::ALLOW_ALL_ORIGINS);
+               snprintf(url, sizeof(url), "/channels/%d/color", channel_idx + 2);
+               httpd.add_endpoint(url, bind(&Mixer::get_channel_color_http, this, unsigned(channel_idx + 2)), HTTPD::ALLOW_ALL_ORIGINS);
         }
  
         // Start listening for clients only once VideoEncoder has written its header, if any.
@@ -432,7 +438,7 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
         for (unsigned video_card_index = 0; video_card_index < video_inputs.size(); ++card_index, ++video_card_index) {
                 if (card_index >= MAX_VIDEO_CARDS) {
                         fprintf(stderr, "ERROR: Not enough card slots available for the videos the theme requested.\n");
-                       exit(1);
+                       abort();
                 }
                 configure_card(card_index, video_inputs[video_card_index], CardType::FFMPEG_INPUT, /*output=*/nullptr);
                 video_inputs[video_card_index]->set_card_index(card_index);
@@ -445,7 +451,7 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
         for (unsigned html_card_index = 0; html_card_index < html_inputs.size(); ++card_index, ++html_card_index) {
                 if (card_index >= MAX_VIDEO_CARDS) {
                         fprintf(stderr, "ERROR: Not enough card slots available for the HTML inputs the theme requested.\n");
-                       exit(1);
+                       abort();
                 }
                 configure_card(card_index, html_inputs[html_card_index], CardType::CEF_INPUT, /*output=*/nullptr);
                 html_inputs[html_card_index]->set_card_index(card_index);
@@ -466,7 +472,7 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
                 if (!v210Converter::has_hardware_support()) {
                         fprintf(stderr, "ERROR: --ten-bit-input requires support for OpenGL compute shaders\n");
                         fprintf(stderr, "       (OpenGL 4.3, or GL_ARB_compute_shader + GL_ARB_shader_image_load_store).\n");
-                       exit(1);
+                       abort();
                 }
                 v210_converter.reset(new v210Converter());
  
@@ -483,7 +489,7 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
                 if (!v210Converter::has_hardware_support()) {
                         fprintf(stderr, "ERROR: --ten-bit-output requires support for OpenGL compute shaders\n");
                         fprintf(stderr, "       (OpenGL 4.3, or GL_ARB_compute_shader + GL_ARB_shader_image_load_store).\n");
-                       exit(1);
+                       abort();
                 }
         }
  
@@ -500,10 +506,14 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
         }
  
         output_jitter_history.register_metrics({{ "card", "output" }});
+
+       ImageInput::start_update_thread(image_update_surface);
  }
  
  Mixer::~Mixer()
  {
+       ImageInput::end_update_thread();
+
         if (mjpeg_encoder != nullptr) {
                 mjpeg_encoder->stop();
         }
@@ -551,7 +561,7 @@ void Mixer::configure_card(unsigned card_index, CaptureInterface *capture, CardT
  
         card->capture->set_frame_callback(bind(&Mixer::bm_frame, this, card_index, _1, _2, _3, _4, _5, _6, _7));
         if (card->frame_allocator == nullptr) {
-               card->frame_allocator.reset(new PBOFrameAllocator(pixel_format, 8 << 20, global_flags.width, global_flags.height));  // 8 MB.
+               card->frame_allocator.reset(new PBOFrameAllocator(pixel_format, 8 << 20, global_flags.width, global_flags.height, card_index, mjpeg_encoder.get()));  // 8 MB.
         }
         card->capture->set_video_frame_allocator(card->frame_allocator.get());
         if (card->surface == nullptr) {
@@ -785,6 +795,34 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
  
         if (num_samples > 0) {
                 audio_mixer->add_audio(device, audio_frame.data + audio_offset, num_samples, audio_format, audio_frame.received_timestamp);
+
+               // Audio for the MJPEG stream. We don't resample; audio that's not in 48 kHz
+               // just gets dropped for now.
+               //
+               // Only bother doing MJPEG encoding if there are any connected clients
+               // that want the stream.
+               if (httpd.get_num_connected_multicam_clients() > 0) {
+                       vector<int32_t> converted_samples = convert_audio_to_fixed32(audio_frame.data + audio_offset, num_samples, audio_format, 2);
+                       lock_guard<mutex> lock(card_mutex);
+                       if (card->new_raw_audio.empty()) {
+                               card->new_raw_audio = move(converted_samples);
+                       } else {
+                               // For raw audio, we don't really synchronize audio and video;
+                               // we just put the audio in frame by frame, and if a video frame is
+                               // dropped, we still keep the audio, which means it will be added
+                               // to the beginning of the next frame. It would probably be better
+                               // to move the audio pts earlier to show this, but most players can
+                               // live with some jitter, and in a lot of ways, it's much nicer for
+                               // Futatabi to have all audio locked to a video frame.
+                               card->new_raw_audio.insert(card->new_raw_audio.end(), converted_samples.begin(), converted_samples.end());
+
+                               // Truncate to one second, just to be sure we don't have infinite buildup in case of weirdness.
+                               if (card->new_raw_audio.size() > OUTPUT_FREQUENCY * 2) {
+                                       size_t excess_samples = card->new_raw_audio.size() - OUTPUT_FREQUENCY * 2;
+                                       card->new_raw_audio.erase(card->new_raw_audio.begin(), card->new_raw_audio.begin() + excess_samples);
+                               }
+                       }
+               }
         }
  
         // Done with the audio, so release it.
@@ -824,7 +862,7 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
         if (video_frame.len - video_offset == 0 ||
             video_frame.len - video_offset != expected_length) {
                 if (video_frame.len != 0) {
-                       printf("%s: Dropping video frame with wrong length (%ld; expected %ld)\n",
+                       printf("%s: Dropping video frame with wrong length (%zu; expected %zu)\n",
                                 spec_to_string(device).c_str(), video_frame.len - video_offset, expected_length);
                 }
                 if (video_frame.owner) {
@@ -971,6 +1009,10 @@ void Mixer::bm_frame(unsigned card_index, uint16_t timecode,
                         new_frame.video_format = video_format;
                         new_frame.y_offset = y_offset;
                         new_frame.cbcr_offset = cbcr_offset;
+                       if (card->type == CardType::FFMPEG_INPUT) {
+                               FFmpegCapture *ffmpeg_capture = static_cast<FFmpegCapture *>(card->capture.get());
+                               new_frame.neutral_color = ffmpeg_capture->get_last_neutral_color();
+                       }
                         card->new_frames.push_back(move(new_frame));
                         card->jitter_history.frame_arrived(video_frame.received_timestamp, frame_length, dropped_frames);
                         card->may_have_dropped_last_frame = false;
@@ -998,7 +1040,7 @@ void Mixer::thread_func()
         QOpenGLContext *context = create_context(mixer_surface);
         if (!make_current(context, mixer_surface)) {
                 printf("oops\n");
-               exit(1);
+               abort();
         }
  
         // Start the actual capture. (We don't want to do it before we're actually ready
@@ -1038,7 +1080,8 @@ void Mixer::thread_func()
                         assert(master_card_index < num_cards + num_video_inputs);
                 }
  
-               OutputFrameInfo output_frame_info = get_one_frame_from_each_card(master_card_index, master_card_is_output, new_frames, has_new_frame);
+               vector<int32_t> raw_audio[MAX_VIDEO_CARDS];  // For MJPEG encoding.
+               OutputFrameInfo output_frame_info = get_one_frame_from_each_card(master_card_index, master_card_is_output, new_frames, has_new_frame, raw_audio);
                 schedule_audio_resampling_tasks(output_frame_info.dropped_frames, output_frame_info.num_samples, output_frame_info.frame_duration, output_frame_info.is_preroll, output_frame_info.frame_timestamp);
                 stats_dropped_frames += output_frame_info.dropped_frames;
  
@@ -1081,14 +1124,27 @@ void Mixer::thread_func()
                                 new_frame->upload_func = nullptr;
                         }
  
-                       // Only bother doing MJPEG encoding if there are any connected clients
-                       // that want the stream. FIXME: We should also stop memcpy-ing if there are none!
-                       if (httpd.get_num_connected_multicam_clients() > 0) {
-                               auto stream_it = global_flags.card_to_mjpeg_stream_export.find(card_index);
-                               if (stream_it != global_flags.card_to_mjpeg_stream_export.end()) {
-                                       mjpeg_encoder->upload_frame(pts_int, stream_it->second, new_frame->frame, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset);
+                       // Only set the white balance if it actually changed. This means that the user
+                       // is free to override the white balance in a video with no white balance information
+                       // actually set (i.e., r=g=b=1 all the time), or one where the white point is wrong,
+                       // but frame-to-frame decisions will be heeded. We do this pretty much as late
+                       // as possible (i.e., after picking out the frame from the buffer), so that we are sure
+                       // that the change takes effect on exactly the right frame.
+                       if (fabs(new_frame->neutral_color.r - last_received_neutral_color[card_index].r) > 1e-3 ||
+                           fabs(new_frame->neutral_color.g - last_received_neutral_color[card_index].g) > 1e-3 ||
+                           fabs(new_frame->neutral_color.b - last_received_neutral_color[card_index].b) > 1e-3) {
+                               theme->set_wb_for_signal(card_index, new_frame->neutral_color.r, new_frame->neutral_color.g, new_frame->neutral_color.b);
+                               last_received_neutral_color[card_index] = new_frame->neutral_color;
+                       }
+
+                       if (new_frame->frame->data_copy != nullptr) {
+                               int mjpeg_card_index = mjpeg_encoder->get_mjpeg_stream_for_card(card_index);
+                               if (mjpeg_card_index != -1) {
+                                       RGBTriplet neutral_color = theme->get_white_balance_for_signal(card_index);
+                                       mjpeg_encoder->upload_frame(pts_int, mjpeg_card_index, new_frame->frame, new_frame->video_format, new_frame->y_offset, new_frame->cbcr_offset, move(raw_audio[card_index]), neutral_color);
                                 }
                         }
+
                 }
  
                 int64_t frame_duration = output_frame_info.frame_duration;
@@ -1200,7 +1256,7 @@ pair<string, string> Mixer::get_channel_color_http(unsigned channel_idx)
         return make_pair(theme->get_channel_color(channel_idx), "text/plain");
  }
  
-Mixer::OutputFrameInfo Mixer::get_one_frame_from_each_card(unsigned master_card_index, bool master_card_is_output, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS])
+Mixer::OutputFrameInfo Mixer::get_one_frame_from_each_card(unsigned master_card_index, bool master_card_is_output, CaptureCard::NewFrame new_frames[MAX_VIDEO_CARDS], bool has_new_frame[MAX_VIDEO_CARDS], vector<int32_t> raw_audio[MAX_VIDEO_CARDS])
  {
         OutputFrameInfo output_frame_info;
  start:
@@ -1239,7 +1295,7 @@ start:
                                 // we dropped. (may_have_dropped_last_frame is set whenever we
                                 // trim the queue completely away, and cleared when we actually
                                 // get a new frame.)
-                               ((CEFCapture *)card->capture.get())->request_new_frame();
+                               ((CEFCapture *)card->capture.get())->request_new_frame(/*ignore_if_locked=*/true);
                         }
  #endif
                 } else {
@@ -1248,6 +1304,8 @@ start:
                         card->new_frames.pop_front();
                         card->new_frames_changed.notify_all();
                 }
+
+               raw_audio[card_index] = move(card->new_raw_audio);
         }
  
         if (!master_card_is_output) {