From ee7da87b4aa284b7babd59dc21db925f7c384ce7 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Wed, 8 Mar 2017 23:54:46 +0100 Subject: [PATCH] Support switching Y'CbCr coefficients midway, which will allow doing the Right Thing(TM) (BT.601 when you can for greater stream compatibility, BT.709 when you must for HDMI/SDI output) automatically. --- decklink_output.cpp | 38 +++++++++++++++++++++++++++--------- decklink_output.h | 8 +++++++- flags.cpp | 16 +++++++-------- flags.h | 3 ++- mixer.cpp | 22 +++++++++++++++++++-- mux.cpp | 10 +++++++++- quicksync_encoder.cpp | 42 ++++++++++++++++++++++++---------------- quicksync_encoder.h | 3 ++- quicksync_encoder_impl.h | 13 ++++++++----- theme.cpp | 3 +++ video_encoder.cpp | 4 ++-- video_encoder.h | 3 ++- x264_encoder.cpp | 42 +++++++++++++++++++++++++++++----------- x264_encoder.h | 7 ++++++- 14 files changed, 154 insertions(+), 60 deletions(-) diff --git a/decklink_output.cpp b/decklink_output.cpp index 4d6b1e1..8c6672b 100644 --- a/decklink_output.cpp +++ b/decklink_output.cpp @@ -107,14 +107,7 @@ void DeckLinkOutput::start_output(uint32_t mode, int64_t base_pts) exit(1); } - BMDDisplayModeFlags flags = display_mode->GetFlags(); - if ((flags & bmdDisplayModeColorspaceRec601) && global_flags.ycbcr_rec709_coefficients) { - fprintf(stderr, "WARNING: Chosen output mode expects Rec. 601 Y'CbCr coefficients.\n"); - fprintf(stderr, " Consider --output-ycbcr-coefficients=rec601 (or =auto).\n"); - } else if ((flags & bmdDisplayModeColorspaceRec709) && !global_flags.ycbcr_rec709_coefficients) { - fprintf(stderr, "WARNING: Chosen output mode expects Rec. 
709 Y'CbCr coefficients.\n"); - fprintf(stderr, " Consider --output-ycbcr-coefficients=rec709 (or =auto).\n"); - } + current_mode_flags = display_mode->GetFlags(); BMDTimeValue time_value; BMDTimeScale time_scale; @@ -184,10 +177,26 @@ void DeckLinkOutput::end_output() } } -void DeckLinkOutput::send_frame(GLuint y_tex, GLuint cbcr_tex, const vector &input_frames, int64_t pts, int64_t duration) +void DeckLinkOutput::send_frame(GLuint y_tex, GLuint cbcr_tex, YCbCrLumaCoefficients output_ycbcr_coefficients, const vector &input_frames, int64_t pts, int64_t duration) { assert(!should_quit); + if ((current_mode_flags & bmdDisplayModeColorspaceRec601) && output_ycbcr_coefficients == YCBCR_REC_709) { + if (!last_frame_had_mode_mismatch) { + fprintf(stderr, "WARNING: Chosen output mode expects Rec. 601 Y'CbCr coefficients.\n"); + fprintf(stderr, " Consider --output-ycbcr-coefficients=rec601 (or =auto).\n"); + } + last_frame_had_mode_mismatch = true; + } else if ((current_mode_flags & bmdDisplayModeColorspaceRec709) && output_ycbcr_coefficients == YCBCR_REC_601) { + if (!last_frame_had_mode_mismatch) { + fprintf(stderr, "WARNING: Chosen output mode expects Rec. 709 Y'CbCr coefficients.\n"); + fprintf(stderr, " Consider --output-ycbcr-coefficients=rec709 (or =auto).\n"); + } + last_frame_had_mode_mismatch = true; + } else { + last_frame_had_mode_mismatch = false; + } + unique_ptr frame = move(get_frame()); chroma_subsampler->create_uyvy(y_tex, cbcr_tex, width, height, frame->uyvy_tex); @@ -334,6 +343,17 @@ uint32_t DeckLinkOutput::pick_video_mode(uint32_t mode) const return best_mode; } +YCbCrLumaCoefficients DeckLinkOutput::preferred_ycbcr_coefficients() const +{ + if (current_mode_flags & bmdDisplayModeColorspaceRec601) { + return YCBCR_REC_601; + } else { + // Don't bother checking bmdDisplayModeColorspaceRec709; + // if none is set, 709 is a good default anyway. 
+ return YCBCR_REC_709; + } +} + HRESULT DeckLinkOutput::ScheduledFrameCompleted(/* in */ IDeckLinkVideoFrame *completedFrame, /* in */ BMDOutputFrameCompletionResult result) { Frame *frame = static_cast(completedFrame); diff --git a/decklink_output.h b/decklink_output.h index d5e743a..5581c39 100644 --- a/decklink_output.h +++ b/decklink_output.h @@ -2,6 +2,7 @@ #define _DECKLINK_OUTPUT_H 1 #include +#include #include #include #include @@ -40,7 +41,7 @@ public: void start_output(uint32_t mode, int64_t base_pts); // Mode comes from get_available_video_modes(). void end_output(); - void send_frame(GLuint y_tex, GLuint cbcr_tex, const std::vector &input_frames, int64_t pts, int64_t duration); + void send_frame(GLuint y_tex, GLuint cbcr_tex, movit::YCbCrLumaCoefficients ycbcr_coefficients, const std::vector &input_frames, int64_t pts, int64_t duration); void send_audio(int64_t pts, const std::vector &samples); // NOTE: The returned timestamp is undefined for preroll. @@ -59,6 +60,9 @@ public: // If the given mode is supported, return it. If not, pick some “best” valid mode. uint32_t pick_video_mode(uint32_t mode) const; + // Desired Y'CbCr coefficients for the current mode. Undefined before start_output(). + movit::YCbCrLumaCoefficients preferred_ycbcr_coefficients() const; + // IUnknown. 
HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, LPVOID *ppv) override; ULONG STDMETHODCALLTYPE AddRef() override; @@ -132,6 +136,8 @@ private: std::condition_variable frame_queues_changed; bool playback_initiated = false, playback_started = false; int64_t base_pts, frame_duration; + BMDDisplayModeFlags current_mode_flags = 0; + bool last_frame_had_mode_mismatch = false; movit::ResourcePool *resource_pool; IDeckLinkOutput *output = nullptr; diff --git a/flags.cpp b/flags.cpp index 025b6d9..88a2c45 100644 --- a/flags.cpp +++ b/flags.cpp @@ -114,8 +114,8 @@ void usage() fprintf(stderr, " --audio-queue-length-ms=MS length of audio resampling queue (default 100.0)\n"); fprintf(stderr, " --output-ycbcr-coefficients={rec601,rec709,auto}\n"); fprintf(stderr, " Y'CbCr coefficient standard of output (default auto)\n"); - fprintf(stderr, " auto is rec709 if and only if --output-card is used\n"); - fprintf(stderr, " and a HD resolution is set\n"); + fprintf(stderr, " auto is rec601, unless --output-card is used\n"); + fprintf(stderr, " and a Rec. 709 mode (typically HD modes) is in use\n"); fprintf(stderr, " --output-buffer-frames=NUM number of frames in output buffer for --output-card,\n"); fprintf(stderr, " can be fractional (default 6.0); note also\n"); fprintf(stderr, " the audio queue can't be much longer than this\n"); @@ -430,17 +430,17 @@ void parse_flags(int argc, char * const argv[]) // On the other hand, HDMI/SDI output typically requires Rec. 709 for // HD resolutions (with no way of signaling anything else), which is // a conflicting demand. In this case, we typically let the HDMI/SDI - // output win, but the user can override this. + // output win if it is active, but the user can override this. 
if (output_ycbcr_coefficients == "auto") { - if (global_flags.output_card >= 0 && global_flags.width >= 1280) { - global_flags.ycbcr_rec709_coefficients = true; - } else { - global_flags.ycbcr_rec709_coefficients = false; - } + // Essentially: BT.709 if HDMI/SDI output is on, otherwise BT.601. + global_flags.ycbcr_rec709_coefficients = false; + global_flags.ycbcr_auto_coefficients = true; } else if (output_ycbcr_coefficients == "rec709") { global_flags.ycbcr_rec709_coefficients = true; + global_flags.ycbcr_auto_coefficients = false; } else if (output_ycbcr_coefficients == "rec601") { global_flags.ycbcr_rec709_coefficients = false; + global_flags.ycbcr_auto_coefficients = false; } else { fprintf(stderr, "ERROR: --output-ycbcr-coefficients must be “rec601”, “rec709” or “auto”\n"); exit(1); diff --git a/flags.h b/flags.h index 12bc3d4..78b1f1f 100644 --- a/flags.h +++ b/flags.h @@ -42,7 +42,8 @@ struct Flags { std::string midi_mapping_filename; // Empty for none. bool print_video_latency = false; double audio_queue_length_ms = 100.0; - bool ycbcr_rec709_coefficients = false; + bool ycbcr_rec709_coefficients = false; // Will be overridden by HDMI/SDI output if ycbcr_auto_coefficients == true. + bool ycbcr_auto_coefficients = true; int output_card = -1; double output_buffer_frames = 6.0; double output_slop_frames = 0.5; diff --git a/mixer.cpp b/mixer.cpp index e286370..898e7c0 100644 --- a/mixer.cpp +++ b/mixer.cpp @@ -1012,9 +1012,27 @@ void Mixer::render_one_frame(int64_t duration) theme_main_chain.setup_chain(); //theme_main_chain.chain->enable_phase_timing(true); + // If HDMI/SDI output is active and the user has requested auto mode, + // its mode overrides the existing Y'CbCr setting for the chain. 
+ YCbCrLumaCoefficients ycbcr_output_coefficients; + if (global_flags.ycbcr_auto_coefficients && output_card_index != -1) { + ycbcr_output_coefficients = cards[output_card_index].output->preferred_ycbcr_coefficients(); + } else { + ycbcr_output_coefficients = global_flags.ycbcr_rec709_coefficients ? YCBCR_REC_709 : YCBCR_REC_601; + } + + // TODO: Reduce the duplication against theme.cpp. + YCbCrFormat output_ycbcr_format; + output_ycbcr_format.chroma_subsampling_x = 1; + output_ycbcr_format.chroma_subsampling_y = 1; + output_ycbcr_format.luma_coefficients = ycbcr_output_coefficients; + output_ycbcr_format.full_range = false; + output_ycbcr_format.num_levels = 256; + chain->change_ycbcr_output_format(output_ycbcr_format); + const int64_t av_delay = lrint(global_flags.audio_queue_length_ms * 0.001 * TIMEBASE); // Corresponds to the delay in ResamplingQueue. GLuint y_tex, cbcr_tex; - bool got_frame = video_encoder->begin_frame(pts_int + av_delay, duration, theme_main_chain.input_frames, &y_tex, &cbcr_tex); + bool got_frame = video_encoder->begin_frame(pts_int + av_delay, duration, ycbcr_output_coefficients, theme_main_chain.input_frames, &y_tex, &cbcr_tex); assert(got_frame); // Render main chain. 
We take an extra copy of the created outputs, @@ -1040,7 +1058,7 @@ void Mixer::render_one_frame(int64_t duration) GLuint cbcr_copy_tex = resource_pool->create_2d_texture(GL_RG8, global_flags.width / 2, global_flags.height / 2); chroma_subsampler->subsample_chroma(cbcr_full_tex, global_flags.width, global_flags.height, cbcr_tex, cbcr_copy_tex); if (output_card_index != -1) { - cards[output_card_index].output->send_frame(y_tex, cbcr_full_tex, theme_main_chain.input_frames, pts_int, duration); + cards[output_card_index].output->send_frame(y_tex, cbcr_full_tex, ycbcr_output_coefficients, theme_main_chain.input_frames, pts_int, duration); } resource_pool->release_2d_texture(cbcr_full_tex); diff --git a/mux.cpp b/mux.cpp index a7b3a7f..1fd8e30 100644 --- a/mux.cpp +++ b/mux.cpp @@ -22,6 +22,7 @@ extern "C" { } #include "defs.h" +#include "flags.h" #include "timebase.h" using namespace std; @@ -68,9 +69,16 @@ Mux::Mux(AVFormatContext *avctx, int width, int height, Codec video_codec, const // as noted in each comment. // Note that the H.264 stream also contains this information and depending on the // mux, this might simply get ignored. See sps_rbsp(). + // Note that there's no way to change this per-frame as the H.264 stream + // would like to be able to. avstream_video->codecpar->color_primaries = AVCOL_PRI_BT709; // RGB colorspace (inout_format.color_space). avstream_video->codecpar->color_trc = AVCOL_TRC_UNSPECIFIED; // Gamma curve (inout_format.gamma_curve). - avstream_video->codecpar->color_space = AVCOL_SPC_SMPTE170M; // YUV colorspace (output_ycbcr_format.luma_coefficients). + // YUV colorspace (output_ycbcr_format.luma_coefficients). + if (global_flags.ycbcr_rec709_coefficients) { + avstream_video->codecpar->color_space = AVCOL_SPC_BT709; + } else { + avstream_video->codecpar->color_space = AVCOL_SPC_SMPTE170M; + } avstream_video->codecpar->color_range = AVCOL_RANGE_MPEG; // Full vs. limited range (output_ycbcr_format.full_range). 
avstream_video->codecpar->chroma_location = AVCHROMA_LOC_LEFT; // Chroma sample location. See chroma_offset_0[] in Mixer::subsample_chroma(). avstream_video->codecpar->field_order = AV_FIELD_PROGRESSIVE; diff --git a/quicksync_encoder.cpp b/quicksync_encoder.cpp index 2e8633d..d49a483 100644 --- a/quicksync_encoder.cpp +++ b/quicksync_encoder.cpp @@ -1,5 +1,6 @@ #include "quicksync_encoder.h" +#include #include // Must be above the Xlib includes. #include @@ -55,6 +56,7 @@ extern "C" { #include "timebase.h" #include "x264_encoder.h" +using namespace movit; using namespace std; using namespace std::chrono; using namespace std::placeholders; @@ -259,7 +261,7 @@ static void nal_header(bitstream *bs, int nal_ref_idc, int nal_unit_type) bitstream_put_ui(bs, nal_unit_type, 5); } -void QuickSyncEncoderImpl::sps_rbsp(bitstream *bs) +void QuickSyncEncoderImpl::sps_rbsp(YCbCrLumaCoefficients ycbcr_coefficients, bitstream *bs) { int profile_idc = PROFILE_IDC_BASELINE; @@ -331,9 +333,10 @@ void QuickSyncEncoderImpl::sps_rbsp(bitstream *bs) { bitstream_put_ui(bs, 1, 8); /* colour_primaries (1 = BT.709) */ bitstream_put_ui(bs, 2, 8); /* transfer_characteristics (2 = unspecified, since we use sRGB) */ - if (global_flags.ycbcr_rec709_coefficients) { + if (ycbcr_coefficients == YCBCR_REC_709) { bitstream_put_ui(bs, 1, 8); /* matrix_coefficients (1 = BT.709) */ } else { + assert(ycbcr_coefficients == YCBCR_REC_601); bitstream_put_ui(bs, 6, 8); /* matrix_coefficients (6 = BT.601/SMPTE 170M) */ } } @@ -515,14 +518,14 @@ int QuickSyncEncoderImpl::build_packed_pic_buffer(unsigned char **header_buffer) } int -QuickSyncEncoderImpl::build_packed_seq_buffer(unsigned char **header_buffer) +QuickSyncEncoderImpl::build_packed_seq_buffer(YCbCrLumaCoefficients ycbcr_coefficients, unsigned char **header_buffer) { bitstream bs; bitstream_start(&bs); nal_start_code_prefix(&bs); nal_header(&bs, NAL_REF_IDC_HIGH, NAL_SPS); - sps_rbsp(&bs); + sps_rbsp(ycbcr_coefficients, &bs); bitstream_end(&bs); 
*header_buffer = (unsigned char *)bs.buffer; @@ -1220,7 +1223,7 @@ int QuickSyncEncoderImpl::render_picture(GLSurface *surf, int frame_type, int di return 0; } -int QuickSyncEncoderImpl::render_packedsequence() +int QuickSyncEncoderImpl::render_packedsequence(YCbCrLumaCoefficients ycbcr_coefficients) { VAEncPackedHeaderParameterBuffer packedheader_param_buffer; VABufferID packedseq_para_bufid, packedseq_data_bufid, render_id[2]; @@ -1228,7 +1231,7 @@ int QuickSyncEncoderImpl::render_packedsequence() unsigned char *packedseq_buffer = NULL; VAStatus va_status; - length_in_bits = build_packed_seq_buffer(&packedseq_buffer); + length_in_bits = build_packed_seq_buffer(ycbcr_coefficients, &packedseq_buffer); packedheader_param_buffer.type = VAEncPackedHeaderSequence; @@ -1526,7 +1529,7 @@ int QuickSyncEncoderImpl::deinit_va() return 0; } -QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator) +QuickSyncEncoderImpl::QuickSyncEncoderImpl(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator) : current_storage_frame(0), resource_pool(resource_pool), surface(surface), x264_encoder(x264_encoder), frame_width(width), frame_height(height), disk_space_estimator(disk_space_estimator) { file_audio_encoder.reset(new AudioEncoder(AUDIO_OUTPUT_CODEC_NAME, DEFAULT_AUDIO_OUTPUT_BIT_RATE, oformat)); @@ -1595,7 +1598,7 @@ void QuickSyncEncoderImpl::release_gl_surface(size_t display_frame_num) } } -bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, const vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex) +bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, 
YCbCrLumaCoefficients ycbcr_coefficients, const vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex) { assert(!is_shutdown); GLSurface *surf = nullptr; @@ -1669,7 +1672,7 @@ bool QuickSyncEncoderImpl::begin_frame(int64_t pts, int64_t duration, const vect glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, surf->cbcr_egl_image); } - current_video_frame = PendingFrame{ {}, input_frames, pts, duration }; + current_video_frame = PendingFrame{ {}, input_frames, pts, duration, ycbcr_coefficients }; return true; } @@ -1850,7 +1853,7 @@ void QuickSyncEncoderImpl::encode_thread_func() } last_dts = dts; - encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration); + encode_frame(frame, quicksync_encoding_frame_num, quicksync_display_frame_num, gop_start_display_frame_num, frame_type, frame.pts, dts, frame.duration, frame.ycbcr_coefficients); ++quicksync_encoding_frame_num; } } @@ -1868,7 +1871,7 @@ void QuickSyncEncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num, PendingFrame frame = move(pending_frame.second); int64_t dts = last_dts + (TIMEBASE / MAX_FPS); printf("Finalizing encode: Encoding leftover frame %d as P-frame instead of B-frame.\n", display_frame_num); - encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration); + encode_frame(frame, encoding_frame_num++, display_frame_num, gop_start_display_frame_num, FRAME_P, frame.pts, dts, frame.duration, frame.ycbcr_coefficients); last_dts = dts; } } @@ -1931,12 +1934,12 @@ void QuickSyncEncoderImpl::pass_frame(QuickSyncEncoderImpl::PendingFrame frame, if (global_flags.uncompressed_video_to_http) { add_packet_for_uncompressed_frame(pts, duration, data); } else if (global_flags.x264_video_to_http) { - x264_encoder->add_frame(pts, duration, data, received_ts); + x264_encoder->add_frame(pts, duration, frame.ycbcr_coefficients, data, received_ts); } } void 
QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, - int frame_type, int64_t pts, int64_t dts, int64_t duration) + int frame_type, int64_t pts, int64_t dts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients) { const ReceivedTimestamps received_ts = find_received_timestamp(frame.input_frames); @@ -1980,10 +1983,14 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame // FIXME: If the mux wants global headers, we should not put the // SPS/PPS before each IDR frame, but rather put it into the // codec extradata (formatted differently?). + // + // NOTE: If we change ycbcr_coefficients, it will not take effect + // before the next IDR frame. This is acceptable, as it should only + // happen on a mode change, which is rare. render_sequence(); render_picture(surf, frame_type, display_frame_num, gop_start_display_frame_num); if (h264_packedheader) { - render_packedsequence(); + render_packedsequence(ycbcr_coefficients); render_packedpicture(); } } else { @@ -2018,13 +2025,14 @@ void QuickSyncEncoderImpl::encode_frame(QuickSyncEncoderImpl::PendingFrame frame tmp.pts = pts; tmp.dts = dts; tmp.duration = duration; + tmp.ycbcr_coefficients = ycbcr_coefficients; tmp.received_ts = received_ts; tmp.ref_display_frame_numbers = move(ref_display_frame_numbers); storage_task_enqueue(move(tmp)); } // Proxy object. 
-QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator) +QuickSyncEncoder::QuickSyncEncoder(const std::string &filename, ResourcePool *resource_pool, QSurface *surface, const string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator) : impl(new QuickSyncEncoderImpl(filename, resource_pool, surface, va_display, width, height, oformat, x264_encoder, disk_space_estimator)) {} // Must be defined here because unique_ptr<> destructor needs to know the impl. @@ -2035,9 +2043,9 @@ void QuickSyncEncoder::add_audio(int64_t pts, vector audio) impl->add_audio(pts, audio); } -bool QuickSyncEncoder::begin_frame(int64_t pts, int64_t duration, const vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex) +bool QuickSyncEncoder::begin_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex) { - return impl->begin_frame(pts, duration, input_frames, y_tex, cbcr_tex); + return impl->begin_frame(pts, duration, ycbcr_coefficients, input_frames, y_tex, cbcr_tex); } RefCountedGLsync QuickSyncEncoder::end_frame() diff --git a/quicksync_encoder.h b/quicksync_encoder.h index caa6586..a247ee8 100644 --- a/quicksync_encoder.h +++ b/quicksync_encoder.h @@ -27,6 +27,7 @@ #define _H264ENCODE_H #include +#include #include #include #include @@ -60,7 +61,7 @@ public: void set_stream_mux(Mux *mux); // Does not take ownership. Must be called unless x264 is used for the stream. 
void add_audio(int64_t pts, std::vector audio); - bool begin_frame(int64_t pts, int64_t duration, const std::vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex); + bool begin_frame(int64_t pts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients, const std::vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex); RefCountedGLsync end_frame(); void shutdown(); // Blocking. Does not require an OpenGL context. void release_gl_resources(); // Requires an OpenGL context. Must be run after shutdown. diff --git a/quicksync_encoder_impl.h b/quicksync_encoder_impl.h index b55edbb..679f2a2 100644 --- a/quicksync_encoder_impl.h +++ b/quicksync_encoder_impl.h @@ -2,6 +2,7 @@ #define _QUICKSYNC_ENCODER_IMPL_H 1 #include +#include #include #include @@ -35,7 +36,7 @@ public: QuickSyncEncoderImpl(const std::string &filename, movit::ResourcePool *resource_pool, QSurface *surface, const std::string &va_display, int width, int height, AVOutputFormat *oformat, X264Encoder *x264_encoder, DiskSpaceEstimator *disk_space_estimator); ~QuickSyncEncoderImpl(); void add_audio(int64_t pts, std::vector audio); - bool begin_frame(int64_t pts, int64_t duration, const std::vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex); + bool begin_frame(int64_t pts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients, const std::vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex); RefCountedGLsync end_frame(); void shutdown(); void release_gl_resources(); @@ -55,6 +56,7 @@ private: int frame_type; std::vector audio; int64_t pts, dts, duration; + movit::YCbCrLumaCoefficients ycbcr_coefficients; ReceivedTimestamps received_ts; std::vector ref_display_frame_numbers; }; @@ -62,6 +64,7 @@ private: RefCountedGLsync fence; std::vector input_frames; int64_t pts, duration; + movit::YCbCrLumaCoefficients ycbcr_coefficients; }; struct GLSurface { VASurfaceID src_surface, ref_surface; @@ -99,21 +102,21 @@ private: void add_packet_for_uncompressed_frame(int64_t pts, int64_t duration, 
const uint8_t *data); void pass_frame(PendingFrame frame, int display_frame_num, int64_t pts, int64_t duration); void encode_frame(PendingFrame frame, int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, - int frame_type, int64_t pts, int64_t dts, int64_t duration); + int frame_type, int64_t pts, int64_t dts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients); void storage_task_thread(); void storage_task_enqueue(storage_task task); void save_codeddata(GLSurface *surf, storage_task task); - int render_packedsequence(); + int render_packedsequence(movit::YCbCrLumaCoefficients ycbcr_coefficients); int render_packedpicture(); void render_packedslice(); int render_sequence(); int render_picture(GLSurface *surf, int frame_type, int display_frame_num, int gop_start_display_frame_num); - void sps_rbsp(bitstream *bs); + void sps_rbsp(movit::YCbCrLumaCoefficients ycbcr_coefficients, bitstream *bs); void pps_rbsp(bitstream *bs); int build_packed_pic_buffer(unsigned char **header_buffer); int render_slice(int encoding_frame_num, int display_frame_num, int gop_start_display_frame_num, int frame_type); void slice_header(bitstream *bs); - int build_packed_seq_buffer(unsigned char **header_buffer); + int build_packed_seq_buffer(movit::YCbCrLumaCoefficients ycbcr_coefficients, unsigned char **header_buffer); int build_packed_slice_buffer(unsigned char **header_buffer); int init_va(const std::string &va_display); int deinit_va(); diff --git a/theme.cpp b/theme.cpp index c5f335e..7bb1877 100644 --- a/theme.cpp +++ b/theme.cpp @@ -265,11 +265,14 @@ int EffectChain_finalize(lua_State* L) // happens in a pass not run by Movit (see ChromaSubsampler::subsample_chroma()). output_ycbcr_format.chroma_subsampling_x = 1; output_ycbcr_format.chroma_subsampling_y = 1; + + // This will be overridden if HDMI/SDI output is in force. 
if (global_flags.ycbcr_rec709_coefficients) { output_ycbcr_format.luma_coefficients = YCBCR_REC_709; } else { output_ycbcr_format.luma_coefficients = YCBCR_REC_601; } + output_ycbcr_format.full_range = false; output_ycbcr_format.num_levels = 256; diff --git a/video_encoder.cpp b/video_encoder.cpp index e00465c..fe0ecb1 100644 --- a/video_encoder.cpp +++ b/video_encoder.cpp @@ -120,11 +120,11 @@ void VideoEncoder::add_audio(int64_t pts, std::vector audio) stream_audio_encoder->encode_audio(audio, pts + quicksync_encoder->global_delay()); } -bool VideoEncoder::begin_frame(int64_t pts, int64_t duration, const std::vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex) +bool VideoEncoder::begin_frame(int64_t pts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients, const std::vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex) { lock_guard lock(qs_mu); qs_needing_cleanup.clear(); // Since we have an OpenGL context here, and are called regularly. - return quicksync_encoder->begin_frame(pts, duration, input_frames, y_tex, cbcr_tex); + return quicksync_encoder->begin_frame(pts, duration, ycbcr_coefficients, input_frames, y_tex, cbcr_tex); } RefCountedGLsync VideoEncoder::end_frame() diff --git a/video_encoder.h b/video_encoder.h index 368037d..8578462 100644 --- a/video_encoder.h +++ b/video_encoder.h @@ -6,6 +6,7 @@ #define _VIDEO_ENCODER_H #include +#include #include #include #include @@ -44,7 +45,7 @@ public: // Allocate a frame to render into. The returned two textures // are yours to render into (build them into an FBO). // Call end_frame() when you're done. 
- bool begin_frame(int64_t pts, int64_t duration, const std::vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex); + bool begin_frame(int64_t pts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients, const std::vector &input_frames, GLuint *y_tex, GLuint *cbcr_tex); // Call after you are done rendering into the frame; at this point, // y_tex and cbcr_tex will be assumed done, and handed over to the diff --git a/x264_encoder.cpp b/x264_encoder.cpp index f9b5624..7d81d55 100644 --- a/x264_encoder.cpp +++ b/x264_encoder.cpp @@ -20,6 +20,7 @@ extern "C" { #include } +using namespace movit; using namespace std; using namespace std::chrono; @@ -58,11 +59,12 @@ X264Encoder::~X264Encoder() encoder_thread.join(); } -void X264Encoder::add_frame(int64_t pts, int64_t duration, const uint8_t *data, const ReceivedTimestamps &received_ts) +void X264Encoder::add_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const uint8_t *data, const ReceivedTimestamps &received_ts) { QueuedFrame qf; qf.pts = pts; qf.duration = duration; + qf.ycbcr_coefficients = ycbcr_coefficients; qf.received_ts = received_ts; { @@ -265,18 +267,36 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf) // See if we have a new bitrate to change to. unsigned new_rate = new_bitrate_kbit.exchange(0); // Read and clear. if (new_rate != 0) { - if (speed_control) { - speed_control->set_config_override_function([new_rate](x264_param_t *param) { - param->rc.i_bitrate = new_rate; - update_vbv_settings(param); - }); + bitrate_override_func = [new_rate](x264_param_t *param) { + param->rc.i_bitrate = new_rate; + update_vbv_settings(param); + }; + } + + auto ycbcr_coefficients_override_func = [qf](x264_param_t *param) { + if (qf.ycbcr_coefficients == YCBCR_REC_709) { + param->vui.i_colmatrix = 1; // BT.709. 
} else { - x264_param_t param; - x264_encoder_parameters(x264, &param); - param.rc.i_bitrate = new_rate; - update_vbv_settings(&param); - x264_encoder_reconfig(x264, &param); + assert(qf.ycbcr_coefficients == YCBCR_REC_601); + param->vui.i_colmatrix = 6; // BT.601/SMPTE 170M. + } + }; + + if (speed_control) { + speed_control->set_config_override_function([this, ycbcr_coefficients_override_func](x264_param_t *param) { + if (bitrate_override_func) { + bitrate_override_func(param); + } + ycbcr_coefficients_override_func(param); + }); + } else { + x264_param_t param; + x264_encoder_parameters(x264, &param); + if (bitrate_override_func) { + bitrate_override_func(&param); } + ycbcr_coefficients_override_func(&param); + x264_encoder_reconfig(x264, &param); } if (speed_control) { diff --git a/x264_encoder.h b/x264_encoder.h index 8adb42a..2e64e66 100644 --- a/x264_encoder.h +++ b/x264_encoder.h @@ -33,6 +33,8 @@ extern "C" { #include } +#include + #include "print_latency.h" class Mux; @@ -51,7 +53,7 @@ public: // is taken to be raw NV12 data of WIDTHxHEIGHT resolution. // Does not block. - void add_frame(int64_t pts, int64_t duration, const uint8_t *data, const ReceivedTimestamps &received_ts); + void add_frame(int64_t pts, int64_t duration, movit::YCbCrLumaCoefficients ycbcr_coefficients, const uint8_t *data, const ReceivedTimestamps &received_ts); std::string get_global_headers() const { while (!x264_init_done) { @@ -67,6 +69,7 @@ public: private: struct QueuedFrame { int64_t pts, duration; + movit::YCbCrLumaCoefficients ycbcr_coefficients; uint8_t *data; ReceivedTimestamps received_ts; }; @@ -91,6 +94,8 @@ private: x264_t *x264; std::unique_ptr speed_control; + std::function bitrate_override_func; + std::atomic new_bitrate_kbit{0}; // 0 for no change. // Protects everything below it. -- 2.39.2