X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=nageru%2Fav1_encoder.cpp;fp=nageru%2Fav1_encoder.cpp;h=a0dbdd1a8e17a4e3297a42fa32c67af2bf312ec9;hb=8bb8bb7cc9700befab35a8cc2c4b7a88f0638af9;hp=0000000000000000000000000000000000000000;hpb=9fa1937b8af43ec93e6fd4ea3a23519257b3274d;p=nageru diff --git a/nageru/av1_encoder.cpp b/nageru/av1_encoder.cpp new file mode 100644 index 0000000..a0dbdd1 --- /dev/null +++ b/nageru/av1_encoder.cpp @@ -0,0 +1,375 @@ +#include "av1_encoder.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "defs.h" +#include "flags.h" +#include "shared/metrics.h" +#include "shared/mux.h" +#include "print_latency.h" +#include "shared/timebase.h" +#include "shared/memcpy_interleaved.h" + +extern "C" { +#include +#include +} + +using namespace movit; +using namespace std; +using namespace std::chrono; +using namespace std::placeholders; + +namespace { + +// AV1Encoder can be restarted if --record-av1-video is set, so make these +// metrics global. +atomic metric_av1_queued_frames{0}; +atomic metric_av1_max_queued_frames{AV1_QUEUE_LENGTH}; +atomic metric_av1_dropped_frames{0}; +atomic metric_av1_output_frames_i{0}; +atomic metric_av1_output_frames_p{0}; +Histogram metric_av1_qp; +LatencyHistogram av1_latency_histogram; + +once_flag av1_metrics_inited; + +} // namespace + +AV1Encoder::AV1Encoder(const AVOutputFormat *oformat) + : wants_global_headers(oformat->flags & AVFMT_GLOBALHEADER) +{ + call_once(av1_metrics_inited, []{ + global_metrics.add("av1_queued_frames", {}, &metric_av1_queued_frames, Metrics::TYPE_GAUGE); + global_metrics.add("av1_max_queued_frames", {}, &metric_av1_max_queued_frames, Metrics::TYPE_GAUGE); + global_metrics.add("av1_dropped_frames", {}, &metric_av1_dropped_frames); + global_metrics.add("av1_output_frames", {{ "type", "i" }}, &metric_av1_output_frames_i); + global_metrics.add("av1_output_frames", {{ "type", "p" }}, &metric_av1_output_frames_p); + + metric_av1_qp.init_uniform(50); + global_metrics.add("av1_qp", {}, &metric_av1_qp); + av1_latency_histogram.init("av1"); + }); + + const size_t bytes_per_pixel = 1; // TODO: 10-bit support. + frame_pool.reset(new uint8_t[global_flags.width * global_flags.height * 2 * bytes_per_pixel * AV1_QUEUE_LENGTH]); + for (unsigned i = 0; i < AV1_QUEUE_LENGTH; ++i) { + free_frames.push(frame_pool.get() + i * (global_flags.width * global_flags.height * 2 * bytes_per_pixel)); + } + encoder_thread = thread(&AV1Encoder::encoder_thread_func, this); +} + +AV1Encoder::~AV1Encoder() +{ + should_quit = true; + queued_frames_nonempty.notify_all(); + encoder_thread.join(); +} + +void AV1Encoder::add_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients ycbcr_coefficients, const uint8_t *data, const ReceivedTimestamps &received_ts) +{ + assert(!should_quit); + + QueuedFrame qf; + qf.pts = pts; + qf.duration = duration; + qf.ycbcr_coefficients = ycbcr_coefficients; + qf.received_ts = received_ts; + + { + lock_guard lock(mu); + if (free_frames.empty()) { + fprintf(stderr, "WARNING: AV1 queue full, dropping frame with pts %" PRId64 "\n", pts); + ++metric_av1_dropped_frames; + return; + } + + qf.data = free_frames.front(); + free_frames.pop(); + } + + // Since we're copying anyway, we can unpack from NV12 to fully planar on the fly. + // SVT-AV1 makes its own copy, though, and it would have been nice to avoid the + // double-copy. + size_t bytes_per_pixel = 1; // TODO: 10-bit support. + size_t frame_size = global_flags.width * global_flags.height * bytes_per_pixel; + assert(global_flags.width % 2 == 0); + assert(global_flags.height % 2 == 0); + uint8_t *y = qf.data; + uint8_t *cb = y + frame_size; + uint8_t *cr = cb + frame_size / 4; + memcpy(y, data, frame_size); + memcpy_interleaved(cb, cr, data + frame_size, frame_size / 2); + + { + lock_guard lock(mu); + queued_frames.push(qf); + queued_frames_nonempty.notify_all(); + metric_av1_queued_frames = queued_frames.size(); + } +} + +void AV1Encoder::init_av1() +{ + EbSvtAv1EncConfiguration config; + EbErrorType ret = svt_av1_enc_init_handle(&encoder, nullptr, &config); + if (ret != EB_ErrorNone) { + fprintf(stderr, "Error initializing SVT-AV1 handle (error %08x)\n", ret); + exit(EXIT_FAILURE); + } + + config.enc_mode = global_flags.av1_preset; + config.intra_period_length = 63; // Approx. one second, conforms to the (n % 8) - 1 == 0 rule. + config.source_width = global_flags.width; + config.source_height = global_flags.height; + config.frame_rate_numerator = global_flags.av1_fps_num; + config.frame_rate_denominator = global_flags.av1_fps_den; + config.encoder_bit_depth = 8; // TODO: 10-bit support. + config.rate_control_mode = 2; // CBR. + config.pred_structure = 1; // PRED_LOW_DELAY_B (needed for CBR). + config.target_bit_rate = global_flags.av1_bitrate * 1000; + + // NOTE: These should be in sync with the ones in quicksync_encoder.cpp (sps_rbsp()). + config.color_primaries = EB_CICP_CP_BT_709; + config.transfer_characteristics = EB_CICP_TC_SRGB; + if (global_flags.ycbcr_rec709_coefficients) { + config.matrix_coefficients = EB_CICP_MC_BT_709; + } else { + config.matrix_coefficients = EB_CICP_MC_BT_601; + } + config.color_range = EB_CR_STUDIO_RANGE; +#if SVT_AV1_CHECK_VERSION(1, 0, 0) + config.chroma_sample_position = EB_CSP_VERTICAL; +#endif + + const vector &extra_param = global_flags.av1_extra_param; + for (const string &str : extra_param) { + const size_t pos = str.find(','); + if (pos == string::npos) { + if (svt_av1_enc_parse_parameter(&config, str.c_str(), nullptr) != EB_ErrorNone) { + fprintf(stderr, "ERROR: SVT-AV1 rejected parameter '%s' with no value\n", str.c_str()); + exit(EXIT_FAILURE); + } + } else { + const string key = str.substr(0, pos); + const string value = str.substr(pos + 1); + if (svt_av1_enc_parse_parameter(&config, key.c_str(), value.c_str()) != EB_ErrorNone) { + fprintf(stderr, "ERROR: SVT-AV1 rejected parameter '%s' set to '%s'\n", + key.c_str(), value.c_str()); + exit(EXIT_FAILURE); + } + } + } + + ret = svt_av1_enc_set_parameter(encoder, &config); + if (ret != EB_ErrorNone) { + fprintf(stderr, "Error configuring SVT-AV1 (error %08x)\n", ret); + exit(EXIT_FAILURE); + } + + ret = svt_av1_enc_init(encoder); + if (ret != EB_ErrorNone) { + fprintf(stderr, "Error initializing SVT-AV1 (error %08x)\n", ret); + exit(EXIT_FAILURE); + } + + if (wants_global_headers) { + EbBufferHeaderType *header = NULL; + + ret = svt_av1_enc_stream_header(encoder, &header); + if (ret != EB_ErrorNone) { + fprintf(stderr, "Error building SVT-AV1 header (error %08x)\n", ret); + exit(EXIT_FAILURE); + } + + global_headers = string(reinterpret_cast(header->p_buffer), header->n_filled_len); + + svt_av1_enc_stream_header_release(header); // Don't care about errors. + } +} + +void AV1Encoder::encoder_thread_func() +{ + if (nice(5) == -1) { + perror("nice()"); + // No exit; it's not fatal. + } + pthread_setname_np(pthread_self(), "AV1_encode"); + init_av1(); + av1_init_done = true; + + bool frames_left; + + do { + QueuedFrame qf; + + // Wait for a queued frame, then dequeue it. + { + unique_lock lock(mu); + queued_frames_nonempty.wait(lock, [this]() { return !queued_frames.empty() || should_quit; }); + if (!queued_frames.empty()) { + qf = queued_frames.front(); + queued_frames.pop(); + } else { + qf.pts = -1; + qf.duration = -1; + qf.data = nullptr; + } + + metric_av1_queued_frames = queued_frames.size(); + frames_left = !queued_frames.empty(); + } + + encode_frame(qf); + + { + lock_guard lock(mu); + free_frames.push(qf.data); + } + + // We should quit only if the should_quit flag is set _and_ we have nothing + // in our queue. + } while (!should_quit || frames_left); + + // Signal end of stream. + EbBufferHeaderType hdr; + hdr.n_alloc_len = 0; + hdr.n_filled_len = 0; + hdr.n_tick_count = 0; + hdr.p_app_private = nullptr; + hdr.pic_type = EB_AV1_INVALID_PICTURE; + hdr.p_buffer = nullptr; + hdr.metadata = nullptr; + hdr.flags = EB_BUFFERFLAG_EOS; + svt_av1_enc_send_picture(encoder, &hdr); + + bool seen_eof = false; + do { + EbBufferHeaderType *buf; + EbErrorType ret = svt_av1_enc_get_packet(encoder, &buf, /*pic_send_done=*/true); + if (ret == EB_NoErrorEmptyQueue) { + assert(false); + } + seen_eof = (buf->flags & EB_BUFFERFLAG_EOS); + process_packet(buf); + } while (!seen_eof); + + svt_av1_enc_deinit(encoder); + svt_av1_enc_deinit_handle(encoder); +} + +void AV1Encoder::encode_frame(AV1Encoder::QueuedFrame qf) +{ + if (qf.data) { + EbSvtIOFormat pic; + pic.luma = qf.data; + pic.cb = pic.luma + global_flags.width * global_flags.height; + pic.cr = pic.cb + global_flags.width * global_flags.height / 4; + pic.y_stride = global_flags.width; + pic.cb_stride = global_flags.width / 2; + pic.cr_stride = global_flags.width / 2; + pic.width = global_flags.width; + pic.height = global_flags.height; + pic.origin_x = 0; + pic.origin_y = 0; + pic.color_fmt = EB_YUV420; + pic.bit_depth = EB_EIGHT_BIT; // TODO: 10-bit. + + EbBufferHeaderType hdr; + hdr.p_buffer = reinterpret_cast(&pic); + hdr.n_alloc_len = global_flags.width * global_flags.height * 3 / 2; // TODO: 10-bit. + hdr.n_filled_len = hdr.n_alloc_len; + hdr.n_tick_count = 0; + hdr.p_app_private = reinterpret_cast(intptr_t(qf.duration)); + hdr.pic_type = EB_AV1_INVALID_PICTURE; // Actually means auto, according to FFmpeg. + hdr.metadata = nullptr; + hdr.flags = 0; + hdr.pts = av_rescale_q(qf.pts, AVRational{ 1, TIMEBASE }, AVRational{ global_flags.av1_fps_den, global_flags.av1_fps_num }); + if (hdr.pts <= last_pts) { + fprintf(stderr, "WARNING: Receiving frames faster than given --av1-fps value (%d/%d); dropping frame.\n", + global_flags.av1_fps_num, global_flags.av1_fps_den); + } else { + svt_av1_enc_send_picture(encoder, &hdr); + frames_being_encoded[hdr.pts] = qf.received_ts; + last_pts = hdr.pts; + } + } + + for ( ;; ) { + EbBufferHeaderType *buf; + EbErrorType ret = svt_av1_enc_get_packet(encoder, &buf, /*pic_send_done=*/false); + if (ret == EB_NoErrorEmptyQueue) { + return; + } + process_packet(buf); + } +} + +void AV1Encoder::process_packet(EbBufferHeaderType *buf) +{ + if (buf->n_filled_len == 0) { + // TODO: Can this ever happen? + svt_av1_enc_release_out_buffer(&buf); + return; + } + + switch (buf->pic_type) { + case EB_AV1_KEY_PICTURE: + case EB_AV1_INTRA_ONLY_PICTURE: + ++metric_av1_output_frames_i; + break; + case EB_AV1_INTER_PICTURE: // We don't really know whether it's P or B. + ++metric_av1_output_frames_p; + break; + default: + break; + } + metric_av1_qp.count_event(buf->qp); + + if (frames_being_encoded.count(buf->pts)) { + ReceivedTimestamps received_ts = frames_being_encoded[buf->pts]; + frames_being_encoded.erase(buf->pts); + + static int frameno = 0; + print_latency("Current AV1 latency (video inputs → network mux):", + received_ts, /*b_frame=*/false, &frameno, &av1_latency_histogram); + } else { + assert(false); + } + + AVPacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.buf = nullptr; + pkt.data = buf->p_buffer; + pkt.size = buf->n_filled_len; + pkt.stream_index = 0; + if (buf->pic_type == EB_AV1_KEY_PICTURE) { + pkt.flags = AV_PKT_FLAG_KEY; + } else if (buf->pic_type == EB_AV1_NON_REF_PICTURE) { + // I have no idea if this does anything in practice, + // but the libavcodec plugin does it. + pkt.flags = AV_PKT_FLAG_DISPOSABLE; + } else { + pkt.flags = 0; + } + pkt.pts = av_rescale_q(buf->pts, AVRational{ global_flags.av1_fps_den, global_flags.av1_fps_num }, AVRational{ 1, TIMEBASE }); + pkt.dts = av_rescale_q(buf->dts, AVRational{ global_flags.av1_fps_den, global_flags.av1_fps_num }, AVRational{ 1, TIMEBASE }); + + for (Mux *mux : muxes) { + mux->add_packet(pkt, pkt.pts, pkt.dts); + } + + svt_av1_enc_release_out_buffer(&buf); +}