From: Steinar H. Gunderson Date: Sun, 17 Apr 2016 20:38:05 +0000 (+0200) Subject: Support encoding the HTTP stream with x264. Highly experimental for now! X-Git-Tag: 1.3.0~90 X-Git-Url: https://git.sesse.net/?p=nageru;a=commitdiff_plain;h=cf939d7e65533771a8f242f0283c7c55d91a5100 Support encoding the HTTP stream with x264. Highly experimental for now! --- diff --git a/Makefile b/Makefile index 31e0841..080c09e 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CXX=g++ -PKG_MODULES = Qt5Core Qt5Gui Qt5Widgets Qt5OpenGLExtensions Qt5OpenGL libusb-1.0 movit lua52 libmicrohttpd epoxy +PKG_MODULES = Qt5Core Qt5Gui Qt5Widgets Qt5OpenGLExtensions Qt5OpenGL libusb-1.0 movit lua52 libmicrohttpd epoxy x264 CXXFLAGS := -O2 -march=native -g -std=gnu++11 -Wall -Wno-deprecated-declarations -Werror -fPIC $(shell pkg-config --cflags $(PKG_MODULES)) -pthread -DMOVIT_SHADER_DIR=\"$(shell pkg-config --variable=shaderdir movit)\" -Idecklink/ LDFLAGS=$(shell pkg-config --libs $(PKG_MODULES)) -lEGL -lGL -pthread -lva -lva-drm -lva-x11 -lX11 -lavformat -lavcodec -lavutil -lswscale -lzita-resampler -lasound -ldl @@ -8,7 +8,7 @@ OBJS=glwidget.o main.o mainwindow.o vumeter.o lrameter.o vu_common.o correlation OBJS += glwidget.moc.o mainwindow.moc.o vumeter.moc.o lrameter.moc.o correlation_meter.moc.o aboutdialog.moc.o # Mixer objects -OBJS += h264encode.o mixer.o bmusb/bmusb.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o resampling_queue.o httpd.o mux.o ebu_r128_proc.o flags.o image_input.o stereocompressor.o filter.o alsa_output.o correlation_measurer.o +OBJS += h264encode.o x264encode.o mixer.o bmusb/bmusb.o pbo_frame_allocator.o context.o ref_counted_frame.o theme.o resampling_queue.o httpd.o mux.o ebu_r128_proc.o flags.o image_input.o stereocompressor.o filter.o alsa_output.o correlation_measurer.o # DeckLink OBJS += decklink_capture.o decklink/DeckLinkAPIDispatch.o diff --git a/defs.h b/defs.h index cd1e267..0e5378c 100644 --- a/defs.h +++ b/defs.h @@ -11,7 +11,7 @@ 
#define FRAME_HISTORY_LENGTH 5 #define AUDIO_OUTPUT_CODEC_NAME "pcm_s32le" -#define AUDIO_OUTPUT_BIT_RATE 0 +#define DEFAULT_AUDIO_OUTPUT_BIT_RATE 0 #define LOCAL_DUMP_PREFIX "record-" #define LOCAL_DUMP_SUFFIX ".nut" @@ -30,4 +30,8 @@ // the output to be very uneven. #define MUX_BUFFER_SIZE 65536 +// In number of frames. Comes in addition to any internal queues in x264 +// (frame threading, lookahead, etc.). +#define X264_QUEUE_LENGTH 50 + #endif // !defined(_DEFS_H) diff --git a/flags.cpp b/flags.cpp index 398d49d..853f6fb 100644 --- a/flags.cpp +++ b/flags.cpp @@ -16,6 +16,7 @@ void usage() fprintf(stderr, " -v, --va-display=SPEC VA-API device for H.264 encoding\n"); fprintf(stderr, " ($DISPLAY spec or /dev/dri/render* path)\n"); fprintf(stderr, " --http-uncompressed-video send uncompressed NV12 video to HTTP clients\n"); + fprintf(stderr, " --http-x264-video send x264-compressed video to HTTP clients\n"); fprintf(stderr, " --http-mux=NAME mux to use for HTTP streams (default " DEFAULT_STREAM_MUX_NAME ")\n"); fprintf(stderr, " --http-audio-codec=NAME audio codec to use for HTTP streams\n"); fprintf(stderr, " (default is to use the same as for the recording)\n"); @@ -38,6 +39,7 @@ void parse_flags(int argc, char * const argv[]) { "theme", required_argument, 0, 't' }, { "va-display", required_argument, 0, 1000 }, { "http-uncompressed-video", no_argument, 0, 1001 }, + { "http-x264-video", no_argument, 0, 1008 }, { "http-mux", required_argument, 0, 1004 }, { "http-coarse-timebase", no_argument, 0, 1005 }, { "http-audio-codec", required_argument, 0, 1006 }, @@ -78,6 +80,9 @@ void parse_flags(int argc, char * const argv[]) case 1007: global_flags.stream_audio_codec_bitrate = atoi(optarg) * 1000; break; + case 1008: + global_flags.x264_video_to_http = true; + break; case 1002: global_flags.flat_audio = true; break; @@ -94,4 +99,10 @@ void parse_flags(int argc, char * const argv[]) exit(1); } } + + if (global_flags.uncompressed_video_to_http && + 
global_flags.x264_video_to_http) { + fprintf(stderr, "ERROR: --http-uncompressed-video and --http-x264-video are mutually incompatible\n"); + exit(1); + } } diff --git a/flags.h b/flags.h index 7dc03c5..cfef0fa 100644 --- a/flags.h +++ b/flags.h @@ -9,6 +9,7 @@ struct Flags { int num_cards = 2; std::string va_display; bool uncompressed_video_to_http = false; + bool x264_video_to_http = false; std::string theme_filename = "theme.lua"; bool flat_audio = false; bool flush_pbos = true; diff --git a/h264encode.cpp b/h264encode.cpp index 8f45088..50cb0e2 100644 --- a/h264encode.cpp +++ b/h264encode.cpp @@ -41,6 +41,7 @@ extern "C" { #include "flags.h" #include "httpd.h" #include "timebase.h" +#include "x264encode.h" using namespace std; @@ -290,6 +291,7 @@ private: AVFrame *audio_frame = nullptr; HTTPD *httpd; unique_ptr reorderer; + unique_ptr x264_encoder; // nullptr if not using x264. Display *x11_display = nullptr; @@ -956,6 +958,9 @@ void H264EncoderImpl::enable_zerocopy_if_possible() if (global_flags.uncompressed_video_to_http) { fprintf(stderr, "Disabling zerocopy H.264 encoding due to --uncompressed_video_to_http.\n"); use_zerocopy = false; + } else if (global_flags.x264_video_to_http) { + fprintf(stderr, "Disabling zerocopy H.264 encoding due to --x264_video_to_http.\n"); + use_zerocopy = false; } else { use_zerocopy = true; } @@ -1639,7 +1644,8 @@ void H264EncoderImpl::save_codeddata(storage_task task) if (file_mux) { file_mux->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay()); } - if (!global_flags.uncompressed_video_to_http) { + if (!global_flags.uncompressed_video_to_http && + !global_flags.x264_video_to_http) { httpd->add_packet(pkt, task.pts + global_delay(), task.dts + global_delay()); } } @@ -1884,9 +1890,13 @@ H264EncoderImpl::H264EncoderImpl(QSurface *surface, const string &va_display, in //print_input(); - if (global_flags.uncompressed_video_to_http) { + if (global_flags.uncompressed_video_to_http || + 
global_flags.x264_video_to_http) { reorderer.reset(new FrameReorderer(ip_period - 1, frame_width, frame_height)); } + if (global_flags.x264_video_to_http) { + x264_encoder.reset(new X264Encoder(httpd)); + } init_va(va_display); setup_encode(); @@ -2161,11 +2171,17 @@ void H264EncoderImpl::encode_remaining_frames_as_p(int encoding_frame_num, int g last_dts = dts; } - if (global_flags.uncompressed_video_to_http) { + if (global_flags.uncompressed_video_to_http || + global_flags.x264_video_to_http) { // Add frames left in reorderer. while (!reorderer->empty()) { pair output_frame = reorderer->get_first_frame(); - add_packet_for_uncompressed_frame(output_frame.first, output_frame.second); + if (global_flags.uncompressed_video_to_http) { + add_packet_for_uncompressed_frame(output_frame.first, output_frame.second); + } else { + assert(global_flags.x264_video_to_http); + x264_encoder->add_frame(output_frame.first, output_frame.second); + } } } } @@ -2234,12 +2250,18 @@ void H264EncoderImpl::encode_frame(H264EncoderImpl::PendingFrame frame, int enco va_status = vaUnmapBuffer(va_dpy, surf->surface_image.buf); CHECK_VASTATUS(va_status, "vaUnmapBuffer"); - if (global_flags.uncompressed_video_to_http) { + if (global_flags.uncompressed_video_to_http || + global_flags.x264_video_to_http) { // Add uncompressed video. (Note that pts == dts here.) // Delay needs to match audio. 
pair output_frame = reorderer->reorder_frame(pts + global_delay(), reinterpret_cast(surf->y_ptr)); if (output_frame.second != nullptr) { - add_packet_for_uncompressed_frame(output_frame.first, output_frame.second); + if (global_flags.uncompressed_video_to_http) { + add_packet_for_uncompressed_frame(output_frame.first, output_frame.second); + } else { + assert(global_flags.x264_video_to_http); + x264_encoder->add_frame(output_frame.first, output_frame.second); + } } } } diff --git a/x264encode.cpp b/x264encode.cpp new file mode 100644 index 0000000..ab34178 --- /dev/null +++ b/x264encode.cpp @@ -0,0 +1,172 @@ +#include + +#include "defs.h" +#include "httpd.h" +#include "timebase.h" +#include "x264encode.h" + +extern "C" { +#include +} + +using namespace std; + +X264Encoder::X264Encoder(HTTPD *httpd) + : httpd(httpd) +{ + frame_pool.reset(new uint8_t[WIDTH * HEIGHT * 2 * X264_QUEUE_LENGTH]); + for (unsigned i = 0; i < X264_QUEUE_LENGTH; ++i) { + free_frames.push(frame_pool.get() + i * (WIDTH * HEIGHT * 2)); + } + encoder_thread = thread(&X264Encoder::encoder_thread_func, this); +} + +X264Encoder::~X264Encoder() +{ + // TODO: close x264 +} + +void X264Encoder::add_frame(int64_t pts, const uint8_t *data) +{ + QueuedFrame qf; + qf.pts = pts; + + { + lock_guard lock(mu); + if (free_frames.empty()) { + fprintf(stderr, "WARNING: x264 queue full, dropping frame with pts %ld\n", pts); + return; + } + + qf.data = free_frames.front(); + free_frames.pop(); + } + + memcpy(qf.data, data, WIDTH * HEIGHT * 2); + + { + lock_guard lock(mu); + queued_frames.push(qf); + queued_frames_nonempty.notify_all(); + } +} + +void X264Encoder::end_encoding() +{ + // TODO +} + +void X264Encoder::init_x264() +{ + x264_param_t param; + x264_param_default_preset(&param, "ultrafast", "film"); // TODO: flags + + param.i_width = WIDTH; + param.i_height = HEIGHT; + param.i_csp = X264_CSP_NV12; + param.b_vfr_input = 1; + param.i_timebase_num = 1; + param.i_timebase_den = TIMEBASE; + param.i_keyint_max = 50; 
// About one second. + + // NOTE: These should be in sync with the ones in h264encode.cpp (sbs_rbsp()). + param.vui.i_vidformat = 5; // Unspecified. + param.vui.b_fullrange = 0; + param.vui.i_colorprim = 1; // BT.709. + param.vui.i_transfer = 2; // Unspecified (since we use sRGB). + param.vui.i_colmatrix = 6; // BT.601/SMPTE 170M. + + // 4.5 Mbit/sec, CBR. + param.rc.i_rc_method = X264_RC_ABR; + param.rc.i_bitrate = 4500; + + // One-second VBV. + param.rc.i_vbv_max_bitrate = 4500; + param.rc.i_vbv_buffer_size = 4500; + + // TODO: more flags here, via x264_param_parse(). + + x264_param_apply_profile(&param, "high"); + + x264 = x264_encoder_open(&param); + if (x264 == nullptr) { + fprintf(stderr, "ERROR: x264 initialization failed.\n"); + exit(1); + } +} + +void X264Encoder::encoder_thread_func() +{ + nice(5); // Note that x264 further nices some of its threads. + init_x264(); + + for ( ;; ) { + QueuedFrame qf; + + // Wait for a queued frame, then dequeue it. + { + unique_lock lock(mu); + queued_frames_nonempty.wait(lock, [this]() { return !queued_frames.empty(); }); + qf = queued_frames.front(); + queued_frames.pop(); + } + + encode_frame(qf); + + { + lock_guard lock(mu); + free_frames.push(qf.data); + } + } +} + +void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf) +{ + x264_picture_t pic; + x264_nal_t *nal = nullptr; + int num_nal = 0; + + x264_picture_init(&pic); + + // TODO: Delayed frames. + pic.i_pts = qf.pts; + pic.img.i_csp = X264_CSP_NV12; + pic.img.i_plane = 2; + pic.img.plane[0] = qf.data; + pic.img.i_stride[0] = WIDTH; + pic.img.plane[1] = qf.data + WIDTH * HEIGHT; + pic.img.i_stride[1] = WIDTH / 2 * sizeof(uint16_t); + + x264_encoder_encode(x264, &nal, &num_nal, &pic, &pic); + + // We really need one AVPacket for the entire frame, it seems, + // so combine it all. 
+ size_t num_bytes = 0; + for (int i = 0; i < num_nal; ++i) { + num_bytes += nal[i].i_payload; + } + + unique_ptr data(new uint8_t[num_bytes]); + uint8_t *ptr = data.get(); + + for (int i = 0; i < num_nal; ++i) { + memcpy(ptr, nal[i].p_payload, nal[i].i_payload); + ptr += nal[i].i_payload; + } + + printf("Got frame, keyframe = %d\n", pic.b_keyframe); + + AVPacket pkt; + memset(&pkt, 0, sizeof(pkt)); + pkt.buf = nullptr; + pkt.data = data.get(); + pkt.size = num_bytes; + pkt.stream_index = 0; + if (pic.b_keyframe) { + pkt.flags = AV_PKT_FLAG_KEY; + } else { + pkt.flags = 0; + } + + httpd->add_packet(pkt, pic.i_pts, pic.i_dts); +} diff --git a/x264encode.h b/x264encode.h new file mode 100644 index 0000000..f1c3a37 --- /dev/null +++ b/x264encode.h @@ -0,0 +1,80 @@ +// A wrapper around x264, to encode video in higher quality than Quick Sync +// can give us. We maintain a queue of uncompressed Y'CbCr frames (of 50 frames, +// so a little under 100 MB at 720p), then have a separate thread pull out +// those frames as fast as we can to give them to x264 for encoding. +// +// TODO: We use x264's “speedcontrol” patch if available, so that quality is +// automatically scaled up or down to content and available CPU time. +// +// The encoding threads are niced down because mixing is more important than +// encoding; if we lose frames in mixing, we'll lose frames to disk _and_ +// to the stream, whereas if we lose frames in encoding, we'll lose frames +// to the stream only, so the latter is strictly better. More importantly, +// this allows speedcontrol (when implemented) to do its thing without +// disturbing the mixer. + +#ifndef _X264ENCODE_H +#define _X264ENCODE_H 1 + +#include + +#include +#include +#include +#include +#include + +extern "C" { +#include "x264.h" +} + +class HTTPD; + +class X264Encoder { +public: + X264Encoder(HTTPD *httpd); // Does not take ownership. + ~X264Encoder(); + + // The data argument is taken to be raw NV12 data of WIDTHxHEIGHT resolution. + // Does not block. 
+ void add_frame(int64_t pts, const uint8_t *data); + + // Called after the last frame. Will block; once this returns, + // the last data is flushed. + void end_encoding(); + +private: + struct QueuedFrame { + int64_t pts; + uint8_t *data; + }; + void encoder_thread_func(); + void init_x264(); + void encode_frame(QueuedFrame qf); + + // One big memory chunk of all 50 (or whatever) frames, allocated in + // the constructor. All data functions just use pointers into this + // pool. + std::unique_ptr frame_pool; + + HTTPD *httpd = nullptr; + + std::thread encoder_thread; + x264_t *x264; + + // Protects everything below it. + std::mutex mu; + + // Frames that are not being encoded or waiting to be encoded, + // so that add_frame() can use new ones. + std::queue free_frames; + + // Frames that are waiting to be encoded (i.e., add_frame() has been + // called, but they are not picked up for encoding yet). + std::queue queued_frames; + + // Whenever the state of queued_frames changes. + std::condition_variable queued_frames_nonempty; +}; + +#endif // !defined(_X264ENCODE_H)