X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=nageru%2Fav1_encoder.cpp;h=da18c6d79dd079823b09d0e32e970b1dc9a6cd88;hb=f34a3e1bbc207541842e0b54d5418d95bafc8e5b;hp=a0dbdd1a8e17a4e3297a42fa32c67af2bf312ec9;hpb=8bb8bb7cc9700befab35a8cc2c4b7a88f0638af9;p=nageru

Summary: make the SVT-AV1 encoder path honor global_flags.bit_depth instead of
assuming 8-bit. Frame pool sizing, plane offsets and n_alloc_len are scaled by
bytes_per_pixel (2 when bit_depth > 8, otherwise 1); strides stay in pixels;
for depths other than 8 the interleaved chroma plane is split with
memcpy_interleaved_word() on 16-bit samples.

NOTE(review): this copy was rebuilt from a whitespace-collapsed extraction.
Tab indentation, blank context lines and the template arguments of
reinterpret_cast<>/lock_guard<> were reconstructed. If a context line does
not match the target tree, try "git apply --ignore-whitespace".

diff --git a/nageru/av1_encoder.cpp b/nageru/av1_encoder.cpp
index a0dbdd1..da18c6d 100644
--- a/nageru/av1_encoder.cpp
+++ b/nageru/av1_encoder.cpp
@@ -63,7 +63,7 @@ AV1Encoder::AV1Encoder(const AVOutputFormat *oformat)
 		av1_latency_histogram.init("av1");
 	});
 
-	const size_t bytes_per_pixel = 1; // TODO: 10-bit support.
+	const size_t bytes_per_pixel = global_flags.bit_depth > 8 ? 2 : 1;
 	frame_pool.reset(new uint8_t[global_flags.width * global_flags.height * 2 * bytes_per_pixel * AV1_QUEUE_LENGTH]);
 	for (unsigned i = 0; i < AV1_QUEUE_LENGTH; ++i) {
 		free_frames.push(frame_pool.get() + i * (global_flags.width * global_flags.height * 2 * bytes_per_pixel));
@@ -102,8 +102,9 @@ void AV1Encoder::add_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients
 
 	// Since we're copying anyway, we can unpack from NV12 to fully planar on the fly.
 	// SVT-AV1 makes its own copy, though, and it would have been nice to avoid the
-	// double-copy.
-	size_t bytes_per_pixel = 1; // TODO: 10-bit support.
+	// double-copy (and also perhaps let the GPU do the 10-bit compression SVT-AV1
+	// wants, instead of doing it on the CPU).
+	const size_t bytes_per_pixel = global_flags.bit_depth > 8 ? 2 : 1;
 	size_t frame_size = global_flags.width * global_flags.height * bytes_per_pixel;
 	assert(global_flags.width % 2 == 0);
 	assert(global_flags.height % 2 == 0);
@@ -111,7 +112,14 @@ void AV1Encoder::add_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients
 	uint8_t *cb = y + frame_size;
 	uint8_t *cr = cb + frame_size / 4;
 	memcpy(y, data, frame_size);
-	memcpy_interleaved(cb, cr, data + frame_size, frame_size / 2);
+	if (global_flags.bit_depth == 8) {
+		memcpy_interleaved(cb, cr, data + frame_size, frame_size / 2);
+	} else {
+		const uint16_t *src = reinterpret_cast<const uint16_t *>(data + frame_size);
+		uint16_t *cb16 = reinterpret_cast<uint16_t *>(cb);
+		uint16_t *cr16 = reinterpret_cast<uint16_t *>(cr);
+		memcpy_interleaved_word(cb16, cr16, src, frame_size / 4);
+	}
 
 	{
 		lock_guard<mutex> lock(mu);
@@ -136,7 +144,7 @@ void AV1Encoder::init_av1()
 	config.source_height = global_flags.height;
 	config.frame_rate_numerator = global_flags.av1_fps_num;
 	config.frame_rate_denominator = global_flags.av1_fps_den;
-	config.encoder_bit_depth = 8; // TODO: 10-bit support.
+	config.encoder_bit_depth = global_flags.bit_depth;
 	config.rate_control_mode = 2; // CBR.
 	config.pred_structure = 1; // PRED_LOW_DELAY_B (needed for CBR).
 	config.target_bit_rate = global_flags.av1_bitrate * 1000;
@@ -273,23 +281,25 @@ void AV1Encoder::encoder_thread_func()
 void AV1Encoder::encode_frame(AV1Encoder::QueuedFrame qf)
 {
 	if (qf.data) {
+		const size_t bytes_per_pixel = global_flags.bit_depth > 8 ? 2 : 1;
+
 		EbSvtIOFormat pic;
 		pic.luma = qf.data;
-		pic.cb = pic.luma + global_flags.width * global_flags.height;
-		pic.cr = pic.cb + global_flags.width * global_flags.height / 4;
-		pic.y_stride = global_flags.width;
-		pic.cb_stride = global_flags.width / 2;
-		pic.cr_stride = global_flags.width / 2;
+		pic.cb = pic.luma + global_flags.width * global_flags.height * bytes_per_pixel;
+		pic.cr = pic.cb + (global_flags.width * global_flags.height / 4) * bytes_per_pixel;
+		pic.y_stride = global_flags.width; // In pixels, so no bytes_per_pixel.
+		pic.cb_stride = global_flags.width / 2; // Likewise.
+		pic.cr_stride = global_flags.width / 2; // Likewise.
 		pic.width = global_flags.width;
 		pic.height = global_flags.height;
 		pic.origin_x = 0;
 		pic.origin_y = 0;
 		pic.color_fmt = EB_YUV420;
-		pic.bit_depth = EB_EIGHT_BIT; // TODO: 10-bit.
+		pic.bit_depth = global_flags.bit_depth > 8 ? EB_TEN_BIT : EB_EIGHT_BIT;
 
 		EbBufferHeaderType hdr;
 		hdr.p_buffer = reinterpret_cast<uint8_t *>(&pic);
-		hdr.n_alloc_len = global_flags.width * global_flags.height * 3 / 2; // TODO: 10-bit.
+		hdr.n_alloc_len = (global_flags.width * global_flags.height * 3 / 2) * bytes_per_pixel;
 		hdr.n_filled_len = hdr.n_alloc_len;
 		hdr.n_tick_count = 0;
 		hdr.p_app_private = reinterpret_cast<void *>(intptr_t(qf.duration));