X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=nageru%2Fav1_encoder.cpp;h=da18c6d79dd079823b09d0e32e970b1dc9a6cd88;hb=f34a3e1bbc207541842e0b54d5418d95bafc8e5b;hp=a0dbdd1a8e17a4e3297a42fa32c67af2bf312ec9;hpb=8bb8bb7cc9700befab35a8cc2c4b7a88f0638af9;p=nageru

diff --git a/nageru/av1_encoder.cpp b/nageru/av1_encoder.cpp
index a0dbdd1..da18c6d 100644
--- a/nageru/av1_encoder.cpp
+++ b/nageru/av1_encoder.cpp
@@ -63,7 +63,7 @@ AV1Encoder::AV1Encoder(const AVOutputFormat *oformat)
 			av1_latency_histogram.init("av1");
 		});
 
-	const size_t bytes_per_pixel = 1;  // TODO: 10-bit support.
+	const size_t bytes_per_pixel = global_flags.bit_depth > 8 ? 2 : 1;
 	frame_pool.reset(new uint8_t[global_flags.width * global_flags.height * 2 * bytes_per_pixel * AV1_QUEUE_LENGTH]);
 	for (unsigned i = 0; i < AV1_QUEUE_LENGTH; ++i) {
 		free_frames.push(frame_pool.get() + i * (global_flags.width * global_flags.height * 2 * bytes_per_pixel));
@@ -102,8 +102,9 @@ void AV1Encoder::add_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients
 
 	// Since we're copying anyway, we can unpack from NV12 to fully planar on the fly.
 	// SVT-AV1 makes its own copy, though, and it would have been nice to avoid the
-	// double-copy.
-	size_t bytes_per_pixel = 1;  // TODO: 10-bit support.
+	// double-copy (and also perhaps let the GPU do the 10-bit compression SVT-AV1
+	// wants, instead of doing it on the CPU).
+	const size_t bytes_per_pixel = global_flags.bit_depth > 8 ? 2 : 1;
 	size_t frame_size = global_flags.width * global_flags.height * bytes_per_pixel;
 	assert(global_flags.width % 2 == 0);
 	assert(global_flags.height % 2 == 0);
@@ -111,7 +112,14 @@ void AV1Encoder::add_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients
 	uint8_t *cb = y + frame_size;
 	uint8_t *cr = cb + frame_size / 4;
 	memcpy(y, data, frame_size);
-	memcpy_interleaved(cb, cr, data + frame_size, frame_size / 2);
+	if (global_flags.bit_depth == 8) {
+		memcpy_interleaved(cb, cr, data + frame_size, frame_size / 2);
+	} else {
+		const uint16_t *src = reinterpret_cast<const uint16_t *>(data + frame_size);
+		uint16_t *cb16 = reinterpret_cast<uint16_t *>(cb);
+		uint16_t *cr16 = reinterpret_cast<uint16_t *>(cr);
+		memcpy_interleaved_word(cb16, cr16, src, frame_size / 4);
+	}
 
 	{
 		lock_guard<mutex> lock(mu);
@@ -136,7 +144,7 @@ void AV1Encoder::init_av1()
 	config.source_height = global_flags.height;
 	config.frame_rate_numerator = global_flags.av1_fps_num;
 	config.frame_rate_denominator = global_flags.av1_fps_den;
-	config.encoder_bit_depth = 8;  // TODO: 10-bit support.
+	config.encoder_bit_depth = global_flags.bit_depth;
 	config.rate_control_mode = 2;  // CBR.
 	config.pred_structure = 1;  // PRED_LOW_DELAY_B (needed for CBR).
 	config.target_bit_rate = global_flags.av1_bitrate * 1000;
@@ -273,23 +281,25 @@ void AV1Encoder::encoder_thread_func()
 void AV1Encoder::encode_frame(AV1Encoder::QueuedFrame qf)
 {
 	if (qf.data) {
+		const size_t bytes_per_pixel = global_flags.bit_depth > 8 ? 2 : 1;
+
 		EbSvtIOFormat pic;
 		pic.luma = qf.data;	
-		pic.cb = pic.luma + global_flags.width * global_flags.height;
-		pic.cr = pic.cb + global_flags.width * global_flags.height / 4;
-		pic.y_stride = global_flags.width;
-		pic.cb_stride = global_flags.width / 2;
-		pic.cr_stride = global_flags.width / 2;
+		pic.cb = pic.luma + global_flags.width * global_flags.height * bytes_per_pixel;
+		pic.cr = pic.cb + (global_flags.width * global_flags.height / 4) * bytes_per_pixel;
+		pic.y_stride = global_flags.width;  // In pixels, so no bytes_per_pixel.
+		pic.cb_stride = global_flags.width / 2;  // Likewise.
+		pic.cr_stride = global_flags.width / 2;  // Likewise.
 		pic.width = global_flags.width;
 		pic.height = global_flags.height;
 		pic.origin_x = 0;
 		pic.origin_y = 0;
 		pic.color_fmt = EB_YUV420;
-		pic.bit_depth = EB_EIGHT_BIT;  // TODO: 10-bit.
+		pic.bit_depth = global_flags.bit_depth > 8 ? EB_TEN_BIT : EB_EIGHT_BIT;
 
 		EbBufferHeaderType hdr;
 		hdr.p_buffer      = reinterpret_cast<uint8_t *>(&pic);
-		hdr.n_alloc_len   = global_flags.width * global_flags.height * 3 / 2;  // TODO: 10-bit.
+		hdr.n_alloc_len   = (global_flags.width * global_flags.height * 3 / 2) * bytes_per_pixel;
 		hdr.n_filled_len  = hdr.n_alloc_len;
 		hdr.n_tick_count  = 0;
 		hdr.p_app_private = reinterpret_cast<void *>(intptr_t(qf.duration));