git.sesse.net Git - nageru/commitdiff
Support 10-bit x264 output.
author Steinar H. Gunderson <sgunderson@bigfoot.com>
Mon, 13 Mar 2017 22:55:11 +0000 (23:55 +0100)
committer Steinar H. Gunderson <sgunderson@bigfoot.com>
Mon, 13 Mar 2017 22:55:11 +0000 (23:55 +0100)
Requires a 10-bit-compiled x264. Probably breaks DeckLink output for now.

mixer.cpp
quicksync_encoder.cpp
theme.cpp
x264_encoder.cpp

index 898e7c04cef19e52e6060720b71bcab1a4769f38..8e5b25996722d46a2a66805b6c112e107136401e 100644 (file)
--- a/mixer.cpp
+++ b/mixer.cpp
@@ -213,7 +213,7 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
                ycbcr_format.luma_coefficients = YCBCR_REC_601;
        }
        ycbcr_format.full_range = false;
-       ycbcr_format.num_levels = 256;
+       ycbcr_format.num_levels = 1 << global_flags.x264_bit_depth;
        ycbcr_format.cb_x_position = 0.0f;
        ycbcr_format.cr_x_position = 0.0f;
        ycbcr_format.cb_y_position = 0.5f;
@@ -222,7 +222,8 @@ Mixer::Mixer(const QSurfaceFormat &format, unsigned num_cards)
        // Display chain; shows the live output produced by the main chain (or rather, a copy of it).
        display_chain.reset(new EffectChain(global_flags.width, global_flags.height, resource_pool.get()));
        check_error();
-       display_input = new YCbCrInput(inout_format, ycbcr_format, global_flags.width, global_flags.height, YCBCR_INPUT_SPLIT_Y_AND_CBCR);
+       GLenum type = global_flags.x264_bit_depth > 8 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE;
+       display_input = new YCbCrInput(inout_format, ycbcr_format, global_flags.width, global_flags.height, YCBCR_INPUT_SPLIT_Y_AND_CBCR, type);
        display_chain->add_input(display_input);
        display_chain->add_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
        display_chain->set_dither_bits(0);  // Don't bother.
@@ -1027,7 +1028,7 @@ void Mixer::render_one_frame(int64_t duration)
        output_ycbcr_format.chroma_subsampling_y = 1;
        output_ycbcr_format.luma_coefficients = ycbcr_output_coefficients;
        output_ycbcr_format.full_range = false;
-       output_ycbcr_format.num_levels = 256;
+       output_ycbcr_format.num_levels = 1 << global_flags.x264_bit_depth;
        chain->change_ycbcr_output_format(output_ycbcr_format);
 
        const int64_t av_delay = lrint(global_flags.audio_queue_length_ms * 0.001 * TIMEBASE);  // Corresponds to the delay in ResamplingQueue.
@@ -1042,8 +1043,16 @@ void Mixer::render_one_frame(int64_t duration)
        // for display as well, but if they're used for zero-copy Quick Sync encoding
        // (the default case), they're just views into VA-API memory and must be
        // unmapped during encoding, so we can't use them for display, unfortunately.
-       GLuint cbcr_full_tex = resource_pool->create_2d_texture(GL_RG8, global_flags.width, global_flags.height);
-       GLuint y_copy_tex = resource_pool->create_2d_texture(GL_R8, global_flags.width, global_flags.height);
+       GLuint cbcr_full_tex, cbcr_copy_tex, y_copy_tex;
+       if (global_flags.x264_bit_depth > 8) {
+               cbcr_full_tex = resource_pool->create_2d_texture(GL_RG16, global_flags.width, global_flags.height);
+               y_copy_tex = resource_pool->create_2d_texture(GL_R16, global_flags.width, global_flags.height);
+               cbcr_copy_tex = resource_pool->create_2d_texture(GL_RG16, global_flags.width / 2, global_flags.height / 2);
+       } else {
+               cbcr_full_tex = resource_pool->create_2d_texture(GL_RG8, global_flags.width, global_flags.height);
+               y_copy_tex = resource_pool->create_2d_texture(GL_R8, global_flags.width, global_flags.height);
+               cbcr_copy_tex = resource_pool->create_2d_texture(GL_RG8, global_flags.width / 2, global_flags.height / 2);
+       }
        GLuint fbo = resource_pool->create_fbo(y_tex, cbcr_full_tex, y_copy_tex);
        check_error();
        chain->render_to_fbo(fbo, global_flags.width, global_flags.height);
@@ -1055,7 +1064,6 @@ void Mixer::render_one_frame(int64_t duration)
 
        resource_pool->release_fbo(fbo);
 
-       GLuint cbcr_copy_tex = resource_pool->create_2d_texture(GL_RG8, global_flags.width / 2, global_flags.height / 2);
        chroma_subsampler->subsample_chroma(cbcr_full_tex, global_flags.width, global_flags.height, cbcr_tex, cbcr_copy_tex);
        if (output_card_index != -1) {
                cards[output_card_index].output->send_frame(y_tex, cbcr_full_tex, ycbcr_output_coefficients, theme_main_chain.input_frames, pts_int, duration);
index bd6b4c2edbb7097360ee02d6915abea7c599bddd..635a95a5fb297e8a7fb9de7bca63ce9a73ab6d8d 100644 (file)
--- a/quicksync_encoder.cpp
+++ b/quicksync_encoder.cpp
@@ -994,17 +994,25 @@ int QuickSyncEncoderImpl::setup_encode()
                        gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, 1, 1);
                        gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, 1, 1);
                } else {
-                       gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height);
-                       gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2);
+                       size_t bytes_per_pixel;
+                       if (global_flags.x264_bit_depth > 8) {
+                               bytes_per_pixel = 2;
+                               gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R16, frame_width, frame_height);
+                               gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG16, frame_width / 2, frame_height / 2);
+                       } else {
+                               bytes_per_pixel = 1;
+                               gl_surfaces[i].y_tex = resource_pool->create_2d_texture(GL_R8, frame_width, frame_height);
+                               gl_surfaces[i].cbcr_tex = resource_pool->create_2d_texture(GL_RG8, frame_width / 2, frame_height / 2);
+                       }
 
                        // Generate a PBO to read into. It doesn't necessarily fit 1:1 with the VA-API
                        // buffers, due to potentially differing pitch.
                        glGenBuffers(1, &gl_surfaces[i].pbo);
                        glBindBuffer(GL_PIXEL_PACK_BUFFER, gl_surfaces[i].pbo);
-                       glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
-                       uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
+                       glBufferStorage(GL_PIXEL_PACK_BUFFER, frame_width * frame_height * 2 * bytes_per_pixel, nullptr, GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT);
+                       uint8_t *ptr = (uint8_t *)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, frame_width * frame_height * 2 * bytes_per_pixel, GL_MAP_READ_BIT | GL_MAP_PERSISTENT_BIT);
                        gl_surfaces[i].y_offset = 0;
-                       gl_surfaces[i].cbcr_offset = frame_width * frame_height;
+                       gl_surfaces[i].cbcr_offset = frame_width * frame_height * bytes_per_pixel;
                        gl_surfaces[i].y_ptr = ptr + gl_surfaces[i].y_offset;
                        gl_surfaces[i].cbcr_ptr = ptr + gl_surfaces[i].cbcr_offset;
                        glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@@ -1695,6 +1703,7 @@ RefCountedGLsync QuickSyncEncoderImpl::end_frame()
        assert(!is_shutdown);
 
        if (!use_zerocopy) {
+               GLenum type = global_flags.x264_bit_depth > 8 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE;
                GLSurface *surf;
                {
                        unique_lock<mutex> lock(storage_task_queue_mutex);
@@ -1710,12 +1719,12 @@ RefCountedGLsync QuickSyncEncoderImpl::end_frame()
 
                glBindTexture(GL_TEXTURE_2D, surf->y_tex);
                check_error();
-               glGetTexImage(GL_TEXTURE_2D, 0, GL_RED, GL_UNSIGNED_BYTE, BUFFER_OFFSET(surf->y_offset));
+               glGetTexImage(GL_TEXTURE_2D, 0, GL_RED, type, BUFFER_OFFSET(surf->y_offset));
                check_error();
 
                glBindTexture(GL_TEXTURE_2D, surf->cbcr_tex);
                check_error();
-               glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, GL_UNSIGNED_BYTE, BUFFER_OFFSET(surf->cbcr_offset));
+               glGetTexImage(GL_TEXTURE_2D, 0, GL_RG, type, BUFFER_OFFSET(surf->cbcr_offset));
                check_error();
 
                glBindTexture(GL_TEXTURE_2D, 0);
index 7bb187714e6f2ef0041a777ce864a4402525b78b..e5002bca3b0a0499ed73ea654b5f57cf18505dba 100644 (file)
--- a/theme.cpp
+++ b/theme.cpp
@@ -274,11 +274,13 @@ int EffectChain_finalize(lua_State* L)
                }
 
                output_ycbcr_format.full_range = false;
-               output_ycbcr_format.num_levels = 256;
+               output_ycbcr_format.num_levels = 1 << global_flags.x264_bit_depth;
 
-               chain->add_ycbcr_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, output_ycbcr_format, YCBCR_OUTPUT_SPLIT_Y_AND_CBCR);
-               chain->add_ycbcr_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, output_ycbcr_format, YCBCR_OUTPUT_INTERLEAVED);  // Add a copy where we'll only be using the Y component.
-               chain->set_dither_bits(8);
+               GLenum type = global_flags.x264_bit_depth > 8 ? GL_UNSIGNED_SHORT : GL_UNSIGNED_BYTE;
+
+               chain->add_ycbcr_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, output_ycbcr_format, YCBCR_OUTPUT_SPLIT_Y_AND_CBCR, type);
+               chain->add_ycbcr_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED, output_ycbcr_format, YCBCR_OUTPUT_INTERLEAVED, type);  // Add a copy where we'll only be using the Y component.
+               chain->set_dither_bits(global_flags.x264_bit_depth > 8 ? 16 : 8);
                chain->set_output_origin(OUTPUT_ORIGIN_TOP_LEFT);
        } else {
                chain->add_output(inout_format, OUTPUT_ALPHA_FORMAT_POSTMULTIPLIED);
index c8f7b81279f2cf9e5c4a57dc3fe969a978cf4b0f..0c1ecbc1fbd63c6649d6560da8c846b634695100 100644 (file)
--- a/x264_encoder.cpp
+++ b/x264_encoder.cpp
@@ -48,9 +48,10 @@ X264Encoder::X264Encoder(AVOutputFormat *oformat)
        : wants_global_headers(oformat->flags & AVFMT_GLOBALHEADER),
          dyn(load_x264_for_bit_depth(global_flags.x264_bit_depth))
 {
-       frame_pool.reset(new uint8_t[global_flags.width * global_flags.height * 2 * X264_QUEUE_LENGTH]);
+       size_t bytes_per_pixel = global_flags.x264_bit_depth > 8 ? 2 : 1;
+       frame_pool.reset(new uint8_t[global_flags.width * global_flags.height * 2 * bytes_per_pixel * X264_QUEUE_LENGTH]);
        for (unsigned i = 0; i < X264_QUEUE_LENGTH; ++i) {
-               free_frames.push(frame_pool.get() + i * (global_flags.width * global_flags.height * 2));
+               free_frames.push(frame_pool.get() + i * (global_flags.width * global_flags.height * 2 * bytes_per_pixel));
        }
        encoder_thread = thread(&X264Encoder::encoder_thread_func, this);
 }
@@ -86,7 +87,8 @@ void X264Encoder::add_frame(int64_t pts, int64_t duration, YCbCrLumaCoefficients
                free_frames.pop();
        }
 
-       memcpy(qf.data, data, global_flags.width * global_flags.height * 2);
+       size_t bytes_per_pixel = global_flags.x264_bit_depth > 8 ? 2 : 1;
+       memcpy(qf.data, data, global_flags.width * global_flags.height * 2 * bytes_per_pixel);
 
        {
                lock_guard<mutex> lock(mu);
@@ -103,6 +105,9 @@ void X264Encoder::init_x264()
        param.i_width = global_flags.width;
        param.i_height = global_flags.height;
        param.i_csp = X264_CSP_NV12;
+       if (global_flags.x264_bit_depth > 8) {
+               param.i_csp |= X264_CSP_HIGH_DEPTH;
+       }
        param.b_vfr_input = 1;
        param.i_timebase_num = 1;
        param.i_timebase_den = TIMEBASE;
@@ -263,12 +268,21 @@ void X264Encoder::encode_frame(X264Encoder::QueuedFrame qf)
                dyn.x264_picture_init(&pic);
 
                pic.i_pts = qf.pts;
-               pic.img.i_csp = X264_CSP_NV12;
-               pic.img.i_plane = 2;
-               pic.img.plane[0] = qf.data;
-               pic.img.i_stride[0] = global_flags.width;
-               pic.img.plane[1] = qf.data + global_flags.width * global_flags.height;
-               pic.img.i_stride[1] = global_flags.width / 2 * sizeof(uint16_t);
+               if (global_flags.x264_bit_depth > 8) {
+                       pic.img.i_csp = X264_CSP_NV12 | X264_CSP_HIGH_DEPTH;
+                       pic.img.i_plane = 2;
+                       pic.img.plane[0] = qf.data;
+                       pic.img.i_stride[0] = global_flags.width * sizeof(uint16_t);
+                       pic.img.plane[1] = qf.data + global_flags.width * global_flags.height * sizeof(uint16_t);
+                       pic.img.i_stride[1] = global_flags.width / 2 * sizeof(uint32_t);
+               } else {
+                       pic.img.i_csp = X264_CSP_NV12;
+                       pic.img.i_plane = 2;
+                       pic.img.plane[0] = qf.data;
+                       pic.img.i_stride[0] = global_flags.width;
+                       pic.img.plane[1] = qf.data + global_flags.width * global_flags.height;
+                       pic.img.i_stride[1] = global_flags.width / 2 * sizeof(uint16_t);
+               }
                pic.opaque = reinterpret_cast<void *>(intptr_t(qf.duration));
 
                input_pic = &pic;