X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fvaapi_encode.c;h=2fe8501287108389542962a8b7a191d21fa63a56;hb=506839a3e9cc34c8f719937430008fc12d132fce;hp=e9eeb6eb835688d9f11f1e0e403707871e7416c1;hpb=8479f99c7dd227d9cb94d262602f1298f71cf33b;p=ffmpeg

diff --git a/libavcodec/vaapi_encode.c b/libavcodec/vaapi_encode.c
index e9eeb6eb835..2fe85012871 100644
--- a/libavcodec/vaapi_encode.c
+++ b/libavcodec/vaapi_encode.c
@@ -319,16 +319,60 @@ static int vaapi_encode_issue(AVCodecContext *avctx,
         }
     }
 
+    if (pic->nb_slices == 0)
+        pic->nb_slices = ctx->nb_slices;
     if (pic->nb_slices > 0) {
+        int rounding;
+
         pic->slices = av_mallocz_array(pic->nb_slices, sizeof(*pic->slices));
         if (!pic->slices) {
             err = AVERROR(ENOMEM);
             goto fail;
         }
+
+        for (i = 0; i < pic->nb_slices; i++)
+            pic->slices[i].row_size = ctx->slice_size;
+
+        rounding = ctx->slice_block_rows - ctx->nb_slices * ctx->slice_size;
+        if (rounding > 0) {
+            // Place rounding error at top and bottom of frame.
+            av_assert0(rounding < pic->nb_slices);
+            // Some Intel drivers contain a bug where the encoder will fail
+            // if the last slice is smaller than the one before it.  Since
+            // that's straightforward to avoid here, just do so.
+            if (rounding <= 2) {
+                for (i = 0; i < rounding; i++)
+                    ++pic->slices[i].row_size;
+            } else {
+                for (i = 0; i < (rounding + 1) / 2; i++)
+                    ++pic->slices[pic->nb_slices - i - 1].row_size;
+                for (i = 0; i < rounding / 2; i++)
+                    ++pic->slices[i].row_size;
+            }
+        } else if (rounding < 0) {
+            // Remove rounding error from last slice only.
+            av_assert0(rounding < ctx->slice_size);
+            pic->slices[pic->nb_slices - 1].row_size += rounding;
+        }
     }
     for (i = 0; i < pic->nb_slices; i++) {
         slice = &pic->slices[i];
         slice->index = i;
+        if (i == 0) {
+            slice->row_start   = 0;
+            slice->block_start = 0;
+        } else {
+            const VAAPIEncodeSlice *prev = &pic->slices[i - 1];
+            slice->row_start   = prev->row_start   + prev->row_size;
+            slice->block_start = prev->block_start + prev->block_size;
+        }
+        slice->block_size  = slice->row_size * ctx->slice_block_cols;
+
+        av_log(avctx, AV_LOG_DEBUG, "Slice %d: %d-%d (%d rows), "
+               "%d-%d (%d blocks).\n", i, slice->row_start,
+               slice->row_start + slice->row_size - 1, slice->row_size,
+               slice->block_start, slice->block_start + slice->block_size - 1,
+               slice->block_size);
 
         if (ctx->codec->slice_params_size > 0) {
             slice->codec_slice_params = av_mallocz(ctx->codec->slice_params_size);
@@ -671,7 +715,7 @@ static int vaapi_encode_get_next(AVCodecContext *avctx,
         return AVERROR(ENOMEM);
 
     if (ctx->input_order == 0 || ctx->force_idr ||
-        ctx->gop_counter >= avctx->gop_size) {
+        ctx->gop_counter >= ctx->gop_size) {
         pic->type = PICTURE_TYPE_IDR;
         ctx->force_idr = 0;
         ctx->gop_counter = 1;
@@ -694,7 +738,7 @@ static int vaapi_encode_get_next(AVCodecContext *avctx,
         // encode-after it, but not exceeding the GOP size.
 
         for (i = 0; i < ctx->b_per_p &&
-             ctx->gop_counter < avctx->gop_size; i++) {
+             ctx->gop_counter < ctx->gop_size; i++) {
             pic = vaapi_encode_alloc();
             if (!pic)
                 goto fail;
@@ -1206,79 +1250,6 @@ fail:
     return err;
 }
 
-static av_cold int vaapi_encode_config_attributes(AVCodecContext *avctx)
-{
-    VAAPIEncodeContext *ctx = avctx->priv_data;
-    VAStatus vas;
-    int i;
-
-    VAConfigAttrib attr[] = {
-        { VAConfigAttribEncMaxRefFrames  },
-        { VAConfigAttribEncPackedHeaders },
-    };
-
-    vas = vaGetConfigAttributes(ctx->hwctx->display,
-                                ctx->va_profile, ctx->va_entrypoint,
-                                attr, FF_ARRAY_ELEMS(attr));
-    if (vas != VA_STATUS_SUCCESS) {
-        av_log(avctx, AV_LOG_ERROR, "Failed to fetch config "
-               "attributes: %d (%s).\n", vas, vaErrorStr(vas));
-        return AVERROR(EINVAL);
-    }
-
-    for (i = 0; i < FF_ARRAY_ELEMS(attr); i++) {
-        if (attr[i].value == VA_ATTRIB_NOT_SUPPORTED) {
-            // Unfortunately we have to treat this as "don't know" and hope
-            // for the best, because the Intel MJPEG encoder returns this
-            // for all the interesting attributes.
-            av_log(avctx, AV_LOG_DEBUG, "Attribute (%d) is not supported.\n",
-                   attr[i].type);
-            continue;
-        }
-        switch (attr[i].type) {
-        case VAConfigAttribEncMaxRefFrames:
-        {
-            unsigned int ref_l0 = attr[i].value & 0xffff;
-            unsigned int ref_l1 = (attr[i].value >> 16) & 0xffff;
-
-            if (avctx->gop_size > 1 && ref_l0 < 1) {
-                av_log(avctx, AV_LOG_ERROR, "P frames are not "
-                       "supported (%#x).\n", attr[i].value);
-                return AVERROR(EINVAL);
-            }
-            if (avctx->max_b_frames > 0 && ref_l1 < 1) {
-                av_log(avctx, AV_LOG_WARNING, "B frames are not "
-                       "supported (%#x) by the underlying driver.\n",
-                       attr[i].value);
-                avctx->max_b_frames = 0;
-            }
-        }
-        break;
-        case VAConfigAttribEncPackedHeaders:
-            if (ctx->va_packed_headers & ~attr[i].value) {
-                // This isn't fatal, but packed headers are always
-                // preferable because they are under our control.
-                // When absent, the driver is generating them and some
-                // features may not work (e.g. VUI or SEI in H.264).
-                av_log(avctx, AV_LOG_WARNING, "Warning: some packed "
-                       "headers are not supported (want %#x, got %#x).\n",
-                       ctx->va_packed_headers, attr[i].value);
-                ctx->va_packed_headers &= attr[i].value;
-            }
-            ctx->config_attributes[ctx->nb_config_attributes++] =
-                (VAConfigAttrib) {
-                .type  = VAConfigAttribEncPackedHeaders,
-                .value = ctx->va_packed_headers,
-            };
-            break;
-        default:
-            av_assert0(0 && "Unexpected config attribute.");
-        }
-    }
-
-    return 0;
-}
-
 static av_cold int vaapi_encode_init_rate_control(AVCodecContext *avctx)
 {
     VAAPIEncodeContext *ctx = avctx->priv_data;
@@ -1306,7 +1277,8 @@ static av_cold int vaapi_encode_init_rate_control(AVCodecContext *avctx)
         ctx->va_rc_mode = VA_RC_CQP;
         return 0;
     }
-    if (avctx->flags & AV_CODEC_FLAG_QSCALE ||
+    if (ctx->codec->flags & FLAG_CONSTANT_QUALITY_ONLY ||
+        avctx->flags & AV_CODEC_FLAG_QSCALE ||
         avctx->bit_rate <= 0) {
         if (rc_attr.value & VA_RC_CQP) {
             av_log(avctx, AV_LOG_VERBOSE, "Using constant-quality mode.\n");
@@ -1469,6 +1441,214 @@ static av_cold int vaapi_encode_init_rate_control(AVCodecContext *avctx)
     return 0;
 }
 
+static av_cold int vaapi_encode_init_gop_structure(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAStatus vas;
+    VAConfigAttrib attr = { VAConfigAttribEncMaxRefFrames };
+    uint32_t ref_l0, ref_l1;
+
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile,
+                                ctx->va_entrypoint,
+                                &attr, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query reference frames "
+               "attribute: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        ref_l0 = ref_l1 = 0;
+    } else {
+        ref_l0 = attr.value       & 0xffff;
+        ref_l1 = attr.value >> 16 & 0xffff;
+    }
+
+    if (avctx->gop_size <= 1) {
+        av_log(avctx, AV_LOG_VERBOSE, "Using intra frames only.\n");
+        ctx->gop_size = 1;
+    } else if (ref_l0 < 1) {
+        av_log(avctx, AV_LOG_ERROR, "Driver does not support any "
+               "reference frames.\n");
+        return AVERROR(EINVAL);
+    } else if (ref_l1 < 1 || avctx->max_b_frames < 1) {
+        av_log(avctx, AV_LOG_VERBOSE, "Using intra and P-frames "
+               "(supported references: %d / %d).\n", ref_l0, ref_l1);
+        ctx->gop_size = avctx->gop_size;
+        ctx->p_per_i  = INT_MAX;
+        ctx->b_per_p  = 0;
+    } else {
+        av_log(avctx, AV_LOG_VERBOSE, "Using intra, P- and B-frames "
+               "(supported references: %d / %d).\n", ref_l0, ref_l1);
+        ctx->gop_size = avctx->gop_size;
+        ctx->p_per_i  = INT_MAX;
+        ctx->b_per_p  = avctx->max_b_frames;
+    }
+
+    return 0;
+}
+
+static av_cold int vaapi_encode_init_slice_structure(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAConfigAttrib attr[2] = { { VAConfigAttribEncMaxSlices },
+                               { VAConfigAttribEncSliceStructure } };
+    VAStatus vas;
+    uint32_t max_slices, slice_structure;
+    int req_slices;
+
+    if (!(ctx->codec->flags & FLAG_SLICE_CONTROL)) {
+        if (avctx->slices > 0) {
+            av_log(avctx, AV_LOG_WARNING, "Multiple slices were requested "
+                   "but this codec does not support controlling slices.\n");
+        }
+        return 0;
+    }
+
+    ctx->slice_block_rows = (avctx->height + ctx->slice_block_height - 1) /
+                             ctx->slice_block_height;
+    ctx->slice_block_cols = (avctx->width  + ctx->slice_block_width  - 1) /
+                             ctx->slice_block_width;
+
+    if (avctx->slices <= 1) {
+        ctx->nb_slices  = 1;
+        ctx->slice_size = ctx->slice_block_rows;
+        return 0;
+    }
+
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile,
+                                ctx->va_entrypoint,
+                                attr, FF_ARRAY_ELEMS(attr));
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query slice "
+               "attributes: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+    max_slices      = attr[0].value;
+    slice_structure = attr[1].value;
+    if (max_slices      == VA_ATTRIB_NOT_SUPPORTED ||
+        slice_structure == VA_ATTRIB_NOT_SUPPORTED) {
+        av_log(avctx, AV_LOG_ERROR, "Driver does not support encoding "
+               "pictures as multiple slices.\n.");
+        return AVERROR(EINVAL);
+    }
+
+    // For fixed-size slices currently we only support whole rows, making
+    // rectangular slices.  This could be extended to arbitrary runs of
+    // blocks, but since slices tend to be a conformance requirement and
+    // most cases (such as broadcast or bluray) want rectangular slices
+    // only it would need to be gated behind another option.
+    if (avctx->slices > ctx->slice_block_rows) {
+        av_log(avctx, AV_LOG_WARNING, "Not enough rows to use "
+               "configured number of slices (%d < %d); using "
+               "maximum.\n", ctx->slice_block_rows, avctx->slices);
+        req_slices = ctx->slice_block_rows;
+    } else {
+        req_slices = avctx->slices;
+    }
+    if (slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_ROWS ||
+        slice_structure & VA_ENC_SLICE_STRUCTURE_ARBITRARY_MACROBLOCKS) {
+        ctx->nb_slices  = req_slices;
+        ctx->slice_size = ctx->slice_block_rows / ctx->nb_slices;
+    } else if (slice_structure & VA_ENC_SLICE_STRUCTURE_POWER_OF_TWO_ROWS) {
+        int k;
+        for (k = 1;; k *= 2) {
+            if (2 * k * (req_slices - 1) + 1 >= ctx->slice_block_rows)
+                break;
+        }
+        ctx->nb_slices  = (ctx->slice_block_rows + k - 1) / k;
+        ctx->slice_size = k;
+#if VA_CHECK_VERSION(1, 0, 0)
+    } else if (slice_structure & VA_ENC_SLICE_STRUCTURE_EQUAL_ROWS) {
+        ctx->nb_slices  = ctx->slice_block_rows;
+        ctx->slice_size = 1;
+#endif
+    } else {
+        av_log(avctx, AV_LOG_ERROR, "Driver does not support any usable "
+               "slice structure modes (%#x).\n", slice_structure);
+        return AVERROR(EINVAL);
+    }
+
+    if (ctx->nb_slices > avctx->slices) {
+        av_log(avctx, AV_LOG_WARNING, "Slice count rounded up to "
+               "%d (from %d) due to driver constraints on slice "
+               "structure.\n", ctx->nb_slices, avctx->slices);
+    }
+    if (ctx->nb_slices > max_slices) {
+        av_log(avctx, AV_LOG_ERROR, "Driver does not support "
+               "encoding with %d slices (max %"PRIu32").\n",
+               ctx->nb_slices, max_slices);
+        return AVERROR(EINVAL);
+    }
+
+    av_log(avctx, AV_LOG_VERBOSE, "Encoding pictures with %d slices "
+           "(default size %d block rows).\n",
+           ctx->nb_slices, ctx->slice_size);
+    return 0;
+}
+
+static av_cold int vaapi_encode_init_packed_headers(AVCodecContext *avctx)
+{
+    VAAPIEncodeContext *ctx = avctx->priv_data;
+    VAStatus vas;
+    VAConfigAttrib attr = { VAConfigAttribEncPackedHeaders };
+
+    vas = vaGetConfigAttributes(ctx->hwctx->display,
+                                ctx->va_profile,
+                                ctx->va_entrypoint,
+                                &attr, 1);
+    if (vas != VA_STATUS_SUCCESS) {
+        av_log(avctx, AV_LOG_ERROR, "Failed to query packed headers "
+               "attribute: %d (%s).\n", vas, vaErrorStr(vas));
+        return AVERROR_EXTERNAL;
+    }
+
+    if (attr.value == VA_ATTRIB_NOT_SUPPORTED) {
+        if (ctx->desired_packed_headers) {
+            av_log(avctx, AV_LOG_WARNING, "Driver does not support any "
+                   "packed headers (wanted %#x).\n",
+                   ctx->desired_packed_headers);
+        } else {
+            av_log(avctx, AV_LOG_VERBOSE, "Driver does not support any "
+                   "packed headers (none wanted).\n");
+        }
+        ctx->va_packed_headers = 0;
+    } else {
+        if (ctx->desired_packed_headers & ~attr.value) {
+            av_log(avctx, AV_LOG_WARNING, "Driver does not support some "
+                   "wanted packed headers (wanted %#x, found %#x).\n",
+                   ctx->desired_packed_headers, attr.value);
+        } else {
+            av_log(avctx, AV_LOG_VERBOSE, "All wanted packed headers "
+                   "available (wanted %#x, found %#x).\n",
+                   ctx->desired_packed_headers, attr.value);
+        }
+        ctx->va_packed_headers = ctx->desired_packed_headers & attr.value;
+    }
+
+    if (ctx->va_packed_headers) {
+        ctx->config_attributes[ctx->nb_config_attributes++] =
+            (VAConfigAttrib) {
+            .type  = VAConfigAttribEncPackedHeaders,
+            .value = ctx->va_packed_headers,
+        };
+    }
+
+    if ( (ctx->desired_packed_headers & VA_ENC_PACKED_HEADER_SEQUENCE) &&
+        !(ctx->va_packed_headers      & VA_ENC_PACKED_HEADER_SEQUENCE) &&
+         (avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER)) {
+        av_log(avctx, AV_LOG_WARNING, "Driver does not support packed "
+               "sequence headers, but a global header is requested.\n");
+        av_log(avctx, AV_LOG_WARNING, "No global header will be written: "
+               "this may result in a stream which is not usable for some "
+               "purposes (e.g. not muxable to some containers).\n");
+    }
+
+    return 0;
+}
+
 static av_cold int vaapi_encode_init_quality(AVCodecContext *avctx)
 {
 #if VA_CHECK_VERSION(0, 36, 0)
@@ -1640,7 +1820,7 @@ static av_cold int vaapi_encode_create_recon_frames(AVCodecContext *avctx)
     ctx->recon_frames->height    = ctx->surface_height;
     // At most three IDR/I/P frames and two runs of B frames can be in
     // flight at any one time.
-    ctx->recon_frames->initial_pool_size = 3 + 2 * avctx->max_b_frames;
+    ctx->recon_frames->initial_pool_size = 3 + 2 * ctx->b_per_p;
 
     err = av_hwframe_ctx_init(ctx->recon_frames_ref);
     if (err < 0) {
@@ -1695,7 +1875,15 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
     if (err < 0)
         goto fail;
 
-    err = vaapi_encode_config_attributes(avctx);
+    err = vaapi_encode_init_gop_structure(avctx);
+    if (err < 0)
+        goto fail;
+
+    err = vaapi_encode_init_slice_structure(avctx);
+    if (err < 0)
+        goto fail;
+
+    err = vaapi_encode_init_packed_headers(avctx);
     if (err < 0)
         goto fail;
 
@@ -1749,14 +1937,10 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
     }
 
     ctx->input_order  = 0;
-    ctx->output_delay = avctx->max_b_frames;
+    ctx->output_delay = ctx->b_per_p;
     ctx->decode_delay = 1;
     ctx->output_order = - ctx->output_delay - 1;
 
-    // Currently we never generate I frames, only IDR.
-    ctx->p_per_i = INT_MAX;
-    ctx->b_per_p = avctx->max_b_frames;
-
     if (ctx->codec->sequence_params_size > 0) {
         ctx->codec_sequence_params =
             av_mallocz(ctx->codec->sequence_params_size);
@@ -1788,7 +1972,8 @@ av_cold int ff_vaapi_encode_init(AVCodecContext *avctx)
     ctx->issue_mode = ISSUE_MODE_MAXIMISE_THROUGHPUT;
 
     if (ctx->va_packed_headers & VA_ENC_PACKED_HEADER_SEQUENCE &&
-        ctx->codec->write_sequence_header) {
+        ctx->codec->write_sequence_header &&
+        avctx->flags & AV_CODEC_FLAG_GLOBAL_HEADER) {
         char data[MAX_PARAM_BUFFER_SIZE];
         size_t bit_len = 8 * sizeof(data);