]> git.sesse.net Git - ffmpeg/blobdiff - libavfilter/vf_unsharp.c
avfilter: Constify all AVFilters
[ffmpeg] / libavfilter / vf_unsharp.c
index 41ccc56942b699b1c96fd04686bd09606405014f..0bf72169a9cf2bf170b024a808fe949fa2495e70 100644 (file)
 #include "libavutil/pixdesc.h"
 #include "unsharp.h"
 
-static void apply_unsharp(      uint8_t *dst, int dst_stride,
-                          const uint8_t *src, int src_stride,
-                          int width, int height, UnsharpFilterParam *fp)
-{
-    uint32_t **sc = fp->sc;
-    uint32_t sr[MAX_MATRIX_SIZE - 1], tmp1, tmp2;
-
-    int32_t res;
-    int x, y, z;
-    const uint8_t *src2 = NULL;  //silence a warning
-    const int amount = fp->amount;
-    const int steps_x = fp->steps_x;
-    const int steps_y = fp->steps_y;
-    const int scalebits = fp->scalebits;
-    const int32_t halfscale = fp->halfscale;
-
-    if (!amount) {
-        av_image_copy_plane(dst, dst_stride, src, src_stride, width, height);
-        return;
-    }
-
-    for (y = 0; y < 2 * steps_y; y++)
-        memset(sc[y], 0, sizeof(sc[y][0]) * (width + 2 * steps_x));
-
-    for (y = -steps_y; y < height + steps_y; y++) {
-        if (y < height)
-            src2 = src;
-
-        memset(sr, 0, sizeof(sr[0]) * (2 * steps_x - 1));
-        for (x = -steps_x; x < width + steps_x; x++) {
-            tmp1 = x <= 0 ? src2[0] : x >= width ? src2[width-1] : src2[x];
-            for (z = 0; z < steps_x * 2; z += 2) {
-                tmp2 = sr[z + 0] + tmp1; sr[z + 0] = tmp1;
-                tmp1 = sr[z + 1] + tmp2; sr[z + 1] = tmp2;
-            }
-            for (z = 0; z < steps_y * 2; z += 2) {
-                tmp2 = sc[z + 0][x + steps_x] + tmp1; sc[z + 0][x + steps_x] = tmp1;
-                tmp1 = sc[z + 1][x + steps_x] + tmp2; sc[z + 1][x + steps_x] = tmp2;
-            }
-            if (x >= steps_x && y >= steps_y) {
-                const uint8_t *srx = src - steps_y * src_stride + x - steps_x;
-                uint8_t *dsx       = dst - steps_y * dst_stride + x - steps_x;
-
-                res = (int32_t)*srx + ((((int32_t) * srx - (int32_t)((tmp1 + halfscale) >> scalebits)) * amount) >> 16);
-                *dsx = av_clip_uint8(res);
-            }
-        }
-        if (y >= 0) {
-            dst += dst_stride;
-            src += src_stride;
-        }
-    }
+/* Description of one plane, handed to the slice workers through execute(). */
+typedef struct ThreadData {
+    UnsharpFilterParam *fp;   /* filter parameters and scratch buffers used for this plane */
+    uint8_t       *dst;       /* start of the destination plane */
+    const uint8_t *src;       /* start of the source plane */
+    int dst_stride;           /* destination line size, taken from AVFrame.linesize */
+    int src_stride;           /* source line size, taken from AVFrame.linesize */
+    int width;                /* plane width */
+    int height;               /* plane height; the workers split it into row slices */
+} ThreadData;
+
+#define DEF_UNSHARP_SLICE_FUNC(name, nbits)                                                           \
+static int name##_##nbits(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)                    \
+{   /* Slice worker: filters rows [slice_start, slice_end) of the plane described by arg. */          \
+    ThreadData *td = arg;                                                                             \
+    UnsharpFilterParam *fp = td->fp;                                                                  \
+    UnsharpContext *s = ctx->priv;                                                                    \
+    uint32_t **sc = fp->sc;                                                                           \
+    uint32_t *sr = fp->sr;                                                                            \
+    const uint##nbits##_t *src2 = NULL;                                                               \
+    const int amount = fp->amount;                                                                    \
+    const int steps_x = fp->steps_x;                                                                  \
+    const int steps_y = fp->steps_y;                                                                  \
+    const int scalebits = fp->scalebits;                                                              \
+    const int32_t halfscale = fp->halfscale;                                                          \
+    /* Typed views of the plane and this job's share of the rows and scratch buffers. */              \
+    uint##nbits##_t *dst = (uint##nbits##_t*)td->dst;                                                 \
+    const uint##nbits##_t *src = (const uint##nbits##_t *)td->src;                                    \
+    int dst_stride = td->dst_stride;                                                                  \
+    int src_stride = td->src_stride;                                                                  \
+    const int width = td->width;                                                                      \
+    const int height = td->height;                                                                    \
+    const int sc_offset = jobnr * 2 * steps_y;                                                        \
+    const int sr_offset = jobnr * (MAX_MATRIX_SIZE - 1);                                              \
+    const int slice_start = (height * jobnr) / nb_jobs;                                               \
+    const int slice_end = (height * (jobnr+1)) / nb_jobs;                                             \
+                                                                                                      \
+    int32_t res;                                                                                      \
+    int x, y, z;                                                                                      \
+    uint32_t tmp1, tmp2;                                                                              \
+    /* amount == 0 means no filtering: copy this slice's rows unchanged and stop. */                  \
+    if (!amount) {                                                                                    \
+        av_image_copy_plane(td->dst + slice_start * dst_stride, dst_stride,                           \
+                            td->src + slice_start * src_stride, src_stride,                           \
+                            width * s->bps, slice_end - slice_start);                                 \
+        return 0;                                                                                     \
+    }                                                                                                 \
+    /* Zero this job's 2 * steps_y column accumulator rows. */                                        \
+    for (y = 0; y < 2 * steps_y; y++)                                                                 \
+        memset(sc[sc_offset + y], 0, sizeof(sc[y][0]) * (width + 2 * steps_x));                       \
+    /* Rescale the strides by s->bps so they can step the typed sample pointers. */                   \
+    dst_stride = dst_stride / s->bps;                                                                 \
+    src_stride = src_stride / s->bps;                                                                 \
+    /* if this is not the first tile, we start from (slice_start - steps_y) */                        \
+    /* so we can get smooth result at slice boundary */                                               \
+    if (slice_start > steps_y) {                                                                      \
+        src += (slice_start - steps_y) * src_stride;                                                  \
+        dst += (slice_start - steps_y) * dst_stride;                                                  \
+    }                                                                                                 \
+    /* y spans steps_y extra rows on each side of the slice; see the store condition below. */        \
+    for (y = -steps_y + slice_start; y < steps_y + slice_end; y++) {                                  \
+        if (y < height)                                                                               \
+            src2 = src;                                                                               \
+        /* Restart the horizontal accumulators for every row. */                                      \
+        memset(sr + sr_offset, 0, sizeof(sr[0]) * (2 * steps_x - 1));                                 \
+        for (x = -steps_x; x < width + steps_x; x++) {                                                \
+            tmp1 = x <= 0 ? src2[0] : x >= width ? src2[width-1] : src2[x];                           \
+            for (z = 0; z < steps_x * 2; z += 2) {                                                    \
+                tmp2 = sr[sr_offset + z + 0] + tmp1; sr[sr_offset + z + 0] = tmp1;                    \
+                tmp1 = sr[sr_offset + z + 1] + tmp2; sr[sr_offset + z + 1] = tmp2;                    \
+            }                                                                                         \
+            for (z = 0; z < steps_y * 2; z += 2) {                                                    \
+                tmp2 = sc[sc_offset + z + 0][x + steps_x] + tmp1;                                     \
+                sc[sc_offset + z + 0][x + steps_x] = tmp1;                                            \
+                tmp1 = sc[sc_offset + z + 1][x + steps_x] + tmp2;                                     \
+                sc[sc_offset + z + 1][x + steps_x] = tmp2;                                            \
+            }                                                                                         \
+            if (x >= steps_x && y >= (steps_y + slice_start)) { /* pixel (x-steps_x, y-steps_y) */    \
+                const uint##nbits##_t *srx = src - steps_y * src_stride + x - steps_x;                \
+                uint##nbits##_t *dsx       = dst - steps_y * dst_stride + x - steps_x;                \
+                /* NOTE(review): clips to nbits, not to the format's depth - confirm. */              \
+                res = (int32_t)*srx + ((((int32_t) * srx -                                            \
+                      (int32_t)((tmp1 + halfscale) >> scalebits)) * amount) >> (8+nbits));            \
+                *dsx = av_clip_uint##nbits(res);                                                      \
+            }                                                                                         \
+        }                                                                                             \
+        if (y >= 0) {                                                                                 \
+            dst += dst_stride;                                                                        \
+            src += src_stride;                                                                        \
+        }                                                                                             \
+    }                                                                                                 \
+    return 0;                                                                                         \
 }
+DEF_UNSHARP_SLICE_FUNC(unsharp_slice, 16)
+DEF_UNSHARP_SLICE_FUNC(unsharp_slice, 8)
 
 static int apply_unsharp_c(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
 {
@@ -107,6 +148,8 @@ static int apply_unsharp_c(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
     UnsharpContext *s = ctx->priv;
     int i, plane_w[3], plane_h[3];
     UnsharpFilterParam *fp[3];
+    ThreadData td;
+
     plane_w[0] = inlink->w;
     plane_w[1] = plane_w[2] = AV_CEIL_RSHIFT(inlink->w, s->hsub);
     plane_h[0] = inlink->h;
@@ -114,7 +157,14 @@ static int apply_unsharp_c(AVFilterContext *ctx, AVFrame *in, AVFrame *out)
     fp[0] = &s->luma;
     fp[1] = fp[2] = &s->chroma;
     for (i = 0; i < 3; i++) {
-        apply_unsharp(out->data[i], out->linesize[i], in->data[i], in->linesize[i], plane_w[i], plane_h[i], fp[i]);
+        td.fp = fp[i];
+        td.dst = out->data[i];
+        td.src = in->data[i];
+        td.width = plane_w[i];
+        td.height = plane_h[i];
+        td.dst_stride = out->linesize[i];
+        td.src_stride = in->linesize[i];
+        ctx->internal->execute(ctx, s->unsharp_slice, &td, NULL, FFMIN(plane_h[i], s->nb_threads));
     }
     return 0;
 }
@@ -151,6 +201,10 @@ static int query_formats(AVFilterContext *ctx)
     static const enum AVPixelFormat pix_fmts[] = {
         AV_PIX_FMT_YUV420P,  AV_PIX_FMT_YUV422P,  AV_PIX_FMT_YUV444P,  AV_PIX_FMT_YUV410P,
         AV_PIX_FMT_YUV411P,  AV_PIX_FMT_YUV440P,  AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9,
+        AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV440P10,
+        AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12,
+        AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
         AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_NONE
     };
 
@@ -163,6 +217,7 @@ static int query_formats(AVFilterContext *ctx)
 static int init_filter_param(AVFilterContext *ctx, UnsharpFilterParam *fp, const char *effect_type, int width)
 {
     int z;
+    UnsharpContext *s = ctx->priv;
     const char *effect = fp->amount == 0 ? "none" : fp->amount < 0 ? "blur" : "sharpen";
 
     if  (!(fp->msize_x & fp->msize_y & 1)) {
@@ -175,7 +230,12 @@ static int init_filter_param(AVFilterContext *ctx, UnsharpFilterParam *fp, const
     av_log(ctx, AV_LOG_VERBOSE, "effect:%s type:%s msize_x:%d msize_y:%d amount:%0.2f\n",
            effect, effect_type, fp->msize_x, fp->msize_y, fp->amount / 65535.0);
 
-    for (z = 0; z < 2 * fp->steps_y; z++)
+    fp->sr = av_malloc_array((MAX_MATRIX_SIZE - 1) * s->nb_threads, sizeof(uint32_t));
+    fp->sc = av_mallocz_array(2 * fp->steps_y * s->nb_threads, sizeof(uint32_t *));
+    if (!fp->sr || !fp->sc)
+        return AVERROR(ENOMEM);
+
+    for (z = 0; z < 2 * fp->steps_y * s->nb_threads; z++)
         if (!(fp->sc[z] = av_malloc_array(width + 2 * fp->steps_x,
                                           sizeof(*(fp->sc[z])))))
             return AVERROR(ENOMEM);
@@ -183,39 +243,51 @@ static int init_filter_param(AVFilterContext *ctx, UnsharpFilterParam *fp, const
     return 0;
 }
 
-static int config_props(AVFilterLink *link)
+/**
+ * Configure the input link: record the chroma subsampling and sample size of
+ * the negotiated format, pick the matching slice worker, choose the number of
+ * slice jobs and allocate the luma and chroma scratch buffers.
+ *
+ * @return 0 on success, the negative error code of init_filter_param() otherwise
+ */
+static int config_input(AVFilterLink *inlink)
 {
-    UnsharpContext *s = link->dst->priv;
-    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format);
+    UnsharpContext *s = inlink->dst->priv;
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
     int ret;
 
     s->hsub = desc->log2_chroma_w;
     s->vsub = desc->log2_chroma_h;
+    s->bitdepth = desc->comp[0].depth;
+    s->bps = s->bitdepth > 8 ? 2 : 1;
+    s->unsharp_slice = s->bitdepth > 8 ? unsharp_slice_16 : unsharp_slice_8;
 
-    ret = init_filter_param(link->dst, &s->luma,   "luma",   link->w);
+    // keep (height / nb_threads) > 4 * steps_y so that two threads do not overlap
+    // too much, but never go below one job: the quotient is 0 for short frames
+    s->nb_threads = FFMAX(1, FFMIN(ff_filter_get_nb_threads(inlink->dst),
+                                   inlink->h / (4 * s->luma.steps_y)));
+
+    ret = init_filter_param(inlink->dst, &s->luma,   "luma",   inlink->w);
     if (ret < 0)
         return ret;
-    ret = init_filter_param(link->dst, &s->chroma, "chroma", AV_CEIL_RSHIFT(link->w, s->hsub));
+    ret = init_filter_param(inlink->dst, &s->chroma, "chroma", AV_CEIL_RSHIFT(inlink->w, s->hsub));
     if (ret < 0)
         return ret;
 
     return 0;
 }
 
-static void free_filter_param(UnsharpFilterParam *fp)
+/* Release the scratch buffers of one parameter set. nb_threads has to be the
+ * value that init_filter_param() used when it sized fp->sc. */
+static void free_filter_param(UnsharpFilterParam *fp, int nb_threads)
 {
     int z;
 
-    for (z = 0; z < 2 * fp->steps_y; z++)
-        av_freep(&fp->sc[z]);
+    if (fp->sc) { /* the row table itself may be NULL, e.g. when its allocation failed */
+        for (z = 0; z < 2 * fp->steps_y * nb_threads; z++)
+            av_freep(&fp->sc[z]);
+        av_freep(&fp->sc);
+    }
+    av_freep(&fp->sr);
 }
 
 static av_cold void uninit(AVFilterContext *ctx)
 {
     UnsharpContext *s = ctx->priv;
 
-    free_filter_param(&s->luma);
-    free_filter_param(&s->chroma);
+    /* Free the luma and chroma scratch buffers, passing the stored job count. */
+    free_filter_param(&s->luma, s->nb_threads);
+    free_filter_param(&s->chroma, s->nb_threads);
 }
 
 static int filter_frame(AVFilterLink *link, AVFrame *in)
@@ -271,7 +343,7 @@ static const AVFilterPad avfilter_vf_unsharp_inputs[] = {
         .name         = "default",
         .type         = AVMEDIA_TYPE_VIDEO,
         .filter_frame = filter_frame,
-        .config_props = config_props,
+        .config_props = config_input,
     },
     { NULL }
 };
@@ -284,7 +356,7 @@ static const AVFilterPad avfilter_vf_unsharp_outputs[] = {
     { NULL }
 };
 
-AVFilter ff_vf_unsharp = {
+const AVFilter ff_vf_unsharp = {
     .name          = "unsharp",
     .description   = NULL_IF_CONFIG_SMALL("Sharpen or blur the input video."),
     .priv_size     = sizeof(UnsharpContext),
@@ -294,5 +366,5 @@ AVFilter ff_vf_unsharp = {
     .query_formats = query_formats,
     .inputs        = avfilter_vf_unsharp_inputs,
     .outputs       = avfilter_vf_unsharp_outputs,
-    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC,
+    .flags         = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS,
 };