X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavfilter%2Fvf_unsharp.c;h=0bf72169a9cf2bf170b024a808fe949fa2495e70;hb=a04ad248a05e7b613abe09b3bb067f555108d794;hp=41ccc56942b699b1c96fd04686bd09606405014f;hpb=f4cf6ba8c9646814af842a99335c6ee312ded299;p=ffmpeg diff --git a/libavfilter/vf_unsharp.c b/libavfilter/vf_unsharp.c index 41ccc56942b..0bf72169a9c 100644 --- a/libavfilter/vf_unsharp.c +++ b/libavfilter/vf_unsharp.c @@ -47,59 +47,100 @@ #include "libavutil/pixdesc.h" #include "unsharp.h" -static void apply_unsharp( uint8_t *dst, int dst_stride, - const uint8_t *src, int src_stride, - int width, int height, UnsharpFilterParam *fp) -{ - uint32_t **sc = fp->sc; - uint32_t sr[MAX_MATRIX_SIZE - 1], tmp1, tmp2; - - int32_t res; - int x, y, z; - const uint8_t *src2 = NULL; //silence a warning - const int amount = fp->amount; - const int steps_x = fp->steps_x; - const int steps_y = fp->steps_y; - const int scalebits = fp->scalebits; - const int32_t halfscale = fp->halfscale; - - if (!amount) { - av_image_copy_plane(dst, dst_stride, src, src_stride, width, height); - return; - } - - for (y = 0; y < 2 * steps_y; y++) - memset(sc[y], 0, sizeof(sc[y][0]) * (width + 2 * steps_x)); - - for (y = -steps_y; y < height + steps_y; y++) { - if (y < height) - src2 = src; - - memset(sr, 0, sizeof(sr[0]) * (2 * steps_x - 1)); - for (x = -steps_x; x < width + steps_x; x++) { - tmp1 = x <= 0 ? src2[0] : x >= width ? src2[width-1] : src2[x]; - for (z = 0; z < steps_x * 2; z += 2) { - tmp2 = sr[z + 0] + tmp1; sr[z + 0] = tmp1; - tmp1 = sr[z + 1] + tmp2; sr[z + 1] = tmp2; - } - for (z = 0; z < steps_y * 2; z += 2) { - tmp2 = sc[z + 0][x + steps_x] + tmp1; sc[z + 0][x + steps_x] = tmp1; - tmp1 = sc[z + 1][x + steps_x] + tmp2; sc[z + 1][x + steps_x] = tmp2; - } - if (x >= steps_x && y >= steps_y) { - const uint8_t *srx = src - steps_y * src_stride + x - steps_x; - uint8_t *dsx = dst - steps_y * dst_stride + x - steps_x; - - res = (int32_t)*srx + ((((int32_t) * srx - (int32_t)((tmp1 + halfscale) >> scalebits)) * amount) >> 16); - *dsx = av_clip_uint8(res); - } - } - if (y >= 0) { - dst += dst_stride; - src += src_stride; - } - } +typedef struct TheadData { + UnsharpFilterParam *fp; + uint8_t *dst; + const uint8_t *src; + int dst_stride; + int src_stride; + int width; + int height; +} ThreadData; + +#define DEF_UNSHARP_SLICE_FUNC(name, nbits) \ +static int name##_##nbits(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) \ +{ \ + ThreadData *td = arg; \ + UnsharpFilterParam *fp = td->fp; \ + UnsharpContext *s = ctx->priv; \ + uint32_t **sc = fp->sc; \ + uint32_t *sr = fp->sr; \ + const uint##nbits##_t *src2 = NULL; \ + const int amount = fp->amount; \ + const int steps_x = fp->steps_x; \ + const int steps_y = fp->steps_y; \ + const int scalebits = fp->scalebits; \ + const int32_t halfscale = fp->halfscale; \ + \ + uint##nbits##_t *dst = (uint##nbits##_t*)td->dst; \ + const uint##nbits##_t *src = (const uint##nbits##_t *)td->src; \ + int dst_stride = td->dst_stride; \ + int src_stride = td->src_stride; \ + const int width = td->width; \ + const int height = td->height; \ + const int sc_offset = jobnr * 2 * steps_y; \ + const int sr_offset = jobnr * (MAX_MATRIX_SIZE - 1); \ + const int slice_start = (height * jobnr) / nb_jobs; \ + const int slice_end = (height * (jobnr+1)) / nb_jobs; \ + \ + int32_t res; \ + int x, y, z; \ + uint32_t tmp1, tmp2; \ + \ + if (!amount) { \ + av_image_copy_plane(td->dst + slice_start * dst_stride, dst_stride, \ + td->src + slice_start * src_stride, src_stride, \ + width * s->bps, slice_end - slice_start); \ + return 0; \ + } \ + \ + for (y = 0; y < 2 * steps_y; y++) \ + memset(sc[sc_offset + y], 0, sizeof(sc[y][0]) * (width + 2 * steps_x)); \ + \ + dst_stride = dst_stride / s->bps; \ + src_stride = src_stride / s->bps; \ + /* if this is not the first tile, we start from (slice_start - steps_y) */ \ + /* so we can get smooth result at slice boundary */ \ + if (slice_start > steps_y) { \ + src += (slice_start - steps_y) * src_stride; \ + dst += (slice_start - steps_y) * dst_stride; \ + } \ + \ + for (y = -steps_y + slice_start; y < steps_y + slice_end; y++) { \ + if (y < height) \ + src2 = src; \ + \ + memset(sr + sr_offset, 0, sizeof(sr[0]) * (2 * steps_x - 1)); \ + for (x = -steps_x; x < width + steps_x; x++) { \ + tmp1 = x <= 0 ? src2[0] : x >= width ? src2[width-1] : src2[x]; \ + for (z = 0; z < steps_x * 2; z += 2) { \ + tmp2 = sr[sr_offset + z + 0] + tmp1; sr[sr_offset + z + 0] = tmp1; \ + tmp1 = sr[sr_offset + z + 1] + tmp2; sr[sr_offset + z + 1] = tmp2; \ + } \ + for (z = 0; z < steps_y * 2; z += 2) { \ + tmp2 = sc[sc_offset + z + 0][x + steps_x] + tmp1; \ + sc[sc_offset + z + 0][x + steps_x] = tmp1; \ + tmp1 = sc[sc_offset + z + 1][x + steps_x] + tmp2; \ + sc[sc_offset + z + 1][x + steps_x] = tmp2; \ + } \ + if (x >= steps_x && y >= (steps_y + slice_start)) { \ + const uint##nbits##_t *srx = src - steps_y * src_stride + x - steps_x; \ + uint##nbits##_t *dsx = dst - steps_y * dst_stride + x - steps_x; \ + \ + res = (int32_t)*srx + ((((int32_t) * srx - \ + (int32_t)((tmp1 + halfscale) >> scalebits)) * amount) >> (8+nbits)); \ + *dsx = av_clip_uint##nbits(res); \ + } \ + } \ + if (y >= 0) { \ + dst += dst_stride; \ + src += src_stride; \ + } \ + } \ + return 0; \ } +DEF_UNSHARP_SLICE_FUNC(unsharp_slice, 16) +DEF_UNSHARP_SLICE_FUNC(unsharp_slice, 8) static int apply_unsharp_c(AVFilterContext *ctx, AVFrame *in, AVFrame *out) { @@ -107,6 +148,8 @@ static int apply_unsharp_c(AVFilterContext *ctx, AVFrame *in, AVFrame *out) UnsharpContext *s = ctx->priv; int i, plane_w[3], plane_h[3]; UnsharpFilterParam *fp[3]; + ThreadData td; + plane_w[0] = inlink->w; plane_w[1] = plane_w[2] = AV_CEIL_RSHIFT(inlink->w, s->hsub); plane_h[0] = inlink->h; @@ -114,7 +157,14 @@ static int apply_unsharp_c(AVFilterContext *ctx, AVFrame *in, AVFrame *out) fp[0] = &s->luma; fp[1] = fp[2] = &s->chroma; for (i = 0; i < 3; i++) { - apply_unsharp(out->data[i], out->linesize[i], in->data[i], in->linesize[i], plane_w[i], plane_h[i], fp[i]); + td.fp = fp[i]; + td.dst = out->data[i]; + td.src = in->data[i]; + td.width = plane_w[i]; + td.height = plane_h[i]; + td.dst_stride = out->linesize[i]; + td.src_stride = in->linesize[i]; + ctx->internal->execute(ctx, s->unsharp_slice, &td, NULL, FFMIN(plane_h[i], s->nb_threads)); } return 0; } @@ -151,6 +201,10 @@ static int query_formats(AVFilterContext *ctx) static const enum AVPixelFormat pix_fmts[] = { AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ422P, + AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9, + AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, AV_PIX_FMT_YUV440P10, + AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12, + AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16, AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_NONE }; @@ -163,6 +217,7 @@ static int query_formats(AVFilterContext *ctx) static int init_filter_param(AVFilterContext *ctx, UnsharpFilterParam *fp, const char *effect_type, int width) { int z; + UnsharpContext *s = ctx->priv; const char *effect = fp->amount == 0 ? "none" : fp->amount < 0 ? "blur" : "sharpen"; if (!(fp->msize_x & fp->msize_y & 1)) { @@ -175,7 +230,12 @@ static int init_filter_param(AVFilterContext *ctx, UnsharpFilterParam *fp, const av_log(ctx, AV_LOG_VERBOSE, "effect:%s type:%s msize_x:%d msize_y:%d amount:%0.2f\n", effect, effect_type, fp->msize_x, fp->msize_y, fp->amount / 65535.0); - for (z = 0; z < 2 * fp->steps_y; z++) + fp->sr = av_malloc_array((MAX_MATRIX_SIZE - 1) * s->nb_threads, sizeof(uint32_t)); + fp->sc = av_mallocz_array(2 * fp->steps_y * s->nb_threads, sizeof(uint32_t *)); + if (!fp->sr || !fp->sc) + return AVERROR(ENOMEM); + + for (z = 0; z < 2 * fp->steps_y * s->nb_threads; z++) if (!(fp->sc[z] = av_malloc_array(width + 2 * fp->steps_x, sizeof(*(fp->sc[z]))))) return AVERROR(ENOMEM); @@ -183,39 +243,51 @@ static int init_filter_param(AVFilterContext *ctx, UnsharpFilterParam *fp, const return 0; } -static int config_props(AVFilterLink *link) +static int config_input(AVFilterLink *inlink) { - UnsharpContext *s = link->dst->priv; - const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(link->format); + UnsharpContext *s = inlink->dst->priv; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); int ret; s->hsub = desc->log2_chroma_w; s->vsub = desc->log2_chroma_h; + s->bitdepth = desc->comp[0].depth; + s->bps = s->bitdepth > 8 ? 2 : 1; + s->unsharp_slice = s->bitdepth > 8 ? unsharp_slice_16 : unsharp_slice_8; - ret = init_filter_param(link->dst, &s->luma, "luma", link->w); + // ensure (height / nb_threads) > 4 * steps_y, + // so that we don't have too much overlap between two threads + s->nb_threads = FFMIN(ff_filter_get_nb_threads(inlink->dst), + inlink->h / (4 * s->luma.steps_y)); + + ret = init_filter_param(inlink->dst, &s->luma, "luma", inlink->w); if (ret < 0) return ret; - ret = init_filter_param(link->dst, &s->chroma, "chroma", AV_CEIL_RSHIFT(link->w, s->hsub)); + ret = init_filter_param(inlink->dst, &s->chroma, "chroma", AV_CEIL_RSHIFT(inlink->w, s->hsub)); if (ret < 0) return ret; return 0; } -static void free_filter_param(UnsharpFilterParam *fp) +static void free_filter_param(UnsharpFilterParam *fp, int nb_threads) { int z; - for (z = 0; z < 2 * fp->steps_y; z++) - av_freep(&fp->sc[z]); + if (fp->sc) { + for (z = 0; z < 2 * fp->steps_y * nb_threads; z++) + av_freep(&fp->sc[z]); + av_freep(&fp->sc); + } + av_freep(&fp->sr); } static av_cold void uninit(AVFilterContext *ctx) { UnsharpContext *s = ctx->priv; - free_filter_param(&s->luma); - free_filter_param(&s->chroma); + free_filter_param(&s->luma, s->nb_threads); + free_filter_param(&s->chroma, s->nb_threads); } static int filter_frame(AVFilterLink *link, AVFrame *in) @@ -271,7 +343,7 @@ static const AVFilterPad avfilter_vf_unsharp_inputs[] = { .name = "default", .type = AVMEDIA_TYPE_VIDEO, .filter_frame = filter_frame, - .config_props = config_props, + .config_props = config_input, }, { NULL } }; @@ -284,7 +356,7 @@ static const AVFilterPad avfilter_vf_unsharp_outputs[] = { { NULL } }; -AVFilter ff_vf_unsharp = { +const AVFilter ff_vf_unsharp = { .name = "unsharp", .description = NULL_IF_CONFIG_SMALL("Sharpen or blur the input video."), .priv_size = sizeof(UnsharpContext), @@ -294,5 +366,5 @@ AVFilter ff_vf_unsharp = { .query_formats = query_formats, .inputs = avfilter_vf_unsharp_inputs, .outputs = avfilter_vf_unsharp_outputs, - .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC | AVFILTER_FLAG_SLICE_THREADS, };