X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavfilter%2Fvf_neighbor.c;h=2db1e5e57c3cacc3a1a4d842316697f01b104829;hb=1b98bfb932ad36667ea7f18e24c54978623f6654;hp=de4a12f04830d6f7ad96a3158dde8683aca2702b;hpb=67e8f476b7d3c21686a2d453d052818ac92688b3;p=ffmpeg diff --git a/libavfilter/vf_neighbor.c b/libavfilter/vf_neighbor.c index de4a12f0483..2db1e5e57c3 100644 --- a/libavfilter/vf_neighbor.c +++ b/libavfilter/vf_neighbor.c @@ -20,6 +20,7 @@ */ #include "libavutil/imgutils.h" +#include "libavutil/intreadwrite.h" #include "libavutil/pixdesc.h" #include "libavutil/opt.h" #include "avfilter.h" @@ -27,6 +28,10 @@ #include "internal.h" #include "video.h" +typedef struct ThreadData { + AVFrame *in, *out; +} ThreadData; + typedef struct NContext { const AVClass *class; int planeheight[4]; @@ -34,7 +39,9 @@ typedef struct NContext { int nb_planes; int threshold[4]; int coordinates; - uint8_t *buffer; + + int depth; + int bpc; void (*filter)(uint8_t *dst, const uint8_t *p1, int width, int threshold, const uint8_t *coordinates[], int coord); @@ -43,46 +50,62 @@ typedef struct NContext { static int query_formats(AVFilterContext *ctx) { static const enum AVPixelFormat pix_fmts[] = { - AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA420P, - AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, AV_PIX_FMT_YUVJ422P,AV_PIX_FMT_YUVJ420P, AV_PIX_FMT_YUVJ411P, - AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P, - AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRAP, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE + AV_PIX_FMT_YUVA444P, AV_PIX_FMT_YUV444P, AV_PIX_FMT_YUV440P, + AV_PIX_FMT_YUVJ444P, AV_PIX_FMT_YUVJ440P, + AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUV422P, AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUV420P, + AV_PIX_FMT_YUVJ422P, AV_PIX_FMT_YUVJ420P, + AV_PIX_FMT_YUVJ411P, AV_PIX_FMT_YUV411P, AV_PIX_FMT_YUV410P, + AV_PIX_FMT_YUV420P9, AV_PIX_FMT_YUV422P9, AV_PIX_FMT_YUV444P9, + AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV422P10, AV_PIX_FMT_YUV444P10, + AV_PIX_FMT_YUV420P12, AV_PIX_FMT_YUV422P12, AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV440P12, + AV_PIX_FMT_YUV420P14, AV_PIX_FMT_YUV422P14, AV_PIX_FMT_YUV444P14, + AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16, + AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA444P9, + AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA444P10, + AV_PIX_FMT_YUVA420P16, AV_PIX_FMT_YUVA422P16, AV_PIX_FMT_YUVA444P16, + AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, + AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, + AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16, + AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16, + AV_PIX_FMT_NONE }; return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); } -static av_cold void uninit(AVFilterContext *ctx) +static void erosion(uint8_t *dst, const uint8_t *p1, int width, + int threshold, const uint8_t *coordinates[], int coord) { - NContext *s = ctx->priv; - - av_freep(&s->buffer); -} + int x, i; -static inline void line_copy8(uint8_t *line, const uint8_t *srcp, int width, int mergin) -{ - int i; + for (x = 0; x < width; x++) { + int min = p1[x]; + int limit = FFMAX(min - threshold, 0); - memcpy(line, srcp, width); + for (i = 0; i < 8; i++) { + if (coord & (1 << i)) { + min = FFMIN(min, *(coordinates[i] + x)); + } + min = FFMAX(min, limit); + } - for (i = mergin; i > 0; i--) { - line[-i] = line[i]; - line[width - 1 + i] = line[width - 1 - i]; + dst[x] = min; } } -static void erosion(uint8_t *dst, const uint8_t *p1, int width, - int threshold, const uint8_t *coordinates[], int coord) +static void erosion16(uint8_t *dstp, const uint8_t *p1, int width, + int threshold, const uint8_t *coordinates[], int coord) { + uint16_t *dst = (uint16_t *)dstp; int x, i; for (x = 0; x < width; x++) { - int min = p1[x]; + int min = AV_RN16A(&p1[2 * x]); int limit = FFMAX(min - threshold, 0); for (i = 0; i < 8; i++) { if (coord & (1 << i)) { - min = FFMIN(min, *(coordinates[i] + x)); + min = FFMIN(min, AV_RN16A(coordinates[i] + x * 2)); } min = FFMAX(min, limit); } @@ -111,6 +134,27 @@ static void dilation(uint8_t *dst, const uint8_t *p1, int width, } } +static void dilation16(uint8_t *dstp, const uint8_t *p1, int width, + int threshold, const uint8_t *coordinates[], int coord) +{ + uint16_t *dst = (uint16_t *)dstp; + int x, i; + + for (x = 0; x < width; x++) { + int max = AV_RN16A(&p1[x * 2]); + int limit = FFMIN(max + threshold, 255); + + for (i = 0; i < 8; i++) { + if (coord & (1 << i)) { + max = FFMAX(max, AV_RN16A(coordinates[i] + x * 2)); + } + max = FFMIN(max, limit); + } + + dst[x] = max; + } +} + static void deflate(uint8_t *dst, const uint8_t *p1, int width, int threshold, const uint8_t *coordinates[], int coord) { @@ -126,6 +170,22 @@ static void deflate(uint8_t *dst, const uint8_t *p1, int width, } } +static void deflate16(uint8_t *dstp, const uint8_t *p1, int width, + int threshold, const uint8_t *coordinates[], int coord) +{ + uint16_t *dst = (uint16_t *)dstp; + int x, i; + + for (x = 0; x < width; x++) { + int sum = 0; + int limit = FFMAX(AV_RN16A(&p1[2 * x]) - threshold, 0); + + for (i = 0; i < 8; sum += AV_RN16A(coordinates[i++] + x * 2)); + + dst[x] = FFMAX(FFMIN(sum / 8, p1[x]), limit); + } +} + static void inflate(uint8_t *dst, const uint8_t *p1, int width, int threshold, const uint8_t *coordinates[], int coord) { @@ -141,32 +201,98 @@ static void inflate(uint8_t *dst, const uint8_t *p1, int width, } } +static void inflate16(uint8_t *dstp, const uint8_t *p1, int width, + int threshold, const uint8_t *coordinates[], int coord) +{ + uint16_t *dst = (uint16_t *)dstp; + int x, i; + + for (x = 0; x < width; x++) { + int sum = 0; + int limit = FFMIN(AV_RN16A(&p1[2 * x]) + threshold, 255); + + for (i = 0; i < 8; sum += AV_RN16A(coordinates[i++] + x * 2)); + + dst[x] = FFMIN(FFMAX(sum / 8, p1[x]), limit); + } +} + static int config_input(AVFilterLink *inlink) { AVFilterContext *ctx = inlink->dst; NContext *s = ctx->priv; const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format); - int ret; - if ((ret = av_image_fill_linesizes(s->planewidth, inlink->format, inlink->w)) < 0) - return ret; + s->depth = desc->comp[0].depth; + s->bpc = (s->depth + 7) / 8; + s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); + s->planewidth[0] = s->planewidth[3] = inlink->w; s->planeheight[1] = s->planeheight[2] = AV_CEIL_RSHIFT(inlink->h, desc->log2_chroma_h); s->planeheight[0] = s->planeheight[3] = inlink->h; s->nb_planes = av_pix_fmt_count_planes(inlink->format); - s->buffer = av_malloc(3 * (s->planewidth[0] + 32)); - if (!s->buffer) - return AVERROR(ENOMEM); if (!strcmp(ctx->filter->name, "erosion")) - s->filter = erosion; + s->filter = s->depth > 8 ? erosion16 : erosion; else if (!strcmp(ctx->filter->name, "dilation")) - s->filter = dilation; + s->filter = s->depth > 8 ? dilation16 : dilation; else if (!strcmp(ctx->filter->name, "deflate")) - s->filter = deflate; + s->filter = s->depth > 8 ? deflate16 : deflate; else if (!strcmp(ctx->filter->name, "inflate")) - s->filter = inflate; + s->filter = s->depth > 8 ? inflate16 : inflate; + + return 0; +} + +static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + NContext *s = ctx->priv; + ThreadData *td = arg; + AVFrame *out = td->out; + AVFrame *in = td->in; + int plane, y; + + for (plane = 0; plane < s->nb_planes; plane++) { + const int bpc = s->bpc; + const int threshold = s->threshold[plane]; + const int stride = in->linesize[plane]; + const int dstride = out->linesize[plane]; + const int height = s->planeheight[plane]; + const int width = s->planewidth[plane]; + const int slice_start = (height * jobnr) / nb_jobs; + const int slice_end = (height * (jobnr+1)) / nb_jobs; + const uint8_t *src = (const uint8_t *)in->data[plane] + slice_start * stride; + uint8_t *dst = out->data[plane] + slice_start * dstride; + + if (!threshold) { + av_image_copy_plane(dst, dstride, src, stride, width * bpc, slice_end - slice_start); + continue; + } + + for (y = slice_start; y < slice_end; y++) { + const int nh = y > 0; + const int ph = y < height - 1; + const uint8_t *coordinates[] = { src - nh * stride, src + 1 * bpc - nh * stride, src + 2 * bpc - nh * stride, + src, src + 2 * bpc, + src + ph * stride, src + 1 * bpc + ph * stride, src + 2 * bpc + ph * stride}; + + const uint8_t *coordinateslb[] = { src + 1 * bpc - nh * stride, src - nh * stride, src + 1 * bpc - nh * stride, + src + 1 * bpc, src + 1 * bpc, + src + 1 * bpc + ph * stride, src + ph * stride, src + 1 * bpc + ph * stride}; + + const uint8_t *coordinatesrb[] = { src + (width - 2) * bpc - nh * stride, src + (width - 1) * bpc - nh * stride, src + (width - 2) * bpc - nh * stride, + src + (width - 2) * bpc, src + (width - 2) * bpc, + src + (width - 2) * bpc + ph * stride, src + (width - 1) * bpc + ph * stride, src + (width - 2) * bpc + ph * stride}; + + s->filter(dst, src, 1, threshold, coordinateslb, s->coordinates); + s->filter(dst + 1 * bpc, src + 1 * bpc, width - 2, threshold, coordinates, s->coordinates); + s->filter(dst + (width - 1) * bpc, src + (width - 1) * bpc, 1, threshold, coordinatesrb, s->coordinates); + + src += stride; + dst += dstride; + } + } return 0; } @@ -176,8 +302,8 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) AVFilterContext *ctx = inlink->dst; AVFilterLink *outlink = ctx->outputs[0]; NContext *s = ctx->priv; + ThreadData td; AVFrame *out; - int plane, y; out = ff_get_video_buffer(outlink, outlink->w, outlink->h); if (!out) { @@ -186,43 +312,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) } av_frame_copy_props(out, in); - for (plane = 0; plane < s->nb_planes; plane++) { - const int threshold = s->threshold[plane]; - - if (threshold) { - const uint8_t *src = in->data[plane]; - uint8_t *dst = out->data[plane]; - int stride = in->linesize[plane]; - int height = s->planeheight[plane]; - int width = s->planewidth[plane]; - uint8_t *p0 = s->buffer + 16; - uint8_t *p1 = p0 + s->planewidth[0]; - uint8_t *p2 = p1 + s->planewidth[0]; - uint8_t *orig = p0, *end = p2; - - line_copy8(p0, src + stride, width, 1); - line_copy8(p1, src, width, 1); - - for (y = 0; y < height; y++) { - const uint8_t *coordinates[] = { p0 - 1, p0, p0 + 1, - p1 - 1, p1 + 1, - p2 - 1, p2, p2 + 1}; - src += stride * (y < height - 1 ? 1 : -1); - line_copy8(p2, src, width, 1); - - s->filter(dst, p1, width, threshold, coordinates, s->coordinates); - - p0 = p1; - p1 = p2; - p2 = (p2 == end) ? orig: p2 + s->planewidth[0]; - dst += out->linesize[plane]; - } - } else { - av_image_copy_plane(out->data[plane], out->linesize[plane], - in->data[plane], in->linesize[plane], - s->planewidth[plane], s->planeheight[plane]); - } - } + td.in = in; + td.out = out; + ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN(s->planeheight[1], ff_filter_get_nb_threads(ctx))); av_frame_free(&in); return ff_filter_frame(outlink, out); @@ -257,11 +349,11 @@ AVFilter ff_vf_##name_ = { \ .description = NULL_IF_CONFIG_SMALL(description_), \ .priv_size = sizeof(NContext), \ .priv_class = &name_##_class, \ - .uninit = uninit, \ .query_formats = query_formats, \ .inputs = neighbor_inputs, \ .outputs = neighbor_outputs, \ - .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC, \ + .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_GENERIC| \ + AVFILTER_FLAG_SLICE_THREADS, \ } #if CONFIG_EROSION_FILTER