X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavfilter%2Fvf_convolution.c;h=96d40b40f1edf8eab23230565c016d8d1b3df74b;hb=fe6c4f0c47d4390bead6e226cb12b45584b76301;hp=ce09e338cc5b1456b5a52b7962aed1a3896758b8;hpb=6f277e1f76121736baf98aedb2bf55d8bb413fa7;p=ffmpeg diff --git a/libavfilter/vf_convolution.c b/libavfilter/vf_convolution.c index ce09e338cc5..96d40b40f1e 100644 --- a/libavfilter/vf_convolution.c +++ b/libavfilter/vf_convolution.c @@ -21,6 +21,7 @@ #include "libavutil/avstring.h" #include "libavutil/imgutils.h" +#include "libavutil/intreadwrite.h" #include "libavutil/opt.h" #include "libavutil/pixdesc.h" #include "avfilter.h" @@ -28,22 +29,28 @@ #include "internal.h" #include "video.h" +enum MatrixMode { + MATRIX_SQUARE, + MATRIX_ROW, + MATRIX_COLUMN, + MATRIX_NBMODES, +}; + typedef struct ConvolutionContext { const AVClass *class; char *matrix_str[4]; float rdiv[4]; float bias[4]; + int mode[4]; float scale; float delta; int planes; int size[4]; int depth; + int max; int bpc; - int bstride; - uint8_t *buffer; - uint8_t **bptrs; int nb_planes; int nb_threads; int planewidth[4]; @@ -52,7 +59,12 @@ typedef struct ConvolutionContext { int matrix_length[4]; int copy[4]; - int (*filter[4])(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs); + void (*setup[4])(int radius, const uint8_t *c[], const uint8_t *src, int stride, + int x, int width, int y, int height, int bpc); + void (*filter[4])(uint8_t *dst, int width, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride); } ConvolutionContext; #define OFFSET(x) offsetof(ConvolutionContext, x) @@ -63,14 +75,21 @@ static const AVOption convolution_options[] = { { "1m", "set matrix for 2nd plane", OFFSET(matrix_str[1]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS }, { "2m", "set matrix for 3rd plane", OFFSET(matrix_str[2]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS }, { "3m", "set matrix for 4th plane", OFFSET(matrix_str[3]), AV_OPT_TYPE_STRING, {.str="0 0 0 0 1 0 0 0 0"}, 0, 0, FLAGS }, - { "0rdiv", "set rdiv for 1st plane", OFFSET(rdiv[0]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS}, - { "1rdiv", "set rdiv for 2nd plane", OFFSET(rdiv[1]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS}, - { "2rdiv", "set rdiv for 3rd plane", OFFSET(rdiv[2]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS}, - { "3rdiv", "set rdiv for 4th plane", OFFSET(rdiv[3]), AV_OPT_TYPE_FLOAT, {.dbl=1.0}, 0.0, INT_MAX, FLAGS}, + { "0rdiv", "set rdiv for 1st plane", OFFSET(rdiv[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS}, + { "1rdiv", "set rdiv for 2nd plane", OFFSET(rdiv[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS}, + { "2rdiv", "set rdiv for 3rd plane", OFFSET(rdiv[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS}, + { "3rdiv", "set rdiv for 4th plane", OFFSET(rdiv[3]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS}, { "0bias", "set bias for 1st plane", OFFSET(bias[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS}, { "1bias", "set bias for 2nd plane", OFFSET(bias[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS}, { "2bias", "set bias for 3rd plane", OFFSET(bias[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS}, { "3bias", "set bias for 4th plane", OFFSET(bias[3]), AV_OPT_TYPE_FLOAT, {.dbl=0.0}, 0.0, INT_MAX, FLAGS}, + { "0mode", "set matrix mode for 1st plane", OFFSET(mode[0]), AV_OPT_TYPE_INT, {.i64=MATRIX_SQUARE}, 0, MATRIX_NBMODES-1, FLAGS, "mode" }, + { "1mode", "set matrix mode for 2nd plane", OFFSET(mode[1]), AV_OPT_TYPE_INT, {.i64=MATRIX_SQUARE}, 0, MATRIX_NBMODES-1, FLAGS, "mode" }, + { "2mode", "set matrix mode for 3rd plane", OFFSET(mode[2]), AV_OPT_TYPE_INT, {.i64=MATRIX_SQUARE}, 0, MATRIX_NBMODES-1, FLAGS, "mode" }, + { "3mode", "set matrix mode for 4th plane", OFFSET(mode[3]), AV_OPT_TYPE_INT, {.i64=MATRIX_SQUARE}, 0, MATRIX_NBMODES-1, FLAGS, "mode" }, + { "square", "square matrix", 0, AV_OPT_TYPE_CONST, {.i64=MATRIX_SQUARE}, 0, 0, FLAGS, "mode" }, + { "row", "single row matrix", 0, AV_OPT_TYPE_CONST, {.i64=MATRIX_ROW} , 0, 0, FLAGS, "mode" }, + { "column", "single column matrix", 0, AV_OPT_TYPE_CONST, {.i64=MATRIX_COLUMN}, 0, 0, FLAGS, "mode" }, { NULL } }; @@ -120,743 +139,453 @@ static int query_formats(AVFilterContext *ctx) return ff_set_common_formats(ctx, ff_make_format_list(pix_fmts)); } -static inline void line_copy8(uint8_t *line, const uint8_t *srcp, int width, int mergin) +typedef struct ThreadData { + AVFrame *in, *out; +} ThreadData; + +static void filter16_prewitt(uint8_t *dstp, int width, + float scale, float delta, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) { - int i; + uint16_t *dst = (uint16_t *)dstp; + int x; - memcpy(line, srcp, width); + for (x = 0; x < width; x++) { + int suma = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[1][2 * x]) * -1 + AV_RN16A(&c[2][2 * x]) * -1 + + AV_RN16A(&c[6][2 * x]) * 1 + AV_RN16A(&c[7][2 * x]) * 1 + AV_RN16A(&c[8][2 * x]) * 1; + int sumb = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[2][2 * x]) * 1 + AV_RN16A(&c[3][2 * x]) * -1 + + AV_RN16A(&c[5][2 * x]) * 1 + AV_RN16A(&c[6][2 * x]) * -1 + AV_RN16A(&c[8][2 * x]) * 1; - for (i = mergin; i > 0; i--) { - line[-i] = line[i]; - line[width - 1 + i] = line[width - 1 - i]; + dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak); } } -static inline void line_copy16(uint16_t *line, const uint16_t *srcp, int width, int mergin) +static void filter16_roberts(uint8_t *dstp, int width, + float scale, float delta, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) { - int i; + uint16_t *dst = (uint16_t *)dstp; + int x; - memcpy(line, srcp, width * 2); + for (x = 0; x < width; x++) { + int suma = AV_RN16A(&c[0][2 * x]) * 1 + AV_RN16A(&c[1][2 * x]) * -1; + int sumb = AV_RN16A(&c[4][2 * x]) * 1 + AV_RN16A(&c[3][2 * x]) * -1; - for (i = mergin; i > 0; i--) { - line[-i] = line[i]; - line[width - 1 + i] = line[width - 1 - i]; + dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak); } } -typedef struct ThreadData { - AVFrame *in, *out; - int plane; -} ThreadData; - -static int filter16_prewitt(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +static void filter16_sobel(uint8_t *dstp, int width, + float scale, float delta, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) { - ConvolutionContext *s = ctx->priv; - ThreadData *td = arg; - AVFrame *in = td->in; - AVFrame *out = td->out; - const int plane = td->plane; - const int peak = (1 << s->depth) - 1; - const int stride = in->linesize[plane] / 2; - const int bstride = s->bstride; - const int height = s->planeheight[plane]; - const int width = s->planewidth[plane]; - const int slice_start = (height * jobnr) / nb_jobs; - const int slice_end = (height * (jobnr+1)) / nb_jobs; - const uint16_t *src = (const uint16_t *)in->data[plane] + slice_start * stride; - uint16_t *dst = (uint16_t *)out->data[plane] + slice_start * (out->linesize[plane] / 2); - const float scale = s->scale; - const float delta = s->delta; - uint16_t *p0 = (uint16_t *)s->bptrs[jobnr] + 16; - uint16_t *p1 = p0 + bstride; - uint16_t *p2 = p1 + bstride; - uint16_t *orig = p0, *end = p2; - int y, x; - - line_copy16(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1); - line_copy16(p1, src, width, 1); - - for (y = slice_start; y < slice_end; y++) { - src += stride * (y < height - 1 ? 1 : -1); - line_copy16(p2, src, width, 1); - - for (x = 0; x < width; x++) { - int suma = p0[x - 1] * -1 + - p0[x] * -1 + - p0[x + 1] * -1 + - p2[x - 1] * 1 + - p2[x] * 1 + - p2[x + 1] * 1; - int sumb = p0[x - 1] * -1 + - p0[x + 1] * 1 + - p1[x - 1] * -1 + - p1[x + 1] * 1 + - p2[x - 1] * -1 + - p2[x + 1] * 1; - - dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak); - } + uint16_t *dst = (uint16_t *)dstp; + int x; - p0 = p1; - p1 = p2; - p2 = (p2 == end) ? orig: p2 + bstride; - dst += out->linesize[plane] / 2; + for (x = 0; x < width; x++) { + int suma = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[1][2 * x]) * -2 + AV_RN16A(&c[2][2 * x]) * -1 + + AV_RN16A(&c[6][2 * x]) * 1 + AV_RN16A(&c[7][2 * x]) * 2 + AV_RN16A(&c[8][2 * x]) * 1; + int sumb = AV_RN16A(&c[0][2 * x]) * -1 + AV_RN16A(&c[2][2 * x]) * 1 + AV_RN16A(&c[3][2 * x]) * -2 + + AV_RN16A(&c[5][2 * x]) * 2 + AV_RN16A(&c[6][2 * x]) * -1 + AV_RN16A(&c[8][2 * x]) * 1; + + dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak); } +} - return 0; +static void filter_prewitt(uint8_t *dst, int width, + float scale, float delta, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) +{ + const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2]; + const uint8_t *c3 = c[3], *c5 = c[5]; + const uint8_t *c6 = c[6], *c7 = c[7], *c8 = c[8]; + int x; + + for (x = 0; x < width; x++) { + int suma = c0[x] * -1 + c1[x] * -1 + c2[x] * -1 + + c6[x] * 1 + c7[x] * 1 + c8[x] * 1; + int sumb = c0[x] * -1 + c2[x] * 1 + c3[x] * -1 + + c5[x] * 1 + c6[x] * -1 + c8[x] * 1; + + dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta); + } } -static int filter16_roberts(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +static void filter_roberts(uint8_t *dst, int width, + float scale, float delta, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) { - ConvolutionContext *s = ctx->priv; - ThreadData *td = arg; - AVFrame *in = td->in; - AVFrame *out = td->out; - const int plane = td->plane; - const int peak = (1 << s->depth) - 1; - const int stride = in->linesize[plane] / 2; - const int bstride = s->bstride; - const int height = s->planeheight[plane]; - const int width = s->planewidth[plane]; - const int slice_start = (height * jobnr) / nb_jobs; - const int slice_end = (height * (jobnr+1)) / nb_jobs; - const uint16_t *src = (const uint16_t *)in->data[plane] + slice_start * stride; - uint16_t *dst = (uint16_t *)out->data[plane] + slice_start * (out->linesize[plane] / 2); - const float scale = s->scale; - const float delta = s->delta; - uint16_t *p0 = (uint16_t *)s->bptrs[jobnr] + 16; - uint16_t *p1 = p0 + bstride; - uint16_t *p2 = p1 + bstride; - uint16_t *orig = p0, *end = p2; - int y, x; - - line_copy16(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1); - line_copy16(p1, src, width, 1); - - for (y = slice_start; y < slice_end; y++) { - src += stride * (y < height - 1 ? 1 : -1); - line_copy16(p2, src, width, 1); - - for (x = 0; x < width; x++) { - int suma = p0[x - 1] * 1 + - p1[x ] * -1; - int sumb = p0[x ] * 1 + - p1[x - 1] * -1; - - dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak); - } + int x; - p0 = p1; - p1 = p2; - p2 = (p2 == end) ? orig: p2 + bstride; - dst += out->linesize[plane] / 2; - } + for (x = 0; x < width; x++) { + int suma = c[0][x] * 1 + c[1][x] * -1; + int sumb = c[4][x] * 1 + c[3][x] * -1; - return 0; + dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta); + } } -static int filter16_sobel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +static void filter_sobel(uint8_t *dst, int width, + float scale, float delta, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) { - ConvolutionContext *s = ctx->priv; - ThreadData *td = arg; - AVFrame *in = td->in; - AVFrame *out = td->out; - const int plane = td->plane; - const int peak = (1 << s->depth) - 1; - const int stride = in->linesize[plane] / 2; - const int bstride = s->bstride; - const int height = s->planeheight[plane]; - const int width = s->planewidth[plane]; - const int slice_start = (height * jobnr) / nb_jobs; - const int slice_end = (height * (jobnr+1)) / nb_jobs; - const uint16_t *src = (const uint16_t *)in->data[plane] + slice_start * stride; - uint16_t *dst = (uint16_t *)out->data[plane] + slice_start * (out->linesize[plane] / 2); - const float scale = s->scale; - const float delta = s->delta; - uint16_t *p0 = (uint16_t *)s->bptrs[jobnr] + 16; - uint16_t *p1 = p0 + bstride; - uint16_t *p2 = p1 + bstride; - uint16_t *orig = p0, *end = p2; - int y, x; - - line_copy16(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1); - line_copy16(p1, src, width, 1); - - for (y = slice_start; y < slice_end; y++) { - src += stride * (y < height - 1 ? 1 : -1); - line_copy16(p2, src, width, 1); - - for (x = 0; x < width; x++) { - int suma = p0[x - 1] * -1 + - p0[x] * -2 + - p0[x + 1] * -1 + - p2[x - 1] * 1 + - p2[x] * 2 + - p2[x + 1] * 1; - int sumb = p0[x - 1] * -1 + - p0[x + 1] * 1 + - p1[x - 1] * -2 + - p1[x + 1] * 2 + - p2[x - 1] * -1 + - p2[x + 1] * 1; - - dst[x] = av_clip(sqrt(suma*suma + sumb*sumb) * scale + delta, 0, peak); - } - - p0 = p1; - p1 = p2; - p2 = (p2 == end) ? orig: p2 + bstride; - dst += out->linesize[plane] / 2; + const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2]; + const uint8_t *c3 = c[3], *c5 = c[5]; + const uint8_t *c6 = c[6], *c7 = c[7], *c8 = c[8]; + int x; + + for (x = 0; x < width; x++) { + int suma = c0[x] * -1 + c1[x] * -2 + c2[x] * -1 + + c6[x] * 1 + c7[x] * 2 + c8[x] * 1; + int sumb = c0[x] * -1 + c2[x] * 1 + c3[x] * -2 + + c5[x] * 2 + c6[x] * -1 + c8[x] * 1; + + dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta); } +} - return 0; +static void filter16_3x3(uint8_t *dstp, int width, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) +{ + uint16_t *dst = (uint16_t *)dstp; + int x; + + for (x = 0; x < width; x++) { + int sum = AV_RN16A(&c[0][2 * x]) * matrix[0] + + AV_RN16A(&c[1][2 * x]) * matrix[1] + + AV_RN16A(&c[2][2 * x]) * matrix[2] + + AV_RN16A(&c[3][2 * x]) * matrix[3] + + AV_RN16A(&c[4][2 * x]) * matrix[4] + + AV_RN16A(&c[5][2 * x]) * matrix[5] + + AV_RN16A(&c[6][2 * x]) * matrix[6] + + AV_RN16A(&c[7][2 * x]) * matrix[7] + + AV_RN16A(&c[8][2 * x]) * matrix[8]; + sum = (int)(sum * rdiv + bias + 0.5f); + dst[x] = av_clip(sum, 0, peak); + } } -static int filter_prewitt(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +static void filter16_5x5(uint8_t *dstp, int width, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) { - ConvolutionContext *s = ctx->priv; - ThreadData *td = arg; - AVFrame *in = td->in; - AVFrame *out = td->out; - const int plane = td->plane; - const int stride = in->linesize[plane]; - const int bstride = s->bstride; - const int height = s->planeheight[plane]; - const int width = s->planewidth[plane]; - const int slice_start = (height * jobnr) / nb_jobs; - const int slice_end = (height * (jobnr+1)) / nb_jobs; - const uint8_t *src = in->data[plane] + slice_start * stride; - uint8_t *dst = out->data[plane] + slice_start * out->linesize[plane]; - const float scale = s->scale; - const float delta = s->delta; - uint8_t *p0 = s->bptrs[jobnr] + 16; - uint8_t *p1 = p0 + bstride; - uint8_t *p2 = p1 + bstride; - uint8_t *orig = p0, *end = p2; - int y, x; - - line_copy8(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1); - line_copy8(p1, src, width, 1); - - for (y = slice_start; y < slice_end; y++) { - src += stride * (y < height - 1 ? 1 : -1); - line_copy8(p2, src, width, 1); - - for (x = 0; x < width; x++) { - int suma = p0[x - 1] * -1 + - p0[x] * -1 + - p0[x + 1] * -1 + - p2[x - 1] * 1 + - p2[x] * 1 + - p2[x + 1] * 1; - int sumb = p0[x - 1] * -1 + - p0[x + 1] * 1 + - p1[x - 1] * -1 + - p1[x + 1] * 1 + - p2[x - 1] * -1 + - p2[x + 1] * 1; - - dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta); - } + uint16_t *dst = (uint16_t *)dstp; + int x; - p0 = p1; - p1 = p2; - p2 = (p2 == end) ? orig: p2 + bstride; - dst += out->linesize[plane]; - } + for (x = 0; x < width; x++) { + int i, sum = 0; - return 0; + for (i = 0; i < 25; i++) + sum += AV_RN16A(&c[i][2 * x]) * matrix[i]; + + sum = (int)(sum * rdiv + bias + 0.5f); + dst[x] = av_clip(sum, 0, peak); + } } -static int filter_roberts(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +static void filter16_7x7(uint8_t *dstp, int width, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) { - ConvolutionContext *s = ctx->priv; - ThreadData *td = arg; - AVFrame *in = td->in; - AVFrame *out = td->out; - const int plane = td->plane; - const int stride = in->linesize[plane]; - const int bstride = s->bstride; - const int height = s->planeheight[plane]; - const int width = s->planewidth[plane]; - const int slice_start = (height * jobnr) / nb_jobs; - const int slice_end = (height * (jobnr+1)) / nb_jobs; - const uint8_t *src = in->data[plane] + slice_start * stride; - uint8_t *dst = out->data[plane] + slice_start * out->linesize[plane]; - const float scale = s->scale; - const float delta = s->delta; - uint8_t *p0 = s->bptrs[jobnr] + 16; - uint8_t *p1 = p0 + bstride; - uint8_t *p2 = p1 + bstride; - uint8_t *orig = p0, *end = p2; - int y, x; - - line_copy8(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1); - line_copy8(p1, src, width, 1); - - for (y = slice_start; y < slice_end; y++) { - src += stride * (y < height - 1 ? 1 : -1); - line_copy8(p2, src, width, 1); - - for (x = 0; x < width; x++) { - int suma = p0[x - 1] * 1 + - p1[x ] * -1; - int sumb = p0[x ] * 1 + - p1[x - 1] * -1; - - dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta); - } + uint16_t *dst = (uint16_t *)dstp; + int x; - p0 = p1; - p1 = p2; - p2 = (p2 == end) ? orig: p2 + bstride; - dst += out->linesize[plane]; + for (x = 0; x < width; x++) { + int i, sum = 0; + + for (i = 0; i < 49; i++) + sum += AV_RN16A(&c[i][2 * x]) * matrix[i]; + + sum = (int)(sum * rdiv + bias + 0.5f); + dst[x] = av_clip(sum, 0, peak); } +} - return 0; +static void filter16_row(uint8_t *dstp, int width, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) +{ + uint16_t *dst = (uint16_t *)dstp; + int x; + + for (x = 0; x < width; x++) { + int i, sum = 0; + + for (i = 0; i < 2 * radius + 1; i++) + sum += AV_RN16A(&c[i][2 * x]) * matrix[i]; + + sum = (int)(sum * rdiv + bias + 0.5f); + dst[x] = av_clip(sum, 0, peak); + } } -static int filter_sobel(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +static void filter16_column(uint8_t *dstp, int height, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) { - ConvolutionContext *s = ctx->priv; - ThreadData *td = arg; - AVFrame *in = td->in; - AVFrame *out = td->out; - const int plane = td->plane; - const int stride = in->linesize[plane]; - const int bstride = s->bstride; - const int height = s->planeheight[plane]; - const int width = s->planewidth[plane]; - const int slice_start = (height * jobnr) / nb_jobs; - const int slice_end = (height * (jobnr+1)) / nb_jobs; - const uint8_t *src = in->data[plane] + slice_start * stride; - uint8_t *dst = out->data[plane] + slice_start * out->linesize[plane]; - const float scale = s->scale; - const float delta = s->delta; - uint8_t *p0 = s->bptrs[jobnr] + 16; - uint8_t *p1 = p0 + bstride; - uint8_t *p2 = p1 + bstride; - uint8_t *orig = p0, *end = p2; - int y, x; - - line_copy8(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1); - line_copy8(p1, src, width, 1); - - for (y = slice_start; y < slice_end; y++) { - src += stride * (y < height - 1 ? 1 : -1); - line_copy8(p2, src, width, 1); - - for (x = 0; x < width; x++) { - int suma = p0[x - 1] * -1 + - p0[x] * -2 + - p0[x + 1] * -1 + - p2[x - 1] * 1 + - p2[x] * 2 + - p2[x + 1] * 1; - int sumb = p0[x - 1] * -1 + - p0[x + 1] * 1 + - p1[x - 1] * -2 + - p1[x + 1] * 2 + - p2[x - 1] * -1 + - p2[x + 1] * 1; - - dst[x] = av_clip_uint8(sqrt(suma*suma + sumb*sumb) * scale + delta); - } + uint16_t *dst = (uint16_t *)dstp; + int y; - p0 = p1; - p1 = p2; - p2 = (p2 == end) ? orig: p2 + bstride; - dst += out->linesize[plane]; + for (y = 0; y < height; y++) { + int i, sum = 0; + + for (i = 0; i < 2 * radius + 1; i++) + sum += AV_RN16A(&c[i][0 + y * stride]) * matrix[i]; + + sum = (int)(sum * rdiv + bias + 0.5f); + dst[0] = av_clip(sum, 0, peak); + dst += dstride / 2; } +} - return 0; +static void filter_7x7(uint8_t *dst, int width, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) +{ + int x; + + for (x = 0; x < width; x++) { + int i, sum = 0; + + for (i = 0; i < 49; i++) + sum += c[i][x] * matrix[i]; + + sum = (int)(sum * rdiv + bias + 0.5f); + dst[x] = av_clip_uint8(sum); + } } -static int filter16_3x3(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +static void filter_5x5(uint8_t *dst, int width, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) { - ConvolutionContext *s = ctx->priv; - ThreadData *td = arg; - AVFrame *in = td->in; - AVFrame *out = td->out; - const int plane = td->plane; - const int peak = (1 << s->depth) - 1; - const int stride = in->linesize[plane] / 2; - const int bstride = s->bstride; - const int height = s->planeheight[plane]; - const int width = s->planewidth[plane]; - const int slice_start = (height * jobnr) / nb_jobs; - const int slice_end = (height * (jobnr+1)) / nb_jobs; - const uint16_t *src = (const uint16_t *)in->data[plane] + slice_start * stride; - uint16_t *dst = (uint16_t *)out->data[plane] + slice_start * (out->linesize[plane] / 2); - uint16_t *p0 = (uint16_t *)s->bptrs[jobnr] + 16; - uint16_t *p1 = p0 + bstride; - uint16_t *p2 = p1 + bstride; - uint16_t *orig = p0, *end = p2; - const int *matrix = s->matrix[plane]; - const float rdiv = s->rdiv[plane]; - const float bias = s->bias[plane]; - int y, x; - - line_copy16(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1); - line_copy16(p1, src, width, 1); - - for (y = slice_start; y < slice_end; y++) { - src += stride * (y < height - 1 ? 1 : -1); - line_copy16(p2, src, width, 1); - - for (x = 0; x < width; x++) { - int sum = p0[x - 1] * matrix[0] + - p0[x] * matrix[1] + - p0[x + 1] * matrix[2] + - p1[x - 1] * matrix[3] + - p1[x] * matrix[4] + - p1[x + 1] * matrix[5] + - p2[x - 1] * matrix[6] + - p2[x] * matrix[7] + - p2[x + 1] * matrix[8]; - sum = (int)(sum * rdiv + bias + 0.5f); - dst[x] = av_clip(sum, 0, peak); - } + int x; - p0 = p1; - p1 = p2; - p2 = (p2 == end) ? orig: p2 + bstride; - dst += out->linesize[plane] / 2; + for (x = 0; x < width; x++) { + int i, sum = 0; + + for (i = 0; i < 25; i++) + sum += c[i][x] * matrix[i]; + + sum = (int)(sum * rdiv + bias + 0.5f); + dst[x] = av_clip_uint8(sum); } +} - return 0; +static void filter_3x3(uint8_t *dst, int width, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) +{ + const uint8_t *c0 = c[0], *c1 = c[1], *c2 = c[2]; + const uint8_t *c3 = c[3], *c4 = c[4], *c5 = c[5]; + const uint8_t *c6 = c[6], *c7 = c[7], *c8 = c[8]; + int x; + + for (x = 0; x < width; x++) { + int sum = c0[x] * matrix[0] + c1[x] * matrix[1] + c2[x] * matrix[2] + + c3[x] * matrix[3] + c4[x] * matrix[4] + c5[x] * matrix[5] + + c6[x] * matrix[6] + c7[x] * matrix[7] + c8[x] * matrix[8]; + sum = (int)(sum * rdiv + bias + 0.5f); + dst[x] = av_clip_uint8(sum); + } } -static int filter16_5x5(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +static void filter_row(uint8_t *dst, int width, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) { - ConvolutionContext *s = ctx->priv; - ThreadData *td = arg; - AVFrame *in = td->in; - AVFrame *out = td->out; - const int plane = td->plane; - const int peak = (1 << s->depth) - 1; - const int stride = in->linesize[plane] / 2; - const int bstride = s->bstride; - const int height = s->planeheight[plane]; - const int width = s->planewidth[plane]; - const int slice_start = (height * jobnr) / nb_jobs; - const int slice_end = (height * (jobnr+1)) / nb_jobs; - const uint16_t *src = (const uint16_t *)in->data[plane] + slice_start * stride; - uint16_t *dst = (uint16_t *)out->data[plane] + slice_start * (out->linesize[plane] / 2); - uint16_t *p0 = (uint16_t *)s->bptrs[jobnr] + 16; - uint16_t *p1 = p0 + bstride; - uint16_t *p2 = p1 + bstride; - uint16_t *p3 = p2 + bstride; - uint16_t *p4 = p3 + bstride; - uint16_t *orig = p0, *end = p4; - const int *matrix = s->matrix[plane]; - float rdiv = s->rdiv[plane]; - float bias = s->bias[plane]; - int y, x, i; - - line_copy16(p0, src + 2 * stride * (slice_start < 2 ? 1 : -1), width, 2); - line_copy16(p1, src + stride * (slice_start == 0 ? 1 : -1), width, 2); - line_copy16(p2, src, width, 2); - src += stride; - line_copy16(p3, src, width, 2); - - for (y = slice_start; y < slice_end; y++) { - uint16_t *array[] = { - p0 - 2, p0 - 1, p0, p0 + 1, p0 + 2, - p1 - 2, p1 - 1, p1, p1 + 1, p1 + 2, - p2 - 2, p2 - 1, p2, p2 + 1, p2 + 2, - p3 - 2, p3 - 1, p3, p3 + 1, p3 + 2, - p4 - 2, p4 - 1, p4, p4 + 1, p4 + 2 - }; - - src += stride * (y < height - 2 ? 1 : -1); - line_copy16(p4, src, width, 2); - - for (x = 0; x < width; x++) { - int sum = 0; - - for (i = 0; i < 25; i++) { - sum += *(array[i] + x) * matrix[i]; - } - sum = (int)(sum * rdiv + bias + 0.5f); - dst[x] = av_clip(sum, 0, peak); - } + int x; - p0 = p1; - p1 = p2; - p2 = p3; - p3 = p4; - p4 = (p4 == end) ? orig: p4 + bstride; - dst += out->linesize[plane] / 2; - } + for (x = 0; x < width; x++) { + int i, sum = 0; - return 0; + for (i = 0; i < 2 * radius + 1; i++) + sum += c[i][x] * matrix[i]; + + sum = (int)(sum * rdiv + bias + 0.5f); + dst[x] = av_clip_uint8(sum); + } } -static int filter16_7x7(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +static void filter_column(uint8_t *dst, int height, + float rdiv, float bias, const int *const matrix, + const uint8_t *c[], int peak, int radius, + int dstride, int stride) { - ConvolutionContext *s = ctx->priv; - ThreadData *td = arg; - AVFrame *in = td->in; - AVFrame *out = td->out; - const int plane = td->plane; - const int peak = (1 << s->depth) - 1; - const int stride = in->linesize[plane] / 2; - const int bstride = s->bstride; - const int height = s->planeheight[plane]; - const int width = s->planewidth[plane]; - const int slice_start = (height * jobnr) / nb_jobs; - const int slice_end = (height * (jobnr+1)) / nb_jobs; - const uint16_t *src = (const uint16_t *)in->data[plane] + slice_start * stride; - uint16_t *dst = (uint16_t *)out->data[plane] + slice_start * (out->linesize[plane] / 2); - uint16_t *p0 = (uint16_t *)s->bptrs[jobnr] + 32; - uint16_t *p1 = p0 + bstride; - uint16_t *p2 = p1 + bstride; - uint16_t *p3 = p2 + bstride; - uint16_t *p4 = p3 + bstride; - uint16_t *p5 = p4 + bstride; - uint16_t *p6 = p5 + bstride; - uint16_t *orig = p0, *end = p6; - const int *matrix = s->matrix[plane]; - float rdiv = s->rdiv[plane]; - float bias = s->bias[plane]; - int y, x, i; - - line_copy16(p0, src + 3 * stride * (slice_start < 3 ? 1 : -1), width, 3); - line_copy16(p1, src + 2 * stride * (slice_start < 2 ? 1 : -1), width, 3); - line_copy16(p2, src + stride * (slice_start == 0 ? 1 : -1), width, 3); - line_copy16(p3, src, width, 3); - src += stride; - line_copy16(p4, src, width, 3); - src += stride; - line_copy16(p5, src, width, 3); - - for (y = slice_start; y < slice_end; y++) { - uint16_t *array[] = { - p0 - 3, p0 - 2, p0 - 1, p0, p0 + 1, p0 + 2, p0 + 3, - p1 - 3, p1 - 2, p1 - 1, p1, p1 + 1, p1 + 2, p1 + 3, - p2 - 3, p2 - 2, p2 - 1, p2, p2 + 1, p2 + 2, p2 + 3, - p3 - 3, p3 - 2, p3 - 1, p3, p3 + 1, p3 + 2, p3 + 3, - p4 - 3, p4 - 2, p4 - 1, p4, p4 + 1, p4 + 2, p4 + 3, - p5 - 3, p5 - 2, p5 - 1, p5, p5 + 1, p5 + 2, p5 + 3, - p6 - 3, p6 - 2, p6 - 1, p6, p6 + 1, p6 + 2, p6 + 3, - }; - - src += stride * (y < height - 3 ? 1 : -1); - line_copy16(p6, src, width, 3); - - for (x = 0; x < width; x++) { - int sum = 0; - - for (i = 0; i < 25; i++) { - sum += *(array[i] + x) * matrix[i]; - } - sum = (int)(sum * rdiv + bias + 0.5f); - dst[x] = av_clip(sum, 0, peak); - } + int y; + + for (y = 0; y < height; y++) { + int i, sum = 0; - p0 = p1; - p1 = p2; - p2 = p3; - p3 = p4; - p4 = p5; - p5 = p6; - p6 = (p6 == end) ? orig: p6 + bstride; - dst += out->linesize[plane] / 2; + for (i = 0; i < 2 * radius + 1; i++) + sum += c[i][0 + y * stride] * matrix[i]; + + sum = (int)(sum * rdiv + bias + 0.5f); + dst[0] = av_clip_uint8(sum); + dst += dstride; } +} - return 0; +static void setup_3x3(int radius, const uint8_t *c[], const uint8_t *src, int stride, + int x, int w, int y, int h, int bpc) +{ + int i; + + for (i = 0; i < 9; i++) { + int xoff = FFABS(x + ((i % 3) - 1)); + int yoff = FFABS(y + (i / 3) - 1); + + xoff = xoff >= w ? 2 * w - 1 - xoff : xoff; + yoff = yoff >= h ? 2 * h - 1 - yoff : yoff; + + c[i] = src + xoff * bpc + yoff * stride; + } } -static int filter_3x3(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +static void setup_5x5(int radius, const uint8_t *c[], const uint8_t *src, int stride, + int x, int w, int y, int h, int bpc) { - ConvolutionContext *s = ctx->priv; - ThreadData *td = arg; - AVFrame *in = td->in; - AVFrame *out = td->out; - const int plane = td->plane; - const int stride = in->linesize[plane]; - const int bstride = s->bstride; - const int height = s->planeheight[plane]; - const int width = s->planewidth[plane]; - const int slice_start = (height * jobnr) / nb_jobs; - const int slice_end = (height * (jobnr+1)) / nb_jobs; - const uint8_t *src = in->data[plane] + slice_start * stride; - uint8_t *dst = out->data[plane] + slice_start * out->linesize[plane]; - uint8_t *p0 = s->bptrs[jobnr] + 16; - uint8_t *p1 = p0 + bstride; - uint8_t *p2 = p1 + bstride; - uint8_t *orig = p0, *end = p2; - const int *matrix = s->matrix[plane]; - const float rdiv = s->rdiv[plane]; - const float bias = s->bias[plane]; - int y, x; - - line_copy8(p0, src + stride * (slice_start == 0 ? 1 : -1), width, 1); - line_copy8(p1, src, width, 1); - - for (y = slice_start; y < slice_end; y++) { - src += stride * (y < height - 1 ? 1 : -1); - line_copy8(p2, src, width, 1); - - for (x = 0; x < width; x++) { - int sum = p0[x - 1] * matrix[0] + - p0[x] * matrix[1] + - p0[x + 1] * matrix[2] + - p1[x - 1] * matrix[3] + - p1[x] * matrix[4] + - p1[x + 1] * matrix[5] + - p2[x - 1] * matrix[6] + - p2[x] * matrix[7] + - p2[x + 1] * matrix[8]; - sum = (int)(sum * rdiv + bias + 0.5f); - dst[x] = av_clip_uint8(sum); - } + int i; + + for (i = 0; i < 25; i++) { + int xoff = FFABS(x + ((i % 5) - 2)); + int yoff = FFABS(y + (i / 5) - 2); + + xoff = xoff >= w ? 2 * w - 1 - xoff : xoff; + yoff = yoff >= h ? 2 * h - 1 - yoff : yoff; - p0 = p1; - p1 = p2; - p2 = (p2 == end) ? orig: p2 + bstride; - dst += out->linesize[plane]; + c[i] = src + xoff * bpc + yoff * stride; } +} - return 0; +static void setup_7x7(int radius, const uint8_t *c[], const uint8_t *src, int stride, + int x, int w, int y, int h, int bpc) +{ + int i; + + for (i = 0; i < 49; i++) { + int xoff = FFABS(x + ((i % 7) - 3)); + int yoff = FFABS(y + (i / 7) - 3); + + xoff = xoff >= w ? 2 * w - 1 - xoff : xoff; + yoff = yoff >= h ? 2 * h - 1 - yoff : yoff; + + c[i] = src + xoff * bpc + yoff * stride; + } } -static int filter_5x5(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +static void setup_row(int radius, const uint8_t *c[], const uint8_t *src, int stride, + int x, int w, int y, int h, int bpc) { - ConvolutionContext *s = ctx->priv; - ThreadData *td = arg; - AVFrame *in = td->in; - AVFrame *out = td->out; - const int plane = td->plane; - const int stride = in->linesize[plane]; - const int bstride = s->bstride; - const int height = s->planeheight[plane]; - const int width = s->planewidth[plane]; - const int slice_start = (height * jobnr) / nb_jobs; - const int slice_end = (height * (jobnr+1)) / nb_jobs; - const uint8_t *src = in->data[plane] + slice_start * stride; - uint8_t *dst = out->data[plane] + slice_start * out->linesize[plane]; - uint8_t *p0 = s->bptrs[jobnr] + 16; - uint8_t *p1 = p0 + bstride; - uint8_t *p2 = p1 + bstride; - uint8_t *p3 = p2 + bstride; - uint8_t *p4 = p3 + bstride; - uint8_t *orig = p0, *end = p4; - const int *matrix = s->matrix[plane]; - float rdiv = s->rdiv[plane]; - float bias = s->bias[plane]; - int y, x, i; - - line_copy8(p0, src + 2 * stride * (slice_start < 2 ? 1 : -1), width, 2); - line_copy8(p1, src + stride * (slice_start == 0 ? 1 : -1), width, 2); - line_copy8(p2, src, width, 2); - src += stride; - line_copy8(p3, src, width, 2); - - - for (y = slice_start; y < slice_end; y++) { - uint8_t *array[] = { - p0 - 2, p0 - 1, p0, p0 + 1, p0 + 2, - p1 - 2, p1 - 1, p1, p1 + 1, p1 + 2, - p2 - 2, p2 - 1, p2, p2 + 1, p2 + 2, - p3 - 2, p3 - 1, p3, p3 + 1, p3 + 2, - p4 - 2, p4 - 1, p4, p4 + 1, p4 + 2 - }; - - src += stride * (y < height - 2 ? 1 : -1); - line_copy8(p4, src, width, 2); - - for (x = 0; x < width; x++) { - int sum = 0; - - for (i = 0; i < 25; i++) { - sum += *(array[i] + x) * matrix[i]; - } - sum = (int)(sum * rdiv + bias + 0.5f); - dst[x] = av_clip_uint8(sum); - } + int i; + + for (i = 0; i < radius * 2 + 1; i++) { + int xoff = FFABS(x + i - radius); + + xoff = xoff >= w ? 2 * w - 1 - xoff : xoff; - p0 = p1; - p1 = p2; - p2 = p3; - p3 = p4; - p4 = (p4 == end) ? orig: p4 + bstride; - dst += out->linesize[plane]; + c[i] = src + xoff * bpc + y * stride; } +} - return 0; +static void setup_column(int radius, const uint8_t *c[], const uint8_t *src, int stride, + int x, int w, int y, int h, int bpc) +{ + int i; + + for (i = 0; i < radius * 2 + 1; i++) { + int xoff = FFABS(x + i - radius); + + xoff = xoff >= h ? 2 * h - 1 - xoff : xoff; + + c[i] = src + y * bpc + xoff * stride; + } } -static int filter_7x7(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) { ConvolutionContext *s = ctx->priv; ThreadData *td = arg; AVFrame *in = td->in; AVFrame *out = td->out; - const int plane = td->plane; - const int stride = in->linesize[plane]; - const int bstride = s->bstride; - const int height = s->planeheight[plane]; - const int width = s->planewidth[plane]; - const int slice_start = (height * jobnr) / nb_jobs; - const int slice_end = (height * (jobnr+1)) / nb_jobs; - const uint8_t *src = in->data[plane] + slice_start * stride; - uint8_t *dst = out->data[plane] + slice_start * out->linesize[plane]; - uint8_t *p0 = s->bptrs[jobnr] + 32; - uint8_t *p1 = p0 + bstride; - uint8_t *p2 = p1 + bstride; - uint8_t *p3 = p2 + bstride; - uint8_t *p4 = p3 + bstride; - uint8_t *p5 = p4 + bstride; - uint8_t *p6 = p5 + bstride; - uint8_t *orig = p0, *end = p6; - const int *matrix = s->matrix[plane]; - float rdiv = s->rdiv[plane]; - float bias = s->bias[plane]; - int y, x, i; - - line_copy8(p0, src + 3 * stride * (slice_start < 3 ? 1 : -1), width, 3); - line_copy8(p1, src + 2 * stride * (slice_start < 2 ? 1 : -1), width, 3); - line_copy8(p2, src + stride * (slice_start == 0 ? 1 : -1), width, 3); - line_copy8(p3, src, width, 3); - src += stride; - line_copy8(p4, src, width, 3); - src += stride; - line_copy8(p5, src, width, 3); - - for (y = slice_start; y < slice_end; y++) { - uint8_t *array[] = { - p0 - 3, p0 - 2, p0 - 1, p0, p0 + 1, p0 + 2, p0 + 3, - p1 - 3, p1 - 2, p1 - 1, p1, p1 + 1, p1 + 2, p1 + 3, - p2 - 3, p2 - 2, p2 - 1, p2, p2 + 1, p2 + 2, p2 + 3, - p3 - 3, p3 - 2, p3 - 1, p3, p3 + 1, p3 + 2, p3 + 3, - p4 - 3, p4 - 2, p4 - 1, p4, p4 + 1, p4 + 2, p4 + 3, - p5 - 3, p5 - 2, p5 - 1, p5, p5 + 1, p5 + 2, p5 + 3, - p6 - 3, p6 - 2, p6 - 1, p6, p6 + 1, p6 + 2, p6 + 3, - }; - - src += stride * (y < height - 3 ? 1 : -1); - line_copy8(p6, src, width, 3); - - for (x = 0; x < width; x++) { - int sum = 0; - - for (i = 0; i < 49; i++) { - sum += *(array[i] + x) * matrix[i]; - } - sum = (int)(sum * rdiv + bias + 0.5f); - dst[x] = av_clip_uint8(sum); + int plane; + + for (plane = 0; plane < s->nb_planes; plane++) { + const int mode = s->mode[plane]; + const int bpc = s->bpc; + const int radius = s->size[plane] / 2; + const int height = s->planeheight[plane]; + const int width = s->planewidth[plane]; + const int stride = in->linesize[plane]; + const int dstride = out->linesize[plane]; + const int sizeh = mode == MATRIX_COLUMN ? width : height; + const int sizew = mode == MATRIX_COLUMN ? height : width; + const int slice_start = (sizeh * jobnr) / nb_jobs; + const int slice_end = (sizeh * (jobnr+1)) / nb_jobs; + const float rdiv = s->rdiv[plane]; + const float bias = s->bias[plane]; + const uint8_t *src = in->data[plane]; + const int dst_pos = slice_start * (mode == MATRIX_COLUMN ? bpc : dstride); + uint8_t *dst = out->data[plane] + dst_pos; + const int *matrix = s->matrix[plane]; + const uint8_t *c[49]; + int y, x; + + if (s->copy[plane]) { + if (mode == MATRIX_COLUMN) + av_image_copy_plane(dst, dstride, src + slice_start * bpc, stride, + (slice_end - slice_start) * bpc, height); + else + av_image_copy_plane(dst, dstride, src + slice_start * stride, stride, + width * bpc, slice_end - slice_start); + continue; } - p0 = p1; - p1 = p2; - p2 = p3; - p3 = p4; - p4 = p5; - p5 = p6; - p6 = (p6 == end) ? orig: p6 + bstride; - dst += out->linesize[plane]; + for (y = slice_start; y < slice_end; y++) { + const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : radius * bpc; + const int yoff = mode == MATRIX_COLUMN ? radius * stride : 0; + + for (x = 0; x < radius; x++) { + const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : x * bpc; + const int yoff = mode == MATRIX_COLUMN ? x * stride : 0; + + s->setup[plane](radius, c, src, stride, x, width, y, height, bpc); + s->filter[plane](dst + yoff + xoff, 1, rdiv, + bias, matrix, c, s->max, radius, + dstride, stride); + } + s->setup[plane](radius, c, src, stride, radius, width, y, height, bpc); + s->filter[plane](dst + yoff + xoff, sizew - 2 * radius, + rdiv, bias, matrix, c, s->max, radius, + dstride, stride); + for (x = sizew - radius; x < sizew; x++) { + const int xoff = mode == MATRIX_COLUMN ? (y - slice_start) * bpc : x * bpc; + const int yoff = mode == MATRIX_COLUMN ? x * stride : 0; + + s->setup[plane](radius, c, src, stride, x, width, y, height, bpc); + s->filter[plane](dst + yoff + xoff, 1, rdiv, + bias, matrix, c, s->max, radius, + dstride, stride); + } + if (mode != MATRIX_COLUMN) + dst += dstride; + } } return 0; @@ -870,6 +599,7 @@ static int config_input(AVFilterLink *inlink) int p; s->depth = desc->comp[0].depth; + s->max = (1 << s->depth) - 1; s->planewidth[1] = s->planewidth[2] = AV_CEIL_RSHIFT(inlink->w, desc->log2_chroma_w); s->planewidth[0] = s->planewidth[3] = inlink->w; @@ -878,24 +608,16 @@ static int config_input(AVFilterLink *inlink) s->nb_planes = av_pix_fmt_count_planes(inlink->format); s->nb_threads = ff_filter_get_nb_threads(ctx); - s->bptrs = av_calloc(s->nb_threads, sizeof(*s->bptrs)); - if (!s->bptrs) - return AVERROR(ENOMEM); - - s->bstride = s->planewidth[0] + 64; s->bpc = (s->depth + 7) / 8; - s->buffer = av_malloc_array(7 * s->bstride * s->nb_threads, s->bpc); - if (!s->buffer) - return AVERROR(ENOMEM); - - for (p = 0; p < s->nb_threads; p++) { - s->bptrs[p] = s->buffer + 7 * s->bstride * s->bpc * p; - } if (!strcmp(ctx->filter->name, "convolution")) { if (s->depth > 8) { for (p = 0; p < s->nb_planes; p++) { - if (s->size[p] == 3) + if (s->mode[p] == MATRIX_ROW) + s->filter[p] = filter16_row; + else if (s->mode[p] == MATRIX_COLUMN) + s->filter[p] = filter16_column; + else if (s->size[p] == 3) s->filter[p] = filter16_3x3; else if (s->size[p] == 5) s->filter[p] = filter16_5x5; @@ -926,7 +648,7 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) ConvolutionContext *s = ctx->priv; AVFilterLink *outlink = ctx->outputs[0]; AVFrame *out; - int plane; + ThreadData td; out = ff_get_video_buffer(outlink, outlink->w, outlink->h); if (!out) { @@ -935,22 +657,9 @@ static int filter_frame(AVFilterLink *inlink, AVFrame *in) } av_frame_copy_props(out, in); - for (plane = 0; plane < s->nb_planes; plane++) { - ThreadData td; - - if (s->copy[plane]) { - av_image_copy_plane(out->data[plane], out->linesize[plane], - in->data[plane], in->linesize[plane], - s->planewidth[plane] * s->bpc, - s->planeheight[plane]); - continue; - } - - td.in = in; - td.out = out; - td.plane = plane; - ctx->internal->execute(ctx, s->filter[plane], &td, NULL, FFMIN(s->planeheight[plane], s->nb_threads)); - } + td.in = in; + td.out = out; + ctx->internal->execute(ctx, filter_slice, &td, NULL, FFMIN3(s->planeheight[1], s->planewidth[1], s->nb_threads)); av_frame_free(&in); return ff_filter_frame(outlink, out); @@ -965,6 +674,7 @@ static av_cold int init(AVFilterContext *ctx) for (i = 0; i < 4; i++) { int *matrix = (int *)s->matrix[i]; char *p, *arg, *saveptr = NULL; + float sum = 0; p = s->matrix_str[i]; while (s->matrix_length[i] < 49) { @@ -973,31 +683,52 @@ static av_cold int init(AVFilterContext *ctx) p = NULL; sscanf(arg, "%d", &matrix[s->matrix_length[i]]); + sum += matrix[s->matrix_length[i]]; s->matrix_length[i]++; } - if (s->matrix_length[i] == 9) { + if (!(s->matrix_length[i] & 1)) { + av_log(ctx, AV_LOG_ERROR, "number of matrix elements must be odd\n"); + return AVERROR(EINVAL); + } + if (s->mode[i] == MATRIX_ROW) { + s->filter[i] = filter_row; + s->setup[i] = setup_row; + s->size[i] = s->matrix_length[i]; + } else if (s->mode[i] == MATRIX_COLUMN) { + s->filter[i] = filter_column; + s->setup[i] = setup_column; + s->size[i] = s->matrix_length[i]; + } else if (s->matrix_length[i] == 9) { s->size[i] = 3; if (!memcmp(matrix, same3x3, sizeof(same3x3))) s->copy[i] = 1; else s->filter[i] = filter_3x3; + s->setup[i] = setup_3x3; } else if (s->matrix_length[i] == 25) { s->size[i] = 5; if (!memcmp(matrix, same5x5, sizeof(same5x5))) s->copy[i] = 1; else s->filter[i] = filter_5x5; + s->setup[i] = setup_5x5; } else if (s->matrix_length[i] == 49) { s->size[i] = 7; if (!memcmp(matrix, same7x7, sizeof(same7x7))) s->copy[i] = 1; else s->filter[i] = filter_7x7; + s->setup[i] = setup_7x7; } else { return AVERROR(EINVAL); } + if (sum == 0) + sum = 1; + if (s->rdiv[i] == 0) + s->rdiv[i] = 1. / sum; + if (s->copy[i] && (s->rdiv[i] != 1. || s->bias[i] != 0.)) s->copy[i] = 0; } @@ -1007,6 +738,10 @@ static av_cold int init(AVFilterContext *ctx) s->filter[i] = filter_prewitt; else s->copy[i] = 1; + s->size[i] = 3; + s->setup[i] = setup_3x3; + s->rdiv[i] = s->scale; + s->bias[i] = s->delta; } } else if (!strcmp(ctx->filter->name, "roberts")) { for (i = 0; i < 4; i++) { @@ -1014,6 +749,10 @@ static av_cold int init(AVFilterContext *ctx) s->filter[i] = filter_roberts; else s->copy[i] = 1; + s->size[i] = 3; + s->setup[i] = setup_3x3; + s->rdiv[i] = s->scale; + s->bias[i] = s->delta; } } else if (!strcmp(ctx->filter->name, "sobel")) { for (i = 0; i < 4; i++) { @@ -1021,20 +760,16 @@ static av_cold int init(AVFilterContext *ctx) s->filter[i] = filter_sobel; else s->copy[i] = 1; + s->size[i] = 3; + s->setup[i] = setup_3x3; + s->rdiv[i] = s->scale; + s->bias[i] = s->delta; } } return 0; } -static av_cold void uninit(AVFilterContext *ctx) -{ - ConvolutionContext *s = ctx->priv; - - av_freep(&s->bptrs); - av_freep(&s->buffer); -} - static const AVFilterPad convolution_inputs[] = { { .name = "default", @@ -1061,7 +796,6 @@ AVFilter ff_vf_convolution = { .priv_size = sizeof(ConvolutionContext), .priv_class = &convolution_class, .init = init, - .uninit = uninit, .query_formats = query_formats, .inputs = convolution_inputs, .outputs = convolution_outputs, @@ -1087,7 +821,6 @@ AVFilter ff_vf_prewitt = { .priv_size = sizeof(ConvolutionContext), .priv_class = &prewitt_class, .init = init, - .uninit = uninit, .query_formats = query_formats, .inputs = convolution_inputs, .outputs = convolution_outputs, @@ -1113,7 +846,6 @@ AVFilter ff_vf_sobel = { .priv_size = sizeof(ConvolutionContext), .priv_class = &sobel_class, .init = init, - .uninit = uninit, .query_formats = query_formats, .inputs = convolution_inputs, .outputs = convolution_outputs, @@ -1139,7 +871,6 @@ AVFilter ff_vf_roberts = { .priv_size = sizeof(ConvolutionContext), .priv_class = &roberts_class, .init = init, - .uninit = uninit, .query_formats = query_formats, .inputs = convolution_inputs, .outputs = convolution_outputs,