#define FF_BUFQUEUE_SIZE 129
#include "bufferqueue.h"
+#include "atadenoise.h"
#include "formats.h"
#include "internal.h"
#include "video.h"
const AVClass *class;
float fthra[4], fthrb[4];
+ float sigma[4];
int thra[4], thrb[4];
+ int algorithm;
int planes;
int nb_planes;
struct FFBufQueue q;
void *data[4][SIZE];
int linesize[4][SIZE];
- int size, mid;
+ float weights[4][SIZE];
+ int size, mid, radius;
int available;
int (*filter_slice)(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs);
+
+ ATADenoiseDSPContext dsp;
} ATADenoiseContext;
#define OFFSET(x) offsetof(ATADenoiseContext, x)
-#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
+#define FLAGS AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM|AV_OPT_FLAG_RUNTIME_PARAM
+#define VF AV_OPT_FLAG_VIDEO_PARAM|AV_OPT_FLAG_FILTERING_PARAM
static const AVOption atadenoise_options[] = {
{ "0a", "set threshold A for 1st plane", OFFSET(fthra[0]), AV_OPT_TYPE_FLOAT, {.dbl=0.02}, 0, 0.3, FLAGS },
{ "1b", "set threshold B for 2nd plane", OFFSET(fthrb[1]), AV_OPT_TYPE_FLOAT, {.dbl=0.04}, 0, 5.0, FLAGS },
{ "2a", "set threshold A for 3rd plane", OFFSET(fthra[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.02}, 0, 0.3, FLAGS },
{ "2b", "set threshold B for 3rd plane", OFFSET(fthrb[2]), AV_OPT_TYPE_FLOAT, {.dbl=0.04}, 0, 5.0, FLAGS },
- { "s", "set how many frames to use", OFFSET(size), AV_OPT_TYPE_INT, {.i64=9}, 5, SIZE, FLAGS },
+ { "s", "set how many frames to use", OFFSET(size), AV_OPT_TYPE_INT, {.i64=9}, 5, SIZE, VF },
{ "p", "set what planes to filter", OFFSET(planes), AV_OPT_TYPE_FLAGS, {.i64=7}, 0, 15, FLAGS },
+ { "a", "set variant of algorithm", OFFSET(algorithm),AV_OPT_TYPE_INT, {.i64=PARALLEL}, 0, NB_ATAA-1, FLAGS, "a" },
+ { "p", "parallel", 0, AV_OPT_TYPE_CONST, {.i64=PARALLEL}, 0, 0, FLAGS, "a" },
+ { "s", "serial", 0, AV_OPT_TYPE_CONST, {.i64=SERIAL}, 0, 0, FLAGS, "a" },
+ { "0s", "set sigma for 1st plane", OFFSET(sigma[0]), AV_OPT_TYPE_FLOAT, {.dbl=INT16_MAX}, 0, INT16_MAX, FLAGS },
+ { "1s", "set sigma for 2nd plane", OFFSET(sigma[1]), AV_OPT_TYPE_FLOAT, {.dbl=INT16_MAX}, 0, INT16_MAX, FLAGS },
+ { "2s", "set sigma for 3rd plane", OFFSET(sigma[2]), AV_OPT_TYPE_FLOAT, {.dbl=INT16_MAX}, 0, INT16_MAX, FLAGS },
{ NULL }
};
AV_PIX_FMT_YUV420P16, AV_PIX_FMT_YUV422P16, AV_PIX_FMT_YUV444P16,
AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
+ AV_PIX_FMT_YUVA420P, AV_PIX_FMT_YUVA422P, AV_PIX_FMT_YUVA444P,
+ AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P12, AV_PIX_FMT_YUVA444P16,
+ AV_PIX_FMT_YUVA422P9, AV_PIX_FMT_YUVA422P10, AV_PIX_FMT_YUVA422P12, AV_PIX_FMT_YUVA422P16,
+ AV_PIX_FMT_YUVA420P9, AV_PIX_FMT_YUVA420P10, AV_PIX_FMT_YUVA420P16,
+ AV_PIX_FMT_GBRAP, AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
AV_PIX_FMT_NONE
};
AVFilterFormats *formats = ff_make_format_list(pixel_fmts);
av_log(ctx, AV_LOG_WARNING, "size %d is invalid. Must be an odd value, setting it to %d.\n", s->size, s->size|1);
s->size |= 1;
}
- s->mid = s->size / 2 + 1;
+ s->radius = s->size / 2;
+ s->mid = s->radius;
return 0;
}
AVFrame *in, *out;
} ThreadData;
-static int filter_slice8(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+#define WFILTER_ROW(type, name) \
+static void fweight_row##name(const uint8_t *ssrc, uint8_t *ddst, \
+ const uint8_t *ssrcf[SIZE], \
+ int w, int mid, int size, \
+ int thra, int thrb, const float *weights) \
+{ \
+ const type *src = (const type *)ssrc; \
+ const type **srcf = (const type **)ssrcf; \
+ type *dst = (type *)ddst; \
+ \
+ for (int x = 0; x < w; x++) { \
+ const int srcx = src[x]; \
+ unsigned lsumdiff = 0, rsumdiff = 0; \
+ unsigned ldiff, rdiff; \
+ float sum = srcx; \
+ float wsum = 1.f; \
+ int l = 0, r = 0; \
+ int srcjx, srcix; \
+ \
+ for (int j = mid - 1, i = mid + 1; j >= 0 && i < size; j--, i++) { \
+ srcjx = srcf[j][x]; \
+ \
+ ldiff = FFABS(srcx - srcjx); \
+ lsumdiff += ldiff; \
+ if (ldiff > thra || \
+ lsumdiff > thrb) \
+ break; \
+ l++; \
+ sum += srcjx * weights[j]; \
+ wsum += weights[j]; \
+ \
+ srcix = srcf[i][x]; \
+ \
+ rdiff = FFABS(srcx - srcix); \
+ rsumdiff += rdiff; \
+ if (rdiff > thra || \
+ rsumdiff > thrb) \
+ break; \
+ r++; \
+ sum += srcix * weights[i]; \
+ wsum += weights[i]; \
+ } \
+ \
+ dst[x] = lrintf(sum / wsum); \
+ } \
+}
+
+WFILTER_ROW(uint8_t, 8)
+WFILTER_ROW(uint16_t, 16)
+
+#define WFILTER_ROW_SERIAL(type, name) \
+static void fweight_row##name##_serial(const uint8_t *ssrc, uint8_t *ddst, \
+ const uint8_t *ssrcf[SIZE], \
+ int w, int mid, int size, \
+ int thra, int thrb, \
+ const float *weights) \
+{ \
+ const type *src = (const type *)ssrc; \
+ const type **srcf = (const type **)ssrcf; \
+ type *dst = (type *)ddst; \
+ \
+ for (int x = 0; x < w; x++) { \
+ const int srcx = src[x]; \
+ unsigned lsumdiff = 0, rsumdiff = 0; \
+ unsigned ldiff, rdiff; \
+ float sum = srcx; \
+ float wsum = 1.f; \
+ int l = 0, r = 0; \
+ int srcjx, srcix; \
+ \
+ for (int j = mid - 1; j >= 0; j--) { \
+ srcjx = srcf[j][x]; \
+ \
+ ldiff = FFABS(srcx - srcjx); \
+ lsumdiff += ldiff; \
+ if (ldiff > thra || \
+ lsumdiff > thrb) \
+ break; \
+ l++; \
+ sum += srcjx * weights[j]; \
+ wsum += weights[j]; \
+ } \
+ \
+ for (int i = mid + 1; i < size; i++) { \
+ srcix = srcf[i][x]; \
+ \
+ rdiff = FFABS(srcx - srcix); \
+ rsumdiff += rdiff; \
+ if (rdiff > thra || \
+ rsumdiff > thrb) \
+ break; \
+ r++; \
+ sum += srcix * weights[i]; \
+ wsum += weights[i]; \
+ } \
+ \
+ dst[x] = lrintf(sum / wsum); \
+ } \
+}
+
+WFILTER_ROW_SERIAL(uint8_t, 8)
+WFILTER_ROW_SERIAL(uint16_t, 16)
+
+#define FILTER_ROW(type, name) \
+static void filter_row##name(const uint8_t *ssrc, uint8_t *ddst, \
+ const uint8_t *ssrcf[SIZE], \
+ int w, int mid, int size, \
+ int thra, int thrb, const float *weights) \
+{ \
+ const type *src = (const type *)ssrc; \
+ const type **srcf = (const type **)ssrcf; \
+ type *dst = (type *)ddst; \
+ \
+ for (int x = 0; x < w; x++) { \
+ const int srcx = src[x]; \
+ unsigned lsumdiff = 0, rsumdiff = 0; \
+ unsigned ldiff, rdiff; \
+ unsigned sum = srcx; \
+ int l = 0, r = 0; \
+ int srcjx, srcix; \
+ \
+ for (int j = mid - 1, i = mid + 1; j >= 0 && i < size; j--, i++) { \
+ srcjx = srcf[j][x]; \
+ \
+ ldiff = FFABS(srcx - srcjx); \
+ lsumdiff += ldiff; \
+ if (ldiff > thra || \
+ lsumdiff > thrb) \
+ break; \
+ l++; \
+ sum += srcjx; \
+ \
+ srcix = srcf[i][x]; \
+ \
+ rdiff = FFABS(srcx - srcix); \
+ rsumdiff += rdiff; \
+ if (rdiff > thra || \
+ rsumdiff > thrb) \
+ break; \
+ r++; \
+ sum += srcix; \
+ } \
+ \
+ dst[x] = (sum + ((r + l + 1) >> 1)) / (r + l + 1); \
+ } \
+}
+
+FILTER_ROW(uint8_t, 8)
+FILTER_ROW(uint16_t, 16)
+
+#define FILTER_ROW_SERIAL(type, name) \
+static void filter_row##name##_serial(const uint8_t *ssrc, uint8_t *ddst, \
+ const uint8_t *ssrcf[SIZE], \
+ int w, int mid, int size, \
+ int thra, int thrb, \
+ const float *weights) \
+{ \
+ const type *src = (const type *)ssrc; \
+ const type **srcf = (const type **)ssrcf; \
+ type *dst = (type *)ddst; \
+ \
+ for (int x = 0; x < w; x++) { \
+ const int srcx = src[x]; \
+ unsigned lsumdiff = 0, rsumdiff = 0; \
+ unsigned ldiff, rdiff; \
+ unsigned sum = srcx; \
+ int l = 0, r = 0; \
+ int srcjx, srcix; \
+ \
+ for (int j = mid - 1; j >= 0; j--) { \
+ srcjx = srcf[j][x]; \
+ \
+ ldiff = FFABS(srcx - srcjx); \
+ lsumdiff += ldiff; \
+ if (ldiff > thra || \
+ lsumdiff > thrb) \
+ break; \
+ l++; \
+ sum += srcjx; \
+ } \
+ \
+ for (int i = mid + 1; i < size; i++) { \
+ srcix = srcf[i][x]; \
+ \
+ rdiff = FFABS(srcx - srcix); \
+ rsumdiff += rdiff; \
+ if (rdiff > thra || \
+ rsumdiff > thrb) \
+ break; \
+ r++; \
+ sum += srcix; \
+ } \
+ \
+ dst[x] = (sum + ((r + l + 1) >> 1)) / (r + l + 1); \
+ } \
+}
+
+FILTER_ROW_SERIAL(uint8_t, 8)
+FILTER_ROW_SERIAL(uint16_t, 16)
+
+static int filter_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
{
ATADenoiseContext *s = ctx->priv;
ThreadData *td = arg;
AVFrame *out = td->out;
const int size = s->size;
const int mid = s->mid;
- int p, x, y, i, j;
+ int p, y, i;
for (p = 0; p < s->nb_planes; p++) {
+ const float *weights = s->weights[p];
const int h = s->planeheight[p];
const int w = s->planewidth[p];
const int slice_start = (h * jobnr) / nb_jobs;
srcf[i] = data[i] + slice_start * linesize[i];
for (y = slice_start; y < slice_end; y++) {
- for (x = 0; x < w; x++) {
- const int srcx = src[x];
- unsigned lsumdiff = 0, rsumdiff = 0;
- unsigned ldiff, rdiff;
- unsigned sum = srcx;
- int l = 0, r = 0;
- int srcjx, srcix;
-
- for (j = mid - 1, i = mid + 1; j >= 0 && i < size; j--, i++) {
- srcjx = srcf[j][x];
-
- ldiff = FFABS(srcx - srcjx);
- lsumdiff += ldiff;
- if (ldiff > thra ||
- lsumdiff > thrb)
- break;
- l++;
- sum += srcjx;
-
- srcix = srcf[i][x];
-
- rdiff = FFABS(srcx - srcix);
- rsumdiff += rdiff;
- if (rdiff > thra ||
- rsumdiff > thrb)
- break;
- r++;
- sum += srcix;
- }
-
- dst[x] = sum / (r + l + 1);
- }
+ s->dsp.filter_row[p](src, dst, srcf, w, mid, size, thra, thrb, weights);
dst += out->linesize[p];
src += in->linesize[p];
return 0;
}
-static int filter_slice16(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
-{
- ATADenoiseContext *s = ctx->priv;
- ThreadData *td = arg;
- AVFrame *in = td->in;
- AVFrame *out = td->out;
- const int size = s->size;
- const int mid = s->mid;
- int p, x, y, i, j;
-
- for (p = 0; p < s->nb_planes; p++) {
- const int h = s->planeheight[p];
- const int w = s->planewidth[p];
- const int slice_start = (h * jobnr) / nb_jobs;
- const int slice_end = (h * (jobnr+1)) / nb_jobs;
- const uint16_t *src = (uint16_t *)(in->data[p] + slice_start * in->linesize[p]);
- uint16_t *dst = (uint16_t *)(out->data[p] + slice_start * out->linesize[p]);
- const int thra = s->thra[p];
- const int thrb = s->thrb[p];
- const uint8_t **data = (const uint8_t **)s->data[p];
- const int *linesize = (const int *)s->linesize[p];
- const uint16_t *srcf[SIZE];
-
- if (!((1 << p) & s->planes)) {
- av_image_copy_plane((uint8_t *)dst, out->linesize[p], (uint8_t *)src, in->linesize[p],
- w * 2, slice_end - slice_start);
- continue;
- }
-
- for (i = 0; i < s->size; i++)
- srcf[i] = (const uint16_t *)(data[i] + slice_start * linesize[i]);
-
- for (y = slice_start; y < slice_end; y++) {
- for (x = 0; x < w; x++) {
- const int srcx = src[x];
- unsigned lsumdiff = 0, rsumdiff = 0;
- unsigned ldiff, rdiff;
- unsigned sum = srcx;
- int l = 0, r = 0;
- int srcjx, srcix;
-
- for (j = mid - 1, i = mid + 1; j >= 0 && i < size; j--, i++) {
- srcjx = srcf[j][x];
-
- ldiff = FFABS(srcx - srcjx);
- lsumdiff += ldiff;
- if (ldiff > thra ||
- lsumdiff > thrb)
- break;
- l++;
- sum += srcjx;
-
- srcix = srcf[i][x];
-
- rdiff = FFABS(srcx - srcix);
- rsumdiff += rdiff;
- if (rdiff > thra ||
- rsumdiff > thrb)
- break;
- r++;
- sum += srcix;
- }
-
- dst[x] = sum / (r + l + 1);
- }
-
- dst += out->linesize[p] / 2;
- src += in->linesize[p] / 2;
-
- for (i = 0; i < size; i++)
- srcf[i] += linesize[i] / 2;
- }
- }
-
- return 0;
-}
-
static int config_input(AVFilterLink *inlink)
{
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(inlink->format);
s->planewidth[0] = s->planewidth[3] = inlink->w;
depth = desc->comp[0].depth;
- if (depth == 8)
- s->filter_slice = filter_slice8;
- else
- s->filter_slice = filter_slice16;
+ s->filter_slice = filter_slice;
+
+ for (int p = 0; p < s->nb_planes; p++) {
+ if (depth == 8 && s->sigma[p] == INT16_MAX)
+ s->dsp.filter_row[p] = s->algorithm == PARALLEL ? filter_row8 : filter_row8_serial;
+ else if (s->sigma[p] == INT16_MAX)
+ s->dsp.filter_row[p] = s->algorithm == PARALLEL ? filter_row16 : filter_row16_serial;
+ else if (depth == 8 && s->sigma[p] < INT16_MAX)
+ s->dsp.filter_row[p] = s->algorithm == PARALLEL ? fweight_row8 : fweight_row8_serial;
+ else if (s->sigma[p] < INT16_MAX)
+ s->dsp.filter_row[p] = s->algorithm == PARALLEL ? fweight_row16 : fweight_row16_serial;
+ }
s->thra[0] = s->fthra[0] * (1 << depth) - 1;
s->thra[1] = s->fthra[1] * (1 << depth) - 1;
s->thrb[1] = s->fthrb[1] * (1 << depth) - 1;
s->thrb[2] = s->fthrb[2] * (1 << depth) - 1;
+ for (int p = 0; p < s->nb_planes; p++) {
+ float sigma = s->radius * s->sigma[p];
+
+ s->weights[p][s->radius] = 1.f;
+ for (int n = 1; n <= s->radius; n++) {
+ s->weights[p][s->radius + n] =
+ s->weights[p][s->radius - n] = expf(-0.5 * (n + 1) * (n + 1) / (sigma * sigma));
+ }
+ }
+
+ if (ARCH_X86)
+ ff_atadenoise_init_x86(&s->dsp, depth, s->algorithm, s->sigma);
+
return 0;
}
ff_bufqueue_discard_all(&s->q);
}
+static int process_command(AVFilterContext *ctx,
+ const char *cmd,
+ const char *arg,
+ char *res,
+ int res_len,
+ int flags)
+{
+ int ret = ff_filter_process_command(ctx, cmd, arg, res, res_len, flags);
+
+ if (ret < 0)
+ return ret;
+
+ return config_input(ctx->inputs[0]);
+}
+
static const AVFilterPad inputs[] = {
{
.name = "default",
{ NULL }
};
-AVFilter ff_vf_atadenoise = {
+const AVFilter ff_vf_atadenoise = {
.name = "atadenoise",
.description = NULL_IF_CONFIG_SMALL("Apply an Adaptive Temporal Averaging Denoiser."),
.priv_size = sizeof(ATADenoiseContext),
.inputs = inputs,
.outputs = outputs,
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | AVFILTER_FLAG_SLICE_THREADS,
+ .process_command = process_command,
};