#include "internal.h"
#include "video.h"
+typedef struct ThreadData {
+ AVFrame *m, *a, *d;
+} ThreadData;
+
typedef struct PreMultiplyContext {
const AVClass *class;
int width[4], height[4];
AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV444P14,
AV_PIX_FMT_YUV444P16,
AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10,
- AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16,
- AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16,
+ AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRPF32,
+ AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16,
AV_PIX_FMT_NONE
};
static const enum AVPixelFormat alpha_pix_fmts[] = {
AV_PIX_FMT_YUVA444P,
- AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P16,
+ AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P12, AV_PIX_FMT_YUVA444P16,
AV_PIX_FMT_GBRAP,
- AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16,
+ AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16, AV_PIX_FMT_GBRAPF32,
AV_PIX_FMT_NONE
};
for (y = 0; y < h; y++) {
for (x = 0; x < w; x++) {
- dst[x] = ((((msrc[x] - half) * (((asrc[x] >> 1) & 1) + asrc[x]))) >> shift) + half;
+ dst[x] = ((((msrc[x] - half) * (int64_t)(((asrc[x] >> 1) & 1) + asrc[x]))) >> shift) + half;
}
dst += dlinesize / 2;
for (y = 0; y < h; y++) {
for (x = 0; x < w; x++) {
- dst[x] = ((((msrc[x] - offset) * (((asrc[x] >> 1) & 1) + asrc[x])) + half) >> shift) + offset;
+ dst[x] = ((((msrc[x] - offset) * (int64_t)(((asrc[x] >> 1) & 1) + asrc[x])) + half) >> shift) + offset;
}
dst += dlinesize / 2;
}
}
+static void premultiplyf32(const uint8_t *mmsrc, const uint8_t *aasrc,
+ uint8_t *ddst,
+ ptrdiff_t mlinesize, ptrdiff_t alinesize,
+ ptrdiff_t dlinesize,
+ int w, int h,
+ int half, int shift, int offset)
+{
+ const float *msrc = (const float *)mmsrc;
+ const float *asrc = (const float *)aasrc;
+ float *dst = (float *)ddst;
+ int x, y;
+
+ for (y = 0; y < h; y++) {
+ for (x = 0; x < w; x++) {
+ dst[x] = msrc[x] * asrc[x];
+ }
+
+ dst += dlinesize / 4;
+ msrc += mlinesize / 4;
+ asrc += alinesize / 4;
+ }
+}
+
+static void premultiplyf32offset(const uint8_t *mmsrc, const uint8_t *aasrc,
+ uint8_t *ddst,
+ ptrdiff_t mlinesize, ptrdiff_t alinesize,
+ ptrdiff_t dlinesize,
+ int w, int h,
+ int half, int shift, int offset)
+{
+ const float *msrc = (const float *)mmsrc;
+ const float *asrc = (const float *)aasrc;
+ float *dst = (float *)ddst;
+ int x, y;
+
+ float offsetf = offset / 65535.0f;
+
+ for (y = 0; y < h; y++) {
+ for (x = 0; x < w; x++) {
+ dst[x] = ((msrc[x] - offsetf) * asrc[x]) + offsetf;
+ }
+
+ dst += dlinesize / 4;
+ msrc += mlinesize / 4;
+ asrc += alinesize / 4;
+ }
+}
+
static void unpremultiply8(const uint8_t *msrc, const uint8_t *asrc,
uint8_t *dst,
ptrdiff_t mlinesize, ptrdiff_t alinesize,
for (y = 0; y < h; y++) {
for (x = 0; x < w; x++) {
if (asrc[x] > 0 && asrc[x] < 255)
- dst[x] = FFMIN((msrc[x] - offset) * 255 / asrc[x] + offset, 255);
+ dst[x] = FFMIN(FFMAX(msrc[x] - offset, 0) * 255 / asrc[x] + offset, 255);
else
dst[x] = msrc[x];
}
for (y = 0; y < h; y++) {
for (x = 0; x < w; x++) {
if (asrc[x] > 0 && asrc[x] < max)
- dst[x] = FFMAX(FFMIN((msrc[x] - offset) * (unsigned)max / asrc[x] + offset, max), 0);
+ dst[x] = FFMAX(FFMIN(FFMAX(msrc[x] - offset, 0) * (unsigned)max / asrc[x] + offset, max), 0);
else
dst[x] = msrc[x];
}
}
}
+static void unpremultiplyf32(const uint8_t *mmsrc, const uint8_t *aasrc,
+ uint8_t *ddst,
+ ptrdiff_t mlinesize, ptrdiff_t alinesize,
+ ptrdiff_t dlinesize,
+ int w, int h,
+ int half, int max, int offset)
+{
+ const float *msrc = (const float *)mmsrc;
+ const float *asrc = (const float *)aasrc;
+
+ float *dst = (float *)ddst;
+ int x, y;
+
+ for (y = 0; y < h; y++) {
+ for (x = 0; x < w; x++) {
+ if (asrc[x] > 0.0f)
+ dst[x] = msrc[x] / asrc[x];
+ else
+ dst[x] = msrc[x];
+ }
+
+ dst += dlinesize / 4;
+ msrc += mlinesize / 4;
+ asrc += alinesize / 4;
+ }
+}
+
+static void unpremultiplyf32offset(const uint8_t *mmsrc, const uint8_t *aasrc,
+ uint8_t *ddst,
+ ptrdiff_t mlinesize, ptrdiff_t alinesize,
+ ptrdiff_t dlinesize,
+ int w, int h,
+ int half, int max, int offset)
+{
+ const float *msrc = (const float *)mmsrc;
+ const float *asrc = (const float *)aasrc;
+
+ float *dst = (float *)ddst;
+ int x, y;
+
+ float offsetf = offset / 65535.0f;
+
+ for (y = 0; y < h; y++) {
+ for (x = 0; x < w; x++) {
+ if (asrc[x] > 0.0f)
+ dst[x] = (msrc[x] - offsetf) / asrc[x] + offsetf;
+ else
+ dst[x] = msrc[x];
+ }
+
+ dst += dlinesize / 4;
+ msrc += mlinesize / 4;
+ asrc += alinesize / 4;
+ }
+}
+
+static int premultiply_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+ PreMultiplyContext *s = ctx->priv;
+ ThreadData *td = arg;
+ AVFrame *out = td->d;
+ AVFrame *alpha = td->a;
+ AVFrame *base = td->m;
+ int p;
+
+ for (p = 0; p < s->nb_planes; p++) {
+ const int slice_start = (s->height[p] * jobnr) / nb_jobs;
+ const int slice_end = (s->height[p] * (jobnr+1)) / nb_jobs;
+
+ if (!((1 << p) & s->planes) || p == 3) {
+ av_image_copy_plane(out->data[p] + slice_start * out->linesize[p],
+ out->linesize[p],
+ base->data[p] + slice_start * base->linesize[p],
+ base->linesize[p],
+ s->linesize[p], slice_end - slice_start);
+ continue;
+ }
+
+ s->premultiply[p](base->data[p] + slice_start * base->linesize[p],
+ s->inplace ? alpha->data[3] + slice_start * alpha->linesize[3] :
+ alpha->data[0] + slice_start * alpha->linesize[0],
+ out->data[p] + slice_start * out->linesize[p],
+ base->linesize[p], s->inplace ? alpha->linesize[3] : alpha->linesize[0],
+ out->linesize[p],
+ s->width[p], slice_end - slice_start,
+ s->half, s->inverse ? s->max : s->depth, s->offset);
+ }
+
+ return 0;
+}
+
static int filter_frame(AVFilterContext *ctx,
AVFrame **out, AVFrame *base, AVFrame *alpha)
{
if (!*out)
return AVERROR(ENOMEM);
} else {
- int p, full, limited;
+ ThreadData td;
+ int full, limited;
*out = ff_get_video_buffer(outlink, outlink->w, outlink->h);
if (!*out)
case AV_PIX_FMT_YUV444P10:
case AV_PIX_FMT_YUVA444P10:
case AV_PIX_FMT_YUV444P12:
+ case AV_PIX_FMT_YUVA444P12:
case AV_PIX_FMT_YUV444P14:
case AV_PIX_FMT_YUV444P16:
case AV_PIX_FMT_YUVA444P16:
case AV_PIX_FMT_GBRAP16:
s->premultiply[0] = s->premultiply[1] = s->premultiply[2] = limited ? unpremultiply16offset : unpremultiply16;
break;
+ case AV_PIX_FMT_GBRPF32:
+ case AV_PIX_FMT_GBRAPF32:
+ s->premultiply[0] = s->premultiply[1] = s->premultiply[2] = limited ? unpremultiplyf32offset : unpremultiplyf32;
+ break;
case AV_PIX_FMT_GRAY8:
s->premultiply[0] = limited ? unpremultiply8offset : unpremultiply8;
break;
case AV_PIX_FMT_GRAY9:
case AV_PIX_FMT_GRAY10:
case AV_PIX_FMT_GRAY12:
+ case AV_PIX_FMT_GRAY14:
case AV_PIX_FMT_GRAY16:
s->premultiply[0] = limited ? unpremultiply16offset : unpremultiply16;
break;
case AV_PIX_FMT_YUV444P10:
case AV_PIX_FMT_YUVA444P10:
case AV_PIX_FMT_YUV444P12:
+ case AV_PIX_FMT_YUVA444P12:
case AV_PIX_FMT_YUV444P14:
case AV_PIX_FMT_YUV444P16:
case AV_PIX_FMT_YUVA444P16:
case AV_PIX_FMT_GBRAP16:
s->premultiply[0] = s->premultiply[1] = s->premultiply[2] = limited ? premultiply16offset : premultiply16;
break;
+ case AV_PIX_FMT_GBRPF32:
+ case AV_PIX_FMT_GBRAPF32:
+ s->premultiply[0] = s->premultiply[1] = s->premultiply[2] = limited ? premultiplyf32offset: premultiplyf32;
+ break;
case AV_PIX_FMT_GRAY8:
s->premultiply[0] = limited ? premultiply8offset : premultiply8;
break;
case AV_PIX_FMT_GRAY9:
case AV_PIX_FMT_GRAY10:
case AV_PIX_FMT_GRAY12:
+ case AV_PIX_FMT_GRAY14:
case AV_PIX_FMT_GRAY16:
s->premultiply[0] = limited ? premultiply16offset : premultiply16;
break;
}
}
- for (p = 0; p < s->nb_planes; p++) {
- if (!((1 << p) & s->planes) || p == 3) {
- av_image_copy_plane((*out)->data[p], (*out)->linesize[p], base->data[p], base->linesize[p],
- s->linesize[p], s->height[p]);
- continue;
- }
-
- s->premultiply[p](base->data[p], s->inplace ? alpha->data[3] : alpha->data[0],
- (*out)->data[p],
- base->linesize[p], s->inplace ? alpha->linesize[3] : alpha->linesize[0],
- (*out)->linesize[p],
- s->width[p], s->height[p],
- s->half, s->inverse ? s->max : s->depth, s->offset);
- }
+ td.d = *out;
+ td.a = alpha;
+ td.m = base;
+ ctx->internal->execute(ctx, premultiply_slice, &td, NULL, FFMIN(s->height[0],
+ ff_filter_get_nb_threads(ctx)));
}
return 0;
s->width[1] = s->width[2] = AV_CEIL_RSHIFT(inlink->w, hsub);
s->width[0] = s->width[3] = inlink->w;
- s->depth = desc->comp[0].depth;
+ s->depth = desc->flags & AV_PIX_FMT_FLAG_FLOAT ? 16 : desc->comp[0].depth;
s->max = (1 << s->depth) - 1;
s->half = (1 << s->depth) / 2;
s->offset = 16 << (s->depth - 8);
int ret, status;
int64_t pts;
+ FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx);
+
if ((ret = ff_inlink_consume_frame(ctx->inputs[0], &frame)) > 0) {
ret = filter_frame(ctx, &out, frame, frame);
av_frame_free(&frame);
s->inverse = 1;
pad.type = AVMEDIA_TYPE_VIDEO;
- pad.name = av_strdup("main");
+ pad.name = "main";
pad.config_props = config_input;
- if (!pad.name)
- return AVERROR(ENOMEM);
- if ((ret = ff_insert_inpad(ctx, 0, &pad)) < 0) {
- av_freep(&pad.name);
+ if ((ret = ff_insert_inpad(ctx, 0, &pad)) < 0)
return ret;
- }
if (!s->inplace) {
pad.type = AVMEDIA_TYPE_VIDEO;
- pad.name = av_strdup("alpha");
+ pad.name = "alpha";
pad.config_props = NULL;
- if (!pad.name)
- return AVERROR(ENOMEM);
- if ((ret = ff_insert_inpad(ctx, 1, &pad)) < 0) {
- av_freep(&pad.name);
+ if ((ret = ff_insert_inpad(ctx, 1, &pad)) < 0)
return ret;
- }
}
return 0;
.outputs = premultiply_outputs,
.priv_class = &premultiply_class,
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
- AVFILTER_FLAG_DYNAMIC_INPUTS,
+ AVFILTER_FLAG_DYNAMIC_INPUTS |
+ AVFILTER_FLAG_SLICE_THREADS,
};
#endif /* CONFIG_PREMULTIPLY_FILTER */
.outputs = premultiply_outputs,
.priv_class = &unpremultiply_class,
.flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL |
- AVFILTER_FLAG_DYNAMIC_INPUTS,
+ AVFILTER_FLAG_DYNAMIC_INPUTS |
+ AVFILTER_FLAG_SLICE_THREADS,
};
#endif /* CONFIG_UNPREMULTIPLY_FILTER */