X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavfilter%2Fvf_premultiply.c;h=b5d0ca6d67d121c045909bb72d0979d6d040c420;hb=2606c48391377681541111263de41c79adeffa49;hp=df4f26578d114a75f784fe63ebef4e4c07ee604a;hpb=8e9be8ffba03d3327ca1aaf0b1a5f3e2458ea285;p=ffmpeg diff --git a/libavfilter/vf_premultiply.c b/libavfilter/vf_premultiply.c index df4f26578d1..b5d0ca6d67d 100644 --- a/libavfilter/vf_premultiply.c +++ b/libavfilter/vf_premultiply.c @@ -28,6 +28,10 @@ #include "internal.h" #include "video.h" +typedef struct ThreadData { + AVFrame *m, *a, *d; +} ThreadData; + typedef struct PreMultiplyContext { const AVClass *class; int width[4], height[4]; @@ -69,16 +73,16 @@ static int query_formats(AVFilterContext *ctx) AV_PIX_FMT_YUV444P12, AV_PIX_FMT_YUV444P14, AV_PIX_FMT_YUV444P16, AV_PIX_FMT_GBRP, AV_PIX_FMT_GBRP9, AV_PIX_FMT_GBRP10, - AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, - AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY16, + AV_PIX_FMT_GBRP12, AV_PIX_FMT_GBRP14, AV_PIX_FMT_GBRP16, AV_PIX_FMT_GBRPF32, + AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY9, AV_PIX_FMT_GRAY10, AV_PIX_FMT_GRAY12, AV_PIX_FMT_GRAY14, AV_PIX_FMT_GRAY16, AV_PIX_FMT_NONE }; static const enum AVPixelFormat alpha_pix_fmts[] = { AV_PIX_FMT_YUVA444P, - AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P16, + AV_PIX_FMT_YUVA444P9, AV_PIX_FMT_YUVA444P10, AV_PIX_FMT_YUVA444P12, AV_PIX_FMT_YUVA444P16, AV_PIX_FMT_GBRAP, - AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16, + AV_PIX_FMT_GBRAP10, AV_PIX_FMT_GBRAP12, AV_PIX_FMT_GBRAP16, AV_PIX_FMT_GBRAPF32, AV_PIX_FMT_NONE }; @@ -182,7 +186,7 @@ static void premultiply16yuv(const uint8_t *mmsrc, const uint8_t *aasrc, for (y = 0; y < h; y++) { for (x = 0; x < w; x++) { - dst[x] = ((((msrc[x] - half) * (((asrc[x] >> 1) & 1) + asrc[x]))) >> shift) + half; + dst[x] = ((((msrc[x] - half) * (int64_t)(((asrc[x] >> 1) & 1) + asrc[x]))) >> shift) + half; } dst += dlinesize / 2; @@ -205,7 +209,7 @@ static void premultiply16offset(const uint8_t *mmsrc, const uint8_t *aasrc, for (y = 0; y < h; y++) { for (x = 0; x < w; x++) { - dst[x] = ((((msrc[x] - offset) * (((asrc[x] >> 1) & 1) + asrc[x])) + half) >> shift) + offset; + dst[x] = ((((msrc[x] - offset) * (int64_t)(((asrc[x] >> 1) & 1) + asrc[x])) + half) >> shift) + offset; } dst += dlinesize / 2; @@ -214,6 +218,54 @@ static void premultiply16offset(const uint8_t *mmsrc, const uint8_t *aasrc, } } +static void premultiplyf32(const uint8_t *mmsrc, const uint8_t *aasrc, + uint8_t *ddst, + ptrdiff_t mlinesize, ptrdiff_t alinesize, + ptrdiff_t dlinesize, + int w, int h, + int half, int shift, int offset) +{ + const float *msrc = (const float *)mmsrc; + const float *asrc = (const float *)aasrc; + float *dst = (float *)ddst; + int x, y; + + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + dst[x] = msrc[x] * asrc[x]; + } + + dst += dlinesize / 4; + msrc += mlinesize / 4; + asrc += alinesize / 4; + } +} + +static void premultiplyf32offset(const uint8_t *mmsrc, const uint8_t *aasrc, + uint8_t *ddst, + ptrdiff_t mlinesize, ptrdiff_t alinesize, + ptrdiff_t dlinesize, + int w, int h, + int half, int shift, int offset) +{ + const float *msrc = (const float *)mmsrc; + const float *asrc = (const float *)aasrc; + float *dst = (float *)ddst; + int x, y; + + float offsetf = offset / 65535.0f; + + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + dst[x] = ((msrc[x] - offsetf) * asrc[x]) + offsetf; + } + + dst += dlinesize / 4; + msrc += mlinesize / 4; + asrc += alinesize / 4; + } +} + static void unpremultiply8(const uint8_t *msrc, const uint8_t *asrc, uint8_t *dst, ptrdiff_t mlinesize, ptrdiff_t alinesize, @@ -272,7 +324,7 @@ static void unpremultiply8offset(const uint8_t *msrc, const uint8_t *asrc, for (y = 0; y < h; y++) { for (x = 0; x < w; x++) { if (asrc[x] > 0 && asrc[x] < 255) - dst[x] = FFMIN((msrc[x] - offset) * 255 / asrc[x] + offset, 255); + dst[x] = FFMIN(FFMAX(msrc[x] - offset, 0) * 255 / asrc[x] + offset, 255); else dst[x] = msrc[x]; } @@ -350,7 +402,7 @@ static void unpremultiply16offset(const uint8_t *mmsrc, const uint8_t *aasrc, for (y = 0; y < h; y++) { for (x = 0; x < w; x++) { if (asrc[x] > 0 && asrc[x] < max) - dst[x] = FFMAX(FFMIN((msrc[x] - offset) * (unsigned)max / asrc[x] + offset, max), 0); + dst[x] = FFMAX(FFMIN(FFMAX(msrc[x] - offset, 0) * (unsigned)max / asrc[x] + offset, max), 0); else dst[x] = msrc[x]; } @@ -361,6 +413,97 @@ static void unpremultiply16offset(const uint8_t *mmsrc, const uint8_t *aasrc, } } +static void unpremultiplyf32(const uint8_t *mmsrc, const uint8_t *aasrc, + uint8_t *ddst, + ptrdiff_t mlinesize, ptrdiff_t alinesize, + ptrdiff_t dlinesize, + int w, int h, + int half, int max, int offset) +{ + const float *msrc = (const float *)mmsrc; + const float *asrc = (const float *)aasrc; + + float *dst = (float *)ddst; + int x, y; + + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + if (asrc[x] > 0.0f) + dst[x] = msrc[x] / asrc[x]; + else + dst[x] = msrc[x]; + } + + dst += dlinesize / 4; + msrc += mlinesize / 4; + asrc += alinesize / 4; + } +} + +static void unpremultiplyf32offset(const uint8_t *mmsrc, const uint8_t *aasrc, + uint8_t *ddst, + ptrdiff_t mlinesize, ptrdiff_t alinesize, + ptrdiff_t dlinesize, + int w, int h, + int half, int max, int offset) +{ + const float *msrc = (const float *)mmsrc; + const float *asrc = (const float *)aasrc; + + float *dst = (float *)ddst; + int x, y; + + float offsetf = offset / 65535.0f; + + for (y = 0; y < h; y++) { + for (x = 0; x < w; x++) { + if (asrc[x] > 0.0f) + dst[x] = (msrc[x] - offsetf) / asrc[x] + offsetf; + else + dst[x] = msrc[x]; + } + + dst += dlinesize / 4; + msrc += mlinesize / 4; + asrc += alinesize / 4; + } +} + +static int premultiply_slice(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs) +{ + PreMultiplyContext *s = ctx->priv; + ThreadData *td = arg; + AVFrame *out = td->d; + AVFrame *alpha = td->a; + AVFrame *base = td->m; + int p; + + for (p = 0; p < s->nb_planes; p++) { + const int slice_start = (s->height[p] * jobnr) / nb_jobs; + const int slice_end = (s->height[p] * (jobnr+1)) / nb_jobs; + + if (!((1 << p) & s->planes) || p == 3) { + av_image_copy_plane(out->data[p] + slice_start * out->linesize[p], + out->linesize[p], + base->data[p] + slice_start * base->linesize[p], + base->linesize[p], + s->linesize[p], slice_end - slice_start); + continue; + } + + s->premultiply[p](base->data[p] + slice_start * base->linesize[p], + s->inplace ? alpha->data[3] + slice_start * alpha->linesize[3] : + alpha->data[0] + slice_start * alpha->linesize[0], + out->data[p] + slice_start * out->linesize[p], + base->linesize[p], s->inplace ? alpha->linesize[3] : alpha->linesize[0], + out->linesize[p], + s->width[p], slice_end - slice_start, + s->half, s->inverse ? s->max : s->depth, s->offset); + } + + return 0; +} + static int filter_frame(AVFilterContext *ctx, AVFrame **out, AVFrame *base, AVFrame *alpha) { @@ -372,7 +515,8 @@ static int filter_frame(AVFilterContext *ctx, if (!*out) return AVERROR(ENOMEM); } else { - int p, full, limited; + ThreadData td; + int full, limited; *out = ff_get_video_buffer(outlink, outlink->w, outlink->h); if (!*out) @@ -402,6 +546,7 @@ static int filter_frame(AVFilterContext *ctx, case AV_PIX_FMT_YUV444P10: case AV_PIX_FMT_YUVA444P10: case AV_PIX_FMT_YUV444P12: + case AV_PIX_FMT_YUVA444P12: case AV_PIX_FMT_YUV444P14: case AV_PIX_FMT_YUV444P16: case AV_PIX_FMT_YUVA444P16: @@ -418,12 +563,17 @@ static int filter_frame(AVFilterContext *ctx, case AV_PIX_FMT_GBRAP16: s->premultiply[0] = s->premultiply[1] = s->premultiply[2] = limited ? unpremultiply16offset : unpremultiply16; break; + case AV_PIX_FMT_GBRPF32: + case AV_PIX_FMT_GBRAPF32: + s->premultiply[0] = s->premultiply[1] = s->premultiply[2] = limited ? unpremultiplyf32offset : unpremultiplyf32; + break; case AV_PIX_FMT_GRAY8: s->premultiply[0] = limited ? unpremultiply8offset : unpremultiply8; break; case AV_PIX_FMT_GRAY9: case AV_PIX_FMT_GRAY10: case AV_PIX_FMT_GRAY12: + case AV_PIX_FMT_GRAY14: case AV_PIX_FMT_GRAY16: s->premultiply[0] = limited ? unpremultiply16offset : unpremultiply16; break; @@ -448,6 +598,7 @@ static int filter_frame(AVFilterContext *ctx, case AV_PIX_FMT_YUV444P10: case AV_PIX_FMT_YUVA444P10: case AV_PIX_FMT_YUV444P12: + case AV_PIX_FMT_YUVA444P12: case AV_PIX_FMT_YUV444P14: case AV_PIX_FMT_YUV444P16: case AV_PIX_FMT_YUVA444P16: @@ -464,32 +615,28 @@ static int filter_frame(AVFilterContext *ctx, case AV_PIX_FMT_GBRAP16: s->premultiply[0] = s->premultiply[1] = s->premultiply[2] = limited ? premultiply16offset : premultiply16; break; + case AV_PIX_FMT_GBRPF32: + case AV_PIX_FMT_GBRAPF32: + s->premultiply[0] = s->premultiply[1] = s->premultiply[2] = limited ? premultiplyf32offset: premultiplyf32; + break; case AV_PIX_FMT_GRAY8: s->premultiply[0] = limited ? premultiply8offset : premultiply8; break; case AV_PIX_FMT_GRAY9: case AV_PIX_FMT_GRAY10: case AV_PIX_FMT_GRAY12: + case AV_PIX_FMT_GRAY14: case AV_PIX_FMT_GRAY16: s->premultiply[0] = limited ? premultiply16offset : premultiply16; break; } } - for (p = 0; p < s->nb_planes; p++) { - if (!((1 << p) & s->planes) || p == 3) { - av_image_copy_plane((*out)->data[p], (*out)->linesize[p], base->data[p], base->linesize[p], - s->linesize[p], s->height[p]); - continue; - } - - s->premultiply[p](base->data[p], s->inplace ? alpha->data[3] : alpha->data[0], - (*out)->data[p], - base->linesize[p], s->inplace ? alpha->linesize[3] : alpha->linesize[0], - (*out)->linesize[p], - s->width[p], s->height[p], - s->half, s->inverse ? s->max : s->depth, s->offset); - } + td.d = *out; + td.a = alpha; + td.m = base; + ctx->internal->execute(ctx, premultiply_slice, &td, NULL, FFMIN(s->height[0], + ff_filter_get_nb_threads(ctx))); } return 0; @@ -534,7 +681,7 @@ static int config_input(AVFilterLink *inlink) s->width[1] = s->width[2] = AV_CEIL_RSHIFT(inlink->w, hsub); s->width[0] = s->width[3] = inlink->w; - s->depth = desc->comp[0].depth; + s->depth = desc->flags & AV_PIX_FMT_FLAG_FLOAT ? 16 : desc->comp[0].depth; s->max = (1 << s->depth) - 1; s->half = (1 << s->depth) / 2; s->offset = 16 << (s->depth - 8); @@ -606,6 +753,8 @@ static int activate(AVFilterContext *ctx) int ret, status; int64_t pts; + FF_FILTER_FORWARD_STATUS_BACK_ALL(ctx->outputs[0], ctx); + if ((ret = ff_inlink_consume_frame(ctx->inputs[0], &frame)) > 0) { ret = filter_frame(ctx, &out, frame, frame); av_frame_free(&frame); @@ -638,27 +787,19 @@ static av_cold int init(AVFilterContext *ctx) s->inverse = 1; pad.type = AVMEDIA_TYPE_VIDEO; - pad.name = av_strdup("main"); + pad.name = "main"; pad.config_props = config_input; - if (!pad.name) - return AVERROR(ENOMEM); - if ((ret = ff_insert_inpad(ctx, 0, &pad)) < 0) { - av_freep(&pad.name); + if ((ret = ff_insert_inpad(ctx, 0, &pad)) < 0) return ret; - } if (!s->inplace) { pad.type = AVMEDIA_TYPE_VIDEO; - pad.name = av_strdup("alpha"); + pad.name = "alpha"; pad.config_props = NULL; - if (!pad.name) - return AVERROR(ENOMEM); - if ((ret = ff_insert_inpad(ctx, 1, &pad)) < 0) { - av_freep(&pad.name); + if ((ret = ff_insert_inpad(ctx, 1, &pad)) < 0) return ret; - } } return 0; @@ -683,7 +824,7 @@ static const AVFilterPad premultiply_outputs[] = { #if CONFIG_PREMULTIPLY_FILTER -AVFilter ff_vf_premultiply = { +const AVFilter ff_vf_premultiply = { .name = "premultiply", .description = NULL_IF_CONFIG_SMALL("PreMultiply first stream with first plane of second stream."), .priv_size = sizeof(PreMultiplyContext), @@ -695,7 +836,8 @@ AVFilter ff_vf_premultiply = { .outputs = premultiply_outputs, .priv_class = &premultiply_class, .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | - AVFILTER_FLAG_DYNAMIC_INPUTS, + AVFILTER_FLAG_DYNAMIC_INPUTS | + AVFILTER_FLAG_SLICE_THREADS, }; #endif /* CONFIG_PREMULTIPLY_FILTER */ @@ -705,7 +847,7 @@ AVFilter ff_vf_premultiply = { #define unpremultiply_options options AVFILTER_DEFINE_CLASS(unpremultiply); -AVFilter ff_vf_unpremultiply = { +const AVFilter ff_vf_unpremultiply = { .name = "unpremultiply", .description = NULL_IF_CONFIG_SMALL("UnPreMultiply first stream with first plane of second stream."), .priv_size = sizeof(PreMultiplyContext), @@ -717,7 +859,8 @@ AVFilter ff_vf_unpremultiply = { .outputs = premultiply_outputs, .priv_class = &unpremultiply_class, .flags = AVFILTER_FLAG_SUPPORT_TIMELINE_INTERNAL | - AVFILTER_FLAG_DYNAMIC_INPUTS, + AVFILTER_FLAG_DYNAMIC_INPUTS | + AVFILTER_FLAG_SLICE_THREADS, }; #endif /* CONFIG_UNPREMULTIPLY_FILTER */