+/**
+ * Apply the 7x7 convolution matrix of one plane to a horizontal slice of a
+ * frame whose samples are stored as uint16_t.
+ *
+ * The plane height is split evenly between nb_jobs; this call handles rows
+ * [height * jobnr / nb_jobs, height * (jobnr + 1) / nb_jobs).
+ *
+ * @param ctx     filter context; ctx->priv is the ConvolutionContext
+ * @param arg     ThreadData providing the input frame, output frame and plane
+ * @param jobnr   index of this job; selects both the slice and s->bptrs[jobnr]
+ * @param nb_jobs total number of jobs
+ * @return always 0
+ */
+static int filter16_7x7(AVFilterContext *ctx, void *arg, int jobnr, int nb_jobs)
+{
+    ConvolutionContext *s = ctx->priv;
+    ThreadData *td = arg;
+    AVFrame *in = td->in;
+    AVFrame *out = td->out;
+    const int plane = td->plane;
+    const int peak = (1 << s->depth) - 1;
+    /* linesize is in bytes; divide by 2 to step in uint16_t samples. */
+    const int stride = in->linesize[plane] / 2;
+    const int dstride = out->linesize[plane] / 2;
+    const int bstride = s->bstride;
+    const int height = s->planeheight[plane];
+    const int width = s->planewidth[plane];
+    const int slice_start = (height * jobnr) / nb_jobs;
+    const int slice_end = (height * (jobnr+1)) / nb_jobs;
+    const uint16_t *src = (const uint16_t *)in->data[plane] + slice_start * stride;
+    uint16_t *dst = (uint16_t *)out->data[plane] + slice_start * dstride;
+    /*
+     * Seven row buffers, bstride samples apart, carved out of this job's
+     * scratch area. NOTE(review): the +32 offset presumably leaves room for
+     * the left-hand padding written by line_copy16() -- confirm against the
+     * buffer allocation.
+     */
+    uint16_t *p0 = (uint16_t *)s->bptrs[jobnr] + 32;
+    uint16_t *p1 = p0 + bstride;
+    uint16_t *p2 = p1 + bstride;
+    uint16_t *p3 = p2 + bstride;
+    uint16_t *p4 = p3 + bstride;
+    uint16_t *p5 = p4 + bstride;
+    uint16_t *p6 = p5 + bstride;
+    uint16_t *orig = p0, *end = p6;
+    const int *matrix = s->matrix[plane];
+    float rdiv = s->rdiv[plane];
+    float bias = s->bias[plane];
+    int y, x, i;
+
+    /*
+     * Prime the three rows above the first output row. When the slice starts
+     * too close to the top of the plane for a row to exist above it, the row
+     * the same distance below is used instead.
+     */
+    line_copy16(p0, src + 3 * stride * (slice_start < 3 ? 1 : -1), width, 3);
+    line_copy16(p1, src + 2 * stride * (slice_start < 2 ? 1 : -1), width, 3);
+    line_copy16(p2, src + stride * (slice_start == 0 ? 1 : -1), width, 3);
+    /* Current row and the two rows below it. */
+    line_copy16(p3, src, width, 3);
+    src += stride;
+    line_copy16(p4, src, width, 3);
+    src += stride;
+    line_copy16(p5, src, width, 3);
+
+    for (y = slice_start; y < slice_end; y++) {
+        /*
+         * One pointer per kernel tap, row-major (7 rows x 7 columns), in the
+         * same order as matrix[]. Rebuilt every row because p0..p6 rotate.
+         */
+        uint16_t *array[] = {
+            p0 - 3, p0 - 2, p0 - 1, p0, p0 + 1, p0 + 2, p0 + 3,
+            p1 - 3, p1 - 2, p1 - 1, p1, p1 + 1, p1 + 2, p1 + 3,
+            p2 - 3, p2 - 2, p2 - 1, p2, p2 + 1, p2 + 2, p2 + 3,
+            p3 - 3, p3 - 2, p3 - 1, p3, p3 + 1, p3 + 2, p3 + 3,
+            p4 - 3, p4 - 2, p4 - 1, p4, p4 + 1, p4 + 2, p4 + 3,
+            p5 - 3, p5 - 2, p5 - 1, p5, p5 + 1, p5 + 2, p5 + 3,
+            p6 - 3, p6 - 2, p6 - 1, p6, p6 + 1, p6 + 2, p6 + 3,
+        };
+        /* 49 taps; derived from the table so the two cannot drift apart. */
+        const int nb_taps = sizeof(array) / sizeof(array[0]);
+
+        /*
+         * Fetch the row three below the current one; once that would run past
+         * the bottom of the plane, step back upwards instead.
+         */
+        src += stride * (y < height - 3 ? 1 : -1);
+        line_copy16(p6, src, width, 3);
+
+        for (x = 0; x < width; x++) {
+            int sum = 0;
+
+            /* Accumulate every tap of the 7x7 kernel. */
+            for (i = 0; i < nb_taps; i++) {
+                sum += *(array[i] + x) * matrix[i];
+            }
+            /* Scale, offset, round to nearest and clamp to the sample range. */
+            sum = (int)(sum * rdiv + bias + 0.5f);
+            dst[x] = av_clip(sum, 0, peak);
+        }
+
+        /* Slide the 7-row window down one row, reusing the buffers as a ring. */
+        p0 = p1;
+        p1 = p2;
+        p2 = p3;
+        p3 = p4;
+        p4 = p5;
+        p5 = p6;
+        p6 = (p6 == end) ? orig: p6 + bstride;
+        dst += dstride;
+    }
+
+    return 0;
+}
+