- ConvolutionContext *s = ctx->priv;
- ThreadData *td = arg;
- AVFrame *in = td->in;
- AVFrame *out = td->out;
- const int plane = td->plane;
- const int peak = (1 << s->depth) - 1;
- const int stride = in->linesize[plane] / 2;
- const int bstride = s->bstride;
- const int height = s->planeheight[plane];
- const int width = s->planewidth[plane];
- const int slice_start = (height * jobnr) / nb_jobs;
- const int slice_end = (height * (jobnr+1)) / nb_jobs;
- const uint16_t *src = (const uint16_t *)in->data[plane] + slice_start * stride;
- uint16_t *dst = (uint16_t *)out->data[plane] + slice_start * (out->linesize[plane] / 2);
- uint16_t *p0 = (uint16_t *)s->bptrs[jobnr] + 16;
- uint16_t *p1 = p0 + bstride;
- uint16_t *p2 = p1 + bstride;
- uint16_t *p3 = p2 + bstride;
- uint16_t *p4 = p3 + bstride;
- uint16_t *orig = p0, *end = p4;
- const int *matrix = s->matrix[plane];
- float rdiv = s->rdiv[plane];
- float bias = s->bias[plane];
- int y, x, i;
-
- line_copy16(p0, src + 2 * stride * (slice_start < 2 ? 1 : -1), width, 2);
- line_copy16(p1, src + stride * (slice_start == 0 ? 1 : -1), width, 2);
- line_copy16(p2, src, width, 2);
- src += stride;
- line_copy16(p3, src, width, 2);
-
- for (y = slice_start; y < slice_end; y++) {
- uint16_t *array[] = {
- p0 - 2, p0 - 1, p0, p0 + 1, p0 + 2,
- p1 - 2, p1 - 1, p1, p1 + 1, p1 + 2,
- p2 - 2, p2 - 1, p2, p2 + 1, p2 + 2,
- p3 - 2, p3 - 1, p3, p3 + 1, p3 + 2,
- p4 - 2, p4 - 1, p4, p4 + 1, p4 + 2
- };
-
- src += stride * (y < height - 2 ? 1 : -1);
- line_copy16(p4, src, width, 2);
-
- for (x = 0; x < width; x++) {
- int sum = 0;
-
- for (i = 0; i < 25; i++) {
- sum += *(array[i] + x) * matrix[i];
- }
- sum = (int)(sum * rdiv + bias + 0.5f);
- dst[x] = av_clip(sum, 0, peak);
- }