+/**
+ * Apply the Kirsch compass edge detector to one row of 16-bit samples.
+ *
+ * For every x in [0, width) the eight border samples of the 3x3
+ * neighbourhood are read from c[0..3] and c[5..8]; c[4] (the centre, whose
+ * Kirsch weight is 0) is never read. The strongest of the eight compass
+ * responses is scaled, offset and clipped to [0, peak].
+ *
+ * @param dstp   destination row, written as width uint16_t samples
+ * @param width  number of samples to process
+ * @param scale  factor applied to the strongest response
+ * @param delta  offset added after scaling
+ * @param c      array of at least 9 row pointers, each read as uint16_t
+ * @param peak   upper clipping bound of the stored value
+ *
+ * matrix, radius, dstride, stride and size are not used by this function;
+ * presumably they exist so the signature matches sibling filters
+ * (TODO confirm against the caller).
+ */
+static void filter16_kirsch(uint8_t *dstp, int width,
+                            float scale, float delta, const int *const matrix,
+                            const uint8_t *c[], int peak, int radius,
+                            int dstride, int stride, int size)
+{
+    uint16_t *dst = (uint16_t *)dstp;
+    /* Border samples walked as a closed ring around the centre. This assumes
+     * c[] is a 3x3 window stored row by row; a column-by-column layout walks
+     * the same ring in the opposite direction and yields the same set of
+     * kernels. The code that fills c[] is not visible here. */
+    const uint16_t *const ring[8] = {
+        (const uint16_t *)c[0], (const uint16_t *)c[1], (const uint16_t *)c[2],
+        (const uint16_t *)c[5], (const uint16_t *)c[8], (const uint16_t *)c[7],
+        (const uint16_t *)c[6], (const uint16_t *)c[3],
+    };
+    int x;
+
+    for (x = 0; x < width; x++) {
+        int v[8];
+        int total = 0;
+        int best, sum, k;
+
+        for (k = 0; k < 8; k++) {
+            v[k] = ring[k][x];
+            total += v[k];
+        }
+
+        /* Every Kirsch kernel puts 5 on three neighbouring ring samples and
+         * -3 on the remaining five, so its response equals
+         * 8 * triple - 3 * total. The 5s must follow the ring; stepping them
+         * through the index order 0,1,2,3,5,6,7,8 would pair samples that
+         * are not adjacent (e.g. c2 with c3). */
+        best = v[0] + v[1] + v[2];
+        for (k = 1; k < 8; k++) {
+            const int triple = v[k] + v[(k + 1) & 7] + v[(k + 2) & 7];
+
+            best = FFMAX(best, triple);
+        }
+        sum = 8 * best - 3 * total;
+
+        dst[x] = av_clip(FFABS(sum) * scale + delta, 0, peak);
+    }
+}
+