+ dstU[i] = (ru * r + gu * g + bu * b + rnd) >> S;
+ dstV[i] = (rv * r + gv * g + bv * b + rnd) >> S;
+ }
+}
+
+static av_always_inline void
+rgb16_32ToUV_half_c_template(uint8_t *dstU, uint8_t *dstV,
+ const uint8_t *src, int width,
+ enum PixelFormat origin,
+ int shr, int shg, int shb, int shp,
+ int maskr, int maskg, int maskb,
+ int rsh, int gsh, int bsh, int S)
+{
+ const int ru = RU << rsh, gu = GU << gsh, bu = BU << bsh,
+ rv = RV << rsh, gv = GV << gsh, bv = BV << bsh,
+ rnd = 257 << S, maskgx = ~(maskr | maskb);
+ int i;
+
+ maskr |= maskr << 1; maskb |= maskb << 1; maskg |= maskg << 1;
+ for (i = 0; i < width; i++) {
+ int px0 = input_pixel(2 * i + 0) >> shp;
+ int px1 = input_pixel(2 * i + 1) >> shp;
+ int b, r, g = (px0 & maskgx) + (px1 & maskgx);
+ int rb = px0 + px1 - g;
+
+ b = (rb & maskb) >> shb;
+ if (shp || origin == PIX_FMT_BGR565LE || origin == PIX_FMT_BGR565BE ||
+ origin == PIX_FMT_RGB565LE || origin == PIX_FMT_RGB565BE) {
+ g >>= shg;
+ } else {
+ g = (g & maskg) >> shg;
+ }
+ r = (rb & maskr) >> shr;
+
+ dstU[i] = (ru * r + gu * g + bu * b + rnd) >> (S + 1);
+ dstV[i] = (rv * r + gv * g + bv * b + rnd) >> (S + 1);
+ }
+}
+
+#undef input_pixel
+
+#define rgb16_32_wrapper(fmt, name, shr, shg, shb, shp, maskr, \
+ maskg, maskb, rsh, gsh, bsh, S) \
+static void name ## ToY_c(uint8_t *dst, const uint8_t *src, \
+ int width, uint32_t *unused) \
+{ \
+ rgb16_32ToY_c_template(dst, src, width, fmt, shr, shg, shb, shp, \
+ maskr, maskg, maskb, rsh, gsh, bsh, S); \
+} \
+ \
+static void name ## ToUV_c(uint8_t *dstU, uint8_t *dstV, \
+ const uint8_t *src, const uint8_t *dummy, \
+ int width, uint32_t *unused) \
+{ \
+ rgb16_32ToUV_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
+ maskr, maskg, maskb, rsh, gsh, bsh, S); \
+} \
+ \
+static void name ## ToUV_half_c(uint8_t *dstU, uint8_t *dstV, \
+ const uint8_t *src, const uint8_t *dummy, \
+ int width, uint32_t *unused) \
+{ \
+ rgb16_32ToUV_half_c_template(dstU, dstV, src, width, fmt, shr, shg, shb, shp, \
+ maskr, maskg, maskb, rsh, gsh, bsh, S); \
+}
+
+rgb16_32_wrapper(PIX_FMT_BGR32, bgr32, 16, 0, 0, 0, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_BGR32_1, bgr321, 16, 0, 0, 8, 0xFF0000, 0xFF00, 0x00FF, 8, 0, 8, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_RGB32, rgb32, 0, 0, 16, 0, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_RGB32_1, rgb321, 0, 0, 16, 8, 0x00FF, 0xFF00, 0xFF0000, 8, 0, 8, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_BGR565LE, bgr16le, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_BGR555LE, bgr15le, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
+rgb16_32_wrapper(PIX_FMT_RGB565LE, rgb16le, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_RGB555LE, rgb15le, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
+rgb16_32_wrapper(PIX_FMT_BGR565BE, bgr16be, 0, 0, 0, 0, 0x001F, 0x07E0, 0xF800, 11, 5, 0, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_BGR555BE, bgr15be, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, 10, 5, 0, RGB2YUV_SHIFT+7);
+rgb16_32_wrapper(PIX_FMT_RGB565BE, rgb16be, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, 0, 5, 11, RGB2YUV_SHIFT+8);
+rgb16_32_wrapper(PIX_FMT_RGB555BE, rgb15be, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, 0, 5, 10, RGB2YUV_SHIFT+7);
+
+static void abgrToA_c(uint8_t *dst, const uint8_t *src, int width, uint32_t *unused)