+#define r ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? b_r : r_b)
+#define b ((origin == AV_PIX_FMT_BGR48BE || origin == AV_PIX_FMT_BGR48LE || origin == AV_PIX_FMT_BGRA64BE || origin == AV_PIX_FMT_BGRA64LE) ? r_b : b_r)
+
+static av_always_inline void
+rgb64ToY_c_template(uint16_t *dst, const uint16_t *src, int width,
+ enum AVPixelFormat origin, int32_t *rgb2yuv)
+{
+ int32_t ry = rgb2yuv[RY_IDX], gy = rgb2yuv[GY_IDX], by = rgb2yuv[BY_IDX];
+ int i;
+ for (i = 0; i < width; i++) {
+ unsigned int r_b = input_pixel(&src[i*4+0]);
+ unsigned int g = input_pixel(&src[i*4+1]);
+ unsigned int b_r = input_pixel(&src[i*4+2]);
+
+ dst[i] = (ry*r + gy*g + by*b + (0x2001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ }
+}
+
+static av_always_inline void
+rgb64ToUV_c_template(uint16_t *dstU, uint16_t *dstV,
+ const uint16_t *src1, const uint16_t *src2,
+ int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
+{
+ int i;
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+ av_assert1(src1==src2);
+ for (i = 0; i < width; i++) {
+ int r_b = input_pixel(&src1[i*4+0]);
+ int g = input_pixel(&src1[i*4+1]);
+ int b_r = input_pixel(&src1[i*4+2]);
+
+ dstU[i] = (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ dstV[i] = (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ }
+}
+
+static av_always_inline void
+rgb64ToUV_half_c_template(uint16_t *dstU, uint16_t *dstV,
+ const uint16_t *src1, const uint16_t *src2,
+ int width, enum AVPixelFormat origin, int32_t *rgb2yuv)
+{
+ int i;
+ int32_t ru = rgb2yuv[RU_IDX], gu = rgb2yuv[GU_IDX], bu = rgb2yuv[BU_IDX];
+ int32_t rv = rgb2yuv[RV_IDX], gv = rgb2yuv[GV_IDX], bv = rgb2yuv[BV_IDX];
+ av_assert1(src1==src2);
+ for (i = 0; i < width; i++) {
+ int r_b = (input_pixel(&src1[8 * i + 0]) + input_pixel(&src1[8 * i + 4]) + 1) >> 1;
+ int g = (input_pixel(&src1[8 * i + 1]) + input_pixel(&src1[8 * i + 5]) + 1) >> 1;
+ int b_r = (input_pixel(&src1[8 * i + 2]) + input_pixel(&src1[8 * i + 6]) + 1) >> 1;
+
+ dstU[i]= (ru*r + gu*g + bu*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ dstV[i]= (rv*r + gv*g + bv*b + (0x10001<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
+ }
+}
+
+#define rgb64funcs(pattern, BE_LE, origin) \
+static void pattern ## 64 ## BE_LE ## ToY_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused0, const uint8_t *unused1,\
+ int width, uint32_t *rgb2yuv) \
+{ \
+ const uint16_t *src = (const uint16_t *) _src; \
+ uint16_t *dst = (uint16_t *) _dst; \
+ rgb64ToY_c_template(dst, src, width, origin, rgb2yuv); \
+} \
+ \
+static void pattern ## 64 ## BE_LE ## ToUV_c(uint8_t *_dstU, uint8_t *_dstV, \
+ const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
+ int width, uint32_t *rgb2yuv) \
+{ \
+ const uint16_t *src1 = (const uint16_t *) _src1, \
+ *src2 = (const uint16_t *) _src2; \
+ uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
+ rgb64ToUV_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
+} \
+ \
+static void pattern ## 64 ## BE_LE ## ToUV_half_c(uint8_t *_dstU, uint8_t *_dstV, \
+ const uint8_t *unused0, const uint8_t *_src1, const uint8_t *_src2, \
+ int width, uint32_t *rgb2yuv) \
+{ \
+ const uint16_t *src1 = (const uint16_t *) _src1, \
+ *src2 = (const uint16_t *) _src2; \
+ uint16_t *dstU = (uint16_t *) _dstU, *dstV = (uint16_t *) _dstV; \
+ rgb64ToUV_half_c_template(dstU, dstV, src1, src2, width, origin, rgb2yuv); \
+}
+
+rgb64funcs(rgb, LE, AV_PIX_FMT_RGBA64LE)
+rgb64funcs(rgb, BE, AV_PIX_FMT_RGBA64BE)
+rgb64funcs(bgr, LE, AV_PIX_FMT_BGRA64LE)
+rgb64funcs(bgr, BE, AV_PIX_FMT_BGRA64BE)