#include <stdio.h>
#include <string.h>
+#include "libavutil/attributes.h"
#include "libavutil/avutil.h"
#include "libavutil/bswap.h"
#include "libavutil/cpu.h"
}
}
-#define YUV2PACKED16WRAPPER(name, base, ext, fmt) \
-static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
- const int16_t **_lumSrc, int lumFilterSize, \
- const int16_t *chrFilter, const int16_t **_chrUSrc, \
- const int16_t **_chrVSrc, int chrFilterSize, \
- const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
- int y) \
-{ \
- const int32_t **lumSrc = (const int32_t **) _lumSrc, \
- **chrUSrc = (const int32_t **) _chrUSrc, \
- **chrVSrc = (const int32_t **) _chrVSrc, \
- **alpSrc = (const int32_t **) _alpSrc; \
- uint16_t *dest = (uint16_t *) _dest; \
- name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, \
- chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
- alpSrc, dest, dstW, y, fmt); \
-} \
- \
-static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
- const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
- const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
- int yalpha, int uvalpha, int y) \
-{ \
- const int32_t **buf = (const int32_t **) _buf, \
- **ubuf = (const int32_t **) _ubuf, \
- **vbuf = (const int32_t **) _vbuf, \
- **abuf = (const int32_t **) _abuf; \
- uint16_t *dest = (uint16_t *) _dest; \
- name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
- dest, dstW, yalpha, uvalpha, y, fmt); \
-} \
- \
-static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
- const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
- const int16_t *_abuf0, uint8_t *_dest, int dstW, \
- int uvalpha, int y) \
-{ \
- const int32_t *buf0 = (const int32_t *) _buf0, \
- **ubuf = (const int32_t **) _ubuf, \
- **vbuf = (const int32_t **) _vbuf, \
- *abuf0 = (const int32_t *) _abuf0; \
- uint16_t *dest = (uint16_t *) _dest; \
- name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
- dstW, uvalpha, y, fmt); \
-}
-
+#define accumulate_bit(acc, val) /* append one bit to acc: shift left by one, then OR in the result of (val >= 128 + 110) */ \
+ acc <<= 1; \
+ acc |= (val) >= (128 + 110) /* NOTE: expands to two statements and names acc twice; invoke only as a full statement inside a braced block */
#define output_pixel(pos, acc) \
if (target == PIX_FMT_MONOBLACK) { \
pos = acc; \
int y, enum PixelFormat target)
{
const uint8_t * const d128=dither_8x8_220[y&7];
- uint8_t *g = c->table_gU[128] + c->table_gV[128];
int i;
unsigned acc = 0;
- for (i = 0; i < dstW - 1; i += 2) {
+ for (i = 0; i < dstW; i += 2) {
int j;
int Y1 = 1 << 18;
int Y2 = 1 << 18;
Y1 = av_clip_uint8(Y1);
Y2 = av_clip_uint8(Y2);
}
- acc += acc + g[Y1 + d128[(i + 0) & 7]];
- acc += acc + g[Y2 + d128[(i + 1) & 7]];
+ accumulate_bit(acc, Y1 + d128[(i + 0) & 7]);
+ accumulate_bit(acc, Y2 + d128[(i + 1) & 7]);
if ((i & 7) == 6) {
output_pixel(*dest++, acc);
}
}
+
+ if (i & 6) {
+ output_pixel(*dest, acc);
+ }
}
static av_always_inline void
{
const int16_t *buf0 = buf[0], *buf1 = buf[1];
const uint8_t * const d128 = dither_8x8_220[y & 7];
- uint8_t *g = c->table_gU[128] + c->table_gV[128];
- int yalpha1 = 4095 - yalpha;
+ int yalpha1 = 4096 - yalpha;
int i;
- for (i = 0; i < dstW - 7; i += 8) {
- int acc = g[((buf0[i ] * yalpha1 + buf1[i ] * yalpha) >> 19) + d128[0]];
- acc += acc + g[((buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19) + d128[1]];
- acc += acc + g[((buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19) + d128[2]];
- acc += acc + g[((buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19) + d128[3]];
- acc += acc + g[((buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19) + d128[4]];
- acc += acc + g[((buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19) + d128[5]];
- acc += acc + g[((buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19) + d128[6]];
- acc += acc + g[((buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19) + d128[7]];
+ for (i = 0; i < dstW; i += 8) {
+ int Y, acc = 0;
+
+ Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19;
+ accumulate_bit(acc, Y + d128[0]);
+ Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19;
+ accumulate_bit(acc, Y + d128[1]);
+ Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19;
+ accumulate_bit(acc, Y + d128[2]);
+ Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19;
+ accumulate_bit(acc, Y + d128[3]);
+ Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19;
+ accumulate_bit(acc, Y + d128[4]);
+ Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19;
+ accumulate_bit(acc, Y + d128[5]);
+ Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19;
+ accumulate_bit(acc, Y + d128[6]);
+ Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19;
+ accumulate_bit(acc, Y + d128[7]);
+
output_pixel(*dest++, acc);
}
}
int uvalpha, int y, enum PixelFormat target)
{
const uint8_t * const d128 = dither_8x8_220[y & 7];
- uint8_t *g = c->table_gU[128] + c->table_gV[128];
int i;
- for (i = 0; i < dstW - 7; i += 8) {
- int acc = g[(buf0[i ] >> 7) + d128[0]];
- acc += acc + g[(buf0[i + 1] >> 7) + d128[1]];
- acc += acc + g[(buf0[i + 2] >> 7) + d128[2]];
- acc += acc + g[(buf0[i + 3] >> 7) + d128[3]];
- acc += acc + g[(buf0[i + 4] >> 7) + d128[4]];
- acc += acc + g[(buf0[i + 5] >> 7) + d128[5]];
- acc += acc + g[(buf0[i + 6] >> 7) + d128[6]];
- acc += acc + g[(buf0[i + 7] >> 7) + d128[7]];
+ for (i = 0; i < dstW; i += 8) {
+ int acc = 0;
+
+ accumulate_bit(acc, (buf0[i + 0] >> 7) + d128[0]);
+ accumulate_bit(acc, (buf0[i + 1] >> 7) + d128[1]);
+ accumulate_bit(acc, (buf0[i + 2] >> 7) + d128[2]);
+ accumulate_bit(acc, (buf0[i + 3] >> 7) + d128[3]);
+ accumulate_bit(acc, (buf0[i + 4] >> 7) + d128[4]);
+ accumulate_bit(acc, (buf0[i + 5] >> 7) + d128[5]);
+ accumulate_bit(acc, (buf0[i + 6] >> 7) + d128[6]);
+ accumulate_bit(acc, (buf0[i + 7] >> 7) + d128[7]);
+
output_pixel(*dest++, acc);
}
}
#undef output_pixel
+#undef accumulate_bit
#define YUV2PACKEDWRAPPER(name, base, ext, fmt) \
static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
{
int i;
- for (i = 0; i < (dstW >> 1); i++) {
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
int j;
int Y1 = 1 << 18;
int Y2 = 1 << 18;
const int16_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
- int yalpha1 = 4095 - yalpha;
- int uvalpha1 = 4095 - uvalpha;
+ int yalpha1 = 4096 - yalpha;
+ int uvalpha1 = 4096 - uvalpha;
int i;
- for (i = 0; i < (dstW >> 1); i++) {
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
+ Y1 = av_clip_uint8(Y1);
+ Y2 = av_clip_uint8(Y2);
+ U = av_clip_uint8(U);
+ V = av_clip_uint8(V);
+
output_pixels(i * 4, Y1, U, Y2, V);
}
}
int i;
if (uvalpha < 2048) {
- for (i = 0; i < (dstW >> 1); i++) {
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
int Y1 = buf0[i * 2] >> 7;
int Y2 = buf0[i * 2 + 1] >> 7;
int U = ubuf0[i] >> 7;
int V = vbuf0[i] >> 7;
+ Y1 = av_clip_uint8(Y1);
+ Y2 = av_clip_uint8(Y2);
+ U = av_clip_uint8(U);
+ V = av_clip_uint8(V);
+
output_pixels(i * 4, Y1, U, Y2, V);
}
} else {
const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
- for (i = 0; i < (dstW >> 1); i++) {
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
int Y1 = buf0[i * 2] >> 7;
int Y2 = buf0[i * 2 + 1] >> 7;
int U = (ubuf0[i] + ubuf1[i]) >> 8;
int V = (vbuf0[i] + vbuf1[i]) >> 8;
+ Y1 = av_clip_uint8(Y1);
+ Y2 = av_clip_uint8(Y2);
+ U = av_clip_uint8(U);
+ V = av_clip_uint8(V);
+
output_pixels(i * 4, Y1, U, Y2, V);
}
}
{
int i;
- for (i = 0; i < (dstW >> 1); i++) {
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
int j;
int Y1 = -0x40000000;
int Y2 = -0x40000000;
const int32_t *buf0 = buf[0], *buf1 = buf[1],
*ubuf0 = ubuf[0], *ubuf1 = ubuf[1],
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1];
- int yalpha1 = 4095 - yalpha;
- int uvalpha1 = 4095 - uvalpha;
+ int yalpha1 = 4096 - yalpha;
+ int uvalpha1 = 4096 - uvalpha;
int i;
- for (i = 0; i < (dstW >> 1); i++) {
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 14;
int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 14;
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha + (-128 << 23)) >> 14;
int i;
if (uvalpha < 2048) {
- for (i = 0; i < (dstW >> 1); i++) {
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
int Y1 = (buf0[i * 2] ) >> 2;
int Y2 = (buf0[i * 2 + 1]) >> 2;
int U = (ubuf0[i] + (-128 << 11)) >> 2;
}
} else {
const int32_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
- for (i = 0; i < (dstW >> 1); i++) {
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
int Y1 = (buf0[i * 2] ) >> 2;
int Y2 = (buf0[i * 2 + 1]) >> 2;
int U = (ubuf0[i] + ubuf1[i] + (-128 << 12)) >> 3;
#undef r_b
#undef b_r
+#define YUV2PACKED16WRAPPER(name, base, ext, fmt) /* emits three static wrappers: name##ext##_X_c, name##ext##_2_c and name##ext##_1_c */ \
+static void name ## ext ## _X_c(SwsContext *c, const int16_t *lumFilter, \
+ const int16_t **_lumSrc, int lumFilterSize, \
+ const int16_t *chrFilter, const int16_t **_chrUSrc, \
+ const int16_t **_chrVSrc, int chrFilterSize, \
+ const int16_t **_alpSrc, uint8_t *_dest, int dstW, \
+ int y) \
+{ \
+ const int32_t **lumSrc = (const int32_t **) _lumSrc, /* int16_t-typed source parameters are cast to int32_t pointers before forwarding */ \
+ **chrUSrc = (const int32_t **) _chrUSrc, \
+ **chrVSrc = (const int32_t **) _chrVSrc, \
+ **alpSrc = (const int32_t **) _alpSrc; \
+ uint16_t *dest = (uint16_t *) _dest; /* destination is handed on as uint16_t * */ \
+ name ## base ## _X_c_template(c, lumFilter, lumSrc, lumFilterSize, /* forwards to the base template, passing fmt as the trailing argument */ \
+ chrFilter, chrUSrc, chrVSrc, chrFilterSize, \
+ alpSrc, dest, dstW, y, fmt); \
+} \
+ /* _2_c variant: takes two-entry buffer arrays plus yalpha/uvalpha and forwards to name##base##_2_c_template */ \
+static void name ## ext ## _2_c(SwsContext *c, const int16_t *_buf[2], \
+ const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
+ const int16_t *_abuf[2], uint8_t *_dest, int dstW, \
+ int yalpha, int uvalpha, int y) \
+{ \
+ const int32_t **buf = (const int32_t **) _buf, /* same pointer casts as in the _X_c wrapper */ \
+ **ubuf = (const int32_t **) _ubuf, \
+ **vbuf = (const int32_t **) _vbuf, \
+ **abuf = (const int32_t **) _abuf; \
+ uint16_t *dest = (uint16_t *) _dest; \
+ name ## base ## _2_c_template(c, buf, ubuf, vbuf, abuf, \
+ dest, dstW, yalpha, uvalpha, y, fmt); \
+} \
+ /* _1_c variant: takes single luma/alpha buffers (_buf0, _abuf0) with two-entry chroma arrays and forwards to name##base##_1_c_template */ \
+static void name ## ext ## _1_c(SwsContext *c, const int16_t *_buf0, \
+ const int16_t *_ubuf[2], const int16_t *_vbuf[2], \
+ const int16_t *_abuf0, uint8_t *_dest, int dstW, \
+ int uvalpha, int y) \
+{ \
+ const int32_t *buf0 = (const int32_t *) _buf0, /* luma and alpha are single pointers here, chroma stays pointer-to-pointer */ \
+ **ubuf = (const int32_t **) _ubuf, \
+ **vbuf = (const int32_t **) _vbuf, \
+ *abuf0 = (const int32_t *) _abuf0; \
+ uint16_t *dest = (uint16_t *) _dest; \
+ name ## base ## _1_c_template(c, buf0, ubuf, vbuf, abuf0, dest, \
+ dstW, uvalpha, y, fmt); \
+}
+
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48be, PIX_FMT_RGB48BE)
YUV2PACKED16WRAPPER(yuv2, rgb48, rgb48le, PIX_FMT_RGB48LE)
YUV2PACKED16WRAPPER(yuv2, rgb48, bgr48be, PIX_FMT_BGR48BE)
{
int i;
- for (i = 0; i < (dstW >> 1); i++) {
- int j;
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
+ int j, A1, A2;
int Y1 = 1 << 18;
int Y2 = 1 << 18;
int U = 1 << 18;
int V = 1 << 18;
- int av_unused A1, A2;
const void *r, *g, *b;
for (j = 0; j < lumFilterSize; j++) {
*vbuf0 = vbuf[0], *vbuf1 = vbuf[1],
*abuf0 = hasAlpha ? abuf[0] : NULL,
*abuf1 = hasAlpha ? abuf[1] : NULL;
- int yalpha1 = 4095 - yalpha;
- int uvalpha1 = 4095 - uvalpha;
+ int yalpha1 = 4096 - yalpha;
+ int uvalpha1 = 4096 - uvalpha;
int i;
- for (i = 0; i < (dstW >> 1); i++) {
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
int Y1 = (buf0[i * 2] * yalpha1 + buf1[i * 2] * yalpha) >> 19;
int Y2 = (buf0[i * 2 + 1] * yalpha1 + buf1[i * 2 + 1] * yalpha) >> 19;
int U = (ubuf0[i] * uvalpha1 + ubuf1[i] * uvalpha) >> 19;
int V = (vbuf0[i] * uvalpha1 + vbuf1[i] * uvalpha) >> 19;
int A1, A2;
- const void *r = c->table_rV[V],
- *g = (c->table_gU[U] + c->table_gV[V]),
- *b = c->table_bU[U];
+ const void *r, *g, *b;
+
+ Y1 = av_clip_uint8(Y1);
+ Y2 = av_clip_uint8(Y2);
+ U = av_clip_uint8(U);
+ V = av_clip_uint8(V);
+
+ r = c->table_rV[V];
+ g = (c->table_gU[U] + c->table_gV[V]);
+ b = c->table_bU[U];
if (hasAlpha) {
A1 = (abuf0[i * 2 ] * yalpha1 + abuf1[i * 2 ] * yalpha) >> 19;
A2 = (abuf0[i * 2 + 1] * yalpha1 + abuf1[i * 2 + 1] * yalpha) >> 19;
+ A1 = av_clip_uint8(A1);
+ A2 = av_clip_uint8(A2);
}
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
int i;
if (uvalpha < 2048) {
- for (i = 0; i < (dstW >> 1); i++) {
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
int Y1 = buf0[i * 2] >> 7;
int Y2 = buf0[i * 2 + 1] >> 7;
int U = ubuf0[i] >> 7;
int V = vbuf0[i] >> 7;
int A1, A2;
- const void *r = c->table_rV[V],
- *g = (c->table_gU[U] + c->table_gV[V]),
- *b = c->table_bU[U];
+ const void *r, *g, *b;
+
+ Y1 = av_clip_uint8(Y1);
+ Y2 = av_clip_uint8(Y2);
+ U = av_clip_uint8(U);
+ V = av_clip_uint8(V);
+
+ r = c->table_rV[V];
+ g = (c->table_gU[U] + c->table_gV[V]);
+ b = c->table_bU[U];
if (hasAlpha) {
A1 = abuf0[i * 2 ] >> 7;
A2 = abuf0[i * 2 + 1] >> 7;
+ A1 = av_clip_uint8(A1);
+ A2 = av_clip_uint8(A2);
}
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
}
} else {
const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1];
- for (i = 0; i < (dstW >> 1); i++) {
+ for (i = 0; i < ((dstW + 1) >> 1); i++) {
int Y1 = buf0[i * 2] >> 7;
int Y2 = buf0[i * 2 + 1] >> 7;
int U = (ubuf0[i] + ubuf1[i]) >> 8;
int V = (vbuf0[i] + vbuf1[i]) >> 8;
int A1, A2;
- const void *r = c->table_rV[V],
- *g = (c->table_gU[U] + c->table_gV[V]),
- *b = c->table_bU[U];
+ const void *r, *g, *b;
+
+ Y1 = av_clip_uint8(Y1);
+ Y2 = av_clip_uint8(Y2);
+ U = av_clip_uint8(U);
+ V = av_clip_uint8(V);
+
+ r = c->table_rV[V];
+ g = (c->table_gU[U] + c->table_gV[V]);
+ b = c->table_bU[U];
if (hasAlpha) {
A1 = abuf0[i * 2 ] >> 7;
A2 = abuf0[i * 2 + 1] >> 7;
+ A1 = av_clip_uint8(A1);
+ A2 = av_clip_uint8(A2);
}
yuv2rgb_write(dest, i, Y1, Y2, hasAlpha ? A1 : 0, hasAlpha ? A2 : 0,
int Y = 0;
int U = -128 << 19;
int V = -128 << 19;
- int av_unused A;
- int R, G, B;
+ int R, G, B, A;
for (j = 0; j < lumFilterSize; j++) {
Y += lumSrc[j][i] * lumFilter[j];
YUV2RGBWRAPPERX(yuv2, rgb_full, bgr24_full, PIX_FMT_BGR24, 0)
YUV2RGBWRAPPERX(yuv2, rgb_full, rgb24_full, PIX_FMT_RGB24, 0)
-void ff_sws_init_output_funcs(SwsContext *c,
- yuv2planar1_fn *yuv2plane1,
- yuv2planarX_fn *yuv2planeX,
- yuv2interleavedX_fn *yuv2nv12cX,
- yuv2packed1_fn *yuv2packed1,
- yuv2packed2_fn *yuv2packed2,
- yuv2packedX_fn *yuv2packedX)
+av_cold void ff_sws_init_output_funcs(SwsContext *c,
+ yuv2planar1_fn *yuv2plane1,
+ yuv2planarX_fn *yuv2planeX,
+ yuv2interleavedX_fn *yuv2nv12cX,
+ yuv2packed1_fn *yuv2packed1,
+ yuv2packed2_fn *yuv2packed2,
+ yuv2packedX_fn *yuv2packedX)
{
enum PixelFormat dstFormat = c->dstFormat;