X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libswscale%2Foutput.c;h=e855ad606a0bbb51ab794b6b6c6b0b5fde47106f;hb=HEAD;hp=d7c53e60d9fb02769229072bde577233b1cc78a1;hpb=47e12966b75490cfa5fb8ed65a48a9a3d84a7bce;p=ffmpeg diff --git a/libswscale/output.c b/libswscale/output.c index d7c53e60d9f..e855ad606a0 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -30,6 +30,7 @@ #include "libavutil/cpu.h" #include "libavutil/intreadwrite.h" #include "libavutil/mathematics.h" +#include "libavutil/mem_internal.h" #include "libavutil/pixdesc.h" #include "config.h" #include "rgb2rgb.h" @@ -180,7 +181,8 @@ yuv2planeX_16_c_template(const int16_t *filter, int filterSize, } } -static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize, +static void yuv2p016cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither, + const int16_t *chrFilter, int chrFilterSize, const int16_t **chrUSrc, const int16_t **chrVSrc, uint8_t *dest8, int chrDstW) { @@ -188,7 +190,7 @@ static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterS const int32_t **uSrc = (const int32_t **)chrUSrc; const int32_t **vSrc = (const int32_t **)chrVSrc; int shift = 15; - int big_endian = c->dstFormat == AV_PIX_FMT_P016BE; + int big_endian = dstFormat == AV_PIX_FMT_P016BE; int i, j; for (i = 0; i < chrDstW; i++) { @@ -402,15 +404,15 @@ static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW, } } -static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize, - const int16_t **chrUSrc, const int16_t **chrVSrc, - uint8_t *dest, int chrDstW) +static void yuv2nv12cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither, + const int16_t *chrFilter, int chrFilterSize, + const int16_t **chrUSrc, const int16_t **chrVSrc, + uint8_t *dest, int chrDstW) { - enum AVPixelFormat dstFormat = c->dstFormat; - const uint8_t *chrDither = c->chrDither8; int i; - if (dstFormat == AV_PIX_FMT_NV12) + if (dstFormat == AV_PIX_FMT_NV12 || + dstFormat == AV_PIX_FMT_NV24) for (i=0; idstFormat == AV_PIX_FMT_P010BE; + int big_endian = dstFormat == AV_PIX_FMT_P010BE; int i, j; for (i = 0; i < chrDstW; i++) { @@ -629,28 +632,28 @@ yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2], } c->dither_error[0][i] = err; } else { - for (i = 0; i < dstW; i += 8) { - int Y, acc = 0; - - Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[0]); - Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[1]); - Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[2]); - Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[3]); - Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[4]); - Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[5]); - Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[6]); - Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[7]); - - output_pixel(*dest++, acc); - } + for (i = 0; i < dstW; i += 8) { + int Y, acc = 0; + + Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[0]); + Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[1]); + Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[2]); + Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[3]); + Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[4]); + Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[5]); + Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[6]); + Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[7]); + + output_pixel(*dest++, acc); + } } } @@ -686,19 +689,19 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0, } c->dither_error[0][i] = err; } else { - for (i = 0; i < dstW; i += 8) { - int acc = 0; - accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]); - accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]); - accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]); - accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]); - accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]); - accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]); - accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]); - accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]); - - output_pixel(*dest++, acc); - } + for (i = 0; i < dstW; i += 8) { + int acc = 0; + accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]); + accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]); + accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]); + accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]); + accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]); + accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]); + accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]); + accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]); + + output_pixel(*dest++, acc); + } } } @@ -853,11 +856,6 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0, V = av_clip_uint8(V); } - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - output_pixels(i * 4, Y1, U, Y2, V); } } else { @@ -875,11 +873,6 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0, V = av_clip_uint8(V); } - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - output_pixels(i * 4, Y1, U, Y2, V); } } @@ -913,25 +906,28 @@ yuv2ya16_X_c_template(SwsContext *c, const int16_t *lumFilter, for (i = 0; i < dstW; i++) { int j; - int Y = 1 << 18; - int64_t A = 0xffff<<14; + int Y = -0x40000000; + int A = 0xffff; for (j = 0; j < lumFilterSize; j++) Y += lumSrc[j][i] * lumFilter[j]; Y >>= 15; + Y += (1<<3) + 0x8000; Y = av_clip_uint16(Y); if (hasAlpha) { + A = -0x40000000 + (1<<14); for (j = 0; j < lumFilterSize; j++) A += alpSrc[j][i] * lumFilter[j]; A >>= 15; + A += 0x8000; A = av_clip_uint16(A); } output_pixel(&dest[2 * i ], Y); - output_pixel(&dest[2 * i + 1], hasAlpha ? A : 65535); + output_pixel(&dest[2 * i + 1], A); } } @@ -1608,6 +1604,13 @@ yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2, dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1]; dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]; + } else if (target == AV_PIX_FMT_X2RGB10) { + uint32_t *dest = (uint32_t *) _dest; + const uint32_t *r = (const uint32_t *) _r; + const uint32_t *g = (const uint32_t *) _g; + const uint32_t *b = (const uint32_t *) _b; + dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1]; + dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2]; } else /* 8/4 bits */ { uint8_t *dest = (uint8_t *) _dest; const uint8_t *r = (const uint8_t *) _r; @@ -1845,6 +1848,7 @@ YUV2RGBWRAPPER(yuv2rgb,, 12, AV_PIX_FMT_RGB444, 0) YUV2RGBWRAPPER(yuv2rgb,, 8, AV_PIX_FMT_RGB8, 0) YUV2RGBWRAPPER(yuv2rgb,, 4, AV_PIX_FMT_RGB4, 0) YUV2RGBWRAPPER(yuv2rgb,, 4b, AV_PIX_FMT_RGB4_BYTE, 0) +YUV2RGBWRAPPER(yuv2, rgb, x2rgb10, AV_PIX_FMT_X2RGB10, 0) static av_always_inline void yuv2rgb_write_full(SwsContext *c, uint8_t *dest, int i, int Y, int A, int U, int V, @@ -1856,9 +1860,9 @@ static av_always_inline void yuv2rgb_write_full(SwsContext *c, Y -= c->yuv2rgb_y_offset; Y *= c->yuv2rgb_y_coeff; Y += 1 << 21; - R = Y + V*c->yuv2rgb_v2r_coeff; - G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff; - B = Y + U*c->yuv2rgb_u2b_coeff; + R = (unsigned)Y + V*c->yuv2rgb_v2r_coeff; + G = (unsigned)Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff; + B = (unsigned)Y + U*c->yuv2rgb_u2b_coeff; if ((R | G | B) & 0xC0000000) { R = av_clip_uintp2(R, 30); G = av_clip_uintp2(G, 30); @@ -2099,7 +2103,7 @@ yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0, if (uvalpha < 2048) { int A = 0; //init to silence warning for (i = 0; i < dstW; i++) { - int Y = buf0[i] << 2; + int Y = buf0[i] * 4; int U = (ubuf0[i] - (128<<7)) * 4; int V = (vbuf0[i] - (128<<7)) * 4; @@ -2116,9 +2120,9 @@ yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0, const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; int A = 0; //init to silence warning for (i = 0; i < dstW; i++) { - int Y = buf0[i] << 2; - int U = (ubuf0[i] + ubuf1[i] - (128<<8)) << 1; - int V = (vbuf0[i] + vbuf1[i] - (128<<8)) << 1; + int Y = buf0[i] * 4; + int U = (ubuf0[i] + ubuf1[i] - (128<<8)) * 2; + int V = (vbuf0[i] + vbuf1[i] - (128<<8)) * 2; if (hasAlpha) { A = (abuf0[i] + 64) >> 7; @@ -2284,7 +2288,7 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter, A = -0x40000000; for (j = 0; j < lumFilterSize; j++) - A += alpSrc[j][i] * lumFilter[j]; + A += alpSrc[j][i] * (unsigned)lumFilter[j]; A >>= 1; A += 0x20002000; @@ -2318,6 +2322,82 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter, } } +static void +yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrcx, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrcx, + const int16_t **chrVSrcx, int chrFilterSize, + const int16_t **alpSrcx, uint8_t **dest, + int dstW, int y) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); + int i; + int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx; + uint32_t **dest32 = (uint32_t**)dest; + const int32_t **lumSrc = (const int32_t**)lumSrcx; + const int32_t **chrUSrc = (const int32_t**)chrUSrcx; + const int32_t **chrVSrc = (const int32_t**)chrVSrcx; + const int32_t **alpSrc = (const int32_t**)alpSrcx; + static const float float_mult = 1.0f / 65535.0f; + + for (i = 0; i < dstW; i++) { + int j; + int Y = -0x40000000; + int U = -(128 << 23); + int V = -(128 << 23); + int R, G, B, A; + + for (j = 0; j < lumFilterSize; j++) + Y += lumSrc[j][i] * (unsigned)lumFilter[j]; + + for (j = 0; j < chrFilterSize; j++) { + U += chrUSrc[j][i] * (unsigned)chrFilter[j]; + V += chrVSrc[j][i] * (unsigned)chrFilter[j]; + } + + Y >>= 14; + Y += 0x10000; + U >>= 14; + V >>= 14; + + if (hasAlpha) { + A = -0x40000000; + + for (j = 0; j < lumFilterSize; j++) + A += alpSrc[j][i] * (unsigned)lumFilter[j]; + + A >>= 1; + A += 0x20002000; + } + + Y -= c->yuv2rgb_y_offset; + Y *= c->yuv2rgb_y_coeff; + Y += 1 << 13; + R = V * c->yuv2rgb_v2r_coeff; + G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; + B = U * c->yuv2rgb_u2b_coeff; + + R = av_clip_uintp2(Y + R, 30); + G = av_clip_uintp2(Y + G, 30); + B = av_clip_uintp2(Y + B, 30); + + dest32[0][i] = av_float2int(float_mult * (float)(G >> 14)); + dest32[1][i] = av_float2int(float_mult * (float)(B >> 14)); + dest32[2][i] = av_float2int(float_mult * (float)(R >> 14)); + if (hasAlpha) + dest32[3][i] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); + } + if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { + for (i = 0; i < dstW; i++) { + dest32[0][i] = av_bswap32(dest32[0][i]); + dest32[1][i] = av_bswap32(dest32[1][i]); + dest32[2][i] = av_bswap32(dest32[2][i]); + if (hasAlpha) + dest32[3][i] = av_bswap32(dest32[3][i]); + } + } +} + static void yuv2ya8_1_c(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], @@ -2506,7 +2586,8 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, } else { *yuv2plane1 = yuv2plane1_8_c; *yuv2planeX = yuv2planeX_8_c; - if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21) + if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21 || + dstFormat == AV_PIX_FMT_NV24 || dstFormat == AV_PIX_FMT_NV42) *yuv2nv12cX = yuv2nv12cX_c; } @@ -2721,6 +2802,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, case AV_PIX_FMT_GBRAP16LE: *yuv2anyX = yuv2gbrp16_full_X_c; break; + case AV_PIX_FMT_GBRPF32BE: + case AV_PIX_FMT_GBRPF32LE: + case AV_PIX_FMT_GBRAPF32BE: + case AV_PIX_FMT_GBRAPF32LE: + *yuv2anyX = yuv2gbrpf32_full_X_c; + break; } if (!*yuv2packedX && !*yuv2anyX) goto YUV_PACKED; @@ -2897,6 +2984,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, *yuv2packed2 = yuv2rgb4b_2_c; *yuv2packedX = yuv2rgb4b_X_c; break; + case AV_PIX_FMT_X2RGB10LE: + case AV_PIX_FMT_X2RGB10BE: + *yuv2packed1 = yuv2x2rgb10_1_c; + *yuv2packed2 = yuv2x2rgb10_2_c; + *yuv2packedX = yuv2x2rgb10_X_c; + break; } } switch (dstFormat) {