X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libswscale%2Foutput.c;h=4ef436e7e4cc0492a76559648fc435e50d7a9de3;hb=7c8ad72f1ce2ae747edd5c3f49bb052b1586c90f;hp=de8637aa3b43a0ff2adfbd66b0c56cb39fede403;hpb=23ce57af3ad684363881cdd66d5724f40963a65c;p=ffmpeg diff --git a/libswscale/output.c b/libswscale/output.c index de8637aa3b4..4ef436e7e4c 100644 --- a/libswscale/output.c +++ b/libswscale/output.c @@ -180,7 +180,8 @@ yuv2planeX_16_c_template(const int16_t *filter, int filterSize, } } -static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize, +static void yuv2p016cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither, + const int16_t *chrFilter, int chrFilterSize, const int16_t **chrUSrc, const int16_t **chrVSrc, uint8_t *dest8, int chrDstW) { @@ -188,7 +189,7 @@ static void yuv2p016cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterS const int32_t **uSrc = (const int32_t **)chrUSrc; const int32_t **vSrc = (const int32_t **)chrVSrc; int shift = 15; - int big_endian = c->dstFormat == AV_PIX_FMT_P016BE; + int big_endian = dstFormat == AV_PIX_FMT_P016BE; int i, j; for (i = 0; i < chrDstW; i++) { @@ -402,15 +403,15 @@ static void yuv2plane1_8_c(const int16_t *src, uint8_t *dest, int dstW, } } -static void yuv2nv12cX_c(SwsContext *c, const int16_t *chrFilter, int chrFilterSize, - const int16_t **chrUSrc, const int16_t **chrVSrc, - uint8_t *dest, int chrDstW) +static void yuv2nv12cX_c(enum AVPixelFormat dstFormat, const uint8_t *chrDither, + const int16_t *chrFilter, int chrFilterSize, + const int16_t **chrUSrc, const int16_t **chrVSrc, + uint8_t *dest, int chrDstW) { - enum AVPixelFormat dstFormat = c->dstFormat; - const uint8_t *chrDither = c->chrDither8; int i; - if (dstFormat == AV_PIX_FMT_NV12) + if (dstFormat == AV_PIX_FMT_NV12 || + dstFormat == AV_PIX_FMT_NV24) for (i=0; idstFormat == AV_PIX_FMT_P010BE; + int big_endian = dstFormat == AV_PIX_FMT_P010BE; int i, j; for (i = 0; i < chrDstW; i++) { @@ -629,28 +631,28 @@ yuv2mono_2_c_template(SwsContext *c, const int16_t *buf[2], } c->dither_error[0][i] = err; } else { - for (i = 0; i < dstW; i += 8) { - int Y, acc = 0; - - Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[0]); - Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[1]); - Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[2]); - Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[3]); - Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[4]); - Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[5]); - Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[6]); - Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19; - accumulate_bit(acc, Y + d128[7]); - - output_pixel(*dest++, acc); - } + for (i = 0; i < dstW; i += 8) { + int Y, acc = 0; + + Y = (buf0[i + 0] * yalpha1 + buf1[i + 0] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[0]); + Y = (buf0[i + 1] * yalpha1 + buf1[i + 1] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[1]); + Y = (buf0[i + 2] * yalpha1 + buf1[i + 2] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[2]); + Y = (buf0[i + 3] * yalpha1 + buf1[i + 3] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[3]); + Y = (buf0[i + 4] * yalpha1 + buf1[i + 4] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[4]); + Y = (buf0[i + 5] * yalpha1 + buf1[i + 5] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[5]); + Y = (buf0[i + 6] * yalpha1 + buf1[i + 6] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[6]); + Y = (buf0[i + 7] * yalpha1 + buf1[i + 7] * yalpha) >> 19; + accumulate_bit(acc, Y + d128[7]); + + output_pixel(*dest++, acc); + } } } @@ -686,19 +688,19 @@ yuv2mono_1_c_template(SwsContext *c, const int16_t *buf0, } c->dither_error[0][i] = err; } else { - for (i = 0; i < dstW; i += 8) { - int acc = 0; - accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]); - accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]); - accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]); - accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]); - accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]); - accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]); - accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]); - accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]); - - output_pixel(*dest++, acc); - } + for (i = 0; i < dstW; i += 8) { + int acc = 0; + accumulate_bit(acc, ((buf0[i + 0] + 64) >> 7) + d128[0]); + accumulate_bit(acc, ((buf0[i + 1] + 64) >> 7) + d128[1]); + accumulate_bit(acc, ((buf0[i + 2] + 64) >> 7) + d128[2]); + accumulate_bit(acc, ((buf0[i + 3] + 64) >> 7) + d128[3]); + accumulate_bit(acc, ((buf0[i + 4] + 64) >> 7) + d128[4]); + accumulate_bit(acc, ((buf0[i + 5] + 64) >> 7) + d128[5]); + accumulate_bit(acc, ((buf0[i + 6] + 64) >> 7) + d128[6]); + accumulate_bit(acc, ((buf0[i + 7] + 64) >> 7) + d128[7]); + + output_pixel(*dest++, acc); + } } } @@ -853,11 +855,6 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0, V = av_clip_uint8(V); } - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - output_pixels(i * 4, Y1, U, Y2, V); } } else { @@ -875,11 +872,6 @@ yuv2422_1_c_template(SwsContext *c, const int16_t *buf0, V = av_clip_uint8(V); } - Y1 = av_clip_uint8(Y1); - Y2 = av_clip_uint8(Y2); - U = av_clip_uint8(U); - V = av_clip_uint8(V); - output_pixels(i * 4, Y1, U, Y2, V); } } @@ -900,6 +892,102 @@ YUV2PACKEDWRAPPER(yuv2, 422, uyvy422, AV_PIX_FMT_UYVY422) AV_WL16(pos, val); \ } +static av_always_inline void +yuv2ya16_X_c_template(SwsContext *c, const int16_t *lumFilter, + const int32_t **lumSrc, int lumFilterSize, + const int16_t *chrFilter, const int32_t **unused_chrUSrc, + const int32_t **unused_chrVSrc, int unused_chrFilterSize, + const int32_t **alpSrc, uint16_t *dest, int dstW, + int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes) +{ + int hasAlpha = !!alpSrc; + int i; + + for (i = 0; i < dstW; i++) { + int j; + int Y = -0x40000000; + int A = 0xffff; + + for (j = 0; j < lumFilterSize; j++) + Y += lumSrc[j][i] * lumFilter[j]; + + Y >>= 15; + Y += (1<<3) + 0x8000; + Y = av_clip_uint16(Y); + + if (hasAlpha) { + A = -0x40000000 + (1<<14); + for (j = 0; j < lumFilterSize; j++) + A += alpSrc[j][i] * lumFilter[j]; + + A >>= 15; + A += 0x8000; + A = av_clip_uint16(A); + } + + output_pixel(&dest[2 * i ], Y); + output_pixel(&dest[2 * i + 1], A); + } +} + +static av_always_inline void +yuv2ya16_2_c_template(SwsContext *c, const int32_t *buf[2], + const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2], + const int32_t *abuf[2], uint16_t *dest, int dstW, + int yalpha, int unused_uvalpha, int y, + enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes) +{ + int hasAlpha = abuf && abuf[0] && abuf[1]; + const int32_t *buf0 = buf[0], *buf1 = buf[1], + *abuf0 = hasAlpha ? abuf[0] : NULL, + *abuf1 = hasAlpha ? abuf[1] : NULL; + int yalpha1 = 4096 - yalpha; + int i; + + av_assert2(yalpha <= 4096U); + + for (i = 0; i < dstW; i++) { + int Y = (buf0[i] * yalpha1 + buf1[i] * yalpha) >> 15; + int A; + + Y = av_clip_uint16(Y); + + if (hasAlpha) { + A = (abuf0[i] * yalpha1 + abuf1[i] * yalpha) >> 15; + A = av_clip_uint16(A); + } + + output_pixel(&dest[2 * i ], Y); + output_pixel(&dest[2 * i + 1], hasAlpha ? A : 65535); + } +} + +static av_always_inline void +yuv2ya16_1_c_template(SwsContext *c, const int32_t *buf0, + const int32_t *unused_ubuf[2], const int32_t *unused_vbuf[2], + const int32_t *abuf0, uint16_t *dest, int dstW, + int unused_uvalpha, int y, enum AVPixelFormat target, int unused_hasAlpha, int unused_eightbytes) +{ + int hasAlpha = !!abuf0; + int i; + + for (i = 0; i < dstW; i++) { + int Y = buf0[i] >> 3;/* 19 - 16 */ + int A; + + Y = av_clip_uint16(Y); + + if (hasAlpha) { + A = abuf0[i] >> 3; + if (A & 0x100) + A = av_clip_uint16(A); + } + + output_pixel(&dest[2 * i ], Y); + output_pixel(&dest[2 * i + 1], hasAlpha ? A : 65535); + } +} + static av_always_inline void yuv2rgba64_X_c_template(SwsContext *c, const int16_t *lumFilter, const int32_t **lumSrc, int lumFilterSize, @@ -1405,6 +1493,8 @@ YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64be, AV_PIX_FMT_BGRA64BE, 1, 1) YUV2PACKED16WRAPPER(yuv2, rgba64, bgra64le, AV_PIX_FMT_BGRA64LE, 1, 1) YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64be, AV_PIX_FMT_BGRA64BE, 0, 1) YUV2PACKED16WRAPPER(yuv2, rgba64, bgrx64le, AV_PIX_FMT_BGRA64LE, 0, 1) +YUV2PACKED16WRAPPER(yuv2, ya16, ya16be, AV_PIX_FMT_YA16BE, 1, 0) +YUV2PACKED16WRAPPER(yuv2, ya16, ya16le, AV_PIX_FMT_YA16LE, 1, 0) YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48be_full, AV_PIX_FMT_RGB48BE, 0, 0) YUV2PACKED16WRAPPER(yuv2, rgba64_full, rgb48le_full, AV_PIX_FMT_RGB48LE, 0, 0) @@ -1513,6 +1603,13 @@ yuv2rgb_write(uint8_t *_dest, int i, int Y1, int Y2, dest[i * 2 + 0] = r[Y1 + dr1] + g[Y1 + dg1] + b[Y1 + db1]; dest[i * 2 + 1] = r[Y2 + dr2] + g[Y2 + dg2] + b[Y2 + db2]; + } else if (target == AV_PIX_FMT_X2RGB10) { + uint32_t *dest = (uint32_t *) _dest; + const uint32_t *r = (const uint32_t *) _r; + const uint32_t *g = (const uint32_t *) _g; + const uint32_t *b = (const uint32_t *) _b; + dest[i * 2 + 0] = r[Y1] + g[Y1] + b[Y1]; + dest[i * 2 + 1] = r[Y2] + g[Y2] + b[Y2]; } else /* 8/4 bits */ { uint8_t *dest = (uint8_t *) _dest; const uint8_t *r = (const uint8_t *) _r; @@ -1750,6 +1847,7 @@ YUV2RGBWRAPPER(yuv2rgb,, 12, AV_PIX_FMT_RGB444, 0) YUV2RGBWRAPPER(yuv2rgb,, 8, AV_PIX_FMT_RGB8, 0) YUV2RGBWRAPPER(yuv2rgb,, 4, AV_PIX_FMT_RGB4, 0) YUV2RGBWRAPPER(yuv2rgb,, 4b, AV_PIX_FMT_RGB4_BYTE, 0) +YUV2RGBWRAPPER(yuv2, rgb, x2rgb10, AV_PIX_FMT_X2RGB10, 0) static av_always_inline void yuv2rgb_write_full(SwsContext *c, uint8_t *dest, int i, int Y, int A, int U, int V, @@ -1761,9 +1859,9 @@ static av_always_inline void yuv2rgb_write_full(SwsContext *c, Y -= c->yuv2rgb_y_offset; Y *= c->yuv2rgb_y_coeff; Y += 1 << 21; - R = Y + V*c->yuv2rgb_v2r_coeff; - G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff; - B = Y + U*c->yuv2rgb_u2b_coeff; + R = (unsigned)Y + V*c->yuv2rgb_v2r_coeff; + G = (unsigned)Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff; + B = (unsigned)Y + U*c->yuv2rgb_u2b_coeff; if ((R | G | B) & 0xC0000000) { R = av_clip_uintp2(R, 30); G = av_clip_uintp2(G, 30); @@ -2004,7 +2102,7 @@ yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0, if (uvalpha < 2048) { int A = 0; //init to silence warning for (i = 0; i < dstW; i++) { - int Y = buf0[i] << 2; + int Y = buf0[i] * 4; int U = (ubuf0[i] - (128<<7)) * 4; int V = (vbuf0[i] - (128<<7)) * 4; @@ -2021,9 +2119,9 @@ yuv2rgb_full_1_c_template(SwsContext *c, const int16_t *buf0, const int16_t *ubuf1 = ubuf[1], *vbuf1 = vbuf[1]; int A = 0; //init to silence warning for (i = 0; i < dstW; i++) { - int Y = buf0[i] << 2; - int U = (ubuf0[i] + ubuf1[i] - (128<<8)) << 1; - int V = (vbuf0[i] + vbuf1[i] - (128<<8)) << 1; + int Y = buf0[i] * 4; + int U = (ubuf0[i] + ubuf1[i] - (128<<8)) * 2; + int V = (vbuf0[i] + vbuf1[i] - (128<<8)) * 2; if (hasAlpha) { A = (abuf0[i] + 64) >> 7; @@ -2189,7 +2287,7 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter, A = -0x40000000; for (j = 0; j < lumFilterSize; j++) - A += alpSrc[j][i] * lumFilter[j]; + A += alpSrc[j][i] * (unsigned)lumFilter[j]; A >>= 1; A += 0x20002000; @@ -2223,6 +2321,82 @@ yuv2gbrp16_full_X_c(SwsContext *c, const int16_t *lumFilter, } } +static void +yuv2gbrpf32_full_X_c(SwsContext *c, const int16_t *lumFilter, + const int16_t **lumSrcx, int lumFilterSize, + const int16_t *chrFilter, const int16_t **chrUSrcx, + const int16_t **chrVSrcx, int chrFilterSize, + const int16_t **alpSrcx, uint8_t **dest, + int dstW, int y) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); + int i; + int hasAlpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) && alpSrcx; + uint32_t **dest32 = (uint32_t**)dest; + const int32_t **lumSrc = (const int32_t**)lumSrcx; + const int32_t **chrUSrc = (const int32_t**)chrUSrcx; + const int32_t **chrVSrc = (const int32_t**)chrVSrcx; + const int32_t **alpSrc = (const int32_t**)alpSrcx; + static const float float_mult = 1.0f / 65535.0f; + + for (i = 0; i < dstW; i++) { + int j; + int Y = -0x40000000; + int U = -(128 << 23); + int V = -(128 << 23); + int R, G, B, A; + + for (j = 0; j < lumFilterSize; j++) + Y += lumSrc[j][i] * (unsigned)lumFilter[j]; + + for (j = 0; j < chrFilterSize; j++) { + U += chrUSrc[j][i] * (unsigned)chrFilter[j]; + V += chrVSrc[j][i] * (unsigned)chrFilter[j]; + } + + Y >>= 14; + Y += 0x10000; + U >>= 14; + V >>= 14; + + if (hasAlpha) { + A = -0x40000000; + + for (j = 0; j < lumFilterSize; j++) + A += alpSrc[j][i] * (unsigned)lumFilter[j]; + + A >>= 1; + A += 0x20002000; + } + + Y -= c->yuv2rgb_y_offset; + Y *= c->yuv2rgb_y_coeff; + Y += 1 << 13; + R = V * c->yuv2rgb_v2r_coeff; + G = V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; + B = U * c->yuv2rgb_u2b_coeff; + + R = av_clip_uintp2(Y + R, 30); + G = av_clip_uintp2(Y + G, 30); + B = av_clip_uintp2(Y + B, 30); + + dest32[0][i] = av_float2int(float_mult * (float)(G >> 14)); + dest32[1][i] = av_float2int(float_mult * (float)(B >> 14)); + dest32[2][i] = av_float2int(float_mult * (float)(R >> 14)); + if (hasAlpha) + dest32[3][i] = av_float2int(float_mult * (float)(av_clip_uintp2(A, 30) >> 14)); + } + if ((!isBE(c->dstFormat)) != (!HAVE_BIGENDIAN)) { + for (i = 0; i < dstW; i++) { + dest32[0][i] = av_bswap32(dest32[0][i]); + dest32[1][i] = av_bswap32(dest32[1][i]); + dest32[2][i] = av_bswap32(dest32[2][i]); + if (hasAlpha) + dest32[3][i] = av_bswap32(dest32[3][i]); + } + } +} + static void yuv2ya8_1_c(SwsContext *c, const int16_t *buf0, const int16_t *ubuf[2], const int16_t *vbuf[2], @@ -2411,7 +2585,8 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, } else { *yuv2plane1 = yuv2plane1_8_c; *yuv2planeX = yuv2planeX_8_c; - if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21) + if (dstFormat == AV_PIX_FMT_NV12 || dstFormat == AV_PIX_FMT_NV21 || + dstFormat == AV_PIX_FMT_NV24 || dstFormat == AV_PIX_FMT_NV42) *yuv2nv12cX = yuv2nv12cX_c; } @@ -2626,6 +2801,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, case AV_PIX_FMT_GBRAP16LE: *yuv2anyX = yuv2gbrp16_full_X_c; break; + case AV_PIX_FMT_GBRPF32BE: + case AV_PIX_FMT_GBRPF32LE: + case AV_PIX_FMT_GBRAPF32BE: + case AV_PIX_FMT_GBRAPF32LE: + *yuv2anyX = yuv2gbrpf32_full_X_c; + break; } if (!*yuv2packedX && !*yuv2anyX) goto YUV_PACKED; @@ -2802,6 +2983,12 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, *yuv2packed2 = yuv2rgb4b_2_c; *yuv2packedX = yuv2rgb4b_X_c; break; + case AV_PIX_FMT_X2RGB10LE: + case AV_PIX_FMT_X2RGB10BE: + *yuv2packed1 = yuv2x2rgb10_1_c; + *yuv2packed2 = yuv2x2rgb10_2_c; + *yuv2packedX = yuv2x2rgb10_X_c; + break; } } switch (dstFormat) { @@ -2835,6 +3022,16 @@ av_cold void ff_sws_init_output_funcs(SwsContext *c, *yuv2packed2 = yuv2ya8_2_c; *yuv2packedX = yuv2ya8_X_c; break; + case AV_PIX_FMT_YA16LE: + *yuv2packed1 = yuv2ya16le_1_c; + *yuv2packed2 = yuv2ya16le_2_c; + *yuv2packedX = yuv2ya16le_X_c; + break; + case AV_PIX_FMT_YA16BE: + *yuv2packed1 = yuv2ya16be_1_c; + *yuv2packed2 = yuv2ya16be_2_c; + *yuv2packedX = yuv2ya16be_X_c; + break; case AV_PIX_FMT_AYUV64LE: *yuv2packedX = yuv2ayuv64le_X_c; break;