X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fvp9dsp_template.c;h=c3273dd726bcc702810c88037b2aceb6dd9f8fc5;hb=ffae62d96c75d4a476eb3890357c4f3e4f8bd4f5;hp=bb54561a60b33bd601013ee6a8886e62b5ad8c4a;hpb=bec3b2041dcc20ab4b06a6b31d09130e1a7166c3;p=ffmpeg diff --git a/libavcodec/vp9dsp_template.c b/libavcodec/vp9dsp_template.c index bb54561a60b..c3273dd726b 100644 --- a/libavcodec/vp9dsp_template.c +++ b/libavcodec/vp9dsp_template.c @@ -1479,38 +1479,38 @@ itxfm_wrap(16, 6) static av_always_inline void idct32_1d(const dctcoef *in, ptrdiff_t stride, dctcoef *out, int pass) { - dctint t0a = ((IN(0) + IN(16)) * 11585 + (1 << 13)) >> 14; - dctint t1a = ((IN(0) - IN(16)) * 11585 + (1 << 13)) >> 14; - dctint t2a = (IN( 8) * 6270 - IN(24) * 15137 + (1 << 13)) >> 14; - dctint t3a = (IN( 8) * 15137 + IN(24) * 6270 + (1 << 13)) >> 14; - dctint t4a = (IN( 4) * 3196 - IN(28) * 16069 + (1 << 13)) >> 14; - dctint t7a = (IN( 4) * 16069 + IN(28) * 3196 + (1 << 13)) >> 14; - dctint t5a = (IN(20) * 13623 - IN(12) * 9102 + (1 << 13)) >> 14; - dctint t6a = (IN(20) * 9102 + IN(12) * 13623 + (1 << 13)) >> 14; - dctint t8a = (IN( 2) * 1606 - IN(30) * 16305 + (1 << 13)) >> 14; - dctint t15a = (IN( 2) * 16305 + IN(30) * 1606 + (1 << 13)) >> 14; - dctint t9a = (IN(18) * 12665 - IN(14) * 10394 + (1 << 13)) >> 14; - dctint t14a = (IN(18) * 10394 + IN(14) * 12665 + (1 << 13)) >> 14; - dctint t10a = (IN(10) * 7723 - IN(22) * 14449 + (1 << 13)) >> 14; - dctint t13a = (IN(10) * 14449 + IN(22) * 7723 + (1 << 13)) >> 14; - dctint t11a = (IN(26) * 15679 - IN( 6) * 4756 + (1 << 13)) >> 14; - dctint t12a = (IN(26) * 4756 + IN( 6) * 15679 + (1 << 13)) >> 14; - dctint t16a = (IN( 1) * 804 - IN(31) * 16364 + (1 << 13)) >> 14; - dctint t31a = (IN( 1) * 16364 + IN(31) * 804 + (1 << 13)) >> 14; - dctint t17a = (IN(17) * 12140 - IN(15) * 11003 + (1 << 13)) >> 14; - dctint t30a = (IN(17) * 11003 + IN(15) * 12140 + (1 << 13)) >> 14; - dctint t18a = (IN( 9) * 7005 - IN(23) * 14811 + (1 << 13)) >> 14; - dctint t29a = (IN( 9) * 14811 + IN(23) * 7005 + (1 << 13)) >> 14; - dctint t19a = (IN(25) * 15426 - IN( 7) * 5520 + (1 << 13)) >> 14; - dctint t28a = (IN(25) * 5520 + IN( 7) * 15426 + (1 << 13)) >> 14; - dctint t20a = (IN( 5) * 3981 - IN(27) * 15893 + (1 << 13)) >> 14; - dctint t27a = (IN( 5) * 15893 + IN(27) * 3981 + (1 << 13)) >> 14; - dctint t21a = (IN(21) * 14053 - IN(11) * 8423 + (1 << 13)) >> 14; - dctint t26a = (IN(21) * 8423 + IN(11) * 14053 + (1 << 13)) >> 14; - dctint t22a = (IN(13) * 9760 - IN(19) * 13160 + (1 << 13)) >> 14; - dctint t25a = (IN(13) * 13160 + IN(19) * 9760 + (1 << 13)) >> 14; - dctint t23a = (IN(29) * 16207 - IN( 3) * 2404 + (1 << 13)) >> 14; - dctint t24a = (IN(29) * 2404 + IN( 3) * 16207 + (1 << 13)) >> 14; + dctint t0a = (dctint)((IN(0) + IN(16)) * 11585U + (1 << 13)) >> 14; + dctint t1a = (dctint)((IN(0) - IN(16)) * 11585U + (1 << 13)) >> 14; + dctint t2a = (dctint)(IN( 8) * 6270U - IN(24) * 15137U + (1 << 13)) >> 14; + dctint t3a = (dctint)(IN( 8) * 15137U + IN(24) * 6270U + (1 << 13)) >> 14; + dctint t4a = (dctint)(IN( 4) * 3196U - IN(28) * 16069U + (1 << 13)) >> 14; + dctint t7a = (dctint)(IN( 4) * 16069U + IN(28) * 3196U + (1 << 13)) >> 14; + dctint t5a = (dctint)(IN(20) * 13623U - IN(12) * 9102U + (1 << 13)) >> 14; + dctint t6a = (dctint)(IN(20) * 9102U + IN(12) * 13623U + (1 << 13)) >> 14; + dctint t8a = (dctint)(IN( 2) * 1606U - IN(30) * 16305U + (1 << 13)) >> 14; + dctint t15a = (dctint)(IN( 2) * 16305U + IN(30) * 1606U + (1 << 13)) >> 14; + dctint t9a = (dctint)(IN(18) * 12665U - IN(14) * 10394U + (1 << 13)) >> 14; + dctint t14a = (dctint)(IN(18) * 10394U + IN(14) * 12665U + (1 << 13)) >> 14; + dctint t10a = (dctint)(IN(10) * 7723U - IN(22) * 14449U + (1 << 13)) >> 14; + dctint t13a = (dctint)(IN(10) * 14449U + IN(22) * 7723U + (1 << 13)) >> 14; + dctint t11a = (dctint)(IN(26) * 15679U - IN( 6) * 4756U + (1 << 13)) >> 14; + dctint t12a = (dctint)(IN(26) * 4756U + IN( 6) * 15679U + (1 << 13)) >> 14; + dctint t16a = (dctint)(IN( 1) * 804U - IN(31) * 16364U + (1 << 13)) >> 14; + dctint t31a = (dctint)(IN( 1) * 16364U + IN(31) * 804U + (1 << 13)) >> 14; + dctint t17a = (dctint)(IN(17) * 12140U - IN(15) * 11003U + (1 << 13)) >> 14; + dctint t30a = (dctint)(IN(17) * 11003U + IN(15) * 12140U + (1 << 13)) >> 14; + dctint t18a = (dctint)(IN( 9) * 7005U - IN(23) * 14811U + (1 << 13)) >> 14; + dctint t29a = (dctint)(IN( 9) * 14811U + IN(23) * 7005U + (1 << 13)) >> 14; + dctint t19a = (dctint)(IN(25) * 15426U - IN( 7) * 5520U + (1 << 13)) >> 14; + dctint t28a = (dctint)(IN(25) * 5520U + IN( 7) * 15426U + (1 << 13)) >> 14; + dctint t20a = (dctint)(IN( 5) * 3981U - IN(27) * 15893U + (1 << 13)) >> 14; + dctint t27a = (dctint)(IN( 5) * 15893U + IN(27) * 3981U + (1 << 13)) >> 14; + dctint t21a = (dctint)(IN(21) * 14053U - IN(11) * 8423U + (1 << 13)) >> 14; + dctint t26a = (dctint)(IN(21) * 8423U + IN(11) * 14053U + (1 << 13)) >> 14; + dctint t22a = (dctint)(IN(13) * 9760U - IN(19) * 13160U + (1 << 13)) >> 14; + dctint t25a = (dctint)(IN(13) * 13160U + IN(19) * 9760U + (1 << 13)) >> 14; + dctint t23a = (dctint)(IN(29) * 16207U - IN( 3) * 2404U + (1 << 13)) >> 14; + dctint t24a = (dctint)(IN(29) * 2404U + IN( 3) * 16207U + (1 << 13)) >> 14; dctint t0 = t0a + t3a; dctint t1 = t1a + t2a; @@ -1545,20 +1545,20 @@ static av_always_inline void idct32_1d(const dctcoef *in, ptrdiff_t stride, dctint t30 = t31a - t30a; dctint t31 = t31a + t30a; - t5a = ((t6 - t5) * 11585 + (1 << 13)) >> 14; - t6a = ((t6 + t5) * 11585 + (1 << 13)) >> 14; - t9a = ( t14 * 6270 - t9 * 15137 + (1 << 13)) >> 14; - t14a = ( t14 * 15137 + t9 * 6270 + (1 << 13)) >> 14; - t10a = (-(t13 * 15137 + t10 * 6270) + (1 << 13)) >> 14; - t13a = ( t13 * 6270 - t10 * 15137 + (1 << 13)) >> 14; - t17a = ( t30 * 3196 - t17 * 16069 + (1 << 13)) >> 14; - t30a = ( t30 * 16069 + t17 * 3196 + (1 << 13)) >> 14; - t18a = (-(t29 * 16069 + t18 * 3196) + (1 << 13)) >> 14; - t29a = ( t29 * 3196 - t18 * 16069 + (1 << 13)) >> 14; - t21a = ( t26 * 13623 - t21 * 9102 + (1 << 13)) >> 14; - t26a = ( t26 * 9102 + t21 * 13623 + (1 << 13)) >> 14; - t22a = (-(t25 * 9102 + t22 * 13623) + (1 << 13)) >> 14; - t25a = ( t25 * 13623 - t22 * 9102 + (1 << 13)) >> 14; + t5a = (dctint)((t6 - t5) * 11585U + (1 << 13)) >> 14; + t6a = (dctint)((t6 + t5) * 11585U + (1 << 13)) >> 14; + t9a = (dctint)( t14 * 6270U - t9 * 15137U + (1 << 13)) >> 14; + t14a = (dctint)( t14 * 15137U + t9 * 6270U + (1 << 13)) >> 14; + t10a = (dctint)(-(t13 * 15137U + t10 * 6270U) + (1 << 13)) >> 14; + t13a = (dctint)( t13 * 6270U - t10 * 15137U + (1 << 13)) >> 14; + t17a = (dctint)( t30 * 3196U - t17 * 16069U + (1 << 13)) >> 14; + t30a = (dctint)( t30 * 16069U + t17 * 3196U + (1 << 13)) >> 14; + t18a = (dctint)(-(t29 * 16069U + t18 * 3196U) + (1 << 13)) >> 14; + t29a = (dctint)( t29 * 3196U - t18 * 16069U + (1 << 13)) >> 14; + t21a = (dctint)( t26 * 13623U - t21 * 9102U + (1 << 13)) >> 14; + t26a = (dctint)( t26 * 9102U + t21 * 13623U + (1 << 13)) >> 14; + t22a = (dctint)(-(t25 * 9102U + t22 * 13623U) + (1 << 13)) >> 14; + t25a = (dctint)( t25 * 13623U - t22 * 9102U + (1 << 13)) >> 14; t0a = t0 + t7; t1a = t1 + t6a; @@ -1593,18 +1593,18 @@ static av_always_inline void idct32_1d(const dctcoef *in, ptrdiff_t stride, t30 = t30a + t29a; t31a = t31 + t28; - t10a = ((t13 - t10) * 11585 + (1 << 13)) >> 14; - t13a = ((t13 + t10) * 11585 + (1 << 13)) >> 14; - t11 = ((t12a - t11a) * 11585 + (1 << 13)) >> 14; - t12 = ((t12a + t11a) * 11585 + (1 << 13)) >> 14; - t18a = ( t29 * 6270 - t18 * 15137 + (1 << 13)) >> 14; - t29a = ( t29 * 15137 + t18 * 6270 + (1 << 13)) >> 14; - t19 = ( t28a * 6270 - t19a * 15137 + (1 << 13)) >> 14; - t28 = ( t28a * 15137 + t19a * 6270 + (1 << 13)) >> 14; - t20 = (-(t27a * 15137 + t20a * 6270) + (1 << 13)) >> 14; - t27 = ( t27a * 6270 - t20a * 15137 + (1 << 13)) >> 14; - t21a = (-(t26 * 15137 + t21 * 6270) + (1 << 13)) >> 14; - t26a = ( t26 * 6270 - t21 * 15137 + (1 << 13)) >> 14; + t10a = (dctint)((t13 - t10) * 11585U + (1 << 13)) >> 14; + t13a = (dctint)((t13 + t10) * 11585U + (1 << 13)) >> 14; + t11 = (dctint)((t12a - t11a) * 11585U + (1 << 13)) >> 14; + t12 = (dctint)((t12a + t11a) * 11585U + (1 << 13)) >> 14; + t18a = (dctint)( t29 * 6270U - t18 * 15137U + (1 << 13)) >> 14; + t29a = (dctint)( t29 * 15137U + t18 * 6270U + (1 << 13)) >> 14; + t19 = (dctint)( t28a * 6270U - t19a * 15137U + (1 << 13)) >> 14; + t28 = (dctint)( t28a * 15137U + t19a * 6270U + (1 << 13)) >> 14; + t20 = (dctint)(-(t27a * 15137U + t20a * 6270U) + (1 << 13)) >> 14; + t27 = (dctint)( t27a * 6270U - t20a * 15137U + (1 << 13)) >> 14; + t21a = (dctint)(-(t26 * 15137U + t21 * 6270U) + (1 << 13)) >> 14; + t26a = (dctint)( t26 * 6270U - t21 * 15137U + (1 << 13)) >> 14; t0 = t0a + t15a; t1 = t1a + t14; @@ -1639,14 +1639,14 @@ static av_always_inline void idct32_1d(const dctcoef *in, ptrdiff_t stride, t30a = t30 + t25; t31 = t31a + t24a; - t20 = ((t27a - t20a) * 11585 + (1 << 13)) >> 14; - t27 = ((t27a + t20a) * 11585 + (1 << 13)) >> 14; - t21a = ((t26 - t21 ) * 11585 + (1 << 13)) >> 14; - t26a = ((t26 + t21 ) * 11585 + (1 << 13)) >> 14; - t22 = ((t25a - t22a) * 11585 + (1 << 13)) >> 14; - t25 = ((t25a + t22a) * 11585 + (1 << 13)) >> 14; - t23a = ((t24 - t23 ) * 11585 + (1 << 13)) >> 14; - t24a = ((t24 + t23 ) * 11585 + (1 << 13)) >> 14; + t20 = (dctint)((t27a - t20a) * 11585U + (1 << 13)) >> 14; + t27 = (dctint)((t27a + t20a) * 11585U + (1 << 13)) >> 14; + t21a = (dctint)((t26 - t21 ) * 11585U + (1 << 13)) >> 14; + t26a = (dctint)((t26 + t21 ) * 11585U + (1 << 13)) >> 14; + t22 = (dctint)((t25a - t22a) * 11585U + (1 << 13)) >> 14; + t25 = (dctint)((t25a + t22a) * 11585U + (1 << 13)) >> 14; + t23a = (dctint)((t24 - t23 ) * 11585U + (1 << 13)) >> 14; + t24a = (dctint)((t24 + t23 ) * 11585U + (1 << 13)) >> 14; out[ 0] = t0 + t31; out[ 1] = t1 + t30a;