From b877c1eae5d301a3ce36e0ba9b0b830a95e06ec8 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Mon, 16 Oct 2017 22:30:57 +0200 Subject: [PATCH] Save some needless sign extending, and fix an escaping bug. --- coded.dat | Bin 408505 -> 408494 bytes rans.shader | 18 ++++++++++-------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/coded.dat b/coded.dat index 3980407c36b3a939eda4e5c24eec4998427f9957..bfe60fb37829987db21a3e54e59eb651c1a2fa9e 100644 GIT binary patch delta 2938 zcmYM0c~DdL9>swG0YVaXl+_T0N8A$^)_}QWvyM#abYQsGt?- zqU>vKC@3PLQXfK)5U>a$B8ti)Np28XBtY^m+L_Lr@15UYch2X``7L)A*8z*`>ak`V zY*MIZw7GnRo+LG#LR(}y43&bfx{g@C`U7_nbq1v|FbGk;PO|7CW~?TfjpaAltPb3soi=O zMGtYoUJ?Sv!g#aXEB6`*M*zC*CgXK3qUMI``2`0*99?IK?O+% z0rC7}Wqa3$3Yl`#bHaoa1ZTh*NFNB@!cjmnp71@>Th>JD7A*&3)k4ggFf;1>Z_R@^ z*I{MTVL{6Ay1SUfl3N`uwyGMNi-o7Mqk)8$WU(xqy19#uZ@H09`L#URsXBFM&Hp}W zB!`vOy}US0&}dpbbr zYYB1q)?<3-Q`YFB(CV`X?2MXYKcmK~Ay<$fOV$cITX25h8ThECf}qPRLmq*1Q{pJ{ zjF6~bXfe)O)UEOZ+&EfH5$H;}ow-1Zsk+=3#ME;YY;&m_KfIu?JN%9G*S19SGw0_^ z%lB3FWknsrUCFAT?8Teg7*(x4E_Yvyycl0++Fo+xuhk2WMfM4r1xl52$F}#s*-eIL zJ+Z=JbF?xqj+HkT}|7Y3Fo*0mRr$W}OJ+_C`e z+Y=AW)+`@`uX`_1M*m!O7ar(XA&R|2yiq)WGdid|Tu|1Yb2L4W_QTtR7+Nq_N|((l z=+ofT#9LrusFUB3miwwPy&HUvTyr#|w*6wk9`eTd*B|8Qvs=hJv_!L7*%65^BtflL z!CJv(G1CV&iXVX?5ldzF#Z$AgGpKhz;nD||zCs}?CLMU;;TBXq4>RLBKX*OpUhY`o zk=dS#OZiv53twFe+ws@$1Jv)+5k9D_vP#246YbgXRH-TVdG?jecM0 zK5(YvJb7rzT^eD5Le`)cK)sJor$651-J@k5x&q`C)!YaBUjh@;z{E(`2>4D$mo6Cu zqgHLdG#7$0SJ+|xEe%AvR;PU7I&TgSR!LsbNVYP0wC9t(P<1#@hjT7y@M?q+W2?gn zDY+}6Y>rTuWf`G$18oi4mYkv!f1hOeb=h?#Z{t`=1Q{I(+o$&oXNXe!ES@mUMnpH_ zT7^=T$4UGjEez38FHrR4Jz>z8z^-3*HFTxHg!smz~ zpODz*)zq+W5>f2%O#%D(`!;)eu?jd7}<%55ux#ZK|9oF zFt@MUxNe2ALQel9z`6Qf_PN3kvXQ}^zu8ypO{r+oLtG3kB&SRDHd&}jmTdF*?%K1g zk5@)3cXyNN1;nsxlEYaA+0H}TB2&EvryEE6zKzYwnroSr4mJZ%QpA)>8%8+)7%)Vby!Z+)Q2D%J*|kS#IhtOM~6_eWVK#zfzVv89Z_3*(!YB8zwf@KQ}794=n^HmNki*cE*j}Up>;|{07So`f1nr`c8Ij z=NZ$ZGK>tf6+erntra$F5?WcU7p}*W1V$EUqNbJ{ zoO%k1xO5k^yPOqDl)t2%YIt~(i8cCuPCOvO4-2eu}1Sqb-oy%z$t;vO?z(okLW4xLrftq za%)NpN@F(sPygdnl*x?sFKAwYPXV~@-;RN(NcIswksC7yRQr#ELoX$>VwAtO7CbDT zRexeTz!y5{<7_3HHn?fY7hb6K2_Ez#QA7d_Ymmz}{CsRfO^0+*JMRb&{!6SeqvoVc z=FFKO@cw&SM?l#QJ9vP>Gsvfm`C;&}vV7iRUa!K&f^A`<&DJ;2V!G18%%MEj=F7*1 zXr-e;P`wy=o2i$0e zDnq`K+W@xiw50C{G}H5m;DXJU5F1kna4k+5Or1m8Gp5jx9<3{~*&IG^#UObUx6W6K zcty{z1ADtS`g%#c_BsuLsv7=r`GoUDfcr!tnQkK#`WcK?1KjjxKq9C*Nys~vu%Ese zjl(9#QEv87wM>JRWHI+vX~`(=3m6%af@Z9Z0H!W_&i1ml=d<>GX=_E zy*daJwuuRXt*@`C4<~Bsyz<1gVccrdo&8@4NC4VqmJvxxN7@XeA(s0oVAR>7JcK}0 zoa5^U4uP+>(YvOmlpxqg@4O$$Tcj2MMP;`LMLXevw+UrsU^pKQH<%$HZFjruM>!xo zDr?UkY?x8lKt%R4`E_ngju7@!j!ocf2K5=1qR4M$RR1E|qq6=JKRNHsG|doi0PAXn zE)A7jW|G5|>$#y|zZ_gtE3|YB*e1~PYVAk1P*aQ)+Dnqz8Nz!h^zwo(-uwJEAg#E9 zE7@Z-8;t*3bdOpq_jv~@pe|ZVRHaz_xx9R8X>5lf$DO_7hUY7ByGah?fITCf99oXo zn2g!Et5L9d?x-Mce<^ssSM5b7DM)dKNU$45)HE^FCu(ZyBamzZ6DT99#`Y<)cGAEOIIu=#pOQ?GxeO9SnqEczIvyBILkbF=5QFE zG?vW^Pdu2BkVpTW)!ftt_Vk0pqp!ht?fJXNVoY!oy1)Q?;Sja(lm+ZDhSxNsZVnGar! z%RwCi#?7A!kas4W$RiY$S3%8xKxpa`KVjV(86!+ggHjndGb@pqi5qxDjOr#{|FI83 pE4#Hf0S4hDPT$b&D<&{(q#dsIUM4 delta 2973 zcmX|C3piAH8^;*Tg)t+hCYR7g8RMFvZ42GhTu@PbQ!BL9noFAvsZ5vM3^S&dc8lFU zMdWs7Tq;sYyT!ybl5}y)3SnmcgF;3-^BwuV?|I($ch2*i^PKa0fB)b6zLJ^6cV-q> zIxpheZ58Ykj*!q%vBn=AR-DHFCcIYSv)SD-f#j{nzU%JqjJdPnRxVc4a%sCBaRSw+ zRW3VvznQp)Bxw>_;d+nkYmSRx$v!(o)@NRJ*nsoD@l$6Ov#5z;8qO6lZ$ORWogoc( zAvJz?}-0zFy}l8uyPZ}tFmH7UXj`4QZ@d$pjlA_%H}aGGP7@Jl1~b7rmuFOHyB zto|f>y^?9+>t{cSb5pzfUIHdul!*fSx4KLwsd6tZiE4g%bL_y$V+gI*J*0;{BSXzAzmH%9OYuc?z%hH1C0UBhR;8(sbCr%K4 z`P)7DP3UQ(`u0iDjXART&9D-HaWD?Y1f{j|t+;h)KXfc$f?eDCL^A834|Uc{9p^DL z=UeNo`~pONrWcB2`f1rA_XDT?s2R49xe0Sqazct}`|ir8p5a{Fo^bCa6r^4$Nr+h? z<;7Ijo0(M?lz$~MsqJ1-kTxWEHsVB-wX}{F(aNfw?56C0S!<;d`3fayb!3;LB{@Rn+wD>owpOw zt|zS@j+hlRw+NUq7X{Zs+2K};En5q<`F^S$!lN<7n`LF+-^B=I-x0Bwwg0qe*HVi; zKGFnFP2AjUH>GDknj43%k&98Sxlr{F*`qq3(^IrsEnEt>uLCH%DBf*ypd8UjNefbb zDdPWiEbG_9vHT0dIy>vn=b(sz{5(XPnJuaNx$>ZC=hZ~q>A^H3oU#i+v&N*5j2n^v z-1oTb5%e@hGTBnZGqZRiH`*nxkP#!Nul`AEaL6y~Qkv}3CR{GNBRrhCCFIl)BMM?< zs>EZH8s-=n8Ga`je*Hw;l#=zf=2)xb6?%b$-2$n3#14ngAQdUe#oF+8GD!!bmz)T`pHy3ieT?X!DrU+?F_RY1D& zdt3%Ox^7#YM920?5?#5=mTB80dF`W$$+Z3=hQz~<_kfW>DZ*}^$C5cCzQ$r+ z_XV97&cTXVu{UIM?BNKm>*gIhm@B=!orA@+FlK~g2hH8lO-9&EC$FKbW>Y;^u&Ha< zRG$DJ5z%hB$VO*~r3s4H-+0valP46hn0Tu(@ek^fWH!O)L2M>-_pjB4OqYCZi;qvj zg-aK{pSjpy8Xyb3Y#+-KHbn_83tmynqm%r)@_XA>#rccIl=YLHnA?)hX#=H>u!0@0 zz6REvYrZx7aR%sgZN?*+UT-7~1Ctbuc&xu3bg9~qDA+YM6q1JpUs}4FzBPQ2p&O>{ zM5&d5yz_Klll)~El!A*`?fPz1k9Hq+@}n^$bHV6ptu#Pal?i{cK@A7zziWFUx& zJQE*Aal{qQDw?R02J2CdmCYLUwqW_i)^heh^PlEfW_13l+w88tsc%Iy1P;Q_UsNd& z-5)EEgg;oSv7HYoi9}kWaPr6VYzW9ugRv3N(M&n-%0;c-l|czmccves{H*>8nFkWZdP@eA{e<_ zEOsA`5B-bdbz#DQ@~`1Hy^6^eZm_X~sbu3p$q|Z#QoRD_qOe_Q>q#-0>DFT{d>IEL z!Rl?!=q#QE{L6*;2O~1;w|LV_5DQ{??weZATK7!etp`<5Q;#8S(1 zrlIUMjV;NVM?e~4H3nYv4PiFc^NM}}c=6g2Jq(Rz!5~x@8Ern*g|Y1B+$w2(3YApf z%y_5@vOa28-guTK>f*WG|EVhG4*4;ZLNELp`@yjQq7SJ*0=(z04W8>1Upe4oDZ7#? zT|n`@R>iHl!{bHZ@f~K8gT98@;Q0X9F`hD-ANH!3E%OfJvVWx6$j`)1X>8#}hUZO9 zEXCA_UxfIFcENzwKmHk$G)lHv#WcaL#e99G;bWD3 zPD^mz)CW;3J}lBiv8gA`7j-5IXPz^6ic|8k(M{&pMccQ$24HQk;?Bs2SpeF^Pws73 zP1Apu(#r8PQ;$-!{>hbyAZojNQr@)KDuSD?5 zj~Gv%p$T6spSUQ$SIYjCZb)BRH7uW6V> 24); } -void encode_coeff(uint coeff, uint bits, uint sign_bias, inout RansEncoder enc) +int sign_extend(uint coeff, uint bits) +{ + return int(coeff << (32 - bits)) >> (32 - bits); +} + +void encode_coeff(int signed_k, uint sign_bias, inout RansEncoder enc) { - // Sign-extend to recover the coefficient. - // FIXME: not needed for the bits == 8 case! - int signed_k = int(coeff << (32 - bits)) >> (32 - bits); uint k = abs(signed_k); if (k >= ESCAPE_LIMIT) { // ... boring stuff here - RansEncPut(enc.rans, enc.rans_offset, k, 1, prob_bits); + RansEncPut(enc.rans, enc.rans_offset, k, 1, prob_bits - 1); k = ESCAPE_LIMIT; } @@ -129,8 +131,8 @@ void encode_9_7(uint streamgroup_num, uint coeff_row, layout(r16ui) restrict rea uint y = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) / 160; uint f = imageLoad(tex, ivec2(x, y * 8 + coeff_row)).x; - encode_coeff(f & 0x1ffu, 9, sign_bias1, enc1); - encode_coeff(f >> 9, 7, sign_bias2, enc2); + encode_coeff(sign_extend(f & 0x1ffu, 9), sign_bias1, enc1); + encode_coeff(sign_extend(f >> 9, 7), sign_bias2, enc2); } encode_end(enc1); @@ -150,7 +152,7 @@ void encode_8(uint streamgroup_num, uint coeff_row, layout(r8i) restrict readonl uint y = (streamgroup_num * BLOCKS_PER_STREAM + subblock_idx) / 160; int f = imageLoad(tex, ivec2(x, y * 8 + coeff_row)).x; - encode_coeff(f, 8, sign_bias, enc); + encode_coeff(f, sign_bias, enc); } encode_end(enc); -- 2.39.2