git.sesse.net Git - pistorm/blob - softfloat/fyl2x.c

   1 /*============================================================================
   2 This source file is an extension to the SoftFloat IEC/IEEE Floating-point
   3 Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
   4 floating point emulation.
   5 float_raise(float_flag_invalid)
   6 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
   7 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
   8 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
   9 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  10 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  11 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  12 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
  13 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  14
  15 Derivative works are acceptable, even for commercial purposes, so long as
  16 (1) the source code for the derivative work includes prominent notice that
  17 the work is derivative, and (2) the source code includes prominent notice with
  18 these four paragraphs for those parts of this code that are retained.
  19 =============================================================================*/
  20
  21 /*============================================================================
  22  * Written for Bochs (x86 achitecture simulator) by
  23  *            Stanislav Shwartsman [sshwarts at sourceforge net]
  24  * Adapted for lib/softfloat in MESS by Hans Ostermeyer (03/2012)
  25  * ==========================================================================*/
  26
  27 #define FLOAT128
  28
  29 #define USE_estimateDiv128To64
  30
  31 #include "m68kcpu.h" // which includes softfloat.h after defining the basic types
  32
  33 //#include "softfloat-specialize"
  34 #include "fpu_constant.h"
  35
  36 #define floatx80_log10_2 packFloatx80(0, 0x3ffd, 0x9a209a84fbcff798U)
  37 #define floatx80_ln_2 packFloatx80(0, 0x3ffe, 0xb17217f7d1cf79acU)
  38 #define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U)
  39 #define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU)
  40
  41 #define packFloat_128(zHi, zLo) {(zHi), (zLo)}
  42 #define PACK_FLOAT_128(hi,lo) packFloat_128(LIT64(hi),LIT64(lo))
  43
  44 #define EXP_BIAS 0x3FFF
  45
  46 /*----------------------------------------------------------------------------
  47 | Returns the fraction bits of the extended double-precision floating-point
  48 | value `a'.
  49 *----------------------------------------------------------------------------*/
  50
  51 static inline bits64 extractFloatx80Frac( floatx80 a )
  52 {
  53         return a.low;
  54
  55 }
  56
  57 /*----------------------------------------------------------------------------
  58 | Returns the exponent bits of the extended double-precision floating-point
  59 | value `a'.
  60 *----------------------------------------------------------------------------*/
  61
  62 static inline int32 extractFloatx80Exp( floatx80 a )
  63 {
  64         return a.high & 0x7FFF;
  65
  66 }
  67
  68 /*----------------------------------------------------------------------------
  69 | Returns the sign bit of the extended double-precision floating-point value
  70 | `a'.
  71 *----------------------------------------------------------------------------*/
  72
  73 static inline flag extractFloatx80Sign( floatx80 a )
  74 {
  75         return a.high>>15;
  76
  77 }
  78
  79 #if 0
  80 /*----------------------------------------------------------------------------
  81 | Takes extended double-precision floating-point  NaN  `a' and returns the
  82 | appropriate NaN result. If `a' is a signaling NaN, the invalid exception
  83 | is raised.
  84 *----------------------------------------------------------------------------*/
  85
  86 static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a)
  87 {
  88         if (floatx80_is_signaling_nan(a))
  89                 float_raise(float_flag_invalid);
  90
  91         a.low |= 0xC000000000000000U;
  92
  93         return a;
  94 }
  95 #endif
  96
  97 /*----------------------------------------------------------------------------
  98 | Normalizes the subnormal extended double-precision floating-point value
  99 | represented by the denormalized significand `aSig'.  The normalized exponent
 100 | and significand are stored at the locations pointed to by `zExpPtr' and
 101 | `zSigPtr', respectively.
 102 *----------------------------------------------------------------------------*/
 103
 104 static inline void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
 105 {
 106         int shiftCount = countLeadingZeros64(aSig);
 107         *zSigPtr = aSig<<shiftCount;
 108         *zExpPtr = 1 - shiftCount;
 109 }
 110
 111
 112 /*----------------------------------------------------------------------------
 113 | Returns 1 if the extended double-precision floating-point value `a' is a
 114 | NaN; otherwise returns 0.
 115 *----------------------------------------------------------------------------*/
 116
 117 static inline int floatx80_is_nan(floatx80 a)
 118 {
 119         return ((a.high & 0x7FFF) == 0x7FFF) && (int64_t) (a.low<<1);
 120 }
 121
 122 /*----------------------------------------------------------------------------
 123 | Takes two extended double-precision floating-point values `a' and `b', one
 124 | of which is a NaN, and returns the appropriate NaN result.  If either `a' or
 125 | `b' is a signaling NaN, the invalid exception is raised.
 126 *----------------------------------------------------------------------------*/
 127
 128 static floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b)
 129 {
 130         int aIsNaN = floatx80_is_nan(a);
 131         int aIsSignalingNaN = floatx80_is_signaling_nan(a);
 132         int bIsNaN = floatx80_is_nan(b);
 133         int bIsSignalingNaN = floatx80_is_signaling_nan(b);
 134         a.low |= 0xC000000000000000U;
 135         b.low |= 0xC000000000000000U;
 136         if (aIsSignalingNaN | bIsSignalingNaN) float_raise(float_flag_invalid);
 137         if (aIsSignalingNaN) {
 138                 if (bIsSignalingNaN) goto returnLargerSignificand;
 139                 return bIsNaN ? b : a;
 140         }
 141         else if (aIsNaN) {
 142                 if (bIsSignalingNaN | ! bIsNaN) return a;
 143         returnLargerSignificand:
 144                 if (a.low < b.low) return b;
 145                 if (b.low < a.low) return a;
 146                 return (a.high < b.high) ? a : b;
 147         }
 148         else {
 149                 return b;
 150         }
 151 }
 152
 153 static const float128 float128_one =
 154         packFloat_128(0x3fff000000000000U, 0x0000000000000000U);
 155 static const float128 float128_two =
 156         packFloat_128(0x4000000000000000U, 0x0000000000000000U);
 157
 158 static const float128 float128_ln2inv2 =
 159         packFloat_128(0x400071547652b82fU, 0xe1777d0ffda0d23aU);
 160
 161 #define SQRT2_HALF_SIG  0xb504f333f9de6484U
 162
 163 extern float128 OddPoly(float128 x, float128 *arr, unsigned n);
 164
 165 #define L2_ARR_SIZE 9
 166
 167 static float128 ln_arr[L2_ARR_SIZE] =
 168 {
 169         PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
 170         PACK_FLOAT_128(0x3ffd555555555555, 0x5555555555555555), /*  3 */
 171         PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /*  5 */
 172         PACK_FLOAT_128(0x3ffc249249249249, 0x2492492492492492), /*  7 */
 173         PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /*  9 */
 174         PACK_FLOAT_128(0x3ffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
 175         PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
 176         PACK_FLOAT_128(0x3ffb111111111111, 0x1111111111111111), /* 15 */
 177         PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2)  /* 17 */
 178 };
 179
 180 static float128 poly_ln(float128 x1)
 181 {
 182 /*
 183     //
 184     //                     3     5     7     9     11     13     15
 185     //        1+u         u     u     u     u     u      u      u
 186     // 1/2 ln ---  ~ u + --- + --- + --- + --- + ---- + ---- + ---- =
 187     //        1-u         3     5     7     9     11     13     15
 188     //
 189     //                     2     4     6     8     10     12     14
 190     //                    u     u     u     u     u      u      u
 191     //       = u * [ 1 + --- + --- + --- + --- + ---- + ---- + ---- ] =
 192     //                    3     5     7     9     11     13     15
 193     //
 194     //           3                          3
 195     //          --       4k                --        4k+2
 196     //   p(u) = >  C  * u           q(u) = >  C   * u
 197     //          --  2k                     --  2k+1
 198     //          k=0                        k=0
 199     //
 200     //          1+u                 2
 201     //   1/2 ln --- ~ u * [ p(u) + u * q(u) ]
 202     //          1-u
 203     //
 204 */
 205         return OddPoly(x1, ln_arr, L2_ARR_SIZE);
 206 }
 207
 208 /* required sqrt(2)/2 < x < sqrt(2) */
 209 static float128 poly_l2(float128 x)
 210 {
 211         /* using float128 for approximation */
 212         float128 x_p1 = float128_add(x, float128_one);
 213         float128 x_m1 = float128_sub(x, float128_one);
 214         x = float128_div(x_m1, x_p1);
 215         x = poly_ln(x);
 216         x = float128_mul(x, float128_ln2inv2);
 217         return x;
 218 }
 219
 220 static float128 poly_l2p1(float128 x)
 221 {
 222         /* using float128 for approximation */
 223         float128 x_p2 = float128_add(x, float128_two);
 224         x = float128_div(x, x_p2);
 225         x = poly_ln(x);
 226         x = float128_mul(x, float128_ln2inv2);
 227         return x;
 228 }
 229
 230 // =================================================
 231 // FYL2X                   Compute y * log (x)
 232 //                                        2
 233 // =================================================
 234
 235 //
 236 // Uses the following identities:
 237 //
 238 // 1. ----------------------------------------------------------
 239 //              ln(x)
 240 //   log (x) = -------,  ln (x*y) = ln(x) + ln(y)
 241 //      2       ln(2)
 242 //
 243 // 2. ----------------------------------------------------------
 244 //                1+u             x-1
 245 //   ln (x) = ln -----, when u = -----
 246 //                1-u             x+1
 247 //
 248 // 3. ----------------------------------------------------------
 249 //                        3     5     7           2n+1
 250 //       1+u             u     u     u           u
 251 //   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
 252 //       1-u             3     5     7           2n+1
 253 //
 254
 255 static floatx80 fyl2x(floatx80 a, floatx80 b)
 256 {
 257         uint64_t aSig = extractFloatx80Frac(a);
 258         int32_t aExp = extractFloatx80Exp(a);
 259         int aSign = extractFloatx80Sign(a);
 260         uint64_t bSig = extractFloatx80Frac(b);
 261         int32_t bExp = extractFloatx80Exp(b);
 262         int bSign = extractFloatx80Sign(b);
 263
 264         int zSign = bSign ^ 1;
 265
 266         if (aExp == 0x7FFF) {
 267                 if ((uint64_t) (aSig<<1)
 268                                 || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
 269                 {
 270                         return propagateFloatx80NaN(a, b);
 271                 }
 272                 if (aSign)
 273                 {
 274 invalid:
 275                         float_raise(float_flag_invalid);
 276                         return floatx80_default_nan;
 277                 }
 278                 else {
 279                         if (bExp == 0) {
 280                                 if (bSig == 0) goto invalid;
 281                                 float_raise(float_flag_denormal);
 282                         }
 283                         return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
 284                 }
 285         }
 286         if (bExp == 0x7FFF)
 287         {
 288                 if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b);
 289                 if (aSign && (uint64_t)(aExp | aSig)) goto invalid;
 290                 if (aSig && (aExp == 0))
 291                         float_raise(float_flag_denormal);
 292                 if (aExp < 0x3FFF) {
 293                         return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
 294                 }
 295                 if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0)) goto invalid;
 296                 return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
 297         }
 298         if (aExp == 0) {
 299                 if (aSig == 0) {
 300                         if ((bExp | bSig) == 0) goto invalid;
 301                         float_raise(float_flag_divbyzero);
 302                         return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
 303                 }
 304                 if (aSign) goto invalid;
 305                 float_raise(float_flag_denormal);
 306                 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
 307         }
 308         if (aSign) goto invalid;
 309         if (bExp == 0) {
 310                 if (bSig == 0) {
 311                         if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0);
 312                         return packFloatx80(bSign, 0, 0);
 313                 }
 314                 float_raise(float_flag_denormal);
 315                 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
 316         }
 317         if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0))
 318                 return packFloatx80(bSign, 0, 0);
 319
 320         float_raise(float_flag_inexact);
 321
 322         int ExpDiff = aExp - 0x3FFF;
 323         aExp = 0;
 324         if (aSig >= SQRT2_HALF_SIG) {
 325                 ExpDiff++;
 326                 aExp--;
 327         }
 328
 329         /* ******************************** */
 330         /* using float128 for approximation */
 331         /* ******************************** */
 332
 333         uint64_t zSig0, zSig1;
 334         shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
 335         float128 x = packFloat128(0, aExp+0x3FFF, zSig0, zSig1);
 336         x = poly_l2(x);
 337         x = float128_add(x, int64_to_float128((int64_t) ExpDiff));
 338         return floatx80_mul(b, float128_to_floatx80(x));
 339 }
 340
 341 // =================================================
 342 // FYL2XP1                 Compute y * log (x + 1)
 343 //                                        2
 344 // =================================================
 345
 346 //
 347 // Uses the following identities:
 348 //
 349 // 1. ----------------------------------------------------------
 350 //              ln(x)
 351 //   log (x) = -------
 352 //      2       ln(2)
 353 //
 354 // 2. ----------------------------------------------------------
 355 //                  1+u              x
 356 //   ln (x+1) = ln -----, when u = -----
 357 //                  1-u             x+2
 358 //
 359 // 3. ----------------------------------------------------------
 360 //                        3     5     7           2n+1
 361 //       1+u             u     u     u           u
 362 //   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
 363 //       1-u             3     5     7           2n+1
 364 //
 365
 366 floatx80 fyl2xp1(floatx80 a, floatx80 b)
 367 {
 368         int32_t aExp, bExp;
 369         uint64_t aSig, bSig, zSig0, zSig1, zSig2;
 370         int aSign, bSign;
 371
 372         aSig = extractFloatx80Frac(a);
 373         aExp = extractFloatx80Exp(a);
 374         aSign = extractFloatx80Sign(a);
 375         bSig = extractFloatx80Frac(b);
 376         bExp = extractFloatx80Exp(b);
 377         bSign = extractFloatx80Sign(b);
 378         int zSign = aSign ^ bSign;
 379
 380         if (aExp == 0x7FFF) {
 381                 if ((uint64_t) (aSig<<1)
 382                                 || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
 383                 {
 384                         return propagateFloatx80NaN(a, b);
 385                 }
 386                 if (aSign)
 387                 {
 388 invalid:
 389                         float_raise(float_flag_invalid);
 390                         return floatx80_default_nan;
 391                 }
 392                         else {
 393                         if (bExp == 0) {
 394                                 if (bSig == 0) goto invalid;
 395                                 float_raise(float_flag_denormal);
 396                         }
 397                         return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
 398                 }
 399         }
 400         if (bExp == 0x7FFF)
 401         {
 402                 if ((uint64_t) (bSig<<1))
 403                         return propagateFloatx80NaN(a, b);
 404
 405                 if (aExp == 0) {
 406                         if (aSig == 0) goto invalid;
 407                         float_raise(float_flag_denormal);
 408                 }
 409
 410                 return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
 411         }
 412         if (aExp == 0) {
 413                 if (aSig == 0) {
 414                         if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
 415                         return packFloatx80(zSign, 0, 0);
 416                 }
 417                 float_raise(float_flag_denormal);
 418                 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
 419         }
 420         if (bExp == 0) {
 421                 if (bSig == 0) return packFloatx80(zSign, 0, 0);
 422                 float_raise(float_flag_denormal);
 423                 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
 424         }
 425
 426         float_raise(float_flag_inexact);
 427
 428         if (aSign && aExp >= 0x3FFF)
 429                 return a;
 430
 431         if (aExp >= 0x3FFC) // big argument
 432         {
 433                 return fyl2x(floatx80_add(a, floatx80_one), b);
 434         }
 435
 436         // handle tiny argument
 437         if (aExp < EXP_BIAS-70)
 438         {
 439                 // first order approximation, return (a*b)/ln(2)
 440                 int32_t zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE;
 441
 442         mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2);
 443                 if (0 < (int64_t) zSig0) {
 444                         shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
 445                         --zExp;
 446                 }
 447
 448                 zExp = zExp + bExp - 0x3FFE;
 449         mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2);
 450                 if (0 < (int64_t) zSig0) {
 451                         shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
 452                         --zExp;
 453                 }
 454
 455                 return
 456                         roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1);
 457         }
 458
 459         /* ******************************** */
 460         /* using float128 for approximation */
 461         /* ******************************** */
 462
 463         shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
 464         float128 x = packFloat128(aSign, aExp, zSig0, zSig1);
 465         x = poly_l2p1(x);
 466         return floatx80_mul(b, float128_to_floatx80(x));
 467 }
 468
 469 floatx80 floatx80_flognp1(floatx80 a)
 470 {
 471         return fyl2xp1(a, floatx80_ln_2);
 472 }
 473
 474 floatx80 floatx80_flogn(floatx80 a)
 475 {
 476         return fyl2x(a, floatx80_ln_2);
 477 }
 478
 479 floatx80 floatx80_flog2(floatx80 a)
 480 {
 481         return fyl2x(a, floatx80_one);
 482 }
 483
 484 floatx80 floatx80_flog10(floatx80 a)
 485 {
 486         return fyl2x(a, floatx80_log10_2);
 487 }