git.sesse.net Git - pistorm/blob - softfloat/fsincos.c

   1 /*============================================================================
   2 This source file is an extension to the SoftFloat IEC/IEEE Floating-point
   3 Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
   4 floating point emulation.
   5
   6 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
   7 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
   8 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
   9 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  10 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  11 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  12 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
  13 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  14
  15 Derivative works are acceptable, even for commercial purposes, so long as
  16 (1) the source code for the derivative work includes prominent notice that
  17 the work is derivative, and (2) the source code includes prominent notice with
  18 these four paragraphs for those parts of this code that are retained.
  19 =============================================================================*/
  20
  21 /*============================================================================
  22  * Written for Bochs (x86 achitecture simulator) by
  23  *            Stanislav Shwartsman [sshwarts at sourceforge net]
  24  * ==========================================================================*/
  25
  26 #define FLOAT128
  27
  28 #define USE_estimateDiv128To64
  29
  30 #include "m68kcpu.h" // which includes softfloat.h after defining the basic types
  31 #include "assert.h"
  32
  33 //#include "softfloat-specialize"
  34 #include "fpu_constant.h"
  35
  36 #define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U)
  37 #define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU)
  38
  39 #define packFloat2x128m(zHi, zLo) {(zHi), (zLo)}
  40 #define PACK_FLOAT_128(hi,lo) packFloat2x128m(LIT64(hi),LIT64(lo))
  41
  42 #define EXP_BIAS 0x3FFF
  43
  44 /*----------------------------------------------------------------------------
  45 | Returns the fraction bits of the extended double-precision floating-point
  46 | value `a'.
  47 *----------------------------------------------------------------------------*/
  48
  49 static inline bits64 extractFloatx80Frac( floatx80 a )
  50 {
  51         return a.low;
  52
  53 }
  54
  55 /*----------------------------------------------------------------------------
  56 | Returns the exponent bits of the extended double-precision floating-point
  57 | value `a'.
  58 *----------------------------------------------------------------------------*/
  59
  60 static inline int32 extractFloatx80Exp( floatx80 a )
  61 {
  62         return a.high & 0x7FFF;
  63
  64 }
  65
  66 /*----------------------------------------------------------------------------
  67 | Returns the sign bit of the extended double-precision floating-point value
  68 | `a'.
  69 *----------------------------------------------------------------------------*/
  70
  71 static inline flag extractFloatx80Sign( floatx80 a )
  72 {
  73         return a.high>>15;
  74
  75 }
  76
  77 /*----------------------------------------------------------------------------
  78 | Takes extended double-precision floating-point  NaN  `a' and returns the
  79 | appropriate NaN result. If `a' is a signaling NaN, the invalid exception
  80 | is raised.
  81 *----------------------------------------------------------------------------*/
  82
  83 static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a)
  84 {
  85         if (floatx80_is_signaling_nan(a))
  86                 float_raise(float_flag_invalid);
  87
  88         a.low |= 0xC000000000000000U;
  89
  90         return a;
  91 }
  92
  93 /*----------------------------------------------------------------------------
  94 | Normalizes the subnormal extended double-precision floating-point value
  95 | represented by the denormalized significand `aSig'.  The normalized exponent
  96 | and significand are stored at the locations pointed to by `zExpPtr' and
  97 | `zSigPtr', respectively.
  98 *----------------------------------------------------------------------------*/
  99
 100 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
 101 {
 102         int shiftCount = countLeadingZeros64(aSig);
 103         *zSigPtr = aSig<<shiftCount;
 104         *zExpPtr = 1 - shiftCount;
 105 }
 106
 107 /* reduce trigonometric function argument using 128-bit precision
 108    M_PI approximation */
 109 static uint64_t argument_reduction_kernel(uint64_t aSig0, int Exp, uint64_t *zSig0, uint64_t *zSig1)
 110 {
 111         uint64_t term0, term1, term2;
 112         uint64_t aSig1 = 0;
 113
 114         shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
 115         uint64_t q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
 116         mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
 117         sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
 118         while ((int64_t)(*zSig1) < 0) {
 119                 --q;
 120                 add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
 121         }
 122         *zSig1 = term2;
 123         return q;
 124 }
 125
 126 static int reduce_trig_arg(int expDiff, int zSign, uint64_t aSig0, uint64_t aSig1)
 127 {
 128         uint64_t term0, term1, q = 0;
 129
 130         if (expDiff < 0) {
 131                 shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
 132                 expDiff = 0;
 133         }
 134         if (expDiff > 0) {
 135                 q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1);
 136         }
 137         else {
 138                 if (FLOAT_PI_HI <= aSig0) {
 139                         aSig0 -= FLOAT_PI_HI;
 140                         q = 1;
 141                 }
 142         }
 143
 144         shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
 145         if (! lt128(aSig0, aSig1, term0, term1))
 146         {
 147                 int lt = lt128(term0, term1, aSig0, aSig1);
 148                 int eq = eq128(aSig0, aSig1, term0, term1);
 149
 150                 if ((eq && (q & 1)) || lt) {
 151                         zSign = !zSign;
 152                         ++q;
 153                 }
 154                 if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1);
 155         }
 156
 157         return (int)(q & 3);
 158 }
 159
 160 #define SIN_ARR_SIZE 11
 161 #define COS_ARR_SIZE 11
 162
 163 static float128 sin_arr[SIN_ARR_SIZE] =
 164 {
 165         PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
 166         PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /*  3 */
 167         PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /*  5 */
 168         PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /*  7 */
 169         PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /*  9 */
 170         PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
 171         PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
 172         PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
 173         PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */
 174         PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */
 175         PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45)  /* 21 */
 176 };
 177
 178 static float128 cos_arr[COS_ARR_SIZE] =
 179 {
 180         PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  0 */
 181         PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /*  2 */
 182         PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /*  4 */
 183         PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /*  6 */
 184         PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /*  8 */
 185         PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
 186         PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
 187         PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
 188         PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */
 189         PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */
 190         PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb)  /* 20 */
 191 };
 192
 193 extern float128 OddPoly (float128 x, float128 *arr, unsigned n);
 194
 195 /* 0 <= x <= pi/4 */
 196 static inline float128 poly_sin(float128 x)
 197 {
 198         //                 3     5     7     9     11     13     15
 199         //                x     x     x     x     x      x      x
 200         // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- =
 201         //                3!    5!    7!    9!    11!    13!    15!
 202         //
 203         //                 2     4     6     8     10     12     14
 204         //                x     x     x     x     x      x      x
 205         //   = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] =
 206         //                3!    5!    7!    9!    11!    13!    15!
 207         //
 208         //           3                          3
 209         //          --       4k                --        4k+2
 210         //   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
 211         //          --  2k                     --  2k+1
 212         //          k=0                        k=0
 213         //
 214         //                          2
 215         //   sin(x) ~ x * [ p(x) + x * q(x) ]
 216         //
 217
 218         return OddPoly(x, sin_arr, SIN_ARR_SIZE);
 219 }
 220
 221 extern float128 EvenPoly(float128 x, float128 *arr, unsigned n);
 222
 223 /* 0 <= x <= pi/4 */
 224 static inline float128 poly_cos(float128 x)
 225 {
 226         //                 2     4     6     8     10     12     14
 227         //                x     x     x     x     x      x      x
 228         // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
 229         //                2!    4!    6!    8!    10!    12!    14!
 230         //
 231         //           3                          3
 232         //          --       4k                --        4k+2
 233         //   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
 234         //          --  2k                     --  2k+1
 235         //          k=0                        k=0
 236         //
 237         //                      2
 238         //   cos(x) ~ [ p(x) + x * q(x) ]
 239         //
 240
 241         return EvenPoly(x, cos_arr, COS_ARR_SIZE);
 242 }
 243
 244 static inline void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
 245 {
 246         if (sin_a) *sin_a = a;
 247         if (cos_a) *cos_a = a;
 248 }
 249
 250 static inline void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
 251 {
 252         if (sin_a) *sin_a = a;
 253         if (cos_a) *cos_a = floatx80_one;
 254 }
 255
 256 static floatx80 sincos_approximation(int neg, float128 r, uint64_t quotient)
 257 {
 258         if (quotient & 0x1) {
 259                 r = poly_cos(r);
 260                 neg = 0;
 261         } else  {
 262                 r = poly_sin(r);
 263         }
 264
 265         floatx80 result = float128_to_floatx80(r);
 266         if (quotient & 0x2)
 267                 neg = ! neg;
 268
 269         if (neg)
 270                 result = floatx80_chs(result);
 271
 272         return result;
 273 }
 274
 275 // =================================================
 276 // SFFSINCOS               Compute sin(x) and cos(x)
 277 // =================================================
 278
 279 //
 280 // Uses the following identities:
 281 // ----------------------------------------------------------
 282 //
 283 //  sin(-x) = -sin(x)
 284 //  cos(-x) =  cos(x)
 285 //
 286 //  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
 287 //  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
 288 //
 289 //  sin(x+ pi/2)  =  cos(x)
 290 //  sin(x+ pi)    = -sin(x)
 291 //  sin(x+3pi/2)  = -cos(x)
 292 //  sin(x+2pi)    =  sin(x)
 293 //
 294
 295 int sf_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a)
 296 {
 297         uint64_t aSig0, aSig1 = 0;
 298         int32_t aExp, zExp, expDiff;
 299         int aSign, zSign;
 300         int q = 0;
 301
 302         aSig0 = extractFloatx80Frac(a);
 303         aExp = extractFloatx80Exp(a);
 304         aSign = extractFloatx80Sign(a);
 305
 306         /* invalid argument */
 307         if (aExp == 0x7FFF) {
 308                 if ((uint64_t) (aSig0<<1)) {
 309                         sincos_invalid(sin_a, cos_a, propagateFloatx80NaNOneArg(a));
 310                         return 0;
 311                 }
 312
 313                 float_raise(float_flag_invalid);
 314                 sincos_invalid(sin_a, cos_a, floatx80_default_nan);
 315                 return 0;
 316         }
 317
 318         if (aExp == 0) {
 319                 if (aSig0 == 0) {
 320                         sincos_tiny_argument(sin_a, cos_a, a);
 321                         return 0;
 322                 }
 323
 324 //        float_raise(float_flag_denormal);
 325
 326                 /* handle pseudo denormals */
 327                 if (! (aSig0 & 0x8000000000000000U))
 328                 {
 329                         float_raise(float_flag_inexact);
 330                         if (sin_a)
 331                                 float_raise(float_flag_underflow);
 332                         sincos_tiny_argument(sin_a, cos_a, a);
 333                         return 0;
 334                 }
 335
 336                 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
 337         }
 338
 339         zSign = aSign;
 340         zExp = EXP_BIAS;
 341         expDiff = aExp - zExp;
 342
 343         /* argument is out-of-range */
 344         if (expDiff >= 63)
 345                 return -1;
 346
 347         float_raise(float_flag_inexact);
 348
 349         if (expDiff < -1) {    // doesn't require reduction
 350                 if (expDiff <= -68) {
 351                         a = packFloatx80(aSign, aExp, aSig0);
 352                         sincos_tiny_argument(sin_a, cos_a, a);
 353                         return 0;
 354                 }
 355                 zExp = aExp;
 356         }
 357         else {
 358                 q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
 359         }
 360
 361         /* **************************** */
 362         /* argument reduction completed */
 363         /* **************************** */
 364
 365         /* using float128 for approximation */
 366         float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
 367
 368         if (aSign) q = -q;
 369         if (sin_a) *sin_a = sincos_approximation(zSign, r,   q);
 370         if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1);
 371
 372         return 0;
 373 }
 374
 375 int floatx80_fsin(floatx80 *a)
 376 {
 377         return sf_fsincos(*a, a, 0);
 378 }
 379
 380 int floatx80_fcos(floatx80 *a)
 381 {
 382         return sf_fsincos(*a, 0, a);
 383 }
 384
 385 // =================================================
 386 // FPTAN                 Compute tan(x)
 387 // =================================================
 388
 389 //
 390 // Uses the following identities:
 391 //
 392 // 1. ----------------------------------------------------------
 393 //
 394 //  sin(-x) = -sin(x)
 395 //  cos(-x) =  cos(x)
 396 //
 397 //  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
 398 //  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
 399 //
 400 //  sin(x+ pi/2)  =  cos(x)
 401 //  sin(x+ pi)    = -sin(x)
 402 //  sin(x+3pi/2)  = -cos(x)
 403 //  sin(x+2pi)    =  sin(x)
 404 //
 405 // 2. ----------------------------------------------------------
 406 //
 407 //           sin(x)
 408 //  tan(x) = ------
 409 //           cos(x)
 410 //
 411
 412 int floatx80_ftan(floatx80 *a)
 413 {
 414         uint64_t aSig0, aSig1 = 0;
 415         int32_t aExp, zExp, expDiff;
 416         int aSign, zSign;
 417         int q = 0;
 418
 419         aSig0 = extractFloatx80Frac(*a);
 420         aExp = extractFloatx80Exp(*a);
 421         aSign = extractFloatx80Sign(*a);
 422
 423         /* invalid argument */
 424         if (aExp == 0x7FFF) {
 425                 if ((uint64_t) (aSig0<<1))
 426                 {
 427                         *a = propagateFloatx80NaNOneArg(*a);
 428                         return 0;
 429                 }
 430
 431                 float_raise(float_flag_invalid);
 432                 *a = floatx80_default_nan;
 433                 return 0;
 434         }
 435
 436         if (aExp == 0) {
 437                 if (aSig0 == 0) return 0;
 438 //        float_raise(float_flag_denormal);
 439                 /* handle pseudo denormals */
 440                 if (! (aSig0 & 0x8000000000000000U))
 441                 {
 442                         float_raise(float_flag_inexact | float_flag_underflow);
 443                         return 0;
 444                 }
 445                 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
 446         }
 447
 448         zSign = aSign;
 449         zExp = EXP_BIAS;
 450         expDiff = aExp - zExp;
 451
 452         /* argument is out-of-range */
 453         if (expDiff >= 63)
 454                 return -1;
 455
 456         float_raise(float_flag_inexact);
 457
 458         if (expDiff < -1) {    // doesn't require reduction
 459                 if (expDiff <= -68) {
 460                         *a = packFloatx80(aSign, aExp, aSig0);
 461                         return 0;
 462                 }
 463                 zExp = aExp;
 464         }
 465         else {
 466                 q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
 467         }
 468
 469         /* **************************** */
 470         /* argument reduction completed */
 471         /* **************************** */
 472
 473         /* using float128 for approximation */
 474         float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
 475
 476         float128 sin_r = poly_sin(r);
 477         float128 cos_r = poly_cos(r);
 478
 479         if (q & 0x1) {
 480                 r = float128_div(cos_r, sin_r);
 481                 zSign = ! zSign;
 482         } else {
 483                 r = float128_div(sin_r, cos_r);
 484         }
 485
 486         *a = float128_to_floatx80(r);
 487         if (zSign)
 488                 *a = floatx80_chs(*a);
 489
 490         return 0;
 491 }
 492
 493 //                            2         3         4               n
 494 // f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
 495 //         0    1         2         3         4               n
 496 //
 497 //          --       2k                --        2k+1
 498 //   p(x) = >  C  * x           q(x) = >  C   * x
 499 //          --  2k                     --  2k+1
 500 //
 501 //   f(x) ~ [ p(x) + x * q(x) ]
 502 //
 503
 504 float128 EvalPoly(float128 x, float128 *arr, unsigned n)
 505 {
 506         float128 x2 = float128_mul(x, x);
 507         unsigned i;
 508
 509         assert(n > 1);
 510
 511         float128 r1 = arr[--n];
 512         i = n;
 513         while(i >= 2) {
 514                 r1 = float128_mul(r1, x2);
 515                 i -= 2;
 516                 r1 = float128_add(r1, arr[i]);
 517         }
 518         if (i) r1 = float128_mul(r1, x);
 519
 520         float128 r2 = arr[--n];
 521         i = n;
 522         while(i >= 2) {
 523                 r2 = float128_mul(r2, x2);
 524                 i -= 2;
 525                 r2 = float128_add(r2, arr[i]);
 526         }
 527         if (i) r2 = float128_mul(r2, x);
 528
 529         return float128_add(r1, r2);
 530 }
 531
 532 //                  2         4         6         8               2n
 533 // f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
 534 //         0    1         2         3         4               n
 535 //
 536 //          --       4k                --        4k+2
 537 //   p(x) = >  C  * x           q(x) = >  C   * x
 538 //          --  2k                     --  2k+1
 539 //
 540 //                    2
 541 //   f(x) ~ [ p(x) + x * q(x) ]
 542 //
 543
 544 float128 EvenPoly(float128 x, float128 *arr, unsigned n)
 545 {
 546                 return EvalPoly(float128_mul(x, x), arr, n);
 547 }
 548
 549 //                        3         5         7         9               2n+1
 550 // f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
 551 //          0         1         2         3         4               n
 552 //                        2         4         6         8               2n
 553 //      = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
 554 //               0    1         2         3         4               n
 555 //
 556 //          --       4k                --        4k+2
 557 //   p(x) = >  C  * x           q(x) = >  C   * x
 558 //          --  2k                     --  2k+1
 559 //
 560 //                        2
 561 //   f(x) ~ x * [ p(x) + x * q(x) ]
 562 //
 563
 564 float128 OddPoly(float128 x, float128 *arr, unsigned n)
 565 {
 566                 return float128_mul(x, EvenPoly(x, arr, n));
 567 }
 568
 569 /*----------------------------------------------------------------------------
 570 | Scales extended double-precision floating-point value in operand `a' by
 571 | value `b'. The function truncates the value in the second operand 'b' to
 572 | an integral value and adds that value to the exponent of the operand 'a'.
 573 | The operation performed according to the IEC/IEEE Standard for Binary
 574 | Floating-Point Arithmetic.
 575 *----------------------------------------------------------------------------*/
 576
 577 extern floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b );
 578
 579 floatx80 floatx80_scale(floatx80 a, floatx80 b)
 580 {
 581         sbits32 aExp, bExp;
 582         bits64 aSig, bSig;
 583
 584         // handle unsupported extended double-precision floating encodings
 585 /*    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
 586     {
 587         float_raise(float_flag_invalid);
 588         return floatx80_default_nan;
 589     }*/
 590
 591         aSig = extractFloatx80Frac(a);
 592         aExp = extractFloatx80Exp(a);
 593         int aSign = extractFloatx80Sign(a);
 594         bSig = extractFloatx80Frac(b);
 595         bExp = extractFloatx80Exp(b);
 596         int bSign = extractFloatx80Sign(b);
 597
 598         if (aExp == 0x7FFF) {
 599                 if ((bits64) (aSig<<1) || ((bExp == 0x7FFF) && (bits64) (bSig<<1)))
 600                 {
 601                         return propagateFloatx80NaN(a, b);
 602                 }
 603                 if ((bExp == 0x7FFF) && bSign) {
 604                         float_raise(float_flag_invalid);
 605                         return floatx80_default_nan;
 606                 }
 607                 if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
 608                 return a;
 609         }
 610         if (bExp == 0x7FFF) {
 611                 if ((bits64) (bSig<<1)) return propagateFloatx80NaN(a, b);
 612                 if ((aExp | aSig) == 0) {
 613                         if (! bSign) {
 614                                 float_raise(float_flag_invalid);
 615                                 return floatx80_default_nan;
 616                         }
 617                         return a;
 618                 }
 619                 if (aSig && (aExp == 0)) float_raise(float_flag_denormal);
 620                 if (bSign) return packFloatx80(aSign, 0, 0);
 621                 return packFloatx80(aSign, 0x7FFF, 0x8000000000000000U);
 622         }
 623         if (aExp == 0) {
 624                 if (aSig == 0) return a;
 625                 float_raise(float_flag_denormal);
 626                 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
 627         }
 628         if (bExp == 0) {
 629                 if (bSig == 0) return a;
 630                 float_raise(float_flag_denormal);
 631                 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
 632         }
 633
 634         if (bExp > 0x400E) {
 635                 /* generate appropriate overflow/underflow */
 636                 return roundAndPackFloatx80(80, aSign,
 637                                                         bSign ? -0x3FFF : 0x7FFF, aSig, 0);
 638         }
 639         if (bExp < 0x3FFF) return a;
 640
 641         int shiftCount = 0x403E - bExp;
 642         bSig >>= shiftCount;
 643         sbits32 scale = bSig;
 644         if (bSign) scale = -scale; /* -32768..32767 */
 645         return
 646                 roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0);
 647 }