1 /*============================================================================
2 This source file is an extension to the SoftFloat IEC/IEEE Floating-point
3 Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
4 floating point emulation.
6 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
7 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
8 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
9 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
10 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
11 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
12 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
13 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
15 Derivative works are acceptable, even for commercial purposes, so long as
16 (1) the source code for the derivative work includes prominent notice that
17 the work is derivative, and (2) the source code includes prominent notice with
18 these four paragraphs for those parts of this code that are retained.
19 =============================================================================*/
21 /*============================================================================
22 * Written for Bochs (x86 achitecture simulator) by
23 * Stanislav Shwartsman [sshwarts at sourceforge net]
24 * ==========================================================================*/
28 #define USE_estimateDiv128To64
30 #include "m68kcpu.h" // which includes softfloat.h after defining the basic types
33 //#include "softfloat-specialize"
34 #include "fpu_constant.h"
36 #define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U)
37 #define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU)
39 #define packFloat2x128m(zHi, zLo) {(zHi), (zLo)}
40 #define PACK_FLOAT_128(hi,lo) packFloat2x128m(LIT64(hi),LIT64(lo))
42 #define EXP_BIAS 0x3FFF
44 /*----------------------------------------------------------------------------
45 | Returns the fraction bits of the extended double-precision floating-point
47 *----------------------------------------------------------------------------*/
49 static inline bits64 extractFloatx80Frac( floatx80 a )
55 /*----------------------------------------------------------------------------
56 | Returns the exponent bits of the extended double-precision floating-point
58 *----------------------------------------------------------------------------*/
60 static inline int32 extractFloatx80Exp( floatx80 a )
62 return a.high & 0x7FFF;
66 /*----------------------------------------------------------------------------
67 | Returns the sign bit of the extended double-precision floating-point value
69 *----------------------------------------------------------------------------*/
71 static inline flag extractFloatx80Sign( floatx80 a )
77 /*----------------------------------------------------------------------------
78 | Takes extended double-precision floating-point NaN `a' and returns the
79 | appropriate NaN result. If `a' is a signaling NaN, the invalid exception
81 *----------------------------------------------------------------------------*/
83 static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a)
85 if (floatx80_is_signaling_nan(a))
86 float_raise(float_flag_invalid);
88 a.low |= 0xC000000000000000U;
93 /*----------------------------------------------------------------------------
94 | Normalizes the subnormal extended double-precision floating-point value
95 | represented by the denormalized significand `aSig'. The normalized exponent
96 | and significand are stored at the locations pointed to by `zExpPtr' and
97 | `zSigPtr', respectively.
98 *----------------------------------------------------------------------------*/
100 void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
102 int shiftCount = countLeadingZeros64(aSig);
103 *zSigPtr = aSig<<shiftCount;
104 *zExpPtr = 1 - shiftCount;
107 /* reduce trigonometric function argument using 128-bit precision
108 M_PI approximation */
109 static uint64_t argument_reduction_kernel(uint64_t aSig0, int Exp, uint64_t *zSig0, uint64_t *zSig1)
111 uint64_t term0, term1, term2;
114 shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
115 uint64_t q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
116 mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
117 sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
118 while ((int64_t)(*zSig1) < 0) {
120 add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
126 static int reduce_trig_arg(int expDiff, int zSign, uint64_t aSig0, uint64_t aSig1)
128 uint64_t term0, term1, q = 0;
131 shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
135 q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1);
138 if (FLOAT_PI_HI <= aSig0) {
139 aSig0 -= FLOAT_PI_HI;
144 shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
145 if (! lt128(aSig0, aSig1, term0, term1))
147 int lt = lt128(term0, term1, aSig0, aSig1);
148 int eq = eq128(aSig0, aSig1, term0, term1);
150 if ((eq && (q & 1)) || lt) {
154 if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1);
160 #define SIN_ARR_SIZE 11
161 #define COS_ARR_SIZE 11
163 static float128 sin_arr[SIN_ARR_SIZE] =
165 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */
166 PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /* 3 */
167 PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /* 5 */
168 PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /* 7 */
169 PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /* 9 */
170 PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
171 PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
172 PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
173 PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */
174 PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */
175 PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45) /* 21 */
178 static float128 cos_arr[COS_ARR_SIZE] =
180 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 0 */
181 PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /* 2 */
182 PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /* 4 */
183 PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /* 6 */
184 PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /* 8 */
185 PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
186 PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
187 PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
188 PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */
189 PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */
190 PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb) /* 20 */
193 extern float128 OddPoly (float128 x, float128 *arr, unsigned n);
196 static inline float128 poly_sin(float128 x)
200 // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- =
201 // 3! 5! 7! 9! 11! 13! 15!
205 // = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] =
206 // 3! 5! 7! 9! 11! 13! 15!
210 // p(x) = > C * x > 0 q(x) = > C * x < 0
215 // sin(x) ~ x * [ p(x) + x * q(x) ]
218 return OddPoly(x, sin_arr, SIN_ARR_SIZE);
221 extern float128 EvenPoly(float128 x, float128 *arr, unsigned n);
224 static inline float128 poly_cos(float128 x)
228 // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
229 // 2! 4! 6! 8! 10! 12! 14!
233 // p(x) = > C * x > 0 q(x) = > C * x < 0
238 // cos(x) ~ [ p(x) + x * q(x) ]
241 return EvenPoly(x, cos_arr, COS_ARR_SIZE);
244 static inline void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
246 if (sin_a) *sin_a = a;
247 if (cos_a) *cos_a = a;
250 static inline void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
252 if (sin_a) *sin_a = a;
253 if (cos_a) *cos_a = floatx80_one;
256 static floatx80 sincos_approximation(int neg, float128 r, uint64_t quotient)
258 if (quotient & 0x1) {
265 floatx80 result = float128_to_floatx80(r);
270 result = floatx80_chs(result);
275 // =================================================
276 // SFFSINCOS Compute sin(x) and cos(x)
277 // =================================================
280 // Uses the following identities:
281 // ----------------------------------------------------------
286 // sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
287 // cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
289 // sin(x+ pi/2) = cos(x)
290 // sin(x+ pi) = -sin(x)
291 // sin(x+3pi/2) = -cos(x)
292 // sin(x+2pi) = sin(x)
295 int sf_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a)
297 uint64_t aSig0, aSig1 = 0;
298 int32_t aExp, zExp, expDiff;
302 aSig0 = extractFloatx80Frac(a);
303 aExp = extractFloatx80Exp(a);
304 aSign = extractFloatx80Sign(a);
306 /* invalid argument */
307 if (aExp == 0x7FFF) {
308 if ((uint64_t) (aSig0<<1)) {
309 sincos_invalid(sin_a, cos_a, propagateFloatx80NaNOneArg(a));
313 float_raise(float_flag_invalid);
314 sincos_invalid(sin_a, cos_a, floatx80_default_nan);
320 sincos_tiny_argument(sin_a, cos_a, a);
324 // float_raise(float_flag_denormal);
326 /* handle pseudo denormals */
327 if (! (aSig0 & 0x8000000000000000U))
329 float_raise(float_flag_inexact);
331 float_raise(float_flag_underflow);
332 sincos_tiny_argument(sin_a, cos_a, a);
336 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
341 expDiff = aExp - zExp;
343 /* argument is out-of-range */
347 float_raise(float_flag_inexact);
349 if (expDiff < -1) { // doesn't require reduction
350 if (expDiff <= -68) {
351 a = packFloatx80(aSign, aExp, aSig0);
352 sincos_tiny_argument(sin_a, cos_a, a);
358 q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
361 /* **************************** */
362 /* argument reduction completed */
363 /* **************************** */
365 /* using float128 for approximation */
366 float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
369 if (sin_a) *sin_a = sincos_approximation(zSign, r, q);
370 if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1);
375 int floatx80_fsin(floatx80 *a)
377 return sf_fsincos(*a, a, 0);
380 int floatx80_fcos(floatx80 *a)
382 return sf_fsincos(*a, 0, a);
385 // =================================================
386 // FPTAN Compute tan(x)
387 // =================================================
390 // Uses the following identities:
392 // 1. ----------------------------------------------------------
397 // sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
398 // cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
400 // sin(x+ pi/2) = cos(x)
401 // sin(x+ pi) = -sin(x)
402 // sin(x+3pi/2) = -cos(x)
403 // sin(x+2pi) = sin(x)
405 // 2. ----------------------------------------------------------
412 int floatx80_ftan(floatx80 *a)
414 uint64_t aSig0, aSig1 = 0;
415 int32_t aExp, zExp, expDiff;
419 aSig0 = extractFloatx80Frac(*a);
420 aExp = extractFloatx80Exp(*a);
421 aSign = extractFloatx80Sign(*a);
423 /* invalid argument */
424 if (aExp == 0x7FFF) {
425 if ((uint64_t) (aSig0<<1))
427 *a = propagateFloatx80NaNOneArg(*a);
431 float_raise(float_flag_invalid);
432 *a = floatx80_default_nan;
437 if (aSig0 == 0) return 0;
438 // float_raise(float_flag_denormal);
439 /* handle pseudo denormals */
440 if (! (aSig0 & 0x8000000000000000U))
442 float_raise(float_flag_inexact | float_flag_underflow);
445 normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
450 expDiff = aExp - zExp;
452 /* argument is out-of-range */
456 float_raise(float_flag_inexact);
458 if (expDiff < -1) { // doesn't require reduction
459 if (expDiff <= -68) {
460 *a = packFloatx80(aSign, aExp, aSig0);
466 q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
469 /* **************************** */
470 /* argument reduction completed */
471 /* **************************** */
473 /* using float128 for approximation */
474 float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
476 float128 sin_r = poly_sin(r);
477 float128 cos_r = poly_cos(r);
480 r = float128_div(cos_r, sin_r);
483 r = float128_div(sin_r, cos_r);
486 *a = float128_to_floatx80(r);
488 *a = floatx80_chs(*a);
494 // f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
498 // p(x) = > C * x q(x) = > C * x
501 // f(x) ~ [ p(x) + x * q(x) ]
504 float128 EvalPoly(float128 x, float128 *arr, unsigned n)
506 float128 x2 = float128_mul(x, x);
511 float128 r1 = arr[--n];
514 r1 = float128_mul(r1, x2);
516 r1 = float128_add(r1, arr[i]);
518 if (i) r1 = float128_mul(r1, x);
520 float128 r2 = arr[--n];
523 r2 = float128_mul(r2, x2);
525 r2 = float128_add(r2, arr[i]);
527 if (i) r2 = float128_mul(r2, x);
529 return float128_add(r1, r2);
533 // f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
537 // p(x) = > C * x q(x) = > C * x
541 // f(x) ~ [ p(x) + x * q(x) ]
544 float128 EvenPoly(float128 x, float128 *arr, unsigned n)
546 return EvalPoly(float128_mul(x, x), arr, n);
550 // f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
553 // = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
557 // p(x) = > C * x q(x) = > C * x
561 // f(x) ~ x * [ p(x) + x * q(x) ]
564 float128 OddPoly(float128 x, float128 *arr, unsigned n)
566 return float128_mul(x, EvenPoly(x, arr, n));
569 /*----------------------------------------------------------------------------
570 | Scales extended double-precision floating-point value in operand `a' by
571 | value `b'. The function truncates the value in the second operand 'b' to
572 | an integral value and adds that value to the exponent of the operand 'a'.
573 | The operation performed according to the IEC/IEEE Standard for Binary
574 | Floating-Point Arithmetic.
575 *----------------------------------------------------------------------------*/
577 extern floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b );
579 floatx80 floatx80_scale(floatx80 a, floatx80 b)
584 // handle unsupported extended double-precision floating encodings
585 /* if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
587 float_raise(float_flag_invalid);
588 return floatx80_default_nan;
591 aSig = extractFloatx80Frac(a);
592 aExp = extractFloatx80Exp(a);
593 int aSign = extractFloatx80Sign(a);
594 bSig = extractFloatx80Frac(b);
595 bExp = extractFloatx80Exp(b);
596 int bSign = extractFloatx80Sign(b);
598 if (aExp == 0x7FFF) {
599 if ((bits64) (aSig<<1) || ((bExp == 0x7FFF) && (bits64) (bSig<<1)))
601 return propagateFloatx80NaN(a, b);
603 if ((bExp == 0x7FFF) && bSign) {
604 float_raise(float_flag_invalid);
605 return floatx80_default_nan;
607 if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
610 if (bExp == 0x7FFF) {
611 if ((bits64) (bSig<<1)) return propagateFloatx80NaN(a, b);
612 if ((aExp | aSig) == 0) {
614 float_raise(float_flag_invalid);
615 return floatx80_default_nan;
619 if (aSig && (aExp == 0)) float_raise(float_flag_denormal);
620 if (bSign) return packFloatx80(aSign, 0, 0);
621 return packFloatx80(aSign, 0x7FFF, 0x8000000000000000U);
624 if (aSig == 0) return a;
625 float_raise(float_flag_denormal);
626 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
629 if (bSig == 0) return a;
630 float_raise(float_flag_denormal);
631 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
635 /* generate appropriate overflow/underflow */
636 return roundAndPackFloatx80(80, aSign,
637 bSign ? -0x3FFF : 0x7FFF, aSig, 0);
639 if (bExp < 0x3FFF) return a;
641 int shiftCount = 0x403E - bExp;
643 sbits32 scale = bSig;
644 if (bSign) scale = -scale; /* -32768..32767 */
646 roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0);