1 /*============================================================================
2 This source file is an extension to the SoftFloat IEC/IEEE Floating-point
3 Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
4 floating point emulation.
5 float_raise(float_flag_invalid)
6 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
7 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
8 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
9 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
10 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
11 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
12 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
13 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
15 Derivative works are acceptable, even for commercial purposes, so long as
16 (1) the source code for the derivative work includes prominent notice that
17 the work is derivative, and (2) the source code includes prominent notice with
18 these four paragraphs for those parts of this code that are retained.
19 =============================================================================*/
21 /*============================================================================
22 * Written for Bochs (x86 achitecture simulator) by
23 * Stanislav Shwartsman [sshwarts at sourceforge net]
24 * Adapted for lib/softfloat in MESS by Hans Ostermeyer (03/2012)
25 * ==========================================================================*/
29 #define USE_estimateDiv128To64
31 #include "m68kcpu.h" // which includes softfloat.h after defining the basic types
33 //#include "softfloat-specialize"
34 #include "fpu_constant.h"
36 #define floatx80_log10_2 packFloatx80(0, 0x3ffd, 0x9a209a84fbcff798U)
37 #define floatx80_ln_2 packFloatx80(0, 0x3ffe, 0xb17217f7d1cf79acU)
38 #define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U)
39 #define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU)
41 #define packFloat_128(zHi, zLo) {(zHi), (zLo)}
42 #define PACK_FLOAT_128(hi,lo) packFloat_128(LIT64(hi),LIT64(lo))
44 #define EXP_BIAS 0x3FFF
46 /*----------------------------------------------------------------------------
47 | Returns the fraction bits of the extended double-precision floating-point
49 *----------------------------------------------------------------------------*/
51 static inline bits64 extractFloatx80Frac( floatx80 a )
57 /*----------------------------------------------------------------------------
58 | Returns the exponent bits of the extended double-precision floating-point
60 *----------------------------------------------------------------------------*/
62 static inline int32 extractFloatx80Exp( floatx80 a )
64 return a.high & 0x7FFF;
68 /*----------------------------------------------------------------------------
69 | Returns the sign bit of the extended double-precision floating-point value
71 *----------------------------------------------------------------------------*/
73 static inline flag extractFloatx80Sign( floatx80 a )
80 /*----------------------------------------------------------------------------
81 | Takes extended double-precision floating-point NaN `a' and returns the
82 | appropriate NaN result. If `a' is a signaling NaN, the invalid exception
84 *----------------------------------------------------------------------------*/
86 static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a)
88 if (floatx80_is_signaling_nan(a))
89 float_raise(float_flag_invalid);
91 a.low |= 0xC000000000000000U;
97 /*----------------------------------------------------------------------------
98 | Normalizes the subnormal extended double-precision floating-point value
99 | represented by the denormalized significand `aSig'. The normalized exponent
100 | and significand are stored at the locations pointed to by `zExpPtr' and
101 | `zSigPtr', respectively.
102 *----------------------------------------------------------------------------*/
104 static inline void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
106 int shiftCount = countLeadingZeros64(aSig);
107 *zSigPtr = aSig<<shiftCount;
108 *zExpPtr = 1 - shiftCount;
112 /*----------------------------------------------------------------------------
113 | Returns 1 if the extended double-precision floating-point value `a' is a
114 | NaN; otherwise returns 0.
115 *----------------------------------------------------------------------------*/
117 static inline int floatx80_is_nan(floatx80 a)
119 return ((a.high & 0x7FFF) == 0x7FFF) && (int64_t) (a.low<<1);
122 /*----------------------------------------------------------------------------
123 | Takes two extended double-precision floating-point values `a' and `b', one
124 | of which is a NaN, and returns the appropriate NaN result. If either `a' or
125 | `b' is a signaling NaN, the invalid exception is raised.
126 *----------------------------------------------------------------------------*/
128 static floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b)
130 int aIsNaN = floatx80_is_nan(a);
131 int aIsSignalingNaN = floatx80_is_signaling_nan(a);
132 int bIsNaN = floatx80_is_nan(b);
133 int bIsSignalingNaN = floatx80_is_signaling_nan(b);
134 a.low |= 0xC000000000000000U;
135 b.low |= 0xC000000000000000U;
136 if (aIsSignalingNaN | bIsSignalingNaN) float_raise(float_flag_invalid);
137 if (aIsSignalingNaN) {
138 if (bIsSignalingNaN) goto returnLargerSignificand;
139 return bIsNaN ? b : a;
142 if (bIsSignalingNaN | ! bIsNaN) return a;
143 returnLargerSignificand:
144 if (a.low < b.low) return b;
145 if (b.low < a.low) return a;
146 return (a.high < b.high) ? a : b;
153 static const float128 float128_one =
154 packFloat_128(0x3fff000000000000U, 0x0000000000000000U);
155 static const float128 float128_two =
156 packFloat_128(0x4000000000000000U, 0x0000000000000000U);
158 static const float128 float128_ln2inv2 =
159 packFloat_128(0x400071547652b82fU, 0xe1777d0ffda0d23aU);
161 #define SQRT2_HALF_SIG 0xb504f333f9de6484U
163 extern float128 OddPoly(float128 x, float128 *arr, unsigned n);
165 #define L2_ARR_SIZE 9
167 static float128 ln_arr[L2_ARR_SIZE] =
169 PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */
170 PACK_FLOAT_128(0x3ffd555555555555, 0x5555555555555555), /* 3 */
171 PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /* 5 */
172 PACK_FLOAT_128(0x3ffc249249249249, 0x2492492492492492), /* 7 */
173 PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /* 9 */
174 PACK_FLOAT_128(0x3ffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
175 PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
176 PACK_FLOAT_128(0x3ffb111111111111, 0x1111111111111111), /* 15 */
177 PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2) /* 17 */
180 static float128 poly_ln(float128 x1)
186 // 1/2 ln --- ~ u + --- + --- + --- + --- + ---- + ---- + ---- =
187 // 1-u 3 5 7 9 11 13 15
191 // = u * [ 1 + --- + --- + --- + --- + ---- + ---- + ---- ] =
196 // p(u) = > C * u q(u) = > C * u
201 // 1/2 ln --- ~ u * [ p(u) + u * q(u) ]
205 return OddPoly(x1, ln_arr, L2_ARR_SIZE);
208 /* required sqrt(2)/2 < x < sqrt(2) */
209 static float128 poly_l2(float128 x)
211 /* using float128 for approximation */
212 float128 x_p1 = float128_add(x, float128_one);
213 float128 x_m1 = float128_sub(x, float128_one);
214 x = float128_div(x_m1, x_p1);
216 x = float128_mul(x, float128_ln2inv2);
220 static float128 poly_l2p1(float128 x)
222 /* using float128 for approximation */
223 float128 x_p2 = float128_add(x, float128_two);
224 x = float128_div(x, x_p2);
226 x = float128_mul(x, float128_ln2inv2);
230 // =================================================
231 // FYL2X Compute y * log (x)
233 // =================================================
236 // Uses the following identities:
238 // 1. ----------------------------------------------------------
240 // log (x) = -------, ln (x*y) = ln(x) + ln(y)
243 // 2. ----------------------------------------------------------
245 // ln (x) = ln -----, when u = -----
248 // 3. ----------------------------------------------------------
251 // ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
255 static floatx80 fyl2x(floatx80 a, floatx80 b)
257 uint64_t aSig = extractFloatx80Frac(a);
258 int32_t aExp = extractFloatx80Exp(a);
259 int aSign = extractFloatx80Sign(a);
260 uint64_t bSig = extractFloatx80Frac(b);
261 int32_t bExp = extractFloatx80Exp(b);
262 int bSign = extractFloatx80Sign(b);
264 int zSign = bSign ^ 1;
266 if (aExp == 0x7FFF) {
267 if ((uint64_t) (aSig<<1)
268 || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
270 return propagateFloatx80NaN(a, b);
275 float_raise(float_flag_invalid);
276 return floatx80_default_nan;
280 if (bSig == 0) goto invalid;
281 float_raise(float_flag_denormal);
283 return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
288 if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b);
289 if (aSign && (uint64_t)(aExp | aSig)) goto invalid;
290 if (aSig && (aExp == 0))
291 float_raise(float_flag_denormal);
293 return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
295 if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0)) goto invalid;
296 return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
300 if ((bExp | bSig) == 0) goto invalid;
301 float_raise(float_flag_divbyzero);
302 return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
304 if (aSign) goto invalid;
305 float_raise(float_flag_denormal);
306 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
308 if (aSign) goto invalid;
311 if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0);
312 return packFloatx80(bSign, 0, 0);
314 float_raise(float_flag_denormal);
315 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
317 if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0))
318 return packFloatx80(bSign, 0, 0);
320 float_raise(float_flag_inexact);
322 int ExpDiff = aExp - 0x3FFF;
324 if (aSig >= SQRT2_HALF_SIG) {
329 /* ******************************** */
330 /* using float128 for approximation */
331 /* ******************************** */
333 uint64_t zSig0, zSig1;
334 shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
335 float128 x = packFloat128(0, aExp+0x3FFF, zSig0, zSig1);
337 x = float128_add(x, int64_to_float128((int64_t) ExpDiff));
338 return floatx80_mul(b, float128_to_floatx80(x));
341 // =================================================
342 // FYL2XP1 Compute y * log (x + 1)
344 // =================================================
347 // Uses the following identities:
349 // 1. ----------------------------------------------------------
354 // 2. ----------------------------------------------------------
356 // ln (x+1) = ln -----, when u = -----
359 // 3. ----------------------------------------------------------
362 // ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
366 floatx80 fyl2xp1(floatx80 a, floatx80 b)
369 uint64_t aSig, bSig, zSig0, zSig1, zSig2;
372 aSig = extractFloatx80Frac(a);
373 aExp = extractFloatx80Exp(a);
374 aSign = extractFloatx80Sign(a);
375 bSig = extractFloatx80Frac(b);
376 bExp = extractFloatx80Exp(b);
377 bSign = extractFloatx80Sign(b);
378 int zSign = aSign ^ bSign;
380 if (aExp == 0x7FFF) {
381 if ((uint64_t) (aSig<<1)
382 || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
384 return propagateFloatx80NaN(a, b);
389 float_raise(float_flag_invalid);
390 return floatx80_default_nan;
394 if (bSig == 0) goto invalid;
395 float_raise(float_flag_denormal);
397 return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
402 if ((uint64_t) (bSig<<1))
403 return propagateFloatx80NaN(a, b);
406 if (aSig == 0) goto invalid;
407 float_raise(float_flag_denormal);
410 return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
414 if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
415 return packFloatx80(zSign, 0, 0);
417 float_raise(float_flag_denormal);
418 normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
421 if (bSig == 0) return packFloatx80(zSign, 0, 0);
422 float_raise(float_flag_denormal);
423 normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
426 float_raise(float_flag_inexact);
428 if (aSign && aExp >= 0x3FFF)
431 if (aExp >= 0x3FFC) // big argument
433 return fyl2x(floatx80_add(a, floatx80_one), b);
436 // handle tiny argument
437 if (aExp < EXP_BIAS-70)
439 // first order approximation, return (a*b)/ln(2)
440 int32_t zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE;
442 mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2);
443 if (0 < (int64_t) zSig0) {
444 shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
448 zExp = zExp + bExp - 0x3FFE;
449 mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2);
450 if (0 < (int64_t) zSig0) {
451 shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
456 roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1);
459 /* ******************************** */
460 /* using float128 for approximation */
461 /* ******************************** */
463 shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
464 float128 x = packFloat128(aSign, aExp, zSig0, zSig1);
466 return floatx80_mul(b, float128_to_floatx80(x));
469 floatx80 floatx80_flognp1(floatx80 a)
471 return fyl2xp1(a, floatx80_ln_2);
474 floatx80 floatx80_flogn(floatx80 a)
476 return fyl2x(a, floatx80_ln_2);
479 floatx80 floatx80_flog2(floatx80 a)
481 return fyl2x(a, floatx80_one);
484 floatx80 floatx80_flog10(floatx80 a)
486 return fyl2x(a, floatx80_log10_2);