From 90013f84fdba0e039ed7bc2740166dd273560603 Mon Sep 17 00:00:00 2001 From: shanshe Date: Sun, 18 Apr 2021 15:17:31 +0200 Subject: [PATCH] Full FPU implementation (adapted from UAE) --- Makefile | 2 +- m68kfpu.c | 221 +- softfloat/README.txt | 78 - softfloat/fpu_constant.h | 80 - softfloat/fsincos.c | 647 -- softfloat/fyl2x.c | 487 -- softfloat/mamesf.h | 2 +- .../{softfloat-macros => softfloat-macros.h} | 347 +- softfloat/softfloat-specialize | 470 -- softfloat/softfloat-specialize.h | 438 + softfloat/softfloat.c | 7519 +++++++---------- softfloat/softfloat.h | 796 +- softfloat/softfloat_fpsp.c | 2154 +++++ softfloat/softfloat_fpsp_tables.h | 528 ++ 14 files changed, 6992 insertions(+), 6777 deletions(-) delete mode 100644 softfloat/README.txt delete mode 100644 softfloat/fpu_constant.h delete mode 100644 softfloat/fsincos.c delete mode 100644 softfloat/fyl2x.c rename softfloat/{softfloat-macros => softfloat-macros.h} (72%) delete mode 100644 softfloat/softfloat-specialize create mode 100644 softfloat/softfloat-specialize.h create mode 100644 softfloat/softfloat_fpsp.c create mode 100644 softfloat/softfloat_fpsp_tables.h diff --git a/Makefile b/Makefile index 3396440..ca69756 100644 --- a/Makefile +++ b/Makefile @@ -21,7 +21,7 @@ MAINFILES = emulator.c \ platforms/amiga/net/pi-net.c \ platforms/shared/rtc.c -MUSASHIFILES = m68kcpu.c m68kdasm.c softfloat/softfloat.c softfloat/fsincos.c softfloat/fyl2x.c +MUSASHIFILES = m68kcpu.c m68kdasm.c softfloat/softfloat.c softfloat/softfloat_fpsp.c MUSASHIGENCFILES = m68kops.c MUSASHIGENHFILES = m68kops.h MUSASHIGENERATOR = m68kmake diff --git a/m68kfpu.c b/m68kfpu.c index db7aef5..505c833 100644 --- a/m68kfpu.c +++ b/m68kfpu.c @@ -3,6 +3,9 @@ #include #include +#include "softfloat/softfloat.h" +float_status status; + extern void exit(int); static void fatalerror(char *format, ...) { @@ -25,7 +28,18 @@ static void fatalerror(char *format, ...) { #define DOUBLE_EXPONENT (unsigned long long)(0x7ff0000000000000) #define DOUBLE_MANTISSA (unsigned long long)(0x000fffffffffffff) -extern flag floatx80_is_nan( floatx80 a ); +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is a +| NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +flag floatx80_is_nan( floatx80 a ) +{ + + return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (uint64_t) ( a.low<<1 ); + +} + // masks for packed dwords, positive k-factor static uint32 pkmask2[18] = @@ -50,7 +64,7 @@ static inline double fx80_to_double(floatx80 fx) foo = (double *)&d; - d = floatx80_to_float64(fx); + d = floatx80_to_float64(fx, &status); return *foo; } @@ -61,7 +75,7 @@ static inline floatx80 double_to_fx80(double in) d = (uint64 *)∈ - return float64_to_floatx80(*d); + return float64_to_floatx80(*d, &status); } static inline floatx80 load_extended_float80(uint32 ea) @@ -1229,7 +1243,7 @@ static void fpgen_rm_reg(uint16 w2) case 1: // Single-precision Real { uint32 d = READ_EA_32(ea); - source = float32_to_floatx80(d); + source = float32_to_floatx80(d, &status); break; } case 2: // Extended-precision Real @@ -1252,7 +1266,7 @@ static void fpgen_rm_reg(uint16 w2) { uint64 d = READ_EA_64(ea); - source = float64_to_floatx80(d); + source = float64_to_floatx80(d, &status); break; } case 6: // Byte Integer @@ -1403,16 +1417,24 @@ static void fpgen_rm_reg(uint16 w2) case 0x01: // FINT { sint32 temp; - temp = floatx80_to_int32(source); + temp = floatx80_to_int32(source, &status); REG_FP[dst] = int32_to_floatx80(temp); //FIXME mame doesn't use SET_CONDITION_CODES here SET_CONDITION_CODES(REG_FP[dst]); // JFF needs update condition codes + USE_CYCLES(4); + break; + } + case 0x02: // FSINH + { + REG_FP[dst] = floatx80_sinh(source, &status); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(75); break; } case 0x03: // FINTRZ { sint32 temp; - temp = floatx80_to_int32_round_to_zero(source); + temp = floatx80_to_int32_round_to_zero(source, &status); REG_FP[dst] = int32_to_floatx80(temp); //FIXME mame doesn't use SET_CONDITION_CODES here SET_CONDITION_CODES(REG_FP[dst]); // JFF needs update condition codes @@ -1420,51 +1442,105 @@ static void fpgen_rm_reg(uint16 w2) } case 0x04: // FSQRT { - REG_FP[dst] = floatx80_sqrt(source); + REG_FP[dst] = floatx80_sqrt(source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(109); break; } case 0x06: // FLOGNP1 { - REG_FP[dst] = floatx80_flognp1 (source); + REG_FP[dst] = floatx80_lognp1 (source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(594); // for MC68881 break; } + case 0x08: // FETOXM1 + { + REG_FP[dst] = floatx80_etoxm1(source, &status); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(6); + break; + } + case 0x09: // FTANH + { + REG_FP[dst] = floatx80_tanh(source, &status); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(75); + break; + } + case 0x0a: // FATAN + { + REG_FP[dst] = floatx80_atan(source, &status); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(75); + break; + } + case 0x0c: // FASIN + { + REG_FP[dst] = floatx80_asin(source, &status); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(75); + break; + } + case 0x0d: // FATANH + { + REG_FP[dst] = floatx80_atanh(source, &status); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(75); + break; + } case 0x0e: // FSIN { - REG_FP[dst] = source; - floatx80_fsin(®_FP[dst]); + REG_FP[dst] = floatx80_sin(source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(75); break; } case 0x0f: // FTAN { - REG_FP[dst] = source; - floatx80_ftan(®_FP[dst]); + REG_FP[dst] = floatx80_tan(source, &status); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(75); + break; + } + case 0x10: // FETOX + { + REG_FP[dst] = floatx80_etox(source, &status); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(75); + break; + } + case 0x11: // FTWOTOX + { + REG_FP[dst] = floatx80_twotox(source, &status); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(75); + break; + } + case 0x12: // FTENTOX + { + REG_FP[dst] = floatx80_tentox(source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(75); break; } case 0x14: // FLOGN { - REG_FP[dst] = floatx80_flogn (source); + REG_FP[dst] = floatx80_logn(source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(548); // for MC68881 break; } case 0x15: // FLOG10 { - REG_FP[dst] = floatx80_flog10 (source); + REG_FP[dst] = floatx80_log10(source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(604); // for MC68881 break; } case 0x16: // FLOG2 { - REG_FP[dst] = floatx80_flog2 (source); + REG_FP[dst] = floatx80_log2(source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(604); // for MC68881 break; @@ -1477,6 +1553,13 @@ static void fpgen_rm_reg(uint16 w2) USE_CYCLES(3); break; } + case 0x19: // FCOSH + { + REG_FP[dst] = floatx80_cosh(source, &status); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(64); + break; + } case 0x1a: // FNEG { REG_FP[dst] = source; @@ -1485,21 +1568,31 @@ static void fpgen_rm_reg(uint16 w2) USE_CYCLES(3); break; } + case 0x1c: // FACOS + { + REG_FP[dst] = floatx80_acos(source, &status); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(604); // for MC68881 + break; + break; + } case 0x1d: // FCOS { - REG_FP[dst] = source; - floatx80_fcos(®_FP[dst]); + REG_FP[dst] = floatx80_cos(source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(75); break; } case 0x1e: // FGETEXP { - sint16 temp2; - - temp2 = source.high; // get the exponent - temp2 -= 0x3fff; // take off the bias - REG_FP[dst] = double_to_fx80((double)temp2); + REG_FP[dst] = floatx80_getexp(source, &status); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(6); + break; + } + case 0x1f: // FGETMAN + { + REG_FP[dst] = floatx80_getman(source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(6); break; @@ -1507,7 +1600,7 @@ static void fpgen_rm_reg(uint16 w2) case 0x60: // FSDIVS (JFF) (source has already been converted to floatx80) case 0x20: // FDIV { - REG_FP[dst] = floatx80_div(REG_FP[dst], source); + REG_FP[dst] = floatx80_div(REG_FP[dst], source, &status); //FIXME mame doesn't use SET_CONDITION_CODES here SET_CONDITION_CODES(REG_FP[dst]); // JFF USE_CYCLES(43); @@ -1515,17 +1608,19 @@ static void fpgen_rm_reg(uint16 w2) } case 0x21: // FMOD { - sint8 const mode = float_rounding_mode; - float_rounding_mode = float_round_to_zero; - REG_FP[dst] = floatx80_rem(REG_FP[dst], source); + sint8 const mode = status.float_rounding_mode; + status.float_rounding_mode = float_round_to_zero; + uint64_t q; + flag s; + REG_FP[dst] = floatx80_rem(REG_FP[dst], source, &q, &s, &status); SET_CONDITION_CODES(REG_FP[dst]); - float_rounding_mode = mode; + status.float_rounding_mode = mode; USE_CYCLES(43); // guess break; } case 0x22: // FADD { - REG_FP[dst] = floatx80_add(REG_FP[dst], source); + REG_FP[dst] = floatx80_add(REG_FP[dst], source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(9); break; @@ -1533,56 +1628,70 @@ static void fpgen_rm_reg(uint16 w2) case 0x63: // FSMULS (JFF) (source has already been converted to floatx80) case 0x23: // FMUL { - REG_FP[dst] = floatx80_mul(REG_FP[dst], source); + REG_FP[dst] = floatx80_mul(REG_FP[dst], source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(11); break; } case 0x24: // FSGLDIV { - float32 a = floatx80_to_float32( REG_FP[dst] ); - float32 b = floatx80_to_float32( source ); - REG_FP[dst] = float32_to_floatx80( float32_div(a, b) ); + REG_FP[dst] = floatx80_sgldiv(REG_FP[dst], source, &status); USE_CYCLES(43); // // ? (value is from FDIV) break; } case 0x25: // FREM { - sint8 const mode = float_rounding_mode; - float_rounding_mode = float_round_nearest_even; - REG_FP[dst] = floatx80_rem(REG_FP[dst], source); + sint8 const mode = status.float_rounding_mode; + status.float_rounding_mode = float_round_nearest_even; + uint64_t q; + flag s; + REG_FP[dst] = floatx80_rem(REG_FP[dst], source, &q, &s, &status); SET_CONDITION_CODES(REG_FP[dst]); - float_rounding_mode = mode; + status.float_rounding_mode = mode; USE_CYCLES(43); // guess break; } case 0x26: // FSCALE { - REG_FP[dst] = floatx80_scale(REG_FP[dst], source); + REG_FP[dst] = floatx80_scale(REG_FP[dst], source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(46); // (better?) guess break; } case 0x27: // FSGLMUL { - float32 a = floatx80_to_float32( REG_FP[dst] ); - float32 b = floatx80_to_float32( source ); - REG_FP[dst] = float32_to_floatx80( float32_mul(a, b) ); + REG_FP[dst] = floatx80_sglmul(REG_FP[dst], source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(11); // ? (value is from FMUL) break; } case 0x28: // FSUB { - REG_FP[dst] = floatx80_sub(REG_FP[dst], source); + REG_FP[dst] = floatx80_sub(REG_FP[dst], source, &status); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(9); break; } + case 0x30: // FSINCOS + case 0x31: // FSINCOS + case 0x32: // FSINCOS + case 0x33: // FSINCOS + case 0x34: // FSINCOS + case 0x35: // FSINCOS + case 0x36: // FSINCOS + case 0x37: // FSINCOS + { + REG_FP[dst] = floatx80_cos(source, &status); + REG_FP[w2&7] = floatx80_sin(source, &status); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(75); + + break; + } case 0x38: // FCMP { floatx80 res; - res = floatx80_sub(REG_FP[dst], source); + res = floatx80_sub(REG_FP[dst], source, &status); SET_CONDITION_CODES(res); USE_CYCLES(7); break; @@ -1611,13 +1720,13 @@ static void fmove_reg_mem(uint16 w2) { case 0: // Long-Word Integer { - sint32 d = (sint32)floatx80_to_int32(REG_FP[src]); + sint32 d = (sint32)floatx80_to_int32(REG_FP[src], &status); WRITE_EA_32(ea, d); break; } case 1: // Single-precision Real { - uint32 d = floatx80_to_float32(REG_FP[src]); + uint32 d = floatx80_to_float32(REG_FP[src], &status); WRITE_EA_32(ea, d); break; } @@ -1635,7 +1744,7 @@ static void fmove_reg_mem(uint16 w2) } case 4: // Word Integer { - sint32 value = floatx80_to_int32(REG_FP[src]); + sint32 value = floatx80_to_int32(REG_FP[src], &status); if (value > 0x7fff || value < -0x8000 ) { REG_FPSR |= FPES_OE | FPAE_IOP; @@ -1647,14 +1756,14 @@ static void fmove_reg_mem(uint16 w2) { uint64 d; - d = floatx80_to_float64(REG_FP[src]); + d = floatx80_to_float64(REG_FP[src], &status); WRITE_EA_64(ea, d); break; } case 6: // Byte Integer { - sint32 value = floatx80_to_int32(REG_FP[src]); + sint32 value = floatx80_to_int32(REG_FP[src], &status); if (value > 127 || value < -128) { REG_FPSR |= FPES_OE | FPAE_IOP; @@ -1738,16 +1847,16 @@ static void fmove_fpcr(uint16 w2) switch (prec) { case 0: // Extend (X) - floatx80_rounding_precision = 80; + status.floatx80_rounding_precision = 80; break; case 1: // Single (S) - floatx80_rounding_precision = 32; + status.floatx80_rounding_precision = 32; break; case 2: // Double (D) - floatx80_rounding_precision = 64; + status.floatx80_rounding_precision = 64; break; case 3: // Undefined - floatx80_rounding_precision = 80; + status.floatx80_rounding_precision = 80; break; } #endif @@ -1755,16 +1864,16 @@ static void fmove_fpcr(uint16 w2) switch (rnd) { case 0: // To Nearest (RN) - float_rounding_mode = float_round_nearest_even; + status.float_rounding_mode = float_round_nearest_even; break; case 1: // To Zero (RZ) - float_rounding_mode = float_round_to_zero; + status.float_rounding_mode = float_round_to_zero; break; case 2: // To Minus Infinitiy (RM) - float_rounding_mode = float_round_down; + status.float_rounding_mode = float_round_down; break; case 3: // To Plus Infinitiy (RP) - float_rounding_mode = float_round_up; + status.float_rounding_mode = float_round_up; break; } } diff --git a/softfloat/README.txt b/softfloat/README.txt deleted file mode 100644 index 9500d25..0000000 --- a/softfloat/README.txt +++ /dev/null @@ -1,78 +0,0 @@ -MAME note: this package is derived from the following original SoftFloat -package and has been "re-packaged" to work with MAME's conventions and -build system. The source files come from bits64/ and bits64/templates -in the original distribution as MAME requires a compiler with a 64-bit -integer type. - - -Package Overview for SoftFloat Release 2b - -John R. Hauser -2002 May 27 - - ----------------------------------------------------------------------------- -Overview - -SoftFloat is a software implementation of floating-point that conforms to -the IEC/IEEE Standard for Binary Floating-Point Arithmetic. SoftFloat is -distributed in the form of C source code. Compiling the SoftFloat sources -generates two things: - --- A SoftFloat object file (typically `softfloat.o') containing the complete - set of IEC/IEEE floating-point routines. - --- A `timesoftfloat' program for evaluating the speed of the SoftFloat - routines. (The SoftFloat module is linked into this program.) - -The SoftFloat package is documented in four text files: - - SoftFloat.txt Documentation for using the SoftFloat functions. - SoftFloat-source.txt Documentation for compiling SoftFloat. - SoftFloat-history.txt History of major changes to SoftFloat. - timesoftfloat.txt Documentation for using `timesoftfloat'. - -Other files in the package comprise the source code for SoftFloat. - -Please be aware that some work is involved in porting this software to other -targets. It is not just a matter of getting `make' to complete without -error messages. I would have written the code that way if I could, but -there are fundamental differences between systems that can't be hidden. -You should not attempt to compile SoftFloat without first reading both -`SoftFloat.txt' and `SoftFloat-source.txt'. - - ----------------------------------------------------------------------------- -Legal Notice - -SoftFloat was written by me, John R. Hauser. This work was made possible in -part by the International Computer Science Institute, located at Suite 600, -1947 Center Street, Berkeley, California 94704. Funding was partially -provided by the National Science Foundation under grant MIP-9311980. The -original version of this code was written as part of a project to build -a fixed-point vector processor in collaboration with the University of -California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort -has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT -TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO -PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL -LOSSES, COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO -FURTHERMORE EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER -SCIENCE INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, -COSTS, OR OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE -SOFTWARE. - -Derivative works are acceptable, even for commercial purposes, provided -that the minimal documentation requirements stated in the source code are -satisfied. - - ----------------------------------------------------------------------------- -Contact Information - -At the time of this writing, the most up-to-date information about -SoftFloat and the latest release can be found at the Web page `http:// -www.cs.berkeley.edu/~jhauser/arithmetic/SoftFloat.html'. - - diff --git a/softfloat/fpu_constant.h b/softfloat/fpu_constant.h deleted file mode 100644 index fdd9719..0000000 --- a/softfloat/fpu_constant.h +++ /dev/null @@ -1,80 +0,0 @@ -/*============================================================================ -This source file is an extension to the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) -floating point emulation. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. -=============================================================================*/ - -#ifndef _FPU_CONSTANTS_H_ -#define _FPU_CONSTANTS_H_ - -// Pentium CPU uses only 68-bit precision M_PI approximation -#define BETTER_THAN_PENTIUM - -/*============================================================================ - * Written for Bochs (x86 achitecture simulator) by - * Stanislav Shwartsman [sshwarts at sourceforge net] - * ==========================================================================*/ - -////////////////////////////// -// PI, PI/2, PI/4 constants -////////////////////////////// - -#define FLOATX80_PI_EXP (0x4000) - -// 128-bit PI fraction -#ifdef BETTER_THAN_PENTIUM -#define FLOAT_PI_HI (0xc90fdaa22168c234U) -#define FLOAT_PI_LO (0xc4c6628b80dc1cd1U) -#else -#define FLOAT_PI_HI (0xc90fdaa22168c234U) -#define FLOAT_PI_LO (0xC000000000000000U) -#endif - -#define FLOATX80_PI2_EXP (0x3FFF) -#define FLOATX80_PI4_EXP (0x3FFE) - -////////////////////////////// -// 3PI/4 constant -////////////////////////////// - -#define FLOATX80_3PI4_EXP (0x4000) - -// 128-bit 3PI/4 fraction -#ifdef BETTER_THAN_PENTIUM -#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U) -#define FLOAT_3PI4_LO (0x9394c9e8a0a5159cU) -#else -#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U) -#define FLOAT_3PI4_LO (0x9000000000000000U) -#endif - -////////////////////////////// -// 1/LN2 constant -////////////////////////////// - -#define FLOAT_LN2INV_EXP (0x3FFF) - -// 128-bit 1/LN2 fraction -#ifdef BETTER_THAN_PENTIUM -#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU) -#define FLOAT_LN2INV_LO (0xbe87fed0691d3e89U) -#else -#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU) -#define FLOAT_LN2INV_LO (0xC000000000000000U) -#endif - -#endif diff --git a/softfloat/fsincos.c b/softfloat/fsincos.c deleted file mode 100644 index 764abd0..0000000 --- a/softfloat/fsincos.c +++ /dev/null @@ -1,647 +0,0 @@ -/*============================================================================ -This source file is an extension to the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) -floating point emulation. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. -=============================================================================*/ - -/*============================================================================ - * Written for Bochs (x86 achitecture simulator) by - * Stanislav Shwartsman [sshwarts at sourceforge net] - * ==========================================================================*/ - -#define FLOAT128 - -#define USE_estimateDiv128To64 - -#include "m68kcpu.h" // which includes softfloat.h after defining the basic types -#include "assert.h" - -//#include "softfloat-specialize" -#include "fpu_constant.h" - -#define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U) -#define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU) - -#define packFloat2x128m(zHi, zLo) {(zHi), (zLo)} -#define PACK_FLOAT_128(hi,lo) packFloat2x128m(LIT64(hi),LIT64(lo)) - -#define EXP_BIAS 0x3FFF - -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the extended double-precision floating-point -| value `a'. -*----------------------------------------------------------------------------*/ - -static inline bits64 extractFloatx80Frac( floatx80 a ) -{ - return a.low; - -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the extended double-precision floating-point -| value `a'. -*----------------------------------------------------------------------------*/ - -static inline int32 extractFloatx80Exp( floatx80 a ) -{ - return a.high & 0x7FFF; - -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the extended double-precision floating-point value -| `a'. -*----------------------------------------------------------------------------*/ - -static inline flag extractFloatx80Sign( floatx80 a ) -{ - return a.high>>15; - -} - -/*---------------------------------------------------------------------------- -| Takes extended double-precision floating-point NaN `a' and returns the -| appropriate NaN result. If `a' is a signaling NaN, the invalid exception -| is raised. -*----------------------------------------------------------------------------*/ - -static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a) -{ - if (floatx80_is_signaling_nan(a)) - float_raise(float_flag_invalid); - - a.low |= 0xC000000000000000U; - - return a; -} - -/*---------------------------------------------------------------------------- -| Normalizes the subnormal extended double-precision floating-point value -| represented by the denormalized significand `aSig'. The normalized exponent -| and significand are stored at the locations pointed to by `zExpPtr' and -| `zSigPtr', respectively. -*----------------------------------------------------------------------------*/ - -void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr) -{ - int shiftCount = countLeadingZeros64(aSig); - *zSigPtr = aSig< 0) { - q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1); - } - else { - if (FLOAT_PI_HI <= aSig0) { - aSig0 -= FLOAT_PI_HI; - q = 1; - } - } - - shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1); - if (! lt128(aSig0, aSig1, term0, term1)) - { - int lt = lt128(term0, term1, aSig0, aSig1); - int eq = eq128(aSig0, aSig1, term0, term1); - - if ((eq && (q & 1)) || lt) { - zSign = !zSign; - ++q; - } - if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1); - } - - return (int)(q & 3); -} - -#define SIN_ARR_SIZE 11 -#define COS_ARR_SIZE 11 - -static float128 sin_arr[SIN_ARR_SIZE] = -{ - PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */ - PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /* 3 */ - PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /* 5 */ - PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /* 7 */ - PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /* 9 */ - PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */ - PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */ - PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */ - PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */ - PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */ - PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45) /* 21 */ -}; - -static float128 cos_arr[COS_ARR_SIZE] = -{ - PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 0 */ - PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /* 2 */ - PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /* 4 */ - PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /* 6 */ - PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /* 8 */ - PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */ - PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */ - PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */ - PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */ - PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */ - PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb) /* 20 */ -}; - -extern float128 OddPoly (float128 x, float128 *arr, unsigned n); - -/* 0 <= x <= pi/4 */ -static inline float128 poly_sin(float128 x) -{ - // 3 5 7 9 11 13 15 - // x x x x x x x - // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- = - // 3! 5! 7! 9! 11! 13! 15! - // - // 2 4 6 8 10 12 14 - // x x x x x x x - // = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] = - // 3! 5! 7! 9! 11! 13! 15! - // - // 3 3 - // -- 4k -- 4k+2 - // p(x) = > C * x > 0 q(x) = > C * x < 0 - // -- 2k -- 2k+1 - // k=0 k=0 - // - // 2 - // sin(x) ~ x * [ p(x) + x * q(x) ] - // - - return OddPoly(x, sin_arr, SIN_ARR_SIZE); -} - -extern float128 EvenPoly(float128 x, float128 *arr, unsigned n); - -/* 0 <= x <= pi/4 */ -static inline float128 poly_cos(float128 x) -{ - // 2 4 6 8 10 12 14 - // x x x x x x x - // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ---- - // 2! 4! 6! 8! 10! 12! 14! - // - // 3 3 - // -- 4k -- 4k+2 - // p(x) = > C * x > 0 q(x) = > C * x < 0 - // -- 2k -- 2k+1 - // k=0 k=0 - // - // 2 - // cos(x) ~ [ p(x) + x * q(x) ] - // - - return EvenPoly(x, cos_arr, COS_ARR_SIZE); -} - -static inline void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a) -{ - if (sin_a) *sin_a = a; - if (cos_a) *cos_a = a; -} - -static inline void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a) -{ - if (sin_a) *sin_a = a; - if (cos_a) *cos_a = floatx80_one; -} - -static floatx80 sincos_approximation(int neg, float128 r, uint64_t quotient) -{ - if (quotient & 0x1) { - r = poly_cos(r); - neg = 0; - } else { - r = poly_sin(r); - } - - floatx80 result = float128_to_floatx80(r); - if (quotient & 0x2) - neg = ! neg; - - if (neg) - result = floatx80_chs(result); - - return result; -} - -// ================================================= -// SFFSINCOS Compute sin(x) and cos(x) -// ================================================= - -// -// Uses the following identities: -// ---------------------------------------------------------- -// -// sin(-x) = -sin(x) -// cos(-x) = cos(x) -// -// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y) -// cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y) -// -// sin(x+ pi/2) = cos(x) -// sin(x+ pi) = -sin(x) -// sin(x+3pi/2) = -cos(x) -// sin(x+2pi) = sin(x) -// - -int sf_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a) -{ - uint64_t aSig0, aSig1 = 0; - int32_t aExp, zExp, expDiff; - int aSign, zSign; - int q = 0; - - aSig0 = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - - /* invalid argument */ - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig0<<1)) { - sincos_invalid(sin_a, cos_a, propagateFloatx80NaNOneArg(a)); - return 0; - } - - float_raise(float_flag_invalid); - sincos_invalid(sin_a, cos_a, floatx80_default_nan); - return 0; - } - - if (aExp == 0) { - if (aSig0 == 0) { - sincos_tiny_argument(sin_a, cos_a, a); - return 0; - } - -// float_raise(float_flag_denormal); - - /* handle pseudo denormals */ - if (! (aSig0 & 0x8000000000000000U)) - { - float_raise(float_flag_inexact); - if (sin_a) - float_raise(float_flag_underflow); - sincos_tiny_argument(sin_a, cos_a, a); - return 0; - } - - normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); - } - - zSign = aSign; - zExp = EXP_BIAS; - expDiff = aExp - zExp; - - /* argument is out-of-range */ - if (expDiff >= 63) - return -1; - - float_raise(float_flag_inexact); - - if (expDiff < -1) { // doesn't require reduction - if (expDiff <= -68) { - a = packFloatx80(aSign, aExp, aSig0); - sincos_tiny_argument(sin_a, cos_a, a); - return 0; - } - zExp = aExp; - } - else { - q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1); - } - - /* **************************** */ - /* argument reduction completed */ - /* **************************** */ - - /* using float128 for approximation */ - float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1); - - if (aSign) q = -q; - if (sin_a) *sin_a = sincos_approximation(zSign, r, q); - if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1); - - return 0; -} - -int floatx80_fsin(floatx80 *a) -{ - return sf_fsincos(*a, a, 0); -} - -int floatx80_fcos(floatx80 *a) -{ - return sf_fsincos(*a, 0, a); -} - -// ================================================= -// FPTAN Compute tan(x) -// ================================================= - -// -// Uses the following identities: -// -// 1. ---------------------------------------------------------- -// -// sin(-x) = -sin(x) -// cos(-x) = cos(x) -// -// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y) -// cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y) -// -// sin(x+ pi/2) = cos(x) -// sin(x+ pi) = -sin(x) -// sin(x+3pi/2) = -cos(x) -// sin(x+2pi) = sin(x) -// -// 2. ---------------------------------------------------------- -// -// sin(x) -// tan(x) = ------ -// cos(x) -// - -int floatx80_ftan(floatx80 *a) -{ - uint64_t aSig0, aSig1 = 0; - int32_t aExp, zExp, expDiff; - int aSign, zSign; - int q = 0; - - aSig0 = extractFloatx80Frac(*a); - aExp = extractFloatx80Exp(*a); - aSign = extractFloatx80Sign(*a); - - /* invalid argument */ - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig0<<1)) - { - *a = propagateFloatx80NaNOneArg(*a); - return 0; - } - - float_raise(float_flag_invalid); - *a = floatx80_default_nan; - return 0; - } - - if (aExp == 0) { - if (aSig0 == 0) return 0; -// float_raise(float_flag_denormal); - /* handle pseudo denormals */ - if (! (aSig0 & 0x8000000000000000U)) - { - float_raise(float_flag_inexact | float_flag_underflow); - return 0; - } - normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); - } - - zSign = aSign; - zExp = EXP_BIAS; - expDiff = aExp - zExp; - - /* argument is out-of-range */ - if (expDiff >= 63) - return -1; - - float_raise(float_flag_inexact); - - if (expDiff < -1) { // doesn't require reduction - if (expDiff <= -68) { - *a = packFloatx80(aSign, aExp, aSig0); - return 0; - } - zExp = aExp; - } - else { - q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1); - } - - /* **************************** */ - /* argument reduction completed */ - /* **************************** */ - - /* using float128 for approximation */ - float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1); - - float128 sin_r = poly_sin(r); - float128 cos_r = poly_cos(r); - - if (q & 0x1) { - r = float128_div(cos_r, sin_r); - zSign = ! zSign; - } else { - r = float128_div(sin_r, cos_r); - } - - *a = float128_to_floatx80(r); - if (zSign) - *a = floatx80_chs(*a); - - return 0; -} - -// 2 3 4 n -// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) -// 0 1 2 3 4 n -// -// -- 2k -- 2k+1 -// p(x) = > C * x q(x) = > C * x -// -- 2k -- 2k+1 -// -// f(x) ~ [ p(x) + x * q(x) ] -// - -float128 EvalPoly(float128 x, float128 *arr, unsigned n) -{ - float128 x2 = float128_mul(x, x); - unsigned i; - - assert(n > 1); - - float128 r1 = arr[--n]; - i = n; - while(i >= 2) { - r1 = float128_mul(r1, x2); - i -= 2; - r1 = float128_add(r1, arr[i]); - } - if (i) r1 = float128_mul(r1, x); - - float128 r2 = arr[--n]; - i = n; - while(i >= 2) { - r2 = float128_mul(r2, x2); - i -= 2; - r2 = float128_add(r2, arr[i]); - } - if (i) r2 = float128_mul(r2, x); - - return float128_add(r1, r2); -} - -// 2 4 6 8 2n -// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) -// 0 1 2 3 4 n -// -// -- 4k -- 4k+2 -// p(x) = > C * x q(x) = > C * x -// -- 2k -- 2k+1 -// -// 2 -// f(x) ~ [ p(x) + x * q(x) ] -// - -float128 EvenPoly(float128 x, float128 *arr, unsigned n) -{ - return EvalPoly(float128_mul(x, x), arr, n); -} - -// 3 5 7 9 2n+1 -// f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) -// 0 1 2 3 4 n -// 2 4 6 8 2n -// = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) -// 0 1 2 3 4 n -// -// -- 4k -- 4k+2 -// p(x) = > C * x q(x) = > C * x -// -- 2k -- 2k+1 -// -// 2 -// f(x) ~ x * [ p(x) + x * q(x) ] -// - -float128 OddPoly(float128 x, float128 *arr, unsigned n) -{ - return float128_mul(x, EvenPoly(x, arr, n)); -} - -/*---------------------------------------------------------------------------- -| Scales extended double-precision floating-point value in operand `a' by -| value `b'. The function truncates the value in the second operand 'b' to -| an integral value and adds that value to the exponent of the operand 'a'. -| The operation performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -extern floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b ); - -floatx80 floatx80_scale(floatx80 a, floatx80 b) -{ - sbits32 aExp, bExp; - bits64 aSig, bSig; - - // handle unsupported extended double-precision floating encodings -/* if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) - { - float_raise(float_flag_invalid); - return floatx80_default_nan; - }*/ - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - int aSign = extractFloatx80Sign(a); - bSig = extractFloatx80Frac(b); - bExp = extractFloatx80Exp(b); - int bSign = extractFloatx80Sign(b); - - if (aExp == 0x7FFF) { - if ((bits64) (aSig<<1) || ((bExp == 0x7FFF) && (bits64) (bSig<<1))) - { - return propagateFloatx80NaN(a, b); - } - if ((bExp == 0x7FFF) && bSign) { - float_raise(float_flag_invalid); - return floatx80_default_nan; - } - if (bSig && (bExp == 0)) float_raise(float_flag_denormal); - return a; - } - if (bExp == 0x7FFF) { - if ((bits64) (bSig<<1)) return propagateFloatx80NaN(a, b); - if ((aExp | aSig) == 0) { - if (! bSign) { - float_raise(float_flag_invalid); - return floatx80_default_nan; - } - return a; - } - if (aSig && (aExp == 0)) float_raise(float_flag_denormal); - if (bSign) return packFloatx80(aSign, 0, 0); - return packFloatx80(aSign, 0x7FFF, 0x8000000000000000U); - } - if (aExp == 0) { - if (aSig == 0) return a; - float_raise(float_flag_denormal); - normalizeFloatx80Subnormal(aSig, &aExp, &aSig); - } - if (bExp == 0) { - if (bSig == 0) return a; - float_raise(float_flag_denormal); - normalizeFloatx80Subnormal(bSig, &bExp, &bSig); - } - - if (bExp > 0x400E) { - /* generate appropriate overflow/underflow */ - return roundAndPackFloatx80(80, aSign, - bSign ? -0x3FFF : 0x7FFF, aSig, 0); - } - if (bExp < 0x3FFF) return a; - - int shiftCount = 0x403E - bExp; - bSig >>= shiftCount; - sbits32 scale = bSig; - if (bSign) scale = -scale; /* -32768..32767 */ - return - roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0); -} diff --git a/softfloat/fyl2x.c b/softfloat/fyl2x.c deleted file mode 100644 index c7e0d45..0000000 --- a/softfloat/fyl2x.c +++ /dev/null @@ -1,487 +0,0 @@ -/*============================================================================ -This source file is an extension to the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) -floating point emulation. -float_raise(float_flag_invalid) -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. -=============================================================================*/ - -/*============================================================================ - * Written for Bochs (x86 achitecture simulator) by - * Stanislav Shwartsman [sshwarts at sourceforge net] - * Adapted for lib/softfloat in MESS by Hans Ostermeyer (03/2012) - * ==========================================================================*/ - -#define FLOAT128 - -#define USE_estimateDiv128To64 - -#include "m68kcpu.h" // which includes softfloat.h after defining the basic types - -//#include "softfloat-specialize" -#include "fpu_constant.h" - -#define floatx80_log10_2 packFloatx80(0, 0x3ffd, 0x9a209a84fbcff798U) -#define floatx80_ln_2 packFloatx80(0, 0x3ffe, 0xb17217f7d1cf79acU) -#define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U) -#define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU) - -#define packFloat_128(zHi, zLo) {(zHi), (zLo)} -#define PACK_FLOAT_128(hi,lo) packFloat_128(LIT64(hi),LIT64(lo)) - -#define EXP_BIAS 0x3FFF - -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the extended double-precision floating-point -| value `a'. -*----------------------------------------------------------------------------*/ - -static inline bits64 extractFloatx80Frac( floatx80 a ) -{ - return a.low; - -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the extended double-precision floating-point -| value `a'. -*----------------------------------------------------------------------------*/ - -static inline int32 extractFloatx80Exp( floatx80 a ) -{ - return a.high & 0x7FFF; - -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the extended double-precision floating-point value -| `a'. -*----------------------------------------------------------------------------*/ - -static inline flag extractFloatx80Sign( floatx80 a ) -{ - return a.high>>15; - -} - -#if 0 -/*---------------------------------------------------------------------------- -| Takes extended double-precision floating-point NaN `a' and returns the -| appropriate NaN result. If `a' is a signaling NaN, the invalid exception -| is raised. -*----------------------------------------------------------------------------*/ - -static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a) -{ - if (floatx80_is_signaling_nan(a)) - float_raise(float_flag_invalid); - - a.low |= 0xC000000000000000U; - - return a; -} -#endif - -/*---------------------------------------------------------------------------- -| Normalizes the subnormal extended double-precision floating-point value -| represented by the denormalized significand `aSig'. The normalized exponent -| and significand are stored at the locations pointed to by `zExpPtr' and -| `zSigPtr', respectively. -*----------------------------------------------------------------------------*/ - -static inline void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr) -{ - int shiftCount = countLeadingZeros64(aSig); - *zSigPtr = aSig< C * u q(u) = > C * u - // -- 2k -- 2k+1 - // k=0 k=0 - // - // 1+u 2 - // 1/2 ln --- ~ u * [ p(u) + u * q(u) ] - // 1-u - // -*/ - return OddPoly(x1, ln_arr, L2_ARR_SIZE); -} - -/* required sqrt(2)/2 < x < sqrt(2) */ -static float128 poly_l2(float128 x) -{ - /* using float128 for approximation */ - float128 x_p1 = float128_add(x, float128_one); - float128 x_m1 = float128_sub(x, float128_one); - x = float128_div(x_m1, x_p1); - x = poly_ln(x); - x = float128_mul(x, float128_ln2inv2); - return x; -} - -static float128 poly_l2p1(float128 x) -{ - /* using float128 for approximation */ - float128 x_p2 = float128_add(x, float128_two); - x = float128_div(x, x_p2); - x = poly_ln(x); - x = float128_mul(x, float128_ln2inv2); - return x; -} - -// ================================================= -// FYL2X Compute y * log (x) -// 2 -// ================================================= - -// -// Uses the following identities: -// -// 1. ---------------------------------------------------------- -// ln(x) -// log (x) = -------, ln (x*y) = ln(x) + ln(y) -// 2 ln(2) -// -// 2. ---------------------------------------------------------- -// 1+u x-1 -// ln (x) = ln -----, when u = ----- -// 1-u x+1 -// -// 3. ---------------------------------------------------------- -// 3 5 7 2n+1 -// 1+u u u u u -// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ] -// 1-u 3 5 7 2n+1 -// - -static floatx80 fyl2x(floatx80 a, floatx80 b) -{ - uint64_t aSig = extractFloatx80Frac(a); - int32_t aExp = extractFloatx80Exp(a); - int aSign = extractFloatx80Sign(a); - uint64_t bSig = extractFloatx80Frac(b); - int32_t bExp = extractFloatx80Exp(b); - int bSign = extractFloatx80Sign(b); - - int zSign = bSign ^ 1; - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1) - || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1))) - { - return propagateFloatx80NaN(a, b); - } - if (aSign) - { -invalid: - float_raise(float_flag_invalid); - return floatx80_default_nan; - } - else { - if (bExp == 0) { - if (bSig == 0) goto invalid; - float_raise(float_flag_denormal); - } - return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U); - } - } - if (bExp == 0x7FFF) - { - if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b); - if (aSign && (uint64_t)(aExp | aSig)) goto invalid; - if (aSig && (aExp == 0)) - float_raise(float_flag_denormal); - if (aExp < 0x3FFF) { - return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U); - } - if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0)) goto invalid; - return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U); - } - if (aExp == 0) { - if (aSig == 0) { - if ((bExp | bSig) == 0) goto invalid; - float_raise(float_flag_divbyzero); - return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U); - } - if (aSign) goto invalid; - float_raise(float_flag_denormal); - normalizeFloatx80Subnormal(aSig, &aExp, &aSig); - } - if (aSign) goto invalid; - if (bExp == 0) { - if (bSig == 0) { - if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0); - return packFloatx80(bSign, 0, 0); - } - float_raise(float_flag_denormal); - normalizeFloatx80Subnormal(bSig, &bExp, &bSig); - } - if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0)) - return packFloatx80(bSign, 0, 0); - - float_raise(float_flag_inexact); - - int ExpDiff = aExp - 0x3FFF; - aExp = 0; - if (aSig >= SQRT2_HALF_SIG) { - ExpDiff++; - aExp--; - } - - /* ******************************** */ - /* using float128 for approximation */ - /* ******************************** */ - - uint64_t zSig0, zSig1; - shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1); - float128 x = packFloat128(0, aExp+0x3FFF, zSig0, zSig1); - x = poly_l2(x); - x = float128_add(x, int64_to_float128((int64_t) ExpDiff)); - return floatx80_mul(b, float128_to_floatx80(x)); -} - -// ================================================= -// FYL2XP1 Compute y * log (x + 1) -// 2 -// ================================================= - -// -// Uses the following identities: -// -// 1. ---------------------------------------------------------- -// ln(x) -// log (x) = ------- -// 2 ln(2) -// -// 2. ---------------------------------------------------------- -// 1+u x -// ln (x+1) = ln -----, when u = ----- -// 1-u x+2 -// -// 3. ---------------------------------------------------------- -// 3 5 7 2n+1 -// 1+u u u u u -// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ] -// 1-u 3 5 7 2n+1 -// - -floatx80 fyl2xp1(floatx80 a, floatx80 b) -{ - int32_t aExp, bExp; - uint64_t aSig, bSig, zSig0, zSig1, zSig2; - int aSign, bSign; - - aSig = extractFloatx80Frac(a); - aExp = extractFloatx80Exp(a); - aSign = extractFloatx80Sign(a); - bSig = extractFloatx80Frac(b); - bExp = extractFloatx80Exp(b); - bSign = extractFloatx80Sign(b); - int zSign = aSign ^ bSign; - - if (aExp == 0x7FFF) { - if ((uint64_t) (aSig<<1) - || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1))) - { - return propagateFloatx80NaN(a, b); - } - if (aSign) - { -invalid: - float_raise(float_flag_invalid); - return floatx80_default_nan; - } - else { - if (bExp == 0) { - if (bSig == 0) goto invalid; - float_raise(float_flag_denormal); - } - return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U); - } - } - if (bExp == 0x7FFF) - { - if ((uint64_t) (bSig<<1)) - return propagateFloatx80NaN(a, b); - - if (aExp == 0) { - if (aSig == 0) goto invalid; - float_raise(float_flag_denormal); - } - - return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U); - } - if (aExp == 0) { - if (aSig == 0) { - if (bSig && (bExp == 0)) float_raise(float_flag_denormal); - return packFloatx80(zSign, 0, 0); - } - float_raise(float_flag_denormal); - normalizeFloatx80Subnormal(aSig, &aExp, &aSig); - } - if (bExp == 0) { - if (bSig == 0) return packFloatx80(zSign, 0, 0); - float_raise(float_flag_denormal); - normalizeFloatx80Subnormal(bSig, &bExp, &bSig); - } - - float_raise(float_flag_inexact); - - if (aSign && aExp >= 0x3FFF) - return a; - - if (aExp >= 0x3FFC) // big argument - { - return fyl2x(floatx80_add(a, floatx80_one), b); - } - - // handle tiny argument - if (aExp < EXP_BIAS-70) - { - // first order approximation, return (a*b)/ln(2) - int32_t zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE; - - mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2); - if (0 < (int64_t) zSig0) { - shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); - --zExp; - } - - zExp = zExp + bExp - 0x3FFE; - mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2); - if (0 < (int64_t) zSig0) { - shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); - --zExp; - } - - return - roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1); - } - - /* ******************************** */ - /* using float128 for approximation */ - /* ******************************** */ - - shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1); - float128 x = packFloat128(aSign, aExp, zSig0, zSig1); - x = poly_l2p1(x); - return floatx80_mul(b, float128_to_floatx80(x)); -} - -floatx80 floatx80_flognp1(floatx80 a) -{ - return fyl2xp1(a, floatx80_ln_2); -} - -floatx80 floatx80_flogn(floatx80 a) -{ - return fyl2x(a, floatx80_ln_2); -} - -floatx80 floatx80_flog2(floatx80 a) -{ - return fyl2x(a, floatx80_one); -} - -floatx80 floatx80_flog10(floatx80 a) -{ - return fyl2x(a, floatx80_log10_2); -} diff --git a/softfloat/mamesf.h b/softfloat/mamesf.h index c419503..c8133bf 100644 --- a/softfloat/mamesf.h +++ b/softfloat/mamesf.h @@ -22,7 +22,7 @@ | to the same as `int'. *----------------------------------------------------------------------------*/ -typedef sint8 flag; +typedef uint64_t flag; typedef sint8 int8; typedef sint16 int16; typedef sint32 int32; diff --git a/softfloat/softfloat-macros b/softfloat/softfloat-macros.h similarity index 72% rename from softfloat/softfloat-macros rename to softfloat/softfloat-macros.h index 5de9031..aa6665e 100644 --- a/softfloat/softfloat-macros +++ b/softfloat/softfloat-macros.h @@ -1,8 +1,24 @@ - -/*============================================================================ - +/* + * QEMU float support macros + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. + */ + +/* +=============================================================================== This C source fragment is part of the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b. +Arithmetic Package, Release 2a. Written by John R. Hauser. This work was made possible in part by the International Computer Science Institute, located at Suite 600, 1947 Center @@ -11,24 +27,68 @@ National Science Foundation under grant MIP-9311980. The original version of this code was written as part of a project to build a fixed-point vector processor in collaboration with the University of California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ arithmetic/SoftFloat.html'. -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. + */ + +/*---------------------------------------------------------------------------- +| This macro tests for minimum version of the GNU C compiler. +*----------------------------------------------------------------------------*/ +#if defined(__GNUC__) && defined(__GNUC_MINOR__) +# define SOFTFLOAT_GNUC_PREREQ(maj, min) \ + ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((maj) << 16) + (min)) +#else +# define SOFTFLOAT_GNUC_PREREQ(maj, min) 0 +#endif -=============================================================================*/ /*---------------------------------------------------------------------------- | Shifts `a' right by the number of bits given in `count'. If any nonzero @@ -39,9 +99,9 @@ these four paragraphs for those parts of this code that are retained. | The result is stored in the location pointed to by `zPtr'. *----------------------------------------------------------------------------*/ -static inline void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) +static inline void shift32RightJamming(uint32_t a, int count, uint32_t *zPtr) { - bits32 z; + uint32_t z; if ( count == 0 ) { z = a; @@ -65,9 +125,9 @@ static inline void shift32RightJamming( bits32 a, int16 count, bits32 *zPtr ) | The result is stored in the location pointed to by `zPtr'. *----------------------------------------------------------------------------*/ -static inline void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) +static inline void shift64RightJamming(uint64_t a, int count, uint64_t *zPtr) { - bits64 z; + uint64_t z; if ( count == 0 ) { z = a; @@ -91,20 +151,20 @@ static inline void shift64RightJamming( bits64 a, int16 count, bits64 *zPtr ) | 63 bits of the extra result are all zero if and only if _all_but_the_last_ | bits shifted off were all zero. This extra result is stored in the location | pointed to by `z1Ptr'. The value of `count' can be arbitrarily large. -| (This routine makes more sense if `a0' and `a1' are considered to form -| a fixed-point value with binary point between `a0' and `a1'. This fixed- -| point value is shifted right by the number of bits given in `count', and -| the integer part of the result is returned at the location pointed to by +| (This routine makes more sense if `a0' and `a1' are considered to form a +| fixed-point value with binary point between `a0' and `a1'. This fixed-point +| value is shifted right by the number of bits given in `count', and the +| integer part of the result is returned at the location pointed to by | `z0Ptr'. The fractional part of the result may be slightly corrupted as | described above, and is returned at the location pointed to by `z1Ptr'.) *----------------------------------------------------------------------------*/ static inline void shift64ExtraRightJamming( - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) + uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) { - bits64 z0, z1; - int8 negCount = ( - count ) & 63; + uint64_t z0, z1; + int8_t negCount = ( - count ) & 63; if ( count == 0 ) { z1 = a1; @@ -138,10 +198,10 @@ static inline void static inline void shift128Right( - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) + uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) { - bits64 z0, z1; - int8 negCount = ( - count ) & 63; + uint64_t z0, z1; + int8_t negCount = ( - count ) & 63; if ( count == 0 ) { z1 = a1; @@ -152,7 +212,7 @@ static inline void z0 = a0>>count; } else { - z1 = ( count < 64 ) ? ( a0>>( count & 63 ) ) : 0; + z1 = (count < 128) ? (a0 >> (count & 63)) : 0; z0 = 0; } *z1Ptr = z1; @@ -173,10 +233,10 @@ static inline void static inline void shift128RightJamming( - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) + uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) { - bits64 z0, z1; - int8 negCount = ( - count ) & 63; + uint64_t z0, z1; + int8_t negCount = ( - count ) & 63; if ( count == 0 ) { z1 = a1; @@ -224,17 +284,17 @@ static inline void static inline void shift128ExtraRightJamming( - bits64 a0, - bits64 a1, - bits64 a2, - int16 count, - bits64 *z0Ptr, - bits64 *z1Ptr, - bits64 *z2Ptr + uint64_t a0, + uint64_t a1, + uint64_t a2, + int count, + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr ) { - bits64 z0, z1, z2; - int8 negCount = ( - count ) & 63; + uint64_t z0, z1, z2; + int8_t negCount = ( - count ) & 63; if ( count == 0 ) { z2 = a2; @@ -282,7 +342,7 @@ static inline void static inline void shortShift128Left( - bits64 a0, bits64 a1, int16 count, bits64 *z0Ptr, bits64 *z1Ptr ) + uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr) { *z1Ptr = a1<>32; bLow = b; bHigh = b>>32; - z1 = ( (bits64) aLow ) * bLow; - zMiddleA = ( (bits64) aLow ) * bHigh; - zMiddleB = ( (bits64) aHigh ) * bLow; - z0 = ( (bits64) aHigh ) * bHigh; + z1 = ( (uint64_t) aLow ) * bLow; + zMiddleA = ( (uint64_t) aLow ) * bHigh; + zMiddleB = ( (uint64_t) aHigh ) * bLow; + z0 = ( (uint64_t) aHigh ) * bHigh; zMiddleA += zMiddleB; - z0 += ( ( (bits64) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); + z0 += ( ( (uint64_t) ( zMiddleA < zMiddleB ) )<<32 ) + ( zMiddleA>>32 ); zMiddleA <<= 32; z1 += zMiddleA; z0 += ( z1 < zMiddleA ); @@ -478,15 +538,15 @@ static inline void mul64To128( bits64 a, bits64 b, bits64 *z0Ptr, bits64 *z1Ptr static inline void mul128By64To192( - bits64 a0, - bits64 a1, - bits64 b, - bits64 *z0Ptr, - bits64 *z1Ptr, - bits64 *z2Ptr + uint64_t a0, + uint64_t a1, + uint64_t b, + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr ) { - bits64 z0, z1, z2, more1; + uint64_t z0, z1, z2, more1; mul64To128( a1, b, &z1, &z2 ); mul64To128( a0, b, &z0, &more1 ); @@ -506,18 +566,18 @@ static inline void static inline void mul128To256( - bits64 a0, - bits64 a1, - bits64 b0, - bits64 b1, - bits64 *z0Ptr, - bits64 *z1Ptr, - bits64 *z2Ptr, - bits64 *z3Ptr + uint64_t a0, + uint64_t a1, + uint64_t b0, + uint64_t b1, + uint64_t *z0Ptr, + uint64_t *z1Ptr, + uint64_t *z2Ptr, + uint64_t *z3Ptr ) { - bits64 z0, z1, z2, z3; - bits64 more1, more2; + uint64_t z0, z1, z2, z3; + uint64_t more1, more2; mul64To128( a1, b1, &z2, &z3 ); mul64To128( a1, b0, &z1, &more2 ); @@ -543,18 +603,18 @@ static inline void | unsigned integer is returned. *----------------------------------------------------------------------------*/ -static inline bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) +static uint64_t estimateDiv128To64( uint64_t a0, uint64_t a1, uint64_t b ) { - bits64 b0, b1; - bits64 rem0, rem1, term0, term1; - bits64 z; + uint64_t b0, b1; + uint64_t rem0, rem1, term0, term1; + uint64_t z; if ( b <= a0 ) return LIT64( 0xFFFFFFFFFFFFFFFF ); b0 = b>>32; z = ( b0<<32 <= a0 ) ? LIT64( 0xFFFFFFFF00000000 ) : ( a0 / b0 )<<32; mul64To128( b, z, &term0, &term1 ); sub128( a0, a1, term0, term1, &rem0, &rem1 ); - while ( ( (sbits64) rem0 ) < 0 ) { + while ( ( (int64_t) rem0 ) < 0 ) { z -= LIT64( 0x100000000 ); b1 = b<<32; add128( rem0, rem1, b0, b1, &rem0, &rem1 ); @@ -575,32 +635,32 @@ static inline bits64 estimateDiv128To64( bits64 a0, bits64 a1, bits64 b ) | value. *----------------------------------------------------------------------------*/ -static inline bits32 estimateSqrt32( int16 aExp, bits32 a ) +static uint32_t estimateSqrt32(int aExp, uint32_t a) { - static const bits16 sqrtOddAdjustments[] = { + static const uint16_t sqrtOddAdjustments[] = { 0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0, 0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67 }; - static const bits16 sqrtEvenAdjustments[] = { + static const uint16_t sqrtEvenAdjustments[] = { 0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E, 0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002 }; - int8 index; - bits32 z; + int8_t index; + uint32_t z; index = ( a>>27 ) & 15; if ( aExp & 1 ) { - z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ index ]; + z = 0x4000 + ( a>>17 ) - sqrtOddAdjustments[ (int)index ]; z = ( ( a / z )<<14 ) + ( z<<15 ); a >>= 1; } else { - z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ index ]; + z = 0x8000 + ( a>>17 ) - sqrtEvenAdjustments[ (int)index ]; z = a / z + z; z = ( 0x20000 <= z ) ? 0xFFFF8000 : ( z<<15 ); - if ( z <= a ) return (bits32) ( ( (sbits32) a )>>1 ); + if ( z <= a ) return (uint32_t) ( ( (int32_t) a )>>1 ); } - return ( (bits32) ( ( ( (bits64) a )<<31 ) / z ) ) + ( z>>1 ); + return ( (uint32_t) ( ( ( (uint64_t) a )<<31 ) / z ) ) + ( z>>1 ); } @@ -609,9 +669,16 @@ static inline bits32 estimateSqrt32( int16 aExp, bits32 a ) | `a'. If `a' is zero, 32 is returned. *----------------------------------------------------------------------------*/ -static int8 countLeadingZeros32( bits32 a ) +static inline int8_t countLeadingZeros32( uint32_t a ) { - static const int8 countLeadingZerosHigh[] = { +#if SOFTFLOAT_GNUC_PREREQ(3, 4) + if (a) { + return __builtin_clz(a); + } else { + return 32; + } +#else + static const int8_t countLeadingZerosHigh[] = { 8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, @@ -629,7 +696,7 @@ static int8 countLeadingZeros32( bits32 a ) 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - int8 shiftCount; + int8_t shiftCount; shiftCount = 0; if ( a < 0x10000 ) { @@ -642,7 +709,7 @@ static int8 countLeadingZeros32( bits32 a ) } shiftCount += countLeadingZerosHigh[ a>>24 ]; return shiftCount; - +#endif } /*---------------------------------------------------------------------------- @@ -650,12 +717,19 @@ static int8 countLeadingZeros32( bits32 a ) | `a'. If `a' is zero, 64 is returned. *----------------------------------------------------------------------------*/ -static int8 countLeadingZeros64( bits64 a ) +static inline int8_t countLeadingZeros64( uint64_t a ) { - int8 shiftCount; +#if SOFTFLOAT_GNUC_PREREQ(3, 4) + if (a) { + return __builtin_clzll(a); + } else { + return 64; + } +#else + int8_t shiftCount; shiftCount = 0; - if ( a < ( (bits64) 1 )<<32 ) { + if ( a < ( (uint64_t) 1 )<<32 ) { shiftCount += 32; } else { @@ -663,7 +737,7 @@ static int8 countLeadingZeros64( bits64 a ) } shiftCount += countLeadingZeros32( a ); return shiftCount; - +#endif } /*---------------------------------------------------------------------------- @@ -672,7 +746,7 @@ static int8 countLeadingZeros64( bits64 a ) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -static inline flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +static inline flag eq128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { return ( a0 == b0 ) && ( a1 == b1 ); @@ -685,7 +759,7 @@ static inline flag eq128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -static inline flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +static inline flag le128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 <= b1 ) ); @@ -698,7 +772,7 @@ static inline flag le128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | returns 0. *----------------------------------------------------------------------------*/ -static inline flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +static inline flag lt128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { return ( a0 < b0 ) || ( ( a0 == b0 ) && ( a1 < b1 ) ); @@ -711,22 +785,9 @@ static inline flag lt128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) | Otherwise, returns 0. *----------------------------------------------------------------------------*/ -static inline flag ne128( bits64 a0, bits64 a1, bits64 b0, bits64 b1 ) +static inline flag ne128( uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1 ) { return ( a0 != b0 ) || ( a1 != b1 ); } - -/*----------------------------------------------------------------------------- -| Changes the sign of the extended double-precision floating-point value 'a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static inline floatx80 floatx80_chs(floatx80 reg) -{ - reg.high ^= 0x8000; - return reg; -} - diff --git a/softfloat/softfloat-specialize b/softfloat/softfloat-specialize deleted file mode 100644 index cc97273..0000000 --- a/softfloat/softfloat-specialize +++ /dev/null @@ -1,470 +0,0 @@ - -/*============================================================================ - -This C source fragment is part of the SoftFloat IEC/IEEE Floating-point -Arithmetic Package, Release 2b. - -Written by John R. Hauser. This work was made possible in part by the -International Computer Science Institute, located at Suite 600, 1947 Center -Street, Berkeley, California 94704. Funding was partially provided by the -National Science Foundation under grant MIP-9311980. The original version -of this code was written as part of a project to build a fixed-point vector -processor in collaboration with the University of California at Berkeley, -overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ -arithmetic/SoftFloat.html'. - -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. - -Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. - -=============================================================================*/ - -/*---------------------------------------------------------------------------- -| Underflow tininess-detection mode, statically initialized to default value. -| (The declaration in `softfloat.h' must match the `int8' type here.) -*----------------------------------------------------------------------------*/ -int8 float_detect_tininess = float_tininess_after_rounding; - -/*---------------------------------------------------------------------------- -| Raises the exceptions specified by `flags'. Floating-point traps can be -| defined here if desired. It is currently not possible for such a trap to -| substitute a result value. If traps are not implemented, this routine -| should be simply `float_exception_flags |= flags;'. -*----------------------------------------------------------------------------*/ - -void float_raise( int8 flags ) -{ - - float_exception_flags |= flags; - -} - -/*---------------------------------------------------------------------------- -| Internal canonical NaN format. -*----------------------------------------------------------------------------*/ -typedef struct { - flag sign; - bits64 high, low; -} commonNaNT; - -/*---------------------------------------------------------------------------- -| The pattern for a default generated single-precision NaN. -*----------------------------------------------------------------------------*/ -#define float32_default_nan 0xFFFFFFFF - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is a NaN; -| otherwise returns 0. -*----------------------------------------------------------------------------*/ - -flag float32_is_nan( float32 a ) -{ - - return ( 0xFF000000 < (bits32) ( a<<1 ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is a signaling -| NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ - -flag float32_is_signaling_nan( float32 a ) -{ - - return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point NaN -| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT float32ToCommonNaN( float32 a ) -{ - commonNaNT z; - - if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); - z.sign = a>>31; - z.low = 0; - z.high = ( (bits64) a )<<41; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the single- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static float32 commonNaNToFloat32( commonNaNT a ) -{ - - return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 ); - -} - -/*---------------------------------------------------------------------------- -| Takes two single-precision floating-point values `a' and `b', one of which -| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a -| signaling NaN, the invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static float32 propagateFloat32NaN( float32 a, float32 b ) -{ - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - - aIsNaN = float32_is_nan( a ); - aIsSignalingNaN = float32_is_signaling_nan( a ); - bIsNaN = float32_is_nan( b ); - bIsSignalingNaN = float32_is_signaling_nan( b ); - a |= 0x00400000; - b |= 0x00400000; - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); - if ( aIsNaN ) { - return ( aIsSignalingNaN & bIsNaN ) ? b : a; - } - else { - return b; - } - -} - -/*---------------------------------------------------------------------------- -| The pattern for a default generated double-precision NaN. -*----------------------------------------------------------------------------*/ -#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF ) - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is a NaN; -| otherwise returns 0. -*----------------------------------------------------------------------------*/ - -flag float64_is_nan( float64 a ) -{ - - return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is a signaling -| NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ - -flag float64_is_signaling_nan( float64 a ) -{ - - return - ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) - && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point NaN -| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT float64ToCommonNaN( float64 a ) -{ - commonNaNT z; - - if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); - z.sign = a>>63; - z.low = 0; - z.high = a<<12; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the double- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static float64 commonNaNToFloat64( commonNaNT a ) -{ - - return - ( ( (bits64) a.sign )<<63 ) - | LIT64( 0x7FF8000000000000 ) - | ( a.high>>12 ); - -} - -/*---------------------------------------------------------------------------- -| Takes two double-precision floating-point values `a' and `b', one of which -| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a -| signaling NaN, the invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static float64 propagateFloat64NaN( float64 a, float64 b ) -{ - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - - aIsNaN = float64_is_nan( a ); - aIsSignalingNaN = float64_is_signaling_nan( a ); - bIsNaN = float64_is_nan( b ); - bIsSignalingNaN = float64_is_signaling_nan( b ); - a |= LIT64( 0x0008000000000000 ); - b |= LIT64( 0x0008000000000000 ); - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); - if ( aIsNaN ) { - return ( aIsSignalingNaN & bIsNaN ) ? b : a; - } - else { - return b; - } - -} - -#ifdef FLOATX80 - -/*---------------------------------------------------------------------------- -| The pattern for a default generated extended double-precision NaN. The -| `high' and `low' values hold the most- and least-significant bits, -| respectively. -*----------------------------------------------------------------------------*/ -#define floatx80_default_nan_high 0xFFFF -#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is a -| NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ - -flag floatx80_is_nan( floatx80 a ) -{ - - return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is a -| signaling NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ - -flag floatx80_is_signaling_nan( floatx80 a ) -{ - bits64 aLow; - - aLow = a.low & ~ LIT64( 0x4000000000000000 ); - return - ( ( a.high & 0x7FFF ) == 0x7FFF ) - && (bits64) ( aLow<<1 ) - && ( a.low == aLow ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the -| invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT floatx80ToCommonNaN( floatx80 a ) -{ - commonNaNT z; - - if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); - z.sign = a.high>>15; - z.low = 0; - z.high = a.low<<1; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the extended -| double-precision floating-point format. -*----------------------------------------------------------------------------*/ - -static floatx80 commonNaNToFloatx80( commonNaNT a ) -{ - floatx80 z; - - z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 ); - z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF; - return z; - -} - -/*---------------------------------------------------------------------------- -| Takes two extended double-precision floating-point values `a' and `b', one -| of which is a NaN, and returns the appropriate NaN result. If either `a' or -| `b' is a signaling NaN, the invalid exception is raised. -*----------------------------------------------------------------------------*/ - -floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b ) -{ - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - - aIsNaN = floatx80_is_nan( a ); - aIsSignalingNaN = floatx80_is_signaling_nan( a ); - bIsNaN = floatx80_is_nan( b ); - bIsSignalingNaN = floatx80_is_signaling_nan( b ); - a.low |= LIT64( 0xC000000000000000 ); - b.low |= LIT64( 0xC000000000000000 ); - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); - if ( aIsNaN ) { - return ( aIsSignalingNaN & bIsNaN ) ? b : a; - } - else { - return b; - } - -} - -#define EXP_BIAS 0x3FFF - -/*---------------------------------------------------------------------------- -| Returns the fraction bits of the extended double-precision floating-point -| value `a'. -*----------------------------------------------------------------------------*/ - -static inline bits64 extractFloatx80Frac( floatx80 a ) -{ - - return a.low; - -} - -/*---------------------------------------------------------------------------- -| Returns the exponent bits of the extended double-precision floating-point -| value `a'. -*----------------------------------------------------------------------------*/ - -static inline int32 extractFloatx80Exp( floatx80 a ) -{ - - return a.high & 0x7FFF; - -} - -/*---------------------------------------------------------------------------- -| Returns the sign bit of the extended double-precision floating-point value -| `a'. -*----------------------------------------------------------------------------*/ - -static inline flag extractFloatx80Sign( floatx80 a ) -{ - - return a.high>>15; - -} - -#endif - -#ifdef FLOAT128 - -/*---------------------------------------------------------------------------- -| The pattern for a default generated quadruple-precision NaN. The `high' and -| `low' values hold the most- and least-significant bits, respectively. -*----------------------------------------------------------------------------*/ -#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF ) -#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is a NaN; -| otherwise returns 0. -*----------------------------------------------------------------------------*/ - -flag float128_is_nan( float128 a ) -{ - - return - ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) ) - && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is a -| signaling NaN; otherwise returns 0. -*----------------------------------------------------------------------------*/ - -flag float128_is_signaling_nan( float128 a ) -{ - - return - ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE ) - && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point NaN -| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid -| exception is raised. -*----------------------------------------------------------------------------*/ - -static commonNaNT float128ToCommonNaN( float128 a ) -{ - commonNaNT z; - - if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid ); - z.sign = a.high>>63; - shortShift128Left( a.high, a.low, 16, &z.high, &z.low ); - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the canonical NaN `a' to the quadruple- -| precision floating-point format. -*----------------------------------------------------------------------------*/ - -static float128 commonNaNToFloat128( commonNaNT a ) -{ - float128 z; - - shift128Right( a.high, a.low, 16, &z.high, &z.low ); - z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 ); - return z; - -} - -/*---------------------------------------------------------------------------- -| Takes two quadruple-precision floating-point values `a' and `b', one of -| which is a NaN, and returns the appropriate NaN result. If either `a' or -| `b' is a signaling NaN, the invalid exception is raised. -*----------------------------------------------------------------------------*/ - -static float128 propagateFloat128NaN( float128 a, float128 b ) -{ - flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; - - aIsNaN = float128_is_nan( a ); - aIsSignalingNaN = float128_is_signaling_nan( a ); - bIsNaN = float128_is_nan( b ); - bIsSignalingNaN = float128_is_signaling_nan( b ); - a.high |= LIT64( 0x0000800000000000 ); - b.high |= LIT64( 0x0000800000000000 ); - if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid ); - if ( aIsNaN ) { - return ( aIsSignalingNaN & bIsNaN ) ? b : a; - } - else { - return b; - } - -} - -#endif - diff --git a/softfloat/softfloat-specialize.h b/softfloat/softfloat-specialize.h new file mode 100644 index 0000000..7d59012 --- /dev/null +++ b/softfloat/softfloat-specialize.h @@ -0,0 +1,438 @@ +/* + * QEMU float support + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. + */ + +/* +=============================================================================== +This C source fragment is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. + +Written by John R. Hauser. This work was made possible in part by the +International Computer Science Institute, located at Suite 600, 1947 Center +Street, Berkeley, California 94704. Funding was partially provided by the +National Science Foundation under grant MIP-9311980. The original version +of this code was written as part of a project to build a fixed-point vector +processor in collaboration with the University of California at Berkeley, +overseen by Profs. Nelson Morgan and John Wawrzynek. More information +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ +arithmetic/SoftFloat.html'. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. + */ + +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is a +| NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +flag floatx80_is_nan( floatx80 a ); + +/*---------------------------------------------------------------------------- +| The pattern for a default generated extended double-precision NaN. +*----------------------------------------------------------------------------*/ +static inline floatx80 floatx80_default_nan(float_status *status) +{ + (void)status; + floatx80 r; + r.high = 0x7FFF; + r.low = LIT64( 0xFFFFFFFFFFFFFFFF ); + return r; +} + +/*---------------------------------------------------------------------------- +| Raises the exceptions specified by `flags'. Floating-point traps can be +| defined here if desired. It is currently not possible for such a trap +| to substitute a result value. If traps are not implemented, this routine +| should be simply `float_exception_flags |= flags;'. +*----------------------------------------------------------------------------*/ + +void float_raise(uint8_t flags, float_status *status); + +/*---------------------------------------------------------------------------- +| Internal canonical NaN format. +*----------------------------------------------------------------------------*/ +typedef struct { + flag sign; + uint64_t high, low; +} commonNaNT; + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is a NaN; +| otherwise returns 0. +*----------------------------------------------------------------------------*/ + +static inline flag float32_is_nan( float32 a ) +{ + + return ( 0xFF000000 < (uint32_t) ( a<<1 ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the single-precision floating-point value `a' is a signaling +| NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +static inline flag float32_is_signaling_nan( float32 a ) +{ + + return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the single-precision floating-point NaN +| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ + +static inline commonNaNT float32ToCommonNaN( float32 a, float_status *status ) +{ + commonNaNT z; + + if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_signaling, status ); + z.sign = a>>31; + z.low = 0; + z.high = ( (uint64_t) a )<<41; + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the canonical NaN `a' to the single- +| precision floating-point format. +*----------------------------------------------------------------------------*/ + +static inline float32 commonNaNToFloat32( commonNaNT a ) +{ + + return ( ( (uint32_t) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 ); + +} + +/*---------------------------------------------------------------------------- +| Takes two single-precision floating-point values `a' and `b', one of which +| is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a +| signaling NaN, the invalid exception is raised. +*----------------------------------------------------------------------------*/ + +static inline float32 propagateFloat32NaN( float32 a, float32 b, float_status *status ) +{ + flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN; + + aIsNaN = float32_is_nan( a ); + aIsSignalingNaN = float32_is_signaling_nan( a ); + bIsNaN = float32_is_nan( b ); + bIsSignalingNaN = float32_is_signaling_nan( b ); + a |= 0x00400000; + b |= 0x00400000; + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_signaling, status ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is a NaN; +| otherwise returns 0. +*----------------------------------------------------------------------------*/ + +static inline flag float64_is_nan( float64 a ) +{ + + return ( LIT64( 0xFFE0000000000000 ) < (uint64_t) ( a<<1 ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the double-precision floating-point value `a' is a signaling +| NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +static inline flag float64_is_signaling_nan( float64 a ) +{ + + return + ( ( ( a>>51 ) & 0xFFF ) == 0xFFE ) + && ( a & LIT64( 0x0007FFFFFFFFFFFF ) ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the double-precision floating-point NaN +| `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid +| exception is raised. +*----------------------------------------------------------------------------*/ + +static inline commonNaNT float64ToCommonNaN(float64 a, float_status *status) +{ + commonNaNT z; + + if (float64_is_signaling_nan(a)) { + float_raise(float_flag_invalid, status); + } + z.sign = float64_val(a) >> 63; + z.low = 0; + z.high = float64_val(a) << 12; + return z; +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the canonical NaN `a' to the double- +| precision floating-point format. +*----------------------------------------------------------------------------*/ + +static inline float64 commonNaNToFloat64(commonNaNT a, float_status *status) +{ + (void)status; + return + ( ( (uint64_t) a.sign )<<63 ) + | LIT64( 0x7FF8000000000000 ) + | ( a.high>>12 ); +} + +/*---------------------------------------------------------------------------- +| Returns 1 if the extended double-precision floating-point value `a' is a +| signaling NaN; otherwise returns 0. +*----------------------------------------------------------------------------*/ + +static inline flag floatx80_is_signaling_nan( floatx80 a ) +{ + uint64_t aLow; + + aLow = a.low & ~ LIT64( 0x4000000000000000 ); + return + ( ( a.high & 0x7FFF ) == 0x7FFF ) + && (uint64_t) ( aLow<<1 ) + && ( a.low == aLow ); + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the extended double-precision floating- +| point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the +| invalid exception is raised. +*----------------------------------------------------------------------------*/ + +static inline commonNaNT floatx80ToCommonNaN( floatx80 a, float_status *status ) +{ + commonNaNT z; + + if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_signaling, status ); + z.sign = a.high>>15; + z.low = 0; + z.high = a.low<<1; + return z; + +} + +/*---------------------------------------------------------------------------- +| Returns the result of converting the canonical NaN `a' to the extended +| double-precision floating-point format. +*----------------------------------------------------------------------------*/ + +static inline floatx80 commonNaNToFloatx80(commonNaNT a, float_status *status) +{ + (void)status; + floatx80 z; +#ifdef SOFTFLOAT_68K + z.low = LIT64( 0x4000000000000000 ) | ( a.high>>1 ); +#else + z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 ); +#endif + z.high = ( ( (int16_t) a.sign )<<15 ) | 0x7FFF; + return z; +} + +/*---------------------------------------------------------------------------- +| Takes two extended double-precision floating-point values `a' and `b', one +| of which is a NaN, and returns the appropriate NaN result. If either `a' or +| `b' is a signaling NaN, the invalid exception is raised. +*----------------------------------------------------------------------------*/ + +static inline floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b, float_status *status ) +{ + flag aIsNaN, aIsSignalingNaN, bIsSignalingNaN; +#ifndef SOFTFLOAT_68K + flag bIsNaN; +#endif + + aIsNaN = floatx80_is_nan( a ); + aIsSignalingNaN = floatx80_is_signaling_nan( a ); + bIsSignalingNaN = floatx80_is_signaling_nan( b ); +#ifdef SOFTFLOAT_68K + a.low |= LIT64( 0x4000000000000000 ); + b.low |= LIT64( 0x4000000000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_signaling, status ); + return aIsNaN ? a : b; +#else + bIsNaN = floatx80_is_nan( b ); + a.low |= LIT64( 0xC000000000000000 ); + b.low |= LIT64( 0xC000000000000000 ); + if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_signaling, status ); + if ( aIsNaN ) { + return ( aIsSignalingNaN & bIsNaN ) ? b : a; + } + else { + return b; + } +#endif + +} + +#ifdef SOFTFLOAT_68K +/*---------------------------------------------------------------------------- + | Takes extended double-precision floating-point NaN `a' and returns the + | appropriate NaN result. If `a' is a signaling NaN, the invalid exception + | is raised. + *----------------------------------------------------------------------------*/ + +static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a, float_status *status) +{ + if ( floatx80_is_signaling_nan( a ) ) + float_raise( float_flag_signaling, status ); + a.low |= LIT64( 0x4000000000000000 ); + + return a; +} +#endif + +// 28-12-2016: Added for Previous: + +/*---------------------------------------------------------------------------- + | Returns 1 if the extended double-precision floating-point value `a' is + | zero; otherwise returns 0. + *----------------------------------------------------------------------------*/ + +static inline flag floatx80_is_zero( floatx80 a ) +{ + + return ( ( a.high & 0x7FFF ) < 0x7FFF ) && ( a.low == 0 ); + +} + +/*---------------------------------------------------------------------------- + | Returns 1 if the extended double-precision floating-point value `a' is + | infinity; otherwise returns 0. + *----------------------------------------------------------------------------*/ + +static inline flag floatx80_is_infinity( floatx80 a ) +{ + + return ( ( a.high & 0x7FFF ) == 0x7FFF ) && ( (uint64_t) ( a.low<<1 ) == 0 ); + +} + +/*---------------------------------------------------------------------------- + | Returns 1 if the extended double-precision floating-point value `a' is + | negative; otherwise returns 0. + *----------------------------------------------------------------------------*/ + +static inline flag floatx80_is_negative( floatx80 a ) +{ + + return ( ( a.high & 0x8000 ) == 0x8000 ); + +} + +/*---------------------------------------------------------------------------- + | Returns 1 if the extended double-precision floating-point value `a' is + | unnormal; otherwise returns 0. + *----------------------------------------------------------------------------*/ +static inline flag floatx80_is_unnormal( floatx80 a ) +{ + return + ( ( a.high & 0x7FFF ) > 0 ) + && ( ( a.high & 0x7FFF ) < 0x7FFF) + && ( (uint64_t) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) ); +} + +/*---------------------------------------------------------------------------- + | Returns 1 if the extended double-precision floating-point value `a' is + | denormal; otherwise returns 0. + *----------------------------------------------------------------------------*/ + +static inline flag floatx80_is_denormal( floatx80 a ) +{ + return + ( ( a.high & 0x7FFF ) == 0 ) + && ( (uint64_t) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x0000000000000000 ) ) + && (uint64_t) ( a.low<<1 ); +} + +/*---------------------------------------------------------------------------- + | Returns 1 if the extended double-precision floating-point value `a' is + | normal; otherwise returns 0. + *----------------------------------------------------------------------------*/ + +static inline flag floatx80_is_normal( floatx80 a ) +{ + return + ( ( a.high & 0x7FFF ) < 0x7FFF ) + && ( (uint64_t) ( a.low & LIT64( 0x8000000000000000 ) ) == LIT64( 0x8000000000000000 ) ); +} +// End of addition for Previous + diff --git a/softfloat/softfloat.c b/softfloat/softfloat.c index d561daa..7bb5983 100644 --- a/softfloat/softfloat.c +++ b/softfloat/softfloat.c @@ -1,8 +1,32 @@ -/*============================================================================ - -This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic -Package, Release 2b. +#define SOFTFLOAT_68K + +#include +#include +#include "softfloat/softfloat.h" + + +/* + * QEMU float support + * + * The code in this source file is derived from release 2a of the SoftFloat + * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and + * some later contributions) are provided under that license, as detailed below. + * It has subsequently been modified by contributors to the QEMU Project, + * so some portions are provided under: + * the SoftFloat-2a license + * the BSD license + * GPL-v2-or-later + * + * Any future contributions to this file after December 1st 2014 will be + * taken to be licensed under the Softfloat-2a license unless specifically + * indicated otherwise. + */ + +/* +=============================================================================== +This C source file is part of the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2a. Written by John R. Hauser. This work was made possible in part by the International Computer Science Institute, located at Suite 600, 1947 Center @@ -11,37 +35,420 @@ National Science Foundation under grant MIP-9311980. The original version of this code was written as part of a project to build a fixed-point vector processor in collaboration with the University of California at Berkeley, overseen by Profs. Nelson Morgan and John Wawrzynek. More information -is available through the Web page `http://www.cs.berkeley.edu/~jhauser/ +is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/ arithmetic/SoftFloat.html'. -THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has -been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES -RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS -AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, -COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE -EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE -INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR -OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort +has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT +TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO +PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY +AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE. Derivative works are acceptable, even for commercial purposes, so long as -(1) the source code for the derivative work includes prominent notice that -the work is derivative, and (2) the source code includes prominent notice with -these four paragraphs for those parts of this code that are retained. - -=============================================================================*/ - -#include "m68kcpu.h" // which includes softfloat.h after defining the basic types - -/*---------------------------------------------------------------------------- -| Floating-point rounding mode, extended double-precision rounding precision, -| and exception flags. -*----------------------------------------------------------------------------*/ -int8 float_exception_flags = 0; -#ifdef FLOATX80 -int8 floatx80_rounding_precision = 80; -#endif - -int8 float_rounding_mode = float_round_nearest_even; +(1) they include prominent notice that the work is derivative, and (2) they +include prominent notice akin to these four paragraphs for those parts of +this code that are retained. + +=============================================================================== +*/ + +/* BSD licensing: + * Copyright (c) 2006, Fabrice Bellard + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF + * THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Portions of this work are licensed under the terms of the GNU GPL, + * version 2 or later. See the COPYING file in the top-level directory. + */ + +/* We only need stdlib for abort() */ + +/*---------------------------------------------------------------------------- +| Primitive arithmetic functions, including multi-word arithmetic, and +| division and square root approximations. (Can be specialized to target if +| desired.) +*----------------------------------------------------------------------------*/ +#include "softfloat-macros.h" + +/*---------------------------------------------------------------------------- + | Variables for storing sign, exponent and significand of internal extended + | double-precision floating-point value for external use. + *----------------------------------------------------------------------------*/ +flag floatx80_internal_sign = 0; +int32_t floatx80_internal_exp = 0; +uint64_t floatx80_internal_sig = 0; +int32_t floatx80_internal_exp0 = 0; +uint64_t floatx80_internal_sig0 = 0; +uint64_t floatx80_internal_sig1 = 0; +int8_t floatx80_internal_precision = 80; +int8_t floatx80_internal_mode = float_round_nearest_even; + +/*---------------------------------------------------------------------------- + | Functions for storing sign, exponent and significand of extended + | double-precision floating-point intermediate result for external use. + *----------------------------------------------------------------------------*/ +floatx80 roundSaveFloatx80Internal( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) +{ + uint64_t roundIncrement, roundMask, roundBits; + flag increment; + + if ( roundingPrecision == 80 ) { + goto precision80; + } else if ( roundingPrecision == 64 ) { + roundIncrement = LIT64( 0x0000000000000400 ); + roundMask = LIT64( 0x00000000000007FF ); + } else if ( roundingPrecision == 32 ) { + roundIncrement = LIT64( 0x0000008000000000 ); + roundMask = LIT64( 0x000000FFFFFFFFFF ); + } else { + goto precision80; + } + + zSig0 |= ( zSig1 != 0 ); + if ( status->float_rounding_mode != float_round_nearest_even ) { + if ( status->float_rounding_mode == float_round_to_zero ) { + roundIncrement = 0; + } else { + roundIncrement = roundMask; + if ( zSign ) { + if ( status->float_rounding_mode == float_round_up ) roundIncrement = 0; + } else { + if ( status->float_rounding_mode == float_round_down ) roundIncrement = 0; + } + } + } + + roundBits = zSig0 & roundMask; + + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + roundIncrement = roundMask + 1; + if ( status->float_rounding_mode == float_round_nearest_even && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); + +precision80: + increment = ( (int64_t) zSig1 < 0 ); + if ( status->float_rounding_mode != float_round_nearest_even ) { + if ( status->float_rounding_mode == float_round_to_zero ) { + increment = 0; + } else { + if ( zSign ) { + increment = ( status->float_rounding_mode == float_round_down ) && zSig1; + } else { + increment = ( status->float_rounding_mode == float_round_up ) && zSig1; + } + } + } + if ( increment ) { + ++zSig0; + if ( zSig0 == 0 ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } else { + if ((zSig1 << 1) == 0 && status->float_rounding_mode == float_round_nearest_even) + zSig0 &= ~1; + } + } else { + if ( zSig0 == 0 ) zExp = 0; + } + return packFloatx80( zSign, zExp, zSig0 ); +} + +static void saveFloatx80Internal( int8_t prec, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) +{ + floatx80_internal_sign = zSign; + floatx80_internal_exp = zExp; + floatx80_internal_sig0 = zSig0; + floatx80_internal_sig1 = zSig1; + floatx80_internal_precision = prec; + floatx80_internal_mode = status->float_rounding_mode; +} + +static void saveFloat64Internal( flag zSign, int16_t zExp, uint64_t zSig, float_status *status ) +{ + floatx80_internal_sign = zSign; + floatx80_internal_exp = zExp + 0x3C01; + floatx80_internal_sig0 = zSig<<1; + floatx80_internal_sig1 = 0; + floatx80_internal_precision = 64; + floatx80_internal_mode = status->float_rounding_mode; +} + +static void saveFloat32Internal( flag zSign, int16_t zExp, uint32_t zSig, float_status *status ) +{ + floatx80 z = roundSaveFloatx80Internal( 32, zSign, zExp + 0x3F81, ( (uint64_t) zSig )<<33, 0, status ); + + floatx80_internal_sign = zSign; + floatx80_internal_exp = extractFloatx80Exp( z ); + floatx80_internal_sig = extractFloatx80Frac( z ); + floatx80_internal_exp0 = zExp + 0x3F81; + floatx80_internal_sig0 = ( (uint64_t) zSig )<<33; + floatx80_internal_sig1 = 0; +} + +/*---------------------------------------------------------------------------- + | Functions for returning sign, exponent and significand of extended + | double-precision floating-point intermediate result for external use. + *----------------------------------------------------------------------------*/ + +void getRoundedFloatInternal( int8_t roundingPrecision, flag *pzSign, int32_t *pzExp, uint64_t *pzSig ) +{ + uint64_t roundIncrement, roundMask, roundBits; + flag increment; + + flag zSign = floatx80_internal_sign; + int32_t zExp = floatx80_internal_exp; + uint64_t zSig0 = floatx80_internal_sig0; + uint64_t zSig1 = floatx80_internal_sig1; + + if ( roundingPrecision == 80 ) { + goto precision80; + } else if ( roundingPrecision == 64 ) { + roundIncrement = LIT64( 0x0000000000000400 ); + roundMask = LIT64( 0x00000000000007FF ); + } else if ( roundingPrecision == 32 ) { + roundIncrement = LIT64( 0x0000008000000000 ); + roundMask = LIT64( 0x000000FFFFFFFFFF ); + } else { + goto precision80; + } + + zSig0 |= ( zSig1 != 0 ); + if ( floatx80_internal_mode != float_round_nearest_even ) { + if ( floatx80_internal_mode == float_round_to_zero ) { + roundIncrement = 0; + } else { + roundIncrement = roundMask; + if ( zSign ) { + if ( floatx80_internal_mode == float_round_up ) roundIncrement = 0; + } else { + if ( floatx80_internal_mode == float_round_down ) roundIncrement = 0; + } + } + } + + roundBits = zSig0 & roundMask; + + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + roundIncrement = roundMask + 1; + if ( floatx80_internal_mode == float_round_nearest_even && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + + *pzSign = zSign; + *pzExp = zExp; + *pzSig = zSig0; + return; + +precision80: + increment = ( (int64_t) zSig1 < 0 ); + if ( floatx80_internal_mode != float_round_nearest_even ) { + if ( floatx80_internal_mode == float_round_to_zero ) { + increment = 0; + } else { + if ( zSign ) { + increment = ( floatx80_internal_mode == float_round_down ) && zSig1; + } else { + increment = ( floatx80_internal_mode == float_round_up ) && zSig1; + } + } + } + if ( increment ) { + ++zSig0; + if ( zSig0 == 0 ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } else { + if ((zSig1 << 1) == 0 && floatx80_internal_mode == float_round_nearest_even) + zSig0 &= ~1; + } + } else { + if ( zSig0 == 0 ) zExp = 0; + } + + *pzSign = zSign; + *pzExp = zExp; + *pzSig = zSig0; +} + +floatx80 getFloatInternalOverflow( void ) +{ + flag zSign; + int32_t zExp; + uint64_t zSig; + + getRoundedFloatInternal( floatx80_internal_precision, &zSign, &zExp, &zSig ); + + if (zExp > (0x7fff + 0x6000)) { // catastrophic + zExp = 0; + } else { + zExp -= 0x6000; + } + + return packFloatx80( zSign, zExp, zSig ); + +} + +floatx80 getFloatInternalUnderflow( void ) +{ + flag zSign; + int32_t zExp; + uint64_t zSig; + + getRoundedFloatInternal( floatx80_internal_precision, &zSign, &zExp, &zSig ); + + if (zExp < (0x0000 - 0x6000)) { // catastrophic + zExp = 0; + } else { + zExp += 0x6000; + } + + return packFloatx80( zSign, zExp, zSig ); + +} + +floatx80 getFloatInternalRoundedAll( void ) +{ + flag zSign; + int32_t zExp; + uint64_t zSig, zSig32, zSig64, zSig80; + + if (floatx80_internal_precision == 80) { + getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 ); + zSig = zSig80; + } else if (floatx80_internal_precision == 64) { + getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 ); + getRoundedFloatInternal( 64, &zSign, &zExp, &zSig64 ); + zSig = zSig64; + zSig |= zSig80 & LIT64( 0x00000000000007FF ); + } else { + getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 ); + getRoundedFloatInternal( 64, &zSign, &zExp, &zSig64 ); + getRoundedFloatInternal( 32, &zSign, &zExp, &zSig32 ); + zSig = zSig32; + zSig |= zSig64 & LIT64( 0x000000FFFFFFFFFF ); + zSig |= zSig80 & LIT64( 0x00000000000007FF ); + } + + return packFloatx80( zSign, zExp & 0x7FFF, zSig ); + +} + +floatx80 getFloatInternalRoundedSome( void ) +{ + flag zSign; + int32_t zExp; + uint64_t zSig, zSig32, zSig64, zSig80; + + if (floatx80_internal_precision == 80) { + getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 ); + zSig = zSig80; + } else if (floatx80_internal_precision == 64) { + getRoundedFloatInternal( 64, &zSign, &zExp, &zSig64 ); + zSig80 = floatx80_internal_sig0; + if (zSig64 != (zSig80 & LIT64( 0xFFFFFFFFFFFFF800 ))) { + zSig80++; + } + zSig = zSig64; + zSig |= zSig80 & LIT64( 0x00000000000007FF ); + } else { + getRoundedFloatInternal( 32, &zSign, &zExp, &zSig32 ); + zSig80 = floatx80_internal_sig0; + if (zSig32 != (zSig80 & LIT64( 0xFFFFFF0000000000 ))) { + zSig80++; + } + zSig = zSig32; + zSig |= zSig80 & LIT64( 0x000000FFFFFFFFFF ); + } + + return packFloatx80( zSign, zExp & 0x7FFF, zSig ); + +} + +floatx80 getFloatInternalFloatx80( void ) +{ + flag zSign; + int32_t zExp; + uint64_t zSig; + + getRoundedFloatInternal( 80, &zSign, &zExp, &zSig ); + + return packFloatx80( zSign, zExp & 0x7FFF, zSig ); + +} + +floatx80 getFloatInternalUnrounded( void ) +{ + flag zSign = floatx80_internal_sign; + int32_t zExp = floatx80_internal_exp; + uint64_t zSig = floatx80_internal_sig0; + + return packFloatx80( zSign, zExp & 0x7FFF, zSig ); + +} + +uint64_t getFloatInternalGRS( void ) +{ +#if 1 + if (floatx80_internal_sig1) + return 5; + + if (floatx80_internal_precision == 64 && + floatx80_internal_sig0 & LIT64( 0x00000000000007FF )) { + return 1; + } + if (floatx80_internal_precision == 32 && + floatx80_internal_sig0 & LIT64( 0x000000FFFFFFFFFF )) { + return 1; + } + + return 0; +#else + uint64_t roundbits; + shift64RightJamming(floatx80_internal_sig1, 61, &roundbits); + + return roundbits; +#endif +} /*---------------------------------------------------------------------------- | Functions and definitions to determine: (1) whether tininess for underflow @@ -51,7 +458,20 @@ int8 float_rounding_mode = float_round_nearest_even; | are propagated from function inputs to output. These details are target- | specific. *----------------------------------------------------------------------------*/ -#include "softfloat-specialize" +#include "softfloat-specialize.h" + +/*---------------------------------------------------------------------------- +| Raises the exceptions specified by `flags'. Floating-point traps can be +| defined here if desired. It is currently not possible for such a trap +| to substitute a result value. If traps are not implemented, this routine +| should be simply `float_exception_flags |= flags;'. +*----------------------------------------------------------------------------*/ + +void float_raise(uint8_t flags, float_status *status) +{ + status->float_exception_flags |= flags; +} + /*---------------------------------------------------------------------------- | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6 @@ -64,43 +484,128 @@ int8 float_rounding_mode = float_round_nearest_even; | positive or negative integer is returned. *----------------------------------------------------------------------------*/ -static int32 roundAndPackInt32( flag zSign, bits64 absZ ) -{ - int8 roundingMode; - flag roundNearestEven; - int8 roundIncrement, roundBits; - int32 z; - - roundingMode = float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x40; - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x7F; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } - } - roundBits = absZ & 0x7F; - absZ = ( absZ + roundIncrement )>>7; - absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); - z = absZ; - if ( zSign ) z = - z; - if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { - float_raise( float_flag_invalid ); - return zSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; - } - if ( roundBits ) float_exception_flags |= float_flag_inexact; - return z; - -} +static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status) +{ + int8_t roundingMode; + flag roundNearestEven; + int8_t roundIncrement, roundBits; + int32_t z; + + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + roundIncrement = 0x40; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x7f; + break; + case float_round_down: + roundIncrement = zSign ? 0x7f : 0; + break; + default: + abort(); + } + roundBits = absZ & 0x7F; + absZ = ( absZ + roundIncrement )>>7; + absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + z = absZ; + if ( zSign ) z = - z; + if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { + float_raise(float_flag_invalid, status); + return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; + } + if (roundBits) { + status->float_exception_flags |= float_flag_inexact; + } + return z; + +} + + +#ifdef SOFTFLOAT_68K // 30-01-2017: Added for Previous +static int16_t roundAndPackInt16( flag zSign, uint64_t absZ, float_status *status ) +{ + int8_t roundingMode; + flag roundNearestEven; + int8_t roundIncrement, roundBits; + int16_t z; + + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x40; + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x7F; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = absZ & 0x7F; + absZ = ( absZ + roundIncrement )>>7; + absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + z = absZ; + if ( zSign ) z = - z; + z = (int16_t) z; + if ( ( absZ>>16 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { + float_raise( float_flag_invalid, status ); + return zSign ? (int16_t) 0x8000 : 0x7FFF; + } + if ( roundBits ) status->float_exception_flags |= float_flag_inexact; + return z; + +} + +static int8_t roundAndPackInt8( flag zSign, uint64_t absZ, float_status *status ) +{ + int8_t roundingMode; + flag roundNearestEven; + int8_t roundIncrement, roundBits; + int8_t z; + + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + roundIncrement = 0x40; + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = 0x7F; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = absZ & 0x7F; + absZ = ( absZ + roundIncrement )>>7; + absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + z = absZ; + if ( zSign ) z = - z; + z = (int8_t) z; + if ( ( absZ>>8 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) { + float_raise( float_flag_invalid, status ); + return zSign ? (int8_t) 0x80 : 0x7F; + } + if ( roundBits ) status->float_exception_flags |= float_flag_inexact; + return z; + +} +#endif // End of addition for Previous /*---------------------------------------------------------------------------- | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and @@ -114,44 +619,50 @@ static int32 roundAndPackInt32( flag zSign, bits64 absZ ) | returned. *----------------------------------------------------------------------------*/ -static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 ) -{ - int8 roundingMode; - flag roundNearestEven, increment; - int64 z; - - roundingMode = float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - increment = ( (sbits64) absZ1 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && absZ1; - } - else { - increment = ( roundingMode == float_round_up ) && absZ1; - } - } - } - if ( increment ) { - ++absZ0; - if ( absZ0 == 0 ) goto overflow; - absZ0 &= ~ ( ( (bits64) ( absZ1<<1 ) == 0 ) & roundNearestEven ); - } - z = absZ0; - if ( zSign ) z = - z; - if ( z && ( ( z < 0 ) ^ zSign ) ) { - overflow: - float_raise( float_flag_invalid ); - return - zSign ? (sbits64) LIT64( 0x8000000000000000 ) - : (sbits64) LIT64( 0x7FFFFFFFFFFFFFFF ); - } - if ( absZ1 ) float_exception_flags |= float_flag_inexact; - return z; +static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1, + float_status *status) +{ + int8_t roundingMode; + flag roundNearestEven, increment; + int64_t z; + + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t) absZ1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && absZ1; + break; + case float_round_down: + increment = zSign && absZ1; + break; + default: + abort(); + } + if ( increment ) { + ++absZ0; + if ( absZ0 == 0 ) goto overflow; + absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven ); + } + z = absZ0; + if ( zSign ) z = - z; + if ( z && ( ( z < 0 ) ^ zSign ) ) { + overflow: + float_raise(float_flag_invalid, status); + return + zSign ? (uint64_t) LIT64( 0x8000000000000000 ) + : LIT64( 0x7FFFFFFFFFFFFFFF ); + } + if (absZ1) { + status->float_exception_flags |= float_flag_inexact; + } + return z; } @@ -159,9 +670,10 @@ static int64 roundAndPackInt64( flag zSign, bits64 absZ0, bits64 absZ1 ) | Returns the fraction bits of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -static inline bits32 extractFloat32Frac( float32 a ) +static inline uint32_t extractFloat32Frac( float32 a ) { - return a & 0x007FFFFF; + + return float32_val(a) & 0x007FFFFF; } @@ -169,9 +681,10 @@ static inline bits32 extractFloat32Frac( float32 a ) | Returns the exponent bits of the single-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -static inline int16 extractFloat32Exp( float32 a ) +static inline int extractFloat32Exp(float32 a) { - return ( a>>23 ) & 0xFF; + + return ( float32_val(a)>>23 ) & 0xFF; } @@ -181,7 +694,8 @@ static inline int16 extractFloat32Exp( float32 a ) static inline flag extractFloat32Sign( float32 a ) { - return a>>31; + + return float32_val(a)>>31; } @@ -193,13 +707,13 @@ static inline flag extractFloat32Sign( float32 a ) *----------------------------------------------------------------------------*/ static void - normalizeFloat32Subnormal( bits32 aSig, int16 *zExpPtr, bits32 *zSigPtr ) + normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr) { - int8 shiftCount; + int8_t shiftCount; - shiftCount = countLeadingZeros32( aSig ) - 8; - *zSigPtr = aSig<>7; - zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); - if ( zSig == 0 ) zExp = 0; - return packFloat32( zSign, zExp, zSig ); +static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig, + float_status *status) +{ + int8_t roundingMode; + flag roundNearestEven; + int8_t roundIncrement, roundBits; + flag isTiny; + + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + roundIncrement = 0x40; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x7f; + break; + case float_round_down: + roundIncrement = zSign ? 0x7f : 0; + break; + default: + abort(); + break; + } + roundBits = zSig & 0x7F; + if ( 0xFD <= (uint16_t) zExp ) { + if ( ( 0xFD < zExp ) + || ( ( zExp == 0xFD ) + && ( (int32_t) ( zSig + roundIncrement ) < 0 ) ) + ) { +#ifdef SOFTFLOAT_68K + float_raise( float_flag_overflow, status ); + saveFloat32Internal( zSign, zExp, zSig, status ); + if ( roundBits ) float_raise( float_flag_inexact, status ); +#else + float_raise(float_flag_overflow | float_flag_inexact, status); +#endif + return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 )); + } + if ( zExp < 0 ) { + if (status->flush_to_zero) { + //float_raise(float_flag_output_denormal, status); + return packFloat32(zSign, 0, 0); + } + isTiny = + (status->float_detect_tininess + == float_tininess_before_rounding) + || ( zExp < -1 ) + || ( zSig + roundIncrement < 0x80000000 ); +#ifdef SOFTFLOAT_68K + if ( isTiny ) { + float_raise( float_flag_underflow, status ); + saveFloat32Internal( zSign, zExp, zSig, status ); + } +#endif + shift32RightJamming( zSig, - zExp, &zSig ); + zExp = 0; + roundBits = zSig & 0x7F; +#ifndef SOFTFLOAT_68K + if (isTiny && roundBits) + float_raise(float_flag_underflow, status); +#endif + } + } + if (roundBits) { + status->float_exception_flags |= float_flag_inexact; + } + zSig = ( zSig + roundIncrement )>>7; + zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat32( zSign, zExp, zSig ); } /*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand `zSig', and returns the proper single-precision floating- -| point value corresponding to the abstract input. This routine is just like -| `roundAndPackFloat32' except that `zSig' does not have to be normalized. -| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' -| floating-point exponent. +| Returns the fraction bits of the double-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -static float32 - normalizeRoundAndPackFloat32( flag zSign, int16 zExp, bits32 zSig ) +static inline uint64_t extractFloat64Frac( float64 a ) { - int8 shiftCount; - shiftCount = countLeadingZeros32( zSig ) - 1; - return roundAndPackFloat32( zSign, zExp - shiftCount, zSig<>52 ) & 0x7FF; } /*---------------------------------------------------------------------------- -| Returns the exponent bits of the double-precision floating-point value `a'. +| Returns the sign bit of the double-precision floating-point value `a'. *----------------------------------------------------------------------------*/ -static inline int16 extractFloat64Exp( float64 a ) +static inline flag extractFloat64Sign( float64 a ) { - return ( a>>52 ) & 0x7FF; + + return float64_val(a)>>63; } /*---------------------------------------------------------------------------- -| Returns the sign bit of the double-precision floating-point value `a'. +| If `a' is denormal and we are in flush-to-zero mode then set the +| input-denormal exception and return zero. Otherwise just return the value. *----------------------------------------------------------------------------*/ - -static inline flag extractFloat64Sign( float64 a ) +float64 float64_squash_input_denormal(float64 a, float_status *status) { - return a>>63; - + if (status->flush_inputs_to_zero) { + if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) { + //float_raise(float_flag_input_denormal, status); + return make_float64(float64_val(a) & (1ULL << 63)); + } + } + return a; } /*---------------------------------------------------------------------------- @@ -351,13 +892,13 @@ static inline flag extractFloat64Sign( float64 a ) *----------------------------------------------------------------------------*/ static void - normalizeFloat64Subnormal( bits64 aSig, int16 *zExpPtr, bits64 *zSigPtr ) + normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr) { - int8 shiftCount; + int8_t shiftCount; - shiftCount = countLeadingZeros64( aSig ) - 11; - *zSigPtr = aSig<float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + roundIncrement = 0x200; + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : 0x3ff; + break; + case float_round_down: + roundIncrement = zSign ? 0x3ff : 0; + break; + default: + abort(); + } + roundBits = zSig & 0x3FF; + if ( 0x7FD <= (uint16_t) zExp ) { + if ( ( 0x7FD < zExp ) + || ( ( zExp == 0x7FD ) + && ( (int64_t) ( zSig + roundIncrement ) < 0 ) ) + ) { +#ifdef SOFTFLOAT_68K + float_raise( float_flag_overflow, status ); + saveFloat64Internal( zSign, zExp, zSig, status ); + if ( roundBits ) float_raise( float_flag_inexact, status ); +#else + float_raise(float_flag_overflow | float_flag_inexact, status); +#endif + return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 )); + } + if ( zExp < 0 ) { + if (status->flush_to_zero) { + //float_raise(float_flag_output_denormal, status); + return packFloat64(zSign, 0, 0); + } + isTiny = + (status->float_detect_tininess + == float_tininess_before_rounding) + || ( zExp < -1 ) + || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); +#ifdef SOFTFLOAT_68K + if ( isTiny ) { + float_raise( float_flag_underflow, status ); + saveFloat64Internal( zSign, zExp, zSig, status ); + } +#endif + shift64RightJamming( zSig, - zExp, &zSig ); + zExp = 0; + roundBits = zSig & 0x3FF; +#ifndef SOFTFLOAT_68K + if (isTiny && roundBits) + float_raise(float_flag_underflow, status); +#endif + } + } + if (roundBits) { + status->float_exception_flags |= float_flag_inexact; + } + zSig = ( zSig + roundIncrement )>>10; + zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); + if ( zSig == 0 ) zExp = 0; + return packFloat64( zSign, zExp, zSig ); + +} + +/*---------------------------------------------------------------------------- +| Returns the fraction bits of the extended double-precision floating-point +| value `a'. +*----------------------------------------------------------------------------*/ + +uint64_t extractFloatx80Frac( floatx80 a ) { - int8 roundingMode; - flag roundNearestEven; - int16 roundIncrement, roundBits; - flag isTiny; - - roundingMode = float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - roundIncrement = 0x200; - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - roundIncrement = 0; - } - else { - roundIncrement = 0x3FF; - if ( zSign ) { - if ( roundingMode == float_round_up ) roundIncrement = 0; - } - else { - if ( roundingMode == float_round_down ) roundIncrement = 0; - } - } - } - roundBits = zSig & 0x3FF; - if ( 0x7FD <= (bits16) zExp ) { - if ( ( 0x7FD < zExp ) - || ( ( zExp == 0x7FD ) - && ( (sbits64) ( zSig + roundIncrement ) < 0 ) ) - ) { - float_raise( float_flag_overflow | float_flag_inexact ); - return packFloat64( zSign, 0x7FF, 0 ) - ( roundIncrement == 0 ); - } - if ( zExp < 0 ) { - isTiny = - ( float_detect_tininess == float_tininess_before_rounding ) - || ( zExp < -1 ) - || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) ); - shift64RightJamming( zSig, - zExp, &zSig ); - zExp = 0; - roundBits = zSig & 0x3FF; - if ( isTiny && roundBits ) float_raise( float_flag_underflow ); - } - } - if ( roundBits ) float_exception_flags |= float_flag_inexact; - zSig = ( zSig + roundIncrement )>>10; - zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven ); - if ( zSig == 0 ) zExp = 0; - return packFloat64( zSign, zExp, zSig ); + + return a.low; } /*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand `zSig', and returns the proper double-precision floating- -| point value corresponding to the abstract input. This routine is just like -| `roundAndPackFloat64' except that `zSig' does not have to be normalized. -| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true'' -| floating-point exponent. +| Returns the exponent bits of the extended double-precision floating-point +| value `a'. *----------------------------------------------------------------------------*/ -static float64 - normalizeRoundAndPackFloat64( flag zSign, int16 zExp, bits64 zSig ) +int32_t extractFloatx80Exp( floatx80 a ) { - int8 shiftCount; - shiftCount = countLeadingZeros64( zSig ) - 1; - return roundAndPackFloat64( zSign, zExp - shiftCount, zSig<>15; + +} /*---------------------------------------------------------------------------- | Normalizes the subnormal extended double-precision floating-point value @@ -480,14 +1063,31 @@ static float64 | `zSigPtr', respectively. *----------------------------------------------------------------------------*/ -static void - normalizeFloatx80Subnormal( bits64 aSig, int32 *zExpPtr, bits64 *zSigPtr ) +void normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr ) { - int8 shiftCount; + int8_t shiftCount; - shiftCount = countLeadingZeros64( aSig ); - *zSigPtr = aSig<float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + if ( roundingPrecision == 80 ) goto precision80; + if ( roundingPrecision == 64 ) { + roundIncrement = LIT64( 0x0000000000000400 ); + roundMask = LIT64( 0x00000000000007FF ); + } + else if ( roundingPrecision == 32 ) { + roundIncrement = LIT64( 0x0000008000000000 ); + roundMask = LIT64( 0x000000FFFFFFFFFF ); + } + else { + goto precision80; + } + zSig0 |= ( zSig1 != 0 ); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + break; + case float_round_to_zero: + roundIncrement = 0; + break; + case float_round_up: + roundIncrement = zSign ? 0 : roundMask; + break; + case float_round_down: + roundIncrement = zSign ? roundMask : 0; + break; + default: + abort(); + } + roundBits = zSig0 & roundMask; +#ifdef SOFTFLOAT_68K + if ( 0x7FFE <= (uint32_t) zExp ) { +#else + if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { +#endif + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) + ) { + goto overflow; + } +#ifdef SOFTFLOAT_68K + if ( zExp < 0 ) { +#else if ( zExp <= 0 ) { - isTiny = - ( float_detect_tininess == float_tininess_before_rounding ) +#endif + if (status->flush_to_zero) { + //float_raise(float_flag_output_denormal, status); + return packFloatx80(zSign, 0, 0); + } + isTiny = + (status->float_detect_tininess + == float_tininess_before_rounding) +#ifdef SOFTFLOAT_68K + || ( zExp < -1 ) +#else || ( zExp < 0 ) +#endif || ( zSig0 <= zSig0 + roundIncrement ); - shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); - zExp = 0; - roundBits = zSig0 & roundMask; - if ( isTiny && roundBits ) float_raise( float_flag_underflow ); - if ( roundBits ) float_exception_flags |= float_flag_inexact; - zSig0 += roundIncrement; - if ( (sbits64) zSig0 < 0 ) zExp = 1; - roundIncrement = roundMask + 1; - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - return packFloatx80( zSign, zExp, zSig0 ); - } - } - if ( roundBits ) float_exception_flags |= float_flag_inexact; - zSig0 += roundIncrement; - if ( zSig0 < (bits64)roundIncrement ) { - ++zExp; - zSig0 = LIT64( 0x8000000000000000 ); - } - roundIncrement = roundMask + 1; - if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { - roundMask |= roundIncrement; - } - zSig0 &= ~ roundMask; - if ( zSig0 == 0 ) zExp = 0; - return packFloatx80( zSign, zExp, zSig0 ); - precision80: - increment = ( (sbits64) zSig1 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig1; - } - else { - increment = ( roundingMode == float_round_up ) && zSig1; +#ifdef SOFTFLOAT_68K + if ( isTiny ) { + float_raise( float_flag_underflow, status ); + saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); } - } - } - if ( 0x7FFD <= (bits32) ( zExp - 1 ) ) { - if ( ( 0x7FFE < zExp ) - || ( ( zExp == 0x7FFE ) - && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) - && increment - ) - ) { - roundMask = 0; - overflow: - float_raise( float_flag_overflow | float_flag_inexact ); + shift64RightJamming( zSig0, -zExp, &zSig0 ); +#else + shift64RightJamming( zSig0, 1 - zExp, &zSig0 ); +#endif + zExp = 0; + roundBits = zSig0 & roundMask; +#ifdef SOFTFLOAT_68K + if ( isTiny ) float_raise( float_flag_underflow, status ); +#else + if (isTiny && roundBits) { + float_raise(float_flag_underflow, status); + } +#endif +if (roundBits) { + status->float_exception_flags |= float_flag_inexact; + } + zSig0 += roundIncrement; +#ifndef SOFTFLOAT_68K + if ( (int64_t) zSig0 < 0 ) zExp = 1; +#endif + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if (roundBits) { + status->float_exception_flags |= float_flag_inexact; + } + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); + precision80: + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); + } +#ifdef SOFTFLOAT_68K + if ( 0x7FFE <= (uint32_t) zExp ) { +#else + if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) { +#endif + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) + && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) + && increment + ) + ) { + roundMask = 0; + overflow: +#ifndef SOFTFLOAT_68K + float_raise(float_flag_overflow | float_flag_inexact, status); +#else + float_raise( float_flag_overflow, status ); + saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); + if ( ( zSig0 & roundMask ) || zSig1 ) float_raise( float_flag_inexact, status ); +#endif if ( ( roundingMode == float_round_to_zero ) - || ( zSign && ( roundingMode == float_round_up ) ) - || ( ! zSign && ( roundingMode == float_round_down ) ) - ) { - return packFloatx80( zSign, 0x7FFE, ~ roundMask ); - } - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE, ~ roundMask ); + } + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); + } +#ifdef SOFTFLOAT_68K + if ( zExp < 0 ) { +#else if ( zExp <= 0 ) { - isTiny = - ( float_detect_tininess == float_tininess_before_rounding ) +#endif + isTiny = + (status->float_detect_tininess + == float_tininess_before_rounding) +#ifdef SOFTFLOAT_68K + || ( zExp < -1 ) +#else || ( zExp < 0 ) - || ! increment - || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); - shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); - zExp = 0; - if ( isTiny && zSig1 ) float_raise( float_flag_underflow ); - if ( zSig1 ) float_exception_flags |= float_flag_inexact; - if ( roundNearestEven ) { - increment = ( (sbits64) zSig1 < 0 ); - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig1; - } - else { - increment = ( roundingMode == float_round_up ) && zSig1; - } +#endif + || ! increment + || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) ); +#ifdef SOFTFLOAT_68K + if ( isTiny ) { + float_raise( float_flag_underflow, status ); + saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status ); } - if ( increment ) { - ++zSig0; - zSig0 &= - ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); - if ( (sbits64) zSig0 < 0 ) zExp = 1; + shift64ExtraRightJamming( zSig0, zSig1, -zExp, &zSig0, &zSig1 ); +#else + shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 ); +#endif + zExp = 0; +#ifndef SOFTFLOAT_68K + if ( isTiny && zSig1 ) float_raise( float_flag_underflow, status ); +#endif + if (zSig1) float_raise(float_flag_inexact, status); + switch (roundingMode) { + case float_round_nearest_even: + case float_round_ties_away: + increment = ((int64_t)zSig1 < 0); + break; + case float_round_to_zero: + increment = 0; + break; + case float_round_up: + increment = !zSign && zSig1; + break; + case float_round_down: + increment = zSign && zSig1; + break; + default: + abort(); + } + if ( increment ) { + ++zSig0; + zSig0 &= + ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); +#ifndef SOFTFLOAT_68K + if ( (int64_t) zSig0 < 0 ) zExp = 1; +#endif + } + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if (zSig1) { + status->float_exception_flags |= float_flag_inexact; + } + if ( increment ) { + ++zSig0; + if ( zSig0 == 0 ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + else { + zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + } + } + else { + if ( zSig0 == 0 ) zExp = 0; + } + return packFloatx80( zSign, zExp, zSig0 ); + +} + +#else // SOFTFLOAT_68K + +floatx80 roundAndPackFloatx80( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) +{ + int8_t roundingMode; + flag roundNearestEven, increment; + uint64_t roundIncrement, roundMask, roundBits; + int32_t expOffset; + + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + if ( roundingPrecision == 80 ) goto precision80; + if ( roundingPrecision == 64 ) { + roundIncrement = LIT64( 0x0000000000000400 ); + roundMask = LIT64( 0x00000000000007FF ); + expOffset = 0x3C00; + } else if ( roundingPrecision == 32 ) { + roundIncrement = LIT64( 0x0000008000000000 ); + roundMask = LIT64( 0x000000FFFFFFFFFF ); + expOffset = 0x3F80; + } else { + goto precision80; + } + zSig0 |= ( zSig1 != 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } else { + roundIncrement = roundMask; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig0 & roundMask; + if ( ( ( 0x7FFE - expOffset ) < zExp ) || + ( ( zExp == ( 0x7FFE - expOffset ) ) && ( zSig0 + roundIncrement < zSig0 ) ) ) { + float_raise( float_flag_overflow, status ); + saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status ); + if ( zSig0 & roundMask ) float_raise( float_flag_inexact, status ); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE - expOffset, ~ roundMask ); + } + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); + } + if ( zExp < ( expOffset + 1 ) ) { + float_raise( float_flag_underflow, status ); + saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status ); + shift64RightJamming( zSig0, -( zExp - ( expOffset + 1 ) ), &zSig0 ); + zExp = expOffset + 1; + roundBits = zSig0 & roundMask; + if ( roundBits ) float_raise( float_flag_inexact, status ); + zSig0 += roundIncrement; + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + return packFloatx80( zSign, zExp, zSig0 ); + } + if ( roundBits ) { + float_raise( float_flag_inexact, status ); + saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status); + } + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); +precision80: + increment = ( (int64_t) zSig1 < 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + increment = 0; + } else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig1; + } else { + increment = ( roundingMode == float_round_up ) && zSig1; + } + } + } + if ( 0x7FFE <= (uint32_t) zExp ) { + if ( ( 0x7FFE < zExp ) || + ( ( zExp == 0x7FFE ) && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) && increment ) + ) { + roundMask = 0; + float_raise( float_flag_overflow, status ); + saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status ); + if ( ( zSig0 & roundMask ) || zSig1 ) float_raise( float_flag_inexact, status ); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE, ~ roundMask ); + } + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); + } + if ( zExp < 0 ) { + float_raise( float_flag_underflow, status ); + saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status); + shift64ExtraRightJamming( zSig0, zSig1, -zExp, &zSig0, &zSig1 ); + zExp = 0; + if ( zSig1 ) float_raise( float_flag_inexact, status ); + if ( roundNearestEven ) { + increment = ( (int64_t) zSig1 < 0 ); + } else { + if ( zSign ) { + increment = ( roundingMode == float_round_down ) && zSig1; + } else { + increment = ( roundingMode == float_round_up ) && zSig1; + } + } + if ( increment ) { + ++zSig0; + zSig0 &= + ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + } + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if ( zSig1 ) { + float_raise( float_flag_inexact, status ); + saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status ); + } + if ( increment ) { + ++zSig0; + if ( zSig0 == 0 ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } else { + zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven ); + } + } else { + if ( zSig0 == 0 ) zExp = 0; + } + return packFloatx80( zSign, zExp, zSig0 ); + +} + +#endif + +#ifdef SOFTFLOAT_68K // 21-01-2017: Added for Previous +floatx80 roundSigAndPackFloatx80( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status ) +{ + int8_t roundingMode; + flag roundNearestEven, isTiny; + uint64_t roundIncrement, roundMask, roundBits; + + roundingMode = status->float_rounding_mode; + roundNearestEven = ( roundingMode == float_round_nearest_even ); + if ( roundingPrecision == 32 ) { + roundIncrement = LIT64( 0x0000008000000000 ); + roundMask = LIT64( 0x000000FFFFFFFFFF ); + } else if ( roundingPrecision == 64 ) { + roundIncrement = LIT64( 0x0000000000000400 ); + roundMask = LIT64( 0x00000000000007FF ); + } else { + return roundAndPackFloatx80( 80, zSign, zExp, zSig0, zSig1, status ); + } + zSig0 |= ( zSig1 != 0 ); + if ( ! roundNearestEven ) { + if ( roundingMode == float_round_to_zero ) { + roundIncrement = 0; + } + else { + roundIncrement = roundMask; + if ( zSign ) { + if ( roundingMode == float_round_up ) roundIncrement = 0; + } + else { + if ( roundingMode == float_round_down ) roundIncrement = 0; + } + } + } + roundBits = zSig0 & roundMask; + + if ( 0x7FFE <= (uint32_t) zExp ) { + if ( ( 0x7FFE < zExp ) + || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) ) + ) { + float_raise( float_flag_overflow, status ); + saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status); + if ( zSig0 & roundMask ) float_raise( float_flag_inexact, status ); + if ( ( roundingMode == float_round_to_zero ) + || ( zSign && ( roundingMode == float_round_up ) ) + || ( ! zSign && ( roundingMode == float_round_down ) ) + ) { + return packFloatx80( zSign, 0x7FFE, LIT64( 0xFFFFFFFFFFFFFFFF ) ); + } + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); + } + + if ( zExp < 0 ) { + isTiny = + ( status->float_detect_tininess == float_tininess_before_rounding ) + || ( zExp < -1 ) + || ( zSig0 <= zSig0 + roundIncrement ); + if ( isTiny ) { + float_raise( float_flag_underflow, status ); + saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status ); } - return packFloatx80( zSign, zExp, zSig0 ); - } - } - if ( zSig1 ) float_exception_flags |= float_flag_inexact; - if ( increment ) { - ++zSig0; - if ( zSig0 == 0 ) { - ++zExp; - zSig0 = LIT64( 0x8000000000000000 ); - } - else { - zSig0 &= ~ ( ( (bits64) ( zSig1<<1 ) == 0 ) & roundNearestEven ); - } - } - else { - if ( zSig0 == 0 ) zExp = 0; - } - return packFloatx80( zSign, zExp, zSig0 ); + shift64RightJamming( zSig0, -zExp, &zSig0 ); + zExp = 0; + roundBits = zSig0 & roundMask; + if ( roundBits ) float_raise ( float_flag_inexact, status ); + zSig0 += roundIncrement; + if ( roundNearestEven && ( roundBits == roundIncrement ) ) { + roundMask |= roundIncrement<<1; + } + zSig0 &= ~roundMask; + return packFloatx80( zSign, zExp, zSig0 ); + } + } + if ( roundBits ) { + float_raise( float_flag_inexact, status ); + saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status ); + } + zSig0 += roundIncrement; + if ( zSig0 < roundIncrement ) { + ++zExp; + zSig0 = LIT64( 0x8000000000000000 ); + } + roundIncrement = roundMask + 1; + if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) { + roundMask |= roundIncrement; + } + zSig0 &= ~ roundMask; + if ( zSig0 == 0 ) zExp = 0; + return packFloatx80( zSign, zExp, zSig0 ); + +} +#endif // End of Addition for Previous -} /*---------------------------------------------------------------------------- | Takes an abstract floating-point value having sign `zSign', exponent @@ -685,4083 +1611,1867 @@ static void | normalized. *----------------------------------------------------------------------------*/ -static floatx80 - normalizeRoundAndPackFloatx80( - int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 - ) +static floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision, + flag zSign, int32_t zExp, + uint64_t zSig0, uint64_t zSig1, + float_status *status) { - int8 shiftCount; + int8_t shiftCount; - if ( zSig0 == 0 ) { - zSig0 = zSig1; - zSig1 = 0; - zExp -= 64; - } - shiftCount = countLeadingZeros64( zSig0 ); - shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); - zExp -= shiftCount; - return - roundAndPackFloatx80( roundingPrecision, zSign, zExp, zSig0, zSig1 ); + if ( zSig0 == 0 ) { + zSig0 = zSig1; + zSig1 = 0; + zExp -= 64; + } + shiftCount = countLeadingZeros64( zSig0 ); + shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); + zExp -= shiftCount; + return roundAndPackFloatx80(roundingPrecision, zSign, zExp, + zSig0, zSig1, status); } -#endif - -#ifdef FLOAT128 - /*---------------------------------------------------------------------------- -| Returns the least-significant 64 fraction bits of the quadruple-precision -| floating-point value `a'. +| Returns the result of converting the 32-bit two's complement integer `a' +| to the extended double-precision floating-point format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. *----------------------------------------------------------------------------*/ -static inline bits64 extractFloat128Frac1( float128 a ) +floatx80 int32_to_floatx80(int32_t a) { - return a.low; + flag zSign; + uint32_t absA; + int8_t shiftCount; + uint64_t zSig; + + if ( a == 0 ) return packFloatx80( 0, 0, 0 ); + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros32( absA ) + 32; + zSig = absA; + return packFloatx80( zSign, 0x403E - shiftCount, zSig<>48 ) & 0x7FFF; - -} +floatx80 float64_to_floatx80(float64 a, float_status *status) +{ + flag aSign; + int aExp; + uint64_t aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a, status ), status ); + return packFloatx80( aSign, 0x7FFF, floatx80_default_infinity_low ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloat64Subnormal( aSig, &aExp, &aSig ); + } + return + packFloatx80( + aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); + +} + +#ifdef SOFTFLOAT_68K // 31-12-2016: Added for Previous +floatx80 float64_to_floatx80_allowunnormal( float64 a, float_status *status ) +{ + (void)status; + flag aSign; + int16_t aExp; + uint64_t aSig; + + aSig = extractFloat64Frac( a ); + aExp = extractFloat64Exp( a ); + aSign = extractFloat64Sign( a ); + if ( aExp == 0x7FF ) { + return packFloatx80( aSign, 0x7FFF, aSig<<11 ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + return packFloatx80( aSign, 0x3C01, aSig<<11 ); + } + return + packFloatx80( + aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); + +} +#endif // end of addition for Previous /*---------------------------------------------------------------------------- -| Returns the sign bit of the quadruple-precision floating-point value `a'. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 32-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic---which means in particular that the conversion +| is rounded according to the current rounding mode. If `a' is a NaN, the +| largest positive integer is returned. Otherwise, if the conversion +| overflows, the largest integer with the same sign as `a' is returned. *----------------------------------------------------------------------------*/ -static inline flag extractFloat128Sign( float128 a ) -{ - return a.high>>63; +int32_t floatx80_to_int32(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); +#ifdef SOFTFLOAT_68K + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) { + a = propagateFloatx80NaNOneArg( a, status ); + if ( a.low == aSig ) float_raise( float_flag_invalid, status ); + return (int32_t)(a.low>>32); + } + float_raise( float_flag_invalid, status ); + return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; + } +#else + if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; +#endif + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt32(aSign, aSig, status); + +} + +#ifdef SOFTFLOAT_68K // 30-01-2017: Addition for Previous +int16_t floatx80_to_int16( floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + float_raise( float_flag_invalid, status ); + if ( (uint64_t) ( aSig<<1 ) ) { + a = propagateFloatx80NaNOneArg( a, status ); + if ( a.low == aSig ) float_raise( float_flag_invalid, status ); + return (int16_t)(a.low>>48); + } + return aSign ? (int16_t) 0x8000 : 0x7FFF; + } + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt16( aSign, aSig, status ); + +} +int8_t floatx80_to_int8( floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) { + a = propagateFloatx80NaNOneArg( a, status ); + if ( a.low == aSig ) float_raise( float_flag_invalid, status ); + return (int8_t)(a.low>>56); + } + float_raise( float_flag_invalid, status ); + return aSign ? (int8_t) 0x80 : 0x7F; + } + shiftCount = 0x4037 - aExp; + if ( shiftCount <= 0 ) shiftCount = 1; + shift64RightJamming( aSig, shiftCount, &aSig ); + return roundAndPackInt8( aSign, aSig, status ); + +} +#endif // End of addition for Previous -} /*---------------------------------------------------------------------------- -| Normalizes the subnormal quadruple-precision floating-point value -| represented by the denormalized significand formed by the concatenation of -| `aSig0' and `aSig1'. The normalized exponent is stored at the location -| pointed to by `zExpPtr'. The most significant 49 bits of the normalized -| significand are stored at the location pointed to by `zSig0Ptr', and the -| least significant 64 bits of the normalized significand are stored at the -| location pointed to by `zSig1Ptr'. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 32-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic, except that the conversion is always rounded +| toward zero. If `a' is a NaN, the largest positive integer is returned. +| Otherwise, if the conversion overflows, the largest integer with the same +| sign as `a' is returned. *----------------------------------------------------------------------------*/ -static void - normalizeFloat128Subnormal( - bits64 aSig0, - bits64 aSig1, - int32 *zExpPtr, - bits64 *zSig0Ptr, - bits64 *zSig1Ptr - ) -{ - int8 shiftCount; - - if ( aSig0 == 0 ) { - shiftCount = countLeadingZeros64( aSig1 ) - 15; - if ( shiftCount < 0 ) { - *zSig0Ptr = aSig1>>( - shiftCount ); - *zSig1Ptr = aSig1<<( shiftCount & 63 ); - } - else { - *zSig0Ptr = aSig1<float_exception_flags |= float_flag_inexact; + } + return 0; + } + shiftCount = 0x403E - aExp; + savedASig = aSig; + aSig >>= shiftCount; + z = aSig; + if ( aSign ) z = - z; + if ( ( z < 0 ) ^ aSign ) { + invalid: + float_raise(float_flag_invalid, status); + return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF; + } + if ( ( aSig<float_exception_flags |= float_flag_inexact; + } + return z; } -#endif - /*---------------------------------------------------------------------------- -| Returns the result of converting the 32-bit two's complement integer `a' -| to the single-precision floating-point format. The conversion is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the 64-bit two's complement integer format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic---which means in particular that the conversion +| is rounded according to the current rounding mode. If `a' is a NaN, +| the largest positive integer is returned. Otherwise, if the conversion +| overflows, the largest integer with the same sign as `a' is returned. *----------------------------------------------------------------------------*/ -float32 int32_to_float32( int32 a ) -{ - flag zSign; - - if ( a == 0 ) return 0; - if ( a == (sbits32) 0x80000000 ) return packFloat32( 1, 0x9E, 0 ); - zSign = ( a < 0 ); - return normalizeRoundAndPackFloat32( zSign, 0x9C, zSign ? - a : a ); +int64_t floatx80_to_int64(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp, shiftCount; + uint64_t aSig, aSigExtra; + + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return 1ULL << 63; + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + shiftCount = 0x403E - aExp; + if ( shiftCount <= 0 ) { + if ( shiftCount ) { + float_raise(float_flag_invalid, status); + if ( ! aSign + || ( ( aExp == 0x7FFF ) + && ( aSig != LIT64( 0x8000000000000000 ) ) ) + ) { + return LIT64( 0x7FFFFFFFFFFFFFFF ); + } + return (int64_t) LIT64( 0x8000000000000000 ); + } + aSigExtra = 0; + } + else { + shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra ); + } + return roundAndPackInt64(aSign, aSig, aSigExtra, status); } /*---------------------------------------------------------------------------- -| Returns the result of converting the 32-bit two's complement integer `a' -| to the double-precision floating-point format. The conversion is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns the result of converting the extended double-precision floating- +| point value `a' to the single-precision floating-point format. The +| conversion is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float64 int32_to_float64( int32 a ) -{ - flag zSign; - uint32 absA; - int8 shiftCount; - bits64 zSig; - - if ( a == 0 ) return 0; - zSign = ( a < 0 ); - absA = zSign ? - a : a; - shiftCount = countLeadingZeros32( absA ) + 21; - zSig = absA; - return packFloat64( zSign, 0x432 - shiftCount, zSig<floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); + } + return a; + +} +#endif - if ( a == 0 ) return packFloatx80( 0, 0, 0 ); - zSign = ( a < 0 ); - absA = zSign ? - a : a; - shiftCount = countLeadingZeros32( absA ) + 32; - zSig = absA; - return packFloatx80( zSign, 0x403E - shiftCount, zSig<>( - shiftCount ); - if ( (bits32) ( aSig<<( shiftCount & 31 ) ) ) { - float_exception_flags |= float_flag_inexact; - } - if ( aSign ) z = - z; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the 64-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int64 float32_to_int64( float32 a ) -{ - flag aSign; - int16 aExp, shiftCount; - bits32 aSig; - bits64 aSig64, aSigExtra; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - shiftCount = 0xBE - aExp; - if ( shiftCount < 0 ) { - float_raise( float_flag_invalid ); - if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - return (sbits64) LIT64( 0x8000000000000000 ); - } - if ( aExp ) aSig |= 0x00800000; - aSig64 = aSig; - aSig64 <<= 40; - shift64ExtraRightJamming( aSig64, 0, shiftCount, &aSig64, &aSigExtra ); - return roundAndPackInt64( aSign, aSig64, aSigExtra ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the 64-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. If -| `a' is a NaN, the largest positive integer is returned. Otherwise, if the -| conversion overflows, the largest integer with the same sign as `a' is -| returned. -*----------------------------------------------------------------------------*/ - -int64 float32_to_int64_round_to_zero( float32 a ) -{ - flag aSign; - int16 aExp, shiftCount; - bits32 aSig; - bits64 aSig64; - int64 z; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - shiftCount = aExp - 0xBE; - if ( 0 <= shiftCount ) { - if ( a != 0xDF000000 ) { - float_raise( float_flag_invalid ); - if ( ! aSign || ( ( aExp == 0xFF ) && aSig ) ) { - return LIT64( 0x7FFFFFFFFFFFFFFF ); - } - } - return (sbits64) LIT64( 0x8000000000000000 ); - } - else if ( aExp <= 0x7E ) { - if ( aExp | aSig ) float_exception_flags |= float_flag_inexact; - return 0; - } - aSig64 = aSig | 0x00800000; - aSig64 <<= 40; - z = aSig64>>( - shiftCount ); - if ( (bits64) ( aSig64<<( shiftCount & 63 ) ) ) { - float_exception_flags |= float_flag_inexact; - } - if ( aSign ) z = - z; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the double-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float32_to_float64( float32 a ) -{ - flag aSign; - int16 aExp; - bits32 aSig; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if ( aSig ) return commonNaNToFloat64( float32ToCommonNaN( a ) ); - return packFloat64( aSign, 0x7FF, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( aSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - return packFloat64( aSign, aExp + 0x380, ( (bits64) aSig )<<29 ); - -} - -#ifdef FLOATX80 - -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the extended double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 float32_to_floatx80( float32 a ) -{ - flag aSign; - int16 aExp; - bits32 aSig; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a ) ); - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - aSig |= 0x00800000; - return packFloatx80( aSign, aExp + 0x3F80, ( (bits64) aSig )<<40 ); - -} - -#endif - -#ifdef FLOAT128 - -/*---------------------------------------------------------------------------- -| Returns the result of converting the single-precision floating-point value -| `a' to the double-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 float32_to_float128( float32 a ) -{ - flag aSign; - int16 aExp; - bits32 aSig; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if ( aSig ) return commonNaNToFloat128( float32ToCommonNaN( a ) ); - return packFloat128( aSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - return packFloat128( aSign, aExp + 0x3F80, ( (bits64) aSig )<<25, 0 ); - -} - -#endif - -/*---------------------------------------------------------------------------- -| Rounds the single-precision floating-point value `a' to an integer, and -| returns the result as a single-precision floating-point value. The -| operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_round_to_int( float32 a ) -{ - flag aSign; - int16 aExp; - bits32 lastBitMask, roundBitsMask; - int8 roundingMode; - float32 z; - - aExp = extractFloat32Exp( a ); - if ( 0x96 <= aExp ) { - if ( ( aExp == 0xFF ) && extractFloat32Frac( a ) ) { - return propagateFloat32NaN( a, a ); - } - return a; - } - if ( aExp <= 0x7E ) { - if ( (bits32) ( a<<1 ) == 0 ) return a; - float_exception_flags |= float_flag_inexact; - aSign = extractFloat32Sign( a ); - switch ( float_rounding_mode ) { - case float_round_nearest_even: - if ( ( aExp == 0x7E ) && extractFloat32Frac( a ) ) { - return packFloat32( aSign, 0x7F, 0 ); - } - break; - case float_round_down: - return aSign ? 0xBF800000 : 0; - case float_round_up: - return aSign ? 0x80000000 : 0x3F800000; - } - return packFloat32( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x96 - aExp; - roundBitsMask = lastBitMask - 1; - z = a; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - z += lastBitMask>>1; - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat32Sign( z ) ^ ( roundingMode == float_round_up ) ) { - z += roundBitsMask; - } - } - z &= ~ roundBitsMask; - if ( z != a ) float_exception_flags |= float_flag_inexact; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the single-precision -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated -| before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float32 addFloat32Sigs( float32 a, float32 b, flag zSign ) -{ - int16 aExp, bExp, zExp; - bits32 aSig, bSig, zSig; - int16 expDiff; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - expDiff = aExp - bExp; - aSig <<= 6; - bSig <<= 6; - if ( 0 < expDiff ) { - if ( aExp == 0xFF ) { - if ( aSig ) return propagateFloat32NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= 0x20000000; - } - shift32RightJamming( bSig, expDiff, &bSig ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - return packFloat32( zSign, 0xFF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= 0x20000000; - } - shift32RightJamming( aSig, - expDiff, &aSig ); - zExp = bExp; - } - else { - if ( aExp == 0xFF ) { - if ( aSig | bSig ) return propagateFloat32NaN( a, b ); - return a; - } - if ( aExp == 0 ) return packFloat32( zSign, 0, ( aSig + bSig )>>6 ); - zSig = 0x40000000 + aSig + bSig; - zExp = aExp; - goto roundAndPack; - } - aSig |= 0x20000000; - zSig = ( aSig + bSig )<<1; - --zExp; - if ( (sbits32) zSig < 0 ) { - zSig = aSig + bSig; - ++zExp; - } - roundAndPack: - return roundAndPackFloat32( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the single- -| precision floating-point values `a' and `b'. If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float32 subFloat32Sigs( float32 a, float32 b, flag zSign ) -{ - int16 aExp, bExp, zExp; - bits32 aSig, bSig, zSig; - int16 expDiff; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - expDiff = aExp - bExp; - aSig <<= 7; - bSig <<= 7; - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0xFF ) { - if ( aSig | bSig ) return propagateFloat32NaN( a, b ); - float_raise( float_flag_invalid ); - return float32_default_nan; - } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } - if ( bSig < aSig ) goto aBigger; - if ( aSig < bSig ) goto bBigger; - return packFloat32( float_rounding_mode == float_round_down, 0, 0 ); - bExpBigger: - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - return packFloat32( zSign ^ 1, 0xFF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= 0x40000000; - } - shift32RightJamming( aSig, - expDiff, &aSig ); - bSig |= 0x40000000; - bBigger: - zSig = bSig - aSig; - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0xFF ) { - if ( aSig ) return propagateFloat32NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= 0x40000000; - } - shift32RightJamming( bSig, expDiff, &bSig ); - aSig |= 0x40000000; - aBigger: - zSig = aSig - bSig; - zExp = aExp; - normalizeRoundAndPack: - --zExp; - return normalizeRoundAndPackFloat32( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the single-precision floating-point values `a' -| and `b'. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_add( float32 a, float32 b ) -{ - flag aSign, bSign; - - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign == bSign ) { - return addFloat32Sigs( a, b, aSign ); - } - else { - return subFloat32Sigs( a, b, aSign ); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the single-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_sub( float32 a, float32 b ) -{ - flag aSign, bSign; - - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign == bSign ) { - return subFloat32Sigs( a, b, aSign ); - } - else { - return addFloat32Sigs( a, b, aSign ); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the single-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_mul( float32 a, float32 b ) -{ - flag aSign, bSign, zSign; - int16 aExp, bExp, zExp; - bits32 aSig, bSig; - bits64 zSig64; - bits32 zSig; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - bSign = extractFloat32Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0xFF ) { - if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { - return propagateFloat32NaN( a, b ); - } - if ( ( bExp | bSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float32_default_nan; - } - return packFloat32( zSign, 0xFF, 0 ); - } - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float32_default_nan; - } - return packFloat32( zSign, 0xFF, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloat32( zSign, 0, 0 ); - normalizeFloat32Subnormal( bSig, &bExp, &bSig ); - } - zExp = aExp + bExp - 0x7F; - aSig = ( aSig | 0x00800000 )<<7; - bSig = ( bSig | 0x00800000 )<<8; - shift64RightJamming( ( (bits64) aSig ) * bSig, 32, &zSig64 ); - zSig = zSig64; - if ( 0 <= (sbits32) ( zSig<<1 ) ) { - zSig <<= 1; - --zExp; - } - return roundAndPackFloat32( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of dividing the single-precision floating-point value `a' -| by the corresponding value `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_div( float32 a, float32 b ) -{ - flag aSign, bSign, zSign; - int16 aExp, bExp, zExp; - bits32 aSig, bSig, zSig; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); - bSign = extractFloat32Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0xFF ) { - if ( aSig ) return propagateFloat32NaN( a, b ); - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - float_raise( float_flag_invalid ); - return float32_default_nan; - } - return packFloat32( zSign, 0xFF, 0 ); - } - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - return packFloat32( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float32_default_nan; - } - float_raise( float_flag_divbyzero ); - return packFloat32( zSign, 0xFF, 0 ); - } - normalizeFloat32Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat32( zSign, 0, 0 ); - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - zExp = aExp - bExp + 0x7D; - aSig = ( aSig | 0x00800000 )<<7; - bSig = ( bSig | 0x00800000 )<<8; - if ( bSig <= ( aSig + aSig ) ) { - aSig >>= 1; - ++zExp; - } - zSig = ( ( (bits64) aSig )<<32 ) / bSig; - if ( ( zSig & 0x3F ) == 0 ) { - zSig |= ( (bits64) bSig * zSig != ( (bits64) aSig )<<32 ); - } - return roundAndPackFloat32( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the remainder of the single-precision floating-point value `a' -| with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_rem( float32 a, float32 b ) -{ - flag aSign, zSign; - int16 aExp, bExp, expDiff; - bits32 aSig, bSig; - bits32 q; - bits64 aSig64, bSig64, q64; - bits32 alternateASig; - sbits32 sigMean; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - bSig = extractFloat32Frac( b ); - bExp = extractFloat32Exp( b ); -// bSign = extractFloat32Sign( b ); - if ( aExp == 0xFF ) { - if ( aSig || ( ( bExp == 0xFF ) && bSig ) ) { - return propagateFloat32NaN( a, b ); - } - float_raise( float_flag_invalid ); - return float32_default_nan; - } - if ( bExp == 0xFF ) { - if ( bSig ) return propagateFloat32NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - float_raise( float_flag_invalid ); - return float32_default_nan; - } - normalizeFloat32Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return a; - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - expDiff = aExp - bExp; - aSig |= 0x00800000; - bSig |= 0x00800000; - if ( expDiff < 32 ) { - aSig <<= 8; - bSig <<= 8; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - aSig >>= 1; - } - q = ( bSig <= aSig ); - if ( q ) aSig -= bSig; - if ( 0 < expDiff ) { - q = ( ( (bits64) aSig )<<32 ) / bSig; - q >>= 32 - expDiff; - bSig >>= 2; - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; - } - else { - aSig >>= 2; - bSig >>= 2; - } - } - else { - if ( bSig <= aSig ) aSig -= bSig; - aSig64 = ( (bits64) aSig )<<40; - bSig64 = ( (bits64) bSig )<<40; - expDiff -= 64; - while ( 0 < expDiff ) { - q64 = estimateDiv128To64( aSig64, 0, bSig64 ); - q64 = ( 2 < q64 ) ? q64 - 2 : 0; - aSig64 = - ( ( bSig * q64 )<<38 ); - expDiff -= 62; - } - expDiff += 64; - q64 = estimateDiv128To64( aSig64, 0, bSig64 ); - q64 = ( 2 < q64 ) ? q64 - 2 : 0; - q = q64>>( 64 - expDiff ); - bSig <<= 6; - aSig = ( ( aSig64>>33 )<<( expDiff - 1 ) ) - bSig * q; - } - do { - alternateASig = aSig; - ++q; - aSig -= bSig; - } while ( 0 <= (sbits32) aSig ); - sigMean = aSig + alternateASig; - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { - aSig = alternateASig; - } - zSign = ( (sbits32) aSig < 0 ); - if ( zSign ) aSig = - aSig; - return normalizeRoundAndPackFloat32( aSign ^ zSign, bExp, aSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the square root of the single-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float32_sqrt( float32 a ) -{ - flag aSign; - int16 aExp, zExp; - bits32 aSig, zSig; - bits64 rem, term; - - aSig = extractFloat32Frac( a ); - aExp = extractFloat32Exp( a ); - aSign = extractFloat32Sign( a ); - if ( aExp == 0xFF ) { - if ( aSig ) return propagateFloat32NaN( a, 0 ); - if ( ! aSign ) return a; - float_raise( float_flag_invalid ); - return float32_default_nan; - } - if ( aSign ) { - if ( ( aExp | aSig ) == 0 ) return a; - float_raise( float_flag_invalid ); - return float32_default_nan; - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return 0; - normalizeFloat32Subnormal( aSig, &aExp, &aSig ); - } - zExp = ( ( aExp - 0x7F )>>1 ) + 0x7E; - aSig = ( aSig | 0x00800000 )<<8; - zSig = estimateSqrt32( aExp, aSig ) + 2; - if ( ( zSig & 0x7F ) <= 5 ) { - if ( zSig < 2 ) { - zSig = 0x7FFFFFFF; - goto roundAndPack; - } - aSig >>= aExp & 1; - term = ( (bits64) zSig ) * zSig; - rem = ( ( (bits64) aSig )<<32 ) - term; - while ( (sbits64) rem < 0 ) { - --zSig; - rem += ( ( (bits64) zSig )<<1 ) | 1; - } - zSig |= ( rem != 0 ); - } - shift32RightJamming( zSig, 1, &zSig ); - roundAndPack: - return roundAndPackFloat32( 0, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float32_eq( float32 a, float32 b ) -{ - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The comparison -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float32_le( float32 a, float32 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); - return ( a == b ) || ( aSign ^ ( a < b ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float32_lt( float32 a, float32 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); - return ( a != b ) && ( aSign ^ ( a < b ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float32_eq_signaling( float32 a, float32 b ) -{ - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - return ( a == b ) || ( (bits32) ( ( a | b )<<1 ) == 0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float32_le_quiet( float32 a, float32 b ) -{ - flag aSign, bSign; -// int16 aExp, bExp; - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign || ( (bits32) ( ( a | b )<<1 ) == 0 ); - return ( a == b ) || ( aSign ^ ( a < b ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the single-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float32_lt_quiet( float32 a, float32 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat32Exp( a ) == 0xFF ) && extractFloat32Frac( a ) ) - || ( ( extractFloat32Exp( b ) == 0xFF ) && extractFloat32Frac( b ) ) - ) { - if ( float32_is_signaling_nan( a ) || float32_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat32Sign( a ); - bSign = extractFloat32Sign( b ); - if ( aSign != bSign ) return aSign && ( (bits32) ( ( a | b )<<1 ) != 0 ); - return ( a != b ) && ( aSign ^ ( a < b ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the 32-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int32 float64_to_int32( float64 a ) -{ - flag aSign; - int16 aExp, shiftCount; - bits64 aSig; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; - if ( aExp ) aSig |= LIT64( 0x0010000000000000 ); - shiftCount = 0x42C - aExp; - if ( 0 < shiftCount ) shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32( aSign, aSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the 32-bit two's complement integer format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. -| If `a' is a NaN, the largest positive integer is returned. Otherwise, if -| the conversion overflows, the largest integer with the same sign as `a' is -| returned. -*----------------------------------------------------------------------------*/ - -int32 float64_to_int32_round_to_zero( float64 a ) -{ - flag aSign; - int16 aExp, shiftCount; - bits64 aSig, savedASig; - int32 z; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( 0x41E < aExp ) { - if ( ( aExp == 0x7FF ) && aSig ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FF ) { - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; - return 0; - } - aSig |= LIT64( 0x0010000000000000 ); - shiftCount = 0x433 - aExp; - savedASig = aSig; - aSig >>= shiftCount; - z = aSig; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise( float_flag_invalid ); - return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig<>( - shiftCount ); - if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { - float_exception_flags |= float_flag_inexact; - } - } - if ( aSign ) z = - z; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the single-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float64_to_float32( float64 a ) -{ - flag aSign; - int16 aExp; - bits64 aSig; - bits32 zSig; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if ( aSig ) return commonNaNToFloat32( float64ToCommonNaN( a ) ); - return packFloat32( aSign, 0xFF, 0 ); - } - shift64RightJamming( aSig, 22, &aSig ); - zSig = aSig; - if ( aExp || zSig ) { - zSig |= 0x40000000; - aExp -= 0x381; - } - return roundAndPackFloat32( aSign, aExp, zSig ); - -} - -#ifdef FLOATX80 - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the extended double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 float64_to_floatx80( float64 a ) -{ - flag aSign; - int16 aExp; - bits64 aSig; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a ) ); - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - return - packFloatx80( - aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 ); - -} - -#endif - -#ifdef FLOAT128 - -/*---------------------------------------------------------------------------- -| Returns the result of converting the double-precision floating-point value -| `a' to the quadruple-precision floating-point format. The conversion is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 float64_to_float128( float64 a ) -{ - flag aSign; - int16 aExp; - bits64 aSig, zSig0, zSig1; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if ( aSig ) return commonNaNToFloat128( float64ToCommonNaN( a ) ); - return packFloat128( aSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat128( aSign, 0, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - --aExp; - } - shift128Right( aSig, 0, 4, &zSig0, &zSig1 ); - return packFloat128( aSign, aExp + 0x3C00, zSig0, zSig1 ); - -} - -#endif - -/*---------------------------------------------------------------------------- -| Rounds the double-precision floating-point value `a' to an integer, and -| returns the result as a double-precision floating-point value. The -| operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_round_to_int( float64 a ) -{ - flag aSign; - int16 aExp; - bits64 lastBitMask, roundBitsMask; - int8 roundingMode; - float64 z; - - aExp = extractFloat64Exp( a ); - if ( 0x433 <= aExp ) { - if ( ( aExp == 0x7FF ) && extractFloat64Frac( a ) ) { - return propagateFloat64NaN( a, a ); - } - return a; - } - if ( aExp < 0x3FF ) { - if ( (bits64) ( a<<1 ) == 0 ) return a; - float_exception_flags |= float_flag_inexact; - aSign = extractFloat64Sign( a ); - switch ( float_rounding_mode ) { - case float_round_nearest_even: - if ( ( aExp == 0x3FE ) && extractFloat64Frac( a ) ) { - return packFloat64( aSign, 0x3FF, 0 ); - } - break; - case float_round_down: - return aSign ? LIT64( 0xBFF0000000000000 ) : 0; - case float_round_up: - return - aSign ? LIT64( 0x8000000000000000 ) : LIT64( 0x3FF0000000000000 ); - } - return packFloat64( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x433 - aExp; - roundBitsMask = lastBitMask - 1; - z = a; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - z += lastBitMask>>1; - if ( ( z & roundBitsMask ) == 0 ) z &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat64Sign( z ) ^ ( roundingMode == float_round_up ) ) { - z += roundBitsMask; - } - } - z &= ~ roundBitsMask; - if ( z != a ) float_exception_flags |= float_flag_inexact; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the double-precision -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated -| before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float64 addFloat64Sigs( float64 a, float64 b, flag zSign ) -{ - int16 aExp, bExp, zExp; - bits64 aSig, bSig, zSig; - int16 expDiff; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - expDiff = aExp - bExp; - aSig <<= 9; - bSig <<= 9; - if ( 0 < expDiff ) { - if ( aExp == 0x7FF ) { - if ( aSig ) return propagateFloat64NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= LIT64( 0x2000000000000000 ); - } - shift64RightJamming( bSig, expDiff, &bSig ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FF ) { - if ( bSig ) return propagateFloat64NaN( a, b ); - return packFloat64( zSign, 0x7FF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= LIT64( 0x2000000000000000 ); - } - shift64RightJamming( aSig, - expDiff, &aSig ); - zExp = bExp; - } - else { - if ( aExp == 0x7FF ) { - if ( aSig | bSig ) return propagateFloat64NaN( a, b ); - return a; - } - if ( aExp == 0 ) return packFloat64( zSign, 0, ( aSig + bSig )>>9 ); - zSig = LIT64( 0x4000000000000000 ) + aSig + bSig; - zExp = aExp; - goto roundAndPack; - } - aSig |= LIT64( 0x2000000000000000 ); - zSig = ( aSig + bSig )<<1; - --zExp; - if ( (sbits64) zSig < 0 ) { - zSig = aSig + bSig; - ++zExp; - } - roundAndPack: - return roundAndPackFloat64( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the double- -| precision floating-point values `a' and `b'. If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static float64 subFloat64Sigs( float64 a, float64 b, flag zSign ) -{ - int16 aExp, bExp, zExp; - bits64 aSig, bSig, zSig; - int16 expDiff; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - expDiff = aExp - bExp; - aSig <<= 10; - bSig <<= 10; - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FF ) { - if ( aSig | bSig ) return propagateFloat64NaN( a, b ); - float_raise( float_flag_invalid ); - return float64_default_nan; - } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } - if ( bSig < aSig ) goto aBigger; - if ( aSig < bSig ) goto bBigger; - return packFloat64( float_rounding_mode == float_round_down, 0, 0 ); - bExpBigger: - if ( bExp == 0x7FF ) { - if ( bSig ) return propagateFloat64NaN( a, b ); - return packFloat64( zSign ^ 1, 0x7FF, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig |= LIT64( 0x4000000000000000 ); - } - shift64RightJamming( aSig, - expDiff, &aSig ); - bSig |= LIT64( 0x4000000000000000 ); - bBigger: - zSig = bSig - aSig; - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FF ) { - if ( aSig ) return propagateFloat64NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig |= LIT64( 0x4000000000000000 ); - } - shift64RightJamming( bSig, expDiff, &bSig ); - aSig |= LIT64( 0x4000000000000000 ); - aBigger: - zSig = aSig - bSig; - zExp = aExp; - normalizeRoundAndPack: - --zExp; - return normalizeRoundAndPackFloat64( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the double-precision floating-point values `a' -| and `b'. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_add( float64 a, float64 b ) -{ - flag aSign, bSign; - - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign == bSign ) { - return addFloat64Sigs( a, b, aSign ); - } - else { - return subFloat64Sigs( a, b, aSign ); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the double-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_sub( float64 a, float64 b ) -{ - flag aSign, bSign; - - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign == bSign ) { - return subFloat64Sigs( a, b, aSign ); - } - else { - return addFloat64Sigs( a, b, aSign ); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the double-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_mul( float64 a, float64 b ) -{ - flag aSign, bSign, zSign; - int16 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - bSign = extractFloat64Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FF ) { - if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { - return propagateFloat64NaN( a, b ); - } - if ( ( bExp | bSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float64_default_nan; - } - return packFloat64( zSign, 0x7FF, 0 ); - } - if ( bExp == 0x7FF ) { - if ( bSig ) return propagateFloat64NaN( a, b ); - if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float64_default_nan; - } - return packFloat64( zSign, 0x7FF, 0 ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloat64( zSign, 0, 0 ); - normalizeFloat64Subnormal( bSig, &bExp, &bSig ); - } - zExp = aExp + bExp - 0x3FF; - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; - mul64To128( aSig, bSig, &zSig0, &zSig1 ); - zSig0 |= ( zSig1 != 0 ); - if ( 0 <= (sbits64) ( zSig0<<1 ) ) { - zSig0 <<= 1; - --zExp; - } - return roundAndPackFloat64( zSign, zExp, zSig0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of dividing the double-precision floating-point value `a' -| by the corresponding value `b'. The operation is performed according to -| the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_div( float64 a, float64 b ) -{ - flag aSign, bSign, zSign; - int16 aExp, bExp, zExp; - bits64 aSig, bSig, zSig; - bits64 rem0, rem1; - bits64 term0, term1; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); - bSign = extractFloat64Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FF ) { - if ( aSig ) return propagateFloat64NaN( a, b ); - if ( bExp == 0x7FF ) { - if ( bSig ) return propagateFloat64NaN( a, b ); - float_raise( float_flag_invalid ); - return float64_default_nan; - } - return packFloat64( zSign, 0x7FF, 0 ); - } - if ( bExp == 0x7FF ) { - if ( bSig ) return propagateFloat64NaN( a, b ); - return packFloat64( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - float_raise( float_flag_invalid ); - return float64_default_nan; - } - float_raise( float_flag_divbyzero ); - return packFloat64( zSign, 0x7FF, 0 ); - } - normalizeFloat64Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloat64( zSign, 0, 0 ); - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - zExp = aExp - bExp + 0x3FD; - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<10; - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; - if ( bSig <= ( aSig + aSig ) ) { - aSig >>= 1; - ++zExp; - } - zSig = estimateDiv128To64( aSig, 0, bSig ); - if ( ( zSig & 0x1FF ) <= 2 ) { - mul64To128( bSig, zSig, &term0, &term1 ); - sub128( aSig, 0, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { - --zSig; - add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); - } - zSig |= ( rem1 != 0 ); - } - return roundAndPackFloat64( zSign, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the remainder of the double-precision floating-point value `a' -| with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_rem( float64 a, float64 b ) -{ - flag aSign, zSign; - int16 aExp, bExp, expDiff; - bits64 aSig, bSig; - bits64 q, alternateASig; - sbits64 sigMean; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - bSig = extractFloat64Frac( b ); - bExp = extractFloat64Exp( b ); -// bSign = extractFloat64Sign( b ); - if ( aExp == 0x7FF ) { - if ( aSig || ( ( bExp == 0x7FF ) && bSig ) ) { - return propagateFloat64NaN( a, b ); - } - float_raise( float_flag_invalid ); - return float64_default_nan; - } - if ( bExp == 0x7FF ) { - if ( bSig ) return propagateFloat64NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - float_raise( float_flag_invalid ); - return float64_default_nan; - } - normalizeFloat64Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return a; - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - expDiff = aExp - bExp; - aSig = ( aSig | LIT64( 0x0010000000000000 ) )<<11; - bSig = ( bSig | LIT64( 0x0010000000000000 ) )<<11; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - aSig >>= 1; - } - q = ( bSig <= aSig ); - if ( q ) aSig -= bSig; - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig, 0, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - aSig = - ( ( bSig>>2 ) * q ); - expDiff -= 62; - } - expDiff += 64; - if ( 0 < expDiff ) { - q = estimateDiv128To64( aSig, 0, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - q >>= 64 - expDiff; - bSig >>= 2; - aSig = ( ( aSig>>1 )<<( expDiff - 1 ) ) - bSig * q; - } - else { - aSig >>= 2; - bSig >>= 2; - } - do { - alternateASig = aSig; - ++q; - aSig -= bSig; - } while ( 0 <= (sbits64) aSig ); - sigMean = aSig + alternateASig; - if ( ( sigMean < 0 ) || ( ( sigMean == 0 ) && ( q & 1 ) ) ) { - aSig = alternateASig; - } - zSign = ( (sbits64) aSig < 0 ); - if ( zSign ) aSig = - aSig; - return normalizeRoundAndPackFloat64( aSign ^ zSign, bExp, aSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the square root of the double-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float64_sqrt( float64 a ) -{ - flag aSign; - int16 aExp, zExp; - bits64 aSig, zSig, doubleZSig; - bits64 rem0, rem1, term0, term1; -// float64 z; - - aSig = extractFloat64Frac( a ); - aExp = extractFloat64Exp( a ); - aSign = extractFloat64Sign( a ); - if ( aExp == 0x7FF ) { - if ( aSig ) return propagateFloat64NaN( a, a ); - if ( ! aSign ) return a; - float_raise( float_flag_invalid ); - return float64_default_nan; - } - if ( aSign ) { - if ( ( aExp | aSig ) == 0 ) return a; - float_raise( float_flag_invalid ); - return float64_default_nan; - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return 0; - normalizeFloat64Subnormal( aSig, &aExp, &aSig ); - } - zExp = ( ( aExp - 0x3FF )>>1 ) + 0x3FE; - aSig |= LIT64( 0x0010000000000000 ); - zSig = estimateSqrt32( aExp, aSig>>21 ); - aSig <<= 9 - ( aExp & 1 ); - zSig = estimateDiv128To64( aSig, 0, zSig<<32 ) + ( zSig<<30 ); - if ( ( zSig & 0x1FF ) <= 5 ) { - doubleZSig = zSig<<1; - mul64To128( zSig, zSig, &term0, &term1 ); - sub128( aSig, 0, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { - --zSig; - doubleZSig -= 2; - add128( rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1 ); - } - zSig |= ( ( rem0 | rem1 ) != 0 ); - } - return roundAndPackFloat64( 0, zExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is equal to the -| corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float64_eq( float64 a, float64 b ) -{ - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. The comparison is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float64_le( float64 a, float64 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); - return ( a == b ) || ( aSign ^ ( a < b ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float64_lt( float64 a, float64 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); - return ( a != b ) && ( aSign ^ ( a < b ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is equal to the -| corresponding value `b', and 0 otherwise. The invalid exception is raised -| if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float64_eq_signaling( float64 a, float64 b ) -{ - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - return ( a == b ) || ( (bits64) ( ( a | b )<<1 ) == 0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than or -| equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float64_le_quiet( float64 a, float64 b ) -{ - flag aSign, bSign; -// int16 aExp, bExp; - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign != bSign ) return aSign || ( (bits64) ( ( a | b )<<1 ) == 0 ); - return ( a == b ) || ( aSign ^ ( a < b ) ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the double-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float64_lt_quiet( float64 a, float64 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat64Exp( a ) == 0x7FF ) && extractFloat64Frac( a ) ) - || ( ( extractFloat64Exp( b ) == 0x7FF ) && extractFloat64Frac( b ) ) - ) { - if ( float64_is_signaling_nan( a ) || float64_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat64Sign( a ); - bSign = extractFloat64Sign( b ); - if ( aSign != bSign ) return aSign && ( (bits64) ( ( a | b )<<1 ) != 0 ); - return ( a != b ) && ( aSign ^ ( a < b ) ); - -} - -#ifdef FLOATX80 - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic---which means in particular that the conversion -| is rounded according to the current rounding mode. If `a' is a NaN, the -| largest positive integer is returned. Otherwise, if the conversion -| overflows, the largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int32 floatx80_to_int32( floatx80 a ) -{ - flag aSign; - int32 aExp, shiftCount; - bits64 aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; - shiftCount = 0x4037 - aExp; - if ( shiftCount <= 0 ) shiftCount = 1; - shift64RightJamming( aSig, shiftCount, &aSig ); - return roundAndPackInt32( aSign, aSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the 32-bit two's complement integer format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic, except that the conversion is always rounded -| toward zero. If `a' is a NaN, the largest positive integer is returned. -| Otherwise, if the conversion overflows, the largest integer with the same -| sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int32 floatx80_to_int32_round_to_zero( floatx80 a ) -{ - flag aSign; - int32 aExp, shiftCount; - bits64 aSig, savedASig; - int32 z; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( 0x401E < aExp ) { - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FFF ) { - if ( aExp || aSig ) float_exception_flags |= float_flag_inexact; - return 0; - } - shiftCount = 0x403E - aExp; - savedASig = aSig; - aSig >>= shiftCount; - z = aSig; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise( float_flag_invalid ); - return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig<>( - shiftCount ); - if ( (bits64) ( aSig<<( shiftCount & 63 ) ) ) { - float_exception_flags |= float_flag_inexact; - } - if ( aSign ) z = - z; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the single-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 floatx80_to_float32( floatx80 a ) -{ - flag aSign; - int32 aExp; - bits64 aSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) { - return commonNaNToFloat32( floatx80ToCommonNaN( a ) ); - } - return packFloat32( aSign, 0xFF, 0 ); - } - shift64RightJamming( aSig, 33, &aSig ); - if ( aExp || aSig ) aExp -= 0x3F81; - return roundAndPackFloat32( aSign, aExp, aSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the double-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 floatx80_to_float64( floatx80 a ) -{ - flag aSign; - int32 aExp; - bits64 aSig, zSig; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) { - return commonNaNToFloat64( floatx80ToCommonNaN( a ) ); - } - return packFloat64( aSign, 0x7FF, 0 ); - } - shift64RightJamming( aSig, 1, &zSig ); - if ( aExp || aSig ) aExp -= 0x3C01; - return roundAndPackFloat64( aSign, aExp, zSig ); - -} - -#ifdef FLOAT128 - -/*---------------------------------------------------------------------------- -| Returns the result of converting the extended double-precision floating- -| point value `a' to the quadruple-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 floatx80_to_float128( floatx80 a ) -{ - flag aSign; - int16 aExp; - bits64 aSig, zSig0, zSig1; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) { - return commonNaNToFloat128( floatx80ToCommonNaN( a ) ); - } - shift128Right( aSig<<1, 0, 16, &zSig0, &zSig1 ); - return packFloat128( aSign, aExp, zSig0, zSig1 ); - -} - -#endif - -/*---------------------------------------------------------------------------- -| Rounds the extended double-precision floating-point value `a' to an integer, -| and returns the result as an extended quadruple-precision floating-point -| value. The operation is performed according to the IEC/IEEE Standard for -| Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_round_to_int( floatx80 a ) -{ - flag aSign; - int32 aExp; - bits64 lastBitMask, roundBitsMask; - int8 roundingMode; - floatx80 z; - - aExp = extractFloatx80Exp( a ); - if ( 0x403E <= aExp ) { - if ( ( aExp == 0x7FFF ) && (bits64) ( extractFloatx80Frac( a )<<1 ) ) { - return propagateFloatx80NaN( a, a ); - } - return a; - } - if ( aExp < 0x3FFF ) { - if ( ( aExp == 0 ) - && ( (bits64) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { - return a; - } - float_exception_flags |= float_flag_inexact; - aSign = extractFloatx80Sign( a ); - switch ( float_rounding_mode ) { - case float_round_nearest_even: - if ( ( aExp == 0x3FFE ) && (bits64) ( extractFloatx80Frac( a )<<1 ) - ) { - return - packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); - } - break; - case float_round_down: - return - aSign ? - packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) - : packFloatx80( 0, 0, 0 ); - case float_round_up: - return - aSign ? packFloatx80( 1, 0, 0 ) - : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); - } - return packFloatx80( aSign, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x403E - aExp; - roundBitsMask = lastBitMask - 1; - z = a; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - z.low += lastBitMask>>1; - if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloatx80Sign( z ) ^ ( roundingMode == float_round_up ) ) { - z.low += roundBitsMask; - } - } - z.low &= ~ roundBitsMask; - if ( z.low == 0 ) { - ++z.high; - z.low = LIT64( 0x8000000000000000 ); - } - if ( z.low != a.low ) float_exception_flags |= float_flag_inexact; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the extended double- -| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is -| negated before being returned. `zSign' is ignored if the result is a NaN. -| The addition is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static floatx80 addFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) -{ - int32 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; - int32 expDiff; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) { - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); - return a; - } - if ( bExp == 0 ) --expDiff; - shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); - zExp = aExp; - } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) ++expDiff; - shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); - zExp = bExp; - } - else { - if ( aExp == 0x7FFF ) { - if ( (bits64) ( ( aSig | bSig )<<1 ) ) { - return propagateFloatx80NaN( a, b ); - } - return a; - } - zSig1 = 0; - zSig0 = aSig + bSig; - if ( aExp == 0 ) { - normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); - goto roundAndPack; - } - zExp = aExp; - goto shiftRight1; - } - zSig0 = aSig + bSig; - if ( (sbits64) zSig0 < 0 ) goto roundAndPack; - shiftRight1: - shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); - zSig0 |= LIT64( 0x8000000000000000 ); - ++zExp; - roundAndPack: - return - roundAndPackFloatx80( - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the extended -| double-precision floating-point values `a' and `b'. If `zSign' is 1, the -| difference is negated before being returned. `zSign' is ignored if the -| result is a NaN. The subtraction is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -static floatx80 subFloatx80Sigs( floatx80 a, floatx80 b, flag zSign ) -{ - int32 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; - int32 expDiff; - floatx80 z; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FFF ) { - if ( (bits64) ( ( aSig | bSig )<<1 ) ) { - return propagateFloatx80NaN( a, b ); - } - float_raise( float_flag_invalid ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - if ( aExp == 0 ) { - aExp = 1; - bExp = 1; - } - zSig1 = 0; - if ( bSig < aSig ) goto aBigger; - if ( aSig < bSig ) goto bBigger; - return packFloatx80( float_rounding_mode == float_round_down, 0, 0 ); - bExpBigger: - if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); - return packFloatx80( zSign ^ 1, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) ++expDiff; - shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); - bBigger: - sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); - return a; - } - if ( bExp == 0 ) --expDiff; - shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); - aBigger: - sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); - zExp = aExp; - normalizeRoundAndPack: - return - normalizeRoundAndPackFloatx80( - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the extended double-precision floating-point -| values `a' and `b'. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_add( floatx80 a, floatx80 b ) -{ - flag aSign, bSign; - - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign == bSign ) { - return addFloatx80Sigs( a, b, aSign ); - } - else { - return subFloatx80Sigs( a, b, aSign ); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of subtracting the extended double-precision floating- -| point values `a' and `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_sub( floatx80 a, floatx80 b ) -{ - flag aSign, bSign; - - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign == bSign ) { - return subFloatx80Sigs( a, b, aSign ); - } - else { - return addFloatx80Sigs( a, b, aSign ); - } - -} - -/*---------------------------------------------------------------------------- -| Returns the result of multiplying the extended double-precision floating- -| point values `a' and `b'. The operation is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_mul( floatx80 a, floatx80 b ) -{ - flag aSign, bSign, zSign; - int32 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; - floatx80 z; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) - || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b ); - } - if ( ( bExp | bSig ) == 0 ) goto invalid; - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - zExp = aExp + bExp - 0x3FFE; - mul64To128( aSig, bSig, &zSig0, &zSig1 ); - if ( 0 < (sbits64) zSig0 ) { - shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); - --zExp; - } - return - roundAndPackFloatx80( - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of dividing the extended double-precision floating-point -| value `a' by the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_div( floatx80 a, floatx80 b ) -{ - flag aSign, bSign, zSign; - int32 aExp, bExp, zExp; - bits64 aSig, bSig, zSig0, zSig1; - bits64 rem0, rem1, rem2, term0, term1, term2; - floatx80 z; - - aSig = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); - bSign = extractFloatx80Sign( b ); - zSign = aSign ^ bSign; - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b ); - if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); - goto invalid; - } - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); - return packFloatx80( zSign, 0, 0 ); - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - if ( ( aExp | aSig ) == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - float_raise( float_flag_divbyzero ); - return packFloatx80( zSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); - normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); - } - zExp = aExp - bExp + 0x3FFE; - rem1 = 0; - if ( bSig <= aSig ) { - shift128Right( aSig, 0, 1, &aSig, &rem1 ); - ++zExp; - } - zSig0 = estimateDiv128To64( aSig, rem1, bSig ); - mul64To128( bSig, zSig0, &term0, &term1 ); - sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { - --zSig0; - add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, bSig ); - if ( (bits64) ( zSig1<<1 ) <= 8 ) { - mul64To128( bSig, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - while ( (sbits64) rem1 < 0 ) { - --zSig1; - add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); - } - zSig1 |= ( ( rem1 | rem2 ) != 0 ); - } - return - roundAndPackFloatx80( - floatx80_rounding_precision, zSign, zExp, zSig0, zSig1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the remainder of the extended double-precision floating-point value -| `a' with respect to the corresponding value `b'. The operation is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_rem( floatx80 a, floatx80 b ) -{ - flag aSign, zSign; - int32 aExp, bExp, expDiff; - bits64 aSig0, aSig1, bSig; - bits64 q, term0, term1, alternateASig0, alternateASig1; - floatx80 z; - - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - bSig = extractFloatx80Frac( b ); - bExp = extractFloatx80Exp( b ); -// bSign = extractFloatx80Sign( b ); - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig0<<1 ) - || ( ( bExp == 0x7FFF ) && (bits64) ( bSig<<1 ) ) ) { - return propagateFloatx80NaN( a, b ); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if ( (bits64) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - if ( bSig == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); - } - if ( aExp == 0 ) { - if ( (bits64) ( aSig0<<1 ) == 0 ) return a; - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - bSig |= LIT64( 0x8000000000000000 ); - zSign = aSign; - expDiff = aExp - bExp; - aSig1 = 0; - if ( expDiff < 0 ) { - if ( expDiff < -1 ) return a; - shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); - expDiff = 0; - } - q = ( bSig <= aSig0 ); - if ( q ) aSig0 -= bSig; - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - mul64To128( bSig, q, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); - expDiff -= 62; - } - expDiff += 64; - if ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig ); - q = ( 2 < q ) ? q - 2 : 0; - q >>= 64 - expDiff; - mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); - while ( le128( term0, term1, aSig0, aSig1 ) ) { - ++q; - sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); - } - } - else { - term1 = 0; - term0 = bSig; - } - sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); - if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) - || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) - && ( q & 1 ) ) - ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - zSign = ! zSign; - } - return - normalizeRoundAndPackFloatx80( - 80, zSign, bExp + expDiff, aSig0, aSig1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the square root of the extended double-precision floating-point -| value `a'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 floatx80_sqrt( floatx80 a ) -{ - flag aSign; - int32 aExp, zExp; - bits64 aSig0, aSig1, zSig0, zSig1, doubleZSig0; - bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; - floatx80 z; - - aSig0 = extractFloatx80Frac( a ); - aExp = extractFloatx80Exp( a ); - aSign = extractFloatx80Sign( a ); - if ( aExp == 0x7FFF ) { - if ( (bits64) ( aSig0<<1 ) ) return propagateFloatx80NaN( a, a ); - if ( ! aSign ) return a; - goto invalid; - } - if ( aSign ) { - if ( ( aExp | aSig0 ) == 0 ) return a; - invalid: - float_raise( float_flag_invalid ); - z.low = floatx80_default_nan_low; - z.high = floatx80_default_nan_high; - return z; - } - if ( aExp == 0 ) { - if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); - normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); - } - zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; - zSig0 = estimateSqrt32( aExp, aSig0>>32 ); - shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); - zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); - doubleZSig0 = zSig0<<1; - mul64To128( zSig0, zSig0, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { - --zSig0; - doubleZSig0 -= 2; - add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); - if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) { - if ( zSig1 == 0 ) zSig1 = 1; - mul64To128( doubleZSig0, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - mul64To128( zSig1, zSig1, &term2, &term3 ); - sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); - while ( (sbits64) rem1 < 0 ) { - --zSig1; - shortShift128Left( 0, zSig1, 1, &term2, &term3 ); - term3 |= 1; - term2 |= doubleZSig0; - add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); - } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); - } - shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); - zSig0 |= doubleZSig0; - return - roundAndPackFloatx80( - floatx80_rounding_precision, 0, zExp, zSig0, zSig1 ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| equal to the corresponding value `b', and 0 otherwise. The comparison is -| performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -flag floatx80_eq( floatx80 a, floatx80 b ) -{ - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if ( floatx80_is_signaling_nan( a ) - || floatx80_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| less than or equal to the corresponding value `b', and 0 otherwise. The -| comparison is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag floatx80_le( floatx80 a, floatx80 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is -| less than the corresponding value `b', and 0 otherwise. The comparison -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -flag floatx80_lt( floatx80 a, floatx80 b ) +#ifdef SOFTFLOAT_68K // 30-01-2016: Added for Previous +floatx80 floatx80_round32( floatx80 a, float_status *status ) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + + if ( aExp == 0x7FFF || aSig == 0 ) { + return a; + } + if ( aExp == 0 ) { + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + + return roundSigAndPackFloatx80( 32, aSign, aExp, aSig, 0, status ); +} + +floatx80 floatx80_round64( floatx80 a, float_status *status ) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + + if ( aExp == 0x7FFF || aSig == 0 ) { + return a; + } + if ( aExp == 0 ) { + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + + return roundSigAndPackFloatx80( 64, aSign, aExp, aSig, 0, status ); +} + +floatx80 floatx80_round_to_float32( floatx80 a, float_status *status ) { - flag aSign, bSign; - - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } + flag aSign; + int32_t aExp; + uint64_t aSig; + aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is equal -| to the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag floatx80_eq_signaling( floatx80 a, floatx80 b ) -{ - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (bits16) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is less -| than or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs -| do not cause an exception. Otherwise, the comparison is performed according -| to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag floatx80_le_quiet( floatx80 a, floatx80 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if ( floatx80_is_signaling_nan( a ) - || floatx80_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status ); + return a; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + + return roundAndPackFloatx80( 32, aSign, aExp, aSig, 0, status ); +} + +floatx80 floatx80_round_to_float64( floatx80 a, float_status *status ) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status ); + return a; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + + return roundAndPackFloatx80( 64, aSign, aExp, aSig, 0, status ); +} + + +floatx80 floatx80_normalize( floatx80 a ) +{ + flag aSign; + int16_t aExp; + uint64_t aSig; + int8_t shiftCount; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + + if ( aExp == 0x7FFF || aExp == 0 ) return a; + if ( aSig == 0 ) return packFloatx80(aSign, 0, 0); + + shiftCount = countLeadingZeros64( aSig ); + + if ( shiftCount > aExp ) shiftCount = aExp; + + aExp -= shiftCount; + aSig <<= shiftCount; + + return packFloatx80( aSign, aExp, aSig ); +} +#endif // end of addition for Previous /*---------------------------------------------------------------------------- -| Returns 1 if the extended double-precision floating-point value `a' is less -| than the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause -| an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Rounds the extended double-precision floating-point value `a' to an integer, +| and returns the result as an extended quadruple-precision floating-point +| value. The operation is performed according to the IEC/IEEE Standard for +| Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag floatx80_lt_quiet( floatx80 a, floatx80 b ) +floatx80 floatx80_round_to_int(floatx80 a, float_status *status) { - flag aSign, bSign; - - if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( a )<<1 ) ) - || ( ( extractFloatx80Exp( b ) == 0x7FFF ) - && (bits64) ( extractFloatx80Frac( b )<<1 ) ) - ) { - if ( floatx80_is_signaling_nan( a ) - || floatx80_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloatx80Sign( a ); - bSign = extractFloatx80Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (bits16) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); - -} + flag aSign; + int32_t aExp; + uint64_t lastBitMask, roundBitsMask; +// int8_t roundingMode; + floatx80 z; +// roundingMode = status->float_rounding_mode; + aSign = extractFloatx80Sign(a); + aExp = extractFloatx80Exp( a ); + if ( 0x403E <= aExp ) { + if ( aExp == 0x7FFF ) { + if ((uint64_t) ( extractFloatx80Frac( a )<<1 ) ) + return propagateFloatx80NaNOneArg(a, status); + return inf_clear_intbit(status) ? packFloatx80(aSign, aExp, 0) : a; + } + return a; + } + if ( aExp < 0x3FFF ) { + if ( ( aExp == 0 ) + #ifdef SOFTFLOAT_68K + && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) { +#else + && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { #endif - -#ifdef FLOAT128 - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 32-bit two's complement integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic---which means in particular that the conversion is rounded -| according to the current rounding mode. If `a' is a NaN, the largest -| positive integer is returned. Otherwise, if the conversion overflows, the -| largest integer with the same sign as `a' is returned. -*----------------------------------------------------------------------------*/ - -int32 float128_to_int32( float128 a ) -{ - flag aSign; - int32 aExp, shiftCount; - bits64 aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( ( aExp == 0x7FFF ) && ( aSig0 | aSig1 ) ) aSign = 0; - if ( aExp ) aSig0 |= LIT64( 0x0001000000000000 ); - aSig0 |= ( aSig1 != 0 ); - shiftCount = 0x4028 - aExp; - if ( 0 < shiftCount ) shift64RightJamming( aSig0, shiftCount, &aSig0 ); - return roundAndPackInt32( aSign, aSig0 ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the 32-bit two's complement integer format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic, except that the conversion is always rounded toward zero. If -| `a' is a NaN, the largest positive integer is returned. Otherwise, if the -| conversion overflows, the largest integer with the same sign as `a' is -| returned. -*----------------------------------------------------------------------------*/ - -int32 float128_to_int32_round_to_zero( float128 a ) -{ - flag aSign; - int32 aExp, shiftCount; - bits64 aSig0, aSig1, savedASig; - int32 z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - aSig0 |= ( aSig1 != 0 ); - if ( 0x401E < aExp ) { - if ( ( aExp == 0x7FFF ) && aSig0 ) aSign = 0; - goto invalid; - } - else if ( aExp < 0x3FFF ) { - if ( aExp || aSig0 ) float_exception_flags |= float_flag_inexact; - return 0; - } - aSig0 |= LIT64( 0x0001000000000000 ); - shiftCount = 0x402F - aExp; - savedASig = aSig0; - aSig0 >>= shiftCount; - z = aSig0; - if ( aSign ) z = - z; - if ( ( z < 0 ) ^ aSign ) { - invalid: - float_raise( float_flag_invalid ); - return aSign ? (sbits32) 0x80000000 : 0x7FFFFFFF; - } - if ( ( aSig0<>( ( - shiftCount ) & 63 ) ); - if ( (bits64) ( aSig1<>( - shiftCount ); - if ( aSig1 - || ( shiftCount && (bits64) ( aSig0<<( shiftCount & 63 ) ) ) ) { - float_exception_flags |= float_flag_inexact; - } - } - if ( aSign ) z = - z; - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the single-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float32 float128_to_float32( float128 a ) -{ - flag aSign; - int32 aExp; - bits64 aSig0, aSig1; - bits32 zSig; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloat32( float128ToCommonNaN( a ) ); - } - return packFloat32( aSign, 0xFF, 0 ); - } - aSig0 |= ( aSig1 != 0 ); - shift64RightJamming( aSig0, 18, &aSig0 ); - zSig = aSig0; - if ( aExp || zSig ) { - zSig |= 0x40000000; - aExp -= 0x3F81; - } - return roundAndPackFloat32( aSign, aExp, zSig ); - -} - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the double-precision floating-point format. The conversion -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. -*----------------------------------------------------------------------------*/ - -float64 float128_to_float64( float128 a ) -{ - flag aSign; - int32 aExp; - bits64 aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloat64( float128ToCommonNaN( a ) ); - } - return packFloat64( aSign, 0x7FF, 0 ); - } - shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); - aSig0 |= ( aSig1 != 0 ); - if ( aExp || aSig0 ) { - aSig0 |= LIT64( 0x4000000000000000 ); - aExp -= 0x3C01; - } - return roundAndPackFloat64( aSign, aExp, aSig0 ); - -} - -#ifdef FLOATX80 - -/*---------------------------------------------------------------------------- -| Returns the result of converting the quadruple-precision floating-point -| value `a' to the extended double-precision floating-point format. The -| conversion is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -floatx80 float128_to_floatx80( float128 a ) -{ - flag aSign; - int32 aExp; - bits64 aSig0, aSig1; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) { - return commonNaNToFloatx80( float128ToCommonNaN( a ) ); - } - return packFloatx80( aSign, 0x7FFF, LIT64( 0x8000000000000000 ) ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloatx80( aSign, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - else { - aSig0 |= LIT64( 0x0001000000000000 ); - } - shortShift128Left( aSig0, aSig1, 15, &aSig0, &aSig1 ); - return roundAndPackFloatx80( 80, aSign, aExp, aSig0, aSig1 ); - -} - + return a; + } + status->float_exception_flags |= float_flag_inexact; + switch (status->float_rounding_mode) { + case float_round_nearest_even: + if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 ) + ) { + return + packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + break; + case float_round_ties_away: + if (aExp == 0x3FFE) { + return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000)); + } + break; + case float_round_down: + return + aSign ? + packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) ) + : packFloatx80( 0, 0, 0 ); + case float_round_up: + return + aSign ? packFloatx80( 1, 0, 0 ) + : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + return packFloatx80( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x403E - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + switch (status->float_rounding_mode) { + case float_round_nearest_even: + z.low += lastBitMask>>1; + if ((z.low & roundBitsMask) == 0) { + z.low &= ~lastBitMask; + } + break; + case float_round_ties_away: + z.low += lastBitMask >> 1; + break; + case float_round_to_zero: + break; + case float_round_up: + if (!extractFloatx80Sign(z)) { + z.low += roundBitsMask; + } + break; + case float_round_down: + if (extractFloatx80Sign(z)) { + z.low += roundBitsMask; + } + break; + default: + abort(); + } + z.low &= ~ roundBitsMask; + if ( z.low == 0 ) { + ++z.high; + z.low = LIT64( 0x8000000000000000 ); + } + if (z.low != a.low) { + status->float_exception_flags |= float_flag_inexact; + } + return z; + +} + +#ifdef SOFTFLOAT_68K // 09-01-2017: Added for Previous +floatx80 floatx80_round_to_int_toward_zero( floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t lastBitMask, roundBitsMask; + floatx80 z; + + aSign = extractFloatx80Sign(a); + aExp = extractFloatx80Exp( a ); + if ( 0x403E <= aExp ) { + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) + return propagateFloatx80NaNOneArg( a, status ); + return inf_clear_intbit(status) ? packFloatx80(aSign, aExp, 0) : a; + } + return a; + } + if ( aExp < 0x3FFF ) { + if ( ( aExp == 0 ) +#ifdef SOFTFLOAT_68K + && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) { +#else + && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) { #endif + return a; + } + status->float_exception_flags |= float_flag_inexact; + return packFloatx80( aSign, 0, 0 ); + } + lastBitMask = 1; + lastBitMask <<= 0x403E - aExp; + roundBitsMask = lastBitMask - 1; + z = a; + z.low &= ~ roundBitsMask; + if ( z.low == 0 ) { + ++z.high; + z.low = LIT64( 0x8000000000000000 ); + } + if ( z.low != a.low ) status->float_exception_flags |= float_flag_inexact; + return z; + +} +#endif // End of addition for Previous /*---------------------------------------------------------------------------- -| Rounds the quadruple-precision floating-point value `a' to an integer, and -| returns the result as a quadruple-precision floating-point value. The -| operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -float128 float128_round_to_int( float128 a ) -{ - flag aSign; - int32 aExp; - bits64 lastBitMask, roundBitsMask; - int8 roundingMode; - float128 z; - - aExp = extractFloat128Exp( a ); - if ( 0x402F <= aExp ) { - if ( 0x406F <= aExp ) { - if ( ( aExp == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) - ) { - return propagateFloat128NaN( a, a ); - } - return a; - } - lastBitMask = 1; - lastBitMask = ( lastBitMask<<( 0x406E - aExp ) )<<1; - roundBitsMask = lastBitMask - 1; - z = a; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - if ( lastBitMask ) { - add128( z.high, z.low, 0, lastBitMask>>1, &z.high, &z.low ); - if ( ( z.low & roundBitsMask ) == 0 ) z.low &= ~ lastBitMask; - } - else { - if ( (sbits64) z.low < 0 ) { - ++z.high; - if ( (bits64) ( z.low<<1 ) == 0 ) z.high &= ~1; - } - } - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat128Sign( z ) - ^ ( roundingMode == float_round_up ) ) { - add128( z.high, z.low, 0, roundBitsMask, &z.high, &z.low ); - } - } - z.low &= ~ roundBitsMask; - } - else { - if ( aExp < 0x3FFF ) { - if ( ( ( (bits64) ( a.high<<1 ) ) | a.low ) == 0 ) return a; - float_exception_flags |= float_flag_inexact; - aSign = extractFloat128Sign( a ); - switch ( float_rounding_mode ) { - case float_round_nearest_even: - if ( ( aExp == 0x3FFE ) - && ( extractFloat128Frac0( a ) - | extractFloat128Frac1( a ) ) - ) { - return packFloat128( aSign, 0x3FFF, 0, 0 ); - } - break; - case float_round_down: - return - aSign ? packFloat128( 1, 0x3FFF, 0, 0 ) - : packFloat128( 0, 0, 0, 0 ); - case float_round_up: - return - aSign ? packFloat128( 1, 0, 0, 0 ) - : packFloat128( 0, 0x3FFF, 0, 0 ); - } - return packFloat128( aSign, 0, 0, 0 ); - } - lastBitMask = 1; - lastBitMask <<= 0x402F - aExp; - roundBitsMask = lastBitMask - 1; - z.low = 0; - z.high = a.high; - roundingMode = float_rounding_mode; - if ( roundingMode == float_round_nearest_even ) { - z.high += lastBitMask>>1; - if ( ( ( z.high & roundBitsMask ) | a.low ) == 0 ) { - z.high &= ~ lastBitMask; - } - } - else if ( roundingMode != float_round_to_zero ) { - if ( extractFloat128Sign( z ) - ^ ( roundingMode == float_round_up ) ) { - z.high |= ( a.low != 0 ); - z.high += roundBitsMask; - } - } - z.high &= ~ roundBitsMask; - } - if ( ( z.low != a.low ) || ( z.high != a.high ) ) { - float_exception_flags |= float_flag_inexact; - } - return z; - -} - -/*---------------------------------------------------------------------------- -| Returns the result of adding the absolute values of the quadruple-precision -| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated -| before being returned. `zSign' is ignored if the result is a NaN. +| Returns the result of adding the absolute values of the extended double- +| precision floating-point values `a' and `b'. If `zSign' is 1, the sum is +| negated before being returned. `zSign' is ignored if the result is a NaN. | The addition is performed according to the IEC/IEEE Standard for Binary | Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float128 addFloat128Sigs( float128 a, float128 b, flag zSign ) +static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, + float_status *status) { - int32 aExp, bExp, zExp; - bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; - int32 expDiff; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - expDiff = aExp - bExp; - if ( 0 < expDiff ) { - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig0 |= LIT64( 0x0001000000000000 ); - } - shift128ExtraRightJamming( - bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2 ); - zExp = aExp; + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + int32_t expDiff; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); +#ifdef SOFTFLOAT_68K + if ( aExp == 0 ) { + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); } - else if ( expDiff < 0 ) { - if ( bExp == 0x7FFF ) { - if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig0 |= LIT64( 0x0001000000000000 ); - } - shift128ExtraRightJamming( - aSig0, aSig1, 0, - expDiff, &aSig0, &aSig1, &zSig2 ); - zExp = bExp; + if ( bExp == 0 ) { + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } - else { - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat128NaN( a, b ); - } - return a; +#endif + expDiff = aExp - bExp; + if ( 0 < expDiff ) { + if ( aExp == 0x7FFF ) { + if ((uint64_t)(aSig << 1)) + return propagateFloatx80NaN(a, b, status); + return inf_clear_intbit(status) ? packFloatx80(extractFloatx80Sign(a), aExp, 0) : a; + } +#ifndef SOFTFLOAT_68K + if ( bExp == 0 ) --expDiff; +#endif + shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); + zExp = aExp; + } + else if ( expDiff < 0 ) { + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) + return propagateFloatx80NaN(a, b, status); + if (inf_clear_intbit(status)) bSig = 0; + return packFloatx80( zSign, bExp, bSig ); + } +#ifndef SOFTFLOAT_68K + if ( aExp == 0 ) ++expDiff; +#endif + shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); + zExp = bExp; + } + else { + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { + return propagateFloatx80NaN(a, b, status); + } + if (inf_clear_intbit(status)) return packFloatx80(extractFloatx80Sign(a), aExp, 0); + return faddsub_swap_inf(status) ? b : a; + } + zSig1 = 0; + zSig0 = aSig + bSig; + #ifndef SOFTFLOAT_68K + if ( aExp == 0 ) { + normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 ); + goto roundAndPack; } - add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - if ( aExp == 0 ) return packFloat128( zSign, 0, zSig0, zSig1 ); - zSig2 = 0; - zSig0 |= LIT64( 0x0002000000000000 ); - zExp = aExp; +#endif + zExp = aExp; +#ifdef SOFTFLOAT_68K + if ( aSig == 0 && bSig == 0 ) return packFloatx80( zSign, 0, 0 ); + if ( aSig == 0 || bSig == 0 ) goto roundAndPack; +#endif goto shiftRight1; - } - aSig0 |= LIT64( 0x0001000000000000 ); - add128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - --zExp; - if ( zSig0 < LIT64( 0x0002000000000000 ) ) goto roundAndPack; - ++zExp; - shiftRight1: - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); - roundAndPack: - return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); - + } + zSig0 = aSig + bSig; + if ( (int64_t) zSig0 < 0 ) goto roundAndPack; + shiftRight1: + shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 ); + zSig0 |= LIT64( 0x8000000000000000 ); + ++zExp; + roundAndPack: + return roundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, zExp, zSig0, zSig1, status); } /*---------------------------------------------------------------------------- -| Returns the result of subtracting the absolute values of the quadruple- -| precision floating-point values `a' and `b'. If `zSign' is 1, the +| Returns the result of subtracting the absolute values of the extended +| double-precision floating-point values `a' and `b'. If `zSign' is 1, the | difference is negated before being returned. `zSign' is ignored if the | result is a NaN. The subtraction is performed according to the IEC/IEEE | Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -static float128 subFloat128Sigs( float128 a, float128 b, flag zSign ) -{ - int32 aExp, bExp, zExp; - bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1; - int32 expDiff; - float128 z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - expDiff = aExp - bExp; - shortShift128Left( aSig0, aSig1, 14, &aSig0, &aSig1 ); - shortShift128Left( bSig0, bSig1, 14, &bSig0, &bSig1 ); - if ( 0 < expDiff ) goto aExpBigger; - if ( expDiff < 0 ) goto bExpBigger; - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 | bSig0 | bSig1 ) { - return propagateFloat128NaN( a, b ); - } - float_raise( float_flag_invalid ); - z.low = float128_default_nan_low; - z.high = float128_default_nan_high; - return z; - } +static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign, + float_status *status) +{ + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + int32_t expDiff; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + expDiff = aExp - bExp; + if ( 0 < expDiff ) goto aExpBigger; + if ( expDiff < 0 ) goto bExpBigger; + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) { + return propagateFloatx80NaN(a, b, status); + } + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + #ifndef SOFTFLOAT_68K if ( aExp == 0 ) { aExp = 1; bExp = 1; } - if ( bSig0 < aSig0 ) goto aBigger; - if ( aSig0 < bSig0 ) goto bBigger; - if ( bSig1 < aSig1 ) goto aBigger; - if ( aSig1 < bSig1 ) goto bBigger; - return packFloat128( float_rounding_mode == float_round_down, 0, 0, 0 ); - bExpBigger: - if ( bExp == 0x7FFF ) { - if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); - return packFloat128( zSign ^ 1, 0x7FFF, 0, 0 ); - } - if ( aExp == 0 ) { - ++expDiff; - } - else { - aSig0 |= LIT64( 0x4000000000000000 ); - } - shift128RightJamming( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); - bSig0 |= LIT64( 0x4000000000000000 ); - bBigger: - sub128( bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1 ); - zExp = bExp; - zSign ^= 1; - goto normalizeRoundAndPack; - aExpBigger: - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - --expDiff; - } - else { - bSig0 |= LIT64( 0x4000000000000000 ); - } - shift128RightJamming( bSig0, bSig1, expDiff, &bSig0, &bSig1 ); - aSig0 |= LIT64( 0x4000000000000000 ); - aBigger: - sub128( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1 ); - zExp = aExp; - normalizeRoundAndPack: - --zExp; - return normalizeRoundAndPackFloat128( zSign, zExp - 14, zSig0, zSig1 ); - +#endif + zSig1 = 0; + if ( bSig < aSig ) goto aBigger; + if ( aSig < bSig ) goto bBigger; + return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0); + bExpBigger: + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) return propagateFloatx80NaN(a, b, status); + if (inf_clear_intbit(status)) bSig = 0; + return packFloatx80(zSign ^ 1, bExp, bSig); + } +#ifndef SOFTFLOAT_68K + if ( aExp == 0 ) ++expDiff; +#endif + shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 ); + bBigger: + sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 ); + zExp = bExp; + zSign ^= 1; + goto normalizeRoundAndPack; + aExpBigger: + if ( aExp == 0x7FFF ) { + if ((uint64_t)(aSig << 1)) return propagateFloatx80NaN(a, b, status); + return inf_clear_intbit(status) ? packFloatx80(extractFloatx80Sign(a), aExp, 0) : a; + } +#ifndef SOFTFLOAT_68K + if ( bExp == 0 ) --expDiff; +#endif + shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 ); + aBigger: + sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 ); + zExp = aExp; + normalizeRoundAndPack: + return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, zExp, zSig0, zSig1, status); } /*---------------------------------------------------------------------------- -| Returns the result of adding the quadruple-precision floating-point values -| `a' and `b'. The operation is performed according to the IEC/IEEE Standard -| for Binary Floating-Point Arithmetic. +| Returns the result of adding the extended double-precision floating-point +| values `a' and `b'. The operation is performed according to the IEC/IEEE +| Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_add( float128 a, float128 b ) +floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign == bSign ) { - return addFloat128Sigs( a, b, aSign ); - } - else { - return subFloat128Sigs( a, b, aSign ); - } + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return addFloatx80Sigs(a, b, aSign, status); + } + else { + return subFloatx80Sigs(a, b, aSign, status); + } } /*---------------------------------------------------------------------------- -| Returns the result of subtracting the quadruple-precision floating-point -| values `a' and `b'. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. +| Returns the result of subtracting the extended double-precision floating- +| point values `a' and `b'. The operation is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_sub( float128 a, float128 b ) +floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status) { - flag aSign, bSign; + flag aSign, bSign; - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign == bSign ) { - return subFloat128Sigs( a, b, aSign ); - } - else { - return addFloat128Sigs( a, b, aSign ); - } + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); + if ( aSign == bSign ) { + return subFloatx80Sigs(a, b, aSign, status); + } + else { + return addFloatx80Sigs(a, b, aSign, status); + } } /*---------------------------------------------------------------------------- -| Returns the result of multiplying the quadruple-precision floating-point -| values `a' and `b'. The operation is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. +| Returns the result of multiplying the extended double-precision floating- +| point values `a' and `b'. The operation is performed according to the +| IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_mul( float128 a, float128 b ) +floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status) +{ + flag aSign, bSign, zSign; + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN(a, b, status); + } + if ( ( bExp | bSig ) == 0 ) goto invalid; + if (inf_clear_intbit(status)) aSig = 0; + return packFloatx80(zSign, aExp, aSig); + } + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + if (inf_clear_intbit(status)) bSig = 0; + return packFloatx80(zSign, bExp, bSig); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + zExp = aExp + bExp - 0x3FFE; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + if ( 0 < (int64_t) zSig0 ) { + shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); + --zExp; + } + return roundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, zExp, zSig0, zSig1, status); +} + +#ifdef SOFTFLOAT_68K // 21-01-2017: Added for Previous +floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status ) { flag aSign, bSign, zSign; - int32 aExp, bExp, zExp; - bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2, zSig3; - float128 z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - bSign = extractFloat128Sign( b ); + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); zSign = aSign ^ bSign; if ( aExp == 0x7FFF ) { - if ( ( aSig0 | aSig1 ) - || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat128NaN( a, b ); + if ( (uint64_t) ( aSig<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b, status ); } - if ( ( bExp | bSig0 | bSig1 ) == 0 ) goto invalid; - return packFloat128( zSign, 0x7FFF, 0, 0 ); + if ( ( bExp | bSig ) == 0 ) goto invalid; + if (inf_clear_intbit(status)) aSig = 0; + return packFloatx80(zSign, aExp, aSig); } if ( bExp == 0x7FFF ) { - if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); - if ( ( aExp | aSig0 | aSig1 ) == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = float128_default_nan_low; - z.high = float128_default_nan_high; - return z; + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise( float_flag_invalid, status ); + return floatx80_default_nan(status); } - return packFloat128( zSign, 0x7FFF, 0, 0 ); + if (inf_clear_intbit(status)) bSig = 0; + return packFloatx80(zSign, bExp, bSig); } if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); } if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } - zExp = aExp + bExp - 0x4000; - aSig0 |= LIT64( 0x0001000000000000 ); - shortShift128Left( bSig0, bSig1, 16, &bSig0, &bSig1 ); - mul128To256( aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1, &zSig2, &zSig3 ); - add128( zSig0, zSig1, aSig0, aSig1, &zSig0, &zSig1 ); - zSig2 |= ( zSig3 != 0 ); - if ( LIT64( 0x0002000000000000 ) <= zSig0 ) { - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2 ); - ++zExp; + aSig &= LIT64( 0xFFFFFF0000000000 ); + bSig &= LIT64( 0xFFFFFF0000000000 ); + zExp = aExp + bExp - 0x3FFE; + mul64To128( aSig, bSig, &zSig0, &zSig1 ); + if ( 0 < (int64_t) zSig0 ) { + shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 ); + --zExp; } - return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); - + return roundSigAndPackFloatx80( 32, zSign, zExp, zSig0, zSig1, status); + } +#endif // End of addition for Previous + /*---------------------------------------------------------------------------- -| Returns the result of dividing the quadruple-precision floating-point value -| `a' by the corresponding value `b'. The operation is performed according to -| the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns the result of dividing the extended double-precision floating-point +| value `a' by the corresponding value `b'. The operation is performed +| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_div( float128 a, float128 b ) +floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status) +{ + flag aSign, bSign, zSign; + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + uint64_t rem0, rem1, rem2, term0, term1, term2; + + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + zSign = aSign ^ bSign; + if ( aExp == 0x7FFF ) { + if ((uint64_t)(aSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + goto invalid; + } + if (inf_clear_intbit(status)) aSig = 0; + return packFloatx80(zSign, aExp, aSig); + } + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + return packFloatx80( zSign, 0, 0 ); + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + float_raise(float_flag_divbyzero, status); + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + zExp = aExp - bExp + 0x3FFE; + rem1 = 0; + if ( bSig <= aSig ) { + shift128Right( aSig, 0, 1, &aSig, &rem1 ); + ++zExp; + } + zSig0 = estimateDiv128To64( aSig, rem1, bSig ); + mul64To128( bSig, zSig0, &term0, &term1 ); + sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); + while ( (int64_t) rem0 < 0 ) { + --zSig0; + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); + } + zSig1 = estimateDiv128To64( rem1, 0, bSig ); + if ( (uint64_t) ( zSig1<<1 ) <= 8 ) { + mul64To128( bSig, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + while ( (int64_t) rem1 < 0 ) { + --zSig1; + add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); + } + zSig1 |= ( ( rem1 | rem2 ) != 0 ); + } + return roundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, zExp, zSig0, zSig1, status); +} + +#ifdef SOFTFLOAT_68K // 21-01-2017: Addition for Previous +floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status ) { flag aSign, bSign, zSign; - int32 aExp, bExp, zExp; - bits64 aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2; - bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; - float128 z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); - bSign = extractFloat128Sign( b ); + int32_t aExp, bExp, zExp; + uint64_t aSig, bSig, zSig0, zSig1; + uint64_t rem0, rem1, rem2, term0, term1, term2; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); zSign = aSign ^ bSign; if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, b ); + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); if ( bExp == 0x7FFF ) { - if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); goto invalid; } - return packFloat128( zSign, 0x7FFF, 0, 0 ); + if (inf_clear_intbit(status)) aSig = 0; + return packFloatx80(zSign, aExp, aSig); } if ( bExp == 0x7FFF ) { - if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); - return packFloat128( zSign, 0, 0, 0 ); + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + return packFloatx80( zSign, 0, 0 ); } if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) { - if ( ( aExp | aSig0 | aSig1 ) == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = float128_default_nan_low; - z.high = float128_default_nan_high; - return z; + if ( bSig == 0 ) { + if ( ( aExp | aSig ) == 0 ) { + invalid: + float_raise( float_flag_invalid, status ); + return floatx80_default_nan(status); } - float_raise( float_flag_divbyzero ); - return packFloat128( zSign, 0x7FFF, 0, 0 ); + float_raise( float_flag_divbyzero, status ); + return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low ); } - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); } if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( zSign, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); + if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); } - zExp = aExp - bExp + 0x3FFD; - shortShift128Left( - aSig0 | LIT64( 0x0001000000000000 ), aSig1, 15, &aSig0, &aSig1 ); - shortShift128Left( - bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); - if ( le128( bSig0, bSig1, aSig0, aSig1 ) ) { - shift128Right( aSig0, aSig1, 1, &aSig0, &aSig1 ); + + zExp = aExp - bExp + 0x3FFE; + rem1 = 0; + if ( bSig <= aSig ) { + shift128Right( aSig, 0, 1, &aSig, &rem1 ); ++zExp; } - zSig0 = estimateDiv128To64( aSig0, aSig1, bSig0 ); - mul128By64To192( bSig0, bSig1, zSig0, &term0, &term1, &term2 ); - sub192( aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2 ); - while ( (sbits64) rem0 < 0 ) { + zSig0 = estimateDiv128To64( aSig, rem1, bSig ); + mul64To128( bSig, zSig0, &term0, &term1 ); + sub128( aSig, rem1, term0, term1, &rem0, &rem1 ); + while ( (int64_t) rem0 < 0 ) { --zSig0; - add192( rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2 ); + add128( rem0, rem1, 0, bSig, &rem0, &rem1 ); } - zSig1 = estimateDiv128To64( rem1, rem2, bSig0 ); - if ( ( zSig1 & 0x3FFF ) <= 4 ) { - mul128By64To192( bSig0, bSig1, zSig1, &term1, &term2, &term3 ); - sub192( rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3 ); - while ( (sbits64) rem1 < 0 ) { + zSig1 = estimateDiv128To64( rem1, 0, bSig ); + if ( (uint64_t) ( zSig1<<1 ) <= 8 ) { + mul64To128( bSig, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + while ( (int64_t) rem1 < 0 ) { --zSig1; - add192( rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3 ); + add128( rem1, rem2, 0, bSig, &rem1, &rem2 ); } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + zSig1 |= ( ( rem1 | rem2 ) != 0 ); } - shift128ExtraRightJamming( zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); - + return roundSigAndPackFloatx80( 32, zSign, zExp, zSig0, zSig1, status); + } +#endif // End of addition for Previous + /*---------------------------------------------------------------------------- -| Returns the remainder of the quadruple-precision floating-point value `a' -| with respect to the corresponding value `b'. The operation is performed +| Returns the remainder of the extended double-precision floating-point value +| `a' with respect to the corresponding value `b'. The operation is performed | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_rem( float128 a, float128 b ) -{ - flag aSign, zSign; - int32 aExp, bExp, expDiff; - bits64 aSig0, aSig1, bSig0, bSig1, q, term0, term1, term2; - bits64 allZero, alternateASig0, alternateASig1, sigMean1; - sbits64 sigMean0; - float128 z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - bSig1 = extractFloat128Frac1( b ); - bSig0 = extractFloat128Frac0( b ); - bExp = extractFloat128Exp( b ); -// bSign = extractFloat128Sign( b ); +#ifndef SOFTFLOAT_68K +floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status) +{ + flag aSign, zSign; + int32_t aExp, bExp, expDiff; + uint64_t aSig0, aSig1, bSig; + uint64_t q, term0, term1, alternateASig0, alternateASig1; + + if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig0<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN(a, b, status); + } + goto invalid; + } + if ( bExp == 0x7FFF ) { + if ((uint64_t)(bSig << 1)) { + return propagateFloatx80NaN(a, b, status); + } + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + invalid: + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { + if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); + } + bSig |= LIT64( 0x8000000000000000 ); + zSign = aSign; + expDiff = aExp - bExp; + aSig1 = 0; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); + expDiff = 0; + } + q = ( bSig <= aSig0 ); + if ( q ) aSig0 -= bSig; + expDiff -= 64; + while ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + mul64To128( bSig, q, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 ); + expDiff -= 62; + } + expDiff += 64; + if ( 0 < expDiff ) { + q = estimateDiv128To64( aSig0, aSig1, bSig ); + q = ( 2 < q ) ? q - 2 : 0; + q >>= 64 - expDiff; + mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); + while ( le128( term0, term1, aSig0, aSig1 ) ) { + ++q; + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + } + } + else { + term1 = 0; + term0 = bSig; + } + sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); + if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) + || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) + && ( q & 1 ) ) + ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + zSign = ! zSign; + } + return + normalizeRoundAndPackFloatx80( + 80, zSign, bExp + expDiff, aSig0, aSig1, status); + +} +#else // 09-01-2017: Modified version for Previous +floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ) +{ + flag aSign, bSign, zSign; + int32_t aExp, bExp, expDiff; + uint64_t aSig0, aSig1, bSig; + uint64_t qTemp, term0, term1, alternateASig0, alternateASig1; + + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + if ( aExp == 0x7FFF ) { - if ( ( aSig0 | aSig1 ) - || ( ( bExp == 0x7FFF ) && ( bSig0 | bSig1 ) ) ) { - return propagateFloat128NaN( a, b ); - } - goto invalid; - } - if ( bExp == 0x7FFF ) { - if ( bSig0 | bSig1 ) return propagateFloat128NaN( a, b ); - return a; - } - if ( bExp == 0 ) { - if ( ( bSig0 | bSig1 ) == 0 ) { - invalid: - float_raise( float_flag_invalid ); - z.low = float128_default_nan_low; - z.high = float128_default_nan_high; - return z; - } - normalizeFloat128Subnormal( bSig0, bSig1, &bExp, &bSig0, &bSig1 ); - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return a; - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - expDiff = aExp - bExp; - if ( expDiff < -1 ) return a; - shortShift128Left( - aSig0 | LIT64( 0x0001000000000000 ), - aSig1, - 15 - ( expDiff < 0 ), - &aSig0, - &aSig1 - ); - shortShift128Left( - bSig0 | LIT64( 0x0001000000000000 ), bSig1, 15, &bSig0, &bSig1 ); - q = le128( bSig0, bSig1, aSig0, aSig1 ); - if ( q ) sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); - expDiff -= 64; - while ( 0 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig0 ); - q = ( 4 < q ) ? q - 4 : 0; - mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); - shortShift192Left( term0, term1, term2, 61, &term1, &term2, &allZero ); - shortShift128Left( aSig0, aSig1, 61, &aSig0, &allZero ); - sub128( aSig0, 0, term1, term2, &aSig0, &aSig1 ); - expDiff -= 61; - } - if ( -64 < expDiff ) { - q = estimateDiv128To64( aSig0, aSig1, bSig0 ); - q = ( 4 < q ) ? q - 4 : 0; - q >>= - expDiff; - shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); - expDiff += 52; - if ( expDiff < 0 ) { - shift128Right( aSig0, aSig1, - expDiff, &aSig0, &aSig1 ); - } - else { - shortShift128Left( aSig0, aSig1, expDiff, &aSig0, &aSig1 ); - } - mul128By64To192( bSig0, bSig1, q, &term0, &term1, &term2 ); - sub128( aSig0, aSig1, term1, term2, &aSig0, &aSig1 ); - } - else { - shift128Right( aSig0, aSig1, 12, &aSig0, &aSig1 ); - shift128Right( bSig0, bSig1, 12, &bSig0, &bSig1 ); - } - do { - alternateASig0 = aSig0; - alternateASig1 = aSig1; - ++q; - sub128( aSig0, aSig1, bSig0, bSig1, &aSig0, &aSig1 ); - } while ( 0 <= (sbits64) aSig0 ); - add128( - aSig0, aSig1, alternateASig0, alternateASig1, (bits64 *)&sigMean0, &sigMean1 ); - if ( ( sigMean0 < 0 ) - || ( ( ( sigMean0 | sigMean1 ) == 0 ) && ( q & 1 ) ) ) { - aSig0 = alternateASig0; - aSig1 = alternateASig1; - } - zSign = ( (sbits64) aSig0 < 0 ); - if ( zSign ) sub128( 0, 0, aSig0, aSig1, &aSig0, &aSig1 ); - return - normalizeRoundAndPackFloat128( aSign ^ zSign, bExp - 4, aSig0, aSig1 ); + if ( (uint64_t) ( aSig0<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b, status ); + } + goto invalid; + } + if ( bExp == 0x7FFF ) { + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + *s = (aSign != bSign); + *q = 0; + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + invalid: + float_raise( float_flag_invalid, status ); + return floatx80_default_nan(status); + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { +#ifdef SOFTFLOAT_68K + if ( aSig0 == 0 ) { + *s = (aSign != bSign); + *q = 0; + return a; + } +#else + if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; +#endif + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); + } + bSig |= LIT64( 0x8000000000000000 ); + zSign = aSign; + expDiff = aExp - bExp; + *s = (aSign != bSign); + aSig1 = 0; + if ( expDiff < 0 ) { + if ( expDiff < -1 ) return a; + shift128Right( aSig0, 0, 1, &aSig0, &aSig1 ); + expDiff = 0; + } + qTemp = ( bSig <= aSig0 ); + if ( qTemp ) aSig0 -= bSig; + *q = ( expDiff > 63 ) ? 0 : ( qTemp< 63 ) ? 0 : ( qTemp<>= 64 - expDiff; + mul64To128( bSig, qTemp<<( 64 - expDiff ), &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); + while ( le128( term0, term1, aSig0, aSig1 ) ) { + ++qTemp; + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + } + *q += qTemp; + } + else { + term1 = 0; + term0 = bSig; + } + sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 ); + if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 ) + || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 ) + && ( qTemp & 1 ) ) + ) { + aSig0 = alternateASig0; + aSig1 = alternateASig1; + zSign = ! zSign; + ++*q; + } + return + normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, bExp + expDiff, aSig0, aSig1, status ); + +} +#endif // End of modification + + +#ifdef SOFTFLOAT_68K // 08-01-2017: Added for Previous +/*---------------------------------------------------------------------------- + | Returns the modulo remainder of the extended double-precision floating-point + | value `a' with respect to the corresponding value `b'. + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ) +{ + flag aSign, bSign, zSign; + int32_t aExp, bExp, expDiff; + uint64_t aSig0, aSig1, bSig; + uint64_t qTemp, term0, term1; + + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig0<<1 ) + || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b, status ); + } + goto invalid; + } + if ( bExp == 0x7FFF ) { + if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + *s = (aSign != bSign); + *q = 0; + return a; + } + if ( bExp == 0 ) { + if ( bSig == 0 ) { + invalid: + float_raise( float_flag_invalid, status ); + return floatx80_default_nan(status); + } + normalizeFloatx80Subnormal( bSig, &bExp, &bSig ); + } + if ( aExp == 0 ) { +#ifdef SOFTFLOAT_68K + if ( aSig0 == 0 ) { + *s = (aSign != bSign); + *q = 0; + return a; + } +#else + if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a; +#endif + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); + } + bSig |= LIT64( 0x8000000000000000 ); + zSign = aSign; + expDiff = aExp - bExp; + *s = (aSign != bSign); + aSig1 = 0; + if ( expDiff < 0 ) return a; + qTemp = ( bSig <= aSig0 ); + if ( qTemp ) aSig0 -= bSig; + *q = ( expDiff > 63 ) ? 0 : ( qTemp< 63 ) ? 0 : ( qTemp<>= 64 - expDiff; + mul64To128( bSig, qTemp<<( 64 - expDiff ), &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 ); + while ( le128( term0, term1, aSig0, aSig1 ) ) { + ++qTemp; + sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 ); + } + *q += qTemp; + } + return + normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision, + zSign, bExp + expDiff, aSig0, aSig1, status ); + +} +#endif // end of addition for Previous -} /*---------------------------------------------------------------------------- -| Returns the square root of the quadruple-precision floating-point value `a'. -| The operation is performed according to the IEC/IEEE Standard for Binary -| Floating-Point Arithmetic. +| Returns the square root of the extended double-precision floating-point +| value `a'. The operation is performed according to the IEC/IEEE Standard +| for Binary Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -float128 float128_sqrt( float128 a ) +floatx80 floatx80_sqrt(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp, zExp; + uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0; + uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3; + + if (floatx80_invalid_encoding(a)) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + aSig0 = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + if ( aExp == 0x7FFF ) { + if ((uint64_t)(aSig0 << 1)) + return propagateFloatx80NaNOneArg(a, status); + if (!aSign) return inf_clear_intbit(status) ? packFloatx80(aSign, aExp, 0) : a; + goto invalid; + } + if ( aSign ) { + if ( ( aExp | aSig0 ) == 0 ) return a; + invalid: + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + if ( aExp == 0 ) { + if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 ); + normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 ); + } + zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF; + zSig0 = estimateSqrt32( aExp, aSig0>>32 ); + shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 ); + zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); + doubleZSig0 = zSig0<<1; + mul64To128( zSig0, zSig0, &term0, &term1 ); + sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); + while ( (int64_t) rem0 < 0 ) { + --zSig0; + doubleZSig0 -= 2; + add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); + } + zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); + if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) { + if ( zSig1 == 0 ) zSig1 = 1; + mul64To128( doubleZSig0, zSig1, &term1, &term2 ); + sub128( rem1, 0, term1, term2, &rem1, &rem2 ); + mul64To128( zSig1, zSig1, &term2, &term3 ); + sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); + while ( (int64_t) rem1 < 0 ) { + --zSig1; + shortShift128Left( 0, zSig1, 1, &term2, &term3 ); + term3 |= 1; + term2 |= doubleZSig0; + add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); + } + zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); + } + shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 ); + zSig0 |= doubleZSig0; + return roundAndPackFloatx80(status->floatx80_rounding_precision, + 0, zExp, zSig0, zSig1, status); +} + + +#ifdef SOFTFLOAT_68K // 07-01-2017: Added for Previous +/*---------------------------------------------------------------------------- + | Returns the mantissa of the extended double-precision floating-point + | value `a'. + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_getman( floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status ); + float_raise( float_flag_invalid, status ); + return floatx80_default_nan(status); + } + + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + + return packFloatx80(aSign, 0x3fff, aSig); +} + +/*---------------------------------------------------------------------------- + | Returns the exponent of the extended double-precision floating-point + | value `a' as an extended double-precision value. + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_getexp( floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status ); + float_raise( float_flag_invalid, status ); + return floatx80_default_nan(status); + } + + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + + return int32_to_floatx80(aExp - 0x3FFF); +} + +/*---------------------------------------------------------------------------- + | Scales extended double-precision floating-point value in operand `a' by + | value `b'. The function truncates the value in the second operand 'b' to + | an integral value and adds that value to the exponent of the operand 'a'. + | The operation performed according to the IEC/IEEE Standard for Binary + | Floating-Point Arithmetic. + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status) +{ + flag aSign, bSign; + int32_t aExp, bExp, shiftCount; + uint64_t aSig, bSig; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + bSig = extractFloatx80Frac(b); + bExp = extractFloatx80Exp(b); + bSign = extractFloatx80Sign(b); + + if ( bExp == 0x7FFF ) { + if ( (uint64_t) ( bSig<<1 ) || + ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) ) { + return propagateFloatx80NaN( a, b, status ); + } + float_raise( float_flag_invalid, status ); + return floatx80_default_nan(status); + } + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status ); + return a; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0); + if ( bExp < 0x3FFF ) return a; + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + + if (bExp < 0x3FFF) { + return roundAndPackFloatx80( + status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status); + } + + if ( 0x400F < bExp ) { + aExp = bSign ? -0x6001 : 0xE000; + return roundAndPackFloatx80( + status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); + } + + shiftCount = 0x403E - bExp; + bSig >>= shiftCount; + aExp = bSign ? ( aExp - bSig ) : ( aExp + bSig ); + + return roundAndPackFloatx80( + status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status); + +} + +/*----------------------------------------------------------------------------- + | Calculates the absolute value of the extended double-precision floating-point + | value `a'. The operation is performed according to the IEC/IEEE Standard + | for Binary Floating-Point Arithmetic. + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_abs(floatx80 a, float_status *status) +{ + int32_t aExp; + uint64_t aSig; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) + return propagateFloatx80NaNOneArg( a, status ); + if (inf_clear_intbit(status)) aSig = 0; + return packFloatx80(0, aExp, aSig); + } + + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( 0, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + + return roundAndPackFloatx80( + status->floatx80_rounding_precision, 0, aExp, aSig, 0, status ); + +} + +/*----------------------------------------------------------------------------- + | Changes the sign of the extended double-precision floating-point value 'a'. + | The operation is performed according to the IEC/IEEE Standard for Binary + | Floating-Point Arithmetic. + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_neg(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if ( aExp == 0x7FFF ) { + if ( (uint64_t) ( aSig<<1 ) ) + return propagateFloatx80NaNOneArg( a, status ); + if (inf_clear_intbit(status)) aSig = 0; + return packFloatx80(!aSign, aExp, aSig); + } + + aSign = !aSign; + + if ( aExp == 0 ) { + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + normalizeFloatx80Subnormal( aSig, &aExp, &aSig ); + } + + return roundAndPackFloatx80( + status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); + +} + +/*---------------------------------------------------------------------------- + | Returns the result of comparing the extended double-precision floating- + | point values `a' and `b'. The result is abstracted for matching the + | corresponding condition codes. + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_cmp( floatx80 a, floatx80 b, float_status *status ) +{ + flag aSign, bSign; + int32_t aExp, bExp; + uint64_t aSig, bSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + bSig = extractFloatx80Frac( b ); + bExp = extractFloatx80Exp( b ); + bSign = extractFloatx80Sign( b ); + + if ( ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) || + ( bExp == 0x7FFF && (uint64_t) ( bSig<<1 ) ) ) { + // 68040 FCMP -NaN return N flag set + if (fcmp_signed_nan(status)) + return propagateFloatx80NaN(a, b, status ); + return propagateFloatx80NaN(packFloatx80(0, aExp, aSig), + packFloatx80(0, bExp, bSig), status); + } + + if ( bExp < aExp ) return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + if ( aExp < bExp ) return packFloatx80( bSign ^ 1, 0x3FFF, LIT64( 0x8000000000000000 ) ); + + if ( aExp == 0x7FFF ) { + if ( aSign == bSign ) return packFloatx80( aSign, 0, 0 ); + return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + } + + if ( bSig < aSig ) return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + if ( aSig < bSig ) return packFloatx80( bSign ^ 1, 0x3FFF, LIT64( 0x8000000000000000 ) ); + + if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 ); + + if ( aSign == bSign ) return packFloatx80( 0, 0, 0 ); + + return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) ); + +} + +floatx80 floatx80_tst( floatx80 a, float_status *status ) +{ + int32_t aExp; + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + + if ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) + return propagateFloatx80NaNOneArg( a, status ); + return a; +} + +floatx80 floatx80_move( floatx80 a, float_status *status ) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + + if ( aExp == 0x7FFF ) { + if ((uint64_t)(aSig << 1)) return propagateFloatx80NaNOneArg(a, status); + return inf_clear_intbit(status) ? packFloatx80(aSign, aExp, 0) : a; + } + if ( aExp == 0 ) { + if ( aSig == 0 ) return a; + return normalizeRoundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); + } + return roundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status ); +} + +floatx80 floatx80_denormalize( floatx80 a, flag eSign) { flag aSign; - int32 aExp, zExp; - bits64 aSig0, aSig1, zSig0, zSig1, zSig2, doubleZSig0; - bits64 rem0, rem1, rem2, rem3, term0, term1, term2, term3; - float128 z; - - aSig1 = extractFloat128Frac1( a ); - aSig0 = extractFloat128Frac0( a ); - aExp = extractFloat128Exp( a ); - aSign = extractFloat128Sign( a ); - if ( aExp == 0x7FFF ) { - if ( aSig0 | aSig1 ) return propagateFloat128NaN( a, a ); - if ( ! aSign ) return a; - goto invalid; - } - if ( aSign ) { - if ( ( aExp | aSig0 | aSig1 ) == 0 ) return a; - invalid: - float_raise( float_flag_invalid ); - z.low = float128_default_nan_low; - z.high = float128_default_nan_high; - return z; - } - if ( aExp == 0 ) { - if ( ( aSig0 | aSig1 ) == 0 ) return packFloat128( 0, 0, 0, 0 ); - normalizeFloat128Subnormal( aSig0, aSig1, &aExp, &aSig0, &aSig1 ); - } - zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFE; - aSig0 |= LIT64( 0x0001000000000000 ); - zSig0 = estimateSqrt32( aExp, aSig0>>17 ); - shortShift128Left( aSig0, aSig1, 13 - ( aExp & 1 ), &aSig0, &aSig1 ); - zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 ); - doubleZSig0 = zSig0<<1; - mul64To128( zSig0, zSig0, &term0, &term1 ); - sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 ); - while ( (sbits64) rem0 < 0 ) { - --zSig0; - doubleZSig0 -= 2; - add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 ); - } - zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 ); - if ( ( zSig1 & 0x1FFF ) <= 5 ) { - if ( zSig1 == 0 ) zSig1 = 1; - mul64To128( doubleZSig0, zSig1, &term1, &term2 ); - sub128( rem1, 0, term1, term2, &rem1, &rem2 ); - mul64To128( zSig1, zSig1, &term2, &term3 ); - sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 ); - while ( (sbits64) rem1 < 0 ) { - --zSig1; - shortShift128Left( 0, zSig1, 1, &term2, &term3 ); - term3 |= 1; - term2 |= doubleZSig0; - add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 ); + int32_t aExp; + uint64_t aSig; + int32_t shiftCount; + + aSig = extractFloatx80Frac( a ); + aExp = extractFloatx80Exp( a ); + aSign = extractFloatx80Sign( a ); + + if ( eSign ) { + shiftCount = 0x8000 - aExp; + aExp = 0; + if (shiftCount > 63) { + aSig = 0; + } else { + aSig >>= shiftCount; } - zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 ); } - shift128ExtraRightJamming( zSig0, zSig1, 0, 14, &zSig0, &zSig1, &zSig2 ); - return roundAndPackFloat128( 0, zExp, zSig0, zSig1, zSig2 ); - + return packFloatx80(aSign, aExp, aSig); } +#endif // End of addition for Previous + /*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns 1 if the extended double-precision floating-point value `a' is +| equal to the corresponding value `b', and 0 otherwise. The comparison is +| performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. *----------------------------------------------------------------------------*/ -flag float128_eq( float128 a, float128 b ) +flag floatx80_eq( floatx80 a, floatx80 b, float_status *status ) { - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) ) { - if ( float128_is_signaling_nan( a ) - || float128_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); + if ( floatx80_is_signaling_nan( a ) + || floatx80_is_signaling_nan( b ) ) { + float_raise( float_flag_invalid, status ); } return 0; } @@ -4769,68 +3479,68 @@ flag float128_eq( float128 a, float128 b ) ( a.low == b.low ) && ( ( a.high == b.high ) || ( ( a.low == 0 ) - && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) + && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) ) ); } /*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. The comparison -| is performed according to the IEC/IEEE Standard for Binary Floating-Point -| Arithmetic. +| Returns 1 if the extended double-precision floating-point value `a' is +| less than or equal to the corresponding value `b', and 0 otherwise. The +| comparison is performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. *----------------------------------------------------------------------------*/ -flag float128_le( float128 a, float128 b ) +flag floatx80_le( floatx80 a, floatx80 b, float_status *status ) { flag aSign, bSign; - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) ) { - float_raise( float_flag_invalid ); + float_raise( float_flag_invalid, status ); return 0; } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); if ( aSign != bSign ) { return aSign - || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) == 0 ); } return aSign ? le128( b.high, b.low, a.high, a.low ) : le128( a.high, a.low, b.high, b.low ); - } /*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. The comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Returns 1 if the extended double-precision floating-point value `a' is +| less than the corresponding value `b', and 0 otherwise. The comparison +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. *----------------------------------------------------------------------------*/ -flag float128_lt( float128 a, float128 b ) +flag floatx80_lt( floatx80 a, floatx80 b, float_status *status ) { flag aSign, bSign; - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) + if ( ( ( extractFloatx80Exp( a ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( a )<<1 ) ) + || ( ( extractFloatx80Exp( b ) == 0x7FFF ) + && (uint64_t) ( extractFloatx80Frac( b )<<1 ) ) ) { - float_raise( float_flag_invalid ); + float_raise( float_flag_invalid, status ); return 0; } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); + aSign = extractFloatx80Sign( a ); + bSign = extractFloatx80Sign( b ); if ( aSign != bSign ) { return aSign - && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) + && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) != 0 ); } return @@ -4839,102 +3549,23 @@ flag float128_lt( float128 a, float128 b ) } -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is equal to -| the corresponding value `b', and 0 otherwise. The invalid exception is -| raised if either operand is a NaN. Otherwise, the comparison is performed -| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float128_eq_signaling( float128 a, float128 b ) -{ - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - float_raise( float_flag_invalid ); - return 0; - } - return - ( a.low == b.low ) - && ( ( a.high == b.high ) - || ( ( a.low == 0 ) - && ( (bits64) ( ( a.high | b.high )<<1 ) == 0 ) ) - ); - -} - -/*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| or equal to the corresponding value `b', and 0 otherwise. Quiet NaNs do not -| cause an exception. Otherwise, the comparison is performed according to the -| IEC/IEEE Standard for Binary Floating-Point Arithmetic. -*----------------------------------------------------------------------------*/ - -flag float128_le_quiet( float128 a, float128 b ) -{ - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if ( float128_is_signaling_nan( a ) - || float128_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - || ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - == 0 ); - } - return - aSign ? le128( b.high, b.low, a.high, a.low ) - : le128( a.high, a.low, b.high, b.low ); - -} /*---------------------------------------------------------------------------- -| Returns 1 if the quadruple-precision floating-point value `a' is less than -| the corresponding value `b', and 0 otherwise. Quiet NaNs do not cause an -| exception. Otherwise, the comparison is performed according to the IEC/IEEE -| Standard for Binary Floating-Point Arithmetic. +| Returns the result of converting the 64-bit two's complement integer `a' +| to the extended double-precision floating-point format. The conversion +| is performed according to the IEC/IEEE Standard for Binary Floating-Point +| Arithmetic. *----------------------------------------------------------------------------*/ -flag float128_lt_quiet( float128 a, float128 b ) +floatx80 int64_to_floatx80( int64_t a ) { - flag aSign, bSign; - - if ( ( ( extractFloat128Exp( a ) == 0x7FFF ) - && ( extractFloat128Frac0( a ) | extractFloat128Frac1( a ) ) ) - || ( ( extractFloat128Exp( b ) == 0x7FFF ) - && ( extractFloat128Frac0( b ) | extractFloat128Frac1( b ) ) ) - ) { - if ( float128_is_signaling_nan( a ) - || float128_is_signaling_nan( b ) ) { - float_raise( float_flag_invalid ); - } - return 0; - } - aSign = extractFloat128Sign( a ); - bSign = extractFloat128Sign( b ); - if ( aSign != bSign ) { - return - aSign - && ( ( ( (bits64) ( ( a.high | b.high )<<1 ) ) | a.low | b.low ) - != 0 ); - } - return - aSign ? lt128( b.high, b.low, a.high, a.low ) - : lt128( a.high, a.low, b.high, b.low ); + flag zSign; + uint64_t absA; + int8_t shiftCount; + if ( a == 0 ) return packFloatx80( 0, 0, 0 ); + zSign = ( a < 0 ); + absA = zSign ? - a : a; + shiftCount = countLeadingZeros64( absA ); + return packFloatx80( zSign, 0x403E - shiftCount, absA< +#endif + + +/* This 'flag' type must be able to hold at least 0 and 1. It should + * probably be replaced with 'bool' but the uses would need to be audited + * to check that they weren't accidentally relying on it being a larger type. + */ + +typedef uint64_t flag; +typedef uint8_t bool; -=============================================================================*/ +#define LIT64( a ) a##ULL /*---------------------------------------------------------------------------- -| The macro `FLOATX80' must be defined to enable the extended double-precision -| floating-point format `floatx80'. If this macro is not defined, the -| `floatx80' type will not be defined, and none of the functions that either -| input or output the `floatx80' type will be defined. The same applies to -| the `FLOAT128' macro and the quadruple-precision format `float128'. +| Software IEC/IEEE floating-point ordering relations *----------------------------------------------------------------------------*/ -#define FLOATX80 -#define FLOAT128 +enum { + float_relation_less = -1, + float_relation_equal = 0, + float_relation_greater = 1, + float_relation_unordered = 2 +}; /*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point types. *----------------------------------------------------------------------------*/ -typedef bits32 float32; -typedef bits64 float64; -#ifdef FLOATX80 +/* Use structures for soft-float types. This prevents accidentally mixing + them with native int/float types. A sufficiently clever compiler and + sane ABI should be able to see though these structs. However + x86/gcc 3.x seems to struggle a bit, so leave them disabled by default. */ +//#define USE_SOFTFLOAT_STRUCT_TYPES +#ifdef USE_SOFTFLOAT_STRUCT_TYPES typedef struct { - bits16 high; - bits64 low; -} floatx80; + uint16_t v; +} float16; +#define float16_val(x) (((float16)(x)).v) +#define make_float16(x) __extension__ ({ float16 f16_val = {x}; f16_val; }) +#define const_float16(x) { x } +typedef struct { + uint32_t v; +} float32; +/* The cast ensures an error if the wrong type is passed. */ +#define float32_val(x) (((float32)(x)).v) +#define make_float32(x) __extension__ ({ float32 f32_val = {x}; f32_val; }) +#define const_float32(x) { x } +typedef struct { + uint64_t v; +} float64; +#define float64_val(x) (((float64)(x)).v) +#define make_float64(x) __extension__ ({ float64 f64_val = {x}; f64_val; }) +#define const_float64(x) { x } +#else +typedef uint16_t float16; +typedef uint32_t float32; +typedef uint64_t float64; +#define float16_val(x) (x) +#define float32_val(x) (x) +#define float64_val(x) (x) +#define make_float16(x) (x) +#define make_float32(x) (x) +#define make_float64(x) (x) +#define const_float16(x) (x) +#define const_float32(x) (x) +#define const_float64(x) (x) #endif -#ifdef FLOAT128 typedef struct { - bits64 high, low; -} float128; + uint16_t high; + uint64_t low; +} floatx80; +typedef struct { +#ifdef HOST_WORDS_BIGENDIAN + uint64_t high, low; +#else + uint64_t low, high; #endif - -/*---------------------------------------------------------------------------- -| Primitive arithmetic functions, including multi-word arithmetic, and -| division and square root approximations. (Can be specialized to target if -| desired.) -*----------------------------------------------------------------------------*/ -#include "softfloat-macros" +} float128; /*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point underflow tininess-detection mode. *----------------------------------------------------------------------------*/ -extern int8 float_detect_tininess; enum { - float_tininess_after_rounding = 0, - float_tininess_before_rounding = 1 + float_tininess_after_rounding = 0, + float_tininess_before_rounding = 1 }; /*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point rounding mode. *----------------------------------------------------------------------------*/ -extern int8 float_rounding_mode; enum { - float_round_nearest_even = 0, - float_round_to_zero = 1, - float_round_down = 2, - float_round_up = 3 + float_round_nearest_even = 0, + float_round_down = 1, + float_round_up = 2, + float_round_to_zero = 3, + float_round_ties_away = 4, }; /*---------------------------------------------------------------------------- | Software IEC/IEEE floating-point exception flags. *----------------------------------------------------------------------------*/ -extern int8 float_exception_flags; enum { - float_flag_invalid = 0x01, float_flag_denormal = 0x02, float_flag_divbyzero = 0x04, float_flag_overflow = 0x08, - float_flag_underflow = 0x10, float_flag_inexact = 0x20 + float_flag_invalid = 0x01, + float_flag_denormal = 0x02, + float_flag_divbyzero = 0x04, + float_flag_overflow = 0x08, + float_flag_underflow = 0x10, + float_flag_inexact = 0x20, + float_flag_signaling = 0x40, + float_flag_decimal = 0x80 }; /*---------------------------------------------------------------------------- -| Routine to raise any or all of the software IEC/IEEE floating-point -| exception flags. -*----------------------------------------------------------------------------*/ -void float_raise( int8 ); + | Variables for storing sign, exponent and significand of overflowed or + | underflowed extended double-precision floating-point value. + | Variables for storing sign, exponent and significand of internal extended + | double-precision floating-point value for external use. + *----------------------------------------------------------------------------*/ + +extern flag floatx80_internal_sign; +extern int32_t floatx80_internal_exp; +extern uint64_t floatx80_internal_sig; +extern int32_t floatx80_internal_exp0; +extern uint64_t floatx80_internal_sig0; +extern uint64_t floatx80_internal_sig1; +extern int8_t floatx80_internal_precision; +extern int8_t floatx80_internal_mode; + +typedef struct float_status { + signed char float_detect_tininess; + signed char float_rounding_mode; + uint8_t float_exception_flags; + signed char floatx80_rounding_precision; + /* should denormalised results go to zero and set the inexact flag? */ + flag flush_to_zero; + /* should denormalised inputs go to zero and set the input_denormal flag? */ + flag flush_inputs_to_zero; + flag default_nan_mode; + flag snan_bit_is_one; + flag floatx80_special_flags; +} float_status; /*---------------------------------------------------------------------------- -| Software IEC/IEEE integer-to-floating-point conversion routines. -*----------------------------------------------------------------------------*/ -float32 int32_to_float32( int32 ); -float64 int32_to_float64( int32 ); -#ifdef FLOATX80 -floatx80 int32_to_floatx80( int32 ); -#endif -#ifdef FLOAT128 -float128 int32_to_float128( int32 ); -#endif -float32 int64_to_float32( int64 ); -float64 int64_to_float64( int64 ); -#ifdef FLOATX80 -floatx80 int64_to_floatx80( int64 ); -#endif -#ifdef FLOAT128 -float128 int64_to_float128( int64 ); -#endif + | Function for getting sign, exponent and significand of extended + | double-precision floating-point intermediate result for external use. + *----------------------------------------------------------------------------*/ +floatx80 getFloatInternalOverflow( void ); +floatx80 getFloatInternalUnderflow( void ); +floatx80 getFloatInternalRoundedAll( void ); +floatx80 getFloatInternalRoundedSome( void ); +floatx80 getFloatInternalUnrounded( void ); +floatx80 getFloatInternalFloatx80( void ); +uint64_t getFloatInternalGRS( void ); + +static inline void set_float_detect_tininess(int val, float_status *status) +{ + status->float_detect_tininess = val; +} +static inline void set_float_rounding_mode(int val, float_status *status) +{ + status->float_rounding_mode = val; +} +static inline void set_float_exception_flags(int val, float_status *status) +{ + status->float_exception_flags = val; +} +static inline void set_floatx80_rounding_precision(int val, + float_status *status) +{ + status->floatx80_rounding_precision = val; +} +static inline void set_flush_to_zero(flag val, float_status *status) +{ + status->flush_to_zero = val; +} +static inline void set_flush_inputs_to_zero(flag val, float_status *status) +{ + status->flush_inputs_to_zero = val; +} +static inline void set_default_nan_mode(flag val, float_status *status) +{ + status->default_nan_mode = val; +} +static inline void set_snan_bit_is_one(flag val, float_status *status) +{ + status->snan_bit_is_one = val; +} +static inline int get_float_detect_tininess(float_status *status) +{ + return status->float_detect_tininess; +} +static inline int get_float_rounding_mode(float_status *status) +{ + return status->float_rounding_mode; +} +static inline int get_float_exception_flags(float_status *status) +{ + return status->float_exception_flags; +} +static inline int get_floatx80_rounding_precision(float_status *status) +{ + return status->floatx80_rounding_precision; +} +static inline flag get_flush_to_zero(float_status *status) +{ + return status->flush_to_zero; +} +static inline flag get_flush_inputs_to_zero(float_status *status) +{ + return status->flush_inputs_to_zero; +} +static inline flag get_default_nan_mode(float_status *status) +{ + return status->default_nan_mode; +} /*---------------------------------------------------------------------------- -| Software IEC/IEEE single-precision conversion routines. +| Special flags for indicating some unique behavior is required. *----------------------------------------------------------------------------*/ -int32 float32_to_int32( float32 ); -int32 float32_to_int32_round_to_zero( float32 ); -int64 float32_to_int64( float32 ); -int64 float32_to_int64_round_to_zero( float32 ); -float64 float32_to_float64( float32 ); -#ifdef FLOATX80 -floatx80 float32_to_floatx80( float32 ); -#endif -#ifdef FLOAT128 -float128 float32_to_float128( float32 ); -#endif +enum { + cmp_signed_nan = 0x01, addsub_swap_inf = 0x02, infinity_clear_intbit = 0x04 +}; -/*---------------------------------------------------------------------------- -| Software IEC/IEEE single-precision operations. -*----------------------------------------------------------------------------*/ -float32 float32_round_to_int( float32 ); -float32 float32_add( float32, float32 ); -float32 float32_sub( float32, float32 ); -float32 float32_mul( float32, float32 ); -float32 float32_div( float32, float32 ); -float32 float32_rem( float32, float32 ); -float32 float32_sqrt( float32 ); -flag float32_eq( float32, float32 ); -flag float32_le( float32, float32 ); -flag float32_lt( float32, float32 ); -flag float32_eq_signaling( float32, float32 ); -flag float32_le_quiet( float32, float32 ); -flag float32_lt_quiet( float32, float32 ); -flag float32_is_signaling_nan( float32 ); +static inline void set_special_flags(uint8_t flags, float_status *status) +{ + status->floatx80_special_flags = flags; +} +static inline int8_t fcmp_signed_nan(float_status *status) +{ + return status->floatx80_special_flags & cmp_signed_nan; +} +static inline int8_t faddsub_swap_inf(float_status *status) +{ + return status->floatx80_special_flags & addsub_swap_inf; +} +static inline int8_t inf_clear_intbit(float_status *status) +{ + return status->floatx80_special_flags & infinity_clear_intbit; +} /*---------------------------------------------------------------------------- -| Software IEC/IEEE double-precision conversion routines. +| Routine to raise any or all of the software IEC/IEEE floating-point +| exception flags. *----------------------------------------------------------------------------*/ -int32 float64_to_int32( float64 ); -int32 float64_to_int32_round_to_zero( float64 ); -int64 float64_to_int64( float64 ); -int64 float64_to_int64_round_to_zero( float64 ); -float32 float64_to_float32( float64 ); -#ifdef FLOATX80 -floatx80 float64_to_floatx80( float64 ); -#endif -#ifdef FLOAT128 -float128 float64_to_float128( float64 ); -#endif +void float_raise(uint8_t flags, float_status *status); -/*---------------------------------------------------------------------------- -| Software IEC/IEEE double-precision operations. -*----------------------------------------------------------------------------*/ -float64 float64_round_to_int( float64 ); -float64 float64_add( float64, float64 ); -float64 float64_sub( float64, float64 ); -float64 float64_mul( float64, float64 ); -float64 float64_div( float64, float64 ); -float64 float64_rem( float64, float64 ); -float64 float64_sqrt( float64 ); -flag float64_eq( float64, float64 ); -flag float64_le( float64, float64 ); -flag float64_lt( float64, float64 ); -flag float64_eq_signaling( float64, float64 ); -flag float64_le_quiet( float64, float64 ); -flag float64_lt_quiet( float64, float64 ); -flag float64_is_signaling_nan( float64 ); - -#ifdef FLOATX80 /*---------------------------------------------------------------------------- -| Software IEC/IEEE extended double-precision conversion routines. -*----------------------------------------------------------------------------*/ -int32 floatx80_to_int32( floatx80 ); -int32 floatx80_to_int32_round_to_zero( floatx80 ); -int64 floatx80_to_int64( floatx80 ); -int64 floatx80_to_int64_round_to_zero( floatx80 ); -float32 floatx80_to_float32( floatx80 ); -float64 floatx80_to_float64( floatx80 ); -#ifdef FLOAT128 -float128 floatx80_to_float128( floatx80 ); -#endif -floatx80 floatx80_scale(floatx80 a, floatx80 b); + | The pattern for a default generated single-precision NaN. + *----------------------------------------------------------------------------*/ +#define float32_default_nan 0x7FFFFFFF /*---------------------------------------------------------------------------- -| Packs the sign `zSign', exponent `zExp', and significand `zSig' into an -| extended double-precision floating-point value, returning the result. -*----------------------------------------------------------------------------*/ + | The pattern for a default generated double-precision NaN. + *----------------------------------------------------------------------------*/ +#define float64_default_nan LIT64( 0x7FFFFFFFFFFFFFFF ) -static inline floatx80 packFloatx80( flag zSign, int32 zExp, bits64 zSig ) -{ - floatx80 z; - - z.low = zSig; - z.high = ( ( (bits16) zSign )<<15 ) + zExp; - return z; +/*---------------------------------------------------------------------------- + | The pattern for a default generated extended double-precision NaN. The + | `high' and `low' values hold the most- and least-significant bits, + | respectively. + *----------------------------------------------------------------------------*/ +#define floatx80_default_nan_high 0x7FFF +#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF ) -} +/*---------------------------------------------------------------------------- + | The pattern for a default generated extended double-precision infinity. + *----------------------------------------------------------------------------*/ +#define floatx80_default_infinity_low LIT64( 0x0000000000000000 ) /*---------------------------------------------------------------------------- -| Software IEC/IEEE extended double-precision rounding precision. Valid -| values are 32, 64, and 80. +| If `a' is denormal and we are in flush-to-zero mode then set the +| input-denormal exception and return zero. Otherwise just return the value. *----------------------------------------------------------------------------*/ -extern int8 floatx80_rounding_precision; +float64 float64_squash_input_denormal(float64 a, float_status *status); /*---------------------------------------------------------------------------- -| Software IEC/IEEE extended double-precision operations. +| Options to indicate which negations to perform in float*_muladd() +| Using these differs from negating an input or output before calling +| the muladd function in that this means that a NaN doesn't have its +| sign bit inverted before it is propagated. +| We also support halving the result before rounding, as a special +| case to support the ARM fused-sqrt-step instruction FRSQRTS. *----------------------------------------------------------------------------*/ -floatx80 floatx80_round_to_int( floatx80 ); -floatx80 floatx80_add( floatx80, floatx80 ); -floatx80 floatx80_sub( floatx80, floatx80 ); -floatx80 floatx80_mul( floatx80, floatx80 ); -floatx80 floatx80_div( floatx80, floatx80 ); -floatx80 floatx80_rem( floatx80, floatx80 ); -floatx80 floatx80_sqrt( floatx80 ); -flag floatx80_eq( floatx80, floatx80 ); -flag floatx80_le( floatx80, floatx80 ); -flag floatx80_lt( floatx80, floatx80 ); -flag floatx80_eq_signaling( floatx80, floatx80 ); -flag floatx80_le_quiet( floatx80, floatx80 ); -flag floatx80_lt_quiet( floatx80, floatx80 ); -flag floatx80_is_signaling_nan( floatx80 ); - -int floatx80_fsin(floatx80 *a); -int floatx80_fcos(floatx80 *a); -int floatx80_ftan(floatx80 *a); - -floatx80 floatx80_flognp1(floatx80 a); -floatx80 floatx80_flogn(floatx80 a); -floatx80 floatx80_flog2(floatx80 a); -floatx80 floatx80_flog10(floatx80 a); - -// roundAndPackFloatx80 used to be in softfloat-round-pack, is now in softfloat.c -floatx80 roundAndPackFloatx80(int8 roundingPrecision, flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1); +enum { + float_muladd_negate_c = 1, + float_muladd_negate_product = 2, + float_muladd_negate_result = 4, + float_muladd_halve_result = 8, +}; -#endif +/*---------------------------------------------------------------------------- +| Software IEC/IEEE integer-to-floating-point conversion routines. +*----------------------------------------------------------------------------*/ -#ifdef FLOAT128 +floatx80 int32_to_floatx80(int32_t); +floatx80 int64_to_floatx80(int64_t); /*---------------------------------------------------------------------------- -| Software IEC/IEEE quadruple-precision conversion routines. +| Software IEC/IEEE single-precision conversion routines. *----------------------------------------------------------------------------*/ -int32 float128_to_int32( float128 ); -int32 float128_to_int32_round_to_zero( float128 ); -int64 float128_to_int64( float128 ); -int64 float128_to_int64_round_to_zero( float128 ); -float32 float128_to_float32( float128 ); -float64 float128_to_float64( float128 ); -#ifdef FLOATX80 -floatx80 float128_to_floatx80( float128 ); -#endif +floatx80 float32_to_floatx80(float32, float_status *status); +floatx80 float32_to_floatx80_allowunnormal(float32, float_status *status); /*---------------------------------------------------------------------------- -| Software IEC/IEEE quadruple-precision operations. +| Software IEC/IEEE double-precision conversion routines. *----------------------------------------------------------------------------*/ -float128 float128_round_to_int( float128 ); -float128 float128_add( float128, float128 ); -float128 float128_sub( float128, float128 ); -float128 float128_mul( float128, float128 ); -float128 float128_div( float128, float128 ); -float128 float128_rem( float128, float128 ); -float128 float128_sqrt( float128 ); -flag float128_eq( float128, float128 ); -flag float128_le( float128, float128 ); -flag float128_lt( float128, float128 ); -flag float128_eq_signaling( float128, float128 ); -flag float128_le_quiet( float128, float128 ); -flag float128_lt_quiet( float128, float128 ); -flag float128_is_signaling_nan( float128 ); +floatx80 float64_to_floatx80(float64, float_status *status); + +floatx80 float64_to_floatx80_allowunnormal( float64 a, float_status *status ); /*---------------------------------------------------------------------------- -| Packs the sign `zSign', the exponent `zExp', and the significand formed -| by the concatenation of `zSig0' and `zSig1' into a quadruple-precision -| floating-point value, returning the result. After being shifted into the -| proper positions, the three fields `zSign', `zExp', and `zSig0' are simply -| added together to form the most significant 32 bits of the result. This -| means that any integer portion of `zSig0' will be added into the exponent. -| Since a properly normalized significand will have an integer portion equal -| to 1, the `zExp' input should be 1 less than the desired result exponent -| whenever `zSig0' and `zSig1' concatenated form a complete, normalized -| significand. +| Software IEC/IEEE extended double-precision conversion routines. *----------------------------------------------------------------------------*/ +int32_t floatx80_to_int32(floatx80, float_status *status); +#ifdef SOFTFLOAT_68K +int16_t floatx80_to_int16(floatx80, float_status *status); +int8_t floatx80_to_int8(floatx80, float_status *status); +#endif +int32_t floatx80_to_int32_round_to_zero(floatx80, float_status *status); +int64_t floatx80_to_int64(floatx80, float_status *status); +float32 floatx80_to_float32(floatx80, float_status *status); +float64 floatx80_to_float64(floatx80, float_status *status); +#ifdef SOFTFLOAT_68K +floatx80 floatx80_to_floatx80( floatx80, float_status *status); +floatx80 floatdecimal_to_floatx80(floatx80, float_status *status); +floatx80 floatx80_to_floatdecimal(floatx80, int32_t*, float_status *status); +#endif -static inline float128 - packFloat128( flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) -{ - float128 z; - - z.low = zSig1; - z.high = ( ( (bits64) zSign )<<63 ) + ( ( (bits64) zExp )<<48 ) + zSig0; - return z; +uint64_t extractFloatx80Frac( floatx80 a ); +int32_t extractFloatx80Exp( floatx80 a ); +flag extractFloatx80Sign( floatx80 a ); + +floatx80 floatx80_round_to_int_toward_zero( floatx80 a, float_status *status); +floatx80 floatx80_round_to_float32( floatx80, float_status *status ); +floatx80 floatx80_round_to_float64( floatx80, float_status *status ); +floatx80 floatx80_round32( floatx80, float_status *status); +floatx80 floatx80_round64( floatx80, float_status *status); + +flag floatx80_eq( floatx80, floatx80, float_status *status); +flag floatx80_le( floatx80, floatx80, float_status *status); +flag floatx80_lt( floatx80, floatx80, float_status *status); + +#ifdef SOFTFLOAT_68K +// functions are in softfloat.c +floatx80 floatx80_move( floatx80 a, float_status *status ); +floatx80 floatx80_abs( floatx80 a, float_status *status ); +floatx80 floatx80_neg( floatx80 a, float_status *status ); +floatx80 floatx80_getexp( floatx80 a, float_status *status ); +floatx80 floatx80_getman( floatx80 a, float_status *status ); +floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status ); +floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ); +floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status ); +floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status ); +floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status ); +floatx80 floatx80_cmp( floatx80 a, floatx80 b, float_status *status ); +floatx80 floatx80_tst( floatx80 a, float_status *status ); + +// functions are in softfloat_fpsp.c +floatx80 floatx80_acos(floatx80 a, float_status *status); +floatx80 floatx80_asin(floatx80 a, float_status *status); +floatx80 floatx80_atan(floatx80 a, float_status *status); +floatx80 floatx80_atanh(floatx80 a, float_status *status); +floatx80 floatx80_cos(floatx80 a, float_status *status); +floatx80 floatx80_cosh(floatx80 a, float_status *status); +floatx80 floatx80_etox(floatx80 a, float_status *status); +floatx80 floatx80_etoxm1(floatx80 a, float_status *status); +floatx80 floatx80_log10(floatx80 a, float_status *status); +floatx80 floatx80_log2(floatx80 a, float_status *status); +floatx80 floatx80_logn(floatx80 a, float_status *status); +floatx80 floatx80_lognp1(floatx80 a, float_status *status); +floatx80 floatx80_sin(floatx80 a, float_status *status); +floatx80 floatx80_sinh(floatx80 a, float_status *status); +floatx80 floatx80_tan(floatx80 a, float_status *status); +floatx80 floatx80_tanh(floatx80 a, float_status *status); +floatx80 floatx80_tentox(floatx80 a, float_status *status); +floatx80 floatx80_twotox(floatx80 a, float_status *status); +#endif -} +// functions originally internal to softfloat.c +void normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr ); +floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig ); +floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status); /*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and extended significand formed by the concatenation of `zSig0', `zSig1', -| and `zSig2', and returns the proper quadruple-precision floating-point value -| corresponding to the abstract input. Ordinarily, the abstract value is -| simply rounded and packed into the quadruple-precision format, with the -| inexact exception raised if the abstract input cannot be represented -| exactly. However, if the abstract value is too large, the overflow and -| inexact exceptions are raised and an infinity or maximal finite value is -| returned. If the abstract value is too small, the input value is rounded to -| a subnormal number, and the underflow and inexact exceptions are raised if -| the abstract input cannot be represented exactly as a subnormal quadruple- -| precision floating-point number. -| The input significand must be normalized or smaller. If the input -| significand is not normalized, `zExp' must be 0; in that case, the result -| returned is a subnormal number, and it must not require rounding. In the -| usual case that the input significand is normalized, `zExp' must be 1 less -| than the ``true'' floating-point exponent. The handling of underflow and -| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic. +| Software IEC/IEEE extended double-precision operations. *----------------------------------------------------------------------------*/ - -static inline float128 - roundAndPackFloat128( - flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1, bits64 zSig2 ) +floatx80 floatx80_round_to_int(floatx80, float_status *status); +floatx80 floatx80_add(floatx80, floatx80, float_status *status); +floatx80 floatx80_sub(floatx80, floatx80, float_status *status); +floatx80 floatx80_mul(floatx80, floatx80, float_status *status); +floatx80 floatx80_div(floatx80, floatx80, float_status *status); +floatx80 floatx80_sqrt(floatx80, float_status *status); +floatx80 floatx80_normalize(floatx80); +floatx80 floatx80_denormalize(floatx80, flag); + +static inline int floatx80_is_zero_or_denormal(floatx80 a) { - int8 roundingMode; - flag roundNearestEven, increment, isTiny; - - roundingMode = float_rounding_mode; - roundNearestEven = ( roundingMode == float_round_nearest_even ); - increment = ( (sbits64) zSig2 < 0 ); - if ( ! roundNearestEven ) { - if ( roundingMode == float_round_to_zero ) { - increment = 0; - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig2; - } - else { - increment = ( roundingMode == float_round_up ) && zSig2; - } - } - } - if ( 0x7FFD <= (bits32) zExp ) { - if ( ( 0x7FFD < zExp ) - || ( ( zExp == 0x7FFD ) - && eq128( - LIT64( 0x0001FFFFFFFFFFFF ), - LIT64( 0xFFFFFFFFFFFFFFFF ), - zSig0, - zSig1 - ) - && increment - ) - ) { - float_raise( float_flag_overflow | float_flag_inexact ); - if ( ( roundingMode == float_round_to_zero ) - || ( zSign && ( roundingMode == float_round_up ) ) - || ( ! zSign && ( roundingMode == float_round_down ) ) - ) { - return - packFloat128( - zSign, - 0x7FFE, - LIT64( 0x0000FFFFFFFFFFFF ), - LIT64( 0xFFFFFFFFFFFFFFFF ) - ); - } - return packFloat128( zSign, 0x7FFF, 0, 0 ); - } - if ( zExp < 0 ) { - isTiny = - ( float_detect_tininess == float_tininess_before_rounding ) - || ( zExp < -1 ) - || ! increment - || lt128( - zSig0, - zSig1, - LIT64( 0x0001FFFFFFFFFFFF ), - LIT64( 0xFFFFFFFFFFFFFFFF ) - ); - shift128ExtraRightJamming( - zSig0, zSig1, zSig2, - zExp, &zSig0, &zSig1, &zSig2 ); - zExp = 0; - if ( isTiny && zSig2 ) float_raise( float_flag_underflow ); - if ( roundNearestEven ) { - increment = ( (sbits64) zSig2 < 0 ); - } - else { - if ( zSign ) { - increment = ( roundingMode == float_round_down ) && zSig2; - } - else { - increment = ( roundingMode == float_round_up ) && zSig2; - } - } - } - } - if ( zSig2 ) float_exception_flags |= float_flag_inexact; - if ( increment ) { - add128( zSig0, zSig1, 0, 1, &zSig0, &zSig1 ); - zSig1 &= ~ ( ( zSig2 + zSig2 == 0 ) & roundNearestEven ); - } - else { - if ( ( zSig0 | zSig1 ) == 0 ) zExp = 0; - } - return packFloat128( zSign, zExp, zSig0, zSig1 ); + return (a.high & 0x7fff) == 0; +} +static inline int floatx80_is_any_nan(floatx80 a) +{ + return ((a.high & 0x7fff) == 0x7fff) && (a.low<<1); } /*---------------------------------------------------------------------------- -| Takes an abstract floating-point value having sign `zSign', exponent `zExp', -| and significand formed by the concatenation of `zSig0' and `zSig1', and -| returns the proper quadruple-precision floating-point value corresponding -| to the abstract input. This routine is just like `roundAndPackFloat128' -| except that the input significand has fewer bits and does not have to be -| normalized. In all cases, `zExp' must be 1 less than the ``true'' floating- -| point exponent. +| Return whether the given value is an invalid floatx80 encoding. +| Invalid floatx80 encodings arise when the integer bit is not set, but +| the exponent is not zero. The only times the integer bit is permitted to +| be zero is in subnormal numbers and the value zero. +| This includes what the Intel software developer's manual calls pseudo-NaNs, +| pseudo-infinities and un-normal numbers. It does not include +| pseudo-denormals, which must still be correctly handled as inputs even +| if they are never generated as outputs. *----------------------------------------------------------------------------*/ - -static inline float128 - normalizeRoundAndPackFloat128( - flag zSign, int32 zExp, bits64 zSig0, bits64 zSig1 ) +static inline bool floatx80_invalid_encoding(floatx80 a) { - int8 shiftCount; - bits64 zSig2; - - if ( zSig0 == 0 ) { - zSig0 = zSig1; - zSig1 = 0; - zExp -= 64; - } - shiftCount = countLeadingZeros64( zSig0 ) - 15; - if ( 0 <= shiftCount ) { - zSig2 = 0; - shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 ); - } - else { - shift128ExtraRightJamming( - zSig0, zSig1, 0, - shiftCount, &zSig0, &zSig1, &zSig2 ); - } - zExp -= shiftCount; - return roundAndPackFloat128( zSign, zExp, zSig0, zSig1, zSig2 ); - + return (a.low & (1ULL << 63)) == 0 && (a.high & 0x7FFF) != 0 && (a.high & 0x7FFF) != 0x7FFF; } -#endif + +#define floatx80_zero make_floatx80(0x0000, 0x0000000000000000LL) +#define floatx80_one make_floatx80(0x3fff, 0x8000000000000000LL) +#define floatx80_ln2 make_floatx80(0x3ffe, 0xb17217f7d1cf79acLL) +#define floatx80_pi make_floatx80(0x4000, 0xc90fdaa22168c235LL) +#define floatx80_half make_floatx80(0x3ffe, 0x8000000000000000LL) +#define floatx80_infinity make_floatx80(0x7fff, 0x8000000000000000LL) + +#endif /* SOFTFLOAT_H */ diff --git a/softfloat/softfloat_fpsp.c b/softfloat/softfloat_fpsp.c new file mode 100644 index 0000000..61dcd10 --- /dev/null +++ b/softfloat/softfloat_fpsp.c @@ -0,0 +1,2154 @@ + +/*============================================================================ + + This C source file is an extension to the SoftFloat IEC/IEEE Floating-point + Arithmetic Package, Release 2a. + + Written by Andreas Grabher for Previous, NeXT Computer Emulator. + +=============================================================================*/ + +#include +#include + +#include "softfloat.h" +#include "softfloat-specialize.h" +#include "softfloat_fpsp_tables.h" + + +/*---------------------------------------------------------------------------- +| Algorithms for transcendental functions supported by MC68881 and MC68882 +| mathematical coprocessors. The functions are derived from FPSP library. +*----------------------------------------------------------------------------*/ + +#define pi_sig LIT64(0xc90fdaa22168c235) +#define pi_sig0 LIT64(0xc90fdaa22168c234) +#define pi_sig1 LIT64(0xc4c6628b80dc1cd1) + +#define pi_exp 0x4000 +#define piby2_exp 0x3FFF +#define piby4_exp 0x3FFE + +#define one_exp 0x3FFF +#define one_sig LIT64(0x8000000000000000) + +#define SET_PREC \ + int8_t user_rnd_mode, user_rnd_prec; \ + user_rnd_mode = status->float_rounding_mode; \ + user_rnd_prec = status->floatx80_rounding_precision; \ + status->float_rounding_mode = float_round_nearest_even; \ + status->floatx80_rounding_precision = 80 + +#define RESET_PREC \ + status->float_rounding_mode = user_rnd_mode; \ + status->floatx80_rounding_precision = user_rnd_prec + +/*---------------------------------------------------------------------------- + | Function for compactifying extended double-precision floating point values. + *----------------------------------------------------------------------------*/ + +static int32_t floatx80_make_compact(int32_t aExp, uint64_t aSig) +{ + return (aExp<<16)|(aSig>>48); +} + + +/*---------------------------------------------------------------------------- + | Arc cosine + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_acos(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + + int32_t compact; + floatx80 fp0, fp1, one; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF && (uint64_t) (aSig<<1)) { + return propagateFloatx80NaNOneArg(a, status); + } + if (aExp == 0 && aSig == 0) { + float_raise(float_flag_inexact, status); + return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, piby2_exp, pi_sig, 0, status); + } + + compact = floatx80_make_compact(aExp, aSig); + + if (compact >= 0x3FFF8000) { // |X| >= 1 + if (aExp == one_exp && aSig == one_sig) { // |X| == 1 + if (aSign) { // X == -1 + a = packFloatx80(0, pi_exp, pi_sig); + float_raise(float_flag_inexact, status); + return floatx80_move(a, status); + } else { // X == +1 + return packFloatx80(0, 0, 0); + } + } else { // |X| > 1 + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + } // |X| < 1 + + SET_PREC; + + one = packFloatx80(0, one_exp, one_sig); + fp0 = a; + + fp1 = floatx80_add(one, fp0, status); // 1 + X + fp0 = floatx80_sub(one, fp0, status); // 1 - X + fp0 = floatx80_div(fp0, fp1, status); // (1-X)/(1+X) + fp0 = floatx80_sqrt(fp0, status); // SQRT((1-X)/(1+X)) + fp0 = floatx80_atan(fp0, status); // ATAN(SQRT((1-X)/(1+X))) + + RESET_PREC; + + a = floatx80_add(fp0, fp0, status); // 2 * ATAN(SQRT((1-X)/(1+X))) + + float_raise(float_flag_inexact, status); + + return a; +} + +/*---------------------------------------------------------------------------- + | Arc sine + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_asin(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + int32_t compact; + floatx80 fp0, fp1, fp2, one; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF && (uint64_t) (aSig<<1)) { + return propagateFloatx80NaNOneArg(a, status); + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(aSign, 0, 0); + } + + compact = floatx80_make_compact(aExp, aSig); + + if (compact >= 0x3FFF8000) { // |X| >= 1 + if (aExp == one_exp && aSig == one_sig) { // |X| == 1 + float_raise(float_flag_inexact, status); + a = packFloatx80(aSign, piby2_exp, pi_sig); + return floatx80_move(a, status); + } else { // |X| > 1 + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + + } // |X| < 1 + + SET_PREC; + + one = packFloatx80(0, one_exp, one_sig); + fp0 = a; + + fp1 = floatx80_sub(one, fp0, status); // 1 - X + fp2 = floatx80_add(one, fp0, status); // 1 + X + fp1 = floatx80_mul(fp2, fp1, status); // (1+X)*(1-X) + fp1 = floatx80_sqrt(fp1, status); // SQRT((1+X)*(1-X)) + fp0 = floatx80_div(fp0, fp1, status); // X/SQRT((1+X)*(1-X)) + + RESET_PREC; + + a = floatx80_atan(fp0, status); // ATAN(X/SQRT((1+X)*(1-X))) + + float_raise(float_flag_inexact, status); + + return a; +} + +/*---------------------------------------------------------------------------- + | Arc tangent + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_atan(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + int32_t compact, tbl_index; + floatx80 fp0, fp1, fp2, fp3, xsave; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + a = packFloatx80(aSign, piby2_exp, pi_sig); + float_raise(float_flag_inexact, status); + return floatx80_move(a, status); + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(aSign, 0, 0); + } + + compact = floatx80_make_compact(aExp, aSig); + + SET_PREC; + + if (compact < 0x3FFB8000 || compact > 0x4002FFFF) { // |X| >= 16 or |X| < 1/16 + if (compact > 0x3FFF8000) { // |X| >= 16 + if (compact > 0x40638000) { // |X| > 2^(100) + fp0 = packFloatx80(aSign, piby2_exp, pi_sig); + fp1 = packFloatx80(aSign, 0x0001, one_sig); + + RESET_PREC; + + a = floatx80_sub(fp0, fp1, status); + + float_raise(float_flag_inexact, status); + + return a; + } else { + fp0 = a; + fp1 = packFloatx80(1, one_exp, one_sig); // -1 + fp1 = floatx80_div(fp1, fp0, status); // X' = -1/X + xsave = fp1; + fp0 = floatx80_mul(fp1, fp1, status); // Y = X'*X' + fp1 = floatx80_mul(fp0, fp0, status); // Z = Y*Y + fp3 = float64_to_floatx80(LIT64(0xBFB70BF398539E6A), status); // C5 + fp2 = float64_to_floatx80(LIT64(0x3FBC7187962D1D7D), status); // C4 + fp3 = floatx80_mul(fp3, fp1, status); // Z*C5 + fp2 = floatx80_mul(fp2, fp1, status); // Z*C4 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBFC24924827107B8), status), status); // C3+Z*C5 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FC999999996263E), status), status); // C2+Z*C4 + fp1 = floatx80_mul(fp1, fp3, status); // Z*(C3+Z*C5) + fp2 = floatx80_mul(fp2, fp0, status); // Y*(C2+Z*C4) + fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0xBFD5555555555536), status), status); // C1+Z*(C3+Z*C5) + fp0 = floatx80_mul(fp0, xsave, status); // X'*Y + fp1 = floatx80_add(fp1, fp2, status); // [Y*(C2+Z*C4)]+[C1+Z*(C3+Z*C5)] + fp0 = floatx80_mul(fp0, fp1, status); // X'*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) ?? + fp0 = floatx80_add(fp0, xsave, status); + fp1 = packFloatx80(aSign, piby2_exp, pi_sig); + + RESET_PREC; + + a = floatx80_add(fp0, fp1, status); + + float_raise(float_flag_inexact, status); + + return a; + } + } else { // |X| < 1/16 + if (compact < 0x3FD78000) { // |X| < 2^(-40) + RESET_PREC; + + a = floatx80_move(a, status); + + float_raise(float_flag_inexact, status); + + return a; + } else { + fp0 = a; + xsave = a; + fp0 = floatx80_mul(fp0, fp0, status); // Y = X*X + fp1 = floatx80_mul(fp0, fp0, status); // Z = Y*Y + fp2 = float64_to_floatx80(LIT64(0x3FB344447F876989), status); // B6 + fp3 = float64_to_floatx80(LIT64(0xBFB744EE7FAF45DB), status); // B5 + fp2 = floatx80_mul(fp2, fp1, status); // Z*B6 + fp3 = floatx80_mul(fp3, fp1, status); // Z*B5 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FBC71C646940220), status), status); // B4+Z*B6 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBFC24924921872F9), status), status); // B3+Z*B5 + fp2 = floatx80_mul(fp2, fp1, status); // Z*(B4+Z*B6) + fp1 = floatx80_mul(fp1, fp3, status); // Z*(B3+Z*B5) + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FC9999999998FA9), status), status); // B2+Z*(B4+Z*B6) + fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0xBFD5555555555555), status), status); // B1+Z*(B3+Z*B5) + fp2 = floatx80_mul(fp2, fp0, status); // Y*(B2+Z*(B4+Z*B6)) + fp0 = floatx80_mul(fp0, xsave, status); // X*Y + fp1 = floatx80_add(fp1, fp2, status); // [B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))] + fp0 = floatx80_mul(fp0, fp1, status); // X*Y*([B1+Z*(B3+Z*B5)]+[Y*(B2+Z*(B4+Z*B6))]) + + RESET_PREC; + + a = floatx80_add(fp0, xsave, status); + + float_raise(float_flag_inexact, status); + + return a; + } + } + } else { + aSig &= LIT64(0xF800000000000000); + aSig |= LIT64(0x0400000000000000); + xsave = packFloatx80(aSign, aExp, aSig); // F + fp0 = a; + fp1 = a; // X + fp2 = packFloatx80(0, one_exp, one_sig); // 1 + fp1 = floatx80_mul(fp1, xsave, status); // X*F + fp0 = floatx80_sub(fp0, xsave, status); // X-F + fp1 = floatx80_add(fp1, fp2, status); // 1 + X*F + fp0 = floatx80_div(fp0, fp1, status); // U = (X-F)/(1+X*F) + + tbl_index = compact; + + tbl_index &= 0x7FFF0000; + tbl_index -= 0x3FFB0000; + tbl_index >>= 1; + tbl_index += compact&0x00007800; + tbl_index >>= 11; + + fp3 = atan_tbl[tbl_index]; + + fp3.high |= aSign ? 0x8000 : 0; // ATAN(F) + + fp1 = floatx80_mul(fp0, fp0, status); // V = U*U + fp2 = float64_to_floatx80(LIT64(0xBFF6687E314987D8), status); // A3 + fp2 = floatx80_add(fp2, fp1, status); // A3+V + fp2 = floatx80_mul(fp2, fp1, status); // V*(A3+V) + fp1 = floatx80_mul(fp1, fp0, status); // U*V + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x4002AC6934A26DB3), status), status); // A2+V*(A3+V) + fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0xBFC2476F4E1DA28E), status), status); // A1+U*V + fp1 = floatx80_mul(fp1, fp2, status); // A1*U*V*(A2+V*(A3+V)) + fp0 = floatx80_add(fp0, fp1, status); // ATAN(U) + + RESET_PREC; + + a = floatx80_add(fp0, fp3, status); // ATAN(X) + + float_raise(float_flag_inexact, status); + + return a; + } +} + +/*---------------------------------------------------------------------------- + | Hyperbolic arc tangent + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_atanh(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + int32_t compact; + floatx80 fp0, fp1, fp2, one; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF && (uint64_t) (aSig<<1)) { + return propagateFloatx80NaNOneArg(a, status); + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(aSign, 0, 0); + } + + compact = floatx80_make_compact(aExp, aSig); + + if (compact >= 0x3FFF8000) { // |X| >= 1 + if (aExp == one_exp && aSig == one_sig) { // |X| == 1 + float_raise(float_flag_divbyzero, status); + return packFloatx80(aSign, 0x7FFF, floatx80_default_infinity_low); + } else { // |X| > 1 + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + } // |X| < 1 + + SET_PREC; + + one = packFloatx80(0, one_exp, one_sig); + fp2 = packFloatx80(aSign, 0x3FFE, one_sig); // SIGN(X) * (1/2) + fp0 = packFloatx80(0, aExp, aSig); // Y = |X| + fp1 = packFloatx80(1, aExp, aSig); // -Y + fp0 = floatx80_add(fp0, fp0, status); // 2Y + fp1 = floatx80_add(fp1, one, status); // 1-Y + fp0 = floatx80_div(fp0, fp1, status); // Z = 2Y/(1-Y) + fp0 = floatx80_lognp1(fp0, status); // LOG1P(Z) + + RESET_PREC; + + a = floatx80_mul(fp0, fp2, status); // ATANH(X) = SIGN(X) * (1/2) * LOG1P(Z) + + float_raise(float_flag_inexact, status); + + return a; +} + +/*---------------------------------------------------------------------------- + | Cosine + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_cos(floatx80 a, float_status *status) +{ + flag aSign, xSign; + int32_t aExp, xExp; + uint64_t aSig, xSig; + + int32_t compact, l, n, j; + floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; + float32 posneg1, twoto63; + flag adjn, endflag; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(0, one_exp, one_sig); + } + + adjn = 1; + + SET_PREC; + + compact = floatx80_make_compact(aExp, aSig); + + fp0 = a; + + if (compact < 0x3FD78000 || compact > 0x4004BC7E) { // 2^(-40) > |X| > 15 PI + if (compact > 0x3FFF8000) { // |X| >= 15 PI + // REDUCEX + fp1 = packFloatx80(0, 0, 0); + if (compact == 0x7FFEFFFF) { + twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, LIT64(0xC90FDAA200000000)); + twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, LIT64(0x85A308D300000000)); + fp0 = floatx80_add(fp0, twopi1, status); + fp1 = fp0; + fp0 = floatx80_add(fp0, twopi2, status); + fp1 = floatx80_sub(fp1, fp0, status); + fp1 = floatx80_add(fp1, twopi2, status); + } + loop: + xSign = extractFloatx80Sign(fp0); + xExp = extractFloatx80Exp(fp0); + xExp -= 0x3FFF; + if (xExp <= 28) { + l = 0; + endflag = 1; + } else { + l = xExp - 27; + endflag = 0; + } + invtwopi = packFloatx80(0, 0x3FFE - l, LIT64(0xA2F9836E4E44152A)); // INVTWOPI + twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); + twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); + + twoto63 = 0x5F000000; + twoto63 |= xSign ? 0x80000000 : 0x00000000; // SIGN(INARG)*2^63 IN SGL + + fp2 = floatx80_mul(fp0, invtwopi, status); + fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), status); // THE FRACTIONAL PART OF FP2 IS ROUNDED + fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), status); // FP2 is N + fp4 = floatx80_mul(twopi1, fp2, status); // W = N*P1 + fp5 = floatx80_mul(twopi2, fp2, status); // w = N*P2 + fp3 = floatx80_add(fp4, fp5, status); // FP3 is P + fp4 = floatx80_sub(fp4, fp3, status); // W-P + fp0 = floatx80_sub(fp0, fp3, status); // FP0 is A := R - P + fp4 = floatx80_add(fp4, fp5, status); // FP4 is p = (W-P)+w + fp3 = fp0; // FP3 is A + fp1 = floatx80_sub(fp1, fp4, status); // FP1 is a := r - p + fp0 = floatx80_add(fp0, fp1, status); // FP0 is R := A+a + + if (endflag > 0) { + n = floatx80_to_int32(fp2, status); + goto sincont; + } + fp3 = floatx80_sub(fp3, fp0, status); // A-R + fp1 = floatx80_add(fp1, fp3, status); // FP1 is r := (A-R)+a + goto loop; + } else { + // SINSM + fp0 = float32_to_floatx80(0x3F800000, status); // 1 + + RESET_PREC; + + if (adjn) { + // COSTINY + a = floatx80_sub(fp0, float32_to_floatx80(0x00800000, status), status); + } else { + // SINTINY + a = floatx80_move(a, status); + } + float_raise(float_flag_inexact, status); + + return a; + } + } else { + fp1 = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x3FE45F306DC9C883), status), status); // X*2/PI + + n = floatx80_to_int32(fp1, status); + j = 32 + n; + + fp0 = floatx80_sub(fp0, pi_tbl[j], status); // X-Y1 + fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), status); // FP0 IS R = (X-Y1)-Y2 + + sincont: + if ((n + adjn) & 1) { + // COSPOLY + fp0 = floatx80_mul(fp0, fp0, status); // FP0 IS S + fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS T + fp2 = float64_to_floatx80(LIT64(0x3D2AC4D0D6011EE3), status); // B8 + fp3 = float64_to_floatx80(LIT64(0xBDA9396F9F45AC19), status); // B7 + + xSign = extractFloatx80Sign(fp0); // X IS S + xExp = extractFloatx80Exp(fp0); + xSig = extractFloatx80Frac(fp0); + + if (((n + adjn) >> 1) & 1) { + xSign ^= 1; + posneg1 = 0xBF800000; // -1 + } else { + xSign ^= 0; + posneg1 = 0x3F800000; // 1 + } // X IS NOW R'= SGN*R + + fp2 = floatx80_mul(fp2, fp1, status); // TB8 + fp3 = floatx80_mul(fp3, fp1, status); // TB7 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3E21EED90612C972), status), status); // B6+TB8 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBE927E4FB79D9FCF), status), status); // B5+TB7 + fp2 = floatx80_mul(fp2, fp1, status); // T(B6+TB8) + fp3 = floatx80_mul(fp3, fp1, status); // T(B5+TB7) + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EFA01A01A01D423), status), status); // B4+T(B6+TB8) + fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438)); + fp3 = floatx80_add(fp3, fp4, status); // B3+T(B5+TB7) + fp2 = floatx80_mul(fp2, fp1, status); // T(B4+T(B6+TB8)) + fp1 = floatx80_mul(fp1, fp3, status); // T(B3+T(B5+TB7)) + fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E)); + fp2 = floatx80_add(fp2, fp4, status); // B2+T(B4+T(B6+TB8)) + fp1 = floatx80_add(fp1, float32_to_floatx80(0xBF000000, status), status); // B1+T(B3+T(B5+TB7)) + fp0 = floatx80_mul(fp0, fp2, status); // S(B2+T(B4+T(B6+TB8))) + fp0 = floatx80_add(fp0, fp1, status); // [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] + + x = packFloatx80(xSign, xExp, xSig); + fp0 = floatx80_mul(fp0, x, status); + + RESET_PREC; + + a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); + + float_raise(float_flag_inexact, status); + + return a; + } else { + // SINPOLY + xSign = extractFloatx80Sign(fp0); // X IS R + xExp = extractFloatx80Exp(fp0); + xSig = extractFloatx80Frac(fp0); + + xSign ^= ((n + adjn) >> 1) & 1; // X IS NOW R'= SGN*R + + fp0 = floatx80_mul(fp0, fp0, status); // FP0 IS S + fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS T + fp3 = float64_to_floatx80(LIT64(0xBD6AAA77CCC994F5), status); // A7 + fp2 = float64_to_floatx80(LIT64(0x3DE612097AAE8DA1), status); // A6 + fp3 = floatx80_mul(fp3, fp1, status); // T*A7 + fp2 = floatx80_mul(fp2, fp1, status); // T*A6 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBE5AE6452A118AE4), status), status); // A5+T*A7 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EC71DE3A5341531), status), status); // A4+T*A6 + fp3 = floatx80_mul(fp3, fp1, status); // T(A5+TA7) + fp2 = floatx80_mul(fp2, fp1, status); // T(A4+TA6) + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBF2A01A01A018B59), status), status); // A3+T(A5+TA7) + fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF)); + fp2 = floatx80_add(fp2, fp4, status); // A2+T(A4+TA6) + fp1 = floatx80_mul(fp1, fp3, status); // T(A3+T(A5+TA7)) + fp2 = floatx80_mul(fp2, fp0, status); // S(A2+T(A4+TA6)) + fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99)); + fp1 = floatx80_add(fp1, fp4, status); // A1+T(A3+T(A5+TA7)) + fp1 = floatx80_add(fp1, fp2, status); // [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] + + x = packFloatx80(xSign, xExp, xSig); + fp0 = floatx80_mul(fp0, x, status); // R'*S + fp0 = floatx80_mul(fp0, fp1, status); // SIN(R')-R' + + RESET_PREC; + + a = floatx80_add(fp0, x, status); + + float_raise(float_flag_inexact, status); + + return a; + } + } +} + +/*---------------------------------------------------------------------------- + | Hyperbolic cosine + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_cosh(floatx80 a, float_status *status) +{ +// flag aSign; + int32_t aExp; + uint64_t aSig; + + int32_t compact; + floatx80 fp0, fp1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); +// aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + return packFloatx80(0, aExp, aSig); + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(0, one_exp, one_sig); + } + + SET_PREC; + + compact = floatx80_make_compact(aExp, aSig); + + if (compact > 0x400CB167) { + if (compact > 0x400CB2B3) { + RESET_PREC; + a = roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 0x8000, one_sig, 0, status); + float_raise(float_flag_inexact, status); + return a; + } else { + fp0 = packFloatx80(0, aExp, aSig); + fp0 = floatx80_sub(fp0, float64_to_floatx80(LIT64(0x40C62D38D3D64634), status), status); + fp0 = floatx80_sub(fp0, float64_to_floatx80(LIT64(0x3D6F90AEB1E75CC7), status), status); + fp0 = floatx80_etox(fp0, status); + fp1 = packFloatx80(0, 0x7FFB, one_sig); + + RESET_PREC; + + a = floatx80_mul(fp0, fp1, status); + + float_raise(float_flag_inexact, status); + + return a; + } + } + + fp0 = packFloatx80(0, aExp, aSig); // |X| + fp0 = floatx80_etox(fp0, status); // EXP(|X|) + fp0 = floatx80_mul(fp0, float32_to_floatx80(0x3F000000, status), status); // (1/2)*EXP(|X|) + fp1 = float32_to_floatx80(0x3E800000, status); // 1/4 + fp1 = floatx80_div(fp1, fp0, status); // 1/(2*EXP(|X|)) + + RESET_PREC; + + a = floatx80_add(fp0, fp1, status); + + float_raise(float_flag_inexact, status); + + return a; +} + +/*---------------------------------------------------------------------------- + | e to x + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_etox(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + int32_t compact, n, j, k, m, m1; + floatx80 fp0, fp1, fp2, fp3, l2, scale, adjscale; + flag adjflag; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + if (aSign) return packFloatx80(0, 0, 0); + return a; + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(0, one_exp, one_sig); + } + + SET_PREC; + + adjflag = 0; + + if (aExp >= 0x3FBE) { // |X| >= 2^(-65) + compact = floatx80_make_compact(aExp, aSig); + + if (compact < 0x400CB167) { // |X| < 16380 log2 + fp0 = a; + fp1 = a; + fp0 = floatx80_mul(fp0, float32_to_floatx80(0x42B8AA3B, status), status); // 64/log2 * X + adjflag = 0; + n = floatx80_to_int32(fp0, status); // int(64/log2*X) + fp0 = int32_to_floatx80(n); + + j = n & 0x3F; // J = N mod 64 + m = n / 64; // NOTE: this is really arithmetic right shift by 6 + if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity + m--; + } + m += 0x3FFF; // biased exponent of 2^(M) + + expcont1: + fp2 = fp0; // N + fp0 = floatx80_mul(fp0, float32_to_floatx80(0xBC317218, status), status); // N * L1, L1 = lead(-log2/64) + l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6)); + fp2 = floatx80_mul(fp2, l2, status); // N * L2, L1+L2 = -log2/64 + fp0 = floatx80_add(fp0, fp1, status); // X + N*L1 + fp0 = floatx80_add(fp0, fp2, status); // R + + fp1 = floatx80_mul(fp0, fp0, status); // S = R*R + fp2 = float32_to_floatx80(0x3AB60B70, status); // A5 + fp2 = floatx80_mul(fp2, fp1, status); // fp2 is S*A5 + fp3 = floatx80_mul(float32_to_floatx80(0x3C088895, status), fp1, status); // fp3 is S*A4 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FA5555555554431), status), status); // fp2 is A3+S*A5 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3FC5555555554018), status), status); // fp3 is A2+S*A4 + fp2 = floatx80_mul(fp2, fp1, status); // fp2 is S*(A3+S*A5) + fp3 = floatx80_mul(fp3, fp1, status); // fp3 is S*(A2+S*A4) + fp2 = floatx80_add(fp2, float32_to_floatx80(0x3F000000, status), status); // fp2 is A1+S*(A3+S*A5) + fp3 = floatx80_mul(fp3, fp0, status); // fp3 IS R*S*(A2+S*A4) + fp2 = floatx80_mul(fp2, fp1, status); // fp2 IS S*(A1+S*(A3+S*A5)) + fp0 = floatx80_add(fp0, fp3, status); // fp0 IS R+R*S*(A2+S*A4) + fp0 = floatx80_add(fp0, fp2, status); // fp0 IS EXP(R) - 1 + + fp1 = exp_tbl[j]; + fp0 = floatx80_mul(fp0, fp1, status); // 2^(J/64)*(Exp(R)-1) + fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status), status); // accurate 2^(J/64) + fp0 = floatx80_add(fp0, fp1, status); // 2^(J/64) + 2^(J/64)*(Exp(R)-1) + + scale = packFloatx80(0, m, one_sig); + if (adjflag) { + adjscale = packFloatx80(0, m1, one_sig); + fp0 = floatx80_mul(fp0, adjscale, status); + } + + RESET_PREC; + + a = floatx80_mul(fp0, scale, status); + + float_raise(float_flag_inexact, status); + + return a; + } else { // |X| >= 16380 log2 + if (compact > 0x400CB27C) { // |X| >= 16480 log2 + RESET_PREC; + if (aSign) { + a = roundAndPackFloatx80(status->floatx80_rounding_precision, 0, -0x1000, aSig, 0, status); + } else { + a = roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 0x8000, aSig, 0, status); + } + float_raise(float_flag_inexact, status); + + return a; + } else { + fp0 = a; + fp1 = a; + fp0 = floatx80_mul(fp0, float32_to_floatx80(0x42B8AA3B, status), status); // 64/log2 * X + adjflag = 1; + n = floatx80_to_int32(fp0, status); // int(64/log2*X) + fp0 = int32_to_floatx80(n); + + j = n & 0x3F; // J = N mod 64 + k = n / 64; // NOTE: this is really arithmetic right shift by 6 + if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity + k--; + } + m1 = k / 2; // NOTE: this is really arithmetic right shift by 1 + if (k < 0 && (k & 1)) { // arithmetic right shift is division and round towards minus infinity + m1--; + } + m = k - m1; + m1 += 0x3FFF; // biased exponent of 2^(M1) + m += 0x3FFF; // biased exponent of 2^(M) + + goto expcont1; + } + } + } else { // |X| < 2^(-65) + RESET_PREC; + + a = floatx80_add(a, float32_to_floatx80(0x3F800000, status), status); // 1 + X + + float_raise(float_flag_inexact, status); + + return a; + } +} + +/*---------------------------------------------------------------------------- + | e to x minus 1 + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_etoxm1(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + int32_t compact, n, j, m, m1; + floatx80 fp0, fp1, fp2, fp3, l2, sc, onebysc; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + if (aSign) return packFloatx80(aSign, one_exp, one_sig); + return a; + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(aSign, 0, 0); + } + + SET_PREC; + + if (aExp >= 0x3FFD) { // |X| >= 1/4 + compact = floatx80_make_compact(aExp, aSig); + + if (compact <= 0x4004C215) { // |X| <= 70 log2 + fp0 = a; + fp1 = a; + fp0 = floatx80_mul(fp0, float32_to_floatx80(0x42B8AA3B, status), status); // 64/log2 * X + n = floatx80_to_int32(fp0, status); // int(64/log2*X) + fp0 = int32_to_floatx80(n); + + j = n & 0x3F; // J = N mod 64 + m = n / 64; // NOTE: this is really arithmetic right shift by 6 + if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity + m--; + } + m1 = -m; + //m += 0x3FFF; // biased exponent of 2^(M) + //m1 += 0x3FFF; // biased exponent of -2^(-M) + + fp2 = fp0; // N + fp0 = floatx80_mul(fp0, float32_to_floatx80(0xBC317218, status), status); // N * L1, L1 = lead(-log2/64) + l2 = packFloatx80(0, 0x3FDC, LIT64(0x82E308654361C4C6)); + fp2 = floatx80_mul(fp2, l2, status); // N * L2, L1+L2 = -log2/64 + fp0 = floatx80_add(fp0, fp1, status); // X + N*L1 + fp0 = floatx80_add(fp0, fp2, status); // R + + fp1 = floatx80_mul(fp0, fp0, status); // S = R*R + fp2 = float32_to_floatx80(0x3950097B, status); // A6 + fp2 = floatx80_mul(fp2, fp1, status); // fp2 is S*A6 + fp3 = floatx80_mul(float32_to_floatx80(0x3AB60B6A, status), fp1, status); // fp3 is S*A5 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3F81111111174385), status), status); // fp2 IS A4+S*A6 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3FA5555555554F5A), status), status); // fp3 is A3+S*A5 + fp2 = floatx80_mul(fp2, fp1, status); // fp2 IS S*(A4+S*A6) + fp3 = floatx80_mul(fp3, fp1, status); // fp3 IS S*(A3+S*A5) + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FC5555555555555), status), status); // fp2 IS A2+S*(A4+S*A6) + fp3 = floatx80_add(fp3, float32_to_floatx80(0x3F000000, status), status); // fp3 IS A1+S*(A3+S*A5) + fp2 = floatx80_mul(fp2, fp1, status); // fp2 IS S*(A2+S*(A4+S*A6)) + fp1 = floatx80_mul(fp1, fp3, status); // fp1 IS S*(A1+S*(A3+S*A5)) + fp2 = floatx80_mul(fp2, fp0, status); // fp2 IS R*S*(A2+S*(A4+S*A6)) + fp0 = floatx80_add(fp0, fp1, status); // fp0 IS R+S*(A1+S*(A3+S*A5)) + fp0 = floatx80_add(fp0, fp2, status); // fp0 IS EXP(R) - 1 + + fp0 = floatx80_mul(fp0, exp_tbl[j], status); // 2^(J/64)*(Exp(R)-1) + + if (m >= 64) { + fp1 = float32_to_floatx80(exp_tbl2[j], status); + onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); // -2^(-M) + fp1 = floatx80_add(fp1, onebysc, status); + fp0 = floatx80_add(fp0, fp1, status); + fp0 = floatx80_add(fp0, exp_tbl[j], status); + } else if (m < -3) { + fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status), status); + fp0 = floatx80_add(fp0, exp_tbl[j], status); + onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); // -2^(-M) + fp0 = floatx80_add(fp0, onebysc, status); + } else { // -3 <= m <= 63 + fp1 = exp_tbl[j]; + fp0 = floatx80_add(fp0, float32_to_floatx80(exp_tbl2[j], status), status); + onebysc = packFloatx80(1, m1 + 0x3FFF, one_sig); // -2^(-M) + fp1 = floatx80_add(fp1, onebysc, status); + fp0 = floatx80_add(fp0, fp1, status); + } + + sc = packFloatx80(0, m + 0x3FFF, one_sig); + + RESET_PREC; + + a = floatx80_mul(fp0, sc, status); + + float_raise(float_flag_inexact, status); + + return a; + } else { // |X| > 70 log2 + if (aSign) { + fp0 = float32_to_floatx80(0xBF800000, status); // -1 + + RESET_PREC; + + a = floatx80_add(fp0, float32_to_floatx80(0x00800000, status), status); // -1 + 2^(-126) + + float_raise(float_flag_inexact, status); + + return a; + } else { + RESET_PREC; + + return floatx80_etox(a, status); + } + } + } else { // |X| < 1/4 + if (aExp >= 0x3FBE) { + fp0 = a; + fp0 = floatx80_mul(fp0, fp0, status); // S = X*X + fp1 = float32_to_floatx80(0x2F30CAA8, status); // B12 + fp1 = floatx80_mul(fp1, fp0, status); // S * B12 + fp2 = float32_to_floatx80(0x310F8290, status); // B11 + fp1 = floatx80_add(fp1, float32_to_floatx80(0x32D73220, status), status); // B10 + fp2 = floatx80_mul(fp2, fp0, status); + fp1 = floatx80_mul(fp1, fp0, status); + fp2 = floatx80_add(fp2, float32_to_floatx80(0x3493F281, status), status); // B9 + fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3EC71DE3A5774682), status), status); // B8 + fp2 = floatx80_mul(fp2, fp0, status); + fp1 = floatx80_mul(fp1, fp0, status); + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EFA01A019D7CB68), status), status); // B7 + fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3F2A01A01A019DF3), status), status); // B6 + fp2 = floatx80_mul(fp2, fp0, status); + fp1 = floatx80_mul(fp1, fp0, status); + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3F56C16C16C170E2), status), status); // B5 + fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3F81111111111111), status), status); // B4 + fp2 = floatx80_mul(fp2, fp0, status); + fp1 = floatx80_mul(fp1, fp0, status); + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FA5555555555555), status), status); // B3 + fp3 = packFloatx80(0, 0x3FFC, LIT64(0xAAAAAAAAAAAAAAAB)); + fp1 = floatx80_add(fp1, fp3, status); // B2 + fp2 = floatx80_mul(fp2, fp0, status); + fp1 = floatx80_mul(fp1, fp0, status); + + fp2 = floatx80_mul(fp2, fp0, status); + fp1 = floatx80_mul(fp1, a, status); + + fp0 = floatx80_mul(fp0, float32_to_floatx80(0x3F000000, status), status); // S*B1 + fp1 = floatx80_add(fp1, fp2, status); // Q + fp0 = floatx80_add(fp0, fp1, status); // S*B1+Q + + RESET_PREC; + + a = floatx80_add(fp0, a, status); + + float_raise(float_flag_inexact, status); + + return a; + } else { // |X| < 2^(-65) + sc = packFloatx80(1, 1, one_sig); + fp0 = a; + + if (aExp < 0x0033) { // |X| < 2^(-16382) + fp0 = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x48B0000000000000), status), status); + fp0 = floatx80_add(fp0, sc, status); + + RESET_PREC; + + a = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x3730000000000000), status), status); + } else { + RESET_PREC; + + a = floatx80_add(fp0, sc, status); + } + + float_raise(float_flag_inexact, status); + + return a; + } + } +} + +/*---------------------------------------------------------------------------- + | Log base 10 + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_log10(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + floatx80 fp0, fp1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + if (aSign == 0) + return a; + } + + if (aExp == 0 && aSig == 0) { + float_raise(float_flag_divbyzero, status); + return packFloatx80(1, 0x7FFF, floatx80_default_infinity_low); + } + + if (aSign) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + + SET_PREC; + + fp0 = floatx80_logn(a, status); + fp1 = packFloatx80(0, 0x3FFD, LIT64(0xDE5BD8A937287195)); // INV_L10 + + RESET_PREC; + + a = floatx80_mul(fp0, fp1, status); // LOGN(X)*INV_L10 + + float_raise(float_flag_inexact, status); + + return a; +} + +/*---------------------------------------------------------------------------- + | Log base 2 + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_log2(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + floatx80 fp0, fp1; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + if (aSign == 0) + return a; + } + + if (aExp == 0) { + if (aSig == 0) { + float_raise(float_flag_divbyzero, status); + return packFloatx80(1, 0x7FFF, floatx80_default_infinity_low); + } + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + + if (aSign) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + + SET_PREC; + + if (aSig == one_sig) { // X is 2^k + RESET_PREC; + + a = int32_to_floatx80(aExp-0x3FFF); + } else { + fp0 = floatx80_logn(a, status); + fp1 = packFloatx80(0, 0x3FFF, LIT64(0xB8AA3B295C17F0BC)); // INV_L2 + + RESET_PREC; + + a = floatx80_mul(fp0, fp1, status); // LOGN(X)*INV_L2 + } + + float_raise(float_flag_inexact, status); + + return a; +} + +/*---------------------------------------------------------------------------- + | Log base e + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_logn(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig, fSig; + + int32_t compact, j, k, adjk; + floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + if (aSign == 0) + return a; + } + + adjk = 0; + + if (aExp == 0) { + if (aSig == 0) { // zero + float_raise(float_flag_divbyzero, status); + return packFloatx80(1, 0x7FFF, floatx80_default_infinity_low); + } +#if 1 + if ((aSig & one_sig) == 0) { // denormal + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + adjk = -100; + aExp += 100; + a = packFloatx80(aSign, aExp, aSig); + } +#else + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); +#endif + } + + if (aSign) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + + SET_PREC; + + compact = floatx80_make_compact(aExp, aSig); + + if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { // |X| < 15/16 or |X| > 17/16 + k = aExp - 0x3FFF; + k += adjk; + fp1 = int32_to_floatx80(k); + + fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); + j = (fSig >> 56) & 0x7E; // DISPLACEMENT FOR 1/F + + f = packFloatx80(0, 0x3FFF, fSig); // F + fp0 = packFloatx80(0, 0x3FFF, aSig); // Y + + fp0 = floatx80_sub(fp0, f, status); // Y-F + + // LP1CONT1 + fp0 = floatx80_mul(fp0, log_tbl[j], status); // FP0 IS U = (Y-F)/F + logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); + klog2 = floatx80_mul(fp1, logof2, status); // FP1 IS K*LOG2 + fp2 = floatx80_mul(fp0, fp0, status); // FP2 IS V=U*U + + fp3 = fp2; + fp1 = fp2; + + fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0x3FC2499AB5E4040B), status), status); // V*A6 + fp2 = floatx80_mul(fp2, float64_to_floatx80(LIT64(0xBFC555B5848CB7DB), status), status); // V*A5 + fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FC99999987D8730), status), status); // A4+V*A6 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0xBFCFFFFFFF6F7E97), status), status); // A3+V*A5 + fp1 = floatx80_mul(fp1, fp3, status); // V*(A4+V*A6) + fp2 = floatx80_mul(fp2, fp3, status); // V*(A3+V*A5) + fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FD55555555555A4), status), status); // A2+V*(A4+V*A6) + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0xBFE0000000000008), status), status); // A1+V*(A3+V*A5) + fp1 = floatx80_mul(fp1, fp3, status); // V*(A2+V*(A4+V*A6)) + fp2 = floatx80_mul(fp2, fp3, status); // V*(A1+V*(A3+V*A5)) + fp1 = floatx80_mul(fp1, fp0, status); // U*V*(A2+V*(A4+V*A6)) + fp0 = floatx80_add(fp0, fp2, status); // U+V*(A1+V*(A3+V*A5)) + + fp1 = floatx80_add(fp1, log_tbl[j+1], status); // LOG(F)+U*V*(A2+V*(A4+V*A6)) + fp0 = floatx80_add(fp0, fp1, status); // FP0 IS LOG(F) + LOG(1+U) + + RESET_PREC; + + a = floatx80_add(fp0, klog2, status); + + float_raise(float_flag_inexact, status); + + return a; + } else { // |X-1| >= 1/16 + fp0 = a; + fp1 = a; + fp1 = floatx80_sub(fp1, float32_to_floatx80(0x3F800000, status), status); // FP1 IS X-1 + fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // FP0 IS X+1 + fp1 = floatx80_add(fp1, fp1, status); // FP1 IS 2(X-1) + + // LP1CONT2 + fp1 = floatx80_div(fp1, fp0, status); // U + saveu = fp1; + fp0 = floatx80_mul(fp1, fp1, status); // FP0 IS V = U*U + fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS W = V*V + + fp3 = float64_to_floatx80(LIT64(0x3F175496ADD7DAD6), status); // B5 + fp2 = float64_to_floatx80(LIT64(0x3F3C71C2FE80C7E0), status); // B4 + fp3 = floatx80_mul(fp3, fp1, status); // W*B5 + fp2 = floatx80_mul(fp2, fp1, status); // W*B4 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3F624924928BCCFF), status), status); // B3+W*B5 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3F899999999995EC), status), status); // B2+W*B4 + fp1 = floatx80_mul(fp1, fp3, status); // W*(B3+W*B5) + fp2 = floatx80_mul(fp2, fp0, status); // V*(B2+W*B4) + fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FB5555555555555), status), status); // B1+W*(B3+W*B5) + + fp0 = floatx80_mul(fp0, saveu, status); // FP0 IS U*V + fp1 = floatx80_add(fp1, fp2, status); // B1+W*(B3+W*B5) + V*(B2+W*B4) + fp0 = floatx80_mul(fp0, fp1, status); // U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) + + RESET_PREC; + + a = floatx80_add(fp0, saveu, status); + + float_raise(float_flag_inexact, status); + + return a; + } +} + +/*---------------------------------------------------------------------------- + | Log base e of x plus 1 + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_lognp1(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig, fSig; + + int32_t compact, j, k; + floatx80 fp0, fp1, fp2, fp3, f, logof2, klog2, saveu; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + if (aSign) { + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + return a; + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(aSign, 0, 0); + } + + if (aSign && aExp >= one_exp) { + if (aExp == one_exp && aSig == one_sig) { + float_raise(float_flag_divbyzero, status); + return packFloatx80(aSign, 0x7FFF, floatx80_default_infinity_low); + } + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + + if (aExp < 0x3f99 || (aExp == 0x3f99 && aSig == one_sig)) { // <= min threshold + float_raise(float_flag_inexact, status); + return floatx80_move(a, status); + } + + SET_PREC; + + compact = floatx80_make_compact(aExp, aSig); + + fp0 = a; // Z + fp1 = a; + + fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // X = (1+Z) + + aExp = extractFloatx80Exp(fp0); + aSig = extractFloatx80Frac(fp0); + + compact = floatx80_make_compact(aExp, aSig); + + if (compact < 0x3FFE8000 || compact > 0x3FFFC000) { // |X| < 1/2 or |X| > 3/2 + k = aExp - 0x3FFF; + fp1 = int32_to_floatx80(k); + + fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); + j = (fSig >> 56) & 0x7E; // DISPLACEMENT FOR 1/F + + f = packFloatx80(0, 0x3FFF, fSig); // F + fp0 = packFloatx80(0, 0x3FFF, aSig); // Y + + fp0 = floatx80_sub(fp0, f, status); // Y-F + + lp1cont1: + // LP1CONT1 + fp0 = floatx80_mul(fp0, log_tbl[j], status); // FP0 IS U = (Y-F)/F + logof2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); + klog2 = floatx80_mul(fp1, logof2, status); // FP1 IS K*LOG2 + fp2 = floatx80_mul(fp0, fp0, status); // FP2 IS V=U*U + + fp3 = fp2; + fp1 = fp2; + + fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0x3FC2499AB5E4040B), status), status); // V*A6 + fp2 = floatx80_mul(fp2, float64_to_floatx80(LIT64(0xBFC555B5848CB7DB), status), status); // V*A5 + fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FC99999987D8730), status), status); // A4+V*A6 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0xBFCFFFFFFF6F7E97), status), status); // A3+V*A5 + fp1 = floatx80_mul(fp1, fp3, status); // V*(A4+V*A6) + fp2 = floatx80_mul(fp2, fp3, status); // V*(A3+V*A5) + fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FD55555555555A4), status), status); // A2+V*(A4+V*A6) + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0xBFE0000000000008), status), status); // A1+V*(A3+V*A5) + fp1 = floatx80_mul(fp1, fp3, status); // V*(A2+V*(A4+V*A6)) + fp2 = floatx80_mul(fp2, fp3, status); // V*(A1+V*(A3+V*A5)) + fp1 = floatx80_mul(fp1, fp0, status); // U*V*(A2+V*(A4+V*A6)) + fp0 = floatx80_add(fp0, fp2, status); // U+V*(A1+V*(A3+V*A5)) + + fp1 = floatx80_add(fp1, log_tbl[j+1], status); // LOG(F)+U*V*(A2+V*(A4+V*A6)) + fp0 = floatx80_add(fp0, fp1, status); // FP0 IS LOG(F) + LOG(1+U) + + RESET_PREC; + + a = floatx80_add(fp0, klog2, status); + + float_raise(float_flag_inexact, status); + + return a; + } else if (compact < 0x3FFEF07D || compact > 0x3FFF8841) { // |X| < 1/16 or |X| > -1/16 + // LP1CARE + fSig = (aSig & LIT64(0xFE00000000000000)) | LIT64(0x0100000000000000); + f = packFloatx80(0, 0x3FFF, fSig); // F + j = (fSig >> 56) & 0x7E; // DISPLACEMENT FOR 1/F + + if (compact >= 0x3FFF8000) { // 1+Z >= 1 + // KISZERO + fp0 = floatx80_sub(float32_to_floatx80(0x3F800000, status), f, status); // 1-F + fp0 = floatx80_add(fp0, fp1, status); // FP0 IS Y-F = (1-F)+Z + fp1 = packFloatx80(0, 0, 0); // K = 0 + } else { + // KISNEG + fp0 = floatx80_sub(float32_to_floatx80(0x40000000, status), f, status); // 2-F + fp1 = floatx80_add(fp1, fp1, status); // 2Z + fp0 = floatx80_add(fp0, fp1, status); // FP0 IS Y-F = (2-F)+2Z + fp1 = packFloatx80(1, one_exp, one_sig); // K = -1 + } + goto lp1cont1; + } else { + // LP1ONE16 + fp1 = floatx80_add(fp1, fp1, status); // FP1 IS 2Z + fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // FP0 IS 1+X + + // LP1CONT2 + fp1 = floatx80_div(fp1, fp0, status); // U + saveu = fp1; + fp0 = floatx80_mul(fp1, fp1, status); // FP0 IS V = U*U + fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS W = V*V + + fp3 = float64_to_floatx80(LIT64(0x3F175496ADD7DAD6), status); // B5 + fp2 = float64_to_floatx80(LIT64(0x3F3C71C2FE80C7E0), status); // B4 + fp3 = floatx80_mul(fp3, fp1, status); // W*B5 + fp2 = floatx80_mul(fp2, fp1, status); // W*B4 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3F624924928BCCFF), status), status); // B3+W*B5 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3F899999999995EC), status), status); // B2+W*B4 + fp1 = floatx80_mul(fp1, fp3, status); // W*(B3+W*B5) + fp2 = floatx80_mul(fp2, fp0, status); // V*(B2+W*B4) + fp1 = floatx80_add(fp1, float64_to_floatx80(LIT64(0x3FB5555555555555), status), status); // B1+W*(B3+W*B5) + + fp0 = floatx80_mul(fp0, saveu, status); // FP0 IS U*V + fp1 = floatx80_add(fp1, fp2, status); // B1+W*(B3+W*B5) + V*(B2+W*B4) + fp0 = floatx80_mul(fp0, fp1, status); // U*V*( [B1+W*(B3+W*B5)] + [V*(B2+W*B4)] ) + + RESET_PREC; + + a = floatx80_add(fp0, saveu, status); + + float_raise(float_flag_inexact, status); + + return a; + } +} + +/*---------------------------------------------------------------------------- + | Sine + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_sin(floatx80 a, float_status *status) +{ + flag aSign, xSign; + int32_t aExp, xExp; + uint64_t aSig, xSig; + + int32_t compact, l, n, j; + floatx80 fp0, fp1, fp2, fp3, fp4, fp5, x, invtwopi, twopi1, twopi2; + float32 posneg1, twoto63; + flag adjn, endflag; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(aSign, 0, 0); + } + + adjn = 0; + + SET_PREC; + + compact = floatx80_make_compact(aExp, aSig); + + fp0 = a; + + if (compact < 0x3FD78000 || compact > 0x4004BC7E) { // 2^(-40) > |X| > 15 PI + if (compact > 0x3FFF8000) { // |X| >= 15 PI + // REDUCEX + fp1 = packFloatx80(0, 0, 0); + if (compact == 0x7FFEFFFF) { + twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, LIT64(0xC90FDAA200000000)); + twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, LIT64(0x85A308D300000000)); + fp0 = floatx80_add(fp0, twopi1, status); + fp1 = fp0; + fp0 = floatx80_add(fp0, twopi2, status); + fp1 = floatx80_sub(fp1, fp0, status); + fp1 = floatx80_add(fp1, twopi2, status); + } + loop: + xSign = extractFloatx80Sign(fp0); + xExp = extractFloatx80Exp(fp0); + xExp -= 0x3FFF; + if (xExp <= 28) { + l = 0; + endflag = 1; + } else { + l = xExp - 27; + endflag = 0; + } + invtwopi = packFloatx80(0, 0x3FFE - l, LIT64(0xA2F9836E4E44152A)); // INVTWOPI + twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); + twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); + + twoto63 = 0x5F000000; + twoto63 |= xSign ? 0x80000000 : 0x00000000; // SIGN(INARG)*2^63 IN SGL + + fp2 = floatx80_mul(fp0, invtwopi, status); + fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), status); // THE FRACTIONAL PART OF FP2 IS ROUNDED + fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), status); // FP2 is N + fp4 = floatx80_mul(twopi1, fp2, status); // W = N*P1 + fp5 = floatx80_mul(twopi2, fp2, status); // w = N*P2 + fp3 = floatx80_add(fp4, fp5, status); // FP3 is P + fp4 = floatx80_sub(fp4, fp3, status); // W-P + fp0 = floatx80_sub(fp0, fp3, status); // FP0 is A := R - P + fp4 = floatx80_add(fp4, fp5, status); // FP4 is p = (W-P)+w + fp3 = fp0; // FP3 is A + fp1 = floatx80_sub(fp1, fp4, status); // FP1 is a := r - p + fp0 = floatx80_add(fp0, fp1, status); // FP0 is R := A+a + + if (endflag > 0) { + n = floatx80_to_int32(fp2, status); + goto sincont; + } + fp3 = floatx80_sub(fp3, fp0, status); // A-R + fp1 = floatx80_add(fp1, fp3, status); // FP1 is r := (A-R)+a + goto loop; + } else { + // SINSM + fp0 = float32_to_floatx80(0x3F800000, status); // 1 + + RESET_PREC; + + if (adjn) { + // COSTINY + a = floatx80_sub(fp0, float32_to_floatx80(0x00800000, status), status); + } else { + // SINTINY + a = floatx80_move(a, status); + } + float_raise(float_flag_inexact, status); + + return a; + } + } else { + fp1 = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x3FE45F306DC9C883), status), status); // X*2/PI + + n = floatx80_to_int32(fp1, status); + j = 32 + n; + + fp0 = floatx80_sub(fp0, pi_tbl[j], status); // X-Y1 + fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), status); // FP0 IS R = (X-Y1)-Y2 + + sincont: + if ((n + adjn) & 1) { + // COSPOLY + fp0 = floatx80_mul(fp0, fp0, status); // FP0 IS S + fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS T + fp2 = float64_to_floatx80(LIT64(0x3D2AC4D0D6011EE3), status); // B8 + fp3 = float64_to_floatx80(LIT64(0xBDA9396F9F45AC19), status); // B7 + + xSign = extractFloatx80Sign(fp0); // X IS S + xExp = extractFloatx80Exp(fp0); + xSig = extractFloatx80Frac(fp0); + + if (((n + adjn) >> 1) & 1) { + xSign ^= 1; + posneg1 = 0xBF800000; // -1 + } else { + xSign ^= 0; + posneg1 = 0x3F800000; // 1 + } // X IS NOW R'= SGN*R + + fp2 = floatx80_mul(fp2, fp1, status); // TB8 + fp3 = floatx80_mul(fp3, fp1, status); // TB7 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3E21EED90612C972), status), status); // B6+TB8 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBE927E4FB79D9FCF), status), status); // B5+TB7 + fp2 = floatx80_mul(fp2, fp1, status); // T(B6+TB8) + fp3 = floatx80_mul(fp3, fp1, status); // T(B5+TB7) + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EFA01A01A01D423), status), status); // B4+T(B6+TB8) + fp4 = packFloatx80(1, 0x3FF5, LIT64(0xB60B60B60B61D438)); + fp3 = floatx80_add(fp3, fp4, status); // B3+T(B5+TB7) + fp2 = floatx80_mul(fp2, fp1, status); // T(B4+T(B6+TB8)) + fp1 = floatx80_mul(fp1, fp3, status); // T(B3+T(B5+TB7)) + fp4 = packFloatx80(0, 0x3FFA, LIT64(0xAAAAAAAAAAAAAB5E)); + fp2 = floatx80_add(fp2, fp4, status); // B2+T(B4+T(B6+TB8)) + fp1 = floatx80_add(fp1, float32_to_floatx80(0xBF000000, status), status); // B1+T(B3+T(B5+TB7)) + fp0 = floatx80_mul(fp0, fp2, status); // S(B2+T(B4+T(B6+TB8))) + fp0 = floatx80_add(fp0, fp1, status); // [B1+T(B3+T(B5+TB7))]+[S(B2+T(B4+T(B6+TB8)))] + + x = packFloatx80(xSign, xExp, xSig); + fp0 = floatx80_mul(fp0, x, status); + + RESET_PREC; + + a = floatx80_add(fp0, float32_to_floatx80(posneg1, status), status); + + float_raise(float_flag_inexact, status); + + return a; + } else { + // SINPOLY + xSign = extractFloatx80Sign(fp0); // X IS R + xExp = extractFloatx80Exp(fp0); + xSig = extractFloatx80Frac(fp0); + + xSign ^= ((n + adjn) >> 1) & 1; // X IS NOW R'= SGN*R + + fp0 = floatx80_mul(fp0, fp0, status); // FP0 IS S + fp1 = floatx80_mul(fp0, fp0, status); // FP1 IS T + fp3 = float64_to_floatx80(LIT64(0xBD6AAA77CCC994F5), status); // A7 + fp2 = float64_to_floatx80(LIT64(0x3DE612097AAE8DA1), status); // A6 + fp3 = floatx80_mul(fp3, fp1, status); // T*A7 + fp2 = floatx80_mul(fp2, fp1, status); // T*A6 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBE5AE6452A118AE4), status), status); // A5+T*A7 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3EC71DE3A5341531), status), status); // A4+T*A6 + fp3 = floatx80_mul(fp3, fp1, status); // T(A5+TA7) + fp2 = floatx80_mul(fp2, fp1, status); // T(A4+TA6) + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBF2A01A01A018B59), status), status); // A3+T(A5+TA7) + fp4 = packFloatx80(0, 0x3FF8, LIT64(0x88888888888859AF)); + fp2 = floatx80_add(fp2, fp4, status); // A2+T(A4+TA6) + fp1 = floatx80_mul(fp1, fp3, status); // T(A3+T(A5+TA7)) + fp2 = floatx80_mul(fp2, fp0, status); // S(A2+T(A4+TA6)) + fp4 = packFloatx80(1, 0x3FFC, LIT64(0xAAAAAAAAAAAAAA99)); + fp1 = floatx80_add(fp1, fp4, status); // A1+T(A3+T(A5+TA7)) + fp1 = floatx80_add(fp1, fp2, status); // [A1+T(A3+T(A5+TA7))]+[S(A2+T(A4+TA6))] + + x = packFloatx80(xSign, xExp, xSig); + fp0 = floatx80_mul(fp0, x, status); // R'*S + fp0 = floatx80_mul(fp0, fp1, status); // SIN(R')-R' + + RESET_PREC; + + a = floatx80_add(fp0, x, status); + + float_raise(float_flag_inexact, status); + + return a; + } + } +} + +/*---------------------------------------------------------------------------- + | Hyperbolic sine + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_sinh(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + int32_t compact; + floatx80 fp0, fp1, fp2; + float32 fact; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + return a; + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(aSign, 0, 0); + } + + SET_PREC; + + compact = floatx80_make_compact(aExp, aSig); + + if (compact > 0x400CB167) { + // SINHBIG + if (compact > 0x400CB2B3) { + RESET_PREC; + + a = roundAndPackFloatx80(status->floatx80_rounding_precision, aSign, 0x8000, aSig, 0, status); + float_raise(float_flag_inexact, status); + return a; + } else { + fp0 = floatx80_abs(a, status); // Y = |X| + fp0 = floatx80_sub(fp0, float64_to_floatx80(LIT64(0x40C62D38D3D64634), status), status); // (|X|-16381LOG2_LEAD) + fp0 = floatx80_sub(fp0, float64_to_floatx80(LIT64(0x3D6F90AEB1E75CC7), status), status); // |X| - 16381 LOG2, ACCURATE + fp0 = floatx80_etox(fp0, status); + fp2 = packFloatx80(aSign, 0x7FFB, one_sig); + + RESET_PREC; + + a = floatx80_mul(fp0, fp2, status); + + float_raise(float_flag_inexact, status); + + return a; + } + } else { // |X| < 16380 LOG2 + fp0 = floatx80_abs(a, status); // Y = |X| + fp0 = floatx80_etoxm1(fp0, status); // FP0 IS Z = EXPM1(Y) + fp1 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // 1+Z + fp2 = fp0; + fp0 = floatx80_div(fp0, fp1, status); // Z/(1+Z) + fp0 = floatx80_add(fp0, fp2, status); + + fact = 0x3F000000; + fact |= aSign ? 0x80000000 : 0x00000000; + + RESET_PREC; + + a = floatx80_mul(fp0, float32_to_floatx80(fact, status), status); + + float_raise(float_flag_inexact, status); + + return a; + } +} + +/*---------------------------------------------------------------------------- + | Tangent + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_tan(floatx80 a, float_status *status) +{ + flag aSign, xSign; + int32_t aExp, xExp; + uint64_t aSig, xSig; + + int32_t compact, l, n, j; + floatx80 fp0, fp1, fp2, fp3, fp4, fp5, invtwopi, twopi1, twopi2; + float32 twoto63; + flag endflag; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + float_raise(float_flag_invalid, status); + return floatx80_default_nan(status); + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(aSign, 0, 0); + } + + SET_PREC; + + compact = floatx80_make_compact(aExp, aSig); + + fp0 = a; + + if (compact < 0x3FD78000 || compact > 0x4004BC7E) { // 2^(-40) > |X| > 15 PI + if (compact > 0x3FFF8000) { // |X| >= 15 PI + // REDUCEX + fp1 = packFloatx80(0, 0, 0); + if (compact == 0x7FFEFFFF) { + twopi1 = packFloatx80(aSign ^ 1, 0x7FFE, LIT64(0xC90FDAA200000000)); + twopi2 = packFloatx80(aSign ^ 1, 0x7FDC, LIT64(0x85A308D300000000)); + fp0 = floatx80_add(fp0, twopi1, status); + fp1 = fp0; + fp0 = floatx80_add(fp0, twopi2, status); + fp1 = floatx80_sub(fp1, fp0, status); + fp1 = floatx80_add(fp1, twopi2, status); + } + loop: + xSign = extractFloatx80Sign(fp0); + xExp = extractFloatx80Exp(fp0); + xExp -= 0x3FFF; + if (xExp <= 28) { + l = 0; + endflag = 1; + } else { + l = xExp - 27; + endflag = 0; + } + invtwopi = packFloatx80(0, 0x3FFE - l, LIT64(0xA2F9836E4E44152A)); // INVTWOPI + twopi1 = packFloatx80(0, 0x3FFF + l, LIT64(0xC90FDAA200000000)); + twopi2 = packFloatx80(0, 0x3FDD + l, LIT64(0x85A308D300000000)); + + twoto63 = 0x5F000000; + twoto63 |= xSign ? 0x80000000 : 0x00000000; // SIGN(INARG)*2^63 IN SGL + + fp2 = floatx80_mul(fp0, invtwopi, status); + fp2 = floatx80_add(fp2, float32_to_floatx80(twoto63, status), status); // THE FRACTIONAL PART OF FP2 IS ROUNDED + fp2 = floatx80_sub(fp2, float32_to_floatx80(twoto63, status), status); // FP2 is N + fp4 = floatx80_mul(twopi1, fp2, status); // W = N*P1 + fp5 = floatx80_mul(twopi2, fp2, status); // w = N*P2 + fp3 = floatx80_add(fp4, fp5, status); // FP3 is P + fp4 = floatx80_sub(fp4, fp3, status); // W-P + fp0 = floatx80_sub(fp0, fp3, status); // FP0 is A := R - P + fp4 = floatx80_add(fp4, fp5, status); // FP4 is p = (W-P)+w + fp3 = fp0; // FP3 is A + fp1 = floatx80_sub(fp1, fp4, status); // FP1 is a := r - p + fp0 = floatx80_add(fp0, fp1, status); // FP0 is R := A+a + + if (endflag > 0) { + n = floatx80_to_int32(fp2, status); + goto tancont; + } + fp3 = floatx80_sub(fp3, fp0, status); // A-R + fp1 = floatx80_add(fp1, fp3, status); // FP1 is r := (A-R)+a + goto loop; + } else { + RESET_PREC; + + a = floatx80_move(a, status); + + float_raise(float_flag_inexact, status); + + return a; + } + } else { + fp1 = floatx80_mul(fp0, float64_to_floatx80(LIT64(0x3FE45F306DC9C883), status), status); // X*2/PI + + n = floatx80_to_int32(fp1, status); + j = 32 + n; + + fp0 = floatx80_sub(fp0, pi_tbl[j], status); // X-Y1 + fp0 = floatx80_sub(fp0, float32_to_floatx80(pi_tbl2[j], status), status); // FP0 IS R = (X-Y1)-Y2 + + tancont: + if (n & 1) { + // NODD + fp1 = fp0; // R + fp0 = floatx80_mul(fp0, fp0, status); // S = R*R + fp3 = float64_to_floatx80(LIT64(0x3EA0B759F50F8688), status); // Q4 + fp2 = float64_to_floatx80(LIT64(0xBEF2BAA5A8924F04), status); // P3 + fp3 = floatx80_mul(fp3, fp0, status); // SQ4 + fp2 = floatx80_mul(fp2, fp0, status); // SP3 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBF346F59B39BA65F), status), status); // Q3+SQ4 + fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00)); + fp2 = floatx80_add(fp2, fp4, status); // P2+SP3 + fp3 = floatx80_mul(fp3, fp0, status); // S(Q3+SQ4) + fp2 = floatx80_mul(fp2, fp0, status); // S(P2+SP3) + fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1)); + fp3 = floatx80_add(fp3, fp4, status); // Q2+S(Q3+SQ4) + fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA)); + fp2 = floatx80_add(fp2, fp4, status); // P1+S(P2+SP3) + fp3 = floatx80_mul(fp3, fp0, status); // S(Q2+S(Q3+SQ4)) + fp2 = floatx80_mul(fp2, fp0, status); // S(P1+S(P2+SP3)) + fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE)); + fp3 = floatx80_add(fp3, fp4, status); // Q1+S(Q2+S(Q3+SQ4)) + fp2 = floatx80_mul(fp2, fp1, status); // RS(P1+S(P2+SP3)) + fp0 = floatx80_mul(fp0, fp3, status); // S(Q1+S(Q2+S(Q3+SQ4))) + fp1 = floatx80_add(fp1, fp2, status); // R+RS(P1+S(P2+SP3)) + fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // 1+S(Q1+S(Q2+S(Q3+SQ4))) + + xSign = extractFloatx80Sign(fp1); + xExp = extractFloatx80Exp(fp1); + xSig = extractFloatx80Frac(fp1); + xSign ^= 1; + fp1 = packFloatx80(xSign, xExp, xSig); + + RESET_PREC; + + a = floatx80_div(fp0, fp1, status); + + float_raise(float_flag_inexact, status); + + return a; + } else { + fp1 = floatx80_mul(fp0, fp0, status); // S = R*R + fp3 = float64_to_floatx80(LIT64(0x3EA0B759F50F8688), status); // Q4 + fp2 = float64_to_floatx80(LIT64(0xBEF2BAA5A8924F04), status); // P3 + fp3 = floatx80_mul(fp3, fp1, status); // SQ4 + fp2 = floatx80_mul(fp2, fp1, status); // SP3 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0xBF346F59B39BA65F), status), status); // Q3+SQ4 + fp4 = packFloatx80(0, 0x3FF6, LIT64(0xE073D3FC199C4A00)); + fp2 = floatx80_add(fp2, fp4, status); // P2+SP3 + fp3 = floatx80_mul(fp3, fp1, status); // S(Q3+SQ4) + fp2 = floatx80_mul(fp2, fp1, status); // S(P2+SP3) + fp4 = packFloatx80(0, 0x3FF9, LIT64(0xD23CD68415D95FA1)); + fp3 = floatx80_add(fp3, fp4, status); // Q2+S(Q3+SQ4) + fp4 = packFloatx80(1, 0x3FFC, LIT64(0x8895A6C5FB423BCA)); + fp2 = floatx80_add(fp2, fp4, status); // P1+S(P2+SP3) + fp3 = floatx80_mul(fp3, fp1, status); // S(Q2+S(Q3+SQ4)) + fp2 = floatx80_mul(fp2, fp1, status); // S(P1+S(P2+SP3)) + fp4 = packFloatx80(1, 0x3FFD, LIT64(0xEEF57E0DA84BC8CE)); + fp3 = floatx80_add(fp3, fp4, status); // Q1+S(Q2+S(Q3+SQ4)) + fp2 = floatx80_mul(fp2, fp0, status); // RS(P1+S(P2+SP3)) + fp1 = floatx80_mul(fp1, fp3, status); // S(Q1+S(Q2+S(Q3+SQ4))) + fp0 = floatx80_add(fp0, fp2, status); // R+RS(P1+S(P2+SP3)) + fp1 = floatx80_add(fp1, float32_to_floatx80(0x3F800000, status), status); // 1+S(Q1+S(Q2+S(Q3+SQ4))) + + RESET_PREC; + + a = floatx80_div(fp0, fp1, status); + + float_raise(float_flag_inexact, status); + + return a; + } + } +} + +/*---------------------------------------------------------------------------- + | Hyperbolic tangent + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_tanh(floatx80 a, float_status *status) +{ + flag aSign, vSign; + int32_t aExp, vExp; + uint64_t aSig, vSig; + + int32_t compact; + floatx80 fp0, fp1; + float32 sign; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + return packFloatx80(aSign, one_exp, one_sig); + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(aSign, 0, 0); + } + + SET_PREC; + + compact = floatx80_make_compact(aExp, aSig); + + if (compact < 0x3FD78000 || compact > 0x3FFFDDCE) { + // TANHBORS + if (compact < 0x3FFF8000) { + // TANHSM + RESET_PREC; + + a = floatx80_move(a, status); + + float_raise(float_flag_inexact, status); + + return a; + } else { + if (compact > 0x40048AA1) { + // TANHHUGE + sign = 0x3F800000; + sign |= aSign ? 0x80000000 : 0x00000000; + fp0 = float32_to_floatx80(sign, status); + sign &= 0x80000000; + sign ^= 0x80800000; // -SIGN(X)*EPS + + RESET_PREC; + + a = floatx80_add(fp0, float32_to_floatx80(sign, status), status); + + float_raise(float_flag_inexact, status); + + return a; + } else { + fp0 = packFloatx80(0, aExp+1, aSig); // Y = 2|X| + fp0 = floatx80_etox(fp0, status); // FP0 IS EXP(Y) + fp0 = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // EXP(Y)+1 + sign = aSign ? 0x80000000 : 0x00000000; + fp1 = floatx80_div(float32_to_floatx80(sign^0xC0000000, status), fp0, status); // -SIGN(X)*2 / [EXP(Y)+1] + fp0 = float32_to_floatx80(sign | 0x3F800000, status); // SIGN + + RESET_PREC; + + a = floatx80_add(fp1, fp0, status); + + float_raise(float_flag_inexact, status); + + return a; + } + } + } else { // 2**(-40) < |X| < (5/2)LOG2 + fp0 = packFloatx80(0, aExp+1, aSig); // Y = 2|X| + fp0 = floatx80_etoxm1(fp0, status); // FP0 IS Z = EXPM1(Y) + fp1 = floatx80_add(fp0, float32_to_floatx80(0x40000000, status), status); // Z+2 + + vSign = extractFloatx80Sign(fp1); + vExp = extractFloatx80Exp(fp1); + vSig = extractFloatx80Frac(fp1); + + fp1 = packFloatx80(vSign ^ aSign, vExp, vSig); + + RESET_PREC; + + a = floatx80_div(fp0, fp1, status); + + float_raise(float_flag_inexact, status); + + return a; + } +} + +/*---------------------------------------------------------------------------- + | 10 to x + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_tentox(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + int32_t compact, n, j, l, m, m1; + floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + if (aSign) return packFloatx80(0, 0, 0); + return a; + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(0, one_exp, one_sig); + } + + SET_PREC; + + fp0 = a; + + compact = floatx80_make_compact(aExp, aSig); + + if (compact < 0x3FB98000 || compact > 0x400B9B07) { // |X| > 16480 LOG2/LOG10 or |X| < 2^(-70) + if (compact > 0x3FFF8000) { // |X| > 16480 + RESET_PREC; + + if (aSign) { + return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, -0x1000, aSig, 0, status); + } else { + return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 0x8000, aSig, 0, status); + } + } else { // |X| < 2^(-70) + RESET_PREC; + + a = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // 1 + X + + float_raise(float_flag_inexact, status); + + return a; + } + } else { // 2^(-70) <= |X| <= 16480 LOG 2 / LOG 10 + fp1 = fp0; // X + fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0x406A934F0979A371), status), status); // X*64*LOG10/LOG2 + n = floatx80_to_int32(fp1, status); // N=INT(X*64*LOG10/LOG2) + fp1 = int32_to_floatx80(n); + + j = n & 0x3F; + l = n / 64; // NOTE: this is really arithmetic right shift by 6 + if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity + l--; + } + m = l / 2; // NOTE: this is really arithmetic right shift by 1 + if (l < 0 && (l & 1)) { // arithmetic right shift is division and round towards minus infinity + m--; + } + m1 = l - m; + m1 += 0x3FFF; // ADJFACT IS 2^(M') + + adjfact = packFloatx80(0, m1, one_sig); + fact1 = exp2_tbl[j]; + fact1.high += m; + fact2.high = exp2_tbl2[j]>>16; + fact2.high += m; + fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF); + fact2.low <<= 48; + + fp2 = fp1; // N + fp1 = floatx80_mul(fp1, float64_to_floatx80(LIT64(0x3F734413509F8000), status), status); // N*(LOG2/64LOG10)_LEAD + fp3 = packFloatx80(1, 0x3FCD, LIT64(0xC0219DC1DA994FD2)); + fp2 = floatx80_mul(fp2, fp3, status); // N*(LOG2/64LOG10)_TRAIL + fp0 = floatx80_sub(fp0, fp1, status); // X - N L_LEAD + fp0 = floatx80_sub(fp0, fp2, status); // X - N L_TRAIL + fp2 = packFloatx80(0, 0x4000, LIT64(0x935D8DDDAAA8AC17)); // LOG10 + fp0 = floatx80_mul(fp0, fp2, status); // R + + // EXPR + fp1 = floatx80_mul(fp0, fp0, status); // S = R*R + fp2 = float64_to_floatx80(LIT64(0x3F56C16D6F7BD0B2), status); // A5 + fp3 = float64_to_floatx80(LIT64(0x3F811112302C712C), status); // A4 + fp2 = floatx80_mul(fp2, fp1, status); // S*A5 + fp3 = floatx80_mul(fp3, fp1, status); // S*A4 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FA5555555554CC1), status), status); // A3+S*A5 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3FC5555555554A54), status), status); // A2+S*A4 + fp2 = floatx80_mul(fp2, fp1, status); // S*(A3+S*A5) + fp3 = floatx80_mul(fp3, fp1, status); // S*(A2+S*A4) + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FE0000000000000), status), status); // A1+S*(A3+S*A5) + fp3 = floatx80_mul(fp3, fp0, status); // R*S*(A2+S*A4) + + fp2 = floatx80_mul(fp2, fp1, status); // S*(A1+S*(A3+S*A5)) + fp0 = floatx80_add(fp0, fp3, status); // R+R*S*(A2+S*A4) + fp0 = floatx80_add(fp0, fp2, status); // EXP(R) - 1 + + fp0 = floatx80_mul(fp0, fact1, status); + fp0 = floatx80_add(fp0, fact2, status); + fp0 = floatx80_add(fp0, fact1, status); + + RESET_PREC; + + a = floatx80_mul(fp0, adjfact, status); + + float_raise(float_flag_inexact, status); + + return a; + } +} + +/*---------------------------------------------------------------------------- + | 2 to x + *----------------------------------------------------------------------------*/ + +floatx80 floatx80_twotox(floatx80 a, float_status *status) +{ + flag aSign; + int32_t aExp; + uint64_t aSig; + + int32_t compact, n, j, l, m, m1; + floatx80 fp0, fp1, fp2, fp3, adjfact, fact1, fact2; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1)) return propagateFloatx80NaNOneArg(a, status); + if (aSign) return packFloatx80(0, 0, 0); + return a; + } + + if (aExp == 0 && aSig == 0) { + return packFloatx80(0, one_exp, one_sig); + } + + SET_PREC; + + fp0 = a; + + compact = floatx80_make_compact(aExp, aSig); + + if (compact < 0x3FB98000 || compact > 0x400D80C0) { // |X| > 16480 or |X| < 2^(-70) + if (compact > 0x3FFF8000) { // |X| > 16480 + RESET_PREC;; + + if (aSign) { + return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, -0x1000, aSig, 0, status); + } else { + return roundAndPackFloatx80(status->floatx80_rounding_precision, 0, 0x8000, aSig, 0, status); + } + } else { // |X| < 2^(-70) + RESET_PREC;; + + a = floatx80_add(fp0, float32_to_floatx80(0x3F800000, status), status); // 1 + X + + float_raise(float_flag_inexact, status); + + return a; + } + } else { // 2^(-70) <= |X| <= 16480 + fp1 = fp0; // X + fp1 = floatx80_mul(fp1, float32_to_floatx80(0x42800000, status), status); // X * 64 + n = floatx80_to_int32(fp1, status); + fp1 = int32_to_floatx80(n); + j = n & 0x3F; + l = n / 64; // NOTE: this is really arithmetic right shift by 6 + if (n < 0 && j) { // arithmetic right shift is division and round towards minus infinity + l--; + } + m = l / 2; // NOTE: this is really arithmetic right shift by 1 + if (l < 0 && (l & 1)) { // arithmetic right shift is division and round towards minus infinity + m--; + } + m1 = l - m; + m1 += 0x3FFF; // ADJFACT IS 2^(M') + + adjfact = packFloatx80(0, m1, one_sig); + fact1 = exp2_tbl[j]; + fact1.high += m; + fact2.high = exp2_tbl2[j]>>16; + fact2.high += m; + fact2.low = (uint64_t)(exp2_tbl2[j] & 0xFFFF); + fact2.low <<= 48; + + fp1 = floatx80_mul(fp1, float32_to_floatx80(0x3C800000, status), status); // (1/64)*N + fp0 = floatx80_sub(fp0, fp1, status); // X - (1/64)*INT(64 X) + fp2 = packFloatx80(0, 0x3FFE, LIT64(0xB17217F7D1CF79AC)); // LOG2 + fp0 = floatx80_mul(fp0, fp2, status); // R + + // EXPR + fp1 = floatx80_mul(fp0, fp0, status); // S = R*R + fp2 = float64_to_floatx80(LIT64(0x3F56C16D6F7BD0B2), status); // A5 + fp3 = float64_to_floatx80(LIT64(0x3F811112302C712C), status); // A4 + fp2 = floatx80_mul(fp2, fp1, status); // S*A5 + fp3 = floatx80_mul(fp3, fp1, status); // S*A4 + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FA5555555554CC1), status), status); // A3+S*A5 + fp3 = floatx80_add(fp3, float64_to_floatx80(LIT64(0x3FC5555555554A54), status), status); // A2+S*A4 + fp2 = floatx80_mul(fp2, fp1, status); // S*(A3+S*A5) + fp3 = floatx80_mul(fp3, fp1, status); // S*(A2+S*A4) + fp2 = floatx80_add(fp2, float64_to_floatx80(LIT64(0x3FE0000000000000), status), status); // A1+S*(A3+S*A5) + fp3 = floatx80_mul(fp3, fp0, status); // R*S*(A2+S*A4) + + fp2 = floatx80_mul(fp2, fp1, status); // S*(A1+S*(A3+S*A5)) + fp0 = floatx80_add(fp0, fp3, status); // R+R*S*(A2+S*A4) + fp0 = floatx80_add(fp0, fp2, status); // EXP(R) - 1 + + fp0 = floatx80_mul(fp0, fact1, status); + fp0 = floatx80_add(fp0, fact2, status); + fp0 = floatx80_add(fp0, fact1, status); + + RESET_PREC; + + a = floatx80_mul(fp0, adjfact, status); + + float_raise(float_flag_inexact, status); + + return a; + } +} diff --git a/softfloat/softfloat_fpsp_tables.h b/softfloat/softfloat_fpsp_tables.h new file mode 100644 index 0000000..b2b2952 --- /dev/null +++ b/softfloat/softfloat_fpsp_tables.h @@ -0,0 +1,528 @@ + +static const floatx80 atan_tbl[128] = { + {0x3FFB, LIT64(0x83D152C5060B7A51)}, + {0x3FFB, LIT64(0x8BC8544565498B8B)}, + {0x3FFB, LIT64(0x93BE406017626B0D)}, + {0x3FFB, LIT64(0x9BB3078D35AEC202)}, + {0x3FFB, LIT64(0xA3A69A525DDCE7DE)}, + {0x3FFB, LIT64(0xAB98E94362765619)}, + {0x3FFB, LIT64(0xB389E502F9C59862)}, + {0x3FFB, LIT64(0xBB797E436B09E6FB)}, + {0x3FFB, LIT64(0xC367A5C739E5F446)}, + {0x3FFB, LIT64(0xCB544C61CFF7D5C6)}, + {0x3FFB, LIT64(0xD33F62F82488533E)}, + {0x3FFB, LIT64(0xDB28DA8162404C77)}, + {0x3FFB, LIT64(0xE310A4078AD34F18)}, + {0x3FFB, LIT64(0xEAF6B0A8188EE1EB)}, + {0x3FFB, LIT64(0xF2DAF1949DBE79D5)}, + {0x3FFB, LIT64(0xFABD581361D47E3E)}, + {0x3FFC, LIT64(0x8346AC210959ECC4)}, + {0x3FFC, LIT64(0x8B232A08304282D8)}, + {0x3FFC, LIT64(0x92FB70B8D29AE2F9)}, + {0x3FFC, LIT64(0x9ACF476F5CCD1CB4)}, + {0x3FFC, LIT64(0xA29E76304954F23F)}, + {0x3FFC, LIT64(0xAA68C5D08AB85230)}, + {0x3FFC, LIT64(0xB22DFFFD9D539F83)}, + {0x3FFC, LIT64(0xB9EDEF453E900EA5)}, + {0x3FFC, LIT64(0xC1A85F1CC75E3EA5)}, + {0x3FFC, LIT64(0xC95D1BE828138DE6)}, + {0x3FFC, LIT64(0xD10BF300840D2DE4)}, + {0x3FFC, LIT64(0xD8B4B2BA6BC05E7A)}, + {0x3FFC, LIT64(0xE0572A6BB42335F6)}, + {0x3FFC, LIT64(0xE7F32A70EA9CAA8F)}, + {0x3FFC, LIT64(0xEF88843264ECEFAA)}, + {0x3FFC, LIT64(0xF7170A28ECC06666)}, + {0x3FFD, LIT64(0x812FD288332DAD32)}, + {0x3FFD, LIT64(0x88A8D1B1218E4D64)}, + {0x3FFD, LIT64(0x9012AB3F23E4AEE8)}, + {0x3FFD, LIT64(0x976CC3D411E7F1B9)}, + {0x3FFD, LIT64(0x9EB689493889A227)}, + {0x3FFD, LIT64(0xA5EF72C34487361B)}, + {0x3FFD, LIT64(0xAD1700BAF07A7227)}, + {0x3FFD, LIT64(0xB42CBCFAFD37EFB7)}, + {0x3FFD, LIT64(0xBB303A940BA80F89)}, + {0x3FFD, LIT64(0xC22115C6FCAEBBAF)}, + {0x3FFD, LIT64(0xC8FEF3E686331221)}, + {0x3FFD, LIT64(0xCFC98330B4000C70)}, + {0x3FFD, LIT64(0xD6807AA1102C5BF9)}, + {0x3FFD, LIT64(0xDD2399BC31252AA3)}, + {0x3FFD, LIT64(0xE3B2A8556B8FC517)}, + {0x3FFD, LIT64(0xEA2D764F64315989)}, + {0x3FFD, LIT64(0xF3BF5BF8BAD1A21D)}, + {0x3FFE, LIT64(0x801CE39E0D205C9A)}, + {0x3FFE, LIT64(0x8630A2DADA1ED066)}, + {0x3FFE, LIT64(0x8C1AD445F3E09B8C)}, + {0x3FFE, LIT64(0x91DB8F1664F350E2)}, + {0x3FFE, LIT64(0x97731420365E538C)}, + {0x3FFE, LIT64(0x9CE1C8E6A0B8CDBA)}, + {0x3FFE, LIT64(0xA22832DBCADAAE09)}, + {0x3FFE, LIT64(0xA746F2DDB7602294)}, + {0x3FFE, LIT64(0xAC3EC0FB997DD6A2)}, + {0x3FFE, LIT64(0xB110688AEBDC6F6A)}, + {0x3FFE, LIT64(0xB5BCC49059ECC4B0)}, + {0x3FFE, LIT64(0xBA44BC7DD470782F)}, + {0x3FFE, LIT64(0xBEA94144FD049AAC)}, + {0x3FFE, LIT64(0xC2EB4ABB661628B6)}, + {0x3FFE, LIT64(0xC70BD54CE602EE14)}, + {0x3FFE, LIT64(0xCD000549ADEC7159)}, + {0x3FFE, LIT64(0xD48457D2D8EA4EA3)}, + {0x3FFE, LIT64(0xDB948DA712DECE3B)}, + {0x3FFE, LIT64(0xE23855F969E8096A)}, + {0x3FFE, LIT64(0xE8771129C4353259)}, + {0x3FFE, LIT64(0xEE57C16E0D379C0D)}, + {0x3FFE, LIT64(0xF3E10211A87C3779)}, + {0x3FFE, LIT64(0xF919039D758B8D41)}, + {0x3FFE, LIT64(0xFE058B8F64935FB3)}, + {0x3FFF, LIT64(0x8155FB497B685D04)}, + {0x3FFF, LIT64(0x83889E3549D108E1)}, + {0x3FFF, LIT64(0x859CFA76511D724B)}, + {0x3FFF, LIT64(0x87952ECFFF8131E7)}, + {0x3FFF, LIT64(0x89732FD19557641B)}, + {0x3FFF, LIT64(0x8B38CAD101932A35)}, + {0x3FFF, LIT64(0x8CE7A8D8301EE6B5)}, + {0x3FFF, LIT64(0x8F46A39E2EAE5281)}, + {0x3FFF, LIT64(0x922DA7D791888487)}, + {0x3FFF, LIT64(0x94D19FCBDEDF5241)}, + {0x3FFF, LIT64(0x973AB94419D2A08B)}, + {0x3FFF, LIT64(0x996FF00E08E10B96)}, + {0x3FFF, LIT64(0x9B773F9512321DA7)}, + {0x3FFF, LIT64(0x9D55CC320F935624)}, + {0x3FFF, LIT64(0x9F100575006CC571)}, + {0x3FFF, LIT64(0xA0A9C290D97CC06C)}, + {0x3FFF, LIT64(0xA22659EBEBC0630A)}, + {0x3FFF, LIT64(0xA388B4AFF6EF0EC9)}, + {0x3FFF, LIT64(0xA4D35F1061D292C4)}, + {0x3FFF, LIT64(0xA60895DCFBE3187E)}, + {0x3FFF, LIT64(0xA72A51DC7367BEAC)}, + {0x3FFF, LIT64(0xA83A51530956168F)}, + {0x3FFF, LIT64(0xA93A20077539546E)}, + {0x3FFF, LIT64(0xAA9E7245023B2605)}, + {0x3FFF, LIT64(0xAC4C84BA6FE4D58F)}, + {0x3FFF, LIT64(0xADCE4A4A606B9712)}, + {0x3FFF, LIT64(0xAF2A2DCD8D263C9C)}, + {0x3FFF, LIT64(0xB0656F81F22265C7)}, + {0x3FFF, LIT64(0xB18465150F71496A)}, + {0x3FFF, LIT64(0xB28AAA156F9ADA35)}, + {0x3FFF, LIT64(0xB37B44FF3766B895)}, + {0x3FFF, LIT64(0xB458C3DCE9630433)}, + {0x3FFF, LIT64(0xB525529D562246BD)}, + {0x3FFF, LIT64(0xB5E2CCA95F9D88CC)}, + {0x3FFF, LIT64(0xB692CADA7ACA1ADA)}, + {0x3FFF, LIT64(0xB736AEA7A6925838)}, + {0x3FFF, LIT64(0xB7CFAB287E9F7B36)}, + {0x3FFF, LIT64(0xB85ECC66CB219835)}, + {0x3FFF, LIT64(0xB8E4FD5A20A593DA)}, + {0x3FFF, LIT64(0xB99F41F64AFF9BB5)}, + {0x3FFF, LIT64(0xBA7F1E17842BBE7B)}, + {0x3FFF, LIT64(0xBB4712857637E17D)}, + {0x3FFF, LIT64(0xBBFABE8A4788DF6F)}, + {0x3FFF, LIT64(0xBC9D0FAD2B689D79)}, + {0x3FFF, LIT64(0xBD306A39471ECD86)}, + {0x3FFF, LIT64(0xBDB6C731856AF18A)}, + {0x3FFF, LIT64(0xBE31CAC502E80D70)}, + {0x3FFF, LIT64(0xBEA2D55CE33194E2)}, + {0x3FFF, LIT64(0xBF0B10B7C03128F0)}, + {0x3FFF, LIT64(0xBF6B7A18DACB778D)}, + {0x3FFF, LIT64(0xBFC4EA4663FA18F6)}, + {0x3FFF, LIT64(0xC0181BDE8B89A454)}, + {0x3FFF, LIT64(0xC065B066CFBF6439)}, + {0x3FFF, LIT64(0xC0AE345F56340AE6)}, + {0x3FFF, LIT64(0xC0F222919CB9E6A7)} +}; + + +static const floatx80 exp_tbl[64] = { + {0x3FFF, LIT64(0x8000000000000000)}, + {0x3FFF, LIT64(0x8164D1F3BC030774)}, + {0x3FFF, LIT64(0x82CD8698AC2BA1D8)}, + {0x3FFF, LIT64(0x843A28C3ACDE4048)}, + {0x3FFF, LIT64(0x85AAC367CC487B14)}, + {0x3FFF, LIT64(0x871F61969E8D1010)}, + {0x3FFF, LIT64(0x88980E8092DA8528)}, + {0x3FFF, LIT64(0x8A14D575496EFD9C)}, + {0x3FFF, LIT64(0x8B95C1E3EA8BD6E8)}, + {0x3FFF, LIT64(0x8D1ADF5B7E5BA9E4)}, + {0x3FFF, LIT64(0x8EA4398B45CD53C0)}, + {0x3FFF, LIT64(0x9031DC431466B1DC)}, + {0x3FFF, LIT64(0x91C3D373AB11C338)}, + {0x3FFF, LIT64(0x935A2B2F13E6E92C)}, + {0x3FFF, LIT64(0x94F4EFA8FEF70960)}, + {0x3FFF, LIT64(0x96942D3720185A00)}, + {0x3FFF, LIT64(0x9837F0518DB8A970)}, + {0x3FFF, LIT64(0x99E0459320B7FA64)}, + {0x3FFF, LIT64(0x9B8D39B9D54E5538)}, + {0x3FFF, LIT64(0x9D3ED9A72CFFB750)}, + {0x3FFF, LIT64(0x9EF5326091A111AC)}, + {0x3FFF, LIT64(0xA0B0510FB9714FC4)}, + {0x3FFF, LIT64(0xA27043030C496818)}, + {0x3FFF, LIT64(0xA43515AE09E680A0)}, + {0x3FFF, LIT64(0xA5FED6A9B15138EC)}, + {0x3FFF, LIT64(0xA7CD93B4E9653568)}, + {0x3FFF, LIT64(0xA9A15AB4EA7C0EF8)}, + {0x3FFF, LIT64(0xAB7A39B5A93ED338)}, + {0x3FFF, LIT64(0xAD583EEA42A14AC8)}, + {0x3FFF, LIT64(0xAF3B78AD690A4374)}, + {0x3FFF, LIT64(0xB123F581D2AC2590)}, + {0x3FFF, LIT64(0xB311C412A9112488)}, + {0x3FFF, LIT64(0xB504F333F9DE6484)}, + {0x3FFF, LIT64(0xB6FD91E328D17790)}, + {0x3FFF, LIT64(0xB8FBAF4762FB9EE8)}, + {0x3FFF, LIT64(0xBAFF5AB2133E45FC)}, + {0x3FFF, LIT64(0xBD08A39F580C36C0)}, + {0x3FFF, LIT64(0xBF1799B67A731084)}, + {0x3FFF, LIT64(0xC12C4CCA66709458)}, + {0x3FFF, LIT64(0xC346CCDA24976408)}, + {0x3FFF, LIT64(0xC5672A115506DADC)}, + {0x3FFF, LIT64(0xC78D74C8ABB9B15C)}, + {0x3FFF, LIT64(0xC9B9BD866E2F27A4)}, + {0x3FFF, LIT64(0xCBEC14FEF2727C5C)}, + {0x3FFF, LIT64(0xCE248C151F8480E4)}, + {0x3FFF, LIT64(0xD06333DAEF2B2594)}, + {0x3FFF, LIT64(0xD2A81D91F12AE45C)}, + {0x3FFF, LIT64(0xD4F35AABCFEDFA20)}, + {0x3FFF, LIT64(0xD744FCCAD69D6AF4)}, + {0x3FFF, LIT64(0xD99D15C278AFD7B4)}, + {0x3FFF, LIT64(0xDBFBB797DAF23754)}, + {0x3FFF, LIT64(0xDE60F4825E0E9124)}, + {0x3FFF, LIT64(0xE0CCDEEC2A94E110)}, + {0x3FFF, LIT64(0xE33F8972BE8A5A50)}, + {0x3FFF, LIT64(0xE5B906E77C8348A8)}, + {0x3FFF, LIT64(0xE8396A503C4BDC68)}, + {0x3FFF, LIT64(0xEAC0C6E7DD243930)}, + {0x3FFF, LIT64(0xED4F301ED9942B84)}, + {0x3FFF, LIT64(0xEFE4B99BDCDAF5CC)}, + {0x3FFF, LIT64(0xF281773C59FFB138)}, + {0x3FFF, LIT64(0xF5257D152486CC2C)}, + {0x3FFF, LIT64(0xF7D0DF730AD13BB8)}, + {0x3FFF, LIT64(0xFA83B2DB722A033C)}, + {0x3FFF, LIT64(0xFD3E0C0CF486C174)} +}; + +static const float32 exp_tbl2[64] = { + 0x00000000, 0x9F841A9B, 0x9FC1D5B9, 0xA0728369, + 0x1FC5C95C, 0x1EE85C9F, 0x9FA20729, 0xA07BF9AF, + 0xA0020DCF, 0x205A63DA, 0x1EB70051, 0x1F6EB029, + 0xA0781494, 0x9EB319B0, 0x2017457D, 0x1F11D537, + 0x9FB952DD, 0x1FE43087, 0x1FA2A818, 0x1FDE494D, + 0x20504890, 0xA073691C, 0x1F9B7A05, 0xA0797126, + 0xA071A140, 0x204F62DA, 0x1F283C4A, 0x9F9A7FDC, + 0xA05B3FAC, 0x1FDF2610, 0x9F705F90, 0x201F678A, + 0x1F32FB13, 0x20038B30, 0x200DC3CC, 0x9F8B2AE6, + 0xA02BBF70, 0xA00BF518, 0xA041DD41, 0x9FDF137B, + 0x201F1568, 0x1FC13A2E, 0xA03F8F03, 0x1FF4907D, + 0x9E6E53E4, 0x1FD6D45C, 0xA076EDB9, 0x9FA6DE21, + 0x1EE69A2F, 0x207F439F, 0x201EC207, 0x9E8BE175, + 0x20032C4B, 0x2004DFF5, 0x1E72F47A, 0x1F722F22, + 0xA017E945, 0x1F401A5B, 0x9FB9A9E3, 0x20744C05, + 0x1F773A19, 0x1FFE90D5, 0xA041ED22, 0x1F853F3A +}; + + +static const floatx80 exp2_tbl[64] = { + {0x3FFF, LIT64(0x8000000000000000)}, + {0x3FFF, LIT64(0x8164D1F3BC030773)}, + {0x3FFF, LIT64(0x82CD8698AC2BA1D7)}, + {0x3FFF, LIT64(0x843A28C3ACDE4046)}, + {0x3FFF, LIT64(0x85AAC367CC487B15)}, + {0x3FFF, LIT64(0x871F61969E8D1010)}, + {0x3FFF, LIT64(0x88980E8092DA8527)}, + {0x3FFF, LIT64(0x8A14D575496EFD9A)}, + {0x3FFF, LIT64(0x8B95C1E3EA8BD6E7)}, + {0x3FFF, LIT64(0x8D1ADF5B7E5BA9E6)}, + {0x3FFF, LIT64(0x8EA4398B45CD53C0)}, + {0x3FFF, LIT64(0x9031DC431466B1DC)}, + {0x3FFF, LIT64(0x91C3D373AB11C336)}, + {0x3FFF, LIT64(0x935A2B2F13E6E92C)}, + {0x3FFF, LIT64(0x94F4EFA8FEF70961)}, + {0x3FFF, LIT64(0x96942D3720185A00)}, + {0x3FFF, LIT64(0x9837F0518DB8A96F)}, + {0x3FFF, LIT64(0x99E0459320B7FA65)}, + {0x3FFF, LIT64(0x9B8D39B9D54E5539)}, + {0x3FFF, LIT64(0x9D3ED9A72CFFB751)}, + {0x3FFF, LIT64(0x9EF5326091A111AE)}, + {0x3FFF, LIT64(0xA0B0510FB9714FC2)}, + {0x3FFF, LIT64(0xA27043030C496819)}, + {0x3FFF, LIT64(0xA43515AE09E6809E)}, + {0x3FFF, LIT64(0xA5FED6A9B15138EA)}, + {0x3FFF, LIT64(0xA7CD93B4E965356A)}, + {0x3FFF, LIT64(0xA9A15AB4EA7C0EF8)}, + {0x3FFF, LIT64(0xAB7A39B5A93ED337)}, + {0x3FFF, LIT64(0xAD583EEA42A14AC6)}, + {0x3FFF, LIT64(0xAF3B78AD690A4375)}, + {0x3FFF, LIT64(0xB123F581D2AC2590)}, + {0x3FFF, LIT64(0xB311C412A9112489)}, + {0x3FFF, LIT64(0xB504F333F9DE6484)}, + {0x3FFF, LIT64(0xB6FD91E328D17791)}, + {0x3FFF, LIT64(0xB8FBAF4762FB9EE9)}, + {0x3FFF, LIT64(0xBAFF5AB2133E45FB)}, + {0x3FFF, LIT64(0xBD08A39F580C36BF)}, + {0x3FFF, LIT64(0xBF1799B67A731083)}, + {0x3FFF, LIT64(0xC12C4CCA66709456)}, + {0x3FFF, LIT64(0xC346CCDA24976407)}, + {0x3FFF, LIT64(0xC5672A115506DADD)}, + {0x3FFF, LIT64(0xC78D74C8ABB9B15D)}, + {0x3FFF, LIT64(0xC9B9BD866E2F27A3)}, + {0x3FFF, LIT64(0xCBEC14FEF2727C5D)}, + {0x3FFF, LIT64(0xCE248C151F8480E4)}, + {0x3FFF, LIT64(0xD06333DAEF2B2595)}, + {0x3FFF, LIT64(0xD2A81D91F12AE45A)}, + {0x3FFF, LIT64(0xD4F35AABCFEDFA1F)}, + {0x3FFF, LIT64(0xD744FCCAD69D6AF4)}, + {0x3FFF, LIT64(0xD99D15C278AFD7B6)}, + {0x3FFF, LIT64(0xDBFBB797DAF23755)}, + {0x3FFF, LIT64(0xDE60F4825E0E9124)}, + {0x3FFF, LIT64(0xE0CCDEEC2A94E111)}, + {0x3FFF, LIT64(0xE33F8972BE8A5A51)}, + {0x3FFF, LIT64(0xE5B906E77C8348A8)}, + {0x3FFF, LIT64(0xE8396A503C4BDC68)}, + {0x3FFF, LIT64(0xEAC0C6E7DD24392F)}, + {0x3FFF, LIT64(0xED4F301ED9942B84)}, + {0x3FFF, LIT64(0xEFE4B99BDCDAF5CB)}, + {0x3FFF, LIT64(0xF281773C59FFB13A)}, + {0x3FFF, LIT64(0xF5257D152486CC2C)}, + {0x3FFF, LIT64(0xF7D0DF730AD13BB9)}, + {0x3FFF, LIT64(0xFA83B2DB722A033A)}, + {0x3FFF, LIT64(0xFD3E0C0CF486C175)} +}; + + +static const float32 exp2_tbl2[64] = { + 0x3F738000, 0x3FBEF7CA, 0x3FBDF8A9, 0x3FBCD7C9, + 0xBFBDE8DA, 0x3FBDE85C, 0x3FBEBBF1, 0x3FBB80CA, + 0xBFBA8373, 0xBFBE9670, 0x3FBDB700, 0x3FBEEEB0, + 0x3FBBFD6D, 0xBFBDB319, 0x3FBDBA2B, 0x3FBE91D5, + 0x3FBE8D5A, 0xBFBCDE7B, 0xBFBEBAAF, 0xBFBD86DA, + 0xBFBEBEDD, 0x3FBCC96E, 0xBFBEC90B, 0x3FBBD1DB, + 0x3FBCE5EB, 0xBFBEC274, 0x3FBEA83C, 0x3FBECB00, + 0x3FBE9301, 0xBFBD8367, 0xBFBEF05F, 0x3FBDFB3C, + 0x3FBEB2FB, 0x3FBAE2CB, 0x3FBCDC3C, 0x3FBEE9AA, + 0xBFBEAEFD, 0xBFBCBF51, 0x3FBEF88A, 0x3FBD83B2, + 0x3FBDF8AB, 0xBFBDFB17, 0xBFBEFE3C, 0xBFBBB6F8, + 0xBFBCEE53, 0xBFBDA4AE, 0x3FBC9124, 0x3FBEB243, + 0x3FBDE69A, 0xBFB8BC61, 0x3FBDF610, 0xBFBD8BE1, + 0x3FBACB12, 0x3FBB9BFE, 0x3FBCF2F4, 0x3FBEF22F, + 0xBFBDBF4A, 0x3FBEC01A, 0x3FBE8CAC, 0xBFBCBB3F, + 0x3FBEF73A, 0xBFB8B795, 0x3FBEF84B, 0xBFBEF581 +}; + + +static const floatx80 log_tbl[128] = { + {0x3FFE, LIT64(0xFE03F80FE03F80FE)}, + {0x3FF7, LIT64(0xFF015358833C47E2)}, + {0x3FFE, LIT64(0xFA232CF252138AC0)}, + {0x3FF9, LIT64(0xBDC8D83EAD88D549)}, + {0x3FFE, LIT64(0xF6603D980F6603DA)}, + {0x3FFA, LIT64(0x9CF43DCFF5EAFD48)}, + {0x3FFE, LIT64(0xF2B9D6480F2B9D65)}, + {0x3FFA, LIT64(0xDA16EB88CB8DF614)}, + {0x3FFE, LIT64(0xEF2EB71FC4345238)}, + {0x3FFB, LIT64(0x8B29B7751BD70743)}, + {0x3FFE, LIT64(0xEBBDB2A5C1619C8C)}, + {0x3FFB, LIT64(0xA8D839F830C1FB49)}, + {0x3FFE, LIT64(0xE865AC7B7603A197)}, + {0x3FFB, LIT64(0xC61A2EB18CD907AD)}, + {0x3FFE, LIT64(0xE525982AF70C880E)}, + {0x3FFB, LIT64(0xE2F2A47ADE3A18AF)}, + {0x3FFE, LIT64(0xE1FC780E1FC780E2)}, + {0x3FFB, LIT64(0xFF64898EDF55D551)}, + {0x3FFE, LIT64(0xDEE95C4CA037BA57)}, + {0x3FFC, LIT64(0x8DB956A97B3D0148)}, + {0x3FFE, LIT64(0xDBEB61EED19C5958)}, + {0x3FFC, LIT64(0x9B8FE100F47BA1DE)}, + {0x3FFE, LIT64(0xD901B2036406C80E)}, + {0x3FFC, LIT64(0xA9372F1D0DA1BD17)}, + {0x3FFE, LIT64(0xD62B80D62B80D62C)}, + {0x3FFC, LIT64(0xB6B07F38CE90E46B)}, + {0x3FFE, LIT64(0xD3680D3680D3680D)}, + {0x3FFC, LIT64(0xC3FD032906488481)}, + {0x3FFE, LIT64(0xD0B69FCBD2580D0B)}, + {0x3FFC, LIT64(0xD11DE0FF15AB18CA)}, + {0x3FFE, LIT64(0xCE168A7725080CE1)}, + {0x3FFC, LIT64(0xDE1433A16C66B150)}, + {0x3FFE, LIT64(0xCB8727C065C393E0)}, + {0x3FFC, LIT64(0xEAE10B5A7DDC8ADD)}, + {0x3FFE, LIT64(0xC907DA4E871146AD)}, + {0x3FFC, LIT64(0xF7856E5EE2C9B291)}, + {0x3FFE, LIT64(0xC6980C6980C6980C)}, + {0x3FFD, LIT64(0x82012CA5A68206D7)}, + {0x3FFE, LIT64(0xC4372F855D824CA6)}, + {0x3FFD, LIT64(0x882C5FCD7256A8C5)}, + {0x3FFE, LIT64(0xC1E4BBD595F6E947)}, + {0x3FFD, LIT64(0x8E44C60B4CCFD7DE)}, + {0x3FFE, LIT64(0xBFA02FE80BFA02FF)}, + {0x3FFD, LIT64(0x944AD09EF4351AF6)}, + {0x3FFE, LIT64(0xBD69104707661AA3)}, + {0x3FFD, LIT64(0x9A3EECD4C3EAA6B2)}, + {0x3FFE, LIT64(0xBB3EE721A54D880C)}, + {0x3FFD, LIT64(0xA0218434353F1DE8)}, + {0x3FFE, LIT64(0xB92143FA36F5E02E)}, + {0x3FFD, LIT64(0xA5F2FCABBBC506DA)}, + {0x3FFE, LIT64(0xB70FBB5A19BE3659)}, + {0x3FFD, LIT64(0xABB3B8BA2AD362A5)}, + {0x3FFE, LIT64(0xB509E68A9B94821F)}, + {0x3FFD, LIT64(0xB1641795CE3CA97B)}, + {0x3FFE, LIT64(0xB30F63528917C80B)}, + {0x3FFD, LIT64(0xB70475515D0F1C61)}, + {0x3FFE, LIT64(0xB11FD3B80B11FD3C)}, + {0x3FFD, LIT64(0xBC952AFEEA3D13E1)}, + {0x3FFE, LIT64(0xAF3ADDC680AF3ADE)}, + {0x3FFD, LIT64(0xC2168ED0F458BA4A)}, + {0x3FFE, LIT64(0xAD602B580AD602B6)}, + {0x3FFD, LIT64(0xC788F439B3163BF1)}, + {0x3FFE, LIT64(0xAB8F69E28359CD11)}, + {0x3FFD, LIT64(0xCCECAC08BF04565D)}, + {0x3FFE, LIT64(0xA9C84A47A07F5638)}, + {0x3FFD, LIT64(0xD24204872DD85160)}, + {0x3FFE, LIT64(0xA80A80A80A80A80B)}, + {0x3FFD, LIT64(0xD78949923BC3588A)}, + {0x3FFE, LIT64(0xA655C4392D7B73A8)}, + {0x3FFD, LIT64(0xDCC2C4B49887DACC)}, + {0x3FFE, LIT64(0xA4A9CF1D96833751)}, + {0x3FFD, LIT64(0xE1EEBD3E6D6A6B9E)}, + {0x3FFE, LIT64(0xA3065E3FAE7CD0E0)}, + {0x3FFD, LIT64(0xE70D785C2F9F5BDC)}, + {0x3FFE, LIT64(0xA16B312EA8FC377D)}, + {0x3FFD, LIT64(0xEC1F392C5179F283)}, + {0x3FFE, LIT64(0x9FD809FD809FD80A)}, + {0x3FFD, LIT64(0xF12440D3E36130E6)}, + {0x3FFE, LIT64(0x9E4CAD23DD5F3A20)}, + {0x3FFD, LIT64(0xF61CCE92346600BB)}, + {0x3FFE, LIT64(0x9CC8E160C3FB19B9)}, + {0x3FFD, LIT64(0xFB091FD38145630A)}, + {0x3FFE, LIT64(0x9B4C6F9EF03A3CAA)}, + {0x3FFD, LIT64(0xFFE97042BFA4C2AD)}, + {0x3FFE, LIT64(0x99D722DABDE58F06)}, + {0x3FFE, LIT64(0x825EFCED49369330)}, + {0x3FFE, LIT64(0x9868C809868C8098)}, + {0x3FFE, LIT64(0x84C37A7AB9A905C9)}, + {0x3FFE, LIT64(0x97012E025C04B809)}, + {0x3FFE, LIT64(0x87224C2E8E645FB7)}, + {0x3FFE, LIT64(0x95A02568095A0257)}, + {0x3FFE, LIT64(0x897B8CAC9F7DE298)}, + {0x3FFE, LIT64(0x9445809445809446)}, + {0x3FFE, LIT64(0x8BCF55DEC4CD05FE)}, + {0x3FFE, LIT64(0x92F113840497889C)}, + {0x3FFE, LIT64(0x8E1DC0FB89E125E5)}, + {0x3FFE, LIT64(0x91A2B3C4D5E6F809)}, + {0x3FFE, LIT64(0x9066E68C955B6C9B)}, + {0x3FFE, LIT64(0x905A38633E06C43B)}, + {0x3FFE, LIT64(0x92AADE74C7BE59E0)}, + {0x3FFE, LIT64(0x8F1779D9FDC3A219)}, + {0x3FFE, LIT64(0x94E9BFF615845643)}, + {0x3FFE, LIT64(0x8DDA520237694809)}, + {0x3FFE, LIT64(0x9723A1B720134203)}, + {0x3FFE, LIT64(0x8CA29C046514E023)}, + {0x3FFE, LIT64(0x995899C890EB8990)}, + {0x3FFE, LIT64(0x8B70344A139BC75A)}, + {0x3FFE, LIT64(0x9B88BDAA3A3DAE2F)}, + {0x3FFE, LIT64(0x8A42F8705669DB46)}, + {0x3FFE, LIT64(0x9DB4224FFFE1157C)}, + {0x3FFE, LIT64(0x891AC73AE9819B50)}, + {0x3FFE, LIT64(0x9FDADC268B7A12DA)}, + {0x3FFE, LIT64(0x87F78087F78087F8)}, + {0x3FFE, LIT64(0xA1FCFF17CE733BD4)}, + {0x3FFE, LIT64(0x86D905447A34ACC6)}, + {0x3FFE, LIT64(0xA41A9E8F5446FB9F)}, + {0x3FFE, LIT64(0x85BF37612CEE3C9B)}, + {0x3FFE, LIT64(0xA633CD7E6771CD8B)}, + {0x3FFE, LIT64(0x84A9F9C8084A9F9D)}, + {0x3FFE, LIT64(0xA8489E600B435A5E)}, + {0x3FFE, LIT64(0x839930523FBE3368)}, + {0x3FFE, LIT64(0xAA59233CCCA4BD49)}, + {0x3FFE, LIT64(0x828CBFBEB9A020A3)}, + {0x3FFE, LIT64(0xAC656DAE6BCC4985)}, + {0x3FFE, LIT64(0x81848DA8FAF0D277)}, + {0x3FFE, LIT64(0xAE6D8EE360BB2468)}, + {0x3FFE, LIT64(0x8080808080808081)}, + {0x3FFE, LIT64(0xB07197A23C46C654)} +}; + + +static const floatx80 pi_tbl[65] = { + {0xC004, LIT64(0xC90FDAA22168C235)}, + {0xC004, LIT64(0xC2C75BCD105D7C23)}, + {0xC004, LIT64(0xBC7EDCF7FF523611)}, + {0xC004, LIT64(0xB6365E22EE46F000)}, + {0xC004, LIT64(0xAFEDDF4DDD3BA9EE)}, + {0xC004, LIT64(0xA9A56078CC3063DD)}, + {0xC004, LIT64(0xA35CE1A3BB251DCB)}, + {0xC004, LIT64(0x9D1462CEAA19D7B9)}, + {0xC004, LIT64(0x96CBE3F9990E91A8)}, + {0xC004, LIT64(0x9083652488034B96)}, + {0xC004, LIT64(0x8A3AE64F76F80584)}, + {0xC004, LIT64(0x83F2677A65ECBF73)}, + {0xC003, LIT64(0xFB53D14AA9C2F2C2)}, + {0xC003, LIT64(0xEEC2D3A087AC669F)}, + {0xC003, LIT64(0xE231D5F66595DA7B)}, + {0xC003, LIT64(0xD5A0D84C437F4E58)}, + {0xC003, LIT64(0xC90FDAA22168C235)}, + {0xC003, LIT64(0xBC7EDCF7FF523611)}, + {0xC003, LIT64(0xAFEDDF4DDD3BA9EE)}, + {0xC003, LIT64(0xA35CE1A3BB251DCB)}, + {0xC003, LIT64(0x96CBE3F9990E91A8)}, + {0xC003, LIT64(0x8A3AE64F76F80584)}, + {0xC002, LIT64(0xFB53D14AA9C2F2C2)}, + {0xC002, LIT64(0xE231D5F66595DA7B)}, + {0xC002, LIT64(0xC90FDAA22168C235)}, + {0xC002, LIT64(0xAFEDDF4DDD3BA9EE)}, + {0xC002, LIT64(0x96CBE3F9990E91A8)}, + {0xC001, LIT64(0xFB53D14AA9C2F2C2)}, + {0xC001, LIT64(0xC90FDAA22168C235)}, + {0xC001, LIT64(0x96CBE3F9990E91A8)}, + {0xC000, LIT64(0xC90FDAA22168C235)}, + {0xBFFF, LIT64(0xC90FDAA22168C235)}, + {0x0000, LIT64(0x0000000000000000)}, + {0x3FFF, LIT64(0xC90FDAA22168C235)}, + {0x4000, LIT64(0xC90FDAA22168C235)}, + {0x4001, LIT64(0x96CBE3F9990E91A8)}, + {0x4001, LIT64(0xC90FDAA22168C235)}, + {0x4001, LIT64(0xFB53D14AA9C2F2C2)}, + {0x4002, LIT64(0x96CBE3F9990E91A8)}, + {0x4002, LIT64(0xAFEDDF4DDD3BA9EE)}, + {0x4002, LIT64(0xC90FDAA22168C235)}, + {0x4002, LIT64(0xE231D5F66595DA7B)}, + {0x4002, LIT64(0xFB53D14AA9C2F2C2)}, + {0x4003, LIT64(0x8A3AE64F76F80584)}, + {0x4003, LIT64(0x96CBE3F9990E91A8)}, + {0x4003, LIT64(0xA35CE1A3BB251DCB)}, + {0x4003, LIT64(0xAFEDDF4DDD3BA9EE)}, + {0x4003, LIT64(0xBC7EDCF7FF523611)}, + {0x4003, LIT64(0xC90FDAA22168C235)}, + {0x4003, LIT64(0xD5A0D84C437F4E58)}, + {0x4003, LIT64(0xE231D5F66595DA7B)}, + {0x4003, LIT64(0xEEC2D3A087AC669F)}, + {0x4003, LIT64(0xFB53D14AA9C2F2C2)}, + {0x4004, LIT64(0x83F2677A65ECBF73)}, + {0x4004, LIT64(0x8A3AE64F76F80584)}, + {0x4004, LIT64(0x9083652488034B96)}, + {0x4004, LIT64(0x96CBE3F9990E91A8)}, + {0x4004, LIT64(0x9D1462CEAA19D7B9)}, + {0x4004, LIT64(0xA35CE1A3BB251DCB)}, + {0x4004, LIT64(0xA9A56078CC3063DD)}, + {0x4004, LIT64(0xAFEDDF4DDD3BA9EE)}, + {0x4004, LIT64(0xB6365E22EE46F000)}, + {0x4004, LIT64(0xBC7EDCF7FF523611)}, + {0x4004, LIT64(0xC2C75BCD105D7C23)}, + {0x4004, LIT64(0xC90FDAA22168C235)} +}; + + +static const float32 pi_tbl2[65] = { + 0x21800000, 0xA0D00000, 0xA1E80000, 0x21480000, + 0xA1200000, 0x21FC0000, 0x21100000, 0xA1580000, + 0x21E00000, 0x20B00000, 0xA1880000, 0x21C40000, + 0x20000000, 0x21380000, 0xA1300000, 0x9FC00000, + 0x21000000, 0xA1680000, 0xA0A00000, 0x20900000, + 0x21600000, 0xA1080000, 0x1F800000, 0xA0B00000, + 0x20800000, 0xA0200000, 0x20E00000, 0x1F000000, + 0x20000000, 0x20600000, 0x1F800000, 0x1F000000, + 0x00000000, + 0x9F000000, 0x9F800000, 0xA0600000, 0xA0000000, + 0x9F000000, 0xA0E00000, 0x20200000, 0xA0800000, + 0x20B00000, 0x9F800000, 0x21080000, 0xA1600000, + 0xA0900000, 0x20A00000, 0x21680000, 0xA1000000, + 0x1FC00000, 0x21300000, 0xA1380000, 0xA0000000, + 0xA1C40000, 0x21880000, 0xA0B00000, 0xA1E00000, + 0x21580000, 0xA1100000, 0xA1FC0000, 0x21200000, + 0xA1480000, 0x21E80000, 0x20D00000, 0xA1800000 +}; -- 2.39.5