6 #include "softfloat/softfloat.h"
12 * The code in this source file is derived from release 2a of the SoftFloat
13 * IEC/IEEE Floating-point Arithmetic Package. Those parts of the code (and
14 * some later contributions) are provided under that license, as detailed below.
15 * It has subsequently been modified by contributors to the QEMU Project,
16 * so some portions are provided under:
17 * the SoftFloat-2a license
21 * Any future contributions to this file after December 1st 2014 will be
22 * taken to be licensed under the Softfloat-2a license unless specifically
23 * indicated otherwise.
27 ===============================================================================
28 This C source file is part of the SoftFloat IEC/IEEE Floating-point
29 Arithmetic Package, Release 2a.
31 Written by John R. Hauser. This work was made possible in part by the
32 International Computer Science Institute, located at Suite 600, 1947 Center
33 Street, Berkeley, California 94704. Funding was partially provided by the
34 National Science Foundation under grant MIP-9311980. The original version
35 of this code was written as part of a project to build a fixed-point vector
36 processor in collaboration with the University of California at Berkeley,
37 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
38 is available through the Web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
39 arithmetic/SoftFloat.html'.
41 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort
42 has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
43 TIMES RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO
44 PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
45 AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
47 Derivative works are acceptable, even for commercial purposes, so long as
48 (1) they include prominent notice that the work is derivative, and (2) they
49 include prominent notice akin to these four paragraphs for those parts of
50 this code that are retained.
52 ===============================================================================
56 * Copyright (c) 2006, Fabrice Bellard
57 * All rights reserved.
59 * Redistribution and use in source and binary forms, with or without
60 * modification, are permitted provided that the following conditions are met:
62 * 1. Redistributions of source code must retain the above copyright notice,
63 * this list of conditions and the following disclaimer.
65 * 2. Redistributions in binary form must reproduce the above copyright notice,
66 * this list of conditions and the following disclaimer in the documentation
67 * and/or other materials provided with the distribution.
69 * 3. Neither the name of the copyright holder nor the names of its contributors
70 * may be used to endorse or promote products derived from this software without
71 * specific prior written permission.
73 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
74 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
77 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
78 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
79 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
80 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
81 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
82 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
83 * THE POSSIBILITY OF SUCH DAMAGE.
86 /* Portions of this work are licensed under the terms of the GNU GPL,
87 * version 2 or later. See the COPYING file in the top-level directory.
90 /* We only need stdlib for abort() */
92 /*----------------------------------------------------------------------------
93 | Primitive arithmetic functions, including multi-word arithmetic, and
94 | division and square root approximations. (Can be specialized to target if
96 *----------------------------------------------------------------------------*/
97 #include "softfloat-macros.h"
99 /*----------------------------------------------------------------------------
100 | Variables for storing sign, exponent and significand of internal extended
101 | double-precision floating-point value for external use.
102 *----------------------------------------------------------------------------*/
103 flag floatx80_internal_sign = 0;
104 int32_t floatx80_internal_exp = 0;
105 uint64_t floatx80_internal_sig = 0;
106 int32_t floatx80_internal_exp0 = 0;
107 uint64_t floatx80_internal_sig0 = 0;
108 uint64_t floatx80_internal_sig1 = 0;
109 int8_t floatx80_internal_precision = 80;
110 int8_t floatx80_internal_mode = float_round_nearest_even;
112 /*----------------------------------------------------------------------------
113 | Functions for storing sign, exponent and significand of extended
114 | double-precision floating-point intermediate result for external use.
115 *----------------------------------------------------------------------------*/
116 floatx80 roundSaveFloatx80Internal( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status )
118 uint64_t roundIncrement, roundMask, roundBits;
121 if ( roundingPrecision == 80 ) {
123 } else if ( roundingPrecision == 64 ) {
124 roundIncrement = LIT64( 0x0000000000000400 );
125 roundMask = LIT64( 0x00000000000007FF );
126 } else if ( roundingPrecision == 32 ) {
127 roundIncrement = LIT64( 0x0000008000000000 );
128 roundMask = LIT64( 0x000000FFFFFFFFFF );
133 zSig0 |= ( zSig1 != 0 );
134 if ( status->float_rounding_mode != float_round_nearest_even ) {
135 if ( status->float_rounding_mode == float_round_to_zero ) {
138 roundIncrement = roundMask;
140 if ( status->float_rounding_mode == float_round_up ) roundIncrement = 0;
142 if ( status->float_rounding_mode == float_round_down ) roundIncrement = 0;
147 roundBits = zSig0 & roundMask;
149 zSig0 += roundIncrement;
150 if ( zSig0 < roundIncrement ) {
152 zSig0 = LIT64( 0x8000000000000000 );
154 roundIncrement = roundMask + 1;
155 if ( status->float_rounding_mode == float_round_nearest_even && ( roundBits<<1 == roundIncrement ) ) {
156 roundMask |= roundIncrement;
158 zSig0 &= ~ roundMask;
159 if ( zSig0 == 0 ) zExp = 0;
160 return packFloatx80( zSign, zExp, zSig0 );
163 increment = ( (int64_t) zSig1 < 0 );
164 if ( status->float_rounding_mode != float_round_nearest_even ) {
165 if ( status->float_rounding_mode == float_round_to_zero ) {
169 increment = ( status->float_rounding_mode == float_round_down ) && zSig1;
171 increment = ( status->float_rounding_mode == float_round_up ) && zSig1;
179 zSig0 = LIT64( 0x8000000000000000 );
181 if ((zSig1 << 1) == 0 && status->float_rounding_mode == float_round_nearest_even)
185 if ( zSig0 == 0 ) zExp = 0;
187 return packFloatx80( zSign, zExp, zSig0 );
190 static void saveFloatx80Internal( int8_t prec, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status )
192 floatx80_internal_sign = zSign;
193 floatx80_internal_exp = zExp;
194 floatx80_internal_sig0 = zSig0;
195 floatx80_internal_sig1 = zSig1;
196 floatx80_internal_precision = prec;
197 floatx80_internal_mode = status->float_rounding_mode;
200 static void saveFloat64Internal( flag zSign, int16_t zExp, uint64_t zSig, float_status *status )
202 floatx80_internal_sign = zSign;
203 floatx80_internal_exp = zExp + 0x3C01;
204 floatx80_internal_sig0 = zSig<<1;
205 floatx80_internal_sig1 = 0;
206 floatx80_internal_precision = 64;
207 floatx80_internal_mode = status->float_rounding_mode;
210 static void saveFloat32Internal( flag zSign, int16_t zExp, uint32_t zSig, float_status *status )
212 floatx80 z = roundSaveFloatx80Internal( 32, zSign, zExp + 0x3F81, ( (uint64_t) zSig )<<33, 0, status );
214 floatx80_internal_sign = zSign;
215 floatx80_internal_exp = extractFloatx80Exp( z );
216 floatx80_internal_sig = extractFloatx80Frac( z );
217 floatx80_internal_exp0 = zExp + 0x3F81;
218 floatx80_internal_sig0 = ( (uint64_t) zSig )<<33;
219 floatx80_internal_sig1 = 0;
222 /*----------------------------------------------------------------------------
223 | Functions for returning sign, exponent and significand of extended
224 | double-precision floating-point intermediate result for external use.
225 *----------------------------------------------------------------------------*/
227 void getRoundedFloatInternal( int8_t roundingPrecision, flag *pzSign, int32_t *pzExp, uint64_t *pzSig )
229 uint64_t roundIncrement, roundMask, roundBits;
232 flag zSign = floatx80_internal_sign;
233 int32_t zExp = floatx80_internal_exp;
234 uint64_t zSig0 = floatx80_internal_sig0;
235 uint64_t zSig1 = floatx80_internal_sig1;
237 if ( roundingPrecision == 80 ) {
239 } else if ( roundingPrecision == 64 ) {
240 roundIncrement = LIT64( 0x0000000000000400 );
241 roundMask = LIT64( 0x00000000000007FF );
242 } else if ( roundingPrecision == 32 ) {
243 roundIncrement = LIT64( 0x0000008000000000 );
244 roundMask = LIT64( 0x000000FFFFFFFFFF );
249 zSig0 |= ( zSig1 != 0 );
250 if ( floatx80_internal_mode != float_round_nearest_even ) {
251 if ( floatx80_internal_mode == float_round_to_zero ) {
254 roundIncrement = roundMask;
256 if ( floatx80_internal_mode == float_round_up ) roundIncrement = 0;
258 if ( floatx80_internal_mode == float_round_down ) roundIncrement = 0;
263 roundBits = zSig0 & roundMask;
265 zSig0 += roundIncrement;
266 if ( zSig0 < roundIncrement ) {
268 zSig0 = LIT64( 0x8000000000000000 );
270 roundIncrement = roundMask + 1;
271 if ( floatx80_internal_mode == float_round_nearest_even && ( roundBits<<1 == roundIncrement ) ) {
272 roundMask |= roundIncrement;
274 zSig0 &= ~ roundMask;
275 if ( zSig0 == 0 ) zExp = 0;
283 increment = ( (int64_t) zSig1 < 0 );
284 if ( floatx80_internal_mode != float_round_nearest_even ) {
285 if ( floatx80_internal_mode == float_round_to_zero ) {
289 increment = ( floatx80_internal_mode == float_round_down ) && zSig1;
291 increment = ( floatx80_internal_mode == float_round_up ) && zSig1;
299 zSig0 = LIT64( 0x8000000000000000 );
301 if ((zSig1 << 1) == 0 && floatx80_internal_mode == float_round_nearest_even)
305 if ( zSig0 == 0 ) zExp = 0;
313 floatx80 getFloatInternalOverflow( void )
319 getRoundedFloatInternal( floatx80_internal_precision, &zSign, &zExp, &zSig );
321 if (zExp > (0x7fff + 0x6000)) { // catastrophic
327 return packFloatx80( zSign, zExp, zSig );
331 floatx80 getFloatInternalUnderflow( void )
337 getRoundedFloatInternal( floatx80_internal_precision, &zSign, &zExp, &zSig );
339 if (zExp < (0x0000 - 0x6000)) { // catastrophic
345 return packFloatx80( zSign, zExp, zSig );
349 floatx80 getFloatInternalRoundedAll( void )
353 uint64_t zSig, zSig32, zSig64, zSig80;
355 if (floatx80_internal_precision == 80) {
356 getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 );
358 } else if (floatx80_internal_precision == 64) {
359 getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 );
360 getRoundedFloatInternal( 64, &zSign, &zExp, &zSig64 );
362 zSig |= zSig80 & LIT64( 0x00000000000007FF );
364 getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 );
365 getRoundedFloatInternal( 64, &zSign, &zExp, &zSig64 );
366 getRoundedFloatInternal( 32, &zSign, &zExp, &zSig32 );
368 zSig |= zSig64 & LIT64( 0x000000FFFFFFFFFF );
369 zSig |= zSig80 & LIT64( 0x00000000000007FF );
372 return packFloatx80( zSign, zExp & 0x7FFF, zSig );
376 floatx80 getFloatInternalRoundedSome( void )
380 uint64_t zSig, zSig32, zSig64, zSig80;
382 if (floatx80_internal_precision == 80) {
383 getRoundedFloatInternal( 80, &zSign, &zExp, &zSig80 );
385 } else if (floatx80_internal_precision == 64) {
386 getRoundedFloatInternal( 64, &zSign, &zExp, &zSig64 );
387 zSig80 = floatx80_internal_sig0;
388 if (zSig64 != (zSig80 & LIT64( 0xFFFFFFFFFFFFF800 ))) {
392 zSig |= zSig80 & LIT64( 0x00000000000007FF );
394 getRoundedFloatInternal( 32, &zSign, &zExp, &zSig32 );
395 zSig80 = floatx80_internal_sig0;
396 if (zSig32 != (zSig80 & LIT64( 0xFFFFFF0000000000 ))) {
400 zSig |= zSig80 & LIT64( 0x000000FFFFFFFFFF );
403 return packFloatx80( zSign, zExp & 0x7FFF, zSig );
407 floatx80 getFloatInternalFloatx80( void )
413 getRoundedFloatInternal( 80, &zSign, &zExp, &zSig );
415 return packFloatx80( zSign, zExp & 0x7FFF, zSig );
419 floatx80 getFloatInternalUnrounded( void )
421 flag zSign = floatx80_internal_sign;
422 int32_t zExp = floatx80_internal_exp;
423 uint64_t zSig = floatx80_internal_sig0;
425 return packFloatx80( zSign, zExp & 0x7FFF, zSig );
429 uint64_t getFloatInternalGRS( void )
432 if (floatx80_internal_sig1)
435 if (floatx80_internal_precision == 64 &&
436 floatx80_internal_sig0 & LIT64( 0x00000000000007FF )) {
439 if (floatx80_internal_precision == 32 &&
440 floatx80_internal_sig0 & LIT64( 0x000000FFFFFFFFFF )) {
447 shift64RightJamming(floatx80_internal_sig1, 61, &roundbits);
453 /*----------------------------------------------------------------------------
454 | Functions and definitions to determine: (1) whether tininess for underflow
455 | is detected before or after rounding by default, (2) what (if anything)
456 | happens when exceptions are raised, (3) how signaling NaNs are distinguished
457 | from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
458 | are propagated from function inputs to output. These details are target-
460 *----------------------------------------------------------------------------*/
461 #include "softfloat-specialize.h"
463 /*----------------------------------------------------------------------------
464 | Raises the exceptions specified by `flags'. Floating-point traps can be
465 | defined here if desired. It is currently not possible for such a trap
466 | to substitute a result value. If traps are not implemented, this routine
467 | should be simply `float_exception_flags |= flags;'.
468 *----------------------------------------------------------------------------*/
470 void float_raise(uint8_t flags, float_status *status)
472 status->float_exception_flags |= flags;
476 /*----------------------------------------------------------------------------
477 | Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
478 | and 7, and returns the properly rounded 32-bit integer corresponding to the
479 | input. If `zSign' is 1, the input is negated before being converted to an
480 | integer. Bit 63 of `absZ' must be zero. Ordinarily, the fixed-point input
481 | is simply rounded to an integer, with the inexact exception raised if the
482 | input cannot be represented exactly as an integer. However, if the fixed-
483 | point input is too large, the invalid exception is raised and the largest
484 | positive or negative integer is returned.
485 *----------------------------------------------------------------------------*/
487 static int32_t roundAndPackInt32(flag zSign, uint64_t absZ, float_status *status)
490 flag roundNearestEven;
491 int8_t roundIncrement, roundBits;
494 roundingMode = status->float_rounding_mode;
495 roundNearestEven = ( roundingMode == float_round_nearest_even );
496 switch (roundingMode) {
497 case float_round_nearest_even:
498 case float_round_ties_away:
499 roundIncrement = 0x40;
501 case float_round_to_zero:
505 roundIncrement = zSign ? 0 : 0x7f;
507 case float_round_down:
508 roundIncrement = zSign ? 0x7f : 0;
513 roundBits = absZ & 0x7F;
514 absZ = ( absZ + roundIncrement )>>7;
515 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
517 if ( zSign ) z = - z;
518 if ( ( absZ>>32 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
519 float_raise(float_flag_invalid, status);
520 return zSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
523 status->float_exception_flags |= float_flag_inexact;
530 #ifdef SOFTFLOAT_68K // 30-01-2017: Added for Previous
531 static int16_t roundAndPackInt16( flag zSign, uint64_t absZ, float_status *status )
534 flag roundNearestEven;
535 int8_t roundIncrement, roundBits;
538 roundingMode = status->float_rounding_mode;
539 roundNearestEven = ( roundingMode == float_round_nearest_even );
540 roundIncrement = 0x40;
541 if ( ! roundNearestEven ) {
542 if ( roundingMode == float_round_to_zero ) {
546 roundIncrement = 0x7F;
548 if ( roundingMode == float_round_up ) roundIncrement = 0;
551 if ( roundingMode == float_round_down ) roundIncrement = 0;
555 roundBits = absZ & 0x7F;
556 absZ = ( absZ + roundIncrement )>>7;
557 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
559 if ( zSign ) z = - z;
561 if ( ( absZ>>16 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
562 float_raise( float_flag_invalid, status );
563 return zSign ? (int16_t) 0x8000 : 0x7FFF;
565 if ( roundBits ) status->float_exception_flags |= float_flag_inexact;
570 static int8_t roundAndPackInt8( flag zSign, uint64_t absZ, float_status *status )
573 flag roundNearestEven;
574 int8_t roundIncrement, roundBits;
577 roundingMode = status->float_rounding_mode;
578 roundNearestEven = ( roundingMode == float_round_nearest_even );
579 roundIncrement = 0x40;
580 if ( ! roundNearestEven ) {
581 if ( roundingMode == float_round_to_zero ) {
585 roundIncrement = 0x7F;
587 if ( roundingMode == float_round_up ) roundIncrement = 0;
590 if ( roundingMode == float_round_down ) roundIncrement = 0;
594 roundBits = absZ & 0x7F;
595 absZ = ( absZ + roundIncrement )>>7;
596 absZ &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
598 if ( zSign ) z = - z;
600 if ( ( absZ>>8 ) || ( z && ( ( z < 0 ) ^ zSign ) ) ) {
601 float_raise( float_flag_invalid, status );
602 return zSign ? (int8_t) 0x80 : 0x7F;
604 if ( roundBits ) status->float_exception_flags |= float_flag_inexact;
608 #endif // End of addition for Previous
610 /*----------------------------------------------------------------------------
611 | Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
612 | `absZ1', with binary point between bits 63 and 64 (between the input words),
613 | and returns the properly rounded 64-bit integer corresponding to the input.
614 | If `zSign' is 1, the input is negated before being converted to an integer.
615 | Ordinarily, the fixed-point input is simply rounded to an integer, with
616 | the inexact exception raised if the input cannot be represented exactly as
617 | an integer. However, if the fixed-point input is too large, the invalid
618 | exception is raised and the largest positive or negative integer is
620 *----------------------------------------------------------------------------*/
622 static int64_t roundAndPackInt64(flag zSign, uint64_t absZ0, uint64_t absZ1,
623 float_status *status)
626 flag roundNearestEven, increment;
629 roundingMode = status->float_rounding_mode;
630 roundNearestEven = ( roundingMode == float_round_nearest_even );
631 switch (roundingMode) {
632 case float_round_nearest_even:
633 case float_round_ties_away:
634 increment = ((int64_t) absZ1 < 0);
636 case float_round_to_zero:
640 increment = !zSign && absZ1;
642 case float_round_down:
643 increment = zSign && absZ1;
650 if ( absZ0 == 0 ) goto overflow;
651 absZ0 &= ~ ( ( (uint64_t) ( absZ1<<1 ) == 0 ) & roundNearestEven );
654 if ( zSign ) z = - z;
655 if ( z && ( ( z < 0 ) ^ zSign ) ) {
657 float_raise(float_flag_invalid, status);
659 zSign ? (uint64_t) LIT64( 0x8000000000000000 )
660 : LIT64( 0x7FFFFFFFFFFFFFFF );
663 status->float_exception_flags |= float_flag_inexact;
669 /*----------------------------------------------------------------------------
670 | Returns the fraction bits of the single-precision floating-point value `a'.
671 *----------------------------------------------------------------------------*/
673 static inline uint32_t extractFloat32Frac( float32 a )
676 return float32_val(a) & 0x007FFFFF;
680 /*----------------------------------------------------------------------------
681 | Returns the exponent bits of the single-precision floating-point value `a'.
682 *----------------------------------------------------------------------------*/
684 static inline int extractFloat32Exp(float32 a)
687 return ( float32_val(a)>>23 ) & 0xFF;
691 /*----------------------------------------------------------------------------
692 | Returns the sign bit of the single-precision floating-point value `a'.
693 *----------------------------------------------------------------------------*/
695 static inline flag extractFloat32Sign( float32 a )
698 return float32_val(a)>>31;
702 /*----------------------------------------------------------------------------
703 | Normalizes the subnormal single-precision floating-point value represented
704 | by the denormalized significand `aSig'. The normalized exponent and
705 | significand are stored at the locations pointed to by `zExpPtr' and
706 | `zSigPtr', respectively.
707 *----------------------------------------------------------------------------*/
710 normalizeFloat32Subnormal(uint32_t aSig, int *zExpPtr, uint32_t *zSigPtr)
714 shiftCount = countLeadingZeros32( aSig ) - 8;
715 *zSigPtr = aSig<<shiftCount;
716 *zExpPtr = 1 - shiftCount;
720 /*----------------------------------------------------------------------------
721 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
722 | single-precision floating-point value, returning the result. After being
723 | shifted into the proper positions, the three fields are simply added
724 | together to form the result. This means that any integer portion of `zSig'
725 | will be added into the exponent. Since a properly normalized significand
726 | will have an integer portion equal to 1, the `zExp' input should be 1 less
727 | than the desired result exponent whenever `zSig' is a complete, normalized
729 *----------------------------------------------------------------------------*/
731 static inline float32 packFloat32(flag zSign, int zExp, uint32_t zSig)
735 ( ( (uint32_t) zSign )<<31 ) + ( ( (uint32_t) zExp )<<23 ) + zSig);
739 /*----------------------------------------------------------------------------
740 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
741 | and significand `zSig', and returns the proper single-precision floating-
742 | point value corresponding to the abstract input. Ordinarily, the abstract
743 | value is simply rounded and packed into the single-precision format, with
744 | the inexact exception raised if the abstract input cannot be represented
745 | exactly. However, if the abstract value is too large, the overflow and
746 | inexact exceptions are raised and an infinity or maximal finite value is
747 | returned. If the abstract value is too small, the input value is rounded to
748 | a subnormal number, and the underflow and inexact exceptions are raised if
749 | the abstract input cannot be represented exactly as a subnormal single-
750 | precision floating-point number.
751 | The input significand `zSig' has its binary point between bits 30
752 | and 29, which is 7 bits to the left of the usual location. This shifted
753 | significand must be normalized or smaller. If `zSig' is not normalized,
754 | `zExp' must be 0; in that case, the result returned is a subnormal number,
755 | and it must not require rounding. In the usual case that `zSig' is
756 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
757 | The handling of underflow and overflow follows the IEC/IEEE Standard for
758 | Binary Floating-Point Arithmetic.
759 *----------------------------------------------------------------------------*/
761 static float32 roundAndPackFloat32(flag zSign, int zExp, uint32_t zSig,
762 float_status *status)
765 flag roundNearestEven;
766 int8_t roundIncrement, roundBits;
769 roundingMode = status->float_rounding_mode;
770 roundNearestEven = ( roundingMode == float_round_nearest_even );
771 switch (roundingMode) {
772 case float_round_nearest_even:
773 case float_round_ties_away:
774 roundIncrement = 0x40;
776 case float_round_to_zero:
780 roundIncrement = zSign ? 0 : 0x7f;
782 case float_round_down:
783 roundIncrement = zSign ? 0x7f : 0;
789 roundBits = zSig & 0x7F;
790 if ( 0xFD <= (uint16_t) zExp ) {
792 || ( ( zExp == 0xFD )
793 && ( (int32_t) ( zSig + roundIncrement ) < 0 ) )
796 float_raise( float_flag_overflow, status );
797 saveFloat32Internal( zSign, zExp, zSig, status );
798 if ( roundBits ) float_raise( float_flag_inexact, status );
800 float_raise(float_flag_overflow | float_flag_inexact, status);
802 return packFloat32( zSign, 0xFF, - ( roundIncrement == 0 ));
805 if (status->flush_to_zero) {
806 //float_raise(float_flag_output_denormal, status);
807 return packFloat32(zSign, 0, 0);
810 (status->float_detect_tininess
811 == float_tininess_before_rounding)
813 || ( zSig + roundIncrement < 0x80000000 );
816 float_raise( float_flag_underflow, status );
817 saveFloat32Internal( zSign, zExp, zSig, status );
820 shift32RightJamming( zSig, - zExp, &zSig );
822 roundBits = zSig & 0x7F;
823 #ifndef SOFTFLOAT_68K
824 if (isTiny && roundBits)
825 float_raise(float_flag_underflow, status);
830 status->float_exception_flags |= float_flag_inexact;
832 zSig = ( zSig + roundIncrement )>>7;
833 zSig &= ~ ( ( ( roundBits ^ 0x40 ) == 0 ) & roundNearestEven );
834 if ( zSig == 0 ) zExp = 0;
835 return packFloat32( zSign, zExp, zSig );
839 /*----------------------------------------------------------------------------
840 | Returns the fraction bits of the double-precision floating-point value `a'.
841 *----------------------------------------------------------------------------*/
843 static inline uint64_t extractFloat64Frac( float64 a )
846 return float64_val(a) & LIT64( 0x000FFFFFFFFFFFFF );
850 /*----------------------------------------------------------------------------
851 | Returns the exponent bits of the double-precision floating-point value `a'.
852 *----------------------------------------------------------------------------*/
854 static inline int extractFloat64Exp(float64 a)
857 return ( float64_val(a)>>52 ) & 0x7FF;
861 /*----------------------------------------------------------------------------
862 | Returns the sign bit of the double-precision floating-point value `a'.
863 *----------------------------------------------------------------------------*/
865 static inline flag extractFloat64Sign( float64 a )
868 return float64_val(a)>>63;
872 /*----------------------------------------------------------------------------
873 | If `a' is denormal and we are in flush-to-zero mode then set the
874 | input-denormal exception and return zero. Otherwise just return the value.
875 *----------------------------------------------------------------------------*/
876 float64 float64_squash_input_denormal(float64 a, float_status *status)
878 if (status->flush_inputs_to_zero) {
879 if (extractFloat64Exp(a) == 0 && extractFloat64Frac(a) != 0) {
880 //float_raise(float_flag_input_denormal, status);
881 return make_float64(float64_val(a) & (1ULL << 63));
887 /*----------------------------------------------------------------------------
888 | Normalizes the subnormal double-precision floating-point value represented
889 | by the denormalized significand `aSig'. The normalized exponent and
890 | significand are stored at the locations pointed to by `zExpPtr' and
891 | `zSigPtr', respectively.
892 *----------------------------------------------------------------------------*/
895 normalizeFloat64Subnormal(uint64_t aSig, int *zExpPtr, uint64_t *zSigPtr)
899 shiftCount = countLeadingZeros64( aSig ) - 11;
900 *zSigPtr = aSig<<shiftCount;
901 *zExpPtr = 1 - shiftCount;
905 /*----------------------------------------------------------------------------
906 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into a
907 | double-precision floating-point value, returning the result. After being
908 | shifted into the proper positions, the three fields are simply added
909 | together to form the result. This means that any integer portion of `zSig'
910 | will be added into the exponent. Since a properly normalized significand
911 | will have an integer portion equal to 1, the `zExp' input should be 1 less
912 | than the desired result exponent whenever `zSig' is a complete, normalized
914 *----------------------------------------------------------------------------*/
916 static inline float64 packFloat64(flag zSign, int zExp, uint64_t zSig)
920 ( ( (uint64_t) zSign )<<63 ) + ( ( (uint64_t) zExp )<<52 ) + zSig);
924 /*----------------------------------------------------------------------------
925 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
926 | and significand `zSig', and returns the proper double-precision floating-
927 | point value corresponding to the abstract input. Ordinarily, the abstract
928 | value is simply rounded and packed into the double-precision format, with
929 | the inexact exception raised if the abstract input cannot be represented
930 | exactly. However, if the abstract value is too large, the overflow and
931 | inexact exceptions are raised and an infinity or maximal finite value is
932 | returned. If the abstract value is too small, the input value is rounded to
933 | a subnormal number, and the underflow and inexact exceptions are raised if
934 | the abstract input cannot be represented exactly as a subnormal double-
935 | precision floating-point number.
936 | The input significand `zSig' has its binary point between bits 62
937 | and 61, which is 10 bits to the left of the usual location. This shifted
938 | significand must be normalized or smaller. If `zSig' is not normalized,
939 | `zExp' must be 0; in that case, the result returned is a subnormal number,
940 | and it must not require rounding. In the usual case that `zSig' is
941 | normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
942 | The handling of underflow and overflow follows the IEC/IEEE Standard for
943 | Binary Floating-Point Arithmetic.
944 *----------------------------------------------------------------------------*/
946 static float64 roundAndPackFloat64(flag zSign, int zExp, uint64_t zSig,
947 float_status *status)
950 flag roundNearestEven;
951 int roundIncrement, roundBits;
954 roundingMode = status->float_rounding_mode;
955 roundNearestEven = ( roundingMode == float_round_nearest_even );
956 switch (roundingMode) {
957 case float_round_nearest_even:
958 case float_round_ties_away:
959 roundIncrement = 0x200;
961 case float_round_to_zero:
965 roundIncrement = zSign ? 0 : 0x3ff;
967 case float_round_down:
968 roundIncrement = zSign ? 0x3ff : 0;
973 roundBits = zSig & 0x3FF;
974 if ( 0x7FD <= (uint16_t) zExp ) {
975 if ( ( 0x7FD < zExp )
976 || ( ( zExp == 0x7FD )
977 && ( (int64_t) ( zSig + roundIncrement ) < 0 ) )
980 float_raise( float_flag_overflow, status );
981 saveFloat64Internal( zSign, zExp, zSig, status );
982 if ( roundBits ) float_raise( float_flag_inexact, status );
984 float_raise(float_flag_overflow | float_flag_inexact, status);
986 return packFloat64( zSign, 0x7FF, - ( roundIncrement == 0 ));
989 if (status->flush_to_zero) {
990 //float_raise(float_flag_output_denormal, status);
991 return packFloat64(zSign, 0, 0);
994 (status->float_detect_tininess
995 == float_tininess_before_rounding)
997 || ( zSig + roundIncrement < LIT64( 0x8000000000000000 ) );
1000 float_raise( float_flag_underflow, status );
1001 saveFloat64Internal( zSign, zExp, zSig, status );
1004 shift64RightJamming( zSig, - zExp, &zSig );
1006 roundBits = zSig & 0x3FF;
1007 #ifndef SOFTFLOAT_68K
1008 if (isTiny && roundBits)
1009 float_raise(float_flag_underflow, status);
1014 status->float_exception_flags |= float_flag_inexact;
1016 zSig = ( zSig + roundIncrement )>>10;
1017 zSig &= ~ ( ( ( roundBits ^ 0x200 ) == 0 ) & roundNearestEven );
1018 if ( zSig == 0 ) zExp = 0;
1019 return packFloat64( zSign, zExp, zSig );
1023 /*----------------------------------------------------------------------------
1024 | Returns the fraction bits of the extended double-precision floating-point
1026 *----------------------------------------------------------------------------*/
1028 uint64_t extractFloatx80Frac( floatx80 a )
1035 /*----------------------------------------------------------------------------
1036 | Returns the exponent bits of the extended double-precision floating-point
1038 *----------------------------------------------------------------------------*/
1040 int32_t extractFloatx80Exp( floatx80 a )
1043 return a.high & 0x7FFF;
1047 /*----------------------------------------------------------------------------
1048 | Returns the sign bit of the extended double-precision floating-point value
1050 *----------------------------------------------------------------------------*/
1052 flag extractFloatx80Sign( floatx80 a )
1059 /*----------------------------------------------------------------------------
1060 | Normalizes the subnormal extended double-precision floating-point value
1061 | represented by the denormalized significand `aSig'. The normalized exponent
1062 | and significand are stored at the locations pointed to by `zExpPtr' and
1063 | `zSigPtr', respectively.
1064 *----------------------------------------------------------------------------*/
1066 void normalizeFloatx80Subnormal( uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr )
1070 shiftCount = countLeadingZeros64( aSig );
1071 *zSigPtr = aSig<<shiftCount;
1072 #ifdef SOFTFLOAT_68K
1073 *zExpPtr = -shiftCount;
1075 *zExpPtr = 1 - shiftCount;
1079 /*----------------------------------------------------------------------------
1080 | Packs the sign `zSign', exponent `zExp', and significand `zSig' into an
1081 | extended double-precision floating-point value, returning the result.
1082 *----------------------------------------------------------------------------*/
1084 floatx80 packFloatx80( flag zSign, int32_t zExp, uint64_t zSig )
1089 z.high = ( ( (uint16_t) zSign )<<15 ) + zExp;
1094 /*----------------------------------------------------------------------------
1095 | Takes an abstract floating-point value having sign `zSign', exponent `zExp',
1096 | and extended significand formed by the concatenation of `zSig0' and `zSig1',
1097 | and returns the proper extended double-precision floating-point value
1098 | corresponding to the abstract input. Ordinarily, the abstract value is
1099 | rounded and packed into the extended double-precision format, with the
1100 | inexact exception raised if the abstract input cannot be represented
1101 | exactly. However, if the abstract value is too large, the overflow and
1102 | inexact exceptions are raised and an infinity or maximal finite value is
1103 | returned. If the abstract value is too small, the input value is rounded to
1104 | a subnormal number, and the underflow and inexact exceptions are raised if
1105 | the abstract input cannot be represented exactly as a subnormal extended
1106 | double-precision floating-point number.
1107 | If `roundingPrecision' is 32 or 64, the result is rounded to the same
1108 | number of bits as single or double precision, respectively. Otherwise, the
1109 | result is rounded to the full precision of the extended double-precision
1111 | The input significand must be normalized or smaller. If the input
1112 | significand is not normalized, `zExp' must be 0; in that case, the result
1113 | returned is a subnormal number, and it must not require rounding. The
1114 | handling of underflow and overflow follows the IEC/IEEE Standard for Binary
1115 | Floating-Point Arithmetic.
1116 *----------------------------------------------------------------------------*/
1118 #ifndef SOFTFLOAT_68K
1119 floatx80 roundAndPackFloatx80(int8_t roundingPrecision, flag zSign,
1120 int32_t zExp, uint64_t zSig0, uint64_t zSig1,
1121 float_status *status)
1123 int8_t roundingMode;
1124 flag roundNearestEven, increment, isTiny;
1125 int64_t roundIncrement, roundMask, roundBits;
1127 roundingMode = status->float_rounding_mode;
1128 roundNearestEven = ( roundingMode == float_round_nearest_even );
1129 if ( roundingPrecision == 80 ) goto precision80;
1130 if ( roundingPrecision == 64 ) {
1131 roundIncrement = LIT64( 0x0000000000000400 );
1132 roundMask = LIT64( 0x00000000000007FF );
1134 else if ( roundingPrecision == 32 ) {
1135 roundIncrement = LIT64( 0x0000008000000000 );
1136 roundMask = LIT64( 0x000000FFFFFFFFFF );
1141 zSig0 |= ( zSig1 != 0 );
1142 switch (roundingMode) {
1143 case float_round_nearest_even:
1144 case float_round_ties_away:
1146 case float_round_to_zero:
1149 case float_round_up:
1150 roundIncrement = zSign ? 0 : roundMask;
1152 case float_round_down:
1153 roundIncrement = zSign ? roundMask : 0;
1158 roundBits = zSig0 & roundMask;
1159 #ifdef SOFTFLOAT_68K
1160 if ( 0x7FFE <= (uint32_t) zExp ) {
1162 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
1164 if ( ( 0x7FFE < zExp )
1165 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
1169 #ifdef SOFTFLOAT_68K
1174 if (status->flush_to_zero) {
1175 //float_raise(float_flag_output_denormal, status);
1176 return packFloatx80(zSign, 0, 0);
1179 (status->float_detect_tininess
1180 == float_tininess_before_rounding)
1181 #ifdef SOFTFLOAT_68K
1186 || ( zSig0 <= zSig0 + roundIncrement );
1187 #ifdef SOFTFLOAT_68K
1189 float_raise( float_flag_underflow, status );
1190 saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status );
1192 shift64RightJamming( zSig0, -zExp, &zSig0 );
1194 shift64RightJamming( zSig0, 1 - zExp, &zSig0 );
1197 roundBits = zSig0 & roundMask;
1198 #ifdef SOFTFLOAT_68K
1199 if ( isTiny ) float_raise( float_flag_underflow, status );
1201 if (isTiny && roundBits) {
1202 float_raise(float_flag_underflow, status);
1206 status->float_exception_flags |= float_flag_inexact;
1208 zSig0 += roundIncrement;
1209 #ifndef SOFTFLOAT_68K
1210 if ( (int64_t) zSig0 < 0 ) zExp = 1;
1212 roundIncrement = roundMask + 1;
1213 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1214 roundMask |= roundIncrement;
1216 zSig0 &= ~ roundMask;
1217 return packFloatx80( zSign, zExp, zSig0 );
1221 status->float_exception_flags |= float_flag_inexact;
1223 zSig0 += roundIncrement;
1224 if ( zSig0 < roundIncrement ) {
1226 zSig0 = LIT64( 0x8000000000000000 );
1228 roundIncrement = roundMask + 1;
1229 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1230 roundMask |= roundIncrement;
1232 zSig0 &= ~ roundMask;
1233 if ( zSig0 == 0 ) zExp = 0;
1234 return packFloatx80( zSign, zExp, zSig0 );
1236 switch (roundingMode) {
1237 case float_round_nearest_even:
1238 case float_round_ties_away:
1239 increment = ((int64_t)zSig1 < 0);
1241 case float_round_to_zero:
1244 case float_round_up:
1245 increment = !zSign && zSig1;
1247 case float_round_down:
1248 increment = zSign && zSig1;
1253 #ifdef SOFTFLOAT_68K
1254 if ( 0x7FFE <= (uint32_t) zExp ) {
1256 if ( 0x7FFD <= (uint32_t) ( zExp - 1 ) ) {
1258 if ( ( 0x7FFE < zExp )
1259 || ( ( zExp == 0x7FFE )
1260 && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) )
1266 #ifndef SOFTFLOAT_68K
1267 float_raise(float_flag_overflow | float_flag_inexact, status);
1269 float_raise( float_flag_overflow, status );
1270 saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status );
1271 if ( ( zSig0 & roundMask ) || zSig1 ) float_raise( float_flag_inexact, status );
1273 if ( ( roundingMode == float_round_to_zero )
1274 || ( zSign && ( roundingMode == float_round_up ) )
1275 || ( ! zSign && ( roundingMode == float_round_down ) )
1277 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
1279 return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low );
1281 #ifdef SOFTFLOAT_68K
1287 (status->float_detect_tininess
1288 == float_tininess_before_rounding)
1289 #ifdef SOFTFLOAT_68K
1295 || ( zSig0 < LIT64( 0xFFFFFFFFFFFFFFFF ) );
1296 #ifdef SOFTFLOAT_68K
1298 float_raise( float_flag_underflow, status );
1299 saveFloatx80Internal( zSign, zExp, zSig0, zSig1, status );
1301 shift64ExtraRightJamming( zSig0, zSig1, -zExp, &zSig0, &zSig1 );
1303 shift64ExtraRightJamming( zSig0, zSig1, 1 - zExp, &zSig0, &zSig1 );
1306 #ifndef SOFTFLOAT_68K
1307 if ( isTiny && zSig1 ) float_raise( float_flag_underflow, status );
1309 if (zSig1) float_raise(float_flag_inexact, status);
1310 switch (roundingMode) {
1311 case float_round_nearest_even:
1312 case float_round_ties_away:
1313 increment = ((int64_t)zSig1 < 0);
1315 case float_round_to_zero:
1318 case float_round_up:
1319 increment = !zSign && zSig1;
1321 case float_round_down:
1322 increment = zSign && zSig1;
1330 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
1331 #ifndef SOFTFLOAT_68K
1332 if ( (int64_t) zSig0 < 0 ) zExp = 1;
1335 return packFloatx80( zSign, zExp, zSig0 );
1339 status->float_exception_flags |= float_flag_inexact;
1345 zSig0 = LIT64( 0x8000000000000000 );
1348 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
1352 if ( zSig0 == 0 ) zExp = 0;
1354 return packFloatx80( zSign, zExp, zSig0 );
1358 #else // SOFTFLOAT_68K
1360 floatx80 roundAndPackFloatx80( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status )
1362 int8_t roundingMode;
1363 flag roundNearestEven, increment;
1364 uint64_t roundIncrement, roundMask, roundBits;
1367 roundingMode = status->float_rounding_mode;
1368 roundNearestEven = ( roundingMode == float_round_nearest_even );
1369 if ( roundingPrecision == 80 ) goto precision80;
1370 if ( roundingPrecision == 64 ) {
1371 roundIncrement = LIT64( 0x0000000000000400 );
1372 roundMask = LIT64( 0x00000000000007FF );
1374 } else if ( roundingPrecision == 32 ) {
1375 roundIncrement = LIT64( 0x0000008000000000 );
1376 roundMask = LIT64( 0x000000FFFFFFFFFF );
1381 zSig0 |= ( zSig1 != 0 );
1382 if ( ! roundNearestEven ) {
1383 if ( roundingMode == float_round_to_zero ) {
1386 roundIncrement = roundMask;
1388 if ( roundingMode == float_round_up ) roundIncrement = 0;
1390 if ( roundingMode == float_round_down ) roundIncrement = 0;
1394 roundBits = zSig0 & roundMask;
1395 if ( ( ( 0x7FFE - expOffset ) < zExp ) ||
1396 ( ( zExp == ( 0x7FFE - expOffset ) ) && ( zSig0 + roundIncrement < zSig0 ) ) ) {
1397 float_raise( float_flag_overflow, status );
1398 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status );
1399 if ( zSig0 & roundMask ) float_raise( float_flag_inexact, status );
1400 if ( ( roundingMode == float_round_to_zero )
1401 || ( zSign && ( roundingMode == float_round_up ) )
1402 || ( ! zSign && ( roundingMode == float_round_down ) )
1404 return packFloatx80( zSign, 0x7FFE - expOffset, ~ roundMask );
1406 return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low );
1408 if ( zExp < ( expOffset + 1 ) ) {
1409 float_raise( float_flag_underflow, status );
1410 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status );
1411 shift64RightJamming( zSig0, -( zExp - ( expOffset + 1 ) ), &zSig0 );
1412 zExp = expOffset + 1;
1413 roundBits = zSig0 & roundMask;
1414 if ( roundBits ) float_raise( float_flag_inexact, status );
1415 zSig0 += roundIncrement;
1416 roundIncrement = roundMask + 1;
1417 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1418 roundMask |= roundIncrement;
1420 zSig0 &= ~ roundMask;
1421 return packFloatx80( zSign, zExp, zSig0 );
1424 float_raise( float_flag_inexact, status );
1425 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status);
1427 zSig0 += roundIncrement;
1428 if ( zSig0 < roundIncrement ) {
1430 zSig0 = LIT64( 0x8000000000000000 );
1432 roundIncrement = roundMask + 1;
1433 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1434 roundMask |= roundIncrement;
1436 zSig0 &= ~ roundMask;
1437 if ( zSig0 == 0 ) zExp = 0;
1438 return packFloatx80( zSign, zExp, zSig0 );
1440 increment = ( (int64_t) zSig1 < 0 );
1441 if ( ! roundNearestEven ) {
1442 if ( roundingMode == float_round_to_zero ) {
1446 increment = ( roundingMode == float_round_down ) && zSig1;
1448 increment = ( roundingMode == float_round_up ) && zSig1;
1452 if ( 0x7FFE <= (uint32_t) zExp ) {
1453 if ( ( 0x7FFE < zExp ) ||
1454 ( ( zExp == 0x7FFE ) && ( zSig0 == LIT64( 0xFFFFFFFFFFFFFFFF ) ) && increment )
1457 float_raise( float_flag_overflow, status );
1458 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status );
1459 if ( ( zSig0 & roundMask ) || zSig1 ) float_raise( float_flag_inexact, status );
1460 if ( ( roundingMode == float_round_to_zero )
1461 || ( zSign && ( roundingMode == float_round_up ) )
1462 || ( ! zSign && ( roundingMode == float_round_down ) )
1464 return packFloatx80( zSign, 0x7FFE, ~ roundMask );
1466 return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low );
1469 float_raise( float_flag_underflow, status );
1470 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status);
1471 shift64ExtraRightJamming( zSig0, zSig1, -zExp, &zSig0, &zSig1 );
1473 if ( zSig1 ) float_raise( float_flag_inexact, status );
1474 if ( roundNearestEven ) {
1475 increment = ( (int64_t) zSig1 < 0 );
1478 increment = ( roundingMode == float_round_down ) && zSig1;
1480 increment = ( roundingMode == float_round_up ) && zSig1;
1486 ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
1488 return packFloatx80( zSign, zExp, zSig0 );
1492 float_raise( float_flag_inexact, status );
1493 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status );
1499 zSig0 = LIT64( 0x8000000000000000 );
1501 zSig0 &= ~ ( ( (uint64_t) ( zSig1<<1 ) == 0 ) & roundNearestEven );
1504 if ( zSig0 == 0 ) zExp = 0;
1506 return packFloatx80( zSign, zExp, zSig0 );
1512 #ifdef SOFTFLOAT_68K // 21-01-2017: Added for Previous
1513 floatx80 roundSigAndPackFloatx80( int8_t roundingPrecision, flag zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status *status )
1515 int8_t roundingMode;
1516 flag roundNearestEven, isTiny;
1517 uint64_t roundIncrement, roundMask, roundBits;
1519 roundingMode = status->float_rounding_mode;
1520 roundNearestEven = ( roundingMode == float_round_nearest_even );
1521 if ( roundingPrecision == 32 ) {
1522 roundIncrement = LIT64( 0x0000008000000000 );
1523 roundMask = LIT64( 0x000000FFFFFFFFFF );
1524 } else if ( roundingPrecision == 64 ) {
1525 roundIncrement = LIT64( 0x0000000000000400 );
1526 roundMask = LIT64( 0x00000000000007FF );
1528 return roundAndPackFloatx80( 80, zSign, zExp, zSig0, zSig1, status );
1530 zSig0 |= ( zSig1 != 0 );
1531 if ( ! roundNearestEven ) {
1532 if ( roundingMode == float_round_to_zero ) {
1536 roundIncrement = roundMask;
1538 if ( roundingMode == float_round_up ) roundIncrement = 0;
1541 if ( roundingMode == float_round_down ) roundIncrement = 0;
1545 roundBits = zSig0 & roundMask;
1547 if ( 0x7FFE <= (uint32_t) zExp ) {
1548 if ( ( 0x7FFE < zExp )
1549 || ( ( zExp == 0x7FFE ) && ( zSig0 + roundIncrement < zSig0 ) )
1551 float_raise( float_flag_overflow, status );
1552 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status);
1553 if ( zSig0 & roundMask ) float_raise( float_flag_inexact, status );
1554 if ( ( roundingMode == float_round_to_zero )
1555 || ( zSign && ( roundingMode == float_round_up ) )
1556 || ( ! zSign && ( roundingMode == float_round_down ) )
1558 return packFloatx80( zSign, 0x7FFE, LIT64( 0xFFFFFFFFFFFFFFFF ) );
1560 return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low );
1565 ( status->float_detect_tininess == float_tininess_before_rounding )
1567 || ( zSig0 <= zSig0 + roundIncrement );
1569 float_raise( float_flag_underflow, status );
1570 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status );
1572 shift64RightJamming( zSig0, -zExp, &zSig0 );
1574 roundBits = zSig0 & roundMask;
1575 if ( roundBits ) float_raise ( float_flag_inexact, status );
1576 zSig0 += roundIncrement;
1577 if ( roundNearestEven && ( roundBits == roundIncrement ) ) {
1578 roundMask |= roundIncrement<<1;
1580 zSig0 &= ~roundMask;
1581 return packFloatx80( zSign, zExp, zSig0 );
1585 float_raise( float_flag_inexact, status );
1586 saveFloatx80Internal( roundingPrecision, zSign, zExp, zSig0, zSig1, status );
1588 zSig0 += roundIncrement;
1589 if ( zSig0 < roundIncrement ) {
1591 zSig0 = LIT64( 0x8000000000000000 );
1593 roundIncrement = roundMask + 1;
1594 if ( roundNearestEven && ( roundBits<<1 == roundIncrement ) ) {
1595 roundMask |= roundIncrement;
1597 zSig0 &= ~ roundMask;
1598 if ( zSig0 == 0 ) zExp = 0;
1599 return packFloatx80( zSign, zExp, zSig0 );
1602 #endif // End of Addition for Previous
1605 /*----------------------------------------------------------------------------
1606 | Takes an abstract floating-point value having sign `zSign', exponent
1607 | `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
1608 | and returns the proper extended double-precision floating-point value
1609 | corresponding to the abstract input. This routine is just like
1610 | `roundAndPackFloatx80' except that the input significand does not have to be
1612 *----------------------------------------------------------------------------*/
1614 static floatx80 normalizeRoundAndPackFloatx80(int8_t roundingPrecision,
1615 flag zSign, int32_t zExp,
1616 uint64_t zSig0, uint64_t zSig1,
1617 float_status *status)
1626 shiftCount = countLeadingZeros64( zSig0 );
1627 shortShift128Left( zSig0, zSig1, shiftCount, &zSig0, &zSig1 );
1629 return roundAndPackFloatx80(roundingPrecision, zSign, zExp,
1630 zSig0, zSig1, status);
1634 /*----------------------------------------------------------------------------
1635 | Returns the result of converting the 32-bit two's complement integer `a'
1636 | to the extended double-precision floating-point format. The conversion
1637 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
1639 *----------------------------------------------------------------------------*/
1641 floatx80 int32_to_floatx80(int32_t a)
1648 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
1650 absA = zSign ? - a : a;
1651 shiftCount = countLeadingZeros32( absA ) + 32;
1653 return packFloatx80( zSign, 0x403E - shiftCount, zSig<<shiftCount );
1657 /*----------------------------------------------------------------------------
1658 | Returns the result of converting the single-precision floating-point value
1659 | `a' to the extended double-precision floating-point format. The conversion
1660 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
1662 *----------------------------------------------------------------------------*/
1664 floatx80 float32_to_floatx80(float32 a, float_status *status)
1670 aSig = extractFloat32Frac( a );
1671 aExp = extractFloat32Exp( a );
1672 aSign = extractFloat32Sign( a );
1673 if ( aExp == 0xFF ) {
1674 if ( aSig ) return commonNaNToFloatx80( float32ToCommonNaN( a, status ), status );
1675 return packFloatx80( aSign, 0x7FFF, floatx80_default_infinity_low );
1678 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1679 normalizeFloat32Subnormal( aSig, &aExp, &aSig );
1682 return packFloatx80( aSign, aExp + 0x3F80, ( (uint64_t) aSig )<<40 );
1686 #ifdef SOFTFLOAT_68K // 31-12-2016: Added for Previous
1687 floatx80 float32_to_floatx80_allowunnormal(float32 a , float_status *status)
1694 aSig = extractFloat32Frac(a);
1695 aExp = extractFloat32Exp(a);
1696 aSign = extractFloat32Sign(a);
1698 return packFloatx80( aSign, 0x7FFF, ( (uint64_t) aSig )<<40 );
1701 if (aSig == 0) return packFloatx80(aSign, 0, 0);
1702 return packFloatx80(aSign, 0x3F81, ((uint64_t) aSig) << 40);
1705 return packFloatx80(aSign, aExp + 0x3F80, ((uint64_t)aSig) << 40);
1708 #endif // end of addition for Previous
1710 /*----------------------------------------------------------------------------
1711 | Returns the result of converting the double-precision floating-point value
1712 | `a' to the extended double-precision floating-point format. The conversion
1713 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
1715 *----------------------------------------------------------------------------*/
1717 floatx80 float64_to_floatx80(float64 a, float_status *status)
1723 aSig = extractFloat64Frac( a );
1724 aExp = extractFloat64Exp( a );
1725 aSign = extractFloat64Sign( a );
1726 if ( aExp == 0x7FF ) {
1727 if ( aSig ) return commonNaNToFloatx80( float64ToCommonNaN( a, status ), status );
1728 return packFloatx80( aSign, 0x7FFF, floatx80_default_infinity_low );
1731 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1732 normalizeFloat64Subnormal( aSig, &aExp, &aSig );
1736 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
1740 #ifdef SOFTFLOAT_68K // 31-12-2016: Added for Previous
1741 floatx80 float64_to_floatx80_allowunnormal( float64 a, float_status *status )
1748 aSig = extractFloat64Frac( a );
1749 aExp = extractFloat64Exp( a );
1750 aSign = extractFloat64Sign( a );
1751 if ( aExp == 0x7FF ) {
1752 return packFloatx80( aSign, 0x7FFF, aSig<<11 );
1755 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
1756 return packFloatx80( aSign, 0x3C01, aSig<<11 );
1760 aSign, aExp + 0x3C00, ( aSig | LIT64( 0x0010000000000000 ) )<<11 );
1763 #endif // end of addition for Previous
1765 /*----------------------------------------------------------------------------
1766 | Returns the result of converting the extended double-precision floating-
1767 | point value `a' to the 32-bit two's complement integer format. The
1768 | conversion is performed according to the IEC/IEEE Standard for Binary
1769 | Floating-Point Arithmetic---which means in particular that the conversion
1770 | is rounded according to the current rounding mode. If `a' is a NaN, the
1771 | largest positive integer is returned. Otherwise, if the conversion
1772 | overflows, the largest integer with the same sign as `a' is returned.
1773 *----------------------------------------------------------------------------*/
1775 int32_t floatx80_to_int32(floatx80 a, float_status *status)
1778 int32_t aExp, shiftCount;
1781 aSig = extractFloatx80Frac( a );
1782 aExp = extractFloatx80Exp( a );
1783 aSign = extractFloatx80Sign( a );
1784 #ifdef SOFTFLOAT_68K
1785 if ( aExp == 0x7FFF ) {
1786 if ( (uint64_t) ( aSig<<1 ) ) {
1787 a = propagateFloatx80NaNOneArg( a, status );
1788 if ( a.low == aSig ) float_raise( float_flag_invalid, status );
1789 return (int32_t)(a.low>>32);
1791 float_raise( float_flag_invalid, status );
1792 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
1795 if ( ( aExp == 0x7FFF ) && (bits64) ( aSig<<1 ) ) aSign = 0;
1797 shiftCount = 0x4037 - aExp;
1798 if ( shiftCount <= 0 ) shiftCount = 1;
1799 shift64RightJamming( aSig, shiftCount, &aSig );
1800 return roundAndPackInt32(aSign, aSig, status);
1804 #ifdef SOFTFLOAT_68K // 30-01-2017: Addition for Previous
1805 int16_t floatx80_to_int16( floatx80 a, float_status *status)
1808 int32_t aExp, shiftCount;
1811 aSig = extractFloatx80Frac( a );
1812 aExp = extractFloatx80Exp( a );
1813 aSign = extractFloatx80Sign( a );
1814 if ( aExp == 0x7FFF ) {
1815 float_raise( float_flag_invalid, status );
1816 if ( (uint64_t) ( aSig<<1 ) ) {
1817 a = propagateFloatx80NaNOneArg( a, status );
1818 if ( a.low == aSig ) float_raise( float_flag_invalid, status );
1819 return (int16_t)(a.low>>48);
1821 return aSign ? (int16_t) 0x8000 : 0x7FFF;
1823 shiftCount = 0x4037 - aExp;
1824 if ( shiftCount <= 0 ) shiftCount = 1;
1825 shift64RightJamming( aSig, shiftCount, &aSig );
1826 return roundAndPackInt16( aSign, aSig, status );
1829 int8_t floatx80_to_int8( floatx80 a, float_status *status)
1832 int32_t aExp, shiftCount;
1835 aSig = extractFloatx80Frac( a );
1836 aExp = extractFloatx80Exp( a );
1837 aSign = extractFloatx80Sign( a );
1838 if ( aExp == 0x7FFF ) {
1839 if ( (uint64_t) ( aSig<<1 ) ) {
1840 a = propagateFloatx80NaNOneArg( a, status );
1841 if ( a.low == aSig ) float_raise( float_flag_invalid, status );
1842 return (int8_t)(a.low>>56);
1844 float_raise( float_flag_invalid, status );
1845 return aSign ? (int8_t) 0x80 : 0x7F;
1847 shiftCount = 0x4037 - aExp;
1848 if ( shiftCount <= 0 ) shiftCount = 1;
1849 shift64RightJamming( aSig, shiftCount, &aSig );
1850 return roundAndPackInt8( aSign, aSig, status );
1853 #endif // End of addition for Previous
1856 /*----------------------------------------------------------------------------
1857 | Returns the result of converting the extended double-precision floating-
1858 | point value `a' to the 32-bit two's complement integer format. The
1859 | conversion is performed according to the IEC/IEEE Standard for Binary
1860 | Floating-Point Arithmetic, except that the conversion is always rounded
1861 | toward zero. If `a' is a NaN, the largest positive integer is returned.
1862 | Otherwise, if the conversion overflows, the largest integer with the same
1863 | sign as `a' is returned.
1864 *----------------------------------------------------------------------------*/
1866 int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status *status)
1869 int32_t aExp, shiftCount;
1870 uint64_t aSig, savedASig;
1873 if (floatx80_invalid_encoding(a)) {
1874 float_raise(float_flag_invalid, status);
1877 aSig = extractFloatx80Frac( a );
1878 aExp = extractFloatx80Exp( a );
1879 aSign = extractFloatx80Sign( a );
1880 if ( 0x401E < aExp ) {
1881 if ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) aSign = 0;
1884 else if ( aExp < 0x3FFF ) {
1886 status->float_exception_flags |= float_flag_inexact;
1890 shiftCount = 0x403E - aExp;
1892 aSig >>= shiftCount;
1894 if ( aSign ) z = - z;
1895 if ( ( z < 0 ) ^ aSign ) {
1897 float_raise(float_flag_invalid, status);
1898 return aSign ? (int32_t) 0x80000000 : 0x7FFFFFFF;
1900 if ( ( aSig<<shiftCount ) != savedASig ) {
1901 status->float_exception_flags |= float_flag_inexact;
1907 /*----------------------------------------------------------------------------
1908 | Returns the result of converting the extended double-precision floating-
1909 | point value `a' to the 64-bit two's complement integer format. The
1910 | conversion is performed according to the IEC/IEEE Standard for Binary
1911 | Floating-Point Arithmetic---which means in particular that the conversion
1912 | is rounded according to the current rounding mode. If `a' is a NaN,
1913 | the largest positive integer is returned. Otherwise, if the conversion
1914 | overflows, the largest integer with the same sign as `a' is returned.
1915 *----------------------------------------------------------------------------*/
1917 int64_t floatx80_to_int64(floatx80 a, float_status *status)
1920 int32_t aExp, shiftCount;
1921 uint64_t aSig, aSigExtra;
1923 if (floatx80_invalid_encoding(a)) {
1924 float_raise(float_flag_invalid, status);
1927 aSig = extractFloatx80Frac( a );
1928 aExp = extractFloatx80Exp( a );
1929 aSign = extractFloatx80Sign( a );
1930 shiftCount = 0x403E - aExp;
1931 if ( shiftCount <= 0 ) {
1933 float_raise(float_flag_invalid, status);
1935 || ( ( aExp == 0x7FFF )
1936 && ( aSig != LIT64( 0x8000000000000000 ) ) )
1938 return LIT64( 0x7FFFFFFFFFFFFFFF );
1940 return (int64_t) LIT64( 0x8000000000000000 );
1945 shift64ExtraRightJamming( aSig, 0, shiftCount, &aSig, &aSigExtra );
1947 return roundAndPackInt64(aSign, aSig, aSigExtra, status);
1951 /*----------------------------------------------------------------------------
1952 | Returns the result of converting the extended double-precision floating-
1953 | point value `a' to the single-precision floating-point format. The
1954 | conversion is performed according to the IEC/IEEE Standard for Binary
1955 | Floating-Point Arithmetic.
1956 *----------------------------------------------------------------------------*/
1958 float32 floatx80_to_float32(floatx80 a, float_status *status)
1964 aSig = extractFloatx80Frac( a );
1965 aExp = extractFloatx80Exp( a );
1966 aSign = extractFloatx80Sign( a );
1967 if ( aExp == 0x7FFF ) {
1968 if ( (uint64_t) ( aSig<<1 ) ) {
1969 return commonNaNToFloat32(floatx80ToCommonNaN(a, status));
1971 return packFloat32( aSign, 0xFF, 0 );
1973 #ifdef SOFTFLOAT_68K
1975 if ( aSig == 0) return packFloat32( aSign, 0, 0 );
1976 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
1978 shift64RightJamming( aSig, 33, &aSig );
1981 shift64RightJamming( aSig, 33, &aSig );
1982 if ( aExp || aSig ) aExp -= 0x3F81;
1984 return roundAndPackFloat32(aSign, aExp, aSig, status);
1988 /*----------------------------------------------------------------------------
1989 | Returns the result of converting the extended double-precision floating-
1990 | point value `a' to the double-precision floating-point format. The
1991 | conversion is performed according to the IEC/IEEE Standard for Binary
1992 | Floating-Point Arithmetic.
1993 *----------------------------------------------------------------------------*/
1995 float64 floatx80_to_float64(floatx80 a, float_status *status)
1999 uint64_t aSig, zSig;
2001 aSig = extractFloatx80Frac( a );
2002 aExp = extractFloatx80Exp( a );
2003 aSign = extractFloatx80Sign( a );
2004 if ( aExp == 0x7FFF ) {
2005 if ( (uint64_t) ( aSig<<1 ) ) {
2006 return commonNaNToFloat64(floatx80ToCommonNaN(a, status), status);
2008 return packFloat64( aSign, 0x7FF, 0 );
2010 #ifdef SOFTFLOAT_68K
2012 if ( aSig == 0) return packFloat64( aSign, 0, 0 );
2013 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2015 shift64RightJamming( aSig, 1, &zSig );
2018 shift64RightJamming( aSig, 1, &zSig );
2019 if ( aExp || aSig ) aExp -= 0x3C01;
2021 return roundAndPackFloat64(aSign, aExp, zSig, status);
2025 #ifdef SOFTFLOAT_68K // 31-01-2017
2026 /*----------------------------------------------------------------------------
2027 | Returns the result of converting the extended double-precision floating-
2028 | point value `a' to the extended double-precision floating-point format.
2029 | The conversion is performed according to the IEC/IEEE Standard for Binary
2030 | Floating-Point Arithmetic.
2031 *----------------------------------------------------------------------------*/
2033 floatx80 floatx80_to_floatx80( floatx80 a, float_status *status )
2039 aSig = extractFloatx80Frac( a );
2040 aExp = extractFloatx80Exp( a );
2041 aSign = extractFloatx80Sign( a );
2043 if ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) {
2044 return propagateFloatx80NaNOneArg( a, status );
2046 if ( aExp == 0 && aSig != 0 ) {
2047 return normalizeRoundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status );
2054 #ifdef SOFTFLOAT_68K // 30-01-2016: Added for Previous
2055 floatx80 floatx80_round32( floatx80 a, float_status *status )
2061 aSig = extractFloatx80Frac( a );
2062 aExp = extractFloatx80Exp( a );
2063 aSign = extractFloatx80Sign( a );
2065 if ( aExp == 0x7FFF || aSig == 0 ) {
2069 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2072 return roundSigAndPackFloatx80( 32, aSign, aExp, aSig, 0, status );
2075 floatx80 floatx80_round64( floatx80 a, float_status *status )
2081 aSig = extractFloatx80Frac( a );
2082 aExp = extractFloatx80Exp( a );
2083 aSign = extractFloatx80Sign( a );
2085 if ( aExp == 0x7FFF || aSig == 0 ) {
2089 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2092 return roundSigAndPackFloatx80( 64, aSign, aExp, aSig, 0, status );
2095 floatx80 floatx80_round_to_float32( floatx80 a, float_status *status )
2101 aSign = extractFloatx80Sign( a );
2102 aSig = extractFloatx80Frac( a );
2103 aExp = extractFloatx80Exp( a );
2105 if ( aExp == 0x7FFF ) {
2106 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status );
2110 if ( aSig == 0 ) return a;
2111 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2114 return roundAndPackFloatx80( 32, aSign, aExp, aSig, 0, status );
2117 floatx80 floatx80_round_to_float64( floatx80 a, float_status *status )
2123 aSign = extractFloatx80Sign( a );
2124 aSig = extractFloatx80Frac( a );
2125 aExp = extractFloatx80Exp( a );
2127 if ( aExp == 0x7FFF ) {
2128 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status );
2132 if ( aSig == 0 ) return a;
2133 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2136 return roundAndPackFloatx80( 64, aSign, aExp, aSig, 0, status );
2140 floatx80 floatx80_normalize( floatx80 a )
2147 aSig = extractFloatx80Frac( a );
2148 aExp = extractFloatx80Exp( a );
2149 aSign = extractFloatx80Sign( a );
2151 if ( aExp == 0x7FFF || aExp == 0 ) return a;
2152 if ( aSig == 0 ) return packFloatx80(aSign, 0, 0);
2154 shiftCount = countLeadingZeros64( aSig );
2156 if ( shiftCount > aExp ) shiftCount = aExp;
2159 aSig <<= shiftCount;
2161 return packFloatx80( aSign, aExp, aSig );
2163 #endif // end of addition for Previous
2165 /*----------------------------------------------------------------------------
2166 | Rounds the extended double-precision floating-point value `a' to an integer,
2167 | and returns the result as an extended quadruple-precision floating-point
2168 | value. The operation is performed according to the IEC/IEEE Standard for
2169 | Binary Floating-Point Arithmetic.
2170 *----------------------------------------------------------------------------*/
2172 floatx80 floatx80_round_to_int(floatx80 a, float_status *status)
2176 uint64_t lastBitMask, roundBitsMask;
2177 // int8_t roundingMode;
2180 // roundingMode = status->float_rounding_mode;
2181 aSign = extractFloatx80Sign(a);
2182 aExp = extractFloatx80Exp( a );
2183 if ( 0x403E <= aExp ) {
2184 if ( aExp == 0x7FFF ) {
2185 if ((uint64_t) ( extractFloatx80Frac( a )<<1 ) )
2186 return propagateFloatx80NaNOneArg(a, status);
2187 return inf_clear_intbit(status) ? packFloatx80(aSign, aExp, 0) : a;
2191 if ( aExp < 0x3FFF ) {
2193 #ifdef SOFTFLOAT_68K
2194 && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) {
2196 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
2200 status->float_exception_flags |= float_flag_inexact;
2201 switch (status->float_rounding_mode) {
2202 case float_round_nearest_even:
2203 if ( ( aExp == 0x3FFE ) && (uint64_t) ( extractFloatx80Frac( a )<<1 )
2206 packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
2209 case float_round_ties_away:
2210 if (aExp == 0x3FFE) {
2211 return packFloatx80(aSign, 0x3FFF, LIT64(0x8000000000000000));
2214 case float_round_down:
2217 packFloatx80( 1, 0x3FFF, LIT64( 0x8000000000000000 ) )
2218 : packFloatx80( 0, 0, 0 );
2219 case float_round_up:
2221 aSign ? packFloatx80( 1, 0, 0 )
2222 : packFloatx80( 0, 0x3FFF, LIT64( 0x8000000000000000 ) );
2224 return packFloatx80( aSign, 0, 0 );
2227 lastBitMask <<= 0x403E - aExp;
2228 roundBitsMask = lastBitMask - 1;
2230 switch (status->float_rounding_mode) {
2231 case float_round_nearest_even:
2232 z.low += lastBitMask>>1;
2233 if ((z.low & roundBitsMask) == 0) {
2234 z.low &= ~lastBitMask;
2237 case float_round_ties_away:
2238 z.low += lastBitMask >> 1;
2240 case float_round_to_zero:
2242 case float_round_up:
2243 if (!extractFloatx80Sign(z)) {
2244 z.low += roundBitsMask;
2247 case float_round_down:
2248 if (extractFloatx80Sign(z)) {
2249 z.low += roundBitsMask;
2255 z.low &= ~ roundBitsMask;
2258 z.low = LIT64( 0x8000000000000000 );
2260 if (z.low != a.low) {
2261 status->float_exception_flags |= float_flag_inexact;
2267 #ifdef SOFTFLOAT_68K // 09-01-2017: Added for Previous
2268 floatx80 floatx80_round_to_int_toward_zero( floatx80 a, float_status *status)
2272 uint64_t lastBitMask, roundBitsMask;
2275 aSign = extractFloatx80Sign(a);
2276 aExp = extractFloatx80Exp( a );
2277 if ( 0x403E <= aExp ) {
2278 if ( aExp == 0x7FFF ) {
2279 if ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
2280 return propagateFloatx80NaNOneArg( a, status );
2281 return inf_clear_intbit(status) ? packFloatx80(aSign, aExp, 0) : a;
2285 if ( aExp < 0x3FFF ) {
2287 #ifdef SOFTFLOAT_68K
2288 && ( (uint64_t) extractFloatx80Frac( a ) == 0 ) ) {
2290 && ( (uint64_t) ( extractFloatx80Frac( a )<<1 ) == 0 ) ) {
2294 status->float_exception_flags |= float_flag_inexact;
2295 return packFloatx80( aSign, 0, 0 );
2298 lastBitMask <<= 0x403E - aExp;
2299 roundBitsMask = lastBitMask - 1;
2301 z.low &= ~ roundBitsMask;
2304 z.low = LIT64( 0x8000000000000000 );
2306 if ( z.low != a.low ) status->float_exception_flags |= float_flag_inexact;
2310 #endif // End of addition for Previous
2312 /*----------------------------------------------------------------------------
2313 | Returns the result of adding the absolute values of the extended double-
2314 | precision floating-point values `a' and `b'. If `zSign' is 1, the sum is
2315 | negated before being returned. `zSign' is ignored if the result is a NaN.
2316 | The addition is performed according to the IEC/IEEE Standard for Binary
2317 | Floating-Point Arithmetic.
2318 *----------------------------------------------------------------------------*/
2320 static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
2321 float_status *status)
2323 int32_t aExp, bExp, zExp;
2324 uint64_t aSig, bSig, zSig0, zSig1;
2327 aSig = extractFloatx80Frac( a );
2328 aExp = extractFloatx80Exp( a );
2329 bSig = extractFloatx80Frac( b );
2330 bExp = extractFloatx80Exp( b );
2331 #ifdef SOFTFLOAT_68K
2333 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2336 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2339 expDiff = aExp - bExp;
2340 if ( 0 < expDiff ) {
2341 if ( aExp == 0x7FFF ) {
2342 if ((uint64_t)(aSig << 1))
2343 return propagateFloatx80NaN(a, b, status);
2344 return inf_clear_intbit(status) ? packFloatx80(extractFloatx80Sign(a), aExp, 0) : a;
2346 #ifndef SOFTFLOAT_68K
2347 if ( bExp == 0 ) --expDiff;
2349 shift64ExtraRightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2352 else if ( expDiff < 0 ) {
2353 if ( bExp == 0x7FFF ) {
2354 if ((uint64_t)(bSig << 1))
2355 return propagateFloatx80NaN(a, b, status);
2356 if (inf_clear_intbit(status)) bSig = 0;
2357 return packFloatx80( zSign, bExp, bSig );
2359 #ifndef SOFTFLOAT_68K
2360 if ( aExp == 0 ) ++expDiff;
2362 shift64ExtraRightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2366 if ( aExp == 0x7FFF ) {
2367 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
2368 return propagateFloatx80NaN(a, b, status);
2370 if (inf_clear_intbit(status)) return packFloatx80(extractFloatx80Sign(a), aExp, 0);
2371 return faddsub_swap_inf(status) ? b : a;
2374 zSig0 = aSig + bSig;
2375 #ifndef SOFTFLOAT_68K
2377 normalizeFloatx80Subnormal( zSig0, &zExp, &zSig0 );
2382 #ifdef SOFTFLOAT_68K
2383 if ( aSig == 0 && bSig == 0 ) return packFloatx80( zSign, 0, 0 );
2384 if ( aSig == 0 || bSig == 0 ) goto roundAndPack;
2388 zSig0 = aSig + bSig;
2389 if ( (int64_t) zSig0 < 0 ) goto roundAndPack;
2391 shift64ExtraRightJamming( zSig0, zSig1, 1, &zSig0, &zSig1 );
2392 zSig0 |= LIT64( 0x8000000000000000 );
2395 return roundAndPackFloatx80(status->floatx80_rounding_precision,
2396 zSign, zExp, zSig0, zSig1, status);
2399 /*----------------------------------------------------------------------------
2400 | Returns the result of subtracting the absolute values of the extended
2401 | double-precision floating-point values `a' and `b'. If `zSign' is 1, the
2402 | difference is negated before being returned. `zSign' is ignored if the
2403 | result is a NaN. The subtraction is performed according to the IEC/IEEE
2404 | Standard for Binary Floating-Point Arithmetic.
2405 *----------------------------------------------------------------------------*/
2407 static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, flag zSign,
2408 float_status *status)
2410 int32_t aExp, bExp, zExp;
2411 uint64_t aSig, bSig, zSig0, zSig1;
2414 aSig = extractFloatx80Frac( a );
2415 aExp = extractFloatx80Exp( a );
2416 bSig = extractFloatx80Frac( b );
2417 bExp = extractFloatx80Exp( b );
2418 expDiff = aExp - bExp;
2419 if ( 0 < expDiff ) goto aExpBigger;
2420 if ( expDiff < 0 ) goto bExpBigger;
2421 if ( aExp == 0x7FFF ) {
2422 if ( (uint64_t) ( ( aSig | bSig )<<1 ) ) {
2423 return propagateFloatx80NaN(a, b, status);
2425 float_raise(float_flag_invalid, status);
2426 return floatx80_default_nan(status);
2428 #ifndef SOFTFLOAT_68K
2435 if ( bSig < aSig ) goto aBigger;
2436 if ( aSig < bSig ) goto bBigger;
2437 return packFloatx80(status->float_rounding_mode == float_round_down, 0, 0);
2439 if ( bExp == 0x7FFF ) {
2440 if ((uint64_t)(bSig << 1)) return propagateFloatx80NaN(a, b, status);
2441 if (inf_clear_intbit(status)) bSig = 0;
2442 return packFloatx80(zSign ^ 1, bExp, bSig);
2444 #ifndef SOFTFLOAT_68K
2445 if ( aExp == 0 ) ++expDiff;
2447 shift128RightJamming( aSig, 0, - expDiff, &aSig, &zSig1 );
2449 sub128( bSig, 0, aSig, zSig1, &zSig0, &zSig1 );
2452 goto normalizeRoundAndPack;
2454 if ( aExp == 0x7FFF ) {
2455 if ((uint64_t)(aSig << 1)) return propagateFloatx80NaN(a, b, status);
2456 return inf_clear_intbit(status) ? packFloatx80(extractFloatx80Sign(a), aExp, 0) : a;
2458 #ifndef SOFTFLOAT_68K
2459 if ( bExp == 0 ) --expDiff;
2461 shift128RightJamming( bSig, 0, expDiff, &bSig, &zSig1 );
2463 sub128( aSig, 0, bSig, zSig1, &zSig0, &zSig1 );
2465 normalizeRoundAndPack:
2466 return normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
2467 zSign, zExp, zSig0, zSig1, status);
2470 /*----------------------------------------------------------------------------
2471 | Returns the result of adding the extended double-precision floating-point
2472 | values `a' and `b'. The operation is performed according to the IEC/IEEE
2473 | Standard for Binary Floating-Point Arithmetic.
2474 *----------------------------------------------------------------------------*/
2476 floatx80 floatx80_add(floatx80 a, floatx80 b, float_status *status)
2480 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
2481 float_raise(float_flag_invalid, status);
2482 return floatx80_default_nan(status);
2484 aSign = extractFloatx80Sign( a );
2485 bSign = extractFloatx80Sign( b );
2486 if ( aSign == bSign ) {
2487 return addFloatx80Sigs(a, b, aSign, status);
2490 return subFloatx80Sigs(a, b, aSign, status);
2495 /*----------------------------------------------------------------------------
2496 | Returns the result of subtracting the extended double-precision floating-
2497 | point values `a' and `b'. The operation is performed according to the
2498 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2499 *----------------------------------------------------------------------------*/
2501 floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status *status)
2505 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
2506 float_raise(float_flag_invalid, status);
2507 return floatx80_default_nan(status);
2509 aSign = extractFloatx80Sign( a );
2510 bSign = extractFloatx80Sign( b );
2511 if ( aSign == bSign ) {
2512 return subFloatx80Sigs(a, b, aSign, status);
2515 return addFloatx80Sigs(a, b, aSign, status);
2520 /*----------------------------------------------------------------------------
2521 | Returns the result of multiplying the extended double-precision floating-
2522 | point values `a' and `b'. The operation is performed according to the
2523 | IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2524 *----------------------------------------------------------------------------*/
2526 floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status *status)
2528 flag aSign, bSign, zSign;
2529 int32_t aExp, bExp, zExp;
2530 uint64_t aSig, bSig, zSig0, zSig1;
2532 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
2533 float_raise(float_flag_invalid, status);
2534 return floatx80_default_nan(status);
2536 aSig = extractFloatx80Frac( a );
2537 aExp = extractFloatx80Exp( a );
2538 aSign = extractFloatx80Sign( a );
2539 bSig = extractFloatx80Frac( b );
2540 bExp = extractFloatx80Exp( b );
2541 bSign = extractFloatx80Sign( b );
2542 zSign = aSign ^ bSign;
2543 if ( aExp == 0x7FFF ) {
2544 if ( (uint64_t) ( aSig<<1 )
2545 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
2546 return propagateFloatx80NaN(a, b, status);
2548 if ( ( bExp | bSig ) == 0 ) goto invalid;
2549 if (inf_clear_intbit(status)) aSig = 0;
2550 return packFloatx80(zSign, aExp, aSig);
2552 if ( bExp == 0x7FFF ) {
2553 if ((uint64_t)(bSig << 1)) {
2554 return propagateFloatx80NaN(a, b, status);
2556 if ( ( aExp | aSig ) == 0 ) {
2558 float_raise(float_flag_invalid, status);
2559 return floatx80_default_nan(status);
2561 if (inf_clear_intbit(status)) bSig = 0;
2562 return packFloatx80(zSign, bExp, bSig);
2565 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2566 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2569 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
2570 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2572 zExp = aExp + bExp - 0x3FFE;
2573 mul64To128( aSig, bSig, &zSig0, &zSig1 );
2574 if ( 0 < (int64_t) zSig0 ) {
2575 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
2578 return roundAndPackFloatx80(status->floatx80_rounding_precision,
2579 zSign, zExp, zSig0, zSig1, status);
2582 #ifdef SOFTFLOAT_68K // 21-01-2017: Added for Previous
2583 floatx80 floatx80_sglmul( floatx80 a, floatx80 b, float_status *status )
2585 flag aSign, bSign, zSign;
2586 int32_t aExp, bExp, zExp;
2587 uint64_t aSig, bSig, zSig0, zSig1;
2589 aSig = extractFloatx80Frac( a );
2590 aExp = extractFloatx80Exp( a );
2591 aSign = extractFloatx80Sign( a );
2592 bSig = extractFloatx80Frac( b );
2593 bExp = extractFloatx80Exp( b );
2594 bSign = extractFloatx80Sign( b );
2595 zSign = aSign ^ bSign;
2596 if ( aExp == 0x7FFF ) {
2597 if ( (uint64_t) ( aSig<<1 )
2598 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
2599 return propagateFloatx80NaN( a, b, status );
2601 if ( ( bExp | bSig ) == 0 ) goto invalid;
2602 if (inf_clear_intbit(status)) aSig = 0;
2603 return packFloatx80(zSign, aExp, aSig);
2605 if ( bExp == 0x7FFF ) {
2606 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
2607 if ( ( aExp | aSig ) == 0 ) {
2609 float_raise( float_flag_invalid, status );
2610 return floatx80_default_nan(status);
2612 if (inf_clear_intbit(status)) bSig = 0;
2613 return packFloatx80(zSign, bExp, bSig);
2616 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2617 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2620 if ( bSig == 0 ) return packFloatx80( zSign, 0, 0 );
2621 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2623 aSig &= LIT64( 0xFFFFFF0000000000 );
2624 bSig &= LIT64( 0xFFFFFF0000000000 );
2625 zExp = aExp + bExp - 0x3FFE;
2626 mul64To128( aSig, bSig, &zSig0, &zSig1 );
2627 if ( 0 < (int64_t) zSig0 ) {
2628 shortShift128Left( zSig0, zSig1, 1, &zSig0, &zSig1 );
2631 return roundSigAndPackFloatx80( 32, zSign, zExp, zSig0, zSig1, status);
2634 #endif // End of addition for Previous
2637 /*----------------------------------------------------------------------------
2638 | Returns the result of dividing the extended double-precision floating-point
2639 | value `a' by the corresponding value `b'. The operation is performed
2640 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2641 *----------------------------------------------------------------------------*/
2643 floatx80 floatx80_div(floatx80 a, floatx80 b, float_status *status)
2645 flag aSign, bSign, zSign;
2646 int32_t aExp, bExp, zExp;
2647 uint64_t aSig, bSig, zSig0, zSig1;
2648 uint64_t rem0, rem1, rem2, term0, term1, term2;
2650 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
2651 float_raise(float_flag_invalid, status);
2652 return floatx80_default_nan(status);
2654 aSig = extractFloatx80Frac( a );
2655 aExp = extractFloatx80Exp( a );
2656 aSign = extractFloatx80Sign( a );
2657 bSig = extractFloatx80Frac( b );
2658 bExp = extractFloatx80Exp( b );
2659 bSign = extractFloatx80Sign( b );
2660 zSign = aSign ^ bSign;
2661 if ( aExp == 0x7FFF ) {
2662 if ((uint64_t)(aSig << 1)) {
2663 return propagateFloatx80NaN(a, b, status);
2665 if ( bExp == 0x7FFF ) {
2666 if ((uint64_t)(bSig << 1)) {
2667 return propagateFloatx80NaN(a, b, status);
2671 if (inf_clear_intbit(status)) aSig = 0;
2672 return packFloatx80(zSign, aExp, aSig);
2674 if ( bExp == 0x7FFF ) {
2675 if ((uint64_t)(bSig << 1)) {
2676 return propagateFloatx80NaN(a, b, status);
2678 return packFloatx80( zSign, 0, 0 );
2682 if ( ( aExp | aSig ) == 0 ) {
2684 float_raise(float_flag_invalid, status);
2685 return floatx80_default_nan(status);
2687 float_raise(float_flag_divbyzero, status);
2688 return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low );
2690 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2693 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2694 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2696 zExp = aExp - bExp + 0x3FFE;
2698 if ( bSig <= aSig ) {
2699 shift128Right( aSig, 0, 1, &aSig, &rem1 );
2702 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
2703 mul64To128( bSig, zSig0, &term0, &term1 );
2704 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
2705 while ( (int64_t) rem0 < 0 ) {
2707 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2709 zSig1 = estimateDiv128To64( rem1, 0, bSig );
2710 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
2711 mul64To128( bSig, zSig1, &term1, &term2 );
2712 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
2713 while ( (int64_t) rem1 < 0 ) {
2715 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
2717 zSig1 |= ( ( rem1 | rem2 ) != 0 );
2719 return roundAndPackFloatx80(status->floatx80_rounding_precision,
2720 zSign, zExp, zSig0, zSig1, status);
2723 #ifdef SOFTFLOAT_68K // 21-01-2017: Addition for Previous
2724 floatx80 floatx80_sgldiv( floatx80 a, floatx80 b, float_status *status )
2726 flag aSign, bSign, zSign;
2727 int32_t aExp, bExp, zExp;
2728 uint64_t aSig, bSig, zSig0, zSig1;
2729 uint64_t rem0, rem1, rem2, term0, term1, term2;
2731 aSig = extractFloatx80Frac( a );
2732 aExp = extractFloatx80Exp( a );
2733 aSign = extractFloatx80Sign( a );
2734 bSig = extractFloatx80Frac( b );
2735 bExp = extractFloatx80Exp( b );
2736 bSign = extractFloatx80Sign( b );
2737 zSign = aSign ^ bSign;
2738 if ( aExp == 0x7FFF ) {
2739 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
2740 if ( bExp == 0x7FFF ) {
2741 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
2744 if (inf_clear_intbit(status)) aSig = 0;
2745 return packFloatx80(zSign, aExp, aSig);
2747 if ( bExp == 0x7FFF ) {
2748 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
2749 return packFloatx80( zSign, 0, 0 );
2753 if ( ( aExp | aSig ) == 0 ) {
2755 float_raise( float_flag_invalid, status );
2756 return floatx80_default_nan(status);
2758 float_raise( float_flag_divbyzero, status );
2759 return packFloatx80( zSign, 0x7FFF, floatx80_default_infinity_low );
2761 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2764 if ( aSig == 0 ) return packFloatx80( zSign, 0, 0 );
2765 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
2768 zExp = aExp - bExp + 0x3FFE;
2770 if ( bSig <= aSig ) {
2771 shift128Right( aSig, 0, 1, &aSig, &rem1 );
2774 zSig0 = estimateDiv128To64( aSig, rem1, bSig );
2775 mul64To128( bSig, zSig0, &term0, &term1 );
2776 sub128( aSig, rem1, term0, term1, &rem0, &rem1 );
2777 while ( (int64_t) rem0 < 0 ) {
2779 add128( rem0, rem1, 0, bSig, &rem0, &rem1 );
2781 zSig1 = estimateDiv128To64( rem1, 0, bSig );
2782 if ( (uint64_t) ( zSig1<<1 ) <= 8 ) {
2783 mul64To128( bSig, zSig1, &term1, &term2 );
2784 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
2785 while ( (int64_t) rem1 < 0 ) {
2787 add128( rem1, rem2, 0, bSig, &rem1, &rem2 );
2789 zSig1 |= ( ( rem1 | rem2 ) != 0 );
2791 return roundSigAndPackFloatx80( 32, zSign, zExp, zSig0, zSig1, status);
2794 #endif // End of addition for Previous
2797 /*----------------------------------------------------------------------------
2798 | Returns the remainder of the extended double-precision floating-point value
2799 | `a' with respect to the corresponding value `b'. The operation is performed
2800 | according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
2801 *----------------------------------------------------------------------------*/
2803 #ifndef SOFTFLOAT_68K
2804 floatx80 floatx80_rem(floatx80 a, floatx80 b, float_status *status)
2807 int32_t aExp, bExp, expDiff;
2808 uint64_t aSig0, aSig1, bSig;
2809 uint64_t q, term0, term1, alternateASig0, alternateASig1;
2811 if (floatx80_invalid_encoding(a) || floatx80_invalid_encoding(b)) {
2812 float_raise(float_flag_invalid, status);
2813 return floatx80_default_nan(status);
2815 aSig0 = extractFloatx80Frac( a );
2816 aExp = extractFloatx80Exp( a );
2817 aSign = extractFloatx80Sign( a );
2818 bSig = extractFloatx80Frac( b );
2819 bExp = extractFloatx80Exp( b );
2820 if ( aExp == 0x7FFF ) {
2821 if ( (uint64_t) ( aSig0<<1 )
2822 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
2823 return propagateFloatx80NaN(a, b, status);
2827 if ( bExp == 0x7FFF ) {
2828 if ((uint64_t)(bSig << 1)) {
2829 return propagateFloatx80NaN(a, b, status);
2836 float_raise(float_flag_invalid, status);
2837 return floatx80_default_nan(status);
2839 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2842 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
2843 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
2845 bSig |= LIT64( 0x8000000000000000 );
2847 expDiff = aExp - bExp;
2849 if ( expDiff < 0 ) {
2850 if ( expDiff < -1 ) return a;
2851 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
2854 q = ( bSig <= aSig0 );
2855 if ( q ) aSig0 -= bSig;
2857 while ( 0 < expDiff ) {
2858 q = estimateDiv128To64( aSig0, aSig1, bSig );
2859 q = ( 2 < q ) ? q - 2 : 0;
2860 mul64To128( bSig, q, &term0, &term1 );
2861 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2862 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
2866 if ( 0 < expDiff ) {
2867 q = estimateDiv128To64( aSig0, aSig1, bSig );
2868 q = ( 2 < q ) ? q - 2 : 0;
2870 mul64To128( bSig, q<<( 64 - expDiff ), &term0, &term1 );
2871 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2872 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
2873 while ( le128( term0, term1, aSig0, aSig1 ) ) {
2875 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2882 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
2883 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
2884 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
2887 aSig0 = alternateASig0;
2888 aSig1 = alternateASig1;
2892 normalizeRoundAndPackFloatx80(
2893 80, zSign, bExp + expDiff, aSig0, aSig1, status);
2896 #else // 09-01-2017: Modified version for Previous
2897 floatx80 floatx80_rem( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status )
2899 flag aSign, bSign, zSign;
2900 int32_t aExp, bExp, expDiff;
2901 uint64_t aSig0, aSig1, bSig;
2902 uint64_t qTemp, term0, term1, alternateASig0, alternateASig1;
2904 aSig0 = extractFloatx80Frac( a );
2905 aExp = extractFloatx80Exp( a );
2906 aSign = extractFloatx80Sign( a );
2907 bSig = extractFloatx80Frac( b );
2908 bExp = extractFloatx80Exp( b );
2909 bSign = extractFloatx80Sign( b );
2911 if ( aExp == 0x7FFF ) {
2912 if ( (uint64_t) ( aSig0<<1 )
2913 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
2914 return propagateFloatx80NaN( a, b, status );
2918 if ( bExp == 0x7FFF ) {
2919 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
2920 *s = (aSign != bSign);
2927 float_raise( float_flag_invalid, status );
2928 return floatx80_default_nan(status);
2930 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
2933 #ifdef SOFTFLOAT_68K
2935 *s = (aSign != bSign);
2940 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
2942 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
2944 bSig |= LIT64( 0x8000000000000000 );
2946 expDiff = aExp - bExp;
2947 *s = (aSign != bSign);
2949 if ( expDiff < 0 ) {
2950 if ( expDiff < -1 ) return a;
2951 shift128Right( aSig0, 0, 1, &aSig0, &aSig1 );
2954 qTemp = ( bSig <= aSig0 );
2955 if ( qTemp ) aSig0 -= bSig;
2956 *q = ( expDiff > 63 ) ? 0 : ( qTemp<<expDiff );
2958 while ( 0 < expDiff ) {
2959 qTemp = estimateDiv128To64( aSig0, aSig1, bSig );
2960 qTemp = ( 2 < qTemp ) ? qTemp - 2 : 0;
2961 mul64To128( bSig, qTemp, &term0, &term1 );
2962 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2963 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
2964 *q = ( expDiff > 63 ) ? 0 : ( qTemp<<expDiff );
2968 if ( 0 < expDiff ) {
2969 qTemp = estimateDiv128To64( aSig0, aSig1, bSig );
2970 qTemp = ( 2 < qTemp ) ? qTemp - 2 : 0;
2971 qTemp >>= 64 - expDiff;
2972 mul64To128( bSig, qTemp<<( 64 - expDiff ), &term0, &term1 );
2973 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2974 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
2975 while ( le128( term0, term1, aSig0, aSig1 ) ) {
2977 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
2985 sub128( term0, term1, aSig0, aSig1, &alternateASig0, &alternateASig1 );
2986 if ( lt128( alternateASig0, alternateASig1, aSig0, aSig1 )
2987 || ( eq128( alternateASig0, alternateASig1, aSig0, aSig1 )
2990 aSig0 = alternateASig0;
2991 aSig1 = alternateASig1;
2996 normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
2997 zSign, bExp + expDiff, aSig0, aSig1, status );
3000 #endif // End of modification
3003 #ifdef SOFTFLOAT_68K // 08-01-2017: Added for Previous
3004 /*----------------------------------------------------------------------------
3005 | Returns the modulo remainder of the extended double-precision floating-point
3006 | value `a' with respect to the corresponding value `b'.
3007 *----------------------------------------------------------------------------*/
3009 floatx80 floatx80_mod( floatx80 a, floatx80 b, uint64_t *q, flag *s, float_status *status )
3011 flag aSign, bSign, zSign;
3012 int32_t aExp, bExp, expDiff;
3013 uint64_t aSig0, aSig1, bSig;
3014 uint64_t qTemp, term0, term1;
3016 aSig0 = extractFloatx80Frac( a );
3017 aExp = extractFloatx80Exp( a );
3018 aSign = extractFloatx80Sign( a );
3019 bSig = extractFloatx80Frac( b );
3020 bExp = extractFloatx80Exp( b );
3021 bSign = extractFloatx80Sign( b );
3023 if ( aExp == 0x7FFF ) {
3024 if ( (uint64_t) ( aSig0<<1 )
3025 || ( ( bExp == 0x7FFF ) && (uint64_t) ( bSig<<1 ) ) ) {
3026 return propagateFloatx80NaN( a, b, status );
3030 if ( bExp == 0x7FFF ) {
3031 if ( (uint64_t) ( bSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
3032 *s = (aSign != bSign);
3039 float_raise( float_flag_invalid, status );
3040 return floatx80_default_nan(status);
3042 normalizeFloatx80Subnormal( bSig, &bExp, &bSig );
3045 #ifdef SOFTFLOAT_68K
3047 *s = (aSign != bSign);
3052 if ( (uint64_t) ( aSig0<<1 ) == 0 ) return a;
3054 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3056 bSig |= LIT64( 0x8000000000000000 );
3058 expDiff = aExp - bExp;
3059 *s = (aSign != bSign);
3061 if ( expDiff < 0 ) return a;
3062 qTemp = ( bSig <= aSig0 );
3063 if ( qTemp ) aSig0 -= bSig;
3064 *q = ( expDiff > 63 ) ? 0 : ( qTemp<<expDiff );
3066 while ( 0 < expDiff ) {
3067 qTemp = estimateDiv128To64( aSig0, aSig1, bSig );
3068 qTemp = ( 2 < qTemp ) ? qTemp - 2 : 0;
3069 mul64To128( bSig, qTemp, &term0, &term1 );
3070 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3071 shortShift128Left( aSig0, aSig1, 62, &aSig0, &aSig1 );
3072 *q = ( expDiff > 63 ) ? 0 : ( qTemp<<expDiff );
3076 if ( 0 < expDiff ) {
3077 qTemp = estimateDiv128To64( aSig0, aSig1, bSig );
3078 qTemp = ( 2 < qTemp ) ? qTemp - 2 : 0;
3079 qTemp >>= 64 - expDiff;
3080 mul64To128( bSig, qTemp<<( 64 - expDiff ), &term0, &term1 );
3081 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3082 shortShift128Left( 0, bSig, 64 - expDiff, &term0, &term1 );
3083 while ( le128( term0, term1, aSig0, aSig1 ) ) {
3085 sub128( aSig0, aSig1, term0, term1, &aSig0, &aSig1 );
3090 normalizeRoundAndPackFloatx80(status->floatx80_rounding_precision,
3091 zSign, bExp + expDiff, aSig0, aSig1, status );
3094 #endif // end of addition for Previous
3097 /*----------------------------------------------------------------------------
3098 | Returns the square root of the extended double-precision floating-point
3099 | value `a'. The operation is performed according to the IEC/IEEE Standard
3100 | for Binary Floating-Point Arithmetic.
3101 *----------------------------------------------------------------------------*/
3103 floatx80 floatx80_sqrt(floatx80 a, float_status *status)
3107 uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
3108 uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
3110 if (floatx80_invalid_encoding(a)) {
3111 float_raise(float_flag_invalid, status);
3112 return floatx80_default_nan(status);
3114 aSig0 = extractFloatx80Frac( a );
3115 aExp = extractFloatx80Exp( a );
3116 aSign = extractFloatx80Sign( a );
3117 if ( aExp == 0x7FFF ) {
3118 if ((uint64_t)(aSig0 << 1))
3119 return propagateFloatx80NaNOneArg(a, status);
3120 if (!aSign) return inf_clear_intbit(status) ? packFloatx80(aSign, aExp, 0) : a;
3124 if ( ( aExp | aSig0 ) == 0 ) return a;
3126 float_raise(float_flag_invalid, status);
3127 return floatx80_default_nan(status);
3130 if ( aSig0 == 0 ) return packFloatx80( 0, 0, 0 );
3131 normalizeFloatx80Subnormal( aSig0, &aExp, &aSig0 );
3133 zExp = ( ( aExp - 0x3FFF )>>1 ) + 0x3FFF;
3134 zSig0 = estimateSqrt32( aExp, aSig0>>32 );
3135 shift128Right( aSig0, 0, 2 + ( aExp & 1 ), &aSig0, &aSig1 );
3136 zSig0 = estimateDiv128To64( aSig0, aSig1, zSig0<<32 ) + ( zSig0<<30 );
3137 doubleZSig0 = zSig0<<1;
3138 mul64To128( zSig0, zSig0, &term0, &term1 );
3139 sub128( aSig0, aSig1, term0, term1, &rem0, &rem1 );
3140 while ( (int64_t) rem0 < 0 ) {
3143 add128( rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1 );
3145 zSig1 = estimateDiv128To64( rem1, 0, doubleZSig0 );
3146 if ( ( zSig1 & LIT64( 0x3FFFFFFFFFFFFFFF ) ) <= 5 ) {
3147 if ( zSig1 == 0 ) zSig1 = 1;
3148 mul64To128( doubleZSig0, zSig1, &term1, &term2 );
3149 sub128( rem1, 0, term1, term2, &rem1, &rem2 );
3150 mul64To128( zSig1, zSig1, &term2, &term3 );
3151 sub192( rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3 );
3152 while ( (int64_t) rem1 < 0 ) {
3154 shortShift128Left( 0, zSig1, 1, &term2, &term3 );
3156 term2 |= doubleZSig0;
3157 add192( rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3 );
3159 zSig1 |= ( ( rem1 | rem2 | rem3 ) != 0 );
3161 shortShift128Left( 0, zSig1, 1, &zSig0, &zSig1 );
3162 zSig0 |= doubleZSig0;
3163 return roundAndPackFloatx80(status->floatx80_rounding_precision,
3164 0, zExp, zSig0, zSig1, status);
3168 #ifdef SOFTFLOAT_68K // 07-01-2017: Added for Previous
3169 /*----------------------------------------------------------------------------
3170 | Returns the mantissa of the extended double-precision floating-point
3172 *----------------------------------------------------------------------------*/
3174 floatx80 floatx80_getman( floatx80 a, float_status *status)
3180 aSig = extractFloatx80Frac( a );
3181 aExp = extractFloatx80Exp( a );
3182 aSign = extractFloatx80Sign( a );
3184 if ( aExp == 0x7FFF ) {
3185 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status );
3186 float_raise( float_flag_invalid, status );
3187 return floatx80_default_nan(status);
3191 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3192 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3195 return packFloatx80(aSign, 0x3fff, aSig);
3198 /*----------------------------------------------------------------------------
3199 | Returns the exponent of the extended double-precision floating-point
3200 | value `a' as an extended double-precision value.
3201 *----------------------------------------------------------------------------*/
3203 floatx80 floatx80_getexp( floatx80 a, float_status *status)
3209 aSig = extractFloatx80Frac( a );
3210 aExp = extractFloatx80Exp( a );
3211 aSign = extractFloatx80Sign( a );
3213 if ( aExp == 0x7FFF ) {
3214 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaNOneArg( a, status );
3215 float_raise( float_flag_invalid, status );
3216 return floatx80_default_nan(status);
3220 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3221 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3224 return int32_to_floatx80(aExp - 0x3FFF);
3227 /*----------------------------------------------------------------------------
3228 | Scales extended double-precision floating-point value in operand `a' by
3229 | value `b'. The function truncates the value in the second operand 'b' to
3230 | an integral value and adds that value to the exponent of the operand 'a'.
3231 | The operation performed according to the IEC/IEEE Standard for Binary
3232 | Floating-Point Arithmetic.
3233 *----------------------------------------------------------------------------*/
3235 floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status *status)
3238 int32_t aExp, bExp, shiftCount;
3239 uint64_t aSig, bSig;
3241 aSig = extractFloatx80Frac(a);
3242 aExp = extractFloatx80Exp(a);
3243 aSign = extractFloatx80Sign(a);
3244 bSig = extractFloatx80Frac(b);
3245 bExp = extractFloatx80Exp(b);
3246 bSign = extractFloatx80Sign(b);
3248 if ( bExp == 0x7FFF ) {
3249 if ( (uint64_t) ( bSig<<1 ) ||
3250 ( ( aExp == 0x7FFF ) && (uint64_t) ( aSig<<1 ) ) ) {
3251 return propagateFloatx80NaN( a, b, status );
3253 float_raise( float_flag_invalid, status );
3254 return floatx80_default_nan(status);
3256 if ( aExp == 0x7FFF ) {
3257 if ( (uint64_t) ( aSig<<1 ) ) return propagateFloatx80NaN( a, b, status );
3261 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0);
3262 if ( bExp < 0x3FFF ) return a;
3263 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3266 if (bExp < 0x3FFF) {
3267 return roundAndPackFloatx80(
3268 status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status);
3271 if ( 0x400F < bExp ) {
3272 aExp = bSign ? -0x6001 : 0xE000;
3273 return roundAndPackFloatx80(
3274 status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status );
3277 shiftCount = 0x403E - bExp;
3278 bSig >>= shiftCount;
3279 aExp = bSign ? ( aExp - bSig ) : ( aExp + bSig );
3281 return roundAndPackFloatx80(
3282 status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status);
3286 /*-----------------------------------------------------------------------------
3287 | Calculates the absolute value of the extended double-precision floating-point
3288 | value `a'. The operation is performed according to the IEC/IEEE Standard
3289 | for Binary Floating-Point Arithmetic.
3290 *----------------------------------------------------------------------------*/
3292 floatx80 floatx80_abs(floatx80 a, float_status *status)
3297 aSig = extractFloatx80Frac(a);
3298 aExp = extractFloatx80Exp(a);
3300 if ( aExp == 0x7FFF ) {
3301 if ( (uint64_t) ( aSig<<1 ) )
3302 return propagateFloatx80NaNOneArg( a, status );
3303 if (inf_clear_intbit(status)) aSig = 0;
3304 return packFloatx80(0, aExp, aSig);
3308 if ( aSig == 0 ) return packFloatx80( 0, 0, 0 );
3309 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3312 return roundAndPackFloatx80(
3313 status->floatx80_rounding_precision, 0, aExp, aSig, 0, status );
3317 /*-----------------------------------------------------------------------------
3318 | Changes the sign of the extended double-precision floating-point value 'a'.
3319 | The operation is performed according to the IEC/IEEE Standard for Binary
3320 | Floating-Point Arithmetic.
3321 *----------------------------------------------------------------------------*/
3323 floatx80 floatx80_neg(floatx80 a, float_status *status)
3329 aSig = extractFloatx80Frac(a);
3330 aExp = extractFloatx80Exp(a);
3331 aSign = extractFloatx80Sign(a);
3333 if ( aExp == 0x7FFF ) {
3334 if ( (uint64_t) ( aSig<<1 ) )
3335 return propagateFloatx80NaNOneArg( a, status );
3336 if (inf_clear_intbit(status)) aSig = 0;
3337 return packFloatx80(!aSign, aExp, aSig);
3343 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3344 normalizeFloatx80Subnormal( aSig, &aExp, &aSig );
3347 return roundAndPackFloatx80(
3348 status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status );
3352 /*----------------------------------------------------------------------------
3353 | Returns the result of comparing the extended double-precision floating-
3354 | point values `a' and `b'. The result is abstracted for matching the
3355 | corresponding condition codes.
3356 *----------------------------------------------------------------------------*/
3358 floatx80 floatx80_cmp( floatx80 a, floatx80 b, float_status *status )
3362 uint64_t aSig, bSig;
3364 aSig = extractFloatx80Frac( a );
3365 aExp = extractFloatx80Exp( a );
3366 aSign = extractFloatx80Sign( a );
3367 bSig = extractFloatx80Frac( b );
3368 bExp = extractFloatx80Exp( b );
3369 bSign = extractFloatx80Sign( b );
3371 if ( ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) ) ||
3372 ( bExp == 0x7FFF && (uint64_t) ( bSig<<1 ) ) ) {
3373 // 68040 FCMP -NaN return N flag set
3374 if (fcmp_signed_nan(status))
3375 return propagateFloatx80NaN(a, b, status );
3376 return propagateFloatx80NaN(packFloatx80(0, aExp, aSig),
3377 packFloatx80(0, bExp, bSig), status);
3380 if ( bExp < aExp ) return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3381 if ( aExp < bExp ) return packFloatx80( bSign ^ 1, 0x3FFF, LIT64( 0x8000000000000000 ) );
3383 if ( aExp == 0x7FFF ) {
3384 if ( aSign == bSign ) return packFloatx80( aSign, 0, 0 );
3385 return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3388 if ( bSig < aSig ) return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3389 if ( aSig < bSig ) return packFloatx80( bSign ^ 1, 0x3FFF, LIT64( 0x8000000000000000 ) );
3391 if ( aSig == 0 ) return packFloatx80( aSign, 0, 0 );
3393 if ( aSign == bSign ) return packFloatx80( 0, 0, 0 );
3395 return packFloatx80( aSign, 0x3FFF, LIT64( 0x8000000000000000 ) );
3399 floatx80 floatx80_tst( floatx80 a, float_status *status )
3404 aSig = extractFloatx80Frac( a );
3405 aExp = extractFloatx80Exp( a );
3407 if ( aExp == 0x7FFF && (uint64_t) ( aSig<<1 ) )
3408 return propagateFloatx80NaNOneArg( a, status );
3412 floatx80 floatx80_move( floatx80 a, float_status *status )
3418 aSig = extractFloatx80Frac( a );
3419 aExp = extractFloatx80Exp( a );
3420 aSign = extractFloatx80Sign( a );
3422 if ( aExp == 0x7FFF ) {
3423 if ((uint64_t)(aSig << 1)) return propagateFloatx80NaNOneArg(a, status);
3424 return inf_clear_intbit(status) ? packFloatx80(aSign, aExp, 0) : a;
3427 if ( aSig == 0 ) return a;
3428 return normalizeRoundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status );
3430 return roundAndPackFloatx80( status->floatx80_rounding_precision, aSign, aExp, aSig, 0, status );
3433 floatx80 floatx80_denormalize( floatx80 a, flag eSign)
3440 aSig = extractFloatx80Frac( a );
3441 aExp = extractFloatx80Exp( a );
3442 aSign = extractFloatx80Sign( a );
3445 shiftCount = 0x8000 - aExp;
3447 if (shiftCount > 63) {
3450 aSig >>= shiftCount;
3453 return packFloatx80(aSign, aExp, aSig);
3456 #endif // End of addition for Previous
3458 /*----------------------------------------------------------------------------
3459 | Returns 1 if the extended double-precision floating-point value `a' is
3460 | equal to the corresponding value `b', and 0 otherwise. The comparison is
3461 | performed according to the IEC/IEEE Standard for Binary Floating-Point
3463 *----------------------------------------------------------------------------*/
3465 flag floatx80_eq( floatx80 a, floatx80 b, float_status *status )
3467 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3468 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
3469 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3470 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
3472 if ( floatx80_is_signaling_nan( a )
3473 || floatx80_is_signaling_nan( b ) ) {
3474 float_raise( float_flag_invalid, status );
3480 && ( ( a.high == b.high )
3482 && ( (uint16_t) ( ( a.high | b.high )<<1 ) == 0 ) )
3487 /*----------------------------------------------------------------------------
3488 | Returns 1 if the extended double-precision floating-point value `a' is
3489 | less than or equal to the corresponding value `b', and 0 otherwise. The
3490 | comparison is performed according to the IEC/IEEE Standard for Binary
3491 | Floating-Point Arithmetic.
3492 *----------------------------------------------------------------------------*/
3494 flag floatx80_le( floatx80 a, floatx80 b, float_status *status )
3498 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3499 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
3500 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3501 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
3503 float_raise( float_flag_invalid, status );
3506 aSign = extractFloatx80Sign( a );
3507 bSign = extractFloatx80Sign( b );
3508 if ( aSign != bSign ) {
3511 || ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3515 aSign ? le128( b.high, b.low, a.high, a.low )
3516 : le128( a.high, a.low, b.high, b.low );
3519 /*----------------------------------------------------------------------------
3520 | Returns 1 if the extended double-precision floating-point value `a' is
3521 | less than the corresponding value `b', and 0 otherwise. The comparison
3522 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
3524 *----------------------------------------------------------------------------*/
3526 flag floatx80_lt( floatx80 a, floatx80 b, float_status *status )
3530 if ( ( ( extractFloatx80Exp( a ) == 0x7FFF )
3531 && (uint64_t) ( extractFloatx80Frac( a )<<1 ) )
3532 || ( ( extractFloatx80Exp( b ) == 0x7FFF )
3533 && (uint64_t) ( extractFloatx80Frac( b )<<1 ) )
3535 float_raise( float_flag_invalid, status );
3538 aSign = extractFloatx80Sign( a );
3539 bSign = extractFloatx80Sign( b );
3540 if ( aSign != bSign ) {
3543 && ( ( ( (uint16_t) ( ( a.high | b.high )<<1 ) ) | a.low | b.low )
3547 aSign ? lt128( b.high, b.low, a.high, a.low )
3548 : lt128( a.high, a.low, b.high, b.low );
3553 /*----------------------------------------------------------------------------
3554 | Returns the result of converting the 64-bit two's complement integer `a'
3555 | to the extended double-precision floating-point format. The conversion
3556 | is performed according to the IEC/IEEE Standard for Binary Floating-Point
3558 *----------------------------------------------------------------------------*/
3560 floatx80 int64_to_floatx80( int64_t a )
3566 if ( a == 0 ) return packFloatx80( 0, 0, 0 );
3568 absA = zSign ? - a : a;
3569 shiftCount = countLeadingZeros64( absA );
3570 return packFloatx80( zSign, 0x403E - shiftCount, absA<<shiftCount );