2 /*============================================================================
4 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
5 Arithmetic Package, Release 2b.
7 Written by John R. Hauser. This work was made possible in part by the
8 International Computer Science Institute, located at Suite 600, 1947 Center
9 Street, Berkeley, California 94704. Funding was partially provided by the
10 National Science Foundation under grant MIP-9311980. The original version
11 of this code was written as part of a project to build a fixed-point vector
12 processor in collaboration with the University of California at Berkeley,
13 overseen by Profs. Nelson Morgan and John Wawrzynek. More information
14 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
15 arithmetic/SoftFloat.html'.
17 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has
18 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
19 RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
20 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
21 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
22 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
23 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
24 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
26 Derivative works are acceptable, even for commercial purposes, so long as
27 (1) the source code for the derivative work includes prominent notice that
28 the work is derivative, and (2) the source code includes prominent notice with
29 these four paragraphs for those parts of this code that are retained.
31 =============================================================================*/
/*----------------------------------------------------------------------------
| Underflow tininess-detection mode, statically initialized to its default
| value, `float_tininess_after_rounding'.  (The declaration in `softfloat.h'
| must match the `int8' type used here.)
*----------------------------------------------------------------------------*/
int8 float_detect_tininess = float_tininess_after_rounding;
39 /*----------------------------------------------------------------------------
40 | Raises the exceptions specified by `flags'. Floating-point traps can be
41 | defined here if desired. It is currently not possible for such a trap to
42 | substitute a result value. If traps are not implemented, this routine
43 | should be simply `float_exception_flags |= flags;'.
44 *----------------------------------------------------------------------------*/
46 void float_raise( int8 flags )
49 float_exception_flags |= flags;
53 /*----------------------------------------------------------------------------
54 | Internal canonical NaN format.
55 *----------------------------------------------------------------------------*/
/*----------------------------------------------------------------------------
| The pattern for a default generated single-precision NaN: all 32 bits set.
*----------------------------------------------------------------------------*/
#define float32_default_nan 0xFFFFFFFF
66 /*----------------------------------------------------------------------------
67 | Returns 1 if the single-precision floating-point value `a' is a NaN;
68 | otherwise returns 0.
69 *----------------------------------------------------------------------------*/
71 flag float32_is_nan( float32 a )
74 return ( 0xFF000000 < (bits32) ( a<<1 ) );
78 /*----------------------------------------------------------------------------
79 | Returns 1 if the single-precision floating-point value `a' is a signaling
80 | NaN; otherwise returns 0.
81 *----------------------------------------------------------------------------*/
83 flag float32_is_signaling_nan( float32 a )
86 return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
90 /*----------------------------------------------------------------------------
91 | Returns the result of converting the single-precision floating-point NaN
92 | `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
93 | exception is raised.
94 *----------------------------------------------------------------------------*/
96 static commonNaNT float32ToCommonNaN( float32 a )
100 if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
103 z.high = ( (bits64) a )<<41;
108 /*----------------------------------------------------------------------------
109 | Returns the result of converting the canonical NaN `a' to the single-
110 | precision floating-point format.
111 *----------------------------------------------------------------------------*/
113 static float32 commonNaNToFloat32( commonNaNT a )
116 return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
120 /*----------------------------------------------------------------------------
121 | Takes two single-precision floating-point values `a' and `b', one of which
122 | is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
123 | signaling NaN, the invalid exception is raised.
124 *----------------------------------------------------------------------------*/
126 static float32 propagateFloat32NaN( float32 a, float32 b )
128 flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
130 aIsNaN = float32_is_nan( a );
131 aIsSignalingNaN = float32_is_signaling_nan( a );
132 bIsNaN = float32_is_nan( b );
133 bIsSignalingNaN = float32_is_signaling_nan( b );
136 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
138 return ( aIsSignalingNaN & bIsNaN ) ? b : a;
/*----------------------------------------------------------------------------
| The pattern for a default generated double-precision NaN: all 64 bits set.
*----------------------------------------------------------------------------*/
#define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )
151 /*----------------------------------------------------------------------------
152 | Returns 1 if the double-precision floating-point value `a' is a NaN;
153 | otherwise returns 0.
154 *----------------------------------------------------------------------------*/
156 flag float64_is_nan( float64 a )
159 return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );
163 /*----------------------------------------------------------------------------
164 | Returns 1 if the double-precision floating-point value `a' is a signaling
165 | NaN; otherwise returns 0.
166 *----------------------------------------------------------------------------*/
168 flag float64_is_signaling_nan( float64 a )
172 ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
173 && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
177 /*----------------------------------------------------------------------------
178 | Returns the result of converting the double-precision floating-point NaN
179 | `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
180 | exception is raised.
181 *----------------------------------------------------------------------------*/
183 static commonNaNT float64ToCommonNaN( float64 a )
187 if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
195 /*----------------------------------------------------------------------------
196 | Returns the result of converting the canonical NaN `a' to the double-
197 | precision floating-point format.
198 *----------------------------------------------------------------------------*/
200 static float64 commonNaNToFloat64( commonNaNT a )
204 ( ( (bits64) a.sign )<<63 )
205 | LIT64( 0x7FF8000000000000 )
210 /*----------------------------------------------------------------------------
211 | Takes two double-precision floating-point values `a' and `b', one of which
212 | is a NaN, and returns the appropriate NaN result. If either `a' or `b' is a
213 | signaling NaN, the invalid exception is raised.
214 *----------------------------------------------------------------------------*/
216 static float64 propagateFloat64NaN( float64 a, float64 b )
218 flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
220 aIsNaN = float64_is_nan( a );
221 aIsSignalingNaN = float64_is_signaling_nan( a );
222 bIsNaN = float64_is_nan( b );
223 bIsSignalingNaN = float64_is_signaling_nan( b );
224 a |= LIT64( 0x0008000000000000 );
225 b |= LIT64( 0x0008000000000000 );
226 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
228 return ( aIsSignalingNaN & bIsNaN ) ? b : a;
/*----------------------------------------------------------------------------
| The pattern for a default generated extended double-precision NaN.  The
| `high' and `low' values hold the most- and least-significant bits,
| respectively.  Every bit of both parts is set.
*----------------------------------------------------------------------------*/
#define floatx80_default_nan_high 0xFFFF
#define floatx80_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF )
246 /*----------------------------------------------------------------------------
247 | Returns 1 if the extended double-precision floating-point value `a' is a
248 | NaN; otherwise returns 0.
249 *----------------------------------------------------------------------------*/
251 flag floatx80_is_nan( floatx80 a )
254 return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
258 /*----------------------------------------------------------------------------
259 | Returns 1 if the extended double-precision floating-point value `a' is a
260 | signaling NaN; otherwise returns 0.
261 *----------------------------------------------------------------------------*/
263 flag floatx80_is_signaling_nan( floatx80 a )
267 aLow = a.low & ~ LIT64( 0x4000000000000000 );
269 ( ( a.high & 0x7FFF ) == 0x7FFF )
270 && (bits64) ( aLow<<1 )
271 && ( a.low == aLow );
275 /*----------------------------------------------------------------------------
276 | Returns the result of converting the extended double-precision floating-
277 | point NaN `a' to the canonical NaN format. If `a' is a signaling NaN, the
278 | invalid exception is raised.
279 *----------------------------------------------------------------------------*/
281 static commonNaNT floatx80ToCommonNaN( floatx80 a )
285 if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
293 /*----------------------------------------------------------------------------
294 | Returns the result of converting the canonical NaN `a' to the extended
295 | double-precision floating-point format.
296 *----------------------------------------------------------------------------*/
298 static floatx80 commonNaNToFloatx80( commonNaNT a )
302 z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
303 z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
308 /*----------------------------------------------------------------------------
309 | Takes two extended double-precision floating-point values `a' and `b', one
310 | of which is a NaN, and returns the appropriate NaN result. If either `a' or
311 | `b' is a signaling NaN, the invalid exception is raised.
312 *----------------------------------------------------------------------------*/
314 floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
316 flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
318 aIsNaN = floatx80_is_nan( a );
319 aIsSignalingNaN = floatx80_is_signaling_nan( a );
320 bIsNaN = floatx80_is_nan( b );
321 bIsSignalingNaN = floatx80_is_signaling_nan( b );
322 a.low |= LIT64( 0xC000000000000000 );
323 b.low |= LIT64( 0xC000000000000000 );
324 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
326 return ( aIsSignalingNaN & bIsNaN ) ? b : a;
/* NOTE(review): presumably the exponent bias of the extended double-precision */
/* format (0x3FFF = 16383); it is not referenced in this section -- confirm.   */
#define EXP_BIAS 0x3FFF
336 /*----------------------------------------------------------------------------
337 | Returns the fraction bits of the extended double-precision floating-point
339 *----------------------------------------------------------------------------*/
341 static inline bits64 extractFloatx80Frac( floatx80 a )
348 /*----------------------------------------------------------------------------
349 | Returns the exponent bits of the extended double-precision floating-point
351 *----------------------------------------------------------------------------*/
353 static inline int32 extractFloatx80Exp( floatx80 a )
356 return a.high & 0x7FFF;
360 /*----------------------------------------------------------------------------
361 | Returns the sign bit of the extended double-precision floating-point value
363 *----------------------------------------------------------------------------*/
365 static inline flag extractFloatx80Sign( floatx80 a )
/*----------------------------------------------------------------------------
| The pattern for a default generated quadruple-precision NaN.  The `high' and
| `low' values hold the most- and least-significant bits, respectively.  Every
| bit of both halves is set.
*----------------------------------------------------------------------------*/
#define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF )
#define float128_default_nan_low LIT64( 0xFFFFFFFFFFFFFFFF )
383 /*----------------------------------------------------------------------------
384 | Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
385 | otherwise returns 0.
386 *----------------------------------------------------------------------------*/
388 flag float128_is_nan( float128 a )
392 ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
393 && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
397 /*----------------------------------------------------------------------------
398 | Returns 1 if the quadruple-precision floating-point value `a' is a
399 | signaling NaN; otherwise returns 0.
400 *----------------------------------------------------------------------------*/
402 flag float128_is_signaling_nan( float128 a )
406 ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
407 && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
411 /*----------------------------------------------------------------------------
412 | Returns the result of converting the quadruple-precision floating-point NaN
413 | `a' to the canonical NaN format. If `a' is a signaling NaN, the invalid
414 | exception is raised.
415 *----------------------------------------------------------------------------*/
417 static commonNaNT float128ToCommonNaN( float128 a )
421 if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
423 shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
428 /*----------------------------------------------------------------------------
429 | Returns the result of converting the canonical NaN `a' to the quadruple-
430 | precision floating-point format.
431 *----------------------------------------------------------------------------*/
433 static float128 commonNaNToFloat128( commonNaNT a )
437 shift128Right( a.high, a.low, 16, &z.high, &z.low );
438 z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );
443 /*----------------------------------------------------------------------------
444 | Takes two quadruple-precision floating-point values `a' and `b', one of
445 | which is a NaN, and returns the appropriate NaN result. If either `a' or
446 | `b' is a signaling NaN, the invalid exception is raised.
447 *----------------------------------------------------------------------------*/
449 static float128 propagateFloat128NaN( float128 a, float128 b )
451 flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
453 aIsNaN = float128_is_nan( a );
454 aIsSignalingNaN = float128_is_signaling_nan( a );
455 bIsNaN = float128_is_nan( b );
456 bIsSignalingNaN = float128_is_signaling_nan( b );
457 a.high |= LIT64( 0x0000800000000000 );
458 b.high |= LIT64( 0x0000800000000000 );
459 if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
461 return ( aIsSignalingNaN & bIsNaN ) ? b : a;