git.sesse.net Git - pistorm/blob - softfloat/softfloat-specialize

   1
   2 /*============================================================================
   3
   4 This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
   5 Arithmetic Package, Release 2b.
   6
   7 Written by John R. Hauser.  This work was made possible in part by the
   8 International Computer Science Institute, located at Suite 600, 1947 Center
   9 Street, Berkeley, California 94704.  Funding was partially provided by the
  10 National Science Foundation under grant MIP-9311980.  The original version
  11 of this code was written as part of a project to build a fixed-point vector
  12 processor in collaboration with the University of California at Berkeley,
  13 overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  14 is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
  15 arithmetic/SoftFloat.html'.
  16
  17 THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
  18 been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
  19 RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
  20 AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
  21 COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
  22 EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
  23 INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
  24 OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
  25
  26 Derivative works are acceptable, even for commercial purposes, so long as
  27 (1) the source code for the derivative work includes prominent notice that
  28 the work is derivative, and (2) the source code includes prominent notice with
  29 these four paragraphs for those parts of this code that are retained.
  30
  31 =============================================================================*/
  32
  33 /*----------------------------------------------------------------------------
  34 | Underflow tininess-detection mode, statically initialized to default value.
  35 | (The declaration in `softfloat.h' must match the `int8' type here.)
  36 *----------------------------------------------------------------------------*/
  37 int8 float_detect_tininess = float_tininess_after_rounding;
  38
  39 /*----------------------------------------------------------------------------
  40 | Raises the exceptions specified by `flags'.  Floating-point traps can be
  41 | defined here if desired.  It is currently not possible for such a trap to
  42 | substitute a result value.  If traps are not implemented, this routine
  43 | should be simply `float_exception_flags |= flags;'.
  44 *----------------------------------------------------------------------------*/
  45
  46 void float_raise( int8 flags )
  47 {
  48
  49     float_exception_flags |= flags;
  50
  51 }
  52
  53 /*----------------------------------------------------------------------------
  54 | Internal canonical NaN format.
  55 *----------------------------------------------------------------------------*/
  56 typedef struct {
  57     flag sign;
  58     bits64 high, low;
  59 } commonNaNT;
  60
  61 /*----------------------------------------------------------------------------
  62 | The pattern for a default generated single-precision NaN.
  63 *----------------------------------------------------------------------------*/
  64 #define float32_default_nan 0xFFFFFFFF
  65
  66 /*----------------------------------------------------------------------------
  67 | Returns 1 if the single-precision floating-point value `a' is a NaN;
  68 | otherwise returns 0.
  69 *----------------------------------------------------------------------------*/
  70
  71 flag float32_is_nan( float32 a )
  72 {
  73
  74     return ( 0xFF000000 < (bits32) ( a<<1 ) );
  75
  76 }
  77
  78 /*----------------------------------------------------------------------------
  79 | Returns 1 if the single-precision floating-point value `a' is a signaling
  80 | NaN; otherwise returns 0.
  81 *----------------------------------------------------------------------------*/
  82
  83 flag float32_is_signaling_nan( float32 a )
  84 {
  85
  86     return ( ( ( a>>22 ) & 0x1FF ) == 0x1FE ) && ( a & 0x003FFFFF );
  87
  88 }
  89
  90 /*----------------------------------------------------------------------------
  91 | Returns the result of converting the single-precision floating-point NaN
  92 | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
  93 | exception is raised.
  94 *----------------------------------------------------------------------------*/
  95
  96 static commonNaNT float32ToCommonNaN( float32 a )
  97 {
  98     commonNaNT z;
  99
 100     if ( float32_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
 101     z.sign = a>>31;
 102     z.low = 0;
 103     z.high = ( (bits64) a )<<41;
 104     return z;
 105
 106 }
 107
 108 /*----------------------------------------------------------------------------
 109 | Returns the result of converting the canonical NaN `a' to the single-
 110 | precision floating-point format.
 111 *----------------------------------------------------------------------------*/
 112
 113 static float32 commonNaNToFloat32( commonNaNT a )
 114 {
 115
 116     return ( ( (bits32) a.sign )<<31 ) | 0x7FC00000 | ( a.high>>41 );
 117
 118 }
 119
 120 /*----------------------------------------------------------------------------
 121 | Takes two single-precision floating-point values `a' and `b', one of which
 122 | is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
 123 | signaling NaN, the invalid exception is raised.
 124 *----------------------------------------------------------------------------*/
 125
 126 static float32 propagateFloat32NaN( float32 a, float32 b )
 127 {
 128     flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
 129
 130     aIsNaN = float32_is_nan( a );
 131     aIsSignalingNaN = float32_is_signaling_nan( a );
 132     bIsNaN = float32_is_nan( b );
 133     bIsSignalingNaN = float32_is_signaling_nan( b );
 134     a |= 0x00400000;
 135     b |= 0x00400000;
 136     if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
 137     if ( aIsNaN ) {
 138         return ( aIsSignalingNaN & bIsNaN ) ? b : a;
 139     }
 140     else {
 141         return b;
 142     }
 143
 144 }
 145
 146 /*----------------------------------------------------------------------------
 147 | The pattern for a default generated double-precision NaN.
 148 *----------------------------------------------------------------------------*/
 149 #define float64_default_nan LIT64( 0xFFFFFFFFFFFFFFFF )
 150
 151 /*----------------------------------------------------------------------------
 152 | Returns 1 if the double-precision floating-point value `a' is a NaN;
 153 | otherwise returns 0.
 154 *----------------------------------------------------------------------------*/
 155
 156 flag float64_is_nan( float64 a )
 157 {
 158
 159     return ( LIT64( 0xFFE0000000000000 ) < (bits64) ( a<<1 ) );
 160
 161 }
 162
 163 /*----------------------------------------------------------------------------
 164 | Returns 1 if the double-precision floating-point value `a' is a signaling
 165 | NaN; otherwise returns 0.
 166 *----------------------------------------------------------------------------*/
 167
 168 flag float64_is_signaling_nan( float64 a )
 169 {
 170
 171     return
 172            ( ( ( a>>51 ) & 0xFFF ) == 0xFFE )
 173         && ( a & LIT64( 0x0007FFFFFFFFFFFF ) );
 174
 175 }
 176
 177 /*----------------------------------------------------------------------------
 178 | Returns the result of converting the double-precision floating-point NaN
 179 | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
 180 | exception is raised.
 181 *----------------------------------------------------------------------------*/
 182
 183 static commonNaNT float64ToCommonNaN( float64 a )
 184 {
 185     commonNaNT z;
 186
 187     if ( float64_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
 188     z.sign = a>>63;
 189     z.low = 0;
 190     z.high = a<<12;
 191     return z;
 192
 193 }
 194
 195 /*----------------------------------------------------------------------------
 196 | Returns the result of converting the canonical NaN `a' to the double-
 197 | precision floating-point format.
 198 *----------------------------------------------------------------------------*/
 199
 200 static float64 commonNaNToFloat64( commonNaNT a )
 201 {
 202
 203     return
 204           ( ( (bits64) a.sign )<<63 )
 205         | LIT64( 0x7FF8000000000000 )
 206         | ( a.high>>12 );
 207
 208 }
 209
 210 /*----------------------------------------------------------------------------
 211 | Takes two double-precision floating-point values `a' and `b', one of which
 212 | is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
 213 | signaling NaN, the invalid exception is raised.
 214 *----------------------------------------------------------------------------*/
 215
 216 static float64 propagateFloat64NaN( float64 a, float64 b )
 217 {
 218     flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
 219
 220     aIsNaN = float64_is_nan( a );
 221     aIsSignalingNaN = float64_is_signaling_nan( a );
 222     bIsNaN = float64_is_nan( b );
 223     bIsSignalingNaN = float64_is_signaling_nan( b );
 224     a |= LIT64( 0x0008000000000000 );
 225     b |= LIT64( 0x0008000000000000 );
 226     if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
 227     if ( aIsNaN ) {
 228         return ( aIsSignalingNaN & bIsNaN ) ? b : a;
 229     }
 230     else {
 231         return b;
 232     }
 233
 234 }
 235
 236 #ifdef FLOATX80
 237
 238 /*----------------------------------------------------------------------------
 239 | The pattern for a default generated extended double-precision NaN.  The
 240 | `high' and `low' values hold the most- and least-significant bits,
 241 | respectively.
 242 *----------------------------------------------------------------------------*/
 243 #define floatx80_default_nan_high 0xFFFF
 244 #define floatx80_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
 245
 246 /*----------------------------------------------------------------------------
 247 | Returns 1 if the extended double-precision floating-point value `a' is a
 248 | NaN; otherwise returns 0.
 249 *----------------------------------------------------------------------------*/
 250
 251 flag floatx80_is_nan( floatx80 a )
 252 {
 253
 254     return ( ( a.high & 0x7FFF ) == 0x7FFF ) && (bits64) ( a.low<<1 );
 255
 256 }
 257
 258 /*----------------------------------------------------------------------------
 259 | Returns 1 if the extended double-precision floating-point value `a' is a
 260 | signaling NaN; otherwise returns 0.
 261 *----------------------------------------------------------------------------*/
 262
 263 flag floatx80_is_signaling_nan( floatx80 a )
 264 {
 265     bits64 aLow;
 266
 267     aLow = a.low & ~ LIT64( 0x4000000000000000 );
 268     return
 269            ( ( a.high & 0x7FFF ) == 0x7FFF )
 270         && (bits64) ( aLow<<1 )
 271         && ( a.low == aLow );
 272
 273 }
 274
 275 /*----------------------------------------------------------------------------
 276 | Returns the result of converting the extended double-precision floating-
 277 | point NaN `a' to the canonical NaN format.  If `a' is a signaling NaN, the
 278 | invalid exception is raised.
 279 *----------------------------------------------------------------------------*/
 280
 281 static commonNaNT floatx80ToCommonNaN( floatx80 a )
 282 {
 283     commonNaNT z;
 284
 285     if ( floatx80_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
 286     z.sign = a.high>>15;
 287     z.low = 0;
 288     z.high = a.low<<1;
 289     return z;
 290
 291 }
 292
 293 /*----------------------------------------------------------------------------
 294 | Returns the result of converting the canonical NaN `a' to the extended
 295 | double-precision floating-point format.
 296 *----------------------------------------------------------------------------*/
 297
 298 static floatx80 commonNaNToFloatx80( commonNaNT a )
 299 {
 300     floatx80 z;
 301
 302     z.low = LIT64( 0xC000000000000000 ) | ( a.high>>1 );
 303     z.high = ( ( (bits16) a.sign )<<15 ) | 0x7FFF;
 304     return z;
 305
 306 }
 307
 308 /*----------------------------------------------------------------------------
 309 | Takes two extended double-precision floating-point values `a' and `b', one
 310 | of which is a NaN, and returns the appropriate NaN result.  If either `a' or
 311 | `b' is a signaling NaN, the invalid exception is raised.
 312 *----------------------------------------------------------------------------*/
 313
 314 floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b )
 315 {
 316     flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
 317
 318     aIsNaN = floatx80_is_nan( a );
 319     aIsSignalingNaN = floatx80_is_signaling_nan( a );
 320     bIsNaN = floatx80_is_nan( b );
 321     bIsSignalingNaN = floatx80_is_signaling_nan( b );
 322     a.low |= LIT64( 0xC000000000000000 );
 323     b.low |= LIT64( 0xC000000000000000 );
 324     if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
 325     if ( aIsNaN ) {
 326         return ( aIsSignalingNaN & bIsNaN ) ? b : a;
 327     }
 328     else {
 329         return b;
 330     }
 331
 332 }
 333
 334 #define EXP_BIAS 0x3FFF
 335
 336 /*----------------------------------------------------------------------------
 337 | Returns the fraction bits of the extended double-precision floating-point
 338 | value `a'.
 339 *----------------------------------------------------------------------------*/
 340
 341 static inline bits64 extractFloatx80Frac( floatx80 a )
 342 {
 343
 344     return a.low;
 345
 346 }
 347
 348 /*----------------------------------------------------------------------------
 349 | Returns the exponent bits of the extended double-precision floating-point
 350 | value `a'.
 351 *----------------------------------------------------------------------------*/
 352
 353 static inline int32 extractFloatx80Exp( floatx80 a )
 354 {
 355
 356     return a.high & 0x7FFF;
 357
 358 }
 359
 360 /*----------------------------------------------------------------------------
 361 | Returns the sign bit of the extended double-precision floating-point value
 362 | `a'.
 363 *----------------------------------------------------------------------------*/
 364
 365 static inline flag extractFloatx80Sign( floatx80 a )
 366 {
 367
 368     return a.high>>15;
 369
 370 }
 371
 372 #endif
 373
 374 #ifdef FLOAT128
 375
 376 /*----------------------------------------------------------------------------
 377 | The pattern for a default generated quadruple-precision NaN.  The `high' and
 378 | `low' values hold the most- and least-significant bits, respectively.
 379 *----------------------------------------------------------------------------*/
 380 #define float128_default_nan_high LIT64( 0xFFFFFFFFFFFFFFFF )
 381 #define float128_default_nan_low  LIT64( 0xFFFFFFFFFFFFFFFF )
 382
 383 /*----------------------------------------------------------------------------
 384 | Returns 1 if the quadruple-precision floating-point value `a' is a NaN;
 385 | otherwise returns 0.
 386 *----------------------------------------------------------------------------*/
 387
 388 flag float128_is_nan( float128 a )
 389 {
 390
 391     return
 392            ( LIT64( 0xFFFE000000000000 ) <= (bits64) ( a.high<<1 ) )
 393         && ( a.low || ( a.high & LIT64( 0x0000FFFFFFFFFFFF ) ) );
 394
 395 }
 396
 397 /*----------------------------------------------------------------------------
 398 | Returns 1 if the quadruple-precision floating-point value `a' is a
 399 | signaling NaN; otherwise returns 0.
 400 *----------------------------------------------------------------------------*/
 401
 402 flag float128_is_signaling_nan( float128 a )
 403 {
 404
 405     return
 406            ( ( ( a.high>>47 ) & 0xFFFF ) == 0xFFFE )
 407         && ( a.low || ( a.high & LIT64( 0x00007FFFFFFFFFFF ) ) );
 408
 409 }
 410
 411 /*----------------------------------------------------------------------------
 412 | Returns the result of converting the quadruple-precision floating-point NaN
 413 | `a' to the canonical NaN format.  If `a' is a signaling NaN, the invalid
 414 | exception is raised.
 415 *----------------------------------------------------------------------------*/
 416
 417 static commonNaNT float128ToCommonNaN( float128 a )
 418 {
 419     commonNaNT z;
 420
 421     if ( float128_is_signaling_nan( a ) ) float_raise( float_flag_invalid );
 422     z.sign = a.high>>63;
 423     shortShift128Left( a.high, a.low, 16, &z.high, &z.low );
 424     return z;
 425
 426 }
 427
 428 /*----------------------------------------------------------------------------
 429 | Returns the result of converting the canonical NaN `a' to the quadruple-
 430 | precision floating-point format.
 431 *----------------------------------------------------------------------------*/
 432
 433 static float128 commonNaNToFloat128( commonNaNT a )
 434 {
 435     float128 z;
 436
 437     shift128Right( a.high, a.low, 16, &z.high, &z.low );
 438     z.high |= ( ( (bits64) a.sign )<<63 ) | LIT64( 0x7FFF800000000000 );
 439     return z;
 440
 441 }
 442
 443 /*----------------------------------------------------------------------------
 444 | Takes two quadruple-precision floating-point values `a' and `b', one of
 445 | which is a NaN, and returns the appropriate NaN result.  If either `a' or
 446 | `b' is a signaling NaN, the invalid exception is raised.
 447 *----------------------------------------------------------------------------*/
 448
 449 static float128 propagateFloat128NaN( float128 a, float128 b )
 450 {
 451     flag aIsNaN, aIsSignalingNaN, bIsNaN, bIsSignalingNaN;
 452
 453     aIsNaN = float128_is_nan( a );
 454     aIsSignalingNaN = float128_is_signaling_nan( a );
 455     bIsNaN = float128_is_nan( b );
 456     bIsSignalingNaN = float128_is_signaling_nan( b );
 457     a.high |= LIT64( 0x0000800000000000 );
 458     b.high |= LIT64( 0x0000800000000000 );
 459     if ( aIsSignalingNaN | bIsSignalingNaN ) float_raise( float_flag_invalid );
 460     if ( aIsNaN ) {
 461         return ( aIsSignalingNaN & bIsNaN ) ? b : a;
 462     }
 463     else {
 464         return b;
 465     }
 466
 467 }
 468
 469 #endif
 470