-template<class FP16_INT_T,
- int FP16_BIAS, int FP16_MANTISSA_BITS, int FP16_EXPONENT_BITS, int FP16_MAX_EXPONENT,
- int FP64_BIAS, int FP64_MANTISSA_BITS, int FP64_EXPONENT_BITS, int FP64_MAX_EXPONENT>
-inline double fp_upconvert(FP16_INT_T x)
-{
- int sign = x >> (FP16_MANTISSA_BITS + FP16_EXPONENT_BITS);
- int exponent = (x & ((1ULL << (FP16_MANTISSA_BITS + FP16_EXPONENT_BITS)) - 1)) >> FP16_MANTISSA_BITS;
- unsigned long long mantissa = x & ((1ULL << FP16_MANTISSA_BITS) - 1);
-
- int sign64;
- int exponent64;
- unsigned long long mantissa64;
-
- if (exponent == 0) {
- /*
- * Denormals, or zero. Zero is still zero, denormals become
- * ordinary numbers.
- */
- if (mantissa == 0) {
- sign64 = sign;
- exponent64 = 0;
- mantissa64 = 0;
- } else {
- sign64 = sign;
- exponent64 = FP64_BIAS - FP16_BIAS;
- mantissa64 = mantissa << (FP64_MANTISSA_BITS - FP16_MANTISSA_BITS + 1);
-
- /* Normalize the number. */
- while ((mantissa64 & (1ULL << FP64_MANTISSA_BITS)) == 0) {
- --exponent64;
- mantissa64 <<= 1;
- }
-
- /* Clear the now-implicit one-bit. */
- mantissa64 &= ~(1ULL << FP64_MANTISSA_BITS);
- }
- } else if (exponent == FP16_MAX_EXPONENT) {
- /*
- * Infinities or NaN (mantissa=0 => infinity, otherwise NaN).
- * We don't care much about NaNs, so let us just make sure we
- * keep the first bit (which signals signalling/non-signalling
- * in many implementations).
- */
- sign64 = sign;
- exponent64 = FP64_MAX_EXPONENT;
- mantissa64 = mantissa << (FP64_MANTISSA_BITS - FP16_MANTISSA_BITS);
- } else {
- sign64 = sign;
-
- /* Up-conversion is simple. Just re-bias the exponent... */
- exponent64 = exponent + FP64_BIAS - FP16_BIAS;