From: shanshe Date: Tue, 13 Apr 2021 07:56:39 +0000 (+0200) Subject: FPU update as latest MAME X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=6c9276f882444e8f164bed4e642fbf3cbb259971;p=pistorm FPU update as latest MAME --- diff --git a/Makefile b/Makefile index 1ccd7e8..c3483e0 100644 --- a/Makefile +++ b/Makefile @@ -22,7 +22,7 @@ MAINFILES = emulator.c \ platforms/amiga/net/pi-net.c \ platforms/shared/rtc.c -MUSASHIFILES = m68kcpu.c m68kdasm.c softfloat/softfloat.c +MUSASHIFILES = m68kcpu.c m68kdasm.c softfloat/softfloat.c softfloat/fsincos.c softfloat/fyl2x.c MUSASHIGENCFILES = m68kops.c MUSASHIGENHFILES = m68kops.h MUSASHIGENERATOR = m68kmake diff --git a/m68k_in.c b/m68k_in.c index cf0eb18..cf2ab62 100644 --- a/m68k_in.c +++ b/m68k_in.c @@ -284,6 +284,7 @@ M68KMAKE_OPCODE_HANDLER_HEADER extern void m68040_fpu_op0(void); extern void m68040_fpu_op1(void); extern void m68881_mmu_ops(); +extern void m68881_ftrap(); /* ======================================================================== */ /* ========================= INSTRUCTION HANDLERS ========================= */ @@ -561,6 +562,7 @@ cpdbcc 32 . . 1111...001001... .......... . . U U . . . 4 cpgen 32 . . 1111...000...... .......... . . U U . . . 4 4 . unemulated cpscc 32 . . 1111...001...... .......... . . U U . . . 4 4 . unemulated cptrapcc 32 . . 1111...001111... .......... . . U U . . . 4 4 . unemulated +ftrapcc 32 . . 1111001001111... .......... . . U U . . . 4 4 . dbt 16 . . 0101000011001... .......... U U U U U 12 12 6 6 6 dbf 16 . . 0101000111001... .......... U U U U U 12 12 6 6 6 dbcc 16 . . 0101....11001... .......... U U U U U 12 12 6 6 6 @@ -915,7 +917,8 @@ M68KMAKE_OP(1111, 0, ., .) M68KMAKE_OP(040fpu0, 32, ., .) { - if(CPU_TYPE_IS_030_PLUS(CPU_TYPE)) +// printf("FPU 040fpu0 HAS_FPU=%d\n",!!HAS_FPU); + if(HAS_FPU) { m68040_fpu_op0(); return; @@ -926,7 +929,8 @@ M68KMAKE_OP(040fpu0, 32, ., .) M68KMAKE_OP(040fpu1, 32, ., .) { - if(CPU_TYPE_IS_030_PLUS(CPU_TYPE)) +// printf("FPU 040fpu1 HAS_FPU=%d\n",!!HAS_FPU); + if(HAS_FPU) { m68040_fpu_op1(); return; @@ -4370,6 +4374,15 @@ M68KMAKE_OP(cptrapcc, 32, ., .) m68ki_exception_1111(); } +M68KMAKE_OP(ftrapcc,32, ., .) +{ + if(HAS_FPU) + { + m68881_ftrap(); + } else { + m68ki_exception_1111(); + } +} M68KMAKE_OP(dbt, 16, ., .) { diff --git a/m68kcpu.c b/m68kcpu.c index dd79593..293f937 100644 --- a/m68kcpu.c +++ b/m68kcpu.c @@ -794,7 +794,8 @@ void m68k_set_cpu_type(unsigned int cpu_type) CYC_MOVEM_L = 3; CYC_SHIFT = 1; CYC_RESET = 132; - HAS_PMMU = 0; + HAS_PMMU = 0; + HAS_FPU = 0; return; case M68K_CPU_TYPE_SCC68070: m68k_set_cpu_type(M68K_CPU_TYPE_68010); @@ -816,7 +817,8 @@ void m68k_set_cpu_type(unsigned int cpu_type) CYC_MOVEM_L = 3; CYC_SHIFT = 1; CYC_RESET = 130; - HAS_PMMU = 0; + HAS_PMMU = 0; + HAS_FPU = 0; return; case M68K_CPU_TYPE_68EC020: CPU_TYPE = CPU_TYPE_EC020; @@ -833,7 +835,8 @@ void m68k_set_cpu_type(unsigned int cpu_type) CYC_MOVEM_L = 2; CYC_SHIFT = 0; CYC_RESET = 518; - HAS_PMMU = 0; + HAS_PMMU = 0; + HAS_FPU = 0; return; case M68K_CPU_TYPE_68020: CPU_TYPE = CPU_TYPE_020; @@ -850,7 +853,8 @@ void m68k_set_cpu_type(unsigned int cpu_type) CYC_MOVEM_L = 2; CYC_SHIFT = 0; CYC_RESET = 518; - HAS_PMMU = 0; + HAS_PMMU = 0; + HAS_FPU = 0; return; case M68K_CPU_TYPE_68030: CPU_TYPE = CPU_TYPE_030; @@ -867,7 +871,8 @@ void m68k_set_cpu_type(unsigned int cpu_type) CYC_MOVEM_L = 2; CYC_SHIFT = 0; CYC_RESET = 518; - HAS_PMMU = 1; + HAS_PMMU = 1; + HAS_FPU = 1; return; case M68K_CPU_TYPE_68EC030: CPU_TYPE = CPU_TYPE_EC030; @@ -884,7 +889,8 @@ void m68k_set_cpu_type(unsigned int cpu_type) CYC_MOVEM_L = 2; CYC_SHIFT = 0; CYC_RESET = 518; - HAS_PMMU = 0; /* EC030 lacks the PMMU and is effectively a die-shrink 68020 */ + HAS_PMMU = 0; /* EC030 lacks the PMMU and is effectively a die-shrink 68020 */ + HAS_FPU = 1; return; case M68K_CPU_TYPE_68040: // TODO: these values are not correct CPU_TYPE = CPU_TYPE_040; @@ -901,7 +907,8 @@ void m68k_set_cpu_type(unsigned int cpu_type) CYC_MOVEM_L = 2; CYC_SHIFT = 0; CYC_RESET = 518; - HAS_PMMU = 1; + HAS_PMMU = 1; + HAS_FPU = 1; return; case M68K_CPU_TYPE_68EC040: // Just a 68040 without pmmu apparently... CPU_TYPE = CPU_TYPE_EC040; @@ -918,10 +925,12 @@ void m68k_set_cpu_type(unsigned int cpu_type) CYC_MOVEM_L = 2; CYC_SHIFT = 0; CYC_RESET = 518; - HAS_PMMU = 0; + HAS_PMMU = 0; + HAS_FPU = 0; return; case M68K_CPU_TYPE_68LC040: CPU_TYPE = CPU_TYPE_LC040; + CPU_ADDRESS_MASK = 0xffffffff; m68ki_cpu.sr_mask = 0xf71f; /* T1 T0 S M -- I2 I1 I0 -- -- -- X N Z V C */ m68ki_cpu.cyc_instruction = m68ki_cycles[4]; m68ki_cpu.cyc_exception = m68ki_exception_cycle_table[4]; @@ -934,7 +943,8 @@ void m68k_set_cpu_type(unsigned int cpu_type) m68ki_cpu.cyc_movem_l = 2; m68ki_cpu.cyc_shift = 0; m68ki_cpu.cyc_reset = 518; - HAS_PMMU = 1; + HAS_PMMU = 1; + HAS_FPU = 0; return; } } diff --git a/m68kcpu.h b/m68kcpu.h index ab47308..8cd03d6 100644 --- a/m68kcpu.h +++ b/m68kcpu.h @@ -381,9 +381,10 @@ typedef uint32 uint64; #define CYC_MOVEM_L m68ki_cpu.cyc_movem_l #define CYC_SHIFT m68ki_cpu.cyc_shift #define CYC_RESET m68ki_cpu.cyc_reset -#define HAS_PMMU m68ki_cpu.has_pmmu -#define PMMU_ENABLED m68ki_cpu.pmmu_enabled -#define RESET_CYCLES m68ki_cpu.reset_cycles +#define HAS_PMMU m68ki_cpu.has_pmmu +#define HAS_FPU m68ki_cpu.has_fpu +#define PMMU_ENABLED m68ki_cpu.pmmu_enabled +#define RESET_CYCLES m68ki_cpu.reset_cycles #define CALLBACK_INT_ACK m68ki_cpu.int_ack_callback @@ -960,7 +961,8 @@ typedef struct uint sr_mask; /* Implemented status register bits */ uint instr_mode; /* Stores whether we are in instruction mode or group 0/1 exception mode */ uint run_mode; /* Stores whether we are processing a reset, bus error, address error, or something else */ - int has_pmmu; /* Indicates if a PMMU available (yes on 030, 040, no on EC030) */ + int has_pmmu; /* Indicates if a PMMU available (yes on 030, 040, no on EC030) */ + int has_fpu; /* Indicates if a FPU available */ int pmmu_enabled; /* Indicates if the PMMU is enabled */ int fpu_just_reset; /* Indicates the FPU was just reset */ uint reset_cycles; diff --git a/m68kfpu.c b/m68kfpu.c index 473e078..75fc0f5 100644 --- a/m68kfpu.c +++ b/m68kfpu.c @@ -17,6 +17,9 @@ static void fatalerror(char *format, ...) { #define FPCC_I 0x02000000 #define FPCC_NAN 0x01000000 +#define FPES_OE 0x00002000 +#define FPAE_IOP 0x00000080 + #define DOUBLE_INFINITY (unsigned long long)(0x7ff0000000000000) #define DOUBLE_EXPONENT (unsigned long long)(0x7ff0000000000000) #define DOUBLE_MANTISSA (unsigned long long)(0x000fffffffffffff) @@ -269,6 +272,10 @@ static inline void store_pack_float80(uint32 ea, int k, floatx80 fpr) static inline void SET_CONDITION_CODES(floatx80 reg) { +// u64 *regi; + +// regi = (u64 *)® + REG_FPSR &= ~(FPCC_N|FPCC_Z|FPCC_I|FPCC_NAN); // sign flag @@ -374,6 +381,16 @@ static uint8 READ_EA_8(int ea) uint32 ea = REG_A[reg]; return m68ki_read_8(ea); } + case 3: // (An)+ + { + uint32 ea = EA_AY_PI_8(); + return m68ki_read_8(ea); + } + case 4: // -(An) + { + uint32 ea = EA_AY_PD_8(); + return m68ki_read_8(ea); + } case 5: // (d16, An) { uint32 ea = EA_AY_DI_8(); @@ -400,6 +417,16 @@ static uint8 READ_EA_8(int ea) uint32 ea = (d1 << 16) | d2; return m68ki_read_8(ea); } + case 2: // (d16, PC) + { + uint32 ea = EA_PCDI_8(); + return m68ki_read_8(ea); + } + case 3: // (PC) + (Xn) + d8 + { + uint32 ea = EA_PCIX_8(); + return m68ki_read_8(ea); + } case 4: // # { return OPER_I_8(); @@ -430,6 +457,16 @@ static uint16 READ_EA_16(int ea) uint32 ea = REG_A[reg]; return m68ki_read_16(ea); } + case 3: // (An)+ + { + uint32 ea = EA_AY_PI_16(); + return m68ki_read_16(ea); + } + case 4: // -(An) + { + uint32 ea = EA_AY_PD_16(); + return m68ki_read_16(ea); + } case 5: // (d16, An) { uint32 ea = EA_AY_DI_16(); @@ -456,6 +493,16 @@ static uint16 READ_EA_16(int ea) uint32 ea = (d1 << 16) | d2; return m68ki_read_16(ea); } + case 2: // (d16, PC) + { + uint32 ea = EA_PCDI_16(); + return m68ki_read_16(ea); + } + case 3: // (PC) + (Xn) + d8 + { + uint32 ea = EA_PCIX_16(); + return m68ki_read_16(ea); + } case 4: // # { return OPER_I_16(); @@ -492,6 +539,11 @@ static uint32 READ_EA_32(int ea) uint32 ea = EA_AY_PI_32(); return m68ki_read_32(ea); } + case 4: // -(An) + { + uint32 ea = EA_AY_PD_32(); + return m68ki_read_32(ea); + } case 5: // (d16, An) { uint32 ea = EA_AY_DI_32(); @@ -523,6 +575,11 @@ static uint32 READ_EA_32(int ea) uint32 ea = EA_PCDI_32(); return m68ki_read_32(ea); } + case 3: // (PC) + (Xn) + d8 + { + uint32 ea = EA_PCIX_32(); + return m68ki_read_32(ea); + } case 4: // # { return OPER_I_32(); @@ -559,6 +616,14 @@ static uint64 READ_EA_64(int ea) h2 = m68ki_read_32(ea+4); return (uint64)(h1) << 32 | (uint64)(h2); } + case 4: // -(An) + { + uint32 ea = REG_A[reg]-8; + REG_A[reg] -= 8; + h1 = m68ki_read_32(ea+0); + h2 = m68ki_read_32(ea+4); + return (uint64)(h1) << 32 | (uint64)(h2); + } case 5: // (d16, An) { uint32 ea = EA_AY_DI_32(); @@ -566,10 +631,31 @@ static uint64 READ_EA_64(int ea) h2 = m68ki_read_32(ea+4); return (uint64)(h1) << 32 | (uint64)(h2); } + case 6: // (An) + (Xn) + d8 + { + uint32 ea = EA_AY_IX_32(); + h1 = m68ki_read_32(ea+0); + h2 = m68ki_read_32(ea+4); + return (uint64)(h1) << 32 | (uint64)(h2); + } case 7: { switch (reg) { + case 1: // (xxx).L + { + uint32 d1 = OPER_I_16(); + uint32 d2 = OPER_I_16(); + uint32 ea = (d1 << 16) | d2; + return (uint64)(m68ki_read_32(ea)) << 32 | (uint64)(m68ki_read_32(ea+4)); + } + case 3: // (PC) + (Xn) + d8 + { + uint32 ea = EA_PCIX_32(); + h1 = m68ki_read_32(ea+0); + h2 = m68ki_read_32(ea+4); + return (uint64)(h1) << 32 | (uint64)(h2); + } case 4: // # { h1 = OPER_I_32(); @@ -594,9 +680,11 @@ static uint64 READ_EA_64(int ea) } -static floatx80 READ_EA_FPE(int mode, int reg, uint32 di_mode_ea) +static floatx80 READ_EA_FPE(uint32 ea) { floatx80 fpr; + int mode = (ea >> 3) & 0x7; + int reg = (ea & 0x7); switch (mode) { @@ -614,16 +702,41 @@ static floatx80 READ_EA_FPE(int mode, int reg, uint32 di_mode_ea) fpr = load_extended_float80(ea); break; } - case 5: // (d16, An) (added by JFF) + case 4: // -(An) { - fpr = load_extended_float80(di_mode_ea); + uint32 ea = REG_A[reg]-12; + REG_A[reg] -= 12; + fpr = load_extended_float80(ea); + break; + } + case 5: // (d16, An) + { + // FIXME: will fail for fmovem + uint32 ea = EA_AY_DI_32(); + fpr = load_extended_float80(ea); break; - } + case 6: // (An) + (Xn) + d8 + { + // FIXME: will fail for fmovem + uint32 ea = EA_AY_IX_32(); + fpr = load_extended_float80(ea); + break; + } + case 7: // extended modes { switch (reg) { + case 1: // (xxx) + { + uint32 d1 = OPER_I_16(); + uint32 d2 = OPER_I_16(); + uint32 ea = (d1 << 16) | d2; + fpr = load_extended_float80(ea); + } + break; + case 2: // (d16, PC) { uint32 ea = EA_PCDI_32(); @@ -637,13 +750,15 @@ static floatx80 READ_EA_FPE(int mode, int reg, uint32 di_mode_ea) fpr = load_extended_float80(ea); } break; - case 4: // immediate (JFF) - { - uint32 ea = REG_PC; - fpr = load_extended_float80(ea); - REG_PC += 12; - } - break; + + case 4: // immediate (JFF) + { + uint32 ea = REG_PC; + fpr = load_extended_float80(ea); + REG_PC += 12; + } + break; + default: fatalerror("M68kFPU: READ_EA_FPE: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC); break; @@ -891,6 +1006,12 @@ static void WRITE_EA_32(int ea, uint32 data) { switch (reg) { + case 0: // (xxx).W + { + uint32 ea = OPER_I_16(); + m68ki_write_32(ea, data); + break; + } case 1: // (xxx).L { uint32 d1 = OPER_I_16(); @@ -927,6 +1048,14 @@ static void WRITE_EA_64(int ea, uint64 data) m68ki_write_32(ea+4, (uint32)(data)); break; } + case 3: // (An)+ + { + uint32 ea = REG_A[reg]; + REG_A[reg] += 8; + m68ki_write_32(ea+0, (uint32)(data >> 32)); + m68ki_write_32(ea+4, (uint32)(data)); + break; + } case 4: // -(An) { uint32 ea; @@ -943,13 +1072,45 @@ static void WRITE_EA_64(int ea, uint64 data) m68ki_write_32(ea+4, (uint32)(data)); break; } + case 6: // (An) + (Xn) + d8 + { + uint32 ea = EA_AY_IX_32(); + m68ki_write_32(ea+0, (uint32)(data >> 32)); + m68ki_write_32(ea+4, (uint32)(data)); + break; + } + case 7: + { + switch (reg) + { + case 1: // (xxx).L + { + uint32 d1 = OPER_I_16(); + uint32 d2 = OPER_I_16(); + uint32 ea = (d1 << 16) | d2; + m68ki_write_32(ea+0, (uint32)(data >> 32)); + m68ki_write_32(ea+4, (uint32)(data)); + break; + } + case 2: // (d16, PC) + { + uint32 ea = EA_PCDI_32(); + m68ki_write_32(ea+0, (uint32)(data >> 32)); + m68ki_write_32(ea+4, (uint32)(data)); + break; + } + default: fatalerror("M68kFPU: WRITE_EA_64: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC); + } + break; + } default: fatalerror("M68kFPU: WRITE_EA_64: unhandled mode %d, reg %d, data %08X%08X at %08X\n", mode, reg, (uint32)(data >> 32), (uint32)(data), REG_PC); } } -static void WRITE_EA_FPE(int mode, int reg, floatx80 fpr, uint32 di_mode_ea) +static void WRITE_EA_FPE(uint32 ea, floatx80 fpr) { - + int mode = (ea >> 3) & 0x7; + int reg = (ea & 0x7); switch (mode) { @@ -978,12 +1139,10 @@ static void WRITE_EA_FPE(int mode, int reg, floatx80 fpr, uint32 di_mode_ea) store_extended_float80(ea, fpr); break; } - case 5: // (d16, An) (added by JFF) + case 5: // (d16, An) { - // EA_AY_DI_32() should not be done here because fmovem would increase - // PC each time, reading incorrect displacement & advancing PC too much - // uint32 ea = EA_AY_DI_32(); - store_extended_float80(di_mode_ea, fpr); + uint32 ea = EA_AY_DI_32(); + store_extended_float80(ea, fpr); break; } @@ -1074,11 +1233,8 @@ static void fpgen_rm_reg(uint16 w2) } case 2: // Extended-precision Real { - int imode = (ea >> 3) & 0x7; - int reg = (ea & 0x7); - uint32 di_mode_ea = imode == 5 ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0; - source = READ_EA_FPE(imode,reg,di_mode_ea); - break; + source = READ_EA_FPE(ea); + break; } case 3: // Packed-decimal Real { @@ -1158,6 +1314,59 @@ static void fpgen_rm_reg(uint16 w2) case 0x34: // 10^2 source = int32_to_floatx80((sint32)10*10); break; + case 0x35: // 10^4 + source = int32_to_floatx80((sint32)1000*10); + break; + + case 0x36: // 1.0e8 + source = int32_to_floatx80((sint32)10000000*10); + break; + + case 0x37: // 1.0e16 - can't get the right precision from s32 so go "direct" with constants from h/w + source.high = 0x4034; + source.low = U64(0x8e1bc9bf04000000); + break; + + case 0x38: // 1.0e32 + source.high = 0x4069; + source.low = U64(0x9dc5ada82b70b59e); + break; + + case 0x39: // 1.0e64 + source.high = 0x40d3; + source.low = U64(0xc2781f49ffcfa6d5); + break; + + case 0x3a: // 1.0e128 + source.high = 0x41a8; + source.low = U64(0x93ba47c980e98ce0); + break; + + case 0x3b: // 1.0e256 + source.high = 0x4351; + source.low = U64(0xaa7eebfb9df9de8e); + break; + + case 0x3c: // 1.0e512 + source.high = 0x46a3; + source.low = U64(0xe319a0aea60e91c7); + break; + + case 0x3d: // 1.0e1024 + source.high = 0x4d48; + source.low = U64(0xc976758681750c17); + break; + + case 0x3e: // 1.0e2048 + source.high = 0x5a92; + source.low = U64(0x9e8b3b5dc53d5de5); + break; + + case 0x3f: // 1.0e4096 + source.high = 0x7525; + source.low = U64(0xc46052028a20979b); + break; + default: fatalerror("fmove_rm_reg: unknown constant ROM offset %x at %08x\n", w2&0x7f, REG_PC-4); @@ -1166,7 +1375,8 @@ static void fpgen_rm_reg(uint16 w2) // handle it right here, the usual opmode bits aren't valid in the FMOVECR case REG_FP[dst] = source; - SET_CONDITION_CODES(REG_FP[dst]); // JFF when destination is a register, we HAVE to update FPCR + //FIXME mame doesn't use SET_CONDITION_CODES here + SET_CONDITION_CODES(REG_FP[dst]); // JFF when destination is a register, we HAVE to update FPCR USE_CYCLES(4); return; } @@ -1185,23 +1395,25 @@ static void fpgen_rm_reg(uint16 w2) case 0x00: // FMOVE { REG_FP[dst] = source; - SET_CONDITION_CODES(REG_FP[dst]); // JFF needs update condition codes + SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(4); break; } - case 0x01: // Fsint + case 0x01: // FINT { sint32 temp; temp = floatx80_to_int32(source); REG_FP[dst] = int32_to_floatx80(temp); - SET_CONDITION_CODES(REG_FP[dst]); // JFF needs update condition codes + //FIXME mame doesn't use SET_CONDITION_CODES here + SET_CONDITION_CODES(REG_FP[dst]); // JFF needs update condition codes break; } - case 0x03: // FsintRZ + case 0x03: // FINTRZ { sint32 temp; temp = floatx80_to_int32_round_to_zero(source); REG_FP[dst] = int32_to_floatx80(temp); + //FIXME mame doesn't use SET_CONDITION_CODES here SET_CONDITION_CODES(REG_FP[dst]); // JFF needs update condition codes break; } @@ -1212,6 +1424,50 @@ static void fpgen_rm_reg(uint16 w2) USE_CYCLES(109); break; } + case 0x06: // FLOGNP1 + { + REG_FP[dst] = floatx80_flognp1 (source); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(594); // for MC68881 + break; + } + case 0x0e: // FSIN + { + REG_FP[dst] = source; + floatx80_fsin(®_FP[dst]); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(75); + break; + } + case 0x0f: // FTAN + { + REG_FP[dst] = source; + floatx80_ftan(®_FP[dst]); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(75); + break; + } + case 0x14: // FLOGN + { + REG_FP[dst] = floatx80_flogn (source); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(548); // for MC68881 + break; + } + case 0x15: // FLOG10 + { + REG_FP[dst] = floatx80_flog10 (source); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(604); // for MC68881 + break; + } + case 0x16: // FLOG2 + { + REG_FP[dst] = floatx80_flog2 (source); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(604); // for MC68881 + break; + } case 0x18: // FABS { REG_FP[dst] = source; @@ -1228,24 +1484,44 @@ static void fpgen_rm_reg(uint16 w2) USE_CYCLES(3); break; } + case 0x1d: // FCOS + { + REG_FP[dst] = source; + floatx80_fcos(®_FP[dst]); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(75); + break; + } case 0x1e: // FGETEXP { - sint16 temp; - temp = source.high; // get the exponent - temp -= 0x3fff; // take off the bias - REG_FP[dst] = double_to_fx80((double)temp); + sint16 temp2; + + temp2 = source.high; // get the exponent + temp2 -= 0x3fff; // take off the bias + REG_FP[dst] = double_to_fx80((double)temp2); SET_CONDITION_CODES(REG_FP[dst]); USE_CYCLES(6); break; } - case 0x60: // FSDIVS (JFF) (source has already been converted to floatx80) + case 0x60: // FSDIVS (JFF) (source has already been converted to floatx80) case 0x20: // FDIV { REG_FP[dst] = floatx80_div(REG_FP[dst], source); - SET_CONDITION_CODES(REG_FP[dst]); // JFF + //FIXME mame doesn't use SET_CONDITION_CODES here + SET_CONDITION_CODES(REG_FP[dst]); // JFF USE_CYCLES(43); break; } + case 0x21: // FMOD + { + sint8 const mode = float_rounding_mode; + float_rounding_mode = float_round_to_zero; + REG_FP[dst] = floatx80_rem(REG_FP[dst], source); + SET_CONDITION_CODES(REG_FP[dst]); + float_rounding_mode = mode; + USE_CYCLES(43); // guess + break; + } case 0x22: // FADD { REG_FP[dst] = floatx80_add(REG_FP[dst], source); @@ -1253,7 +1529,7 @@ static void fpgen_rm_reg(uint16 w2) USE_CYCLES(9); break; } - case 0x63: // FSMULS (JFF) (source has already been converted to floatx80) + case 0x63: // FSMULS (JFF) (source has already been converted to floatx80) case 0x23: // FMUL { REG_FP[dst] = floatx80_mul(REG_FP[dst], source); @@ -1261,13 +1537,40 @@ static void fpgen_rm_reg(uint16 w2) USE_CYCLES(11); break; } + case 0x24: // FSGLDIV + { + float32 a = floatx80_to_float32( REG_FP[dst] ); + float32 b = floatx80_to_float32( source ); + REG_FP[dst] = float32_to_floatx80( float32_div(a, b) ); + USE_CYCLES(43); // // ? (value is from FDIV) + break; + } case 0x25: // FREM { + sint8 const mode = float_rounding_mode; + float_rounding_mode = float_round_nearest_even; REG_FP[dst] = floatx80_rem(REG_FP[dst], source); SET_CONDITION_CODES(REG_FP[dst]); + float_rounding_mode = mode; USE_CYCLES(43); // guess break; } + case 0x26: // FSCALE + { + REG_FP[dst] = floatx80_scale(REG_FP[dst], source); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(46); // (better?) guess + break; + } + case 0x27: // FSGLMUL + { + float32 a = floatx80_to_float32( REG_FP[dst] ); + float32 b = floatx80_to_float32( source ); + REG_FP[dst] = float32_to_floatx80( float32_mul(a, b) ); + SET_CONDITION_CODES(REG_FP[dst]); + USE_CYCLES(11); // ? (value is from FMUL) + break; + } case 0x28: // FSUB { REG_FP[dst] = floatx80_sub(REG_FP[dst], source); @@ -1319,10 +1622,7 @@ static void fmove_reg_mem(uint16 w2) } case 2: // Extended-precision Real { - int mode = (ea >> 3) & 0x7; - int reg = (ea & 0x7); - uint32 di_mode_ea = mode == 5 ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0; - WRITE_EA_FPE(mode, reg, REG_FP[src], di_mode_ea); + WRITE_EA_FPE(ea, REG_FP[src]); break; } case 3: // Packed-decimal Real with Static K-factor @@ -1334,7 +1634,12 @@ static void fmove_reg_mem(uint16 w2) } case 4: // Word Integer { - WRITE_EA_16(ea, (sint16)floatx80_to_int32(REG_FP[src])); + sint32 value = floatx80_to_int32(REG_FP[src]); + if (value > 0x7fff || value < -0x8000 ) + { + REG_FPSR |= FPES_OE | FPAE_IOP; + } + WRITE_EA_16(ea, (sint16)value); break; } case 5: // Double-precision Real @@ -1348,7 +1653,12 @@ static void fmove_reg_mem(uint16 w2) } case 6: // Byte Integer { - WRITE_EA_8(ea, (sint8)floatx80_to_int32(REG_FP[src])); + sint32 value = floatx80_to_int32(REG_FP[src]); + if (value > 127 || value < -128) + { + REG_FPSR |= FPES_OE | FPAE_IOP; + } + WRITE_EA_8(ea, (sint8)value); break; } case 7: // Packed-decimal Real with Dynamic K-factor @@ -1365,24 +1675,97 @@ static void fmove_fpcr(uint16 w2) { int ea = REG_IR & 0x3f; int dir = (w2 >> 13) & 0x1; - int reg = (w2 >> 10) & 0x7; + int regsel = (w2 >> 10) & 0x7; + int mode = (ea >> 3) & 0x7; + + if ((mode == 5) || (mode == 6)) + { + uint32 address = 0xffffffff; // force a bus error if this doesn't get assigned + + if (mode == 5) + { + address = EA_AY_DI_32(); + } + else if (mode == 6) + { + address = EA_AY_IX_32(); + } - if (dir) // From system control reg to + if (dir) // From system control reg to + { + if (regsel & 4) { m68ki_write_32(address, REG_FPCR); address += 4; } + if (regsel & 2) { m68ki_write_32(address, REG_FPSR); address += 4; } + if (regsel & 1) { m68ki_write_32(address, REG_FPIAR); address += 4; } + } + else // From to system control reg + { + if (regsel & 4) { REG_FPCR = m68ki_read_32(address); address += 4; } + if (regsel & 2) { REG_FPSR = m68ki_read_32(address); address += 4; } + if (regsel & 1) { REG_FPIAR = m68ki_read_32(address); address += 4; } + } + } + else { - if (reg & 4) WRITE_EA_32(ea, REG_FPCR); - if (reg & 2) WRITE_EA_32(ea, REG_FPSR); - if (reg & 1) WRITE_EA_32(ea, REG_FPIAR); + if (dir) // From system control reg to + { + if (regsel & 4) WRITE_EA_32(ea, REG_FPCR); + if (regsel & 2) WRITE_EA_32(ea, REG_FPSR); + if (regsel & 1) WRITE_EA_32(ea, REG_FPIAR); + } + else // From to system control reg + { + if (regsel & 4) REG_FPCR = READ_EA_32(ea); + if (regsel & 2) REG_FPSR = READ_EA_32(ea); + if (regsel & 1) REG_FPIAR = READ_EA_32(ea); + } } - else // From to system control reg + + // FIXME: (2011-12-18 ost) + // rounding_mode and rounding_precision of softfloat.c should be set according to current fpcr + // but: with this code on Apollo the following programs in /systest/fptest will fail: + // 1. Single Precision Whetstone will return wrong results never the less + // 2. Vector Test will fault with 00040004: reference to illegal address + + if ((regsel & 4) && dir == 0) { - if (reg & 4) + int rnd = (REG_FPCR >> 4) & 3; + int prec = (REG_FPCR >> 6) & 3; + +// logerror("m68k_fpsp:fmove_fpcr fpcr=%04x prec=%d rnd=%d\n", m_fpcr, prec, rnd); + +#ifdef FLOATX80 + switch (prec) { - REG_FPCR = READ_EA_32(ea); - // JFF: need to update rounding mode from softfloat module - float_rounding_mode = (REG_FPCR >> 4) & 0x3; + case 0: // Extend (X) + floatx80_rounding_precision = 80; + break; + case 1: // Single (S) + floatx80_rounding_precision = 32; + break; + case 2: // Double (D) + floatx80_rounding_precision = 64; + break; + case 3: // Undefined + floatx80_rounding_precision = 80; + break; + } +#endif + + switch (rnd) + { + case 0: // To Nearest (RN) + float_rounding_mode = float_round_nearest_even; + break; + case 1: // To Zero (RZ) + float_rounding_mode = float_round_to_zero; + break; + case 2: // To Minus Infinitiy (RM) + float_rounding_mode = float_round_down; + break; + case 3: // To Plus Infinitiy (RP) + float_rounding_mode = float_round_up; + break; } - if (reg & 2) REG_FPSR = READ_EA_32(ea); - if (reg & 1) REG_FPIAR = READ_EA_32(ea); } USE_CYCLES(10); @@ -1396,122 +1779,121 @@ static void fmovem(uint16 w2) int mode = (w2 >> 11) & 0x3; int reglist = w2 & 0xff; + uint32 mem_addr = 0; + switch (ea >> 3) + { + case 5: // (d16, An) + mem_addr= EA_AY_DI_32(); + break; + case 6: // (An) + (Xn) + d8 + mem_addr= EA_AY_IX_32(); + break; + } + if (dir) // From FP regs to mem { switch (mode) - { - case 2: // (JFF): Static register list, postincrement or control addressing mode. - { - int imode = (ea >> 3) & 0x7; - int reg = (ea & 0x7); - int di_mode = imode == 5; - uint32 di_mode_ea = di_mode ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0; - for (i=0; i < 8; i++) + { + case 1: // Dynamic register list, postincrement or control addressing mode. + // FIXME: not really tested, but seems to work + reglist = REG_D[(reglist >> 4) & 7]; + /* fall through */ + /* no break */ + case 0: // Static register list, predecrement or control addressing mode { - if (reglist & (1 << i)) - { - WRITE_EA_FPE(imode,reg, REG_FP[7-i],di_mode_ea); - USE_CYCLES(2); - if (di_mode) + for (i=0; i < 8; i++) { - di_mode_ea += 12; + if (reglist & (1 << i)) + { + switch (ea >> 3) + { + case 5: // (d16, An) + case 6: // (An) + (Xn) + d8 + store_extended_float80(mem_addr, REG_FP[i]); + mem_addr += 12; + break; + default: + WRITE_EA_FPE(ea, REG_FP[i]); + break; + } + + USE_CYCLES(2); + } } - } + break; } - break; - } - case 0: // Static register list, predecrement addressing mode + + case 2: // Static register list, postdecrement or control addressing mode. { - int imode = (ea >> 3) & 0x7; - int reg = (ea & 0x7); - // the "di_mode_ea" parameter kludge is required here else WRITE_EA_FPE would have - // to call EA_AY_DI_32() (that advances PC & reads displacement) each time - // when the proper behaviour is 1) read once, 2) increment ea for each matching register - // this forces to pre-read the mode (named "imode") so we can decide to read displacement, only once - int di_mode = imode == 5; - uint32 di_mode_ea = di_mode ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0; for (i=0; i < 8; i++) { if (reglist & (1 << i)) { - WRITE_EA_FPE(imode,reg, REG_FP[i],di_mode_ea); - USE_CYCLES(2); - if (di_mode) + switch (ea >> 3) { - di_mode_ea += 12; + case 5: // (d16, An) + case 6: // (An) + (Xn) + d8 + store_extended_float80(mem_addr, REG_FP[7-i]); + mem_addr += 12; + break; + default: + WRITE_EA_FPE(ea, REG_FP[7-i]); + break; } + + USE_CYCLES(2); } } break; } - default: fatalerror("040fpu0: FMOVEM: mode %d unimplemented at %08X\n", mode, REG_PC-4); + default: fatalerror("M680x0: FMOVEM: mode %d unimplemented at %08X\n", mode, REG_PC-4); } } else // From mem to FP regs { switch (mode) { - case 2: // Static register list, postincrement addressing mode + case 3: // Dynamic register list, predecrement addressing mode. + // FIXME: not really tested, but seems to work + reglist = REG_D[(reglist >> 4) & 7]; + /* fall through */ + /* no break */ + case 2: // Static register list, postincrement or control addressing mode { - int imode = (ea >> 3) & 0x7; - int reg = (ea & 0x7); - int di_mode = imode == 5; - uint32 di_mode_ea = di_mode ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0; for (i=0; i < 8; i++) { if (reglist & (1 << i)) { - REG_FP[7-i] = READ_EA_FPE(imode,reg,di_mode_ea); - USE_CYCLES(2); - if (di_mode) + switch (ea >> 3) { - di_mode_ea += 12; + case 5: // (d16, An) + case 6: // (An) + (Xn) + d8 + REG_FP[7-i] = load_extended_float80(mem_addr); + mem_addr += 12; + break; + default: + REG_FP[7-i] = READ_EA_FPE(ea); + break; } + USE_CYCLES(2); } } break; } - default: fatalerror("040fpu0: FMOVEM: mode %d unimplemented at %08X\n", mode, REG_PC-4); + default: fatalerror("M680x0: FMOVEM: mode %d unimplemented at %08X\n", mode, REG_PC-4); } } } static void fscc() { - // added by JFF, this seems to work properly now - int condition = OPER_I_16() & 0x3f; - - int cc = TEST_CONDITION(condition); - int mode = (REG_IR & 0x38) >> 3; - int v = (cc ? 0xff : 0x00); - - switch (mode) - { - case 0: // fscc Dx - { - // If the specified floating-point condition is true, sets the byte integer operand at - // the destination to TRUE (all ones); otherwise, sets the byte to FALSE (all zeros). - - REG_D[REG_IR & 7] = (REG_D[REG_IR & 7] & 0xFFFFFF00) | v; - break; - } - case 5: // (disp,Ax) - { - int reg = REG_IR & 7; - uint32 ea = REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16()); - m68ki_write_8(ea,v); - break; - } - - default: - { - // unimplemented see fpu_uae.cpp around line 1300 - fatalerror("040fpu0: fscc: mode %d not implemented at %08X\n", mode, REG_PC-4); - } - } - USE_CYCLES(7); // JFF unsure of the number of cycles!! + int ea = REG_IR & 0x3f; + int condition = (sint16)(OPER_I_16()); + + WRITE_EA_8(ea, TEST_CONDITION(condition) ? 0xff : 0); + USE_CYCLES(7); // ??? } static void fbcc16(void) { @@ -1528,7 +1910,7 @@ static void fbcc16(void) } USE_CYCLES(7); - } +} static void fbcc32(void) { @@ -1591,28 +1973,51 @@ void m68040_fpu_op0() break; } - case 1: // FScc (JFF) + case 1: // FBcc disp16 { - fscc(); - break; + switch ((REG_IR >> 3) & 0x7) { + case 1: // FDBcc + // TODO: + printf("M68kFPU: unimplemented FDBcc main op %d with mode %d at %08X\n", (REG_IR >> 6) & 0x3, (REG_IR >> 3) & 0x7, REG_PC-4); + break; + default: // FScc (?) + fscc(); + return; + } + fatalerror("M68kFPU: unimplemented main op %d with mode %d at %08X\n", (REG_IR >> 6) & 0x3, (REG_IR >> 3) & 0x7, REG_PC-4); + break; } - case 2: // FBcc disp16 + case 2: // FBcc disp16 { fbcc16(); break; } - case 3: // FBcc disp32 + case 3: // FBcc disp32 { fbcc32(); break; } - default: fatalerror("M68kFPU: unimplemented main op %d at %08X\n", (m68ki_cpu.ir >> 6) & 0x3, REG_PC-4); + default: fatalerror("M68kFPU: unimplemented main op %d\n", (REG_IR >> 6) & 0x3); } } -static void perform_fsave(uint32 addr, int inc) +static int perform_fsave(uint32 addr, int inc) { + if(m68ki_cpu.cpu_type & CPU_TYPE_040) + { + if(inc) + { + m68ki_write_32(addr, 0x41000000); + return 4 -4; + } + else + { + m68ki_write_32(addr, 0x41000000); + return -4 +4; + } + } + if (inc) { // 68881 IDLE, version 0x1f @@ -1623,21 +2028,23 @@ static void perform_fsave(uint32 addr, int inc) m68ki_write_32(addr+16, 0); m68ki_write_32(addr+20, 0); m68ki_write_32(addr+24, 0x70000000); + return 7*4 -4; } else { - m68ki_write_32(addr, 0x70000000); - m68ki_write_32(addr-4, 0); - m68ki_write_32(addr-8, 0); - m68ki_write_32(addr-12, 0); - m68ki_write_32(addr-16, 0); - m68ki_write_32(addr-20, 0); - m68ki_write_32(addr-24, 0x1f180000); + m68ki_write_32(addr+4-4, 0x70000000); + m68ki_write_32(addr+4-8, 0); + m68ki_write_32(addr+4-12, 0); + m68ki_write_32(addr+4-16, 0); + m68ki_write_32(addr+4-20, 0); + m68ki_write_32(addr+4-24, 0); + m68ki_write_32(addr+4-28, 0x1f180000); + return -7*4 +4; } } // FRESTORE on a NULL frame reboots the FPU - all registers to NaN, the 3 status regs to 0 -static void do_frestore_null() +static void do_frestore_null(void) { int i; @@ -1655,12 +2062,64 @@ static void do_frestore_null() m68ki_cpu.fpu_just_reset = 1; } +void m68040_do_fsave(uint32 addr, int reg, int inc) +{ + if (m68ki_cpu.fpu_just_reset) + { + m68ki_write_32(addr, 0); + } + else + { + // we normally generate an IDLE frame + int delta = perform_fsave(addr, inc); + if(reg != -1) + REG_A[reg] += delta; + } +} + +void m68040_do_frestore(uint32 addr, int reg) +{ + uint32 temp = m68ki_read_32(addr); + // check for nullptr frame + if (temp & 0xff000000) + { + // we don't handle non-nullptr frames + m68ki_cpu.fpu_just_reset = 0; + + if (reg != -1) + { + uint8 m40 = !!(m68ki_cpu.cpu_type & CPU_TYPE_040); + // how about an IDLE frame? + if (!m40 && ((temp & 0x00ff0000) == 0x00180000)) + { + REG_A[reg] += 7*4-4; + } + else if (m40 && ((temp & 0xffff0000) == 0x41000000)) + { +// REG_A[reg] += 4; + } // check UNIMP + else if ((temp & 0x00ff0000) == 0x00380000) + { + REG_A[reg] += 14*4; + } // check BUSY + else if ((temp & 0x00ff0000) == 0x00b40000) + { + REG_A[reg] += 45*4; + } + } + } + else + { + do_frestore_null(); + } +} + void m68040_fpu_op1() { int ea = REG_IR & 0x3f; int mode = (ea >> 3) & 0x7; int reg = (ea & 0x7); - uint32 addr, temp; + uint32 addr; switch ((REG_IR >> 6) & 0x3) { @@ -1668,40 +2127,56 @@ void m68040_fpu_op1() { switch (mode) { - case 3: // (An)+ - addr = EA_AY_PI_32(); + case 2: // (An) + addr = REG_A[reg]; + m68040_do_fsave(addr, -1, 1); + break; - if (m68ki_cpu.fpu_just_reset) - { - m68ki_write_32(addr, 0); - } - else - { - // we normally generate an IDLE frame - REG_A[reg] += 6*4; - perform_fsave(addr, 1); - } + case 3: // (An)+ + addr = EA_AY_PI_32(); + printf("FSAVE mode %d, reg A%d=0x%08x\n",mode,reg,REG_A[reg]); + m68040_do_fsave(addr, -1, 1); // FIXME: -1 was reg break; case 4: // -(An) - addr = EA_AY_PD_32(); + addr = EA_AY_PD_32(); + m68040_do_fsave(addr, reg, 0); // FIXME: -1 was reg + break; + case 5: // (D16, An) + addr = EA_AY_DI_16(); + m68040_do_fsave(addr, -1, 1); + break; - if (m68ki_cpu.fpu_just_reset) - { - m68ki_write_32(addr, 0); - } - else + case 6: // (An) + (Xn) + d8 + addr = EA_AY_IX_16(); + m68040_do_fsave(addr, -1, 1); + break; + + case 7: // + { + switch (reg) { - // we normally generate an IDLE frame - REG_A[reg] -= 6*4; - perform_fsave(addr, 0); + case 1: // (abs32) + { + addr = EA_AL_32(); + m68040_do_fsave(addr, -1, 1); + break; + } + case 2: // (d16, PC) + { + addr = EA_PCDI_16(); + m68040_do_fsave(addr, -1, 1); + break; + } + default: + fatalerror("M68kFPU: FSAVE unhandled mode %d reg %d at %x\n", mode, reg, REG_PC); } - break; + } + break; default: fatalerror("M68kFPU: FSAVE unhandled mode %d reg %d at %x\n", mode, reg, REG_PC); } - break; } break; @@ -1711,59 +2186,80 @@ void m68040_fpu_op1() { case 2: // (An) addr = REG_A[reg]; - temp = m68ki_read_32(addr); - - // check for NULL frame - if (temp & 0xff000000) - { - // we don't handle non-NULL frames and there's no pre/post inc/dec to do here - m68ki_cpu.fpu_just_reset = 0; - } - else - { - do_frestore_null(); - } + m68040_do_frestore(addr, -1); break; case 3: // (An)+ - addr = EA_AY_PI_32(); - temp = m68ki_read_32(addr); + addr = EA_AY_PI_32(); + m68040_do_frestore(addr, reg); + break; - // check for NULL frame - if (temp & 0xff000000) - { - m68ki_cpu.fpu_just_reset = 0; + case 5: // (D16, An) + addr = EA_AY_DI_16(); + m68040_do_frestore(addr, -1); + break; - // how about an IDLE frame? - if ((temp & 0x00ff0000) == 0x00180000) - { - REG_A[reg] += 6*4; - } // check UNIMP - else if ((temp & 0x00ff0000) == 0x00380000) + case 6: // (An) + (Xn) + d8 + addr = EA_AY_IX_16(); + m68040_do_frestore(addr, -1); + break; + + case 7: // + { + switch (reg) + { + case 1: // (abs32) { - REG_A[reg] += 14*4; - } // check BUSY - else if ((temp & 0x00ff0000) == 0x00b40000) + addr = EA_AL_32(); + m68040_do_frestore(addr, -1); + break; + } + case 2: // (d16, PC) { - REG_A[reg] += 45*4; + addr = EA_PCDI_16(); + m68040_do_frestore(addr, -1); + break; } + default: + fatalerror("M68kFPU: FRESTORE unhandled mode %d reg %d at %x\n", mode, reg, REG_PC); } - else - { - do_frestore_null(); - } - break; + } + break; default: fatalerror("M68kFPU: FRESTORE unhandled mode %d reg %d at %x\n", mode, reg, REG_PC); } - break; } break; - default: fatalerror("m68040_fpu_op1: unimplemented op %d at %08X\n", (REG_IR >> 6) & 0x3, REG_PC-2); + default: fatalerror("m68040_fpu_op1: unimplemented op %d at %08X\n", (REG_IR >> 6) & 0x3, REG_PC-2); } } +void m68881_ftrap() +{ + uint16 w2 = OPER_I_16(); + + // now check the condition + if (TEST_CONDITION(w2 & 0x3f)) + { + // trap here + m68ki_exception_trap(EXCEPTION_TRAPV); + } + else // fall through, requires eating the operand + { + switch (REG_IR & 0x7) + { + case 2: // word operand + OPER_I_16(); + break; + case 3: // long word operand + OPER_I_32(); + break; + case 4: // no operand + break; + } + } +} diff --git a/softfloat/fpu_constant.h b/softfloat/fpu_constant.h new file mode 100644 index 0000000..fdd9719 --- /dev/null +++ b/softfloat/fpu_constant.h @@ -0,0 +1,80 @@ +/*============================================================================ +This source file is an extension to the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) +floating point emulation. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. +=============================================================================*/ + +#ifndef _FPU_CONSTANTS_H_ +#define _FPU_CONSTANTS_H_ + +// Pentium CPU uses only 68-bit precision M_PI approximation +#define BETTER_THAN_PENTIUM + +/*============================================================================ + * Written for Bochs (x86 achitecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +////////////////////////////// +// PI, PI/2, PI/4 constants +////////////////////////////// + +#define FLOATX80_PI_EXP (0x4000) + +// 128-bit PI fraction +#ifdef BETTER_THAN_PENTIUM +#define FLOAT_PI_HI (0xc90fdaa22168c234U) +#define FLOAT_PI_LO (0xc4c6628b80dc1cd1U) +#else +#define FLOAT_PI_HI (0xc90fdaa22168c234U) +#define FLOAT_PI_LO (0xC000000000000000U) +#endif + +#define FLOATX80_PI2_EXP (0x3FFF) +#define FLOATX80_PI4_EXP (0x3FFE) + +////////////////////////////// +// 3PI/4 constant +////////////////////////////// + +#define FLOATX80_3PI4_EXP (0x4000) + +// 128-bit 3PI/4 fraction +#ifdef BETTER_THAN_PENTIUM +#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U) +#define FLOAT_3PI4_LO (0x9394c9e8a0a5159cU) +#else +#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U) +#define FLOAT_3PI4_LO (0x9000000000000000U) +#endif + +////////////////////////////// +// 1/LN2 constant +////////////////////////////// + +#define FLOAT_LN2INV_EXP (0x3FFF) + +// 128-bit 1/LN2 fraction +#ifdef BETTER_THAN_PENTIUM +#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU) +#define FLOAT_LN2INV_LO (0xbe87fed0691d3e89U) +#else +#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU) +#define FLOAT_LN2INV_LO (0xC000000000000000U) +#endif + +#endif diff --git a/softfloat/fsincos.c b/softfloat/fsincos.c new file mode 100644 index 0000000..764abd0 --- /dev/null +++ b/softfloat/fsincos.c @@ -0,0 +1,647 @@ +/*============================================================================ +This source file is an extension to the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) +floating point emulation. + +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. +=============================================================================*/ + +/*============================================================================ + * Written for Bochs (x86 achitecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * ==========================================================================*/ + +#define FLOAT128 + +#define USE_estimateDiv128To64 + +#include "m68kcpu.h" // which includes softfloat.h after defining the basic types +#include "assert.h" + +//#include "softfloat-specialize" +#include "fpu_constant.h" + +#define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U) +#define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU) + +#define packFloat2x128m(zHi, zLo) {(zHi), (zLo)} +#define PACK_FLOAT_128(hi,lo) packFloat2x128m(LIT64(hi),LIT64(lo)) + +#define EXP_BIAS 0x3FFF + +/*---------------------------------------------------------------------------- +| Returns the fraction bits of the extended double-precision floating-point +| value `a'. +*----------------------------------------------------------------------------*/ + +static inline bits64 extractFloatx80Frac( floatx80 a ) +{ + return a.low; + +} + +/*---------------------------------------------------------------------------- +| Returns the exponent bits of the extended double-precision floating-point +| value `a'. +*----------------------------------------------------------------------------*/ + +static inline int32 extractFloatx80Exp( floatx80 a ) +{ + return a.high & 0x7FFF; + +} + +/*---------------------------------------------------------------------------- +| Returns the sign bit of the extended double-precision floating-point value +| `a'. +*----------------------------------------------------------------------------*/ + +static inline flag extractFloatx80Sign( floatx80 a ) +{ + return a.high>>15; + +} + +/*---------------------------------------------------------------------------- +| Takes extended double-precision floating-point NaN `a' and returns the +| appropriate NaN result. If `a' is a signaling NaN, the invalid exception +| is raised. +*----------------------------------------------------------------------------*/ + +static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a) +{ + if (floatx80_is_signaling_nan(a)) + float_raise(float_flag_invalid); + + a.low |= 0xC000000000000000U; + + return a; +} + +/*---------------------------------------------------------------------------- +| Normalizes the subnormal extended double-precision floating-point value +| represented by the denormalized significand `aSig'. The normalized exponent +| and significand are stored at the locations pointed to by `zExpPtr' and +| `zSigPtr', respectively. +*----------------------------------------------------------------------------*/ + +void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr) +{ + int shiftCount = countLeadingZeros64(aSig); + *zSigPtr = aSig< 0) { + q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1); + } + else { + if (FLOAT_PI_HI <= aSig0) { + aSig0 -= FLOAT_PI_HI; + q = 1; + } + } + + shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1); + if (! lt128(aSig0, aSig1, term0, term1)) + { + int lt = lt128(term0, term1, aSig0, aSig1); + int eq = eq128(aSig0, aSig1, term0, term1); + + if ((eq && (q & 1)) || lt) { + zSign = !zSign; + ++q; + } + if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1); + } + + return (int)(q & 3); +} + +#define SIN_ARR_SIZE 11 +#define COS_ARR_SIZE 11 + +static float128 sin_arr[SIN_ARR_SIZE] = +{ + PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 1 */ + PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /* 3 */ + PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /* 5 */ + PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /* 7 */ + PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /* 9 */ + PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */ + PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */ + PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */ + PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */ + PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */ + PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45) /* 21 */ +}; + +static float128 cos_arr[COS_ARR_SIZE] = +{ + PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /* 0 */ + PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /* 2 */ + PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /* 4 */ + PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /* 6 */ + PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /* 8 */ + PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */ + PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */ + PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */ + PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */ + PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */ + PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb) /* 20 */ +}; + +extern float128 OddPoly (float128 x, float128 *arr, unsigned n); + +/* 0 <= x <= pi/4 */ +static inline float128 poly_sin(float128 x) +{ + // 3 5 7 9 11 13 15 + // x x x x x x x + // sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- = + // 3! 5! 7! 9! 11! 13! 15! + // + // 2 4 6 8 10 12 14 + // x x x x x x x + // = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] = + // 3! 5! 7! 9! 11! 13! 15! + // + // 3 3 + // -- 4k -- 4k+2 + // p(x) = > C * x > 0 q(x) = > C * x < 0 + // -- 2k -- 2k+1 + // k=0 k=0 + // + // 2 + // sin(x) ~ x * [ p(x) + x * q(x) ] + // + + return OddPoly(x, sin_arr, SIN_ARR_SIZE); +} + +extern float128 EvenPoly(float128 x, float128 *arr, unsigned n); + +/* 0 <= x <= pi/4 */ +static inline float128 poly_cos(float128 x) +{ + // 2 4 6 8 10 12 14 + // x x x x x x x + // cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ---- + // 2! 4! 6! 8! 10! 12! 14! + // + // 3 3 + // -- 4k -- 4k+2 + // p(x) = > C * x > 0 q(x) = > C * x < 0 + // -- 2k -- 2k+1 + // k=0 k=0 + // + // 2 + // cos(x) ~ [ p(x) + x * q(x) ] + // + + return EvenPoly(x, cos_arr, COS_ARR_SIZE); +} + +static inline void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a) +{ + if (sin_a) *sin_a = a; + if (cos_a) *cos_a = a; +} + +static inline void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a) +{ + if (sin_a) *sin_a = a; + if (cos_a) *cos_a = floatx80_one; +} + +static floatx80 sincos_approximation(int neg, float128 r, uint64_t quotient) +{ + if (quotient & 0x1) { + r = poly_cos(r); + neg = 0; + } else { + r = poly_sin(r); + } + + floatx80 result = float128_to_floatx80(r); + if (quotient & 0x2) + neg = ! neg; + + if (neg) + result = floatx80_chs(result); + + return result; +} + +// ================================================= +// SFFSINCOS Compute sin(x) and cos(x) +// ================================================= + +// +// Uses the following identities: +// ---------------------------------------------------------- +// +// sin(-x) = -sin(x) +// cos(-x) = cos(x) +// +// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y) +// cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y) +// +// sin(x+ pi/2) = cos(x) +// sin(x+ pi) = -sin(x) +// sin(x+3pi/2) = -cos(x) +// sin(x+2pi) = sin(x) +// + +int sf_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a) +{ + uint64_t aSig0, aSig1 = 0; + int32_t aExp, zExp, expDiff; + int aSign, zSign; + int q = 0; + + aSig0 = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + + /* invalid argument */ + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig0<<1)) { + sincos_invalid(sin_a, cos_a, propagateFloatx80NaNOneArg(a)); + return 0; + } + + float_raise(float_flag_invalid); + sincos_invalid(sin_a, cos_a, floatx80_default_nan); + return 0; + } + + if (aExp == 0) { + if (aSig0 == 0) { + sincos_tiny_argument(sin_a, cos_a, a); + return 0; + } + +// float_raise(float_flag_denormal); + + /* handle pseudo denormals */ + if (! (aSig0 & 0x8000000000000000U)) + { + float_raise(float_flag_inexact); + if (sin_a) + float_raise(float_flag_underflow); + sincos_tiny_argument(sin_a, cos_a, a); + return 0; + } + + normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); + } + + zSign = aSign; + zExp = EXP_BIAS; + expDiff = aExp - zExp; + + /* argument is out-of-range */ + if (expDiff >= 63) + return -1; + + float_raise(float_flag_inexact); + + if (expDiff < -1) { // doesn't require reduction + if (expDiff <= -68) { + a = packFloatx80(aSign, aExp, aSig0); + sincos_tiny_argument(sin_a, cos_a, a); + return 0; + } + zExp = aExp; + } + else { + q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1); + } + + /* **************************** */ + /* argument reduction completed */ + /* **************************** */ + + /* using float128 for approximation */ + float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1); + + if (aSign) q = -q; + if (sin_a) *sin_a = sincos_approximation(zSign, r, q); + if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1); + + return 0; +} + +int floatx80_fsin(floatx80 *a) +{ + return sf_fsincos(*a, a, 0); +} + +int floatx80_fcos(floatx80 *a) +{ + return sf_fsincos(*a, 0, a); +} + +// ================================================= +// FPTAN Compute tan(x) +// ================================================= + +// +// Uses the following identities: +// +// 1. ---------------------------------------------------------- +// +// sin(-x) = -sin(x) +// cos(-x) = cos(x) +// +// sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y) +// cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y) +// +// sin(x+ pi/2) = cos(x) +// sin(x+ pi) = -sin(x) +// sin(x+3pi/2) = -cos(x) +// sin(x+2pi) = sin(x) +// +// 2. ---------------------------------------------------------- +// +// sin(x) +// tan(x) = ------ +// cos(x) +// + +int floatx80_ftan(floatx80 *a) +{ + uint64_t aSig0, aSig1 = 0; + int32_t aExp, zExp, expDiff; + int aSign, zSign; + int q = 0; + + aSig0 = extractFloatx80Frac(*a); + aExp = extractFloatx80Exp(*a); + aSign = extractFloatx80Sign(*a); + + /* invalid argument */ + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig0<<1)) + { + *a = propagateFloatx80NaNOneArg(*a); + return 0; + } + + float_raise(float_flag_invalid); + *a = floatx80_default_nan; + return 0; + } + + if (aExp == 0) { + if (aSig0 == 0) return 0; +// float_raise(float_flag_denormal); + /* handle pseudo denormals */ + if (! (aSig0 & 0x8000000000000000U)) + { + float_raise(float_flag_inexact | float_flag_underflow); + return 0; + } + normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0); + } + + zSign = aSign; + zExp = EXP_BIAS; + expDiff = aExp - zExp; + + /* argument is out-of-range */ + if (expDiff >= 63) + return -1; + + float_raise(float_flag_inexact); + + if (expDiff < -1) { // doesn't require reduction + if (expDiff <= -68) { + *a = packFloatx80(aSign, aExp, aSig0); + return 0; + } + zExp = aExp; + } + else { + q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1); + } + + /* **************************** */ + /* argument reduction completed */ + /* **************************** */ + + /* using float128 for approximation */ + float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1); + + float128 sin_r = poly_sin(r); + float128 cos_r = poly_cos(r); + + if (q & 0x1) { + r = float128_div(cos_r, sin_r); + zSign = ! zSign; + } else { + r = float128_div(sin_r, cos_r); + } + + *a = float128_to_floatx80(r); + if (zSign) + *a = floatx80_chs(*a); + + return 0; +} + +// 2 3 4 n +// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) +// 0 1 2 3 4 n +// +// -- 2k -- 2k+1 +// p(x) = > C * x q(x) = > C * x +// -- 2k -- 2k+1 +// +// f(x) ~ [ p(x) + x * q(x) ] +// + +float128 EvalPoly(float128 x, float128 *arr, unsigned n) +{ + float128 x2 = float128_mul(x, x); + unsigned i; + + assert(n > 1); + + float128 r1 = arr[--n]; + i = n; + while(i >= 2) { + r1 = float128_mul(r1, x2); + i -= 2; + r1 = float128_add(r1, arr[i]); + } + if (i) r1 = float128_mul(r1, x); + + float128 r2 = arr[--n]; + i = n; + while(i >= 2) { + r2 = float128_mul(r2, x2); + i -= 2; + r2 = float128_add(r2, arr[i]); + } + if (i) r2 = float128_mul(r2, x); + + return float128_add(r1, r2); +} + +// 2 4 6 8 2n +// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) +// 0 1 2 3 4 n +// +// -- 4k -- 4k+2 +// p(x) = > C * x q(x) = > C * x +// -- 2k -- 2k+1 +// +// 2 +// f(x) ~ [ p(x) + x * q(x) ] +// + +float128 EvenPoly(float128 x, float128 *arr, unsigned n) +{ + return EvalPoly(float128_mul(x, x), arr, n); +} + +// 3 5 7 9 2n+1 +// f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) +// 0 1 2 3 4 n +// 2 4 6 8 2n +// = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x) +// 0 1 2 3 4 n +// +// -- 4k -- 4k+2 +// p(x) = > C * x q(x) = > C * x +// -- 2k -- 2k+1 +// +// 2 +// f(x) ~ x * [ p(x) + x * q(x) ] +// + +float128 OddPoly(float128 x, float128 *arr, unsigned n) +{ + return float128_mul(x, EvenPoly(x, arr, n)); +} + +/*---------------------------------------------------------------------------- +| Scales extended double-precision floating-point value in operand `a' by +| value `b'. The function truncates the value in the second operand 'b' to +| an integral value and adds that value to the exponent of the operand 'a'. +| The operation performed according to the IEC/IEEE Standard for Binary +| Floating-Point Arithmetic. +*----------------------------------------------------------------------------*/ + +extern floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b ); + +floatx80 floatx80_scale(floatx80 a, floatx80 b) +{ + sbits32 aExp, bExp; + bits64 aSig, bSig; + + // handle unsupported extended double-precision floating encodings +/* if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b)) + { + float_raise(float_flag_invalid); + return floatx80_default_nan; + }*/ + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + int aSign = extractFloatx80Sign(a); + bSig = extractFloatx80Frac(b); + bExp = extractFloatx80Exp(b); + int bSign = extractFloatx80Sign(b); + + if (aExp == 0x7FFF) { + if ((bits64) (aSig<<1) || ((bExp == 0x7FFF) && (bits64) (bSig<<1))) + { + return propagateFloatx80NaN(a, b); + } + if ((bExp == 0x7FFF) && bSign) { + float_raise(float_flag_invalid); + return floatx80_default_nan; + } + if (bSig && (bExp == 0)) float_raise(float_flag_denormal); + return a; + } + if (bExp == 0x7FFF) { + if ((bits64) (bSig<<1)) return propagateFloatx80NaN(a, b); + if ((aExp | aSig) == 0) { + if (! bSign) { + float_raise(float_flag_invalid); + return floatx80_default_nan; + } + return a; + } + if (aSig && (aExp == 0)) float_raise(float_flag_denormal); + if (bSign) return packFloatx80(aSign, 0, 0); + return packFloatx80(aSign, 0x7FFF, 0x8000000000000000U); + } + if (aExp == 0) { + if (aSig == 0) return a; + float_raise(float_flag_denormal); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + if (bExp == 0) { + if (bSig == 0) return a; + float_raise(float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + } + + if (bExp > 0x400E) { + /* generate appropriate overflow/underflow */ + return roundAndPackFloatx80(80, aSign, + bSign ? -0x3FFF : 0x7FFF, aSig, 0); + } + if (bExp < 0x3FFF) return a; + + int shiftCount = 0x403E - bExp; + bSig >>= shiftCount; + sbits32 scale = bSig; + if (bSign) scale = -scale; /* -32768..32767 */ + return + roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0); +} diff --git a/softfloat/fyl2x.c b/softfloat/fyl2x.c new file mode 100644 index 0000000..c7e0d45 --- /dev/null +++ b/softfloat/fyl2x.c @@ -0,0 +1,487 @@ +/*============================================================================ +This source file is an extension to the SoftFloat IEC/IEEE Floating-point +Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator) +floating point emulation. +float_raise(float_flag_invalid) +THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE. Although reasonable effort has +been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES +RESULT IN INCORRECT BEHAVIOR. USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS +AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES, +COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE +EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE +INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR +OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE. + +Derivative works are acceptable, even for commercial purposes, so long as +(1) the source code for the derivative work includes prominent notice that +the work is derivative, and (2) the source code includes prominent notice with +these four paragraphs for those parts of this code that are retained. +=============================================================================*/ + +/*============================================================================ + * Written for Bochs (x86 achitecture simulator) by + * Stanislav Shwartsman [sshwarts at sourceforge net] + * Adapted for lib/softfloat in MESS by Hans Ostermeyer (03/2012) + * ==========================================================================*/ + +#define FLOAT128 + +#define USE_estimateDiv128To64 + +#include "m68kcpu.h" // which includes softfloat.h after defining the basic types + +//#include "softfloat-specialize" +#include "fpu_constant.h" + +#define floatx80_log10_2 packFloatx80(0, 0x3ffd, 0x9a209a84fbcff798U) +#define floatx80_ln_2 packFloatx80(0, 0x3ffe, 0xb17217f7d1cf79acU) +#define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U) +#define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU) + +#define packFloat_128(zHi, zLo) {(zHi), (zLo)} +#define PACK_FLOAT_128(hi,lo) packFloat_128(LIT64(hi),LIT64(lo)) + +#define EXP_BIAS 0x3FFF + +/*---------------------------------------------------------------------------- +| Returns the fraction bits of the extended double-precision floating-point +| value `a'. +*----------------------------------------------------------------------------*/ + +static inline bits64 extractFloatx80Frac( floatx80 a ) +{ + return a.low; + +} + +/*---------------------------------------------------------------------------- +| Returns the exponent bits of the extended double-precision floating-point +| value `a'. +*----------------------------------------------------------------------------*/ + +static inline int32 extractFloatx80Exp( floatx80 a ) +{ + return a.high & 0x7FFF; + +} + +/*---------------------------------------------------------------------------- +| Returns the sign bit of the extended double-precision floating-point value +| `a'. +*----------------------------------------------------------------------------*/ + +static inline flag extractFloatx80Sign( floatx80 a ) +{ + return a.high>>15; + +} + +#if 0 +/*---------------------------------------------------------------------------- +| Takes extended double-precision floating-point NaN `a' and returns the +| appropriate NaN result. If `a' is a signaling NaN, the invalid exception +| is raised. +*----------------------------------------------------------------------------*/ + +static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a) +{ + if (floatx80_is_signaling_nan(a)) + float_raise(float_flag_invalid); + + a.low |= 0xC000000000000000U; + + return a; +} +#endif + +/*---------------------------------------------------------------------------- +| Normalizes the subnormal extended double-precision floating-point value +| represented by the denormalized significand `aSig'. The normalized exponent +| and significand are stored at the locations pointed to by `zExpPtr' and +| `zSigPtr', respectively. +*----------------------------------------------------------------------------*/ + +static inline void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr) +{ + int shiftCount = countLeadingZeros64(aSig); + *zSigPtr = aSig< C * u q(u) = > C * u + // -- 2k -- 2k+1 + // k=0 k=0 + // + // 1+u 2 + // 1/2 ln --- ~ u * [ p(u) + u * q(u) ] + // 1-u + // +*/ + return OddPoly(x1, ln_arr, L2_ARR_SIZE); +} + +/* required sqrt(2)/2 < x < sqrt(2) */ +static float128 poly_l2(float128 x) +{ + /* using float128 for approximation */ + float128 x_p1 = float128_add(x, float128_one); + float128 x_m1 = float128_sub(x, float128_one); + x = float128_div(x_m1, x_p1); + x = poly_ln(x); + x = float128_mul(x, float128_ln2inv2); + return x; +} + +static float128 poly_l2p1(float128 x) +{ + /* using float128 for approximation */ + float128 x_p2 = float128_add(x, float128_two); + x = float128_div(x, x_p2); + x = poly_ln(x); + x = float128_mul(x, float128_ln2inv2); + return x; +} + +// ================================================= +// FYL2X Compute y * log (x) +// 2 +// ================================================= + +// +// Uses the following identities: +// +// 1. ---------------------------------------------------------- +// ln(x) +// log (x) = -------, ln (x*y) = ln(x) + ln(y) +// 2 ln(2) +// +// 2. ---------------------------------------------------------- +// 1+u x-1 +// ln (x) = ln -----, when u = ----- +// 1-u x+1 +// +// 3. ---------------------------------------------------------- +// 3 5 7 2n+1 +// 1+u u u u u +// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ] +// 1-u 3 5 7 2n+1 +// + +static floatx80 fyl2x(floatx80 a, floatx80 b) +{ + uint64_t aSig = extractFloatx80Frac(a); + int32_t aExp = extractFloatx80Exp(a); + int aSign = extractFloatx80Sign(a); + uint64_t bSig = extractFloatx80Frac(b); + int32_t bExp = extractFloatx80Exp(b); + int bSign = extractFloatx80Sign(b); + + int zSign = bSign ^ 1; + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1) + || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1))) + { + return propagateFloatx80NaN(a, b); + } + if (aSign) + { +invalid: + float_raise(float_flag_invalid); + return floatx80_default_nan; + } + else { + if (bExp == 0) { + if (bSig == 0) goto invalid; + float_raise(float_flag_denormal); + } + return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U); + } + } + if (bExp == 0x7FFF) + { + if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b); + if (aSign && (uint64_t)(aExp | aSig)) goto invalid; + if (aSig && (aExp == 0)) + float_raise(float_flag_denormal); + if (aExp < 0x3FFF) { + return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U); + } + if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0)) goto invalid; + return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U); + } + if (aExp == 0) { + if (aSig == 0) { + if ((bExp | bSig) == 0) goto invalid; + float_raise(float_flag_divbyzero); + return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U); + } + if (aSign) goto invalid; + float_raise(float_flag_denormal); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + if (aSign) goto invalid; + if (bExp == 0) { + if (bSig == 0) { + if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0); + return packFloatx80(bSign, 0, 0); + } + float_raise(float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + } + if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0)) + return packFloatx80(bSign, 0, 0); + + float_raise(float_flag_inexact); + + int ExpDiff = aExp - 0x3FFF; + aExp = 0; + if (aSig >= SQRT2_HALF_SIG) { + ExpDiff++; + aExp--; + } + + /* ******************************** */ + /* using float128 for approximation */ + /* ******************************** */ + + uint64_t zSig0, zSig1; + shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1); + float128 x = packFloat128(0, aExp+0x3FFF, zSig0, zSig1); + x = poly_l2(x); + x = float128_add(x, int64_to_float128((int64_t) ExpDiff)); + return floatx80_mul(b, float128_to_floatx80(x)); +} + +// ================================================= +// FYL2XP1 Compute y * log (x + 1) +// 2 +// ================================================= + +// +// Uses the following identities: +// +// 1. ---------------------------------------------------------- +// ln(x) +// log (x) = ------- +// 2 ln(2) +// +// 2. ---------------------------------------------------------- +// 1+u x +// ln (x+1) = ln -----, when u = ----- +// 1-u x+2 +// +// 3. ---------------------------------------------------------- +// 3 5 7 2n+1 +// 1+u u u u u +// ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ] +// 1-u 3 5 7 2n+1 +// + +floatx80 fyl2xp1(floatx80 a, floatx80 b) +{ + int32_t aExp, bExp; + uint64_t aSig, bSig, zSig0, zSig1, zSig2; + int aSign, bSign; + + aSig = extractFloatx80Frac(a); + aExp = extractFloatx80Exp(a); + aSign = extractFloatx80Sign(a); + bSig = extractFloatx80Frac(b); + bExp = extractFloatx80Exp(b); + bSign = extractFloatx80Sign(b); + int zSign = aSign ^ bSign; + + if (aExp == 0x7FFF) { + if ((uint64_t) (aSig<<1) + || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1))) + { + return propagateFloatx80NaN(a, b); + } + if (aSign) + { +invalid: + float_raise(float_flag_invalid); + return floatx80_default_nan; + } + else { + if (bExp == 0) { + if (bSig == 0) goto invalid; + float_raise(float_flag_denormal); + } + return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U); + } + } + if (bExp == 0x7FFF) + { + if ((uint64_t) (bSig<<1)) + return propagateFloatx80NaN(a, b); + + if (aExp == 0) { + if (aSig == 0) goto invalid; + float_raise(float_flag_denormal); + } + + return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U); + } + if (aExp == 0) { + if (aSig == 0) { + if (bSig && (bExp == 0)) float_raise(float_flag_denormal); + return packFloatx80(zSign, 0, 0); + } + float_raise(float_flag_denormal); + normalizeFloatx80Subnormal(aSig, &aExp, &aSig); + } + if (bExp == 0) { + if (bSig == 0) return packFloatx80(zSign, 0, 0); + float_raise(float_flag_denormal); + normalizeFloatx80Subnormal(bSig, &bExp, &bSig); + } + + float_raise(float_flag_inexact); + + if (aSign && aExp >= 0x3FFF) + return a; + + if (aExp >= 0x3FFC) // big argument + { + return fyl2x(floatx80_add(a, floatx80_one), b); + } + + // handle tiny argument + if (aExp < EXP_BIAS-70) + { + // first order approximation, return (a*b)/ln(2) + int32_t zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE; + + mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2); + if (0 < (int64_t) zSig0) { + shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); + --zExp; + } + + zExp = zExp + bExp - 0x3FFE; + mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2); + if (0 < (int64_t) zSig0) { + shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1); + --zExp; + } + + return + roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1); + } + + /* ******************************** */ + /* using float128 for approximation */ + /* ******************************** */ + + shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1); + float128 x = packFloat128(aSign, aExp, zSig0, zSig1); + x = poly_l2p1(x); + return floatx80_mul(b, float128_to_floatx80(x)); +} + +floatx80 floatx80_flognp1(floatx80 a) +{ + return fyl2xp1(a, floatx80_ln_2); +} + +floatx80 floatx80_flogn(floatx80 a) +{ + return fyl2x(a, floatx80_ln_2); +} + +floatx80 floatx80_flog2(floatx80 a) +{ + return fyl2x(a, floatx80_one); +} + +floatx80 floatx80_flog10(floatx80 a) +{ + return fyl2x(a, floatx80_log10_2); +} diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h index 92135e6..8f47f0a 100644 --- a/softfloat/softfloat.h +++ b/softfloat/softfloat.h @@ -240,9 +240,9 @@ flag floatx80_le_quiet( floatx80, floatx80 ); flag floatx80_lt_quiet( floatx80, floatx80 ); flag floatx80_is_signaling_nan( floatx80 ); -/* int floatx80_fsin(floatx80 &a); -int floatx80_fcos(floatx80 &a); -int floatx80_ftan(floatx80 &a); */ +int floatx80_fsin(floatx80 *a); +int floatx80_fcos(floatx80 *a); +int floatx80_ftan(floatx80 *a); floatx80 floatx80_flognp1(floatx80 a); floatx80 floatx80_flogn(floatx80 a);