From: shanshe <shanshe@gmail.com>
Date: Tue, 13 Apr 2021 07:56:39 +0000 (+0200)
Subject: FPU update as latest MAME
X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=6c9276f882444e8f164bed4e642fbf3cbb259971;p=pistorm

FPU update as latest MAME
---

diff --git a/Makefile b/Makefile
index 1ccd7e8..c3483e0 100644
--- a/Makefile
+++ b/Makefile
@@ -22,7 +22,7 @@ MAINFILES        = emulator.c \
 	platforms/amiga/net/pi-net.c \
 	platforms/shared/rtc.c
 
-MUSASHIFILES     = m68kcpu.c m68kdasm.c softfloat/softfloat.c
+MUSASHIFILES     = m68kcpu.c m68kdasm.c softfloat/softfloat.c softfloat/fsincos.c softfloat/fyl2x.c
 MUSASHIGENCFILES = m68kops.c
 MUSASHIGENHFILES = m68kops.h
 MUSASHIGENERATOR = m68kmake
diff --git a/m68k_in.c b/m68k_in.c
index cf0eb18..cf2ab62 100644
--- a/m68k_in.c
+++ b/m68k_in.c
@@ -284,6 +284,7 @@ M68KMAKE_OPCODE_HANDLER_HEADER
 extern void m68040_fpu_op0(void);
 extern void m68040_fpu_op1(void);
 extern void m68881_mmu_ops();
+extern void m68881_ftrap();
 
 /* ======================================================================== */
 /* ========================= INSTRUCTION HANDLERS ========================= */
@@ -561,6 +562,7 @@ cpdbcc    32  .     .     1111...001001...  ..........  . . U U .   .   .   4
 cpgen     32  .     .     1111...000......  ..........  . . U U .   .   .   4   4   .  unemulated
 cpscc     32  .     .     1111...001......  ..........  . . U U .   .   .   4   4   .  unemulated
 cptrapcc  32  .     .     1111...001111...  ..........  . . U U .   .   .   4   4   .  unemulated
+ftrapcc   32  .     .     1111001001111...  ..........  . . U U .   .   .   4   4   .
 dbt       16  .     .     0101000011001...  ..........  U U U U U  12  12   6   6   6
 dbf       16  .     .     0101000111001...  ..........  U U U U U  12  12   6   6   6
 dbcc      16  .     .     0101....11001...  ..........  U U U U U  12  12   6   6   6
@@ -915,7 +917,8 @@ M68KMAKE_OP(1111, 0, ., .)
 
 M68KMAKE_OP(040fpu0, 32, ., .)
 {
-	if(CPU_TYPE_IS_030_PLUS(CPU_TYPE))
+//	printf("FPU 040fpu0 HAS_FPU=%d\n",!!HAS_FPU);
+	if(HAS_FPU)
 	{
 		m68040_fpu_op0();
 		return;
@@ -926,7 +929,8 @@ M68KMAKE_OP(040fpu0, 32, ., .)
 
 M68KMAKE_OP(040fpu1, 32, ., .)
 {
-	if(CPU_TYPE_IS_030_PLUS(CPU_TYPE))
+//	printf("FPU 040fpu1 HAS_FPU=%d\n",!!HAS_FPU);
+	if(HAS_FPU)
 	{
 		m68040_fpu_op1();
 		return;
@@ -4370,6 +4374,15 @@ M68KMAKE_OP(cptrapcc, 32, ., .)
 	m68ki_exception_1111();
 }
 
+M68KMAKE_OP(ftrapcc,32, ., .)
+{
+	if(HAS_FPU)
+	{
+		m68881_ftrap();
+	} else {
+		m68ki_exception_1111();
+	}
+}
 
 M68KMAKE_OP(dbt, 16, ., .)
 {
diff --git a/m68kcpu.c b/m68kcpu.c
index dd79593..293f937 100644
--- a/m68kcpu.c
+++ b/m68kcpu.c
@@ -794,7 +794,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 3;
 			CYC_SHIFT        = 1;
 			CYC_RESET        = 132;
-			HAS_PMMU	 = 0;
+			HAS_PMMU         = 0;
+			HAS_FPU          = 0;
 			return;
 		case M68K_CPU_TYPE_SCC68070:
 			m68k_set_cpu_type(M68K_CPU_TYPE_68010);
@@ -816,7 +817,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 3;
 			CYC_SHIFT        = 1;
 			CYC_RESET        = 130;
-			HAS_PMMU	 = 0;
+			HAS_PMMU         = 0;
+			HAS_FPU          = 0;
 			return;
 		case M68K_CPU_TYPE_68EC020:
 			CPU_TYPE         = CPU_TYPE_EC020;
@@ -833,7 +835,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
-			HAS_PMMU	 = 0;
+			HAS_PMMU         = 0;
+			HAS_FPU          = 0;
 			return;
 		case M68K_CPU_TYPE_68020:
 			CPU_TYPE         = CPU_TYPE_020;
@@ -850,7 +853,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
-			HAS_PMMU	 = 0;
+			HAS_PMMU         = 0;
+			HAS_FPU          = 0;
 			return;
 		case M68K_CPU_TYPE_68030:
 			CPU_TYPE         = CPU_TYPE_030;
@@ -867,7 +871,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
-			HAS_PMMU	       = 1;
+			HAS_PMMU         = 1;
+			HAS_FPU          = 1;
 			return;
 		case M68K_CPU_TYPE_68EC030:
 			CPU_TYPE         = CPU_TYPE_EC030;
@@ -884,7 +889,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
-			HAS_PMMU	       = 0;		/* EC030 lacks the PMMU and is effectively a die-shrink 68020 */
+			HAS_PMMU         = 0;		/* EC030 lacks the PMMU and is effectively a die-shrink 68020 */
+			HAS_FPU          = 1;
 			return;
 		case M68K_CPU_TYPE_68040:		// TODO: these values are not correct
 			CPU_TYPE         = CPU_TYPE_040;
@@ -901,7 +907,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
-			HAS_PMMU	 = 1;
+			HAS_PMMU         = 1;
+			HAS_FPU          = 1;
 			return;
 		case M68K_CPU_TYPE_68EC040: // Just a 68040 without pmmu apparently...
 			CPU_TYPE         = CPU_TYPE_EC040;
@@ -918,10 +925,12 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			CYC_MOVEM_L      = 2;
 			CYC_SHIFT        = 0;
 			CYC_RESET        = 518;
-			HAS_PMMU	 = 0;
+			HAS_PMMU         = 0;
+			HAS_FPU          = 0;
 			return;
 		case M68K_CPU_TYPE_68LC040:
 			CPU_TYPE         = CPU_TYPE_LC040;
+			CPU_ADDRESS_MASK = 0xffffffff;
 			m68ki_cpu.sr_mask          = 0xf71f; /* T1 T0 S  M  -- I2 I1 I0 -- -- -- X  N  Z  V  C  */
 			m68ki_cpu.cyc_instruction  = m68ki_cycles[4];
 			m68ki_cpu.cyc_exception    = m68ki_exception_cycle_table[4];
@@ -934,7 +943,8 @@ void m68k_set_cpu_type(unsigned int cpu_type)
 			m68ki_cpu.cyc_movem_l      = 2;
 			m68ki_cpu.cyc_shift        = 0;
 			m68ki_cpu.cyc_reset        = 518;
-			HAS_PMMU	       = 1;
+			HAS_PMMU         = 1;
+			HAS_FPU          = 0;
 			return;
 	}
 }
diff --git a/m68kcpu.h b/m68kcpu.h
index ab47308..8cd03d6 100644
--- a/m68kcpu.h
+++ b/m68kcpu.h
@@ -381,9 +381,10 @@ typedef uint32 uint64;
 #define CYC_MOVEM_L      m68ki_cpu.cyc_movem_l
 #define CYC_SHIFT        m68ki_cpu.cyc_shift
 #define CYC_RESET        m68ki_cpu.cyc_reset
-#define HAS_PMMU	 m68ki_cpu.has_pmmu
-#define PMMU_ENABLED	 m68ki_cpu.pmmu_enabled
-#define RESET_CYCLES	 m68ki_cpu.reset_cycles
+#define HAS_PMMU         m68ki_cpu.has_pmmu
+#define HAS_FPU          m68ki_cpu.has_fpu
+#define PMMU_ENABLED     m68ki_cpu.pmmu_enabled
+#define RESET_CYCLES     m68ki_cpu.reset_cycles
 
 
 #define CALLBACK_INT_ACK     m68ki_cpu.int_ack_callback
@@ -960,7 +961,8 @@ typedef struct
 	uint sr_mask;      /* Implemented status register bits */
 	uint instr_mode;   /* Stores whether we are in instruction mode or group 0/1 exception mode */
 	uint run_mode;     /* Stores whether we are processing a reset, bus error, address error, or something else */
-	int    has_pmmu;     /* Indicates if a PMMU available (yes on 030, 040, no on EC030) */
+	int    has_pmmu;   /* Indicates if a PMMU available (yes on 030, 040, no on EC030) */
+	int    has_fpu;    /* Indicates if a FPU available */
 	int    pmmu_enabled; /* Indicates if the PMMU is enabled */
 	int    fpu_just_reset; /* Indicates the FPU was just reset */
 	uint reset_cycles;
diff --git a/m68kfpu.c b/m68kfpu.c
index 473e078..75fc0f5 100644
--- a/m68kfpu.c
+++ b/m68kfpu.c
@@ -17,6 +17,9 @@ static void fatalerror(char *format, ...) {
 #define FPCC_I			0x02000000
 #define FPCC_NAN		0x01000000
 
+#define FPES_OE      0x00002000
+#define FPAE_IOP     0x00000080
+
 #define DOUBLE_INFINITY					(unsigned long long)(0x7ff0000000000000)
 #define DOUBLE_EXPONENT					(unsigned long long)(0x7ff0000000000000)
 #define DOUBLE_MANTISSA					(unsigned long long)(0x000fffffffffffff)
@@ -269,6 +272,10 @@ static inline void store_pack_float80(uint32 ea, int k, floatx80 fpr)
 
 static inline void SET_CONDITION_CODES(floatx80 reg)
 {
+//  u64 *regi;
+
+//  regi = (u64 *)&reg;
+
 	REG_FPSR &= ~(FPCC_N|FPCC_Z|FPCC_I|FPCC_NAN);
 
 	// sign flag
@@ -374,6 +381,16 @@ static uint8 READ_EA_8(int ea)
 			uint32 ea = REG_A[reg];
 			return m68ki_read_8(ea);
 		}
+		case 3:     // (An)+
+		{
+			uint32 ea = EA_AY_PI_8();
+			return m68ki_read_8(ea);
+		}
+		case 4:     // -(An)
+		{
+			uint32 ea = EA_AY_PD_8();
+			return m68ki_read_8(ea);
+		}
 		case 5:		// (d16, An)
 		{
 			uint32 ea = EA_AY_DI_8();
@@ -400,6 +417,16 @@ static uint8 READ_EA_8(int ea)
 					uint32 ea = (d1 << 16) | d2;
 					return m68ki_read_8(ea);
 				}
+				case 2:     // (d16, PC)
+				{
+					uint32 ea = EA_PCDI_8();
+					return m68ki_read_8(ea);
+				}
+				case 3:     // (PC) + (Xn) + d8
+				{
+					uint32 ea =  EA_PCIX_8();
+					return m68ki_read_8(ea);
+				}
 				case 4:		// #<data>
 				{
 					return  OPER_I_8();
@@ -430,6 +457,16 @@ static uint16 READ_EA_16(int ea)
 			uint32 ea = REG_A[reg];
 			return m68ki_read_16(ea);
 		}
+		case 3:     // (An)+
+		{
+			uint32 ea = EA_AY_PI_16();
+			return m68ki_read_16(ea);
+		}
+		case 4:     // -(An)
+		{
+			uint32 ea = EA_AY_PD_16();
+			return m68ki_read_16(ea);
+		}
 		case 5:		// (d16, An)
 		{
 			uint32 ea = EA_AY_DI_16();
@@ -456,6 +493,16 @@ static uint16 READ_EA_16(int ea)
 					uint32 ea = (d1 << 16) | d2;
 					return m68ki_read_16(ea);
 				}
+				case 2:     // (d16, PC)
+				{
+					uint32 ea = EA_PCDI_16();
+					return m68ki_read_16(ea);
+				}
+				case 3:     // (PC) + (Xn) + d8
+				{
+					uint32 ea =  EA_PCIX_16();
+					return m68ki_read_16(ea);
+				}
 				case 4:		// #<data>
 				{
 					return OPER_I_16();
@@ -492,6 +539,11 @@ static uint32 READ_EA_32(int ea)
 			uint32 ea = EA_AY_PI_32();
 			return m68ki_read_32(ea);
 		}
+		case 4:		// -(An)
+		{
+			uint32 ea = EA_AY_PD_32();
+			return m68ki_read_32(ea);
+		}
 		case 5:		// (d16, An)
 		{
 			uint32 ea = EA_AY_DI_32();
@@ -523,6 +575,11 @@ static uint32 READ_EA_32(int ea)
 					uint32 ea = EA_PCDI_32();
 					return m68ki_read_32(ea);
 				}
+				case 3:     // (PC) + (Xn) + d8
+				{
+					uint32 ea =  EA_PCIX_32();
+					return m68ki_read_32(ea);
+				}
 				case 4:		// #<data>
 				{
 					return  OPER_I_32();
@@ -559,6 +616,14 @@ static uint64 READ_EA_64(int ea)
 			h2 = m68ki_read_32(ea+4);
 			return  (uint64)(h1) << 32 | (uint64)(h2);
 		}
+		case 4:     // -(An)
+		{
+			uint32 ea = REG_A[reg]-8;
+			REG_A[reg] -= 8;
+			h1 = m68ki_read_32(ea+0);
+			h2 = m68ki_read_32(ea+4);
+			return  (uint64)(h1) << 32 | (uint64)(h2);
+		}
 		case 5:		// (d16, An)
 		{
 			uint32 ea = EA_AY_DI_32();
@@ -566,10 +631,31 @@ static uint64 READ_EA_64(int ea)
 			h2 = m68ki_read_32(ea+4);
 			return  (uint64)(h1) << 32 | (uint64)(h2);
 		}
+		case 6:     // (An) + (Xn) + d8
+		{
+			uint32 ea = EA_AY_IX_32();
+			h1 = m68ki_read_32(ea+0);
+			h2 = m68ki_read_32(ea+4);
+			return  (uint64)(h1) << 32 | (uint64)(h2);
+		}
 		case 7:
 		{
 			switch (reg)
 			{
+				case 1:     // (xxx).L
+				{
+					uint32 d1 = OPER_I_16();
+					uint32 d2 = OPER_I_16();
+					uint32 ea = (d1 << 16) | d2;
+					return (uint64)(m68ki_read_32(ea)) << 32 | (uint64)(m68ki_read_32(ea+4));
+				}
+				case 3:     // (PC) + (Xn) + d8
+				{
+					uint32 ea =  EA_PCIX_32();
+					h1 = m68ki_read_32(ea+0);
+					h2 = m68ki_read_32(ea+4);
+					return  (uint64)(h1) << 32 | (uint64)(h2);
+				}
 				case 4:		// #<data>
 				{
 					h1 = OPER_I_32();
@@ -594,9 +680,11 @@ static uint64 READ_EA_64(int ea)
 }
 
 
-static floatx80 READ_EA_FPE(int mode, int reg, uint32 di_mode_ea)
+static floatx80 READ_EA_FPE(uint32 ea)
 {
 	floatx80 fpr;
+	int mode = (ea >> 3) & 0x7;
+	int reg = (ea & 0x7);
 
 	switch (mode)
 	{
@@ -614,16 +702,41 @@ static floatx80 READ_EA_FPE(int mode, int reg, uint32 di_mode_ea)
 			fpr = load_extended_float80(ea);
 			break;
 		}
-      case 5:		// (d16, An)  (added by JFF)
+		case 4:     // -(An)
 		{
-		  fpr = load_extended_float80(di_mode_ea);
+			uint32 ea = REG_A[reg]-12;
+			REG_A[reg] -= 12;
+			fpr = load_extended_float80(ea);
+			break;
+		}
+      case 5:		// (d16, An)
+		{
+			// FIXME: will fail for fmovem
+			uint32 ea = EA_AY_DI_32();
+			fpr = load_extended_float80(ea);
 	  	break;
-
 		}
+		case 6:     // (An) + (Xn) + d8
+		{
+			// FIXME: will fail for fmovem
+			uint32 ea = EA_AY_IX_32();
+			fpr = load_extended_float80(ea);
+			break;
+		}
+
 		case 7:	// extended modes
 		{
 			switch (reg)
 			{
+				case 1:     // (xxx)
+					{
+						uint32 d1 = OPER_I_16();
+						uint32 d2 = OPER_I_16();
+						uint32 ea = (d1 << 16) | d2;
+						fpr = load_extended_float80(ea);
+					}
+					break;
+
 				case 2:	// (d16, PC)
 					{
 						uint32 ea = EA_PCDI_32();
@@ -637,13 +750,15 @@ static floatx80 READ_EA_FPE(int mode, int reg, uint32 di_mode_ea)
 						fpr = load_extended_float80(ea);
 					}
 					break;
-	      		case 4: // immediate (JFF)
-				{
-				  uint32 ea = REG_PC;
-				  fpr = load_extended_float80(ea);
-				  REG_PC += 12;
-				}
-				break;
+
+				case 4: // immediate (JFF)
+					{
+						uint32 ea = REG_PC;
+						fpr = load_extended_float80(ea);
+						REG_PC += 12;
+					}
+					break;
+
 				default:
 					fatalerror("M68kFPU: READ_EA_FPE: unhandled mode %d, reg %d, at %08X\n", mode, reg, REG_PC);
 					break;
@@ -891,6 +1006,12 @@ static void WRITE_EA_32(int ea, uint32 data)
 		{
 			switch (reg)
 			{
+				case 0:     // (xxx).W
+				{
+					uint32 ea = OPER_I_16();
+					m68ki_write_32(ea, data);
+					break;
+				}
 				case 1:		// (xxx).L
 				{
 					uint32 d1 = OPER_I_16();
@@ -927,6 +1048,14 @@ static void WRITE_EA_64(int ea, uint64 data)
 			m68ki_write_32(ea+4, (uint32)(data));
 			break;
 		}
+		case 3:     // (An)+
+		{
+			uint32 ea = REG_A[reg];
+			REG_A[reg] += 8;
+			m68ki_write_32(ea+0, (uint32)(data >> 32));
+			m68ki_write_32(ea+4, (uint32)(data));
+			break;
+		}
 		case 4:		// -(An)
 		{
 			uint32 ea;
@@ -943,13 +1072,45 @@ static void WRITE_EA_64(int ea, uint64 data)
 			m68ki_write_32(ea+4, (uint32)(data));
 			break;
 		}
+		case 6:     // (An) + (Xn) + d8
+		{
+			uint32 ea = EA_AY_IX_32();
+			m68ki_write_32(ea+0, (uint32)(data >> 32));
+			m68ki_write_32(ea+4, (uint32)(data));
+			break;
+		}
+		case 7:
+		{
+			switch (reg)
+			{
+				case 1:     // (xxx).L
+				{
+					uint32 d1 = OPER_I_16();
+					uint32 d2 = OPER_I_16();
+					uint32 ea = (d1 << 16) | d2;
+					m68ki_write_32(ea+0, (uint32)(data >> 32));
+					m68ki_write_32(ea+4, (uint32)(data));
+					break;
+				}
+				case 2:     // (d16, PC)
+				{
+					uint32 ea = EA_PCDI_32();
+					m68ki_write_32(ea+0, (uint32)(data >> 32));
+					m68ki_write_32(ea+4, (uint32)(data));
+					break;
+				}
+				default:    fatalerror("M68kFPU: WRITE_EA_64: unhandled mode %d, reg %d at %08X\n", mode, reg, REG_PC);
+			}
+			break;
+		}
 		default:	fatalerror("M68kFPU: WRITE_EA_64: unhandled mode %d, reg %d, data %08X%08X at %08X\n", mode, reg, (uint32)(data >> 32), (uint32)(data), REG_PC);
 	}
 }
 
-static void WRITE_EA_FPE(int mode, int reg, floatx80 fpr, uint32 di_mode_ea)
+static void WRITE_EA_FPE(uint32 ea, floatx80 fpr)
 {
-
+	int mode = (ea >> 3) & 0x7;
+	int reg = (ea & 0x7);
 
 	switch (mode)
 	{
@@ -978,12 +1139,10 @@ static void WRITE_EA_FPE(int mode, int reg, floatx80 fpr, uint32 di_mode_ea)
 			store_extended_float80(ea, fpr);
 			break;
 		}
-    	  case 5:		// (d16, An)  (added by JFF)
+    	  case 5:		// (d16, An)
 		{
-		  // EA_AY_DI_32() should not be done here because fmovem would increase
-		  // PC each time, reading incorrect displacement & advancing PC too much
-		  // uint32 ea = EA_AY_DI_32();
-		  store_extended_float80(di_mode_ea, fpr);
+		  uint32 ea = EA_AY_DI_32();
+		  store_extended_float80(ea, fpr);
 	 	 break;
 
 		}
@@ -1074,11 +1233,8 @@ static void fpgen_rm_reg(uint16 w2)
 			}
 			case 2:		// Extended-precision Real
 			{
-	  	    	int imode = (ea >> 3) & 0x7;
-	  	    	int reg = (ea & 0x7);
-		      	uint32 di_mode_ea = imode == 5 ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
-		      	source = READ_EA_FPE(imode,reg,di_mode_ea);
-			  	break;
+				source = READ_EA_FPE(ea);
+				break;
 			}
 			case 3:		// Packed-decimal Real
 			{
@@ -1158,6 +1314,59 @@ static void fpgen_rm_reg(uint16 w2)
 					case 0x34:	// 10^2
 						source = int32_to_floatx80((sint32)10*10);
 						break;
+					case 0x35:  // 10^4
+						source = int32_to_floatx80((sint32)1000*10);
+						break;
+
+					case 0x36:  // 1.0e8
+						source = int32_to_floatx80((sint32)10000000*10);
+						break;
+
+					case 0x37:  // 1.0e16 - can't get the right precision from s32 so go "direct" with constants from h/w
+						source.high = 0x4034;
+						source.low = U64(0x8e1bc9bf04000000);
+						break;
+
+					case 0x38:  // 1.0e32
+						source.high = 0x4069;
+						source.low = U64(0x9dc5ada82b70b59e);
+						break;
+
+					case 0x39:  // 1.0e64
+						source.high = 0x40d3;
+						source.low = U64(0xc2781f49ffcfa6d5);
+						break;
+
+					case 0x3a:  // 1.0e128
+						source.high = 0x41a8;
+						source.low = U64(0x93ba47c980e98ce0);
+						break;
+
+					case 0x3b:  // 1.0e256
+						source.high = 0x4351;
+						source.low = U64(0xaa7eebfb9df9de8e);
+						break;
+
+					case 0x3c:  // 1.0e512
+						source.high = 0x46a3;
+						source.low = U64(0xe319a0aea60e91c7);
+						break;
+
+					case 0x3d:  // 1.0e1024
+						source.high = 0x4d48;
+						source.low = U64(0xc976758681750c17);
+						break;
+
+					case 0x3e:  // 1.0e2048
+						source.high = 0x5a92;
+						source.low = U64(0x9e8b3b5dc53d5de5);
+						break;
+
+					case 0x3f:  // 1.0e4096
+						source.high = 0x7525;
+						source.low = U64(0xc46052028a20979b);
+						break;
+
 
 					default:
 						fatalerror("fmove_rm_reg: unknown constant ROM offset %x at %08x\n", w2&0x7f, REG_PC-4);
@@ -1166,7 +1375,8 @@ static void fpgen_rm_reg(uint16 w2)
 
 				// handle it right here, the usual opmode bits aren't valid in the FMOVECR case
 				REG_FP[dst] = source;
-	     		SET_CONDITION_CODES(REG_FP[dst]); // JFF when destination is a register, we HAVE to update FPCR
+				//FIXME mame doesn't use SET_CONDITION_CODES here
+				SET_CONDITION_CODES(REG_FP[dst]); // JFF when destination is a register, we HAVE to update FPCR
 				USE_CYCLES(4);
 				return;
 			}
@@ -1185,23 +1395,25 @@ static void fpgen_rm_reg(uint16 w2)
 		case 0x00:		// FMOVE
 		{
 			REG_FP[dst] = source;
-		    SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
+			SET_CONDITION_CODES(REG_FP[dst]);
 			USE_CYCLES(4);
 			break;
 		}
-		case 0x01:		// Fsint
+		case 0x01:		// FINT
 		{
 			sint32 temp;
 			temp = floatx80_to_int32(source);
 			REG_FP[dst] = int32_to_floatx80(temp);
-	  		SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
+			//FIXME mame doesn't use SET_CONDITION_CODES here
+			SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
 			break;
 		}
-		case 0x03:		// FsintRZ
+		case 0x03:		// FINTRZ
 		{
 			sint32 temp;
 			temp = floatx80_to_int32_round_to_zero(source);
 			REG_FP[dst] = int32_to_floatx80(temp);
+			//FIXME mame doesn't use SET_CONDITION_CODES here
 			SET_CONDITION_CODES(REG_FP[dst]);  // JFF needs update condition codes
 			break;
 		}
@@ -1212,6 +1424,50 @@ static void fpgen_rm_reg(uint16 w2)
 			USE_CYCLES(109);
 			break;
 		}
+		case 0x06:      // FLOGNP1
+		{
+			REG_FP[dst] = floatx80_flognp1 (source);
+			SET_CONDITION_CODES(REG_FP[dst]);
+			USE_CYCLES(594); // for MC68881
+			break;
+		}
+		case 0x0e:      // FSIN
+		{
+			REG_FP[dst] = source;
+			floatx80_fsin(&REG_FP[dst]);
+			SET_CONDITION_CODES(REG_FP[dst]);
+			USE_CYCLES(75);
+			break;
+		}
+		case 0x0f:      // FTAN
+		{
+			REG_FP[dst] = source;
+			floatx80_ftan(&REG_FP[dst]);
+			SET_CONDITION_CODES(REG_FP[dst]);
+			USE_CYCLES(75);
+			break;
+		}
+		case 0x14:      // FLOGN
+		{
+			REG_FP[dst] = floatx80_flogn (source);
+			SET_CONDITION_CODES(REG_FP[dst]);
+			USE_CYCLES(548); // for MC68881
+			break;
+		}
+		case 0x15:      // FLOG10
+		{
+			REG_FP[dst] = floatx80_flog10 (source);
+			SET_CONDITION_CODES(REG_FP[dst]);
+			USE_CYCLES(604); // for MC68881
+			break;
+		}
+		case 0x16:      // FLOG2
+		{
+			REG_FP[dst] = floatx80_flog2 (source);
+			SET_CONDITION_CODES(REG_FP[dst]);
+			USE_CYCLES(604); // for MC68881
+			break;
+		}
 		case 0x18:		// FABS
 		{
 			REG_FP[dst] = source;
@@ -1228,24 +1484,44 @@ static void fpgen_rm_reg(uint16 w2)
 			USE_CYCLES(3);
 			break;
 		}
+		case 0x1d:      // FCOS
+		{
+			REG_FP[dst] = source;
+			floatx80_fcos(&REG_FP[dst]);
+			SET_CONDITION_CODES(REG_FP[dst]);
+			USE_CYCLES(75);
+			break;
+		}
 		case 0x1e:		// FGETEXP
 		{
-			sint16 temp;
-			temp = source.high;	// get the exponent
-			temp -= 0x3fff;	// take off the bias
-			REG_FP[dst] = double_to_fx80((double)temp);
+			sint16 temp2;
+
+			temp2 = source.high;	// get the exponent
+			temp2 -= 0x3fff;	// take off the bias
+			REG_FP[dst] = double_to_fx80((double)temp2);
 			SET_CONDITION_CODES(REG_FP[dst]);
 			USE_CYCLES(6);
 			break;
 		}
-  	    case 0x60:		// FSDIVS (JFF) (source has already been converted to floatx80)
+		case 0x60:		// FSDIVS (JFF) (source has already been converted to floatx80)
 		case 0x20:		// FDIV
 		{
 			REG_FP[dst] = floatx80_div(REG_FP[dst], source);
-		    SET_CONDITION_CODES(REG_FP[dst]); // JFF
+			//FIXME mame doesn't use SET_CONDITION_CODES here
+			SET_CONDITION_CODES(REG_FP[dst]); // JFF
 			USE_CYCLES(43);
 			break;
 		}
+		case 0x21:      // FMOD
+		{
+			sint8 const mode = float_rounding_mode;
+			float_rounding_mode = float_round_to_zero;
+			REG_FP[dst] = floatx80_rem(REG_FP[dst], source);
+			SET_CONDITION_CODES(REG_FP[dst]);
+			float_rounding_mode = mode;
+			USE_CYCLES(43);   // guess
+			break;
+		}
 		case 0x22:		// FADD
 		{
 			REG_FP[dst] = floatx80_add(REG_FP[dst], source);
@@ -1253,7 +1529,7 @@ static void fpgen_rm_reg(uint16 w2)
 			USE_CYCLES(9);
 			break;
 		}
-   		case 0x63:		// FSMULS (JFF) (source has already been converted to floatx80)
+		case 0x63:		// FSMULS (JFF) (source has already been converted to floatx80)
 		case 0x23:		// FMUL
 		{
 			REG_FP[dst] = floatx80_mul(REG_FP[dst], source);
@@ -1261,13 +1537,40 @@ static void fpgen_rm_reg(uint16 w2)
 			USE_CYCLES(11);
 			break;
 		}
+		case 0x24:      // FSGLDIV
+		{
+			float32 a = floatx80_to_float32( REG_FP[dst] );
+			float32 b = floatx80_to_float32( source );
+			REG_FP[dst] = float32_to_floatx80( float32_div(a, b) );
+			USE_CYCLES(43); //  // ? (value is from FDIV)
+			break;
+		}
 		case 0x25:		// FREM
 		{
+			sint8 const mode = float_rounding_mode;
+			float_rounding_mode = float_round_nearest_even;
 			REG_FP[dst] = floatx80_rem(REG_FP[dst], source);
 			SET_CONDITION_CODES(REG_FP[dst]);
+			float_rounding_mode = mode;
 			USE_CYCLES(43);	// guess
 			break;
 		}
+		case 0x26:      // FSCALE
+		{
+			REG_FP[dst] = floatx80_scale(REG_FP[dst], source);
+			SET_CONDITION_CODES(REG_FP[dst]);
+			USE_CYCLES(46);   // (better?) guess
+			break;
+		}
+		case 0x27:      // FSGLMUL
+		{
+			float32 a = floatx80_to_float32( REG_FP[dst] );
+			float32 b = floatx80_to_float32( source );
+			REG_FP[dst] = float32_to_floatx80( float32_mul(a, b) );
+			SET_CONDITION_CODES(REG_FP[dst]);
+			USE_CYCLES(11); // ? (value is from FMUL)
+			break;
+		}
 		case 0x28:		// FSUB
 		{
 			REG_FP[dst] = floatx80_sub(REG_FP[dst], source);
@@ -1319,10 +1622,7 @@ static void fmove_reg_mem(uint16 w2)
 		}
 		case 2:		// Extended-precision Real
 		{
-		  	int mode = (ea >> 3) & 0x7;
-		  	int reg = (ea & 0x7);
-		  	uint32 di_mode_ea = mode == 5 ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
-			WRITE_EA_FPE(mode, reg, REG_FP[src], di_mode_ea);
+			WRITE_EA_FPE(ea, REG_FP[src]);
 			break;
 		}
 		case 3:		// Packed-decimal Real with Static K-factor
@@ -1334,7 +1634,12 @@ static void fmove_reg_mem(uint16 w2)
 		}
 		case 4:		// Word Integer
 		{
-			WRITE_EA_16(ea, (sint16)floatx80_to_int32(REG_FP[src]));
+			sint32 value = floatx80_to_int32(REG_FP[src]);
+			if (value > 0x7fff || value < -0x8000 )
+			{
+				REG_FPSR |= FPES_OE | FPAE_IOP;
+			}
+			WRITE_EA_16(ea, (sint16)value);
 			break;
 		}
 		case 5:		// Double-precision Real
@@ -1348,7 +1653,12 @@ static void fmove_reg_mem(uint16 w2)
 		}
 		case 6:		// Byte Integer
 		{
-			WRITE_EA_8(ea, (sint8)floatx80_to_int32(REG_FP[src]));
+			sint32 value = floatx80_to_int32(REG_FP[src]);
+			if (value > 127 || value < -128)
+			{
+				REG_FPSR |= FPES_OE | FPAE_IOP;
+			}
+			WRITE_EA_8(ea, (sint8)value);
 			break;
 		}
 		case 7:		// Packed-decimal Real with Dynamic K-factor
@@ -1365,24 +1675,97 @@ static void fmove_fpcr(uint16 w2)
 {
 	int ea = REG_IR & 0x3f;
 	int dir = (w2 >> 13) & 0x1;
-	int reg = (w2 >> 10) & 0x7;
+	int regsel = (w2 >> 10) & 0x7;
+	int mode = (ea >> 3) & 0x7;
+
+	if ((mode == 5) || (mode == 6))
+	{
+		uint32 address = 0xffffffff;    // force a bus error if this doesn't get assigned
+
+		if (mode == 5)
+		{
+			address = EA_AY_DI_32();
+		}
+		else if (mode == 6)
+		{
+			address = EA_AY_IX_32();
+		}
 
-	if (dir)	// From system control reg to <ea>
+		if (dir)	// From system control reg to <ea>
+		{
+			if (regsel & 4) { m68ki_write_32(address, REG_FPCR); address += 4; }
+			if (regsel & 2) { m68ki_write_32(address, REG_FPSR); address += 4; }
+			if (regsel & 1) { m68ki_write_32(address, REG_FPIAR); address += 4; }
+		}
+		else		// From <ea> to system control reg
+		{
+			if (regsel & 4) { REG_FPCR = m68ki_read_32(address); address += 4; }
+			if (regsel & 2) { REG_FPSR = m68ki_read_32(address); address += 4; }
+			if (regsel & 1) { REG_FPIAR = m68ki_read_32(address); address += 4; }
+		}
+	}
+	else
 	{
-		if (reg & 4) WRITE_EA_32(ea, REG_FPCR);
-		if (reg & 2) WRITE_EA_32(ea, REG_FPSR);
-		if (reg & 1) WRITE_EA_32(ea, REG_FPIAR);
+		if (dir)    // From system control reg to <ea>
+		{
+			if (regsel & 4) WRITE_EA_32(ea, REG_FPCR);
+			if (regsel & 2) WRITE_EA_32(ea, REG_FPSR);
+			if (regsel & 1) WRITE_EA_32(ea, REG_FPIAR);
+		}
+		else        // From <ea> to system control reg
+		{
+			if (regsel & 4) REG_FPCR = READ_EA_32(ea);
+			if (regsel & 2) REG_FPSR = READ_EA_32(ea);
+			if (regsel & 1) REG_FPIAR = READ_EA_32(ea);
+		}
 	}
-	else		// From <ea> to system control reg
+
+	// FIXME: (2011-12-18 ost)
+	// rounding_mode and rounding_precision of softfloat.c should be set according to current fpcr
+	// but:  with this code on Apollo the following programs in /systest/fptest will fail:
+	// 1. Single Precision Whetstone will return wrong results never the less
+	// 2. Vector Test will fault with 00040004: reference to illegal address
+
+	if ((regsel & 4) && dir == 0)
 	{
-      if (reg & 4) 
+		int rnd = (REG_FPCR >> 4) & 3;
+		int prec = (REG_FPCR >> 6) & 3;
+
+//      logerror("m68k_fpsp:fmove_fpcr fpcr=%04x prec=%d rnd=%d\n", m_fpcr, prec, rnd);
+
+#ifdef FLOATX80
+		switch (prec)
 		{
-		  REG_FPCR = READ_EA_32(ea);
-		  // JFF: need to update rounding mode from softfloat module
-		  float_rounding_mode = (REG_FPCR >> 4) & 0x3;
+		case 0: // Extend (X)
+			floatx80_rounding_precision = 80;
+			break;
+		case 1: // Single (S)
+			floatx80_rounding_precision = 32;
+			break;
+		case 2: // Double (D)
+			floatx80_rounding_precision = 64;
+			break;
+		case 3: // Undefined
+			floatx80_rounding_precision = 80;
+			break;
+		}
+#endif
+
+		switch (rnd)
+		{
+		case 0: // To Nearest (RN)
+			float_rounding_mode = float_round_nearest_even;
+			break;
+		case 1: // To Zero (RZ)
+			float_rounding_mode = float_round_to_zero;
+			break;
+		case 2: // To Minus Infinitiy (RM)
+			float_rounding_mode = float_round_down;
+			break;
+		case 3: // To Plus Infinitiy (RP)
+			float_rounding_mode = float_round_up;
+			break;
 		}
-		if (reg & 2) REG_FPSR = READ_EA_32(ea);
-		if (reg & 1) REG_FPIAR = READ_EA_32(ea);
 	}
 
 	USE_CYCLES(10);
@@ -1396,122 +1779,121 @@ static void fmovem(uint16 w2)
 	int mode = (w2 >> 11) & 0x3;
 	int reglist = w2 & 0xff;
 
+	uint32 mem_addr = 0;
+	switch (ea >> 3)
+	{
+		case 5:     // (d16, An)
+			mem_addr= EA_AY_DI_32();
+			break;
+		case 6:     // (An) + (Xn) + d8
+			mem_addr= EA_AY_IX_32();
+			break;
+	}
+
 	if (dir)	// From FP regs to mem
 	{
 		switch (mode)
-	{
-	  	case 2:		// (JFF): Static register list, postincrement or control addressing mode.     
-	    {
-	      int imode = (ea >> 3) & 0x7;
-	      int reg = (ea & 0x7);
-	      int di_mode = imode == 5;	      
-	      uint32 di_mode_ea = di_mode ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
-	      for (i=0; i < 8; i++)
+		{
+			case 1: // Dynamic register list, postincrement or control addressing mode.
+				// FIXME: not really tested, but seems to work
+				reglist = REG_D[(reglist >> 4) & 7];
+				/* fall through */
+				/* no break */
+			case 0:     // Static register list, predecrement or control addressing mode
 			{
-			  if (reglist & (1 << i))
-			    {
-			      WRITE_EA_FPE(imode,reg, REG_FP[7-i],di_mode_ea);
-			      USE_CYCLES(2);
-			      if (di_mode)
+				for (i=0; i < 8; i++)
 				{
-				  di_mode_ea += 12;
+					if (reglist & (1 << i))
+					{
+						switch (ea >> 3)
+						{
+							case 5:     // (d16, An)
+							case 6:     // (An) + (Xn) + d8
+								store_extended_float80(mem_addr, REG_FP[i]);
+								mem_addr += 12;
+								break;
+							default:
+								WRITE_EA_FPE(ea, REG_FP[i]);
+								break;
+						}
+
+						USE_CYCLES(2);
+					}
 				}
-		    	}
+				break;
 			}
-	      break;
-	   	 }
-			case 0:		// Static register list, predecrement addressing mode
+
+			case 2:		// Static register list, postdecrement or control addressing mode.     
 			{
-	      int imode = (ea >> 3) & 0x7;
-	      int reg = (ea & 0x7);
-	      // the "di_mode_ea" parameter kludge is required here else WRITE_EA_FPE would have
-	      // to call EA_AY_DI_32() (that advances PC & reads displacement) each time
-	      // when the proper behaviour is 1) read once, 2) increment ea for each matching register
-	      // this forces to pre-read the mode (named "imode") so we can decide to read displacement, only once
-	      int di_mode = imode == 5;	      
-	      uint32 di_mode_ea =  di_mode ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
 				for (i=0; i < 8; i++)
 				{
 					if (reglist & (1 << i))
 					{
-		 			    WRITE_EA_FPE(imode,reg, REG_FP[i],di_mode_ea);
-						USE_CYCLES(2);
-					    if (di_mode)
+						switch (ea >> 3)
 						{
-						  di_mode_ea += 12;
+							case 5:     // (d16, An)
+							case 6:     // (An) + (Xn) + d8
+								store_extended_float80(mem_addr, REG_FP[7-i]);
+								mem_addr += 12;
+								break;
+							default:
+								WRITE_EA_FPE(ea, REG_FP[7-i]);
+								break;
 						}
+
+						USE_CYCLES(2);
 					}
 				}
 				break;
 			}
 
-			default:	fatalerror("040fpu0: FMOVEM: mode %d unimplemented at %08X\n", mode, REG_PC-4);
+			default:	fatalerror("M680x0: FMOVEM: mode %d unimplemented at %08X\n", mode, REG_PC-4);
 		}
 	}
 	else		// From mem to FP regs
 	{
 		switch (mode)
 		{
-			case 2:		// Static register list, postincrement addressing mode
+			case 3: // Dynamic register list, predecrement addressing mode.
+				// FIXME: not really tested, but seems to work
+				reglist = REG_D[(reglist >> 4) & 7];
+				/* fall through */
+				/* no break */
+			case 2:		// Static register list, postincrement or control addressing mode
 			{
-		      int imode = (ea >> 3) & 0x7;
-		      int reg = (ea & 0x7);
-		      int di_mode = imode == 5;	    
-		      uint32 di_mode_ea = di_mode ? (REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16())) : 0;
 				for (i=0; i < 8; i++)
 				{
 					if (reglist & (1 << i))
 					{
-		   			   	REG_FP[7-i] = READ_EA_FPE(imode,reg,di_mode_ea);
-						USE_CYCLES(2);
-					    if (di_mode)
+						switch (ea >> 3)
 						{
-						  di_mode_ea += 12;
+							case 5:     // (d16, An)
+							case 6:     // (An) + (Xn) + d8
+								REG_FP[7-i] = load_extended_float80(mem_addr);
+								mem_addr += 12;
+								break;
+							default:
+								REG_FP[7-i] = READ_EA_FPE(ea);
+								break;
 						}
+						USE_CYCLES(2);
 					}
 				}
 				break;
 			}
 
-			default:	fatalerror("040fpu0: FMOVEM: mode %d unimplemented at %08X\n", mode, REG_PC-4);
+			default:	fatalerror("M680x0: FMOVEM: mode %d unimplemented at %08X\n", mode, REG_PC-4);
 		}
 	}
 }
 
 static void fscc()
 {
-  // added by JFF, this seems to work properly now 
-  int condition = OPER_I_16() & 0x3f;
-
-  int cc = TEST_CONDITION(condition);
-  int mode = (REG_IR & 0x38) >> 3;
-  int v = (cc ? 0xff : 0x00);
-  
-  switch (mode)
-  {
-  case 0:  // fscc Dx
-    {
-      // If the specified floating-point condition is true, sets the byte integer operand at
-      // the destination to TRUE (all ones); otherwise, sets the byte to FALSE (all zeros).
-      
-      REG_D[REG_IR & 7] = (REG_D[REG_IR & 7] & 0xFFFFFF00) | v;
-      break;
-    }
-    case 5: // (disp,Ax)
-    {
-    int reg = REG_IR & 7;
-    uint32 ea = REG_A[reg]+MAKE_INT_16(m68ki_read_imm_16());
-    m68ki_write_8(ea,v);
-    break;
-    }
-    
-  default:
-    {
-      // unimplemented see fpu_uae.cpp around line 1300
-      fatalerror("040fpu0: fscc: mode %d not implemented at %08X\n", mode, REG_PC-4);
-    }
-    }
-  USE_CYCLES(7);  // JFF unsure of the number of cycles!!
+	int ea = REG_IR & 0x3f;
+	int condition = (sint16)(OPER_I_16());
+
+	WRITE_EA_8(ea, TEST_CONDITION(condition) ? 0xff : 0);
+	USE_CYCLES(7);  // ???
 }
 static void fbcc16(void)
 {
@@ -1528,7 +1910,7 @@ static void fbcc16(void)
 	}
 
 	USE_CYCLES(7);
-	}
+}
 
 static void fbcc32(void)
 {
@@ -1591,28 +1973,51 @@ void m68040_fpu_op0()
 			break;
 		}
 
-	    case 1:           // FScc (JFF)
+		case 1:           // FBcc disp16
 		{
-		  fscc();
-		  break;
+			switch ((REG_IR >> 3) & 0x7) {
+			case 1: // FDBcc
+				// TODO:
+				printf("M68kFPU: unimplemented FDBcc main op %d with mode %d at %08X\n", (REG_IR >> 6) & 0x3, (REG_IR >> 3) & 0x7, REG_PC-4);
+				break;
+			default: // FScc (?)
+				fscc();
+				return;
+			}
+			fatalerror("M68kFPU: unimplemented main op %d with mode %d at %08X\n", (REG_IR >> 6) & 0x3, (REG_IR >> 3) & 0x7, REG_PC-4);
+			break;
 		}
-		case 2:		// FBcc disp16
+		case 2:     // FBcc disp16
 		{
 			fbcc16();
 			break;
 		}
-		case 3:		// FBcc disp32
+		case 3:     // FBcc disp32
 		{
 			fbcc32();
 			break;
 		}
 
-      default:	fatalerror("M68kFPU: unimplemented main op %d at %08X\n", (m68ki_cpu.ir >> 6) & 0x3,  REG_PC-4);
+		default:    fatalerror("M68kFPU: unimplemented main op %d\n", (REG_IR >> 6) & 0x3);
 	}
 }
 
-static void perform_fsave(uint32 addr, int inc)
+static int perform_fsave(uint32 addr, int inc)
 {
+	if(m68ki_cpu.cpu_type & CPU_TYPE_040)
+	{
+		if(inc)
+		{
+			m68ki_write_32(addr, 0x41000000);
+			return 4 -4;
+		}
+		else
+		{
+			m68ki_write_32(addr, 0x41000000);
+			return -4 +4;
+		}
+	}
+
 	if (inc)
 	{
 		// 68881 IDLE, version 0x1f
@@ -1623,21 +2028,23 @@ static void perform_fsave(uint32 addr, int inc)
 		m68ki_write_32(addr+16, 0);
 		m68ki_write_32(addr+20, 0);
 		m68ki_write_32(addr+24, 0x70000000);
+		return 7*4 -4;
 	}
 	else
 	{
-		m68ki_write_32(addr, 0x70000000);
-		m68ki_write_32(addr-4, 0);
-		m68ki_write_32(addr-8, 0);
-		m68ki_write_32(addr-12, 0);
-		m68ki_write_32(addr-16, 0);
-		m68ki_write_32(addr-20, 0);
-		m68ki_write_32(addr-24, 0x1f180000);
+		m68ki_write_32(addr+4-4, 0x70000000);
+		m68ki_write_32(addr+4-8, 0);
+		m68ki_write_32(addr+4-12, 0);
+		m68ki_write_32(addr+4-16, 0);
+		m68ki_write_32(addr+4-20, 0);
+		m68ki_write_32(addr+4-24, 0);
+		m68ki_write_32(addr+4-28, 0x1f180000);
+		return -7*4 +4;
 	}
 }
 
 // FRESTORE on a NULL frame reboots the FPU - all registers to NaN, the 3 status regs to 0
-static void do_frestore_null()
+static void do_frestore_null(void)
 {
 	int i;
 
@@ -1655,12 +2062,64 @@ static void do_frestore_null()
 	m68ki_cpu.fpu_just_reset = 1;
 }
 
+void m68040_do_fsave(uint32 addr, int reg, int inc)
+{
+	if (m68ki_cpu.fpu_just_reset)
+	{
+		m68ki_write_32(addr, 0);
+	}
+	else
+	{
+		// we normally generate an IDLE frame
+		int delta = perform_fsave(addr, inc);
+		if(reg != -1)
+			REG_A[reg] += delta;
+	}
+}
+
+void m68040_do_frestore(uint32 addr, int reg)
+{
+	uint32 temp = m68ki_read_32(addr);
+	// check for nullptr frame
+	if (temp & 0xff000000)
+	{
+		// we don't handle non-nullptr frames
+		m68ki_cpu.fpu_just_reset = 0;
+
+		if (reg != -1)
+		{
+			uint8 m40 = !!(m68ki_cpu.cpu_type & CPU_TYPE_040);
+			// how about an IDLE frame?
+			if (!m40 && ((temp & 0x00ff0000) == 0x00180000))
+			{
+				REG_A[reg] += 7*4-4;
+			}
+			else if (m40 && ((temp & 0xffff0000) == 0x41000000))
+			{
+//				REG_A[reg] += 4;
+			} // check UNIMP
+			else if ((temp & 0x00ff0000) == 0x00380000)
+			{
+				REG_A[reg] += 14*4;
+			} // check BUSY
+			else if ((temp & 0x00ff0000) == 0x00b40000)
+			{
+				REG_A[reg] += 45*4;
+			}
+		}
+	}
+	else
+	{
+		do_frestore_null();
+	}
+}
+
 void m68040_fpu_op1()
 {
 	int ea = REG_IR & 0x3f;
 	int mode = (ea >> 3) & 0x7;
 	int reg = (ea & 0x7);
-	uint32 addr, temp;
+	uint32 addr;
 
 	switch ((REG_IR >> 6) & 0x3)
 	{
@@ -1668,40 +2127,56 @@ void m68040_fpu_op1()
 		{
 			switch (mode)
 			{
-				case 3:	// (An)+
-		    			addr = EA_AY_PI_32();
+				case 2: // (An)
+					addr = REG_A[reg];
+					m68040_do_fsave(addr, -1, 1);
+					break;
 
-					if (m68ki_cpu.fpu_just_reset)
-					{
-						m68ki_write_32(addr, 0);
-					}
-					else
-					{
-						// we normally generate an IDLE frame
-						REG_A[reg] += 6*4;
-						perform_fsave(addr, 1);
-					}
+				case 3:	// (An)+
+					addr = EA_AY_PI_32();
+					printf("FSAVE mode %d, reg A%d=0x%08x\n",mode,reg,REG_A[reg]);
+					m68040_do_fsave(addr, -1, 1); // FIXME: -1 was reg
 					break;
 
 				case 4: // -(An)
-		    			addr = EA_AY_PD_32();
+					addr = EA_AY_PD_32();
+					m68040_do_fsave(addr, reg, 0); // FIXME: -1 was reg
+					break;
+				case 5: // (D16, An)
+					addr = EA_AY_DI_16();
+					m68040_do_fsave(addr, -1, 1);
+					break;
 
-					if (m68ki_cpu.fpu_just_reset)
-					{
-						m68ki_write_32(addr, 0);
-					}
-					else
+				case 6: // (An) + (Xn) + d8
+					addr = EA_AY_IX_16();
+					m68040_do_fsave(addr, -1, 1);
+					break;
+
+				case 7: //
+				{
+					switch (reg)
 					{
-						// we normally generate an IDLE frame
-						REG_A[reg] -= 6*4;
-						perform_fsave(addr, 0);
+						case 1:     // (abs32)
+						{
+							addr = EA_AL_32();
+							m68040_do_fsave(addr, -1, 1);
+							break;
+						}
+						case 2:     // (d16, PC)
+						{
+							addr = EA_PCDI_16();
+							m68040_do_fsave(addr, -1, 1);
+							break;
+						}
+						default:
+							fatalerror("M68kFPU: FSAVE unhandled mode %d reg %d at %x\n", mode, reg, REG_PC);
 					}
-					break;
+				}
+				break;
 
 				default:
 					fatalerror("M68kFPU: FSAVE unhandled mode %d reg %d at %x\n", mode, reg, REG_PC);
 			}
-			break;
 		}
 		break;
 
@@ -1711,59 +2186,80 @@ void m68040_fpu_op1()
 			{
 				case 2: // (An)
 					addr = REG_A[reg];
-					temp = m68ki_read_32(addr);
-
-					// check for NULL frame
-					if (temp & 0xff000000)
-					{
-						// we don't handle non-NULL frames and there's no pre/post inc/dec to do here
-						m68ki_cpu.fpu_just_reset = 0;
-					}
-					else
-					{
-						do_frestore_null();
-					}
+					m68040_do_frestore(addr, -1);
 					break;
 
 				case 3:	// (An)+
-		    			addr = EA_AY_PI_32();
-					temp = m68ki_read_32(addr);
+					addr = EA_AY_PI_32();
+					m68040_do_frestore(addr, reg);
+					break;
 
-					// check for NULL frame
-					if (temp & 0xff000000)
-					{
-						m68ki_cpu.fpu_just_reset = 0;
+				case 5: // (D16, An)
+					addr = EA_AY_DI_16();
+					m68040_do_frestore(addr, -1);
+					break;
 
-						// how about an IDLE frame?
-						if ((temp & 0x00ff0000) == 0x00180000)
-						{
-							REG_A[reg] += 6*4;
-						} // check UNIMP
-						else if ((temp & 0x00ff0000) == 0x00380000)
+				case 6: // (An) + (Xn) + d8
+					addr = EA_AY_IX_16();
+					m68040_do_frestore(addr, -1);
+					break;
+
+				case 7: //
+				{
+					switch (reg)
+					{
+						case 1:     // (abs32)
 						{
-							REG_A[reg] += 14*4;
-						} // check BUSY
-						else if ((temp & 0x00ff0000) == 0x00b40000)
+							addr = EA_AL_32();
+							m68040_do_frestore(addr, -1);
+							break;
+						}
+						case 2:     // (d16, PC)
 						{
-							REG_A[reg] += 45*4;
+							addr = EA_PCDI_16();
+							m68040_do_frestore(addr, -1);
+							break;
 						}
+						default:
+							fatalerror("M68kFPU: FRESTORE unhandled mode %d reg %d at %x\n", mode, reg, REG_PC);
 					}
-					else
-					{
-						do_frestore_null();
-					}
-					break;
+				}
+				break;
 
 				default:
 					fatalerror("M68kFPU: FRESTORE unhandled mode %d reg %d at %x\n", mode, reg, REG_PC);
 			}
-			break;
 		}
 		break;
 
-		default:	fatalerror("m68040_fpu_op1: unimplemented op %d at %08X\n", (REG_IR >> 6) & 0x3, REG_PC-2);
+		default:    fatalerror("m68040_fpu_op1: unimplemented op %d at %08X\n", (REG_IR >> 6) & 0x3, REG_PC-2);
 	}
 }
 
+void m68881_ftrap()
+{
+	uint16 w2  = OPER_I_16();
+
+	// now check the condition
+	if (TEST_CONDITION(w2 & 0x3f))
+	{
+		// trap here
+		m68ki_exception_trap(EXCEPTION_TRAPV);
+	}
+	else    // fall through, requires eating the operand
+	{
+		switch (REG_IR & 0x7)
+		{
+			case 2: // word operand
+				OPER_I_16();
+				break;
 
+			case 3: // long word operand
+				OPER_I_32();
+				break;
 
+			case 4: // no operand
+				break;
+		}
+	}
+}
diff --git a/softfloat/fpu_constant.h b/softfloat/fpu_constant.h
new file mode 100644
index 0000000..fdd9719
--- /dev/null
+++ b/softfloat/fpu_constant.h
@@ -0,0 +1,80 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#ifndef _FPU_CONSTANTS_H_
+#define _FPU_CONSTANTS_H_
+
+// Pentium CPU uses only 68-bit precision M_PI approximation
+#define BETTER_THAN_PENTIUM
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+//////////////////////////////
+// PI, PI/2, PI/4 constants
+//////////////////////////////
+
+#define FLOATX80_PI_EXP  (0x4000)
+
+// 128-bit PI fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_PI_HI (0xc90fdaa22168c234U)
+#define FLOAT_PI_LO (0xc4c6628b80dc1cd1U)
+#else
+#define FLOAT_PI_HI (0xc90fdaa22168c234U)
+#define FLOAT_PI_LO (0xC000000000000000U)
+#endif
+
+#define FLOATX80_PI2_EXP  (0x3FFF)
+#define FLOATX80_PI4_EXP  (0x3FFE)
+
+//////////////////////////////
+// 3PI/4 constant
+//////////////////////////////
+
+#define FLOATX80_3PI4_EXP (0x4000)
+
+// 128-bit 3PI/4 fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U)
+#define FLOAT_3PI4_LO (0x9394c9e8a0a5159cU)
+#else
+#define FLOAT_3PI4_HI (0x96cbe3f9990e91a7U)
+#define FLOAT_3PI4_LO (0x9000000000000000U)
+#endif
+
+//////////////////////////////
+// 1/LN2 constant
+//////////////////////////////
+
+#define FLOAT_LN2INV_EXP  (0x3FFF)
+
+// 128-bit 1/LN2 fraction
+#ifdef BETTER_THAN_PENTIUM
+#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU)
+#define FLOAT_LN2INV_LO (0xbe87fed0691d3e89U)
+#else
+#define FLOAT_LN2INV_HI (0xb8aa3b295c17f0bbU)
+#define FLOAT_LN2INV_LO (0xC000000000000000U)
+#endif
+
+#endif
diff --git a/softfloat/fsincos.c b/softfloat/fsincos.c
new file mode 100644
index 0000000..764abd0
--- /dev/null
+++ b/softfloat/fsincos.c
@@ -0,0 +1,647 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#define FLOAT128
+
+#define USE_estimateDiv128To64
+
+#include "m68kcpu.h" // which includes softfloat.h after defining the basic types
+#include "assert.h"
+
+//#include "softfloat-specialize"
+#include "fpu_constant.h"
+
+#define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U)
+#define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU)
+
+#define packFloat2x128m(zHi, zLo) {(zHi), (zLo)}
+#define PACK_FLOAT_128(hi,lo) packFloat2x128m(LIT64(hi),LIT64(lo))
+
+#define EXP_BIAS 0x3FFF
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 extractFloatx80Frac( floatx80 a )
+{
+	return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline int32 extractFloatx80Exp( floatx80 a )
+{
+	return a.high & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloatx80Sign( floatx80 a )
+{
+	return a.high>>15;
+
+}
+
+/*----------------------------------------------------------------------------
+| Takes extended double-precision floating-point  NaN  `a' and returns the
+| appropriate NaN result. If `a' is a signaling NaN, the invalid exception
+| is raised.
+*----------------------------------------------------------------------------*/
+
+static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a)
+{
+	if (floatx80_is_signaling_nan(a))
+		float_raise(float_flag_invalid);
+
+	a.low |= 0xC000000000000000U;
+
+	return a;
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
+{
+	int shiftCount = countLeadingZeros64(aSig);
+	*zSigPtr = aSig<<shiftCount;
+	*zExpPtr = 1 - shiftCount;
+}
+
+/* reduce trigonometric function argument using 128-bit precision
+   M_PI approximation */
+static uint64_t argument_reduction_kernel(uint64_t aSig0, int Exp, uint64_t *zSig0, uint64_t *zSig1)
+{
+	uint64_t term0, term1, term2;
+	uint64_t aSig1 = 0;
+
+	shortShift128Left(aSig1, aSig0, Exp, &aSig1, &aSig0);
+	uint64_t q = estimateDiv128To64(aSig1, aSig0, FLOAT_PI_HI);
+	mul128By64To192(FLOAT_PI_HI, FLOAT_PI_LO, q, &term0, &term1, &term2);
+	sub128(aSig1, aSig0, term0, term1, zSig1, zSig0);
+	while ((int64_t)(*zSig1) < 0) {
+		--q;
+		add192(*zSig1, *zSig0, term2, 0, FLOAT_PI_HI, FLOAT_PI_LO, zSig1, zSig0, &term2);
+	}
+	*zSig1 = term2;
+	return q;
+}
+
+static int reduce_trig_arg(int expDiff, int zSign, uint64_t aSig0, uint64_t aSig1)
+{
+	uint64_t term0, term1, q = 0;
+
+	if (expDiff < 0) {
+		shift128Right(aSig0, 0, 1, &aSig0, &aSig1);
+		expDiff = 0;
+	}
+	if (expDiff > 0) {
+		q = argument_reduction_kernel(aSig0, expDiff, &aSig0, &aSig1);
+	}
+	else {
+		if (FLOAT_PI_HI <= aSig0) {
+			aSig0 -= FLOAT_PI_HI;
+			q = 1;
+		}
+	}
+
+	shift128Right(FLOAT_PI_HI, FLOAT_PI_LO, 1, &term0, &term1);
+	if (! lt128(aSig0, aSig1, term0, term1))
+	{
+		int lt = lt128(term0, term1, aSig0, aSig1);
+		int eq = eq128(aSig0, aSig1, term0, term1);
+
+		if ((eq && (q & 1)) || lt) {
+			zSign = !zSign;
+			++q;
+		}
+		if (lt) sub128(FLOAT_PI_HI, FLOAT_PI_LO, aSig0, aSig1, &aSig0, &aSig1);
+	}
+
+	return (int)(q & 3);
+}
+
+#define SIN_ARR_SIZE 11
+#define COS_ARR_SIZE 11
+
+static float128 sin_arr[SIN_ARR_SIZE] =
+{
+	PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
+	PACK_FLOAT_128(0xbffc555555555555, 0x5555555555555555), /*  3 */
+	PACK_FLOAT_128(0x3ff8111111111111, 0x1111111111111111), /*  5 */
+	PACK_FLOAT_128(0xbff2a01a01a01a01, 0xa01a01a01a01a01a), /*  7 */
+	PACK_FLOAT_128(0x3fec71de3a556c73, 0x38faac1c88e50017), /*  9 */
+	PACK_FLOAT_128(0xbfe5ae64567f544e, 0x38fe747e4b837dc7), /* 11 */
+	PACK_FLOAT_128(0x3fde6124613a86d0, 0x97ca38331d23af68), /* 13 */
+	PACK_FLOAT_128(0xbfd6ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 15 */
+	PACK_FLOAT_128(0x3fce952c77030ad4, 0xa6b2605197771b00), /* 17 */
+	PACK_FLOAT_128(0xbfc62f49b4681415, 0x724ca1ec3b7b9675), /* 19 */
+	PACK_FLOAT_128(0x3fbd71b8ef6dcf57, 0x18bef146fcee6e45)  /* 21 */
+};
+
+static float128 cos_arr[COS_ARR_SIZE] =
+{
+	PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  0 */
+	PACK_FLOAT_128(0xbffe000000000000, 0x0000000000000000), /*  2 */
+	PACK_FLOAT_128(0x3ffa555555555555, 0x5555555555555555), /*  4 */
+	PACK_FLOAT_128(0xbff56c16c16c16c1, 0x6c16c16c16c16c17), /*  6 */
+	PACK_FLOAT_128(0x3fefa01a01a01a01, 0xa01a01a01a01a01a), /*  8 */
+	PACK_FLOAT_128(0xbfe927e4fb7789f5, 0xc72ef016d3ea6679), /* 10 */
+	PACK_FLOAT_128(0x3fe21eed8eff8d89, 0x7b544da987acfe85), /* 12 */
+	PACK_FLOAT_128(0xbfda93974a8c07c9, 0xd20badf145dfa3e5), /* 14 */
+	PACK_FLOAT_128(0x3fd2ae7f3e733b81, 0xf11d8656b0ee8cb0), /* 16 */
+	PACK_FLOAT_128(0xbfca6827863b97d9, 0x77bb004886a2c2ab), /* 18 */
+	PACK_FLOAT_128(0x3fc1e542ba402022, 0x507a9cad2bf8f0bb)  /* 20 */
+};
+
+extern float128 OddPoly (float128 x, float128 *arr, unsigned n);
+
+/* 0 <= x <= pi/4 */
+static inline float128 poly_sin(float128 x)
+{
+	//                 3     5     7     9     11     13     15
+	//                x     x     x     x     x      x      x
+	// sin (x) ~ x - --- + --- - --- + --- - ---- + ---- - ---- =
+	//                3!    5!    7!    9!    11!    13!    15!
+	//
+	//                 2     4     6     8     10     12     14
+	//                x     x     x     x     x      x      x
+	//   = x * [ 1 - --- + --- - --- + --- - ---- + ---- - ---- ] =
+	//                3!    5!    7!    9!    11!    13!    15!
+	//
+	//           3                          3
+	//          --       4k                --        4k+2
+	//   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
+	//          --  2k                     --  2k+1
+	//          k=0                        k=0
+	//
+	//                          2
+	//   sin(x) ~ x * [ p(x) + x * q(x) ]
+	//
+
+	return OddPoly(x, sin_arr, SIN_ARR_SIZE);
+}
+
+extern float128 EvenPoly(float128 x, float128 *arr, unsigned n);
+
+/* 0 <= x <= pi/4 */
+static inline float128 poly_cos(float128 x)
+{
+	//                 2     4     6     8     10     12     14
+	//                x     x     x     x     x      x      x
+	// cos (x) ~ 1 - --- + --- - --- + --- - ---- + ---- - ----
+	//                2!    4!    6!    8!    10!    12!    14!
+	//
+	//           3                          3
+	//          --       4k                --        4k+2
+	//   p(x) = >  C  * x   > 0     q(x) = >  C   * x     < 0
+	//          --  2k                     --  2k+1
+	//          k=0                        k=0
+	//
+	//                      2
+	//   cos(x) ~ [ p(x) + x * q(x) ]
+	//
+
+	return EvenPoly(x, cos_arr, COS_ARR_SIZE);
+}
+
+static inline void sincos_invalid(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
+{
+	if (sin_a) *sin_a = a;
+	if (cos_a) *cos_a = a;
+}
+
+static inline void sincos_tiny_argument(floatx80 *sin_a, floatx80 *cos_a, floatx80 a)
+{
+	if (sin_a) *sin_a = a;
+	if (cos_a) *cos_a = floatx80_one;
+}
+
+static floatx80 sincos_approximation(int neg, float128 r, uint64_t quotient)
+{
+	if (quotient & 0x1) {
+		r = poly_cos(r);
+		neg = 0;
+	} else  {
+		r = poly_sin(r);
+	}
+
+	floatx80 result = float128_to_floatx80(r);
+	if (quotient & 0x2)
+		neg = ! neg;
+
+	if (neg)
+		result = floatx80_chs(result);
+
+	return result;
+}
+
+// =================================================
+// SFFSINCOS               Compute sin(x) and cos(x)
+// =================================================
+
+//
+// Uses the following identities:
+// ----------------------------------------------------------
+//
+//  sin(-x) = -sin(x)
+//  cos(-x) =  cos(x)
+//
+//  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
+//  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
+//
+//  sin(x+ pi/2)  =  cos(x)
+//  sin(x+ pi)    = -sin(x)
+//  sin(x+3pi/2)  = -cos(x)
+//  sin(x+2pi)    =  sin(x)
+//
+
+int sf_fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a)
+{
+	uint64_t aSig0, aSig1 = 0;
+	int32_t aExp, zExp, expDiff;
+	int aSign, zSign;
+	int q = 0;
+
+	aSig0 = extractFloatx80Frac(a);
+	aExp = extractFloatx80Exp(a);
+	aSign = extractFloatx80Sign(a);
+
+	/* invalid argument */
+	if (aExp == 0x7FFF) {
+		if ((uint64_t) (aSig0<<1)) {
+			sincos_invalid(sin_a, cos_a, propagateFloatx80NaNOneArg(a));
+			return 0;
+		}
+
+		float_raise(float_flag_invalid);
+		sincos_invalid(sin_a, cos_a, floatx80_default_nan);
+		return 0;
+	}
+
+	if (aExp == 0) {
+		if (aSig0 == 0) {
+			sincos_tiny_argument(sin_a, cos_a, a);
+			return 0;
+		}
+
+//        float_raise(float_flag_denormal);
+
+		/* handle pseudo denormals */
+		if (! (aSig0 & 0x8000000000000000U))
+		{
+			float_raise(float_flag_inexact);
+			if (sin_a)
+				float_raise(float_flag_underflow);
+			sincos_tiny_argument(sin_a, cos_a, a);
+			return 0;
+		}
+
+		normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+	}
+
+	zSign = aSign;
+	zExp = EXP_BIAS;
+	expDiff = aExp - zExp;
+
+	/* argument is out-of-range */
+	if (expDiff >= 63)
+		return -1;
+
+	float_raise(float_flag_inexact);
+
+	if (expDiff < -1) {    // doesn't require reduction
+		if (expDiff <= -68) {
+			a = packFloatx80(aSign, aExp, aSig0);
+			sincos_tiny_argument(sin_a, cos_a, a);
+			return 0;
+		}
+		zExp = aExp;
+	}
+	else {
+		q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
+	}
+
+	/* **************************** */
+	/* argument reduction completed */
+	/* **************************** */
+
+	/* using float128 for approximation */
+	float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
+
+	if (aSign) q = -q;
+	if (sin_a) *sin_a = sincos_approximation(zSign, r,   q);
+	if (cos_a) *cos_a = sincos_approximation(zSign, r, q+1);
+
+	return 0;
+}
+
+int floatx80_fsin(floatx80 *a)
+{
+	return sf_fsincos(*a, a, 0);
+}
+
+int floatx80_fcos(floatx80 *a)
+{
+	return sf_fsincos(*a, 0, a);
+}
+
+// =================================================
+// FPTAN                 Compute tan(x)
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//
+//  sin(-x) = -sin(x)
+//  cos(-x) =  cos(x)
+//
+//  sin(x+y) = sin(x)*cos(y)+cos(x)*sin(y)
+//  cos(x+y) = sin(x)*sin(y)+cos(x)*cos(y)
+//
+//  sin(x+ pi/2)  =  cos(x)
+//  sin(x+ pi)    = -sin(x)
+//  sin(x+3pi/2)  = -cos(x)
+//  sin(x+2pi)    =  sin(x)
+//
+// 2. ----------------------------------------------------------
+//
+//           sin(x)
+//  tan(x) = ------
+//           cos(x)
+//
+
+int floatx80_ftan(floatx80 *a)
+{
+	uint64_t aSig0, aSig1 = 0;
+	int32_t aExp, zExp, expDiff;
+	int aSign, zSign;
+	int q = 0;
+
+	aSig0 = extractFloatx80Frac(*a);
+	aExp = extractFloatx80Exp(*a);
+	aSign = extractFloatx80Sign(*a);
+
+	/* invalid argument */
+	if (aExp == 0x7FFF) {
+		if ((uint64_t) (aSig0<<1))
+		{
+			*a = propagateFloatx80NaNOneArg(*a);
+			return 0;
+		}
+
+		float_raise(float_flag_invalid);
+		*a = floatx80_default_nan;
+		return 0;
+	}
+
+	if (aExp == 0) {
+		if (aSig0 == 0) return 0;
+//        float_raise(float_flag_denormal);
+		/* handle pseudo denormals */
+		if (! (aSig0 & 0x8000000000000000U))
+		{
+			float_raise(float_flag_inexact | float_flag_underflow);
+			return 0;
+		}
+		normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);
+	}
+
+	zSign = aSign;
+	zExp = EXP_BIAS;
+	expDiff = aExp - zExp;
+
+	/* argument is out-of-range */
+	if (expDiff >= 63)
+		return -1;
+
+	float_raise(float_flag_inexact);
+
+	if (expDiff < -1) {    // doesn't require reduction
+		if (expDiff <= -68) {
+			*a = packFloatx80(aSign, aExp, aSig0);
+			return 0;
+		}
+		zExp = aExp;
+	}
+	else {
+		q = reduce_trig_arg(expDiff, zSign, aSig0, aSig1);
+	}
+
+	/* **************************** */
+	/* argument reduction completed */
+	/* **************************** */
+
+	/* using float128 for approximation */
+	float128 r = normalizeRoundAndPackFloat128(0, zExp-0x10, aSig0, aSig1);
+
+	float128 sin_r = poly_sin(r);
+	float128 cos_r = poly_cos(r);
+
+	if (q & 0x1) {
+		r = float128_div(cos_r, sin_r);
+		zSign = ! zSign;
+	} else {
+		r = float128_div(sin_r, cos_r);
+	}
+
+	*a = float128_to_floatx80(r);
+	if (zSign)
+		*a = floatx80_chs(*a);
+
+	return 0;
+}
+
+//                            2         3         4               n
+// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//         0    1         2         3         4               n
+//
+//          --       2k                --        2k+1
+//   p(x) = >  C  * x           q(x) = >  C   * x
+//          --  2k                     --  2k+1
+//
+//   f(x) ~ [ p(x) + x * q(x) ]
+//
+
+float128 EvalPoly(float128 x, float128 *arr, unsigned n)
+{
+	float128 x2 = float128_mul(x, x);
+	unsigned i;
+
+	assert(n > 1);
+
+	float128 r1 = arr[--n];
+	i = n;
+	while(i >= 2) {
+		r1 = float128_mul(r1, x2);
+		i -= 2;
+		r1 = float128_add(r1, arr[i]);
+	}
+	if (i) r1 = float128_mul(r1, x);
+
+	float128 r2 = arr[--n];
+	i = n;
+	while(i >= 2) {
+		r2 = float128_mul(r2, x2);
+		i -= 2;
+		r2 = float128_add(r2, arr[i]);
+	}
+	if (i) r2 = float128_mul(r2, x);
+
+	return float128_add(r1, r2);
+}
+
+//                  2         4         6         8               2n
+// f(x) ~ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//         0    1         2         3         4               n
+//
+//          --       4k                --        4k+2
+//   p(x) = >  C  * x           q(x) = >  C   * x
+//          --  2k                     --  2k+1
+//
+//                    2
+//   f(x) ~ [ p(x) + x * q(x) ]
+//
+
+float128 EvenPoly(float128 x, float128 *arr, unsigned n)
+{
+		return EvalPoly(float128_mul(x, x), arr, n);
+}
+
+//                        3         5         7         9               2n+1
+// f(x) ~ (C * x) + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//          0         1         2         3         4               n
+//                        2         4         6         8               2n
+//      = x * [ C + (C * x) + (C * x) + (C * x) + (C * x) + ... + (C * x)
+//               0    1         2         3         4               n
+//
+//          --       4k                --        4k+2
+//   p(x) = >  C  * x           q(x) = >  C   * x
+//          --  2k                     --  2k+1
+//
+//                        2
+//   f(x) ~ x * [ p(x) + x * q(x) ]
+//
+
+float128 OddPoly(float128 x, float128 *arr, unsigned n)
+{
+		return float128_mul(x, EvenPoly(x, arr, n));
+}
+
+/*----------------------------------------------------------------------------
+| Scales extended double-precision floating-point value in operand `a' by
+| value `b'. The function truncates the value in the second operand 'b' to
+| an integral value and adds that value to the exponent of the operand 'a'.
+| The operation performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+extern floatx80 propagateFloatx80NaN( floatx80 a, floatx80 b );
+
+floatx80 floatx80_scale(floatx80 a, floatx80 b)
+{
+	sbits32 aExp, bExp;
+	bits64 aSig, bSig;
+
+	// handle unsupported extended double-precision floating encodings
+/*    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(float_flag_invalid);
+        return floatx80_default_nan;
+    }*/
+
+	aSig = extractFloatx80Frac(a);
+	aExp = extractFloatx80Exp(a);
+	int aSign = extractFloatx80Sign(a);
+	bSig = extractFloatx80Frac(b);
+	bExp = extractFloatx80Exp(b);
+	int bSign = extractFloatx80Sign(b);
+
+	if (aExp == 0x7FFF) {
+		if ((bits64) (aSig<<1) || ((bExp == 0x7FFF) && (bits64) (bSig<<1)))
+		{
+			return propagateFloatx80NaN(a, b);
+		}
+		if ((bExp == 0x7FFF) && bSign) {
+			float_raise(float_flag_invalid);
+			return floatx80_default_nan;
+		}
+		if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
+		return a;
+	}
+	if (bExp == 0x7FFF) {
+		if ((bits64) (bSig<<1)) return propagateFloatx80NaN(a, b);
+		if ((aExp | aSig) == 0) {
+			if (! bSign) {
+				float_raise(float_flag_invalid);
+				return floatx80_default_nan;
+			}
+			return a;
+		}
+		if (aSig && (aExp == 0)) float_raise(float_flag_denormal);
+		if (bSign) return packFloatx80(aSign, 0, 0);
+		return packFloatx80(aSign, 0x7FFF, 0x8000000000000000U);
+	}
+	if (aExp == 0) {
+		if (aSig == 0) return a;
+		float_raise(float_flag_denormal);
+		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+	}
+	if (bExp == 0) {
+		if (bSig == 0) return a;
+		float_raise(float_flag_denormal);
+		normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+	}
+
+	if (bExp > 0x400E) {
+		/* generate appropriate overflow/underflow */
+		return roundAndPackFloatx80(80, aSign,
+							bSign ? -0x3FFF : 0x7FFF, aSig, 0);
+	}
+	if (bExp < 0x3FFF) return a;
+
+	int shiftCount = 0x403E - bExp;
+	bSig >>= shiftCount;
+	sbits32 scale = bSig;
+	if (bSign) scale = -scale; /* -32768..32767 */
+	return
+		roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0);
+}
diff --git a/softfloat/fyl2x.c b/softfloat/fyl2x.c
new file mode 100644
index 0000000..c7e0d45
--- /dev/null
+++ b/softfloat/fyl2x.c
@@ -0,0 +1,487 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+float_raise(float_flag_invalid)
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 achitecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * Adapted for lib/softfloat in MESS by Hans Ostermeyer (03/2012)
+ * ==========================================================================*/
+
+#define FLOAT128
+
+#define USE_estimateDiv128To64
+
+#include "m68kcpu.h" // which includes softfloat.h after defining the basic types
+
+//#include "softfloat-specialize"
+#include "fpu_constant.h"
+
+#define floatx80_log10_2 packFloatx80(0, 0x3ffd, 0x9a209a84fbcff798U)
+#define floatx80_ln_2 packFloatx80(0, 0x3ffe, 0xb17217f7d1cf79acU)
+#define floatx80_one packFloatx80(0, 0x3fff, 0x8000000000000000U)
+#define floatx80_default_nan packFloatx80(0, 0xffff, 0xffffffffffffffffU)
+
+#define packFloat_128(zHi, zLo) {(zHi), (zLo)}
+#define PACK_FLOAT_128(hi,lo) packFloat_128(LIT64(hi),LIT64(lo))
+
+#define EXP_BIAS 0x3FFF
+
+/*----------------------------------------------------------------------------
+| Returns the fraction bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline bits64 extractFloatx80Frac( floatx80 a )
+{
+	return a.low;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the exponent bits of the extended double-precision floating-point
+| value `a'.
+*----------------------------------------------------------------------------*/
+
+static inline int32 extractFloatx80Exp( floatx80 a )
+{
+	return a.high & 0x7FFF;
+
+}
+
+/*----------------------------------------------------------------------------
+| Returns the sign bit of the extended double-precision floating-point value
+| `a'.
+*----------------------------------------------------------------------------*/
+
+static inline flag extractFloatx80Sign( floatx80 a )
+{
+	return a.high>>15;
+
+}
+
+#if 0
+/*----------------------------------------------------------------------------
+| Takes extended double-precision floating-point  NaN  `a' and returns the
+| appropriate NaN result. If `a' is a signaling NaN, the invalid exception
+| is raised.
+*----------------------------------------------------------------------------*/
+
+static inline floatx80 propagateFloatx80NaNOneArg(floatx80 a)
+{
+	if (floatx80_is_signaling_nan(a))
+		float_raise(float_flag_invalid);
+
+	a.low |= 0xC000000000000000U;
+
+	return a;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+static inline void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
+{
+	int shiftCount = countLeadingZeros64(aSig);
+	*zSigPtr = aSig<<shiftCount;
+	*zExpPtr = 1 - shiftCount;
+}
+
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the extended double-precision floating-point value `a' is a
+| NaN; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+static inline int floatx80_is_nan(floatx80 a)
+{
+	return ((a.high & 0x7FFF) == 0x7FFF) && (int64_t) (a.low<<1);
+}
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+static floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b)
+{
+	int aIsNaN = floatx80_is_nan(a);
+	int aIsSignalingNaN = floatx80_is_signaling_nan(a);
+	int bIsNaN = floatx80_is_nan(b);
+	int bIsSignalingNaN = floatx80_is_signaling_nan(b);
+	a.low |= 0xC000000000000000U;
+	b.low |= 0xC000000000000000U;
+	if (aIsSignalingNaN | bIsSignalingNaN) float_raise(float_flag_invalid);
+	if (aIsSignalingNaN) {
+		if (bIsSignalingNaN) goto returnLargerSignificand;
+		return bIsNaN ? b : a;
+	}
+	else if (aIsNaN) {
+		if (bIsSignalingNaN | ! bIsNaN) return a;
+	returnLargerSignificand:
+		if (a.low < b.low) return b;
+		if (b.low < a.low) return a;
+		return (a.high < b.high) ? a : b;
+	}
+	else {
+		return b;
+	}
+}
+
+static const float128 float128_one =
+	packFloat_128(0x3fff000000000000U, 0x0000000000000000U);
+static const float128 float128_two =
+	packFloat_128(0x4000000000000000U, 0x0000000000000000U);
+
+static const float128 float128_ln2inv2 =
+	packFloat_128(0x400071547652b82fU, 0xe1777d0ffda0d23aU);
+
+#define SQRT2_HALF_SIG  0xb504f333f9de6484U
+
+extern float128 OddPoly(float128 x, float128 *arr, unsigned n);
+
+#define L2_ARR_SIZE 9
+
+static float128 ln_arr[L2_ARR_SIZE] =
+{
+	PACK_FLOAT_128(0x3fff000000000000, 0x0000000000000000), /*  1 */
+	PACK_FLOAT_128(0x3ffd555555555555, 0x5555555555555555), /*  3 */
+	PACK_FLOAT_128(0x3ffc999999999999, 0x999999999999999a), /*  5 */
+	PACK_FLOAT_128(0x3ffc249249249249, 0x2492492492492492), /*  7 */
+	PACK_FLOAT_128(0x3ffbc71c71c71c71, 0xc71c71c71c71c71c), /*  9 */
+	PACK_FLOAT_128(0x3ffb745d1745d174, 0x5d1745d1745d1746), /* 11 */
+	PACK_FLOAT_128(0x3ffb3b13b13b13b1, 0x3b13b13b13b13b14), /* 13 */
+	PACK_FLOAT_128(0x3ffb111111111111, 0x1111111111111111), /* 15 */
+	PACK_FLOAT_128(0x3ffae1e1e1e1e1e1, 0xe1e1e1e1e1e1e1e2)  /* 17 */
+};
+
+static float128 poly_ln(float128 x1)
+{
+/*
+    //
+    //                     3     5     7     9     11     13     15
+    //        1+u         u     u     u     u     u      u      u
+    // 1/2 ln ---  ~ u + --- + --- + --- + --- + ---- + ---- + ---- =
+    //        1-u         3     5     7     9     11     13     15
+    //
+    //                     2     4     6     8     10     12     14
+    //                    u     u     u     u     u      u      u
+    //       = u * [ 1 + --- + --- + --- + --- + ---- + ---- + ---- ] =
+    //                    3     5     7     9     11     13     15
+    //
+    //           3                          3
+    //          --       4k                --        4k+2
+    //   p(u) = >  C  * u           q(u) = >  C   * u
+    //          --  2k                     --  2k+1
+    //          k=0                        k=0
+    //
+    //          1+u                 2
+    //   1/2 ln --- ~ u * [ p(u) + u * q(u) ]
+    //          1-u
+    //
+*/
+	return OddPoly(x1, ln_arr, L2_ARR_SIZE);
+}
+
+/* required sqrt(2)/2 < x < sqrt(2) */
+static float128 poly_l2(float128 x)
+{
+	/* using float128 for approximation */
+	float128 x_p1 = float128_add(x, float128_one);
+	float128 x_m1 = float128_sub(x, float128_one);
+	x = float128_div(x_m1, x_p1);
+	x = poly_ln(x);
+	x = float128_mul(x, float128_ln2inv2);
+	return x;
+}
+
+static float128 poly_l2p1(float128 x)
+{
+	/* using float128 for approximation */
+	float128 x_p2 = float128_add(x, float128_two);
+	x = float128_div(x, x_p2);
+	x = poly_ln(x);
+	x = float128_mul(x, float128_ln2inv2);
+	return x;
+}
+
+// =================================================
+// FYL2X                   Compute y * log (x)
+//                                        2
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//              ln(x)
+//   log (x) = -------,  ln (x*y) = ln(x) + ln(y)
+//      2       ln(2)
+//
+// 2. ----------------------------------------------------------
+//                1+u             x-1
+//   ln (x) = ln -----, when u = -----
+//                1-u             x+1
+//
+// 3. ----------------------------------------------------------
+//                        3     5     7           2n+1
+//       1+u             u     u     u           u
+//   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
+//       1-u             3     5     7           2n+1
+//
+
+static floatx80 fyl2x(floatx80 a, floatx80 b)
+{
+	uint64_t aSig = extractFloatx80Frac(a);
+	int32_t aExp = extractFloatx80Exp(a);
+	int aSign = extractFloatx80Sign(a);
+	uint64_t bSig = extractFloatx80Frac(b);
+	int32_t bExp = extractFloatx80Exp(b);
+	int bSign = extractFloatx80Sign(b);
+
+	int zSign = bSign ^ 1;
+
+	if (aExp == 0x7FFF) {
+		if ((uint64_t) (aSig<<1)
+				|| ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
+		{
+			return propagateFloatx80NaN(a, b);
+		}
+		if (aSign)
+		{
+invalid:
+			float_raise(float_flag_invalid);
+			return floatx80_default_nan;
+		}
+		else {
+			if (bExp == 0) {
+				if (bSig == 0) goto invalid;
+				float_raise(float_flag_denormal);
+			}
+			return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
+		}
+	}
+	if (bExp == 0x7FFF)
+	{
+		if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b);
+		if (aSign && (uint64_t)(aExp | aSig)) goto invalid;
+		if (aSig && (aExp == 0))
+			float_raise(float_flag_denormal);
+		if (aExp < 0x3FFF) {
+			return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
+		}
+		if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0)) goto invalid;
+		return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
+	}
+	if (aExp == 0) {
+		if (aSig == 0) {
+			if ((bExp | bSig) == 0) goto invalid;
+			float_raise(float_flag_divbyzero);
+			return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
+		}
+		if (aSign) goto invalid;
+		float_raise(float_flag_denormal);
+		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+	}
+	if (aSign) goto invalid;
+	if (bExp == 0) {
+		if (bSig == 0) {
+			if (aExp < 0x3FFF) return packFloatx80(zSign, 0, 0);
+			return packFloatx80(bSign, 0, 0);
+		}
+		float_raise(float_flag_denormal);
+		normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+	}
+	if (aExp == 0x3FFF && ((uint64_t) (aSig<<1) == 0))
+		return packFloatx80(bSign, 0, 0);
+
+	float_raise(float_flag_inexact);
+
+	int ExpDiff = aExp - 0x3FFF;
+	aExp = 0;
+	if (aSig >= SQRT2_HALF_SIG) {
+		ExpDiff++;
+		aExp--;
+	}
+
+	/* ******************************** */
+	/* using float128 for approximation */
+	/* ******************************** */
+
+	uint64_t zSig0, zSig1;
+	shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
+	float128 x = packFloat128(0, aExp+0x3FFF, zSig0, zSig1);
+	x = poly_l2(x);
+	x = float128_add(x, int64_to_float128((int64_t) ExpDiff));
+	return floatx80_mul(b, float128_to_floatx80(x));
+}
+
+// =================================================
+// FYL2XP1                 Compute y * log (x + 1)
+//                                        2
+// =================================================
+
+//
+// Uses the following identities:
+//
+// 1. ----------------------------------------------------------
+//              ln(x)
+//   log (x) = -------
+//      2       ln(2)
+//
+// 2. ----------------------------------------------------------
+//                  1+u              x
+//   ln (x+1) = ln -----, when u = -----
+//                  1-u             x+2
+//
+// 3. ----------------------------------------------------------
+//                        3     5     7           2n+1
+//       1+u             u     u     u           u
+//   ln ----- = 2 [ u + --- + --- + --- + ... + ------ + ... ]
+//       1-u             3     5     7           2n+1
+//
+
+floatx80 fyl2xp1(floatx80 a, floatx80 b)
+{
+	int32_t aExp, bExp;
+	uint64_t aSig, bSig, zSig0, zSig1, zSig2;
+	int aSign, bSign;
+
+	aSig = extractFloatx80Frac(a);
+	aExp = extractFloatx80Exp(a);
+	aSign = extractFloatx80Sign(a);
+	bSig = extractFloatx80Frac(b);
+	bExp = extractFloatx80Exp(b);
+	bSign = extractFloatx80Sign(b);
+	int zSign = aSign ^ bSign;
+
+	if (aExp == 0x7FFF) {
+		if ((uint64_t) (aSig<<1)
+				|| ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
+		{
+			return propagateFloatx80NaN(a, b);
+		}
+		if (aSign)
+		{
+invalid:
+			float_raise(float_flag_invalid);
+			return floatx80_default_nan;
+		}
+			else {
+			if (bExp == 0) {
+				if (bSig == 0) goto invalid;
+				float_raise(float_flag_denormal);
+			}
+			return packFloatx80(bSign, 0x7FFF, 0x8000000000000000U);
+		}
+	}
+	if (bExp == 0x7FFF)
+	{
+		if ((uint64_t) (bSig<<1))
+			return propagateFloatx80NaN(a, b);
+
+		if (aExp == 0) {
+			if (aSig == 0) goto invalid;
+			float_raise(float_flag_denormal);
+		}
+
+		return packFloatx80(zSign, 0x7FFF, 0x8000000000000000U);
+	}
+	if (aExp == 0) {
+		if (aSig == 0) {
+			if (bSig && (bExp == 0)) float_raise(float_flag_denormal);
+			return packFloatx80(zSign, 0, 0);
+		}
+		float_raise(float_flag_denormal);
+		normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+	}
+	if (bExp == 0) {
+		if (bSig == 0) return packFloatx80(zSign, 0, 0);
+		float_raise(float_flag_denormal);
+		normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+	}
+
+	float_raise(float_flag_inexact);
+
+	if (aSign && aExp >= 0x3FFF)
+		return a;
+
+	if (aExp >= 0x3FFC) // big argument
+	{
+		return fyl2x(floatx80_add(a, floatx80_one), b);
+	}
+
+	// handle tiny argument
+	if (aExp < EXP_BIAS-70)
+	{
+		// first order approximation, return (a*b)/ln(2)
+		int32_t zExp = aExp + FLOAT_LN2INV_EXP - 0x3FFE;
+
+	mul128By64To192(FLOAT_LN2INV_HI, FLOAT_LN2INV_LO, aSig, &zSig0, &zSig1, &zSig2);
+		if (0 < (int64_t) zSig0) {
+			shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+			--zExp;
+		}
+
+		zExp = zExp + bExp - 0x3FFE;
+	mul128By64To192(zSig0, zSig1, bSig, &zSig0, &zSig1, &zSig2);
+		if (0 < (int64_t) zSig0) {
+			shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+			--zExp;
+		}
+
+		return
+			roundAndPackFloatx80(80, aSign ^ bSign, zExp, zSig0, zSig1);
+	}
+
+	/* ******************************** */
+	/* using float128 for approximation */
+	/* ******************************** */
+
+	shift128Right(aSig<<1, 0, 16, &zSig0, &zSig1);
+	float128 x = packFloat128(aSign, aExp, zSig0, zSig1);
+	x = poly_l2p1(x);
+	return floatx80_mul(b, float128_to_floatx80(x));
+}
+
+floatx80 floatx80_flognp1(floatx80 a)
+{
+	return fyl2xp1(a, floatx80_ln_2);
+}
+
+floatx80 floatx80_flogn(floatx80 a)
+{
+	return fyl2x(a, floatx80_ln_2);
+}
+
+floatx80 floatx80_flog2(floatx80 a)
+{
+	return fyl2x(a, floatx80_one);
+}
+
+floatx80 floatx80_flog10(floatx80 a)
+{
+	return fyl2x(a, floatx80_log10_2);
+}
diff --git a/softfloat/softfloat.h b/softfloat/softfloat.h
index 92135e6..8f47f0a 100644
--- a/softfloat/softfloat.h
+++ b/softfloat/softfloat.h
@@ -240,9 +240,9 @@ flag floatx80_le_quiet( floatx80, floatx80 );
 flag floatx80_lt_quiet( floatx80, floatx80 );
 flag floatx80_is_signaling_nan( floatx80 );
 
-/* int floatx80_fsin(floatx80 &a);
-int floatx80_fcos(floatx80 &a);
-int floatx80_ftan(floatx80 &a); */
+int floatx80_fsin(floatx80 *a);
+int floatx80_fcos(floatx80 *a);
+int floatx80_ftan(floatx80 *a);
 
 floatx80 floatx80_flognp1(floatx80 a);
 floatx80 floatx80_flogn(floatx80 a);