changeset 1216:0649cd8ca097

Cycle accurate MULU/MULS emulation
author Michael Pavone <pavone@retrodev.com>
date Sat, 04 Feb 2017 00:41:15 -0800
parents cf69a179aeaf
children f2f17267b0e1
files gen_x86.c gen_x86.h m68k_core_x86.c
diffstat 3 files changed, 58 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/gen_x86.c	Wed Feb 01 21:19:35 2017 -0800
+++ b/gen_x86.c	Sat Feb 04 00:41:15 2017 -0800
@@ -31,6 +31,7 @@
 #define OP_POP 0x58
 #define OP_MOVSXD 0x63
 #define PRE_SIZE 0x66
+#define OP_IMUL 0x69
 #define OP_JCC 0x70
 #define OP_IMMED_ARITH 0x80
 #define OP_TEST 0x84
@@ -530,7 +531,7 @@
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
-	if (dst == RAX && !sign_extend) {
+	if (dst == RAX && !sign_extend && al_opcode) {
 		if (size != SZ_B) {
 			al_opcode |= BIT_SIZE;
 			if (size == SZ_Q) {
@@ -1146,6 +1147,15 @@
 	x86_rrdisp_sizedir(code, OP2_IMUL | (PRE_2BYTE << 8), dst, src_base, disp, size, 0);
 }
 
+//Emits a signed multiply of register src by the immediate val, with the result
+//going to register dst. size must be SZ_W or larger; SZ_B is rejected via fatal_error
+void imul_irr(code_info *code, int32_t val, uint8_t src, uint8_t dst, uint8_t size)
+{
+	//no accumulator-specific short encoding is supplied for this opcode, hence 0
+	//NOTE(review): argument roles are inferred from the x86_ir call below - confirm against its definition
+	const uint8_t no_al_opcode = 0;
+
+	if (SZ_B == size) {
+		fatal_error("imul immediate only supports 16-bit sizes and up");
+	}
+
+	//dst is passed in the opcode-extension position and src in the operand position
+	x86_ir(code, OP_IMUL, dst, no_al_opcode, val, src, size);
+}
+
 //Emits a NOT on register dst at the given operand size by handing the
 //OP_NOT_NEG opcode and the OP_EX_NOT opcode extension to x86_r_size
 void not_r(code_info *code, uint8_t dst, uint8_t size)
 {
 	x86_r_size(code, OP_NOT_NEG, OP_EX_NOT, dst, size);
 }
--- a/gen_x86.h	Wed Feb 01 21:19:35 2017 -0800
+++ b/gen_x86.h	Sat Feb 04 00:41:15 2017 -0800
@@ -148,6 +148,7 @@
 void sbb_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size);
 void cmp_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t disp, uint8_t size);
 void cmp_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size);
+void imul_irr(code_info *code, int32_t val, uint8_t src, uint8_t dst, uint8_t size);
 void imul_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size);
 void imul_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t disp, uint8_t size);
 void imul_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size);
--- a/m68k_core_x86.c	Wed Feb 01 21:19:35 2017 -0800
+++ b/m68k_core_x86.c	Sat Feb 04 00:41:15 2017 -0800
@@ -1807,11 +1807,40 @@
 	}
 }
 
+
+
+//Returns the number of 68K cycles MULU takes for the given 16-bit source operand:
+//a fixed 38 cycles plus 2 more for every set bit, so the result is between 38 and 70
+static uint32_t mulu_cycles(uint16_t value)
+{
+	//4 for prefetch, 2-cycles per bit x 16, 2 for cleanup
+	uint32_t cycles = 38;
+	//count the set bits with a parallel reduction: neighbouring 1-bit fields are
+	//summed into 2-bit fields, then 4-bit, then 8-bit and finally one 16-bit total.
+	//masks are written in hex because 0b literals are a compiler extension before C23
+	value = ((value & 0xAAAA) >> 1) + (value & 0x5555);
+	value = ((value & 0xCCCC) >> 2) + (value & 0x3333);
+	value = ((value & 0xF0F0) >> 4) + (value & 0x0F0F);
+	value = ((value & 0xFF00) >> 8) + (value & 0x00FF);
+	//value now holds the population count (0-16)
+	return cycles + 2*value;
+}
+
+//Returns the number of 68K cycles MULS takes for the given 16-bit source operand
+static uint32_t muls_cycles(uint16_t value)
+{
+	//muls timing is essentially the same as mulu, except that it depends on the number
+	//of 0/1 transitions between neighbouring bits rather than on the number of 1 bits.
+	//bit N of value ^ (value << 1) is set exactly when bits N and N-1 of value differ
+	//(bit 0 is compared against an implicit 0), so counting its 1 bits counts transitions
+	uint16_t transitions = (uint16_t)((value << 1) ^ value);
+	return mulu_cycles(transitions);
+}
+
 void translate_m68k_mul(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op)
 {
 	code_info *code = &opts->gen.code;
-	cycles(&opts->gen, 70); //TODO: Calculate the actual value based on the value of the <ea> parameter
 	if (src_op->mode == MODE_IMMED) {
+		cycles(&opts->gen, inst->op == M68K_MULU ? mulu_cycles(src_op->disp) : muls_cycles(src_op->disp));
 		mov_ir(code, inst->op == M68K_MULU ? (src_op->disp & 0xFFFF) : ((src_op->disp & 0x8000) ? src_op->disp | 0xFFFF0000 : src_op->disp), opts->gen.scratch1, SZ_D);
 	} else if (src_op->mode == MODE_REG_DIRECT) {
 		if (inst->op == M68K_MULS) {
@@ -1826,6 +1855,22 @@
 			movzx_rdispr(code, src_op->base, src_op->disp, opts->gen.scratch1, SZ_W, SZ_D);
 		}
 	}
+	if (src_op->mode != MODE_IMMED) {
+		//TODO: Inline cycle calculation so we don't need to save/restore a bunch of registers
+		//save context to memory and call the relevant C function for calculating the cycle count
+		call(code, opts->gen.save_context);
+		push_r(code, opts->gen.scratch1);
+		push_r(code, opts->gen.context_reg);
+		call_args(code, (code_ptr)(inst->op == M68K_MULS ? muls_cycles : mulu_cycles), 1, opts->gen.scratch1);
+		pop_r(code, opts->gen.context_reg);
+		//turn 68K cycles into master clock cycles and add to the current cycle count
+		imul_irr(code, opts->gen.clock_divider, RAX, RAX, SZ_D);
+		add_rrdisp(code, RAX, opts->gen.context_reg, offsetof(m68k_context, current_cycle), SZ_D);
+		//restore context and scratch1
+		call(code, opts->gen.load_context);
+		pop_r(code, opts->gen.scratch1);
+	}
+	
 	uint8_t dst_reg;
 	if (dst_op->mode == MODE_REG_DIRECT) {
 		dst_reg = dst_op->base;