changeset 151:6b593ea0ed90

Implement MULU/MULS and DIVU/DIVS
author Mike Pavone <pavone@retrodev.com>
date Thu, 03 Jan 2013 22:07:40 -0800
parents 3e68e517cc01
children 79958b95526f
files dis.c gen_x86.c gen_x86.h m68k_to_x86.c vdp.c
diffstat 5 files changed, 294 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/dis.c	Tue Jan 01 09:40:17 2013 -0800
+++ b/dis.c	Thu Jan 03 22:07:40 2013 -0800
@@ -63,6 +63,7 @@
 
 uint8_t labels = 0;
 uint8_t addr = 0;
+uint8_t only = 0;
 
 int main(int argc, char ** argv)
 {
@@ -78,6 +79,7 @@
 	filebuf = malloc(filesize);
 	fread(filebuf, 2, filesize/2, f);
 	fclose(f);
+	deferred *def = NULL, *tmpd;
 	for(uint8_t opt = 2; opt < argc; ++opt) {
 		if (argv[opt][0] == '-') {
 			switch (argv[opt][1])
@@ -88,7 +90,14 @@
 			case 'a':
 				addr = 1;
 				break;
+			case 'o':
+				only = 1;
+				break;
 			}
+		} else {
+			uint32_t address = strtol(argv[opt], NULL, 16);
+			def = defer(address, def);
+			reference(address);
 		}
 	}
 	for(cur = filebuf; cur - filebuf < (filesize/2); ++cur)
@@ -101,11 +110,12 @@
 	uint32_t int_6 = filebuf[0x78/2] << 16 | filebuf[0x7A/2];
 	uint16_t *encoded, *next;
 	uint32_t size;
-	deferred *def = NULL, *tmpd;
-	def = defer(start, def);
-	def = defer(int_2, def);
-	def = defer(int_4, def);
-	def = defer(int_6, def);
+	if (!def || !only) {
+		def = defer(start, def);
+		def = defer(int_2, def);
+		def = defer(int_4, def);
+		def = defer(int_6, def);
+	}
 	uint32_t address;
 	while(def) {
 		do {
--- a/gen_x86.c	Tue Jan 01 09:40:17 2013 -0800
+++ b/gen_x86.c	Thu Jan 03 22:07:40 2013 -0800
@@ -25,6 +25,7 @@
 #define OP_JCC 0x70
 #define OP_IMMED_ARITH 0x80
 #define OP_MOV 0x88
+#define OP_CDQ 0x99
 #define OP_PUSHF 0x9C
 #define OP_POPF 0x9D
 #define OP_MOV_I8R 0xB0
@@ -44,10 +45,12 @@
 #define OP2_SETCC 0x90
 #define OP2_BT 0xA3
 #define OP2_BTS 0xAB
+#define OP2_IMUL 0xAF
 #define OP2_BTR 0xB3
 #define OP2_BTX_I 0xBA
 #define OP2_BTC 0xBB
 #define OP2_MOVSX 0xBE
+#define OP2_MOVZX 0xB6
 
 #define OP_EX_ADDI 0x0
 #define OP_EX_ORI  0x1
@@ -75,6 +78,10 @@
 #define OP_EX_TEST_I 0x0
 #define OP_EX_NOT    0x2
 #define OP_EX_NEG    0x3
+#define OP_EX_MUL    0x4
+#define OP_EX_IMUL   0x5
+#define OP_EX_DIV    0x6
+#define OP_EX_IDIV   0x7
 
 #define OP_EX_INC     0x0
 #define OP_EX_DEC     0x1
@@ -110,7 +117,7 @@
 	X86_R15
 } x86_regs_enc;
 
-uint8_t * x86_rr_sizedir(uint8_t * out, uint8_t opcode, uint8_t src, uint8_t dst, uint8_t size)
+uint8_t * x86_rr_sizedir(uint8_t * out, uint16_t opcode, uint8_t src, uint8_t dst, uint8_t size)
 {
 	//TODO: Deal with the fact that AH, BH, CH and DH can only be in the R/M param when there's a REX prefix
 	uint8_t tmp;
@@ -148,12 +155,17 @@
 	} else {
 		opcode |= BIT_SIZE;
 	}
-	*(out++) = opcode;
+	if (opcode >= 0x100) {
+		*(out++) = opcode >> 8;
+		*(out++) = opcode;
+	} else {
+		*(out++) = opcode;
+	}
 	*(out++) = MODE_REG_DIRECT | dst | (src << 3);
 	return out;
 }
 
-uint8_t * x86_rrdisp8_sizedir(uint8_t * out, uint8_t opcode, uint8_t reg, uint8_t base, int8_t disp, uint8_t size, uint8_t dir)
+uint8_t * x86_rrdisp8_sizedir(uint8_t * out, uint16_t opcode, uint8_t reg, uint8_t base, int8_t disp, uint8_t size, uint8_t dir)
 {
 	//TODO: Deal with the fact that AH, BH, CH and DH can only be in the R/M param when there's a REX prefix
 	uint8_t tmp;
@@ -182,7 +194,13 @@
 	} else {
 		opcode |= BIT_SIZE;
 	}
-	*(out++) = opcode | dir;
+	opcode |= dir;
+	if (opcode >= 0x100) {
+		*(out++) = opcode >> 8;
+		*(out++) = opcode;
+	} else {
+		*(out++) = opcode;
+	}
 	*(out++) = MODE_REG_DISPLACE8 | base | (reg << 3);
 	if (base == RSP) {
 		//add SIB byte, with no index and RSP as base
@@ -828,6 +846,16 @@
 	return x86_rrdisp8_sizedir(out, OP_CMP, dst, src_base, disp, size, BIT_DIR);
 }
 
+uint8_t * imul_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+{ //emit the two-byte-opcode IMUL (PRE_2BYTE, OP2_IMUL) for registers src and dst
+	return x86_rr_sizedir(out, OP2_IMUL | (PRE_2BYTE << 8), dst, src, size); //dst and src are passed swapped relative to the helper's (src, dst) parameter order
+}
+
+uint8_t * imul_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size)
+{ //emit the two-byte-opcode IMUL (PRE_2BYTE, OP2_IMUL) with a disp(src_base) operand and register dst
+	return x86_rrdisp8_sizedir(out, OP2_IMUL | (PRE_2BYTE << 8), dst, src_base, disp, size, 0); //dir is 0, so no direction bit is OR'd into the opcode
+}
+
 uint8_t * not_r(uint8_t * out, uint8_t dst, uint8_t size)
 {
 	return x86_r_size(out, OP_NOT_NEG, OP_EX_NOT, dst, size);
@@ -848,6 +876,46 @@
 	return x86_rdisp8_size(out, OP_NOT_NEG, OP_EX_NEG, dst_base, disp, size);
 }
 
+uint8_t * mul_r(uint8_t * out, uint8_t dst, uint8_t size)
+{ //emit MUL with register operand dst by forwarding to x86_r_size
+	return x86_r_size(out, OP_NOT_NEG, OP_EX_MUL, dst, size); //shares the NOT/NEG opcode; OP_EX_MUL selects the operation
+}
+
+uint8_t * imul_r(uint8_t * out, uint8_t dst, uint8_t size)
+{ //emit the single-operand IMUL with register operand dst by forwarding to x86_r_size
+	return x86_r_size(out, OP_NOT_NEG, OP_EX_IMUL, dst, size); //shares the NOT/NEG opcode; OP_EX_IMUL selects the operation
+}
+
+uint8_t * div_r(uint8_t * out, uint8_t dst, uint8_t size)
+{ //emit DIV with register operand dst by forwarding to x86_r_size
+	return x86_r_size(out, OP_NOT_NEG, OP_EX_DIV, dst, size); //shares the NOT/NEG opcode; OP_EX_DIV selects the operation
+}
+
+uint8_t * idiv_r(uint8_t * out, uint8_t dst, uint8_t size)
+{ //emit IDIV with register operand dst by forwarding to x86_r_size
+	return x86_r_size(out, OP_NOT_NEG, OP_EX_IDIV, dst, size); //shares the NOT/NEG opcode; OP_EX_IDIV selects the operation
+}
+
+uint8_t * mul_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+{ //emit MUL with a disp(dst_base) operand by forwarding to x86_rdisp8_size
+	return x86_rdisp8_size(out, OP_NOT_NEG, OP_EX_MUL, dst_base, disp, size); //shares the NOT/NEG opcode; OP_EX_MUL selects the operation
+}
+
+uint8_t * imul_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+{ //emit the single-operand IMUL with a disp(dst_base) operand by forwarding to x86_rdisp8_size
+	return x86_rdisp8_size(out, OP_NOT_NEG, OP_EX_IMUL, dst_base, disp, size); //shares the NOT/NEG opcode; OP_EX_IMUL selects the operation
+}
+
+uint8_t * div_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+{ //emit DIV with a disp(dst_base) operand by forwarding to x86_rdisp8_size
+	return x86_rdisp8_size(out, OP_NOT_NEG, OP_EX_DIV, dst_base, disp, size); //shares the NOT/NEG opcode; OP_EX_DIV selects the operation
+}
+
+uint8_t * idiv_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+{ //emit IDIV with a disp(dst_base) operand by forwarding to x86_rdisp8_size
+	return x86_rdisp8_size(out, OP_NOT_NEG, OP_EX_IDIV, dst_base, disp, size); //shares the NOT/NEG opcode; OP_EX_IDIV selects the operation
+}
+
 uint8_t * mov_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
 {
 	return x86_rr_sizedir(out, OP_MOV, src, dst, size);
@@ -1062,6 +1130,59 @@
 	return out;
 }
 
+uint8_t * movzx_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t src_size, uint8_t size)
+{ //emit a zero-extending move (PRE_2BYTE, OP2_MOVZX) from register src to register dst; returns the advanced out pointer
+	if (size == SZ_W) {
+		*(out++) = PRE_SIZE; //only a SZ_W destination gets the PRE_SIZE prefix
+	}
+	if (size == SZ_Q || dst >= R8 || src >= R8) { //a REX byte is emitted for SZ_Q or when either register is R8 or above
+		*out = PRE_REX;
+		if (size == SZ_Q) {
+			*out |= REX_QUAD;
+		}
+		if (src >= R8) {
+			*out |= REX_RM_FIELD; //src lands in the low (r/m) bits of the ModRM byte below
+			src -= (R8 - X86_R8); //rebase src onto the X86_R8-relative numbering
+		}
+		if (dst >= R8) {
+			*out |= REX_REG_FIELD; //dst lands in the shifted (reg) bits of the ModRM byte below
+			dst -= (R8 - X86_R8); //rebase dst onto the X86_R8-relative numbering
+		}
+		out++; //keep the REX byte that was built in place
+	}
+	*(out++) = PRE_2BYTE;
+	*(out++) = OP2_MOVZX | (src_size == SZ_B ? 0 : BIT_SIZE); //BIT_SIZE is set for any source size other than SZ_B
+	*(out++) = MODE_REG_DIRECT | src | (dst << 3);
+	return out;
+}
+
+uint8_t * movzx_rdisp8r(uint8_t * out, uint8_t src, int8_t disp, uint8_t dst, uint8_t src_size, uint8_t size)
+{ //emit a zero-extending move (PRE_2BYTE, OP2_MOVZX) from disp(src) to register dst; returns the advanced out pointer
+	if (size == SZ_W) {
+		*(out++) = PRE_SIZE; //only a SZ_W destination gets the PRE_SIZE prefix
+	}
+	if (size == SZ_Q || dst >= R8 || src >= R8) { //a REX byte is emitted for SZ_Q or when either register is R8 or above
+		*out = PRE_REX;
+		if (size == SZ_Q) {
+			*out |= REX_QUAD;
+		}
+		if (src >= R8) {
+			*out |= REX_RM_FIELD; //the base register lands in the low (r/m) bits of the ModRM byte below
+			src -= (R8 - X86_R8); //rebase src onto the X86_R8-relative numbering
+		}
+		if (dst >= R8) {
+			*out |= REX_REG_FIELD; //dst lands in the shifted (reg) bits of the ModRM byte below
+			dst -= (R8 - X86_R8); //rebase dst onto the X86_R8-relative numbering
+		}
+		out++; //keep the REX byte that was built in place
+	}
+	*(out++) = PRE_2BYTE;
+	*(out++) = OP2_MOVZX | (src_size == SZ_B ? 0 : BIT_SIZE); //BIT_SIZE is set for any source size other than SZ_B
+	*(out++) = MODE_REG_DISPLACE8 | src | (dst << 3); //NOTE(review): no SIB byte is emitted when src is RSP, unlike x86_rrdisp8_sizedir - confirm callers never pass RSP as the base
+	*(out++) = disp;
+	return out;
+}
+
 uint8_t * pushf(uint8_t * out)
 {
 	*(out++) = OP_PUSHF;
@@ -1406,4 +1527,10 @@
 	return out;
 }
 
+uint8_t * cdq(uint8_t * out)
+{ //emit the single-byte OP_CDQ opcode and return the advanced out pointer
+	*(out++) = OP_CDQ;
+	return out;
+}
 
+
--- a/gen_x86.h	Tue Jan 01 09:40:17 2013 -0800
+++ b/gen_x86.h	Thu Jan 03 22:07:40 2013 -0800
@@ -133,10 +133,21 @@
 uint8_t * sbb_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size);
 uint8_t * cmp_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t disp, uint8_t size);
 uint8_t * cmp_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size);
+uint8_t * imul_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size);
+uint8_t * imul_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t disp, uint8_t size); //NOTE(review): this changeset's gen_x86.c hunks add only imul_rr and imul_rdisp8r - confirm a definition exists before calling this
+uint8_t * imul_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size);
 uint8_t * not_r(uint8_t * out, uint8_t dst, uint8_t size);
 uint8_t * neg_r(uint8_t * out, uint8_t dst, uint8_t size);
 uint8_t * not_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size);
 uint8_t * neg_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size);
+uint8_t * mul_r(uint8_t * out, uint8_t dst, uint8_t size);
+uint8_t * imul_r(uint8_t * out, uint8_t dst, uint8_t size);
+uint8_t * div_r(uint8_t * out, uint8_t dst, uint8_t size);
+uint8_t * idiv_r(uint8_t * out, uint8_t dst, uint8_t size);
+uint8_t * mul_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size);
+uint8_t * imul_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size);
+uint8_t * div_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size);
+uint8_t * idiv_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size);
 uint8_t * mov_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size);
 uint8_t * mov_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t disp, uint8_t size);
 uint8_t * mov_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size);
@@ -147,6 +158,8 @@
 uint8_t * mov_irind(uint8_t * out, int32_t val, uint8_t dst, uint8_t size);
 uint8_t * movsx_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t src_size, uint8_t size);
 uint8_t * movsx_rdisp8r(uint8_t * out, uint8_t src, int8_t disp, uint8_t dst, uint8_t src_size, uint8_t size);
+uint8_t * movzx_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t src_size, uint8_t size);
+uint8_t * movzx_rdisp8r(uint8_t * out, uint8_t src, int8_t disp, uint8_t dst, uint8_t src_size, uint8_t size);
 uint8_t * pushf(uint8_t * out);
 uint8_t * popf(uint8_t * out);
 uint8_t * push_r(uint8_t * out, uint8_t reg);
@@ -176,6 +189,7 @@
 uint8_t * call(uint8_t * out, uint8_t * fun);
 uint8_t * call_r(uint8_t * out, uint8_t dst);
 uint8_t * retn(uint8_t * out);
+uint8_t * cdq(uint8_t * out);
 
 #endif //GEN_X86_H_
 
--- a/m68k_to_x86.c	Tue Jan 01 09:40:17 2013 -0800
+++ b/m68k_to_x86.c	Thu Jan 03 22:07:40 2013 -0800
@@ -27,6 +27,8 @@
 	uint8_t cycles;
 } x86_ea;
 
+char disasm_buf[1024];
+
 void handle_cycle_limit_int();
 void m68k_read_word_scratch1();
 void m68k_read_long_scratch1();
@@ -343,7 +345,8 @@
 		ea->disp = inst->src.params.immed;
 		break;
 	default:
-		printf("address mode %d not implemented (src)\n", inst->src.addr_mode);
+		m68k_disasm(inst, disasm_buf);
+		printf("%s\naddress mode %d not implemented (src)\n", disasm_buf, inst->src.addr_mode);
 		exit(1);
 	}
 	return out;
@@ -608,7 +611,8 @@
 		ea->base = SCRATCH1;
 		break;
 	default:
-		printf("address mode %d not implemented (dst)\n", inst->dst.addr_mode);
+		m68k_disasm(inst, disasm_buf);
+		printf("%s\naddress mode %d not implemented (dst)\n", disasm_buf, inst->dst.addr_mode);
 		exit(1);
 	}
 	return out;
@@ -955,7 +959,8 @@
 		}
 		break;
 	default:
-		printf("address mode %d not implemented (move dst)\n", inst->dst.addr_mode);
+		m68k_disasm(inst, disasm_buf);
+		printf("%s\naddress mode %d not implemented (move dst)\n", disasm_buf, inst->dst.addr_mode);
 		exit(1);
 	}
 
@@ -995,7 +1000,8 @@
 			dst = mov_ir(dst, inst->dst.params.immed, SCRATCH2, SZ_D);
 			break;
 		default:
-			printf("address mode %d not implemented (movem dst)\n", inst->dst.addr_mode);
+			m68k_disasm(inst, disasm_buf);
+			printf("%s\naddress mode %d not implemented (movem dst)\n", disasm_buf, inst->dst.addr_mode);
 			exit(1);
 		}
 		dst = cycles(dst, early_cycles);
@@ -1056,7 +1062,8 @@
 			dst = mov_ir(dst, inst->src.params.immed, SCRATCH1, SZ_D);
 			break;
 		default:
-			printf("address mode %d not implemented (movem src)\n", inst->src.addr_mode);
+			m68k_disasm(inst, disasm_buf);
+			printf("%s\naddress mode %d not implemented (movem src)\n", disasm_buf, inst->src.addr_mode);
 			exit(1);
 		}
 		dst = cycles(dst, early_cycles);
@@ -1292,7 +1299,8 @@
 		}
 		break;
 	default:
-		printf("address mode %d not implemented (lea src)\n", inst->src.addr_mode);
+		m68k_disasm(inst, disasm_buf);
+		printf("%s\naddress mode %d not implemented (lea src)\n", disasm_buf, inst->src.addr_mode);
 		exit(1);
 	}
 	return dst;
@@ -1372,7 +1380,8 @@
 		dst = mov_ir(dst, inst->src.params.immed, SCRATCH1, SZ_D);
 		break;
 	default:
-		printf("address mode %d not implemented (lea src)\n", inst->src.addr_mode);
+		m68k_disasm(inst, disasm_buf);
+		printf("%s\naddress mode %d not implemented (lea src)\n", disasm_buf, inst->src.addr_mode);
 		exit(1);
 	}
 	dst = sub_ir(dst, 4, opts->aregs[7], SZ_D);
@@ -1698,7 +1707,8 @@
 		}
 		break;
 	default:
-		printf("address mode %d not yet supported (jmp)\n", inst->src.addr_mode);
+		m68k_disasm(inst, disasm_buf);
+		printf("%s\naddress mode %d not yet supported (jmp)\n", disasm_buf, inst->src.addr_mode);
 		exit(1);
 	}
 	return dst;
@@ -1878,7 +1888,8 @@
 		dst = pop_r(dst, SCRATCH1);
 		break;
 	default:
-		printf("address mode %d not yet supported (jsr)\n", inst->src.addr_mode);
+		m68k_disasm(inst, disasm_buf);
+		printf("%s\naddress mode %d not yet supported (jsr)\n", disasm_buf, inst->src.addr_mode);
 		exit(1);
 	}
 	return dst;
@@ -2107,6 +2118,7 @@
 uint8_t * translate_m68k(uint8_t * dst, m68kinst * inst, x86_68k_options * opts)
 {
 	uint8_t * end_off, *zero_off, *norm_off;
+	uint8_t dst_reg;
 	map_native_address(opts->native_code_map, inst->address, dst);
 	dst = check_cycles_int(dst, inst->address);
 	if (inst->op == M68K_MOVE) {
@@ -2368,9 +2380,69 @@
 		dst = setcc_r(dst, CC_S, FLAG_N);
 		dst = setcc_r(dst, CC_O, FLAG_V);
 		break;
-	/*case M68K_DIVS:
+	case M68K_DIVS:
 	case M68K_DIVU:
-		break;*/
+		//TODO: Trap on division by zero
+		dst = cycles(dst, inst->op == M68K_DIVS ? 158 : 140);
+		//RAX and RDX are implicit operands of div/idiv, so save the host values first
+		dst = push_r(dst, RDX);
+		dst = push_r(dst, RAX);
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			dst = mov_rr(dst, dst_op.base, RAX, SZ_D);
+		} else {
+			dst = mov_rdisp8r(dst, dst_op.base, dst_op.disp, RAX, SZ_D);
+		}
+		if (src_op.mode == MODE_IMMED) {
+			dst = mov_ir(dst, inst->op == M68K_DIVU ? (src_op.disp & 0xFFFF) : src_op.disp, SCRATCH2, SZ_D); //DIVU divisor is an unsigned word: drop the sign extension, as MULU does
+		} else if (src_op.mode == MODE_REG_DIRECT) {
+			if (inst->op == M68K_DIVS) {
+				dst = movsx_rr(dst, src_op.base, SCRATCH2, SZ_W, SZ_D);
+			} else {
+				dst = movzx_rr(dst, src_op.base, SCRATCH2, SZ_W, SZ_D);
+			}
+		} else if (src_op.mode == MODE_REG_DISPLACE8) {
+			if (inst->op == M68K_DIVS) {
+				dst = movsx_rdisp8r(dst, src_op.base, src_op.disp, SCRATCH2, SZ_W, SZ_D);
+			} else {
+				dst = movzx_rdisp8r(dst, src_op.base, src_op.disp, SCRATCH2, SZ_W, SZ_D);
+			}
+		}
+		if (inst->op == M68K_DIVS) {
+			dst = cdq(dst); //sign extend the dividend into RDX
+			dst = idiv_r(dst, SCRATCH2, SZ_D);
+		} else {
+			dst = xor_rr(dst, RDX, RDX, SZ_D); //zero extend the dividend into RDX
+			dst = div_r(dst, SCRATCH2, SZ_D);
+		}
+		//NOTE(review): this unsigned compare also sends negative DIVS quotients down the overflow path
+		dst = cmp_ir(dst, 0x10000, RAX, SZ_D);
+		norm_off = dst+1;
+		dst = jcc(dst, CC_NC, dst+2);
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			dst = mov_rr(dst, RDX, dst_op.base, SZ_W);
+			dst = shl_ir(dst, 16, dst_op.base, SZ_D);
+			dst = mov_rr(dst, RAX, dst_op.base, SZ_W);
+		} else {
+			dst = mov_rrdisp8(dst, RDX, dst_op.base, dst_op.disp, SZ_W);
+			dst = shl_irdisp8(dst, 16, dst_op.base, dst_op.disp, SZ_D);
+			dst = mov_rrdisp8(dst, RAX, dst_op.base, dst_op.disp, SZ_W);
+		}
+		//test the quotient while it is still in RAX; the pops below do not modify the host flags
+		dst = cmp_ir(dst, 0, RAX, SZ_W);
+		dst = pop_r(dst, RAX);
+		dst = pop_r(dst, RDX);
+		dst = setcc_r(dst, CC_Z, FLAG_Z);
+		dst = setcc_r(dst, CC_S, FLAG_N);
+		dst = mov_ir(dst, 0, FLAG_V, SZ_B);
+		end_off = dst+1;
+		dst = jmp(dst, dst+2);
+		*norm_off = dst - (norm_off + 1); //overflow path: destination is left untouched and only V is set
+		dst = pop_r(dst, RAX);
+		dst = pop_r(dst, RDX);
+		dst = mov_ir(dst, 1, FLAG_V, SZ_B);
+		*end_off = dst - (end_off + 1);
+		dst = mov_ir(dst, 0, FLAG_C, SZ_B); //C is cleared on both paths
+		break;
 	case M68K_EOR:
 		dst = cycles(dst, BUS);
 		if (src_op.mode == MODE_REG_DIRECT) {
@@ -2487,10 +2559,52 @@
 			}
 		}
 		break;
-	/*case M68K_MOVEP:
+	//case M68K_MOVEP:
 	case M68K_MULS:
 	case M68K_MULU:
-	case M68K_NBCD:*/
+		dst = cycles(dst, 70); //TODO: Calculate the actual value based on the value of the <ea> parameter
+		if (src_op.mode == MODE_IMMED) { //MULS/MULU: stage the 16-bit source operand, widened to 32 bits, in SCRATCH1
+			//immediate value should already be sign extended to 32-bits
+			dst = mov_ir(dst, inst->op == M68K_MULU ? (src_op.disp & 0xFFFF) : src_op.disp, SCRATCH1, SZ_D); //MULU keeps only the low word of the immediate
+		} else if (src_op.mode == MODE_REG_DIRECT) {
+			if (inst->op == M68K_MULS) {
+				dst = movsx_rr(dst, src_op.base, SCRATCH1, SZ_W, SZ_D); //signed multiply: sign extend the source word
+			} else {
+				dst = movzx_rr(dst, src_op.base, SCRATCH1, SZ_W, SZ_D); //unsigned multiply: zero extend the source word
+			}
+		} else { //any other source mode is read as disp(base)
+			if (inst->op == M68K_MULS) {
+				dst = movsx_rdisp8r(dst, src_op.base, src_op.disp, SCRATCH1, SZ_W, SZ_D);
+			} else {
+				dst = movzx_rdisp8r(dst, src_op.base, src_op.disp, SCRATCH1, SZ_W, SZ_D);
+			}
+		}
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			dst_reg = dst_op.base; //register destination: widen its low word in place
+			if (inst->op == M68K_MULS) {
+				dst = movsx_rr(dst, dst_reg, dst_reg, SZ_W, SZ_D);
+			} else {
+				dst = movzx_rr(dst, dst_reg, dst_reg, SZ_W, SZ_D);
+			}
+		} else {
+			dst_reg = SCRATCH2; //memory destination: multiply in SCRATCH2 and write the product back below
+			if (inst->op == M68K_MULS) {
+				dst = movsx_rdisp8r(dst, dst_op.base, dst_op.disp, SCRATCH2, SZ_W, SZ_D);
+			} else {
+				dst = movzx_rdisp8r(dst, dst_op.base, dst_op.disp, SCRATCH2, SZ_W, SZ_D);
+			}
+		}
+		dst = imul_rr(dst, SCRATCH1, dst_reg, SZ_D); //32-bit multiply of the two widened operands, result in dst_reg
+		if (dst_op.mode == MODE_REG_DISPLACE8) {
+			dst = mov_rrdisp8(dst, dst_reg, dst_op.base, dst_op.disp, SZ_D); //store the 32-bit product back to the memory destination
+		}
+		dst = mov_ir(dst, 0, FLAG_V, SZ_B); //V and C are always cleared
+		dst = mov_ir(dst, 0, FLAG_C, SZ_B);
+		dst = cmp_ir(dst, 0, dst_reg, SZ_D); //Z and N are taken from the full 32-bit product
+		dst = setcc_r(dst, CC_Z, FLAG_Z);
+		dst = setcc_r(dst, CC_S, FLAG_N);
+		break;
+	//case M68K_NBCD:
 	case M68K_NEG:
 		if (dst_op.mode == MODE_REG_DIRECT) {
 			dst = neg_r(dst, dst_op.base, inst->extra.size);
@@ -2887,7 +3001,8 @@
 	/*case M68K_INVALID:
 		break;*/
 	default:
-		printf("instruction %d not yet implemented\n", inst->op);
+		m68k_disasm(inst, disasm_buf);
+		printf("%X: %s\ninstruction %d not yet implemented\n", inst->address, disasm_buf, inst->op);
 		exit(1);
 	}
 	return dst;
--- a/vdp.c	Tue Jan 01 09:40:17 2013 -0800
+++ b/vdp.c	Thu Jan 03 22:07:40 2013 -0800
@@ -205,6 +205,7 @@
 				break;
 			case CRAM_WRITE:
 				context->cram[(context->address/2) & (CRAM_SIZE-1)] = read_dma_value((context->regs[REG_DMASRC_H] << 16) | (context->regs[REG_DMASRC_M] << 8) | context->regs[REG_DMASRC_L]);
+				//printf("CRAM DMA | %X set to %X from %X at %d\n", (context->address/2) & (CRAM_SIZE-1), context->cram[(context->address/2) & (CRAM_SIZE-1)], (context->regs[REG_DMASRC_H] << 17) | (context->regs[REG_DMASRC_M] << 9) | (context->regs[REG_DMASRC_L] << 1), context->cycles);
 				break;
 			case VSRAM_WRITE:
 				if (((context->address/2) & 63) < VSRAM_SIZE) {
@@ -310,7 +311,7 @@
 					}
 					break;
 				case CRAM_WRITE:
-					//printf("CRAM Write: %X to %X\n", start->value, context->address);
+					//printf("CRAM Write | %X to %X\n", start->value, (start->address/2) & (CRAM_SIZE-1));
 					context->cram[(start->address/2) & (CRAM_SIZE-1)] = start->value;
 					break;
 				case VSRAM_WRITE:
@@ -1047,9 +1048,9 @@
 			if (reg < VDP_REGS) {
 				//printf("register %d set to %X\n", reg, value & 0xFF);
 				context->regs[reg] = value;
-				/*if (reg == REG_MODE_2) {
-					printf("Display is now %s\n", (context->regs[REG_MODE_2] & DISPLAY_ENABLE) ? "enabled" : "disabled");
-				}*/
+				if (reg == REG_MODE_2) {
+					//printf("Display is now %s\n", (context->regs[REG_MODE_2] & DISPLAY_ENABLE) ? "enabled" : "disabled");
+				}
 			}
 		} else {
 			context->flags |= FLAG_PENDING;