changeset 447:e730fc040169

Fix performance regression from stop instruction work
author Mike Pavone <pavone@retrodev.com>
date Sat, 20 Jul 2013 23:40:28 -0700
parents 1e828ed04a7c
children e85a107e6ec0
files blastem.c m68k_to_x86.c
diffstat 2 files changed, 63 insertions(+), 61 deletions(-) [+]
line wrap: on
line diff
--- a/blastem.c	Fri Jul 19 22:44:00 2013 -0700
+++ b/blastem.c	Sat Jul 20 23:40:28 2013 -0700
@@ -52,7 +52,7 @@
 	uint8_t block[SMD_BLOCK_SIZE];
 	filesize -= SMD_HEADER_SIZE;
 	fseek(f, SMD_HEADER_SIZE, SEEK_SET);
-	
+
 	uint16_t * dst = cart;
 	while (filesize > 0) {
 		fread(block, 1, SMD_BLOCK_SIZE, f);
@@ -139,15 +139,15 @@
 				if (next_hint < context->int_cycle) {
 					context->int_cycle = next_hint;
 					context->int_num = 4;
-			
+
 				}
 			}
 		}
 	}
 
 	context->target_cycle = context->int_cycle < context->sync_cycle ? context->int_cycle : context->sync_cycle;
-	/*printf("Cyc: %d, Trgt: %d, Int Cyc: %d, Int: %d, Mask: %X, V: %d, H: %d, HICount: %d, HReg: %d, Line: %d\n", 
-		context->current_cycle, context->target_cycle, context->int_cycle, context->int_num, (context->status & 0x7), 
+	/*printf("Cyc: %d, Trgt: %d, Int Cyc: %d, Int: %d, Mask: %X, V: %d, H: %d, HICount: %d, HReg: %d, Line: %d\n",
+		context->current_cycle, context->target_cycle, context->int_cycle, context->int_num, (context->status & 0x7),
 		v_context->regs[REG_MODE_2] & 0x20, v_context->regs[REG_MODE_1] & 0x10, v_context->hint_counter, v_context->regs[REG_HINT], v_context->cycles / MCLKS_LINE);*/
 }
 
@@ -199,7 +199,7 @@
 	//printf("YM | Cycle: %d, bpos: %d, PSG | Cycle: %d, bpos: %d\n", gen->ym->current_cycle, gen->ym->buffer_pos, gen->psg->cycles, gen->psg->buffer_pos * 2);
 	psg_run(gen->psg, target);
 	ym_run(gen->ym, target);
-	
+
 	//printf("Target: %d, YM bufferpos: %d, PSG bufferpos: %d\n", target, gen->ym->buffer_pos, gen->psg->buffer_pos * 2);
 }
 
@@ -221,7 +221,7 @@
 		}
 		//printf("reached frame end | 68K Cycles: %d, MCLK Cycles: %d\n", context->current_cycle, mclks);
 		vdp_run_context(v_context, mclks_per_frame);
-		
+
 		if (!headless) {
 			break_on_sync |= wait_render_frame(v_context, frame_limit);
 		}
@@ -258,12 +258,6 @@
 		context->int_ack = 0;
 	}
 	adjust_int_cycle(context, v_context);
-	if (context->current_cycle <= context->sync_cycle) {
-		context->sync_cycle = context->current_cycle + 4;
-		if (context->sync_cycle < context->int_cycle) {
-			context->target_cycle = context->sync_cycle;
-		}
-	}
 	if (break_on_sync && address) {
 		break_on_sync = 0;
 		debugger(context, address);
@@ -504,7 +498,7 @@
 				}
 				if (value & 1) {
 					dputs("bus requesting Z80");
-					
+
 					if(!reset && !busreq) {
 						busack_cycle = ((gen->z80->current_cycle + Z80_ACK_DELAY) * MCLKS_PER_Z80) / MCLKS_PER_68K;//context->current_cycle + Z80_ACK_DELAY;
 						new_busack = Z80_REQ_ACK;
@@ -526,7 +520,7 @@
 					}
 					//busack_cycle = CYCLE_NEVER;
 					//busack = Z80_REQ_BUSY;
-					
+
 				}
 			} else if (location == 0x1200) {
 				sync_z80(gen->z80, context->current_cycle * MCLKS_PER_68K);
@@ -1441,7 +1435,7 @@
 				//Z80 debug commands
 				switch(input_buf[1])
 				{
-				case 'b': 
+				case 'b':
 					param = find_param(input_buf);
 					if (!param) {
 						fputs("zb command requires a parameter\n", stderr);
@@ -1560,7 +1554,7 @@
 	context->flags[ZF_Z] = f & 1;
 	f >>= 1;
 	context->flags[ZF_S] = f;
-	
+
 	context->regs[Z80_A] = *curpos;
 	curpos += 3;
 	for (int reg = Z80_C; reg <= Z80_IYH; reg++) {
@@ -1648,7 +1642,7 @@
 	adjust_int_cycle(gen->m68k, gen->vdp);
 	fclose(gstfile);
 	return pc;
-	
+
 error_close:
 	fclose(gstfile);
 error:
@@ -1666,7 +1660,7 @@
 const memmap_chunk static_map[] = {
 		{0,        0x400000,  0xFFFFFF, 0, MMAP_READ,                          cart,
 		           NULL,          NULL,         NULL,            NULL},
-		{0xE00000, 0x1000000, 0xFFFF,   0, MMAP_READ | MMAP_WRITE | MMAP_CODE, ram, 
+		{0xE00000, 0x1000000, 0xFFFF,   0, MMAP_READ | MMAP_WRITE | MMAP_CODE, ram,
 		           NULL,          NULL,         NULL,            NULL},
 		{0xC00000, 0xE00000,  0x1FFFFF, 0, 0,                                  NULL,
 		           (read_16_fun)vdp_port_read,  (write_16_fun)vdp_port_write,
@@ -1719,7 +1713,7 @@
 			memmap[0].mask = 0xFFFFFF;
 			memmap[0].flags = MMAP_READ;
 			memmap[0].buffer = cart;
-			
+
 			ram_start &= 0xFFFFFE;
 			ram_end |= 1;
 			memmap[1].start = ram_start;
@@ -1736,7 +1730,7 @@
 				size /= 2;
 			}
 			memmap[1].buffer = gen->save_ram = malloc(size);
-			
+
 			memcpy(memmap+2, static_map+1, sizeof(static_map)-sizeof(static_map[0]));
 			num_chunks = sizeof(static_map)/sizeof(memmap_chunk)+1;
 		} else {
@@ -1745,7 +1739,7 @@
 			memmap[0].mask = 0xFFFFFF;
 			memmap[0].flags = MMAP_READ;
 			memmap[0].buffer = cart;
-			
+
 			memmap[1].start = 0x200000;
 			memmap[1].end = 0x400000;
 			memmap[1].mask = 0x1FFFFF;
@@ -1765,7 +1759,7 @@
 			memmap[num_chunks].end = 0xA13100;
 			memmap[num_chunks].mask = 0xFF;
 			memmap[num_chunks].write_16 = (write_16_fun)write_bank_reg_w;
-			memmap[num_chunks].write_8 = (write_8_fun)write_bank_reg_b; 
+			memmap[num_chunks].write_8 = (write_8_fun)write_bank_reg_b;
 			num_chunks++;
 			ram_end++;
 			size = ram_end-ram_start;
@@ -1794,7 +1788,7 @@
 	init_x86_68k_opts(&opts, memmap, num_chunks);
 	opts.address_log = address_log;
 	init_68k_context(&context, opts.native_code_map, &opts);
-	
+
 	context.video_context = gen->vdp;
 	context.system = gen;
 	//cartridge ROM
@@ -1998,15 +1992,15 @@
 		render_init(width, height, title, fps, fullscreen);
 	}
 	vdp_context v_context;
-	
+
 	init_vdp_context(&v_context);
-	
+
 	ym2612_context y_context;
 	ym_init(&y_context, render_sample_rate(), fps == 60 ? MCLKS_NTSC : MCLKS_PAL, MCLKS_PER_YM, render_audio_buffer(), ym_log ? YM_OPT_WAVE_LOG : 0);
-	
+
 	psg_context p_context;
 	psg_init(&p_context, render_sample_rate(), fps == 60 ? MCLKS_NTSC : MCLKS_PAL, MCLKS_PER_PSG, render_audio_buffer());
-	
+
 	z80_context z_context;
 	x86_z80_options z_opts;
 	init_x86_z80_opts(&z_opts);
@@ -2020,13 +2014,13 @@
 	z_context.sync_cycle = z_context.target_cycle = mclks_per_frame/MCLKS_PER_Z80;
 	z_context.int_cycle = CYCLE_NEVER;
 	z_context.mem_pointers[1] = z_context.mem_pointers[2] = (uint8_t *)cart;
-	
+
 	gen.z80 = &z_context;
 	gen.vdp = &v_context;
 	gen.ym = &y_context;
 	gen.psg = &p_context;
 	genesis = &gen;
-	
+
 	int fname_size = strlen(argv[1]);
 	sram_filename = malloc(fname_size+6);
 	memcpy(sram_filename, argv[1], fname_size);
@@ -2041,7 +2035,7 @@
 		strcpy(sram_filename + fname_size, ".sram");
 	}
 	set_keybindings();
-	
+
 	init_run_cpu(&gen, debug, address_log, statefile);
 	return 0;
 }
--- a/m68k_to_x86.c	Fri Jul 19 22:44:00 2013 -0700
+++ b/m68k_to_x86.c	Sat Jul 20 23:40:28 2013 -0700
@@ -123,9 +123,9 @@
 		//We only get one memory parameter, so if the dst operand is a register in memory,
 		//we need to copy this to a temp register first
 		reg = native_reg(&(inst->dst), opts);
-		if (reg >= 0 || inst->dst.addr_mode == MODE_UNUSED || !(inst->dst.addr_mode == MODE_REG || inst->dst.addr_mode == MODE_AREG) 
+		if (reg >= 0 || inst->dst.addr_mode == MODE_UNUSED || !(inst->dst.addr_mode == MODE_REG || inst->dst.addr_mode == MODE_AREG)
 		    || inst->op == M68K_EXG) {
-			
+
 			ea->mode = MODE_REG_DISPLACE8;
 			ea->base = CONTEXT;
 			ea->disp = reg_offset(&(inst->src));
@@ -150,7 +150,7 @@
 			out = sub_irdisp8(out, dec_amount, CONTEXT, reg_offset(&(inst->src)), SZ_D);
 		}
 	case MODE_AREG_INDIRECT:
-	case MODE_AREG_POSTINC:	
+	case MODE_AREG_POSTINC:
 		if (opts->aregs[inst->src.params.regs.pri] >= 0) {
 			out = mov_rr(out, opts->aregs[inst->src.params.regs.pri], SCRATCH1, SZ_D);
 		} else {
@@ -168,7 +168,7 @@
 			out = call(out, opts->read_32);
 			break;
 		}
-		
+
 		if (inst->src.addr_mode == MODE_AREG_POSTINC) {
 			inc_amount = inst->extra.size == OPSIZE_WORD ? 2 : (inst->extra.size == OPSIZE_LONG ? 4 : (inst->src.params.regs.pri == 7 ? 2 : 1));
 			if (opts->aregs[inst->src.params.regs.pri] >= 0) {
@@ -441,7 +441,7 @@
 				out = mov_rdisp8r(out, CONTEXT, reg_offset(&(inst->dst)), SCRATCH2, SZ_D);
 			}
 		}
-		
+
 		if (inst->dst.addr_mode == MODE_AREG_POSTINC) {
 			inc_amount = inst->extra.size == OPSIZE_WORD ? 2 : (inst->extra.size == OPSIZE_LONG ? 4 : (inst->dst.params.regs.pri == 7 ? 2 : 1));
 			if (opts->aregs[inst->dst.params.regs.pri] >= 0) {
@@ -781,7 +781,7 @@
 		dst = mov_ir(dst, 0, FLAG_V, SZ_B);
 		dst = mov_ir(dst, 0, FLAG_C, SZ_B);
 	}
-	
+
 	if (inst->dst.addr_mode != MODE_AREG) {
 		if (src.mode == MODE_REG_DIRECT) {
 			flags_reg = src.base;
@@ -2459,7 +2459,7 @@
 				dst = pop_r(dst, SCRATCH2);
 				dst = mov_rr(dst, reg, SCRATCH1, SZ_D);
 				dst = shr_ir(dst, 16, SCRATCH1, SZ_D);
-				
+
 			} else {
 				dst = mov_rdisp8r(dst, CONTEXT, reg_offset(&(inst->src))+3, SCRATCH1, SZ_B);
 				dst = push_r(dst, SCRATCH2);
@@ -2527,7 +2527,7 @@
 		dst = push_r(dst, SCRATCH1);
 		dst = call(dst, opts->read_8);
 		if (reg >= 0) {
-			
+
 			dst = shl_ir(dst, 8, SCRATCH1, SZ_W);
 			dst = mov_rr(dst, SCRATCH1, reg, SZ_W);
 			dst = pop_r(dst, SCRATCH1);
@@ -2628,7 +2628,7 @@
 				} else {
 					dst = mov_rdisp8r(dst, src_op->base, src_op->disp, RCX, SZ_B);
 				}
-				
+
 			}
 			dst = and_ir(dst, 63, RCX, SZ_D);
 			nz_off = dst+1;
@@ -2676,7 +2676,7 @@
 					if (inst->extra.size == OPSIZE_LONG) {
 						uint8_t * neq_32_off = dst + 1;
 						dst = jcc(dst, CC_NZ, dst+2);
-			
+
 						//set the carry bit to the lsb
 						if (dst_op->mode == MODE_REG_DIRECT) {
 							dst = special(dst, 1, dst_op->base, SZ_D);
@@ -2703,7 +2703,7 @@
 						dst = shift_irdisp8(dst, 31, dst_op->base, dst_op->disp, inst->extra.size);
 						dst = shift_irdisp8(dst, 1, dst_op->base, dst_op->disp, inst->extra.size);
 					}
-				
+
 				}
 				end_off = dst+1;
 				dst = jmp(dst, dst+2);
@@ -2715,7 +2715,7 @@
 				}
 			}
 		}
-		
+
 	}
 	if (!special && end_off) {
 		*end_off = dst - (end_off + 1);
@@ -3084,7 +3084,7 @@
 		default:
 			isize = 2;
 		}
-		uint8_t * passed = dst+1;			
+		uint8_t * passed = dst+1;
 		dst = jcc(dst, CC_GE, dst+2);
 		dst = mov_ir(dst, 1, FLAG_N, SZ_B);
 		dst = mov_ir(dst, VECTOR_CHK, SCRATCH2, SZ_D);
@@ -3322,7 +3322,7 @@
 			}
 			dst = call(dst, (uint8_t *)(inst->op == M68K_MOVE_SR ? set_sr : set_ccr));
 			dst = cycles(dst, 12);
-			
+
 		}
 		break;
 	case M68K_MOVE_USP:
@@ -3446,7 +3446,7 @@
 			dst = not_rdisp8(dst, dst_op.base, dst_op.disp, inst->extra.size);
 			dst = cmp_irdisp8(dst, 0, dst_op.base, dst_op.disp, inst->extra.size);
 		}
-		
+
 		dst = mov_ir(dst, 0, FLAG_C, SZ_B);
 		dst = setcc_r(dst, CC_Z, FLAG_Z);
 		dst = setcc_r(dst, CC_S, FLAG_N);
@@ -3800,7 +3800,15 @@
 		}
 		uint8_t * loop_top = dst;
 		dst = call(dst, (uint8_t *)do_sync);
+    dst = cmp_rr(dst, LIMIT, CYCLES, SZ_D);
+    uint8_t * normal_cycle_up = dst + 1;
+    dst = jcc(dst, CC_A, dst+2);
+    dst = cycles(dst, BUS);
+    uint8_t * after_cycle_up = dst + 1;
+    dst = jmp(dst, dst+2);
+    *normal_cycle_up = dst - (normal_cycle_up + 1);
 		dst = mov_rr(dst, LIMIT, CYCLES, SZ_D);
+    *after_cycle_up = dst - (after_cycle_up+1);
 		dst = cmp_rdisp8r(dst, CONTEXT, offsetof(m68k_context, int_cycle), CYCLES, SZ_D);
 		dst = jcc(dst, CC_C, loop_top);
 		break;
@@ -3867,7 +3875,7 @@
 			dst = rol_irdisp8(dst, 16, src_op.base, src_op.disp, SZ_D);
 			dst = cmp_irdisp8(dst, 0, src_op.base, src_op.disp, SZ_D);
 		}
-		
+
 		dst = mov_ir(dst, 0, FLAG_C, SZ_B);
 		dst = setcc_r(dst, CC_Z, FLAG_Z);
 		dst = setcc_r(dst, CC_S, FLAG_N);
@@ -3937,7 +3945,7 @@
 	m68kinst instbuf;
 	x86_68k_options * opts = context->options;
 	uint8_t * dst = opts->cur_code;
-	uint8_t * dst_end = opts->code_end; 
+	uint8_t * dst_end = opts->code_end;
 	address &= 0xFFFFFF;
 	if(get_native_address(opts->native_code_map, address)) {
 		return dst;
@@ -4065,7 +4073,7 @@
 				return orig_start;
 			}
 		}
-		
+
 		map_native_address(context, instbuf.address, dst, (after-inst)*2, MAX_NATIVE_SIZE);
 		opts->cur_code = dst+MAX_NATIVE_SIZE;
 		jmp(orig_start, dst);
@@ -4112,12 +4120,12 @@
 		}
 		bp_stub = dst;
 		native = call(native, bp_stub);
-		
+
 		//Calculate length of prologue
 		dst = check_cycles_int(dst, address, opts);
 		int check_int_size = dst-bp_stub;
 		dst = bp_stub;
-		
+
 		//Save context and call breakpoint handler
 		dst = call(dst, (uint8_t *)m68k_save_context);
 		dst = push_r(dst, SCRATCH1);
@@ -4195,7 +4203,7 @@
 			ub_jcc = dst + 1;
 			dst = jcc(dst, CC_NC, dst+2);
 		}
-		
+
 		if (memmap[chunk].mask != 0xFFFFFF) {
 			dst = and_ir(dst, memmap[chunk].mask, adr_reg, SZ_D);
 		}
@@ -4239,7 +4247,7 @@
 						dst = mov_rr(dst, RAX, SCRATCH1, size);
 					}
 					dst = jmp(dst, (uint8_t *)m68k_load_context);
-					
+
 					*not_null = dst - (not_null + 1);
 				}
 				if (size == SZ_B) {
@@ -4248,7 +4256,7 @@
 				dst = add_rdisp8r(dst, CONTEXT, offsetof(m68k_context, mem_pointers) + sizeof(void*) * memmap[chunk].ptr_index, adr_reg, SZ_Q);
 				if (is_write) {
 					dst = mov_rrind(dst, SCRATCH1, SCRATCH2, size);
-					
+
 				} else {
 					dst = mov_rindr(dst, SCRATCH1, SCRATCH1, size);
 				}
@@ -4377,14 +4385,14 @@
 	opts->code_end = opts->cur_code + size;
 	opts->ram_inst_sizes = malloc(sizeof(uint8_t *) * 64);
 	memset(opts->ram_inst_sizes, 0, sizeof(uint8_t *) * 64);
-	
+
 	opts->read_16 = gen_mem_fun(opts, memmap, num_chunks, READ_16);
 	opts->read_8 = gen_mem_fun(opts, memmap, num_chunks, READ_8);
 	opts->write_16 = gen_mem_fun(opts, memmap, num_chunks, WRITE_16);
 	opts->write_8 = gen_mem_fun(opts, memmap, num_chunks, WRITE_8);
-	
+
 	uint8_t * dst = opts->cur_code;
-	
+
 	opts->read_32 = dst;
 	dst = push_r(dst, SCRATCH1);
 	dst = call(dst, opts->read_16);
@@ -4398,7 +4406,7 @@
 	dst = shl_ir(dst, 16, SCRATCH2, SZ_D);
 	dst = or_rr(dst, SCRATCH2, SCRATCH1, SZ_D);
 	dst = retn(dst);
-	
+
 	opts->write_32_lowfirst = dst;
 	dst = push_r(dst, SCRATCH2);
 	dst = push_r(dst, SCRATCH1);
@@ -4408,7 +4416,7 @@
 	dst = pop_r(dst, SCRATCH2);
 	dst = shr_ir(dst, 16, SCRATCH1, SZ_D);
 	dst = jmp(dst, opts->write_16);
-	
+
 	opts->write_32_highfirst = dst;
 	dst = push_r(dst, SCRATCH1);
 	dst = push_r(dst, SCRATCH2);
@@ -4418,7 +4426,7 @@
 	dst = pop_r(dst, SCRATCH1);
 	dst = add_ir(dst, 2, SCRATCH2, SZ_D);
 	dst = jmp(dst, opts->write_16);
-	
+
 	opts->handle_cycle_limit_int = dst;
 	dst = cmp_rdisp8r(dst, CONTEXT, offsetof(m68k_context, int_cycle), CYCLES, SZ_D);
 	uint8_t * do_int = dst+1;
@@ -4470,7 +4478,7 @@
 	//discard function return address
 	dst = pop_r(dst, SCRATCH2);
 	dst = jmp_r(dst, SCRATCH1);
-	
+
 	opts->trap = dst;
 	dst = push_r(dst, SCRATCH2);
 	//swap USP and SSP if not already in supervisor mode
@@ -4499,7 +4507,7 @@
 	dst = call(dst, (uint8_t *)m68k_native_addr_and_sync);
 	dst = cycles(dst, 18);
 	dst = jmp_r(dst, SCRATCH1);
-	
+
 	opts->cur_code = dst;
 }