changeset 894:a7774fc2de4b

Partially working change to do proper stack alignment rather than doing a lame alignment check when calling a C compiled function. 68K core seems okay, but Z80 is busted.
author Michael Pavone <pavone@retrodev.com>
date Wed, 25 Nov 2015 08:40:45 -0800
parents 4f46b4cd5035
children 13388ab6d78a
files backend_x86.c gen.c gen.h gen_x86.c m68k_core.c m68k_core_x86.c z80_to_x86.c
diffstat 7 files changed, 60 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/backend_x86.c	Sun Nov 22 14:43:51 2015 -0800
+++ b/backend_x86.c	Wed Nov 25 08:40:45 2015 -0800
@@ -173,6 +173,7 @@
 							pop_r(code, opts->scratch2);
 						} else {
 							add_ir(code, sizeof(void*), RSP, SZ_D);
+							code->stack_off -= sizeof(void *);
 						}
 					} else {
 						push_r(code, opts->scratch2);
--- a/gen.c	Sun Nov 22 14:43:51 2015 -0800
+++ b/gen.c	Wed Nov 25 08:40:45 2015 -0800
@@ -12,4 +12,5 @@
 		fatal_error("Failed to allocate memory for generated code\n");
 	}
 	code->last = code->cur + size/sizeof(code_word) - RESERVE_WORDS;
+	code->stack_off = 0;
 }
--- a/gen.h	Sun Nov 22 14:43:51 2015 -0800
+++ b/gen.h	Wed Nov 25 08:40:45 2015 -0800
@@ -15,6 +15,7 @@
 typedef struct {
 	code_ptr cur;
 	code_ptr last;
+	uint32_t stack_off;
 } code_info;
 
 void check_alloc_code(code_info *code, uint32_t inst_size);
@@ -26,7 +27,8 @@
 //call a function and put the arguments in the appropriate place according to the host ABI
 void call_args(code_info *code, code_ptr fun, uint32_t num_args, ...);
 //like the above, but follows other aspects of the ABI like stack alignment
-void call_args_abi(code_info *code, code_ptr fun, uint32_t num_args, ...);
+//void call_args_abi(code_info *code, code_ptr fun, uint32_t num_args, ...);
+#define call_args_abi call_args
 void save_callee_save_regs(code_info *code);
 void restore_callee_save_regs(code_info *code);
 
--- a/gen_x86.c	Sun Nov 22 14:43:51 2015 -0800
+++ b/gen_x86.c	Wed Nov 25 08:40:45 2015 -0800
@@ -1592,6 +1592,7 @@
 	}
 	*(out++) = OP_PUSH | reg;
 	code->cur = out;
+	code->stack_off += sizeof(void *);
 }
 
 void push_rdisp(code_info *code, uint8_t base, int32_t disp)
@@ -1599,6 +1600,7 @@
 	//This instruction has no explicit size, so we pass SZ_B
 	//to avoid any prefixes or bits being set
 	x86_rdisp_size(code, OP_SINGLE_EA, OP_EX_PUSH_EA, base, disp, SZ_B);
+	code->stack_off += sizeof(void *);
 }
 
 void pop_r(code_info *code, uint8_t reg)
@@ -1611,6 +1613,7 @@
 	}
 	*(out++) = OP_POP | reg;
 	code->cur = out;
+	code->stack_off -= sizeof(void *);
 }
 
 void pop_rind(code_info *code, uint8_t reg)
@@ -1624,6 +1627,7 @@
 	*(out++) = PRE_XOP;
 	*(out++) = MODE_REG_INDIRECT | reg;
 	code->cur = out;
+	code->stack_off -=  sizeof(void *);
 }
 
 void setcc_r(code_info *code, uint8_t cc, uint8_t dst)
@@ -1966,6 +1970,13 @@
 
 void call(code_info *code, code_ptr fun)
 {
+	code->stack_off += sizeof(void *);
+	int32_t adjust = 0;
+	if (code->stack_off & 0xF) {
+		adjust = 16 - (code->stack_off & 0xF);
+		code->stack_off += adjust;
+		sub_ir(code, adjust, RSP, SZ_PTR);
+	}
 	check_alloc_code(code, 5);
 	code_ptr out = code->cur;
 	ptrdiff_t disp = fun-(out+5);
@@ -1983,6 +1994,10 @@
 		fatal_error("call: %p - %p = %lX which is out of range for a 32-bit displacement\n", fun, out + 5, (long)disp);
 	}
 	code->cur = out;
+	if (adjust) {
+		add_ir(code, adjust, RSP, SZ_PTR);
+	}
+	code->stack_off -= sizeof(void *) + adjust;
 }
 
 void call_raxfallback(code_info *code, code_ptr fun)
@@ -2008,11 +2023,22 @@
 
 void call_r(code_info *code, uint8_t dst)
 {
+	code->stack_off += sizeof(void *);
+	int32_t adjust = 0;
+	if (code->stack_off & 0xF) {
+		adjust = 16 - (code->stack_off & 0xF);
+		code->stack_off += adjust;
+		sub_ir(code, adjust, RSP, SZ_PTR);
+	}
 	check_alloc_code(code, 2);
 	code_ptr out = code->cur;
 	*(out++) = OP_SINGLE_EA;
 	*(out++) = MODE_REG_DIRECT | dst | (OP_EX_CALL_EA << 3);
 	code->cur = out;
+	if (adjust) {
+		add_ir(code, adjust, RSP, SZ_PTR);
+	}
+	code->stack_off -= sizeof(void *) + adjust;
 }
 
 void retn(code_info *code)
@@ -2084,8 +2110,15 @@
 	{
 		push_r(code, arg_arr[i]);
 	}
+	uint32_t stack_off_call = code->stack_off + sizeof(void *);
+	uint32_t adjust = 0;
+	if (stack_off_call & 0xF) {
+		adjust = 16 - (stack_off_call & 0xF);
+		sub_ir(code, adjust, RSP, SZ_PTR);
+		code->stack_off += adjust;
+	}
 
-	return stack_args * sizeof(void *);
+	return stack_args * sizeof(void *) + adjust;
 }
 
 void call_args(code_info *code, code_ptr fun, uint32_t num_args, ...)
@@ -2097,9 +2130,10 @@
 	call_raxfallback(code, fun);
 	if (adjust) {
 		add_ir(code, adjust, RSP, SZ_PTR);
+		code->stack_off -= adjust;
 	}
 }
-
+/*
 void call_args_abi(code_info *code, code_ptr fun, uint32_t num_args, ...)
 {
 	va_list args;
@@ -2125,7 +2159,7 @@
 	*no_adjust_rsp = code->cur - (no_adjust_rsp+1);
 #endif
 }
-
+*/
 void save_callee_save_regs(code_info *code)
 {
 	push_r(code, RBX);
--- a/m68k_core.c	Sun Nov 22 14:43:51 2015 -0800
+++ b/m68k_core.c	Wed Nov 25 08:40:45 2015 -0800
@@ -827,6 +827,10 @@
 		m68k_disasm(inst, disasm_buf);
 		fatal_error("%X: %s\ninstruction %d not yet implemented\n", inst->address, disasm_buf, inst->op);
 	}
+	if (opts->gen.code.stack_off) {
+		m68k_disasm(inst, disasm_buf);
+		fatal_error("Stack offset is %X after %X: %s\n", opts->gen.code.stack_off, inst->address, disasm_buf);
+	}
 }
 
 void translate_m68k_stream(uint32_t address, m68k_context * context)
--- a/m68k_core_x86.c	Sun Nov 22 14:43:51 2015 -0800
+++ b/m68k_core_x86.c	Wed Nov 25 08:40:45 2015 -0800
@@ -1674,6 +1674,7 @@
 	set_flag(opts, 0, FLAG_C);
 	push_r(code, RDX);
 	push_r(code, RAX);
+	uint32_t tmp_stack_off = code->stack_off;
 	if (dst_op->mode == MODE_REG_DIRECT) {
 		mov_rr(code, dst_op->base, RAX, SZ_D);
 	} else {
@@ -1717,6 +1718,8 @@
 	mov_ir(code, VECTOR_INT_DIV_ZERO, opts->gen.scratch2, SZ_D);
 	mov_ir(code, inst->address+isize, opts->gen.scratch1, SZ_D);
 	jmp(code, opts->trap);
+
+	code->stack_off = tmp_stack_off;
 	*not_zero = code->cur - (not_zero+1);
 	if (inst->op == M68K_DIVS) {
 		cdq(code);
@@ -1761,6 +1764,7 @@
 	}
 	code_ptr end_off = code->cur + 1;
 	jmp(code, code->cur + 2);
+	code->stack_off = tmp_stack_off;
 	*norm_off = code->cur - (norm_off + 1);
 	if (inst->op == M68K_DIVS) {
 		*skip_sec_check = code->cur - (skip_sec_check+1);
@@ -2514,9 +2518,14 @@
 	jcc(code, CC_NZ, do_ret);
 	retn(code);
 	*do_ret = code->cur - (do_ret+1);
+	uint32_t tmp_stack_off = code->stack_off;
+	//fetch return address and adjust RSP
 	pop_r(code, opts->gen.scratch1);
+	add_ir(code, 16-sizeof(void *), RSP, SZ_PTR);
+	//save return address for restoring later
 	mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, offsetof(m68k_context, resume_pc), SZ_PTR);
 	retn(code);
+	code->stack_off = tmp_stack_off;
 	*do_int = code->cur - (do_int+1);
 	//implement 1 instruction latency
 	cmp_irdisp(code, 0, opts->gen.context_reg, offsetof(m68k_context, int_pending), SZ_B);
@@ -2593,9 +2602,12 @@
 	call(code, opts->native_addr_and_sync);
 	//2 prefetch bus operations + 2 idle bus cycles
 	cycles(&opts->gen, 10);
+	tmp_stack_off = code->stack_off;
 	//discard function return address
 	pop_r(code, opts->gen.scratch2);
+	add_ir(code, 16-sizeof(void *), RSP, SZ_PTR);
 	jmp_r(code, opts->gen.scratch1);
+	code->stack_off = tmp_stack_off;
 
 	opts->trap = code->cur;
 	push_r(code, opts->gen.scratch2);
--- a/z80_to_x86.c	Sun Nov 22 14:43:51 2015 -0800
+++ b/z80_to_x86.c	Wed Nov 25 08:40:45 2015 -0800
@@ -2695,6 +2695,7 @@
 {
 	z80_options * opts = context->options;
 	code_info *code = &opts->gen.code;
+	uint32_t start_stack_off = code->stack_off;
 	check_code_prologue(code);
 	context->bp_stub = code->cur;
 
@@ -2727,6 +2728,7 @@
 	pop_r(code, opts->gen.scratch1);
 	add_ir(code, check_int_size - patch_size, opts->gen.scratch1, SZ_PTR);
 	jmp_r(code, opts->gen.scratch1);
+	code->stack_off = start_stack_off;
 }
 
 void zinsert_breakpoint(z80_context * context, uint16_t address, uint8_t * bp_handler)