diff gen_x86.c @ 567:8e395210f50f

Refactor gen_x86 to use an interface more like gen_arm and to remove the need for the caller to decide whether an 8-bit or 32-bit displacement is needed in the rdisp functions. Update m68k_to_x86 to use the new version of the gen_x86 functions and do some minor refactoring there in the process
author Michael Pavone <pavone@retrodev.com>
date Sun, 02 Mar 2014 14:45:36 -0800
parents 96489fb27dbf
children 5ef3fe516da9
line wrap: on
line diff
--- a/gen_x86.c	Sun Mar 02 14:41:43 2014 -0800
+++ b/gen_x86.c	Sun Mar 02 14:45:36 2014 -0800
@@ -4,7 +4,7 @@
  BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
 */
 #include "gen_x86.h"
-#include "68kinst.h"
+#include "mem.h"
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -127,8 +127,55 @@
 	X86_R15
 } x86_regs_enc;
 
-uint8_t * x86_rr_sizedir(uint8_t * out, uint16_t opcode, uint8_t src, uint8_t dst, uint8_t size)
+//Emits an unconditional jump to dest at code->cur; does not call check_alloc_code itself
+void jmp_nocheck(code_info *code, code_ptr dest)
 {
+	code_ptr out = code->cur;
+	ptrdiff_t disp = dest-(out+2);
+	if (disp <= 0x7F && disp >= -0x80) {
+		//short form: opcode plus a single displacement value, measured from out+2
+		*(out++) = OP_JMP_BYTE;
+		*(out++) = disp;
+	} else {
+		//long form: opcode plus 4 displacement values, so it is measured from out+5
+		disp = dest-(out+5);
+		if (disp <= 0x7FFFFFFF && disp >= -2147483648) {
+			*(out++) = OP_JMP;
+			*(out++) = disp;
+			*(out++) = disp >> 8;
+			*(out++) = disp >> 16;
+			*(out++) = disp >> 24;
+		} else {
+			fprintf(stderr, "jmp: %p - %p = %lX\n", (void *)dest, (void *)(out + 5), (long)disp);
+			exit(1);
+		}
+	}
+	code->cur = out;
+}
+
+//Ensures inst_size words can be emitted at code->cur, moving on to a newly allocated chunk if they would run past code->last
+void check_alloc_code(code_info *code, uint32_t inst_size)
+{
+	if (code->cur + inst_size > code->last) {
+		size_t size = CODE_ALLOC_SIZE;
+		code_ptr next_code = alloc_code(&size);
+		if (!next_code) {
+			fputs("Failed to allocate memory for generated code\n", stderr);
+			exit(1);
+		}
+		if (next_code != code->last + RESERVE_WORDS) {
+			//new chunk is not contiguous with the current one, so jump to it and keep emitting there
+			jmp_nocheck(code, next_code);
+			code->cur = next_code;
+		}
+		code->last = next_code + size/sizeof(code_word) - RESERVE_WORDS; //hold back RESERVE_WORDS at the end of the chunk
+	}
+}
+
+void x86_rr_sizedir(code_info *code, uint16_t opcode, uint8_t src, uint8_t dst, uint8_t size)
+{
+	check_alloc_code(code, 5);
+	code_ptr out = code->cur;
 	uint8_t tmp;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
@@ -175,60 +222,13 @@
 		*(out++) = opcode;
 	}
 	*(out++) = MODE_REG_DIRECT | dst | (src << 3);
-	return out;
+	code->cur = out;
 }
 
-uint8_t * x86_rrdisp8_sizedir(uint8_t * out, uint16_t opcode, uint8_t reg, uint8_t base, int8_t disp, uint8_t size, uint8_t dir)
+void x86_rrdisp_sizedir(code_info *code, uint16_t opcode, uint8_t reg, uint8_t base, int32_t disp, uint8_t size, uint8_t dir)
 {
-	//TODO: Deal with the fact that AH, BH, CH and DH can only be in the R/M param when there's a REX prefix
-	uint8_t tmp;
-	if (size == SZ_W) {
-		*(out++) = PRE_SIZE;
-	}
-	if (size == SZ_Q || reg >= R8 || base >= R8 || (size == SZ_B && reg >= RSP && reg <= RDI)) {
-		*out = PRE_REX;
-		if (reg >= AH && reg <= BH) {
-			fprintf(stderr, "attempt to use *H reg in an instruction requiring REX prefix. opcode = %X\n", opcode);
-			exit(1);
-		}
-		if (size == SZ_Q) {
-			*out |= REX_QUAD;
-		}
-		if (reg >= R8) {
-			*out |= REX_REG_FIELD;
-			reg -= (R8 - X86_R8);
-		}
-		if (base >= R8) {
-			*out |= REX_RM_FIELD;
-			base -= (R8 - X86_R8);
-		}
-		out++;
-	}
-	if (size == SZ_B) {
-		if (reg >= AH && reg <= BH) {
-			reg -= (AH-X86_AH);
-		}
-	} else {
-		opcode |= BIT_SIZE;
-	}
-	opcode |= dir;
-	if (opcode >= 0x100) {
-		*(out++) = opcode >> 8;
-		*(out++) = opcode;
-	} else {
-		*(out++) = opcode;
-	}
-	*(out++) = MODE_REG_DISPLACE8 | base | (reg << 3);
-	if (base == RSP) {
-		//add SIB byte, with no index and RSP as base
-		*(out++) = (RSP << 3) | RSP;
-	}
-	*(out++) = disp;
-	return out;
-}
-
-uint8_t * x86_rrdisp32_sizedir(uint8_t * out, uint16_t opcode, uint8_t reg, uint8_t base, int32_t disp, uint8_t size, uint8_t dir)
-{
+	check_alloc_code(code, 10);
+	code_ptr out = code->cur;
 	//TODO: Deal with the fact that AH, BH, CH and DH can only be in the R/M param when there's a REX prefix
 	uint8_t tmp;
 	if (size == SZ_W) {
@@ -267,20 +267,28 @@
 	} else {
 		*(out++) = opcode;
 	}
-	*(out++) = MODE_REG_DISPLACE32 | base | (reg << 3);
+	if (disp < 128 && disp >= -128) {
+		*(out++) = MODE_REG_DISPLACE8 | base | (reg << 3);
+	} else {
+		*(out++) = MODE_REG_DISPLACE32 | base | (reg << 3);
+	}
 	if (base == RSP) {
 		//add SIB byte, with no index and RSP as base
 		*(out++) = (RSP << 3) | RSP;
 	}
 	*(out++) = disp;
-	*(out++) = disp >> 8;
-	*(out++) = disp >> 16;
-	*(out++) = disp >> 24;
-	return out;
+	if (disp >= 128 || disp < -128) {
+		*(out++) = disp >> 8;
+		*(out++) = disp >> 16;
+		*(out++) = disp >> 24;
+	}
+	code->cur = out;
 }
 
-uint8_t * x86_rrind_sizedir(uint8_t * out, uint8_t opcode, uint8_t reg, uint8_t base, uint8_t size, uint8_t dir)
+void x86_rrind_sizedir(code_info *code, uint8_t opcode, uint8_t reg, uint8_t base, uint8_t size, uint8_t dir)
 {
+	check_alloc_code(code, 5);
+	code_ptr out = code->cur;
 	//TODO: Deal with the fact that AH, BH, CH and DH can only be in the R/M param when there's a REX prefix
 	uint8_t tmp;
 	if (size == SZ_W) {
@@ -318,11 +326,13 @@
 		//add SIB byte, with no index and RSP as base
 		*(out++) = (RSP << 3) | RSP;
 	}
-	return out;
+	code->cur = out;
 }
 
-uint8_t * x86_rrindex_sizedir(uint8_t * out, uint8_t opcode, uint8_t reg, uint8_t base, uint8_t index, uint8_t scale, uint8_t size, uint8_t dir)
+void x86_rrindex_sizedir(code_info *code, uint8_t opcode, uint8_t reg, uint8_t base, uint8_t index, uint8_t scale, uint8_t size, uint8_t dir)
 {
+	check_alloc_code(code, 5);
+	code_ptr out = code->cur;
 	//TODO: Deal with the fact that AH, BH, CH and DH can only be in the R/M param when there's a REX prefix
 	uint8_t tmp;
 	if (size == SZ_W) {
@@ -359,18 +369,22 @@
 		opcode |= BIT_SIZE;
 	}
 	*(out++) = opcode | dir;
-	*(out++) = MODE_REG_INDIRECT | base | (RSP << 3);
-	if (base == RSP) {
-		if (scale == 4) {
-			scale = 3;
-		}
-		*(out++) = scale << 6 | (index << 3) | base;
+	*(out++) = MODE_REG_INDIRECT | RSP | (reg << 3);
+	if (scale == 4) {
+		scale = 2;
+	} else if(scale == 8) {
+		scale = 3;
+	} else {
+		scale--;
 	}
-	return out;
+	*(out++) = scale << 6 | (index << 3) | base;
+	code->cur = out;
 }
 
-uint8_t * x86_r_size(uint8_t * out, uint8_t opcode, uint8_t opex, uint8_t dst, uint8_t size)
+void x86_r_size(code_info *code, uint8_t opcode, uint8_t opex, uint8_t dst, uint8_t size)
 {
+	check_alloc_code(code, 4);
+	code_ptr out = code->cur;
 	uint8_t tmp;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
@@ -399,11 +413,13 @@
 	}
 	*(out++) = opcode;
 	*(out++) = MODE_REG_DIRECT | dst | (opex << 3);
-	return out;
+	code->cur = out;
 }
 
-uint8_t * x86_rdisp8_size(uint8_t * out, uint8_t opcode, uint8_t opex, uint8_t dst, int8_t disp, uint8_t size)
+void x86_rdisp_size(code_info *code, uint8_t opcode, uint8_t opex, uint8_t dst, int32_t disp, uint8_t size)
 {
+	check_alloc_code(code, 7);
+	code_ptr out = code->cur;
 	uint8_t tmp;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
@@ -423,13 +439,23 @@
 		opcode |= BIT_SIZE;
 	}
 	*(out++) = opcode;
-	*(out++) = MODE_REG_DISPLACE8 | dst | (opex << 3);
-	*(out++) = disp;
-	return out;
+	if (disp < 128 && disp >= -128) {
+		*(out++) = MODE_REG_DISPLACE8 | dst | (opex << 3);
+		*(out++) = disp;
+	} else {
+		*(out++) = MODE_REG_DISPLACE32 | dst | (opex << 3);
+		*(out++) = disp;
+		*(out++) = disp >> 8;
+		*(out++) = disp >> 16;
+		*(out++) = disp >> 24;
+	}
+	code->cur = out;
 }
 
-uint8_t * x86_ir(uint8_t * out, uint8_t opcode, uint8_t op_ex, uint8_t al_opcode, int32_t val, uint8_t dst, uint8_t size)
+void x86_ir(code_info *code, uint8_t opcode, uint8_t op_ex, uint8_t al_opcode, int32_t val, uint8_t dst, uint8_t size)
 {
+	check_alloc_code(code, 8);
+	code_ptr out = code->cur;
 	uint8_t sign_extend = 0;
 	if (opcode != OP_NOT_NEG && (size == SZ_D || size == SZ_Q) && val <= 0x7F && val >= -0x80) {
 		sign_extend = 1;
@@ -478,53 +504,13 @@
 			*(out++) = val;
 		}
 	}
-	return out;
+	code->cur = out;
 }
 
-uint8_t * x86_irdisp8(uint8_t * out, uint8_t opcode, uint8_t op_ex, int32_t val, uint8_t dst, int8_t disp, uint8_t size)
+void x86_irdisp(code_info *code, uint8_t opcode, uint8_t op_ex, int32_t val, uint8_t dst, int32_t disp, uint8_t size)
 {
-	uint8_t sign_extend = 0;
-	if ((size == SZ_D || size == SZ_Q) && val <= 0x7F && val >= -0x80) {
-		sign_extend = 1;
-		opcode |= BIT_DIR;
-	}
-	if (size == SZ_W) {
-		*(out++) = PRE_SIZE;
-	}
-
-	if (size == SZ_Q || dst >= R8) {
-		*out = PRE_REX;
-		if (size == SZ_Q) {
-			*out |= REX_QUAD;
-		}
-		if (dst >= R8) {
-			*out |= REX_RM_FIELD;
-			dst -= (R8 - X86_R8);
-		}
-		out++;
-	}
-	if (size != SZ_B) {
-		opcode |= BIT_SIZE;
-	}
-	*(out++) = opcode;
-	*(out++) = MODE_REG_DISPLACE8 | dst | (op_ex << 3);
-	*(out++) = disp;
-	*(out++) = val;
-	if (size != SZ_B && !sign_extend) {
-		val >>= 8;
-		*(out++) = val;
-		if (size != SZ_W) {
-			val >>= 8;
-			*(out++) = val;
-			val >>= 8;
-			*(out++) = val;
-		}
-	}
-	return out;
-}
-
-uint8_t * x86_irdisp32(uint8_t * out, uint8_t opcode, uint8_t op_ex, int32_t val, uint8_t dst, int32_t disp, uint8_t size)
-{
+	check_alloc_code(code, 12);
+	code_ptr out = code->cur;
 	uint8_t sign_extend = 0;
 	if ((size == SZ_D || size == SZ_Q) && val <= 0x7F && val >= -0x80) {
 		sign_extend = 1;
@@ -549,14 +535,19 @@
 		opcode |= BIT_SIZE;
 	}
 	*(out++) = opcode;
-	*(out++) = MODE_REG_DISPLACE32 | dst | (op_ex << 3);
-	*(out++) = disp;
-	disp >>= 8;
-	*(out++) = disp;
-	disp >>= 8;
-	*(out++) = disp;
-	disp >>= 8;
-	*(out++) = disp;
+	if (disp < 128 && disp >= -128) {
+		*(out++) = MODE_REG_DISPLACE8 | dst | (op_ex << 3);
+		*(out++) = disp;
+	} else {
+		*(out++) = MODE_REG_DISPLACE32 | dst | (op_ex << 3);
+		*(out++) = disp;
+		disp >>= 8;
+		*(out++) = disp;
+		disp >>= 8;
+		*(out++) = disp;
+		disp >>= 8;
+		*(out++) = disp;
+	}
 	*(out++) = val;
 	if (size != SZ_B && !sign_extend) {
 		val >>= 8;
@@ -568,12 +559,13 @@
 			*(out++) = val;
 		}
 	}
-	return out;
+	code->cur = out;
 }
 
-
-uint8_t * x86_shiftrot_ir(uint8_t * out, uint8_t op_ex, uint8_t val, uint8_t dst, uint8_t size)
+void x86_shiftrot_ir(code_info *code, uint8_t op_ex, uint8_t val, uint8_t dst, uint8_t size)
 {
+	check_alloc_code(code, 5);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -597,11 +589,13 @@
 	if (val != 1) {
 		*(out++) = val;
 	}
-	return out;
+	code->cur = out;
 }
 
-uint8_t * x86_shiftrot_irdisp8(uint8_t * out, uint8_t op_ex, uint8_t val, uint8_t dst, int8_t disp, uint8_t size)
+void x86_shiftrot_irdisp(code_info *code, uint8_t op_ex, uint8_t val, uint8_t dst, int32_t disp, uint8_t size)
 {
+	check_alloc_code(code, 9);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -621,16 +615,26 @@
 	}
 
 	*(out++) = (val == 1 ? OP_SHIFTROT_1: OP_SHIFTROT_IR) | (size == SZ_B ? 0 : BIT_SIZE);
-	*(out++) = MODE_REG_DISPLACE8 | dst | (op_ex << 3);
-	*(out++) = disp;
+	if (disp < 128 && disp >= -128) {
+		*(out++) = MODE_REG_DISPLACE8 | dst | (op_ex << 3);
+		*(out++) = disp;
+	} else {
+		*(out++) = MODE_REG_DISPLACE32 | dst | (op_ex << 3);
+		*(out++) = disp;
+		*(out++) = disp >> 8;
+		*(out++) = disp >> 16;
+		*(out++) = disp >> 24;
+	}
 	if (val != 1) {
 		*(out++) = val;
 	}
-	return out;
+	code->cur = out;
 }
 
-uint8_t * x86_shiftrot_clr(uint8_t * out, uint8_t op_ex, uint8_t dst, uint8_t size)
+void x86_shiftrot_clr(code_info *code, uint8_t op_ex, uint8_t dst, uint8_t size)
 {
+	check_alloc_code(code, 4);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -651,11 +655,13 @@
 
 	*(out++) = OP_SHIFTROT_CL | (size == SZ_B ? 0 : BIT_SIZE);
 	*(out++) = MODE_REG_DIRECT | dst | (op_ex << 3);
-	return out;
+	code->cur = out;
 }
 
-uint8_t * x86_shiftrot_clrdisp8(uint8_t * out, uint8_t op_ex, uint8_t dst, int8_t disp, uint8_t size)
+void x86_shiftrot_clrdisp(code_info *code, uint8_t op_ex, uint8_t dst, int32_t disp, uint8_t size)
 {
+	check_alloc_code(code, 8);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -675,497 +681,492 @@
 	}
 
 	*(out++) = OP_SHIFTROT_CL | (size == SZ_B ? 0 : BIT_SIZE);
-	*(out++) = MODE_REG_DISPLACE8 | dst | (op_ex << 3);
-	*(out++) = disp;
-	return out;
-}
-
-uint8_t * rol_ir(uint8_t * out, uint8_t val, uint8_t dst, uint8_t size)
-{
-	return x86_shiftrot_ir(out, OP_EX_ROL, val, dst, size);
+	if (disp < 128 && disp >= -128) {
+		*(out++) = MODE_REG_DISPLACE8 | dst | (op_ex << 3);
+		*(out++) = disp;
+	} else {
+		*(out++) = MODE_REG_DISPLACE32 | dst | (op_ex << 3);
+		*(out++) = disp;
+		*(out++) = disp >> 8;
+		*(out++) = disp >> 16;
+		*(out++) = disp >> 24;
+	}
+	code->cur = out;
 }
 
-uint8_t * ror_ir(uint8_t * out, uint8_t val, uint8_t dst, uint8_t size)
+void rol_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_ir(out, OP_EX_ROR, val, dst, size);
+	x86_shiftrot_ir(code, OP_EX_ROL, val, dst, size);
 }
 
-uint8_t * rcl_ir(uint8_t * out, uint8_t val, uint8_t dst, uint8_t size)
+void ror_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_ir(out, OP_EX_RCL, val, dst, size);
+	x86_shiftrot_ir(code, OP_EX_ROR, val, dst, size);
 }
 
-uint8_t * rcr_ir(uint8_t * out, uint8_t val, uint8_t dst, uint8_t size)
+void rcl_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_ir(out, OP_EX_RCR, val, dst, size);
+	x86_shiftrot_ir(code, OP_EX_RCL, val, dst, size);
 }
 
-uint8_t * shl_ir(uint8_t * out, uint8_t val, uint8_t dst, uint8_t size)
+void rcr_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_ir(out, OP_EX_SHL, val, dst, size);
+	x86_shiftrot_ir(code, OP_EX_RCR, val, dst, size);
 }
 
-uint8_t * shr_ir(uint8_t * out, uint8_t val, uint8_t dst, uint8_t size)
+void shl_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_ir(out, OP_EX_SHR, val, dst, size);
+	x86_shiftrot_ir(code, OP_EX_SHL, val, dst, size);
 }
 
-uint8_t * sar_ir(uint8_t * out, uint8_t val, uint8_t dst, uint8_t size)
+void shr_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_ir(out, OP_EX_SAR, val, dst, size);
+	x86_shiftrot_ir(code, OP_EX_SHR, val, dst, size);
 }
 
-uint8_t * rol_irdisp8(uint8_t * out, uint8_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void sar_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_irdisp8(out, OP_EX_ROL, val, dst_base, disp, size);
+	x86_shiftrot_ir(code, OP_EX_SAR, val, dst, size);
 }
 
-uint8_t * ror_irdisp8(uint8_t * out, uint8_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void rol_irdisp(code_info *code, uint8_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_shiftrot_irdisp8(out, OP_EX_ROR, val, dst_base, disp, size);
+	x86_shiftrot_irdisp(code, OP_EX_ROL, val, dst_base, disp, size);
 }
 
-uint8_t * rcl_irdisp8(uint8_t * out, uint8_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void ror_irdisp(code_info *code, uint8_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_shiftrot_irdisp8(out, OP_EX_RCL, val, dst_base, disp, size);
+	x86_shiftrot_irdisp(code, OP_EX_ROR, val, dst_base, disp, size);
 }
 
-uint8_t * rcr_irdisp8(uint8_t * out, uint8_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void rcl_irdisp(code_info *code, uint8_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_shiftrot_irdisp8(out, OP_EX_RCR, val, dst_base, disp, size);
+	x86_shiftrot_irdisp(code, OP_EX_RCL, val, dst_base, disp, size);
 }
 
-uint8_t * shl_irdisp8(uint8_t * out, uint8_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void rcr_irdisp(code_info *code, uint8_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_shiftrot_irdisp8(out, OP_EX_SHL, val, dst_base, disp, size);
+	x86_shiftrot_irdisp(code, OP_EX_RCR, val, dst_base, disp, size);
 }
 
-uint8_t * shr_irdisp8(uint8_t * out, uint8_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void shl_irdisp(code_info *code, uint8_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_shiftrot_irdisp8(out, OP_EX_SHR, val, dst_base, disp, size);
+	x86_shiftrot_irdisp(code, OP_EX_SHL, val, dst_base, disp, size);
 }
 
-uint8_t * sar_irdisp8(uint8_t * out, uint8_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void shr_irdisp(code_info *code, uint8_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_shiftrot_irdisp8(out, OP_EX_SAR, val, dst_base, disp, size);
+	x86_shiftrot_irdisp(code, OP_EX_SHR, val, dst_base, disp, size);
 }
 
-uint8_t * rol_clr(uint8_t * out, uint8_t dst, uint8_t size)
+void sar_irdisp(code_info *code, uint8_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_shiftrot_clr(out, OP_EX_ROL, dst, size);
-}
-
-uint8_t * ror_clr(uint8_t * out, uint8_t dst, uint8_t size)
-{
-	return x86_shiftrot_clr(out, OP_EX_ROR, dst, size);
+	x86_shiftrot_irdisp(code, OP_EX_SAR, val, dst_base, disp, size);
 }
 
-uint8_t * rcl_clr(uint8_t * out, uint8_t dst, uint8_t size)
+void rol_clr(code_info *code, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_clr(out, OP_EX_RCL, dst, size);
+	x86_shiftrot_clr(code, OP_EX_ROL, dst, size);
 }
 
-uint8_t * rcr_clr(uint8_t * out, uint8_t dst, uint8_t size)
+void ror_clr(code_info *code, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_clr(out, OP_EX_RCR, dst, size);
+	x86_shiftrot_clr(code, OP_EX_ROR, dst, size);
 }
 
-uint8_t * shl_clr(uint8_t * out, uint8_t dst, uint8_t size)
+void rcl_clr(code_info *code, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_clr(out, OP_EX_SHL, dst, size);
+	x86_shiftrot_clr(code, OP_EX_RCL, dst, size);
 }
 
-uint8_t * shr_clr(uint8_t * out, uint8_t dst, uint8_t size)
+void rcr_clr(code_info *code, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_clr(out, OP_EX_SHR, dst, size);
+	x86_shiftrot_clr(code, OP_EX_RCR, dst, size);
 }
 
-uint8_t * sar_clr(uint8_t * out, uint8_t dst, uint8_t size)
+void shl_clr(code_info *code, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_clr(out, OP_EX_SAR, dst, size);
+	x86_shiftrot_clr(code, OP_EX_SHL, dst, size);
 }
 
-uint8_t * rol_clrdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+void shr_clr(code_info *code, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_clrdisp8(out, OP_EX_ROL, dst_base, disp, size);
+	x86_shiftrot_clr(code, OP_EX_SHR, dst, size);
 }
 
-uint8_t * ror_clrdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+void sar_clr(code_info *code, uint8_t dst, uint8_t size)
 {
-	return x86_shiftrot_clrdisp8(out, OP_EX_ROR, dst_base, disp, size);
+	x86_shiftrot_clr(code, OP_EX_SAR, dst, size);
 }
 
-uint8_t * rcl_clrdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+void rol_clrdisp(code_info *code, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_shiftrot_clrdisp8(out, OP_EX_RCL, dst_base, disp, size);
+	x86_shiftrot_clrdisp(code, OP_EX_ROL, dst_base, disp, size);
 }
 
-uint8_t * rcr_clrdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+void ror_clrdisp(code_info *code, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_shiftrot_clrdisp8(out, OP_EX_RCR, dst_base, disp, size);
+	x86_shiftrot_clrdisp(code, OP_EX_ROR, dst_base, disp, size);
 }
 
-uint8_t * shl_clrdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+void rcl_clrdisp(code_info *code, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_shiftrot_clrdisp8(out, OP_EX_SHL, dst_base, disp, size);
+	x86_shiftrot_clrdisp(code, OP_EX_RCL, dst_base, disp, size);
 }
 
-uint8_t * shr_clrdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+void rcr_clrdisp(code_info *code, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_shiftrot_clrdisp8(out, OP_EX_SHR, dst_base, disp, size);
+	x86_shiftrot_clrdisp(code, OP_EX_RCR, dst_base, disp, size);
 }
 
-uint8_t * sar_clrdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+void shl_clrdisp(code_info *code, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_shiftrot_clrdisp8(out, OP_EX_SAR, dst_base, disp, size);
+	x86_shiftrot_clrdisp(code, OP_EX_SHL, dst_base, disp, size);
 }
 
-uint8_t * add_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void shr_clrdisp(code_info *code, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rr_sizedir(out, OP_ADD, src, dst, size);
+	x86_shiftrot_clrdisp(code, OP_EX_SHR, dst_base, disp, size);
 }
 
-uint8_t * add_ir(uint8_t * out, int32_t val, uint8_t dst, uint8_t size)
+void sar_clrdisp(code_info *code, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_ir(out, OP_IMMED_ARITH, OP_EX_ADDI, OP_ADD, val, dst, size);
+	x86_shiftrot_clrdisp(code, OP_EX_SAR, dst_base, disp, size);
 }
 
-uint8_t * add_irdisp8(uint8_t * out, int32_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void add_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return x86_irdisp8(out, OP_IMMED_ARITH, OP_EX_ADDI, val, dst_base, disp, size);
+	x86_rr_sizedir(code, OP_ADD, src, dst, size);
 }
 
-uint8_t * add_irdisp32(uint8_t * out, int32_t val, uint8_t dst_base, int32_t disp, uint8_t size)
+void add_ir(code_info *code, int32_t val, uint8_t dst, uint8_t size)
 {
-	return x86_irdisp32(out, OP_IMMED_ARITH, OP_EX_ADDI, val, dst_base, disp, size);
+	x86_ir(code, OP_IMMED_ARITH, OP_EX_ADDI, OP_ADD, val, dst, size);
 }
 
-uint8_t * add_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t disp, uint8_t size)
+void add_irdisp(code_info *code, int32_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_ADD, src, dst_base, disp, size, 0);
+	x86_irdisp(code, OP_IMMED_ARITH, OP_EX_ADDI, val, dst_base, disp, size);
 }
 
-uint8_t * add_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size)
+void add_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_ADD, dst, src_base, disp, size, BIT_DIR);
+	x86_rrdisp_sizedir(code, OP_ADD, src, dst_base, disp, size, 0);
 }
 
-uint8_t * adc_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void add_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size)
 {
-	return x86_rr_sizedir(out, OP_ADC, src, dst, size);
+	x86_rrdisp_sizedir(code, OP_ADD, dst, src_base, disp, size, BIT_DIR);
 }
 
-uint8_t * adc_ir(uint8_t * out, int32_t val, uint8_t dst, uint8_t size)
+void adc_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return x86_ir(out, OP_IMMED_ARITH, OP_EX_ADCI, OP_ADC, val, dst, size);
+	x86_rr_sizedir(code, OP_ADC, src, dst, size);
 }
 
-uint8_t * adc_irdisp8(uint8_t * out, int32_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void adc_ir(code_info *code, int32_t val, uint8_t dst, uint8_t size)
 {
-	return x86_irdisp8(out, OP_IMMED_ARITH, OP_EX_ADCI, val, dst_base, disp, size);
+	x86_ir(code, OP_IMMED_ARITH, OP_EX_ADCI, OP_ADC, val, dst, size);
 }
 
-uint8_t * adc_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t disp, uint8_t size)
+void adc_irdisp(code_info *code, int32_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_ADC, src, dst_base, disp, size, 0);
+	x86_irdisp(code, OP_IMMED_ARITH, OP_EX_ADCI, val, dst_base, disp, size);
 }
 
-uint8_t * adc_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size)
+void adc_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_ADC, dst, src_base, disp, size, BIT_DIR);
+	x86_rrdisp_sizedir(code, OP_ADC, src, dst_base, disp, size, 0);
 }
 
-uint8_t * or_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void adc_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size)
 {
-	return x86_rr_sizedir(out, OP_OR, src, dst, size);
-}
-uint8_t * or_ir(uint8_t * out, int32_t val, uint8_t dst, uint8_t size)
-{
-	return x86_ir(out, OP_IMMED_ARITH, OP_EX_ORI, OP_OR, val, dst, size);
+	x86_rrdisp_sizedir(code, OP_ADC, dst, src_base, disp, size, BIT_DIR);
 }
 
-uint8_t * or_irdisp8(uint8_t * out, int32_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void or_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return x86_irdisp8(out, OP_IMMED_ARITH, OP_EX_ORI, val, dst_base, disp, size);
+	x86_rr_sizedir(code, OP_OR, src, dst, size);
+}
+void or_ir(code_info *code, int32_t val, uint8_t dst, uint8_t size)
+{
+	x86_ir(code, OP_IMMED_ARITH, OP_EX_ORI, OP_OR, val, dst, size);
 }
 
-uint8_t * or_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t disp, uint8_t size)
+void or_irdisp(code_info *code, int32_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_OR, src, dst_base, disp, size, 0);
+	x86_irdisp(code, OP_IMMED_ARITH, OP_EX_ORI, val, dst_base, disp, size);
 }
 
-uint8_t * or_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size)
+void or_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_OR, dst, src_base, disp, size, BIT_DIR);
+	x86_rrdisp_sizedir(code, OP_OR, src, dst_base, disp, size, 0);
 }
 
-uint8_t * and_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void or_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size)
 {
-	return x86_rr_sizedir(out, OP_AND, src, dst, size);
+	x86_rrdisp_sizedir(code, OP_OR, dst, src_base, disp, size, BIT_DIR);
 }
 
-uint8_t * and_ir(uint8_t * out, int32_t val, uint8_t dst, uint8_t size)
+void and_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return x86_ir(out, OP_IMMED_ARITH, OP_EX_ANDI, OP_AND, val, dst, size);
+	x86_rr_sizedir(code, OP_AND, src, dst, size);
 }
 
-uint8_t * and_irdisp8(uint8_t * out, int32_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void and_ir(code_info *code, int32_t val, uint8_t dst, uint8_t size)
 {
-	return x86_irdisp8(out, OP_IMMED_ARITH, OP_EX_ANDI, val, dst_base, disp, size);
+	x86_ir(code, OP_IMMED_ARITH, OP_EX_ANDI, OP_AND, val, dst, size);
 }
 
-uint8_t * and_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t disp, uint8_t size)
+void and_irdisp(code_info *code, int32_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_AND, src, dst_base, disp, size, 0);
+	x86_irdisp(code, OP_IMMED_ARITH, OP_EX_ANDI, val, dst_base, disp, size);
 }
 
-uint8_t * and_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size)
+void and_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_AND, dst, src_base, disp, size, BIT_DIR);
+	x86_rrdisp_sizedir(code, OP_AND, src, dst_base, disp, size, 0);
 }
 
-uint8_t * xor_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void and_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size)
 {
-	return x86_rr_sizedir(out, OP_XOR, src, dst, size);
+	x86_rrdisp_sizedir(code, OP_AND, dst, src_base, disp, size, BIT_DIR);
 }
 
-uint8_t * xor_ir(uint8_t * out, int32_t val, uint8_t dst, uint8_t size)
+void xor_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return x86_ir(out, OP_IMMED_ARITH, OP_EX_XORI, OP_XOR, val, dst, size);
+	x86_rr_sizedir(code, OP_XOR, src, dst, size);
 }
 
-uint8_t * xor_irdisp8(uint8_t * out, int32_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void xor_ir(code_info *code, int32_t val, uint8_t dst, uint8_t size)
 {
-	return x86_irdisp8(out, OP_IMMED_ARITH, OP_EX_XORI, val, dst_base, disp, size);
+	x86_ir(code, OP_IMMED_ARITH, OP_EX_XORI, OP_XOR, val, dst, size);
 }
 
-uint8_t * xor_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t disp, uint8_t size)
+void xor_irdisp(code_info *code, int32_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_XOR, src, dst_base, disp, size, 0);
+	x86_irdisp(code, OP_IMMED_ARITH, OP_EX_XORI, val, dst_base, disp, size);
 }
 
-uint8_t * xor_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size)
+void xor_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_XOR, dst, src_base, disp, size, BIT_DIR);
+	x86_rrdisp_sizedir(code, OP_XOR, src, dst_base, disp, size, 0);
 }
 
-uint8_t * sub_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void xor_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size)
 {
-	return x86_rr_sizedir(out, OP_SUB, src, dst, size);
+	x86_rrdisp_sizedir(code, OP_XOR, dst, src_base, disp, size, BIT_DIR);
 }
 
-uint8_t * sub_ir(uint8_t * out, int32_t val, uint8_t dst, uint8_t size)
+void sub_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return x86_ir(out, OP_IMMED_ARITH, OP_EX_SUBI, OP_SUB, val, dst, size);
+	x86_rr_sizedir(code, OP_SUB, src, dst, size);
 }
 
-uint8_t * sub_irdisp8(uint8_t * out, int32_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void sub_ir(code_info *code, int32_t val, uint8_t dst, uint8_t size)
 {
-	return x86_irdisp8(out, OP_IMMED_ARITH, OP_EX_SUBI, val, dst_base, disp, size);
+	x86_ir(code, OP_IMMED_ARITH, OP_EX_SUBI, OP_SUB, val, dst, size);
 }
 
-uint8_t * sub_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t disp, uint8_t size)
+void sub_irdisp(code_info *code, int32_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_SUB, src, dst_base, disp, size, 0);
+	x86_irdisp(code, OP_IMMED_ARITH, OP_EX_SUBI, val, dst_base, disp, size);
 }
 
-uint8_t * sub_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size)
+void sub_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_SUB, dst, src_base, disp, size, BIT_DIR);
+	x86_rrdisp_sizedir(code, OP_SUB, src, dst_base, disp, size, 0);
 }
 
-uint8_t * sbb_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void sub_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size)
 {
-	return x86_rr_sizedir(out, OP_SBB, src, dst, size);
+	x86_rrdisp_sizedir(code, OP_SUB, dst, src_base, disp, size, BIT_DIR);
 }
 
-uint8_t * sbb_ir(uint8_t * out, int32_t val, uint8_t dst, uint8_t size)
+void sbb_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return x86_ir(out, OP_IMMED_ARITH, OP_EX_SBBI, OP_SBB, val, dst, size);
+	x86_rr_sizedir(code, OP_SBB, src, dst, size);
 }
 
-uint8_t * sbb_irdisp8(uint8_t * out, int32_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void sbb_ir(code_info *code, int32_t val, uint8_t dst, uint8_t size)
 {
-	return x86_irdisp8(out, OP_IMMED_ARITH, OP_EX_SBBI, val, dst_base, disp, size);
+	x86_ir(code, OP_IMMED_ARITH, OP_EX_SBBI, OP_SBB, val, dst, size);
 }
 
-uint8_t * sbb_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t disp, uint8_t size)
+void sbb_irdisp(code_info *code, int32_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_SBB, src, dst_base, disp, size, 0);
+	x86_irdisp(code, OP_IMMED_ARITH, OP_EX_SBBI, val, dst_base, disp, size);
 }
 
-uint8_t * sbb_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size)
+void sbb_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_SBB, dst, src_base, disp, size, BIT_DIR);
+	x86_rrdisp_sizedir(code, OP_SBB, src, dst_base, disp, size, 0);
 }
 
-uint8_t * cmp_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void sbb_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size)
 {
-	return x86_rr_sizedir(out, OP_CMP, src, dst, size);
+	x86_rrdisp_sizedir(code, OP_SBB, dst, src_base, disp, size, BIT_DIR);
 }
 
-uint8_t * cmp_ir(uint8_t * out, int32_t val, uint8_t dst, uint8_t size)
+void cmp_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return x86_ir(out, OP_IMMED_ARITH, OP_EX_CMPI, OP_CMP, val, dst, size);
+	x86_rr_sizedir(code, OP_CMP, src, dst, size);
 }
 
-uint8_t * cmp_irdisp8(uint8_t * out, int32_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void cmp_ir(code_info *code, int32_t val, uint8_t dst, uint8_t size)
 {
-	return x86_irdisp8(out, OP_IMMED_ARITH, OP_EX_CMPI, val, dst_base, disp, size);
+	x86_ir(code, OP_IMMED_ARITH, OP_EX_CMPI, OP_CMP, val, dst, size);
 }
 
-uint8_t * cmp_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t disp, uint8_t size)
+void cmp_irdisp(code_info *code, int32_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_CMP, src, dst_base, disp, size, 0);
+	x86_irdisp(code, OP_IMMED_ARITH, OP_EX_CMPI, val, dst_base, disp, size);
 }
 
-uint8_t * cmp_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size)
+void cmp_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_CMP, dst, src_base, disp, size, BIT_DIR);
+	x86_rrdisp_sizedir(code, OP_CMP, src, dst_base, disp, size, 0);
 }
 
-uint8_t * test_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void cmp_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size)
 {
-	return x86_rr_sizedir(out, OP_TEST, src, dst, size);
+	x86_rrdisp_sizedir(code, OP_CMP, dst, src_base, disp, size, BIT_DIR);
 }
 
-uint8_t * test_ir(uint8_t * out, int32_t val, uint8_t dst, uint8_t size)
+void test_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return x86_ir(out, OP_NOT_NEG, OP_EX_TEST_I, OP_TEST, val, dst, size);
+	x86_rr_sizedir(code, OP_TEST, src, dst, size);
 }
 
-uint8_t * test_irdisp8(uint8_t * out, int32_t val, uint8_t dst_base, int8_t disp, uint8_t size)
+void test_ir(code_info *code, int32_t val, uint8_t dst, uint8_t size)
 {
-	return x86_irdisp8(out, OP_NOT_NEG, OP_EX_TEST_I, val, dst_base, disp, size);
+	x86_ir(code, OP_NOT_NEG, OP_EX_TEST_I, OP_TEST, val, dst, size);
 }
 
-uint8_t * test_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t disp, uint8_t size)
+void test_irdisp(code_info *code, int32_t val, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_TEST, src, dst_base, disp, size, 0);
+	x86_irdisp(code, OP_NOT_NEG, OP_EX_TEST_I, val, dst_base, disp, size);
 }
 
-uint8_t * test_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size)
+void test_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_TEST, dst, src_base, disp, size, BIT_DIR);
+	x86_rrdisp_sizedir(code, OP_TEST, src, dst_base, disp, size, 0);
 }
 
-uint8_t * imul_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void test_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size)
 {
-	return x86_rr_sizedir(out, OP2_IMUL | (PRE_2BYTE << 8), dst, src, size);
+	x86_rrdisp_sizedir(code, OP_TEST, dst, src_base, disp, size, BIT_DIR);
 }
 
-uint8_t * imul_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size)
+void imul_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP2_IMUL | (PRE_2BYTE << 8), dst, src_base, disp, size, 0);
+	x86_rr_sizedir(code, OP2_IMUL | (PRE_2BYTE << 8), dst, src, size);
 }
 
-uint8_t * not_r(uint8_t * out, uint8_t dst, uint8_t size)
+void imul_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size)
 {
-	return x86_r_size(out, OP_NOT_NEG, OP_EX_NOT, dst, size);
+	x86_rrdisp_sizedir(code, OP2_IMUL | (PRE_2BYTE << 8), dst, src_base, disp, size, 0);
 }
 
-uint8_t * neg_r(uint8_t * out, uint8_t dst, uint8_t size)
+void not_r(code_info *code, uint8_t dst, uint8_t size)
 {
-	return x86_r_size(out, OP_NOT_NEG, OP_EX_NEG, dst, size);
+	x86_r_size(code, OP_NOT_NEG, OP_EX_NOT, dst, size);
 }
 
-uint8_t * not_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+void neg_r(code_info *code, uint8_t dst, uint8_t size)
 {
-	return x86_rdisp8_size(out, OP_NOT_NEG, OP_EX_NOT, dst_base, disp, size);
+	x86_r_size(code, OP_NOT_NEG, OP_EX_NEG, dst, size);
 }
 
-uint8_t * neg_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+void not_rdisp(code_info *code, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rdisp8_size(out, OP_NOT_NEG, OP_EX_NEG, dst_base, disp, size);
+	x86_rdisp_size(code, OP_NOT_NEG, OP_EX_NOT, dst_base, disp, size);
 }
 
-uint8_t * mul_r(uint8_t * out, uint8_t dst, uint8_t size)
+void neg_rdisp(code_info *code, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_r_size(out, OP_NOT_NEG, OP_EX_MUL, dst, size);
+	x86_rdisp_size(code, OP_NOT_NEG, OP_EX_NEG, dst_base, disp, size);
 }
 
-uint8_t * imul_r(uint8_t * out, uint8_t dst, uint8_t size)
+void mul_r(code_info *code, uint8_t dst, uint8_t size)
 {
-	return x86_r_size(out, OP_NOT_NEG, OP_EX_IMUL, dst, size);
+	x86_r_size(code, OP_NOT_NEG, OP_EX_MUL, dst, size);
 }
 
-uint8_t * div_r(uint8_t * out, uint8_t dst, uint8_t size)
+void imul_r(code_info *code, uint8_t dst, uint8_t size)
 {
-	return x86_r_size(out, OP_NOT_NEG, OP_EX_DIV, dst, size);
+	x86_r_size(code, OP_NOT_NEG, OP_EX_IMUL, dst, size);
 }
 
-uint8_t * idiv_r(uint8_t * out, uint8_t dst, uint8_t size)
+void div_r(code_info *code, uint8_t dst, uint8_t size)
 {
-	return x86_r_size(out, OP_NOT_NEG, OP_EX_IDIV, dst, size);
+	x86_r_size(code, OP_NOT_NEG, OP_EX_DIV, dst, size);
 }
 
-uint8_t * mul_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+void idiv_r(code_info *code, uint8_t dst, uint8_t size)
 {
-	return x86_rdisp8_size(out, OP_NOT_NEG, OP_EX_MUL, dst_base, disp, size);
+	x86_r_size(code, OP_NOT_NEG, OP_EX_IDIV, dst, size);
 }
 
-uint8_t * imul_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+void mul_rdisp(code_info *code, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rdisp8_size(out, OP_NOT_NEG, OP_EX_IMUL, dst_base, disp, size);
+	x86_rdisp_size(code, OP_NOT_NEG, OP_EX_MUL, dst_base, disp, size);
 }
 
-uint8_t * div_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+void imul_rdisp(code_info *code, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rdisp8_size(out, OP_NOT_NEG, OP_EX_DIV, dst_base, disp, size);
+	x86_rdisp_size(code, OP_NOT_NEG, OP_EX_IMUL, dst_base, disp, size);
 }
 
-uint8_t * idiv_rdisp8(uint8_t * out, uint8_t dst_base, int8_t disp, uint8_t size)
+void div_rdisp(code_info *code, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rdisp8_size(out, OP_NOT_NEG, OP_EX_IDIV, dst_base, disp, size);
+	x86_rdisp_size(code, OP_NOT_NEG, OP_EX_DIV, dst_base, disp, size);
 }
 
-uint8_t * mov_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void idiv_rdisp(code_info *code, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rr_sizedir(out, OP_MOV, src, dst, size);
+	x86_rdisp_size(code, OP_NOT_NEG, OP_EX_IDIV, dst_base, disp, size);
 }
 
-uint8_t * mov_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t disp, uint8_t size)
+void mov_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_MOV, src, dst_base, disp, size, 0);
+	x86_rr_sizedir(code, OP_MOV, src, dst, size);
 }
 
-uint8_t * mov_rdisp8r(uint8_t * out, uint8_t src_base, int8_t disp, uint8_t dst, uint8_t size)
+void mov_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t disp, uint8_t size)
 {
-	return x86_rrdisp8_sizedir(out, OP_MOV, dst, src_base, disp, size, BIT_DIR);
+	x86_rrdisp_sizedir(code, OP_MOV, src, dst_base, disp, size, 0);
 }
 
-uint8_t * mov_rrdisp32(uint8_t * out, uint8_t src, uint8_t dst_base, int32_t disp, uint8_t size)
+void mov_rdispr(code_info *code, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size)
 {
-	return x86_rrdisp32_sizedir(out, OP_MOV, src, dst_base, disp, size, 0);
+	x86_rrdisp_sizedir(code, OP_MOV, dst, src_base, disp, size, BIT_DIR);
 }
 
-uint8_t * mov_rdisp32r(uint8_t * out, uint8_t src_base, int32_t disp, uint8_t dst, uint8_t size)
+void mov_rrind(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return x86_rrdisp32_sizedir(out, OP_MOV, dst, src_base, disp, size, BIT_DIR);
+	x86_rrind_sizedir(code, OP_MOV, src, dst, size, 0);
 }
 
-uint8_t * mov_rrind(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void mov_rindr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return x86_rrind_sizedir(out, OP_MOV, src, dst, size, 0);
+	x86_rrind_sizedir(code, OP_MOV, dst, src, size, BIT_DIR);
 }
 
-uint8_t * mov_rindr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void mov_rrindex(code_info *code, uint8_t src, uint8_t dst_base, uint8_t dst_index, uint8_t scale, uint8_t size)
 {
-	return x86_rrind_sizedir(out, OP_MOV, dst, src, size, BIT_DIR);
+	x86_rrindex_sizedir(code, OP_MOV, src, dst_base, dst_index, scale, size, 0);
 }
 
-uint8_t * mov_rrindex(uint8_t * out, uint8_t src, uint8_t dst_base, uint8_t dst_index, uint8_t scale, uint8_t size)
+void mov_rindexr(code_info *code, uint8_t src_base, uint8_t src_index, uint8_t scale, uint8_t dst, uint8_t size)
 {
-	return x86_rrindex_sizedir(out, OP_MOV, src, dst_base, dst_index, scale, size, 0);
+	x86_rrindex_sizedir(code, OP_MOV, dst, src_base, src_index, scale, size, BIT_DIR);
 }
 
-uint8_t * mov_rindexr(uint8_t * out, uint8_t src_base, uint8_t src_index, uint8_t scale, uint8_t dst, uint8_t size)
+void mov_ir(code_info *code, int64_t val, uint8_t dst, uint8_t size)
 {
-	return x86_rrindex_sizedir(out, OP_MOV, dst, src_base, src_index, scale, size, BIT_DIR);
-}
-
-uint8_t * mov_ir(uint8_t * out, int64_t val, uint8_t dst, uint8_t size)
-{
+	check_alloc_code(code, 14);
+	code_ptr out = code->cur;
 	uint8_t sign_extend = 0;
 	if (size == SZ_Q && val <= 0x7FFFFFFF && val >= -2147483648) {
 		sign_extend = 1;
@@ -1216,11 +1217,13 @@
 			}
 		}
 	}
-	return out;
+	code->cur = out;
 }
 
-uint8_t * mov_irdisp8(uint8_t * out, int32_t val, uint8_t dst, int8_t disp, uint8_t size)
+void mov_irdisp(code_info *code, int32_t val, uint8_t dst, int32_t disp, uint8_t size)
 {
+	check_alloc_code(code, 12);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -1239,8 +1242,16 @@
 		dst -= (AH-X86_AH);
 	}
 	*(out++) = OP_MOV_IEA | (size == SZ_B ? 0 : BIT_SIZE);
-	*(out++) = MODE_REG_DISPLACE8 | dst;
-	*(out++) = disp;
+	if (disp < 128 && disp >= -128) {
+		*(out++) = MODE_REG_DISPLACE8 | dst;
+		*(out++) = disp;
+	} else {
+		*(out++) = MODE_REG_DISPLACE32 | dst;
+		*(out++) = disp;
+		*(out++) = disp >> 8;
+		*(out++) = disp >> 16;
+		*(out++) = disp >> 24;
+	}
 
 	*(out++) = val;
 	if (size != SZ_B) {
@@ -1253,11 +1264,13 @@
 			*(out++) = val;
 		}
 	}
-	return out;
+	code->cur = out;
 }
 
-uint8_t * mov_irind(uint8_t * out, int32_t val, uint8_t dst, uint8_t size)
+void mov_irind(code_info *code, int32_t val, uint8_t dst, uint8_t size)
 {
+	check_alloc_code(code, 8);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -1289,11 +1302,13 @@
 			*(out++) = val;
 		}
 	}
-	return out;
+	code->cur = out;
 }
 
-uint8_t * movsx_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t src_size, uint8_t size)
+void movsx_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t src_size, uint8_t size)
 {
+	check_alloc_code(code, 5);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -1319,11 +1334,13 @@
 		*(out++) = OP2_MOVSX | (src_size == SZ_B ? 0 : BIT_SIZE);
 	}
 	*(out++) = MODE_REG_DIRECT | src | (dst << 3);
-	return out;
+	code->cur = out;
 }
 
-uint8_t * movsx_rdisp8r(uint8_t * out, uint8_t src, int8_t disp, uint8_t dst, uint8_t src_size, uint8_t size)
+void movsx_rdispr(code_info *code, uint8_t src, int32_t disp, uint8_t dst, uint8_t src_size, uint8_t size)
 {
+	check_alloc_code(code, 12);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -1348,13 +1365,23 @@
 		*(out++) = PRE_2BYTE;
 		*(out++) = OP2_MOVSX | (src_size == SZ_B ? 0 : BIT_SIZE);
 	}
-	*(out++) = MODE_REG_DISPLACE8 | src | (dst << 3);
-	*(out++) = disp;
-	return out;
+	if (disp < 128 && disp >= -128) {
+		*(out++) = MODE_REG_DISPLACE8 | src | (dst << 3);
+		*(out++) = disp;
+	} else {
+		*(out++) = MODE_REG_DISPLACE32 | src | (dst << 3);
+		*(out++) = disp;
+		*(out++) = disp >> 8;
+		*(out++) = disp >> 16;
+		*(out++) = disp >> 24;
+	}
+	code->cur = out;
 }
 
-uint8_t * movzx_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t src_size, uint8_t size)
+void movzx_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t src_size, uint8_t size)
 {
+	check_alloc_code(code, 5);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -1376,11 +1403,13 @@
 	*(out++) = PRE_2BYTE;
 	*(out++) = OP2_MOVZX | (src_size == SZ_B ? 0 : BIT_SIZE);
 	*(out++) = MODE_REG_DIRECT | src | (dst << 3);
-	return out;
+	code->cur = out;
 }
 
-uint8_t * movzx_rdisp8r(uint8_t * out, uint8_t src, int8_t disp, uint8_t dst, uint8_t src_size, uint8_t size)
+void movzx_rdispr(code_info *code, uint8_t src, int32_t disp, uint8_t dst, uint8_t src_size, uint8_t size)
 {
+	check_alloc_code(code, 9);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -1401,13 +1430,23 @@
 	}
 	*(out++) = PRE_2BYTE;
 	*(out++) = OP2_MOVZX | (src_size == SZ_B ? 0 : BIT_SIZE);
-	*(out++) = MODE_REG_DISPLACE8 | src | (dst << 3);
-	*(out++) = disp;
-	return out;
+	if (disp < 128 && disp >= -128) {
+		*(out++) = MODE_REG_DISPLACE8 | src | (dst << 3);
+		*(out++) = disp;
+	} else {
+		*(out++) = MODE_REG_DISPLACE32 | src | (dst << 3);
+		*(out++) = disp;
+		*(out++) = disp >> 8;
+		*(out++) = disp >> 16;
+		*(out++) = disp >> 24;
+	}
+	code->cur = out;
 }
 
-uint8_t * xchg_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void xchg_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
+	check_alloc_code(code, 4);
+	code_ptr out = code->cur;
 	//TODO: Use OP_XCHG_AX when one of the registers is AX, EAX or RAX
 	uint8_t tmp;
 	if (size == SZ_W) {
@@ -1446,43 +1485,53 @@
 	}
 	*(out++) = opcode;
 	*(out++) = MODE_REG_DIRECT | dst | (src << 3);
-	return out;
+	code->cur = out;
 }
 
-uint8_t * pushf(uint8_t * out)
+void pushf(code_info *code)
 {
+	check_alloc_code(code, 1);
+	code_ptr out = code->cur;
 	*(out++) = OP_PUSHF;
-	return out;
+	code->cur = out;
 }
 
-uint8_t * popf(uint8_t * out)
+void popf(code_info *code)
 {
+	check_alloc_code(code, 1);
+	code_ptr out = code->cur;
 	*(out++) = OP_POPF;
-	return out;
+	code->cur = out;
 }
 
-uint8_t * push_r(uint8_t * out, uint8_t reg)
+void push_r(code_info *code, uint8_t reg)
 {
+	check_alloc_code(code, 2);
+	code_ptr out = code->cur;
 	if (reg >= R8) {
 		*(out++) = PRE_REX | REX_RM_FIELD;
 		reg -= R8 - X86_R8;
 	}
 	*(out++) = OP_PUSH | reg;
-	return out;
+	code->cur = out;
 }
 
-uint8_t * pop_r(uint8_t * out, uint8_t reg)
+void pop_r(code_info *code, uint8_t reg)
 {
+	check_alloc_code(code, 2);
+	code_ptr out = code->cur;
 	if (reg >= R8) {
 		*(out++) = PRE_REX | REX_RM_FIELD;
 		reg -= R8 - X86_R8;
 	}
 	*(out++) = OP_POP | reg;
-	return out;
+	code->cur = out;
 }
 
-uint8_t * setcc_r(uint8_t * out, uint8_t cc, uint8_t dst)
+void setcc_r(code_info *code, uint8_t cc, uint8_t dst)
 {
+	check_alloc_code(code, 4);
+	code_ptr out = code->cur;
 	if (dst >= R8) {
 		*(out++) = PRE_REX | REX_RM_FIELD;
 		dst -= R8 - X86_R8;
@@ -1494,11 +1543,13 @@
 	*(out++) = PRE_2BYTE;
 	*(out++) = OP2_SETCC | cc;
 	*(out++) = MODE_REG_DIRECT | dst;
-	return out;
+	code->cur = out;
 }
 
-uint8_t * setcc_rind(uint8_t * out, uint8_t cc, uint8_t dst)
+void setcc_rind(code_info *code, uint8_t cc, uint8_t dst)
 {
+	check_alloc_code(code, 4);
+	code_ptr out = code->cur;
 	if (dst >= R8) {
 		*(out++) = PRE_REX | REX_RM_FIELD;
 		dst -= R8 - X86_R8;
@@ -1506,24 +1557,36 @@
 	*(out++) = PRE_2BYTE;
 	*(out++) = OP2_SETCC | cc;
 	*(out++) = MODE_REG_INDIRECT | dst;
-	return out;
+	code->cur = out;
 }
 
-uint8_t * setcc_rdisp8(uint8_t * out, uint8_t cc, uint8_t dst, int8_t disp)
+void setcc_rdisp(code_info *code, uint8_t cc, uint8_t dst, int32_t disp)
 {
+	check_alloc_code(code, 8);
+	code_ptr out = code->cur;
 	if (dst >= R8) {
 		*(out++) = PRE_REX | REX_RM_FIELD;
 		dst -= R8 - X86_R8;
 	}
 	*(out++) = PRE_2BYTE;
 	*(out++) = OP2_SETCC | cc;
-	*(out++) = MODE_REG_DISPLACE8 | dst;
-	*(out++) = disp;
-	return out;
+	if (disp < 128 && disp >= -128) {
+		*(out++) = MODE_REG_DISPLACE8 | dst;
+		*(out++) = disp;
+	} else {
+		*(out++) = MODE_REG_DISPLACE32 | dst;
+		*(out++) = disp;
+		*(out++) = disp >> 8;
+		*(out++) = disp >> 16;
+		*(out++) = disp >> 24;
+	}
+	code->cur = out;
 }
 
-uint8_t * bit_rr(uint8_t * out, uint8_t op2, uint8_t src, uint8_t dst, uint8_t size)
+void bit_rr(code_info *code, uint8_t op2, uint8_t src, uint8_t dst, uint8_t size)
 {
+	check_alloc_code(code, 5);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -1545,38 +1608,13 @@
 	*(out++) = PRE_2BYTE;
 	*(out++) = op2;
 	*(out++) = MODE_REG_DIRECT | dst | (src << 3);
-	return out;
+	code->cur = out;
 }
 
-uint8_t * bit_rrdisp8(uint8_t * out, uint8_t op2, uint8_t src, uint8_t dst_base, int8_t dst_disp, uint8_t size)
+void bit_rrdisp(code_info *code, uint8_t op2, uint8_t src, uint8_t dst_base, int32_t dst_disp, uint8_t size)
 {
-	if (size == SZ_W) {
-		*(out++) = PRE_SIZE;
-	}
-	if (size == SZ_Q || src >= R8 || dst_base >= R8) {
-		*out = PRE_REX;
-		if (size == SZ_Q) {
-			*out |= REX_QUAD;
-		}
-		if (src >= R8) {
-			*out |= REX_REG_FIELD;
-			src -= (R8 - X86_R8);
-		}
-		if (dst_base >= R8) {
-			*out |= REX_RM_FIELD;
-			dst_base -= (R8 - X86_R8);
-		}
-		out++;
-	}
-	*(out++) = PRE_2BYTE;
-	*(out++) = op2;
-	*(out++) = MODE_REG_DISPLACE8 | dst_base | (src << 3);
-	*(out++) = dst_disp;
-	return out;
-}
-
-uint8_t * bit_rrdisp32(uint8_t * out, uint8_t op2, uint8_t src, uint8_t dst_base, int32_t dst_disp, uint8_t size)
-{
+	check_alloc_code(code, 9);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -1597,16 +1635,23 @@
 	}
 	*(out++) = PRE_2BYTE;
 	*(out++) = op2;
-	*(out++) = MODE_REG_DISPLACE32 | dst_base | (src << 3);
-	*(out++) = dst_disp;
-	*(out++) = dst_disp >> 8;
-	*(out++) = dst_disp >> 16;
-	*(out++) = dst_disp >> 24;
-	return out;
+	if (dst_disp < 128 && dst_disp >= -128) {
+		*(out++) = MODE_REG_DISPLACE8 | dst_base | (src << 3);
+		*(out++) = dst_disp;
+	} else {
+		*(out++) = MODE_REG_DISPLACE32 | dst_base | (src << 3);
+		*(out++) = dst_disp;
+		*(out++) = dst_disp >> 8;
+		*(out++) = dst_disp >> 16;
+		*(out++) = dst_disp >> 24;
+	}
+	code->cur = out;
 }
 
-uint8_t * bit_ir(uint8_t * out, uint8_t op_ex, uint8_t val, uint8_t dst, uint8_t size)
+void bit_ir(code_info *code, uint8_t op_ex, uint8_t val, uint8_t dst, uint8_t size)
 {
+	check_alloc_code(code, 6);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -1625,11 +1670,13 @@
 	*(out++) = OP2_BTX_I;
 	*(out++) = MODE_REG_DIRECT | dst | (op_ex << 3);
 	*(out++) = val;
-	return out;
+	code->cur = out;
 }
 
-uint8_t * bit_irdisp8(uint8_t * out, uint8_t op_ex, uint8_t val, uint8_t dst_base, int8_t dst_disp, uint8_t size)
+void bit_irdisp(code_info *code, uint8_t op_ex, uint8_t val, uint8_t dst_base, int32_t dst_disp, uint8_t size)
 {
+	check_alloc_code(code, 10);
+	code_ptr out = code->cur;
 	if (size == SZ_W) {
 		*(out++) = PRE_SIZE;
 	}
@@ -1646,99 +1693,104 @@
 	}
 	*(out++) = PRE_2BYTE;
 	*(out++) = OP2_BTX_I;
-	*(out++) = MODE_REG_DISPLACE8 | dst_base | (op_ex << 3);
-	*(out++) = dst_disp;
+	if (dst_disp < 128 && dst_disp >= -128) {
+		*(out++) = MODE_REG_DISPLACE8 | dst_base | (op_ex << 3);
+		*(out++) = dst_disp;
+	} else {
+		*(out++) = MODE_REG_DISPLACE32 | dst_base | (op_ex << 3);
+		*(out++) = dst_disp;
+		*(out++) = dst_disp >> 8;
+		*(out++) = dst_disp >> 16;
+		*(out++) = dst_disp >> 24;
+	}
 	*(out++) = val;
-	return out;
+	code->cur = out;
 }
 
-uint8_t * bt_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void bt_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return bit_rr(out, OP2_BT, src, dst, size);
+	bit_rr(code, OP2_BT, src, dst, size); //BT: src and dst are both registers
 }
 
-uint8_t * bt_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t dst_disp, uint8_t size)
+void bt_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t dst_disp, uint8_t size)
 {
-	return bit_rrdisp8(out, OP2_BT, src, dst_base, dst_disp, size);
-}
-
-uint8_t * bt_rrdisp32(uint8_t * out, uint8_t src, uint8_t dst_base, int32_t dst_disp, uint8_t size)
-{
-	return bit_rrdisp32(out, OP2_BT, src, dst_base, dst_disp, size);
+	bit_rrdisp(code, OP2_BT, src, dst_base, dst_disp, size); //BT: register src against memory at dst_base+dst_disp
 }
 
-uint8_t * bt_ir(uint8_t * out, uint8_t val, uint8_t dst, uint8_t size)
+void bt_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size)
 {
-	return bit_ir(out, OP_EX_BT, val, dst, size);
+	bit_ir(code, OP_EX_BT, val, dst, size); //BT: immediate val against register dst
 }
 
-uint8_t * bt_irdisp8(uint8_t * out, uint8_t val, uint8_t dst_base, int8_t dst_disp, uint8_t size)
+void bt_irdisp(code_info *code, uint8_t val, uint8_t dst_base, int32_t dst_disp, uint8_t size)
 {
-	return bit_irdisp8(out, OP_EX_BT, val, dst_base, dst_disp, size);
+	bit_irdisp(code, OP_EX_BT, val, dst_base, dst_disp, size); //BT: immediate val against memory at dst_base+dst_disp
 }
 
-uint8_t * bts_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void bts_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return bit_rr(out, OP2_BTS, src, dst, size);
+	bit_rr(code, OP2_BTS, src, dst, size); //BTS: src and dst are both registers
 }
 
-uint8_t * bts_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t dst_disp, uint8_t size)
+void bts_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t dst_disp, uint8_t size)
 {
-	return bit_rrdisp8(out, OP2_BTS, src, dst_base, dst_disp, size);
+	bit_rrdisp(code, OP2_BTS, src, dst_base, dst_disp, size); //BTS: register src against memory at dst_base+dst_disp
 }
 
-uint8_t * bts_ir(uint8_t * out, uint8_t val, uint8_t dst, uint8_t size)
+void bts_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size)
 {
-	return bit_ir(out, OP_EX_BTS, val, dst, size);
+	bit_ir(code, OP_EX_BTS, val, dst, size); //BTS: immediate val against register dst
 }
 
-uint8_t * bts_irdisp8(uint8_t * out, uint8_t val, uint8_t dst_base, int8_t dst_disp, uint8_t size)
+void bts_irdisp(code_info *code, uint8_t val, uint8_t dst_base, int32_t dst_disp, uint8_t size)
 {
-	return bit_irdisp8(out, OP_EX_BTS, val, dst_base, dst_disp, size);
+	bit_irdisp(code, OP_EX_BTS, val, dst_base, dst_disp, size); //BTS: immediate val against memory at dst_base+dst_disp
+}
+
+void btr_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
+{
+	bit_rr(code, OP2_BTR, src, dst, size); //BTR: src and dst are both registers
 }
 
-uint8_t * btr_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void btr_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t dst_disp, uint8_t size)
 {
-	return bit_rr(out, OP2_BTR, src, dst, size);
+	bit_rrdisp(code, OP2_BTR, src, dst_base, dst_disp, size); //BTR: register src against memory at dst_base+dst_disp
 }
 
-uint8_t * btr_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t dst_disp, uint8_t size)
+void btr_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size)
 {
-	return bit_rrdisp8(out, OP2_BTR, src, dst_base, dst_disp, size);
+	bit_ir(code, OP_EX_BTR, val, dst, size); //BTR: immediate val against register dst
 }
 
-uint8_t * btr_ir(uint8_t * out, uint8_t val, uint8_t dst, uint8_t size)
+void btr_irdisp(code_info *code, uint8_t val, uint8_t dst_base, int32_t dst_disp, uint8_t size)
 {
-	return bit_ir(out, OP_EX_BTR, val, dst, size);
+	bit_irdisp(code, OP_EX_BTR, val, dst_base, dst_disp, size); //BTR: immediate val against memory at dst_base+dst_disp
 }
 
-uint8_t * btr_irdisp8(uint8_t * out, uint8_t val, uint8_t dst_base, int8_t dst_disp, uint8_t size)
+void btc_rr(code_info *code, uint8_t src, uint8_t dst, uint8_t size)
 {
-	return bit_irdisp8(out, OP_EX_BTR, val, dst_base, dst_disp, size);
+	bit_rr(code, OP2_BTC, src, dst, size); //BTC: src and dst are both registers
 }
 
-uint8_t * btc_rr(uint8_t * out, uint8_t src, uint8_t dst, uint8_t size)
+void btc_rrdisp(code_info *code, uint8_t src, uint8_t dst_base, int32_t dst_disp, uint8_t size)
 {
-	return bit_rr(out, OP2_BTC, src, dst, size);
+	bit_rrdisp(code, OP2_BTC, src, dst_base, dst_disp, size); //BTC: register src against memory at dst_base+dst_disp
 }
 
-uint8_t * btc_rrdisp8(uint8_t * out, uint8_t src, uint8_t dst_base, int8_t dst_disp, uint8_t size)
+void btc_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size)
 {
-	return bit_rrdisp8(out, OP2_BTC, src, dst_base, dst_disp, size);
+	bit_ir(code, OP_EX_BTC, val, dst, size); //BTC: immediate val against register dst
 }
 
-uint8_t * btc_ir(uint8_t * out, uint8_t val, uint8_t dst, uint8_t size)
+void btc_irdisp(code_info *code, uint8_t val, uint8_t dst_base, int32_t dst_disp, uint8_t size)
 {
-	return bit_ir(out, OP_EX_BTC, val, dst, size);
+	bit_irdisp(code, OP_EX_BTC, val, dst_base, dst_disp, size); //BTC: immediate val against memory at dst_base+dst_disp
 }
 
-uint8_t * btc_irdisp8(uint8_t * out, uint8_t val, uint8_t dst_base, int8_t dst_disp, uint8_t size)
+void jcc(code_info *code, uint8_t cc, code_ptr dest)
 {
-	return bit_irdisp8(out, OP_EX_BTC, val, dst_base, dst_disp, size);
-}
-
-uint8_t * jcc(uint8_t * out, uint8_t cc, uint8_t * dest)
-{
+	check_alloc_code(code, 6);
+	code_ptr out = code->cur;
 	ptrdiff_t disp = dest-(out+2);
 	if (disp <= 0x7F && disp >= -0x80) {
 		*(out++) = OP_JCC | cc;
@@ -1756,15 +1808,17 @@
 			disp >>= 8;
 			*(out++) = disp;
 		} else {
-			printf("%p - %p = %lX\n", dest, out + 6, (long)disp);
-			return NULL;
+			fprintf(stderr, "jcc: %p - %p = %lX\n", dest, out + 6, (long)disp);
+			exit(1);
 		}
 	}
-	return out;
+	code->cur = out;
 }
 
-uint8_t * jmp(uint8_t * out, uint8_t * dest)
+void jmp(code_info *code, code_ptr dest)
 {
+	check_alloc_code(code, 5);
+	code_ptr out = code->cur;
 	ptrdiff_t disp = dest-(out+2);
 	if (disp <= 0x7F && disp >= -0x80) {
 		*(out++) = OP_JMP_BYTE;
@@ -1781,26 +1835,30 @@
 			disp >>= 8;
 			*(out++) = disp;
 		} else {
-			printf("%p - %p = %lX\n", dest, out + 6, (long)disp);
-			return NULL;
+			fprintf(stderr, "jmp: %p - %p = %lX\n", dest, out + 6, (long)disp);
+			exit(1);
 		}
 	}
-	return out;
+	code->cur = out;
 }
 
-uint8_t * jmp_r(uint8_t * out, uint8_t dst)
+void jmp_r(code_info *code, uint8_t dst)
 {
+	check_alloc_code(code, 3);
+	code_ptr out = code->cur;
 	if (dst >= R8) {
 		dst -= R8 - X86_R8;
 		*(out++) = PRE_REX | REX_RM_FIELD;
 	}
 	*(out++) = OP_SINGLE_EA;
 	*(out++) = MODE_REG_DIRECT | dst | (OP_EX_JMP_EA << 3);
-	return out;
+	code->cur = out;
 }
 
-uint8_t * call(uint8_t * out, uint8_t * fun)
+void call(code_info *code, code_ptr fun)
 {
+	check_alloc_code(code, 5);
+	code_ptr out = code->cur;
 	ptrdiff_t disp = fun-(out+5);
 	if (disp <= 0x7FFFFFFF && disp >= -2147483648) {
 		*(out++) = OP_CALL;
@@ -1813,35 +1871,49 @@
 		*(out++) = disp;
 	} else {
 		//TODO: Implement far call???
-		printf("%p - %p = %lX\n", fun, out + 5, (long)disp);
-		return NULL;
+		fprintf(stderr, "%p - %p = %lX\n", fun, out + 5, (long)disp);
+		exit(1);
 	}
-	return out;
+	code->cur = out;
 }
 
-uint8_t * call_r(uint8_t * out, uint8_t dst)
+void call_r(code_info *code, uint8_t dst)
 {
+	check_alloc_code(code, 3);
+	code_ptr out = code->cur;
+	if (dst >= R8) {
+		//extended registers need a REX prefix with the r/m extension bit, same as jmp_r
+		dst -= R8 - X86_R8;
+		*(out++) = PRE_REX | REX_RM_FIELD;
+	}
 	*(out++) = OP_SINGLE_EA;
 	*(out++) = MODE_REG_DIRECT | dst | (OP_EX_CALL_EA << 3);
-	return out;
+	code->cur = out;
 }
 
-uint8_t * retn(uint8_t * out)
+void retn(code_info *code)
 {
+	check_alloc_code(code, 1);
+	code_ptr out = code->cur;
 	*(out++) = OP_RETN;
-	return out;
+	code->cur = out;
 }
 
-uint8_t * cdq(uint8_t * out)
+void cdq(code_info *code)
 {
+	check_alloc_code(code, 1);
+	code_ptr out = code->cur;
 	*(out++) = OP_CDQ;
-	return out;
+	code->cur = out;
 }
 
-uint8_t * loop(uint8_t * out, uint8_t * dst)
+void loop(code_info *code, code_ptr dst)
 {
+	check_alloc_code(code, 2);
+	code_ptr out = code->cur;
 	ptrdiff_t disp = dst-(out+2);
 	*(out++) = OP_LOOP;
 	*(out++) = disp;
-	return out;
+	code->cur = out;
 }
+