changeset 744:fc68992cf18d

Merge windows branch with latest changes
author Michael Pavone <pavone@retrodev.com>
date Thu, 28 May 2015 21:19:55 -0700
parents cf78cb045fa4 (current diff) 8972378e314f (diff)
children daa31ee7d8cd
files Makefile render.h render_sdl.c runtime.S runtime_32.S util.c
diffstat 52 files changed, 7140 insertions(+), 3160 deletions(-) [+]
line wrap: on
line diff
--- a/.hgignore	Thu May 28 21:09:33 2015 -0700
+++ b/.hgignore	Thu May 28 21:19:55 2015 -0700
@@ -6,14 +6,22 @@
 *.jpg
 *.pdf
 *.tar.gz
+*.list
 *~
 starscream/*
+gxz80/*
+musashi/*
 vdpreverse/*
 nemesis/*
 html/*
+generated_tests/*
+ztests/*
 *.o
+*.list
 blastem
 dis
 stateview
 trans
-
+zdis
+ztestrun
+address.log
--- a/68kinst.c	Thu May 28 21:09:33 2015 -0700
+++ b/68kinst.c	Thu May 28 21:19:55 2015 -0700
@@ -19,7 +19,7 @@
 
 uint16_t *m68k_decode_op_ex(uint16_t *cur, uint8_t mode, uint8_t reg, uint8_t size, m68k_op_info *dst)
 {
-	uint16_t ext;
+	uint16_t ext, tmp;
 	dst->addr_mode = mode;
 	switch(mode)
 	{
@@ -36,15 +36,95 @@
 		dst->params.regs.displacement = sign_extend16(ext);
 		break;
 	case MODE_AREG_INDEX_MEM:
-		#ifdef M68020
-			//TODO: implement me for M68020+ support
-		#else
+		dst->params.regs.pri = reg;
+		ext = *(++cur);
+		dst->params.regs.sec = ext >> 11;//includes areg/dreg bit, reg num and word/long bit
+#ifdef M68020
+		dst->params.regs.scale = ext >> 9 & 3;
+		if (ext & 0x100)
+		{
+			dst->params.regs.disp_sizes = ext >> 4 & 3;
+			switch (dst->params.regs.disp_sizes)
+			{
+			case 0:
+				//reserved
+				return NULL;
+			case 1:
+				dst->params.regs.displacement = 0;
+				break;
+			case 2:
+				dst->params.regs.displacement = sign_extend16(*(++cur));
+				break;
+			case 3:
+				tmp = *(++cur);
+				dst->params.regs.displacement = tmp << 16 | *(++cur);
+				break;
+			}
+			if (ext & 0x3)
+			{
+				//memory indirect
+				switch (ext & 0xC4)
+				{
+				case 0x00:
+					dst->addr_mode = MODE_AREG_PREINDEX;
+					break;
+				case 0x04:
+					dst->addr_mode = MODE_AREG_POSTINDEX;
+					break;
+				case 0x40:
+					dst->addr_mode = MODE_AREG_MEM_INDIRECT;
+					break;
+				case 0x80:
+					dst->addr_mode = MODE_PREINDEX;
+					break;
+				case 0x84:
+					dst->addr_mode = MODE_POSTINDEX;
+					break;
+				case 0xC0:
+					dst->addr_mode = MODE_MEM_INDIRECT;
+					break;
+				}
+				dst->params.regs.disp_sizes |= ext << 4 & 0x30;
+				switch (ext & 0x3)
+				{
+				case 0:
+					//reserved
+					return NULL;
+				case 1:
+					dst->params.regs.outer_disp = 0;
+					break;
+				case 2:
+					dst->params.regs.outer_disp = sign_extend16(*(++cur));
+					break;
+				case 3:
+					tmp = *(++cur);
+					dst->params.regs.outer_disp = tmp << 16 | *(++cur);
+					break;
+				}
+			} else {
+				switch (ext >> 6 & 3)
+				{
+				case 0:
+					dst->addr_mode = MODE_AREG_INDEX_BASE_DISP;
+					break;
+				case 1:
+					dst->addr_mode = MODE_AREG_BASE_DISP;
+					break;
+				case 2:
+					dst->addr_mode = MODE_INDEX_BASE_DISP;
+					break;
+				case 3:
+					dst->addr_mode = MODE_BASE_DISP;
+					break;
+				}
+			}
+		} else {
+#endif
 			dst->addr_mode = MODE_AREG_INDEX_DISP8;
-			dst->params.regs.pri = reg;
-			ext = *(++cur);
-			dst->params.regs.sec = ext >> 11;//includes areg/dreg bit, reg num and word/long bit
 			dst->params.regs.displacement = sign_extend8(ext&0xFF);
-		#endif
+#ifdef M68020
+		}
+#endif
 		break;
 	case MODE_PC_INDIRECT_ABS_IMMED:
 		switch(reg)
@@ -60,13 +140,93 @@
 			dst->params.immed = ext << 16 | *(++cur);
 			break;
 		case 3:
-#ifdef M68020
-			//TODO: Implement me for M68020+ support;
-#else
-			dst->addr_mode = MODE_PC_INDEX_DISP8;
 			ext = *(++cur);
 			dst->params.regs.sec = ext >> 11;//includes areg/dreg bit, reg num and word/long bit
-			dst->params.regs.displacement = sign_extend8(ext&0xFF);
+#ifdef M68020
+			dst->params.regs.scale = ext >> 9 & 3;
+			if (ext & 0x100)
+			{
+				dst->params.regs.disp_sizes = ext >> 4 & 3;
+				switch (dst->params.regs.disp_sizes)
+				{
+				case 0:
+					//reserved
+					return NULL;
+				case 1:
+					dst->params.regs.displacement = 0;
+					break;
+				case 2:
+					dst->params.regs.displacement = sign_extend16(*(++cur));
+					break;
+				case 3:
+					tmp = *(++cur);
+					dst->params.regs.displacement = tmp << 16 | *(++cur);
+					break;
+				}
+				if (ext & 0x3)
+				{
+					//memory indirect
+					switch (ext & 0xC4)
+					{
+					case 0x00:
+						dst->addr_mode = MODE_PC_PREINDEX;
+						break;
+					case 0x04:
+						dst->addr_mode = MODE_PC_POSTINDEX;
+						break;
+					case 0x40:
+						dst->addr_mode = MODE_PC_MEM_INDIRECT;
+						break;
+					case 0x80:
+						dst->addr_mode = MODE_ZPC_PREINDEX;
+						break;
+					case 0x84:
+						dst->addr_mode = MODE_ZPC_POSTINDEX;
+						break;
+					case 0xC0:
+						dst->addr_mode = MODE_ZPC_MEM_INDIRECT;
+						break;
+					}
+					dst->params.regs.disp_sizes |= ext << 4 & 0x30;
+					switch (ext & 0x3)
+					{
+					case 0:
+						//reserved
+						return NULL;
+					case 1:
+						dst->params.regs.outer_disp = 0;
+						break;
+					case 2:
+						dst->params.regs.outer_disp = sign_extend16(*(++cur));
+						break;
+					case 3:
+						tmp = *(++cur);
+						dst->params.regs.outer_disp = tmp << 16 | *(++cur);
+						break;
+					}
+				} else {
+					switch (ext >> 6 & 3)
+					{
+					case 0:
+						dst->addr_mode = MODE_PC_INDEX_BASE_DISP;
+						break;
+					case 1:
+						dst->addr_mode = MODE_PC_BASE_DISP;
+						break;
+					case 2:
+						dst->addr_mode = MODE_ZPC_INDEX_BASE_DISP;
+						break;
+					case 3:
+						dst->addr_mode = MODE_ZPC_BASE_DISP;
+						break;
+					}
+				}
+			} else {
+#endif
+				dst->addr_mode = MODE_PC_INDEX_DISP8;
+				dst->params.regs.displacement = sign_extend8(ext&0xFF);
+#ifdef M68020
+			}
 #endif
 			break;
 		case 2:
@@ -172,7 +332,7 @@
 			istream = m68k_decode_op(istream, decoded->extra.size, &(decoded->dst));
 			if (!istream) {
 				decoded->op = M68K_INVALID;
-				return start+1;
+				break;
 			}
 			if (decoded->dst.addr_mode == MODE_REG) {
 				decoded->extra.size = OPSIZE_LONG;
@@ -202,7 +362,7 @@
 			istream = m68k_decode_op_ex(istream, opmode, reg, decoded->extra.size, &(decoded->dst));
 			if (!istream) {
 				decoded->op = M68K_INVALID;
-				return start+1;
+				break;
 			}
 			if (decoded->dst.addr_mode == MODE_REG) {
 				decoded->extra.size = OPSIZE_LONG;
@@ -248,7 +408,7 @@
 					istream = m68k_decode_op_ex(istream, opmode, reg, size, &(decoded->dst));
 					if (!istream) {
 						decoded->op = M68K_INVALID;
-						return start+1;
+						break;
 					}
 				}
 				break;
@@ -287,7 +447,7 @@
 					istream = m68k_decode_op_ex(istream, opmode, reg, size, &(decoded->dst));
 					if (!istream) {
 						decoded->op = M68K_INVALID;
-						return start+1;
+						break;
 					}
 				}
 				break;
@@ -314,7 +474,7 @@
 				istream = m68k_decode_op_ex(istream, opmode, reg, size, &(decoded->dst));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 				break;
 			case 3:
@@ -340,7 +500,7 @@
 				istream = m68k_decode_op_ex(istream, opmode, reg, size, &(decoded->dst));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 				break;
 			case 4:
@@ -365,7 +525,7 @@
 				istream = m68k_decode_op(istream, OPSIZE_BYTE, &(decoded->dst));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 				break;
 			case 5:
@@ -403,7 +563,7 @@
 					istream = m68k_decode_op_ex(istream, opmode, reg, size, &(decoded->dst));
 					if (!istream) {
 						decoded->op = M68K_INVALID;
-						return start+1;
+						break;
 					}
 				}
 				break;
@@ -427,15 +587,29 @@
 					decoded->src.params.immed = (immed << 16) | *(++istream);
 					break;
 				}
-				istream = m68k_decode_op_ex(istream, opmode, reg, size, &(decoded->dst));
+				istream = m68k_decode_op_ex(istream, opmode, reg, decoded->extra.size, &(decoded->dst));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 				break;
 			case 7:
-
-
+#ifdef M68010
+				decoded->op = M68K_MOVES;
+				decoded->extra.size = *istream >> 6 & 0x3;
+				immed = *(++istream);
+				reg = immed  >> 12 & 0x7;
+				opmode = immed & 0x8000 ? MODE_AREG : MODE_REG;
+				if (immed & 0x800) {
+					decoded->src.addr_mode = opmode;
+					decoded->src.params.regs.pri = reg;
+					m68k_decode_op_ex(istream, *start >> 3 & 0x7, *start & 0x7, decoded->extra.size, &(decoded->dst));
+				} else {
+					m68k_decode_op_ex(istream, *start >> 3 & 0x7, *start & 0x7, decoded->extra.size, &(decoded->src));
+					decoded->dst.addr_mode = opmode;
+					decoded->dst.params.regs.pri = reg;
+				}
+#endif
 				break;
 			}
 		}
@@ -450,12 +624,12 @@
 		istream = m68k_decode_op(istream, decoded->extra.size, &(decoded->src));
 		if (!istream) {
 			decoded->op = M68K_INVALID;
-			return start+1;
+			break;
 		}
 		istream = m68k_decode_op_ex(istream, opmode, reg, decoded->extra.size, &(decoded->dst));
 		if (!istream || decoded->dst.addr_mode == MODE_IMMEDIATE) {
 			decoded->op = M68K_INVALID;
-			return start+1;
+			break;
 		}
 		break;
 	case MISC:
@@ -468,7 +642,7 @@
 			istream = m68k_decode_op(istream, decoded->extra.size, &(decoded->src));
 			if (!istream) {
 				decoded->op = M68K_INVALID;
-				return start+1;
+				break;
 			}
 		} else {
 			if (*istream & 0x100) {
@@ -489,7 +663,7 @@
 				istream = m68k_decode_op(istream, decoded->extra.size, &(decoded->src));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 			} else {
 				opmode = (*istream >> 3) & 0x7;
@@ -504,7 +678,7 @@
 						istream = m68k_decode_op_ex(istream, opmode, reg, decoded->extra.size, &(decoded->src));
 						if (!istream) {
 							decoded->op = M68K_INVALID;
-							return start+1;
+							break;
 						}
 						if (decoded->src.addr_mode == MODE_PC_DISPLACE || decoded->src.addr_mode == MODE_PC_INDEX_DISP8) {
 							//adjust displacement to account for extra instruction word
@@ -516,7 +690,7 @@
 						istream = m68k_decode_op_ex(istream, opmode, reg, decoded->extra.size, &(decoded->dst));
 						if (!istream) {
 							decoded->op = M68K_INVALID;
-							return start+1;
+							break;
 						}
 					}
 				} else {
@@ -536,7 +710,7 @@
 						istream= m68k_decode_op(istream, size, &(decoded->dst));
 						if (!istream) {
 							decoded->op = M68K_INVALID;
-							return start+1;
+							break;
 						}
 						break;
 					case 1:
@@ -546,7 +720,7 @@
 							decoded->op = M68K_MOVE_FROM_CCR;
 							size = OPSIZE_WORD;
 #else
-							return istream+1;
+							break;
 #endif
 						} else {
 							decoded->op = M68K_CLR;
@@ -555,7 +729,7 @@
 						istream= m68k_decode_op(istream, size, &(decoded->dst));
 						if (!istream) {
 							decoded->op = M68K_INVALID;
-							return start+1;
+							break;
 						}
 						break;
 					case 2:
@@ -566,14 +740,14 @@
 							istream= m68k_decode_op(istream, size, &(decoded->src));
 							if (!istream) {
 								decoded->op = M68K_INVALID;
-								return start+1;
+								break;
 							}
 						} else {
 							decoded->op = M68K_NEG;
 							istream= m68k_decode_op(istream, size, &(decoded->dst));
 							if (!istream) {
 								decoded->op = M68K_INVALID;
-								return start+1;
+								break;
 							}
 						}
 						decoded->extra.size = size;
@@ -586,14 +760,14 @@
 							istream= m68k_decode_op(istream, size, &(decoded->src));
 							if (!istream) {
 								decoded->op = M68K_INVALID;
-								return start+1;
+								break;
 							}
 						} else {
 							decoded->op = M68K_NOT;
 							istream= m68k_decode_op(istream, size, &(decoded->dst));
 							if (!istream) {
 								decoded->op = M68K_INVALID;
-								return start+1;
+								break;
 							}
 						}
 						decoded->extra.size = size;
@@ -648,7 +822,7 @@
 								istream = m68k_decode_op(istream, OPSIZE_BYTE, &(decoded->dst));
 								if (!istream) {
 									decoded->op = M68K_INVALID;
-									return start+1;
+									break;
 								}
 							} else if((*istream & 0x1C0) == 0x40) {
 								decoded->op = M68K_PEA;
@@ -656,7 +830,7 @@
 								istream = m68k_decode_op(istream, OPSIZE_LONG, &(decoded->src));
 								if (!istream) {
 									decoded->op = M68K_INVALID;
-									return start+1;
+									break;
 								}
 							}
 						}
@@ -678,7 +852,7 @@
 								istream = m68k_decode_op(istream, decoded->extra.size, &(decoded->src));
 								if (!istream) {
 									decoded->op = M68K_INVALID;
-									return start+1;
+									break;
 								}
 							}
 						}
@@ -702,7 +876,7 @@
 							istream = m68k_decode_op(istream, OPSIZE_UNSIZED, &(decoded->src));
 							if (!istream) {
 								decoded->op = M68K_INVALID;
-								return start+1;
+								break;
 							}
 						} else {
 							//it would appear bit 6 needs to be set for it to be a valid instruction here
@@ -783,6 +957,33 @@
 							case 7:
 								//MOVEC
 #ifdef M68010
+								decoded->op = M68K_MOVEC;
+								immed = *(++istream);
+								reg = immed >> 12 & 0x7;
+								opmode = immed & 0x8000 ? MODE_AREG : MODE_REG;
+								immed &= 0xFFF;
+								if (immed & 0x800) {
+									if (immed > MAX_HIGH_CR) {
+										decoded->op = M68K_INVALID;
+										break;
+									} else {
+										immed = immed - 0x800 + CR_USP;
+									}
+								} else {
+									if (immed > MAX_LOW_CR) {
+										decoded->op = M68K_INVALID;
+										break;
+									}
+								}
+								if (*start & 1) {
+									decoded->src.addr_mode = opmode;
+									decoded->src.params.regs.pri = reg;
+									decoded->dst.params.immed = immed;
+								} else {
+									decoded->dst.addr_mode = opmode;
+									decoded->dst.params.regs.pri = reg;
+									decoded->src.params.immed = immed;
+								}
 #endif
 								break;
 							}
@@ -816,7 +1017,7 @@
 				istream = m68k_decode_op(istream, OPSIZE_BYTE, &(decoded->dst));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 			}
 		} else {
@@ -837,7 +1038,7 @@
 			istream = m68k_decode_op(istream, size, &(decoded->dst));
 			if (!istream) {
 				decoded->op = M68K_INVALID;
-				return start+1;
+				break;
 			}
 		}
 		break;
@@ -865,7 +1066,7 @@
 	case MOVEQ:
 		if (*istream & 0x100) {
 			decoded->op = M68K_INVALID;
-			return start+1;
+			break;
 		}
 		decoded->op = M68K_MOVE;
 		decoded->variant = VAR_QUICK;
@@ -891,11 +1092,12 @@
 				istream = m68k_decode_op(istream, OPSIZE_WORD, &(decoded->src));
 				if (!istream || decoded->src.addr_mode == MODE_AREG) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 				break;
 			case 4:
 				decoded->op = M68K_SBCD;
+				decoded->extra.size = OPSIZE_BYTE;
 				decoded->dst.addr_mode = decoded->src.addr_mode = *istream & 0x8 ? MODE_AREG_PREDEC : MODE_REG;
 				decoded->src.params.regs.pri = *istream & 0x7;
 				decoded->dst.params.regs.pri = (*istream >> 9) & 0x7;
@@ -916,7 +1118,7 @@
 				istream = m68k_decode_op(istream, OPSIZE_WORD, &(decoded->src));
 				if (!istream || decoded->src.addr_mode == MODE_AREG) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 				break;
 			}
@@ -929,7 +1131,7 @@
 				istream = m68k_decode_op(istream, size, &(decoded->dst));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 			} else {
 				decoded->dst.addr_mode = MODE_REG;
@@ -937,7 +1139,7 @@
 				istream = m68k_decode_op(istream, size, &(decoded->src));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 			}
 		}
@@ -956,7 +1158,7 @@
 					istream = m68k_decode_op(istream, OPSIZE_LONG, &(decoded->src));
 					if (!istream) {
 						decoded->op = M68K_INVALID;
-						return start+1;
+						break;
 					}
 				} else {
 					decoded->extra.size = size;
@@ -965,7 +1167,7 @@
 					istream = m68k_decode_op(istream, size, &(decoded->dst));
 					if (!istream) {
 						decoded->op = M68K_INVALID;
-						return start+1;
+						break;
 					}
 				}
 			} else {
@@ -993,7 +1195,7 @@
 			istream = m68k_decode_op(istream, decoded->extra.size, &(decoded->src));
 			if (!istream) {
 				decoded->op = M68K_INVALID;
-				return start+1;
+				break;
 			}
 		}
 		break;
@@ -1011,14 +1213,14 @@
 				istream = m68k_decode_op(istream, decoded->extra.size, &(decoded->src));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 			} else {
 				reg = m68k_reg_quick_field(*istream);
 				istream = m68k_decode_op(istream, size, &(decoded->dst));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 				decoded->extra.size = size;
 				if (decoded->dst.addr_mode == MODE_AREG) {
@@ -1046,7 +1248,7 @@
 			istream = m68k_decode_op(istream, decoded->extra.size, &(decoded->src));
 			if (!istream) {
 				decoded->op = M68K_INVALID;
-				return start+1;
+				break;
 			}
 		}
 		break;
@@ -1069,7 +1271,7 @@
 				istream = m68k_decode_op(istream, OPSIZE_WORD, &(decoded->src));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 			} else if(!(*istream & 0xF0)) {
 				decoded->op = M68K_ABCD;
@@ -1100,7 +1302,7 @@
 				istream = m68k_decode_op(istream, decoded->extra.size, &(decoded->dst));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 			}
 		} else {
@@ -1112,7 +1314,7 @@
 				istream = m68k_decode_op(istream, OPSIZE_WORD, &(decoded->src));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 			} else {
 				decoded->op = M68K_AND;
@@ -1122,7 +1324,7 @@
 				istream = m68k_decode_op(istream, decoded->extra.size, &(decoded->src));
 				if (!istream) {
 					decoded->op = M68K_INVALID;
-					return start+1;
+					break;
 				}
 			}
 		}
@@ -1141,7 +1343,7 @@
 					istream = m68k_decode_op(istream, OPSIZE_LONG, &(decoded->src));
 					if (!istream) {
 						decoded->op = M68K_INVALID;
-						return start+1;
+						break;
 					}
 				} else {
 					decoded->extra.size = size;
@@ -1150,7 +1352,7 @@
 					istream = m68k_decode_op(istream, size, &(decoded->dst));
 					if (!istream) {
 						decoded->op = M68K_INVALID;
-						return start+1;
+						break;
 					}
 				}
 			} else {
@@ -1178,7 +1380,7 @@
 			istream = m68k_decode_op(istream, decoded->extra.size, &(decoded->src));
 			if (!istream) {
 				decoded->op = M68K_INVALID;
-				return start+1;
+				break;
 			}
 		}
 		break;
@@ -1215,7 +1417,7 @@
 			istream = m68k_decode_op(istream, OPSIZE_WORD, &(decoded->dst));
 			if (!istream) {
 				decoded->op = M68K_INVALID;
-				return start+1;
+				break;
 			}
 		} else if((*istream & 0xC0) != 0xC0) {
 			switch(((*istream >> 2) & 0x6) | ((*istream >> 8) & 1))
@@ -1263,6 +1465,56 @@
 		} else {
 #ifdef M68020
 			//TODO: Implement bitfield instructions for M68020+ support
+			switch (*istream >> 8 & 7)
+			{
+			case 0:
+				decoded->op = M68K_BFTST; //<ea>
+				break;
+			case 1:
+				decoded->op = M68K_BFEXTU; //<ea>, Dn
+				break;
+			case 2:
+				decoded->op = M68K_BFCHG; //<ea>
+				break;
+			case 3:
+				decoded->op = M68K_BFEXTS; //<ea>, Dn
+				break;
+			case 4:
+				decoded->op = M68K_BFCLR; //<ea>
+				break;
+			case 5:
+				decoded->op = M68K_BFFFO; //<ea>, Dn
+				break;
+			case 6:
+				decoded->op = M68K_BFSET; //<ea>
+				break;
+			case 7:
+				decoded->op = M68K_BFINS; //Dn, <ea>
+				break;
+			}
+			opmode = *istream >> 3 & 0x7;
+			reg = *istream & 0x7;
+			m68k_op_info *ea, *other;
+			if (decoded->op == M68K_BFEXTU || decoded->op == M68K_BFEXTS || decoded->op == M68K_BFFFO)
+			{
+				ea = &(decoded->src);
+				other = &(decoded->dst);
+			} else {
+				ea = &(decoded->dst);
+				other = &(decoded->src);
+			}
+			if (*istream & 0x100)
+			{
+				immed = *(++istream);
+				other->addr_mode = MODE_REG;
+				other->params.regs.pri = immed >> 12 & 0x7;
+			} else {
+				immed = *(++istream);
+			}
+			decoded->extra.size = OPSIZE_UNSIZED;
+			istream = m68k_decode_op_ex(istream, opmode, reg, decoded->extra.size, ea);
+			ea->addr_mode |= M68K_FLAG_BITFIELD;
+			ea->bitfield = immed & 0xFFF;
 #endif
 		}
 		break;
@@ -1270,6 +1522,10 @@
 		//TODO: Implement me
 		break;
 	}
+	if (decoded->op == M68K_INVALID) {
+		decoded->src.params.immed = *start;
+		return start + 1;
+	}
 	return istream+1;
 }
 
@@ -1416,7 +1672,43 @@
 	"trapv",
 	"tst",
 	"unlk",
-	"invalid"
+	"invalid",
+#ifdef M68010
+	"bkpt",
+	"move", //from ccr
+	"movec",
+	"moves",
+	"rtd",
+#endif
+#ifdef M68020
+	"bfchg",
+	"bfclr",
+	"bfexts",
+	"bfextu",
+	"bfffo",
+	"bfins",
+	"bfset",
+	"bftst",
+	"callm",
+	"cas",
+	"cas2",
+	"chk2",
+	"cmp2",
+	"cpbcc",
+	"cpdbcc",
+	"cpgen",
+	"cprestore",
+	"cpsave",
+	"cpscc",
+	"cptrapcc",
+	"divsl",
+	"divul",
+	"extb",
+	"pack",
+	"rtm",
+	"trapcc",
+	"unpk"
+#endif
 };
 
 char * cond_mnem[] = {
@@ -1437,55 +1729,651 @@
 	"gt",
 	"le"
 };
+#ifdef M68010
+char * cr_mnem[] = {
+	"SFC",
+	"DFC",
+#ifdef M68020
+	"CACR",
+#endif
+	"USP",
+	"VBR",
+#ifdef M68020
+	"CAAR",
+	"MSP",
+	"ISP"
+#endif
+};
+#endif
 
-int m68k_disasm_op(m68k_op_info *decoded, char *dst, int need_comma, uint8_t labels, uint32_t address)
+int m68k_disasm_op(m68k_op_info *decoded, char *dst, int need_comma, uint8_t labels, uint32_t address, format_label_fun label_fun, void * data)
 {
 	char * c = need_comma ? "," : "";
-	switch(decoded->addr_mode)
+	int ret = 0;
+#ifdef M68020
+	uint8_t addr_mode = decoded->addr_mode & (~M68K_FLAG_BITFIELD);
+#else
+	uint8_t addr_mode = decoded->addr_mode;
+#endif
+	switch(addr_mode)
 	{
 	case MODE_REG:
-		return sprintf(dst, "%s d%d", c, decoded->params.regs.pri);
+		ret = sprintf(dst, "%s d%d", c, decoded->params.regs.pri);
+		break;
 	case MODE_AREG:
-		return sprintf(dst, "%s a%d", c, decoded->params.regs.pri);
+		ret = sprintf(dst, "%s a%d", c, decoded->params.regs.pri);
+		break;
 	case MODE_AREG_INDIRECT:
-		return sprintf(dst, "%s (a%d)", c, decoded->params.regs.pri);
+		ret = sprintf(dst, "%s (a%d)", c, decoded->params.regs.pri);
+		break;
 	case MODE_AREG_POSTINC:
-		return sprintf(dst, "%s (a%d)+", c, decoded->params.regs.pri);
+		ret = sprintf(dst, "%s (a%d)+", c, decoded->params.regs.pri);
+		break;
 	case MODE_AREG_PREDEC:
-		return sprintf(dst, "%s -(a%d)", c, decoded->params.regs.pri);
+		ret = sprintf(dst, "%s -(a%d)", c, decoded->params.regs.pri);
+		break;
 	case MODE_AREG_DISPLACE:
-		return sprintf(dst, "%s (%d, a%d)", c, decoded->params.regs.displacement, decoded->params.regs.pri);
+		ret = sprintf(dst, "%s (%d, a%d)", c, decoded->params.regs.displacement, decoded->params.regs.pri);
+		break;
 	case MODE_AREG_INDEX_DISP8:
-		return sprintf(dst, "%s (%d, a%d, %c%d.%c)", c, decoded->params.regs.displacement, decoded->params.regs.pri, (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w');
+#ifdef M68020
+		if (decoded->params.regs.scale)
+		{
+			ret = sprintf(dst, "%s (%d, a%d, %c%d.%c*%d)", c, decoded->params.regs.displacement, decoded->params.regs.pri, (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+		} else {
+#endif
+			ret = sprintf(dst, "%s (%d, a%d, %c%d.%c)", c, decoded->params.regs.displacement, decoded->params.regs.pri, (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w');
+#ifdef M68020
+		}
+#endif
+		break;
+#ifdef M68020
+	case MODE_AREG_INDEX_BASE_DISP:
+		if (decoded->params.regs.disp_sizes > 1)
+		{
+			ret = sprintf(dst, "%s (%d.%c, a%d, %c%d.%c*%d)", c, decoded->params.regs.displacement,
+			              decoded->params.regs.disp_sizes == 2 ? 'w' : 'l', decoded->params.regs.pri,
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+		} else {
+			ret = sprintf(dst, "%s (a%d, %c%d.%c*%d)", c, decoded->params.regs.pri,
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+		}
+		break;
+	case MODE_AREG_PREINDEX:
+		switch (decoded->params.regs.disp_sizes)
+		{
+		case 0x11:
+			//no base displacement or outer displacement
+			ret = sprintf(dst, "%s ([a%d, %c%d.%c*%d])", c, decoded->params.regs.pri,
+						  (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+						  (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+			break;
+		case 0x12:
+		case 0x13:
+			//base displacement only
+			ret = sprintf(dst, "%s ([%d.%c, a%d, %c%d.%c*%d])", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', decoded->params.regs.pri,
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+			break;
+		case 0x21:
+		case 0x31:
+			//outer displacement only
+			ret = sprintf(dst, "%s ([a%d, %c%d.%c*%d], %d.%c)", c, decoded->params.regs.pri,
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale,
+			              decoded->params.regs.outer_disp, (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		case 0x22:
+		case 0x23:
+		case 0x32:
+		case 0x33:
+			//both outer and inner displacement
+			ret = sprintf(dst, "%s ([%d.%c, a%d, %c%d.%c*%d], %d.%c)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', decoded->params.regs.pri,
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale,
+			              decoded->params.regs.outer_disp, (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		}
+		break;
+	case MODE_AREG_POSTINDEX:
+		switch (decoded->params.regs.disp_sizes)
+		{
+		case 0x11:
+			//no base displacement or outer displacement
+			ret = sprintf(dst, "%s ([a%d], %c%d.%c*%d)", c, decoded->params.regs.pri,
+						  (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+						  (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+			break;
+		case 0x12:
+		case 0x13:
+			//base displacement only
+			ret = sprintf(dst, "%s ([%d.%c, a%d], %c%d.%c*%d)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', decoded->params.regs.pri,
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+			break;
+		case 0x21:
+		case 0x31:
+			//outer displacement only
+			ret = sprintf(dst, "%s ([a%d], %c%d.%c*%d, %d.%c)", c, decoded->params.regs.pri,
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale,
+			              decoded->params.regs.outer_disp, (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		case 0x22:
+		case 0x23:
+		case 0x32:
+		case 0x33:
+			//both outer and inner displacement
+			ret = sprintf(dst, "%s ([%d.%c, a%d], %c%d.%c*%d, %d.%c)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', decoded->params.regs.pri,
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale,
+			              decoded->params.regs.outer_disp, (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		}
+		break;
+	case MODE_AREG_MEM_INDIRECT:
+		switch (decoded->params.regs.disp_sizes)
+		{
+		case 0x11:
+			//no base displacement or outer displacement
+			ret = sprintf(dst, "%s ([a%d])", c, decoded->params.regs.pri);
+			break;
+		case 0x12:
+		case 0x13:
+			//base displacement only
+			ret = sprintf(dst, "%s ([%d.%c, a%d])", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', decoded->params.regs.pri);
+			break;
+		case 0x21:
+		case 0x31:
+			//outer displacement only
+			ret = sprintf(dst, "%s ([a%d], %d.%c)", c, decoded->params.regs.pri, decoded->params.regs.outer_disp,
+			              (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		case 0x22:
+		case 0x23:
+		case 0x32:
+		case 0x33:
+			//both outer and inner displacement
+			ret = sprintf(dst, "%s ([%d.%c, a%d], %d.%c)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', decoded->params.regs.pri,
+			              decoded->params.regs.outer_disp, (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		}
+		break;
+	case MODE_AREG_BASE_DISP:
+		if (decoded->params.regs.disp_sizes > 1)
+		{
+			ret = sprintf(dst, "%s (%d.%c, a%d)", c, decoded->params.regs.displacement,
+			              decoded->params.regs.disp_sizes == 2 ? 'w' : 'l', decoded->params.regs.pri);
+		} else {
+			//this is a lossy representation of the encoded instruction
+			//not sure if there's a better way to print it though
+			ret = sprintf(dst, "%s (a%d)", c, decoded->params.regs.pri);
+		}
+		break;
+	case MODE_INDEX_BASE_DISP:
+		if (decoded->params.regs.disp_sizes > 1)
+		{
+			ret = sprintf(dst, "%s (%d.%c, %c%d.%c*%d)", c, decoded->params.regs.displacement,
+			              decoded->params.regs.disp_sizes == 2 ? 'w' : 'l',
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+		} else {
+			ret = sprintf(dst, "%s (%c%d.%c*%d)", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale);
+		}
+		break;
+	case MODE_PREINDEX:
+		switch (decoded->params.regs.disp_sizes)
+		{
+		case 0x11:
+			//no base displacement or outer displacement
+			ret = sprintf(dst, "%s ([%c%d.%c*%d])", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale);
+			break;
+		case 0x12:
+		case 0x13:
+			//base displacement only
+			ret = sprintf(dst, "%s ([%d.%c, %c%d.%c*%d])", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l',
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+			break;
+		case 0x21:
+		case 0x31:
+			//outer displacement only
+			ret = sprintf(dst, "%s ([%c%d.%c*%d], %d.%c)", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale, decoded->params.regs.outer_disp,
+			              (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		case 0x22:
+		case 0x23:
+		case 0x32:
+		case 0x33:
+			//both outer and inner displacement
+			ret = sprintf(dst, "%s ([%d.%c, %c%d.%c*%d], %d.%c)", c, decoded->params.regs.displacement,
+			              decoded->params.regs.disp_sizes & 3 == 2 ? 'w' : 'l',
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale,
+			              decoded->params.regs.outer_disp, decoded->params.regs.disp_sizes & 0x30 == 0x20 ? 'w' : 'l');
+			break;
+		}
+		break;
+	case MODE_POSTINDEX: //disp_sizes: low 2 bits = base displacement size, bits 4-5 = outer displacement size (2 prints as .w, otherwise .l)
+		switch (decoded->params.regs.disp_sizes)
+		{
+		case 0x11:
+			//no base displacement or outer displacement
+			ret = sprintf(dst, "%s ([], %c%d.%c*%d)", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale);
+			break;
+		case 0x12:
+		case 0x13:
+			//base displacement only
+			ret = sprintf(dst, "%s ([%d.%c], %c%d.%c*%d)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', //parens required: == binds tighter than &
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+			break;
+		case 0x21:
+		case 0x31:
+			//outer displacement only
+			ret = sprintf(dst, "%s ([], %c%d.%c*%d, %d.%c)", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale, decoded->params.regs.outer_disp,
+			              (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		case 0x22:
+		case 0x23:
+		case 0x32:
+		case 0x33:
+			//both outer and inner displacement
+			ret = sprintf(dst, "%s ([%d.%c], %c%d.%c*%d, %d.%c)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l',
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale,
+			              decoded->params.regs.outer_disp, (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		}
+		break;
+	case MODE_MEM_INDIRECT: //disp_sizes: low 2 bits = base displacement size, bits 4-5 = outer displacement size (2 prints as .w, otherwise .l)
+		switch (decoded->params.regs.disp_sizes)
+		{
+		case 0x11:
+			//no base displacement or outer displacement
+			ret = sprintf(dst, "%s ([])", c);
+			break;
+		case 0x12:
+		case 0x13:
+			//base displacement only
+			ret = sprintf(dst, "%s ([%d.%c])", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l'); //parens required: == binds tighter than &
+			break;
+		case 0x21:
+		case 0x31:
+			//outer displacement only
+			ret = sprintf(dst, "%s ([], %d.%c)", c, decoded->params.regs.outer_disp,
+			              (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		case 0x22:
+		case 0x23:
+		case 0x32:
+		case 0x33:
+			//both outer and inner displacement
+			ret = sprintf(dst, "%s ([%d.%c], %d.%c)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', decoded->params.regs.outer_disp,
+			              (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		}
+		break;
+	case MODE_BASE_DISP: //base displacement with no index register printed; disp_sizes <= 1 means there is no displacement to show
+		if (decoded->params.regs.disp_sizes > 1)
+		{
+			ret = sprintf(dst, "%s (%d.%c)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l'); //parens required: == binds tighter than &
+		} else {
+			ret = sprintf(dst, "%s ()", c);
+		}
+		break;
+#endif
 	case MODE_IMMEDIATE:
 	case MODE_IMMEDIATE_WORD:
-		return sprintf(dst, (decoded->params.immed <= 128 ? "%s #%d" : "%s #$%X"), c, decoded->params.immed);
+		ret = sprintf(dst, (decoded->params.immed <= 128 ? "%s #%d" : "%s #$%X"), c, decoded->params.immed);
+		break;
 	case MODE_ABSOLUTE_SHORT:
 		if (labels) {
-			return sprintf(dst, "%s ADR_%X.w", c, decoded->params.immed);
+			ret = sprintf(dst, "%s ", c);
+			ret += label_fun(dst+ret, decoded->params.immed, data);
+			strcat(dst+ret, ".w");
+			ret = ret + 2;
 		} else {
-			return sprintf(dst, "%s $%X.w", c, decoded->params.immed);
+			ret = sprintf(dst, "%s $%X.w", c, decoded->params.immed);
 		}
+		break;
 	case MODE_ABSOLUTE:
 		if (labels) {
-			return sprintf(dst, "%s ADR_%X.l", c, decoded->params.immed);
+			ret = sprintf(dst, "%s ", c);
+			ret += label_fun(dst+ret, decoded->params.immed, data);
+			strcat(dst+ret, ".l");
+			ret = ret + 2;
 		} else {
-			return sprintf(dst, "%s $%X", c, decoded->params.immed);
+			ret = sprintf(dst, "%s $%X", c, decoded->params.immed);
 		}
+		break;
 	case MODE_PC_DISPLACE:
 		if (labels) {
-			return sprintf(dst, "%s ADR_%X(pc)", c, address + 2 + decoded->params.regs.displacement);
+			ret = sprintf(dst, "%s ", c);
+			ret += label_fun(dst+ret, address + 2 + decoded->params.regs.displacement, data);
+			strcat(dst+ret, "(pc)");
+			ret = ret + 4;
 		} else {
-			return sprintf(dst, "%s (%d, pc)", c, decoded->params.regs.displacement);
+			ret = sprintf(dst, "%s (%d, pc)", c, decoded->params.regs.displacement);
 		}
+		break;
 	case MODE_PC_INDEX_DISP8:
-		return sprintf(dst, "%s (%d, pc, %c%d.%c)", c, decoded->params.regs.displacement, (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w');
+#ifdef M68020
+		if (decoded->params.regs.scale)
+		{
+			ret = sprintf(dst, "%s (%d, pc, %c%d.%c*%d)", c, decoded->params.regs.displacement, (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+		} else {
+#endif
+			ret = sprintf(dst, "%s (%d, pc, %c%d.%c)", c, decoded->params.regs.displacement, (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w');
+#ifdef M68020
+		}
+#endif
+		break;
+#ifdef M68020
+	case MODE_PC_INDEX_BASE_DISP:
+		if (decoded->params.regs.disp_sizes > 1)
+		{
+			ret = sprintf(dst, "%s (%d.%c, pc, %c%d.%c*%d)", c, decoded->params.regs.displacement,
+			              decoded->params.regs.disp_sizes == 2 ? 'w' : 'l',
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+		} else {
+			ret = sprintf(dst, "%s (pc, %c%d.%c*%d)", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale);
+		}
+		break;
+	case MODE_PC_PREINDEX: //disp_sizes: low 2 bits = base displacement size, bits 4-5 = outer displacement size (2 prints as .w, otherwise .l)
+		switch (decoded->params.regs.disp_sizes)
+		{
+		case 0x11:
+			//no base displacement or outer displacement
+			ret = sprintf(dst, "%s ([pc, %c%d.%c*%d])", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale);
+			break;
+		case 0x12:
+		case 0x13:
+			//base displacement only
+			ret = sprintf(dst, "%s ([%d.%c, pc, %c%d.%c*%d])", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', //parens required: == binds tighter than &
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+			break;
+		case 0x21:
+		case 0x31:
+			//outer displacement only
+			ret = sprintf(dst, "%s ([pc, %c%d.%c*%d], %d.%c)", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale, decoded->params.regs.outer_disp,
+			              (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		case 0x22:
+		case 0x23:
+		case 0x32:
+		case 0x33:
+			//both outer and inner displacement
+			ret = sprintf(dst, "%s ([%d.%c, pc, %c%d.%c*%d], %d.%c)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l',
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale,
+			              decoded->params.regs.outer_disp, (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		}
+		break;
+	case MODE_PC_POSTINDEX: //disp_sizes: low 2 bits = base displacement size, bits 4-5 = outer displacement size (2 prints as .w, otherwise .l)
+		switch (decoded->params.regs.disp_sizes)
+		{
+		case 0x11:
+			//no base displacement or outer displacement
+			ret = sprintf(dst, "%s ([pc], %c%d.%c*%d)", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale);
+			break;
+		case 0x12:
+		case 0x13:
+			//base displacement only
+			ret = sprintf(dst, "%s ([%d.%c, pc], %c%d.%c*%d)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', //parens required: == binds tighter than &
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+			break;
+		case 0x21:
+		case 0x31:
+			//outer displacement only
+			ret = sprintf(dst, "%s ([pc], %c%d.%c*%d, %d.%c)", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale, decoded->params.regs.outer_disp,
+			              (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		case 0x22:
+		case 0x23:
+		case 0x32:
+		case 0x33:
+			//both outer and inner displacement
+			ret = sprintf(dst, "%s ([%d.%c, pc], %c%d.%c*%d, %d.%c)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l',
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale,
+			              decoded->params.regs.outer_disp, (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		}
+		break;
+	case MODE_PC_MEM_INDIRECT: //disp_sizes: low 2 bits = base displacement size, bits 4-5 = outer displacement size (2 prints as .w, otherwise .l)
+		switch (decoded->params.regs.disp_sizes)
+		{
+		case 0x11:
+			//no base displacement or outer displacement
+			ret = sprintf(dst, "%s ([pc])", c);
+			break;
+		case 0x12:
+		case 0x13:
+			//base displacement only
+			ret = sprintf(dst, "%s ([%d.%c, pc])", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l'); //parens required: == binds tighter than &
+			break;
+		case 0x21:
+		case 0x31:
+			//outer displacement only
+			ret = sprintf(dst, "%s ([pc], %d.%c)", c, decoded->params.regs.outer_disp,
+			              (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		case 0x22:
+		case 0x23:
+		case 0x32:
+		case 0x33:
+			//both outer and inner displacement
+			ret = sprintf(dst, "%s ([%d.%c, pc], %d.%c)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', decoded->params.regs.outer_disp,
+			              (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		}
+		break;
+	case MODE_PC_BASE_DISP: //pc-relative base displacement with no index register printed; disp_sizes <= 1 means there is no displacement to show
+		if (decoded->params.regs.disp_sizes > 1)
+		{
+			ret = sprintf(dst, "%s (%d.%c, pc)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l'); //parens required: == binds tighter than &
+		} else {
+			ret = sprintf(dst, "%s (pc)", c);
+		}
+		break;
+	case MODE_ZPC_INDEX_BASE_DISP:
+		if (decoded->params.regs.disp_sizes > 1)
+		{
+			ret = sprintf(dst, "%s (%d.%c, zpc, %c%d.%c*%d)", c, decoded->params.regs.displacement,
+			              decoded->params.regs.disp_sizes == 2 ? 'w' : 'l',
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+		} else {
+			ret = sprintf(dst, "%s (zpc, %c%d.%c*%d)", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale);
+		}
+		break;
+	case MODE_ZPC_PREINDEX: //disp_sizes: low 2 bits = base displacement size, bits 4-5 = outer displacement size (2 prints as .w, otherwise .l)
+		switch (decoded->params.regs.disp_sizes)
+		{
+		case 0x11:
+			//no base displacement or outer displacement
+			ret = sprintf(dst, "%s ([zpc, %c%d.%c*%d])", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale);
+			break;
+		case 0x12:
+		case 0x13:
+			//base displacement only
+			ret = sprintf(dst, "%s ([%d.%c, zpc, %c%d.%c*%d])", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', //parens required: == binds tighter than &
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+			break;
+		case 0x21:
+		case 0x31:
+			//outer displacement only
+			ret = sprintf(dst, "%s ([zpc, %c%d.%c*%d], %d.%c)", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale, decoded->params.regs.outer_disp,
+			              (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		case 0x22:
+		case 0x23:
+		case 0x32:
+		case 0x33:
+			//both outer and inner displacement
+			ret = sprintf(dst, "%s ([%d.%c, zpc, %c%d.%c*%d], %d.%c)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l',
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale,
+			              decoded->params.regs.outer_disp, (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		}
+		break;
+	case MODE_ZPC_POSTINDEX: //disp_sizes: low 2 bits = base displacement size, bits 4-5 = outer displacement size (2 prints as .w, otherwise .l)
+		switch (decoded->params.regs.disp_sizes)
+		{
+		case 0x11:
+			//no base displacement or outer displacement
+			ret = sprintf(dst, "%s ([zpc], %c%d.%c*%d)", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale);
+			break;
+		case 0x12:
+		case 0x13:
+			//base displacement only
+			ret = sprintf(dst, "%s ([%d.%c, zpc], %c%d.%c*%d)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', //parens required: == binds tighter than &
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale);
+			break;
+		case 0x21:
+		case 0x31:
+			//outer displacement only
+			ret = sprintf(dst, "%s ([zpc], %c%d.%c*%d, %d.%c)", c, (decoded->params.regs.sec & 0x10) ? 'a': 'd',
+			              (decoded->params.regs.sec >> 1) & 0x7, (decoded->params.regs.sec & 1) ? 'l': 'w',
+			              1 << decoded->params.regs.scale, decoded->params.regs.outer_disp,
+			              (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		case 0x22:
+		case 0x23:
+		case 0x32:
+		case 0x33:
+			//both outer and inner displacement
+			ret = sprintf(dst, "%s ([%d.%c, zpc], %c%d.%c*%d, %d.%c)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l',
+			              (decoded->params.regs.sec & 0x10) ? 'a': 'd', (decoded->params.regs.sec >> 1) & 0x7,
+			              (decoded->params.regs.sec & 1) ? 'l': 'w', 1 << decoded->params.regs.scale,
+			              decoded->params.regs.outer_disp, (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		}
+		break;
+	case MODE_ZPC_MEM_INDIRECT: //disp_sizes: low 2 bits = base displacement size, bits 4-5 = outer displacement size (2 prints as .w, otherwise .l)
+		switch (decoded->params.regs.disp_sizes)
+		{
+		case 0x11:
+			//no base displacement or outer displacement
+			ret = sprintf(dst, "%s ([zpc])", c);
+			break;
+		case 0x12:
+		case 0x13:
+			//base displacement only
+			ret = sprintf(dst, "%s ([%d.%c, zpc])", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l'); //parens required: == binds tighter than &
+			break;
+		case 0x21:
+		case 0x31:
+			//outer displacement only
+			ret = sprintf(dst, "%s ([zpc], %d.%c)", c, decoded->params.regs.outer_disp,
+			              (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		case 0x22:
+		case 0x23:
+		case 0x32:
+		case 0x33:
+			//both outer and inner displacement
+			ret = sprintf(dst, "%s ([%d.%c, zpc], %d.%c)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l', decoded->params.regs.outer_disp,
+			              (decoded->params.regs.disp_sizes & 0x30) == 0x20 ? 'w' : 'l');
+			break;
+		}
+		break;
+	case MODE_ZPC_BASE_DISP: //zpc base displacement with no index register printed; disp_sizes <= 1 means there is no displacement to show
+		if (decoded->params.regs.disp_sizes > 1)
+		{
+			ret = sprintf(dst, "%s (%d.%c, zpc)", c, decoded->params.regs.displacement,
+			              (decoded->params.regs.disp_sizes & 3) == 2 ? 'w' : 'l'); //parens required: == binds tighter than &
+		} else {
+			ret = sprintf(dst, "%s (zpc)", c);
+		}
+		break;
+#endif
 	default:
-		return 0;
+		ret = 0;
 	}
+#ifdef M68020
+	if (decoded->addr_mode & M68K_FLAG_BITFIELD)
+	{
+		switch (decoded->bitfield & 0x820)
+		{
+		case 0:
+			return ret + sprintf(dst+ret, " {$%X:%d}", decoded->bitfield >> 6 & 0x1F, decoded->bitfield & 0x1F ? decoded->bitfield & 0x1F : 32);
+		case 0x20:
+			return ret + sprintf(dst+ret, " {$%X:d%d}", decoded->bitfield >> 6 & 0x1F, decoded->bitfield & 0x7);
+		case 0x800:
+			return ret + sprintf(dst+ret, " {d%d:%d}", decoded->bitfield >> 6 & 0x7, decoded->bitfield & 0x1F ? decoded->bitfield & 0x1F : 32);
+		case 0x820:
+			return ret + sprintf(dst+ret, " {d%d:d%d}", decoded->bitfield >> 6 & 0x7, decoded->bitfield & 0x7);
+		}
+	}
+#endif
+	return ret;
 }
 
-int m68k_disasm_movem_op(m68k_op_info *decoded, m68k_op_info *other, char *dst, int need_comma, uint8_t labels, uint32_t address)
+int m68k_disasm_movem_op(m68k_op_info *decoded, m68k_op_info *other, char *dst, int need_comma, uint8_t labels, uint32_t address, format_label_fun label_fun, void * data)
 {
 	int8_t dir, reg, bit, regnum, last=-1, lastreg, first=-1;
 	char *rtype, *last_rtype;
@@ -1539,11 +2427,16 @@
 		}
 		return oplen;
 	} else {
-		return m68k_disasm_op(decoded, dst, need_comma, labels, address);
+		return m68k_disasm_op(decoded, dst, need_comma, labels, address, label_fun, data);
 	}
 }
 
-int m68k_disasm_ex(m68kinst * decoded, char * dst, uint8_t labels)
+int m68k_default_label_fun(char * dst, uint32_t address, void * data) //default label formatter: writes "ADR_" followed by address in uppercase hex to dst; data is not used
+{
+	return sprintf(dst, "ADR_%X", address); //returns sprintf's result, i.e. the number of characters written
+}
+
+int m68k_disasm_ex(m68kinst * decoded, char * dst, uint8_t labels, format_label_fun label_fun, void * data)
 {
 	int ret,op1len;
 	uint8_t size;
@@ -1561,9 +2454,11 @@
 		if (decoded->op != M68K_SCC) {
 			if (labels) {
 				if (decoded->op == M68K_DBCC) {
-					ret += sprintf(dst+ret, " d%d, ADR_%X", decoded->dst.params.regs.pri, decoded->address + 2 + decoded->src.params.immed);
+					ret += sprintf(dst+ret, " d%d, ", decoded->dst.params.regs.pri);
+					ret += label_fun(dst+ret, decoded->address + 2 + decoded->src.params.immed, data);
 				} else {
-					ret += sprintf(dst+ret, " ADR_%X", decoded->address + 2 + decoded->src.params.immed);
+					dst[ret++] = ' ';
+					ret += label_fun(dst+ret, decoded->address + 2 + decoded->src.params.immed, data);
 				}
 			} else {
 				if (decoded->op == M68K_DBCC) {
@@ -1577,8 +2472,8 @@
 		break;
 	case M68K_BSR:
 		if (labels) {
-			ret = sprintf(dst, "bsr%s ADR_%X", decoded->variant == VAR_BYTE ? ".s" : "",
-			decoded->address + 2 + decoded->src.params.immed);
+			ret = sprintf(dst, "bsr%s ", decoded->variant == VAR_BYTE ? ".s" : "");
+			ret += label_fun(dst+ret, decoded->address + 2 + decoded->src.params.immed, data);
 		} else {
 			ret = sprintf(dst, "bsr%s #%d <%X>", decoded->variant == VAR_BYTE ? ".s" : "", decoded->src.params.immed, decoded->address + 2 + decoded->src.params.immed);
 		}
@@ -1586,7 +2481,7 @@
 	case M68K_MOVE_FROM_SR:
 		ret = sprintf(dst, "%s", mnemonics[decoded->op]);
 		ret += sprintf(dst + ret, " SR");
-		ret += m68k_disasm_op(&(decoded->dst), dst + ret, 1, labels, decoded->address);
+		ret += m68k_disasm_op(&(decoded->dst), dst + ret, 1, labels, decoded->address, label_fun, data);
 		return ret;
 	case M68K_ANDI_SR:
 	case M68K_EORI_SR:
@@ -1598,19 +2493,34 @@
 	case M68K_MOVE_CCR:
 	case M68K_ORI_CCR:
 		ret = sprintf(dst, "%s", mnemonics[decoded->op]);
-		ret += m68k_disasm_op(&(decoded->src), dst + ret, 0, labels, decoded->address);
+		ret += m68k_disasm_op(&(decoded->src), dst + ret, 0, labels, decoded->address, label_fun, data);
 		ret += sprintf(dst + ret, ", %s", special_op);
 		return ret;
 	case M68K_MOVE_USP:
 		ret = sprintf(dst, "%s", mnemonics[decoded->op]);
 		if (decoded->src.addr_mode != MODE_UNUSED) {
-			ret += m68k_disasm_op(&(decoded->src), dst + ret, 0, labels, decoded->address);
+			ret += m68k_disasm_op(&(decoded->src), dst + ret, 0, labels, decoded->address, label_fun, data);
 			ret += sprintf(dst + ret, ", USP");
 		} else {
 			ret += sprintf(dst + ret, "USP, ");
-			ret += m68k_disasm_op(&(decoded->dst), dst + ret, 0, labels, decoded->address);
+			ret += m68k_disasm_op(&(decoded->dst), dst + ret, 0, labels, decoded->address, label_fun, data);
 		}
 		return ret;
+	case M68K_INVALID:
+		ret = sprintf(dst, "dc.w $%X", decoded->src.params.immed);
+		return ret;
+#ifdef M68010
+	case M68K_MOVEC:
+		ret = sprintf(dst, "%s ", mnemonics[decoded->op]);
+		if (decoded->src.addr_mode == MODE_UNUSED) {
+			ret += sprintf(dst + ret, "%s, ", cr_mnem[decoded->src.params.immed]);
+			ret += m68k_disasm_op(&(decoded->dst), dst + ret, 0, labels, decoded->address, label_fun, data);
+		} else {
+			ret += m68k_disasm_op(&(decoded->src), dst + ret, 0, labels, decoded->address, label_fun, data);
+			ret += sprintf(dst + ret, ", %s", cr_mnem[decoded->dst.params.immed]);
+		}
+		return ret;
+#endif
 	default:
 		size = decoded->extra.size;
 		ret = sprintf(dst, "%s%s%s",
@@ -1619,23 +2529,27 @@
 				size == OPSIZE_BYTE ? ".b" : (size == OPSIZE_WORD ? ".w" : (size == OPSIZE_LONG ? ".l" : "")));
 	}
 	if (decoded->op == M68K_MOVEM) {
-		op1len = m68k_disasm_movem_op(&(decoded->src), &(decoded->dst), dst + ret, 0, labels, decoded->address);
+		op1len = m68k_disasm_movem_op(&(decoded->src), &(decoded->dst), dst + ret, 0, labels, decoded->address, label_fun, data);
 		ret += op1len;
-		ret += m68k_disasm_movem_op(&(decoded->dst), &(decoded->src), dst + ret, op1len, labels, decoded->address);
+		ret += m68k_disasm_movem_op(&(decoded->dst), &(decoded->src), dst + ret, op1len, labels, decoded->address, label_fun, data);
 	} else {
-		op1len = m68k_disasm_op(&(decoded->src), dst + ret, 0, labels, decoded->address);
+		op1len = m68k_disasm_op(&(decoded->src), dst + ret, 0, labels, decoded->address, label_fun, data);
 		ret += op1len;
-		ret += m68k_disasm_op(&(decoded->dst), dst + ret, op1len, labels, decoded->address);
+		ret += m68k_disasm_op(&(decoded->dst), dst + ret, op1len, labels, decoded->address, label_fun, data);
 	}
 	return ret;
 }
 
 int m68k_disasm(m68kinst * decoded, char * dst)
 {
-	return m68k_disasm_ex(decoded, dst, 0);
+	return m68k_disasm_ex(decoded, dst, 0, NULL, NULL); //labels argument is 0, so no label formatter or user data is supplied
 }
 
-int m68k_disasm_labels(m68kinst * decoded, char * dst)
+int m68k_disasm_labels(m68kinst * decoded, char * dst, format_label_fun label_fun, void * data) //disassemble with labels enabled; label_fun may be NULL, data is passed through to label_fun
 {
-	return m68k_disasm_ex(decoded, dst, 1);
+	if (!label_fun) //caller supplied no formatter
+	{
+		label_fun = m68k_default_label_fun; //fall back to the "ADR_%X" formatter
+	}
+	return m68k_disasm_ex(decoded, dst, 1, label_fun, data); //labels argument is 1
 }
--- a/68kinst.h	Thu May 28 21:09:33 2015 -0700
+++ b/68kinst.h	Thu May 28 21:19:55 2015 -0700
@@ -8,6 +8,13 @@
 
 #include <stdint.h>
 
+#ifdef M68030
+#define M68020
+#endif
+#ifdef M68020
+#define M68010
+#endif
+
 typedef enum {
 	BIT_MOVEP_IMMED = 0,
 	MOVE_BYTE,
@@ -97,7 +104,43 @@
 	M68K_TRAPV,
 	M68K_TST,
 	M68K_UNLK,
-	M68K_INVALID
+	M68K_INVALID,
+#ifdef M68010
+	M68K_BKPT,
+	M68K_MOVE_FROM_CCR,
+	M68K_MOVEC,
+	M68K_MOVES,
+	M68K_RTD,
+#endif
+#ifdef M68020
+	M68K_BFCHG,
+	M68K_BFCLR,
+	M68K_BFEXTS,
+	M68K_BFEXTU,
+	M68K_BFFFO,
+	M68K_BFINS,
+	M68K_BFSET,
+	M68K_BFTST,
+	M68K_CALLM,
+	M68K_CAS,
+	M68K_CAS2,
+	M68K_CHK2,
+	M68K_CMP2,
+	M68K_CP_BCC,
+	M68K_CP_DBCC,
+	M68K_CP_GEN,
+	M68K_CP_RESTORE,
+	M68K_CP_SAVE,
+	M68K_CP_SCC,
+	M68K_CP_TRAPCC,
+	M68K_DIVSL,
+	M68K_DIVUL,
+	M68K_EXTB,
+	M68K_PACK,
+	M68K_RTM,
+	M68K_TRAPCC,
+	M68K_UNPK,
+#endif
 } m68K_op;
 
 typedef enum {
@@ -130,19 +173,40 @@
 //expanded values
 	MODE_AREG_INDEX_DISP8,
 #ifdef M68020
-	MODE_AREG_INDEX_DISP32,
+	MODE_AREG_INDEX_BASE_DISP,
+	MODE_AREG_PREINDEX,
+	MODE_AREG_POSTINDEX,
+	MODE_AREG_MEM_INDIRECT,
+	MODE_AREG_BASE_DISP,
+	MODE_INDEX_BASE_DISP,
+	MODE_PREINDEX,
+	MODE_POSTINDEX,
+	MODE_MEM_INDIRECT,
+	MODE_BASE_DISP,
 #endif
 	MODE_ABSOLUTE_SHORT,
 	MODE_ABSOLUTE,
 	MODE_PC_DISPLACE,
 	MODE_PC_INDEX_DISP8,
 #ifdef M68020
-	MODE_PC_INDEX_DISP32,
+	MODE_PC_INDEX_BASE_DISP,
+	MODE_PC_PREINDEX,
+	MODE_PC_POSTINDEX,
+	MODE_PC_MEM_INDIRECT,
+	MODE_PC_BASE_DISP,
+	MODE_ZPC_INDEX_BASE_DISP,
+	MODE_ZPC_PREINDEX,
+	MODE_ZPC_POSTINDEX,
+	MODE_ZPC_MEM_INDIRECT,
+	MODE_ZPC_BASE_DISP,
 #endif
 	MODE_IMMEDIATE,
 	MODE_IMMEDIATE_WORD,//used to indicate an immediate operand that only uses a single extension word even for a long operation
 	MODE_UNUSED
 } m68k_addr_modes;
+#ifdef M68020
+#define M68K_FLAG_BITFIELD 0x80
+#endif
 
 typedef enum {
 	COND_TRUE,
@@ -163,13 +227,49 @@
 	COND_LESS_EQ
 } m68K_condition;
 
+#ifdef M68010
+typedef enum { //control register identifiers; enumerator values depend on whether M68020 is defined
+	CR_SFC,
+	CR_DFC,
+#ifdef M68020
+	CR_CACR, //only present with M68020; shifts the values of CR_USP and CR_VBR up by one
+#endif
+	CR_USP,
+	CR_VBR,
+#ifdef M68020
+	CR_CAAR,
+	CR_MSP,
+	CR_ISP
+#endif
+} m68k_control_reg; //NOTE(review): the MOVEC disassembly indexes cr_mnem with an operand's immed; presumably that table follows this order -- confirm
+
+#ifdef M68020
+#define MAX_HIGH_CR 0x804
+#define MAX_LOW_CR 0x002
+#else
+#define MAX_HIGH_CR 0x801
+#define MAX_LOW_CR 0x001
+#endif
+
+#endif
+
 typedef struct {
-	uint8_t addr_mode;
+#ifdef M68020
+	uint16_t bitfield;
+#endif
+	uint8_t  addr_mode;
 	union {
 		struct {
 			uint8_t pri;
 			uint8_t sec;
+#ifdef M68020
+			uint8_t scale;
+			uint8_t disp_sizes;
+#endif
 			int32_t displacement;
+#ifdef M68020
+			int32_t outer_disp;
+#endif
 		} regs;
 		uint32_t immed;
 	} params;
@@ -229,12 +329,15 @@
 	VECTOR_TRAP_15
 } m68k_vector;
 
+typedef int (*format_label_fun)(char * dst, uint32_t address, void * data);
+
 uint16_t * m68k_decode(uint16_t * istream, m68kinst * dst, uint32_t address);
 uint32_t m68k_branch_target(m68kinst * inst, uint32_t *dregs, uint32_t *aregs);
 uint8_t m68k_is_branch(m68kinst * inst);
 uint8_t m68k_is_noncall_branch(m68kinst * inst);
 int m68k_disasm(m68kinst * decoded, char * dst);
-int m68k_disasm_labels(m68kinst * decoded, char * dst);
+int m68k_disasm_labels(m68kinst * decoded, char * dst, format_label_fun label_fun, void * data);
+int m68k_default_label_fun(char * dst, uint32_t address, void * data);
 
 #endif
 
--- a/Makefile	Thu May 28 21:09:33 2015 -0700
+++ b/Makefile	Thu May 28 21:19:55 2015 -0700
@@ -1,36 +1,40 @@
+ifndef OS
+OS:=$(shell uname -s)
+endif
 
-ifdef WINDOWS
+ifeq ($(OS),Windows)
+CC:=wine gcc.exe
 
 MEM:=mem_win.o
 BLASTEM:=blastem.exe
-RUNTIME32:=runtime_win.S
 
 CC:=wine gcc.exe
-CFLAGS:=-O2 -std=gnu99 -Wreturn-type -Werror=return-type -Werror=implicit-function-declaration -I"C:/MinGW/usr/include/SDL" -DGLEW_STATIC
-LDFLAGS:= -L"C:/MinGW/usr/lib" -lm -lmingw32 -lSDLmain -lSDL
-ifndef NOGL
-LDFLAGS+= -lopengl32 -lglu32
-endif
-LDFLAGS+= -mwindows
+CFLAGS:=-O2 -std=gnu99 -Wreturn-type -Werror=return-type -Werror=
+LDFLAGS:= -L"C:/MinGW/usr/lib" -lm -lmingw32 -lSDLmain -lSDL -mwindows
 CPU:=i686
 
 else
 
 MEM:=mem.o
 BLASTEM:=blastem
-RUNTIME32:=runtime_32.S
 
-ifdef NOGL
-LIBS=sdl
+ifeq ($(OS),Darwin)
+LIBS=sdl2 glew
 else
-LIBS=sdl glew gl
-endif
-ifdef DEBUG
+LIBS=sdl2 glew gl
+endif #Darwin
+
+ifdef DEBUG
 CFLAGS:=-ggdb -std=gnu99 $(shell pkg-config --cflags-only-I $(LIBS)) -Wreturn-type -Werror=return-type -Werror=implicit-function-declaration
 LDFLAGS:=-ggdb -lm $(shell pkg-config --libs $(LIBS))
 else
 CFLAGS:=-O2 -flto -std=gnu99 $(shell pkg-config  --cflags-only-I $(LIBS)) -Wreturn-type -Werror=return-type -Werror=implicit-function-declaration
 LDFLAGS:=-O2 -flto -lm $(shell pkg-config --libs $(LIBS))
+endif #DEBUG
+endif #Windows
+
+ifdef Z80_LOG_ADDRESS
+CFLAGS+= -DZ80_LOG_ADDRESS
 endif
 
 ifdef PROFILE
@@ -41,36 +45,49 @@
 CFLAGS+= -DDISABLE_OPENGL
 endif
 
+ifdef M68030
+CFLAGS+= -DM68030
+endif
+ifdef M68020
+CFLAGS+= -DM68020
+endif
+ifdef M68010
+CFLAGS+= -DM68010
+endif
+
 ifndef CPU
 CPU:=$(shell uname -m)
 endif
+
+ifeq ($(OS),Darwin)
+LDFLAGS+= -framework OpenGL
+endif
+
+TRANSOBJS=gen.o backend.o mem.o
+M68KOBJS=68kinst.o m68k_core.o
+ifeq ($(CPU),x86_64)
+M68KOBJS+= m68k_core_x86.o
+TRANSOBJS+= gen_x86.o backend_x86.o
+else
+ifeq ($(CPU),i686)
+M68KOBJS+= m68k_core_x86.o
+TRANSOBJS+= gen_x86.o backend_x86.o
+endif
 endif
 
-
-TRANSOBJS=gen.o backend.o $(MEM)
-M68KOBJS=68kinst.o m68k_core.o
-ifeq ($(CPU),x86_64)
-M68KOBJS+= runtime.o m68k_core_x86.o
-TRANSOBJS+= gen_x86.o backend_x86.o
-else
-ifeq ($(CPU),i686)
-M68KOBJS+= $(RUNTIME32) m68k_core_x86.o
-TRANSOBJS+= gen_x86.o backend_x86.o
-NOZ80:=1
-endif
-endif
-
-Z80OBJS=z80inst.o z80_to_x86.o zruntime.o
+Z80OBJS=z80inst.o z80_to_x86.o
 AUDIOOBJS=ym2612.o psg.o wave.o
 CONFIGOBJS=config.o tern.o util.o
 
 MAINOBJS=blastem.o debug.o gdb_remote.o vdp.o render_sdl.o io.o $(CONFIGOBJS) gst.o $(M68KOBJS) $(TRANSOBJS) $(AUDIOOBJS)
 
 ifeq ($(CPU),x86_64)
-CFLAGS+=-DX86_64
+CFLAGS+=-DX86_64 -m64
+LDFLAGS+=-m64
 else
 ifeq ($(CPU),i686)
-CFLAGS+=-DX86_32
+CFLAGS+=-DX86_32 -m32
+LDFLAGS+=-m32
 endif
 endif
 
@@ -81,18 +98,16 @@
 endif
 
 ifdef WINDOWS
-ifndef NOGL
 MAINOBJS+= glew32s.lib
 endif
-endif
 
 all : dis zdis stateview vgmplay blastem
 
 $(BLASTEM) : $(MAINOBJS)
 	$(CC) -o $(BLASTEM) $(MAINOBJS) $(LDFLAGS)
 
-dis : dis.o 68kinst.o
-	$(CC) -o dis dis.o 68kinst.o
+dis : dis.o 68kinst.o tern.o vos_program_module.o
+	$(CC) -o dis dis.o 68kinst.o tern.o vos_program_module.o
 
 zdis : zdis.o z80inst.o
 	$(CC) -o zdis zdis.o z80inst.o
@@ -117,6 +132,9 @@
 
 vgmplay : vgmplay.o render_sdl.o $(CONFIGOBJS) $(AUDIOOBJS)
 	$(CC) -o vgmplay vgmplay.o render_sdl.o $(CONFIGOBJS) $(AUDIOOBJS) $(LDFLAGS)
+	
+test : test.o vdp.o
+	$(CC) -o test test.o vdp.o
 
 testgst : testgst.o gst.o
 	$(CC) -o testgst testgst.o gst.o
@@ -133,6 +151,9 @@
 offsets : offsets.c z80_to_x86.h m68k_core.h
 	$(CC) -o offsets offsets.c
 
+vos_prog_info : vos_prog_info.o vos_program_module.o
+	$(CC) -o vos_prog_info vos_prog_info.o vos_program_module.o
+
 %.o : %.S
 	$(CC) -c -o $@ $<
 
@@ -140,7 +161,7 @@
 	$(CC) $(CFLAGS) -c -o $@ $<
 
 %.bin : %.s68
-	vasmm68k_mot -Fbin -m68000 -no-opt -spaces -o $@ $<
+	vasmm68k_mot -Fbin -m68000 -no-opt -spaces -o $@ -L $@.list $<
 
 %.bin : %.sz8
 	vasmz80_mot -Fbin -spaces -o $@ $<
--- a/backend.c	Thu May 28 21:09:33 2015 -0700
+++ b/backend.c	Thu May 28 21:19:55 2015 -0700
@@ -51,3 +51,49 @@
 	}
 }
 
+void * get_native_pointer(uint32_t address, void ** mem_pointers, cpu_options * opts) //returns a host pointer for address, or NULL when no readable chunk with a non-NULL base covers it
+{
+	memmap_chunk const * memmap = opts->memmap;
+	address &= opts->address_mask; //drop address bits outside opts->address_mask before searching
+	for (uint32_t chunk = 0; chunk < opts->memmap_chunks; chunk++)
+	{
+		if (address >= memmap[chunk].start && address < memmap[chunk].end) { //first chunk whose [start, end) range contains address decides the result
+			if (!(memmap[chunk].flags & MMAP_READ)) {
+				return NULL; //chunk is not flagged MMAP_READ
+			}
+			uint8_t * base = memmap[chunk].flags & MMAP_PTR_IDX
+				? mem_pointers[memmap[chunk].ptr_index] //MMAP_PTR_IDX: base is looked up in mem_pointers
+				: memmap[chunk].buffer; //otherwise the chunk's own buffer is the base
+			if (!base) {
+				return NULL; //no backing memory available for this chunk
+			}
+			return base + (address & memmap[chunk].mask); //offset into the base is the address masked by the chunk's mask
+		}
+	}
+	return NULL; //no chunk covers the address
+}
+
+uint32_t chunk_size(cpu_options *opts, memmap_chunk const *chunk) //size attributed to one memory map chunk
+{
+	if (chunk->mask == opts->address_mask) {
+		return chunk->end - chunk->start; //mask is the full address mask: size is the chunk's address range
+	} else {
+		return chunk->mask + 1; //otherwise size comes from the mask (presumably mask is a power of two minus one -- confirm)
+	}
+}
+
+uint32_t ram_size(cpu_options *opts) //sums the sizes of all memory map chunks flagged both MMAP_WRITE and MMAP_CODE
+{
+	uint32_t size = 0;
+	for (int i = 0; i < opts->memmap_chunks; i++) //NOTE(review): i is int while memmap_chunks is uint32_t; get_native_pointer uses a uint32_t index
+	{
+		if ((opts->memmap[i].flags & (MMAP_WRITE | MMAP_CODE)) == (MMAP_WRITE | MMAP_CODE)) { //both flags must be set
+			if (opts->memmap[i].mask == opts->address_mask) { //NOTE(review): this if/else repeats the computation in chunk_size()
+				size += opts->memmap[i].end - opts->memmap[i].start;
+			} else {
+				size += opts->memmap[i].mask + 1;
+			}
+		}
+	}
+	return size;
+}
--- a/backend.h	Thu May 28 21:09:33 2015 -0700
+++ b/backend.h	Thu May 28 21:19:55 2015 -0700
@@ -12,6 +12,7 @@
 
 #define INVALID_OFFSET 0xFFFFFFFF
 #define EXTENSION_WORD 0xFFFFFFFE
+#define CYCLE_NEVER 0xFFFFFFFF
 
 #if defined(X86_32) || defined(X86_64)
 typedef struct {
@@ -46,32 +47,6 @@
 	WRITE_8
 } ftype;
 
-typedef struct {
-	uint32_t flags;
-	native_map_slot *native_code_map;
-	deferred_addr   *deferred;
-	code_info       code;
-	uint8_t         **ram_inst_sizes;
-	code_ptr        save_context;
-	code_ptr        load_context;
-	code_ptr        handle_cycle_limit;
-	code_ptr        handle_cycle_limit_int;
-	code_ptr        handle_code_write;
-	uint32_t        address_mask;
-	uint32_t        max_address;
-	uint32_t        bus_cycles;
-	int32_t         mem_ptr_off;
-	int32_t         ram_flags_off;
-	uint8_t         address_size;
-	uint8_t         byte_swap;
-	uint8_t         context_reg;
-	uint8_t         cycles;
-	uint8_t         limit;
-	uint8_t			scratch1;
-	uint8_t			scratch2;
-} cpu_options;
-
-
 #define MMAP_READ      0x01
 #define MMAP_WRITE     0x02
 #define MMAP_CODE      0x04
@@ -79,6 +54,7 @@
 #define MMAP_ONLY_ODD  0x10
 #define MMAP_ONLY_EVEN 0x20
 #define MMAP_FUNC_NULL 0x40
+#define MMAP_BYTESWAP  0x80
 
 typedef uint16_t (*read_16_fun)(uint32_t address, void * context);
 typedef uint8_t (*read_8_fun)(uint32_t address, void * context);
@@ -98,6 +74,35 @@
 	write_8_fun  write_8;
 } memmap_chunk;
 
+typedef struct {
+	uint32_t flags;
+	native_map_slot    *native_code_map;
+	deferred_addr      *deferred;
+	code_info          code; //buffer the code generation helpers append instructions to
+	uint8_t            **ram_inst_sizes;
+	memmap_chunk const *memmap; //memory map chunk array, searched by get_native_pointer and ram_size
+	code_ptr           save_context; //generated code calls this before calling out to a C function
+	code_ptr           load_context; //generated code calls this after a C function returns
+	code_ptr           handle_cycle_limit; //called by the check emitted in check_cycles
+	code_ptr           handle_cycle_limit_int; //called by the check emitted in check_cycles_int
+	code_ptr           handle_code_write; //called on writes to MMAP_CODE chunks when the tested RAM flag bit is set
+	uint32_t           memmap_chunks; //number of entries in memmap
+	uint32_t           address_mask; //ANDed with addresses before the memory map lookup
+	uint32_t           max_address;
+	uint32_t           bus_cycles; //cycle count charged at the start of each generated memory access function
+	uint32_t           clock_divider; //multiplier applied to every cycle count passed to cycles()
+	int32_t            mem_ptr_off; //displacement from context_reg to the pointer table indexed by ptr_index
+	int32_t            ram_flags_off; //displacement from context_reg to the RAM flag bits tested on code writes
+	uint8_t            ram_flags_shift; //right shift applied to a write address to get its RAM flag bit index
+	uint8_t            address_size; //operand size used for address registers in generated code
+	uint8_t            byte_swap; //when set, byte accesses XOR the address with 1
+	uint8_t            context_reg; //host register holding the CPU context pointer
+	uint8_t            cycles; //host register holding the current cycle count
+	uint8_t            limit; //host register the cycle count is compared against
+	uint8_t			   scratch1; //host scratch register: read address/result, or the value on writes
+	uint8_t			   scratch2; //host scratch register: address on writes
+} cpu_options;
+
 typedef uint8_t * (*native_addr_func)(void * context, uint32_t address);
 
 deferred_addr * defer_address(deferred_addr * old_head, uint32_t address, uint8_t *dest);
@@ -107,8 +112,13 @@
 void cycles(cpu_options *opts, uint32_t num);
 void check_cycles_int(cpu_options *opts, uint32_t address);
 void check_cycles(cpu_options * opts);
+void check_code_prologue(code_info *code);
+void log_address(cpu_options *opts, uint32_t address, char * format);
 
-code_ptr gen_mem_fun(cpu_options * opts, memmap_chunk * memmap, uint32_t num_chunks, ftype fun_type);
+code_ptr gen_mem_fun(cpu_options * opts, memmap_chunk const * memmap, uint32_t num_chunks, ftype fun_type, code_ptr *after_inc);
+void * get_native_pointer(uint32_t address, void ** mem_pointers, cpu_options * opts);
+uint32_t chunk_size(cpu_options *opts, memmap_chunk const *chunk);
+uint32_t ram_size(cpu_options *opts);
 
 #endif //BACKEND_H_
 
--- a/backend_x86.c	Thu May 28 21:09:33 2015 -0700
+++ b/backend_x86.c	Thu May 28 21:19:55 2015 -0700
@@ -3,7 +3,7 @@
 
 void cycles(cpu_options *opts, uint32_t num)
 {
-	add_ir(&opts->code, num, opts->cycles, SZ_D);
+	add_ir(&opts->code, num*opts->clock_divider, opts->cycles, SZ_D);
 }
 
 void check_cycles_int(cpu_options *opts, uint32_t address)
@@ -11,7 +11,7 @@
 	code_info *code = &opts->code;
 	cmp_rr(code, opts->cycles, opts->limit, SZ_D);
 	code_ptr jmp_off = code->cur+1;
-	jcc(code, CC_NC, jmp_off+1);
+	jcc(code, CC_A, jmp_off+1);
 	mov_ir(code, address, opts->scratch1, SZ_D);
 	call(code, opts->handle_cycle_limit_int);
 	*jmp_off = code->cur - (jmp_off+1);
@@ -23,17 +23,38 @@
 	cmp_rr(code, opts->cycles, opts->limit, SZ_D);
 	check_alloc_code(code, MAX_INST_LEN*2);
 	code_ptr jmp_off = code->cur+1;
-	jcc(code, CC_NC, jmp_off+1);
+	jcc(code, CC_A, jmp_off+1);
 	call(code, opts->handle_cycle_limit);
 	*jmp_off = code->cur - (jmp_off+1);
 }
 
-code_ptr gen_mem_fun(cpu_options * opts, memmap_chunk * memmap, uint32_t num_chunks, ftype fun_type)
+void log_address(cpu_options *opts, uint32_t address, char * format) //debug aid: generates a printf(format, address, cycles) call
+{
+	code_info *code = &opts->code; //every helper below appends to the shared code buffer
+	call(code, opts->save_context);
+	push_r(code, opts->context_reg); //preserved across the printf call and restored by the pop_r below
+	mov_rr(code, opts->cycles, RDX, SZ_D); //third argument: current value of the cycle counter register
+	mov_ir(code, (int64_t)format, RDI, SZ_PTR); //first argument: format string pointer baked in as an immediate
+	mov_ir(code, address, RSI, SZ_D); //second argument: the address being logged
+	call_args_abi(code, (code_ptr)printf, 3, RDI, RSI, RDX);
+	pop_r(code, opts->context_reg);
+	call(code, opts->load_context);
+}
+
+void check_code_prologue(code_info *code)
+{
+	check_alloc_code(code, MAX_INST_LEN*4); //presumably ensures room for 4 max-length instructions - confirm against check_alloc_code
+}
+
+code_ptr gen_mem_fun(cpu_options * opts, memmap_chunk const * memmap, uint32_t num_chunks, ftype fun_type, code_ptr *after_inc)
 {
 	code_info *code = &opts->code;
 	code_ptr start = code->cur;
 	check_cycles(opts);
 	cycles(opts, opts->bus_cycles);
+	if (after_inc) {
+		*after_inc = code->cur;
+	}
 	if (opts->address_size == SZ_D && opts->address_mask < 0xFFFFFFFF) {
 		and_ir(code, opts->address_mask, opts->scratch1, SZ_D);
 	}
@@ -42,6 +63,7 @@
 	uint8_t adr_reg = is_write ? opts->scratch2 : opts->scratch1;
 	uint16_t access_flag = is_write ? MMAP_WRITE : MMAP_READ;
 	uint8_t size =  (fun_type == READ_16 || fun_type == WRITE_16) ? SZ_W : SZ_B;
+	uint32_t ram_flags_off = opts->ram_flags_off;
 	for (uint32_t chunk = 0; chunk < num_chunks; chunk++)
 	{
 		if (memmap[chunk].start > 0) {
@@ -76,48 +98,19 @@
 		default:
 			cfun = NULL;
 		}
-		if(memmap[chunk].buffer && memmap[chunk].flags & access_flag) {
+		if(memmap[chunk].flags & access_flag) {
 			if (memmap[chunk].flags & MMAP_PTR_IDX) {
 				if (memmap[chunk].flags & MMAP_FUNC_NULL) {
 					cmp_irdisp(code, 0, opts->context_reg, opts->mem_ptr_off + sizeof(void*) * memmap[chunk].ptr_index, SZ_PTR);
 					code_ptr not_null = code->cur + 1;
 					jcc(code, CC_NZ, code->cur + 2);
 					call(code, opts->save_context);
-#ifdef X86_64
 					if (is_write) {
-						if (opts->scratch2 != RDI) {
-							mov_rr(code, opts->scratch2, RDI, opts->address_size);
-						}
-						mov_rr(code, opts->scratch1, RDX, size);
+						call_args_abi(code, cfun, 3, opts->scratch2, opts->context_reg, opts->scratch1);
+						mov_rr(code, RAX, opts->context_reg, SZ_PTR);
 					} else {
 						push_r(code, opts->context_reg);
-						mov_rr(code, opts->scratch1, RDI, opts->address_size);
-					}
-					test_ir(code, 8, RSP, opts->address_size);
-					code_ptr adjust_rsp = code->cur + 1;
-					jcc(code, CC_NZ, code->cur + 2);
-					call(code, cfun);
-					code_ptr no_adjust = code->cur + 1;
-					jmp(code, code->cur + 2);
-					*adjust_rsp = code->cur - (adjust_rsp + 1);
-					sub_ir(code, 8, RSP, SZ_PTR);
-					call(code, cfun);
-					add_ir(code, 8, RSP, SZ_PTR);
-					*no_adjust = code->cur - (no_adjust + 1);
-#else
-					if (is_write) {
-						push_r(code, opts->scratch1);
-					} else {
-						push_r(code, opts->context_reg);//save opts->context_reg for later
-					}
-					push_r(code, opts->context_reg);
-					push_r(code, is_write ? opts->scratch2 : opts->scratch1);
-					call(code, cfun);
-					add_ir(code, is_write ? 12 : 8, RSP, opts->address_size);
-#endif
-					if (is_write) {
-						mov_rr(code, RAX, opts->context_reg, SZ_PTR);
-					} else {
+						call_args_abi(code, cfun, 2, opts->scratch1, opts->context_reg);
 						pop_r(code, opts->context_reg);
 						mov_rr(code, RAX, opts->scratch1, size);
 					}
@@ -125,7 +118,7 @@
 
 					*not_null = code->cur - (not_null + 1);
 				}
-				if (opts->byte_swap && size == SZ_B) {
+				if ((opts->byte_swap || memmap[chunk].flags & MMAP_BYTESWAP) && size == SZ_B) {
 					xor_ir(code, 1, adr_reg, opts->address_size);
 				}
 				if (opts->address_size != SZ_D) {
@@ -151,7 +144,7 @@
 						retn(code);
 						*good_addr = code->cur - (good_addr + 1);
 						shr_ir(code, 1, adr_reg, opts->address_size);
-					} else {
+					} else if (opts->byte_swap || memmap[chunk].flags & MMAP_BYTESWAP) {
 						xor_ir(code, 1, adr_reg, opts->address_size);
 					}
 				} else if ((memmap[chunk].flags & MMAP_ONLY_ODD) || (memmap[chunk].flags & MMAP_ONLY_EVEN)) {
@@ -161,6 +154,9 @@
 						shr_ir(code, 8, opts->scratch1, SZ_W);
 					}
 				}
+				if (opts->address_size != SZ_D) {
+					movzx_rr(code, adr_reg, adr_reg, opts->address_size, SZ_D);
+				}
 				if ((intptr_t)memmap[chunk].buffer <= 0x7FFFFFFF && (intptr_t)memmap[chunk].buffer >= -2147483648) {
 					if (is_write) {
 						mov_rrdisp(code, opts->scratch1, opts->scratch2, (intptr_t)memmap[chunk].buffer, tmp_size);
@@ -189,21 +185,19 @@
 				}
 			}
 			if (is_write && (memmap[chunk].flags & MMAP_CODE)) {
-				//TODO: Fixme for Z80
 				mov_rr(code, opts->scratch2, opts->scratch1, opts->address_size);
-				shr_ir(code, 11, opts->scratch1, opts->address_size);
-				bt_rrdisp(code, opts->scratch1, opts->context_reg, opts->ram_flags_off, opts->address_size);
+				shr_ir(code, opts->ram_flags_shift, opts->scratch1, opts->address_size);
+				bt_rrdisp(code, opts->scratch1, opts->context_reg, ram_flags_off, opts->address_size);
+				//FIXME: These adjustments to ram_flags_off need to take into account bits vs bytes and ram_flags_shift
+				if (memmap[chunk].mask == opts->address_mask) {
+					ram_flags_off += memmap[chunk].end - memmap[chunk].start;
+				} else {
+					ram_flags_off += memmap[chunk].mask + 1;
+				}
 				code_ptr not_code = code->cur + 1;
 				jcc(code, CC_NC, code->cur + 2);
 				call(code, opts->save_context);
-#ifdef X86_32
-				push_r(code, opts->context_reg);
-				push_r(code, opts->scratch2);
-#endif
-				call(code, opts->handle_code_write);
-#ifdef X86_32
-				add_ir(code, 8, RSP, SZ_D);
-#endif
+				call_args(code, opts->handle_code_write, 2, opts->scratch2, opts->context_reg);
 				mov_rr(code, RAX, opts->context_reg, SZ_PTR);
 				call(code, opts->load_context);
 				*not_code = code->cur - (not_code+1);
@@ -211,41 +205,12 @@
 			retn(code);
 		} else if (cfun) {
 			call(code, opts->save_context);
-#ifdef X86_64
 			if (is_write) {
-				if (opts->scratch2 != RDI) {
-					mov_rr(code, opts->scratch2, RDI, opts->address_size);
-				}
-				mov_rr(code, opts->scratch1, RDX, size);
+				call_args_abi(code, cfun, 3, opts->scratch2, opts->context_reg, opts->scratch1);
+				mov_rr(code, RAX, opts->context_reg, SZ_PTR);
 			} else {
 				push_r(code, opts->context_reg);
-				mov_rr(code, opts->scratch1, RDI, opts->address_size);
-			}
-			test_ir(code, 8, RSP, SZ_D);
-			code_ptr adjust_rsp = code->cur + 1;
-			jcc(code, CC_NZ, code->cur + 2);
-			call(code, cfun);
-			code_ptr no_adjust = code->cur + 1;
-			jmp(code, code->cur + 2);
-			*adjust_rsp = code->cur - (adjust_rsp + 1);
-			sub_ir(code, 8, RSP, SZ_PTR);
-			call(code, cfun);
-			add_ir(code, 8, RSP, SZ_PTR);
-			*no_adjust = code->cur - (no_adjust+1);
-#else
-			if (is_write) {
-				push_r(code, opts->scratch1);
-			} else {
-				push_r(code, opts->context_reg);//save opts->context_reg for later
-			}
-			push_r(code, opts->context_reg);
-			push_r(code, is_write ? opts->scratch2 : opts->scratch1);
-			call(code, cfun);
-			add_ir(code, is_write ? 12 : 8, RSP, SZ_D);
-#endif
-			if (is_write) {
-				mov_rr(code, RAX, opts->context_reg, SZ_PTR);
-			} else {
+				call_args_abi(code, cfun, 2, opts->scratch1, opts->context_reg);
 				pop_r(code, opts->context_reg);
 				mov_rr(code, RAX, opts->scratch1, size);
 			}
--- a/blastem.c	Thu May 28 21:09:33 2015 -0700
+++ b/blastem.c	Thu May 28 21:19:55 2015 -0700
@@ -33,8 +33,6 @@
 
 #define MAX_SOUND_CYCLES 100000
 
-uint32_t mclks_per_frame = MCLKS_LINE*LINES_NTSC;
-
 uint16_t cart[CARTRIDGE_WORDS];
 uint16_t ram[RAM_WORDS];
 uint8_t z80_ram[Z80_RAM_BYTES];
@@ -125,26 +123,23 @@
 	return 0;
 }
 
-//TODO: Make these dependent on the video mode
-//#define VINT_CYCLE ((MCLKS_LINE * 225 + (148 + 40) * 4)/MCLKS_PER_68K)
-#define ZVINT_CYCLE ((MCLKS_LINE * 225 + (148 + 40) * 4)/MCLKS_PER_Z80)
-//#define VINT_CYCLE ((MCLKS_LINE * 226)/MCLKS_PER_68K)
-//#define ZVINT_CYCLE ((MCLKS_LINE * 226)/MCLKS_PER_Z80)
-
 void adjust_int_cycle(m68k_context * context, vdp_context * v_context)
 {
+	//static int old_int_cycle = CYCLE_NEVER;
+	genesis_context *gen = context->system;
+	if (context->sync_cycle - context->current_cycle > gen->max_cycles) {
+		context->sync_cycle = context->current_cycle + gen->max_cycles;
+	}
 	context->int_cycle = CYCLE_NEVER;
 	if ((context->status & 0x7) < 6) {
 		uint32_t next_vint = vdp_next_vint(v_context);
 		if (next_vint != CYCLE_NEVER) {
-			next_vint /= MCLKS_PER_68K;
 			context->int_cycle = next_vint;
 			context->int_num = 6;
 		}
 		if ((context->status & 0x7) < 4) {
 			uint32_t next_hint = vdp_next_hint(v_context);
 			if (next_hint != CYCLE_NEVER) {
-				next_hint /= MCLKS_PER_68K;
 				if (next_hint < context->int_cycle) {
 					context->int_cycle = next_hint;
 					context->int_num = 4;
@@ -153,6 +148,10 @@
 			}
 		}
 	}
+	/*if (context->int_cycle != old_int_cycle) {
+		printf("int cycle changed to: %d, level: %d @ %d(%d), frame: %d, vcounter: %d, hslot: %d, mask: %d, hint_counter: %d\n", context->int_cycle, context->int_num, v_context->cycles, context->current_cycle, v_context->frame, v_context->vcounter, v_context->hslot, context->status & 0x7, v_context->hint_counter);
+		old_int_cycle = context->int_cycle;
+	}*/
 
 	context->target_cycle = context->int_cycle < context->sync_cycle ? context->int_cycle : context->sync_cycle;
 	/*printf("Cyc: %d, Trgt: %d, Int Cyc: %d, Int: %d, Mask: %X, V: %d, H: %d, HICount: %d, HReg: %d, Line: %d\n",
@@ -163,12 +162,6 @@
 int break_on_sync = 0;
 int save_state = 0;
 
-uint8_t reset = 1;
-uint8_t need_reset = 0;
-uint8_t busreq = 0;
-uint8_t busack = 0;
-uint32_t busack_cycle = CYCLE_NEVER;
-uint8_t new_busack = 0;
 //#define DO_DEBUG_PRINT
 #ifdef DO_DEBUG_PRINT
 #define dprintf printf
@@ -180,32 +173,22 @@
 
 #define Z80_VINT_DURATION 128
 
+void z80_next_int_pulse(z80_context * z_context)
+{
+	genesis_context * sys = z_context->system;
+	z_context->int_pulse_start = vdp_next_vint_z80(sys->vdp); //pulse begins at the VDP's next Z80 VINT
+	z_context->int_pulse_end = z_context->int_pulse_start + Z80_VINT_DURATION * MCLKS_PER_Z80; //and ends Z80_VINT_DURATION * MCLKS_PER_Z80 later
+}
+
 void sync_z80(z80_context * z_context, uint32_t mclks)
 {
 #ifndef NO_Z80
-	if (z80_enabled && !reset && !busreq) {
-		genesis_context * gen = z_context->system;
-		z_context->sync_cycle = mclks / MCLKS_PER_Z80;
-		if (z_context->current_cycle < z_context->sync_cycle) {
-			if (need_reset) {
-				z80_reset(z_context);
-				need_reset = 0;
-			}
-			uint32_t vint_cycle = vdp_next_vint_z80(gen->vdp) / MCLKS_PER_Z80;
-			while (z_context->current_cycle < z_context->sync_cycle) {
-				if (z_context->iff1 && z_context->current_cycle < (vint_cycle + Z80_VINT_DURATION)) {
-					z_context->int_cycle = vint_cycle < z_context->int_enable_cycle ? z_context->int_enable_cycle : vint_cycle;
-				}
-				z_context->target_cycle = z_context->sync_cycle < z_context->int_cycle ? z_context->sync_cycle : z_context->int_cycle;
-				dprintf("Running Z80 from cycle %d to cycle %d. Native PC: %p\n", z_context->current_cycle, z_context->sync_cycle, z_context->native_pc);
-				z80_run(z_context);
-				dprintf("Z80 ran to cycle %d\n", z_context->current_cycle);
-			}
-		}
+	if (z80_enabled) {
+		z80_run(z_context, mclks);
 	} else
 #endif
 	{
-		z_context->current_cycle = mclks / MCLKS_PER_Z80;
+		z_context->current_cycle = mclks;
 	}
 }
 
@@ -225,24 +208,24 @@
 	//printf("Target: %d, YM bufferpos: %d, PSG bufferpos: %d\n", target, gen->ym->buffer_pos, gen->psg->buffer_pos * 2);
 }
 
-uint32_t frame=0;
+uint32_t last_frame_num;
 m68k_context * sync_components(m68k_context * context, uint32_t address)
 {
-	//TODO: Handle sync targets smaller than a single frame
 	genesis_context * gen = context->system;
 	vdp_context * v_context = gen->vdp;
 	z80_context * z_context = gen->z80;
-	uint32_t mclks = context->current_cycle * MCLKS_PER_68K;
+	uint32_t mclks = context->current_cycle;
 	sync_z80(z_context, mclks);
-	if (mclks >= mclks_per_frame) {
+	sync_sound(gen, mclks);
+	while (context->current_cycle > mclks) {
+		mclks = context->current_cycle;
+		sync_z80(z_context, mclks);
 		sync_sound(gen, mclks);
-		gen->ym->current_cycle -= mclks_per_frame;
-		gen->psg->cycles -= mclks_per_frame;
-		if (gen->ym->write_cycle != CYCLE_NEVER) {
-			gen->ym->write_cycle = gen->ym->write_cycle >= mclks_per_frame/MCLKS_PER_68K ? gen->ym->write_cycle - mclks_per_frame/MCLKS_PER_68K : 0;
-		}
-		//printf("reached frame end | 68K Cycles: %d, MCLK Cycles: %d\n", context->current_cycle, mclks);
-		vdp_run_context(v_context, mclks_per_frame);
+	}
+	vdp_run_context(v_context, mclks);
+	if (v_context->frame != last_frame_num) {
+		//printf("reached frame end %d | MCLK Cycles: %d, Target: %d, VDP cycles: %d, vcounter: %d, hslot: %d\n", last_frame_num, mclks, gen->frame_end, v_context->cycles, v_context->vcounter, v_context->hslot);
+		last_frame_num = v_context->frame;
 
 		if (!headless) {
 			break_on_sync |= wait_render_frame(v_context, frame_limit);
@@ -252,51 +235,47 @@
 				exit(0);
 			}
 		}
-		frame++;
-		mclks -= mclks_per_frame;
-		vdp_adjust_cycles(v_context, mclks_per_frame);
-		io_adjust_cycles(gen->ports, context->current_cycle, mclks_per_frame/MCLKS_PER_68K);
-		io_adjust_cycles(gen->ports+1, context->current_cycle, mclks_per_frame/MCLKS_PER_68K);
-		io_adjust_cycles(gen->ports+2, context->current_cycle, mclks_per_frame/MCLKS_PER_68K);
-		if (busack_cycle != CYCLE_NEVER) {
-			if (busack_cycle > mclks_per_frame/MCLKS_PER_68K) {
-				busack_cycle -= mclks_per_frame/MCLKS_PER_68K;
-			} else {
-				busack_cycle = CYCLE_NEVER;
-				busack = new_busack;
-			}
+		
+		vdp_adjust_cycles(v_context, mclks);
+		io_adjust_cycles(gen->ports, context->current_cycle, mclks);
+		io_adjust_cycles(gen->ports+1, context->current_cycle, mclks);
+		io_adjust_cycles(gen->ports+2, context->current_cycle, mclks);
+		context->current_cycle -= mclks;
+		z80_adjust_cycles(z_context, mclks);
+		gen->ym->current_cycle -= mclks;
+		gen->psg->cycles -= mclks;
+		if (gen->ym->write_cycle != CYCLE_NEVER) {
+			gen->ym->write_cycle = gen->ym->write_cycle >= mclks ? gen->ym->write_cycle - mclks : 0;
 		}
-		context->current_cycle -= mclks_per_frame/MCLKS_PER_68K;
-		if (z_context->current_cycle >= mclks_per_frame/MCLKS_PER_Z80) {
-			z_context->current_cycle -= mclks_per_frame/MCLKS_PER_Z80;
-		} else {
-			z_context->current_cycle = 0;
-		}
-		if (mclks) {
-			vdp_run_context(v_context, mclks);
-		}
-	} else {
-		//printf("running VDP for %d cycles\n", mclks - v_context->cycles);
-		vdp_run_context(v_context, mclks);
-		sync_sound(gen, mclks);
 	}
+	gen->frame_end = vdp_cycles_to_frame_end(v_context);
+	context->sync_cycle = gen->frame_end;
+	//printf("Set sync cycle to: %d @ %d, vcounter: %d, hslot: %d\n", context->sync_cycle, context->current_cycle, v_context->vcounter, v_context->hslot);
 	if (context->int_ack) {
+		//printf("acknowledging %d @ %d:%d, vcounter: %d, hslot: %d\n", context->int_ack, context->current_cycle, v_context->cycles, v_context->vcounter, v_context->hslot);
 		vdp_int_ack(v_context, context->int_ack);
 		context->int_ack = 0;
 	}
+	if (!address && (break_on_sync || save_state)) {
+		context->sync_cycle = context->current_cycle + 1;
+	}
 	adjust_int_cycle(context, v_context);
 	if (address) {
 		if (break_on_sync) {
-		break_on_sync = 0;
-		debugger(context, address);
-	}
-		if (save_state) {
+			break_on_sync = 0;
+			debugger(context, address);
+		}
+		if (save_state && (z_context->pc || (!z_context->reset && !z_context->busreq))) {
 			save_state = 0;
+			//advance Z80 core to the start of an instruction
 			while (!z_context->pc)
 			{
-				sync_z80(z_context, z_context->current_cycle * MCLKS_PER_Z80 + MCLKS_PER_Z80);
+				sync_z80(z_context, z_context->current_cycle + MCLKS_PER_Z80);
 			}
 			save_gst(gen, "savestate.gst", address);
+			puts("Saved state to savestate.gst");
+		} else if(save_state) {
+			context->sync_cycle = context->current_cycle + 1;
 		}
 	}
 	return context;
@@ -317,33 +296,30 @@
 		int blocked;
 		uint32_t before_cycle = v_context->cycles;
 		if (vdp_port < 4) {
-			gen->bus_busy = 1;
+			
 			while (vdp_data_port_write(v_context, value) < 0) {
 				while(v_context->flags & FLAG_DMA_RUN) {
-					vdp_run_dma_done(v_context, mclks_per_frame);
-					if (v_context->cycles >= mclks_per_frame) {
-						context->current_cycle = v_context->cycles / MCLKS_PER_68K;
-						if (context->current_cycle * MCLKS_PER_68K < mclks_per_frame) {
-							++context->current_cycle;
-						}
+					vdp_run_dma_done(v_context, gen->frame_end);
+					if (v_context->cycles >= gen->frame_end) {
+						context->current_cycle = v_context->cycles;
+						gen->bus_busy = 1;
 						sync_components(context, 0);
+						gen->bus_busy = 0;
 					}
 				}
-				//context->current_cycle = v_context->cycles / MCLKS_PER_68K;
+				//context->current_cycle = v_context->cycles;
 			}
 		} else if(vdp_port < 8) {
-			gen->bus_busy = 1;
 			blocked = vdp_control_port_write(v_context, value);
 			if (blocked) {
 				while (blocked) {
 					while(v_context->flags & FLAG_DMA_RUN) {
-						vdp_run_dma_done(v_context, mclks_per_frame);
-						if (v_context->cycles >= mclks_per_frame) {
-							context->current_cycle = v_context->cycles / MCLKS_PER_68K;
-							if (context->current_cycle * MCLKS_PER_68K < mclks_per_frame) {
-								++context->current_cycle;
-							}
+						vdp_run_dma_done(v_context, gen->frame_end);
+						if (v_context->cycles >= gen->frame_end) {
+							context->current_cycle = v_context->cycles;
+							gen->bus_busy = 1;
 							sync_components(context, 0);
+							gen->bus_busy = 0;
 						}
 					}
 					if (blocked < 0) {
@@ -353,6 +329,8 @@
 					}
 				}
 			} else {
+				context->sync_cycle = gen->frame_end = vdp_cycles_to_frame_end(v_context);
+				//printf("Set sync cycle to: %d @ %d, vcounter: %d, hslot: %d\n", context->sync_cycle, context->current_cycle, v_context->vcounter, v_context->hslot);
 				adjust_int_cycle(context, v_context);
 			}
 		} else {
@@ -360,21 +338,18 @@
 			exit(1);
 		}
 		if (v_context->cycles != before_cycle) {
-			//printf("68K paused for %d (%d) cycles at cycle %d (%d) for write\n", v_context->cycles / MCLKS_PER_68K - context->current_cycle, v_context->cycles - before_cycle, context->current_cycle, before_cycle);
-			context->current_cycle = v_context->cycles / MCLKS_PER_68K;
+			//printf("68K paused for %d (%d) cycles at cycle %d (%d) for write\n", v_context->cycles - context->current_cycle, v_context->cycles - before_cycle, context->current_cycle, before_cycle);
+			context->current_cycle = v_context->cycles;
+			//Lock the Z80 out of the bus until the VDP access is complete
+			gen->bus_busy = 1;
+			sync_z80(gen->z80, v_context->cycles);
+			gen->bus_busy = 0;
 		}
 	} else if (vdp_port < 0x18) {
-		sync_sound(gen, context->current_cycle * MCLKS_PER_68K);
 		psg_write(gen->psg, value);
 	} else {
 		//TODO: Implement undocumented test register(s)
 	}
-	if (gen->bus_busy)
-	{
-		//Lock the Z80 out of the bus until the VDP access is complete
-		sync_z80(gen->z80, v_context->cycles);
-		gen->bus_busy = 0;
-	}
 	return context;
 }
 
@@ -383,16 +358,18 @@
 	return vdp_port_write(vdp_port, context, vdp_port < 0x10 ? value | value << 8 : ((vdp_port & 1) ? value : 0));
 }
 
-z80_context * z80_vdp_port_write(uint16_t vdp_port, z80_context * context, uint8_t value)
+void * z80_vdp_port_write(uint32_t vdp_port, void * vcontext, uint8_t value)
 {
+	z80_context * context = vcontext;
 	genesis_context * gen = context->system;
+	vdp_port &= 0xFF;
 	if (vdp_port & 0xE0) {
 		printf("machine freeze due to write to Z80 address %X\n", 0x7F00 | vdp_port);
 		exit(1);
 	}
 	if (vdp_port < 0x10) {
 		//These probably won't currently interact well with the 68K accessing the VDP
-		vdp_run_context(gen->vdp, context->current_cycle * MCLKS_PER_Z80);
+		vdp_run_context(gen->vdp, context->current_cycle);
 		if (vdp_port < 4) {
 			vdp_data_port_write(gen->vdp, value << 8 | value);
 		} else if (vdp_port < 8) {
@@ -402,7 +379,7 @@
 			exit(1);
 		}
 	} else if (vdp_port < 0x18) {
-		sync_sound(gen, context->current_cycle * MCLKS_PER_Z80);
+		sync_sound(gen, context->current_cycle);
 		psg_write(gen->psg, value);
 	} else {
 		vdp_test_port_write(gen->vdp, value);
@@ -437,8 +414,13 @@
 		value = vdp_test_port_read(v_context);
 	}
 	if (v_context->cycles != before_cycle) {
-		//printf("68K paused for %d (%d) cycles at cycle %d (%d) for read\n", v_context->cycles / MCLKS_PER_68K - context->current_cycle, v_context->cycles - before_cycle, context->current_cycle, before_cycle);
-		context->current_cycle = v_context->cycles / MCLKS_PER_68K;
+		//printf("68K paused for %d (%d) cycles at cycle %d (%d) for read\n", v_context->cycles - context->current_cycle, v_context->cycles - before_cycle, context->current_cycle, before_cycle);
+		context->current_cycle = v_context->cycles;
+		//Lock the Z80 out of the bus until the VDP access is complete
+		genesis_context *gen = context->system;
+		gen->bus_busy = 1;
+		sync_z80(gen->z80, v_context->cycles);
+		gen->bus_busy = 0;
 	}
 	return value;
 }
@@ -453,22 +435,50 @@
 	}
 }
 
+//Services a Z80 byte read of a VDP port; returns the low byte for odd ports and the high byte otherwise
+//Adds bus arbitration delay to both the Z80 and 68K cycle counters before touching the VDP
+uint8_t z80_vdp_port_read(uint32_t vdp_port, void * vcontext)
+{
+	z80_context * context = vcontext;
+	if (vdp_port & 0xE0) {
+		printf("machine freeze due to read from Z80 address %X\n", 0x7F00 | vdp_port);
+		exit(1);
+	}
+	genesis_context * gen = context->system;
+	//VDP access goes over the 68K bus like a bank area access
+	//typical delay from bus arbitration
+	context->current_cycle += 3 * MCLKS_PER_Z80;
+	//TODO: add cycle for an access right after a previous one
+	//TODO: Below cycle time is an estimate based on the time between 68K !BG goes low and Z80 !MREQ goes high
+	//      Needs a new logic analyzer capture to get the actual delay on the 68K side
+	gen->m68k->current_cycle += 8 * MCLKS_PER_68K;
+	vdp_port &= 0x1F;
+	uint16_t ret;
+	if (vdp_port < 0x10) {
+		//These probably won't currently interact well with the 68K accessing the VDP
+		vdp_run_context(gen->vdp, context->current_cycle);
+		if (vdp_port < 4) {
+			ret = vdp_data_port_read(gen->vdp);
+		} else if (vdp_port < 8) {
+			ret = vdp_control_port_read(gen->vdp);
+		} else {
+			printf("Illegal read from HV Counter port %X\n", vdp_port);
+			exit(1);
+		}
+	} else {
+		//TODO: Figure out the correct value to return
+		ret = 0xFFFF;
+	}
+	return vdp_port & 1 ? ret : ret >> 8;
+}
+
 uint32_t zram_counter = 0;
-#define Z80_ACK_DELAY 3
-#define Z80_BUSY_DELAY 1//TODO: Find the actual value for this
-#define Z80_REQ_BUSY 1
-#define Z80_REQ_ACK 0
-#define Z80_RES_BUSACK reset
 
 m68k_context * io_write(uint32_t location, m68k_context * context, uint8_t value)
 {
 	genesis_context * gen = context->system;
 	if (location < 0x10000) {
-		if (busack_cycle <= context->current_cycle) {
-			busack = new_busack;
-			busack_cycle = CYCLE_NEVER;
-		}
-		if (!(busack || reset)) {
+		if (!z80_enabled || z80_get_busack(gen->z80, context->current_cycle)) {
 			location &= 0x7FFF;
 			if (location < 0x4000) {
 				z80_ram[location & 0x1FFF] = value;
@@ -476,7 +486,7 @@
 				z80_handle_code_write(location & 0x1FFF, gen->z80);
 #endif
 			} else if (location < 0x6000) {
-				sync_sound(gen, context->current_cycle * MCLKS_PER_68K);
+				sync_sound(gen, context->current_cycle);
 				if (location & 1) {
 					ym_data_write(gen->ym, value);
 				} else if(location & 2) {
@@ -522,22 +532,15 @@
 			}
 		} else {
 			if (location == 0x1100) {
-				if (busack_cycle <= context->current_cycle) {
-					busack = new_busack;
-					busack_cycle = CYCLE_NEVER;
-				}
 				if (value & 1) {
 					dputs("bus requesting Z80");
-
-					if(!reset && !busreq) {
-						sync_z80(gen->z80, context->current_cycle * MCLKS_PER_68K + Z80_ACK_DELAY*MCLKS_PER_Z80);
-						busack_cycle = (gen->z80->current_cycle * MCLKS_PER_Z80) / MCLKS_PER_68K;//context->current_cycle + Z80_ACK_DELAY;
-						new_busack = Z80_REQ_ACK;
+					if (z80_enabled) {
+						z80_assert_busreq(gen->z80, context->current_cycle);
+					} else {
+						gen->z80->busack = 1;
 					}
-					busreq = 1;
 				} else {
-					sync_z80(gen->z80, context->current_cycle * MCLKS_PER_68K);
-					if (busreq) {
+					if (gen->z80->busreq) {
 						dputs("releasing z80 bus");
 						#ifdef DO_DEBUG_PRINT
 						char fname[20];
@@ -546,30 +549,27 @@
 						fwrite(z80_ram, 1, sizeof(z80_ram), f);
 						fclose(f);
 						#endif
-						busack_cycle = ((gen->z80->current_cycle + Z80_BUSY_DELAY) * MCLKS_PER_Z80) / MCLKS_PER_68K;
-						new_busack = Z80_REQ_BUSY;
-						busreq = 0;
 					}
-					//busack_cycle = CYCLE_NEVER;
-					//busack = Z80_REQ_BUSY;
-
+					if (z80_enabled) {
+						z80_clear_busreq(gen->z80, context->current_cycle);
+					} else {
+						gen->z80->busack = 0;
+					}
 				}
 			} else if (location == 0x1200) {
-				sync_z80(gen->z80, context->current_cycle * MCLKS_PER_68K);
+				sync_z80(gen->z80, context->current_cycle);
 				if (value & 1) {
-					if (reset && busreq) {
-						new_busack = 0;
-						busack_cycle = ((gen->z80->current_cycle + Z80_ACK_DELAY) * MCLKS_PER_Z80) / MCLKS_PER_68K;//context->current_cycle + Z80_ACK_DELAY;
+					if (z80_enabled) {
+						z80_clear_reset(gen->z80, context->current_cycle);
+					} else {
+						gen->z80->reset = 0;
 					}
-					//TODO: Deal with the scenario in which reset is not asserted long enough
-					if (reset) {
-						need_reset = 1;
-						//TODO: Add necessary delay between release of reset and start of execution
-						gen->z80->current_cycle = (context->current_cycle * MCLKS_PER_68K) / MCLKS_PER_Z80 + 16;
+				} else {
+					if (z80_enabled) {
+						z80_assert_reset(gen->z80, context->current_cycle);
+					} else {
+						gen->z80->reset = 1;
 					}
-					reset = 0;
-				} else {
-					reset = 1;
 				}
 			}
 		}
@@ -597,16 +597,12 @@
 	uint8_t value;
 	genesis_context *gen = context->system;
 	if (location < 0x10000) {
-		if (busack_cycle <= context->current_cycle) {
-			busack = new_busack;
-			busack_cycle = CYCLE_NEVER;
-		}
-		if (!(busack==Z80_REQ_BUSY || reset)) {
+		if (!z80_enabled || z80_get_busack(gen->z80, context->current_cycle)) {
 			location &= 0x7FFF;
 			if (location < 0x4000) {
 				value = z80_ram[location & 0x1FFF];
 			} else if (location < 0x6000) {
-				sync_sound(gen, context->current_cycle * MCLKS_PER_68K);
+				sync_sound(gen, context->current_cycle);
 				value = ym_read_status(gen->ym);
 			} else {
 				value = 0xFF;
@@ -646,14 +642,10 @@
 			}
 		} else {
 			if (location == 0x1100) {
-				if (busack_cycle <= context->current_cycle) {
-					busack = new_busack;
-					busack_cycle = CYCLE_NEVER;
-				}
-				value = Z80_RES_BUSACK || busack;
-				dprintf("Byte read of BUSREQ returned %d @ %d (reset: %d, busack: %d, busack_cycle %d)\n", value, context->current_cycle, reset, busack, busack_cycle);
+				value = z80_enabled ? !z80_get_busack(gen->z80, context->current_cycle) : !gen->z80->busack;
+				dprintf("Byte read of BUSREQ returned %d @ %d (reset: %d)\n", value, context->current_cycle, gen->z80->reset);
 			} else if (location == 0x1200) {
-				value = !reset;
+				value = !gen->z80->reset;
 			} else {
 				value = 0xFF;
 				printf("Byte read of unknown IO location: %X\n", location);
@@ -674,10 +666,11 @@
 	return value;
 }
 
-z80_context * z80_write_ym(uint16_t location, z80_context * context, uint8_t value)
+void * z80_write_ym(uint32_t location, void * vcontext, uint8_t value)
 {
+	z80_context * context = vcontext;
 	genesis_context * gen = context->system;
-	sync_sound(gen, context->current_cycle * MCLKS_PER_Z80);
+	sync_sound(gen, context->current_cycle);
 	if (location & 1) {
 		ym_data_write(gen->ym, value);
 	} else if (location & 2) {
@@ -688,13 +681,83 @@
 	return context;
 }
 
-uint8_t z80_read_ym(uint16_t location, z80_context * context)
+uint8_t z80_read_ym(uint32_t location, void * vcontext)
 {
+	z80_context * context = vcontext;
 	genesis_context * gen = context->system;
-	sync_sound(gen, context->current_cycle * MCLKS_PER_Z80);
+	sync_sound(gen, context->current_cycle);
 	return ym_read_status(gen->ym);
 }
 
+uint8_t z80_read_bank(uint32_t location, void * vcontext) //Z80 byte-read handler registered for 0x8000-0xFFFF in z80_map
+{
+	z80_context * context = vcontext;
+	genesis_context *gen = context->system;
+	if (gen->bus_busy) { //bus_busy set: jump the Z80 clock forward to its sync cycle before accessing
+		context->current_cycle = context->sync_cycle;
+	}
+	//typical delay from bus arbitration
+	context->current_cycle += 3 * MCLKS_PER_Z80;
+	//TODO: add cycle for an access right after a previous one
+	//TODO: Below cycle time is an estimate based on the time between 68K !BG goes low and Z80 !MREQ goes high
+	//      Needs a new logic analyzer capture to get the actual delay on the 68K side
+	gen->m68k->current_cycle += 8 * MCLKS_PER_68K; //the 68K is charged cycles for the access as well
+
+	location &= 0x7FFF; //offset within the 32KB window
+	if (context->mem_pointers[1]) { //direct pointer cached by z80_write_bank_reg for the current bank
+		return context->mem_pointers[1][location ^ 1]; //^ 1 swaps bytes within each 16-bit word (presumably host-order word storage - confirm)
+	}
+	uint32_t address = context->bank_reg << 15 | location; //bank_reg supplies the address bits above bit 14
+	if (address >= 0xC00000 && address < 0xE00000) {
+		return z80_vdp_port_read(location & 0xFF, context);
+	} else {
+		fprintf(stderr, "Unhandled read by Z80 from address %X through banked memory area (%X)\n", address, context->bank_reg << 15);
+	}
+	return 0; //unhandled reads return 0
+}
+
+void *z80_write_bank(uint32_t location, void * vcontext, uint8_t value) //Z80 byte-write handler registered for 0x8000-0xFFFF in z80_map; returns the context
+{
+	z80_context * context = vcontext;
+	genesis_context *gen = context->system;
+	if (gen->bus_busy) { //bus_busy set: jump the Z80 clock forward to its sync cycle before accessing
+		context->current_cycle = context->sync_cycle;
+	}
+	//typical delay from bus arbitration
+	context->current_cycle += 3 * MCLKS_PER_Z80;
+	//TODO: add cycle for an access right after a previous one
+	//TODO: Below cycle time is an estimate based on the time between 68K !BG goes low and Z80 !MREQ goes high
+	//      Needs a new logic analyzer capture to get the actual delay on the 68K side
+	gen->m68k->current_cycle += 8 * MCLKS_PER_68K; //the 68K is charged cycles for the access as well
+
+	location &= 0x7FFF; //offset within the 32KB window
+	uint32_t address = context->bank_reg << 15 | location; //bank_reg supplies the address bits above bit 14
+	if (address >= 0xE00000) { //0xE00000 and up: store into ram with the offset wrapped to 64KB
+		address &= 0xFFFF;
+		((uint8_t *)ram)[address ^ 1] = value; //^ 1 swaps bytes within each 16-bit word (presumably host-order word storage - confirm)
+	} else if (address >= 0xC00000) { //0xC00000-0xDFFFFF: forwarded to the VDP port handler
+		z80_vdp_port_write(location & 0xFF, context, value);
+	} else { //everything below 0xC00000 is only reported, the write is dropped
+		fprintf(stderr, "Unhandled write by Z80 to address %X through banked memory area\n", address);
+	}
+	return context;
+}
+
+void *z80_write_bank_reg(uint32_t location, void * vcontext, uint8_t value) //write handler for the bank register range in z80_map; returns the context
+{
+	z80_context * context = vcontext;
+
+	context->bank_reg = (context->bank_reg >> 1 | value << 8) & 0x1FF; //9-bit shift register: shift right, bit 0 of value enters at bit 8
+	if (context->bank_reg < 0x80) { //bank base (bank_reg << 15) is below 0x400000
+		genesis_context *gen = context->system;
+		context->mem_pointers[1] = get_native_pointer(context->bank_reg << 15, (void **)gen->m68k->mem_pointers, &gen->m68k->options->gen); //presumably a host pointer for that 68K address, or NULL - confirm
+	} else {
+		context->mem_pointers[1] = NULL; //NULL makes z80_read_bank decode the address itself
+	}
+
+	return context;
+}
+
 uint16_t read_sram_w(uint32_t address, m68k_context * context)
 {
 	genesis_context * gen = context->system;
@@ -868,9 +931,7 @@
 
 void init_run_cpu(genesis_context * gen, FILE * address_log, char * statefile, uint8_t * debugger)
 {
-	m68k_context context;
 	m68k_options opts;
-	gen->m68k = &context;
 	memmap_chunk memmap[MAX_MAP_CHUNKS];
 	uint32_t num_chunks;
 	void * initial_mapped = NULL;
@@ -963,23 +1024,24 @@
 		}
 		atexit(save_sram);
 	}
-	init_m68k_opts(&opts, memmap, num_chunks);
+	init_m68k_opts(&opts, memmap, num_chunks, MCLKS_PER_68K);
 	opts.address_log = address_log;
-	init_68k_context(&context, opts.gen.native_code_map, &opts);
+	m68k_context *context = init_68k_context(&opts);
+	gen->m68k = context;
 
-	context.video_context = gen->vdp;
-	context.system = gen;
+	context->video_context = gen->vdp;
+	context->system = gen;
 	//cartridge ROM
-	context.mem_pointers[0] = cart;
-	context.target_cycle = context.sync_cycle = mclks_per_frame/MCLKS_PER_68K;
+	context->mem_pointers[0] = cart;
+	context->target_cycle = context->sync_cycle = gen->frame_end > gen->max_cycles ? gen->frame_end : gen->max_cycles;
 	//work RAM
-	context.mem_pointers[1] = ram;
+	context->mem_pointers[1] = ram;
 	//save RAM/map
-	context.mem_pointers[2] = initial_mapped;
-	context.mem_pointers[3] = (uint16_t *)gen->save_ram;
+	context->mem_pointers[2] = initial_mapped;
+	context->mem_pointers[3] = (uint16_t *)gen->save_ram;
 	uint32_t address;
 	address = cart[2] << 16 | cart[3];
-	translate_m68k_stream(address, &context);
+	translate_m68k_stream(address, context);
 	if (statefile) {
 		uint32_t pc = load_gst(gen, statefile);
 		if (!pc) {
@@ -988,18 +1050,15 @@
 		}
 		printf("Loaded %s\n", statefile);
 		if (debugger) {
-			insert_breakpoint(&context, pc, debugger);
+			insert_breakpoint(context, pc, debugger);
 		}
 		adjust_int_cycle(gen->m68k, gen->vdp);
-#ifndef NO_Z80
-		gen->z80->native_pc =  z80_get_native_address_trans(gen->z80, gen->z80->pc);
-#endif
-		start_68k_context(&context, pc);
+		start_68k_context(context, pc);
 	} else {
 		if (debugger) {
-			insert_breakpoint(&context, address, debugger);
+			insert_breakpoint(context, address, debugger);
 		}
-		m68k_reset(&context);
+		m68k_reset(context);
 	}
 }
 
@@ -1072,6 +1131,15 @@
 		}
 	}
 }
+#ifndef NO_Z80
+const memmap_chunk z80_map[] = { //Z80 address map; main passes the entry count as a literal 5 to init_z80_opts, keep the two in sync
+	{ 0x0000, 0x4000,  0x1FFF, 0, MMAP_READ | MMAP_WRITE | MMAP_CODE, z80_ram, NULL, NULL, NULL,              NULL }, //backed directly by z80_ram
+	{ 0x8000, 0x10000, 0x7FFF, 0, 0,                                  NULL,    NULL, NULL, z80_read_bank,     z80_write_bank}, //banked window handlers
+	{ 0x4000, 0x6000,  0x0003, 0, 0,                                  NULL,    NULL, NULL, z80_read_ym,       z80_write_ym}, //YM handlers
+	{ 0x6000, 0x6100,  0xFFFF, 0, 0,                                  NULL,    NULL, NULL, NULL,              z80_write_bank_reg}, //bank register: write handler only
+	{ 0x7F00, 0x8000,  0x00FF, 0, 0,                                  NULL,    NULL, NULL, z80_vdp_port_read, z80_vdp_port_write} //VDP port handlers
+};
+#endif
 
 int main(int argc, char ** argv)
 {
@@ -1081,7 +1149,6 @@
 	}
 	set_exe_str(argv[0]);
 	config = load_config();
-	detect_region();
 	int width = -1;
 	int height = -1;
 	int debug = 0;
@@ -1203,6 +1270,8 @@
 	}
 	if (force_version) {
 		version_reg = force_version;
+	} else {
+		detect_region();
 	}
 	update_title();
 	int def_width = 0;
@@ -1217,18 +1286,20 @@
 	height = height < 240 ? (width/320) * 240 : height;
 	uint32_t fps = 60;
 	if (version_reg & 0x40) {
-		mclks_per_frame = MCLKS_LINE * LINES_PAL;
 		fps = 50;
 	}
 	if (!headless) {
-		render_init(width, height, title, fps, fullscreen, use_gl);
+		render_init(width, height, title, fps, fullscreen);
 	}
 	vdp_context v_context;
 	genesis_context gen;
 	memset(&gen, 0, sizeof(gen));
 	gen.master_clock = gen.normal_clock = fps == 60 ? MCLKS_NTSC : MCLKS_PAL;
 
-	init_vdp_context(&v_context);
+	init_vdp_context(&v_context, version_reg & 0x40);
+	gen.frame_end = vdp_cycles_to_frame_end(&v_context);
+	char * config_cycles = tern_find_ptr(config, "clocksmax_cycles");
+	gen.max_cycles = config_cycles ? atoi(config_cycles) : 10000000;
 
 	ym2612_context y_context;
 	ym_init(&y_context, render_sample_rate(), gen.master_clock, MCLKS_PER_YM, render_audio_buffer(), ym_log ? YM_OPT_WAVE_LOG : 0);
@@ -1237,16 +1308,15 @@
 	psg_init(&p_context, render_sample_rate(), gen.master_clock, MCLKS_PER_PSG, render_audio_buffer());
 
 	z80_context z_context;
-	x86_z80_options z_opts;
 #ifndef NO_Z80
-	init_x86_z80_opts(&z_opts);
+	z80_options z_opts;
+	init_z80_opts(&z_opts, z80_map, 5, MCLKS_PER_Z80);
 	init_z80_context(&z_context, &z_opts);
+	z80_assert_reset(&z_context, 0);
 #endif
 
 	z_context.system = &gen;
 	z_context.mem_pointers[0] = z80_ram;
-	z_context.sync_cycle = z_context.target_cycle = mclks_per_frame/MCLKS_PER_Z80;
-	z_context.int_cycle = CYCLE_NEVER;
 	z_context.mem_pointers[1] = z_context.mem_pointers[2] = (uint8_t *)cart;
 
 	gen.z80 = &z_context;
@@ -1254,6 +1324,7 @@
 	gen.ym = &y_context;
 	gen.psg = &p_context;
 	genesis = &gen;
+	setup_io_devices(config, gen.ports);
 
 	int fname_size = strlen(romfname);
 	sram_filename = malloc(fname_size+6);
@@ -1268,7 +1339,7 @@
 	if (i < 0) {
 		strcpy(sram_filename + fname_size, ".sram");
 	}
-	set_keybindings();
+	set_keybindings(gen.ports);
 
 	init_run_cpu(&gen, address_log, statefile, debuggerfun);
 	return 0;
--- a/blastem.h	Thu May 28 21:09:33 2015 -0700
+++ b/blastem.h	Thu May 28 21:19:55 2015 -0700
@@ -19,8 +19,6 @@
 #define RAM_FLAG_EVEN 0x1000
 #define RAM_FLAG_BOTH 0x0000
 
-#define CYCLE_NEVER 0xFFFFFFFF
-
 typedef struct {
 	m68k_context   *m68k;
 	z80_context    *z80;
@@ -32,6 +30,8 @@
 	uint32_t       save_flags;
 	uint32_t       master_clock; //Current master clock value
 	uint32_t       normal_clock; //Normal master clock (used to restore master clock after turbo mode)
+	uint32_t       frame_end;
+	uint32_t       max_cycles;
 	uint8_t        bank_regs[8];
 	io_port        ports[3];
 	uint8_t        bus_busy;
@@ -42,8 +42,6 @@
 extern int break_on_sync;
 extern int save_state;
 extern tern_node * config;
-extern uint8_t busreq;
-extern uint8_t reset;
 
 #define CARTRIDGE_WORDS 0x200000
 #define RAM_WORDS 32 * 1024
@@ -54,6 +52,7 @@
 extern uint8_t z80_ram[Z80_RAM_BYTES];
 
 uint16_t read_dma_value(uint32_t address);
+m68k_context * sync_components(m68k_context *context, uint32_t address);
 m68k_context * debugger(m68k_context * context, uint32_t address);
 void set_speed_percent(genesis_context * context, uint32_t percent);
 
--- a/debug.c	Thu May 28 21:09:33 2015 -0700
+++ b/debug.c	Thu May 28 21:19:55 2015 -0700
@@ -3,6 +3,8 @@
 #include "68kinst.h"
 #include <stdlib.h>
 #include <string.h>
+#include <sys/select.h>
+#include "render.h"
 
 static bp_def * breakpoints = NULL;
 static bp_def * zbreakpoints = NULL;
@@ -471,6 +473,7 @@
 	static uint32_t branch_t;
 	static uint32_t branch_f;
 	m68kinst inst;
+	sync_components(context, 0);
 	//probably not necessary, but let's play it safe
 	address &= 0xFFFFFF;
 	if (address == branch_t) {
@@ -507,8 +510,25 @@
 	printf("%X: %s\n", address, input_buf);
 	uint32_t after = address + (after_pc-pc)*2;
 	int debugging = 1;
+	int prompt = 1;
+	fd_set read_fds;
+	FD_ZERO(&read_fds);
+	struct timeval timeout;
 	while (debugging) {
-		fputs(">", stdout);
+		if (prompt) {
+			fputs(">", stdout);
+			fflush(stdout);
+		}
+		process_events();
+		timeout.tv_sec = 0;
+		timeout.tv_usec = 16667;
+		FD_SET(fileno(stdin), &read_fds);
+		if(select(fileno(stdin) + 1, &read_fds, NULL, NULL, &timeout) < 1) {
+			prompt = 0;
+			continue;
+		} else {
+			prompt = 1;
+		}
 		if (!fgets(input_buf, sizeof(input_buf), stdin)) {
 			fputs("fgets failed", stderr);
 			break;
@@ -632,9 +652,14 @@
 					}
 				} else if(param[0] == 'c') {
 					value = context->current_cycle;
-				} else if (param[0] == '0' && param[1] == 'x') {
-					uint32_t p_addr = strtol(param+2, NULL, 16);
-					value = read_dma_value(p_addr/2);
+				} else if ((param[0] == '0' && param[1] == 'x') || param[0] == '$') {
+					uint32_t p_addr = strtol(param+(param[0] == '0' ? 2 : 1), NULL, 16);
+					if ((p_addr & 0xFFFFFF) == 0xC00004) {
+						genesis_context * gen = context->system;
+						value = vdp_hv_counter_read(gen->vdp);
+					} else {
+						value = read_dma_value(p_addr/2);
+					}
 				} else {
 					fprintf(stderr, "Unrecognized parameter to p: %s\n", param);
 					break;
--- a/default.cfg	Thu May 28 21:09:33 2015 -0700
+++ b/default.cfg	Thu May 28 21:19:55 2015 -0700
@@ -54,6 +54,13 @@
 	}
 }
 
+io {
+	devices {
+		1 gamepad6.1
+		2 gamepad6.2
+	}
+}
+
 video {
 	width 640
 	vertex_shader default.v.glsl
--- a/dis.c	Thu May 28 21:09:33 2015 -0700
+++ b/dis.c	Thu May 28 21:19:55 2015 -0700
@@ -6,9 +6,12 @@
 #include "68kinst.h"
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
+#include "vos_program_module.h"
+#include "tern.h"
 
 uint8_t visited[(16*1024*1024)/16];
-uint8_t label[(16*1024*1024)/8];
+uint16_t label[(16*1024*1024)/8];
 
 void visit(uint32_t address)
 {
@@ -20,7 +23,7 @@
 {
 	address &= 0xFFFFFF;
 	//printf("referenced: %X\n", address);
-	label[address/16] |= 1 << (address % 8);
+	label[address/16] |= 1 << (address % 16);
 }
 
 uint8_t is_visited(uint32_t address)
@@ -29,10 +32,40 @@
 	return visited[address/16] & (1 << ((address / 2) % 8));
 }
 
-uint8_t is_label(uint32_t address)
+uint16_t is_label(uint32_t address)
 {
 	address &= 0xFFFFFF;
-	return label[address/16] & (1 << (address % 8));
+	return label[address/16] & (1 << (address % 16));
+}
+
+typedef struct { //growable list of names attached to one address (see add_label)
+	uint32_t num_labels; //entries of labels[] currently in use
+	uint32_t storage; //entries of labels[] currently allocated
+	char     *labels[]; //flexible array of strdup'd names
+} label_names;
+
+//Marks address (masked to 24 bits) as referenced and appends a copy of name to its label list; returns the tree root
+tern_node * add_label(tern_node * head, char * name, uint32_t address)
+{
+	char key[MAX_INT_KEY_SIZE];
+	address &= 0xFFFFFF;
+	reference(address);
+	tern_int_key(address, key);
+	label_names * names = tern_find_ptr(head, key);
+	if (names) {
+		if (names->num_labels == names->storage) { //list is full: grow capacity by half
+			names->storage = names->storage + (names->storage >> 1);
+			names = realloc(names, sizeof(label_names) + names->storage * sizeof(char *));
+			head = tern_insert_ptr(head, key, names); //realloc may move the list; re-register it (assumes tern_insert_ptr overwrites an existing key - confirm)
+		}
+	} else {
+		names = malloc(sizeof(label_names) + 4 * sizeof(char *)); //first label for this address: start with room for 4
+		names->num_labels = 0;
+		names->storage = 4;
+		head = tern_insert_ptr(head, key, names);
+	}
+	names->labels[names->num_labels++] = strdup(name);
+	return head;
 }
 
 typedef struct deferred {
@@ -42,7 +75,7 @@
 
 deferred * defer(uint32_t address, deferred * next)
 {
-	if (is_visited(address)) {
+	if (is_visited(address) || address & 1) {
 		return next;
 	}
 	//printf("deferring %X\n", address);
@@ -66,9 +99,18 @@
 	}
 }
 
-uint8_t labels = 0;
-uint8_t addr = 0;
-uint8_t only = 0;
+int label_fun(char *dst, uint32_t address, void * data) //label callback handed to m68k_disasm_labels; data is the named-label tree
+{
+	tern_node * labels = data;
+	char key[MAX_INT_KEY_SIZE];
+	label_names * names = tern_find_ptr(labels, tern_int_key(address & 0xFFFFFF, key)); //lookup uses the address masked to 24 bits
+	if (names)
+	{
+		return sprintf(dst, "%s", names->labels[0]); //only the first name registered for the address is printed
+	} else {
+		return m68k_default_label_fun(dst, address, NULL); //no named label: defer to the default formatter
+	}
+}
 
 int main(int argc, char ** argv)
 {
@@ -77,14 +119,10 @@
 	char disbuf[1024];
 	m68kinst instbuf;
 	unsigned short * cur;
-	FILE * f = fopen(argv[1], "rb");
-	fseek(f, 0, SEEK_END);
-	filesize = ftell(f);
-	fseek(f, 0, SEEK_SET);
-	filebuf = malloc(filesize);
-	fread(filebuf, 2, filesize/2, f);
-	fclose(f);
 	deferred *def = NULL, *tmpd;
+
+	uint8_t labels = 0, addr = 0, only = 0, vos = 0, reset = 0;
+
 	for(uint8_t opt = 2; opt < argc; ++opt) {
 		if (argv[opt][0] == '-') {
 			FILE * address_log;
@@ -99,6 +137,12 @@
 			case 'o':
 				only = 1;
 				break;
+			case 'v':
+				vos = 1;
+				break;
+			case 'r':
+				reset = 1;
+				break;
 			case 'f':
 				opt++;
 				if (opt >= argc) {
@@ -126,29 +170,85 @@
 			reference(address);
 		}
 	}
-	for(cur = filebuf; cur - filebuf < (filesize/2); ++cur)
+
+	FILE * f = fopen(argv[1], "rb");
+	fseek(f, 0, SEEK_END);
+	filesize = ftell(f);
+	fseek(f, 0, SEEK_SET);
+
+	tern_node * named_labels = NULL;
+	char int_key[MAX_INT_KEY_SIZE];
+	uint32_t address_off, address_end;
+	if (vos)
 	{
-		*cur = (*cur >> 8) | (*cur << 8);
+		vos_program_module header;
+		vos_read_header(f, &header);
+		vos_read_alloc_module_map(f, &header);
+		address_off = header.user_boundary;
+		address_end = address_off + filesize - 0x1000;
+		def = defer(header.main_entry_link.code_address, def);
+		named_labels = add_label(named_labels, "main_entry_link", header.main_entry_link.code_address);
+		for (int i = 0; i < header.n_modules; i++)
+		{
+			if (!reset || header.module_map_entries[i].code_address != header.user_boundary)
+			{
+				def = defer(header.module_map_entries[i].code_address, def);
+			}
+			named_labels = add_label(named_labels, header.module_map_entries[i].name.str, header.module_map_entries[i].code_address);
+		}
+		fseek(f, 0x1000, SEEK_SET);
+		filebuf = malloc(filesize - 0x1000);
+		if (fread(filebuf, 2, (filesize - 0x1000)/2, f) != (filesize - 0x1000)/2)
+		{
+			fprintf(stderr, "Failure while reading file %s\n", argv[1]);
+		}
+		fclose(f);
+		for(cur = filebuf; cur - filebuf < ((filesize - 0x1000)/2); ++cur)
+		{
+			*cur = (*cur >> 8) | (*cur << 8);
+		}
+		if (reset)
+		{
+			def = defer(filebuf[2] << 16 | filebuf[3], def);
+			named_labels = add_label(named_labels, "reset", filebuf[2] << 16 | filebuf[3]);
+		}
+	} else {
+		address_off = 0;
+		address_end = filesize;
+		filebuf = malloc(filesize);
+		if (fread(filebuf, 2, filesize/2, f) != filesize/2)
+		{
+			fprintf(stderr, "Failure while reading file %s\n", argv[1]);
+		}
+		fclose(f);
+		for(cur = filebuf; cur - filebuf < (filesize/2); ++cur)
+		{
+			*cur = (*cur >> 8) | (*cur << 8);
+		}
+		uint32_t start = filebuf[2] << 16 | filebuf[3];
+		uint32_t int_2 = filebuf[0x68/2] << 16 | filebuf[0x6A/2];
+		uint32_t int_4 = filebuf[0x70/2] << 16 | filebuf[0x72/2];
+		uint32_t int_6 = filebuf[0x78/2] << 16 | filebuf[0x7A/2];
+		named_labels = add_label(named_labels, "start", start);
+		named_labels = add_label(named_labels, "int_2", int_2);
+		named_labels = add_label(named_labels, "int_4", int_4);
+		named_labels = add_label(named_labels, "int_6", int_6);
+		if (!def || !only) {
+			def = defer(start, def);
+			def = defer(int_2, def);
+			def = defer(int_4, def);
+			def = defer(int_6, def);
+		}
 	}
-	uint32_t start = filebuf[2] << 16 | filebuf[3], tmp_addr;
-	uint32_t int_2 = filebuf[0x68/2] << 16 | filebuf[0x6A/2];
-	uint32_t int_4 = filebuf[0x70/2] << 16 | filebuf[0x72/2];
-	uint32_t int_6 = filebuf[0x78/2] << 16 | filebuf[0x7A/2];
 	uint16_t *encoded, *next;
-	uint32_t size;
-	if (!def || !only) {
-		def = defer(start, def);
-		def = defer(int_2, def);
-		def = defer(int_4, def);
-		def = defer(int_6, def);
-	}
+	uint32_t size, tmp_addr;
 	uint32_t address;
 	while(def) {
 		do {
 			encoded = NULL;
 			address = def->address;
 			if (!is_visited(address)) {
-				encoded = filebuf + address/2;
+				encoded = filebuf + (address - address_off)/2;
 			}
 			tmpd = def;
 			def = def->next;
@@ -158,7 +258,7 @@
 			break;
 		}
 		for(;;) {
-			if (address > filesize) {
+			if (address > address_end || address < address_off) {
 				break;
 			}
 			visit(address);
@@ -175,7 +275,7 @@
 			if (instbuf.op == M68K_BCC || instbuf.op == M68K_DBCC || instbuf.op == M68K_BSR) {
 				if (instbuf.op == M68K_BCC && instbuf.extra.cond == COND_TRUE) {
 					address = instbuf.address + 2 + instbuf.src.params.immed;
-					encoded = filebuf + address/2;
+					encoded = filebuf + (address - address_off)/2;
 					reference(address);
 					if (is_visited(address)) {
 						break;
@@ -188,13 +288,13 @@
 			} else if(instbuf.op == M68K_JMP) {
 				if (instbuf.src.addr_mode == MODE_ABSOLUTE || instbuf.src.addr_mode == MODE_ABSOLUTE_SHORT) {
 					address = instbuf.src.params.immed;
-					encoded = filebuf + address/2;
+					encoded = filebuf + (address - address_off)/2;
 					if (is_visited(address)) {
 						break;
 					}
 				} else if (instbuf.src.addr_mode == MODE_PC_DISPLACE) {
 					address = instbuf.src.params.regs.displacement + instbuf.address + 2;
-					encoded = filebuf + address/2;
+					encoded = filebuf + (address - address_off)/2;
 					if (is_visited(address)) {
 						break;
 					}
@@ -211,32 +311,41 @@
 		}
 	}
 	if (labels) {
+		for (address = 0; address < address_off; address++) {
+			if (is_label(address)) {
+				printf("ADR_%X equ $%X\n", address, address);
+			}
+		}
 		for (address = filesize; address < (16*1024*1024); address++) {
-			if (is_label(address)) {
+			char key[MAX_INT_KEY_SIZE];
+			tern_int_key(address, key);
+			label_names *names = tern_find_ptr(named_labels, key);
+			if (names) {
+				for (int i = 0; i < names->num_labels; i++)
+				{
+					printf("%s equ $%X\n", names->labels[i], address);
+				}
+			} else if (is_label(address)) {
 				printf("ADR_%X equ $%X\n", address, address);
 			}
 		}
 		puts("");
 	}
-	for (address = 0; address < filesize; address+=2) {
+	for (address = address_off; address < address_end; address+=2) {
 		if (is_visited(address)) {
-			encoded = filebuf + address/2;
+			encoded = filebuf + (address-address_off)/2;
 			m68k_decode(encoded, &instbuf, address);
 			if (labels) {
-				m68k_disasm_labels(&instbuf, disbuf);
-				if (address == start) {
-					puts("start:");
-				}
-				if(address == int_2) {
-					puts("int_2:");
-				}
-				if(address == int_4) {
-					puts("int_4:");
-				}
-				if(address == int_6) {
-					puts("int_6:");
-				}
-				if (is_label(instbuf.address)) {
+				m68k_disasm_labels(&instbuf, disbuf, label_fun, named_labels);
+				char keybuf[MAX_INT_KEY_SIZE];
+				label_names * names = tern_find_ptr(named_labels, tern_int_key(address, keybuf));
+				if (names)
+				{
+					for (int i = 0; i < names->num_labels; i++)
+					{
+						printf("%s:\n", names->labels[i]);
+					}
+				} else if (is_label(instbuf.address)) {
 					printf("ADR_%X:\n", instbuf.address);
 				}
 				if (addr) {
--- a/gen.h	Thu May 28 21:09:33 2015 -0700
+++ b/gen.h	Thu May 28 21:19:55 2015 -0700
@@ -17,9 +17,17 @@
 	code_ptr last;
 } code_info;
 
+void check_alloc_code(code_info *code, uint32_t inst_size);
+
 void init_code_info(code_info *code);
 void call(code_info *code, code_ptr fun);
 void jmp(code_info *code, code_ptr dest);
 void jmp_r(code_info *code, uint8_t dst);
+//call a function and put the arguments in the appropriate place according to the host ABI
+void call_args(code_info *code, code_ptr fun, uint32_t num_args, ...);
+//like the above, but follows other aspects of the ABI like stack alignment
+void call_args_abi(code_info *code, code_ptr fun, uint32_t num_args, ...);
+void save_callee_save_regs(code_info *code);
+void restore_callee_save_regs(code_info *code);
 
 #endif //GEN_H_
--- a/gen_arm.c	Thu May 28 21:09:33 2015 -0700
+++ b/gen_arm.c	Thu May 28 21:19:55 2015 -0700
@@ -548,3 +548,39 @@
 	*(code->cur++) = cc | POPM | reglist;
 	return CODE_OK;
 }
+
+uint32_t load_store_immoff(code_info *code, uint32_t op, uint32_t dst, uint32_t base, int32_t offset, uint32_t cc) //emits one load/store word with an immediate offset; returns CODE_OK or INVALID_IMMED
+{
+	if (offset >= 0x1000 || offset <= -0x1000) { //offset magnitude must fit in 12 bits
+		return INVALID_IMMED;
+	}
+	check_alloc_code(code); //NOTE(review): gen.h in this changeset declares check_alloc_code(code, inst_size) - confirm which declaration applies here
+	uint32_t instruction = cc | op | POST_IND | OFF_IMM | SZ_W | base << 16 | dst << 12; //NOTE(review): POST_IND is set - confirm post-indexed addressing is intended
+	if (offset >= 0) {
+		instruction |= offset | DIR_UP;
+	} else {
+		instruction |= (-offset) | DIR_DOWN; //negative offsets are encoded as magnitude plus the down direction flag
+	}
+	*(code->cur++) = instruction;
+	return CODE_OK;
+}
+
+uint32_t ldr_cc(code_info *code, uint32_t dst, uint32_t base, int32_t offset, uint32_t cc) //load_store_immoff with OP_LDR and condition cc
+{
+	return load_store_immoff(code, OP_LDR, dst, base, offset, cc);
+}
+
+uint32_t ldr(code_info *code, uint32_t dst, uint32_t base, int32_t offset) //ldr_cc with the CC_AL condition
+{
+	return ldr_cc(code, dst, base, offset, CC_AL);
+}
+
+uint32_t str_cc(code_info *code, uint32_t src, uint32_t base, int32_t offset, uint32_t cc) //load_store_immoff with OP_STR and condition cc
+{
+	return load_store_immoff(code, OP_STR, src, base, offset, cc);
+}
+
+uint32_t str(code_info *code, uint32_t src, uint32_t base, int32_t offset) //str_cc with the CC_AL condition
+{
+	return str_cc(code, src, base, offset, CC_AL);
+}
--- a/gen_arm.h	Thu May 28 21:09:33 2015 -0700
+++ b/gen_arm.h	Thu May 28 21:19:55 2015 -0700
@@ -149,5 +149,9 @@
 uint32_t pop_cc(code_info *code, uint32_t reg, uint32_t cc);
 uint32_t popm(code_info *code, uint32_t reglist);
 uint32_t popm_cc(code_info *code, uint32_t reglist, uint32_t cc);
+uint32_t ldr_cc(code_info *code, uint32_t dst, uint32_t base, int32_t offset, uint32_t cc);
+uint32_t ldr(code_info *code, uint32_t dst, uint32_t base, int32_t offset); //ldr_cc with the CC_AL condition
+uint32_t str_cc(code_info *code, uint32_t src, uint32_t base, int32_t offset, uint32_t cc);
+uint32_t str(code_info *code, uint32_t src, uint32_t base, int32_t offset);
 
 #endif //GEN_ARM_H_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gen_test_hv.s68	Thu May 28 21:19:55 2015 -0700
@@ -0,0 +1,631 @@
+	dc.l $0, start
+	dc.l empty_handler
+	dc.l empty_handler
+	;$10
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$20
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$30
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$40
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$50
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$60
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$70
+	dc.l int_4
+	dc.l empty_handler
+	dc.l int_6
+	dc.l empty_handler
+	;$80
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$90
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$A0
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$B0
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$C0
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$D0
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$E0
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$F0
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.b "SEGA"
+empty_handler:
+int_6:
+	rte
+int_4:
+	move.w (a2), d0
+	ori.w #$8000, d0
+	move.w d0, (a4)+
+	rte
+
+start:
+	lea $C00000, a0
+	lea $C00004, a1
+	move.w #$8104, (a1) ;Mode 5, everything turned off
+	move.w #$8004, (a1)
+	move.w #$8220, (a1) ;Scroll a table $8000
+	move.w #$8404, (a1) ;Scroll b table $8000
+	move.w #$8560, (a1) ;SAT table $C000
+	move.w #$8700, (a1) ;backdrop color 0
+	move.w #$8B00, (a1) ;full screen scroll
+	move.w #$8C81, (a1) ;40 cell mode, no interlace
+	move.w #$8C81, (mode).w
+	move.w #$8D00, (a1) ;hscroll table at 0
+	move.w #$8F02, (a1) ;autoinc 2
+	move.w #$9011, (a1) ;64x64 scroll size
+	move.l #$C0000000, (a1)
+	move.w #$000, (a0)
+	move.w #$EEE, (a0)
+
+	;clear scroll table
+	move.l #$40000000, (a1)
+	move.l #0, (a0)
+
+	;load tiles
+	move.l #$44000000, (a1)
+	lea font(pc), a2
+	move.w #((fontend-font)/4 - 1), d0
+tloop:
+	move.l (a2)+, (a0)
+	dbra d0, tloop
+
+
+
+	;clear name table
+	move.l #$40000002, (a1)
+	moveq #32, d0
+	move.w #(64*64-1), d1
+ploop:
+	move.w d0, (a0)
+	dbra d1, ploop
+
+
+	lea $FF0000, a4
+	move.b #$40, (a4, 6)
+	move.w #$8144, (a1) ;enable display
+	move #$2300, sr
+
+	lea (4, a1), a2 ;hv counter line address
+	lea (2, a1), a3 ;second control/status address
+
+	move.b #254, d0
+init_wait:
+	cmp.b (a2), d0
+	beq init_wait
+
+top:
+	move.b #254, d0
+	lea $FF0000, a4
+	move.w #$8F00, (a1)     ;autoinc of 0
+	move.l #$40040000, (a1) ;unused VRAM address
+wait_active:
+	cmp.b (a2), d0
+	bne.s wait_active
+
+	move.l #$8A718014, (a1) ;enable Hints
+
+	;sync to VDP by attempting to fill FIFO
+	;being in vblank makes this a bit difficult
+
+	rept 8
+	move.l d0, (a0)
+	endr
+
+	;sample data for vblank flag off
+	rept 82 ;two lines worth of move.l
+	move.l (a3), (a4)+
+	endr
+
+	move.l a4, a5 ;save end of first buffer
+
+	move.b (a2), d0
+wait_new_line:
+	cmp.b (a2), d0
+	beq.s wait_new_line
+
+	;sync to VDP by filling FIFO
+	move.l d0, (a0)
+	move.l d0, (a0)
+	move.w d0, (a0)
+
+	;sample data for line change HV value
+	rept 45 ;one line worth of move.l
+	move.l (a2), (a4)+
+	endr
+
+	move.l a4, usp ;save end of second buffer
+
+	moveq #$70, d0
+wait_hint_line:
+	cmp.b (a2), d0
+	bne.s wait_hint_line
+
+	;sample data for line change HV value
+	rept 45 ;one line worth of move.l
+	move.l (a2), (a4)+
+	endr
+
+	move.l a4, a6
+
+	move.b #223, d0
+wait_inactive:
+	cmp.b (a2), d0
+	bne.s wait_inactive
+
+	;sync to VDP by filling FIFO
+	move.l d0, (a0)
+	move.l d0, (a0)
+	move.w d0, (a0)
+
+	;sample data for vblank on
+	rept 82 ;two lines worth of move.l
+	move.l (a3), (a4)+
+	endr
+
+	move.l #$8AFF8004, (a1) ;disable Hints
+
+	rsset $FFFF8000
+vblank_start_min rs.w 1
+vblank_start_max rs.w 1
+vblank_end_min   rs.w 1
+vblank_end_max   rs.w 1
+hblank_start_min rs.w 1
+hblank_start_max rs.w 1
+hblank_end_min   rs.w 1
+hblank_end_max   rs.w 1
+line_change_min  rs.w 1
+line_change_max  rs.w 1
+hint_min         rs.w 1
+hint_max         rs.w 1
+mode             rs.w 1
+printed_hv_dump  rs.b 1
+button_state     rs.b 1
+
+	lea $FF0001, a4
+.loop:
+	btst.b #3, (a4)
+	beq.s found_vblank_off
+	move.w 1(a4), d6
+	addq #4, a4
+	bra.s .loop
+found_vblank_off:
+
+	move.w (vblank_end_max).w, d0
+	beq .new_max
+	cmp.w d0, d6
+	blo .no_new_max
+.new_max
+	move.w d6, (vblank_end_max).w
+.no_new_max:
+
+
+	move.w 1(a4), d6
+
+	move.w (vblank_end_min).w, d0
+	beq .new_min
+	cmp.w d0, d6
+	bhi .no_new_min
+.new_min
+	move.w d6, (vblank_end_min).w
+.no_new_min:
+
+	lea $FF0001, a4
+;first find a point where HBLANK is not set
+	bra.s .start
+.loop:
+	addq #4, a4
+.start
+	btst.b #2, (a4)
+	bne.s .loop
+
+;then find a point after that where it switches to on
+.loop2:
+	btst.b #2, (a4)
+	bne.s found_hblank_on
+	move.w 1(a4), d5
+	addq #4, a4
+	bra.s .loop2
+found_hblank_on:
+
+	move.w (hblank_start_max).w, d0
+	beq .new_max
+	cmp.w d0, d5
+	blo .no_new_max
+.new_max
+	move.w d5, (hblank_start_max).w
+.no_new_max:
+
+
+	move.w 1(a4), d5
+
+	move.w (hblank_start_min).w, d0
+	beq .new_min
+	cmp.w d0, d5
+	bhi .no_new_min
+.new_min
+	move.w d5, (hblank_start_min).w
+.no_new_min:
+
+;finally find a point after that where it switches back off
+.loop2:
+	btst.b #2, (a4)
+	beq.s found_hblank_off
+	move.w 1(a4), d5
+	addq #4, a4
+	bra.s .loop2
+found_hblank_off:
+
+	move.w (hblank_end_max).w, d0
+	beq .new_max
+	cmp.w d0, d5
+	blo .no_new_max
+.new_max
+	move.w d5, (hblank_end_max).w
+.no_new_max:
+
+
+	move.w 1(a4), d5
+
+	move.w (hblank_end_min).w, d0
+	beq .new_min
+	cmp.w d0, d5
+	bhi .no_new_min
+.new_min
+	move.w d5, (hblank_end_min).w
+.no_new_min:
+
+	move.l a5, a4 ;save line change buffer for later
+	move.b (a5), d0
+.loop
+	move.w (a5), d7
+	addq #2, a5
+	cmp.b (a5), d0
+	beq .loop
+found_line_change:
+
+	move.w (line_change_max).w, d0
+	beq .new_max
+	cmp.w d0, d7
+	blo .no_new_max
+.new_max
+	move.w d7, (line_change_max).w
+.no_new_max:
+
+	move.w (a5), d7
+
+	move.w (line_change_min).w, d0
+	beq .new_min
+	cmp.w d0, d7
+	bhi .no_new_min
+.new_min
+	move.w d7, (line_change_min).w
+.no_new_min:
+
+	addq #1, a6
+.loop:
+	btst.b #3, (a6)
+	bne.s found_vblank_on
+	move.w 1(a6), d5
+	addq #4, a6
+	bra.s .loop
+found_vblank_on:
+
+	move.w (vblank_start_max).w, d0
+	beq .new_max
+	cmp.w d0, d5
+	blo .no_new_max
+.new_max
+	move.w d5, (vblank_start_max).w
+.no_new_max:
+
+	move.w 1(a6), d5 ;first sample taken with the vblank flag set
+
+	move.w (vblank_start_min).w, d0
+	beq .new_min ;zero means no minimum recorded yet
+	cmp.w d0, d5 ;word compare, matching the other min/max checks and the word-sized store below
+	bhi .no_new_min
+.new_min
+	move.w d5, (vblank_start_min).w
+.no_new_min:
+
+	move usp, a5
+.loop:
+	btst.b #7, (a5)
+	bne.s found_hint
+	move.w (a5), d1
+	addq #2, a5
+	bra.s .loop
+found_hint:
+
+	move.w (hint_max).w, d0
+	beq .new_max
+	cmp.w d0, d1
+	blo .no_new_max
+.new_max
+	move.w d1, (hint_max).w
+.no_new_max:
+
+	move.w (a5), d1 ;sample written by int_4, which sets bit 15 as a marker
+	and.w #$7FFF, d1 ;strip the marker bit
+
+	move.w (hint_min).w, d0
+	beq .new_min ;zero means no minimum recorded yet
+	cmp.w d0, d1 ;word compare, matching the other min/max checks and the word-sized store below
+	bhi .no_new_min
+.new_min
+	move.w d1, (hint_min).w
+.no_new_min:
+
+draw_info:
+	;draw data
+	move.w #$8F02, (a1)     ;autoinc of 2
+	move.l #$40840002, (a1)
+
+	moveq #0, d0
+	lea VBlankStart(pc), a6
+	bsr print_string
+
+
+	move.w (vblank_start_max), d0
+	moveq #0, d1
+	bsr print_hexw
+
+	move.w #32, (a0)
+	move.w d5, d0
+	bsr print_hexw
+
+	move.w #32, (a0)
+	move.w (vblank_start_min), d0
+	bsr print_hexw
+
+	moveq #0, d0
+	move.l #$41040002, (a1)
+	lea VBlankEnd(pc), a6
+	bsr print_string
+
+	;max value before vblank end
+	moveq #0, d1
+	move.w (vblank_end_max), d0
+	bsr print_hexw
+
+	move.w #32, (a0)
+	move.w d6, d0
+	bsr print_hexw
+
+	;min value after vblank end
+	move.w (vblank_end_min), d0
+	move.w #32, (a0)
+	bsr print_hexw
+
+	moveq #0, d0
+	move.l #$41840002, (a1)
+	lea LineChange(pc), a6
+	bsr print_string
+
+	move.w (line_change_max), d0
+	moveq #0, d1
+	bsr print_hexw
+
+	move.w #32, (a0)
+	move.w d7, d0
+	bsr print_hexw
+
+	move.w (line_change_min), d0
+	move.w #32, (a0)
+	bsr print_hexw
+
+	moveq #0, d0
+	move.l #$42040002, (a1)
+	lea HBlankStart(pc), a6
+	bsr print_string
+
+	move.w (hblank_start_max), d0
+	moveq #0, d1
+	bsr print_hexw
+
+	move.w (hblank_start_min), d0
+	move.w #32, (a0)
+	bsr print_hexw
+
+	moveq #0, d0
+	move.l #$42840002, (a1)
+	lea HBlankEnd(pc), a6
+	bsr print_string
+
+	move.w (hblank_end_max), d0
+	moveq #0, d1
+	bsr print_hexw
+
+	move.w (hblank_end_min), d0
+	move.w #32, (a0)
+	bsr print_hexw
+
+	moveq #0, d0
+	move.l #$43040002, (a1)
+	lea HInterrupt(pc), a6
+	bsr print_string
+
+	move.w (hint_max), d0
+	moveq #0, d1
+	bsr print_hexw
+
+	move.w (hint_min), d0
+	move.w #32, (a0)
+	bsr print_hexw
+
+	;read pad
+	move.b #$40, $A10003
+	move.b $A10003, d0
+	move.b #$00, $A10003
+	and.b #$3f, d0
+	move.b $A10003, d1
+	and.b #$30, d1
+	lsl.b #2, d1
+	or.b d1, d0
+	not.b d0
+	move.b (button_state).w, d2
+	eor.b d0, d2
+	and.b d0, d2
+	move.b d2, d3 ;d3 contains newly pressed buttons, SACBRLDU
+	move.b d0, (button_state).w
+
+	btst.l #7, d3
+	beq not_pressed
+
+	moveq #0, d0
+	move.l d0, (vblank_start_min).w
+	move.l d0, (vblank_end_min).w
+	move.l d0, (hblank_start_min).w
+	move.l d0, (hblank_end_min).w
+	move.l d0, (line_change_min).w
+	move.l d0, (hint_min).w
+	move.b d0, (printed_hv_dump).w
+	move.w (mode).w, d0
+	eor.w  #$81, d0
+	move.w d0, (mode).w
+	move.w d0, (a1)
+	bra top
+
+not_pressed
+
+	move.b (printed_hv_dump).w, d0
+	bne top
+	move.b #1, (printed_hv_dump).w
+
+	moveq #0, d1
+	moveq #89, d4
+	moveq #6, d5
+	move.l #$45820002, d6
+	move.l d6, (a1)
+
+print_loop:
+	dbra d5, .no_line_change
+		 ;#$45820002
+	add.l #$00800000, d6
+	move.l d6, (a1)
+	moveq #5, d5
+.no_line_change
+	move.w #32, (a0)
+	move.w (a4)+, d0
+	bsr print_hexw
+	dbra d4, print_loop
+
+	add.l #$01020000, d6
+	move.l d6, (a1)
+	moveq #0, d0
+	lea Instructions(pc), a6
+	bsr print_string
+
+	bra top
+
+VBlankStart:
+	dc.b "VBlank Start: ", 0
+VBlankEnd:
+	dc.b "VBlank End:   ", 0
+LineChange:
+	dc.b "Line Change:  ", 0
+HBlankStart:
+	dc.b "HBlank Start: ", 0
+HBlankEnd:
+	dc.b "HBlank End:   ", 0
+HInterrupt:
+	dc.b "HInterrupt:   ", 0
+Instructions:
+	dc.b "Press Start to switch modes", 0
+
+	align 1
+;Prints a number in hex format
+;d0.w - number to print
+;d1.w - base tile attribute
+;a0 - VDP data port
+;
+;Clobbers: d2.l, d3.l
+;
+print_hexw:
+	moveq #3, d3
+.digitloop
+	rol.w #4, d0
+	moveq #$F, d2
+	and.b d0, d2
+	cmp.b #$A, d2
+	bge .hex
+	add.w #$30, d2
+	bra .makeattrib
+.hex
+	add.w #($41-$A), d2
+.makeattrib
+	add.w d1, d2
+	move.w d2, (a0)
+	dbra d3, .digitloop
+	rts
+
+;Prints a null terminated string
+;a6 - pointer to string
+;a0 - VDP data port
+;d0 - base tile attribute
+;
+;Clobbers: d1.w
+print_string:
+.loop
+	moveq #0, d1
+	move.b (a6)+, d1
+	beq .end
+	add.w d0, d1
+	move.w d1, (a0)
+	bra .loop
+.end
+	rts
+
+	align 1
+font:
+	incbin font.tiles
+fontend
+
--- a/gen_x86.c	Thu May 28 21:09:33 2015 -0700
+++ b/gen_x86.c	Thu May 28 21:19:55 2015 -0700
@@ -8,6 +8,8 @@
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <stdarg.h>
+#include <string.h>
 
 #define REX_RM_FIELD 0x1
 #define REX_SIB_FIELD 0x2
@@ -33,6 +35,7 @@
 #define OP_TEST 0x84
 #define OP_XCHG 0x86
 #define OP_MOV 0x88
+#define PRE_XOP 0x8F
 #define OP_XCHG_AX 0x90
 #define OP_CDQ 0x99
 #define OP_PUSHF 0x9C
@@ -127,6 +130,44 @@
 	X86_R15
 } x86_regs_enc;
 
+//Printable register names, indexed by register number. Used by the
+//"Instruction requires REX prefix" diagnostics emitted on 32-bit builds.
+//The first eight entries use the 64-bit or 32-bit spelling depending on X86_64.
+//NOTE(review): the order (rax..rdi, ah..bh, r8..r15) presumably mirrors the
+//register enum declared earlier in this file - keep the two in sync
+char * x86_reg_names[] = {
+#ifdef X86_64
+	"rax",
+	"rcx",
+	"rdx",
+	"rbx",
+	"rsp",
+	"rbp",
+	"rsi",
+	"rdi",
+#else
+	"eax",
+	"ecx",
+	"edx",
+	"ebx",
+	"esp",
+	"ebp",
+	"esi",
+	"edi",
+#endif
+	"ah",
+	"ch",
+	"dh",
+	"bh",
+	"r8",
+	"r9",
+	"r10",
+	"r11",
+	"r12",
+	"r13",
+	"r14",
+	"r15",
+};
+
+//Printable operand size suffixes used in the same diagnostics as x86_reg_names
+//NOTE(review): indexed by the size argument; presumably SZ_B, SZ_W, SZ_D, SZ_Q
+//are 0-3 in that order - confirm against the size enum
+char * x86_sizes[] = {
+	"b", "w", "d", "q"
+};
+
 void jmp_nocheck(code_info *code, code_ptr dest)
 {
 	code_ptr out = code->cur;
@@ -187,6 +228,7 @@
 		src = tmp;
 	}
 	if (size == SZ_Q || src >= R8 || dst >= R8 || (size == SZ_B && src >= RSP && src <= RDI)) {
+#ifdef X86_64
 		*out = PRE_REX;
 		if (src >= AH && src <= BH || dst >= AH && dst <= BH) {
 			fprintf(stderr, "attempt to use *H reg in an instruction requiring REX prefix. opcode = %X\n", opcode);
@@ -204,6 +246,10 @@
 			dst -= (R8 - X86_R8);
 		}
 		out++;
+#else
+		fprintf(stderr, "Instruction requires REX prefix but this is a 32-bit build | opcode: %X, src: %s, dst: %s, size: %s\n", opcode, x86_reg_names[src], x86_reg_names[dst], x86_sizes[size]);
+		exit(1);
+#endif
 	}
 	if (size == SZ_B) {
 		if (src >= AH && src <= BH) {
@@ -235,6 +281,7 @@
 		*(out++) = PRE_SIZE;
 	}
 	if (size == SZ_Q || reg >= R8 || base >= R8 || (size == SZ_B && reg >= RSP && reg <= RDI)) {
+#ifdef X86_64
 		*out = PRE_REX;
 		if (reg >= AH && reg <= BH) {
 			fprintf(stderr, "attempt to use *H reg in an instruction requiring REX prefix. opcode = %X\n", opcode);
@@ -252,6 +299,10 @@
 			base -= (R8 - X86_R8);
 		}
 		out++;
+#else
+		fprintf(stderr, "Instruction requires REX prefix but this is a 32-bit build | opcode: %X, reg: %s, base: %s, size: %s\n", opcode, x86_reg_names[reg], x86_reg_names[base], x86_sizes[size]);
+		exit(1);
+#endif
 	}
 	if (size == SZ_B) {
 		if (reg >= AH && reg <= BH) {
@@ -268,7 +319,7 @@
 		*(out++) = opcode;
 	}
 	if (disp < 128 && disp >= -128) {
-		*(out++) = MODE_REG_DISPLACE8 | base | (reg << 3);
+	*(out++) = MODE_REG_DISPLACE8 | base | (reg << 3);
 	} else {
 		*(out++) = MODE_REG_DISPLACE32 | base | (reg << 3);
 	}
@@ -278,9 +329,9 @@
 	}
 	*(out++) = disp;
 	if (disp >= 128 || disp < -128) {
-		*(out++) = disp >> 8;
-		*(out++) = disp >> 16;
-		*(out++) = disp >> 24;
+	*(out++) = disp >> 8;
+	*(out++) = disp >> 16;
+	*(out++) = disp >> 24;
 	}
 	code->cur = out;
 }
@@ -295,6 +346,7 @@
 		*(out++) = PRE_SIZE;
 	}
 	if (size == SZ_Q || reg >= R8 || base >= R8 || (size == SZ_B && reg >= RSP && reg <= RDI)) {
+#ifdef X86_64
 		*out = PRE_REX;
 		if (reg >= AH && reg <= BH) {
 			fprintf(stderr, "attempt to use *H reg in an instruction requiring REX prefix. opcode = %X\n", opcode);
@@ -312,6 +364,10 @@
 			base -= (R8 - X86_R8);
 		}
 		out++;
+#else
+		fprintf(stderr, "Instruction requires REX prefix but this is a 32-bit build | opcode: %X, reg: %s, base: %s, size: %s\n", opcode, x86_reg_names[reg], x86_reg_names[base], x86_sizes[size]);
+		exit(1);
+#endif
 	}
 	if (size == SZ_B) {
 		if (reg >= AH && reg <= BH) {
@@ -321,11 +377,18 @@
 		opcode |= BIT_SIZE;
 	}
 	*(out++) = opcode | dir;
+	if (base == RBP) {
+		//add a dummy 8-bit displacement since MODE_REG_INDIRECT with
+		//an R/M field of RBP selects RIP, relative addressing
+		*(out++) = MODE_REG_DISPLACE8 | base | (reg << 3);
+		*(out++) = 0;
+	} else {
 	*(out++) = MODE_REG_INDIRECT | base | (reg << 3);
 	if (base == RSP) {
 		//add SIB byte, with no index and RSP as base
 		*(out++) = (RSP << 3) | RSP;
 	}
+	}
 	code->cur = out;
 }
 
@@ -339,6 +402,7 @@
 		*(out++) = PRE_SIZE;
 	}
 	if (size == SZ_Q || reg >= R8 || base >= R8 || (size == SZ_B && reg >= RSP && reg <= RDI)) {
+#ifdef X86_64
 		*out = PRE_REX;
 		if (reg >= AH && reg <= BH) {
 			fprintf(stderr, "attempt to use *H reg in an instruction requiring REX prefix. opcode = %X\n", opcode);
@@ -360,6 +424,10 @@
 			index -= (R8 - X86_R8);
 		}
 		out++;
+#else
+		fprintf(stderr, "Instruction requires REX prefix but this is a 32-bit build | opcode: %X, reg: %s, base: %s, size: %s\n", opcode, x86_reg_names[reg], x86_reg_names[base], x86_sizes[size]);
+		exit(1);
+#endif
 	}
 	if (size == SZ_B) {
 		if (reg >= AH && reg <= BH) {
@@ -373,7 +441,7 @@
 	if (scale == 4) {
 		scale = 2;
 	} else if(scale == 8) {
-		scale = 3;
+			scale = 3;
 	} else {
 		scale--;
 	}
@@ -390,6 +458,7 @@
 		*(out++) = PRE_SIZE;
 	}
 	if (size == SZ_Q || dst >= R8) {
+#ifdef X86_64
 		*out = PRE_REX;
 		if (dst >= AH && dst <= BH) {
 			fprintf(stderr, "attempt to use *H reg in an instruction requiring REX prefix. opcode = %X\n", opcode);
@@ -403,6 +472,10 @@
 			dst -= (R8 - X86_R8);
 		}
 		out++;
+#else
+		fprintf(stderr, "Instruction requires REX prefix but this is a 32-bit build | opcode: %X:%X, reg: %s, size: %s\n", opcode, opex, x86_reg_names[dst], x86_sizes[size]);
+		exit(1);
+#endif
 	}
 	if (size == SZ_B) {
 		if (dst >= AH && dst <= BH) {
@@ -425,6 +498,7 @@
 		*(out++) = PRE_SIZE;
 	}
 	if (size == SZ_Q || dst >= R8) {
+#ifdef X86_64
 		*out = PRE_REX;
 		if (size == SZ_Q) {
 			*out |= REX_QUAD;
@@ -434,14 +508,18 @@
 			dst -= (R8 - X86_R8);
 		}
 		out++;
+#else
+		fprintf(stderr, "Instruction requires REX prefix but this is a 32-bit build | opcode: %X:%X, reg: %s, size: %s\n", opcode, opex, x86_reg_names[dst], x86_sizes[size]);
+		exit(1);
+#endif
 	}
 	if (size != SZ_B) {
 		opcode |= BIT_SIZE;
 	}
 	*(out++) = opcode;
 	if (disp < 128 && disp >= -128) {
-		*(out++) = MODE_REG_DISPLACE8 | dst | (opex << 3);
-		*(out++) = disp;
+	*(out++) = MODE_REG_DISPLACE8 | dst | (opex << 3);
+	*(out++) = disp;
 	} else {
 		*(out++) = MODE_REG_DISPLACE32 | dst | (opex << 3);
 		*(out++) = disp;
@@ -468,12 +546,18 @@
 		if (size != SZ_B) {
 			al_opcode |= BIT_SIZE;
 			if (size == SZ_Q) {
+#ifdef X86_64
 				*out = PRE_REX | REX_QUAD;
+#else
+		fprintf(stderr, "Instruction requires REX prefix but this is a 32-bit build | opcode: %X, reg: %s, size: %s\n", al_opcode, x86_reg_names[dst], x86_sizes[size]);
+		exit(1);
+#endif
 			}
 		}
 		*(out++) = al_opcode | BIT_IMMED_RAX;
 	} else {
 		if (size == SZ_Q || dst >= R8 || (size == SZ_B && dst >= RSP && dst <= RDI)) {
+#ifdef X86_64
 			*out = PRE_REX;
 			if (size == SZ_Q) {
 				*out |= REX_QUAD;
@@ -483,6 +567,10 @@
 				dst -= (R8 - X86_R8);
 			}
 			out++;
+#else
+		fprintf(stderr, "Instruction requires REX prefix but this is a 32-bit build | opcode: %X:%X, reg: %s, size: %s\n", opcode, op_ex, x86_reg_names[dst], x86_sizes[size]);
+		exit(1);
+#endif
 		}
 		if (dst >= AH && dst <= BH) {
 			dst -= (AH-X86_AH);
@@ -521,6 +609,7 @@
 	}
 
 	if (size == SZ_Q || dst >= R8) {
+#ifdef X86_64
 		*out = PRE_REX;
 		if (size == SZ_Q) {
 			*out |= REX_QUAD;
@@ -530,23 +619,27 @@
 			dst -= (R8 - X86_R8);
 		}
 		out++;
+#else
+		fprintf(stderr, "Instruction requires REX prefix but this is a 32-bit build | opcode: %X:%X, reg: %s, size: %s\n", opcode, op_ex, x86_reg_names[dst], x86_sizes[size]);
+		exit(1);
+#endif
 	}
 	if (size != SZ_B) {
 		opcode |= BIT_SIZE;
 	}
 	*(out++) = opcode;
 	if (disp < 128 && disp >= -128) {
-		*(out++) = MODE_REG_DISPLACE8 | dst | (op_ex << 3);
-		*(out++) = disp;
+	*(out++) = MODE_REG_DISPLACE8 | dst | (op_ex << 3);
+	*(out++) = disp;
 	} else {
-		*(out++) = MODE_REG_DISPLACE32 | dst | (op_ex << 3);
-		*(out++) = disp;
-		disp >>= 8;
-		*(out++) = disp;
-		disp >>= 8;
-		*(out++) = disp;
-		disp >>= 8;
-		*(out++) = disp;
+	*(out++) = MODE_REG_DISPLACE32 | dst | (op_ex << 3);
+	*(out++) = disp;
+	disp >>= 8;
+	*(out++) = disp;
+	disp >>= 8;
+	*(out++) = disp;
+	disp >>= 8;
+	*(out++) = disp;
 	}
 	*(out++) = val;
 	if (size != SZ_B && !sign_extend) {
@@ -616,8 +709,8 @@
 
 	*(out++) = (val == 1 ? OP_SHIFTROT_1: OP_SHIFTROT_IR) | (size == SZ_B ? 0 : BIT_SIZE);
 	if (disp < 128 && disp >= -128) {
-		*(out++) = MODE_REG_DISPLACE8 | dst | (op_ex << 3);
-		*(out++) = disp;
+	*(out++) = MODE_REG_DISPLACE8 | dst | (op_ex << 3);
+	*(out++) = disp;
 	} else {
 		*(out++) = MODE_REG_DISPLACE32 | dst | (op_ex << 3);
 		*(out++) = disp;
@@ -682,15 +775,15 @@
 
 	*(out++) = OP_SHIFTROT_CL | (size == SZ_B ? 0 : BIT_SIZE);
 	if (disp < 128 && disp >= -128) {
-		*(out++) = MODE_REG_DISPLACE8 | dst | (op_ex << 3);
-		*(out++) = disp;
+	*(out++) = MODE_REG_DISPLACE8 | dst | (op_ex << 3);
+	*(out++) = disp;
 	} else {
 		*(out++) = MODE_REG_DISPLACE32 | dst | (op_ex << 3);
 		*(out++) = disp;
 		*(out++) = disp >> 8;
 		*(out++) = disp >> 16;
 		*(out++) = disp >> 24;
-	}
+}
 	code->cur = out;
 }
 
@@ -1243,8 +1336,8 @@
 	}
 	*(out++) = OP_MOV_IEA | (size == SZ_B ? 0 : BIT_SIZE);
 	if (disp < 128 && disp >= -128) {
-		*(out++) = MODE_REG_DISPLACE8 | dst;
-		*(out++) = disp;
+	*(out++) = MODE_REG_DISPLACE8 | dst;
+	*(out++) = disp;
 	} else {
 		*(out++) = MODE_REG_DISPLACE32 | dst;
 		*(out++) = disp;
@@ -1366,8 +1459,8 @@
 		*(out++) = OP2_MOVSX | (src_size == SZ_B ? 0 : BIT_SIZE);
 	}
 	if (disp < 128 && disp >= -128) {
-		*(out++) = MODE_REG_DISPLACE8 | src | (dst << 3);
-		*(out++) = disp;
+	*(out++) = MODE_REG_DISPLACE8 | src | (dst << 3);
+	*(out++) = disp;
 	} else {
 		*(out++) = MODE_REG_DISPLACE32 | src | (dst << 3);
 		*(out++) = disp;
@@ -1431,8 +1524,8 @@
 	*(out++) = PRE_2BYTE;
 	*(out++) = OP2_MOVZX | (src_size == SZ_B ? 0 : BIT_SIZE);
 	if (disp < 128 && disp >= -128) {
-		*(out++) = MODE_REG_DISPLACE8 | src | (dst << 3);
-		*(out++) = disp;
+	*(out++) = MODE_REG_DISPLACE8 | src | (dst << 3);
+	*(out++) = disp;
 	} else {
 		*(out++) = MODE_REG_DISPLACE32 | src | (dst << 3);
 		*(out++) = disp;
@@ -1516,6 +1609,13 @@
 	code->cur = out;
 }
 
+//Emits a push whose operand is the memory location at base + disp by
+//delegating to x86_rdisp_size with the OP_SINGLE_EA opcode and the
+//OP_EX_PUSH_EA opcode extension
+void push_rdisp(code_info *code, uint8_t base, int32_t disp)
+{
+	//This instruction has no explicit size, so we pass SZ_B
+	//to avoid any prefixes or bits being set
+	x86_rdisp_size(code, OP_SINGLE_EA, OP_EX_PUSH_EA, base, disp, SZ_B);
+}
+
 void pop_r(code_info *code, uint8_t reg)
 {
 	check_alloc_code(code, 2);
@@ -1528,6 +1628,19 @@
 	code->cur = out;
 }
 
+//Emits a pop into the memory location addressed by reg (register indirect).
+//At most 3 bytes are written: an optional REX prefix, the opcode byte and
+//a ModR/M byte.
+//NOTE(review): no SIB byte or displacement is emitted, so RSP/RBP (and the
+//extended registers that share their encodings) look unsupported as reg - confirm
+void pop_rind(code_info *code, uint8_t reg)
+{
+	check_alloc_code(code, 3);
+	code_ptr cursor = code->cur;
+	uint8_t rm = reg;
+	if (rm >= R8) {
+		//extended register: fold into the 3-bit R/M field and flag it in REX
+		rm -= R8 - X86_R8;
+		*cursor++ = PRE_REX | REX_RM_FIELD;
+	}
+	*cursor++ = PRE_XOP;
+	*cursor++ = MODE_REG_INDIRECT | rm;
+	code->cur = cursor;
+}
+
 void setcc_r(code_info *code, uint8_t cc, uint8_t dst)
 {
 	check_alloc_code(code, 4);
@@ -1571,8 +1684,8 @@
 	*(out++) = PRE_2BYTE;
 	*(out++) = OP2_SETCC | cc;
 	if (disp < 128 && disp >= -128) {
-		*(out++) = MODE_REG_DISPLACE8 | dst;
-		*(out++) = disp;
+	*(out++) = MODE_REG_DISPLACE8 | dst;
+	*(out++) = disp;
 	} else {
 		*(out++) = MODE_REG_DISPLACE32 | dst;
 		*(out++) = disp;
@@ -1636,14 +1749,14 @@
 	*(out++) = PRE_2BYTE;
 	*(out++) = op2;
 	if (dst_disp < 128 && dst_disp >= -128) {
-		*(out++) = MODE_REG_DISPLACE8 | dst_base | (src << 3);
-		*(out++) = dst_disp;
+	*(out++) = MODE_REG_DISPLACE8 | dst_base | (src << 3);
+	*(out++) = dst_disp;
 	} else {
-		*(out++) = MODE_REG_DISPLACE32 | dst_base | (src << 3);
-		*(out++) = dst_disp;
-		*(out++) = dst_disp >> 8;
-		*(out++) = dst_disp >> 16;
-		*(out++) = dst_disp >> 24;
+	*(out++) = MODE_REG_DISPLACE32 | dst_base | (src << 3);
+	*(out++) = dst_disp;
+	*(out++) = dst_disp >> 8;
+	*(out++) = dst_disp >> 16;
+	*(out++) = dst_disp >> 24;
 	}
 	code->cur = out;
 }
@@ -1694,8 +1807,8 @@
 	*(out++) = PRE_2BYTE;
 	*(out++) = OP2_BTX_I;
 	if (dst_disp < 128 && dst_disp >= -128) {
-		*(out++) = MODE_REG_DISPLACE8 | dst_base | (op_ex << 3);
-		*(out++) = dst_disp;
+	*(out++) = MODE_REG_DISPLACE8 | dst_base | (op_ex << 3);
+	*(out++) = dst_disp;
 	} else {
 		*(out++) = MODE_REG_DISPLACE32 | dst_base | (op_ex << 3);
 		*(out++) = dst_disp;
@@ -1855,6 +1968,19 @@
 	code->cur = out;
 }
 
+//Emits an indirect jump through the memory location addressed by dst.
+//At most 3 bytes are written: an optional REX prefix, OP_SINGLE_EA and a
+//ModR/M byte carrying the OP_EX_JMP_EA opcode extension.
+//NOTE(review): no SIB byte or displacement is emitted, so RSP/RBP (and the
+//extended registers that share their encodings) look unsupported as dst - confirm
+void jmp_rind(code_info *code, uint8_t dst)
+{
+	check_alloc_code(code, 3);
+	code_ptr cursor = code->cur;
+	uint8_t rm = dst;
+	if (rm >= R8) {
+		//extended register: flag it in REX and fold into the 3-bit R/M field
+		*cursor++ = PRE_REX | REX_RM_FIELD;
+		rm -= R8 - X86_R8;
+	}
+	*cursor++ = OP_SINGLE_EA;
+	*cursor++ = MODE_REG_INDIRECT | rm | (OP_EX_JMP_EA << 3);
+	code->cur = cursor;
+}
+
 void call(code_info *code, code_ptr fun)
 {
 	check_alloc_code(code, 5);
@@ -1912,3 +2038,198 @@
 	code->cur = out;
 }
 
+//Emits the code that places num_args register arguments where a called C
+//function expects them.
+//
+//args holds num_args register numbers (each read with va_arg as int).
+//On X86_64 the first six are moved, or exchanged when the target register
+//itself holds another argument, into RDI, RSI, RDX, RCX, R8, R9; any
+//remaining ones are pushed. On 32-bit builds every argument is pushed.
+//Pushes are emitted last-argument-first.
+//
+//Returns the number of bytes pushed so the caller can release them after
+//the call.
+uint32_t prep_args(code_info *code, uint32_t num_args, va_list args)
+{
+	uint8_t *arg_arr = malloc(num_args);
+	//malloc(0) may legitimately return NULL, so only a non-empty request can fail
+	if (num_args && !arg_arr) {
+		fprintf(stderr, "Failed to allocate %u bytes for call arguments\n", (unsigned)num_args);
+		exit(1);
+	}
+	for (int i = 0; i < num_args; i ++)
+	{
+		arg_arr[i] = va_arg(args, int);
+	}
+#ifdef X86_64
+	uint32_t stack_args = 0;
+	uint8_t abi_regs[] = {RDI, RSI, RDX, RCX, R8, R9};
+	//reg_swap[r] >= 0 means the value that used to live in r was exchanged
+	//into register reg_swap[r] by an earlier xchg
+	int8_t reg_swap[R15+1];
+	//bitmask of registers that hold an argument
+	uint32_t usage = 0;
+	memset(reg_swap, -1, sizeof(reg_swap));
+	for (int i = 0; i < num_args; i ++)
+	{
+		usage |= 1 << arg_arr[i];
+	}
+	for (int i = 0; i < num_args; i ++)
+	{
+		uint8_t reg_arg = arg_arr[i];
+		if (i < sizeof(abi_regs)) {
+			if (reg_swap[reg_arg] >= 0) {
+				reg_arg = reg_swap[reg_arg];
+			}
+			if (reg_arg != abi_regs[i]) {
+				if (usage & (1 << abi_regs[i])) {
+					//target register holds another argument, so swap rather than overwrite
+					xchg_rr(code, reg_arg, abi_regs[i], SZ_PTR);
+					reg_swap[abi_regs[i]] = reg_arg;
+				} else {
+					mov_rr(code, reg_arg, abi_regs[i], SZ_PTR);
+				}
+			}
+		} else {
+			//compact the stack-passed arguments to the front of arg_arr
+			arg_arr[stack_args++] = reg_arg;
+		}
+	}
+#else
+	//a real variable instead of a macro so nothing leaks past this function
+	uint32_t stack_args = num_args;
+#endif
+	for (int i = stack_args -1; i >= 0; i--)
+	{
+		push_r(code, arg_arr[i]);
+	}
+	//the register list is only needed while emitting code
+	free(arg_arr);
+
+	return stack_args * sizeof(void *);
+}
+
+//Emits a call to fun. The variadic arguments are num_args register numbers;
+//prep_args emits the code that moves/pushes them and reports how many bytes
+//it pushed, which are released again after the call returns.
+void call_args(code_info *code, code_ptr fun, uint32_t num_args, ...)
+{
+	va_list reg_list;
+	va_start(reg_list, num_args);
+	const uint32_t stack_bytes = prep_args(code, num_args, reg_list);
+	va_end(reg_list);
+
+	call(code, fun);
+	if (!stack_bytes) {
+		//nothing was pushed, so there is nothing to clean up
+		return;
+	}
+	add_ir(code, stack_bytes, RSP, SZ_PTR);
+}
+
+//Like call_args, but on X86_64 the emitted code also tests bit 3 of RSP at
+//runtime and takes one of two paths:
+// - bit clear: call fun directly, then release the pushed arguments
+// - bit set: subtract 8 from RSP first, call fun, then release the pushed
+//   arguments plus the extra 8 bytes
+//On 32-bit builds this emits the same code as call_args.
+void call_args_abi(code_info *code, code_ptr fun, uint32_t num_args, ...)
+{
+	va_list args;
+	va_start(args, num_args);
+	uint32_t adjust = prep_args(code, num_args, args);
+	va_end(args);
+#ifdef X86_64
+	test_ir(code, 8, RSP, SZ_PTR); //check stack alignment
+	//remember where the jump's displacement byte will be so it can be patched below
+	//NOTE(review): assumes jcc emits a 2-byte short jump for this target - confirm
+	code_ptr do_adjust_rsp = code->cur + 1;
+	jcc(code, CC_NZ, code->cur + 2);
+#endif
+	call(code, fun);
+	if (adjust) {
+		add_ir(code, adjust, RSP, SZ_PTR);
+	}
+#ifdef X86_64
+	//skip over the adjusted path; displacement is patched once its end is known
+	code_ptr no_adjust_rsp = code->cur + 1;
+	jmp(code, code->cur + 2);
+	//adjusted path starts here: point the earlier conditional jump at it
+	*do_adjust_rsp = code->cur - (do_adjust_rsp+1);
+	sub_ir(code, 8, RSP, SZ_PTR);
+	call(code, fun);
+	add_ir(code, adjust + 8 , RSP, SZ_PTR);
+	*no_adjust_rsp = code->cur - (no_adjust_rsp+1);
+#endif
+}
+
+//Emits pushes for RBX and RBP followed by R12-R15 (X86_64) or RDI and RSI
+//(32-bit). restore_callee_save_regs pops them in the reverse order.
+void save_callee_save_regs(code_info *code)
+{
+	push_r(code, RBX);
+	push_r(code, RBP);
+#ifdef X86_64
+	push_r(code, R12);
+	push_r(code, R13);
+	push_r(code, R14);
+	push_r(code, R15);
+#else
+	push_r(code, RDI);
+	push_r(code, RSI);
+#endif
+}
+
+//Emits pops that undo save_callee_save_regs: the registers are popped in
+//exactly the reverse of the order in which that function pushes them
+void restore_callee_save_regs(code_info *code)
+{
+#ifdef X86_64
+	pop_r(code, R15);
+	pop_r(code, R14);
+	pop_r(code, R13);
+	pop_r(code, R12);
+#else
+	pop_r(code, RSI);
+	pop_r(code, RDI);
+#endif
+	pop_r(code, RBP);
+	pop_r(code, RBX);
+}
+
+//Returns 0 for the opcodes listed below, which are treated as not being
+//followed by a ModR/M byte, and 1 for everything else.
+//prefix is 0 for single-byte opcodes or the prefix byte (e.g. PRE_2BYTE).
+//NOTE(review): opcodes are compared for exact equality; if some of these
+//constants are base values that get a register or condition code OR'd in,
+//only the base encoding is recognised here - confirm against the emitters
+uint8_t has_modrm(uint8_t prefix, uint8_t opcode)
+{
+	if (prefix) {
+		//the only prefixed opcode known to lack ModR/M is the two-byte Jcc
+		return !(prefix == PRE_2BYTE && opcode == OP2_JCC);
+	}
+	switch (opcode)
+	{
+	case OP_JMP:
+	case OP_JMP_BYTE:
+	case OP_JCC:
+	case OP_CALL:
+	case OP_RETN:
+	case OP_LOOP:
+	case OP_MOV_I8R:
+	case OP_MOV_IR:
+	case OP_PUSHF:
+	case OP_POPF:
+	case OP_PUSH:
+	case OP_POP:
+	case OP_CDQ:
+		return 0;
+	default:
+		return 1;
+	}
+}
+
+//Returns non-zero when the given ModR/M byte is followed by a SIB byte,
+//i.e. the mod field is not register-direct and the R/M field selects RSP
+uint8_t has_sib(uint8_t mod_rm)
+{
+	uint8_t mode = mod_rm & 0xC0;
+	//R/M occupies the low three bits; masking with 3 could never yield RSP
+	uint8_t rm = mod_rm & 7;
+
+	return mode != MODE_REG_DIRECT && rm == RSP;
+}
+
+//Returns the number of bytes occupied by the prefixes, opcode, ModR/M byte,
+//SIB byte and displacement of the instruction at start.
+//
+//NOTE(review): immediate operands are not counted and op_size is tracked
+//but not used yet, so the result is too small for instructions that carry
+//an immediate.
+//NOTE(review): PRE_XOP is treated as a prefix here, whereas pop_rind emits
+//it as the opcode byte itself - confirm which interpretation callers need.
+uint32_t x86_inst_size(code_ptr start)
+{
+	code_ptr code = start;
+	uint8_t cont = 1;
+	uint8_t prefix = 0;
+	uint8_t op_size = SZ_B;
+	uint8_t main_op = 0;
+
+	while (cont)
+	{
+		if (*code == PRE_SIZE) {
+			op_size = SZ_W;
+		} else if ((*code & 0xF0) == PRE_REX) {
+			//match the whole REX range rather than only a REX byte with no flag
+			//bits, otherwise the REX_QUAD test below could never succeed
+			//NOTE(review): assumes all REX flag bits live in the low nibble
+			if (*code & REX_QUAD) {
+				op_size = SZ_Q;
+			}
+		} else if(*code == PRE_2BYTE || *code == PRE_XOP) {
+			//both sides must compare against *code; a bare PRE_XOP is always
+			//true and would keep this loop from ever reaching the opcode
+			prefix = *code;
+		} else {
+			main_op = *code;
+			cont = 0;
+		}
+		code++;
+	}
+	if (has_modrm(prefix, main_op)) {
+		uint8_t mod_rm = *(code++);
+		if (has_sib(mod_rm)) {
+			//sib takes up a byte, but can't add any additional ones beyond that
+			code++;
+		}
+		uint8_t mode = mod_rm & 0xC0;
+		//R/M is a 3-bit field; with a mask of 3 the RBP comparison below was unreachable
+		uint8_t rm = mod_rm & 7;
+		if (mode == MODE_REG_DISPLACE8) {
+			code++;
+		} else if (mode == MODE_REG_DISPLACE32 || (mode == MODE_REG_INDIRECT && rm == RBP)) {
+			//indirect mode with an R/M of RBP carries a 32-bit displacement
+			code += 4;
+		}
+	}
+
+	return code-start;
+}
--- a/gen_x86.h	Thu May 28 21:09:33 2015 -0700
+++ b/gen_x86.h	Thu May 28 21:19:55 2015 -0700
@@ -36,7 +36,9 @@
 	CC_O = 0,
 	CC_NO,
 	CC_C,
+	CC_B = CC_C,
 	CC_NC,
+	CC_NB = CC_NC,
 	CC_Z,
 	CC_NZ,
 	CC_BE,
@@ -78,8 +80,6 @@
 	MODE_IMMED = 0xFF
 } x86_modes;
 
-void check_alloc_code(code_info *code, uint32_t inst_size);
-
 void rol_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size);
 void ror_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size);
 void rcl_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size);
@@ -186,7 +186,9 @@
 void pushf(code_info *code);
 void popf(code_info *code);
 void push_r(code_info *code, uint8_t reg);
+void push_rdisp(code_info *code, uint8_t base, int32_t disp);
 void pop_r(code_info *code, uint8_t reg);
+void pop_rind(code_info *code, uint8_t reg);
 void setcc_r(code_info *code, uint8_t cc, uint8_t dst);
 void setcc_rind(code_info *code, uint8_t cc, uint8_t dst);
 void setcc_rdisp(code_info *code, uint8_t cc, uint8_t dst, int32_t disp);
@@ -207,6 +209,7 @@
 void btc_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size);
 void btc_irdisp(code_info *code, uint8_t val, uint8_t dst_base, int32_t dst_disp, uint8_t size);
 void jcc(code_info *code, uint8_t cc, code_ptr dest);
+void jmp_rind(code_info *code, uint8_t dst);
 void call_r(code_info *code, uint8_t dst);
 void retn(code_info *code);
 void cdq(code_info *code);
--- a/gentests.py	Thu May 28 21:09:33 2015 -0700
+++ b/gentests.py	Thu May 28 21:19:55 2015 -0700
@@ -178,9 +178,17 @@
 					self.disp -= (address & 0xFFFFFF)
 				else:
 					self.disp += 0xE00000-(address & 0xFFFFFF)
+				if self.disp > 127:
+					self.disp = 127
+				elif self.disp < -128:
+					self.disp = -128
 				address = base + index + self.disp
 			elif (address & 0xFFFFFF) > 0xFFFFFC:
 				self.disp -= (address & 0xFFFFFF) - 0xFFFFFC
+				if self.disp > 127:
+					self.disp = 127
+				elif self.disp < -128:
+					self.disp = -128
 				address = base + index + self.disp
 			if size != 'b' and address & 1:
 				self.disp = self.disp ^ 1
--- a/gst.c	Thu May 28 21:09:33 2015 -0700
+++ b/gst.c	Thu May 28 21:19:55 2015 -0700
@@ -1,6 +1,6 @@
 /*
  Copyright 2013 Michael Pavone
- This file is part of BlastEm. 
+ This file is part of BlastEm.
  BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
 */
 #include "gst.h"
@@ -207,8 +207,8 @@
 	curpos += 2;
 	context->iff1 = context->iff2 = *curpos;
 	curpos += 2;
-	reset = !*(curpos++);
-	busreq = *curpos;
+	context->reset = !*(curpos++);
+	context->busreq = *curpos;
 	curpos += 3;
 	uint32_t bank = read_le_32(curpos);
 	if (bank < 0x400000) {
@@ -350,8 +350,8 @@
 	curpos += 2;
 	*curpos = context->iff1;
 	curpos += 2;
-	*(curpos++) = !reset;
-	*curpos = busreq;
+	*(curpos++) = !context->reset;
+	*curpos = context->busreq;
 	curpos += 3;
 	uint32_t bank = context->bank_reg << 15;
 	write_le_32(curpos, bank);
@@ -423,7 +423,7 @@
 		fprintf(stderr, "Could not read ident code from %s\n", fname);
 		goto error_close;
 	}
-	if (memcmp(ident, "GST\x40\xE0", 5) != 0) {
+	if (memcmp(ident, "GST\x40\xE0", 3) != 0) {
 		fprintf(stderr, "%s doesn't appear to be a GST savestate. The ident code is %c%c%c\\x%X\\x%X instead of GST\\x40\\xE0.\n", fname, ident[0], ident[1], ident[2], ident[3], ident[4]);
 		goto error_close;
 	}
--- a/io.c	Thu May 28 21:09:33 2015 -0700
+++ b/io.c	Thu May 28 21:19:55 2015 -0700
@@ -3,15 +3,44 @@
  This file is part of BlastEm.
  BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
 */
+#include <unistd.h>
+#include <fcntl.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <errno.h>
+#include <string.h>
+
 #include "io.h"
 #include "blastem.h"
 #include "render.h"
 
+//Human readable device names used in the IO port setup messages, indexed by
+//a port's device_type
+//NOTE(review): the order presumably mirrors the IO_* device type enum
+//declared in io.h - keep the two in sync
+const char * device_type_names[] = {
+	"3-button gamepad",
+	"6-button gamepad",
+	"Mega Mouse",
+	"Menacer",
+	"Justifier",
+	"Sega multi-tap",
+	"EA 4-way Play cable A",
+	"EA 4-way Play cable B",
+	"Sega Parallel Transfer Board",
+	"Generic Device",
+	"None"
+};
+
 enum {
 	BIND_NONE,
+	BIND_UI,
 	BIND_GAMEPAD1,
 	BIND_GAMEPAD2,
-	BIND_UI
+	BIND_GAMEPAD3,
+	BIND_GAMEPAD4,
+	BIND_GAMEPAD5,
+	BIND_GAMEPAD6,
+	BIND_GAMEPAD7,
+	BIND_GAMEPAD8
 };
 
 typedef enum {
@@ -26,6 +55,7 @@
 } ui_action;
 
 typedef struct {
+	io_port *port;
 	uint8_t bind_type;
 	uint8_t subtype_a;
 	uint8_t subtype_b;
@@ -117,7 +147,7 @@
 void bind_gamepad(int keycode, int gamepadnum, int button)
 {
 
-	if (gamepadnum < 1 || gamepadnum > 2) {
+	if (gamepadnum < 1 || gamepadnum > 8) {
 		return;
 	}
 	uint8_t bind_type = gamepadnum - 1 + BIND_GAMEPAD1;
@@ -126,7 +156,7 @@
 
 void bind_button_gamepad(int joystick, int joybutton, int gamepadnum, int padbutton)
 {
-	if (gamepadnum < 1 || gamepadnum > 2) {
+	if (gamepadnum < 1 || gamepadnum > 8) {
 		return;
 	}
 	uint8_t bind_type = gamepadnum - 1 + BIND_GAMEPAD1;
@@ -135,7 +165,7 @@
 
 void bind_dpad_gamepad(int joystick, int dpad, uint8_t direction, int gamepadnum, int button)
 {
-	if (gamepadnum < 1 || gamepadnum > 2) {
+	if (gamepadnum < 1 || gamepadnum > 8) {
 		return;
 	}
 	uint8_t bind_type = gamepadnum - 1 + BIND_GAMEPAD1;
@@ -159,17 +189,14 @@
 
 void handle_binding_down(keybinding * binding)
 {
-	switch(binding->bind_type)
+	if (binding->bind_type >= BIND_GAMEPAD1)
 	{
-	case BIND_GAMEPAD1:
-	case BIND_GAMEPAD2:
-		if (binding->subtype_a <= GAMEPAD_EXTRA) {
-			genesis->ports[binding->bind_type - BIND_GAMEPAD1].input[binding->subtype_a] |= binding->value;
+		if (binding->subtype_a <= GAMEPAD_EXTRA && binding->port) {
+			binding->port->input[binding->subtype_a] |= binding->value;
 		}
-		if (binding->subtype_b <= GAMEPAD_EXTRA) {
-			genesis->ports[binding->bind_type - BIND_GAMEPAD1].input[binding->subtype_b] |= binding->value;
+		if (binding->subtype_b <= GAMEPAD_EXTRA && binding->port) {
+			binding->port->input[binding->subtype_b] |= binding->value;
 		}
-		break;
 	}
 }
 
@@ -206,11 +233,11 @@
 	{
 	case BIND_GAMEPAD1:
 	case BIND_GAMEPAD2:
-		if (binding->subtype_a <= GAMEPAD_EXTRA) {
-			genesis->ports[binding->bind_type - BIND_GAMEPAD1].input[binding->subtype_a] &= ~binding->value;
+		if (binding->subtype_a <= GAMEPAD_EXTRA && binding->port) {
+			binding->port->input[binding->subtype_a] &= ~binding->value;
 		}
-		if (binding->subtype_b <= GAMEPAD_EXTRA) {
-			genesis->ports[binding->bind_type - BIND_GAMEPAD1].input[binding->subtype_b] &= ~binding->value;
+		if (binding->subtype_b <= GAMEPAD_EXTRA && binding->port) {
+			binding->port->input[binding->subtype_b] &= ~binding->value;
 		}
 		break;
 	case BIND_UI:
@@ -228,7 +255,7 @@
 			if (ui_debug_pal == 4) {
 				ui_debug_pal = 0;
 			}
-			render_debug_pal(ui_debug_pal);
+			genesis->vdp->debug_pal = ui_debug_pal;
 			break;
 		case UI_ENTER_DEBUGGER:
 			break_on_sync = 1;
@@ -447,7 +474,169 @@
 	}
 }
 
-void set_keybindings()
+//Configures port from a device type string taken from the config.
+//
+//Recognised values:
+//  "gamepad3.N" / "gamepad6.N" - 3 or 6 button pad, N is a digit from 1 to 8
+//                                (stored zero-based in gamepad_num)
+//  "sega_parallel"             - Sega parallel transfer board
+//  "generic"                   - generic device
+//A NULL, malformed or unrecognised string leaves the port as IO_NONE; a
+//malformed gamepad string is additionally reported on stderr.
+void process_device(char * device_type, io_port * port)
+{
+	port->device_type = IO_NONE;
+	if (!device_type)
+	{
+		return;
+	}
+
+	const int gamepad_len = strlen("gamepad");
+	//strncmp stops at the terminator, so a string shorter than "gamepad"
+	//is never read past its end
+	if (!strncmp(device_type, "gamepad", gamepad_len))
+	{
+		//each test only runs when the previous character was valid (and thus
+		//not the terminator), so the reads below stay inside the string
+		if (
+			(device_type[gamepad_len] != '3' && device_type[gamepad_len] != '6')
+			|| device_type[gamepad_len+1] != '.' || device_type[gamepad_len+2] < '1'
+			|| device_type[gamepad_len+2] > '8' || device_type[gamepad_len+3] != 0
+		)
+		{
+			fprintf(stderr, "%s is not a valid gamepad type\n", device_type);
+			//bail out rather than reading a pad number from a malformed
+			//(possibly too short) string
+			return;
+		}
+		if (device_type[gamepad_len] == '3')
+		{
+			port->device_type = IO_GAMEPAD3;
+		} else {
+			port->device_type = IO_GAMEPAD6;
+		}
+		port->device.pad.gamepad_num = device_type[gamepad_len+2] - '1';
+	} else if(!strcmp(device_type, "sega_parallel")) {
+		port->device_type = IO_SEGA_PARALLEL;
+		port->device.stream.data_fd = -1;
+		port->device.stream.listen_fd = -1;
+	} else if(!strcmp(device_type, "generic")) {
+		port->device_type = IO_GENERIC;
+		port->device.stream.data_fd = -1;
+		port->device.stream.listen_fd = -1;
+	}
+}
+
+//Maps an IO port index (0, 1, 2) to the name used in log messages
+//("1", "2", "EXT"); any other index yields "invalid"
+char * io_name(int i)
+{
+	static char * const port_names[] = {"1", "2", "EXT"};
+	if (i < 0 || i > 2)
+	{
+		return "invalid";
+	}
+	return port_names[i];
+}
+
+//Path of the socket file to remove at exit; set by setup_io_devices once a
+//generic IO socket is bound and listening
+static char * sockfile_name;
+//atexit handler that removes the socket file named by sockfile_name
+static void cleanup_sockfile()
+{
+	unlink(sockfile_name);
+}
+
+//Configures the three IO ports (1, 2 and EXT) from the "iodevices" section
+//of the config and opens whatever OS resources each device needs:
+//  - Sega parallel board: reads from stdin or from a fifo that is created
+//    if necessary and opened non-blocking
+//  - generic device: creates a Unix domain socket, binds it to the
+//    configured path and listens on it; the socket file is removed at exit
+//A port whose resources cannot be set up is reported on stderr and falls
+//back to IO_NONE.
+void setup_io_devices(tern_node * config, io_port * ports)
+{
+	tern_node *io_nodes = tern_find_prefix(config, "iodevices");
+	char * io_1 = tern_find_ptr(io_nodes, "1");
+	char * io_2 = tern_find_ptr(io_nodes, "2");
+	char * io_ext = tern_find_ptr(io_nodes, "ext");
+
+	process_device(io_1, ports);
+	process_device(io_2, ports+1);
+	process_device(io_ext, ports+2);
+
+	for (int i = 0; i < 3; i++)
+	{
+
+		if (ports[i].device_type == IO_SEGA_PARALLEL)
+		{
+			char *pipe_name = tern_find_ptr(config, "ioparallel_pipe");
+			if (!pipe_name)
+			{
+				fprintf(stderr, "IO port %s is configured to use the sega parallel board, but no parallel_pipe is set!\n", io_name(i));
+				ports[i].device_type = IO_NONE;
+			} else {
+				printf("IO port: %s connected to device '%s' with pipe name: %s\n", io_name(i), device_type_names[ports[i].device_type], pipe_name);
+				if (!strcmp("stdin", pipe_name))
+				{
+					ports[i].device.stream.data_fd = STDIN_FILENO;
+				} else {
+					//an already existing fifo is fine, any other failure is not
+					if (mkfifo(pipe_name, 0666) && errno != EEXIST)
+					{
+						fprintf(stderr, "Failed to create fifo %s for Sega parallel board emulation: %d %s\n", pipe_name, errno, strerror(errno));
+						ports[i].device_type = IO_NONE;
+					} else {
+						ports[i].device.stream.data_fd = open(pipe_name, O_NONBLOCK | O_RDONLY);
+						if (ports[i].device.stream.data_fd == -1)
+						{
+							fprintf(stderr, "Failed to open fifo %s for Sega parallel board emulation: %d %s\n", pipe_name, errno, strerror(errno));
+							ports[i].device_type = IO_NONE;
+						}
+					}
+				}
+			}
+		} else if (ports[i].device_type == IO_GENERIC) {
+			char *sock_name = tern_find_ptr(config, "iosocket");
+			if (!sock_name)
+			{
+				fprintf(stderr, "IO port %s is configured to use generic IO, but no socket is set!\n", io_name(i));
+				ports[i].device_type = IO_NONE;
+			} else {
+				printf("IO port: %s connected to device '%s' with socket name: %s\n", io_name(i), device_type_names[ports[i].device_type], sock_name);
+				ports[i].device.stream.data_fd = -1;
+				ports[i].device.stream.listen_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+				if (ports[i].device.stream.listen_fd == -1)
+				{
+					fprintf(stderr, "Failed to create socket for IO Port %s: %d %s\n", io_name(i), errno, strerror(errno));
+					ports[i].device_type = IO_NONE;
+					continue;
+				}
+				size_t pathlen = strlen(sock_name);
+				size_t addrlen = offsetof(struct sockaddr_un, sun_path) + pathlen + 1;
+				struct sockaddr_un *saddr = malloc(addrlen);
+				if (!saddr)
+				{
+					fprintf(stderr, "Failed to allocate socket address for IO Port %s\n", io_name(i));
+					goto cleanup_sock;
+				}
+				saddr->sun_family = AF_UNIX;
+				memcpy(saddr->sun_path, sock_name, pathlen+1);
+				int bind_result = bind(ports[i].device.stream.listen_fd, (struct sockaddr *)saddr, addrlen);
+				//capture errno before free() gets a chance to disturb it
+				int bind_errno = errno;
+				//the address is only needed for the bind call itself
+				free(saddr);
+				if (bind_result)
+				{
+					fprintf(stderr, "Failed to bind socket for IO Port %s to path %s: %d %s\n", io_name(i), sock_name, bind_errno, strerror(bind_errno));
+					goto cleanup_sock;
+				}
+				if (listen(ports[i].device.stream.listen_fd, 1))
+				{
+					fprintf(stderr, "Failed to listen on socket for IO Port %s: %d %s\n", io_name(i), errno, strerror(errno));
+					goto cleanup_sockfile;
+				}
+				sockfile_name = sock_name;
+				atexit(cleanup_sockfile);
+				continue;
+cleanup_sockfile:
+				//bind succeeded, so the socket file exists and must be removed
+				unlink(sock_name);
+cleanup_sock:
+				close(ports[i].device.stream.listen_fd);
+				ports[i].device_type = IO_NONE;
+			}
+		} else if (ports[i].device_type == IO_GAMEPAD3 || ports[i].device_type == IO_GAMEPAD6) {
+			printf("IO port %s connected to gamepad #%d with type '%s'\n", io_name(i), ports[i].device.pad.gamepad_num + 1, device_type_names[ports[i].device_type]);
+		} else {
+			printf("IO port %s connected to device '%s'\n", io_name(i), device_type_names[ports[i].device_type]);
+		}
+	}
+}
+
+//Resolves gamepad bindings to IO ports. For each of the numbindings entries
+//whose bind_type is BIND_GAMEPAD1 or higher, the three ports are searched
+//in order for a 3 or 6 button gamepad whose gamepad_num equals the
+//binding's zero-based pad number; the first match is stored in the
+//binding's port field. Bindings without a match are left untouched.
+void map_bindings(io_port *ports, keybinding *bindings, int numbindings)
+{
+	for (int idx = 0; idx < numbindings; idx++)
+	{
+		keybinding *bind = bindings + idx;
+		if (bind->bind_type < BIND_GAMEPAD1)
+		{
+			//not a gamepad binding
+			continue;
+		}
+		int wanted = bind->bind_type - BIND_GAMEPAD1;
+		for (io_port *port = ports; port != ports + 3; port++)
+		{
+			if (port->device_type != IO_GAMEPAD3 && port->device_type != IO_GAMEPAD6)
+			{
+				continue;
+			}
+			if (port->device.pad.gamepad_num == wanted)
+			{
+				bind->port = port;
+				break;
+			}
+		}
+	}
+}
+
+void set_keybindings(io_port *ports)
 {
 	tern_node * special = tern_insert_int(NULL, "up", RENDERKEY_UP);
 	special = tern_insert_int(special, "down", RENDERKEY_DOWN);
@@ -532,76 +721,245 @@
 	speeds = malloc(sizeof(uint32_t));
 	speeds[0] = 100;
 	process_speeds(speed_nodes, NULL);
-	for (int i = 0; i < num_speeds; i++) {
+	for (int i = 0; i < num_speeds; i++)
+	{
 		if (!speeds[i]) {
 			fprintf(stderr, "Speed index %d was not set to a valid percentage!", i);
 			speeds[i] = 100;
 		}
 	}
+	for (int bucket = 0; bucket < 256; bucket++)
+	{
+		if (bindings[bucket])
+		{
+			map_bindings(ports, bindings[bucket], 256);
+		}
+	}
+	for (int stick = 0; stick < MAX_JOYSTICKS; stick++)
+	{
+		if (joybindings[stick])
+		{
+			int numbuttons = render_joystick_num_buttons(stick);
+			map_bindings(ports, joybindings[stick], render_joystick_num_buttons(stick));
+		}
+		if (joydpads[stick])
+		{
+			map_bindings(ports, joydpads[stick]->bindings, 4);
+		}
+	}
 }
 
 #define TH 0x40
-#define TH_TIMEOUT 8000
+#define TH_TIMEOUT 56000
 
-void io_adjust_cycles(io_port * pad, uint32_t current_cycle, uint32_t deduction)
+void io_adjust_cycles(io_port * port, uint32_t current_cycle, uint32_t deduction) //Resets th_counter if the pad timeout has passed, otherwise subtracts deduction from timeout_cycle
 {
 	/*uint8_t control = pad->control | 0x80;
 	uint8_t th = control & pad->output;
 	if (pad->input[GAMEPAD_TH0] || pad->input[GAMEPAD_TH1]) {
 		printf("adjust_cycles | control: %X, TH: %X, GAMEPAD_TH0: %X, GAMEPAD_TH1: %X, TH Counter: %d, Timeout: %d, Cycle: %d\n", control, th, pad->input[GAMEPAD_TH0], pad->input[GAMEPAD_TH1], pad->th_counter,pad->timeout_cycle, current_cycle);
 	}*/
-	if (current_cycle >= pad->timeout_cycle) {
-		pad->th_counter = 0;
-	} else {
-		pad->timeout_cycle -= deduction;
+	if (port->device_type == IO_GAMEPAD6) //other device types are left untouched by this function
+	{
+		if (current_cycle >= port->device.pad.timeout_cycle)
+		{
+			port->device.pad.th_counter = 0;
+		} else {
+			port->device.pad.timeout_cycle -= deduction;
+		}
+	}
+}
+
+static void wait_for_connection(io_port * port) //Accepts a new data connection on listen_fd when none is open (data_fd == -1)
+{
+	if (port->device.stream.data_fd == -1)
+	{
+		puts("Waiting for socket connection...");
+		port->device.stream.data_fd = accept(port->device.stream.listen_fd, NULL, NULL); //NOTE(review): result is not checked; on failure data_fd stays -1
+		fcntl(port->device.stream.data_fd, F_SETFL, O_NONBLOCK | O_RDWR); //put the accepted socket in non-blocking mode
+	}
+}
+
+static void service_pipe(io_port * port) //Reads one byte from data_fd and splits it into the IO_TH0 and IO_TH1 input slots
+{
+	uint8_t value;
+	int numRead = read(port->device.stream.data_fd, &value, sizeof(value));
+	if (numRead > 0)
+	{
+		port->input[IO_TH0] = (value & 0xF) | 0x10; //low nibble, with bit 4 set
+		port->input[IO_TH1] = (value >> 4) | 0x10; //high nibble, with bit 4 set
+	} else if(numRead == -1 && errno != EAGAIN && errno != EWOULDBLOCK) { //"no data available" is not reported as an error
+		fprintf(stderr, "Error reading pipe for IO port: %d %s\n", errno, strerror(errno));
 	}
 }
 
-void io_data_write(io_port * pad, uint8_t value, uint32_t current_cycle)
+static void service_socket(io_port *port) //Polls the data socket for input, then sends the output byte if a write is pending
 {
-	if (pad->control & TH) {
-		//check if TH has changed
-		if ((pad->output & TH) ^ (value & TH)) {
-			if (current_cycle >= pad->timeout_cycle) {
-				pad->th_counter = 0;
+	uint8_t buf[32];
+	uint8_t blocking = 0;
+	int numRead = 0;
+	while (numRead <= 0)
+	{
+		numRead = recv(port->device.stream.data_fd, buf, sizeof(buf), 0);
+		if (numRead > 0)
+		{
+			port->input[IO_TH0] = buf[numRead-1]; //only the most recently received byte is kept
+			if (port->input[IO_STATE] == IO_READ_PENDING)
+			{
+				port->input[IO_STATE] = IO_READ;
+				if (blocking)
+				{
+					//pending read satisfied, back to non-blocking mode
+					fcntl(port->device.stream.data_fd, F_SETFL, O_RDWR | O_NONBLOCK);
+				}
+			} else if (port->input[IO_STATE] == IO_WRITTEN) {
+				port->input[IO_STATE] = IO_READ;
 			}
-			if (!(value & TH)) {
-				pad->th_counter++;
+		} else if (numRead == 0 || (errno != EAGAIN && errno != EWOULDBLOCK)) {
+			if (numRead < 0) {
+				fprintf(stderr, "Error reading from socket for IO port: %d %s\n", errno, strerror(errno));
+			}
+			close(port->device.stream.data_fd); //peer closed or hard error: release the descriptor
+			port->device.stream.data_fd = -1; //mark it closed so wait_for_connection accepts a new peer
+			wait_for_connection(port);
+		} else if (port->input[IO_STATE] == IO_READ_PENDING) {
+			//clear the nonblocking flag so the next read will block
+			if (!blocking)
+			{
+				fcntl(port->device.stream.data_fd, F_SETFL, O_RDWR);
+				blocking = 1;
 			}
-			pad->timeout_cycle = current_cycle + TH_TIMEOUT;
+		} else {
+			//no new data, but that's ok
+			break;
 		}
 	}
-	pad->output = value;
+	//send the output byte if io_data_write marked a write as pending
+	if (port->input[IO_STATE] == IO_WRITE_PENDING)
+	{
+		uint8_t value = port->output & port->control;
+		int written = 0;
+		blocking = 0;
+		while (written <= 0)
+		{
+			written = send(port->device.stream.data_fd, &value, sizeof(value), 0); //keep the result; every branch below depends on it
+			if (written > 0)
+			{
+				port->input[IO_STATE] = IO_WRITTEN;
+				if (blocking)
+				{
+					//pending write satisfied, back to non-blocking mode
+					fcntl(port->device.stream.data_fd, F_SETFL, O_RDWR | O_NONBLOCK);
+				}
+			} else if (written == 0 || (errno != EAGAIN && errno != EWOULDBLOCK)) {
+				if (written < 0) {
+					fprintf(stderr, "Error writing to socket for IO port: %d %s\n", errno, strerror(errno));
+				}
+				close(port->device.stream.data_fd); //release the descriptor before reconnecting
+				port->device.stream.data_fd = -1; //mark it closed so wait_for_connection accepts a new peer
+				wait_for_connection(port);
+			} else {
+				//clear the nonblocking flag so the next write will block
+				if (!blocking)
+				{
+					fcntl(port->device.stream.data_fd, F_SETFL, O_RDWR);
+					blocking = 1;
+				}
+			}
+		}
+	}
 }
 
-uint8_t io_data_read(io_port * pad, uint32_t current_cycle)
+void io_data_write(io_port * port, uint8_t value, uint32_t current_cycle) //Stores value as the port output, with device-specific handling chosen by device_type
 {
-	uint8_t control = pad->control | 0x80;
-	uint8_t th = control & pad->output;
+	switch (port->device_type)
+	{
+	case IO_GAMEPAD6:
+		if (port->control & TH) {
+			//check if TH has changed
+			if ((port->output & TH) ^ (value & TH)) {
+				if (current_cycle >= port->device.pad.timeout_cycle) {
+					port->device.pad.th_counter = 0;
+				}
+				if (!(value & TH)) { //count only transitions where the new TH bit is 0
+					port->device.pad.th_counter++;
+				}
+				port->device.pad.timeout_cycle = current_cycle + TH_TIMEOUT; //every TH change restarts the timeout
+			}
+		}
+		port->output = value;
+		break;
+	case IO_GENERIC:
+		wait_for_connection(port);
+		port->input[IO_STATE] = IO_WRITE_PENDING; //service_socket sends the value when it sees this state
+		port->output = value;
+		service_socket(port);
+		break;
+	default:
+		port->output = value;
+	}
+
+}
+
+uint8_t io_data_read(io_port * port, uint32_t current_cycle)
+{
+	uint8_t control = port->control | 0x80;
+	uint8_t th = control & port->output & 0x40;
 	uint8_t input;
-	if (current_cycle >= pad->timeout_cycle) {
-		pad->th_counter = 0;
+	switch (port->device_type)
+	{
+	case IO_GAMEPAD3:
+	{
+		input = port->input[th ? GAMEPAD_TH1 : GAMEPAD_TH0];
+		break;
 	}
-	/*if (pad->input[GAMEPAD_TH0] || pad->input[GAMEPAD_TH1]) {
-		printf("io_data_read | control: %X, TH: %X, GAMEPAD_TH0: %X, GAMEPAD_TH1: %X, TH Counter: %d, Timeout: %d, Cycle: %d\n", control, th, pad->input[GAMEPAD_TH0], pad->input[GAMEPAD_TH1], pad->th_counter,pad->timeout_cycle, context->current_cycle);
-	}*/
-	if (th) {
-		if (pad->th_counter == 3) {
-			input = pad->input[GAMEPAD_EXTRA];
+	case IO_GAMEPAD6:
+	{
+		if (current_cycle >= port->device.pad.timeout_cycle) {
+			port->device.pad.th_counter = 0;
+		}
+		/*if (port->input[GAMEPAD_TH0] || port->input[GAMEPAD_TH1]) {
+			printf("io_data_read | control: %X, TH: %X, GAMEPAD_TH0: %X, GAMEPAD_TH1: %X, TH Counter: %d, Timeout: %d, Cycle: %d\n", control, th, port->input[GAMEPAD_TH0], port->input[GAMEPAD_TH1], port->th_counter,port->timeout_cycle, context->current_cycle);
+		}*/
+		if (th) {
+			if (port->device.pad.th_counter == 3) {
+				input = port->input[GAMEPAD_EXTRA];
+			} else {
+				input = port->input[GAMEPAD_TH1];
+			}
 		} else {
-			input = pad->input[GAMEPAD_TH1];
+			if (port->device.pad.th_counter == 3) {
+				input = port->input[GAMEPAD_TH0] | 0xF;
+			} else if(port->device.pad.th_counter == 4) {
+				input = port->input[GAMEPAD_TH0]  & 0x30;
+			} else {
+				input = port->input[GAMEPAD_TH0] | 0xC;
+			}
 		}
-	} else {
-		if (pad->th_counter == 3) {
-			input = pad->input[GAMEPAD_TH0] | 0xF;
-		} else if(pad->th_counter == 4) {
-			input = pad->input[GAMEPAD_TH0]  & 0x30;
-		} else {
-			input = pad->input[GAMEPAD_TH0] | 0xC;
+		break;
+	}
+	case IO_SEGA_PARALLEL:
+		if (!th)
+		{
+			service_pipe(port);
 		}
+		input = ~port->input[th ? IO_TH1 : IO_TH0];
+		break;
+	case IO_GENERIC:
+		if (port->input[IO_TH0] & 0x80 && port->input[IO_STATE] == IO_WRITTEN)
+		{
+			//device requested a blocking read after writes
+			port->input[IO_STATE] = IO_READ_PENDING;
+		}
+		service_socket(port);
+		input = ~port->input[IO_TH0];
+		break;
+	default:
+		input = 0;
+		break;
 	}
-	uint8_t value = ((~input) & (~control)) | (pad->output & control);
-	/*if (pad->input[GAMEPAD_TH0] || pad->input[GAMEPAD_TH1]) {
+	uint8_t value = ((~input) & (~control)) | (port->output & control);
+	/*if (port->input[GAMEPAD_TH0] || port->input[GAMEPAD_TH1]) {
 		printf ("value: %X\n", value);
 	}*/
 	return value;
--- a/io.h	Thu May 28 21:09:33 2015 -0700
+++ b/io.h	Thu May 28 21:19:55 2015 -0700
@@ -1,18 +1,43 @@
 /*
  Copyright 2013 Michael Pavone
- This file is part of BlastEm. 
+ This file is part of BlastEm.
  BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
 */
 #ifndef IO_H_
 #define IO_H_
 #include <stdint.h>
+#include "tern.h"
+
+enum { //device type values stored in io_port.device_type
+	IO_GAMEPAD3,
+	IO_GAMEPAD6,
+	IO_MOUSE,
+	IO_MENACER,
+	IO_JUSTIFIER,
+	IO_SEGA_MULTI,
+	IO_EA_MULTI_A,
+	IO_EA_MULTI_B,
+	IO_SEGA_PARALLEL,
+	IO_GENERIC,
+	IO_NONE
+};
 
 typedef struct {
-	uint32_t th_counter;
-	uint32_t timeout_cycle;
-	uint8_t output;
-	uint8_t control;
-	uint8_t input[3];
+	union { //per-device state; presumably the valid member is selected by device_type
+		struct {
+			uint32_t timeout_cycle;
+			uint16_t th_counter;
+			uint16_t gamepad_num;
+		} pad; //accessed for IO_GAMEPAD3 / IO_GAMEPAD6 ports
+		struct {
+			int data_fd; //-1 while no data connection is open
+			int listen_fd; //socket handed to accept() when a connection is needed
+		} stream; //accessed by the pipe and socket service routines
+	} device;
+	uint8_t  output;
+	uint8_t  control;
+	uint8_t  input[3]; //indexed with the GAMEPAD_* or IO_* slot defines below
+	uint8_t  device_type; //one of the IO_* device type enum values
 } io_port;
 
 #define GAMEPAD_TH0 0
@@ -20,7 +45,19 @@
 #define GAMEPAD_EXTRA 2
 #define GAMEPAD_NONE 0xF
 
-void set_keybindings();
+#define IO_TH0 0
+#define IO_TH1 1
+#define IO_STATE 2
+
+enum { //values kept in input[IO_STATE] for IO_GENERIC ports
+	IO_WRITE_PENDING, //set by io_data_write before the value is sent
+	IO_WRITTEN, //the pending value has been sent
+	IO_READ_PENDING, //set by io_data_read when the device asked for a blocking read
+	IO_READ //data was received while in IO_READ_PENDING or IO_WRITTEN
+};
+
+void set_keybindings(io_port *ports);
+void setup_io_devices(tern_node * config, io_port * ports);
 void io_adjust_cycles(io_port * pad, uint32_t current_cycle, uint32_t deduction);
 void io_data_write(io_port * pad, uint8_t value, uint32_t current_cycle);
 uint8_t io_data_read(io_port * pad, uint32_t current_cycle);
--- a/m68k_core.c	Thu May 28 21:09:33 2015 -0700
+++ b/m68k_core.c	Thu May 28 21:19:55 2015 -0700
@@ -86,6 +86,28 @@
 	}
 }
 
+void m68k_save_result(m68kinst * inst, m68k_options * opts) //Emits a call to the write routine matching the operand size unless dst is MODE_REG, MODE_AREG or MODE_UNUSED
+{
+	code_info *code = &opts->gen.code;
+	if (inst->dst.addr_mode != MODE_REG && inst->dst.addr_mode != MODE_AREG && inst->dst.addr_mode != MODE_UNUSED) {
+		if (inst->dst.addr_mode == MODE_AREG_PREDEC && inst->src.addr_mode == MODE_AREG_PREDEC && inst->op != M68K_MOVE) {
+			areg_to_native(opts, inst->dst.params.regs.pri, opts->gen.scratch2); //reload the destination address register into scratch2
+		}
+		switch (inst->extra.size) //NOTE(review): no default case; any other size value emits no write call
+		{
+		case OPSIZE_BYTE:
+			call(code, opts->write_8);
+			break;
+		case OPSIZE_WORD:
+			call(code, opts->write_16);
+			break;
+		case OPSIZE_LONG:
+			call(code, opts->write_32_lowfirst);
+			break;
+		}
+	}
+}
+
 void translate_m68k_lea_pea(m68k_options * opts, m68kinst * inst)
 {
 	code_info *code = &opts->gen.code;
@@ -173,7 +195,7 @@
 void jump_m68k_abs(m68k_options * opts, uint32_t address)
 {
 	code_info *code = &opts->gen.code;
-	code_ptr dest_addr = get_native_address(opts->gen.native_code_map, address);
+	code_ptr dest_addr = get_native_address(opts, address);
 	if (!dest_addr) {
 		opts->gen.deferred = defer_address(opts->gen.deferred, address, code->cur + 1);
 		//dummy address to be replaced later, make sure it generates a 4-byte displacement
@@ -503,9 +525,39 @@
 	native_to_areg(opts, opts->gen.scratch2, 8);
 }
 
-code_ptr get_native_address(native_map_slot * native_code_map, uint32_t address)
+void translate_m68k_reset(m68k_options *opts, m68kinst *inst) //Emits code that saves the context and then calls print_regs_exit
+{
+	code_info *code = &opts->gen.code;
+	call(code, opts->gen.save_context);
+	call_args(code, (code_ptr)print_regs_exit, 1, opts->gen.context_reg); //the context register is the single argument
+}
+
+void translate_m68k_rte(m68k_options *opts, m68kinst *inst) //Emits code for RTE: reads SR then PC from the address in A7 and jumps to the native code for that PC
 {
-	address &= 0xFFFFFF;
+	code_info *code = &opts->gen.code;
+	//TODO: Trap if not in system mode
+	//Read saved SR
+	areg_to_native(opts, 7, opts->gen.scratch1);
+	call(code, opts->read_16);
+	addi_areg(opts, 2, 7); //advance A7 past the 16-bit SR
+	call(code, opts->set_sr);
+	//Read saved PC
+	areg_to_native(opts, 7, opts->gen.scratch1);
+	call(code, opts->read_32);
+	addi_areg(opts, 4, 7); //advance A7 past the 32-bit PC
+	check_user_mode_swap_ssp_usp(opts);
+	//Get native address, sync components, recalculate integer points and jump to returned address
+	call(code, opts->native_addr_and_sync);
+	jmp_r(code, opts->gen.scratch1);
+}
+
+code_ptr get_native_address(m68k_options *opts, uint32_t address)
+{
+	native_map_slot * native_code_map = opts->gen.native_code_map;
+	address &= opts->gen.address_mask;
+	if (address & 1) {
+		return opts->odd_address;
+	}
 	address /= 2;
 	uint32_t chunk = address / NATIVE_CHUNK_SIZE;
 	if (!native_code_map[chunk].base) {
@@ -520,11 +572,12 @@
 
 code_ptr get_native_from_context(m68k_context * context, uint32_t address)
 {
-	return get_native_address(context->native_code_map, address);
+	return get_native_address(context->options, address); //thin wrapper: forwards the lookup using the context's options
 }
 
 uint32_t get_instruction_start(native_map_slot * native_code_map, uint32_t address)
 {
+	//FIXME: Use opts->gen.address_mask
 	address &= 0xFFFFFF;
 	address /= 2;
 	uint32_t chunk = address / NATIVE_CHUNK_SIZE;
@@ -547,17 +600,34 @@
 {
 	native_map_slot * native_code_map = context->native_code_map;
 	m68k_options * opts = context->options;
-	address &= 0xFFFFFF;
-	if (address > 0xE00000) {
-		context->ram_code_flags[(address & 0xC000) >> 14] |= 1 << ((address & 0x3800) >> 11);
-		if (((address & 0x3FFF) + size) & 0xC000) {
-			context->ram_code_flags[((address+size) & 0xC000) >> 14] |= 1 << (((address+size) & 0x3800) >> 11);
+	address &= opts->gen.address_mask;
+	uint32_t meta_off = 0;
+	//TODO: Refactor part of this loop into some kind of get_ram_chunk function
+	for (int i = 0; i < opts->gen.memmap_chunks; i++) {
+		if (address >= opts->gen.memmap[i].start && address < opts->gen.memmap[i].end) {
+			if ((opts->gen.memmap[i].flags & (MMAP_WRITE | MMAP_CODE)) == (MMAP_WRITE | MMAP_CODE)) {
+				uint32_t masked = (address & opts->gen.memmap[i].mask);
+				uint32_t final_off = masked + meta_off;
+				uint32_t ram_flags_off = final_off >> (opts->gen.ram_flags_shift + 3);
+				context->ram_code_flags[ram_flags_off] |= 1 << ((final_off >> opts->gen.ram_flags_shift) & 7);
+
+				uint32_t slot = final_off / 1024;
+				if (!opts->gen.ram_inst_sizes[slot]) {
+					opts->gen.ram_inst_sizes[slot] = malloc(sizeof(uint8_t) * 512);
+				}
+				opts->gen.ram_inst_sizes[slot][(final_off/2) & 511] = native_size;
+
+				//TODO: Deal with case in which end of instruction is in a different memory chunk
+				masked = (address + size - 1) & opts->gen.memmap[i].mask;
+				final_off = masked + meta_off;
+				ram_flags_off = final_off >> (opts->gen.ram_flags_shift + 3);
+				context->ram_code_flags[ram_flags_off] |= 1 << ((final_off >> opts->gen.ram_flags_shift) & 7);
+			}
+			break;
+		} else if ((opts->gen.memmap[i].flags & (MMAP_WRITE | MMAP_CODE)) == (MMAP_WRITE | MMAP_CODE)) {
+			uint32_t size = chunk_size(&opts->gen, opts->gen.memmap + i);
+			meta_off += size;
 		}
-		uint32_t slot = (address & 0xFFFF)/1024;
-		if (!opts->gen.ram_inst_sizes[slot]) {
-			opts->gen.ram_inst_sizes[slot] = malloc(sizeof(uint8_t) * 512);
-		}
-		opts->gen.ram_inst_sizes[slot][((address & 0xFFFF)/2)%512] = native_size;
 	}
 	address/= 2;
 	uint32_t chunk = address / NATIVE_CHUNK_SIZE;
@@ -569,6 +639,7 @@
 	uint32_t offset = address % NATIVE_CHUNK_SIZE;
 	native_code_map[chunk].offsets[offset] = native_addr-native_code_map[chunk].base;
 	for(address++,size-=2; size; address++,size-=2) {
+		address &= opts->gen.address_mask >> 1;
 		chunk = address / NATIVE_CHUNK_SIZE;
 		offset = address % NATIVE_CHUNK_SIZE;
 		if (!native_code_map[chunk].base) {
@@ -576,17 +647,31 @@
 			native_code_map[chunk].offsets = malloc(sizeof(int32_t) * NATIVE_CHUNK_SIZE);
 			memset(native_code_map[chunk].offsets, 0xFF, sizeof(int32_t) * NATIVE_CHUNK_SIZE);
 		}
-		native_code_map[chunk].offsets[offset] = EXTENSION_WORD;
+		if (native_code_map[chunk].offsets[offset] == INVALID_OFFSET) {
+			//TODO: Better handling of overlapping instructions
+			native_code_map[chunk].offsets[offset] = EXTENSION_WORD;
+		}
 	}
 }
 
 uint8_t get_native_inst_size(m68k_options * opts, uint32_t address)
 {
-	if (address < 0xE00000) {
-		return 0;
+	address &= opts->gen.address_mask;
+	uint32_t meta_off = 0; //offset into the metadata shared by all writable code chunks
+	for (int i = 0; i < opts->gen.memmap_chunks; i++) {
+		int code_ram = (opts->gen.memmap[i].flags & (MMAP_WRITE | MMAP_CODE)) == (MMAP_WRITE | MMAP_CODE);
+		if (address >= opts->gen.memmap[i].start && address < opts->gen.memmap[i].end) {
+			if (!code_ram) {
+				return 0; //sizes are only tracked for writable code memory
+			}
+			meta_off += address & opts->gen.memmap[i].mask;
+			uint8_t *sizes = opts->gen.ram_inst_sizes[meta_off/1024]; //slots are allocated lazily and may still be NULL
+			return sizes ? sizes[(meta_off/2)%512] : 0;
+		} else if (code_ram) {
+			meta_off += chunk_size(&opts->gen, opts->gen.memmap + i); //skip past earlier writable code chunks
+		}
 	}
-	uint32_t slot = (address & 0xFFFF)/1024;
-	return opts->gen.ram_inst_sizes[slot][((address & 0xFFFF)/2)%512];
+	return 0; //address is not covered by any memory map chunk, so there is no size to report
 }
 
 uint8_t m68k_is_terminal(m68kinst * inst)
@@ -649,7 +734,7 @@
 	RAW_IMPL(M68K_EXT, translate_m68k_ext),
 	UNARY_IMPL(M68K_NEG, X|N|Z|V|C),
 	OP_IMPL(M68K_NEGX, translate_m68k_negx),
-	UNARY_IMPL(M68K_NOT, X|N|Z|V|C),
+	UNARY_IMPL(M68K_NOT, N|Z|V|C),
 	UNARY_IMPL(M68K_TST, N|Z|V0|C0),
 
 	//shift/rotate
@@ -723,6 +808,7 @@
 void translate_m68k(m68k_options * opts, m68kinst * inst)
 {
 	check_cycles_int(&opts->gen, inst->address);
+	//log_address(&opts->gen, inst->address, "M68K: %X @ %d\n");
 	impl_info * info = m68k_impls + inst->op;
 	if (info->itype == RAW_FUNC) {
 		info->impl.raw(opts, inst);
@@ -754,30 +840,26 @@
 	m68kinst instbuf;
 	m68k_options * opts = context->options;
 	code_info *code = &opts->gen.code;
-	address &= 0xFFFFFF;
-	if(get_native_address(opts->gen.native_code_map, address)) {
+	if(get_native_address(opts, address)) {
 		return;
 	}
-	char disbuf[1024];
 	uint16_t *encoded, *next;
-	if ((address & 0xFFFFFF) < 0x400000) {
-		encoded = context->mem_pointers[0] + (address & 0xFFFFFF)/2;
-	} else if ((address & 0xFFFFFF) > 0xE00000) {
-		encoded = context->mem_pointers[1] + (address  & 0xFFFF)/2;
-	} else {
-		printf("attempt to translate non-memory address: %X\n", address);
-		exit(1);
-	}
 	do {
 		if (opts->address_log) {
 			fprintf(opts->address_log, "%X\n", address);
+			fflush(opts->address_log);
 		}
 		do {
-			if (address >= 0x400000 && address < 0xE00000) {
+			if (address & 1) {
+				break;
+			}
+			encoded = get_native_pointer(address, (void **)context->mem_pointers, &opts->gen);
+			if (!encoded) {
+				map_native_address(context, address, code->cur, 2, 1);
 				translate_out_of_bounds(code);
 				break;
 			}
-			code_ptr existing = get_native_address(opts->gen.native_code_map, address);
+			code_ptr existing = get_native_address(opts, address);
 			if (existing) {
 				jmp(code, existing);
 				break;
@@ -788,7 +870,7 @@
 			}
 			uint16_t m68k_size = (next-encoded)*2;
 			address += m68k_size;
-			encoded = next;
+			//char disbuf[1024];
 			//m68k_disasm(&instbuf, disbuf);
 			//printf("%X: %s\n", instbuf.address, disbuf);
 
@@ -802,18 +884,8 @@
 		process_deferred(&opts->gen.deferred, context, (native_addr_func)get_native_from_context);
 		if (opts->gen.deferred) {
 			address = opts->gen.deferred->address;
-			if ((address & 0xFFFFFF) < 0x400000) {
-				encoded = context->mem_pointers[0] + (address & 0xFFFFFF)/2;
-			} else if ((address & 0xFFFFFF) > 0xE00000) {
-				encoded = context->mem_pointers[1] + (address  & 0xFFFF)/2;
-			} else {
-				printf("attempt to translate non-memory address: %X\n", address);
-				exit(1);
-			}
-		} else {
-			encoded = NULL;
 		}
-	} while(encoded != NULL);
+	} while(opts->gen.deferred);
 }
 
 void * m68k_retranslate_inst(uint32_t address, m68k_context * context)
@@ -821,30 +893,30 @@
 	m68k_options * opts = context->options;
 	code_info *code = &opts->gen.code;
 	uint8_t orig_size = get_native_inst_size(opts, address);
-	code_ptr orig_start = get_native_address(context->native_code_map, address);
+	code_ptr orig_start = get_native_address(context->options, address);
 	uint32_t orig = address;
 	code_info orig_code;
 	orig_code.cur = orig_start;
 	orig_code.last = orig_start + orig_size + 5;
-	address &= 0xFFFF;
-	uint16_t *after, *inst = context->mem_pointers[1] + address/2;
+	uint16_t *after, *inst = get_native_pointer(address, (void **)context->mem_pointers, &opts->gen);
 	m68kinst instbuf;
 	after = m68k_decode(inst, &instbuf, orig);
 	if (orig_size != MAX_NATIVE_SIZE) {
 		deferred_addr * orig_deferred = opts->gen.deferred;
 
-		//make sure the beginning of the code for an instruction is contiguous
-		check_code_prologue(code);
+		//make sure we have enough code space for the max size instruction
+		check_alloc_code(code, MAX_NATIVE_SIZE);
 		code_ptr native_start = code->cur;
 		translate_m68k(opts, &instbuf);
 		code_ptr native_end = code->cur;
-		uint8_t is_terminal = m68k_is_terminal(&instbuf);
+		/*uint8_t is_terminal = m68k_is_terminal(&instbuf);
 		if ((native_end - native_start) <= orig_size) {
 			code_ptr native_next;
 			if (!is_terminal) {
 				native_next = get_native_address(context->native_code_map, orig + (after-inst)*2);
 			}
 			if (is_terminal || (native_next && ((native_next == orig_start + orig_size) || (orig_size - (native_end - native_start)) > 5))) {
+				printf("Using original location: %p\n", orig_code.cur);
 				remove_deferred_until(&opts->gen.deferred, orig_deferred);
 				code_info tmp;
 				tmp.cur = code->cur;
@@ -861,7 +933,7 @@
 				m68k_handle_deferred(context);
 				return orig_start;
 			}
-		}
+		}*/
 
 		map_native_address(context, instbuf.address, native_start, (after-inst)*2, MAX_NATIVE_SIZE);
 
@@ -880,17 +952,14 @@
 		m68k_handle_deferred(context);
 		return native_start;
 	} else {
-		code_info tmp;
-		tmp.cur = code->cur;
-		tmp.last = code->last;
-		code->cur = orig_code.cur;
-		code->last = orig_code.last;
+		code_info tmp = *code;
+		*code = orig_code;
 		translate_m68k(opts, &instbuf);
+		orig_code = *code;
+		*code = tmp;
 		if (!m68k_is_terminal(&instbuf)) {
-			jmp(code, get_native_address_trans(context, orig + (after-inst)*2));
+			jmp(&orig_code, get_native_address_trans(context, orig + (after-inst)*2));
 		}
-		code->cur = tmp.cur;
-		code->last = tmp.last;
 		m68k_handle_deferred(context);
 		return orig_start;
 	}
@@ -898,19 +967,22 @@
 
 code_ptr get_native_address_trans(m68k_context * context, uint32_t address)
 {
-	address &= 0xFFFFFF;
-	code_ptr ret = get_native_address(context->native_code_map, address);
+	code_ptr ret = get_native_address(context->options, address); //address masking now happens inside get_native_address
 	if (!ret) {
 		translate_m68k_stream(address, context);
-		ret = get_native_address(context->native_code_map, address);
+		ret = get_native_address(context->options, address); //look up again after translating the stream
 	}
 	return ret;
 }
 
 void remove_breakpoint(m68k_context * context, uint32_t address)
 {
-	code_ptr native = get_native_address(context->native_code_map, address);
-	check_cycles_int(context->options, address);
+	code_ptr native = get_native_address(context->options, address); //NOTE(review): not checked for NULL before being used as the emit target
+	code_info tmp = context->options->gen.code; //save the current code generation position
+	context->options->gen.code.cur = native; //temporarily point code generation at the instruction's native code
+	context->options->gen.code.last = native + MAX_NATIVE_SIZE;
+	check_cycles_int(&context->options->gen, address); //emit the cycle check there, presumably overwriting the breakpoint call
+	context->options->gen.code = tmp; //restore the saved position
 }
 
 void start_68k_context(m68k_context * context, uint32_t address)
@@ -922,18 +994,21 @@
 
 void m68k_reset(m68k_context * context)
 {
-	//TODO: Make this actually use the normal read functions
-	context->aregs[7] = context->mem_pointers[0][0] << 16 | context->mem_pointers[0][1];
-	uint32_t address = context->mem_pointers[0][2] << 16 | context->mem_pointers[0][3];
+	//TODO: Actually execute the M68K reset vector rather than simulating some of its behavior
+	uint16_t *reset_vec = get_native_pointer(0, (void **)context->mem_pointers, &context->options->gen); //NOTE(review): dereferenced below without a NULL check
+	context->aregs[7] = reset_vec[0] << 16 | reset_vec[1]; //A7 is loaded from the first two words at address 0
+	uint32_t address = reset_vec[2] << 16 | reset_vec[3]; //start address comes from the next two words
 	start_68k_context(context, address);
 }
 
 
-void init_68k_context(m68k_context * context, native_map_slot * native_code_map, void * opts)
+m68k_context * init_68k_context(m68k_options * opts) //Allocates and returns a zeroed context, including its trailing ram_code_flags array
 {
+	m68k_context * context = calloc(1, sizeof(m68k_context) + ram_size(&opts->gen) / (1 << opts->gen.ram_flags_shift) / 8);
-	memset(context, 0, sizeof(m68k_context));
+	//calloc zeroes the whole allocation; clearing only sizeof(m68k_context) left ram_code_flags uninitialised
-	context->native_code_map = native_code_map;
+	context->native_code_map = opts->gen.native_code_map;
 	context->options = opts;
-	context->int_cycle = 0xFFFFFFFF;
+	context->int_cycle = CYCLE_NEVER;
 	context->status = 0x27;
+	return context; //the caller receives the newly allocated context
 }
--- a/m68k_core.h	Thu May 28 21:09:33 2015 -0700
+++ b/m68k_core.h	Thu May 28 21:19:55 2015 -0700
@@ -34,6 +34,7 @@
 	code_ptr        write_32_highfirst;
 	code_ptr        do_sync;
 	code_ptr        trap;
+	code_ptr        odd_address;
 	start_fun       start_context;
 	code_ptr        retrans_stub;
 	code_ptr        native_addr;
@@ -59,16 +60,16 @@
 	uint16_t        reserved;
 
 	native_map_slot *native_code_map;
-	void            *options;
-	uint8_t         ram_code_flags[32/8];
+	m68k_options    *options;
 	void            *system;
+	uint8_t         ram_code_flags[];
 } m68k_context;
 
 void translate_m68k(m68k_options * opts, struct m68kinst * inst);
 void translate_m68k_stream(uint32_t address, m68k_context * context);
 void start_68k_context(m68k_context * context, uint32_t address);
-void init_m68k_opts(m68k_options * opts, memmap_chunk * memmap, uint32_t num_chunks);
-void init_68k_context(m68k_context * context, native_map_slot * native_code_map, void * opts);
+void init_m68k_opts(m68k_options * opts, memmap_chunk * memmap, uint32_t num_chunks, uint32_t clock_divider);
+m68k_context * init_68k_context(m68k_options * opts);
 void m68k_reset(m68k_context * context);
 void insert_breakpoint(m68k_context * context, uint32_t address, uint8_t * bp_handler);
 void remove_breakpoint(m68k_context * context, uint32_t address);
--- a/m68k_core_x86.c	Thu May 28 21:09:33 2015 -0700
+++ b/m68k_core_x86.c	Thu May 28 21:19:55 2015 -0700
@@ -14,17 +14,6 @@
 #include <stdlib.h>
 #include <string.h>
 
-#define CYCLES RAX
-#define LIMIT RBP
-#define CONTEXT RSI
-#define SCRATCH1 RCX
-
-#ifdef X86_64
-#define SCRATCH2 RDI
-#else
-#define SCRATCH2 RBX
-#endif
-
 enum {
 	FLAG_X,
 	FLAG_N,
@@ -233,7 +222,7 @@
 }
 
 void native_to_areg(m68k_options *opts, uint8_t native_reg, uint8_t reg)
-{
+	{
 	if (opts->aregs[reg] >= 0) {
 		mov_rr(&opts->gen.code, native_reg, opts->aregs[reg], SZ_D);
 	} else {
@@ -267,7 +256,7 @@
 void addi_native(m68k_options *opts, int32_t value, uint8_t reg)
 {
 	add_ir(&opts->gen.code, value, reg, SZ_D);
-}
+			}
 
 void subi_native(m68k_options *opts, int32_t value, uint8_t reg)
 {
@@ -366,7 +355,7 @@
 	m68k_op_info *op = dst ? &inst->dst : &inst->src;
 	int8_t reg = native_reg(op, opts);
 	uint8_t sec_reg;
-	int32_t dec_amount,inc_amount;
+	int32_t dec_amount, inc_amount;
 	if (reg >= 0) {
 		ea->mode = MODE_REG_DIRECT;
 		if (!dst && inst->dst.addr_mode == MODE_AREG && inst->extra.size == OPSIZE_WORD) {
@@ -532,26 +521,15 @@
 	}
 }
 
-void m68k_save_result(m68kinst * inst, m68k_options * opts)
+void check_user_mode_swap_ssp_usp(m68k_options *opts) //Emits a test of bit 5 of the context status byte and a conditional stack pointer swap
 {
-	code_info *code = &opts->gen.code;
-	if (inst->dst.addr_mode != MODE_REG && inst->dst.addr_mode != MODE_AREG && inst->dst.addr_mode != MODE_UNUSED) {
-		if (inst->dst.addr_mode == MODE_AREG_PREDEC && inst->src.addr_mode == MODE_AREG_PREDEC && inst->op != M68K_MOVE) {
-			areg_to_native(opts, inst->dst.params.regs.pri, opts->gen.scratch2);
-		}
-		switch (inst->extra.size)
-		{
-		case OPSIZE_BYTE:
-			call(code, opts->write_8);
-			break;
-		case OPSIZE_WORD:
-			call(code, opts->write_16);
-			break;
-		case OPSIZE_LONG:
-			call(code, opts->write_32_lowfirst);
-			break;
-		}
-	}
+	code_info * code = &opts->gen.code;
+	//Check if we've switched to user mode and swap stack pointers if needed
+	bt_irdisp(code, 5, opts->gen.context_reg, offsetof(m68k_context, status), SZ_B);
+	code_ptr end_off = code->cur + 1; //presumably the displacement byte of the jcc emitted next; patched below
+	jcc(code, CC_C, code->cur + 2); //placeholder target; the branch skips the swap when the tested bit is set
+	swap_ssp_usp(opts);
+	*end_off = code->cur - (end_off + 1); //patch the jump so it lands just after the swap code
 }
 
 void translate_m68k_move(m68k_options * opts, m68kinst * inst)
@@ -604,10 +582,8 @@
 		break;
 	case MODE_AREG_PREDEC:
 		dec_amount = inst->extra.size == OPSIZE_WORD ? 2 : (inst->extra.size == OPSIZE_LONG ? 4 : (inst->dst.params.regs.pri == 7 ? 2 : 1));
-		subi_areg(opts, dec_amount, inst->dst.params.regs.pri);
 	case MODE_AREG_INDIRECT:
 	case MODE_AREG_POSTINC:
-		areg_to_native(opts, inst->dst.params.regs.pri, opts->gen.scratch2);
 		if (src.mode == MODE_REG_DIRECT) {
 			if (src.base != opts->gen.scratch1) {
 				mov_rr(code, src.base, opts->gen.scratch1, inst->extra.size);
@@ -617,6 +593,10 @@
 		} else {
 			mov_ir(code, src.disp, opts->gen.scratch1, inst->extra.size);
 		}
+		if (inst->dst.addr_mode == MODE_AREG_PREDEC) {
+			subi_areg(opts, dec_amount, inst->dst.params.regs.pri);
+		}
+		areg_to_native(opts, inst->dst.params.regs.pri, opts->gen.scratch2);
 		break;
 	case MODE_AREG_DISPLACE:
 		cycles(&opts->gen, BUS);
@@ -821,7 +801,7 @@
 	if (inst->extra.cond == COND_TRUE) {
 		jump_m68k_abs(opts, after + disp);
 	} else {
-		code_ptr dest_addr = get_native_address(opts->gen.native_code_map, after + disp);
+		code_ptr dest_addr = get_native_address(opts, after + disp);
 		uint8_t cond = m68k_eval_cond(opts, inst->extra.cond);
 		if (!dest_addr) {
 			opts->gen.deferred = defer_address(opts->gen.deferred, after + disp, code->cur + 2);
@@ -1021,8 +1001,8 @@
 		//Memory shift
 		shift_ir(code, 1, dst_op->base, SZ_W);
 	} else {
-		cycles(&opts->gen, inst->extra.size == OPSIZE_LONG ? 8 : 6);
 		if (src_op->mode == MODE_IMMED) {
+			cycles(&opts->gen, (inst->extra.size == OPSIZE_LONG ? 8 : 6) + 2 * src_op->disp);
 			if (src_op->disp != 1 && inst->op == M68K_ASL) {
 				set_flag(opts, 0, FLAG_V);
 				for (int i = 0; i < src_op->disp; i++) {
@@ -1046,6 +1026,7 @@
 				set_flag_cond(opts, CC_O, FLAG_V);
 			}
 		} else {
+			cycles(&opts->gen, inst->extra.size == OPSIZE_LONG ? 8 : 6);
 			if (src_op->base != RCX) {
 				if (src_op->mode == MODE_REG_DIRECT) {
 					mov_rr(code, src_op->base, RCX, SZ_B);
@@ -1075,8 +1056,9 @@
 			jmp(code, code->cur + 2);
 			*nz_off = code->cur - (nz_off + 1);
 			//add 2 cycles for every bit shifted
-			add_rr(code, RCX, CYCLES, SZ_D);
-			add_rr(code, RCX, CYCLES, SZ_D);
+			mov_ir(code, 2 * opts->gen.clock_divider, opts->gen.scratch2, SZ_D);
+			imul_rr(code, RCX, opts->gen.scratch2, SZ_D);
+			add_rr(code, opts->gen.scratch2, opts->gen.cycles, SZ_D);
 			if (inst->op == M68K_ASL) {
 				//ASL has Overflow flag behavior that depends on all of the bits shifted through the MSB
 				//Easiest way to deal with this is to shift one bit at a time
@@ -1241,7 +1223,7 @@
 
 void op_rrdisp(code_info *code, m68kinst *inst, uint8_t src, uint8_t dst, int32_t disp, uint8_t size)
 {
-	switch (inst->op)
+	switch(inst->op)
 	{
 	case M68K_ADD:  add_rrdisp(code, src, dst, disp, size); break;
 	case M68K_ADDX: adc_rrdisp(code, src, dst, disp, size); break;
@@ -1381,8 +1363,12 @@
 		retn(code);
 		return;
 	}
-	mov_ir(code, inst->address, opts->gen.scratch1, SZ_D);
-	call(code, (code_ptr)m68k_invalid);
+	mov_ir(code, (int64_t)stderr, RDI, SZ_PTR);
+	mov_ir(code, (int64_t)"Invalid instruction at %X\n", RSI, SZ_PTR);
+	mov_ir(code, inst->address, RDX, SZ_D);
+	call_args_abi(code, (code_ptr)fprintf, 3, RDI, RSI, RDX);
+	mov_ir(code, 1, RDI, SZ_D);
+	call_args(code, (code_ptr)exit, 1, RDI);
 }
 
 void translate_m68k_abcd_sbcd(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op)
@@ -1402,19 +1388,73 @@
 			mov_rdispr(code, dst_op->base, dst_op->disp, opts->gen.scratch1, SZ_B);
 		}
 	}
+	uint8_t other_reg;
+	//WARNING: This may need adjustment if register assignments change
+	if (opts->gen.scratch2 > RBX) {
+		other_reg = RAX;
+		xchg_rr(code, opts->gen.scratch2, RAX, SZ_D);
+	} else {
+		other_reg = opts->gen.scratch2;
+	}
+	mov_rr(code, opts->gen.scratch1, opts->gen.scratch1 + (AH-RAX), SZ_B);
+	mov_rr(code, other_reg, other_reg + (AH-RAX), SZ_B);
+	and_ir(code, 0xF0, opts->gen.scratch1, SZ_B);
+	and_ir(code, 0xF0, other_reg, SZ_B);
+	and_ir(code, 0xF, opts->gen.scratch1 + (AH-RAX), SZ_B);
+	and_ir(code, 0xF, other_reg + (AH-RAX), SZ_B);
+	//do op on low nibble
 	flag_to_carry(opts, FLAG_X);
-	jcc(code, CC_NC, code->cur + 5);
 	if (inst->op == M68K_ABCD) {
-		add_ir(code, 1, opts->gen.scratch1, SZ_B);
+		adc_rr(code, other_reg + (AH-RAX), opts->gen.scratch1 + (AH-RAX), SZ_B);
 	} else {
-		sub_ir(code, 1, opts->gen.scratch1, SZ_B);
+		sbb_rr(code, other_reg + (AH-RAX), opts->gen.scratch1 + (AH-RAX), SZ_B);
+	}
+	cmp_ir(code, 0xA, opts->gen.scratch1 + (AH-RAX), SZ_B);
+	code_ptr no_adjust = code->cur+1;
+	//add correction factor if necessary
+	jcc(code, CC_B, no_adjust);
+	if (inst->op == M68K_ABCD) {
+		add_ir(code, 6, opts->gen.scratch1 + (AH-RAX), SZ_B);
+	} else {
+		sub_ir(code, 6, opts->gen.scratch1 + (AH-RAX), SZ_B);
 	}
-	call(code, (code_ptr) (inst->op == M68K_ABCD ? bcd_add : bcd_sub));
-	reg_to_flag(opts, CH, FLAG_C);
-	reg_to_flag(opts, CH, FLAG_X);
+	*no_adjust = code->cur - (no_adjust+1);
+	//add low nibble result to one of the high nibble operands
+	add_rr(code, opts->gen.scratch1 + (AH-RAX), opts->gen.scratch1, SZ_B);
+	if (inst->op == M68K_ABCD) {
+		add_rr(code, other_reg, opts->gen.scratch1, SZ_B);
+	} else {
+		sub_rr(code, other_reg, opts->gen.scratch1, SZ_B);
+	}
+	if (opts->gen.scratch2 > RBX) {
+		mov_rr(code, opts->gen.scratch2, RAX, SZ_D);
+	}
+	set_flag(opts, 0, FLAG_C);
+	set_flag(opts, 0, FLAG_V);
+	code_ptr def_adjust = code->cur+1;
+	jcc(code, CC_C, def_adjust);
+	cmp_ir(code, 0xA0, opts->gen.scratch1, SZ_B);
+	no_adjust = code->cur+1;
+	jcc(code, CC_B, no_adjust);
+	*def_adjust = code->cur - (def_adjust + 1);
+	set_flag(opts, 1, FLAG_C);
+	if (inst->op == M68K_ABCD) {
+		add_ir(code, 0x60, opts->gen.scratch1, SZ_B);
+	} else {
+		sub_ir(code, 0x60, opts->gen.scratch1, SZ_B);
+	}
+	//V flag is set based on the result of the addition of the
+	//result and the correction factor
+	set_flag_cond(opts, CC_O, FLAG_V);
+	*no_adjust = code->cur - (no_adjust+1);
+	flag_to_flag(opts, FLAG_C, FLAG_X);
+
 	cmp_ir(code, 0, opts->gen.scratch1, SZ_B);
-	jcc(code, CC_Z, code->cur + 4);
+	set_flag_cond(opts, CC_S, FLAG_N);
+	code_ptr no_setz = code->cur+1;
+	jcc(code, CC_Z, no_setz);
 	set_flag(opts, 0, FLAG_Z);
+	*no_setz = code->cur - (no_setz + 1);
 	if (dst_op->base != opts->gen.scratch1) {
 		if (dst_op->mode == MODE_REG_DIRECT) {
 			mov_rr(code, opts->gen.scratch1, dst_op->base, SZ_B);
@@ -1444,7 +1484,7 @@
 {
 	code_info *code = &opts->gen.code;
 	cycles(&opts->gen, inst->extra.size == OPSIZE_BYTE ? 4 : (
-		inst->op == M68K_BTST ? 6 : (inst->op == M68K_BCLR ? 10 : 8))
+			inst->op == M68K_BTST ? 6 : (inst->op == M68K_BCLR ? 10 : 8))
 	);
 	if (src_op->mode == MODE_IMMED) {
 		if (inst->extra.size == OPSIZE_BYTE) {
@@ -1472,9 +1512,9 @@
 					mov_rdispr(code, src_op->base, src_op->disp, opts->gen.scratch1, SZ_B);
 				}
 				src_op->base = opts->gen.scratch1;
+				}
 			}
-		}
-		uint8_t size = inst->extra.size;
+			uint8_t size = inst->extra.size;
 		if (dst_op->mode == MODE_REG_DISPLACE8) {
 			if (src_op->base != opts->gen.scratch1 && src_op->base != opts->gen.scratch2) {
 				if (src_op->mode == MODE_REG_DIRECT) {
@@ -1514,7 +1554,7 @@
 }
 
 void translate_m68k_chk(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op)
-{
+	{
 	code_info *code = &opts->gen.code;
 	cycles(&opts->gen, 6);
 	if (dst_op->mode == MODE_REG_DIRECT) {
@@ -1602,6 +1642,20 @@
 			movzx_rdispr(code, src_op->base, src_op->disp, opts->gen.scratch2, SZ_W, SZ_D);
 		}
 	}
+	uint32_t isize = 2;
+	switch(inst->src.addr_mode)
+	{
+	case MODE_AREG_DISPLACE:
+	case MODE_AREG_INDEX_DISP8:
+	case MODE_ABSOLUTE_SHORT:
+	case MODE_PC_INDEX_DISP8:
+	case MODE_IMMEDIATE:
+		isize = 4;
+		break;
+	case MODE_ABSOLUTE:
+		isize = 6;
+		break;
+	}
 	cmp_ir(code, 0, opts->gen.scratch2, SZ_D);
 	check_alloc_code(code, 6*MAX_INST_LEN);
 	code_ptr not_zero = code->cur + 1;
@@ -1609,7 +1663,7 @@
 	pop_r(code, RAX);
 	pop_r(code, RDX);
 	mov_ir(code, VECTOR_INT_DIV_ZERO, opts->gen.scratch2, SZ_D);
-	mov_ir(code, inst->address+2, opts->gen.scratch1, SZ_D);
+	mov_ir(code, inst->address+isize, opts->gen.scratch1, SZ_D);
 	jmp(code, opts->trap);
 	*not_zero = code->cur - (not_zero+1);
 	if (inst->op == M68K_DIVS) {
@@ -1646,8 +1700,13 @@
 	}
 	cmp_ir(code, 0, RAX, SZ_W);
 	pop_r(code, RAX);
-	pop_r(code, RDX);
-	update_flags(opts, V0|Z|N);
+	if (dst_op->base == RDX) {
+		update_flags(opts, V0|Z|N);
+		add_ir(code, sizeof(void *), RSP, SZ_D);
+	} else {
+		pop_r(code, RDX);
+		update_flags(opts, V0|Z|N);
+	}
 	code_ptr end_off = code->cur + 1;
 	jmp(code, code->cur + 2);
 	*norm_off = code->cur - (norm_off + 1);
@@ -1808,8 +1867,10 @@
 			and_ir(code, 63, opts->gen.scratch1, SZ_D);
 			code_ptr zero_off = code->cur + 1;
 			jcc(code, CC_Z, code->cur + 2);
-			add_rr(code, opts->gen.scratch1, CYCLES, SZ_D);
-			add_rr(code, opts->gen.scratch1, CYCLES, SZ_D);
+			//add 2 cycles for every bit shifted
+			mov_ir(code, 2 * opts->gen.clock_divider, opts->gen.scratch2, SZ_D);
+			imul_rr(code, RCX, opts->gen.scratch2, SZ_D);
+			add_rr(code, opts->gen.scratch2, opts->gen.cycles, SZ_D);
 			cmp_ir(code, 32, opts->gen.scratch1, SZ_B);
 			code_ptr norm_off = code->cur + 1;
 			jcc(code, CC_L, code->cur + 2);
@@ -1865,12 +1926,7 @@
 {
 	code_info *code = &opts->gen.code;
 	call(code, opts->gen.save_context);
-#ifdef X86_64
-	mov_rr(code, opts->gen.context_reg, RDI, SZ_PTR);
-#else
-	push_r(code, opts->gen.context_reg);
-#endif
-	call(code, (code_ptr)print_regs_exit);
+	call_args(code, (code_ptr)print_regs_exit, 1, opts->gen.context_reg);
 }
 
 #define BIT_SUPERVISOR 5
@@ -1896,7 +1952,7 @@
 		} else {
 			or_irdisp(code, inst->src.params.immed >> 8, opts->gen.context_reg, offsetof(m68k_context, status), SZ_B);
 		}
-		if ((base_flag == X0) ^ (((inst->src.params.immed >> 8) & (1 << BIT_SUPERVISOR)) > 0)) {
+		if (inst->op == M68K_ANDI_SR && !(inst->src.params.immed & (1 << (BIT_SUPERVISOR + 8)))) {
 			//leave supervisor mode
 			swap_ssp_usp(opts);
 		}
@@ -1968,7 +2024,12 @@
 				mov_rdispr(code, src_op->base, src_op->disp, opts->gen.scratch1, SZ_W);
 			}
 		}
-		call(code, inst->op == M68K_MOVE_SR ? opts->set_sr : opts->set_ccr);
+		if (inst->op == M68K_MOVE_SR) {
+			call(code, opts->set_sr);
+			call(code, opts->do_sync);
+		} else {
+			call(code, opts->set_ccr);
+		}
 		cycles(&opts->gen, 12);
 	}
 }
@@ -2016,56 +2077,12 @@
 	m68k_save_result(inst, opts);
 }
 
-void translate_m68k_reset(m68k_options *opts, m68kinst *inst)
-{
-	code_info *code = &opts->gen.code;
-	call(code, opts->gen.save_context);
-#ifdef X86_64
-	mov_rr(code, opts->gen.context_reg, RDI, SZ_PTR);
-#else
-	push_r(code, opts->gen.context_reg);
-#endif
-	call(code, (code_ptr)print_regs_exit);
-}
-
-void translate_m68k_rte(m68k_options *opts, m68kinst *inst)
-{
-	code_info *code = &opts->gen.code;
-	//TODO: Trap if not in system mode
-	//Read saved SR
-	areg_to_native(opts, 7, opts->gen.scratch1);
-	call(code, opts->read_16);
-	addi_areg(opts, 2, 7);
-	call(code, opts->set_sr);
-	//Read saved PC
-	areg_to_native(opts, 7, opts->gen.scratch1);
-	call(code, opts->read_32);
-	addi_areg(opts, 4, 7);
-	//Check if we've switched to user mode and swap stack pointers if needed
-	bt_irdisp(code, 5, opts->gen.context_reg, offsetof(m68k_context, status), SZ_B);
-	code_ptr end_off = code->cur + 1;
-	jcc(code, CC_C, code->cur + 2);
-	swap_ssp_usp(opts);
-	*end_off = code->cur - (end_off+1);
-	//Get native address, sync components, recalculate integer points and jump to returned address
-	call(code, opts->native_addr_and_sync);
-	jmp_r(code, opts->gen.scratch1);
-}
-
 void translate_out_of_bounds(code_info *code)
 {
 	xor_rr(code, RDI, RDI, SZ_D);
-#ifdef X86_32
-	push_r(code, RDI);
-#endif
-	call(code, (code_ptr)exit);
+	call_args(code, (code_ptr)exit, 1, RDI);
 }
 
-void check_code_prologue(code_info *code)
-{
-	check_alloc_code(code, MAX_INST_LEN*4);
-};
-
 void nop_fill_or_jmp_next(code_info *code, code_ptr old_end, code_ptr next_inst)
 {
 	if (next_inst == old_end && next_inst - code->cur < 2) {
@@ -2083,7 +2100,7 @@
 	if (inst_start) {
 		m68k_options * options = context->options;
 		code_info *code = &options->gen.code;
-		code_ptr dst = get_native_address(context->native_code_map, inst_start);
+		code_ptr dst = get_native_address(context->options, inst_start);
 		code_info orig;
 		orig.cur = dst;
 		orig.last = dst + 128;
@@ -2093,14 +2110,7 @@
 			options->retrans_stub = code->cur;
 			call(code, options->gen.save_context);
 			push_r(code, options->gen.context_reg);
-#ifdef X86_32
-			push_r(code, options->gen.context_reg);
-			push_r(code, options->gen.scratch2);
-#endif
-			call(code, (code_ptr)m68k_retranslate_inst);
-#ifdef X86_32
-			add_ir(code, 8, RSP, SZ_D);
-#endif
+			call_args(code,(code_ptr)m68k_retranslate_inst, 2, options->gen.scratch2, options->gen.context_reg);
 			pop_r(code, options->gen.context_reg);
 			mov_rr(code, RAX, options->gen.scratch1, SZ_PTR);
 			call(code, options->gen.load_context);
@@ -2122,7 +2132,7 @@
 	mov_ir(&native, address, opts->gen.scratch1, SZ_D);
 	if (!bp_stub) {
 		code_info *code = &opts->gen.code;
-		check_alloc_code(code, 5);
+		check_code_prologue(code);
 		bp_stub = code->cur;
 		call(&native, bp_stub);
 
@@ -2134,17 +2144,7 @@
 		//Save context and call breakpoint handler
 		call(code, opts->gen.save_context);
 		push_r(code, opts->gen.scratch1);
-#ifdef X86_64
-		mov_rr(code, opts->gen.context_reg, RDI, SZ_PTR);
-		mov_rr(code, opts->gen.scratch1, RSI, SZ_D);
-#else
-		push_r(code, opts->gen.scratch1);
-		push_r(code, opts->gen.context_reg);
-#endif
-		call(code, bp_handler);
-#ifdef X86_32
-		add_ir(code, 8, RSP, SZ_D);
-#endif
+		call_args_abi(code, bp_handler, 2, opts->gen.context_reg, opts->gen.scratch1);
 		mov_rr(code, RAX, opts->gen.context_reg, SZ_PTR);
 		//Restore context
 		call(code, opts->gen.load_context);
@@ -2164,15 +2164,20 @@
 	}
 }
 
-void init_m68k_opts(m68k_options * opts, memmap_chunk * memmap, uint32_t num_chunks)
+void init_m68k_opts(m68k_options * opts, memmap_chunk * memmap, uint32_t num_chunks, uint32_t clock_divider)
 {
 	memset(opts, 0, sizeof(*opts));
+	opts->gen.memmap = memmap;
+	opts->gen.memmap_chunks = num_chunks;
 	opts->gen.address_size = SZ_D;
 	opts->gen.address_mask = 0xFFFFFF;
+	opts->gen.byte_swap = 1;
 	opts->gen.max_address = 0x1000000;
 	opts->gen.bus_cycles = BUS;
+	opts->gen.clock_divider = clock_divider;
 	opts->gen.mem_ptr_off = offsetof(m68k_context, mem_pointers);
 	opts->gen.ram_flags_off = offsetof(m68k_context, ram_code_flags);
+	opts->gen.ram_flags_shift = 11;
 	for (int i = 0; i < 8; i++)
 	{
 		opts->dregs[i] = opts->aregs[i] = -1;
@@ -2213,8 +2218,10 @@
 	opts->gen.native_code_map = malloc(sizeof(native_map_slot) * NATIVE_MAP_CHUNKS);
 	memset(opts->gen.native_code_map, 0, sizeof(native_map_slot) * NATIVE_MAP_CHUNKS);
 	opts->gen.deferred = NULL;
-	opts->gen.ram_inst_sizes = malloc(sizeof(uint8_t *) * 64);
-	memset(opts->gen.ram_inst_sizes, 0, sizeof(uint8_t *) * 64);
+
+	uint32_t inst_size_size = sizeof(uint8_t *) * ram_size(&opts->gen) / 1024;
+	opts->gen.ram_inst_sizes = malloc(inst_size_size);
+	memset(opts->gen.ram_inst_sizes, 0, inst_size_size);
 
 	code_info *code = &opts->gen.code;
 	init_code_info(code);
@@ -2238,9 +2245,11 @@
 
 	opts->gen.load_context = code->cur;
 	for (int i = 0; i < 5; i++)
+	{
 		if (opts->flag_regs[i] >= 0) {
 			mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, flags) + i, opts->flag_regs[i], SZ_B);
 		}
+	}
 	for (int i = 0; i < 8; i++)
 	{
 		if (opts->dregs[i] >= 0) {
@@ -2250,63 +2259,30 @@
 			mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, aregs) + sizeof(uint32_t) * i, opts->aregs[i], SZ_D);
 		}
 	}
-	mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, current_cycle), CYCLES, SZ_D);
-	mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, target_cycle), LIMIT, SZ_D);
+	mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, current_cycle), opts->gen.cycles, SZ_D);
+	mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, target_cycle), opts->gen.limit, SZ_D);
 	retn(code);
 
 	opts->start_context = (start_fun)code->cur;
+	save_callee_save_regs(code);
 #ifdef X86_64
 	if (opts->gen.scratch2 != RDI) {
 		mov_rr(code, RDI, opts->gen.scratch2, SZ_PTR);
 	}
-	//save callee save registers
-	push_r(code, RBP);
-	push_r(code, R12);
-	push_r(code, R13);
-	push_r(code, R14);
-	push_r(code, R15);
 #else
-	//save callee save registers
-	push_r(code, RBP);
-	push_r(code, RBX);
-	push_r(code, RSI);
-	push_r(code, RDI);
-
 	mov_rdispr(code, RSP, 20, opts->gen.scratch2, SZ_D);
 	mov_rdispr(code, RSP, 24, opts->gen.context_reg, SZ_D);
 #endif
 	call(code, opts->gen.load_context);
 	call_r(code, opts->gen.scratch2);
 	call(code, opts->gen.save_context);
-#ifdef X86_64
-	//restore callee save registers
-	pop_r(code, R15);
-	pop_r(code, R14);
-	pop_r(code, R13);
-	pop_r(code, R12);
-	pop_r(code, RBP);
-#else
-	pop_r(code, RDI);
-	pop_r(code, RSI);
-	pop_r(code, RBX);
-	pop_r(code, RBP);
-#endif
+	restore_callee_save_regs(code);
 	retn(code);
 
 	opts->native_addr = code->cur;
 	call(code, opts->gen.save_context);
 	push_r(code, opts->gen.context_reg);
-#ifdef X86_64
-	mov_rr(code, opts->gen.context_reg, RDI, SZ_PTR); //move context to 1st arg reg
-	mov_rr(code, opts->gen.scratch1, RSI, SZ_D); //move address to 2nd arg reg
-#else
-	push_r(code, opts->gen.scratch1);
-	push_r(code, opts->gen.context_reg);
-#endif
-	call(code, (code_ptr)get_native_address_trans);
-#ifdef X86_32
-	add_ir(code, 8, RSP, SZ_D);
-#endif
+	call_args(code, (code_ptr)get_native_address_trans, 2, opts->gen.context_reg, opts->gen.scratch1);
 	mov_rr(code, RAX, opts->gen.scratch1, SZ_PTR); //move result to scratch reg
 	pop_r(code, opts->gen.context_reg);
 	call(code, opts->gen.load_context);
@@ -2315,74 +2291,27 @@
 	opts->native_addr_and_sync = code->cur;
 	call(code, opts->gen.save_context);
 	push_r(code, opts->gen.scratch1);
-#ifdef X86_64
-	mov_rr(code, opts->gen.context_reg, RDI, SZ_PTR);
-	xor_rr(code, RSI, RSI, SZ_D);
-	test_ir(code, 8, RSP, SZ_PTR); //check stack alignment
-	code_ptr do_adjust_rsp = code->cur + 1;
-	jcc(code, CC_NZ, code->cur + 2);
-	call(code, (code_ptr)sync_components);
-	code_ptr no_adjust_rsp = code->cur + 1;
-	jmp(code, code->cur + 2);
-	*do_adjust_rsp = code->cur - (do_adjust_rsp+1);
-	sub_ir(code, 8, RSP, SZ_PTR);
-	call(code, (code_ptr)sync_components);
-	add_ir(code, 8, RSP, SZ_PTR);
-	*no_adjust_rsp = code->cur - (no_adjust_rsp+1);
-	pop_r(code, RSI);
-	push_r(code, RAX);
-	mov_rr(code, RAX, RDI, SZ_PTR);
-	call(code, (code_ptr)get_native_address_trans);
-#else
-	//TODO: Add support for pushing a constant in gen_x86
-	xor_rr(code, RAX, RAX, SZ_D);
-	push_r(code, RAX);
-	push_r(code, opts->gen.context_reg);
-	call(code, (code_ptr)sync_components);
-	add_ir(code, 8, RSP, SZ_D);
+
+	xor_rr(code, opts->gen.scratch1, opts->gen.scratch1, SZ_D);
+	call_args_abi(code, (code_ptr)sync_components, 2, opts->gen.context_reg, opts->gen.scratch1);
 	pop_r(code, RSI); //restore saved address from opts->gen.scratch1
 	push_r(code, RAX); //save context pointer for later
-	push_r(code, RSI); //2nd arg -- address
-	push_r(code, RAX); //1st arg -- context pointer
-	call(code, (code_ptr)get_native_address_trans);
-	add_ir(code, 8, RSP, SZ_D);
-#endif
-
+	call_args(code, (code_ptr)get_native_address_trans, 2, RAX, RSI);
 	mov_rr(code, RAX, opts->gen.scratch1, SZ_PTR); //move result to scratch reg
 	pop_r(code, opts->gen.context_reg);
 	call(code, opts->gen.load_context);
 	retn(code);
 
 	opts->gen.handle_cycle_limit = code->cur;
-	cmp_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, sync_cycle), CYCLES, SZ_D);
+	cmp_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, sync_cycle), opts->gen.cycles, SZ_D);
 	code_ptr skip_sync = code->cur + 1;
 	jcc(code, CC_C, code->cur + 2);
 	opts->do_sync = code->cur;
 	push_r(code, opts->gen.scratch1);
 	push_r(code, opts->gen.scratch2);
 	call(code, opts->gen.save_context);
-#ifdef X86_64
-	mov_rr(code, opts->gen.context_reg, RDI, SZ_PTR);
-	xor_rr(code, RSI, RSI, SZ_D);
-	test_ir(code, 8, RSP, SZ_D);
-	code_ptr adjust_rsp = code->cur + 1;
-	jcc(code, CC_NZ, code->cur + 2);
-	call(code, (code_ptr)sync_components);
-	code_ptr no_adjust = code->cur + 1;
-	jmp(code, code->cur + 2);
-	*adjust_rsp = code->cur - (adjust_rsp + 1);
-	sub_ir(code, 8, RSP, SZ_PTR);
-	call(code, (code_ptr)sync_components);
-	add_ir(code, 8, RSP, SZ_PTR);
-	*no_adjust = code->cur - (no_adjust+1);
-#else
-	//TODO: Add support for pushing a constant in gen_x86
-	xor_rr(code, RAX, RAX, SZ_D);
-	push_r(code, RAX);
-	push_r(code, opts->gen.context_reg);
-	call(code, (code_ptr)sync_components);
-	add_ir(code, 8, RSP, SZ_D);
-#endif
+	xor_rr(code, opts->gen.scratch1, opts->gen.scratch1, SZ_D);
+	call_args_abi(code, (code_ptr)sync_components, 2, opts->gen.context_reg, opts->gen.scratch1);
 	mov_rr(code, RAX, opts->gen.context_reg, SZ_PTR);
 	call(code, opts->gen.load_context);
 	pop_r(code, opts->gen.scratch2);
@@ -2390,10 +2319,12 @@
 	*skip_sync = code->cur - (skip_sync+1);
 	retn(code);
 
-	opts->read_16 = gen_mem_fun(&opts->gen, memmap, num_chunks, READ_16);
-	opts->read_8 = gen_mem_fun(&opts->gen, memmap, num_chunks, READ_8);
-	opts->write_16 = gen_mem_fun(&opts->gen, memmap, num_chunks, WRITE_16);
-	opts->write_8 = gen_mem_fun(&opts->gen, memmap, num_chunks, WRITE_8);
+	opts->gen.handle_code_write = (code_ptr)m68k_handle_code_write;
+
+	opts->read_16 = gen_mem_fun(&opts->gen, memmap, num_chunks, READ_16, NULL);
+	opts->read_8 = gen_mem_fun(&opts->gen, memmap, num_chunks, READ_8, NULL);
+	opts->write_16 = gen_mem_fun(&opts->gen, memmap, num_chunks, WRITE_16, NULL);
+	opts->write_8 = gen_mem_fun(&opts->gen, memmap, num_chunks, WRITE_8, NULL);
 
 	opts->read_32 = code->cur;
 	push_r(code, opts->gen.scratch1);
@@ -2490,46 +2421,23 @@
 	retn(code);
 
 	opts->gen.handle_cycle_limit_int = code->cur;
-	cmp_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, int_cycle), CYCLES, SZ_D);
+	cmp_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, int_cycle), opts->gen.cycles, SZ_D);
 	code_ptr do_int = code->cur + 1;
 	jcc(code, CC_NC, code->cur + 2);
-	cmp_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, sync_cycle), CYCLES, SZ_D);
+	cmp_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, sync_cycle), opts->gen.cycles, SZ_D);
 	skip_sync = code->cur + 1;
 	jcc(code, CC_C, code->cur + 2);
 	call(code, opts->gen.save_context);
-#ifdef X86_64
-	mov_rr(code, opts->gen.context_reg, RDI, SZ_PTR);
-	mov_rr(code, opts->gen.scratch1, RSI, SZ_D);
-	test_ir(code, 8, RSP, SZ_D);
-	adjust_rsp = code->cur + 1;
-	jcc(code, CC_NZ, code->cur + 2);
-	call(code, (code_ptr)sync_components);
-	no_adjust = code->cur + 1;
-	jmp(code, code->cur + 2);
-	*adjust_rsp = code->cur - (adjust_rsp + 1);
-	sub_ir(code, 8, RSP, SZ_PTR);
-	call(code, (code_ptr)sync_components);
-	add_ir(code, 8, RSP, SZ_PTR);
-	*no_adjust = code->cur - (no_adjust+1);
-#else
-	push_r(code, opts->gen.scratch1);
-	push_r(code, opts->gen.context_reg);
-	call(code, (code_ptr)sync_components);
-	add_ir(code, 8, RSP, SZ_D);
-#endif
+	call_args_abi(code, (code_ptr)sync_components, 2, opts->gen.context_reg, opts->gen.scratch1);
 	mov_rr(code, RAX, opts->gen.context_reg, SZ_PTR);
 	jmp(code, opts->gen.load_context);
 	*skip_sync = code->cur - (skip_sync+1);
 	retn(code);
 	*do_int = code->cur - (do_int+1);
 	//set target cycle to sync cycle
-	mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, sync_cycle), LIMIT, SZ_D);
+	mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, sync_cycle), opts->gen.limit, SZ_D);
 	//swap USP and SSP if not already in supervisor mode
-	bt_irdisp(code, 5, opts->gen.context_reg, offsetof(m68k_context, status), SZ_B);
-	code_ptr already_supervisor = code->cur + 1;
-	jcc(code, CC_C, code->cur + 2);
-	swap_ssp_usp(opts);
-	*already_supervisor = code->cur - (already_supervisor+1);
+	check_user_mode_swap_ssp_usp(opts);
 	//save PC
 	subi_areg(opts, 4, 7);
 	areg_to_native(opts, 7, opts->gen.scratch2);
@@ -2559,11 +2467,7 @@
 	opts->trap = code->cur;
 	push_r(code, opts->gen.scratch2);
 	//swap USP and SSP if not already in supervisor mode
-	bt_irdisp(code, 5, opts->gen.context_reg, offsetof(m68k_context, status), SZ_B);
-	already_supervisor = code->cur + 1;
-	jcc(code, CC_C, code->cur + 2);
-	swap_ssp_usp(opts);
-	*already_supervisor = code->cur - (already_supervisor+1);
+	check_user_mode_swap_ssp_usp(opts);
 	//save PC
 	subi_areg(opts, 4, 7);
 	areg_to_native(opts, 7, opts->gen.scratch2);
@@ -2582,4 +2486,11 @@
 	call(code, opts->native_addr_and_sync);
 	cycles(&opts->gen, 18);
 	jmp_r(code, opts->gen.scratch1);
+	
+	opts->odd_address = code->cur;
+	mov_ir(code, (int64_t)stderr, RDI, SZ_PTR);
+	mov_ir(code, (int64_t)"Attempt to execute code at odd address\n", RSI, SZ_PTR);
+	call_args_abi(code, (code_ptr)fprintf, 2, RDI, RSI, RDX);
+	xor_rr(code, RDI, RDI, SZ_D);
+	call_args(code, (code_ptr)exit, 1, RDI);
 }
--- a/m68k_internal.h	Thu May 28 21:09:33 2015 -0700
+++ b/m68k_internal.h	Thu May 28 21:19:55 2015 -0700
@@ -10,7 +10,6 @@
 
 //functions implemented in host CPU specfic file
 void translate_out_of_bounds(code_info *code);
-void check_code_prologue(code_info *code);
 void areg_to_native(m68k_options *opts, uint8_t reg, uint8_t native_reg);
 void dreg_to_native(m68k_options *opts, uint8_t reg, uint8_t native_reg);
 void areg_to_native_sx(m68k_options *opts, uint8_t reg, uint8_t native_reg);
@@ -32,6 +31,7 @@
 void calc_index_disp8(m68k_options *opts, m68k_op_info *op, uint8_t native_reg);
 void calc_areg_index_disp8(m68k_options *opts, m68k_op_info *op, uint8_t native_reg);
 void nop_fill_or_jmp_next(code_info *code, code_ptr old_end, code_ptr next_inst);
+void check_user_mode_swap_ssp_usp(m68k_options *opts);
 
 //functions implemented in m68k_core.c
 int8_t native_reg(m68k_op_info * op, m68k_options * opts);
@@ -42,10 +42,11 @@
 void print_regs_exit(m68k_context * context);
 void m68k_read_size(m68k_options *opts, uint8_t size);
 void m68k_write_size(m68k_options *opts, uint8_t size);
+void m68k_save_result(m68kinst * inst, m68k_options * opts);
 void push_const(m68k_options *opts, int32_t value);
 void jump_m68k_abs(m68k_options * opts, uint32_t address);
 void swap_ssp_usp(m68k_options * opts);
-code_ptr get_native_address(native_map_slot * native_code_map, uint32_t address);
+code_ptr get_native_address(m68k_options *opts, uint32_t address);
 void map_native_address(m68k_context * context, uint32_t address, code_ptr native_addr, uint8_t size, uint8_t native_size);
 uint8_t get_native_inst_size(m68k_options * opts, uint32_t address);
 uint8_t m68k_is_terminal(m68kinst * inst);
@@ -84,8 +85,6 @@
 void translate_m68k_move_ccr_sr(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op);
 void translate_m68k_stop(m68k_options *opts, m68kinst *inst);
 void translate_m68k_move_from_sr(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op);
-void translate_m68k_reset(m68k_options *opts, m68kinst *inst);
-void translate_m68k_rte(m68k_options *opts, m68kinst *inst);
 
 //flag update bits
 #define X0  0x0001
--- a/render.h	Thu May 28 21:09:33 2015 -0700
+++ b/render.h	Thu May 28 21:19:55 2015 -0700
@@ -34,8 +34,7 @@
 
 uint32_t render_map_color(uint8_t r, uint8_t g, uint8_t b);
 void render_alloc_surfaces(vdp_context * context);
-uint8_t render_depth();
-void render_init(int width, int height, char * title, uint32_t fps, uint8_t fullscreen, uint8_t use_gl);
+void render_init(int width, int height, char * title, uint32_t fps, uint8_t fullscreen);
 void render_context(vdp_context * context);
 void render_wait_quit(vdp_context * context);
 void render_wait_psg(psg_context * context);
@@ -50,6 +49,7 @@
 int render_joystick_num_buttons(int joystick);
 int render_joystick_num_hats(int joystick);
 int render_num_joysticks();
+void process_events();
 
 
 
--- a/render_sdl.c	Thu May 28 21:09:33 2015 -0700
+++ b/render_sdl.c	Thu May 28 21:19:55 2015 -0700
@@ -11,14 +11,12 @@
 #include "io.h"
 #include "util.h"
 
-#ifndef DISABLE_OPENGL
 #include <GL/glew.h>
-#endif
 
-SDL_Surface *screen;
+SDL_Window *main_window;
+SDL_GLContext *main_context;
 uint8_t render_dbg = 0;
 uint8_t debug_pal = 0;
-uint8_t render_gl = 1;
 
 uint32_t last_frame = 0;
 
@@ -92,14 +90,9 @@
 
 uint32_t render_map_color(uint8_t r, uint8_t g, uint8_t b)
 {
-	if (render_gl) {
-		return 255 << 24 | r << 16 | g << 8 | b;
-	} else {
-		return SDL_MapRGB(screen->format, r, g, b);
-	}
+	return 255 << 24 | r << 16 | g << 8 | b;
 }
 
-#ifndef DISABLE_OPENGL
 GLuint textures[3], buffers[2], vshader, fshader, program, un_textures[2], un_width, at_pos;
 
 GLfloat vertex_data[] = {
@@ -113,22 +106,14 @@
 
 GLuint load_shader(char * fname, GLenum shader_type)
 {
-	char * parts[] = {get_home_dir(), "/.config/blastem/shaders/", fname};
+	char * parts[] = {getenv("HOME"), "/.config/blastem/shaders/", fname};
 	char * shader_path = alloc_concat_m(3, parts);
-	printf("Trying to find shader at %s\n", shader_path);
 	FILE * f = fopen(shader_path, "r");
 	free(shader_path);
 	if (!f) {
-#ifdef _WIN32
-		parts[0] = "shaders/";
-		parts[1] = fname;
-		shader_path = alloc_concat_m(2, parts);
-#else
 		parts[0] = get_exe_dir();
 		parts[1] = "/shaders/";
 		shader_path = alloc_concat_m(3, parts);
-#endif
-		printf("Trying to find shader at %s\n", shader_path);
 		f = fopen(shader_path, "r");
 		free(shader_path);
 		if (!f) {
@@ -136,7 +121,6 @@
 			return 0;
 		}
 	}
-	puts("reading shader");
 	long fsize = file_size(f);
 	GLchar * text = malloc(fsize);
 	if (fread(text, 1, fsize, f) != fsize) {
@@ -147,7 +131,6 @@
 	GLuint ret = glCreateShader(shader_type);
 	glShaderSource(ret, 1, (const GLchar **)&text, (const GLint *)&fsize);
 	free(text);
-	puts("compiling shader");
 	glCompileShader(ret);
 	GLint compile_status, loglen;
 	glGetShaderiv(ret, GL_COMPILE_STATUS, &compile_status);
@@ -163,143 +146,108 @@
 	}
 	return ret;
 }
-#endif
 
 void render_alloc_surfaces(vdp_context * context)
 {
-#ifndef DISABLE_OPENGL
-	if (render_gl) {
-		context->oddbuf = context->framebuf = malloc(512 * 256 * 4 * 2);
-		memset(context->oddbuf, 0, 512 * 256 * 4 * 2);
-		context->evenbuf = ((char *)context->oddbuf) + 512 * 256 * 4;
-		puts("generating textures");
-		glGenTextures(3, textures);
-		for (int i = 0; i < 3; i++)
-		{
-			glBindTexture(GL_TEXTURE_2D, textures[i]);
-			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
-			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
-			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
-			glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
-			if (i < 2) {
-				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 512, 256, 0, GL_BGRA, GL_UNSIGNED_BYTE, i ? context->evenbuf : context->oddbuf);
-			} else {
-				uint32_t blank = 255 << 24;
-				glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 1, 1, 0, GL_BGRA, GL_UNSIGNED_BYTE, &blank);
-			}
+	context->oddbuf = context->framebuf = malloc(512 * 256 * 4 * 2);
+	memset(context->oddbuf, 0, 512 * 256 * 4 * 2);
+	context->evenbuf = ((char *)context->oddbuf) + 512 * 256 * 4;
+	glGenTextures(3, textures);
+	for (int i = 0; i < 3; i++)
+	{
+		glBindTexture(GL_TEXTURE_2D, textures[i]);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+		if (i < 2) {
+			glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 512, 256, 0, GL_BGRA, GL_UNSIGNED_BYTE, i ? context->evenbuf : context->oddbuf);
+		} else {
+			uint32_t blank = 255 << 24;
+			glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, 1, 1, 0, GL_BGRA, GL_UNSIGNED_BYTE, &blank);
 		}
-		glGenBuffers(2, buffers);
-		glBindBuffer(GL_ARRAY_BUFFER, buffers[0]);
-		glBufferData(GL_ARRAY_BUFFER, sizeof(vertex_data), vertex_data, GL_STATIC_DRAW);
-		glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffers[1]);
-		glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(element_data), element_data, GL_STATIC_DRAW);
-		puts("Loading vertex shader");
-		vshader = load_shader(tern_find_ptr_default(config, "videovertex_shader", "default.v.glsl"), GL_VERTEX_SHADER);
-		puts("loading fragment shader");
-		fshader = load_shader(tern_find_ptr_default(config, "videofragment_shader", "default.f.glsl"), GL_FRAGMENT_SHADER);
-		puts("creating program");
-		program = glCreateProgram();
-		glAttachShader(program, vshader);
-		glAttachShader(program, fshader);
-		puts("linking program");
-		glLinkProgram(program);
-		GLint link_status;
-		glGetProgramiv(program, GL_LINK_STATUS, &link_status);
-		if (!link_status) {
-			fputs("Failed to link shader program\n", stderr);
-			exit(1);
-		}
-		un_textures[0] = glGetUniformLocation(program, "textures[0]");
-		un_textures[1] = glGetUniformLocation(program, "textures[1]");
-		un_width = glGetUniformLocation(program, "width");
-		at_pos = glGetAttribLocation(program, "pos");
-	} else {
-#endif
-		context->oddbuf = context->framebuf = malloc(320 * 240 * screen->format->BytesPerPixel * 2);
-		context->evenbuf = ((char *)context->oddbuf) + 320 * 240 * screen->format->BytesPerPixel;
-#ifndef DISABLE_OPENGL
 	}
-#endif
-	puts("alloc surfaces done");
-}
-
-uint8_t render_depth()
-{
-	return screen->format->BytesPerPixel * 8;
+	glGenBuffers(2, buffers);
+	glBindBuffer(GL_ARRAY_BUFFER, buffers[0]);
+	glBufferData(GL_ARRAY_BUFFER, sizeof(vertex_data), vertex_data, GL_STATIC_DRAW);
+	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffers[1]);
+	glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(element_data), element_data, GL_STATIC_DRAW);
+	vshader = load_shader(tern_find_ptr_default(config, "videovertex_shader", "default.v.glsl"), GL_VERTEX_SHADER);
+	fshader = load_shader(tern_find_ptr_default(config, "videofragment_shader", "default.f.glsl"), GL_FRAGMENT_SHADER);
+	program = glCreateProgram();
+	glAttachShader(program, vshader);
+	glAttachShader(program, fshader);
+	glLinkProgram(program);
+	GLint link_status;
+	glGetProgramiv(program, GL_LINK_STATUS, &link_status);
+	if (!link_status) {
+		fputs("Failed to link shader program\n", stderr);
+		exit(1);
+	}
+	un_textures[0] = glGetUniformLocation(program, "textures[0]");
+	un_textures[1] = glGetUniformLocation(program, "textures[1]");
+	un_width = glGetUniformLocation(program, "width");
+	at_pos = glGetAttribLocation(program, "pos");
 }
 
 char * caption = NULL;
 
-void render_init(int width, int height, char * title, uint32_t fps, uint8_t fullscreen, uint8_t use_gl)
+void render_init(int width, int height, char * title, uint32_t fps, uint8_t fullscreen)
 {
 	if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_JOYSTICK) < 0) {
 		fprintf(stderr, "Unable to init SDL: %s\n", SDL_GetError());
 		exit(1);
 	}
-	atexit(SDL_Quit);
-	atexit(render_close_audio);
 	printf("width: %d, height: %d\n", width, height);
-	uint32_t flags = SDL_ANYFORMAT;
+	uint32_t flags = SDL_WINDOW_OPENGL;
+
 
-#ifndef DISABLE_OPENGL
-	if (use_gl)
-	{
-		SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 5);
-		SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 5);
-		SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 5);
-		SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 0);
-		SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
-		flags = SDL_OPENGL;
-		if (fullscreen) {
-			flags |= SDL_FULLSCREEN;
-		}
-	} else {
-#else
-	{
-#endif
-		if (fullscreen) {
-			flags |= SDL_FULLSCREEN | SDL_HWSURFACE | SDL_DOUBLEBUF;
-		} else {
-			flags |= SDL_SWSURFACE;
-		}
+	SDL_GL_SetAttribute(SDL_GL_RED_SIZE, 5);
+	SDL_GL_SetAttribute(SDL_GL_GREEN_SIZE, 5);
+	SDL_GL_SetAttribute(SDL_GL_BLUE_SIZE, 5);
+	SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 0);
+	SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
+	if (fullscreen) {
+		flags |= SDL_WINDOW_FULLSCREEN_DESKTOP;
+		SDL_DisplayMode mode;
+		//TODO: Multiple monitor support
+		SDL_GetCurrentDisplayMode(0, &mode);
+		//the SDL2 migration guide suggests setting width and height to 0 when using SDL_WINDOW_FULLSCREEN_DESKTOP
+		//but that doesn't seem to work right when using OpenGL, at least on Linux anyway
+		width = mode.w;
+		height = mode.h;
 	}
-	screen = SDL_SetVideoMode(width, height, 32, flags);
-	if (!screen) {
-		fprintf(stderr, "Unable to get SDL surface: %s\n", SDL_GetError());
+	main_window = SDL_CreateWindow(title, SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, width, height, flags);
+	if (!main_window) {
+		fprintf(stderr, "Unable to create SDL window: %s\n", SDL_GetError());
+		SDL_Quit();
 		exit(1);
 	}
-	if (!use_gl && screen->format->BytesPerPixel != 2 && screen->format->BytesPerPixel != 4) {
-		fprintf(stderr, "BlastEm requires a 16-bit or 32-bit surface, SDL returned a %d-bit surface\n", screen->format->BytesPerPixel * 8);
+	SDL_GetWindowSize(main_window, &width, &height);
+	printf("Window created with size: %d x %d\n", width, height);
+	main_context = SDL_GL_CreateContext(main_window);
+	GLenum res = glewInit();
+	if (res != GLEW_OK) {
+		fprintf(stderr, "Initialization of GLEW failed with code %d\n", res);
+		SDL_Quit();
 		exit(1);
 	}
-#ifndef DISABLE_OPENGL
-	//TODO: fallback on standard rendering if OpenGL 2.0 is unavailable or if init fails
-	if (use_gl)
-	{
-		GLenum res = glewInit();
-		if (res != GLEW_OK) {
-			fprintf(stderr, "Initialization of GLEW failed with code %d\n", res);
-			exit(1);
-		}
-		if (!GLEW_VERSION_2_0) {
-			fputs("OpenGL 2.0 is unable, falling back to standard SDL rendering\n", stderr);
-			exit(1);
-		}
-		float aspect = (float)width / height;
-		if (fabs(aspect - 4.0/3.0) > 0.01 && strcmp(tern_find_ptr_default(config, "videoaspect", "normal"), "stretch")) {
-			for (int i = 0; i < 4; i++)
-			{
-				if (aspect > 4.0/3.0) {
-					vertex_data[i*2] *= (4.0/3.0)/aspect;
-				} else {
-					vertex_data[i*2+1] *= aspect/(4.0/3.0);
-				}
+	if (!GLEW_VERSION_2_0) {
+		fputs("BlastEm requires at least OpenGL 2.0, but it is unavailable\n", stderr);
+		SDL_Quit();
+		exit(1);
+	}
+	float aspect = (float)width / height;
+	if (fabs(aspect - 4.0/3.0) > 0.01 && strcmp(tern_find_ptr_default(config, "videoaspect", "normal"), "stretch")) {
+		for (int i = 0; i < 4; i++)
+		{
+			if (aspect > 4.0/3.0) {
+				vertex_data[i*2] *= (4.0/3.0)/aspect;
+			} else {
+				vertex_data[i*2+1] *= aspect/(4.0/3.0);
 			}
 		}
 	}
-	render_gl = use_gl;
-#endif
-	SDL_WM_SetCaption(title, title);
 	caption = title;
 	min_delay = 0;
 	for (int i = 0; i < 100; i++) {
@@ -343,6 +291,7 @@
 
 	if (SDL_OpenAudio(&desired, &actual) < 0) {
 		fprintf(stderr, "Unable to open SDL audio: %s\n", SDL_GetError());
+		SDL_Quit();
 		exit(1);
 	}
 	buffer_samples = actual.samples;
@@ -354,17 +303,22 @@
 		num_joysticks = MAX_JOYSTICKS;
 	}
 	for (int i = 0; i < num_joysticks; i++) {
-		printf("Joystick %d: %s\n", i, SDL_JoystickName(i));
 		SDL_Joystick * joy = joysticks[i] = SDL_JoystickOpen(i);
+		printf("Joystick %d: %s\n", i, SDL_JoystickName(joy));
 		if (joy) {
 			printf("\tNum Axes: %d\n\tNum Buttons: %d\n\tNum Hats: %d\n", SDL_JoystickNumAxes(joy), SDL_JoystickNumButtons(joy), SDL_JoystickNumHats(joy));
 		}
 	}
 	SDL_JoystickEventState(SDL_ENABLE);
+	
+	atexit(SDL_Quit);
+	atexit(render_close_audio);
 }
-#ifndef DISABLE_OPENGL
-void render_context_gl(vdp_context * context)
+
+void render_context(vdp_context * context)
 {
+	last_frame = SDL_GetTicks();
+	
 	glBindTexture(GL_TEXTURE_2D, textures[context->framebuf == context->oddbuf ? 0 : 1]);
 	glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, 320, 240, GL_BGRA, GL_UNSIGNED_BYTE, context->framebuf);;
 
@@ -380,7 +334,7 @@
 	glBindTexture(GL_TEXTURE_2D, (context->regs[REG_MODE_4] & BIT_INTERLACE) ? textures[1] : textures[2]);
 	glUniform1i(un_textures[1], 1);
 
-	glUniform1f(un_width, context->latched_mode & BIT_H40 ? 320.0f : 256.0f);
+	glUniform1f(un_width, context->regs[REG_MODE_4] & BIT_H40 ? 320.0f : 256.0f);
 
 	glBindBuffer(GL_ARRAY_BUFFER, buffers[0]);
 	glVertexAttribPointer(at_pos, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat[2]), (void *)0);
@@ -391,81 +345,7 @@
 
 	glDisableVertexAttribArray(at_pos);
 
-	SDL_GL_SwapBuffers();
-	if (context->regs[REG_MODE_4] & BIT_INTERLACE)
-	{
-		context->framebuf = context->framebuf == context->oddbuf ? context->evenbuf : context->oddbuf;
-	}
-}
-#endif
-
-uint32_t blankbuf[320*240];
-
-void render_context(vdp_context * context)
-{
-	uint16_t *buf_16;
-	uint32_t *buf_32;
-	uint8_t b,g,r;
-	last_frame = SDL_GetTicks();
-#ifndef DISABLE_OPENGL
-	if (render_gl)
-	{
-		render_context_gl(context);
-		return;
-	}
-#endif
-	if (SDL_MUSTLOCK(screen)) {
-		if (SDL_LockSurface(screen) < 0) {
-			return;
-		}
-	}
-	uint16_t repeat_x = screen->clip_rect.w / 320;
-	uint16_t repeat_y = screen->clip_rect.h / 240;
-	if (repeat_x > repeat_y) {
-		repeat_x = repeat_y;
-	} else {
-		repeat_y = repeat_x;
-	}
-	int othermask = repeat_y >> 1;
-
-	if (screen->format->BytesPerPixel == 2) {
-		uint16_t *otherbuf = (context->regs[REG_MODE_4] & BIT_INTERLACE) ? context->evenbuf : (uint16_t *)blankbuf;
-		uint16_t * oddbuf = context->oddbuf;
-		buf_16 = (uint16_t *)screen->pixels;
-		for (int y = 0; y < 240; y++) {
-			for (int i = 0; i < repeat_y; i++,buf_16 += screen->pitch/2) {
-				uint16_t *line = buf_16;
-				uint16_t *src_line = (i & othermask ? otherbuf : oddbuf) + y * 320;
-				for (int x = 0; x < 320; x++) {
-					uint16_t color = *(src_line++);
-					for (int j = 0; j < repeat_x; j++) {
-						*(line++) = color;
-					}
-				}
-			}
-		}
-	} else {
-		uint32_t *otherbuf = (context->regs[REG_MODE_4] & BIT_INTERLACE) ? context->evenbuf : (uint32_t *)blankbuf;
-		uint32_t * oddbuf = context->oddbuf;
-		buf_32 = (uint32_t *)screen->pixels;
-		for (int y = 0; y < 240; y++) {
-			for (int i = 0; i < repeat_y; i++,buf_32 += screen->pitch/4) {
-				uint32_t *line = buf_32;
-				uint32_t *src_line = (i & othermask ? otherbuf : oddbuf) + y * 320;
-				for (int x = 0; x < 320; x++) {
-					uint32_t color = *(src_line++);
-					for (int j = 0; j < repeat_x; j++) {
-						*(line++) = color;
-					}
-				}
-			}
-		}
-	}
-	if ( SDL_MUSTLOCK(screen) ) {
-		SDL_UnlockSurface(screen);
-	}
-    //SDL_UpdateRect(screen, 0, 0, screen->clip_rect.w, screen->clip_rect.h);
-    SDL_Flip(screen);
+	SDL_GL_SwapWindow(main_window);
 	if (context->regs[REG_MODE_4] & BIT_INTERLACE)
 	{
 		context->framebuf = context->framebuf == context->oddbuf ? context->evenbuf : context->oddbuf;
@@ -578,8 +458,6 @@
 	}
 	render_context(context);
 
-
-	//TODO: Figure out why this causes segfaults
 	frame_counter++;
 	if ((last_frame - start) > 1000) {
 		if (start && (last_frame-start)) {
@@ -587,8 +465,7 @@
 				fps_caption = malloc(strlen(caption) + strlen(" - 1000.1 fps") + 1);
 			}
 			sprintf(fps_caption, "%s - %.1f fps", caption, ((float)frame_counter) / (((float)(last_frame-start)) / 1000.0));
-			SDL_WM_SetCaption(fps_caption, caption);
-			fflush(stdout);
+			SDL_SetWindowTitle(main_window, fps_caption);
 		}
 		start = last_frame;
 		frame_counter = 0;
--- a/runtime.S	Thu May 28 21:09:33 2015 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-
-
-invalid_msg:
-	.asciz "Invalid instruction at %X\n"
-
-	.global m68k_invalid
-m68k_invalid:
-	lea invalid_msg(%rip), %rdi
-	mov %ecx, %esi
-	xor %rax, %rax
-	call printf
-	mov $1, %rdi
-	call exit
-
-	.global bcd_add
-bcd_add:
-	xchg %rax, %rdi
-
-	mov %cl, %ch
-	mov %al, %ah
-	and $0xF, %ch
-	and $0xF, %ah
-	and $0xF0, %cl
-	and $0xF0, %al
-	add %ah, %ch
-	cmp $10, %ch
-	jb no_adjust
-	add $6, %ch
-no_adjust:
-	add %ch, %al
-	add %al, %cl
-	mov $0, %ch
-	jc def_adjust
-	cmp $0xA0, %cl
-	jb no_adjust_h
-def_adjust:
-	add $0x60, %cl
-	mov $1, %ch
-no_adjust_h:
-
-	mov %rdi, %rax
-	ret
-
-	.global bcd_sub
-bcd_sub:
-	xchg %rax, %rdi
-
-	mov %cl, %ch
-	mov %al, %ah
-	and $0xF, %ch
-	and $0xF, %ah
-	and $0xF0, %cl
-	and $0xF0, %al
-	sub %ah, %ch
-	cmp $10, %ch
-	jb no_adjusts
-	sub $6, %ch
-no_adjusts:
-	add %ch, %cl
-	sub %al, %cl
-	mov $0, %ch
-	jc def_adjusts
-	cmp $0xA0, %cl
-	jb no_adjust_hs
-def_adjusts:
-	sub $0x60, %cl
-	mov $1, %ch
-no_adjust_hs:
-
-	mov %rdi, %rax
-	ret
-
-
-
--- a/runtime_32.S	Thu May 28 21:09:33 2015 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,74 +0,0 @@
-
-
-invalid_msg:
-	.asciz "Invalid instruction at %X\n"
-
-	.global m68k_invalid
-m68k_invalid:
-	push %ecx
-	push invalid_msg
-	xor %eax, %eax
-	call printf
-	push $1
-	call exit
-
-	.global bcd_add
-bcd_add:
-	xchg %eax, %edi
-
-	mov %cl, %ch
-	mov %al, %ah
-	and $0xF, %ch
-	and $0xF, %ah
-	and $0xF0, %cl
-	and $0xF0, %al
-	add %ah, %ch
-	cmp $10, %ch
-	jb no_adjust
-	add $6, %ch
-no_adjust:
-	add %ch, %al
-	add %al, %cl
-	mov $0, %ch
-	jc def_adjust
-	cmp $0xA0, %cl
-	jb no_adjust_h
-def_adjust:
-	add $0x60, %cl
-	mov $1, %ch
-no_adjust_h:
-
-	mov %edi, %eax
-	ret
-
-	.global bcd_sub
-bcd_sub:
-	xchg %eax, %edi
-
-	mov %cl, %ch
-	mov %al, %ah
-	and $0xF, %ch
-	and $0xF, %ah
-	and $0xF0, %cl
-	and $0xF0, %al
-	sub %ah, %ch
-	cmp $10, %ch
-	jb no_adjusts
-	sub $6, %ch
-no_adjusts:
-	add %ch, %cl
-	sub %al, %cl
-	mov $0, %ch
-	jc def_adjusts
-	cmp $0xA0, %cl
-	jb no_adjust_hs
-def_adjusts:
-	sub $0x60, %cl
-	mov $1, %ch
-no_adjust_hs:
-
-	mov %edi, %eax
-	ret
-
-
-
--- a/stateview.c	Thu May 28 21:09:33 2015 -0700
+++ b/stateview.c	Thu May 28 21:19:55 2015 -0700
@@ -53,6 +53,7 @@
 }
 
 tern_node * config;
+int headless = 0;
 
 int main(int argc, char ** argv)
 {
@@ -87,7 +88,7 @@
 
 	vdp_context context;
 	render_init(width, height, "GST State Viewer", 60, 0);
-	init_vdp_context(&context);
+	init_vdp_context(&context, 0);
 	vdp_load_gst(&context, state_file);
 	vdp_run_to_vblank(&context);
 	vdp_print_sprite_table(&context);
--- a/tern.c	Thu May 28 21:09:33 2015 -0700
+++ b/tern.c	Thu May 28 21:19:55 2015 -0700
@@ -122,4 +122,14 @@
 	return tern_insert(head, key, val);
 }
 
-
+char * tern_int_key(uint32_t key, char * buf) //encodes key into buf as a NUL-terminated string and returns buf
+{
+	char * cur = buf; //a 32-bit key yields at most 5 encoded bytes, so buf needs room for 6 including the terminator
+	while (key) //a key of 0 skips the loop and produces the empty string
+	{
+		*(cur++) = (key & 0x7F) + 1; //emit 7 bits at a time, lowest bits first; the +1 keeps every emitted byte non-zero
+		key >>= 7;
+	}
+	*cur = 0;
+	return buf;
+}
--- a/tern.h	Thu May 28 21:09:33 2015 -0700
+++ b/tern.h	Thu May 28 21:19:55 2015 -0700
@@ -8,6 +8,8 @@
 
 #include <stdint.h>
 
+#define MAX_INT_KEY_SIZE (sizeof(uint32_t) + 2)
+
 typedef union {
 	void     *ptrval;
 	intptr_t intval;
@@ -31,5 +33,6 @@
 void * tern_find_ptr_default(tern_node * head, char * key, void * def);
 void * tern_find_ptr(tern_node * head, char * key);
 tern_node * tern_insert_ptr(tern_node * head, char * key, void * value);
+char * tern_int_key(uint32_t key, char * buf);
 
 #endif //TERN_H_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test.c	Thu May 28 21:19:55 2015 -0700
@@ -0,0 +1,76 @@
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "vdp.h"
+
+int headless = 1;
+uint16_t read_dma_value(uint32_t address) //stub: ignores address and always returns 0
+{
+	return 0;
+}
+
+uint32_t render_map_color(uint8_t r, uint8_t g, uint8_t b) //stub: ignores the color components and always returns 0
+{
+	return 0;
+}
+
+void render_alloc_surfaces(vdp_context * context) //points oddbuf, framebuf and evenbuf at zeroed heap memory
+{
+	context->oddbuf = context->framebuf = malloc(512 * 256 * 4 * 2); //one allocation sized for two buffers of 512 * 256 * 4 bytes
+	memset(context->oddbuf, 0, 512 * 256 * 4 * 2); //NOTE(review): the malloc result is not checked before this write
+	context->evenbuf = ((char *)context->oddbuf) + 512 * 256 * 4; //evenbuf is the second half of the same allocation
+}
+
+int check_hint_time(vdp_context * v_context) //returns 1 if the predicted hint cycle stayed constant until it was reached, 0 otherwise
+{
+	uint32_t orig_hint_cycle = vdp_next_hint(v_context); //prediction taken before the VDP is advanced
+	uint32_t cur_hint_cycle;
+	printf("hint cycle is %d at vcounter: %d, hslot: %d\n", orig_hint_cycle, v_context->vcounter, v_context->hslot); //NOTE(review): %d is used for a uint32_t value here and below
+	int res = 1;
+	while ((cur_hint_cycle = vdp_next_hint(v_context)) > v_context->cycles) //re-query the prediction until it is no longer ahead of the current cycle
+	{
+		if (cur_hint_cycle != orig_hint_cycle) {
+			fprintf(stderr, "ERROR: hint cycle changed to %d at vcounter: %d, hslot: %d\n", cur_hint_cycle, v_context->vcounter, v_context->hslot);
+			orig_hint_cycle = cur_hint_cycle; //adopt the new value so each change is reported only once
+			res = 0;
+		}
+		vdp_run_context(v_context, v_context->cycles + 1); //advance the VDP with a target of one cycle past the current one
+	}
+	printf("hint fired at cycle: %d, vcounter: %d, hslot: %d\n", cur_hint_cycle, v_context->vcounter, v_context->hslot);
+	vdp_int_ack(v_context, 4); //presumably acknowledges the level 4 (horizontal) interrupt so the next one can be predicted - confirm
+	return res;
+}
+
+
+int main(int argc, char ** argv) //exit status is 0 when every hint timing check passed and 1 otherwise
+{
+	vdp_context v_context;
+	init_vdp_context(&v_context, 0);
+	vdp_control_port_write(&v_context, 0x8144);
+	vdp_control_port_write(&v_context, 0x8C81);
+	vdp_control_port_write(&v_context, 0x8A7F); //presumably loads the hint register with 0x7F, matching hint_counter below - confirm
+	vdp_control_port_write(&v_context, 0x8014);
+	v_context.hint_counter = 0x7F;
+	v_context.vcounter = 128;
+	v_context.hslot = 165;
+	//check single shot behavior
+	int res = check_hint_time(&v_context);
+	//check every line behavior
+	while (v_context.vcounter < 225)
+	{
+		vdp_run_context(&v_context, v_context.cycles + 1);
+	}
+	vdp_control_port_write(&v_context, 0x8A00);
+	int hint_count = 0;
+	while (res && v_context.vcounter != 224) //stops at the first failed check or once vcounter reaches 224
+	{
+		res = res && check_hint_time(&v_context);
+		hint_count++;
+	}
+	if (res && hint_count != 225) {
+		fprintf(stderr, "ERROR: hint count should be 225 but was %d instead\n", hint_count);
+		res = 0;
+	}
+	return res ? 0 : 1; //res is cleared by any failed check above, so a failure is visible to the caller
+}
--- a/testcases.txt	Thu May 28 21:09:33 2015 -0700
+++ b/testcases.txt	Thu May 28 21:19:55 2015 -0700
@@ -1,88 +1,88 @@
 Name	Sizes	Src Modes														Dst Modes
-#add		bwl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
-#add		bwl		d																(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#adda	wl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	a
-#addi	bwl		#n																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#addq	bwl		#(1-8)															d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#addx	bwl		d																d
-#addx	bwl		-(a)															-(a)
-#and		bwl		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
-#and		bwl		d																(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#andi	bwl		#n																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#asl		bwl		d;#(1-8)														d
-#asr		bwl		d;#(1-8)														d
-#lsl		bwl		d;#(1-8)														d
-#lsr		bwl		d;#(1-8)														d
-#sub		bwl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
-#sub		bwl		d																(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#suba	wl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	a
-#subi	bwl		#n																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#subq	bwl		#(1-8)															d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#subx	bwl		d																d
-#subx	bwl		-(a)															-(a)
-#bchg	b		d;#(0-255)														(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#bchg	l		d;#(0-255)														d
-#bset	b		d;#(0-255)														(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#bset	l		d;#(0-255)														d
-#bclr	b		d;#(0-255)														(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#bclr	l		d;#(0-255)														d
-#btst	b		d;#(0-255)														(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#btst	l		d;#(0-255)														d
-#rol		bwl		d;#(1-8)														d
-#ror		bwl		d;#(1-8)														d
-#abcd	b		d																d
-#abcd	b		-(a)															-(a)
-#sbcd	b		d																d
-#sbcd	b		-(a)															-(a)
-#muls	w		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
-#mulu	w		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
-#move	bwl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#movea	wl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	a
-#moveq	l		#(-128-127)														d
-#roxl	bwl		d;#(1-8)														d
-#roxr	bwl		d;#(1-8)														d
-#divs	w		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
-#divu	w		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
-#chk		w		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
-#cmp		bwl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
-#cmpa	wl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	a
-#cmpi	bwl		#n																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#cmpm	bwl		(a)+															(a)+
-#eor		bwl		d																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#eori	bwl		#n																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#exg		l		d																d;a
-#exg		l		a																a
-#link	w		a																#n
-#or		bwl		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
-#or		bwl		d																(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#ori		bwl		#n																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#clr		bwl		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#ext		wl		d
-#neg		bwl		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#negx	bwl		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#not		bwl		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#pea		l		(a);(n,a);(n,a,x);(n).w;(n).l;(n,pc);(n,pc,x)
-#rol		w		(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#ror		w		(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#roxl	w		(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#roxr	w		(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#st		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#sf		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#shi		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#sls		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#scc		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#scs		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#sne		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#seq		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#svc		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#svs		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#spl		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#smi		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#sge		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#slt		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#sgt		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#sle		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
-#swap	w		d
+add		bwl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
+add		bwl		d																(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+adda	wl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	a
+addi	bwl		#n																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+addq	bwl		#(1-8)															d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+addx	bwl		d																d
+addx	bwl		-(a)															-(a)
+and		bwl		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
+and		bwl		d																(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+andi	bwl		#n																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+asl		bwl		d;#(1-8)														d
+asr		bwl		d;#(1-8)														d
+lsl		bwl		d;#(1-8)														d
+lsr		bwl		d;#(1-8)														d
+sub		bwl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
+sub		bwl		d																(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+suba	wl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	a
+subi	bwl		#n																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+subq	bwl		#(1-8)															d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+subx	bwl		d																d
+subx	bwl		-(a)															-(a)
+bchg	b		d;#(0-255)														(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+bchg	l		d;#(0-255)														d
+bset	b		d;#(0-255)														(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+bset	l		d;#(0-255)														d
+bclr	b		d;#(0-255)														(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+bclr	l		d;#(0-255)														d
+btst	b		d;#(0-255)														(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+btst	l		d;#(0-255)														d
+rol		bwl		d;#(1-8)														d
+ror		bwl		d;#(1-8)														d
+abcd	b		d																d
+abcd	b		-(a)															-(a)
+sbcd	b		d																d
+sbcd	b		-(a)															-(a)
+muls	w		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
+mulu	w		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
+move	bwl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+movea	wl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	a
+moveq	l		#(-128-127)														d
+roxl	bwl		d;#(1-8)														d
+roxr	bwl		d;#(1-8)														d
+divs	w		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
+divu	w		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
+chk		w		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
+cmp		bwl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
+cmpa	wl		d;a;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	a
+cmpi	bwl		#n																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+cmpm	bwl		(a)+															(a)+
+eor		bwl		d																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+eori	bwl		#n																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+exg		l		d																d;a
+exg		l		a																a
+link	w		a																#n
+or		bwl		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l;#n;(n,pc);(n,pc,x)	d
+or		bwl		d																(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+ori		bwl		#n																d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+clr		bwl		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+ext		wl		d
+neg		bwl		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+negx	bwl		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+not		bwl		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+pea		l		(a);(n,a);(n,a,x);(n).w;(n).l;(n,pc);(n,pc,x)
+rol		w		(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+ror		w		(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+roxl	w		(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+roxr	w		(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+st		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+sf		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+shi		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+sls		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+scc		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+scs		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+sne		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+seq		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+svc		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+svs		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+spl		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+smi		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+sge		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+slt		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+sgt		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+sle		b		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
+swap	w		d
 tst		bwl		d;(a);(a)+;-(a);(n,a);(n,a,x);(n).w;(n).l
 lea		l		(a);(n,a);(n,a,x);(n).w;(n).l;(n,pc);(n,pc,x)					a
 
--- a/trans.c	Thu May 28 21:09:33 2015 -0700
+++ b/trans.c	Thu May 28 21:19:55 2015 -0700
@@ -25,7 +25,6 @@
 	char disbuf[1024];
 	unsigned short * cur;
 	m68k_options opts;
-	m68k_context context;
 	FILE * f = fopen(argv[1], "rb");
 	fseek(f, 0, SEEK_END);
 	filesize = ftell(f);
@@ -51,15 +50,15 @@
 	memmap[1].flags = MMAP_READ | MMAP_WRITE | MMAP_CODE;
 	memmap[1].buffer = malloc(64 * 1024);
 	memset(memmap[1].buffer, 0, 64 * 1024);
-	init_m68k_opts(&opts, memmap, 2);
-	init_68k_context(&context, opts.gen.native_code_map, &opts);
-	context.mem_pointers[0] = memmap[0].buffer;
-	context.mem_pointers[1] = memmap[1].buffer;
-	context.target_cycle = context.sync_cycle = 0x80000000;
+	init_m68k_opts(&opts, memmap, 2, 1);
+	m68k_context * context = init_68k_context(&opts);
+	context->mem_pointers[0] = memmap[0].buffer;
+	context->mem_pointers[1] = memmap[1].buffer;
+	context->target_cycle = context->sync_cycle = 0x80000000;
 	uint32_t address;
 	address = filebuf[2] << 16 | filebuf[3];
-	translate_m68k_stream(address, &context);
-	m68k_reset(&context);
+	translate_m68k_stream(address, context);
+	m68k_reset(context);
 	return 0;
 }
 
--- a/transz80.c	Thu May 28 21:09:33 2015 -0700
+++ b/transz80.c	Thu May 28 21:19:55 2015 -0700
@@ -38,7 +38,7 @@
 {
 	long filesize;
 	uint8_t *filebuf;
-	x86_z80_options opts;
+	z80_options opts;
 	z80_context context;
 	if (argc < 2) {
 		fputs("usage: transz80 zrom [cartrom]\n", stderr);
@@ -70,7 +70,7 @@
 			*cur = (*cur >> 8) | (*cur << 8);
 		}
 	}
-	init_x86_z80_opts(&opts);
+	init_z80_opts(&opts);
 	init_z80_context(&context, &opts);
 	//Z80 RAM
 	context.mem_pointers[0] = z80_ram;
--- a/util.c	Thu May 28 21:09:33 2015 -0700
+++ b/util.c	Thu May 28 21:19:55 2015 -0700
@@ -127,7 +127,7 @@
 		if (linksize == -1) {
 			perror("readlink");
 			free(linktext);
-			linktext = NULL;
+			return NULL;
 		}
 	} while ((linksize+1) > cursize);
 	linktext[linksize] = 0;
--- a/vdp.c	Thu May 28 21:09:33 2015 -0700
+++ b/vdp.c	Thu May 28 21:19:55 2015 -0700
@@ -9,8 +9,8 @@
 #include <string.h>
 #include "render.h"
 
-#define NTSC_ACTIVE 225
-#define PAL_ACTIVE 241
+#define NTSC_INACTIVE_START 224
+#define PAL_INACTIVE_START 240
 #define BUF_BIT_PRIORITY 0x40
 #define MAP_BIT_PRIORITY 0x8000
 #define MAP_BIT_H_FLIP 0x800
@@ -22,14 +22,19 @@
 
 #define MCLKS_SLOT_H40  16
 #define MCLKS_SLOT_H32  20
-#define VINT_CYCLE_H40  (21*MCLKS_SLOT_H40+332+9*MCLKS_SLOT_H40) //21 slots before HSYNC, 16 during, 10 after
-#define VINT_CYCLE_H32  ((33+20+7)*MCLKS_SLOT_H32)  //33 slots before HSYNC, 20 during, 7 after  TODO: confirm final number
-#define HSYNC_SLOT_H40  21
-#define MCLK_WEIRD_END  (HSYNC_SLOT_H40*MCLKS_SLOT_H40 + 332)
-#define SLOT_WEIRD_END  (HSYNC_SLOT_H40+17)
+#define VINT_SLOT_H40  4 //21 slots before HSYNC, 16 during, 10 after
+#define VINT_SLOT_H32  4  //old value was 23, but recent tests suggest the actual value is close to the H40 one
+#define HSYNC_SLOT_H40  234
+#define HSYNC_END_H40  (HSYNC_SLOT_H40+17)
 #define HSYNC_END_H32   (33 * MCLKS_SLOT_H32)
-#define HBLANK_CLEAR_H40 (MCLK_WEIRD_END+61*4)
-#define HBLANK_CLEAR_H32 (HSYNC_END_H32 + 46*5)
+#define HBLANK_START_H40 178 //should be 179 according to Nemesis, but 178 seems to fit slightly better with my test ROM results
+#define HBLANK_END_H40  0 //should be 5.5 according to Nemesis, but 0 seems to fit better with my test ROM results
+#define HBLANK_START_H32 233 //should be 147 according to Nemesis which is very different from my test ROM result
+#define HBLANK_END_H32 0 //should be 5 according to Nemesis, but 0 seems to fit better with my test ROM results
+#define LINE_CHANGE_H40 165
+#define LINE_CHANGE_H32 132
+#define VBLANK_START_H40 (LINE_CHANGE_H40+2)
+#define VBLANK_START_H32 (LINE_CHANGE_H32+2)
 #define FIFO_LATENCY    3
 
 int32_t color_map[1 << 12];
@@ -45,7 +50,7 @@
 
 uint8_t color_map_init_done;
 
-void init_vdp_context(vdp_context * context)
+void init_vdp_context(vdp_context * context, uint8_t region_pal)
 {
 	memset(context, 0, sizeof(*context));
 	context->vdpmem = malloc(VRAM_SIZE);
@@ -57,10 +62,8 @@
 		memset(context->framebuf, 0, FRAMEBUF_ENTRIES * (32 / 8));
 		context->evenbuf = malloc(FRAMEBUF_ENTRIES * (32 / 8));
 		memset(context->evenbuf, 0, FRAMEBUF_ENTRIES * (32 / 8));
-		context->b32 = 1;
 	} else {
 		render_alloc_surfaces(context);
-		context->b32 = render_depth() == 32;
 	}
 	context->framebuf = context->oddbuf;
 	context->linebuf = malloc(LINEBUF_SIZE + SCROLL_BUFFER_SIZE*2);
@@ -132,18 +135,21 @@
 			context->debugcolors[color] = render_map_color(r, g, b);
 		}
 	}
+	if (region_pal) {
+		context->flags2 |= FLAG2_REGION_PAL;
+	}
 }
 
 int is_refresh(vdp_context * context, uint32_t slot)
 {
-	if (context->latched_mode & BIT_H40) {
-		return (slot == 37 || slot == 69 || slot == 102 || slot == 133 || slot == 165 || slot == 197 || slot >= 210);
+	if (context->regs[REG_MODE_4] & BIT_H40) {
+		return slot == 250 || slot == 26 || slot == 59 || slot == 90 || slot == 122 || slot == 154;
 	} else {
 		//TODO: Figure out which slots are refresh when display is off in 32-cell mode
 		//These numbers are guesses based on H40 numbers
-		return (slot == 24 || slot == 56 || slot == 88 || slot == 120 || slot == 152);
+		return slot == 243 || slot == 19 || slot == 51 || slot == 83 || slot == 115;
 		//The numbers below are the refresh slots during active display
-		//return (slot == 66 || slot == 98 || slot == 130 || slot == 162);
+		//return (slot == 29 || slot == 61 || slot == 93 || slot == 125);
 	}
 }
 
@@ -203,6 +209,45 @@
 	} while (current_index != 0 && count < 80);
 }
 
+#define VRAM_READ 0 //0000
+#define VRAM_WRITE 1 //0001
+//2 would trigger register write 0010
+#define CRAM_WRITE 3 //0011
+#define VSRAM_READ 4 //0100
+#define VSRAM_WRITE 5//0101
+//6 would trigger register write 0110
+//7 is a mystery
+#define CRAM_READ 8  //1000
+//9 is also a mystery //1001
+//A would trigger register write 1010
+//B is a mystery 1011
+#define VRAM_READ8 0xC //1100
+//D is a mystery 1101
+//E would trigger register write 1110
+//F is a mystery 1111
+#define DMA_START 0x20
+
+//Returns a printable name for the access type selected by the low 4 bits of cd
+//Bits above the low nibble (such as DMA_START) are ignored
+const char * cd_name(uint8_t cd)
+{
+	//sparse table indexed by the 4-bit code; slots that are not listed stay NULL
+	static const char * const names[16] = {
+		[VRAM_READ] = "VRAM read",
+		[VRAM_WRITE] = "VRAM write",
+		[CRAM_WRITE] = "CRAM write",
+		[VSRAM_READ] = "VSRAM read",
+		[VSRAM_WRITE] = "VSRAM write",
+		[CRAM_READ] = "CRAM read",
+		[VRAM_READ8] = "VRAM read (undocumented 8-bit mode)"
+	};
+	const char * name = names[cd & 0xF];
+	if (!name) {
+		return "invalid";
+	}
+	return name;
+}
+
 void vdp_print_reg_explain(vdp_context * context)
 {
 	char * hscroll[] = {"full", "7-line", "cell", "line"};
@@ -227,8 +272,8 @@
 	       context->regs[REG_SCROLL_A], (context->regs[REG_SCROLL_A] & 0x38) << 10,
 	       context->regs[REG_WINDOW], (context->regs[REG_WINDOW] & (context->regs[REG_MODE_4] & BIT_H40 ? 0x3C : 0x3E)) << 10,
 	       context->regs[REG_SCROLL_B], (context->regs[REG_SCROLL_B] & 0x7) << 13,
-	       context->regs[REG_SAT], (context->regs[REG_SAT] & (context->regs[REG_MODE_4] & BIT_H40 ? 0x3E : 0x3F)) << 9,
-	       context->regs[REG_HSCROLL], (context->regs[REG_HSCROLL] & 0x1F) << 10);
+	       context->regs[REG_SAT], (context->regs[REG_SAT] & (context->regs[REG_MODE_4] & BIT_H40 ? 0x7E : 0x7F)) << 9,
+	       context->regs[REG_HSCROLL], (context->regs[REG_HSCROLL] & 0x3F) << 10);
 	char * sizes[] = {"32", "64", "invalid", "128"};
 	printf("\n**Misc Group**\n"
 	       "07: %.2X | Backdrop Color: $%X\n"
@@ -239,11 +284,28 @@
 	       context->regs[REG_HINT], context->regs[REG_HINT],
 	       context->regs[REG_AUTOINC], context->regs[REG_AUTOINC],
 	       context->regs[REG_SCROLL], sizes[context->regs[REG_SCROLL] & 0x3], sizes[context->regs[REG_SCROLL] >> 4 & 0x3]);
+	char * src_types[] = {"68K", "68K", "Copy", "Fill"};
+	printf("\n**DMA Group**\n"
+	       "13: %.2X |\n"
+		   "14: %.2X | DMA Length: $%.4X words\n"
+		   "15: %.2X |\n"
+		   "16: %.2X |\n"
+		   "17: %.2X | DMA Source Address: $%.6X, Type: %s\n",
+		   context->regs[REG_DMALEN_L],
+		   context->regs[REG_DMALEN_H], context->regs[REG_DMALEN_H] << 8 | context->regs[REG_DMALEN_L],
+		   context->regs[REG_DMASRC_L],
+		   context->regs[REG_DMASRC_M],
+		   context->regs[REG_DMASRC_H],
+		       context->regs[REG_DMASRC_H] << 17 | context->regs[REG_DMASRC_M] << 9 | context->regs[REG_DMASRC_L] << 1,
+			   src_types[context->regs[REG_DMASRC_H] >> 6 & 3]);
 	printf("\n**Internal Group**\n"
 	       "Address: %X\n"
-	       "CD:      %X\n"
-	       "Pending: %s\n",
-	       context->address, context->cd, (context->flags & FLAG_PENDING) ? "true" : "false");
+	       "CD:      %X - %s\n"
+	       "Pending: %s\n"
+		   "VCounter: %d\n"
+		   "HCounter: %d\n",
+	       context->address, context->cd, cd_name(context->cd), (context->flags & FLAG_PENDING) ? "true" : "false",
+		   context->vcounter, context->hslot*2);
 
 	//TODO: Window Group, DMA Group
 }
@@ -269,7 +331,7 @@
 			height_mult = 8;
 		}
 		context->sprite_index &= 0x7F;
-		if (context->latched_mode & BIT_H40) {
+		if (context->regs[REG_MODE_4] & BIT_H40) {
 			if (context->sprite_index >= MAX_SPRITES_FRAME) {
 				context->sprite_index = 0;
 				return;
@@ -393,24 +455,6 @@
 	context->colors[addr + CRAM_SIZE*2] = color_map[(value & 0xEEE) | FBUF_HILIGHT];
 }
 
-#define VRAM_READ 0 //0000
-#define VRAM_WRITE 1 //0001
-//2 would trigger register write 0010
-#define CRAM_WRITE 3 //0011
-#define VSRAM_READ 4 //0100
-#define VSRAM_WRITE 5//0101
-//6 would trigger regsiter write 0110
-//7 is a mystery
-#define CRAM_READ 8  //1000
-//9 is also a mystery //1001
-//A would trigger register write 1010
-//B is a mystery 1011
-#define VRAM_READ8 0xC //1100
-//D is a mystery 1101
-//E would trigger register write 1110
-//F is a mystery 1111
-#define DMA_START 0x20
-
 void external_slot(vdp_context * context)
 {
 	fifo_entry * start = context->fifo + context->fifo_read;
@@ -441,7 +485,7 @@
 		}
 		case VSRAM_WRITE:
 			if (((start->address/2) & 63) < VSRAM_SIZE) {
-				//printf("VSRAM Write: %X to %X\n", start->value, context->address);
+				//printf("VSRAM Write: %X to %X @ vcounter: %d, hslot: %d, cycle: %d\n", start->value, context->address, context->vcounter, context->hslot, context->cycles);
 				context->vsram[(start->address/2) & 63] = start->partial == 2 ? context->fifo[context->fifo_write].value : start->value;
 			}
 
@@ -472,7 +516,7 @@
 	case 0x40:
 		if (!slot || !is_refresh(context, slot-1)) {
 			cur = context->fifo + context->fifo_write;
-			cur->cycle = context->cycles + ((context->latched_mode & BIT_H40) ? 16 : 20)*FIFO_LATENCY;
+			cur->cycle = context->cycles + ((context->regs[REG_MODE_4] & BIT_H40) ? 16 : 20)*FIFO_LATENCY;
 			cur->address = context->address;
 			cur->value = read_dma_value((context->regs[REG_DMASRC_H] << 16) | (context->regs[REG_DMASRC_M] << 8) | context->regs[REG_DMASRC_L]);
 			cur->cd = context->cd;
@@ -518,7 +562,7 @@
 		context->regs[REG_DMALEN_H] = dma_len >> 8;
 		context->regs[REG_DMALEN_L] = dma_len;
 		if (!dma_len) {
-			//printf("DMA end at cycle %d\n", context->cycles);
+			//printf("DMA end at cycle %d, frame: %d, vcounter: %d, hslot: %d\n", context->cycles, context->frame, context->vcounter, context->hslot);
 			context->flags &= ~FLAG_DMA_RUN;
 			context->cd &= 0xF;
 		}
@@ -567,7 +611,7 @@
 		if ((column >= left_col && column < right_col) || (line >= top_line && line < bottom_line)) {
 			uint16_t address = context->regs[REG_WINDOW] << 10;
 			uint16_t line_offset, offset, mask;
-			if (context->latched_mode & BIT_H40) {
+			if (context->regs[REG_MODE_4] & BIT_H40) {
 				address &= 0xF000;
 				line_offset = (((line) >> vscroll_shift) * 64 * 2) & 0xFFF;
 				mask = 0x7F;
@@ -613,7 +657,27 @@
 		vscroll <<= 1;
 		vscroll |= 1;
 	}
-	vscroll &= (context->vsram[(context->regs[REG_MODE_3] & BIT_VSCROLL ? (column-2)&63 : 0) + vsram_off] + line);
+	//TODO: Further research on vscroll latch behavior and the "first column bug"
+	if (!column) {
+		if (context->regs[REG_MODE_3] & BIT_VSCROLL) {
+			if (context->regs[REG_MODE_4] & BIT_H40) {
+				//Based on observed behavior documented by Eke-Eke, I'm guessing the VDP
+				//ends up fetching the last value on the VSRAM bus in the H40 case
+				//getting the last latched value should be close enough for now
+				if (!vsram_off) {
+					context->vscroll_latch[0] = context->vscroll_latch[1];
+				}
+			} else {
+				//supposedly it's always forced to 0 in the H32 case
+				context->vscroll_latch[0] = context->vscroll_latch[1] = 0;
+			}
+		} else {
+			context->vscroll_latch[vsram_off] = context->vsram[vsram_off];
+		}
+	} else if (context->regs[REG_MODE_3] & BIT_VSCROLL) {
+		context->vscroll_latch[vsram_off] = context->vsram[column - 2 + vsram_off];
+	}
+	vscroll &= context->vscroll_latch[vsram_off] + line;
 	context->v_offset = vscroll & v_offset_mask;
 	//printf("%s | line %d, vsram: %d, vscroll: %d, v_offset: %d\n",(vsram_off ? "B" : "A"), line, context->vsram[context->regs[REG_MODE_3] & 0x4 ? column : 0], vscroll, context->v_offset);
 	vscroll >>= vscroll_shift;
@@ -722,113 +786,143 @@
 		return;
 	}
 	render_map(context->col_2, context->tmp_buf_b, context->buf_b_off+8, context);
-	uint16_t *dst;
-	uint32_t *dst32;
+	uint32_t *dst;
 	uint8_t *sprite_buf,  *plane_a, *plane_b;
 	int plane_a_off, plane_b_off;
 	if (col)
 	{
 		col-=2;
-		if (context->b32) {
-			dst32 = context->framebuf;
-			dst32 += line * 320 + col * 8;
-		} else {
-			dst = context->framebuf;
-			dst += line * 320 + col * 8;
-		}
-		sprite_buf = context->linebuf + col * 8;
-		uint8_t a_src, src;
-		if (context->flags & FLAG_WINDOW) {
-			plane_a_off = context->buf_a_off;
-			a_src = DBG_SRC_W;
-		} else {
-			plane_a_off = context->buf_a_off - (context->hscroll_a & 0xF);
-			a_src = DBG_SRC_A;
-		}
-		plane_b_off = context->buf_b_off - (context->hscroll_b & 0xF);
-		//printf("A | tmp_buf offset: %d\n", 8 - (context->hscroll_a & 0x7));
+		dst = context->framebuf;
+		dst += line * 320 + col * 8;
+		if (context->debug < 2) {
+			sprite_buf = context->linebuf + col * 8;
+			uint8_t a_src, src;
+			if (context->flags & FLAG_WINDOW) {
+				plane_a_off = context->buf_a_off;
+				a_src = DBG_SRC_W;
+			} else {
+				plane_a_off = context->buf_a_off - (context->hscroll_a & 0xF);
+				a_src = DBG_SRC_A;
+			}
+			plane_b_off = context->buf_b_off - (context->hscroll_b & 0xF);
+			//printf("A | tmp_buf offset: %d\n", 8 - (context->hscroll_a & 0x7));
 
-		if (context->regs[REG_MODE_4] & BIT_HILIGHT) {
-			for (int i = 0; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i) {
-				uint8_t pixel;
-				plane_a = context->tmp_buf_a + (plane_a_off & SCROLL_BUFFER_MASK);
-				plane_b = context->tmp_buf_b + (plane_b_off & SCROLL_BUFFER_MASK);
-				uint32_t * colors = context->colors;
-				src = 0;
-				pixel = context->regs[REG_BG_COLOR];
-				src = DBG_SRC_BG;
-				if (*plane_b & 0xF) {
-					pixel = *plane_b;
-					src = DBG_SRC_B;
-				}
-				if (*plane_a & 0xF && (*plane_a & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
-					pixel = *plane_a;
-					src = DBG_SRC_A;
-				}
-				if (*sprite_buf & 0xF) {
-					uint8_t sprite_color = *sprite_buf & 0x3F;
-					if (sprite_color == 0x3E) {
-						colors += CRAM_SIZE*2;
-						src |= DBG_HILIGHT;
-					} else if (sprite_color == 0x3F) {
+			if (context->regs[REG_MODE_4] & BIT_HILIGHT) {
+				for (int i = 0; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i) {
+					uint8_t pixel;
+					plane_a = context->tmp_buf_a + (plane_a_off & SCROLL_BUFFER_MASK);
+					plane_b = context->tmp_buf_b + (plane_b_off & SCROLL_BUFFER_MASK);
+					uint32_t * colors = context->colors;
+					src = 0;
+					pixel = context->regs[REG_BG_COLOR];
+					src = DBG_SRC_BG;
+					if (*plane_b & 0xF) {
+						pixel = *plane_b;
+						src = DBG_SRC_B;
+					}
+					if (*plane_a & 0xF && (*plane_a & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
+						pixel = *plane_a;
+						src = DBG_SRC_A;
+					}
+					if (*sprite_buf & 0xF) {
+						uint8_t sprite_color = *sprite_buf & 0x3F;
+						if (sprite_color == 0x3E) {
+							colors += CRAM_SIZE*2;
+							src |= DBG_HILIGHT;
+						} else if (sprite_color == 0x3F) {
+							colors += CRAM_SIZE;
+							src |= DBG_SHADOW;
+						} else if ((*sprite_buf & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
+							pixel = *sprite_buf;
+							src = DBG_SRC_S;
+							if ((pixel & 0xF) == 0xE) {
+								src |= DBG_SHADOW;
+								colors += CRAM_SIZE;
+							}
+
+						}
+					} else if (!((*plane_a | *plane_b) & BUF_BIT_PRIORITY)) {
 						colors += CRAM_SIZE;
 						src |= DBG_SHADOW;
-					} else if ((*sprite_buf & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
+					}
+					pixel &= 0x3F;
+					uint32_t outpixel;
+					if (context->debug) {
+						outpixel = context->debugcolors[src];
+					} else {
+						outpixel = colors[pixel];
+					}
+					*(dst++) = outpixel;
+					//*dst = (context->cram[pixel & 0x3F] & 0xEEE) | ((pixel & BUF_BIT_PRIORITY) ? FBUF_BIT_PRIORITY : 0) | src;
+				}
+			} else {
+				for (int i = 0; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i) {
+					plane_a = context->tmp_buf_a + (plane_a_off & SCROLL_BUFFER_MASK);
+					plane_b = context->tmp_buf_b + (plane_b_off & SCROLL_BUFFER_MASK);
+					uint8_t pixel = context->regs[REG_BG_COLOR];
+					src = DBG_SRC_BG;
+					if (*plane_b & 0xF) {
+						pixel = *plane_b;
+						src = DBG_SRC_B;
+					}
+					if (*plane_a & 0xF && (*plane_a & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
+						pixel = *plane_a;
+						src = DBG_SRC_A;
+					}
+					if (*sprite_buf & 0xF && (*sprite_buf & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
 						pixel = *sprite_buf;
 						src = DBG_SRC_S;
-						if ((pixel & 0xF) == 0xE) {
-							src |= DBG_SHADOW;
-							colors += CRAM_SIZE;
-						}
-
 					}
-				} else if (!((*plane_a | *plane_b) & BUF_BIT_PRIORITY)) {
-					colors += CRAM_SIZE;
-					src |= DBG_SHADOW;
-				}
-				pixel &= 0x3F;
-				uint32_t outpixel;
-				if (context->debug) {
-					outpixel = context->debugcolors[src];
-				} else {
-					outpixel = colors[pixel];
-				}
-				if (context->b32) {
-					*(dst32++) = outpixel;
-				} else {
+					uint32_t outpixel;
+					if (context->debug) {
+						outpixel = context->debugcolors[src];
+					} else {
+						outpixel = context->colors[pixel & 0x3F];
+					}
 					*(dst++) = outpixel;
 				}
-				//*dst = (context->cram[pixel & 0x3F] & 0xEEE) | ((pixel & BUF_BIT_PRIORITY) ? FBUF_BIT_PRIORITY : 0) | src;
+			}
+		} else if (context->debug == 2) {
+			if (col < 32) {
+				*(dst++) = context->colors[col * 2];
+				*(dst++) = context->colors[col * 2];
+				*(dst++) = context->colors[col * 2];
+				*(dst++) = context->colors[col * 2];
+				*(dst++) = context->colors[col * 2 + 1];
+				*(dst++) = context->colors[col * 2 + 1];
+				*(dst++) = context->colors[col * 2 + 1];
+				*(dst++) = context->colors[col * 2 + 1];
+				*(dst++) = context->colors[col * 2 + 2];
+				*(dst++) = context->colors[col * 2 + 2];
+				*(dst++) = context->colors[col * 2 + 2];
+				*(dst++) = context->colors[col * 2 + 2];
+				*(dst++) = context->colors[col * 2 + 3];
+				*(dst++) = context->colors[col * 2 + 3];
+				*(dst++) = context->colors[col * 2 + 3];
+				*(dst++) = context->colors[col * 2 + 3];
+			} else if (col == 32 || line >= 192) {
+				for (int32_t i = 0; i < 16; i ++) {
+					*(dst++) = 0;
+				}
+			} else {
+				for (int32_t i = 0; i < 16; i ++) {
+					*(dst++) = context->colors[line / 3 + (col - 34) * 0x20];
+				}
 			}
 		} else {
-			for (int i = 0; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i) {
-				plane_a = context->tmp_buf_a + (plane_a_off & SCROLL_BUFFER_MASK);
-				plane_b = context->tmp_buf_b + (plane_b_off & SCROLL_BUFFER_MASK);
-				uint8_t pixel = context->regs[REG_BG_COLOR];
-				src = DBG_SRC_BG;
-				if (*plane_b & 0xF) {
-					pixel = *plane_b;
-					src = DBG_SRC_B;
-				}
-				if (*plane_a & 0xF && (*plane_a & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
-					pixel = *plane_a;
-					src = DBG_SRC_A;
-				}
-				if (*sprite_buf & 0xF && (*sprite_buf & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
-					pixel = *sprite_buf;
-					src = DBG_SRC_S;
-				}
-				uint32_t outpixel;
-				if (context->debug) {
-					outpixel = context->debugcolors[src];
-				} else {
-					outpixel = context->colors[pixel & 0x3F];
-				}
-				if (context->b32) {
-					*(dst32++) = outpixel;
-				} else {
-					*(dst++) = outpixel;
-				}
+			uint32_t cell = (line / 8) * (context->regs[REG_MODE_4] & BIT_H40 ? 40 : 32) + col;
+			uint32_t address = cell * 32 + (line % 8) * 4;
+			for (int32_t i = 0; i < 4; i ++) {
+				*(dst++) = context->colors[(context->debug_pal << 4) | (context->vdpmem[address] >> 4)];
+				*(dst++) = context->colors[(context->debug_pal << 4) | (context->vdpmem[address] & 0xF)];
+				address++;
+			}
+			cell++;
+			address = cell * 32 + (line % 8) * 4;
+			for (int32_t i = 0; i < 4; i ++) {
+				*(dst++) = context->colors[(context->debug_pal << 4) | (context->vdpmem[address] >> 4)];
+				*(dst++) = context->colors[(context->debug_pal << 4) | (context->vdpmem[address] & 0xF)];
+				address++;
 			}
 		}
 	}
@@ -893,13 +987,15 @@
 	uint32_t mask;
 	switch(linecyc)
 	{
+	case 165:
+	case 166:
+		external_slot(context);
+		break;
 	//sprite render to line buffer starts
-	case 0:
-		context->cur_slot = MAX_DRAWS-1;
-		memset(context->linebuf, 0, LINEBUF_SIZE);
-	case 1:
-	case 2:
-	case 3:
+	case 167:
+	case 168:
+	case 169:
+	case 170:
 		if (line == 0xFF) {
 			external_slot(context);
 		} else {
@@ -907,52 +1003,50 @@
 		}
 		break;
 	//sprite attribute table scan starts
-	case 4:
+	case 171:
 		render_sprite_cells( context);
-		context->sprite_index = 0x80;
-		context->slot_counter = MAX_SPRITES_LINE;
 		scan_sprite_table(line, context);
 		break;
-	case 5:
-	case 6:
-	case 7:
-	case 8:
-	case 9:
-	case 10:
-	case 11:
-	case 12:
-	case 13:
-	case 14:
-	case 15:
-	case 16:
-	case 17:
-	case 18:
-	case 19:
-	case 20:
+	case 172:
+	case 173:
+	case 174:
+	case 175:
+	case 176:
+	case 177:
+	case 178:
+	case 179:
+	case 180:
+	case 181:
+	case 182:
+	case 229:
+	case 230:
+	case 231:
+	case 232:
+	case 233:
 	//!HSYNC asserted
-	case 21:
-	case 22:
+	case 234:
+	case 235:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 23:
+	case 236:
 		external_slot(context);
 		break;
-	case 24:
-	case 25:
-	case 26:
-	case 27:
-	case 28:
-	case 29:
-	case 30:
-	case 31:
-	case 32:
-	case 33:
-	case 34:
+	case 237:
+	case 238:
+	case 239:
+	case 240:
+	case 241:
+	case 242:
+	case 243:
+	case 244:
+	case 245:
+	case 246:
+	case 247:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 35:
+	case 248:
 		address = (context->regs[REG_HSCROLL] & 0x3F) << 10;
 		mask = 0;
 		if (context->regs[REG_MODE_3] & 0x2) {
@@ -967,41 +1061,41 @@
 		context->hscroll_b = context->vdpmem[address+2] << 8 | context->vdpmem[address+3];
 		//printf("%d: HScroll A: %d, HScroll B: %d\n", line, context->hscroll_a, context->hscroll_b);
 		break;
-	case 36:
+	case 249:
 	//!HSYNC high
-	case 37:
-	case 38:
-	case 39:
+	case 250:
+	case 251:
+	case 252:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 40:
+	case 253:
 		read_map_scroll_a(0, line, context);
 		break;
-	case 41:
+	case 254:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 42:
+	case 255:
 		render_map_1(context);
 		scan_sprite_table(line, context);//Just a guess
 		break;
-	case 43:
+	case 0:
 		render_map_2(context);
 		scan_sprite_table(line, context);//Just a guess
 		break;
-	case 44:
+	case 1:
 		read_map_scroll_b(0, line, context);
 		break;
-	case 45:
+	case 2:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 46:
+	case 3:
 		render_map_3(context);
 		scan_sprite_table(line, context);//Just a guess
 		break;
-	case 47:
+	case 4:
 		render_map_output(line, 0, context);
 		scan_sprite_table(line, context);//Just a guess
 		//reverse context slot counter so it counts the number of sprite slots
@@ -1011,33 +1105,26 @@
 		context->sprite_draws = MAX_DRAWS;
 		context->flags &= (~FLAG_CAN_MASK & ~FLAG_MASKED);
 		break;
-	COLUMN_RENDER_BLOCK(2, 48)
-	COLUMN_RENDER_BLOCK(4, 56)
-	COLUMN_RENDER_BLOCK(6, 64)
-	COLUMN_RENDER_BLOCK_REFRESH(8, 72)
-	COLUMN_RENDER_BLOCK(10, 80)
-	COLUMN_RENDER_BLOCK(12, 88)
-	COLUMN_RENDER_BLOCK(14, 96)
-	COLUMN_RENDER_BLOCK_REFRESH(16, 104)
-	COLUMN_RENDER_BLOCK(18, 112)
-	COLUMN_RENDER_BLOCK(20, 120)
-	COLUMN_RENDER_BLOCK(22, 128)
-	COLUMN_RENDER_BLOCK_REFRESH(24, 136)
-	COLUMN_RENDER_BLOCK(26, 144)
-	COLUMN_RENDER_BLOCK(28, 152)
-	COLUMN_RENDER_BLOCK(30, 160)
-	COLUMN_RENDER_BLOCK_REFRESH(32, 168)
-	COLUMN_RENDER_BLOCK(34, 176)
-	COLUMN_RENDER_BLOCK(36, 184)
-	COLUMN_RENDER_BLOCK(38, 192)
-	COLUMN_RENDER_BLOCK_REFRESH(40, 200)
-	case 208:
-	case 209:
-		external_slot(context);
-		break;
-	default:
-		//leftovers from HSYNC clock change nonsense
-		break;
+	COLUMN_RENDER_BLOCK(2, 5)
+	COLUMN_RENDER_BLOCK(4, 13)
+	COLUMN_RENDER_BLOCK(6, 21)
+	COLUMN_RENDER_BLOCK_REFRESH(8, 29)
+	COLUMN_RENDER_BLOCK(10, 37)
+	COLUMN_RENDER_BLOCK(12, 45)
+	COLUMN_RENDER_BLOCK(14, 53)
+	COLUMN_RENDER_BLOCK_REFRESH(16, 61)
+	COLUMN_RENDER_BLOCK(18, 69)
+	COLUMN_RENDER_BLOCK(20, 77)
+	COLUMN_RENDER_BLOCK(22, 85)
+	COLUMN_RENDER_BLOCK_REFRESH(24, 93)
+	COLUMN_RENDER_BLOCK(26, 101)
+	COLUMN_RENDER_BLOCK(28, 109)
+	COLUMN_RENDER_BLOCK(30, 117)
+	COLUMN_RENDER_BLOCK_REFRESH(32, 125)
+	COLUMN_RENDER_BLOCK(34, 133)
+	COLUMN_RENDER_BLOCK(36, 141)
+	COLUMN_RENDER_BLOCK(38, 149)
+	COLUMN_RENDER_BLOCK_REFRESH(40, 157)
 	}
 }
 
@@ -1047,13 +1134,15 @@
 	uint32_t mask;
 	switch(linecyc)
 	{
+	case 132:
+	case 133:
+		external_slot(context);
+		break;
 	//sprite render to line buffer starts
-	case 0:
-		context->cur_slot = MAX_DRAWS_H32-1;
-		memset(context->linebuf, 0, LINEBUF_SIZE);
-	case 1:
-	case 2:
-	case 3:
+	case 134:
+	case 135:
+	case 136:
+	case 137:
 		if (line == 0xFF) {
 			external_slot(context);
 		} else {
@@ -1061,46 +1150,44 @@
 		}
 		break;
 	//sprite attribute table scan starts
-	case 4:
+	case 138:
 		render_sprite_cells( context);
-		context->sprite_index = 0x80;
-		context->slot_counter = MAX_SPRITES_LINE_H32;
 		scan_sprite_table(line, context);
 		break;
-	case 5:
-	case 6:
-	case 7:
-	case 8:
-	case 9:
-	case 10:
-	case 11:
-	case 12:
-	case 13:
+	case 139:
+	case 140:
+	case 141:
+	case 142:
+	case 143:
+	case 144:
+	case 145:
+	case 146:
+	case 147:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
-	case 14:
+	case 233:
 		external_slot(context);
 		break;
-	case 15:
-	case 16:
-	case 17:
-	case 18:
-	case 19:
+	case 234:
+	case 235:
+	case 236:
+	case 237:
+	case 238:
 	//HSYNC start
-	case 20:
-	case 21:
-	case 22:
-	case 23:
-	case 24:
-	case 25:
-	case 26:
+	case 239:
+	case 240:
+	case 241:
+	case 242:
+	case 243:
+	case 244:
+	case 245:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 27:
+	case 246:
 		external_slot(context);
 		break;
-	case 28:
+	case 247:
 		address = (context->regs[REG_HSCROLL] & 0x3F) << 10;
 		mask = 0;
 		if (context->regs[REG_MODE_3] & 0x2) {
@@ -1115,41 +1202,41 @@
 		context->hscroll_b = context->vdpmem[address+2] << 8 | context->vdpmem[address+3];
 		//printf("%d: HScroll A: %d, HScroll B: %d\n", line, context->hscroll_a, context->hscroll_b);
 		break;
-	case 29:
-	case 30:
-	case 31:
-	case 32:
+	case 248:
+	case 249:
+	case 250:
+	case 251:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
 	//!HSYNC high
-	case 33:
+	case 252:
 		read_map_scroll_a(0, line, context);
 		break;
-	case 34:
+	case 253:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 35:
+	case 254:
 		render_map_1(context);
 		scan_sprite_table(line, context);//Just a guess
 		break;
-	case 36:
+	case 255:
 		render_map_2(context);
 		scan_sprite_table(line, context);//Just a guess
 		break;
-	case 37:
+	case 0:
 		read_map_scroll_b(0, line, context);
 		break;
-	case 38:
+	case 1:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 39:
+	case 2:
 		render_map_3(context);
 		scan_sprite_table(line, context);//Just a guess
 		break;
-	case 40:
+	case 3:
 		render_map_output(line, 0, context);
 		scan_sprite_table(line, context);//Just a guess
 		//reverse context slot counter so it counts the number of sprite slots
@@ -1159,26 +1246,22 @@
 		context->sprite_draws = MAX_DRAWS_H32;
 		context->flags &= (~FLAG_CAN_MASK & ~FLAG_MASKED);
 		break;
-	COLUMN_RENDER_BLOCK(2, 41)
-	COLUMN_RENDER_BLOCK(4, 49)
-	COLUMN_RENDER_BLOCK(6, 57)
-	COLUMN_RENDER_BLOCK_REFRESH(8, 65)
-	COLUMN_RENDER_BLOCK(10, 73)
-	COLUMN_RENDER_BLOCK(12, 81)
-	COLUMN_RENDER_BLOCK(14, 89)
-	COLUMN_RENDER_BLOCK_REFRESH(16, 97)
-	COLUMN_RENDER_BLOCK(18, 105)
-	COLUMN_RENDER_BLOCK(20, 113)
-	COLUMN_RENDER_BLOCK(22, 121)
-	COLUMN_RENDER_BLOCK_REFRESH(24, 129)
-	COLUMN_RENDER_BLOCK(26, 137)
-	COLUMN_RENDER_BLOCK(28, 145)
-	COLUMN_RENDER_BLOCK(30, 153)
-	COLUMN_RENDER_BLOCK_REFRESH(32, 161)
-	case 169:
-	case 170:
-		external_slot(context);
-		break;
+	COLUMN_RENDER_BLOCK(2, 4)
+	COLUMN_RENDER_BLOCK(4, 12)
+	COLUMN_RENDER_BLOCK(6, 20)
+	COLUMN_RENDER_BLOCK_REFRESH(8, 28)
+	COLUMN_RENDER_BLOCK(10, 36)
+	COLUMN_RENDER_BLOCK(12, 44)
+	COLUMN_RENDER_BLOCK(14, 52)
+	COLUMN_RENDER_BLOCK_REFRESH(16, 60)
+	COLUMN_RENDER_BLOCK(18, 68)
+	COLUMN_RENDER_BLOCK(20, 76)
+	COLUMN_RENDER_BLOCK(22, 84)
+	COLUMN_RENDER_BLOCK_REFRESH(24, 92)
+	COLUMN_RENDER_BLOCK(26, 100)
+	COLUMN_RENDER_BLOCK(28, 108)
+	COLUMN_RENDER_BLOCK(30, 116)
+	COLUMN_RENDER_BLOCK_REFRESH(32, 124)
 	}
 }
 
@@ -1203,6 +1286,14 @@
 		if (context->flags & FLAG_DMA_RUN) {
 			run_dma_src(context, 0);
 		}
+		external_slot(context);
+		if (context->flags & FLAG_DMA_RUN) {
+			run_dma_src(context, 0);
+		}
+		external_slot(context);
+		if (context->flags & FLAG_DMA_RUN) {
+			run_dma_src(context, 0);
+		}
 		for (int i = 0; i < 19; i++)
 		{
 			scan_sprite_table(line, context);
@@ -1240,13 +1331,17 @@
 
 			read_sprite_x(line, context);
 		}
-		external_slot(context);
-		if (context->flags & FLAG_DMA_RUN) {
-			run_dma_src(context, 0);
-		}
-		external_slot(context);
+
 		return;
 	}
+	external_slot(context);
+	if (context->flags & FLAG_DMA_RUN) {
+		run_dma_src(context, 0);
+	}
+	external_slot(context);
+	if (context->flags & FLAG_DMA_RUN) {
+		run_dma_src(context, 0);
+	}
 
 	render_sprite_cells(context);
 	render_sprite_cells(context);
@@ -1356,57 +1451,63 @@
 		render_map_3(context);
 		render_map_output(line, column, context);
 	}
-	external_slot(context);
-	if (context->flags & FLAG_DMA_RUN) {
-		run_dma_src(context, 0);
-	}
-	external_slot(context);
 }
 
 void latch_mode(vdp_context * context)
 {
-	context->latched_mode = (context->regs[REG_MODE_4] & 0x81) | (context->regs[REG_MODE_2] & BIT_PAL);
+	context->latched_mode = context->regs[REG_MODE_2] & BIT_PAL;
 }
 
 void check_render_bg(vdp_context * context, int32_t line, uint32_t slot)
 {
-	if (line > 0) {
-		line -= 1;
-		int starti = -1;
-		if (context->latched_mode & BIT_H40) {
-			if (slot >= 55 && slot < 210) {
-				uint32_t x = (slot-55)*2;
-				starti = line * 320 + x;
-			} else if (slot < 5) {
-				uint32_t x = (slot + 155)*2;
-				starti = (line-1)*320 + x;
-			}
-		} else {
-			if (slot >= 48 && slot < 171) {
-				uint32_t x = (slot-48)*2;
-				starti = line * 320 + x;
-			} else if (slot < 5) {
-				uint32_t x = (slot + 123)*2;
-				starti = (line-1)*320 + x;
+	int starti = -1;
+	if (context->regs[REG_MODE_4] & BIT_H40) {
+		if (slot >= 12 && slot < 172) {
+			uint32_t x = (slot-12)*2;
+			starti = line * 320 + x;
+		}
+	} else {
+		if (slot >= 11 && slot < 139) {
+			uint32_t x = (slot-11)*2;
+			starti = line * 320 + x;
+		}
+	}
+	if (starti >= 0) {
+		uint32_t color = context->colors[context->regs[REG_BG_COLOR]];
+		uint32_t * start = context->framebuf;
+		start += starti;
+		for (int i = 0; i < 2; i++) {
+			*(start++) = color;
+		}
+	}
+}
+
+uint32_t const h40_hsync_cycles[] = {19, 20, 20, 20, 18, 20, 20, 20, 18, 20, 20, 20, 18, 20, 20, 20, 19};
+
+void vdp_advance_line(vdp_context *context)
+{
+	context->vcounter++;
+	context->vcounter &= 0x1FF;
+	if (context->flags2 & FLAG2_REGION_PAL) {
+		if (context->latched_mode & BIT_PAL) {
+			if (context->vcounter == 0x10B) {
+				context->vcounter = 0x1D2;
 			}
+		} else if (context->vcounter == 0x103){
+			context->vcounter = 0x1CA;
 		}
-		if (starti >= 0) {
-			if (context->b32) {
-				uint32_t color = context->colors[context->regs[REG_BG_COLOR]];
-				uint32_t * start = context->framebuf;
-				start += starti;
-				for (int i = 0; i < 2; i++) {
-					*(start++) = color;
-				}
-			} else {
-				uint16_t color = context->colors[context->regs[REG_BG_COLOR]];
-				uint16_t * start = context->framebuf;
-				start += starti;
-				for (int i = 0; i < 2; i++) {
-					*(start++) = color;
-				}
-			}
-		}
+	} else if (!(context->latched_mode & BIT_PAL) &&  context->vcounter == 0xEB) {
+		context->vcounter = 0x1E5;
+	}
+	
+	if (context->vcounter > (context->latched_mode & BIT_PAL ? PAL_INACTIVE_START : NTSC_INACTIVE_START)) {
+		context->hint_counter = context->regs[REG_HINT];
+	} else if (context->hint_counter) {
+		context->hint_counter--;
+	} else {
+		context->flags2 |= FLAG2_HINT_PENDING;
+		context->pending_hint_start = context->cycles;
+		context->hint_counter = context->regs[REG_HINT];
 	}
 }
 
@@ -1415,126 +1516,68 @@
 	while(context->cycles < target_cycles)
 	{
 		context->flags &= ~FLAG_UNUSED_SLOT;
-		uint32_t line = context->cycles / MCLKS_LINE;
-		uint32_t active_lines = context->latched_mode & BIT_PAL ? PAL_ACTIVE : NTSC_ACTIVE;
-		if (!context->cycles) {
+		uint32_t line = context->vcounter;
+		uint32_t slot = context->hslot;
+		
+		if (!line && !slot) {
+			//TODO: Figure out when this actually happens
 			latch_mode(context);
 		}
-		uint32_t linecyc = context->cycles % MCLKS_LINE;
-		if (linecyc == 0) {
-			if (line <= 1 || line >= active_lines) {
-				context->hint_counter = context->regs[REG_HINT];
-			} else if (context->hint_counter) {
-				context->hint_counter--;
-			} else {
-				context->flags2 |= FLAG2_HINT_PENDING;
-				context->hint_counter = context->regs[REG_HINT];
+		uint32_t inactive_start = context->latched_mode & BIT_PAL ? PAL_INACTIVE_START : NTSC_INACTIVE_START;
+
+		uint8_t is_h40 = context->regs[REG_MODE_4] & BIT_H40;
+		if (is_h40) {
+			if (slot == 167) {
+				context->cur_slot = MAX_DRAWS-1;
+				memset(context->linebuf, 0, LINEBUF_SIZE);
+			} else if (slot == 171) {
+				context->sprite_index = 0x80;
+				context->slot_counter = MAX_SPRITES_LINE;
 			}
-		} else if(line == active_lines) {
-			uint32_t intcyc = context->latched_mode & BIT_H40 ? VINT_CYCLE_H40 :  VINT_CYCLE_H32;
-			if (linecyc == intcyc) {
-				context->flags2 |= FLAG2_VINT_PENDING;
+		} else {
+			if (slot == 134) {
+				context->cur_slot = MAX_DRAWS_H32-1;
+				memset(context->linebuf, 0, LINEBUF_SIZE);
+			} else if (slot == 138) {
+				context->sprite_index = 0x80;
+				context->slot_counter = MAX_SPRITES_LINE_H32;
 			}
 		}
-		uint32_t inccycles, slot;
-		if (context->latched_mode & BIT_H40){
-			if (linecyc < MCLKS_SLOT_H40*HSYNC_SLOT_H40) {
-				slot = linecyc/MCLKS_SLOT_H40;
+		if(line == inactive_start) {
+			uint32_t intslot = context->regs[REG_MODE_4] & BIT_H40 ? VINT_SLOT_H40 :  VINT_SLOT_H32;
+			if (slot == intslot) {
+				context->flags2 |= FLAG2_VINT_PENDING;
+				context->pending_vint_start = context->cycles;
+			}
+		}
+		uint32_t inccycles;
+		//line 0x1FF is basically active even though it's not displayed
+		uint8_t active_slot = line < inactive_start || line == 0x1FF;
+		if (is_h40) {
+			if (slot < HSYNC_SLOT_H40 || slot >= HSYNC_END_H40) {
 				inccycles = MCLKS_SLOT_H40;
-			} else if(linecyc < MCLK_WEIRD_END) {
-				switch(linecyc-(MCLKS_SLOT_H40*HSYNC_SLOT_H40))
-				{
-				case 0:
-					inccycles = 19;
-					slot = 0;
-					break;
-				case 19:
-					slot = 1;
-					inccycles = 20;
-					break;
-				case 39:
-					slot = 2;
-					inccycles = 20;
-					break;
-				case 59:
-					slot = 3;
-					inccycles = 20;
-					break;
-				case 79:
-					slot = 4;
-					inccycles = 18;
-					break;
-				case 97:
-					slot = 5;
-					inccycles = 20;
-					break;
-				case 117:
-					slot = 6;
-					inccycles = 20;
-					break;
-				case 137:
-					slot = 7;
-					inccycles = 20;
-					break;
-				case 157:
-					slot = 8;
-					inccycles = 18;
-					break;
-				case 175:
-					slot = 9;
-					inccycles = 20;
-					break;
-				case 195:
-					slot = 10;
-					inccycles = 20;
-					break;
-				case 215:
-					slot = 11;
-					inccycles = 20;
-					break;
-				case 235:
-					slot = 12;
-					inccycles = 18;
-					break;
-				case 253:
-					slot = 13;
-					inccycles = 20;
-					break;
-				case 273:
-					slot = 14;
-					inccycles = 20;
-					break;
-				case 293:
-					slot = 15;
-					inccycles = 20;
-					break;
-				case 313:
-					slot = 16;
-					inccycles = 19;
-					break;
-				default:
-					fprintf(stderr, "cycles after weirdness %d\n", linecyc-(MCLKS_SLOT_H40*HSYNC_SLOT_H40));
-					exit(1);
-				}
-				slot += HSYNC_SLOT_H40;
 			} else {
-				slot = (linecyc-MCLK_WEIRD_END)/MCLKS_SLOT_H40 + SLOT_WEIRD_END;
-				inccycles = MCLKS_SLOT_H40;
+				inccycles = h40_hsync_cycles[slot-HSYNC_SLOT_H40];
+			}
+			//the first inactive line behaves as an active one for the first 4 slots
+			if (line == inactive_start && slot > 166 && slot < 171) {
+				active_slot = 1;
 			}
 		} else {
 			inccycles = MCLKS_SLOT_H32;
-			slot = linecyc/MCLKS_SLOT_H32;
+			//the first inactive line behaves as an active one for the first 4 slots
+			if (line == inactive_start && slot > 166 && slot < 171) {
+				active_slot = 1;
+			}
 		}
-		if ((line < active_lines || (line == active_lines && linecyc < (context->latched_mode & BIT_H40 ? 64 : 80))) && context->regs[REG_MODE_2] & DISPLAY_ENABLE) {
-			//first sort-of active line is treated as 255 internally
-			//it's used for gathering sprite info for line
-			line = (line - 1) & 0xFF;
-
-			//Convert to slot number
-			if (context->latched_mode & BIT_H40){
-				if (!slot && line != (active_lines-1) && (target_cycles - context->cycles) >= MCLKS_LINE) {
+		uint8_t inc_slot = 1;
+		if (context->regs[REG_MODE_2] & DISPLAY_ENABLE && active_slot) {
+			//run VDP rendering for a slot or a line
+			if (is_h40) {
+				if (slot == LINE_CHANGE_H40 && line < inactive_start && (target_cycles - context->cycles) >= MCLKS_LINE) {
 					vdp_h40_line(line, context);
 					inccycles = MCLKS_LINE;
+					inc_slot = 0;
 				} else {
 					vdp_h40(line, slot, context);
 				}
@@ -1545,7 +1588,7 @@
 			if (!is_refresh(context, slot)) {
 				external_slot(context);
 			}
-			if (line < active_lines) {
+			if (line < inactive_start) {
 				check_render_bg(context, line, slot);
 			}
 		}
@@ -1553,12 +1596,38 @@
 			run_dma_src(context, slot);
 		}
 		context->cycles += inccycles;
+		if (inc_slot) {
+			context->hslot++;
+			context->hslot &= 0xFF;
+			if (is_h40) {
+				if (context->hslot == LINE_CHANGE_H40) {
+					vdp_advance_line(context);
+					if (context->vcounter == (inactive_start + 8)) {
+						context->frame++;
+					}
+				} else if (context->hslot == 183) {
+					context->hslot = 229;
+				}
+			} else {
+				if (context->hslot == LINE_CHANGE_H32) {
+					vdp_advance_line(context);
+					if (context->vcounter == (inactive_start + 8)) {
+						context->frame++;
+					}
+				} else if (context->hslot == 148) {
+					context->hslot = 233;
+				}
+			}
+
+		} else {
+			vdp_advance_line(context);
+		}
 	}
 }
 
 uint32_t vdp_run_to_vblank(vdp_context * context)
 {
-	uint32_t target_cycles = ((context->latched_mode & BIT_PAL) ? PAL_ACTIVE : NTSC_ACTIVE) * MCLKS_LINE;
+	uint32_t target_cycles = ((context->latched_mode & BIT_PAL) ? PAL_INACTIVE_START : NTSC_INACTIVE_START) * MCLKS_LINE;
 	vdp_run_context(context, target_cycles);
 	return context->cycles;
 }
@@ -1570,7 +1639,7 @@
 		if (!dmalen) {
 			dmalen = 0x10000;
 		}
-		uint32_t min_dma_complete = dmalen * (context->latched_mode & BIT_H40 ? 16 : 20);
+		uint32_t min_dma_complete = dmalen * (context->regs[REG_MODE_4] & BIT_H40 ? 16 : 20);
 		if ((context->regs[REG_DMASRC_H] & 0xC0) == 0xC0 || (context->cd & 0xF) == VRAM_WRITE) {
 			//DMA copies take twice as long to complete since they require a read and a write
 			//DMA Fills and transfers to VRAM also take twice as long as it requires 2 writes for a single word
@@ -1606,7 +1675,7 @@
 				//DMA copy or 68K -> VDP, transfer starts immediately
 				context->flags |= FLAG_DMA_RUN;
 				context->dma_cd = context->cd;
-				//printf("DMA start at cycle %d\n", context->cycles);
+				//printf("DMA start (length: %X) at cycle %d, frame: %d, vcounter: %d, hslot: %d\n", (context->regs[REG_DMALEN_H] << 8) | context->regs[REG_DMALEN_L], context->cycles, context->frame, context->vcounter, context->hslot);
 				if (!(context->regs[REG_DMASRC_H] & 0x80)) {
 					//printf("DMA Address: %X, New CD: %X, Source: %X, Length: %X\n", context->address, context->cd, (context->regs[REG_DMASRC_H] << 17) | (context->regs[REG_DMASRC_M] << 9) | (context->regs[REG_DMASRC_L] << 1), context->regs[REG_DMALEN_H] << 8 | context->regs[REG_DMALEN_L]);
 					return 1;
@@ -1629,13 +1698,16 @@
 				if (reg == REG_BG_COLOR) {
 					value &= 0x3F;
 				}
+				/*if (reg == REG_MODE_4 && ((value ^ context->regs[reg]) & BIT_H40)) {
+					printf("Mode changed from H%d to H%d @ %d, frame: %d\n", context->regs[reg] & BIT_H40 ? 40 : 32, value & BIT_H40 ? 40 : 32, context->cycles, context->frame);
+				}*/
 				context->regs[reg] = value;
 				if (reg == REG_MODE_4) {
 					context->double_res = (value & (BIT_INTERLACE | BIT_DOUBLE_RES)) == (BIT_INTERLACE | BIT_DOUBLE_RES);
 					if (!context->double_res) {
 						context->framebuf = context->oddbuf;
 					}
-				}
+					}
 				context->cd &= 0x3C;
 			}
 		} else {
@@ -1661,10 +1733,10 @@
 		context->flags &= ~FLAG_DMA_RUN;
 	}
 	while (context->fifo_write == context->fifo_read) {
-		vdp_run_context(context, context->cycles + ((context->latched_mode & BIT_H40) ? 16 : 20));
+		vdp_run_context(context, context->cycles + ((context->regs[REG_MODE_4] & BIT_H40) ? 16 : 20));
 	}
 	fifo_entry * cur = context->fifo + context->fifo_write;
-	cur->cycle = context->cycles + ((context->latched_mode & BIT_H40) ? 16 : 20)*FIFO_LATENCY;
+	cur->cycle = context->cycles + ((context->regs[REG_MODE_4] & BIT_H40) ? 16 : 20)*FIFO_LATENCY;
 	cur->address = context->address;
 	cur->value = value;
 	if (context->cd & 0x20 && (context->regs[REG_DMASRC_H] & 0xC0) == 0x80) {
@@ -1709,18 +1781,40 @@
 	if ((context->regs[REG_MODE_4] & BIT_INTERLACE) && context->framebuf == context->oddbuf) {
 		value |= 0x10;
 	}
-	uint32_t line= context->cycles / MCLKS_LINE;
-	uint32_t linecyc = context->cycles % MCLKS_LINE;
-	if (line >= (context->latched_mode & BIT_PAL ? PAL_ACTIVE : NTSC_ACTIVE) || !(context->regs[REG_MODE_2] & BIT_DISP_EN)) {
+	uint32_t line= context->vcounter;
+	uint32_t slot = context->hslot;
+	uint32_t inactive_start = (context->latched_mode & BIT_PAL ? PAL_INACTIVE_START : NTSC_INACTIVE_START);
+	if (
+		(
+			line > inactive_start
+			&& line < 0x1FF
+		)
+		|| (line == inactive_start 
+			&& (
+				slot >= (context->regs[REG_MODE_4] & BIT_H40 ? VBLANK_START_H40 : VBLANK_START_H32)
+				|| slot < (context->regs[REG_MODE_4] & BIT_H40 ? LINE_CHANGE_H40 : LINE_CHANGE_H32)
+			)
+		)
+		|| (line == 0x1FF
+			&& slot < (context->regs[REG_MODE_4] & BIT_H40 ? VBLANK_START_H40 : VBLANK_START_H32))
+			&& slot >= (context->regs[REG_MODE_4] & BIT_H40 ? LINE_CHANGE_H40 : LINE_CHANGE_H32)
+		|| !(context->regs[REG_MODE_2] & BIT_DISP_EN)
+	) {
 		value |= 0x8;
 	}
-	if (linecyc < (context->latched_mode & BIT_H40 ? HBLANK_CLEAR_H40 : HBLANK_CLEAR_H32)) {
-		value |= 0x4;
+	if (context->regs[REG_MODE_4] & BIT_H40) {
+		if (slot < HBLANK_END_H40 || slot > HBLANK_START_H40) {
+			value |= 0x4;
+		}
+	} else {
+		if (slot < HBLANK_END_H32 || slot > HBLANK_START_H32) {
+			value |= 0x4;
+		}
 	}
 	if (context->flags & FLAG_DMA_RUN) {
 		value |= 0x2;
 	}
-	if (context->latched_mode & BIT_PAL) {//Not sure about this, need to verify
+	if (context->flags2 & FLAG2_REGION_PAL) {
 		value |= 0x1;
 	}
 	//printf("status read at cycle %d returned %X\n", context->cycles, value);
@@ -1741,7 +1835,7 @@
 	context->flags &= ~FLAG_UNUSED_SLOT;
 	//context->flags2 |= FLAG2_READ_PENDING;
 	while (!(context->flags & FLAG_UNUSED_SLOT)) {
-		vdp_run_context(context, context->cycles + ((context->latched_mode & BIT_H40) ? 16 : 20));
+		vdp_run_context(context, context->cycles + ((context->regs[REG_MODE_4] & BIT_H40) ? 16 : 20));
 	}
 	uint16_t value = 0;
 	switch (context->cd & 0xF)
@@ -1751,7 +1845,7 @@
 		context->flags &= ~FLAG_UNUSED_SLOT;
 		context->flags2 |= FLAG2_READ_PENDING;
 		while (!(context->flags & FLAG_UNUSED_SLOT)) {
-			vdp_run_context(context, context->cycles + ((context->latched_mode & BIT_H40) ? 16 : 20));
+			vdp_run_context(context, context->cycles + ((context->regs[REG_MODE_4] & BIT_H40) ? 16 : 20));
 		}
 		value |= context->vdpmem[context->address | 1];
 		break;
@@ -1782,102 +1876,8 @@
 	if (context->regs[REG_MODE_1] & BIT_HVC_LATCH) {
 		return context->hv_latch;
 	}
-	uint32_t line= context->cycles / MCLKS_LINE;
-	if (!line) {
-		line = 0xFF;
-	} else {
-		line--;
-		if (line > 0xEA) {
-			line = (line + 0xFA) & 0xFF;
-		}
-	}
-	uint32_t linecyc = context->cycles % MCLKS_LINE;
-	if (context->latched_mode & BIT_H40) {
-		uint32_t slot;
-		if (linecyc < MCLKS_SLOT_H40*HSYNC_SLOT_H40) {
-			slot = linecyc/MCLKS_SLOT_H40;
-		} else if(linecyc < MCLK_WEIRD_END) {
-			switch(linecyc-(MCLKS_SLOT_H40*HSYNC_SLOT_H40))
-			{
-			case 0:
-				slot = 0;
-				break;
-			case 19:
-				slot = 1;
-				break;
-			case 39:
-				slot = 2;
-				break;
-			case 59:
-				slot = 2;
-				break;
-			case 79:
-				slot = 3;
-				break;
-			case 97:
-				slot = 4;
-				break;
-			case 117:
-				slot = 5;
-				break;
-			case 137:
-				slot = 6;
-				break;
-			case 157:
-				slot = 7;
-				break;
-			case 175:
-				slot = 8;
-				break;
-			case 195:
-				slot = 9;
-				break;
-			case 215:
-				slot = 11;
-				break;
-			case 235:
-				slot = 12;
-				break;
-			case 253:
-				slot = 13;
-				break;
-			case 273:
-				slot = 14;
-				break;
-			case 293:
-				slot = 15;
-				break;
-			case 313:
-				slot = 16;
-				break;
-			default:
-				fprintf(stderr, "cycles after weirdness %d\n", linecyc-(MCLKS_SLOT_H40*HSYNC_SLOT_H40));
-				exit(1);
-			}
-			slot += HSYNC_SLOT_H40;
-		} else {
-			slot = (linecyc-MCLK_WEIRD_END)/MCLKS_SLOT_H40 + SLOT_WEIRD_END;
-		}
-		linecyc = slot * 2;
-		if (linecyc >= 86) {
-			linecyc -= 86;
-		} else {
-			linecyc += 334;
-		}
-		if (linecyc > 0x16C) {
-			linecyc += 92;
-		}
-	} else {
-		linecyc /= 10;
-		if (linecyc >= 74) {
-			linecyc -= 74;
-		} else {
-			linecyc += 268;
-		}
-		if (linecyc > 0x127) {
-			linecyc += 170;
-		}
-	}
+	uint32_t line= context->vcounter & 0xFF;
+	uint32_t linecyc = context->hslot;
 	linecyc &= 0xFF;
 	if (context->double_res) {
 		line <<= 1;
@@ -1897,6 +1897,16 @@
 void vdp_adjust_cycles(vdp_context * context, uint32_t deduction)
 {
 	context->cycles -= deduction;
+	if (context->pending_vint_start >= deduction) {
+		context->pending_vint_start -= deduction;
+	} else {
+		context->pending_vint_start = 0;
+	}
+	if (context->pending_hint_start >= deduction) {
+		context->pending_hint_start -= deduction;
+	} else {
+		context->pending_hint_start = 0;
+	}
 	if (context->fifo_read >= 0) {
 		int32_t idx = context->fifo_read;
 		do {
@@ -1910,25 +1920,129 @@
 	}
 }
 
+//Returns a cycle count (in MCLKs, going by the MCLKS_* constants used) derived
+//from the current hslot for H40 mode. Judging by the name and by the last
+//branch, this is the distance from the current hslot to the point where hslot
+//wraps past 255 back to 0 -- TODO confirm against vdp_run_context.
+uint32_t vdp_cycles_hslot_wrap_h40(vdp_context * context)
+{
+	if (context->hslot < 183) {
+		//a full line minus the time taken by the slots already counted on it
+		return MCLKS_LINE - context->hslot * MCLKS_SLOT_H40;
+	} else if (context->hslot < HSYNC_END_H40) {
+		//slots up to HSYNC_SLOT_H40 are charged MCLKS_SLOT_H40 each
+		uint32_t before_hsync = context->hslot < HSYNC_SLOT_H40 ? (HSYNC_SLOT_H40 - context->hslot) * MCLKS_SLOT_H40 : 0;
+		uint32_t hsync = 0;
+		//the remaining entries of h40_hsync_cycles are summed individually,
+		//starting at the entry for the current slot (or 0 when at or before HSYNC_SLOT_H40)
+		for (int i = context->hslot <= HSYNC_SLOT_H40 ? 0 : context->hslot - HSYNC_SLOT_H40; i < sizeof(h40_hsync_cycles)/sizeof(uint32_t); i++)
+		{
+			hsync += h40_hsync_cycles[i];
+		}
+		//slots from HSYNC_END_H40 up to 256 are charged MCLKS_SLOT_H40 each again
+		uint32_t after_hsync = (256- HSYNC_END_H40) * MCLKS_SLOT_H40;
+		return before_hsync + hsync + after_hsync;
+	} else {
+		//past HSYNC_END_H40: only regular slots remain before 256
+		return (256-context->hslot) * MCLKS_SLOT_H40;
+	}
+}
+
+//Returns the cycle distance from the current hslot to the next time hslot
+//reaches the LINE_CHANGE_* slot of the active horizontal mode (H40 or H32,
+//selected by BIT_H40 in REG_MODE_4).
+uint32_t vdp_cycles_next_line(vdp_context * context)
+{
+	uint16_t slot = context->hslot;
+	if (context->regs[REG_MODE_4] & BIT_H40) {
+		if (slot >= LINE_CHANGE_H40) {
+			//already at or past the line change: go through the hslot wrap first
+			return vdp_cycles_hslot_wrap_h40(context) + LINE_CHANGE_H40 * MCLKS_SLOT_H40;
+		}
+		return (LINE_CHANGE_H40 - slot) * MCLKS_SLOT_H40;
+	}
+	//H32
+	if (slot < LINE_CHANGE_H32) {
+		return (LINE_CHANGE_H32 - slot) * MCLKS_SLOT_H32;
+	}
+	if (slot < 148) {
+		return MCLKS_LINE - (slot - LINE_CHANGE_H32) * MCLKS_SLOT_H32;
+	}
+	return (256-slot + LINE_CHANGE_H32) * MCLKS_SLOT_H32;
+}
+
+//Returns the cycle distance from the current position to vcounter value
+//`target`: the number of line steps needed is computed first, then all but the
+//first step are charged MCLKS_LINE and the first one is charged
+//vdp_cycles_next_line().
+//
+//jump_start/jump_dst describe a range of vcounter values that is left out of
+//the line count -- presumably vcounter jumps from jump_start to jump_dst; TODO
+//confirm against the code that advances vcounter. The pair depends on the
+//console region (FLAG2_REGION_PAL) and the latched BIT_PAL mode bit.
+uint32_t vdp_cycles_to_line(vdp_context * context, uint32_t target)
+{
+	uint32_t jump_start, jump_dst;
+	if (context->flags2 & FLAG2_REGION_PAL) {
+		if (context->latched_mode & BIT_PAL) {
+			jump_start = 0x10B;
+			jump_dst = 0x1D2;
+		} else {
+			jump_start = 0x103;
+			jump_dst = 0x1CA;
+		}
+	} else {
+		if (context->latched_mode & BIT_PAL) {
+			//both zero: no lines are left out for this combination
+			jump_start = 0;
+			jump_dst = 0;
+		} else {
+			jump_start = 0xEB;
+			jump_dst = 0x1E5;
+		}
+	}
+	uint32_t lines;
+	if (context->vcounter < target) {
+		//target is numerically ahead of the current vcounter
+		if (target < jump_start) {
+			lines = target - context->vcounter;
+		} else {
+			//leave out the range between jump_start and jump_dst
+			lines = jump_start - context->vcounter + target - jump_dst;
+		}
+	} else {
+		//target is at or behind vcounter: count up to 512 first...
+		if (context->vcounter < jump_start) {
+			lines = jump_start - context->vcounter + 512 - jump_dst;
+		} else {
+			lines = 512 - context->vcounter;
+		}
+		//...then from 0 up to target
+		if (target < jump_start) {
+			lines += target;
+		} else {
+			lines += jump_start + target - jump_dst;
+		}
+	}
+	return MCLKS_LINE * (lines - 1) + vdp_cycles_next_line(context);
+}
+
+//Returns the vcounter value treated as the end of a frame for the current
+//combination of console region (FLAG2_REGION_PAL) and latched BIT_PAL mode bit.
+uint32_t vdp_frame_end_line(vdp_context * context)
+{
+	if (!(context->latched_mode & BIT_PAL)) {
+		//same result for both regions when the PAL mode bit is clear
+		return NTSC_INACTIVE_START + 8;
+	}
+	if (context->flags2 & FLAG2_REGION_PAL) {
+		return PAL_INACTIVE_START + 8;
+	}
+	//PAL mode bit set on a non-PAL region
+	return 512;
+}
+
+//Returns context->cycles plus the distance reported by vdp_cycles_to_line()
+//for the line given by vdp_frame_end_line(). Note that, despite the name, the
+//result is on the same scale as context->cycles rather than a relative count.
+uint32_t vdp_cycles_to_frame_end(vdp_context * context)
+{
+	return context->cycles + vdp_cycles_to_line(context, vdp_frame_end_line(context));
+}
+
 uint32_t vdp_next_hint(vdp_context * context)
 {
 	if (!(context->regs[REG_MODE_1] & BIT_HINT_EN)) {
 		return 0xFFFFFFFF;
 	}
 	if (context->flags2 & FLAG2_HINT_PENDING) {
-		return context->cycles;
-	}
-	uint32_t active_lines = context->latched_mode & BIT_PAL ? PAL_ACTIVE : NTSC_ACTIVE;
-	uint32_t line = context->cycles / MCLKS_LINE;
-	if (line >= active_lines) {
-		return 0xFFFFFFFF;
+		return context->pending_hint_start;
 	}
-	uint32_t linecyc = context->cycles % MCLKS_LINE;
-	uint32_t hcycle = context->cycles + context->hint_counter * MCLKS_LINE + MCLKS_LINE - linecyc;
-	if (!line) {
-		hcycle += MCLKS_LINE;
+	uint32_t inactive_start = context->latched_mode & BIT_PAL ? PAL_INACTIVE_START : NTSC_INACTIVE_START;
+	uint32_t hint_line;
+	if (context->vcounter + context->hint_counter >= inactive_start) {
+		if (context->regs[REG_HINT] > inactive_start) {
+			return 0xFFFFFFFF;
+		}
+		hint_line = context->regs[REG_HINT];
+	} else {
+		hint_line = context->vcounter + context->hint_counter + 1;
 	}
-	return hcycle;
+
+	return context->cycles + vdp_cycles_to_line(context, hint_line);
 }
 
 uint32_t vdp_next_vint(vdp_context * context)
@@ -1937,31 +2051,42 @@
 		return 0xFFFFFFFF;
 	}
 	if (context->flags2 & FLAG2_VINT_PENDING) {
-		return context->cycles;
+		return context->pending_vint_start;
 	}
-	uint32_t active_lines = context->latched_mode & BIT_PAL ? PAL_ACTIVE : NTSC_ACTIVE;
-	uint32_t vcycle =  MCLKS_LINE * active_lines;
-	if (context->latched_mode & BIT_H40) {
-		vcycle += VINT_CYCLE_H40;
-	} else {
-		vcycle += VINT_CYCLE_H32;
-	}
-	if (vcycle < context->cycles) {
-		return 0xFFFFFFFF;
-	}
-	return vcycle;
+
+
+	return vdp_next_vint_z80(context);
 }
 
 uint32_t vdp_next_vint_z80(vdp_context * context)
 {
-	uint32_t active_lines = context->latched_mode & BIT_PAL ? PAL_ACTIVE : NTSC_ACTIVE;
-	uint32_t vcycle =  MCLKS_LINE * active_lines;
-	if (context->latched_mode & BIT_H40) {
-		vcycle += VINT_CYCLE_H40;
+	uint32_t inactive_start = context->latched_mode & BIT_PAL ? PAL_INACTIVE_START : NTSC_INACTIVE_START;
+	if (context->vcounter == inactive_start) {
+		if (context->regs[REG_MODE_4] & BIT_H40) {
+			if (context->hslot >= LINE_CHANGE_H40) {
+				return context->cycles + vdp_cycles_hslot_wrap_h40(context) + VINT_SLOT_H40 * MCLKS_SLOT_H40;
+			} else if (context->hslot <= VINT_SLOT_H40) {
+				return context->cycles + (VINT_SLOT_H40 - context->hslot) * MCLKS_SLOT_H40;
+			}
+		} else {
+			if (context->hslot >= LINE_CHANGE_H32) {
+				if (context->hslot < 148) {
+					return context->cycles + (VINT_SLOT_H32 + 148 - context->hslot + 256 - 233) * MCLKS_SLOT_H32;
+				} else {
+					return context->cycles + (VINT_SLOT_H32 + 256 - context->hslot) * MCLKS_SLOT_H32;
+				}
+			} else if (context->hslot <= VINT_SLOT_H32) {
+				return context->cycles + (VINT_SLOT_H32 - context->hslot) * MCLKS_SLOT_H32;
+			}
+		}
+	}
+	int32_t cycles_to_vint = vdp_cycles_to_line(context, inactive_start);
+	if (context->regs[REG_MODE_4] & BIT_H40) {
+		cycles_to_vint += MCLKS_LINE - (LINE_CHANGE_H40 - VINT_SLOT_H40) * MCLKS_SLOT_H40;
 	} else {
-		vcycle += VINT_CYCLE_H32;
+		cycles_to_vint += (VINT_SLOT_H32 + 148 - LINE_CHANGE_H32 + 256 - 233) * MCLKS_SLOT_H32;
 	}
-	return vcycle;
+	return context->cycles + cycles_to_vint;
 }
 
 void vdp_int_ack(vdp_context * context, uint16_t int_num)
--- a/vdp.h	Thu May 28 21:09:33 2015 -0700
+++ b/vdp.h	Thu May 28 21:19:55 2015 -0700
@@ -49,6 +49,7 @@
 #define FLAG2_HINT_PENDING   0x02
 #define FLAG2_READ_PENDING   0x04
 #define FLAG2_SPRITE_COLLIDE 0x08
+#define FLAG2_REGION_PAL     0x10
 
 #define DISPLAY_ENABLE 0x40
 
@@ -131,6 +132,8 @@
 	uint8_t     regs[VDP_REGS];
 	//cycle count in MCLKs
 	uint32_t    cycles;
+	uint32_t    pending_vint_start;
+	uint32_t    pending_hint_start;
 	uint8_t     *vdpmem;
 	//stores 2-bit palette + 4-bit palette index + priority for current sprite line
 	uint8_t     *linebuf;
@@ -142,9 +145,13 @@
 	uint32_t    colors[CRAM_SIZE*3];
 	uint32_t    debugcolors[1 << (3 + 1 + 1 + 1)];//3 bits for source, 1 bit for priority, 1 bit for shadow, 1 bit for hilight
 	uint16_t    vsram[VSRAM_SIZE];
-	uint8_t     latched_mode;
+	uint16_t    vscroll_latch[2];
+	uint32_t    frame;
+	uint16_t    vcounter;
+	uint16_t    hslot; //hcounter/2
 	uint16_t    hscroll_a;
 	uint16_t    hscroll_b;
+	uint8_t     latched_mode;
 	uint8_t	    sprite_index;
 	uint8_t     sprite_draws;
 	int8_t      slot_counter;
@@ -163,11 +170,12 @@
 	uint8_t     buf_a_off;
 	uint8_t     buf_b_off;
 	uint8_t     debug;
+	uint8_t     debug_pal;
 	uint8_t     *tmp_buf_a;
 	uint8_t     *tmp_buf_b;
 } vdp_context;
 
-void init_vdp_context(vdp_context * context);
+void init_vdp_context(vdp_context * context, uint8_t region_pal);
 void vdp_run_context(vdp_context * context, uint32_t target_cycles);
 //runs from current cycle count to VBLANK for the current mode, returns ending cycle count
 uint32_t vdp_run_to_vblank(vdp_context * context);
@@ -190,6 +198,8 @@
 void vdp_print_sprite_table(vdp_context * context);
 void vdp_print_reg_explain(vdp_context * context);
 void latch_mode(vdp_context * context);
+uint32_t vdp_cycles_to_frame_end(vdp_context * context);
+uint32_t vdp_frame_end_line(vdp_context *context);
 
 extern int32_t color_map[1 << 12];
 
--- a/vgmplay.c	Thu May 28 21:09:33 2015 -0700
+++ b/vgmplay.c	Thu May 28 21:19:55 2015 -0700
@@ -142,7 +142,7 @@
 
 	uint32_t fps = 60;
 	config = load_config(argv[0]);
-	render_init(320, 240, "vgm play", 60, 0, 0);
+	render_init(320, 240, "vgm play", 60, 0);
 
 	uint32_t opts = 0;
 	if (argc >= 3 && !strcmp(argv[2], "-y")) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vos_prog_info.c	Thu May 28 21:19:55 2015 -0700
@@ -0,0 +1,100 @@
+#include <stdio.h>
+#include "vos_program_module.h"
+
+//Dumps the header, module map and external variable table of a VOS program
+//module file to stdout.
+//
+//argv[1] is the path of the file to inspect. Returns 0 on success and 1 when
+//the argument is missing, the file cannot be opened or the header cannot be
+//read. A module map or external variable table that fails to load is reported
+//on stderr and its listing is skipped.
+int main(int argc, char ** argv)
+{
+	if (argc < 2) {
+		fprintf(stderr, "Usage: %s FILE\n", argc > 0 ? argv[0] : "vos_prog_info");
+		return 1;
+	}
+	FILE * f = fopen(argv[1], "rb");
+	if (!f) {
+		fprintf(stderr, "Failed to open %s for reading\n", argv[1]);
+		return 1;
+	}
+	vos_program_module header;
+	if (!vos_read_header(f, &header)) {
+		fprintf(stderr, "Failed to read program module header from %s\n", argv[1]);
+		fclose(f);
+		return 1;
+	}
+	//a table that fails to load leaves its pointer NULL, so remember which
+	//loads succeeded and only walk those tables below
+	int have_modules = vos_read_alloc_module_map(f, &header);
+	int have_external_vars = vos_read_alloc_external_vars(f, &header);
+	//everything needed is in memory now
+	fclose(f);
+
+	printf("Version: %d\n", header.version);
+	printf("Binder Version: %s\n", header.binder_version.str);
+	printf("Binder Options: %s\n", header.binder_options.str);
+	printf("System name: %s\n", header.system_name.str);
+	printf("User name: %s\n", header.user_name.str);
+	//date_bound is a uint32_t, so print it as unsigned
+	printf("Date bound: %u\n", header.date_bound);
+	printf("Code addresss: 0x%X, Static address: 0x%X\n",
+	       header.main_entry_link.code_address, header.main_entry_link.static_address);
+	printf("User boundary: 0x%X\n", header.user_boundary);
+	printf("Num modules: %d\n", header.n_modules);
+	printf("Num extern vars: %d\n", header.n_external_vars);
+	printf("Num link names: %d\n", header.n_link_names);
+	printf("Num unsapped links: %d\n", header.n_unsnapped_links);
+	printf("Num VM pages: %d\n", header.n_vm_pages);
+	printf("Num header pages: %d\n", header.n_header_pages);
+	for (int i = 0; i < 3; i++) {
+		for (int j = 0; j < 4; j++) {
+			printf("Info %d:%d\n\tAddress: 0x%X\n\tLength: 0x%X\n",
+			       i, j, header.info[i][j].address, header.info[i][j].len);
+		}
+	}
+	printf("Module map address: 0x%X\n", header.module_map_address);
+	printf("Module map length: 0x%X\n", header.module_map_len);
+	printf("External vars map address: 0x%X\n", header.external_vars_map_address);
+	printf("External vars map length: 0x%X\n", header.external_vars_map_len);
+	printf("Link names map address: 0x%X\n", header.link_names_map_address);
+	printf("Link names map length: 0x%X\n", header.link_names_map_len);
+	printf("Header address: 0x%X\n", header.header_address);
+	printf("Header length: 0x%X\n", header.header_len);
+	//printf("Access Info: 0x%X\n", header.header_address);
+	printf("Flags: 0x%X\n", header.flags);
+	printf("Num tasks: %d\n", header.n_tasks);
+	printf("Stack Size: 0x%X\n", header.stack_len);
+	printf("Num entries: %d\n", header.n_entries);
+	printf("Entry map address: 0x%X\n", header.entry_map_address);
+	printf("Entry map length: 0x%X\n", header.entry_map_len);
+	printf("Pop Version: %d\n", header.pop_version);
+	printf("Processor: %d\n", header.processor);
+	printf("Processor family: %d\n", header.processor_family);
+	printf("Release name: %s\n", header.release_name.str);
+	//n_relocations is a uint32_t, so print it as unsigned
+	printf("Relocation info:\n\tMap Addres: 0x%X\n\tMap Length: 0x%X\n\tNum Relocations: %u\n",
+	       header.relocation_info.map_address, header.relocation_info.map_len,
+	       header.relocation_info.n_relocations);
+	printf("High water mark: 0x%X\n", header.high_water_mark);
+	//the copyright notice and the program name are separate header fields;
+	//print each one under its own label
+	printf("Copyright notice: %s\n", header.copyright_notice.str);
+	printf("Program name: %s\n", header.program_name.str);
+	printf("String pool address: 0x%X\n", header.string_pool_address);
+	printf("String pool length: 0x%X\n", header.string_pool_len);
+	printf("Object dir map address: 0x%X\n", header.obj_dir_map_address);
+	printf("Object dir map length: 0x%X\n", header.obj_dir_map_len);
+	puts("Global offset table addresses:");
+	for (int i = 0; i < 3; i++) {
+		printf("\t%d: 0x%X\n", i, header.global_offset_table_address[i]);
+	}
+	for (int i = 0; i < 3; i++) {
+		printf("Block map info %d\n\tAddress: 0x%X\n\tLength: 0x%X\n",
+		       i, header.block_map_info[i].address, header.block_map_info[i].len);
+	}
+	printf("Secton map file address: 0x%X\n", header.section_map_file_address);
+	printf("Secton map address: 0x%X\n", header.section_map_address);
+	printf("Secton map length: 0x%X\n", header.section_map_len);
+	printf("Num sections: %d\n", header.n_sections);
+	printf("Max heap size: 0x%X\n", header.max_heap_size);
+	printf("Max program size: 0x%X\n", header.max_program_size);
+	printf("Max stack size: 0x%X\n", header.max_stack_size);
+	printf("Stack fence size: 0x%X\n", header.stack_fence_size);
+
+	puts("\nModules");
+	if (have_modules) {
+		for (int i = 0; i < header.n_modules; i++) {
+			printf("\t%s:\n\t\tCode Address: 0x%X, Length: 0x%X\n",
+			       header.module_map_entries[i].name.str,
+			       header.module_map_entries[i].code_address,
+			       header.module_map_entries[i].code_length);
+			printf("\t\tFoo Address: 0x%X, Length: 0x%X\n",
+			       header.module_map_entries[i].foo_address,
+			       header.module_map_entries[i].foo_length);
+			printf("\t\tBar Address: 0x%X, Length: 0x%X\n",
+			       header.module_map_entries[i].bar_address,
+			       header.module_map_entries[i].bar_length);
+		}
+	} else {
+		fputs("Failed to read module map\n", stderr);
+	}
+
+	puts("\nExtrnal Vars");
+	if (have_external_vars) {
+		for (int i = 0; i < header.n_external_vars; i++) {
+			printf("\t%s: 0x%X\n",
+			       header.external_vars[i].name.str, header.external_vars[i].address);
+		}
+	} else {
+		fputs("Failed to read external variable table\n", stderr);
+	}
+
+	vos_header_cleanup(&header);
+	return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vos_program_module.c	Thu May 28 21:19:55 2015 -0700
@@ -0,0 +1,208 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <string.h>
+#include "vos_program_module.h"
+
+//Reads a big-endian 16-bit value at *src and advances *src past it.
+static uint16_t big16(uint8_t ** src)
+{
+	uint8_t *bytes = *src;
+	*src = bytes + 2;
+	return (uint16_t)(bytes[0] << 8 | bytes[1]);
+}
+
+//Reads a big-endian 32-bit value at *src and advances *src past it.
+static uint32_t big32(uint8_t ** src)
+{
+	const uint8_t *bytes = *src;
+	//widen before shifting: a uint8_t operand is promoted to (signed) int, and
+	//shifting a value >= 0x80 left by 24 would overflow it
+	uint32_t ret = (uint32_t)bytes[0] << 24
+		| (uint32_t)bytes[1] << 16
+		| (uint32_t)bytes[2] << 8
+		| (uint32_t)bytes[3];
+	*src += 4;
+	return ret;
+}
+
+//Reads a length-prefixed string field at *src: a big-endian 16-bit length
+//followed by `storage` bytes of character data, plus one padding byte when
+//`storage` is odd. *src is advanced past the whole field.
+//
+//The character data is copied to str (which must hold `storage` bytes) and is
+//always NUL terminated, since callers print it with %s. A stored length that
+//does not leave room for the terminator is clamped to storage - 1, which drops
+//the last character of a completely full field. *len receives the resulting
+//string length.
+static void string_(uint8_t ** src, uint16_t *len, char * str, uint32_t storage)
+{
+	*len = big16(src);
+	if (!storage)
+	{
+		//no room for anything, not even a terminator
+		*len = 0;
+		return;
+	}
+	memcpy(str, *src, storage);
+	*src += storage;
+	if (*len >= storage)
+	{
+		*len = storage - 1;
+	}
+	str[*len] = 0;
+	if (storage & 1)
+	{
+		//skip the padding byte after an odd sized buffer
+		(*src)++;
+	}
+}
+
+//Reads a length-prefixed string into a struct field that has `len` and `str`
+//members, using the size of the str array as the storage size.
+#define string(src, field) string_(src, &(field).len, (field).str, sizeof((field).str))
+
+
+//Reads 4096 bytes from the current position of f and decodes them into *out.
+//
+//Fields are decoded strictly in the order below; multi-byte integers are read
+//with big16/big32 and string fields with the string() macro. The two table
+//pointers (module_map_entries, external_vars) are set to NULL.
+//
+//Returns 1 on success and 0 if fewer than 4096 bytes could be read, in which
+//case *out is not modified.
+int vos_read_header(FILE * f, vos_program_module *out)
+{
+	uint8_t buffer[4096];
+	if (fread(buffer, 1, sizeof(buffer), f) != sizeof(buffer))
+	{
+		return 0;
+	}
+	uint8_t *cur = buffer;
+	out->version = big16(&cur);
+	string(&cur, out->binder_version);
+	string(&cur, out->binder_options);
+	string(&cur, out->system_name);
+	string(&cur, out->user_name);
+	out->date_bound = big32(&cur);
+	out->main_entry_link.code_address = big32(&cur);
+	out->main_entry_link.static_address = big32(&cur);
+	out->user_boundary = big32(&cur);
+	out->n_modules = big16(&cur);
+	out->n_external_vars = big16(&cur);
+	out->n_link_names = big16(&cur);
+	out->n_unsnapped_links = big16(&cur);
+	out->n_vm_pages = big16(&cur);
+	out->n_header_pages = big16(&cur);
+	for (int i = 0; i < 3; i++)
+	{
+		for (int j = 0; j < 4; j++)
+		{
+			out->info[i][j].address = big32(&cur);
+			out->info[i][j].len = big32(&cur);
+		}
+	}
+	out->module_map_address = big32(&cur);
+	out->module_map_len = big32(&cur);
+	out->external_vars_map_address = big32(&cur);
+	out->external_vars_map_len = big32(&cur);
+	out->link_names_map_address = big32(&cur);
+	out->link_names_map_len = big32(&cur);
+	out->link_map_address = big32(&cur);
+	out->link_map_len = big32(&cur);
+	out->header_address = big32(&cur);
+	out->header_len = big32(&cur);
+	//access_info is kept as an opaque byte blob
+	memcpy(out->access_info, cur, sizeof(out->access_info));
+	cur += sizeof(out->access_info);
+	out->flags = big32(&cur);
+	out->n_tasks = big16(&cur);
+	for (int i = 0; i < 3; i++)
+	{
+		out->task_static_len[i] = big32(&cur);
+	}
+	out->stack_len = big32(&cur);
+	out->n_entries = big16(&cur);
+	out->entry_map_address = big32(&cur);
+	out->entry_map_len = big32(&cur);
+	out->pop_version = big16(&cur);
+	out->processor = big16(&cur);
+	string(&cur, out->release_name);
+	out->relocation_info.map_address = big32(&cur);
+	out->relocation_info.map_len = big32(&cur);
+	out->relocation_info.n_relocations = big32(&cur);
+	out->high_water_mark = big32(&cur);
+	string(&cur, out->copyright_notice);
+	for (int i = 0; i < 14; i++)
+	{
+		out->module_origins[i] = big32(&cur);
+	}
+	out->processor_family = big16(&cur);
+	string(&cur, out->program_name);
+	out->string_pool_address = big32(&cur);
+	out->string_pool_len = big32(&cur);
+	out->obj_dir_map_address = big32(&cur);
+	out->obj_dir_map_len = big32(&cur);
+	for (int i = 0; i < 3; i++)
+	{
+		out->global_offset_table_address[i] = big32(&cur);
+	}
+	for (int i = 0; i < 3; i++)
+	{
+		out->block_map_info[i].address = big32(&cur);
+		out->block_map_info[i].len = big32(&cur);
+	}
+	out->section_map_file_address = big32(&cur);
+	out->section_map_address = big32(&cur);
+	out->section_map_len = big32(&cur);
+	out->n_sections = big16(&cur);
+	out->max_heap_size = big32(&cur);
+	out->max_program_size = big32(&cur);
+	out->max_stack_size = big32(&cur);
+	out->stack_fence_size = big32(&cur);
+
+	//the tables are loaded separately by the vos_read_alloc_* functions
+	out->module_map_entries = NULL;
+	out->external_vars = NULL;
+	return 1;
+}
+
+#define MODULE_MAP_ENTRY_SIZE 74
+
+//Loads the module map described by *header from f and stores the decoded
+//entries in header->module_map_entries (n_modules elements, released by
+//vos_header_cleanup).
+//
+//The map is read from file offset module_map_address + 0x1000 - user_boundary.
+//Returns 1 on success. Returns 0, leaving header->module_map_entries
+//untouched, if the map length does not match the module count, if memory
+//cannot be allocated or if the file cannot be read.
+int vos_read_alloc_module_map(FILE * f, vos_program_module *header)
+{
+	if (header->module_map_len != header->n_modules * MODULE_MAP_ENTRY_SIZE)
+	{
+		return 0;
+	}
+	if (!header->n_modules)
+	{
+		//nothing to load; don't depend on what malloc(0) returns
+		header->module_map_entries = NULL;
+		return 1;
+	}
+	uint8_t * buf = malloc(header->module_map_len);
+	vos_module_map_entry * entries = malloc(sizeof(vos_module_map_entry) * header->n_modules);
+	if (!buf || !entries
+		|| fseek(f, header->module_map_address + 0x1000 - header->user_boundary, SEEK_SET)
+		|| fread(buf, 1, header->module_map_len, f) != header->module_map_len)
+	{
+		free(buf);
+		free(entries);
+		return 0;
+	}
+	uint8_t * cur = buf;
+	for (int i = 0; i < header->n_modules; i++)
+	{
+		string(&cur, entries[i].name);
+		for (int j = 0; j < 5; j++)
+		{
+			entries[i].unknown[j] = big16(&cur);
+		}
+		entries[i].code_address = big32(&cur);
+		entries[i].code_length = big32(&cur);
+		entries[i].foo_address = big32(&cur);
+		entries[i].foo_length = big32(&cur);
+		entries[i].bar_address = big32(&cur);
+		entries[i].bar_length = big32(&cur);
+		for (int j = 0; j < 3; j++)
+		{
+			entries[i].unknown2[j] = big16(&cur);
+		}
+	}
+	//the raw bytes are no longer needed once decoded
+	free(buf);
+	header->module_map_entries = entries;
+	return 1;
+}
+
+#define EXTERNAL_VAR_ENTRY_SIZE 44
+
+//Loads the external variable table described by *header from f and stores the
+//decoded entries in header->external_vars (n_external_vars elements, released
+//by vos_header_cleanup).
+//
+//The table is read from file offset
+//external_vars_map_address + 0x1000 - user_boundary.
+//Returns 1 on success. Returns 0, leaving header->external_vars untouched, if
+//the table length does not match the variable count, if memory cannot be
+//allocated or if the file cannot be read.
+int vos_read_alloc_external_vars(FILE * f, vos_program_module *header)
+{
+	if (header->external_vars_map_len != header->n_external_vars * EXTERNAL_VAR_ENTRY_SIZE)
+	{
+		return 0;
+	}
+	if (!header->n_external_vars)
+	{
+		//nothing to load; don't depend on what malloc(0) returns
+		header->external_vars = NULL;
+		return 1;
+	}
+	uint8_t * buf = malloc(header->external_vars_map_len);
+	vos_external_var_entry * vars = malloc(sizeof(vos_external_var_entry) * header->n_external_vars);
+	if (!buf || !vars
+		|| fseek(f, header->external_vars_map_address + 0x1000 - header->user_boundary, SEEK_SET)
+		|| fread(buf, 1, header->external_vars_map_len, f) != header->external_vars_map_len)
+	{
+		free(buf);
+		free(vars);
+		return 0;
+	}
+	uint8_t * cur = buf;
+	for (int i = 0; i < header->n_external_vars; i++)
+	{
+		string(&cur, vars[i].name);
+		vars[i].address = big32(&cur);
+		for (int j = 0; j < 3; j++)
+		{
+			vars[i].unknown[j] = big16(&cur);
+		}
+	}
+	//the raw bytes are no longer needed once decoded
+	free(buf);
+	header->external_vars = vars;
+	return 1;
+}
+
+//Releases the tables attached to *header by the vos_read_alloc_* functions.
+//The pointers are reset to NULL so that calling this again, or using the
+//header afterwards, cannot touch freed memory.
+void vos_header_cleanup(vos_program_module *header)
+{
+	free(header->module_map_entries);
+	header->module_map_entries = NULL;
+	free(header->external_vars);
+	header->external_vars = NULL;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vos_program_module.h	Thu May 28 21:19:55 2015 -0700
@@ -0,0 +1,134 @@
+#ifndef VOS_PROGRAM_MODULE_H_
+#define VOS_PROGRAM_MODULE_H_
+
+#include <stdint.h>
+
+//One decoded entry of a program module's module map.
+//The foo_*/bar_* and unknown* members have placeholder names; what they
+//represent has not been identified here.
+typedef struct
+{
+	//length-prefixed name: len as read from the file, str the character data
+	struct {
+		uint16_t len;
+		char     str[32];
+	} name;
+	uint16_t unknown[5];
+	uint32_t code_address;
+	uint32_t code_length;
+	uint32_t foo_address;
+	uint32_t foo_length;
+	uint32_t bar_address;
+	uint32_t bar_length;
+	uint16_t unknown2[3];
+} vos_module_map_entry;
+
+//One decoded entry of a program module's external variable table.
+//The unknown member has a placeholder name; its meaning has not been
+//identified here.
+typedef struct
+{
+	//length-prefixed name: len as read from the file, str the character data
+	struct {
+		uint16_t len;
+		char     str[32];
+	} name;
+	uint32_t address;
+	uint16_t unknown[3];
+} vos_external_var_entry;
+
+//Decoded header of a VOS program module, in host byte order.
+//Members are declared in the order vos_read_header decodes them from the
+//file. String fields are stored as a length plus a fixed size character
+//buffer. The last two members are not part of the on-disk header: they point
+//to tables loaded by the vos_read_alloc_* functions and are released by
+//vos_header_cleanup.
+typedef struct
+{
+	uint16_t version;
+	struct {
+		uint16_t len;
+		char     str[32];
+	} binder_version;
+	struct {
+		uint16_t len;
+		char     str[32];
+	} binder_options;
+	struct {
+		uint16_t len;
+		char     str[32];
+	} system_name;
+	struct {
+		uint16_t len;
+		char     str[65];
+	} user_name;
+	uint32_t date_bound;
+	struct {
+		uint32_t code_address;
+		uint32_t static_address;
+	} main_entry_link;
+	uint32_t user_boundary;
+	uint16_t n_modules;
+	uint16_t n_external_vars;
+	uint16_t n_link_names;
+	uint16_t n_unsnapped_links;
+	uint16_t n_vm_pages;
+	uint16_t n_header_pages;
+	struct {
+		uint32_t address;
+		uint32_t len;
+	} info[3][4];
+	uint32_t module_map_address;
+	uint32_t module_map_len;
+	uint32_t external_vars_map_address;
+	uint32_t external_vars_map_len;
+	uint32_t link_names_map_address;
+	uint32_t link_names_map_len;
+	uint32_t link_map_address;
+	uint32_t link_map_len;
+	uint32_t header_address;
+	uint32_t header_len;
+	//copied verbatim from the file, not decoded
+	uint8_t  access_info[2048];
+	uint32_t flags;
+	uint16_t n_tasks;
+	uint32_t task_static_len[3];
+	uint32_t stack_len;
+	uint16_t n_entries;
+	uint32_t entry_map_address;
+	uint32_t entry_map_len;
+	uint16_t pop_version;
+	uint16_t processor;
+	struct {
+		uint16_t len;
+		char     str[32];
+	} release_name;
+	struct {
+		uint32_t map_address;
+		uint32_t map_len;
+		uint32_t n_relocations;
+	} relocation_info;
+	uint32_t high_water_mark;
+	struct {
+		uint16_t len;
+		char     str[256];
+	} copyright_notice;
+	uint32_t module_origins[14];
+	uint16_t processor_family;
+	struct {
+		uint16_t len;
+		char     str[32];
+	} program_name;
+	uint32_t string_pool_address;
+	uint32_t string_pool_len;
+	uint32_t obj_dir_map_address;
+	uint32_t obj_dir_map_len;
+	uint32_t global_offset_table_address[3];
+	struct {
+		uint32_t address;
+		uint32_t len;
+	} block_map_info[3];
+	uint32_t section_map_file_address;
+	uint32_t section_map_address;
+	uint32_t section_map_len;
+	uint16_t n_sections;
+	uint32_t max_heap_size;
+	uint32_t max_program_size;
+	uint32_t max_stack_size;
+	uint32_t stack_fence_size;
+
+	//n_modules entries, or NULL when not loaded
+	vos_module_map_entry   *module_map_entries;
+	//n_external_vars entries, or NULL when not loaded
+	vos_external_var_entry *external_vars;
+} vos_program_module;
+
+int vos_read_header(FILE * f, vos_program_module *out);
+int vos_read_alloc_module_map(FILE * f, vos_program_module *header);
+int vos_read_alloc_external_vars(FILE * f, vos_program_module *header);
+void vos_header_cleanup(vos_program_module *header);
+
+#endif //VOS_PROGRAM_MODULE_H_
--- a/ym2612.c	Thu May 28 21:09:33 2015 -0700
+++ b/ym2612.c	Thu May 28 21:19:55 2015 -0700
@@ -521,6 +521,7 @@
 	context->selected_part = 0;
 	context->write_cycle = context->current_cycle;
 	context->busy_cycles = BUSY_CYCLES_ADDRESS;
+	context->status |= 0x80;
 }
 
 void ym_address_write_part2(ym2612_context * context, uint8_t address)
@@ -530,6 +531,7 @@
 	context->selected_part = 1;
 	context->write_cycle = context->current_cycle;
 	context->busy_cycles = BUSY_CYCLES_ADDRESS;
+	context->status |= 0x80;
 }
 
 uint8_t fnum_to_keycode[] = {
@@ -583,14 +585,16 @@
 	ym_channel * channel = context->channels + chan_num;
 	uint32_t inc, detune;
 	if (chan_num == 2 && context->ch3_mode && (op < (2*4 + 3))) {
-		inc = context->ch3_supp[op-2*4].fnum;
-		if (!context->ch3_supp[op-2*4].block) {
+		//supplemental fnum registers are in a different order than normal slot parameters
+		int index = (op-2*4) ^ 2;
+		inc = context->ch3_supp[index].fnum;
+		if (!context->ch3_supp[index].block) {
 			inc >>= 1;
 		} else {
-			inc <<= (context->ch3_supp[op-2*4].block-1);
+			inc <<= (context->ch3_supp[index].block-1);
 		}
 		//detune
-		detune = detune_table[context->ch3_supp[op-2*4].keycode][operator->detune & 0x3];
+		detune = detune_table[context->ch3_supp[index].keycode][operator->detune & 0x3];
 	} else {
 		inc = channel->fnum;
 		if (!channel->block) {
--- a/z80_to_x86.c	Thu May 28 21:09:33 2015 -0700
+++ b/z80_to_x86.c	Thu May 28 21:19:55 2015 -0700
@@ -14,12 +14,6 @@
 
 #define MODE_UNUSED (MODE_IMMED-1)
 
-#define ZCYCLES RBP
-#define ZLIMIT RDI
-#define SCRATCH1 R13
-#define SCRATCH2 R14
-#define CONTEXT RSI
-
 //#define DO_DEBUG_PRINT
 
 #ifdef DO_DEBUG_PRINT
@@ -28,21 +22,8 @@
 #define dprintf
 #endif
 
-void z80_read_byte();
-void z80_read_word();
-void z80_write_byte();
-void z80_write_word_highfirst();
-void z80_write_word_lowfirst();
-void z80_save_context();
-void z80_native_addr();
-void z80_do_sync();
-void z80_handle_cycle_limit_int();
-void z80_retrans_stub();
-void z80_io_read();
-void z80_io_write();
-void z80_halt();
-void z80_save_context();
-void z80_load_context();
+uint32_t zbreakpoint_patch(z80_context * context, uint16_t address, code_ptr dst);
+void z80_handle_deferred(z80_context * context);
 
 uint8_t z80_size(z80inst * inst)
 {
@@ -54,24 +35,53 @@
 	return SZ_B;
 }
 
-uint8_t * zcycles(uint8_t * dst, uint32_t num_cycles)
+//Returns the byte offset of entry `flag` of the flags member within
+//z80_context.
+//NOTE(review): the result is narrowed to uint8_t, so this assumes the offset
+//fits in 8 bits -- confirm against the z80_context layout.
+uint8_t zf_off(uint8_t flag)
+{
+	return offsetof(z80_context, flags) + flag;
+}
+
+uint8_t zaf_off(uint8_t flag)
 {
-	return add_ir(dst, num_cycles, ZCYCLES, SZ_D);
+	return offsetof(z80_context, alt_flags) + flag;
+}
+
+uint8_t zr_off(uint8_t reg)
+{
+	if (reg > Z80_A) {
+		reg = z80_low_reg(reg);
+	}
+	return offsetof(z80_context, regs) + reg;
 }
 
-uint8_t * z80_check_cycles_int(uint8_t * dst, uint16_t address)
+uint8_t zar_off(uint8_t reg)
 {
-	dst = cmp_rr(dst, ZCYCLES, ZLIMIT, SZ_D);
-	uint8_t * jmp_off = dst+1;
-	dst = jcc(dst, CC_NC, dst + 7);
-	dst = mov_ir(dst, address, SCRATCH1, SZ_W);
-	dst = call(dst, (uint8_t *)z80_handle_cycle_limit_int);
-	*jmp_off = dst - (jmp_off+1);
-	return dst;
+	if (reg > Z80_A) {
+		reg = z80_low_reg(reg);
+	}
+	return offsetof(z80_context, alt_regs) + reg;
 }
 
-uint8_t * translate_z80_reg(z80inst * inst, x86_ea * ea, uint8_t * dst, x86_z80_options * opts)
+//Emits a move of Z80 register `reg` into host register `native_reg`.
+//Registers numbered above Z80_A are moved with size SZ_W, all others with SZ_B.
+void zreg_to_native(z80_options *opts, uint8_t reg, uint8_t native_reg)
+{
+	if (opts->regs[reg] < 0) {
+		//no host register is assigned to reg: use its slot at zr_off(reg)
+		//relative to the context register instead
+		mov_rdispr(&opts->gen.code, opts->gen.context_reg, zr_off(reg), native_reg, reg > Z80_A ? SZ_W : SZ_B);
+		return;
+	}
+	mov_rr(&opts->gen.code, opts->regs[reg], native_reg, reg > Z80_A ? SZ_W : SZ_B);
+}
+
+void native_to_zreg(z80_options *opts, uint8_t native_reg, uint8_t reg)
 {
+	if (opts->regs[reg] >= 0) {
+		mov_rr(&opts->gen.code, native_reg, opts->regs[reg], reg > Z80_A ? SZ_W : SZ_B);
+	} else {
+		mov_rrdisp(&opts->gen.code, native_reg, opts->gen.context_reg, zr_off(reg), reg > Z80_A ? SZ_W : SZ_B);
+	}
+}
+
+void translate_z80_reg(z80inst * inst, host_ea * ea, z80_options * opts)
+{
+	code_info *code = &opts->gen.code;
 	if (inst->reg == Z80_USE_IMMED) {
 		ea->mode = MODE_IMMED;
 		ea->disp = inst->immed;
@@ -79,14 +89,14 @@
 		ea->mode = MODE_UNUSED;
 	} else {
 		ea->mode = MODE_REG_DIRECT;
-		if (inst->reg == Z80_IYH) {
+		if (inst->reg == Z80_IYH && opts->regs[Z80_IYL] >= 0) {
 			if ((inst->addr_mode & 0x1F) == Z80_REG && inst->ea_reg == Z80_IYL) {
-				dst = mov_rr(dst, opts->regs[Z80_IY], SCRATCH1, SZ_W);
-				dst = ror_ir(dst, 8, SCRATCH1, SZ_W);
-				ea->base = SCRATCH1;
+				mov_rr(code, opts->regs[Z80_IY], opts->gen.scratch1, SZ_W);
+				ror_ir(code, 8, opts->gen.scratch1, SZ_W);
+				ea->base = opts->gen.scratch1;
 			} else {
 				ea->base = opts->regs[Z80_IYL];
-				dst = ror_ir(dst, 8, opts->regs[Z80_IY], SZ_W);
+				ror_ir(code, 8, opts->regs[Z80_IY], SZ_W);
 			}
 		} else if(opts->regs[inst->reg] >= 0) {
 			ea->base = opts->regs[inst->reg];
@@ -96,142 +106,151 @@
 					if (other_reg >= R8 || (other_reg >= RSP && other_reg <= RDI)) {
 						//we can't mix an *H reg with a register that requires the REX prefix
 						ea->base = opts->regs[z80_low_reg(inst->reg)];
-						dst = ror_ir(dst, 8, ea->base, SZ_W);
+						ror_ir(code, 8, ea->base, SZ_W);
 					}
 				} else if((inst->addr_mode & 0x1F) != Z80_UNUSED && (inst->addr_mode & 0x1F) != Z80_IMMED) {
 					//temp regs require REX prefix too
 					ea->base = opts->regs[z80_low_reg(inst->reg)];
-					dst = ror_ir(dst, 8, ea->base, SZ_W);
+					ror_ir(code, 8, ea->base, SZ_W);
 				}
 			}
 		} else {
 			ea->mode = MODE_REG_DISPLACE8;
-			ea->base = CONTEXT;
-			ea->disp = offsetof(z80_context, regs) + inst->reg;
+			ea->base = opts->gen.context_reg;
+			ea->disp = zr_off(inst->reg);
 		}
 	}
-	return dst;
 }
 
-uint8_t * z80_save_reg(uint8_t * dst, z80inst * inst, x86_z80_options * opts)
+void z80_save_reg(z80inst * inst, z80_options * opts) // emits the fix-up that mirrors the ror_ir calls translate_z80_reg issues for inst->reg
 {
-	if (inst->reg == Z80_IYH) {
+	code_info *code = &opts->gen.code;
+	if (inst->reg == Z80_USE_IMMED || inst->reg == Z80_UNUSED) { // no register operand, so nothing to restore
+		return;
+	}
+	if (inst->reg == Z80_IYH && opts->regs[Z80_IYL] >= 0) { // only when regs[Z80_IYL] is non-negative (presumably host-mapped)
 		if ((inst->addr_mode & 0x1F) == Z80_REG && inst->ea_reg == Z80_IYL) {
-			dst = ror_ir(dst, 8, opts->regs[Z80_IY], SZ_W);
-			dst = mov_rr(dst, SCRATCH1, opts->regs[Z80_IYL], SZ_B);
-			dst = ror_ir(dst, 8, opts->regs[Z80_IY], SZ_W);
+			ror_ir(code, 8, opts->regs[Z80_IY], SZ_W); // swap the bytes of IY so the IYL host register addresses the high byte
+			mov_rr(code, opts->gen.scratch1, opts->regs[Z80_IYL], SZ_B); // copy the byte held in scratch1 into it
+			ror_ir(code, 8, opts->regs[Z80_IY], SZ_W); // swap the bytes back
 		} else {
-			dst = ror_ir(dst, 8, opts->regs[Z80_IY], SZ_W);
+			ror_ir(code, 8, opts->regs[Z80_IY], SZ_W); // second 8-bit rotate of the word restores the original byte order
 		}
 	} else if (opts->regs[inst->reg] >= AH && opts->regs[inst->reg] <= BH) {
 		if ((inst->addr_mode & 0x1F) == Z80_REG) {
 			uint8_t other_reg = opts->regs[inst->ea_reg];
 			if (other_reg >= R8 || (other_reg >= RSP && other_reg <= RDI)) {
 				//we can't mix an *H reg with a register that requires the REX prefix
-				dst = ror_ir(dst, 8, opts->regs[z80_low_reg(inst->reg)], SZ_W);
+				ror_ir(code, 8, opts->regs[z80_low_reg(inst->reg)], SZ_W); // rotate the low-register word back after the operation
 			}
 		} else if((inst->addr_mode & 0x1F) != Z80_UNUSED && (inst->addr_mode & 0x1F) != Z80_IMMED) {
 			//temp regs require REX prefix too
-			dst = ror_ir(dst, 8, opts->regs[z80_low_reg(inst->reg)], SZ_W);
+			ror_ir(code, 8, opts->regs[z80_low_reg(inst->reg)], SZ_W); // same restore for the remaining addressing modes
 		}
 	}
-	return dst;
 }
 
-uint8_t * translate_z80_ea(z80inst * inst, x86_ea * ea, uint8_t * dst, x86_z80_options * opts, uint8_t read, uint8_t modify)
+void translate_z80_ea(z80inst * inst, host_ea * ea, z80_options * opts, uint8_t read, uint8_t modify)
 {
-	uint8_t size, reg, areg;
+	code_info *code = &opts->gen.code;
+	uint8_t size, areg;
+	int8_t reg;
 	ea->mode = MODE_REG_DIRECT;
-	areg = read ? SCRATCH1 : SCRATCH2;
+	areg = read ? opts->gen.scratch1 : opts->gen.scratch2;
 	switch(inst->addr_mode & 0x1F)
 	{
 	case Z80_REG:
-		if (inst->ea_reg == Z80_IYH) {
+		if (inst->ea_reg == Z80_IYH && opts->regs[Z80_IYL] >= 0) {
 			if (inst->reg == Z80_IYL) {
-				dst = mov_rr(dst, opts->regs[Z80_IY], SCRATCH1, SZ_W);
-				dst = ror_ir(dst, 8, SCRATCH1, SZ_W);
-				ea->base = SCRATCH1;
+				mov_rr(code, opts->regs[Z80_IY], opts->gen.scratch1, SZ_W);
+				ror_ir(code, 8, opts->gen.scratch1, SZ_W);
+				ea->base = opts->gen.scratch1;
 			} else {
 				ea->base = opts->regs[Z80_IYL];
-				dst = ror_ir(dst, 8, opts->regs[Z80_IY], SZ_W);
+				ror_ir(code, 8, opts->regs[Z80_IY], SZ_W);
 			}
-		} else {
+		} else if(opts->regs[inst->ea_reg] >= 0) {
 			ea->base = opts->regs[inst->ea_reg];
 			if (ea->base >= AH && ea->base <= BH && inst->reg != Z80_UNUSED && inst->reg != Z80_USE_IMMED) {
 				uint8_t other_reg = opts->regs[inst->reg];
+#ifdef X86_64
 				if (other_reg >= R8 || (other_reg >= RSP && other_reg <= RDI)) {
 					//we can't mix an *H reg with a register that requires the REX prefix
 					ea->base = opts->regs[z80_low_reg(inst->ea_reg)];
-					dst = ror_ir(dst, 8, ea->base, SZ_W);
+					ror_ir(code, 8, ea->base, SZ_W);
 				}
+#endif
 			}
+		} else {
+			ea->mode = MODE_REG_DISPLACE8;
+			ea->base = opts->gen.context_reg;
+			ea->disp = zr_off(inst->ea_reg);
 		}
 		break;
 	case Z80_REG_INDIRECT:
-		dst = mov_rr(dst, opts->regs[inst->ea_reg], areg, SZ_W);
+		zreg_to_native(opts, inst->ea_reg, areg);
 		size = z80_size(inst);
 		if (read) {
 			if (modify) {
-				//dst = push_r(dst, SCRATCH1);
-				dst = mov_rrdisp8(dst, SCRATCH1, CONTEXT, offsetof(z80_context, scratch1), SZ_W);
+				//push_r(code, opts->gen.scratch1);
+				mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, offsetof(z80_context, scratch1), SZ_W);
 			}
 			if (size == SZ_B) {
-				dst = call(dst, (uint8_t *)z80_read_byte);
+				call(code, opts->read_8);
 			} else {
-				dst = call(dst, (uint8_t *)z80_read_word);
+				call(code, opts->read_16);
 			}
 			if (modify) {
-				//dst = pop_r(dst, SCRATCH2);
-				dst = mov_rdisp8r(dst, CONTEXT, offsetof(z80_context, scratch1), SCRATCH2, SZ_W);
+				//pop_r(code, opts->gen.scratch2);
+				mov_rdispr(code, opts->gen.context_reg, offsetof(z80_context, scratch1), opts->gen.scratch2, SZ_W);
 			}
 		}
-		ea->base = SCRATCH1;
+		ea->base = opts->gen.scratch1;
 		break;
 	case Z80_IMMED:
 		ea->mode = MODE_IMMED;
 		ea->disp = inst->immed;
 		break;
 	case Z80_IMMED_INDIRECT:
-		dst = mov_ir(dst, inst->immed, areg, SZ_W);
+		mov_ir(code, inst->immed, areg, SZ_W);
 		size = z80_size(inst);
 		if (read) {
 			/*if (modify) {
-				dst = push_r(dst, SCRATCH1);
+				push_r(code, opts->gen.scratch1);
 			}*/
 			if (size == SZ_B) {
-				dst = call(dst, (uint8_t *)z80_read_byte);
+				call(code, opts->read_8);
 			} else {
-				dst = call(dst, (uint8_t *)z80_read_word);
+				call(code, opts->read_16);
 			}
 			if (modify) {
-				//dst = pop_r(dst, SCRATCH2);
-				dst = mov_ir(dst, inst->immed, SCRATCH2, SZ_W);
+				//pop_r(code, opts->gen.scratch2);
+				mov_ir(code, inst->immed, opts->gen.scratch2, SZ_W);
 			}
 		}
-		ea->base = SCRATCH1;
+		ea->base = opts->gen.scratch1;
 		break;
 	case Z80_IX_DISPLACE:
 	case Z80_IY_DISPLACE:
-		reg = opts->regs[(inst->addr_mode & 0x1F) == Z80_IX_DISPLACE ? Z80_IX : Z80_IY];
-		dst = mov_rr(dst, reg, areg, SZ_W);
-		dst = add_ir(dst, inst->ea_reg & 0x80 ? inst->ea_reg - 256 : inst->ea_reg, areg, SZ_W);
+		zreg_to_native(opts, (inst->addr_mode & 0x1F) == Z80_IX_DISPLACE ? Z80_IX : Z80_IY, areg);
+		add_ir(code, inst->ea_reg & 0x80 ? inst->ea_reg - 256 : inst->ea_reg, areg, SZ_W);
 		size = z80_size(inst);
 		if (read) {
 			if (modify) {
-				//dst = push_r(dst, SCRATCH1);
-				dst = mov_rrdisp8(dst, SCRATCH1, CONTEXT, offsetof(z80_context, scratch1), SZ_W);
+				//push_r(code, opts->gen.scratch1);
+				mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, offsetof(z80_context, scratch1), SZ_W);
 			}
 			if (size == SZ_B) {
-				dst = call(dst, (uint8_t *)z80_read_byte);
+				call(code, opts->read_8);
 			} else {
-				dst = call(dst, (uint8_t *)z80_read_word);
+				call(code, opts->read_16);
 			}
 			if (modify) {
-				//dst = pop_r(dst, SCRATCH2);
-				dst = mov_rdisp8r(dst, CONTEXT, offsetof(z80_context, scratch1), SCRATCH2, SZ_W);
+				//pop_r(code, opts->gen.scratch2);
+				mov_rdispr(code, opts->gen.context_reg, offsetof(z80_context, scratch1), opts->gen.scratch2, SZ_W);
 			}
 		}
-		ea->base = SCRATCH1;
+		ea->base = opts->gen.scratch1;
 		break;
 	case Z80_UNUSED:
 		ea->mode = MODE_UNUSED;
@@ -240,32 +259,32 @@
 		fprintf(stderr, "Unrecognized Z80 addressing mode %d\n", inst->addr_mode & 0x1F);
 		exit(1);
 	}
-	return dst;
 }
 
-uint8_t * z80_save_ea(uint8_t * dst, z80inst * inst, x86_z80_options * opts)
+void z80_save_ea(code_info *code, z80inst * inst, z80_options * opts) // emits into `code` the fix-up that mirrors the ror_ir calls of translate_z80_ea's Z80_REG case
 {
 	if ((inst->addr_mode & 0x1F) == Z80_REG) {
-		if (inst->ea_reg == Z80_IYH) {
+		if (inst->ea_reg == Z80_IYH && opts->regs[Z80_IYL] >= 0) { // only when regs[Z80_IYL] is non-negative (presumably host-mapped)
 			if (inst->reg == Z80_IYL) {
-				dst = ror_ir(dst, 8, opts->regs[Z80_IY], SZ_W);
-				dst = mov_rr(dst, SCRATCH1, opts->regs[Z80_IYL], SZ_B);
-				dst = ror_ir(dst, 8, opts->regs[Z80_IY], SZ_W);
+				ror_ir(code, 8, opts->regs[Z80_IY], SZ_W); // swap the bytes of IY so the IYL host register addresses the high byte
+				mov_rr(code, opts->gen.scratch1, opts->regs[Z80_IYL], SZ_B); // copy the byte held in scratch1 into it
+				ror_ir(code, 8, opts->regs[Z80_IY], SZ_W); // swap the bytes back
 			} else {
-				dst = ror_ir(dst, 8, opts->regs[Z80_IY], SZ_W);
+				ror_ir(code, 8, opts->regs[Z80_IY], SZ_W); // second 8-bit rotate of the word restores the original byte order
 			}
 		} else if (inst->reg != Z80_UNUSED && inst->reg != Z80_USE_IMMED && opts->regs[inst->ea_reg] >= AH && opts->regs[inst->ea_reg] <= BH) {
 			uint8_t other_reg = opts->regs[inst->reg];
+#ifdef X86_64
 			if (other_reg >= R8 || (other_reg >= RSP && other_reg <= RDI)) {
 				//we can't mix an *H reg with a register that requires the REX prefix
-				dst = ror_ir(dst, 8, opts->regs[z80_low_reg(inst->ea_reg)], SZ_W);
+				ror_ir(code, 8, opts->regs[z80_low_reg(inst->ea_reg)], SZ_W); // rotate the low-register word back after the operation
 			}
+#endif // X86_64: the restore is only compiled when X86_64 is defined, matching the guard in translate_z80_ea
 		}
 	}
-	return dst;
 }
 
-uint8_t * z80_save_result(uint8_t * dst, z80inst * inst)
+void z80_save_result(z80_options *opts, z80inst * inst)
 {
 	switch(inst->addr_mode & 0x1f)
 	{
@@ -274,12 +293,11 @@
 	case Z80_IX_DISPLACE:
 	case Z80_IY_DISPLACE:
 		if (z80_size(inst) == SZ_B) {
-			dst = call(dst, (uint8_t *)z80_write_byte);
+			call(&opts->gen.code, opts->write_8);
 		} else {
-			dst = call(dst, (uint8_t *)z80_write_word_lowfirst);
+			call(&opts->gen.code, opts->write_16_lowfirst);
 		}
 	}
-	return dst;
 }
 
 enum {
@@ -292,21 +310,6 @@
 	MODIFY
 };
 
-uint8_t zf_off(uint8_t flag)
-{
-	return offsetof(z80_context, flags) + flag;
-}
-
-uint8_t zaf_off(uint8_t flag)
-{
-	return offsetof(z80_context, alt_flags) + flag;
-}
-
-uint8_t zar_off(uint8_t reg)
-{
-	return offsetof(z80_context, alt_regs) + reg;
-}
-
 void z80_print_regs_exit(z80_context * context)
 {
 	printf("A: %X\nB: %X\nC: %X\nD: %X\nE: %X\nHL: %X\nIX: %X\nIY: %X\nSP: %X\n\nIM: %d, IFF1: %d, IFF2: %d\n",
@@ -326,14 +329,23 @@
 	exit(0);
 }
 
-uint8_t * translate_z80inst(z80inst * inst, uint8_t * dst, z80_context * context, uint16_t address)
+void translate_z80inst(z80inst * inst, z80_context * context, uint16_t address, uint8_t interp)
 {
-	uint32_t cycles;
-	x86_ea src_op, dst_op;
+	uint32_t num_cycles;
+	host_ea src_op, dst_op;
 	uint8_t size;
-	x86_z80_options *opts = context->options;
-	uint8_t * start = dst;
-	dst = z80_check_cycles_int(dst, address);
+	z80_options *opts = context->options;
+	uint8_t * start = opts->gen.code.cur;
+	code_info *code = &opts->gen.code;
+	if (!interp) {
+		check_cycles_int(&opts->gen, address);
+		if (context->breakpoint_flags[address / sizeof(uint8_t)] & (1 << (address % sizeof(uint8_t)))) {
+			zbreakpoint_patch(context, address, start);
+		}
+#ifdef Z80_LOG_ADDRESS
+		log_address(&opts->gen, address, "Z80: %X @ %d\n");
+#endif
+	}
 	switch(inst->op)
 	{
 	case Z80_LD:
@@ -342,243 +354,330 @@
 		{
 		case Z80_REG:
 		case Z80_REG_INDIRECT:
- 			cycles = size == SZ_B ? 4 : 6;
+ 			num_cycles = size == SZ_B ? 4 : 6;
 			if (inst->ea_reg == Z80_IX || inst->ea_reg == Z80_IY) {
-				cycles += 4;
+				num_cycles += 4;
 			}
 			if (inst->reg == Z80_I || inst->ea_reg == Z80_I) {
-				cycles += 5;
+				num_cycles += 5;
 			}
 			break;
 		case Z80_IMMED:
-			cycles = size == SZ_B ? 7 : 10;
+			num_cycles = size == SZ_B ? 7 : 10;
 			break;
 		case Z80_IMMED_INDIRECT:
-			cycles = 10;
+			num_cycles = 10;
 			break;
 		case Z80_IX_DISPLACE:
 		case Z80_IY_DISPLACE:
-			cycles = 16;
+			num_cycles = 16;
 			break;
 		}
 		if ((inst->reg >= Z80_IXL && inst->reg <= Z80_IYH) || inst->reg == Z80_IX || inst->reg == Z80_IY) {
-			cycles += 4;
+			num_cycles += 4;
 		}
-		dst = zcycles(dst, cycles);
+		cycles(&opts->gen, num_cycles);
 		if (inst->addr_mode & Z80_DIR) {
-			dst = translate_z80_ea(inst, &dst_op, dst, opts, DONT_READ, MODIFY);
-			dst = translate_z80_reg(inst, &src_op, dst, opts);
+			translate_z80_ea(inst, &dst_op, opts, DONT_READ, MODIFY);
+			translate_z80_reg(inst, &src_op, opts);
 		} else {
-			dst = translate_z80_ea(inst, &src_op, dst, opts, READ, DONT_MODIFY);
-			dst = translate_z80_reg(inst, &dst_op, dst, opts);
+			translate_z80_ea(inst, &src_op, opts, READ, DONT_MODIFY);
+			translate_z80_reg(inst, &dst_op, opts);
 		}
 		if (src_op.mode == MODE_REG_DIRECT) {
 			if(dst_op.mode == MODE_REG_DISPLACE8) {
-				dst = mov_rrdisp8(dst, src_op.base, dst_op.base, dst_op.disp, size);
+				mov_rrdisp(code, src_op.base, dst_op.base, dst_op.disp, size);
 			} else {
-				dst = mov_rr(dst, src_op.base, dst_op.base, size);
+				mov_rr(code, src_op.base, dst_op.base, size);
 			}
 		} else if(src_op.mode == MODE_IMMED) {
-			dst = mov_ir(dst, src_op.disp, dst_op.base, size);
+			if(dst_op.mode == MODE_REG_DISPLACE8) {
+				mov_irdisp(code, src_op.disp, dst_op.base, dst_op.disp, size);
+			} else {
+				mov_ir(code, src_op.disp, dst_op.base, size);
+			}
 		} else {
-			dst = mov_rdisp8r(dst, src_op.base, src_op.disp, dst_op.base, size);
+			if(dst_op.mode == MODE_REG_DISPLACE8) {
+				mov_rdispr(code, src_op.base, src_op.disp, opts->gen.scratch1, size);
+				mov_rrdisp(code, opts->gen.scratch1, dst_op.base, dst_op.disp, size);
+			} else {
+				mov_rdispr(code, src_op.base, src_op.disp, dst_op.base, size);
+			}
 		}
-		dst = z80_save_reg(dst, inst, opts);
-		dst = z80_save_ea(dst, inst, opts);
+		if (inst->ea_reg == Z80_I && inst->addr_mode == Z80_REG) {
+			//ld a, i sets some flags
+			//TODO: Implement half-carry flag
+			cmp_ir(code, 0, dst_op.base, SZ_B);
+			setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+			setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
+			mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);;
+			mov_rdispr(code, opts->gen.context_reg, offsetof(z80_context, iff2), opts->gen.scratch1, SZ_B);
+			mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, zf_off(ZF_PV), SZ_B);
+		}
+		z80_save_reg(inst, opts);
+		z80_save_ea(code, inst, opts);
 		if (inst->addr_mode & Z80_DIR) {
-			dst = z80_save_result(dst, inst);
+			z80_save_result(opts, inst);
 		}
 		break;
 	case Z80_PUSH:
-		dst = zcycles(dst, (inst->reg == Z80_IX || inst->reg == Z80_IY) ? 9 : 5);
-		dst = sub_ir(dst, 2, opts->regs[Z80_SP], SZ_W);
+		cycles(&opts->gen, (inst->reg == Z80_IX || inst->reg == Z80_IY) ? 9 : 5);
+		sub_ir(code, 2, opts->regs[Z80_SP], SZ_W);
 		if (inst->reg == Z80_AF) {
-			dst = mov_rr(dst, opts->regs[Z80_A], SCRATCH1, SZ_B);
-			dst = shl_ir(dst, 8, SCRATCH1, SZ_W);
-			dst = mov_rdisp8r(dst, CONTEXT, zf_off(ZF_S), SCRATCH1, SZ_B);
-			dst = shl_ir(dst, 1, SCRATCH1, SZ_B);
-			dst = or_rdisp8r(dst, CONTEXT, zf_off(ZF_Z), SCRATCH1, SZ_B);
-			dst = shl_ir(dst, 2, SCRATCH1, SZ_B);
-			dst = or_rdisp8r(dst, CONTEXT, zf_off(ZF_H), SCRATCH1, SZ_B);
-			dst = shl_ir(dst, 2, SCRATCH1, SZ_B);
-			dst = or_rdisp8r(dst, CONTEXT, zf_off(ZF_PV), SCRATCH1, SZ_B);
-			dst = shl_ir(dst, 1, SCRATCH1, SZ_B);
-			dst = or_rdisp8r(dst, CONTEXT, zf_off(ZF_N), SCRATCH1, SZ_B);
-			dst = shl_ir(dst, 1, SCRATCH1, SZ_B);
-			dst = or_rdisp8r(dst, CONTEXT, zf_off(ZF_C), SCRATCH1, SZ_B);
+			zreg_to_native(opts, Z80_A, opts->gen.scratch1);
+			shl_ir(code, 8, opts->gen.scratch1, SZ_W);
+			mov_rdispr(code, opts->gen.context_reg, zf_off(ZF_S), opts->gen.scratch1, SZ_B);
+			shl_ir(code, 1, opts->gen.scratch1, SZ_B);
+			or_rdispr(code, opts->gen.context_reg, zf_off(ZF_Z), opts->gen.scratch1, SZ_B);
+			shl_ir(code, 2, opts->gen.scratch1, SZ_B);
+			or_rdispr(code, opts->gen.context_reg, zf_off(ZF_H), opts->gen.scratch1, SZ_B);
+			shl_ir(code, 2, opts->gen.scratch1, SZ_B);
+			or_rdispr(code, opts->gen.context_reg, zf_off(ZF_PV), opts->gen.scratch1, SZ_B);
+			shl_ir(code, 1, opts->gen.scratch1, SZ_B);
+			or_rdispr(code, opts->gen.context_reg, zf_off(ZF_N), opts->gen.scratch1, SZ_B);
+			shl_ir(code, 1, opts->gen.scratch1, SZ_B);
+			or_rdispr(code, opts->gen.context_reg, zf_off(ZF_C), opts->gen.scratch1, SZ_B);
 		} else {
-			dst = translate_z80_reg(inst, &src_op, dst, opts);
-			dst = mov_rr(dst, src_op.base, SCRATCH1, SZ_W);
+			zreg_to_native(opts, inst->reg, opts->gen.scratch1);
 		}
-		dst = mov_rr(dst, opts->regs[Z80_SP], SCRATCH2, SZ_W);
-		dst = call(dst, (uint8_t *)z80_write_word_highfirst);
+		mov_rr(code, opts->regs[Z80_SP], opts->gen.scratch2, SZ_W);
+		call(code, opts->write_16_highfirst);
 		//no call to save_z80_reg needed since there's no chance we'll use the only
 		//the upper half of a register pair
 		break;
 	case Z80_POP:
-		dst = zcycles(dst, (inst->reg == Z80_IX || inst->reg == Z80_IY) ? 8 : 4);
-		dst = mov_rr(dst, opts->regs[Z80_SP], SCRATCH1, SZ_W);
-		dst = call(dst, (uint8_t *)z80_read_word);
-		dst = add_ir(dst, 2, opts->regs[Z80_SP], SZ_W);
+		cycles(&opts->gen, (inst->reg == Z80_IX || inst->reg == Z80_IY) ? 8 : 4);
+		mov_rr(code, opts->regs[Z80_SP], opts->gen.scratch1, SZ_W);
+		call(code, opts->read_16);
+		add_ir(code, 2, opts->regs[Z80_SP], SZ_W);
 		if (inst->reg == Z80_AF) {
 
-			dst = bt_ir(dst, 0, SCRATCH1, SZ_W);
-			dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-			dst = bt_ir(dst, 1, SCRATCH1, SZ_W);
-			dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_N));
-			dst = bt_ir(dst, 2, SCRATCH1, SZ_W);
-			dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_PV));
-			dst = bt_ir(dst, 4, SCRATCH1, SZ_W);
-			dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_H));
-			dst = bt_ir(dst, 6, SCRATCH1, SZ_W);
-			dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_Z));
-			dst = bt_ir(dst, 7, SCRATCH1, SZ_W);
-			dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_S));
-			dst = shr_ir(dst, 8, SCRATCH1, SZ_W);
-			dst = mov_rr(dst, SCRATCH1, opts->regs[Z80_A], SZ_B);
+			bt_ir(code, 0, opts->gen.scratch1, SZ_W);
+			setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+			bt_ir(code, 1, opts->gen.scratch1, SZ_W);
+			setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_N));
+			bt_ir(code, 2, opts->gen.scratch1, SZ_W);
+			setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_PV));
+			bt_ir(code, 4, opts->gen.scratch1, SZ_W);
+			setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_H));
+			bt_ir(code, 6, opts->gen.scratch1, SZ_W);
+			setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_Z));
+			bt_ir(code, 7, opts->gen.scratch1, SZ_W);
+			setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_S));
+			shr_ir(code, 8, opts->gen.scratch1, SZ_W);
+			native_to_zreg(opts, opts->gen.scratch1, Z80_A);
 		} else {
-			dst = translate_z80_reg(inst, &src_op, dst, opts);
-			dst = mov_rr(dst, SCRATCH1, src_op.base, SZ_W);
+			native_to_zreg(opts, opts->gen.scratch1, inst->reg);
 		}
 		//no call to save_z80_reg needed since there's no chance we'll use the only
 		//the upper half of a register pair
 		break;
 	case Z80_EX:
 		if (inst->addr_mode == Z80_REG || inst->reg == Z80_HL) {
-			cycles = 4;
+			num_cycles = 4;
 		} else {
-			cycles = 8;
+			num_cycles = 8;
 		}
-		dst = zcycles(dst, cycles);
+		cycles(&opts->gen, num_cycles);
 		if (inst->addr_mode == Z80_REG) {
 			if(inst->reg == Z80_AF) {
-				dst = mov_rr(dst, opts->regs[Z80_A], SCRATCH1, SZ_B);
-				dst = mov_rdisp8r(dst, CONTEXT, zar_off(Z80_A), opts->regs[Z80_A], SZ_B);
-				dst = mov_rrdisp8(dst, SCRATCH1, CONTEXT, zar_off(Z80_A), SZ_B);
+				zreg_to_native(opts, Z80_A, opts->gen.scratch1);
+				mov_rdispr(code, opts->gen.context_reg, zar_off(Z80_A), opts->gen.scratch2, SZ_B);
+				mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, zar_off(Z80_A), SZ_B);
+				native_to_zreg(opts, opts->gen.scratch2, Z80_A);
 
 				//Flags are currently word aligned, so we can move
 				//them efficiently a word at a time
 				for (int f = ZF_C; f < ZF_NUM; f+=2) {
-					dst = mov_rdisp8r(dst, CONTEXT, zf_off(f), SCRATCH1, SZ_W);
-					dst = mov_rdisp8r(dst, CONTEXT, zaf_off(f), SCRATCH2, SZ_W);
-					dst = mov_rrdisp8(dst, SCRATCH1, CONTEXT, zaf_off(f), SZ_W);
-					dst = mov_rrdisp8(dst, SCRATCH2, CONTEXT, zf_off(f), SZ_W);
+					mov_rdispr(code, opts->gen.context_reg, zf_off(f), opts->gen.scratch1, SZ_W);
+					mov_rdispr(code, opts->gen.context_reg, zaf_off(f), opts->gen.scratch2, SZ_W);
+					mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, zaf_off(f), SZ_W);
+					mov_rrdisp(code, opts->gen.scratch2, opts->gen.context_reg, zf_off(f), SZ_W);
 				}
 			} else {
-				dst = xchg_rr(dst, opts->regs[Z80_DE], opts->regs[Z80_HL], SZ_W);
+				if (opts->regs[Z80_DE] >= 0 && opts->regs[Z80_HL] >= 0) {
+					xchg_rr(code, opts->regs[Z80_DE], opts->regs[Z80_HL], SZ_W);
+				} else {
+					zreg_to_native(opts, Z80_DE, opts->gen.scratch1);
+					zreg_to_native(opts, Z80_HL, opts->gen.scratch2);
+					native_to_zreg(opts, opts->gen.scratch1, Z80_HL);
+					native_to_zreg(opts, opts->gen.scratch2, Z80_DE);
+				}
 			}
 		} else {
-			dst = mov_rr(dst, opts->regs[Z80_SP], SCRATCH1, SZ_W);
-			dst = call(dst, (uint8_t *)z80_read_byte);
-			dst = xchg_rr(dst, opts->regs[inst->reg], SCRATCH1, SZ_B);
-			dst = mov_rr(dst, opts->regs[Z80_SP], SCRATCH2, SZ_W);
-			dst = call(dst, (uint8_t *)z80_write_byte);
-			dst = zcycles(dst, 1);
+			mov_rr(code, opts->regs[Z80_SP], opts->gen.scratch1, SZ_W);
+			call(code, opts->read_8);
+			if (opts->regs[inst->reg] >= 0) {
+				xchg_rr(code, opts->regs[inst->reg], opts->gen.scratch1, SZ_B);
+			} else {
+				zreg_to_native(opts, inst->reg, opts->gen.scratch2);
+				xchg_rr(code, opts->gen.scratch1, opts->gen.scratch2, SZ_B);
+				native_to_zreg(opts, opts->gen.scratch2, inst->reg);
+			}
+			mov_rr(code, opts->regs[Z80_SP], opts->gen.scratch2, SZ_W);
+			call(code, opts->write_8);
+			cycles(&opts->gen, 1);
 			uint8_t high_reg = z80_high_reg(inst->reg);
-			uint8_t use_reg;
-			//even though some of the upper halves can be used directly
-			//the limitations on mixing *H regs with the REX prefix
-			//prevent us from taking advantage of it
-			use_reg = opts->regs[inst->reg];
-			dst = ror_ir(dst, 8, use_reg, SZ_W);
-			dst = mov_rr(dst, opts->regs[Z80_SP], SCRATCH1, SZ_W);
-			dst = add_ir(dst, 1, SCRATCH1, SZ_W);
-			dst = call(dst, (uint8_t *)z80_read_byte);
-			dst = xchg_rr(dst, use_reg, SCRATCH1, SZ_B);
-			dst = mov_rr(dst, opts->regs[Z80_SP], SCRATCH2, SZ_W);
-			dst = add_ir(dst, 1, SCRATCH2, SZ_W);
-			dst = call(dst, (uint8_t *)z80_write_byte);
-			//restore reg to normal rotation
-			dst = ror_ir(dst, 8, use_reg, SZ_W);
-			dst = zcycles(dst, 2);
+			mov_rr(code, opts->regs[Z80_SP], opts->gen.scratch1, SZ_W);
+			add_ir(code, 1, opts->gen.scratch1, SZ_W);
+			call(code, opts->read_8);
+			if (opts->regs[inst->reg] >= 0) {
+				//even though some of the upper halves can be used directly
+				//the limitations on mixing *H regs with the REX prefix
+				//prevent us from taking advantage of it
+				uint8_t use_reg = opts->regs[inst->reg];
+				ror_ir(code, 8, use_reg, SZ_W);
+				xchg_rr(code, use_reg, opts->gen.scratch1, SZ_B);
+				//restore reg to normal rotation
+				ror_ir(code, 8, use_reg, SZ_W);
+			} else {
+				zreg_to_native(opts, high_reg, opts->gen.scratch2);
+				xchg_rr(code, opts->gen.scratch1, opts->gen.scratch2, SZ_B);
+				native_to_zreg(opts, opts->gen.scratch2, high_reg);
+			}
+			mov_rr(code, opts->regs[Z80_SP], opts->gen.scratch2, SZ_W);
+			add_ir(code, 1, opts->gen.scratch2, SZ_W);
+			call(code, opts->write_8);
+			cycles(&opts->gen, 2);
 		}
 		break;
 	case Z80_EXX:
-		dst = zcycles(dst, 4);
-		dst = mov_rr(dst, opts->regs[Z80_BC], SCRATCH1, SZ_W);
-		dst = mov_rr(dst, opts->regs[Z80_HL], SCRATCH2, SZ_W);
-		dst = mov_rdisp8r(dst, CONTEXT, zar_off(Z80_C), opts->regs[Z80_BC], SZ_W);
-		dst = mov_rdisp8r(dst, CONTEXT, zar_off(Z80_L), opts->regs[Z80_HL], SZ_W);
-		dst = mov_rrdisp8(dst, SCRATCH1, CONTEXT, zar_off(Z80_C), SZ_W);
-		dst = mov_rrdisp8(dst, SCRATCH2, CONTEXT, zar_off(Z80_L), SZ_W);
-		dst = mov_rr(dst, opts->regs[Z80_DE], SCRATCH1, SZ_W);
-		dst = mov_rdisp8r(dst, CONTEXT, zar_off(Z80_E), opts->regs[Z80_DE], SZ_W);
-		dst = mov_rrdisp8(dst, SCRATCH1, CONTEXT, zar_off(Z80_E), SZ_W);
+		cycles(&opts->gen, 4);
+		zreg_to_native(opts, Z80_BC, opts->gen.scratch1);
+		mov_rdispr(code, opts->gen.context_reg, zar_off(Z80_BC), opts->gen.scratch2, SZ_W);
+		mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, zar_off(Z80_BC), SZ_W);
+		native_to_zreg(opts, opts->gen.scratch2, Z80_BC);
+		
+		zreg_to_native(opts, Z80_HL, opts->gen.scratch1);
+		mov_rdispr(code, opts->gen.context_reg, zar_off(Z80_HL), opts->gen.scratch2, SZ_W);
+		mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, zar_off(Z80_HL), SZ_W);
+		native_to_zreg(opts, opts->gen.scratch2, Z80_HL);
+		
+		zreg_to_native(opts, Z80_DE, opts->gen.scratch1);
+		mov_rdispr(code, opts->gen.context_reg, zar_off(Z80_DE), opts->gen.scratch2, SZ_W);
+		mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, zar_off(Z80_DE), SZ_W);
+		native_to_zreg(opts, opts->gen.scratch2, Z80_DE);
 		break;
 	case Z80_LDI: {
-		dst = zcycles(dst, 8);
-		dst = mov_rr(dst, opts->regs[Z80_HL], SCRATCH1, SZ_W);
-		dst = call(dst, (uint8_t *)z80_read_byte);
-		dst = mov_rr(dst, opts->regs[Z80_DE], SCRATCH2, SZ_W);
-		dst = call(dst, (uint8_t *)z80_write_byte);
-		dst = zcycles(dst, 2);
-		dst = add_ir(dst, 1, opts->regs[Z80_DE], SZ_W);
-		dst = add_ir(dst, 1, opts->regs[Z80_HL], SZ_W);
-		dst = sub_ir(dst, 1, opts->regs[Z80_BC], SZ_W);
+		cycles(&opts->gen, 8);
+		zreg_to_native(opts, Z80_HL, opts->gen.scratch1);
+		call(code, opts->read_8);
+		zreg_to_native(opts, Z80_DE, opts->gen.scratch2);
+		call(code, opts->write_8);
+		cycles(&opts->gen, 2);
+		if (opts->regs[Z80_DE] >= 0) {
+			add_ir(code, 1, opts->regs[Z80_DE], SZ_W);
+		} else {
+			add_irdisp(code, 1, opts->gen.context_reg, zr_off(Z80_DE), SZ_W);
+		}
+		if (opts->regs[Z80_HL] >= 0) {
+			add_ir(code, 1, opts->regs[Z80_HL], SZ_W);
+		} else {
+			add_irdisp(code, 1, opts->gen.context_reg,  zr_off(Z80_HL), SZ_W);
+		}
+		if (opts->regs[Z80_BC] >= 0) {
+			sub_ir(code, 1, opts->regs[Z80_BC], SZ_W);
+		} else {
+			sub_irdisp(code, 1, opts->gen.context_reg,  zr_off(Z80_BC), SZ_W);
+		}
 		//TODO: Implement half-carry
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
-		dst = setcc_rdisp8(dst, CC_NZ, CONTEXT, zf_off(ZF_PV));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_NZ, opts->gen.context_reg, zf_off(ZF_PV));
 		break;
 	}
 	case Z80_LDIR: {
-		dst = zcycles(dst, 8);
-		dst = mov_rr(dst, opts->regs[Z80_HL], SCRATCH1, SZ_W);
-		dst = call(dst, (uint8_t *)z80_read_byte);
-		dst = mov_rr(dst, opts->regs[Z80_DE], SCRATCH2, SZ_W);
-		dst = call(dst, (uint8_t *)z80_write_byte);
-		dst = add_ir(dst, 1, opts->regs[Z80_DE], SZ_W);
-		dst = add_ir(dst, 1, opts->regs[Z80_HL], SZ_W);
-
-		dst = sub_ir(dst, 1, opts->regs[Z80_BC], SZ_W);
-		uint8_t * cont = dst+1;
-		dst = jcc(dst, CC_Z, dst+2);
-		dst = zcycles(dst, 7);
+		cycles(&opts->gen, 8);
+		zreg_to_native(opts, Z80_HL, opts->gen.scratch1);
+		call(code, opts->read_8);
+		zreg_to_native(opts, Z80_DE, opts->gen.scratch2);
+		call(code, opts->write_8);
+		if (opts->regs[Z80_DE] >= 0) {
+			add_ir(code, 1, opts->regs[Z80_DE], SZ_W);
+		} else {
+			add_irdisp(code, 1, opts->gen.context_reg, zr_off(Z80_DE), SZ_W);
+		}
+		if (opts->regs[Z80_HL] >= 0) {
+			add_ir(code, 1, opts->regs[Z80_HL], SZ_W);
+		} else {
+			add_irdisp(code, 1, opts->gen.context_reg,  zr_off(Z80_HL), SZ_W);
+		}
+		if (opts->regs[Z80_BC] >= 0) {
+			sub_ir(code, 1, opts->regs[Z80_BC], SZ_W);
+		} else {
+			sub_irdisp(code, 1, opts->gen.context_reg,  zr_off(Z80_BC), SZ_W);
+		}
+		uint8_t * cont = code->cur+1;
+		jcc(code, CC_Z, code->cur+2);
+		cycles(&opts->gen, 7);
 		//TODO: Figure out what the flag state should be here
 		//TODO: Figure out whether an interrupt can interrupt this
-		dst = jmp(dst, start);
-		*cont = dst - (cont + 1);
-		dst = zcycles(dst, 2);
+		jmp(code, start);
+		*cont = code->cur - (cont + 1);
+		cycles(&opts->gen, 2);
 		//TODO: Implement half-carry
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_PV), SZ_B);
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_PV), SZ_B);
 		break;
 	}
 	case Z80_LDD: {
-		dst = zcycles(dst, 8);
-		dst = mov_rr(dst, opts->regs[Z80_HL], SCRATCH1, SZ_W);
-		dst = call(dst, (uint8_t *)z80_read_byte);
-		dst = mov_rr(dst, opts->regs[Z80_DE], SCRATCH2, SZ_W);
-		dst = call(dst, (uint8_t *)z80_write_byte);
-		dst = zcycles(dst, 2);
-		dst = sub_ir(dst, 1, opts->regs[Z80_DE], SZ_W);
-		dst = sub_ir(dst, 1, opts->regs[Z80_HL], SZ_W);
-		dst = sub_ir(dst, 1, opts->regs[Z80_BC], SZ_W);
+		cycles(&opts->gen, 8);
+		zreg_to_native(opts, Z80_HL, opts->gen.scratch1); // HL goes to scratch1 before the read_8 call
+		call(code, opts->read_8);
+		zreg_to_native(opts, Z80_DE, opts->gen.scratch2); // DE goes to scratch2 before the write_8 call
+		call(code, opts->write_8);
+		cycles(&opts->gen, 2);
+		if (opts->regs[Z80_DE] >= 0) {
+			sub_ir(code, 1, opts->regs[Z80_DE], SZ_W);
+		} else {
+			sub_irdisp(code, 1, opts->gen.context_reg, zr_off(Z80_DE), SZ_W);
+		}
+		if (opts->regs[Z80_HL] >= 0) {
+			sub_ir(code, 1, opts->regs[Z80_HL], SZ_W); // LDD decrements HL; this was add_ir, contradicting the removed sub_ir and the sub_irdisp fallback
+		} else {
+			sub_irdisp(code, 1, opts->gen.context_reg, zr_off(Z80_HL), SZ_W);
+		}
+		if (opts->regs[Z80_BC] >= 0) {
+			sub_ir(code, 1, opts->regs[Z80_BC], SZ_W);
+		} else {
+			sub_irdisp(code, 1, opts->gen.context_reg, zr_off(Z80_BC), SZ_W);
+		}
 		//TODO: Implement half-carry
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
-		dst = setcc_rdisp8(dst, CC_NZ, CONTEXT, zf_off(ZF_PV));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_NZ, opts->gen.context_reg, zf_off(ZF_PV)); // P/V = (BC != 0); assumes mov_irdisp emits a plain mov that leaves host flags alone -- confirm
 		break;
 	}
 	case Z80_LDDR: {
-		dst = zcycles(dst, 8);
-		dst = mov_rr(dst, opts->regs[Z80_HL], SCRATCH1, SZ_W);
-		dst = call(dst, (uint8_t *)z80_read_byte);
-		dst = mov_rr(dst, opts->regs[Z80_DE], SCRATCH2, SZ_W);
-		dst = call(dst, (uint8_t *)z80_write_byte);
-		dst = sub_ir(dst, 1, opts->regs[Z80_DE], SZ_W);
-		dst = sub_ir(dst, 1, opts->regs[Z80_HL], SZ_W);
-
-		dst = sub_ir(dst, 1, opts->regs[Z80_BC], SZ_W);
-		uint8_t * cont = dst+1;
-		dst = jcc(dst, CC_Z, dst+2);
-		dst = zcycles(dst, 7);
+		cycles(&opts->gen, 8);
+		zreg_to_native(opts, Z80_HL, opts->gen.scratch1); // HL goes to scratch1 before the read_8 call
+		call(code, opts->read_8);
+		zreg_to_native(opts, Z80_DE, opts->gen.scratch2); // DE goes to scratch2 before the write_8 call
+		call(code, opts->write_8);
+		if (opts->regs[Z80_DE] >= 0) {
+			sub_ir(code, 1, opts->regs[Z80_DE], SZ_W);
+		} else {
+			sub_irdisp(code, 1, opts->gen.context_reg, zr_off(Z80_DE), SZ_W);
+		}
+		if (opts->regs[Z80_HL] >= 0) {
+			sub_ir(code, 1, opts->regs[Z80_HL], SZ_W); // LDDR decrements HL; this was add_ir, contradicting the removed sub_ir and the sub_irdisp fallback
+		} else {
+			sub_irdisp(code, 1, opts->gen.context_reg, zr_off(Z80_HL), SZ_W);
+		}
+		if (opts->regs[Z80_BC] >= 0) {
+			sub_ir(code, 1, opts->regs[Z80_BC], SZ_W);
+		} else {
+			sub_irdisp(code, 1, opts->gen.context_reg, zr_off(Z80_BC), SZ_W);
+		}
+		uint8_t * cont = code->cur+1; // address of the jcc displacement byte, patched below (assumes jcc emits a short form here -- confirm)
+		jcc(code, CC_Z, code->cur+2); // BC reached zero: skip the loop-back jump
+		cycles(&opts->gen, 7);
 		//TODO: Figure out what the flag state should be here
 		//TODO: Figure out whether an interrupt can interrupt this
-		dst = jmp(dst, start);
-		*cont = dst - (cont + 1);
-		dst = zcycles(dst, 2);
+		jmp(code, start); // repeat from the first byte emitted for this instruction
+		*cont = code->cur - (cont + 1); // back-patch the forward branch to land here
+		cycles(&opts->gen, 2);
 		//TODO: Implement half-carry
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_PV), SZ_B);
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_PV), SZ_B);
 		break;
 	}
 	/*case Z80_CPI:
@@ -587,1056 +686,1251 @@
 	case Z80_CPDR:
 		break;*/
 	case Z80_ADD:
-		cycles = 4;
-		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
-			cycles += 12;
-		} else if(inst->addr_mode == Z80_IMMED) {
-			cycles += 3;
-		} else if(z80_size(inst) == SZ_W) {
-			cycles += 4;
-		}
-		dst = zcycles(dst, cycles);
-		dst = translate_z80_reg(inst, &dst_op, dst, opts);
-		dst = translate_z80_ea(inst, &src_op, dst, opts, READ, DONT_MODIFY);
-		if (src_op.mode == MODE_REG_DIRECT) {
-			dst = add_rr(dst, src_op.base, dst_op.base, z80_size(inst));
-		} else {
-			dst = add_ir(dst, src_op.disp, dst_op.base, z80_size(inst));
-		}
-		dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
-		//TODO: Implement half-carry flag
-		if (z80_size(inst) == SZ_B) {
-			dst = setcc_rdisp8(dst, CC_O, CONTEXT, zf_off(ZF_PV));
-			dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-			dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
-		}
-		dst = z80_save_reg(dst, inst, opts);
-		dst = z80_save_ea(dst, inst, opts);
-		break;
-	case Z80_ADC:
-		cycles = 4;
+		num_cycles = 4;
 		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
-			cycles += 12;
+			num_cycles += 12;
 		} else if(inst->addr_mode == Z80_IMMED) {
-			cycles += 3;
+			num_cycles += 3;
 		} else if(z80_size(inst) == SZ_W) {
-			cycles += 4;
-		}
-		dst = zcycles(dst, cycles);
-		dst = translate_z80_reg(inst, &dst_op, dst, opts);
-		dst = translate_z80_ea(inst, &src_op, dst, opts, READ, DONT_MODIFY);
-		dst = bt_irdisp8(dst, 0, CONTEXT, zf_off(ZF_C), SZ_B);
-		if (src_op.mode == MODE_REG_DIRECT) {
-			dst = adc_rr(dst, src_op.base, dst_op.base, z80_size(inst));
-		} else {
-			dst = adc_ir(dst, src_op.disp, dst_op.base, z80_size(inst));
-		}
-		dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
-		//TODO: Implement half-carry flag
-		dst = setcc_rdisp8(dst, CC_O, CONTEXT, zf_off(ZF_PV));
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
-		dst = z80_save_reg(dst, inst, opts);
-		dst = z80_save_ea(dst, inst, opts);
-		break;
-	case Z80_SUB:
-		cycles = 4;
-		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
-			cycles += 12;
-		} else if(inst->addr_mode == Z80_IMMED) {
-			cycles += 3;
+			num_cycles += 4;
 		}
-		dst = zcycles(dst, cycles);
-		dst = translate_z80_reg(inst, &dst_op, dst, opts);
-		dst = translate_z80_ea(inst, &src_op, dst, opts, READ, DONT_MODIFY);
-		if (src_op.mode == MODE_REG_DIRECT) {
-			dst = sub_rr(dst, src_op.base, dst_op.base, z80_size(inst));
+		cycles(&opts->gen, num_cycles);
+		translate_z80_reg(inst, &dst_op, opts);
+		translate_z80_ea(inst, &src_op, opts, READ, DONT_MODIFY);
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			if (src_op.mode == MODE_REG_DIRECT) {
+				add_rr(code, src_op.base, dst_op.base, z80_size(inst));
+			} else if (src_op.mode == MODE_IMMED) {
+				add_ir(code, src_op.disp, dst_op.base, z80_size(inst));
+			} else {
+				add_rdispr(code, src_op.base, src_op.disp, dst_op.base, z80_size(inst));
+			}
 		} else {
-			dst = sub_ir(dst, src_op.disp, dst_op.base, z80_size(inst));
-		}
-		dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 1, CONTEXT, zf_off(ZF_N), SZ_B);
-		dst = setcc_rdisp8(dst, CC_O, CONTEXT, zf_off(ZF_PV));
-		//TODO: Implement half-carry flag
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
-		dst = z80_save_reg(dst, inst, opts);
-		dst = z80_save_ea(dst, inst, opts);
-		break;
-	case Z80_SBC:
-		cycles = 4;
-		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
-			cycles += 12;
-		} else if(inst->addr_mode == Z80_IMMED) {
-			cycles += 3;
-		} else if(z80_size(inst) == SZ_W) {
-			cycles += 4;
+			if (src_op.mode == MODE_REG_DIRECT) {
+				add_rrdisp(code, src_op.base, dst_op.base, dst_op.disp, z80_size(inst));
+			} else if (src_op.mode == MODE_IMMED) {
+				add_irdisp(code, src_op.disp, dst_op.base, dst_op.disp, z80_size(inst));
+			} else {
+				mov_rdispr(code, src_op.base, src_op.disp, opts->gen.scratch1, z80_size(inst));
+				add_rrdisp(code, opts->gen.scratch1, dst_op.base, dst_op.disp, z80_size(inst));
+			}
 		}
-		dst = zcycles(dst, cycles);
-		dst = translate_z80_reg(inst, &dst_op, dst, opts);
-		dst = translate_z80_ea(inst, &src_op, dst, opts, READ, DONT_MODIFY);
-		dst = bt_irdisp8(dst, 0, CONTEXT, zf_off(ZF_C), SZ_B);
-		if (src_op.mode == MODE_REG_DIRECT) {
-			dst = sbb_rr(dst, src_op.base, dst_op.base, z80_size(inst));
-		} else {
-			dst = sbb_ir(dst, src_op.disp, dst_op.base, z80_size(inst));
-		}
-		dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 1, CONTEXT, zf_off(ZF_N), SZ_B);
-		//TODO: Implement half-carry flag
-		dst = setcc_rdisp8(dst, CC_O, CONTEXT, zf_off(ZF_PV));
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
-		dst = z80_save_reg(dst, inst, opts);
-		dst = z80_save_ea(dst, inst, opts);
-		break;
-	case Z80_AND:
-		cycles = 4;
-		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
-			cycles += 12;
-		} else if(inst->addr_mode == Z80_IMMED) {
-			cycles += 3;
-		} else if(z80_size(inst) == SZ_W) {
-			cycles += 4;
-		}
-		dst = zcycles(dst, cycles);
-		dst = translate_z80_reg(inst, &dst_op, dst, opts);
-		dst = translate_z80_ea(inst, &src_op, dst, opts, READ, DONT_MODIFY);
-		if (src_op.mode == MODE_REG_DIRECT) {
-			dst = and_rr(dst, src_op.base, dst_op.base, z80_size(inst));
-		} else {
-			dst = and_ir(dst, src_op.disp, dst_op.base, z80_size(inst));
-		}
-		//TODO: Cleanup flags
-		dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		//TODO: Implement half-carry flag
 		if (z80_size(inst) == SZ_B) {
-			dst = setcc_rdisp8(dst, CC_P, CONTEXT, zf_off(ZF_PV));
-			dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-			dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+			setcc_rdisp(code, CC_O, opts->gen.context_reg, zf_off(ZF_PV));
+			setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+			setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
+		}
+		z80_save_reg(inst, opts);
+		z80_save_ea(code, inst, opts);
+		break;
+	case Z80_ADC:
+		num_cycles = 4;
+		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
+			num_cycles += 12;
+		} else if(inst->addr_mode == Z80_IMMED) {
+			num_cycles += 3;
+		} else if(z80_size(inst) == SZ_W) {
+			num_cycles += 4;
 		}
-		dst = z80_save_reg(dst, inst, opts);
-		dst = z80_save_ea(dst, inst, opts);
+		cycles(&opts->gen, num_cycles);
+		translate_z80_reg(inst, &dst_op, opts);
+		translate_z80_ea(inst, &src_op, opts, READ, DONT_MODIFY);
+		bt_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_C), SZ_B);
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			if (src_op.mode == MODE_REG_DIRECT) {
+				adc_rr(code, src_op.base, dst_op.base, z80_size(inst));
+			} else if (src_op.mode == MODE_IMMED) {
+				adc_ir(code, src_op.disp, dst_op.base, z80_size(inst));
+			} else {
+				adc_rdispr(code, src_op.base, src_op.disp, dst_op.base, z80_size(inst));
+			}
+		} else {
+			if (src_op.mode == MODE_REG_DIRECT) {
+				adc_rrdisp(code, src_op.base, dst_op.base, dst_op.disp, z80_size(inst));
+			} else if (src_op.mode == MODE_IMMED) {
+				adc_irdisp(code, src_op.disp, dst_op.base, dst_op.disp, z80_size(inst));
+			} else {
+				mov_rdispr(code, src_op.base, src_op.disp, opts->gen.scratch1, z80_size(inst));
+				adc_rrdisp(code, opts->gen.scratch1, dst_op.base, dst_op.disp, z80_size(inst));
+			}
+		}
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
+		//TODO: Implement half-carry flag
+		setcc_rdisp(code, CC_O, opts->gen.context_reg, zf_off(ZF_PV));
+		setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+		setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
+		z80_save_reg(inst, opts);
+		z80_save_ea(code, inst, opts);
 		break;
-	case Z80_OR:
-		cycles = 4;
+	case Z80_SUB:
+		num_cycles = 4;
 		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
-			cycles += 12;
+			num_cycles += 12;
 		} else if(inst->addr_mode == Z80_IMMED) {
-			cycles += 3;
+			num_cycles += 3;
+		}
+		cycles(&opts->gen, num_cycles);
+		translate_z80_reg(inst, &dst_op, opts);
+		translate_z80_ea(inst, &src_op, opts, READ, DONT_MODIFY);
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			if (src_op.mode == MODE_REG_DIRECT) {
+				sub_rr(code, src_op.base, dst_op.base, z80_size(inst));
+			} else if (src_op.mode == MODE_IMMED) {
+				sub_ir(code, src_op.disp, dst_op.base, z80_size(inst));
+			} else {
+				sub_rdispr(code, src_op.base, src_op.disp, dst_op.base, z80_size(inst));
+			}
+		} else {
+			if (src_op.mode == MODE_REG_DIRECT) {
+				sub_rrdisp(code, src_op.base, dst_op.base, dst_op.disp, z80_size(inst));
+			} else if (src_op.mode == MODE_IMMED) {
+				sub_irdisp(code, src_op.disp, dst_op.base, dst_op.disp, z80_size(inst));
+			} else {
+				mov_rdispr(code, src_op.base, src_op.disp, opts->gen.scratch1, z80_size(inst));
+				sub_rrdisp(code, opts->gen.scratch1, dst_op.base, dst_op.disp, z80_size(inst));
+			}
+		}
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 1, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_O, opts->gen.context_reg, zf_off(ZF_PV));
+		//TODO: Implement half-carry flag
+		setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+		setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
+		z80_save_reg(inst, opts);
+		z80_save_ea(code, inst, opts);
+		break;
+	case Z80_SBC:
+		num_cycles = 4;
+		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
+			num_cycles += 12;
+		} else if(inst->addr_mode == Z80_IMMED) {
+			num_cycles += 3;
 		} else if(z80_size(inst) == SZ_W) {
-			cycles += 4;
+			num_cycles += 4;
 		}
-		dst = zcycles(dst, cycles);
-		dst = translate_z80_reg(inst, &dst_op, dst, opts);
-		dst = translate_z80_ea(inst, &src_op, dst, opts, READ, DONT_MODIFY);
+		cycles(&opts->gen, num_cycles);
+		translate_z80_reg(inst, &dst_op, opts);
+		translate_z80_ea(inst, &src_op, opts, READ, DONT_MODIFY);
+		bt_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_C), SZ_B);
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			if (src_op.mode == MODE_REG_DIRECT) {
+				sbb_rr(code, src_op.base, dst_op.base, z80_size(inst));
+			} else if (src_op.mode == MODE_IMMED) {
+				sbb_ir(code, src_op.disp, dst_op.base, z80_size(inst));
+			} else {
+				sbb_rdispr(code, src_op.base, src_op.disp, dst_op.base, z80_size(inst));
+			}
+		} else {
+			if (src_op.mode == MODE_REG_DIRECT) {
+				sbb_rrdisp(code, src_op.base, dst_op.base, dst_op.disp, z80_size(inst));
+			} else if (src_op.mode == MODE_IMMED) {
+				sbb_irdisp(code, src_op.disp, dst_op.base, dst_op.disp, z80_size(inst));
+			} else {
+				mov_rdispr(code, src_op.base, src_op.disp, opts->gen.scratch1, z80_size(inst));
+				sbb_rrdisp(code, opts->gen.scratch1, dst_op.base, dst_op.disp, z80_size(inst));
+			}
+		}
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 1, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
+		//TODO: Implement half-carry flag
+		setcc_rdisp(code, CC_O, opts->gen.context_reg, zf_off(ZF_PV));
+		setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+		setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
+		z80_save_reg(inst, opts);
+		z80_save_ea(code, inst, opts);
+		break;
+	case Z80_AND:
+		num_cycles = 4;
+		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
+			num_cycles += 12;
+		} else if(inst->addr_mode == Z80_IMMED) {
+			num_cycles += 3;
+		} else if(z80_size(inst) == SZ_W) {
+			num_cycles += 4;
+		}
+		cycles(&opts->gen, num_cycles);
+		translate_z80_reg(inst, &dst_op, opts);
+		translate_z80_ea(inst, &src_op, opts, READ, DONT_MODIFY);
 		if (src_op.mode == MODE_REG_DIRECT) {
-			dst = or_rr(dst, src_op.base, dst_op.base, z80_size(inst));
+			and_rr(code, src_op.base, dst_op.base, z80_size(inst));
+		} else if (src_op.mode == MODE_IMMED) {
+			and_ir(code, src_op.disp, dst_op.base, z80_size(inst));
 		} else {
-			dst = or_ir(dst, src_op.disp, dst_op.base, z80_size(inst));
+			and_rdispr(code, src_op.base, src_op.disp, dst_op.base, z80_size(inst));
 		}
 		//TODO: Cleanup flags
-		dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		//TODO: Implement half-carry flag
 		if (z80_size(inst) == SZ_B) {
-			dst = setcc_rdisp8(dst, CC_P, CONTEXT, zf_off(ZF_PV));
-			dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-			dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+			setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
+			setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+			setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
+		}
+		z80_save_reg(inst, opts);
+		z80_save_ea(code, inst, opts);
+		break;
+	case Z80_OR:
+		num_cycles = 4;
+		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
+			num_cycles += 12;
+		} else if(inst->addr_mode == Z80_IMMED) {
+			num_cycles += 3;
+		} else if(z80_size(inst) == SZ_W) {
+			num_cycles += 4;
 		}
-		dst = z80_save_reg(dst, inst, opts);
-		dst = z80_save_ea(dst, inst, opts);
+		cycles(&opts->gen, num_cycles);
+		translate_z80_reg(inst, &dst_op, opts);
+		translate_z80_ea(inst, &src_op, opts, READ, DONT_MODIFY);
+		if (src_op.mode == MODE_REG_DIRECT) {
+			or_rr(code, src_op.base, dst_op.base, z80_size(inst));
+		} else if (src_op.mode == MODE_IMMED) {
+			or_ir(code, src_op.disp, dst_op.base, z80_size(inst));
+		} else {
+			or_rdispr(code, src_op.base, src_op.disp, dst_op.base, z80_size(inst));
+		}
+		//TODO: Cleanup flags
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
+		//TODO: Implement half-carry flag
+		if (z80_size(inst) == SZ_B) {
+			setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
+			setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+			setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
+		}
+		z80_save_reg(inst, opts);
+		z80_save_ea(code, inst, opts);
 		break;
 	case Z80_XOR:
-		cycles = 4;
+		num_cycles = 4;
 		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
-			cycles += 12;
+			num_cycles += 12;
 		} else if(inst->addr_mode == Z80_IMMED) {
-			cycles += 3;
+			num_cycles += 3;
 		} else if(z80_size(inst) == SZ_W) {
-			cycles += 4;
+			num_cycles += 4;
 		}
-		dst = zcycles(dst, cycles);
-		dst = translate_z80_reg(inst, &dst_op, dst, opts);
-		dst = translate_z80_ea(inst, &src_op, dst, opts, READ, DONT_MODIFY);
+		cycles(&opts->gen, num_cycles);
+		translate_z80_reg(inst, &dst_op, opts);
+		translate_z80_ea(inst, &src_op, opts, READ, DONT_MODIFY);
 		if (src_op.mode == MODE_REG_DIRECT) {
-			dst = xor_rr(dst, src_op.base, dst_op.base, z80_size(inst));
+			xor_rr(code, src_op.base, dst_op.base, z80_size(inst));
+		} else if (src_op.mode == MODE_IMMED) {
+			xor_ir(code, src_op.disp, dst_op.base, z80_size(inst));
 		} else {
-			dst = xor_ir(dst, src_op.disp, dst_op.base, z80_size(inst));
+			xor_rdispr(code, src_op.base, src_op.disp, dst_op.base, z80_size(inst));
 		}
 		//TODO: Cleanup flags
-		dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		//TODO: Implement half-carry flag
 		if (z80_size(inst) == SZ_B) {
-			dst = setcc_rdisp8(dst, CC_P, CONTEXT, zf_off(ZF_PV));
-			dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-			dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+			setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
+			setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+			setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
 		}
-		dst = z80_save_reg(dst, inst, opts);
-		dst = z80_save_ea(dst, inst, opts);
+		z80_save_reg(inst, opts);
+		z80_save_ea(code, inst, opts);
 		break;
 	case Z80_CP:
-		cycles = 4;
+		num_cycles = 4;
 		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
-			cycles += 12;
+			num_cycles += 12;
 		} else if(inst->addr_mode == Z80_IMMED) {
-			cycles += 3;
+			num_cycles += 3;
 		}
-		dst = zcycles(dst, cycles);
-		dst = translate_z80_reg(inst, &dst_op, dst, opts);
-		dst = translate_z80_ea(inst, &src_op, dst, opts, READ, DONT_MODIFY);
+		cycles(&opts->gen, num_cycles);
+		translate_z80_reg(inst, &dst_op, opts);
+		translate_z80_ea(inst, &src_op, opts, READ, DONT_MODIFY);
 		if (src_op.mode == MODE_REG_DIRECT) {
-			dst = cmp_rr(dst, src_op.base, dst_op.base, z80_size(inst));
+			cmp_rr(code, src_op.base, dst_op.base, z80_size(inst));
+		} else if (src_op.mode == MODE_IMMED) {
+			cmp_ir(code, src_op.disp, dst_op.base, z80_size(inst));
 		} else {
-			dst = cmp_ir(dst, src_op.disp, dst_op.base, z80_size(inst));
+			cmp_rdispr(code, src_op.base, src_op.disp, dst_op.base, z80_size(inst));
 		}
-		dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 1, CONTEXT, zf_off(ZF_N), SZ_B);
-		dst = setcc_rdisp8(dst, CC_O, CONTEXT, zf_off(ZF_PV));
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 1, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_O, opts->gen.context_reg, zf_off(ZF_PV));
 		//TODO: Implement half-carry flag
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
-		dst = z80_save_reg(dst, inst, opts);
-		dst = z80_save_ea(dst, inst, opts);
+		setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+		setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
+		z80_save_reg(inst, opts);
+		z80_save_ea(code, inst, opts);
 		break;
 	case Z80_INC:
-		cycles = 4;
+		num_cycles = 4;
 		if (inst->reg == Z80_IX || inst->reg == Z80_IY) {
-			cycles += 6;
+			num_cycles += 6;
 		} else if(z80_size(inst) == SZ_W) {
-			cycles += 2;
+			num_cycles += 2;
 		} else if(inst->reg == Z80_IXH || inst->reg == Z80_IXL || inst->reg == Z80_IYH || inst->reg == Z80_IYL || inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
-			cycles += 4;
+			num_cycles += 4;
 		}
-		dst = zcycles(dst, cycles);
-		dst = translate_z80_reg(inst, &dst_op, dst, opts);
+		cycles(&opts->gen, num_cycles);
+		translate_z80_reg(inst, &dst_op, opts);
 		if (dst_op.mode == MODE_UNUSED) {
-			dst = translate_z80_ea(inst, &dst_op, dst, opts, READ, MODIFY);
+			translate_z80_ea(inst, &dst_op, opts, READ, MODIFY);
 		}
-		dst = add_ir(dst, 1, dst_op.base, z80_size(inst));
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			add_ir(code, 1, dst_op.base, z80_size(inst));
+		} else {
+			add_irdisp(code, 1, dst_op.base, dst_op.disp, z80_size(inst));
+		}
 		if (z80_size(inst) == SZ_B) {
-			dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+			mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 			//TODO: Implement half-carry flag
-			dst = setcc_rdisp8(dst, CC_O, CONTEXT, zf_off(ZF_PV));
-			dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-			dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+			setcc_rdisp(code, CC_O, opts->gen.context_reg, zf_off(ZF_PV));
+			setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+			setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
 		}
-		dst = z80_save_reg(dst, inst, opts);
-		dst = z80_save_ea(dst, inst, opts);
-		dst = z80_save_result(dst, inst);
+		z80_save_reg(inst, opts);
+		z80_save_ea(code, inst, opts);
+		z80_save_result(opts, inst);
 		break;
 	case Z80_DEC:
-		cycles = 4;
+		num_cycles = 4;
 		if (inst->reg == Z80_IX || inst->reg == Z80_IY) {
-			cycles += 6;
+			num_cycles += 6;
 		} else if(z80_size(inst) == SZ_W) {
-			cycles += 2;
+			num_cycles += 2;
 		} else if(inst->reg == Z80_IXH || inst->reg == Z80_IXL || inst->reg == Z80_IYH || inst->reg == Z80_IYL || inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
-			cycles += 4;
+			num_cycles += 4;
 		}
-		dst = zcycles(dst, cycles);
-		dst = translate_z80_reg(inst, &dst_op, dst, opts);
+		cycles(&opts->gen, num_cycles);
+		translate_z80_reg(inst, &dst_op, opts);
 		if (dst_op.mode == MODE_UNUSED) {
-			dst = translate_z80_ea(inst, &dst_op, dst, opts, READ, MODIFY);
+			translate_z80_ea(inst, &dst_op, opts, READ, MODIFY);
 		}
-		dst = sub_ir(dst, 1, dst_op.base, z80_size(inst));
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			sub_ir(code, 1, dst_op.base, z80_size(inst));
+		} else {
+			sub_irdisp(code, 1, dst_op.base, dst_op.disp, z80_size(inst));
+		}
+		
 		if (z80_size(inst) == SZ_B) {
-			dst = mov_irdisp8(dst, 1, CONTEXT, zf_off(ZF_N), SZ_B);
+			mov_irdisp(code, 1, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 			//TODO: Implement half-carry flag
-			dst = setcc_rdisp8(dst, CC_O, CONTEXT, zf_off(ZF_PV));
-			dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-			dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+			setcc_rdisp(code, CC_O, opts->gen.context_reg, zf_off(ZF_PV));
+			setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+			setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
 		}
-		dst = z80_save_reg(dst, inst, opts);
-		dst = z80_save_ea(dst, inst, opts);
-		dst = z80_save_result(dst, inst);
+		z80_save_reg(inst, opts);
+		z80_save_ea(code, inst, opts);
+		z80_save_result(opts, inst);
 		break;
 	//case Z80_DAA:
 	case Z80_CPL:
-		dst = zcycles(dst, 4);
-		dst = not_r(dst, opts->regs[Z80_A], SZ_B);
+		cycles(&opts->gen, 4);
+		not_r(code, opts->regs[Z80_A], SZ_B);
 		//TODO: Implement half-carry flag
-		dst = mov_irdisp8(dst, 1, CONTEXT, zf_off(ZF_N), SZ_B);
+		mov_irdisp(code, 1, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		break;
 	case Z80_NEG:
-		dst = zcycles(dst, 8);
-		dst = neg_r(dst, opts->regs[Z80_A], SZ_B);
+		cycles(&opts->gen, 8);
+		neg_r(code, opts->regs[Z80_A], SZ_B);
 		//TODO: Implement half-carry flag
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
-		dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = setcc_rdisp8(dst, CC_O, CONTEXT, zf_off(ZF_PV));
-		dst = mov_irdisp8(dst, 1, CONTEXT, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+		setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		setcc_rdisp(code, CC_O, opts->gen.context_reg, zf_off(ZF_PV));
+		mov_irdisp(code, 1, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		break;
 	case Z80_CCF:
-		dst = zcycles(dst, 4);
-		dst = xor_irdisp8(dst, 1, CONTEXT, zf_off(ZF_C), SZ_B);
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+		cycles(&opts->gen, 4);
+		xor_irdisp(code, 1, opts->gen.context_reg, zf_off(ZF_C), SZ_B);
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		//TODO: Implement half-carry flag
 		break;
 	case Z80_SCF:
-		dst = zcycles(dst, 4);
-		dst = mov_irdisp8(dst, 1, CONTEXT, zf_off(ZF_C), SZ_B);
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+		cycles(&opts->gen, 4);
+		mov_irdisp(code, 1, opts->gen.context_reg, zf_off(ZF_C), SZ_B);
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		//TODO: Implement half-carry flag
 		break;
 	case Z80_NOP:
 		if (inst->immed == 42) {
-			dst = call(dst, (uint8_t *)z80_save_context);
-			dst = mov_rr(dst, CONTEXT, RDI, SZ_Q);
-			dst = jmp(dst, (uint8_t *)z80_print_regs_exit);
+			call(code, opts->gen.save_context);
+			call_args(code, (code_ptr)z80_print_regs_exit, 1, opts->gen.context_reg);
 		} else {
-			dst = zcycles(dst, 4 * inst->immed);
+			cycles(&opts->gen, 4 * inst->immed);
 		}
 		break;
-	case Z80_HALT:
-		dst = zcycles(dst, 4);
-		dst = mov_ir(dst, address, SCRATCH1, SZ_W);
-		uint8_t * call_inst = dst;
-		dst = call(dst, (uint8_t *)z80_halt);
-		dst = jmp(dst, call_inst);
+	case Z80_HALT: {
+		code_ptr loop_top = code->cur;
+		//this isn't terribly efficient, but it's good enough for now
+		cycles(&opts->gen, 4);
+		check_cycles_int(&opts->gen, address);
+		jmp(code, loop_top);
 		break;
+	}
 	case Z80_DI:
-		dst = zcycles(dst, 4);
-		dst = mov_irdisp8(dst, 0, CONTEXT, offsetof(z80_context, iff1), SZ_B);
-		dst = mov_irdisp8(dst, 0, CONTEXT, offsetof(z80_context, iff2), SZ_B);
-		dst = mov_rdisp8r(dst, CONTEXT, offsetof(z80_context, sync_cycle), ZLIMIT, SZ_D);
-		dst = mov_irdisp8(dst, 0xFFFFFFFF, CONTEXT, offsetof(z80_context, int_cycle), SZ_D);
+		cycles(&opts->gen, 4);
+		mov_irdisp(code, 0, opts->gen.context_reg, offsetof(z80_context, iff1), SZ_B);
+		mov_irdisp(code, 0, opts->gen.context_reg, offsetof(z80_context, iff2), SZ_B);
+		mov_rdispr(code, opts->gen.context_reg, offsetof(z80_context, sync_cycle), opts->gen.limit, SZ_D);
+		mov_irdisp(code, 0xFFFFFFFF, opts->gen.context_reg, offsetof(z80_context, int_cycle), SZ_D);
 		break;
 	case Z80_EI:
-		dst = zcycles(dst, 4);
-		dst = mov_rrdisp32(dst, ZCYCLES, CONTEXT, offsetof(z80_context, int_enable_cycle), SZ_D);
-		dst = mov_irdisp8(dst, 1, CONTEXT, offsetof(z80_context, iff1), SZ_B);
-		dst = mov_irdisp8(dst, 1, CONTEXT, offsetof(z80_context, iff2), SZ_B);
+		cycles(&opts->gen, 4);
+		mov_rrdisp(code, opts->gen.cycles, opts->gen.context_reg, offsetof(z80_context, int_enable_cycle), SZ_D);
+		mov_irdisp(code, 1, opts->gen.context_reg, offsetof(z80_context, iff1), SZ_B);
+		mov_irdisp(code, 1, opts->gen.context_reg, offsetof(z80_context, iff2), SZ_B);
 		//interrupt enable has a one-instruction latency, minimum instruction duration is 4 cycles
-		dst = add_irdisp32(dst, 4, CONTEXT, offsetof(z80_context, int_enable_cycle), SZ_D);
-		dst = call(dst, (uint8_t *)z80_do_sync);
+		add_irdisp(code, 4*opts->gen.clock_divider, opts->gen.context_reg, offsetof(z80_context, int_enable_cycle), SZ_D);
+		call(code, opts->do_sync);
 		break;
 	case Z80_IM:
-		dst = zcycles(dst, 4);
-		dst = mov_irdisp8(dst, inst->immed, CONTEXT, offsetof(z80_context, im), SZ_B);
+		cycles(&opts->gen, 4);
+		mov_irdisp(code, inst->immed, opts->gen.context_reg, offsetof(z80_context, im), SZ_B);
 		break;
 	case Z80_RLC:
-		cycles = inst->immed == 0 ? 4 : (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8);
-		dst = zcycles(dst, cycles);
+		num_cycles = inst->immed == 0 ? 4 : (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8);
+		cycles(&opts->gen, num_cycles);
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = translate_z80_ea(inst, &dst_op, dst, opts, READ, MODIFY);
-			dst = translate_z80_reg(inst, &src_op, dst, opts); //For IX/IY variants that also write to a register
-			dst = zcycles(dst, 1);
+			translate_z80_ea(inst, &dst_op, opts, READ, MODIFY);
+			translate_z80_reg(inst, &src_op, opts); //For IX/IY variants that also write to a register
+			cycles(&opts->gen, 1);
 		} else {
 			src_op.mode = MODE_UNUSED;
-			dst = translate_z80_reg(inst, &dst_op, dst, opts);
+			translate_z80_reg(inst, &dst_op, opts);
+		}
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			rol_ir(code, 1, dst_op.base, SZ_B);
+		} else {
+			rol_irdisp(code, 1, dst_op.base, dst_op.disp, SZ_B);
 		}
-		dst = rol_ir(dst, 1, dst_op.base, SZ_B);
-		if (src_op.mode != MODE_UNUSED) {
-			dst = mov_rr(dst, dst_op.base, src_op.base, SZ_B);
+		if (src_op.mode == MODE_REG_DIRECT) {
+			mov_rr(code, dst_op.base, src_op.base, SZ_B);
+		} else if(src_op.mode == MODE_REG_DISPLACE8) {
+			mov_rrdisp(code, dst_op.base, src_op.base, src_op.disp, SZ_B);
 		}
-		dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		//TODO: Implement half-carry flag
-		dst = cmp_ir(dst, 0, dst_op.base, SZ_B);
-		dst = setcc_rdisp8(dst, CC_P, CONTEXT, zf_off(ZF_PV));
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+		if (inst->immed) {
+			//rlca does not set these flags
+			if (dst_op.mode == MODE_REG_DIRECT) {
+				cmp_ir(code, 0, dst_op.base, SZ_B);
+			} else {
+				cmp_irdisp(code, 0, dst_op.base, dst_op.disp, SZ_B);
+			}
+			setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
+			setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+			setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
+		}
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = z80_save_result(dst, inst);
+			z80_save_result(opts, inst);
 			if (src_op.mode != MODE_UNUSED) {
-				dst = z80_save_reg(dst, inst, opts);
+				z80_save_reg(inst, opts);
 			}
 		} else {
-			dst = z80_save_reg(dst, inst, opts);
+			z80_save_reg(inst, opts);
 		}
 		break;
 	case Z80_RL:
-		cycles = inst->immed == 0 ? 4 : (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8);
-		dst = zcycles(dst, cycles);
+		num_cycles = inst->immed == 0 ? 4 : (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8);
+		cycles(&opts->gen, num_cycles);
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = translate_z80_ea(inst, &dst_op, dst, opts, READ, MODIFY);
-			dst = translate_z80_reg(inst, &src_op, dst, opts); //For IX/IY variants that also write to a register
-			dst = zcycles(dst, 1);
+			translate_z80_ea(inst, &dst_op, opts, READ, MODIFY);
+			translate_z80_reg(inst, &src_op, opts); //For IX/IY variants that also write to a register
+			cycles(&opts->gen, 1);
 		} else {
 			src_op.mode = MODE_UNUSED;
-			dst = translate_z80_reg(inst, &dst_op, dst, opts);
+			translate_z80_reg(inst, &dst_op, opts);
 		}
-		dst = bt_irdisp8(dst, 0, CONTEXT, zf_off(ZF_C), SZ_B);
-		dst = rcl_ir(dst, 1, dst_op.base, SZ_B);
-		if (src_op.mode != MODE_UNUSED) {
-			dst = mov_rr(dst, dst_op.base, src_op.base, SZ_B);
+		bt_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_C), SZ_B);
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			rcl_ir(code, 1, dst_op.base, SZ_B);
+		} else {
+			rcl_irdisp(code, 1, dst_op.base, dst_op.disp, SZ_B);
+		}
+		if (src_op.mode == MODE_REG_DIRECT) {
+			mov_rr(code, dst_op.base, src_op.base, SZ_B);
+		} else if(src_op.mode == MODE_REG_DISPLACE8) {
+			mov_rrdisp(code, dst_op.base, src_op.base, src_op.disp, SZ_B);
 		}
-		dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		//TODO: Implement half-carry flag
-		dst = cmp_ir(dst, 0, dst_op.base, SZ_B);
-		dst = setcc_rdisp8(dst, CC_P, CONTEXT, zf_off(ZF_PV));
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+		if (inst->immed) {
+			//rla does not set these flags
+			if (dst_op.mode == MODE_REG_DIRECT) {
+				cmp_ir(code, 0, dst_op.base, SZ_B);
+			} else {
+				cmp_irdisp(code, 0, dst_op.base, dst_op.disp, SZ_B);
+			}
+			setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
+			setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+			setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
+		}
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = z80_save_result(dst, inst);
+			z80_save_result(opts, inst);
 			if (src_op.mode != MODE_UNUSED) {
-				dst = z80_save_reg(dst, inst, opts);
+				z80_save_reg(inst, opts);
 			}
 		} else {
-			dst = z80_save_reg(dst, inst, opts);
+			z80_save_reg(inst, opts);
 		}
 		break;
 	case Z80_RRC:
-		cycles = inst->immed == 0 ? 4 : (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8);
-		dst = zcycles(dst, cycles);
+		num_cycles = inst->immed == 0 ? 4 : (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8);
+		cycles(&opts->gen, num_cycles);
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = translate_z80_ea(inst, &dst_op, dst, opts, READ, MODIFY);
-			dst = translate_z80_reg(inst, &src_op, dst, opts); //For IX/IY variants that also write to a register
-			dst = zcycles(dst, 1);
+			translate_z80_ea(inst, &dst_op, opts, READ, MODIFY);
+			translate_z80_reg(inst, &src_op, opts); //For IX/IY variants that also write to a register
+			cycles(&opts->gen, 1);
 		} else {
 			src_op.mode = MODE_UNUSED;
-			dst = translate_z80_reg(inst, &dst_op, dst, opts);
+			translate_z80_reg(inst, &dst_op, opts);
+		}
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			ror_ir(code, 1, dst_op.base, SZ_B);
+		} else {
+			ror_irdisp(code, 1, dst_op.base, dst_op.disp, SZ_B);
 		}
-		dst = ror_ir(dst, 1, dst_op.base, SZ_B);
-		if (src_op.mode != MODE_UNUSED) {
-			dst = mov_rr(dst, dst_op.base, src_op.base, SZ_B);
+		if (src_op.mode == MODE_REG_DIRECT) {
+			mov_rr(code, dst_op.base, src_op.base, SZ_B);
+		} else if(src_op.mode == MODE_REG_DISPLACE8) {
+			mov_rrdisp(code, dst_op.base, src_op.base, src_op.disp, SZ_B);
 		}
-		dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		//TODO: Implement half-carry flag
-		dst = cmp_ir(dst, 0, dst_op.base, SZ_B);
-		dst = setcc_rdisp8(dst, CC_P, CONTEXT, zf_off(ZF_PV));
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+		if (inst->immed) {
+			//rrca does not set these flags
+			if (dst_op.mode == MODE_REG_DIRECT) {
+				cmp_ir(code, 0, dst_op.base, SZ_B);
+			} else {
+				cmp_irdisp(code, 0, dst_op.base, dst_op.disp, SZ_B);
+			}
+			setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
+			setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+			setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
+		}
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = z80_save_result(dst, inst);
+			z80_save_result(opts, inst);
 			if (src_op.mode != MODE_UNUSED) {
-				dst = z80_save_reg(dst, inst, opts);
+				z80_save_reg(inst, opts);
 			}
 		} else {
-			dst = z80_save_reg(dst, inst, opts);
+			z80_save_reg(inst, opts);
 		}
 		break;
 	case Z80_RR:
-		cycles = inst->immed == 0 ? 4 : (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8);
-		dst = zcycles(dst, cycles);
+		num_cycles = inst->immed == 0 ? 4 : (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8);
+		cycles(&opts->gen, num_cycles);
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = translate_z80_ea(inst, &dst_op, dst, opts, READ, MODIFY);
-			dst = translate_z80_reg(inst, &src_op, dst, opts); //For IX/IY variants that also write to a register
-			dst = zcycles(dst, 1);
+			translate_z80_ea(inst, &dst_op, opts, READ, MODIFY);
+			translate_z80_reg(inst, &src_op, opts); //For IX/IY variants that also write to a register
+			cycles(&opts->gen, 1);
 		} else {
 			src_op.mode = MODE_UNUSED;
-			dst = translate_z80_reg(inst, &dst_op, dst, opts);
+			translate_z80_reg(inst, &dst_op, opts);
 		}
-		dst = bt_irdisp8(dst, 0, CONTEXT, zf_off(ZF_C), SZ_B);
-		dst = rcr_ir(dst, 1, dst_op.base, SZ_B);
-		if (src_op.mode != MODE_UNUSED) {
-			dst = mov_rr(dst, dst_op.base, src_op.base, SZ_B);
+		bt_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_C), SZ_B);
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			rcr_ir(code, 1, dst_op.base, SZ_B);
+		} else {
+			rcr_irdisp(code, 1, dst_op.base, dst_op.disp, SZ_B);
+		}
+		if (src_op.mode == MODE_REG_DIRECT) {
+			mov_rr(code, dst_op.base, src_op.base, SZ_B);
+		} else if(src_op.mode == MODE_REG_DISPLACE8) {
+			mov_rrdisp(code, dst_op.base, src_op.base, src_op.disp, SZ_B);
 		}
-		dst = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		//TODO: Implement half-carry flag
-		dst = cmp_ir(dst, 0, dst_op.base, SZ_B);
-		dst = setcc_rdisp8(dst, CC_P, CONTEXT, zf_off(ZF_PV));
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+		if (inst->immed) {
+			//rra does not set these flags
+			if (dst_op.mode == MODE_REG_DIRECT) {
+				cmp_ir(code, 0, dst_op.base, SZ_B);
+			} else {
+				cmp_irdisp(code, 0, dst_op.base, dst_op.disp, SZ_B);
+			}
+			setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
+			setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+			setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
+		}
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = z80_save_result(dst, inst);
+			z80_save_result(opts, inst);
 			if (src_op.mode != MODE_UNUSED) {
-				dst = z80_save_reg(dst, inst, opts);
+				z80_save_reg(inst, opts);
 			}
 		} else {
-			dst = z80_save_reg(dst, inst, opts);
+			z80_save_reg(inst, opts);
 		}
 		break;
 	case Z80_SLA:
 	case Z80_SLL:
-		cycles = inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8;
-		dst = zcycles(dst, cycles);
+		num_cycles = inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8;
+		cycles(&opts->gen, num_cycles);
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = translate_z80_ea(inst, &dst_op, dst, opts, READ, MODIFY);
-			dst = translate_z80_reg(inst, &src_op, dst, opts); //For IX/IY variants that also write to a register
-			dst = zcycles(dst, 1);
+			translate_z80_ea(inst, &dst_op, opts, READ, MODIFY);
+			translate_z80_reg(inst, &src_op, opts); //For IX/IY variants that also write to a register
+			cycles(&opts->gen, 1);
 		} else {
 			src_op.mode = MODE_UNUSED;
-			dst = translate_z80_reg(inst, &dst_op, dst, opts);
+			translate_z80_reg(inst, &dst_op, opts);
 		}
-		dst = shl_ir(dst, 1, dst_op.base, SZ_B);
-		dst  = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			shl_ir(code, 1, dst_op.base, SZ_B);
+		} else {
+			shl_irdisp(code, 1, dst_op.base, dst_op.disp, SZ_B);
+		}
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
 		if (inst->op == Z80_SLL) {
-			dst = or_ir(dst, 1, dst_op.base, SZ_B);
-		}
-		if (src_op.mode != MODE_UNUSED) {
-			dst = mov_rr(dst, dst_op.base, src_op.base, SZ_B);
+			if (dst_op.mode == MODE_REG_DIRECT) {
+				or_ir(code, 1, dst_op.base, SZ_B);
+			} else {
+				or_irdisp(code, 1, dst_op.base, dst_op.disp, SZ_B);
+			}
 		}
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+		if (src_op.mode == MODE_REG_DIRECT) {
+			mov_rr(code, dst_op.base, src_op.base, SZ_B);
+		} else if(src_op.mode == MODE_REG_DISPLACE8) {
+			mov_rrdisp(code, dst_op.base, src_op.base, src_op.disp, SZ_B);
+		}
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		//TODO: Implement half-carry flag
-		dst = cmp_ir(dst, 0, dst_op.base, SZ_B);
-		dst = setcc_rdisp8(dst, CC_P, CONTEXT, zf_off(ZF_PV));
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			cmp_ir(code, 0, dst_op.base, SZ_B);
+		} else {
+			cmp_irdisp(code, 0, dst_op.base, dst_op.disp, SZ_B);
+		}
+		setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
+		setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+		setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = z80_save_result(dst, inst);
+			z80_save_result(opts, inst);
 			if (src_op.mode != MODE_UNUSED) {
-				dst = z80_save_reg(dst, inst, opts);
+				z80_save_reg(inst, opts);
 			}
 		} else {
-			dst = z80_save_reg(dst, inst, opts);
+			z80_save_reg(inst, opts);
 		}
 		break;
 	case Z80_SRA:
-		cycles = inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8;
-		dst = zcycles(dst, cycles);
+		num_cycles = inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8;
+		cycles(&opts->gen, num_cycles);
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = translate_z80_ea(inst, &dst_op, dst, opts, READ, MODIFY);
-			dst = translate_z80_reg(inst, &src_op, dst, opts); //For IX/IY variants that also write to a register
-			dst = zcycles(dst, 1);
+			translate_z80_ea(inst, &dst_op, opts, READ, MODIFY);
+			translate_z80_reg(inst, &src_op, opts); //For IX/IY variants that also write to a register
+			cycles(&opts->gen, 1);
 		} else {
 			src_op.mode = MODE_UNUSED;
-			dst = translate_z80_reg(inst, &dst_op, dst, opts);
+			translate_z80_reg(inst, &dst_op, opts);
+		}
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			sar_ir(code, 1, dst_op.base, SZ_B);
+		} else {
+			sar_irdisp(code, 1, dst_op.base, dst_op.disp, SZ_B);
 		}
-		dst = sar_ir(dst, 1, dst_op.base, SZ_B);
-		if (src_op.mode != MODE_UNUSED) {
-			dst = mov_rr(dst, dst_op.base, src_op.base, SZ_B);
+		if (src_op.mode == MODE_REG_DIRECT) {
+			mov_rr(code, dst_op.base, src_op.base, SZ_B);
+		} else if(src_op.mode == MODE_REG_DISPLACE8) {
+			mov_rrdisp(code, dst_op.base, src_op.base, src_op.disp, SZ_B);
 		}
-		dst  = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		//TODO: Implement half-carry flag
-		dst = cmp_ir(dst, 0, dst_op.base, SZ_B);
-		dst = setcc_rdisp8(dst, CC_P, CONTEXT, zf_off(ZF_PV));
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			cmp_ir(code, 0, dst_op.base, SZ_B);
+		} else {
+			cmp_irdisp(code, 0, dst_op.base, dst_op.disp, SZ_B);
+		}
+		setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
+		setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+		setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = z80_save_result(dst, inst);
+			z80_save_result(opts, inst);
 			if (src_op.mode != MODE_UNUSED) {
-				dst = z80_save_reg(dst, inst, opts);
+				z80_save_reg(inst, opts);
 			}
 		} else {
-			dst = z80_save_reg(dst, inst, opts);
+			z80_save_reg(inst, opts);
 		}
 		break;
 	case Z80_SRL:
-		cycles = inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8;
-		dst = zcycles(dst, cycles);
+		num_cycles = inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE ? 16 : 8;
+		cycles(&opts->gen, num_cycles);
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = translate_z80_ea(inst, &dst_op, dst, opts, READ, MODIFY);
-			dst = translate_z80_reg(inst, &src_op, dst, opts); //For IX/IY variants that also write to a register
-			dst = zcycles(dst, 1);
+			translate_z80_ea(inst, &dst_op, opts, READ, MODIFY);
+			translate_z80_reg(inst, &src_op, opts); //For IX/IY variants that also write to a register
+			cycles(&opts->gen, 1);
 		} else {
 			src_op.mode = MODE_UNUSED;
-			dst = translate_z80_reg(inst, &dst_op, dst, opts);
+			translate_z80_reg(inst, &dst_op, opts);
+		}
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			shr_ir(code, 1, dst_op.base, SZ_B);
+		} else {
+			shr_irdisp(code, 1, dst_op.base, dst_op.disp, SZ_B);
 		}
-		dst = shr_ir(dst, 1, dst_op.base, SZ_B);
-		if (src_op.mode != MODE_UNUSED) {
-			dst = mov_rr(dst, dst_op.base, src_op.base, SZ_B);
+		if (src_op.mode == MODE_REG_DIRECT) {
+			mov_rr(code, dst_op.base, src_op.base, SZ_B);
+		} else if(src_op.mode == MODE_REG_DISPLACE8) {
+			mov_rrdisp(code, dst_op.base, src_op.base, src_op.disp, SZ_B);
 		}
-		dst  = setcc_rdisp8(dst, CC_C, CONTEXT, zf_off(ZF_C));
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_C, opts->gen.context_reg, zf_off(ZF_C));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		//TODO: Implement half-carry flag
-		dst = cmp_ir(dst, 0, dst_op.base, SZ_B);
-		dst = setcc_rdisp8(dst, CC_P, CONTEXT, zf_off(ZF_PV));
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			cmp_ir(code, 0, dst_op.base, SZ_B);
+		} else {
+			cmp_irdisp(code, 0, dst_op.base, dst_op.disp, SZ_B);
+		}
+		setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
+		setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+		setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
 		if (inst->addr_mode != Z80_UNUSED) {
-			dst = z80_save_result(dst, inst);
+			z80_save_result(opts, inst);
 			if (src_op.mode != MODE_UNUSED) {
-				dst = z80_save_reg(dst, inst, opts);
+				z80_save_reg(inst, opts);
 			}
 		} else {
-			dst = z80_save_reg(dst, inst, opts);
+			z80_save_reg(inst, opts);
 		}
 		break;
 	case Z80_RLD:
-		dst = zcycles(dst, 8);
-		dst = mov_rr(dst, opts->regs[Z80_HL], SCRATCH1, SZ_W);
-		dst = call(dst, (uint8_t *)z80_read_byte);
+		cycles(&opts->gen, 8);
+		zreg_to_native(opts, Z80_HL, opts->gen.scratch1);
+		call(code, opts->read_8);
 		//Before: (HL) = 0x12, A = 0x34
 		//After: (HL) = 0x24, A = 0x31
-		dst = mov_rr(dst, opts->regs[Z80_A], SCRATCH2, SZ_B);
-		dst = shl_ir(dst, 4, SCRATCH1, SZ_W);
-		dst = and_ir(dst, 0xF, SCRATCH2, SZ_W);
-		dst = and_ir(dst, 0xFFF, SCRATCH1, SZ_W);
-		dst = and_ir(dst, 0xF0, opts->regs[Z80_A], SZ_B);
-		dst = or_rr(dst, SCRATCH2, SCRATCH1, SZ_W);
-		//SCRATCH1 = 0x0124
-		dst = ror_ir(dst, 8, SCRATCH1, SZ_W);
-		dst = zcycles(dst, 4);
-		dst = or_rr(dst, SCRATCH1, opts->regs[Z80_A], SZ_B);
+		zreg_to_native(opts, Z80_A, opts->gen.scratch2);
+		shl_ir(code, 4, opts->gen.scratch1, SZ_W);
+		and_ir(code, 0xF, opts->gen.scratch2, SZ_W);
+		and_ir(code, 0xFFF, opts->gen.scratch1, SZ_W);
+		and_ir(code, 0xF0, opts->regs[Z80_A], SZ_B);
+		or_rr(code, opts->gen.scratch2, opts->gen.scratch1, SZ_W);
+		//opts->gen.scratch1 = 0x0124
+		ror_ir(code, 8, opts->gen.scratch1, SZ_W);
+		cycles(&opts->gen, 4);
+		or_rr(code, opts->gen.scratch1, opts->regs[Z80_A], SZ_B);
 		//set flags
 		//TODO: Implement half-carry flag
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
-		dst = setcc_rdisp8(dst, CC_P, CONTEXT, zf_off(ZF_PV));
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
+		setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+		setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
 
-		dst = mov_rr(dst, opts->regs[Z80_HL], SCRATCH2, SZ_W);
-		dst = ror_ir(dst, 8, SCRATCH1, SZ_W);
-		dst = call(dst, (uint8_t *)z80_write_byte);
+		zreg_to_native(opts, Z80_HL, opts->gen.scratch2);
+		ror_ir(code, 8, opts->gen.scratch1, SZ_W);
+		call(code, opts->write_8);
 		break;
 	case Z80_RRD:
-		dst = zcycles(dst, 8);
-		dst = mov_rr(dst, opts->regs[Z80_HL], SCRATCH1, SZ_W);
-		dst = call(dst, (uint8_t *)z80_read_byte);
+		cycles(&opts->gen, 8);
+		zreg_to_native(opts, Z80_HL, opts->gen.scratch1);
+		call(code, opts->read_8);
 		//Before: (HL) = 0x12, A = 0x34
 		//After: (HL) = 0x41, A = 0x32
-		dst = movzx_rr(dst, opts->regs[Z80_A], SCRATCH2, SZ_B, SZ_W);
-		dst = ror_ir(dst, 4, SCRATCH1, SZ_W);
-		dst = shl_ir(dst, 4, SCRATCH2, SZ_W);
-		dst = and_ir(dst, 0xF00F, SCRATCH1, SZ_W);
-		dst = and_ir(dst, 0xF0, opts->regs[Z80_A], SZ_B);
-		//SCRATCH1 = 0x2001
-		//SCRATCH2 = 0x0040
-		dst = or_rr(dst, SCRATCH2, SCRATCH1, SZ_W);
-		//SCRATCH1 = 0x2041
-		dst = ror_ir(dst, 8, SCRATCH1, SZ_W);
-		dst = zcycles(dst, 4);
-		dst = shr_ir(dst, 4, SCRATCH1, SZ_B);
-		dst = or_rr(dst, SCRATCH1, opts->regs[Z80_A], SZ_B);
+		zreg_to_native(opts, Z80_A, opts->gen.scratch2);
+		ror_ir(code, 4, opts->gen.scratch1, SZ_W);
+		shl_ir(code, 4, opts->gen.scratch2, SZ_W);
+		and_ir(code, 0xF00F, opts->gen.scratch1, SZ_W);
+		and_ir(code, 0xF0, opts->regs[Z80_A], SZ_B);
+		//opts->gen.scratch1 = 0x2001
+		//opts->gen.scratch2 = 0x0040
+		or_rr(code, opts->gen.scratch2, opts->gen.scratch1, SZ_W);
+		//opts->gen.scratch1 = 0x2041
+		ror_ir(code, 8, opts->gen.scratch1, SZ_W);
+		cycles(&opts->gen, 4);
+		shr_ir(code, 4, opts->gen.scratch1, SZ_B);
+		or_rr(code, opts->gen.scratch1, opts->regs[Z80_A], SZ_B);
 		//set flags
 		//TODO: Implement half-carry flag
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
-		dst = setcc_rdisp8(dst, CC_P, CONTEXT, zf_off(ZF_PV));
-		dst = setcc_rdisp8(dst, CC_Z, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
+		setcc_rdisp(code, CC_Z, opts->gen.context_reg, zf_off(ZF_Z));
+		setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
 
-		dst = mov_rr(dst, opts->regs[Z80_HL], SCRATCH2, SZ_W);
-		dst = ror_ir(dst, 8, SCRATCH1, SZ_W);
-		dst = call(dst, (uint8_t *)z80_write_byte);
+		zreg_to_native(opts, Z80_HL, opts->gen.scratch2);
+		ror_ir(code, 8, opts->gen.scratch1, SZ_W);
+		call(code, opts->write_8);
 		break;
 	case Z80_BIT: {
-		cycles = (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) ? 8 : 16;
-		dst = zcycles(dst, cycles);
+		num_cycles = (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) ? 8 : 16;
+		cycles(&opts->gen, num_cycles);
 		uint8_t bit;
 		if ((inst->addr_mode & 0x1F) == Z80_REG && opts->regs[inst->ea_reg] >= AH && opts->regs[inst->ea_reg] <= BH) {
 			src_op.base = opts->regs[z80_word_reg(inst->ea_reg)];
+			src_op.mode = MODE_REG_DIRECT;
 			size = SZ_W;
 			bit = inst->immed + 8;
 		} else {
 			size = SZ_B;
 			bit = inst->immed;
-			dst = translate_z80_ea(inst, &src_op, dst, opts, READ, DONT_MODIFY);
+			translate_z80_ea(inst, &src_op, opts, READ, DONT_MODIFY);
 		}
 		if (inst->addr_mode != Z80_REG) {
 			//Reads normally take 3 cycles, but the read at the end of a bit instruction takes 4
-			dst = zcycles(dst, 1);
+			cycles(&opts->gen, 1);
+		}
+		if (src_op.mode == MODE_REG_DIRECT) {
+			bt_ir(code, bit, src_op.base, size);
+		} else {
+			bt_irdisp(code, bit, src_op.base, src_op.disp, size);
 		}
-		dst = bt_ir(dst, bit, src_op.base, size);
-		dst = setcc_rdisp8(dst, CC_NC, CONTEXT, zf_off(ZF_Z));
-		dst = setcc_rdisp8(dst, CC_NC, CONTEXT, zf_off(ZF_PV));
-		dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_N), SZ_B);
+		setcc_rdisp(code, CC_NC, opts->gen.context_reg, zf_off(ZF_Z));
+		setcc_rdisp(code, CC_NC, opts->gen.context_reg, zf_off(ZF_PV));
+		mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_N), SZ_B);
 		if (inst->immed == 7) {
-			dst = cmp_ir(dst, 0, src_op.base, size);
-			dst = setcc_rdisp8(dst, CC_S, CONTEXT, zf_off(ZF_S));
+			if (src_op.mode == MODE_REG_DIRECT) {
+				cmp_ir(code, 0, src_op.base, size);
+			} else {
+				cmp_irdisp(code, 0, src_op.base, src_op.disp, size);
+			}
+			setcc_rdisp(code, CC_S, opts->gen.context_reg, zf_off(ZF_S));
 		} else {
-			dst = mov_irdisp8(dst, 0, CONTEXT, zf_off(ZF_S), SZ_B);
+			mov_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_S), SZ_B);
 		}
 		break;
 	}
 	case Z80_SET: {
-		cycles = (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) ? 8 : 16;
-		dst = zcycles(dst, cycles);
+		num_cycles = (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) ? 8 : 16;
+		cycles(&opts->gen, num_cycles);
 		uint8_t bit;
 		if ((inst->addr_mode & 0x1F) == Z80_REG && opts->regs[inst->ea_reg] >= AH && opts->regs[inst->ea_reg] <= BH) {
 			src_op.base = opts->regs[z80_word_reg(inst->ea_reg)];
+			src_op.mode = MODE_REG_DIRECT;
 			size = SZ_W;
 			bit = inst->immed + 8;
 		} else {
 			size = SZ_B;
 			bit = inst->immed;
-			dst = translate_z80_ea(inst, &src_op, dst, opts, READ, MODIFY);
+			translate_z80_ea(inst, &src_op, opts, READ, MODIFY);
 		}
 		if (inst->reg != Z80_USE_IMMED) {
-			dst = translate_z80_reg(inst, &dst_op, dst, opts);
+			translate_z80_reg(inst, &dst_op, opts);
 		}
 		if (inst->addr_mode != Z80_REG) {
 			//Reads normally take 3 cycles, but the read in the middle of a set instruction takes 4
-			dst = zcycles(dst, 1);
+			cycles(&opts->gen, 1);
 		}
-		dst = bts_ir(dst, bit, src_op.base, size);
+		if (src_op.mode == MODE_REG_DIRECT) {
+			bts_ir(code, bit, src_op.base, size);
+		} else {
+			bts_irdisp(code, bit, src_op.base, src_op.disp, size);
+		}
 		if (inst->reg != Z80_USE_IMMED) {
 			if (size == SZ_W) {
+#ifdef X86_64
 				if (dst_op.base >= R8) {
-					dst = ror_ir(dst, 8, src_op.base, SZ_W);
-					dst = mov_rr(dst, opts->regs[z80_low_reg(inst->ea_reg)], dst_op.base, SZ_B);
-					dst = ror_ir(dst, 8, src_op.base, SZ_W);
+					ror_ir(code, 8, src_op.base, SZ_W);
+					mov_rr(code, opts->regs[z80_low_reg(inst->ea_reg)], dst_op.base, SZ_B);
+					ror_ir(code, 8, src_op.base, SZ_W);
 				} else {
-					dst = mov_rr(dst, opts->regs[inst->ea_reg], dst_op.base, SZ_B);
+#endif
+					if (dst_op.mode == MODE_REG_DIRECT) {
+						zreg_to_native(opts, inst->ea_reg, dst_op.base);
+					} else {
+						zreg_to_native(opts, inst->ea_reg, opts->gen.scratch1);
+						mov_rrdisp(code, opts->gen.scratch1, dst_op.base, dst_op.disp, SZ_B);
+					}
+#ifdef X86_64
 				}
+#endif
 			} else {
-				dst = mov_rr(dst, src_op.base, dst_op.base, SZ_B);
+				if (dst_op.mode == MODE_REG_DIRECT) {
+					if (src_op.mode == MODE_REG_DIRECT) {
+						mov_rr(code, src_op.base, dst_op.base, SZ_B);
+					} else {
+						mov_rdispr(code, src_op.base, src_op.disp, dst_op.base, SZ_B);
+					}
+				} else if (src_op.mode == MODE_REG_DIRECT) {
+					mov_rrdisp(code, src_op.base, dst_op.base, dst_op.disp, SZ_B);
+				} else {
+					mov_rdispr(code, src_op.base, src_op.disp, opts->gen.scratch1, SZ_B);
+					mov_rrdisp(code, opts->gen.scratch1, dst_op.base, dst_op.disp, SZ_B);
+				}
 			}
 		}
 		if ((inst->addr_mode & 0x1F) != Z80_REG) {
-			dst = z80_save_result(dst, inst);
+			z80_save_result(opts, inst);
 			if (inst->reg != Z80_USE_IMMED) {
-				dst = z80_save_reg(dst, inst, opts);
+				z80_save_reg(inst, opts);
 			}
 		}
 		break;
 	}
 	case Z80_RES: {
-		cycles = (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) ? 8 : 16;
-		dst = zcycles(dst, cycles);
+		num_cycles = (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) ? 8 : 16;
+		cycles(&opts->gen, num_cycles);
 		uint8_t bit;
 		if ((inst->addr_mode & 0x1F) == Z80_REG && opts->regs[inst->ea_reg] >= AH && opts->regs[inst->ea_reg] <= BH) {
 			src_op.base = opts->regs[z80_word_reg(inst->ea_reg)];
+			src_op.mode = MODE_REG_DIRECT;
 			size = SZ_W;
 			bit = inst->immed + 8;
 		} else {
 			size = SZ_B;
 			bit = inst->immed;
-			dst = translate_z80_ea(inst, &src_op, dst, opts, READ, MODIFY);
+			translate_z80_ea(inst, &src_op, opts, READ, MODIFY);
 		}
 		if (inst->reg != Z80_USE_IMMED) {
-			dst = translate_z80_reg(inst, &dst_op, dst, opts);
+			translate_z80_reg(inst, &dst_op, opts);
 		}
 		if (inst->addr_mode != Z80_REG) {
 			//Reads normally take 3 cycles, but the read in the middle of a set instruction takes 4
-			dst = zcycles(dst, 1);
+			cycles(&opts->gen, 1);
 		}
-		dst = btr_ir(dst, bit, src_op.base, size);
+		if (src_op.mode == MODE_REG_DIRECT) {
+			btr_ir(code, bit, src_op.base, size);
+		} else {
+			btr_irdisp(code, bit, src_op.base, src_op.disp, size);
+		}
 		if (inst->reg != Z80_USE_IMMED) {
 			if (size == SZ_W) {
+#ifdef X86_64
 				if (dst_op.base >= R8) {
-					dst = ror_ir(dst, 8, src_op.base, SZ_W);
-					dst = mov_rr(dst, opts->regs[z80_low_reg(inst->ea_reg)], dst_op.base, SZ_B);
-					dst = ror_ir(dst, 8, src_op.base, SZ_W);
+					ror_ir(code, 8, src_op.base, SZ_W);
+					mov_rr(code, opts->regs[z80_low_reg(inst->ea_reg)], dst_op.base, SZ_B);
+					ror_ir(code, 8, src_op.base, SZ_W);
 				} else {
-					dst = mov_rr(dst, opts->regs[inst->ea_reg], dst_op.base, SZ_B);
+#endif
+					if (dst_op.mode == MODE_REG_DIRECT) {
+						zreg_to_native(opts, inst->ea_reg, dst_op.base);
+					} else {
+						zreg_to_native(opts, inst->ea_reg, opts->gen.scratch1);
+						mov_rrdisp(code, opts->gen.scratch1, dst_op.base, dst_op.disp, SZ_B);
+					}
+#ifdef X86_64
 				}
+#endif
 			} else {
-				dst = mov_rr(dst, src_op.base, dst_op.base, SZ_B);
+				if (dst_op.mode == MODE_REG_DIRECT) {
+					if (src_op.mode == MODE_REG_DIRECT) {
+						mov_rr(code, src_op.base, dst_op.base, SZ_B);
+					} else {
+						mov_rdispr(code, src_op.base, src_op.disp, dst_op.base, SZ_B);
+					}
+				} else if (src_op.mode == MODE_REG_DIRECT) {
+					mov_rrdisp(code, src_op.base, dst_op.base, dst_op.disp, SZ_B);
+				} else {
+					mov_rdispr(code, src_op.base, src_op.disp, opts->gen.scratch1, SZ_B);
+					mov_rrdisp(code, opts->gen.scratch1, dst_op.base, dst_op.disp, SZ_B);
+				}
 			}
 		}
 		if (inst->addr_mode != Z80_REG) {
-			dst = z80_save_result(dst, inst);
+			z80_save_result(opts, inst);
 			if (inst->reg != Z80_USE_IMMED) {
-				dst = z80_save_reg(dst, inst, opts);
+				z80_save_reg(inst, opts);
 			}
 		}
 		break;
 	}
 	case Z80_JP: {
-		cycles = 4;
+		num_cycles = 4;
 		if (inst->addr_mode != Z80_REG_INDIRECT) {
-			cycles += 6;
+			num_cycles += 6;
 		} else if(inst->ea_reg == Z80_IX || inst->ea_reg == Z80_IY) {
-			cycles += 4;
+			num_cycles += 4;
 		}
-		dst = zcycles(dst, cycles);
-		if (inst->addr_mode != Z80_REG_INDIRECT && inst->immed < 0x4000) {
-			uint8_t * call_dst = z80_get_native_address(context, inst->immed);
+		cycles(&opts->gen, num_cycles);
+		if (inst->addr_mode != Z80_REG_INDIRECT) {
+			code_ptr call_dst = z80_get_native_address(context, inst->immed);
 			if (!call_dst) {
-				opts->deferred = defer_address(opts->deferred, inst->immed, dst + 1);
+				opts->gen.deferred = defer_address(opts->gen.deferred, inst->immed, code->cur + 1);
 				//fake address to force large displacement
-				call_dst = dst + 256;
+				call_dst = code->cur + 256;
 			}
-			dst = jmp(dst, call_dst);
+			jmp(code, call_dst);
 		} else {
 			if (inst->addr_mode == Z80_REG_INDIRECT) {
-				dst = mov_rr(dst, opts->regs[inst->ea_reg], SCRATCH1, SZ_W);
+				zreg_to_native(opts, inst->ea_reg, opts->gen.scratch1);
 			} else {
-				dst = mov_ir(dst, inst->immed, SCRATCH1, SZ_W);
+				mov_ir(code, inst->immed, opts->gen.scratch1, SZ_W);
 			}
-			dst = call(dst, (uint8_t *)z80_native_addr);
-			dst = jmp_r(dst, SCRATCH1);
+			call(code, opts->native_addr);
+			jmp_r(code, opts->gen.scratch1);
 		}
 		break;
 	}
 	case Z80_JPCC: {
-		dst = zcycles(dst, 7);//T States: 4,3
+		cycles(&opts->gen, 7);//T States: 4,3
 		uint8_t cond = CC_Z;
 		switch (inst->reg)
 		{
 		case Z80_CC_NZ:
 			cond = CC_NZ;
 		case Z80_CC_Z:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_Z), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_Z), SZ_B);
 			break;
 		case Z80_CC_NC:
 			cond = CC_NZ;
 		case Z80_CC_C:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_C), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_C), SZ_B);
 			break;
 		case Z80_CC_PO:
 			cond = CC_NZ;
 		case Z80_CC_PE:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_PV), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_PV), SZ_B);
 			break;
 		case Z80_CC_P:
 			cond = CC_NZ;
 		case Z80_CC_M:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_S), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_S), SZ_B);
 			break;
 		}
-		uint8_t *no_jump_off = dst+1;
-		dst = jcc(dst, cond, dst+2);
-		dst = zcycles(dst, 5);//T States: 5
+		uint8_t *no_jump_off = code->cur+1;
+		jcc(code, cond, code->cur+2);
+		cycles(&opts->gen, 5);//T States: 5
 		uint16_t dest_addr = inst->immed;
-		if (dest_addr < 0x4000) {
-			uint8_t * call_dst = z80_get_native_address(context, dest_addr);
+		code_ptr call_dst = z80_get_native_address(context, dest_addr);
 			if (!call_dst) {
-				opts->deferred = defer_address(opts->deferred, dest_addr, dst + 1);
+			opts->gen.deferred = defer_address(opts->gen.deferred, dest_addr, code->cur + 1);
 				//fake address to force large displacement
-				call_dst = dst + 256;
+			call_dst = code->cur + 256;
 			}
-			dst = jmp(dst, call_dst);
-		} else {
-			dst = mov_ir(dst, dest_addr, SCRATCH1, SZ_W);
-			dst = call(dst, (uint8_t *)z80_native_addr);
-			dst = jmp_r(dst, SCRATCH1);
-		}
-		*no_jump_off = dst - (no_jump_off+1);
+		jmp(code, call_dst);
+		*no_jump_off = code->cur - (no_jump_off+1);
 		break;
 	}
 	case Z80_JR: {
-		dst = zcycles(dst, 12);//T States: 4,3,5
+		cycles(&opts->gen, 12);//T States: 4,3,5
 		uint16_t dest_addr = address + inst->immed + 2;
-		if (dest_addr < 0x4000) {
-			uint8_t * call_dst = z80_get_native_address(context, dest_addr);
+		code_ptr call_dst = z80_get_native_address(context, dest_addr);
 			if (!call_dst) {
-				opts->deferred = defer_address(opts->deferred, dest_addr, dst + 1);
+			opts->gen.deferred = defer_address(opts->gen.deferred, dest_addr, code->cur + 1);
 				//fake address to force large displacement
-				call_dst = dst + 256;
+			call_dst = code->cur + 256;
 			}
-			dst = jmp(dst, call_dst);
-		} else {
-			dst = mov_ir(dst, dest_addr, SCRATCH1, SZ_W);
-			dst = call(dst, (uint8_t *)z80_native_addr);
-			dst = jmp_r(dst, SCRATCH1);
-		}
+		jmp(code, call_dst);
 		break;
 	}
 	case Z80_JRCC: {
-		dst = zcycles(dst, 7);//T States: 4,3
+		cycles(&opts->gen, 7);//T States: 4,3
 		uint8_t cond = CC_Z;
 		switch (inst->reg)
 		{
 		case Z80_CC_NZ:
 			cond = CC_NZ;
 		case Z80_CC_Z:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_Z), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_Z), SZ_B);
 			break;
 		case Z80_CC_NC:
 			cond = CC_NZ;
 		case Z80_CC_C:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_C), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_C), SZ_B);
 			break;
 		}
-		uint8_t *no_jump_off = dst+1;
-		dst = jcc(dst, cond, dst+2);
-		dst = zcycles(dst, 5);//T States: 5
+		uint8_t *no_jump_off = code->cur+1;
+		jcc(code, cond, code->cur+2);
+		cycles(&opts->gen, 5);//T States: 5
 		uint16_t dest_addr = address + inst->immed + 2;
-		if (dest_addr < 0x4000) {
-			uint8_t * call_dst = z80_get_native_address(context, dest_addr);
+		code_ptr call_dst = z80_get_native_address(context, dest_addr);
 			if (!call_dst) {
-				opts->deferred = defer_address(opts->deferred, dest_addr, dst + 1);
+			opts->gen.deferred = defer_address(opts->gen.deferred, dest_addr, code->cur + 1);
 				//fake address to force large displacement
-				call_dst = dst + 256;
+			call_dst = code->cur + 256;
 			}
-			dst = jmp(dst, call_dst);
-		} else {
-			dst = mov_ir(dst, dest_addr, SCRATCH1, SZ_W);
-			dst = call(dst, (uint8_t *)z80_native_addr);
-			dst = jmp_r(dst, SCRATCH1);
-		}
-		*no_jump_off = dst - (no_jump_off+1);
+		jmp(code, call_dst);
+		*no_jump_off = code->cur - (no_jump_off+1);
 		break;
 	}
-	case Z80_DJNZ:
-		dst = zcycles(dst, 8);//T States: 5,3
-		dst = sub_ir(dst, 1, opts->regs[Z80_B], SZ_B);
-		uint8_t *no_jump_off = dst+1;
-		dst = jcc(dst, CC_Z, dst+2);
-		dst = zcycles(dst, 5);//T States: 5
-		uint16_t dest_addr = address + inst->immed + 2;
-		if (dest_addr < 0x4000) {
-			uint8_t * call_dst = z80_get_native_address(context, dest_addr);
-			if (!call_dst) {
-				opts->deferred = defer_address(opts->deferred, dest_addr, dst + 1);
-				//fake address to force large displacement
-				call_dst = dst + 256;
-			}
-			dst = jmp(dst, call_dst);
+	case Z80_DJNZ: {
+		cycles(&opts->gen, 8);//T States: 5,3
+		if (opts->regs[Z80_B] >= 0) {
+			sub_ir(code, 1, opts->regs[Z80_B], SZ_B);
 		} else {
-			dst = mov_ir(dst, dest_addr, SCRATCH1, SZ_W);
-			dst = call(dst, (uint8_t *)z80_native_addr);
-			dst = jmp_r(dst, SCRATCH1);
+			sub_irdisp(code, 1, opts->gen.context_reg, zr_off(Z80_B), SZ_B);
 		}
-		*no_jump_off = dst - (no_jump_off+1);
-		break;
-	case Z80_CALL: {
-		dst = zcycles(dst, 11);//T States: 4,3,4
-		dst = sub_ir(dst, 2, opts->regs[Z80_SP], SZ_W);
-		dst = mov_ir(dst, address + 3, SCRATCH1, SZ_W);
-		dst = mov_rr(dst, opts->regs[Z80_SP], SCRATCH2, SZ_W);
-		dst = call(dst, (uint8_t *)z80_write_word_highfirst);//T States: 3, 3
-		if (inst->immed < 0x4000) {
-			uint8_t * call_dst = z80_get_native_address(context, inst->immed);
+		uint8_t *no_jump_off = code->cur+1;
+		jcc(code, CC_Z, code->cur+2);
+		cycles(&opts->gen, 5);//T States: 5
+		uint16_t dest_addr = address + inst->immed + 2;
+		code_ptr call_dst = z80_get_native_address(context, dest_addr);
 			if (!call_dst) {
-				opts->deferred = defer_address(opts->deferred, inst->immed, dst + 1);
+			opts->gen.deferred = defer_address(opts->gen.deferred, dest_addr, code->cur + 1);
 				//fake address to force large displacement
-				call_dst = dst + 256;
+			call_dst = code->cur + 256;
 			}
-			dst = jmp(dst, call_dst);
-		} else {
-			dst = mov_ir(dst, inst->immed, SCRATCH1, SZ_W);
-			dst = call(dst, (uint8_t *)z80_native_addr);
-			dst = jmp_r(dst, SCRATCH1);
+		jmp(code, call_dst);
+		*no_jump_off = code->cur - (no_jump_off+1);
+		break;
 		}
+	case Z80_CALL: {
+		cycles(&opts->gen, 11);//T States: 4,3,4
+		sub_ir(code, 2, opts->regs[Z80_SP], SZ_W);
+		mov_ir(code, address + 3, opts->gen.scratch1, SZ_W);
+		mov_rr(code, opts->regs[Z80_SP], opts->gen.scratch2, SZ_W);
+		call(code, opts->write_16_highfirst);//T States: 3, 3
+		code_ptr call_dst = z80_get_native_address(context, inst->immed);
+			if (!call_dst) {
+			opts->gen.deferred = defer_address(opts->gen.deferred, inst->immed, code->cur + 1);
+				//fake address to force large displacement
+			call_dst = code->cur + 256;
+			}
+		jmp(code, call_dst);
 		break;
 	}
-	case Z80_CALLCC:
-		dst = zcycles(dst, 10);//T States: 4,3,3 (false case)
+	case Z80_CALLCC: {
+		cycles(&opts->gen, 10);//T States: 4,3,3 (false case)
 		uint8_t cond = CC_Z;
 		switch (inst->reg)
 		{
 		case Z80_CC_NZ:
 			cond = CC_NZ;
 		case Z80_CC_Z:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_Z), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_Z), SZ_B);
 			break;
 		case Z80_CC_NC:
 			cond = CC_NZ;
 		case Z80_CC_C:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_C), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_C), SZ_B);
 			break;
 		case Z80_CC_PO:
 			cond = CC_NZ;
 		case Z80_CC_PE:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_PV), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_PV), SZ_B);
 			break;
 		case Z80_CC_P:
 			cond = CC_NZ;
 		case Z80_CC_M:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_S), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_S), SZ_B);
 			break;
 		}
-		uint8_t *no_call_off = dst+1;
-		dst = jcc(dst, cond, dst+2);
-		dst = zcycles(dst, 1);//Last of the above T states takes an extra cycle in the true case
-		dst = sub_ir(dst, 2, opts->regs[Z80_SP], SZ_W);
-		dst = mov_ir(dst, address + 3, SCRATCH1, SZ_W);
-		dst = mov_rr(dst, opts->regs[Z80_SP], SCRATCH2, SZ_W);
-		dst = call(dst, (uint8_t *)z80_write_word_highfirst);//T States: 3, 3
-		if (inst->immed < 0x4000) {
-			uint8_t * call_dst = z80_get_native_address(context, inst->immed);
+		uint8_t *no_call_off = code->cur+1;
+		jcc(code, cond, code->cur+2);
+		cycles(&opts->gen, 1);//Last of the above T states takes an extra cycle in the true case
+		sub_ir(code, 2, opts->regs[Z80_SP], SZ_W);
+		mov_ir(code, address + 3, opts->gen.scratch1, SZ_W);
+		mov_rr(code, opts->regs[Z80_SP], opts->gen.scratch2, SZ_W);
+		call(code, opts->write_16_highfirst);//T States: 3, 3
+		code_ptr call_dst = z80_get_native_address(context, inst->immed);
 			if (!call_dst) {
-				opts->deferred = defer_address(opts->deferred, inst->immed, dst + 1);
+			opts->gen.deferred = defer_address(opts->gen.deferred, inst->immed, code->cur + 1);
 				//fake address to force large displacement
-				call_dst = dst + 256;
+			call_dst = code->cur + 256;
 			}
-			dst = jmp(dst, call_dst);
-		} else {
-			dst = mov_ir(dst, inst->immed, SCRATCH1, SZ_W);
-			dst = call(dst, (uint8_t *)z80_native_addr);
-			dst = jmp_r(dst, SCRATCH1);
-		}
-		*no_call_off = dst - (no_call_off+1);
+		jmp(code, call_dst);
+		*no_call_off = code->cur - (no_call_off+1);
 		break;
+		}
 	case Z80_RET:
-		dst = zcycles(dst, 4);//T States: 4
-		dst = mov_rr(dst, opts->regs[Z80_SP], SCRATCH1, SZ_W);
-		dst = call(dst, (uint8_t *)z80_read_word);//T STates: 3, 3
-		dst = add_ir(dst, 2, opts->regs[Z80_SP], SZ_W);
-		dst = call(dst, (uint8_t *)z80_native_addr);
-		dst = jmp_r(dst, SCRATCH1);
+		cycles(&opts->gen, 4);//T States: 4
+		mov_rr(code, opts->regs[Z80_SP], opts->gen.scratch1, SZ_W);
+		call(code, opts->read_16);//T STates: 3, 3
+		add_ir(code, 2, opts->regs[Z80_SP], SZ_W);
+		call(code, opts->native_addr);
+		jmp_r(code, opts->gen.scratch1);
 		break;
 	case Z80_RETCC: {
-		dst = zcycles(dst, 5);//T States: 5
+		cycles(&opts->gen, 5);//T States: 5
 		uint8_t cond = CC_Z;
 		switch (inst->reg)
 		{
 		case Z80_CC_NZ:
 			cond = CC_NZ;
 		case Z80_CC_Z:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_Z), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_Z), SZ_B);
 			break;
 		case Z80_CC_NC:
 			cond = CC_NZ;
 		case Z80_CC_C:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_C), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_C), SZ_B);
 			break;
 		case Z80_CC_PO:
 			cond = CC_NZ;
 		case Z80_CC_PE:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_PV), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_PV), SZ_B);
 			break;
 		case Z80_CC_P:
 			cond = CC_NZ;
 		case Z80_CC_M:
-			dst = cmp_irdisp8(dst, 0, CONTEXT, zf_off(ZF_S), SZ_B);
+			cmp_irdisp(code, 0, opts->gen.context_reg, zf_off(ZF_S), SZ_B);
 			break;
 		}
-		uint8_t *no_call_off = dst+1;
-		dst = jcc(dst, cond, dst+2);
-		dst = mov_rr(dst, opts->regs[Z80_SP], SCRATCH1, SZ_W);
-		dst = call(dst, (uint8_t *)z80_read_word);//T STates: 3, 3
-		dst = add_ir(dst, 2, opts->regs[Z80_SP], SZ_W);
-		dst = call(dst, (uint8_t *)z80_native_addr);
-		dst = jmp_r(dst, SCRATCH1);
-		*no_call_off = dst - (no_call_off+1);
+		uint8_t *no_call_off = code->cur+1;
+		jcc(code, cond, code->cur+2);
+		mov_rr(code, opts->regs[Z80_SP], opts->gen.scratch1, SZ_W);
+		call(code, opts->read_16);//T STates: 3, 3
+		add_ir(code, 2, opts->regs[Z80_SP], SZ_W);
+		call(code, opts->native_addr);
+		jmp_r(code, opts->gen.scratch1);
+		*no_call_off = code->cur - (no_call_off+1);
 		break;
 	}
 	case Z80_RETI:
 		//For some systems, this may need a callback for signalling interrupt routine completion
-		dst = zcycles(dst, 8);//T States: 4, 4
-		dst = mov_rr(dst, opts->regs[Z80_SP], SCRATCH1, SZ_W);
-		dst = call(dst, (uint8_t *)z80_read_word);//T STates: 3, 3
-		dst = add_ir(dst, 2, opts->regs[Z80_SP], SZ_W);
-		dst = call(dst, (uint8_t *)z80_native_addr);
-		dst = jmp_r(dst, SCRATCH1);
+		cycles(&opts->gen, 8);//T States: 4, 4
+		mov_rr(code, opts->regs[Z80_SP], opts->gen.scratch1, SZ_W);
+		call(code, opts->read_16);//T STates: 3, 3
+		add_ir(code, 2, opts->regs[Z80_SP], SZ_W);
+		call(code, opts->native_addr);
+		jmp_r(code, opts->gen.scratch1);
 		break;
 	case Z80_RETN:
-		dst = zcycles(dst, 8);//T States: 4, 4
-		dst = mov_rdisp8r(dst, CONTEXT, offsetof(z80_context, iff2), SCRATCH2, SZ_B);
-		dst = mov_rr(dst, opts->regs[Z80_SP], SCRATCH1, SZ_W);
-		dst = mov_rrdisp8(dst, SCRATCH2, CONTEXT, offsetof(z80_context, iff1), SZ_B);
-		dst = call(dst, (uint8_t *)z80_read_word);//T STates: 3, 3
-		dst = add_ir(dst, 2, opts->regs[Z80_SP], SZ_W);
-		dst = call(dst, (uint8_t *)z80_native_addr);
-		dst = jmp_r(dst, SCRATCH1);
+		cycles(&opts->gen, 8);//T States: 4, 4
+		mov_rdispr(code, opts->gen.context_reg, offsetof(z80_context, iff2), opts->gen.scratch2, SZ_B);
+		mov_rr(code, opts->regs[Z80_SP], opts->gen.scratch1, SZ_W);
+		mov_rrdisp(code, opts->gen.scratch2, opts->gen.context_reg, offsetof(z80_context, iff1), SZ_B);
+		call(code, opts->read_16);//T STates: 3, 3
+		add_ir(code, 2, opts->regs[Z80_SP], SZ_W);
+		call(code, opts->native_addr);
+		jmp_r(code, opts->gen.scratch1);
 		break;
 	case Z80_RST: {
 		//RST is basically CALL to an address in page 0
-		dst = zcycles(dst, 5);//T States: 5
-		dst = sub_ir(dst, 2, opts->regs[Z80_SP], SZ_W);
-		dst = mov_ir(dst, address + 1, SCRATCH1, SZ_W);
-		dst = mov_rr(dst, opts->regs[Z80_SP], SCRATCH2, SZ_W);
-		dst = call(dst, (uint8_t *)z80_write_word_highfirst);//T States: 3, 3
-		uint8_t * call_dst = z80_get_native_address(context, inst->immed);
+		cycles(&opts->gen, 5);//T States: 5
+		sub_ir(code, 2, opts->regs[Z80_SP], SZ_W);
+		mov_ir(code, address + 1, opts->gen.scratch1, SZ_W);
+		mov_rr(code, opts->regs[Z80_SP], opts->gen.scratch2, SZ_W);
+		call(code, opts->write_16_highfirst);//T States: 3, 3
+		code_ptr call_dst = z80_get_native_address(context, inst->immed);
 		if (!call_dst) {
-			opts->deferred = defer_address(opts->deferred, inst->immed, dst + 1);
+			opts->gen.deferred = defer_address(opts->gen.deferred, inst->immed, code->cur + 1);
 			//fake address to force large displacement
-			call_dst = dst + 256;
+			call_dst = code->cur + 256;
 		}
-		dst = jmp(dst, call_dst);
+		jmp(code, call_dst);
 		break;
 	}
 	case Z80_IN:
-		dst = zcycles(dst, inst->reg == Z80_A ? 7 : 8);//T States: 4 3/4
+		cycles(&opts->gen, inst->reg == Z80_A ? 7 : 8);//T States: 4 3/4
 		if (inst->addr_mode == Z80_IMMED_INDIRECT) {
-			dst = mov_ir(dst, inst->immed, SCRATCH1, SZ_B);
+			mov_ir(code, inst->immed, opts->gen.scratch1, SZ_B);
 		} else {
-			dst = mov_rr(dst, opts->regs[Z80_C], SCRATCH1, SZ_B);
+			mov_rr(code, opts->regs[Z80_C], opts->gen.scratch1, SZ_B);
 		}
-		dst = call(dst, (uint8_t *)z80_io_read);
-		translate_z80_reg(inst, &dst_op, dst, opts);
-		dst = mov_rr(dst, SCRATCH1, dst_op.base, SZ_B);
-		dst = z80_save_reg(dst, inst, opts);
+		call(code, opts->read_io);
+		translate_z80_reg(inst, &dst_op, opts);
+		if (dst_op.mode == MODE_REG_DIRECT) {
+			mov_rr(code, opts->gen.scratch1, dst_op.base, SZ_B);
+		} else {
+			mov_rrdisp(code, opts->gen.scratch1, dst_op.base, dst_op.disp, SZ_B);
+		}
+		z80_save_reg(inst, opts);
 		break;
 	/*case Z80_INI:
 	case Z80_INIR:
 	case Z80_IND:
 	case Z80_INDR:*/
 	case Z80_OUT:
-		dst = zcycles(dst, inst->reg == Z80_A ? 7 : 8);//T States: 4 3/4
+		cycles(&opts->gen, inst->reg == Z80_A ? 7 : 8);//T States: 4 3/4
 		if ((inst->addr_mode & 0x1F) == Z80_IMMED_INDIRECT) {
-			dst = mov_ir(dst, inst->immed, SCRATCH2, SZ_B);
+			mov_ir(code, inst->immed, opts->gen.scratch2, SZ_B);
 		} else {
-			dst = mov_rr(dst, opts->regs[Z80_C], SCRATCH2, SZ_B);
+			zreg_to_native(opts, Z80_C, opts->gen.scratch2);
+			mov_rr(code, opts->regs[Z80_C], opts->gen.scratch2, SZ_B);
 		}
-		translate_z80_reg(inst, &src_op, dst, opts);
-		dst = mov_rr(dst, dst_op.base, SCRATCH1, SZ_B);
-		dst = call(dst, (uint8_t *)z80_io_write);
-		dst = z80_save_reg(dst, inst, opts);
+		translate_z80_reg(inst, &src_op, opts);
+		if (src_op.mode == MODE_REG_DIRECT) {
+			mov_rr(code, src_op.base, opts->gen.scratch1, SZ_B);
+		} else if (src_op.mode == MODE_IMMED) {
+			mov_ir(code, src_op.disp, opts->gen.scratch1, SZ_B);
+		} else {
+			mov_rdispr(code, src_op.base, src_op.disp, opts->gen.scratch1, SZ_B);
+		}
+		call(code, opts->write_io);
+		z80_save_reg(inst, opts);
 		break;
 	/*case Z80_OUTI:
 	case Z80_OTIR:
@@ -1652,21 +1946,75 @@
 		exit(1);
 	}
 	}
-	return dst;
 }
 
+uint8_t * z80_interp_handler(uint8_t opcode, z80_context * context) //returns the native handler for opcode, generating and caching it on first use
+{
+	if (!context->interp_code[opcode]) {
+		if (opcode == 0xCB || (opcode >= 0xDD && (opcode & 0xF) == 0xD)) { //CB/DD/ED/FD prefixes; parens required since == binds tighter than &
+			fprintf(stderr, "Encountered prefix byte %X at address %X. Z80 interpeter doesn't support those yet.", opcode, context->pc);
+			exit(1);
+		}
+		uint8_t codebuf[8];
+		memset(codebuf, 0, sizeof(codebuf)); //decode the opcode in isolation, padded with zero bytes
+		codebuf[0] = opcode;
+		z80inst inst;
+		uint8_t * after = z80_decode(codebuf, &inst);
+		if (after - codebuf > 1) { //only single byte instructions are handled here
+			fprintf(stderr, "Encountered multi-byte Z80 instruction at %X. Z80 interpeter doesn't support those yet.", context->pc);
+			exit(1);
+		}
+
+		z80_options * opts = context->options;
+		code_info *code = &opts->gen.code;
+		check_alloc_code(code, ZMAX_NATIVE_SIZE);
+		context->interp_code[opcode] = code->cur; //cache the start of the generated handler for later calls
+		translate_z80inst(&inst, context, 0, 1); //NOTE(review): last argument is 0 at the other call sites; presumably an interpreter-mode flag - confirm
+		mov_rdispr(code, opts->gen.context_reg, offsetof(z80_context, pc), opts->gen.scratch1, SZ_W); //scratch1 = pc stored in the context
+		add_ir(code, after - codebuf, opts->gen.scratch1, SZ_W); //advance past the decoded instruction
+		call(code, opts->native_addr); //presumably converts the Z80 address in scratch1 to a native address - confirm
+		jmp_r(code, opts->gen.scratch1);
+		z80_handle_deferred(context);
+	}
+	return context->interp_code[opcode];
+}
+
+code_info z80_make_interp_stub(z80_context * context, uint16_t address) //emits a stub for address that fetches the opcode at runtime and jumps to the code returned by z80_interp_handler
+{
+	z80_options *opts = context->options;
+	code_info * code = &opts->gen.code;
+	check_alloc_code(code, 32);
+	code_info stub = {code->cur, NULL}; //start of the stub; stub.last is filled in once the code has been emitted
+	//TODO: make this play well with the breakpoint code
+	mov_ir(code, address, opts->gen.scratch1, SZ_W);
+	call(code, opts->read_8); //presumably leaves the byte read from address in scratch1 - confirm
+	//normal opcode fetch is already factored into instruction timing
+	//back out the base 3 cycles from a read here
+	//not quite perfect, but it will have to do for now
+	cycles(&opts->gen, -3);
+	check_cycles_int(&opts->gen, address);
+	call(code, opts->gen.save_context);
+	mov_irdisp(code, address, opts->gen.context_reg, offsetof(z80_context, pc), SZ_W); //store address as context->pc, which z80_interp_handler reads
+	push_r(code, opts->gen.context_reg); //pushed before the C call and popped again below
+	call_args(code, (code_ptr)z80_interp_handler, 2, opts->gen.scratch1, opts->gen.context_reg);
+	mov_rr(code, RAX, opts->gen.scratch1, SZ_PTR); //RAX: presumably the code pointer returned by z80_interp_handler
+	pop_r(code, opts->gen.context_reg);
+	call(code, opts->gen.load_context);
+	jmp_r(code, opts->gen.scratch1); //continue in the handler code
+	stub.last = code->cur; //end of the emitted stub
+	return stub;
+}
+
+
 uint8_t * z80_get_native_address(z80_context * context, uint32_t address)
 {
 	native_map_slot *map;
 	if (address < 0x4000) {
 		address &= 0x1FFF;
 		map = context->static_code_map;
-	} else if (address >= 0x8000) {
-		address &= 0x7FFF;
-		map = context->banked_code_map + context->bank_reg;
 	} else {
-		//dprintf("z80_get_native_address: %X NULL\n", address);
-		return NULL;
+		address -= 0x4000;
+		map = context->banked_code_map;
 	}
 	if (!map->base || !map->offsets || map->offsets[address] == INVALID_OFFSET || map->offsets[address] == EXTENSION_WORD) {
 		//dprintf("z80_get_native_address: %X NULL\n", address);
@@ -1676,34 +2024,34 @@
 	return map->base + map->offsets[address];
 }
 
-uint8_t z80_get_native_inst_size(x86_z80_options * opts, uint32_t address)
+uint8_t z80_get_native_inst_size(z80_options * opts, uint32_t address)
 {
+	//TODO: Fix for addresses >= 0x4000
 	if (address >= 0x4000) {
 		return 0;
 	}
-	return opts->ram_inst_sizes[address & 0x1FFF];
+	return opts->gen.ram_inst_sizes[0][address & 0x1FFF];
 }
 
 void z80_map_native_address(z80_context * context, uint32_t address, uint8_t * native_address, uint8_t size, uint8_t native_size)
 {
 	uint32_t orig_address = address;
 	native_map_slot *map;
-	x86_z80_options * opts = context->options;
+	z80_options * opts = context->options;
 	if (address < 0x4000) {
 		address &= 0x1FFF;
 		map = context->static_code_map;
-		opts->ram_inst_sizes[address] = native_size;
+		opts->gen.ram_inst_sizes[0][address] = native_size;
 		context->ram_code_flags[(address & 0x1C00) >> 10] |= 1 << ((address & 0x380) >> 7);
 		context->ram_code_flags[((address + size) & 0x1C00) >> 10] |= 1 << (((address + size) & 0x380) >> 7);
-	} else if (address >= 0x8000) {
-		address &= 0x7FFF;
-		map = context->banked_code_map + context->bank_reg;
+	} else {
+		//HERE
+		address -= 0x4000;
+		map = context->banked_code_map;
 		if (!map->offsets) {
-			map->offsets = malloc(sizeof(int32_t) * 0x8000);
-			memset(map->offsets, 0xFF, sizeof(int32_t) * 0x8000);
+			map->offsets = malloc(sizeof(int32_t) * 0xC000);
+			memset(map->offsets, 0xFF, sizeof(int32_t) * 0xC000);
 		}
-	} else {
-		return;
 	}
 	if (!map->base) {
 		map->base = native_address;
@@ -1714,15 +2062,13 @@
 		if (address < 0x4000) {
 			address &= 0x1FFF;
 			map = context->static_code_map;
-		} else if (address >= 0x8000) {
-			address &= 0x7FFF;
-			map = context->banked_code_map + context->bank_reg;
 		} else {
-			return;
+			address -= 0x4000;
+			map = context->banked_code_map;
 		}
 		if (!map->offsets) {
-			map->offsets = malloc(sizeof(int32_t) * 0x8000);
-			memset(map->offsets, 0xFF, sizeof(int32_t) * 0x8000);
+			map->offsets = malloc(sizeof(int32_t) * 0xC000);
+			memset(map->offsets, 0xFF, sizeof(int32_t) * 0xC000);
 		}
 		map->offsets[address] = EXTENSION_WORD;
 	}
@@ -1732,6 +2078,7 @@
 
 uint32_t z80_get_instruction_start(native_map_slot * static_code_map, uint32_t address)
 {
+	//TODO: Fixme for address >= 0x4000
 	if (!static_code_map->base || address >= 0x4000) {
 		return INVALID_INSTRUCTION_START;
 	}
@@ -1750,10 +2097,12 @@
 {
 	uint32_t inst_start = z80_get_instruction_start(context->static_code_map, address);
 	if (inst_start != INVALID_INSTRUCTION_START) {
-		uint8_t * dst = z80_get_native_address(context, inst_start);
-		dprintf("patching code at %p for Z80 instruction at %X due to write to %X\n", dst, inst_start, address);
-		dst = mov_ir(dst, inst_start, SCRATCH1, SZ_D);
-		dst = call(dst, (uint8_t *)z80_retrans_stub);
+		code_ptr dst = z80_get_native_address(context, inst_start);
+		code_info code = {dst, dst+16};
+		z80_options * opts = context->options;
+		dprintf("patching code at %p for Z80 instruction at %X due to write to %X\n", code.cur, inst_start, address);
+		mov_ir(&code, inst_start, opts->gen.scratch1, SZ_D);
+		call(&code, opts->retrans_stub);
 	}
 	return context;
 }
@@ -1773,23 +2122,21 @@
 
 void z80_handle_deferred(z80_context * context)
 {
-	x86_z80_options * opts = context->options;
-	process_deferred(&opts->deferred, context, (native_addr_func)z80_get_native_address);
-	if (opts->deferred) {
-		translate_z80_stream(context, opts->deferred->address);
+	z80_options * opts = context->options;
+	process_deferred(&opts->gen.deferred, context, (native_addr_func)z80_get_native_address);
+	if (opts->gen.deferred) {
+		translate_z80_stream(context, opts->gen.deferred->address);
 	}
 }
 
+extern void * z80_retranslate_inst(uint32_t address, z80_context * context, uint8_t * orig_start) asm("z80_retranslate_inst");
 void * z80_retranslate_inst(uint32_t address, z80_context * context, uint8_t * orig_start)
 {
 	char disbuf[80];
-	x86_z80_options * opts = context->options;
+	z80_options * opts = context->options;
 	uint8_t orig_size = z80_get_native_inst_size(opts, address);
-	uint32_t orig = address;
-	address &= 0x1FFF;
-	uint8_t * dst = opts->cur_code;
-	uint8_t * dst_end = opts->code_end;
-	uint8_t *after, *inst = context->mem_pointers[0] + address;
+	code_info *code = &opts->gen.code;
+	uint8_t *after, *inst = get_native_pointer(address, (void **)context->mem_pointers, &opts->gen);
 	z80inst instbuf;
 	dprintf("Retranslating code at Z80 address %X, native address %p\n", address, orig_start);
 	after = z80_decode(inst, &instbuf);
@@ -1802,19 +2149,16 @@
 	}
 	#endif
 	if (orig_size != ZMAX_NATIVE_SIZE) {
-		if (dst_end - dst < ZMAX_NATIVE_SIZE) {
-			size_t size = 1024*1024;
-			dst = alloc_code(&size);
-			opts->code_end = dst_end = dst + size;
-			opts->cur_code = dst;
-		}
-		deferred_addr * orig_deferred = opts->deferred;
-		uint8_t * native_end = translate_z80inst(&instbuf, dst, context, address);
+		check_alloc_code(code, ZMAX_NATIVE_SIZE);
+		code_ptr start = code->cur;
+		deferred_addr * orig_deferred = opts->gen.deferred;
+		translate_z80inst(&instbuf, context, address, 0);
+		/*
 		if ((native_end - dst) <= orig_size) {
 			uint8_t * native_next = z80_get_native_address(context, address + after-inst);
 			if (native_next && ((native_next == orig_start + orig_size) || (orig_size - (native_end - dst)) > 5)) {
-				remove_deferred_until(&opts->deferred, orig_deferred);
-				native_end = translate_z80inst(&instbuf, orig_start, context, address);
+				remove_deferred_until(&opts->gen.deferred, orig_deferred);
+				native_end = translate_z80inst(&instbuf, orig_start, context, address, 0);
 				if (native_next == orig_start + orig_size && (native_next-native_end) < 2) {
 					while (native_end < orig_start + orig_size) {
 						*(native_end++) = 0x90; //NOP
@@ -1825,19 +2169,27 @@
 				z80_handle_deferred(context);
 				return orig_start;
 			}
-		}
-		z80_map_native_address(context, address, dst, after-inst, ZMAX_NATIVE_SIZE);
-		opts->cur_code = dst+ZMAX_NATIVE_SIZE;
-		jmp(orig_start, dst);
+		}*/
+		z80_map_native_address(context, address, start, after-inst, ZMAX_NATIVE_SIZE);
+		code_info tmp_code = {orig_start, orig_start + 16};
+		jmp(&tmp_code, start);
+		tmp_code = *code;
+		code->cur = start + ZMAX_NATIVE_SIZE;
 		if (!z80_is_terminal(&instbuf)) {
-			jmp(native_end, z80_get_native_address_trans(context, address + after-inst));
+			jmp(&tmp_code, z80_get_native_address_trans(context, address + after-inst));
 		}
 		z80_handle_deferred(context);
-		return dst;
+		return start;
 	} else {
-		dst = translate_z80inst(&instbuf, orig_start, context, address);
+		code_info tmp_code = *code;
+		code->cur = orig_start;
+		code->last = orig_start + ZMAX_NATIVE_SIZE;
+		translate_z80inst(&instbuf, context, address, 0);
+		code_info tmp2 = *code;
+		*code = tmp_code;
 		if (!z80_is_terminal(&instbuf)) {
-			dst = jmp(dst, z80_get_native_address_trans(context, address + after-inst));
+
+			jmp(&tmp2, z80_get_native_address_trans(context, address + after-inst));
 		}
 		z80_handle_deferred(context);
 		return orig_start;
@@ -1850,41 +2202,28 @@
 	if (z80_get_native_address(context, address)) {
 		return;
 	}
-	x86_z80_options * opts = context->options;
+	z80_options * opts = context->options;
 	uint32_t start_address = address;
-	uint8_t * encoded = NULL, *next;
-	if (address < 0x4000) {
-		encoded = context->mem_pointers[0] + (address & 0x1FFF);
-	} else if(address >= 0x8000 && context->mem_pointers[1]) {
-		printf("attempt to translate Z80 code from banked area at address %X\n", address);
-		exit(1);
-		//encoded = context->mem_pointers[1] + (address & 0x7FFF);
-	}
-	while (encoded != NULL)
+
+	do
 	{
 		z80inst inst;
 		dprintf("translating Z80 code at address %X\n", address);
 		do {
-			if (opts->code_end-opts->cur_code < ZMAX_NATIVE_SIZE) {
-				if (opts->code_end-opts->cur_code < 5) {
-					puts("out of code memory, not enough space for jmp to next chunk");
-					exit(1);
-				}
-				size_t size = 1024*1024;
-				opts->cur_code = alloc_code(&size);
-				opts->code_end = opts->cur_code + size;
-				jmp(opts->cur_code, opts->cur_code);
-			}
-			if (address > 0x4000 && address < 0x8000) {
-				opts->cur_code = xor_rr(opts->cur_code, RDI, RDI, SZ_D);
-				opts->cur_code = call(opts->cur_code, (uint8_t *)exit);
+			uint8_t * existing = z80_get_native_address(context, address);
+			if (existing) {
+				jmp(&opts->gen.code, existing);
 				break;
 			}
-			uint8_t * existing = z80_get_native_address(context, address);
-			if (existing) {
-				opts->cur_code = jmp(opts->cur_code, existing);
+			uint8_t * encoded, *next;
+			encoded = get_native_pointer(address, (void **)context->mem_pointers, &opts->gen);
+			if (!encoded) {
+				code_info stub = z80_make_interp_stub(context, address);
+				z80_map_native_address(context, address, stub.cur, 1, stub.last - stub.cur);
 				break;
 			}
+			//make sure prologue is in a contiguous chunk of code
+			check_code_prologue(&opts->gen.code);
 			next = z80_decode(encoded, &inst);
 			#ifdef DO_DEBUG_PRINT
 			z80_disasm(&inst, disbuf, address);
@@ -1894,38 +2233,37 @@
 				printf("%X\t%s\n", address, disbuf);
 			}
 			#endif
-			uint8_t *after = translate_z80inst(&inst, opts->cur_code, context, address);
-			z80_map_native_address(context, address, opts->cur_code, next-encoded, after - opts->cur_code);
-			opts->cur_code = after;
+			code_ptr start = opts->gen.code.cur;
+			translate_z80inst(&inst, context, address, 0);
+			z80_map_native_address(context, address, start, next-encoded, opts->gen.code.cur - start);
 			address += next-encoded;
-			if (address > 0xFFFF) {
 				address &= 0xFFFF;
-
-			} else {
-				encoded = next;
-			}
 		} while (!z80_is_terminal(&inst));
-		process_deferred(&opts->deferred, context, (native_addr_func)z80_get_native_address);
-		if (opts->deferred) {
-			address = opts->deferred->address;
+		process_deferred(&opts->gen.deferred, context, (native_addr_func)z80_get_native_address);
+		if (opts->gen.deferred) {
+			address = opts->gen.deferred->address;
 			dprintf("defferred address: %X\n", address);
-			if (address < 0x4000) {
-				encoded = context->mem_pointers[0] + (address & 0x1FFF);
-			} else if (address > 0x8000 && context->mem_pointers[1]) {
-				encoded = context->mem_pointers[1] + (address  & 0x7FFF);
-			} else {
-				printf("attempt to translate non-memory address: %X\n", address);
-				exit(1);
 			}
-		} else {
-			encoded = NULL;
-		}
-	}
+	} while (opts->gen.deferred);
 }
 
-void init_x86_z80_opts(x86_z80_options * options)
+void init_z80_opts(z80_options * options, memmap_chunk const * chunks, uint32_t num_chunks, uint32_t clock_divider)
 {
+	memset(options, 0, sizeof(*options));
+
+	options->gen.memmap = chunks;
+	options->gen.memmap_chunks = num_chunks;
+	options->gen.address_size = SZ_W;
+	options->gen.address_mask = 0xFFFF;
+	options->gen.max_address = 0x10000;
+	options->gen.bus_cycles = 3;
+	options->gen.clock_divider = clock_divider;
+	options->gen.mem_ptr_off = offsetof(z80_context, mem_pointers);
+	options->gen.ram_flags_off = offsetof(z80_context, ram_code_flags);
+	options->gen.ram_flags_shift = 7;
+
 	options->flags = 0;
+#ifdef X86_64
 	options->regs[Z80_B] = BH;
 	options->regs[Z80_C] = RBX;
 	options->regs[Z80_D] = CH;
@@ -1946,90 +2284,471 @@
 	options->regs[Z80_AF] = -1;
 	options->regs[Z80_IX] = RDX;
 	options->regs[Z80_IY] = R8;
-	size_t size = 1024 * 1024;
-	options->cur_code = alloc_code(&size);
-	options->code_end = options->cur_code + size;
-	options->ram_inst_sizes = malloc(sizeof(uint8_t) * 0x2000);
-	memset(options->ram_inst_sizes, 0, sizeof(uint8_t) * 0x2000);
-	options->deferred = NULL;
+
+	options->gen.scratch1 = R13;
+	options->gen.scratch2 = R14;
+#else
+	memset(options->regs, -1, sizeof(options->regs));
+	options->regs[Z80_A] = RAX;
+	options->regs[Z80_SP] = RBX;
+
+	options->gen.scratch1 = RCX;
+	options->gen.scratch2 = RDX;
+#endif
+
+	options->gen.context_reg = RSI;
+	options->gen.cycles = RBP;
+	options->gen.limit = RDI;
+
+	options->gen.native_code_map = malloc(sizeof(native_map_slot));
+	memset(options->gen.native_code_map, 0, sizeof(native_map_slot));
+	options->gen.deferred = NULL;
+	options->gen.ram_inst_sizes = malloc(sizeof(uint8_t) * 0x2000 + sizeof(uint8_t *));
+	options->gen.ram_inst_sizes[0] = (uint8_t *)(options->gen.ram_inst_sizes + 1);
+	memset(options->gen.ram_inst_sizes[0], 0, sizeof(uint8_t) * 0x2000);
+
+	code_info *code = &options->gen.code;
+	init_code_info(code);
+
+	options->save_context_scratch = code->cur;
+	mov_rrdisp(code, options->gen.scratch1, options->gen.context_reg, offsetof(z80_context, scratch1), SZ_W);
+	mov_rrdisp(code, options->gen.scratch2, options->gen.context_reg, offsetof(z80_context, scratch2), SZ_W);
+
+	options->gen.save_context = code->cur;
+	for (int i = 0; i <= Z80_A; i++)
+	{
+		int reg;
+		uint8_t size;
+		if (i < Z80_I) {
+			reg = i /2 + Z80_BC + (i > Z80_H ? 2 : 0);
+			size = SZ_W;
+		} else {
+			reg = i;
+			size = SZ_B;
+}
+		if (options->regs[reg] >= 0) {
+			mov_rrdisp(code, options->regs[reg], options->gen.context_reg, offsetof(z80_context, regs) + i, size);
+		}
+		if (size == SZ_W) {
+			i++;
+		}
+	}
+	if (options->regs[Z80_SP] >= 0) {
+		mov_rrdisp(code, options->regs[Z80_SP], options->gen.context_reg, offsetof(z80_context, sp), SZ_W);
+	}
+	mov_rrdisp(code, options->gen.limit, options->gen.context_reg, offsetof(z80_context, target_cycle), SZ_D);
+	mov_rrdisp(code, options->gen.cycles, options->gen.context_reg, offsetof(z80_context, current_cycle), SZ_D);
+	retn(code);
+
+	options->load_context_scratch = code->cur;
+	mov_rdispr(code, options->gen.context_reg, offsetof(z80_context, scratch1), options->gen.scratch1, SZ_W);
+	mov_rdispr(code, options->gen.context_reg, offsetof(z80_context, scratch2), options->gen.scratch2, SZ_W);
+	options->gen.load_context = code->cur;
+	for (int i = 0; i <= Z80_A; i++)
+	{
+		int reg;
+		uint8_t size;
+		if (i < Z80_I) {
+			reg = i /2 + Z80_BC + (i > Z80_H ? 2 : 0);
+			size = SZ_W;
+		} else {
+			reg = i;
+			size = SZ_B;
+		}
+		if (options->regs[reg] >= 0) {
+			mov_rdispr(code, options->gen.context_reg, offsetof(z80_context, regs) + i, options->regs[reg], size);
+		}
+		if (size == SZ_W) {
+			i++;
+		}
+	}
+	if (options->regs[Z80_SP] >= 0) {
+		mov_rdispr(code, options->gen.context_reg, offsetof(z80_context, sp), options->regs[Z80_SP], SZ_W);
+	}
+	mov_rdispr(code, options->gen.context_reg, offsetof(z80_context, target_cycle), options->gen.limit, SZ_D);
+	mov_rdispr(code, options->gen.context_reg, offsetof(z80_context, current_cycle), options->gen.cycles, SZ_D);
+	retn(code);
+
+	options->native_addr = code->cur;
+	call(code, options->gen.save_context);
+	push_r(code, options->gen.context_reg);
+	movzx_rr(code, options->gen.scratch1, options->gen.scratch1, SZ_W, SZ_D);
+	call_args(code, (code_ptr)z80_get_native_address_trans, 2, options->gen.context_reg, options->gen.scratch1);
+	mov_rr(code, RAX, options->gen.scratch1, SZ_PTR);
+	pop_r(code, options->gen.context_reg);
+	call(code, options->gen.load_context);
+	retn(code);
+
+	options->gen.handle_cycle_limit = code->cur;
+	cmp_rdispr(code, options->gen.context_reg, offsetof(z80_context, sync_cycle), options->gen.cycles, SZ_D);
+	code_ptr no_sync = code->cur+1;
+	jcc(code, CC_B, no_sync);
+	mov_irdisp(code, 0, options->gen.context_reg, offsetof(z80_context, pc), SZ_W);
+	call(code, options->save_context_scratch);
+	pop_r(code, RAX); //return address in read/write func
+	pop_r(code, RBX); //return address in translated code
+	sub_ir(code, 5, RAX, SZ_PTR); //adjust return address to point to the call that got us here
+	mov_rrdisp(code, RBX, options->gen.context_reg, offsetof(z80_context, extra_pc), SZ_PTR);
+	mov_rrind(code, RAX, options->gen.context_reg, SZ_PTR);
+	restore_callee_save_regs(code);
+	*no_sync = code->cur - (no_sync + 1);
+	//return to caller of z80_run
+	retn(code);
+
+	options->gen.handle_code_write = (code_ptr)z80_handle_code_write;
+
+	options->read_8 = gen_mem_fun(&options->gen, chunks, num_chunks, READ_8, &options->read_8_noinc);
+	options->write_8 = gen_mem_fun(&options->gen, chunks, num_chunks, WRITE_8, &options->write_8_noinc);
+
+	options->gen.handle_cycle_limit_int = code->cur;
+	cmp_rdispr(code, options->gen.context_reg, offsetof(z80_context, int_cycle), options->gen.cycles, SZ_D);
+	code_ptr skip_int = code->cur+1;
+	jcc(code, CC_B, skip_int);
+	//set limit to the cycle limit
+	mov_rdispr(code, options->gen.context_reg, offsetof(z80_context, sync_cycle), options->gen.limit, SZ_D);
+	//disable interrupts
+	mov_irdisp(code, 0, options->gen.context_reg, offsetof(z80_context, iff1), SZ_B);
+	mov_irdisp(code, 0, options->gen.context_reg, offsetof(z80_context, iff2), SZ_B);
+	cycles(&options->gen, 7);
+	//save return address (in scratch1) to Z80 stack
+	sub_ir(code, 2, options->regs[Z80_SP], SZ_W);
+	mov_rr(code, options->regs[Z80_SP], options->gen.scratch2, SZ_W);
+	//we need to do check_cycles and cycles outside of the write_8 call
+	//so that the stack has the correct depth if we need to return to C
+	//for a synchronization
+	check_cycles(&options->gen);
+	cycles(&options->gen, 3);
+	//save word to write before call to write_8_noinc
+	push_r(code, options->gen.scratch1);
+	call(code, options->write_8_noinc);
+	//restore word to write
+	pop_r(code, options->gen.scratch1);
+	//write high byte to SP+1
+	mov_rr(code, options->regs[Z80_SP], options->gen.scratch2, SZ_W);
+	add_ir(code, 1, options->gen.scratch2, SZ_W);
+	shr_ir(code, 8, options->gen.scratch1, SZ_W);
+	check_cycles(&options->gen);
+	cycles(&options->gen, 3);
+	call(code, options->write_8_noinc);
+	//dispose of return address as we'll be jumping somewhere else
+	pop_r(code, options->gen.scratch2);
+	//TODO: Support interrupt mode 0 and 2
+	mov_ir(code, 0x38, options->gen.scratch1, SZ_W);
+	call(code, options->native_addr);
+	mov_rrind(code, options->gen.scratch1, options->gen.context_reg, SZ_PTR);
+	restore_callee_save_regs(code);
+	//return to caller of z80_run to sync
+	retn(code);
+	*skip_int = code->cur - (skip_int+1);
+	cmp_rdispr(code, options->gen.context_reg, offsetof(z80_context, sync_cycle), options->gen.cycles, SZ_D);
+	code_ptr skip_sync = code->cur + 1;
+	jcc(code, CC_B, skip_sync);
+	//save PC
+	mov_rrdisp(code, options->gen.scratch1, options->gen.context_reg, offsetof(z80_context, pc), SZ_D);
+	options->do_sync = code->cur;
+	call(code, options->gen.save_context);
+	pop_rind(code, options->gen.context_reg);
+	//restore callee saved registers
+	restore_callee_save_regs(code);
+	//return to caller of z80_run
+	*skip_sync = code->cur - (skip_sync+1);
+	retn(code);
+
+	options->read_io = code->cur;
+	check_cycles(&options->gen);
+	cycles(&options->gen, 4);
+	//Genesis has no IO hardware and always returns FF
+	//eventually this should use a second memory map array
+	mov_ir(code, 0xFF, options->gen.scratch1, SZ_B);
+	retn(code);
+
+	options->write_io = code->cur;
+	check_cycles(&options->gen);
+	cycles(&options->gen, 4);
+	retn(code);
+
+	options->read_16 = code->cur;
+	cycles(&options->gen, 3);
+	check_cycles(&options->gen);
+	//TODO: figure out how to handle the extra wait state for word reads to bank area
+	//may also need special handling to avoid too much stack depth when access is blocked
+	push_r(code, options->gen.scratch1);
+	call(code, options->read_8_noinc);
+	mov_rr(code, options->gen.scratch1, options->gen.scratch2, SZ_B);
+#ifndef X86_64
+	//scratch 2 is a caller save register in 32-bit builds and may be clobbered by something called from the read8 fun
+	mov_rrdisp(code, options->gen.scratch1, options->gen.context_reg, offsetof(z80_context, scratch2), SZ_B);
+#endif
+	pop_r(code, options->gen.scratch1);
+	add_ir(code, 1, options->gen.scratch1, SZ_W);
+	cycles(&options->gen, 3);
+	check_cycles(&options->gen);
+	call(code, options->read_8_noinc);
+	shl_ir(code, 8, options->gen.scratch1, SZ_W);
+#ifdef X86_64
+	mov_rr(code, options->gen.scratch2, options->gen.scratch1, SZ_B);
+#else
+	mov_rdispr(code, options->gen.context_reg, offsetof(z80_context, scratch2), options->gen.scratch1, SZ_B);
+#endif
+	retn(code);
+
+	options->write_16_highfirst = code->cur;
+	cycles(&options->gen, 3);
+	check_cycles(&options->gen);
+	push_r(code, options->gen.scratch2);
+	push_r(code, options->gen.scratch1);
+	add_ir(code, 1, options->gen.scratch2, SZ_W);
+	shr_ir(code, 8, options->gen.scratch1, SZ_W);
+	call(code, options->write_8_noinc);
+	pop_r(code, options->gen.scratch1);
+	pop_r(code, options->gen.scratch2);
+	cycles(&options->gen, 3);
+	check_cycles(&options->gen);
+	//TODO: Check if we can get away with TCO here
+	call(code, options->write_8_noinc);
+	retn(code);
+
+	options->write_16_lowfirst = code->cur;
+	cycles(&options->gen, 3);
+	check_cycles(&options->gen);
+	push_r(code, options->gen.scratch2);
+	push_r(code, options->gen.scratch1);
+	call(code, options->write_8_noinc);
+	pop_r(code, options->gen.scratch1);
+	pop_r(code, options->gen.scratch2);
+	add_ir(code, 1, options->gen.scratch2, SZ_W);
+	shr_ir(code, 8, options->gen.scratch1, SZ_W);
+	cycles(&options->gen, 3);
+	check_cycles(&options->gen);
+	//TODO: Check if we can get away with TCO here
+	call(code, options->write_8_noinc);
+	retn(code);
+
+	options->retrans_stub = code->cur;
+	//pop return address
+	pop_r(code, options->gen.scratch2);
+	call(code, options->gen.save_context);
+	//adjust pointer before move and call instructions that got us here
+	sub_ir(code, options->gen.scratch1 >= R8 ? 11 : 10, options->gen.scratch2, SZ_PTR);
+	push_r(code, options->gen.context_reg);
+	call_args(code, (code_ptr)z80_retranslate_inst, 3, options->gen.scratch1, options->gen.context_reg, options->gen.scratch2);
+	pop_r(code, options->gen.context_reg);
+	mov_rr(code, RAX, options->gen.scratch1, SZ_PTR);
+	call(code, options->gen.load_context);
+	jmp_r(code, options->gen.scratch1);
+
+	options->run = (z80_run_fun)code->cur;
+	save_callee_save_regs(code);
+#ifdef X86_64
+	mov_rr(code, RDI, options->gen.context_reg, SZ_PTR);
+#else
+	mov_rdispr(code, RSP, 5 * sizeof(int32_t), options->gen.context_reg, SZ_PTR);
+#endif
+	call(code, options->load_context_scratch);
+	cmp_irdisp(code, 0, options->gen.context_reg, offsetof(z80_context, extra_pc), SZ_PTR);
+	code_ptr no_extra = code->cur+1;
+	jcc(code, CC_Z, no_extra);
+	push_rdisp(code, options->gen.context_reg, offsetof(z80_context, extra_pc));
+	mov_irdisp(code, 0, options->gen.context_reg, offsetof(z80_context, extra_pc), SZ_PTR);
+	*no_extra = code->cur - (no_extra + 1);
+	jmp_rind(code, options->gen.context_reg);
 }
 
-void init_z80_context(z80_context * context, x86_z80_options * options)
+void init_z80_context(z80_context * context, z80_options * options) //zeroes *context, allocates its code maps and attaches options
 {
 	memset(context, 0, sizeof(*context));
 	context->static_code_map = malloc(sizeof(*context->static_code_map));
 	context->static_code_map->base = NULL;
 	context->static_code_map->offsets = malloc(sizeof(int32_t) * 0x2000);
 	memset(context->static_code_map->offsets, 0xFF, sizeof(int32_t) * 0x2000);
-	context->banked_code_map = malloc(sizeof(native_map_slot) * (1 << 9));
-	memset(context->banked_code_map, 0, sizeof(native_map_slot) * (1 << 9));
+	context->banked_code_map = malloc(sizeof(native_map_slot)); //now a single zeroed slot (the replaced lines allocated 1 << 9 of them)
+	memset(context->banked_code_map, 0, sizeof(native_map_slot));
 	context->options = options;
+	context->int_cycle = CYCLE_NEVER; //no interrupt is scheduled yet; z80_run recomputes this before each run
+	context->int_pulse_start = CYCLE_NEVER;
+	context->int_pulse_end = CYCLE_NEVER; //CYCLE_NEVER here makes z80_run ask z80_next_int_pulse for the first pulse
+}
+
+void z80_run(z80_context * context, uint32_t target_cycle) //runs the Z80 until current_cycle reaches target_cycle, honouring reset, busreq and interrupts
+{
+	if (context->reset || context->busack) {
+		context->current_cycle = target_cycle; //CPU is held in reset or off the bus: only the clock moves forward
+	} else {
+		if (context->current_cycle < target_cycle) {
+			//busreq is sampled at the end of an m-cycle
+			//we can approximate that by running for a single m-cycle after a bus request
+			context->sync_cycle = context->busreq ? context->current_cycle + 3*context->options->gen.clock_divider : target_cycle;
+			if (!context->native_pc) {
+				context->native_pc = z80_get_native_address_trans(context, context->pc); //native_pc is NULL after init_z80_context and z80_clear_reset
+			}
+			while (context->current_cycle < context->sync_cycle)
+			{
+				if (context->int_pulse_end < context->current_cycle || context->int_pulse_end == CYCLE_NEVER) {
+					z80_next_int_pulse(context); //current pulse is over (or none is known): ask system code for the next one
+				}
+				if (context->iff1) {
+					context->int_cycle = context->int_pulse_start < context->int_enable_cycle ? context->int_enable_cycle : context->int_pulse_start; //later of pulse start and int_enable_cycle
+				} else {
+					context->int_cycle = CYCLE_NEVER; //iff1 clear: no interrupt can be taken
+				}
+				context->target_cycle = context->sync_cycle < context->int_cycle ? context->sync_cycle : context->int_cycle; //stop at whichever comes first
+				dprintf("Running Z80 from cycle %d to cycle %d. Int cycle: %d (%d - %d)\n", context->current_cycle, context->sync_cycle, context->int_cycle, context->int_pulse_start, context->int_pulse_end);
+				context->options->run(context); //enter the generated code through the entry stub built by init_z80_opts
+				dprintf("Z80 ran to cycle %d\n", context->current_cycle);
+			}
+			if (context->busreq) {
+				context->busack = 1; //grant the bus after the short run above and skip to the end of the slice
+				context->current_cycle = target_cycle;
+			}
+		}
+	}
+}
+
+void z80_assert_reset(z80_context * context, uint32_t cycle) //asserts the reset line as of the given cycle
+{
+	z80_run(context, cycle); //bring the CPU up to cycle before the line changes
+	context->reset = 1; //from now on z80_run only advances the cycle counter
+}
+
+void z80_clear_reset(z80_context * context, uint32_t cycle) //releases the reset line as of the given cycle; no-op if reset was not asserted
+{
+	z80_run(context, cycle);
+	if (context->reset) {
+		//TODO: Handle case where reset is not asserted long enough
+		context->im = 0;
+		context->iff1 = context->iff2 = 0; //both interrupt flip-flops cleared
+		context->native_pc = NULL; //makes z80_run look up native code for pc again
+		context->extra_pc = NULL;
+		context->pc = 0; //execution restarts at address 0
+		context->reset = 0;
+		if (context->busreq) {
+			//TODO: Figure out appropriate delay
+			context->busack = 1; //a bus request still pending is granted immediately
+		}
+	}
+}
+
+void z80_assert_busreq(z80_context * context, uint32_t cycle) //raises the bus request as of the given cycle
+{
+	z80_run(context, cycle); //bring the CPU up to cycle first
+	context->busreq = 1; //busack is set later, by z80_run or z80_clear_reset
+}
+
+void z80_clear_busreq(z80_context * context, uint32_t cycle) //drops the bus request as of the given cycle
+{
+	z80_run(context, cycle);
+	context->busreq = 0;
+	context->busack = 0; //the acknowledge is withdrawn together with the request
 }
 
-void z80_reset(z80_context * context)
+uint8_t z80_get_busack(z80_context * context, uint32_t cycle) //returns the bus acknowledge state as of the given cycle
+{
+	z80_run(context, cycle); //z80_run may set busack if a request is pending
+	return context->busack;
+}
+
+void z80_adjust_cycles(z80_context * context, uint32_t deduction) //rebases the context's cycle counters by subtracting deduction
+{
+	if (context->current_cycle < deduction) {
+		fprintf(stderr, "WARNING: Deduction of %u cycles when Z80 cycle counter is only %u\n", deduction, context->current_cycle);
+		context->current_cycle = 0; //clamp rather than let the unsigned counter wrap
+	} else {
+		context->current_cycle -= deduction;
+	}
+	if (context->int_enable_cycle != CYCLE_NEVER) { //the CYCLE_NEVER sentinel is left untouched
+		if (context->int_enable_cycle < deduction) {
+			context->int_enable_cycle = 0;
+		} else {
+			context->int_enable_cycle -= deduction;
+		}
+	}
+	if (context->int_pulse_start != CYCLE_NEVER) {
+		if (context->int_pulse_end < deduction) {
+			context->int_pulse_start = context->int_pulse_end = CYCLE_NEVER; //whole pulse lies before the new origin: forget it
+		} else {
+			context->int_pulse_end -= deduction;
+			if (context->int_pulse_start < deduction) {
+				context->int_pulse_start = 0; //pulse straddles the new origin: clamp its start
+			} else {
+				context->int_pulse_start -= deduction;
+			}
+		}
+	}
+}
+
+uint32_t zbreakpoint_patch(z80_context * context, uint16_t address, code_ptr dst) //emits the breakpoint hook at dst and returns its size in bytes
 {
-	context->im = 0;
-	context->iff1 = context->iff2 = 0;
-	context->native_pc = z80_get_native_address_trans(context, 0);
-	context->extra_pc = NULL;
+	code_info code = {dst, dst+16}; //temporary emitter limited to a 16 byte window at dst
+	mov_ir(&code, address, context->options->gen.scratch1, SZ_W); //the stub receives the Z80 address in scratch1
+	call(&code, context->bp_stub);
+	return code.cur-dst;
+}
+
+void zcreate_stub(z80_context * context) //emits the shared breakpoint stub and records its address in context->bp_stub
+{
+	z80_options * opts = context->options;
+	code_info *code = &opts->gen.code;
+	check_code_prologue(code);
+	context->bp_stub = code->cur;
+
+		//Emit a throwaway cycle-check prologue only to measure its length, then rewind
+	check_cycles_int(&opts->gen, 0);
+	int check_int_size = code->cur-context->bp_stub;
+	code->cur = context->bp_stub;
+
+	//Measure the patch the same way; code->cur is not advanced, so the stub body below overwrites it
+	int patch_size = zbreakpoint_patch(context, 0, code->cur);
+
+		//Save context and call breakpoint handler
+	call(code, opts->gen.save_context);
+	push_r(code, opts->gen.scratch1); //keep the breakpoint address across the handler call
+	call_args_abi(code, context->bp_handler, 2, opts->gen.context_reg, opts->gen.scratch1); //NOTE(review): handler looks fixed when the stub is emitted; later zinsert_breakpoint handlers would not reach it - confirm
+	mov_rr(code, RAX, opts->gen.context_reg, SZ_PTR); //whatever the handler leaves in RAX becomes the context pointer
+		//Restore context
+	call(code, opts->gen.load_context);
+	pop_r(code, opts->gen.scratch1);
+		//Redo the cycle check that the patch overwrote in the translated code
+	cmp_rr(code, opts->gen.cycles, opts->gen.limit, SZ_D);
+	uint8_t * jmp_off = code->cur+1;
+	jcc(code, CC_NC, code->cur + 7); //placeholder target; the real offset is written through jmp_off below
+	pop_r(code, opts->gen.scratch1);
+	add_ir(code, check_int_size - patch_size, opts->gen.scratch1, SZ_PTR); //return address is patch_size past the start; move it to check_int_size past the start
+	push_r(code, opts->gen.scratch1);
+	jmp(code, opts->gen.handle_cycle_limit_int);
+	*jmp_off = code->cur - (jmp_off+1);
+		//jump back to body of translated instruction
+	pop_r(code, opts->gen.scratch1);
+	add_ir(code, check_int_size - patch_size, opts->gen.scratch1, SZ_PTR); //same adjustment: resume just past the original prologue
+	jmp_r(code, opts->gen.scratch1);
 }
 
 void zinsert_breakpoint(z80_context * context, uint16_t address, uint8_t * bp_handler)
 {
-	static uint8_t * bp_stub = NULL;
-	uint8_t * native = z80_get_native_address_trans(context, address);
-	uint8_t * start_native = native;
-	native = mov_ir(native, address, SCRATCH1, SZ_W);
-	if (!bp_stub) {
-		x86_z80_options * opts = context->options;
-		uint8_t * dst = opts->cur_code;
-		uint8_t * dst_end = opts->code_end;
-		if (dst_end - dst < 128) {
-			size_t size = 1024*1024;
-			dst = alloc_code(&size);
-			opts->code_end = dst_end = dst + size;
+	context->bp_handler = bp_handler; //recorded on every call, even when the breakpoint already exists
+	uint8_t bit = 1 << (address % sizeof(uint8_t)); //sizeof(uint8_t) is 1, so this is always bit 0 and the index below is address itself
+	if (address / sizeof(uint8_t) < sizeof(context->breakpoint_flags) && !(bit & context->breakpoint_flags[address / sizeof(uint8_t)])) { //bounds check: addresses past the flag array used to be written out of bounds, now they are ignored
+		context->breakpoint_flags[address / sizeof(uint8_t)] |= bit;
+		if (!context->bp_stub) {
+			zcreate_stub(context); //the shared stub is created lazily, once per context
 		}
-		bp_stub = dst;
-		native = call(native, bp_stub);
-
-		//Calculate length of prologue
-		dst = z80_check_cycles_int(dst, address);
-		int check_int_size = dst-bp_stub;
-		dst = bp_stub;
-
-		//Save context and call breakpoint handler
-		dst = call(dst, (uint8_t *)z80_save_context);
-		dst = push_r(dst, SCRATCH1);
-		dst = mov_rr(dst, CONTEXT, RDI, SZ_Q);
-		dst = mov_rr(dst, SCRATCH1, RSI, SZ_W);
-		dst = call(dst, bp_handler);
-		dst = mov_rr(dst, RAX, CONTEXT, SZ_Q);
-		//Restore context
-		dst = call(dst, (uint8_t *)z80_load_context);
-		dst = pop_r(dst, SCRATCH1);
-		//do prologue stuff
-		dst = cmp_rr(dst, ZCYCLES, ZLIMIT, SZ_D);
-		uint8_t * jmp_off = dst+1;
-		dst = jcc(dst, CC_NC, dst + 7);
-		dst = pop_r(dst, SCRATCH1);
-		dst = add_ir(dst, check_int_size - (native-start_native), SCRATCH1, SZ_Q);
-		dst = push_r(dst, SCRATCH1);
-		dst = jmp(dst, (uint8_t *)z80_handle_cycle_limit_int);
-		*jmp_off = dst - (jmp_off+1);
-		//jump back to body of translated instruction
-		dst = pop_r(dst, SCRATCH1);
-		dst = add_ir(dst, check_int_size - (native-start_native), SCRATCH1, SZ_Q);
-		dst = jmp_r(dst, SCRATCH1);
-		opts->cur_code = dst;
-	} else {
-		native = call(native, bp_stub);
+		uint8_t * native = z80_get_native_address(context, address);
+		if (native) {
+			zbreakpoint_patch(context, address, native); //code already translated: patch it in place now
+		}
 	}
 }
 
 void zremove_breakpoint(z80_context * context, uint16_t address)
 {
+	if (address / sizeof(uint8_t) < sizeof(context->breakpoint_flags)) { context->breakpoint_flags[address / sizeof(uint8_t)] &= ~(1 << (address % sizeof(uint8_t))); } //guarded: never write past the flag array
 	uint8_t * native = z80_get_native_address(context, address);
-	z80_check_cycles_int(native, address);
+	if (native) {
+		z80_options * opts = context->options;
+		code_info tmp_code = opts->gen.code; //remember the real emit position
+		opts->gen.code.cur = native; //temporarily aim the emitter at the translated instruction
+		opts->gen.code.last = native + 16;
+		check_cycles_int(&opts->gen, address); //re-emit the normal prologue over the breakpoint patch
+		opts->gen.code = tmp_code; //restore the emitter
+	}
 }
 
-
--- a/z80_to_x86.h	Thu May 28 21:09:33 2015 -0700
+++ b/z80_to_x86.h	Thu May 28 21:19:55 2015 -0700
@@ -9,7 +9,11 @@
 #include "backend.h"
 
 #define ZNUM_MEM_AREAS 4
+#ifdef Z80_LOG_ADDRESS
+#define ZMAX_NATIVE_SIZE 255
+#else
 #define ZMAX_NATIVE_SIZE 128
+#endif
 
 enum {
 	ZF_C = 0,
@@ -21,14 +25,29 @@
 	ZF_NUM
 };
 
+typedef void (*z80_run_fun)(void * context);
+
 typedef struct {
-	uint8_t *       cur_code;
-	uint8_t *       code_end;
-	uint8_t         *ram_inst_sizes;
-	deferred_addr * deferred;
+	cpu_options     gen; //shared backend state; init_z80_opts fills in memmap, scratch/context/cycle registers and the code buffer
+	code_ptr        save_context_scratch; //stores scratch1/scratch2 to the context, then runs into gen.save_context
+	code_ptr        load_context_scratch; //loads scratch1/scratch2 from the context, then runs into gen.load_context
+	code_ptr        native_addr; //stub: calls z80_get_native_address_trans on the address in scratch1, result left in scratch1
+	code_ptr        retrans_stub; //stub: calls z80_retranslate_inst and jumps to the code it returns
+	code_ptr        do_sync; //entry inside handle_cycle_limit_int that saves context and returns to the caller of run
+	code_ptr        read_8; //memory access helpers produced by gen_mem_fun in init_z80_opts
+	code_ptr        write_8;
+	code_ptr        read_8_noinc; //secondary entry points returned by gen_mem_fun; presumably skip the cycle increment - confirm in gen_mem_fun
+	code_ptr        write_8_noinc;
+	code_ptr        read_16; //built from two read_8_noinc calls, low byte first
+	code_ptr        write_16_highfirst; //two write_8_noinc calls, high byte (address + 1) written first
+	code_ptr        write_16_lowfirst; //two write_8_noinc calls, low byte written first
+	code_ptr		read_io; //currently always yields 0xFF in scratch1
+	code_ptr		write_io; //currently only accounts for cycles
+
 	uint32_t        flags;
 	int8_t          regs[Z80_UNUSED];
-} x86_z80_options;
+	z80_run_fun     run; //entry point into generated code, called by z80_run
+} z80_options;
 
 typedef struct {
 	void *            native_pc;
@@ -51,23 +70,40 @@
 	uint32_t          int_cycle;
 	native_map_slot * static_code_map;
 	native_map_slot * banked_code_map;
-	void *            options;
+	z80_options *     options;
 	void *            system;
 	uint8_t           ram_code_flags[(8 * 1024)/128/8];
 	uint32_t          int_enable_cycle;
   uint16_t          pc;
+	uint32_t          int_pulse_start;
+	uint32_t          int_pulse_end;
+	uint8_t           breakpoint_flags[(16 * 1024)/sizeof(uint8_t)];
+	uint8_t *         bp_handler;
+	uint8_t *         bp_stub;
+	uint8_t *         interp_code[256];
+	uint8_t           reset;
+	uint8_t           busreq;
+	uint8_t           busack;
 } z80_context;
 
 void translate_z80_stream(z80_context * context, uint32_t address);
-void init_x86_z80_opts(x86_z80_options * options);
-void init_z80_context(z80_context * context, x86_z80_options * options);
-uint8_t * z80_get_native_address(z80_context * context, uint32_t address);
-uint8_t * z80_get_native_address_trans(z80_context * context, uint32_t address);
+void init_z80_opts(z80_options * options, memmap_chunk const * chunks, uint32_t num_chunks, uint32_t clock_divider);
+void init_z80_context(z80_context * context, z80_options * options);
+code_ptr z80_get_native_address(z80_context * context, uint32_t address);
+code_ptr z80_get_native_address_trans(z80_context * context, uint32_t address);
 z80_context * z80_handle_code_write(uint32_t address, z80_context * context);
-void z80_run(z80_context * context);
 void z80_reset(z80_context * context);
 void zinsert_breakpoint(z80_context * context, uint16_t address, uint8_t * bp_handler);
 void zremove_breakpoint(z80_context * context, uint16_t address);
+void z80_run(z80_context * context, uint32_t target_cycle);
+void z80_assert_reset(z80_context * context, uint32_t cycle);
+void z80_clear_reset(z80_context * context, uint32_t cycle);
+void z80_assert_busreq(z80_context * context, uint32_t cycle);
+void z80_clear_busreq(z80_context * context, uint32_t cycle);
+uint8_t z80_get_busack(z80_context * context, uint32_t cycle);
+void z80_adjust_cycles(z80_context * context, uint32_t deduction);
+//to be provided by system code
+void z80_next_int_pulse(z80_context * z_context);
 
 #endif //Z80_TO_X86_H_
 
--- a/z80inst.c	Thu May 28 21:09:33 2015 -0700
+++ b/z80inst.c	Thu May 28 21:19:55 2015 -0700
@@ -1,6 +1,6 @@
 /*
  Copyright 2013 Michael Pavone
- This file is part of BlastEm. 
+ This file is part of BlastEm.
  BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
 */
 #include "z80inst.h"
@@ -433,7 +433,7 @@
 	{op, Z80_L, Z80_UNUSED, Z80_UNUSED, 1},\
 	{op, Z80_UNUSED, Z80_REG_INDIRECT, Z80_HL, 1},\
 	{op, Z80_A, Z80_UNUSED, Z80_UNUSED, 1}
-	
+
 #define BIT_BLOCK(op, bit) \
 	{op, Z80_USE_IMMED, Z80_REG, Z80_B, bit},\
 	{op, Z80_USE_IMMED, Z80_REG, Z80_C, bit},\
@@ -771,14 +771,14 @@
 };
 
 #define SHIFT_BLOCK_IX(op) \
-	{op, Z80_B, Z80_IX_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_C, Z80_IX_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_D, Z80_IX_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_E, Z80_IX_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_H, Z80_IX_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_L, Z80_IX_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_UNUSED, Z80_IX_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_A, Z80_IX_DISPLACE | Z80_DIR, 0, 0}
+	{op, Z80_B, Z80_IX_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_C, Z80_IX_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_D, Z80_IX_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_E, Z80_IX_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_H, Z80_IX_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_L, Z80_IX_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_UNUSED, Z80_IX_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_A, Z80_IX_DISPLACE | Z80_DIR, 0, 1}
 
 #define BIT_BLOCK_IX(bit) \
 	{Z80_BIT, Z80_USE_IMMED, Z80_IX_DISPLACE, 0, bit},\
@@ -1129,14 +1129,14 @@
 };
 
 #define SHIFT_BLOCK_IY(op) \
-	{op, Z80_B, Z80_IY_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_C, Z80_IY_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_D, Z80_IY_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_E, Z80_IY_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_H, Z80_IY_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_L, Z80_IY_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_UNUSED, Z80_IY_DISPLACE | Z80_DIR, 0, 0},\
-	{op, Z80_A, Z80_IY_DISPLACE | Z80_DIR, 0, 0}
+	{op, Z80_B, Z80_IY_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_C, Z80_IY_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_D, Z80_IY_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_E, Z80_IY_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_H, Z80_IY_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_L, Z80_IY_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_UNUSED, Z80_IY_DISPLACE | Z80_DIR, 0, 1},\
+	{op, Z80_A, Z80_IY_DISPLACE | Z80_DIR, 0, 1}
 
 #define BIT_BLOCK_IY(bit) \
 	{Z80_BIT, Z80_USE_IMMED, Z80_IY_DISPLACE, 0, bit},\
@@ -1250,7 +1250,7 @@
 		}
 	} else {
 		memcpy(decoded, z80_tbl_a + *istream, sizeof(z80inst));
-		
+
 	}
 	if ((decoded->addr_mode & 0x1F) == Z80_IMMED && decoded->op != Z80_RST && decoded->op != Z80_IM) {
 		decoded->immed = *(++istream);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/zcompare.py	Thu May 28 21:19:55 2015 -0700
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+from glob import glob
+import subprocess
+from sys import exit,argv
+
+prefixes = []  # test file name prefixes to run; empty means run everything
+skip = set()  # .bin paths mentioned in a previous report, not run again
+for i in range(1, len(argv)):
+	if '.' in argv[i]:  # an argument containing '.' is read as a previous report file
+		f = open(argv[i])
+		for line in f:
+			parts = line.split()
+			for part in parts:
+				if part.endswith('.bin'):
+					skip.add(part)
+		f.close()
+		print 'Skipping',len(skip),'entries from previous report.'
+	else:
+		prefixes.append(argv[i])  # anything else is a name prefix filter
+
+for path in glob('ztests/*/*.bin'):
+	if path in skip:
+		continue
+	if prefixes:
+		good = False
+		fname = path.split('/')[-1]
+		for prefix in prefixes:
+			if fname.startswith(prefix):
+				good = True
+				break
+		if not good:
+			continue
+	try:
+		b = subprocess.check_output(['./ztestrun', path])  # blastem result; non-zero exit raises CalledProcessError (outer except)
+		try:
+			m = subprocess.check_output(['gxz80/gxzrun', path])  # reference result from gxz80
+			#_,_,b = b.partition('\n')
+			if b != m:  # outputs are compared verbatim
+				print '-----------------------------'
+				print 'Mismatch in ' + path
+				print 'blastem output:'
+				print b
+				print 'gxz80 output:'
+				print m
+				print '-----------------------------'
+			else:
+				print path, 'passed'
+		except subprocess.CalledProcessError as e:
+			print '-----------------------------'
+			print 'gxz80 exited with code', e.returncode, 'for test', path
+			print 'blastem output:'
+			print b
+			print '-----------------------------'
+	except subprocess.CalledProcessError as e:
+		print '-----------------------------'
+		print 'blastem exited with code', e.returncode, 'for test', path
+		print '-----------------------------'
+
--- a/zruntime.S	Thu May 28 21:09:33 2015 -0700
+++ b/zruntime.S	Thu May 28 21:19:55 2015 -0700
@@ -125,7 +125,7 @@
 	add $3, %ebp
 	push %rsi
 	mov 144(%rsi), %rsi /* get system context pointer */
-	cmp $0, 120(%rsi) /* check bus busy flag */
+	cmpb $0, 120(%rsi) /* check bus busy flag */
 	pop %rsi
 	jne bus_busy
 z80_read_bank_cont:
@@ -150,7 +150,15 @@
 	call z80_save_context
 	mov %r13w, %di
 	push %rsi
+	test $8, %rsp
+	jnz 0f
 	call z80_read_ym
+	jmp 1f
+0:
+	sub $8, %rsp
+	call z80_read_ym
+	add $8, %rsp
+1:
 	pop %rsi
 	mov %al, %r13b
 	call z80_load_context
@@ -196,7 +204,15 @@
 	call z80_save_context
 	mov %r14w, %di
 	mov %r13b, %dl
+	test $8, %rsp
+	jnz 0f
 	call z80_write_ym
+	jmp 1f
+0:
+	sub $8, %rsp
+	call z80_write_ym
+	add $8, %rsp
+1:
 	mov %rax, %rsi
 	jmp z80_load_context
 z80_write_bank_reg:
@@ -219,7 +235,15 @@
 	call z80_save_context
 	mov %r14w, %di
 	mov %r13b, %dl
+	test $8, %rsp
+	jnz 0f
 	call z80_vdp_port_write
+	jmp 1f
+0:
+	sub $8, %rsp
+	call z80_vdp_port_write
+	add $8, %rsp
+1:
 	mov %rax, %rsi
 	jmp z80_load_context
 
@@ -243,7 +267,7 @@
 	add $3, %ebp /* first read typically has 3 wait states */
 	push %rsi
 	mov 144(%rsi), %rsi /* get system context pointer */
-	cmp $0, 120(%rsi) /* check bus busy flag */
+	cmpb $0, 120(%rsi) /* check bus busy flag */
 	pop %rsi
 	jne bus_busy_word
 z80_read_bank_word_cont:
@@ -256,7 +280,7 @@
 	add $4, %ebp /* second read typically has 4 wait states */
 	push %rsi
 	mov 144(%rsi), %rsi /* get system context pointer */
-	cmp $0, 120(%rsi) /* check bus busy flag */
+	cmpb $0, 120(%rsi) /* check bus busy flag */
 	pop %rsi
 	jne bus_busy_word2
 z80_read_bank_word_cont2:
--- a/ztestgen.c	Thu May 28 21:09:33 2015 -0700
+++ b/ztestgen.c	Thu May 28 21:19:55 2015 -0700
@@ -24,6 +24,7 @@
 #define PRE_IX  0xDD
 #define PRE_IY  0xFD
 #define LD_IR16 0x01
+#define INC_R8  0x04
 #define LD_IR8  0x06
 #define LD_RR8  0x40
 #define AND_R   0xA0
@@ -143,6 +144,43 @@
 	}
 }
 
+uint8_t * inc_r(uint8_t *dst, uint8_t reg) //writes an 8-bit register increment at dst and returns the position after it
+{
+	if (reg == Z80_IXH || reg == Z80_IXL) {
+		*(dst++) = PRE_IX; //IX halves: prefix byte, then encode as the matching H/L register
+		return inc_r(dst, reg - (Z80_IXL - Z80_L)); //maps IXL to L; IXH lands on H only if the enum spacing matches - confirm in z80inst.h
+	} else if(reg == Z80_IYH || reg == Z80_IYL) {
+		*(dst++) = PRE_IY; //IY halves: same scheme with the IY prefix
+		return inc_r(dst, reg - (Z80_IYL - Z80_L));
+	} else {
+		*(dst++) = INC_R8 | reg << 3; //register number goes into bits 3 and up of the opcode
+		return dst;
+	}
+}
+
+void mark_used8(uint8_t *reg_usage, uint16_t *reg_values, uint8_t reg, uint8_t init_value) //marks an 8-bit register as used with the given initial value
+{
+	reg_usage[reg] = 1;
+	reg_values[reg] = init_value;
+	uint8_t word_reg = z80_word_reg(reg); //the 16-bit register containing reg, or Z80_UNUSED
+	if (word_reg != Z80_UNUSED) {
+		reg_usage[word_reg] = 1; //the pair counts as used too
+		reg_values[word_reg] = (reg_values[z80_high_reg(word_reg)] << 8) | (reg_values[z80_low_reg(word_reg)] & 0xFF); //rebuild the pair value from its two halves
+	}
+}
+
+uint8_t alloc_reg8(uint8_t *reg_usage, uint16_t *reg_values, uint8_t init_value) //claims the first unused register numbered below Z80_BC
+{
+	for (uint8_t reg = 0; reg < Z80_BC; reg++) //presumably the 8-bit registers precede Z80_BC in the enum - confirm in z80inst.h
+	{
+		if (!reg_usage[reg]) {
+			mark_used8(reg_usage, reg_values, reg, init_value);
+			return reg;
+		}
+	}
+	return Z80_UNUSED; //every candidate register is already in use
+}
+
 void z80_gen_test(z80inst * inst, uint8_t *instbuf, uint8_t instlen)
 {
 	z80inst copy;
@@ -184,12 +222,7 @@
 			reg_values[z80_low_reg(inst->ea_reg)] = reg_values[inst->ea_reg] & 0xFF;
 			reg_usage[z80_low_reg(inst->ea_reg)] = 1;
 		} else {
-			reg_values[inst->ea_reg] = rand() % 256;
-			uint8_t word_reg = z80_word_reg(inst->ea_reg);
-			if (word_reg != Z80_UNUSED) {
-				reg_usage[word_reg] = 1;
-				reg_values[word_reg] = (reg_values[z80_high_reg(word_reg)] << 8) | (reg_values[z80_low_reg(word_reg)] & 0xFF);
-			}
+			mark_used8(reg_usage, reg_values, inst->ea_reg, rand() % 256);
 		}
 		break;
 	case Z80_REG_INDIRECT:
@@ -255,6 +288,10 @@
 		}
 		reg_usage[inst->reg] = 1;
 	}
+	uint8_t counter_reg = Z80_UNUSED;
+	if (inst->op >= Z80_JP && inst->op <= Z80_JRCC) {
+		counter_reg = alloc_reg8(reg_usage, reg_values, 0);
+	}
 	puts("--------------");
 	for (uint8_t reg = 0; reg < Z80_UNUSED; reg++) {
 		if (reg_values[reg]) {
@@ -293,11 +330,26 @@
 
 		//setup other regs
 		for (uint8_t reg = Z80_BC; reg <= Z80_IY; reg++) {
-			if (reg != Z80_AF && reg != Z80_SP) {
-				cur = ld_ir16(cur, reg, reg_values[reg]);
+			if (reg != Z80_AF && reg != Z80_SP && (inst->op != Z80_JP || addr_mode != Z80_REG_INDIRECT || inst->ea_reg != reg)) {
+				if (i == 1 && (z80_high_reg(reg) == counter_reg || z80_low_reg(reg) == counter_reg)) {
+					if (z80_high_reg(reg) == counter_reg) {
+						if (reg_usage[z80_low_reg(reg)]) {
+							cur = ld_ir8(cur, z80_low_reg(reg), reg_values[z80_low_reg(reg)]);
+						}
+					} else if (reg_usage[z80_high_reg(reg)]) {
+						cur = ld_ir8(cur, z80_high_reg(reg), reg_values[z80_high_reg(reg)]);
+					}
+				} else {
+					cur = ld_ir16(cur, reg, reg_values[reg]);
+				}
 			}
 		}
 
+		if (inst->op == Z80_JP && addr_mode == Z80_REG_INDIRECT) {
+			uint16_t address = cur - prog + (inst->ea_reg == Z80_HL ? 3 : 4) + instlen + 1 + i;
+			cur = ld_ir16(cur, inst->ea_reg, address);
+		}
+
 		//copy instruction
 		if (instlen == 3) {
 			memcpy(cur, instbuf, 2);
@@ -310,6 +362,12 @@
 		//immed/displacement byte(s)
 		if (addr_mode == Z80_IX_DISPLACE || addr_mode == Z80_IY_DISPLACE) {
 			*(cur++) = inst->ea_reg;
+		} else if ((inst->op == Z80_JP || inst->op == Z80_JPCC) && addr_mode == Z80_IMMED) {
+			uint16_t address = cur - prog + 3 + i; //2 for immed address, 1/2 for instruction(s) to skip
+			*(cur++) = address;
+			*(cur++) = address >> 8;
+		} else if(inst->op == Z80_JR || inst->op == Z80_JRCC) {
+			*(cur++) = 1 + i; //skip one or 2 instructions based on value of i
 		} else if (addr_mode == Z80_IMMED & inst->op != Z80_IM) {
 			*(cur++) = inst->immed & 0xFF;
 			if (word_sized) {
@@ -325,6 +383,13 @@
 		if (instlen == 3) {
 			*(cur++) = instbuf[2];
 		}
+		if (inst->op >= Z80_JP && inst->op <= Z80_JRCC) {
+			cur = inc_r(cur, counter_reg);
+			if (i) {
+				//inc twice on second iteration so we can differentiate the two
+				cur = inc_r(cur, counter_reg);
+			}
+		}
 		if (!i) {
 			//Save AF from first run
 			cur = push(cur, Z80_AF);
@@ -399,7 +464,7 @@
 
 uint8_t should_skip(z80inst * inst)
 {
-	return inst->op >= Z80_JP || (inst->op >= Z80_LDI && inst->op <= Z80_CPDR) || inst->op == Z80_HALT
+	return inst->op >= Z80_DJNZ || (inst->op >= Z80_LDI && inst->op <= Z80_CPDR) || inst->op == Z80_HALT
 		|| inst->op == Z80_DAA || inst->op == Z80_RLD || inst->op == Z80_RRD || inst->op == Z80_NOP
 		|| inst->op == Z80_DI || inst->op == Z80_EI;
 }
--- a/ztestrun.c	Thu May 28 21:09:33 2015 -0700
+++ b/ztestrun.c	Thu May 28 21:19:55 2015 -0700
@@ -1,6 +1,6 @@
 /*
  Copyright 2013 Michael Pavone
- This file is part of BlastEm. 
+ This file is part of BlastEm.
  BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
 */
 #include "z80inst.h"
@@ -9,92 +9,101 @@
 #include "vdp.h"
 #include <stdio.h>
 #include <stdlib.h>
+#include <stddef.h>
 
 uint8_t z80_ram[0x2000];
-uint16_t cart[0x200000];
 
-#define MCLKS_PER_Z80 15
-//TODO: Figure out the exact value for this
-#define MCLKS_PER_FRAME (MCLKS_LINE*262)
-#define VINT_CYCLE ((MCLKS_LINE * 226)/MCLKS_PER_Z80)
-#define CYCLE_NEVER 0xFFFFFFFF
-
-uint8_t z80_read_ym(uint16_t location, z80_context * context)
+uint8_t z80_unmapped_read(uint32_t location, void * context)
 {
 	return 0xFF;
 }
 
-z80_context * z80_write_ym(uint16_t location, z80_context * context, uint8_t value)
+void * z80_unmapped_write(uint32_t location, void * context, uint8_t value)
 {
 	return context;
 }
 
-z80_context * z80_vdp_port_write(uint16_t location, z80_context * context, uint8_t value)
+const memmap_chunk z80_map[] = { //two-entry memory map handed to init_z80_opts in main
+	{ 0x0000, 0x4000,  0x1FFF, 0, MMAP_READ | MMAP_WRITE | MMAP_CODE, z80_ram, NULL, NULL, NULL,              NULL }, //backed directly by z80_ram; presumably start, end, mask come first - TODO confirm memmap_chunk field order
+	{ 0x4000, 0x10000, 0xFFFF, 0, 0,                                  NULL,    NULL, NULL, z80_unmapped_read, z80_unmapped_write} //no backing buffer: served by the unmapped read/write handlers
+};
+
+void z80_next_int_pulse(z80_context * context)
 {
-	return context;
+	context->int_pulse_start = context->int_pulse_end = CYCLE_NEVER;
 }
 
 int main(int argc, char ** argv)
 {
 	long filesize;
 	uint8_t *filebuf;
-	x86_z80_options opts;
+	z80_options opts;
 	z80_context context;
-	if (argc < 2) {
-		fputs("usage: transz80 zrom [cartrom]\n", stderr);
+	char *fname = NULL;
+	uint8_t retranslate = 0;
+	for (int i = 1; i < argc; i++)
+	{
+		if (argv[i][0] == '-') {
+			switch(argv[i][1])
+			{
+			case 'r':
+				retranslate = 1;
+				break;
+			default:
+				fprintf(stderr, "Unrecognized switch -%c\n", argv[i][1]);
+				exit(1);
+			}
+		} else if (!fname) {
+			fname = argv[i];
+		}
+	}
+	if (!fname) {
+		fputs("usage: ztestrun zrom [cartrom]\n", stderr);
 		exit(1);
 	}
-	FILE * f = fopen(argv[1], "rb");
+	FILE * f = fopen(fname, "rb");
 	if (!f) {
-		fprintf(stderr, "unable to open file %s\n", argv[2]);
+		fprintf(stderr, "unable to open file %s\n", fname);
 		exit(1);
 	}
 	fseek(f, 0, SEEK_END);
 	filesize = ftell(f);
 	fseek(f, 0, SEEK_SET);
-	fread(z80_ram, 1, filesize < sizeof(z80_ram) ? filesize : sizeof(z80_ram), f);
+	filesize = filesize < sizeof(z80_ram) ? filesize : sizeof(z80_ram);
+	if (fread(z80_ram, 1, filesize, f) != filesize) {
+		fprintf(stderr, "error reading %s\n",fname);
+		exit(1);
+	}
 	fclose(f);
-	if (argc > 2) {
-		f = fopen(argv[2], "rb");
-		if (!f) {
-			fprintf(stderr, "unable to open file %s\n", argv[2]);
-			exit(1);
-		}
-		fseek(f, 0, SEEK_END);
-		filesize = ftell(f);
-		fseek(f, 0, SEEK_SET);
-		fread(cart, 1, filesize < sizeof(cart) ? filesize : sizeof(cart), f);
-		fclose(f);
-		for(unsigned short * cur = cart; cur - cart < (filesize/2); ++cur)
-		{
-			*cur = (*cur >> 8) | (*cur << 8);
-		}
-	}
-	init_x86_z80_opts(&opts);
+	init_z80_opts(&opts, z80_map, 2, 1);
 	init_z80_context(&context, &opts);
 	//Z80 RAM
 	context.mem_pointers[0] = z80_ram;
-	context.sync_cycle = context.target_cycle = 1000;
-	context.int_cycle = CYCLE_NEVER;
-	//cartridge/bank
-	context.mem_pointers[1] = context.mem_pointers[2] = (uint8_t *)cart;
-	z80_reset(&context);
-	while (context.current_cycle < 1000) {
-		z80_run(&context);
+	if (retranslate) {
+		//run core long enough to translate code
+		z80_run(&context, 1);
+		for (int i = 0; i < filesize; i++)
+		{
+			z80_handle_code_write(i, &context);
+		}
+		z80_assert_reset(&context, context.current_cycle);
+		z80_clear_reset(&context, context.current_cycle + 3);
+		z80_adjust_cycles(&context, context.current_cycle);
 	}
-	printf("A: %X\nB: %X\nC: %X\nD: %X\nE: %X\nHL: %X\nIX: %X\nIY: %X\nSP: %X\n\nIM: %d, IFF1: %d, IFF2: %d\n", 
+	z80_run(&context, 1000);
+	printf("A: %X\nB: %X\nC: %X\nD: %X\nE: %X\nHL: %X\nIX: %X\nIY: %X\nSP: %X\n\nIM: %d, IFF1: %d, IFF2: %d\n",
 		context.regs[Z80_A], context.regs[Z80_B], context.regs[Z80_C],
-		context.regs[Z80_D], context.regs[Z80_E], 
-		(context.regs[Z80_H] << 8) | context.regs[Z80_L], 
-		(context.regs[Z80_IXH] << 8) | context.regs[Z80_IXL], 
-		(context.regs[Z80_IYH] << 8) | context.regs[Z80_IYL], 
+		context.regs[Z80_D], context.regs[Z80_E],
+		(context.regs[Z80_H] << 8) | context.regs[Z80_L],
+		(context.regs[Z80_IXH] << 8) | context.regs[Z80_IXL],
+		(context.regs[Z80_IYH] << 8) | context.regs[Z80_IYL],
 		context.sp, context.im, context.iff1, context.iff2);
 	printf("Flags: SZVNC\n"
 	       "       %d%d%d%d%d\n", context.flags[ZF_S], context.flags[ZF_Z], context.flags[ZF_PV], context.flags[ZF_N], context.flags[ZF_C]);
 	puts("--Alternate Regs--");
-	printf("A: %X\nB: %X\nC: %X\nD: %X\nE: %X\nHL: %X\n", 
+	printf("A: %X\nB: %X\nC: %X\nD: %X\nE: %X\nHL: %X\n",
 		context.alt_regs[Z80_A], context.alt_regs[Z80_B], context.alt_regs[Z80_C],
-		context.alt_regs[Z80_D], context.alt_regs[Z80_E], 
+		context.alt_regs[Z80_D], context.alt_regs[Z80_E],
 		(context.alt_regs[Z80_H] << 8) | context.alt_regs[Z80_L]);
 	return 0;
 }