diff vdp.c @ 803:236a184bf6f0

Merge
author Michael Pavone <pavone@retrodev.com>
date Sun, 26 Jul 2015 16:51:03 -0700
parents 0565b2c1a034
children ac65086c031e
line wrap: on
line diff
--- a/vdp.c	Sun Jul 26 16:48:25 2015 -0700
+++ b/vdp.c	Sun Jul 26 16:51:03 2015 -0700
@@ -9,8 +9,8 @@
 #include <string.h>
 #include "render.h"
 
-#define NTSC_ACTIVE 225
-#define PAL_ACTIVE 241
+#define NTSC_INACTIVE_START 224
+#define PAL_INACTIVE_START 240
 #define BUF_BIT_PRIORITY 0x40
 #define MAP_BIT_PRIORITY 0x8000
 #define MAP_BIT_H_FLIP 0x800
@@ -22,14 +22,19 @@
 
 #define MCLKS_SLOT_H40  16
 #define MCLKS_SLOT_H32  20
-#define VINT_CYCLE_H40  (21*MCLKS_SLOT_H40+332+9*MCLKS_SLOT_H40) //21 slots before HSYNC, 16 during, 10 after
-#define VINT_CYCLE_H32  ((33+20+7)*MCLKS_SLOT_H32)  //33 slots before HSYNC, 20 during, 7 after  TODO: confirm final number
-#define HSYNC_SLOT_H40  21
-#define MCLK_WEIRD_END  (HSYNC_SLOT_H40*MCLKS_SLOT_H40 + 332)
-#define SLOT_WEIRD_END  (HSYNC_SLOT_H40+17)
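+#define VINT_SLOT_H40  4 //hslot on the first inactive line at which VINT becomes pending; the old cycle-based note (21 slots before HSYNC, 16 during, 10 after) described VINT_CYCLE_H40 and no longer applies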
+#define VINT_SLOT_H32  4  //old value was 23, but recent tests suggest the actual value is close to the H40 one
+#define HSYNC_SLOT_H40  234
+#define HSYNC_END_H40  (HSYNC_SLOT_H40+17)
 #define HSYNC_END_H32   (33 * MCLKS_SLOT_H32)
-#define HBLANK_CLEAR_H40 (MCLK_WEIRD_END+61*4)
-#define HBLANK_CLEAR_H32 (HSYNC_END_H32 + 46*5)
+#define HBLANK_START_H40 178 //should be 179 according to Nemesis, but 178 seems to fit slightly better with my test ROM results
+#define HBLANK_END_H40  0 //should be 5.5 according to Nemesis, but 0 seems to fit better with my test ROM results
+#define HBLANK_START_H32 233 //should be 147 according to Nemesis which is very different from my test ROM result
+#define HBLANK_END_H32 0 //should be 5 according to Nemesis, but 0 seems to fit better with my test ROM results
+#define LINE_CHANGE_H40 165
+#define LINE_CHANGE_H32 132
+#define VBLANK_START_H40 (LINE_CHANGE_H40+2)
+#define VBLANK_START_H32 (LINE_CHANGE_H32+2)
 #define FIFO_LATENCY    3
 
 int32_t color_map[1 << 12];
@@ -45,7 +50,7 @@
 
 uint8_t color_map_init_done;
 
-void init_vdp_context(vdp_context * context)
+void init_vdp_context(vdp_context * context, uint8_t region_pal)
 {
 	memset(context, 0, sizeof(*context));
 	context->vdpmem = malloc(VRAM_SIZE);
@@ -57,10 +62,8 @@
 		memset(context->framebuf, 0, FRAMEBUF_ENTRIES * (32 / 8));
 		context->evenbuf = malloc(FRAMEBUF_ENTRIES * (32 / 8));
 		memset(context->evenbuf, 0, FRAMEBUF_ENTRIES * (32 / 8));
-		context->b32 = 1;
 	} else {
 		render_alloc_surfaces(context);
-		context->b32 = render_depth() == 32;
 	}
 	context->framebuf = context->oddbuf;
 	context->linebuf = malloc(LINEBUF_SIZE + SCROLL_BUFFER_SIZE*2);
@@ -132,18 +135,21 @@
 			context->debugcolors[color] = render_map_color(r, g, b);
 		}
 	}
+	if (region_pal) {
+		context->flags2 |= FLAG2_REGION_PAL;
+	}
 }
 
 int is_refresh(vdp_context * context, uint32_t slot)
 {
-	if (context->latched_mode & BIT_H40) {
-		return (slot == 37 || slot == 69 || slot == 102 || slot == 133 || slot == 165 || slot == 197 || slot >= 210);
+	if (context->regs[REG_MODE_4] & BIT_H40) {
+		return slot == 250 || slot == 26 || slot == 59 || slot == 90 || slot == 122 || slot == 154;
 	} else {
 		//TODO: Figure out which slots are refresh when display is off in 32-cell mode
 		//These numbers are guesses based on H40 numbers
-		return (slot == 24 || slot == 56 || slot == 88 || slot == 120 || slot == 152);
+		return slot == 243 || slot == 19 || slot == 51 || slot == 83 || slot == 115;
 		//The numbers below are the refresh slots during active display
-		//return (slot == 66 || slot == 98 || slot == 130 || slot == 162);
+		//return (slot == 29 || slot == 61 || slot == 93 || slot == 125);
 	}
 }
 
@@ -203,6 +209,45 @@
 	} while (current_index != 0 && count < 80);
 }
 
+#define VRAM_READ 0 //0000
+#define VRAM_WRITE 1 //0001
+//2 would trigger register write 0010
+#define CRAM_WRITE 3 //0011
+#define VSRAM_READ 4 //0100
+#define VSRAM_WRITE 5//0101
+//6 would trigger register write 0110
+//7 is a mystery
+#define CRAM_READ 8  //1000
+//9 is also a mystery //1001
+//A would trigger register write 1010
+//B is a mystery 1011
+#define VRAM_READ8 0xC //1100
+//D is a mystery 1101
+//E would trigger register write 1110
+//F is a mystery 1111
+#define DMA_START 0x20
+
+const char * cd_name(uint8_t cd) //returns a string literal naming the access type encoded in the low 4 bits of cd
+{
+	switch (cd & 0xF) //bits above the low nibble (e.g. DMA_START, 0x20) are masked off before matching
+	{
+	case VRAM_READ:
+		return "VRAM read";
+	case VRAM_WRITE:
+		return "VRAM write";
+	case CRAM_WRITE:
+		return "CRAM write";
+	case VSRAM_READ:
+		return "VSRAM read";
+	case VSRAM_WRITE:
+		return "VSRAM write";
+	case VRAM_READ8:
+		return "VRAM read (undocumented 8-bit mode)";
+	default: //NOTE(review): CRAM_READ (8) has no case of its own, so it is reported as "invalid" as well
+		return "invalid";
+	}
+}
+
 void vdp_print_reg_explain(vdp_context * context)
 {
 	char * hscroll[] = {"full", "7-line", "cell", "line"};
@@ -211,7 +256,7 @@
 	       "01: %.2X | Display %s, V-ints %s, Height: %d, Mode %d\n"
 	       "0B: %.2X | E-ints %s, V-Scroll: %s, H-Scroll: %s\n"
 	       "0C: %.2X | Width: %d, Shadow/Highlight: %s\n",
-	       context->regs[REG_MODE_1], context->regs[REG_MODE_1] & BIT_HINT_EN ? "enabled" : "disabled", context->regs[REG_MODE_1] & BIT_PAL_SEL != 0,
+	       context->regs[REG_MODE_1], context->regs[REG_MODE_1] & BIT_HINT_EN ? "enabled" : "disabled", (context->regs[REG_MODE_1] & BIT_PAL_SEL) != 0,
 	           context->regs[REG_MODE_1] & BIT_HVC_LATCH ? "enabled" : "disabled", context->regs[REG_MODE_1] & BIT_DISP_DIS ? "disabled" : "enabled",
 	       context->regs[REG_MODE_2], context->regs[REG_MODE_2] & BIT_DISP_EN ? "enabled" : "disabled", context->regs[REG_MODE_2] & BIT_VINT_EN ? "enabled" : "disabled",
 	           context->regs[REG_MODE_2] & BIT_PAL ? 30 : 28, context->regs[REG_MODE_2] & BIT_MODE_5 ? 5 : 4,
@@ -227,8 +272,8 @@
 	       context->regs[REG_SCROLL_A], (context->regs[REG_SCROLL_A] & 0x38) << 10,
 	       context->regs[REG_WINDOW], (context->regs[REG_WINDOW] & (context->regs[REG_MODE_4] & BIT_H40 ? 0x3C : 0x3E)) << 10,
 	       context->regs[REG_SCROLL_B], (context->regs[REG_SCROLL_B] & 0x7) << 13,
-	       context->regs[REG_SAT], (context->regs[REG_SAT] & (context->regs[REG_MODE_4] & BIT_H40 ? 0x3E : 0x3F)) << 9,
-	       context->regs[REG_HSCROLL], (context->regs[REG_HSCROLL] & 0x1F) << 10);
+	       context->regs[REG_SAT], (context->regs[REG_SAT] & (context->regs[REG_MODE_4] & BIT_H40 ? 0x7E : 0x7F)) << 9,
+	       context->regs[REG_HSCROLL], (context->regs[REG_HSCROLL] & 0x3F) << 10);
 	char * sizes[] = {"32", "64", "invalid", "128"};
 	printf("\n**Misc Group**\n"
 	       "07: %.2X | Backdrop Color: $%X\n"
@@ -239,11 +284,28 @@
 	       context->regs[REG_HINT], context->regs[REG_HINT],
 	       context->regs[REG_AUTOINC], context->regs[REG_AUTOINC],
 	       context->regs[REG_SCROLL], sizes[context->regs[REG_SCROLL] & 0x3], sizes[context->regs[REG_SCROLL] >> 4 & 0x3]);
+	char * src_types[] = {"68K", "68K", "Copy", "Fill"};
+	printf("\n**DMA Group**\n"
+	       "13: %.2X |\n"
+		   "14: %.2X | DMA Length: $%.4X words\n"
+		   "15: %.2X |\n"
+		   "16: %.2X |\n"
+		   "17: %.2X | DMA Source Address: $%.6X, Type: %s\n",
+		   context->regs[REG_DMALEN_L],
+		   context->regs[REG_DMALEN_H], context->regs[REG_DMALEN_H] << 8 | context->regs[REG_DMALEN_L],
+		   context->regs[REG_DMASRC_L],
+		   context->regs[REG_DMASRC_M],
+		   context->regs[REG_DMASRC_H],
+		       context->regs[REG_DMASRC_H] << 17 | context->regs[REG_DMASRC_M] << 9 | context->regs[REG_DMASRC_L] << 1,
+			   src_types[context->regs[REG_DMASRC_H] >> 6 & 3]);
 	printf("\n**Internal Group**\n"
 	       "Address: %X\n"
-	       "CD:      %X\n"
-	       "Pending: %s\n",
-	       context->address, context->cd, (context->flags & FLAG_PENDING) ? "true" : "false");
+	       "CD:      %X - %s\n"
+	       "Pending: %s\n"
+		   "VCounter: %d\n"
+		   "HCounter: %d\n",
+	       context->address, context->cd, cd_name(context->cd), (context->flags & FLAG_PENDING) ? "true" : "false",
+		   context->vcounter, context->hslot*2);
 
 	//TODO: Window Group, DMA Group
 }
@@ -269,7 +331,7 @@
 			height_mult = 8;
 		}
 		context->sprite_index &= 0x7F;
-		if (context->latched_mode & BIT_H40) {
+		if (context->regs[REG_MODE_4] & BIT_H40) {
 			if (context->sprite_index >= MAX_SPRITES_FRAME) {
 				context->sprite_index = 0;
 				return;
@@ -393,24 +455,6 @@
 	context->colors[addr + CRAM_SIZE*2] = color_map[(value & 0xEEE) | FBUF_HILIGHT];
 }
 
-#define VRAM_READ 0 //0000
-#define VRAM_WRITE 1 //0001
-//2 would trigger register write 0010
-#define CRAM_WRITE 3 //0011
-#define VSRAM_READ 4 //0100
-#define VSRAM_WRITE 5//0101
-//6 would trigger regsiter write 0110
-//7 is a mystery
-#define CRAM_READ 8  //1000
-//9 is also a mystery //1001
-//A would trigger register write 1010
-//B is a mystery 1011
-#define VRAM_READ8 0xC //1100
-//D is a mystery 1101
-//E would trigger register write 1110
-//F is a mystery 1111
-#define DMA_START 0x20
-
 void external_slot(vdp_context * context)
 {
 	fifo_entry * start = context->fifo + context->fifo_read;
@@ -441,7 +485,7 @@
 		}
 		case VSRAM_WRITE:
 			if (((start->address/2) & 63) < VSRAM_SIZE) {
-				//printf("VSRAM Write: %X to %X\n", start->value, context->address);
+				//printf("VSRAM Write: %X to %X @ vcounter: %d, hslot: %d, cycle: %d\n", start->value, context->address, context->vcounter, context->hslot, context->cycles);
 				context->vsram[(start->address/2) & 63] = start->partial == 2 ? context->fifo[context->fifo_write].value : start->value;
 			}
 
@@ -472,7 +516,7 @@
 	case 0x40:
 		if (!slot || !is_refresh(context, slot-1)) {
 			cur = context->fifo + context->fifo_write;
-			cur->cycle = context->cycles + ((context->latched_mode & BIT_H40) ? 16 : 20)*FIFO_LATENCY;
+			cur->cycle = context->cycles + ((context->regs[REG_MODE_4] & BIT_H40) ? 16 : 20)*FIFO_LATENCY;
 			cur->address = context->address;
 			cur->value = read_dma_value((context->regs[REG_DMASRC_H] << 16) | (context->regs[REG_DMASRC_M] << 8) | context->regs[REG_DMASRC_L]);
 			cur->cd = context->cd;
@@ -518,7 +562,7 @@
 		context->regs[REG_DMALEN_H] = dma_len >> 8;
 		context->regs[REG_DMALEN_L] = dma_len;
 		if (!dma_len) {
-			//printf("DMA end at cycle %d\n", context->cycles);
+			//printf("DMA end at cycle %d, frame: %d, vcounter: %d, hslot: %d\n", context->cycles, context->frame, context->vcounter, context->hslot);
 			context->flags &= ~FLAG_DMA_RUN;
 			context->cd &= 0xF;
 		}
@@ -567,7 +611,7 @@
 		if ((column >= left_col && column < right_col) || (line >= top_line && line < bottom_line)) {
 			uint16_t address = context->regs[REG_WINDOW] << 10;
 			uint16_t line_offset, offset, mask;
-			if (context->latched_mode & BIT_H40) {
+			if (context->regs[REG_MODE_4] & BIT_H40) {
 				address &= 0xF000;
 				line_offset = (((line) >> vscroll_shift) * 64 * 2) & 0xFFF;
 				mask = 0x7F;
@@ -613,7 +657,27 @@
 		vscroll <<= 1;
 		vscroll |= 1;
 	}
-	vscroll &= (context->vsram[(context->regs[REG_MODE_3] & BIT_VSCROLL ? (column-2)&63 : 0) + vsram_off] + line);
+	//TODO: Further research on vscroll latch behavior and the "first column bug"
+	if (!column) {
+		if (context->regs[REG_MODE_3] & BIT_VSCROLL) {
+			if (context->regs[REG_MODE_4] & BIT_H40) {
+				//Based on observed behavior documented by Eke-Eke, I'm guessing the VDP
+				//ends up fetching the last value on the VSRAM bus in the H40 case.
+				//Getting the last latched value should be close enough for now
+				if (!vsram_off) {
+					context->vscroll_latch[0] = context->vscroll_latch[1];
+				}
+			} else {
+				//supposedly it's always forced to 0 in the H32 case
+				context->vscroll_latch[0] = context->vscroll_latch[1] = 0;
+			}
+		} else {
+			context->vscroll_latch[vsram_off] = context->vsram[vsram_off];
+		}
+	} else if (context->regs[REG_MODE_3] & BIT_VSCROLL) {
+		context->vscroll_latch[vsram_off] = context->vsram[column - 2 + vsram_off];
+	}
+	vscroll &= context->vscroll_latch[vsram_off] + line;
 	context->v_offset = vscroll & v_offset_mask;
 	//printf("%s | line %d, vsram: %d, vscroll: %d, v_offset: %d\n",(vsram_off ? "B" : "A"), line, context->vsram[context->regs[REG_MODE_3] & 0x4 ? column : 0], vscroll, context->v_offset);
 	vscroll >>= vscroll_shift;
@@ -722,113 +786,144 @@
 		return;
 	}
 	render_map(context->col_2, context->tmp_buf_b, context->buf_b_off+8, context);
-	uint16_t *dst;
-	uint32_t *dst32;
+	uint32_t *dst;
 	uint8_t *sprite_buf,  *plane_a, *plane_b;
 	int plane_a_off, plane_b_off;
 	if (col)
 	{
 		col-=2;
-		if (context->b32) {
-			dst32 = context->framebuf;
-			dst32 += line * 320 + col * 8;
-		} else {
-			dst = context->framebuf;
-			dst += line * 320 + col * 8;
-		}
-		sprite_buf = context->linebuf + col * 8;
-		uint8_t a_src, src;
-		if (context->flags & FLAG_WINDOW) {
-			plane_a_off = context->buf_a_off;
-			a_src = DBG_SRC_W;
-		} else {
-			plane_a_off = context->buf_a_off - (context->hscroll_a & 0xF);
-			a_src = DBG_SRC_A;
-		}
-		plane_b_off = context->buf_b_off - (context->hscroll_b & 0xF);
-		//printf("A | tmp_buf offset: %d\n", 8 - (context->hscroll_a & 0x7));
+		dst = context->framebuf;
+		dst += line * 320 + col * 8;
+		if (context->debug < 2) {
+			sprite_buf = context->linebuf + col * 8;
+			uint8_t a_src, src;
+			if (context->flags & FLAG_WINDOW) {
+				plane_a_off = context->buf_a_off;
+				a_src = DBG_SRC_W;
+			} else {
+				plane_a_off = context->buf_a_off - (context->hscroll_a & 0xF);
+				a_src = DBG_SRC_A;
+			}
+			plane_b_off = context->buf_b_off - (context->hscroll_b & 0xF);
+			//printf("A | tmp_buf offset: %d\n", 8 - (context->hscroll_a & 0x7));
 
-		if (context->regs[REG_MODE_4] & BIT_HILIGHT) {
-			for (int i = 0; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i) {
-				uint8_t pixel;
-				plane_a = context->tmp_buf_a + (plane_a_off & SCROLL_BUFFER_MASK);
-				plane_b = context->tmp_buf_b + (plane_b_off & SCROLL_BUFFER_MASK);
-				uint32_t * colors = context->colors;
-				src = 0;
-				pixel = context->regs[REG_BG_COLOR];
-				src = DBG_SRC_BG;
-				if (*plane_b & 0xF) {
-					pixel = *plane_b;
-					src = DBG_SRC_B;
+			if (context->regs[REG_MODE_4] & BIT_HILIGHT) {
+				for (int i = 0; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i) {
+					plane_a = context->tmp_buf_a + (plane_a_off & SCROLL_BUFFER_MASK);
+					plane_b = context->tmp_buf_b + (plane_b_off & SCROLL_BUFFER_MASK);
+					uint8_t pixel = context->regs[REG_BG_COLOR];
+					uint32_t *colors = context->colors;
+					src = DBG_SRC_BG;
+					if (*plane_b & 0xF) {
+						pixel = *plane_b;
+						src = DBG_SRC_B;
+					}
+					uint8_t intensity = *plane_b & BUF_BIT_PRIORITY;
+					if (*plane_a & 0xF && (*plane_a & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
+						pixel = *plane_a;
+						src = DBG_SRC_A;
+					}
+					intensity |= *plane_a & BUF_BIT_PRIORITY;
+					if (*sprite_buf & 0xF && (*sprite_buf & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
+						if ((*sprite_buf & 0x3F) == 0x3E) {
+							intensity += BUF_BIT_PRIORITY;
+						} else if ((*sprite_buf & 0x3F) == 0x3F) {
+							intensity = 0;
+						} else {
+							pixel = *sprite_buf;
+							src = DBG_SRC_S;
+							if ((pixel & 0xF) == 0xE) {
+								intensity = BUF_BIT_PRIORITY;
+							} else {
+								intensity |= pixel & BUF_BIT_PRIORITY;
+							}
+						}
+					}
+					if (!intensity) {
+						src |= DBG_SHADOW;
+						colors += CRAM_SIZE;
+					} else if (intensity ==  BUF_BIT_PRIORITY*2) {
+						src |= DBG_HILIGHT;
+						colors += CRAM_SIZE*2;
+					}
+					
+					uint32_t outpixel;
+					if (context->debug) {
+						outpixel = context->debugcolors[src];
+					} else {
+						outpixel = colors[pixel & 0x3F];
+					}
+					*(dst++) = outpixel;
 				}
-				if (*plane_a & 0xF && (*plane_a & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
-					pixel = *plane_a;
-					src = DBG_SRC_A;
-				}
-				if (*sprite_buf & 0xF) {
-					uint8_t sprite_color = *sprite_buf & 0x3F;
-					if (sprite_color == 0x3E) {
-						colors += CRAM_SIZE*2;
-						src |= DBG_HILIGHT;
-					} else if (sprite_color == 0x3F) {
-						colors += CRAM_SIZE;
-						src |= DBG_SHADOW;
-					} else if ((*sprite_buf & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
+			} else {
+				for (int i = 0; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i) {
+					plane_a = context->tmp_buf_a + (plane_a_off & SCROLL_BUFFER_MASK);
+					plane_b = context->tmp_buf_b + (plane_b_off & SCROLL_BUFFER_MASK);
+					uint8_t pixel = context->regs[REG_BG_COLOR];
+					src = DBG_SRC_BG;
+					if (*plane_b & 0xF) {
+						pixel = *plane_b;
+						src = DBG_SRC_B;
+					}
+					if (*plane_a & 0xF && (*plane_a & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
+						pixel = *plane_a;
+						src = DBG_SRC_A;
+					}
+					if (*sprite_buf & 0xF && (*sprite_buf & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
 						pixel = *sprite_buf;
 						src = DBG_SRC_S;
-						if ((pixel & 0xF) == 0xE) {
-							src |= DBG_SHADOW;
-							colors += CRAM_SIZE;
-						}
-
 					}
-				} else if (!((*plane_a | *plane_b) & BUF_BIT_PRIORITY)) {
-					colors += CRAM_SIZE;
-					src |= DBG_SHADOW;
-				}
-				pixel &= 0x3F;
-				uint32_t outpixel;
-				if (context->debug) {
-					outpixel = context->debugcolors[src];
-				} else {
-					outpixel = colors[pixel];
-				}
-				if (context->b32) {
-					*(dst32++) = outpixel;
-				} else {
+					uint32_t outpixel;
+					if (context->debug) {
+						outpixel = context->debugcolors[src];
+					} else {
+						outpixel = context->colors[pixel & 0x3F];
+					}
 					*(dst++) = outpixel;
 				}
-				//*dst = (context->cram[pixel & 0x3F] & 0xEEE) | ((pixel & BUF_BIT_PRIORITY) ? FBUF_BIT_PRIORITY : 0) | src;
+			}
+		} else if (context->debug == 2) {
+			if (col < 32) {
+				*(dst++) = context->colors[col * 2];
+				*(dst++) = context->colors[col * 2];
+				*(dst++) = context->colors[col * 2];
+				*(dst++) = context->colors[col * 2];
+				*(dst++) = context->colors[col * 2 + 1];
+				*(dst++) = context->colors[col * 2 + 1];
+				*(dst++) = context->colors[col * 2 + 1];
+				*(dst++) = context->colors[col * 2 + 1];
+				*(dst++) = context->colors[col * 2 + 2];
+				*(dst++) = context->colors[col * 2 + 2];
+				*(dst++) = context->colors[col * 2 + 2];
+				*(dst++) = context->colors[col * 2 + 2];
+				*(dst++) = context->colors[col * 2 + 3];
+				*(dst++) = context->colors[col * 2 + 3];
+				*(dst++) = context->colors[col * 2 + 3];
+				*(dst++) = context->colors[col * 2 + 3];
+			} else if (col == 32 || line >= 192) {
+				for (int32_t i = 0; i < 16; i ++) {
+					*(dst++) = 0;
+				}
+			} else {
+				for (int32_t i = 0; i < 16; i ++) {
+					*(dst++) = context->colors[line / 3 + (col - 34) * 0x20];
+				}
 			}
 		} else {
-			for (int i = 0; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i) {
-				plane_a = context->tmp_buf_a + (plane_a_off & SCROLL_BUFFER_MASK);
-				plane_b = context->tmp_buf_b + (plane_b_off & SCROLL_BUFFER_MASK);
-				uint8_t pixel = context->regs[REG_BG_COLOR];
-				src = DBG_SRC_BG;
-				if (*plane_b & 0xF) {
-					pixel = *plane_b;
-					src = DBG_SRC_B;
-				}
-				if (*plane_a & 0xF && (*plane_a & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
-					pixel = *plane_a;
-					src = DBG_SRC_A;
-				}
-				if (*sprite_buf & 0xF && (*sprite_buf & BUF_BIT_PRIORITY) >= (pixel & BUF_BIT_PRIORITY)) {
-					pixel = *sprite_buf;
-					src = DBG_SRC_S;
-				}
-				uint32_t outpixel;
-				if (context->debug) {
-					outpixel = context->debugcolors[src];
-				} else {
-					outpixel = context->colors[pixel & 0x3F];
-				}
-				if (context->b32) {
-					*(dst32++) = outpixel;
-				} else {
-					*(dst++) = outpixel;
-				}
+			uint32_t base = (context->debug - 3) * 0x200;
+			uint32_t cell = base + (line / 8) * (context->regs[REG_MODE_4] & BIT_H40 ? 40 : 32) + col;
+			uint32_t address = (cell * 32 + (line % 8) * 4) & 0xFFFF;
+			for (int32_t i = 0; i < 4; i ++) {
+				*(dst++) = context->colors[(context->debug_pal << 4) | (context->vdpmem[address] >> 4)];
+				*(dst++) = context->colors[(context->debug_pal << 4) | (context->vdpmem[address] & 0xF)];
+				address++;
+			}
+			cell++;
+			address = (cell * 32 + (line % 8) * 4) & 0xFFFF;
+			for (int32_t i = 0; i < 4; i ++) {
+				*(dst++) = context->colors[(context->debug_pal << 4) | (context->vdpmem[address] >> 4)];
+				*(dst++) = context->colors[(context->debug_pal << 4) | (context->vdpmem[address] & 0xF)];
+				address++;
 			}
 		}
 	}
@@ -893,13 +988,15 @@
 	uint32_t mask;
 	switch(linecyc)
 	{
+	case 165:
+	case 166:
+		external_slot(context);
+		break;
 	//sprite render to line buffer starts
-	case 0:
-		context->cur_slot = MAX_DRAWS-1;
-		memset(context->linebuf, 0, LINEBUF_SIZE);
-	case 1:
-	case 2:
-	case 3:
+	case 167:
+	case 168:
+	case 169:
+	case 170:
 		if (line == 0xFF) {
 			external_slot(context);
 		} else {
@@ -907,52 +1004,50 @@
 		}
 		break;
 	//sprite attribute table scan starts
-	case 4:
+	case 171:
 		render_sprite_cells( context);
-		context->sprite_index = 0x80;
-		context->slot_counter = MAX_SPRITES_LINE;
 		scan_sprite_table(line, context);
 		break;
-	case 5:
-	case 6:
-	case 7:
-	case 8:
-	case 9:
-	case 10:
-	case 11:
-	case 12:
-	case 13:
-	case 14:
-	case 15:
-	case 16:
-	case 17:
-	case 18:
-	case 19:
-	case 20:
+	case 172:
+	case 173:
+	case 174:
+	case 175:
+	case 176:
+	case 177:
+	case 178:
+	case 179:
+	case 180:
+	case 181:
+	case 182:
+	case 229:
+	case 230:
+	case 231:
+	case 232:
+	case 233:
 	//!HSYNC asserted
-	case 21:
-	case 22:
+	case 234:
+	case 235:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 23:
+	case 236:
 		external_slot(context);
 		break;
-	case 24:
-	case 25:
-	case 26:
-	case 27:
-	case 28:
-	case 29:
-	case 30:
-	case 31:
-	case 32:
-	case 33:
-	case 34:
+	case 237:
+	case 238:
+	case 239:
+	case 240:
+	case 241:
+	case 242:
+	case 243:
+	case 244:
+	case 245:
+	case 246:
+	case 247:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 35:
+	case 248:
 		address = (context->regs[REG_HSCROLL] & 0x3F) << 10;
 		mask = 0;
 		if (context->regs[REG_MODE_3] & 0x2) {
@@ -967,41 +1062,41 @@
 		context->hscroll_b = context->vdpmem[address+2] << 8 | context->vdpmem[address+3];
 		//printf("%d: HScroll A: %d, HScroll B: %d\n", line, context->hscroll_a, context->hscroll_b);
 		break;
-	case 36:
+	case 249:
 	//!HSYNC high
-	case 37:
-	case 38:
-	case 39:
+	case 250:
+	case 251:
+	case 252:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 40:
+	case 253:
 		read_map_scroll_a(0, line, context);
 		break;
-	case 41:
+	case 254:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 42:
+	case 255:
 		render_map_1(context);
 		scan_sprite_table(line, context);//Just a guess
 		break;
-	case 43:
+	case 0:
 		render_map_2(context);
 		scan_sprite_table(line, context);//Just a guess
 		break;
-	case 44:
+	case 1:
 		read_map_scroll_b(0, line, context);
 		break;
-	case 45:
+	case 2:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 46:
+	case 3:
 		render_map_3(context);
 		scan_sprite_table(line, context);//Just a guess
 		break;
-	case 47:
+	case 4:
 		render_map_output(line, 0, context);
 		scan_sprite_table(line, context);//Just a guess
 		//reverse context slot counter so it counts the number of sprite slots
@@ -1011,33 +1106,26 @@
 		context->sprite_draws = MAX_DRAWS;
 		context->flags &= (~FLAG_CAN_MASK & ~FLAG_MASKED);
 		break;
-	COLUMN_RENDER_BLOCK(2, 48)
-	COLUMN_RENDER_BLOCK(4, 56)
-	COLUMN_RENDER_BLOCK(6, 64)
-	COLUMN_RENDER_BLOCK_REFRESH(8, 72)
-	COLUMN_RENDER_BLOCK(10, 80)
-	COLUMN_RENDER_BLOCK(12, 88)
-	COLUMN_RENDER_BLOCK(14, 96)
-	COLUMN_RENDER_BLOCK_REFRESH(16, 104)
-	COLUMN_RENDER_BLOCK(18, 112)
-	COLUMN_RENDER_BLOCK(20, 120)
-	COLUMN_RENDER_BLOCK(22, 128)
-	COLUMN_RENDER_BLOCK_REFRESH(24, 136)
-	COLUMN_RENDER_BLOCK(26, 144)
-	COLUMN_RENDER_BLOCK(28, 152)
-	COLUMN_RENDER_BLOCK(30, 160)
-	COLUMN_RENDER_BLOCK_REFRESH(32, 168)
-	COLUMN_RENDER_BLOCK(34, 176)
-	COLUMN_RENDER_BLOCK(36, 184)
-	COLUMN_RENDER_BLOCK(38, 192)
-	COLUMN_RENDER_BLOCK_REFRESH(40, 200)
-	case 208:
-	case 209:
-		external_slot(context);
-		break;
-	default:
-		//leftovers from HSYNC clock change nonsense
-		break;
+	COLUMN_RENDER_BLOCK(2, 5)
+	COLUMN_RENDER_BLOCK(4, 13)
+	COLUMN_RENDER_BLOCK(6, 21)
+	COLUMN_RENDER_BLOCK_REFRESH(8, 29)
+	COLUMN_RENDER_BLOCK(10, 37)
+	COLUMN_RENDER_BLOCK(12, 45)
+	COLUMN_RENDER_BLOCK(14, 53)
+	COLUMN_RENDER_BLOCK_REFRESH(16, 61)
+	COLUMN_RENDER_BLOCK(18, 69)
+	COLUMN_RENDER_BLOCK(20, 77)
+	COLUMN_RENDER_BLOCK(22, 85)
+	COLUMN_RENDER_BLOCK_REFRESH(24, 93)
+	COLUMN_RENDER_BLOCK(26, 101)
+	COLUMN_RENDER_BLOCK(28, 109)
+	COLUMN_RENDER_BLOCK(30, 117)
+	COLUMN_RENDER_BLOCK_REFRESH(32, 125)
+	COLUMN_RENDER_BLOCK(34, 133)
+	COLUMN_RENDER_BLOCK(36, 141)
+	COLUMN_RENDER_BLOCK(38, 149)
+	COLUMN_RENDER_BLOCK_REFRESH(40, 157)
 	}
 }
 
@@ -1047,13 +1135,15 @@
 	uint32_t mask;
 	switch(linecyc)
 	{
+	case 132:
+	case 133:
+		external_slot(context);
+		break;
 	//sprite render to line buffer starts
-	case 0:
-		context->cur_slot = MAX_DRAWS_H32-1;
-		memset(context->linebuf, 0, LINEBUF_SIZE);
-	case 1:
-	case 2:
-	case 3:
+	case 134:
+	case 135:
+	case 136:
+	case 137:
 		if (line == 0xFF) {
 			external_slot(context);
 		} else {
@@ -1061,46 +1151,44 @@
 		}
 		break;
 	//sprite attribute table scan starts
-	case 4:
+	case 138:
 		render_sprite_cells( context);
-		context->sprite_index = 0x80;
-		context->slot_counter = MAX_SPRITES_LINE_H32;
 		scan_sprite_table(line, context);
 		break;
-	case 5:
-	case 6:
-	case 7:
-	case 8:
-	case 9:
-	case 10:
-	case 11:
-	case 12:
-	case 13:
+	case 139:
+	case 140:
+	case 141:
+	case 142:
+	case 143:
+	case 144:
+	case 145:
+	case 146:
+	case 147:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
-	case 14:
+	case 233:
 		external_slot(context);
 		break;
-	case 15:
-	case 16:
-	case 17:
-	case 18:
-	case 19:
+	case 234:
+	case 235:
+	case 236:
+	case 237:
+	case 238:
 	//HSYNC start
-	case 20:
-	case 21:
-	case 22:
-	case 23:
-	case 24:
-	case 25:
-	case 26:
+	case 239:
+	case 240:
+	case 241:
+	case 242:
+	case 243:
+	case 244:
+	case 245:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 27:
+	case 246:
 		external_slot(context);
 		break;
-	case 28:
+	case 247:
 		address = (context->regs[REG_HSCROLL] & 0x3F) << 10;
 		mask = 0;
 		if (context->regs[REG_MODE_3] & 0x2) {
@@ -1115,41 +1203,41 @@
 		context->hscroll_b = context->vdpmem[address+2] << 8 | context->vdpmem[address+3];
 		//printf("%d: HScroll A: %d, HScroll B: %d\n", line, context->hscroll_a, context->hscroll_b);
 		break;
-	case 29:
-	case 30:
-	case 31:
-	case 32:
+	case 248:
+	case 249:
+	case 250:
+	case 251:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
 	//!HSYNC high
-	case 33:
+	case 252:
 		read_map_scroll_a(0, line, context);
 		break;
-	case 34:
+	case 253:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 35:
+	case 254:
 		render_map_1(context);
 		scan_sprite_table(line, context);//Just a guess
 		break;
-	case 36:
+	case 255:
 		render_map_2(context);
 		scan_sprite_table(line, context);//Just a guess
 		break;
-	case 37:
+	case 0:
 		read_map_scroll_b(0, line, context);
 		break;
-	case 38:
+	case 1:
 		render_sprite_cells(context);
 		scan_sprite_table(line, context);
 		break;
-	case 39:
+	case 2:
 		render_map_3(context);
 		scan_sprite_table(line, context);//Just a guess
 		break;
-	case 40:
+	case 3:
 		render_map_output(line, 0, context);
 		scan_sprite_table(line, context);//Just a guess
 		//reverse context slot counter so it counts the number of sprite slots
@@ -1159,26 +1247,22 @@
 		context->sprite_draws = MAX_DRAWS_H32;
 		context->flags &= (~FLAG_CAN_MASK & ~FLAG_MASKED);
 		break;
-	COLUMN_RENDER_BLOCK(2, 41)
-	COLUMN_RENDER_BLOCK(4, 49)
-	COLUMN_RENDER_BLOCK(6, 57)
-	COLUMN_RENDER_BLOCK_REFRESH(8, 65)
-	COLUMN_RENDER_BLOCK(10, 73)
-	COLUMN_RENDER_BLOCK(12, 81)
-	COLUMN_RENDER_BLOCK(14, 89)
-	COLUMN_RENDER_BLOCK_REFRESH(16, 97)
-	COLUMN_RENDER_BLOCK(18, 105)
-	COLUMN_RENDER_BLOCK(20, 113)
-	COLUMN_RENDER_BLOCK(22, 121)
-	COLUMN_RENDER_BLOCK_REFRESH(24, 129)
-	COLUMN_RENDER_BLOCK(26, 137)
-	COLUMN_RENDER_BLOCK(28, 145)
-	COLUMN_RENDER_BLOCK(30, 153)
-	COLUMN_RENDER_BLOCK_REFRESH(32, 161)
-	case 169:
-	case 170:
-		external_slot(context);
-		break;
+	COLUMN_RENDER_BLOCK(2, 4)
+	COLUMN_RENDER_BLOCK(4, 12)
+	COLUMN_RENDER_BLOCK(6, 20)
+	COLUMN_RENDER_BLOCK_REFRESH(8, 28)
+	COLUMN_RENDER_BLOCK(10, 36)
+	COLUMN_RENDER_BLOCK(12, 44)
+	COLUMN_RENDER_BLOCK(14, 52)
+	COLUMN_RENDER_BLOCK_REFRESH(16, 60)
+	COLUMN_RENDER_BLOCK(18, 68)
+	COLUMN_RENDER_BLOCK(20, 76)
+	COLUMN_RENDER_BLOCK(22, 84)
+	COLUMN_RENDER_BLOCK_REFRESH(24, 92)
+	COLUMN_RENDER_BLOCK(26, 100)
+	COLUMN_RENDER_BLOCK(28, 108)
+	COLUMN_RENDER_BLOCK(30, 116)
+	COLUMN_RENDER_BLOCK_REFRESH(32, 124)
 	}
 }
 
@@ -1203,6 +1287,14 @@
 		if (context->flags & FLAG_DMA_RUN) {
 			run_dma_src(context, 0);
 		}
+		external_slot(context);
+		if (context->flags & FLAG_DMA_RUN) {
+			run_dma_src(context, 0);
+		}
+		external_slot(context);
+		if (context->flags & FLAG_DMA_RUN) {
+			run_dma_src(context, 0);
+		}
 		for (int i = 0; i < 19; i++)
 		{
 			scan_sprite_table(line, context);
@@ -1240,13 +1332,17 @@
 
 			read_sprite_x(line, context);
 		}
-		external_slot(context);
-		if (context->flags & FLAG_DMA_RUN) {
-			run_dma_src(context, 0);
-		}
-		external_slot(context);
+
 		return;
 	}
+	external_slot(context);
+	if (context->flags & FLAG_DMA_RUN) {
+		run_dma_src(context, 0);
+	}
+	external_slot(context);
+	if (context->flags & FLAG_DMA_RUN) {
+		run_dma_src(context, 0);
+	}
 
 	render_sprite_cells(context);
 	render_sprite_cells(context);
@@ -1356,57 +1452,63 @@
 		render_map_3(context);
 		render_map_output(line, column, context);
 	}
-	external_slot(context);
-	if (context->flags & FLAG_DMA_RUN) {
-		run_dma_src(context, 0);
-	}
-	external_slot(context);
 }
 
 void latch_mode(vdp_context * context)
 {
-	context->latched_mode = (context->regs[REG_MODE_4] & 0x81) | (context->regs[REG_MODE_2] & BIT_PAL);
+	context->latched_mode = context->regs[REG_MODE_2] & BIT_PAL;
 }
 
 void check_render_bg(vdp_context * context, int32_t line, uint32_t slot)
 {
-	if (line > 0) {
-		line -= 1;
-		int starti = -1;
-		if (context->latched_mode & BIT_H40) {
-			if (slot >= 55 && slot < 210) {
-				uint32_t x = (slot-55)*2;
-				starti = line * 320 + x;
-			} else if (slot < 5) {
-				uint32_t x = (slot + 155)*2;
-				starti = (line-1)*320 + x;
-			}
-		} else {
-			if (slot >= 48 && slot < 171) {
-				uint32_t x = (slot-48)*2;
-				starti = line * 320 + x;
-			} else if (slot < 5) {
-				uint32_t x = (slot + 123)*2;
-				starti = (line-1)*320 + x;
+	int starti = -1;
+	if (context->regs[REG_MODE_4] & BIT_H40) {
+		if (slot >= 12 && slot < 172) {
+			uint32_t x = (slot-12)*2;
+			starti = line * 320 + x;
+		}
+	} else {
+		if (slot >= 11 && slot < 139) {
+			uint32_t x = (slot-11)*2;
+			starti = line * 320 + x;
+		}
+	}
+	if (starti >= 0) {
+		uint32_t color = context->colors[context->regs[REG_BG_COLOR]];
+		uint32_t * start = context->framebuf;
+		start += starti;
+		for (int i = 0; i < 2; i++) {
+			*(start++) = color;
+		}
+	}
+}
+
+uint32_t const h40_hsync_cycles[] = {19, 20, 20, 20, 18, 20, 20, 20, 18, 20, 20, 20, 18, 20, 20, 20, 19};
+
+void vdp_advance_line(vdp_context *context)
+{
+	context->vcounter++;
+	context->vcounter &= 0x1FF;
+	if (context->flags2 & FLAG2_REGION_PAL) {
+		if (context->latched_mode & BIT_PAL) {
+			if (context->vcounter == 0x10B) {
+				context->vcounter = 0x1D2;
 			}
+		} else if (context->vcounter == 0x103){
+			context->vcounter = 0x1CA;
 		}
-		if (starti >= 0) {
-			if (context->b32) {
-				uint32_t color = context->colors[context->regs[REG_BG_COLOR]];
-				uint32_t * start = context->framebuf;
-				start += starti;
-				for (int i = 0; i < 2; i++) {
-					*(start++) = color;
-				}
-			} else {
-				uint16_t color = context->colors[context->regs[REG_BG_COLOR]];
-				uint16_t * start = context->framebuf;
-				start += starti;
-				for (int i = 0; i < 2; i++) {
-					*(start++) = color;
-				}
-			}
-		}
+	} else if (!(context->latched_mode & BIT_PAL) &&  context->vcounter == 0xEB) {
+		context->vcounter = 0x1E5;
+	}
+	
+	if (context->vcounter > (context->latched_mode & BIT_PAL ? PAL_INACTIVE_START : NTSC_INACTIVE_START)) {
+		context->hint_counter = context->regs[REG_HINT];
+	} else if (context->hint_counter) {
+		context->hint_counter--;
+	} else {
+		context->flags2 |= FLAG2_HINT_PENDING;
+		context->pending_hint_start = context->cycles;
+		context->hint_counter = context->regs[REG_HINT];
 	}
 }
 
@@ -1415,126 +1517,68 @@
 	while(context->cycles < target_cycles)
 	{
 		context->flags &= ~FLAG_UNUSED_SLOT;
-		uint32_t line = context->cycles / MCLKS_LINE;
-		uint32_t active_lines = context->latched_mode & BIT_PAL ? PAL_ACTIVE : NTSC_ACTIVE;
-		if (!context->cycles) {
+		uint32_t line = context->vcounter;
+		uint32_t slot = context->hslot;
+		
+		if (!line && !slot) {
+			//TODO: Figure out when this actually happens
 			latch_mode(context);
 		}
-		uint32_t linecyc = context->cycles % MCLKS_LINE;
-		if (linecyc == 0) {
-			if (line <= 1 || line >= active_lines) {
-				context->hint_counter = context->regs[REG_HINT];
-			} else if (context->hint_counter) {
-				context->hint_counter--;
-			} else {
-				context->flags2 |= FLAG2_HINT_PENDING;
-				context->hint_counter = context->regs[REG_HINT];
+		uint32_t inactive_start = context->latched_mode & BIT_PAL ? PAL_INACTIVE_START : NTSC_INACTIVE_START;
+
+		uint8_t is_h40 = context->regs[REG_MODE_4] & BIT_H40;
+		if (is_h40) {
+			if (slot == 167) {
+				context->cur_slot = MAX_DRAWS-1;
+				memset(context->linebuf, 0, LINEBUF_SIZE);
+			} else if (slot == 171) {
+				context->sprite_index = 0x80;
+				context->slot_counter = MAX_SPRITES_LINE;
 			}
-		} else if(line == active_lines) {
-			uint32_t intcyc = context->latched_mode & BIT_H40 ? VINT_CYCLE_H40 :  VINT_CYCLE_H32;
-			if (linecyc == intcyc) {
-				context->flags2 |= FLAG2_VINT_PENDING;
+		} else {
+			if (slot == 134) {
+				context->cur_slot = MAX_DRAWS_H32-1;
+				memset(context->linebuf, 0, LINEBUF_SIZE);
+			} else if (slot == 138) {
+				context->sprite_index = 0x80;
+				context->slot_counter = MAX_SPRITES_LINE_H32;
 			}
 		}
-		uint32_t inccycles, slot;
-		if (context->latched_mode & BIT_H40){
-			if (linecyc < MCLKS_SLOT_H40*HSYNC_SLOT_H40) {
-				slot = linecyc/MCLKS_SLOT_H40;
+		if(line == inactive_start) {
+			uint32_t intslot = context->regs[REG_MODE_4] & BIT_H40 ? VINT_SLOT_H40 :  VINT_SLOT_H32;
+			if (slot == intslot) {
+				context->flags2 |= FLAG2_VINT_PENDING;
+				context->pending_vint_start = context->cycles;
+			}
+		}
+		uint32_t inccycles;
+		//line 0x1FF is basically active even though it's not displayed
+		uint8_t active_slot = line < inactive_start || line == 0x1FF;
+		if (is_h40) {
+			if (slot < HSYNC_SLOT_H40 || slot >= HSYNC_END_H40) {
 				inccycles = MCLKS_SLOT_H40;
-			} else if(linecyc < MCLK_WEIRD_END) {
-				switch(linecyc-(MCLKS_SLOT_H40*HSYNC_SLOT_H40))
-				{
-				case 0:
-					inccycles = 19;
-					slot = 0;
-					break;
-				case 19:
-					slot = 1;
-					inccycles = 20;
-					break;
-				case 39:
-					slot = 2;
-					inccycles = 20;
-					break;
-				case 59:
-					slot = 3;
-					inccycles = 20;
-					break;
-				case 79:
-					slot = 4;
-					inccycles = 18;
-					break;
-				case 97:
-					slot = 5;
-					inccycles = 20;
-					break;
-				case 117:
-					slot = 6;
-					inccycles = 20;
-					break;
-				case 137:
-					slot = 7;
-					inccycles = 20;
-					break;
-				case 157:
-					slot = 8;
-					inccycles = 18;
-					break;
-				case 175:
-					slot = 9;
-					inccycles = 20;
-					break;
-				case 195:
-					slot = 10;
-					inccycles = 20;
-					break;
-				case 215:
-					slot = 11;
-					inccycles = 20;
-					break;
-				case 235:
-					slot = 12;
-					inccycles = 18;
-					break;
-				case 253:
-					slot = 13;
-					inccycles = 20;
-					break;
-				case 273:
-					slot = 14;
-					inccycles = 20;
-					break;
-				case 293:
-					slot = 15;
-					inccycles = 20;
-					break;
-				case 313:
-					slot = 16;
-					inccycles = 19;
-					break;
-				default:
-					fprintf(stderr, "cycles after weirdness %d\n", linecyc-(MCLKS_SLOT_H40*HSYNC_SLOT_H40));
-					exit(1);
-				}
-				slot += HSYNC_SLOT_H40;
 			} else {
-				slot = (linecyc-MCLK_WEIRD_END)/MCLKS_SLOT_H40 + SLOT_WEIRD_END;
-				inccycles = MCLKS_SLOT_H40;
+				inccycles = h40_hsync_cycles[slot-HSYNC_SLOT_H40];
+			}
+			//the first inactive line behaves as an active one for the first 4 slots
+			if (line == inactive_start && slot > 166 && slot < 171) {
+				active_slot = 1;
 			}
 		} else {
 			inccycles = MCLKS_SLOT_H32;
-			slot = linecyc/MCLKS_SLOT_H32;
+			//the first inactive line behaves as an active one for the first 4 slots
+			if (line == inactive_start && slot > 166 && slot < 171) {
+				active_slot = 1;
+			}
 		}
-		if ((line < active_lines || (line == active_lines && linecyc < (context->latched_mode & BIT_H40 ? 64 : 80))) && context->regs[REG_MODE_2] & DISPLAY_ENABLE) {
-			//first sort-of active line is treated as 255 internally
-			//it's used for gathering sprite info for line
-			line = (line - 1) & 0xFF;
-
-			//Convert to slot number
-			if (context->latched_mode & BIT_H40){
-				if (!slot && line != (active_lines-1) && (target_cycles - context->cycles) >= MCLKS_LINE) {
+		uint8_t inc_slot = 1;
+		if (context->regs[REG_MODE_2] & DISPLAY_ENABLE && active_slot) {
+			//run VDP rendering for a slot or a line
+			if (is_h40) {
+				if (slot == LINE_CHANGE_H40 && line < inactive_start && (target_cycles - context->cycles) >= MCLKS_LINE) {
 					vdp_h40_line(line, context);
 					inccycles = MCLKS_LINE;
+					inc_slot = 0;
 				} else {
 					vdp_h40(line, slot, context);
 				}
@@ -1545,7 +1589,7 @@
 			if (!is_refresh(context, slot)) {
 				external_slot(context);
 			}
-			if (line < active_lines) {
+			if (line < inactive_start) {
 				check_render_bg(context, line, slot);
 			}
 		}
@@ -1553,12 +1597,38 @@
 			run_dma_src(context, slot);
 		}
 		context->cycles += inccycles;
+		if (inc_slot) {
+			context->hslot++;
+			context->hslot &= 0xFF;
+			if (is_h40) {
+				if (context->hslot == LINE_CHANGE_H40) {
+					vdp_advance_line(context);
+					if (context->vcounter == (inactive_start + 8)) {
+						context->frame++;
+					}
+				} else if (context->hslot == 183) {
+					context->hslot = 229;
+				}
+			} else {
+				if (context->hslot == LINE_CHANGE_H32) {
+					vdp_advance_line(context);
+					if (context->vcounter == (inactive_start + 8)) {
+						context->frame++;
+					}
+				} else if (context->hslot == 148) {
+					context->hslot = 233;
+				}
+			}
+
+		} else {
+			vdp_advance_line(context);
+		}
 	}
 }
 
 uint32_t vdp_run_to_vblank(vdp_context * context)
 {
-	uint32_t target_cycles = ((context->latched_mode & BIT_PAL) ? PAL_ACTIVE : NTSC_ACTIVE) * MCLKS_LINE;
+	uint32_t target_cycles = ((context->latched_mode & BIT_PAL) ? PAL_INACTIVE_START : NTSC_INACTIVE_START) * MCLKS_LINE; //first inactive line number scaled to master clocks; NOTE(review): assumes line N begins at cycle N*MCLKS_LINE - confirm against the vcounter/hslot model
 	vdp_run_context(context, target_cycles);
 	return context->cycles;
 }
@@ -1570,7 +1640,7 @@
 		if (!dmalen) {
 			dmalen = 0x10000;
 		}
-		uint32_t min_dma_complete = dmalen * (context->latched_mode & BIT_H40 ? 16 : 20);
+		uint32_t min_dma_complete = dmalen * (context->regs[REG_MODE_4] & BIT_H40 ? 16 : 20);
 		if ((context->regs[REG_DMASRC_H] & 0xC0) == 0xC0 || (context->cd & 0xF) == VRAM_WRITE) {
 			//DMA copies take twice as long to complete since they require a read and a write
 			//DMA Fills and transfers to VRAM also take twice as long as it requires 2 writes for a single word
@@ -1606,7 +1676,7 @@
 				//DMA copy or 68K -> VDP, transfer starts immediately
 				context->flags |= FLAG_DMA_RUN;
 				context->dma_cd = context->cd;
-				//printf("DMA start at cycle %d\n", context->cycles);
+				//printf("DMA start (length: %X) at cycle %d, frame: %d, vcounter: %d, hslot: %d\n", (context->regs[REG_DMALEN_H] << 8) | context->regs[REG_DMALEN_L], context->cycles, context->frame, context->vcounter, context->hslot);
 				if (!(context->regs[REG_DMASRC_H] & 0x80)) {
 					//printf("DMA Address: %X, New CD: %X, Source: %X, Length: %X\n", context->address, context->cd, (context->regs[REG_DMASRC_H] << 17) | (context->regs[REG_DMASRC_M] << 9) | (context->regs[REG_DMASRC_L] << 1), context->regs[REG_DMALEN_H] << 8 | context->regs[REG_DMALEN_L]);
 					return 1;
@@ -1629,13 +1699,16 @@
 				if (reg == REG_BG_COLOR) {
 					value &= 0x3F;
 				}
+				/*if (reg == REG_MODE_4 && ((value ^ context->regs[reg]) & BIT_H40)) {
+					printf("Mode changed from H%d to H%d @ %d, frame: %d\n", context->regs[reg] & BIT_H40 ? 40 : 32, value & BIT_H40 ? 40 : 32, context->cycles, context->frame);
+				}*/
 				context->regs[reg] = value;
 				if (reg == REG_MODE_4) {
 					context->double_res = (value & (BIT_INTERLACE | BIT_DOUBLE_RES)) == (BIT_INTERLACE | BIT_DOUBLE_RES);
 					if (!context->double_res) {
 						context->framebuf = context->oddbuf;
 					}
-				}
+					}
 				context->cd &= 0x3C;
 			}
 		} else {
@@ -1661,13 +1734,13 @@
 		context->flags &= ~FLAG_DMA_RUN;
 	}
 	while (context->fifo_write == context->fifo_read) {
-		vdp_run_context(context, context->cycles + ((context->latched_mode & BIT_H40) ? 16 : 20));
+		vdp_run_context(context, context->cycles + ((context->regs[REG_MODE_4] & BIT_H40) ? 16 : 20));
 	}
 	fifo_entry * cur = context->fifo + context->fifo_write;
-	cur->cycle = context->cycles + ((context->latched_mode & BIT_H40) ? 16 : 20)*FIFO_LATENCY;
+	cur->cycle = context->cycles + ((context->regs[REG_MODE_4] & BIT_H40) ? 16 : 20)*FIFO_LATENCY;
 	cur->address = context->address;
 	cur->value = value;
-	if (context->cd & 0x20 && (context->regs[REG_DMASRC_H] & 0xC0) == 0x80) {
+	if (context->cd & 0x20 && (context->regs[REG_DMASRC_H] & 0xC0) == 0x80 && (context->regs[REG_MODE_2] & BIT_DMA_ENABLE)) {
 		context->flags |= FLAG_DMA_RUN;
 	}
 	cur->cd = context->cd;
@@ -1709,18 +1782,40 @@
 	if ((context->regs[REG_MODE_4] & BIT_INTERLACE) && context->framebuf == context->oddbuf) {
 		value |= 0x10;
 	}
-	uint32_t line= context->cycles / MCLKS_LINE;
-	uint32_t linecyc = context->cycles % MCLKS_LINE;
-	if (line >= (context->latched_mode & BIT_PAL ? PAL_ACTIVE : NTSC_ACTIVE) || !(context->regs[REG_MODE_2] & BIT_DISP_EN)) {
+	uint32_t line= context->vcounter;
+	uint32_t slot = context->hslot;
+	uint32_t inactive_start = (context->latched_mode & BIT_PAL ? PAL_INACTIVE_START : NTSC_INACTIVE_START);
+	if (
+		(
+			line > inactive_start
+			&& line < 0x1FF
+		)
+		|| (line == inactive_start 
+			&& (
+				slot >= (context->regs[REG_MODE_4] & BIT_H40 ? VBLANK_START_H40 : VBLANK_START_H32)
+				|| slot < (context->regs[REG_MODE_4] & BIT_H40 ? LINE_CHANGE_H40 : LINE_CHANGE_H32)
+			)
+		)
+		|| (line == 0x1FF
+			&& slot < (context->regs[REG_MODE_4] & BIT_H40 ? VBLANK_START_H40 : VBLANK_START_H32))
+			&& slot >= (context->regs[REG_MODE_4] & BIT_H40 ? LINE_CHANGE_H40 : LINE_CHANGE_H32)
+		|| !(context->regs[REG_MODE_2] & BIT_DISP_EN)
+	) {
 		value |= 0x8;
 	}
-	if (linecyc < (context->latched_mode & BIT_H40 ? HBLANK_CLEAR_H40 : HBLANK_CLEAR_H32)) {
-		value |= 0x4;
+	if (context->regs[REG_MODE_4] & BIT_H40) {
+		if (slot < HBLANK_END_H40 || slot > HBLANK_START_H40) {
+			value |= 0x4;
+		}
+	} else {
+		if (slot < HBLANK_END_H32 || slot > HBLANK_START_H32) {
+			value |= 0x4;
+		}
 	}
 	if (context->flags & FLAG_DMA_RUN) {
 		value |= 0x2;
 	}
-	if (context->latched_mode & BIT_PAL) {//Not sure about this, need to verify
+	if (context->flags2 & FLAG2_REGION_PAL) {
 		value |= 0x1;
 	}
 	//printf("status read at cycle %d returned %X\n", context->cycles, value);
@@ -1741,7 +1836,7 @@
 	context->flags &= ~FLAG_UNUSED_SLOT;
 	//context->flags2 |= FLAG2_READ_PENDING;
 	while (!(context->flags & FLAG_UNUSED_SLOT)) {
-		vdp_run_context(context, context->cycles + ((context->latched_mode & BIT_H40) ? 16 : 20));
+		vdp_run_context(context, context->cycles + ((context->regs[REG_MODE_4] & BIT_H40) ? 16 : 20));
 	}
 	uint16_t value = 0;
 	switch (context->cd & 0xF)
@@ -1751,7 +1846,7 @@
 		context->flags &= ~FLAG_UNUSED_SLOT;
 		context->flags2 |= FLAG2_READ_PENDING;
 		while (!(context->flags & FLAG_UNUSED_SLOT)) {
-			vdp_run_context(context, context->cycles + ((context->latched_mode & BIT_H40) ? 16 : 20));
+			vdp_run_context(context, context->cycles + ((context->regs[REG_MODE_4] & BIT_H40) ? 16 : 20));
 		}
 		value |= context->vdpmem[context->address | 1];
 		break;
@@ -1782,102 +1877,8 @@
 	if (context->regs[REG_MODE_1] & BIT_HVC_LATCH) {
 		return context->hv_latch;
 	}
-	uint32_t line= context->cycles / MCLKS_LINE;
-	if (!line) {
-		line = 0xFF;
-	} else {
-		line--;
-		if (line > 0xEA) {
-			line = (line + 0xFA) & 0xFF;
-		}
-	}
-	uint32_t linecyc = context->cycles % MCLKS_LINE;
-	if (context->latched_mode & BIT_H40) {
-		uint32_t slot;
-		if (linecyc < MCLKS_SLOT_H40*HSYNC_SLOT_H40) {
-			slot = linecyc/MCLKS_SLOT_H40;
-		} else if(linecyc < MCLK_WEIRD_END) {
-			switch(linecyc-(MCLKS_SLOT_H40*HSYNC_SLOT_H40))
-			{
-			case 0:
-				slot = 0;
-				break;
-			case 19:
-				slot = 1;
-				break;
-			case 39:
-				slot = 2;
-				break;
-			case 59:
-				slot = 2;
-				break;
-			case 79:
-				slot = 3;
-				break;
-			case 97:
-				slot = 4;
-				break;
-			case 117:
-				slot = 5;
-				break;
-			case 137:
-				slot = 6;
-				break;
-			case 157:
-				slot = 7;
-				break;
-			case 175:
-				slot = 8;
-				break;
-			case 195:
-				slot = 9;
-				break;
-			case 215:
-				slot = 11;
-				break;
-			case 235:
-				slot = 12;
-				break;
-			case 253:
-				slot = 13;
-				break;
-			case 273:
-				slot = 14;
-				break;
-			case 293:
-				slot = 15;
-				break;
-			case 313:
-				slot = 16;
-				break;
-			default:
-				fprintf(stderr, "cycles after weirdness %d\n", linecyc-(MCLKS_SLOT_H40*HSYNC_SLOT_H40));
-				exit(1);
-			}
-			slot += HSYNC_SLOT_H40;
-		} else {
-			slot = (linecyc-MCLK_WEIRD_END)/MCLKS_SLOT_H40 + SLOT_WEIRD_END;
-		}
-		linecyc = slot * 2;
-		if (linecyc >= 86) {
-			linecyc -= 86;
-		} else {
-			linecyc += 334;
-		}
-		if (linecyc > 0x16C) {
-			linecyc += 92;
-		}
-	} else {
-		linecyc /= 10;
-		if (linecyc >= 74) {
-			linecyc -= 74;
-		} else {
-			linecyc += 268;
-		}
-		if (linecyc > 0x127) {
-			linecyc += 170;
-		}
-	}
+	uint32_t line= context->vcounter & 0xFF;
+	uint32_t linecyc = context->hslot;
 	linecyc &= 0xFF;
 	if (context->double_res) {
 		line <<= 1;
@@ -1897,6 +1898,16 @@
 void vdp_adjust_cycles(vdp_context * context, uint32_t deduction)
 {
 	context->cycles -= deduction;
+	if (context->pending_vint_start >= deduction) {
+		context->pending_vint_start -= deduction;
+	} else {
+		context->pending_vint_start = 0;
+	}
+	if (context->pending_hint_start >= deduction) {
+		context->pending_hint_start -= deduction;
+	} else {
+		context->pending_hint_start = 0;
+	}
 	if (context->fifo_read >= 0) {
 		int32_t idx = context->fifo_read;
 		do {
@@ -1910,25 +1921,129 @@
 	}
 }
 
+uint32_t vdp_cycles_hslot_wrap_h40(vdp_context * context)
+{
+	//Master clocks from the current hslot until hslot wraps past 255 to 0, using H40 slot timing
+	uint32_t cur = context->hslot;
+	if (cur < 183) { //a whole line minus the cur regular-width slots counted from slot 0
+		return MCLKS_LINE - cur * MCLKS_SLOT_H40;
+	}
+	if (cur >= HSYNC_END_H40) { //only regular-width slots remain before the wrap
+		return (256 - cur) * MCLKS_SLOT_H40;
+	}
+	uint32_t first_hsync = cur > HSYNC_SLOT_H40 ? cur - HSYNC_SLOT_H40 : 0; //index of the first HSYNC slot still to run
+	uint32_t total = (256 - HSYNC_END_H40 + (cur < HSYNC_SLOT_H40 ? HSYNC_SLOT_H40 - cur : 0)) * MCLKS_SLOT_H40; //regular slots after and before HSYNC
+	for (uint32_t i = first_hsync; i < sizeof(h40_hsync_cycles)/sizeof(uint32_t); i++) {
+		total += h40_hsync_cycles[i];
+	}
+	return total;
+}
+
+uint32_t vdp_cycles_next_line(vdp_context * context)
+{
+	//Master clocks until hslot next reaches the line-change slot of the horizontal mode selected in REG_MODE_4
+	uint32_t cur = context->hslot;
+	if (context->regs[REG_MODE_4] & BIT_H40) {
+		if (cur >= LINE_CHANGE_H40) {
+			return vdp_cycles_hslot_wrap_h40(context) + LINE_CHANGE_H40 * MCLKS_SLOT_H40;
+		}
+		return (LINE_CHANGE_H40 - cur) * MCLKS_SLOT_H40;
+	}
+	if (cur < LINE_CHANGE_H32) {
+		return (LINE_CHANGE_H32 - cur) * MCLKS_SLOT_H32;
+	}
+	if (cur < 148) { //a whole line minus the slots already spent since the last line change
+		return MCLKS_LINE - (cur - LINE_CHANGE_H32) * MCLKS_SLOT_H32;
+	}
+	return (256 - cur + LINE_CHANGE_H32) * MCLKS_SLOT_H32;
+}
+
+uint32_t vdp_cycles_to_line(vdp_context * context, uint32_t target) //returns master clocks until vcounter next becomes target (a full trip around the counter when vcounter already equals target)
+{
+	uint32_t jump_start, jump_dst; //vcounter skips from jump_start to jump_dst; same values vdp_advance_line uses
+	if (context->flags2 & FLAG2_REGION_PAL) {
+		if (context->latched_mode & BIT_PAL) {
+			jump_start = 0x10B;
+			jump_dst = 0x1D2;
+		} else {
+			jump_start = 0x103;
+			jump_dst = 0x1CA;
+		}
+	} else {
+		if (context->latched_mode & BIT_PAL) {
+			jump_start = 0; //vdp_advance_line performs no skip for this combination; zeros make the formulas below reduce to plain differences
+			jump_dst = 0;
+		} else {
+			jump_start = 0xEB;
+			jump_dst = 0x1E5;
+		}
+	}
+	uint32_t lines; //number of line changes needed to reach target
+	if (context->vcounter < target) {
+		if (target < jump_start || context->vcounter >= jump_start) { //no skip lies between vcounter and target; the second test prevents unsigned underflow in the else branch when vcounter is already past the skip
+			lines = target - context->vcounter;
+		} else {
+			lines = jump_start - context->vcounter + target - jump_dst;
+		}
+	} else { //target is only reached after vcounter wraps from 511 to 0
+		if (context->vcounter < jump_start) {
+			lines = jump_start - context->vcounter + 512 - jump_dst;
+		} else {
+			lines = 512 - context->vcounter;
+		}
+		if (target < jump_start) {
+			lines += target;
+		} else {
+			lines += jump_start + target - jump_dst;
+		}
+	}
+	return MCLKS_LINE * (lines - 1) + vdp_cycles_next_line(context); //the first line change costs vdp_cycles_next_line, each later one a full MCLKS_LINE
+}
+
+uint32_t vdp_frame_end_line(vdp_context * context)
+{
+	//Selects the vcounter value used as the frame boundary from the
+	//region flag in flags2 and BIT_PAL in latched_mode:
+	//  BIT_PAL latched, region flag clear -> 512
+	//  BIT_PAL latched, region flag set   -> PAL_INACTIVE_START + 8
+	//  BIT_PAL not latched (any region)   -> NTSC_INACTIVE_START + 8
+	uint8_t pal_region = (context->flags2 & FLAG2_REGION_PAL) != 0;
+	uint8_t pal_mode = (context->latched_mode & BIT_PAL) != 0;
+	if (pal_mode && !pal_region) {
+		//NOTE(review): vdp_advance_line masks vcounter to 0x1FF, so 512 is never a stored value - presumably stands for the wrap to 0; confirm
+		return 512;
+	}
+	if (pal_mode) {
+		return PAL_INACTIVE_START + 8;
+	}
+	return NTSC_INACTIVE_START + 8;
+}
+
+uint32_t vdp_cycles_to_frame_end(vdp_context * context) //despite the name, returns an absolute cycle count, not a delta
+{
+	return context->cycles + vdp_cycles_to_line(context, vdp_frame_end_line(context)); //current cycle plus the distance to the frame-end line
+}
+
 uint32_t vdp_next_hint(vdp_context * context)
 {
 	if (!(context->regs[REG_MODE_1] & BIT_HINT_EN)) {
 		return 0xFFFFFFFF;
 	}
 	if (context->flags2 & FLAG2_HINT_PENDING) {
-		return context->cycles;
-	}
-	uint32_t active_lines = context->latched_mode & BIT_PAL ? PAL_ACTIVE : NTSC_ACTIVE;
-	uint32_t line = context->cycles / MCLKS_LINE;
-	if (line >= active_lines) {
-		return 0xFFFFFFFF;
+		return context->pending_hint_start; //cycle recorded when the pending HINT was flagged
 	}
-	uint32_t linecyc = context->cycles % MCLKS_LINE;
-	uint32_t hcycle = context->cycles + context->hint_counter * MCLKS_LINE + MCLKS_LINE - linecyc;
-	if (!line) {
-		hcycle += MCLKS_LINE;
+	uint32_t inactive_start = context->latched_mode & BIT_PAL ? PAL_INACTIVE_START : NTSC_INACTIVE_START;
+	uint32_t hint_line; //vcounter value at which the next HINT is expected
+	if (context->vcounter + context->hint_counter >= inactive_start) { //counter cannot run out before the inactive lines begin
+		if (context->regs[REG_HINT] > inactive_start) { //reload value exceeds the first inactive line: report no upcoming HINT
+			return 0xFFFFFFFF;
+		}
+		hint_line = context->regs[REG_HINT]; //NOTE(review): presumably the first HINT line of the following frame - confirm
+	} else {
+		hint_line = context->vcounter + context->hint_counter + 1;
 	}
-	return hcycle;
+
+	return context->cycles + vdp_cycles_to_line(context, hint_line); //absolute cycle, not a delta
 }
 
 uint32_t vdp_next_vint(vdp_context * context)
@@ -1937,31 +2052,42 @@
 		return 0xFFFFFFFF;
 	}
 	if (context->flags2 & FLAG2_VINT_PENDING) {
-		return context->cycles;
+		return context->pending_vint_start;
 	}
-	uint32_t active_lines = context->latched_mode & BIT_PAL ? PAL_ACTIVE : NTSC_ACTIVE;
-	uint32_t vcycle =  MCLKS_LINE * active_lines;
-	if (context->latched_mode & BIT_H40) {
-		vcycle += VINT_CYCLE_H40;
-	} else {
-		vcycle += VINT_CYCLE_H32;
-	}
-	if (vcycle < context->cycles) {
-		return 0xFFFFFFFF;
-	}
-	return vcycle;
+
+
+	return vdp_next_vint_z80(context);
 }
 
 uint32_t vdp_next_vint_z80(vdp_context * context)
 {
-	uint32_t active_lines = context->latched_mode & BIT_PAL ? PAL_ACTIVE : NTSC_ACTIVE;
-	uint32_t vcycle =  MCLKS_LINE * active_lines;
-	if (context->latched_mode & BIT_H40) {
-		vcycle += VINT_CYCLE_H40;
+	uint32_t inactive_start = context->latched_mode & BIT_PAL ? PAL_INACTIVE_START : NTSC_INACTIVE_START;
+	if (context->vcounter == inactive_start) { //already on the line on which vdp_run_context flags VINT
+		if (context->regs[REG_MODE_4] & BIT_H40) {
+			if (context->hslot >= LINE_CHANGE_H40) { //VINT slot comes after hslot wraps to 0
+				return context->cycles + vdp_cycles_hslot_wrap_h40(context) + VINT_SLOT_H40 * MCLKS_SLOT_H40;
+			} else if (context->hslot <= VINT_SLOT_H40) { //VINT slot is still ahead before the wrap
+				return context->cycles + (VINT_SLOT_H40 - context->hslot) * MCLKS_SLOT_H40;
+			}
+		} else {
+			if (context->hslot >= LINE_CHANGE_H32) {
+				if (context->hslot < 148) { //slots left before 148, then the 233-255 range, then up to the VINT slot
+					return context->cycles + (VINT_SLOT_H32 + 148 - context->hslot + 256 - 233) * MCLKS_SLOT_H32;
+				} else {
+					return context->cycles + (VINT_SLOT_H32 + 256 - context->hslot) * MCLKS_SLOT_H32;
+				}
+			} else if (context->hslot <= VINT_SLOT_H32) {
+				return context->cycles + (VINT_SLOT_H32 - context->hslot) * MCLKS_SLOT_H32;
+			}
+		}
+	}
+	int32_t cycles_to_vint = vdp_cycles_to_line(context, inactive_start); //otherwise: cycles until vcounter next becomes inactive_start
+	if (context->regs[REG_MODE_4] & BIT_H40) {
+		cycles_to_vint += MCLKS_LINE - (LINE_CHANGE_H40 - VINT_SLOT_H40) * MCLKS_SLOT_H40; //plus the span from the line-change slot around to the VINT slot
 	} else {
-		vcycle += VINT_CYCLE_H32;
+		cycles_to_vint += (VINT_SLOT_H32 + 148 - LINE_CHANGE_H32 + 256 - 233) * MCLKS_SLOT_H32; //same span counted in H32 slots
 	}
-	return vcycle;
+	return context->cycles + cycles_to_vint; //absolute cycle, not a delta
 }
 
 void vdp_int_ack(vdp_context * context, uint16_t int_num)