changeset 460:788ba843a731

Implement FIFO latency and improve DMA accuracy
author Mike Pavone <pavone@retrodev.com>
date Tue, 10 Sep 2013 00:29:46 -0700
parents 249d24973682
children 6221f8f534fa
files vdp.c vdp.h
diffstat 2 files changed, 122 insertions(+), 165 deletions(-) [+]
line wrap: on
line diff
--- a/vdp.c	Wed Sep 04 19:34:19 2013 -0700
+++ b/vdp.c	Tue Sep 10 00:29:46 2013 -0700
@@ -27,6 +27,7 @@
 #define HSYNC_END_H32   (33 * MCLKS_SLOT_H32)
 #define HBLANK_CLEAR_H40 (MCLK_WEIRD_END+61*4)
 #define HBLANK_CLEAR_H32 (HSYNC_END_H32 + 46*5)
+#define FIFO_LATENCY    3
 
 int32_t color_map[1 << 12];
 uint8_t levels[] = {0, 27, 49, 71, 87, 103, 119, 130, 146, 157, 174, 190, 206, 228, 255};
@@ -121,6 +122,19 @@
 	}
 }
 
+int is_refresh(vdp_context * context, uint32_t slot)
+{
+	if (context->latched_mode & BIT_H40) {
+		return (slot == 37 || slot == 69 || slot == 102 || slot == 133 || slot == 165 || slot == 197 || slot >= 210);
+	} else {
+		//TODO: Figure out which slots are refresh when display is off in 32-cell mode
+		//These numbers are guesses based on H40 numbers
+		return (slot == 24 || slot == 56 || slot == 88 || slot == 120 || slot == 152);
+		//The numbers below are the refresh slots during active display
+		//return (slot == 66 || slot == 98 || slot == 130 || slot == 162);
+	}
+}
+
 void render_sprite_cells(vdp_context * context)
 {
 	if (context->cur_slot >= context->sprite_draws) {
@@ -371,162 +385,115 @@
 
 void external_slot(vdp_context * context)
 {
+	fifo_entry * start = (context->fifo_end - FIFO_SIZE);
+	if (context->fifo_cur != start && start->cycle <= context->cycles) {
+		switch (start->cd & 0xF)
+		{
+		case VRAM_WRITE:
+			if (start->partial) {
+				//printf("VRAM Write: %X to %X\n", start->value, context->address ^ 1);
+				context->vdpmem[start->address ^ 1] = start->value;
+			} else {
+				//printf("VRAM Write High: %X to %X\n", start->value >> 8, context->address);
+				context->vdpmem[start->address] = start->value >> 8;
+				start->partial = 1;
+				//skip auto-increment and removal of entry from fifo
+				return;
+			}
+			break;
+		case CRAM_WRITE: {
+			//printf("CRAM Write | %X to %X\n", start->value, (start->address/2) & (CRAM_SIZE-1));
+			write_cram(context, start->address, start->value);
+			break;
+		}
+		case VSRAM_WRITE:
+			if (((start->address/2) & 63) < VSRAM_SIZE) {
+				//printf("VSRAM Write: %X to %X\n", start->value, context->address);
+				context->vsram[(start->address/2) & 63] = start->value;
+			}
+
+			break;
+		}
+		fifo_entry * cur = start+1;
+		if (cur < context->fifo_cur) {
+			memmove(start, cur, sizeof(fifo_entry) * (context->fifo_cur - cur));
+		}
+		context->fifo_cur -= 1;
+	} else {
+		context->flags |= FLAG_UNUSED_SLOT;
+	}
+}
+
+void run_dma_src(vdp_context * context, uint32_t slot)
+{
 	//TODO: Figure out what happens if CD bit 4 is not set in DMA copy mode
 	//TODO: Figure out what happens when CD:0-3 is not set to a write mode in DMA operations
 	//TODO: Figure out what happens if DMA gets disabled part way through a DMA fill or DMA copy
-	if(context->flags & FLAG_DMA_RUN) {
-		uint16_t dma_len;
-		switch(context->regs[REG_DMASRC_H] & 0xC0)
-		{
-		//68K -> VDP
-		case 0:
-		case 0x40:
+	if (context->fifo_cur == context->fifo_end) {
+		return;
+	}
+	uint16_t read_val;
+	uint8_t ran_source = 0, partial = 0;
+	uint16_t dma_len;
+	switch(context->regs[REG_DMASRC_H] & 0xC0)
+	{
+	//68K -> VDP
+	case 0:
+	case 0x40:
+		if (!slot || !is_refresh(context, slot-1)) {
+			read_val = read_dma_value((context->regs[REG_DMASRC_H] << 16) | (context->regs[REG_DMASRC_M] << 8) | context->regs[REG_DMASRC_L]);
+			ran_source = 1;
+		}
+		break;
+	//Copy
+	case 0xC0:
+		if (context->flags & FLAG_UNUSED_SLOT) {
 			switch(context->dma_cd & 0xF)
 			{
 			case VRAM_WRITE:
-				if (context->flags & FLAG_DMA_PROG) {
-					context->vdpmem[context->address ^ 1] = context->dma_val;
-					context->flags &= ~FLAG_DMA_PROG;
-				} else {
-					context->dma_val = read_dma_value((context->regs[REG_DMASRC_H] << 16) | (context->regs[REG_DMASRC_M] << 8) | context->regs[REG_DMASRC_L]);
-					context->vdpmem[context->address] = context->dma_val >> 8;
-					context->flags |= FLAG_DMA_PROG;
-				}
-				break;
-			case CRAM_WRITE: {
-				write_cram(context, context->address, read_dma_value((context->regs[REG_DMASRC_H] << 16) | (context->regs[REG_DMASRC_M] << 8) | context->regs[REG_DMASRC_L]));
-				//printf("CRAM DMA | %X set to %X from %X at %d\n", (context->address/2) & (CRAM_SIZE-1), context->cram[(context->address/2) & (CRAM_SIZE-1)], (context->regs[REG_DMASRC_H] << 17) | (context->regs[REG_DMASRC_M] << 9) | (context->regs[REG_DMASRC_L] << 1), context->cycles);
-				break;
-			}
-			case VSRAM_WRITE:
-				if (((context->address/2) & 63) < VSRAM_SIZE) {
-					context->vsram[(context->address/2) & 63] = read_dma_value((context->regs[REG_DMASRC_H] << 16) | (context->regs[REG_DMASRC_M] << 8) | context->regs[REG_DMASRC_L]);
-				}
-				break;
-			}
-			break;
-		//Fill
-		case 0x80:
-			switch(context->dma_cd & 0xF)
-			{
-			case VRAM_WRITE:
-				//Charles MacDonald's VDP doc says that the low byte gets written first
-				context->vdpmem[context->address] = context->dma_val;
-				context->dma_val = (context->dma_val << 8) | ((context->dma_val >> 8) & 0xFF);
+				read_val = context->vdpmem[(context->regs[REG_DMASRC_M] << 8) | context->regs[REG_DMASRC_L]];
 				break;
 			case CRAM_WRITE:
-				write_cram(context, context->address, context->dma_val);
-				//printf("CRAM DMA Fill | %X set to %X at %d\n", (context->address/2) & (CRAM_SIZE-1), context->cram[(context->address/2) & (CRAM_SIZE-1)], context->cycles);
+				read_val = context->cram[context->regs[REG_DMASRC_L] & (CRAM_SIZE-1)];
 				break;
 			case VSRAM_WRITE:
-				if (((context->address/2) & 63) < VSRAM_SIZE) {
-					context->vsram[(context->address/2) & 63] = context->dma_val;
+				if ((context->regs[REG_DMASRC_L] & 63) < VSRAM_SIZE) {
+					read_val = context->vsram[context->regs[REG_DMASRC_L] & 63];
+				} else {
+					read_val = 0;
 				}
 				break;
 			}
-			break;
-		//Copy
-		case 0xC0:
-			if (context->flags & FLAG_DMA_PROG) {
-				switch(context->dma_cd & 0xF)
-				{
-				case VRAM_WRITE:
-					context->vdpmem[context->address] = context->dma_val;
-					break;
-				case CRAM_WRITE: {
-					write_cram(context, context->address, context->dma_val);
-					//printf("CRAM DMA Copy | %X set to %X from %X at %d\n", (context->address/2) & (CRAM_SIZE-1), context->cram[(context->address/2) & (CRAM_SIZE-1)], context->regs[REG_DMASRC_L] & (CRAM_SIZE-1), context->cycles);
-					break;
-				}
-				case VSRAM_WRITE:
-					if (((context->address/2) & 63) < VSRAM_SIZE) {
-						context->vsram[(context->address/2) & 63] = context->dma_val;
-					}
-					break;
-				}
-				context->flags &= ~FLAG_DMA_PROG;
-			} else {
-				//I assume, that DMA copy copies from the same RAM as the destination
-				//but it's possible I'm mistaken
-				switch(context->dma_cd & 0xF)
-				{
-				case VRAM_WRITE:
-					context->dma_val = context->vdpmem[(context->regs[REG_DMASRC_M] << 8) | context->regs[REG_DMASRC_L]];
-					break;
-				case CRAM_WRITE:
-					context->dma_val = context->cram[context->regs[REG_DMASRC_L] & (CRAM_SIZE-1)];
-					break;
-				case VSRAM_WRITE:
-					if ((context->regs[REG_DMASRC_L] & 63) < VSRAM_SIZE) {
-						context->dma_val = context->vsram[context->regs[REG_DMASRC_L] & 63];
-					} else {
-						context->dma_val = 0;
-					}
-					break;
-				}
-				context->flags |= FLAG_DMA_PROG;
-			}
-			break;
+			ran_source = 1;
+			context->flags &= ~FLAG_UNUSED_SLOT;
 		}
-		if (!(context->flags & FLAG_DMA_PROG)) {
-			context->address += context->regs[REG_AUTOINC];
-			context->regs[REG_DMASRC_L] += 1;
-			if (!context->regs[REG_DMASRC_L]) {
-				context->regs[REG_DMASRC_M] += 1;
-			}
-			dma_len = ((context->regs[REG_DMALEN_H] << 8) | context->regs[REG_DMALEN_L]) - 1;
-			context->regs[REG_DMALEN_H] = dma_len >> 8;
-			context->regs[REG_DMALEN_L] = dma_len;
-			if (!dma_len) {
-				//printf("DMA end at cycle %d\n", context->cycles);
-				context->flags &= ~FLAG_DMA_RUN;
-			}
+		break;
+	case 0x80:
+		read_val = (context->cd & 0xF) == VRAM_WRITE ? context->last_write_val >> 8 : context->last_write_val;
+		partial = 1;
+		ran_source = 1;
+		break;
+	}
+
+	if (ran_source) {
+		context->fifo_cur->cycle = context->cycles + ((context->latched_mode & BIT_H40) ? 16 : 20)*FIFO_LATENCY;
+		context->fifo_cur->address = context->address;
+		context->fifo_cur->value = read_val;
+		context->fifo_cur->cd = context->cd;
+		context->fifo_cur->partial = partial;
+		context->fifo_cur++;
+		context->regs[REG_DMASRC_L] += 1;
+		if (!context->regs[REG_DMASRC_L]) {
+			context->regs[REG_DMASRC_M] += 1;
 		}
-	} else {
-		fifo_entry * start = (context->fifo_end - FIFO_SIZE);
-		if (context->fifo_cur != start && start->cycle <= context->cycles) {
-			if ((context->regs[REG_MODE_2] & BIT_DMA_ENABLE) && (context->cd & DMA_START) && (context->regs[REG_DMASRC_H] & 0xC0) == 0x80) {
-				//printf("DMA fill started at %d\n", context->cycles);
-				context->flags |= FLAG_DMA_RUN;
-				context->dma_val = start->value;
-				context->address = start->address; //undo auto-increment
-				context->dma_cd = context->cd;
-			} else {
-				switch (start->cd & 0xF)
-				{
-				case VRAM_WRITE:
-					if (start->partial) {
-						//printf("VRAM Write: %X to %X\n", start->value, context->address ^ 1);
-						context->vdpmem[start->address ^ 1] = start->value;
-					} else {
-						//printf("VRAM Write High: %X to %X\n", start->value >> 8, context->address);
-						context->vdpmem[start->address] = start->value >> 8;
-						start->partial = 1;
-						//skip auto-increment and removal of entry from fifo
-						return;
-					}
-					break;
-				case CRAM_WRITE: {
-					//printf("CRAM Write | %X to %X\n", start->value, (start->address/2) & (CRAM_SIZE-1));
-					write_cram(context, start->address, start->value);
-					break;
-				}
-				case VSRAM_WRITE:
-					if (((start->address/2) & 63) < VSRAM_SIZE) {
-						//printf("VSRAM Write: %X to %X\n", start->value, context->address);
-						context->vsram[(start->address/2) & 63] = start->value;
-					}
-					break;
-				}
-				//context->address += context->regs[REG_AUTOINC];
-			}
-			fifo_entry * cur = start+1;
-			if (cur < context->fifo_cur) {
-				memmove(start, cur, sizeof(fifo_entry) * (context->fifo_cur - cur));
-			}
-			context->fifo_cur -= 1;
-		} else {
-			context->flags |= FLAG_UNUSED_SLOT;
+		context->address += context->regs[REG_AUTOINC];
+		dma_len = ((context->regs[REG_DMALEN_H] << 8) | context->regs[REG_DMALEN_L]) - 1;
+		context->regs[REG_DMALEN_H] = dma_len >> 8;
+		context->regs[REG_DMALEN_L] = dma_len;
+		if (!dma_len) {
+			//printf("DMA end at cycle %d\n", context->cycles);
+			context->flags &= ~FLAG_DMA_RUN;
+			context->cd &= 0xF;
 		}
 	}
 }
@@ -1238,27 +1205,6 @@
 	context->latched_mode = (context->regs[REG_MODE_4] & 0x81) | (context->regs[REG_MODE_2] & BIT_PAL);
 }
 
-int is_refresh(vdp_context * context, uint32_t slot)
-{
-	if (context->latched_mode & BIT_H40) {
-		//TODO: Determine behavior for DMA fills and copies
-		return (slot == 37 || slot == 69 || slot == 102 || slot == 133 || slot == 165 || slot == 197 || slot >= 210
-		        || ((context->flags & FLAG_DMA_RUN) && ((context->dma_cd & 0xF) != VRAM_WRITE)) && (
-		            //both of the missed reads occurred right next to each other, but there seems
-					//to be some buffering involved, these values produce similar artifacts
-					//to what I see on my Model 2
-				    slot == 34 || slot == 66 || slot == 99 || slot == 130 || slot == 162 || slot == 194));
-	} else {
-		//TODO: Figure out which slots are refresh when display is off in 32-cell mode
-		//These numbers are guesses based on H40 numbers
-		return (slot == 24 || slot == 56 || slot == 88 || slot == 120 || slot == 152
-		        || ((context->flags & FLAG_DMA_RUN) && ((context->dma_cd & 0xF) != VRAM_WRITE)) && (
-				    slot == 21 || slot == 53 || slot == 85 || slot == 117 || slot == 149));
-		//The numbers below are the refresh slots during active display
-		//return (slot == 66 || slot == 98 || slot == 130 || slot == 162);
-	}
-}
-
 void check_render_bg(vdp_context * context, int32_t line, uint32_t slot)
 {
 	if (line > 0) {
@@ -1299,6 +1245,7 @@
 {
 	while(context->cycles < target_cycles)
 	{
+		context->flags &= ~FLAG_UNUSED_SLOT;
 		uint32_t line = context->cycles / MCLKS_LINE;
 		uint32_t active_lines = context->latched_mode & BIT_PAL ? PAL_ACTIVE : NTSC_ACTIVE;
 		if (!context->cycles) {
@@ -1428,6 +1375,9 @@
 				check_render_bg(context, line, slot);
 			}
 		}
+		if (context->flags & FLAG_DMA_RUN && !is_refresh(context, slot)) {
+			run_dma_src(context, slot);
+		}
 		context->cycles += inccycles;
 	}
 }
@@ -1522,7 +1472,7 @@
 int vdp_data_port_write(vdp_context * context, uint16_t value)
 {
 	//printf("data port write: %X at %d\n", value, context->cycles);
-	if (context->flags & FLAG_DMA_RUN) {
+	if (context->flags & FLAG_DMA_RUN && (context->regs[REG_DMASRC_H] & 0xC0) != 0x80) {
 		return -1;
 	}
 	if (!(context->cd & 1)) {
@@ -1533,12 +1483,19 @@
 	/*if (context->fifo_cur == context->fifo_end) {
 		printf("FIFO full, waiting for space before next write at cycle %X\n", context->cycles);
 	}*/
+	if (context->cd & 0x20 && (context->regs[REG_DMASRC_H] & 0xC0) == 0x80) {
+		context->flags &= ~FLAG_DMA_RUN;
+	}
 	while (context->fifo_cur == context->fifo_end) {
 		vdp_run_context(context, context->cycles + ((context->latched_mode & BIT_H40) ? 16 : 20));
 	}
-	context->fifo_cur->cycle = context->cycles;
+	context->fifo_cur->cycle = context->cycles + ((context->latched_mode & BIT_H40) ? 16 : 20)*FIFO_LATENCY;
 	context->fifo_cur->address = context->address;
 	context->fifo_cur->value = value;
+	context->last_write_val = value;
+	if (context->cd & 0x20 && (context->regs[REG_DMASRC_H] & 0xC0) == 0x80) {
+		context->flags |= FLAG_DMA_RUN;
+	}
 	context->fifo_cur->cd = context->cd;
 	context->fifo_cur->partial = 0;
 	context->fifo_cur++;
--- a/vdp.h	Wed Sep 04 19:34:19 2013 -0700
+++ b/vdp.h	Tue Sep 10 00:29:46 2013 -0700
@@ -143,7 +143,7 @@
 	sprite_info sprite_info_list[MAX_SPRITES_LINE];
 	uint16_t    col_1;
 	uint16_t    col_2;
-	uint16_t    dma_val;
+	uint16_t    last_write_val;
 	uint8_t     v_offset;
 	uint8_t     dma_cd;
 	uint8_t     hint_counter;