changeset 1090:a68274a25e2f

Initial stab at implementing the Jaguar object processor
author Michael Pavone <pavone@retrodev.com>
date Sun, 16 Oct 2016 18:25:18 -0700
parents 87597a048d38
children 9a74eb24e53c
files jag_video.c jag_video.h jaguar.c jaguar.h m68k_core.h
diffstat 5 files changed, 505 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/jag_video.c	Wed Oct 12 09:39:52 2016 -0700
+++ b/jag_video.c	Sun Oct 16 18:25:18 2016 -0700
@@ -2,6 +2,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "jag_video.h"
+#include "jaguar.h"
 #include "render.h"
 
 enum {
@@ -167,6 +168,325 @@
 	}
 }
 
+enum { //values of op.state, the object processor state handled by op_run
+	OBJ_IDLE,         //nothing to do; op_run just advances op.cycles to context->cycles
+	OBJ_FETCH_DESC1,  //read the first phrase of the object at OBJLIST and dispatch on its type
+	OBJ_FETCH_DESC2,  //read the second phrase (xpos, depth, pitch, widths, palette index, flags)
+	OBJ_FETCH_DESC3,  //read the third phrase of a scaled object (hscale, vscale, remainder)
+	OBJ_PROCESS,      //read pixel data phrases and write pixels to the line buffer
+	OBJ_HEIGHT_WB,    //write the updated first phrase (height, data address) back to the object
+	OBJ_REMAINDER_WB, //write the updated scale/remainder phrase back for scaled objects
+	OBJ_GPU_WAIT      //entered for GPU objects; handled the same way as OBJ_IDLE
+};
+
+enum { //object types; op_run compares these with the low 3 bits of an object's first phrase
+	OBJ_BITMAP, //unscaled bitmap, described by two phrases
+	OBJ_SCALED, //scaled bitmap, described by three phrases
+	OBJ_GPU,    //puts the object processor into OBJ_GPU_WAIT
+	OBJ_BRANCH, //conditionally replaces the object list address with the link field
+	OBJ_STOP    //puts the object processor into OBJ_IDLE
+};
+
+//Runs the object processor state machine until op.cycles catches up with
+//context->cycles. Each pass through the loop handles one state: fetching a
+//phrase of the current object, emitting pixels into write_line_buffer or
+//writing the updated object phrases back to memory.
+void op_run(jag_video *context)
+{
+	while (context->op.cycles < context->cycles)
+	{
+		switch (context->op.state)
+		{
+		case OBJ_IDLE:
+		case OBJ_GPU_WAIT:
+			context->op.cycles = context->cycles;
+			break;
+		case OBJ_FETCH_DESC1: {
+			uint32_t address = context->regs[VID_OBJLIST1] | (uint32_t)context->regs[VID_OBJLIST2] << 16;
+			uint64_t val = jag_read_phrase(context->system, address, &context->op.cycles);
+			address += 8;
+			context->regs[VID_OBJ0] = val >> 48;
+			context->regs[VID_OBJ1] = val >> 32;
+			context->regs[VID_OBJ2] = val >> 16;
+			context->regs[VID_OBJ3] = val;
+			context->op.type = val & 7;
+			context->op.has_prefetch = 0;
+			uint16_t ypos = val >> 3 & 0x7FF;
+			switch (context->op.type)
+			{
+			case OBJ_BITMAP:
+			case OBJ_SCALED: {
+				uint16_t height = val >> 14 & 0x3FF; //10-bit field; bit 24 is the lowest bit of the link field extracted below
+				uint32_t link = (address & 0xC00007) | (val >> 21 & 0x3FFFF8);
+				if ((ypos == 0x7FF || context->regs[VID_VCOUNT] >= ypos) && height) {
+					context->op.state = OBJ_FETCH_DESC2;
+					context->op.obj_start = address - 8;
+					context->op.ypos = ypos;
+					context->op.height = height;
+					context->op.link = link;
+					context->op.data_address = val >> 40 & 0xFFFFF8;
+					context->op.cur_address = context->op.data_address;
+				} else {
+					//object is not visible on this line, advance to next object
+					address = link;
+				}
+				break;
+			}
+			case OBJ_GPU:
+				context->op.state = OBJ_GPU_WAIT;
+				break;
+			case OBJ_BRANCH: {
+				uint8_t branch;
+				switch(val >> 14 & 7)
+				{
+				case 0:
+					branch = ypos == context->regs[VID_VCOUNT] || ypos == 0x7FF;
+					break;
+				case 1:
+					branch = ypos > context->regs[VID_VCOUNT];
+					break;
+				case 2:
+					branch = ypos < context->regs[VID_VCOUNT];
+					break;
+				case 3:
+					branch = context->regs[VID_OBJFLAG] & 1;
+					break;
+				case 4:
+					branch = (context->regs[VID_HCOUNT] & 0x400) != 0;
+					break;
+				default:
+					branch = 0;
+					fprintf(stderr, "Invalid branch CC type %d in object at %X\n", (int)(val >> 14 & 7), address-8);
+					break;
+				}
+				if (branch) {
+					address = (address & 0xC00007) | (val >> 21 & 0x3FFFF8);
+				}
+				break; //keep fetching objects; falling into OBJ_STOP would idle the processor after every branch
+			}
+			case OBJ_STOP:
+				//TODO: trigger interrupt
+				context->op.state = OBJ_IDLE;
+				break;
+			}
+			context->regs[VID_OBJLIST1] = address;
+			context->regs[VID_OBJLIST2] = address >> 16;
+			break;
+		}
+		case OBJ_FETCH_DESC2: {
+			uint32_t address = context->regs[VID_OBJLIST1] | (uint32_t)context->regs[VID_OBJLIST2] << 16;
+			uint64_t val = jag_read_phrase(context->system, address, &context->op.cycles);
+			address += 8;
+			context->op.xpos = val & 0xFFF;
+			if (context->op.xpos & 0x800) {
+				context->op.xpos |= 0xF000;
+			}
+			context->op.increment = (val >> 15 & 0x7) * 8;
+			context->op.bpp = 1 << (val >> 12 & 7);
+			if (context->op.bpp == 32) {
+				context->op.bpp = 24;
+			}
+			context->op.line_pitch = (val >> 18 & 0x3FF) * 8;
+			if (context->op.bpp < 8) {
+				context->op.pal_offset = val >> 37;
+				if (context->op.bpp == 4) {
+					context->op.pal_offset &= 0xF0;
+				} else if(context->op.bpp == 2) {
+					context->op.pal_offset &= 0xFC;
+				} else {
+					context->op.pal_offset &= 0xFE;
+				}
+			} else {
+				context->op.pal_offset = 0;
+			}
+			context->op.line_phrases = val >> 28 & 0x3FF;
+			//the flag bits live above bit 31, so the constant must be 64 bits wide on every platform
+			context->op.hflip = (val & (1ULL << 45)) != 0;
+			context->op.addpixels = (val & (1ULL << 46)) != 0;
+			context->op.transparent = (val & (1ULL << 47)) != 0;
+			//TODO: do something with RELEASE flag
+			context->op.leftclip = val >> 49;
+			if (context->op.type == OBJ_SCALED) {
+				context->op.state = OBJ_FETCH_DESC3;
+				switch (context->op.bpp)
+				{
+				case 1:
+					context->op.leftclip &= 0x3F;
+					break;
+				//documentation isn't clear exactly how this works for higher bpp values
+				case 2:
+					context->op.leftclip &= 0x3E;
+					break;
+				case 4:
+					context->op.leftclip &= 0x3C;
+					break;
+				case 8:
+					context->op.leftclip &= 0x38;
+					break;
+				case 16:
+					context->op.leftclip &= 0x30;
+					break;
+				default:
+					context->op.leftclip = 0x20;
+					break;
+				}
+			} else {
+				context->op.state = OBJ_PROCESS;
+				address = context->op.link;
+				switch (context->op.bpp)
+				{
+				case 1:
+					context->op.leftclip &= 0x3E;
+					break;
+				case 2:
+					context->op.leftclip &= 0x3C;
+					break;
+				//values for 4bpp and up are sort of a guess
+				case 4:
+					context->op.leftclip &= 0x38;
+					break;
+				case 8:
+					context->op.leftclip &= 0x30;
+					break;
+				case 16:
+					context->op.leftclip &= 0x20;
+					break;
+				default:
+					context->op.leftclip = 0;
+					break;
+				}
+			}
+			if (context->op.xpos < 0) {
+				int16_t pixels_per_phrase = 64 / context->op.bpp;
+				int16_t clip = -context->op.xpos / pixels_per_phrase;
+				int16_t rem = -context->op.xpos % pixels_per_phrase;
+				if (clip >= context->op.line_phrases) {
+					context->op.line_phrases = 0;
+				} else {
+					context->op.line_phrases -= clip;
+					context->op.leftclip += rem * context->op.bpp;
+					if (context->op.leftclip >= 64) {
+						context->op.line_phrases--;
+						context->op.leftclip -= 64;
+					}
+					context->op.lb_offset = 0; //left-clipped objects start at the first line buffer entry rather than a stale offset
+				}
+			} else if (context->op.bpp < 32){
+				context->op.lb_offset = context->op.xpos;
+			} else {
+				context->op.lb_offset = context->op.xpos * 2;
+			}
+			if (context->op.lb_offset >= LINEBUFFER_WORDS || !context->op.line_phrases) {
+				//ignore objects that are completely offscreen
+				//not sure if that's how the hardware does it, but it would make sense
+				context->op.state = OBJ_FETCH_DESC1;
+				address = context->op.link;
+			}
+			context->regs[VID_OBJLIST1] = address;
+			context->regs[VID_OBJLIST2] = address >> 16;
+			break;
+		}
+		case OBJ_FETCH_DESC3: {
+			uint32_t address = context->regs[VID_OBJLIST1] | (uint32_t)context->regs[VID_OBJLIST2] << 16;
+			uint64_t val = jag_read_phrase(context->system, address, &context->op.cycles);
+			context->op.state = OBJ_PROCESS;
+			context->op.hscale = val & 0xFF;
+			context->op.hremainder = val & 0xFF;
+			context->op.vscale = val >> 8 & 0xFF;
+			context->op.remainder = val >> 16 & 0xFF;
+			context->regs[VID_OBJLIST1] = context->op.link;
+			context->regs[VID_OBJLIST2] = context->op.link >> 16;
+			break;
+		}
+		case OBJ_PROCESS: {
+			uint32_t proc_cycles = 0;
+			if (!context->op.has_prefetch && context->op.line_phrases) {
+				context->op.prefetch = jag_read_phrase(context->system, context->op.cur_address, &proc_cycles);
+				context->op.cur_address += context->op.increment;
+				context->op.has_prefetch = 1;
+				context->op.line_phrases--;
+			}
+			if (!proc_cycles) {
+				//run at least one cycle of writes even if we didn't spend any time reading
+				proc_cycles = 1;
+			}
+			while (proc_cycles)
+			{
+				if (context->op.type == OBJ_SCALED && context->op.hscale) {
+					while (context->op.hremainder <= 0 && context->op.im_bits) {
+						context->op.im_bits -= context->op.bpp;
+						context->op.hremainder += context->op.hscale;
+					}
+				}
+				if (context->op.im_bits && context->op.lb_offset < LINEBUFFER_WORDS) { //never write past the end of the line buffer
+					uint32_t val = context->op.im_data >> (context->op.im_bits - context->op.bpp);
+					val &= (1 << context->op.bpp) - 1;
+					context->op.im_bits -= context->op.bpp;
+					if (context->op.bpp < 16) {
+						val = context->clut[val + context->op.pal_offset];
+					}
+					if (context->op.bpp == 32) { //NOTE(review): bpp is remapped from 32 to 24 in OBJ_FETCH_DESC2, so this branch looks unreachable
+						context->write_line_buffer[context->op.lb_offset++] = val >> 16;
+					}
+					context->write_line_buffer[context->op.lb_offset++] = val;
+					if (context->op.type == OBJ_SCALED) {
+						context->op.hremainder -= 0x20;
+					}
+				}
+				if (context->op.im_bits && context->op.bpp < 32 && context->op.type == OBJ_BITMAP && context->op.lb_offset < LINEBUFFER_WORDS) {
+					uint32_t val = (context->op.im_data >> (context->op.im_bits - context->op.bpp)) & ((1 << context->op.bpp) - 1);
+					context->op.im_bits -= context->op.bpp;
+					if (context->op.bpp < 16) { //as for the first pixel: only palettized depths index the 256 entry clut
+						val = context->clut[val + context->op.pal_offset];
+					}
+					context->write_line_buffer[context->op.lb_offset++] = val;
+				}
+				context->op.cycles++; //op.cycles is the counter the outer loop compares against context->cycles
+				proc_cycles--;
+			}
+			if (!context->op.im_bits && context->op.has_prefetch) {
+				context->op.im_data = context->op.prefetch;
+				context->op.has_prefetch = 0;
+				//docs say this is supposed to be a value in pixels
+				//but given the "significant" bits part I'm guessing
+				//this is actually how many bits are pre-shifted off
+				//the first phrase read in a line
+				context->op.im_bits = 64 - context->op.leftclip;
+				context->op.leftclip = 0;
+			}
+			if (context->op.lb_offset >= LINEBUFFER_WORDS || (!context->op.im_bits && !context->op.line_phrases)) {
+				context->op.state = OBJ_HEIGHT_WB;
+			}
+			break;
+		}
+		case OBJ_HEIGHT_WB: {
+			if (context->op.type == OBJ_BITMAP) {
+				context->op.height--;
+				context->op.data_address += context->op.line_pitch;
+				context->op.state = OBJ_FETCH_DESC1;
+			} else {
+				context->op.remainder -= 0x20;
+				context->op.state = OBJ_REMAINDER_WB;
+				while (context->op.height && context->op.remainder <= 0) {
+					context->op.height--;
+					context->op.remainder += context->op.vscale;
+					context->op.data_address += context->op.line_pitch;
+				}
+			}
+			uint64_t val = context->op.type | context->op.ypos << 3  | context->op.height << 14
+				| ((uint64_t)context->op.link & 0x3FFFF8) << 21 | ((uint64_t)context->op.data_address) << 40;
+			context->op.cycles += jag_write_phrase(context->system, context->op.obj_start, val);
+			break;
+		}
+		case OBJ_REMAINDER_WB: {
+			uint64_t val = context->op.hscale | context->op.vscale << 8 | (context->op.remainder & 0xFF) << 16; //remainder may be negative; keep only its 8-bit field
+			context->op.cycles += jag_write_phrase(context->system, context->op.obj_start+16, val);
+			context->op.state = OBJ_FETCH_DESC1;
+			break;
+		}
+		}
+	}
+}
+
 void jag_video_run(jag_video *context, uint32_t target_cycle)
 {
 	if (context->regs[VID_VMODE] & BIT_TBGEN) {
@@ -195,9 +515,17 @@
 					context->write_line_buffer[i] = context->regs[VID_BGCOLOR];
 				}
 				
-				//TODO: kick off object processor
+				//kick off object processor
+				context->op.state = OBJ_FETCH_DESC1;
+			} else if (context->regs[VID_HCOUNT] == context->regs[VID_HDISP_END]) {
+				//stop object processor
+				context->op.state = OBJ_IDLE;
 			}
 			
+			context->cycles++;
+			op_run(context);
+			
+			//advance counters
 			if (
 				!context->output 
 				&& context->regs[VID_VCOUNT] == context->regs[VID_VDISP_BEGIN]
@@ -223,7 +551,7 @@
 			} else {
 				context->regs[VID_HCOUNT]++;
 			}
-			context->cycles++;
+			
 		}
 	} else {
 		context->cycles = target_cycle;
--- a/jag_video.h	Wed Oct 12 09:39:52 2016 -0700
+++ b/jag_video.h	Sun Oct 16 18:25:18 2016 -0700
@@ -50,19 +50,55 @@
 #define LINEBUFFER_WORDS 720
 
 typedef struct {
-	uint32_t     *output;
-	uint32_t     output_pitch;
-	uint16_t     regs[JAG_VIDEO_REGS];
+	uint64_t im_data;      //phrase currently being shifted out as pixels
+	uint64_t prefetch;     //next pixel data phrase, read ahead of im_data
+	uint32_t cycles;       //object processor's own cycle counter
+	uint32_t obj_start;    //address of the current object's first phrase
+	uint32_t link;         //address of the next object in the list
+	uint32_t data_address; //start of the pixel data for the current line
+	uint32_t cur_address;  //address of the next pixel data phrase to read
+	uint32_t increment;    //bytes between successive pixel data reads
+	uint32_t line_pitch;   //bytes added to data_address for each line
+	uint32_t lb_offset;    //index of the next line buffer entry to write
+	int16_t  xpos;
+	uint16_t ypos;
+	uint16_t height;
+	uint16_t line_phrases; //phrases left on the line; loaded from a 10-bit field, which an 8-bit member would truncate
+	int16_t  hscale;
+	int16_t  vscale;
+	int16_t  hremainder;
+	int16_t  remainder;
+	uint8_t  bpp;
+	uint8_t  state;        //one of the OBJ_IDLE..OBJ_GPU_WAIT states
+	uint8_t  type;         //low 3 bits of the object's first phrase
+	uint8_t  im_bits;      //number of bits of im_data not yet consumed
+	uint8_t  pal_offset;   //added to pixel values before the clut lookup
+	uint8_t  has_prefetch; //non-zero while prefetch holds an unconsumed phrase
+	uint8_t  hflip;
+	uint8_t  addpixels;
+	uint8_t  transparent;
+	uint8_t  leftclip;     //bits skipped at the start of the first phrase of a line
+} object_processor;
+
+typedef struct {
+	void             *system; //owning system; jaguar.c stores its jaguar_context here and op_run passes it to jag_read_phrase/jag_write_phrase
+	uint32_t         *output;
+	uint32_t         output_pitch;
+	uint16_t         regs[JAG_VIDEO_REGS];
 	
-	uint16_t     clut[256];
-	uint16_t     line_buffer_a[LINEBUFFER_WORDS];
-	uint16_t     line_buffer_b[LINEBUFFER_WORDS];
-	uint16_t     *write_line_buffer;
+	uint16_t         clut[256]; //color lookup table; op_run indexes it for pixels below 16 bits per pixel
+	uint16_t         line_buffer_a[LINEBUFFER_WORDS];
+	uint16_t         line_buffer_b[LINEBUFFER_WORDS];
+	uint16_t         *write_line_buffer; //buffer the object processor writes pixels into
 	
-	uint32_t     cycles;
-	uint8_t      pclock_div;
-	uint8_t      pclock_counter;
-	uint8_t      mode;
+	uint32_t         cycles; //video cycle counter; op_run runs until op.cycles reaches it
+	uint32_t         op_cycles; //NOTE(review): separate from op.cycles; confirm which counter the object processor is meant to advance
+	uint8_t          pclock_div;
+	uint8_t          pclock_counter;
+	uint8_t          mode;
+	
+	object_processor op; //object processor state machine, driven by op_run
+	
 } jag_video;
 
 
--- a/jaguar.c	Wed Oct 12 09:39:52 2016 -0700
+++ b/jaguar.c	Sun Oct 16 18:25:18 2016 -0700
@@ -89,6 +89,21 @@
 						mem_pointers[rom + 2] = system->cart + ((0x400000 & (system->cart_size-1)) >> 1);
 						system->memcon_written = 1;
 						printf("MEMCON1 write - ROMHI: %d\n", value & 1);
+						switch (system->memcon1 >> 3 & 3)
+						{
+						case 0:
+							system->rom_cycles = 10;
+							break;
+						case 1:
+							system->rom_cycles = 8;
+							break;
+						case 2:
+							system->rom_cycles = 6;
+							break;
+						case 3:
+							system->rom_cycles = 5;
+							break;
+						}
 						//TODO: invalidate code cache
 					}
 					system->memcon1 = value;
@@ -239,6 +254,112 @@
 	return 0xFFFF;
 }
 
+uint64_t rom0_read_64(uint32_t address, jaguar_context *system)
+{
+	address &= 0x1FFFFF; //phrase is built from four 16-bit reads, most significant word first
+	uint64_t phrase = 0;
+	for (uint32_t offset = 0; offset < 8; offset += 2) {
+		phrase = phrase << 16 | rom0_read_16(address + offset, system);
+	}
+	return phrase;
+}
+
+void rom0_write_64(uint32_t address, jaguar_context *system, uint64_t val)
+{
+	address &= 0x1FFFFF; //phrase is split into four 16-bit writes, most significant word first
+	for (int shift = 48; shift >= 0; shift -= 16) {
+		rom0_write_16(address, system, val >> shift);
+		address += 2;
+	}
+}
+
+//Reads the 64-bit phrase at address from whichever region the current MEMCON1
+//mapping places there and adds the access time to *cycles. The four 16-bit
+//words are combined most significant first.
+uint64_t jag_read_phrase(jaguar_context *system, uint32_t address, uint32_t *cycles)
+{
+	if (!system->memcon_written) {
+		//unsure of timing, but presumably at least 2 32-bit reads 
+		//which presumably take a minimum of 1 cycle
+		//reality probably depends on the exact area read
+		//docs seem to imply some areas only 16-bits wide whereas others are 32-bit
+		*cycles += 2;
+		return rom0_read_64(address, system);
+	}
+	uint16_t *src;
+	if (system->memcon1 & 1) {
+		if (address < 0x800000) {
+			src = system->dram + (address >> 1 & (DRAM_WORDS - 1));
+			//DRAM is 64-bits wide, but sounds like an access is still at least two cycles
+			*cycles += 2;
+		} else if (address < 0xE00000) {
+			//cart is slow and only 32-bits wide
+			*cycles += 2 * (system->rom_cycles);
+			//mask the byte address before halving it so the word index stays inside the cart
+			src = system->cart + ((address & (system->cart_size - 1)) >> 1);
+		} else {
+			*cycles += 2;
+			return rom0_read_64(address, system);
+		}
+	} else if (address >= 0x800000) { //the boundary address itself belongs to the upper region
+		src = system->dram + (address >> 1 & (DRAM_WORDS - 1));
+		//DRAM is 64-bits wide, but sounds like an access is still at least two cycles
+		*cycles += 2;
+	} else if (address >= 0x200000) {
+		//cart is slow and only 32-bits wide
+		*cycles += 2 * (system->rom_cycles);
+		src = system->cart + ((address & (system->cart_size - 1)) >> 1);
+	} else {
+		*cycles += 2;
+		return rom0_read_64(address, system);
+	}
+	return (uint64_t)src[0] << 48 | (uint64_t)src[1] << 32 | (uint64_t)src[2] << 16 | src[3];
+}
+
+//Writes the 64-bit phrase val at address into whichever region the current
+//MEMCON1 mapping places there, most significant 16-bit word first.
+//Returns the number of cycles the access is assumed to take.
+uint32_t jag_write_phrase(jaguar_context *system, uint32_t address, uint64_t val)
+{
+	if (!system->memcon_written) {
+		//timing is a guess: presumably at least 2 32-bit writes of at least 1 cycle each
+		rom0_write_64(address, system, val);
+		return 2;
+	}
+	uint16_t *dst;
+	uint32_t cycles;
+	if (system->memcon1 & 1) {
+		if (address < 0x800000) {
+			dst = system->dram + (address >> 1 & (DRAM_WORDS - 1));
+			//DRAM is 64-bits wide, but sounds like an access is still at least two cycles
+			cycles = 2;
+		} else if (address < 0xE00000) {
+			dst = system->cart + ((address & (system->cart_size - 1)) >> 1); //mask the byte address before halving it
+			//cart is slow and only 32-bits wide
+			cycles = 2 * (system->rom_cycles);
+		} else {
+			rom0_write_64(address, system, val);
+			return 2;
+		}
+	} else if (address >= 0x800000) { //the boundary address itself belongs to the upper region
+		dst = system->dram + (address >> 1 & (DRAM_WORDS - 1));
+		//DRAM is 64-bits wide, but sounds like an access is still at least two cycles
+		cycles = 2;
+	} else if (address >= 0x200000) {
+		dst = system->cart + ((address & (system->cart_size - 1)) >> 1); //mask the byte address before halving it
+		//cart is slow and only 32-bits wide
+		cycles = 2 * (system->rom_cycles);
+	} else {
+		rom0_write_64(address, system, val);
+		return 2;
+	}
+	dst[0] = val >> 48;
+	dst[1] = val >> 32;
+	dst[2] = val >> 16;
+	dst[3] = val;
+	return cycles;
+}
+
 m68k_context * sync_components(m68k_context * context, uint32_t address)
 {
 	jaguar_context *system = context->system;
@@ -318,6 +439,7 @@
 	system->m68k = init_68k_context(opts, handle_m68k_reset);
 	system->m68k->system = system;
 	system->video = jag_video_init();
+	system->video->system = system;
 	return system;
 }
 
--- a/jaguar.h	Wed Oct 12 09:39:52 2016 -0700
+++ b/jaguar.h	Sun Oct 16 18:25:18 2016 -0700
@@ -6,7 +6,7 @@
 #define DSP_RAM_BYTES 8192
 
 #include "jag_video.h"
-
+typedef struct m68k_context m68k_context;
 typedef struct {
 	m68k_context *m68k;
 	jag_video    *video;
@@ -16,6 +16,7 @@
 	uint32_t     cart_size;
 	uint32_t     memcon1;
 	uint32_t     memcon2;
+	uint32_t     rom_cycles;
 	uint16_t     write_latch;
 	uint8_t      write_pending;
 	
@@ -26,5 +27,8 @@
 	uint8_t      memcon_written;
 } jaguar_context;
 
+uint64_t jag_read_phrase(jaguar_context *system, uint32_t address, uint32_t *cycles);
+uint32_t jag_write_phrase(jaguar_context *system, uint32_t address, uint64_t value);
+
 
 #endif //JAGUAR_H_
--- a/m68k_core.h	Wed Oct 12 09:39:52 2016 -0700
+++ b/m68k_core.h	Sun Oct 16 18:25:18 2016 -0700
@@ -47,7 +47,7 @@
 	code_ptr		set_ccr;
 } m68k_options;
 
-typedef struct {
+typedef struct m68k_context {
 	uint8_t         flags[5];
 	uint8_t         status;
 	uint16_t        int_ack;