# HG changeset patch
# User Michael Pavone
# Date 1476667518 25200
# Node ID a68274a25e2fb2ede7488850a83574ae6155e4f3
# Parent  87597a048d380ce703c7bc7a2d2d6609ea296e7d
Initial stab at implementing the Jaguar object processor

diff -r 87597a048d38 -r a68274a25e2f jag_video.c
--- a/jag_video.c	Wed Oct 12 09:39:52 2016 -0700
+++ b/jag_video.c	Sun Oct 16 18:25:18 2016 -0700
@@ -2,6 +2,7 @@
 #include <stdlib.h>
 #include <stdio.h>
 #include "jag_video.h"
+#include "jaguar.h"
 #include "render.h"
 
 enum {
@@ -167,6 +168,336 @@
 	}
 }
 
+//states of the object processor state machine
+enum {
+	OBJ_IDLE,
+	OBJ_FETCH_DESC1,
+	OBJ_FETCH_DESC2,
+	OBJ_FETCH_DESC3,
+	OBJ_PROCESS,
+	OBJ_HEIGHT_WB,
+	OBJ_REMAINDER_WB,
+	OBJ_GPU_WAIT
+};
+
+//object types, taken from the low 3 bits of an object's first phrase
+enum {
+	OBJ_BITMAP,
+	OBJ_SCALED,
+	OBJ_GPU,
+	OBJ_BRANCH,
+	OBJ_STOP
+};
+
+//Advances the object processor state machine until its cycle counter
+//has caught up with the cycle counter of the video context
+void op_run(jag_video *context)
+{
+	while (context->op.cycles < context->cycles)
+	{
+		switch (context->op.state)
+		{
+		case OBJ_IDLE:
+		case OBJ_GPU_WAIT:
+			context->op.cycles = context->cycles;
+			break;
+		case OBJ_FETCH_DESC1: {
+			uint32_t address = context->regs[VID_OBJLIST1] | context->regs[VID_OBJLIST2] << 16;
+			uint64_t val = jag_read_phrase(context->system, address, &context->op.cycles);
+			address += 8;
+
+			context->regs[VID_OBJ0] = val >> 48;
+			context->regs[VID_OBJ1] = val >> 32;
+			context->regs[VID_OBJ2] = val >> 16;
+			context->regs[VID_OBJ3] = val;
+			context->op.type = val & 7;
+			context->op.has_prefetch = 0;
+			uint16_t ypos = val >> 3 & 0x7FF;
+			switch (context->op.type)
+			{
+			case OBJ_BITMAP:
+			case OBJ_SCALED: {
+				uint16_t height = val >> 14 & 0x7FF;
+				uint32_t link = (address & 0xC00007) | (val >> 21 & 0x3FFFF8);
+				if ((ypos == 0x7FF || context->regs[VID_VCOUNT] >= ypos) && height) {
+					context->op.state = OBJ_FETCH_DESC2;
+					context->op.obj_start = address - 8;
+					context->op.ypos = ypos;
+					context->op.height = height;
+					context->op.link = link;
+					context->op.data_address = val >> 40 & 0xFFFFF8;
+					context->op.cur_address = context->op.data_address;
+				} else {
+					//object is not visible on this line, advance to next object
+					address = link;
+				}
+				break;
+			}
+			case OBJ_GPU:
+				context->op.state = OBJ_GPU_WAIT;
+				break;
+			case OBJ_BRANCH: {
+				uint8_t branch;
+				switch(val >> 14 & 7)
+				{
+				case 0:
+					branch = ypos == context->regs[VID_VCOUNT] || ypos == 0x7FF;
+					break;
+				case 1:
+					branch = ypos > context->regs[VID_VCOUNT];
+					break;
+				case 2:
+					branch = ypos < context->regs[VID_VCOUNT];
+					break;
+				case 3:
+					branch = context->regs[VID_OBJFLAG] & 1;
+					break;
+				case 4:
+					branch = (context->regs[VID_HCOUNT] & 0x400) != 0;
+					break;
+				default:
+					branch = 0;
+					fprintf(stderr, "Invalid branch CC type %d in object at %X\n", (int)(val >> 14 & 7), address-8);
+					break;
+				}
+				if (branch) {
+					address &= 0xC00007;
+					address |= val >> 21 & 0x3FFFF8;
+				}
+				//keep fetching from the chosen address; falling into OBJ_STOP would halt the list
+				break;
+			}
+			case OBJ_STOP:
+				//TODO: trigger interrupt
+				context->op.state = OBJ_IDLE;
+				break;
+			}
+			context->regs[VID_OBJLIST1] = address;
+			context->regs[VID_OBJLIST2] = address >> 16;
+			break;
+		}
+		case OBJ_FETCH_DESC2: {
+			uint32_t address = context->regs[VID_OBJLIST1] | context->regs[VID_OBJLIST2] << 16;
+			uint64_t val = jag_read_phrase(context->system, address, &context->op.cycles);
+			address += 8;
+
+			context->op.xpos = val & 0xFFF;
+			if (context->op.xpos & 0x800) {
+				context->op.xpos |= 0xF000;
+			}
+			context->op.increment = (val >> 15 & 0x7) * 8;
+			context->op.bpp = 1 << (val >> 12 & 7);
+			if (context->op.bpp == 32) {
+				context->op.bpp = 24;
+			}
+			context->op.line_pitch = (val >> 18 & 0x3FF) * 8;
+			if (context->op.bpp < 8) {
+				context->op.pal_offset = val >> 37;
+				if (context->op.bpp == 4) {
+					context->op.pal_offset &= 0xF0;
+				} else if(context->op.bpp == 2) {
+					context->op.pal_offset &= 0xFC;
+				} else {
+					context->op.pal_offset &= 0xFE;
+				}
+			} else {
+				context->op.pal_offset = 0;
+			}
+			context->op.line_phrases = val >> 28 & 0x3FF;
+			context->op.hflip = (val & (1ULL << 45)) != 0;
+			context->op.addpixels = (val & (1ULL << 46)) != 0;
+			context->op.transparent = (val & (1ULL << 47)) != 0;
+			//TODO: do something with RELEASE flag
+			context->op.leftclip = val >> 49;
+			if (context->op.type == OBJ_SCALED) {
+				context->op.state = OBJ_FETCH_DESC3;
+				switch (context->op.bpp)
+				{
+				case 1:
+					context->op.leftclip &= 0x3F;
+
+					break;
+				//documentation isn't clear exactly how this works for higher bpp values
+				case 2:
+					context->op.leftclip &= 0x3E;
+					break;
+				case 4:
+					context->op.leftclip &= 0x3C;
+					break;
+				case 8:
+					context->op.leftclip &= 0x38;
+					break;
+				case 16:
+					context->op.leftclip &= 0x30;
+					break;
+				default:
+					context->op.leftclip = 0x20;
+					break;
+				}
+			} else {
+				context->op.state = OBJ_PROCESS;
+				address = context->op.link;
+				switch (context->op.bpp)
+				{
+				case 1:
+					context->op.leftclip &= 0x3E;
+					break;
+				case 2:
+					context->op.leftclip &= 0x3C;
+					break;
+				//values for 4bpp and up are sort of a guess
+				case 4:
+					context->op.leftclip &= 0x38;
+					break;
+				case 8:
+					context->op.leftclip &= 0x30;
+					break;
+				case 16:
+					context->op.leftclip &= 0x20;
+					break;
+				default:
+					context->op.leftclip = 0;
+					break;
+				}
+			}
+			if (context->op.xpos < 0) {
+				int16_t pixels_per_phrase = 64 / context->op.bpp;
+				int16_t clip = -context->op.xpos / pixels_per_phrase;
+				int16_t rem = -context->op.xpos % pixels_per_phrase;
+				//an object starting left of the screen is drawn from the start of the line buffer
+				context->op.lb_offset = 0;
+				if (clip >= context->op.line_phrases) {
+					context->op.line_phrases = 0;
+				} else {
+					context->op.line_phrases -= clip;
+					context->op.leftclip += rem * context->op.bpp;
+					if (context->op.leftclip >= 64) {
+						context->op.line_phrases--;
+						context->op.leftclip -= 64;
+					}
+
+				}
+			} else if (context->op.bpp < 32){
+				context->op.lb_offset = context->op.xpos;
+			} else {
+				context->op.lb_offset = context->op.xpos * 2;
+			}
+			if (context->op.lb_offset >= LINEBUFFER_WORDS || !context->op.line_phrases) {
+				//ignore objects that are completely offscreen
+				//not sure if that's how the hardware does it, but it would make sense
+				context->op.state = OBJ_FETCH_DESC1;
+				address = context->op.link;
+			}
+			context->regs[VID_OBJLIST1] = address;
+			context->regs[VID_OBJLIST2] = address >> 16;
+			break;
+		}
+		case OBJ_FETCH_DESC3: {
+			uint32_t address = context->regs[VID_OBJLIST1] | context->regs[VID_OBJLIST2] << 16;
+			uint64_t val = jag_read_phrase(context->system, address, &context->op.cycles);
+
+			context->op.state = OBJ_PROCESS;
+			context->op.hscale = val & 0xFF;
+			context->op.hremainder = val & 0xFF;
+			context->op.vscale = val >> 8 & 0xFF;
+			context->op.remainder = val >> 16 & 0xFF;
+
+			context->regs[VID_OBJLIST1] = context->op.link;
+			context->regs[VID_OBJLIST2] = context->op.link >> 16;
+			break;
+		}
+		case OBJ_PROCESS: {
+			uint32_t proc_cycles = 0;
+			if (!context->op.has_prefetch && context->op.line_phrases) {
+				context->op.prefetch = jag_read_phrase(context->system, context->op.cur_address, &proc_cycles);
+				context->op.cur_address += context->op.increment;
+				context->op.has_prefetch = 1;
+				context->op.line_phrases--;
+			}
+			if (!proc_cycles) {
+				//run at least one cycle of writes even if we didn't spend any time reading
+				proc_cycles = 1;
+			}
+			while (proc_cycles)
+			{
+				if (context->op.type == OBJ_SCALED && context->op.hscale) {
+					while (context->op.hremainder <= 0 && context->op.im_bits) {
+						context->op.im_bits -= context->op.bpp;
+						context->op.hremainder += context->op.hscale;
+					}
+				}
+				if (context->op.im_bits && context->op.lb_offset < LINEBUFFER_WORDS) {
+					uint32_t val = context->op.im_data >> (context->op.im_bits - context->op.bpp);
+					val &= (1 << context->op.bpp) - 1;
+					context->op.im_bits -= context->op.bpp;
+					if (context->op.bpp < 16) {
+						val = context->clut[val + context->op.pal_offset];
+					}
+					if (context->op.bpp == 32) {
+						context->write_line_buffer[context->op.lb_offset++] = val >> 16;
+					}
+					context->write_line_buffer[context->op.lb_offset++] = val;
+					if (context->op.type == OBJ_SCALED) {
+						context->op.hremainder -= 0x20;
+					}
+				}
+				if (context->op.im_bits && context->op.bpp < 32 && context->op.type == OBJ_BITMAP && context->op.lb_offset < LINEBUFFER_WORDS) {
+					uint32_t val = context->op.im_data >> (context->op.im_bits - context->op.bpp);
+					val &= (1 << context->op.bpp) - 1;
+					context->op.im_bits -= context->op.bpp;
+					if (context->op.bpp < 16) {
+						//only paletted depths may index the 256 entry CLUT
+						val = context->clut[val + context->op.pal_offset];
+					}
+					context->write_line_buffer[context->op.lb_offset++] = val;
+				}
+				context->op.cycles++;
+				proc_cycles--;
+			}
+			if (!context->op.im_bits && context->op.has_prefetch) {
+				context->op.im_data = context->op.prefetch;
+				context->op.has_prefetch = 0;
+				//docs say this is supposed to be a value in pixels
+				//but given the "significant" bits part I'm guessing
+				//this is actually how many bits are pre-shifted off
+				//the first phrase read in a line
+				context->op.im_bits = 64 - context->op.leftclip;
+				context->op.leftclip = 0;
+			}
+			if (context->op.lb_offset >= LINEBUFFER_WORDS || (!context->op.im_bits && !context->op.line_phrases)) {
+				context->op.state = OBJ_HEIGHT_WB;
+			}
+			break;
+		}
+		case OBJ_HEIGHT_WB: {
+			if (context->op.type == OBJ_BITMAP) {
+				context->op.height--;
+				context->op.data_address += context->op.line_pitch;
+				context->op.state = OBJ_FETCH_DESC1;
+			} else {
+				context->op.remainder -= 0x20;
+				context->op.state = OBJ_REMAINDER_WB;
+				while (context->op.height && context->op.remainder <= 0) {
+					context->op.height--;
+					context->op.remainder += context->op.vscale;
+					context->op.data_address += context->op.line_pitch;
+				}
+			}
+			uint64_t val = context->op.type | context->op.ypos << 3 | context->op.height << 14
+				| ((uint64_t)context->op.link & 0x3FFFF8) << 21 | ((uint64_t)context->op.data_address) << 40;
+			context->op.cycles += jag_write_phrase(context->system, context->op.obj_start, val);
+			break;
+		}
+		case OBJ_REMAINDER_WB: {
+			uint64_t val = (uint64_t)(context->op.hscale & 0xFF) | (uint64_t)(context->op.vscale & 0xFF) << 8 | (uint64_t)(context->op.remainder & 0xFF) << 16;
+			context->op.cycles += jag_write_phrase(context->system, context->op.obj_start+16, val);
+			context->op.state = OBJ_FETCH_DESC1;
+			break;
+		}
+		}
+	}
+}
+
 void jag_video_run(jag_video *context, uint32_t target_cycle)
 {
 	if (context->regs[VID_VMODE] & BIT_TBGEN) {
@@ -195,9 +526,17 @@
 					context->write_line_buffer[i] = context->regs[VID_BGCOLOR];
 				}
 
-				//TODO: kick off object processor
+				//kick off object processor
+				context->op.state = OBJ_FETCH_DESC1;
+			} else if (context->regs[VID_HCOUNT] == context->regs[VID_HDISP_END]) {
+				//stop object processor
+				context->op.state = OBJ_IDLE;
 			}
 
+			context->cycles++;
+			op_run(context);
+
+			//advance counters
 			if (
 				!context->output
 				&& context->regs[VID_VCOUNT] == context->regs[VID_VDISP_BEGIN]
@@ -223,7 +562,7 @@
 			} else {
 				context->regs[VID_HCOUNT]++;
 			}
-			context->cycles++;
+
 		}
 	} else {
 		context->cycles = target_cycle;
diff -r 87597a048d38 -r a68274a25e2f jag_video.h
--- a/jag_video.h	Wed Oct 12 09:39:52 2016 -0700
+++ b/jag_video.h	Sun Oct 16 18:25:18 2016 -0700
@@ -50,19 +50,56 @@
 #define LINEBUFFER_WORDS 720
 
+//working state of the object processor, updated by op_run
 typedef struct {
-	uint32_t *output;
-	uint32_t output_pitch;
-	uint16_t regs[JAG_VIDEO_REGS];
+	uint64_t im_data;
+	uint64_t prefetch;
+	uint32_t cycles;
+	uint32_t obj_start;
+	uint32_t link;
+	uint32_t data_address;
+	uint32_t cur_address;
+	uint32_t increment;
+	uint32_t line_pitch;
+	uint32_t lb_offset;
+	int16_t  xpos;
+	uint16_t ypos;
+	uint16_t height;
+	uint16_t line_phrases; //loaded from a 10-bit descriptor field, too wide for uint8_t
+	int16_t  hscale;
+	int16_t  vscale;
+	int16_t  hremainder;
+	int16_t  remainder;
+	uint8_t  bpp;
+	uint8_t  state;
+	uint8_t  type;
+	uint8_t  im_bits;
+	uint8_t  pal_offset;
+	uint8_t  has_prefetch;
+	uint8_t  hflip;
+	uint8_t  addpixels;
+	uint8_t  transparent;
+	uint8_t  leftclip;
+} object_processor;
+
+typedef struct {
+	void     *system;
+	uint32_t *output;
+	uint32_t output_pitch;
+	uint16_t regs[JAG_VIDEO_REGS];
 
-	uint16_t clut[256];
-	uint16_t line_buffer_a[LINEBUFFER_WORDS];
-	uint16_t line_buffer_b[LINEBUFFER_WORDS];
-	uint16_t *write_line_buffer;
+	uint16_t clut[256];
+	uint16_t line_buffer_a[LINEBUFFER_WORDS];
+	uint16_t line_buffer_b[LINEBUFFER_WORDS];
+	uint16_t *write_line_buffer;
 
-	uint32_t cycles;
-	uint8_t pclock_div;
-	uint8_t pclock_counter;
-	uint8_t mode;
+	uint32_t cycles;
+	uint32_t op_cycles;
+	uint8_t  pclock_div;
+	uint8_t  pclock_counter;
+	uint8_t  mode;
+
+	object_processor op;
+
 } jag_video;
 
 
diff -r 87597a048d38 -r a68274a25e2f jaguar.c
--- a/jaguar.c	Wed Oct 12 09:39:52 2016 -0700
+++ b/jaguar.c	Sun Oct 16 18:25:18 2016 -0700
@@ -89,6 +89,22 @@
 			mem_pointers[rom + 2] = system->cart + ((0x400000 & (system->cart_size-1)) >> 1);
 			system->memcon_written = 1;
 			printf("MEMCON1 write - ROMHI: %d\n", value & 1);
+			//bits 3-4 of the value being written select the cart access time
+			switch (value >> 3 & 3)
+			{
+			case 0:
+				system->rom_cycles = 10;
+				break;
+			case 1:
+				system->rom_cycles = 8;
+				break;
+			case 2:
+				system->rom_cycles = 6;
+				break;
+			case 3:
+				system->rom_cycles = 5;
+				break;
+			}
 			//TODO: invalidate code cache
 		}
 		system->memcon1 = value;
@@ -239,6 +255,120 @@
 	return 0xFFFF;
 }
 
+//reads a phrase (64 bits) as four consecutive rom0_read_16 calls, most significant word first
+uint64_t rom0_read_64(uint32_t address, jaguar_context *system)
+{
+	address &= 0x1FFFFF;
+	uint64_t high = rom0_read_16(address, system);
+	uint64_t highmid = rom0_read_16(address+2, system);
+	uint64_t lowmid = rom0_read_16(address+4, system);
+	uint64_t low = rom0_read_16(address+6, system);
+	return high << 48 | highmid << 32 | lowmid << 16 | low;
+}
+
+//writes a phrase (64 bits) as four consecutive rom0_write_16 calls, most significant word first
+void rom0_write_64(uint32_t address, jaguar_context *system, uint64_t val)
+{
+	address &= 0x1FFFFF;
+	rom0_write_16(address, system, val >> 48);
+	rom0_write_16(address+2, system, val >> 32);
+	rom0_write_16(address+4, system, val >> 16);
+	rom0_write_16(address+6, system, val);
+}
+
+//Reads a phrase (64 bits) from the memory region that address maps to
+//and adds the estimated cost of the access to *cycles
+uint64_t jag_read_phrase(jaguar_context *system, uint32_t address, uint32_t *cycles)
+{
+	if (!system->memcon_written) {
+		//unsure of timing, but presumably at least 2 32-bit reads
+		//which presumably take a minimum of 1 cycle
+		//reality probably depends on the exact area read
+		//docs seem to imply some areas only 16-bits wide whereas others are 32-bit
+		*cycles += 2;
+		return rom0_read_64(address, system);
+	}
+	uint16_t *src;
+	if (system->memcon1 & 1) {
+		if (address < 0x800000) {
+			src = system->dram + (address >> 1 & (DRAM_WORDS - 1));
+			//DRAM is 64-bits wide, but sounds like an access is still at least two cycles
+			*cycles += 2;
+		} else if (address < 0xE00000) {
+			//cart is slow and only 32-bits wide
+			*cycles += 2 * (system->rom_cycles);
+			//cart_size is in bytes, so mask the byte address before converting it to a word index
+			src = system->cart + ((address & (system->cart_size - 1)) >> 1);
+		} else {
+			*cycles += 2;
+			return rom0_read_64(address, system);
+		}
+	} else if (address >= 0x800000) {
+		src = system->dram + (address >> 1 & (DRAM_WORDS - 1));
+		//DRAM is 64-bits wide, but sounds like an access is still at least two cycles
+		*cycles += 2;
+	} else if (address >= 0x200000) {
+		//cart is slow and only 32-bits wide
+		*cycles += 2 * (system->rom_cycles);
+		src = system->cart + ((address & (system->cart_size - 1)) >> 1);
+	} else {
+		*cycles += 2;
+		return rom0_read_64(address, system);
+	}
+	uint64_t high = src[0];
+	uint64_t highmid = src[1];
+	uint64_t lowmid = src[2];
+	uint64_t low = src[3];
+	return high << 48 | highmid << 32 | lowmid << 16 | low;
+}
+
+//Writes a phrase (64 bits) to the memory region that address maps to
+//and returns the estimated cost of the access in cycles
+uint32_t jag_write_phrase(jaguar_context *system, uint32_t address, uint64_t val)
+{
+	if (!system->memcon_written) {
+		//unsure of timing, but presumably at least 2 32-bit writes
+		//which presumably take a minimum of 1 cycle
+		//reality probably depends on the exact area written
+		//docs seem to imply some areas only 16-bits wide whereas others are 32-bit
+		rom0_write_64(address, system, val);
+		return 2;
+	}
+	uint16_t *dst;
+	uint32_t cycles;
+	if (system->memcon1 & 1) {
+		if (address < 0x800000) {
+			dst = system->dram + (address >> 1 & (DRAM_WORDS - 1));
+			//DRAM is 64-bits wide, but sounds like an access is still at least two cycles
+			cycles = 2;
+		} else if (address < 0xE00000) {
+			//cart_size is in bytes, so mask the byte address before converting it to a word index
+			dst = system->cart + ((address & (system->cart_size - 1)) >> 1);
+			//cart is slow and only 32-bits wide
+			cycles = 2 * (system->rom_cycles);
+		} else {
+			rom0_write_64(address, system, val);
+			return 2;
+		}
+	} else if (address >= 0x800000) {
+		dst = system->dram + (address >> 1 & (DRAM_WORDS - 1));
+		//DRAM is 64-bits wide, but sounds like an access is still at least two cycles
+		cycles = 2;
+	} else if (address >= 0x200000) {
+		dst = system->cart + ((address & (system->cart_size - 1)) >> 1);
+		//cart is slow and only 32-bits wide
+		cycles = 2 * (system->rom_cycles);
+	} else {
+		rom0_write_64(address, system, val);
+		return 2;
+	}
+	dst[0] = val >> 48;
+	dst[1] = val >> 32;
+	dst[2] = val >> 16;
+	dst[3] = val;
+	return cycles;
+}
+
 m68k_context * sync_components(m68k_context * context, uint32_t address)
 {
 	jaguar_context *system = context->system;
@@ -318,6 +448,7 @@
 	system->m68k = init_68k_context(opts, handle_m68k_reset);
 	system->m68k->system = system;
 	system->video = jag_video_init();
+	system->video->system = system;
 	return system;
 }
 
diff -r 87597a048d38 -r a68274a25e2f jaguar.h
--- a/jaguar.h	Wed Oct 12 09:39:52 2016 -0700
+++ b/jaguar.h	Sun Oct 16 18:25:18 2016 -0700
@@ -6,7 +6,7 @@
 #define DSP_RAM_BYTES 8192
 
 #include "jag_video.h"
-
+typedef struct m68k_context m68k_context;
 typedef struct {
 	m68k_context *m68k;
 	jag_video    *video;
@@ -16,6 +16,7 @@
 	uint32_t     cart_size;
 	uint32_t     memcon1;
 	uint32_t     memcon2;
+	uint32_t     rom_cycles;
 	uint16_t     write_latch;
 	uint8_t      write_pending;
 
@@ -26,5 +27,8 @@
 	uint8_t      memcon_written;
 } jaguar_context;
 
+uint64_t jag_read_phrase(jaguar_context *system, uint32_t address, uint32_t *cycles);
+uint32_t jag_write_phrase(jaguar_context *system, uint32_t address, uint64_t value);
+
 #endif //JAGUAR_H_
 
diff -r 87597a048d38 -r a68274a25e2f m68k_core.h
--- a/m68k_core.h	Wed Oct 12 09:39:52 2016 -0700
+++ b/m68k_core.h	Sun Oct 16 18:25:18 2016 -0700
@@ -47,7 +47,7 @@
 	code_ptr        set_ccr;
 } m68k_options;
 
-typedef struct {
+typedef struct m68k_context {
 	uint8_t         flags[5];
 	uint8_t         status;
 	uint16_t        int_ack;