# HG changeset patch # User Michael Pavone # Date 1697524204 25200 # Node ID f8b5142c06aa47d77b7bc2bfcbbbf2749d21d5d7 # Parent f0fc6c09517df79fc416167712bbd3be0c8ad8f3 Allow 68K to return mid-instruction. Adjust how 68K interrupt ack works so int2 busy flag timing is more correct. Fix some other SCD timing issues diff -r f0fc6c09517d -r f8b5142c06aa genesis.c --- a/genesis.c Fri Oct 13 22:44:36 2023 -0700 +++ b/genesis.c Mon Oct 16 23:30:04 2023 -0700 @@ -471,6 +471,49 @@ #define REFRESH_INTERVAL 128 #define REFRESH_DELAY 2 +void gen_update_refresh(m68k_context *context) +{ + uint32_t interval = MCLKS_PER_68K * REFRESH_INTERVAL; + genesis_context *gen = context->system; + gen->refresh_counter += context->current_cycle - gen->last_sync_cycle; + gen->last_sync_cycle = context->current_cycle; + context->current_cycle += REFRESH_DELAY * MCLKS_PER_68K * (gen->refresh_counter / interval); + gen->refresh_counter = gen->refresh_counter % interval; +} + +void gen_update_refresh_free_access(m68k_context *context) +{ + genesis_context *gen = context->system; + uint32_t before = context->current_cycle - 4*MCLKS_PER_68K; + if (before < gen->last_sync_cycle) { + return; + } + //Add refresh delays for any accesses that happened before the current one + gen->refresh_counter += before - gen->last_sync_cycle; + uint32_t interval = MCLKS_PER_68K * REFRESH_INTERVAL; + uint32_t delay = REFRESH_DELAY * MCLKS_PER_68K * (gen->refresh_counter / interval); + if (delay) { + //To avoid the extra cycles being absorbed in the refresh free update below, we need to update again + gen->refresh_counter = gen->refresh_counter % interval; + gen->refresh_counter += delay; + delay += REFRESH_DELAY * MCLKS_PER_68K * (gen->refresh_counter / interval); + context->current_cycle += delay; + } + gen->last_sync_cycle = context->current_cycle; + //advance refresh counter for the current access, but don't generate delays + gen->refresh_counter += 4*MCLKS_PER_68K; + gen->refresh_counter = gen->refresh_counter % 
interval; +} + +void gen_update_refresh_no_wait(m68k_context *context) +{ + uint32_t interval = MCLKS_PER_68K * REFRESH_INTERVAL; + genesis_context *gen = context->system; + gen->refresh_counter += context->current_cycle - gen->last_sync_cycle; + gen->last_sync_cycle = context->current_cycle; + gen->refresh_counter = gen->refresh_counter % interval; +} + #include #define ADJUST_BUFFER (8*MCLKS_LINE*313) #define MAX_NO_ADJUST (UINT_MAX-ADJUST_BUFFER) @@ -480,12 +523,11 @@ genesis_context * gen = context->system; vdp_context * v_context = gen->vdp; z80_context * z_context = gen->z80; - //lame estimation of refresh cycle delay - gen->refresh_counter += context->current_cycle - gen->last_sync_cycle; - if (!gen->bus_busy) { - context->current_cycle += REFRESH_DELAY * MCLKS_PER_68K * (gen->refresh_counter / (MCLKS_PER_68K * REFRESH_INTERVAL)); + if (gen->bus_busy) { + gen_update_refresh_no_wait(context); + } else { + gen_update_refresh(context); } - gen->refresh_counter = gen->refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL); uint32_t mclks = context->current_cycle; sync_z80(gen, mclks); @@ -561,11 +603,6 @@ gen->frame_end = vdp_cycles_to_frame_end(v_context); context->sync_cycle = gen->frame_end; //printf("Set sync cycle to: %d @ %d, vcounter: %d, hslot: %d\n", context->sync_cycle, context->current_cycle, v_context->vcounter, v_context->hslot); - if (context->int_ack) { - //printf("acknowledging %d @ %d:%d, vcounter: %d, hslot: %d\n", context->int_ack, context->current_cycle, v_context->cycles, v_context->vcounter, v_context->hslot); - vdp_int_ack(v_context); - context->int_ack = 0; - } if (!address && (gen->header.enter_debugger || gen->header.save_state)) { context->sync_cycle = context->current_cycle + 1; } @@ -627,7 +664,26 @@ context->sync_cycle = context->current_cycle + 1; } } - gen->last_sync_cycle = context->current_cycle; + return context; +} + +static m68k_context *int_ack(m68k_context *context) +{ + genesis_context * gen = context->system; + vdp_context 
* v_context = gen->vdp; + //printf("acknowledging %d @ %d:%d, vcounter: %d, hslot: %d\n", context->int_ack, context->current_cycle, v_context->cycles, v_context->vcounter, v_context->hslot); + vdp_run_context(v_context, context->current_cycle); + vdp_int_ack(v_context); + + //the Genesis responds to these exclusively with !VPA which means its a slow + //6800 operation. documentation says these can take between 10 and 19 cycles. + //actual results measurements seem to suggest it's actually between 9 and 18 + //Base 68K core has added 4 cycles for a normal int ack cycle already + //We add 5 + the current cycle count (in 68K cycles) mod 10 to simulate the + //additional variable delay from the use of the 6800 cycle + uint32_t cycle_count = context->current_cycle / context->options->gen.clock_divider; + context->current_cycle += 5 + (cycle_count % 10); + return context; } @@ -644,10 +700,7 @@ //printf("vdp_port write: %X, value: %X, cycle: %d\n", vdp_port, value, context->current_cycle); //do refresh check here so we can avoid adding a penalty for a refresh that happens during a VDP access - gen->refresh_counter += context->current_cycle - 4*MCLKS_PER_68K - gen->last_sync_cycle; - context->current_cycle += REFRESH_DELAY * MCLKS_PER_68K * (gen->refresh_counter / (MCLKS_PER_68K * REFRESH_INTERVAL)); - gen->refresh_counter = gen->refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL); - gen->last_sync_cycle = context->current_cycle; + gen_update_refresh_free_access(context); sync_components(context, 0); vdp_context *v_context = gen->vdp; @@ -726,16 +779,14 @@ vdp_test_port_write(gen->vdp, value); } - gen->last_sync_cycle -= 4 * MCLKS_PER_68K; //refresh may have happened while we were waiting on the VDP, //so advance refresh_counter but don't add any delays if (vdp_port >= 4 && vdp_port < 8 && v_context->cycles != before_cycle) { gen->refresh_counter = 0; + gen->last_sync_cycle = context->current_cycle; } else { - gen->refresh_counter += (context->current_cycle - 
gen->last_sync_cycle); - gen->refresh_counter = gen->refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL); + gen_update_refresh_no_wait(context); } - gen->last_sync_cycle = context->current_cycle; return context; } @@ -785,10 +836,7 @@ uint16_t value; //do refresh check here so we can avoid adding a penalty for a refresh that happens during a VDP access - gen->refresh_counter += context->current_cycle - 4*MCLKS_PER_68K - gen->last_sync_cycle; - context->current_cycle += REFRESH_DELAY * MCLKS_PER_68K * (gen->refresh_counter / (MCLKS_PER_68K * REFRESH_INTERVAL)); - gen->refresh_counter = gen->refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL); - gen->last_sync_cycle = context->current_cycle; + gen_update_refresh_free_access(context); sync_components(context, 0); vdp_context * v_context = gen->vdp; @@ -816,12 +864,9 @@ gen->bus_busy = 0; } - gen->last_sync_cycle -= 4 * MCLKS_PER_68K; //refresh may have happened while we were waiting on the VDP, //so advance refresh_counter but don't add any delays - gen->refresh_counter += (context->current_cycle - gen->last_sync_cycle); - gen->refresh_counter = gen->refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL); - gen->last_sync_cycle = context->current_cycle; + gen_update_refresh_no_wait(context); return value; } @@ -882,10 +927,7 @@ genesis_context * gen = context->system; //do refresh check here so we can avoid adding a penalty for a refresh that happens during an IO area access - gen->refresh_counter += context->current_cycle - 4*MCLKS_PER_68K - gen->last_sync_cycle; - context->current_cycle += REFRESH_DELAY * MCLKS_PER_68K * (gen->refresh_counter / (MCLKS_PER_68K * REFRESH_INTERVAL)); - gen->refresh_counter = gen->refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL); - gen->last_sync_cycle = context->current_cycle - 4*MCLKS_PER_68K; + gen_update_refresh_free_access(context); if (location < 0x10000) { //Access to Z80 memory incurs a one 68K cycle wait state @@ -1014,8 +1056,7 @@ } //no refresh delays during IO access - 
gen->refresh_counter += context->current_cycle - gen->last_sync_cycle; - gen->refresh_counter = gen->refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL); + gen_update_refresh_no_wait(context); return context; } @@ -1041,10 +1082,7 @@ genesis_context *gen = context->system; //do refresh check here so we can avoid adding a penalty for a refresh that happens during an IO area access - gen->refresh_counter += context->current_cycle - 4*MCLKS_PER_68K - gen->last_sync_cycle; - context->current_cycle += REFRESH_DELAY * MCLKS_PER_68K * (gen->refresh_counter / (MCLKS_PER_68K * REFRESH_INTERVAL)); - gen->refresh_counter = gen->refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL); - gen->last_sync_cycle = context->current_cycle - 4*MCLKS_PER_68K; + gen_update_refresh_free_access(context); if (location < 0x10000) { //Access to Z80 memory incurs a one 68K cycle wait state @@ -1143,9 +1181,7 @@ } //no refresh delays during IO access - gen->refresh_counter += context->current_cycle - gen->last_sync_cycle; - gen->refresh_counter = gen->refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL); - gen->last_sync_cycle = context->current_cycle; + gen_update_refresh_no_wait(context); return value; } @@ -2327,7 +2363,7 @@ info.map = gen->header.info.map = NULL; m68k_options *opts = malloc(sizeof(m68k_options)); - init_m68k_opts(opts, map, map_chunks, MCLKS_PER_68K, sync_components); + init_m68k_opts(opts, map, map_chunks, MCLKS_PER_68K, sync_components, int_ack); if (!strcmp(tern_find_ptr_default(model, "tas", "broken"), "broken")) { opts->gen.flags |= M68K_OPT_BROKEN_READ_MODIFY; } @@ -2400,7 +2436,7 @@ uint32_t num_chunks = cd_chunks + base_chunks; m68k_options *opts = malloc(sizeof(m68k_options)); - init_m68k_opts(opts, map, num_chunks, MCLKS_PER_68K, sync_components); + init_m68k_opts(opts, map, num_chunks, MCLKS_PER_68K, sync_components, int_ack); //TODO: make this configurable opts->gen.flags |= M68K_OPT_BROKEN_READ_MODIFY; gen->m68k = init_68k_context(opts, NULL); diff -r 
f0fc6c09517d -r f8b5142c06aa genesis.h --- a/genesis.h Fri Oct 13 22:44:36 2023 -0700 +++ b/genesis.h Mon Oct 16 23:30:04 2023 -0700 @@ -88,6 +88,7 @@ genesis_context *alloc_config_genesis_cdboot(system_media *media, uint32_t system_opts, uint8_t force_region); void genesis_serialize(genesis_context *gen, serialize_buffer *buf, uint32_t m68k_pc, uint8_t all); void genesis_deserialize(deserialize_buffer *buf, genesis_context *gen); +void gen_update_refresh_free_access(m68k_context *context); #endif //GENESIS_H_ diff -r f0fc6c09517d -r f8b5142c06aa lc8951.c --- a/lc8951.c Fri Oct 13 22:44:36 2023 -0700 +++ b/lc8951.c Mon Oct 16 23:30:04 2023 -0700 @@ -293,20 +293,12 @@ } } -void lc8951_resume_transfer(lc8951 *context, uint32_t cycle) +void lc8951_resume_transfer(lc8951 *context) { if (context->triggered && context->transfer_end == CYCLE_NEVER && (context->ifctrl & BIT_DOUTEN)) { uint16_t transfer_size = context->regs[DBCL] | (context->regs[DBCH] << 8); - //HACK!!! Work around Sub CPU running longer than we would like and dragging other components with it - uint32_t step_diff = (context->cycle - cycle) / context->clock_step; - if (step_diff) { - context->cycle -= step_diff * context->clock_step; - } context->transfer_end = context->cycle + transfer_size * context->cycles_per_byte; context->next_byte_cycle = context->cycle; - if (step_diff) { - lc8951_run(context, cycle); - } } } diff -r f0fc6c09517d -r f8b5142c06aa lc8951.h --- a/lc8951.h Fri Oct 13 22:44:36 2023 -0700 +++ b/lc8951.h Mon Oct 16 23:30:04 2023 -0700 @@ -43,7 +43,7 @@ void lc8951_ar_write(lc8951 *context, uint8_t value); void lc8951_write_byte(lc8951 *context, uint32_t cycle, int sector_offset, uint8_t byte); uint32_t lc8951_next_interrupt(lc8951 *context); -void lc8951_resume_transfer(lc8951 *context, uint32_t cycle); +void lc8951_resume_transfer(lc8951 *context); void lc8951_adjust_cycles(lc8951 *context, uint32_t deduction); void lc8951_serialize(lc8951 *context, serialize_buffer *buf); void 
lc8951_deserialize(deserialize_buffer *buf, void *vcontext); diff -r f0fc6c09517d -r f8b5142c06aa m68k_core.c --- a/m68k_core.c Fri Oct 13 22:44:36 2023 -0700 +++ b/m68k_core.c Mon Oct 16 23:30:04 2023 -0700 @@ -1206,7 +1206,9 @@ void resume_68k(m68k_context *context) { code_ptr addr = context->resume_pc; - context->resume_pc = NULL; + if (!context->stack_storage_count) { + context->resume_pc = NULL; + } m68k_options * options = context->options; context->should_return = 0; options->start_context(addr, context); @@ -1220,6 +1222,8 @@ //switching from user to system mode so swap stack pointers context->aregs[8] = context->aregs[7]; } + context->resume_pc = NULL; + context->stack_storage_count = 0; context->status = 0x27; context->aregs[7] = ((uint32_t)reset_vec[0]) << 16 | reset_vec[1]; uint32_t address = ((uint32_t)reset_vec[2]) << 16 | reset_vec[3]; diff -r f0fc6c09517d -r f8b5142c06aa m68k_core.h --- a/m68k_core.h Fri Oct 13 22:44:36 2023 -0700 +++ b/m68k_core.h Mon Oct 16 23:30:04 2023 -0700 @@ -27,6 +27,7 @@ typedef void (*start_fun)(uint8_t * addr, void * context); typedef struct m68k_context m68k_context; typedef m68k_context *(*sync_fun)(m68k_context * context, uint32_t address); +typedef m68k_context *(*int_ack_fun)(m68k_context * context); typedef struct { code_ptr impl; @@ -61,7 +62,10 @@ code_ptr set_sr; code_ptr set_ccr; code_ptr bp_stub; + code_ptr save_context_scratch; + code_ptr load_context_scratch; sync_fun sync_components; + int_ack_fun int_ack; code_info extra_code; movem_fun *big_movem; uint32_t num_movem; @@ -79,7 +83,6 @@ struct m68k_context { uint8_t flags[5]; uint8_t status; - uint16_t int_ack; uint32_t dregs[8]; uint32_t aregs[9]; uint32_t target_cycle; //cycle at which the next synchronization or interrupt occurs @@ -88,17 +91,22 @@ uint32_t int_cycle; uint32_t int_num; uint32_t last_prefetch_address; + uint32_t scratch1; + uint32_t scratch2; uint16_t *mem_pointers[NUM_MEM_AREAS]; code_ptr resume_pc; code_ptr reset_handler; m68k_options 
*options; void *system; + void *host_sp_entry; + void *stack_storage[10]; m68k_breakpoint *breakpoints; uint32_t num_breakpoints; uint32_t bp_storage; uint8_t int_pending; uint8_t trace_pending; uint8_t should_return; + uint8_t stack_storage_count; uint8_t ram_code_flags[]; }; @@ -108,7 +116,7 @@ void translate_m68k_stream(uint32_t address, m68k_context * context); void start_68k_context(m68k_context * context, uint32_t address); void resume_68k(m68k_context *context); -void init_m68k_opts(m68k_options * opts, memmap_chunk * memmap, uint32_t num_chunks, uint32_t clock_divider, sync_fun sync_components); +void init_m68k_opts(m68k_options * opts, memmap_chunk * memmap, uint32_t num_chunks, uint32_t clock_divider, sync_fun sync_components, int_ack_fun int_ack); m68k_context * init_68k_context(m68k_options * opts, m68k_reset_handler reset_handler); void m68k_reset(m68k_context * context); void m68k_options_free(m68k_options *opts); diff -r f0fc6c09517d -r f8b5142c06aa m68k_core_x86.c --- a/m68k_core_x86.c Fri Oct 13 22:44:36 2023 -0700 +++ b/m68k_core_x86.c Mon Oct 16 23:30:04 2023 -0700 @@ -2584,7 +2584,7 @@ call(&native, opts->bp_stub); } -void init_m68k_opts(m68k_options * opts, memmap_chunk * memmap, uint32_t num_chunks, uint32_t clock_divider, sync_fun sync_components) +void init_m68k_opts(m68k_options * opts, memmap_chunk * memmap, uint32_t num_chunks, uint32_t clock_divider, sync_fun sync_components, int_ack_fun int_ack) { memset(opts, 0, sizeof(*opts)); opts->gen.memmap = memmap; @@ -2636,6 +2636,7 @@ opts->gen.scratch1 = RCX; opts->gen.align_error_mask = 1; opts->sync_components = sync_components; + opts->int_ack = int_ack; opts->gen.native_code_map = malloc(sizeof(native_map_slot) * NATIVE_MAP_CHUNKS); @@ -2649,6 +2650,9 @@ code_info *code = &opts->gen.code; init_code_info(code); + opts->save_context_scratch = code->cur; + mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, offsetof(m68k_context, scratch1), SZ_D); + mov_rrdisp(code, 
opts->gen.scratch2, opts->gen.context_reg, offsetof(m68k_context, scratch2), SZ_D); opts->gen.save_context = code->cur; for (int i = 0; i < 5; i++) if (opts->flag_regs[i] >= 0) { @@ -2666,6 +2670,9 @@ mov_rrdisp(code, opts->gen.cycles, opts->gen.context_reg, offsetof(m68k_context, current_cycle), SZ_D); retn(code); + opts->load_context_scratch = code->cur; + mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, scratch1), opts->gen.scratch1, SZ_D); + mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, scratch2), opts->gen.scratch2, SZ_D); opts->gen.load_context = code->cur; for (int i = 0; i < 5; i++) { @@ -2699,9 +2706,40 @@ mov_rdispr(code, RSP, 20, opts->gen.scratch2, SZ_D); mov_rdispr(code, RSP, 24, opts->gen.context_reg, SZ_D); #endif + movzx_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, stack_storage_count), opts->gen.scratch1, SZ_B, SZ_D); + mov_rrdisp(code, RSP, opts->gen.context_reg, offsetof(m68k_context, host_sp_entry), SZ_PTR); + cmp_ir(code, 0, opts->gen.scratch1, SZ_D); + code_ptr normal_start = code->cur + 1; + jcc(code, CC_Z, normal_start); + uint32_t stack_off_save = code->stack_off; + mov_rr(code, opts->gen.context_reg, opts->gen.scratch2, SZ_PTR); +#ifdef X86_64 + shl_ir(code, 3, opts->gen.scratch1, SZ_D); +#else + shl_ir(code, 2, opts->gen.scratch1, SZ_D); +#endif + add_ir(code, offsetof(m68k_context, stack_storage) - sizeof(void *), opts->gen.scratch2, SZ_PTR); + add_rr(code, opts->gen.scratch1, opts->gen.scratch2, SZ_PTR); + code_ptr loop_top = code->cur; + cmp_ir(code, 0, opts->gen.scratch1, SZ_D); + code_ptr loop_bot = code->cur + 1; + jcc(code, CC_Z, loop_bot); + sub_ir(code, sizeof(void*), opts->gen.scratch1, SZ_D); + mov_rindr(code, opts->gen.scratch2, opts->gen.cycles, SZ_PTR); + sub_ir(code, sizeof(void*), opts->gen.scratch2, SZ_PTR); + push_r(code, opts->gen.cycles); + jmp(code, loop_top); + *loop_bot = code->cur - (loop_bot + 1); + call_noalign(code, opts->load_context_scratch); + push_rdisp(code, 
opts->gen.context_reg, offsetof(m68k_context, resume_pc)); + retn(code); + + code->stack_off = stack_off_save; + *normal_start = code->cur - (normal_start + 1); call(code, opts->gen.load_context); call_r(code, opts->gen.scratch2); call(code, opts->gen.save_context); + mov_irdisp(code, 0, opts->gen.context_reg, offsetof(m68k_context, stack_storage_count), SZ_B); restore_callee_save_regs(code); retn(code); @@ -2733,18 +2771,39 @@ code_ptr skip_sync = code->cur + 1; jcc(code, CC_C, code->cur + 2); opts->do_sync = code->cur; - push_r(code, opts->gen.scratch1); - push_r(code, opts->gen.scratch2); - call(code, opts->gen.save_context); + call(code, opts->save_context_scratch); xor_rr(code, opts->gen.scratch1, opts->gen.scratch1, SZ_D); call_args_abi(code, (code_ptr)opts->sync_components, 2, opts->gen.context_reg, opts->gen.scratch1); mov_rr(code, RAX, opts->gen.context_reg, SZ_PTR); - call(code, opts->gen.load_context); - pop_r(code, opts->gen.scratch2); - pop_r(code, opts->gen.scratch1); + cmp_irdisp(code, 0, RAX, offsetof(m68k_context, should_return), SZ_B); + code_ptr do_return = code->cur + 1; + jcc(code, CC_NZ, do_return); + call(code, opts->load_context_scratch); *skip_sync = code->cur - (skip_sync+1); retn(code); - + stack_off_save = code->stack_off; + *do_return = code->cur - (do_return + 1); + pop_r(code, opts->gen.scratch1); + mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, offsetof(m68k_context, resume_pc), SZ_PTR); + mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, host_sp_entry), opts->gen.scratch2, SZ_PTR); + mov_rr(code, opts->gen.context_reg, opts->aregs[7], SZ_PTR); + xor_rr(code, opts->gen.scratch1, opts->gen.scratch1, SZ_B); + add_ir(code, offsetof(m68k_context, stack_storage), opts->aregs[7], SZ_PTR); + loop_top = code->cur; + cmp_rr(code, opts->gen.scratch2, RSP, SZ_PTR); + code_ptr done_stack_save = code->cur + 1; + jcc(code, CC_Z, done_stack_save); + pop_r(code, opts->gen.cycles); + add_ir(code, 1, opts->gen.scratch1, 
SZ_B); + mov_rrind(code, opts->gen.cycles, opts->aregs[7], SZ_PTR); + add_ir(code, sizeof(void*), opts->aregs[7], SZ_PTR); + jmp(code, loop_top); + *done_stack_save = code->cur - (done_stack_save + 1); + mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, offsetof(m68k_context, stack_storage_count), SZ_B); + restore_callee_save_regs(code); + retn(code); + code->stack_off = stack_off_save; + opts->gen.handle_code_write = (code_ptr)m68k_handle_code_write; check_alloc_code(code, 256); @@ -3107,32 +3166,12 @@ areg_to_native(opts, 7, opts->gen.scratch2); call(code, opts->write_16); //interrupt ack cycle - //the Genesis responds to these exclusively with !VPA which means its a slow - //6800 operation. documentation says these can take between 10 and 19 cycles. - //actual results measurements seem to suggest it's actually between 9 and 18 - //WARNING: this code might break with register assignment changes - //save RDX - push_r(code, RDX); - //save cycle count - mov_rr(code, RAX, opts->gen.scratch1, SZ_D); - //clear top doubleword of dividend - xor_rr(code, RDX, RDX, SZ_D); - //set divisor to clock divider - mov_ir(code, opts->gen.clock_divider, opts->gen.scratch2, SZ_D); - div_r(code, opts->gen.scratch2, SZ_D); - //discard remainder - xor_rr(code, RDX, RDX, SZ_D); - //set divisor to 10, the period of E - mov_ir(code, 10, opts->gen.scratch2, SZ_D); - div_r(code, opts->gen.scratch2, SZ_D); - //delay will be (9 + 4 + the remainder) * clock_divider - //the extra 4 is to cover the idle bus period after the ack - add_ir(code, 9 + 4, RDX, SZ_D); - mov_ir(code, opts->gen.clock_divider, RAX, SZ_D); - mul_r(code, RDX, SZ_D); - pop_r(code, RDX); - //add saved cycle count to result - add_rr(code, opts->gen.scratch1, RAX, SZ_D); + cycles(&opts->gen, 4); //base interrupt ack cycle count + call(code, opts->gen.save_context); + call_args_abi(code, (code_ptr)opts->int_ack, 1, opts->gen.context_reg); + mov_rr(code, RAX, opts->gen.context_reg, SZ_PTR); + call(code, 
opts->gen.load_context); + cycles(&opts->gen, 4); //idle period after int ack //update status register and_irdisp(code, 0x78, opts->gen.context_reg, offsetof(m68k_context, status), SZ_B); @@ -3154,8 +3193,6 @@ //grab saved interrupt number xor_rr(code, opts->gen.scratch1, opts->gen.scratch1, SZ_D); mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, int_pending), opts->gen.scratch1, SZ_B); - //ack the interrupt (happens earlier on hardware, but shouldn't be an observable difference) - mov_rrdisp(code, opts->gen.scratch1, opts->gen.context_reg, offsetof(m68k_context, int_ack), SZ_W); //calculate the vector address shl_ir(code, 2, opts->gen.scratch1, SZ_D); add_ir(code, 0x60, opts->gen.scratch1, SZ_D); diff -r f0fc6c09517d -r f8b5142c06aa segacd.c --- a/segacd.c Fri Oct 13 22:44:36 2023 -0700 +++ b/segacd.c Mon Oct 16 23:30:04 2023 -0700 @@ -11,7 +11,11 @@ #define SCD_MCLKS 50000000 #define SCD_PERIPH_RESET_CLKS (SCD_MCLKS / 10) -#define TIMER_TICK_CLKS 1536 +#define TIMER_TICK_CLKS 1536/*1792*/ + +//TODO: do some logic analyzer captures to get actual values +#define REFRESH_INTERVAL 259 +#define REFRESH_DELAY 2 enum { GA_SUB_CPU_CTRL, @@ -632,15 +636,13 @@ } context->target_cycle = context->sync_cycle < context->int_cycle ? 
context->sync_cycle : context->int_cycle; if (context->int_cycle == cdc_cycle && context->int_num == 5) { - uint32_t before = context->target_cycle - 2 * cd->cdc.clock_step; + uint32_t before = cdc_cycle - cd->m68k->options->gen.clock_divider * 158; //divs worst case if (before < context->target_cycle) { - if (before > context->current_cycle) { + while (before <= context->current_cycle) { + before += cd->cdc.clock_step; + } + if (before < context->target_cycle) { context->target_cycle = context->sync_cycle = before; - } else { - before = context->target_cycle - cd->cdc.clock_step; - if (before > context->current_cycle) { - context->target_cycle = context->sync_cycle = before; - } } } } @@ -650,6 +652,15 @@ { m68k_context *m68k = vcontext; segacd_context *cd = m68k->system; + uint32_t before_cycle = m68k->current_cycle - m68k->options->gen.clock_divider * 4; + if (before_cycle >= cd->last_refresh_cycle) { + uint32_t num_refresh = (before_cycle - cd->last_refresh_cycle) / REFRESH_INTERVAL; + uint32_t num_full = (m68k->current_cycle - cd->last_refresh_cycle) / REFRESH_INTERVAL; + cd->last_refresh_cycle = cd->last_refresh_cycle + num_full * REFRESH_INTERVAL; + m68k->current_cycle += num_refresh * REFRESH_DELAY; + } + + uint32_t reg = address >> 1; switch (reg) { @@ -674,7 +685,7 @@ if (dst == DST_SUB_CPU) { if (cd->gate_array[GA_CDC_CTRL] & BIT_DSR) { cd->gate_array[GA_CDC_CTRL] &= ~BIT_DSR; - lc8951_resume_transfer(&cd->cdc, cd->cdc.cycle); + lc8951_resume_transfer(&cd->cdc); } calculate_target_cycle(cd->m68k); @@ -738,6 +749,14 @@ { m68k_context *m68k = vcontext; segacd_context *cd = m68k->system; + uint32_t before_cycle = m68k->current_cycle - m68k->options->gen.clock_divider * 4; + if (before_cycle >= cd->last_refresh_cycle) { + uint32_t num_refresh = (before_cycle - cd->last_refresh_cycle) / REFRESH_INTERVAL; + uint32_t num_full = (m68k->current_cycle - cd->last_refresh_cycle) / REFRESH_INTERVAL; + cd->last_refresh_cycle = cd->last_refresh_cycle + num_full * 
REFRESH_INTERVAL; + m68k->current_cycle += num_refresh * REFRESH_DELAY; + } + uint32_t reg = address >> 1; switch (reg) { @@ -831,7 +850,7 @@ lc8951_set_dma_multiple(&cd->cdc, 6); } if ((old_dest < DST_MAIN_CPU || old_dest == 6) && dest >= DST_MAIN_CPU && dest != 6) { - lc8951_resume_transfer(&cd->cdc, m68k->current_cycle); + lc8951_resume_transfer(&cd->cdc); } calculate_target_cycle(m68k); } @@ -878,6 +897,7 @@ case GA_TIMER: timers_run(cd, m68k->current_cycle); cd->gate_array[reg] = value & 0xFF; + cd->timer_value = 0; calculate_target_cycle(m68k); break; case GA_INT_MASK: @@ -1119,10 +1139,6 @@ rf5c164_run(&cd->pcm, cycle); } -//TODO: do some logic analyzer captuers to get actual values -#define REFRESH_INTERVAL 256 -#define REFRESH_DELAY 2 - static m68k_context *sync_components(m68k_context * context, uint32_t address) { segacd_context *cd = context->system; @@ -1146,7 +1162,15 @@ } cd->m68k_pc = address; } - switch (context->int_ack) + calculate_target_cycle(context); + return context; +} + +static m68k_context *int_ack(m68k_context *context) +{ + segacd_context *cd = context->system; + scd_peripherals_run(cd, context->current_cycle); + switch (context->int_pending) { case 1: cd->graphics_int_cycle = CYCLE_NEVER; @@ -1167,8 +1191,15 @@ cd->cdd.subcode_int_pending = 0; break; } - context->int_ack = 0; - calculate_target_cycle(context); + //the Sega CD responds to these exclusively with !VPA which means its a slow + //6800 operation. documentation says these can take between 10 and 19 cycles. 
+ //actual results measurements seem to suggest it's actually between 9 and 18 + //Base 68K core has added 4 cycles for a normal int ack cycle already + //We add 5 + the current cycle count (in 68K cycles) mod 10 to simulate the + //additional variable delay from the use of the 6800 cycle + uint32_t cycle_count = context->current_cycle / context->options->gen.clock_divider; + context->current_cycle += 5 + (cycle_count % 10); + return context; } @@ -1177,7 +1208,10 @@ uint8_t m68k_run = !can_main_access_prog(cd); while (cycle > cd->m68k->current_cycle) { if (m68k_run && !cd->sub_paused_wordram) { - uint32_t start = cd->m68k->current_cycle; + uint32_t num_refresh = (cd->m68k->current_cycle - cd->last_refresh_cycle) / REFRESH_INTERVAL; + cd->last_refresh_cycle = cd->last_refresh_cycle + num_refresh * REFRESH_INTERVAL; + cd->m68k->current_cycle += num_refresh * REFRESH_DELAY; + cd->m68k->sync_cycle = cd->enter_debugger ? cd->m68k->current_cycle + 1 : cycle; if (cd->need_reset) { @@ -1237,6 +1271,7 @@ static uint16_t main_gate_read16(uint32_t address, void *vcontext) { m68k_context *m68k = vcontext; + gen_update_refresh_free_access(m68k); genesis_context *gen = m68k->system; segacd_context *cd = gen->expansion; uint32_t scd_cycle = gen_cycle_to_scd(m68k->current_cycle, gen); @@ -1270,10 +1305,7 @@ if (dst == DST_MAIN_CPU) { if (cd->gate_array[GA_CDC_CTRL] & BIT_DSR) { cd->gate_array[GA_CDC_CTRL] &= ~BIT_DSR; - //Using the sub CPU's cycle count here is a bit of a hack - //needed to ensure the interrupt does not get triggered prematurely - //because the sub CPU execution granularity is too high - lc8951_resume_transfer(&cd->cdc, cd->m68k->current_cycle); + lc8951_resume_transfer(&cd->cdc); } else { printf("Read of CDC host data with DSR clear at %u\n", scd_cycle); } @@ -1328,14 +1360,28 @@ static void *main_gate_write16(uint32_t address, void *vcontext, uint16_t value) { m68k_context *m68k = vcontext; + gen_update_refresh_free_access(m68k); genesis_context *gen = 
m68k->system; segacd_context *cd = gen->expansion; uint32_t scd_cycle = gen_cycle_to_scd(m68k->current_cycle, gen); - scd_run(cd, scd_cycle); uint32_t reg = (address & 0x1FF) >> 1; + if (reg != GA_SUB_CPU_CTRL) { + scd_run(cd, scd_cycle); + } switch (reg) { case GA_SUB_CPU_CTRL: { + if ((value & BIT_IFL2) && (cd->gate_array[GA_INT_MASK] & BIT_MASK_IEN2)) { + if (cd->int2_cycle != CYCLE_NEVER) { + scd_run(cd, scd_cycle - 4 * cd->m68k->options->gen.clock_divider); + while (cd->int2_cycle != CYCLE_NEVER && cd->m68k->current_cycle < scd_cycle) { + scd_run(cd, cd->m68k->current_cycle + cd->m68k->options->gen.clock_divider); + } + } + cd->int2_cycle = scd_cycle; + + } + scd_run(cd, scd_cycle); uint8_t old_access = can_main_access_prog(cd); cd->busreq = value & BIT_SBRQ; uint8_t old_reset = cd->reset; @@ -1343,9 +1389,6 @@ if (cd->reset && !old_reset) { cd->need_reset = 1; } - if (value & BIT_IFL2) { - cd->int2_cycle = scd_cycle; - } /*cd->gate_array[reg] &= 0x7FFF; cd->gate_array[reg] |= value & 0x8000;*/ uint8_t new_access = can_main_access_prog(cd); @@ -1362,7 +1405,7 @@ dump_prog_ram(cd); uint16_t dst = cd->gate_array[GA_CDC_CTRL] >> 8 & 0x7; if (dst == DST_PROG_RAM) { - lc8951_resume_transfer(&cd->cdc, cd->cdc.cycle); + lc8951_resume_transfer(&cd->cdc); } } break; @@ -1395,7 +1438,7 @@ uint16_t dst = cd->gate_array[GA_CDC_CTRL] >> 8 & 0x7; if (dst == DST_WORD_RAM) { - lc8951_resume_transfer(&cd->cdc, cd->cdc.cycle); + lc8951_resume_transfer(&cd->cdc); } m68k_invalidate_code_range(m68k, cd->base + 0x200000, cd->base + 0x240000); @@ -1587,7 +1630,7 @@ sub_cpu_map[0].buffer = sub_cpu_map[1].buffer = cd->prog_ram; sub_cpu_map[4].buffer = cd->bram; m68k_options *mopts = malloc(sizeof(m68k_options)); - init_m68k_opts(mopts, sub_cpu_map, sizeof(sub_cpu_map) / sizeof(*sub_cpu_map), 4, sync_components); + init_m68k_opts(mopts, sub_cpu_map, sizeof(sub_cpu_map) / sizeof(*sub_cpu_map), 4, sync_components, int_ack); cd->m68k = init_68k_context(mopts, NULL); cd->m68k->system = cd; 
cd->int2_cycle = CYCLE_NEVER;