Mercurial > repos > blastem
changeset 2666:38c281ef57b0
Memory access optimization in new 68K core that gives a modest speed bump on average and will allow low-cost watchpoints
author | Michael Pavone <pavone@retrodev.com> |
---|---|
date | Fri, 07 Mar 2025 23:40:58 -0800 |
parents | 54ac5fe14cf9 |
children | 1f6503bcb1d5 |
files | backend.c backend.h cpu_dsl.py m68k.cpu m68k_util.c |
diffstat | 5 files changed, 509 insertions(+), 8 deletions(-) [+] |
line wrap: on
line diff
--- a/backend.c Fri Mar 07 21:45:53 2025 -0800 +++ b/backend.c Fri Mar 07 23:40:58 2025 -0800 @@ -340,3 +340,459 @@ } return size; } + +uint16_t interp_read_direct_16(uint32_t address, void *context, void *data) +{ + return *(uint16_t *)((address & 0xFFFE) + (uint8_t *)data); +} + +uint8_t interp_read_direct_8(uint32_t address, void *context, void *data) +{ + return ((uint8_t *)data)[(address & 0xFFFF) ^ 1]; +} + +void interp_write_direct_16(uint32_t address, void *context, uint16_t value, void *data) +{ + *(uint16_t *)((address & 0xFFFE) + (uint8_t *)data) = value; +} + +void interp_write_direct_8(uint32_t address, void *context, uint8_t value, void *data) +{ + ((uint8_t *)data)[(address & 0xFFFF) ^ 1] = value; +} + +uint16_t interp_read_indexed_16(uint32_t address, void *context, void *data) +{ + return *(uint16_t *)((*(uint8_t **)data) + (address & 0xFFFE)); +} + +uint8_t interp_read_indexed_8(uint32_t address, void *context, void *data) +{ + return (*(uint8_t **)data)[(address & 0xFFFF) ^ 1]; +} + +void interp_write_indexed_16(uint32_t address, void *context, uint16_t value, void *data) +{ + *(uint16_t *)((*(uint8_t **)data) + (address & 0xFFFE)) = value; +} + +void interp_write_indexed_8(uint32_t address, void *context, uint8_t value, void *data) +{ + (*(uint8_t **)data)[(address & 0xFFFF) ^ 1] = value; +} + +uint16_t interp_read_fixed_16(uint32_t address, void *context, void *data) +{ + return (uintptr_t)data; +} + +uint8_t interp_read_fixed_8(uint32_t address, void *context, void *data) +{ + uint16_t val = (uintptr_t)data; + if (address & 1) { + return val; + } + return val >> 8; +} + +void interp_write_ignored_16(uint32_t address, void *context, uint16_t value, void *data) +{ +} + +void interp_write_ignored_8(uint32_t address, void *context, uint8_t value, void *data) +{ +} + +uint16_t interp_read_map_16(uint32_t address, void *context, void *data) +{ + const memmap_chunk *chunk = data; + cpu_options * opts = *(cpu_options **)context; + if (address < 
chunk->start || address >= chunk->end) + { + const memmap_chunk *map_end = opts->memmap + opts->memmap_chunks; + for (chunk++; chunk < map_end; chunk++) + { + if (address >= chunk->start && address < chunk->end) { + break; + } + } + if (chunk == map_end) { + return 0xFFFF; + } + } + uint32_t offset = address & chunk->mask; + if (chunk->flags & MMAP_READ) { + uint8_t *base; + if (chunk->flags & MMAP_PTR_IDX) { + uint8_t ** mem_pointers = (uint8_t**)(opts->mem_ptr_off + (uint8_t *)context); + base = mem_pointers[chunk->ptr_index]; + } else { + base = chunk->buffer; + } + if (base) { + uint16_t val; + if (chunk->shift > 0) { + offset <<= chunk->shift; + } else if (chunk->shift < 0){ + offset >>= chunk->shift; + } + if ((chunk->flags & MMAP_ONLY_ODD) || (chunk->flags & MMAP_ONLY_EVEN)) { + offset /= 2; + val = base[offset]; + if (chunk->flags & MMAP_ONLY_ODD) { + val |= 0xFF00; + } else { + val = val << 8 | 0xFF; + } + } else { + val = *(uint16_t *)(base + offset); + } + return val; + } + } + if ((!(chunk->flags & MMAP_READ) || (chunk->flags & MMAP_FUNC_NULL)) && chunk->read_16) { + return chunk->read_16(offset, context); + } + return 0xFFFF; +} + +uint8_t interp_read_map_8(uint32_t address, void *context, void *data) +{ + const memmap_chunk *chunk = data; + cpu_options * opts = *(cpu_options **)context; + if (address < chunk->start || address >= chunk->end) + { + const memmap_chunk *map_end = opts->memmap + opts->memmap_chunks; + for (chunk++; chunk < map_end; chunk++) + { + if (address >= chunk->start && address < chunk->end) { + break; + } + } + + if (chunk == map_end) { + return 0xFF; + } + } + uint32_t offset = address & chunk->mask; + if (chunk->flags & MMAP_READ) { + uint8_t *base; + if (chunk->flags & MMAP_PTR_IDX) { + uint8_t ** mem_pointers = (uint8_t**)(opts->mem_ptr_off + (uint8_t *)context); + base = mem_pointers[chunk->ptr_index]; + } else { + base = chunk->buffer; + } + if (base) { + if (chunk->shift > 0) { + offset <<= chunk->shift; + } else if 
(chunk->shift < 0){ + offset >>= chunk->shift; + } + if ((chunk->flags & MMAP_ONLY_ODD) || (chunk->flags & MMAP_ONLY_EVEN)) { + if (address & 1) { + if (chunk->flags & MMAP_ONLY_EVEN) { + return 0xFF; + } + } else if (chunk->flags & MMAP_ONLY_ODD) { + return 0xFF; + } + offset /= 2; + } else if(opts->byte_swap) { + offset ^= 1; + } + return base[offset]; + } + } + if ((!(chunk->flags & MMAP_READ) || (chunk->flags & MMAP_FUNC_NULL)) && chunk->read_8) { + return chunk->read_8(offset, context); + } + return 0xFF; +} + +void interp_write_map_16(uint32_t address, void *context, uint16_t value, void *data) +{ + const memmap_chunk *chunk = data; + cpu_options * opts = *(cpu_options **)context; + if (address < chunk->start || address >= chunk->end) + { + const memmap_chunk *map_end = opts->memmap + opts->memmap_chunks; + for (chunk++; chunk < map_end; chunk++) + { + if (address >= chunk->start && address < chunk->end) { + break; + } + } + if (chunk == map_end) { + return; + } + } + uint32_t offset = address & chunk->mask; + if (chunk->flags & MMAP_WRITE) { + uint8_t *base; + if (chunk->flags & MMAP_PTR_IDX) { + uint8_t ** mem_pointers = (uint8_t**)(opts->mem_ptr_off + (uint8_t *)context); + base = mem_pointers[chunk->ptr_index]; + } else { + base = chunk->buffer; + } + if (base) { + if (chunk->shift > 0) { + offset <<= chunk->shift; + } else if (chunk->shift < 0){ + offset >>= chunk->shift; + } + if ((chunk->flags & MMAP_ONLY_ODD) || (chunk->flags & MMAP_ONLY_EVEN)) { + offset /= 2; + if (chunk->flags & MMAP_ONLY_EVEN) { + value >>= 16; + } + base[offset] = value; + } else { + *(uint16_t *)(base + offset) = value; + } + return; + } + } + if ((!(chunk->flags & MMAP_WRITE) || (chunk->flags & MMAP_FUNC_NULL)) && chunk->write_16) { + chunk->write_16(offset, context, value); + } +} + +void interp_write_map_8(uint32_t address, void *context, uint8_t value, void *data) +{ + const memmap_chunk *chunk = data; + cpu_options * opts = *(cpu_options **)context; + if (address < 
chunk->start || address >= chunk->end) + { + const memmap_chunk *map_end = opts->memmap + opts->memmap_chunks; + for (chunk++; chunk < map_end; chunk++) + { + if (address >= chunk->start && address < chunk->end) { + break; + } + } + if (chunk == map_end) { + return; + } + } + uint32_t offset = address & chunk->mask; + if (chunk->flags & MMAP_WRITE) { + uint8_t *base; + if (chunk->flags & MMAP_PTR_IDX) { + uint8_t ** mem_pointers = (uint8_t**)(opts->mem_ptr_off + (uint8_t *)context); + base = mem_pointers[chunk->ptr_index]; + } else { + base = chunk->buffer; + } + if (base) { + if (chunk->shift > 0) { + offset <<= chunk->shift; + } else if (chunk->shift < 0){ + offset >>= chunk->shift; + } + if ((chunk->flags & MMAP_ONLY_ODD) || (chunk->flags & MMAP_ONLY_EVEN)) { + if (address & 1) { + if (chunk->flags & MMAP_ONLY_EVEN) { + return; + } + } else if (chunk->flags & MMAP_ONLY_ODD) { + return; + } + offset /= 2; + } else if(opts->byte_swap) { + offset ^= 1; + } + base[offset] = value; + } + } + if ((!(chunk->flags & MMAP_WRITE) || (chunk->flags & MMAP_FUNC_NULL)) && chunk->write_8) { + chunk->write_8(offset, context, value); + } +} + +interp_read_16 get_interp_read_16(void *context, cpu_options *opts, uint32_t start, uint32_t end, void **data_out) +{ + const memmap_chunk *chunk; + for (chunk = opts->memmap; chunk < opts->memmap + opts->memmap_chunks; chunk++) + { + if (chunk->end > start && chunk->start < end) { + break; + } + } + if (chunk == opts->memmap + opts->memmap_chunks) { + *data_out = (void *)(uintptr_t)0xFFFF; + return interp_read_fixed_16; + } + if (chunk->end < end || chunk->start > start) { + goto use_map; + } + if (chunk->flags & MMAP_READ) { + if ((chunk->flags & (MMAP_ONLY_ODD|MMAP_ONLY_EVEN|MMAP_FUNC_NULL)) || chunk->shift) { + goto use_map; + } + if (!chunk->mask && !(chunk->flags & ~MMAP_READ)) { + uintptr_t value = *(uint16_t *)chunk->buffer; + *data_out = (void *)value; + return interp_read_fixed_16; + } + if ((chunk->mask & 0xFFFF) != 0xFFFF) { + 
goto use_map; + } + if (chunk->flags & MMAP_PTR_IDX) { + if (chunk->mask != 0xFFFF && start > 0) { + goto use_map; + } + *data_out = (void *)(chunk->ptr_index + (void **)(((char *)context) + opts->mem_ptr_off)); + return interp_read_indexed_16; + } else { + *data_out = (start & chunk->mask) + (uint8_t *)chunk->buffer; + return interp_read_direct_16; + } + } + if (chunk->read_16 && chunk->mask == opts->address_mask) { + *data_out = NULL; + //This is not safe for all calling conventions due to the extra param + //but should work for the ones we actually care about + return (interp_read_16)chunk->read_16; + } +use_map: + *data_out = (void *)chunk; + return interp_read_map_16; +} + +interp_read_8 get_interp_read_8(void *context, cpu_options *opts, uint32_t start, uint32_t end, void **data_out) +{ + const memmap_chunk *chunk; + for (chunk = opts->memmap; chunk < opts->memmap + opts->memmap_chunks; chunk++) + { + if (chunk->end > start && chunk->start < end) { + break; + } + } + if (chunk == opts->memmap + opts->memmap_chunks) { + *data_out = (void *)(uintptr_t)0xFFFF; + return interp_read_fixed_8; + } + if (chunk->end != end || chunk->start != start) { + goto use_map; + } + if (chunk->flags & MMAP_READ) { + if ((chunk->flags & (MMAP_ONLY_ODD|MMAP_ONLY_EVEN|MMAP_FUNC_NULL)) || chunk->shift) { + goto use_map; + } + if (!chunk->mask && !(chunk->flags & ~MMAP_READ)) { + uintptr_t value = *(uint8_t *)chunk->buffer; + *data_out = (void *)value; + return interp_read_fixed_8; + } + if ((chunk->mask & 0xFFFF) != 0xFFFF) { + goto use_map; + } + if (chunk->flags & MMAP_PTR_IDX) { + if (chunk->mask != 0xFFFF && start > 0) { + goto use_map; + } + *data_out = (void *)(chunk->ptr_index + (void **)(((char *)context) + opts->mem_ptr_off)); + return interp_read_indexed_8; + } else { + *data_out = (start & chunk->mask) + (uint8_t *)chunk->buffer; + return interp_read_direct_8; + } + } + if (chunk->read_8 && chunk->mask == opts->address_mask) { + *data_out = NULL; + //This is not safe for 
all calling conventions due to the extra param + //but should work for the ones we actually care about + return (interp_read_8)chunk->read_8; + } +use_map: + *data_out = (void *)chunk; + return interp_read_map_8; +} + +interp_write_16 get_interp_write_16(void *context, cpu_options *opts, uint32_t start, uint32_t end, void **data_out) +{ + const memmap_chunk *chunk; + for (chunk = opts->memmap; chunk < opts->memmap + opts->memmap_chunks; chunk++) + { + if (chunk->end > start && chunk->start < end) { + break; + } + } + if (chunk == opts->memmap + opts->memmap_chunks) { + *data_out = NULL; + return interp_write_ignored_16; + } + if (chunk->end != end || chunk->start != start) { + goto use_map; + } + if (chunk->flags & MMAP_READ) { + if ((chunk->flags & (MMAP_ONLY_ODD|MMAP_ONLY_EVEN|MMAP_FUNC_NULL)) || chunk->shift || (chunk->mask & 0xFFFF) != 0xFFFF) { + goto use_map; + } + if (chunk->flags & MMAP_PTR_IDX) { + if (chunk->mask != 0xFFFF && start > 0) { + goto use_map; + } + *data_out = (void *)(chunk->ptr_index + (void **)(((char *)context) + opts->mem_ptr_off)); + return interp_write_indexed_16; + } else { + *data_out = (start & chunk->mask) + (uint8_t *)chunk->buffer; + return interp_write_direct_16; + } + } + if (chunk->write_16 && chunk->mask == opts->address_mask) { + *data_out = NULL; + //This is not safe for all calling conventions due to the extra param + //but should work for the ones we actually care about + return (interp_write_16)chunk->write_16; + } +use_map: + *data_out = (void *)chunk; + return interp_write_map_16; +} + +interp_write_8 get_interp_write_8(void *context, cpu_options *opts, uint32_t start, uint32_t end, void **data_out) +{ + const memmap_chunk *chunk; + for (chunk = opts->memmap; chunk < opts->memmap + opts->memmap_chunks; chunk++) + { + if (chunk->end > start && chunk->start < end) { + break; + } + } + if (chunk == opts->memmap + opts->memmap_chunks) { + *data_out = NULL; + return interp_write_ignored_8; + } + if (chunk->end != end || 
chunk->start != start) { + goto use_map; + } + if (chunk->flags & MMAP_READ) { + if ((chunk->flags & (MMAP_ONLY_ODD|MMAP_ONLY_EVEN|MMAP_FUNC_NULL)) || chunk->shift + || (chunk->mask & 0xFFFF) != 0xFFFF || !opts->byte_swap + ) { + goto use_map; + } + if (chunk->flags & MMAP_PTR_IDX) { + if (chunk->mask != 0xFFFF && start > 0) { + goto use_map; + } + *data_out = (void *)(chunk->ptr_index + (void **)(((char *)context) + opts->mem_ptr_off)); + return interp_write_indexed_8; + } else { + *data_out = (start & chunk->mask) + (uint8_t *)chunk->buffer; + return interp_write_direct_8; + } + } + if (chunk->write_16 && chunk->mask == opts->address_mask) { + *data_out = NULL; + //This is not safe for all calling conventions due to the extra param + //but should work for the ones we actually care about + return (interp_write_8)chunk->write_8; + } +use_map: + *data_out = (void *)chunk; + return interp_write_map_8; +}
--- a/backend.h Fri Mar 07 21:45:53 2025 -0800 +++ b/backend.h Fri Mar 07 23:40:58 2025 -0800 @@ -86,6 +86,11 @@ typedef uint8_t * (*native_addr_func)(void * context, uint32_t address); +typedef uint16_t (*interp_read_16)(uint32_t address, void *context, void *data); +typedef uint8_t (*interp_read_8)(uint32_t address, void *context, void *data); +typedef void (*interp_write_16)(uint32_t address, void *context, uint16_t value, void *data); +typedef void (*interp_write_8)(uint32_t address, void *context, uint8_t value, void *data); + deferred_addr * defer_address(deferred_addr * old_head, uint32_t address, uint8_t *dest); void remove_deferred_until(deferred_addr **head_ptr, deferred_addr * remove_to); void process_deferred(deferred_addr ** head_ptr, void * context, native_addr_func get_native); @@ -110,6 +115,10 @@ memmap_chunk const *find_map_chunk(uint32_t address, cpu_options *opts, uint16_t flags, uint32_t *size_sum); uint32_t chunk_size(cpu_options *opts, memmap_chunk const *chunk); uint32_t ram_size(cpu_options *opts); +interp_read_16 get_interp_read_16(void *context, cpu_options *opts, uint32_t start, uint32_t end, void **data_out); +interp_read_8 get_interp_read_8(void *context, cpu_options *opts, uint32_t start, uint32_t end, void **data_out); +interp_write_16 get_interp_write_16(void *context, cpu_options *opts, uint32_t start, uint32_t end, void **data_out); +interp_write_8 get_interp_write_8(void *context, cpu_options *opts, uint32_t start, uint32_t end, void **data_out); #endif //BACKEND_H_
--- a/cpu_dsl.py Fri Mar 07 21:45:53 2025 -0800 +++ b/cpu_dsl.py Fri Mar 07 23:40:58 2025 -0800 @@ -1819,15 +1819,21 @@ if len(parts) == 3: if parts[1].startswith('ptr'): self.addPointer(parts[0], parts[1][3:], int(parts[2])) + elif parts[1].isdigit(): + self.addRegArray(parts[0], int(parts[1]), int(parts[2])) else: - self.addRegArray(parts[0], int(parts[1]), int(parts[2])) + #assume some other C type + self.addRegArray(parts[0], parts[1], int(parts[2])) elif len(parts) > 2: self.addRegArray(parts[0], int(parts[1]), parts[2:]) else: if parts[1].startswith('ptr'): self.addPointer(parts[0], parts[1][3:], 1) + elif parts[1].isdigit(): + self.addReg(parts[0], int(parts[1])) else: - self.addReg(parts[0], int(parts[1])) + #assume some other C type + self.addReg(parts[0], parts[1]) return self def writeHeader(self, otype, hFile): @@ -1847,11 +1853,17 @@ hFile.write('\n\t{ptype} {stars}{nm}{arr};'.format(nm=pointer, ptype=ptype, stars=stars, arr=arr)) for reg in self.regs: if not self.isRegArrayMember(reg): - fieldList.append((self.regs[reg], 1, reg)) + if type(self.regs[reg]) is int: + fieldList.append((self.regs[reg], 1, reg)) + else: + hFile.write(f'\n\t{self.regs[reg]} {reg};') for arr in self.regArrays: size,regs = self.regArrays[arr] if not type(regs) is int: regs = len(regs) + if not type(size) is int: + hFile.write(f'\n\t{size} {arr}[{regs}];') + continue fieldList.append((size, regs, arr)) fieldList.sort() fieldList.reverse()
--- a/m68k.cpu Fri Mar 07 21:45:53 2025 -0800 +++ b/m68k.cpu Fri Mar 07 23:40:58 2025 -0800 @@ -66,6 +66,14 @@ int_ack_handler ptrvoid sync_components ptrsync_fun mem_pointers ptr16 10 + read16 interp_read_16 256 + read8 interp_read_8 256 + write16 interp_write_16 256 + write8 interp_write_8 256 + read16_data ptrvoid 256 + read8_data ptrvoid 256 + write16_data ptrvoid 256 + write8_data ptrvoid 256 flags register ccr
--- a/m68k_util.c Fri Mar 07 21:45:53 2025 -0800 +++ b/m68k_util.c Fri Mar 07 23:40:58 2025 -0800 @@ -6,7 +6,9 @@ #ifdef DEBUG_DISASM uint32_t tmp = context->scratch1; #endif - context->scratch1 = read_byte(context->scratch1, (void**)context->mem_pointers, &context->opts->gen, context); + uint32_t address = context->scratch1 & context->opts->gen.address_mask; + uint32_t index = address >> 16; + context->scratch1 = context->read8[index](address, context, context->read8_data[index]); #ifdef DEBUG_DISASM printf("Read.b %05X: %02X\n", tmp, context->scratch1); #endif @@ -26,7 +28,9 @@ #ifdef DEBUG_DISASM uint32_t tmp = context->scratch1; #endif - context->scratch1 = read_word(context->scratch1, (void**)context->mem_pointers, &context->opts->gen, context); + uint32_t address = context->scratch1 & context->opts->gen.address_mask; + uint32_t index = address >> 16; + context->scratch1 = context->read16[index](address, context, context->read16_data[index]); #ifdef DEBUG_DISASM if (tmp == context->pc) { m68kinst inst; @@ -46,7 +50,9 @@ void m68k_write_8(m68k_context *context) { context->cycles += 4 * context->opts->gen.clock_divider; - write_byte(context->scratch2, context->scratch1, (void**)context->mem_pointers, &context->opts->gen, context); + uint32_t address = context->scratch2 & context->opts->gen.address_mask; + uint32_t index = address >> 16; + context->write8[index](address, context, context->scratch1, context->write8_data[index]); #ifdef DEBUG_DISASM printf("Write.b %05X: %02X\n", context->scratch2, context->scratch1); #endif @@ -57,14 +63,16 @@ if (context->opts->gen.flags & M68K_OPT_BROKEN_READ_MODIFY) { context->cycles += 4 * context->opts->gen.clock_divider; } else { - write_byte(context->scratch2, context->scratch1, (void**)context->mem_pointers, &context->opts->gen, context); + m68k_write_8(context); } } void m68k_write_16(m68k_context *context) { context->cycles += 4 * context->opts->gen.clock_divider; - write_word(context->scratch2, context->scratch1, 
(void**)context->mem_pointers, &context->opts->gen, context); + int32_t address = context->scratch2 & context->opts->gen.address_mask; + uint32_t index = address >> 16; + context->write16[index](address, context, context->scratch1, context->write16_data[index]); #ifdef DEBUG_DISASM printf("Write %05X: %04X\n", context->scratch2, context->scratch1); #endif @@ -222,6 +230,7 @@ opts->gen.max_address = 0x1000000; opts->gen.bus_cycles = 4; opts->gen.clock_divider = clock_divider; + opts->gen.mem_ptr_off = offsetof(m68k_context, mem_pointers); sync_comp_tmp = sync_components; int_ack_tmp = int_ack; } @@ -234,6 +243,13 @@ context->int_cycle = 0xFFFFFFFFU; context->int_pending = 255; context->sync_components = sync_comp_tmp; + for (uint32_t i = 0; i < 256; i++) + { + context->read16[i] = get_interp_read_16(context, &opts->gen, i << 16, (i + 1) << 16, context->read16_data + i); + context->read8[i] = get_interp_read_8(context, &opts->gen, i << 16, (i + 1) << 16, context->read8_data + i); + context->write16[i] = get_interp_write_16(context, &opts->gen, i << 16, (i + 1) << 16, context->write16_data + i); + context->write8[i] = get_interp_write_8(context, &opts->gen, i << 16, (i + 1) << 16, context->write8_data + i); + } sync_comp_tmp = NULL; context->int_ack_handler = int_ack_tmp; int_ack_tmp = NULL;