changeset 1931:374a5ae694e8 mame_interp

Merge from default
author Michael Pavone <pavone@retrodev.com>
date Sat, 18 Apr 2020 11:42:53 -0700
parents 13abdc98379e (current diff) 0f135b214927 (diff)
children 2c1c88cd1a3f
files Makefile backend.c backend.h blastcpm.c blastem.c debug.c debug.h gdb_remote.c genesis.c genesis.h m68k_core.c m68k_core.h nuklear_ui/nuklear.h sega_mapper.c sms.c sms.h trans.c ztestrun.c
diffstat 57 files changed, 4012 insertions(+), 1396 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Thu Apr 18 22:06:47 2019 -0700
+++ b/Makefile	Sat Apr 18 11:42:53 2020 -0700
@@ -116,7 +116,7 @@
 LDFLAGS:=-lm
 else
 CFLAGS:=$(shell pkg-config --cflags-only-I $(LIBS)) $(CFLAGS)
-LDFLAGS:=-lm $(shell pkg-config --libs $(LIBS)) $(GLES_LIB)
+LDFLAGS:=-lm $(shell pkg-config --libs $(LIBS))
 ifdef USE_FBDEV
 LDFLAGS+= -pthread
 endif
@@ -179,26 +179,26 @@
 endif
 endif
 
-ifdef USE_NATIVE
 TRANSOBJS=gen.o backend.o $(MEM) arena.o tern.o
-M68KOBJS=68kinst.o m68k_core.o
+M68KOBJS=68kinst.o
+
+ifdef NEW_CORE
+Z80OBJS=z80.o z80inst.o 
+M68KOBJS+= m68k_core.o musashi/m68kops.o musashi/m68kcpu.o
+CFLAGS+= -DNEW_CORE
+else
 Z80OBJS=z80inst.o z80_to_x86.o
 ifeq ($(CPU),x86_64)
-M68KOBJS+= m68k_core_x86.o
+M68KOBJS+= m68k_core.o m68k_core_x86.o
 TRANSOBJS+= gen_x86.o backend_x86.o
 else
 ifeq ($(CPU),i686)
-M68KOBJS+= m68k_core_x86.o
+M68KOBJS+= m68k_core.o m68k_core_x86.o
 TRANSOBJS+= gen_x86.o backend_x86.o
 endif
 endif
-CFLAGS+= -DUSE_NATIVE
-else
-Z80OBJS=z80.o z80inst.o
-TRANSOBJS=backend.o tern.o
-M68KOBJS=68kinst.o m68k_core.o musashi/m68kops.o musashi/m68kcpu.o
 endif
-AUDIOOBJS=ym2612.o psg.o wave.o
+AUDIOOBJS=ym2612.o psg.o wave.o vgm.o render_audio.o
 CONFIGOBJS=config.o tern.o util.o paths.o 
 NUKLEAROBJS=$(FONT) nuklear_ui/blastem_nuklear.o nuklear_ui/sfnt.o
 RENDEROBJS=ppm.o controller_info.o
@@ -308,7 +308,7 @@
 ztestgen : ztestgen.o z80inst.o
 	$(CC) -ggdb -o ztestgen ztestgen.o z80inst.o
 
-stateview$(EXE) : stateview.o vdp.o $(RENDEROBJS) serialize.o $(CONFIGOBJS) gst.o
+stateview$(EXE) : stateview.o vdp.o $(RENDEROBJS) serialize.o $(CONFIGOBJS) gst.o render_audio.o
 	$(CC) -o $@ $^ $(LDFLAGS)
 	$(FIXUP) ./$@
 
@@ -343,6 +343,9 @@
 vos_prog_info : vos_prog_info.o vos_program_module.o
 	$(CC) -o vos_prog_info vos_prog_info.o vos_program_module.o
 	
+m68k.c : m68k.cpu cpu_dsl.py
+	./cpu_dsl.py -d call $< > $@
+	
 %.c : %.cpu cpu_dsl.py
 	./cpu_dsl.py -d goto $< > $@
 
--- a/backend.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/backend.c	Sat Apr 18 11:42:53 2020 -0700
@@ -6,7 +6,7 @@
 #include "backend.h"
 #include <stdlib.h>
 
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 deferred_addr * defer_address(deferred_addr * old_head, uint32_t address, uint8_t *dest)
 {
 	deferred_addr * new_head = malloc(sizeof(deferred_addr));
--- a/backend.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/backend.h	Sat Apr 18 11:42:53 2020 -0700
@@ -8,7 +8,7 @@
 
 #include <stdint.h>
 #include <stdio.h>
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 #include "gen.h"
 #else
 typedef uint8_t * code_ptr;
@@ -54,14 +54,14 @@
 
 typedef struct {
 	uint32_t flags;
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	native_map_slot    *native_code_map;
 	deferred_addr      *deferred;
 	code_info          code;
 	uint8_t            **ram_inst_sizes;
 #endif	
 	memmap_chunk const *memmap;
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	code_ptr           save_context;
 	code_ptr           load_context;
 	code_ptr           handle_cycle_limit;
@@ -76,7 +76,7 @@
 	uint32_t           max_address;
 	uint32_t           bus_cycles;
 	uint32_t           clock_divider;
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	uint32_t           move_pc_off;
 	uint32_t           move_pc_size;
 	int32_t            mem_ptr_off;
@@ -85,7 +85,7 @@
 #endif
 	uint8_t            address_size;
 	uint8_t            byte_swap;
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	int8_t             context_reg;
 	int8_t             cycles;
 	int8_t             limit;
@@ -95,7 +95,7 @@
 #endif
 } cpu_options;
 
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 typedef uint8_t * (*native_addr_func)(void * context, uint32_t address);
 
 deferred_addr * defer_address(deferred_addr * old_head, uint32_t address, uint8_t *dest);
--- a/bindings.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/bindings.c	Sat Apr 18 11:42:53 2020 -0700
@@ -36,6 +36,7 @@
 	UI_RELOAD,
 	UI_SMS_PAUSE,
 	UI_SCREENSHOT,
+	UI_VGM_LOG,
 	UI_EXIT,
 	UI_PLANE_DEBUG,
 	UI_VRAM_DEBUG,
@@ -258,6 +259,39 @@
 #define localtime_r(a,b) localtime(a)
 #endif
 
+char *get_content_config_path(char *config_path, char *config_template, char *default_name)
+{
+	char *base = tern_find_path(config, config_path, TVAL_PTR).ptrval;
+	if (!base) {
+		base = "$HOME";
+	}
+	const system_media *media = current_media();
+	tern_node *vars = tern_insert_ptr(NULL, "HOME", get_home_dir());
+	vars = tern_insert_ptr(vars, "EXEDIR", get_exe_dir());
+	vars = tern_insert_ptr(vars, "USERDATA", (char *)get_userdata_dir());
+	vars = tern_insert_ptr(vars, "ROMNAME", media->name);
+	vars = tern_insert_ptr(vars, "ROMDIR", media->dir);
+	base = replace_vars(base, vars, 1);
+	tern_free(vars);
+	ensure_dir_exists(base);
+	time_t now = time(NULL);
+	struct tm local_store;
+	char fname_part[256];
+	char *template = tern_find_path(config, config_template, TVAL_PTR).ptrval;
+	if (template) {
+		vars = tern_insert_ptr(NULL, "ROMNAME", media->name);
+		template = replace_vars(template, vars, 0);
+	} else {
+		template = strdup(default_name);
+	}
+	strftime(fname_part, sizeof(fname_part), template, localtime_r(&now, &local_store));
+	char const *parts[] = {base, PATH_SEP, fname_part};
+	char *path = alloc_concat_m(3, parts);
+	free(base);
+	free(template);
+	return path;
+}
+
 void handle_binding_up(keybinding * binding)
 {
 	uint8_t allow_content_binds = content_binds_enabled && current_system;
@@ -352,31 +386,23 @@
 				current_system->gamepad_down(current_system, GAMEPAD_MAIN_UNIT, MAIN_UNIT_PAUSE);
 			}
 			break;
-		case UI_SCREENSHOT: {
+		case UI_SCREENSHOT:
 			if (allow_content_binds) {
-				char *screenshot_base = tern_find_path(config, "ui\0screenshot_path\0", TVAL_PTR).ptrval;
-				if (!screenshot_base) {
-					screenshot_base = "$HOME";
-				}
-				tern_node *vars = tern_insert_ptr(NULL, "HOME", get_home_dir());
-				vars = tern_insert_ptr(vars, "EXEDIR", get_exe_dir());
-				screenshot_base = replace_vars(screenshot_base, vars, 1);
-				tern_free(vars);
-				time_t now = time(NULL);
-				struct tm local_store;
-				char fname_part[256];
-				char *template = tern_find_path(config, "ui\0screenshot_template\0", TVAL_PTR).ptrval;
-				if (!template) {
-					template = "blastem_%c.ppm";
-				}
-				strftime(fname_part, sizeof(fname_part), template, localtime_r(&now, &local_store));
-				char const *parts[] = {screenshot_base, PATH_SEP, fname_part};
-				char *path = alloc_concat_m(3, parts);
-				free(screenshot_base);
+				char *path = get_content_config_path("ui\0screenshot_path\0", "ui\0screenshot_template\0", "blastem_%c.ppm");
 				render_save_screenshot(path);
 			}
 			break;
-		}
+		case UI_VGM_LOG:
+			if (allow_content_binds && current_system->start_vgm_log) {
+				if (current_system->vgm_logging) {
+					current_system->stop_vgm_log(current_system);
+				} else {
+					char *path = get_content_config_path("ui\0vgm_path\0", "ui\0vgm_template\0", "blastem_%c.vgm");
+					current_system->start_vgm_log(current_system, path);
+					free(path);
+				}
+			}
+			break;
 		case UI_EXIT:
 #ifndef DISABLE_NUKLEAR
 			if (is_nuklear_active()) {
@@ -625,6 +651,8 @@
 			*subtype_a = UI_SMS_PAUSE;
 		} else if (!strcmp(target + 3, "screenshot")) {
 			*subtype_a = UI_SCREENSHOT;
+		} else if (!strcmp(target + 3, "vgm_log")) {
+			*subtype_a = UI_VGM_LOG;
 		} else if(!strcmp(target + 3, "exit")) {
 			*subtype_a = UI_EXIT;
 		} else if (!strcmp(target + 3, "plane_debug")) {
--- a/blastcpm.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/blastcpm.c	Sat Apr 18 11:42:53 2020 -0700
@@ -5,7 +5,7 @@
 #include <time.h>
 #include <sys/select.h>
 
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 #include "z80_to_x86.h"
 #else
 #include "z80.h"
@@ -19,7 +19,7 @@
 #define OS_RESET 0xE403
 int headless = 1;
 
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 void z80_next_int_pulse(z80_context * context)
 {
 	context->int_pulse_start = context->int_pulse_end = CYCLE_NEVER;
@@ -68,7 +68,7 @@
 {
 	time_t duration = time(NULL) - start;
 	z80_context *z80 = context;
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	total_cycles += context->current_cycle;
 #else
 	total_cycles += z80->cycles;
@@ -124,7 +124,7 @@
 	for(;;)
 	{
 		z80_run(context, 1000000);
-#ifdef USE_NATIVE		
+#ifndef NEW_CORE		
 		total_cycles += context->current_cycle;
 		context->current_cycle = 0;
 #else
--- a/blastem.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/blastem.c	Sat Apr 18 11:42:53 2020 -0700
@@ -11,7 +11,7 @@
 #include "system.h"
 #include "68kinst.h"
 #include "m68k_core.h"
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 #include "z80_to_x86.h"
 #else
 #include "z80.h"
@@ -267,6 +267,7 @@
 		savedir_template = "$USERDATA/blastem/$ROMNAME";
 	}
 	tern_node *vars = tern_insert_ptr(NULL, "ROMNAME", media->name);
+	vars = tern_insert_ptr(vars, "ROMDIR", media->dir);
 	vars = tern_insert_ptr(vars, "HOME", get_home_dir());
 	vars = tern_insert_ptr(vars, "EXEDIR", get_exe_dir());
 	vars = tern_insert_ptr(vars, "USERDATA", (char *)get_userdata_dir());
@@ -343,6 +344,11 @@
 }
 
 static system_media cart, lock_on;
+const system_media *current_media(void)
+{
+	return &cart;
+}
+
 void reload_media(void)
 {
 	if (!current_system) {
@@ -382,7 +388,7 @@
 {
 	if (game_system) {
 		game_system->persist_save(game_system);
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 		//swap to game context arena and mark all allocated pages in it free
 		if (current_system == menu_system) {
 			current_system->arena = set_current_arena(game_system->arena);
@@ -390,7 +396,7 @@
 		mark_all_free();
 #endif
 		game_system->free_context(game_system);
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	} else if(current_system) {
 		//start a new arena and save old one in suspended system context
 		current_system->arena = start_new_arena();
@@ -461,7 +467,7 @@
 					debug_target = 1;
 				}
 				break;
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 			case 'D':
 				gdb_remote_init();
 				dtype = DEBUGGER_GDB;
@@ -692,7 +698,7 @@
 			current_system->enter_debugger = start_in_debugger && menu == debug_target;
 			current_system->start_context(current_system, statefile);
 		} else if (menu && game_system) {
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 			current_system->arena = set_current_arena(game_system->arena);
 #endif
 			current_system = game_system;
@@ -704,7 +710,7 @@
 				ui_idle_loop();
 #endif
 			} else {
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 				current_system->arena = set_current_arena(menu_system->arena);
 #endif
 				current_system = menu_system;
--- a/blastem.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/blastem.h	Sat Apr 18 11:42:53 2020 -0700
@@ -19,5 +19,6 @@
 void lockon_media(char *lock_on_path);
 void init_system_with_media(const char *path, system_type force_stype);
 void apply_updated_config(void);
+const system_media *current_media(void);
 
 #endif //BLASTEM_H_
--- a/build_release	Thu Apr 18 22:06:47 2019 -0700
+++ b/build_release	Sat Apr 18 11:42:53 2020 -0700
@@ -62,7 +62,7 @@
 echo $dir
 rm -rf "$dir"
 mkdir "$dir"
-cp -r $binaries shaders images default.cfg rom.db gamecontrollerdb.txt "$dir"
+cp -r $binaries shaders images default.cfg rom.db gamecontrollerdb.txt systems.cfg "$dir"
 for file in README COPYING CHANGELOG; do
 	cp "$file" "$dir"/"$file$txt"
 done
--- a/config.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/config.c	Sat Apr 18 11:42:53 2020 -0700
@@ -6,6 +6,7 @@
 #include "tern.h"
 #include "util.h"
 #include "paths.h"
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -225,7 +226,7 @@
 	return ret;
 }
 
-tern_node *load_overrideable_config(char *name, char *bundled_name)
+tern_node *load_overrideable_config(char *name, char *bundled_name, uint8_t *used_config_dir)
 {
 	char const *confdir = get_config_dir();
 	char *confpath = NULL;
@@ -233,58 +234,60 @@
 	if (confdir) {
 		confpath = path_append(confdir, name);
 		ret = parse_config_file(confpath);
-		if (ret) {
-			free(confpath);
-			return ret;
+	}
+	free(confpath);
+	if (used_config_dir) {
+		*used_config_dir = ret != NULL;
+	}
+	
+	if (!ret) {
+		ret = parse_bundled_config(name);
+		if (!ret) {
+			ret = parse_bundled_config(bundled_name);
 		}
 	}
 
-	ret = parse_bundled_config(bundled_name);
-	if (ret) {
-		free(confpath);
-		return ret;
-	}
-	return NULL;
+	return ret;
 }
 
+static uint8_t app_config_in_config_dir;
 tern_node *load_config()
 {
-	char const *confdir = get_config_dir();
-	char *confpath = NULL;
-	tern_node *ret = load_overrideable_config("blastem.cfg", "default.cfg");
-	if (confdir) {
-		confpath = path_append(confdir, "blastem.cfg");
-		ret = parse_config_file(confpath);
-		if (ret) {
-			free(confpath);
-			return ret;
+	tern_node *ret = load_overrideable_config("blastem.cfg", "default.cfg", &app_config_in_config_dir);
+	
+	if (!ret) {
+		if (get_config_dir()) {
+			fatal_error("Failed to find a config file at %s or in the blastem executable directory\n", get_config_dir());
+		} else {
+			fatal_error("Failed to find a config file in the BlastEm executable directory and the config directory path could not be determined\n");
 		}
 	}
-
-	ret = parse_bundled_config("default.cfg");
-	if (ret) {
-		free(confpath);
-		return ret;
-	}
-
-	if (get_config_dir()) {
-		fatal_error("Failed to find a config file at %s or in the blastem executable directory\n", get_config_dir());
-	} else {
-		fatal_error("Failed to find a config file in the BlastEm executable directory and the config directory path could not be determined\n");
-	}
-	//this will never get reached, but the compiler doesn't know that. Let's make it happy
-	return NULL;
+	return ret;
 }
 
-void persist_config_at(tern_node *config, char *fname)
+void persist_config_at(tern_node *app_config, tern_node *to_save, char *fname)
 {
-	char const *confdir = get_config_dir();
-	if (!confdir) {
-		fatal_error("Failed to locate config file directory\n");
+	char*use_exe_dir = tern_find_path_default(app_config, "ui\0config_in_exe_dir\0", (tern_val){.ptrval = "off"}, TVAL_PTR).ptrval;
+	char *confpath;
+	if (!strcmp(use_exe_dir, "on")) {
+		confpath = path_append(get_exe_dir(), fname);
+		if (app_config == to_save && app_config_in_config_dir) {
+			//user switched to "portable" configs this session and there is an
+			//existing config file in the user-specific config directory
+			//delete it so we don't end up loading it next time
+			char *oldpath = path_append(get_config_dir(), fname);
+			delete_file(oldpath);
+			free(oldpath);
+		}
+	} else {
+		char const *confdir = get_config_dir();
+		if (!confdir) {
+			fatal_error("Failed to locate config file directory\n");
+		}
+		ensure_dir_exists(confdir);
+		confpath = path_append(confdir, fname);
 	}
-	ensure_dir_exists(confdir);
-	char *confpath = path_append(confdir, fname);
-	if (!serialize_config_file(config, confpath)) {
+	if (!serialize_config_file(to_save, confpath)) {
 		fatal_error("Failed to write config to %s\n", confpath);
 	}
 	free(confpath);
@@ -292,7 +295,7 @@
 
 void persist_config(tern_node *config)
 {
-	persist_config_at(config, "blastem.cfg");
+	persist_config_at(config, config, "blastem.cfg");
 }
 
 char **get_extension_list(tern_node *config, uint32_t *num_exts_out)
@@ -320,3 +323,18 @@
 	char * lowpass_cutoff_str = tern_find_path(config, "audio\0lowpass_cutoff\0", TVAL_PTR).ptrval;
 	return lowpass_cutoff_str ? atoi(lowpass_cutoff_str) : DEFAULT_LOWPASS_CUTOFF;
 }
+
+tern_node *get_systems_config(void)
+{
+	static tern_node *systems;
+	if (!systems) {
+		systems = parse_bundled_config("systems.cfg");
+	}
+	return systems;
+}
+
+tern_node *get_model(tern_node *config, system_type stype)
+{
+	char *model = tern_find_path_default(config, "system\0model\0", (tern_val){.ptrval = "md1va3"}, TVAL_PTR).ptrval;
+	return tern_find_node(get_systems_config(), model);
+}
--- a/config.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/config.h	Sat Apr 18 11:42:53 2020 -0700
@@ -6,17 +6,20 @@
 #ifndef CONFIG_H_
 #define CONFIG_H_
 #include "tern.h"
+#include "system.h"
 
 tern_node *parse_config_file(char *config_path);
 tern_node *parse_bundled_config(char *config_name);
-tern_node *load_overrideable_config(char *name, char *bundled_name);
+tern_node *load_overrideable_config(char *name, char *bundled_name, uint8_t *used_config_dir);
 tern_node *load_config();
 char *serialize_config(tern_node *config, uint32_t *size_out);
 uint8_t serialize_config_file(tern_node *config, char *path);
-void persist_config_at(tern_node *config, char *fname);
+void persist_config_at(tern_node *app_config, tern_node *to_save, char *fname);
 void persist_config(tern_node *config);
 char **get_extension_list(tern_node *config, uint32_t *num_exts_out);
 uint32_t get_lowpass_cutoff(tern_node *config);
+tern_node *get_systems_config(void);
+tern_node *get_model(tern_node *config, system_type stype);
 
 #endif //CONFIG_H_
 
--- a/controller_info.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/controller_info.c	Sat Apr 18 11:42:53 2020 -0700
@@ -6,6 +6,8 @@
 #include "controller_info.h"
 #include "config.h"
 #include "util.h"
+#include "blastem.h"
+#include "bindings.h"
 
 typedef struct {
 	char const      *name;
@@ -66,7 +68,7 @@
 static void load_ctype_config(void)
 {
 	if (!loaded) {
-		info_config = load_overrideable_config("controller_types.cfg", "controller_types.cfg");
+		info_config = load_overrideable_config("controller_types.cfg", "controller_types.cfg", NULL);
 		loaded = 1;
 	}
 }
@@ -194,10 +196,11 @@
 	char guid_string[33];
 	SDL_JoystickGetGUIDString(SDL_JoystickGetGUID(render_get_joystick(joystick)), guid_string, sizeof(guid_string));
 	tern_node *existing = tern_find_node(info_config, guid_string);
-	existing = tern_insert_ptr(existing, "subtype", (void *)subtype_names[info->subtype]);
-	existing = tern_insert_ptr(existing, "variant",  (void *)variant_names[info->variant]);
+	existing = tern_insert_ptr(existing, "subtype", strdup(subtype_names[info->subtype]));
+	existing = tern_insert_ptr(existing, "variant", strdup(variant_names[info->variant]));
 	info_config = tern_insert_node(info_config, guid_string, existing);
-	persist_config_at(info_config, "controller_types.cfg");
+	persist_config_at(config, info_config, "controller_types.cfg");
+	handle_joy_added(joystick);
 #endif	
 }
 
@@ -209,7 +212,12 @@
 	tern_node *existing = tern_find_node(info_config, guid_string);
 	existing = tern_insert_ptr(existing, "mapping", mapping_string);
 	info_config = tern_insert_node(info_config, guid_string, existing);
-	persist_config_at(info_config, "controller_types.cfg");
+	persist_config_at(config, info_config, "controller_types.cfg");
+	const char *parts[] = {guid_string, ",", mapping_string};
+	char * full = alloc_concat_m(3, parts);
+	SDL_GameControllerAddMapping(full);
+	free(full);
+	handle_joy_added(joystick);
 #endif
 }
 
--- a/cpu_dsl.py	Thu Apr 18 22:06:47 2019 -0700
+++ b/cpu_dsl.py	Sat Apr 18 11:42:53 2020 -0700
@@ -48,6 +48,7 @@
 		self.regValues = {}
 		self.varyingBits = 0
 		self.invalidFieldValues = {}
+		self.invalidCombos = []
 		self.newLocals = []
 		for field in fields:
 			self.varyingBits += fields[field][1]
@@ -58,9 +59,17 @@
 			size = int(op.params[1])
 			self.locals[name] = size
 		elif op.op == 'invalid':
-			name = op.params[0]
-			value = int(op.params[1])
-			self.invalidFieldValues.setdefault(name, set()).add(value)
+			if len(op.params) < 3:
+				name = op.params[0]
+				value = int(op.params[1])
+				self.invalidFieldValues.setdefault(name, set()).add(value)
+			else:
+				vmap = {}
+				for i in range(0, len(op.params), 2):
+					name = op.params[i]
+					value = int(op.params[i+1])
+					vmap[name] = value
+				self.invalidCombos.append(vmap)
 		else:
 			self.implementation.append(op)
 			
@@ -89,12 +98,29 @@
 		for i in range(0, 1 << self.varyingBits):
 			iword = self.value
 			doIt = True
+			combos = []
+			for combo in self.invalidCombos:
+				combos.append(dict(combo))
 			for field in self.fields:
 				shift,bits = self.fields[field]
 				val = i & ((1 << bits) - 1)
 				if field in self.invalidFieldValues and val in self.invalidFieldValues[field]:
 					doIt = False
 					break
+				nextcombos = []
+				for combo in combos:
+					if field in combo:
+						if combo[field] == val:
+							del combo[field]
+							if not combo:
+								doIt = False
+								break
+						else:
+							continue
+					nextcombos.append(combo)
+				combos = nextcombos
+				if not doIt:
+					break
 				i >>= bits
 				iword |= val << shift
 			if doIt:
@@ -132,7 +158,7 @@
 		self.processOps(prog, fieldVals, output, otype, self.implementation)
 		
 		if prog.dispatch == 'call':
-			begin = '\nvoid ' + self.generateName(value) + '(' + prog.context_type + ' *context)\n{'
+			begin = '\nvoid ' + self.generateName(value) + '(' + prog.context_type + ' *context, uint32_t target_cycle)\n{'
 		elif prog.dispatch == 'goto':
 			begin = '\n' + self.generateName(value) + ': {'
 		else:
@@ -143,6 +169,8 @@
 			output.append(prog.flags.disperseFlags(prog, otype))
 		for var in self.newLocals:
 			begin += '\n\tuint{sz}_t {name};'.format(sz=self.locals[var], name=var)
+		for size in prog.temp:
+			begin += '\n\tuint{sz}_t gen_tmp{sz}__;'.format(sz=size)
 		prog.popScope()
 		if prog.dispatch == 'goto':
 			output += prog.nextInstruction(otype)
@@ -237,6 +265,20 @@
 			else:
 				a = params[0]
 				b = params[1]
+			needsSizeAdjust = False
+			if len(params) > 3:
+				size = params[3]
+				if size == 0:
+					size = 8
+				elif size == 1:
+					size = 16
+				else:
+					size = 32
+				prog.lastSize = size
+				destSize = prog.paramSize(rawParams[2])
+				if destSize > size:
+					needsSizeAdjust = True
+					prog.sizeAdjust = size
 			needsCarry = needsOflow = needsHalf = False
 			if flagUpdates:
 				for flag in flagUpdates:
@@ -248,20 +290,35 @@
 					elif calc == 'overflow':
 						needsOflow = True
 			decl = ''
-			if needsCarry or needsOflow or needsHalf:
-				size = prog.paramSize(rawParams[2])
+			if needsCarry or needsOflow or needsHalf or (flagUpdates and needsSizeAdjust):
+				if len(params) <= 3:
+					size = prog.paramSize(rawParams[2])
 				if needsCarry and op != 'lsr':
 					size *= 2
 				decl,name = prog.getTemp(size)
 				dst = prog.carryFlowDst = name
 				prog.lastA = a
 				prog.lastB = b
+				if size == 64:
+					a = '((uint64_t){a})'.format(a=a)
+					b = '((uint64_t){b})'.format(b=b)
 				prog.lastBFlow = b if op == '-' else '(~{b})'.format(b=b)
+			elif needsSizeAdjust:
+				decl,name = prog.getTemp(size)
+				dst = params[2]
+				return '{decl}\n\t{tmp} = ({a} & {mask}) {op} ({b} & {mask});\n\t{dst} = ({dst} & ~{mask}) | {tmp};'.format(
+					decl = decl, tmp = name, a = a, b = b, op = op, dst = dst, mask = ((1 << size) - 1)
+				)
 			else:
 				dst = params[2]
-			return decl + '\n\t{dst} = {a} {op} {b};'.format(
-				dst = dst, a = a, b = b, op = op
-			)
+			if needsSizeAdjust:
+				return decl + '\n\t{dst} = ({a} & {mask}) {op} ({b} & {mask});'.format(
+					dst = dst, a = a, b = b, op = op, mask = (1 << prog.sizeAdjust) - 1
+				)
+			else:
+				return decl + '\n\t{dst} = {a} {op} {b};'.format(
+					dst = dst, a = a, b = b, op = op
+				)
 		self.impls['c'] = _impl
 		self.outOp = (2,)
 		return self
@@ -269,6 +326,20 @@
 		def _impl(prog, params, rawParams, flagUpdates):
 			dst = params[1]
 			decl = ''
+			needsSizeAdjust = False
+			if len(params) > 2:
+				size = params[2]
+				if size == 0:
+					size = 8
+				elif size == 1:
+					size = 16
+				else:
+					size = 32
+				prog.lastSize = size
+				destSize = prog.paramSize(rawParams[1])
+				if destSize > size:
+					needsSizeAdjust = True
+					prog.sizeAdjust = size
 			if op == '-':
 				if flagUpdates:
 					for flag in flagUpdates:
@@ -279,7 +350,7 @@
 							needsHalf = True
 						elif calc == 'overflow':
 							needsOflow = True
-				if needsCarry or needsOflow or needsHalf:
+				if needsCarry or needsOflow or needsHalf or (flagUpdates and needsSizeAdjust):
 					size = prog.paramSize(rawParams[1])
 					if needsCarry:
 						size *= 2
@@ -288,9 +359,14 @@
 					prog.lastA = 0
 					prog.lastB = params[0]
 					prog.lastBFlow = params[0]
-			return decl + '\n\t{dst} = {op}{a};'.format(
-				dst = dst, a = params[0], op = op
-			)
+			if needsSizeAdjust:
+				return decl + '\n\t{dst} = ({dst} & ~{mask}) | (({op}{a}) & {mask});'.format(
+					dst = dst, a = params[0], op = op, mask = (1 << prog.sizeAdjust) - 1
+				)
+			else:
+				return decl + '\n\t{dst} = {op}{a};'.format(
+					dst = dst, a = params[0], op = op
+				)
 		self.impls['c'] = _impl
 		self.outOp = (1,)
 		return self
@@ -336,7 +412,7 @@
 	else:
 		table = params[1]
 	if prog.dispatch == 'call':
-		return '\n\timpl_{tbl}[{op}](context);'.format(tbl = table, op = params[0])
+		return '\n\timpl_{tbl}[{op}](context, target_cycle);'.format(tbl = table, op = params[0])
 	elif prog.dispatch == 'goto':
 		return '\n\tgoto *impl_{tbl}[{op}];'.format(tbl = table, op = params[0])
 	else:
@@ -358,25 +434,25 @@
 		if calc == 'bit' or calc == 'sign' or calc == 'carry' or calc == 'half' or calc == 'overflow':
 			myRes = lastDst
 			if calc == 'sign':
-				resultBit = prog.paramSize(prog.lastDst) - 1
+				resultBit = prog.getLastSize() - 1
 			elif calc == 'carry':
 				if prog.lastOp.op in ('asr', 'lsr'):
 					resultBit = 0
 					myRes = prog.lastA
 				else:
-					resultBit = prog.paramSize(prog.lastDst)
+					resultBit = prog.getLastSize()
 					if prog.lastOp.op == 'ror':
 						resultBit -= 1
 			elif calc == 'half':
-				resultBit = prog.paramSize(prog.lastDst) - 4
+				resultBit = prog.getLastSize() - 4
 				myRes = '({a} ^ {b} ^ {res})'.format(a = prog.lastA, b = prog.lastB, res = lastDst)
 			elif calc == 'overflow':
-				resultBit = prog.paramSize(prog.lastDst) - 1
+				resultBit = prog.getLastSize() - 1
 				myRes = '((({a} ^ {b})) & ({a} ^ {res}))'.format(a = prog.lastA, b = prog.lastBFlow, res = lastDst)
 			else:
 				#Note: offsetting this by the operation size - 8 makes sense for the Z80
 				#but might not for other CPUs with this kind of fixed bit flag behavior
-				resultBit = int(resultBit) + prog.paramSize(prog.lastDst) - 8
+				resultBit = int(resultBit) + prog.getLastSize() - 8
 			if type(storage) is tuple:
 				reg,storageBit = storage
 				if storageBit == resultBit:
@@ -401,7 +477,7 @@
 					output.append('\n\t{reg} = {res} & {mask}U;'.format(reg=reg, res=myRes, mask = 1 << resultBit))
 		elif calc == 'zero':
 			if prog.carryFlowDst:
-				realSize = prog.paramSize(prog.lastDst)
+				realSize = prog.getLastSize()
 				if realSize != prog.paramSize(prog.carryFlowDst):
 					lastDst = '({res} & {mask})'.format(res=lastDst, mask = (1 << realSize) - 1)
 			if type(storage) is tuple:
@@ -417,7 +493,7 @@
 				))
 		elif calc == 'parity':
 			parity = storage
-			paritySize = prog.paramSize(prog.lastDst)
+			paritySize = prog.getLastSize()
 			if prog.carryFlowDst:
 				parityDst = paritySrc = prog.carryFlowDst
 			else:
@@ -441,7 +517,13 @@
 			))
 	if prog.carryFlowDst:
 		if prog.lastOp.op != 'cmp':
-			output.append('\n\t{dst} = {tmpdst};'.format(dst = prog.resolveParam(prog.lastDst, prog.currentScope, {}), tmpdst = prog.carryFlowDst))
+			if prog.sizeAdjust:
+				output.append('\n\t{dst} = ({dst} & ~{mask}) | ({tmpdst} & {mask});'.format(
+					dst = prog.resolveParam(prog.lastDst, prog.currentScope, {}), tmpdst = prog.carryFlowDst, mask = ((1 << prog.sizeAdjust) - 1)
+				))
+				prog.sizeAdjust = None
+			else:
+				output.append('\n\t{dst} = {tmpdst};'.format(dst = prog.resolveParam(prog.lastDst, prog.currentScope, {}), tmpdst = prog.carryFlowDst))
 		prog.carryFlowDst = None
 	if parity:
 		if paritySize > 8:
@@ -500,6 +582,17 @@
 	if not scope.resolveLocal(tmpvar):
 		scope.addLocal(tmpvar, size)
 	prog.lastDst = rawParams[1]
+	if len(params) > 2:
+		size = params[2]
+		if size == 0:
+			size = 8
+		elif size == 1:
+			size = 16
+		else:
+			size = 32
+		prog.lastSize = size
+	else:
+		prog.lastSize = None
 	return '\n\t{var} = {b} - {a};'.format(var = tmpvar, a = params[0], b = params[1])
 
 def _asrCImpl(prog, params, rawParams, flagUpdates):
@@ -523,22 +616,23 @@
 	
 def _sext(size, src):
 	if size == 16:
-		return src | 0xFF00 if src & 0x80 else src
+		return src | 0xFF00 if src & 0x80 else src & 0x7F
 	else:
-		return src | 0xFFFF0000 if src & 0x8000 else src
+		return src | 0xFFFF0000 if src & 0x8000 else src & 0x7FFF
 
 def _sextCImpl(prog, params, rawParms):
 	if params[0] == 16:
-		fmt = '\n\t{dst} = {src} & 0x80 ? {src} | 0xFF00 : {src};'
+		fmt = '\n\t{dst} = {src} & 0x80 ? {src} | 0xFF00 : {src} & 0x7F;'
 	else:
-		fmt = '\n\t{dst} = {src} & 0x8000 ? {src} | 0xFFFF0000 : {src};'
+		fmt = '\n\t{dst} = {src} & 0x8000 ? {src} | 0xFFFF0000 : {src} & 0x7FFF;'
 	return fmt.format(src=params[1], dst=params[2])
 	
 def _getCarryCheck(prog):
 	carryFlag = None
-	for flag in prog.flags.flagCalc:
+	for flag in prog.flags.flagOrder:
 		if prog.flags.flagCalc[flag] == 'carry':
 			carryFlag = flag
+			break
 	if carryFlag is None:
 		raise Exception('adc requires a defined carry flag')
 	carryStorage = prog.flags.getStorage(carryFlag)
@@ -550,6 +644,20 @@
 		return prog.resolveReg(carryStorage, None, (), False)
 
 def _adcCImpl(prog, params, rawParams, flagUpdates):
+	needsSizeAdjust = False
+	if len(params) > 3:
+		size = params[3]
+		if size == 0:
+			size = 8
+		elif size == 1:
+			size = 16
+		else:
+			size = 32
+		prog.lastSize = size
+		destSize = prog.paramSize(rawParams[2])
+		if destSize > size:
+			needsSizeAdjust = True
+			prog.sizeAdjust = size
 	needsCarry = needsOflow = needsHalf = False
 	if flagUpdates:
 		for flag in flagUpdates:
@@ -562,8 +670,10 @@
 				needsOflow = True
 	decl = ''
 	carryCheck = _getCarryCheck(prog)
-	if needsCarry or needsOflow or needsHalf:
-		size = prog.paramSize(rawParams[2])
+	vals = '1 : 0'
+	if needsCarry or needsOflow or needsHalf or (flagUpdates and needsSizeAdjust):
+		if len(params) <= 3:
+			size = prog.paramSize(rawParams[2])
 		if needsCarry:
 			size *= 2
 		decl,name = prog.getTemp(size)
@@ -571,13 +681,37 @@
 		prog.lastA = params[0]
 		prog.lastB = params[1]
 		prog.lastBFlow = '(~{b})'.format(b=params[1])
+		if size == 64:
+			params[0] = '((uint64_t){a})'.format(a=params[0])
+			params[1] = '((uint64_t){b})'.format(b=params[1])
+			vals = '((uint64_t)1) : ((uint64_t)0)'
+	elif needsSizeAdjust:
+		decl,name = prog.getTemp(size)
+		dst = params[2]
+		return '{decl}\n\t{tmp} = ({a} & {mask}) + ({b} & {mask}) + ({check} ? 1 : 0);\n\t{dst} = ({dst} & ~{mask}) | {tmp};'.format(
+			decl = decl, tmp = name, a = a, b = b, op = op, dst = dst, mask = ((1 << size) - 1), check = carryCheck
+		)
 	else:
 		dst = params[2]
-	return decl + '\n\t{dst} = {a} + {b} + ({check} ? 1 : 0);'.format(dst = dst,
-		a = params[0], b = params[1], check = carryCheck
+	return decl + '\n\t{dst} = {a} + {b} + ({check} ? {vals});'.format(dst = dst,
+		a = params[0], b = params[1], check = carryCheck, vals = vals
 	)
 
 def _sbcCImpl(prog, params, rawParams, flagUpdates):
+	needsSizeAdjust = False
+	if len(params) > 3:
+		size = params[3]
+		if size == 0:
+			size = 8
+		elif size == 1:
+			size = 16
+		else:
+			size = 32
+		prog.lastSize = size
+		destSize = prog.paramSize(rawParams[2])
+		if destSize > size:
+			needsSizeAdjust = True
+			prog.sizeAdjust = size
 	needsCarry = needsOflow = needsHalf = False
 	if flagUpdates:
 		for flag in flagUpdates:
@@ -590,7 +724,8 @@
 				needsOflow = True
 	decl = ''
 	carryCheck = _getCarryCheck(prog)
-	if needsCarry or needsOflow or needsHalf:
+	vals = '1 : 0'
+	if needsCarry or needsOflow or needsHalf or (flagUpdates and needsSizeAdjust):
 		size = prog.paramSize(rawParams[2])
 		if needsCarry:
 			size *= 2
@@ -599,10 +734,20 @@
 		prog.lastA = params[1]
 		prog.lastB = params[0]
 		prog.lastBFlow = params[0]
+		if size == 64:
+			params[1] = '((uint64_t){a})'.format(a=params[1])
+			params[0] = '((uint64_t){b})'.format(b=params[0])
+			vals = '((uint64_t)1) : ((uint64_t)0)'
+	elif needsSizeAdjust:
+		decl,name = prog.getTemp(size)
+		dst = params[2]
+		return '{decl}\n\t{tmp} = ({b} & {mask}) - ({a} & {mask}) - ({check} ? 1 : 0);\n\t{dst} = ({dst} & ~{mask}) | {tmp};'.format(
+			decl = decl, tmp = name, a = params[0], b = params[1], op = op, dst = dst, mask = ((1 << size) - 1), check = carryCheck
+		)
 	else:
 		dst = params[2]
-	return decl + '\n\t{dst} = {b} - {a} - ({check} ? 1 : 0);'.format(dst = dst,
-		a = params[0], b = params[1], check=_getCarryCheck(prog)
+	return decl + '\n\t{dst} = {b} - {a} - ({check} ? {vals});'.format(dst = dst,
+		a = params[0], b = params[1], check=_getCarryCheck(prog), vals = vals
 	)
 	
 def _rolCImpl(prog, params, rawParams, flagUpdates):
@@ -697,6 +842,9 @@
 	'ocall': Op().addImplementation('c', None, lambda prog, params: '\n\t{pre}{fun}({args});'.format(
 		pre = prog.prefix, fun = params[0], args = ', '.join(['context'] + [str(p) for p in params[1:]])
 	)),
+	'pcall': Op().addImplementation('c', None, lambda prog, params: '\n\t(({typ}){fun})({args});'.format(
+		typ = params[1], fun = params[0], args = ', '.join([str(p) for p in params[2:]])
+	)),
 	'cycles': Op().addImplementation('c', None,
 		lambda prog, params: '\n\tcontext->cycles += context->opts->gen.clock_divider * {0};'.format(
 			params[0]
@@ -704,12 +852,12 @@
 	),
 	'addsize': Op(
 		lambda a, b: b + (2 * a if a else 1)
-	).addImplementation('c', 2, lambda prog, params: '\n\t{dst} = {val} + {sz} ? {sz} * 2 : 1;'.format(
+	).addImplementation('c', 2, lambda prog, params: '\n\t{dst} = {val} + ({sz} ? {sz} * 2 : 1);'.format(
 		dst = params[2], sz = params[0], val = params[1]
 	)),
 	'decsize': Op(
 		lambda a, b: b - (2 * a if a else 1)
-	).addImplementation('c', 2, lambda prog, params: '\n\t{dst} = {val} - {sz} ? {sz} * 2 : 1;'.format(
+	).addImplementation('c', 2, lambda prog, params: '\n\t{dst} = {val} - ({sz} ? {sz} * 2 : 1);'.format(
 		dst = params[2], sz = params[0], val = params[1]
 	)),
 	'xchg': Op().addImplementation('c', (0,1), _xchgCImpl),
@@ -729,8 +877,8 @@
 		allParamsConst = flagUpdates is None and not prog.conditional
 		opDef = _opMap.get(self.op)
 		for param in self.params:
-			allowConst = (self.op in prog.subroutines or len(procParams) != len(self.params) - 1) and param in parent.regValues
 			isDst = (not opDef is None) and len(procParams) in opDef.outOp
+			allowConst = (self.op in prog.subroutines or not isDst) and param in parent.regValues
 			if isDst and self.op == 'xchg':
 				#xchg uses its regs as both source and destination
 				#we need to resolve as both so that disperse/coalesce flag stuff gets done
@@ -794,6 +942,10 @@
 				else:
 					if param in fieldVals:
 						param = fieldVals[param]
+					else:
+						maybeLocal = parent.resolveLocal(param)
+						if maybeLocal and maybeLocal in parent.regValues:
+							param = parent.regValues[maybeLocal]
 				procParams.append(param)
 			prog.subroutines[self.op].inline(prog, procParams, output, otype, parent)
 		else:
@@ -872,7 +1024,7 @@
 			output.append('\n\tswitch(' + param + ')')
 			output.append('\n\t{')
 			for case in self.cases:
-				temp = prog.temp.copy()
+				#temp = prog.temp.copy()
 				self.current_locals = self.case_locals[case]
 				self.regValues = dict(self.parent.regValues)
 				output.append('\n\tcase {0}U: '.format(case) + '{')
@@ -881,16 +1033,16 @@
 				self.processOps(prog, fieldVals, output, otype, self.cases[case])
 				output.append('\n\tbreak;')
 				output.append('\n\t}')
-				prog.temp = temp
+				#prog.temp = temp
 			if self.default:
-				temp = prog.temp.copy()
+				#temp = prog.temp.copy()
 				self.current_locals = self.default_locals
 				self.regValues = dict(self.parent.regValues)
 				output.append('\n\tdefault: {')
 				for local in self.default_locals:
 					output.append('\n\tuint{0}_t {1};'.format(self.default_locals[local], local))
 				self.processOps(prog, fieldVals, output, otype, self.default)
-				prog.temp = temp
+				#prog.temp = temp
 			output.append('\n\t}')
 			prog.conditional = oldCond
 		prog.popScope()
@@ -915,10 +1067,15 @@
 		raise Exception(">=U not implemented in the general case yet")
 
 def _eqCImpl(prog, parent, fieldVals, output):
-	return '\n\tif (!{a}) {'.format(a=prog.resolveParam(prog.lastDst, None, {}))
+	if prog.lastOp.op == 'cmp':
+		output.pop()
+		params = [prog.resolveParam(p, parent, fieldVals) for p in prog.lastOp.params]
+		return '\n\tif ({a} == {b}) '.format(a=params[1], b = params[0]) + '{'
+	else:
+		return '\n\tif (!{a}) {{'.format(a=prog.resolveParam(prog.lastDst, None, {}))
 
 def _neqCImpl(prog, parent, fieldVals, output):
-	return '\n\tif ({a}) {'.format(a=prog.resolveParam(prog.lastDst, None, {}))
+	return '\n\tif ({a}) {{'.format(a=prog.resolveParam(prog.lastDst, None, {}))
 	
 _ifCmpImpl = {
 	'c': {
@@ -986,21 +1143,21 @@
 			
 	def generate(self, prog, parent, fieldVals, output, otype, flagUpdates):
 		self.regValues = parent.regValues
-		try:
+		if self.cond in prog.booleans:
 			self._genConstParam(prog.checkBool(self.cond), prog, fieldVals, output, otype)
-		except Exception:
+		else:
 			if self.cond in _ifCmpImpl[otype]:
 				oldCond = prog.conditional
 				prog.conditional = True
-				temp = prog.temp.copy()
+				#temp = prog.temp.copy()
 				output.append(_ifCmpImpl[otype][self.cond](prog, parent, fieldVals, output))
 				self._genTrueBody(prog, fieldVals, output, otype)
-				prog.temp = temp
+				#prog.temp = temp
 				if self.elseBody:
-					temp = prog.temp.copy()
+					#temp = prog.temp.copy()
 					output.append('\n\t} else {')
 					self._genFalseBody(prog, fieldVals, output, otype)
-					prog.temp = temp
+					#prog.temp = temp
 				output.append('\n\t}')
 				prog.conditional = oldCond
 			else:
@@ -1008,17 +1165,17 @@
 				if type(cond) is int:
 					self._genConstParam(cond, prog, fieldVals, output, otype)
 				else:
-					temp = prog.temp.copy()
+					#temp = prog.temp.copy()
 					output.append('\n\tif ({cond}) '.format(cond=cond) + '{')
 					oldCond = prog.conditional
 					prog.conditional = True
 					self._genTrueBody(prog, fieldVals, output, otype)
-					prog.temp = temp
+					#prog.temp = temp
 					if self.elseBody:
-						temp = prog.temp.copy()
+						#temp = prog.temp.copy()
 						output.append('\n\t} else {')
 						self._genFalseBody(prog, fieldVals, output, otype)
-						prog.temp = temp
+						#prog.temp = temp
 					output.append('\n\t}')
 					prog.conditional = oldCond
 						
@@ -1129,6 +1286,7 @@
 		self.flagBits = {}
 		self.flagCalc = {}
 		self.flagStorage = {}
+		self.flagOrder = []
 		self.flagReg = None
 		self.storageToFlags = {}
 		self.maxBit = -1
@@ -1153,6 +1311,7 @@
 			self.flagStorage[flag] = storage
 			storage,_,storebit = storage.partition('.')
 			self.storageToFlags.setdefault(storage, []).append((storebit, flag))
+			self.flagOrder.append(flag)
 		return self
 	
 	def getStorage(self, flag):
@@ -1312,8 +1471,10 @@
 		self.lastA = None
 		self.lastB = None
 		self.lastBFlow = None
+		self.sizeAdjust = None
 		self.conditional = False
 		self.declares = []
+		self.lastSize = None
 		
 	def __str__(self):
 		pieces = []
@@ -1411,11 +1572,11 @@
 		for include in self.includes:
 			body.append('#include "{0}"\n'.format(include))
 		if self.dispatch == 'call':
-			body.append('\nstatic void unimplemented({pre}context *context)'.format(pre = self.prefix))
+			body.append('\nstatic void unimplemented({pre}context *context, uint32_t target_cycle)'.format(pre = self.prefix))
 			body.append('\n{')
 			body.append('\n\tfatal_error("Unimplemented instruction\\n");')
 			body.append('\n}\n')
-			body.append('\ntypedef void (*impl_fun)({pre}context *context);'.format(pre=self.prefix))
+			body.append('\ntypedef void (*impl_fun)({pre}context *context, uint32_t target_cycle);'.format(pre=self.prefix))
 			for table in self.extra_tables:
 				body.append('\nstatic impl_fun impl_{name}[{sz}];'.format(name = table, sz=(1 << self.opsize)))
 			body.append('\nstatic impl_fun impl_main[{sz}];'.format(sz=(1 << self.opsize)))
@@ -1432,7 +1593,12 @@
 			pieces.append('\n\t{sync}(context, target_cycle);'.format(sync=self.sync_cycle))
 			pieces.append('\n\twhile (context->cycles < target_cycle)')
 			pieces.append('\n\t{')
-			#TODO: Handle interrupts in call dispatch mode
+			if self.interrupt in self.subroutines:
+				pieces.append('\n\t\tif (context->cycles >= context->sync_cycle) {')
+				self.meta = {}
+				self.temp = {}
+				self.subroutines[self.interrupt].inline(self, [], pieces, otype, None)
+				pieces.append('\n\t\t}')
 			self.meta = {}
 			self.temp = {}
 			self.subroutines[self.body].inline(self, [], pieces, otype, None)
@@ -1455,7 +1621,7 @@
 		if size in self.temp:
 			return ('', self.temp[size])
 		self.temp[size] = 'gen_tmp{sz}__'.format(sz=size);
-		return ('\n\tuint{sz}_t gen_tmp{sz}__;'.format(sz=size), self.temp[size])
+		return ('', self.temp[size])
 		
 	def resolveParam(self, param, parent, fieldVals, allowConstant=True, isdst=False):
 		keepGoing = True
@@ -1477,6 +1643,7 @@
 					if maybeLocal:
 						if isdst:
 							self.lastDst = param
+							self.lastSize = None
 						return maybeLocal
 				if param in fieldVals:
 					param = fieldVals[param]
@@ -1487,8 +1654,11 @@
 					keepGoing = True
 				elif self.isReg(param):
 					return self.resolveReg(param, parent, fieldVals, isdst)
+				elif param in self.regs.pointers:
+					return 'context->' + param
 		if isdst:
 			self.lastDst = param
+			self.lastSize = None
 		return param
 	
 	def isReg(self, name):
@@ -1551,6 +1721,11 @@
 			return self.regs.regs[name]
 		return 32
 	
+	def getLastSize(self):
+		if self.lastSize:
+			return self.lastSize
+		return self.paramSize(self.lastDst)
+	
 	def pushScope(self, scope):
 		self.scopes.append(scope)
 		self.currentScope = scope
--- a/debug.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/debug.c	Sat Apr 18 11:42:53 2020 -0700
@@ -11,7 +11,7 @@
 #include "terminal.h"
 #include "z80inst.h"
 
-#ifndef USE_NATIVE
+#ifdef NEW_CORE
 #define Z80_OPTS opts
 #else
 #define Z80_OPTS options
@@ -105,7 +105,7 @@
 	return m68k_read_word(address, context) << 16 | m68k_read_word(address + 2, context);
 }
 
-void debugger_print(m68k_context *context, char format_char, char *param)
+void debugger_print(m68k_context *context, char format_char, char *param, uint32_t address)
 {
 	uint32_t value;
 	char format[8];
@@ -141,7 +141,7 @@
 				value &= 0xFF;
 			}
 		}
-	} else if (param[0] == 'S' && param[1] == 'R') {
+	} else if (param[0] == 's' && param[1] == 'r') {
 		value = (context->status << 8);
 		for (int flag = 0; flag < 5; flag++) {
 			value |= context->flags[flag] << (4-flag);
@@ -151,6 +151,8 @@
 	} else if(param[0] == 'f') {
 		genesis_context *gen = context->system;
 		value = gen->vdp->frame;
+	} else if (param[0] == 'p' && param[1] == 'c') {
+		value = address;
 	} else if ((param[0] == '0' && param[1] == 'x') || param[0] == '$') {
 		char *after;
 		uint32_t p_addr = strtol(param+(param[0] == '0' ? 2 : 1), &after, 16);
@@ -197,7 +199,7 @@
 	}
 	switch (param[0])
 	{
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	case 'a':
 		if (param[1] == 'f') {
 			if(param[2] == '\'') {
@@ -348,22 +350,7 @@
 	case '0':
 		if (param[1] == 'x') {
 			uint16_t p_addr = strtol(param+2, NULL, 16);
-			if (p_addr < 0x4000) {
-				value = system->zram[p_addr & 0x1FFF];
-			} else if(p_addr >= 0x8000) {
-				uint32_t v_addr = system->z80_bank_reg << 15;
-				v_addr += p_addr & 0x7FFF;
-				if (v_addr < 0x400000) {
-					value = system->cart[v_addr/2];
-				} else if(v_addr > 0xE00000) {
-					value = system->work_ram[(v_addr & 0xFFFF)/2];
-				}
-				if (v_addr & 1) {
-					value &= 0xFF;
-				} else {
-					value >>= 8;
-				}
-			}
+			value = read_byte(p_addr, (void **)context->mem_pointers, &context->options->gen, context);
 		}
 		break;
 	}
@@ -496,7 +483,7 @@
 					if (inst.addr_mode == Z80_IMMED) {
 						after = inst.immed;
 					} else if (inst.ea_reg == Z80_HL) {
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 						after = context->regs[Z80_H] << 8 | context->regs[Z80_L];
 					} else if (inst.ea_reg == Z80_IX) {
 						after = context->regs[Z80_IXH] << 8 | context->regs[Z80_IXL];
@@ -567,12 +554,15 @@
 				}
 				break;
 			}
+			case '?':
+				print_z80_help();
+				break;
 			default:
 				if (
 					!context->Z80_OPTS->gen.debug_cmd_handler
 					|| !context->Z80_OPTS->gen.debug_cmd_handler(&system->header, input_buf)
 				) {
-					fprintf(stderr, "Unrecognized debugger command %s\n", input_buf);
+					fprintf(stderr, "Unrecognized debugger command %s\nUse '?' for help.\n", input_buf);
 				}
 				break;
 		}
@@ -585,7 +575,7 @@
 static uint32_t branch_t;
 static uint32_t branch_f;
 
-int run_debugger_command(m68k_context *context, char *input_buf, m68kinst inst, uint32_t after)
+int run_debugger_command(m68k_context *context, uint32_t address, char *input_buf, m68kinst inst, uint32_t after)
 {
 	char * param;
 	char format_char;
@@ -703,7 +693,7 @@
 					fputs("display command requires a parameter\n", stderr);
 					break;
 				}
-				debugger_print(context, format_char, param);
+				debugger_print(context, format_char, param, address);
 				add_display(&displays, &disp_index, format_char, param);
 			} else {
 				param = find_param(input_buf);
@@ -734,11 +724,13 @@
 				}
 			}
 			param = find_param(input_buf);
-			if (!param) {
-				fputs("p command requires a parameter\n", stderr);
-				break;
+			if (param) {
+				debugger_print(context, format_char, param, address);
+			} else {
+				m68k_disasm(&inst, input_buf);
+				printf("%X: %s\n", address, input_buf);
 			}
-			debugger_print(context, format_char, param);
+			
 			break;
 		case 'n':
 			if (inst.op == M68K_RTS) {
@@ -802,10 +794,12 @@
 				param = find_param(input_buf);
 				if (!param) {
 					fputs("Missing destination parameter for set\n", stderr);
+					return 1;
 				}
 				char *val = find_param(param);
 				if (!val) {
 					fputs("Missing value parameter for set\n", stderr);
+					return 1;
 				}
 				long int_val;
 				int reg_num;
@@ -846,6 +840,9 @@
 					fprintf(stderr, "Invalid destinatino %s\n", param);
 				}
 				break;
+			} else if (input_buf[1] == 'r') {
+				system->header.soft_reset(&system->header);
+				return 0;
 			} else {
 				if (inst.op == M68K_RTS) {
 					after = m68k_read_long(context->aregs[7], context);
@@ -940,17 +937,62 @@
 			break;
 		}
 #endif
+		case '?':
+			print_m68k_help();
+			break;
 		case 'q':
 			puts("Quitting");
 			exit(0);
 			break;
 		default:
-			fprintf(stderr, "Unrecognized debugger command %s\n", input_buf);
+			fprintf(stderr, "Unrecognized debugger command %s\nUse '?' for help.\n", input_buf);
 			break;
 	}
 	return 1;
 }
 
+void print_m68k_help()
+{
+	printf("M68k Debugger Commands\n");
+	printf("    b ADDRESS            - Set a breakpoint at ADDRESS\n");
+	printf("    d BREAKPOINT         - Delete a 68K breakpoint\n");
+	printf("    co BREAKPOINT        - Run a list of debugger commands each time\n");
+	printf("                           BREAKPOINT is hit\n");
+	printf("    a ADDRESS            - Advance to address\n");
+	printf("    n                    - Advance to next instruction\n");
+	printf("    o                    - Advance to next instruction ignoring branches to\n");
+	printf("                           lower addresses (good for breaking out of loops)\n");
+	printf("    s                    - Advance to next instruction (follows bsr/jsr)\n");
+	printf("    se REG|ADDRESS VALUE - Set value\n");
+	printf("    sr                   - Soft reset\n");
+	printf("    c                    - Continue\n");
+	printf("    bt                   - Print a backtrace\n");
+	printf("    p[/(x|X|d|c)] VALUE  - Print a register or memory location\n");
+	printf("    di[/(x|X|d|c)] VALUE - Print a register or memory location each time\n");
+	printf("                           a breakpoint is hit\n");
+	printf("    vs                   - Print VDP sprite list\n");
+	printf("    vr                   - Print VDP register info\n");
+	printf("    yc [CHANNEL NUM]     - Print YM-2612 channel info\n");
+	printf("    yt                   - Print YM-2612 timer info\n");
+	printf("    zb ADDRESS           - Set a Z80 breakpoint\n");
+	printf("    zp[/(x|X|d|c)] VALUE - Display a Z80 value\n");
+	printf("    ?                    - Display help\n");
+	printf("    q                    - Quit BlastEm\n");
+}
+
+void print_z80_help()
+{
+	printf("Z80 Debugger Commands\n");
+	printf("    b  ADDRESS           - Set a breakpoint at ADDRESS\n");
+	printf("    de BREAKPOINT        - Delete a Z80 breakpoint\n");
+	printf("    a  ADDRESS           - Advance to address\n");
+	printf("    n                    - Advance to next instruction\n");
+	printf("    c                    - Continue\n");
+	printf("    p[/(x|X|d|c)] VALUE  - Print a register or memory location\n");
+	printf("    di[/(x|X|d|c)] VALUE - Print a register or memory location each time\n");
+	printf("                           a breakpoint is hit\n");
+	printf("    q                    - Quit BlastEm\n");
+}
 
 void debugger(m68k_context * context, uint32_t address)
 {
@@ -1000,7 +1042,7 @@
 				char *cmd = commands;
 				strip_nl(cmd);
 				commands += strlen(cmd) + 1;
-				debugging = run_debugger_command(context, cmd, inst, after);
+				debugging = run_debugger_command(context, address, cmd, inst, after);
 			}
 			free(copy);
 		}
@@ -1013,7 +1055,7 @@
 		remove_breakpoint(context, address);
 	}
 	for (disp_def * cur = displays; cur; cur = cur->next) {
-		debugger_print(context, cur->format_char, cur->param);
+		debugger_print(context, cur->format_char, cur->param, address);
 	}
 	m68k_disasm(&inst, input_buf);
 	printf("%X: %s\n", address, input_buf);
@@ -1053,7 +1095,7 @@
 		} else {
 			strcpy(input_buf, last_cmd);
 		}
-		debugging = run_debugger_command(context, input_buf, inst, after);
+		debugging = run_debugger_command(context, address, input_buf, inst, after);
 	}
 	return;
 }
--- a/debug.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/debug.h	Sat Apr 18 11:42:53 2020 -0700
@@ -3,7 +3,7 @@
 
 #include <stdint.h>
 #include "m68k_core.h"
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 #include "z80_to_x86.h"
 #else
 #include "z80.h"
@@ -29,5 +29,7 @@
 void remove_display(disp_def ** head, uint32_t index);
 void debugger(m68k_context * context, uint32_t address);
 z80_context * zdebugger(z80_context * context, uint16_t address);
+void print_m68k_help();
+void print_z80_help();
 
 #endif //DEBUG_H_
--- a/default.cfg	Thu Apr 18 22:06:47 2019 -0700
+++ b/default.cfg	Sat Apr 18 11:42:53 2020 -0700
@@ -22,6 +22,7 @@
 		v ui.vram_debug
 		c ui.cram_debug
 		n ui.compositing_debug
+		m ui.vgm_log
 		esc ui.exit
 		` ui.save_state
 		0 ui.set_speed.0
@@ -364,6 +365,10 @@
 	screenshot_path $HOME
 	#see strftime for the format specifiers valid in screenshot_template
 	screenshot_template blastem_%Y%m%d_%H%M%S.png
+	#path for storing VGM recordings, accepts the same variables as initial_path
+	vgm_path $HOME
+	#see strftime for the format specifiers valid in vgm_template
+	vgm_template blastem_%Y%m%d_%H%M%S.vgm
 	#path template for saving SRAM, EEPROM and savestates
 	#accepts special variables $HOME, $EXEDIR, $USERDATA, $ROMNAME
 	save_path $USERDATA/blastem/$ROMNAME
@@ -385,6 +390,8 @@
 	#MegaWiFi allows ROMs to make connections to the internet
 	#so it should only be enabled for ROMs you trust
 	megawifi off
+	#Model of the emulated Gen/MD system, see systems.cfg for a list of options
+	model md1va3
 }
 
 
--- a/dis.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/dis.c	Sat Apr 18 11:42:53 2020 -0700
@@ -292,7 +292,7 @@
 			encoded = NULL;
 			address = def->address;
 			if (!is_visited(address)) {
-				encoded = filebuf + (address - address_off)/2;
+				encoded = filebuf + ((address & 0xFFFFFF) - address_off)/2;
 			}
 			tmpd = def;
 			def = def->next;
@@ -302,7 +302,7 @@
 			break;
 		}
 		for(;;) {
-			if (address > address_end || address < address_off) {
+			if ((address & 0xFFFFFF) > address_end || address < address_off) {
 				break;
 			}
 			visit(address);
--- a/gdb_remote.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/gdb_remote.c	Sat Apr 18 11:42:53 2020 -0700
@@ -170,7 +170,7 @@
 	if (address >= 0xA00000 && address < 0xA04000) {
 		gen->zram[address & 0x1FFF] = value;
 		genesis_context * gen = context->system;
-#if !defined(NO_Z80) && defined(USE_NATIVE)
+#if !defined(NO_Z80) && !defined(NEW_CORE)
 		z80_handle_code_write(address & 0x1FFF, gen->z80);
 #endif
 		return;
--- a/genesis.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/genesis.c	Sat Apr 18 11:42:53 2020 -0700
@@ -18,6 +18,7 @@
 #include "saves.h"
 #include "bindings.h"
 #include "jcart.h"
+#include "config.h"
 #define MCLKS_NTSC 53693175
 #define MCLKS_PAL  53203395
 
@@ -34,12 +35,12 @@
 #define LINES_PAL 313
 
 #ifdef IS_LIB
-#define MAX_SOUND_CYCLES 1000
+#define MAX_SOUND_CYCLES (MCLKS_PER_YM*NUM_OPERATORS*6*4)
 #else
 #define MAX_SOUND_CYCLES 100000	
 #endif
 
-#ifndef USE_NATIVE
+#ifdef NEW_CORE
 #define Z80_CYCLE cycles
 #define Z80_OPTS opts
 #define z80_handle_code_write(...)
@@ -199,7 +200,7 @@
 	deserialize_buffer buffer;
 	init_deserialize(&buffer, data, size);
 	genesis_deserialize(&buffer, gen);
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	//HACK: Fix this once PC/IR is represented in a better way in 68K core
 	gen->m68k->resume_pc = get_native_address_trans(gen->m68k, gen->m68k->last_prefetch_address);
 #endif
@@ -220,7 +221,7 @@
 static uint16_t get_open_bus_value(system_header *system)
 {
 	genesis_context *genesis = (genesis_context *)system;
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	return read_dma_value(genesis->m68k->last_prefetch_address/2);
 #else
 	m68000_base_device *device = (m68000_base_device *)genesis->m68k;
@@ -305,7 +306,7 @@
 static void z80_next_int_pulse(z80_context * z_context)
 {
 	genesis_context * gen = z_context->system;
-#ifndef USE_NATIVE
+#ifdef NEW_CORE
 	z_context->int_cycle = vdp_next_vint_z80(gen->vdp);
 	z_context->int_end_cycle = z_context->int_cycle + Z80_INT_PULSE_MCLKS;
 	z_context->int_value = 0xFF;
@@ -321,7 +322,7 @@
 {
 #ifndef NO_Z80
 	if (z80_enabled) {
-#ifndef USE_NATIVE
+#ifdef NEW_CORE
 		if (z_context->int_cycle == 0xFFFFFFFFU) {
 			z80_next_int_pulse(z_context);
 		}
@@ -373,12 +374,12 @@
 	z80_context * z_context = gen->z80;
 #ifdef REFRESH_EMULATION
 	if (context->current_cycle != last_sync_cycle) {
-		//lame estimation of refresh cycle delay
-		refresh_counter += context->current_cycle - last_sync_cycle;
-		if (!gen->bus_busy) {
-			context->current_cycle += REFRESH_DELAY * MCLKS_PER_68K * (refresh_counter / (MCLKS_PER_68K * REFRESH_INTERVAL));
-		}
-		refresh_counter = refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL);
+	//lame estimation of refresh cycle delay
+	refresh_counter += context->current_cycle - last_sync_cycle;
+	if (!gen->bus_busy) {
+		context->current_cycle += REFRESH_DELAY * MCLKS_PER_68K * (refresh_counter / (MCLKS_PER_68K * REFRESH_INTERVAL));
+	}
+	refresh_counter = refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL);
 	}
 #endif
 
@@ -412,11 +413,11 @@
 			}
 			context->current_cycle -= deduction;
 			z80_adjust_cycles(z_context, deduction);
-			gen->ym->current_cycle -= deduction;
+			ym_adjust_cycles(gen->ym, deduction);
+			if (gen->ym->vgm) {
+				vgm_adjust_cycles(gen->ym->vgm, deduction);
+			}
 			gen->psg->cycles -= deduction;
-			if (gen->ym->write_cycle != CYCLE_NEVER) {
-				gen->ym->write_cycle = gen->ym->write_cycle >= deduction ? gen->ym->write_cycle - deduction : 0;
-			}
 			if (gen->reset_cycle != CYCLE_NEVER) {
 				gen->reset_cycle -= deduction;
 			}
@@ -438,18 +439,20 @@
 		context->target_cycle = gen->reset_cycle;
 	}
 	if (address) {
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 		if (gen->header.enter_debugger) {
 			gen->header.enter_debugger = 0;
 			debugger(context, address);
 		}
+#endif
+#ifdef NEW_CORE
+		if (gen->header.save_state) {
+#else
 		if (gen->header.save_state && (z_context->pc || !z_context->native_pc || z_context->reset || !z_context->busreq)) {
-#else
-		if (gen->header.save_state) {
 #endif
 			uint8_t slot = gen->header.save_state - 1;
 			gen->header.save_state = 0;
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 			if (z_context->native_pc && !z_context->reset) {
 				//advance Z80 core to the start of an instruction
 				while (!z_context->pc)
@@ -459,7 +462,7 @@
 			}
 #endif
 			char *save_path = slot == SERIALIZE_SLOT ? NULL : get_slot_name(&gen->header, slot, use_native_states ? "state" : "gst");
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 			if (use_native_states || slot == SERIALIZE_SLOT) {
 #endif
 				serialize_buffer state;
@@ -474,7 +477,7 @@
 					save_to_file(&state, save_path);
 					free(state.data);
 				}
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 			} else {
 				save_gst(gen, save_path, address);
 			}
@@ -501,10 +504,10 @@
 #ifdef REFRESH_EMULATION
 	//do refresh check here so we can avoid adding a penalty for a refresh that happens during a VDP access
 	if (context->current_cycle - 4*MCLKS_PER_68K > last_sync_cycle) {
-		refresh_counter += context->current_cycle - 4*MCLKS_PER_68K - last_sync_cycle;
-		context->current_cycle += REFRESH_DELAY * MCLKS_PER_68K * (refresh_counter / (MCLKS_PER_68K * REFRESH_INTERVAL));
-		refresh_counter = refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL);
-		last_sync_cycle = context->current_cycle;
+	refresh_counter += context->current_cycle - 4*MCLKS_PER_68K - last_sync_cycle;
+	context->current_cycle += REFRESH_DELAY * MCLKS_PER_68K * (refresh_counter / (MCLKS_PER_68K * REFRESH_INTERVAL));
+	refresh_counter = refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL);
+	last_sync_cycle = context->current_cycle;
 	}
 #endif
 	sync_components(context, 0);
@@ -648,32 +651,34 @@
 		last_sync_cycle = context->current_cycle;
 	}
 #endif
-	sync_components(context, 0);
 	genesis_context *gen = context->system;
 	vdp_context * v_context = gen->vdp;
-	uint32_t before_cycle = v_context->cycles;
 	if (vdp_port < 0x10) {
 		if (vdp_port < 4) {
+			sync_components(context, 0);
+			uint32_t before_cycle = v_context->cycles;
 			value = vdp_data_port_read(v_context);
+			if (v_context->cycles != before_cycle) {
+				//printf("68K paused for %d (%d) cycles at cycle %d (%d) for read\n", v_context->cycles - context->current_cycle, v_context->cycles - before_cycle, context->current_cycle, before_cycle);
+				context->current_cycle = v_context->cycles;
+				//Lock the Z80 out of the bus until the VDP access is complete
+				genesis_context *gen = context->system;
+				gen->bus_busy = 1;
+				sync_z80(gen->z80, v_context->cycles);
+				gen->bus_busy = 0;
+			}
 		} else if(vdp_port < 8) {
+			vdp_run_context(v_context, context->current_cycle);
 			value = vdp_control_port_read(v_context);
 		} else {
+			vdp_run_context(v_context, context->current_cycle);
 			value = vdp_hv_counter_read(v_context);
 			//printf("HV Counter: %X at cycle %d\n", value, v_context->cycles);
 		}
 	} else if (vdp_port < 0x18){
 		fatal_error("Illegal read from PSG  port %X\n", vdp_port);
 	} else {
-		value = vdp_test_port_read(v_context);
-	}
-	if (v_context->cycles != before_cycle) {
-		//printf("68K paused for %d (%d) cycles at cycle %d (%d) for read\n", v_context->cycles - context->current_cycle, v_context->cycles - before_cycle, context->current_cycle, before_cycle);
-		context->current_cycle = v_context->cycles;
-		//Lock the Z80 out of the bus until the VDP access is complete
-		genesis_context *gen = context->system;
-		gen->bus_busy = 1;
-		sync_z80(gen->z80, v_context->cycles);
-		gen->bus_busy = 0;
+		value = get_open_bus_value(&gen->header);
 	}
 #ifdef REFRESH_EMULATION
 	last_sync_cycle -= 4;
@@ -768,9 +773,8 @@
 			}
 		}
 	} else {
-		location &= 0x1FFF;
-		if (location < 0x100) {
-			switch(location/2)
+		if (location < 0x10100) {
+			switch(location >> 1 & 0xFF)
 			{
 			case 0x1:
 				io_data_write(gen->io.ports, value, context->current_cycle);
@@ -815,7 +819,8 @@
 				break;
 			}
 		} else {
-			if (location == 0x1100) {
+			uint32_t masked = location & 0xFFF00;
+			if (masked == 0x11100) {
 				if (value & 1) {
 					dputs("bus requesting Z80");
 					if (z80_enabled) {
@@ -840,7 +845,7 @@
 						gen->z80->busack = 0;
 					}
 				}
-			} else if (location == 0x1200) {
+			} else if (masked == 0x11200) {
 				sync_z80(gen->z80, context->current_cycle);
 				if (value & 1) {
 					if (z80_enabled) {
@@ -856,6 +861,8 @@
 					}
 					ym_reset(gen->ym);
 				}
+			} else if (masked != 0x11300 && masked != 0x11000) {
+				fatal_error("Machine freeze due to unmapped write to address %X\n", location | 0xA00000);
 			}
 		}
 	}
@@ -891,17 +898,20 @@
 				value = gen->zram[location & 0x1FFF];
 			} else if (location < 0x6000) {
 				sync_sound(gen, context->current_cycle);
-				value = ym_read_status(gen->ym);
+				value = ym_read_status(gen->ym, context->current_cycle, location);
+			} else if (location < 0x7F00) {
+				value = 0xFF;
 			} else {
+				fatal_error("Machine freeze due to read of Z80 VDP memory window by 68K: %X\n", location | 0xA00000);
 				value = 0xFF;
 			}
 		} else {
-			value = 0xFF;
+			uint16_t word = get_open_bus_value(&gen->header);
+			value = location & 1 ? word : word >> 8;
 		}
 	} else {
-		location &= 0x1FFF;
-		if (location < 0x100) {
-			switch(location/2)
+		if (location < 0x10100) {
+			switch(location >> 1 & 0xFF)
 			{
 			case 0x0:
 				//version bits should be 0 for now since we're not emulating TMSS
@@ -953,18 +963,24 @@
 				value = gen->io.ports[2].serial_ctrl;
 				break;
 			default:
-				value = 0xFF;
+				value = get_open_bus_value(&gen->header) >> 8;
 			}
 		} else {
-			if (location == 0x1100) {
+			uint32_t masked = location & 0xFFF00;
+			if (masked == 0x11100) {
 				value = z80_enabled ? !z80_get_busack(gen->z80, context->current_cycle) : !gen->z80->busack;
 				value |= (get_open_bus_value(&gen->header) >> 8) & 0xFE;
 				dprintf("Byte read of BUSREQ returned %d @ %d (reset: %d)\n", value, context->current_cycle, gen->z80->reset);
-			} else if (location == 0x1200) {
+			} else if (masked == 0x11200) {
 				value = !gen->z80->reset;
+			} else if (masked == 0x11300 || masked == 0x11000) {
+				//A11300 is apparently completely unused
+				//A11000 is the memory control register which I am assuming is write only
+				value = get_open_bus_value(&gen->header) >> 8;
 			} else {
+				location |= 0xA00000;
+				fatal_error("Machine freeze due to read of unmapped IO location %X\n", location);
 				value = 0xFF;
-				printf("Byte read of unknown IO location: %X\n", location);
 			}
 		}
 	}
@@ -1004,18 +1020,16 @@
 	z80_context * context = vcontext;
 	genesis_context * gen = context->system;
 	sync_sound(gen, context->Z80_CYCLE);
-	return ym_read_status(gen->ym);
+	return ym_read_status(gen->ym, context->Z80_CYCLE, location);
 }
 
 static uint8_t z80_read_bank(uint32_t location, void * vcontext)
 {
 	z80_context * context = vcontext;
 	genesis_context *gen = context->system;
-
 	if (gen->bus_busy) {
 		context->Z80_CYCLE = gen->m68k->current_cycle;
 	}
-
 	//typical delay from bus arbitration
 	context->Z80_CYCLE += 3 * MCLKS_PER_Z80;
 	//TODO: add cycle for an access right after a previous one
@@ -1030,6 +1044,10 @@
 	uint32_t address = gen->z80_bank_reg << 15 | location;
 	if (address >= 0xC00000 && address < 0xE00000) {
 		return z80_vdp_port_read(location & 0xFF, context);
+	} else if (address >= 0xA10000 && address <= 0xA10001) {
+		//Apparently version reg can be read through Z80 banked area
+		//TODO: Check rest of IO region addresses
+		return gen->version_reg;
 	} else {
 		fprintf(stderr, "Unhandled read by Z80 from address %X through banked memory area (%X)\n", address, gen->z80_bank_reg << 15);
 	}
@@ -1074,6 +1092,80 @@
 	return context;
 }
 
+static uint16_t unused_read(uint32_t location, void *vcontext)
+{
+	m68k_context *context = vcontext;
+	genesis_context *gen = context->system;
+	if ((location >= 0xA13000 && location < 0xA13100) || (location >= 0xA12000 && location < 0xA12100)) {
+		//Only called if the cart/exp doesn't have a more specific handler for this region
+		return get_open_bus_value(&gen->header);
+	} else if (location == 0xA14000 || location == 0xA14002) {
+		if (gen->version_reg & 0xF) {
+			return gen->tmss_lock[location >> 1 & 1];
+		} else {
+			fatal_error("Machine freeze due to read from TMSS lock when TMSS is not present %X\n", location);
+			return 0xFFFF;
+		}
+	} else if (location == 0xA14100) {
+		if (gen->version_reg & 0xF) {
+			return get_open_bus_value(&gen->header);
+		} else {
+			fatal_error("Machine freeze due to read from TMSS control when TMSS is not present %X\n", location);
+			return 0xFFFF;
+		}
+	} else {
+		fatal_error("Machine freeze due to unmapped read from %X\n", location);
+		return 0xFFFF;
+	}
+}
+
+static uint8_t unused_read_b(uint32_t location, void *vcontext)
+{
+	uint16_t v = unused_read(location & 0xFFFFFE, vcontext);
+	if (location & 1) {
+		return v;
+	} else {
+		return v >> 8;
+	}
+}
+
+static void *unused_write(uint32_t location, void *vcontext, uint16_t value)
+{
+	m68k_context *context = vcontext;
+	genesis_context *gen = context->system;
+	uint8_t has_tmss = gen->version_reg & 0xF;
+	if (has_tmss && (location == 0xA14000 || location == 0xA14002)) {
+		gen->tmss_lock[location >> 1 & 1] = value;
+	} else if (has_tmss && location == 0xA14100) {
+		//TODO: implement TMSS control register
+	} else if (location < 0xA12000 || location >= 0xA13100 || (location >= 0xA12100 && location < 0xA13000)) {
+		fatal_error("Machine freeze due to unmapped write to %X\n", location);
+	}
+	return vcontext;
+}
+
+static void *unused_write_b(uint32_t location, void *vcontext, uint8_t value)
+{
+	m68k_context *context = vcontext;
+	genesis_context *gen = context->system;
+	uint8_t has_tmss = gen->version_reg & 0xF;
+	if (has_tmss && location >= 0xA14000 && location <= 0xA14003) {
+		uint32_t offset = location >> 1 & 1;
+		if (location & 1) {
+			gen->tmss_lock[offset] &= 0xFF00;
+			gen->tmss_lock[offset] |= value;
+		} else {
+			gen->tmss_lock[offset] &= 0xFF;
+			gen->tmss_lock[offset] |= value << 8;
+		}
+	} else if (has_tmss && (location == 0xA14100 || location == 0xA14101)) {
+		//TODO: implement TMSS control register
+	} else if (location < 0xA12000 || location >= 0xA13100 || (location >= 0xA12100 && location < 0xA13000)) {
+		fatal_error("Machine freeze due to unmapped byte write to %X\n", location);
+	}
+	return vcontext;
+}
+
 static void set_speed_percent(system_header * system, uint32_t percent)
 {
 	genesis_context *context = (genesis_context *)system;
@@ -1132,19 +1224,19 @@
 	if (load_from_file(&state, statepath)) {
 		genesis_deserialize(&state, gen);
 		free(state.data);
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 		//HACK
 		pc = gen->m68k->last_prefetch_address;
 #endif
 		ret = 1;
 	} else {
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 		strcpy(statepath + strlen(statepath)-strlen("state"), "gst");
 		pc = load_gst(gen, statepath);
 		ret = pc != 0;
 #endif
 	}
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	if (ret) {
 		gen->m68k->resume_pc = get_native_address_trans(gen->m68k, pc);
 	}
@@ -1191,12 +1283,12 @@
 		if (load_from_file(&state, statefile)) {
 			genesis_deserialize(&state, gen);
 			free(state.data);
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 			//HACK
 			pc = gen->m68k->last_prefetch_address;
 #endif
 		} else {
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 			pc = load_gst(gen, statefile);
 			if (!pc) {
 				fatal_error("Failed to load save state %s\n", statefile);
@@ -1204,7 +1296,7 @@
 #endif
 		}
 		printf("Loaded %s\n", statefile);
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 		if (gen->header.enter_debugger) {
 			gen->header.enter_debugger = 0;
 			insert_breakpoint(gen->m68k, pc, gen->header.debugger_type == DEBUGGER_NATIVE ? debugger : gdb_debug_enter);
@@ -1213,7 +1305,7 @@
 		adjust_int_cycle(gen->m68k, gen->vdp);
 		start_68k_context(gen->m68k, pc);
 	} else {
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 		if (gen->header.enter_debugger) {
 			gen->header.enter_debugger = 0;
 			uint32_t address = gen->cart[2] << 16 | gen->cart[3];
@@ -1264,7 +1356,13 @@
 		fprintf(stderr, "Failed to open %s file %s for writing\n", save_type_name(gen->save_type), save_filename);
 		return;
 	}
+	if (gen->save_type == RAM_FLAG_BOTH) {
+		byteswap_rom(gen->save_size, (uint16_t *)gen->save_storage);
+	}
 	fwrite(gen->save_storage, 1, gen->save_size, f);
+	if (gen->save_type == RAM_FLAG_BOTH) {
+		byteswap_rom(gen->save_size, (uint16_t *)gen->save_storage);
+	}
 	fclose(f);
 	printf("Saved %s to %s\n", save_type_name(gen->save_type), save_filename);
 }
@@ -1277,6 +1375,9 @@
 		uint32_t read = fread(gen->save_storage, 1, gen->save_size, f);
 		fclose(f);
 		if (read > 0) {
+			if (gen->save_type == RAM_FLAG_BOTH) {
+				byteswap_rom(gen->save_size, (uint16_t *)gen->save_storage);
+			}
 			printf("Loaded %s from %s\n", save_type_name(gen->save_type), save_filename);
 		}
 	}
@@ -1387,6 +1488,30 @@
 	set_audio_config(gen);
 }
 
+static void start_vgm_log(system_header *system, char *filename)
+{
+	genesis_context *gen = (genesis_context *)system;
+	vgm_writer *vgm = vgm_write_open(filename, gen->version_reg & HZ50 ? 50 : 60, gen->master_clock, gen->m68k->current_cycle);
+	if (vgm) {
+		printf("Started logging VGM to %s\n", filename);
+		sync_sound(gen, vgm->last_cycle);
+		ym_vgm_log(gen->ym, gen->master_clock, vgm);
+		psg_vgm_log(gen->psg, gen->master_clock, vgm);
+		gen->header.vgm_logging = 1;
+	} else {
+		printf("Failed to start logging to %s\n", filename);
+	}
+}
+
+static void stop_vgm_log(system_header *system)
+{
+	puts("Stopped VGM log");
+	genesis_context *gen = (genesis_context *)system;
+	vgm_close(gen->ym->vgm);
+	gen->ym->vgm = gen->psg->vgm = NULL;
+	gen->header.vgm_logging = 0;
+}
+
 genesis_context *alloc_init_genesis(rom_info *rom, void *main_rom, void *lock_on, uint32_t system_opts, uint8_t force_region)
 {
 	static memmap_chunk z80_map[] = {
@@ -1419,11 +1544,19 @@
 	gen->header.config_updated = config_updated;
 	gen->header.serialize = serialize;
 	gen->header.deserialize = deserialize;
+	gen->header.start_vgm_log = start_vgm_log;
+	gen->header.stop_vgm_log = stop_vgm_log;
 	gen->header.type = SYSTEM_GENESIS;
 	gen->header.info = *rom;
 	set_region(gen, rom, force_region);
+	tern_node *model = get_model(config, SYSTEM_GENESIS);
+	uint8_t tmss = !strcmp(tern_find_ptr_default(model, "tmss", "off"), "on");
+	if (tmss) {
+		gen->version_reg |= 1;
+	}
 
-	gen->vdp = init_vdp_context(gen->version_reg & 0x40);
+	uint8_t max_vsram = !strcmp(tern_find_ptr_default(model, "vsram", "40"), "64");
+	gen->vdp = init_vdp_context(gen->version_reg & 0x40, max_vsram);
 	gen->vdp->system = &gen->header;
 	gen->frame_end = vdp_cycles_to_frame_end(gen->vdp);
 	char * config_cycles = tern_find_path(config, "clocks\0max_cycles\0", TVAL_PTR).ptrval;
@@ -1446,7 +1579,7 @@
 	z80_options *z_opts = malloc(sizeof(z80_options));
 	init_z80_opts(z_opts, z80_map, 5, NULL, 0, MCLKS_PER_Z80, 0xFFFF);
 	gen->z80 = init_z80_context(z_opts);
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	gen->z80->next_int_pulse = z80_next_int_pulse;
 #endif
 	z80_assert_reset(gen->z80, 0);
@@ -1484,7 +1617,7 @@
 		{
 			write_cram_internal(gen->vdp, i, rand());
 		}
-		for (int i = 0; i < VSRAM_SIZE; i++)
+		for (int i = 0; i < gen->vdp->vsram_size; i++)
 		{
 			gen->vdp->vsram[i] = rand();
 		}
@@ -1521,8 +1654,9 @@
 
 	m68k_options *opts = malloc(sizeof(m68k_options));
 	init_m68k_opts(opts, rom->map, rom->map_chunks, MCLKS_PER_68K);
-	//TODO: make this configurable
-	opts->gen.flags |= M68K_OPT_BROKEN_READ_MODIFY;
+	if (!strcmp(tern_find_ptr_default(model, "tas", "broken"), "broken")) {
+		opts->gen.flags |= M68K_OPT_BROKEN_READ_MODIFY;
+	}
 	gen->m68k = init_68k_context(opts, NULL);
 	gen->m68k->system = gen;
 	opts->address_log = (system_opts & OPT_ADDRESS_LOG) ? fopen("address.log", "w") : NULL;
@@ -1556,7 +1690,10 @@
 		           (read_8_fun)vdp_port_read_b, (write_8_fun)vdp_port_write_b},
 		{0xA00000, 0xA12000,  0x1FFFF,  0, 0, 0,                                  NULL,
 		           (read_16_fun)io_read_w,      (write_16_fun)io_write_w,
-		           (read_8_fun)io_read,         (write_8_fun)io_write}
+		           (read_8_fun)io_read,         (write_8_fun)io_write},
+		{0x000000, 0xFFFFFF, 0xFFFFFF, 0, 0, 0,                                   NULL,
+		           (read_16_fun)unused_read,    (write_16_fun)unused_write,
+		           (read_8_fun)unused_read_b,   (write_8_fun)unused_write_b}
 	};
 	static tern_node *rom_db;
 	if (!rom_db) {
--- a/genesis.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/genesis.h	Sat Apr 18 11:42:53 2020 -0700
@@ -9,7 +9,7 @@
 #include <stdint.h>
 #include "system.h"
 #include "m68k_core.h"
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 #include "z80_to_x86.h"
 #else
 #include "z80.h"
@@ -53,6 +53,7 @@
 	uint32_t        reset_cycle;
 	uint8_t         bank_regs[8];
 	uint16_t        z80_bank_reg;
+	uint16_t        tmss_lock[2];
 	uint16_t        mapper_start_index;
 	uint8_t         mapper_type;
 	uint8_t         save_type;
@@ -60,6 +61,7 @@
 	uint8_t         version_reg;
 	uint8_t         bus_busy;
 	uint8_t         reset_requested;
+	uint8_t         tmss;
 	eeprom_state    eeprom;
 	nor_state       nor;
 };
--- a/gentests.py	Thu Apr 18 22:06:47 2019 -0700
+++ b/gentests.py	Sat Apr 18 11:42:53 2020 -0700
@@ -136,7 +136,10 @@
 			num = already.get('label', 0)+1
 			already['label'] = num
 			if (already[str(self.index)] + self.disp) & 1:
-				self.disp += 1
+				if self.disp > 0:
+					self.disp -= 1
+				else:
+					self.disp += 1
 			address = 'lbl_' + str(num) + ' + 2 + ' + str(self.disp) + ' + ' + str(index)
 		else:
 			if self.base == self.index:
--- a/gst.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/gst.c	Sat Apr 18 11:42:53 2020 -0700
@@ -244,11 +244,11 @@
 		uint16_t value;
 		write_cram_internal(context, i, (tmp_buf[i*2+1] << 8) | tmp_buf[i*2]);
 	}
-	if (fread(tmp_buf, 2, VSRAM_SIZE, state_file) != VSRAM_SIZE) {
+	if (fread(tmp_buf, 2, MIN_VSRAM_SIZE, state_file) != MIN_VSRAM_SIZE) {
 		fputs("Failed to read VSRAM from savestate\n", stderr);
 		return 0;
 	}
-	for (int i = 0; i < VSRAM_SIZE; i++) {
+	for (int i = 0; i < MIN_VSRAM_SIZE; i++) {
 		context->vsram[i] = (tmp_buf[i*2+1] << 8) | tmp_buf[i*2];
 	}
 	fseek(state_file, GST_VDP_MEM, SEEK_SET);
@@ -280,12 +280,12 @@
 		fputs("Error writing CRAM to savestate\n", stderr);
 		return 0;
 	}
-	for (int i = 0; i < VSRAM_SIZE; i++)
+	for (int i = 0; i < MIN_VSRAM_SIZE; i++)
 	{
 		tmp_buf[i*2] = context->vsram[i];
 		tmp_buf[i*2+1] = context->vsram[i] >> 8;
 	}
-	if (fwrite(tmp_buf, 2, VSRAM_SIZE, outfile) != VSRAM_SIZE) {
+	if (fwrite(tmp_buf, 2, MIN_VSRAM_SIZE, outfile) != MIN_VSRAM_SIZE) {
 		fputs("Error writing VSRAM to savestate\n", stderr);
 		return 0;
 	}
--- a/libblastem.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/libblastem.c	Sat Apr 18 11:42:53 2020 -0700
@@ -6,11 +6,40 @@
 #include "vdp.h"
 #include "render.h"
 #include "io.h"
+#include "genesis.h"
+#include "sms.h"
 
 static retro_environment_t retro_environment;
 RETRO_API void retro_set_environment(retro_environment_t re)
 {
 	retro_environment = re;
+#	define input_descriptor_macro(pad_num) \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT,  "D-Pad Left" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP,    "D-Pad Up" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN,  "D-Pad Down" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B,     "A" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A,     "B" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X,     "Y" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y,     "X" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L,     "Z" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R,     "C" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT,    "Mode" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START,    "Start" }, \
+
+	static const struct retro_input_descriptor desc[] = {
+		input_descriptor_macro(0)
+		input_descriptor_macro(1)
+		input_descriptor_macro(2)
+		input_descriptor_macro(3)
+		input_descriptor_macro(4)
+		input_descriptor_macro(5)
+		input_descriptor_macro(6)
+		input_descriptor_macro(7)
+		{ 0 },
+	};
+
+	re(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, (void *)desc);
 }
 
 static retro_video_refresh_t retro_video_refresh;
@@ -19,14 +48,14 @@
 	retro_video_refresh = rvf;
 }
 
-static retro_audio_sample_t retro_audio_sample;
 RETRO_API void retro_set_audio_sample(retro_audio_sample_t ras)
 {
-	retro_audio_sample = ras;
 }
 
+static retro_audio_sample_batch_t retro_audio_sample_batch;
 RETRO_API void retro_set_audio_sample_batch(retro_audio_sample_batch_t rasb)
 {
+	retro_audio_sample_batch = rasb;
 }
 
 static retro_input_poll_t retro_input_poll;
@@ -52,6 +81,7 @@
 
 RETRO_API void retro_init(void)
 {
+	render_audio_initialized(RENDER_AUDIO_S16, 53693175 / (7 * 6 * 4), 2, 4, sizeof(int16_t));
 }
 
 RETRO_API void retro_deinit(void)
@@ -77,16 +107,46 @@
 
 static vid_std video_standard;
 static uint32_t last_width, last_height;
+static uint32_t overscan_top, overscan_bot, overscan_left, overscan_right;
+static void update_overscan(void)
+{
+	uint8_t overscan;
+	retro_environment(RETRO_ENVIRONMENT_GET_OVERSCAN, &overscan);
+	if (overscan) {
+		overscan_top = overscan_bot = overscan_left = overscan_right = 0;
+	} else {
+		if (video_standard == VID_NTSC) {
+			overscan_top = 11;
+			overscan_bot = 8;
+			overscan_left = 13;
+			overscan_right = 14;
+		} else {
+			overscan_top = 30;
+			overscan_bot = 24;
+			overscan_left = 13;
+			overscan_right = 14;
+		}
+	}
+}
+
+static int32_t sample_rate;
 RETRO_API void retro_get_system_av_info(struct retro_system_av_info *info)
 {
+	update_overscan();
 	last_width = LINEBUF_SIZE;
-	info->geometry.base_width = info->geometry.max_width = LINEBUF_SIZE;
-	info->geometry.base_height = video_standard == VID_NTSC ? 243 : 294;
+	info->geometry.base_width = info->geometry.max_width = LINEBUF_SIZE - (overscan_left + overscan_right);
+	info->geometry.base_height = (video_standard == VID_NTSC ? 243 : 294) - (overscan_top + overscan_bot);
 	last_height = info->geometry.base_height;
 	info->geometry.max_height = info->geometry.base_height * 2;
 	info->geometry.aspect_ratio = 0;
-	info->timing.fps = video_standard == VID_NTSC ? 60 : 50;
-	info->timing.sample_rate = 53267; //approximate sample rate of YM2612
+	double master_clock = video_standard == VID_NTSC ? 53693175 : 53203395;
+	double lines = video_standard == VID_NTSC ? 262 : 313;
+	info->timing.fps = master_clock / (3420.0 * lines);
+	info->timing.sample_rate = master_clock / (7 * 6 * 24); //sample rate of YM2612
+	sample_rate = info->timing.sample_rate;
+	render_audio_initialized(RENDER_AUDIO_S16, info->timing.sample_rate, 2, 4, sizeof(int16_t));
+	//force adjustment of resampling parameters since target sample rate may have changed slightly
+	current_system->set_speed_percent(current_system, 100);
 }
 
 RETRO_API void retro_set_controller_port_device(unsigned port, unsigned device)
@@ -164,6 +224,7 @@
 }
 
 /* Loads a game. */
+static system_type stype;
 RETRO_API bool retro_load_game(const struct retro_game_info *game)
 {
 	serialize_size_cache = 0;
@@ -175,10 +236,12 @@
 	media.buffer = malloc(nearest_pow2(game->size));
 	memcpy(media.buffer, game->data, game->size);
 	media.size = game->size;
-	current_system = alloc_config_system(detect_system_type(&media), &media, 0, 0);
+	stype = detect_system_type(&media);
+	current_system = alloc_config_system(stype, &media, 0, 0);
 	
 	unsigned format = RETRO_PIXEL_FORMAT_XRGB8888;
 	retro_environment(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &format);
+	
 	return current_system != NULL;
 }
 
@@ -211,11 +274,57 @@
 /* Gets region of memory. */
 RETRO_API void *retro_get_memory_data(unsigned id)
 {
+	switch (id) {
+	case RETRO_MEMORY_SYSTEM_RAM:
+		switch (stype) {
+		case SYSTEM_GENESIS: {
+			genesis_context *gen = (genesis_context *)current_system;
+			return (uint8_t *)gen->work_ram;
+		}
+#ifndef NO_Z80
+		case SYSTEM_SMS: {
+			sms_context *sms = (sms_context *)current_system;
+			return sms->ram;
+		}
+#endif
+		}
+		break;
+	case RETRO_MEMORY_SAVE_RAM:
+		if (stype == SYSTEM_GENESIS) {
+			genesis_context *gen = (genesis_context *)current_system;
+			if (gen->save_type != SAVE_NONE)
+				return gen->save_storage;
+		}
+		break;
+	default:
+		break;
+	}
 	return NULL;
 }
 
 RETRO_API size_t retro_get_memory_size(unsigned id)
 {
+	switch (id) {
+	case RETRO_MEMORY_SYSTEM_RAM:
+		switch (stype) {
+		case SYSTEM_GENESIS:
+			return RAM_WORDS * sizeof(uint16_t);
+#ifndef NO_Z80
+		case SYSTEM_SMS:
+			return SMS_RAM_SIZE;
+#endif
+		}
+		break;
+	case RETRO_MEMORY_SAVE_RAM:
+		if (stype == SYSTEM_GENESIS) {
+			genesis_context *gen = (genesis_context *)current_system;
+			if (gen->save_type != SAVE_NONE)
+				return gen->save_size;
+		}
+		break;
+	default:
+		break;
+	}
 	return 0;
 }
 
@@ -254,7 +363,8 @@
 
 void render_framebuffer_updated(uint8_t which, int width)
 {
-	unsigned height = video_standard == VID_NTSC ? 243 : 294;
+	unsigned height = (video_standard == VID_NTSC ? 243 : 294) - (overscan_top + overscan_bot);
+	width -= (overscan_left + overscan_right);
 	unsigned base_height = height;
 	if (which != last_fb) {
 		height *= 2;
@@ -270,7 +380,7 @@
 		last_width = width;
 		last_height = height;
 	}
-	retro_video_refresh(fb, width, height, LINEBUF_SIZE * sizeof(uint32_t));
+	retro_video_refresh(fb + overscan_left + LINEBUF_SIZE * overscan_top, width, height, LINEBUF_SIZE * sizeof(uint32_t));
 	current_system->request_exit(current_system);
 }
 
@@ -289,6 +399,16 @@
 	return 1;
 }
 
+uint32_t render_overscan_top()
+{
+	return overscan_top;
+}
+
+uint32_t render_overscan_bot()
+{
+	return overscan_bot;
+}
+
 void process_events()
 {
 	static int16_t prev_state[2][RETRO_DEVICE_ID_JOYPAD_L2];
@@ -326,77 +446,70 @@
 {
 }
 
-struct audio_source {
-	int32_t freq;
-	int32_t left_accum;
-	int32_t right_accum;
-	int32_t num_samples;
-};
-
-static audio_source *audio_sources[8];
-static uint8_t num_audio_sources;
-audio_source *render_audio_source(uint64_t master_clock, uint64_t sample_divider, uint8_t channels)
+uint8_t render_is_audio_sync(void)
 {
-	audio_sources[num_audio_sources] = calloc(1, sizeof(audio_source));
-	audio_sources[num_audio_sources]->freq = master_clock / sample_divider;
-	return audio_sources[num_audio_sources++];
+	//whether this is true depends on the libretro frontend implementation
+	//but the sync to audio path works better here
+	return 1;
 }
 
-void render_audio_adjust_clock(audio_source *src, uint64_t master_clock, uint64_t sample_divider)
+void render_buffer_consumed(audio_source *src)
 {
 }
 
-void render_audio_source_gaindb(audio_source *src, float gain)
+void *render_new_audio_opaque(void)
 {
-	//TODO: Implement this once I hook up a core option for individual FM/PSG gain
+	return NULL;
 }
 
-static void check_put_sample(void)
+void render_free_audio_opaque(void *opaque)
 {
-	for (int i = 0; i < num_audio_sources; i++)
-	{
-		int32_t min_samples = audio_sources[i]->freq / 53267;
-		if (audio_sources[i]->num_samples < min_samples) {
-			return;
-		}
-	}
-	int16_t left = 0, right = 0;
-	for (int i = 0; i < num_audio_sources; i++)
-	{
-		left += audio_sources[i]->left_accum / audio_sources[i]->num_samples;
-		right += audio_sources[i]->right_accum / audio_sources[i]->num_samples;
-		audio_sources[i]->left_accum = audio_sources[i]->right_accum = audio_sources[i]->num_samples = 0;
-	}
-	retro_audio_sample(left, right);
 }
 
-void render_put_mono_sample(audio_source *src, int16_t value)
+void render_lock_audio(void)
 {
-	src->left_accum += value;
-	src->right_accum += value;
-	src->num_samples++;
-	check_put_sample();
-}
-void render_put_stereo_sample(audio_source *src, int16_t left, int16_t right)
-{
-	src->left_accum += left;
-	src->right_accum += right;
-	src->num_samples++;
-	check_put_sample();
 }
 
-void render_free_source(audio_source *src)
+void render_unlock_audio()
 {
-	int index;
-	for (index = 0; index < num_audio_sources; index++)
-	{
-		if (audio_sources[index] == src) {
-			break;
-		}
+}
+
+uint32_t render_min_buffered(void)
+{
+	//not actually used in the sync to audio path
+	return 4;
+}
+
+uint32_t render_audio_syncs_per_sec(void)
+{
+	return 0;
+}
+
+void render_audio_created(audio_source *src)
+{
+}
+
+void render_do_audio_ready(audio_source *src)
+{
+	int16_t *tmp = src->front;
+	src->front = src->back;
+	src->back = tmp;
+	src->front_populated = 1;
+	src->buffer_pos = 0;
+	if (all_sources_ready()) {
+		int16_t buffer[8];
+		int min_remaining_out;
+		mix_and_convert((uint8_t *)buffer, sizeof(buffer), &min_remaining_out);
+		retro_audio_sample_batch(buffer, sizeof(buffer)/(2*sizeof(*buffer)));
 	}
-	num_audio_sources--;
-	audio_sources[index] = audio_sources[num_audio_sources];
-	free(src);
+}
+
+void render_source_paused(audio_source *src, uint8_t remaining_sources)
+{
+}
+
+void render_source_resumed(audio_source *src)
+{
 }
 
 void bindings_set_mouse_mode(uint8_t mode)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/m68k.cpu	Sat Apr 18 11:42:53 2020 -0700
@@ -0,0 +1,770 @@
+info
+	prefix m68k_
+	opcode_size 16
+	body m68k_run_op
+	header m68k.h
+	interrupt m68k_interrupt
+	include m68k_util.c
+	sync_cycle m68k_sync_cycle
+	
+declare
+	typedef m68k_context *(*m68k_reset_handler)(m68k_context *context);
+	void init_m68k_opts(m68k_options *opts, memmap_chunk * memmap, uint32_t num_chunks, uint32_t clock_divider);
+	m68k_context *init_68k_context(m68k_options * opts, m68k_reset_handler reset_handler);
+	void m68k_reset(m68k_context *context);
+	void m68k_print_regs(m68k_context *context);
+
+regs
+	dregs 32 d0 d1 d2 d3 d4 d5 d6 d7
+	aregs 32 a0 a1 a2 a3 a4 a5 a6 a7
+	pc 32
+	other_sp 32
+	scratch1 32
+	scratch2 32
+	int_cycle 32
+	prefetch 16
+	int_priority 8
+	int_num 8
+	int_pending 8
+	int_pending_num 8
+	int_ack 8
+	status 8
+	ccr 8
+	xflag 8
+	nflag 8
+	zflag 8
+	vflag 8
+	cflag 8
+	reset_handler ptrvoid
+	mem_pointers ptrvoid 8
+	
+flags
+	register ccr
+	X 4 carry xflag
+	N 3 sign nflag
+	Z 2 zero zflag
+	V 1 overflow vflag
+	C 0 carry cflag
+
+m68k_prefetch
+	if dynarec
+	
+	ccall m68k_read16_noinc context pc
+	mov result prefetch
+	
+	end
+	
+	if interp
+	
+	mov pc scratch1
+	ocall read_16
+	mov scratch1 prefetch
+	
+	end
+	
+	add 2 pc pc
+	
+check_user_mode_swap_ssp_usp
+	local tmp 8
+	and 0x20 status tmp
+	if tmp
+	else
+	
+	xchg other_sp a7
+	
+	end
+	
+m68k_get_sr
+	lsl status 8 scratch1
+	or ccr scratch1 scratch1
+	
+m68k_write32_lowfirst
+	arg value 32
+	add 2 scratch2 scratch2
+	mov value scratch1
+	ocall write_16
+	
+	sub 2 scratch2 scratch2
+	lsr value 16 scratch1
+	ocall write_16
+
+m68k_write32
+	arg value 32
+	local tmp 32
+	mov value tmp
+	lsr value 16 scratch1
+	ocall write_16
+	
+	add 2 scratch2 scratch2
+	mov tmp scratch1
+	ocall write_16
+	
+m68k_read32
+	local tmp 32
+	add 2 scratch1 tmp
+	ocall read_16
+	xchg scratch1 tmp
+	ocall read_16
+	lsl tmp 16 tmp
+	or tmp scratch1 scratch1
+	
+m68k_interrupt
+	cmp int_cycle cycles
+	if >=U
+	
+	#INT_PENDING_NONE
+	cmp 255 int_pending
+	if =
+	
+	mov int_priority int_pending
+	mov int_num int_pending_num
+	
+	else
+	
+	#INT_PENDING_SR_CHANGE
+	cmp 254 int_pending
+	if =
+	
+	mov int_priority int_pending
+	mov int_num int_pending_num
+	
+	else
+	
+	check_user_mode_swap_ssp_usp
+	
+	cycles 6
+	#save status reg
+	sub 6 a7 a7
+	m68k_get_sr
+	mov a7 scratch2
+	ocall write_16
+	
+	#update status register
+	and 0x78 status status
+	or int_priority status status
+	or 0x20 status status
+	
+	#Interrupt ack cycle
+	mov int_pending int_ack
+	if int_pending_num
+	cycles 4
+	else
+	#TODO: do the whole E clock variable latency nonsense
+	cycles 13
+	add 24 int_pending int_pending_num
+	end
+	
+	#save pc
+	add 2 a7 scratch2
+	m68k_write32_lowfirst pc
+	
+	lsl int_pending_num 2 scratch1
+	m68k_read32
+	mov scratch1 pc
+	update_sync
+	end
+	
+m68k_run_op
+	dispatch prefetch
+
+m68k_mem_src
+	arg address 32
+	arg size 16
+	arg isdst 8
+	mov address scratch1
+	if isdst
+	mov address scratch2
+	meta ismem 1
+	end
+	switch size
+	
+	case 0
+	ocall read_8
+	
+	case 1
+	ocall read_16
+	
+	case 2
+	m68k_read32
+	
+	end
+	meta op scratch1
+
+m68k_write_size
+	arg size 16
+	arg lowfirst 8
+	switch size
+	case 0
+	ocall write_8
+	
+	case 1
+	ocall write_16
+	
+	case 2
+	if lowfirst
+	m68k_write32_lowfirst scratch1
+	else
+	m68k_write32 scratch1
+	end
+	end
+	
+m68k_index_word
+	m68k_prefetch
+	local disp 32
+	and prefetch 255 disp
+	sext 16 disp disp
+	sext 32 disp disp
+	local index 16
+	lsr prefetch 12 index
+	local isareg 16
+	and index 8 isareg
+	and index 7 index
+	local islong 16
+	and prefetch 2048 islong
+	
+	switch isareg
+	case 0
+		switch islong
+		case 0
+		sext 32 dregs.index scratch1
+		case 2048
+		mov dregs.index scratch1
+		end
+	case 8
+		switch islong
+		case 0
+		sext 32 aregs.index scratch1
+		case 2048
+		mov aregs.index scratch1
+		end
+	end
+	add disp scratch1 scratch1
+
+m68k_fetch_op_ea
+	arg mode 16
+	arg reg 16
+	arg Z 16
+	arg isdst 8
+	switch mode
+	
+	case 0
+	#data reg direct
+	meta op dregs.reg
+	if isdst
+	meta ismem 0
+	end
+	
+	case 1
+	#address reg direct
+	meta op aregs.reg
+	if isdst
+	meta ismem 0
+	end
+	
+	case 2
+	#address reg indirect
+	m68k_mem_src aregs.reg Z isdst
+	
+	case 3
+	#postincrement
+	m68k_mem_src aregs.reg Z isdst
+	switch reg
+	case 7
+		if Z
+			addsize Z aregs.reg aregs.reg
+		else
+			addsize 1 aregs.reg aregs.reg
+		end
+	default
+		addsize Z aregs.reg aregs.reg
+	end
+	
+	case 4
+	#predecrement
+	switch reg
+	case 7
+		if Z
+			decsize Z aregs.reg aregs.reg
+		else
+			decsize 1 aregs.reg aregs.reg
+		end
+	default
+		decsize Z aregs.reg aregs.reg
+	end
+	cycles 2
+	m68k_mem_src aregs.reg Z isdst
+	
+	case 5
+	#displacement
+	m68k_prefetch
+	sext 32 prefetch scratch1
+	add scratch1 aregs.reg scratch1
+	m68k_mem_src scratch1 Z isdst
+	
+	case 6
+	#indexed
+	m68k_index_word
+	add aregs.reg scratch1 scratch1
+	
+	m68k_mem_src scratch1 Z isdst
+	case 7
+	#pc-relative and absolute modes
+	
+	switch reg
+	case 0
+	#absolute short
+	m68k_prefetch
+	sext 32 prefetch scratch1
+	m68k_mem_src scratch1 Z isdst
+	
+	case 1
+	#absolute long
+	local address 32
+	m68k_prefetch
+	lsl prefetch 16 address
+	m68k_prefetch
+	or prefetch address scratch1
+	m68k_mem_src scratch1 Z isdst
+	
+	case 2
+	#pc displaceent
+	m68k_prefetch
+	sext 32 prefetch scratch1
+	add scratch1 pc scratch1
+	sub 2 scratch1 scratch1
+	m68k_mem_src scratch1 Z isdst
+	
+	case 3
+	#pc indexed
+	m68k_index_word
+	add pc scratch1 scratch1
+	sub 2 scratch1 scratch1
+	m68k_mem_src scratch1 Z isdst
+	
+	case 4
+	#immediate
+	switch Z
+	case 2
+		local tmp32 32
+		m68k_prefetch
+		lsl prefetch 16 tmp32
+		m68k_prefetch
+		or prefetch tmp32 scratch1
+		
+	default
+		m68k_prefetch
+		mov prefetch scratch1
+	end
+	meta op scratch1
+	
+	end
+	
+	end
+
+m68k_fetch_src_ea
+	arg mode 16
+	arg reg 16
+	arg Z 16
+	m68k_fetch_op_ea mode reg Z 0
+	meta src op
+	switch mode
+	case 0
+		meta src_is_mem 0
+	case 1
+		meta src_is_mem 0
+	default
+		meta src_is_mem 1
+	end
+
+m68k_fetch_dst_ea
+	arg mode 16
+	arg reg 16
+	arg Z 16
+	m68k_fetch_op_ea mode reg Z 1
+	meta dst op
+	
+m68k_save_dst
+	arg Z 16
+	if ismem
+	m68k_write_size Z 0
+	end
+
+1101DDD0ZZMMMRRR add_ea_dn
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	invalid Z 3
+	m68k_fetch_src_ea M R Z
+	
+	add src dregs.D dregs.D Z
+	update_flags XNZVC
+	m68k_prefetch
+	
+1101DDD1ZZMMMRRR add_dn_ea
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	invalid Z 3
+	m68k_fetch_dst_ea M R Z
+	
+	add dregs.D dst dst Z
+	update_flags XNZVC
+	m68k_save_dst Z
+	m68k_prefetch
+
+1101AAAZ11MMMRRR adda
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	local size 16
+	local ext_src 32
+	if Z
+	mov 2 size
+	else
+	mov 1 size
+	end
+	m68k_fetch_src_ea M R size
+	switch size
+	case 1
+	sext 32 src ext_src
+	meta src ext_src
+	end
+	
+	add src aregs.A aregs.A
+	m68k_prefetch
+
+00000110ZZMMMRRR addi
+	local immed 32
+	invalid Z 3
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	#fetch immediate operand
+	m68k_prefetch
+	switch Z
+	case 2
+		lsl prefetch 16 immed
+		m68k_prefetch
+		or prefetch immed immed
+	default
+		mov prefetch immed
+	end
+	#fetch dst EA
+	m68k_fetch_dst_ea M R Z
+	
+	add immed dst dst Z
+	update_flags XNZVC
+	m68k_save_dst Z
+	m68k_prefetch
+	
+0101III0ZZMMMRRR addq
+	invalid Z 3
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	local src 32
+	switch I
+	case 0
+	mov 8 src
+	default
+	mov I src
+	end
+	
+	m68k_fetch_dst_ea M R Z
+	switch M
+	case 1
+		add src dst dst Z
+	default
+		add src dst dst Z
+		update_flags XNZVC
+	end
+	m68k_save_dst Z
+	m68k_prefetch
+
+1101DDD1ZZ000SSS addx_dy_dx
+	invalid Z 3
+	adc dregs.S dregs.D dregs.D Z
+	update_flags XNVC
+	switch Z
+	case 0
+	local tmp8 8
+	mov dregs.D tmp8
+	if tmp8
+		update_flags Z0
+	end
+	case 1
+	local tmp16 16
+	mov dregs.D tmp16
+	if tmp16
+		update_flags Z0
+	end
+	case 2
+	if dregs.D
+		update_flags Z0
+	end
+	end
+	m68k_prefetch
+
+1101DDD1ZZ001SSS addx_ay_ax
+	invalid Z 3
+	if Z
+		decsize Z aregs.S aregs.S
+	else
+		switch S
+		case 7
+			sub 2 aregs.S aregs.S
+		default
+			decsize Z aregs.S aregs.S
+		end
+	end
+	mov aregs.S scratch1
+	switch Z
+	case 0
+	ocall read_8
+	case 1
+	ocall read_16
+	case 2
+	m68k_read32
+	end
+	mov scratch1 scratch2
+	if Z
+		decsize Z aregs.D aregs.D
+	else
+		switch D
+		case 7
+			sub 2 aregs.D aregs.D
+		default
+			decsize Z aregs.D aregs.D
+		end
+	end
+	mov aregs.D scratch1
+	switch Z
+	case 0
+	ocall read_8
+	case 1
+	ocall read_16
+	case 2
+	m68k_read32
+	end
+	adc scratch2 scratch1 scratch1 Z
+	update_flags XNVC
+	switch Z
+	case 0
+	local tmp8 8
+	mov dregs.D tmp8
+	if tmp8
+		update_flags Z0
+	end
+	case 1
+	local tmp16 16
+	mov dregs.D tmp16
+	if tmp16
+		update_flags Z0
+	end
+	case 2
+	if dregs.D
+		update_flags Z0
+	end
+	end
+	mov aregs.D scratch2
+	m68k_write_size Z 0
+	m68k_prefetch
+	
+
+00ZZRRRMMMEEESSS move
+	invalid Z 0
+	invalid M 1
+	invalid M 7 #not actually invalid, but will be handled separately due to DSL limitations
+	invalid E 7 S 5
+	invalid E 7 S 6
+	invalid E 7 S 7
+	local size 8
+	local memsrc 32
+	#move uses a different size format than most instructions
+	switch Z
+		case 1
+			mov 0 size
+		case 2
+			mov 2 size
+		case 3
+			mov 1 size
+	end
+	m68k_fetch_src_ea E S size
+	
+	if src_is_mem
+		#avoid clobbering src if we need scratch1
+		mov src memsrc
+		meta src memsrc
+	end
+	
+	cmp 0 src size
+	update_flags NZV0C0
+	
+	switch M
+		case 0
+		mov src dregs.R size
+		
+		case 2
+		mov aregs.R scratch2
+		mov src scratch1
+		m68k_write_size size 0
+		
+		case 3
+		mov aregs.R scratch2
+		mov src scratch1
+		switch R
+			case 7
+				if size
+					addsize size aregs.R aregs.R
+				else
+					addsize 1 aregs.R aregs.R
+				end
+			default
+				addsize size aregs.R aregs.R
+		end
+		m68k_write_size size 0
+		
+		case 4
+		mov src scratch1
+		switch R
+			case 7
+				if size
+					decsize size aregs.R aregs.R
+				else
+					decsize 1 aregs.R aregs.R
+				end
+			default
+				decsize size aregs.R aregs.R
+		end
+		mov aregs.R scratch2
+		m68k_write_size size 1
+		
+		case 5
+		m68k_prefetch
+		sext 32 prefetch scratch2
+		add aregs.R scratch2 scratch2
+		mov src scratch1
+		m68k_write_size size 0
+		
+		case 6
+		m68k_index_word
+		add aregs.R scratch1 scratch2
+		mov src scratch1
+		m68k_write_size size 0
+	end
+	m68k_prefetch
+
+
+00ZZ00M111EEESSS move_abs
+	invalid E 7 S 5
+	invalid E 7 S 6
+	invalid E 7 S 7
+	invalid Z 0
+	local size 8
+	local memsrc 32
+	#move uses a different size format than most instructions
+	switch Z
+	case 1
+		mov 0 size
+	case 2
+		mov 2 size
+	case 3
+		mov 1 size
+	end
+	m68k_fetch_src_ea E S size
+	
+	if src_is_mem
+		#avoid clobbering src if we need scratch1
+		mov src memsrc
+		meta src memsrc
+	end
+	
+	cmp 0 src size
+	update_flags NZV0C0
+	
+	switch M
+	case 0
+	m68k_prefetch
+	sext 32 prefetch scratch2
+	
+	case 1
+	m68k_prefetch
+	lsl prefetch 16 scratch2
+	m68k_prefetch
+	or prefetch scratch2 scratch2
+	end
+	mov src scratch1
+	m68k_write_size size 0
+	m68k_prefetch
+	
+00ZZRRR001EEESSS movea
+	local size 8
+	invalid Z 0
+	invalid Z 1
+	invalid E 7 S 5
+	invalid E 7 S 6
+	invalid E 7 S 7
+	switch Z
+	case 2
+		mov 2 size
+	case 3
+		mov 1 size
+	end
+	m68k_fetch_src_ea E S size
+	switch Z
+	case 2
+		mov src aregs.R
+	case 3
+		sext 32 src aregs.R
+	end
+	m68k_prefetch
+	
+0100010011MMMRRR move_to_ccr
+	invalid M 1
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	m68k_fetch_src_ea M R 1
+	mov scratch1 ccr
+	m68k_prefetch
+
+0100011011MMMRRR move_to_sr
+	invalid M 1
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	m68k_fetch_src_ea M R 1
+	mov scratch1 ccr
+	lsr scratch1 8 status
+	update_sync
+	m68k_prefetch
+
+0100000011MMMRRR move_from_sr
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	m68k_fetch_dst_ea M R 1
+	lsl status 8 scratch1
+	or ccr scratch1 scratch1
+	mov scratch1 dst
+	m68k_save_dst 1
+	m68k_prefetch
+
+0100111001110000 reset
+	cycles 124
+	if reset_handler
+	pcall reset_handler m68k_reset_handler context
+	end
--- a/m68k_core.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/m68k_core.c	Sat Apr 18 11:42:53 2020 -0700
@@ -4,17 +4,17 @@
  BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
 */
 #include "m68k_core.h"
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 #include "m68k_internal.h"
 #endif
 #include "68kinst.h"
 #include "backend.h"
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 #include "gen.h"
 #endif
 #include "util.h"
 #include "serialize.h"
-#ifndef USE_NATIVE
+#ifdef NEW_CORE
 #include "musashi/m68kcpu.h"
 #endif
 #include <stdio.h>
@@ -23,7 +23,7 @@
 #include <string.h>
 
 char disasm_buf[1024];
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 int8_t native_reg(m68k_op_info * op, m68k_options * opts)
 {
 	if (op->addr_mode == MODE_REG) {
@@ -61,7 +61,7 @@
 		printf("a%d: %X\n", i, context->aregs[i]);
 	}
 }
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 void m68k_read_size(m68k_options *opts, uint8_t size)
 {
 	switch (size)
@@ -779,7 +779,7 @@
 		|| (inst->op == M68K_BCC && inst->extra.cond == COND_TRUE);
 }
 
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 static void m68k_handle_deferred(m68k_context * context)
 {
 	m68k_options * opts = context->options;
@@ -792,7 +792,7 @@
 
 uint16_t m68k_get_ir(m68k_context *context)
 {
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	uint32_t inst_addr = get_instruction_start(context->options, context->last_prefetch_address-2);
 	uint16_t *native_addr = get_native_pointer(inst_addr, (void **)context->mem_pointers, &context->options->gen);
 	if (native_addr) {
@@ -830,7 +830,7 @@
 			.handler = bp_handler,
 			.address = address
 		};
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 		m68k_breakpoint_patch(context, address, bp_handler, NULL);
 #endif
 	}
@@ -850,7 +850,7 @@
 	return context;
 }
 
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 typedef enum {
 	RAW_FUNC = 1,
 	BINARY_ARITH,
@@ -1175,7 +1175,7 @@
 			break;
 		}
 	}
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	code_ptr native = get_native_address(context->options, address);
 	if (!native) {
 		return;
@@ -1192,7 +1192,7 @@
 void start_68k_context(m68k_context * context, uint32_t address)
 {
 	m68k_options * options = context->options;
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	code_ptr addr = get_native_address_trans(context, address);
 	options->start_context(addr, context);
 #else
@@ -1210,7 +1210,7 @@
 
 void resume_68k(m68k_context *context)
 {
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	code_ptr addr = context->resume_pc;
 	context->resume_pc = NULL;
 	m68k_options * options = context->options;
@@ -1223,7 +1223,7 @@
 
 void m68k_reset(m68k_context * context)
 {
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	//TODO: Actually execute the M68K reset vector rather than simulating some of its behavior
 	uint16_t *reset_vec = get_native_pointer(0, (void **)context->mem_pointers, &context->options->gen);
 	context->aregs[7] = reset_vec[0] << 16 | reset_vec[1];
@@ -1237,7 +1237,7 @@
 
 void m68k_options_free(m68k_options *opts)
 {
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	for (uint32_t address = 0; address < opts->gen.address_mask; address += NATIVE_CHUNK_SIZE)
 	{
 		uint32_t chunk = address / NATIVE_CHUNK_SIZE;
@@ -1257,7 +1257,7 @@
 	free(opts);
 }
 
-#ifndef USE_NATIVE
+#ifdef NEW_CORE
 void init_m68k_opts(m68k_options * opts, memmap_chunk * memmap, uint32_t num_chunks, uint32_t clock_divider)
 {
 	memset(opts, 0, sizeof(*opts));
@@ -1273,7 +1273,7 @@
 
 m68k_context * init_68k_context(m68k_options * opts, m68k_reset_handler reset_handler)
 {
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	m68k_context * context = calloc(1, sizeof(m68k_context) + ram_size(&opts->gen) / (1 << opts->gen.ram_flags_shift) / 8);
 	context->options = opts;
 #else
@@ -1301,7 +1301,7 @@
 	{
 		save_int32(buf, context->aregs[i]);
 	}
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	save_int32(buf, pc);
 	uint16_t sr = context->status << 3;
 	for (int flag = 4; flag >= 0; flag--) {
@@ -1332,7 +1332,7 @@
 	{
 		context->aregs[i] = load_int32(buf);
 	}
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	//hack until both PC and IR registers are represented properly
 	context->last_prefetch_address = load_int32(buf);
 	uint16_t sr = load_int16(buf);
@@ -1354,7 +1354,7 @@
 	context->trace_pending = load_int8(buf);
 }
 
-#ifndef USE_NATIVE
+#ifdef NEW_CORE
 void m68k_invalidate_code_range(m68k_context *context, uint32_t start, uint32_t end)
 {
 	m68000_base_device *device = (m68000_base_device *)context;
--- a/m68k_core.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/m68k_core.h	Sat Apr 18 11:42:53 2020 -0700
@@ -36,13 +36,13 @@
 
 typedef struct {
 	cpu_options     gen;
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	int8_t          dregs[8];
 	int8_t          aregs[8];
 	int8_t			flag_regs[5];
 #endif
 	FILE            *address_log;
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	code_ptr        read_16;
 	code_ptr        write_16;
 	code_ptr        read_8;
@@ -119,7 +119,7 @@
 uint16_t m68k_get_ir(m68k_context *context);
 void m68k_print_regs(m68k_context * context);
 void m68k_invalidate_code_range(m68k_context *context, uint32_t start, uint32_t end);
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 m68k_context * m68k_handle_code_write(uint32_t address, m68k_context * context);
 #else
 #define m68k_handle_code_write(A, M)
--- a/nuklear_ui/blastem_nuklear.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/nuklear_ui/blastem_nuklear.c	Sat Apr 18 11:42:53 2020 -0700
@@ -50,12 +50,14 @@
 	}
 	previous_views[num_prev++] = current_view;
 	current_view = new_view;
+	context->input.selected_widget = 0;
 }
 
 static void pop_view()
 {
 	if (num_prev) {
 		current_view = previous_views[--num_prev];
+		context->input.selected_widget = 0;
 	}
 }
 
@@ -439,11 +441,11 @@
 	};
 	const char *general_binds[] = {
 		"ui.exit", "ui.save_state", "ui.toggle_fullscreen", "ui.soft_reset", "ui.reload",
-		"ui.screenshot", "ui.sms_pause", "ui.toggle_keyboard_cpatured", "ui.release_mouse"
+		"ui.screenshot", "ui.vgm_log", "ui.sms_pause", "ui.toggle_keyboard_cpatured", "ui.release_mouse"
 	};
 	const char *general_names[] = {
 		"Show Menu", "Quick Save", "Toggle Fullscreen", "Soft Reset", "Reload Media",
-		"Internal Screenshot", "SMS Pause", "Capture Keyboard", "Release Mouse"
+		"Internal Screenshot", "Toggle VGM Log", "SMS Pause", "Capture Keyboard", "Release Mouse"
 	};
 	const char *speed_binds[] = {
 		"ui.next_speed", "ui.prev_speed",
@@ -590,6 +592,7 @@
 		conf_names = tern_insert_ptr(conf_names, "ui.vdp_debug_pal", "VDP Debug Palette");
 		conf_names = tern_insert_ptr(conf_names, "ui.enter_debugger", "Enter CPU Debugger");
 		conf_names = tern_insert_ptr(conf_names, "ui.screenshot", "Take Screenshot");
+		conf_names = tern_insert_ptr(conf_names, "ui.vgm_log", "Toggle VGM Log");
 		conf_names = tern_insert_ptr(conf_names, "ui.exit", "Show Menu");
 		conf_names = tern_insert_ptr(conf_names, "ui.save_state", "Quick Save");
 		conf_names = tern_insert_ptr(conf_names, "ui.set_speed.0", "Set Speed 0");
@@ -1173,6 +1176,7 @@
 	mapping_string[mapping_pos++] = ':';
 }
 
+static uint8_t initial_controller_config;
 #define QUIET_FRAMES 9
 static void view_controller_mappings(struct nk_context *context)
 {
@@ -1275,8 +1279,10 @@
 					save_controller_mapping(selected_controller, mapping_string);
 					free(mapping_string);
 					pop_view();
-					push_view(view_controller_bindings);
-					controller_binding_changed = 0;
+					if (initial_controller_config) {
+						push_view(view_controller_bindings);
+						controller_binding_changed = 0;
+					}
 					added_mapping = 0;
 				} else if (get_axis_label(&selected_controller_info, current_axis)) {
 					added_mapping = 0;
@@ -1290,6 +1296,31 @@
 	}
 }
 
+static void show_mapping_view(void)
+{
+	current_button = SDL_CONTROLLER_BUTTON_A;
+	button_pressed = -1;
+	last_button = -1;
+	last_hat = -1;
+	axis_moved = -1;
+	last_axis = -1;
+	last_axis_value = 0;
+	SDL_Joystick *joy = render_get_joystick(selected_controller);
+	const char *name = SDL_JoystickName(joy);
+	size_t namesz = strlen(name);
+	mapping_string = malloc(512 + namesz);
+	for (mapping_pos = 0; mapping_pos < namesz; mapping_pos++)
+	{
+		char c = name[mapping_pos];
+		if (c == ',' || c == '\n' || c == '\r') {
+			c = ' ';
+		}
+		mapping_string[mapping_pos] = c;
+	}
+	
+	push_view(view_controller_mappings);
+}
+
 static void view_controller_variant(struct nk_context *context)
 {
 	uint8_t selected = 0;
@@ -1325,33 +1356,15 @@
 	if (selected) {
 		save_controller_info(selected_controller, &selected_controller_info);
 		pop_view();
-		SDL_GameController *controller = render_get_controller(selected_controller);
-		if (controller) {
-			push_view(view_controller_bindings);
-			controller_binding_changed = 0;
-			SDL_GameControllerClose(controller);
-		} else {
-			current_button = SDL_CONTROLLER_BUTTON_A;
-			button_pressed = -1;
-			last_button = -1;
-			last_hat = -1;
-			axis_moved = -1;
-			last_axis = -1;
-			last_axis_value = 0;
-			SDL_Joystick *joy = render_get_joystick(selected_controller);
-			const char *name = SDL_JoystickName(joy);
-			size_t namesz = strlen(name);
-			mapping_string = malloc(512 + namesz);
-			for (mapping_pos = 0; mapping_pos < namesz; mapping_pos++)
-			{
-				char c = name[mapping_pos];
-				if (c == ',' || c == '\n' || c == '\r') {
-					c = ' ';
-				}
-				mapping_string[mapping_pos] = c;
+		if (initial_controller_config) {
+			SDL_GameController *controller = render_get_controller(selected_controller);
+			if (controller) {
+				push_view(view_controller_bindings);
+				controller_binding_changed = 0;
+				SDL_GameControllerClose(controller);
+			} else {
+				show_mapping_view();
 			}
-			
-			push_view(view_controller_mappings);
 		}
 	}
 }
@@ -1396,36 +1409,85 @@
 void view_controllers(struct nk_context *context)
 {
 	if (nk_begin(context, "Controllers", nk_rect(0, 0, render_width(), render_height()), NK_WINDOW_NO_SCROLLBAR)) {
-		int height = (render_width() - 2*context->style.font->height) / MAX_JOYSTICKS;
+		int height = (render_height() - 2*context->style.font->height) / 5;
+		int inner_height = height - context->style.window.spacing.y;
+		const struct nk_user_font *font = context->style.font;
+		int bindings_width = font->width(font->userdata, font->height, "Bindings", strlen("Bindings")) + context->style.button.padding.x * 2;
+		int remap_width = font->width(font->userdata, font->height, "Remap", strlen("Remap")) + context->style.button.padding.x * 2;
+		int change_type_width = font->width(font->userdata, font->height, "Change Type", strlen("Change Type")) + context->style.button.padding.x * 2;
+		int total = bindings_width + remap_width + change_type_width;
+		float bindings_ratio = (float)bindings_width / total;
+		float remap_ratio = (float)remap_width / total;
+		float change_type_ratio = (float)change_type_width / total;
+		
+		
+		uint8_t found_controller = 0;
 		for (int i = 0; i < MAX_JOYSTICKS; i++)
 		{
 			SDL_Joystick *joy = render_get_joystick(i);
 			if (joy) {
+				found_controller = 1;
 				controller_info info = get_controller_info(i);
 				ui_image *controller_image = select_best_image(&info);
-				int image_width = height * controller_image->width / controller_image->height;
-				nk_layout_row_begin(context, NK_STATIC, height, 2);
-				nk_layout_row_push(context, image_width);
+				int image_width = inner_height * controller_image->width / controller_image->height;
+				nk_layout_space_begin(context, NK_STATIC, height, INT_MAX);
+				nk_layout_space_push(context, nk_rect(context->style.font->height / 2, 0, image_width, inner_height));
 				if (info.type == TYPE_UNKNOWN || info.type == TYPE_GENERIC_MAPPING) {
 					nk_label(context, "?", NK_TEXT_CENTERED);
 				} else {
 					nk_image(context, controller_image->ui);
 				}
-				nk_layout_row_push(context, render_width() - image_width - 2 * context->style.font->height);
-				if (nk_button_label(context, info.name)) {
-					selected_controller = i;
-					selected_controller_info = info;
-					if (info.type == TYPE_UNKNOWN || info.type == TYPE_GENERIC_MAPPING) {
+				int button_start = image_width + context->style.font->height;
+				int button_area_width = render_width() - image_width - 2 * context->style.font->height;
+				
+				nk_layout_space_push(context, nk_rect(button_start, 0, button_area_width, inner_height/2));
+				nk_label(context, info.name, NK_TEXT_CENTERED);
+				const struct nk_user_font *font = context->style.font;
+				if (info.type == TYPE_UNKNOWN || info.type == TYPE_GENERIC_MAPPING) {
+					int button_width = font->width(font->userdata, font->height, "Configure", strlen("Configure"));
+					nk_layout_space_push(context, nk_rect(button_start, height/2, button_width, inner_height/2));
+					if (nk_button_label(context, "Configure")) {
+						selected_controller = i;
+						selected_controller_info = info;
+						initial_controller_config = 1;
 						push_view(view_controller_type);
-					} else {
+					}
+				} else {
+					button_area_width -= 2 * context->style.window.spacing.x;
+					bindings_width = bindings_ratio * button_area_width;
+					nk_layout_space_push(context, nk_rect(button_start, height/2, bindings_width, inner_height/2));
+					if (nk_button_label(context, "Bindings")) {
+						selected_controller = i;
+						selected_controller_info = info;
 						push_view(view_controller_bindings);
 						controller_binding_changed = 0;
 					}
-					
+					button_start += bindings_width + context->style.window.spacing.x;
+					remap_width = remap_ratio * button_area_width;
+					nk_layout_space_push(context, nk_rect(button_start, height/2, remap_width, inner_height/2));
+					if (nk_button_label(context, "Remap")) {
+						selected_controller = i;
+						selected_controller_info = info;
+						initial_controller_config = 0;
+						show_mapping_view();
+					}
+					button_start += remap_width + context->style.window.spacing.x;
+					change_type_width = change_type_ratio * button_area_width;
+					nk_layout_space_push(context, nk_rect(button_start, height/2, change_type_width, inner_height/2));
+					if (nk_button_label(context, "Change Type")) {
+						selected_controller = i;
+						selected_controller_info = info;
+						initial_controller_config = 0;
+						push_view(view_controller_type);
+					}
 				}
-				nk_layout_row_end(context);
+				//nk_layout_row_end(context);
 			}
 		}
+		if (!found_controller) {
+			nk_layout_row_static(context, context->style.font->height, render_width() - 2 * context->style.font->height, 1);
+			nk_label(context, "No controllers detected", NK_TEXT_CENTERED);
+		}
 		nk_layout_row_static(context, context->style.font->height, (render_width() - 2 * context->style.font->height) / 2, 2);
 		nk_label(context, "", NK_TEXT_LEFT);
 		if (nk_button_label(context, "Back")) {
@@ -1466,6 +1528,24 @@
 	}
 }
 
+void settings_string(struct nk_context *context, char *label, char *path, char *def)
+{
+	nk_label(context, label, NK_TEXT_LEFT);
+	char *curstr = tern_find_path_default(config, path, (tern_val){.ptrval = def}, TVAL_PTR).ptrval;
+	uint32_t len = strlen(curstr);
+	uint32_t buffer_len = len > 100 ? len + 1 : 101;
+	char *buffer = malloc(buffer_len);
+	memcpy(buffer, curstr, len);
+	memset(buffer+len, 0, buffer_len-len);
+	nk_edit_string(context, NK_EDIT_SIMPLE, buffer, &len, buffer_len-1, nk_filter_default);
+	buffer[len] = 0;
+	if (strcmp(buffer, curstr)) {
+		config_dirty = 1;
+		config = tern_insert_path(config, path, (tern_val){.ptrval = strdup(buffer)}, TVAL_PTR);
+	}
+	free(buffer);
+}
+
 void settings_int_property(struct nk_context *context, char *label, char *name, char *path, int def, int min, int max)
 {
 	char *curstr = tern_find_path(config, path, TVAL_PTR).ptrval;
@@ -1524,7 +1604,7 @@
 			if (!dupe) {
 				if (num_progs == prog_storage) {
 					prog_storage = prog_storage ? prog_storage*2 : 4;
-					progs = realloc(progs, sizeof(progs) * prog_storage);
+					progs = realloc(progs, sizeof(*progs) * prog_storage);
 				}
 				progs[num_progs].vertex = NULL;
 				progs[num_progs++].fragment = strdup(entries[i].name); 
@@ -1576,6 +1656,7 @@
 	shader_dir = path_append(get_exe_dir(), "shaders");
 #endif
 	entries = get_dir_list(shader_dir, &num_entries);
+	free(shader_dir);
 	progs = get_shader_progs(entries, num_entries, progs, &num_progs, &prog_storage);
 	*num_out = num_progs;
 	return progs;
@@ -1751,6 +1832,50 @@
 		nk_end(context);
 	}
 }
+typedef struct {
+	const char **models;
+	const char **names;
+	uint32_t   num_models;
+	uint32_t   storage;
+} model_foreach_state;
+void model_iter(char *key, tern_val val, uint8_t valtype, void *data)
+{
+	if (valtype != TVAL_NODE) {
+		return;
+	}
+	model_foreach_state *state = data;
+	if (state->num_models == state->storage) {
+		state->storage *= 2;
+		state->models = realloc(state->models, state->storage * sizeof(char *));
+		state->names = realloc(state->names, state->storage * sizeof(char *));
+	}
+	char *def = strdup(key);
+	state->models[state->num_models] = def;
+	state->names[state->num_models++] = tern_find_ptr_default(val.ptrval, "name", def);
+}
+
+typedef struct {
+	const char **models;
+	const char **names;
+} models;
+
+models get_models(uint32_t *num_out)
+{
+	tern_node *systems = get_systems_config();
+	model_foreach_state state = {
+		.models = calloc(4, sizeof(char *)),
+		.names = calloc(4, sizeof(char *)),
+		.num_models = 0,
+		.storage = 4
+	};
+	tern_foreach(systems, model_iter, &state);
+	*num_out = state.num_models;
+	return (models){
+		.models = state.models,
+		.names = state.names
+	};
+}
+
 void view_system_settings(struct nk_context *context)
 {
 	const char *sync_opts[] = {
@@ -1773,12 +1898,25 @@
 	if (selected_region < 0) {
 		selected_region = find_match(region_codes, num_regions, "system\0default_region\0", "U");
 	}
+	static const char **model_opts;
+	static const char **model_names;
+	static uint32_t num_models;
+	if (!model_opts) {
+		models m = get_models(&num_models);
+		model_opts = m.models;
+		model_names = m.names;
+	}
+	static int32_t selected_model = -1;
+	if (selected_model < 0) {
+		selected_model = find_match(model_opts, num_models, "system\0model\0", "md1va3");
+	}
+	
 	const char *formats[] = {
 		"native",
 		"gst"
 	};
 	const uint32_t num_formats = sizeof(formats)/sizeof(*formats);
-	int32_t selected_format = -1;
+	static int32_t selected_format = -1;
 	if (selected_format < 0) {
 		selected_format = find_match(formats, num_formats, "ui\0state_format\0", "native");
 	}
@@ -1825,7 +1963,10 @@
 		selected_sync = settings_dropdown(context, "Sync Source", sync_opts, num_sync_opts, selected_sync, "system\0sync_source\0");
 		settings_int_property(context, "68000 Clock Divider", "", "clocks\0m68k_divider\0", 7, 1, 53);
 		settings_toggle(context, "Remember ROM Path", "ui\0remember_path\0", 1);
+		settings_toggle(context, "Save config with EXE", "ui\0config_in_exe_dir\0", 0);
+		settings_string(context, "Game Save Path", "ui\0save_path\0", "$USERDATA/blastem/$ROMNAME");
 		selected_region = settings_dropdown_ex(context, "Default Region", region_codes, regions, num_regions, selected_region, "system\0default_region\0");
+		selected_model = settings_dropdown_ex(context, "Model", model_opts, model_names, num_models, selected_model, "system\0model\0");
 		selected_format = settings_dropdown(context, "Save State Format", formats, num_formats, selected_format, "ui\0state_format\0");
 		selected_init = settings_dropdown(context, "Initial RAM Value", ram_inits, num_inits, selected_init, "system\0ram_init\0");
 		selected_io_1 = settings_dropdown_ex(context, "IO Port 1 Device", io_opts_1, device_type_names, num_io, selected_io_1, "io\0devices\0""1\0");
@@ -1919,6 +2060,7 @@
 
 void ui_idle_loop(void)
 {
+	render_enable_gamepad_events(1);
 	const uint32_t MIN_UI_DELAY = 15;
 	static uint32_t last;
 	while (current_view != view_play)
@@ -1935,6 +2077,7 @@
 		persist_config(config);
 		config_dirty = 0;
 	}
+	render_enable_gamepad_events(0);
 }
 static void handle_event(SDL_Event *event)
 {
@@ -2054,6 +2197,11 @@
 		.r = 255, .g = 128, .b = 0, .a = 255
 	};
 	context->style.checkbox.cursor_hover = context->style.checkbox.cursor_normal;
+	context->style.property.inc_button.text_hover = (struct nk_color){
+		.r = 255, .g = 128, .b = 0, .a = 255
+	};
+	context->style.property.dec_button.text_hover = context->style.property.inc_button.text_hover;
+	context->style.combo.button.text_hover = context->style.property.inc_button.text_hover;
 }
 
 static void context_created(void)
@@ -2071,6 +2219,7 @@
 		context->style.window.background = nk_rgba(0, 0, 0, 128);
 		context->style.window.fixed_background = nk_style_item_color(nk_rgba(0, 0, 0, 128));
 		current_view = view_pause;
+		context->input.selected_widget = 0;
 		current_system->request_exit(current_system);
 	} else if (current_system && !set_binding) {
 		clear_view_stack();
@@ -2082,6 +2231,7 @@
 {
 	set_content_binding_state(1);
 	current_view = view_play;
+	context->input.selected_widget = 0;
 }
 
 static uint8_t active;
--- a/nuklear_ui/nuklear.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/nuklear_ui/nuklear.h	Sat Apr 18 11:42:53 2020 -0700
@@ -1845,7 +1845,8 @@
     NK_WIDGET_STATE_HOVERED     = NK_WIDGET_STATE_HOVER|NK_WIDGET_STATE_MODIFIED, /* widget is being hovered */
     NK_WIDGET_STATE_ACTIVE      = NK_WIDGET_STATE_ACTIVED|NK_WIDGET_STATE_MODIFIED /* widget is currently activated */
 };
-NK_API enum nk_widget_layout_states nk_widget(struct nk_rect*, const struct nk_context*);
+NK_API enum nk_widget_layout_states nk_widget(struct nk_rect*, struct nk_context*);
+NK_API enum nk_widget_layout_states nk_keynav_widget(struct nk_rect *, struct nk_context *);
 NK_API enum nk_widget_layout_states nk_widget_fitting(struct nk_rect*, struct nk_context*, struct nk_vec2);
 NK_API struct nk_rect nk_widget_bounds(struct nk_context*);
 NK_API struct nk_vec2 nk_widget_position(struct nk_context*);
@@ -3192,6 +3193,8 @@
 struct nk_input {
     struct nk_keyboard keyboard;
     struct nk_mouse mouse;
+	int widget_counter;
+	int selected_widget;
 };
 
 NK_API int nk_input_has_mouse_click(const struct nk_input*, enum nk_buttons);
@@ -12568,6 +12571,7 @@
     in->mouse.delta.y = 0;
     for (i = 0; i < NK_KEY_MAX; i++)
         in->keyboard.keys[i].clicked = 0;
+	in->widget_counter = -1;
 }
 
 NK_API void
@@ -12692,9 +12696,11 @@
     const struct nk_mouse_button *btn;
     if (!i) return nk_false;
     btn = &i->mouse.buttons[id];
-    if (!NK_INBOX(btn->clicked_pos.x,btn->clicked_pos.y,b.x,b.y,b.w,b.h))
-        return nk_false;
-    return nk_true;
+    if (NK_INBOX(btn->clicked_pos.x,btn->clicked_pos.y,b.x,b.y,b.w,b.h))
+        return nk_true;
+	if (i->selected_widget == i->widget_counter && i->keyboard.keys[NK_KEY_ENTER].clicked)
+		return nk_true;
+    return nk_false;
 }
 
 NK_API int
@@ -12742,7 +12748,7 @@
 nk_input_is_mouse_hovering_rect(const struct nk_input *i, struct nk_rect rect)
 {
     if (!i) return nk_false;
-    return NK_INBOX(i->mouse.pos.x, i->mouse.pos.y, rect.x, rect.y, rect.w, rect.h);
+    return i->selected_widget == i->widget_counter || NK_INBOX(i->mouse.pos.x, i->mouse.pos.y, rect.x, rect.y, rect.w, rect.h);
 }
 
 NK_API int
@@ -12764,7 +12770,9 @@
 nk_input_is_mouse_down(const struct nk_input *i, enum nk_buttons id)
 {
     if (!i) return nk_false;
-    return i->mouse.buttons[id].down;
+    return i->mouse.buttons[id].down || (
+		id == NK_BUTTON_LEFT && i->widget_counter == i->selected_widget && i->keyboard.keys[NK_KEY_ENTER].down
+	);
 }
 
 NK_API int
@@ -12775,6 +12783,11 @@
     b = &i->mouse.buttons[id];
     if (b->down && b->clicked)
         return nk_true;
+	if (
+		id == NK_BUTTON_LEFT && i->widget_counter == i->selected_widget 
+		&& i->keyboard.keys[NK_KEY_ENTER].down && i->keyboard.keys[NK_KEY_ENTER].clicked
+	) 
+		return nk_true;
     return nk_false;
 }
 
@@ -12782,7 +12795,14 @@
 nk_input_is_mouse_released(const struct nk_input *i, enum nk_buttons id)
 {
     if (!i) return nk_false;
-    return (!i->mouse.buttons[id].down && i->mouse.buttons[id].clicked);
+    if (!i->mouse.buttons[id].down && i->mouse.buttons[id].clicked)
+		return nk_true;
+	if (
+		id == NK_BUTTON_LEFT && i->widget_counter == i->selected_widget 
+		&& !i->keyboard.keys[NK_KEY_ENTER].down && i->keyboard.keys[NK_KEY_ENTER].clicked
+	) 
+		return nk_true;
+	return nk_false;
 }
 
 NK_API int
@@ -14610,7 +14630,7 @@
 NK_INTERN int
 nk_do_selectable(nk_flags *state, struct nk_command_buffer *out,
     struct nk_rect bounds, const char *str, int len, nk_flags align, int *value,
-    const struct nk_style_selectable *style, const struct nk_input *in,
+    const struct nk_style_selectable *style, struct nk_input *in,
     const struct nk_user_font *font)
 {
     int old_value;
@@ -14636,6 +14656,12 @@
     /* update button */
     if (nk_button_behavior(state, touch, in, NK_BUTTON_DEFAULT))
         *value = !(*value);
+		
+	if (!old_value && !(*value) && in && in->selected_widget == in->widget_counter) {
+		*value = 1;
+	} else if (!old_value && *value && in) {
+		in->selected_widget = in->widget_counter;
+	}
 
     /* draw selectable */
     if (style->draw_begin) style->draw_begin(out, style->userdata);
@@ -16161,8 +16187,18 @@
     nk_draw_property(out, style, &property, &label, *ws, name, name_len, font);
     if (style->draw_end) style->draw_end(out, style->userdata);
 
-    /* execute right button  */
-    if (nk_do_button_symbol(ws, out, left, style->sym_left, behavior, &style->dec_button, in, font)) {
+	int selected = in && in->selected_widget == in->widget_counter;
+	int enter_clicked;
+	if (selected) {
+		//prevent left/right buttons from activating when enter is pressed
+		enter_clicked = in->keyboard.keys[NK_KEY_ENTER].clicked;
+		in->keyboard.keys[NK_KEY_ENTER].clicked = 0;
+	}
+    /* execute left button  */
+    if (
+		nk_do_button_symbol(ws, out, left, style->sym_left, behavior, &style->dec_button, in, font)
+		|| (selected && in->keyboard.keys[NK_KEY_LEFT].clicked && in->keyboard.keys[NK_KEY_LEFT].down)
+	) {
         switch (variant->kind) {
         default: break;
         case NK_PROPERTY_INT:
@@ -16173,8 +16209,11 @@
             variant->value.d = NK_CLAMP(variant->min_value.d, variant->value.d - variant->step.d, variant->max_value.d); break;
         }
     }
-    /* execute left button  */
-    if (nk_do_button_symbol(ws, out, right, style->sym_right, behavior, &style->inc_button, in, font)) {
+    /* execute right button  */
+    if (
+		nk_do_button_symbol(ws, out, right, style->sym_right, behavior, &style->inc_button, in, font)
+		|| (selected && in->keyboard.keys[NK_KEY_RIGHT].clicked && in->keyboard.keys[NK_KEY_RIGHT].down)
+	) {
         switch (variant->kind) {
         default: break;
         case NK_PROPERTY_INT:
@@ -16185,6 +16224,9 @@
             variant->value.d = NK_CLAMP(variant->min_value.d, variant->value.d + variant->step.d, variant->max_value.d); break;
         }
     }
+	if (selected) {
+		in->keyboard.keys[NK_KEY_ENTER].clicked = enter_clicked;
+	}
     if (old != NK_PROPERTY_EDIT && (*state == NK_PROPERTY_EDIT)) {
         /* property has been activated so setup buffer */
         NK_MEMCPY(buffer, dst, (nk_size)*length);
@@ -17259,6 +17301,7 @@
 #ifdef NK_INCLUDE_VERTEX_BUFFER_OUTPUT
     nk_draw_list_init(&ctx->draw_list);
 #endif
+	ctx->input.widget_counter = -1;
 }
 
 #ifdef NK_INCLUDE_DEFAULT_ALLOCATOR
@@ -20238,7 +20281,7 @@
 }
 
 NK_API enum nk_widget_layout_states
-nk_widget(struct nk_rect *bounds, const struct nk_context *ctx)
+nk_widget_gen(struct nk_rect *bounds, struct nk_context *ctx, nk_byte is_keynav)
 {
     struct nk_rect c, v;
     struct nk_window *win;
@@ -20277,13 +20320,51 @@
     c.y = (float)((int)c.y);
     c.w = (float)((int)c.w);
     c.h = (float)((int)c.h);
+	int newly_selected = nk_false;
+	if (is_keynav) {
+		ctx->input.widget_counter++;
+		if (
+			ctx->input.selected_widget == (ctx->input.widget_counter + 1) && 
+			ctx->input.keyboard.keys[NK_KEY_UP].clicked && ctx->input.keyboard.keys[NK_KEY_UP].down
+		) {
+			ctx->input.selected_widget--;
+			newly_selected = nk_true;
+		} else if (
+			ctx->input.selected_widget == (ctx->input.widget_counter - 1) &&
+			ctx->input.keyboard.keys[NK_KEY_DOWN].clicked && ctx->input.keyboard.keys[NK_KEY_DOWN].down
+		) {
+			ctx->input.keyboard.keys[NK_KEY_DOWN].clicked = 0;
+			ctx->input.selected_widget++;
+			newly_selected = nk_true;
+		}
+	}
 
     nk_unify(&v, &c, bounds->x, bounds->y, bounds->x + bounds->w, bounds->y + bounds->h);
+	if (is_keynav && newly_selected) {
+		//ensure widget is fully on-screen if it was newly selected via a keyboard action
+		if ((bounds->y + bounds->h) > (c.y + c.h)) {
+			*layout->offset_y += bounds->y + bounds->h - (c.y + c.h);
+		} else if(c.y > bounds->y){
+			*layout->offset_y -= c.y - bounds->y;
+		}
+	}
     if (!NK_INTERSECT(c.x, c.y, c.w, c.h, bounds->x, bounds->y, bounds->w, bounds->h))
         return NK_WIDGET_INVALID;
-    if (!NK_INBOX(in->mouse.pos.x, in->mouse.pos.y, v.x, v.y, v.w, v.h))
-        return NK_WIDGET_ROM;
-    return NK_WIDGET_VALID;
+    if ((is_keynav && ctx->input.selected_widget == ctx->input.widget_counter ) || NK_INBOX(in->mouse.pos.x, in->mouse.pos.y, v.x, v.y, v.w, v.h))
+        return NK_WIDGET_VALID;
+    return NK_WIDGET_ROM;
+}
+
+NK_API enum nk_widget_layout_states
+nk_widget(struct nk_rect *bounds, struct nk_context *ctx)
+{
+	return nk_widget_gen(bounds, ctx, 0);
+}
+
+NK_API enum nk_widget_layout_states
+nk_keynav_widget(struct nk_rect *bounds, struct nk_context *ctx)
+{
+	return nk_widget_gen(bounds, ctx, 1);
 }
 
 NK_API enum nk_widget_layout_states
@@ -20306,7 +20387,7 @@
     win = ctx->current;
     style = &ctx->style;
     layout = win->layout;
-    state = nk_widget(bounds, ctx);
+    state = nk_keynav_widget(bounds, ctx);
 
     panel_padding = nk_panel_get_padding(style, layout->type);
     if (layout->row.index == 1) {
@@ -20627,13 +20708,17 @@
 
     win = ctx->current;
     layout = win->layout;
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
 
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
-    return nk_do_button_text(&ctx->last_widget_state, &win->buffer, bounds,
+    int ret = nk_do_button_text(&ctx->last_widget_state, &win->buffer, bounds,
                     title, len, style->text_alignment, ctx->button_behavior,
                     style, in, ctx->style.font);
+	if (ctx->last_widget_state & NK_WIDGET_STATE_ENTERED) {
+		ctx->input.selected_widget = ctx->input.widget_counter;
+	}
+	return ret;
 }
 
 NK_API int
@@ -20673,7 +20758,7 @@
     win = ctx->current;
     layout = win->layout;
 
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
 
@@ -20684,6 +20769,9 @@
     ret = nk_do_button(&ctx->last_widget_state, &win->buffer, bounds,
                 &button, in, ctx->button_behavior, &content);
     nk_draw_button(&win->buffer, &bounds, ctx->last_widget_state, &button);
+	if (ctx->last_widget_state & NK_WIDGET_STATE_ENTERED) {
+		ctx->input.selected_widget = ctx->input.widget_counter;
+	}
     return ret;
 }
 
@@ -20706,11 +20794,15 @@
 
     win = ctx->current;
     layout = win->layout;
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
-    return nk_do_button_symbol(&ctx->last_widget_state, &win->buffer, bounds,
+    int ret = nk_do_button_symbol(&ctx->last_widget_state, &win->buffer, bounds,
             symbol, ctx->button_behavior, style, in, ctx->style.font);
+	if (ctx->last_widget_state & NK_WIDGET_STATE_ENTERED) {
+		ctx->input.selected_widget = ctx->input.widget_counter;
+	}
+	return ret;
 }
 
 NK_API int
@@ -20741,7 +20833,7 @@
     win = ctx->current;
     layout = win->layout;
 
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
     return nk_do_button_image(&ctx->last_widget_state, &win->buffer, bounds,
@@ -20777,7 +20869,7 @@
     win = ctx->current;
     layout = win->layout;
 
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
     return nk_do_button_text_symbol(&ctx->last_widget_state, &win->buffer, bounds,
@@ -20824,7 +20916,7 @@
     win = ctx->current;
     layout = win->layout;
 
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
     return nk_do_button_text_image(&ctx->last_widget_state, &win->buffer,
@@ -20858,7 +20950,7 @@
 {
     struct nk_window *win;
     struct nk_panel *layout;
-    const struct nk_input *in;
+    struct nk_input *in;
     const struct nk_style *style;
 
     enum nk_widget_layout_states state;
@@ -20875,7 +20967,7 @@
     layout = win->layout;
     style = &ctx->style;
 
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
     return nk_do_selectable(&ctx->last_widget_state, &win->buffer, bounds,
@@ -20960,7 +21052,7 @@
     style = &ctx->style;
     layout = win->layout;
 
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return active;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
     nk_do_toggle(&ctx->last_widget_state, &win->buffer, bounds, &active,
@@ -21435,7 +21527,7 @@
     win = ctx->current;
     layout = win->layout;
     style = &ctx->style;
-    s = nk_widget(&bounds, ctx);
+    s = nk_keynav_widget(&bounds, ctx);
     if (!s) return;
 
     /* calculate hash from name */
@@ -21485,6 +21577,7 @@
             ctx->input.mouse.grab = nk_true;
             ctx->input.mouse.grabbed = nk_true;
         }
+		ctx->input.selected_widget = ctx->input.widget_counter;
     }
     /* check if previously active property is now inactive */
     if (*state == NK_PROPERTY_DEFAULT && old_state != NK_PROPERTY_DEFAULT) {
@@ -22695,8 +22788,11 @@
             body.y = (panel->at_y + panel->footer_height + panel->border + padding.y + panel->row.height);
             body.h = (panel->bounds.y + panel->bounds.h) - body.y;
         }
+		int selected = ctx->input.selected_widget;
+		ctx->input.selected_widget = -1;
         {int pressed = nk_input_is_mouse_pressed(&ctx->input, NK_BUTTON_LEFT);
         int in_body = nk_input_is_mouse_hovering_rect(&ctx->input, body);
+		ctx->input.selected_widget = selected;
         if (pressed && in_body)
             popup->flags |= NK_WINDOW_HIDDEN;
         }
@@ -22769,7 +22865,7 @@
 
     win = ctx->current;
     style = &ctx->style;
-    s = nk_widget(&header, ctx);
+    s = nk_keynav_widget(&header, ctx);
     if (s == NK_WIDGET_INVALID)
         return 0;
 
@@ -23330,11 +23426,21 @@
     size.y = NK_MIN(size.y, (float)max_height);
     if (nk_combo_begin_label(ctx, items[selected], size)) {
         nk_layout_row_dynamic(ctx, (float)item_height, 1);
+		int main_item_widget = ctx->input.widget_counter;
         for (i = 0; i < count; ++i) {
-            if (nk_combo_item_label(ctx, items[i], NK_TEXT_LEFT))
+            if (nk_combo_item_label(ctx, items[i], NK_TEXT_LEFT)) {
                 selected = i;
+				ctx->input.selected_widget = main_item_widget;
+				//prevent below code from advancing selected widget
+				main_item_widget--;
+			}
         }
         nk_combo_end(ctx);
+		if (ctx->input.selected_widget <= main_item_widget) {
+			ctx->input.selected_widget = main_item_widget + 1;
+		} else if (ctx->input.selected_widget > main_item_widget + count) {
+			ctx->input.selected_widget = main_item_widget + count;
+		}
     }
     return selected;
 }
--- a/nuklear_ui/nuklear_sdl_gles2.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/nuklear_ui/nuklear_sdl_gles2.h	Sat Apr 18 11:42:53 2020 -0700
@@ -414,6 +414,30 @@
             else nk_input_key(ctx, NK_KEY_RIGHT, down);
         } else return 0;
         return 1;
+	} else if (evt->type == SDL_CONTROLLERBUTTONDOWN || evt->type == SDL_CONTROLLERBUTTONUP) {
+		int down = evt->type == SDL_CONTROLLERBUTTONDOWN;
+		if (evt->cbutton.button == SDL_CONTROLLER_BUTTON_DPAD_UP) {
+			nk_input_key(ctx, NK_KEY_UP, down);
+		} else if (evt->cbutton.button == SDL_CONTROLLER_BUTTON_DPAD_DOWN) {
+			nk_input_key(ctx, NK_KEY_DOWN, down);
+		} else if (evt->cbutton.button == SDL_CONTROLLER_BUTTON_A || evt->cbutton.button == SDL_CONTROLLER_BUTTON_START) {
+			nk_input_key(ctx, NK_KEY_ENTER, down);
+		}
+	} else if (evt->type == SDL_CONTROLLERAXISMOTION) {
+		if (evt->caxis.axis == SDL_CONTROLLER_AXIS_LEFTY || evt->caxis.axis ==  SDL_CONTROLLER_AXIS_RIGHTY) {
+			int down = abs(evt->caxis.value) > 2000;
+			if (evt->caxis.value >= 0) {
+				if (ctx->input.keyboard.keys[NK_KEY_UP].down) {
+					nk_input_key(ctx, NK_KEY_UP, 0);
+				}
+				nk_input_key(ctx, NK_KEY_DOWN, down);
+			} else {
+				if (ctx->input.keyboard.keys[NK_KEY_DOWN].down) {
+					nk_input_key(ctx, NK_KEY_DOWN, 0);
+				}
+				nk_input_key(ctx, NK_KEY_UP, down);
+			}
+		}
     } else if (evt->type == SDL_MOUSEBUTTONDOWN || evt->type == SDL_MOUSEBUTTONUP) {
         /* mouse button */
         int down = evt->type == SDL_MOUSEBUTTONDOWN;
--- a/paths.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/paths.c	Sat Apr 18 11:42:53 2020 -0700
@@ -59,7 +59,7 @@
 
 void get_initial_browse_path(char **dst)
 {
-	*dst = NULL;
+	char *base = NULL;
 	char *remember_path = tern_find_path(config, "ui\0remember_path\0", TVAL_PTR).ptrval;
 	if (!remember_path || !strcmp("on", remember_path)) {
 		char *pathfname = alloc_concat(get_userdata_dir(), PATH_SEP "blastem" PATH_SEP "sticky_path");
@@ -67,13 +67,13 @@
 		if (f) {
 			long pathsize = file_size(f);
 			if (pathsize > 0) {
-				*dst = malloc(pathsize + 1);
-				if (fread(*dst, 1, pathsize, f) != pathsize) {
+				base = malloc(pathsize + 1);
+				if (fread(base, 1, pathsize, f) != pathsize) {
 					warning("Error restoring saved file browser path");
-					free(*dst);
-					*dst = NULL;
+					free(base);
+					base = NULL;
 				} else {
-					(*dst)[pathsize] = 0;
+					base[pathsize] = 0;
 				}
 			}
 			fclose(f);
@@ -84,19 +84,20 @@
 			current_path = dst;
 		}
 	}
-	if (!*dst) {
-		*dst = tern_find_path(config, "ui\0initial_path\0", TVAL_PTR).ptrval;
+	if (!base) {
+		base = tern_find_path(config, "ui\0initial_path\0", TVAL_PTR).ptrval;
 	}
-	if (!*dst){
+	if (!base){
 #ifdef __ANDROID__
-		*dst = get_external_storage_path();
+		base = get_external_storage_path();
 #else
-		*dst = "$HOME";
+		base = "$HOME";
 #endif
 	}
 	tern_node *vars = tern_insert_ptr(NULL, "HOME", get_home_dir());
 	vars = tern_insert_ptr(vars, "EXEDIR", get_exe_dir());
-	*dst = replace_vars(*dst, vars, 1);
+	*dst = replace_vars(base, vars, 1);
+	free(base);
 	tern_free(vars);
 }
 
--- a/psg.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/psg.c	Sat Apr 18 11:42:53 2020 -0700
@@ -4,7 +4,6 @@
  BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
 */
 #include "psg.h"
-#include "render.h"
 #include "blastem.h"
 #include <string.h>
 #include <stdlib.h>
@@ -33,6 +32,9 @@
 
 void psg_write(psg_context * context, uint8_t value)
 {
+	if (context->vgm) {
+		vgm_sn76489_write(context->vgm, context->cycles, value);
+	}
 	if (value & 0x80) {
 		context->latch = value & 0x70;
 		uint8_t channel = value >> 5 & 0x3;
@@ -123,6 +125,30 @@
 	}
 }
 
+void psg_vgm_log(psg_context *context, uint32_t master_clock, vgm_writer *vgm)
+{
+	vgm_sn76489_init(vgm, 16 * master_clock / context->clock_inc, 9, 16, 0);
+	context->vgm = vgm;
+	for (int chan = 0; chan < 4; chan++)
+	{
+		uint8_t base = chan << 5 | 0x80;
+		vgm_sn76489_write(context->vgm, context->cycles, context->volume[chan] | base | 0x10);
+		if (chan == 3) {
+			if (context->noise_use_tone) {
+				vgm_sn76489_write(context->vgm, context->cycles, 3 | base);
+			} else {
+				//0x10 = 0
+				//0x20 = 1
+				//0x40 = 2
+				vgm_sn76489_write(context->vgm, context->cycles, context->counter_load[chan] >> 5 | base);
+			}
+		} else {
+			vgm_sn76489_write(context->vgm, context->cycles, (context->counter_load[chan] & 0xF) | base);
+			vgm_sn76489_write(context->vgm, context->cycles, context->counter_load[chan] >> 4 & 0x3F);
+		}
+	}
+}
+
 void psg_serialize(psg_context *context, serialize_buffer *buf)
 {
 	save_int16(buf, context->lsfr);
--- a/psg.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/psg.h	Sat Apr 18 11:42:53 2020 -0700
@@ -8,10 +8,12 @@
 
 #include <stdint.h>
 #include "serialize.h"
-#include "render.h"
+#include "render_audio.h"
+#include "vgm.h"
 
 typedef struct {
 	audio_source *audio;
+	vgm_writer   *vgm;
 	uint32_t clock_inc;
 	uint32_t cycles;
 	uint16_t lsfr;
@@ -31,6 +33,7 @@
 void psg_adjust_master_clock(psg_context * context, uint32_t master_clock);
 void psg_write(psg_context * context, uint8_t value);
 void psg_run(psg_context * context, uint32_t cycles);
+void psg_vgm_log(psg_context *context, uint32_t master_clock, vgm_writer *vgm);
 void psg_serialize(psg_context *context, serialize_buffer *buf);
 void psg_deserialize(deserialize_buffer *buf, void *vcontext);
 
--- a/render.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/render.h	Sat Apr 18 11:42:53 2020 -0700
@@ -92,7 +92,6 @@
 #define RENDER_NOT_MAPPED -2
 #define RENDER_NOT_PLUGGED_IN -3
 
-typedef struct audio_source audio_source;
 typedef void (*drop_handler)(const char *filename);
 typedef void (*window_close_handler)(uint8_t which);
 typedef void (*ui_render_fun)(void);
@@ -109,9 +108,7 @@
 void render_set_video_standard(vid_std std);
 void render_toggle_fullscreen();
 void render_update_caption(char *title);
-void render_wait_quit(vdp_context * context);
-uint32_t render_audio_buffer();
-uint32_t render_sample_rate();
+void render_wait_quit(void);
 void process_events();
 int render_width();
 int render_height();
@@ -129,18 +126,11 @@
 uint32_t render_emulated_width();
 uint32_t render_emulated_height();
 uint32_t render_overscan_top();
+uint32_t render_overscan_bot();
 uint32_t render_overscan_left();
 uint32_t render_elapsed_ms(void);
 void render_sleep_ms(uint32_t delay);
 uint8_t render_has_gl(void);
-audio_source *render_audio_source(uint64_t master_clock, uint64_t sample_divider, uint8_t channels);
-void render_audio_source_gaindb(audio_source *src, float gain);
-void render_audio_adjust_clock(audio_source *src, uint64_t master_clock, uint64_t sample_divider);
-void render_put_mono_sample(audio_source *src, int16_t value);
-void render_put_stereo_sample(audio_source *src, int16_t left, int16_t right);
-void render_pause_source(audio_source *src);
-void render_resume_source(audio_source *src);
-void render_free_source(audio_source *src);
 void render_config_updated(void);
 void render_set_gl_context_handlers(ui_render_fun destroy, ui_render_fun create);
 void render_set_ui_render_fun(ui_render_fun);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/render_audio.c	Sat Apr 18 11:42:53 2020 -0700
@@ -0,0 +1,385 @@
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include "render_audio.h"
+#include "util.h"
+#include "config.h"
+#include "blastem.h"
+
+static uint8_t output_channels;
+static uint32_t buffer_samples, sample_rate;
+
+static audio_source *audio_sources[8];
+static audio_source *inactive_audio_sources[8];
+static uint8_t num_audio_sources;
+static uint8_t num_inactive_audio_sources;
+
+static float overall_gain_mult, *mix_buf;
+static int sample_size;
+
+typedef void (*conv_func)(float *samples, void *vstream, int sample_count);
+
+static void convert_null(float *samples, void *vstream, int sample_count)
+{
+	memset(vstream, 0, sample_count * sample_size);
+}
+
+static void convert_s16(float *samples, void *vstream, int sample_count)
+{
+	int16_t *stream = vstream;
+	for (int16_t *end = stream + sample_count; stream < end; stream++, samples++)
+	{
+		float sample = *samples;
+		int16_t out_sample;
+		if (sample >= 1.0f) {
+			out_sample = 0x7FFF;
+		} else if (sample <= -1.0f) {
+			out_sample = -0x8000;
+		} else {
+			out_sample = sample * 0x7FFF;
+		}
+		*stream = out_sample;
+	}
+}
+
+static void clamp_f32(float *samples, void *vstream, int sample_count)
+{
+	for (; sample_count > 0; sample_count--, samples++)
+	{
+		float sample = *samples;
+		if (sample > 1.0f) {
+			sample = 1.0f;
+		} else if (sample < -1.0f) {
+			sample = -1.0f;
+		}
+		*samples = sample;
+	}
+}
+
+static int32_t mix_f32(audio_source *audio, float *stream, int samples)
+{
+	float *end = stream + samples;
+	int16_t *src = audio->front;
+	uint32_t i = audio->read_start;
+	uint32_t i_end = audio->read_end;
+	float *cur = stream;
+	float gain_mult = audio->gain_mult * overall_gain_mult;
+	size_t first_add = output_channels > 1 ? 1 : 0, second_add = output_channels > 1 ? output_channels - 1 : 1;
+	if (audio->num_channels == 1) {
+		while (cur < end && i != i_end)
+		{
+			*cur += gain_mult * ((float)src[i]) / 0x7FFF;
+			cur += first_add;
+			*cur += gain_mult * ((float)src[i++]) / 0x7FFF;
+			cur += second_add;
+			i &= audio->mask;
+		}
+	} else {
+		while(cur < end && i != i_end)
+		{
+			*cur += gain_mult * ((float)src[i++]) / 0x7FFF;
+			cur += first_add;
+			*cur += gain_mult * ((float)src[i++]) / 0x7FFF;
+			cur += second_add;
+			i &= audio->mask;
+		}
+	}
+	if (!render_is_audio_sync()) {
+		audio->read_start = i;
+	}
+	if (cur != end) {
+		debug_message("Underflow of %d samples, read_start: %d, read_end: %d, mask: %X\n", (int)(end-cur)/2, audio->read_start, audio->read_end, audio->mask);
+		return (cur-end)/2;
+	} else {
+		return ((i_end - i) & audio->mask) / audio->num_channels;
+	}
+}
+
+static conv_func convert;
+
+
+int mix_and_convert(unsigned char *byte_stream, int len, int *min_remaining_out)
+{
+	int samples = len / sample_size;
+	float *mix_dest = mix_buf ? mix_buf : (float *)byte_stream;
+	memset(mix_dest, 0, samples * sizeof(float));
+	int min_buffered = INT_MAX;
+	int min_remaining_buffer = INT_MAX;
+	for (uint8_t i = 0; i < num_audio_sources; i++)
+	{
+		int buffered = mix_f32(audio_sources[i], mix_dest, samples);
+		int remaining = (audio_sources[i]->mask + 1) / audio_sources[i]->num_channels - buffered;
+		min_buffered = buffered < min_buffered ? buffered : min_buffered;
+		min_remaining_buffer = remaining < min_remaining_buffer ? remaining : min_remaining_buffer;
+		audio_sources[i]->front_populated = 0;
+		render_buffer_consumed(audio_sources[i]);
+	}
+	convert(mix_dest, byte_stream, samples);
+	if (min_remaining_out) {
+		*min_remaining_out = min_remaining_buffer;
+	}
+	return min_buffered;
+}
+
+uint8_t all_sources_ready(void)
+{
+	uint8_t num_populated = 0;
+	num_populated = 0;
+	for (uint8_t i = 0; i < num_audio_sources; i++)
+	{
+		if (audio_sources[i]->front_populated) {
+			num_populated++;
+		}
+	}
+	return num_populated == num_audio_sources;
+}
+
+#define BUFFER_INC_RES 0x40000000UL
+
+void render_audio_adjust_clock(audio_source *src, uint64_t master_clock, uint64_t sample_divider)
+{
+	src->buffer_inc = ((BUFFER_INC_RES * (uint64_t)sample_rate) / master_clock) * sample_divider;
+}
+
+void render_audio_adjust_speed(float adjust_ratio)
+{
+	for (uint8_t i = 0; i < num_audio_sources; i++)
+	{
+		audio_sources[i]->buffer_inc = ((double)audio_sources[i]->buffer_inc) + ((double)audio_sources[i]->buffer_inc) * adjust_ratio + 0.5;
+	}
+}
+
+audio_source *render_audio_source(uint64_t master_clock, uint64_t sample_divider, uint8_t channels)
+{
+	audio_source *ret = NULL;
+	uint32_t alloc_size = render_is_audio_sync() ? channels * buffer_samples : nearest_pow2(render_min_buffered() * 4 * channels);
+	render_lock_audio();
+		if (num_audio_sources < 8) {
+			ret = calloc(1, sizeof(audio_source));
+			ret->back = malloc(alloc_size * sizeof(int16_t));
+			ret->front = render_is_audio_sync() ? malloc(alloc_size * sizeof(int16_t)) : ret->back;
+			ret->front_populated = 0;
+			ret->opaque = render_new_audio_opaque();
+			ret->num_channels = channels;
+			audio_sources[num_audio_sources++] = ret;
+		}
+	render_unlock_audio();
+	if (!ret) {
+		fatal_error("Too many audio sources!");
+	} else {
+		render_audio_adjust_clock(ret, master_clock, sample_divider);
+		double lowpass_cutoff = get_lowpass_cutoff(config);
+		double rc = (1.0 / lowpass_cutoff) / (2.0 * M_PI);
+		ret->dt = 1.0 / ((double)master_clock / (double)(sample_divider));
+		double alpha = ret->dt / (ret->dt + rc);
+		ret->lowpass_alpha = (int32_t)(((double)0x10000) * alpha);
+		ret->buffer_pos = 0;
+		ret->buffer_fraction = 0;
+		ret->last_left = ret->last_right = 0;
+		ret->read_start = 0;
+		ret->read_end = render_is_audio_sync() ? buffer_samples * channels : 0;
+		ret->mask = render_is_audio_sync() ? 0xFFFFFFFF : alloc_size-1;
+		ret->gain_mult = 1.0f;
+	}
+	render_audio_created(ret);
+	
+	return ret;
+}
+
+
+static float db_to_mult(float gain)
+{
+	return powf(10.0f, gain/20.0f);
+}
+
+void render_audio_source_gaindb(audio_source *src, float gain)
+{
+	src->gain_mult = db_to_mult(gain);
+}
+
+void render_pause_source(audio_source *src)
+{
+	uint8_t found = 0, remaining_sources;
+	render_lock_audio();
+		for (uint8_t i = 0; i < num_audio_sources; i++)
+		{
+			if (audio_sources[i] == src) {
+				audio_sources[i] = audio_sources[--num_audio_sources];
+				found = 1;
+				remaining_sources = num_audio_sources;
+				break;
+			}
+		}
+		
+	render_unlock_audio();
+	if (found) {
+		render_source_paused(src, remaining_sources);
+	}
+	inactive_audio_sources[num_inactive_audio_sources++] = src;
+}
+
+void render_resume_source(audio_source *src)
+{
+	render_lock_audio();
+		if (num_audio_sources < 8) {
+			audio_sources[num_audio_sources++] = src;
+		}
+	render_unlock_audio();
+	for (uint8_t i = 0; i < num_inactive_audio_sources; i++)
+	{
+		if (inactive_audio_sources[i] == src) {
+			inactive_audio_sources[i] = inactive_audio_sources[--num_inactive_audio_sources];
+		}
+	}
+	render_source_resumed(src);
+}
+
+void render_free_source(audio_source *src)
+{
+	uint8_t found = 0;
+	for (uint8_t i = 0; i < num_inactive_audio_sources; i++)
+	{
+		if (inactive_audio_sources[i] == src) {
+			inactive_audio_sources[i] = inactive_audio_sources[--num_inactive_audio_sources];
+			found = 1;
+			break;
+		}
+	}
+	if (!found) {
+		render_pause_source(src);
+		num_inactive_audio_sources--;
+	}
+	
+	free(src->front);
+	if (render_is_audio_sync()) {
+		free(src->back);
+		render_free_audio_opaque(src->opaque);
+	}
+	free(src);
+}
+
+static int16_t lowpass_sample(audio_source *src, int16_t last, int16_t current)
+{
+	int32_t tmp = current * src->lowpass_alpha + last * (0x10000 - src->lowpass_alpha);
+	current = tmp >> 16;
+	return current;
+}
+
+static void interp_sample(audio_source *src, int16_t last, int16_t current)
+{
+	int64_t tmp = last * ((src->buffer_fraction << 16) / src->buffer_inc);
+	tmp += current * (0x10000 - ((src->buffer_fraction << 16) / src->buffer_inc));
+	src->back[src->buffer_pos++] = tmp >> 16;
+}
+
+static uint32_t sync_samples;
+void render_put_mono_sample(audio_source *src, int16_t value)
+{
+	value = lowpass_sample(src, src->last_left, value);
+	src->buffer_fraction += src->buffer_inc;
+	uint32_t base = render_is_audio_sync() ? 0 : src->read_end;
+	while (src->buffer_fraction > BUFFER_INC_RES)
+	{
+		src->buffer_fraction -= BUFFER_INC_RES;
+		interp_sample(src, src->last_left, value);
+		
+		if (((src->buffer_pos - base) & src->mask) >= sync_samples) {
+			render_do_audio_ready(src);
+		}
+		src->buffer_pos &= src->mask;
+	}
+	src->last_left = value;
+}
+
+void render_put_stereo_sample(audio_source *src, int16_t left, int16_t right)
+{
+	left = lowpass_sample(src, src->last_left, left);
+	right = lowpass_sample(src, src->last_right, right);
+	src->buffer_fraction += src->buffer_inc;
+	uint32_t base = render_is_audio_sync() ? 0 : src->read_end;
+	while (src->buffer_fraction > BUFFER_INC_RES)
+	{
+		src->buffer_fraction -= BUFFER_INC_RES;
+		
+		interp_sample(src, src->last_left, left);
+		interp_sample(src, src->last_right, right);
+		
+		if (((src->buffer_pos - base) & src->mask)/2 >= sync_samples) {
+			render_do_audio_ready(src);
+		}
+		src->buffer_pos &= src->mask;
+	}
+	src->last_left = left;
+	src->last_right = right;
+}
+
+static void update_source(audio_source *src, double rc, uint8_t sync_changed)
+{
+	double alpha = src->dt / (src->dt + rc);
+	int32_t lowpass_alpha = (int32_t)(((double)0x10000) * alpha);
+	src->lowpass_alpha = lowpass_alpha;
+	if (sync_changed) {
+		uint32_t alloc_size = render_is_audio_sync() ? src->num_channels * buffer_samples : nearest_pow2(render_min_buffered() * 4 * src->num_channels);
+		src->back = realloc(src->back, alloc_size * sizeof(int16_t));
+		if (render_is_audio_sync()) {
+			src->front = malloc(alloc_size * sizeof(int16_t));
+		} else {
+			free(src->front);
+			src->front = src->back;
+		}
+		src->mask = render_is_audio_sync() ? 0xFFFFFFFF : alloc_size-1;
+		src->read_start = 0;
+		src->read_end = render_is_audio_sync() ? buffer_samples * src->num_channels : 0;
+		src->buffer_pos = 0;
+	}
+}
+
+uint8_t old_audio_sync;
+void render_audio_initialized(render_audio_format format, uint32_t rate, uint8_t channels, uint32_t buffer_size, int sample_size_in)
+{
+	sample_rate = rate;
+	output_channels = channels;
+	buffer_samples = buffer_size;
+	sample_size = sample_size_in;
+	if (mix_buf) {
+		free(mix_buf);
+		mix_buf = NULL;
+	}
+	switch(format)
+	{
+	case RENDER_AUDIO_S16:
+		convert = convert_s16;
+		mix_buf = calloc(output_channels * buffer_samples, sizeof(float));
+		break;
+	case RENDER_AUDIO_FLOAT:
+		convert = clamp_f32;
+		break;
+	case RENDER_AUDIO_UNKNOWN:
+		convert = convert_null;
+		mix_buf = calloc(output_channels * buffer_samples, sizeof(float));
+		break;
+	}
+	uint32_t syncs = render_audio_syncs_per_sec();
+	if (syncs) {
+		sync_samples = rate / syncs;
+	} else {
+		sync_samples = buffer_samples;
+	}
+	char * gain_str = tern_find_path(config, "audio\0gain\0", TVAL_PTR).ptrval;
+	overall_gain_mult = db_to_mult(gain_str ? atof(gain_str) : 0.0f);
+	uint8_t sync_changed = old_audio_sync != render_is_audio_sync();
+	old_audio_sync = render_is_audio_sync();
+	double lowpass_cutoff = get_lowpass_cutoff(config);
+	double rc = (1.0 / lowpass_cutoff) / (2.0 * M_PI);
+	render_lock_audio();
+		for (uint8_t i = 0; i < num_audio_sources; i++)
+		{
+			update_source(audio_sources[i], rc, sync_changed);
+		}
+	render_unlock_audio();
+	for (uint8_t i = 0; i < num_inactive_audio_sources; i++)
+	{
+		update_source(inactive_audio_sources[i], rc, sync_changed);
+	}
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/render_audio.h	Sat Apr 18 11:42:53 2020 -0700
@@ -0,0 +1,57 @@
+#ifndef RENDER_AUDIO_H_
+#define RENDER_AUDIO_H_
+
+#include <stdint.h>
+typedef enum {
+	RENDER_AUDIO_S16,
+	RENDER_AUDIO_FLOAT,
+	RENDER_AUDIO_UNKNOWN
+} render_audio_format;
+
+typedef struct {
+	void     *opaque;
+	int16_t  *front;
+	int16_t  *back;
+	double   dt;
+	uint64_t buffer_fraction;
+	uint64_t buffer_inc;
+	float    gain_mult;
+	uint32_t buffer_pos;
+	uint32_t read_start;
+	uint32_t read_end;
+	uint32_t lowpass_alpha;
+	uint32_t mask;
+	int16_t  last_left;
+	int16_t  last_right;
+	uint8_t  num_channels;
+	uint8_t  front_populated;
+} audio_source;
+
+//public interface
+audio_source *render_audio_source(uint64_t master_clock, uint64_t sample_divider, uint8_t channels);
+void render_audio_source_gaindb(audio_source *src, float gain);
+void render_audio_adjust_clock(audio_source *src, uint64_t master_clock, uint64_t sample_divider);
+void render_put_mono_sample(audio_source *src, int16_t value);
+void render_put_stereo_sample(audio_source *src, int16_t left, int16_t right);
+void render_pause_source(audio_source *src);
+void render_resume_source(audio_source *src);
+void render_free_source(audio_source *src);
+//interface for render backends
+void render_audio_initialized(render_audio_format format, uint32_t rate, uint8_t channels, uint32_t buffer_size, int sample_size);
+int mix_and_convert(unsigned char *byte_stream, int len, int *min_remaining_out);
+uint8_t all_sources_ready(void);
+void render_audio_adjust_speed(float adjust_ratio);
+//to be implemented by render backend
+uint8_t render_is_audio_sync(void);
+void render_buffer_consumed(audio_source *src);
+void *render_new_audio_opaque(void);
+void render_free_audio_opaque(void *opaque);
+void render_lock_audio(void);
+void render_unlock_audio(void);
+uint32_t render_min_buffered(void);
+uint32_t render_audio_syncs_per_sec(void);
+void render_audio_created(audio_source *src);
+void render_do_audio_ready(audio_source *src);
+void render_source_paused(audio_source *src, uint8_t remaining_sources);
+void render_source_resumed(audio_source *src);
+#endif //RENDER_AUDIO_H_
--- a/render_sdl.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/render_sdl.c	Sat Apr 18 11:42:53 2020 -0700
@@ -46,151 +46,41 @@
 
 static uint32_t last_frame = 0;
 
-static uint8_t output_channels;
-static uint32_t buffer_samples, sample_rate;
-static uint32_t missing_count;
-
 static SDL_mutex * audio_mutex;
 static SDL_cond * audio_ready;
 static uint8_t quitting = 0;
 
-struct audio_source {
-	SDL_cond *cond;
-	int16_t  *front;
-	int16_t  *back;
-	double   dt;
-	uint64_t buffer_fraction;
-	uint64_t buffer_inc;
-	float    gain_mult;
-	uint32_t buffer_pos;
-	uint32_t read_start;
-	uint32_t read_end;
-	uint32_t lowpass_alpha;
-	uint32_t mask;
-	int16_t  last_left;
-	int16_t  last_right;
-	uint8_t  num_channels;
-	uint8_t  front_populated;
-};
-
-static audio_source *audio_sources[8];
-static audio_source *inactive_audio_sources[8];
-static uint8_t num_audio_sources;
-static uint8_t num_inactive_audio_sources;
 static uint8_t sync_to_audio;
 static uint32_t min_buffered;
-static float overall_gain_mult, *mix_buf;
-static int sample_size;
 
-typedef void (*conv_func)(float *samples, void *vstream, int sample_count);
-
-static void convert_null(float *samples, void *vstream, int sample_count)
+uint32_t render_min_buffered(void)
 {
-	memset(vstream, 0, sample_count * sample_size);
+	return min_buffered;
 }
 
-static void convert_s16(float *samples, void *vstream, int sample_count)
+uint8_t render_is_audio_sync(void)
 {
-	int16_t *stream = vstream;
-	for (int16_t *end = stream + sample_count; stream < end; stream++, samples++)
-	{
-		float sample = *samples;
-		int16_t out_sample;
-		if (sample >= 1.0f) {
-			out_sample = 0x7FFF;
-		} else if (sample <= -1.0f) {
-			out_sample = -0x8000;
-		} else {
-			out_sample = sample * 0x7FFF;
-		}
-		*stream = out_sample;
-	}
+	return sync_to_audio;
 }
 
-static void clamp_f32(float *samples, void *vstream, int sample_count)
+void render_buffer_consumed(audio_source *src)
 {
-	for (; sample_count > 0; sample_count--, samples++)
-	{
-		float sample = *samples;
-		if (sample > 1.0f) {
-			sample = 1.0f;
-		} else if (sample < -1.0f) {
-			sample = -1.0f;
-		}
-		*samples = sample;
-	}
+	SDL_CondSignal(src->opaque);
 }
 
-static int32_t mix_f32(audio_source *audio, float *stream, int samples)
-{
-	float *end = stream + samples;
-	int16_t *src = audio->front;
-	uint32_t i = audio->read_start;
-	uint32_t i_end = audio->read_end;
-	float *cur = stream;
-	float gain_mult = audio->gain_mult * overall_gain_mult;
-	size_t first_add = output_channels > 1 ? 1 : 0, second_add = output_channels > 1 ? output_channels - 1 : 1;
-	if (audio->num_channels == 1) {
-		while (cur < end && i != i_end)
-		{
-			*cur += gain_mult * ((float)src[i]) / 0x7FFF;
-			cur += first_add;
-			*cur += gain_mult * ((float)src[i++]) / 0x7FFF;
-			cur += second_add;
-			i &= audio->mask;
-		}
-	} else {
-		while(cur < end && i != i_end)
-		{
-			*cur += gain_mult * ((float)src[i++]) / 0x7FFF;
-			cur += first_add;
-			*cur += gain_mult * ((float)src[i++]) / 0x7FFF;
-			cur += second_add;
-			i &= audio->mask;
-		}
-	}
-	if (!sync_to_audio) {
-		audio->read_start = i;
-	}
-	if (cur != end) {
-		debug_message("Underflow of %d samples, read_start: %d, read_end: %d, mask: %X\n", (int)(end-cur)/2, audio->read_start, audio->read_end, audio->mask);
-		return (cur-end)/2;
-	} else {
-		return ((i_end - i) & audio->mask) / audio->num_channels;
-	}
-}
-
-static conv_func convert;
-
 static void audio_callback(void * userdata, uint8_t *byte_stream, int len)
 {
-	uint8_t num_populated;
 	SDL_LockMutex(audio_mutex);
+		uint8_t all_ready;
 		do {
-			num_populated = 0;
-			for (uint8_t i = 0; i < num_audio_sources; i++)
-			{
-				if (audio_sources[i]->front_populated) {
-					num_populated++;
-				}
-			}
-			if (!quitting && num_populated < num_audio_sources) {
-				fflush(stdout);
+			all_ready = all_sources_ready();
+			if (!quitting && !all_ready) {
 				SDL_CondWait(audio_ready, audio_mutex);
 			}
-		} while(!quitting && num_populated < num_audio_sources);
-		int samples = len / sample_size;
-		float *mix_dest = mix_buf ? mix_buf : (float *)byte_stream;
-		memset(mix_dest, 0, samples * sizeof(float));
+		} while(!quitting && !all_ready);
 		if (!quitting) {
-			for (uint8_t i = 0; i < num_audio_sources; i++)
-			{
-				mix_f32(audio_sources[i], mix_dest, samples);
-				audio_sources[i]->front_populated = 0;
-				SDL_CondSignal(audio_sources[i]->cond);
-			}
+			mix_and_convert(byte_stream, len, NULL);
 		}
-		convert(mix_dest, byte_stream, samples);
 	SDL_UnlockMutex(audio_mutex);
 }
 
@@ -208,23 +98,10 @@
 		//underflow last frame, but main thread hasn't gotten a chance to call SDL_PauseAudio yet
 		return;
 	}
-	cur_min_buffered = 0x7FFFFFFF;
-	min_remaining_buffer = 0xFFFFFFFF;
-	float *mix_dest = mix_buf ? mix_buf : (float *)byte_stream;	
-	int samples = len / sample_size;
-	memset(mix_dest, 0, samples * sizeof(float));
-	for (uint8_t i = 0; i < num_audio_sources; i++)
-	{
-		
-		int32_t buffered = mix_f32(audio_sources[i], mix_dest, samples);
-		cur_min_buffered = buffered < cur_min_buffered ? buffered : cur_min_buffered;
-		uint32_t remaining = (audio_sources[i]->mask + 1)/audio_sources[i]->num_channels - buffered;
-		min_remaining_buffer = remaining < min_remaining_buffer ? remaining : min_remaining_buffer;
-	}
-	convert(mix_dest, byte_stream, samples);
+	cur_min_buffered = mix_and_convert(byte_stream, len, &min_remaining_buffer);
 }
 
-static void lock_audio()
+void render_lock_audio()
 {
 	if (sync_to_audio) {
 		SDL_LockMutex(audio_mutex);
@@ -233,7 +110,7 @@
 	}
 }
 
-static void unlock_audio()
+void render_unlock_audio()
 {
 	if (sync_to_audio) {
 		SDL_UnlockMutex(audio_mutex);
@@ -249,127 +126,55 @@
 		SDL_CondSignal(audio_ready);
 	SDL_UnlockMutex(audio_mutex);
 	SDL_CloseAudio();
+	/*
+	FIXME: move this to render_audio.c
 	if (mix_buf) {
 		free(mix_buf);
 		mix_buf = NULL;
 	}
+	*/
 }
 
-#define BUFFER_INC_RES 0x40000000UL
-
-void render_audio_adjust_clock(audio_source *src, uint64_t master_clock, uint64_t sample_divider)
+void *render_new_audio_opaque(void)
 {
-	src->buffer_inc = ((BUFFER_INC_RES * (uint64_t)sample_rate) / master_clock) * sample_divider;
+	return SDL_CreateCond();
 }
 
-audio_source *render_audio_source(uint64_t master_clock, uint64_t sample_divider, uint8_t channels)
+void render_free_audio_opaque(void *opaque)
 {
-	audio_source *ret = NULL;
-	uint32_t alloc_size = sync_to_audio ? channels * buffer_samples : nearest_pow2(min_buffered * 4 * channels);
-	lock_audio();
-		if (num_audio_sources < 8) {
-			ret = malloc(sizeof(audio_source));
-			ret->back = malloc(alloc_size * sizeof(int16_t));
-			ret->front = sync_to_audio ? malloc(alloc_size * sizeof(int16_t)) : ret->back;
-			ret->front_populated = 0;
-			ret->cond = SDL_CreateCond();
-			ret->num_channels = channels;
-			audio_sources[num_audio_sources++] = ret;
-		}
-	unlock_audio();
-	if (!ret) {
-		fatal_error("Too many audio sources!");
-	} else {
-		render_audio_adjust_clock(ret, master_clock, sample_divider);
-		double lowpass_cutoff = get_lowpass_cutoff(config);
-		double rc = (1.0 / lowpass_cutoff) / (2.0 * M_PI);
-		ret->dt = 1.0 / ((double)master_clock / (double)(sample_divider));
-		double alpha = ret->dt / (ret->dt + rc);
-		ret->lowpass_alpha = (int32_t)(((double)0x10000) * alpha);
-		ret->buffer_pos = 0;
-		ret->buffer_fraction = 0;
-		ret->last_left = ret->last_right = 0;
-		ret->read_start = 0;
-		ret->read_end = sync_to_audio ? buffer_samples * channels : 0;
-		ret->mask = sync_to_audio ? 0xFFFFFFFF : alloc_size-1;
-		ret->gain_mult = 1.0f;
-	}
+	SDL_DestroyCond(opaque);
+}
+
+void render_audio_created(audio_source *source)
+{
 	if (sync_to_audio && SDL_GetAudioStatus() == SDL_AUDIO_PAUSED) {
 		SDL_PauseAudio(0);
 	}
-	return ret;
 }
 
-static float db_to_mult(float gain)
+void render_source_paused(audio_source *src, uint8_t remaining_sources)
 {
-	return powf(10.0f, gain/20.0f);
+	if (sync_to_audio) {
+		SDL_CondSignal(audio_ready);
+	}
+	if (!remaining_sources) {
+		SDL_PauseAudio(0);
+	}
 }
 
-void render_audio_source_gaindb(audio_source *src, float gain)
+void render_source_resumed(audio_source *src)
 {
-	src->gain_mult = db_to_mult(gain);
-}
-
-void render_pause_source(audio_source *src)
-{
-	uint8_t need_pause = 0;
-	lock_audio();
-		for (uint8_t i = 0; i < num_audio_sources; i++)
-		{
-			if (audio_sources[i] == src) {
-				audio_sources[i] = audio_sources[--num_audio_sources];
-				if (sync_to_audio) {
-					SDL_CondSignal(audio_ready);
-				}
-				break;
-			}
-		}
-		if (!num_audio_sources) {
-			need_pause = 1;
-		}
-	unlock_audio();
-	if (need_pause) {
-		SDL_PauseAudio(1);
-	}
-	inactive_audio_sources[num_inactive_audio_sources++] = src;
-}
-
-void render_resume_source(audio_source *src)
-{
-	lock_audio();
-		if (num_audio_sources < 8) {
-			audio_sources[num_audio_sources++] = src;
-		}
-	unlock_audio();
-	for (uint8_t i = 0; i < num_inactive_audio_sources; i++)
-	{
-		if (inactive_audio_sources[i] == src) {
-			inactive_audio_sources[i] = inactive_audio_sources[--num_inactive_audio_sources];
-		}
-	}
 	if (sync_to_audio) {
 		SDL_PauseAudio(0);
 	}
 }
 
-void render_free_source(audio_source *src)
-{
-	render_pause_source(src);
-	
-	free(src->front);
-	if (sync_to_audio) {
-		free(src->back);
-		SDL_DestroyCond(src->cond);
-	}
-	free(src);
-}
-static uint32_t sync_samples;
-static void do_audio_ready(audio_source *src)
+void render_do_audio_ready(audio_source *src)
 {
 	if (sync_to_audio) {
 		SDL_LockMutex(audio_mutex);
 			while (src->front_populated) {
-				SDL_CondWait(src->cond, audio_mutex);
+				SDL_CondWait(src->opaque, audio_mutex);
 			}
 			int16_t *tmp = src->front;
 			src->front = src->back;
@@ -390,62 +195,9 @@
 	}
 }
 
-static int16_t lowpass_sample(audio_source *src, int16_t last, int16_t current)
-{
-	int32_t tmp = current * src->lowpass_alpha + last * (0x10000 - src->lowpass_alpha);
-	current = tmp >> 16;
-	return current;
-}
-
-static void interp_sample(audio_source *src, int16_t last, int16_t current)
-{
-	int64_t tmp = last * ((src->buffer_fraction << 16) / src->buffer_inc);
-	tmp += current * (0x10000 - ((src->buffer_fraction << 16) / src->buffer_inc));
-	src->back[src->buffer_pos++] = tmp >> 16;
-}
-
-void render_put_mono_sample(audio_source *src, int16_t value)
-{
-	value = lowpass_sample(src, src->last_left, value);
-	src->buffer_fraction += src->buffer_inc;
-	uint32_t base = sync_to_audio ? 0 : src->read_end;
-	while (src->buffer_fraction > BUFFER_INC_RES)
-	{
-		src->buffer_fraction -= BUFFER_INC_RES;
-		interp_sample(src, src->last_left, value);
-		
-		if (((src->buffer_pos - base) & src->mask) >= sync_samples) {
-			do_audio_ready(src);
-		}
-		src->buffer_pos &= src->mask;
-	}
-	src->last_left = value;
-}
-
-void render_put_stereo_sample(audio_source *src, int16_t left, int16_t right)
-{
-	left = lowpass_sample(src, src->last_left, left);
-	right = lowpass_sample(src, src->last_right, right);
-	src->buffer_fraction += src->buffer_inc;
-	uint32_t base = sync_to_audio ? 0 : src->read_end;
-	while (src->buffer_fraction > BUFFER_INC_RES)
-	{
-		src->buffer_fraction -= BUFFER_INC_RES;
-		
-		interp_sample(src, src->last_left, left);
-		interp_sample(src, src->last_right, right);
-		
-		if (((src->buffer_pos - base) & src->mask)/2 >= sync_samples) {
-			do_audio_ready(src);
-		}
-		src->buffer_pos &= src->mask;
-	}
-	src->last_left = left;
-	src->last_right = right;
-}
-
 static SDL_Joystick * joysticks[MAX_JOYSTICKS];
 static int joystick_sdl_index[MAX_JOYSTICKS];
+static uint8_t joystick_index_locked[MAX_JOYSTICKS];
 
 int render_width()
 {
@@ -635,12 +387,11 @@
 	if (texture_init) {
 		return;
 	}
-	sdl_textures= malloc(sizeof(SDL_Texture *) * 2);
-	num_textures = 2;
+	sdl_textures= calloc(sizeof(SDL_Texture *), 3);
+	num_textures = 3;
 	texture_init = 1;
 #ifndef DISABLE_OPENGL
 	if (render_gl) {
-		sdl_textures[0] = sdl_textures[1] = NULL;
 		gl_setup();
 	} else {
 #endif
@@ -889,6 +640,16 @@
 	return -1;
 }
 
+static int lowest_unlocked_joystick_index(void)
+{
+	for (int i = 0; i < MAX_JOYSTICKS; i++) {
+		if (!joystick_index_locked[i]) {
+			return i;
+		}
+	}
+	return -1;
+}
+
 SDL_Joystick *render_get_joystick(int index)
 {
 	if (index >= MAX_JOYSTICKS) {
@@ -910,12 +671,29 @@
 
 SDL_GameController *render_get_controller(int index)
 {
-	if (index >= MAX_JOYSTICKS) {
+	if (index >= MAX_JOYSTICKS || !joysticks[index]) {
 		return NULL;
 	}
 	return SDL_GameControllerOpen(joystick_sdl_index[index]);
 }
 
+static uint8_t gc_events_enabled;
+static SDL_GameController *controllers[MAX_JOYSTICKS];
+void render_enable_gamepad_events(uint8_t enabled)
+{
+	if (enabled != gc_events_enabled) {
+		gc_events_enabled = enabled;
+		for (int i = 0; i < MAX_JOYSTICKS; i++) {
+			if (enabled) {
+				controllers[i] = render_get_controller(i);
+			} else if (controllers[i]) {
+				SDL_GameControllerClose(controllers[i]);
+				controllers[i] = NULL;
+			}
+		}
+	}
+}
+
 static uint32_t overscan_top[NUM_VID_STD] = {2, 21};
 static uint32_t overscan_bot[NUM_VID_STD] = {1, 17};
 static uint32_t overscan_left[NUM_VID_STD] = {13, 13};
@@ -923,6 +701,30 @@
 static vid_std video_standard = VID_NTSC;
 static uint8_t need_ui_fb_resize;
 
+int lock_joystick_index(int joystick, int desired_index)
+{
+	if (desired_index < 0) {
+		desired_index = lowest_unlocked_joystick_index();
+		if (desired_index < 0 || desired_index >= joystick) {
+			return joystick;
+		}
+	}
+	SDL_Joystick *tmp_joy = joysticks[joystick];
+	int tmp_index = joystick_sdl_index[joystick];
+	joysticks[joystick] = joysticks[desired_index];
+	joystick_sdl_index[joystick] = joystick_sdl_index[desired_index];
+	joystick_index_locked[joystick] = joystick_sdl_index[desired_index];
+	joysticks[desired_index] = tmp_joy;
+	joystick_sdl_index[desired_index] = tmp_index;
+	joystick_index_locked[desired_index] = 1;
+	//update bindings as the controllers being swapped may have different mappings
+	handle_joy_added(desired_index);
+	if (joysticks[joystick]) {
+		handle_joy_added(joystick);
+	}
+	return desired_index;
+}
+
 static int32_t handle_event(SDL_Event *event)
 {
 	if (custom_event_handler) {
@@ -939,13 +741,13 @@
 		handle_joydown(find_joystick_index(event->jbutton.which), event->jbutton.button);
 		break;
 	case SDL_JOYBUTTONUP:
-		handle_joyup(find_joystick_index(event->jbutton.which), event->jbutton.button);
+		handle_joyup(lock_joystick_index(find_joystick_index(event->jbutton.which), -1), event->jbutton.button);
 		break;
 	case SDL_JOYHATMOTION:
-		handle_joy_dpad(find_joystick_index(event->jhat.which), event->jhat.hat, event->jhat.value);
+		handle_joy_dpad(lock_joystick_index(find_joystick_index(event->jhat.which), -1), event->jhat.hat, event->jhat.value);
 		break;
 	case SDL_JOYAXISMOTION:
-		handle_joy_axis(find_joystick_index(event->jaxis.which), event->jaxis.axis, event->jaxis.value);
+		handle_joy_axis(lock_joystick_index(find_joystick_index(event->jaxis.which), -1), event->jaxis.axis, event->jaxis.value);
 		break;
 	case SDL_JOYDEVICEADDED:
 		if (event->jdevice.which < MAX_JOYSTICKS) {
@@ -953,6 +755,10 @@
 			if (index >= 0) {
 				SDL_Joystick * joy = joysticks[index] = SDL_JoystickOpen(event->jdevice.which);
 				joystick_sdl_index[index] = event->jdevice.which;
+				joystick_index_locked[index] = 0;
+				if (gc_events_enabled) {
+					controllers[index] = SDL_GameControllerOpen(event->jdevice.which);
+				}
 				if (joy) {
 					debug_message("Joystick %d added: %s\n", index, SDL_JoystickName(joy));
 					debug_message("\tNum Axes: %d\n\tNum Buttons: %d\n\tNum Hats: %d\n", SDL_JoystickNumAxes(joy), SDL_JoystickNumButtons(joy), SDL_JoystickNumHats(joy));
@@ -966,6 +772,10 @@
 		if (index >= 0) {
 			SDL_JoystickClose(joysticks[index]);
 			joysticks[index] = NULL;
+			if (controllers[index]) {
+				SDL_GameControllerClose(controllers[index]);
+				controllers[index] = NULL;
+			}
 			debug_message("Joystick %d removed\n", index);
 		} else {
 			debug_message("Failed to find removed joystick with instance ID: %d\n", index);
@@ -1053,6 +863,7 @@
 static int source_frame_count;
 static int frame_repeat[60];
 
+static uint32_t sample_rate;
 static void init_audio()
 {
 	SDL_AudioSpec desired, actual;
@@ -1077,27 +888,20 @@
 	if (SDL_OpenAudio(&desired, &actual) < 0) {
 		fatal_error("Unable to open SDL audio: %s\n", SDL_GetError());
 	}
-	buffer_samples = actual.samples;
 	sample_rate = actual.freq;
-	output_channels = actual.channels;
 	debug_message("Initialized audio at frequency %d with a %d sample buffer, ", actual.freq, actual.samples);
-	sample_size = SDL_AUDIO_BITSIZE(actual.format) / 8;
+	render_audio_format format = RENDER_AUDIO_UNKNOWN;
 	if (actual.format == AUDIO_S16SYS) {
 		debug_message("signed 16-bit int format\n");
-		convert = convert_s16;
-		mix_buf = calloc(output_channels * buffer_samples, sizeof(float));
+		format = RENDER_AUDIO_S16;
 	} else if (actual.format == AUDIO_F32SYS) {
 		debug_message("32-bit float format\n");
-		convert = clamp_f32;
-		mix_buf = NULL;
+		format = RENDER_AUDIO_FLOAT;
 	} else {
 		debug_message("unsupported format %X\n", actual.format);
 		warning("Unsupported audio sample format: %X\n", actual.format);
-		convert = convert_null;
-		mix_buf = calloc(output_channels * buffer_samples, sizeof(float));
 	}
-	char * gain_str = tern_find_path(config, "audio\0gain\0", TVAL_PTR).ptrval;
-	overall_gain_mult = db_to_mult(gain_str ? atof(gain_str) : 0.0f);
+	render_audio_initialized(format, actual.freq, actual.channels, actual.samples, SDL_AUDIO_BITSIZE(actual.format) / 8);
 }
 
 void window_setup(void)
@@ -1292,26 +1096,6 @@
 }
 #include<unistd.h>
 static int in_toggle;
-static void update_source(audio_source *src, double rc, uint8_t sync_changed)
-{
-	double alpha = src->dt / (src->dt + rc);
-	int32_t lowpass_alpha = (int32_t)(((double)0x10000) * alpha);
-	src->lowpass_alpha = lowpass_alpha;
-	if (sync_changed) {
-		uint32_t alloc_size = sync_to_audio ? src->num_channels * buffer_samples : nearest_pow2(min_buffered * 4 * src->num_channels);
-		src->back = realloc(src->back, alloc_size * sizeof(int16_t));
-		if (sync_to_audio) {
-			src->front = malloc(alloc_size * sizeof(int16_t));
-		} else {
-			free(src->front);
-			src->front = src->back;
-		}
-		src->mask = sync_to_audio ? 0xFFFFFFFF : alloc_size-1;
-		src->read_start = 0;
-		src->read_end = sync_to_audio ? buffer_samples * src->num_channels : 0;
-		src->buffer_pos = 0;
-	}
-}
 
 void render_config_updated(void)
 {
@@ -1378,18 +1162,6 @@
 	init_audio();
 	render_set_video_standard(video_standard);
 	
-	double lowpass_cutoff = get_lowpass_cutoff(config);
-	double rc = (1.0 / lowpass_cutoff) / (2.0 * M_PI);
-	lock_audio();
-		for (uint8_t i = 0; i < num_audio_sources; i++)
-		{
-			update_source(audio_sources[i], rc, old_sync_to_audio != sync_to_audio);
-		}
-	unlock_audio();
-	for (uint8_t i = 0; i < num_inactive_audio_sources; i++)
-	{
-		update_source(inactive_audio_sources[i], rc, old_sync_to_audio != sync_to_audio);
-	}
 	drain_events();
 	in_toggle = 0;
 	if (!was_paused) {
@@ -1402,6 +1174,12 @@
 	return main_window;
 }
 
+uint32_t render_audio_syncs_per_sec(void)
+{
+	//sync samples with audio thread approximately every 8 lines when doing sync to video
+	return sync_to_audio ? 0 : source_hz * (video_standard == VID_PAL ? 313 : 262) / 8;
+}
+
 void render_set_video_standard(vid_std std)
 {
 	video_standard = std;
@@ -1431,8 +1209,6 @@
 	}
 	source_frame = 0;
 	source_frame_count = frame_repeat[0];
-	//sync samples with audio thread approximately every 8 lines
-	sync_samples = sync_to_audio ? buffer_samples : 8 * sample_rate / (source_hz * (std == VID_PAL ? 313 : 262));
 	max_repeat++;
 	min_buffered = (((float)max_repeat * (float)sample_rate/(float)source_hz)/* / (float)buffer_samples*/);// + 0.9999;
 	//min_buffered *= buffer_samples;
@@ -1521,13 +1297,6 @@
 		return texture_buf;
 	} else {
 #endif
-		if (which == FRAMEBUFFER_UI && which >= num_textures) {
-			sdl_textures = realloc(sdl_textures, sizeof(*sdl_textures) * (FRAMEBUFFER_UI + 1));
-			for (; num_textures <= FRAMEBUFFER_UI; num_textures++)
-			{
-				sdl_textures[num_textures] = NULL;
-			}
-		}
 		if (which == FRAMEBUFFER_UI && !sdl_textures[which]) {
 			sdl_textures[which] = SDL_CreateTexture(main_renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, main_width, main_height);
 		}
@@ -1593,7 +1362,7 @@
 #ifndef DISABLE_ZLIB
 			ext = path_extension(screenshot_path);
 #endif
-			info_message("Saving screenshot to %s\n", screenshot_path);
+			debug_message("Saving screenshot to %s\n", screenshot_path);
 		} else {
 			warning("Failed to open screenshot file %s for writing\n", screenshot_path);
 		}
@@ -1748,10 +1517,8 @@
 		}
 		if (adjust_ratio != 0.0f) {
 			average_change = 0;
-			for (uint8_t i = 0; i < num_audio_sources; i++)
-			{
-				audio_sources[i]->buffer_inc = ((double)audio_sources[i]->buffer_inc) + ((double)audio_sources[i]->buffer_inc) * adjust_ratio + 0.5;
-			}
+			render_audio_adjust_speed(adjust_ratio);
+			
 		}
 		while (source_frame_count > 0)
 		{
@@ -1849,7 +1616,12 @@
 	return overscan_top[video_standard];
 }
 
-void render_wait_quit(vdp_context * context)
+uint32_t render_overscan_bot()
+{
+	return overscan_bot[video_standard];
+}
+
+void render_wait_quit(void)
 {
 	SDL_Event event;
 	while(SDL_WaitEvent(&event)) {
@@ -2017,16 +1789,6 @@
 	need_ui_fb_resize = 1;
 }
 
-uint32_t render_audio_buffer()
-{
-	return buffer_samples;
-}
-
-uint32_t render_sample_rate()
-{
-	return sample_rate;
-}
-
 void render_errorbox(char *title, char *message)
 {
 	SDL_ShowSimpleMessageBox(SDL_MESSAGEBOX_ERROR, title, message, NULL);
--- a/render_sdl.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/render_sdl.h	Sat Apr 18 11:42:53 2020 -0700
@@ -11,5 +11,6 @@
 SDL_GameController *render_get_controller(int index);
 int render_lookup_button(char *name);
 int render_lookup_axis(char *name);
+void render_enable_gamepad_events(uint8_t enabled);
 
 #endif //RENDER_SDL_H_
--- a/romdb.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/romdb.c	Sat Apr 18 11:42:53 2020 -0700
@@ -77,6 +77,7 @@
 	switch(gen->mapper_type)
 	{
 	case MAPPER_SEGA:
+	case MAPPER_SEGA_SRAM:
 		sega_mapper_serialize(gen, buf);
 		break;
 	case MAPPER_REALTEC:
@@ -96,13 +97,14 @@
 {
 	genesis_context *gen = vcontext;
 	uint8_t mapper_type = load_int8(buf);
-	if (mapper_type != gen->mapper_type) {
-		warning("Mapper type mismatch, skipping load of mapper state");
+	if (mapper_type != gen->mapper_type && (mapper_type != MAPPER_SEGA || gen->mapper_type != MAPPER_SEGA_SRAM)) {
+		warning("Mapper type mismatch, skipping load of mapper state\n");
 		return;
 	}
 	switch(gen->mapper_type)
 	{
 	case MAPPER_SEGA:
+	case MAPPER_SEGA_SRAM:
 		sega_mapper_deserialize(buf, gen);
 		break;
 	case MAPPER_REALTEC:
@@ -242,7 +244,7 @@
 		save_size /= 2;
 	}
 	info->save_size = save_size;
-	info->save_buffer = malloc(save_size);
+	info->save_buffer = calloc(save_size, 1);
 	return ram_start;
 }
 
@@ -254,6 +256,7 @@
 	} else if (rom_end > nearest_pow2(size)) {
 		rom_end = nearest_pow2(size);
 	}
+	info->save_type = SAVE_NONE;
 	if (size >= 0x80000 && !memcmp("SEGA SSF", rom + 0x100, 8)) {
 		info->mapper_start_index = 0;
 		info->mapper_type = MAPPER_SEGA;
@@ -310,7 +313,7 @@
 		info->map[0].read_16 = nor_flash_read_w;
 		info->map[0].read_8 = nor_flash_read_b;
 		info->map[0].flags = MMAP_READ_CODE | MMAP_CODE;
-		info->map[0].buffer = info->save_buffer = malloc(info->save_size);
+		info->map[0].buffer = info->save_buffer = calloc(info->save_size, 1);
 		uint32_t init_size = size < info->save_size ? size : info->save_size;
 		memcpy(info->save_buffer, rom, init_size);
 		byteswap_rom(info->save_size, (uint16_t *)info->save_buffer);
@@ -365,7 +368,7 @@
 				info->map[1].buffer = info->save_buffer;
 			} else {
 				//Assume the standard Sega mapper
-				info->mapper_type = MAPPER_SEGA;
+				info->mapper_type = MAPPER_SEGA_SRAM;
 				info->map[0].end = 0x200000;
 				info->map[0].mask = 0xFFFFFF;
 				info->map[0].flags = MMAP_READ;
@@ -381,8 +384,13 @@
 				info->map[1].write_16 = (write_16_fun)write_sram_area_w;//these will be called all writes to the area
 				info->map[1].write_8 = (write_8_fun)write_sram_area_b;
 				info->map[1].buffer = rom + 0x200000;
-
+				
+				//Last entry in the base map is a catch all one that needs to be
+				//after all the other entries
+				memmap_chunk *unused = info->map + info->map_chunks - 2;
 				memmap_chunk *last = info->map + info->map_chunks - 1;
+				*last = *unused;
+				last = unused;
 				memset(last, 0, sizeof(memmap_chunk));
 				last->start = 0xA13000;
 				last->end = 0xA13100;
@@ -489,8 +497,7 @@
 			fatal_error("SRAM size %s is invalid\n", size);
 		}
 		state->info->save_mask = nearest_pow2(state->info->save_size)-1;
-		state->info->save_buffer = malloc(state->info->save_size);
-		memset(state->info->save_buffer, 0, state->info->save_size);
+		state->info->save_buffer = calloc(state->info->save_size, 1);
 		char *bus = tern_find_path(state->root, "SRAM\0bus\0", TVAL_PTR).ptrval;
 		if (!strcmp(bus, "odd")) {
 			state->info->save_type = RAM_FLAG_ODD;
@@ -567,6 +574,9 @@
 		if (!strcmp(init, "ROM")) {
 			uint32_t init_size = state->rom_size > state->info->save_size ? state->info->save_size : state->rom_size;
 			memcpy(state->info->save_buffer, state->rom, init_size);
+			if (init_size < state->info->save_size) {
+				memset(state->info->save_buffer + init_size, 0xFF, state->info->save_size - init_size);
+			}
 			if (state->info->save_bus == RAM_FLAG_BOTH) {
 				byteswap_rom(state->info->save_size, (uint16_t *)state->info->save_buffer);
 			}
@@ -712,7 +722,7 @@
 		if (!size || size > map->end - map->start) {
 			size = map->end - map->start;
 		}
-		map->buffer = malloc(size);
+		map->buffer = calloc(size, 1);
 		map->mask = calc_mask(size, start, end);
 		map->flags = MMAP_READ | MMAP_WRITE;
 		char *bus = tern_find_ptr_default(node, "bus", "both");
--- a/romdb.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/romdb.h	Sat Apr 18 11:42:53 2020 -0700
@@ -43,6 +43,7 @@
 enum {
 	MAPPER_NONE,
 	MAPPER_SEGA,
+	MAPPER_SEGA_SRAM,
 	MAPPER_REALTEC,
 	MAPPER_XBAND,
 	MAPPER_MULTI_GAME,
--- a/sega_mapper.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/sega_mapper.c	Sat Apr 18 11:42:53 2020 -0700
@@ -116,7 +116,7 @@
 				context->mem_pointers[gen->mapper_start_index + i] = gen->cart + 0x40000*gen->bank_regs[i];
 			}
 		}
-	} else {
+	} else if (gen->mapper_type == MAPPER_SEGA) {
 		void *new_ptr = gen->cart + 0x40000*value;
 		if (context->mem_pointers[gen->mapper_start_index + address] != new_ptr) {
 			context->mem_pointers[gen->mapper_start_index + address] = new_ptr;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shaders/sharp.f.glsl	Sat Apr 18 11:42:53 2020 -0700
@@ -0,0 +1,31 @@
+
+uniform sampler2D textures[2];
+
+varying mediump vec2 texcoord;
+
+void main()
+{
+	mediump float x0 = (floor(texcoord.x * 512.0 - 0.25) + 0.5)/512.0;
+	mediump float x1 = (floor(texcoord.x * 512.0 + 0.25) + 0.5)/512.0;
+	mediump float y0 = (floor(texcoord.y * 512.0 + 0.25) + 0.5)/512.0;
+	mediump float y1 = (floor(texcoord.y * 512.0 - 0.25) + 0.5)/512.0;
+	
+	
+	mediump vec2 modifiedCoord0 = vec2(texcoord.x, (floor(texcoord.y * 512.0 + 0.25) + 0.5)/512.0);
+	mediump vec2 modifiedCoord1 = vec2(texcoord.x, (floor(texcoord.y * 512.0 - 0.25) + 0.5)/512.0);
+	mediump float ymix = (sin(texcoord.y * 1024.0 * 3.14159265359) + 1.0) * 0.5;
+	mediump float xmix = (sin(texcoord.x * 1024.0 * 3.14159265359) + 1.0) * 0.5;
+	gl_FragColor = mix(
+		mix(
+			texture2D(textures[1], vec2(x0, y1)),
+			texture2D(textures[0], vec2(x0, y0)),
+			ymix
+		),
+		mix(
+			texture2D(textures[1], vec2(x1, y1)),
+			texture2D(textures[0], vec2(x1, y0)),
+			ymix
+		),
+		xmix
+	);
+}
--- a/sms.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/sms.c	Sat Apr 18 11:42:53 2020 -0700
@@ -9,7 +9,7 @@
 #include "saves.h"
 #include "bindings.h"
 
-#ifndef USE_NATIVE
+#ifdef NEW_CORE
 #define Z80_CYCLE cycles
 #define Z80_OPTS opts
 #define z80_handle_code_write(...)
@@ -70,7 +70,7 @@
 {
 	uint32_t vint = vdp_next_vint(sms->vdp);
 	uint32_t hint = vdp_next_hint(sms->vdp);
-#ifndef USE_NATIVE
+#ifdef NEW_CORE
 	sms->z80->int_cycle = vint < hint ? vint : hint;
 	z80_sync_cycle(sms->z80, sms->z80->sync_cycle);
 #else
@@ -357,7 +357,7 @@
 	sms_context *sms = (sms_context *)system;
 	char *statepath = get_slot_name(system, slot, "state");
 	uint8_t ret;
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	if (!sms->z80->native_pc) {
 		ret = get_modification_time(statepath) != 0;
 		if (ret) {
@@ -381,7 +381,7 @@
 	render_set_video_standard(VID_NTSC);
 	while (!sms->should_return)
 	{
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 		if (system->delayed_load_slot) {
 			load_state(system, system->delayed_load_slot - 1);
 			system->delayed_load_slot = 0;
@@ -408,7 +408,7 @@
 		vdp_run_context(sms->vdp, target_cycle);
 		psg_run(sms->psg, target_cycle);
 		
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 		if (system->save_state) {
 			while (!sms->z80->pc) {
 				//advance Z80 to an instruction boundary
@@ -460,7 +460,7 @@
 		load_state_path(sms, statefile);
 	}
 	
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	if (system->enter_debugger) {
 		system->enter_debugger = 0;
 		zinsert_breakpoint(sms->z80, sms->z80->pc, (uint8_t *)zdebugger);
@@ -474,7 +474,7 @@
 {
 	sms_context *sms = (sms_context *)system;
 	z80_assert_reset(sms->z80, sms->z80->Z80_CYCLE);
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	sms->z80->target_cycle = sms->z80->sync_cycle = sms->z80->current_cycle;
 #endif
 }
@@ -498,7 +498,7 @@
 {
 	sms_context *sms = (sms_context *)system;
 	sms->should_return = 1;
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 	sms->z80->target_cycle = sms->z80->sync_cycle = sms->z80->Z80_CYCLE;
 #endif
 }
@@ -633,7 +633,7 @@
 	
 	set_gain_config(sms);
 	
-	sms->vdp = init_vdp_context(0);
+	sms->vdp = init_vdp_context(0, 0);
 	sms->vdp->system = &sms->header;
 	
 	sms->header.info.save_type = SAVE_NONE;
--- a/sms.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/sms.h	Sat Apr 18 11:42:53 2020 -0700
@@ -4,7 +4,7 @@
 #include "system.h"
 #include "vdp.h"
 #include "psg.h"
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 #include "z80_to_x86.h"
 #else
 #include "z80.h"
--- a/stateview.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/stateview.c	Sat Apr 18 11:42:53 2020 -0700
@@ -113,7 +113,7 @@
 	height = height < 240 ? (width/320) * 240 : height;
 
 	render_init(width, height, "GST State Viewer", 0);
-	vdp_context *context = init_vdp_context(0);
+	vdp_context *context = init_vdp_context(0, 0);
 	vdp_load_gst(context, state_file);
 	vdp_run_to_vblank(context);
 	vdp_print_sprite_table(context);
@@ -122,6 +122,6 @@
 		puts("Forcing display on");
 		vdp_control_port_write(context, 0x8000 | REG_MODE_2 << 8 | context->regs[REG_MODE_2] | DISPLAY_ENABLE);
 	}
-    render_wait_quit(context);
+    render_wait_quit();
     return 0;
 }
--- a/system.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/system.c	Sat Apr 18 11:42:53 2020 -0700
@@ -39,7 +39,7 @@
 	//More certain checks failed, look for a valid 68K reset vector
 	if (media->size >= 8) {
 		char *rom = media->buffer;
-		uint32_t reset = rom[4] << 24 | rom[5] << 16 | rom[6] << 8 | rom[7];
+		uint32_t reset = rom[5] << 16 | rom[6] << 8 | rom[7];
 		if (!(reset & 1) && reset < media->size) {
 			//we have a valid looking reset vector, assume it's a Genesis ROM
 			return SYSTEM_GENESIS;
--- a/system.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/system.h	Sat Apr 18 11:42:53 2020 -0700
@@ -58,6 +58,8 @@
 	system_fun              config_updated;
 	system_ptrszt_fun_rptr8 serialize;
 	system_ptr8_sizet_fun   deserialize;
+	system_str_fun          start_vgm_log;
+	system_fun              stop_vgm_log;
 	rom_info                info;
 	arena                   *arena;
 	char                    *next_rom;
@@ -67,6 +69,7 @@
 	uint8_t                 save_state;
 	uint8_t                 delayed_load_slot;
 	uint8_t                 has_keyboard;
+	uint8_t                 vgm_logging;
 	debugger_type           debugger_type;
 	system_type             type;
 };
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/systems.cfg	Sat Apr 18 11:42:53 2020 -0700
@@ -0,0 +1,80 @@
+md1va0 {
+	name Model 1 VA0
+	vram 64
+	vsram 40
+	zram 8
+	tas broken
+	z80_open_bus float
+	fm discrete 2612
+	tmss off
+}
+md1va3 {
+	name Model 1 VA3
+	vram 64
+	vsram 40
+	zram 8
+	tas broken
+	z80_open_bus FF
+	fm discrete 2612
+	tmss off
+}
+md1va6 {
+	name Model 1 VA6
+	vram 64
+	vsram 40
+	zram 8
+	tas broken
+	z80_open_bus FF
+	fm discrete 2612
+	tmss on
+}
+md2va1 {
+	name Model 2 VA1
+	vram 64
+	vsram 40
+	zram 8
+	tas broken
+	z80_open_bus FF
+	fm integrated 3834
+	tmss on
+}
+md2va2 {
+	name Model 2 VA2
+	vram 64
+	vsram 40
+	zram 8
+	tas broken
+	z80_open_bus FF
+	fm discrete 2612
+	tmss on
+}
+md3va1 {
+	name Model 3 VA1
+	vram 64
+	vsram 64
+	zram 8
+	tas broken
+	z80_open_bus FF
+	fm integrated 3834
+	tmss on
+}
+md3va2 {
+	name Model 3 VA2
+	vram 64
+	vsram 64
+	zram 8
+	tas works
+	z80_open_bus FF
+	fm integrated 3834
+	tmss on
+}
+teradrive {
+	name Teradrive
+	vram 128
+	vsram 40
+	zram 16
+	tas broken
+	z80_open_bus FF
+	fm discrete 3834
+	tmss off
+}
\ No newline at end of file
--- a/util.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/util.c	Sat Apr 18 11:42:53 2020 -0700
@@ -80,7 +80,7 @@
 	for (char *cur = base; *cur; ++cur)
 	{
 		if (in_var) {
-			if (!(*cur == '_' || isalnum(*cur))) {
+			if (!isalnum(*cur)) {
 				positions[num_vars].end = cur-base;
 				if (positions[num_vars].end - positions[num_vars].start > max_var_len) {
 					max_var_len = positions[num_vars].end - positions[num_vars].start;
@@ -854,6 +854,16 @@
 	qsort(list, num_entries, sizeof(dir_entry), sort_dir_alpha);
 }
 
+uint8_t delete_file(char *path)
+{
+#ifdef _WIN32
+	//TODO: Call Unicode version and prepend special string to remove max path limitation
+	return 0 != DeleteFileA(path);
+#else
+	return 0 == unlink(path);
+#endif
+}
+
 #ifdef __ANDROID__
 
 #include <SDL.h>
@@ -943,7 +953,7 @@
 	fclose(f);
 	return ret;
 }
-#endif
+#endif //ISLIB
 
 #ifdef _WIN32
 char const *get_userdata_dir()
@@ -1000,8 +1010,6 @@
 }
 
 
-#endif
+#endif //_WIN32
+#endif //__ANDROID__
 
-
-
-#endif
--- a/util.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/util.h	Sat Apr 18 11:42:53 2020 -0700
@@ -88,5 +88,7 @@
 void debug_message(char *format, ...);
 //Disables output of info and debug messages to stdout
 void disable_stdout_messages(void);
+//Deletes a file, returns true on success, false on failure
+uint8_t delete_file(char *path);
 
 #endif //UTIL_H_
--- a/vdp.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/vdp.c	Sat Apr 18 11:42:53 2020 -0700
@@ -70,28 +70,36 @@
 	{127, 0, 127}    //Sprites
 };
 
+static uint32_t calc_crop(uint32_t crop, uint32_t border)
+{
+	return crop >= border ? 0 : border - crop;
+}
+
 static void update_video_params(vdp_context *context)
 {
+	uint32_t top_crop = render_overscan_top();
+	uint32_t bot_crop = render_overscan_bot();
+	uint32_t border_top;
 	if (context->regs[REG_MODE_2] & BIT_MODE_5) {
 		if (context->regs[REG_MODE_2] & BIT_PAL) {
 			if (context->flags2 & FLAG2_REGION_PAL) {
 				context->inactive_start = PAL_INACTIVE_START;
-				context->border_top = BORDER_TOP_V30_PAL;
-				context->border_bot = BORDER_BOT_V30_PAL;
+				border_top = BORDER_TOP_V30_PAL;
+				context->border_bot = calc_crop(bot_crop, BORDER_BOT_V30_PAL);
 			} else {
 				//the behavior here is rather weird and needs more investigation
 				context->inactive_start = 0xF0;
-				context->border_top = 1;
-				context->border_bot = 3;
+				border_top = 1;
+				context->border_bot = calc_crop(bot_crop, 3);
 			}
 		} else {
 			context->inactive_start = NTSC_INACTIVE_START;
 			if (context->flags2 & FLAG2_REGION_PAL) {
-				context->border_top = BORDER_TOP_V28_PAL;
-				context->border_bot = BORDER_BOT_V28_PAL;
+				border_top = BORDER_TOP_V28_PAL;
+				context->border_bot = calc_crop(bot_crop, BORDER_BOT_V28_PAL);
 			} else {
-				context->border_top = BORDER_TOP_V28;
-				context->border_bot = BORDER_TOP_V28;
+				border_top = BORDER_TOP_V28;
+				context->border_bot = calc_crop(bot_crop, BORDER_BOT_V28);
 			}
 		}
 		if (context->regs[REG_MODE_4] & BIT_H40) {
@@ -112,11 +120,11 @@
 	} else {
 		context->inactive_start = MODE4_INACTIVE_START;
 		if (context->flags2 & FLAG2_REGION_PAL) {
-			context->border_top = BORDER_TOP_V24_PAL;
-			context->border_bot = BORDER_BOT_V24_PAL;
+			border_top = BORDER_TOP_V24_PAL;
+			context->border_bot = calc_crop(bot_crop, BORDER_BOT_V24_PAL);
 		} else {
-			context->border_top = BORDER_TOP_V24;
-			context->border_bot = BORDER_BOT_V24;
+			border_top = BORDER_TOP_V24;
+			context->border_bot = calc_crop(bot_crop, BORDER_BOT_V24);
 		}
 		if (!(context->regs[REG_MODE_1] & BIT_MODE_4)){
 			context->state = INACTIVE;
@@ -130,24 +138,27 @@
 			}
 		}
 	}
+	context->border_top = calc_crop(top_crop, border_top);
+	context->top_offset = border_top - context->border_top;
 }
 
 static uint8_t color_map_init_done;
 
-vdp_context *init_vdp_context(uint8_t region_pal)
+vdp_context *init_vdp_context(uint8_t region_pal, uint8_t has_max_vsram)
 {
 	vdp_context *context = calloc(1, sizeof(vdp_context) + VRAM_SIZE);
 	if (headless) {
-		context->output = malloc(LINEBUF_SIZE * sizeof(uint32_t));
-		context->output_pitch = 0;
+		context->fb = malloc(512 * LINEBUF_SIZE * sizeof(uint32_t));
+		context->output_pitch = LINEBUF_SIZE * sizeof(uint32_t);
 	} else {
 		context->cur_buffer = FRAMEBUFFER_ODD;
 		context->fb = render_get_framebuffer(FRAMEBUFFER_ODD, &context->output_pitch);
 	}
-	context->sprite_draws = MAX_DRAWS;
+	context->sprite_draws = MAX_SPRITES_LINE;
 	context->fifo_write = 0;
 	context->fifo_read = -1;
 	context->regs[REG_HINT] = context->hint_counter = 0xFF;
+	context->vsram_size = has_max_vsram ? MAX_VSRAM_SIZE : MIN_VSRAM_SIZE;
 
 	if (!color_map_init_done) {
 		uint8_t b,g,r;
@@ -235,9 +246,7 @@
 		context->flags2 |= FLAG2_REGION_PAL;
 	}
 	update_video_params(context);
-	if (!headless) {
-		context->output = (uint32_t *)(((char *)context->fb) + context->output_pitch * context->border_top);
-	}
+	context->output = (uint32_t *)(((char *)context->fb) + context->output_pitch * context->border_top);
 	return context;
 }
 
@@ -269,40 +278,86 @@
 
 static void render_sprite_cells(vdp_context * context)
 {
+	if (context->cur_slot > MAX_SPRITES_LINE) {
+		context->cur_slot--;
+		return;
+	}
+	if (context->cur_slot < 0) {
+		return;
+	}
 	sprite_draw * d = context->sprite_draw_list + context->cur_slot;
-	context->serial_address = d->address;
-	if (context->cur_slot >= context->sprite_draws) {
-
-		uint16_t dir;
-		int16_t x;
-		if (d->h_flip) {
-			x = d->x_pos + 7;
-			dir = -1;
-		} else {
-			x = d->x_pos;
-			dir = 1;
-		}
-		//printf("Draw Slot %d of %d, Rendering sprite cell from %X to x: %d\n", context->cur_slot, context->sprite_draws, d->address, x);
-		context->cur_slot--;
-		for (uint16_t address = d->address; address != ((d->address+4) & 0xFFFF); address++) {
-			if (x >= 0 && x < 320) {
-				if (!(context->linebuf[x] & 0xF)) {
-					context->linebuf[x] = (context->vdpmem[address] >> 4) | d->pal_priority;
-				} else if (context->vdpmem[address] >> 4) {
-					context->flags2 |= FLAG2_SPRITE_COLLIDE;
+	uint16_t address = d->address;
+	address += context->sprite_x_offset * d->height * 4;
+	context->serial_address = address;
+	uint16_t dir;
+	int16_t x;
+	if (d->h_flip) {
+		x = d->x_pos + 7 + 8 * (d->width - context->sprite_x_offset - 1);
+		dir = -1;
+	} else {
+		x = d->x_pos + context->sprite_x_offset * 8;
+		dir = 1;
+	}
+	if (d->x_pos) {
+		context->flags |= FLAG_CAN_MASK;
+		if (!(context->flags & FLAG_MASKED)) {
+			x -= 128;
+			//printf("Draw Slot %d of %d, Rendering sprite cell from %X to x: %d\n", context->cur_slot, context->sprite_draws, d->address, x);
+			uint8_t collide = 0;
+			if (x >= 8 && x < 312) {
+				//sprite is fully visible
+				for (; address != ((context->serial_address+4) & 0xFFFF); address++) {
+					uint8_t pixel = context->vdpmem[address] >> 4;
+					if (!(context->linebuf[x] & 0xF)) {
+						context->linebuf[x] = pixel | d->pal_priority;
+					} else {
+						collide |= pixel;
+					}
+					x += dir;
+					pixel = context->vdpmem[address] & 0xF;
+					if (!(context->linebuf[x] & 0xF)) {
+						context->linebuf[x] = pixel  | d->pal_priority;
+					} else {
+						collide |= pixel;
+					}
+					x += dir;
+				}
+			} else if (x > -8 && x < 327) {
+				//sprite is partially visible
+				for (; address != ((context->serial_address+4) & 0xFFFF); address++) {
+					if (x >= 0 && x < 320) {
+						uint8_t pixel = context->vdpmem[address] >> 4;
+						if (!(context->linebuf[x] & 0xF)) {
+							context->linebuf[x] = pixel | d->pal_priority;
+						} else {
+							collide |= pixel;
+						}
+					}
+					x += dir;
+					if (x >= 0 && x < 320) {
+						uint8_t pixel = context->vdpmem[address] & 0xF;
+						if (!(context->linebuf[x] & 0xF)) {
+							context->linebuf[x] = pixel  | d->pal_priority;
+						} else {
+							collide |= pixel;
+						}
+					}
+					x += dir;
 				}
 			}
-			x += dir;
-			if (x >= 0 && x < 320) {
-				if (!(context->linebuf[x] & 0xF)) {
-					context->linebuf[x] = (context->vdpmem[address] & 0xF)  | d->pal_priority;
-				} else if (context->vdpmem[address] & 0xF) {
-					context->flags2 |= FLAG2_SPRITE_COLLIDE;
-				}
+			if (collide) {
+				context->flags2 |= FLAG2_SPRITE_COLLIDE;
 			}
-			x += dir;
 		}
-	} else {
+	} else if (context->flags & FLAG_CAN_MASK) {
+		context->flags |= FLAG_MASKED;
+		context->flags &= ~FLAG_CAN_MASK;
+	}
+
+	context->sprite_x_offset++;
+	if (context->sprite_x_offset == d->width) {
+		d->x_pos = 0;
+		context->sprite_x_offset = 0;
 		context->cur_slot--;
 	}
 }
@@ -530,6 +585,9 @@
 		   (context->flags & FLAG_PENDING) ? "word" : (context->flags2 & FLAG2_BYTE_PENDING) ? "byte" : "none",
 		   context->vcounter, context->hslot*2, (context->flags2 & FLAG2_VINT_PENDING) ? "true" : "false",
 		   (context->flags2 & FLAG2_HINT_PENDING) ? "true" : "false", vdp_control_port_read(context));
+	printf("\nDebug Register: %X | Output disabled: %s, Force Layer: %d\n", context->test_port, 
+		(context->test_port & TEST_BIT_DISABLE)  ? "true" : "false", context->test_port >> 7 & 3
+	);
 	//restore flags as calling vdp_control_port_read can change them
 	context->flags = old_flags;
 	context->flags2 = old_flags2;
@@ -695,47 +753,13 @@
 			} else {
 				address = ((tileinfo & 0x7FF) << 5) + row * 4;
 			}
-			int16_t x = ((context->vdpmem[att_addr+ 2] & 0x3) << 8 | context->vdpmem[att_addr + 3]) & 0x1FF;
-			if (x) {
-				context->flags |= FLAG_CAN_MASK;
-			} else if(context->flags & (FLAG_CAN_MASK | FLAG_DOT_OFLOW)) {
-				context->flags |= FLAG_MASKED;
-			}
-
-			context->flags &= ~FLAG_DOT_OFLOW;
-			int16_t i;
-			if (context->flags & FLAG_MASKED) {
-				for (i=0; i < width && context->sprite_draws; i++) {
-					--context->sprite_draws;
-					context->sprite_draw_list[context->sprite_draws].x_pos = -128;
-					context->sprite_draw_list[context->sprite_draws].address = address + i * height * 4;
-				}
-			} else {
-				x -= 128;
-				int16_t base_x = x;
-				int16_t dir;
-				if (tileinfo & MAP_BIT_H_FLIP) {
-					x += (width-1) * 8;
-					dir = -8;
-				} else {
-					dir = 8;
-				}
-				//printf("Sprite %d | x: %d, y: %d, width: %d, height: %d, pal_priority: %X, row: %d, tile addr: %X\n", context->sprite_info_list[context->cur_slot].index, x, context->sprite_info_list[context->cur_slot].y, width, height, pal_priority, row, address);
-				for (i=0; i < width && context->sprite_draws; i++, x += dir) {
-					--context->sprite_draws;
-					context->sprite_draw_list[context->sprite_draws].address = address + i * height * 4;
-					context->sprite_draw_list[context->sprite_draws].x_pos = x;
-					context->sprite_draw_list[context->sprite_draws].pal_priority = pal_priority;
-					context->sprite_draw_list[context->sprite_draws].h_flip = (tileinfo & MAP_BIT_H_FLIP) ? 1 : 0;
-				}
-			}
-			//Used to be i < width
-			//TODO: Confirm this is the right condition on hardware
-			if (!context->sprite_draws) {
-				context->flags |= FLAG_DOT_OFLOW;
-			}
-		} else {
-			context->flags |= FLAG_DOT_OFLOW;
+			context->sprite_draws--;
+			context->sprite_draw_list[context->sprite_draws].x_pos = ((context->vdpmem[att_addr+ 2] & 0x3) << 8 | context->vdpmem[att_addr + 3]) & 0x1FF;
+			context->sprite_draw_list[context->sprite_draws].address = address;
+			context->sprite_draw_list[context->sprite_draws].pal_priority = pal_priority;
+			context->sprite_draw_list[context->sprite_draws].h_flip = (tileinfo & MAP_BIT_H_FLIP) ? 1 : 0;
+			context->sprite_draw_list[context->sprite_draws].width = width;
+			context->sprite_draw_list[context->sprite_draws].height = height;
 		}
 	}
 	context->cur_slot++;
@@ -790,7 +814,7 @@
 	}
 	write_cram_internal(context, addr, value);
 	
-	if (context->hslot >= BG_START_SLOT && (
+	if (context->output && context->hslot >= BG_START_SLOT && (
 		context->vcounter < context->inactive_start + context->border_bot 
 		|| context->vcounter > 0x200 - context->border_top
 	)) {
@@ -915,7 +939,7 @@
 			break;
 		}
 		case VSRAM_WRITE:
-			if (((start->address/2) & 63) < VSRAM_SIZE) {
+			if (((start->address/2) & 63) < context->vsram_size) {
 				//printf("VSRAM Write: %X to %X @ frame: %d, vcounter: %d, hslot: %d, cycle: %d\n", start->value, start->address, context->frame, context->vcounter, context->hslot, context->cycles);
 				if (start->partial == 3) {
 					if (start->address & 1) {
@@ -952,7 +976,7 @@
 			
 			context->flags |= FLAG_READ_FETCHED;
 		}
-	} else if (!(context->cd & 1) && !(context->flags & (FLAG_READ_FETCHED|FLAG_PENDING))) {
+	} else if (!(context->cd & 1) && !(context->flags & FLAG_READ_FETCHED)) {
 		switch(context->cd & 0xF)
 		{
 		case VRAM_READ:
@@ -989,7 +1013,7 @@
 			break;
 		case VSRAM_READ: {
 			uint16_t address = (context->address /2) & 63;
-			if (address >= VSRAM_SIZE) {
+			if (address >= context->vsram_size) {
 				address = 0;
 			}
 			context->prefetch = context->vsram[address] & VSRAM_BITS;
@@ -1125,32 +1149,15 @@
 	context->v_offset = vscroll & v_offset_mask;
 	//printf("%s | line %d, vsram: %d, vscroll: %d, v_offset: %d\n",(vsram_off ? "B" : "A"), line, context->vsram[context->regs[REG_MODE_3] & 0x4 ? column : 0], vscroll, context->v_offset);
 	vscroll >>= vscroll_shift;
-	uint16_t hscroll_mask;
-	uint16_t v_mul;
-	switch(context->regs[REG_SCROLL] & 0x3)
-	{
-	case 0:
-		hscroll_mask = 0x1F;
-		v_mul = 64;
-		break;
-	case 0x1:
-		hscroll_mask = 0x3F;
-		v_mul = 128;
-		break;
-	case 0x2:
-		//TODO: Verify this behavior
-		hscroll_mask = 0x1F;
-		v_mul = 0;
-		break;
-	case 0x3:
-		hscroll_mask = 0x7F;
-		v_mul = 256;
-		break;
-	}
+	//TODO: Verify the behavior for a setting of 2
+	static const uint16_t hscroll_masks[] = {0x1F, 0x3F, 0x1F, 0x7F};
+	static const uint16_t v_shifts[] = {6, 7, 0, 8};
+	uint16_t hscroll_mask = hscroll_masks[context->regs[REG_SCROLL] & 0x3];
+	uint16_t v_shift = v_shifts[context->regs[REG_SCROLL] & 0x3];
 	uint16_t hscroll, offset;
 	for (int i = 0; i < 2; i++) {
 		hscroll = (column - 2 + i - ((hscroll_val/8) & 0xFFFE)) & hscroll_mask;
-		offset = address + ((vscroll * v_mul + hscroll*2) & 0x1FFF);
+		offset = address + (((vscroll << v_shift) + hscroll*2) & 0x1FFF);
 		//printf("%s | line: %d, col: %d, x: %d, hs_mask %X, scr reg: %X, tbl addr: %X\n", (vsram_off ? "B" : "A"), line, (column-2+i), hscroll, hscroll_mask, context->regs[REG_SCROLL], offset);
 		uint16_t col_val = (context->vdpmem[offset] << 8) | context->vdpmem[offset+1];
 		if (i) {
@@ -1208,28 +1215,25 @@
 	}
 	uint8_t pal_priority = (col >> 9) & 0x70;
 	uint32_t bits = *((uint32_t *)(&context->vdpmem[address]));
+	tmp_buf += offset;
 	if (col & MAP_BIT_H_FLIP) {
 		uint32_t shift = 28;
 		for (int i = 0; i < 4; i++)
 		{
 			uint8_t right = pal_priority | ((bits >> shift) & 0xF);
 			shift -= 4;
-			tmp_buf[offset++] = pal_priority | ((bits >> shift) & 0xF);
+			*(tmp_buf++) = pal_priority | ((bits >> shift) & 0xF);
 			shift -= 4;
-			offset &= SCROLL_BUFFER_MASK;
-			tmp_buf[offset++] = right;
-			offset &= SCROLL_BUFFER_MASK;
+			*(tmp_buf++) = right;
 		}
 	} else {
 		for (int i = 0; i < 4; i++)
 		{
 			uint8_t right = pal_priority | (bits & 0xF);
 			bits >>= 4;
-			tmp_buf[offset++] = pal_priority | (bits & 0xF);
-			offset &= SCROLL_BUFFER_MASK;
+			*(tmp_buf++) = pal_priority | (bits & 0xF);
 			bits >>= 4;
-			tmp_buf[offset++] = right;
-			offset &= SCROLL_BUFFER_MASK;
+			*(tmp_buf++) = right;
 		}
 	}
 }
@@ -1328,23 +1332,32 @@
 
 static void render_normal(vdp_context *context, int32_t col, uint8_t *dst, uint8_t *debug_dst, int plane_a_off, int plane_b_off)
 {
-	int start = 0;
+	uint8_t *sprite_buf = context->linebuf + col * 8;
 	if (!col && (context->regs[REG_MODE_1] & BIT_COL0_MASK)) {
 		memset(dst, 0, 8);
 		memset(debug_dst, DBG_SRC_BG, 8);
 		dst += 8;
 		debug_dst += 8;
-		start = 8;
-	}
-	uint8_t *sprite_buf = context->linebuf + col * 8 + start;
-	for (int i = start; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i)
-	{
-		uint8_t sprite, plane_a, plane_b;
-		plane_a = context->tmp_buf_a[plane_a_off & SCROLL_BUFFER_MASK];
-		plane_b = context->tmp_buf_b[plane_b_off & SCROLL_BUFFER_MASK];
-		sprite = *sprite_buf;
-		*(dst++) = composite_normal(context, debug_dst, sprite, plane_a, plane_b, context->regs[REG_BG_COLOR]) & 0x3F;
-		debug_dst++;
+		sprite_buf += 8;
+		plane_a_off += 8;
+		plane_b_off += 8;
+		for (int i = 0; i < 8; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i)
+		{
+			uint8_t sprite, plane_a, plane_b;
+			plane_a = context->tmp_buf_a[plane_a_off & SCROLL_BUFFER_MASK];
+			plane_b = context->tmp_buf_b[plane_b_off & SCROLL_BUFFER_MASK];
+			*(dst++) = composite_normal(context, debug_dst, *sprite_buf, plane_a, plane_b, context->regs[REG_BG_COLOR]) & 0x3F;
+			debug_dst++;
+		}
+	} else {
+		for (int i = 0; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i)
+		{
+			uint8_t sprite, plane_a, plane_b;
+			plane_a = context->tmp_buf_a[plane_a_off & SCROLL_BUFFER_MASK];
+			plane_b = context->tmp_buf_b[plane_b_off & SCROLL_BUFFER_MASK];
+			*(dst++) = composite_normal(context, debug_dst, *sprite_buf, plane_a, plane_b, context->regs[REG_BG_COLOR]) & 0x3F;
+			debug_dst++;
+		}
 	}
 }
 
@@ -1609,10 +1622,10 @@
 			plane_a_off = context->buf_a_off;
 			a_src = DBG_SRC_W;
 		} else {
-			plane_a_off = context->buf_a_off - (context->hscroll_a & 0xF);
+			plane_a_off = context->buf_a_off - context->hscroll_a_fine;
 			a_src = DBG_SRC_A;
 		}
-		plane_b_off = context->buf_b_off - (context->hscroll_b & 0xF);
+		plane_b_off = context->buf_b_off - context->hscroll_b_fine;
 		//printf("A | tmp_buf offset: %d\n", 8 - (context->hscroll_a & 0x7));
 
 		if (context->regs[REG_MODE_4] & BIT_HILIGHT) {
@@ -1642,13 +1655,14 @@
 			case 1:
 				memset(dst, 0, BORDER_LEFT);
 				memset(debug_dst, DBG_SRC_BG, BORDER_LEFT);
+				dst += BORDER_LEFT;
 				break;
 			case 2: {
 				//plane A
 				//TODO: Deal with Window layer
 				int i;
 				i = 0;
-				uint8_t buf_off = context->buf_a_off - (context->hscroll_a & 0xF) + (16 - BORDER_LEFT);
+				uint8_t buf_off = context->buf_a_off - context->hscroll_a_fine + (16 - BORDER_LEFT);
 				//uint8_t *src = context->tmp_buf_a + ((context->buf_a_off + (i ? 0 : (16 - BORDER_LEFT) - (context->hscroll_a & 0xF))) & SCROLL_BUFFER_MASK); 
 				for (; i < BORDER_LEFT; buf_off++, i++, dst++, debug_dst++)
 				{
@@ -1661,7 +1675,7 @@
 				//plane B
 				int i;
 				i = 0;
-				uint8_t buf_off = context->buf_b_off - (context->hscroll_b & 0xF) + (16 - BORDER_LEFT);
+				uint8_t buf_off = context->buf_b_off - context->hscroll_b_fine + (16 - BORDER_LEFT);
 				//uint8_t *src = context->tmp_buf_b + ((context->buf_b_off + (i ? 0 : (16 - BORDER_LEFT) - (context->hscroll_b & 0xF))) & SCROLL_BUFFER_MASK); 
 				for (; i < BORDER_LEFT; buf_off++, i++, dst++, debug_dst++)
 				{
@@ -1674,8 +1688,8 @@
 		} else {
 			memset(dst, pixel, BORDER_LEFT);
 			memset(debug_dst, DBG_SRC_BG, BORDER_LEFT);
+			dst += BORDER_LEFT;
 		}
-		dst += BORDER_LEFT;
 	}
 	context->done_composite = dst;
 	context->buf_a_off = (context->buf_a_off + SCROLL_BUFFER_DRAW) & SCROLL_BUFFER_MASK;
@@ -1725,7 +1739,7 @@
 	uint8_t *dst = context->compositebuf + col * 8 + BORDER_LEFT;
 	uint8_t *debug_dst = context->layer_debug_buf + col * 8 + BORDER_LEFT;
 	if (context->state == PREPARING) {
-		memset(dst, 0, 8);
+		memset(dst, 0x10 + (context->regs[REG_BG_COLOR] & 0xF) + MODE4_OFFSET, 8);
 		memset(debug_dst, DBG_SRC_BG, 8);
 		context->done_composite = dst + 8;
 		return;
@@ -1752,8 +1766,8 @@
 		}
 		context->done_composite = dst;
 	} else {
-		memset(dst, 0, 8);
-		memset(dst, DBG_SRC_BG, 8);
+		memset(dst, 0x10 + (context->regs[REG_BG_COLOR] & 0xF) + MODE4_OFFSET, 8);
+		memset(debug_dst, DBG_SRC_BG, 8);
 		context->done_composite = dst + 8;
 	}
 }
@@ -1809,11 +1823,21 @@
 		}
 		if (context->enabled_debuggers & (1 << VDP_DEBUG_CRAM)) {
 			uint32_t *fb = context->debug_fbs[VDP_DEBUG_CRAM] + context->debug_fb_pitch[VDP_DEBUG_CRAM] * line / sizeof(uint32_t);
-			for (int i = 0; i < 64; i++)
-			{
-				for (int x = 0; x < 8; x++)
+			if (context->regs[REG_MODE_2] & BIT_MODE_5) {
+				for (int i = 0; i < 64; i++)
 				{
-					*(fb++) = context->colors[i];
+					for (int x = 0; x < 8; x++)
+					{
+						*(fb++) = context->colors[i];
+					}
+				}
+			} else {
+				for (int i = MODE4_OFFSET; i < MODE4_OFFSET+32; i++)
+				{
+					for (int x = 0; x < 16; x++)
+					{
+						*(fb++) = context->colors[i];
+					}
 				}
 			}
 		}
@@ -1988,28 +2012,54 @@
 		uint32_t starting_line = 512 - 32*4;
 		uint32_t *line = context->debug_fbs[VDP_DEBUG_CRAM] 
 			+ context->debug_fb_pitch[VDP_DEBUG_CRAM]  * starting_line / sizeof(uint32_t);
-		for (int pal = 0; pal < 4; pal ++)
-		{
-			uint32_t *cur;
-			for (int y = 0; y < 31; y++)
+		if (context->regs[REG_MODE_2] & BIT_MODE_5) {
+			for (int pal = 0; pal < 4; pal ++)
 			{
+				uint32_t *cur;
+				for (int y = 0; y < 31; y++)
+				{
+					cur = line;
+					for (int offset = 0; offset < 16; offset++)
+					{
+						for (int x = 0; x < 31; x++)
+						{
+							*(cur++) = context->colors[pal * 16 + offset];
+						}
+						*(cur++) = 0xFF000000;
+					}
+					line += context->debug_fb_pitch[VDP_DEBUG_CRAM] / sizeof(uint32_t);
+				}
 				cur = line;
-				for (int offset = 0; offset < 16; offset++)
+				for (int x = 0; x < 512; x++)
 				{
-					for (int x = 0; x < 31; x++)
-					{
-						*(cur++) = context->colors[pal * 16 + offset];
-					}
 					*(cur++) = 0xFF000000;
 				}
 				line += context->debug_fb_pitch[VDP_DEBUG_CRAM] / sizeof(uint32_t);
 			}
-			cur = line;
-			for (int x = 0; x < 512; x++)
+		} else {
+			for (int pal = 0; pal < 2; pal ++)
 			{
-				*(cur++) = 0xFF000000;
+				uint32_t *cur;
+				for (int y = 0; y < 31; y++)
+				{
+					cur = line;
+					for (int offset = MODE4_OFFSET; offset < MODE4_OFFSET + 16; offset++)
+					{
+						for (int x = 0; x < 31; x++)
+						{
+							*(cur++) = context->colors[pal * 16 + offset];
+						}
+						*(cur++) = 0xFF000000;
+					}
+					line += context->debug_fb_pitch[VDP_DEBUG_CRAM] / sizeof(uint32_t);
+				}
+				cur = line;
+				for (int x = 0; x < 512; x++)
+				{
+					*(cur++) = 0xFF000000;
+				}
+				line += context->debug_fb_pitch[VDP_DEBUG_CRAM] / sizeof(uint32_t);
 			}
-			line += context->debug_fb_pitch[VDP_DEBUG_CRAM] / sizeof(uint32_t);
 		}
 		render_framebuffer_updated(context->debug_fb_indices[VDP_DEBUG_CRAM], 512);
 		context->debug_fbs[VDP_DEBUG_CRAM] = render_get_framebuffer(context->debug_fb_indices[VDP_DEBUG_CRAM], &context->debug_fb_pitch[VDP_DEBUG_CRAM]);
@@ -2022,9 +2072,10 @@
 
 void vdp_force_update_framebuffer(vdp_context *context)
 {
-	uint16_t lines_max = (context->flags2 & FLAG2_REGION_PAL) 
-			? 240 + BORDER_TOP_V30_PAL + BORDER_BOT_V30_PAL 
-			: 224 + BORDER_TOP_V28 + BORDER_BOT_V28;
+	if (!context->fb) {
+		return;
+	}
+	uint16_t lines_max = context->inactive_start + context->border_bot + context->border_top;
 			
 	uint16_t to_fill = lines_max - context->output_lines;
 	memset(
@@ -2039,69 +2090,85 @@
 
 static void advance_output_line(vdp_context *context)
 {
-	if (headless) {
-		if (context->vcounter == context->inactive_start) {
-			context->frame++;
+	//This function is kind of gross because of the need to deal with vertical border busting via mode changes
+	uint16_t lines_max = context->inactive_start + context->border_bot + context->border_top;
+	uint32_t output_line = context->vcounter;
+	if (!(context->regs[REG_MODE_2] & BIT_MODE_5)) {
+		//vcounter increment occurs much later in Mode 4
+		output_line++;
+	} 
+	
+	if (context->output_lines >= lines_max || (!context->pushed_frame && output_line == context->inactive_start + context->border_top)) {
+		//we've either filled up a full frame or we're at the bottom of screen in the current defined mode + border crop
+		if (!headless) {
+			render_framebuffer_updated(context->cur_buffer, context->h40_lines > (context->inactive_start + context->border_top) / 2 ? LINEBUF_SIZE : (256+HORIZ_BORDER));
+			uint8_t is_even = context->flags2 & FLAG2_EVEN_FIELD;
+			if (context->vcounter <= context->inactive_start && (context->regs[REG_MODE_4] & BIT_INTERLACE)) {
+				is_even = !is_even;
+			}
+			context->cur_buffer = is_even ? FRAMEBUFFER_EVEN : FRAMEBUFFER_ODD;
+			context->pushed_frame = 1;
+			context->fb = NULL;
 		}
-		context->vcounter &= 0x1FF;
+		vdp_update_per_frame_debug(context);
+		context->h40_lines = 0;
+		context->frame++;
+		context->output_lines = 0;
+	}
+	
+	if (output_line < context->inactive_start + context->border_bot) {
+		if (context->output_lines) {
+			output_line = context->output_lines++;//context->border_top + context->vcounter;
+		} else if (!output_line && !context->border_top) {
+			//top border is completely cropped so we won't hit the case below
+			output_line = 0;
+			context->output_lines = 1;
+			context->pushed_frame = 0;
+		} else {
+			context->output_lines = output_line + 1;
+		}
+	} else if (output_line >= 0x200 - context->border_top) {
+		if (output_line == 0x200 - context->border_top) {
+			//We're at the top of the display, force context->output_lines to be zero to avoid
+			//potential screen rolling if the mode is changed at an inopportune time
+			context->output_lines = 0;
+			context->pushed_frame = 0;
+		}
+		output_line = context->output_lines++;//context->vcounter - (0x200 - context->border_top);
 	} else {
-		uint16_t lines_max = (context->flags2 & FLAG2_REGION_PAL) 
-			? 240 + BORDER_TOP_V30_PAL + BORDER_BOT_V30_PAL 
-			: 224 + BORDER_TOP_V28 + BORDER_BOT_V28;
-
-		if (context->output_lines == lines_max) {
-			render_framebuffer_updated(context->cur_buffer, context->h40_lines > (context->inactive_start + context->border_top) / 2 ? LINEBUF_SIZE : (256+HORIZ_BORDER));
-			context->cur_buffer = context->flags2 & FLAG2_EVEN_FIELD ? FRAMEBUFFER_EVEN : FRAMEBUFFER_ODD;
-			context->fb = render_get_framebuffer(context->cur_buffer, &context->output_pitch);
-			vdp_update_per_frame_debug(context);
-			context->h40_lines = 0;
-			context->frame++;
-			context->output_lines = 0;
-		}
-		uint32_t output_line = context->vcounter;
-		if (!(context->regs[REG_MODE_2] & BIT_MODE_5)) {
-			//vcounter increment occurs much later in Mode 4
-			output_line++;
-		} 
-		if (output_line < context->inactive_start + context->border_bot && context->output_lines > 0) {
-			output_line = context->output_lines++;//context->border_top + context->vcounter;
-		} else if (output_line >= 0x200 - context->border_top) {
-			if (output_line == 0x200 - context->border_top) {
-				//We're at the top of the display, force context->output_lines to be zero to avoid
-				//potential screen rolling if the mode is changed at an inopportune time
-				context->output_lines = 0;
-			}
-			output_line = context->output_lines++;//context->vcounter - (0x200 - context->border_top);
-		} else {
-			context->output = NULL;
-		}
-		context->output = (uint32_t *)(((char *)context->fb) + context->output_pitch * output_line);
+		context->output = NULL;
+		return;
+	}
+	if (!context->fb) {
+		context->fb = render_get_framebuffer(context->cur_buffer, &context->output_pitch);
+	}
+	output_line += context->top_offset;
+	context->output = (uint32_t *)(((char *)context->fb) + context->output_pitch * output_line);
 #ifdef DEBUG_FB_FILL
-		for (int i = 0; i < LINEBUF_SIZE; i++)
-		{
-			context->output[i] = 0xFFFF00FF;
-		}
+	for (int i = 0; i < LINEBUF_SIZE; i++)
+	{
+		context->output[i] = 0xFFFF00FF;
+	}
 #endif	
-		if (context->output && (context->regs[REG_MODE_4] & BIT_H40)) {
-			context->h40_lines++;
-		}
+	if (context->output && (context->regs[REG_MODE_4] & BIT_H40)) {
+		context->h40_lines++;
 	}
 }
 
 void vdp_release_framebuffer(vdp_context *context)
 {
-	render_framebuffer_updated(context->cur_buffer, context->h40_lines > (context->inactive_start + context->border_top) / 2 ? LINEBUF_SIZE : (256+HORIZ_BORDER));
-	context->output = context->fb = NULL;
+	if (context->fb) {
+		render_framebuffer_updated(context->cur_buffer, context->h40_lines > (context->inactive_start + context->border_top) / 2 ? LINEBUF_SIZE : (256+HORIZ_BORDER));
+		context->output = context->fb = NULL;
+	}
 }
 
 void vdp_reacquire_framebuffer(vdp_context *context)
 {
-	context->fb = render_get_framebuffer(context->cur_buffer, &context->output_pitch);
-	uint16_t lines_max = (context->flags2 & FLAG2_REGION_PAL) 
-			? 240 + BORDER_TOP_V30_PAL + BORDER_BOT_V30_PAL
-			: 224 + BORDER_TOP_V28 + BORDER_BOT_V28;
+	uint16_t lines_max = context->inactive_start + context->border_bot + context->border_top;
 	if (context->output_lines <= lines_max && context->output_lines > 0) {
-		context->output = (uint32_t *)(((char *)context->fb) + context->output_pitch * (context->output_lines - 1));
+		context->fb = render_get_framebuffer(context->cur_buffer, &context->output_pitch);
+		context->output = (uint32_t *)(((char *)context->fb) + context->output_pitch * (context->output_lines - 1 + context->top_offset));
 	} else {
 		context->output = NULL;
 	}
@@ -2138,7 +2205,7 @@
 				//TODO: Deal with Window layer
 				int i;
 				i = 0;
-				uint8_t buf_off = context->buf_a_off - (context->hscroll_a & 0xF);
+				uint8_t buf_off = context->buf_a_off - context->hscroll_a_fine;
 				//uint8_t *src = context->tmp_buf_a + ((context->buf_a_off + (i ? 0 : (16 - BORDER_LEFT) - (context->hscroll_a & 0xF))) & SCROLL_BUFFER_MASK); 
 				for (; i < BORDER_RIGHT; buf_off++, i++, dst++)
 				{
@@ -2218,6 +2285,30 @@
 			}\
 		}\
 	}
+	
+//BG_START_SLOT => dst = 0, src = border
+//BG_START_SLOT + 13/2=6, dst = 6, src = border + comp + 13
+#define OUTPUT_PIXEL_MODE4(slot) if ((slot) >= BG_START_SLOT) {\
+		uint8_t *src = context->compositebuf + ((slot) - BG_START_SLOT) *2;\
+		uint32_t *dst = context->output + ((slot) - BG_START_SLOT) *2;\
+		if ((slot) - BG_START_SLOT < BORDER_LEFT/2) {\
+			*(dst++) = context->colors[bgindex];\
+			*(dst++) = context->colors[bgindex];\
+		} else if ((slot) - BG_START_SLOT < (BORDER_LEFT+256)/2){\
+			if ((slot) - BG_START_SLOT == BORDER_LEFT/2) {\
+				*(dst++) = context->colors[bgindex];\
+				src++;\
+			} else {\
+				*(dst++) = context->colors[*(src++)];\
+			}\
+			*(dst++) = context->colors[*(src++)];\
+		} else if ((slot) - BG_START_SLOT <= (HORIZ_BORDER+256)/2) {\
+			*(dst++) = context->colors[bgindex];\
+			if ((slot) - BG_START_SLOT < (HORIZ_BORDER+256)/2) {\
+				*(dst++) = context->colors[bgindex];\
+			}\
+		}\
+	}
 
 #define COLUMN_RENDER_BLOCK(column, startcyc) \
 	case startcyc:\
@@ -2291,11 +2382,11 @@
 		
 #define COLUMN_RENDER_BLOCK_MODE4(column, startcyc) \
 	case startcyc:\
-		OUTPUT_PIXEL(startcyc)\
+		OUTPUT_PIXEL_MODE4(startcyc)\
 		read_map_mode4(column, context->vcounter, context);\
 		CHECK_LIMIT\
 	case ((startcyc+1)&0xFF):\
-		OUTPUT_PIXEL((startcyc+1)&0xFF)\
+		OUTPUT_PIXEL_MODE4((startcyc+1)&0xFF)\
 		if (column & 3) {\
 			scan_sprite_table_mode4(context);\
 		} else {\
@@ -2303,11 +2394,11 @@
 		}\
 		CHECK_LIMIT\
 	case ((startcyc+2)&0xFF):\
-		OUTPUT_PIXEL((startcyc+2)&0xFF)\
+		OUTPUT_PIXEL_MODE4((startcyc+2)&0xFF)\
 		fetch_map_mode4(column, context->vcounter, context);\
 		CHECK_LIMIT\
 	case ((startcyc+3)&0xFF):\
-		OUTPUT_PIXEL((startcyc+3)&0xFF)\
+		OUTPUT_PIXEL_MODE4((startcyc+3)&0xFF)\
 		render_map_mode4(context->vcounter, column, context);\
 		CHECK_LIMIT
 		
@@ -2424,31 +2515,225 @@
 		
 #define SPRITE_RENDER_H32_MODE4(slot) \
 	case slot:\
-		OUTPUT_PIXEL_H32(slot)\
+		OUTPUT_PIXEL_MODE4(slot)\
 		read_sprite_x_mode4(context);\
 		MODE4_CHECK_SLOT_LINE(slot)\
 	case CALC_SLOT(slot, 1):\
-		OUTPUT_PIXEL(CALC_SLOT(slot, 1))\
+		OUTPUT_PIXEL_MODE4(CALC_SLOT(slot, 1))\
 		read_sprite_x_mode4(context);\
 		MODE4_CHECK_SLOT_LINE(CALC_SLOT(slot,1))\
 	case CALC_SLOT(slot, 2):\
-		OUTPUT_PIXEL(CALC_SLOT(slot, 2))\
+		OUTPUT_PIXEL_MODE4(CALC_SLOT(slot, 2))\
 		fetch_sprite_cells_mode4(context);\
 		MODE4_CHECK_SLOT_LINE(CALC_SLOT(slot, 2))\
 	case CALC_SLOT(slot, 3):\
-		OUTPUT_PIXEL(CALC_SLOT(slot, 3))\
+		OUTPUT_PIXEL_MODE4(CALC_SLOT(slot, 3))\
 		render_sprite_cells_mode4(context);\
 		MODE4_CHECK_SLOT_LINE(CALC_SLOT(slot, 3))\
 	case CALC_SLOT(slot, 4):\
-		OUTPUT_PIXEL(CALC_SLOT(slot, 4))\
+		OUTPUT_PIXEL_MODE4(CALC_SLOT(slot, 4))\
 		fetch_sprite_cells_mode4(context);\
 		MODE4_CHECK_SLOT_LINE(CALC_SLOT(slot, 4))\
 	case CALC_SLOT(slot, 5):\
-		OUTPUT_PIXEL(CALC_SLOT(slot, 5))\
+		OUTPUT_PIXEL_MODE4(CALC_SLOT(slot, 5))\
 		render_sprite_cells_mode4(context);\
 		MODE4_CHECK_SLOT_LINE(CALC_SLOT(slot, 5))
 
 static uint32_t dummy_buffer[LINEBUF_SIZE];
+static void vdp_h40_line(vdp_context * context)
+{
+	uint16_t address;
+	uint32_t mask;
+	uint32_t const slot_cycles = MCLKS_SLOT_H40;
+	uint8_t bgindex = context->regs[REG_BG_COLOR] & 0x3F;
+	uint8_t test_layer = context->test_port >> 7 & 3;
+	
+	//165
+	if (!(context->regs[REG_MODE_3] & BIT_VSCROLL)) {
+		//TODO: Develop some tests on hardware to see when vscroll latch actually happens for full plane mode
+		//See note in vdp_h32 for why this was originally moved out of read_map_scroll
+		//Skitchin' has a similar problem, but uses H40 mode. It seems to be able to hit the extern slot at 232
+		//pretty consistently
+		context->vscroll_latch[0] = context->vsram[0];
+		context->vscroll_latch[1] = context->vsram[1];
+	}
+	render_sprite_cells(context);
+	//166
+	render_sprite_cells(context);
+	//167
+	context->sprite_index = 0x80;
+	context->slot_counter = 0;
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_b, context->buf_b_off,
+		context->col_1
+	);
+	render_sprite_cells(context);
+	scan_sprite_table(context->vcounter, context);
+	//168
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_b,
+		context->buf_b_off + 8,
+		context->col_2
+	);
+	//Do palette lookup for end of previous line
+	uint8_t *src = context->compositebuf + (LINE_CHANGE_H40 - BG_START_SLOT) *2;
+	uint32_t *dst = context->output + (LINE_CHANGE_H40 - BG_START_SLOT) *2;
+	if (test_layer) {
+		for (int i = 0; i < LINEBUF_SIZE - (LINE_CHANGE_H40 - BG_START_SLOT) * 2; i++)
+		{
+			*(dst++) = context->colors[*(src++)];
+		}
+	} else {
+		for (int i = 0; i < LINEBUF_SIZE - (LINE_CHANGE_H40 - BG_START_SLOT) * 2; i++)
+		{
+			if (*src & 0x3F) {
+				*(dst++) = context->colors[*(src++)];
+			} else {
+				*(dst++) = context->colors[(*(src++) & 0xC0) | bgindex];
+			}
+		}
+	}
+	advance_output_line(context);
+	//168-242 (inclusive)
+	for (int i = 0; i < 28; i++)
+	{
+		render_sprite_cells(context);
+		scan_sprite_table(context->vcounter, context);
+	}
+	//243
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_a,
+		context->buf_a_off,
+		context->col_1
+	);
+	//244
+	address = (context->regs[REG_HSCROLL] & 0x3F) << 10;
+	mask = 0;
+	if (context->regs[REG_MODE_3] & 0x2) {
+		mask |= 0xF8;
+	}
+	if (context->regs[REG_MODE_3] & 0x1) {
+		mask |= 0x7;
+	}
+	render_border_garbage(context, address, context->tmp_buf_a, context->buf_a_off+8, context->col_2);
+	address += (context->vcounter & mask) * 4;
+	context->hscroll_a = context->vdpmem[address] << 8 | context->vdpmem[address+1];
+	context->hscroll_a_fine = context->hscroll_a & 0xF;
+	context->hscroll_b = context->vdpmem[address+2] << 8 | context->vdpmem[address+3];
+	context->hscroll_b_fine = context->hscroll_b & 0xF;
+	//printf("%d: HScroll A: %d, HScroll B: %d\n", context->vcounter, context->hscroll_a, context->hscroll_b);
+	//243-246 inclusive
+	for (int i = 0; i < 3; i++)
+	{
+		render_sprite_cells(context);
+		scan_sprite_table(context->vcounter, context);
+	}
+	//247
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_b,
+		context->buf_b_off,
+		context->col_1
+	);
+	render_sprite_cells(context);
+	scan_sprite_table(context->vcounter, context);
+	//248
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_b,
+		context->buf_b_off + 8,
+		context->col_2
+	);
+	render_sprite_cells(context);
+	scan_sprite_table(context->vcounter, context);
+	context->buf_a_off = (context->buf_a_off + SCROLL_BUFFER_DRAW) & SCROLL_BUFFER_MASK;
+	context->buf_b_off = (context->buf_b_off + SCROLL_BUFFER_DRAW) & SCROLL_BUFFER_MASK;
+	//250
+	render_sprite_cells(context);
+	scan_sprite_table(context->vcounter, context);
+	//251
+	scan_sprite_table(context->vcounter, context);//Just a guess
+	//252
+	scan_sprite_table(context->vcounter, context);//Just a guess
+	//254
+	render_sprite_cells(context);
+	scan_sprite_table(context->vcounter, context);
+	//255
+	if (context->cur_slot >= 0 && context->sprite_draw_list[context->cur_slot].x_pos) {
+		context->flags |= FLAG_DOT_OFLOW;
+	}
+	scan_sprite_table(context->vcounter, context);
+	//0
+	scan_sprite_table(context->vcounter, context);//Just a guess
+	//seems like the sprite table scan fills a shift register
+	//values are FIFO, but unused slots precede used slots
+	//so we set cur_slot to slot_counter and let it wrap around to
+	//the beginning of the list
+	context->cur_slot = context->slot_counter;
+	context->sprite_x_offset = 0;
+	context->sprite_draws = MAX_SPRITES_LINE;
+	//background planes and layer compositing
+	for (int col = 0; col < 42; col+=2)
+	{
+		read_map_scroll_a(col, context->vcounter, context);
+		render_map_1(context);
+		render_map_2(context);
+		read_map_scroll_b(col, context->vcounter, context);
+		render_map_3(context);
+		render_map_output(context->vcounter, col, context);
+	}
+	//sprite rendering phase 2
+	for (int i = 0; i < MAX_SPRITES_LINE; i++)
+	{
+		read_sprite_x(context->vcounter, context);
+	}
+	//163
+	context->cur_slot = MAX_SPRITES_LINE-1;
+	memset(context->linebuf, 0, LINEBUF_SIZE);
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_a, context->buf_a_off,
+		context->col_1
+	);
+	context->flags &= ~FLAG_MASKED;
+	render_sprite_cells(context);
+	//164
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_a, context->buf_a_off + 8,
+		context->col_2
+	);
+	render_sprite_cells(context);
+	context->cycles += MCLKS_LINE;
+	vdp_advance_line(context);
+	src = context->compositebuf;
+	dst = context->output;
+	if (test_layer) {
+		for (int i = 0; i < (LINE_CHANGE_H40 - BG_START_SLOT) * 2; i++)
+		{
+			*(dst++) = context->colors[*(src++)];
+		}
+	} else {
+		for (int i = 0; i < (LINE_CHANGE_H40 - BG_START_SLOT) * 2; i++)
+		{
+			if (*src & 0x3F) {
+				*(dst++) = context->colors[*(src++)];
+			} else {
+				*(dst++) = context->colors[(*(src++) & 0xC0) | bgindex];
+			}
+		}
+	}
+}
 static void vdp_h40(vdp_context * context, uint32_t target_cycles)
 {
 	uint16_t address;
@@ -2466,6 +2751,13 @@
 	for (;;)
 	{
 	case 165:
+		//only consider doing a line at a time if the FIFO is empty, there are no pending reads and there is no DMA running
+		if (context->fifo_read == -1 && !(context->flags & FLAG_DMA_RUN) && ((context->cd & 1) || (context->flags & FLAG_READ_FETCHED))) {
+			while (target_cycles - context->cycles >= MCLKS_LINE && context->state != PREPARING && context->vcounter != context->inactive_start) {
+				vdp_h40_line(context);
+			}
+			CHECK_ONLY
+		}
 		OUTPUT_PIXEL(165)
 		if (!(context->regs[REG_MODE_3] & BIT_VSCROLL)) {
 			//TODO: Develop some tests on hardware to see when vscroll latch actually happens for full plane mode
@@ -2553,7 +2845,9 @@
 		render_border_garbage(context, address, context->tmp_buf_a, context->buf_a_off+8, context->col_2);
 		address += (context->vcounter & mask) * 4;
 		context->hscroll_a = context->vdpmem[address] << 8 | context->vdpmem[address+1];
+		context->hscroll_a_fine = context->hscroll_a & 0xF;
 		context->hscroll_b = context->vdpmem[address+2] << 8 | context->vdpmem[address+3];
+		context->hscroll_b_fine = context->hscroll_b & 0xF;
 		//printf("%d: HScroll A: %d, HScroll B: %d\n", context->vcounter, context->hscroll_a, context->hscroll_b);
 		if (context->flags & FLAG_DMA_RUN) { run_dma_src(context, -1); }
 		context->hslot++;
@@ -2581,6 +2875,9 @@
 		CHECK_LIMIT
 	SPRITE_RENDER_H40(254)
 	case 255:
+		if (context->cur_slot >= 0 && context->sprite_draw_list[context->cur_slot].x_pos) {
+			context->flags |= FLAG_DOT_OFLOW;
+		}
 		render_map_3(context);
 		scan_sprite_table(context->vcounter, context);//Just a guess
 		CHECK_LIMIT
@@ -2592,8 +2889,8 @@
 		//so we set cur_slot to slot_counter and let it wrap around to
 		//the beginning of the list
 		context->cur_slot = context->slot_counter;
-		context->sprite_draws = MAX_DRAWS;
-		context->flags &= (~FLAG_CAN_MASK & ~FLAG_MASKED);
+		context->sprite_x_offset = 0;
+		context->sprite_draws = MAX_SPRITES_LINE;
 		CHECK_LIMIT
 	COLUMN_RENDER_BLOCK(2, 1)
 	COLUMN_RENDER_BLOCK(4, 9)
@@ -2626,7 +2923,7 @@
 	//sprite render to line buffer starts
 	case 163:
 		OUTPUT_PIXEL(163)
-		context->cur_slot = MAX_DRAWS-1;
+		context->cur_slot = MAX_SPRITES_LINE-1;
 		memset(context->linebuf, 0, LINEBUF_SIZE);
 		render_border_garbage(
 			context,
@@ -2634,6 +2931,7 @@
 			context->tmp_buf_a, context->buf_a_off,
 			context->col_1
 		);
+		context->flags &= ~FLAG_MASKED;
 		render_sprite_cells(context);
 		CHECK_LIMIT
 	case 164:
@@ -2769,7 +3067,9 @@
 		render_border_garbage(context, address, context->tmp_buf_a, context->buf_a_off+8, context->col_2);
 		address += (context->vcounter & mask) * 4;
 		context->hscroll_a = context->vdpmem[address] << 8 | context->vdpmem[address+1];
+		context->hscroll_a_fine = context->hscroll_a & 0xF;
 		context->hscroll_b = context->vdpmem[address+2] << 8 | context->vdpmem[address+3];
+		context->hscroll_b_fine = context->hscroll_b & 0xF;
 		//printf("%d: HScroll A: %d, HScroll B: %d\n", context->vcounter, context->hscroll_a, context->hscroll_b);
 		CHECK_LIMIT //provides "garbage" for border when plane A selected
 	SPRITE_RENDER_H32(245)
@@ -2782,6 +3082,9 @@
 		CHECK_LIMIT
 	SPRITE_RENDER_H32(250)
 	case 251:
+		if (context->cur_slot >= 0 && context->sprite_draw_list[context->cur_slot].x_pos) {
+			context->flags |= FLAG_DOT_OFLOW;
+		}
 		render_map_1(context);
 		scan_sprite_table(context->vcounter, context);//Just a guess
 		CHECK_LIMIT
@@ -2807,8 +3110,8 @@
 		//filled rather than the number of available slots
 		//context->slot_counter = MAX_SPRITES_LINE - context->slot_counter;
 		context->cur_slot = context->slot_counter;
-		context->sprite_draws = MAX_DRAWS_H32;
-		context->flags &= (~FLAG_CAN_MASK & ~FLAG_MASKED);
+		context->sprite_x_offset = 0;
+		context->sprite_draws = MAX_SPRITES_LINE_H32;
 		CHECK_LIMIT
 	COLUMN_RENDER_BLOCK(2, 1)
 	COLUMN_RENDER_BLOCK(4, 9)
@@ -2838,7 +3141,7 @@
 	//sprite render to line buffer starts
 	case 131:
 		OUTPUT_PIXEL(131)
-		context->cur_slot = MAX_DRAWS_H32-1;
+		context->cur_slot = MAX_SPRITES_LINE_H32-1;
 		memset(context->linebuf, 0, LINEBUF_SIZE);
 		render_border_garbage(
 			context,
@@ -2846,6 +3149,7 @@
 			context->tmp_buf_a, context->buf_a_off,
 			context->col_1
 		);
+		context->flags &= ~FLAG_MASKED;
 		render_sprite_cells(context);
 		CHECK_LIMIT
 	case 132:
@@ -2982,19 +3286,19 @@
 	COLUMN_RENDER_BLOCK_MODE4(30, 125)
 	COLUMN_RENDER_BLOCK_MODE4(31, 129)
 	case 133:
-		OUTPUT_PIXEL(133)
+		OUTPUT_PIXEL_MODE4(133)
 		external_slot(context);
 		CHECK_LIMIT
 	case 134:
-		OUTPUT_PIXEL(134)
+		OUTPUT_PIXEL_MODE4(134)
 		external_slot(context);
 		CHECK_LIMIT
 	case 135:
-		OUTPUT_PIXEL(135)
+		OUTPUT_PIXEL_MODE4(135)
 		external_slot(context);
 		CHECK_LIMIT
 	case 136: {
-		OUTPUT_PIXEL(136)
+		OUTPUT_PIXEL_MODE4(136)
 		external_slot(context);
 		//set things up for sprite rendering in the next slot
 		memset(context->linebuf, 0, LINEBUF_SIZE);
@@ -3028,11 +3332,11 @@
 	uint8_t *src = NULL;
 	if (test_layer == 2) {
 		//plane A
-		src_off += context->buf_a_off + context->hscroll_a;
+		src_off += context->buf_a_off - (context->hscroll_a & 0xF);
 		src = context->tmp_buf_a;
 	} else if (test_layer == 3){
 		//plane B
-		src_off += context->buf_b_off + context->hscroll_b;
+		src_off += context->buf_b_off - (context->hscroll_b & 0xF);
 		src = context->tmp_buf_b;
 	} else {
 		//sprite layer
@@ -3064,7 +3368,6 @@
 	uint8_t buf_clear_slot, index_reset_slot, bg_end_slot, vint_slot, line_change, jump_start, jump_dest, latch_slot;
 	uint8_t index_reset_value, max_draws, max_sprites;
 	uint16_t vint_line, active_line;
-	uint32_t bg_color;
 	
 	if (mode_5) {
 		if (is_h40) {
@@ -3072,7 +3375,7 @@
 			buf_clear_slot = 163;
 			index_reset_slot = 167;
 			bg_end_slot = BG_START_SLOT + LINEBUF_SIZE/2;
-			max_draws = MAX_DRAWS-1;
+			max_draws = MAX_SPRITES_LINE-1;
 			max_sprites = MAX_SPRITES_LINE;
 			index_reset_value = 0x80;
 			vint_slot = VINT_SLOT_H40;
@@ -3081,7 +3384,7 @@
 			jump_dest = 229;
 		} else {
 			bg_end_slot = BG_START_SLOT + (256+HORIZ_BORDER)/2;
-			max_draws = MAX_DRAWS_H32-1;
+			max_draws = MAX_SPRITES_LINE_H32-1;
 			max_sprites = MAX_SPRITES_LINE_H32;
 			buf_clear_slot = 128;
 			index_reset_slot = 132;
@@ -3108,7 +3411,6 @@
 		vint_line = context->inactive_start + 1;
 		vint_slot = VINT_SLOT_MODE4;
 		line_change = LINE_CHANGE_MODE4;
-		bg_color = render_map_color(0, 0, 0);
 		jump_start = 147;
 		jump_dest = 233;
 		if (context->regs[REG_MODE_1] & BIT_MODE_4) {
@@ -3156,6 +3458,8 @@
 				break;
 			case 0:
 				render_border_garbage(context, context->serial_address, context->tmp_buf_b, context->buf_b_off+8, context->col_2);
+				break;
+			case 1:
 				inactive_test_output(context, is_h40, test_layer);
 				break;
 			}
@@ -3189,12 +3493,15 @@
 		
 		if (dst) {
 			uint8_t bg_index;
+			uint32_t bg_color;
 			if (mode_5) {
 				bg_index = context->regs[REG_BG_COLOR] & 0x3F;
 				bg_color = context->colors[bg_index];
 			} else if (context->regs[REG_MODE_1] & BIT_MODE_4) {
 				bg_index = 0x10 + (context->regs[REG_BG_COLOR] & 0xF);
 				bg_color = context->colors[MODE4_OFFSET + bg_index];
+			} else {
+				bg_color = render_map_color(0, 0, 0);
 			}
 			if (context->done_composite) {
 				uint8_t pixel = context->compositebuf[dst-context->output];
@@ -3384,6 +3691,19 @@
 	return hv;
 }
 
+static void clear_pending(vdp_context *context)
+{
+	context->flags &= ~FLAG_PENDING;
+	context->address = context->address_latch;
+	//It seems like the DMA enable bit doesn't so much enable DMA so much 
+	//as it enables changing CD5 from control port writes
+	if (context->regs[REG_MODE_2] & BIT_DMA_ENABLE) {
+		context->cd = context->cd_latch;
+	} else {
+		context->cd = (context->cd & 0x20) | (context->cd_latch & 0x1F);
+	}
+}
+
 int vdp_control_port_write(vdp_context * context, uint16_t value)
 {
 	//printf("control port write: %X at %d\n", value, context->cycles);
@@ -3391,12 +3711,9 @@
 		return -1;
 	}
 	if (context->flags & FLAG_PENDING) {
-		context->address = (context->address & 0x3FFF) | (value << 14 & 0x1C000);
-		//It seems like the DMA enable bit doesn't so much enable DMA so much 
-		//as it enables changing CD5 from control port writes
-		uint8_t preserve = (context->regs[REG_MODE_2] & BIT_DMA_ENABLE) ? 0x3 : 0x23;
-		context->cd = (context->cd & preserve) | ((value >> 2) & ~preserve & 0xFF);
-		context->flags &= ~FLAG_PENDING;
+		context->address_latch = (context->address_latch & 0x3FFF) | (value << 14 & 0x1C000);
+		context->cd_latch = (context->cd_latch & 0x3) | ((value >> 2) & ~0x3 & 0xFF);
+		clear_pending(context);
 		//Should these be taken care of here or after the first write?
 		context->flags &= ~FLAG_READ_FETCHED;
 		context->flags2 &= ~FLAG2_READ_PENDING;
@@ -3425,8 +3742,10 @@
 		}
 	} else {
 		uint8_t mode_5 = context->regs[REG_MODE_2] & BIT_MODE_5;
-		context->address = (context->address &0xC000) | (value & 0x3FFF);
-		context->cd = (context->cd & 0x3C) | (value >> 14);
+		//contrary to what's in Charles MacDonald's doc, it seems top 2 address bits are cleared
+		//needed for the Mona in 344 Bytes demo
+		context->address_latch = (context->address_latch & 0x1C000) | (value & 0x3FFF);
+		context->cd_latch = (context->cd_latch & 0x3C) | (value >> 14);
 		if ((value & 0xC000) == 0x8000) {
 			//Register write
 			uint8_t reg = (value >> 8) & 0x1F;
@@ -3488,7 +3807,7 @@
 		return -1;
 	}
 	if (context->flags & FLAG_PENDING) {
-		context->flags &= ~FLAG_PENDING;
+		clear_pending(context);
 		//Should these be cleared here?
 		context->flags &= ~FLAG_READ_FETCHED;
 		context->flags2 &= ~FLAG2_READ_PENDING;
@@ -3523,7 +3842,7 @@
 void vdp_data_port_write_pbc(vdp_context * context, uint8_t value)
 {
 	if (context->flags & FLAG_PENDING) {
-		context->flags &= ~FLAG_PENDING;
+		clear_pending(context);
 		//Should these be cleared here?
 		context->flags &= ~FLAG_READ_FETCHED;
 		context->flags2 &= ~FLAG2_READ_PENDING;
@@ -3562,7 +3881,9 @@
 
 uint16_t vdp_control_port_read(vdp_context * context)
 {
-	context->flags &= ~FLAG_PENDING;
+	if (context->flags & FLAG_PENDING) {
+		clear_pending(context);
+	}
 	context->flags2 &= ~FLAG2_BYTE_PENDING;
 	//Bits 15-10 are not fixed like Charles MacDonald's doc suggests, but instead open bus values that reflect 68K prefetch
 	uint16_t value = context->system->get_open_bus_value(context->system) & 0xFC00;
@@ -3612,7 +3933,7 @@
 uint16_t vdp_data_port_read(vdp_context * context)
 {
 	if (context->flags & FLAG_PENDING) {
-		context->flags &= ~FLAG_PENDING;
+		clear_pending(context);
 		//Should these be cleared here?
 		context->flags &= ~FLAG_READ_FETCHED;
 		context->flags2 &= ~FLAG2_READ_PENDING;
@@ -3629,19 +3950,16 @@
 
 uint8_t vdp_data_port_read_pbc(vdp_context * context)
 {
-	context->flags &= ~(FLAG_PENDING | FLAG_READ_FETCHED);
+	if (context->flags & FLAG_PENDING) {
+		clear_pending(context);
+	}
+	context->flags &= ~FLAG_READ_FETCHED;
 	context->flags2 &= ~FLAG2_BYTE_PENDING;
 		
 	context->cd = VRAM_READ8;
 	return context->prefetch;
 }
 
-uint16_t vdp_test_port_read(vdp_context * context)
-{
-	//TODO: Find out what actually gets returned here
-	return context->test_port;
-}
-
 void vdp_adjust_cycles(vdp_context * context, uint32_t deduction)
 {
 	context->cycles -= deduction;
@@ -3934,12 +4252,14 @@
 	}
 }
 
+#define VDP_STATE_VERSION 3
 void vdp_serialize(vdp_context *context, serialize_buffer *buf)
 {
+	save_int8(buf, VDP_STATE_VERSION);
 	save_int8(buf, VRAM_SIZE / 1024);//VRAM size in KB, needed for future proofing
 	save_buffer8(buf, context->vdpmem, VRAM_SIZE);
 	save_buffer16(buf, context->cram, CRAM_SIZE);
-	save_buffer16(buf, context->vsram, VSRAM_SIZE);
+	save_buffer16(buf, context->vsram, MAX_VSRAM_SIZE);
 	save_buffer8(buf, context->sat_cache, SAT_CACHE_SIZE);
 	for (int i = 0; i <= REG_DMASRC_H; i++)
 	{
@@ -3990,13 +4310,15 @@
 	save_int8(buf, context->sprite_draws);
 	save_int8(buf, context->slot_counter);
 	save_int8(buf, context->cur_slot);
-	for (int i = 0; i < MAX_DRAWS; i++)
+	for (int i = 0; i < MAX_SPRITES_LINE; i++)
 	{
 		sprite_draw *draw = context->sprite_draw_list + i;
 		save_int16(buf, draw->address);
 		save_int16(buf, draw->x_pos);
 		save_int8(buf, draw->pal_priority);
 		save_int8(buf, draw->h_flip);
+		save_int8(buf, draw->width);
+		save_int8(buf, draw->height);
 	}
 	for (int i = 0; i < MAX_SPRITES_LINE; i++)
 	{
@@ -4010,12 +4332,24 @@
 	save_int32(buf, context->cycles);
 	save_int32(buf, context->pending_vint_start);
 	save_int32(buf, context->pending_hint_start);
+	save_int32(buf, context->address_latch);
+	save_int8(buf, context->cd_latch);
 }
 
 void vdp_deserialize(deserialize_buffer *buf, void *vcontext)
 {
 	vdp_context *context = vcontext;
-	uint8_t vramk = load_int8(buf);
+	uint8_t version = load_int8(buf);
+	uint8_t vramk;
+	if (version == 64) {
+		vramk = version;
+		version = 0;
+	} else {
+		vramk = load_int8(buf);
+	}
+	if (version > VDP_STATE_VERSION) {
+		warning("Save state has VDP version %d, but this build only understands versions %d and lower", version, VDP_STATE_VERSION);
+	}
 	load_buffer8(buf, context->vdpmem, (vramk * 1024) <= VRAM_SIZE ? vramk * 1024 : VRAM_SIZE);
 	if ((vramk * 1024) > VRAM_SIZE) {
 		buf->cur_pos += (vramk * 1024) - VRAM_SIZE;
@@ -4025,7 +4359,7 @@
 	{
 		update_color_map(context, i, context->cram[i]);
 	}
-	load_buffer16(buf, context->vsram, VSRAM_SIZE);
+	load_buffer16(buf, context->vsram, version > 1 ? MAX_VSRAM_SIZE : MIN_VSRAM_SIZE);
 	load_buffer8(buf, context->sat_cache, SAT_CACHE_SIZE);
 	for (int i = 0; i <= REG_DMASRC_H; i++)
 	{
@@ -4077,13 +4411,50 @@
 	context->sprite_draws = load_int8(buf);
 	context->slot_counter = load_int8(buf);
 	context->cur_slot = load_int8(buf);
-	for (int i = 0; i < MAX_DRAWS; i++)
-	{
-		sprite_draw *draw = context->sprite_draw_list + i;
-		draw->address = load_int16(buf);
-		draw->x_pos = load_int16(buf);
-		draw->pal_priority = load_int8(buf);
-		draw->h_flip = load_int8(buf);
+	if (version == 0) {
+		int cur_draw = 0;
+		for (int i = 0; i < MAX_SPRITES_LINE * 2; i++)
+		{
+			if (cur_draw < MAX_SPRITES_LINE) {
+				sprite_draw *last = cur_draw ? context->sprite_draw_list + cur_draw - 1 : NULL;
+				sprite_draw *draw = context->sprite_draw_list + cur_draw++;
+				draw->address = load_int16(buf);
+				draw->x_pos = load_int16(buf);
+				draw->pal_priority = load_int8(buf);
+				draw->h_flip = load_int8(buf);
+				draw->width = 1;
+				draw->height = 8;
+				
+				if (last && last->width < 4 && last->h_flip == draw->h_flip && last->pal_priority == draw->pal_priority) {
+					int adjust_x = draw->x_pos + draw->h_flip ? -8 : 8;
+					int height = draw->address - last->address /4;
+					if (last->x_pos == adjust_x && (
+						(last->width > 1 && height == last->height) || 
+						(last->width == 1 && (height == 8 || height == 16 || height == 24 || height == 32))
+					)) {
+						//current draw appears to be part of the same sprite as the last one, combine it
+						cur_draw--;
+						last->width++;
+					}
+				}
+			} else {
+				load_int16(buf);
+				load_int16(buf);
+				load_int8(buf);
+				load_int8(buf);
+			}
+		}
+	} else {
+		for (int i = 0; i < MAX_SPRITES_LINE; i++)
+		{
+			sprite_draw *draw = context->sprite_draw_list + i;
+			draw->address = load_int16(buf);
+			draw->x_pos = load_int16(buf);
+			draw->pal_priority = load_int8(buf);
+			draw->h_flip = load_int8(buf);
+			draw->width = load_int8(buf);
+			draw->height = load_int8(buf);
+		}
 	}
 	for (int i = 0; i < MAX_SPRITES_LINE; i++)
 	{
@@ -4097,6 +4468,13 @@
 	context->cycles = load_int32(buf);
 	context->pending_vint_start = load_int32(buf);
 	context->pending_hint_start = load_int32(buf);
+	if (version > 2) {
+		context->address_latch = load_int32(buf);
+		context->cd_latch = load_int8(buf);
+	} else {
+		context->address_latch = context->address;
+		context->cd_latch = context->cd;
+	}
 	update_video_params(context);
 }
 
--- a/vdp.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/vdp.h	Sat Apr 18 11:42:53 2020 -0700
@@ -16,7 +16,8 @@
 #define SHADOW_OFFSET CRAM_SIZE
 #define HIGHLIGHT_OFFSET (SHADOW_OFFSET+CRAM_SIZE)
 #define MODE4_OFFSET (HIGHLIGHT_OFFSET+CRAM_SIZE)
-#define VSRAM_SIZE 40
+#define MIN_VSRAM_SIZE 40
+#define MAX_VSRAM_SIZE 64
 #define VRAM_SIZE (64*1024)
 #define BORDER_LEFT 13
 #define BORDER_RIGHT 14
@@ -24,8 +25,6 @@
 #define LINEBUF_SIZE (320+HORIZ_BORDER) //H40 + full border
 #define SCROLL_BUFFER_SIZE 32
 #define BORDER_BOTTOM 13 //TODO: Replace with actual value
-#define MAX_DRAWS 40
-#define MAX_DRAWS_H32 32
 #define MAX_DRAWS_H32_MODE4 8
 #define MAX_SPRITES_LINE 20
 #define MAX_SPRITES_LINE_H32 16
@@ -133,6 +132,8 @@
 	int16_t x_pos;
 	uint8_t pal_priority;
 	uint8_t h_flip;
+	uint8_t width;
+	uint8_t height;
 } sprite_draw;
 
 typedef struct {
@@ -173,29 +174,35 @@
 	int32_t        fifo_write;
 	int32_t        fifo_read;
 	uint32_t       address;
+	uint32_t       address_latch;
 	uint32_t       serial_address;
 	uint32_t       colors[CRAM_SIZE*4];
 	uint32_t       debugcolors[1 << (3 + 1 + 1 + 1)];//3 bits for source, 1 bit for priority, 1 bit for shadow, 1 bit for hilight
 	uint16_t       cram[CRAM_SIZE];
 	uint32_t       frame;
+	uint32_t       vsram_size;
 	uint8_t        cd;
+	uint8_t        cd_latch;
 	uint8_t	       flags;
 	uint8_t        regs[VDP_REGS];
 	//cycle count in MCLKs
 	uint32_t       cycles;
 	uint32_t       pending_vint_start;
 	uint32_t       pending_hint_start;
-	uint16_t       vsram[VSRAM_SIZE];
+	uint32_t       top_offset;
+	uint16_t       vsram[MAX_VSRAM_SIZE];
 	uint16_t       vscroll_latch[2];
 	uint16_t       vcounter;
 	uint16_t       inactive_start;
 	uint16_t       border_top;
 	uint16_t       border_bot;
 	uint16_t       hscroll_a;
+	uint16_t       hscroll_a_fine;
 	uint16_t       hscroll_b;
+	uint16_t       hscroll_b_fine;
 	uint16_t       h40_lines;
 	uint16_t       output_lines;
-	sprite_draw    sprite_draw_list[MAX_DRAWS];
+	sprite_draw    sprite_draw_list[MAX_SPRITES_LINE];
 	sprite_info    sprite_info_list[MAX_SPRITES_LINE];
 	uint8_t        sat_cache[SAT_CACHE_SIZE];
 	uint16_t       col_1;
@@ -212,6 +219,7 @@
 	uint8_t        sprite_draws;
 	int8_t         slot_counter;
 	int8_t         cur_slot;
+	uint8_t        sprite_x_offset;
 	uint8_t        max_sprites_frame;
 	uint8_t        max_sprites_line;
 	uint8_t        fetch_tmp[2];
@@ -229,12 +237,13 @@
 	uint8_t        enabled_debuggers;
 	uint8_t        debug_fb_indices[VDP_NUM_DEBUG_TYPES];
 	uint8_t        debug_modes[VDP_NUM_DEBUG_TYPES];
+	uint8_t        pushed_frame;
 	uint8_t        vdpmem[];
 } vdp_context;
 
 
 
-vdp_context *init_vdp_context(uint8_t region_pal);
+vdp_context *init_vdp_context(uint8_t region_pal, uint8_t has_max_vsram);
 void vdp_free(vdp_context *context);
 void vdp_run_context_full(vdp_context * context, uint32_t target_cycles);
 void vdp_run_context(vdp_context * context, uint32_t target_cycles);
@@ -254,7 +263,6 @@
 uint8_t vdp_data_port_read_pbc(vdp_context * context);
 void vdp_latch_hv(vdp_context *context);
 uint16_t vdp_hv_counter_read(vdp_context * context);
-uint16_t vdp_test_port_read(vdp_context * context);
 void vdp_adjust_cycles(vdp_context * context, uint32_t deduction);
 uint32_t vdp_next_hint(vdp_context * context);
 uint32_t vdp_next_vint(vdp_context * context);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vgm.c	Sat Apr 18 11:42:53 2020 -0700
@@ -0,0 +1,117 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#include "vgm.h"
+
+vgm_writer *vgm_write_open(char *filename, uint32_t rate, uint32_t clock, uint32_t cycle)
+{
+	FILE *f = fopen(filename, "wb");
+	if (!f) {
+		return NULL;
+	}
+	vgm_writer *writer = calloc(sizeof(vgm_writer), 1);
+	memcpy(writer->header.ident, "Vgm ", 4);
+	writer->header.version = 0x150;
+	writer->header.data_offset = sizeof(writer->header) - offsetof(vgm_header, data_offset);
+	writer->header.rate = rate;
+	writer->f = f;
+	if (1 != fwrite(&writer->header, sizeof(writer->header), 1, f)) {
+		free(writer);
+		fclose(f);
+		return NULL;
+	}
+	writer->master_clock = clock;
+	writer->last_cycle = cycle;
+	
+	return writer;
+}
+
+void vgm_sn76489_init(vgm_writer *writer, uint32_t clock, uint16_t feedback, uint8_t shift_reg_size, uint8_t flags)
+{
+	if (flags && writer->header.version < 0x151) {
+		writer->header.version = 0x151;
+	}
+	writer->header.sn76489_clk = clock,
+	writer->header.sn76489_fb = feedback;
+	writer->header.sn76489_shift = shift_reg_size;
+	writer->header.sn76489_flags = flags;
+}
+
+static void wait_commands(vgm_writer *writer, uint32_t delta)
+{
+	if (!delta) {
+		return;
+	}
+	if (delta <= 0x10) {
+		fputc(CMD_WAIT_SHORT + (delta - 1), writer->f);
+	} else if (delta >= 735 && delta <= (735 + 0x10)) {
+		fputc(CMD_WAIT_60, writer->f);
+		wait_commands(writer, delta - 735);
+	} else if (delta >= 882 && delta <= (882 + 0x10)) {
+		fputc(CMD_WAIT_50, writer->f);
+		wait_commands(writer, delta - 882);
+	} else if (delta > 0xFFFF) {
+		uint8_t cmd[3] = {CMD_WAIT, 0xFF, 0xFF};
+		fwrite(cmd, 1, sizeof(cmd), writer->f);
+		wait_commands(writer, delta - 0xFFFF);
+	} else {
+		uint8_t cmd[3] = {CMD_WAIT, delta, delta >> 8};
+		fwrite(cmd, 1, sizeof(cmd), writer->f);
+	}
+}
+
+static void add_wait(vgm_writer *writer, uint32_t cycle)
+{
+	uint64_t delta = cycle - writer->last_cycle;
+	delta *= (uint64_t)44100;
+	delta /= (uint64_t)writer->master_clock;
+	
+	uint32_t mclks_per_sample = writer->master_clock / 44100;
+	writer->last_cycle += delta * mclks_per_sample; 
+	writer->header.num_samples += delta;
+	wait_commands(writer, delta);
+}
+
+void vgm_sn76489_write(vgm_writer *writer, uint32_t cycle, uint8_t value)
+{
+	add_wait(writer, cycle);
+	uint8_t cmd[2] = {CMD_PSG, value};
+	fwrite(cmd, 1, sizeof(cmd), writer->f);
+}
+
+void vgm_ym2612_init(vgm_writer *writer, uint32_t clock)
+{
+	writer->header.ym2612_clk = clock;
+}
+
+void vgm_ym2612_part1_write(vgm_writer *writer, uint32_t cycle, uint8_t reg, uint8_t value)
+{
+	add_wait(writer, cycle);
+	uint8_t cmd[3] = {CMD_YM2612_0, reg, value};
+	fwrite(cmd, 1, sizeof(cmd), writer->f);
+}
+
+void vgm_ym2612_part2_write(vgm_writer *writer, uint32_t cycle, uint8_t reg, uint8_t value)
+{
+	add_wait(writer, cycle);
+	uint8_t cmd[3] = {CMD_YM2612_1, reg, value};
+	fwrite(cmd, 1, sizeof(cmd), writer->f);
+}
+
+void vgm_adjust_cycles(vgm_writer *writer, uint32_t deduction)
+{
+	if (deduction > writer->last_cycle) {
+		writer->last_cycle = 0;
+	} else {
+		writer->last_cycle -= deduction;
+	}
+}
+
+void vgm_close(vgm_writer *writer)
+{
+	writer->header.eof_offset = ftell(writer->f) - offsetof(vgm_header, eof_offset);
+	fseek(writer->f, SEEK_SET, 0);
+	fwrite(&writer->header, sizeof(writer->header), 1, writer->f);
+	fclose(writer->f);
+	free(writer);
+}
\ No newline at end of file
--- a/vgm.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/vgm.h	Sat Apr 18 11:42:53 2020 -0700
@@ -1,6 +1,9 @@
 #ifndef VGM_H_
 #define VGM_H_
 
+#include <stdint.h>
+#include <stdio.h>
+
 #pragma pack(push, 1)
 typedef struct {
 	char     ident[4];
@@ -71,4 +74,20 @@
 	uint8_t           type;
 } data_block;
 
+typedef struct {
+	vgm_header header;
+	FILE       *f;
+	uint32_t   master_clock;
+	uint32_t   last_cycle;
+} vgm_writer;
+
+vgm_writer *vgm_write_open(char *filename, uint32_t rate, uint32_t clock, uint32_t cycle);
+void vgm_sn76489_init(vgm_writer *writer, uint32_t clock, uint16_t feedback, uint8_t shift_reg_size, uint8_t flags);
+void vgm_sn76489_write(vgm_writer *writer, uint32_t cycle, uint8_t value);
+void vgm_ym2612_init(vgm_writer *writer, uint32_t clock);
+void vgm_ym2612_part1_write(vgm_writer *writer, uint32_t cycle, uint8_t reg, uint8_t value);
+void vgm_ym2612_part2_write(vgm_writer *writer, uint32_t cycle, uint8_t reg, uint8_t value);
+void vgm_adjust_cycles(vgm_writer *writer, uint32_t deduction);
+void vgm_close(vgm_writer *writer);
+
 #endif //VGM_H_
--- a/ym2612.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/ym2612.c	Sat Apr 18 11:42:53 2020 -0700
@@ -21,9 +21,7 @@
 #define dfopen(var, fname, mode)
 #endif
 
-#define BUSY_CYCLES_ADDRESS 17
-#define BUSY_CYCLES_DATA_LOW 83
-#define BUSY_CYCLES_DATA_HIGH 47
+#define BUSY_CYCLES 32
 #define OP_UPDATE_PERIOD 144
 
 #define BIT_TIMERA_ENABLE 0x1
@@ -122,6 +120,27 @@
 	render_audio_adjust_clock(context->audio, master_clock, context->clock_inc * NUM_OPERATORS);
 }
 
+void ym_adjust_cycles(ym2612_context *context, uint32_t deduction)
+{
+	context->current_cycle -= deduction;
+	if (context->write_cycle != CYCLE_NEVER && context->write_cycle >= deduction) {
+		context->write_cycle -= deduction;
+	} else {
+		context->write_cycle = CYCLE_NEVER;
+	}
+	if (context->busy_start != CYCLE_NEVER && context->busy_start >= deduction) {
+		context->busy_start -= deduction;
+	} else {
+		context->busy_start = CYCLE_NEVER;
+	}
+	if (context->last_status_cycle != CYCLE_NEVER && context->last_status_cycle >= deduction) {
+		context->last_status_cycle -= deduction;
+	} else {
+		context->last_status = 0;
+		context->last_status_cycle = CYCLE_NEVER;
+	}
+}
+
 #ifdef __ANDROID__
 #define log2(x) (log(x)/log(2))
 #endif
@@ -173,7 +192,11 @@
 	dfopen(debug_file, "ym_debug.txt", "w");
 	memset(context, 0, sizeof(*context));
 	context->clock_inc = clock_div * 6;
+	context->busy_cycles = BUSY_CYCLES * context->clock_inc;
 	context->audio = render_audio_source(master_clock, context->clock_inc * NUM_OPERATORS, 2);
+	//TODO: pick a randomish high initial value and lower it over time
+	context->invalid_status_decay = 225000 * context->clock_inc;
+	context->status_address_mask = (options & YM_OPT_3834) ? 0 : 3;
 	
 	//some games seem to expect that the LR flags start out as 1
 	for (int i = 0; i < NUM_CHANNELS; i++) {
@@ -338,128 +361,287 @@
 	}
 }
 
+void ym_run_timers(ym2612_context *context)
+{
+	if (context->timer_control & BIT_TIMERA_ENABLE) {
+		if (context->timer_a != TIMER_A_MAX) {
+			context->timer_a++;
+			if (context->csm_keyon) {
+				csm_keyoff(context);
+			}
+		} else {
+			if (context->timer_control & BIT_TIMERA_LOAD) {
+				context->timer_control &= ~BIT_TIMERA_LOAD;
+			} else if (context->timer_control & BIT_TIMERA_OVEREN) {
+				context->status |= BIT_STATUS_TIMERA;
+			}
+			context->timer_a = context->timer_a_load;
+			if (!context->csm_keyon && context->ch3_mode == CSM_MODE) {
+				context->csm_keyon = 0xF0;
+				uint8_t changes = 0xF0 ^ context->channels[2].keyon;;
+				for (uint8_t op = 2*4, bit = 0; op < 3*4; op++, bit++)
+				{
+					if (changes & keyon_bits[bit]) {
+						keyon(context->operators + op, context->channels + 2);
+					}
+				}
+			}
+		}
+	}
+	if (!context->sub_timer_b) {
+		if (context->timer_control & BIT_TIMERB_ENABLE) {
+			if (context->timer_b != TIMER_B_MAX) {
+				context->timer_b++;
+			} else {
+				if (context->timer_control & BIT_TIMERB_LOAD) {
+					context->timer_control &= ~BIT_TIMERB_LOAD;
+				} else if (context->timer_control & BIT_TIMERB_OVEREN) {
+					context->status |= BIT_STATUS_TIMERB;
+				}
+				context->timer_b = context->timer_b_load;
+			}
+		}
+	}
+	context->sub_timer_b += 0x10;
+	//Update LFO
+	if (context->lfo_enable) {
+		if (context->lfo_counter) {
+			context->lfo_counter--;
+		} else {
+			context->lfo_counter = lfo_timer_values[context->lfo_freq];
+			context->lfo_am_step += 2;
+			context->lfo_am_step &= 0xFE;
+			uint8_t old_pm_step = context->lfo_pm_step;
+			context->lfo_pm_step = context->lfo_am_step / 8;
+			if (context->lfo_pm_step != old_pm_step) {
+				for (int chan = 0; chan < NUM_CHANNELS; chan++)
+				{
+					if (context->channels[chan].pms) {
+						for (int op = chan * 4; op < (chan + 1) * 4; op++)
+						{
+							context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
+void ym_run_envelope(ym2612_context *context, ym_channel *channel, ym_operator *operator)
+{
+	uint32_t env_cyc = context->env_counter;
+	uint8_t rate;
+	if (operator->env_phase == PHASE_DECAY && operator->envelope >= operator->sustain_level) {
+		//operator->envelope = operator->sustain_level;
+		operator->env_phase = PHASE_SUSTAIN;
+	}
+	rate = operator->rates[operator->env_phase];
+	if (rate) {
+		uint8_t ks = channel->keycode >> operator->key_scaling;;
+		rate = rate*2 + ks;
+		if (rate > 63) {
+			rate = 63;
+		}
+	}
+	uint32_t cycle_shift = rate < 0x30 ? ((0x2F - rate) >> 2) : 0;
+	if (!(env_cyc & ((1 << cycle_shift) - 1))) {
+		uint32_t update_cycle = env_cyc >> cycle_shift & 0x7;
+		uint16_t envelope_inc = rate_table[rate * 8 + update_cycle];
+		if (operator->env_phase == PHASE_ATTACK) {
+			//this can probably be optimized to a single shift rather than a multiply + shift
+			uint16_t old_env = operator->envelope;
+			operator->envelope += ((~operator->envelope * envelope_inc) >> 4) & 0xFFFFFFFC;
+			if (operator->envelope > old_env) {
+				//Handle overflow
+				operator->envelope = 0;
+			}
+			if (!operator->envelope) {
+				operator->env_phase = PHASE_DECAY;
+			}
+		} else {
+			if (operator->ssg) {
+				if (operator->envelope < SSG_CENTER) {
+					envelope_inc *= 4;
+				} else {
+					envelope_inc = 0;
+				}
+			}
+			//envelope value is 10-bits, but it will be used as a 4.8 value
+			operator->envelope += envelope_inc << 2;
+			//clamp to max attenuation value
+			if (
+				operator->envelope > MAX_ENVELOPE 
+				|| (operator->env_phase == PHASE_RELEASE && operator->envelope >= SSG_CENTER)
+			) {
+				operator->envelope = MAX_ENVELOPE;
+			}
+		}
+	}
+}
+
+void ym_run_phase(ym2612_context *context, uint32_t channel, uint32_t op)
+{
+	if (channel != 5 || !context->dac_enable) {
+		//printf("updating operator %d of channel %d\n", op, channel);
+		ym_operator * operator = context->operators + op;
+		ym_channel * chan = context->channels + channel;
+		uint16_t phase = operator->phase_counter >> 10 & 0x3FF;
+		operator->phase_counter += operator->phase_inc;//ym_calc_phase_inc(context, operator, op);
+		int16_t mod = 0;
+		if (op & 3) {
+			if (operator->mod_src[0]) {
+				mod = *operator->mod_src[0];
+				if (operator->mod_src[1]) {
+					mod += *operator->mod_src[1];
+				}
+				mod >>= YM_MOD_SHIFT;
+			}
+		} else {
+			if (chan->feedback) {
+				mod = (chan->op1_old + operator->output) >> (10-chan->feedback);
+			}
+		}
+		uint16_t env = operator->envelope;
+		if (operator->ssg) {
+			if (env >= SSG_CENTER) {
+				if (operator->ssg & SSG_ALTERNATE) {
+					if (operator->env_phase != PHASE_RELEASE && (
+						!(operator->ssg & SSG_HOLD) || ((operator->ssg ^ operator->inverted) & SSG_INVERT) == 0
+					)) {
+						operator->inverted ^= SSG_INVERT;
+					}
+				} else if (!(operator->ssg & SSG_HOLD)) {
+					phase = operator->phase_counter = 0;
+				}
+				if (
+					(operator->env_phase == PHASE_DECAY || operator->env_phase == PHASE_SUSTAIN) 
+					&& !(operator->ssg & SSG_HOLD)
+				) {
+					start_envelope(operator, chan);
+					env = operator->envelope;
+				}
+			}
+			if (operator->inverted) {
+				env = (SSG_CENTER - env) & MAX_ENVELOPE;
+			}
+		}
+		env += operator->total_level;
+		if (operator->am) {
+			uint16_t base_am = (context->lfo_am_step & 0x80 ? context->lfo_am_step : ~context->lfo_am_step) & 0x7E;
+			if (ams_shift[chan->ams] >= 0) {
+				env += (base_am >> ams_shift[chan->ams]) & MAX_ENVELOPE;
+			} else {
+				env += base_am << (-ams_shift[chan->ams]);
+			}
+		}
+		if (env > MAX_ENVELOPE) {
+			env = MAX_ENVELOPE;
+		}
+		if (first_key_on) {
+			dfprintf(debug_file, "op %d, base phase: %d, mod: %d, sine: %d, out: %d\n", op, phase, mod, sine_table[(phase+mod) & 0x1FF], pow_table[sine_table[phase & 0x1FF] + env]);
+		}
+		//if ((channel != 0 && channel != 4) || chan->algorithm != 5) {
+			phase += mod;
+		//}
+
+		int16_t output = pow_table[sine_table[phase & 0x1FF] + env];
+		if (phase & 0x200) {
+			output = -output;
+		}
+		if (op % 4 == 0) {
+			chan->op1_old = operator->output;
+		} else if (op % 4 == 2) {
+			chan->op2_old = operator->output;
+		}
+		operator->output = output;
+		//Update the channel output if we've updated all operators
+		if (op % 4 == 3) {
+			if (chan->algorithm < 4) {
+				chan->output = operator->output;
+			} else if(chan->algorithm == 4) {
+				chan->output = operator->output + context->operators[channel * 4 + 2].output;
+			} else {
+				output = 0;
+				for (uint32_t op = ((chan->algorithm == 7) ? 0 : 1) + channel*4; op < (channel+1)*4; op++) {
+					output += context->operators[op].output;
+				}
+				chan->output = output;
+			}
+			if (first_key_on) {
+				int16_t value = context->channels[channel].output & 0x3FE0;
+				if (value & 0x2000) {
+					value |= 0xC000;
+				}
+			}
+		}
+		//puts("operator update done");
+	}
+}
+
+void ym_output_sample(ym2612_context *context)
+{
+	int16_t left = 0, right = 0;
+	for (int i = 0; i < NUM_CHANNELS; i++) {
+		int16_t value = context->channels[i].output;
+		if (value > 0x1FE0) {
+			value = 0x1FE0;
+		} else if (value < -0x1FF0) {
+			value = -0x1FF0;
+		} else {
+			value &= 0x3FE0;
+			if (value & 0x2000) {
+				value |= 0xC000;
+			}
+		}
+		if (value >= 0) {
+			value += context->zero_offset;
+		} else {
+			value -= context->zero_offset;
+		}
+		if (context->channels[i].logfile) {
+			fwrite(&value, sizeof(value), 1, context->channels[i].logfile);
+		}
+		if (context->channels[i].lr & 0x80) {
+			left += (value * context->volume_mult) / context->volume_div;
+		} else if (context->zero_offset) {
+			if (value >= 0) {
+				left += (context->zero_offset * context->volume_mult) / context->volume_div;
+			} else {
+				left -= (context->zero_offset * context->volume_mult) / context->volume_div;
+			}
+		}
+		if (context->channels[i].lr & 0x40) {
+			right += (value * context->volume_mult) / context->volume_div;
+		} else if (context->zero_offset) {
+			if (value >= 0) {
+				right += (context->zero_offset * context->volume_mult) / context->volume_div;
+			} else {
+				right -= (context->zero_offset * context->volume_mult) / context->volume_div;
+			}
+		}
+	}
+	render_put_stereo_sample(context->audio, left, right);
+}
+
 void ym_run(ym2612_context * context, uint32_t to_cycle)
 {
+	if (context->current_cycle >= to_cycle) {
+		return;
+	}
 	//printf("Running YM2612 from cycle %d to cycle %d\n", context->current_cycle, to_cycle);
 	//TODO: Fix channel update order OR remap channels in register write
 	for (; context->current_cycle < to_cycle; context->current_cycle += context->clock_inc) {
 		//Update timers at beginning of 144 cycle period
 		if (!context->current_op) {
-			if (context->timer_control & BIT_TIMERA_ENABLE) {
-				if (context->timer_a != TIMER_A_MAX) {
-					context->timer_a++;
-					if (context->csm_keyon) {
-						csm_keyoff(context);
-					}
-				} else {
-					if (context->timer_control & BIT_TIMERA_LOAD) {
-						context->timer_control &= ~BIT_TIMERA_LOAD;
-					} else if (context->timer_control & BIT_TIMERA_OVEREN) {
-						context->status |= BIT_STATUS_TIMERA;
-					}
-					context->timer_a = context->timer_a_load;
-					if (!context->csm_keyon && context->ch3_mode == CSM_MODE) {
-						context->csm_keyon = 0xF0;
-						uint8_t changes = 0xF0 ^ context->channels[2].keyon;;
-						for (uint8_t op = 2*4, bit = 0; op < 3*4; op++, bit++)
-						{
-							if (changes & keyon_bits[bit]) {
-								keyon(context->operators + op, context->channels + 2);
-							}
-						}
-					}
-				}
-			}
-			if (!context->sub_timer_b) {
-				if (context->timer_control & BIT_TIMERB_ENABLE) {
-					if (context->timer_b != TIMER_B_MAX) {
-						context->timer_b++;
-					} else {
-						if (context->timer_control & BIT_TIMERB_LOAD) {
-							context->timer_control &= ~BIT_TIMERB_LOAD;
-						} else if (context->timer_control & BIT_TIMERB_OVEREN) {
-							context->status |= BIT_STATUS_TIMERB;
-						}
-						context->timer_b = context->timer_b_load;
-					}
-				}
-			}
-			context->sub_timer_b += 0x10;
-			//Update LFO
-			if (context->lfo_enable) {
-				if (context->lfo_counter) {
-					context->lfo_counter--;
-				} else {
-					context->lfo_counter = lfo_timer_values[context->lfo_freq];
-					context->lfo_am_step += 2;
-					context->lfo_am_step &= 0xFE;
-					context->lfo_pm_step = context->lfo_am_step / 8;
-				}
-			}
+			ym_run_timers(context);
 		}
 		//Update Envelope Generator
 		if (!(context->current_op % 3)) {
-			uint32_t env_cyc = context->env_counter;
 			uint32_t op = context->current_env_op;
 			ym_operator * operator = context->operators + op;
 			ym_channel * channel = context->channels + op/4;
-			uint8_t rate;
-			if (operator->env_phase == PHASE_DECAY && operator->envelope >= operator->sustain_level) {
-				//operator->envelope = operator->sustain_level;
-				operator->env_phase = PHASE_SUSTAIN;
-			}
-			rate = operator->rates[operator->env_phase];
-			if (rate) {
-				uint8_t ks = channel->keycode >> operator->key_scaling;;
-				rate = rate*2 + ks;
-				if (rate > 63) {
-					rate = 63;
-				}
-			}
-			uint32_t cycle_shift = rate < 0x30 ? ((0x2F - rate) >> 2) : 0;
-			if (first_key_on) {
-				dfprintf(debug_file, "Operator: %d, env rate: %d (2*%d+%d), env_cyc: %d, cycle_shift: %d, env_cyc & ((1 << cycle_shift) - 1): %d\n", op, rate, operator->rates[operator->env_phase], channel->keycode >> operator->key_scaling,env_cyc, cycle_shift, env_cyc & ((1 << cycle_shift) - 1));
-			}
-			if (!(env_cyc & ((1 << cycle_shift) - 1))) {
-				uint32_t update_cycle = env_cyc >> cycle_shift & 0x7;
-				uint16_t envelope_inc = rate_table[rate * 8 + update_cycle];
-				if (operator->env_phase == PHASE_ATTACK) {
-					//this can probably be optimized to a single shift rather than a multiply + shift
-					if (first_key_on) {
-						dfprintf(debug_file, "Changing op %d envelope %d by %d(%d * %d) in attack phase\n", op, operator->envelope, (~operator->envelope * envelope_inc) >> 4, ~operator->envelope, envelope_inc);
-					}
-					uint16_t old_env = operator->envelope;
-					operator->envelope += ((~operator->envelope * envelope_inc) >> 4) & 0xFFFFFFFC;
-					if (operator->envelope > old_env) {
-						//Handle overflow
-						operator->envelope = 0;
-					}
-					if (!operator->envelope) {
-						operator->env_phase = PHASE_DECAY;
-					}
-				} else {
-					if (first_key_on) {
-						dfprintf(debug_file, "Changing op %d envelope %d by %d in %s phase\n", op, operator->envelope, envelope_inc,
-							operator->env_phase == PHASE_SUSTAIN ? "sustain" : (operator->env_phase == PHASE_DECAY ? "decay": "release"));
-					}
-					if (operator->ssg) {
-						if (operator->envelope < SSG_CENTER) {
-							envelope_inc *= 4;
-						} else {
-							envelope_inc = 0;
-						}
-					}
-					//envelope value is 10-bits, but it will be used as a 4.8 value
-					operator->envelope += envelope_inc << 2;
-					//clamp to max attenuation value
-					if (
-						operator->envelope > MAX_ENVELOPE 
-						|| (operator->env_phase == PHASE_RELEASE && operator->envelope >= SSG_CENTER)
-					) {
-						operator->envelope = MAX_ENVELOPE;
-					}
-				}
-			}
+			ym_run_envelope(context, channel, operator);
 			context->current_env_op++;
 			if (context->current_env_op == NUM_OPERATORS) {
 				context->current_env_op = 0;
@@ -468,156 +650,14 @@
 		}
 
 		//Update Phase Generator
-		uint32_t channel = context->current_op / 4;
-		if (channel != 5 || !context->dac_enable) {
-			uint32_t op = context->current_op;
-			//printf("updating operator %d of channel %d\n", op, channel);
-			ym_operator * operator = context->operators + op;
-			ym_channel * chan = context->channels + channel;
-			uint16_t phase = operator->phase_counter >> 10 & 0x3FF;
-			operator->phase_counter += ym_calc_phase_inc(context, operator, context->current_op);
-			int16_t mod = 0;
-			if (op & 3) {
-				if (operator->mod_src[0]) {
-					mod = *operator->mod_src[0];
-					if (operator->mod_src[1]) {
-						mod += *operator->mod_src[1];
-					}
-					mod >>= YM_MOD_SHIFT;
-				}
-			} else {
-				if (chan->feedback) {
-					mod = (chan->op1_old + operator->output) >> (10-chan->feedback);
-				}
-			}
-			uint16_t env = operator->envelope;
-			if (operator->ssg) {
-				if (env >= SSG_CENTER) {
-					if (operator->ssg & SSG_ALTERNATE) {
-						if (operator->env_phase != PHASE_RELEASE && (
-							!(operator->ssg & SSG_HOLD) || ((operator->ssg ^ operator->inverted) & SSG_INVERT) == 0
-						)) {
-							operator->inverted ^= SSG_INVERT;
-						}
-					} else if (!(operator->ssg & SSG_HOLD)) {
-						phase = operator->phase_counter = 0;
-					}
-					if (
-						(operator->env_phase == PHASE_DECAY || operator->env_phase == PHASE_SUSTAIN) 
-						&& !(operator->ssg & SSG_HOLD)
-					) {
-						start_envelope(operator, chan);
-						env = operator->envelope;
-					}
-				}
-				if (operator->inverted) {
-					env = (SSG_CENTER - env) & MAX_ENVELOPE;
-				}
-			}
-			env += operator->total_level;
-			if (operator->am) {
-				uint16_t base_am = (context->lfo_am_step & 0x80 ? context->lfo_am_step : ~context->lfo_am_step) & 0x7E;
-				if (ams_shift[chan->ams] >= 0) {
-					env += (base_am >> ams_shift[chan->ams]) & MAX_ENVELOPE;
-				} else {
-					env += base_am << (-ams_shift[chan->ams]);
-				}
-			}
-			if (env > MAX_ENVELOPE) {
-				env = MAX_ENVELOPE;
-			}
-			if (first_key_on) {
-				dfprintf(debug_file, "op %d, base phase: %d, mod: %d, sine: %d, out: %d\n", op, phase, mod, sine_table[(phase+mod) & 0x1FF], pow_table[sine_table[phase & 0x1FF] + env]);
-			}
-			//if ((channel != 0 && channel != 4) || chan->algorithm != 5) {
-				phase += mod;
-			//}
-
-			int16_t output = pow_table[sine_table[phase & 0x1FF] + env];
-			if (phase & 0x200) {
-				output = -output;
-			}
-			if (op % 4 == 0) {
-				chan->op1_old = operator->output;
-			} else if (op % 4 == 2) {
-				chan->op2_old = operator->output;
-			}
-			operator->output = output;
-			//Update the channel output if we've updated all operators
-			if (op % 4 == 3) {
-				if (chan->algorithm < 4) {
-					chan->output = operator->output;
-				} else if(chan->algorithm == 4) {
-					chan->output = operator->output + context->operators[channel * 4 + 2].output;
-				} else {
-					output = 0;
-					for (uint32_t op = ((chan->algorithm == 7) ? 0 : 1) + channel*4; op < (channel+1)*4; op++) {
-						output += context->operators[op].output;
-					}
-					chan->output = output;
-				}
-				if (first_key_on) {
-					int16_t value = context->channels[channel].output & 0x3FE0;
-					if (value & 0x2000) {
-						value |= 0xC000;
-					}
-					dfprintf(debug_file, "channel %d output: %d\n", channel, (value * context->volume_mult) / context->volume_div);
-				}
-			}
-			//puts("operator update done");
-		}
+		ym_run_phase(context, context->current_op / 4, context->current_op);
 		context->current_op++;
 		if (context->current_op == NUM_OPERATORS) {
 			context->current_op = 0;
-			
-			int16_t left = 0, right = 0;
-			for (int i = 0; i < NUM_CHANNELS; i++) {
-				int16_t value = context->channels[i].output;
-				if (value > 0x1FE0) {
-					value = 0x1FE0;
-				} else if (value < -0x1FF0) {
-					value = -0x1FF0;
-				} else {
-					value &= 0x3FE0;
-					if (value & 0x2000) {
-						value |= 0xC000;
-					}
-				}
-				if (value >= 0) {
-					value += context->zero_offset;
-				} else {
-					value -= context->zero_offset;
-				}
-				if (context->channels[i].logfile) {
-					fwrite(&value, sizeof(value), 1, context->channels[i].logfile);
-				}
-				if (context->channels[i].lr & 0x80) {
-					left += (value * context->volume_mult) / context->volume_div;
-				} else if (context->zero_offset) {
-					if (value >= 0) {
-						left += (context->zero_offset * context->volume_mult) / context->volume_div;
-					} else {
-						left -= (context->zero_offset * context->volume_mult) / context->volume_div;
-					}
-				}
-				if (context->channels[i].lr & 0x40) {
-					right += (value * context->volume_mult) / context->volume_div;
-				} else if (context->zero_offset) {
-					if (value >= 0) {
-						right += (context->zero_offset * context->volume_mult) / context->volume_div;
-					} else {
-						right -= (context->zero_offset * context->volume_mult) / context->volume_div;
-					}
-				}
-			}
-			render_put_stereo_sample(context->audio, left, right);
+			ym_output_sample(context);
 		}
 		
 	}
-	if (context->current_cycle >= context->write_cycle + (context->busy_cycles * context->clock_inc / 6)) {
-		context->status &= 0x7F;
-		context->write_cycle = CYCLE_NEVER;
-	}
 	//printf("Done running YM2612 at cycle %d\n", context->current_cycle, to_cycle);
 }
 
@@ -626,9 +666,6 @@
 	//printf("address_write_part1: %X\n", address);
 	context->selected_reg = address;
 	context->selected_part = 0;
-	context->write_cycle = context->current_cycle;
-	context->busy_cycles = BUSY_CYCLES_ADDRESS;
-	context->status |= 0x80;
 }
 
 void ym_address_write_part2(ym2612_context * context, uint8_t address)
@@ -636,9 +673,6 @@
 	//printf("address_write_part2: %X\n", address);
 	context->selected_reg = address;
 	context->selected_part = 1;
-	context->write_cycle = context->current_cycle;
-	context->busy_cycles = BUSY_CYCLES_ADDRESS;
-	context->status |= 0x80;
 }
 
 static uint8_t fnum_to_keycode[] = {
@@ -745,8 +779,32 @@
 	return inc;
 }
 
+void ym_vgm_log(ym2612_context *context, uint32_t master_clock, vgm_writer *vgm)
+{
+	vgm_ym2612_init(vgm, 6 * master_clock / context->clock_inc);
+	context->vgm = vgm;
+	for (uint8_t reg = YM_PART1_START; reg < YM_REG_END; reg++) {
+		if ((reg >= REG_DETUNE_MULT && (reg & 3) == 3) || (reg >= 0x2D && reg < REG_DETUNE_MULT) || reg == 0x23 || reg == 0x29) {
+			//skip invalid registers
+			continue;
+		}
+		vgm_ym2612_part1_write(context->vgm, context->current_cycle, reg, context->part1_regs[reg - YM_PART1_START]);
+	}
+	
+	for (uint8_t reg = YM_PART2_START; reg < YM_REG_END; reg++) {
+		if ((reg & 3) == 3 || (reg >= REG_FNUM_LOW_CH3 && reg < REG_ALG_FEEDBACK)) {
+			//skip invalid registers
+			continue;
+		}
+		vgm_ym2612_part2_write(context->vgm, context->current_cycle, reg, context->part2_regs[reg - YM_PART2_START]);
+	}
+}
+
 void ym_data_write(ym2612_context * context, uint8_t value)
 {
+	context->write_cycle = context->current_cycle;
+	context->busy_start = context->current_cycle + context->clock_inc;
+	
 	if (context->selected_reg >= YM_REG_END) {
 		return;
 	}
@@ -754,11 +812,17 @@
 		if (context->selected_reg < YM_PART2_START) {
 			return;
 		}
+		if (context->vgm) {
+			vgm_ym2612_part2_write(context->vgm, context->current_cycle, context->selected_reg, value);
+		}
 		context->part2_regs[context->selected_reg - YM_PART2_START] = value;
 	} else {
 		if (context->selected_reg < YM_PART1_START) {
 			return;
 		}
+		if (context->vgm) {
+			vgm_ym2612_part1_write(context->vgm, context->current_cycle, context->selected_reg, value);
+		}
 		context->part1_regs[context->selected_reg - YM_PART1_START] = value;
 	}
 	dfprintf(debug_file, "write of %X to reg %X in part %d\n", value, context->selected_reg, context->selected_part+1);
@@ -773,7 +837,19 @@
 			}*/
 			context->lfo_enable = value & 0x8;
 			if (!context->lfo_enable) {
+				uint8_t old_pm_step = context->lfo_pm_step;
 				context->lfo_am_step = context->lfo_pm_step = 0;
+				if (old_pm_step) {
+					for (int chan = 0; chan < NUM_CHANNELS; chan++)
+					{
+						if (context->channels[chan].pms) {
+							for (int op = chan * 4; op < (chan + 1) * 4; op++)
+							{
+								context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
+							}
+						}
+					}
+				}
 			}
 			context->lfo_freq = value & 0x7;
 
@@ -809,7 +885,14 @@
 			if (context->ch3_mode == CSM_MODE && (value & 0xC0) != CSM_MODE && context->csm_keyon) {
 				csm_keyoff(context);
 			}
+			uint8_t old_mode = context->ch3_mode;
 			context->ch3_mode = value & 0xC0;
+			if (context->ch3_mode != old_mode) {
+				for (int op = 2 * 4; op < 3*4; op++)
+				{
+					context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
+				}
+			}
 			break;
 		}
 		case REG_KEY_ONOFF: {
@@ -861,6 +944,7 @@
 			case REG_DETUNE_MULT:
 				operator->detune = value >> 4 & 0x7;
 				operator->multiple = value & 0xF;
+				operator->phase_inc = ym_calc_phase_inc(context, operator, op);
 				break;
 			case REG_TOTAL_LEVEL:
 				operator->total_level = (value & 0x7F) << 5;
@@ -907,6 +991,10 @@
 				context->channels[channel].block = context->channels[channel].block_fnum_latch >> 3 & 0x7;
 				context->channels[channel].fnum = (context->channels[channel].block_fnum_latch & 0x7) << 8 | value;
 				context->channels[channel].keycode = context->channels[channel].block << 2 | fnum_to_keycode[context->channels[channel].fnum >> 7];
+				for (int op = channel * 4; op < (channel + 1) * 4; op++)
+				{
+					context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
+				}
 				break;
 			case REG_BLOCK_FNUM_H:{
 				context->channels[channel].block_fnum_latch = value;
@@ -917,6 +1005,10 @@
 					context->ch3_supp[channel].block = context->ch3_supp[channel].block_fnum_latch >> 3 & 0x7;
 					context->ch3_supp[channel].fnum = (context->ch3_supp[channel].block_fnum_latch & 0x7) << 8 | value;
 					context->ch3_supp[channel].keycode = context->ch3_supp[channel].block << 2 | fnum_to_keycode[context->ch3_supp[channel].fnum >> 7];
+					if (context->ch3_mode) {
+						int op = 2 * 4 + (channel < 2 ? (channel ^ 1) : channel);
+						context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
+					}
 				}
 				break;
 			case REG_BLOCK_FN_CH3:
@@ -1042,24 +1134,44 @@
 				context->channels[channel].feedback = value >> 3 & 0x7;
 				//printf("Algorithm %d, feedback %d for channel %d\n", value & 0x7, value >> 3 & 0x7, channel);
 				break;
-			case REG_LR_AMS_PMS:
+			case REG_LR_AMS_PMS: {
+				uint8_t old_pms = context->channels[channel].pms;
 				context->channels[channel].pms = (value & 0x7) * 32;
 				context->channels[channel].ams = value >> 4 & 0x3;
 				context->channels[channel].lr = value & 0xC0;
+				if (old_pms != context->channels[channel].pms) {
+					for (int op = channel * 4; op < (channel + 1) * 4; op++)
+					{
+						context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
+					}
+				}
 				//printf("Write of %X to LR_AMS_PMS reg for channel %d\n", value, channel);
 				break;
 			}
+			}
 		}
 	}
-
-	context->write_cycle = context->current_cycle;
-	context->busy_cycles = context->selected_reg < 0xA0 ? BUSY_CYCLES_DATA_LOW : BUSY_CYCLES_DATA_HIGH;
-	context->status |= 0x80;
 }
 
-uint8_t ym_read_status(ym2612_context * context)
+uint8_t ym_read_status(ym2612_context * context, uint32_t cycle, uint32_t port)
 {
-	return context->status;
+	uint8_t status;
+	port &= context->status_address_mask;
+	if (port) {
+		if (context->last_status_cycle != CYCLE_NEVER && cycle - context->last_status_cycle > context->invalid_status_decay) {
+			context->last_status = 0;
+		}
+		status = context->last_status;
+	} else {
+		status = context->status;
+		if (cycle >= context->busy_start && cycle < context->busy_start + context->busy_cycles) {
+			status |= 0x80;
+		}
+		context->last_status = status;
+		context->last_status_cycle = cycle;
+	}
+	return status;
+		
 }
 
 void ym_print_channel_info(ym2612_context *context, int channel)
@@ -1171,7 +1283,10 @@
 	save_int8(buf, context->selected_part);
 	save_int32(buf, context->current_cycle);
 	save_int32(buf, context->write_cycle);
-	save_int32(buf, context->busy_cycles);
+	save_int32(buf, context->busy_start);
+	save_int32(buf, context->last_status_cycle);
+	save_int32(buf, context->invalid_status_decay);
+	save_int8(buf, context->last_status);
 }
 
 void ym_deserialize(deserialize_buffer *buf, void *vcontext)
@@ -1246,5 +1361,13 @@
 	context->selected_part = load_int8(buf);
 	context->current_cycle = load_int32(buf);
 	context->write_cycle = load_int32(buf);
-	context->busy_cycles = load_int32(buf);
+	context->busy_start = load_int32(buf);
+	if (buf->size > buf->cur_pos) {
+		context->last_status_cycle = load_int32(buf);
+		context->invalid_status_decay = load_int32(buf);
+		context->last_status = load_int8(buf);
+	} else {
+		context->last_status = context->status;
+		context->last_status_cycle = context->write_cycle;
+	}
 }
--- a/ym2612.h	Thu Apr 18 22:06:47 2019 -0700
+++ b/ym2612.h	Sat Apr 18 11:42:53 2020 -0700
@@ -9,17 +9,20 @@
 #include <stdint.h>
 #include <stdio.h>
 #include "serialize.h"
-#include "render.h"
+#include "render_audio.h"
+#include "vgm.h"
 
 #define NUM_PART_REGS (0xB7-0x30)
 #define NUM_CHANNELS 6
 #define NUM_OPERATORS (4*NUM_CHANNELS)
 
 #define YM_OPT_WAVE_LOG 1
+#define YM_OPT_3834 2
 
 typedef struct {
 	int16_t  *mod_src[2];
 	uint32_t phase_counter;
+	uint32_t phase_inc;
 	uint16_t envelope;
 	int16_t  output;
 	uint16_t total_level;
@@ -66,11 +69,15 @@
 
 typedef struct {
 	audio_source *audio;
+	vgm_writer  *vgm;
     uint32_t    clock_inc;
 	uint32_t    current_cycle;
-	//TODO: Condense the next two fields into one
 	uint32_t    write_cycle;
+	uint32_t    busy_start;
 	uint32_t    busy_cycles;
+	uint32_t    last_status_cycle;
+	uint32_t    invalid_status_decay;
+	uint32_t    status_address_mask;
 	int32_t     volume_mult;
 	int32_t     volume_div;
 	ym_operator operators[NUM_OPERATORS];
@@ -96,6 +103,7 @@
 	uint8_t     lfo_pm_step;
 	uint8_t     csm_keyon;
 	uint8_t     status;
+	uint8_t     last_status;
 	uint8_t     selected_reg;
 	uint8_t     selected_part;
 	uint8_t     part1_regs[YM_PART1_REGS];
@@ -133,11 +141,13 @@
 void ym_free(ym2612_context *context);
 void ym_enable_zero_offset(ym2612_context *context, uint8_t enabled);
 void ym_adjust_master_clock(ym2612_context * context, uint32_t master_clock);
+void ym_adjust_cycles(ym2612_context *context, uint32_t deduction);
 void ym_run(ym2612_context * context, uint32_t to_cycle);
 void ym_address_write_part1(ym2612_context * context, uint8_t address);
 void ym_address_write_part2(ym2612_context * context, uint8_t address);
 void ym_data_write(ym2612_context * context, uint8_t value);
-uint8_t ym_read_status(ym2612_context * context);
+void ym_vgm_log(ym2612_context *context, uint32_t master_clock, vgm_writer *vgm);
+uint8_t ym_read_status(ym2612_context * context, uint32_t cycle, uint32_t port);
 uint8_t ym_load_gst(ym2612_context * context, FILE * gstfile);
 uint8_t ym_save_gst(ym2612_context * context, FILE * gstfile);
 void ym_print_channel_info(ym2612_context *context, int channel);
--- a/z80_to_x86.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/z80_to_x86.c	Sat Apr 18 11:42:53 2020 -0700
@@ -2327,7 +2327,7 @@
 		if (inst->addr_mode == Z80_IMMED_INDIRECT) {
 			mov_ir(code, inst->immed, opts->gen.scratch1, SZ_B);
 		} else {
-			zreg_to_native(opts, Z80_C, opts->gen.scratch2);
+			zreg_to_native(opts, Z80_C, opts->gen.scratch1);
 		}
 		call(code, opts->read_io);
 		if (inst->addr_mode != Z80_IMMED_INDIRECT) {
@@ -3413,6 +3413,9 @@
 	add_rdispr(code, options->gen.context_reg, offsetof(z80_context, target_cycle), options->gen.cycles, SZ_D);
 	cmp_rdispr(code, options->gen.context_reg, offsetof(z80_context, int_cycle), options->gen.cycles, SZ_D);
 	jcc(code, CC_B, skip_int);
+	//check that we are not past the end of interrupt pulse
+	cmp_rrdisp(code, options->gen.cycles, options->gen.context_reg, offsetof(z80_context, int_pulse_end), SZ_D);
+	jcc(code, CC_B, skip_int);
 	//set limit to the cycle limit
 	mov_rdispr(code, options->gen.context_reg, offsetof(z80_context, sync_cycle), options->gen.scratch2, SZ_D);
 	mov_rrdisp(code, options->gen.scratch2, options->gen.context_reg, offsetof(z80_context, target_cycle), SZ_D);
--- a/z80inst.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/z80inst.c	Sat Apr 18 11:42:53 2020 -0700
@@ -1570,7 +1570,7 @@
 uint8_t z80_is_terminal(z80inst * inst)
 {
 	return inst->op == Z80_RET || inst->op == Z80_RETI || inst->op == Z80_RETN || inst->op == Z80_JP
-		|| inst->op == Z80_JR || inst->op == Z80_HALT || (inst->op == Z80_NOP && inst->immed == 42);
+		|| inst->op == Z80_JR || (inst->op == Z80_NOP && inst->immed == 42);
 }
 
 
--- a/ztestrun.c	Thu Apr 18 22:06:47 2019 -0700
+++ b/ztestrun.c	Sat Apr 18 11:42:53 2020 -0700
@@ -4,7 +4,7 @@
  BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
 */
 #include "z80inst.h"
-#ifndef USE_NATIVE
+#ifdef NEW_CORE
 #include "z80.h"
 #include <string.h>
 #else
@@ -47,7 +47,7 @@
 	{ 0x0000, 0x100, 0xFF, 0, 0, 0,                                  NULL,    NULL, NULL, z80_unmapped_read, z80_unmapped_write}
 };
 
-#ifdef USE_NATIVE
+#ifndef NEW_CORE
 void z80_next_int_pulse(z80_context * context)
 {
 	context->int_pulse_start = context->int_pulse_end = CYCLE_NEVER;
@@ -98,7 +98,7 @@
 	fclose(f);
 	init_z80_opts(&opts, z80_map, 2, port_map, 1, 1, 0xFF);
 	context = init_z80_context(&opts);
-#ifndef USE_NATIVE
+#ifdef NEW_CORE
 	z80_execute(context, 1000);
 	printf("A: %X\nB: %X\nC: %X\nD: %X\nE: %X\nHL: %X\nIX: %X\nIY: %X\nSP: %X\n\nIM: %d, IFF1: %d, IFF2: %d\n",
 		context->main[7], context->main[0], context->main[1],