changeset 2053:3414a4423de1 segacd

Merge from default
author Michael Pavone <pavone@retrodev.com>
date Sat, 15 Jan 2022 13:15:21 -0800
parents 5dacaef602a7 (current diff) 3748a2a8a4b7 (diff)
children 8ee7ecbf3f21
files Makefile blastem.c genesis.c genesis.h romdb.c romdb.h system.c system.h
diffstat 115 files changed, 21387 insertions(+), 3005 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Sat Jan 05 00:58:08 2019 -0800
+++ b/.hgtags	Sat Jan 15 13:15:21 2022 -0800
@@ -10,3 +10,4 @@
 3d48cb0c28be9045866e00795b698086018b825f v0.5.1
 ef50c9affe6a7c86398f2c36eb5439a559808108 v0.6.0
 357b4951d9b2d1999e4c2765ee53e946aaab864d v0.6.1
+8aeac7bd9fa7d9d978c99ec07e9a68989a12e453 v0.6.2
--- a/Android.mk	Sat Jan 05 00:58:08 2019 -0800
+++ b/Android.mk	Sat Jan 15 13:15:21 2022 -0800
@@ -8,14 +8,21 @@
 
 LOCAL_C_INCLUDES := $(LOCAL_PATH)/$(SDL_PATH)/include
 
-LOCAL_CFLAGS += -std=gnu99 -DX86_32 -DDISABLE_OPENGL
+LOCAL_CFLAGS += -std=gnu99 -DX86_32 -DUSE_GLES
 
 # Add your application source files here...
 LOCAL_SRC_FILES := $(SDL_PATH)/src/main/android/SDL_android_main.c \
 	68kinst.c debug.c gst.c psg.c z80_to_x86.c backend.c io.c render_sdl.c \
 	tern.c backend_x86.c gdb_remote.c m68k_core.c romdb.c m68k_core_x86.c \
 	util.c wave.c blastem.c gen.c mem.c vdp.c ym2612.c config.c gen_x86.c \
-	terminal.c z80inst.c menu.c arena.c
+	terminal.c z80inst.c menu.c arena.c zlib/adler32.c zlib/compress.c \
+	zlib/crc32.c zlib/deflate.c zlib/gzclose.c zlib/gzlib.c zlib/gzread.c \
+	zlib/gzwrite.c zlib/infback.c zlib/inffast.c zlib/inflate.c \
+	zlib/inftrees.c zlib/trees.c zlib/uncompr.c zlib/zutil.c \
+	nuklear_ui/font_android.c nuklear_ui/blastem_nuklear.c nuklear_ui/sfnt.c \
+	ppm.c controller_info.c png.c system.c genesis.c sms.c serialize.c \
+	saves.c hash.c xband.c zip.c bindings.c jcart.c paths.c megawifi.c \
+	nor.c i2c.c sega_mapper.c realtec.c multi_game.c net.c
 
 LOCAL_SHARED_LIBRARIES := SDL2
 
--- a/CHANGELOG	Sat Jan 05 00:58:08 2019 -0800
+++ b/CHANGELOG	Sat Jan 15 13:15:21 2022 -0800
@@ -1,3 +1,33 @@
+0.6.2
+-----
+*New Features*
+
+ - Zipped and gzipped SMD ROMs are now supported
+ - Gain control for overall volume and FM/PSG individually
+ 
+*Accuracy/Completeness Improvements*
+
+ - Fixed timing of a few instructions in Z80 core
+ - Added optional emulation of YM2612 imperfections (aka "ladder effect")
+ - Fixed some unintentional extra precision in some FM LFO calculations
+ - Added a 1 sample delay in some FM operator results when used as modulators to match hardware
+
+*Bugfixes*
+
+ - Fixed regression in NBA JAM TE and possibly other 32MBit Acclaim mapper titles
+ - Added code to handle controllers that have their d-pads mapped as buttons or axes
+ - Removed some problematic SDL2 game controller mappings
+ - Fixed crash that occurred when releasing mouse too quickly when loading a ROM
+ - Fixed SMD ROM support
+ - Fixed handling of audio contexts with more or less than 2 channels
+ - Fixed off-by-one error in IO device selection UI
+ - Fixed regression in GDB remote debugging support on Linux and OS X
+
+*Other Changes*
+
+ - MegaWiFi hardware can now be enabled by a header string (still gated by config)
+ - Tweaked the style of checkboxes in the Nuklear UI to hopefully make the on/off state more clear
+
 0.6.1
 -----
 *Bugfixes*
@@ -40,7 +70,7 @@
  - Added support for Open GL ES in addition to the existing desktop GL support
  - Some small optimizations
  - Added ROM DB entry for Squirrel King to support it's copy protection
- - Added support for float32 audio output (fixes an issue with defautl SDL2 driver in Windows when using more recent SDL2 versions)
+ - Added support for float32 audio output (fixes an issue with default SDL2 driver in Windows when using more recent SDL2 versions)
  
 0.5.1
 -----
--- a/Makefile	Sat Jan 05 00:58:08 2019 -0800
+++ b/Makefile	Sat Jan 15 13:15:21 2022 -0800
@@ -1,28 +1,43 @@
+#disable built-in rules
+.SUFFIXES :
+
 ifndef OS
 OS:=$(shell uname -s)
 endif
 FIXUP:=true
 
+BUNDLED_LIBZ:=zlib/adler32.o zlib/compress.o zlib/crc32.o zlib/deflate.o zlib/gzclose.o zlib/gzlib.o zlib/gzread.o\
+	zlib/gzwrite.o zlib/infback.o zlib/inffast.o zlib/inflate.o zlib/inftrees.o zlib/trees.o zlib/uncompr.o zlib/zutil.o
+
 ifeq ($(OS),Windows)
-ifndef SDL2_PREFIX
-SDL2_PREFIX:="sdl/i686-w64-mingw32"
-endif
-ifndef GLEW_PREFIX
+
 GLEW_PREFIX:=glew
-endif
-ifndef GLEW32S_LIB
-GLEW32S_LIB:=$(GLEW_PREFIX)/lib/Release/Win32/glew32s.lib
-endif
-
 MEM:=mem_win.o
 TERMINAL:=terminal_win.o
 FONT:=nuklear_ui/font_win.o
 NET:=net_win.o
 EXE:=.exe
+SO:=dll
+CPU:=i686
+ifeq ($(CPU),i686)
 CC:=i686-w64-mingw32-gcc-win32
-CFLAGS:=-std=gnu99 -Wreturn-type -Werror=return-type -Werror=implicit-function-declaration -I"$(SDL2_PREFIX)/include/SDL2" -I"$(GLEW_PREFIX)/include" -DGLEW_STATIC
-LDFLAGS:= $(GLEW32S_LIB) -L"$(SDL2_PREFIX)/lib" -lm -lmingw32 -lSDL2main -lSDL2 -lws2_32 -lopengl32 -lglu32 -mwindows
-CPU:=i686
+WINDRES:=i686-w64-mingw32-windres
+GLUDIR:=Win32
+SDL2_PREFIX:="sdl/i686-w64-mingw32"
+else
+CC:=x86_64-w64-mingw32-gcc-win32
+WINDRES:=x86_64-w64-mingw32-windres
+SDL2_PREFIX:="sdl/x86_64-w64-mingw32"
+GLUDIR:=x64
+endif
+GLEW32S_LIB:=$(GLEW_PREFIX)/lib/Release/$(GLUDIR)/glew32s.lib
+CFLAGS:=-std=gnu99 -Wreturn-type -Werror=return-type -Werror=implicit-function-declaration -Wpointer-arith -Werror=pointer-arith
+LDFLAGS:=-lm -lmingw32 -lws2_32 -mwindows
+ifneq ($(MAKECMDGOALS),libblastem.dll)
+CFLAGS+= -I"$(SDL2_PREFIX)/include/SDL2" -I"$(GLEW_PREFIX)/include" -DGLEW_STATIC
+LDFLAGS+= $(GLEW32S_LIB) -L"$(SDL2_PREFIX)/lib" -lSDL2main -lSDL2 -lopengl32 -lglu32
+endif
+LIBZOBJS=$(BUNDLED_LIBZ)
 
 else
 
@@ -32,11 +47,21 @@
 EXE:=
 
 HAS_PROC:=$(shell if [ -d /proc ]; then /bin/echo -e -DHAS_PROC; fi)
-CFLAGS:=-std=gnu99 -Wreturn-type -Werror=return-type -Werror=implicit-function-declaration -Wno-unused-value $(HAS_PROC) -DHAVE_UNISTD_H
+CFLAGS:=-std=gnu99 -Wreturn-type -Werror=return-type -Werror=implicit-function-declaration -Wno-unused-value  -Wpointer-arith -Werror=pointer-arith $(HAS_PROC) -DHAVE_UNISTD_H
 
 ifeq ($(OS),Darwin)
 LIBS=sdl2 glew
 FONT:=nuklear_ui/font_mac.o
+SO:=dylib
+else
+SO:=so
+
+ifdef USE_FBDEV
+LIBS=alsa
+ifndef NOGL
+LIBS+=glesv2 egl
+endif
+CFLAGS+= -DUSE_GLES -DUSE_FBDEV -pthread
 else
 ifdef USE_GLES
 LIBS=sdl2 glesv2
@@ -44,9 +69,17 @@
 else
 LIBS=sdl2 glew gl
 endif #USE_GLES
+endif #USE_FBDEV
 FONT:=nuklear_ui/font.o
 endif #Darwin
 
+ifdef HOST_ZLIB
+LIBS+= zlib
+LIBZOBJS=
+else
+LIBZOBJS=$(BUNDLED_LIBZ)
+endif
+
 ifeq ($(OS),Darwin)
 #This should really be based on whether or not the C compiler is clang rather than based on the OS
 CFLAGS+= -Wno-logical-op-parentheses
@@ -63,20 +96,29 @@
 endif
 
 ifeq ($(OS),Darwin)
-CFLAGS+= -IFrameworks/SDL2.framework/Headers
-LDFLAGS+= -FFrameworks -framework SDL2 -framework OpenGL -framework AppKit
+SDL_INCLUDE_PATH:=Frameworks/SDL2.framework/Headers
+CFLAGS+=  -mmacosx-version-min=10.10
+LDFLAGS+= -FFrameworks -framework SDL2 -framework OpenGL -framework AppKit -mmacosx-version-min=10.10
 FIXUP:=install_name_tool -change @rpath/SDL2.framework/Versions/A/SDL2 @executable_path/Frameworks/SDL2.framework/Versions/A/SDL2
 else
-CFLAGS+= -Isdl/include
+SDL_INCLUDE_PATH:=sdl/include
 LDFLAGS+= -Wl,-rpath='$$ORIGIN/lib' -Llib -lSDL2
 ifndef USE_GLES
 LDFLAGS+= $(shell pkg-config --libs gl)
 endif
 endif #Darwin
+CFLAGS+= -I$(SDL_INCLUDE_PATH)
 
 else
+ifeq ($(MAKECMDGOALS),libblastem.$(SO))
+LDFLAGS:=-lm
+else
 CFLAGS:=$(shell pkg-config --cflags-only-I $(LIBS)) $(CFLAGS)
 LDFLAGS:=-lm $(shell pkg-config --libs $(LIBS))
+ifdef USE_FBDEV
+LDFLAGS+= -pthread
+endif
+endif #libblastem.so
 
 ifeq ($(OS),Darwin)
 LDFLAGS+= -framework OpenGL -framework AppKit
@@ -105,11 +147,11 @@
 endif
 
 ifdef PROFILE
-LDFLAGS+= -Wl,--no-as-needed -lprofiler -Wl,--as-needed
+PROFFLAGS:= -Wl,--no-as-needed -lprofiler -Wl,--as-needed
+CFLAGS+= -g3
 endif
 ifdef NOGL
 CFLAGS+= -DDISABLE_OPENGL
-NONUKLEAR:=1
 endif
 
 ifdef M68030
@@ -136,24 +178,33 @@
 endif
 
 TRANSOBJS=gen.o backend.o $(MEM) arena.o tern.o
-M68KOBJS=68kinst.o m68k_core.o
+M68KOBJS=68kinst.o
+
+ifdef NEW_CORE
+Z80OBJS=z80.o z80inst.o 
+M68KOBJS+= m68k.o
+CFLAGS+= -DNEW_CORE
+else
+Z80OBJS=z80inst.o z80_to_x86.o
 ifeq ($(CPU),x86_64)
-M68KOBJS+= m68k_core_x86.o
+M68KOBJS+= m68k_core.o m68k_core_x86.o
 TRANSOBJS+= gen_x86.o backend_x86.o
 else
 ifeq ($(CPU),i686)
-M68KOBJS+= m68k_core_x86.o
+M68KOBJS+= m68k_core.o m68k_core_x86.o
 TRANSOBJS+= gen_x86.o backend_x86.o
 endif
 endif
-
-Z80OBJS=z80inst.o z80_to_x86.o
-AUDIOOBJS=ym2612.o psg.o wave.o
+endif
+AUDIOOBJS=ym2612.o psg.o wave.o vgm.o event_log.o render_audio.o
 CONFIGOBJS=config.o tern.o util.o paths.o 
-NUKLEAROBJS=$(FONT) nuklear_ui/blastem_nuklear.o nuklear_ui/sfnt.o controller_info.o
-RENDEROBJS=render_sdl.o ppm.o
-LIBZOBJS=zlib/adler32.o zlib/compress.o zlib/crc32.o zlib/deflate.o zlib/gzclose.o zlib/gzlib.o zlib/gzread.o\
-	zlib/gzwrite.o zlib/infback.o zlib/inffast.o zlib/inflate.o zlib/inftrees.o zlib/trees.o zlib/uncompr.o zlib/zutil.o
+NUKLEAROBJS=$(FONT) nuklear_ui/blastem_nuklear.o nuklear_ui/sfnt.o
+RENDEROBJS=ppm.o controller_info.o
+ifdef USE_FBDEV
+RENDEROBJS+= render_fbdev.o
+else
+RENDEROBJS+= render_sdl.o
+endif
 	
 ifdef NOZLIB
 CFLAGS+= -DDISABLE_ZLIB
@@ -163,7 +214,11 @@
 
 MAINOBJS=blastem.o system.o genesis.o debug.o gdb_remote.o vdp.o $(RENDEROBJS) io.o romdb.o hash.o menu.o xband.o \
 	realtec.o i2c.o nor.o sega_mapper.o multi_game.o megawifi.o $(NET) serialize.o $(TERMINAL) $(CONFIGOBJS) gst.o \
-	$(M68KOBJS) $(TRANSOBJS) $(AUDIOOBJS) saves.o zip.o bindings.o jcart.o segacd.o
+	$(M68KOBJS) $(TRANSOBJS) $(AUDIOOBJS) saves.o zip.o bindings.o jcart.o gen_player.o segacd.o
+
+LIBOBJS=libblastem.o system.o genesis.o debug.o gdb_remote.o vdp.o io.o romdb.o hash.o xband.o realtec.o \
+	i2c.o nor.o sega_mapper.o multi_game.o megawifi.o $(NET) serialize.o $(TERMINAL) $(CONFIGOBJS) gst.o \
+	$(M68KOBJS) $(TRANSOBJS) $(AUDIOOBJS) saves.o jcart.o rom.db.o gen_player.o segacd.o $(LIBZOBJS)
 	
 ifdef NONUKLEAR
 CFLAGS+= -DDISABLE_NUKLEAR
@@ -187,26 +242,49 @@
 CFLAGS+=-DNO_Z80
 else
 MAINOBJS+= sms.o $(Z80OBJS)
+LIBOBJS+= sms.o $(Z80OBJS)
 endif
 
 ifeq ($(OS),Windows)
 MAINOBJS+= res.o
 endif
 
-ALL=dis$(EXE) zdis$(EXE) stateview$(EXE) vgmplay$(EXE) blastem$(EXE)
+ifdef CONFIG_PATH
+CFLAGS+= -DCONFIG_PATH='"'$(CONFIG_PATH)'"'
+endif
+
+ifdef DATA_PATH
+CFLAGS+= -DDATA_PATH='"'$(DATA_PATH)'"'
+endif
+
+ifdef FONT_PATH
+CFLAGS+= -DFONT_PATH='"'$(FONT_PATH)'"'
+endif
+
+ALL=dis$(EXE) zdis$(EXE) vgmplay$(EXE) blastem$(EXE)
 ifneq ($(OS),Windows)
 ALL+= termhelper
 endif
 
+ifeq ($(MAKECMDGOALS),libblastem.$(SO))
+CFLAGS+= -fpic -DIS_LIB
+endif
+
 all : $(ALL)
 
+libblastem.$(SO) : $(LIBOBJS)
+	$(CC) -shared -o $@ $^ $(LDFLAGS)
+
 blastem$(EXE) : $(MAINOBJS)
-	$(CC) -o $@ $^ $(LDFLAGS)
+	$(CC) -o $@ $^ $(LDFLAGS) $(PROFFLAGS)
 	$(FIXUP) ./$@
 	
 blastjag$(EXE) : jaguar.o jag_video.o $(RENDEROBJS) serialize.o $(M68KOBJS) $(TRANSOBJS) $(CONFIGOBJS)
 	$(CC) -o $@ $^ $(LDFLAGS)
 
+termhelper : termhelper.o
+	$(CC) -o $@ $^ $(LDFLAGS)
+
 dis$(EXE) : dis.o 68kinst.o tern.o vos_program_module.o
 	$(CC) -o $@ $^ $(OPT)
 	
@@ -220,27 +298,23 @@
 	ar rcs libemu68k.a $(M68KOBJS) $(TRANSOBJS)
 
 trans : trans.o serialize.o $(M68KOBJS) $(TRANSOBJS) util.o
-	$(CC) -o trans trans.o $(M68KOBJS) $(TRANSOBJS) util.o $(OPT)
+	$(CC) -o $@ $^ $(OPT)
 
 transz80 : transz80.o $(Z80OBJS) $(TRANSOBJS)
 	$(CC) -o transz80 transz80.o $(Z80OBJS) $(TRANSOBJS)
 
 ztestrun : ztestrun.o serialize.o $(Z80OBJS) $(TRANSOBJS)
-	$(CC) -o ztestrun ztestrun.o $(Z80OBJS) $(TRANSOBJS) $(OPT)
+	$(CC) -o ztestrun $^ $(OPT)
 
 ztestgen : ztestgen.o z80inst.o
 	$(CC) -ggdb -o ztestgen ztestgen.o z80inst.o
 
-stateview$(EXE) : stateview.o vdp.o $(RENDEROBJS) serialize.o $(CONFIGOBJS) gst.o
-	$(CC) -o $@ $^ $(LDFLAGS)
-	$(FIXUP) ./$@
-
 vgmplay$(EXE) : vgmplay.o $(RENDEROBJS) serialize.o $(CONFIGOBJS) $(AUDIOOBJS)
 	$(CC) -o $@ $^ $(LDFLAGS)
 	$(FIXUP) ./$@
 
 blastcpm : blastcpm.o util.o serialize.o $(Z80OBJS) $(TRANSOBJS)
-	$(CC) -o $@ $^ $(OPT)
+	$(CC) -o $@ $^ $(OPT) $(PROFFLAGS)
 
 test : test.o vdp.o
 	$(CC) -o test test.o vdp.o
@@ -266,6 +340,15 @@
 vos_prog_info : vos_prog_info.o vos_program_module.o
 	$(CC) -o vos_prog_info vos_prog_info.o vos_program_module.o
 
+m68k.c : m68k.cpu cpu_dsl.py
+	./cpu_dsl.py -d call $< > $@
+
+%.c : %.cpu cpu_dsl.py
+	./cpu_dsl.py -d goto $< > $@
+
+%.db.c : %.db
+	sed $< -e 's/"/\\"/g' -e 's/^\(.*\)$$/"\1\\n"/' -e'1s/^\(.*\)$$/const char $(shell echo $< | tr '.' '_')_data[] = \1/' -e '$$s/^\(.*\)$$/\1;/' > $@
+
 %.o : %.S
 	$(CC) -c -o $@ $<
 
@@ -284,10 +367,13 @@
 %.bin : %.s68
 	vasmm68k_mot -Fbin -m68000 -no-opt -spaces -o $@ -L $@.list $<
 
+%.md : %.s68
+	vasmm68k_mot -Fbin -m68000 -no-opt -spaces -o $@ -L $@.list $<
+
 %.bin : %.sz8
 	vasmz80_mot -Fbin -spaces -o $@ $<
 res.o : blastem.rc
-	i686-w64-mingw32-windres blastem.rc res.o
+	$(WINDRES) blastem.rc res.o
 
 arrow.tiles : arrow.png
 cursor.tiles : cursor.png
@@ -296,6 +382,7 @@
 font.tiles : font.png
 
 menu.bin : font_interlace_variable.tiles arrow.tiles cursor.tiles button.tiles font.tiles
+tmss.md : font.tiles
 
 clean :
 	rm -rf $(ALL) trans ztestrun ztestgen *.o nuklear_ui/*.o zlib/*.o
--- a/README	Sat Jan 05 00:58:08 2019 -0800
+++ b/README	Sat Jan 15 13:15:21 2022 -0800
@@ -281,6 +281,22 @@
 at least some Genesis/Megadrive models. Other models reportedly use an even
 lower value.
 
+"gain" specifies the gain in decibels to be applied to the overall output.
+
+"fm_gain" specifies the gain to be applied to the emulated FM output before
+mixing with the PSG.
+
+"psg_gain" specifies the gain to be applied to the emulated PSG output before
+mixing with the FM chip.
+
+"fm_dac" controls the characteristics of the DAC in the emulated FM chip. If
+this is set to "linear", then the DAC will have precise linear output similar
+to the integrated YM3438 in later Gen/MD consoles. If it is set to "zero_offset",
+there will be a larger gap between -1 and 0. This is commonly referred to as the
+"ladder effect". This will also cause "leakage" on channels that are muted or
+panned to one side in a similar manner to a discrete YM2612.
+
+
 Clocks
 ------
 
@@ -495,6 +511,15 @@
 Eke-Eke            - Eke-Eke wrote a great document on the use of I2C EEPROM in
                      Genesis games and also left some useful very helpful 
                      comments about problematic games in Genesis Plus GX
+					 
+Sauraen            - Sauraen has analyzed the YM2203 and YM2612 dies and written
+                     a VHDL operator implementation. These have been useful in
+                     improving the accuracy of my YM2612 core.
+
+Alexey Khokholov   - Alexey (aka Nuke.YKT) has analyzed the YM3438 die and written
+                     a fairly direct C implementation from that analysis. This
+                     has been a useful reference for verifying and improving my
+                     YM2612 core.
 
 Bart Trzynadlowski - His documents on the Genecyst save-state format and the
                      mapper used in Super Street Fighter 2 were definitely
@@ -530,5 +555,6 @@
 modify the program as long as you follow the terms of the license. See the file
 COPYING for full license details.
 
-Binary releases of BlastEm are packaged with GLEW and SDL2 which have thier own
-licenses. See GLEW-LICENSE and SDL-LICENSE for details.
+Binary releases of BlastEm are packaged with GLEW, SDL2 and zlib which have their
+own licenses. See GLEW-LICENSE and SDL-LICENSE for details. For zlib license
+information, please see zlib.h in the source code release.
--- a/android/assets/default.cfg	Sat Jan 05 00:58:08 2019 -0800
+++ b/android/assets/default.cfg	Sat Jan 15 13:15:21 2022 -0800
@@ -14,9 +14,14 @@
 		f gamepads.1.mode
 		enter gamepads.1.start
 
+		r ui.release_mouse
 		[ ui.vdp_debug_mode
-		] ui.vdp_debug_pal
 		u ui.enter_debugger
+		p ui.screenshot
+		b ui.plane_debug
+		v ui.vram_debug
+		c ui.cram_debug
+		n ui.compositing_debug
 		esc ui.exit
 		` ui.save_state
 		0 ui.set_speed.0
@@ -29,6 +34,11 @@
 		7 ui.set_speed.7
 		= ui.next_speed
 		- ui.prev_speed
+		f11 ui.toggle_fullscreen
+		tab ui.soft_reset
+		f5 ui.reload
+		z ui.sms_pause
+		rctrl ui.toggle_keyboard_captured
 		
 		
 		select gamepads.1.c
@@ -36,44 +46,230 @@
 		back ui.exit
 	}
 	pads {
-		0 {
+		default {
 			dpads {
 				0 {
-					up gamepads.1.up
-					down gamepads.1.down
-					left gamepads.1.left
-					right gamepads.1.right
+					up gamepads.n.up
+					down gamepads.n.down
+					left gamepads.n.left
+					right gamepads.n.right
 				}
 			}
 			buttons {
-				0 gamepads.1.a
-				1 gamepads.1.b
-				2 gamepads.1.c
-				3 gamepads.1.x
-				4 gamepads.1.y
-				5 gamepads.1.z
-				6 gamepads.1.mode
-				7 gamepads.1.start
+				a gamepads.n.a
+				b gamepads.n.b
+				rightshoulder gamepads.n.c
+				x gamepads.n.x
+				y gamepads.n.y
+				leftshoulder gamepads.n.z
+				back gamepads.n.mode
+				start gamepads.n.start
+				guide ui.exit
+				leftstick ui.save_state
+			}
+			axes {
+				lefty.positive gamepads.n.down
+				lefty.negative gamepads.n.up
+				leftx.positive gamepads.n.right
+				leftx.negative gamepads.n.left
+				lefttrigger ui.prev_speed
+				righttrigger ui.next_speed
+			}
+		}
+		ps4_6b_right {
+			axes {
+				lefttrigger ui.next_speed
+				leftx.negative gamepads.n.up
+				leftx.positive gamepads.n.down
+				lefty.negative gamepads.n.left
+				lefty.positive gamepads.n.right
+				righttrigger gamepads.n.c
+			}
+			buttons {
+				a gamepads.n.a
+				b gamepads.n.b
+				back ui.sms_pause
+				guide ui.exit
+				leftshoulder gamepads.n.mode
+				leftstick ui.save_state
+				rightshoulder gamepads.n.z
+				rightstick ui.prev_speed
+				start gamepads.n.start
+				x gamepads.n.x
+				y gamepads.n.y
+			}
+			dpads {
+				0 {
+					down gamepads.n.down
+					left gamepads.n.left
+					right gamepads.n.right
+					up gamepads.n.up
+				}
 			}
 		}
-		1 {
+		ps3_6b_right {
+			axes {
+				lefttrigger ui.next_speed
+				leftx.negative gamepads.n.up
+				leftx.positive gamepads.n.down
+				lefty.negative gamepads.n.left
+				lefty.positive gamepads.n.right
+				righttrigger gamepads.n.c
+			}
+			buttons {
+				a gamepads.n.a
+				b gamepads.n.b
+				back ui.sms_pause
+				guide ui.exit
+				leftshoulder gamepads.n.mode
+				leftstick ui.save_state
+				rightshoulder gamepads.n.z
+				rightstick ui.prev_speed
+				start gamepads.n.start
+				x gamepads.n.x
+				y gamepads.n.y
+			}
+			dpads {
+				0 {
+					down gamepads.n.down
+					left gamepads.n.left
+					right gamepads.n.right
+					up gamepads.n.up
+				}
+			}
+		}
+		xbox_360_6b_right {
+			axes {
+				lefttrigger ui.next_speed
+				leftx.negative gamepads.n.up
+				leftx.positive gamepads.n.down
+				lefty.negative gamepads.n.left
+				lefty.positive gamepads.n.right
+				righttrigger gamepads.n.c
+			}
+			buttons {
+				a gamepads.n.a
+				b gamepads.n.b
+				back ui.sms_pause
+				guide ui.exit
+				leftshoulder gamepads.n.mode
+				leftstick ui.save_state
+				rightshoulder gamepads.n.z
+				rightstick ui.prev_speed
+				start gamepads.n.start
+				x gamepads.n.x
+				y gamepads.n.y
+			}
 			dpads {
 				0 {
-					up gamepads.2.up
-					down gamepads.2.down
-					left gamepads.2.left
-					right gamepads.2.right
+					down gamepads.n.down
+					left gamepads.n.left
+					right gamepads.n.right
+					up gamepads.n.up
 				}
 			}
+		}
+		xbone_6b_right {
+			axes {
+				lefttrigger ui.next_speed
+				leftx.negative gamepads.n.up
+				leftx.positive gamepads.n.down
+				lefty.negative gamepads.n.left
+				lefty.positive gamepads.n.right
+				righttrigger gamepads.n.c
+			}
+			buttons {
+				a gamepads.n.a
+				b gamepads.n.b
+				back ui.sms_pause
+				guide ui.exit
+				leftshoulder gamepads.n.mode
+				leftstick ui.save_state
+				rightshoulder gamepads.n.z
+				rightstick ui.prev_speed
+				start gamepads.n.start
+				x gamepads.n.x
+				y gamepads.n.y
+			}
+			dpads {
+				0 {
+					down gamepads.n.down
+					left gamepads.n.left
+					right gamepads.n.right
+					up gamepads.n.up
+				}
+			}
+		}
+		genesis_6b_bumpers {
+			axes {
+				lefttrigger ui.exit
+				righttrigger gamepads.n.mode
+			}
 			buttons {
-				0 gamepads.2.a
-				1 gamepads.2.b
-				2 gamepads.2.c
-				3 gamepads.2.x
-				4 gamepads.2.y
-				5 gamepads.2.z
-				6 gamepads.2.mode
-				7 gamepads.2.start
+				a gamepads.n.a
+				b gamepads.n.b
+				back ui.sms_pause
+				guide ui.exit
+				leftshoulder gamepads.n.z
+				rightshoulder gamepads.n.c
+				start gamepads.n.start
+				x gamepads.n.x
+				y gamepads.n.y
+			}
+			dpads {
+				0 {
+					down gamepads.n.down
+					left gamepads.n.left
+					right gamepads.n.right
+					up gamepads.n.up
+				}
+			}
+		}
+		saturn_6b_bumpers {
+			axes {
+				lefttrigger ui.exit
+				righttrigger gamepads.n.mode
+			}
+			buttons {
+				a gamepads.n.a
+				b gamepads.n.b
+				back ui.sms_pause
+				guide ui.exit
+				leftshoulder gamepads.n.z
+				rightshoulder gamepads.n.c
+				start gamepads.n.start
+				x gamepads.n.x
+				y gamepads.n.y
+			}
+			dpads {
+				0 {
+					down gamepads.n.down
+					left gamepads.n.left
+					right gamepads.n.right
+					up gamepads.n.up
+				}
+			}
+		}
+	}
+	mice {
+		0 {
+			motion mouse.1.motion
+			buttons {
+				1 mouse.1.left
+				2 mouse.1.middle
+				3 mouse.1.right
+				4 mouse.1.start
+			}
+		}
+		#having the second host mouse also mapped to the first emulated
+		#mouse is useful for laptop users with an external mouse
+		1 {
+			motion mouse.1.motion
+			buttons {
+				1 mouse.1.left
+				2 mouse.1.middle
+				3 mouse.1.right
+				4 mouse.1.start
 			}
 		}
 	}
@@ -87,19 +283,69 @@
 }
 
 video {
+	#special value "stretch" will cause aspect to match window aspect ratio
+	aspect 4:3
 	width 640
+	#height is normally calculated automatically from width using the aspect setting
+	#if you would like to set it explicitly, uncomment the line below
+	#height 480
 	vertex_shader default.v.glsl
 	fragment_shader default.f.glsl
+	scanlines off
+	vsync off
+	fullscreen off
+	#setting gl to off will force use of the SDL2 fallback renderer
+	#this is useful for those running on machines with Open GL 2.0 unavailable
+	#so the warning doesn't display on startup
+	gl on
+	#scaling can be linear (for linear interpolation) or nearest (for nearest neighbor)
+	scaling linear
+	ntsc {
+		overscan {
+			#these values will result in square pixels in H40 mode
+			top 2
+			bottom 1
+			#if you want to completely hide the border instead
+			#comment out those two lines and uncomment these
+			#top 11
+			#bottom 8
+			
+			#these values will completely hide the horizontal border
+			left 13
+			right 14
+		}
+	}
+	pal {
+		overscan {
+			#these values will produce the same size border in V30 mode
+			#as the default NTSC settings will produce in V24 mode
+			#this results in a slightly vertically squished picture
+			#which is probably approximately correct on a properly calibrated TV
+			top 21
+			bottom 17
+			#for square pixels and zero border in V30 mode
+			#comment out those two lines and uncomment these
+			#top 30
+			#bottom 24
+			
+			#these values will completely hide the horizontal border
+			left 13
+			right 14
+		}
+	}
 }
 
 audio {
 	rate 48000
 	buffer 512
+	lowpass_cutoff 3390
 }
 
 clocks {
+	m68k_divider 7
 	max_cycles 3420
 	speeds {
+		0 100
 		1 150
 		2 200
 		3 300
@@ -111,8 +357,39 @@
 }
 
 ui {
+	#specifies the ROM that implements the Menu UI
 	rom menu.bin
+	#starting path for ROM browsing, accepts special variables $HOME, $EXEDIR
+	#and variables defined in the OS environment
+	initial_path $HOME
+	#if this is set to on, then the menu will remember the last path when visited
+	#if it's set to off, initial_path will always be used on startup
+	remember_path on
+	#path for storing internal screenshots, accepts the same variables as initial_path
+	screenshot_path $HOME
+	#see strftime for the format specifiers valid in screenshot_template
+	screenshot_template blastem_%Y%m%d_%H%M%S.png
+	#path template for saving SRAM, EEPROM and savestates
+	#accepts special variables $HOME, $EXEDIR, $USERDATA, $ROMNAME
+	save_path $USERDATA/blastem/$ROMNAME
+	#space delimited list of file extensions to filter against in menu
+	extensions bin gen md smd sms gg zip gz
+	#specifies the preferred save-state format, set to gst for Genecyst compatible states
+	state_format native
 }
 
-default_region U
+system {
+	#controls how the emulated system is synced to the host
+	#video provides the smoothest experience when the host and emulated system have similar refresh rates
+	#audio provides lower audio latency, especially when there is a refresh rate mismatch
+	sync_source audio
+	#set this to random to debug initialization bugs
+	ram_init zero
+	default_region U
+	#controls whether MegaWiFi support is enabled or not
+	#MegaWiFi allows ROMs to make connections to the internet
+	#so it should only be enabled for ROMs you trust
+	megawifi off
+}
 
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/android/assets/images	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,1 @@
+../../images
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/android/assets/shaders	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,1 @@
+../../shaders
\ No newline at end of file
--- a/android/jni/Application.mk	Sat Jan 05 00:58:08 2019 -0800
+++ b/android/jni/Application.mk	Sat Jan 15 13:15:21 2022 -0800
@@ -4,3 +4,5 @@
 # APP_STL := stlport_static 
 
 APP_ABI := x86
+APP_PLATFORM := android-16
+APP_OPTIM := release
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/android/src/org/libsdl/app/SDL.java	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,37 @@
+package org.libsdl.app;
+
+import android.content.Context;
+
+/**
+    SDL library initialization
+*/
+public class SDL {
+
+    // This function should be called first and sets up the native code
+    // so it can call into the Java classes
+    public static void setupJNI() {
+        SDLActivity.nativeSetupJNI();
+        SDLAudioManager.nativeSetupJNI();
+        SDLControllerManager.nativeSetupJNI();
+    }
+
+    // This function should be called each time the activity is started
+    public static void initialize() {
+        setContext(null);
+
+        SDLActivity.initialize();
+        SDLAudioManager.initialize();
+        SDLControllerManager.initialize();
+    }
+
+    // This function stores the current activity (SDL or not)
+    public static void setContext(Context context) {
+        mContext = context;
+    }
+
+    public static Context getContext() {
+        return mContext;
+    }
+
+    protected static Context mContext;
+}
--- a/android/src/org/libsdl/app/SDLActivity.java	Sat Jan 05 00:58:08 2019 -0800
+++ b/android/src/org/libsdl/app/SDLActivity.java	Sat Jan 15 13:15:21 2022 -0800
@@ -1,25 +1,30 @@
 package org.libsdl.app;
 
-import java.util.ArrayList;
+import java.io.IOException;
+import java.io.InputStream;
 import java.util.Arrays;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.List;
+import java.lang.reflect.Method;
+import java.util.Objects;
 
 import android.app.*;
 import android.content.*;
+import android.text.InputType;
 import android.view.*;
 import android.view.inputmethod.BaseInputConnection;
 import android.view.inputmethod.EditorInfo;
 import android.view.inputmethod.InputConnection;
 import android.view.inputmethod.InputMethodManager;
-import android.widget.AbsoluteLayout;
+import android.widget.RelativeLayout;
+import android.widget.Button;
+import android.widget.LinearLayout;
+import android.widget.TextView;
 import android.os.*;
 import android.util.Log;
+import android.util.SparseArray;
 import android.graphics.*;
-import android.media.*;
+import android.graphics.drawable.Drawable;
 import android.hardware.*;
-
+import android.content.pm.ActivityInfo;
 
 /**
     SDL Activity
@@ -27,34 +32,96 @@
 public class SDLActivity extends Activity {
     private static final String TAG = "SDL";
 
-    // Keep track of the paused state
-    public static boolean mIsPaused, mIsSurfaceReady, mHasFocus;
+    public static boolean mIsResumedCalled, mIsSurfaceReady, mHasFocus;
+
+    // Handle the state of the native layer
+    public enum NativeState {
+           INIT, RESUMED, PAUSED 
+    }
+
+    public static NativeState mNextNativeState;
+    public static NativeState mCurrentNativeState;
+
     public static boolean mExitCalledFromJava;
 
+    /** If shared libraries (e.g. SDL or the native application) could not be loaded. */
+    public static boolean mBrokenLibraries;
+
+    // If we want to separate mouse and touch events.
+    //  This is only toggled in native code when a hint is set!
+    public static boolean mSeparateMouseAndTouch;
+
     // Main components
     protected static SDLActivity mSingleton;
     protected static SDLSurface mSurface;
     protected static View mTextEdit;
+    protected static boolean mScreenKeyboardShown;
     protected static ViewGroup mLayout;
-    protected static SDLJoystickHandler mJoystickHandler;
+    protected static SDLClipboardHandler mClipboardHandler;
+
 
     // This is what SDL runs in. It invokes SDL_main(), eventually
     protected static Thread mSDLThread;
-    
-    // Audio
-    protected static AudioTrack mAudioTrack;
+
+    /**
+     * This method returns the name of the shared object with the application entry point
+     * It can be overridden by derived classes.
+     */
+    protected String getMainSharedObject() {
+        String library;
+        String[] libraries = SDLActivity.mSingleton.getLibraries();
+        if (libraries.length > 0) {
+            library = "lib" + libraries[libraries.length - 1] + ".so";
+        } else {
+            library = "libmain.so";
+        }
+        return library;
+    }
+
+    /**
+     * This method returns the name of the application entry point
+     * It can be overridden by derived classes.
+     */
+    protected String getMainFunction() {
+        return "SDL_main";
+    }
+
+    /**
+     * This method is called by SDL before loading the native shared libraries.
+     * It can be overridden to provide names of shared libraries to be loaded.
+     * The default implementation returns the defaults. It never returns null.
+     * An array returned by a new implementation must at least contain "SDL2".
+     * Also keep in mind that the order the libraries are loaded may matter.
+     * @return names of shared libraries to be loaded (e.g. "SDL2", "main").
+     */
+    protected String[] getLibraries() {
+        return new String[] {
+            "SDL2",
+            // "SDL2_image",
+            // "SDL2_mixer",
+            // "SDL2_net",
+            // "SDL2_ttf",
+            "main"
+        };
+    }
 
     // Load the .so
-    static {
-        System.loadLibrary("SDL2");
-        //System.loadLibrary("SDL2_image");
-        //System.loadLibrary("SDL2_mixer");
-        //System.loadLibrary("SDL2_net");
-        //System.loadLibrary("SDL2_ttf");
-        System.loadLibrary("main");
+    public void loadLibraries() {
+       for (String lib : getLibraries()) {
+          System.loadLibrary(lib);
+       }
     }
-    
-    
+
+    /**
+     * This method is called by SDL before starting the native application thread.
+     * It can be overridden to provide the arguments after the application name.
+     * The default implementation returns an empty array. It never returns null.
+     * @return arguments for the native application.
+     */
+    protected String[] getArguments() {
+        return new String[0];
+    }
+
     public static void initialize() {
         // The static nature of the singleton and Android quirkyness force us to initialize everything here
         // Otherwise, when exiting the app and returning to it, these variables *keep* their pre exit values
@@ -62,78 +129,172 @@
         mSurface = null;
         mTextEdit = null;
         mLayout = null;
-        mJoystickHandler = null;
+        mClipboardHandler = null;
         mSDLThread = null;
-        mAudioTrack = null;
         mExitCalledFromJava = false;
-        mIsPaused = false;
+        mBrokenLibraries = false;
+        mIsResumedCalled = false;
         mIsSurfaceReady = false;
         mHasFocus = true;
+        mNextNativeState = NativeState.INIT;
+        mCurrentNativeState = NativeState.INIT;
     }
 
     // Setup
     @Override
     protected void onCreate(Bundle savedInstanceState) {
-        Log.v("SDL", "onCreate():" + mSingleton);
+        Log.v(TAG, "Device: " + android.os.Build.DEVICE);
+        Log.v(TAG, "Model: " + android.os.Build.MODEL);
+        Log.v(TAG, "onCreate()");
         super.onCreate(savedInstanceState);
-        
-        SDLActivity.initialize();
+
+        // Load shared libraries
+        String errorMsgBrokenLib = "";
+        try {
+            loadLibraries();
+        } catch(UnsatisfiedLinkError e) {
+            System.err.println(e.getMessage());
+            mBrokenLibraries = true;
+            errorMsgBrokenLib = e.getMessage();
+        } catch(Exception e) {
+            System.err.println(e.getMessage());
+            mBrokenLibraries = true;
+            errorMsgBrokenLib = e.getMessage();
+        }
+
+        if (mBrokenLibraries)
+        {
+            AlertDialog.Builder dlgAlert  = new AlertDialog.Builder(this);
+            dlgAlert.setMessage("An error occurred while trying to start the application. Please try again and/or reinstall."
+                  + System.getProperty("line.separator")
+                  + System.getProperty("line.separator")
+                  + "Error: " + errorMsgBrokenLib);
+            dlgAlert.setTitle("SDL Error");
+            dlgAlert.setPositiveButton("Exit",
+                new DialogInterface.OnClickListener() {
+                    @Override
+                    public void onClick(DialogInterface dialog,int id) {
+                        // mSingleton is only assigned after this early-return path, so finish the enclosing activity directly
+                        SDLActivity.this.finish();
+                    }
+                });
+           dlgAlert.setCancelable(false);
+           dlgAlert.create().show();
+
+           return;
+        }
+
+        // Set up JNI
+        SDL.setupJNI();
+
+        // Initialize state
+        SDL.initialize();
+
         // So we can call stuff from static callbacks
         mSingleton = this;
+        SDL.setContext(this);
+
+        if (Build.VERSION.SDK_INT >= 11) {
+            mClipboardHandler = new SDLClipboardHandler_API11();
+        } else {
+            /* Before API 11, no clipboard notification (eg no SDL_CLIPBOARDUPDATE) */
+            mClipboardHandler = new SDLClipboardHandler_Old();
+        }
 
         // Set up the surface
         mSurface = new SDLSurface(getApplication());
-        
-        if(Build.VERSION.SDK_INT >= 12) {
-            mJoystickHandler = new SDLJoystickHandler_API12();
-        }
-        else {
-            mJoystickHandler = new SDLJoystickHandler();
-        }
 
-        mLayout = new AbsoluteLayout(this);
+        mLayout = new RelativeLayout(this);
         mLayout.addView(mSurface);
 
         setContentView(mLayout);
+        
+        // Get filename from "Open with" of another application
+        Intent intent = getIntent();
+        if (intent != null && intent.getData() != null) {
+            String filename = intent.getData().getPath();
+            if (filename != null) {
+                Log.v(TAG, "Got filename: " + filename);
+                SDLActivity.onNativeDropFile(filename);
+            }
+        }
     }
 
     // Events
     @Override
     protected void onPause() {
-        Log.v("SDL", "onPause()");
+        Log.v(TAG, "onPause()");
         super.onPause();
-        SDLActivity.handlePause();
+        mNextNativeState = NativeState.PAUSED;
+        mIsResumedCalled = false;
+
+        if (SDLActivity.mBrokenLibraries) {
+           return;
+        }
+
+        SDLActivity.handleNativeState();
     }
 
     @Override
     protected void onResume() {
-        Log.v("SDL", "onResume()");
+        Log.v(TAG, "onResume()");
         super.onResume();
-        SDLActivity.handleResume();
+        mNextNativeState = NativeState.RESUMED;
+        mIsResumedCalled = true;
+
+        if (SDLActivity.mBrokenLibraries) {
+           return;
+        }
+
+        SDLActivity.handleNativeState();
     }
 
 
     @Override
     public void onWindowFocusChanged(boolean hasFocus) {
         super.onWindowFocusChanged(hasFocus);
-        Log.v("SDL", "onWindowFocusChanged(): " + hasFocus);
+        Log.v(TAG, "onWindowFocusChanged(): " + hasFocus);
+
+        if (SDLActivity.mBrokenLibraries) {
+           return;
+        }
 
         SDLActivity.mHasFocus = hasFocus;
         if (hasFocus) {
-            SDLActivity.handleResume();
+           mNextNativeState = NativeState.RESUMED;
+        } else {
+           mNextNativeState = NativeState.PAUSED;
         }
+        
+        SDLActivity.handleNativeState();
     }
 
     @Override
     public void onLowMemory() {
-        Log.v("SDL", "onLowMemory()");
+        Log.v(TAG, "onLowMemory()");
         super.onLowMemory();
+
+        if (SDLActivity.mBrokenLibraries) {
+           return;
+        }
+
         SDLActivity.nativeLowMemory();
     }
 
     @Override
     protected void onDestroy() {
-        Log.v("SDL", "onDestroy()");
+        Log.v(TAG, "onDestroy()");
+
+        if (SDLActivity.mBrokenLibraries) {
+           super.onDestroy();
+           // Reset everything in case the user re opens the app
+           SDLActivity.initialize();
+           return;
+        }
+
+        mNextNativeState = NativeState.PAUSED;
+        SDLActivity.handleNativeState();
+
         // Send a quit message to the application
         SDLActivity.mExitCalledFromJava = true;
         SDLActivity.nativeQuit();
@@ -143,57 +304,101 @@
             try {
                 SDLActivity.mSDLThread.join();
             } catch(Exception e) {
-                Log.v("SDL", "Problem stopping thread: " + e);
+                Log.v(TAG, "Problem stopping thread: " + e);
             }
             SDLActivity.mSDLThread = null;
 
-            //Log.v("SDL", "Finished waiting for SDL thread");
+            //Log.v(TAG, "Finished waiting for SDL thread");
         }
-            
+
         super.onDestroy();
+
         // Reset everything in case the user re opens the app
         SDLActivity.initialize();
     }
 
     @Override
     public boolean dispatchKeyEvent(KeyEvent event) {
+
+        if (SDLActivity.mBrokenLibraries) {
+           return false;
+        }
+
         int keyCode = event.getKeyCode();
         // Ignore certain special keys so they're handled by Android
         if (keyCode == KeyEvent.KEYCODE_VOLUME_DOWN ||
             keyCode == KeyEvent.KEYCODE_VOLUME_UP ||
             keyCode == KeyEvent.KEYCODE_CAMERA ||
-            keyCode == 168 || /* API 11: KeyEvent.KEYCODE_ZOOM_IN */
-            keyCode == 169 /* API 11: KeyEvent.KEYCODE_ZOOM_OUT */
+            keyCode == KeyEvent.KEYCODE_ZOOM_IN || /* API 11 */
+            keyCode == KeyEvent.KEYCODE_ZOOM_OUT /* API 11 */
             ) {
             return false;
         }
         return super.dispatchKeyEvent(event);
     }
 
-    /** Called by onPause or surfaceDestroyed. Even if surfaceDestroyed
-     *  is the first to be called, mIsSurfaceReady should still be set
-     *  to 'true' during the call to onPause (in a usual scenario).
-     */
-    public static void handlePause() {
-        if (!SDLActivity.mIsPaused && SDLActivity.mIsSurfaceReady) {
-            SDLActivity.mIsPaused = true;
-            SDLActivity.nativePause();
-            mSurface.enableSensor(Sensor.TYPE_ACCELEROMETER, false);
+    /* Move mCurrentNativeState to mNextNativeState, running the pause/resume side effects of that transition */
+    public static void handleNativeState() {
+
+        if (mNextNativeState == mCurrentNativeState) {
+            // Already in same state, discard.
+            return;
+        }
+
+        // Try a transition to init state
+        if (mNextNativeState == NativeState.INIT) {
+            // Nothing to run for INIT; only the current state is recorded.
+            mCurrentNativeState = mNextNativeState;
+            return;
+        }
+
+        // Try a transition to paused state
+        if (mNextNativeState == NativeState.PAUSED) {
+            nativePause();
+            mSurface.handlePause();
+            mCurrentNativeState = mNextNativeState;
+            return;
+        }
+
+        // Try a transition to resumed state; it stays pending until the surface is ready, the window has focus and onResume() has run
+        if (mNextNativeState == NativeState.RESUMED) {
+            if (mIsSurfaceReady && mHasFocus && mIsResumedCalled) {
+                if (mSDLThread == null) {
+                    // This is the entry point to the C app.
+                    // Start up the C app thread and enable sensor input for the first time
+                    // FIXME: Why aren't we enabling sensor input at start?
+
+                    final Thread sdlThread = new Thread(new SDLMain(), "SDLThread");
+                    mSurface.enableSensor(Sensor.TYPE_ACCELEROMETER, true);
+                    sdlThread.start();
+
+                    // Set up a listener thread to catch when the native thread ends
+                    mSDLThread = new Thread(new Runnable() {
+                        @Override
+                        public void run() {
+                            try {
+                                sdlThread.join();
+                            } catch (Exception e) {
+                                // Ignore any exception
+                            } finally {
+                                // Native thread has finished; skip the exit handling when Java itself asked for the quit (see onDestroy)
+                                if (!mExitCalledFromJava) {
+                                    handleNativeExit();
+                                }
+                            }
+                        }
+                    }, "SDLThreadListener");
+
+                    mSDLThread.start();
+                }
+                // Runs on the first start as well as on every later resume.
+                nativeResume();
+                mSurface.handleResume();
+                mCurrentNativeState = mNextNativeState;
+            }
         }
     }
 
-    /** Called by onResume or surfaceCreated. An actual resume should be done only when the surface is ready.
-     * Note: Some Android variants may send multiple surfaceChanged events, so we don't need to resume
-     * every time we get one of those events, only if it comes after surfaceDestroyed
-     */
-    public static void handleResume() {
-        if (SDLActivity.mIsPaused && SDLActivity.mIsSurfaceReady && SDLActivity.mHasFocus) {
-            SDLActivity.mIsPaused = false;
-            SDLActivity.nativeResume();
-            mSurface.enableSensor(Sensor.TYPE_ACCELEROMETER, true);
-        }
-    }
-        
     /* The native thread has finished */
     public static void handleNativeExit() {
         SDLActivity.mSDLThread = null;
@@ -205,6 +410,7 @@
     static final int COMMAND_CHANGE_TITLE = 1;
     static final int COMMAND_UNUSED = 2;
     static final int COMMAND_TEXTEDIT_HIDE = 3;
+    static final int COMMAND_SET_KEEP_SCREEN_ON = 5;
 
     protected static final int COMMAND_USER = 0x8000;
 
@@ -228,7 +434,7 @@
     protected static class SDLCommandHandler extends Handler {
         @Override
         public void handleMessage(Message msg) {
-            Context context = getContext();
+            Context context = SDL.getContext();
             if (context == null) {
                 Log.e(TAG, "error handling message, getContext() returned null");
                 return;
@@ -243,13 +449,31 @@
                 break;
             case COMMAND_TEXTEDIT_HIDE:
                 if (mTextEdit != null) {
-                    mTextEdit.setVisibility(View.GONE);
+                    // Note: On some devices setting view to GONE creates a flicker in landscape.
+                    // Setting the View's sizes to 0 is similar to GONE but without the flicker.
+                    // The sizes will be set to useful values when the keyboard is shown again.
+                    mTextEdit.setLayoutParams(new RelativeLayout.LayoutParams(0, 0));
 
                     InputMethodManager imm = (InputMethodManager) context.getSystemService(Context.INPUT_METHOD_SERVICE);
                     imm.hideSoftInputFromWindow(mTextEdit.getWindowToken(), 0);
+                    
+                    mScreenKeyboardShown = false;
                 }
                 break;
-
+            case COMMAND_SET_KEEP_SCREEN_ON:
+            {
+                if (context instanceof Activity) {
+                    Window window = ((Activity) context).getWindow();
+                    if (window != null) {
+                        if ((msg.obj instanceof Integer) && (((Integer) msg.obj).intValue() != 0)) {
+                            window.addFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON);
+                        } else {
+                            window.clearFlags(WindowManager.LayoutParams.FLAG_KEEP_SCREEN_ON);
+                        }
+                    }
+                }
+                break;
+            }
             default:
                 if ((context instanceof SDLActivity) && !((SDLActivity) context).onUnhandledMessage(msg.arg1, msg.obj)) {
                     Log.e(TAG, "error handling message, command is " + msg.arg1);
@@ -270,76 +494,123 @@
     }
 
     // C functions we call
-    public static native void nativeInit();
+    public static native int nativeSetupJNI();
+    public static native int nativeRunMain(String library, String function, Object arguments);
     public static native void nativeLowMemory();
     public static native void nativeQuit();
     public static native void nativePause();
     public static native void nativeResume();
-    public static native void onNativeResize(int x, int y, int format);
-    public static native int onNativePadDown(int device_id, int keycode);
-    public static native int onNativePadUp(int device_id, int keycode);
-    public static native void onNativeJoy(int device_id, int axis,
-                                          float value);
-    public static native void onNativeHat(int device_id, int hat_id,
-                                          int x, int y);
+    public static native void onNativeDropFile(String filename);
+    public static native void onNativeResize(int x, int y, int format, float rate);
     public static native void onNativeKeyDown(int keycode);
     public static native void onNativeKeyUp(int keycode);
     public static native void onNativeKeyboardFocusLost();
+    public static native void onNativeMouse(int button, int action, float x, float y);
     public static native void onNativeTouch(int touchDevId, int pointerFingerId,
-                                            int action, float x, 
+                                            int action, float x,
                                             float y, float p);
     public static native void onNativeAccel(float x, float y, float z);
+    public static native void onNativeClipboardChanged();
     public static native void onNativeSurfaceChanged();
     public static native void onNativeSurfaceDestroyed();
-    public static native void nativeFlipBuffers();
-    public static native int nativeAddJoystick(int device_id, String name, 
-                                               int is_accelerometer, int nbuttons, 
-                                               int naxes, int nhats, int nballs);
-    public static native int nativeRemoveJoystick(int device_id);
+    public static native String nativeGetHint(String name);
 
-    public static void flipBuffers() {
-        SDLActivity.nativeFlipBuffers();
-    }
-
+    /**
+     * This method is called by SDL using JNI.
+     */
     public static boolean setActivityTitle(String title) {
         // Called from SDLMain() thread and can't directly affect the view
         return mSingleton.sendCommand(COMMAND_CHANGE_TITLE, title);
     }
 
-    public static boolean sendMessage(int command, int param) {
-        return mSingleton.sendCommand(command, Integer.valueOf(param));
+    /**
+     * This method is called by SDL using JNI.
+     * This is a static method for JNI convenience, it calls a non-static method
+     * so that it can be overridden
+     */
+    public static void setOrientation(int w, int h, boolean resizable, String hint)
+    {
+        if (mSingleton != null) {
+            mSingleton.setOrientationBis(w, h, resizable, hint);
+        }
+    }
+   
+    /**
+     * This can be overridden
+     */
+    public void setOrientationBis(int w, int h, boolean resizable, String hint)
+    {
+      int orientation = -1;
+
+      /* hint lists the allowed orientation names (see SDL_HINT_ORIENTATIONS); null is treated like an empty hint */
+      if (hint != null && hint.length() > 0) {
+         /* "Portrait" is a prefix of "PortraitUpsideDown", so a plain contains("Portrait") cannot tell them apart */
+         boolean portrait = hint.contains("Portrait ") || hint.endsWith("Portrait");
+         boolean upsideDown = hint.contains("PortraitUpsideDown");
+         if (hint.contains("LandscapeRight") && hint.contains("LandscapeLeft")) {
+            orientation = ActivityInfo.SCREEN_ORIENTATION_SENSOR_LANDSCAPE;
+         } else if (hint.contains("LandscapeRight")) {
+            orientation = ActivityInfo.SCREEN_ORIENTATION_LANDSCAPE;
+         } else if (hint.contains("LandscapeLeft")) {
+            orientation = ActivityInfo.SCREEN_ORIENTATION_REVERSE_LANDSCAPE;
+         } else if (portrait && upsideDown) {
+            orientation = ActivityInfo.SCREEN_ORIENTATION_SENSOR_PORTRAIT;
+         } else if (portrait) {
+            orientation = ActivityInfo.SCREEN_ORIENTATION_PORTRAIT;
+         } else if (upsideDown) {
+            orientation = ActivityInfo.SCREEN_ORIENTATION_REVERSE_PORTRAIT;
+         }
+      }
+
+      /* no valid hint: a resizable window keeps a free orientation, otherwise follow the requested aspect ratio */
+      if (orientation == -1 && !resizable) {
+         if (w > h) {
+            orientation = ActivityInfo.SCREEN_ORIENTATION_SENSOR_LANDSCAPE;
+         } else {
+            orientation = ActivityInfo.SCREEN_ORIENTATION_SENSOR_PORTRAIT;
+         }
+      }
+
+      Log.v("SDL", "setOrientation() orientation=" + orientation + " width=" + w +" height="+ h +" resizable=" + resizable + " hint=" + hint);
+      if (orientation != -1) {
+         mSingleton.setRequestedOrientation(orientation);
+      }
     }
 
-    public static Context getContext() {
-        return mSingleton;
+
+    /**
+     * This method is called by SDL using JNI.
+     */
+    public static boolean isScreenKeyboardShown() 
+    {
+        if (mTextEdit == null) {
+            return false;
+        }
+        // mScreenKeyboardShown is set by ShowTextInputTask and cleared when COMMAND_TEXTEDIT_HIDE is handled.
+        if (!mScreenKeyboardShown) {
+            return false;
+        }
+        // Our own bookkeeping says "shown"; the input method manager gives the final answer.
+        InputMethodManager imm = (InputMethodManager) SDL.getContext().getSystemService(Context.INPUT_METHOD_SERVICE);
+        return imm.isAcceptingText();
+
     }
 
     /**
-     * @return result of getSystemService(name) but executed on UI thread.
+     * This method is called by SDL using JNI.
      */
-    public Object getSystemServiceFromUiThread(final String name) {
-        final Object lock = new Object();
-        final Object[] results = new Object[2]; // array for writable variables
-        synchronized (lock) {
-            runOnUiThread(new Runnable() {
-                @Override
-                public void run() {
-                    synchronized (lock) {
-                        results[0] = getSystemService(name);
-                        results[1] = Boolean.TRUE;
-                        lock.notify();
-                    }
-                }
-            });
-            if (results[1] == null) {
-                try {
-                    lock.wait();
-                } catch (InterruptedException ex) {
-                    ex.printStackTrace();
-                }
-            }
+    public static boolean sendMessage(int command, int param) {
+        if (mSingleton == null) {
+            return false;
         }
-        return results[0];
+        return mSingleton.sendCommand(command, Integer.valueOf(param));
+    }
+
+    /**
+     * This method is called by SDL using JNI.
+     */
+    public static Context getContext() {
+        return SDL.getContext();
     }
 
     static class ShowTextInputTask implements Runnable {
@@ -361,11 +632,12 @@
 
         @Override
         public void run() {
-            AbsoluteLayout.LayoutParams params = new AbsoluteLayout.LayoutParams(
-                    w, h + HEIGHT_PADDING, x, y);
+            RelativeLayout.LayoutParams params = new RelativeLayout.LayoutParams(w, h + HEIGHT_PADDING);
+            params.leftMargin = x;
+            params.topMargin = y;
 
             if (mTextEdit == null) {
-                mTextEdit = new DummyEdit(getContext());
+                mTextEdit = new DummyEdit(SDL.getContext());
 
                 mLayout.addView(mTextEdit, params);
             } else {
@@ -375,101 +647,47 @@
             mTextEdit.setVisibility(View.VISIBLE);
             mTextEdit.requestFocus();
 
-            InputMethodManager imm = (InputMethodManager) getContext().getSystemService(Context.INPUT_METHOD_SERVICE);
+            InputMethodManager imm = (InputMethodManager) SDL.getContext().getSystemService(Context.INPUT_METHOD_SERVICE);
             imm.showSoftInput(mTextEdit, 0);
+
+            mScreenKeyboardShown = true;
         }
     }
 
+    /**
+     * This method is called by SDL using JNI.
+     */
     public static boolean showTextInput(int x, int y, int w, int h) {
         // Transfer the task to the main thread as a Runnable
         return mSingleton.commandHandler.post(new ShowTextInputTask(x, y, w, h));
     }
-            
-    public static Surface getNativeSurface() {
-        return SDLActivity.mSurface.getNativeSurface();
+
+    public static boolean isTextInputEvent(KeyEvent event) {
+      
+        // Key pressed with Ctrl should be sent as SDL_KEYDOWN/SDL_KEYUP and not SDL_TEXTINPUT
+        if (android.os.Build.VERSION.SDK_INT >= 11) {
+            if (event.isCtrlPressed()) {
+                return false;
+            }  
+        }
+
+        return event.isPrintingKey() || event.getKeyCode() == KeyEvent.KEYCODE_SPACE;
     }
 
-    // Audio
-    public static int audioInit(int sampleRate, boolean is16Bit, boolean isStereo, int desiredFrames) {
-        int channelConfig = isStereo ? AudioFormat.CHANNEL_CONFIGURATION_STEREO : AudioFormat.CHANNEL_CONFIGURATION_MONO;
-        int audioFormat = is16Bit ? AudioFormat.ENCODING_PCM_16BIT : AudioFormat.ENCODING_PCM_8BIT;
-        int frameSize = (isStereo ? 2 : 1) * (is16Bit ? 2 : 1);
-        
-        Log.v("SDL", "SDL audio: wanted " + (isStereo ? "stereo" : "mono") + " " + (is16Bit ? "16-bit" : "8-bit") + " " + (sampleRate / 1000f) + "kHz, " + desiredFrames + " frames buffer");
-        
-        // Let the user pick a larger buffer if they really want -- but ye
-        // gods they probably shouldn't, the minimums are horrifyingly high
-        // latency already
-        desiredFrames = Math.max(desiredFrames, (AudioTrack.getMinBufferSize(sampleRate, channelConfig, audioFormat) + frameSize - 1) / frameSize);
-        
-        if (mAudioTrack == null) {
-            mAudioTrack = new AudioTrack(AudioManager.STREAM_MUSIC, sampleRate,
-                    channelConfig, audioFormat, desiredFrames * frameSize, AudioTrack.MODE_STREAM);
-            
-            // Instantiating AudioTrack can "succeed" without an exception and the track may still be invalid
-            // Ref: https://android.googlesource.com/platform/frameworks/base/+/refs/heads/master/media/java/android/media/AudioTrack.java
-            // Ref: http://developer.android.com/reference/android/media/AudioTrack.html#getState()
-            
-            if (mAudioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
-                Log.e("SDL", "Failed during initialization of Audio Track");
-                mAudioTrack = null;
-                return -1;
-            }
-            
-            mAudioTrack.play();
+    /**
+     * This method is called by SDL using JNI.
+     */
+    public static Surface getNativeSurface() {
+        if (SDLActivity.mSurface == null) {
+            return null;
         }
-       
-        Log.v("SDL", "SDL audio: got " + ((mAudioTrack.getChannelCount() >= 2) ? "stereo" : "mono") + " " + ((mAudioTrack.getAudioFormat() == AudioFormat.ENCODING_PCM_16BIT) ? "16-bit" : "8-bit") + " " + (mAudioTrack.getSampleRate() / 1000f) + "kHz, " + desiredFrames + " frames buffer");
-        
-        return 0;
-    }
-    
-    public static void audioWriteShortBuffer(short[] buffer) {
-        for (int i = 0; i < buffer.length; ) {
-            int result = mAudioTrack.write(buffer, i, buffer.length - i);
-            if (result > 0) {
-                i += result;
-            } else if (result == 0) {
-                try {
-                    Thread.sleep(1);
-                } catch(InterruptedException e) {
-                    // Nom nom
-                }
-            } else {
-                Log.w("SDL", "SDL audio: error return from write(short)");
-                return;
-            }
-        }
-    }
-    
-    public static void audioWriteByteBuffer(byte[] buffer) {
-        for (int i = 0; i < buffer.length; ) {
-            int result = mAudioTrack.write(buffer, i, buffer.length - i);
-            if (result > 0) {
-                i += result;
-            } else if (result == 0) {
-                try {
-                    Thread.sleep(1);
-                } catch(InterruptedException e) {
-                    // Nom nom
-                }
-            } else {
-                Log.w("SDL", "SDL audio: error return from write(byte)");
-                return;
-            }
-        }
-    }
-
-    public static void audioQuit() {
-        if (mAudioTrack != null) {
-            mAudioTrack.stop();
-            mAudioTrack = null;
-        }
+        return SDLActivity.mSurface.getNativeSurface();
     }
 
     // Input
 
     /**
+     * This method is called by SDL using JNI.
      * @return an array which may be empty but is never null.
      */
     public static int[] inputGetInputDeviceIds(int sources) {
@@ -484,41 +702,331 @@
         }
         return Arrays.copyOf(filtered, used);
     }
-            
-    // Joystick glue code, just a series of stubs that redirect to the SDLJoystickHandler instance
-    public static boolean handleJoystickMotionEvent(MotionEvent event) {
-        return mJoystickHandler.handleMotionEvent(event);
+
+    // APK expansion files support
+
+    /** com.android.vending.expansion.zipfile.ZipResourceFile object or null. */
+    private static Object expansionFile;
+
+    /** com.android.vending.expansion.zipfile.ZipResourceFile's getInputStream() or null. */
+    private static Method expansionFileMethod;
+
+    /** Called by SDL using JNI. Opens fileName from the APK expansion files (looked up once, then cached).
+     * @param fileName path handed to the expansion file's getInputStream().
+     * @return an InputStream on success or null if no expansion file was used (a version hint is unset).
+     * @throws IOException on errors. Message is set for the SDL error message.
+     */
+    public static InputStream openAPKExpansionInputStream(String fileName) throws IOException {
+        // Get a ZipResourceFile representing a merger of both the main and patch files
+        // (only when none is cached yet in the static expansionFile field)
+        if (expansionFile == null) {
+            String mainHint = nativeGetHint("SDL_ANDROID_APK_EXPANSION_MAIN_FILE_VERSION");
+            if (mainHint == null) {
+                return null; // no expansion use if no main version was set
+            }
+            String patchHint = nativeGetHint("SDL_ANDROID_APK_EXPANSION_PATCH_FILE_VERSION");
+            if (patchHint == null) {
+                return null; // no expansion use if no patch version was set
+            }
+
+            Integer mainVersion;
+            Integer patchVersion;
+            try {
+                mainVersion = Integer.valueOf(mainHint);
+                patchVersion = Integer.valueOf(patchHint);
+            } catch (NumberFormatException ex) {
+                ex.printStackTrace();
+                throw new IOException("No valid file versions set for APK expansion files", ex);
+            }
+
+            try {
+                // To avoid direct dependency on Google APK expansion library that is
+                // not a part of Android SDK we access it using reflection
+                expansionFile = Class.forName("com.android.vending.expansion.zipfile.APKExpansionSupport")
+                    .getMethod("getAPKExpansionZipFile", Context.class, int.class, int.class)
+                    .invoke(null, SDL.getContext(), mainVersion, patchVersion);
+
+                expansionFileMethod = expansionFile.getClass()
+                    .getMethod("getInputStream", String.class);
+            } catch (Exception ex) {
+                ex.printStackTrace();
+                expansionFile = null; // drop partial state so a later call retries the lookup
+                expansionFileMethod = null;
+                throw new IOException("Could not access APK expansion support library", ex);
+            }
+        }
+
+        // Get an input stream for a known file inside the expansion file ZIPs
+        InputStream fileStream;
+        try {
+            fileStream = (InputStream)expansionFileMethod.invoke(expansionFile, fileName);
+        } catch (Exception ex) {
+            // calling "getInputStream" failed
+            ex.printStackTrace();
+            throw new IOException("Could not open stream from APK expansion file", ex);
+        }
+
+        if (fileStream == null) {
+            // calling "getInputStream" was successful but null was returned
+            throw new IOException("Could not find path in APK expansion file");
+        }
+
+        return fileStream;
+    }
+
+    // Messagebox
+
+    /** Result of current messagebox. Also used for blocking the calling thread. */
+    protected final int[] messageboxSelection = new int[1];
+
+    /** Id of current dialog. */
+    protected int dialogs = 0;
+
+    /**
+     * This method is called by SDL using JNI.
+     * Shows the messagebox from the UI thread and blocks the calling thread
+     * until the dialog is dismissed.
+     * buttonFlags, buttonIds and buttonTexts must all have the same length.
+     * @param flags messagebox flags, forwarded to the dialog arguments.
+     * @param title dialog title.
+     * @param message dialog text.
+     * @param buttonFlags array containing flags for every button.
+     * @param buttonIds array containing id for every button.
+     * @param buttonTexts array containing text for every button.
+     * @param colors null for default or array of length 5 containing colors.
+     * @return id of the pressed button, or -1 on mismatched arrays or interruption.
+     */
+    public int messageboxShowMessageBox(
+            final int flags,
+            final String title,
+            final String message,
+            final int[] buttonFlags,
+            final int[] buttonIds,
+            final String[] buttonTexts,
+            final int[] colors) {
+
+        messageboxSelection[0] = -1;
+
+        // sanity checks: reject ANY length mismatch, since onCreateDialog indexes all three arrays in step
+        if ((buttonFlags.length != buttonIds.length) || (buttonIds.length != buttonTexts.length)) {
+            return -1; // implementation broken
+        }
+
+        // collect arguments for Dialog
+        final Bundle args = new Bundle();
+        args.putInt("flags", flags);
+        args.putString("title", title);
+        args.putString("message", message);
+        args.putIntArray("buttonFlags", buttonFlags);
+        args.putIntArray("buttonIds", buttonIds);
+        args.putStringArray("buttonTexts", buttonTexts);
+        args.putIntArray("colors", colors);
+
+        // trigger Dialog creation on UI thread
+        runOnUiThread(new Runnable() {
+            @Override
+            public void run() {
+                showDialog(dialogs++, args);
+            }
+        });
+
+        // block the calling thread until the dialog's dismiss listener calls notify()
+        synchronized (messageboxSelection) {
+            try {
+                messageboxSelection.wait();
+            } catch (InterruptedException ex) {
+                ex.printStackTrace();
+                return -1;
+            }
+        }
+
+        // return selected value
+
+        return messageboxSelection[0];
+    }
+
+    @Override
+    protected Dialog onCreateDialog(int ignore, Bundle args) {
+        // Builds the messagebox dialog from the arguments bundled by messageboxShowMessageBox().
+        // TODO set values from "flags" to messagebox dialog
+
+        // get colors (Color.TRANSPARENT doubles as the "not set, keep the default" marker below)
+
+        int[] colors = args.getIntArray("colors");
+        int backgroundColor;
+        int textColor;
+        int buttonBorderColor;
+        int buttonBackgroundColor;
+        int buttonSelectedColor;
+        if (colors != null) {
+            int i = -1; // pre-incremented, so the reads below use indices 0..4
+            backgroundColor = colors[++i];
+            textColor = colors[++i];
+            buttonBorderColor = colors[++i];
+            buttonBackgroundColor = colors[++i];
+            buttonSelectedColor = colors[++i];
+        } else {
+            backgroundColor = Color.TRANSPARENT;
+            textColor = Color.TRANSPARENT;
+            buttonBorderColor = Color.TRANSPARENT;
+            buttonBackgroundColor = Color.TRANSPARENT;
+            buttonSelectedColor = Color.TRANSPARENT;
+        }
+
+        // create dialog with title and a listener to wake up calling thread
+
+        final Dialog dialog = new Dialog(this);
+        dialog.setTitle(args.getString("title"));
+        dialog.setCancelable(false);
+        dialog.setOnDismissListener(new DialogInterface.OnDismissListener() {
+            @Override
+            public void onDismiss(DialogInterface unused) {
+                synchronized (messageboxSelection) {
+                    messageboxSelection.notify(); // releases the wait() in messageboxShowMessageBox()
+                }
+            }
+        });
+
+        // create text
+
+        TextView message = new TextView(this);
+        message.setGravity(Gravity.CENTER);
+        message.setText(args.getString("message"));
+        if (textColor != Color.TRANSPARENT) {
+            message.setTextColor(textColor);
+        }
+
+        // create buttons (the three arrays are read in step, one entry per button)
+
+        int[] buttonFlags = args.getIntArray("buttonFlags");
+        int[] buttonIds = args.getIntArray("buttonIds");
+        String[] buttonTexts = args.getStringArray("buttonTexts");
+
+        final SparseArray<Button> mapping = new SparseArray<Button>(); // key code -> button it triggers
+
+        LinearLayout buttons = new LinearLayout(this);
+        buttons.setOrientation(LinearLayout.HORIZONTAL);
+        buttons.setGravity(Gravity.CENTER);
+        for (int i = 0; i < buttonTexts.length; ++i) {
+            Button button = new Button(this);
+            final int id = buttonIds[i];
+            button.setOnClickListener(new View.OnClickListener() {
+                @Override
+                public void onClick(View v) {
+                    messageboxSelection[0] = id; // value returned to the blocked caller after dismiss
+                    dialog.dismiss();
+                }
+            });
+            if (buttonFlags[i] != 0) {
+                // see SDL_messagebox.h
+                if ((buttonFlags[i] & 0x00000001) != 0) {
+                    mapping.put(KeyEvent.KEYCODE_ENTER, button);
+                }
+                if ((buttonFlags[i] & 0x00000002) != 0) {
+                    mapping.put(KeyEvent.KEYCODE_ESCAPE, button); /* API 11 */
+                }
+            }
+            button.setText(buttonTexts[i]);
+            if (textColor != Color.TRANSPARENT) {
+                button.setTextColor(textColor);
+            }
+            if (buttonBorderColor != Color.TRANSPARENT) {
+                // TODO set color for border of messagebox button
+            }
+            if (buttonBackgroundColor != Color.TRANSPARENT) {
+                Drawable drawable = button.getBackground();
+                if (drawable == null) {
+                    // setting the color this way removes the style
+                    button.setBackgroundColor(buttonBackgroundColor);
+                } else {
+                    // setting the color this way keeps the style (gradient, padding, etc.)
+                    drawable.setColorFilter(buttonBackgroundColor, PorterDuff.Mode.MULTIPLY);
+                }
+            }
+            if (buttonSelectedColor != Color.TRANSPARENT) {
+                // TODO set color for selected messagebox button
+            }
+            buttons.addView(button);
+        }
+
+        // create content: message text stacked above the row of buttons
+
+        LinearLayout content = new LinearLayout(this);
+        content.setOrientation(LinearLayout.VERTICAL);
+        content.addView(message);
+        content.addView(buttons);
+        if (backgroundColor != Color.TRANSPARENT) {
+            content.setBackgroundColor(backgroundColor);
+        }
+
+        // add content to dialog and return
+
+        dialog.setContentView(content);
+        dialog.setOnKeyListener(new Dialog.OnKeyListener() {
+            @Override
+            public boolean onKey(DialogInterface d, int keyCode, KeyEvent event) {
+                Button button = mapping.get(keyCode);
+                if (button != null) {
+                    if (event.getAction() == KeyEvent.ACTION_UP) {
+                        button.performClick(); // key release acts like a click on the mapped button
+                    }
+                    return true; // also for ignored actions
+                }
+                return false;
+            }
+        });
+
+        return dialog;
+    }
+
+    /**
+     * This method is called by SDL using JNI; it returns mClipboardHandler.clipboardHasText().
+     */
+    public static boolean clipboardHasText() {
+        return mClipboardHandler.clipboardHasText();
     }
     
-    public static void pollInputDevices() {
-        if (SDLActivity.mSDLThread != null) {
-            mJoystickHandler.pollInputDevices();
-        }
+    /**
+     * This method is called by SDL using JNI; it returns mClipboardHandler.clipboardGetText().
+     */
+    public static String clipboardGetText() {
+        return mClipboardHandler.clipboardGetText();
     }
-    
+
+    /**
+     * This method is called by SDL using JNI; it passes string on to mClipboardHandler.clipboardSetText().
+     */
+    public static void clipboardSetText(String string) {
+        mClipboardHandler.clipboardSetText(string);
+    }
+
 }
 
 /**
-    Simple nativeInit() runnable
+    Simple runnable to start the SDL application: hands the configured main function to nativeRunMain()
 */
 class SDLMain implements Runnable {
     @Override
     public void run() {
         // Runs SDL_main()
-        SDLActivity.nativeInit();
+        String library = SDLActivity.mSingleton.getMainSharedObject(); // library, entry point and arguments
+        String function = SDLActivity.mSingleton.getMainFunction();    // are all supplied by the activity singleton
+        String[] arguments = SDLActivity.mSingleton.getArguments();
 
-        //Log.v("SDL", "SDL thread terminated");
+        Log.v("SDL", "Running main function " + function + " from library " + library);
+        SDLActivity.nativeRunMain(library, function, arguments);
+
+        Log.v("SDL", "Finished main function");
     }
 }
 
 
 /**
     SDLSurface. This is what we draw on, so we need to know when it's created
-    in order to do anything useful. 
+    in order to do anything useful.
 
     Because of this, that's where we set up the SDL thread
 */
-class SDLSurface extends SurfaceView implements SurfaceHolder.Callback, 
+class SDLSurface extends SurfaceView implements SurfaceHolder.Callback,
     View.OnKeyListener, View.OnTouchListener, SensorEventListener  {
 
     // Sensors
@@ -528,21 +1036,21 @@
     // Keep track of the surface size to normalize touch events
     protected static float mWidth, mHeight;
 
-    // Startup    
+    // Startup
     public SDLSurface(Context context) {
         super(context);
-        getHolder().addCallback(this); 
-    
+        getHolder().addCallback(this);
+
         setFocusable(true);
         setFocusableInTouchMode(true);
         requestFocus();
-        setOnKeyListener(this); 
-        setOnTouchListener(this);   
+        setOnKeyListener(this);
+        setOnTouchListener(this);
 
         mDisplay = ((WindowManager)context.getSystemService(Context.WINDOW_SERVICE)).getDefaultDisplay();
         mSensorManager = (SensorManager)context.getSystemService(Context.SENSOR_SERVICE);
-        
-        if(Build.VERSION.SDK_INT >= 12) {
+
+        if (Build.VERSION.SDK_INT >= 12) {
             setOnGenericMotionListener(new SDLGenericMotionListener_API12());
         }
 
@@ -550,7 +1058,20 @@
         mWidth = 1.0f;
         mHeight = 1.0f;
     }
-    
+
+    public void handlePause() { // stops accelerometer delivery by unregistering this view as sensor listener
+        enableSensor(Sensor.TYPE_ACCELEROMETER, false);
+    }
+
+    public void handleResume() { // repeats the constructor's focus/listener setup, then re-enables the accelerometer
+        setFocusable(true);
+        setFocusableInTouchMode(true);
+        requestFocus();
+        setOnKeyListener(this);
+        setOnTouchListener(this);
+        enableSensor(Sensor.TYPE_ACCELEROMETER, true);
+    }
+
     public Surface getNativeSurface() {
         return getHolder().getSurface();
     }
@@ -566,8 +1087,11 @@
     @Override
     public void surfaceDestroyed(SurfaceHolder holder) {
         Log.v("SDL", "surfaceDestroyed()");
-        // Call this *before* setting mIsSurfaceReady to 'false'
-        SDLActivity.handlePause();
+
+        // Transition to pause, if needed; this runs before mIsSurfaceReady is cleared below
+        SDLActivity.mNextNativeState = SDLActivity.NativeState.PAUSED;
+        SDLActivity.handleNativeState();
+
         SDLActivity.mIsSurfaceReady = false;
         SDLActivity.onNativeSurfaceDestroyed();
     }
@@ -625,66 +1149,78 @@
 
         mWidth = width;
         mHeight = height;
-        SDLActivity.onNativeResize(width, height, sdlFormat);
-        Log.v("SDL", "Window size:" + width + "x"+height);
+        SDLActivity.onNativeResize(width, height, sdlFormat, mDisplay.getRefreshRate());
+        Log.v("SDL", "Window size: " + width + "x" + height);
+
+ 
+        boolean skip = false;
+        int requestedOrientation = SDLActivity.mSingleton.getRequestedOrientation();
+
+        if (requestedOrientation == ActivityInfo.SCREEN_ORIENTATION_UNSPECIFIED)
+        {
+            // Accept any
+        }
+        else if (requestedOrientation == ActivityInfo.SCREEN_ORIENTATION_PORTRAIT || requestedOrientation == ActivityInfo.SCREEN_ORIENTATION_SENSOR_PORTRAIT)
+        {
+            if (mWidth > mHeight) {
+               skip = true;
+            }
+        } else if (requestedOrientation == ActivityInfo.SCREEN_ORIENTATION_LANDSCAPE || requestedOrientation == ActivityInfo.SCREEN_ORIENTATION_SENSOR_LANDSCAPE) {
+            if (mWidth < mHeight) {
+               skip = true;
+            }
+        }
 
-        // Set mIsSurfaceReady to 'true' *before* making a call to handleResume
+        // Special case for near-square resolutions (e.g. the BlackBerry Passport)
+        if (skip) {
+           double min = Math.min(mWidth, mHeight);
+           double max = Math.max(mWidth, mHeight);
+           
+           if (max / min < 1.20) {
+              Log.v("SDL", "Don't skip on such aspect-ratio. Could be a square resolution.");
+              skip = false;
+           }
+        }
+
+        if (skip) {
+           Log.v("SDL", "Skip .. Surface is not ready.");
+           SDLActivity.mIsSurfaceReady = false;
+           return;
+        }
+        
+        /* Surface is ready */
         SDLActivity.mIsSurfaceReady = true;
+
+        /* If the surface has been previously destroyed by onNativeSurfaceDestroyed, recreate it here */
         SDLActivity.onNativeSurfaceChanged();
 
-
-        if (SDLActivity.mSDLThread == null) {
-            // This is the entry point to the C app.
-            // Start up the C app thread and enable sensor input for the first time
-
-            SDLActivity.mSDLThread = new Thread(new SDLMain(), "SDLThread");
-            enableSensor(Sensor.TYPE_ACCELEROMETER, true);
-            SDLActivity.mSDLThread.start();
-            
-            // Set up a listener thread to catch when the native thread ends
-            new Thread(new Runnable(){
-                @Override
-                public void run(){
-                    try {
-                        SDLActivity.mSDLThread.join();
-                    }
-                    catch(Exception e){}
-                    finally{ 
-                        // Native thread has finished
-                        if (! SDLActivity.mExitCalledFromJava) {
-                            SDLActivity.handleNativeExit();
-                        }
-                    }
-                }
-            }).start();
-        }
+        SDLActivity.handleNativeState();
     }
 
-    // unused
-    @Override
-    public void onDraw(Canvas canvas) {}
-
-
     // Key events
     @Override
     public boolean onKey(View  v, int keyCode, KeyEvent event) {
         // Dispatch the different events depending on where they come from
-        // Some SOURCE_DPAD or SOURCE_GAMEPAD are also SOURCE_KEYBOARD
-        // So, we try to process them as DPAD or GAMEPAD events first, if that fails we try them as KEYBOARD
-        
-        if ( (event.getSource() & InputDevice.SOURCE_GAMEPAD) == InputDevice.SOURCE_GAMEPAD) {
+        // Some SOURCE_JOYSTICK, SOURCE_DPAD or SOURCE_GAMEPAD are also SOURCE_KEYBOARD
+        // So, we try to process them as JOYSTICK/DPAD/GAMEPAD events first, if that fails we try them as KEYBOARD
+        //
+        // Furthermore, it's possible a game controller has SOURCE_KEYBOARD and
+        // SOURCE_JOYSTICK, while its key events arrive from the keyboard source
+        // So, retrieve the device itself and check all of its sources
+        if (SDLControllerManager.isDeviceSDLJoystick(event.getDeviceId())) {
+            // Note that we process events with specific key codes here
             if (event.getAction() == KeyEvent.ACTION_DOWN) {
-                if (SDLActivity.onNativePadDown(event.getDeviceId(), keyCode) == 0) {
+                if (SDLControllerManager.onNativePadDown(event.getDeviceId(), keyCode) == 0) {
                     return true;
                 }
             } else if (event.getAction() == KeyEvent.ACTION_UP) {
-                if (SDLActivity.onNativePadUp(event.getDeviceId(), keyCode) == 0) {
+                if (SDLControllerManager.onNativePadUp(event.getDeviceId(), keyCode) == 0) {
                     return true;
                 }
             }
         }
-        
-        if( (event.getSource() & InputDevice.SOURCE_KEYBOARD) != 0) {
+
+        if ((event.getSource() & InputDevice.SOURCE_KEYBOARD) != 0) {
             if (event.getAction() == KeyEvent.ACTION_DOWN) {
                 //Log.v("SDL", "key down: " + keyCode);
                 SDLActivity.onNativeKeyDown(keyCode);
@@ -696,7 +1232,21 @@
                 return true;
             }
         }
-        
+
+        if ((event.getSource() & InputDevice.SOURCE_MOUSE) != 0) {
+            // on some devices key events are sent for mouse BUTTON_BACK/FORWARD presses
+            // they are ignored here because sending them as mouse input to SDL is messy
+            if ((keyCode == KeyEvent.KEYCODE_BACK) || (keyCode == KeyEvent.KEYCODE_FORWARD)) {
+                switch (event.getAction()) {
+                case KeyEvent.ACTION_DOWN:
+                case KeyEvent.ACTION_UP:
+                    // mark the event as handled or it will be handled by system
+                    // handling KEYCODE_BACK by system will call onBackPressed()
+                    return true;
+                }
+            }
+        }
+
         return false;
     }
 
@@ -708,58 +1258,98 @@
         final int pointerCount = event.getPointerCount();
         int action = event.getActionMasked();
         int pointerFingerId;
+        int mouseButton;
         int i = -1;
         float x,y,p;
-        
-        switch(action) {
-            case MotionEvent.ACTION_MOVE:
-                for (i = 0; i < pointerCount; i++) {
+
+        // !!! FIXME: dump this SDK check after 2.0.4 ships and require API14.
+        if (event.getSource() == InputDevice.SOURCE_MOUSE && SDLActivity.mSeparateMouseAndTouch) {
+            if (Build.VERSION.SDK_INT < 14) {
+                mouseButton = 1; // all mouse buttons are the left button
+            } else {
+                try {
+                    mouseButton = (Integer) event.getClass().getMethod("getButtonState").invoke(event);
+                } catch(Exception e) {
+                    mouseButton = 1;    // oh well.
+                }
+            }
+            SDLActivity.onNativeMouse(mouseButton, action, event.getX(0), event.getY(0));
+        } else {
+            switch(action) {
+                case MotionEvent.ACTION_MOVE:
+                    for (i = 0; i < pointerCount; i++) {
+                        pointerFingerId = event.getPointerId(i);
+                        x = event.getX(i) / mWidth;
+                        y = event.getY(i) / mHeight;
+                        p = event.getPressure(i);
+                        if (p > 1.0f) {
+                            // may be larger than 1.0f on some devices
+                            // see the documentation of getPressure(i)
+                            p = 1.0f;
+                        }
+                        SDLActivity.onNativeTouch(touchDevId, pointerFingerId, action, x, y, p);
+                    }
+                    break;
+
+                case MotionEvent.ACTION_UP:
+                case MotionEvent.ACTION_DOWN:
+                    // Primary pointer up/down, the index is always zero
+                    i = 0;
+                case MotionEvent.ACTION_POINTER_UP:
+                case MotionEvent.ACTION_POINTER_DOWN:
+                    // Non primary pointer up/down
+                    if (i == -1) {
+                        i = event.getActionIndex();
+                    }
+
                     pointerFingerId = event.getPointerId(i);
                     x = event.getX(i) / mWidth;
                     y = event.getY(i) / mHeight;
                     p = event.getPressure(i);
+                    if (p > 1.0f) {
+                        // may be larger than 1.0f on some devices
+                        // see the documentation of getPressure(i)
+                        p = 1.0f;
+                    }
                     SDLActivity.onNativeTouch(touchDevId, pointerFingerId, action, x, y, p);
-                }
-                break;
-            
-            case MotionEvent.ACTION_UP:
-            case MotionEvent.ACTION_DOWN:
-                // Primary pointer up/down, the index is always zero
-                i = 0;
-            case MotionEvent.ACTION_POINTER_UP:
-            case MotionEvent.ACTION_POINTER_DOWN:
-                // Non primary pointer up/down
-                if (i == -1) {
-                    i = event.getActionIndex();
-                }
-                
-                pointerFingerId = event.getPointerId(i);
-                x = event.getX(i) / mWidth;
-                y = event.getY(i) / mHeight;
-                p = event.getPressure(i);
-                SDLActivity.onNativeTouch(touchDevId, pointerFingerId, action, x, y, p);
-                break;
-            
-            default:
-                break;
+                    break;
+
+                case MotionEvent.ACTION_CANCEL:
+                    for (i = 0; i < pointerCount; i++) {
+                        pointerFingerId = event.getPointerId(i);
+                        x = event.getX(i) / mWidth;
+                        y = event.getY(i) / mHeight;
+                        p = event.getPressure(i);
+                        if (p > 1.0f) {
+                            // may be larger than 1.0f on some devices
+                            // see the documentation of getPressure(i)
+                            p = 1.0f;
+                        }
+                        SDLActivity.onNativeTouch(touchDevId, pointerFingerId, MotionEvent.ACTION_UP, x, y, p);
+                    }
+                    break;
+
+                default:
+                    break;
+            }
         }
 
         return true;
-   } 
+   }
 
     // Sensor events
     public void enableSensor(int sensortype, boolean enabled) {
         // TODO: This uses getDefaultSensor - what if we have >1 accels?
         if (enabled) {
-            mSensorManager.registerListener(this, 
-                            mSensorManager.getDefaultSensor(sensortype), 
+            mSensorManager.registerListener(this,
+                            mSensorManager.getDefaultSensor(sensortype),
                             SensorManager.SENSOR_DELAY_GAME, null);
         } else {
-            mSensorManager.unregisterListener(this, 
+            mSensorManager.unregisterListener(this,
                             mSensorManager.getDefaultSensor(sensortype));
         }
     }
-    
+
     @Override
     public void onAccuracyChanged(Sensor sensor, int accuracy) {
         // TODO
@@ -789,9 +1379,9 @@
             }
             SDLActivity.onNativeAccel(-x / SensorManager.GRAVITY_EARTH,
                                       y / SensorManager.GRAVITY_EARTH,
-                                      event.values[2] / SensorManager.GRAVITY_EARTH - 1);
+                                      event.values[2] / SensorManager.GRAVITY_EARTH);
         }
-    }    
+    }
 }
 
 /* This is a fake invisible editor view that receives the input and defines the
@@ -814,33 +1404,29 @@
 
     @Override
     public boolean onKey(View v, int keyCode, KeyEvent event) {
-
-        // This handles the hardware keyboard input
-        if (event.isPrintingKey()) {
-            if (event.getAction() == KeyEvent.ACTION_DOWN) {
+        /* 
+         * This handles the hardware keyboard input
+         */
+        if (event.getAction() == KeyEvent.ACTION_DOWN) {
+            if (SDLActivity.isTextInputEvent(event)) {
                 ic.commitText(String.valueOf((char) event.getUnicodeChar()), 1);
             }
-            return true;
-        }
-
-        if (event.getAction() == KeyEvent.ACTION_DOWN) {
             SDLActivity.onNativeKeyDown(keyCode);
             return true;
         } else if (event.getAction() == KeyEvent.ACTION_UP) {
             SDLActivity.onNativeKeyUp(keyCode);
             return true;
         }
-
         return false;
     }
-        
+
     //
     @Override
     public boolean onKeyPreIme (int keyCode, KeyEvent event) {
         // As seen on StackOverflow: http://stackoverflow.com/questions/7634346/keyboard-hide-event
         // FIXME: Discussion at http://bugzilla.libsdl.org/show_bug.cgi?id=1639
         // FIXME: This is not a 100% effective solution to the problem of detecting if the keyboard is showing or not
-        // FIXME: A more effective solution would be to change our Layout from AbsoluteLayout to Relative or Linear
+        // FIXME: A more effective solution would be to assume our Layout to be RelativeLayout or LinearLayout
         // FIXME: And determine the keyboard presence doing this: http://stackoverflow.com/questions/2150078/how-to-check-visibility-of-software-keyboard-in-android
         // FIXME: An even more effective way would be if Android provided this out of the box, but where would the fun be in that :)
         if (event.getAction()==KeyEvent.ACTION_UP && keyCode == KeyEvent.KEYCODE_BACK) {
@@ -855,8 +1441,9 @@
     public InputConnection onCreateInputConnection(EditorInfo outAttrs) {
         ic = new SDLInputConnection(this, true);
 
+        outAttrs.inputType = InputType.TYPE_CLASS_TEXT | InputType.TYPE_TEXT_VARIATION_VISIBLE_PASSWORD;
         outAttrs.imeOptions = EditorInfo.IME_FLAG_NO_EXTRACT_UI
-                | 33554432 /* API 11: EditorInfo.IME_FLAG_NO_FULLSCREEN */;
+                | EditorInfo.IME_FLAG_NO_FULLSCREEN /* API 11 */;
 
         return ic;
     }
@@ -871,20 +1458,17 @@
 
     @Override
     public boolean sendKeyEvent(KeyEvent event) {
-
         /*
-         * This handles the keycodes from soft keyboard (and IME-translated
-         * input from hardkeyboard)
+         * This handles the keycodes from soft keyboard (and IME-translated input from hardkeyboard)
          */
         int keyCode = event.getKeyCode();
         if (event.getAction() == KeyEvent.ACTION_DOWN) {
-            if (event.isPrintingKey()) {
+            if (SDLActivity.isTextInputEvent(event)) {
                 commitText(String.valueOf((char) event.getUnicodeChar()), 1);
             }
             SDLActivity.onNativeKeyDown(keyCode);
             return true;
         } else if (event.getAction() == KeyEvent.ACTION_UP) {
-
             SDLActivity.onNativeKeyUp(keyCode);
             return true;
         }
@@ -912,162 +1496,100 @@
     public native void nativeSetComposingText(String text, int newCursorPosition);
 
     @Override
-    public boolean deleteSurroundingText(int beforeLength, int afterLength) {       
+    public boolean deleteSurroundingText(int beforeLength, int afterLength) {
         // Workaround to capture backspace key. Ref: http://stackoverflow.com/questions/14560344/android-backspace-in-webview-baseinputconnection
-        if (beforeLength == 1 && afterLength == 0) {
-            // backspace
-            return super.sendKeyEvent(new KeyEvent(KeyEvent.ACTION_DOWN, KeyEvent.KEYCODE_DEL))
-                && super.sendKeyEvent(new KeyEvent(KeyEvent.ACTION_UP, KeyEvent.KEYCODE_DEL));
+        // and https://bugzilla.libsdl.org/show_bug.cgi?id=2265
+        if (beforeLength > 0 && afterLength == 0) {
+            boolean ret = true;
+            // backspace(s)
+            while (beforeLength-- > 0) {
+               boolean ret_key = sendKeyEvent(new KeyEvent(KeyEvent.ACTION_DOWN, KeyEvent.KEYCODE_DEL))
+                              && sendKeyEvent(new KeyEvent(KeyEvent.ACTION_UP, KeyEvent.KEYCODE_DEL));
+               ret = ret && ret_key; 
+            }
+            return ret;
         }
 
         return super.deleteSurroundingText(beforeLength, afterLength);
     }
 }
 
-/* A null joystick handler for API level < 12 devices (the accelerometer is handled separately) */
-class SDLJoystickHandler {
-    
-    public boolean handleMotionEvent(MotionEvent event) {
-        return false;
-    }
-    
-    public void pollInputDevices() {
-    }
+interface SDLClipboardHandler {
+
+    public boolean clipboardHasText();
+    public String clipboardGetText();
+    public void clipboardSetText(String string);
+
 }
 
-/* Actual joystick functionality available for API >= 12 devices */
-class SDLJoystickHandler_API12 extends SDLJoystickHandler {
-  
-    class SDLJoystick {
-        public int device_id;
-        public String name;
-        public ArrayList<InputDevice.MotionRange> axes;
-        public ArrayList<InputDevice.MotionRange> hats;
+
+class SDLClipboardHandler_API11 implements
+    SDLClipboardHandler, 
+    android.content.ClipboardManager.OnPrimaryClipChangedListener {
+
+    protected android.content.ClipboardManager mClipMgr;
+
+    SDLClipboardHandler_API11() {
+       mClipMgr = (android.content.ClipboardManager) SDL.getContext().getSystemService(Context.CLIPBOARD_SERVICE);
+       mClipMgr.addPrimaryClipChangedListener(this);
     }
-    class RangeComparator implements Comparator<InputDevice.MotionRange>
-    {
-        @Override
-        public int compare(InputDevice.MotionRange arg0, InputDevice.MotionRange arg1) {
-            return arg0.getAxis() - arg1.getAxis();
-        }
+
+    @Override
+    public boolean clipboardHasText() {
+       return mClipMgr.hasText();
     }
-    
-    private ArrayList<SDLJoystick> mJoysticks;
-    
-    public SDLJoystickHandler_API12() {
-       
-        mJoysticks = new ArrayList<SDLJoystick>();
+
+    @Override
+    public String clipboardGetText() {
+        CharSequence text;
+        text = mClipMgr.getText();
+        if (text != null) {
+           return text.toString();
+        }
+        return null;
     }
 
     @Override
-    public void pollInputDevices() {
-        int[] deviceIds = InputDevice.getDeviceIds();
-        // It helps processing the device ids in reverse order
-        // For example, in the case of the XBox 360 wireless dongle,
-        // so the first controller seen by SDL matches what the receiver
-        // considers to be the first controller
-        
-        for(int i=deviceIds.length-1; i>-1; i--) {
-            SDLJoystick joystick = getJoystick(deviceIds[i]);
-            if (joystick == null) {
-                joystick = new SDLJoystick();
-                InputDevice joystickDevice = InputDevice.getDevice(deviceIds[i]);
-                if( (joystickDevice.getSources() & InputDevice.SOURCE_CLASS_JOYSTICK) != 0) {
-                    joystick.device_id = deviceIds[i];
-                    joystick.name = joystickDevice.getName();
-                    joystick.axes = new ArrayList<InputDevice.MotionRange>();
-                    joystick.hats = new ArrayList<InputDevice.MotionRange>();
-                    
-                    List<InputDevice.MotionRange> ranges = joystickDevice.getMotionRanges();
-                    Collections.sort(ranges, new RangeComparator());
-                    for (InputDevice.MotionRange range : ranges ) {
-                        if ((range.getSource() & InputDevice.SOURCE_CLASS_JOYSTICK) != 0 ) {
-                            if (range.getAxis() == MotionEvent.AXIS_HAT_X ||
-                                range.getAxis() == MotionEvent.AXIS_HAT_Y) {
-                                joystick.hats.add(range);
-                            }
-                            else {
-                                joystick.axes.add(range);
-                            }
-                        }
-                    }
-                    
-                    mJoysticks.add(joystick);
-                    SDLActivity.nativeAddJoystick(joystick.device_id, joystick.name, 0, -1, 
-                                                  joystick.axes.size(), joystick.hats.size()/2, 0);
-                }
-            }
-        }
-        
-        /* Check removed devices */
-        ArrayList<Integer> removedDevices = new ArrayList<Integer>();
-        for(int i=0; i < mJoysticks.size(); i++) {
-            int device_id = mJoysticks.get(i).device_id;
-            int j;
-            for (j=0; j < deviceIds.length; j++) {
-                if (device_id == deviceIds[j]) break;
-            }
-            if (j == deviceIds.length) {
-                removedDevices.add(device_id);
-            }
-        }
-            
-        for(int i=0; i < removedDevices.size(); i++) {
-            int device_id = removedDevices.get(i);
-            SDLActivity.nativeRemoveJoystick(device_id);
-            for (int j=0; j < mJoysticks.size(); j++) {
-                if (mJoysticks.get(j).device_id == device_id) {
-                    mJoysticks.remove(j);
-                    break;
-                }
-            }
-        }        
+    public void clipboardSetText(String string) {
+       mClipMgr.removePrimaryClipChangedListener(this);
+       mClipMgr.setText(string);
+       mClipMgr.addPrimaryClipChangedListener(this);
     }
     
-    protected SDLJoystick getJoystick(int device_id) {
-        for(int i=0; i < mJoysticks.size(); i++) {
-            if (mJoysticks.get(i).device_id == device_id) {
-                return mJoysticks.get(i);
-            }
-        }
-        return null;
-    }   
-    
-    @Override        
-    public boolean handleMotionEvent(MotionEvent event) {
-        if ( (event.getSource() & InputDevice.SOURCE_JOYSTICK) != 0) {
-            int actionPointerIndex = event.getActionIndex();
-            int action = event.getActionMasked();
-            switch(action) {
-                case MotionEvent.ACTION_MOVE:
-                    SDLJoystick joystick = getJoystick(event.getDeviceId());
-                    if ( joystick != null ) {
-                        for (int i = 0; i < joystick.axes.size(); i++) {
-                            InputDevice.MotionRange range = joystick.axes.get(i);
-                            /* Normalize the value to -1...1 */
-                            float value = ( event.getAxisValue( range.getAxis(), actionPointerIndex) - range.getMin() ) / range.getRange() * 2.0f - 1.0f;
-                            SDLActivity.onNativeJoy(joystick.device_id, i, value );
-                        }          
-                        for (int i = 0; i < joystick.hats.size(); i+=2) {
-                            int hatX = Math.round(event.getAxisValue( joystick.hats.get(i).getAxis(), actionPointerIndex ) );
-                            int hatY = Math.round(event.getAxisValue( joystick.hats.get(i+1).getAxis(), actionPointerIndex ) );
-                            SDLActivity.onNativeHat(joystick.device_id, i/2, hatX, hatY );
-                        }
-                    }
-                    break;
-                default:
-                    break;
-            }
-        }
-        return true;
-    }            
+    @Override
+    public void onPrimaryClipChanged() {
+        SDLActivity.onNativeClipboardChanged();
+    }
+
 }
 
-class SDLGenericMotionListener_API12 implements View.OnGenericMotionListener {
-    // Generic Motion (mouse hover, joystick...) events go here
-    // We only have joysticks yet
+class SDLClipboardHandler_Old implements
+    SDLClipboardHandler {
+   
+    protected android.text.ClipboardManager mClipMgrOld;
+  
+    SDLClipboardHandler_Old() {
+       mClipMgrOld = (android.text.ClipboardManager) SDL.getContext().getSystemService(Context.CLIPBOARD_SERVICE);
+    }
+
     @Override
-    public boolean onGenericMotion(View v, MotionEvent event) {
-        return SDLActivity.handleJoystickMotionEvent(event);
+    public boolean clipboardHasText() {
+       return mClipMgrOld.hasText();
+    }
+
+    @Override
+    public String clipboardGetText() {
+       CharSequence text;
+       text = mClipMgrOld.getText();
+       if (text != null) {
+          return text.toString();
+       }
+       return null;
+    }
+
+    @Override
+    public void clipboardSetText(String string) {
+       mClipMgrOld.setText(string);
     }
 }
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/android/src/org/libsdl/app/SDLAudioManager.java	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,178 @@
+package org.libsdl.app;
+
+import android.media.*;
+import android.util.Log;
+
+public class SDLAudioManager
+{
+    protected static final String TAG = "SDLAudio";
+
+    protected static AudioTrack mAudioTrack;
+    protected static AudioRecord mAudioRecord;
+
+    public static void initialize() {
+        mAudioTrack = null;
+        mAudioRecord = null;
+    }
+
+    // Audio
+
+    /**
+     * This method is called by SDL using JNI. Creates and starts the playback AudioTrack if none is open yet.
+     * @return 0 on success, -1 if the AudioTrack could not be initialized.
+     */
+    public static int audioOpen(int sampleRate, boolean is16Bit, boolean isStereo, int desiredFrames) {
+        int channelConfig = isStereo ? AudioFormat.CHANNEL_CONFIGURATION_STEREO : AudioFormat.CHANNEL_CONFIGURATION_MONO;
+        int audioFormat = is16Bit ? AudioFormat.ENCODING_PCM_16BIT : AudioFormat.ENCODING_PCM_8BIT;
+        int frameSize = (isStereo ? 2 : 1) * (is16Bit ? 2 : 1);
+
+        Log.v(TAG, "SDL audio: wanted " + (isStereo ? "stereo" : "mono") + " " + (is16Bit ? "16-bit" : "8-bit") + " " + (sampleRate / 1000f) + "kHz, " + desiredFrames + " frames buffer");
+
+        // Let the user pick a larger buffer if they really want -- but ye
+        // gods they probably shouldn't, the minimums are horrifyingly high
+        // latency already
+        desiredFrames = Math.max(desiredFrames, (AudioTrack.getMinBufferSize(sampleRate, channelConfig, audioFormat) + frameSize - 1) / frameSize);
+
+        if (mAudioTrack == null) {
+            mAudioTrack = new AudioTrack(AudioManager.STREAM_MUSIC, sampleRate,
+                    channelConfig, audioFormat, desiredFrames * frameSize, AudioTrack.MODE_STREAM);
+            // Instantiating AudioTrack can "succeed" without an exception and the track may still be invalid
+            // Ref: https://android.googlesource.com/platform/frameworks/base/+/refs/heads/master/media/java/android/media/AudioTrack.java
+            // Ref: http://developer.android.com/reference/android/media/AudioTrack.html#getState()
+            if (mAudioTrack.getState() != AudioTrack.STATE_INITIALIZED) {
+                Log.e(TAG, "Failed during initialization of Audio Track");
+                mAudioTrack.release(); // free the failed track's native resources, as captureOpen() does for AudioRecord
+                mAudioTrack = null;
+                return -1;
+            }
+
+            mAudioTrack.play();
+        }
+
+        Log.v(TAG, "SDL audio: got " + ((mAudioTrack.getChannelCount() >= 2) ? "stereo" : "mono") + " " + ((mAudioTrack.getAudioFormat() == AudioFormat.ENCODING_PCM_16BIT) ? "16-bit" : "8-bit") + " " + (mAudioTrack.getSampleRate() / 1000f) + "kHz, " + desiredFrames + " frames buffer");
+
+        return 0;
+    }
+
+    /**
+     * This method is called by SDL using JNI.
+     */
+    public static void audioWriteShortBuffer(short[] buffer) {
+        if (mAudioTrack == null) {
+            Log.e(TAG, "Attempted to make audio call with uninitialized audio!");
+            return;
+        }
+
+        for (int i = 0; i < buffer.length; ) {
+            int result = mAudioTrack.write(buffer, i, buffer.length - i);
+            if (result > 0) {
+                i += result;
+            } else if (result == 0) {
+                try {
+                    Thread.sleep(1);
+                } catch(InterruptedException e) {
+                    // Nom nom
+                }
+            } else {
+                Log.w(TAG, "SDL audio: error return from write(short)");
+                return;
+            }
+        }
+    }
+
+    /**
+     * This method is called by SDL using JNI.
+     */
+    public static void audioWriteByteBuffer(byte[] buffer) {
+        if (mAudioTrack == null) {
+            Log.e(TAG, "Attempted to make audio call with uninitialized audio!");
+            return;
+        }
+        
+        for (int i = 0; i < buffer.length; ) {
+            int result = mAudioTrack.write(buffer, i, buffer.length - i);
+            if (result > 0) {
+                i += result;
+            } else if (result == 0) {
+                try {
+                    Thread.sleep(1);
+                } catch(InterruptedException e) {
+                    // Nom nom
+                }
+            } else {
+                Log.w(TAG, "SDL audio: error return from write(byte)");
+                return;
+            }
+        }
+    }
+
+    /**
+     * This method is called by SDL using JNI.
+     */
+    public static int captureOpen(int sampleRate, boolean is16Bit, boolean isStereo, int desiredFrames) {
+        int channelConfig = isStereo ? AudioFormat.CHANNEL_CONFIGURATION_STEREO : AudioFormat.CHANNEL_CONFIGURATION_MONO;
+        int audioFormat = is16Bit ? AudioFormat.ENCODING_PCM_16BIT : AudioFormat.ENCODING_PCM_8BIT;
+        int frameSize = (isStereo ? 2 : 1) * (is16Bit ? 2 : 1);
+
+        Log.v(TAG, "SDL capture: wanted " + (isStereo ? "stereo" : "mono") + " " + (is16Bit ? "16-bit" : "8-bit") + " " + (sampleRate / 1000f) + "kHz, " + desiredFrames + " frames buffer");
+
+        // Let the user pick a larger buffer if they really want -- but ye
+        // gods they probably shouldn't, the minimums are horrifyingly high
+        // latency already
+        desiredFrames = Math.max(desiredFrames, (AudioRecord.getMinBufferSize(sampleRate, channelConfig, audioFormat) + frameSize - 1) / frameSize);
+
+        if (mAudioRecord == null) {
+            mAudioRecord = new AudioRecord(MediaRecorder.AudioSource.DEFAULT, sampleRate,
+                    channelConfig, audioFormat, desiredFrames * frameSize);
+
+            // see notes about AudioTrack state in audioOpen(), above. Probably also applies here.
+            if (mAudioRecord.getState() != AudioRecord.STATE_INITIALIZED) {
+                Log.e(TAG, "Failed during initialization of AudioRecord");
+                mAudioRecord.release();
+                mAudioRecord = null;
+                return -1;
+            }
+
+            mAudioRecord.startRecording();
+        }
+
+        Log.v(TAG, "SDL capture: got " + ((mAudioRecord.getChannelCount() >= 2) ? "stereo" : "mono") + " " + ((mAudioRecord.getAudioFormat() == AudioFormat.ENCODING_PCM_16BIT) ? "16-bit" : "8-bit") + " " + (mAudioRecord.getSampleRate() / 1000f) + "kHz, " + desiredFrames + " frames buffer");
+
+        return 0;
+    }
+
+    /** This method is called by SDL using JNI. */
+    public static int captureReadShortBuffer(short[] buffer, boolean blocking) {
+        // !!! FIXME: this is available in API Level 23. Until then, we always block.  :(
+        //return mAudioRecord.read(buffer, 0, buffer.length, blocking ? AudioRecord.READ_BLOCKING : AudioRecord.READ_NON_BLOCKING);
+        return mAudioRecord.read(buffer, 0, buffer.length);
+    }
+
+    /** This method is called by SDL using JNI. */
+    public static int captureReadByteBuffer(byte[] buffer, boolean blocking) {
+        // !!! FIXME: this is available in API Level 23. Until then, we always block.  :(
+        //return mAudioRecord.read(buffer, 0, buffer.length, blocking ? AudioRecord.READ_BLOCKING : AudioRecord.READ_NON_BLOCKING);
+        return mAudioRecord.read(buffer, 0, buffer.length);
+    }
+
+
+    /** This method is called by SDL using JNI. */
+    public static void audioClose() {
+        if (mAudioTrack != null) {
+            mAudioTrack.stop();
+            mAudioTrack.release();
+            mAudioTrack = null;
+        }
+    }
+
+    /** This method is called by SDL using JNI. */
+    public static void captureClose() {
+        if (mAudioRecord != null) {
+            mAudioRecord.stop();
+            mAudioRecord.release();
+            mAudioRecord = null;
+        }
+    }
+
+    public static native int nativeSetupJNI();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/android/src/org/libsdl/app/SDLControllerManager.java	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,433 @@
+package org.libsdl.app;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Objects;
+
+import android.content.Context;
+import android.os.*;
+import android.view.*;
+import android.util.Log;
+
+
+public class SDLControllerManager 
+{
+
+    public static native int nativeSetupJNI();
+
+    public static native int nativeAddJoystick(int device_id, String name, String desc,
+                                               int is_accelerometer, int nbuttons,
+                                               int naxes, int nhats, int nballs);
+    public static native int nativeRemoveJoystick(int device_id);
+    public static native int nativeAddHaptic(int device_id, String name);
+    public static native int nativeRemoveHaptic(int device_id);
+    public static native int onNativePadDown(int device_id, int keycode);
+    public static native int onNativePadUp(int device_id, int keycode);
+    public static native void onNativeJoy(int device_id, int axis,
+                                          float value);
+    public static native void onNativeHat(int device_id, int hat_id,
+                                          int x, int y);
+
+    protected static SDLJoystickHandler mJoystickHandler;
+    protected static SDLHapticHandler mHapticHandler;
+
+    private static final String TAG = "SDLControllerManager";
+
+    public static void initialize() {
+        mJoystickHandler = null;
+        mHapticHandler = null;
+
+        SDLControllerManager.setup();
+    }
+
+    public static void setup() {
+        if (Build.VERSION.SDK_INT >= 16) {
+            mJoystickHandler = new SDLJoystickHandler_API16();
+        } else if (Build.VERSION.SDK_INT >= 12) {
+            mJoystickHandler = new SDLJoystickHandler_API12();
+        } else {
+            mJoystickHandler = new SDLJoystickHandler();
+        }
+        mHapticHandler = new SDLHapticHandler();
+    }
+
+    // Joystick glue code, just a series of stubs that redirect to the SDLJoystickHandler instance
+    public static boolean handleJoystickMotionEvent(MotionEvent event) {
+        return mJoystickHandler.handleMotionEvent(event);
+    }
+
+    /**
+     * This method is called by SDL using JNI.
+     */
+    public static void pollInputDevices() {
+        mJoystickHandler.pollInputDevices();
+    }
+
+    /**
+     * This method is called by SDL using JNI.
+     */
+    public static void pollHapticDevices() {
+        mHapticHandler.pollHapticDevices();
+    }
+
+    /**
+     * This method is called by SDL using JNI.
+     */
+    public static void hapticRun(int device_id, int length) {
+        mHapticHandler.run(device_id, length);
+    }
+
+    // Check if a given device is considered a possible SDL joystick
+    public static boolean isDeviceSDLJoystick(int deviceId) {
+        InputDevice device = InputDevice.getDevice(deviceId);
+        // We cannot use InputDevice.isVirtual before API 16, so let's accept
+        // only nonnegative device ids (VIRTUAL_KEYBOARD equals -1)
+        if ((device == null) || (deviceId < 0)) {
+            return false;
+        }
+        int sources = device.getSources();
+
+        if ((sources & InputDevice.SOURCE_CLASS_JOYSTICK) == InputDevice.SOURCE_CLASS_JOYSTICK) {
+            Log.v(TAG, "Input device " + device.getName() + " is a joystick.");
+        }
+        if ((sources & InputDevice.SOURCE_DPAD) == InputDevice.SOURCE_DPAD) {
+            Log.v(TAG, "Input device " + device.getName() + " is a dpad.");
+        }
+        if ((sources & InputDevice.SOURCE_GAMEPAD) == InputDevice.SOURCE_GAMEPAD) {
+            Log.v(TAG, "Input device " + device.getName() + " is a gamepad.");
+        }
+
+        return (((sources & InputDevice.SOURCE_CLASS_JOYSTICK) == InputDevice.SOURCE_CLASS_JOYSTICK) ||
+                ((sources & InputDevice.SOURCE_DPAD) == InputDevice.SOURCE_DPAD) ||
+                ((sources & InputDevice.SOURCE_GAMEPAD) == InputDevice.SOURCE_GAMEPAD)
+        );
+    }
+
+}
+
+/* A null joystick handler for API level < 12 devices (the accelerometer is handled separately) */
+class SDLJoystickHandler {
+
+    /**
+     * Handles given MotionEvent.
+     * @param event the event to be handled.
+     * @return if given event was processed.
+     */
+    public boolean handleMotionEvent(MotionEvent event) {
+        return false;
+    }
+
+    /**
+     * Handles adding and removing of input devices.
+     */
+    public void pollInputDevices() {
+    }
+}
+
+/* Actual joystick functionality available for API >= 12 devices */
+class SDLJoystickHandler_API12 extends SDLJoystickHandler {
+
+    static class SDLJoystick {
+        public int device_id;
+        public String name;
+        public String desc;
+        public ArrayList<InputDevice.MotionRange> axes;
+        public ArrayList<InputDevice.MotionRange> hats;
+    }
+    static class RangeComparator implements Comparator<InputDevice.MotionRange> {
+        @Override
+        public int compare(InputDevice.MotionRange arg0, InputDevice.MotionRange arg1) {
+            return arg0.getAxis() - arg1.getAxis();
+        }
+    }
+
+    private ArrayList<SDLJoystick> mJoysticks;
+
+    public SDLJoystickHandler_API12() {
+
+        mJoysticks = new ArrayList<SDLJoystick>();
+    }
+
+    @Override
+    public void pollInputDevices() {
+        int[] deviceIds = InputDevice.getDeviceIds();
+        // It helps processing the device ids in reverse order
+        // For example, in the case of the XBox 360 wireless dongle,
+        // so the first controller seen by SDL matches what the receiver
+        // considers to be the first controller
+
+        for(int i=deviceIds.length-1; i>-1; i--) {
+            SDLJoystick joystick = getJoystick(deviceIds[i]);
+            if (joystick == null) {
+                joystick = new SDLJoystick();
+                InputDevice joystickDevice = InputDevice.getDevice(deviceIds[i]);
+                if (SDLControllerManager.isDeviceSDLJoystick(deviceIds[i])) {
+                    joystick.device_id = deviceIds[i];
+                    joystick.name = joystickDevice.getName();
+                    joystick.desc = getJoystickDescriptor(joystickDevice);
+                    joystick.axes = new ArrayList<InputDevice.MotionRange>();
+                    joystick.hats = new ArrayList<InputDevice.MotionRange>();
+
+                    List<InputDevice.MotionRange> ranges = joystickDevice.getMotionRanges();
+                    Collections.sort(ranges, new RangeComparator());
+                    for (InputDevice.MotionRange range : ranges ) {
+                        if ((range.getSource() & InputDevice.SOURCE_CLASS_JOYSTICK) != 0) {
+                            if (range.getAxis() == MotionEvent.AXIS_HAT_X ||
+                                range.getAxis() == MotionEvent.AXIS_HAT_Y) {
+                                joystick.hats.add(range);
+                            }
+                            else {
+                                joystick.axes.add(range);
+                            }
+                        }
+                    }
+
+                    mJoysticks.add(joystick);
+                    SDLControllerManager.nativeAddJoystick(joystick.device_id, joystick.name, joystick.desc, 0, -1,
+                                                           joystick.axes.size(), joystick.hats.size()/2, 0);
+                }
+            }
+        }
+
+        /* Check removed devices */
+        ArrayList<Integer> removedDevices = new ArrayList<Integer>();
+        for(int i=0; i < mJoysticks.size(); i++) {
+            int device_id = mJoysticks.get(i).device_id;
+            int j;
+            for (j=0; j < deviceIds.length; j++) {
+                if (device_id == deviceIds[j]) break;
+            }
+            if (j == deviceIds.length) {
+                removedDevices.add(Integer.valueOf(device_id));
+            }
+        }
+
+        for(int i=0; i < removedDevices.size(); i++) {
+            int device_id = removedDevices.get(i).intValue();
+            SDLControllerManager.nativeRemoveJoystick(device_id);
+            for (int j=0; j < mJoysticks.size(); j++) {
+                if (mJoysticks.get(j).device_id == device_id) {
+                    mJoysticks.remove(j);
+                    break;
+                }
+            }
+        }
+    }
+
+    protected SDLJoystick getJoystick(int device_id) {
+        for(int i=0; i < mJoysticks.size(); i++) {
+            if (mJoysticks.get(i).device_id == device_id) {
+                return mJoysticks.get(i);
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Forwards joystick ACTION_MOVE events to native code (axis values normalized to -1...1).
+     * @return always true, whether or not the event came from a joystick source.
+     */
+    @Override
+    public boolean handleMotionEvent(MotionEvent event) {
+        if ((event.getSource() & InputDevice.SOURCE_JOYSTICK) != 0) {
+            int actionPointerIndex = event.getActionIndex();
+            if (event.getActionMasked() == MotionEvent.ACTION_MOVE) {
+                SDLJoystick joystick = getJoystick(event.getDeviceId());
+                if (joystick != null) {
+                    for (int i = 0; i < joystick.axes.size(); i++) {
+                        InputDevice.MotionRange range = joystick.axes.get(i);
+                        /* Normalize the value to -1...1 */
+                        float value = ( event.getAxisValue( range.getAxis(), actionPointerIndex) - range.getMin() ) / range.getRange() * 2.0f - 1.0f;
+                        SDLControllerManager.onNativeJoy(joystick.device_id, i, value );
+                    }
+                    /* Hats are consumed as X/Y pairs; stop before an unpaired trailing axis so get(i+1) stays in bounds */
+                    for (int i = 0; i + 1 < joystick.hats.size(); i+=2) {
+                        int hatX = Math.round(event.getAxisValue( joystick.hats.get(i).getAxis(), actionPointerIndex ) );
+                        int hatY = Math.round(event.getAxisValue( joystick.hats.get(i+1).getAxis(), actionPointerIndex ) );
+                        SDLControllerManager.onNativeHat(joystick.device_id, i/2, hatX, hatY );
+                    }
+                }
+            }
+        }
+        return true;
+    }
+
+    public String getJoystickDescriptor(InputDevice joystickDevice) {
+        return joystickDevice.getName();
+    }
+}
+
+
+class SDLJoystickHandler_API16 extends SDLJoystickHandler_API12 {
+
+    @Override
+    public String getJoystickDescriptor(InputDevice joystickDevice) {
+        String desc = joystickDevice.getDescriptor();
+
+        if (desc != null && !Objects.equals(desc, "")) {
+            return desc;
+        }
+
+        return super.getJoystickDescriptor(joystickDevice);
+    }
+}
+
+class SDLHapticHandler {
+
+    class SDLHaptic {
+        public int device_id;
+        public String name;
+        public Vibrator vib;
+    }
+
+    private ArrayList<SDLHaptic> mHaptics;
+    
+    public SDLHapticHandler() {
+        mHaptics = new ArrayList<SDLHaptic>();
+    }
+
+    public void run(int device_id, int length) {
+        SDLHaptic haptic = getHaptic(device_id);
+        if (haptic != null) {
+            haptic.vib.vibrate (length);
+        }
+    }
+
+    /**
+     * Registers newly seen vibrators (per input device on API 16+, plus the system VIBRATOR_SERVICE) with native code and unregisters vanished ones.
+     */
+    public void pollHapticDevices() {
+        final int deviceId_VIBRATOR_SERVICE = 999999;
+        boolean hasVibratorService = false;
+
+        int[] deviceIds = InputDevice.getDeviceIds();
+        // It helps processing the device ids in reverse order
+        // For example, in the case of the XBox 360 wireless dongle,
+        // so the first controller seen by SDL matches what the receiver
+        // considers to be the first controller
+        if (Build.VERSION.SDK_INT >= 16) {
+            for (int i = deviceIds.length - 1; i > -1; i--) {
+                SDLHaptic haptic = getHaptic(deviceIds[i]);
+                if (haptic == null) {
+                    InputDevice device = InputDevice.getDevice(deviceIds[i]);
+                    // getDevice() returns null when the device disappeared after getDeviceIds() was called
+                    Vibrator vib = (device != null) ? device.getVibrator() : null;
+                    if (vib != null && vib.hasVibrator()) {
+                        haptic = new SDLHaptic();
+                        haptic.device_id = deviceIds[i];
+                        haptic.name = device.getName();
+                        haptic.vib = vib;
+                        mHaptics.add(haptic);
+                        SDLControllerManager.nativeAddHaptic(haptic.device_id, haptic.name);
+                    }
+                }
+            }
+        }
+
+        /* Check VIBRATOR_SERVICE */
+        Vibrator vib = (Vibrator) SDL.getContext().getSystemService(Context.VIBRATOR_SERVICE);
+        if (vib != null) {
+            if (Build.VERSION.SDK_INT >= 11) {
+                hasVibratorService = vib.hasVibrator();
+            } else {
+                hasVibratorService = true;
+            }
+
+            if (hasVibratorService) {
+                SDLHaptic haptic = getHaptic(deviceId_VIBRATOR_SERVICE);
+                if (haptic == null) {
+                    haptic = new SDLHaptic();
+                    haptic.device_id = deviceId_VIBRATOR_SERVICE;
+                    haptic.name = "VIBRATOR_SERVICE";
+                    haptic.vib = vib;
+                    mHaptics.add(haptic);
+                    SDLControllerManager.nativeAddHaptic(haptic.device_id, haptic.name);
+                }
+            }
+        }
+
+        /* Check removed devices */
+        ArrayList<Integer> removedDevices = new ArrayList<Integer>();
+        for(int i=0; i < mHaptics.size(); i++) {
+            int device_id = mHaptics.get(i).device_id;
+            int j;
+            for (j=0; j < deviceIds.length; j++) {
+                if (device_id == deviceIds[j]) break;
+            }
+            if (device_id == deviceId_VIBRATOR_SERVICE && hasVibratorService) {
+                // don't remove the vibrator if it is still present
+            } else if (j == deviceIds.length) {
+                removedDevices.add(device_id);
+            }
+        }
+
+        for(int i=0; i < removedDevices.size(); i++) {
+            int device_id = removedDevices.get(i);
+            SDLControllerManager.nativeRemoveHaptic(device_id);
+            for (int j=0; j < mHaptics.size(); j++) {
+                if (mHaptics.get(j).device_id == device_id) {
+                    mHaptics.remove(j);
+                    break;
+                }
+            }
+        }
+    }
+
+    protected SDLHaptic getHaptic(int device_id) {
+        for(int i=0; i < mHaptics.size(); i++) {
+            if (mHaptics.get(i).device_id == device_id) {
+                return mHaptics.get(i);
+            }
+        }
+        return null;
+    }   
+}
+
+class SDLGenericMotionListener_API12 implements View.OnGenericMotionListener {
+    // Generic Motion (mouse hover, joystick...) events go here
+    @Override
+    public boolean onGenericMotion(View v, MotionEvent event) {
+        float x, y;
+        int action;
+
+        switch ( event.getSource() ) {
+            case InputDevice.SOURCE_JOYSTICK:
+            case InputDevice.SOURCE_GAMEPAD:
+            case InputDevice.SOURCE_DPAD:
+                return SDLControllerManager.handleJoystickMotionEvent(event);
+
+            case InputDevice.SOURCE_MOUSE:
+                if (!SDLActivity.mSeparateMouseAndTouch) {
+                    break;
+                }
+                action = event.getActionMasked();
+                switch (action) {
+                    case MotionEvent.ACTION_SCROLL:
+                        x = event.getAxisValue(MotionEvent.AXIS_HSCROLL, 0);
+                        y = event.getAxisValue(MotionEvent.AXIS_VSCROLL, 0);
+                        SDLActivity.onNativeMouse(0, action, x, y);
+                        return true;
+
+                    case MotionEvent.ACTION_HOVER_MOVE:
+                        x = event.getX(0);
+                        y = event.getY(0);
+
+                        SDLActivity.onNativeMouse(0, action, x, y);
+                        return true;
+
+                    default:
+                        break;
+                }
+                break;
+
+            default:
+                break;
+        }
+
+        // Event was not managed
+        return false;
+    }
+}
+
--- a/backend.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/backend.c	Sat Jan 15 13:15:21 2022 -0800
@@ -56,13 +56,23 @@
 	if (size_sum) {
 		*size_sum = 0;
 	}
+	uint32_t minsize;
+	if (flags == MMAP_CODE) {
+		minsize = 1 << (opts->ram_flags_shift + 3);
+	} else {
+		minsize = 0;
+	}
 	address &= opts->address_mask;
 	for (memmap_chunk const *cur = opts->memmap, *end = opts->memmap + opts->memmap_chunks; cur != end; cur++)
 	{
 		if (address >= cur->start && address < cur->end) {
 			return cur;
 		} else if (size_sum && (cur->flags & flags) == flags) {
-			*size_sum += chunk_size(opts, cur);
+			uint32_t size = chunk_size(opts, cur);
+			if (size < minsize) {
+				size = minsize;
+			}
+			*size_sum += size;
 		}
 	}
 	return NULL;
@@ -83,7 +93,32 @@
 				: memmap[chunk].buffer;
 			if (!base) {
 				if (memmap[chunk].flags & MMAP_AUX_BUFF) {
-					return memmap[chunk].buffer + (address & memmap[chunk].aux_mask);
+					return ((uint8_t *)memmap[chunk].buffer) + (address & memmap[chunk].aux_mask);
+				}
+				return NULL;
+			}
+			return base + (address & memmap[chunk].mask);
+		}
+	}
+	return NULL;
+}
+
+void * get_native_write_pointer(uint32_t address, void ** mem_pointers, cpu_options * opts)
+{
+	memmap_chunk const * memmap = opts->memmap;
+	address &= opts->address_mask;
+	for (uint32_t chunk = 0; chunk < opts->memmap_chunks; chunk++)
+	{
+		if (address >= memmap[chunk].start && address < memmap[chunk].end) {
+			if (!(memmap[chunk].flags & (MMAP_WRITE))) {
+				return NULL;
+			}
+			uint8_t * base = memmap[chunk].flags & MMAP_PTR_IDX
+				? mem_pointers[memmap[chunk].ptr_index]
+				: memmap[chunk].buffer;
+			if (!base) {
+				if (memmap[chunk].flags & MMAP_AUX_BUFF) {
+					return ((uint8_t *)memmap[chunk].buffer) + (address & memmap[chunk].aux_mask);
 				}
 				return NULL;
 			}
@@ -99,7 +134,7 @@
 	if (!chunk) {
 		return 0xFFFF;
 	}
-	uint32_t offset = (address - chunk->start) & chunk->mask;
+	uint32_t offset = address & chunk->mask;
 	if (chunk->flags & MMAP_READ) {
 		uint8_t *base;
 		if (chunk->flags & MMAP_PTR_IDX) {
@@ -129,6 +164,109 @@
 	return 0xFFFF;
 }
 
+//Writes a 16-bit value through the memory map. Writable chunks with a backing pointer are
+//stored to directly; otherwise the chunk's write_16 handler is called if present.
+void write_word(uint32_t address, uint16_t value, void **mem_pointers, cpu_options *opts, void *context)
+{
+	memmap_chunk const *chunk = find_map_chunk(address, opts, 0, NULL);
+	if (!chunk) {
+		return;
+	}
+	uint32_t offset = address & chunk->mask;
+	if (chunk->flags & MMAP_WRITE) {
+		uint8_t *base = chunk->flags & MMAP_PTR_IDX ? mem_pointers[chunk->ptr_index] : chunk->buffer;
+		if (base) {
+			if (chunk->flags & (MMAP_ONLY_ODD | MMAP_ONLY_EVEN)) {
+				//only one byte is stored per word, so the byte index is half the word offset
+				offset /= 2;
+				if (chunk->flags & MMAP_ONLY_EVEN) {
+					//shifting a uint16_t by 16 always gave 0; presumably the even byte is the upper half
+					value >>= 8;
+				}
+				base[offset] = value;
+			} else {
+				*(uint16_t *)(base + offset) = value;
+			}
+			return;
+		}
+	}
+	//no usable pointer: fall back to the handler for non-writable or MMAP_FUNC_NULL chunks
+	if ((!(chunk->flags & MMAP_WRITE) || (chunk->flags & MMAP_FUNC_NULL)) && chunk->write_16) {
+		chunk->write_16(offset, context, value);
+	}
+}
+
+//Reads one byte through the memory map.
+//Readable chunks with a backing pointer are read directly (honouring the
+//MMAP_ONLY_ODD/MMAP_ONLY_EVEN flags and opts->byte_swap); otherwise the chunk's
+//read_8 handler is used when the chunk is not readable or is flagged MMAP_FUNC_NULL.
+//Returns 0xFF for unmapped addresses and whenever neither source yields a value.
+uint8_t read_byte(uint32_t address, void **mem_pointers, cpu_options *opts, void *context)
+{
+	memmap_chunk const *map = find_map_chunk(address, opts, 0, NULL);
+	if (!map) {
+		return 0xFF;
+	}
+	uint32_t off = address & map->mask;
+	uint8_t *mem = NULL;
+	if (map->flags & MMAP_READ) {
+		mem = map->flags & MMAP_PTR_IDX ? mem_pointers[map->ptr_index] : map->buffer;
+	}
+	if (!mem) {
+		//handler is only consulted for non-readable chunks or ones flagged MMAP_FUNC_NULL
+		if ((!(map->flags & MMAP_READ) || (map->flags & MMAP_FUNC_NULL)) && map->read_8) {
+			return map->read_8(off, context);
+		}
+		return 0xFF;
+	}
+	if (map->flags & (MMAP_ONLY_ODD | MMAP_ONLY_EVEN)) {
+		//odd addresses are absent from MMAP_ONLY_EVEN chunks and even ones from MMAP_ONLY_ODD
+		uint32_t excluding_flag = (address & 1) ? MMAP_ONLY_EVEN : MMAP_ONLY_ODD;
+		if (map->flags & excluding_flag) {
+			return 0xFF;
+		}
+		off /= 2;
+	} else if (opts->byte_swap) {
+		off ^= 1;
+	}
+	return mem[off];
+}
+
+//Writes one byte through the memory map. Writable chunks with a backing pointer are stored to
+//directly; otherwise the chunk's write_8 handler is called if present.
+void write_byte(uint32_t address, uint8_t value, void **mem_pointers, cpu_options *opts, void *context)
+{
+	memmap_chunk const *chunk = find_map_chunk(address, opts, 0, NULL);
+	if (!chunk) {
+		return;
+	}
+	uint32_t offset = address & chunk->mask;
+	if (chunk->flags & MMAP_WRITE) {
+		uint8_t *base = chunk->flags & MMAP_PTR_IDX ? mem_pointers[chunk->ptr_index] : chunk->buffer;
+		if (base) {
+			if (chunk->flags & (MMAP_ONLY_ODD | MMAP_ONLY_EVEN)) {
+				if (address & 1) {
+					if (chunk->flags & MMAP_ONLY_EVEN) {
+						return;
+					}
+				} else if (chunk->flags & MMAP_ONLY_ODD) {
+					return;
+				}
+				offset /= 2;
+			} else if(opts->byte_swap) {
+				offset ^= 1;
+			}
+			base[offset] = value;
+			//memory took the write; without this return a MMAP_FUNC_NULL chunk with a valid
+			//pointer would also invoke write_8, which write_word does not do
+			return;
+		}
+	}
+	if ((!(chunk->flags & MMAP_WRITE) || (chunk->flags & MMAP_FUNC_NULL)) && chunk->write_8) {
+		chunk->write_8(offset, context, value);
+	}
+}
+
 uint32_t chunk_size(cpu_options *opts, memmap_chunk const *chunk)
 {
 	if (chunk->mask == opts->address_mask) {
@@ -141,13 +279,15 @@
 uint32_t ram_size(cpu_options *opts)
 {
 	uint32_t size = 0;
+	uint32_t minsize = 1 << (opts->ram_flags_shift + 3);
 	for (int i = 0; i < opts->memmap_chunks; i++)
 	{
 		if (opts->memmap[i].flags & MMAP_CODE) {
-			if (opts->memmap[i].mask == opts->address_mask) {
-				size += opts->memmap[i].end - opts->memmap[i].start;
+			uint32_t cursize = chunk_size(opts, opts->memmap + i);
+			if (cursize < minsize) {
+				size += minsize;
 			} else {
-				size += opts->memmap[i].mask + 1;
+				size += cursize;
 			}
 		}
 	}
--- a/backend.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/backend.h	Sat Jan 15 13:15:21 2022 -0800
@@ -95,7 +95,11 @@
 
 code_ptr gen_mem_fun(cpu_options * opts, memmap_chunk const * memmap, uint32_t num_chunks, ftype fun_type, code_ptr *after_inc);
 void * get_native_pointer(uint32_t address, void ** mem_pointers, cpu_options * opts);
+void * get_native_write_pointer(uint32_t address, void ** mem_pointers, cpu_options * opts);
 uint16_t read_word(uint32_t address, void **mem_pointers, cpu_options *opts, void *context);
+void write_word(uint32_t address, uint16_t value, void **mem_pointers, cpu_options *opts, void *context);
+uint8_t read_byte(uint32_t address, void **mem_pointers, cpu_options *opts, void *context);
+void write_byte(uint32_t address, uint8_t value, void **mem_pointers, cpu_options *opts, void *context);
 memmap_chunk const *find_map_chunk(uint32_t address, cpu_options *opts, uint16_t flags, uint32_t *size_sum);
 uint32_t chunk_size(cpu_options *opts, memmap_chunk const *chunk);
 uint32_t ram_size(cpu_options *opts);
--- a/backend_x86.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/backend_x86.c	Sat Jan 15 13:15:21 2022 -0800
@@ -299,10 +299,16 @@
 			retn(code);
 		}
 		if (memmap[chunk].flags & MMAP_CODE) {
+			uint32_t added_offset;
 			if (memmap[chunk].mask == opts->address_mask) {
-				ram_flags_off += (memmap[chunk].end - memmap[chunk].start) / (1 << opts->ram_flags_shift) / 8; ;
+				added_offset = (memmap[chunk].end - memmap[chunk].start) / (1 << opts->ram_flags_shift) / 8;
 			} else {
-				ram_flags_off += (memmap[chunk].mask + 1) /  (1 << opts->ram_flags_shift) / 8;;
+				added_offset = (memmap[chunk].mask + 1) /  (1 << opts->ram_flags_shift) / 8;
+			}
+			if (added_offset) {
+				ram_flags_off += added_offset;
+			} else {
+				ram_flags_off += 1;
 			}
 		}
 		if (lb_jcc) {
--- a/bindings.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/bindings.c	Sat Jan 15 13:15:21 2022 -0800
@@ -1,4 +1,5 @@
 #include <string.h>
+#include <stdlib.h>
 #include "render.h"
 #include "system.h"
 #include "io.h"
@@ -35,6 +36,7 @@
 	UI_RELOAD,
 	UI_SMS_PAUSE,
 	UI_SCREENSHOT,
+	UI_VGM_LOG,
 	UI_EXIT,
 	UI_PLANE_DEBUG,
 	UI_VRAM_DEBUG,
@@ -257,17 +259,51 @@
 #define localtime_r(a,b) localtime(a)
 #endif
 
+//Builds "<dir><PATH_SEP><file>" from two config entries, creating the directory if needed.
+//The file part is a strftime format (config_template, else default_name). Caller owns the result.
+char *get_content_config_path(char *config_path, char *config_template, char *default_name)
+{
+	char *base = tern_find_path(config, config_path, TVAL_PTR).ptrval;
+	const system_media *media = current_media();
+	tern_node *vars = tern_insert_ptr(NULL, "HOME", get_home_dir());
+	vars = tern_insert_ptr(vars, "EXEDIR", get_exe_dir());
+	vars = tern_insert_ptr(vars, "USERDATA", (char *)get_userdata_dir());
+	vars = tern_insert_ptr(vars, "ROMNAME", media->name);
+	vars = tern_insert_ptr(vars, "ROMDIR", media->dir);
+	base = replace_vars(base ? base : "$HOME", vars, 1); //"$HOME" when the path is not configured
+	tern_free(vars);
+	ensure_dir_exists(base);
+	time_t now = time(NULL);
+	struct tm local_store;
+	char fname_part[256];
+	char *template = tern_find_path(config, config_template, TVAL_PTR).ptrval;
+	if (template) {
+		vars = tern_insert_ptr(NULL, "ROMNAME", media->name);
+		template = replace_vars(template, vars, 0);
+		tern_free(vars); //this second variable tree was previously never released
+	} else {
+		template = strdup(default_name); //duplicated so both branches can be freed the same way
+	}
+	strftime(fname_part, sizeof(fname_part), template, localtime_r(&now, &local_store));
+	char const *parts[] = {base, PATH_SEP, fname_part};
+	char *path = alloc_concat_m(3, parts);
+	free(base);
+	free(template);
+	return path;
+}
+
 void handle_binding_up(keybinding * binding)
 {
+	uint8_t allow_content_binds = content_binds_enabled && current_system;
 	switch(binding->bind_type)
 	{
 	case BIND_GAMEPAD:
-		if (content_binds_enabled && current_system->gamepad_up) {
+		if (allow_content_binds && current_system->gamepad_up) {
 			current_system->gamepad_up(current_system, binding->subtype_a, binding->subtype_b);
 		}
 		break;
 	case BIND_MOUSE:
-		if (content_binds_enabled && current_system->mouse_up) {
+		if (allow_content_binds && current_system->mouse_up) {
 			current_system->mouse_up(current_system, binding->subtype_a, binding->subtype_b);
 		}
 		break;
@@ -275,22 +311,22 @@
 		switch (binding->subtype_a)
 		{
 		case UI_DEBUG_MODE_INC:
-			if (content_binds_enabled) {
+			if (allow_content_binds) {
 				current_system->inc_debug_mode(current_system);
 			}
 			break;
 		case UI_ENTER_DEBUGGER:
-			if (content_binds_enabled) {
+			if (allow_content_binds) {
 				current_system->enter_debugger = 1;
 			}
 			break;
 		case UI_SAVE_STATE:
-			if (content_binds_enabled) {
+			if (allow_content_binds) {
 				current_system->save_state = QUICK_SAVE_SLOT+1;
 			}
 			break;
 		case UI_NEXT_SPEED:
-			if (content_binds_enabled) {
+			if (allow_content_binds) {
 				current_speed++;
 				if (current_speed >= num_speeds) {
 					current_speed = 0;
@@ -300,7 +336,7 @@
 			}
 			break;
 		case UI_PREV_SPEED:
-			if (content_binds_enabled) {
+			if (allow_content_binds) {
 				current_speed--;
 				if (current_speed < 0) {
 					current_speed = num_speeds - 1;
@@ -310,7 +346,7 @@
 			}
 			break;
 		case UI_SET_SPEED:
-			if (content_binds_enabled) {
+			if (allow_content_binds) {
 				if (binding->subtype_b < num_speeds) {
 					current_speed = binding->subtype_b;
 					printf("Setting speed to %d: %d\n", current_speed, speeds[current_speed]);
@@ -328,7 +364,7 @@
 			}
 			break;
 		case UI_TOGGLE_KEYBOARD_CAPTURE:
-			if (content_binds_enabled && current_system->has_keyboard) {
+			if (allow_content_binds && current_system->has_keyboard) {
 				keyboard_captured = !keyboard_captured;
 			}
 			break;
@@ -336,52 +372,44 @@
 			render_toggle_fullscreen();
 			break;
 		case UI_SOFT_RESET:
-			if (content_binds_enabled) {
+			if (allow_content_binds) {
 				current_system->soft_reset(current_system);
 			}
 			break;
 		case UI_RELOAD:
-			if (content_binds_enabled) {
+			if (allow_content_binds) {
 				reload_media();
 			}
 			break;
 		case UI_SMS_PAUSE:
-			if (content_binds_enabled && current_system->gamepad_down) {
+			if (allow_content_binds && current_system->gamepad_down) {
 				current_system->gamepad_down(current_system, GAMEPAD_MAIN_UNIT, MAIN_UNIT_PAUSE);
 			}
 			break;
-		case UI_SCREENSHOT: {
-			if (content_binds_enabled) {
-				char *screenshot_base = tern_find_path(config, "ui\0screenshot_path\0", TVAL_PTR).ptrval;
-				if (!screenshot_base) {
-					screenshot_base = "$HOME";
-				}
-				tern_node *vars = tern_insert_ptr(NULL, "HOME", get_home_dir());
-				vars = tern_insert_ptr(vars, "EXEDIR", get_exe_dir());
-				screenshot_base = replace_vars(screenshot_base, vars, 1);
-				tern_free(vars);
-				time_t now = time(NULL);
-				struct tm local_store;
-				char fname_part[256];
-				char *template = tern_find_path(config, "ui\0screenshot_template\0", TVAL_PTR).ptrval;
-				if (!template) {
-					template = "blastem_%c.ppm";
-				}
-				strftime(fname_part, sizeof(fname_part), template, localtime_r(&now, &local_store));
-				char const *parts[] = {screenshot_base, PATH_SEP, fname_part};
-				char *path = alloc_concat_m(3, parts);
-				free(screenshot_base);
+		case UI_SCREENSHOT:
+			if (allow_content_binds) {
+				char *path = get_content_config_path("ui\0screenshot_path\0", "ui\0screenshot_template\0", "blastem_%c.ppm");
 				render_save_screenshot(path);
 			}
 			break;
-		}
+		case UI_VGM_LOG:
+			if (allow_content_binds && current_system->start_vgm_log) {
+				if (current_system->vgm_logging) {
+					current_system->stop_vgm_log(current_system);
+				} else {
+					char *path = get_content_config_path("ui\0vgm_path\0", "ui\0vgm_template\0", "blastem_%c.vgm");
+					current_system->start_vgm_log(current_system, path);
+					free(path);
+				}
+			}
+			break;
 		case UI_EXIT:
 #ifndef DISABLE_NUKLEAR
 			if (is_nuklear_active()) {
 				show_pause_menu();
 			} else {
 #endif
-			current_system->request_exit(current_system);
+			system_request_exit(current_system, 1);
 			if (current_system->type == SYSTEM_GENESIS) {
 				genesis_context *gen = (genesis_context *)current_system;
 				if (gen->extra) {
@@ -398,7 +426,7 @@
 		case UI_VRAM_DEBUG: 
 		case UI_CRAM_DEBUG:
 		case UI_COMPOSITE_DEBUG:
-			if (content_binds_enabled) {
+			if (allow_content_binds) {
 				vdp_context *vdp = NULL;
 				if (current_system->type == SYSTEM_GENESIS) {
 					genesis_context *gen = (genesis_context *)current_system;
@@ -556,7 +584,7 @@
 {
 	const int gpadslen = strlen("gamepads.");
 	const int mouselen = strlen("mouse.");
-	if (!strncmp(target, "gamepads.", gpadslen)) {
+	if (startswith(target, "gamepads.")) {
 		int padnum = target[gpadslen] == 'n' ? device_num + 1 : target[gpadslen] - '0';
 		if (padnum >= 1 && padnum <= 8) {
 			int button = tern_find_int(padbuttons, target + gpadslen + 1, 0);
@@ -574,7 +602,7 @@
 		} else {
 			warning("Gamepad mapping string '%s' refers to an invalid gamepad number %c\n", target, target[gpadslen]);
 		}
-	} else if(!strncmp(target, "mouse.", mouselen)) {
+	} else if(startswith(target, "mouse.")) {
 		int mousenum = target[mouselen] == 'n' ? device_num + 1 : target[mouselen] - '0';
 		if (mousenum >= 1 && mousenum <= 8) {
 			int button = tern_find_int(mousebuttons, target + mouselen + 1, 0);
@@ -592,7 +620,7 @@
 		} else {
 			warning("Gamepad mapping string '%s' refers to an invalid mouse number %c\n", target, target[mouselen]);
 		}
-	} else if(!strncmp(target, "ui.", strlen("ui."))) {
+	} else if(startswith(target, "ui.")) {
 		if (!strcmp(target + 3, "vdp_debug_mode")) {
 			*subtype_a = UI_DEBUG_MODE_INC;
 		} else if(!strcmp(target + 3, "vdp_debug_pal")) {
@@ -602,7 +630,7 @@
 			*subtype_a = UI_ENTER_DEBUGGER;
 		} else if(!strcmp(target + 3, "save_state")) {
 			*subtype_a = UI_SAVE_STATE;
-		} else if(!strncmp(target + 3, "set_speed.", strlen("set_speed."))) {
+		} else if(startswith(target + 3, "set_speed.")) {
 			*subtype_a = UI_SET_SPEED;
 			*subtype_b = atoi(target + 3 + strlen("set_speed."));
 		} else if(!strcmp(target + 3, "next_speed")) {
@@ -623,6 +651,8 @@
 			*subtype_a = UI_SMS_PAUSE;
 		} else if (!strcmp(target + 3, "screenshot")) {
 			*subtype_a = UI_SCREENSHOT;
+		} else if (!strcmp(target + 3, "vgm_log")) {
+			*subtype_a = UI_VGM_LOG;
 		} else if(!strcmp(target + 3, "exit")) {
 			*subtype_a = UI_EXIT;
 		} else if (!strcmp(target + 3, "plane_debug")) {
@@ -814,7 +844,7 @@
 			if (hostbutton == RENDER_INVALID_NAME) {
 				warning("%s is not a valid gamepad input name\n", key);
 			} else if (hostbutton == RENDER_NOT_MAPPED && hostpadnum != map_warning_pad) {
-				warning("No SDL 2 mapping exists for input %s on gamepad %d\n", key, hostpadnum);
+				debug_message("No SDL 2 mapping exists for input %s on gamepad %d\n", key, hostpadnum);
 				map_warning_pad = hostpadnum;
 			}
 			return;
@@ -823,7 +853,7 @@
 			bind_dpad(hostpadnum, render_dpad_part(hostbutton), render_direction_part(hostbutton), bindtype, subtype_a, subtype_b);
 			return;
 		} else if (hostbutton & RENDER_AXIS_BIT) {
-			bind_axis(hostpadnum, render_axis_part(hostbutton), 1, bindtype, subtype_a, subtype_b);
+			bind_axis(hostpadnum, render_axis_part(hostbutton), hostbutton & RENDER_AXIS_POS, bindtype, subtype_a, subtype_b);
 			return;
 		}
 	}
@@ -861,7 +891,7 @@
 			if (axis == RENDER_INVALID_NAME) {
 				warning("%s is not a valid gamepad input name\n", key);
 			} else if (axis == RENDER_NOT_MAPPED && hostpadnum != map_warning_pad) {
-				warning("No SDL 2 mapping exists for input %s on gamepad %d\n", key, hostpadnum);
+				debug_message("No SDL 2 mapping exists for input %s on gamepad %d\n", key, hostpadnum);
 				map_warning_pad = hostpadnum;
 			}
 			goto done;
@@ -957,14 +987,26 @@
 			char numstr[2] = {dpad + '0', 0};
 			tern_node * pad_dpad = tern_find_node(dpad_node, numstr);
 			char * dirs[] = {"up", "down", "left", "right"};
-			//TODO: Support controllers that have d-pads implemented as analog axes or buttons
+			char *render_dirs[] = {"dpup", "dpdown", "dpleft", "dpright"};
 			int dirnums[] = {RENDER_DPAD_UP, RENDER_DPAD_DOWN, RENDER_DPAD_LEFT, RENDER_DPAD_RIGHT};
 			for (int dir = 0; dir < sizeof(dirs)/sizeof(dirs[0]); dir++) {
 				char * target = tern_find_ptr(pad_dpad, dirs[dir]);
 				if (target) {
 					uint8_t subtype_a = 0, subtype_b = 0;
 					int bindtype = parse_binding_target(joystick, target, get_pad_buttons(), get_mouse_buttons(), &subtype_a, &subtype_b);
-					bind_dpad(joystick, dpad, dirnums[dir], bindtype, subtype_a, subtype_b);
+					int32_t hostbutton = dpad >0 ? -1 : render_translate_input_name(joystick, render_dirs[dir], 0);
+					if (hostbutton < 0) {
+						//assume this is a raw dpad mapping
+						bind_dpad(joystick, dpad, dirnums[dir], bindtype, subtype_a, subtype_b);
+					} else if (hostbutton & RENDER_DPAD_BIT) {
+						bind_dpad(joystick, render_dpad_part(hostbutton), render_direction_part(hostbutton), bindtype, subtype_a, subtype_b);
+					} else if (hostbutton & RENDER_AXIS_BIT) {
+						//SDL2 knows internally whether this should be a positive or negative binding, but doesn't expose that externally
+						//for now I'll just assume that any controller with axes for a d-pad has these mapped the "sane" way
+						bind_axis(joystick, render_axis_part(hostbutton), dir == 1 || dir == 3 ? 1 : 0, bindtype, subtype_a, subtype_b);
+					} else {
+						bind_button(joystick, hostbutton, bindtype, subtype_a, subtype_b);
+					}
 				}
 			}
 		}
--- a/blastcpm.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/blastcpm.c	Sat Jan 15 13:15:21 2022 -0800
@@ -2,9 +2,14 @@
 #include <stdlib.h>
 #include <stddef.h>
 #include <string.h>
+#include <time.h>
 #include <sys/select.h>
 
+#ifdef NEW_CORE
+#include "z80.h"
+#else
 #include "z80_to_x86.h"
+#endif
 #include "util.h"
 
 uint8_t ram[64 * 1024];
@@ -14,10 +19,12 @@
 #define OS_RESET 0xE403
 int headless = 1;
 
+#ifndef NEW_CORE
 void z80_next_int_pulse(z80_context * context)
 {
 	context->int_pulse_start = context->int_pulse_end = CYCLE_NEVER;
 }
+#endif
 
 void render_errorbox(char *title, char *message)
 {
@@ -55,8 +62,18 @@
 	return select(fileno(stdin)+1, &read_fds, NULL, NULL, &timeout) > 0; 
 }
 
+time_t start;
+uint64_t total_cycles;
 void *exit_write(uint32_t address, void *context, uint8_t value)
 {
+	time_t duration = time(NULL) - start; //wall-clock time since main stored start
+	z80_context *z80 = context;
+#ifdef NEW_CORE
+	total_cycles += z80->cycles;
+#else
+	total_cycles += z80->current_cycle; //context is a void * and cannot be dereferenced directly
+#endif
+	printf("Effective clock speed: %f MHz\n", ((double)total_cycles) / (1000000.0 * duration));
 	exit(0);
 	return context;
 }
@@ -65,7 +82,7 @@
 	{ 0x0000, 0x10000,  0xFFFF, 0, 0, MMAP_READ | MMAP_WRITE | MMAP_CODE, ram, NULL, NULL, NULL, NULL},
 };
 
-const memmap_chunk io_map[] = {
+memmap_chunk io_map[] = {
 	{ 0x0, 0x1, 0xFFFF, 0, 0, 0, NULL, NULL, NULL, console_read, console_write},
 	{ 0x1, 0x2, 0xFFFF, 0, 0, 0, NULL, NULL, NULL, console_status_read, console_flush_write},
 	{ 0x2, 0x3, 0xFFFF, 0, 0, 0, NULL, NULL, NULL, NULL, exit_write},
@@ -103,10 +120,19 @@
 	z80_context *context;
 	init_z80_opts(&opts, z80_map, 1, io_map, 3, 1, 0xFF);
 	context = init_z80_context(&opts);
+	start = time(NULL);
 	for(;;)
 	{
+#ifdef NEW_CORE
+		z80_execute(context, 1000000);
+		total_cycles += context->cycles;
+		context->cycles = 0;
+#else
 		z80_run(context, 1000000);
+		total_cycles += context->current_cycle;
 		context->current_cycle = 0;
+#endif
+		
 	}
 	return 0;
 }
\ No newline at end of file
--- a/blastem.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/blastem.c	Sat Jan 15 13:15:21 2022 -0800
@@ -11,7 +11,11 @@
 #include "system.h"
 #include "68kinst.h"
 #include "m68k_core.h"
+#ifdef NEW_CORE
+#include "z80.h"
+#else
 #include "z80_to_x86.h"
+#endif
 #include "mem.h"
 #include "vdp.h"
 #include "render.h"
@@ -26,11 +30,12 @@
 #include "bindings.h"
 #include "menu.h"
 #include "zip.h"
+#include "event_log.h"
 #ifndef DISABLE_NUKLEAR
 #include "nuklear_ui/blastem_nuklear.h"
 #endif
 
-#define BLASTEM_VERSION "0.6.1"
+#define BLASTEM_VERSION "0.6.3-pre"
 
 #ifdef __ANDROID__
 #define FULLSCREEN_DEFAULT 1
@@ -69,6 +74,14 @@
 #define romclose gzclose
 #endif
 
+uint16_t *process_smd_block(uint16_t *dst, uint8_t *src, size_t bytes)
+{
+	for (size_t upper = 0, lower = bytes / 2; lower < bytes; upper++, lower++, dst++) {
+		*dst = src[upper] << 8 | src[lower]; //first half of src fills the upper byte of each word
+	}
+	return dst; //one past the last word written
+}
+
 int load_smd_rom(ROMFILE f, void **buffer)
 {
 	uint8_t block[SMD_BLOCK_SIZE];
@@ -76,39 +89,58 @@
 
 	size_t filesize = 512 * 1024;
 	size_t readsize = 0;
-	uint16_t *dst = malloc(filesize);
-	
+	uint16_t *dst, *buf;
+	dst = buf = malloc(filesize);
+
 
 	size_t read;
 	do {
 		if ((readsize + SMD_BLOCK_SIZE > filesize)) {
 			filesize *= 2;
-			dst = realloc(dst, filesize);
+			buf = realloc(buf, filesize);
+			dst = buf + readsize/sizeof(uint16_t);
 		}
 		read = romread(block, 1, SMD_BLOCK_SIZE, f);
 		if (read > 0) {
-			for (uint8_t *low = block, *high = (block+read/2), *end = block+read; high < end; high++, low++) {
-				*(dst++) = *low << 8 | *high;
-			}
+			dst = process_smd_block(dst, block, read);
 			readsize += read;
 		}
 	} while(read > 0);
 	romclose(f);
-	
-	*buffer = dst;
-	
+
+	*buffer = buf;
+
 	return readsize;
 }
 
+//Checks whether header looks like the start of an SMD dump: the three magic bytes
+//must match and bytes 3 through 7 must all be zero. Returns 1 on a match, 0 otherwise.
+//A matching header with a nonzero header[2] is reported through fatal_error.
+uint8_t is_smd_format(const char *filename, uint8_t *header)
+{
+	if (header[1] != SMD_MAGIC1 || header[8] != SMD_MAGIC2 || header[9] != SMD_MAGIC3) {
+		return 0;
+	}
+	for (int pos = 3; pos < 8; pos++) {
+		if (header[pos]) {
+			return 0;
+		}
+	}
+	if (header[2]) {
+		fatal_error("%s is a split SMD ROM which is not currently supported", filename);
+	}
+	return 1;
+}
+
 uint32_t load_media_zip(const char *filename, system_media *dst)
 {
-	static const char *valid_exts[] = {"bin", "md", "gen", "sms", "rom"};
+	static const char *valid_exts[] = {"bin", "md", "gen", "sms", "rom", "smd"};
 	const uint32_t num_exts = sizeof(valid_exts)/sizeof(*valid_exts);
 	zip_file *z = zip_open(filename);
 	if (!z) {
 		return 0;
 	}
-	
+
 	for (uint32_t i = 0; i < z->num_entries; i++)
 	{
 		char *ext = path_extension(z->entries[i].name);
@@ -121,6 +153,17 @@
 				size_t out_size = nearest_pow2(z->entries[i].size);
 				dst->buffer = zip_read(z, i, &out_size);
 				if (dst->buffer) {
+					if (is_smd_format(z->entries[i].name, dst->buffer)) {
+						size_t offset;
+						for (offset = 0; offset + SMD_BLOCK_SIZE + SMD_HEADER_SIZE <= out_size; offset += SMD_BLOCK_SIZE)
+						{
+							uint8_t tmp[SMD_BLOCK_SIZE];
+							uint8_t *u8dst = dst->buffer;
+							memcpy(tmp, u8dst + offset + SMD_HEADER_SIZE, SMD_BLOCK_SIZE);
+							process_smd_block((void *)(u8dst + offset), tmp, SMD_BLOCK_SIZE);
+						}
+						out_size = offset;
+					}
 					dst->extension = ext;
 					dst->dir = path_dirname(filename);
 					dst->name = basename_no_extension(filename);
@@ -152,33 +195,22 @@
 	if (sizeof(header) != romread(header, 1, sizeof(header), f)) {
 		fatal_error("Error reading from %s\n", filename);
 	}
-	
+
 	uint32_t ret = 0;
-	if (header[1] == SMD_MAGIC1 && header[8] == SMD_MAGIC2 && header[9] == SMD_MAGIC3) {
-		int i;
-		for (i = 3; i < 8; i++) {
-			if (header[i] != 0) {
-				break;
-			}
-		}
-		if (i == 8) {
-			if (header[2]) {
-				fatal_error("%s is a split SMD ROM which is not currently supported", filename);
-			}
+	if (is_smd_format(filename, header)) {
 			if (stype) {
 				*stype = SYSTEM_GENESIS;
 			}
 			ret = load_smd_rom(f, &dst->buffer);
 		}
-	}
-	
+
 	if (!ret) {
 		size_t filesize = 512 * 1024;
 		size_t readsize = sizeof(header);
-		
+
 		char *buf = malloc(filesize);
 		memcpy(buf, header, readsize);
-	
+
 		size_t read;
 		do {
 			read = romread(buf + readsize, 1, filesize - readsize, f);
@@ -203,7 +235,7 @@
 	dst->name = basename_no_extension(filename);
 	dst->extension = path_extension(filename);
 	dst->size = ret;
-	
+
 	romclose(f);
 	return ret;
 }
@@ -247,6 +279,7 @@
 		savedir_template = "$USERDATA/blastem/$ROMNAME";
 	}
 	tern_node *vars = tern_insert_ptr(NULL, "ROMNAME", media->name);
+	vars = tern_insert_ptr(vars, "ROMDIR", media->dir);
 	vars = tern_insert_ptr(vars, "HOME", get_home_dir());
 	vars = tern_insert_ptr(vars, "EXEDIR", get_exe_dir());
 	vars = tern_insert_ptr(vars, "USERDATA", (char *)get_userdata_dir());
@@ -258,12 +291,23 @@
 	return save_dir;
 }
 
+const char *get_save_fname(uint8_t save_type)
+{
+	//every save type other than the three tested here shares the SRAM file name
+	if (save_type == SAVE_I2C) {
+		return "save.eeprom";
+	} else if (save_type == SAVE_NOR) {
+		return "save.nor";
+	}
+	return save_type == SAVE_HBPT ? "save.hbpt" : "save.sram";
+}
+
 void setup_saves(system_media *media, system_header *context)
 {
 	static uint8_t persist_save_registered;
 	rom_info *info = &context->info;
 	char *save_dir = get_save_dir(info->is_save_lock_on ? media->chain : media);
-	char const *parts[] = {save_dir, PATH_SEP, info->save_type == SAVE_I2C ? "save.eeprom" : info->save_type == SAVE_NOR ? "save.nor" : "save.sram"};
+	char const *parts[] = {save_dir, PATH_SEP, get_save_fname(info->save_type)};
 	free(save_filename);
 	save_filename = alloc_concat_m(3, parts);
 	if (info->is_save_lock_on) {
@@ -304,7 +348,7 @@
 			free(current_system->next_rom);
 		}
 		current_system->next_rom = strdup(filename);
-		current_system->request_exit(current_system);
+		system_request_exit(current_system, 1);
 		if (menu_system && menu_system->type == SYSTEM_GENESIS) {
 			genesis_context *gen = (genesis_context *)menu_system;
 			if (gen->extra) {
@@ -323,6 +367,11 @@
 }
 
 static system_media cart, lock_on;
+const system_media *current_media(void)
+{
+	return &cart; //read-only access to the file-static cart record (never lock_on)
+}
+
 void reload_media(void)
 {
 	if (!current_system) {
@@ -340,7 +389,7 @@
 		num_parts--;
 	}
 	current_system->next_rom = alloc_concat_m(num_parts, start);
-	current_system->request_exit(current_system);
+	system_request_exit(current_system, 1);
 }
 
 void lockon_media(char *lock_on_path)
@@ -376,7 +425,7 @@
 	if (!(cart.size = load_media(path, &cart, &stype))) {
 		fatal_error("Failed to open %s for reading\n", path);
 	}
-	
+
 	if (force_stype != SYSTEM_UNKNOWN) {
 		stype = force_stype;
 	}
@@ -399,6 +448,23 @@
 	update_title(game_system->info.name);
 }
 
+//Splits "host:port" in place; returns the port text, or NULL with arg untouched when it doesn't parse.
+char *parse_addr_port(char *arg)
+{
+	for (; *arg != ':'; ++arg) {
+		if (!*arg) {
+			return NULL;
+		}
+	}
+	char *end;
+	long port = strtol(arg + 1, &end, 10); //kept as long so oversized input cannot wrap into range
+	if (port < 1 || port > 65535 || *end) {
+		return NULL; //not a lone decimal TCP port; callers then treat the argument as a file name
+	}
+	*arg = 0; //terminate the host part at the ':'
+	return arg + 1;
+}
+
 int main(int argc, char ** argv)
 {
 	set_exe_str(argv[0]);
@@ -410,10 +476,13 @@
 	system_type stype = SYSTEM_UNKNOWN, force_stype = SYSTEM_UNKNOWN;
 	char * romfname = NULL;
 	char * statefile = NULL;
+	char *reader_addr = NULL, *reader_port = NULL;
+	event_reader reader = {0};
 	debugger_type dtype = DEBUGGER_NATIVE;
 	uint8_t start_in_debugger = 0;
 	uint8_t fullscreen = FULLSCREEN_DEFAULT, use_gl = 1;
 	uint8_t debug_target = 0;
+	char *port;
 	for (int i = 1; i < argc; i++) {
 		if (argv[i][0] == '-') {
 			switch(argv[i][1]) {
@@ -437,6 +506,18 @@
 				dtype = DEBUGGER_GDB;
 				start_in_debugger = 1;
 				break;
+			case 'e':
+				i++;
+				if (i >= argc) {
+					fatal_error("-e must be followed by a file name\n");
+				}
+				port = parse_addr_port(argv[i]);
+				if (port) {
+					event_log_tcp(argv[i], port);
+				} else {
+					event_log_file(argv[i]);
+				}
+				break;
 			case 'f':
 				fullscreen = !fullscreen;
 				break;
@@ -521,15 +602,21 @@
 					"	-v          Display version number and exit\n"
 					"	-l          Log 68K code addresses (useful for assemblers)\n"
 					"	-y          Log individual YM-2612 channels to WAVE files\n"
+					"   -e FILE     Write hardware event log to FILE\n"
 				);
 				return 0;
 			default:
 				fatal_error("Unrecognized switch %s\n", argv[i]);
 			}
 		} else if (!loaded) {
+			reader_port = parse_addr_port(argv[i]);
+			if (reader_port) {
+				reader_addr = argv[i];
+			} else {
 			if (!load_media(argv[i], &cart, stype == SYSTEM_UNKNOWN ? &stype : NULL)) {
 				fatal_error("Failed to open %s for reading\n", argv[i]);
 			}
+			}
 			romfname = argv[i];
 			loaded = 1;
 		} else if (width < 0) {
@@ -538,7 +625,7 @@
 			height = atoi(argv[i]);
 		}
 	}
-	
+
 	int def_width = 0, def_height = 0;
 	char *config_width = tern_find_path(config, "video\0width\0", TVAL_PTR).ptrval;
 	if (config_width) {
@@ -562,11 +649,14 @@
 		fullscreen = !fullscreen;
 	}
 	if (!headless) {
+		if (reader_addr) {
+			render_set_external_sync(1);
+		}
 		render_init(width, height, "BlastEm", fullscreen);
 		render_set_drag_drop_handler(on_drag_drop);
 	}
 	set_bindings();
-	
+
 	uint8_t menu = !loaded;
 	uint8_t use_nuklear = 0;
 #ifndef DISABLE_NUKLEAR
@@ -607,18 +697,19 @@
 		warning("%s is not a valid value for the ui.state_format setting. Valid values are gst and native\n", state_format);
 	}
 
-	if (loaded) {
+	if (loaded && !reader_addr) {
 		if (stype == SYSTEM_UNKNOWN) {
 			stype = detect_system_type(&cart);
 		}
 		if (stype == SYSTEM_UNKNOWN) {
 			fatal_error("Failed to detect system type for %s\n", romfname);
 		}
+
 		current_system = alloc_config_system(stype, &cart, menu ? 0 : opts, force_region);
 		if (!current_system) {
 			fatal_error("Failed to configure emulated machine for %s\n", romfname);
 		}
-	
+
 		setup_saves(&cart, current_system);
 		update_title(current_system->info.name);
 		if (menu) {
@@ -627,7 +718,7 @@
 			game_system = current_system;
 		}
 	}
-	
+
 #ifndef DISABLE_NUKLEAR
 	if (use_nuklear) {
 		blastem_nuklear_init(!menu);
@@ -635,10 +726,24 @@
 		menu = 0;
 	}
 #endif
-	
+
+	if (reader_addr) {
+		init_event_reader_tcp(&reader, reader_addr, reader_port);
+		stype = reader_system_type(&reader);
+		if (stype == SYSTEM_UNKNOWN) {
+			fatal_error("Failed to detect system type for %s\n", romfname);
+		}
+		game_system = current_system = alloc_config_player(stype, &reader);
+		//free inflate stream as it was inflateCopied to an internal event reader in the player
+		inflateEnd(&reader.input_stream);
+		setup_saves(&cart, current_system);
+		update_title(current_system->info.name);
+	}
+
 	current_system->debugger_type = dtype;
 	current_system->enter_debugger = start_in_debugger && menu == debug_target;
 	current_system->start_context(current_system,  menu ? NULL : statefile);
+	render_video_loop();
 	for(;;)
 	{
 		if (current_system->should_exit) {
@@ -654,6 +759,7 @@
 			current_system->debugger_type = dtype;
 			current_system->enter_debugger = start_in_debugger && menu == debug_target;
 			current_system->start_context(current_system, statefile);
+			render_video_loop();
 		} else if (menu && game_system) {
 			current_system->arena = set_current_arena(game_system->arena);
 			current_system = game_system;
@@ -671,6 +777,7 @@
 			}
 			if (!current_system->next_rom) {
 				current_system->resume_context(current_system);
+				render_video_loop();
 			}
 		} else {
 			break;
--- a/blastem.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/blastem.h	Sat Jan 15 13:15:21 2022 -0800
@@ -19,5 +19,6 @@
 void lockon_media(char *lock_on_path);
 void init_system_with_media(const char *path, system_type force_stype);
 void apply_updated_config(void);
+const system_media *current_media(void);
 
 #endif //BLASTEM_H_
--- a/build_release	Sat Jan 05 00:58:08 2019 -0800
+++ b/build_release	Sat Jan 15 13:15:21 2022 -0800
@@ -25,14 +25,20 @@
 	cd ..
 fi
 echo "Path is: $PATH"
-make PORTABLE=1 clean all
-make menu.bin
-if [ $OS = "Windows" ]; then
-	binaries="dis.exe zdis.exe stateview.exe vgmplay.exe blastem.exe SDL2.dll"
+if [ $OS = "Win64" ]; then
+	make PORTABLE=1 OS=Windows CPU=x86_64 clean all
+	SDLDLLPATH=sdl/x86_64-w64-mingw32/bin
+else
+	make PORTABLE=1 clean all
+	SDLDLLPATH=sdl/i686-w64-mingw32/bin
+fi
+make menu.bin tmss.md
+if [ $OS = "Windows" -o $OS = "Win64" ]; then
+	binaries="dis.exe zdis.exe vgmplay.exe blastem.exe $SDLDLLPATH/SDL2.dll"
 	verstr=`sed -E -n 's/^[^B]+BLASTEM_VERSION "([^"]+)"/blastem \1/p' blastem.c`
 	txt=".txt"
 else
-	binaries="dis zdis stateview vgmplay blastem termhelper"
+	binaries="dis zdis vgmplay blastem termhelper"
 	if [ $OS = "Darwin" ]; then
 		binaries="$binaries Frameworks"
 	else
@@ -41,10 +47,12 @@
 	verstr=`./blastem -v`
 	txt=""
 fi
-binaries="$binaries menu.bin"
+binaries="$binaries menu.bin tmss.md"
 ver=`echo $verstr | awk '/blastem/ { gsub(/\r/, "", $2); print $2 }'`
 if [ $OS = "Windows" ]; then
 	suffix='-win32'
+elif [ $OS = "Win64" ]; then
+	suffix='-win64'
 elif [ $OS = "Darwin" ]; then
 	suffix='-osx'
 else
@@ -54,18 +62,20 @@
 echo $dir
 rm -rf "$dir"
 mkdir "$dir"
-cp -r $binaries shaders images default.cfg rom.db gamecontrollerdb.txt "$dir"
+cp -r $binaries shaders images default.cfg rom.db gamecontrollerdb.txt systems.cfg "$dir"
 for file in README COPYING CHANGELOG; do
 	cp "$file" "$dir"/"$file$txt"
 done
 if [ $OS = "Darwin" ]; then
 	cp SDL-LICENSE "$dir"
+elif [ $OS = "Windows" -o $OS = "Win64" ]; then
+	cp sdl/COPYING.txt "$dir"/SDL-LICENSE$txt
 else
-	cp sdl/COPYING.txt "$dir"/SDL-LICENSE$txt
+	cp sdl/LICENSE.txt "$dir"/SDL-LICENSE$txt
 fi
 cp glew/LICENSE.txt "$dir"/GLEW-LICENSE$txt
 
-if [ $OS = "Windows" ]; then
+if [ $OS = "Windows" -o $OS = "Win64" ]; then
 	rm -f "${dir}.zip"
 	zip -r "${dir}.zip" "$dir"
 	echo "${dir}.zip"
--- a/build_upload_nightly	Sat Jan 05 00:58:08 2019 -0800
+++ b/build_upload_nightly	Sat Jan 15 13:15:21 2022 -0800
@@ -14,6 +14,20 @@
 if [ $result -ne 0 ]; then
 	echo Build falied with return code $result stopping $CONTAINER_NAME
 	lxc-stop -n "$CONTAINER_NAME"
+	curdate=`date -Iseconds` # timestamp reported in the webhook embed below
+	curl -d'@-' -H 'Content-Type: application/json' "$WEBHOOKURL" <<WEBHOOKEOF
+{
+	"embeds": [
+		{
+			"title": "Build $name failed!",
+			"type": "rich",
+			"description": "Build failed with return code $result stopping $CONTAINER_NAME",
+			"timestamp": "$curdate",
+			"color": 16711680
+		}
+	]
+}
+WEBHOOKEOF
 	exit $result
 fi
 echo "Build succeeded, stopping $CONTAINER_NAME"
@@ -23,3 +37,25 @@
 echo "Uploaing $artifact to $REMOTE_HOST"
 scp -i "$REMOTE_IDENT" "$HOME/.local/share/lxc/$CONTAINER_NAME/rootfs/home/$BUILD_USER/blastem/$artifact" $REMOTE_USER@$REMOTE_HOST:/home/$REMOTE_USER/nightlies
 echo "Done"
+curdate=`date -Iseconds`
+version=`echo "$artifact" | sed -E 's/[^-]+-([0-9]+\.[0-9]+\.[0-9]+[^.]*)\..*$/\1/'`
+curl -d'@-' -H 'Content-Type: application/json' "$WEBHOOKURL" <<WEBHOOKEOF
+{
+	"embeds": [
+		{
+			"title": "$artifact",
+			"type": "rich",
+			"url": "https://www.retrodev.com/blastem/nightlies/$artifact",
+			"description": "New build of $name succeeded!",
+			"timestamp": "$curdate",
+			"color": 65280,
+			"fields": [
+				{
+					"name": "Version",
+					"value": "$version"
+				}
+			]
+		}
+	]
+}
+WEBHOOKEOF
\ No newline at end of file
--- a/build_upload_win_nightly	Sat Jan 05 00:58:08 2019 -0800
+++ b/build_upload_win_nightly	Sat Jan 15 13:15:21 2022 -0800
@@ -1,20 +1,56 @@
 #!/bin/sh
 name=$1
-
-cd $HOME/blastem_win
+. "$HOME/$name.params"
+cd $HOME/$DIR
 hg revert -a
 hg pull
 hg up
 rev=`hg summary |  sed -E -n 's/^parent: [^:]+:([^ ]+) .*$/\1/p'`
 sed -i -E "s/(define BLASTEM_VERSION \"[^-]+)-pre\"/\1-pre-$rev\"/" blastem.c
-export OS=Windows
+export OS
 ./build_release > /tmp/build_${name}_out.log
 result=$?
 if [ $result -ne 0 ]; then
 	echo Build falied with return code $result
+	curdate=`date -Iseconds` # timestamp reported in the webhook embed below
+	curl -d'@-' -H 'Content-Type: application/json' "$WEBHOOKURL" <<WEBHOOKEOF
+{
+	"embeds": [
+		{
+			"title": "Build $name failed!",
+			"type": "rich",
+			"description": "Build failed with return code $result",
+			"timestamp": "$curdate",
+			"color": 16711680
+		}
+	]
+}
+WEBHOOKEOF
 	exit $result
 fi
 . $HOME/remote.params
 artifact=$(tail -n 1 /tmp/build_${name}_out.log)
 echo "Uploaing $artifact to $REMOTE_HOST"
-scp -i "$REMOTE_IDENT" "$HOME/blastem_win/$artifact" $REMOTE_USER@$REMOTE_HOST:/home/$REMOTE_USER/nightlies
+scp -i "$REMOTE_IDENT" "$HOME/$DIR/$artifact" $REMOTE_USER@$REMOTE_HOST:/home/$REMOTE_USER/nightlies
+curdate=`date -Iseconds`
+version=`echo "$artifact" | sed -E 's/[^-]+-([0-9]+\.[0-9]+\.[0-9]+[^.]*)\..*$/\1/'`
+curl -d'@-' -H 'Content-Type: application/json' "$WEBHOOKURL" <<WEBHOOKEOF
+{
+	"embeds": [
+		{
+			"title": "$artifact",
+			"type": "rich",
+			"url": "https://www.retrodev.com/blastem/nightlies/$artifact",
+			"description": "New build of $name succeeded!",
+			"timestamp": "$curdate",
+			"color": 65280,
+			"fields": [
+				{
+					"name": "Version",
+					"value": "$version"
+				}
+			]
+		}
+	]
+}
+WEBHOOKEOF
\ No newline at end of file
--- a/config.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/config.c	Sat Jan 15 13:15:21 2022 -0800
@@ -6,6 +6,7 @@
 #include "tern.h"
 #include "util.h"
 #include "paths.h"
+#include "config.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -49,11 +50,11 @@
 		curline = strip_ws(curline);
 		int len = strlen(curline);
 		if (!len) {
-			*line = *line + 1;
+			(*line)++;
 			continue;
 		}
 		if (curline[0] == '#') {
-			*line = *line + 1;
+			(*line)++;
 			continue;
 		}
 		if (curline[0] == '}') {
@@ -67,7 +68,7 @@
 		if (*end == '{') {
 			*end = 0;
 			curline = strip_ws(curline);
-			*line = *line + 1;
+			(*line)++;
 			head = tern_insert_node(head, curline, parse_config_int(state, 1, line));
 		} else {
 			char * val = strip_ws(split_keyval(curline));
@@ -77,7 +78,7 @@
 			} else {
 				fprintf(stderr, "Key %s is missing a value on line %d\n", key, *line);
 			}
-			*line = *line + 1;
+			(*line)++;
 		}
 	}
 	return head;
@@ -174,11 +175,10 @@
 	if (!config_size) {
 		goto config_empty;
 	}
-	char * config_data = malloc(config_size+1);
+	char *config_data = calloc(config_size + 1, 1);
 	if (fread(config_data, 1, config_size, config_file) != config_size) {
 		goto config_read_fail;
 	}
-	config_data[config_size] = '\0';
 
 	ret = parse_config(config_data);
 config_read_fail:
@@ -205,18 +205,28 @@
 
 tern_node *parse_bundled_config(char *config_name)
 {
+	tern_node *ret = NULL;
+#ifdef CONFIG_PATH
+	if (!strcmp("default.cfg", config_name) || !strcmp("blastem.cfg", config_name)) {
+		char *confpath = path_append(CONFIG_PATH, config_name);
+		ret = parse_config_file(confpath);
+		free(confpath);
+	} else {
+#endif
 	uint32_t confsize;
 	char *confdata = read_bundled_file(config_name, &confsize);
-	tern_node *ret = NULL;
 	if (confdata) {
 		confdata[confsize] = 0;
 		ret = parse_config(confdata);
 		free(confdata);
 	}
+#ifdef CONFIG_PATH
+	}
+#endif
 	return ret;
 }
 
-tern_node *load_overrideable_config(char *name, char *bundled_name)
+tern_node *load_overrideable_config(char *name, char *bundled_name, uint8_t *used_config_dir)
 {
 	char const *confdir = get_config_dir();
 	char *confpath = NULL;
@@ -224,58 +234,60 @@
 	if (confdir) {
 		confpath = path_append(confdir, name);
 		ret = parse_config_file(confpath);
-		if (ret) {
-			free(confpath);
-			return ret;
+	}
+	free(confpath);
+	if (used_config_dir) {
+		*used_config_dir = ret != NULL;
+	}
+	
+	if (!ret) {
+		ret = parse_bundled_config(name);
+		if (!ret) {
+			ret = parse_bundled_config(bundled_name);
 		}
 	}
 
-	ret = parse_bundled_config(bundled_name);
-	if (ret) {
-		free(confpath);
-		return ret;
-	}
-	return NULL;
+	return ret;
 }
 
+static uint8_t app_config_in_config_dir;
 tern_node *load_config()
 {
-	char const *confdir = get_config_dir();
-	char *confpath = NULL;
-	tern_node *ret = load_overrideable_config("blastem.cfg", "default.cfg");
-	if (confdir) {
-		confpath = path_append(confdir, "blastem.cfg");
-		ret = parse_config_file(confpath);
-		if (ret) {
-			free(confpath);
-			return ret;
+	tern_node *ret = load_overrideable_config("blastem.cfg", "default.cfg", &app_config_in_config_dir);
+	
+	if (!ret) {
+		if (get_config_dir()) {
+			fatal_error("Failed to find a config file at %s or in the blastem executable directory\n", get_config_dir());
+		} else {
+			fatal_error("Failed to find a config file in the BlastEm executable directory and the config directory path could not be determined\n");
 		}
 	}
-
-	ret = parse_bundled_config("default.cfg");
-	if (ret) {
-		free(confpath);
-		return ret;
-	}
-
-	if (get_config_dir()) {
-		fatal_error("Failed to find a config file at %s or in the blastem executable directory\n", get_config_dir());
-	} else {
-		fatal_error("Failed to find a config file in the BlastEm executable directory and the config directory path could not be determined\n");
-	}
-	//this will never get reached, but the compiler doesn't know that. Let's make it happy
-	return NULL;
+	return ret;
 }
 
-void persist_config_at(tern_node *config, char *fname)
+void persist_config_at(tern_node *app_config, tern_node *to_save, char *fname)
 {
-	char const *confdir = get_config_dir();
-	if (!confdir) {
-		fatal_error("Failed to locate config file directory\n");
+	char*use_exe_dir = tern_find_path_default(app_config, "ui\0config_in_exe_dir\0", (tern_val){.ptrval = "off"}, TVAL_PTR).ptrval;
+	char *confpath;
+	if (!strcmp(use_exe_dir, "on")) {
+		confpath = path_append(get_exe_dir(), fname);
+		if (app_config == to_save && app_config_in_config_dir) {
+			//user switched to "portable" configs this session and there is an
+			//existing config file in the user-specific config directory
+			//delete it so we don't end up loading it next time
+			char *oldpath = path_append(get_config_dir(), fname);
+			delete_file(oldpath);
+			free(oldpath);
+		}
+	} else {
+		char const *confdir = get_config_dir();
+		if (!confdir) {
+			fatal_error("Failed to locate config file directory\n");
+		}
+		ensure_dir_exists(confdir);
+		confpath = path_append(confdir, fname);
 	}
-	ensure_dir_exists(confdir);
-	char *confpath = path_append(confdir, fname);
-	if (!serialize_config_file(config, confpath)) {
+	if (!serialize_config_file(to_save, confpath)) {
 		fatal_error("Failed to write config to %s\n", confpath);
 	}
 	free(confpath);
@@ -283,7 +295,22 @@
 
 void persist_config(tern_node *config)
 {
-	persist_config_at(config, "blastem.cfg");
+	persist_config_at(config, config, "blastem.cfg");
+}
+
+void delete_custom_config_at(char *fname)
+{
+	char *confpath = path_append(get_exe_dir(), fname); //first the copy stored next to the executable
+	delete_file(confpath);
+	free(confpath);
+	confpath = get_config_dir() ? path_append(get_config_dir(), fname) : NULL; //then the config dir copy; that dir may be undetermined (NULL)
+	if (confpath) { delete_file(confpath); }
+	free(confpath);
+}
+
+void delete_custom_config(void)
+{
+	delete_custom_config_at("blastem.cfg");
 }
 
 char **get_extension_list(tern_node *config, uint32_t *num_exts_out)
@@ -311,3 +338,18 @@
 	char * lowpass_cutoff_str = tern_find_path(config, "audio\0lowpass_cutoff\0", TVAL_PTR).ptrval;
 	return lowpass_cutoff_str ? atoi(lowpass_cutoff_str) : DEFAULT_LOWPASS_CUTOFF;
 }
+
+tern_node *get_systems_config(void)
+{
+	static tern_node *cached_systems; //stays NULL until systems.cfg has been parsed, so a failed parse is retried
+	if (cached_systems) {
+		return cached_systems;
+	}
+	return cached_systems = parse_bundled_config("systems.cfg");
+}
+
+tern_node *get_model(tern_node *config, system_type stype)
+{
+	char *model = tern_find_path_default(config, "system\0model\0", (tern_val){.ptrval = "md1va3"}, TVAL_PTR).ptrval; //model name from the system/model setting, "md1va3" being the supplied default; NOTE(review): stype is not used in this body
+	return tern_find_node(get_systems_config(), model); //result of looking that name up in the tree returned by get_systems_config()
+}
--- a/config.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/config.h	Sat Jan 15 13:15:21 2022 -0800
@@ -6,17 +6,22 @@
 #ifndef CONFIG_H_
 #define CONFIG_H_
 #include "tern.h"
+#include "system.h"
 
 tern_node *parse_config_file(char *config_path);
 tern_node *parse_bundled_config(char *config_name);
-tern_node *load_overrideable_config(char *name, char *bundled_name);
+tern_node *load_overrideable_config(char *name, char *bundled_name, uint8_t *used_config_dir);
 tern_node *load_config();
 char *serialize_config(tern_node *config, uint32_t *size_out);
 uint8_t serialize_config_file(tern_node *config, char *path);
-void persist_config_at(tern_node *config, char *fname);
+void persist_config_at(tern_node *app_config, tern_node *to_save, char *fname);
 void persist_config(tern_node *config);
+void delete_custom_config_at(char *fname);
+void delete_custom_config(void);
 char **get_extension_list(tern_node *config, uint32_t *num_exts_out);
 uint32_t get_lowpass_cutoff(tern_node *config);
+tern_node *get_systems_config(void);
+tern_node *get_model(tern_node *config, system_type stype);
 
 #endif //CONFIG_H_
 
--- a/controller_info.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/controller_info.c	Sat Jan 15 13:15:21 2022 -0800
@@ -1,8 +1,13 @@
 #include <string.h>
+#include <stdlib.h>
+#ifndef USE_FBDEV
 #include "render_sdl.h"
+#endif
 #include "controller_info.h"
 #include "config.h"
 #include "util.h"
+#include "blastem.h"
+#include "bindings.h"
 
 typedef struct {
 	char const      *name;
@@ -57,19 +62,23 @@
 static const char *variant_names[] = {
 	"normal",
 	"6b bumpers",
-	"6b right"
+	"6b right",
+	"3button",
+	"6button",
+	"8button"
 };
 
 static void load_ctype_config(void)
 {
 	if (!loaded) {
-		info_config = load_overrideable_config("controller_types.cfg", "controller_types.cfg");
+		info_config = load_overrideable_config("controller_types.cfg", "controller_types.cfg", NULL);
 		loaded = 1;
 	}
 }
 
 controller_info get_controller_info(int joystick)
 {
+#ifndef USE_FBDEV
 	load_ctype_config();
 	char guid_string[33];
 	SDL_Joystick *stick = render_get_joystick(joystick);
@@ -148,6 +157,9 @@
 			return res;
 		}
 	}
+#else
+	const char *name = "Unknown";
+#endif
 	//default to a 360
 	return (controller_info){
 		.type = TYPE_GENERIC_MAPPING,
@@ -159,6 +171,7 @@
 
 static void mappings_iter(char *key, tern_val val, uint8_t valtype, void *data)
 {
+#ifndef USE_FBDEV
 	if (valtype != TVAL_NODE) {
 		return;
 	}
@@ -169,6 +182,7 @@
 		SDL_GameControllerAddMapping(full);
 		free(full);
 	}
+#endif
 }
 
 void controller_add_mappings(void)
@@ -181,24 +195,42 @@
 
 void save_controller_info(int joystick, controller_info *info)
 {
+#ifndef USE_FBDEV
 	char guid_string[33];
 	SDL_JoystickGetGUIDString(SDL_JoystickGetGUID(render_get_joystick(joystick)), guid_string, sizeof(guid_string));
 	tern_node *existing = tern_find_node(info_config, guid_string);
-	existing = tern_insert_ptr(existing, "subtype", (void *)subtype_names[info->subtype]);
-	existing = tern_insert_ptr(existing, "variant",  (void *)variant_names[info->variant]);
+	existing = tern_insert_ptr(existing, "subtype", strdup(subtype_names[info->subtype]));
+	existing = tern_insert_ptr(existing, "variant", strdup(variant_names[info->variant]));
 	info_config = tern_insert_node(info_config, guid_string, existing);
-	persist_config_at(info_config, "controller_types.cfg");
-	
+	persist_config_at(config, info_config, "controller_types.cfg");
+	handle_joy_added(joystick);
+#endif	
 }
 
 void save_controller_mapping(int joystick, char *mapping_string)
 {
+#ifndef USE_FBDEV
 	char guid_string[33];
 	SDL_JoystickGetGUIDString(SDL_JoystickGetGUID(render_get_joystick(joystick)), guid_string, sizeof(guid_string));
 	tern_node *existing = tern_find_node(info_config, guid_string);
 	existing = tern_insert_ptr(existing, "mapping", mapping_string);
 	info_config = tern_insert_node(info_config, guid_string, existing);
-	persist_config_at(info_config, "controller_types.cfg");
+	persist_config_at(config, info_config, "controller_types.cfg");
+	const char *parts[] = {guid_string, ",", mapping_string};
+	char * full = alloc_concat_m(3, parts);
+	SDL_GameControllerAddMapping(full);
+	free(full);
+	handle_joy_added(joystick);
+#endif
+}
+
+void delete_controller_info(void)
+{
+	delete_custom_config_at("controller_types.cfg"); //drop the saved controller_types.cfg file(s)
+	loaded = 0; //load_ctype_config() only loads while this is 0, so the next call reloads
+	tern_free(info_config);
+	info_config = NULL; //clear the pointer that was just handed to tern_free
+	render_reset_mappings(); //NOTE(review): presumably restores the renderer's default controller mappings - confirm in the render code
+}
 
 char const *labels_xbox[] = {
@@ -222,6 +254,12 @@
 static char const *labels_genesis[] = {
 	"A", "B", "X", "Y", NULL, NULL, "Start", NULL, NULL, "Z", "C", NULL, "Mode"
 };
+static char const *labels_genesis_3button[] = {
+	"A", "B", NULL, NULL, NULL, NULL, "Start", NULL, NULL, NULL, "C", NULL, "Mode"
+};
+static char const *labels_genesis_8button[] = {
+	"A", "B", "X", "Y", "Mode", NULL, "Start", NULL, NULL, "Z", "C", "L", "R"
+};
 static char const *labels_saturn[] = {
 	"A", "B", "X", "Y", NULL, NULL, "Start", NULL, NULL, "Z", "C", "LT", "RT"
 };
@@ -246,7 +284,13 @@
 		}
 	} else {
 		if (info->subtype == SUBTYPE_GENESIS) {
-			return labels_genesis;
+			if (info->variant == VARIANT_8BUTTON) {
+				return labels_genesis_8button;
+			} else if (info->variant == VARIANT_3BUTTON) {
+				return labels_genesis_3button;
+			} else {
+				return labels_genesis;
+			}
 		} else {
 			return labels_saturn;
 		}
@@ -255,10 +299,12 @@
 
 const char *get_button_label(controller_info *info, int button)
 {
+#ifndef USE_FBDEV
 	if (button >= SDL_CONTROLLER_BUTTON_DPAD_UP) {
 		static char const * dirs[] = {"Up", "Down", "Left", "Right"};
 		return dirs[button - SDL_CONTROLLER_BUTTON_DPAD_UP];
 	}
+#endif
 	return label_source(info)[button];
 }
 
@@ -267,11 +313,15 @@
 };
 const char *get_axis_label(controller_info *info, int axis)
 {
+#ifndef USE_FBDEV
 	if (axis < SDL_CONTROLLER_AXIS_TRIGGERLEFT) {
 		return axis_labels[axis];
 	} else {
 		return label_source(info)[axis - SDL_CONTROLLER_AXIS_TRIGGERLEFT + SDL_CONTROLLER_BUTTON_RIGHTSHOULDER + 1];
 	}
+#else
+	return NULL;
+#endif
 }
 
 char *make_controller_type_key(controller_info *info)
@@ -316,6 +366,9 @@
 		prefix = "Normal ";
 	} else {
 		static const char *parts[] = {"6 button (", NULL, "/", NULL, ") "};
+#ifdef USE_FBDEV
+		parts[1] = parts[3] = "??";
+#else
 		if (info->variant == VARIANT_6B_BUMPERS) {
 			parts[1] = get_button_label(info, SDL_CONTROLLER_BUTTON_LEFTSHOULDER);
 			parts[3] = get_button_label(info, SDL_CONTROLLER_BUTTON_RIGHTSHOULDER);
@@ -323,6 +376,7 @@
 			parts[1] = get_button_label(info, SDL_CONTROLLER_BUTTON_RIGHTSHOULDER);
 			parts[3] = get_axis_label(info, SDL_CONTROLLER_AXIS_TRIGGERRIGHT);
 		}
+#endif
 		prefix = alloc_concat_m(5, parts);
 	}
 	char *ret = alloc_concat(prefix, base);
--- a/controller_info.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/controller_info.h	Sat Jan 15 13:15:21 2022 -0800
@@ -30,6 +30,8 @@
 	VARIANT_NORMAL,
 	VARIANT_6B_BUMPERS, //C and Z positions are RB and LB respectively
 	VARIANT_6B_RIGHT, //C and Z positions are RT and RB respectively
+	VARIANT_3BUTTON, //3-button Gen/MD controller
+	VARIANT_8BUTTON, //Modern 8-button Gen/MD style controller (retro-bit, 8bitdo M30, etc.)
 	VARIANT_NUM
 };
 
@@ -45,6 +47,7 @@
 const char *get_axis_label(controller_info *info, int axis);
 void save_controller_info(int joystick, controller_info *info);
 void save_controller_mapping(int joystick, char *mapping_string);
+void delete_controller_info(void);
 void controller_add_mappings(void);
 char *make_controller_type_key(controller_info *info);
 char *make_human_readable_type_name(controller_info *info);
--- a/cpu_dsl.py	Sat Jan 05 00:58:08 2019 -0800
+++ b/cpu_dsl.py	Sat Jan 15 13:15:21 2022 -0800
@@ -20,6 +20,14 @@
 			self.addOp(NormalOp(parts))
 		return self
 		
+	def processOps(self, prog, fieldVals, output, otype, oplist):
+		#generate every op in order; an op directly followed by update_flags is told which flags that op auto-updates
+		for idx in range(len(oplist)):
+			hasNext = idx + 1 < len(oplist)
+			wantsFlags = hasNext and oplist[idx + 1].op == 'update_flags'
+			flagUpdates, _ = prog.flags.parseFlagUpdate(oplist[idx + 1].params[0]) if wantsFlags else (None, None)
+			oplist[idx].generate(prog, self, fieldVals, output, otype, flagUpdates)
+		
 	def resolveLocal(self, name):
 		return None
 			
@@ -40,6 +48,7 @@
 		self.regValues = {}
 		self.varyingBits = 0
 		self.invalidFieldValues = {}
+		self.invalidCombos = []
 		self.newLocals = []
 		for field in fields:
 			self.varyingBits += fields[field][1]
@@ -47,12 +56,20 @@
 	def addOp(self, op):
 		if op.op == 'local':
 			name = op.params[0]
-			size = op.params[1]
+			size = int(op.params[1])
 			self.locals[name] = size
 		elif op.op == 'invalid':
-			name = op.params[0]
-			value = int(op.params[1])
-			self.invalidFieldValues.setdefault(name, set()).add(value)
+			if len(op.params) < 3:
+				name = op.params[0]
+				value = int(op.params[1])
+				self.invalidFieldValues.setdefault(name, set()).add(value)
+			else:
+				vmap = {}
+				for i in range(0, len(op.params), 2):
+					name = op.params[i]
+					value = int(op.params[i+1])
+					vmap[name] = value
+				self.invalidCombos.append(vmap)
 		else:
 			self.implementation.append(op)
 			
@@ -81,12 +98,29 @@
 		for i in range(0, 1 << self.varyingBits):
 			iword = self.value
 			doIt = True
+			combos = []
+			for combo in self.invalidCombos:
+				combos.append(dict(combo))
 			for field in self.fields:
 				shift,bits = self.fields[field]
 				val = i & ((1 << bits) - 1)
 				if field in self.invalidFieldValues and val in self.invalidFieldValues[field]:
 					doIt = False
 					break
+				nextcombos = []
+				for combo in combos:
+					if field in combo:
+						if combo[field] == val:
+							del combo[field]
+							if not combo:
+								doIt = False
+								break
+						else:
+							continue
+					nextcombos.append(combo)
+				combos = nextcombos
+				if not doIt:
+					break
 				i >>= bits
 				iword |= val << shift
 			if doIt:
@@ -121,16 +155,25 @@
 			output.append('\n\tuint{sz}_t {name};'.format(sz=self.locals[var], name=var))
 		self.newLocals = []
 		fieldVals,_ = self.getFieldVals(value)
-		for op in self.implementation:
-			op.generate(prog, self, fieldVals, output, otype)
-		begin = '\nvoid ' + self.generateName(value) + '(' + prog.context_type + ' *context)\n{'
+		self.processOps(prog, fieldVals, output, otype, self.implementation)
+		
+		if prog.dispatch == 'call':
+			begin = '\nvoid ' + self.generateName(value) + '(' + prog.context_type + ' *context, uint32_t target_cycle)\n{'
+		elif prog.dispatch == 'goto':
+			begin = '\n' + self.generateName(value) + ': {'
+		else:
+			raise Exception('Unsupported dispatch type ' + prog.dispatch)
 		if prog.needFlagCoalesce:
 			begin += prog.flags.coalesceFlags(prog, otype)
 		if prog.needFlagDisperse:
 			output.append(prog.flags.disperseFlags(prog, otype))
 		for var in self.newLocals:
 			begin += '\n\tuint{sz}_t {name};'.format(sz=self.locals[var], name=var)
+		for size in prog.temp:
+			begin += '\n\tuint{sz}_t gen_tmp{sz}__;'.format(sz=size)
 		prog.popScope()
+		if prog.dispatch == 'goto':
+			output += prog.nextInstruction(otype)
 		return begin + ''.join(output) + '\n}'
 		
 	def __str__(self):
@@ -150,16 +193,17 @@
 		self.arg_map = {}
 		self.locals = {}
 		self.regValues = {}
+		self.argValues = {}
 	
 	def addOp(self, op):
 		if op.op == 'arg':
 			name = op.params[0]
-			size = op.params[1]
+			size = int(op.params[1])
 			self.arg_map[name] = len(self.args)
 			self.args.append((name, size))
 		elif op.op == 'local':
 			name = op.params[0]
-			size = op.params[1]
+			size = int(op.params[1])
 			self.locals[name] = size
 		else:
 			self.implementation.append(op)
@@ -173,7 +217,12 @@
 		self.locals[name] = size
 	
 	def localSize(self, name):
-		return self.locals.get(name)
+		if name in self.locals:
+			return self.locals[name]
+		if name in self.arg_map:
+			argIndex = self.arg_map[name]
+			return self.args[argIndex][1]
+		return None
 			
 	def inline(self, prog, params, output, otype, parent):
 		if len(params) != len(self.args):
@@ -189,8 +238,8 @@
 		for name in self.locals:
 			size = self.locals[name]
 			output.append('\n\tuint{size}_t {sub}_{local};'.format(size=size, sub=self.name, local=name))
-		for op in self.implementation:
-			op.generate(prog, self, argValues, output, otype)
+		self.argValues = argValues
+		self.processOps(prog, argValues, output, otype, self.implementation)
 		prog.popScope()
 		
 	def __str__(self):
@@ -209,24 +258,115 @@
 		self.impls = {}
 		self.outOp = ()
 	def cBinaryOperator(self, op):
-		def _impl(prog, params):
+		def _impl(prog, params, rawParams, flagUpdates):
 			if op == '-':
 				a = params[1]
 				b = params[0]
 			else:
 				a = params[0]
 				b = params[1]
-			return '\n\t{dst} = {a} {op} {b};'.format(
-				dst = params[2], a = a, b = b, op = op
-			)
+			needsSizeAdjust = False
+			if len(params) > 3:
+				size = params[3]
+				if size == 0:
+					size = 8
+				elif size == 1:
+					size = 16
+				else:
+					size = 32
+				prog.lastSize = size
+				destSize = prog.paramSize(rawParams[2])
+				if destSize > size:
+					needsSizeAdjust = True
+					prog.sizeAdjust = size
+			needsCarry = needsOflow = needsHalf = False
+			if flagUpdates:
+				for flag in flagUpdates:
+					calc = prog.flags.flagCalc[flag]
+					if calc == 'carry':
+						needsCarry = True
+					elif calc == 'half-carry':
+						needsHalf = True
+					elif calc == 'overflow':
+						needsOflow = True
+			decl = ''
+			if needsCarry or needsOflow or needsHalf or (flagUpdates and needsSizeAdjust):
+				if len(params) <= 3:
+					size = prog.paramSize(rawParams[2])
+				if needsCarry and op != 'lsr':
+					size *= 2
+				decl,name = prog.getTemp(size)
+				dst = prog.carryFlowDst = name
+				prog.lastA = a
+				prog.lastB = b
+				if size == 64:
+					a = '((uint64_t){a})'.format(a=a)
+					b = '((uint64_t){b})'.format(b=b)
+				prog.lastBFlow = b if op == '-' else '(~{b})'.format(b=b)
+			elif needsSizeAdjust:
+				decl,name = prog.getTemp(size)
+				dst = params[2]
+				return '{decl}\n\t{tmp} = ({a} & {mask}) {op} ({b} & {mask});\n\t{dst} = ({dst} & ~{mask}) | {tmp};'.format(
+					decl = decl, tmp = name, a = a, b = b, op = op, dst = dst, mask = ((1 << size) - 1)
+				)
+			else:
+				dst = params[2]
+			if needsSizeAdjust:
+				return decl + '\n\t{dst} = ({a} & {mask}) {op} ({b} & {mask});'.format(
+					dst = dst, a = a, b = b, op = op, mask = (1 << prog.sizeAdjust) - 1
+				)
+			else:
+				return decl + '\n\t{dst} = {a} {op} {b};'.format(
+					dst = dst, a = a, b = b, op = op
+				)
 		self.impls['c'] = _impl
 		self.outOp = (2,)
 		return self
 	def cUnaryOperator(self, op):
-		def _impl(prog, params):
-			return '\n\t{dst} = {op}{a};'.format(
-				dst = params[1], a = params[0], op = op
-			)
+		def _impl(prog, params, rawParams, flagUpdates):
+			dst = params[1]
+			decl = ''
+			needsSizeAdjust = needsCarry = needsOflow = needsHalf = False #all default to False; the flag loop below only ever sets them to True
+			if len(params) > 2:
+				size = params[2] #optional size operand: 0 -> 8 bits, 1 -> 16 bits, anything else -> 32 bits
+				if size == 0:
+					size = 8
+				elif size == 1:
+					size = 16
+				else:
+					size = 32
+				prog.lastSize = size
+				destSize = prog.paramSize(rawParams[1])
+				if destSize > size:
+					needsSizeAdjust = True #destination is wider than the operation, so only its low bits may change
+					prog.sizeAdjust = size
+			if op == '-':
+				if flagUpdates:
+					for flag in flagUpdates:
+						calc = prog.flags.flagCalc[flag]
+						if calc == 'carry':
+							needsCarry = True
+						elif calc == 'half-carry':
+							needsHalf = True
+						elif calc == 'overflow':
+							needsOflow = True
+				if needsCarry or needsOflow or needsHalf or (flagUpdates and needsSizeAdjust):
+					size = prog.paramSize(rawParams[1])
+					if needsCarry:
+						size *= 2
+					decl,name = prog.getTemp(size)
+					dst = prog.carryFlowDst = name #result goes to a temp that update_flags picks up through prog.carryFlowDst
+					prog.lastA = 0 #negation is recorded as 0 - operand for the later flag calculations
+					prog.lastB = params[0]
+					prog.lastBFlow = params[0]
+			if needsSizeAdjust:
+				return decl + '\n\t{dst} = ({dst} & ~{mask}) | (({op}{a}) & {mask});'.format(
+					dst = dst, a = params[0], op = op, mask = (1 << prog.sizeAdjust) - 1
+				)
+			else:
+				return decl + '\n\t{dst} = {op}{a};'.format(
+					dst = dst, a = params[0], op = op
+				)
 		self.impls['c'] = _impl
 		self.outOp = (1,)
 		return self
@@ -244,11 +384,21 @@
 		return not self.evalFun is None
 	def numArgs(self):
 		return self.evalFun.__code__.co_argcount
-	def generate(self, otype, prog, params, rawParams):
+	def numParams(self):
+		#parameter count implied by this op: enough to reach the highest
+		#output operand index and to cover the eval function's arguments
+		needed = max(self.outOp) + 1 if self.outOp else 0
+		if not self.evalFun:
+			return needed
+		evalArgs = self.numArgs()
+		return needed if needed > evalArgs else evalArgs
+	def generate(self, otype, prog, params, rawParams, flagUpdates):
 		if self.impls[otype].__code__.co_argcount == 2:
 			return self.impls[otype](prog, params)
+		elif self.impls[otype].__code__.co_argcount == 3:
+			return self.impls[otype](prog, params, rawParams)
 		else:
-			return self.impls[otype](prog, params, rawParams)
+			return self.impls[otype](prog, params, rawParams, flagUpdates)
 		
 		
 def _xchgCImpl(prog, params, rawParams):
@@ -261,50 +411,54 @@
 		table = 'main'
 	else:
 		table = params[1]
-	return '\n\timpl_{tbl}[{op}](context);'.format(tbl = table, op = params[0])
+	if prog.dispatch == 'call':
+		return '\n\timpl_{tbl}[{op}](context, target_cycle);'.format(tbl = table, op = params[0])
+	elif prog.dispatch == 'goto':
+		return '\n\tgoto *impl_{tbl}[{op}];'.format(tbl = table, op = params[0])
+	else:
+		raise Exception('Unsupported dispatch type ' + prog.dispatch)
 
 def _updateFlagsCImpl(prog, params, rawParams):
-	i = 0
-	last = ''
-	autoUpdate = set()
-	explicit = {}
-	for c in params[0]:
-		if c.isdigit():
-			if last.isalpha():
-				num = int(c)
-				if num > 1:
-					raise Exception(c + ' is not a valid digit for update_flags')
-				explicit[last] = num
-				last = c
-			else:
-				raise Exception('Digit must follow flag letter in update_flags')
-		else:
-			if last.isalpha():
-				autoUpdate.add(last)
-			last = c
-	if last.isalpha():
-		autoUpdate.add(last)
+	autoUpdate, explicit = prog.flags.parseFlagUpdate(params[0])
 	output = []
-	#TODO: handle autoUpdate flags
+	parity = None
+	directFlags = {}
 	for flag in autoUpdate:
 		calc = prog.flags.flagCalc[flag]
 		calc,_,resultBit = calc.partition('-')
-		lastDst = prog.resolveParam(prog.lastDst, None, {})
+		if prog.carryFlowDst:
+			lastDst = prog.carryFlowDst
+		else:
+			lastDst = prog.resolveParam(prog.lastDst, prog.currentScope, {})
 		storage = prog.flags.getStorage(flag)
-		if calc == 'bit' or calc == 'sign':
+		if calc == 'bit' or calc == 'sign' or calc == 'carry' or calc == 'half' or calc == 'overflow':
+			myRes = lastDst
 			if calc == 'sign':
-				resultBit = prog.paramSize(prog.lastDst) - 1
+				resultBit = prog.getLastSize() - 1
+			elif calc == 'carry':
+				if prog.lastOp.op in ('asr', 'lsr'):
+					resultBit = 0
+					myRes = prog.lastA
+				else:
+					resultBit = prog.getLastSize()
+					if prog.lastOp.op == 'ror':
+						resultBit -= 1
+			elif calc == 'half':
+				resultBit = prog.getLastSize() - 4
+				myRes = '({a} ^ {b} ^ {res})'.format(a = prog.lastA, b = prog.lastB, res = lastDst)
+			elif calc == 'overflow':
+				resultBit = prog.getLastSize() - 1
+				myRes = '((({a} ^ {b})) & ({a} ^ {res}))'.format(a = prog.lastA, b = prog.lastBFlow, res = lastDst)
 			else:
-				resultBit = int(resultBit)
+				#Note: offsetting this by the operation size - 8 makes sense for the Z80
+				#but might not for other CPUs with this kind of fixed bit flag behavior
+				resultBit = int(resultBit) + prog.getLastSize() - 8
 			if type(storage) is tuple:
 				reg,storageBit = storage
-				reg = prog.resolveParam(reg, None, {})
 				if storageBit == resultBit:
-					#TODO: optimize this case
-					output.append('\n\t{reg} = ({reg} & ~{mask}U) | ({res} & {mask}U);'.format(
-						reg = reg, mask = 1 << resultBit, res = lastDst
-					))
+					directFlags.setdefault((reg, myRes), []).append(resultBit)
 				else:
+					reg = prog.resolveParam(reg, None, {})
 					if resultBit > storageBit:
 						op = '>>'
 						shift = resultBit - storageBit
@@ -312,34 +466,83 @@
 						op = '<<'
 						shift = storageBit - resultBit
 					output.append('\n\t{reg} = ({reg} & ~{mask}U) | ({res} {op} {shift}U & {mask}U);'.format(
-						reg = reg, mask = 1 << storageBit, res = lastDst, op = op, shift = shift
+						reg = reg, mask = 1 << storageBit, res = myRes, op = op, shift = shift
 					))
 			else:
 				reg = prog.resolveParam(storage, None, {})
-				output.append('\n\t{reg} = {res} & {mask}U;'.format(reg=reg, res=lastDst, mask = 1 << resultBit))
+				maxBit = prog.paramSize(storage) - 1
+				if resultBit > maxBit:
+					output.append('\n\t{reg} = {res} >> {shift} & {mask}U;'.format(reg=reg, res=myRes, shift = resultBit - maxBit, mask = 1 << maxBit))
+				else:
+					output.append('\n\t{reg} = {res} & {mask}U;'.format(reg=reg, res=myRes, mask = 1 << resultBit))
 		elif calc == 'zero':
+			if prog.carryFlowDst:
+				realSize = prog.getLastSize()
+				if realSize != prog.paramSize(prog.carryFlowDst):
+					lastDst = '({res} & {mask})'.format(res=lastDst, mask = (1 << realSize) - 1)
 			if type(storage) is tuple:
 				reg,storageBit = storage
 				reg = prog.resolveParam(reg, None, {})
 				output.append('\n\t{reg} = {res} ? ({reg} & {mask}U) : ({reg} | {bit}U);'.format(
 					reg = reg, mask = ~(1 << storageBit), res = lastDst, bit = 1 << storageBit
 				))
-			elif prog.paramSize(prog.lastDst) > prog.paramSize(storage):
-				reg = prog.resolveParam(storage, None, {})
-				output.append('\n\t{reg} = {res} != 0;'.format(
-					reg = reg, res = lastDst
-				))
 			else:
 				reg = prog.resolveParam(storage, None, {})
-				output.append('\n\t{reg} = {res};'.format(reg = reg, res = lastDst))
-		elif calc == 'half-carry':
-			pass
-		elif calc == 'carry':
-			pass
-		elif calc == 'overflow':
-			pass
+				output.append('\n\t{reg} = {res} == 0;'.format(
+					reg = reg, res = lastDst
+				))
 		elif calc == 'parity':
-			pass
+			parity = storage
+			paritySize = prog.getLastSize()
+			if prog.carryFlowDst:
+				parityDst = paritySrc = prog.carryFlowDst
+			else:
+				paritySrc = lastDst
+				decl,name = prog.getTemp(paritySize)
+				output.append(decl)
+				parityDst = name
+		else:
+			raise Exception('Unknown flag calc type: ' + calc)
+	for reg, myRes in directFlags:
+		bits = directFlags[(reg, myRes)]
+		resolved = prog.resolveParam(reg, None, {})
+		if len(bits) == len(prog.flags.storageToFlags[reg]):
+			output.append('\n\t{reg} = {res};'.format(reg = resolved, res = myRes))
+		else:
+			mask = 0
+			for bit in bits:
+				mask |= 1 << bit
+			output.append('\n\t{reg} = ({reg} & ~{mask}U) | ({res} & {mask}U);'.format(
+				reg = resolved, mask = mask, res = myRes
+			))
+	if prog.carryFlowDst:
+		if prog.lastOp.op != 'cmp':
+			if prog.sizeAdjust:
+				output.append('\n\t{dst} = ({dst} & ~{mask}) | ({tmpdst} & {mask});'.format(
+					dst = prog.resolveParam(prog.lastDst, prog.currentScope, {}), tmpdst = prog.carryFlowDst, mask = ((1 << prog.sizeAdjust) - 1)
+				))
+				prog.sizeAdjust = None
+			else:
+				output.append('\n\t{dst} = {tmpdst};'.format(dst = prog.resolveParam(prog.lastDst, prog.currentScope, {}), tmpdst = prog.carryFlowDst))
+		prog.carryFlowDst = None
+	if parity:
+		if paritySize > 8:
+			if paritySize > 16:
+				output.append('\n\t{dst} = {src} ^ ({src} >> 16);'.format(dst=parityDst, src=paritySrc))
+				paritySrc = parityDst
+			output.append('\n\t{dst} = {src} ^ ({src} >> 8);'.format(dst=parityDst, src=paritySrc))
+			paritySrc = parityDst
+		output.append('\n\t{dst} = ({src} ^ ({src} >> 4)) & 0xF;'.format(dst=parityDst, src=paritySrc))
+		if type(parity) is tuple:
+			reg,bit = parity
+			reg = prog.resolveParam(reg, None, {})
+			output.append('\n\t{flag} = ({flag} & ~{mask}U) | ((0x6996 >> {parity}) << {bit} & {mask}U);'.format(
+				flag=reg, mask = 1 << bit, bit = bit, parity = parityDst
+			))
+		else:
+			reg = prog.resolveParam(parity, None, {})
+			output.append('\n\t{flag} = 0x9669 >> {parity} & 1;'.format(flag=reg, parity=parityDst))
+			
 	#TODO: combine explicit flags targeting the same storage location
 	for flag in explicit:
 		location = prog.flags.getStorage(flag)
@@ -358,31 +561,276 @@
 			output.append('\n\t{reg} = {val};'.format(reg=reg, val=explicit[flag]))
 	return ''.join(output)
 	
-def _cmpCImpl(prog, params):
-	size = prog.paramSize(params[1])
+def _cmpCImpl(prog, params, rawParams, flagUpdates):
+	size = prog.paramSize(rawParams[1])
+	needsCarry = False
+	if flagUpdates:
+		for flag in flagUpdates:
+			calc = prog.flags.flagCalc[flag]
+			if calc == 'carry':
+				needsCarry = True
+				break
+	if needsCarry:
+		size *= 2
 	tmpvar = 'cmp_tmp{sz}__'.format(sz=size)
-	typename = ''
+	if flagUpdates:
+		prog.carryFlowDst = tmpvar
+		prog.lastA = params[1]
+		prog.lastB = params[0]
+		prog.lastBFlow = params[0]
 	scope = prog.getRootScope()
 	if not scope.resolveLocal(tmpvar):
 		scope.addLocal(tmpvar, size)
-	prog.lastDst = tmpvar
+	prog.lastDst = rawParams[1]
+	if len(params) > 2:
+		size = params[2]
+		if size == 0:
+			size = 8
+		elif size == 1:
+			size = 16
+		else:
+			size = 32
+		prog.lastSize = size
+	else:
+		prog.lastSize = None
 	return '\n\t{var} = {b} - {a};'.format(var = tmpvar, a = params[0], b = params[1])
 
-def _asrCImpl(prog, params, rawParams):
-	shiftSize = prog.paramSize(rawParams[0])
-	mask = 1 << (shiftSize - 1)
-	return '\n\t{dst} = ({a} >> {b}) | ({a} & {mask});'.format(a = params[0], b = params[1], dst = params[2], mask = mask)
-		
+def _asrCImpl(prog, params, rawParams, flagUpdates):
+	needsCarry = False
+	if flagUpdates:
+		for flag in flagUpdates:
+			calc = prog.flags.flagCalc[flag]
+			if calc == 'carry':
+				needsCarry = True
+	decl = ''
+	size = prog.paramSize(rawParams[2])
+	if needsCarry:
+		decl,name = prog.getTemp(size * 2)
+		dst = prog.carryFlowDst = name
+		prog.lastA = params[0]
+	else:
+		dst = params[2]
+	mask = 1 << (size - 1)
+	return decl + '\n\t{dst} = ({a} >> {b}) | ({a} & {mask} ? 0xFFFFFFFFU << ({size} - {b}) : 0);'.format(
+		a = params[0], b = params[1], dst = dst, mask = mask, size=size)
+	
+def _sext(size, src):
+	if size == 16:
+		return src | 0xFF00 if src & 0x80 else src & 0x7F
+	else:
+		return src | 0xFFFF0000 if src & 0x8000 else src & 0x7FFF
+
+def _sextCImpl(prog, params, rawParms):
+	if params[0] == 16:
+		fmt = '\n\t{dst} = {src} & 0x80 ? {src} | 0xFF00 : {src} & 0x7F;'
+	else:
+		fmt = '\n\t{dst} = {src} & 0x8000 ? {src} | 0xFFFF0000 : {src} & 0x7FFF;'
+	return fmt.format(src=params[1], dst=params[2])
+	
+def _getCarryCheck(prog):
+	carryFlag = None
+	for flag in prog.flags.flagOrder:
+		if prog.flags.flagCalc[flag] == 'carry':
+			carryFlag = flag
+			break
+	if carryFlag is None:
+		raise Exception('adc requires a defined carry flag')
+	carryStorage = prog.flags.getStorage(carryFlag)
+	if type(carryStorage) is tuple:
+		reg,bit = carryStorage
+		reg = prog.resolveReg(reg, None, (), False)
+		return '({reg} & 1 << {bit})'.format(reg=reg, bit=bit)
+	else:
+		return prog.resolveReg(carryStorage, None, (), False)
+
+def _adcCImpl(prog, params, rawParams, flagUpdates):
+	needsSizeAdjust = False
+	if len(params) > 3:
+		size = params[3]
+		if size == 0:
+			size = 8
+		elif size == 1:
+			size = 16
+		else:
+			size = 32
+		prog.lastSize = size
+		destSize = prog.paramSize(rawParams[2])
+		if destSize > size:
+			needsSizeAdjust = True
+			prog.sizeAdjust = size
+	needsCarry = needsOflow = needsHalf = False
+	if flagUpdates:
+		for flag in flagUpdates:
+			calc = prog.flags.flagCalc[flag]
+			if calc == 'carry':
+				needsCarry = True
+			elif calc == 'half-carry':
+				needsHalf = True
+			elif calc == 'overflow':
+				needsOflow = True
+	decl = ''
+	carryCheck = _getCarryCheck(prog)
+	vals = '1 : 0'
+	if needsCarry or needsOflow or needsHalf or (flagUpdates and needsSizeAdjust):
+		if len(params) <= 3:
+			size = prog.paramSize(rawParams[2])
+		if needsCarry:
+			size *= 2
+		decl,name = prog.getTemp(size)
+		dst = prog.carryFlowDst = name
+		prog.lastA = params[0]
+		prog.lastB = params[1]
+		prog.lastBFlow = '(~{b})'.format(b=params[1])
+		if size == 64:
+			params[0] = '((uint64_t){a})'.format(a=params[0])
+			params[1] = '((uint64_t){b})'.format(b=params[1])
+			vals = '((uint64_t)1) : ((uint64_t)0)'
+	elif needsSizeAdjust:
+		decl,name = prog.getTemp(size)
+		dst = params[2]
+		return '{decl}\n\t{tmp} = ({a} & {mask}) + ({b} & {mask}) + ({check} ? 1 : 0);\n\t{dst} = ({dst} & ~{mask}) | {tmp};'.format(
+			decl = decl, tmp = name, a = a, b = b, op = op, dst = dst, mask = ((1 << size) - 1), check = carryCheck
+		)
+	else:
+		dst = params[2]
+	return decl + '\n\t{dst} = {a} + {b} + ({check} ? {vals});'.format(dst = dst,
+		a = params[0], b = params[1], check = carryCheck, vals = vals
+	)
+
+def _sbcCImpl(prog, params, rawParams, flagUpdates):
+	needsSizeAdjust = False
+	if len(params) > 3:
+		size = params[3]
+		if size == 0:
+			size = 8
+		elif size == 1:
+			size = 16
+		else:
+			size = 32
+		prog.lastSize = size
+		destSize = prog.paramSize(rawParams[2])
+		if destSize > size:
+			needsSizeAdjust = True
+			prog.sizeAdjust = size
+	needsCarry = needsOflow = needsHalf = False
+	if flagUpdates:
+		for flag in flagUpdates:
+			calc = prog.flags.flagCalc[flag]
+			if calc == 'carry':
+				needsCarry = True
+			elif calc == 'half-carry':
+				needsHalf = True
+			elif calc == 'overflow':
+				needsOflow = True
+	decl = ''
+	carryCheck = _getCarryCheck(prog)
+	vals = '1 : 0'
+	if needsCarry or needsOflow or needsHalf or (flagUpdates and needsSizeAdjust):
+		size = prog.paramSize(rawParams[2])
+		if needsCarry:
+			size *= 2
+		decl,name = prog.getTemp(size)
+		dst = prog.carryFlowDst = name
+		prog.lastA = params[1]
+		prog.lastB = params[0]
+		prog.lastBFlow = params[0]
+		if size == 64:
+			params[1] = '((uint64_t){a})'.format(a=params[1])
+			params[0] = '((uint64_t){b})'.format(b=params[0])
+			vals = '((uint64_t)1) : ((uint64_t)0)'
+	elif needsSizeAdjust:
+		decl,name = prog.getTemp(size)
+		dst = params[2]
+		return '{decl}\n\t{tmp} = ({b} & {mask}) - ({a} & {mask}) - ({check} ? 1 : 0);\n\t{dst} = ({dst} & ~{mask}) | {tmp};'.format(
+			decl = decl, tmp = name, a = params[0], b = params[1], op = op, dst = dst, mask = ((1 << size) - 1), check = carryCheck
+		)
+	else:
+		dst = params[2]
+	return decl + '\n\t{dst} = {b} - {a} - ({check} ? {vals});'.format(dst = dst,
+		a = params[0], b = params[1], check=_getCarryCheck(prog), vals = vals
+	)
+	
+def _rolCImpl(prog, params, rawParams, flagUpdates):
+	needsCarry = False
+	if flagUpdates:
+		for flag in flagUpdates:
+			calc = prog.flags.flagCalc[flag]
+			if calc == 'carry':
+				needsCarry = True
+	decl = ''
+	size = prog.paramSize(rawParams[2])
+	if needsCarry:
+		decl,name = prog.getTemp(size * 2)
+		dst = prog.carryFlowDst = name
+	else:
+		dst = params[2]
+	return decl + '\n\t{dst} = {a} << {b} | {a} >> ({size} - {b});'.format(dst = dst,
+		a = params[0], b = params[1], size=size
+	)
+	
+def _rlcCImpl(prog, params, rawParams, flagUpdates):
+	needsCarry = False
+	if flagUpdates:
+		for flag in flagUpdates:
+			calc = prog.flags.flagCalc[flag]
+			if calc == 'carry':
+				needsCarry = True
+	decl = ''
+	carryCheck = _getCarryCheck(prog)
+	size = prog.paramSize(rawParams[2])
+	if needsCarry:
+		decl,name = prog.getTemp(size * 2)
+		dst = prog.carryFlowDst = name
+	else:
+		dst = params[2]
+	return decl + '\n\t{dst} = {a} << {b} | {a} >> ({size} + 1 - {b}) | ({check} ? 1 : 0) << ({b} - 1);'.format(dst = dst,
+		a = params[0], b = params[1], size=size, check=carryCheck
+	)
+	
+def _rorCImpl(prog, params, rawParams, flagUpdates):
+	size = prog.paramSize(rawParams[2])
+	return '\n\t{dst} = {a} >> {b} | {a} << ({size} - {b});'.format(dst = params[2],
+		a = params[0], b = params[1], size=size
+	)
+
+def _rrcCImpl(prog, params, rawParams, flagUpdates):
+	needsCarry = False
+	if flagUpdates:
+		for flag in flagUpdates:
+			calc = prog.flags.flagCalc[flag]
+			if calc == 'carry':
+				needsCarry = True
+	decl = ''
+	carryCheck = _getCarryCheck(prog)
+	size = prog.paramSize(rawParams[2])
+	if needsCarry:
+		decl,name = prog.getTemp(size * 2)
+		dst = prog.carryFlowDst = name
+	else:
+		dst = params[2]
+	return decl + '\n\t{dst} = {a} >> {b} | {a} << ({size} + 1 - {b}) | ({check} ? 1 : 0) << ({size}-{b});'.format(dst = dst,
+		a = params[0], b = params[1], size=size, check=carryCheck
+	)
+	
+def _updateSyncCImpl(prog, params):
+	return '\n\t{sync}(context, target_cycle);'.format(sync=prog.sync_cycle)
+
 _opMap = {
 	'mov': Op(lambda val: val).cUnaryOperator(''),
 	'not': Op(lambda val: ~val).cUnaryOperator('~'),
 	'lnot': Op(lambda val: 0 if val else 1).cUnaryOperator('!'),
 	'neg': Op(lambda val: -val).cUnaryOperator('-'),
 	'add': Op(lambda a, b: a + b).cBinaryOperator('+'),
+	'adc': Op().addImplementation('c', 2, _adcCImpl),
 	'sub': Op(lambda a, b: b - a).cBinaryOperator('-'),
+	'sbc': Op().addImplementation('c', 2, _sbcCImpl),
 	'lsl': Op(lambda a, b: a << b).cBinaryOperator('<<'),
 	'lsr': Op(lambda a, b: a >> b).cBinaryOperator('>>'),
 	'asr': Op(lambda a, b: a >> b).addImplementation('c', 2, _asrCImpl),
+	'rol': Op().addImplementation('c', 2, _rolCImpl),
+	'rlc': Op().addImplementation('c', 2, _rlcCImpl),
+	'ror': Op().addImplementation('c', 2, _rorCImpl),
+	'rrc': Op().addImplementation('c', 2, _rrcCImpl),
 	'and': Op(lambda a, b: a & b).cBinaryOperator('&'),
 	'or':  Op(lambda a, b: a | b).cBinaryOperator('|'),
 	'xor': Op(lambda a, b: a ^ b).cBinaryOperator('^'),
@@ -390,9 +838,13 @@
 		'c', 1, lambda prog, params: '\n\t{dst} = abs({src});'.format(dst=params[1], src=params[0])
 	),
 	'cmp': Op().addImplementation('c', None, _cmpCImpl),
+	'sext': Op(_sext).addImplementation('c', 2, _sextCImpl),
 	'ocall': Op().addImplementation('c', None, lambda prog, params: '\n\t{pre}{fun}({args});'.format(
 		pre = prog.prefix, fun = params[0], args = ', '.join(['context'] + [str(p) for p in params[1:]])
 	)),
+	'pcall': Op().addImplementation('c', None, lambda prog, params: '\n\t(({typ}){fun})({args});'.format(
+		typ = params[1], fun = params[0], args = ', '.join([str(p) for p in params[2:]])
+	)),
 	'cycles': Op().addImplementation('c', None,
 		lambda prog, params: '\n\tcontext->cycles += context->opts->gen.clock_divider * {0};'.format(
 			params[0]
@@ -400,17 +852,18 @@
 	),
 	'addsize': Op(
 		lambda a, b: b + (2 * a if a else 1)
-	).addImplementation('c', 2, lambda prog, params: '\n\t{dst} = {val} + {sz} ? {sz} * 2 : 1;'.format(
+	).addImplementation('c', 2, lambda prog, params: '\n\t{dst} = {val} + ({sz} ? {sz} * 2 : 1);'.format(
 		dst = params[2], sz = params[0], val = params[1]
 	)),
 	'decsize': Op(
 		lambda a, b: b - (2 * a if a else 1)
-	).addImplementation('c', 2, lambda prog, params: '\n\t{dst} = {val} - {sz} ? {sz} * 2 : 1;'.format(
+	).addImplementation('c', 2, lambda prog, params: '\n\t{dst} = {val} - ({sz} ? {sz} * 2 : 1);'.format(
 		dst = params[2], sz = params[0], val = params[1]
 	)),
 	'xchg': Op().addImplementation('c', (0,1), _xchgCImpl),
 	'dispatch': Op().addImplementation('c', None, _dispatchCImpl),
-	'update_flags': Op().addImplementation('c', None, _updateFlagsCImpl)
+	'update_flags': Op().addImplementation('c', None, _updateFlagsCImpl),
+	'update_sync': Op().addImplementation('c', None, _updateSyncCImpl)
 }
 
 #represents a simple DSL instruction
@@ -419,13 +872,17 @@
 		self.op = parts[0]
 		self.params = parts[1:]
 		
-	def generate(self, prog, parent, fieldVals, output, otype):
+	def generate(self, prog, parent, fieldVals, output, otype, flagUpdates):
 		procParams = []
-		allParamsConst = True
+		allParamsConst = flagUpdates is None and not prog.conditional
 		opDef = _opMap.get(self.op)
 		for param in self.params:
-			allowConst = (self.op in prog.subroutines or len(procParams) != len(self.params) - 1) and param in parent.regValues
 			isDst = (not opDef is None) and len(procParams) in opDef.outOp
+			allowConst = (self.op in prog.subroutines or not isDst) and param in parent.regValues
+			if isDst and self.op == 'xchg':
+				#xchg uses its regs as both source and destination
+				#we need to resolve as both so that disperse/coalesce flag stuff gets done
+				prog.resolveParam(param, parent, fieldVals, allowConst, False)
 			param = prog.resolveParam(param, parent, fieldVals, allowConst, isDst)
 			
 			if (not type(param) is int) and len(procParams) != len(self.params) - 1:
@@ -442,12 +899,14 @@
 			else:
 				param = parent.resolveLocal(param) or param
 				if param in fieldVals:
-					param = fieldVals[index]
+					param = fieldVals[param]
 			prog.meta[self.params[0]] = param
 		elif self.op == 'dis':
 			#TODO: Disassembler
 			pass
 		elif not opDef is None:
+			if opDef.numParams() > len(procParams):
+				raise Exception('Insufficient params for ' + self.op + ' (' + ', '.join(self.params) + ')')
 			if opDef.canEval() and allParamsConst:
 				#do constant folding
 				if opDef.numArgs() >= len(procParams):
@@ -461,10 +920,33 @@
 					dst = maybeLocal
 				parent.regValues[dst] = result
 				if prog.isReg(dst):
-					output.append(_opMap['mov'].generate(otype, prog, procParams, self.params))
+					shortProc = (procParams[0], procParams[-1])
+					shortParams = (self.params[0], self.params[-1])
+					output.append(_opMap['mov'].generate(otype, prog, shortProc, shortParams, None))
 			else:
-				output.append(opDef.generate(otype, prog, procParams, self.params))
+				output.append(opDef.generate(otype, prog, procParams, self.params, flagUpdates))
+				for dstIdx in opDef.outOp:
+					dst = self.params[dstIdx]
+					while dst in prog.meta:
+						dst = prog.meta[dst]
+					if dst in parent.regValues:
+						del parent.regValues[dst]
+					
 		elif self.op in prog.subroutines:
+			procParams = []
+			for param in self.params:
+				begin,sep,end = param.partition('.')
+				if sep:
+					if end in fieldVals:
+						param = begin + '.' + str(fieldVals[end])
+				else:
+					if param in fieldVals:
+						param = fieldVals[param]
+					else:
+						maybeLocal = parent.resolveLocal(param)
+						if maybeLocal and maybeLocal in parent.regValues:
+							param = parent.regValues[maybeLocal]
+				procParams.append(param)
 			prog.subroutines[self.op].inline(prog, procParams, output, otype, parent)
 		else:
 			output.append('\n\t' + self.op + '(' + ', '.join([str(p) for p in procParams]) + ');')
@@ -517,7 +999,7 @@
 			return self.current_locals[name]
 		return self.parent.localSize(name)
 			
-	def generate(self, prog, parent, fieldVals, output, otype):
+	def generate(self, prog, parent, fieldVals, output, otype, flagUpdates):
 		prog.pushScope(self)
 		param = prog.resolveParam(self.param, parent, fieldVals)
 		if type(param) is int:
@@ -527,39 +1009,42 @@
 				output.append('\n\t{')
 				for local in self.case_locals[param]:
 					output.append('\n\tuint{0}_t {1};'.format(self.case_locals[param][local], local))
-				for op in self.cases[param]:
-					op.generate(prog, self, fieldVals, output, otype)
+				self.processOps(prog, fieldVals, output, otype, self.cases[param])
 				output.append('\n\t}')
 			elif self.default:
 				self.current_locals = self.default_locals
 				output.append('\n\t{')
 				for local in self.default_locals:
 					output.append('\n\tuint{0}_t {1};'.format(self.default[local], local))
-				for op in self.default:
-					op.generate(prog, self, fieldVals, output, otype)
+				self.processOps(prog, fieldVals, output, otype, self.default)
 				output.append('\n\t}')
 		else:
+			oldCond = prog.conditional
+			prog.conditional = True
 			output.append('\n\tswitch(' + param + ')')
 			output.append('\n\t{')
 			for case in self.cases:
+				#temp = prog.temp.copy()
 				self.current_locals = self.case_locals[case]
 				self.regValues = dict(self.parent.regValues)
 				output.append('\n\tcase {0}U: '.format(case) + '{')
 				for local in self.case_locals[case]:
 					output.append('\n\tuint{0}_t {1};'.format(self.case_locals[case][local], local))
-				for op in self.cases[case]:
-					op.generate(prog, self, fieldVals, output, otype)
+				self.processOps(prog, fieldVals, output, otype, self.cases[case])
 				output.append('\n\tbreak;')
 				output.append('\n\t}')
+				#prog.temp = temp
 			if self.default:
+				#temp = prog.temp.copy()
 				self.current_locals = self.default_locals
 				self.regValues = dict(self.parent.regValues)
 				output.append('\n\tdefault: {')
 				for local in self.default_locals:
 					output.append('\n\tuint{0}_t {1};'.format(self.default_locals[local], local))
-				for op in self.default:
-					op.generate(prog, self, fieldVals, output, otype)
+				self.processOps(prog, fieldVals, output, otype, self.default)
+				#prog.temp = temp
 			output.append('\n\t}')
+			prog.conditional = oldCond
 		prog.popScope()
 	
 	def __str__(self):
@@ -579,11 +1064,24 @@
 		params = [prog.resolveParam(p, parent, fieldVals) for p in prog.lastOp.params]
 		return '\n\tif ({a} >= {b}) '.format(a=params[1], b = params[0]) + '{'
 	else:
-		raise ion(">=U not implemented in the general case yet")
+		raise Exception(">=U not implemented in the general case yet")
+
+def _eqCImpl(prog, parent, fieldVals, output):
+	if prog.lastOp.op == 'cmp':
+		output.pop()
+		params = [prog.resolveParam(p, parent, fieldVals) for p in prog.lastOp.params]
+		return '\n\tif ({a} == {b}) '.format(a=params[1], b = params[0]) + '{'
+	else:
+		return '\n\tif (!{a}) {{'.format(a=prog.resolveParam(prog.lastDst, None, {}))
+
+def _neqCImpl(prog, parent, fieldVals, output):
+	return '\n\tif ({a}) {{'.format(a=prog.resolveParam(prog.lastDst, None, {}))
 	
 _ifCmpImpl = {
 	'c': {
-		'>=U': _geuCImpl
+		'>=U': _geuCImpl,
+		'=': _eqCImpl,
+		'!=': _neqCImpl
 	}
 }
 #represents a DSL conditional construct
@@ -606,7 +1104,7 @@
 		if op.op == 'local':
 			name = op.params[0]
 			size = op.params[1]
-			self.locals[name] = size
+			self.curLocals[name] = size
 		elif op.op == 'else':
 			self.curLocals = self.elseLocals
 			self.curBody = self.elseBody
@@ -617,23 +1115,25 @@
 		return self.curLocals.get(name)
 		
 	def resolveLocal(self, name):
-		if name in self.locals:
+		if name in self.curLocals:
 			return name
 		return self.parent.resolveLocal(name)
 		
 	def _genTrueBody(self, prog, fieldVals, output, otype):
 		self.curLocals = self.locals
+		subOut = []
+		self.processOps(prog, fieldVals, subOut, otype, self.body)
 		for local in self.locals:
 			output.append('\n\tuint{sz}_t {nm};'.format(sz=self.locals[local], nm=local))
-		for op in self.body:
-			op.generate(prog, self, fieldVals, output, otype)
+		output += subOut
 			
 	def _genFalseBody(self, prog, fieldVals, output, otype):
 		self.curLocals = self.elseLocals
+		subOut = []
+		self.processOps(prog, fieldVals, subOut, otype, self.elseBody)
 		for local in self.elseLocals:
 			output.append('\n\tuint{sz}_t {nm};'.format(sz=self.elseLocals[local], nm=local))
-		for op in self.elseBody:
-			op.generate(prog, self, fieldVals, output, otype)
+		output += subOut
 	
 	def _genConstParam(self, param, prog, fieldVals, output, otype):
 		if param:
@@ -641,29 +1141,43 @@
 		else:
 			self._genFalseBody(prog, fieldVals, output, otype)
 			
-	def generate(self, prog, parent, fieldVals, output, otype):
+	def generate(self, prog, parent, fieldVals, output, otype, flagUpdates):
 		self.regValues = parent.regValues
-		try:
+		if self.cond in prog.booleans:
 			self._genConstParam(prog.checkBool(self.cond), prog, fieldVals, output, otype)
-		except Exception:
+		else:
 			if self.cond in _ifCmpImpl[otype]:
+				oldCond = prog.conditional
+				prog.conditional = True
+				#temp = prog.temp.copy()
 				output.append(_ifCmpImpl[otype][self.cond](prog, parent, fieldVals, output))
 				self._genTrueBody(prog, fieldVals, output, otype)
+				#prog.temp = temp
 				if self.elseBody:
+					#temp = prog.temp.copy()
 					output.append('\n\t} else {')
 					self._genFalseBody(prog, fieldVals, output, otype)
+					#prog.temp = temp
 				output.append('\n\t}')
+				prog.conditional = oldCond
 			else:
 				cond = prog.resolveParam(self.cond, parent, fieldVals)
 				if type(cond) is int:
 					self._genConstParam(cond, prog, fieldVals, output, otype)
 				else:
+					#temp = prog.temp.copy()
 					output.append('\n\tif ({cond}) '.format(cond=cond) + '{')
+					oldCond = prog.conditional
+					prog.conditional = True
 					self._genTrueBody(prog, fieldVals, output, otype)
+					#prog.temp = temp
 					if self.elseBody:
+						#temp = prog.temp.copy()
 						output.append('\n\t} else {')
 						self._genFalseBody(prog, fieldVals, output, otype)
+						#prog.temp = temp
 					output.append('\n\t}')
+					prog.conditional = oldCond
 						
 	
 	def __str__(self):
@@ -679,12 +1193,14 @@
 		self.pointers = {}
 		self.regArrays = {}
 		self.regToArray = {}
+		self.addReg('cycles', 32)
+		self.addReg('sync_cycle', 32)
 	
 	def addReg(self, name, size):
 		self.regs[name] = size
 		
-	def addPointer(self, name, size):
-		self.pointers[name] = size
+	def addPointer(self, name, size, count):
+		self.pointers[name] = (size, count)
 	
 	def addRegArray(self, name, size, regs):
 		self.regArrays[name] = (size, regs)
@@ -721,12 +1237,15 @@
 	
 	def processLine(self, parts):
 		if len(parts) == 3:
-			self.addRegArray(parts[0], int(parts[1]), int(parts[2]))
+			if parts[1].startswith('ptr'):
+				self.addPointer(parts[0], parts[1][3:], int(parts[2]))
+			else:
+				self.addRegArray(parts[0], int(parts[1]), int(parts[2]))
 		elif len(parts) > 2:
 			self.addRegArray(parts[0], int(parts[1]), parts[2:])
 		else:
 			if parts[1].startswith('ptr'):
-				self.addPointer(parts[0], int(parts[1][3:]))
+				self.addPointer(parts[0], parts[1][3:], 1)
 			else:
 				self.addReg(parts[0], int(parts[1]))
 		return self
@@ -734,7 +1253,18 @@
 	def writeHeader(self, otype, hFile):
 		fieldList = []
 		for pointer in self.pointers:
-			hFile.write('\n\tuint{sz}_t *{nm};'.format(nm=pointer, sz=self.pointers[pointer]))
+			stars = '*'
+			ptype, count = self.pointers[pointer]
+			while ptype.startswith('ptr'):
+				stars += '*'
+				ptype = ptype[3:]
+			if ptype.isdigit():
+				ptype = 'uint{sz}_t'.format(sz=ptype)
+			if count > 1:
+				arr = '[{n}]'.format(n=count)
+			else:
+				arr = ''
+			hFile.write('\n\t{ptype} {stars}{nm}{arr};'.format(nm=pointer, ptype=ptype, stars=stars, arr=arr))
 		for reg in self.regs:
 			if not self.isRegArrayMember(reg):
 				fieldList.append((self.regs[reg], 1, reg))
@@ -756,7 +1286,9 @@
 		self.flagBits = {}
 		self.flagCalc = {}
 		self.flagStorage = {}
+		self.flagOrder = []
 		self.flagReg = None
+		self.storageToFlags = {}
 		self.maxBit = -1
 	
 	def processLine(self, parts):
@@ -777,6 +1309,9 @@
 				self.flagBits[flag] = bit
 			self.flagCalc[flag] = calc
 			self.flagStorage[flag] = storage
+			storage,_,storebit = storage.partition('.')
+			self.storageToFlags.setdefault(storage, []).append((storebit, flag))
+			self.flagOrder.append(flag)
 		return self
 	
 	def getStorage(self, flag):
@@ -788,6 +1323,28 @@
 		else:
 			return loc 
 	
+	def parseFlagUpdate(self, flagString):
+		last = ''
+		autoUpdate = set()
+		explicit = {}
+		for c in flagString:
+			if c.isdigit():
+				if last.isalpha():
+					num = int(c)
+					if num > 1:
+						raise Exception(c + ' is not a valid digit for update_flags')
+					explicit[last] = num
+					last = c
+				else:
+					raise Exception('Digit must follow flag letter in update_flags')
+			else:
+				if last.isalpha():
+					autoUpdate.add(last)
+				last = c
+		if last.isalpha():
+			autoUpdate.add(last)
+		return (autoUpdate, explicit)
+	
 	def disperseFlags(self, prog, otype):
 		bitToFlag = [None] * (self.maxBit+1)
 		src = prog.resolveReg(self.flagReg, None, {})
@@ -884,7 +1441,7 @@
 						src=src, dst=dst, srcbit=srcbit, dstbit=dstbit
 					))
 			if direct:
-				output.append('\n\t{dst} |= {src} & {mask}'.format(
+				output.append('\n\t{dst} |= {src} & {mask};'.format(
 					dst=dst, src=src, mask=direct
 				))
 		return ''.join(output)
@@ -902,12 +1459,22 @@
 		self.extra_tables = info.get('extra_tables', [])
 		self.context_type = self.prefix + 'context'
 		self.body = info.get('body', [None])[0]
+		self.interrupt = info.get('interrupt', [None])[0]
+		self.sync_cycle = info.get('sync_cycle', [None])[0]
 		self.includes = info.get('include', [])
 		self.flags = flags
 		self.lastDst = None
 		self.scopes = []
 		self.currentScope = None
 		self.lastOp = None
+		self.carryFlowDst = None
+		self.lastA = None
+		self.lastB = None
+		self.lastBFlow = None
+		self.sizeAdjust = None
+		self.conditional = False
+		self.declares = []
+		self.lastSize = None
 		
 	def __str__(self):
 		pieces = []
@@ -930,21 +1497,21 @@
 		hFile.write('\n}} {0}options;'.format(self.prefix))
 		hFile.write('\n\ntypedef struct {')
 		hFile.write('\n\t{0}options *opts;'.format(self.prefix))
-		hFile.write('\n\tuint32_t cycles;')
 		self.regs.writeHeader(otype, hFile)
 		hFile.write('\n}} {0}context;'.format(self.prefix))
 		hFile.write('\n')
+		hFile.write('\nvoid {pre}execute({type} *context, uint32_t target_cycle);'.format(pre = self.prefix, type = self.context_type))
+		for decl in self.declares:
+			hFile.write('\n' + decl)
 		hFile.write('\n#endif //{0}_'.format(macro))
 		hFile.write('\n')
 		hFile.close()
-	def build(self, otype):
-		body = []
+		
+	def _buildTable(self, otype, table, body, lateBody):
 		pieces = []
-		for include in self.includes:
-			body.append('#include "{0}"\n'.format(include))
-		for table in self.instructions:
-			opmap = [None] * (1 << self.opsize)
-			bodymap = {}
+		opmap = [None] * (1 << self.opsize)
+		bodymap = {}
+		if table in self.instructions:
 			instructions = self.instructions[table]
 			instructions.sort()
 			for inst in instructions:
@@ -957,8 +1524,8 @@
 						self.lastOp = None
 						opmap[val] = inst.generateName(val)
 						bodymap[val] = inst.generateBody(val, self, otype)
-			
-			pieces.append('\ntypedef void (*impl_fun)({pre}context *context);'.format(pre=self.prefix))
+		
+		if self.dispatch == 'call':
 			pieces.append('\nstatic impl_fun impl_{name}[{sz}] = {{'.format(name = table, sz=len(opmap)))
 			for inst in range(0, len(opmap)):
 				op = opmap[inst]
@@ -968,20 +1535,85 @@
 					pieces.append('\n\t' + op + ',')
 					body.append(bodymap[inst])
 			pieces.append('\n};')
-		if self.body in self.subroutines:
+		elif self.dispatch == 'goto':
+			body.append('\n\tstatic void *impl_{name}[{sz}] = {{'.format(name = table, sz=len(opmap)))
+			for inst in range(0, len(opmap)):
+				op = opmap[inst]
+				if op is None:
+					body.append('\n\t\t&&unimplemented,')
+				else:
+					body.append('\n\t\t&&' + op + ',')
+					lateBody.append(bodymap[inst])
+			body.append('\n\t};')
+		else:
+			raise Exception("unimplmeneted dispatch type " + self.dispatch)
+		body.extend(pieces)
+		
+	def nextInstruction(self, otype):
+		output = []
+		if self.dispatch == 'goto':
+			if self.interrupt in self.subroutines:
+				output.append('\n\tif (context->cycles >= context->sync_cycle) {')
+			output.append('\n\tif (context->cycles >= target_cycle) { return; }')
+			if self.interrupt in self.subroutines:
+				self.meta = {}
+				self.temp = {}
+				self.subroutines[self.interrupt].inline(self, [], output, otype, None)
+				output.append('\n\t}')
+			
+			self.meta = {}
+			self.temp = {}
+			self.subroutines[self.body].inline(self, [], output, otype, None)
+		return output
+	
+	def build(self, otype):
+		body = []
+		pieces = []
+		for include in self.includes:
+			body.append('#include "{0}"\n'.format(include))
+		if self.dispatch == 'call':
+			body.append('\nstatic void unimplemented({pre}context *context, uint32_t target_cycle)'.format(pre = self.prefix))
+			body.append('\n{')
+			body.append('\n\tfatal_error("Unimplemented instruction\\n");')
+			body.append('\n}\n')
+			body.append('\ntypedef void (*impl_fun)({pre}context *context, uint32_t target_cycle);'.format(pre=self.prefix))
+			for table in self.extra_tables:
+				body.append('\nstatic impl_fun impl_{name}[{sz}];'.format(name = table, sz=(1 << self.opsize)))
+			body.append('\nstatic impl_fun impl_main[{sz}];'.format(sz=(1 << self.opsize)))
+		elif self.dispatch == 'goto':
+			body.append('\nvoid {pre}execute({type} *context, uint32_t target_cycle)'.format(pre = self.prefix, type = self.context_type))
+			body.append('\n{')
+			
+		for table in self.extra_tables:
+			self._buildTable(otype, table, body, pieces)
+		self._buildTable(otype, 'main', body, pieces)
+		if self.dispatch == 'call' and self.body in self.subroutines:
 			pieces.append('\nvoid {pre}execute({type} *context, uint32_t target_cycle)'.format(pre = self.prefix, type = self.context_type))
 			pieces.append('\n{')
+			pieces.append('\n\t{sync}(context, target_cycle);'.format(sync=self.sync_cycle))
 			pieces.append('\n\twhile (context->cycles < target_cycle)')
 			pieces.append('\n\t{')
+			if self.interrupt in self.subroutines:
+				pieces.append('\n\t\tif (context->cycles >= context->sync_cycle) {')
+				self.meta = {}
+				self.temp = {}
+				intpieces = []
+				self.subroutines[self.interrupt].inline(self, [], intpieces, otype, None)
+				for size in self.temp:
+					pieces.append('\n\tuint{sz}_t gen_tmp{sz}__;'.format(sz=size))
+				pieces += intpieces
+				pieces.append('\n\t\t}')
 			self.meta = {}
 			self.temp = {}
 			self.subroutines[self.body].inline(self, [], pieces, otype, None)
 			pieces.append('\n\t}')
 			pieces.append('\n}')
-		body.append('\nstatic void unimplemented({pre}context *context)'.format(pre = self.prefix))
-		body.append('\n{')
-		body.append('\n\tfatal_error("Unimplemented instruction");')
-		body.append('\n}\n')
+		elif self.dispatch == 'goto':
+			body.append('\n\t{sync}(context, target_cycle);'.format(sync=self.sync_cycle))
+			body += self.nextInstruction(otype)
+			pieces.append('\nunimplemented:')
+			pieces.append('\n\tfatal_error("Unimplemented instruction\\n");')
+			pieces.append('\n}')
 		return ''.join(body) +  ''.join(pieces)
 		
 	def checkBool(self, name):
@@ -992,8 +1624,8 @@
 	def getTemp(self, size):
 		if size in self.temp:
 			return ('', self.temp[size])
-		self.temp[size] = 'tmp{sz}'.format(sz=size);
-		return ('\n\tuint{sz}_t tmp{sz};'.format(sz=size), self.temp[size])
+		self.temp[size] = 'gen_tmp{sz}__'.format(sz=size);  # remember the temporary's name for this size so later requests reuse it
+		return ('', self.temp[size])  # (declaration text, name): the declaration text is now always empty
 		
 	def resolveParam(self, param, parent, fieldVals, allowConstant=True, isdst=False):
 		keepGoing = True
@@ -1013,14 +1645,24 @@
 						return parent.regValues[param]
 					maybeLocal = parent.resolveLocal(param)
 					if maybeLocal:
+						if isdst:
+							self.lastDst = param
+							self.lastSize = None
 						return maybeLocal
 				if param in fieldVals:
 					param = fieldVals[param]
+					fieldVals = {}
+					keepGoing = True
 				elif param in self.meta:
 					param = self.meta[param]
 					keepGoing = True
 				elif self.isReg(param):
-					param = self.resolveReg(param, parent, fieldVals, isdst)
+					return self.resolveReg(param, parent, fieldVals, isdst)
+				elif param in self.regs.pointers:
+					return 'context->' + param
+		if isdst:
+			self.lastDst = param
+			self.lastSize = None
 		return param
 	
 	def isReg(self, name):
@@ -1070,9 +1712,12 @@
 	
 	
 	def paramSize(self, name):
-		size = self.currentScope.localSize(name)
-		if size:
-			return size
+		if name in self.meta:
+			return self.paramSize(self.meta[name])
+		for i in range(len(self.scopes) -1, -1, -1):
+			size = self.scopes[i].localSize(name)
+			if size:
+				return size
 		begin,sep,_ = name.partition('.')
 		if sep and self.regs.isRegArray(begin):
 			return self.regs.regArrays[begin][0]
@@ -1080,6 +1725,11 @@
 			return self.regs.regs[name]
 		return 32
 	
+	def getLastSize(self):
+		if self.lastSize:
+			return self.lastSize
+		return self.paramSize(self.lastDst)
+	
 	def pushScope(self, scope):
 		self.scopes.append(scope)
 		self.currentScope = scope
@@ -1092,11 +1742,13 @@
 	def getRootScope(self):
 		return self.scopes[0]
 
-def parse(f):
+def parse(args):
+	f = args.source
 	instructions = {}
 	subroutines = {}
 	registers = None
 	flags = None
+	declares = []
 	errors = []
 	info = {}
 	line_num = 0
@@ -1108,9 +1760,22 @@
 			continue
 		if line[0].isspace():
 			if not cur_object is None:
-				parts = [el.strip() for el in line.split(' ')]
+				sep = True
+				parts = []
+				while sep:
+					before,sep,after = line.partition('"')
+					before = before.strip()
+					if before:
+						parts += [el.strip() for el in before.split(' ')]
+					if sep:
+						#TODO: deal with escaped quotes
+						inside,sep,after = after.partition('"')
+						parts.append('"' + inside + '"')
+					line = after
 				if type(cur_object) is dict:
 					cur_object[parts[0]] = parts[1:]
+				elif type(cur_object) is list:
+					cur_object.append(' '.join(parts))
 				else:
 					cur_object = cur_object.processLine(parts)
 				
@@ -1168,6 +1833,8 @@
 				if flags is None:
 					flags = Flags()
 				cur_object = flags
+			elif line.strip() == 'declare':
+				cur_object = declares
 			else:
 				cur_object = SubRoutine(line.strip())
 				subroutines[cur_object.name] = cur_object
@@ -1175,8 +1842,19 @@
 		print(errors)
 	else:
 		p = Program(registers, instructions, subroutines, info, flags)
+		p.dispatch = args.dispatch
+		p.declares = declares
 		p.booleans['dynarec'] = False
 		p.booleans['interp'] = True
+		if args.define:
+			for define in args.define:
+				name,sep,val = define.partition('=')
+				name = name.strip()
+				val = val.strip()
+				if sep:
+					p.booleans[name] = bool(val)
+				else:
+					p.booleans[name] = True
 		
 		if 'header' in info:
 			print('#include "{0}"'.format(info['header'][0]))
@@ -1186,8 +1864,12 @@
 		print(p.build('c'))
 
 def main(argv):
-	f =  open(argv[1])
-	parse(f)
+	from argparse import ArgumentParser, FileType  # imported locally; only main uses argparse
+	argParser = ArgumentParser(description='CPU emulator DSL compiler')
+	argParser.add_argument('source', type=FileType('r'))  # DSL source file, opened for reading by argparse
+	argParser.add_argument('-D', '--define', action='append')  # repeatable NAME or NAME=VALUE; parse() stores these in the program's booleans
+	argParser.add_argument('-d', '--dispatch', choices=('call', 'switch', 'goto'), default='call')  # parse() copies this onto the Program as its dispatch style
+	parse(argParser.parse_args(argv[1:]))  # argv[0] is skipped before parsing
 
 if __name__ == '__main__':
 	from sys import argv
--- a/debug.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/debug.c	Sat Jan 15 13:15:21 2022 -0800
@@ -9,6 +9,13 @@
 #include "render.h"
 #include "util.h"
 #include "terminal.h"
+#include "z80inst.h"
+
+#ifdef NEW_CORE
+#define Z80_OPTS opts
+#else
+#define Z80_OPTS options
+#endif
 
 static bp_def * breakpoints = NULL;
 static bp_def * zbreakpoints = NULL;
@@ -88,6 +95,12 @@
 	}
 }
 
+static uint8_t m68k_read_byte(uint32_t address, m68k_context *context)
+{
+	//TODO: share this implementation with GDB debugger
+	//Byte-sized counterpart of m68k_read_word: reads through the context's memory map
+	void **pointers = (void **)context->mem_pointers;
+	uint8_t value = read_byte(address, pointers, &context->options->gen, context);
+	return value;
+}
+
 uint16_t m68k_read_word(uint32_t address, m68k_context *context)
 {
 	return read_word(address, (void **)context->mem_pointers, &context->options->gen, context);
@@ -98,7 +111,7 @@
 	return m68k_read_word(address, context) << 16 | m68k_read_word(address + 2, context);
 }
 
-void debugger_print(m68k_context *context, char format_char, char *param)
+void debugger_print(m68k_context *context, char format_char, char *param, uint32_t address)
 {
 	uint32_t value;
 	char format[8];
@@ -134,7 +147,7 @@
 				value &= 0xFF;
 			}
 		}
-	} else if (param[0] == 'S' && param[1] == 'R') {
+	} else if (param[0] == 's' && param[1] == 'r') {
 		value = (context->status << 8);
 		for (int flag = 0; flag < 5; flag++) {
 			value |= context->flags[flag] << (4-flag);
@@ -144,11 +157,15 @@
 	} else if(param[0] == 'f') {
 		genesis_context *gen = context->system;
 		value = gen->vdp->frame;
+	} else if (param[0] == 'p' && param[1] == 'c') {
+		value = address;
 	} else if ((param[0] == '0' && param[1] == 'x') || param[0] == '$') {
 		char *after;
 		uint32_t p_addr = strtol(param+(param[0] == '0' ? 2 : 1), &after, 16);
 		if (after[0] == '.' && after[1] == 'l') {
 			value = m68k_read_long(p_addr, context);
+		} else if (after[0] == '.' && after[1] == 'b') {
+			value = m68k_read_byte(p_addr, context);
 		} else {
 			value = m68k_read_word(p_addr, context);
 		}
@@ -157,6 +174,8 @@
 		uint32_t p_addr = param[1] == 'a' ? context->aregs[reg] : context->dregs[reg];
 		if (param[4] == '.' && param[5] == 'l') {
 			value = m68k_read_long(p_addr, context);
+		} else if (param[4] == '.' && param[5] == 'b') {
+			value = m68k_read_byte(p_addr, context);
 		} else {
 			value = m68k_read_word(p_addr, context);
 		}
@@ -190,6 +209,7 @@
 	}
 	switch (param[0])
 	{
+#ifndef NEW_CORE
 	case 'a':
 		if (param[1] == 'f') {
 			if(param[2] == '\'') {
@@ -331,6 +351,7 @@
 			value = context->im;
 		}
 		break;
+#endif
 	case 's':
 		if (param[1] == 'p') {
 			value = context->sp;
@@ -339,22 +360,7 @@
 	case '0':
 		if (param[1] == 'x') {
 			uint16_t p_addr = strtol(param+2, NULL, 16);
-			if (p_addr < 0x4000) {
-				value = system->zram[p_addr & 0x1FFF];
-			} else if(p_addr >= 0x8000) {
-				uint32_t v_addr = context->bank_reg << 15;
-				v_addr += p_addr & 0x7FFF;
-				if (v_addr < 0x400000) {
-					value = system->cart[v_addr/2];
-				} else if(v_addr > 0xE00000) {
-					value = system->work_ram[(v_addr & 0xFFFF)/2];
-				}
-				if (v_addr & 1) {
-					value &= 0xFF;
-				} else {
-					value >>= 8;
-				}
-			}
+			value = read_byte(p_addr, (void **)context->mem_pointers, &context->options->gen, context);
 		}
 		break;
 	}
@@ -377,7 +383,7 @@
 	} else {
 		zremove_breakpoint(context, address);
 	}
-	uint8_t * pc = get_native_pointer(address, (void **)context->mem_pointers, &context->options->gen);
+	uint8_t * pc = get_native_pointer(address, (void **)context->mem_pointers, &context->Z80_OPTS->gen);
 	if (!pc) {
 		fatal_error("Failed to get native pointer on entering Z80 debugger at address %X\n", address);
 	}
@@ -487,19 +493,21 @@
 					if (inst.addr_mode == Z80_IMMED) {
 						after = inst.immed;
 					} else if (inst.ea_reg == Z80_HL) {
+#ifndef NEW_CORE
 						after = context->regs[Z80_H] << 8 | context->regs[Z80_L];
 					} else if (inst.ea_reg == Z80_IX) {
 						after = context->regs[Z80_IXH] << 8 | context->regs[Z80_IXL];
 					} else if (inst.ea_reg == Z80_IY) {
 						after = context->regs[Z80_IYH] << 8 | context->regs[Z80_IYL];
+#endif
 					}
 				} else if(inst.op == Z80_JR) {
 					after += inst.immed;
 				} else if(inst.op == Z80_RET) {
-					uint8_t *sp = get_native_pointer(context->sp, (void **)context->mem_pointers, &context->options->gen);
+					uint8_t *sp = get_native_pointer(context->sp, (void **)context->mem_pointers, &context->Z80_OPTS->gen);
 					if (sp) {
 						after = *sp;
-						sp = get_native_pointer((context->sp + 1) & 0xFFFF, (void **)context->mem_pointers, &context->options->gen);
+						sp = get_native_pointer((context->sp + 1) & 0xFFFF, (void **)context->mem_pointers, &context->Z80_OPTS->gen);
 						if (sp) {
 							after |= *sp << 8;
 						}
@@ -527,9 +535,9 @@
 					break;
 				}
 				memmap_chunk const *ram_chunk = NULL;
-				for (int i = 0; i < context->options->gen.memmap_chunks; i++)
+				for (int i = 0; i < context->Z80_OPTS->gen.memmap_chunks; i++)
 				{
-					memmap_chunk const *cur = context->options->gen.memmap + i;
+					memmap_chunk const *cur = context->Z80_OPTS->gen.memmap + i;
 					if (cur->flags & MMAP_WRITE) {
 						ram_chunk = cur;
 						break;
@@ -540,7 +548,7 @@
 					if (size > ram_chunk->mask) {
 						size = ram_chunk->mask+1;
 					}
-					uint8_t *buf = get_native_pointer(ram_chunk->start, (void **)context->mem_pointers, &context->options->gen);
+					uint8_t *buf = get_native_pointer(ram_chunk->start, (void **)context->mem_pointers, &context->Z80_OPTS->gen);
 					FILE * f = fopen(param, "wb");
 					if (f) {
 						if(fwrite(buf, 1, size, f) != size) {
@@ -556,12 +564,15 @@
 				}
 				break;
 			}
+			case '?':
+				print_z80_help();
+				break;
 			default:
 				if (
-					!context->options->gen.debug_cmd_handler
-					|| !context->options->gen.debug_cmd_handler(&system->header, input_buf)
+					!context->Z80_OPTS->gen.debug_cmd_handler
+					|| !context->Z80_OPTS->gen.debug_cmd_handler(&system->header, input_buf)
 				) {
-					fprintf(stderr, "Unrecognized debugger command %s\n", input_buf);
+					fprintf(stderr, "Unrecognized debugger command %s\nUse '?' for help.\n", input_buf);
 				}
 				break;
 		}
@@ -574,7 +585,7 @@
 static uint32_t branch_t;
 static uint32_t branch_f;
 
-int run_debugger_command(m68k_context *context, char *input_buf, m68kinst inst, uint32_t after)
+int run_debugger_command(m68k_context *context, uint32_t address, char *input_buf, m68kinst inst, uint32_t after)
 {
 	char * param;
 	char format_char;
@@ -692,7 +703,7 @@
 					fputs("display command requires a parameter\n", stderr);
 					break;
 				}
-				debugger_print(context, format_char, param);
+				debugger_print(context, format_char, param, address);
 				add_display(&displays, &disp_index, format_char, param);
 			} else {
 				param = find_param(input_buf);
@@ -723,11 +734,13 @@
 				}
 			}
 			param = find_param(input_buf);
-			if (!param) {
-				fputs("p command requires a parameter\n", stderr);
-				break;
+			if (param) {
+				debugger_print(context, format_char, param, address);
+			} else {
+				m68k_disasm(&inst, input_buf);
+				printf("%X: %s\n", address, input_buf);
 			}
-			debugger_print(context, format_char, param);
+			
 			break;
 		case 'n':
 			if (inst.op == M68K_RTS) {
@@ -791,10 +804,12 @@
 				param = find_param(input_buf);
 				if (!param) {
 					fputs("Missing destination parameter for set\n", stderr);
+					return 1;
 				}
 				char *val = find_param(param);
 				if (!val) {
 					fputs("Missing value parameter for set\n", stderr);
+					return 1;
 				}
 				long int_val;
 				int reg_num;
@@ -835,6 +850,9 @@
 					fprintf(stderr, "Invalid destinatino %s\n", param);
 				}
 				break;
+			} else if (input_buf[1] == 'r') {
+				system->header.soft_reset(&system->header);
+				return 0;
 			} else {
 				if (inst.op == M68K_RTS) {
 					after = m68k_read_long(context->aregs[7], context);
@@ -929,17 +947,62 @@
 			break;
 		}
 #endif
+		case '?':
+			print_m68k_help();
+			break;
 		case 'q':
 			puts("Quitting");
 			exit(0);
 			break;
 		default:
-			fprintf(stderr, "Unrecognized debugger command %s\n", input_buf);
+			fprintf(stderr, "Unrecognized debugger command %s\nUse '?' for help.\n", input_buf);
 			break;
 	}
 	return 1;
 }
 
+void print_m68k_help()
+{
+	//Command summary for the 68K debugger prompt, written to stdout in one call
+	static const char help_text[] =
+		"M68k Debugger Commands\n"
+		"    b ADDRESS            - Set a breakpoint at ADDRESS\n"
+		"    d BREAKPOINT         - Delete a 68K breakpoint\n"
+		"    co BREAKPOINT        - Run a list of debugger commands each time\n"
+		"                           BREAKPOINT is hit\n"
+		"    a ADDRESS            - Advance to address\n"
+		"    n                    - Advance to next instruction\n"
+		"    o                    - Advance to next instruction ignoring branches to\n"
+		"                           lower addresses (good for breaking out of loops)\n"
+		"    s                    - Advance to next instruction (follows bsr/jsr)\n"
+		"    se REG|ADDRESS VALUE - Set value\n"
+		"    sr                   - Soft reset\n"
+		"    c                    - Continue\n"
+		"    bt                   - Print a backtrace\n"
+		"    p[/(x|X|d|c)] VALUE  - Print a register or memory location\n"
+		"    di[/(x|X|d|c)] VALUE - Print a register or memory location each time\n"
+		"                           a breakpoint is hit\n"
+		"    vs                   - Print VDP sprite list\n"
+		"    vr                   - Print VDP register info\n"
+		"    yc [CHANNEL NUM]     - Print YM-2612 channel info\n"
+		"    yt                   - Print YM-2612 timer info\n"
+		"    zb ADDRESS           - Set a Z80 breakpoint\n"
+		"    zp[/(x|X|d|c)] VALUE - Display a Z80 value\n"
+		"    ?                    - Display help\n"
+		"    q                    - Quit BlastEm\n";
+	fputs(help_text, stdout);
+}
+
+void print_z80_help()
+{
+	//Command summary for the Z80 debugger prompt.
+	//The '?' entry is listed because the Z80 debugger dispatches '?' to this function.
+	printf("Z80 Debugger Commands\n");
+	printf("    b  ADDRESS           - Set a breakpoint at ADDRESS\n");
+	printf("    de BREAKPOINT        - Delete a Z80 breakpoint\n");
+	printf("    a  ADDRESS           - Advance to address\n");
+	printf("    n                    - Advance to next instruction\n");
+	printf("    c                    - Continue\n");
+	printf("    p[/(x|X|d|c)] VALUE  - Print a register or memory location\n");
+	printf("    di[/(x|X|d|c)] VALUE - Print a register or memory location each time\n");
+	printf("                           a breakpoint is hit\n");
+	printf("    ?                    - Display help\n");
+	printf("    q                    - Quit BlastEm\n");
+}
 
 void debugger(m68k_context * context, uint32_t address)
 {
@@ -989,7 +1052,7 @@
 				char *cmd = commands;
 				strip_nl(cmd);
 				commands += strlen(cmd) + 1;
-				debugging = run_debugger_command(context, cmd, inst, after);
+				debugging = run_debugger_command(context, address, cmd, inst, after);
 			}
 			free(copy);
 		}
@@ -1002,7 +1065,7 @@
 		remove_breakpoint(context, address);
 	}
 	for (disp_def * cur = displays; cur; cur = cur->next) {
-		debugger_print(context, cur->format_char, cur->param);
+		debugger_print(context, cur->format_char, cur->param, address);
 	}
 	m68k_disasm(&inst, input_buf);
 	printf("%X: %s\n", address, input_buf);
@@ -1042,7 +1105,7 @@
 		} else {
 			strcpy(input_buf, last_cmd);
 		}
-		debugging = run_debugger_command(context, input_buf, inst, after);
+		debugging = run_debugger_command(context, address, input_buf, inst, after);
 	}
 	return;
 }
--- a/debug.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/debug.h	Sat Jan 15 13:15:21 2022 -0800
@@ -3,7 +3,11 @@
 
 #include <stdint.h>
 #include "m68k_core.h"
+#ifdef NEW_CORE
+#include "z80.h"
+#else
 #include "z80_to_x86.h"
+#endif
 
 typedef struct disp_def {
 	struct disp_def * next;
@@ -25,5 +29,7 @@
 void remove_display(disp_def ** head, uint32_t index);
 void debugger(m68k_context * context, uint32_t address);
 z80_context * zdebugger(z80_context * context, uint16_t address);
+void print_m68k_help();
+void print_z80_help();
 
 #endif //DEBUG_H_
--- a/default.cfg	Sat Jan 05 00:58:08 2019 -0800
+++ b/default.cfg	Sat Jan 15 13:15:21 2022 -0800
@@ -22,6 +22,7 @@
 		v ui.vram_debug
 		c ui.cram_debug
 		n ui.compositing_debug
+		m ui.vgm_log
 		esc ui.exit
 		` ui.save_state
 		0 ui.set_speed.0
@@ -295,6 +296,9 @@
 	gl on
 	#scaling can be linear (for linear interpolation) or nearest (for nearest neighbor)
 	scaling linear
+	#When off, a 512x512 texture is used for each field; when on, a smaller texture is used.
+	#Turning this on seems to help performance on certain mobile GPUs like Mali
+	npot_textures off
 	ntsc {
 		overscan {
 			#these values will result in square pixels in H40 mode
@@ -334,6 +338,8 @@
 	rate 48000
 	buffer 512
 	lowpass_cutoff 3390
+	#Use f32 for 32-bit floating point, s16 for signed 16-bit integer
+	format f32
 }
 
 clocks {
@@ -364,6 +370,10 @@
 	screenshot_path $HOME
 	#see strftime for the format specifiers valid in screenshot_template
 	screenshot_template blastem_%Y%m%d_%H%M%S.png
+	#path for storing VGM recordings, accepts the same variables as initial_path
+	vgm_path $HOME
+	#see strftime for the format specifiers valid in vgm_template
+	vgm_template blastem_%Y%m%d_%H%M%S.vgm
 	#path template for saving SRAM, EEPROM and savestates
 	#accepts special variables $HOME, $EXEDIR, $USERDATA, $ROMNAME
 	save_path $USERDATA/blastem/$ROMNAME
@@ -385,6 +395,8 @@
 	#MegaWiFi allows ROMs to make connections to the internet
 	#so it should only be enabled for ROMs you trust
 	megawifi off
+	#Model of the emulated Gen/MD system, see systems.cfg for a list of options
+	model md1va3
 }
 
 
--- a/dis.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/dis.c	Sat Jan 15 13:15:21 2022 -0800
@@ -269,19 +269,21 @@
 		{
 			*cur = (*cur >> 8) | (*cur << 8);
 		}
-		uint32_t start = filebuf[2] << 16 | filebuf[3];
-		uint32_t int_2 = filebuf[0x68/2] << 16 | filebuf[0x6A/2];
-		uint32_t int_4 = filebuf[0x70/2] << 16 | filebuf[0x72/2];
-		uint32_t int_6 = filebuf[0x78/2] << 16 | filebuf[0x7A/2];
-		named_labels = add_label(named_labels, "start", start);
-		named_labels = add_label(named_labels, "int_2", int_2);
-		named_labels = add_label(named_labels, "int_4", int_4);
-		named_labels = add_label(named_labels, "int_6", int_6);
-		if (!def || !only) {
-			def = defer(start, def);
-			def = defer(int_2, def);
-			def = defer(int_4, def);
-			def = defer(int_6, def);
+		if (!address_off) {
+			uint32_t start = filebuf[2] << 16 | filebuf[3];
+			uint32_t int_2 = filebuf[0x68/2] << 16 | filebuf[0x6A/2];
+			uint32_t int_4 = filebuf[0x70/2] << 16 | filebuf[0x72/2];
+			uint32_t int_6 = filebuf[0x78/2] << 16 | filebuf[0x7A/2];
+			named_labels = add_label(named_labels, "start", start);
+			named_labels = add_label(named_labels, "int_2", int_2);
+			named_labels = add_label(named_labels, "int_4", int_4);
+			named_labels = add_label(named_labels, "int_6", int_6);
+			if (!def || !only) {
+				def = defer(start, def);
+				def = defer(int_2, def);
+				def = defer(int_4, def);
+				def = defer(int_6, def);
+			}
 		}
 	}
 	uint16_t *encoded, *next;
@@ -292,7 +294,7 @@
 			encoded = NULL;
 			address = def->address;
 			if (!is_visited(address)) {
-				encoded = filebuf + (address - address_off)/2;
+				encoded = filebuf + ((address & 0xFFFFFF) - address_off)/2;
 			}
 			tmpd = def;
 			def = def->next;
@@ -302,7 +304,7 @@
 			break;
 		}
 		for(;;) {
-			if (address > address_end || address < address_off) {
+			if ((address & 0xFFFFFF) > address_end || address < address_off) {
 				break;
 			}
 			visit(address);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/event_log.c	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,748 @@
+#ifdef _WIN32
+#define WINVER 0x501
+#include <winsock2.h>
+#include <ws2tcpip.h>
+#else
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <netdb.h>
+#include <netinet/tcp.h>
+#endif
+
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include "event_log.h"
+#include "util.h"
+#include "blastem.h"
+#include "saves.h"
+#include "zlib/zlib.h"
+
+enum {
+	CMD_GAMEPAD_DOWN,
+	CMD_GAMEPAD_UP,
+};
+
+static uint8_t active, fully_active;
+static FILE *event_file;
+static serialize_buffer buffer;
+static uint8_t *compressed;
+static size_t compressed_storage;
+static z_stream output_stream;
+static uint32_t last;
+
+static void event_log_common_init(void)
+{
+	//Shared setup for the file and TCP event logs: prepares the uncompressed
+	//event buffer, the compressed output buffer and the deflate stream, then
+	//marks the event log as active.
+	init_serialize(&buffer);
+	compressed_storage = 128*1024;
+	compressed = malloc(compressed_storage);
+	if (!compressed) {
+		fatal_error("Failed to allocate %d bytes for event log compression buffer\n", (int)compressed_storage);
+	}
+	//output_stream has static storage, so zalloc/zfree/opaque start out as Z_NULL
+	int result = deflateInit(&output_stream, 9);
+	if (result != Z_OK) {
+		fatal_error("deflateInit returned %d\n", result);
+	}
+	output_stream.avail_out = compressed_storage;
+	output_stream.next_out = compressed;
+	output_stream.avail_in = 0;
+	output_stream.next_in = buffer.data;
+	last = 0;
+	active = 1;
+}
+
+static uint8_t multi_count;
+static size_t multi_start;
+static void finish_multi(void)
+{
+	//Close the current run: store the repeat count (biased by 2) in the
+	//header byte at multi_start, then mark that no run is in progress.
+	uint8_t stored_count = multi_count - 2;
+	uint8_t *header = buffer.data + multi_start;
+	*header |= stored_count;
+	multi_count = 0;
+}
+
+static void file_finish(void)
+{
+	//atexit handler for file logging: compresses any events still waiting in
+	//the buffer, terminates the deflate stream and closes the file.
+	if (multi_count) {
+		//a run left open would otherwise be written without its repeat count
+		finish_multi();
+	}
+	//write the compressed output produced so far
+	fwrite(compressed, 1, output_stream.next_out - compressed, event_file);
+	//events saved since the last deflate call (e.g. cycle adjustments) are still uncompressed
+	output_stream.avail_in = buffer.size - (output_stream.next_in - buffer.data);
+	int result;
+	do {
+		output_stream.next_out = compressed;
+		output_stream.avail_out = compressed_storage;
+		//Z_OK from a Z_FINISH call means more output space is needed, so go around again
+		result = deflate(&output_stream, Z_FINISH);
+		fwrite(compressed, 1, output_stream.next_out - compressed, event_file);
+	} while (result == Z_OK);
+	if (Z_STREAM_END != result) {
+		fatal_error("Final deflate call returned %d\n", result);
+	}
+	fclose(event_file);
+}
+
+static const char el_ident[] = "BLSTEL\x02\x00";
+void event_log_file(char *fname)
+{
+	//Start logging events to fname; on failure only a warning is issued
+	event_file = fopen(fname, "wb");
+	if (event_file) {
+		//the identifier is written without its terminating NUL
+		fwrite(el_ident, 1, sizeof(el_ident) - 1, event_file);
+		event_log_common_init();
+		fully_active = 1;
+		//the stream is finished and the file closed when the process exits
+		atexit(file_finish);
+	} else {
+		warning("Failed to open event file %s for writing\n", fname);
+	}
+}
+
+typedef struct { //per-connection state for one TCP event log client
+	uint8_t  *send_progress; //next unsent byte in the compressed buffer; NULL until the remote has been sent a state
+	int      sock; //connected socket returned by accept
+	uint8_t  players[1]; //TODO: Expand when support for multiple players per remote is added
+	uint8_t  num_players; //number of valid entries in players; 0 when no player number was available
+} remote;
+
+static int listen_sock;
+static remote remotes[7];
+static int num_remotes;
+static uint8_t available_players[7] = {2,3,4,5,6,7,8};
+static int num_available_players = 7;
+void event_log_tcp(char *address, char *port)
+{
+	//Start listening for event log remotes on address:port.
+	//On any failure a warning is issued, the event log is left inactive and
+	//listen_sock is reset to 0 so that other code does not treat the log as
+	//being in socket mode.
+	struct addrinfo request, *result = NULL;
+	socket_init();
+	memset(&request, 0, sizeof(request));
+	request.ai_family = AF_INET;
+	request.ai_socktype = SOCK_STREAM;
+	request.ai_flags = AI_PASSIVE;
+	if (getaddrinfo(address, port, &request, &result) != 0 || !result) {
+		//result is not valid after a failed lookup, so there is nothing to free
+		warning("Failed to resolve event log listen address %s:%s\n", address, port);
+		return;
+	}
+	
+	listen_sock = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
+	if (listen_sock < 0) {
+		warning("Failed to open event log listen socket on %s:%s\n", address, port);
+		goto clear_socket;
+	}
+	int param = 1;
+	setsockopt(listen_sock, SOL_SOCKET, SO_REUSEADDR, (const char *)&param, sizeof(param));
+	if (bind(listen_sock, result->ai_addr, result->ai_addrlen) < 0) {
+		warning("Failed to bind event log listen socket on %s:%s\n", address, port);
+		goto close_socket;
+	}
+	if (listen(listen_sock, 3) < 0) {
+		warning("Failed to listen for event log remotes on %s:%s\n", address, port);
+		goto close_socket;
+	}
+	//presumably switches the listen socket to non-blocking mode so accept can be polled - confirm against socket_blocking
+	socket_blocking(listen_sock, 0);
+	event_log_common_init();
+	freeaddrinfo(result);
+	return;
+close_socket:
+	socket_close(listen_sock);
+clear_socket:
+	listen_sock = 0;
+	freeaddrinfo(result);
+}
+
+static uint8_t *system_start;
+static size_t system_start_size;
+void event_system_start(system_type stype, vid_std video_std, char *name)
+{
+	//Emits the system start header: system type, video standard and the
+	//length-prefixed name. In socket mode the header is kept in system_start
+	//for later use; otherwise it is written straight to the event file.
+	if (!active) {
+		return;
+	}
+	save_int8(&buffer, stype);
+	save_int8(&buffer, video_std);
+	//the length is stored in a single byte, so the name is truncated to 255 bytes
+	size_t name_len = strlen(name);
+	if (name_len > 255) {
+		name_len = 255;
+	}
+	save_int8(&buffer, name_len);
+	//write exactly as many bytes as the length byte announces
+	save_buffer8(&buffer, name, name_len);
+	if (listen_sock) {
+		system_start = malloc(buffer.size);
+		system_start_size = buffer.size;
+		memcpy(system_start, buffer.data, buffer.size);
+	} else {
+		//system start header is never compressed, so write to file immediately
+		fwrite(buffer.data, 1, buffer.size, event_file);
+	}
+	buffer.size = 0;
+}
+
+//header formats
+//Single byte: 4 bit type, 4 bit delta (16-31)
+//Three Byte: 8 bit type, 16-bit delta
+//Four byte: 8-bit type, 24-bit signed delta
+#define FORMAT_3BYTE 0xE0
+#define FORMAT_4BYTE 0xF0
+static uint8_t last_event_type = 0xFF;
+static uint32_t last_delta;
+static void event_header(uint8_t type, uint32_t cycle) //writes the header for an event of this type at cycle, folding identical repeats into an EVENT_MULTI run
+{
+	uint32_t delta = cycle - last; //cycles elapsed since the value stored in last
+	if (multi_count) {
+		if (type != last_event_type || delta != last_delta) {
+			finish_multi(); //the run is broken; a normal header for this event is written below
+		} else {
+			++multi_count;
+			if (multi_count == 17) { //finish_multi stores multi_count - 2, so 17 is stored as 15
+				finish_multi();
+				last_event_type = 0xFF;
+			}
+			return; //events inside a run get no header of their own
+		}
+	} else if (type == last_event_type && delta == last_delta && type != EVENT_FLUSH) {
+		//make some room
+		save_int8(&buffer, 0);
+		//shift existing command
+		memmove(buffer.data + multi_start + 1, buffer.data + multi_start, buffer.size - multi_start - 1);
+		buffer.data[multi_start] = EVENT_MULTI << 4; //count bits are filled in later by finish_multi
+		multi_count = 2;
+		return;
+	}
+	multi_start = buffer.size; //remember where this header begins in case a run starts with it
+	last_event_type = type;
+	last_delta = delta;
+	
+	if (delta > 65535) {
+		save_int8(&buffer, FORMAT_4BYTE | type); //four byte form: type byte followed by a 24-bit delta
+		save_int8(&buffer, delta >> 16);
+		save_int16(&buffer, delta);
+	} else if (delta >= 16 && delta < 32) {
+		save_int8(&buffer, type << 4 | (delta - 16)); //single byte form: type in the high nibble, delta - 16 in the low nibble
+	} else {
+		save_int8(&buffer, FORMAT_3BYTE | type); //three byte form: type byte followed by a 16-bit delta
+		save_int16(&buffer, delta);
+	}
+}
+
+void event_cycle_adjust(uint32_t cycle, uint32_t deduction)
+{
+	//Logs a cycle counter adjustment and rebases the reference cycle used
+	//for delta encoding; does nothing unless the log is fully active
+	if (fully_active) {
+		event_header(EVENT_ADJUST, cycle);
+		save_int32(&buffer, deduction);
+		last = cycle - deduction;
+	}
+}
+
+static uint8_t next_available_player(void)
+{
+	//Removes the lowest player number from available_players and returns it;
+	//returns 0xFF when no entry below 0xFF is available
+	uint8_t best = 0xFF;
+	int best_slot = -1;
+	int slot = 0;
+	while (slot < num_available_players)
+	{
+		uint8_t candidate = available_players[slot];
+		if (candidate < best) {
+			best = candidate;
+			best_slot = slot;
+		}
+		slot++;
+	}
+	if (best_slot < 0) {
+		return best;
+	}
+	//fill the vacated slot with the last entry and shrink the list
+	--num_available_players;
+	available_players[best_slot] = available_players[num_available_players];
+	return best;
+}
+
+static void flush_socket(void) //accepts at most one new remote, reads gamepad commands from active remotes and sends them pending compressed data
+{
+	int remote_sock = accept(listen_sock, NULL, NULL);
+	if (remote_sock != -1) {
+		if (num_remotes == 7) { //the remotes array holds 7 entries; extra connections are closed immediately
+			socket_close(remote_sock);
+		} else {
+			printf("remote %d connected\n", num_remotes);
+			uint8_t player = next_available_player();
+			remotes[num_remotes++] = (remote){
+				.sock = remote_sock,
+				.send_progress = NULL,
+				.players = {player},
+				.num_players = player == 0xFF ? 0 : 1
+			};
+			current_system->save_state = EVENTLOG_SLOT + 1; //presumably requests a save state that event_state then sends to the new remote - confirm
+		}
+	}
+	uint8_t *min_progress = compressed; //NOTE(review): despite the name, the loop below keeps the largest send_progress it sees - confirm intent
+	for (int i = 0; i < num_remotes; i++) {
+		if (remotes[i].send_progress) { //skip remotes that have not been sent a state yet
+			uint8_t recv_buffer[1500];
+			int bytes = recv(remotes[i].sock, recv_buffer, sizeof(recv_buffer), 0);
+			for (int j = 0; j < bytes; j++)
+			{
+				uint8_t cmd = recv_buffer[j];
+				switch(cmd)
+				{
+				case CMD_GAMEPAD_DOWN:
+				case CMD_GAMEPAD_UP: {
+					++j; //the command byte is followed by one pad/button byte
+					if (j < bytes) {
+						uint8_t button = recv_buffer[j];
+						uint8_t pad = (button >> 5) - 1; //top 3 bits hold a 1-based pad number
+						button &= 0x1F; //low 5 bits hold the button
+						if (pad <  remotes[i].num_players) {
+							pad = remotes[i].players[pad]; //translate the remote's pad index to its assigned player number
+							if (cmd == CMD_GAMEPAD_DOWN) {
+								current_system->gamepad_down(current_system, pad, button);
+							} else {
+								current_system->gamepad_up(current_system, pad, button);
+							}
+						}
+					} else {
+						warning("Received incomplete command %X\n", cmd);
+					}
+					break;
+				}
+				default:
+					warning("Unrecognized remote command %X\n", cmd);
+					j = bytes; //discard the rest of the received data
+				}
+			}
+			int sent = 1;
+			while (sent && output_stream.next_out > remotes[i].send_progress) //NOTE(review): a would-block error leaves sent at a negative value, so the loop retries immediately
+			{
+				sent = send(remotes[i].sock, remotes[i].send_progress, output_stream.next_out - remotes[i].send_progress, 0);
+				if (sent >= 0) {
+					remotes[i].send_progress += sent;
+				} else if (!socket_error_is_wouldblock()) {
+					socket_close(remotes[i].sock);
+					for (int j = 0; j < remotes[i].num_players; j++) {
+						available_players[num_available_players++] = remotes[i].players[j]; //return this remote's player numbers to the pool
+					}
+					remotes[i] = remotes[num_remotes-1]; //fill the hole with the last remote
+					num_remotes--;
+					if (!num_remotes) {
+						//last remote disconnected, reset buffers/deflate
+						fully_active = 0;
+						deflateReset(&output_stream);
+						output_stream.next_out = compressed;
+						output_stream.avail_out = compressed_storage;
+						buffer.size = 0;
+					}
+					i--; //revisit this index since it now holds a different remote
+					break;
+				}
+				if (remotes[i].send_progress > min_progress) {
+					min_progress = remotes[i].send_progress;
+				}
+			}
+		}
+	}
+	if (min_progress == output_stream.next_out) { //tracked progress has reached the end of the compressed data, so rewind the output buffer
+		output_stream.next_out = compressed;
+		output_stream.avail_out = compressed_storage;
+		for (int i = 0; i < num_remotes; i++) {
+			if (remotes[i].send_progress) {
+				remotes[i].send_progress = compressed;
+			}
+		}
+	}
+}
+
+uint8_t wrote_since_last_flush;
+void event_log(uint8_t type, uint32_t cycle, uint8_t size, uint8_t *payload) //appends one event (header plus size payload bytes) and, outside of a multi run, compresses the buffered data
+{
+	if (!fully_active) {
+		return;
+	}
+	event_header(type, cycle);
+	last = cycle; //later deltas are measured from this event
+	save_buffer8(&buffer, payload, size);
+	if (!multi_count) {
+		last_event_type = 0xFF; //NOTE(review): 0xFF appears to be a sentinel that stops the next event from matching this one in event_header - confirm
+		output_stream.avail_in = buffer.size - (output_stream.next_in - buffer.data); //everything in buffer after next_in is still uncompressed
+		int result = deflate(&output_stream, Z_NO_FLUSH);
+		if (result != Z_OK) {
+			fatal_error("deflate returned %d\n", result);
+		}
+		if (listen_sock) {
+			if ((output_stream.next_out - compressed) > 1280 || !output_stream.avail_out) { //send once more than 1280 compressed bytes are waiting or the output buffer is full
+				flush_socket();
+				wrote_since_last_flush = 1;
+			}
+		} else if (!output_stream.avail_out) { //file mode: write the whole output buffer once it is full
+			fwrite(compressed, 1, compressed_storage, event_file);
+			output_stream.next_out = compressed;
+			output_stream.avail_out = compressed_storage;
+		}
+		if (!output_stream.avail_in) { //all input consumed, so the event buffer can be reused from the start
+			buffer.size = 0;
+			output_stream.next_in = buffer.data;
+		}
+	}
+}
+
+static uint32_t last_word_address;
+void event_vram_word(uint32_t cycle, uint32_t address, uint16_t value)
+{
+	//Logs a VRAM word write. When the address is fewer than 256 bytes past
+	//the previous word write a one byte delta is stored, otherwise the
+	//24-bit address; the big-endian value follows in either case.
+	uint8_t payload[5];
+	uint8_t length = 0;
+	uint8_t type;
+	uint32_t offset = address - last_word_address;
+	if (offset < 256) {
+		type = EVENT_VRAM_WORD_DELTA;
+		payload[length++] = offset;
+	} else {
+		type = EVENT_VRAM_WORD;
+		payload[length++] = address >> 16;
+		payload[length++] = address >> 8;
+		payload[length++] = address;
+	}
+	payload[length++] = value >> 8;
+	payload[length++] = value;
+	event_log(type, cycle, length, payload);
+	last_word_address = address;
+}
+
+static uint32_t last_byte_address;
+void event_vram_byte(uint32_t cycle, uint16_t address, uint8_t byte, uint8_t auto_inc)
+{
+	//Logs a VRAM byte write using the shortest encoding that applies:
+	//address one past the previous write, address advanced by auto_inc,
+	//a one byte delta, or the full 16-bit address. The byte written is
+	//always the last payload byte.
+	uint8_t payload[3];
+	uint8_t length = 0;
+	uint8_t type;
+	uint32_t offset = address - last_byte_address;
+	if (offset == 1) {
+		type = EVENT_VRAM_BYTE_ONE;
+	} else if (offset == auto_inc) {
+		type = EVENT_VRAM_BYTE_AUTO;
+	} else if (offset < 256) {
+		type = EVENT_VRAM_BYTE_DELTA;
+		payload[length++] = offset;
+	} else {
+		type = EVENT_VRAM_BYTE;
+		payload[length++] = address >> 8;
+		payload[length++] = address;
+	}
+	payload[length++] = byte;
+	event_log(type, cycle, length, payload);
+	last_byte_address = address;
+}
+
+static size_t send_all(int sock, uint8_t *data, size_t size, int flags)
+{
+	//Sends size bytes from data, repeating send until everything has been
+	//written or a call fails or makes no progress.
+	//Returns the number of bytes actually sent; callers compare it to size.
+	size_t total = 0;
+	//send reports errors as a negative value, so the result must be kept in a signed type
+	long sent = 1;
+	while(sent > 0 && total < size)
+	{
+		sent = send(sock, data + total, size - total, flags);
+		if (sent > 0) {
+			total += sent;
+		}
+	}
+	return total;
+}
+
+void deflate_flush(uint8_t full)
+{
+	//Compresses everything waiting in the event buffer.
+	//full == 0: sync flush; nothing is done when there is no pending input.
+	//full != 0: the deflate stream is finished and then reset, even when
+	//there is no pending input.
+	//The compressed buffer is doubled whenever it fills up, and deflate is
+	//called again until the flush or finish has actually completed.
+	output_stream.avail_in = buffer.size - (output_stream.next_in - buffer.data);
+	uint8_t pending = full || output_stream.avail_in;
+	uint8_t first = 1;
+	while (pending)
+	{
+		if (!output_stream.avail_out) {
+			size_t old_storage = compressed_storage;
+			//record remote positions as offsets before realloc can invalidate the old block
+			size_t offsets[sizeof(remotes)/sizeof(*remotes)];
+			for (int i = 0; i < num_remotes; i++) {
+				if (remotes[i].send_progress) {
+					offsets[i] = remotes[i].send_progress - compressed;
+				}
+			}
+			uint8_t *grown = realloc(compressed, old_storage * 2);
+			if (!grown) {
+				fatal_error("Failed to grow event log compression buffer to %d bytes\n", (int)(old_storage * 2));
+			}
+			compressed = grown;
+			compressed_storage = old_storage * 2;
+			//avail_out was 0, so output continues right after the old contents
+			output_stream.next_out = compressed + old_storage;
+			output_stream.avail_out = old_storage;
+			for (int i = 0; i < num_remotes; i++) {
+				if (remotes[i].send_progress) {
+					remotes[i].send_progress = compressed + offsets[i];
+				}
+			}
+		}
+		int result = deflate(&output_stream, full ? Z_FINISH : Z_SYNC_FLUSH);
+		if (full) {
+			if (result == Z_STREAM_END) {
+				result = deflateReset(&output_stream);
+				if (result != Z_OK) {
+					fatal_error("deflateReset returned %d\n", result);
+				}
+				pending = 0;
+			} else if (result != Z_OK) {
+				//Z_OK from a Z_FINISH call only means more output space is needed
+				fatal_error("deflate returned %d\n", result);
+			}
+		} else {
+			//a repeated flush call that finds nothing left to do reports Z_BUF_ERROR, which is not fatal
+			if (result != Z_OK && (first || result != Z_BUF_ERROR)) {
+				fatal_error("deflate returned %d\n", result);
+			}
+			//the flush is complete once deflate returns with output space left over
+			pending = !output_stream.avail_out || output_stream.avail_in;
+		}
+		first = 0;
+	}
+	output_stream.next_in = buffer.data;
+	buffer.size = 0;
+}
+/*
+ * Brings remotes that have not received anything yet (send_progress is unset) up
+ * to date by sending them the system start message followed by a compressed
+ * EVENT_STATE record built from state.
+ * cycle - used as the stream's last cycle while no remote is fully active
+ * state - serialized state that is appended after the EVENT_STATE header
+ * Remotes for which either send comes up short are closed and removed from the
+ * remotes array. The compressed state bytes are sent only to the new remotes and
+ * the output position is rewound afterwards.
+ */
+void event_state(uint32_t cycle, serialize_buffer *state)
+{
+	if (!fully_active) {
+		last = cycle;
+	}
+	uint8_t header[] = { //event byte followed by big-endian fields: 4 byte cycle, 3 byte word address, 2 byte byte address, 3 byte state size
+		EVENT_STATE << 4, last >> 24, last >> 16, last >> 8, last,
+		last_word_address >> 16, last_word_address >> 8, last_word_address,
+		last_byte_address >> 8, last_byte_address,
+		state->size >> 16, state->size >> 8, state->size
+	};
+	uint8_t sent_system_start = 0;
+	for (int i = 0; i < num_remotes; i++)
+	{
+		if (!remotes[i].send_progress) {
+			if (send_all(remotes[i].sock, system_start, system_start_size, 0) == system_start_size) {
+				sent_system_start = 1;
+			} else {
+				socket_close(remotes[i].sock);
+				remotes[i] = remotes[num_remotes-1]; //fill the hole with the last entry
+				num_remotes--;
+				i--; //revisit this index since it now holds a different remote
+			}
+		}
+	}
+	if (sent_system_start) {
+		if (fully_active) {
+			if (multi_count) {
+				finish_multi();
+			}
+			//full flush is needed so new and old clients can share a stream
+			deflate_flush(1);
+		}
+		save_buffer8(&buffer, header, sizeof(header));
+		save_buffer8(&buffer, state->data, state->size);
+		size_t old_compressed_size = output_stream.next_out - compressed; //offset at which the compressed state will start
+		deflate_flush(1);
+		size_t state_size = output_stream.next_out - compressed - old_compressed_size;
+		for (int i = 0; i < num_remotes; i++) {
+			if (!remotes[i].send_progress) {
+				if (send_all(remotes[i].sock, compressed + old_compressed_size, state_size, 0) == state_size) {
+					remotes[i].send_progress = compressed + old_compressed_size;
+					socket_blocking(remotes[i].sock, 0); //presumably switches the socket to non-blocking mode - confirm against socket_blocking
+					int flag = 1;
+					setsockopt(remotes[i].sock, IPPROTO_TCP, TCP_NODELAY, (const char *)&flag, sizeof(flag));
+					fully_active = 1;
+				} else {
+					socket_close(remotes[i].sock);
+					remotes[i] = remotes[num_remotes-1];
+					num_remotes--;
+					i--;
+				}
+			}
+		}
+		output_stream.next_out = compressed + old_compressed_size; //rewind so the state bytes are overwritten by later output
+		output_stream.avail_out = compressed_storage - old_compressed_size;
+	}
+}
+
+/*
+ * Writes an EVENT_FLUSH for cycle when the log is fully active, compresses the
+ * pending events and pushes the compressed data to the log file or to the
+ * connected sockets. Does nothing while logging is inactive.
+ */
+void event_flush(uint32_t cycle)
+{
+	if (!active) {
+		return;
+	}
+	if (fully_active) {
+		event_header(EVENT_FLUSH, cycle);
+		last = cycle;
+		deflate_flush(0);
+	}
+	if (!event_file) {
+		if (listen_sock) {
+			flush_socket();
+			wrote_since_last_flush = 0;
+		}
+		return;
+	}
+	//file output: write everything compressed so far and reuse the buffer from the start
+	fwrite(compressed, 1, output_stream.next_out - compressed, event_file);
+	fflush(event_file);
+	output_stream.avail_out = compressed_storage;
+	output_stream.next_out = compressed;
+}
+
+/*
+ * Socket-only flush: emits an EVENT_FLUSH for cycle and sends pending data, but
+ * only when the log is fully active, is not going to a file and
+ * wrote_since_last_flush is not set.
+ */
+void event_soft_flush(uint32_t cycle)
+{
+	if (fully_active && !wrote_since_last_flush && !event_file) {
+		event_header(EVENT_FLUSH, cycle);
+		last = cycle;
+		deflate_flush(0);
+		flush_socket();
+	}
+}
+
+/*
+ * Initialization shared by file and TCP readers: resets the decoding state,
+ * allocates the 512KB buffer for decompressed event data and zeroes the zlib
+ * stream so it is ready for inflateInit.
+ */
+static void init_event_reader_common(event_reader *reader)
+{
+	reader->last_cycle = 0;
+	reader->repeat_event = 0xFF;
+	//reader_next_event tests repeat_remaining before it reads anything, so it must
+	//not be left uninitialized when the caller did not zero the struct
+	reader->repeat_remaining = 0;
+	reader->repeat_delta = 0;
+	reader->storage = 512 * 1024;
+	init_deserialize(&reader->buffer, malloc(reader->storage), reader->storage);
+	//nothing has been decoded into the buffer yet
+	reader->buffer.size = 0;
+	memset(&reader->input_stream, 0, sizeof(reader->input_stream));
+}
+
+/*
+ * Prepares reader to decode an event log held entirely in memory.
+ * data - start of the log's uncompressed prefix: one byte, a name length byte and
+ *        the name itself; the zlib compressed event stream follows directly
+ * size - number of bytes available at data
+ * The prefix is copied to the reader's buffer as is and as much of the compressed
+ * stream as fits is inflated behind it. Errors are reported through fatal_error.
+ */
+void init_event_reader(event_reader *reader, uint8_t *data, size_t size)
+{
+	//a socket of 0 marks this reader as not being backed by a connection
+	reader->socket = 0;
+	init_event_reader_common(reader);
+	//reject truncated input before touching data[1] or computing the remaining size
+	if (size < 2 || size < (size_t)data[1] + 2) {
+		fatal_error("Event log is too short to contain its start message\n");
+	}
+	uint8_t name_len = data[1];
+	reader->buffer.size = name_len + 2;
+	memcpy(reader->buffer.data, data, reader->buffer.size);
+	reader->input_stream.next_in = data + reader->buffer.size;
+	reader->input_stream.avail_in = size - reader->buffer.size;
+	
+	int result = inflateInit(&reader->input_stream);
+	if (Z_OK != result) {
+		fatal_error("inflateInit returned %d\n", result);
+	}
+	reader->input_stream.next_out = reader->buffer.data + reader->buffer.size;
+	reader->input_stream.avail_out = reader->storage - reader->buffer.size;
+	result = inflate(&reader->input_stream, Z_NO_FLUSH);
+	if (Z_OK != result && Z_STREAM_END != result) {
+		fatal_error("inflate returned %d\n", result);
+	}
+	//everything up to the inflate output position is now valid decoded data
+	reader->buffer.size = reader->input_stream.next_out - reader->buffer.data;
+}
+
+/*
+ * Connects to an event log server at address:port and prepares reader to decode
+ * the stream it sends.
+ * Blocks until the uncompressed start message (3 header bytes, the third being
+ * the length of the name that follows) has been received. Bytes received beyond
+ * that message are handed to zlib as the beginning of the compressed stream.
+ * All failures are reported through fatal_error.
+ */
+void init_event_reader_tcp(event_reader *reader, char *address, char *port)
+{
+	struct addrinfo request, *result = NULL;
+	socket_init();
+	memset(&request, 0, sizeof(request));
+	request.ai_family = AF_INET;
+	request.ai_socktype = SOCK_STREAM;
+	request.ai_flags = AI_PASSIVE;
+	//result is only usable when the lookup succeeded
+	if (getaddrinfo(address, port, &request, &result) || !result) {
+		fatal_error("Failed to resolve %s:%s for event log connection\n", address, port);
+	}
+	
+	reader->socket = socket(result->ai_family, result->ai_socktype, result->ai_protocol);
+	if (reader->socket < 0) {
+		fatal_error("Failed to create socket for event log connection to %s:%s\n", address, port);
+	}
+	if (connect(reader->socket, result->ai_addr, result->ai_addrlen) < 0) {
+		fatal_error("Failed to connect to %s:%s for event log stream\n", address, port);
+	}
+	//the address list is not needed once the connection is established
+	freeaddrinfo(result);
+	
+	init_event_reader_common(reader);
+	reader->socket_buffer_size = 256 * 1024;
+	reader->socket_buffer = malloc(reader->socket_buffer_size);
+	//whatever arrives after the start message is copied to socket_buffer below,
+	//so never receive more here than that buffer can hold
+	size_t max_initial = reader->storage < reader->socket_buffer_size ? reader->storage : reader->socket_buffer_size;
+	
+	while(reader->buffer.size < 3 || reader->buffer.size < 3 + reader->buffer.data[2])
+	{
+		int bytes = recv(reader->socket, reader->buffer.data + reader->buffer.size, max_initial - reader->buffer.size, 0);
+		if (bytes < 0) {
+			fatal_error("Failed to receive system init from %s:%s\n", address, port);
+		}
+		if (!bytes) {
+			//recv returns 0 once the peer has closed the connection; without this
+			//check the loop would never terminate
+			fatal_error("Connection to %s:%s closed before system init was received\n", address, port);
+		}
+		reader->buffer.size += bytes;
+	}
+	size_t init_msg_len = 3 + reader->buffer.data[2];
+	memcpy(reader->socket_buffer, reader->buffer.data + init_msg_len, reader->buffer.size - init_msg_len);
+	reader->input_stream.next_in = reader->socket_buffer;
+	reader->input_stream.avail_in = reader->buffer.size - init_msg_len;
+	reader->buffer.size = init_msg_len;
+	int res = inflateInit(&reader->input_stream);
+	if (Z_OK != res) {
+		fatal_error("inflateInit returned %d\n", res);
+	}
+	reader->input_stream.next_out = reader->buffer.data + init_msg_len;
+	reader->input_stream.avail_out = reader->storage - init_msg_len;
+	res = inflate(&reader->input_stream, Z_NO_FLUSH);
+	//Z_BUF_ERROR is expected when no compressed data has arrived yet
+	if (Z_OK != res && Z_BUF_ERROR != res) {
+		fatal_error("inflate returned %d in init_event_reader_tcp\n", res);
+	}
+	//make data that was already inflated visible to readers of the buffer,
+	//matching what init_event_reader does after its first inflate call
+	reader->buffer.size = reader->input_stream.next_out - reader->buffer.data;
+	int flag = 1;
+	setsockopt(reader->socket, IPPROTO_TCP, TCP_NODELAY, (const char *)&flag, sizeof(flag));
+}
+/*
+ * Receives more compressed data from the reader's socket and appends it directly
+ * behind the input that has not been inflated yet (next_in .. next_in + avail_in).
+ * Before receiving, the socket buffer is doubled when less than 128KB of it is
+ * not occupied by pending input. Otherwise pending input is moved to the start of
+ * the buffer when its offset is at least as large as its length and it ends in
+ * the second half of the buffer.
+ * A negative recv result is fatal unless socket_error_is_wouldblock() is true.
+ * NOTE(review): a recv result of 0 is accepted without comment; if that means the
+ * peer closed the connection, callers looping on this function will not notice.
+ */
+static void read_from_socket(event_reader *reader)
+{
+	if (reader->socket_buffer_size - reader->input_stream.avail_in < 128 * 1024) {
+		reader->socket_buffer_size *= 2;
+		uint8_t *new_buf = malloc(reader->socket_buffer_size);
+		memcpy(new_buf, reader->input_stream.next_in, reader->input_stream.avail_in); //only pending input is kept and it lands at offset 0
+		free(reader->socket_buffer);
+		reader->socket_buffer = new_buf;
+		reader->input_stream.next_in = new_buf;
+	} else if (
+		reader->input_stream.next_in - reader->socket_buffer >= reader->input_stream.avail_in 
+		&& reader->input_stream.next_in - reader->socket_buffer + reader->input_stream.avail_in >= reader->socket_buffer_size/2
+	) {
+		memmove(reader->socket_buffer, reader->input_stream.next_in, reader->input_stream.avail_in);
+		reader->input_stream.next_in = reader->socket_buffer;
+	}
+	uint8_t *space_start = reader->input_stream.next_in + reader->input_stream.avail_in; //first byte behind the pending input
+	size_t space = (reader->socket_buffer + reader->socket_buffer_size) - space_start;
+	int bytes = recv(reader->socket, space_start, space, 0);
+	if (bytes >= 0) {
+		reader->input_stream.avail_in += bytes;
+	} else if (!socket_error_is_wouldblock()) {
+		fatal_error("Connection closed, error = %X\n", socket_last_error());
+	}
+}
+/*
+ * Inflates pending compressed input into the reader's event buffer.
+ * When the read position has passed the middle of the buffer, the unread bytes
+ * are first moved to the front to make room for new output.
+ * Any inflate result other than Z_OK or Z_STREAM_END is fatal.
+ * When a stream ends and the reader is socket backed or still has input, the
+ * zlib state is reset so the next stream can be decoded, and decoding continues
+ * right away when input is left.
+ */
+static void inflate_flush(event_reader *reader)
+{
+	if (reader->buffer.cur_pos > reader->storage / 2) {
+		memmove(reader->buffer.data, reader->buffer.data + reader->buffer.cur_pos, reader->buffer.size - reader->buffer.cur_pos);
+		reader->buffer.size -= reader->buffer.cur_pos;
+		reader->buffer.cur_pos = 0;
+		reader->input_stream.next_out = reader->buffer.data + reader->buffer.size; //output continues right behind the moved bytes
+		reader->input_stream.avail_out = reader->storage - reader->buffer.size;
+	}
+	int result = inflate(&reader->input_stream, Z_SYNC_FLUSH);
+	if (Z_OK != result && Z_STREAM_END != result) {
+		fatal_error("inflate returned %d\n", result);
+	}
+	reader->buffer.size = reader->input_stream.next_out - reader->buffer.data; //everything up to the output position is valid data
+	if (result == Z_STREAM_END && (reader->socket || reader->input_stream.avail_in)) {
+		inflateReset(&reader->input_stream);
+		if (reader->input_stream.avail_in) {
+			inflate_flush(reader); //input left after the end of a stream belongs to the next stream
+		}
+	}
+	
+}
+
+/*
+ * Tries to get at least bytes of decoded data behind the buffer's read position.
+ * Pending compressed input is inflated first. Socket backed readers then keep
+ * receiving and inflating until enough data is there; other readers return after
+ * the single inflate attempt whether or not it produced enough.
+ */
+void reader_ensure_data(event_reader *reader, size_t bytes)
+{
+	if (reader->buffer.size - reader->buffer.cur_pos >= bytes) {
+		return;
+	}
+	if (reader->input_stream.avail_in) {
+		inflate_flush(reader);
+	}
+	if (!reader->socket) {
+		return;
+	}
+	while (reader->buffer.size - reader->buffer.cur_pos < bytes)
+	{
+		read_from_socket(reader);
+		inflate_flush(reader);
+	}
+}
+
+/*
+ * Decodes the header of the next event and returns its type.
+ * cycle_out receives the event's cycle, i.e. the previous event's cycle plus the
+ * delta stored in the header. The event's payload, if any, is left in the buffer
+ * for the caller with two exceptions handled here:
+ *  - EVENT_ADJUST: the 4 byte adjustment is applied to last_cycle but not consumed
+ *  - EVENT_STATE: cycle and VRAM address fields are consumed and stored in reader;
+ *    the state size and data that follow are left for the caller
+ * An EVENT_MULTI header announces that the event following it repeats; the
+ * repetitions are returned by later calls without reading from the buffer.
+ */
+uint8_t reader_next_event(event_reader *reader, uint32_t *cycle_out)
+{
+	if (reader->repeat_remaining) {
+		//still replaying a repeated event, nothing to read
+		reader->repeat_remaining--;
+		*cycle_out = reader->last_cycle + reader->repeat_delta;
+		reader->last_cycle = *cycle_out;
+		return reader->repeat_event;
+	}
+	reader_ensure_data(reader, 1);
+	uint8_t header = load_int8(&reader->buffer);
+	uint8_t ret;
+	uint32_t delta;
+	uint8_t multi_start = 0;
+	if ((header & 0xF0) == (EVENT_MULTI << 4)) {
+		//low nibble holds the repeat count minus one; the real header follows
+		reader->repeat_remaining = (header & 0xF) + 1;
+		multi_start = 1;
+		reader_ensure_data(reader, 1);
+		header = load_int8(&reader->buffer);
+	}
+	if ((header & 0xF0) < FORMAT_3BYTE) {
+		//single byte header: type in the high nibble, delta - 16 in the low nibble
+		delta = (header & 0xF) + 16;
+		ret = header >> 4;
+	} else if ((header & 0xF0) == FORMAT_3BYTE) {
+		//type in the low nibble followed by a 16-bit delta
+		reader_ensure_data(reader, 2);
+		delta = load_int16(&reader->buffer);
+		ret = header & 0xF;
+	} else {
+		//type in the low nibble followed by a 24-bit delta
+		reader_ensure_data(reader, 3);
+		delta = load_int8(&reader->buffer) << 16;
+		//sign extend 24-bit delta to 32-bit
+		if (delta & 0x800000) {
+			delta |= 0xFF000000;
+		}
+		delta |= load_int16(&reader->buffer);
+		ret = header & 0xF;
+	}
+	if (multi_start) {
+		reader->repeat_event = ret;
+		reader->repeat_delta = delta;
+	}
+	*cycle_out = reader->last_cycle + delta;
+	reader->last_cycle = *cycle_out;
+	if (ret == EVENT_ADJUST) {
+		reader_ensure_data(reader, 4);
+		//peek at the adjustment only, the caller reads it again as the payload
+		size_t old_pos = reader->buffer.cur_pos;
+		uint32_t adjust = load_int32(&reader->buffer);
+		reader->buffer.cur_pos = old_pos;
+		reader->last_cycle -= adjust;
+	} else if (ret == EVENT_STATE) {
+		//4 byte cycle + 3 byte word address + 2 byte byte address are read below
+		reader_ensure_data(reader, 9);
+		reader->last_cycle = load_int32(&reader->buffer);
+		reader->last_word_address = load_int8(&reader->buffer) << 16;
+		reader->last_word_address |= load_int16(&reader->buffer);
+		reader->last_byte_address = load_int16(&reader->buffer);
+	}
+	return ret;
+}
+
+/*
+ * Consumes one byte from the reader's buffer and returns it as the system type
+ */
+uint8_t reader_system_type(event_reader *reader)
+{
+	uint8_t stype = load_int8(&reader->buffer);
+	return stype;
+}
+
+/*
+ * Sends a two byte gamepad command over the reader's socket: the command byte
+ * selects press or release, the second byte holds the pad number shifted left by
+ * 5 combined with the button.
+ */
+void reader_send_gamepad_event(event_reader *reader, uint8_t pad, uint8_t button, uint8_t down)
+{
+	uint8_t command[2];
+	if (down) {
+		command[0] = CMD_GAMEPAD_DOWN;
+	} else {
+		command[0] = CMD_GAMEPAD_UP;
+	}
+	command[1] = pad << 5 | button;
+	//TODO: Deal with the fact that we're not in blocking mode so this may not actually send all
+	//if the buffer is full
+	send_all(reader->socket, command, sizeof(command), 0);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/event_log.h	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,60 @@
+#ifndef EVENT_LOG_H_
+#define EVENT_LOG_H_
+/*
+ * Event types stored in the event log. The type travels in a nibble of the event
+ * header, which is why only values up to 15 exist.
+ */
+enum {
+	EVENT_FLUSH = 0, //no payload
+	EVENT_ADJUST = 1, //payload: 32-bit amount to deduct from the cycle counters
+	EVENT_PSG_REG = 2, //payload: 1 byte written to the PSG
+	EVENT_YM_REG = 3, //payload: part, register, value (1 byte each)
+	EVENT_VDP_REG = 4,
+	EVENT_VRAM_BYTE = 5, //payload: 16-bit address, byte
+	EVENT_VRAM_BYTE_DELTA = 6, //payload: 1 byte distance from the previous byte write, byte
+	EVENT_VRAM_BYTE_ONE = 7, //payload: byte; address is the previous byte write + 1
+	EVENT_VRAM_BYTE_AUTO = 8, //payload: byte; address is the previous byte write + auto increment
+	EVENT_VRAM_WORD = 9, //payload: 24-bit address, 16-bit value
+	EVENT_VRAM_WORD_DELTA = 10,
+	EVENT_VDP_INTRAM = 11,
+	EVENT_STATE = 12, //payload: cycle, last VRAM addresses, 24-bit size, serialized state
+	EVENT_MULTI = 13 //prefix: low nibble of the header is the repeat count - 1 for the event that follows
+	//14 and 15 are reserved for header types
+};
+
+#include "serialize.h"
+#include "zlib/zlib.h"
+typedef struct { //decoding state for one event log stream, either from memory or from a socket
+	size_t storage; //size of the allocation behind buffer.data
+	uint8_t *socket_buffer; //compressed bytes received from the socket (socket readers only)
+	size_t socket_buffer_size; //size of the socket_buffer allocation
+	int socket; //connection to the log server; 0 for readers set up by init_event_reader
+	uint32_t last_cycle; //cycle of the most recently decoded event, base for the next delta
+	uint32_t last_word_address; //taken from the most recent EVENT_STATE header
+	uint32_t last_byte_address; //taken from the most recent EVENT_STATE header
+	uint32_t repeat_delta; //cycle delta between repetitions announced by EVENT_MULTI
+	deserialize_buffer buffer; //decompressed event data; cur_pos is the read position
+	z_stream input_stream; //zlib inflate state
+	uint8_t repeat_event; //event type being repeated; initialized to 0xFF
+	uint8_t repeat_remaining; //repetitions reader_next_event still has to return
+} event_reader;
+
+#include "system.h"
+#include "render.h"
+
+void event_log_file(char *fname);
+void event_log_tcp(char *address, char *port);
+void event_system_start(system_type stype, vid_std video_std, char *name);
+void event_cycle_adjust(uint32_t cycle, uint32_t deduction);
+void event_log(uint8_t type, uint32_t cycle, uint8_t size, uint8_t *payload);
+void event_vram_word(uint32_t cycle, uint32_t address, uint16_t value);
+void event_vram_byte(uint32_t cycle, uint16_t address, uint8_t byte, uint8_t auto_inc);
+void event_state(uint32_t cycle, serialize_buffer *state);
+void event_flush(uint32_t cycle);
+void event_soft_flush(uint32_t cycle);
+
+void init_event_reader(event_reader *reader, uint8_t *data, size_t size);
+void init_event_reader_tcp(event_reader *reader, char *address, char *port);
+uint8_t reader_next_event(event_reader *reader, uint32_t *cycle_out);
+void reader_ensure_data(event_reader *reader, size_t bytes);
+uint8_t reader_system_type(event_reader *reader);
+void reader_send_gamepad_event(event_reader *reader, uint8_t pad, uint8_t button, uint8_t down);
+
+#endif //EVENT_LOG_H_
--- a/gamecontrollerdb.txt	Sat Jan 05 00:58:08 2019 -0800
+++ b/gamecontrollerdb.txt	Sat Jan 15 13:15:21 2022 -0800
@@ -132,7 +132,6 @@
 03000000380700006382000000000000,MLG GamePad PS3 Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
 03000000efbe0000edfe000000000000,Monect Virtual Controller,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a5,rightx:a3,righty:a4,start:b9,x:b3,y:b0,platform:Windows,
 03000000250900006688000000000000,MP-8866 Super Dual Box,a:b2,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a2,righty:a3,start:b8,x:b3,y:b0,platform:Windows,
-030000001008000001e5000000000000,NEXT SNES Controller,a:b2,b:b1,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,rightshoulder:b6,start:b9,x:b3,y:b0,platform:Windows,
 03000000152000000182000000000000,NGDS,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a3,righty:a4,start:b9,x:b3,y:b0,platform:Windows,
 03000000bd12000015d0000000000000,Nintendo Retrolink USB Super SNES Classic Controller,a:b2,b:b1,back:b8,leftshoulder:b4,leftx:a0,lefty:a1,rightshoulder:b5,start:b9,x:b3,y:b0,platform:Windows,
 030000007e0500000920000000000000,Nintendo Switch Pro Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Windows,
@@ -186,7 +185,6 @@
 030000000d0f00002200000000000000,REAL ARCADE Pro.V3,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
 030000000d0f00005b00000000000000,Real Arcade Pro.V4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Windows,
 030000000d0f00005c00000000000000,Real Arcade Pro.V4,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Windows,
-03000000790000001100000000000000,Retrolink SNES Controller,a:b2,b:b1,back:b8,dpdown:+a4,dpleft:-a3,dpright:+a3,dpup:-a4,leftshoulder:b4,rightshoulder:b5,start:b9,x:b3,y:b0,platform:Windows,
 0300000000f000000300000000000000,RetroUSB.com RetroPad,a:b1,b:b5,back:b2,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,start:b3,x:b0,y:b4,platform:Windows,
 0300000000f00000f100000000000000,RetroUSB.com Super RetroPort,a:b1,b:b5,back:b2,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,start:b3,x:b0,y:b4,platform:Windows,
 030000006b140000010d000000000000,Revolution Pro Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Windows,
@@ -287,7 +285,6 @@
 0300000025090000e803000000000000,Mayflash Wii Classic Controller,a:b1,b:b0,back:b8,dpdown:b13,dpleft:b12,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b2,platform:Mac OS X,
 03000000790000000018000000000000,Mayflash WiiU Pro Game Controller Adapter (DInput),a:b4,b:b8,back:b32,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b16,leftstick:b40,lefttrigger:b24,leftx:a0,lefty:a4,rightshoulder:b20,rightstick:b44,righttrigger:b28,rightx:a8,righty:a12,start:b36,x:b0,y:b12,platform:Mac OS X,
 03000000d8140000cecf000000000000,MC Cthulhu,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,lefttrigger:b6,rightshoulder:b5,righttrigger:b7,start:b9,x:b0,y:b3,platform:Mac OS X,
-030000001008000001e5000006010000,NEXT SNES Controller,a:b2,b:b1,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,rightshoulder:b6,start:b9,x:b3,y:b0,platform:Mac OS X,
 030000007e0500000920000000000000,Nintendo Switch Pro Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Mac OS X,
 030000008f0e00000300000000000000,Piranha xtreme,a:b2,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a3,righty:a2,start:b9,x:b3,y:b0,platform:Mac OS X,
 03000000d62000006dca000000010000,PowerA Pro Ex,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Mac OS X,
@@ -303,7 +300,6 @@
 03000000321500000010000000010000,Razer RAIJU,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 0300000032150000030a000000000000,Razer Wildcat,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 03000000790000001100000000000000,Retrolink Classic Controller,a:b2,b:b1,back:b8,leftshoulder:b4,leftx:a3,lefty:a4,rightshoulder:b5,start:b9,x:b3,y:b0,platform:Mac OS X,
-03000000790000001100000006010000,Retrolink SNES Controller,a:b2,b:b1,back:b8,dpdown:+a4,dpleft:-a3,dpright:+a3,dpup:-a4,leftshoulder:b4,rightshoulder:b5,start:b9,x:b3,y:b0,platform:Mac OS X,
 030000006b140000010d000000010000,Revolution Pro Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Mac OS X,
 03000000c6240000fefa000000000000,Rock Candy Gamepad for PS3,a:b0,b:b1,back:b9,dpdown:b12,dpleft:b13,dpright:b14,dpup:b11,guide:b10,leftshoulder:b4,leftstick:b6,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b7,righttrigger:a5,rightx:a3,righty:a4,start:b8,x:b2,y:b3,platform:Mac OS X,
 03000000811700007e05000000000000,Sega Saturn,a:b2,b:b4,dpdown:b16,dpleft:b15,dpright:b14,dpup:b17,leftshoulder:b8,lefttrigger:a5,leftx:a0,lefty:a2,rightshoulder:b9,righttrigger:a4,start:b13,x:b0,y:b6,platform:Mac OS X,
@@ -435,7 +431,6 @@
 05000000d6200000ad0d000001000000,Moga Pro,a:b0,b:b1,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b4,leftstick:b7,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b8,righttrigger:a4,rightx:a2,righty:a3,start:b6,x:b2,y:b3,platform:Linux,
 03000000250900006688000000010000,MP-8866 Super Dual Box,a:b2,b:b1,back:b9,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,leftshoulder:b6,leftstick:b10,lefttrigger:b4,leftx:a0,lefty:a1,rightshoulder:b7,rightstick:b11,righttrigger:b5,rightx:a2,righty:a3,start:b8,x:b3,y:b0,platform:Linux,
 030000000d0f00000900000010010000,Natec Genesis P44,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b0,y:b3,platform:Linux,
-030000001008000001e5000010010000,NEXT SNES Controller,a:b2,b:b1,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,rightshoulder:b6,start:b9,x:b3,y:b0,platform:Linux,
 050000007e0500000920000001000000,Nintendo Switch Pro Controller,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
 050000007e0500003003000001000000,Nintendo Wii Remote Pro Controller,a:b0,b:b1,back:b8,dpdown:b14,dpleft:b15,dpright:b16,dpup:b13,guide:b10,leftshoulder:b4,leftstick:b11,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b12,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b3,y:b2,platform:Linux,
 05000000010000000100000003000000,Nintendo Wiimote,a:b0,b:b1,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b10,leftshoulder:b4,leftstick:b11,lefttrigger:b6,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b12,righttrigger:b7,rightx:a2,righty:a3,start:b9,x:b2,y:b3,platform:Linux,
@@ -487,7 +482,6 @@
 03000000321500000009000011010000,Razer Serval,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a4,rightx:a2,righty:a3,start:b7,x:b2,y:b3,platform:Linux,
 050000003215000000090000163a0000,Razer Serval,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a5,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a4,rightx:a2,righty:a3,start:b7,x:b2,y:b3,platform:Linux,
 0300000032150000030a000001010000,Razer Wildcat,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
-03000000790000001100000010010000,Retrolink SNES Controller,a:b2,b:b1,back:b8,dpdown:+a1,dpleft:-a0,dpright:+a0,dpup:-a1,leftshoulder:b4,rightshoulder:b5,start:b9,x:b3,y:b0,platform:Linux,
 0300000000f000000300000000010000,RetroPad,a:b1,b:b5,back:b2,leftshoulder:b6,leftx:a0,lefty:a1,rightshoulder:b7,start:b3,x:b0,y:b4,platform:Linux,
 030000006b140000010d000011010000,Revolution Pro Controller,a:b1,b:b2,back:b8,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b12,leftshoulder:b4,leftstick:b10,lefttrigger:a3,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b11,righttrigger:a4,rightx:a2,righty:a5,start:b9,x:b0,y:b3,platform:Linux,
 030000006f0e00001f01000000010000,Rock Candy,a:b0,b:b1,back:b6,dpdown:h0.4,dpleft:h0.8,dpright:h0.2,dpup:h0.1,guide:b8,leftshoulder:b4,leftstick:b9,lefttrigger:a2,leftx:a0,lefty:a1,rightshoulder:b5,rightstick:b10,righttrigger:a5,rightx:a3,righty:a4,start:b7,x:b2,y:b3,platform:Linux,
--- a/gdb_remote.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/gdb_remote.c	Sat Jan 15 13:15:21 2022 -0800
@@ -18,13 +18,13 @@
 #define GDB_OUT_FD STDOUT_FILENO
 #define GDB_READ read
 #define GDB_WRITE write
+#include <unistd.h>
 #endif
 
 #include "gdb_remote.h"
 #include "68kinst.h"
 #include "debug.h"
 #include "util.h"
-#include <unistd.h>
 #include <fcntl.h>
 #include <stddef.h>
 #include <stdlib.h>
@@ -132,25 +132,13 @@
 	}
 }
 
-uint8_t read_byte(m68k_context * context, uint32_t address)
+static uint8_t m68k_read_byte(m68k_context *context, uint32_t address)
 {
-	
-	genesis_context *gen = context->system;
-	//TODO: Use generated read/write functions to support access to hardware that is not ROM or RAM
-	uint16_t * word = get_native_pointer(address & 0xFFFFFFFE, (void **)context->mem_pointers, &context->options->gen);
-	if (word) {	
-	if (address & 1) {
-		return *word;
-	}
-	return *word >> 8;
-}
-	if (address >= 0xA00000 && address < 0xA04000) {
-		return gen->zram[address & 0x1FFF];
-	}
-	return 0;
+	//TODO: share this implementation with builtin debugger
+	return read_byte(address, (void **)context->mem_pointers, &context->options->gen, context);
 }
 
-void write_byte(m68k_context * context, uint32_t address, uint8_t value)
+void m68k_write_byte(m68k_context * context, uint32_t address, uint8_t value)
 {
 	genesis_context *gen = context->system;
 	//TODO: Use generated read/write functions so that memory map is properly respected
@@ -170,7 +158,7 @@
 	if (address >= 0xA00000 && address < 0xA04000) {
 		gen->zram[address & 0x1FFF] = value;
 		genesis_context * gen = context->system;
-#ifndef NO_Z80
+#if !defined(NO_Z80) && !defined(NEW_CORE)
 		z80_handle_code_write(address & 0x1FFF, gen->z80);
 #endif
 		return;
@@ -305,7 +293,7 @@
 		char *cur = send_buf;
 		while (size)
 		{
-			hex_8(read_byte(context, address), cur);
+			hex_8(m68k_read_byte(context, address), cur);
 			cur += 2;
 			address++;
 			size--;
@@ -326,7 +314,7 @@
 			tmp[0] = *(cur++);
 			tmp[1] = *(cur++);
 			tmp[2] = 0;
-			write_byte(context, address, strtoul(tmp, NULL, 16));
+			m68k_write_byte(context, address, strtoul(tmp, NULL, 16));
 			address++;
 			size--;
 		}
@@ -401,6 +389,10 @@
 			gdb_send_command("m1");
 		} else if (!strcmp("sThreadInfo", command + 1)) {
 			gdb_send_command("l");
+		} else if (!memcmp("ThreadExtraInfo", command+1, strlen("ThreadExtraInfo"))) {
+			gdb_send_command("");
+		} else if (command[1] == 'P') {
+			gdb_send_command("");
 		} else {
 			goto not_impl;
 		}
@@ -558,21 +550,13 @@
 	}
 }
 
-#ifdef _WIN32
-void gdb_cleanup(void)
-{
-	WSACleanup();
-}
-WSADATA wsa_data;
-#endif
-
 void gdb_remote_init(void)
 {
 	buf = malloc(INITIAL_BUFFER_SIZE);
 	curbuf = NULL;
 	bufsize = INITIAL_BUFFER_SIZE;
 #ifdef _WIN32
-	WSAStartup(MAKEWORD(2,2), &wsa_data);
+	socket_init();
 
 	struct addrinfo request, *result;
 	memset(&request, 0, sizeof(request));
@@ -588,6 +572,7 @@
 	if (bind(listen_sock, result->ai_addr, result->ai_addrlen) < 0) {
 		fatal_error("Failed to bind GDB remote debugging socket");
 	}
+	freeaddrinfo(result);
 	if (listen(listen_sock, 1) < 0) {
 		fatal_error("Failed to listen on GDB remote debugging socket");
 	}
@@ -595,6 +580,8 @@
 	if (gdb_sock < 0) {
 		fatal_error("accept returned an error while listening on GDB remote debugging socket");
 	}
-	closesocket(listen_sock);
+	socket_close(listen_sock);
+#else
+	disable_stdout_messages();
 #endif
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gen_player.c	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,175 @@
+#include <stdlib.h>
+#include "gen_player.h"
+#include "event_log.h"
+#include "render.h"
+
+#define MCLKS_NTSC 53693175
+#define MCLKS_PAL  53203395
+#define MCLKS_PER_YM  7
+#define MCLKS_PER_Z80 15
+#define MCLKS_PER_PSG (MCLKS_PER_Z80*16)
+
+#ifdef IS_LIB
+#define MAX_SOUND_CYCLES (MCLKS_PER_YM*NUM_OPERATORS*6*4)
+#else
+#define MAX_SOUND_CYCLES 100000	
+#endif
+
+/*
+ * Runs the PSG and the YM2612 up to target. The distance is covered in steps of
+ * at most MAX_SOUND_CYCLES, measured from the PSG's cycle counter, with both
+ * chips run to the same intermediate cycle in each step.
+ */
+static void sync_sound(gen_player *gen, uint32_t target)
+{
+	for (;;)
+	{
+		if (target <= gen->psg->cycles || target - gen->psg->cycles <= MAX_SOUND_CYCLES) {
+			//close enough to finish with a single step
+			break;
+		}
+		uint32_t chunk_end = gen->psg->cycles + MAX_SOUND_CYCLES;
+		psg_run(gen->psg, chunk_end);
+		ym_run(gen->ym, chunk_end);
+	}
+	psg_run(gen->psg, target);
+	ym_run(gen->ym, target);
+}
+
+/*
+ * Playback loop: decodes events from the player's reader and applies them to the
+ * emulated VDP, YM2612 and PSG after running the affected chips up to the
+ * event's cycle. Socket backed players loop indefinitely; other players stop
+ * once the decoded data is used up.
+ */
+static void run(gen_player *player)
+{
+	while(player->reader.socket || player->reader.buffer.cur_pos < player->reader.buffer.size)
+	{
+		uint32_t cycle;
+		uint8_t event = reader_next_event(&player->reader, &cycle);
+		switch (event)
+		{
+		case EVENT_FLUSH:
+			sync_sound(player, cycle);
+			vdp_run_context(player->vdp, cycle);
+			break;
+		case EVENT_ADJUST: {
+			sync_sound(player, cycle);
+			vdp_run_context(player->vdp, cycle);
+			//reader_next_event already made sure these 4 bytes are available
+			uint32_t deduction = load_int32(&player->reader.buffer);
+			ym_adjust_cycles(player->ym, deduction);
+			vdp_adjust_cycles(player->vdp, deduction);
+			player->psg->cycles -= deduction;
+			break;
+		}
+		case EVENT_PSG_REG:
+			sync_sound(player, cycle);
+			reader_ensure_data(&player->reader, 1);
+			psg_write(player->psg, load_int8(&player->reader.buffer));
+			break;
+		case EVENT_YM_REG: {
+			sync_sound(player, cycle);
+			reader_ensure_data(&player->reader, 3);
+			uint8_t part = load_int8(&player->reader.buffer);
+			uint8_t reg = load_int8(&player->reader.buffer);
+			uint8_t value = load_int8(&player->reader.buffer);
+			if (part) {
+				ym_address_write_part2(player->ym, reg);
+			} else {
+				ym_address_write_part1(player->ym, reg);
+			}
+			ym_data_write(player->ym, value);
+			break;
+		}
+		case EVENT_STATE: {
+			//24-bit size of the serialized state that follows
+			reader_ensure_data(&player->reader, 3);
+			uint32_t size = load_int8(&player->reader.buffer) << 16;
+			size |= load_int16(&player->reader.buffer);
+			reader_ensure_data(&player->reader, size);
+			//deserialize in place from the reader's buffer
+			deserialize_buffer buffer;
+			init_deserialize(&buffer, player->reader.buffer.data + player->reader.buffer.cur_pos, size);
+			register_section_handler(&buffer, (section_handler){.fun = vdp_deserialize, .data = player->vdp}, SECTION_VDP);
+			register_section_handler(&buffer, (section_handler){.fun = ym_deserialize, .data = player->ym}, SECTION_YM2612);
+			register_section_handler(&buffer, (section_handler){.fun = psg_deserialize, .data = player->psg}, SECTION_PSG);
+			while (buffer.cur_pos < buffer.size)
+			{
+				load_section(&buffer);
+			}
+			player->reader.buffer.cur_pos += size;
+			free(buffer.handlers);
+			break;
+		}
+		default:
+			//all remaining event types are handed to the VDP
+			vdp_run_context(player->vdp, cycle);
+			vdp_replay_event(player->vdp, event, &player->reader);
+			break;
+		}
+		if (!player->reader.socket) {
+			//give the reader a chance to decode more data so the loop condition sees it
+			reader_ensure_data(&player->reader, 1);
+		}
+	}
+}
+
+/*
+ * Thread entry point used for socket backed players: runs the playback loop and
+ * reports 0 once it returns.
+ */
+static int thread_main(void *player)
+{
+	gen_player *gen = player;
+	run(gen);
+	return 0;
+}
+
+/*
+ * start_context callback of the player. Players without a socket run the
+ * playback loop on the calling thread; socket backed players get their own
+ * thread unless this is a library build (IS_LIB), in which case nothing is
+ * started here. statefile is not used.
+ */
+void start_context(system_header *sys, char *statefile)
+{
+	gen_player *player = (gen_player *)sys;
+	if (!player->reader.socket) {
+		run(player);
+		return;
+	}
+#ifndef IS_LIB
+	render_create_thread(&player->thread, "player", thread_main, player);
+#endif
+}
+
+/*
+ * gamepad_down callback: forwards the button press through the player's reader
+ */
+static void gamepad_down(system_header *system, uint8_t gamepad_num, uint8_t button)
+{
+	event_reader *reader = &((gen_player *)system)->reader;
+	reader_send_gamepad_event(reader, gamepad_num, button, 1);
+}
+
+/*
+ * gamepad_up callback: forwards the button release through the player's reader
+ */
+static void gamepad_up(system_header *system, uint8_t gamepad_num, uint8_t button)
+{
+	event_reader *reader = &((gen_player *)system)->reader;
+	reader_send_gamepad_event(reader, gamepad_num, button, 0);
+}
+
+/*
+ * Setup shared by both ways of creating a player. Reads the video standard and
+ * the name from the reader's buffer, creates the VDP, YM2612 and PSG contexts
+ * with the master clock matching the video standard and fills in the system
+ * header.
+ */
+static void config_common(gen_player *player)
+{
+	deserialize_buffer *in = &player->reader.buffer;
+	uint8_t vid_std = load_int8(in);
+	uint8_t name_len = load_int8(in);
+	//one extra zeroed byte keeps the name terminated
+	player->header.info.name = calloc(1, name_len + 1);
+	load_buffer8(in, player->header.info.name, name_len);
+	
+	player->vdp = init_vdp_context(vid_std == VID_PAL, 0);
+	render_set_video_standard(vid_std);
+	uint32_t master_clock;
+	if (vid_std == VID_NTSC) {
+		master_clock = MCLKS_NTSC;
+	} else {
+		master_clock = MCLKS_PAL;
+	}
+	
+	player->ym = malloc(sizeof(*player->ym));
+	ym_init(player->ym, master_clock, MCLKS_PER_YM, 0);
+	
+	player->psg = malloc(sizeof(*player->psg));
+	psg_init(player->psg, master_clock, MCLKS_PER_PSG);
+	
+	system_header *header = &player->header;
+	header->start_context = start_context;
+	header->gamepad_down = gamepad_down;
+	header->gamepad_up = gamepad_up;
+	header->type = SYSTEM_GENESIS_PLAYER;
+	header->info.save_type = SAVE_NONE;
+}
+
+/*
+ * Creates a player for an event log held in memory.
+ * stream   - start of the log; the first 9 bytes are skipped before the rest is
+ *            handed to the event reader
+ * rom_size - size of the log in bytes
+ * Returns the newly allocated, zero initialized and configured player.
+ */
+gen_player *alloc_config_gen_player(void *stream, uint32_t rom_size)
+{
+	gen_player *player = calloc(1, sizeof(*player));
+	uint8_t *log_start = (uint8_t *)stream + 9;
+	init_event_reader(&player->reader, log_start, rom_size - 9);
+	config_common(player);
+	return player;
+}
+
+/*
+ * Creates a player that takes over an already initialized event reader.
+ * The reader is copied by value and its zlib stream is duplicated with
+ * inflateCopy. External sync is enabled in the renderer before the common
+ * configuration runs.
+ * Returns the newly allocated and configured player.
+ */
+gen_player *alloc_config_gen_player_reader(event_reader *reader)
+{
+	gen_player *player = calloc(1, sizeof(*player));
+	event_reader *own = &player->reader;
+	*own = *reader;
+	inflateCopy(&own->input_stream, &reader->input_stream);
+	render_set_external_sync(1);
+	config_common(player);
+	return player;
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gen_player.h	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,26 @@
+#ifndef GEN_PLAYER_H_
+#define GEN_PLAYER_H_
+
+#include "render.h"
+#include "system.h"
+#include "vdp.h"
+#include "psg.h"
+#include "ym2612.h"
+#include "event_log.h"
+
+typedef struct {
+	system_header   header; //first member: the gamepad callbacks cast a system_header * back to gen_player *
+	//chip contexts; allocated/initialized in config_common
+	vdp_context     *vdp;
+	ym2612_context  *ym;
+	psg_context     *psg;
+#ifndef IS_LIB
+	render_thread   thread;
+#endif
+	event_reader    reader; //configuration is loaded from this reader and gamepad events are sent through it
+} gen_player;
+
+gen_player *alloc_config_gen_player(void *stream, uint32_t rom_size);
+gen_player *alloc_config_gen_player_reader(event_reader *reader);
+
+#endif //GEN_PLAYER_H_
--- a/gen_x86.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/gen_x86.c	Sat Jan 15 13:15:21 2022 -0800
@@ -130,7 +130,7 @@
 	X86_R13,
 	X86_R14,
 	X86_R15
-} x86_regs_enc;
+};
 
 char * x86_reg_names[] = {
 #ifdef X86_64
@@ -171,7 +171,7 @@
 };
 
 #ifdef X86_64
-#define CHECK_DISP(disp) (disp <= 0x7FFFFFFF && disp >= -2147483648)
+#define CHECK_DISP(disp) ((disp) <= ((ptrdiff_t)INT32_MAX) && (disp) >= ((ptrdiff_t)INT32_MIN)) //non-zero when disp fits in a signed 32-bit displacement; the argument is evaluated twice
 #else
 #define CHECK_DISP(disp) 1
 #endif
@@ -1261,7 +1261,7 @@
 	check_alloc_code(code, 14);
 	code_ptr out = code->cur;
 	uint8_t sign_extend = 0;
-	if (size == SZ_Q && val <= 0x7FFFFFFF && val >= -2147483648) {
+	if (size == SZ_Q && val <= ((int64_t)INT32_MAX) && val >= ((int64_t)INT32_MIN)) {
 		sign_extend = 1;
 	}
 	if (size == SZ_W) {
@@ -2111,7 +2111,12 @@
 	}
 #ifdef X86_64
 	uint32_t stack_args = 0;
+#ifdef _WIN32
+	//Microsoft is too good for the ABI that everyone else uses on x86-64 apparently
+	uint8_t abi_regs[] = {RCX, RDX, R8, R9};
+#else
 	uint8_t abi_regs[] = {RDI, RSI, RDX, RCX, R8, R9};
+#endif
 	int8_t reg_swap[R15+1];
 	uint32_t usage = 0;
 	memset(reg_swap, -1, sizeof(reg_swap));
@@ -2153,6 +2158,11 @@
 		push_r(code, arg_arr[i]);
 	}
 	free(arg_arr);
+#if defined(X86_64) && defined(_WIN32)
+	sub_ir(code, 32, RSP, SZ_PTR);
+	code->stack_off += 32;
+	adjust += 32;
+#endif
 	
 	return stack_args * sizeof(void *) + adjust;
 }
@@ -2218,7 +2228,8 @@
 	push_r(code, R13);
 	push_r(code, R14);
 	push_r(code, R15);
-#else
+#endif
+#if !defined(X86_64) || defined(_WIN32)
 	push_r(code, RDI);
 	push_r(code, RSI);
 #endif
@@ -2226,14 +2237,15 @@
 
 void restore_callee_save_regs(code_info *code)
 {
+#if !defined(X86_64) || defined(_WIN32)
+	pop_r(code, RSI);
+	pop_r(code, RDI);
+#endif
 #ifdef X86_64
 	pop_r(code, R15);
 	pop_r(code, R14);
 	pop_r(code, R13);
 	pop_r(code, R12);
-#else
-	pop_r(code, RSI);
-	pop_r(code, RDI);
 #endif
 	pop_r(code, RBP);
 	pop_r(code, RBX);
--- a/gen_x86.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/gen_x86.h	Sat Jan 15 13:15:21 2022 -0800
@@ -30,7 +30,7 @@
 	R13,
 	R14,
 	R15
-} x86_regs;
+};
 
 enum {
 	CC_O = 0,
@@ -51,18 +51,25 @@
 	CC_GE,
 	CC_LE,
 	CC_G
-} x86_cc;
+};
 
 enum {
 	SZ_B = 0,
 	SZ_W,
 	SZ_D,
 	SZ_Q
-} x86_size;
+};
 
 #ifdef X86_64
 #define SZ_PTR SZ_Q
 #define MAX_INST_LEN 14
+#ifdef _WIN32
+#define FIRST_ARG_REG RCX //Windows x64 convention: the first two integer arguments are passed in RCX and RDX
+#define SECOND_ARG_REG RDX
+#else
+#define FIRST_ARG_REG RDI //System V AMD64 convention: the first two integer arguments are passed in RDI and RSI
+#define SECOND_ARG_REG RSI
+#endif
 #else
 #define SZ_PTR SZ_D
 #define MAX_INST_LEN 11
@@ -78,7 +85,7 @@
 	MODE_REG_DIRECT = 0xC0,
 //"phony" mode
 	MODE_IMMED = 0xFF
-} x86_modes;
+};
 
 void rol_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size);
 void ror_ir(code_info *code, uint8_t val, uint8_t dst, uint8_t size);
--- a/genesis.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/genesis.c	Sat Jan 15 13:15:21 2022 -0800
@@ -19,6 +19,8 @@
 #include "saves.h"
 #include "bindings.h"
 #include "jcart.h"
+#include "config.h"
+#include "event_log.h"
 #define MCLKS_NTSC 53693175
 #define MCLKS_PAL  53203395
 
@@ -34,59 +36,111 @@
 #define LINES_NTSC 262
 #define LINES_PAL 313
 
-#define MAX_SOUND_CYCLES 100000	
+#ifdef IS_LIB //library builds derive MAX_SOUND_CYCLES from YM timing constants; other builds keep the fixed value
+#define MAX_SOUND_CYCLES (MCLKS_PER_YM*NUM_OPERATORS*6*4)
+#else
+#define MAX_SOUND_CYCLES 100000
+#endif
 
-void genesis_serialize(genesis_context *gen, serialize_buffer *buf, uint32_t m68k_pc)
+#ifdef NEW_CORE //the two Z80 cores name these members differently; the macros let shared code use either
+#define Z80_CYCLE cycles //used as a member name, e.g. z_context->Z80_CYCLE
+#define Z80_OPTS opts
+#define z80_handle_code_write(...) //expands to nothing when NEW_CORE is defined
+#else
+#define Z80_CYCLE current_cycle
+#define Z80_OPTS options
+#endif
+
+void genesis_serialize(genesis_context *gen, serialize_buffer *buf, uint32_t m68k_pc, uint8_t all) //writes machine state sections to buf; all == 0 limits output to the VDP, YM2612 and PSG sections
 {
-	start_section(buf, SECTION_68000);
-	m68k_serialize(gen->m68k, m68k_pc, buf);
-	end_section(buf);
-	
-	start_section(buf, SECTION_Z80);
-	z80_serialize(gen->z80, buf);
-	end_section(buf);
-	
+	if (all) { //CPU sections are only written when a complete state is requested
+		start_section(buf, SECTION_68000);
+		m68k_serialize(gen->m68k, m68k_pc, buf);
+		end_section(buf);
+
+		start_section(buf, SECTION_Z80);
+		z80_serialize(gen->z80, buf);
+		end_section(buf);
+	}
+	//VDP, YM2612 and PSG sections are written regardless of all
 	start_section(buf, SECTION_VDP);
 	vdp_serialize(gen->vdp, buf);
 	end_section(buf);
-	
+
 	start_section(buf, SECTION_YM2612);
 	ym_serialize(gen->ym, buf);
 	end_section(buf);
-	
+
 	start_section(buf, SECTION_PSG);
 	psg_serialize(gen->psg, buf);
 	end_section(buf);
-	
-	start_section(buf, SECTION_GEN_BUS_ARBITER);
-	save_int8(buf, gen->z80->reset);
-	save_int8(buf, gen->z80->busreq);
-	save_int16(buf, gen->z80->bank_reg);
-	end_section(buf);
-	
-	start_section(buf, SECTION_SEGA_IO_1);
-	io_serialize(gen->io.ports, buf);
-	end_section(buf);
-	
-	start_section(buf, SECTION_SEGA_IO_2);
-	io_serialize(gen->io.ports + 1, buf);
-	end_section(buf);
-	
-	start_section(buf, SECTION_SEGA_IO_EXT);
-	io_serialize(gen->io.ports + 2, buf);
-	end_section(buf);
-	
-	start_section(buf, SECTION_MAIN_RAM);
-	save_int8(buf, RAM_WORDS * 2 / 1024);
-	save_buffer16(buf, gen->work_ram, RAM_WORDS);
-	end_section(buf);
-	
-	start_section(buf, SECTION_SOUND_RAM);
-	save_int8(buf, Z80_RAM_BYTES / 1024);
-	save_buffer8(buf, gen->zram, Z80_RAM_BYTES);
-	end_section(buf);
-	
-	cart_serialize(&gen->header, buf);
+	//everything below (bus arbiter, IO ports, RAM, TMSS, cartridge) is only part of a complete state
+	if (all) {
+		start_section(buf, SECTION_GEN_BUS_ARBITER);
+		save_int8(buf, gen->z80->reset);
+		save_int8(buf, gen->z80->busreq);
+		save_int16(buf, gen->z80_bank_reg); //the bank register now lives in genesis_context rather than the Z80 context
+		end_section(buf);
+
+		start_section(buf, SECTION_SEGA_IO_1);
+		io_serialize(gen->io.ports, buf);
+		end_section(buf);
+
+		start_section(buf, SECTION_SEGA_IO_2);
+		io_serialize(gen->io.ports + 1, buf);
+		end_section(buf);
+
+		start_section(buf, SECTION_SEGA_IO_EXT);
+		io_serialize(gen->io.ports + 2, buf);
+		end_section(buf);
+
+		start_section(buf, SECTION_MAIN_RAM);
+		save_int8(buf, RAM_WORDS * 2 / 1024); //size byte: RAM_WORDS 16-bit words expressed in KiB
+		save_buffer16(buf, gen->work_ram, RAM_WORDS);
+		end_section(buf);
+
+		start_section(buf, SECTION_SOUND_RAM);
+		save_int8(buf, Z80_RAM_BYTES / 1024); //size byte in KiB
+		save_buffer8(buf, gen->zram, Z80_RAM_BYTES);
+		end_section(buf);
+
+		if (gen->version_reg & 0xF) {
+			//only save TMSS info if it's present
+			//that will allow a state saved on a model lacking TMSS
+			//to be loaded on a model that has it
+			start_section(buf, SECTION_TMSS);
+			save_int8(buf, gen->tmss);
+			save_buffer16(buf, gen->tmss_lock, 2);
+			end_section(buf);
+		}
+
+		cart_serialize(&gen->header, buf);
+	}
+}
+
+static uint8_t *serialize(system_header *sys, size_t *size_out)
+{
+	genesis_context *gen = (genesis_context *)sys;
+	//Returns the serialized machine state; its length is stored in *size_out when size_out is non-NULL
+	if (gen->m68k->resume_pc) { //68K has a resume point: let the core produce the state via a SERIALIZE_SLOT save request
+		gen->m68k->target_cycle = gen->m68k->current_cycle;
+		gen->header.save_state = SERIALIZE_SLOT+1;
+		resume_68k(gen->m68k);
+		if (size_out) {
+			*size_out = gen->serialize_size;
+		}
+		return gen->serialize_tmp;
+	} else { //no resume point yet: serialize directly, using the longword at addresses 4-7 (68000 initial PC vector) as the PC
+		serialize_buffer state;
+		init_serialize(&state);
+		uint32_t address = read_word(4, (void **)gen->m68k->mem_pointers, &gen->m68k->options->gen, gen->m68k) << 16;
+		address |= read_word(6, (void **)gen->m68k->mem_pointers, &gen->m68k->options->gen, gen->m68k);
+		genesis_serialize(gen, &state, address, 1);
+		if (size_out) {
+			*size_out = state.size;
+		}
+		return state.data;
+	}
 }
 
 static void ram_deserialize(deserialize_buffer *buf, void *vgen)
@@ -113,11 +167,12 @@
 
 static void update_z80_bank_pointer(genesis_context *gen)
 {
-	if (gen->z80->bank_reg < 0x140) {
-		gen->z80->mem_pointers[1] = get_native_pointer(gen->z80->bank_reg << 15, (void **)gen->m68k->mem_pointers, &gen->m68k->options->gen);
+	if (gen->z80_bank_reg < 0x140) { //0x140 << 15 == 0xA00000, so this branch covers banks that start below that 68K address
+		gen->z80->mem_pointers[1] = get_native_pointer(gen->z80_bank_reg << 15, (void **)gen->m68k->mem_pointers, &gen->m68k->options->gen);
 	} else {
 		gen->z80->mem_pointers[1] = NULL;
 	}
+	z80_invalidate_code_range(gen->z80, 0x8000, 0xFFFF); //bank pointer may have changed; presumably discards cached code for the banked window - TODO confirm
 }
 
 static void bus_arbiter_deserialize(deserialize_buffer *buf, void *vgen)
@@ -125,10 +180,19 @@
 	genesis_context *gen = vgen;
 	gen->z80->reset = load_int8(buf);
 	gen->z80->busreq = load_int8(buf);
-	gen->z80->bank_reg = load_int16(buf) & 0x1FF;
+	gen->z80_bank_reg = load_int16(buf) & 0x1FF;
+}
+
+static void tmss_deserialize(deserialize_buffer *buf, void *vgen)
+{
+	genesis_context *console = vgen; //handler data registered for SECTION_TMSS
+	console->tmss = load_int8(buf); //same order as the SECTION_TMSS writer: the tmss byte first
+	load_buffer16(buf, console->tmss_lock, 2); //then the two tmss_lock words
+}
 
 static void adjust_int_cycle(m68k_context * context, vdp_context * v_context);
+static void check_tmss_lock(genesis_context *gen);
+static void toggle_tmss_rom(genesis_context *gen);
 void genesis_deserialize(deserialize_buffer *buf, genesis_context *gen)
 {
 	register_section_handler(buf, (section_handler){.fun = m68k_deserialize, .data = gen->m68k}, SECTION_68000);
@@ -143,12 +207,41 @@
 	register_section_handler(buf, (section_handler){.fun = ram_deserialize, .data = gen}, SECTION_MAIN_RAM);
 	register_section_handler(buf, (section_handler){.fun = zram_deserialize, .data = gen}, SECTION_SOUND_RAM);
 	register_section_handler(buf, (section_handler){.fun = cart_deserialize, .data = gen}, SECTION_MAPPER);
+	register_section_handler(buf, (section_handler){.fun = tmss_deserialize, .data = gen}, SECTION_TMSS);
+	uint8_t tmss_old = gen->tmss;
+	gen->tmss = 0xFF;
 	while (buf->cur_pos < buf->size)
 	{
 		load_section(buf);
 	}
+	if (gen->version_reg & 0xF) {
+		if (gen->tmss == 0xFF) {
+			//state lacked a TMSS section, assume that the game ROM is mapped in
+			//and that the VDP is unlocked
+			gen->tmss_lock[0] = 0x5345;
+			gen->tmss_lock[1] = 0x4741;
+			gen->tmss = 1;
+		}
+		if (gen->tmss != tmss_old) {
+			toggle_tmss_rom(gen);
+		}
+		check_tmss_lock(gen);
+	}
 	update_z80_bank_pointer(gen);
 	adjust_int_cycle(gen->m68k, gen->vdp);
+	free(buf->handlers);
+	buf->handlers = NULL;
+}
+
+#include "m68k_internal.h" //needed for get_native_address_trans, should be eliminated once handling of PC is cleaned up
+static void deserialize(system_header *sys, uint8_t *data, size_t size) //loads machine state from a serialized buffer of size bytes
+{
+	genesis_context *gen = (genesis_context *)sys;
+	deserialize_buffer buffer;
+	init_deserialize(&buffer, data, size);
+	genesis_deserialize(&buffer, gen);
+	//HACK: Fix this once PC/IR is represented in a better way in 68K core
+	gen->m68k->resume_pc = get_native_address_trans(gen->m68k, gen->m68k->last_prefetch_address); //recompute the resume point from last_prefetch_address after loading
 }
 
 uint16_t read_dma_value(uint32_t address)
@@ -158,7 +251,7 @@
 	if ((address >= 0xA00000 && address < 0xB00000) || (address >= 0xC00000 && address <= 0xE00000)) {
 		return 0;
 	}
-	
+
 	//addresses here are word addresses (i.e. bit 0 corresponds to A1), so no need to do multiply by 2
 	return read_word(address * 2, (void **)genesis->m68k->mem_pointers, &genesis->m68k->options->gen, genesis->m68k);
 }
@@ -177,13 +270,14 @@
 		context->sync_cycle = context->current_cycle + gen->max_cycles;
 	}
 	context->int_cycle = CYCLE_NEVER;
-	if ((context->status & 0x7) < 6) {
+	uint8_t mask = context->status & 0x7;
+	if (mask < 6) {
 		uint32_t next_vint = vdp_next_vint(v_context);
 		if (next_vint != CYCLE_NEVER) {
 			context->int_cycle = next_vint;
 			context->int_num = 6;
 		}
-		if ((context->status & 0x7) < 4) {
+		if (mask < 4) {
 			uint32_t next_hint = vdp_next_hint(v_context);
 			if (next_hint != CYCLE_NEVER) {
 				next_hint = next_hint < context->current_cycle ? context->current_cycle : next_hint;
@@ -193,6 +287,21 @@
 
 				}
 			}
+			if (mask < 2 && (v_context->regs[REG_MODE_3] & BIT_EINT_EN)) {
+				uint32_t next_eint_port0 = io_next_interrupt(gen->io.ports, context->current_cycle);
+				uint32_t next_eint_port1 = io_next_interrupt(gen->io.ports + 1, context->current_cycle);
+				uint32_t next_eint_port2 = io_next_interrupt(gen->io.ports + 2, context->current_cycle);
+				uint32_t next_eint = next_eint_port0 < next_eint_port1
+					? (next_eint_port0 < next_eint_port2 ? next_eint_port0 : next_eint_port2)
+					: (next_eint_port1 < next_eint_port2 ? next_eint_port1 : next_eint_port2);
+				if (next_eint != CYCLE_NEVER) {
+					next_eint = next_eint < context->current_cycle ? context->current_cycle : next_eint;
+					if (next_eint < context->int_cycle) {
+						context->int_cycle = next_eint;
+						context->int_num = 2;
+					}
+				}
+			}
 		}
 	}
 	if (context->int_cycle > context->current_cycle && context->int_pending == INT_PENDING_SR_CHANGE) {
@@ -202,14 +311,14 @@
 		printf("int cycle changed to: %d, level: %d @ %d(%d), frame: %d, vcounter: %d, hslot: %d, mask: %d, hint_counter: %d\n", context->int_cycle, context->int_num, v_context->cycles, context->current_cycle, v_context->frame, v_context->vcounter, v_context->hslot, context->status & 0x7, v_context->hint_counter);
 		old_int_cycle = context->int_cycle;
 	}*/
-	
+
 	if (context->status & M68K_STATUS_TRACE || context->trace_pending) {
 		context->target_cycle = context->current_cycle;
 		return;
 	}
 
 	context->target_cycle = context->int_cycle < context->sync_cycle ? context->int_cycle : context->sync_cycle;
-	if (context->should_return) {
+	if (context->should_return || gen->header.enter_debugger) {
 		context->target_cycle = context->current_cycle;
 	} else if (context->target_cycle < context->current_cycle) {
 		//Changes to SR can result in an interrupt cycle that's in the past
@@ -227,7 +336,7 @@
 		} else {
 			context->target_cycle = context->sync_cycle = context->current_cycle;
 		}
-		
+
 	}
 	/*printf("Cyc: %d, Trgt: %d, Int Cyc: %d, Int: %d, Mask: %X, V: %d, H: %d, HICount: %d, HReg: %d, Line: %d\n",
 		context->current_cycle, context->target_cycle, context->int_cycle, context->int_num, (context->status & 0x7),
@@ -246,20 +355,32 @@
 static void z80_next_int_pulse(z80_context * z_context)
 {
 	genesis_context * gen = z_context->system;
+#ifdef NEW_CORE //new core: int_cycle/int_end_cycle/int_value take the place of int_pulse_start/int_pulse_end/im2_vector
+	z_context->int_cycle = vdp_next_vint_z80(gen->vdp);
+	z_context->int_end_cycle = z_context->int_cycle + Z80_INT_PULSE_MCLKS;
+	z_context->int_value = 0xFF;
+	z80_sync_cycle(z_context, z_context->sync_cycle); //re-applies the current sync target after the interrupt timing changed
+#else
 	z_context->int_pulse_start = vdp_next_vint_z80(gen->vdp);
 	z_context->int_pulse_end = z_context->int_pulse_start + Z80_INT_PULSE_MCLKS;
 	z_context->im2_vector = 0xFF;
+#endif
 }
 
 static void sync_z80(z80_context * z_context, uint32_t mclks)
 {
 #ifndef NO_Z80
 	if (z80_enabled) {
+#ifdef NEW_CORE
+		if (z_context->int_cycle == 0xFFFFFFFFU) { //all-ones int_cycle: presumably no interrupt scheduled yet, so compute the next pulse
+			z80_next_int_pulse(z_context);
+		}
+#endif
 		z80_run(z_context, mclks);
 	} else
 #endif
 	{
-		z_context->current_cycle = mclks;
+		z_context->Z80_CYCLE = mclks; //Z80 emulation disabled: just move its cycle counter to the sync target
 	}
 }
 
@@ -279,9 +400,6 @@
 	//printf("Target: %d, YM bufferpos: %d, PSG bufferpos: %d\n", target, gen->ym->buffer_pos, gen->psg->buffer_pos * 2);
 }
 
-//TODO: move this inside the system context
-static uint32_t last_frame_num;
-
 //My refresh emulation isn't currently good enough and causes more problems than it solves
 #define REFRESH_EMULATION
 #ifdef REFRESH_EMULATION
@@ -313,14 +431,19 @@
 	sync_z80(z_context, mclks);
 	sync_sound(gen, mclks);
 	vdp_run_context(v_context, mclks);
+	io_run(gen->io.ports, mclks);
+	io_run(gen->io.ports + 1, mclks);
+	io_run(gen->io.ports + 2, mclks);
 	if (mclks >= gen->reset_cycle) {
 		gen->reset_requested = 1;
 		context->should_return = 1;
 		gen->reset_cycle = CYCLE_NEVER;
 	}
-	if (v_context->frame != last_frame_num) {
-		//printf("reached frame end %d | MCLK Cycles: %d, Target: %d, VDP cycles: %d, vcounter: %d, hslot: %d\n", last_frame_num, mclks, gen->frame_end, v_context->cycles, v_context->vcounter, v_context->hslot);
-		last_frame_num = v_context->frame;
+	if (v_context->frame != gen->last_frame) {
+		//printf("reached frame end %d | MCLK Cycles: %d, Target: %d, VDP cycles: %d, vcounter: %d, hslot: %d\n", gen->last_frame, mclks, gen->frame_end, v_context->cycles, v_context->vcounter, v_context->hslot);
+		gen->last_frame = v_context->frame;
+		event_flush(mclks);
+		gen->last_flush_cycle = mclks;
 
 		if(exit_after){
 			--exit_after;
@@ -339,15 +462,20 @@
 			}
 			context->current_cycle -= deduction;
 			z80_adjust_cycles(z_context, deduction);
-			gen->ym->current_cycle -= deduction;
+			ym_adjust_cycles(gen->ym, deduction);
+			if (gen->ym->vgm) {
+				vgm_adjust_cycles(gen->ym->vgm, deduction);
+			}
 			gen->psg->cycles -= deduction;
-			if (gen->ym->write_cycle != CYCLE_NEVER) {
-				gen->ym->write_cycle = gen->ym->write_cycle >= deduction ? gen->ym->write_cycle - deduction : 0;
-			}
 			if (gen->reset_cycle != CYCLE_NEVER) {
 				gen->reset_cycle -= deduction;
 			}
+			event_cycle_adjust(mclks, deduction);
+			gen->last_flush_cycle -= deduction;
 		}
+	} else if (mclks - gen->last_flush_cycle > gen->soft_flush_cycles) {
+		event_soft_flush(mclks);
+		gen->last_flush_cycle = mclks;
 	}
 	gen->frame_end = vdp_cycles_to_frame_end(v_context);
 	context->sync_cycle = gen->frame_end;
@@ -367,11 +495,20 @@
 	if (address) {
 		if (gen->header.enter_debugger) {
 			gen->header.enter_debugger = 0;
-			debugger(context, address);
+			if (gen->header.debugger_type == DEBUGGER_NATIVE) {
+				debugger(context, address);
+			} else {
+				gdb_debug_enter(context, address);
+			}
 		}
+#ifdef NEW_CORE
+		if (gen->header.save_state) {
+#else
 		if (gen->header.save_state && (z_context->pc || !z_context->native_pc || z_context->reset || !z_context->busreq)) {
+#endif
 			uint8_t slot = gen->header.save_state - 1;
 			gen->header.save_state = 0;
+#ifndef NEW_CORE
 			if (z_context->native_pc && !z_context->reset) {
 				//advance Z80 core to the start of an instruction
 				while (!z_context->pc)
@@ -379,17 +516,29 @@
 					sync_z80(z_context, z_context->current_cycle + MCLKS_PER_Z80);
 				}
 			}
-			char *save_path = get_slot_name(&gen->header, slot, use_native_states ? "state" : "gst");
-			if (use_native_states) {
+#endif
+			char *save_path = slot >= SERIALIZE_SLOT ? NULL : get_slot_name(&gen->header, slot, use_native_states ? "state" : "gst");
+			if (use_native_states || slot >= SERIALIZE_SLOT) {
 				serialize_buffer state;
 				init_serialize(&state);
-				genesis_serialize(gen, &state, address);
-				save_to_file(&state, save_path);
-				free(state.data);
+				genesis_serialize(gen, &state, address, slot != EVENTLOG_SLOT);
+				if (slot == SERIALIZE_SLOT) {
+					gen->serialize_tmp = state.data;
+					gen->serialize_size = state.size;
+					context->sync_cycle = context->current_cycle;
+					context->should_return = 1;
+				} else if (slot == EVENTLOG_SLOT) {
+					event_state(context->current_cycle, &state);
+				} else {
+					save_to_file(&state, save_path);
+					free(state.data);
+				}
 			} else {
 				save_gst(gen, save_path, address);
 			}
-			printf("Saved state to %s\n", save_path);
+			if (slot != SERIALIZE_SLOT) {
+				debug_message("Saved state to %s\n", save_path);
+			}
 			free(save_path);
 		} else if(gen->header.save_state) {
 			context->sync_cycle = context->current_cycle + 1;
@@ -406,6 +555,10 @@
 	if (vdp_port & 0x2700E0) {
 		fatal_error("machine freeze due to write to address %X\n", 0xC00000 | vdp_port);
 	}
+	genesis_context * gen = context->system;
+	if (!gen->vdp_unlocked) {
+		fatal_error("machine freeze due to VDP write to %X without TMSS unlock\n", 0xC00000 | vdp_port);
+	}
 	vdp_port &= 0x1F;
 	//printf("vdp_port write: %X, value: %X, cycle: %d\n", vdp_port, value, context->current_cycle);
 #ifdef REFRESH_EMULATION
@@ -416,7 +569,6 @@
 	last_sync_cycle = context->current_cycle;
 #endif
 	sync_components(context, 0);
-	genesis_context * gen = context->system;
 	vdp_context *v_context = gen->vdp;
 	uint32_t before_cycle = v_context->cycles;
 	if (vdp_port < 0x10) {
@@ -459,7 +611,7 @@
 							gen->bus_busy = 0;
 						}
 					}
-					
+
 					if (blocked < 0) {
 						blocked = vdp_control_port_write(v_context, value);
 					} else {
@@ -493,7 +645,7 @@
 		vdp_test_port_write(gen->vdp, value);
 	}
 #ifdef REFRESH_EMULATION
-	last_sync_cycle -= 4;
+	last_sync_cycle -= 4 * MCLKS_PER_68K;
 	//refresh may have happened while we were waiting on the VDP,
 	//so advance refresh_counter but don't add any delays
 	if (vdp_port >= 4 && vdp_port < 8 && v_context->cycles != before_cycle) {
@@ -523,16 +675,16 @@
 	if (vdp_port < 0x10) {
 		//These probably won't currently interact well with the 68K accessing the VDP
 		if (vdp_port < 4) {
-			vdp_run_context(gen->vdp, context->current_cycle);
+			vdp_run_context(gen->vdp, context->Z80_CYCLE);
 			vdp_data_port_write(gen->vdp, value << 8 | value);
 		} else if (vdp_port < 8) {
-			vdp_run_context_full(gen->vdp, context->current_cycle);
+			vdp_run_context_full(gen->vdp, context->Z80_CYCLE);
 			vdp_control_port_write(gen->vdp, value << 8 | value);
 		} else {
 			fatal_error("Illegal write to HV Counter port %X\n", vdp_port);
 		}
 	} else if (vdp_port < 0x18) {
-		sync_sound(gen, context->current_cycle);
+		sync_sound(gen, context->Z80_CYCLE);
 		psg_write(gen->psg, value);
 	} else {
 		vdp_test_port_write(gen->vdp, value);
@@ -545,6 +697,10 @@
 	if (vdp_port & 0x2700E0) {
 		fatal_error("machine freeze due to read from address %X\n", 0xC00000 | vdp_port);
 	}
+	genesis_context *gen = context->system;
+	if (!gen->vdp_unlocked) {
+		fatal_error("machine freeze due to VDP read from %X without TMSS unlock\n", 0xC00000 | vdp_port);
+	}
 	vdp_port &= 0x1F;
 	uint16_t value;
 #ifdef REFRESH_EMULATION
@@ -555,7 +711,6 @@
 	last_sync_cycle = context->current_cycle;
 #endif
 	sync_components(context, 0);
-	genesis_context *gen = context->system;
 	vdp_context * v_context = gen->vdp;
 	uint32_t before_cycle = v_context->cycles;
 	if (vdp_port < 0x10) {
@@ -570,7 +725,7 @@
 	} else if (vdp_port < 0x18){
 		fatal_error("Illegal read from PSG  port %X\n", vdp_port);
 	} else {
-		value = vdp_test_port_read(v_context);
+		value = get_open_bus_value(&gen->header);
 	}
 	if (v_context->cycles != before_cycle) {
 		//printf("68K paused for %d (%d) cycles at cycle %d (%d) for read\n", v_context->cycles - context->current_cycle, v_context->cycles - before_cycle, context->current_cycle, before_cycle);
@@ -582,7 +737,7 @@
 		gen->bus_busy = 0;
 	}
 #ifdef REFRESH_EMULATION
-	last_sync_cycle -= 4;
+	last_sync_cycle -= 4 * MCLKS_PER_68K;
 	//refresh may have happened while we were waiting on the VDP,
 	//so advance refresh_counter but don't add any delays
 	refresh_counter += (context->current_cycle - last_sync_cycle);
@@ -611,7 +766,7 @@
 	genesis_context * gen = context->system;
 	//VDP access goes over the 68K bus like a bank area access
 	//typical delay from bus arbitration
-	context->current_cycle += 3 * MCLKS_PER_Z80;
+	context->Z80_CYCLE += 3 * MCLKS_PER_Z80;
 	//TODO: add cycle for an access right after a previous one
 	//TODO: Below cycle time is an estimate based on the time between 68K !BG goes low and Z80 !MREQ goes high
 	//      Needs a new logic analyzer capture to get the actual delay on the 68K side
@@ -622,7 +777,7 @@
 	uint16_t ret;
 	if (vdp_port < 0x10) {
 		//These probably won't currently interact well with the 68K accessing the VDP
-		vdp_run_context(gen->vdp, context->current_cycle);
+		vdp_run_context(gen->vdp, context->Z80_CYCLE);
 		if (vdp_port < 4) {
 			ret = vdp_data_port_read(gen->vdp);
 		} else if (vdp_port < 8) {
@@ -643,6 +798,13 @@
 static m68k_context * io_write(uint32_t location, m68k_context * context, uint8_t value)
 {
 	genesis_context * gen = context->system;
+#ifdef REFRESH_EMULATION
+	//do refresh check here so we can avoid adding a penalty for a refresh that happens during an IO area access
+	refresh_counter += context->current_cycle - 4*MCLKS_PER_68K - last_sync_cycle;
+	context->current_cycle += REFRESH_DELAY * MCLKS_PER_68K * (refresh_counter / (MCLKS_PER_68K * REFRESH_INTERVAL));
+	refresh_counter = refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL);
+	last_sync_cycle = context->current_cycle - 4*MCLKS_PER_68K;
+#endif
 	if (location < 0x10000) {
 		//Access to Z80 memory incurs a one 68K cycle wait state
 		context->current_cycle += MCLKS_PER_68K;
@@ -663,9 +825,9 @@
 					ym_address_write_part1(gen->ym, value);
 				}
 			} else if (location == 0x6000) {
-				gen->z80->bank_reg = (gen->z80->bank_reg >> 1 | value << 8) & 0x1FF;
-				if (gen->z80->bank_reg < 0x80) {
-					gen->z80->mem_pointers[1] = (gen->z80->bank_reg << 15) + ((char *)gen->z80->mem_pointers[2]);
+				gen->z80_bank_reg = (gen->z80_bank_reg >> 1 | value << 8) & 0x1FF;
+				if (gen->z80_bank_reg < 0x80) {
+					gen->z80->mem_pointers[1] = (gen->z80_bank_reg << 15) + ((char *)gen->z80->mem_pointers[2]);
 				} else {
 					gen->z80->mem_pointers[1] = NULL;
 				}
@@ -674,9 +836,8 @@
 			}
 		}
 	} else {
-		location &= 0x1FFF;
-		if (location < 0x100) {
-			switch(location/2)
+		if (location < 0x10100) {
+			switch(location >> 1 & 0xFF)
 			{
 			case 0x1:
 				io_data_write(gen->io.ports, value, context->current_cycle);
@@ -697,7 +858,7 @@
 				io_control_write(gen->io.ports+2, value, context->current_cycle);
 				break;
 			case 0x7:
-				gen->io.ports[0].serial_out = value;
+				io_tx_write(gen->io.ports, value, context->current_cycle);
 				break;
 			case 0x8:
 			case 0xB:
@@ -705,23 +866,25 @@
 				//serial input port is not writeable
 				break;
 			case 0x9:
+				io_sctrl_write(gen->io.ports, value, context->current_cycle);
 				gen->io.ports[0].serial_ctrl = value;
 				break;
 			case 0xA:
-				gen->io.ports[1].serial_out = value;
+				io_tx_write(gen->io.ports + 1, value, context->current_cycle);
 				break;
 			case 0xC:
-				gen->io.ports[1].serial_ctrl = value;
+				io_sctrl_write(gen->io.ports + 1, value, context->current_cycle);
 				break;
 			case 0xD:
-				gen->io.ports[2].serial_out = value;
+				io_tx_write(gen->io.ports + 2, value, context->current_cycle);
 				break;
 			case 0xF:
-				gen->io.ports[2].serial_ctrl = value;
+				io_sctrl_write(gen->io.ports + 2, value, context->current_cycle);
 				break;
 			}
 		} else {
-			if (location == 0x1100) {
+			uint32_t masked = location & 0xFFF00;
+			if (masked == 0x11100) {
 				if (value & 1) {
 					dputs("bus requesting Z80");
 					if (z80_enabled) {
@@ -746,7 +909,7 @@
 						gen->z80->busack = 0;
 					}
 				}
-			} else if (location == 0x1200) {
+			} else if (masked == 0x11200) {
 				sync_z80(gen->z80, context->current_cycle);
 				if (value & 1) {
 					if (z80_enabled) {
@@ -762,9 +925,16 @@
 					}
 					ym_reset(gen->ym);
 				}
+			} else if (masked != 0x11300 && masked != 0x11000) {
+				fatal_error("Machine freeze due to unmapped write to address %X\n", location | 0xA00000);
 			}
 		}
 	}
+#ifdef REFRESH_EMULATION
+	//no refresh delays during IO access
+	refresh_counter += context->current_cycle - last_sync_cycle;
+	refresh_counter = refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL);
+#endif
 	return context;
 }
 
@@ -788,6 +958,13 @@
 {
 	uint8_t value;
 	genesis_context *gen = context->system;
+#ifdef REFRESH_EMULATION
+	//do refresh check here so we can avoid adding a penalty for a refresh that happens during an IO area access
+	refresh_counter += context->current_cycle - 4*MCLKS_PER_68K - last_sync_cycle;
+	context->current_cycle += REFRESH_DELAY * MCLKS_PER_68K * (refresh_counter / (MCLKS_PER_68K * REFRESH_INTERVAL));
+	refresh_counter = refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL);
+	last_sync_cycle = context->current_cycle - 4*MCLKS_PER_68K;
+#endif
 	if (location < 0x10000) {
 		//Access to Z80 memory incurs a one 68K cycle wait state
 		context->current_cycle += MCLKS_PER_68K;
@@ -797,17 +974,20 @@
 				value = gen->zram[location & 0x1FFF];
 			} else if (location < 0x6000) {
 				sync_sound(gen, context->current_cycle);
-				value = ym_read_status(gen->ym);
+				value = ym_read_status(gen->ym, context->current_cycle, location);
+			} else if (location < 0x7F00) {
+				value = 0xFF;
 			} else {
+				fatal_error("Machine freeze due to read of Z80 VDP memory window by 68K: %X\n", location | 0xA00000);
 				value = 0xFF;
 			}
 		} else {
-			value = 0xFF;
+			uint16_t word = get_open_bus_value(&gen->header);
+			value = location & 1 ? word : word >> 8;
 		}
 	} else {
-		location &= 0x1FFF;
-		if (location < 0x100) {
-			switch(location/2)
+		if (location < 0x10100) {
+			switch(location >> 1 & 0xFF)
 			{
 			case 0x0:
 				//version bits should be 0 for now since we're not emulating TMSS
@@ -835,45 +1015,56 @@
 				value = gen->io.ports[0].serial_out;
 				break;
 			case 0x8:
-				value = gen->io.ports[0].serial_in;
+				value = io_rx_read(gen->io.ports, context->current_cycle);
 				break;
 			case 0x9:
-				value = gen->io.ports[0].serial_ctrl;
+				value = io_sctrl_read(gen->io.ports, context->current_cycle);
 				break;
 			case 0xA:
 				value = gen->io.ports[1].serial_out;
 				break;
 			case 0xB:
-				value = gen->io.ports[1].serial_in;
+				value = io_rx_read(gen->io.ports + 1, context->current_cycle);
 				break;
 			case 0xC:
-				value = gen->io.ports[1].serial_ctrl;
+				value = io_sctrl_read(gen->io.ports + 1, context->current_cycle); //port 1's serial control, matching the removed line and the 0xC write case
 				break;
 			case 0xD:
 				value = gen->io.ports[2].serial_out;
 				break;
 			case 0xE:
-				value = gen->io.ports[2].serial_in;
+				value = io_rx_read(gen->io.ports + 2, context->current_cycle); //port 2's receive data, matching the removed line
 				break;
 			case 0xF:
-				value = gen->io.ports[2].serial_ctrl;
+				value = io_sctrl_read(gen->io.ports + 2, context->current_cycle); //port 2's serial control, matching the removed line and the 0xF write case
 				break;
 			default:
-				value = 0xFF;
+				value = get_open_bus_value(&gen->header) >> 8;
 			}
 		} else {
-			if (location == 0x1100) {
+			uint32_t masked = location & 0xFFF00; //location is no longer masked to 0x1FFF, so compare on the 256-byte block it falls in
+			if (masked == 0x11100) {
 				value = z80_enabled ? !z80_get_busack(gen->z80, context->current_cycle) : !gen->z80->busack;
 				value |= (get_open_bus_value(&gen->header) >> 8) & 0xFE;
 				dprintf("Byte read of BUSREQ returned %d @ %d (reset: %d)\n", value, context->current_cycle, gen->z80->reset);
-			} else if (location == 0x1200) {
+			} else if (masked == 0x11200) {
 				value = !gen->z80->reset;
+			} else if (masked == 0x11300 || masked == 0x11000) {
+				//A11300 is apparently completely unused
+				//A11000 is the memory control register which I am assuming is write only
+				value = get_open_bus_value(&gen->header) >> 8;
 			} else {
+				location |= 0xA00000;
+				fatal_error("Machine freeze due to read of unmapped IO location %X\n", location);
 				value = 0xFF;
-				printf("Byte read of unknown IO location: %X\n", location);
 			}
 		}
 	}
+#ifdef REFRESH_EMULATION
+	//no refresh delays during IO access
+	refresh_counter += context->current_cycle - last_sync_cycle;
+	refresh_counter = refresh_counter % (MCLKS_PER_68K * REFRESH_INTERVAL);
+#endif
 	return value;
 }
 
@@ -894,7 +1085,7 @@
 {
 	z80_context * context = vcontext;
 	genesis_context * gen = context->system;
-	sync_sound(gen, context->current_cycle);
+	sync_sound(gen, context->Z80_CYCLE);
 	if (location & 1) {
 		ym_data_write(gen->ym, value);
 	} else if (location & 2) {
@@ -909,8 +1100,8 @@
 {
 	z80_context * context = vcontext;
 	genesis_context * gen = context->system;
-	sync_sound(gen, context->current_cycle);
-	return ym_read_status(gen->ym);
+	sync_sound(gen, context->Z80_CYCLE);
+	return ym_read_status(gen->ym, context->Z80_CYCLE, location);
 }
 
 static uint8_t z80_read_bank(uint32_t location, void * vcontext)
@@ -918,10 +1109,10 @@
 	z80_context * context = vcontext;
 	genesis_context *gen = context->system;
 	if (gen->bus_busy) {
-		context->current_cycle = context->sync_cycle;
+		context->Z80_CYCLE = gen->m68k->current_cycle;
 	}
 	//typical delay from bus arbitration
-	context->current_cycle += 3 * MCLKS_PER_Z80;
+	context->Z80_CYCLE += 3 * MCLKS_PER_Z80;
 	//TODO: add cycle for an access right after a previous one
 	//TODO: Below cycle time is an estimate based on the time between 68K !BG goes low and Z80 !MREQ goes high
 	//      Needs a new logic analyzer capture to get the actual delay on the 68K side
@@ -931,11 +1122,15 @@
 	if (context->mem_pointers[1]) {
 		return context->mem_pointers[1][location ^ 1];
 	}
-	uint32_t address = context->bank_reg << 15 | location;
+	uint32_t address = gen->z80_bank_reg << 15 | location;
 	if (address >= 0xC00000 && address < 0xE00000) {
 		return z80_vdp_port_read(location & 0xFF, context);
+	} else if (address >= 0xA10000 && address <= 0xA10001) {
+		//Apparently version reg can be read through Z80 banked area
+		//TODO: Check rest of IO region addresses
+		return gen->version_reg;
 	} else {
-		fprintf(stderr, "Unhandled read by Z80 from address %X through banked memory area (%X)\n", address, context->bank_reg << 15);
+		fprintf(stderr, "Unhandled read by Z80 from address %X through banked memory area (%X)\n", address, gen->z80_bank_reg << 15);
 	}
 	return 0;
 }
@@ -945,17 +1140,17 @@
 	z80_context * context = vcontext;
 	genesis_context *gen = context->system;
 	if (gen->bus_busy) {
-		context->current_cycle = context->sync_cycle;
+		context->Z80_CYCLE = gen->m68k->current_cycle;
 	}
 	//typical delay from bus arbitration
-	context->current_cycle += 3 * MCLKS_PER_Z80;
+	context->Z80_CYCLE += 3 * MCLKS_PER_Z80;
 	//TODO: add cycle for an access right after a previous one
 	//TODO: Below cycle time is an estimate based on the time between 68K !BG goes low and Z80 !MREQ goes high
 	//      Needs a new logic analyzer capture to get the actual delay on the 68K side
 	gen->m68k->current_cycle += 8 * MCLKS_PER_68K;
 
 	location &= 0x7FFF;
-	uint32_t address = context->bank_reg << 15 | location;
+	uint32_t address = gen->z80_bank_reg << 15 | location;
 	if (address >= 0xE00000) {
 		address &= 0xFFFF;
 		((uint8_t *)gen->work_ram)[address ^ 1] = value;
@@ -970,13 +1165,121 @@
 static void *z80_write_bank_reg(uint32_t location, void * vcontext, uint8_t value)
 {
 	z80_context * context = vcontext;
+	genesis_context *gen = context->system;
 
-	context->bank_reg = (context->bank_reg >> 1 | value << 8) & 0x1FF;
+	gen->z80_bank_reg = (gen->z80_bank_reg >> 1 | value << 8) & 0x1FF;
 	update_z80_bank_pointer(context->system);
 
 	return context;
 }
 
+static uint16_t unused_read(uint32_t location, void *vcontext) //68K word-read handler for addresses that have no more specific mapping
+{
+	m68k_context *context = vcontext;
+	genesis_context *gen = context->system;
+	if (location < 0x800000 || (location >= 0xA13000 && location < 0xA13100) || (location >= 0xA12000 && location < 0xA12100)) {
+		//Only called if the cart/exp doesn't have a more specific handler for this region
+		return get_open_bus_value(&gen->header);
+	} else if (location == 0xA14000 || location == 0xA14002) {
+		if (gen->version_reg & 0xF) { //non-zero low nibble: model has TMSS (shared_init sets bit 0 when tmss is "on")
+			return gen->tmss_lock[location >> 1 & 1]; //index 0 for A14000, index 1 for A14002
+		} else {
+			fatal_error("Machine freeze due to read from TMSS lock when TMSS is not present %X\n", location);
+			return 0xFFFF;
+		}
+	} else if (location == 0xA14100) {
+		if (gen->version_reg & 0xF) {
+			return get_open_bus_value(&gen->header); //TMSS control register reads back as open bus
+		} else {
+			fatal_error("Machine freeze due to read from TMSS control when TMSS is not present %X\n", location);
+			return 0xFFFF;
+		}
+	} else {
+		fatal_error("Machine freeze due to unmapped read from %X\n", location); //everything else is treated as a fatal access
+		return 0xFFFF;
+	}
+}
+
+static uint8_t unused_read_b(uint32_t location, void *vcontext) //byte-read counterpart of unused_read
+{
+	uint16_t v = unused_read(location & 0xFFFFFE, vcontext); //read the word that contains the addressed byte
+	if (location & 1) {
+		return v; //odd address: low byte (implicit truncation to uint8_t)
+	} else {
+		return v >> 8; //even address: high byte
+	}
+}
+
+static void check_tmss_lock(genesis_context *gen) //recompute vdp_unlocked from the two TMSS lock words
+{
+	gen->vdp_unlocked = gen->tmss_lock[0] == 0x5345 && gen->tmss_lock[1] == 0x4741; //0x5345, 0x4741 are ASCII "SE", "GA"
+}
+
+static void toggle_tmss_rom(genesis_context *gen) //exchange the 68K's active memory pointers with the set parked in tmss_pointers
+{
+	m68k_context *context = gen->m68k;
+	for (int i = 0; i < NUM_MEM_AREAS; i++)
+	{
+		uint16_t *tmp = context->mem_pointers[i]; //plain three-step swap, so calling this twice restores the original mapping
+		context->mem_pointers[i] = gen->tmss_pointers[i];
+		gen->tmss_pointers[i] = tmp;
+	}
+	m68k_invalidate_code_range(context, 0, 0x400000); //covers 0x000000-0x3FFFFF, the range whose chunks the TMSS remap rewrites (start < 0x400000)
+}
+
+static void *unused_write(uint32_t location, void *vcontext, uint16_t value) //68K word-write handler for addresses that have no more specific mapping
+{
+	m68k_context *context = vcontext;
+	genesis_context *gen = context->system;
+	uint8_t has_tmss = gen->version_reg & 0xF; //non-zero low nibble: model has TMSS
+	if (has_tmss && (location == 0xA14000 || location == 0xA14002)) {
+		gen->tmss_lock[location >> 1 & 1] = value; //index 0 for A14000, index 1 for A14002
+		check_tmss_lock(gen); //the VDP unlock state depends on both lock words
+	} else if (has_tmss && location == 0xA14100) {
+		value &= 1; //only bit 0 of the control register is kept
+		if (gen->tmss != value) { //swap the pointer sets only when the bit actually changes
+			gen->tmss = value;
+			toggle_tmss_rom(gen);
+		}
+	} else if (location < 0x800000 || (location >= 0xA13000 && location < 0xA13100) || (location >= 0xA12000 && location < 0xA12100)) {
+		//these writes are ignored when no relevant hardware is present
+	} else {
+		fatal_error("Machine freeze due to unmapped write to %X\n", location); //also reached for A14000/A14002/A14100 when has_tmss is zero
+	}
+	return vcontext;
+}
+
+static void *unused_write_b(uint32_t location, void *vcontext, uint8_t value) //byte-write counterpart of unused_write
+{
+	m68k_context *context = vcontext;
+	genesis_context *gen = context->system;
+	uint8_t has_tmss = gen->version_reg & 0xF; //non-zero low nibble: model has TMSS
+	if (has_tmss && location >= 0xA14000 && location <= 0xA14003) { //any of the four bytes making up the two lock words
+		uint32_t offset = location >> 1 & 1; //0 for A14000/A14001, 1 for A14002/A14003
+		if (location & 1) {
+			gen->tmss_lock[offset] &= 0xFF00; //odd address: replace the low byte
+			gen->tmss_lock[offset] |= value;
+		} else {
+			gen->tmss_lock[offset] &= 0xFF; //even address: replace the high byte
+			gen->tmss_lock[offset] |= value << 8;
+		}
+		check_tmss_lock(gen);
+	} else if (has_tmss && (location == 0xA14100 || location == 0xA14101)) {
+		if (location & 1) { //only the odd (low) byte carries the control bit; a write to A14100 itself changes nothing
+			value &= 1;
+			if (gen->tmss != value) { //swap the pointer sets only when the bit actually changes
+				gen->tmss = value;
+				toggle_tmss_rom(gen);
+			}
+		}
+	} else if (location < 0x800000 || (location >= 0xA13000 && location < 0xA13100) || (location >= 0xA12000 && location < 0xA12100)) {
+		//these writes are ignored when no relevant hardware is present
+	} else {
+		fatal_error("Machine freeze due to unmapped byte write to %X\n", location);
+	}
+	return vcontext;
+}
+
 static void set_speed_percent(system_header * system, uint32_t percent)
 {
 	genesis_context *context = (genesis_context *)system;
@@ -1006,16 +1309,17 @@
 	} else {
 		gen->version_reg = NO_DISK | USA;
 	}
-	
+
 	if (region & HZ50) {
 		gen->normal_clock = MCLKS_PAL;
+		gen->soft_flush_cycles = MCLKS_LINE * 313 / 3 + 2; //PAL: 313 lines per frame, so this is about a third of a frame
 	} else {
 		gen->normal_clock = MCLKS_NTSC;
+		gen->soft_flush_cycles = MCLKS_LINE * 262 / 3 + 2; //NTSC: 262 lines per frame, so this is about a third of a frame
 	}
 	gen->master_clock = gen->normal_clock;
 }
 
-#include "m68k_internal.h" //needed for get_native_address_trans, should be eliminated once handling of PC is cleaned up
 static uint8_t load_state(system_header *system, uint8_t slot)
 {
 	genesis_context *gen = (genesis_context *)system;
@@ -1071,10 +1375,12 @@
 			resume_68k(gen->m68k);
 		}
 	}
-	bindings_release_capture();
-	vdp_release_framebuffer(gen->vdp);
-	render_pause_source(gen->ym->audio);
-	render_pause_source(gen->psg->audio);
+	if (gen->header.force_release || render_should_release_on_exit()) {
+		bindings_release_capture();
+		vdp_release_framebuffer(gen->vdp);
+		render_pause_source(gen->ym->audio);
+		render_pause_source(gen->psg->audio);
+	}
 }
 
 static void start_genesis(system_header *system, char *statefile)
@@ -1118,11 +1424,14 @@
 static void resume_genesis(system_header *system)
 {
 	genesis_context *gen = (genesis_context *)system;
-	render_set_video_standard((gen->version_reg & HZ50) ? VID_PAL : VID_NTSC);
-	bindings_reacquire_capture();
-	vdp_reacquire_framebuffer(gen->vdp);
-	render_resume_source(gen->ym->audio);
-	render_resume_source(gen->psg->audio);
+	if (gen->header.force_release || render_should_release_on_exit()) {
+		gen->header.force_release = 0;
+		render_set_video_standard((gen->version_reg & HZ50) ? VID_PAL : VID_NTSC);
+		bindings_reacquire_capture();
+		vdp_reacquire_framebuffer(gen->vdp);
+		render_resume_source(gen->ym->audio);
+		render_resume_source(gen->psg->audio);
+	}
 	resume_68k(gen->m68k);
 	handle_reset_requests(gen);
 }
@@ -1136,6 +1445,7 @@
 static void request_exit(system_header *system)
 {
 	genesis_context *gen = (genesis_context *)system;
+	gen->m68k->target_cycle = gen->m68k->current_cycle;
 	gen->m68k->should_return = 1;
 }
 
@@ -1150,7 +1460,13 @@
 		fprintf(stderr, "Failed to open %s file %s for writing\n", save_type_name(gen->save_type), save_filename);
 		return;
 	}
+	if (gen->save_type == RAM_FLAG_BOTH) {
+		byteswap_rom(gen->save_size, (uint16_t *)gen->save_storage);
+	}
 	fwrite(gen->save_storage, 1, gen->save_size, f);
+	if (gen->save_type == RAM_FLAG_BOTH) {
+		byteswap_rom(gen->save_size, (uint16_t *)gen->save_storage);
+	}
 	fclose(f);
 	printf("Saved %s to %s\n", save_type_name(gen->save_type), save_filename);
 }
@@ -1163,6 +1479,9 @@
 		uint32_t read = fread(gen->save_storage, 1, gen->save_size, f);
 		fclose(f);
 		if (read > 0) {
+			if (gen->save_type == RAM_FLAG_BOTH) {
+				byteswap_rom(gen->save_size, (uint16_t *)gen->save_storage);
+			}
 			printf("Loaded %s from %s\n", save_type_name(gen->save_type), save_filename);
 		}
 	}
@@ -1189,7 +1508,7 @@
 	free(gen->cart);
 	free(gen->m68k);
 	free(gen->work_ram);
-	z80_options_free(gen->z80->options);
+	z80_options_free(gen->z80->Z80_OPTS);
 	free(gen->z80);
 	free(gen->zram);
 	ym_free(gen->ym);
@@ -1197,6 +1516,9 @@
 	free(gen->header.save_dir);
 	free_rom_info(&gen->header.info);
 	free(gen->lock_on);
+	if (gen->save_type != SAVE_NONE && gen->mapper_type != MAPPER_SEGA_MED_V2) {
+		free(gen->save_storage);
+	}
 	free(gen);
 }
 
@@ -1254,10 +1576,177 @@
 	io_keyboard_up(&gen->io, scancode);
 }
 
+static void set_audio_config(genesis_context *gen) //apply the audio section of the global config to the PSG and YM2612 audio sources
+{
+	char *config_gain;
+	config_gain = tern_find_path(config, "audio\0psg_gain\0", TVAL_PTR).ptrval; //NULL when the key is absent
+	render_audio_source_gaindb(gen->psg->audio, config_gain ? atof(config_gain) : 0.0f); //missing key falls back to 0.0
+	config_gain = tern_find_path(config, "audio\0fm_gain\0", TVAL_PTR).ptrval;
+	render_audio_source_gaindb(gen->ym->audio, config_gain ? atof(config_gain) : 0.0f);
+
+	char *config_dac = tern_find_path_default(config, "audio\0fm_dac\0", (tern_val){.ptrval="zero_offset"}, TVAL_PTR).ptrval; //defaults to "zero_offset"
+	ym_enable_zero_offset(gen->ym, !strcmp(config_dac, "zero_offset")); //enabled only when the setting is exactly "zero_offset"
+}
+
 static void config_updated(system_header *system)
 {
 	genesis_context *gen = (genesis_context *)system;
 	setup_io_devices(config, &system->info, &gen->io);
+	set_audio_config(gen);
+}
+
+static void start_vgm_log(system_header *system, char *filename) //system_header hook: open a VGM writer on filename and attach it to both sound chips
+{
+	genesis_context *gen = (genesis_context *)system;
+	vgm_writer *vgm = vgm_write_open(filename, gen->version_reg & HZ50 ? 50 : 60, gen->master_clock, gen->m68k->current_cycle); //50 for HZ50 regions, otherwise 60 (presumably the frame rate - confirm against vgm_write_open)
+	if (vgm) {
+		printf("Started logging VGM to %s\n", filename);
+		sync_sound(gen, vgm->last_cycle); //run the sound chips up to the writer's cycle before attaching it
+		ym_vgm_log(gen->ym, gen->master_clock, vgm); //the same writer is handed to both chips
+		psg_vgm_log(gen->psg, gen->master_clock, vgm);
+		gen->header.vgm_logging = 1;
+	} else {
+		printf("Failed to start logging to %s\n", filename); //vgm_logging is left unchanged on failure
+	}
+}
+
+static void stop_vgm_log(system_header *system) //system_header hook: close the active VGM writer and detach it from both sound chips
+{
+	puts("Stopped VGM log");
+	genesis_context *gen = (genesis_context *)system;
+	vgm_close(gen->ym->vgm); //closed once via ym; presumably psg->vgm is the same writer, since start_vgm_log passes one vgm to both chips
+	gen->ym->vgm = gen->psg->vgm = NULL;
+	gen->header.vgm_logging = 0;
+}
+
+static void *tmss_rom_write_16(uint32_t address, void *context, uint16_t value) //write_16 wrapper that the TMSS remap installs over a chunk's original handler
+{
+	m68k_context *m68k = context;
+	genesis_context *gen = m68k->system;
+	if (gen->tmss) { //bit 0 of the last A14100 write; set means forward to the handler saved in tmss_write_16
+		return gen->tmss_write_16(address, context, value);
+	}
+
+	return context; //gen->tmss clear: the write is dropped
+}
+
+static void *tmss_rom_write_8(uint32_t address, void *context, uint8_t value) //write_8 wrapper that the TMSS remap installs over a chunk's original handler
+{
+	m68k_context *m68k = context;
+	genesis_context *gen = m68k->system;
+	if (gen->tmss) { //bit 0 of the last A14100 write; set means forward to the handler saved in tmss_write_8
+		return gen->tmss_write_8(address, context, value);
+	}
+
+	return context; //gen->tmss clear: the write is dropped
+}
+
+static uint16_t tmss_rom_read_16(uint32_t address, void *context) //read_16 wrapper that the TMSS remap installs on chunks that had their own read handler
+{
+	m68k_context *m68k = context;
+	genesis_context *gen = m68k->system;
+	if (gen->tmss) { //set: forward to the handler saved in tmss_read_16
+		return gen->tmss_read_16(address, context);
+	}
+	return ((uint16_t *)gen->tmss_buffer)[address >> 1]; //clear: serve the word straight from the TMSS ROM buffer
+}
+
+static uint8_t tmss_rom_read_8(uint32_t address, void *context) //byte-read counterpart of tmss_rom_read_16
+{
+	m68k_context *m68k = context;
+	genesis_context *gen = m68k->system;
+	if (gen->tmss) { //set: forward to the handler saved in tmss_read_8
+		return gen->tmss_read_8(address, context);
+	}
+#ifdef BLASTEM_BIG_ENDIAN
+	return gen->tmss_buffer[address];
+#else
+	return gen->tmss_buffer[address ^ 1]; //the buffer is byteswapped at load time on non-big-endian builds, so flip the low address bit
+#endif
+}
+
+static void *tmss_word_write_16(uint32_t address, void *context, uint16_t value) //write_16 handler the TMSS remap installs on a full-width RAM/SRAM chunk
+{
+	m68k_context *m68k = context;
+	genesis_context *gen = m68k->system;
+	if (gen->tmss) { //writes are dropped while gen->tmss is clear
+		address += gen->tmss_write_offset; //tmss_write_offset is the start of the remapped chunk; presumably address arrives chunk-relative - confirm
+		uint16_t *dest = get_native_pointer(address, (void **)m68k->mem_pointers, &m68k->options->gen);
+		*dest = value;
+		m68k_handle_code_write(address, m68k); //notify the 68K core of the write (presumably so code at this address is retranslated)
+	}
+
+	return context;
+}
+
+static void *tmss_word_write_8(uint32_t address, void *context, uint8_t value) //byte-write counterpart of tmss_word_write_16
+{
+	m68k_context *m68k = context;
+	genesis_context *gen = m68k->system;
+	if (gen->tmss) { //writes are dropped while gen->tmss is clear
+		address += gen->tmss_write_offset; //tmss_write_offset is the start of the remapped chunk; presumably address arrives chunk-relative - confirm
+		uint8_t *dest = get_native_pointer(address & ~1, (void **)m68k->mem_pointers, &m68k->options->gen); //pointer to the word containing the byte
+#ifdef BLASTEM_BIG_ENDIAN
+		dest[address & 1] = value;
+#else
+		dest[address & 1 ^ 1] = value; //parses as (address & 1) ^ 1: byte order within the word is swapped on non-big-endian builds
+#endif
+		m68k_handle_code_write(address & ~1, m68k);
+	}
+
+	return context;
+}
+
+static void *tmss_odd_write_16(uint32_t address, void *context, uint16_t value) //write_16 handler the TMSS remap installs on an MMAP_ONLY_ODD RAM/SRAM chunk
+{
+	m68k_context *m68k = context;
+	genesis_context *gen = m68k->system;
+	if (gen->tmss) { //writes are dropped while gen->tmss is clear
+		memmap_chunk const *chunk = find_map_chunk(address + gen->tmss_write_offset, &m68k->options->gen, 0, NULL); //the chunk lookup uses the offset address
+		address >>= 1; //one stored byte per 16-bit word; note the index uses address without tmss_write_offset
+		uint8_t *base = (uint8_t *)m68k->mem_pointers[chunk->ptr_index];
+		base[address] = value; //implicit truncation keeps the low byte, i.e. the byte at the odd address
+	}
+	return context;
+}
+
+static void *tmss_odd_write_8(uint32_t address, void *context, uint8_t value) //byte-write counterpart of tmss_odd_write_16
+{
+	m68k_context *m68k = context;
+	genesis_context *gen = m68k->system;
+	if (gen->tmss && (address & 1)) { //even-address bytes are ignored, as are all writes while gen->tmss is clear
+		memmap_chunk const *chunk = find_map_chunk(address + gen->tmss_write_offset, &m68k->options->gen, 0, NULL); //the chunk lookup uses the offset address
+		address >>= 1; //one stored byte per 16-bit word; note the index uses address without tmss_write_offset
+		uint8_t *base = (uint8_t *)m68k->mem_pointers[chunk->ptr_index];
+		base[address] = value;
+	}
+	return context;
+}
+
+static void *tmss_even_write_16(uint32_t address, void *context, uint16_t value) //write_16 handler the TMSS remap installs on an MMAP_ONLY_EVEN RAM/SRAM chunk
+{
+	m68k_context *m68k = context;
+	genesis_context *gen = m68k->system;
+	if (gen->tmss) { //writes are dropped while gen->tmss is clear
+		memmap_chunk const *chunk = find_map_chunk(address + gen->tmss_write_offset, &m68k->options->gen, 0, NULL); //the chunk lookup uses the offset address
+		address >>= 1; //one stored byte per 16-bit word; note the index uses address without tmss_write_offset
+		uint8_t *base = (uint8_t *)m68k->mem_pointers[chunk->ptr_index];
+		base[address] = value >> 8; //high byte of the word, i.e. the byte at the even address
+	}
+	return context;
+}
+
+static void *tmss_even_write_8(uint32_t address, void *context, uint8_t value) //byte-write counterpart of tmss_even_write_16
+{
+	m68k_context *m68k = context;
+	genesis_context *gen = m68k->system;
+	if (gen->tmss && !(address & 1)) { //odd-address bytes are ignored, as are all writes while gen->tmss is clear
+		memmap_chunk const *chunk = find_map_chunk(address + gen->tmss_write_offset, &m68k->options->gen, 0, NULL); //the chunk lookup uses the offset address
+		address >>= 1; //one stored byte per 16-bit word; note the index uses address without tmss_write_offset
+		uint8_t *base = (uint8_t *)m68k->mem_pointers[chunk->ptr_index];
+		base[address] = value;
+	}
+	return context;
 }
 
 static genesis_context *shared_init(uint32_t system_opts, rom_info *rom, uint8_t force_region)
@@ -1269,7 +1758,7 @@
 		{ 0x6000, 0x6100,  0xFFFF, 0, 0, 0,                                  NULL, NULL, NULL, NULL,              z80_write_bank_reg},
 		{ 0x7F00, 0x8000,  0x00FF, 0, 0, 0,                                  NULL, NULL, NULL, z80_vdp_port_read, z80_vdp_port_write}
 	};
-	
+
 	char *m68k_divider = tern_find_path(config, "clocks\0m68k_divider\0", TVAL_PTR).ptrval;
 	if (!m68k_divider) {
 		m68k_divider = "7";
@@ -1278,7 +1767,7 @@
 	if (!MCLKS_PER_68K) {
 		MCLKS_PER_68K = 7;
 	}
-	
+
 	genesis_context *gen = calloc(1, sizeof(genesis_context));
 	gen->header.set_speed_percent = set_speed_percent;
 	gen->header.start_context = start_genesis;
@@ -1300,32 +1789,53 @@
 	gen->header.keyboard_down = keyboard_down;
 	gen->header.keyboard_up = keyboard_up;
 	gen->header.config_updated = config_updated;
+	gen->header.serialize = serialize;
+	gen->header.deserialize = deserialize;
+	gen->header.start_vgm_log = start_vgm_log;
+	gen->header.stop_vgm_log = stop_vgm_log;
 	gen->header.type = SYSTEM_GENESIS;
 	gen->header.info = *rom;
 	set_region(gen, rom, force_region);
-	
-	gen->vdp = init_vdp_context(gen->version_reg & 0x40);
+	tern_node *model = get_model(config, SYSTEM_GENESIS);
+	uint8_t tmss = !strcmp(tern_find_ptr_default(model, "tmss", "off"), "on");
+	if (tmss) {
+		gen->version_reg |= 1;
+	} else {
+		gen->vdp_unlocked = 1;
+	}
+
+	uint8_t max_vsram = !strcmp(tern_find_ptr_default(model, "vsram", "40"), "64");
+	gen->vdp = init_vdp_context(gen->version_reg & 0x40, max_vsram);
 	gen->vdp->system = &gen->header;
 	gen->frame_end = vdp_cycles_to_frame_end(gen->vdp);
 	char * config_cycles = tern_find_path(config, "clocks\0max_cycles\0", TVAL_PTR).ptrval;
 	gen->max_cycles = config_cycles ? atoi(config_cycles) : DEFAULT_SYNC_INTERVAL;
 	gen->int_latency_prev1 = MCLKS_PER_68K * 32;
 	gen->int_latency_prev2 = MCLKS_PER_68K * 16;
-	
+
 	render_set_video_standard((gen->version_reg & HZ50) ? VID_PAL : VID_NTSC);
-	
+	event_system_start(SYSTEM_GENESIS, (gen->version_reg & HZ50) ? VID_PAL : VID_NTSC, rom->name);
+
 	gen->ym = malloc(sizeof(ym2612_context));
+	char *fm = tern_find_ptr_default(model, "fm", "discrete 2612"); //FM chip setting from the model config
+	if (strlen(fm) >= 4 && !strcmp(fm + strlen(fm) - 4, "3834")) { //suffix test; the length guard keeps the pointer inside short strings
+		system_opts |= YM_OPT_3834;
+	}
 	ym_init(gen->ym, gen->master_clock, MCLKS_PER_YM, system_opts);
 
 	gen->psg = malloc(sizeof(psg_context));
 	psg_init(gen->psg, gen->master_clock, MCLKS_PER_PSG);
 
+	set_audio_config(gen);
+
 	z80_map[0].buffer = gen->zram = calloc(1, Z80_RAM_BYTES);
 #ifndef NO_Z80
 	z80_options *z_opts = malloc(sizeof(z80_options));
 	init_z80_opts(z_opts, z80_map, 5, NULL, 0, MCLKS_PER_Z80, 0xFFFF);
 	gen->z80 = init_z80_context(z_opts);
+#ifndef NEW_CORE
 	gen->z80->next_int_pulse = z80_next_int_pulse;
+#endif
 	z80_assert_reset(gen->z80, 0);
 #else
 	gen->z80 = calloc(1, sizeof(z80_context));
@@ -1334,7 +1844,7 @@
 	gen->z80->system = gen;
 	gen->z80->mem_pointers[0] = gen->zram;
 	gen->z80->mem_pointers[1] = gen->z80->mem_pointers[2] = NULL;
-	
+
 	gen->work_ram = calloc(2, RAM_WORDS);
 	if (!strcmp("random", tern_find_path_default(config, "system\0ram_init\0", (tern_val){.ptrval = "zero"}, TVAL_PTR).ptrval))
 	{
@@ -1359,12 +1869,12 @@
 		{
 			write_cram_internal(gen->vdp, i, rand());
 		}
-		for (int i = 0; i < VSRAM_SIZE; i++)
+		for (int i = 0; i < gen->vdp->vsram_size; i++)
 		{
 			gen->vdp->vsram[i] = rand();
 		}
 	}
-	
+
 	return gen;
 }
 
@@ -1375,7 +1885,7 @@
 
 	gen->cart = main_rom;
 	gen->lock_on = lock_on;
-	
+
 	setup_io_devices(config, rom, &gen->io);
 	gen->header.has_keyboard = io_has_keyboard(&gen->io);
 	gen->mapper_type = rom->mapper_type;
@@ -1395,24 +1905,160 @@
 	} else {
 		gen->save_storage = NULL;
 	}
-	
+
+	gen->mapper_start_index = rom->mapper_start_index;
+
+	tern_node *model = get_model(config, SYSTEM_GENESIS);
+	uint8_t tmss = !strcmp(tern_find_ptr_default(model, "tmss", "off"), "on");
+
 	//This must happen before we generate memory access functions in init_m68k_opts
+	uint8_t next_ptr_index = 0;
+	uint32_t tmss_min_alloc = 16 * 1024;
 	for (int i = 0; i < rom->map_chunks; i++)
 	{
 		if (rom->map[i].start == 0xE00000) {
 			rom->map[i].buffer = gen->work_ram;
-			break;
+			if (!tmss) {
+				break;
+			}
+		}
+		if (rom->map[i].flags & MMAP_PTR_IDX && rom->map[i].ptr_index >= next_ptr_index) {
+			next_ptr_index = rom->map[i].ptr_index + 1;
+		}
+		if (rom->map[i].start < 0x400000 && rom->map[i].read_16 != unused_read) {
+			uint32_t highest_offset = (rom->map[i].end & rom->map[i].mask) + 1;
+			if (highest_offset > tmss_min_alloc) {
+				tmss_min_alloc = highest_offset;
+			}
+		}
+	}
+	if (tmss) {
+		char *tmss_path = tern_find_path_default(config, "system\0tmss_path\0", (tern_val){.ptrval = "tmss.md"}, TVAL_PTR).ptrval;
+		uint8_t *buffer = malloc(tmss_min_alloc);
+		uint32_t tmss_size;
+		if (is_absolute_path(tmss_path)) {
+			FILE *f = fopen(tmss_path, "rb");
+			if (!f) {
+				fatal_error("Configured to use a model with TMSS, but failed to load the TMSS ROM from %s\n", tmss_path);
+			}
+			tmss_size = fread(buffer, 1, tmss_min_alloc, f);
+			fclose(f);
+		} else {
+			char *tmp = read_bundled_file(tmss_path, &tmss_size);
+			if (!tmp) {
+				fatal_error("Configured to use a model with TMSS, but failed to load the TMSS ROM from %s\n", tmss_path);
+			}
+			memcpy(buffer, tmp, tmss_size);
+			free(tmp);
+		}
+		for (uint32_t padded = nearest_pow2(tmss_size); tmss_size < padded; tmss_size++)
+		{
+			buffer[tmss_size] = 0xFF;
+		}
+#ifndef BLASTEM_BIG_ENDIAN
+		byteswap_rom(tmss_size, (uint16_t *)buffer);
+#endif
+		//mirror TMSS ROM until we fill up to tmss_min_alloc
+		for (uint32_t dst = tmss_size; dst < tmss_min_alloc; dst += tmss_size)
+		{
+			memcpy(buffer + dst, buffer, dst + tmss_size > tmss_min_alloc ? tmss_min_alloc - dst : tmss_size);
 		}
+		//modify mappings for ROM space to point to the TMSS ROM and fixup flags to allow switching back and forth
+		//WARNING: This code makes some pretty big assumptions about the kinds of map chunks it will encounter
+		for (int i = 0; i < rom->map_chunks; i++)
+		{
+			if (rom->map[i].start < 0x400000 && rom->map[i].read_16 != unused_read) {
+				if (rom->map[i].flags == MMAP_READ) {
+					//Normal ROM
+					rom->map[i].flags |= MMAP_PTR_IDX | MMAP_CODE;
+					rom->map[i].ptr_index = next_ptr_index++;
+					if (rom->map[i].ptr_index >= NUM_MEM_AREAS) {
+						fatal_error("Too many memmap chunks with MMAP_PTR_IDX after TMSS remap\n");
+					}
+					gen->tmss_pointers[rom->map[i].ptr_index] = rom->map[i].buffer;
+					rom->map[i].buffer = buffer + (rom->map[i].start & ~rom->map[i].mask & (tmss_size - 1));
+				} else if (rom->map[i].flags & MMAP_PTR_IDX) {
+					//Sega mapper page or multi-game mapper
+					gen->tmss_pointers[rom->map[i].ptr_index] = rom->map[i].buffer;
+					rom->map[i].buffer = buffer + (rom->map[i].start & ~rom->map[i].mask & (tmss_size - 1));
+					if (rom->map[i].write_16) {
+						if (!gen->tmss_write_16) {
+							gen->tmss_write_16 = rom->map[i].write_16;
+							gen->tmss_write_8 = rom->map[i].write_8;
+							rom->map[i].write_16 = tmss_rom_write_16;
+							rom->map[i].write_8 = tmss_rom_write_8;
+						} else if (gen->tmss_write_16 == rom->map[i].write_16) {
+							rom->map[i].write_16 = tmss_rom_write_16;
+							rom->map[i].write_8 = tmss_rom_write_8;
+						} else {
+							warning("Chunk starting at %X has a write function, but we've already stored a different one for TMSS remap\n", rom->map[i].start);
+						}
+					}
+				} else if ((rom->map[i].flags & (MMAP_READ | MMAP_WRITE)) == (MMAP_READ | MMAP_WRITE)) {
+					//RAM or SRAM
+					rom->map[i].flags |= MMAP_PTR_IDX;
+					rom->map[i].ptr_index = next_ptr_index++;
+					gen->tmss_pointers[rom->map[i].ptr_index] = rom->map[i].buffer;
+					rom->map[i].buffer = buffer + (rom->map[i].start & ~rom->map[i].mask & (tmss_size - 1));
+					if (!gen->tmss_write_offset || gen->tmss_write_offset == rom->map[i].start) {
+						gen->tmss_write_offset = rom->map[i].start;
+						rom->map[i].flags &= ~MMAP_WRITE;
+						if (rom->map[i].flags & MMAP_ONLY_ODD) {
+							rom->map[i].write_16 = tmss_odd_write_16;
+							rom->map[i].write_8 = tmss_odd_write_8;
+						} else if (rom->map[i].flags & MMAP_ONLY_EVEN) {
+							rom->map[i].write_16 = tmss_even_write_16;
+							rom->map[i].write_8 = tmss_even_write_8;
+						} else {
+							rom->map[i].write_16 = tmss_word_write_16;
+							rom->map[i].write_8 = tmss_word_write_8;
+						}
+					} else {
+						warning("Could not remap writes for chunk starting at %X for TMSS because write_offset is %X\n", rom->map[i].start, gen->tmss_write_offset);
+					}
+				} else if (rom->map[i].flags & MMAP_READ_CODE) {
+					//NOR flash
+					rom->map[i].flags |= MMAP_PTR_IDX;
+					rom->map[i].ptr_index = next_ptr_index++;
+					if (rom->map[i].ptr_index >= NUM_MEM_AREAS) {
+						fatal_error("Too many memmap chunks with MMAP_PTR_IDX after TMSS remap\n");
+					}
+					gen->tmss_pointers[rom->map[i].ptr_index] = rom->map[i].buffer;
+					rom->map[i].buffer = buffer + (rom->map[i].start & ~rom->map[i].mask & (tmss_size - 1));
+					if (!gen->tmss_write_16) {
+						gen->tmss_write_16 = rom->map[i].write_16;
+						gen->tmss_write_8 = rom->map[i].write_8;
+						gen->tmss_read_16 = rom->map[i].read_16;
+						gen->tmss_read_8 = rom->map[i].read_8;
+						rom->map[i].write_16 = tmss_rom_write_16;
+						rom->map[i].write_8 = tmss_rom_write_8;
+						rom->map[i].read_16 = tmss_rom_read_16;
+						rom->map[i].read_8 = tmss_rom_read_8;
+					} else if (gen->tmss_write_16 == rom->map[i].write_16) {
+						rom->map[i].write_16 = tmss_rom_write_16;
+						rom->map[i].write_8 = tmss_rom_write_8;
+						rom->map[i].read_16 = tmss_rom_read_16;
+						rom->map[i].read_8 = tmss_rom_read_8;
+					} else {
+						warning("Chunk starting at %X has a write function, but we've already stored a different one for TMSS remap\n", rom->map[i].start);
+					}
+				} else {
+					warning("Didn't remap chunk starting at %X for TMSS because it has flags %X\n", rom->map[i].start, rom->map[i].flags);
+				}
+			}
+		}
+		gen->tmss_buffer = buffer;
 	}
 
 	m68k_options *opts = malloc(sizeof(m68k_options));
 	init_m68k_opts(opts, rom->map, rom->map_chunks, MCLKS_PER_68K);
-	//TODO: make this configurable
-	opts->gen.flags |= M68K_OPT_BROKEN_READ_MODIFY;
+	if (!strcmp(tern_find_ptr_default(model, "tas", "broken"), "broken")) {
+		opts->gen.flags |= M68K_OPT_BROKEN_READ_MODIFY;
+	}
 	gen->m68k = init_68k_context(opts, NULL);
 	gen->m68k->system = gen;
 	opts->address_log = (system_opts & OPT_ADDRESS_LOG) ? fopen("address.log", "w") : NULL;
-	
+
 	//This must happen after the 68K context has been allocated
 	for (int i = 0; i < rom->map_chunks; i++)
 	{
@@ -1420,7 +2066,7 @@
 			gen->m68k->mem_pointers[rom->map[i].ptr_index] = rom->map[i].buffer;
 		}
 	}
-	
+
 	if (gen->mapper_type == MAPPER_SEGA) {
 		//initialize bank registers
 		for (int i = 1; i < sizeof(gen->bank_regs); i++)
@@ -1428,21 +2074,25 @@
 			gen->bank_regs[i] = i;
 		}
 	}
+	gen->reset_cycle = CYCLE_NEVER;
 
 	return gen;
 }
 
-static memmap_chunk base_map[] = {
-	{0xE00000, 0x1000000, 0xFFFF,   0, 0, MMAP_READ | MMAP_WRITE | MMAP_CODE, NULL,
-			   NULL,          NULL,         NULL,            NULL},
-	{0xC00000, 0xE00000,  0x1FFFFF, 0, 0, 0,                                  NULL,
-			   (read_16_fun)vdp_port_read,  (write_16_fun)vdp_port_write,
-			   (read_8_fun)vdp_port_read_b, (write_8_fun)vdp_port_write_b},
-	{0xA00000, 0xA12000,  0x1FFFF,  0, 0, 0,                                  NULL,
-			   (read_16_fun)io_read_w,      (write_16_fun)io_write_w,
-			   (read_8_fun)io_read,         (write_8_fun)io_write}
-};
-const size_t base_chunks = sizeof(base_map)/sizeof(*base_map); 
+	static memmap_chunk base_map[] = { //68K memory map entries passed to configure_rom as the base mapping
+		{0xE00000, 0x1000000, 0xFFFF,   0, 0, MMAP_READ | MMAP_WRITE | MMAP_CODE, NULL, //work RAM; the buffer pointer is filled in later with gen->work_ram
+		           NULL,          NULL,         NULL,            NULL},
+		{0xC00000, 0xE00000,  0x1FFFFF, 0, 0, 0,                                  NULL, //VDP ports
+		           (read_16_fun)vdp_port_read,  (write_16_fun)vdp_port_write,
+		           (read_8_fun)vdp_port_read_b, (write_8_fun)vdp_port_write_b},
+		{0xA00000, 0xA12000,  0x1FFFF,  0, 0, 0,                                  NULL, //IO area handled by io_read/io_write
+		           (read_16_fun)io_read_w,      (write_16_fun)io_write_w,
+		           (read_8_fun)io_read,         (write_8_fun)io_write},
+		{0x000000, 0xFFFFFF, 0xFFFFFF, 0, 0, 0,                                   NULL, //fallback spanning the whole address space; presumably lookups take the first matching chunk - confirm
+		           (read_16_fun)unused_read,    (write_16_fun)unused_write,
+		           (read_8_fun)unused_read_b,   (write_8_fun)unused_write_b}
+	};
+const size_t base_chunks = sizeof(base_map)/sizeof(*base_map);
 
 genesis_context *alloc_config_genesis(void *rom, uint32_t rom_size, void *lock_on, uint32_t lock_on_size, uint32_t ym_opts, uint8_t force_region)
 {
@@ -1471,17 +2121,17 @@
 {
 	tern_node *rom_db = get_rom_db();
 	rom_info info = configure_rom(rom_db, media->buffer, media->size, NULL, 0, base_map, base_chunks);
-	
+
 	segacd_context *cd = alloc_configure_segacd(media, system_opts, force_region, &info);
 	genesis_context *gen = shared_init(system_opts, &info, force_region);
 	gen->cart = gen->lock_on = NULL;
 	gen->save_storage = NULL;
 	gen->save_type = SAVE_NONE;
 	gen->version_reg &= ~NO_DISK;
-	
+
 	gen->expansion = cd;
 	setup_io_devices(config, &info, &gen->io);
-	
+
 	uint32_t cd_chunks;
 	memmap_chunk *cd_map = segacd_main_cpu_map(gen->expansion, &cd_chunks);
 	memmap_chunk *map = malloc(sizeof(memmap_chunk) * (cd_chunks + base_chunks));
@@ -1489,7 +2139,7 @@
 	memcpy(map + cd_chunks, base_map, sizeof(memmap_chunk) * base_chunks);
 	map[cd_chunks].buffer = gen->work_ram;
 	uint32_t num_chunks = cd_chunks + base_chunks;
-	
+
 	m68k_options *opts = malloc(sizeof(m68k_options));
 	init_m68k_opts(opts, map, num_chunks, MCLKS_PER_68K);
 	//TODO: make this configurable
@@ -1497,7 +2147,7 @@
 	gen->m68k = init_68k_context(opts, NULL);
 	gen->m68k->system = gen;
 	opts->address_log = (system_opts & OPT_ADDRESS_LOG) ? fopen("address.log", "w") : NULL;
-	
+
 	//This must happen after the 68K context has been allocated
 	for (int i = 0; i < num_chunks; i++)
 	{
--- a/genesis.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/genesis.h	Sat Jan 15 13:15:21 2022 -0800
@@ -9,7 +9,11 @@
 #include <stdint.h>
 #include "system.h"
 #include "m68k_core.h"
+#ifdef NEW_CORE
+#include "z80.h"
+#else
 #include "z80_to_x86.h"
+#endif
 #include "ym2612.h"
 #include "vdp.h"
 #include "psg.h"
@@ -36,6 +40,14 @@
 	uint8_t         *save_storage;
 	void            *mapper_temp;
 	eeprom_map      *eeprom_map;
+	write_16_fun    tmss_write_16;
+	write_8_fun     tmss_write_8;
+	read_16_fun     tmss_read_16;
+	read_8_fun      tmss_read_8;
+	uint16_t        *tmss_pointers[NUM_MEM_AREAS];
+	uint8_t         *tmss_buffer;
+	uint8_t         *serialize_tmp;
+	size_t          serialize_size;
 	uint32_t        num_eeprom;
 	uint32_t        save_size;
 	uint32_t        save_ram_mask;
@@ -46,14 +58,22 @@
 	uint32_t        int_latency_prev1;
 	uint32_t        int_latency_prev2;
 	uint32_t        reset_cycle;
-	uint8_t         bank_regs[8];
+	uint32_t        last_frame;
+	uint32_t        last_flush_cycle;
+	uint32_t        soft_flush_cycles;
+	uint32_t        tmss_write_offset;
+	uint16_t        z80_bank_reg;
+	uint16_t        tmss_lock[2];
 	uint16_t        mapper_start_index;
 	uint8_t         mapper_type;
+	uint8_t         bank_regs[9];
 	uint8_t         save_type;
 	sega_io         io;
 	uint8_t         version_reg;
 	uint8_t         bus_busy;
 	uint8_t         reset_requested;
+	uint8_t         tmss;
+	uint8_t         vdp_unlocked;
 	eeprom_state    eeprom;
 	nor_state       nor;
 };
@@ -64,7 +84,7 @@
 m68k_context * sync_components(m68k_context *context, uint32_t address);
 genesis_context *alloc_config_genesis(void *rom, uint32_t rom_size, void *lock_on, uint32_t lock_on_size, uint32_t system_opts, uint8_t force_region);
 genesis_context *alloc_config_genesis_cdboot(system_media *media, uint32_t system_opts, uint8_t force_region);
-void genesis_serialize(genesis_context *gen, serialize_buffer *buf, uint32_t m68k_pc);
+void genesis_serialize(genesis_context *gen, serialize_buffer *buf, uint32_t m68k_pc, uint8_t all);
 void genesis_deserialize(deserialize_buffer *buf, genesis_context *gen);
 
 #endif //GENESIS_H_
--- a/gentests.py	Sat Jan 05 00:58:08 2019 -0800
+++ b/gentests.py	Sat Jan 15 13:15:21 2022 -0800
@@ -136,7 +136,10 @@
 			num = already.get('label', 0)+1
 			already['label'] = num
 			if (already[str(self.index)] + self.disp) & 1:
-				self.disp += 1
+				if self.disp > 0:
+					self.disp -= 1
+				else:
+					self.disp += 1
 			address = 'lbl_' + str(num) + ' + 2 + ' + str(self.disp) + ' + ' + str(index)
 		else:
 			if self.base == self.index:
--- a/gst.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/gst.c	Sat Jan 15 13:15:21 2022 -0800
@@ -144,6 +144,7 @@
 	}
 	uint8_t * curpos = regdata;
 	uint8_t f = *(curpos++);
+#ifndef NEW_CORE
 	context->flags[ZF_C] = f & 1;
 	f >>= 1;
 	context->flags[ZF_N] = f & 1;
@@ -200,6 +201,7 @@
 		context->mem_pointers[1] = NULL;
 	}
 	context->bank_reg = bank >> 15;
+#endif
 	uint8_t buffer[Z80_RAM_BYTES];
 	fseek(gstfile, GST_Z80_RAM, SEEK_SET);
 	if(fread(buffer, 1, sizeof(buffer), gstfile) != (8*1024)) {
@@ -210,11 +212,15 @@
 	{
 		if (context->mem_pointers[0][i] != buffer[i]) {
 			context->mem_pointers[0][i] = buffer[i];
+#ifndef NEW_CORE
 			z80_handle_code_write(i, context);
+#endif
 		}
 	}
+#ifndef NEW_CORE
 	context->native_pc = NULL;
 	context->extra_pc = NULL;
+#endif
 	return 1;
 }
 
@@ -238,11 +244,11 @@
 		uint16_t value;
 		write_cram_internal(context, i, (tmp_buf[i*2+1] << 8) | tmp_buf[i*2]);
 	}
-	if (fread(tmp_buf, 2, VSRAM_SIZE, state_file) != VSRAM_SIZE) {
+	if (fread(tmp_buf, 2, MIN_VSRAM_SIZE, state_file) != MIN_VSRAM_SIZE) {
 		fputs("Failed to read VSRAM from savestate\n", stderr);
 		return 0;
 	}
-	for (int i = 0; i < VSRAM_SIZE; i++) {
+	for (int i = 0; i < MIN_VSRAM_SIZE; i++) {
 		context->vsram[i] = (tmp_buf[i*2+1] << 8) | tmp_buf[i*2];
 	}
 	fseek(state_file, GST_VDP_MEM, SEEK_SET);
@@ -274,12 +280,12 @@
 		fputs("Error writing CRAM to savestate\n", stderr);
 		return 0;
 	}
-	for (int i = 0; i < VSRAM_SIZE; i++)
+	for (int i = 0; i < MIN_VSRAM_SIZE; i++)
 	{
 		tmp_buf[i*2] = context->vsram[i];
 		tmp_buf[i*2+1] = context->vsram[i] >> 8;
 	}
-	if (fwrite(tmp_buf, 2, VSRAM_SIZE, outfile) != VSRAM_SIZE) {
+	if (fwrite(tmp_buf, 2, MIN_VSRAM_SIZE, outfile) != MIN_VSRAM_SIZE) {
 		fputs("Error writing VSRAM to savestate\n", stderr);
 		return 0;
 	}
@@ -296,6 +302,7 @@
 	uint8_t regdata[GST_Z80_REG_SIZE];
 	uint8_t * curpos = regdata;
 	memset(regdata, 0, sizeof(regdata));
+#ifndef NEW_CORE
 	uint8_t f = context->flags[ZF_S];
 	f <<= 1;
 	f |= context->flags[ZF_Z] ;
@@ -348,6 +355,7 @@
 	curpos += 3;
 	uint32_t bank = context->bank_reg << 15;
 	write_le_32(curpos, bank);
+#endif
 	fseek(gstfile, GST_Z80_REGS, SEEK_SET);
 	if (fwrite(regdata, 1, sizeof(regdata), gstfile) != sizeof(regdata)) {
 		return 0;
Binary file images/genesis_6b.png has changed
Binary file images/wiiu.png has changed
--- a/img2tiles.py	Sat Jan 05 00:58:08 2019 -0800
+++ b/img2tiles.py	Sat Jan 15 13:15:21 2022 -0800
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 from PIL import Image
 
 def gchannel(Val):
@@ -68,30 +68,30 @@
 	glist = [(gencolors[color][0] * gencolors[color][1], color) for color in gencolors]
 	glist.sort()
 	glist.reverse()
-	
+
 	return glist
 
 def make_palette(im, trans_thresh, max_global, max_line):
 	pixels = im.getdata()
 	(width, height) = im.size
-	colors = get_color_info(im, pixels, xrange(0, height * width), trans_thresh)
-	print len(colors), 'distinct 9-bit colors in image'
+	colors = get_color_info(im, pixels, range(0, height * width), trans_thresh)
+	print(len(colors), 'distinct 9-bit colors in image')
 	glob_pal = {}
-	print 'Static Palette:'
+	print('Static Palette:')
 	while len(glob_pal) < max_global and len(colors):
 		idx = len(glob_pal)
 		(count, color) = colors[0]
-		print str(idx) + ':', color
+		print(str(idx) + ':', color)
 		glob_pal[color] = idx
-		colors = get_color_info_both(im, pixels, xrange(0, height * width), trans_thresh, glob_pal)
+		colors = get_color_info_both(im, pixels, range(0, height * width), trans_thresh, glob_pal)
 	line_pals = []
 	if max_global < len(colors):
-		for line in xrange(0, height):
+		for line in range(0, height):
 			linestart = line * width
 			if len(glob_pal):
-				linecolors = get_color_info_both(im, pixels, xrange(linestart, linestart+width), trans_thresh, glob_pal)
+				linecolors = get_color_info_both(im, pixels, range(linestart, linestart+width), trans_thresh, glob_pal)
 			else:
-				linecolors = get_color_info(im, pixels, xrange(linestart, linestart+width), trans_thresh)
+				linecolors = get_color_info(im, pixels, range(linestart, linestart+width), trans_thresh)
 			line_pal = {}
 			while len(line_pal) < max_line and len(linecolors):
 				(score, color) = linecolors[0]
@@ -100,8 +100,8 @@
 					combo = dict(glob_pal)
 					for color in line_pal:
 						combo[color] = line_pal[color]
-					linecolors = get_color_info_both(im, pixels, xrange(linestart, linestart+width), trans_thresh, combo)
-			#for idx in xrange(0, min(max_line, len(linecolors))):
+					linecolors = get_color_info_both(im, pixels, range(linestart, linestart+width), trans_thresh, combo)
+			#for idx in range(0, min(max_line, len(linecolors))):
 			#	(count, color) = linecolors[idx]
 			#	line_pal[color] = idx + max_global
 			line_pals.append(line_pal)
@@ -116,7 +116,7 @@
 	bestdist = color_dist((0,0,0), (15, 15, 15))
 	bestpalidx = 0
 	bestcolor = (0,0,0)
-	for i in xrange(0, len(pals)):
+	for i in range(0, len(pals)):
 		pal = pals[i]
 		for cur in pal:
 			curdist = color_dist(gpixel, cur)
@@ -142,7 +142,7 @@
 			x = 0
 			y += 1
 			if width % 8 and not chunky:
-				for i in xrange(0, 8-(width%8)):
+				for i in range(0, 8-(width%8)):
 					gpixels.append(0)
 		gpixel = get_gcolor(im, trans_thresh, color=pixel)
 		if type(gpixel) == tuple:
@@ -158,12 +158,12 @@
 			gpixels.append(gpixel)
 		x += 1
 	if width % 8 and not chunky:
-		for i in xrange(0, 8-(width%8)):
+		for i in range(0, 8-(width%8)):
 			gpixels.append(0)
 		width += 8-(width%8)
 	if height % 8 and not chunky:
-		for y in xrange(0, 8-(height%8)):
-			for x in xrange(0, width):
+		for y in range(0, 8-(height%8)):
+			for x in range(0, width):
 				gpixels.append(0)
 		height += 8-(height%8)
 
@@ -183,37 +183,37 @@
 		for pixel in pixels:
 			b.append(pixel)
 	else:
-		cwidth = width/8
-		cheight = height/tile_height
-		words = len(pixels)/4
+		cwidth = width//8
+		cheight = height//tile_height
+		words = len(pixels)//4
 		if not raw:
 			appendword(b, words)
 			appendword(b, cwidth)
 			appendword(b, cheight)
 
 		if sprite_order:
-			for cx in xrange(0, cwidth):
+			for cx in range(0, cwidth):
 				xstart = cx * 8
-				for cy in xrange(0, cheight):
+				for cy in range(0, cheight):
 					startoff = cy*tile_height*width + xstart
-					for row in xrange(0, tile_height):
+					for row in range(0, tile_height):
 						rowoff = startoff + row*width
-						for bytecol in xrange(0, 4):
+						for bytecol in range(0, 4):
 							boff = bytecol * 2 + rowoff
-							#print 'boff:', boff, 'len(pixels)', len(pixels), 'cx', cx, 'cy', cy, 'cwidth', cwidth, 'cheight', cheight
-							#print 'pixels[boff]:', pixels[boff]
+							#print('boff:', boff, 'len(pixels)', len(pixels), 'cx', cx, 'cy', cy, 'cwidth', cwidth, 'cheight', cheight)
+							#print('pixels[boff]:', pixels[boff])
 							b.append(pixels[boff] << 4 | pixels[boff+1])
 		else:
-			for cy in xrange(0, cheight):
+			for cy in range(0, cheight):
 				ystart = cy*tile_height*width
-				for cx in xrange(0, cwidth):
+				for cx in range(0, cwidth):
 					startoff = (cx*8) + ystart
-					for row in xrange(0, tile_height):
+					for row in range(0, tile_height):
 						rowoff = startoff + row*width
-						for bytecol in xrange(0, 4):
+						for bytecol in range(0, 4):
 							boff = bytecol * 2 + rowoff
-							#print 'boff:', boff, 'len(pixels)', len(pixels), 'cx', cx, 'cy', cy, 'cwidth', cwidth, 'cheight', cheight
-							#print 'pixels[boff]:', pixels[boff]
+							#print('boff:', boff, 'len(pixels)', len(pixels), 'cx', cx, 'cy', cy, 'cwidth', cwidth, 'cheight', cheight)
+							#print('pixels[boff]:', pixels[boff])
 							b.append(pixels[boff] << 4 | pixels[boff+1])
 	return b
 
@@ -247,7 +247,7 @@
 	options = {}
 	tile_height = 8
 	sprite_order = False
-	for i in xrange(1, len(argv)):
+	for i in range(1, len(argv)):
 		if argv[i].startswith('-'):
 			if argv[i] == '-r':
 				raw = True
@@ -262,7 +262,7 @@
 			elif argv[i] == '-s' or argv[i] == '--spec':
 				expect_option = 'specfile'
 			else:
-				print 'Unrecognized switch', argv[i]
+				print('Unrecognized switch', argv[i])
 				return
 		elif not expect_option is None:
 			options[expect_option] = argv[i]
@@ -270,7 +270,7 @@
 		else:
 			posargs.append(argv[i])
 	if len(posargs) < 2 and not ('specfile' in options and len(posargs) >= 1):
-		print "Usage: img2tiles.py [OPTIONS] infile outfile [STATIC_COLORS [DYNAMIC_COLORS]]"
+		print("Usage: img2tiles.py [OPTIONS] infile outfile [STATIC_COLORS [DYNAMIC_COLORS]]")
 		return
 	if 'specfile' in options:
 		props = open(options['specfile']).read().strip().split(',')
@@ -300,7 +300,7 @@
 		if len(posargs) > 3:
 			dynamic_colors = int(posargs[3])
 	if dynamic_colors + static_colors > 16:
-		print "No more than 16 combined dynamic and static colors are allowed"
+		print("No more than 16 combined dynamic and static colors are allowed")
 		return
 	im = Image.open(fname)
 	pal = make_palette(im, threshold, static_colors, dynamic_colors)
--- a/io.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/io.c	Sat Jan 15 13:15:21 2022 -0800
@@ -39,7 +39,9 @@
 	"EA 4-way Play cable A",
 	"EA 4-way Play cable B",
 	"Sega Parallel Transfer Board",
-	"Generic Device"
+	"Generic Device",
+	"Generic Serial",
+	"Heartbeat Personal Trainer"
 };
 
 #define GAMEPAD_TH0 0
@@ -58,6 +60,13 @@
 	IO_READ
 };
 
+enum { //protocol states of the emulated Heartbeat Personal Trainer, stored in heartbeat_trainer.state
+	HBPT_NEED_INIT, //RTC base has not been loaded from nv_memory yet, see hbpt_check_init
+	HBPT_IDLE, //the next transmitted byte is interpreted as a command
+	HBPT_CMD_PAYLOAD, //collecting remaining_bytes bytes of payload for the current command
+	HBPT_REPLY //remaining_bytes bytes of reply data are still to be sent
+};
+
 typedef struct {
 	uint8_t states[2], value;
 } gp_button_def;
@@ -86,6 +95,9 @@
 		if (port->device_type < IO_MOUSE && port->device.pad.gamepad_num == gamepad_num) {
 			return port;
 		}
+		if (port->device_type == IO_HEARTBEAT_TRAINER && port->device.heartbeat_trainer.device_num == gamepad_num) {
+			return port;
+		} 
 	}
 	return NULL;
 }
@@ -210,8 +222,20 @@
 	return find_keyboard(io) != NULL;
 }
 
+static void set_serial_clock(io_port *port)
+{
+	//cycles per serial bit, indexed by the top two bits of the serial control register
+	static const uint32_t cycles_per_bit[] = {
+		11186, 22372, //4800 bps, 2400 bps
+		44744, 178976 //1200 bps, 300 bps
+	};
+	uint8_t rate_select = port->serial_ctrl >> 6;
+	port->serial_divider = cycles_per_bit[rate_select];
+}
+
 void process_device(char * device_type, io_port * port)
 {
+	set_serial_clock(port);
 	//assuming that the io_port struct has been zeroed if this is the first time this has been called
 	if (!device_type)
 	{
@@ -219,8 +243,7 @@
 	}
 
 	const int gamepad_len = strlen("gamepad");
-	const int mouse_len = strlen("mouse");
-	if (!strncmp(device_type, "gamepad", gamepad_len))
+	if (startswith(device_type, "gamepad"))
 	{
 		if (
 			(device_type[gamepad_len] != '3' && device_type[gamepad_len] != '6' && device_type[gamepad_len] != '2')
@@ -236,10 +259,14 @@
 			port->device_type = IO_GAMEPAD6;
 		}
 		port->device.pad.gamepad_num = device_type[gamepad_len+2] - '0';
-	} else if(!strncmp(device_type, "mouse", mouse_len)) {
+	} else if(startswith(device_type, "heartbeat_trainer.")) {
+		port->device_type = IO_HEARTBEAT_TRAINER;
+		port->device.heartbeat_trainer.nv_memory = NULL;
+		port->device.heartbeat_trainer.device_num = device_type[strlen("heartbeat_trainer.")] - '0';
+	} else if(startswith(device_type, "mouse")) {
 		if (port->device_type != IO_MOUSE) {
 			port->device_type = IO_MOUSE;
-			port->device.mouse.mouse_num = device_type[mouse_len+1] - '0';
+			port->device.mouse.mouse_num = device_type[strlen("mouse")+1] - '0';
 			port->device.mouse.last_read_x = 0;
 			port->device.mouse.last_read_y = 0;
 			port->device.mouse.cur_x = 0;
@@ -273,6 +300,12 @@
 			port->device.stream.data_fd = -1;
 			port->device.stream.listen_fd = -1;
 		}
+	} else if(!strcmp(device_type, "serial")) {
+		if (port->device_type != IO_GENERIC_SERIAL) {
+			port->device_type = IO_GENERIC_SERIAL;
+			port->device.stream.data_fd = -1;
+			port->device.stream.listen_fd = -1;
+		}
 	}
 }
 
@@ -301,9 +334,9 @@
 {
 	io_port * ports = io->ports;
 	tern_node *io_nodes = tern_find_path(config, "io\0devices\0", TVAL_NODE).ptrval;
-	char * io_1 = rom->port1_override ? rom->port1_override : io_nodes ? tern_find_ptr(io_nodes, "1") : NULL;
-	char * io_2 = rom->port2_override ? rom->port2_override : io_nodes ? tern_find_ptr(io_nodes, "2") : NULL;
-	char * io_ext = rom->ext_override ? rom->ext_override : io_nodes ? tern_find_ptr(io_nodes, "ext") : NULL;
+	char * io_1 = rom->port1_override ? rom->port1_override : tern_find_ptr_default(io_nodes, "1", "gamepad6.1");
+	char * io_2 = rom->port2_override ? rom->port2_override : tern_find_ptr_default(io_nodes, "2", "gamepad6.2");
+	char * io_ext = rom->ext_override ? rom->ext_override : tern_find_ptr(io_nodes, "ext");
 
 	process_device(io_1, ports);
 	process_device(io_2, ports+1);
@@ -336,7 +369,7 @@
 				warning("IO port %s is configured to use the sega parallel board, but no paralell_pipe is set!\n", io_name(i));
 				ports[i].device_type = IO_NONE;
 			} else {
-				printf("IO port: %s connected to device '%s' with pipe name: %s\n", io_name(i), device_type_names[ports[i].device_type], pipe_name);
+				debug_message("IO port: %s connected to device '%s' with pipe name: %s\n", io_name(i), device_type_names[ports[i].device_type], pipe_name);
 				if (!strcmp("stdin", pipe_name))
 				{
 					ports[i].device.stream.data_fd = STDIN_FILENO;
@@ -355,14 +388,14 @@
 					}
 				}
 			}
-		} else if (ports[i].device_type == IO_GENERIC && ports[i].device.stream.data_fd == -1) {
+		} else if ((ports[i].device_type == IO_GENERIC || ports[i].device_type == IO_GENERIC_SERIAL) && ports[i].device.stream.data_fd == -1) { //both socket backed types, only while data_fd is unset
 			char *sock_name = tern_find_path(config, "io\0socket\0", TVAL_PTR).ptrval;
 			if (!sock_name)
 			{
 				warning("IO port %s is configured to use generic IO, but no socket is set!\n", io_name(i));
 				ports[i].device_type = IO_NONE;
 			} else {
-				printf("IO port: %s connected to device '%s' with socket name: %s\n", io_name(i), device_type_names[ports[i].device_type], sock_name);
+				debug_message("IO port: %s connected to device '%s' with socket name: %s\n", io_name(i), device_type_names[ports[i].device_type], sock_name);
 				ports[i].device.stream.data_fd = -1;
 				ports[i].device.stream.listen_fd = socket(AF_UNIX, SOCK_STREAM, 0);
 				size_t pathlen = strlen(sock_name);
@@ -392,9 +425,33 @@
 		} else
 #endif
 		if (ports[i].device_type == IO_GAMEPAD3 || ports[i].device_type == IO_GAMEPAD6 || ports[i].device_type == IO_GAMEPAD2) {
-			printf("IO port %s connected to gamepad #%d with type '%s'\n", io_name(i), ports[i].device.pad.gamepad_num + 1, device_type_names[ports[i].device_type]);
+			debug_message("IO port %s connected to gamepad #%d with type '%s'\n", io_name(i), ports[i].device.pad.gamepad_num, device_type_names[ports[i].device_type]);
+		} else if (ports[i].device_type == IO_HEARTBEAT_TRAINER) {
+			debug_message("IO port %s connected to Heartbeat Personal Trainer #%d\n", io_name(i), ports[i].device.heartbeat_trainer.device_num);
+			if (rom->save_type == SAVE_HBPT) {
+				ports[i].device.heartbeat_trainer.nv_memory = rom->save_buffer;
+				uint32_t page_size = 16;
+				for (; page_size < 128; page_size *= 2)
+				{
+					if (rom->save_size / page_size < 256) {
+						break;
+					}
+				}
+				ports[i].device.heartbeat_trainer.nv_page_size = page_size;
+				uint32_t num_pages = rom->save_size / page_size;
+				ports[i].device.heartbeat_trainer.nv_pages = num_pages < 256 ? num_pages : 255;
+			} else {
+				ports[i].device.heartbeat_trainer.nv_page_size = 16;
+				ports[i].device.heartbeat_trainer.nv_pages = 32;
+				size_t bufsize = 
+					ports[i].device.heartbeat_trainer.nv_page_size * ports[i].device.heartbeat_trainer.nv_pages
+					+ 5 + 8;
+				ports[i].device.heartbeat_trainer.nv_memory = malloc(bufsize);
+				memset(ports[i].device.heartbeat_trainer.nv_memory, 0xFF, bufsize);
+			}
+			ports[i].device.heartbeat_trainer.state = HBPT_NEED_INIT;
 		} else {
-			printf("IO port %s connected to device '%s'\n", io_name(i), device_type_names[ports[i].device_type]);
+			debug_message("IO port %s connected to device '%s'\n", io_name(i), device_type_names[ports[i].device_type]);
 		}
 	}
 }
@@ -428,7 +485,6 @@
 	}
 }
 
-uint32_t last_poll_cycle;
 void io_adjust_cycles(io_port * port, uint32_t current_cycle, uint32_t deduction)
 {
 	/*uint8_t control = pad->control | 0x80;
@@ -460,25 +516,80 @@
 			}
 		}
 	}
-	if (last_poll_cycle >= deduction) {
-		last_poll_cycle -= deduction;
+	if (port->transmit_end >= deduction) {
+		port->transmit_end -= deduction;
 	} else {
-		last_poll_cycle = 0;
+		port->transmit_end = 0;
+	}
+	if (port->receive_end >= deduction) {
+		port->receive_end -= deduction;
+	} else {
+		port->receive_end = 0;
+	}
+	if (port->last_poll_cycle >= deduction) {
+		port->last_poll_cycle -= deduction;
+	} else {
+		port->last_poll_cycle = 0;
 	}
 }
 
 #ifndef _WIN32
-static void wait_for_connection(io_port * port)
+static void wait_for_connection(io_port *port)
 {
 	if (port->device.stream.data_fd == -1)
 	{
-		puts("Waiting for socket connection...");
+		debug_message("Waiting for socket connection...\n");
 		port->device.stream.data_fd = accept(port->device.stream.listen_fd, NULL, NULL);
 		fcntl(port->device.stream.data_fd, F_SETFL, O_NONBLOCK | O_RDWR);
 	}
 }
 
-static void service_pipe(io_port * port)
+static void poll_for_connection(io_port *port)
+{
+	if (port->device.stream.data_fd != -1) {
+		return; //a client is already connected
+	}
+	fcntl(port->device.stream.listen_fd, F_SETFL, O_NONBLOCK | O_RDWR); //single non-blocking accept attempt
+	port->device.stream.data_fd = accept(port->device.stream.listen_fd, NULL, NULL);
+	fcntl(port->device.stream.listen_fd, F_SETFL, O_RDWR); //put the listener's flags back
+	if (port->device.stream.data_fd != -1) {
+		fcntl(port->device.stream.data_fd, F_SETFL, O_NONBLOCK | O_RDWR);
+	}
+}
+
+static void write_serial_byte(io_port *port)
+{ //sends the byte in serial_transmitting to the socket, looping until send reports it was written
+	fcntl(port->device.stream.data_fd, F_SETFL, O_RDWR); //O_NONBLOCK is left out here for the duration of the send
+	for (int sent = 0; sent != sizeof(port->serial_transmitting);)
+	{
+		sent = send(port->device.stream.data_fd, &port->serial_transmitting, sizeof(port->serial_transmitting), 0);
+		if (sent < 0) { //send failed: drop this descriptor and wait for a new connection before retrying
+			close(port->device.stream.data_fd);
+			port->device.stream.data_fd = -1;
+			wait_for_connection(port);
+			fcntl(port->device.stream.data_fd, F_SETFL, O_RDWR);
+		}
+	}
+	fcntl(port->device.stream.data_fd, F_SETFL, O_NONBLOCK | O_RDWR); //set O_NONBLOCK again once the byte is out
+}
+
+static void read_serial_byte(io_port *port)
+{
+	poll_for_connection(port);
+	int fd = port->device.stream.data_fd;
+	if (fd == -1) {
+		return; //nobody connected, nothing to receive
+	}
+	int received = recv(fd, &port->serial_receiving, sizeof(port->serial_receiving), 0);
+	if (received > 0) {
+		port->receive_end = port->serial_cycle + 10 * port->serial_divider; //10 bit periods until the byte is complete
+	} else if (received < 0 && errno != EAGAIN && errno != EWOULDBLOCK) {
+		close(fd); //a real error rather than "no data yet": forget this connection
+		port->device.stream.data_fd = -1;
+	}
+}
+
+static void service_pipe(io_port *port)
 {
 	uint8_t value;
 	int numRead = read(port->device.stream.data_fd, &value, sizeof(value));
@@ -569,6 +680,257 @@
 }
 #endif
 
+enum { //command bytes handled by hbpt_write_byte while the device is in HBPT_IDLE
+	HBPT_UNKNOWN1 = 1, //answered with an 11 byte reply that has no source buffer
+	HBPT_POLL, //3 byte reply starting at the bpm field
+	HBPT_READ_PAGE = 5, //1 byte page number payload, reply is nv_page_size bytes from that page
+	HBPT_WRITE_PAGE, //1 byte page number followed by nv_page_size bytes of page data, then a 1 byte reply
+	HBPT_READ_RTC, //5 byte reply taken from rtc_base
+	HBPT_SET_RTC, //5 byte payload stored into rtc_base, then a 1 byte reply
+	HBPT_GET_STATUS, //1 byte reply holding the status field
+	HBPT_ERASE_NVMEM, //1 byte payload used as the fill value for every page, then a 1 byte reply
+	HBPT_NVMEM_PARAMS, //2 byte reply starting at the nv_page_size field
+	HBPT_INIT //19 byte payload that is discarded, reply is the "(C) HEARTBEAT CORP" string
+};
+
+static void start_reply(io_port *port, uint8_t bytes, const uint8_t *src)
+{ //switches the device to HBPT_REPLY with bytes bytes to send, read from src
+	port->device.heartbeat_trainer.remaining_bytes = bytes;
+	port->device.heartbeat_trainer.state = HBPT_REPLY;
+	port->device.heartbeat_trainer.cur_buffer = (uint8_t *)src; //const is dropped because cur_buffer is shared with the payload path
+}
+
+static void simple_reply(io_port *port, uint8_t value)
+{ //starts a one byte reply containing value
+	port->device.heartbeat_trainer.param = value; //param doubles as the backing storage for the reply byte
+	start_reply(port, 1, &port->device.heartbeat_trainer.param);
+}
+
+static void expect_payload(io_port *port, uint8_t bytes, uint8_t *dst)
+{ //switches the device to HBPT_CMD_PAYLOAD, expecting bytes more bytes to be stored at dst
+	port->device.heartbeat_trainer.remaining_bytes = bytes;
+	port->device.heartbeat_trainer.state = HBPT_CMD_PAYLOAD;
+	port->device.heartbeat_trainer.cur_buffer = dst; //hbpt_write_byte skips the store when this is NULL
+}
+
+void hbpt_check_init(io_port *port)
+{
+	if (port->device.heartbeat_trainer.state != HBPT_NEED_INIT) {
+		return;
+	}
+	//nv_memory starts with an 8 byte big endian timestamp followed by the 5 RTC base bytes
+	const uint8_t *header = port->device.heartbeat_trainer.nv_memory;
+	uint64_t timestamp = 0;
+	for (int i = 0; i < 8; i++) {
+		timestamp = timestamp << 8 | header[i];
+	}
+	port->device.heartbeat_trainer.rtc_base_timestamp = timestamp;
+	memcpy(port->device.heartbeat_trainer.rtc_base, header + 8, 5);
+	//an all ones timestamp marks an uninitialized save, which sets the lowest status bit
+	port->device.heartbeat_trainer.status |= timestamp == UINT64_MAX;
+	port->device.heartbeat_trainer.bpm = 60;
+	port->device.heartbeat_trainer.state = HBPT_IDLE;
+}
+
+void hbpt_check_send_reply(io_port *port)
+{
+	if (port->device.heartbeat_trainer.state == HBPT_REPLY && !port->receive_end) { //start the next reply byte only when none is in flight
+		port->serial_receiving = port->device.heartbeat_trainer.cur_buffer ? *(port->device.heartbeat_trainer.cur_buffer++) : 0; //NULL source means no backing data: send 0 instead of dereferencing it
+		port->receive_end = port->serial_cycle + 10 * port->serial_divider; //1 start bit, 8 data bits and 1 stop bit
+		if (!--port->device.heartbeat_trainer.remaining_bytes) {
+			port->device.heartbeat_trainer.state = HBPT_IDLE;
+		}
+	}
+}
+
+uint8_t is_leap_year(uint16_t year)
+{
+	//Gregorian calendar rule: years divisible by 4 are leap years,
+	//except for century years, which must also be divisible by 400
+	uint8_t divisible_by_4 = !(year & 3);
+	uint8_t divisible_by_100 = !(year % 100);
+	uint8_t divisible_by_400 = !(year % 400);
+	if (divisible_by_100) {
+		//every multiple of 100 is also a multiple of 4, so only the 400 rule decides
+		return divisible_by_400;
+	}
+	return divisible_by_4;
+}
+
+uint8_t days_in_month(uint8_t month, uint16_t year)
+{
+	//month is 1-based; the table holds the lengths for a non-leap year
+	static const uint8_t month_lengths[] = {31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31};
+	if (!month || month > 12) {
+		//out of range months are reported as 30 days long
+		return 30;
+	}
+	uint8_t leap_day = month == 2 && is_leap_year(year);
+	return month_lengths[month-1] + leap_day;
+}
+
+//Feeds the byte in serial_transmitting to the emulated Heartbeat Personal Trainer: a command byte
+//while idle, otherwise one payload byte of the command in progress. Bytes sent mid-reply are ignored.
+void hbpt_write_byte(io_port *port)
+{
+	hbpt_check_init(port);
+	switch (port->device.heartbeat_trainer.state)
+	{
+	case HBPT_IDLE:
+		port->device.heartbeat_trainer.cmd = port->serial_transmitting;
+		switch (port->device.heartbeat_trainer.cmd)
+		{
+		case HBPT_UNKNOWN1:
+			start_reply(port, 11, NULL);
+			break;
+		case HBPT_POLL:
+			start_reply(port, 3, &port->device.heartbeat_trainer.bpm); //relies on bpm, cadence and buttons being adjacent members
+			if (port->serial_cycle - port->last_poll_cycle > MIN_POLL_INTERVAL) {
+				process_events();
+				port->last_poll_cycle = port->serial_cycle;
+			}
+			port->device.heartbeat_trainer.buttons = (port->input[GAMEPAD_TH0] << 2 & 0xC0) | (port->input[GAMEPAD_TH1] & 0x1F);
+			if (port->device.heartbeat_trainer.cadence && port->input[GAMEPAD_TH1] & 0x20) {
+				port->device.heartbeat_trainer.cadence--;
+				printf("Cadence: %d\n", port->device.heartbeat_trainer.cadence);
+			} else if (port->device.heartbeat_trainer.cadence < 255 && port->input[GAMEPAD_EXTRA] & 1) {
+				port->device.heartbeat_trainer.cadence++;
+				printf("Cadence: %d\n", port->device.heartbeat_trainer.cadence);
+			}
+			if (port->device.heartbeat_trainer.bpm && port->input[GAMEPAD_EXTRA] & 4) {
+				port->device.heartbeat_trainer.bpm--;
+				printf("Heart Rate: %d\n", port->device.heartbeat_trainer.bpm);
+			} else if (port->device.heartbeat_trainer.bpm < 255 && port->input[GAMEPAD_EXTRA] & 2) {
+				port->device.heartbeat_trainer.bpm++;
+				printf("Heart Rate: %d\n", port->device.heartbeat_trainer.bpm);
+			}
+			break;
+		case HBPT_READ_PAGE:
+		case HBPT_WRITE_PAGE:
+			//strictly speaking for the write case, we want 1 + page size here
+			//but the rest of the payload goes to a different destination
+			expect_payload(port, 1, &port->device.heartbeat_trainer.param);
+			break;
+		case HBPT_READ_RTC: {
+			uint8_t *rtc = port->device.heartbeat_trainer.rtc_base;
+			start_reply(port, 5, rtc);
+			uint64_t now = time(NULL);
+			//advance by whole minutes only; leftover seconds stay in the base so frequent reads can't stall the clock
+			uint64_t delta = (now - port->device.heartbeat_trainer.rtc_base_timestamp) / 60;
+			port->device.heartbeat_trainer.rtc_base_timestamp += delta * 60;
+			rtc[4] += delta % 60;
+			if (rtc[4] > 59) {
+				rtc[4] -= 60;
+				rtc[3]++;
+			}
+			delta /= 60;
+			//always normalize the hour: the minute carry above can push it to 24 even when no full hour elapsed
+			rtc[3] += delta % 24;
+			delta /= 24;
+			if (rtc[3] > 23) {
+				rtc[3] -= 24;
+				delta++;
+			}
+			if (delta) {
+				uint16_t year = rtc[0] < 81 ? 2000 + rtc[0] : 1900 + rtc[0];
+				uint8_t days_cur_month = days_in_month(rtc[1], year);
+				while (delta + rtc[2] > days_cur_month) {
+					delta -= days_cur_month + 1 - rtc[2];
+					rtc[2] = 1;
+					if (++rtc[1] == 13) {
+						rtc[1] = 1;
+						year++;
+					}
+					days_cur_month = days_in_month(rtc[1], year);
+				}
+				rtc[2] += delta; //the days left over belong in the day of month field, not the month
+				rtc[0] = year % 100;
+			}
+			printf("RTC %02d-%02d-%02d %02d:%02d\n", rtc[0], rtc[1], rtc[2], rtc[3], rtc[4]);
+			break;
+		}
+		case HBPT_SET_RTC:
+			port->device.heartbeat_trainer.rtc_base_timestamp = time(NULL);
+			expect_payload(port, 5, port->device.heartbeat_trainer.rtc_base);
+			break;
+		case HBPT_GET_STATUS:
+			simple_reply(port, port->device.heartbeat_trainer.status);
+			break;
+		case HBPT_ERASE_NVMEM:
+			expect_payload(port, 1, &port->device.heartbeat_trainer.param);
+			break;
+		case HBPT_NVMEM_PARAMS:
+			start_reply(port, 2, &port->device.heartbeat_trainer.nv_page_size); //relies on nv_pages directly following nv_page_size
+			break;
+		case HBPT_INIT:
+			expect_payload(port, 19, NULL);
+			break;
+		default:
+			// it's unclear what these commands do as they are unused by Outback Joey
+			// just return 0 to indicate failure
+			simple_reply(port, 0);
+		}
+		break;
+	case HBPT_CMD_PAYLOAD:
+		if (port->device.heartbeat_trainer.cur_buffer) {
+			*(port->device.heartbeat_trainer.cur_buffer++) = port->serial_transmitting;
+		}
+		if (!--port->device.heartbeat_trainer.remaining_bytes) {
+			switch (port->device.heartbeat_trainer.cmd)
+			{
+			case HBPT_READ_PAGE:
+			case HBPT_WRITE_PAGE:
+				if (
+					port->device.heartbeat_trainer.cmd == HBPT_WRITE_PAGE
+					&& port->device.heartbeat_trainer.cur_buffer != &port->device.heartbeat_trainer.param + 1) {
+					simple_reply(port, 1); //second completion of a write: the page data itself has arrived
+					break;
+				}
+				port->device.heartbeat_trainer.remaining_bytes = port->device.heartbeat_trainer.nv_page_size;
+				port->device.heartbeat_trainer.cur_buffer =
+					port->device.heartbeat_trainer.param < port->device.heartbeat_trainer.nv_pages
+					? port->device.heartbeat_trainer.nv_memory + 5 + 8
+						+ port->device.heartbeat_trainer.param * port->device.heartbeat_trainer.nv_page_size
+					: NULL;
+				if (port->device.heartbeat_trainer.cmd == HBPT_WRITE_PAGE) {
+					return; //stay in HBPT_CMD_PAYLOAD to collect the page contents
+				}
+				port->device.heartbeat_trainer.state = HBPT_REPLY;
+				break;
+			case HBPT_SET_RTC:
+				//save RTC base values back to nv memory area so it's saved to disk on exit
+				for (int i = 0; i < 8; i++)
+				{
+					port->device.heartbeat_trainer.nv_memory[i] = port->device.heartbeat_trainer.rtc_base_timestamp >> (56 - i*8);
+				}
+				memcpy(port->device.heartbeat_trainer.nv_memory + 8, port->device.heartbeat_trainer.rtc_base, 5);
+				simple_reply(port, 1);
+				break;
+			case HBPT_ERASE_NVMEM:
+				memset(
+					port->device.heartbeat_trainer.nv_memory + 5 + 8,
+					port->device.heartbeat_trainer.param,
+					port->device.heartbeat_trainer.nv_pages * port->device.heartbeat_trainer.nv_page_size
+				);
+				simple_reply(port, 1);
+				break;
+			case HBPT_INIT: {
+				static const uint8_t banner[] = "(C) HEARTBEAT CORP";
+				start_reply(port, sizeof(banner) - 1, banner); //terminating NUL is not part of the reply
+				break;
+			}
+			}
+		}
+	}
+	hbpt_check_send_reply(port);
+}
+
+void hbpt_read_byte(io_port *port)
+{ //receive side hook for the trainer: starts the next reply byte when one is due
+	hbpt_check_init(port); //make sure the RTC base has been loaded first
+	hbpt_check_send_reply(port);
+}
+
 const int mouse_delays[] = {112*7, 120*7, 96*7, 132*7, 104*7, 96*7, 112*7, 96*7};
 
 enum {
@@ -577,6 +939,67 @@
 	KB_WRITE
 };
 
+enum { //bits of the serial control register held in serial_ctrl
+	SCTRL_BIT_TX_FULL = 1, //serial_out holds a byte that has not started transmitting yet
+	SCTRL_BIT_RX_READY = 2, //serial_in holds a received byte; cleared by io_rx_read
+	SCTRL_BIT_RX_ERROR = 4, //not referenced by name in the code shown here
+	SCTRL_BIT_RX_INTEN = 8, //receive interrupt enable, tested by io_next_interrupt
+	SCTRL_BIT_TX_ENABLE = 0x10, //a finished byte is only handed to the device when this is set
+	SCTRL_BIT_RX_ENABLE = 0x20 //receive enable
+};
+
+//Advances the port's serial transmitter and receiver up to current_cycle, in whole bit periods
+void io_run(io_port *port, uint32_t current_cycle)
+{
+	uint32_t new_serial_cycle = ((current_cycle - port->serial_cycle) / port->serial_divider) * port->serial_divider + port->serial_cycle;
+	if (port->transmit_end && port->transmit_end <= new_serial_cycle) {
+		port->transmit_end = 0; //0 doubles as "no byte in flight"
+		if (port->serial_ctrl & SCTRL_BIT_TX_ENABLE) {
+			switch (port->device_type)
+			{
+			case IO_HEARTBEAT_TRAINER:
+				hbpt_write_byte(port);
+				break;
+#ifndef _WIN32
+			case IO_GENERIC_SERIAL:
+				write_serial_byte(port);
+				break;
+#endif
+			//TODO: think about how serial mode might interact with non-serial peripherals
+			}
+		}
+	}
+	if (!port->transmit_end && new_serial_cycle != port->serial_cycle && (port->serial_ctrl & SCTRL_BIT_TX_FULL)) {
+		//there's a transmit byte pending and no byte is currently being sent
+		port->serial_transmitting = port->serial_out;
+		port->serial_ctrl &= ~SCTRL_BIT_TX_FULL;
+		//1 start bit, 8 data bits and 1 stop bit
+		port->transmit_end = new_serial_cycle + 10 * port->serial_divider;
+	}
+	port->serial_cycle = new_serial_cycle;
+	if (port->serial_ctrl & SCTRL_BIT_RX_ENABLE) { //bitwise test of the enable bit; a logical AND was true for any non-zero serial_ctrl
+		if (port->receive_end && new_serial_cycle >= port->receive_end) {
+			port->serial_in = port->serial_receiving; //the byte in flight has fully arrived
+			port->serial_ctrl |= SCTRL_BIT_RX_READY;
+			port->receive_end = 0;
+		}
+		if (!port->receive_end) {
+			switch(port->device_type)
+			{
+			case IO_HEARTBEAT_TRAINER:
+				hbpt_read_byte(port);
+				break;
+#ifndef _WIN32
+			case IO_GENERIC_SERIAL:
+				read_serial_byte(port);
+				break;
+#endif
+			//TODO: think about how serial mode might interact with non-serial peripherals
+			}
+		}
+	}
+}
+
 void io_control_write(io_port *port, uint8_t value, uint32_t current_cycle)
 {
 	uint8_t changes = value ^ port->control;
@@ -724,6 +1147,20 @@
 
 }
 
+void io_tx_write(io_port *port, uint8_t value, uint32_t current_cycle)
+{ //queues value as the next byte to transmit
+	io_run(port, current_cycle); //catch up first so an already pending byte can move on before it is replaced
+	port->serial_out = value;
+	port->serial_ctrl |= SCTRL_BIT_TX_FULL; //io_run starts the transmission once no other byte is in flight
+}
+
+void io_sctrl_write(io_port *port, uint8_t value, uint32_t current_cycle)
+{ //handles a write to the serial control register
+	io_run(port, current_cycle); //bring the serial state up to date under the old settings first
+	port->serial_ctrl = (port->serial_ctrl & 0x7) | (value & 0xF8); //the low 3 status bits are kept, only the upper 5 bits are taken from value
+	set_serial_clock(port); //the top two bits select the bit rate
+}
+
 uint8_t get_scancode_bytes(io_port *port)
 {
 	if (port->device.keyboard.read_pos == 0xFF) {
@@ -767,9 +1204,9 @@
 	uint8_t th = output & 0x40;
 	uint8_t input;
 	uint8_t device_driven;
-	if (current_cycle - last_poll_cycle > MIN_POLL_INTERVAL) {
+	if (current_cycle - port->last_poll_cycle > MIN_POLL_INTERVAL) {
 		process_events();
-		last_poll_cycle = current_cycle;
+		port->last_poll_cycle = current_cycle;
 	}
 	switch (port->device_type)
 	{
@@ -1050,6 +1487,36 @@
 	return value;
 }
 
+uint8_t io_rx_read(io_port * port, uint32_t current_cycle)
+{ //returns the most recently received serial byte
+	io_run(port, current_cycle); //catch up first so a byte that has just finished arriving is visible
+	port->serial_ctrl &= ~SCTRL_BIT_RX_READY; //reading acknowledges the received byte
+	return port->serial_in;
+}
+
+uint8_t io_sctrl_read(io_port *port, uint32_t current_cycle)
+{ //returns the serial control register including its status bits
+	io_run(port, current_cycle); //catch up first so TX full / RX ready reflect current_cycle
+	return port->serial_ctrl;
+}
+
+uint32_t io_next_interrupt(io_port *port, uint32_t current_cycle)
+{
+	//nothing can fire unless bit 7 of the port control register and the receive interrupt enable are both set
+	if (!(port->control & 0x80) || !(port->serial_ctrl & SCTRL_BIT_RX_INTEN)) {
+		//TODO: handle external interrupts from TH transitions
+		return CYCLE_NEVER;
+	}
+	if (port->serial_ctrl & SCTRL_BIT_RX_READY) {
+		return current_cycle; //a received byte is already waiting
+	}
+	//otherwise the next candidate is the end of a byte that is currently being received
+	if ((port->serial_ctrl & SCTRL_BIT_RX_ENABLE) && port->receive_end) {
+		return port->receive_end;
+	}
+	return CYCLE_NEVER;
+}
+
 void io_serialize(io_port *port, serialize_buffer *buf)
 {
 	save_int8(buf, port->output);
@@ -1081,7 +1548,21 @@
 			save_int8(buf, port->device.keyboard.cmd);
 		}
 		break;
+	case IO_HEARTBEAT_TRAINER:
+		save_int8(buf, port->device.heartbeat_trainer.bpm);
+		save_int8(buf, port->device.heartbeat_trainer.cadence);
+		save_int8(buf, port->device.heartbeat_trainer.param);
+		save_int8(buf, port->device.heartbeat_trainer.state);
+		save_int8(buf, port->device.heartbeat_trainer.status);
+		save_int8(buf, port->device.heartbeat_trainer.cmd);
+		save_int8(buf, port->device.heartbeat_trainer.remaining_bytes);
+		break;
 	}
+	save_int32(buf, port->serial_cycle);
+	save_int32(buf, port->transmit_end);
+	save_int32(buf, port->receive_end);
+	save_int8(buf, port->serial_transmitting);
+	save_int8(buf, port->serial_receiving);
 }
 
 void io_deserialize(deserialize_buffer *buf, void *vport)
@@ -1092,7 +1573,9 @@
 	port->serial_out = load_int8(buf);
 	port->serial_in = load_int8(buf);
 	port->serial_ctrl = load_int8(buf);
+	set_serial_clock(port);
 	uint8_t device_type = load_int8(buf);
+	load_buffer32(buf, port->slow_rise_start, 8);
 	if (device_type != port->device_type) {
 		warning("Loaded save state has a different device type from the current configuration");
 		return;
@@ -1119,5 +1602,21 @@
 			port->device.keyboard.cmd = load_int8(buf);
 		}
 		break;
+	case IO_HEARTBEAT_TRAINER:
+		port->device.heartbeat_trainer.bpm = load_int8(buf);
+		port->device.heartbeat_trainer.cadence = load_int8(buf);
+		port->device.heartbeat_trainer.param = load_int8(buf);
+		port->device.heartbeat_trainer.state = load_int8(buf);
+		port->device.heartbeat_trainer.status = load_int8(buf);
+		port->device.heartbeat_trainer.cmd = load_int8(buf);
+		port->device.heartbeat_trainer.remaining_bytes = load_int8(buf);
+		break;
+	}
+	if (buf->cur_pos < buf->size) {
+		port->serial_cycle = load_int32(buf);
+		port->transmit_end = load_int32(buf);
+		port->receive_end = load_int32(buf);
+		port->serial_transmitting = load_int8(buf);
+		port->serial_receiving = load_int8(buf);
 	}
 }
--- a/io.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/io.h	Sat Jan 15 13:15:21 2022 -0800
@@ -24,7 +24,9 @@
 	IO_EA_MULTI_A,
 	IO_EA_MULTI_B,
 	IO_SEGA_PARALLEL,
-	IO_GENERIC
+	IO_GENERIC,
+	IO_GENERIC_SERIAL,
+	IO_HEARTBEAT_TRAINER
 };
 
 typedef struct {
@@ -57,13 +59,37 @@
 			uint8_t  mode;
 			uint8_t  cmd;
 		} keyboard;
+		struct {
+			uint8_t  *nv_memory;
+			uint8_t  *cur_buffer;
+			uint64_t rtc_base_timestamp;
+			uint8_t  rtc_base[5];
+			uint8_t  bpm;
+			uint8_t  cadence;
+			uint8_t  buttons;
+			uint8_t  nv_page_size;
+			uint8_t  nv_pages;
+			uint8_t  param;
+			uint8_t  state;
+			uint8_t  status;
+			uint8_t  device_num;
+			uint8_t  cmd;
+			uint8_t  remaining_bytes;
+		} heartbeat_trainer;
 	} device;
 	uint8_t  output;
 	uint8_t  control;
 	uint8_t  input[3];
 	uint32_t slow_rise_start[8];
+	uint32_t serial_cycle;
+	uint32_t serial_divider;
+	uint32_t last_poll_cycle;
+	uint32_t transmit_end;
+	uint32_t receive_end;
 	uint8_t  serial_out;
+	uint8_t  serial_transmitting;
 	uint8_t  serial_in;
+	uint8_t  serial_receiving;
 	uint8_t  serial_ctrl;
 	uint8_t  device_type;
 } io_port;
@@ -106,9 +132,15 @@
 
 void setup_io_devices(tern_node * config, rom_info *rom, sega_io *io);
 void io_adjust_cycles(io_port * pad, uint32_t current_cycle, uint32_t deduction);
+void io_run(io_port *port, uint32_t current_cycle);
 void io_control_write(io_port *port, uint8_t value, uint32_t current_cycle);
 void io_data_write(io_port * pad, uint8_t value, uint32_t current_cycle);
+void io_tx_write(io_port *port, uint8_t value, uint32_t current_cycle);
+void io_sctrl_write(io_port *port, uint8_t value, uint32_t current_cycle);
 uint8_t io_data_read(io_port * pad, uint32_t current_cycle);
+uint8_t io_rx_read(io_port * port, uint32_t current_cycle);
+uint8_t io_sctrl_read(io_port *port, uint32_t current_cycle);
+uint32_t io_next_interrupt(io_port *port, uint32_t current_cycle);
 void io_serialize(io_port *port, serialize_buffer *buf);
 void io_deserialize(deserialize_buffer *buf, void *vport);
 
--- a/jcart.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/jcart.c	Sat Jan 15 13:15:21 2022 -0800
@@ -1,3 +1,4 @@
+#include <stdlib.h>
 #include "genesis.h"
 
 static io_port *get_ports(m68k_context *m68k)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libblastem.c	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,546 @@
+#include <stdlib.h>
+#include <string.h>
+#include "libretro.h"
+#include "system.h"
+#include "util.h"
+#include "vdp.h"
+#include "render.h"
+#include "io.h"
+#include "genesis.h"
+#include "sms.h"
+
+//Environment callback supplied by the frontend; used for all later environment queries
+static retro_environment_t retro_environment;
+/*
+ * Stores the frontend's environment callback and immediately uses it to
+ * publish button labels for eight joypad ports.
+ */
+RETRO_API void retro_set_environment(retro_environment_t re)
+{
+	//Expands to the twelve descriptors of one pad. Each RetroPad button id is
+	//paired with the label reported for it (e.g. RetroPad B is labelled "A").
+#	define input_descriptor_macro(pad_num) \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_LEFT,  "D-Pad Left" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_UP,    "D-Pad Up" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_DOWN,  "D-Pad Down" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_RIGHT, "D-Pad Right" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_B,     "A" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_A,     "B" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_X,     "Y" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_Y,     "X" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_L,     "Z" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_R,     "C" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_SELECT,    "Mode" }, \
+		{ pad_num, RETRO_DEVICE_JOYPAD, 0, RETRO_DEVICE_ID_JOYPAD_START,    "Start" },
+
+	static const struct retro_input_descriptor descriptors[] = {
+		input_descriptor_macro(0)
+		input_descriptor_macro(1)
+		input_descriptor_macro(2)
+		input_descriptor_macro(3)
+		input_descriptor_macro(4)
+		input_descriptor_macro(5)
+		input_descriptor_macro(6)
+		input_descriptor_macro(7)
+		{ 0 }, //zeroed entry closes the list
+	};
+
+	retro_environment = re;
+	retro_environment(RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS, (void *)descriptors);
+}
+
+//Frontend callback that receives finished video frames (called from render_framebuffer_updated)
+static retro_video_refresh_t retro_video_refresh;
+//Stores the frontend-supplied video refresh callback for later use
+RETRO_API void retro_set_video_refresh(retro_video_refresh_t rvf)
+{
+	retro_video_refresh = rvf;
+}
+
+//The per-sample audio callback is deliberately discarded: this file only
+//ever delivers audio through the batch callback (see render_do_audio_ready)
+RETRO_API void retro_set_audio_sample(retro_audio_sample_t ras)
+{
+}
+
+//Frontend callback that receives batches of mixed audio (called from render_do_audio_ready)
+static retro_audio_sample_batch_t retro_audio_sample_batch;
+//Stores the frontend-supplied batch audio callback for later use
+RETRO_API void retro_set_audio_sample_batch(retro_audio_sample_batch_t rasb)
+{
+	retro_audio_sample_batch = rasb;
+}
+
+//Frontend callback invoked at the start of process_events before input state is read
+static retro_input_poll_t retro_input_poll;
+//Stores the frontend-supplied input poll callback for later use
+RETRO_API void retro_set_input_poll(retro_input_poll_t rip)
+{
+	retro_input_poll = rip;
+}
+
+//Frontend callback used by process_events to read the state of individual joypad buttons
+static retro_input_state_t retro_input_state;
+//Stores the frontend-supplied input state callback for later use
+RETRO_API void retro_set_input_state(retro_input_state_t ris)
+{
+	retro_input_state = ris;
+}
+
+//Global state with external linkage. Only current_system and media are used
+//within this file; the remaining variables are presumably referenced by
+//other BlastEm source files (TODO confirm which ones need them).
+int headless = 0;
+int exit_after = 0;
+int z80_enabled = 1;
+char *save_filename;
+tern_node *config;
+uint8_t use_native_states = 1;
+//system instance created by retro_load_game and destroyed by retro_unload_game
+system_header *current_system;
+//description of the loaded content (path components, ROM buffer and size)
+system_media media;
+
+//Core initialization: registers an initial audio output configuration using
+//the RENDER_AUDIO_S16 format and int16_t sized samples.
+//NOTE(review): the rate here divides 53693175 by 7 * 6 * 4, whereas
+//retro_get_system_av_info divides the master clock by 7 * 6 * 24 - confirm
+//whether the difference is intentional. The later call replaces this setup.
+RETRO_API void retro_init(void)
+{
+	render_audio_initialized(RENDER_AUDIO_S16, 53693175 / (7 * 6 * 4), 2, 4, sizeof(int16_t));
+}
+
+//Core teardown: unloads the current game if one is still loaded
+RETRO_API void retro_deinit(void)
+{
+	if (current_system) {
+		retro_unload_game();
+	}
+}
+
+//Reports the libretro API version this core was compiled against
+RETRO_API unsigned retro_api_version(void)
+{
+	return RETRO_API_VERSION;
+}
+
+//Fills in the static description of this core: its name, version string and
+//the content file extensions it accepts. Neither full-path loading nor
+//blocking of archive extraction is requested.
+RETRO_API void retro_get_system_info(struct retro_system_info *info)
+{
+	info->need_fullpath = false;
+	info->block_extract = false;
+	info->valid_extensions = "md|gen|sms|bin|rom";
+	info->library_name = "BlastEm";
+	info->library_version = "0.6.3-pre"; //TODO: share this with blastem.c
+}
+
+//Video standard most recently reported through render_set_video_standard
+static vid_std video_standard;
+//Geometry last announced to the frontend, used to detect changes
+static uint32_t last_width, last_height;
+//Number of border lines/pixels cropped from each edge of the rendered frame
+static uint32_t overscan_top, overscan_bot, overscan_left, overscan_right;
+/*
+ * Refreshes the overscan crop amounts from the frontend's overscan setting.
+ * When the frontend asks for overscan to be shown nothing is cropped;
+ * otherwise fixed crop amounts are selected based on the video standard.
+ * If the frontend does not answer the query, overscan is treated as disabled
+ * (cropped) rather than reading an indeterminate value.
+ */
+static void update_overscan(void)
+{
+	//RETRO_ENVIRONMENT_GET_OVERSCAN writes a bool, so the variable must be one
+	bool overscan = false;
+	if (!retro_environment(RETRO_ENVIRONMENT_GET_OVERSCAN, &overscan)) {
+		//query unsupported or failed: fall back to cropping
+		overscan = false;
+	}
+	if (overscan) {
+		overscan_top = overscan_bot = overscan_left = overscan_right = 0;
+	} else {
+		if (video_standard == VID_NTSC) {
+			overscan_top = 11;
+			overscan_bot = 8;
+			overscan_left = 13;
+			overscan_right = 14;
+		} else {
+			overscan_top = 30;
+			overscan_bot = 24;
+			overscan_left = 13;
+			overscan_right = 14;
+		}
+	}
+}
+
+//Integer copy of the audio sample rate last reported to the frontend
+static int32_t sample_rate;
+/*
+ * Reports video geometry and audio/video timing to the frontend.
+ * Geometry is the full line buffer width and the frame height for the
+ * current video standard, minus the overscan crop. Timing is derived from
+ * the master clock of the current video standard. The audio output is
+ * then reconfigured to the reported sample rate.
+ */
+RETRO_API void retro_get_system_av_info(struct retro_system_av_info *info)
+{
+	update_overscan();
+	const int ntsc = video_standard == VID_NTSC;
+
+	last_width = LINEBUF_SIZE;
+	info->geometry.base_width = info->geometry.max_width = LINEBUF_SIZE - (overscan_left + overscan_right);
+	info->geometry.base_height = (ntsc ? 243 : 294) - (overscan_top + overscan_bot);
+	last_height = info->geometry.base_height;
+	//maximum height is twice the base height (see the doubling in render_framebuffer_updated)
+	info->geometry.max_height = info->geometry.base_height * 2;
+	info->geometry.aspect_ratio = 0;
+
+	double lines = ntsc ? 262 : 313;
+	double master_clock = ntsc ? 53693175 : 53203395;
+	info->timing.fps = master_clock / (3420.0 * lines);
+	info->timing.sample_rate = master_clock / (7 * 6 * 24); //sample rate of YM2612
+	sample_rate = info->timing.sample_rate;
+	render_audio_initialized(RENDER_AUDIO_S16, info->timing.sample_rate, 2, 4, sizeof(int16_t));
+	//force adjustment of resampling parameters since target sample rate may have changed slightly
+	current_system->set_speed_percent(current_system, 100);
+}
+
+//Controller type selection is ignored; process_events always polls joypads
+RETRO_API void retro_set_controller_port_device(unsigned port, unsigned device)
+{
+}
+
+/* Resets the current game by invoking the active system's soft_reset handler.
+ * Assumes a game is loaded (current_system is dereferenced unconditionally). */
+RETRO_API void retro_reset(void)
+{
+	current_system->soft_reset(current_system);
+}
+
+/* Runs the game for one video frame.
+ *
+ * The very first call starts the system's context; every later call resumes
+ * it. render_framebuffer_updated requests an exit from the system once a
+ * frame has been handed to the frontend.
+ */
+//non-zero once start_context has been called
+static uint8_t started;
+RETRO_API void retro_run(void)
+{
+	if (!started) {
+		current_system->start_context(current_system, NULL);
+		started = 1;
+		return;
+	}
+	current_system->resume_context(current_system);
+}
+
+/* Returns the number of bytes needed to hold a save state.
+ *
+ * The size is measured once by producing a throwaway save state and is then
+ * cached, so the value reported to the frontend stays constant until the
+ * cache is cleared (retro_load_game resets it to 0).
+ */
+static size_t serialize_size_cache;
+RETRO_API size_t retro_serialize_size(void)
+{
+	if (serialize_size_cache) {
+		return serialize_size_cache;
+	}
+	//only the size is wanted here, the state itself is released right away
+	free(current_system->serialize(current_system, &serialize_size_cache));
+	return serialize_size_cache;
+}
+
+/* Serializes internal state into the caller's buffer.
+ * Returns false without touching data when the state does not fit in size
+ * bytes, true once the state has been copied. */
+RETRO_API bool retro_serialize(void *data, size_t size)
+{
+	size_t needed;
+	uint8_t *state = current_system->serialize(current_system, &needed);
+	bool fits = needed <= size;
+	if (fits) {
+		memcpy(data, state, needed);
+	}
+	//the temporary state buffer is released on both paths
+	free(state);
+	return fits;
+}
+
+/* Restores internal state from a buffer previously filled by retro_serialize.
+ * The system's deserialize handler is invoked on the data and success is
+ * reported to the frontend; returning false here would make the frontend
+ * treat every state load as failed. Any failure inside the handler is not
+ * observable from this function. */
+RETRO_API bool retro_unserialize(const void *data, size_t size)
+{
+	current_system->deserialize(current_system, (uint8_t *)data, size);
+	return true;
+}
+
+//Cheats are not implemented in this core; nothing to reset
+RETRO_API void retro_cheat_reset(void)
+{
+}
+
+//Cheats are not implemented in this core; the supplied code is ignored
+RETRO_API void retro_cheat_set(unsigned index, bool enabled, const char *code)
+{
+}
+
+/* Loads a game from the in-memory image supplied by the frontend.
+ *
+ * The image is copied into a freshly allocated buffer whose size is rounded
+ * up by nearest_pow2, the system type is detected from it and a matching
+ * system instance is created. Per-game state (cached save state size and the
+ * "context started" flag used by retro_run) is reset so that a game loaded
+ * after a previous one gets a fresh start_context call.
+ *
+ * Returns false when no content buffer was supplied, when memory for the
+ * copy could not be allocated, or when no system could be created. */
+static system_type stype;
+RETRO_API bool retro_load_game(const struct retro_game_info *game)
+{
+	if (!game || !game->data) {
+		//content is consumed as a memory buffer; nothing to load without one
+		return false;
+	}
+	void *rom = malloc(nearest_pow2(game->size));
+	if (!rom) {
+		return false;
+	}
+	memcpy(rom, game->data, game->size);
+
+	//reset state left over from any previously loaded game
+	serialize_size_cache = 0;
+	started = 0;
+	if (game->path) {
+		media.dir = path_dirname(game->path);
+		media.name = basename_no_extension(game->path);
+		media.extension = path_extension(game->path);
+	}
+	media.buffer = rom;
+	media.size = game->size;
+	stype = detect_system_type(&media);
+	current_system = alloc_config_system(stype, &media, 0, 0);
+
+	unsigned format = RETRO_PIXEL_FORMAT_XRGB8888;
+	retro_environment(RETRO_ENVIRONMENT_SET_PIXEL_FORMAT, &format);
+
+	return current_system != NULL;
+}
+
+/* Loads a "special" kind of game. Should not be used,
+ * except in extreme cases.
+ * This implementation ignores game_type and num_info and simply forwards
+ * info to retro_load_game. */
+RETRO_API bool retro_load_game_special(unsigned game_type, const struct retro_game_info *info, size_t num_info)
+{
+	return retro_load_game(info);
+}
+
+/* Unloads a currently loaded game.
+ * Releases the path strings stored in media, drops this file's reference to
+ * the ROM buffer, then frees the system context and clears current_system.
+ * Assumes a game is loaded (current_system is dereferenced unconditionally). */
+RETRO_API void retro_unload_game(void)
+{
+	free(media.dir);
+	free(media.name);
+	free(media.extension);
+	//cleared so stale pointers are never freed twice on a later unload
+	media.dir = media.name = media.extension = NULL;
+	//buffer is freed by the context
+	media.buffer = NULL;
+	current_system->free_context(current_system);
+	current_system = NULL;
+}
+
+/* Reports the region of the game: NTSC when the current video standard is
+ * VID_NTSC, PAL for anything else. */
+RETRO_API unsigned retro_get_region(void)
+{
+	if (video_standard == VID_NTSC) {
+		return RETRO_REGION_NTSC;
+	}
+	return RETRO_REGION_PAL;
+}
+
+/* Returns a pointer to the requested memory region, or NULL when the region
+ * does not exist for the loaded system.
+ * System RAM is exposed for Genesis and (unless built with NO_Z80) SMS;
+ * save RAM is exposed for Genesis games whose save type is not SAVE_NONE. */
+RETRO_API void *retro_get_memory_data(unsigned id)
+{
+	if (id == RETRO_MEMORY_SYSTEM_RAM) {
+		if (stype == SYSTEM_GENESIS) {
+			genesis_context *gen = (genesis_context *)current_system;
+			return (uint8_t *)gen->work_ram;
+		}
+#ifndef NO_Z80
+		if (stype == SYSTEM_SMS) {
+			sms_context *sms = (sms_context *)current_system;
+			return sms->ram;
+		}
+#endif
+		return NULL;
+	}
+	if (id == RETRO_MEMORY_SAVE_RAM && stype == SYSTEM_GENESIS) {
+		genesis_context *gen = (genesis_context *)current_system;
+		if (gen->save_type != SAVE_NONE) {
+			return gen->save_storage;
+		}
+	}
+	return NULL;
+}
+
+/* Returns the size in bytes of the requested memory region, or 0 when the
+ * region does not exist for the loaded system. The cases mirror those
+ * handled by retro_get_memory_data. */
+RETRO_API size_t retro_get_memory_size(unsigned id)
+{
+	if (id == RETRO_MEMORY_SYSTEM_RAM) {
+		if (stype == SYSTEM_GENESIS) {
+			return RAM_WORDS * sizeof(uint16_t);
+		}
+#ifndef NO_Z80
+		if (stype == SYSTEM_SMS) {
+			return SMS_RAM_SIZE;
+		}
+#endif
+		return 0;
+	}
+	if (id == RETRO_MEMORY_SAVE_RAM && stype == SYSTEM_GENESIS) {
+		genesis_context *gen = (genesis_context *)current_system;
+		if (gen->save_type != SAVE_NONE) {
+			return gen->save_size;
+		}
+	}
+	return 0;
+}
+
+//blastem render backend API implementation
+
+//Packs 8-bit red, green and blue components into a 0x00RRGGBB pixel value
+uint32_t render_map_color(uint8_t r, uint8_t g, uint8_t b)
+{
+	uint32_t red = (uint32_t)r << 16;
+	uint32_t green = (uint32_t)g << 8;
+	return red | green | b;
+}
+
+//Extra windows cannot be created in the library build; all arguments are
+//ignored and 0 is always returned
+uint8_t render_create_window(char *caption, uint32_t width, uint32_t height, window_close_handler close_handler)
+{
+	//not supported in lib build
+	return 0;
+}
+
+//Counterpart of render_create_window; a no-op since no windows are ever created
+void render_destroy_window(uint8_t which)
+{
+	//not supported in lib build
+}
+
+//Single pixel buffer shared by both framebuffer indices: room for 294 * 2 lines of LINEBUF_SIZE pixels
+static uint32_t fb[LINEBUF_SIZE * 294 * 2];
+//framebuffer index most recently passed to render_framebuffer_updated
+static uint8_t last_fb;
+/*
+ * Returns the address to render framebuffer `which` into and stores the
+ * distance in bytes between consecutive lines in *pitch.
+ * A non-zero index starts one line into the shared buffer. When the index
+ * differs from the last one presented the pitch is doubled so that the
+ * lines of the two buffers interleave (presumably for interlaced output -
+ * TODO confirm).
+ */
+uint32_t *render_get_framebuffer(uint8_t which, int *pitch)
+{
+	int line_bytes = LINEBUF_SIZE * sizeof(uint32_t);
+	*pitch = (which != last_fb) ? line_bytes * 2 : line_bytes;
+	return which ? fb + LINEBUF_SIZE : fb;
+}
+
+/*
+ * Presents a finished frame to the frontend.
+ * The visible size is the rendered size minus the overscan crop; the height
+ * is doubled whenever the framebuffer index differs from the previous frame.
+ * The frontend is told about geometry changes before the frame is handed
+ * over, and the system is then asked to exit its run loop.
+ */
+void render_framebuffer_updated(uint8_t which, int width)
+{
+	unsigned base_height = (video_standard == VID_NTSC ? 243 : 294) - (overscan_top + overscan_bot);
+	unsigned height = base_height;
+	if (which != last_fb) {
+		height = base_height * 2;
+		last_fb = which;
+	}
+	width -= (overscan_left + overscan_right);
+	if (width != last_width || height != last_height) {
+		//aspect ratio is always based on the undoubled height
+		struct retro_game_geometry geometry = {
+			.base_width = width,
+			.base_height = height,
+			.aspect_ratio = (float)LINEBUF_SIZE / base_height
+		};
+		retro_environment(RETRO_ENVIRONMENT_SET_GEOMETRY, &geometry);
+		last_width = width;
+		last_height = height;
+	}
+	//skip the cropped border at the top and left of the buffer
+	uint32_t *visible = fb + overscan_left + LINEBUF_SIZE * overscan_top;
+	retro_video_refresh(visible, width, height, LINEBUF_SIZE * sizeof(uint32_t));
+	system_request_exit(current_system, 0);
+}
+
+//Always reports framebuffer index 0 as the active one
+uint8_t render_get_active_framebuffer(void)
+{
+	return 0;
+}
+
+//Records the video standard; it selects frame height, overscan crop, timing
+//and the reported region elsewhere in this file
+void render_set_video_standard(vid_std std)
+{
+	video_standard = std;
+}
+
+//Always returns 1 in the library build; there is no window mode to query here
+int render_fullscreen(void)
+{
+	return 1;
+}
+
+//Number of lines currently cropped from the top of the frame (set by update_overscan)
+uint32_t render_overscan_top()
+{
+	return overscan_top;
+}
+
+//Number of lines currently cropped from the bottom of the frame (set by update_overscan)
+uint32_t render_overscan_bot()
+{
+	return overscan_bot;
+}
+
+/*
+ * Polls the frontend for input and forwards button changes to the system.
+ * The first twelve RetroPad button ids (B through R) of two ports are
+ * compared with their state from the previous call; each change is reported
+ * as a gamepad_down or gamepad_up event on gamepad number port + 1.
+ */
+void process_events()
+{
+	static int16_t prev_state[2][RETRO_DEVICE_ID_JOYPAD_L2];
+	//indexed by RetroPad button id, yields the emulated pad button
+	static const uint8_t map[] = {
+		BUTTON_A, BUTTON_X, BUTTON_MODE, BUTTON_START, DPAD_UP, DPAD_DOWN,
+		DPAD_LEFT, DPAD_RIGHT, BUTTON_B, BUTTON_Y, BUTTON_Z, BUTTON_C
+	};
+	//TODO: handle other input device types
+	//TODO: handle more than 2 ports when appropriate
+	retro_input_poll();
+	for (int port = 0; port < 2; port++)
+	{
+		int16_t *previous = prev_state[port];
+		for (int id = RETRO_DEVICE_ID_JOYPAD_B; id < RETRO_DEVICE_ID_JOYPAD_L2; id++)
+		{
+			int16_t new_state = retro_input_state(port, RETRO_DEVICE_JOYPAD, 0, id);
+			if (new_state == previous[id]) {
+				continue;
+			}
+			if (new_state) {
+				current_system->gamepad_down(current_system, port + 1, map[id]);
+			} else {
+				current_system->gamepad_up(current_system, port + 1, map[id]);
+			}
+			previous[id] = new_state;
+		}
+	}
+}
+
+//Message box hooks of the render backend API. The library build has no UI
+//of its own, so error, warning and info messages are silently dropped.
+void render_errorbox(char *title, char *message)
+{
+}
+void render_warnbox(char *title, char *message)
+{
+}
+void render_infobox(char *title, char *message)
+{
+}
+
+//Tells the caller to use the sync-to-audio path; always returns 1
+uint8_t render_is_audio_sync(void)
+{
+	//whether this is true depends on the libretro frontend implementation
+	//but the sync to audio path works better here
+	return 1;
+}
+
+//Always returns 0 in the library build
+//NOTE(review): the exact meaning of "release" is defined by the callers - confirm there
+uint8_t render_should_release_on_exit(void)
+{
+	return 0;
+}
+
+//Audio backend hooks that need no work in the library build: audio is pushed
+//to the frontend directly from render_do_audio_ready, so there is no
+//per-source backend data to manage and no locking is performed here.
+void render_buffer_consumed(audio_source *src)
+{
+}
+
+//No backend-specific data is attached to audio sources; always NULL
+void *render_new_audio_opaque(void)
+{
+	return NULL;
+}
+
+//Nothing to release since render_new_audio_opaque never allocates
+void render_free_audio_opaque(void *opaque)
+{
+}
+
+//No-op lock hook
+void render_lock_audio(void)
+{
+}
+
+//No-op unlock hook
+void render_unlock_audio()
+{
+}
+
+//Returns a fixed value of 4
+uint32_t render_min_buffered(void)
+{
+	//not actually used in the sync to audio path
+	return 4;
+}
+
+//Always returns 0 in the library build
+uint32_t render_audio_syncs_per_sec(void)
+{
+	return 0;
+}
+
+//No backend setup is needed when an audio source is created
+void render_audio_created(audio_source *src)
+{
+}
+
+/*
+ * Called when an audio source has filled its back buffer.
+ * The source's front and back buffers are swapped and the source is marked
+ * as populated. Once every source is ready, the sources are mixed into a
+ * small local buffer of 8 int16_t values which is handed to the frontend
+ * as 4 frames (two samples per frame).
+ */
+void render_do_audio_ready(audio_source *src)
+{
+	int16_t *filled = src->back;
+	src->back = src->front;
+	src->front = filled;
+	src->front_populated = 1;
+	src->buffer_pos = 0;
+	if (!all_sources_ready()) {
+		return;
+	}
+	int16_t buffer[8];
+	int min_remaining_out;
+	mix_and_convert((uint8_t *)buffer, sizeof(buffer), &min_remaining_out);
+	retro_audio_sample_batch(buffer, sizeof(buffer)/(2*sizeof(*buffer)));
+}
+
+//Audio source pause notification; nothing to do in the library build
+void render_source_paused(audio_source *src, uint8_t remaining_sources)
+{
+}
+
+//Audio source resume notification; nothing to do in the library build
+void render_source_resumed(audio_source *src)
+{
+}
+
+//External sync requests are ignored in the library build
+void render_set_external_sync(uint8_t ext_sync_on)
+{
+}
+
+//Input binding hooks related to mouse mode and input capture. They are
+//all no-ops here; this file only forwards joypad input (see process_events).
+void bindings_set_mouse_mode(uint8_t mode)
+{
+}
+
+void bindings_release_capture(void)
+{
+}
+
+void bindings_reacquire_capture(void)
+{
+}
+
+//NUL-terminated contents of rom.db, defined in another translation unit
+extern const char rom_db_data[];
+/*
+ * Returns a freshly allocated copy of a file bundled into the library, or
+ * NULL when the file is not bundled or memory could not be allocated.
+ * Only "rom.db" is available. On success *sizeret receives the length of
+ * the data excluding the terminating NUL (which is included in the copy);
+ * the caller owns the returned buffer and must free() it.
+ */
+char *read_bundled_file(char *name, uint32_t *sizeret)
+{
+	if (strcmp(name, "rom.db")) {
+		return NULL;
+	}
+	size_t len = strlen(rom_db_data);
+	char *ret = malloc(len + 1);
+	if (!ret) {
+		//report allocation failure like a missing file instead of crashing in memcpy
+		return NULL;
+	}
+	memcpy(ret, rom_db_data, len + 1);
+	*sizeret = len;
+	return ret;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/libretro.h	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,2343 @@
+/* Copyright (C) 2010-2017 The RetroArch team
+ *
+ * ---------------------------------------------------------------------------------------
+ * The following license statement only applies to this libretro API header (libretro.h).
+ * ---------------------------------------------------------------------------------------
+ *
+ * Permission is hereby granted, free of charge,
+ * to any person obtaining a copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation the rights to
+ * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
+ * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
+ * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef LIBRETRO_H__
+#define LIBRETRO_H__
+
+#include <stdint.h>
+#include <stddef.h>
+#include <limits.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef __cplusplus
+#if defined(_MSC_VER) && _MSC_VER < 1800 && !defined(SN_TARGET_PS3)
+/* Hack applied for MSVC when compiling in C89 mode
+ * as it isn't C99-compliant. */
+#define bool unsigned char
+#define true 1
+#define false 0
+#else
+#include <stdbool.h>
+#endif
+#endif
+
+/* Calling convention applied to libretro API functions.
+ * cdecl is requested explicitly on i386 (non-x86_64) builds with
+ * GCC-compatible compilers and on MSVC builds where _M_X86 is defined without
+ * _M_X64; everywhere else the macro expands to nothing. A definition supplied
+ * by the build before including this header takes precedence. */
+#ifndef RETRO_CALLCONV
+#  if defined(__GNUC__) && defined(__i386__) && !defined(__x86_64__)
+#    define RETRO_CALLCONV __attribute__((cdecl))
+#  elif defined(_MSC_VER) && defined(_M_X86) && !defined(_M_X64)
+#    define RETRO_CALLCONV __cdecl
+#  else
+#    define RETRO_CALLCONV /* all other platforms only have one calling convention each */
+#  endif
+#endif
+
+/* Decoration placed in front of every libretro API function.
+ * It always includes RETRO_CALLCONV. On Windows-like targets (_WIN32,
+ * __CYGWIN__, __MINGW32__) it additionally marks the symbol as dllimport when
+ * RETRO_IMPORT_SYMBOLS is defined and as dllexport otherwise. On other
+ * targets GCC 4 or newer (except __CELLOS_LV2__) gets default symbol
+ * visibility. A definition supplied by the build takes precedence. */
+#ifndef RETRO_API
+#  if defined(_WIN32) || defined(__CYGWIN__) || defined(__MINGW32__)
+#    ifdef RETRO_IMPORT_SYMBOLS
+#      ifdef __GNUC__
+#        define RETRO_API RETRO_CALLCONV __attribute__((__dllimport__))
+#      else
+#        define RETRO_API RETRO_CALLCONV __declspec(dllimport)
+#      endif
+#    else
+#      ifdef __GNUC__
+#        define RETRO_API RETRO_CALLCONV __attribute__((__dllexport__))
+#      else
+#        define RETRO_API RETRO_CALLCONV __declspec(dllexport)
+#      endif
+#    endif
+#  else
+#      if defined(__GNUC__) && __GNUC__ >= 4 && !defined(__CELLOS_LV2__)
+#        define RETRO_API RETRO_CALLCONV __attribute__((__visibility__("default")))
+#      else
+#        define RETRO_API RETRO_CALLCONV
+#      endif
+#  endif
+#endif
+
+/* Used for checking API/ABI mismatches that can break libretro
+ * implementations.
+ * It is not incremented for compatible changes to the API.
+ */
+#define RETRO_API_VERSION         1
+
+/*
+ * Libretro's fundamental device abstractions.
+ *
+ * Libretro's input system consists of some standardized device types,
+ * such as a joypad (with/without analog), mouse, keyboard, lightgun
+ * and a pointer.
+ *
+ * The functionality of these devices is fixed, and individual cores
+ * map their own concept of a controller to libretro's abstractions.
+ * This makes it possible for frontends to map the abstract types to a
+ * real input device, and not having to worry about binding input
+ * correctly to arbitrary controller layouts.
+ */
+
+/* A device id keeps its base device type in the low RETRO_DEVICE_TYPE_SHIFT
+ * bits (extract it with RETRO_DEVICE_MASK) and a subclass number above them.
+ * RETRO_DEVICE_SUBCLASS(base, id) builds such an id; the subclass is stored
+ * as id + 1 so that a plain base type (upper bits zero) is never produced.
+ * Both macro arguments are parenthesised so that arbitrary expressions can
+ * be passed safely. */
+#define RETRO_DEVICE_TYPE_SHIFT         8
+#define RETRO_DEVICE_MASK               ((1 << RETRO_DEVICE_TYPE_SHIFT) - 1)
+#define RETRO_DEVICE_SUBCLASS(base, id) ((((id) + 1) << RETRO_DEVICE_TYPE_SHIFT) | (base))
+
+/* Input disabled. */
+#define RETRO_DEVICE_NONE         0
+
+/* The JOYPAD is called RetroPad. It is essentially a Super Nintendo
+ * controller, but with additional L2/R2/L3/R3 buttons, similar to a
+ * PS1 DualShock. */
+#define RETRO_DEVICE_JOYPAD       1
+
+/* The mouse is a simple mouse, similar to Super Nintendo's mouse.
+ * X and Y coordinates are reported relatively to last poll (poll callback).
+ * It is up to the libretro implementation to keep track of where the mouse
+ * pointer is supposed to be on the screen.
+ * The frontend must make sure not to interfere with its own hardware
+ * mouse pointer.
+ */
+#define RETRO_DEVICE_MOUSE        2
+
+/* KEYBOARD device lets one poll for raw key pressed.
+ * It is poll based, so input callback will return with the current
+ * pressed state.
+ * For event/text based keyboard input, see
+ * RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK.
+ */
+#define RETRO_DEVICE_KEYBOARD     3
+
+/* LIGHTGUN device is similar to Guncon-2 for PlayStation 2.
+ * It reports X/Y coordinates in screen space (similar to the pointer)
+ * in the range [-0x8000, 0x7fff] in both axes, with zero being center.
+ * As well as reporting on/off screen state. It features a trigger,
+ * start/select buttons, auxiliary action buttons and a
+ * directional pad. A forced off-screen shot can be requested for
+ * auto-reloading function in some games.
+ */
+#define RETRO_DEVICE_LIGHTGUN     4
+
+/* The ANALOG device is an extension to JOYPAD (RetroPad).
+ * Similar to DualShock2 it adds two analog sticks and all buttons can
+ * be analog. This is treated as a separate device type as it returns
+ * axis values in the full analog range of [-0x8000, 0x7fff].
+ * Positive X axis is right. Positive Y axis is down.
+ * Buttons are returned in the range [0, 0x7fff].
+ * Only use ANALOG type when polling for analog values.
+ */
+#define RETRO_DEVICE_ANALOG       5
+
+/* Abstracts the concept of a pointing mechanism, e.g. touch.
+ * This allows libretro to query in absolute coordinates where on the
+ * screen a mouse (or something similar) is being placed.
+ * For a touch centric device, coordinates reported are the coordinates
+ * of the press.
+ *
+ * Coordinates in X and Y are reported as:
+ * [-0x7fff, 0x7fff]: -0x7fff corresponds to the far left/top of the screen,
+ * and 0x7fff corresponds to the far right/bottom of the screen.
+ * The "screen" is here defined as area that is passed to the frontend and
+ * later displayed on the monitor.
+ *
+ * The frontend is free to scale/resize this screen as it sees fit, however,
+ * (X, Y) = (-0x7fff, -0x7fff) will correspond to the top-left pixel of the
+ * game image, etc.
+ *
+ * To check if the pointer coordinates are valid (e.g. a touch display
+ * actually being touched), PRESSED returns 1 or 0.
+ *
+ * If using a mouse on a desktop, PRESSED will usually correspond to the
+ * left mouse button, but this is a frontend decision.
+ * PRESSED will only return 1 if the pointer is inside the game screen.
+ *
+ * For multi-touch, the index variable can be used to successively query
+ * more presses.
+ * If index = 0 returns true for _PRESSED, coordinates can be extracted
+ * with _X, _Y for index = 0. One can then query _PRESSED, _X, _Y with
+ * index = 1, and so on.
+ * Eventually _PRESSED will return false for an index. No further presses
+ * are registered at this point. */
+#define RETRO_DEVICE_POINTER      6
+
+/* Buttons for the RetroPad (JOYPAD).
+ * The placement of these is equivalent to placements on the
+ * Super Nintendo controller.
+ * L2/R2/L3/R3 buttons correspond to the PS1 DualShock.
+ * Also used as id values for RETRO_DEVICE_INDEX_ANALOG_BUTTON */
+#define RETRO_DEVICE_ID_JOYPAD_B        0
+#define RETRO_DEVICE_ID_JOYPAD_Y        1
+#define RETRO_DEVICE_ID_JOYPAD_SELECT   2
+#define RETRO_DEVICE_ID_JOYPAD_START    3
+#define RETRO_DEVICE_ID_JOYPAD_UP       4
+#define RETRO_DEVICE_ID_JOYPAD_DOWN     5
+#define RETRO_DEVICE_ID_JOYPAD_LEFT     6
+#define RETRO_DEVICE_ID_JOYPAD_RIGHT    7
+#define RETRO_DEVICE_ID_JOYPAD_A        8
+#define RETRO_DEVICE_ID_JOYPAD_X        9
+#define RETRO_DEVICE_ID_JOYPAD_L       10
+#define RETRO_DEVICE_ID_JOYPAD_R       11
+#define RETRO_DEVICE_ID_JOYPAD_L2      12
+#define RETRO_DEVICE_ID_JOYPAD_R2      13
+#define RETRO_DEVICE_ID_JOYPAD_L3      14
+#define RETRO_DEVICE_ID_JOYPAD_R3      15
+
+/* Index / Id values for ANALOG device. */
+#define RETRO_DEVICE_INDEX_ANALOG_LEFT       0
+#define RETRO_DEVICE_INDEX_ANALOG_RIGHT      1
+#define RETRO_DEVICE_INDEX_ANALOG_BUTTON     2
+#define RETRO_DEVICE_ID_ANALOG_X             0
+#define RETRO_DEVICE_ID_ANALOG_Y             1
+
+/* Id values for MOUSE. */
+#define RETRO_DEVICE_ID_MOUSE_X                0
+#define RETRO_DEVICE_ID_MOUSE_Y                1
+#define RETRO_DEVICE_ID_MOUSE_LEFT             2
+#define RETRO_DEVICE_ID_MOUSE_RIGHT            3
+#define RETRO_DEVICE_ID_MOUSE_WHEELUP          4
+#define RETRO_DEVICE_ID_MOUSE_WHEELDOWN        5
+#define RETRO_DEVICE_ID_MOUSE_MIDDLE           6
+#define RETRO_DEVICE_ID_MOUSE_HORIZ_WHEELUP    7
+#define RETRO_DEVICE_ID_MOUSE_HORIZ_WHEELDOWN  8
+#define RETRO_DEVICE_ID_MOUSE_BUTTON_4         9
+#define RETRO_DEVICE_ID_MOUSE_BUTTON_5         10
+
+/* Id values for LIGHTGUN. */
+#define RETRO_DEVICE_ID_LIGHTGUN_SCREEN_X        13 /*Absolute Position*/
+#define RETRO_DEVICE_ID_LIGHTGUN_SCREEN_Y        14 /*Absolute*/
+#define RETRO_DEVICE_ID_LIGHTGUN_IS_OFFSCREEN    15 /*Status Check*/
+#define RETRO_DEVICE_ID_LIGHTGUN_TRIGGER          2
+#define RETRO_DEVICE_ID_LIGHTGUN_RELOAD          16 /*Forced off-screen shot*/
+#define RETRO_DEVICE_ID_LIGHTGUN_AUX_A            3
+#define RETRO_DEVICE_ID_LIGHTGUN_AUX_B            4
+#define RETRO_DEVICE_ID_LIGHTGUN_START            6
+#define RETRO_DEVICE_ID_LIGHTGUN_SELECT           7
+#define RETRO_DEVICE_ID_LIGHTGUN_AUX_C            8
+#define RETRO_DEVICE_ID_LIGHTGUN_DPAD_UP          9
+#define RETRO_DEVICE_ID_LIGHTGUN_DPAD_DOWN       10
+#define RETRO_DEVICE_ID_LIGHTGUN_DPAD_LEFT       11
+#define RETRO_DEVICE_ID_LIGHTGUN_DPAD_RIGHT      12
+/* deprecated */
+#define RETRO_DEVICE_ID_LIGHTGUN_X                0 /*Relative Position*/
+#define RETRO_DEVICE_ID_LIGHTGUN_Y                1 /*Relative*/
+#define RETRO_DEVICE_ID_LIGHTGUN_CURSOR           3 /*Use Aux:A*/
+#define RETRO_DEVICE_ID_LIGHTGUN_TURBO            4 /*Use Aux:B*/
+#define RETRO_DEVICE_ID_LIGHTGUN_PAUSE            5 /*Use Start*/
+
+/* Id values for POINTER. */
+#define RETRO_DEVICE_ID_POINTER_X         0
+#define RETRO_DEVICE_ID_POINTER_Y         1
+#define RETRO_DEVICE_ID_POINTER_PRESSED   2
+
+/* Returned from retro_get_region(). */
+#define RETRO_REGION_NTSC  0
+#define RETRO_REGION_PAL   1
+
+/* Id values for LANGUAGE.
+ * Every language has an explicitly assigned, stable numeric id. */
+enum retro_language
+{
+   RETRO_LANGUAGE_ENGLISH             = 0,
+   RETRO_LANGUAGE_JAPANESE            = 1,
+   RETRO_LANGUAGE_FRENCH              = 2,
+   RETRO_LANGUAGE_SPANISH             = 3,
+   RETRO_LANGUAGE_GERMAN              = 4,
+   RETRO_LANGUAGE_ITALIAN             = 5,
+   RETRO_LANGUAGE_DUTCH               = 6,
+   RETRO_LANGUAGE_PORTUGUESE_BRAZIL   = 7,
+   RETRO_LANGUAGE_PORTUGUESE_PORTUGAL = 8,
+   RETRO_LANGUAGE_RUSSIAN             = 9,
+   RETRO_LANGUAGE_KOREAN              = 10,
+   RETRO_LANGUAGE_CHINESE_TRADITIONAL = 11,
+   RETRO_LANGUAGE_CHINESE_SIMPLIFIED  = 12,
+   RETRO_LANGUAGE_ESPERANTO           = 13,
+   RETRO_LANGUAGE_POLISH              = 14,
+   RETRO_LANGUAGE_VIETNAMESE          = 15,
+   RETRO_LANGUAGE_ARABIC              = 16,
+   /* One past the highest language id listed above (17 here) */
+   RETRO_LANGUAGE_LAST,
+
+   /* Ensure sizeof(enum) == sizeof(int) */
+   RETRO_LANGUAGE_DUMMY          = INT_MAX
+};
+
+/* Passed to retro_get_memory_data/size().
+ * If the memory type doesn't apply to the
+ * implementation NULL/0 can be returned.
+ */
+#define RETRO_MEMORY_MASK        0xff
+
+/* Regular save RAM. This RAM is usually found on a game cartridge,
+ * backed up by a battery.
+ * If save game data is too complex for a single memory buffer,
+ * the SAVE_DIRECTORY (preferably) or SYSTEM_DIRECTORY environment
+ * callback can be used. */
+#define RETRO_MEMORY_SAVE_RAM    0
+
+/* Some games have a built-in clock to keep track of time.
+ * This memory is usually just a couple of bytes to keep track of time.
+ */
+#define RETRO_MEMORY_RTC         1
+
+/* System RAM lets a frontend peek into a game system's main RAM. */
+#define RETRO_MEMORY_SYSTEM_RAM  2
+
+/* Video RAM lets a frontend peek into a game system's video RAM (VRAM). */
+#define RETRO_MEMORY_VIDEO_RAM   3
+
+/* Keysyms used for ID in input state callback when polling RETRO_KEYBOARD. */
+enum retro_key /* Numeric key codes: 0..127 follow ASCII, all other keys start at 256. */
+{
+   RETROK_UNKNOWN        = 0,
+   RETROK_FIRST          = 0,   /* Same value as RETROK_UNKNOWN. */
+   RETROK_BACKSPACE      = 8,
+   RETROK_TAB            = 9,
+   RETROK_CLEAR          = 12,
+   RETROK_RETURN         = 13,
+   RETROK_PAUSE          = 19,
+   RETROK_ESCAPE         = 27,
+   RETROK_SPACE          = 32,  /* 32..126: each key uses the ASCII code of its character. */
+   RETROK_EXCLAIM        = 33,
+   RETROK_QUOTEDBL       = 34,
+   RETROK_HASH           = 35,
+   RETROK_DOLLAR         = 36,
+   RETROK_AMPERSAND      = 38,  /* 37 is not assigned. */
+   RETROK_QUOTE          = 39,
+   RETROK_LEFTPAREN      = 40,
+   RETROK_RIGHTPAREN     = 41,
+   RETROK_ASTERISK       = 42,
+   RETROK_PLUS           = 43,
+   RETROK_COMMA          = 44,
+   RETROK_MINUS          = 45,
+   RETROK_PERIOD         = 46,
+   RETROK_SLASH          = 47,
+   RETROK_0              = 48,
+   RETROK_1              = 49,
+   RETROK_2              = 50,
+   RETROK_3              = 51,
+   RETROK_4              = 52,
+   RETROK_5              = 53,
+   RETROK_6              = 54,
+   RETROK_7              = 55,
+   RETROK_8              = 56,
+   RETROK_9              = 57,
+   RETROK_COLON          = 58,
+   RETROK_SEMICOLON      = 59,
+   RETROK_LESS           = 60,
+   RETROK_EQUALS         = 61,
+   RETROK_GREATER        = 62,
+   RETROK_QUESTION       = 63,
+   RETROK_AT             = 64,
+   RETROK_LEFTBRACKET    = 91,  /* 65..90 are not assigned; letter keys use 97..122 below. */
+   RETROK_BACKSLASH      = 92,
+   RETROK_RIGHTBRACKET   = 93,
+   RETROK_CARET          = 94,
+   RETROK_UNDERSCORE     = 95,
+   RETROK_BACKQUOTE      = 96,
+   RETROK_a              = 97,
+   RETROK_b              = 98,
+   RETROK_c              = 99,
+   RETROK_d              = 100,
+   RETROK_e              = 101,
+   RETROK_f              = 102,
+   RETROK_g              = 103,
+   RETROK_h              = 104,
+   RETROK_i              = 105,
+   RETROK_j              = 106,
+   RETROK_k              = 107,
+   RETROK_l              = 108,
+   RETROK_m              = 109,
+   RETROK_n              = 110,
+   RETROK_o              = 111,
+   RETROK_p              = 112,
+   RETROK_q              = 113,
+   RETROK_r              = 114,
+   RETROK_s              = 115,
+   RETROK_t              = 116,
+   RETROK_u              = 117,
+   RETROK_v              = 118,
+   RETROK_w              = 119,
+   RETROK_x              = 120,
+   RETROK_y              = 121,
+   RETROK_z              = 122,
+   RETROK_LEFTBRACE      = 123,
+   RETROK_BAR            = 124,
+   RETROK_RIGHTBRACE     = 125,
+   RETROK_TILDE          = 126,
+   RETROK_DELETE         = 127,
+
+   RETROK_KP0            = 256, /* Keypad keys: 256..272. */
+   RETROK_KP1            = 257,
+   RETROK_KP2            = 258,
+   RETROK_KP3            = 259,
+   RETROK_KP4            = 260,
+   RETROK_KP5            = 261,
+   RETROK_KP6            = 262,
+   RETROK_KP7            = 263,
+   RETROK_KP8            = 264,
+   RETROK_KP9            = 265,
+   RETROK_KP_PERIOD      = 266,
+   RETROK_KP_DIVIDE      = 267,
+   RETROK_KP_MULTIPLY    = 268,
+   RETROK_KP_MINUS       = 269,
+   RETROK_KP_PLUS        = 270,
+   RETROK_KP_ENTER       = 271,
+   RETROK_KP_EQUALS      = 272,
+
+   RETROK_UP             = 273,
+   RETROK_DOWN           = 274,
+   RETROK_RIGHT          = 275,
+   RETROK_LEFT           = 276,
+   RETROK_INSERT         = 277,
+   RETROK_HOME           = 278,
+   RETROK_END            = 279,
+   RETROK_PAGEUP         = 280,
+   RETROK_PAGEDOWN       = 281,
+
+   RETROK_F1             = 282,
+   RETROK_F2             = 283,
+   RETROK_F3             = 284,
+   RETROK_F4             = 285,
+   RETROK_F5             = 286,
+   RETROK_F6             = 287,
+   RETROK_F7             = 288,
+   RETROK_F8             = 289,
+   RETROK_F9             = 290,
+   RETROK_F10            = 291,
+   RETROK_F11            = 292,
+   RETROK_F12            = 293,
+   RETROK_F13            = 294,
+   RETROK_F14            = 295,
+   RETROK_F15            = 296,
+
+   RETROK_NUMLOCK        = 300, /* 297..299 are not assigned. */
+   RETROK_CAPSLOCK       = 301,
+   RETROK_SCROLLOCK      = 302,
+   RETROK_RSHIFT         = 303,
+   RETROK_LSHIFT         = 304,
+   RETROK_RCTRL          = 305,
+   RETROK_LCTRL          = 306,
+   RETROK_RALT           = 307,
+   RETROK_LALT           = 308,
+   RETROK_RMETA          = 309,
+   RETROK_LMETA          = 310,
+   RETROK_LSUPER         = 311,
+   RETROK_RSUPER         = 312,
+   RETROK_MODE           = 313,
+   RETROK_COMPOSE        = 314,
+
+   RETROK_HELP           = 315,
+   RETROK_PRINT          = 316,
+   RETROK_SYSREQ         = 317,
+   RETROK_BREAK          = 318,
+   RETROK_MENU           = 319,
+   RETROK_POWER          = 320,
+   RETROK_EURO           = 321,
+   RETROK_UNDO           = 322,
+
+   RETROK_LAST,                 /* No explicit value: one more than RETROK_UNDO, i.e. 323. */
+
+   RETROK_DUMMY          = INT_MAX /* Ensure sizeof(enum) == sizeof(int) */
+};
+
+enum retro_mod /* Flag values: every entry from SHIFT to SCROLLOCK has its own bit, so they can be OR-ed together. */
+{
+   RETROKMOD_NONE       = 0x0000, /* No bits set. */
+
+   RETROKMOD_SHIFT      = 0x01,
+   RETROKMOD_CTRL       = 0x02,
+   RETROKMOD_ALT        = 0x04,
+   RETROKMOD_META       = 0x08,
+
+   RETROKMOD_NUMLOCK    = 0x10,
+   RETROKMOD_CAPSLOCK   = 0x20,
+   RETROKMOD_SCROLLOCK  = 0x40,
+
+   RETROKMOD_DUMMY = INT_MAX /* Sizing sentinel, not a flag bit: ensure sizeof(enum) == sizeof(int) */
+};
+
+/* If set, this call is not part of the public libretro API yet. It can
+ * change or be removed at any time. */
+#define RETRO_ENVIRONMENT_EXPERIMENTAL 0x10000 /* Flag bit OR-ed into a command number, e.g. (25 | RETRO_ENVIRONMENT_EXPERIMENTAL). */
+/* Environment callback to be used internally in the frontend. */
+#define RETRO_ENVIRONMENT_PRIVATE 0x20000 /* Distinct bit from RETRO_ENVIRONMENT_EXPERIMENTAL (0x10000). */
+
+/* Environment commands. */
+#define RETRO_ENVIRONMENT_SET_ROTATION  1  /* const unsigned * --
+                                            * Sets screen rotation of graphics.
+                                            * Is only implemented if rotation can be accelerated by hardware.
+                                            * Valid values are 0, 1, 2, 3, which rotate the screen by 0, 90, 180,
+                                            * 270 degrees counter-clockwise respectively.
+                                            */
+#define RETRO_ENVIRONMENT_GET_OVERSCAN  2  /* bool * --
+                                            * Boolean value whether or not the implementation should use overscan,
+                                            * or crop away overscan.
+                                            */
+#define RETRO_ENVIRONMENT_GET_CAN_DUPE  3  /* bool * --
+                                            * Boolean value whether or not frontend supports frame duping,
+                                            * passing NULL to video frame callback.
+                                            */
+
+                                           /* Environment calls 4 and 5 are no longer supported (GET_VARIABLE / SET_VARIABLES),
+                                            * and reserved to avoid possible ABI clash.
+                                            */
+
+#define RETRO_ENVIRONMENT_SET_MESSAGE   6  /* const struct retro_message * --
+                                            * Sets a message to be displayed in implementation-specific manner
+                                            * for a certain amount of 'frames'.
+                                            * Should not be used for trivial messages, which should simply be
+                                            * logged via RETRO_ENVIRONMENT_GET_LOG_INTERFACE (or as a
+                                            * fallback, stderr).
+                                            */
+#define RETRO_ENVIRONMENT_SHUTDOWN      7  /* N/A (NULL) --
+                                            * Requests the frontend to shutdown.
+                                            * Should only be used if game has a specific
+                                            * way to shutdown the game from a menu item or similar.
+                                            */
+#define RETRO_ENVIRONMENT_SET_PERFORMANCE_LEVEL 8
+                                           /* const unsigned * --
+                                            * Gives a hint to the frontend how demanding this implementation
+                                            * is on a system. E.g. reporting a level of 2 means
+                                            * this implementation should run decently on all frontends
+                                            * of level 2 and up.
+                                            *
+                                            * It can be used by the frontend to potentially warn
+                                            * about too demanding implementations.
+                                            *
+                                            * The levels are "floating".
+                                            *
+                                            * This function can be called on a per-game basis,
+                                            * as certain games an implementation can play might be
+                                            * particularly demanding.
+                                            * If called, it should be called in retro_load_game().
+                                            */
+#define RETRO_ENVIRONMENT_GET_SYSTEM_DIRECTORY 9
+                                           /* const char ** --
+                                            * Returns the "system" directory of the frontend.
+                                            * This directory can be used to store system specific
+                                            * content such as BIOSes, configuration data, etc.
+                                            * The returned value can be NULL.
+                                            * If so, no such directory is defined,
+                                            * and it's up to the implementation to find a suitable directory.
+                                            *
+                                            * NOTE: Some cores used this folder also for "save" data such as
+                                            * memory cards, etc, for lack of a better place to put it.
+                                            * This is now discouraged, and if possible, cores should try to
+                                            * use the new GET_SAVE_DIRECTORY.
+                                            */
+#define RETRO_ENVIRONMENT_SET_PIXEL_FORMAT 10
+                                           /* const enum retro_pixel_format * --
+                                            * Sets the internal pixel format used by the implementation.
+                                            * The default pixel format is RETRO_PIXEL_FORMAT_0RGB1555.
+                                            * This pixel format however, is deprecated (see enum retro_pixel_format).
+                                            * If the call returns false, the frontend does not support this pixel
+                                            * format.
+                                            *
+                                            * This function should be called inside retro_load_game() or
+                                            * retro_get_system_av_info().
+                                            */
+#define RETRO_ENVIRONMENT_SET_INPUT_DESCRIPTORS 11
+                                           /* const struct retro_input_descriptor * --
+                                            * Sets an array of retro_input_descriptors.
+                                            * It is up to the frontend to present this in a usable way.
+                                            * The array is terminated by retro_input_descriptor::description
+                                            * being set to NULL.
+                                            * This function can be called at any time, but it is recommended
+                                            * to call it as early as possible.
+                                            */
+#define RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK 12
+                                           /* const struct retro_keyboard_callback * --
+                                            * Sets a callback function used to notify core about keyboard events.
+                                            */
+#define RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE 13
+                                           /* const struct retro_disk_control_callback * --
+                                            * Sets an interface which frontend can use to eject and insert
+                                            * disk images.
+                                            * This is used for games which consist of multiple images and
+                                            * must be manually swapped out by the user (e.g. PSX).
+                                            */
+#define RETRO_ENVIRONMENT_SET_HW_RENDER 14
+                                           /* struct retro_hw_render_callback * --
+                                            * Sets an interface to let a libretro core render with
+                                            * hardware acceleration.
+                                            * Should be called in retro_load_game().
+                                            * If successful, libretro cores will be able to render to a
+                                            * frontend-provided framebuffer.
+                                            * The size of this framebuffer will be at least as large as
+                                            * max_width/max_height provided in get_av_info().
+                                            * If HW rendering is used, pass only RETRO_HW_FRAME_BUFFER_VALID or
+                                            * NULL to retro_video_refresh_t.
+                                            */
+#define RETRO_ENVIRONMENT_GET_VARIABLE 15
+                                           /* struct retro_variable * --
+                                            * Interface to acquire user-defined information from environment
+                                            * that cannot feasibly be supported in a multi-system way.
+                                            * 'key' should be set to a key which has already been set by
+                                            * SET_VARIABLES.
+                                            * 'data' will be set to a value or NULL.
+                                            */
+#define RETRO_ENVIRONMENT_SET_VARIABLES 16
+                                           /* const struct retro_variable * --
+                                            * Allows an implementation to signal the environment
+                                            * which variables it might want to check for later using
+                                            * GET_VARIABLE.
+                                            * This allows the frontend to present these variables to
+                                            * a user dynamically.
+                                            * This should be called as early as possible (ideally in
+                                            * retro_set_environment).
+                                            *
+                                            * 'data' points to an array of retro_variable structs
+                                            * terminated by a { NULL, NULL } element.
+                                            * retro_variable::key should be namespaced to not collide
+                                            * with other implementations' keys. E.g. A core called
+                                            * 'foo' should use keys named as 'foo_option'.
+                                            * retro_variable::value should contain a human readable
+                                            * description of the key as well as a '|' delimited list
+                                            * of expected values.
+                                            *
+                                            * The number of possible options should be very limited,
+                                            * i.e. it should be feasible to cycle through options
+                                            * without a keyboard.
+                                            *
+                                            * First entry should be treated as a default.
+                                            *
+                                            * Example entry:
+                                            * { "foo_option", "Speed hack coprocessor X; false|true" }
+                                            *
+                                            * Text before first ';' is description. This ';' must be
+                                            * followed by a space, and followed by a list of possible
+                                            * values split up with '|'.
+                                            *
+                                            * Only strings are operated on. The possible values will
+                                            * generally be displayed and stored as-is by the frontend.
+                                            */
+#define RETRO_ENVIRONMENT_GET_VARIABLE_UPDATE 17
+                                           /* bool * --
+                                            * Result is set to true if some variables are updated by
+                                            * frontend since last call to RETRO_ENVIRONMENT_GET_VARIABLE.
+                                            * Variables should be queried with GET_VARIABLE.
+                                            */
+#define RETRO_ENVIRONMENT_SET_SUPPORT_NO_GAME 18
+                                           /* const bool * --
+                                            * If true, the libretro implementation supports calls to
+                                            * retro_load_game() with NULL as argument.
+                                            * Used by cores which can run without particular game data.
+                                            * This should be called within retro_set_environment() only.
+                                            */
+#define RETRO_ENVIRONMENT_GET_LIBRETRO_PATH 19
+                                           /* const char ** --
+                                            * Retrieves the absolute path from where this libretro
+                                            * implementation was loaded.
+                                            * NULL is returned if the libretro was loaded statically
+                                            * (i.e. linked statically to frontend), or if the path cannot be
+                                            * determined.
+                                            * Mostly useful in cooperation with SET_SUPPORT_NO_GAME as assets can
+                                            * be loaded without ugly hacks.
+                                            */
+
+                                           /* Environment 20 was an obsolete version of SET_AUDIO_CALLBACK.
+                                            * It was not used by any known core at the time,
+                                            * and was removed from the API. */
+#define RETRO_ENVIRONMENT_SET_AUDIO_CALLBACK 22
+                                           /* const struct retro_audio_callback * --
+                                            * Sets an interface which is used to notify a libretro core about audio
+                                            * being available for writing.
+                                            * The callback can be called from any thread, so a core using this must
+                                            * have a thread safe audio implementation.
+                                            * It is intended for games where audio and video are completely
+                                            * asynchronous and audio can be generated on the fly.
+                                            * This interface is not recommended for use with emulators which have
+                                            * highly synchronous audio.
+                                            *
+                                            * The callback only notifies about writability; the libretro core still
+                                            * has to call the normal audio callbacks
+                                            * to write audio. The audio callbacks must be called from within the
+                                            * notification callback.
+                                            * The amount of audio data to write is up to the implementation.
+                                            * Generally, the audio callback will be called continuously in a loop.
+                                            *
+                                            * Due to thread safety guarantees and lack of sync between audio and
+                                            * video, a frontend can selectively disallow this interface based on
+                                            * internal configuration. A core using this interface must also
+                                            * implement the "normal" audio interface.
+                                            *
+                                            * A libretro core using SET_AUDIO_CALLBACK should also make use of
+                                            * SET_FRAME_TIME_CALLBACK.
+                                            */
+#define RETRO_ENVIRONMENT_SET_FRAME_TIME_CALLBACK 21
+                                           /* const struct retro_frame_time_callback * --
+                                            * Lets the core know how much time has passed since last
+                                            * invocation of retro_run().
+                                            * The frontend can tamper with the timing to fake fast-forward,
+                                            * slow-motion, frame stepping, etc.
+                                            * In this case the delta time will use the reference value
+                                            * in frame_time_callback.
+                                            */
+#define RETRO_ENVIRONMENT_GET_RUMBLE_INTERFACE 23
+                                           /* struct retro_rumble_interface * --
+                                            * Gets an interface which is used by a libretro core to set
+                                            * state of rumble motors in controllers.
+                                            * A strong and a weak motor are supported, and they can be
+                                            * controlled independently.
+                                            */
+#define RETRO_ENVIRONMENT_GET_INPUT_DEVICE_CAPABILITIES 24
+                                           /* uint64_t * --
+                                            * Gets a bitmask telling which device types are expected to be
+                                            * handled properly in a call to retro_input_state_t.
+                                            * Devices which are not handled or recognized always return
+                                            * 0 in retro_input_state_t.
+                                            * Example bitmask: caps = (1 << RETRO_DEVICE_JOYPAD) | (1 << RETRO_DEVICE_ANALOG).
+                                            * Should only be called in retro_run().
+                                            */
+#define RETRO_ENVIRONMENT_GET_SENSOR_INTERFACE (25 | RETRO_ENVIRONMENT_EXPERIMENTAL)
+                                           /* struct retro_sensor_interface * --
+                                            * Gets access to the sensor interface.
+                                            * The purpose of this interface is to allow
+                                            * setting state related to sensors such as polling rate,
+                                            * enabling/disabling it entirely, etc.
+                                            * Reading sensor state is done via the normal
+                                            * input_state_callback API.
+                                            */
+#define RETRO_ENVIRONMENT_GET_CAMERA_INTERFACE (26 | RETRO_ENVIRONMENT_EXPERIMENTAL)
+                                           /* struct retro_camera_callback * --
+                                            * Gets an interface to a video camera driver.
+                                            * A libretro core can use this interface to get access to a
+                                            * video camera.
+                                            * New video frames are delivered in a callback in the same
+                                            * thread as retro_run().
+                                            *
+                                            * GET_CAMERA_INTERFACE should be called in retro_load_game().
+                                            *
+                                            * Depending on the camera implementation used, camera frames
+                                            * will be delivered as a raw framebuffer,
+                                            * or as an OpenGL texture directly.
+                                            *
+                                            * The core has to tell the frontend here which types of
+                                            * buffers can be handled properly.
+                                            * An OpenGL texture can only be handled when using a
+                                            * libretro GL core (SET_HW_RENDER).
+                                            * It is recommended to use a libretro GL core when
+                                            * using camera interface.
+                                            *
+                                            * The camera is not started automatically. The retrieved start/stop
+                                            * functions must be used to explicitly
+                                            * start and stop the camera driver.
+                                            */
+#define RETRO_ENVIRONMENT_GET_LOG_INTERFACE 27
+                                           /* struct retro_log_callback * --
+                                            * Gets an interface for logging. This is useful for
+                                            * logging in a cross-platform way
+                                            * as certain platforms cannot use stderr for logging.
+                                            * It also allows the frontend to
+                                            * show logging information in a more suitable way.
+                                            * If this interface is not used, libretro cores should
+                                            * log to stderr as desired.
+                                            */
+#define RETRO_ENVIRONMENT_GET_PERF_INTERFACE 28
+                                           /* struct retro_perf_callback * --
+                                            * Gets an interface for performance counters. This is useful
+                                            * for performance logging in a cross-platform way and for detecting
+                                            * architecture-specific features, such as SIMD support.
+                                            */
+#define RETRO_ENVIRONMENT_GET_LOCATION_INTERFACE 29
+                                           /* struct retro_location_callback * --
+                                            * Gets access to the location interface.
+                                            * The purpose of this interface is to be able to retrieve
+                                            * location-based information from the host device,
+                                            * such as current latitude / longitude.
+                                            */
+#define RETRO_ENVIRONMENT_GET_CONTENT_DIRECTORY 30 /* Old name, kept for compatibility. */
+#define RETRO_ENVIRONMENT_GET_CORE_ASSETS_DIRECTORY 30
+                                           /* const char ** --
+                                            * Returns the "core assets" directory of the frontend.
+                                            * This directory can be used to store specific assets that the
+                                            * core relies upon, such as art assets,
+                                            * input data, etc.
+                                            * The returned value can be NULL.
+                                            * If so, no such directory is defined,
+                                            * and it's up to the implementation to find a suitable directory.
+                                            */
+#define RETRO_ENVIRONMENT_GET_SAVE_DIRECTORY 31
+                                           /* const char ** --
+                                            * Returns the "save" directory of the frontend.
+                                            * This directory can be used to store SRAM, memory cards,
+                                            * high scores, etc, if the libretro core
+                                            * cannot use the regular memory interface (retro_get_memory_data()).
+                                            *
+                                            * NOTE: libretro cores used to check GET_SYSTEM_DIRECTORY for
+                                            * similar things before.
+                                            * They should still check GET_SYSTEM_DIRECTORY if they want to
+                                            * be backwards compatible.
+                                            * The path here can be NULL. It should only be non-NULL if the
+                                            * frontend user has set a specific save path.
+                                            */
+#define RETRO_ENVIRONMENT_SET_SYSTEM_AV_INFO 32
+                                           /* const struct retro_system_av_info * --
+                                            * Sets a new av_info structure. This can only be called from
+                                            * within retro_run().
+                                            * This should *only* be used if the core is completely altering the
+                                            * internal resolutions, aspect ratios, timings, sampling rate, etc.
+                                            * Calling this can require a full reinitialization of video/audio
+                                            * drivers in the frontend,
+                                            * so it is important to call it very sparingly, and usually only with
+                                            * the user's explicit consent.
+                                            *
+                                            * An eventual driver reinitialization will happen so that video and
+                                            * audio callbacks
+                                            * happening after this call within the same retro_run() call will
+                                            * target the newly initialized driver.
+                                            *
+                                            * This callback makes it possible to support configurable resolutions
+                                            * in games, which can be useful to
+                                            * avoid setting the "worst case" in max_width/max_height.
+                                            *
+                                            * ***HIGHLY RECOMMENDED*** Do not call this callback every time
+                                            * resolution changes in an emulator core if it's
+                                            * expected to be a temporary change, for the reasons of possible
+                                            * driver reinitialization.
+                                            * This call is not a free pass for not trying to provide
+                                            * correct values in retro_get_system_av_info(). If you need to change
+                                            * things like aspect ratio or nominal width/height,
+                                            * use RETRO_ENVIRONMENT_SET_GEOMETRY, which is a softer variant
+                                            * of SET_SYSTEM_AV_INFO.
+                                            *
+                                            * If this returns false, the frontend does not acknowledge a
+                                            * changed av_info struct.
+                                            */
+#define RETRO_ENVIRONMENT_SET_PROC_ADDRESS_CALLBACK 33
+                                           /* const struct retro_get_proc_address_interface * --
+                                            * Allows a libretro core to announce support for the
+                                            * get_proc_address() interface.
+                                            * This interface allows for a standard way to extend libretro where
+                                            * use of environment calls is too indirect,
+                                            * e.g. for cases where the frontend wants to call directly into the core.
+                                            *
+                                            * If a core wants to expose this interface, SET_PROC_ADDRESS_CALLBACK
+                                            * **MUST** be called from within retro_set_environment().
+                                            */
+#define RETRO_ENVIRONMENT_SET_SUBSYSTEM_INFO 34
+                                           /* const struct retro_subsystem_info * --
+                                            * This environment call introduces the concept of libretro "subsystems".
+                                            * A subsystem is a variant of a libretro core which supports
+                                            * different kinds of games.
+                                            * The purpose of this is to support e.g. emulators which might
+                                            * have special needs, e.g. Super Nintendo's Super GameBoy, Sufami Turbo.
+                                            * It can also be used to pick among subsystems in an explicit way
+                                            * if the libretro implementation is a multi-system emulator itself.
+                                            *
+                                            * Loading a game via a subsystem is done with retro_load_game_special(),
+                                            * and this environment call allows a libretro core to expose which
+                                            * subsystems are supported for use with retro_load_game_special().
+                                            * A core passes an array of retro_subsystem_info which is terminated
+                                            * with a zeroed out retro_subsystem_info struct.
+                                            *
+                                            * If a core wants to use this functionality, SET_SUBSYSTEM_INFO
+                                            * **MUST** be called from within retro_set_environment().
+                                            */
+#define RETRO_ENVIRONMENT_SET_CONTROLLER_INFO 35
+                                           /* const struct retro_controller_info * --
+                                            * This environment call lets a libretro core tell the frontend
+                                            * which controller types are recognized in calls to
+                                            * retro_set_controller_port_device().
+                                            *
+                                            * Some emulators such as Super Nintendo
+                                            * support multiple lightgun types which must be specifically
+                                            * selected from.
+                                            * It is therefore sometimes necessary for a frontend to be able
+                                            * to tell the core about a special kind of input device which is
+                                            * not covered by the libretro input API.
+                                            *
+                                            * In order for a frontend to understand the workings of an input device,
+                                            * it must be a specialized type
+                                            * of the generic device types already defined in the libretro API.
+                                            *
+                                            * Which devices are supported can vary per input port.
+                                            * The core must pass an array of const struct retro_controller_info which
+                                            * is terminated with a blanked out struct. Each element of the struct
+                                            * corresponds to an ascending port index to
+                                            * retro_set_controller_port_device().
+                                            * Even if special device types are set in the libretro core,
+                                            * libretro should only poll input based on the base input device types.
+                                            */
+#define RETRO_ENVIRONMENT_SET_MEMORY_MAPS (36 | RETRO_ENVIRONMENT_EXPERIMENTAL)
+                                           /* const struct retro_memory_map * --
+                                            * This environment call lets a libretro core tell the frontend
+                                            * about the memory maps this core emulates.
+                                            * This can be used to implement, for example, cheats in a core-agnostic way.
+                                            *
+                                            * Should only be used by emulators; it doesn't make much sense for
+                                            * anything else.
+                                            * It is recommended to expose all relevant pointers through
+                                            * retro_get_memory_* as well.
+                                            *
+                                            * Can be called from retro_init and retro_load_game.
+                                            */
+#define RETRO_ENVIRONMENT_SET_GEOMETRY 37
+                                           /* const struct retro_game_geometry * --
+                                            * This environment call is similar to SET_SYSTEM_AV_INFO for changing
+                                            * video parameters, but provides a guarantee that drivers will not be
+                                            * reinitialized.
+                                            * This can only be called from within retro_run().
+                                            *
+                                            * The purpose of this call is to allow a core to alter nominal
+                                            * width/heights as well as aspect ratios on-the-fly, which can be
+                                            * useful for some emulators to change in run-time.
+                                            *
+                                            * max_width/max_height arguments are ignored and cannot be changed
+                                            * with this call as this could potentially require a reinitialization or a
+                                            * non-constant time operation.
+                                            * If max_width/max_height are to be changed, SET_SYSTEM_AV_INFO is required.
+                                            *
+                                            * A frontend must guarantee that this environment call completes in
+                                            * constant time.
+                                            */
+#define RETRO_ENVIRONMENT_GET_USERNAME 38
+                                           /* const char ** --
+                                            * Returns the specified username of the frontend, if specified by the user.
+                                            * This username can be used as a nickname for a core that has online facilities
+                                            * or any other mode where personalization of the user is desirable.
+                                            * The returned value can be NULL.
+                                            * If this environ callback is used by a core that requires a valid username,
+                                            * a default username should be specified by the core.
+                                            */
+#define RETRO_ENVIRONMENT_GET_LANGUAGE 39
+                                           /* unsigned * --
+                                            * Returns the specified language of the frontend, if specified by the user.
+                                            * It can be used by the core for localization purposes.
+                                            */
+#define RETRO_ENVIRONMENT_GET_CURRENT_SOFTWARE_FRAMEBUFFER (40 | RETRO_ENVIRONMENT_EXPERIMENTAL)
+                                           /* struct retro_framebuffer * --
+                                            * Returns a preallocated framebuffer which the core can use for rendering
+                                            * the frame into when not using SET_HW_RENDER.
+                                            * The framebuffer returned from this call must not be used
+                                            * after the current call to retro_run() returns.
+                                            *
+                                            * The goal of this call is to allow zero-copy behavior where a core
+                                            * can render directly into video memory, avoiding extra bandwidth cost by copying
+                                            * memory from core to video memory.
+                                            *
+                                            * If this call succeeds and the core renders into it,
+                                            * the framebuffer pointer and pitch can be passed to retro_video_refresh_t.
+                                            * If the buffer from GET_CURRENT_SOFTWARE_FRAMEBUFFER is to be used,
+                                            * the core must pass the exact
+                                            * same pointer as returned by GET_CURRENT_SOFTWARE_FRAMEBUFFER;
+                                            * i.e. passing a pointer which is offset from the
+                                            * buffer is undefined. The width, height and pitch parameters
+                                            * must also match exactly to the values obtained from GET_CURRENT_SOFTWARE_FRAMEBUFFER.
+                                            *
+                                            * It is possible for a frontend to return a different pixel format
+                                            * than the one used in SET_PIXEL_FORMAT. This can happen if the frontend
+                                            * needs to perform conversion.
+                                            *
+                                            * It is still valid for a core to render to a different buffer
+                                            * even if GET_CURRENT_SOFTWARE_FRAMEBUFFER succeeds.
+                                            *
+                                            * A frontend must make sure that the pointer obtained from this function is
+                                            * writeable (and readable).
+                                            */
+
+#define RETRO_ENVIRONMENT_SET_HW_SHARED_CONTEXT (44 | RETRO_ENVIRONMENT_EXPERIMENTAL)
+                                           /* N/A (null) * --
+                                            * The frontend will try to use a 'shared' hardware context (mostly applicable
+                                            * to OpenGL) when a hardware context is being set up.
+                                            *
+                                            * Returns true if the frontend supports shared hardware contexts and false
+                                            * if the frontend does not support shared hardware contexts.
+                                            *
+                                            * This will do nothing on its own until SET_HW_RENDER env callbacks are
+                                            * being used.
+                                            */
+
+#define RETRO_ENVIRONMENT_GET_VFS_INTERFACE (45 | RETRO_ENVIRONMENT_EXPERIMENTAL)
+                                           /* struct retro_vfs_interface_info * --
+                                            * Gets access to the VFS interface.
+                                            * VFS presence needs to be queried prior to load_game or any
+                                            * get_system/save/other_directory being called to let front end know
+                                            * core supports VFS before it starts handing out paths.
+                                            * It is recommended to do so in retro_set_environment */
+
+/* VFS functionality */
+
+/* File paths:
+ * File paths passed as parameters when using this api shall be well formed unix-style,
+ * using "/" (unquoted forward slash) as directory separator regardless of the platform's native separator.
+ * Paths shall also include at least one forward slash ("game.bin" is an invalid path, use "./game.bin" instead).
+ * Other than the directory separator, cores shall not make assumptions about path format:
+ * "C:/path/game.bin", "http://example.com/game.bin", "#game/game.bin", "./game.bin" (without quotes) are all valid paths.
+ * Cores may replace the basename or remove path components from the end, and/or add new components;
+ * however, cores shall not append "./", "../" or multiple consecutive forward slashes ("//") to paths they request to front end.
+ * The frontend is encouraged to make such paths work as well as it can, but is allowed to give up if the core alters paths too much.
+ * Frontends are encouraged, but not required, to support native file system paths (modulo replacing the directory separator, if applicable).
+ * Cores are allowed to try using them, but must remain functional if the front end rejects such requests.
+ * Cores are encouraged to use the libretro-common filestream functions for file I/O,
+ * as they seamlessly integrate with VFS, deal with directory separator replacement as appropriate
+ * and provide platform-specific fallbacks in cases where front ends do not support VFS. */
+
+/* Opaque file handle
+ * Introduced in VFS API v1 */
+struct retro_vfs_file_handle;
+
+/* File open flags
+ * Introduced in VFS API v1 */
+#define RETRO_VFS_FILE_ACCESS_READ            (1 << 0) /* Read only mode */
+#define RETRO_VFS_FILE_ACCESS_WRITE           (1 << 1) /* Write only mode, discards contents and overwrites existing file unless RETRO_VFS_FILE_ACCESS_UPDATE_EXISTING is also specified */
+#define RETRO_VFS_FILE_ACCESS_READ_WRITE      (RETRO_VFS_FILE_ACCESS_READ | RETRO_VFS_FILE_ACCESS_WRITE) /* Read-write mode, discards contents and overwrites existing file unless RETRO_VFS_FILE_ACCESS_UPDATE_EXISTING is also specified */
+#define RETRO_VFS_FILE_ACCESS_UPDATE_EXISTING (1 << 2) /* Prevents discarding content of existing files opened for writing */
+
+/* These are only hints. The frontend may choose to ignore them. Other than RAM/CPU/etc use,
+   and how they react to unlikely external interference (for example someone else writing to that file,
+   or the file's server going down), behavior will not change. */
+#define RETRO_VFS_FILE_ACCESS_HINT_NONE              (0)
+/* Indicate that the file will be accessed many times. The frontend should aggressively cache everything. */
+#define RETRO_VFS_FILE_ACCESS_HINT_FREQUENT_ACCESS   (1 << 0)
+
+/* Seek positions */
+#define RETRO_VFS_SEEK_POSITION_START    0
+#define RETRO_VFS_SEEK_POSITION_CURRENT  1
+#define RETRO_VFS_SEEK_POSITION_END      2
+
+/* Get path from opaque handle. Returns the exact same path passed to file_open when getting the handle
+ * Introduced in VFS API v1 */
+typedef const char *(RETRO_CALLCONV *retro_vfs_get_path_t)(struct retro_vfs_file_handle *stream);
+
+/* Open a file for reading or writing. If path points to a directory, this will
+ * fail. Returns the opaque file handle, or NULL for error.
+ * Introduced in VFS API v1 */
+typedef struct retro_vfs_file_handle *(RETRO_CALLCONV *retro_vfs_open_t)(const char *path, unsigned mode, unsigned hints);
+
+/* Close the file and release its resources. Must be called if open_file returns non-NULL. Returns 0 on success, -1 on failure.
+ * Whether the call succeeds or not, the handle passed as parameter becomes invalid and should no longer be used.
+ * Introduced in VFS API v1 */
+typedef int (RETRO_CALLCONV *retro_vfs_close_t)(struct retro_vfs_file_handle *stream);
+
+/* Return the size of the file in bytes, or -1 for error.
+ * Introduced in VFS API v1 */
+typedef int64_t (RETRO_CALLCONV *retro_vfs_size_t)(struct retro_vfs_file_handle *stream);
+
+/* Get the current read / write position for the file. Returns -1 for error.
+ * Introduced in VFS API v1 */
+typedef int64_t (RETRO_CALLCONV *retro_vfs_tell_t)(struct retro_vfs_file_handle *stream);
+
+/* Set the current read/write position for the file. Returns the new position, -1 for error.
+ * Introduced in VFS API v1 */
+typedef int64_t (RETRO_CALLCONV *retro_vfs_seek_t)(struct retro_vfs_file_handle *stream, int64_t offset, int seek_position);
+
+/* Read data from a file. Returns the number of bytes read, or -1 for error.
+ * Introduced in VFS API v1 */
+typedef int64_t (RETRO_CALLCONV *retro_vfs_read_t)(struct retro_vfs_file_handle *stream, void *s, uint64_t len);
+
+/* Write data to a file. Returns the number of bytes written, or -1 for error.
+ * Introduced in VFS API v1 */
+typedef int64_t (RETRO_CALLCONV *retro_vfs_write_t)(struct retro_vfs_file_handle *stream, const void *s, uint64_t len);
+
+/* Flush pending writes to file, if using buffered IO. Returns 0 on success, or -1 on failure.
+ * Introduced in VFS API v1 */
+typedef int (RETRO_CALLCONV *retro_vfs_flush_t)(struct retro_vfs_file_handle *stream);
+
+/* Delete the specified file. Returns 0 on success, -1 on failure
+ * Introduced in VFS API v1 */
+typedef int (RETRO_CALLCONV *retro_vfs_remove_t)(const char *path);
+
+/* Rename the specified file. Returns 0 on success, -1 on failure
+ * Introduced in VFS API v1 */
+typedef int (RETRO_CALLCONV *retro_vfs_rename_t)(const char *old_path, const char *new_path);
+
+struct retro_vfs_interface
+{
+	retro_vfs_get_path_t get_path; /* Returns the path that was passed when the handle was opened. */
+	retro_vfs_open_t open; /* Opens a file; returns NULL on error or if path is a directory. */
+	retro_vfs_close_t close; /* Closes the file; the handle is invalid afterwards even on failure. */
+	retro_vfs_size_t size; /* File size in bytes, or -1 on error. */
+	retro_vfs_tell_t tell; /* Current read/write position, or -1 on error. */
+	retro_vfs_seek_t seek; /* Sets the position; returns the new position, or -1 on error. */
+	retro_vfs_read_t read; /* Returns the number of bytes read, or -1 on error. */
+	retro_vfs_write_t write; /* Returns the number of bytes written, or -1 on error. */
+	retro_vfs_flush_t flush; /* Flushes pending writes; 0 on success, -1 on failure. */
+	retro_vfs_remove_t remove; /* Deletes a file by path; 0 on success, -1 on failure. */
+	retro_vfs_rename_t rename; /* Renames a file by path; 0 on success, -1 on failure. */
+};
+
+struct retro_vfs_interface_info
+{
+   /* Set by core: the minimum VFS API version the core requires. If this is
+    * higher than the version the front end supports, the front end returns
+    * false from the RETRO_ENVIRONMENT_GET_VFS_INTERFACE call.
+    * Introduced in VFS API v1 */
+   uint32_t required_interface_version;
+
+   /* Frontend writes interface pointer here. The frontend also sets the actual
+    * version, which must be at least required_interface_version.
+    * Introduced in VFS API v1 */
+   struct retro_vfs_interface *iface;
+};
+
+enum retro_hw_render_interface_type
+{
+   RETRO_HW_RENDER_INTERFACE_VULKAN = 0, /* Vulkan-specific interface; details presumably live in libretro_vulkan.h. */
+   RETRO_HW_RENDER_INTERFACE_DUMMY = INT_MAX /* Sentinel, not a real interface type; presumably forces the enum to be int-sized -- TODO confirm. */
+};
+
+/* Base struct. All retro_hw_render_interface_* types
+ * contain at least these fields. */
+struct retro_hw_render_interface
+{
+   enum retro_hw_render_interface_type interface_type; /* Identifies which API-specific interface this struct is the head of. */
+   unsigned interface_version; /* Version of that interface; numbering is presumably defined by the API-specific header -- TODO confirm. */
+};
+
+
+#define RETRO_ENVIRONMENT_GET_LED_INTERFACE (46 | RETRO_ENVIRONMENT_EXPERIMENTAL)
+                                           /* struct retro_led_interface * --
+                                            * Gets an interface which is used by a libretro core to set 
+                                            * state of LEDs.
+                                            */
+
+typedef void (RETRO_CALLCONV *retro_set_led_state_t)(int led, int state); /* Sets LED 'led' to 'state'; valid ranges for both are not specified here. */
+struct retro_led_interface
+{
+    retro_set_led_state_t set_led_state; /* Callback the core uses to set the state of an LED. */
+};
+
+
+#define RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE (41 | RETRO_ENVIRONMENT_EXPERIMENTAL)
+                                           /* const struct retro_hw_render_interface ** --
+                                            * Returns an API specific rendering interface for accessing API specific data.
+                                            * Not all HW rendering APIs support or need this.
+                                            * The contents of the returned pointer are specific to the rendering API
+                                            * being used. See the various headers like libretro_vulkan.h, etc.
+                                            *
+                                            * GET_HW_RENDER_INTERFACE cannot be called before context_reset has been called.
+                                            * Similarly, after context_destroyed callback returns,
+                                            * the contents of the HW_RENDER_INTERFACE are invalidated.
+                                            */
+
+#define RETRO_ENVIRONMENT_SET_SUPPORT_ACHIEVEMENTS (42 | RETRO_ENVIRONMENT_EXPERIMENTAL)
+                                           /* const bool * --
+                                            * If true, the libretro implementation supports achievements
+                                            * either via memory descriptors set with RETRO_ENVIRONMENT_SET_MEMORY_MAPS
+                                            * or via retro_get_memory_data/retro_get_memory_size.
+                                            *
+                                            * This must be called before the first call to retro_run.
+                                            */
+
+enum retro_hw_render_context_negotiation_interface_type
+{
+   RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_VULKAN = 0, /* Vulkan-specific context negotiation interface. */
+   RETRO_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE_DUMMY = INT_MAX /* Sentinel, not a real interface type; presumably forces the enum to be int-sized -- TODO confirm. */
+};
+
+/* Base struct. All retro_hw_render_context_negotiation_interface_* types
+ * contain at least these fields. */
+struct retro_hw_render_context_negotiation_interface
+{
+   enum retro_hw_render_context_negotiation_interface_type interface_type; /* Identifies which API-specific negotiation interface this struct is the head of. */
+   unsigned interface_version; /* Version of that interface; numbering is presumably defined by the API-specific header -- TODO confirm. */
+};
+#define RETRO_ENVIRONMENT_SET_HW_RENDER_CONTEXT_NEGOTIATION_INTERFACE (43 | RETRO_ENVIRONMENT_EXPERIMENTAL)
+                                           /* const struct retro_hw_render_context_negotiation_interface * --
+                                            * Sets an interface which lets the libretro core negotiate with frontend how a context is created.
+                                            * The semantics of this interface depend on which API is used in SET_HW_RENDER earlier.
+                                            * This interface will be used when the frontend is trying to create a HW rendering context,
+                                            * so it will be used after SET_HW_RENDER, but before the context_reset callback.
+                                            */
+
+/* Serialized state is incomplete in some way. Set if serialization is
+ * usable in typical end-user cases but should not be relied upon to
+ * implement frame-sensitive frontend features such as netplay or
+ * rerecording. */
+#define RETRO_SERIALIZATION_QUIRK_INCOMPLETE (1 << 0)
+/* The core must spend some time initializing before serialization is
+ * supported. retro_serialize() will initially fail; retro_unserialize()
+ * and retro_serialize_size() may or may not work correctly either. */
+#define RETRO_SERIALIZATION_QUIRK_MUST_INITIALIZE (1 << 1)
+/* Serialization size may change within a session. */
+#define RETRO_SERIALIZATION_QUIRK_CORE_VARIABLE_SIZE (1 << 2)
+/* Set by the frontend to acknowledge that it supports variable-sized
+ * states. */
+#define RETRO_SERIALIZATION_QUIRK_FRONT_VARIABLE_SIZE (1 << 3)
+/* Serialized state can only be loaded during the same session. */
+#define RETRO_SERIALIZATION_QUIRK_SINGLE_SESSION (1 << 4)
+/* Serialized state cannot be loaded on an architecture with a different
+ * endianness from the one it was saved on. */
+#define RETRO_SERIALIZATION_QUIRK_ENDIAN_DEPENDENT (1 << 5)
+/* Serialized state cannot be loaded on a different platform from the one it
+ * was saved on for reasons other than endianness, such as word size
+ * dependence */
+#define RETRO_SERIALIZATION_QUIRK_PLATFORM_DEPENDENT (1 << 6)
+
+#define RETRO_ENVIRONMENT_SET_SERIALIZATION_QUIRKS 44
+                                           /* uint64_t * --
+                                            * Sets quirk flags associated with serialization. The frontend will zero any flags it doesn't
+                                            * recognize or support. Should be set in either retro_init or retro_load_game, but not both.
+                                            */
+
+#define RETRO_MEMDESC_CONST     (1 << 0)   /* The frontend will never change this memory area once retro_load_game has returned. */
+#define RETRO_MEMDESC_BIGENDIAN (1 << 1)   /* The memory area contains big endian data. Default is little endian. */
+#define RETRO_MEMDESC_ALIGN_2   (1 << 16)  /* Every memory access in this area is aligned to its own size, or 2, whichever is smaller. */
+#define RETRO_MEMDESC_ALIGN_4   (2 << 16)  /* Presumably as ALIGN_2 with a bound of 4 -- TODO confirm. */
+#define RETRO_MEMDESC_ALIGN_8   (3 << 16)  /* Presumably as ALIGN_2 with a bound of 8. Note: equals ALIGN_2 | ALIGN_4, so ALIGN_* are field values, not independent bits. */
+#define RETRO_MEMDESC_MINSIZE_2 (1 << 24)  /* All memory in this region is accessed at least 2 bytes at a time. */
+#define RETRO_MEMDESC_MINSIZE_4 (2 << 24)  /* Presumably as MINSIZE_2 with 4 bytes -- TODO confirm. */
+#define RETRO_MEMDESC_MINSIZE_8 (3 << 24)  /* Presumably as MINSIZE_2 with 8 bytes. Note: equals MINSIZE_2 | MINSIZE_4, so MINSIZE_* are field values, not independent bits. */
+struct retro_memory_descriptor
+{
+   uint64_t flags;
+
+   /* Pointer to the start of the relevant ROM or RAM chip.
+    * It's strongly recommended to use 'offset' if possible, rather than
+    * doing math on the pointer.
+    *
+    * If the same byte is mapped by multiple descriptors, their descriptors
+    * must have the same pointer.
+    * If 'start' does not point to the first byte in the pointer, put the
+    * difference in 'offset' instead.
+    *
+    * May be NULL if there's nothing usable here (e.g. hardware registers and
+    * open bus). No flags should be set if the pointer is NULL.
+    * It's recommended to minimize the number of descriptors if possible,
+    * but not mandatory. */
+   void *ptr;
+   size_t offset;
+
+   /* This is the location in the emulated address space
+    * where the mapping starts. */
+   size_t start;
+
+   /* Which bits must be same as in 'start' for this mapping to apply.
+    * The first memory descriptor to claim a certain byte is the one
+    * that applies.
+    * A bit which is set in 'start' must also be set in this.
+    * Can be zero, in which case each byte is assumed mapped exactly once.
+    * In this case, 'len' must be a power of two. */
+   size_t select;
+
+   /* If this is nonzero, the set bits are assumed not connected to the
+    * memory chip's address pins. */
+   size_t disconnect;
+
+   /* This one tells the size of the current memory area.
+    * If, after start+disconnect are applied, the address is higher than
+    * this, the highest bit of the address is cleared.
+    *
+    * If the address is still too high, the next highest bit is cleared.
+    * Can be zero, in which case it's assumed to be infinite (as limited
+    * by 'select' and 'disconnect'). */
+   size_t len;
+
+   /* To go from emulated address to physical address, the following
+    * order applies:
+    * Subtract 'start', pick off 'disconnect', apply 'len', add 'offset'. */
+
+   /* The address space name must consist of only a-zA-Z0-9_-,
+    * should be as short as feasible (maximum length is 8 plus the NUL),
+    * and may not be any other address space plus one or more 0-9A-F
+    * at the end.
+    * However, multiple memory descriptors for the same address space are
+    * allowed, and the address space name can be empty. NULL is treated
+    * as empty.
+    *
+    * Address space names are case sensitive, but avoid lowercase if possible.
+    * The same pointer may exist in multiple address spaces.
+    *
+    * Examples:
+    * blank+blank - valid (multiple things may be mapped in the same namespace)
+    * 'Sp'+'Sp' - valid (multiple things may be mapped in the same namespace)
+    * 'A'+'B' - valid (neither is a prefix of each other)
+    * 'S'+blank - valid ('S' is not in 0-9A-F)
+    * 'a'+blank - valid ('a' is not in 0-9A-F)
+    * 'a'+'A' - valid (neither is a prefix of each other)
+    * 'AR'+blank - valid ('R' is not in 0-9A-F)
+    * 'ARB'+blank - valid (the B can't be part of the address either, because
+    *                      there is no namespace 'AR')
+    * blank+'B' - not valid, because it's ambiguous which address space B1234
+    *             would refer to.
+    * The length can't be used for that purpose; the frontend may want
+    * to append arbitrary data to an address, without a separator. */
+   const char *addrspace;
+
+   /* TODO: When finalizing this one, add a description field, which should be
+    * "WRAM" or something roughly equally long. */
+
+   /* TODO: When finalizing this one, replace 'select' with 'limit', which tells
+    * which bits can vary and still refer to the same address (limit = ~select).
+    * TODO: limit? range? vary? something else? */
+
+   /* TODO: When finalizing this one, if 'len' is above what 'select' (or
+    * 'limit') allows, it's bankswitched. Bankswitched data must have both 'len'
+    * and 'select' != 0, and the mappings don't tell how the system switches the
+    * banks. */
+
+   /* TODO: When finalizing this one, fix the 'len' bit removal order.
+    * For len=0x1800, pointer 0x1C00 should go to 0x1400, not 0x0C00.
+    * Algorithm: Take bits highest to lowest, but if it goes above len, clear
+    * the most recent addition and continue on the next bit.
+    * TODO: Can the above be optimized? Is "remove the lowest bit set in both
+    * pointer and 'len'" equivalent? */
+
+   /* TODO: Some emulators (MAME?) emulate big endian systems by only accessing
+    * the emulated memory in 32-bit chunks, native endian. But that's nothing
+    * compared to Darek Mihocka <http://www.emulators.com/docs/nx07_vm101.htm>
+    * (section Emulation 103 - Nearly Free Byte Reversal) - he flips the ENTIRE
+    * RAM backwards! I'll want to represent both of those, via some flags.
+    *
+    * I suspect MAME either didn't think of that idea, or don't want the #ifdef.
+    * Not sure which, nor do I really care. */
+
+   /* TODO: Some of those flags are unused and/or don't really make sense. Clean
+    * them up. */
+};
+
+/* The frontend may use the largest value of 'start'+'select' in a
+ * certain namespace to infer the size of the address space.
+ *
+ * If the address space is larger than that, a mapping with .ptr=NULL
+ * should be at the end of the array, with .select set to all ones for
+ * as long as the address space is big.
+ *
+ * Sample descriptors (minus .ptr, and RETRO_MEMDESC_ on the flags):
+ * SNES WRAM:
+ * .start=0x7E0000, .len=0x20000
+ * (Note that this must be mapped before the ROM in most cases; some of the
+ * ROM mappers
+ * try to claim $7E0000, or at least $7E8000.)
+ * SNES SPC700 RAM:
+ * .addrspace="S", .len=0x10000
+ * SNES WRAM mirrors:
+ * .flags=MIRROR, .start=0x000000, .select=0xC0E000, .len=0x2000
+ * .flags=MIRROR, .start=0x800000, .select=0xC0E000, .len=0x2000
+ * SNES WRAM mirrors, alternate equivalent descriptor:
+ * .flags=MIRROR, .select=0x40E000, .disconnect=~0x1FFF
+ * (Various similar constructions can be created by combining parts of
+ * the above two.)
+ * SNES LoROM (512KB, mirrored a couple of times):
+ * .flags=CONST, .start=0x008000, .select=0x408000, .disconnect=0x8000, .len=512*1024
+ * .flags=CONST, .start=0x400000, .select=0x400000, .disconnect=0x8000, .len=512*1024
+ * SNES HiROM (4MB):
+ * .flags=CONST,                 .start=0x400000, .select=0x400000, .len=4*1024*1024
+ * .flags=CONST, .offset=0x8000, .start=0x008000, .select=0x408000, .len=4*1024*1024
+ * SNES ExHiROM (8MB):
+ * .flags=CONST, .offset=0,                  .start=0xC00000, .select=0xC00000, .len=4*1024*1024
+ * .flags=CONST, .offset=4*1024*1024,        .start=0x400000, .select=0xC00000, .len=4*1024*1024
+ * .flags=CONST, .offset=0x8000,             .start=0x808000, .select=0xC08000, .len=4*1024*1024
+ * .flags=CONST, .offset=4*1024*1024+0x8000, .start=0x008000, .select=0xC08000, .len=4*1024*1024
+ * Clarify the size of the address space:
+ * .ptr=NULL, .select=0xFFFFFF
+ * .len can be implied by .select in many of them, but was included for clarity.
+ */
+
+struct retro_memory_map
+{
+   const struct retro_memory_descriptor *descriptors;
+   unsigned num_descriptors;
+};
+
+struct retro_controller_description
+{
+   /* Human-readable description of the controller. Even if using a generic
+    * input device type, this can be set to the particular device type the
+    * core uses. */
+   const char *desc;
+
+   /* Device type passed to retro_set_controller_port_device(). If the device
+    * type is a sub-class of a generic input device type, use the
+    * RETRO_DEVICE_SUBCLASS macro to create an ID.
+    *
+    * E.g. RETRO_DEVICE_SUBCLASS(RETRO_DEVICE_JOYPAD, 1). */
+   unsigned id;
+};
+
+struct retro_controller_info
+{
+   const struct retro_controller_description *types;
+   unsigned num_types;
+};
+
+struct retro_subsystem_memory_info
+{
+   /* The extension associated with a memory type, e.g. "psram". */
+   const char *extension;
+
+   /* The memory type for retro_get_memory(). This should be at
+    * least 0x100 to avoid conflict with standardized
+    * libretro memory types. */
+   unsigned type;
+};
+
+struct retro_subsystem_rom_info
+{
+   /* Describes what the content is (SGB BIOS, GB ROM, etc). */
+   const char *desc;
+
+   /* Same definition as retro_get_system_info(). */
+   const char *valid_extensions;
+
+   /* Same definition as retro_get_system_info(). */
+   bool need_fullpath;
+
+   /* Same definition as retro_get_system_info(). */
+   bool block_extract;
+
+   /* This is set if the content is required to load a game.
+    * If this is set to false, a zeroed-out retro_game_info can be passed. */
+   bool required;
+
+   /* Content can have multiple associated persistent
+    * memory types (retro_get_memory()). */
+   const struct retro_subsystem_memory_info *memory;
+   unsigned num_memory;
+};
+
+struct retro_subsystem_info
+{
+   /* Human-readable string of the subsystem type, e.g. "Super GameBoy" */
+   const char *desc;
+
+   /* A computer friendly short string identifier for the subsystem type.
+    * This name must be [a-z].
+    * E.g. if desc is "Super GameBoy", this can be "sgb".
+    * This identifier can be used for command-line interfaces, etc.
+    */
+   const char *ident;
+
+   /* Infos for each content file. The first entry is assumed to be the
+    * "most significant" content for frontend purposes.
+    * E.g. with Super GameBoy, the first content should be the GameBoy ROM,
+    * as it is the most "significant" content to a user.
+    * If a frontend creates new file paths based on the content used
+    * (e.g. savestates), it should use the path for the first ROM to do so. */
+   const struct retro_subsystem_rom_info *roms;
+
+   /* Number of content files associated with a subsystem. */
+   unsigned num_roms;
+
+   /* The type passed to retro_load_game_special(). */
+   unsigned id;
+};
+
+typedef void (RETRO_CALLCONV *retro_proc_address_t)(void);
+
+/* libretro API extension functions:
+ * (None here so far).
+ *
+ * Get a symbol from a libretro core.
+ * Cores should only return symbols which are actual
+ * extensions to the libretro API.
+ *
+ * Frontends should not use this to obtain symbols to standard
+ * libretro entry points (static linking or dlsym).
+ *
+ * The symbol name must be equal to the function name,
+ * e.g. if void retro_foo(void); exists, the symbol must be called "retro_foo".
+ * The returned function pointer must be cast to the corresponding type.
+ */
+typedef retro_proc_address_t (RETRO_CALLCONV *retro_get_proc_address_t)(const char *sym);
+
+struct retro_get_proc_address_interface
+{
+   retro_get_proc_address_t get_proc_address;
+};
+
+enum retro_log_level
+{
+   RETRO_LOG_DEBUG = 0,
+   RETRO_LOG_INFO,
+   RETRO_LOG_WARN,
+   RETRO_LOG_ERROR,
+
+   RETRO_LOG_DUMMY = INT_MAX
+};
+
+/* Logging function. Takes log level argument as well. */
+typedef void (RETRO_CALLCONV *retro_log_printf_t)(enum retro_log_level level,
+      const char *fmt, ...);
+
+struct retro_log_callback
+{
+   retro_log_printf_t log;
+};
+
+/* Performance related functions */
+
+/* ID values for SIMD CPU features */
+#define RETRO_SIMD_SSE      (1 << 0)
+#define RETRO_SIMD_SSE2     (1 << 1)
+#define RETRO_SIMD_VMX      (1 << 2)
+#define RETRO_SIMD_VMX128   (1 << 3)
+#define RETRO_SIMD_AVX      (1 << 4)
+#define RETRO_SIMD_NEON     (1 << 5)
+#define RETRO_SIMD_SSE3     (1 << 6)
+#define RETRO_SIMD_SSSE3    (1 << 7)
+#define RETRO_SIMD_MMX      (1 << 8)
+#define RETRO_SIMD_MMXEXT   (1 << 9)
+#define RETRO_SIMD_SSE4     (1 << 10)
+#define RETRO_SIMD_SSE42    (1 << 11)
+#define RETRO_SIMD_AVX2     (1 << 12)
+#define RETRO_SIMD_VFPU     (1 << 13)
+#define RETRO_SIMD_PS       (1 << 14)
+#define RETRO_SIMD_AES      (1 << 15)
+#define RETRO_SIMD_VFPV3    (1 << 16)
+#define RETRO_SIMD_VFPV4    (1 << 17)
+#define RETRO_SIMD_POPCNT   (1 << 18)
+#define RETRO_SIMD_MOVBE    (1 << 19)
+#define RETRO_SIMD_CMOV     (1 << 20)
+#define RETRO_SIMD_ASIMD    (1 << 21)
+
+typedef uint64_t retro_perf_tick_t;
+typedef int64_t retro_time_t;
+
+struct retro_perf_counter
+{
+   const char *ident;
+   retro_perf_tick_t start;
+   retro_perf_tick_t total;
+   retro_perf_tick_t call_cnt;
+
+   bool registered;
+};
+
+/* Returns current time in microseconds.
+ * Tries to use the most accurate timer available.
+ */
+typedef retro_time_t (RETRO_CALLCONV *retro_perf_get_time_usec_t)(void);
+
+/* A simple counter. Usually nanoseconds, but can also be CPU cycles.
+ * Can be used directly if desired (when creating a more sophisticated
+ * performance counter system).
+ * */
+typedef retro_perf_tick_t (RETRO_CALLCONV *retro_perf_get_counter_t)(void);
+
+/* Returns a bit-mask of detected CPU features (RETRO_SIMD_*). */
+typedef uint64_t (RETRO_CALLCONV *retro_get_cpu_features_t)(void);
+
+/* Asks frontend to log and/or display the state of performance counters.
+ * Performance counters can always be poked into manually as well.
+ */
+typedef void (RETRO_CALLCONV *retro_perf_log_t)(void);
+
+/* Register a performance counter.
+ * ident field must be set with a discrete value and other values in
+ * retro_perf_counter must be 0.
+ * Registering can be called multiple times. To avoid calling to
+ * frontend redundantly, you can check registered field first. */
+typedef void (RETRO_CALLCONV *retro_perf_register_t)(struct retro_perf_counter *counter);
+
+/* Starts a registered counter. */
+typedef void (RETRO_CALLCONV *retro_perf_start_t)(struct retro_perf_counter *counter);
+
+/* Stops a registered counter. */
+typedef void (RETRO_CALLCONV *retro_perf_stop_t)(struct retro_perf_counter *counter);
+
+/* For convenience it can be useful to wrap register, start and stop in macros.
+ * E.g.:
+ * #ifdef LOG_PERFORMANCE
+ * #define RETRO_PERFORMANCE_INIT(perf_cb, name) static struct retro_perf_counter name = {#name}; if (!name.registered) perf_cb.perf_register(&(name))
+ * #define RETRO_PERFORMANCE_START(perf_cb, name) perf_cb.perf_start(&(name))
+ * #define RETRO_PERFORMANCE_STOP(perf_cb, name) perf_cb.perf_stop(&(name))
+ * #else
+ * ... Blank macros ...
+ * #endif
+ *
+ * These can then be used mid-functions around code snippets.
+ *
+ * extern struct retro_perf_callback perf_cb;  * Somewhere in the core.
+ *
+ * void do_some_heavy_work(void)
+ * {
+ *    RETRO_PERFORMANCE_INIT(perf_cb, work_1);
+ *    RETRO_PERFORMANCE_START(perf_cb, work_1);
+ *    heavy_work_1();
+ *    RETRO_PERFORMANCE_STOP(perf_cb, work_1);
+ *
+ *    RETRO_PERFORMANCE_INIT(perf_cb, work_2);
+ *    RETRO_PERFORMANCE_START(perf_cb, work_2);
+ *    heavy_work_2();
+ *    RETRO_PERFORMANCE_STOP(perf_cb, work_2);
+ * }
+ *
+ * void retro_deinit(void)
+ * {
+ *    perf_cb.perf_log();  * Log all perf counters here for example.
+ * }
+ */
+
+struct retro_perf_callback
+{
+   retro_perf_get_time_usec_t    get_time_usec;
+   retro_get_cpu_features_t      get_cpu_features;
+
+   retro_perf_get_counter_t      get_perf_counter;
+   retro_perf_register_t         perf_register;
+   retro_perf_start_t            perf_start;
+   retro_perf_stop_t             perf_stop;
+   retro_perf_log_t              perf_log;
+};
+
+/* FIXME: Document the sensor API and work out behavior.
+ * It will be marked as experimental until then.
+ */
+enum retro_sensor_action
+{
+   RETRO_SENSOR_ACCELEROMETER_ENABLE = 0,
+   RETRO_SENSOR_ACCELEROMETER_DISABLE,
+
+   RETRO_SENSOR_DUMMY = INT_MAX
+};
+
+/* Id values for SENSOR types. */
+#define RETRO_SENSOR_ACCELEROMETER_X 0
+#define RETRO_SENSOR_ACCELEROMETER_Y 1
+#define RETRO_SENSOR_ACCELEROMETER_Z 2
+
+typedef bool (RETRO_CALLCONV *retro_set_sensor_state_t)(unsigned port,
+      enum retro_sensor_action action, unsigned rate);
+
+typedef float (RETRO_CALLCONV *retro_sensor_get_input_t)(unsigned port, unsigned id);
+
+struct retro_sensor_interface
+{
+   retro_set_sensor_state_t set_sensor_state;
+   retro_sensor_get_input_t get_sensor_input;
+};
+
+enum retro_camera_buffer
+{
+   RETRO_CAMERA_BUFFER_OPENGL_TEXTURE = 0,
+   RETRO_CAMERA_BUFFER_RAW_FRAMEBUFFER,
+
+   RETRO_CAMERA_BUFFER_DUMMY = INT_MAX
+};
+
+/* Starts the camera driver. Can only be called in retro_run(). */
+typedef bool (RETRO_CALLCONV *retro_camera_start_t)(void);
+
+/* Stops the camera driver. Can only be called in retro_run(). */
+typedef void (RETRO_CALLCONV *retro_camera_stop_t)(void);
+
+/* Callback which signals when the camera driver is initialized
+ * and/or deinitialized.
+ * retro_camera_start_t can be called in initialized callback.
+ */
+typedef void (RETRO_CALLCONV *retro_camera_lifetime_status_t)(void);
+
+/* A callback for raw framebuffer data. buffer points to an XRGB8888 buffer.
+ * Width, height and pitch are similar to retro_video_refresh_t.
+ * First pixel is top-left origin.
+ */
+typedef void (RETRO_CALLCONV *retro_camera_frame_raw_framebuffer_t)(const uint32_t *buffer,
+      unsigned width, unsigned height, size_t pitch);
+
+/* A callback for when OpenGL textures are used.
+ *
+ * texture_id is a texture owned by camera driver.
+ * Its state or content should be considered immutable, except for things like
+ * texture filtering and clamping.
+ *
+ * texture_target is the texture target for the GL texture.
+ * These can include e.g. GL_TEXTURE_2D, GL_TEXTURE_RECTANGLE, and possibly
+ * more depending on extensions.
+ *
+ * affine points to a packed 3x3 column-major matrix used to apply an affine
+ * transform to texture coordinates. (affine_matrix * vec3(coord_x, coord_y, 1.0))
+ * After transform, normalized texture coord (0, 0) should be bottom-left
+ * and (1, 1) should be top-right (or (width, height) for RECTANGLE).
+ *
+ * GL-specific typedefs are avoided here to avoid relying on gl.h in
+ * the API definition.
+ */
+typedef void (RETRO_CALLCONV *retro_camera_frame_opengl_texture_t)(unsigned texture_id,
+      unsigned texture_target, const float *affine);
+
+struct retro_camera_callback
+{
+   /* Set by libretro core.
+    * Example bitmask: caps = (1 << RETRO_CAMERA_BUFFER_OPENGL_TEXTURE) | (1 << RETRO_CAMERA_BUFFER_RAW_FRAMEBUFFER).
+    */
+   uint64_t caps;
+
+   /* Desired resolution for camera. Is only used as a hint. */
+   unsigned width;
+   unsigned height;
+
+   /* Set by frontend. */
+   retro_camera_start_t start;
+   retro_camera_stop_t stop;
+
+   /* Set by libretro core if raw framebuffer callbacks will be used. */
+   retro_camera_frame_raw_framebuffer_t frame_raw_framebuffer;
+
+   /* Set by libretro core if OpenGL texture callbacks will be used. */
+   retro_camera_frame_opengl_texture_t frame_opengl_texture;
+
+   /* Set by libretro core. Called after camera driver is initialized and
+    * ready to be started.
+    * Can be NULL, in which case this callback is not called.
+    */
+   retro_camera_lifetime_status_t initialized;
+
+   /* Set by libretro core. Called right before camera driver is
+    * deinitialized.
+    * Can be NULL, in which case this callback is not called.
+    */
+   retro_camera_lifetime_status_t deinitialized;
+};
+
+/* Sets the interval of time and/or distance at which to update/poll
+ * location-based data.
+ *
+ * To ensure compatibility with all location-based implementations,
+ * values for both interval_ms and interval_distance should be provided.
+ *
+ * interval_ms is the interval expressed in milliseconds.
+ * interval_distance is the distance interval expressed in meters.
+ */
+typedef void (RETRO_CALLCONV *retro_location_set_interval_t)(unsigned interval_ms,
+      unsigned interval_distance);
+
+/* Start location services. The device will start listening for changes to the
+ * current location at regular intervals (which are defined with
+ * retro_location_set_interval_t). */
+typedef bool (RETRO_CALLCONV *retro_location_start_t)(void);
+
+/* Stop location services. The device will stop listening for changes
+ * to the current location. */
+typedef void (RETRO_CALLCONV *retro_location_stop_t)(void);
+
+/* Get the position of the current location. Will set parameters to
+ * 0 if no new location update has happened since the last time. */
+typedef bool (RETRO_CALLCONV *retro_location_get_position_t)(double *lat, double *lon,
+      double *horiz_accuracy, double *vert_accuracy);
+
+/* Callback which signals when the location driver is initialized
+ * and/or deinitialized.
+ * retro_location_start_t can be called in initialized callback.
+ */
+typedef void (RETRO_CALLCONV *retro_location_lifetime_status_t)(void);
+
+struct retro_location_callback
+{
+   retro_location_start_t         start;
+   retro_location_stop_t          stop;
+   retro_location_get_position_t  get_position;
+   retro_location_set_interval_t  set_interval;
+
+   retro_location_lifetime_status_t initialized;
+   retro_location_lifetime_status_t deinitialized;
+};
+
+enum retro_rumble_effect
+{
+   RETRO_RUMBLE_STRONG = 0,
+   RETRO_RUMBLE_WEAK = 1,
+
+   RETRO_RUMBLE_DUMMY = INT_MAX
+};
+
+/* Sets rumble state for joypad plugged in port 'port'.
+ * Rumble effects are controlled independently,
+ * and setting e.g. strong rumble does not override weak rumble.
+ * Strength has a range of [0, 0xffff].
+ *
+ * Returns true if rumble state request was honored.
+ * Calling this before first retro_run() is likely to return false. */
+typedef bool (RETRO_CALLCONV *retro_set_rumble_state_t)(unsigned port,
+      enum retro_rumble_effect effect, uint16_t strength);
+
+struct retro_rumble_interface
+{
+   retro_set_rumble_state_t set_rumble_state;
+};
+
+/* Notifies libretro that audio data should be written. */
+typedef void (RETRO_CALLCONV *retro_audio_callback_t)(void);
+
+/* True: Audio driver in frontend is active, and callback is
+ * expected to be called regularly.
+ * False: Audio driver in frontend is paused or inactive.
+ * Audio callback will not be called until set_state has been
+ * called with true.
+ * Initial state is false (inactive).
+ */
+typedef void (RETRO_CALLCONV *retro_audio_set_state_callback_t)(bool enabled);
+
+struct retro_audio_callback
+{
+   retro_audio_callback_t callback;
+   retro_audio_set_state_callback_t set_state;
+};
+
+/* Notifies a libretro core of time spent since last invocation
+ * of retro_run() in microseconds.
+ *
+ * It will be called right before retro_run() every frame.
+ * The frontend can tamper with timing to support cases like
+ * fast-forward, slow-motion and framestepping.
+ *
+ * In those scenarios the reference frame time value will be used. */
+typedef int64_t retro_usec_t;
+typedef void (RETRO_CALLCONV *retro_frame_time_callback_t)(retro_usec_t usec);
+struct retro_frame_time_callback
+{
+   retro_frame_time_callback_t callback;
+   /* Represents the time of one frame. It is computed as
+    * 1000000 / fps, but the implementation will resolve the
+    * rounding to ensure that framestepping, etc is exact. */
+   retro_usec_t reference;
+};
+
+/* Pass this to retro_video_refresh_t if rendering to hardware.
+ * Passing NULL to retro_video_refresh_t is still a frame dupe as normal.
+ * */
+#define RETRO_HW_FRAME_BUFFER_VALID ((void*)-1)
+
+/* Invalidates the current HW context.
+ * Any GL state is lost, and must not be deinitialized explicitly.
+ * If explicit deinitialization is desired by the libretro core,
+ * it should implement context_destroy callback.
+ * If called, all GPU resources must be reinitialized.
+ * Usually called when frontend reinits video driver.
+ * Also called first time video driver is initialized,
+ * allowing libretro core to initialize resources.
+ */
+typedef void (RETRO_CALLCONV *retro_hw_context_reset_t)(void);
+
+/* Gets current framebuffer which is to be rendered to.
+ * Could change every frame potentially.
+ */
+typedef uintptr_t (RETRO_CALLCONV *retro_hw_get_current_framebuffer_t)(void);
+
+/* Get a symbol from HW context. */
+typedef retro_proc_address_t (RETRO_CALLCONV *retro_hw_get_proc_address_t)(const char *sym);
+
+enum retro_hw_context_type
+{
+   RETRO_HW_CONTEXT_NONE             = 0,
+   /* OpenGL 2.x. Driver can choose to use latest compatibility context. */
+   RETRO_HW_CONTEXT_OPENGL           = 1,
+   /* OpenGL ES 2.0. */
+   RETRO_HW_CONTEXT_OPENGLES2        = 2,
+   /* Modern desktop core GL context. Use version_major/
+    * version_minor fields to set GL version. */
+   RETRO_HW_CONTEXT_OPENGL_CORE      = 3,
+   /* OpenGL ES 3.0 */
+   RETRO_HW_CONTEXT_OPENGLES3        = 4,
+   /* OpenGL ES 3.1+. Set version_major/version_minor. For GLES2 and GLES3,
+    * use the corresponding enums directly. */
+   RETRO_HW_CONTEXT_OPENGLES_VERSION = 5,
+
+   /* Vulkan, see RETRO_ENVIRONMENT_GET_HW_RENDER_INTERFACE. */
+   RETRO_HW_CONTEXT_VULKAN           = 6,
+
+   RETRO_HW_CONTEXT_DUMMY = INT_MAX
+};
+
+struct retro_hw_render_callback
+{
+   /* Which API to use. Set by libretro core. */
+   enum retro_hw_context_type context_type;
+
+   /* Called when a context has been created or when it has been reset.
+    * An OpenGL context is only valid after context_reset() has been called.
+    *
+    * When context_reset is called, OpenGL resources in the libretro
+    * implementation are guaranteed to be invalid.
+    *
+    * It is possible that context_reset is called multiple times during an
+    * application lifecycle.
+    * If context_reset is called without any notification (context_destroy),
+    * the OpenGL context was lost and resources should just be recreated
+    * without any attempt to "free" old resources.
+    */
+   retro_hw_context_reset_t context_reset;
+
+   /* Set by frontend.
+    * TODO: This is rather obsolete. The frontend should not
+    * be providing preallocated framebuffers. */
+   retro_hw_get_current_framebuffer_t get_current_framebuffer;
+
+   /* Set by frontend.
+    * Can return all relevant functions, including glClear on Windows. */
+   retro_hw_get_proc_address_t get_proc_address;
+
+   /* Set if render buffers should have depth component attached.
+    * TODO: Obsolete. */
+   bool depth;
+
+   /* Set if stencil buffers should be attached.
+    * TODO: Obsolete. */
+   bool stencil;
+
+   /* If depth and stencil are true, a packed 24/8 buffer will be added.
+    * Only attaching stencil is invalid and will be ignored. */
+
+   /* Use the conventional bottom-left origin. If false,
+    * standard libretro top-left origin semantics are used.
+    * TODO: Move to GL specific interface. */
+   bool bottom_left_origin;
+
+   /* Major version number for core GL context or GLES 3.1+. */
+   unsigned version_major;
+
+   /* Minor version number for core GL context or GLES 3.1+. */
+   unsigned version_minor;
+
+   /* If this is true, the frontend will go very far to avoid
+    * resetting context in scenarios like toggling fullscreen, etc.
+    * TODO: Obsolete? Maybe frontend should just always assume this ...
+    */
+   bool cache_context;
+
+   /* The reset callback might still be called in extreme situations
+    * such as if the context is lost beyond recovery.
+    *
+    * For optimal stability, set this to false, and allow context to be
+    * reset at any time.
+    */
+
+   /* A callback to be called before the context is destroyed in a
+    * controlled way by the frontend. */
+   retro_hw_context_reset_t context_destroy;
+
+   /* OpenGL resources can be deinitialized cleanly at this step.
+    * context_destroy can be set to NULL, in which case resources will
+    * just be destroyed without any notification.
+    *
+    * Even when context_destroy is non-NULL, it is possible that
+    * context_reset is called without any destroy notification.
+    * This happens if context is lost by external factors (such as
+    * notified by GL_ARB_robustness).
+    *
+    * In this case, the context is assumed to be already dead,
+    * and the libretro implementation must not try to free any OpenGL
+    * resources in the subsequent context_reset.
+    */
+
+   /* Creates a debug context. */
+   bool debug_context;
+};
+
+/* Callback type passed in RETRO_ENVIRONMENT_SET_KEYBOARD_CALLBACK.
+ * Called by the frontend in response to keyboard events.
+ * down is set if the key is being pressed, or false if it is being released.
+ * keycode is the RETROK value of the char.
+ * character is the text character of the pressed key. (UTF-32).
+ * key_modifiers is a set of RETROKMOD values or'ed together.
+ *
+ * The pressed/keycode state can be independent of the character.
+ * It is also possible that multiple characters are generated from a
+ * single keypress.
+ * Keycode events should be treated separately from character events.
+ * However, when possible, the frontend should try to synchronize these.
+ * If only a character is posted, keycode should be RETROK_UNKNOWN.
+ *
+ * Similarly, if only a keycode event is generated with no corresponding
+ * character, character should be 0.
+ */
+typedef void (RETRO_CALLCONV *retro_keyboard_event_t)(bool down, unsigned keycode,
+      uint32_t character, uint16_t key_modifiers);
+
+struct retro_keyboard_callback
+{
+   retro_keyboard_event_t callback;
+};
+
+/* Callbacks for RETRO_ENVIRONMENT_SET_DISK_CONTROL_INTERFACE.
+ * Should be set for implementations which can swap out multiple disk
+ * images in runtime.
+ *
+ * If the implementation can do this automatically, it should strive to do so.
+ * However, there are cases where the user must manually do so.
+ *
+ * Overview: To swap a disk image, eject the disk image with
+ * set_eject_state(true).
+ * Set the disk index with set_image_index(index). Insert the disk again
+ * with set_eject_state(false).
+ */
+
+/* If ejected is true, "ejects" the virtual disk tray.
+ * When ejected, the disk image index can be set.
+ */
+typedef bool (RETRO_CALLCONV *retro_set_eject_state_t)(bool ejected);
+
+/* Gets current eject state. The initial state is 'not ejected'. */
+typedef bool (RETRO_CALLCONV *retro_get_eject_state_t)(void);
+
+/* Gets current disk index. First disk is index 0.
+ * If return value is >= get_num_images(), no disk is currently inserted.
+ */
+typedef unsigned (RETRO_CALLCONV *retro_get_image_index_t)(void);
+
+/* Sets image index. Can only be called when disk is ejected.
+ * The implementation supports setting "no disk" by using an
+ * index >= get_num_images().
+ */
+typedef bool (RETRO_CALLCONV *retro_set_image_index_t)(unsigned index);
+
+/* Gets total number of images which are available to use. */
+typedef unsigned (RETRO_CALLCONV *retro_get_num_images_t)(void);
+
+struct retro_game_info;
+
+/* Replaces the disk image associated with index.
+ * Arguments to pass in info have same requirements as retro_load_game().
+ * Virtual disk tray must be ejected when calling this.
+ *
+ * Replacing a disk image with info = NULL will remove the disk image
+ * from the internal list.
+ * As a result, calls to get_image_index() can change.
+ *
+ * E.g. replace_image_index(1, NULL), and previous get_image_index()
+ * returned 4 before.
+ * Index 1 will be removed, and the new index is 3.
+ */
+typedef bool (RETRO_CALLCONV *retro_replace_image_index_t)(unsigned index,
+      const struct retro_game_info *info);
+
+/* Adds a new valid index (get_num_images()) to the internal disk list.
+ * This will increment subsequent return values from get_num_images() by 1.
+ * This image index cannot be used until a disk image has been set
+ * with replace_image_index. */
+typedef bool (RETRO_CALLCONV *retro_add_image_index_t)(void);
+
+struct retro_disk_control_callback
+{
+   retro_set_eject_state_t set_eject_state;
+   retro_get_eject_state_t get_eject_state;
+
+   retro_get_image_index_t get_image_index;
+   retro_set_image_index_t set_image_index;
+   retro_get_num_images_t  get_num_images;
+
+   retro_replace_image_index_t replace_image_index;
+   retro_add_image_index_t add_image_index;
+};
+
+enum retro_pixel_format
+{
+   /* 0RGB1555, native endian.
+    * 0 bit must be set to 0.
+    * This pixel format is default for compatibility concerns only.
+    * If a 15/16-bit pixel format is desired, consider using RGB565. */
+   RETRO_PIXEL_FORMAT_0RGB1555 = 0,
+
+   /* XRGB8888, native endian.
+    * X bits are ignored. */
+   RETRO_PIXEL_FORMAT_XRGB8888 = 1,
+
+   /* RGB565, native endian.
+    * This pixel format is the recommended format to use if a 15/16-bit
+    * format is desired as it is the pixel format that is typically
+    * available on a wide range of low-power devices.
+    *
+    * It is also natively supported in APIs like OpenGL ES. */
+   RETRO_PIXEL_FORMAT_RGB565   = 2,
+
+   /* Ensure sizeof() == sizeof(int). */
+   RETRO_PIXEL_FORMAT_UNKNOWN  = INT_MAX
+};
+
+struct retro_message
+{
+   const char *msg;        /* Message to be displayed. */
+   unsigned    frames;     /* Duration in frames of message. */
+};
+
+/* Describes how the libretro implementation maps a libretro input bind
+ * to its internal input system through a human readable string.
+ * This string can be used to better let a user configure input. */
+struct retro_input_descriptor
+{
+   /* Associates given parameters with a description. */
+   unsigned port;
+   unsigned device;
+   unsigned index;
+   unsigned id;
+
+   /* Human readable description for parameters.
+    * The pointer must remain valid until
+    * retro_unload_game() is called. */
+   const char *description;
+};
+
+struct retro_system_info
+{
+   /* All pointers are owned by libretro implementation, and pointers must
+    * remain valid until retro_deinit() is called. */
+
+   const char *library_name;      /* Descriptive name of library. Should not
+                                   * contain any version numbers, etc. */
+   const char *library_version;   /* Descriptive version of core. */
+
+   const char *valid_extensions;  /* A string listing probable content
+                                   * extensions the core will be able to
+                                   * load, separated with pipe.
+                                   * E.g. "bin|rom|iso".
+                                   * Typically used for a GUI to filter
+                                   * out extensions. */
+
+   /* If true, retro_load_game() is guaranteed to provide a valid pathname
+    * in retro_game_info::path.
+    * ::data and ::size are both invalid.
+    *
+    * If false, ::data and ::size are guaranteed to be valid, but ::path
+    * might not be valid.
+    *
+    * This is typically set to true for libretro implementations that must
+    * load from file.
+    * Implementations should strive for setting this to false, as it allows
+    * the frontend to perform patching, etc. */
+   bool        need_fullpath;
+
+   /* If true, the frontend is not allowed to extract any archives before
+    * loading the real content.
+    * Necessary for certain libretro implementations that load games
+    * from zipped archives. */
+   bool        block_extract;
+};
+
+struct retro_game_geometry
+{
+   unsigned base_width;    /* Nominal video width of game. */
+   unsigned base_height;   /* Nominal video height of game. */
+   unsigned max_width;     /* Maximum possible width of game. */
+   unsigned max_height;    /* Maximum possible height of game. */
+
+   float    aspect_ratio;  /* Nominal aspect ratio of game. If
+                            * aspect_ratio is <= 0.0, an aspect ratio
+                            * of base_width / base_height is assumed.
+                            * A frontend could override this setting,
+                            * if desired. */
+};
+
+struct retro_system_timing
+{
+   double fps;             /* FPS of video content. */
+   double sample_rate;     /* Sampling rate of audio. */
+};
+
+struct retro_system_av_info
+{
+   struct retro_game_geometry geometry;
+   struct retro_system_timing timing;
+};
+
+struct retro_variable
+{
+   /* Variable to query in RETRO_ENVIRONMENT_GET_VARIABLE.
+    * If NULL, obtains the complete environment string if more
+    * complex parsing is necessary.
+    * The environment string is formatted as key-value pairs
+    * delimited by semicolons as so:
+    * "key1=value1;key2=value2;..."
+    */
+   const char *key;
+
+   /* Value to be obtained. If key does not exist, it is set to NULL. */
+   const char *value;
+};
+
+struct retro_game_info
+{
+   const char *path;       /* Path to game, UTF-8 encoded.
+                            * Sometimes used as a reference for building other paths.
+                            * May be NULL if game was loaded from stdin or similar,
+                            * but in this case some cores will be unable to load `data`.
+                            * So, it is preferable to fabricate something here instead
+                            * of passing NULL, which will help more cores to succeed.
+                            * retro_system_info::need_fullpath requires
+                            * that this path is valid. */
+   const void *data;       /* Memory buffer of loaded game. Will be NULL
+                            * if need_fullpath was set. */
+   size_t      size;       /* Size of memory buffer. */
+   const char *meta;       /* String of implementation specific meta-data. */
+};
+
+#define RETRO_MEMORY_ACCESS_WRITE (1 << 0)
+   /* The core will write to the buffer provided by retro_framebuffer::data. */
+#define RETRO_MEMORY_ACCESS_READ (1 << 1)
+   /* The core will read from retro_framebuffer::data. */
+#define RETRO_MEMORY_TYPE_CACHED (1 << 0)
+   /* The memory in data is cached.
+    * If not cached, random writes and/or reading from the buffer is expected to be very slow. */
+struct retro_framebuffer
+{
+   void *data;                      /* The framebuffer which the core can render into.
+                                       Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER.
+                                       The initial contents of data are unspecified. */
+   unsigned width;                  /* The framebuffer width used by the core. Set by core. */
+   unsigned height;                 /* The framebuffer height used by the core. Set by core. */
+   size_t pitch;                    /* The number of bytes between the beginning of a scanline,
+                                       and beginning of the next scanline.
+                                       Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. */
+   enum retro_pixel_format format;  /* The pixel format the core must use to render into data.
+                                       This format could differ from the format used in
+                                       SET_PIXEL_FORMAT.
+                                       Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. */
+
+   unsigned access_flags;           /* How the core will access the memory in the framebuffer.
+                                       RETRO_MEMORY_ACCESS_* flags.
+                                       Set by core. */
+   unsigned memory_flags;           /* Flags telling core how the memory has been mapped.
+                                       RETRO_MEMORY_TYPE_* flags.
+                                       Set by frontend in GET_CURRENT_SOFTWARE_FRAMEBUFFER. */
+};
+
+/* Callbacks */
+
+/* Environment callback. Gives implementations a way of performing
+ * uncommon tasks. Extensible. */
+typedef bool (RETRO_CALLCONV *retro_environment_t)(unsigned cmd, void *data);
+
+/* Render a frame. Pixel format is 15-bit 0RGB1555 native endian
+ * unless changed (see RETRO_ENVIRONMENT_SET_PIXEL_FORMAT).
+ *
+ * Width and height specify dimensions of buffer.
+ * Pitch specifies length in bytes between two lines in buffer.
+ *
+ * For performance reasons, it is highly recommended to have a frame
+ * that is packed in memory, i.e. pitch == width * byte_per_pixel.
+ * Certain graphic APIs, such as OpenGL ES, do not like textures
+ * that are not packed in memory.
+ */
+typedef void (RETRO_CALLCONV *retro_video_refresh_t)(const void *data, unsigned width,
+      unsigned height, size_t pitch);
+
+/* Renders a single audio frame. Should only be used if implementation
+ * generates a single sample at a time.
+ * Format is signed 16-bit native endian.
+ */
+typedef void (RETRO_CALLCONV *retro_audio_sample_t)(int16_t left, int16_t right);
+
+/* Renders multiple audio frames in one go.
+ *
+ * One frame is defined as a sample of left and right channels, interleaved.
+ * I.e. int16_t buf[4] = { l, r, l, r }; would be 2 frames.
+ * Only one of the audio callbacks must ever be used.
+ */
+typedef size_t (RETRO_CALLCONV *retro_audio_sample_batch_t)(const int16_t *data,
+      size_t frames);
+
+/* Polls input. */
+typedef void (RETRO_CALLCONV *retro_input_poll_t)(void);
+
+/* Queries for input for player 'port'. device will be masked with
+ * RETRO_DEVICE_MASK.
+ *
+ * Specialization of devices such as RETRO_DEVICE_JOYPAD_MULTITAP that
+ * have been set with retro_set_controller_port_device()
+ * will still use the higher level RETRO_DEVICE_JOYPAD to request input.
+ */
+typedef int16_t (RETRO_CALLCONV *retro_input_state_t)(unsigned port, unsigned device,
+      unsigned index, unsigned id);
+
+/* Sets callbacks. retro_set_environment() is guaranteed to be called
+ * before retro_init().
+ *
+ * The rest of the set_* functions are guaranteed to have been called
+ * before the first call to retro_run() is made. */
+RETRO_API void retro_set_environment(retro_environment_t);
+RETRO_API void retro_set_video_refresh(retro_video_refresh_t);
+RETRO_API void retro_set_audio_sample(retro_audio_sample_t);
+RETRO_API void retro_set_audio_sample_batch(retro_audio_sample_batch_t);
+RETRO_API void retro_set_input_poll(retro_input_poll_t);
+RETRO_API void retro_set_input_state(retro_input_state_t);
+
+/* Library global initialization/deinitialization. */
+RETRO_API void retro_init(void);
+RETRO_API void retro_deinit(void);
+
+/* Must return RETRO_API_VERSION. Used to validate ABI compatibility
+ * when the API is revised. */
+RETRO_API unsigned retro_api_version(void);
+
+/* Gets statically known system info. Pointers provided in *info
+ * must be statically allocated.
+ * Can be called at any time, even before retro_init(). */
+RETRO_API void retro_get_system_info(struct retro_system_info *info);
+
+/* Gets information about system audio/video timings and geometry.
+ * Can be called only after retro_load_game() has successfully completed.
+ * NOTE: The implementation of this function might not initialize every
+ * variable if needed.
+ * E.g. geom.aspect_ratio might not be initialized if core doesn't
+ * desire a particular aspect ratio. */
+RETRO_API void retro_get_system_av_info(struct retro_system_av_info *info);
+
+/* Sets device to be used for player 'port'.
+ * By default, RETRO_DEVICE_JOYPAD is assumed to be plugged into all
+ * available ports.
+ * Setting a particular device type is not a guarantee that libretro cores
+ * will only poll input based on that particular device type. It is only a
+ * hint to the libretro core when a core cannot automatically detect the
+ * appropriate input device type on its own. It is also relevant when a
+ * core can change its behavior depending on device type. */
+RETRO_API void retro_set_controller_port_device(unsigned port, unsigned device);
+
+/* Resets the current game. */
+RETRO_API void retro_reset(void);
+
+/* Runs the game for one video frame.
+ * During retro_run(), input_poll callback must be called at least once.
+ *
+ * If a frame is not rendered for reasons where a game "dropped" a frame,
+ * this still counts as a frame, and retro_run() should explicitly dupe
+ * a frame if GET_CAN_DUPE returns true.
+ * In this case, the video callback can take a NULL argument for data.
+ */
+RETRO_API void retro_run(void);
+
+/* Returns the amount of data the implementation requires to serialize
+ * internal state (save states).
+ * Between calls to retro_load_game() and retro_unload_game(), the
+ * returned size is never allowed to be larger than a previous returned
+ * value, to ensure that the frontend can allocate a save state buffer once.
+ */
+RETRO_API size_t retro_serialize_size(void);
+
+/* Serializes internal state. If failed, or size is lower than
+ * retro_serialize_size(), it should return false, true otherwise. */
+RETRO_API bool retro_serialize(void *data, size_t size);
+RETRO_API bool retro_unserialize(const void *data, size_t size);
+
+RETRO_API void retro_cheat_reset(void);
+RETRO_API void retro_cheat_set(unsigned index, bool enabled, const char *code);
+
+/* Loads a game. */
+RETRO_API bool retro_load_game(const struct retro_game_info *game);
+
+/* Loads a "special" kind of game. Should not be used,
+ * except in extreme cases. */
+RETRO_API bool retro_load_game_special(
+  unsigned game_type,
+  const struct retro_game_info *info, size_t num_info
+);
+
+/* Unloads a currently loaded game. */
+RETRO_API void retro_unload_game(void);
+
+/* Gets region of game. */
+RETRO_API unsigned retro_get_region(void);
+
+/* Gets region of memory. */
+RETRO_API void *retro_get_memory_data(unsigned id);
+RETRO_API size_t retro_get_memory_size(unsigned id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/m68k.cpu	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,1160 @@
+info
+	prefix m68k_
+	opcode_size 16
+	body m68k_run_op
+	header m68k.h
+	interrupt m68k_interrupt
+	include m68k_util.c
+	sync_cycle m68k_sync_cycle
+	
+declare
+	typedef m68k_context *(*m68k_reset_handler)(m68k_context *context);
+	void init_m68k_opts(m68k_options *opts, memmap_chunk * memmap, uint32_t num_chunks, uint32_t clock_divider);
+	m68k_context *init_68k_context(m68k_options * opts, m68k_reset_handler reset_handler);
+	void m68k_reset(m68k_context *context);
+	void m68k_print_regs(m68k_context *context);
+
+regs
+	dregs 32 d0 d1 d2 d3 d4 d5 d6 d7
+	aregs 32 a0 a1 a2 a3 a4 a5 a6 a7
+	pc 32
+	other_sp 32
+	scratch1 32
+	scratch2 32
+	int_cycle 32
+	prefetch 16
+	int_priority 8
+	int_num 8
+	int_pending 8
+	int_pending_num 8
+	int_ack 8
+	status 8
+	ccr 8
+	xflag 8
+	nflag 8
+	zflag 8
+	vflag 8
+	cflag 8
+	reset_handler ptrvoid
+	mem_pointers ptrvoid 8
+	
+flags
+	register ccr
+	X 4 carry xflag
+	N 3 sign nflag
+	Z 2 zero zflag
+	V 1 overflow vflag
+	C 0 carry cflag
+
+m68k_prefetch
+	if dynarec
+	
+	ccall m68k_read16_noinc context pc
+	mov result prefetch
+	
+	end
+	
+	if interp
+	
+	mov pc scratch1
+	ocall read_16
+	mov scratch1 prefetch
+	
+	end
+	
+	add 2 pc pc
+	
+check_user_mode_swap_ssp_usp
+	local tmp 8
+	and 0x20 status tmp
+	if tmp
+	else
+	
+	xchg other_sp a7
+	
+	end
+	
+m68k_get_sr
+	lsl status 8 scratch1
+	or ccr scratch1 scratch1
+	
+m68k_write32_lowfirst
+	arg value 32
+	add 2 scratch2 scratch2
+	mov value scratch1
+	ocall write_16
+	
+	sub 2 scratch2 scratch2
+	lsr value 16 scratch1
+	ocall write_16
+
+m68k_write32
+	arg value 32
+	local tmp 32
+	mov value tmp
+	lsr value 16 scratch1
+	ocall write_16
+	
+	add 2 scratch2 scratch2
+	mov tmp scratch1
+	ocall write_16
+	
+m68k_read32
+	local tmp 32
+	add 2 scratch1 tmp
+	ocall read_16
+	xchg scratch1 tmp
+	ocall read_16
+	lsl tmp 16 tmp
+	or tmp scratch1 scratch1
+	
+m68k_interrupt
+	cmp int_cycle cycles
+	if >=U
+	
+	#INT_PENDING_NONE
+	cmp 255 int_pending
+	if =
+	
+	mov int_priority int_pending
+	mov int_num int_pending_num
+	
+	else
+	
+	#INT_PENDING_SR_CHANGE
+	cmp 254 int_pending
+	if =
+	
+	mov int_priority int_pending
+	mov int_num int_pending_num
+	
+	else
+	
+	check_user_mode_swap_ssp_usp
+	
+	cycles 6
+	#save status reg
+	sub 6 a7 a7
+	m68k_get_sr
+	mov a7 scratch2
+	ocall write_16
+	
+	#update status register
+	and 0x78 status status
+	or int_priority status status
+	or 0x20 status status
+	
+	#Interrupt ack cycle
+	mov int_pending int_ack
+	if int_pending_num
+	cycles 4
+	else
+	#TODO: do the whole E clock variable latency nonsense
+	cycles 13
+	add 24 int_pending int_pending_num
+	end
+	
+	#save pc
+	add 2 a7 scratch2
+	m68k_write32_lowfirst pc
+	
+	lsl int_pending_num 2 scratch1
+	m68k_read32
+	mov scratch1 pc
+	update_sync
+	end
+	
+m68k_run_op
+	dispatch prefetch
+
+m68k_mem_src
+	arg address 32
+	arg size 16
+	arg isdst 8
+	mov address scratch1
+	if isdst
+	mov address scratch2
+	meta ismem 1
+	end
+	switch size
+	
+	case 0
+	ocall read_8
+	
+	case 1
+	ocall read_16
+	
+	case 2
+	m68k_read32
+	
+	end
+	meta op scratch1
+
+m68k_write_size
+	arg size 16
+	arg lowfirst 8
+	switch size
+	case 0
+	ocall write_8
+	
+	case 1
+	ocall write_16
+	
+	case 2
+	if lowfirst
+	m68k_write32_lowfirst scratch1
+	else
+	m68k_write32 scratch1
+	end
+	end
+	
+m68k_index_word
+	m68k_prefetch
+	local disp 32
+	and prefetch 255 disp
+	sext 16 disp disp
+	sext 32 disp disp
+	local index 16
+	lsr prefetch 12 index
+	local isareg 16
+	and index 8 isareg
+	and index 7 index
+	local islong 16
+	and prefetch 2048 islong
+	
+	switch isareg
+	case 0
+		switch islong
+		case 0
+		sext 32 dregs.index scratch1
+		case 2048
+		mov dregs.index scratch1
+		end
+	case 8
+		switch islong
+		case 0
+		sext 32 aregs.index scratch1
+		case 2048
+		mov aregs.index scratch1
+		end
+	end
+	add disp scratch1 scratch1
+
+m68k_fetch_op_ea
+	arg mode 16
+	arg reg 16
+	arg Z 16
+	arg isdst 8
+	switch mode
+	
+	case 0
+	#data reg direct
+	meta op dregs.reg
+	if isdst
+	meta ismem 0
+	end
+	
+	case 1
+	#address reg direct
+	meta op aregs.reg
+	if isdst
+	meta ismem 0
+	end
+	
+	case 2
+	#address reg indirect
+	m68k_mem_src aregs.reg Z isdst
+	
+	case 3
+	#postincrement
+	m68k_mem_src aregs.reg Z isdst
+	switch reg
+	case 7
+		if Z
+			addsize Z aregs.reg aregs.reg
+		else
+			addsize 1 aregs.reg aregs.reg
+		end
+	default
+		addsize Z aregs.reg aregs.reg
+	end
+	
+	case 4
+	#predecrement
+	switch reg
+	case 7
+		if Z
+			decsize Z aregs.reg aregs.reg
+		else
+			decsize 1 aregs.reg aregs.reg
+		end
+	default
+		decsize Z aregs.reg aregs.reg
+	end
+	cycles 2
+	m68k_mem_src aregs.reg Z isdst
+	
+	case 5
+	#displacement
+	m68k_prefetch
+	sext 32 prefetch scratch1
+	add scratch1 aregs.reg scratch1
+	m68k_mem_src scratch1 Z isdst
+	
+	case 6
+	#indexed
+	m68k_index_word
+	add aregs.reg scratch1 scratch1
+	
+	m68k_mem_src scratch1 Z isdst
+	case 7
+	#pc-relative and absolute modes
+	
+	switch reg
+	case 0
+	#absolute short
+	m68k_prefetch
+	sext 32 prefetch scratch1
+	m68k_mem_src scratch1 Z isdst
+	
+	case 1
+	#absolute long
+	local address 32
+	m68k_prefetch
+	lsl prefetch 16 address
+	m68k_prefetch
+	or prefetch address scratch1
+	m68k_mem_src scratch1 Z isdst
+	
+	case 2
+	#pc displacement
+	m68k_prefetch
+	sext 32 prefetch scratch1
+	add scratch1 pc scratch1
+	sub 2 scratch1 scratch1
+	m68k_mem_src scratch1 Z isdst
+	
+	case 3
+	#pc indexed
+	m68k_index_word
+	add pc scratch1 scratch1
+	sub 2 scratch1 scratch1
+	m68k_mem_src scratch1 Z isdst
+	
+	case 4
+	#immediate
+	switch Z
+	case 2
+		local tmp32 32
+		m68k_prefetch
+		lsl prefetch 16 tmp32
+		m68k_prefetch
+		or prefetch tmp32 scratch1
+		
+	default
+		m68k_prefetch
+		mov prefetch scratch1
+	end
+	meta op scratch1
+	
+	end
+	
+	end
+
+m68k_fetch_src_ea
+	arg mode 16
+	arg reg 16
+	arg Z 16
+	m68k_fetch_op_ea mode reg Z 0
+	meta src op
+	switch mode
+	case 0
+		meta src_is_mem 0
+	case 1
+		meta src_is_mem 0
+	default
+		meta src_is_mem 1
+	end
+
+m68k_fetch_dst_ea
+	arg mode 16
+	arg reg 16
+	arg Z 16
+	m68k_fetch_op_ea mode reg Z 1
+	meta dst op
+	
+m68k_save_dst
+	arg Z 16
+	if ismem
+	m68k_write_size Z 0
+	end
+
+1101DDD0ZZMMMRRR add_ea_dn
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	invalid Z 3
+	m68k_fetch_src_ea M R Z
+	
+	add src dregs.D dregs.D Z
+	update_flags XNZVC
+	m68k_prefetch
+	
+1101DDD1ZZMMMRRR add_dn_ea
+	invalid M 0
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	invalid Z 3
+	m68k_fetch_dst_ea M R Z
+	
+	add dregs.D dst dst Z
+	update_flags XNZVC
+	m68k_save_dst Z
+	m68k_prefetch
+
+1101AAAZ11MMMRRR adda
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	local size 16
+	local ext_src 32
+	if Z
+	mov 2 size
+	else
+	mov 1 size
+	end
+	m68k_fetch_src_ea M R size
+	switch size
+	case 1
+	sext 32 src ext_src
+	meta src ext_src
+	end
+	
+	add src aregs.A aregs.A
+	m68k_prefetch
+
+00000110ZZMMMRRR addi
+	local immed 32
+	invalid Z 3
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	#fetch immediate operand
+	m68k_prefetch
+	switch Z
+	case 2
+		lsl prefetch 16 immed
+		m68k_prefetch
+		or prefetch immed immed
+	default
+		mov prefetch immed
+	end
+	#fetch dst EA
+	m68k_fetch_dst_ea M R Z
+	
+	add immed dst dst Z
+	update_flags XNZVC
+	m68k_save_dst Z
+	m68k_prefetch
+	
+0101III0ZZMMMRRR addq
+	invalid Z 3
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	local src 32
+	switch I
+	case 0
+	mov 8 src
+	default
+	mov I src
+	end
+	
+	m68k_fetch_dst_ea M R Z
+	switch M
+	case 1
+		add src dst dst Z
+	default
+		add src dst dst Z
+		update_flags XNZVC
+	end
+	m68k_save_dst Z
+	m68k_prefetch
+
+1101DDD1ZZ000SSS addx_dy_dx
+	invalid Z 3
+	adc dregs.S dregs.D dregs.D Z
+	update_flags XNVC
+	switch Z
+	case 0
+	local tmp8 8
+	mov dregs.D tmp8
+	if tmp8
+		update_flags Z0
+	end
+	case 1
+	local tmp16 16
+	mov dregs.D tmp16
+	if tmp16
+		update_flags Z0
+	end
+	case 2
+	if dregs.D
+		update_flags Z0
+	end
+	end
+	m68k_prefetch
+
+1101DDD1ZZ001SSS addx_ay_ax
+	invalid Z 3
+	if Z
+		decsize Z aregs.S aregs.S
+	else
+		switch S
+		case 7
+			sub 2 aregs.S aregs.S
+		default
+			decsize Z aregs.S aregs.S
+		end
+	end
+	mov aregs.S scratch1
+	switch Z
+	case 0
+	ocall read_8
+	case 1
+	ocall read_16
+	case 2
+	m68k_read32
+	end
+	mov scratch1 scratch2 #park the source operand while the destination is read into scratch1
+	if Z
+		decsize Z aregs.D aregs.D
+	else
+		switch D
+		case 7
+			sub 2 aregs.D aregs.D
+		default
+			decsize Z aregs.D aregs.D
+		end
+	end
+	mov aregs.D scratch1
+	switch Z
+	case 0
+	ocall read_8
+	case 1
+	ocall read_16
+	case 2
+	m68k_read32
+	end
+	adc scratch2 scratch1 scratch1 Z #sum is left in scratch1 for the write below
+	update_flags XNVC
+	switch Z
+	case 0
+	local tmp8 8
+	mov scratch1 tmp8 #Z is only cleared on a non-zero result; test the memory result, not a data register
+	if tmp8
+		update_flags Z0
+	end
+	case 1
+	local tmp16 16
+	mov scratch1 tmp16 #same check at word size
+	if tmp16
+		update_flags Z0
+	end
+	case 2
+	if scratch1
+		update_flags Z0
+	end
+	end
+	mov aregs.D scratch2
+	m68k_write_size Z 0
+	m68k_prefetch
+
+1100DDD0ZZMMMRRR and_ea_dn
+	invalid M 1
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	invalid Z 3
+	m68k_fetch_src_ea M R Z
+	
+	and src dregs.D dregs.D Z
+	update_flags NZV0C0
+	m68k_prefetch
+	
+1100DDD1ZZMMMRRR and_dn_ea
+	invalid M 0
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	invalid Z 3
+	m68k_fetch_dst_ea M R Z
+	
+	and dregs.D dst dst Z
+	update_flags NZV0C0
+	m68k_save_dst Z
+	m68k_prefetch
+	
+00000010ZZMMMRRR andi
+	local immed 32
+	invalid Z 3
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	#fetch immediate operand
+	m68k_prefetch
+	switch Z
+	case 2
+		lsl prefetch 16 immed
+		m68k_prefetch
+		or prefetch immed immed
+	default
+		mov prefetch immed
+	end
+	#fetch dst EA
+	m68k_fetch_dst_ea M R Z
+	
+	and immed dst dst Z
+	update_flags NZV0C0
+	m68k_save_dst Z
+	m68k_prefetch
+
+0000001000111100 andi_to_ccr
+	#fetch immediate operand
+	m68k_prefetch
+	and prefetch ccr ccr
+	m68k_prefetch
+	
+1011DDD1ZZMMMRRR eor_dn_ea
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	invalid Z 3
+	m68k_fetch_dst_ea M R Z
+	
+	xor dregs.D dst dst Z
+	update_flags NZV0C0
+	m68k_save_dst Z
+	m68k_prefetch
+
+00001010ZZMMMRRR eori
+	local immed 32
+	invalid Z 3
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	#fetch immediate operand
+	m68k_prefetch
+	switch Z
+	case 2
+		lsl prefetch 16 immed
+		m68k_prefetch
+		or prefetch immed immed
+	default
+		mov prefetch immed
+	end
+	#fetch dst EA
+	m68k_fetch_dst_ea M R Z
+	
+	xor immed dst dst Z
+	update_flags NZV0C0
+	m68k_save_dst Z
+	m68k_prefetch
+	
+0000101000111100 eori_to_ccr
+	#fetch immediate operand (EORI to CCR is opcode 0x0A3C; 0x023C is ANDI to CCR)
+	m68k_prefetch
+	xor prefetch ccr ccr
+	m68k_prefetch
+	
+1000DDD0ZZMMMRRR or_ea_dn
+	invalid M 1
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	invalid Z 3
+	m68k_fetch_src_ea M R Z
+	
+	or src dregs.D dregs.D Z
+	update_flags NZV0C0
+	m68k_prefetch
+	
+1000DDD1ZZMMMRRR or_dn_ea
+	invalid M 0
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	invalid Z 3
+	m68k_fetch_dst_ea M R Z
+	
+	or dregs.D dst dst Z
+	update_flags NZV0C0
+	m68k_save_dst Z
+	m68k_prefetch
+	
+00000000ZZMMMRRR ori
+	local immed 32
+	invalid Z 3
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	#fetch immediate operand
+	m68k_prefetch
+	switch Z
+	case 2
+		lsl prefetch 16 immed
+		m68k_prefetch
+		or prefetch immed immed
+	default
+		mov prefetch immed
+	end
+	#fetch dst EA
+	m68k_fetch_dst_ea M R Z
+	
+	or immed dst dst Z
+	update_flags NZV0C0
+	m68k_save_dst Z
+	m68k_prefetch
+
+0000000000111100 ori_to_ccr
+	#fetch immediate operand
+	m68k_prefetch
+	or prefetch ccr ccr
+	m68k_prefetch
+	
+1001DDD0ZZMMMRRR sub_ea_dn
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	invalid Z 3
+	m68k_fetch_src_ea M R Z
+	
+	sub src dregs.D dregs.D Z
+	update_flags XNZVC
+	m68k_prefetch
+	
+1001DDD1ZZMMMRRR sub_dn_ea
+	invalid M 0
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	invalid Z 3
+	m68k_fetch_dst_ea M R Z
+	
+	sub dregs.D dst dst Z
+	update_flags XNZVC
+	m68k_save_dst Z
+	m68k_prefetch
+
+1001AAAZ11MMMRRR suba
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	local size 16
+	local ext_src 32
+	if Z
+	mov 2 size
+	else
+	mov 1 size
+	end
+	m68k_fetch_src_ea M R size
+	switch size
+	case 1
+	sext 32 src ext_src
+	meta src ext_src
+	end
+	
+	sub src aregs.A aregs.A
+	m68k_prefetch
+
+00000100ZZMMMRRR subi
+	local immed 32
+	invalid Z 3
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	#fetch immediate operand
+	m68k_prefetch
+	switch Z
+	case 2
+		lsl prefetch 16 immed
+		m68k_prefetch
+		or prefetch immed immed
+	default
+		mov prefetch immed
+	end
+	#fetch dst EA
+	m68k_fetch_dst_ea M R Z
+	
+	sub immed dst dst Z
+	update_flags XNZVC
+	m68k_save_dst Z
+	m68k_prefetch
+	
+0101III1ZZMMMRRR subq
+	invalid Z 3
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	local src 32
+	switch I
+	case 0
+	mov 8 src
+	default
+	mov I src
+	end
+	
+	m68k_fetch_dst_ea M R Z
+	switch M
+	case 1
+		sub src dst dst Z
+	default
+		sub src dst dst Z
+		update_flags XNZVC
+	end
+	m68k_save_dst Z
+	m68k_prefetch
+	
+1110CCC0ZZ001RRR lsri
+	invalid Z 3
+	switch C
+	case 0
+		meta shift 8
+	default
+		meta shift C
+	end
+	lsr dregs.R shift dregs.R Z
+	update_flags XNZV0C
+	add shift shift shift
+	switch Z
+	case 2
+		add 4 shift shift
+	default
+		add 2 shift shift
+	end
+	cycles shift
+	#TODO: should this happen before or after the majority of the shift?
+	m68k_prefetch
+	
+1110CCC0ZZ101RRR lsr_dn
+	invalid Z 3
+	local shift 8
+	and dregs.C 63 shift
+	lsr dregs.R shift dregs.R Z
+	update_flags XNZV0C
+	add shift shift shift
+	switch Z
+	case 2
+		add 4 shift shift
+	default
+		add 2 shift shift
+	end
+	cycles shift
+	#TODO: should this happen before or after the majority of the shift?
+	m68k_prefetch
+	
+1110001011MMMRRR lsr_ea
+	invalid M 0
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	
+	m68k_fetch_dst_ea M R 1 #memory shifts always operate on a word (size code 1)
+	lsr dst 1 dst 1
+	update_flags XNZV0C
+	m68k_save_dst 1
+	m68k_prefetch
+	
+1110CCC1ZZ001RRR lsli
+	invalid Z 3
+	switch C
+	case 0
+		meta shift 8
+	default
+		meta shift C
+	end
+	lsl dregs.R shift dregs.R Z
+	update_flags XNZV0C
+	add shift shift shift
+	switch Z
+	case 2
+		add 4 shift shift
+	default
+		add 2 shift shift
+	end
+	cycles shift
+	#TODO: should this happen before or after the majority of the shift?
+	m68k_prefetch
+	
+1110CCC1ZZ101RRR lsl_dn
+	invalid Z 3
+	local shift 8
+	and dregs.C 63 shift
+	lsl dregs.R shift dregs.R Z
+	update_flags XNZV0C
+	add shift shift shift
+	switch Z
+	case 2
+		add 4 shift shift
+	default
+		add 2 shift shift
+	end
+	cycles shift
+	#TODO: should this happen before or after the majority of the shift?
+	m68k_prefetch
+	
+1110001111MMMRRR lsl_ea
+	invalid M 0
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	
+	m68k_fetch_dst_ea M R 1 #memory shifts always operate on a word (size code 1)
+	lsl dst 1 dst 1
+	update_flags XNZV0C
+	m68k_save_dst 1
+	m68k_prefetch
+
+00ZZRRRMMMEEESSS move
+	invalid Z 0
+	invalid M 1
+	invalid M 7 #not actually invalid, but will be handled separately due to DSL limitations
+	invalid E 7 S 5
+	invalid E 7 S 6
+	invalid E 7 S 7
+	local size 8
+	local memsrc 32
+	#move uses a different size format than most instructions
+	switch Z
+		case 1
+			mov 0 size
+		case 2
+			mov 2 size
+		case 3
+			mov 1 size
+	end
+	m68k_fetch_src_ea E S size
+	
+	if src_is_mem
+		#avoid clobbering src if we need scratch1
+		mov src memsrc
+		meta src memsrc
+	end
+	
+	cmp 0 src size
+	update_flags NZV0C0
+	
+	switch M
+		case 0
+		mov src dregs.R size
+		
+		case 2
+		mov aregs.R scratch2
+		mov src scratch1
+		m68k_write_size size 0
+		
+		case 3
+		mov aregs.R scratch2
+		mov src scratch1
+		switch R
+			case 7
+				if size
+					addsize size aregs.R aregs.R
+				else
+					addsize 1 aregs.R aregs.R
+				end
+			default
+				addsize size aregs.R aregs.R
+		end
+		m68k_write_size size 0
+		
+		case 4
+		mov src scratch1
+		switch R
+			case 7
+				if size
+					decsize size aregs.R aregs.R
+				else
+					decsize 1 aregs.R aregs.R
+				end
+			default
+				decsize size aregs.R aregs.R
+		end
+		mov aregs.R scratch2
+		m68k_write_size size 1
+		
+		case 5
+		m68k_prefetch
+		sext 32 prefetch scratch2
+		add aregs.R scratch2 scratch2
+		mov src scratch1
+		m68k_write_size size 0
+		
+		case 6
+		m68k_index_word
+		add aregs.R scratch1 scratch2
+		mov src scratch1
+		m68k_write_size size 0
+	end
+	m68k_prefetch
+
+
+00ZZ00M111EEESSS move_abs
+	invalid E 7 S 5
+	invalid E 7 S 6
+	invalid E 7 S 7
+	invalid Z 0
+	local size 8
+	local memsrc 32
+	#move uses a different size format than most instructions
+	switch Z
+	case 1
+		mov 0 size
+	case 2
+		mov 2 size
+	case 3
+		mov 1 size
+	end
+	m68k_fetch_src_ea E S size
+	
+	if src_is_mem
+		#avoid clobbering src if we need scratch1
+		mov src memsrc
+		meta src memsrc
+	end
+	
+	cmp 0 src size
+	update_flags NZV0C0
+	
+	switch M
+	case 0
+	m68k_prefetch
+	sext 32 prefetch scratch2
+	
+	case 1
+	m68k_prefetch
+	lsl prefetch 16 scratch2
+	m68k_prefetch
+	or prefetch scratch2 scratch2
+	end
+	mov src scratch1
+	m68k_write_size size 0
+	m68k_prefetch
+	
+00ZZRRR001EEESSS movea
+	local size 8
+	invalid Z 0
+	invalid Z 1
+	invalid E 7 S 5
+	invalid E 7 S 6
+	invalid E 7 S 7
+	switch Z
+	case 2
+		mov 2 size
+	case 3
+		mov 1 size
+	end
+	m68k_fetch_src_ea E S size
+	switch Z
+	case 2
+		mov src aregs.R
+	case 3
+		sext 32 src aregs.R
+	end
+	m68k_prefetch
+	
+0100010011MMMRRR move_to_ccr
+	invalid M 1
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	m68k_fetch_src_ea M R 1
+	mov src ccr #use the src meta operand: for Dn sources the value is never loaded into scratch1
+	m68k_prefetch
+
+0100011011MMMRRR move_to_sr
+	invalid M 1
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	m68k_fetch_src_ea M R 1
+	mov src ccr #use the src meta operand: for Dn sources the value is never loaded into scratch1
+	lsr src 8 status
+	update_sync
+	m68k_prefetch
+
+0100000011MMMRRR move_from_sr
+	invalid M 1
+	invalid M 7 R 2
+	invalid M 7 R 3
+	invalid M 7 R 4
+	invalid M 7 R 5
+	invalid M 7 R 6
+	invalid M 7 R 7
+	m68k_fetch_dst_ea M R 1
+	lsl status 8 scratch1
+	or ccr scratch1 scratch1
+	mov scratch1 dst
+	m68k_save_dst 1
+	m68k_prefetch
+
+0100111001110000 reset
+	cycles 124
+	if reset_handler
+	pcall reset_handler m68k_reset_handler context
+	end
--- a/m68k_core.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/m68k_core.c	Sat Jan 15 13:15:21 2022 -0800
@@ -1188,8 +1188,15 @@
 {
 	//TODO: Actually execute the M68K reset vector rather than simulating some of its behavior
 	uint16_t *reset_vec = get_native_pointer(0, (void **)context->mem_pointers, &context->options->gen);
+	if (!(context->status & 0x20)) {
+		//switching from user to system mode so swap stack pointers
+		context->aregs[8] = context->aregs[7];
+	}
+	context->status = 0x27;
 	context->aregs[7] = reset_vec[0] << 16 | reset_vec[1];
 	uint32_t address = reset_vec[2] << 16 | reset_vec[3];
+	//interrupt mask may have changed so force a sync
+	sync_components(context, address);
 	start_68k_context(context, address);
 }
 
@@ -1216,9 +1223,7 @@
 
 m68k_context * init_68k_context(m68k_options * opts, m68k_reset_handler reset_handler)
 {
-	size_t ctx_size = sizeof(m68k_context) + ram_size(&opts->gen) / (1 << opts->gen.ram_flags_shift) / 8;
-	m68k_context * context = malloc(ctx_size);
-	memset(context, 0, ctx_size);
+	m68k_context * context = calloc(1, sizeof(m68k_context) + ram_size(&opts->gen) / (1 << opts->gen.ram_flags_shift) / 8);
 	context->options = opts;
 	context->int_cycle = CYCLE_NEVER;
 	context->status = 0x27;
--- a/m68k_core.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/m68k_core.h	Sat Jan 15 13:15:21 2022 -0800
@@ -12,7 +12,7 @@
 //#include "68kinst.h"
 struct m68kinst;
 
-#define NUM_MEM_AREAS 8
+#define NUM_MEM_AREAS 10
 #define NATIVE_MAP_CHUNKS (64*1024)
 #define NATIVE_CHUNK_SIZE ((16 * 1024 * 1024 / NATIVE_MAP_CHUNKS))
 #define MAX_NATIVE_SIZE 255
--- a/m68k_core_x86.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/m68k_core_x86.c	Sat Jan 15 13:15:21 2022 -0800
@@ -421,7 +421,11 @@
 			push_r(code, opts->gen.scratch1);
 		}
 		dec_amount = inst->extra.size == OPSIZE_WORD ? 2 : (inst->extra.size == OPSIZE_LONG ? 4 : (op->params.regs.pri == 7 ? 2 :1));
-		if (!dst) {
+		if (!dst || (
+			inst->op != M68K_MOVE && inst->op != M68K_MOVEM 
+			&& inst->op != M68K_SUBX && inst->op != M68K_ADDX 
+			&& inst->op != M68K_ABCD && inst->op != M68K_SBCD
+		)) {
 			cycles(&opts->gen, PREDEC_PENALTY);
 		}
 		subi_areg(opts, dec_amount, op->params.regs.pri);
@@ -874,7 +878,7 @@
 		code_ptr end_off = code->cur+1;
 		jmp(code, code->cur+2);
 		*true_off = code->cur - (true_off+1);
-		cycles(&opts->gen, 6);
+		cycles(&opts->gen, inst->dst.addr_mode == MODE_REG ? 6 : 4);
 		if (dst_op.mode == MODE_REG_DIRECT) {
 			mov_ir(code, 0xFF, dst_op.base, SZ_B);
 		} else {
@@ -1190,8 +1194,6 @@
 void translate_m68k_reset(m68k_options *opts, m68kinst *inst)
 {
 	code_info *code = &opts->gen.code;
-	//RESET instructions take a long time to give peripherals time to reset themselves
-	cycles(&opts->gen, 132);
 	mov_rdispr(code, opts->gen.context_reg, offsetof(m68k_context, reset_handler), opts->gen.scratch1, SZ_PTR);
 	cmp_ir(code, 0, opts->gen.scratch1, SZ_PTR);
 	code_ptr no_reset_handler = code->cur + 1;
@@ -1201,6 +1203,8 @@
 	mov_rr(code, RAX, opts->gen.context_reg, SZ_PTR);
 	call(code, opts->gen.load_context);
 	*no_reset_handler = code->cur - (no_reset_handler + 1);
+	//RESET instructions take a long time to give peripherals time to reset themselves
+	cycles(&opts->gen, 132);
 }
 
 void op_ir(code_info *code, m68kinst *inst, int32_t val, uint8_t dst, uint8_t size)
@@ -1309,16 +1313,17 @@
 	
 	uint32_t numcycles;
 	if ((inst->op == M68K_ADDX || inst->op == M68K_SUBX) && inst->src.addr_mode != MODE_REG) {
-		numcycles = 6;
+		numcycles = 4;
 	} else if (size == OPSIZE_LONG) {
 		if (inst->op == M68K_CMP) {
-			numcycles = 6;
-		} else if (inst->op == M68K_AND && inst->variant == VAR_IMMEDIATE) {
+			numcycles = inst->src.addr_mode > MODE_AREG && inst->dst.addr_mode > MODE_AREG ? 4 : 6;
+		} else if (inst->op == M68K_AND && inst->variant == VAR_IMMEDIATE && inst->dst.addr_mode == MODE_REG) {
 			numcycles = 6;
-		} else if (inst->op == M68K_ADD && inst->dst.addr_mode == MODE_AREG && inst->extra.size == OPSIZE_WORD && inst->variant == VAR_QUICK) {
-			numcycles = 4;
-		} else if (inst->dst.addr_mode <= MODE_AREG) {
+		} else if (inst->dst.addr_mode == MODE_REG) {
 			numcycles = inst->src.addr_mode <= MODE_AREG || inst->src.addr_mode == MODE_IMMEDIATE ? 8 : 6;
+		} else if (inst->dst.addr_mode == MODE_AREG) {
+			numcycles = inst->src.addr_mode <= MODE_AREG || inst->src.addr_mode == MODE_IMMEDIATE
+				|| inst->extra.size == OPSIZE_WORD ? 8 : 6;
 		} else {
 			numcycles = 4;
 		}
@@ -1495,8 +1500,9 @@
 		//destination is in memory so we need to preserve scratch2 for the write at the end
 		push_r(code, opts->gen.scratch2);
 	}
-	//MC68000 User's Manual suggests NBCD hides the 2 cycle penalty during the write cycle somehow
-	cycles(&opts->gen, inst->op == M68K_NBCD && inst->dst.addr_mode != MODE_REG_DIRECT ? BUS : BUS + 2);
+	
+	//reg to reg takes 6 cycles, mem to mem is 4 cycles + all the operand fetch/writing (including 2 cycle predec penalty for first operand)
+	cycles(&opts->gen, inst->dst.addr_mode != MODE_REG ? BUS : BUS + 2);
 	uint8_t other_reg;
 	//WARNING: This may need adjustment if register assignments change
 	if (opts->gen.scratch2 > RBX) {
@@ -2070,7 +2076,7 @@
 void translate_m68k_negx(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op)
 {
 	code_info *code = &opts->gen.code;
-	cycles(&opts->gen, BUS);
+	cycles(&opts->gen, inst->extra.size == OPSIZE_LONG && inst->dst.addr_mode == MODE_REG ? BUS+2 : BUS);
 	if (dst_op->mode == MODE_REG_DIRECT) {
 		if (dst_op->base == opts->gen.scratch1) {
 			push_r(code, opts->gen.scratch2);
@@ -2136,6 +2142,7 @@
 			}
 			update_flags(opts, init_flags);
 		} else {
+			cycles(&opts->gen, inst->extra.size == OPSIZE_LONG ? 8 : 6);
 			if (src_op->mode == MODE_REG_DIRECT) {
 				if (src_op->base != opts->gen.scratch1) {
 					mov_rr(code, src_op->base, opts->gen.scratch1, SZ_B);
@@ -2443,7 +2450,7 @@
 void translate_m68k_move_from_sr(m68k_options *opts, m68kinst *inst, host_ea *src_op, host_ea *dst_op)
 {
 	code_info *code = &opts->gen.code;
-	cycles(&opts->gen, inst->dst.addr_mode == MODE_REG_DIRECT ? BUS+2 : BUS);
+	cycles(&opts->gen, inst->dst.addr_mode == MODE_REG ? BUS+2 : BUS);
 	call(code, opts->get_sr);
 	if (dst_op->mode == MODE_REG_DIRECT) {
 		mov_rr(code, opts->gen.scratch1, dst_op->base, SZ_W);
@@ -2658,8 +2665,11 @@
 	opts->start_context = (start_fun)code->cur;
 	save_callee_save_regs(code);
 #ifdef X86_64
-	if (opts->gen.scratch2 != RDI) {
-		mov_rr(code, RDI, opts->gen.scratch2, SZ_PTR);
+	if (opts->gen.scratch2 != FIRST_ARG_REG) {
+		mov_rr(code, FIRST_ARG_REG, opts->gen.scratch2, SZ_PTR);
+	}
+	if (opts->gen.context_reg != SECOND_ARG_REG) {
+		mov_rr(code, SECOND_ARG_REG, opts->gen.context_reg, SZ_PTR);
 	}
 #else
 	mov_rdispr(code, RSP, 20, opts->gen.scratch2, SZ_D);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/m68k_util.c	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,89 @@
+#include <string.h>
+
+void m68k_read_8(m68k_context *context) // byte read: the address comes from scratch1 and the result replaces it
+{
+	context->cycles += 4 * context->opts->gen.clock_divider; // advance the cycle counter by 4 * clock_divider
+	context->scratch1 = read_byte(context->scratch1, context->mem_pointers, &context->opts->gen, context);
+}
+
+void m68k_read_16(m68k_context *context) // word read: the address comes from scratch1 and the result replaces it
+{
+	context->cycles += 4 * context->opts->gen.clock_divider; // advance the cycle counter by 4 * clock_divider
+	context->scratch1 = read_word(context->scratch1, context->mem_pointers, &context->opts->gen, context);
+}
+
+void m68k_write_8(m68k_context *context) // byte write using scratch2 and scratch1 as the operands of write_byte
+{
+	context->cycles += 4 * context->opts->gen.clock_divider; // advance the cycle counter by 4 * clock_divider
+	write_byte(context->scratch2, context->scratch1, context->mem_pointers, &context->opts->gen, context); // NOTE(review): presumably scratch2 is the address and scratch1 the value - confirm against write_byte
+}
+
+void m68k_write_16(m68k_context *context) // word write using scratch2 and scratch1 as the operands of write_word
+{
+	context->cycles += 4 * context->opts->gen.clock_divider; // advance the cycle counter by 4 * clock_divider
+	write_word(context->scratch2, context->scratch1, context->mem_pointers, &context->opts->gen, context); // NOTE(review): presumably scratch2 is the address and scratch1 the value - confirm against write_word
+}
+
+void m68k_sync_cycle(m68k_context *context, uint32_t target_cycle) // stores target_cycle as the context's sync_cycle
+{
+	//TODO: interrupt stuff
+	context->sync_cycle = target_cycle;
+}
+
+void init_m68k_opts(m68k_options *opts, memmap_chunk * memmap, uint32_t num_chunks, uint32_t clock_divider) // resets *opts and fills in the memory map plus fixed defaults
+{
+	memset(opts, 0, sizeof(*opts)); // every field not assigned below starts out zero
+	opts->gen.memmap = memmap;
+	opts->gen.memmap_chunks = num_chunks;
+	opts->gen.address_mask = 0xFFFFFF; // 24 address bits
+	opts->gen.byte_swap = 1;
+	opts->gen.max_address = 0x1000000; // one past the largest address the mask allows
+	opts->gen.bus_cycles = 4;
+	opts->gen.clock_divider = clock_divider;
+}
+
+m68k_context *init_68k_context(m68k_options * opts, m68k_reset_handler reset_handler) // allocates a zeroed context bound to opts and reset_handler
+{
+	m68k_context *context = calloc(1, sizeof(m68k_context)); // NOTE(review): the result is not checked for NULL before use
+	context->opts = opts;
+	context->reset_handler = reset_handler;
+	context->int_cycle = 0xFFFFFFFFU; // NOTE(review): presumably means "no interrupt scheduled" - confirm
+	return context;
+}
+
+void m68k_reset(m68k_context *context) // loads a7 and pc from the words at addresses 0-7, then fills the prefetch word
+{
+	//read initial SP
+	context->scratch1 = 0;
+	m68k_read_16(context); // word at address 0 becomes the upper half of a7
+	context->aregs[7] = context->scratch1 << 16;
+	context->scratch1 = 2;
+	m68k_read_16(context); // word at address 2 becomes the lower half of a7
+	context->aregs[7] |= context->scratch1;
+	
+	//read initial PC
+	context->scratch1 = 4;
+	m68k_read_16(context); // word at address 4 becomes the upper half of pc
+	context->pc = context->scratch1 << 16;
+	context->scratch1 = 6;
+	m68k_read_16(context); // word at address 6 becomes the lower half of pc
+	context->pc |= context->scratch1;
+	
+	context->scratch1 = context->pc;
+	m68k_read_16(context); // read the word at the freshly loaded pc into the prefetch slot
+	context->prefetch = context->scratch1;
+	context->pc += 2; // pc now points just past the prefetched word
+	
+	context->status = 0x27; // NOTE(review): presumably supervisor mode with interrupt mask 7 - confirm
+}
+
+void m68k_print_regs(m68k_context *context) // dumps the five flags and all data/address registers to stdout
+{
+	printf("XNZVC\n%d%d%d%d%d\n", context->xflag != 0, context->nflag != 0, context->zflag != 0, context->vflag != 0, context->cflag != 0); // each flag is printed as 0 or 1
+	for (int i = 0; i < 8; i++) {
+		printf("d%d: %X\n", i, context->dregs[i]); // data registers in hex, one per line
+	}
+	for (int i = 0; i < 8; i++) {
+		printf("a%d: %X\n", i, context->aregs[i]); // address registers in hex, one per line
+	}
+}
--- a/megawifi.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/megawifi.c	Sat Jan 15 13:15:21 2022 -0800
@@ -6,15 +6,28 @@
 #define WINVER 0x501
 #include <winsock2.h>
 #include <ws2tcpip.h>
+#include <sys/param.h>
 #else
 #include <sys/socket.h>
+#include <arpa/inet.h>
 #include <unistd.h>
 #include <netinet/in.h>
+#include <netdb.h>
 #endif
 #include <errno.h>
 #include <fcntl.h>
+#include <time.h>
 #include "genesis.h"
 #include "net.h"
+#include "util.h"
+
+#if defined(_WIN32) || defined(__APPLE__)
+#  if BYTE_ORDER == LITTLE_ENDIAN
+#define htobe64(val)   ((((uint64_t)htonl((val)&0xFFFFFFFF))<<32) | htonl((val)>>32))
+#  else
+#define htobe64(val)	(val)
+#  endif
+#endif
 
 enum {
 	TX_IDLE,
@@ -25,7 +38,7 @@
 };
 #define STX 0x7E
 #define ETX 0x7E
-#define MAX_RECV_SIZE 1440
+#define MAX_RECV_SIZE 1460
 
 #define E(N) N
 enum {
@@ -43,7 +56,7 @@
 #define MSG_NOSIGNAL 0
 #endif
 
-enum {
+enum mw_state {
 	STATE_IDLE=1,
 	STATE_AP_JOIN,
 	STATE_SCAN,
@@ -51,6 +64,21 @@
 	STATE_TRANSPARENT
 };
 
+enum {
+	SOCKST_NONE = 0,
+	SOCKST_TCP_LISTEN,
+	SOCKST_TCP_EST,
+	SOCKST_UDP_READY
+};
+
+// TCP/UDP address message
+struct mw_addr_msg {
+	char dst_port[6]; // remote port as text; handed to getaddrinfo as the service and parsed with atoi
+	char src_port[6]; // local port as text; parsed with atoi when a UDP socket is set up
+	uint8_t channel; // 1-based channel number; the command handlers reject 0 and values above 15
+	char host[]; // remote host string, read up to its NUL terminator
+};
+
 #define FLAG_ONLINE 
 
 typedef struct {
@@ -68,6 +96,7 @@
 	uint8_t  flags;
 	uint8_t  transmit_buffer[4096];
 	uint8_t  receive_buffer[4096];
+	struct sockaddr_in remote_addr[15];	// Needed for UDP sockets
 } megawifi;
 
 static megawifi *get_megawifi(void *context)
@@ -75,11 +104,12 @@
 	m68k_context *m68k = context;
 	genesis_context *gen = m68k->system;
 	if (!gen->extra) {
+		socket_init();
 		gen->extra = calloc(1, sizeof(megawifi));
 		megawifi *mw = gen->extra;
 		mw->module_state = STATE_IDLE;
-		for (int i = 0; i < 15; i++)
-		{
+		mw->flags = 0xE0; // cfg_ok, dt_ok, online
+		for (int i = 0; i < 15; i++) {
 			mw->sock_fds[i] = -1;
 		}
 	}
@@ -112,14 +142,88 @@
 	mw->receive_bytes += count;
 }
 
-static void mw_puts(megawifi *mw, char *s)
+static void mw_puts(megawifi *mw, const char *s) // appends the characters of s to the reply via mw_copy
+{
+	size_t len = strlen(s); // the NUL terminator is not counted, so it is not copied
+	mw_copy(mw, (uint8_t*)s, len);
+}
+
+// Receives one pending datagram for UDP channel idx (0-based) and frames it into the receive buffer.
+// poll_socket only calls this while the receive buffer is empty, hence the fixed buffer offsets.
+static void udp_recv(megawifi *mw, uint8_t idx)
 {
-	uint32_t len = strlen(s);
-	if ((mw->receive_bytes + len) > sizeof(mw->receive_buffer)) {
-		return;
+	ssize_t recvd;
+	int s = mw->sock_fds[idx];
+	struct sockaddr_in remote;
+	socklen_t addr_len = sizeof(struct sockaddr_in);
+
+	if (mw->remote_addr[idx].sin_addr.s_addr != htonl(INADDR_ANY)) {
+		// Receive only from specified address
+		recvd = recvfrom(s, (char*)mw->receive_buffer + 3, MAX_RECV_SIZE, 0,
+				(struct sockaddr*)&remote, &addr_len);
+		if (recvd > 0) {
+			if (remote.sin_addr.s_addr != mw->remote_addr[idx].sin_addr.s_addr) {
+				printf("Discarding UDP packet from unknown addr %s:%d\n",
+						inet_ntoa(remote.sin_addr), ntohs(remote.sin_port));
+				recvd = 0;
+			}
+		}
+	} else {
+		// Reuse mode, data is preceded by remote IPv4 and port
+		recvd = recvfrom(s, (char*)mw->receive_buffer + 9, MAX_RECV_SIZE - 6,
+				0, (struct sockaddr*)&remote, &addr_len);
+		if (recvd > 0) {
+			// Copy address and port exactly as stored (network order), independent of host endianness
+			memcpy(mw->receive_buffer + 3, &remote.sin_addr.s_addr, 4);
+			memcpy(mw->receive_buffer + 7, &remote.sin_port, 2);
+			recvd += 6;
+		}
 	}
-	memcpy(mw->receive_buffer + mw->receive_bytes, s, len);
-	mw->receive_bytes += len;
+
+	if (recvd > 0) {
+		mw_putc(mw, STX);
+		mw_putc(mw, (recvd >> 8) | ((idx+1) << 4));
+		mw_putc(mw, recvd);
+		mw->receive_bytes += recvd;
+		mw_putc(mw, ETX);
+		//should this set the channel flag?
+	} else if (recvd < 0 && !socket_error_is_wouldblock()) {
+		socket_close(mw->sock_fds[idx]);
+		mw->sock_fds[idx] = -1; // forget the closed descriptor so the channel can be reused and is never closed twice
+		mw->channel_state[idx] = SOCKST_NONE;
+		mw->channel_flags |= 1 << (idx + 1);
+	}
+}
+
+// Sends the whole transmit buffer as one datagram on UDP channel idx (0-based).
+// A hard socket error closes the channel and raises its flag; a short send is only reported.
+static void udp_send(megawifi *mw, uint8_t idx)
+{
+	struct sockaddr_in remote;
+	int s = mw->sock_fds[idx];
+	int sent = mw->transmit_bytes; // default: a reuse-mode packet too short for an address is dropped as if sent
+	char *data = (char*)mw->transmit_buffer;
+	if (mw->remote_addr[idx].sin_addr.s_addr != htonl(INADDR_ANY)) {
+		sent = sendto(s, data, mw->transmit_bytes, 0, (struct sockaddr*)&mw->remote_addr[idx], sizeof(struct sockaddr_in));
+	} else if (mw->transmit_bytes >= 6) {
+		// Reuse mode, extract address from leading bytes
+		// NOTE: mw->remote_addr[idx].sin_addr.s_addr == INADDR_ANY
+		memset(&remote, 0, sizeof(remote));
+		memcpy(&remote.sin_addr.s_addr, data, 4); // memcpy avoids an unaligned, type-punned load from the byte buffer
+		memcpy(&remote.sin_port, data + 4, 2);
+		remote.sin_family = AF_INET;
+		sent = sendto(s, data + 6, mw->transmit_bytes - 6, 0, (struct sockaddr*)&remote, sizeof(struct sockaddr_in));
+		sent = sent < 0 ? sent : sent + 6; // count the 6 header bytes only on success so a -1 error stays visible
+	}
+	if (sent < 0 && !socket_error_is_wouldblock()) {
+		socket_close(s);
+		mw->sock_fds[idx] = -1;
+		mw->channel_state[idx] = SOCKST_NONE;
+		mw->channel_flags |= 1 << (idx + 1);
+	} else if (sent < mw->transmit_bytes) {
+		//TODO: save this data somewhere so it can be sent in poll_socket
+		printf("Sent %d bytes on channel %d, but %d were requested\n", sent, idx + 1, mw->transmit_bytes);
+	}
 }
 
 static void poll_socket(megawifi *mw, uint8_t channel)
@@ -127,28 +231,25 @@
 	if (mw->sock_fds[channel] < 0) {
 		return;
 	}
-	if (mw->channel_state[channel] == 1) {
+	if (mw->channel_state[channel] == SOCKST_TCP_LISTEN) {
 		int res = accept(mw->sock_fds[channel], NULL, NULL);
 		if (res >= 0) {
-			close(mw->sock_fds[channel]);
-#ifndef _WIN32
-//FIXME: Set nonblocking on Windows too
-			fcntl(res, F_SETFL, O_NONBLOCK);
-#endif
+			socket_close(mw->sock_fds[channel]);
+			socket_blocking(res, 0);
 			mw->sock_fds[channel] = res;
-			mw->channel_state[channel] = 2;
+			mw->channel_state[channel] = SOCKST_TCP_EST;
 			mw->channel_flags |= 1 << (channel + 1);
 		} else if (errno != EAGAIN && errno != EWOULDBLOCK) {
-			close(mw->sock_fds[channel]);
-			mw->channel_state[channel] = 0;
+			socket_close(mw->sock_fds[channel]);
+			mw->channel_state[channel] = SOCKST_NONE;
 			mw->channel_flags |= 1 << (channel + 1);
 		}
-	} else if (mw->channel_state[channel] == 2 && mw->receive_bytes < sizeof(mw->receive_buffer) - 4) {
+	} else if (mw->channel_state[channel] == SOCKST_TCP_EST && mw->receive_bytes < (sizeof(mw->receive_buffer) - 4)) {
 		size_t max = sizeof(mw->receive_buffer) - 4 - mw->receive_bytes;
 		if (max > MAX_RECV_SIZE) {
 			max = MAX_RECV_SIZE;
 		}
-		int bytes = recv(mw->sock_fds[channel], mw->receive_buffer + mw->receive_bytes + 3, max, 0);
+		int bytes = recv(mw->sock_fds[channel], (char*)(mw->receive_buffer + mw->receive_bytes + 3), max, 0);
 		if (bytes > 0) {
 			mw_putc(mw, STX);
 			mw_putc(mw, bytes >> 8 | (channel+1) << 4);
@@ -156,11 +257,13 @@
 			mw->receive_bytes += bytes;
 			mw_putc(mw, ETX);
 			//should this set the channel flag?
-		} else if (bytes < 0 && errno != EAGAIN && errno != EWOULDBLOCK) {
-			close(mw->sock_fds[channel]);
-			mw->channel_state[channel] = 0;
+		} else if (bytes < 0 && !socket_error_is_wouldblock()) {
+			socket_close(mw->sock_fds[channel]);
+			mw->channel_state[channel] = SOCKST_NONE;
 			mw->channel_flags |= 1 << (channel + 1);
 		}
+	} else if (mw->channel_state[channel] == SOCKST_UDP_READY && !mw->receive_bytes) {
+		udp_recv(mw, channel);
 	}
 }
 
@@ -172,6 +275,7 @@
 	}
 }
 
+
 static void start_reply(megawifi *mw, uint8_t cmd)
 {
 	mw_putc(mw, STX);
@@ -197,152 +301,443 @@
 	mw_putc(mw, ETX);
 }
 
-static void process_packet(megawifi *mw)
+static void cmd_ap_cfg_get(megawifi *mw) // replies with a made-up access point configuration for the requested slot
+{
+	char ssid[32] = {0}; // zero-filled so the fixed-size fields copied below are NUL padded
+	char pass[64] = {0};
+	uint8_t slot = mw->transmit_buffer[4]; // first payload byte after the command and size words
+
+	sprintf(ssid, "BLASTEM! SSID %d", slot + 1);
+	sprintf(pass, "BLASTEM! PASS %d", slot + 1);
+	start_reply(mw, CMD_OK);
+	mw_putc(mw, slot);
+	mw_putc(mw, 7);	/// 11bgn
+	mw_copy(mw, (uint8_t*)ssid, 32); // always the full 32-byte field, not just the string length
+	mw_copy(mw, (uint8_t*)pass, 64); // always the full 64-byte field
+	end_reply(mw);
+}
+
+static void cmd_ip_cfg_get(megawifi *mw) // replies with an all-zero IP configuration for the requested slot
+{
+	uint32_t ipv4s[5] = {0}; // five zeroed 32-bit values sent as the body of the reply
+
+	start_reply(mw, CMD_OK);
+	mw_putc(mw, mw->transmit_buffer[4]); // echo the first payload byte of the request
+	mw_putc(mw, 0);
+	mw_putc(mw, 0);
+	mw_putc(mw, 0);
+	mw_copy(mw, (uint8_t*)ipv4s, sizeof(ipv4s));
+	end_reply(mw);
+}
+
+// Handles CMD_TCP_CON: resolves the requested host/port and opens a TCP connection on a free channel.
+// Replies CMD_OK on success; every failure replies CMD_ERROR and releases whatever was acquired.
+static void cmd_tcp_con(megawifi *mw, uint32_t size)
+{
+	struct mw_addr_msg *addr = (struct mw_addr_msg*)(mw->transmit_buffer + 4);
+	struct addrinfo hints;
+	struct addrinfo *res = NULL;
+	int s;
+	int err;
+	uint8_t channel = addr->channel;
+	if (!channel || channel > 15 || mw->sock_fds[channel - 1] >= 0) {
+		start_reply(mw, CMD_ERROR);
+		end_reply(mw);
+		return;
+	}
+	channel--;
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_INET;
+#ifndef _WIN32
+	hints.ai_flags = AI_NUMERICSERV;
+#endif
+	hints.ai_socktype = SOCK_STREAM;
+
+	if ((err = getaddrinfo(addr->host, addr->dst_port, &hints, &res)) != 0) {
+		printf("getaddrinfo failed: %s\n", gai_strerror(err));
+		start_reply(mw, CMD_ERROR);
+		end_reply(mw);
+		return;
+	}
+
+	s = socket(AF_INET, SOCK_STREAM, 0);
+	if (s < 0) {
+		goto err;
+	}
+
+	// Should this be handled in a separate thread to avoid blocking emulation?
+	if (connect(s, res->ai_addr, res->ai_addrlen) != 0) {
+		goto err;
+	}
+	socket_blocking(s, 0);
+	mw->sock_fds[channel] = s;
+	mw->channel_state[channel] = SOCKST_TCP_EST;
+	mw->channel_flags |= 1 << (channel + 1);
+	printf("Connection established on ch %d with %s:%s\n", channel + 1, addr->host, addr->dst_port);
+	freeaddrinfo(res);
+	start_reply(mw, CMD_OK);
+	end_reply(mw);
+	return;
+
+err:
+	err = errno; // capture the failure reason before the cleanup calls can overwrite errno
+	if (s >= 0) {
+		socket_close(s); // connect failed: release the socket instead of leaking it
+	}
+	freeaddrinfo(res);
+	printf("Connection to %s:%s failed, %s\n", addr->host, addr->dst_port, strerror(err));
+	start_reply(mw, CMD_ERROR);
+	end_reply(mw);
+}
+
+// Handles CMD_CLOSE: closes the socket on the 1-based channel given in the first payload byte.
+static void cmd_close(megawifi *mw)
+{
+	int channel = mw->transmit_buffer[4] - 1;
+	// A channel byte of 0 yields -1 here; reject it before it is used as an array index
+	if (channel < 0 || channel >= 15 || mw->sock_fds[channel] < 0) {
+		start_reply(mw, CMD_ERROR);
+		end_reply(mw);
+		return;
+	}
+	socket_close(mw->sock_fds[channel]);
+	mw->sock_fds[channel] = -1;
+	mw->channel_state[channel] = SOCKST_NONE;
+	mw->channel_flags |= 1 << (channel + 1);
+	start_reply(mw, CMD_OK);
+	end_reply(mw);
+}
+
+// Handles CMD_UDP_SET: binds a UDP socket on a free channel, either tied to one remote peer
+// or in "reuse" mode. Replies CMD_OK on success; on any failure the socket is closed and CMD_ERROR sent.
+static void cmd_udp_set(megawifi *mw)
 {
-	if (mw->transmit_channel == 0) {
-		uint32_t command = mw->transmit_buffer[0] << 8 | mw->transmit_buffer[1];
-		uint32_t size = mw->transmit_buffer[2] << 8 | mw->transmit_buffer[3];
-		if (size > mw->transmit_bytes - 4) {
-			size = mw->transmit_bytes - 4;
+	struct mw_addr_msg *addr = (struct mw_addr_msg*)(mw->transmit_buffer + 4);
+	unsigned int local_port, remote_port;
+	int s = -1; // stays -1 until a socket exists so the err path knows whether to close it
+	struct addrinfo *raddr;
+	struct addrinfo hints;
+	struct sockaddr_in local;
+	int err;
+	uint8_t channel = addr->channel;
+	if (!channel || channel > 15 || mw->sock_fds[channel - 1] >= 0) {
+		goto err;
+	}
+	channel--;
+	local_port = atoi(addr->src_port);
+	remote_port = atoi(addr->dst_port);
+
+	if ((s = socket(PF_INET, SOCK_DGRAM, 0)) < 0) {
+		printf("Datagram socket creation failed\n");
+		goto err;
+	}
+
+	memset(&local, 0, sizeof(local)); // clear the whole struct, not only sin_zero, so no field is left uninitialized
+	local.sin_family = AF_INET;
+	local.sin_addr.s_addr = htonl(INADDR_ANY);
+	local.sin_port = htons(local_port);
+	if (remote_port && addr->host[0]) {
+		// Communication with remote peer
+		printf("Set UDP ch %d, port %d to addr %s:%d\n", addr->channel, local_port, addr->host, remote_port);
+		memset(&hints, 0, sizeof(hints));
+		hints.ai_family = AF_INET;
+#ifndef _WIN32
+		hints.ai_flags = AI_NUMERICSERV;
+#endif
+		hints.ai_socktype = SOCK_DGRAM;
+
+		if ((err = getaddrinfo(addr->host, addr->dst_port, &hints, &raddr)) != 0) {
+			printf("getaddrinfo failed: %s\n", gai_strerror(err));
+			goto err;
 		}
-		int orig_receive_bytes = mw->receive_bytes;
-		switch (command)
-		{
-		case CMD_VERSION:
+		mw->remote_addr[channel] = *((struct sockaddr_in*)raddr->ai_addr);
+		freeaddrinfo(raddr);
+	} else if (local_port) {
+		// Server in reuse mode
+		printf("Set UDP ch %d, src port %d\n", addr->channel, local_port);
+		mw->remote_addr[channel] = local;
+	} else {
+		printf("Invalid UDP socket data\n");
+		goto err;
+	}
+
+	if (bind(s, (struct sockaddr*)&local, sizeof(struct sockaddr_in)) < 0) {
+		printf("bind to port %d failed\n", local_port);
+		goto err;
+	}
+
+	socket_blocking(s, 0);
+	mw->sock_fds[channel] = s;
+	mw->channel_state[channel] = SOCKST_UDP_READY;
+	mw->channel_flags |= 1 << (channel + 1);
+	start_reply(mw, CMD_OK);
+	end_reply(mw);
+	return;
+
+err:
+	if (s >= 0) {
+		socket_close(s); // release the socket created above instead of leaking it
+	}
+	start_reply(mw, CMD_ERROR);
+	end_reply(mw);
+}
+
+#define AVATAR_BYTES	(32 * 48 / 2)
+// Handles CMD_GAMERTAG_GET: replies with a fixed placeholder gamertag record (id, three 32-byte strings, token, avatar).
+static void cmd_gamertag_get(megawifi *mw)
+{
+	uint32_t id = htonl(1);
+	char buf[AVATAR_BYTES];
+	start_reply(mw, CMD_OK);
+	// TODO Get items from config file
+	mw_copy(mw, (uint8_t*)&id, 4);
+	strncpy(buf, "doragasu on Blastem!", 32); // strncpy zero-pads each field to its full 32 bytes
+	mw_copy(mw, (uint8_t*)buf, 32);
+	strncpy(buf, "My cool password", 32);
+	mw_copy(mw, (uint8_t*)buf, 32);
+	strncpy(buf, "All your WiFi are belong to me!", 32);
+	mw_copy(mw, (uint8_t*)buf, 32);
+	memset(buf, 0, sizeof(buf)); // Telegram token; clear all of buf so the avatar copy below sends zeros, not stack garbage
+	mw_copy(mw, (uint8_t*)buf, 64);
+	mw_copy(mw, (uint8_t*)buf, AVATAR_BYTES); // Avatar tiles
+	mw_copy(mw, (uint8_t*)buf, 32); // Avatar palette
+	end_reply(mw);
+}
+
+// Handles CMD_HRNG_GET with rand() output (pseudo-random, but who cares); oversized requests get CMD_ERROR.
+static void cmd_hrng_get(megawifi *mw)
+{
+	static uint8_t seeded; // seed only once: reseeding from time() on every call repeats data within one second
+	uint16_t len = (mw->transmit_buffer[4]<<8) + mw->transmit_buffer[5]; // requested byte count, high byte first
+	uint8_t ok = len <= (MAX_RECV_SIZE - 4);
+	if (ok && !seeded) {
+		srand(time(NULL));
+		seeded = 1;
+	}
+	start_reply(mw, ok ? CMD_OK : CMD_ERROR);
+	for (uint16_t i = 0; ok && i < len; i++) {
+		mw_putc(mw, rand());
+	}
+	end_reply(mw);
+}
+
+static void cmd_datetime(megawifi *mw) // replies with the current time as 8 big-endian bytes followed by its text form
+{
+	start_reply(mw, CMD_OK);
+#ifdef _WIN32
+	__time64_t t = _time64(NULL);
+	int64_t t_be = htobe64(t); // byte-swap to big-endian before the raw bytes are copied out
+	mw_copy(mw, (uint8_t*)&t_be, sizeof(int64_t));
+	mw_puts(mw, _ctime64(&t));
+#else
+	time_t t = time(NULL);
+	int64_t t_be = htobe64(t); // byte-swap to big-endian before the raw bytes are copied out
+	mw_copy(mw, (uint8_t*)&t_be, sizeof(int64_t));
+	mw_puts(mw, ctime(&t)); // NOTE(review): ctime may return NULL, which mw_puts would pass to strlen - confirm this cannot happen
+#endif
+
+	mw_putc(mw, '\0'); // mw_puts copies the string without its terminator, so append it explicitly
+	end_reply(mw);
+}
+
+static void process_command(megawifi *mw)
+{
+	uint32_t command = mw->transmit_buffer[0] << 8 | mw->transmit_buffer[1];
+	uint32_t size = mw->transmit_buffer[2] << 8 | mw->transmit_buffer[3];
+	if (size > mw->transmit_bytes - 4) {
+		size = mw->transmit_bytes - 4;
+	}
+	int orig_receive_bytes = mw->receive_bytes;
+	switch (command)
+	{
+	case CMD_VERSION:
+		start_reply(mw, CMD_OK);
+		mw_putc(mw, 1);
+		mw_putc(mw, 3);
+		mw_putc(mw, 0);
+		mw_puts(mw, "blastem");
+		mw_putc(mw, '\0');
+		end_reply(mw);
+		break;
+	case CMD_ECHO:
+		mw->receive_bytes = mw->transmit_bytes;
+		memcpy(mw->receive_buffer, mw->transmit_buffer, mw->transmit_bytes);
+		break;
+	case CMD_AP_CFG_GET:
+		cmd_ap_cfg_get(mw);
+		break;
+	case CMD_IP_CURRENT: {
+		iface_info i;
+		if (get_host_address(&i)) {
 			start_reply(mw, CMD_OK);
-			mw_putc(mw, 1);
-			mw_putc(mw, 0);
-			mw_puts(mw, "blastem");
-			end_reply(mw);
-			break;
-		case CMD_ECHO:
-			mw->receive_bytes = mw->transmit_bytes;
-			memcpy(mw->receive_buffer, mw->transmit_buffer, mw->transmit_bytes);
-			break;
-		case CMD_IP_CURRENT: {
-			iface_info i;
-			if (get_host_address(&i)) {
-				start_reply(mw, CMD_OK);
-				//config number and reserved bytes
-				mw_set(mw, 0, 4);
-				//ip
-				mw_copy(mw, i.ip, sizeof(i.ip));
-				//net mask
-				mw_copy(mw, i.net_mask, sizeof(i.net_mask));
-				//gateway guess
-				mw_putc(mw, i.ip[0] & i.net_mask[0]);
-				mw_putc(mw, i.ip[1] & i.net_mask[1]);
-				mw_putc(mw, i.ip[2] & i.net_mask[2]);
-				mw_putc(mw, (i.ip[3] & i.net_mask[3]) + 1);
-				//dns
-				static const uint8_t localhost[] = {127,0,0,1};
-				mw_copy(mw, localhost, sizeof(localhost));
-				mw_copy(mw, localhost, sizeof(localhost));
-				
-			} else {
-				start_reply(mw, CMD_ERROR);
-			}
+			//config number and reserved bytes
+			mw_set(mw, 0, 4);
+			//ip
+			mw_copy(mw, i.ip, sizeof(i.ip));
+			//net mask
+			mw_copy(mw, i.net_mask, sizeof(i.net_mask));
+			//gateway guess
+			mw_putc(mw, i.ip[0] & i.net_mask[0]);
+			mw_putc(mw, i.ip[1] & i.net_mask[1]);
+			mw_putc(mw, i.ip[2] & i.net_mask[2]);
+			mw_putc(mw, (i.ip[3] & i.net_mask[3]) + 1);
+			//dns
+			static const uint8_t localhost[] = {127,0,0,1};
+			mw_copy(mw, localhost, sizeof(localhost));
+			mw_copy(mw, localhost, sizeof(localhost));
+			
+		} else {
+			start_reply(mw, CMD_ERROR);
+		}
+		end_reply(mw);
+		break;
+	}
+	case CMD_IP_CFG_GET:
+		cmd_ip_cfg_get(mw);
+		break;
+	case CMD_DEF_AP_CFG_GET:
+		start_reply(mw, CMD_OK);
+		mw_putc(mw, 0);
+		end_reply(mw);
+		break;
+	case CMD_AP_JOIN:
+		mw->module_state = STATE_READY;
+		start_reply(mw, CMD_OK);
+		end_reply(mw);
+		break;
+	case CMD_TCP_CON:
+		cmd_tcp_con(mw, size);
+		break;
+	case CMD_TCP_BIND:{
+		if (size < 7){
+			start_reply(mw, CMD_ERROR);
 			end_reply(mw);
 			break;
 		}
-		case CMD_AP_JOIN:
-			mw->module_state = STATE_READY;
-			start_reply(mw, CMD_OK);
+		uint8_t channel = mw->transmit_buffer[10];
+		if (!channel || channel > 15) {
+			start_reply(mw, CMD_ERROR);
+			end_reply(mw);
+			break;
+		}
+		channel--;
+		if (mw->sock_fds[channel] >= 0) {
+			socket_close(mw->sock_fds[channel]);
+		}
+		mw->sock_fds[channel] = socket(AF_INET, SOCK_STREAM, 0);
+		if (mw->sock_fds[channel] < 0) {
+			start_reply(mw, CMD_ERROR);
 			end_reply(mw);
 			break;
-		case CMD_TCP_BIND:{
-			if (size < 7){
-				start_reply(mw, CMD_ERROR);
-				end_reply(mw);
-				break;
-			}
-			uint8_t channel = mw->transmit_buffer[10];
-			if (!channel || channel > 15) {
-				start_reply(mw, CMD_ERROR);
-				end_reply(mw);
-				break;
-			}
-			channel--;
-			if (mw->sock_fds[channel] >= 0) {
-				close(mw->sock_fds[channel]);
-			}
-			mw->sock_fds[channel] = socket(AF_INET, SOCK_STREAM, 0);
-			if (mw->sock_fds[channel] < 0) {
-				start_reply(mw, CMD_ERROR);
-				end_reply(mw);
-				break;
-			}
-			int value = 1;
-			setsockopt(mw->sock_fds[channel], SOL_SOCKET, SO_REUSEADDR, &value, sizeof(value));
-			struct sockaddr_in bind_addr;
-			memset(&bind_addr, 0, sizeof(bind_addr));
-			bind_addr.sin_family = AF_INET;
-			bind_addr.sin_port = htons(mw->transmit_buffer[8] << 8 | mw->transmit_buffer[9]);
-			if (bind(mw->sock_fds[channel], (struct sockaddr *)&bind_addr, sizeof(bind_addr)) != 0) {
-				close(mw->sock_fds[channel]);
-				mw->sock_fds[channel] = -1;
-				start_reply(mw, CMD_ERROR);
-				end_reply(mw);
-				break;
-			}
-			int res = listen(mw->sock_fds[channel], 2);
-			start_reply(mw, res ? CMD_ERROR : CMD_OK);
-			if (res) {
-				close(mw->sock_fds[channel]);
-				mw->sock_fds[channel] = -1;
-			} else {
-				mw->channel_flags |= 1 << (channel + 1);
-				mw->channel_state[channel] = 1;
-#ifndef _WIN32
-//FIXME: Set nonblocking on Windows too
-				fcntl(mw->sock_fds[channel], F_SETFL, O_NONBLOCK);
-#endif
-			}
+		}
+		int value = 1;
+		setsockopt(mw->sock_fds[channel], SOL_SOCKET, SO_REUSEADDR, (char*)&value, sizeof(value));
+		struct sockaddr_in bind_addr;
+		memset(&bind_addr, 0, sizeof(bind_addr));
+		bind_addr.sin_family = AF_INET;
+		bind_addr.sin_port = htons(mw->transmit_buffer[8] << 8 | mw->transmit_buffer[9]);
+		if (bind(mw->sock_fds[channel], (struct sockaddr *)&bind_addr, sizeof(bind_addr)) != 0) {
+			socket_close(mw->sock_fds[channel]);
+			mw->sock_fds[channel] = -1;
+			start_reply(mw, CMD_ERROR);
+			end_reply(mw);
+			break;
+		}
+		int res = listen(mw->sock_fds[channel], 2);
+		start_reply(mw, res ? CMD_ERROR : CMD_OK);
+		if (res) {
+			socket_close(mw->sock_fds[channel]);
+			mw->sock_fds[channel] = -1;
+		} else {
+			mw->channel_flags |= 1 << (channel + 1);
+			mw->channel_state[channel] = SOCKST_TCP_LISTEN;
+			socket_blocking(mw->sock_fds[channel], 0);
+		}
+		end_reply(mw);
+		break;
+	}
+	case CMD_CLOSE:
+		cmd_close(mw);
+		break;
+	case CMD_UDP_SET:
+		cmd_udp_set(mw);
+		break;
+	case CMD_SOCK_STAT: {
+		uint8_t channel = mw->transmit_buffer[4];
+		if (!channel || channel > 15) {
+			start_reply(mw, CMD_ERROR);
 			end_reply(mw);
 			break;
 		}
-		case CMD_SOCK_STAT: {
-			uint8_t channel = mw->transmit_buffer[4];
-			if (!channel || channel > 15) {
-				start_reply(mw, CMD_ERROR);
-				end_reply(mw);
-				break;
+		mw->channel_flags &= ~(1 << channel);
+		channel--;
+		poll_socket(mw, channel);
+		start_reply(mw, CMD_OK);
+		mw_putc(mw, mw->channel_state[channel]);
+		end_reply(mw);
+		break;
+	}
+	case CMD_DATETIME:
+		cmd_datetime(mw);
+		break;
+	case CMD_SYS_STAT:
+		poll_all_sockets(mw);
+		start_reply(mw, CMD_OK);
+		mw_putc(mw, mw->module_state);
+		mw_putc(mw, mw->flags);
+		mw_putc(mw, mw->channel_flags >> 8);
+		mw_putc(mw, mw->channel_flags);
+		end_reply(mw);
+		break;
+	case CMD_GAMERTAG_GET:
+		cmd_gamertag_get(mw);
+		break;
+	case CMD_LOG:
+		start_reply(mw, CMD_OK);
+		puts((char*)&mw->transmit_buffer[4]);
+		end_reply(mw);
+		break;
+	case CMD_HRNG_GET:
+		cmd_hrng_get(mw);
+		break;
+	case CMD_SERVER_URL_GET:
+		start_reply(mw, CMD_OK);
+		// FIXME: This should be get from config file
+		mw_puts(mw, "doragasu.com");
+		mw_putc(mw,'\0');
+		end_reply(mw);
+		break;
+	default:
+		printf("Unhandled MegaWiFi command %s(%d) with length %X\n", cmd_names[command], command, size);
+		break;
+	}
+}
+
+static void process_packet(megawifi *mw)
+{
+	if (mw->transmit_channel == 0) {
+		process_command(mw);
+	} else {
+		uint8_t channel = mw->transmit_channel - 1;
+		int channel_state = mw->channel_state[channel];
+		int sock_fd = mw->sock_fds[channel];
+		if (sock_fd >= 0 && channel_state == SOCKST_TCP_EST) {
+			int sent = send(sock_fd, (char*)mw->transmit_buffer, mw->transmit_bytes, 0);
+			if (sent < 0 && !socket_error_is_wouldblock()) {
+				socket_close(sock_fd);
+				mw->sock_fds[channel] = -1;
+				mw->channel_state[channel] = SOCKST_NONE;
+				mw->channel_flags |= 1 << mw->transmit_channel;
+			} else if (sent < mw->transmit_bytes) {
+				//TODO: save this data somewhere so it can be sent in poll_socket
+				printf("Sent %d bytes on channel %d, but %d were requested\n", sent, mw->transmit_channel, mw->transmit_bytes);
 			}
-			mw->channel_flags &= ~(1 << channel);
-			channel--;
-			poll_socket(mw, channel);
-			start_reply(mw, CMD_OK);
-			mw_putc(mw, mw->channel_state[channel]);
-			end_reply(mw);
-			break;
+		} else if (sock_fd >= 0 && channel_state == SOCKST_UDP_READY) {
+			udp_send(mw, channel);
+		} else {
+			printf("Unhandled receive of MegaWiFi data on channel %d\n", mw->transmit_channel);
 		}
-		case CMD_SYS_STAT:
-			poll_all_sockets(mw);
-			start_reply(mw, CMD_OK);
-			mw_putc(mw, mw->module_state);
-			mw_putc(mw, mw->flags);
-			mw_putc(mw, mw->channel_flags >> 8);
-			mw_putc(mw, mw->channel_flags);
-			end_reply(mw);
-			break;
-		default:
-			printf("Unhandled MegaWiFi command %s(%d) with length %X\n", cmd_names[command], command, size);
-			break;
-		}
-	} else if (mw->sock_fds[mw->transmit_channel - 1] >= 0 && mw->channel_state[mw->transmit_channel - 1] == 2) {
-		uint8_t channel = mw->transmit_channel - 1;
-		int sent = send(mw->sock_fds[channel], mw->transmit_buffer, mw->transmit_bytes, MSG_NOSIGNAL);
-		if (sent < 0 && errno != EAGAIN && errno != EWOULDBLOCK) {
-			close(mw->sock_fds[channel]);
-			mw->sock_fds[channel] = -1;
-			mw->channel_state[channel] = 0;
-			mw->channel_flags |= 1 << mw->transmit_channel;
-		} else if (sent < mw->transmit_bytes) {
-			//TODO: save this data somewhere so it can be sent in poll_socket
-			printf("Sent %d bytes on channel %d, but %d were requested\n", sent, mw->transmit_channel, mw->transmit_bytes);
-		}
-	} else {
-		printf("Unhandled receive of MegaWiFi data on channel %d\n", mw->transmit_channel);
 	}
 	mw->transmit_bytes = mw->expected_bytes = 0;
 }
--- a/menu.s68	Sat Jan 05 00:58:08 2019 -0800
+++ b/menu.s68	Sat Jan 15 13:15:21 2022 -0800
@@ -1251,8 +1251,8 @@
 	dc.b "Prev", 0
 	
 about_text:
-	dc.b "BlastEm v0.6.1", 0
-	dc.b "Copyright 2011-2017 Michael Pavone", 0
+	dc.b "BlastEm v0.6.3-pre", 0
+	dc.b "Copyright 2011-2019 Michael Pavone", 0
 	dc.b " ", 0
 	dc.b "BlastEm is a high performance, open", 0
 	dc.b "source (GPLv3) Genesis/Megadrive", 0
--- a/mw_commands.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/mw_commands.c	Sat Jan 15 13:15:21 2022 -0800
@@ -5,7 +5,7 @@
 	E(CMD_AP_CFG),
 	E(CMD_AP_CFG_GET),
 	E(CMD_IP_CURRENT),
-	E(CMD_RESERVED),
+	E(CMD_RESERVED_7),
 	E(CMD_IP_CFG),
 	E(CMD_IP_CFG_GET),
 	E(CMD_DEF_AP_CFG),
@@ -14,10 +14,10 @@
 	E(CMD_AP_LEAVE),
 	E(CMD_TCP_CON),
 	E(CMD_TCP_BIND),
-	E(CMD_TCP_ACCEPT),
-	E(CMD_TCP_DISC),
+	E(CMD_RESERVED_16),
+	E(CMD_CLOSE),
 	E(CMD_UDP_SET),
-	E(CMD_UDP_CLR),
+	E(CMD_RESERVED_19),
 	E(CMD_SOCK_STAT),
 	E(CMD_PING),
 	E(CMD_SNTP_CFG),
@@ -30,4 +30,27 @@
 	E(CMD_FLASH_ID),
 	E(CMD_SYS_STAT),
 	E(CMD_DEF_CFG_SET),
-	E(CMD_HRNG_GET),
\ No newline at end of file
+	E(CMD_HRNG_GET),
+	E(CMD_BSSID_GET),
+	E(CMD_GAMERTAG_SET),
+	E(CMD_GAMERTAG_GET),
+	E(CMD_LOG),
+	E(CMD_FACTORY_RESET),
+	E(CMD_SLEEP),
+	E(CMD_HTTP_URL_SET),
+	E(CMD_HTTP_METHOD_SET),
+	E(CMD_HTTP_CERT_QUERY),
+	E(CMD_HTTP_CERT_SET),
+	E(CMD_HTTP_HDR_ADD),
+	E(CMD_HTTP_HDR_DEL),
+	E(CMD_HTTP_OPEN),
+	E(CMD_HTTP_FINISH),
+	E(CMD_HTTP_CLEANUP),
+	E(CMD_RESERVED_48),
+	E(CMD_SERVER_URL_GET),
+	E(CMD_SERVER_URL_SET),
+	E(CMD_WIFI_ADV_GET),
+	E(CMD_WIFI_ADV_SET),
+	E(CMD_NV_CFG_SAVE),
+	E(CMD_UPGRADE_LIST),
+	E(CMD_UPGRADE_PERFORM),
--- a/net.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/net.c	Sat Jan 15 13:15:21 2022 -0800
@@ -19,6 +19,10 @@
 
 uint8_t get_host_address(iface_info *out)
 {
+#ifdef __ANDROID__
+	//TODO: write an implementation for Android
+	return 0;
+#else
 	struct ifaddrs *entries, *current, *localhost;
 	if (getifaddrs(&entries)) {
 		return 0;
@@ -46,4 +50,5 @@
 	}
 	freeifaddrs(entries);
 	return ret;
+#endif
 }
\ No newline at end of file
--- a/nuklear_ui/blastem_nuklear.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/nuklear_ui/blastem_nuklear.c	Sat Jan 15 13:15:21 2022 -0800
@@ -1,9 +1,13 @@
 #define NK_IMPLEMENTATION
 #define NK_SDL_GLES2_IMPLEMENTATION
+#define NK_RAWFB_IMPLEMENTATION
+#define RAWFB_RGBX_8888
 
 #include <stdlib.h>
 #include <limits.h>
+#include <math.h>
 #include "blastem_nuklear.h"
+#include "nuklear_rawfb.h"
 #include "font.h"
 #include "../render.h"
 #include "../render_sdl.h"
@@ -18,6 +22,7 @@
 #include "../bindings.h"
 
 static struct nk_context *context;
+static struct rawfb_context *fb_context;
 
 typedef struct
 {
@@ -26,7 +31,8 @@
 	struct nk_image  ui;
 } ui_image;
 
-static ui_image **ui_images, *controller_360, *controller_ps4, *controller_ps4_6b;
+static ui_image **ui_images, *controller_360, *controller_ps4, 
+	*controller_ps4_6b, *controller_wiiu, *controller_gen_6b;
 static uint32_t num_ui_images, ui_image_storage;
 
 typedef void (*view_fun)(struct nk_context *);
@@ -45,12 +51,14 @@
 	}
 	previous_views[num_prev++] = current_view;
 	current_view = new_view;
+	context->input.selected_widget = 0;
 }
 
 static void pop_view()
 {
 	if (num_prev) {
 		current_view = previous_views[--num_prev];
+		context->input.selected_widget = 0;
 	}
 }
 
@@ -81,6 +89,15 @@
 		if (entries) {
 			sort_dir_list(entries, num_entries);
 		}
+		if (!num_entries) {
+			//get_dir_list can fail if the user doesn't have permission
+			//for the current folder, make sure they can still navigate up
+			free_dir_list(entries, num_entries);
+			entries = calloc(1, sizeof(dir_entry));
+			entries[0].name = strdup("..");
+			entries[0].is_dir = 1;
+			num_entries = 1;
+		}
 	}
 	if (!got_ext_list) {
 		ext_list = get_extension_list(config, &num_exts);
@@ -91,7 +108,8 @@
 	if (nk_begin(context, "Load ROM", nk_rect(0, 0, width, height), 0)) {
 		nk_layout_row_static(context, height - context->style.font->height * 3, width - 60, 1);
 		int32_t old_selected = selected_entry;
-		if (nk_group_begin(context, "Select ROM", NK_WINDOW_BORDER | NK_WINDOW_TITLE)) {
+		char *title = alloc_concat("Select ROM: ", current_path);
+		if (nk_group_begin(context, title, NK_WINDOW_BORDER | NK_WINDOW_TITLE)) {
 			nk_layout_row_static(context, context->style.font->height - 2, width-100, 1);
 			for (int32_t i = 0; i < num_entries; i++)
 			{
@@ -111,6 +129,7 @@
 			}
 			nk_group_end(context);
 		}
+		free(title);
 		nk_layout_row_static(context, context->style.font->height * 1.75, width > 600 ? 300 : width / 2, 2);
 		if (nk_button_label(context, "Back")) {
 			pop_view();
@@ -160,8 +179,8 @@
 void view_about(struct nk_context *context)
 {
 	const char *lines[] = {
-		"BlastEm v0.6.1",
-		"Copyright 2012-2017 Michael Pavone",
+		"BlastEm v0.6.3-pre",
+		"Copyright 2012-2019 Michael Pavone",
 		"",
 		"BlastEm is a high performance open source",
 		"(GPLv3) Genesis/Megadrive emulator",
@@ -171,6 +190,8 @@
 		"Nemesis: Documentation and test ROMs",
 		"Charles MacDonald: Documentation",
 		"Eke-Eke: Documentation",
+		"Sauraen: YM2612/YM2203 Die Analysis",
+		"Alexey Khokholov: YM3438 Die Analysis",
 		"Bart Trzynadlowski: Documentation",
 		"KanedaFR: Hosting the best Sega forum",
 		"Titan: Awesome demos and documentation",
@@ -293,6 +314,7 @@
 					free_slot_info(slots);
 					slots = NULL;
 				} else if (current_view == view_play) {
+					clear_view_stack();
 					set_content_binding_state(1);
 				}
 			} else {
@@ -314,6 +336,7 @@
 
 static int32_t keycode;
 static const char *set_binding;
+static uint8_t bind_click_release, click;
 char *set_label;
 void binding_group(struct nk_context *context, char *name, const char **binds, const char **bind_names, uint32_t num_binds, tern_node *binding_lookup)
 {
@@ -332,6 +355,7 @@
 			if (nk_button_label(context, tern_find_ptr_default(binding_lookup, binds[i], "Not Set"))) {
 				set_binding = binds[i];
 				set_label = strdup(label);
+				bind_click_release = 0;
 				keycode = 0;
 			}
 			if (label_alloc) {
@@ -429,11 +453,11 @@
 	};
 	const char *general_binds[] = {
 		"ui.exit", "ui.save_state", "ui.toggle_fullscreen", "ui.soft_reset", "ui.reload",
-		"ui.screenshot", "ui.sms_pause", "ui.toggle_keyboard_cpatured", "ui.release_mouse"
+		"ui.screenshot", "ui.vgm_log", "ui.sms_pause", "ui.toggle_keyboard_cpatured", "ui.release_mouse"
 	};
 	const char *general_names[] = {
 		"Show Menu", "Quick Save", "Toggle Fullscreen", "Soft Reset", "Reload Media",
-		"Internal Screenshot", "SMS Pause", "Capture Keyboard", "Release Mouse"
+		"Internal Screenshot", "Toggle VGM Log", "SMS Pause", "Capture Keyboard", "Release Mouse"
 	};
 	const char *speed_binds[] = {
 		"ui.next_speed", "ui.prev_speed",
@@ -483,7 +507,7 @@
 		nk_layout_row_static(context, 30, width/2-30, 1);
 		nk_label(context, "Press new key for", NK_TEXT_CENTERED);
 		nk_label(context, set_label, NK_TEXT_CENTERED);
-		if (nk_button_label(context, "Cancel")) {
+		if (nk_button_label(context, "Cancel") && bind_click_release) {
 			free(set_label);
 			set_binding = set_label = NULL;
 		} else if (keycode) {
@@ -517,6 +541,8 @@
 			}
 			free(set_label);
 			set_binding = set_label = NULL;
+		} else if (!click) {
+			bind_click_release = 1;
 		}
 		nk_end(context);
 	}
@@ -578,6 +604,7 @@
 		conf_names = tern_insert_ptr(conf_names, "ui.vdp_debug_pal", "VDP Debug Palette");
 		conf_names = tern_insert_ptr(conf_names, "ui.enter_debugger", "Enter CPU Debugger");
 		conf_names = tern_insert_ptr(conf_names, "ui.screenshot", "Take Screenshot");
+		conf_names = tern_insert_ptr(conf_names, "ui.vgm_log", "Toggle VGM Log");
 		conf_names = tern_insert_ptr(conf_names, "ui.exit", "Show Menu");
 		conf_names = tern_insert_ptr(conf_names, "ui.save_state", "Quick Save");
 		conf_names = tern_insert_ptr(conf_names, "ui.set_speed.0", "Set Speed 0");
@@ -962,10 +989,16 @@
 
 static ui_image *select_best_image(controller_info *info)
 {
-	if (info->variant != VARIANT_NORMAL) {
-		return controller_ps4_6b;
+	if (info->variant != VARIANT_NORMAL || info->type == TYPE_SEGA) {
+		if (info->type == TYPE_PSX) {
+			return controller_ps4_6b;
+		} else {
+			return controller_gen_6b;
+		}
 	} else if (info->type == TYPE_PSX) {
 		return controller_ps4;
+	} else if (info->type == TYPE_NINTENDO) {
+		return controller_wiiu;
 	} else {
 		return controller_360;
 	}
@@ -1054,12 +1087,19 @@
 			});
 		}
 		
-		binding_box(context, bindings, "Right Shoulder", bind_box_left, font->height/2, bind_box_width,
-			selected_controller_info.variant == VARIANT_6B_BUMPERS ? 1 : 2, 
-			(int[]){
-			selected_controller_info.variant == VARIANT_6B_RIGHT ? SDL_CONTROLLER_BUTTON_LEFTSHOULDER : SDL_CONTROLLER_BUTTON_RIGHTSHOULDER,
-			AXIS | SDL_CONTROLLER_AXIS_TRIGGERLEFT
-		});
+		if (selected_controller_info.variant == VARIANT_NORMAL) {
+			binding_box(context, bindings, "Right Shoulder", bind_box_left, font->height/2, bind_box_width, 2, (int[]){
+				SDL_CONTROLLER_BUTTON_RIGHTSHOULDER,
+				AXIS | SDL_CONTROLLER_AXIS_TRIGGERRIGHT
+			});
+		} else {
+			binding_box(context, bindings, "Right Shoulder", bind_box_left, font->height/2, bind_box_width,
+				selected_controller_info.variant == VARIANT_6B_BUMPERS ? 1 : 2, 
+				(int[]){
+				selected_controller_info.variant == VARIANT_6B_RIGHT ? SDL_CONTROLLER_BUTTON_LEFTSHOULDER : AXIS | SDL_CONTROLLER_AXIS_TRIGGERRIGHT,
+				AXIS | SDL_CONTROLLER_AXIS_TRIGGERLEFT
+			});
+		}
 		
 		binding_box(context, bindings, "Misc Buttons", (render_width() - bind_box_width) / 2, font->height/2, bind_box_width, 3, (int[]){
 			SDL_CONTROLLER_BUTTON_BACK,
@@ -1096,12 +1136,19 @@
 			dpad_top = img_top;
 		}
 		
-		binding_box(context, bindings, "Left Shoulder", bind_box_left, font->height/2, bind_box_width, 
-			selected_controller_info.variant == VARIANT_6B_BUMPERS ? 1 : 2, 
-			(int[]){
-			selected_controller_info.variant == VARIANT_6B_RIGHT ? SDL_CONTROLLER_BUTTON_LEFTSTICK : SDL_CONTROLLER_BUTTON_LEFTSHOULDER,
-			SDL_CONTROLLER_BUTTON_RIGHTSTICK
-		});
+		if (selected_controller_info.variant == VARIANT_NORMAL) {
+			binding_box(context, bindings, "Left Shoulder", bind_box_left, font->height/2, bind_box_width, 2, (int[]){
+				SDL_CONTROLLER_BUTTON_LEFTSHOULDER,
+				AXIS | SDL_CONTROLLER_AXIS_TRIGGERLEFT
+			});
+		} else {
+			binding_box(context, bindings, "Left Shoulder", bind_box_left, font->height/2, bind_box_width, 
+				selected_controller_info.variant == VARIANT_6B_BUMPERS ? 1 : 2, 
+				(int[]){
+				selected_controller_info.variant == VARIANT_6B_RIGHT ? SDL_CONTROLLER_BUTTON_LEFTSTICK : AXIS | SDL_CONTROLLER_AXIS_TRIGGERLEFT,
+				SDL_CONTROLLER_BUTTON_RIGHTSTICK
+			});
+		}
 		
 		binding_box(context, bindings, "D-pad", dpad_left, dpad_top, bind_box_width, 4, (int[]){
 			SDL_CONTROLLER_BUTTON_DPAD_UP,
@@ -1114,7 +1161,7 @@
 		
 		def_font->handle.height = orig_height;
 		nk_layout_row_static(context, orig_height + 4, (render_width() - 2*orig_height) / 4, 1);
-		if (nk_button_label(context, "Back")) {
+		if (nk_button_label(context, controller_binding_changed ? "Save" : "Back")) {
 			pop_view();
 			if (controller_binding_changed) {
 				push_view(view_select_binding_dest);
@@ -1128,7 +1175,7 @@
 static int current_axis;
 static int button_pressed, last_button;
 static int hat_moved, hat_value, last_hat, last_hat_value;
-static int axis_moved, axis_value, last_axis;
+static int axis_moved, axis_value, last_axis, last_axis_value;
 static char *mapping_string;
 static size_t mapping_pos;
 
@@ -1147,6 +1194,7 @@
 	mapping_string[mapping_pos++] = ':';
 }
 
+static uint8_t initial_controller_config;
 #define QUIET_FRAMES 9
 static void view_controller_mappings(struct nk_context *context)
 {
@@ -1206,26 +1254,39 @@
 				
 				last_hat = hat_moved;
 				last_hat_value = hat_value;
-			} else if (axis_moved >= 0 && abs(axis_value) > 1000 && axis_moved != last_axis) {
+			} else if (axis_moved >= 0 && abs(axis_value) > 1000 && (
+					axis_moved != last_axis || (
+						axis_value/abs(axis_value) != last_axis_value/abs(axis_value) && current_button >= SDL_CONTROLLER_BUTTON_DPAD_UP
+					)
+				)) {
 				if (current_button <= SDL_CONTROLLER_BUTTON_B || axis_moved != button_a_axis) {
 					start_mapping();
+					if (current_button >= SDL_CONTROLLER_BUTTON_DPAD_UP) {
+						mapping_string[mapping_pos++] = axis_value >= 0 ? '+' : '-';
+					}
 					mapping_string[mapping_pos++] = 'a';
 					if (axis_moved > 9) {
 						mapping_string[mapping_pos++] = '0' + axis_moved / 10;
 					}
 					mapping_string[mapping_pos++] = '0' + axis_moved % 10;
 					last_axis = axis_moved;
+					last_axis_value = axis_value;
 				}
 				added_mapping = 1;
 			}
 		}
 			
-		if (added_mapping) {
+		while (added_mapping) {
 			quiet = QUIET_FRAMES;
 			if (current_button < SDL_CONTROLLER_BUTTON_MAX) {
 				current_button++;
 				if (current_button == SDL_CONTROLLER_BUTTON_MAX) {
 					current_axis = 0;
+					if (get_axis_label(&selected_controller_info, current_axis)) {
+						added_mapping = 0;
+					}
+				} else if (get_button_label(&selected_controller_info, current_button)) {
+					added_mapping = 0;
 				}
 			} else {
 				current_axis++;
@@ -1236,8 +1297,13 @@
 					save_controller_mapping(selected_controller, mapping_string);
 					free(mapping_string);
 					pop_view();
-					push_view(view_controller_bindings);
-					controller_binding_changed = 0;
+					if (initial_controller_config) {
+						push_view(view_controller_bindings);
+						controller_binding_changed = 0;
+					}
+					added_mapping = 0;
+				} else if (get_axis_label(&selected_controller_info, current_axis)) {
+					added_mapping = 0;
 				}
 			}
 		}
@@ -1248,6 +1314,31 @@
 	}
 }
 
+//Resets the mapping capture state, seeds mapping_string with the joystick name and opens the mapping view
+static void show_mapping_view(void)
+{
+	current_button = SDL_CONTROLLER_BUTTON_A;
+	button_pressed = last_button = last_hat = axis_moved = last_axis = -1;
+	last_axis_value = 0;
+	SDL_Joystick *joy = render_get_joystick(selected_controller);
+	const char *name = SDL_JoystickName(joy);
+	if (!name) {
+		//SDL returns NULL when it has no name for the device; strlen(NULL) would be undefined behavior
+		name = "";
+	}
+	size_t namesz = strlen(name);
+	//512 bytes beyond the name are reserved for the mapping text appended afterwards
+	mapping_string = malloc(512 + namesz);
+	for (mapping_pos = 0; mapping_pos < namesz; mapping_pos++)
+	{
+		//commas and line breaks in the name are replaced with spaces
+		char c = name[mapping_pos];
+		mapping_string[mapping_pos] = (c == ',' || c == '\n' || c == '\r') ? ' ' : c;
+	}
+	
+	push_view(view_controller_mappings);
+}
+
 static void view_controller_variant(struct nk_context *context)
 {
 	uint8_t selected = 0;
@@ -1257,58 +1348,56 @@
 		nk_label(context, "Select the layout that", NK_TEXT_CENTERED);
 		nk_label(context, "best matches your controller", NK_TEXT_CENTERED);
 		nk_label(context, "", NK_TEXT_CENTERED);
-		if (nk_button_label(context, "4 face buttons")) {
-			selected_controller_info.variant = VARIANT_NORMAL;
-			selected = 1;
-		}
-		char buffer[512];
-		snprintf(buffer, sizeof(buffer), "6 face buttons including %s and %s", 
-			get_button_label(&selected_controller_info, SDL_CONTROLLER_BUTTON_RIGHTSHOULDER), 
-			get_axis_label(&selected_controller_info, SDL_CONTROLLER_AXIS_TRIGGERRIGHT)
-		);
-		if (nk_button_label(context, buffer)) {
-			selected_controller_info.variant = VARIANT_6B_RIGHT;
-			selected = 1;
-		}
-		snprintf(buffer, sizeof(buffer), "6 face buttons including %s and %s", 
-			get_button_label(&selected_controller_info, SDL_CONTROLLER_BUTTON_LEFTSHOULDER), 
-			get_button_label(&selected_controller_info, SDL_CONTROLLER_BUTTON_RIGHTSHOULDER)
-		);
-		if (nk_button_label(context, buffer)) {
-			selected_controller_info.variant = VARIANT_6B_BUMPERS;
-			selected = 1;
+		if (selected_controller_info.subtype == SUBTYPE_GENESIS) {
+			if (nk_button_label(context, "3 button")) {
+				selected_controller_info.variant = VARIANT_3BUTTON;
+				selected = 1;
+			}
+			if (nk_button_label(context, "Standard 6 button")) {
+				selected_controller_info.variant = VARIANT_6B_BUMPERS;
+				selected = 1;
+			}
+			if (nk_button_label(context, "6 button with 2 shoulder buttons")) {
+				selected_controller_info.variant = VARIANT_8BUTTON;
+				selected = 1;
+			}
+		} else {
+			if (nk_button_label(context, "4 face buttons")) {
+				selected_controller_info.variant = VARIANT_NORMAL;
+				selected = 1;
+			}
+			char buffer[512];
+			snprintf(buffer, sizeof(buffer), "6 face buttons including %s and %s", 
+				get_button_label(&selected_controller_info, SDL_CONTROLLER_BUTTON_RIGHTSHOULDER), 
+				get_axis_label(&selected_controller_info, SDL_CONTROLLER_AXIS_TRIGGERRIGHT)
+			);
+			if (nk_button_label(context, buffer)) {
+				selected_controller_info.variant = VARIANT_6B_RIGHT;
+				selected = 1;
+			}
+			snprintf(buffer, sizeof(buffer), "6 face buttons including %s and %s", 
+				get_button_label(&selected_controller_info, SDL_CONTROLLER_BUTTON_LEFTSHOULDER), 
+				get_button_label(&selected_controller_info, SDL_CONTROLLER_BUTTON_RIGHTSHOULDER)
+			);
+			if (nk_button_label(context, buffer)) {
+				selected_controller_info.variant = VARIANT_6B_BUMPERS;
+				selected = 1;
+			}
 		}
 		nk_end(context);
 	}
 	if (selected) {
 		save_controller_info(selected_controller, &selected_controller_info);
 		pop_view();
-		SDL_GameController *controller = render_get_controller(selected_controller);
-		if (controller) {
-			push_view(view_controller_bindings);
-			controller_binding_changed = 0;
-			SDL_GameControllerClose(controller);
-		} else {
-			current_button = SDL_CONTROLLER_BUTTON_A;
-			button_pressed = -1;
-			last_button = -1;
-			last_hat = -1;
-			axis_moved = -1;
-			last_axis = -1;
-			SDL_Joystick *joy = render_get_joystick(selected_controller);
-			const char *name = SDL_JoystickName(joy);
-			size_t namesz = strlen(name);
-			mapping_string = malloc(512 + namesz);
-			for (mapping_pos = 0; mapping_pos < namesz; mapping_pos++)
-			{
-				char c = name[mapping_pos];
-				if (c == ',' || c == '\n' || c == '\r') {
-					c = ' ';
-				}
-				mapping_string[mapping_pos] = c;
+		if (initial_controller_config) {
+			SDL_GameController *controller = render_get_controller(selected_controller);
+			if (controller) {
+				push_view(view_controller_bindings);
+				controller_binding_changed = 0;
+				SDL_GameControllerClose(controller);
+			} else {
+				show_mapping_view();
 			}
-			
-			push_view(view_controller_mappings);
 		}
 	}
 }
@@ -1324,7 +1413,22 @@
 				selected_controller_info.type = type_id;
 				selected_controller_info.subtype = first_subtype_id + i;
 				pop_view();
-				push_view(view_controller_variant);
+				if (selected_controller_info.subtype == SUBTYPE_SATURN) {
+					selected_controller_info.variant = VARIANT_6B_BUMPERS;
+					save_controller_info(selected_controller, &selected_controller_info);
+					if (initial_controller_config) {
+						SDL_GameController *controller = render_get_controller(selected_controller);
+						if (controller) {
+							push_view(view_controller_bindings);
+							controller_binding_changed = 0;
+							SDL_GameControllerClose(controller);
+						} else {
+							show_mapping_view();
+						}
+					}
+				} else {
+					push_view(view_controller_variant);
+				}
 			}
 		}
 		nk_group_end(context);
@@ -1353,36 +1457,85 @@
 void view_controllers(struct nk_context *context)
 {
 	if (nk_begin(context, "Controllers", nk_rect(0, 0, render_width(), render_height()), NK_WINDOW_NO_SCROLLBAR)) {
-		int height = (render_width() - 2*context->style.font->height) / MAX_JOYSTICKS;
+		int height = (render_height() - 2*context->style.font->height) / 5;
+		int inner_height = height - context->style.window.spacing.y;
+		const struct nk_user_font *font = context->style.font;
+		int bindings_width = font->width(font->userdata, font->height, "Bindings", strlen("Bindings")) + context->style.button.padding.x * 2;
+		int remap_width = font->width(font->userdata, font->height, "Remap", strlen("Remap")) + context->style.button.padding.x * 2;
+		int change_type_width = font->width(font->userdata, font->height, "Change Type", strlen("Change Type")) + context->style.button.padding.x * 2;
+		int total = bindings_width + remap_width + change_type_width;
+		float bindings_ratio = (float)bindings_width / total;
+		float remap_ratio = (float)remap_width / total;
+		float change_type_ratio = (float)change_type_width / total;
+		
+		
+		uint8_t found_controller = 0;
 		for (int i = 0; i < MAX_JOYSTICKS; i++)
 		{
 			SDL_Joystick *joy = render_get_joystick(i);
 			if (joy) {
+				found_controller = 1;
 				controller_info info = get_controller_info(i);
 				ui_image *controller_image = select_best_image(&info);
-				int image_width = height * controller_image->width / controller_image->height;
-				nk_layout_row_begin(context, NK_STATIC, height, 2);
-				nk_layout_row_push(context, image_width);
+				int image_width = inner_height * controller_image->width / controller_image->height;
+				nk_layout_space_begin(context, NK_STATIC, height, INT_MAX);
+				nk_layout_space_push(context, nk_rect(context->style.font->height / 2, 0, image_width, inner_height));
 				if (info.type == TYPE_UNKNOWN || info.type == TYPE_GENERIC_MAPPING) {
 					nk_label(context, "?", NK_TEXT_CENTERED);
 				} else {
 					nk_image(context, controller_image->ui);
 				}
-				nk_layout_row_push(context, render_width() - image_width - 2 * context->style.font->height);
-				if (nk_button_label(context, info.name)) {
-					selected_controller = i;
-					selected_controller_info = info;
-					if (info.type == TYPE_UNKNOWN || info.type == TYPE_GENERIC_MAPPING) {
+				int button_start = image_width + context->style.font->height;
+				int button_area_width = render_width() - image_width - 2 * context->style.font->height;
+				
+				nk_layout_space_push(context, nk_rect(button_start, 0, button_area_width, inner_height/2));
+				nk_label(context, info.name, NK_TEXT_CENTERED);
+				const struct nk_user_font *font = context->style.font;
+				if (info.type == TYPE_UNKNOWN || info.type == TYPE_GENERIC_MAPPING) {
+					int button_width = font->width(font->userdata, font->height, "Configure", strlen("Configure"));
+					nk_layout_space_push(context, nk_rect(button_start, height/2, button_width, inner_height/2));
+					if (nk_button_label(context, "Configure")) {
+						selected_controller = i;
+						selected_controller_info = info;
+						initial_controller_config = 1;
 						push_view(view_controller_type);
-					} else {
+					}
+				} else {
+					button_area_width -= 2 * context->style.window.spacing.x;
+					bindings_width = bindings_ratio * button_area_width;
+					nk_layout_space_push(context, nk_rect(button_start, height/2, bindings_width, inner_height/2));
+					if (nk_button_label(context, "Bindings")) {
+						selected_controller = i;
+						selected_controller_info = info;
 						push_view(view_controller_bindings);
 						controller_binding_changed = 0;
 					}
-					
+					button_start += bindings_width + context->style.window.spacing.x;
+					remap_width = remap_ratio * button_area_width;
+					nk_layout_space_push(context, nk_rect(button_start, height/2, remap_width, inner_height/2));
+					if (nk_button_label(context, "Remap")) {
+						selected_controller = i;
+						selected_controller_info = info;
+						initial_controller_config = 0;
+						show_mapping_view();
+					}
+					button_start += remap_width + context->style.window.spacing.x;
+					change_type_width = change_type_ratio * button_area_width;
+					nk_layout_space_push(context, nk_rect(button_start, height/2, change_type_width, inner_height/2));
+					if (nk_button_label(context, "Change Type")) {
+						selected_controller = i;
+						selected_controller_info = info;
+						initial_controller_config = 0;
+						push_view(view_controller_type);
+					}
 				}
-				nk_layout_row_end(context);
+				//nk_layout_row_end(context);
 			}
 		}
+		if (!found_controller) {
+			nk_layout_row_static(context, context->style.font->height, render_width() - 2 * context->style.font->height, 1);
+			nk_label(context, "No controllers detected", NK_TEXT_CENTERED);
+		}
 		nk_layout_row_static(context, context->style.font->height, (render_width() - 2 * context->style.font->height) / 2, 2);
 		nk_label(context, "", NK_TEXT_LEFT);
 		if (nk_button_label(context, "Back")) {
@@ -1423,6 +1576,24 @@
 	}
 }
 
+//Draws label plus a text edit box for the string config value at path; def is used when the path is unset
+void settings_string(struct nk_context *context, char *label, char *path, char *def)
+{
+	nk_label(context, label, NK_TEXT_LEFT);
+	char *curstr = tern_find_path_default(config, path, (tern_val){.ptrval = def}, TVAL_PTR).ptrval;
+	int len = strlen(curstr); //nk_edit_string takes the length as an int *, not a uint32_t *
+	int buffer_len = len > 100 ? len + 1 : 101;
+	char *buffer = calloc(buffer_len, 1); //zero filled, so the text stays NUL terminated after the copy
+	memcpy(buffer, curstr, len);
+	nk_edit_string(context, NK_EDIT_SIMPLE, buffer, &len, buffer_len-1, nk_filter_default);
+	buffer[len] = 0;
+	if (strcmp(buffer, curstr)) {
+		config_dirty = 1;
+		config = tern_insert_path(config, path, (tern_val){.ptrval = strdup(buffer)}, TVAL_PTR);
+	}
+	free(buffer);
+}
+
 void settings_int_property(struct nk_context *context, char *label, char *name, char *path, int def, int min, int max)
 {
 	char *curstr = tern_find_path(config, path, TVAL_PTR).ptrval;
@@ -1438,6 +1609,21 @@
 	}
 }
 
+//Float property row: draws label, edits the value found at path (or def) and stores changes as "%f" text
+void settings_float_property(struct nk_context *context, char *label, char *name, char *path, float def, float min, float max, float step)
+{
+	char *stored = tern_find_path(config, path, TVAL_PTR).ptrval;
+	float before = stored ? atof(stored) : def, edited = before;
+	nk_label(context, label, NK_TEXT_LEFT);
+	nk_property_float(context, name, min, &edited, max, step, step);
+	if (edited != before) {
+		char text[64];
+		snprintf(text, sizeof(text), "%f", edited);
+		config_dirty = 1;
+		config = tern_insert_path(config, path, (tern_val){.ptrval = strdup(text)}, TVAL_PTR);
+	}
+}
+
 typedef struct {
 	char *fragment;
 	char *vertex;
@@ -1466,7 +1652,7 @@
 			if (!dupe) {
 				if (num_progs == prog_storage) {
 					prog_storage = prog_storage ? prog_storage*2 : 4;
-					progs = realloc(progs, sizeof(progs) * prog_storage);
+					progs = realloc(progs, sizeof(*progs) * prog_storage);
 				}
 				progs[num_progs].vertex = NULL;
 				progs[num_progs++].fragment = strdup(entries[i].name); 
@@ -1512,8 +1698,13 @@
 		progs = NULL;
 		prog_storage = 0;
 	}
+#ifdef DATA_PATH
+	shader_dir = path_append(DATA_PATH, "shaders");
+#else
 	shader_dir = path_append(get_exe_dir(), "shaders");
+#endif
 	entries = get_dir_list(shader_dir, &num_entries);
+	free(shader_dir);
 	progs = get_shader_progs(entries, num_entries, progs, &num_progs, &prog_storage);
 	*num_out = num_progs;
 	return progs;
@@ -1649,13 +1840,24 @@
 		"128",
 		"64"
 	};
+	const char *dac[] = {
+		"zero_offset",
+		"linear"
+	};
+	const char *dac_desc[] = {
+		"Zero Offset",
+		"Linear"
+	};
 	const uint32_t num_rates = sizeof(rates)/sizeof(*rates);
 	const uint32_t num_sizes = sizeof(sizes)/sizeof(*sizes);
+	const uint32_t num_dacs = sizeof(dac)/sizeof(*dac);
 	static int32_t selected_rate = -1;
 	static int32_t selected_size = -1;
-	if (selected_rate < 0 || selected_size < 0) {
+	static int32_t selected_dac = -1;
+	if (selected_rate < 0 || selected_size < 0 || selected_dac < 0) {
 		selected_rate = find_match(rates, num_rates, "autio\0rate\0", "48000");
 		selected_size = find_match(sizes, num_sizes, "audio\0buffer\0", "512");
+		selected_dac = find_match(dac, num_dacs, "audio\0fm_dac\0", "zero_offset");
 	}
 	uint32_t width = render_width();
 	uint32_t height = render_height();
@@ -1668,12 +1870,60 @@
 		selected_rate = settings_dropdown(context, "Rate in Hz", rates, num_rates, selected_rate, "audio\0rate\0");
 		selected_size = settings_dropdown(context, "Buffer Samples", sizes, num_sizes, selected_size, "audio\0buffer\0");
 		settings_int_input(context, "Lowpass Cutoff Hz", "audio\0lowpass_cutoff\0", "3390");
+		settings_float_property(context, "Gain (dB)", "Overall", "audio\0gain\0", 0, -30.0f, 30.0f, 0.5f);
+		settings_float_property(context, "", "FM", "audio\0fm_gain\0", 0, -30.0f, 30.0f, 0.5f);
+		settings_float_property(context, "", "PSG", "audio\0psg_gain\0", 0, -30.0f, 30.0f, 0.5f);
+		selected_dac = settings_dropdown_ex(context, "FM DAC", dac, dac_desc, num_dacs, selected_dac, "audio\0fm_dac\0");
 		if (nk_button_label(context, "Back")) {
 			pop_view();
 		}
 		nk_end(context);
 	}
 }
+//Collection state for model_iter: two parallel growable arrays plus their bookkeeping counters
+typedef struct {
+	const char **models, **names;
+	uint32_t   num_models, storage; //entries in use / entries allocated in each array
+} model_foreach_state;
+//tern_foreach callback: for each TVAL_NODE entry stores a copy of key and the result of its "name" lookup
+void model_iter(char *key, tern_val val, uint8_t valtype, void *data)
+{
+	if (valtype != TVAL_NODE) {
+		return;
+	}
+	model_foreach_state *state = data;
+	if (state->num_models == state->storage) {
+		state->storage = state->storage ? state->storage * 2 : 4; //doubling alone never grows a zero capacity
+		state->models = realloc(state->models, state->storage * sizeof(*state->models));
+		state->names = realloc(state->names, state->storage * sizeof(*state->names));
+	}
+	char *def = strdup(key);
+	state->models[state->num_models] = def;
+	state->names[state->num_models++] = tern_find_ptr_default(val.ptrval, "name", def);
+}
+
+//Result of get_models: the two parallel arrays without the bookkeeping counters
+typedef struct {
+	const char **models, **names;
+} models;
+
+//Runs model_iter over get_systems_config() and returns the collected arrays; *num_out receives the count
+models get_models(uint32_t *num_out)
+{
+	enum { INITIAL_STORAGE = 4 };
+	tern_node *systems = get_systems_config();
+	model_foreach_state collected = {
+		.models = calloc(INITIAL_STORAGE, sizeof(char *)),
+		.names = calloc(INITIAL_STORAGE, sizeof(char *)),
+		.num_models = 0,
+		.storage = INITIAL_STORAGE
+	};
+	tern_foreach(systems, model_iter, &collected);
+	*num_out = collected.num_models;
+	models result = {.models = collected.models, .names = collected.names};
+	return result;
+}
+
 void view_system_settings(struct nk_context *context)
 {
 	const char *sync_opts[] = {
@@ -1696,12 +1946,25 @@
 	if (selected_region < 0) {
 		selected_region = find_match(region_codes, num_regions, "system\0default_region\0", "U");
 	}
+	static const char **model_opts;
+	static const char **model_names;
+	static uint32_t num_models;
+	if (!model_opts) {
+		models m = get_models(&num_models);
+		model_opts = m.models;
+		model_names = m.names;
+	}
+	static int32_t selected_model = -1;
+	if (selected_model < 0) {
+		selected_model = find_match(model_opts, num_models, "system\0model\0", "md1va3");
+	}
+	
 	const char *formats[] = {
 		"native",
 		"gst"
 	};
 	const uint32_t num_formats = sizeof(formats)/sizeof(*formats);
-	int32_t selected_format = -1;
+	static int32_t selected_format = -1;
 	if (selected_format < 0) {
 		selected_format = find_match(formats, num_formats, "ui\0state_format\0", "native");
 	}
@@ -1715,6 +1978,7 @@
 		selected_init = find_match(ram_inits, num_inits, "system\0ram_init\0", "zero");
 	}
 	const char *io_opts_1[] = {
+		"none",
 		"gamepad2.1",
 		"gamepad3.1",
 		"gamepad6.1",
@@ -1723,6 +1987,7 @@
 		"xband keyboard"
 	};
 	const char *io_opts_2[] = {
+		"none",
 		"gamepad2.2",
 		"gamepad3.2",
 		"gamepad6.2",
@@ -1743,14 +2008,19 @@
 	uint32_t desired_width = context->style.font->height * 10;
 	if (nk_begin(context, "System Settings", nk_rect(0, 0, width, height), 0)) {
 		nk_layout_row_static(context, context->style.font->height, desired_width, 2);
+		
+		selected_model = settings_dropdown_ex(context, "Model", model_opts, model_names, num_models, selected_model, "system\0model\0");
+		selected_io_1 = settings_dropdown_ex(context, "IO Port 1 Device", io_opts_1, device_type_names, num_io, selected_io_1, "io\0devices\0""1\0");
+		selected_io_2 = settings_dropdown_ex(context, "IO Port 2 Device", io_opts_2, device_type_names, num_io, selected_io_2, "io\0devices\0""2\0");
+		selected_region = settings_dropdown_ex(context, "Default Region", region_codes, regions, num_regions, selected_region, "system\0default_region\0");
 		selected_sync = settings_dropdown(context, "Sync Source", sync_opts, num_sync_opts, selected_sync, "system\0sync_source\0");
 		settings_int_property(context, "68000 Clock Divider", "", "clocks\0m68k_divider\0", 7, 1, 53);
-		settings_toggle(context, "Remember ROM Path", "ui\0remember_path\0", 1);
-		selected_region = settings_dropdown_ex(context, "Default Region", region_codes, regions, num_regions, selected_region, "system\0default_region\0");
 		selected_format = settings_dropdown(context, "Save State Format", formats, num_formats, selected_format, "ui\0state_format\0");
 		selected_init = settings_dropdown(context, "Initial RAM Value", ram_inits, num_inits, selected_init, "system\0ram_init\0");
-		selected_io_1 = settings_dropdown_ex(context, "IO Port 1 Device", io_opts_1, device_type_names, num_io, selected_io_1, "io\0devices\0""1\0");
-		selected_io_2 = settings_dropdown_ex(context, "IO Port 2 Device", io_opts_2, device_type_names, num_io, selected_io_2, "io\0devices\0""2\0");
+		settings_toggle(context, "Remember ROM Path", "ui\0remember_path\0", 1);
+		settings_toggle(context, "Save config with EXE", "ui\0config_in_exe_dir\0", 0);
+		settings_string(context, "Game Save Path", "ui\0save_path\0", "$USERDATA/blastem/$ROMNAME");
+		
 		if (nk_button_label(context, "Back")) {
 			pop_view();
 		}
@@ -1758,6 +2028,29 @@
 	}
 }
 
+void view_confirm_reset(struct nk_context *context)
+{
+	if (nk_begin(context, "Reset Confirm", nk_rect(0, 0, render_width(), render_height()), 0)) {
+		uint32_t desired_width = context->style.font->height * 20;
+		nk_layout_row_static(context, context->style.font->height, desired_width, 1);
+		nk_label(context, "This will reset all settings and controller", NK_TEXT_LEFT);
+		nk_label(context, "mappings back to the defaults.", NK_TEXT_LEFT);
+		nk_label(context, "Are you sure you want to proceed?", NK_TEXT_LEFT);
+		nk_layout_row_static(context, context->style.font->height * 1.5, desired_width / 2, 2);
+		if (nk_button_label(context, "Maybe not")) {
+			pop_view();
+		}
+		if (nk_button_label(context, "Yep, delete it all")) {
+			delete_custom_config();
+			config = load_config();
+			delete_controller_info();
+			config_dirty = 1;
+			pop_view();
+		}
+		nk_end(context);
+	}
+}
+
 void view_back(struct nk_context *context)
 {
 	pop_view();
@@ -1773,6 +2066,7 @@
 		{"Video", view_video_settings},
 		{"Audio", view_audio_settings},
 		{"System", view_system_settings},
+		{"Reset to Defaults", view_confirm_reset},
 		{"Back", view_back}
 	};
 	
@@ -1825,13 +2119,22 @@
 	if (current_view != view_play) {
 		nk_input_end(context);
 		current_view(context);
-		nk_sdl_render(NK_ANTI_ALIASING_ON, 512 * 1024, 128 * 1024);
+		if (fb_context) {
+			fb_context->fb.pixels = render_get_framebuffer(FRAMEBUFFER_UI, &fb_context->fb.pitch);
+			nk_rawfb_render(fb_context, nk_rgb(0,0,0), 0);
+			render_framebuffer_updated(FRAMEBUFFER_UI, render_width());
+		} else {
+#ifndef DISABLE_OPENGL
+			nk_sdl_render(NK_ANTI_ALIASING_ON, 512 * 1024, 128 * 1024);
+#endif
+		}
 		nk_input_begin(context);
 	}
 }
 
 void ui_idle_loop(void)
 {
+	render_enable_gamepad_events(1);
 	const uint32_t MIN_UI_DELAY = 15;
 	static uint32_t last;
 	while (current_view != view_play)
@@ -1848,6 +2151,7 @@
 		persist_config(config);
 		config_dirty = 0;
 	}
+	render_enable_gamepad_events(0);
 }
 static void handle_event(SDL_Event *event)
 {
@@ -1866,15 +2170,29 @@
 			axis_moved = event->jaxis.axis;
 			axis_value = event->jaxis.value;
 		}
+	} else if (event->type == SDL_MOUSEBUTTONDOWN && event->button.button == 0) {
+		click = 1;
+	} else if (event->type == SDL_MOUSEBUTTONUP && event->button.button == 0) {
+		click = 0;
 	}
 	nk_sdl_handle_event(event);
 }
 
 static void context_destroyed(void)
 {
-	nk_sdl_shutdown();
+	if (context)
+	{
+		nk_sdl_shutdown();
+		context = NULL;
+	}
 }
 
+static void fb_resize(void)
+{
+	nk_rawfb_resize_fb(fb_context, NULL, render_width(), render_height(), 0);
+}
+
+#ifndef DISABLE_OPENGL
 static struct nk_image load_image_texture(uint32_t *buf, uint32_t width, uint32_t height)
 {
 	GLuint tex;
@@ -1891,11 +2209,29 @@
 #endif
 	return nk_image_id((int)tex);
 }
+#endif
+
+//Wraps buf (not copied) in a heap allocated rawfb_image and returns an nk_image pointing at that wrapper
+static struct nk_image load_image_rawfb(uint32_t *buf, uint32_t width, uint32_t height)
+{
+	struct rawfb_image *wrapped = calloc(1, sizeof(*wrapped));
+	*wrapped = (struct rawfb_image){
+		.pixels = buf, .pitch = width * sizeof(uint32_t),
+		.w = width, .h = height, .format = NK_FONT_ATLAS_RGBA32
+	};
+	return nk_image_ptr(wrapped);
+}
 
 static void texture_init(void)
 {
 	struct nk_font_atlas *atlas;
-	nk_sdl_font_stash_begin(&atlas);
+	if (fb_context) {
+		nk_rawfb_font_stash_begin(fb_context, &atlas);
+	} else {
+#ifndef DISABLE_OPENGL
+		nk_sdl_font_stash_begin(&atlas);
+#endif
+	}
 	uint32_t font_size;
 	uint8_t *font = default_font(&font_size);
 	if (!font) {
@@ -1903,33 +2239,73 @@
 	}
 	def_font = nk_font_atlas_add_from_memory(atlas, font, font_size, render_height() / 16, NULL);
 	free(font);
-	nk_sdl_font_stash_end();
+	if (fb_context) {
+		nk_rawfb_font_stash_end(fb_context);
+	} else {
+#ifndef DISABLE_OPENGL
+		nk_sdl_font_stash_end();
+#endif
+	}
 	nk_style_set_font(context, &def_font->handle);
 	for (uint32_t i = 0; i < num_ui_images; i++)
 	{
-		ui_images[i]->ui = load_image_texture(ui_images[i]->image_data, ui_images[i]->width, ui_images[i]->height);
+#ifndef DISABLE_OPENGL
+		if (fb_context) {
+#endif
+			ui_images[i]->ui = load_image_rawfb(ui_images[i]->image_data, ui_images[i]->width, ui_images[i]->height);
+#ifndef DISABLE_OPENGL
+		} else {
+			ui_images[i]->ui = load_image_texture(ui_images[i]->image_data, ui_images[i]->width, ui_images[i]->height);
+		}
+#endif
 	}
 }
 
+//Scales the checkbox metrics to the render height and applies one highlight color to several widgets
+static void style_init(void)
+{
+	struct nk_style *style = &context->style;
+	const struct nk_color highlight = {.r = 255, .g = 128, .b = 0, .a = 255};
+	style->checkbox.padding.x = render_height() / 120;
+	style->checkbox.padding.y = render_height() / 120;
+	style->checkbox.border = render_height() / 240;
+	style->checkbox.cursor_normal.type = NK_STYLE_ITEM_COLOR;
+	style->checkbox.cursor_normal.data.color = highlight;
+	style->checkbox.cursor_hover = style->checkbox.cursor_normal;
+	//hover text of the property inc/dec buttons and the combo button shares the highlight color
+	style->property.inc_button.text_hover = highlight;
+	style->property.dec_button.text_hover = highlight;
+	style->combo.button.text_hover = highlight;
+}
+
 static void context_created(void)
 {
 	context = nk_sdl_init(render_get_window());
+	nk_sdl_device_create();
+	style_init();
 	texture_init();
 }
 
 void show_pause_menu(void)
 {
-	set_content_binding_state(0);
-	context->style.window.background = nk_rgba(0, 0, 0, 128);
-	context->style.window.fixed_background = nk_style_item_color(nk_rgba(0, 0, 0, 128));
-	current_view = view_pause;
-	current_system->request_exit(current_system);
+	if (current_view == view_play) {
+		set_content_binding_state(0);
+		context->style.window.background = nk_rgba(0, 0, 0, 128);
+		context->style.window.fixed_background = nk_style_item_color(nk_rgba(0, 0, 0, 128));
+		current_view = view_pause;
+		context->input.selected_widget = 0;
+		system_request_exit(current_system, 1);
+	} else if (current_system && !set_binding) {
+		clear_view_stack();
+		show_play_view();
+	}
 }
 
 void show_play_view(void)
 {
 	set_content_binding_state(1);
 	current_view = view_play;
+	context->input.selected_widget = 0;
 }
 
 static uint8_t active;
@@ -1940,10 +2316,10 @@
 
 uint8_t is_nuklear_available(void)
 {
-	if (!render_has_gl()) {
+	/*if (!render_has_gl()) {
 		//currently no fallback if GL2 unavailable
 		return 0;
-	}
+	}*/
 	char *style = tern_find_path(config, "ui\0style\0", TVAL_PTR).ptrval;
 	if (!style) {
 		return 1;
@@ -1993,10 +2369,23 @@
 void blastem_nuklear_init(uint8_t file_loaded)
 {
 	context = nk_sdl_init(render_get_window());
+#ifndef DISABLE_OPENGL
+	if (render_has_gl()) {
+		nk_sdl_device_create();
+	} else {
+#endif
+		fb_context = nk_rawfb_init(NULL, context, render_width(), render_height(), 0);
+		render_set_ui_fb_resize_handler(fb_resize);
+#ifndef DISABLE_OPENGL
+	}
+#endif
+	style_init();
 	
 	controller_360 = load_ui_image("images/360.png");
 	controller_ps4 = load_ui_image("images/ps4.png");
 	controller_ps4_6b = load_ui_image("images/ps4_6b.png");
+	controller_wiiu = load_ui_image("images/wiiu.png");
+	controller_gen_6b = load_ui_image("images/genesis_6b.png");
 	
 	texture_init();
 	
--- a/nuklear_ui/font.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/nuklear_ui/font.c	Sat Jan 15 13:15:21 2022 -0800
@@ -1,12 +1,21 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdint.h>
+#include <string.h>
 #include "../util.h"
 #include "sfnt.h"
 
 char *default_font_path(void)
 {
-	FILE *fc_pipe = popen("fc-match -f '%{file}'", "r");
+#ifdef FONT_PATH
+	FILE *f = fopen(FONT_PATH, "rb");
+	if (f) {
+		fclose(f);
+		return strdup(FONT_PATH);
+	}
+#endif
+	//TODO: specify language dynamically once BlastEm is localized
+	FILE *fc_pipe = popen("fc-match :lang=en -f '%{file}'", "r");
 	if (!fc_pipe) {
 		return NULL;
 	}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nuklear_ui/font_android.c	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,195 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <limits.h>
+#include <string.h>
+#include "../util.h"
+#include "../paths.h"
+#include "sfnt.h"
+
+typedef enum {
+	STATE_DEFAULT,
+	STATE_DECL,
+	STATE_COMMENT,
+	STATE_TAG,
+	STATE_PRE_ATTRIB,
+	STATE_ATTRIB,
+	STATE_PRE_VALUE,
+	STATE_VALUE
+} parse_state;
+
+#define DEFAULT_WEIGHT 400
+
+//Scans /system/etc/fonts.xml for the normal-style font whose weight is closest to DEFAULT_WEIGHT and returns its path under /system/fonts, or NULL on failure
+char *default_font_path(void)
+{
+	//Would probably be better to call into Java for this, but this should do for now
+	FILE *f = fopen("/system/etc/fonts.xml", "rb");
+	if (!f) {
+		return NULL;
+	}
+	long size = file_size(f);
+	char *font_xml = size >= 0 ? malloc(size+1) : NULL; //+1 leaves room for the terminator written below
+	if (!font_xml || size != fread(font_xml, 1, size, f)) {
+		free(font_xml); //free(NULL) is a no-op, so a failed allocation ends up here too
+		fclose(f);
+		return NULL;
+	}
+	fclose(f);
+	font_xml[size] = 0;
+	
+	char *last_tag = NULL, *last_attrib = NULL, *last_value = NULL;
+	uint8_t last_style_was_normal = 0;
+	char *capture_best = NULL, *best = NULL;
+	int best_weight_diff = INT_MAX;
+	int last_weight = INT_MAX;
+	parse_state state = STATE_DEFAULT;
+	for(char *cur = font_xml; *cur; ++cur) {
+		switch (state)
+		{
+		case STATE_DEFAULT:
+			if (*cur == '<' && cur[1]) {
+				cur++;
+				switch(*cur)
+				{
+				case '?':
+					state = STATE_DECL;
+					break;
+				case '!':
+					if (cur[1] == '-' && cur[2] == '-') {
+						state = STATE_COMMENT;
+						cur++;
+					} else {
+						debug_message("Invalid comment\n");
+						cur = font_xml + size - 1; //move to the last byte so the loop ends after its increment
+					}
+					break;
+				default:
+					if (capture_best) {
+						cur[-1] = 0; //overwrite the '<' to terminate the captured element text
+						best = strip_ws(capture_best);
+						capture_best = NULL;
+						best_weight_diff = abs(last_weight - DEFAULT_WEIGHT);
+						debug_message("Found candidate %s with weight %d\n", best, last_weight);
+					}
+					state = STATE_TAG;
+					last_tag = cur;
+					last_attrib = NULL;
+					last_value = NULL;
+					last_weight = INT_MAX;
+					break;
+				}
+			}
+			break;
+		case STATE_DECL:
+			if (*cur == '?' && cur[1] == '>') {
+				cur++;
+				state = STATE_DEFAULT;
+			}
+			break;
+		case STATE_COMMENT:
+			if (*cur == '-' && cur[1] == '-' && cur[2] == '>') {
+				cur += 2;
+				state = STATE_DEFAULT;
+			}
+			break;
+		case STATE_TAG:
+			if (*cur == ' ' || *cur == '\t' || *cur == '\n' || *cur == '\r') {
+				*cur = 0; //terminate the tag name in place
+				state = STATE_PRE_ATTRIB;
+			} else if (*cur == '>') {
+				*cur = 0;
+				state = STATE_DEFAULT;
+			}
+			break;
+		case STATE_PRE_ATTRIB:
+			if (!(*cur == ' ' || *cur == '\t' || *cur == '\n' || *cur == '\r')) {
+				if (*cur == '>') {
+					state = STATE_DEFAULT;
+					if (last_style_was_normal && abs(last_weight - DEFAULT_WEIGHT) < best_weight_diff) {
+						capture_best = cur + 1; //element text starts right after the '>'
+					} else if (best && !strcmp("/family", last_tag)) {
+						debug_message("found family close tag, stopping search\n");
+						cur = font_xml + size - 1;
+					}
+				} else {
+					last_attrib = cur;
+					state = STATE_ATTRIB;
+				}
+			}
+			break;
+		case STATE_ATTRIB:
+			if (*cur == '=') {
+				*cur = 0; //terminate the attribute name in place
+				state = STATE_PRE_VALUE;
+			} else if (*cur == ' ' || *cur == '\t' || *cur == '\n' || *cur == '\r') {
+				*cur = 0;
+			}
+			break;
+		case STATE_PRE_VALUE:
+			if (*cur == '"') {
+				state = STATE_VALUE;
+				last_value = cur + 1;
+			}
+			break;
+		case STATE_VALUE:
+			if (*cur == '"') {
+				*cur = 0; //terminate the attribute value in place
+				state = STATE_PRE_ATTRIB;
+				if (!strcmp("weight", last_attrib)) {
+					last_weight = atoi(last_value);
+				} else if (!strcmp("style", last_attrib)) {
+					last_style_was_normal = !strcmp("normal", last_value);
+				}
+			}
+			break;
+		}
+	}
+	if (best) {
+		best = path_append("/system/fonts", best); //best was captured from font_xml, so build the path before that buffer is freed
+	}
+	free(font_xml);
+	return best;
+}
+
+//Reads the font file at path and returns its flattened sfnt data (size in *size_out), or NULL on
+//failure. path is only borrowed and never freed here, so string literals may be passed safely
+static uint8_t *try_load_font(const char *path, uint32_t *size_out)
+{
+	debug_message("Trying to load font %s\n", path);
+	FILE *f = fopen(path, "rb");
+	if (!f) {
+		return NULL;
+	}
+	long size = file_size(f);
+	uint8_t *buffer = size > 0 ? malloc(size) : NULL;
+	if (!buffer || size != fread(buffer, 1, size, f)) {
+		//free(NULL) is a no-op, so this covers both a failed allocation and a short read
+		free(buffer);
+		fclose(f);
+		return NULL;
+	}
+	fclose(f);
+	sfnt_container *sfnt = load_sfnt(buffer, size);
+	if (!sfnt) {
+		free(buffer);
+		return NULL;
+	}
+	return sfnt_flatten(sfnt->tables, size_out);
+}
+
+//Loads the font named by default_font_path, falling back to well-known Android font files
+uint8_t *default_font(uint32_t *size_out)
+{
+	char *path = default_font_path();
+	uint8_t *ret = path ? try_load_font(path, size_out) : NULL;
+	free(path); //the looked-up path is released here by its owner; free(NULL) is a no-op
+	//try some likely suspects if we failed to parse fonts.xml or failed to find the indicated font
+	if (!ret) {
+		ret = try_load_font("/system/fonts/Roboto-Regular.ttf", size_out);
+	}
+	if (!ret) {
+		ret = try_load_font("/system/fonts/DroidSans.ttf", size_out);
+	}
+	return ret;
+}
--- a/nuklear_ui/nuklear.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/nuklear_ui/nuklear.h	Sat Jan 15 13:15:21 2022 -0800
@@ -1845,7 +1845,8 @@
     NK_WIDGET_STATE_HOVERED     = NK_WIDGET_STATE_HOVER|NK_WIDGET_STATE_MODIFIED, /* widget is being hovered */
     NK_WIDGET_STATE_ACTIVE      = NK_WIDGET_STATE_ACTIVED|NK_WIDGET_STATE_MODIFIED /* widget is currently activated */
 };
-NK_API enum nk_widget_layout_states nk_widget(struct nk_rect*, const struct nk_context*);
+NK_API enum nk_widget_layout_states nk_widget(struct nk_rect*, struct nk_context*);
+NK_API enum nk_widget_layout_states nk_keynav_widget(struct nk_rect *, struct nk_context *);
 NK_API enum nk_widget_layout_states nk_widget_fitting(struct nk_rect*, struct nk_context*, struct nk_vec2);
 NK_API struct nk_rect nk_widget_bounds(struct nk_context*);
 NK_API struct nk_vec2 nk_widget_position(struct nk_context*);
@@ -3192,6 +3193,8 @@
 struct nk_input {
     struct nk_keyboard keyboard;
     struct nk_mouse mouse;
+	int widget_counter;
+	int selected_widget;
 };
 
 NK_API int nk_input_has_mouse_click(const struct nk_input*, enum nk_buttons);
@@ -11365,10 +11368,13 @@
     NK_ASSERT(alloc);
 
     if (!image_memory || !width || !height || !config_list || !count) return nk_false;
+	int pixel_area_estimate = 0;
     for (config_iter = config_list; config_iter; config_iter = config_iter->next) {
         range_count = nk_range_count(config_iter->range);
         total_range_count += range_count;
-        total_glyph_count += nk_range_glyph_count(config_iter->range, range_count);
+		int glyphs = nk_range_glyph_count(config_iter->range, range_count);
+        total_glyph_count += glyphs;
+		pixel_area_estimate += glyphs * config_iter->size * config_iter->size;
     }
 
     /* setup font baker from temporary memory */
@@ -11379,7 +11385,13 @@
     }
 
     *height = 0;
-    *width = (total_glyph_count > 1000) ? 1024 : 512;
+	int width_estimate = sqrt(pixel_area_estimate) + 0.5;
+	*width = 128;
+	while (*width < width_estimate)
+	{
+		*width *= 2;
+	}
+    //*width = (total_glyph_count > 1000) ? 1024 : 512;
     nk_tt_PackBegin(&baker->spc, 0, (int)*width, (int)max_height, 0, 1, alloc);
     {
         int input_i = 0;
@@ -12556,6 +12568,7 @@
     in->mouse.delta.y = 0;
     for (i = 0; i < NK_KEY_MAX; i++)
         in->keyboard.keys[i].clicked = 0;
+	in->widget_counter = -1;
 }
 
 NK_API void
@@ -12680,9 +12693,11 @@
     const struct nk_mouse_button *btn;
     if (!i) return nk_false;
     btn = &i->mouse.buttons[id];
-    if (!NK_INBOX(btn->clicked_pos.x,btn->clicked_pos.y,b.x,b.y,b.w,b.h))
-        return nk_false;
-    return nk_true;
+    if (NK_INBOX(btn->clicked_pos.x,btn->clicked_pos.y,b.x,b.y,b.w,b.h))
+        return nk_true;
+	if (i->selected_widget == i->widget_counter && i->keyboard.keys[NK_KEY_ENTER].clicked)
+		return nk_true;
+    return nk_false;
 }
 
 NK_API int
@@ -12730,7 +12745,7 @@
 nk_input_is_mouse_hovering_rect(const struct nk_input *i, struct nk_rect rect)
 {
     if (!i) return nk_false;
-    return NK_INBOX(i->mouse.pos.x, i->mouse.pos.y, rect.x, rect.y, rect.w, rect.h);
+    return i->selected_widget == i->widget_counter || NK_INBOX(i->mouse.pos.x, i->mouse.pos.y, rect.x, rect.y, rect.w, rect.h);
 }
 
 NK_API int
@@ -12752,7 +12767,9 @@
 nk_input_is_mouse_down(const struct nk_input *i, enum nk_buttons id)
 {
     if (!i) return nk_false;
-    return i->mouse.buttons[id].down;
+    return i->mouse.buttons[id].down || (
+		id == NK_BUTTON_LEFT && i->widget_counter == i->selected_widget && i->keyboard.keys[NK_KEY_ENTER].down
+	);
 }
 
 NK_API int
@@ -12763,6 +12780,11 @@
     b = &i->mouse.buttons[id];
     if (b->down && b->clicked)
         return nk_true;
+	if (
+		id == NK_BUTTON_LEFT && i->widget_counter == i->selected_widget 
+		&& i->keyboard.keys[NK_KEY_ENTER].down && i->keyboard.keys[NK_KEY_ENTER].clicked
+	) 
+		return nk_true;
     return nk_false;
 }
 
@@ -12770,7 +12792,14 @@
 nk_input_is_mouse_released(const struct nk_input *i, enum nk_buttons id)
 {
     if (!i) return nk_false;
-    return (!i->mouse.buttons[id].down && i->mouse.buttons[id].clicked);
+    if (!i->mouse.buttons[id].down && i->mouse.buttons[id].clicked)
+		return nk_true;
+	if (
+		id == NK_BUTTON_LEFT && i->widget_counter == i->selected_widget 
+		&& !i->keyboard.keys[NK_KEY_ENTER].down && i->keyboard.keys[NK_KEY_ENTER].clicked
+	) 
+		return nk_true;
+	return nk_false;
 }
 
 NK_API int
@@ -14598,7 +14627,7 @@
 NK_INTERN int
 nk_do_selectable(nk_flags *state, struct nk_command_buffer *out,
     struct nk_rect bounds, const char *str, int len, nk_flags align, int *value,
-    const struct nk_style_selectable *style, const struct nk_input *in,
+    const struct nk_style_selectable *style, struct nk_input *in,
     const struct nk_user_font *font)
 {
     int old_value;
@@ -14624,6 +14653,12 @@
     /* update button */
     if (nk_button_behavior(state, touch, in, NK_BUTTON_DEFAULT))
         *value = !(*value);
+		
+	if (!old_value && !(*value) && in && in->selected_widget == in->widget_counter) {
+		*value = 1;
+	} else if (!old_value && *value && in) {
+		in->selected_widget = in->widget_counter;
+	}
 
     /* draw selectable */
     if (style->draw_begin) style->draw_begin(out, style->userdata);
@@ -16149,8 +16184,18 @@
     nk_draw_property(out, style, &property, &label, *ws, name, name_len, font);
     if (style->draw_end) style->draw_end(out, style->userdata);
 
-    /* execute right button  */
-    if (nk_do_button_symbol(ws, out, left, style->sym_left, behavior, &style->dec_button, in, font)) {
+	int selected = in && in->selected_widget == in->widget_counter;
+	int enter_clicked;
+	if (selected) {
+		//prevent left/right buttons from activating when enter is pressed
+		enter_clicked = in->keyboard.keys[NK_KEY_ENTER].clicked;
+		in->keyboard.keys[NK_KEY_ENTER].clicked = 0;
+	}
+    /* execute left button  */
+    if (
+		nk_do_button_symbol(ws, out, left, style->sym_left, behavior, &style->dec_button, in, font)
+		|| (selected && in->keyboard.keys[NK_KEY_LEFT].clicked && in->keyboard.keys[NK_KEY_LEFT].down)
+	) {
         switch (variant->kind) {
         default: break;
         case NK_PROPERTY_INT:
@@ -16161,8 +16206,11 @@
             variant->value.d = NK_CLAMP(variant->min_value.d, variant->value.d - variant->step.d, variant->max_value.d); break;
         }
     }
-    /* execute left button  */
-    if (nk_do_button_symbol(ws, out, right, style->sym_right, behavior, &style->inc_button, in, font)) {
+    /* execute right button  */
+    if (
+		nk_do_button_symbol(ws, out, right, style->sym_right, behavior, &style->inc_button, in, font)
+		|| (selected && in->keyboard.keys[NK_KEY_RIGHT].clicked && in->keyboard.keys[NK_KEY_RIGHT].down)
+	) {
         switch (variant->kind) {
         default: break;
         case NK_PROPERTY_INT:
@@ -16173,6 +16221,9 @@
             variant->value.d = NK_CLAMP(variant->min_value.d, variant->value.d + variant->step.d, variant->max_value.d); break;
         }
     }
+	if (selected) {
+		in->keyboard.keys[NK_KEY_ENTER].clicked = enter_clicked;
+	}
     if (old != NK_PROPERTY_EDIT && (*state == NK_PROPERTY_EDIT)) {
         /* property has been activated so setup buffer */
         NK_MEMCPY(buffer, dst, (nk_size)*length);
@@ -17247,6 +17298,7 @@
 #ifdef NK_INCLUDE_VERTEX_BUFFER_OUTPUT
     nk_draw_list_init(&ctx->draw_list);
 #endif
+	ctx->input.widget_counter = -1;
 }
 
 #ifdef NK_INCLUDE_DEFAULT_ALLOCATOR
@@ -20226,7 +20278,7 @@
 }
 
 NK_API enum nk_widget_layout_states
-nk_widget(struct nk_rect *bounds, const struct nk_context *ctx)
+nk_widget_gen(struct nk_rect *bounds, struct nk_context *ctx, nk_byte is_keynav)
 {
     struct nk_rect c, v;
     struct nk_window *win;
@@ -20265,13 +20317,51 @@
     c.y = (float)((int)c.y);
     c.w = (float)((int)c.w);
     c.h = (float)((int)c.h);
+	int newly_selected = nk_false;
+	if (is_keynav) {
+		ctx->input.widget_counter++;
+		if (
+			ctx->input.selected_widget == (ctx->input.widget_counter + 1) && 
+			ctx->input.keyboard.keys[NK_KEY_UP].clicked && ctx->input.keyboard.keys[NK_KEY_UP].down
+		) {
+			ctx->input.selected_widget--;
+			newly_selected = nk_true;
+		} else if (
+			ctx->input.selected_widget == (ctx->input.widget_counter - 1) &&
+			ctx->input.keyboard.keys[NK_KEY_DOWN].clicked && ctx->input.keyboard.keys[NK_KEY_DOWN].down
+		) {
+			ctx->input.keyboard.keys[NK_KEY_DOWN].clicked = 0;
+			ctx->input.selected_widget++;
+			newly_selected = nk_true;
+		}
+	}
 
     nk_unify(&v, &c, bounds->x, bounds->y, bounds->x + bounds->w, bounds->y + bounds->h);
+	if (is_keynav && newly_selected) {
+		//ensure widget is fully on-screen if it was newly selected via a keyboard action
+		if ((bounds->y + bounds->h) > (c.y + c.h)) {
+			*layout->offset_y += bounds->y + bounds->h - (c.y + c.h);
+		} else if(c.y > bounds->y){
+			*layout->offset_y -= c.y - bounds->y;
+		}
+	}
     if (!NK_INTERSECT(c.x, c.y, c.w, c.h, bounds->x, bounds->y, bounds->w, bounds->h))
         return NK_WIDGET_INVALID;
-    if (!NK_INBOX(in->mouse.pos.x, in->mouse.pos.y, v.x, v.y, v.w, v.h))
-        return NK_WIDGET_ROM;
-    return NK_WIDGET_VALID;
+    if ((is_keynav && ctx->input.selected_widget == ctx->input.widget_counter ) || NK_INBOX(in->mouse.pos.x, in->mouse.pos.y, v.x, v.y, v.w, v.h))
+        return NK_WIDGET_VALID;
+    return NK_WIDGET_ROM;
+}
+
+NK_API enum nk_widget_layout_states
+nk_widget(struct nk_rect *bounds, struct nk_context *ctx)
+{
+	return nk_widget_gen(bounds, ctx, 0);
+}
+
+NK_API enum nk_widget_layout_states
+nk_keynav_widget(struct nk_rect *bounds, struct nk_context *ctx)
+{
+	return nk_widget_gen(bounds, ctx, 1);
 }
 
 NK_API enum nk_widget_layout_states
@@ -20294,7 +20384,7 @@
     win = ctx->current;
     style = &ctx->style;
     layout = win->layout;
-    state = nk_widget(bounds, ctx);
+    state = nk_keynav_widget(bounds, ctx);
 
     panel_padding = nk_panel_get_padding(style, layout->type);
     if (layout->row.index == 1) {
@@ -20615,13 +20705,17 @@
 
     win = ctx->current;
     layout = win->layout;
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
 
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
-    return nk_do_button_text(&ctx->last_widget_state, &win->buffer, bounds,
+    int ret = nk_do_button_text(&ctx->last_widget_state, &win->buffer, bounds,
                     title, len, style->text_alignment, ctx->button_behavior,
                     style, in, ctx->style.font);
+	if (ctx->last_widget_state & NK_WIDGET_STATE_ENTERED) {
+		ctx->input.selected_widget = ctx->input.widget_counter;
+	}
+	return ret;
 }
 
 NK_API int
@@ -20661,7 +20755,7 @@
     win = ctx->current;
     layout = win->layout;
 
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
 
@@ -20672,6 +20766,9 @@
     ret = nk_do_button(&ctx->last_widget_state, &win->buffer, bounds,
                 &button, in, ctx->button_behavior, &content);
     nk_draw_button(&win->buffer, &bounds, ctx->last_widget_state, &button);
+	if (ctx->last_widget_state & NK_WIDGET_STATE_ENTERED) {
+		ctx->input.selected_widget = ctx->input.widget_counter;
+	}
     return ret;
 }
 
@@ -20694,11 +20791,15 @@
 
     win = ctx->current;
     layout = win->layout;
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
-    return nk_do_button_symbol(&ctx->last_widget_state, &win->buffer, bounds,
+    int ret = nk_do_button_symbol(&ctx->last_widget_state, &win->buffer, bounds,
             symbol, ctx->button_behavior, style, in, ctx->style.font);
+	if (ctx->last_widget_state & NK_WIDGET_STATE_ENTERED) {
+		ctx->input.selected_widget = ctx->input.widget_counter;
+	}
+	return ret;
 }
 
 NK_API int
@@ -20729,7 +20830,7 @@
     win = ctx->current;
     layout = win->layout;
 
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
     return nk_do_button_image(&ctx->last_widget_state, &win->buffer, bounds,
@@ -20765,7 +20866,7 @@
     win = ctx->current;
     layout = win->layout;
 
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
     return nk_do_button_text_symbol(&ctx->last_widget_state, &win->buffer, bounds,
@@ -20812,7 +20913,7 @@
     win = ctx->current;
     layout = win->layout;
 
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
     return nk_do_button_text_image(&ctx->last_widget_state, &win->buffer,
@@ -20846,7 +20947,7 @@
 {
     struct nk_window *win;
     struct nk_panel *layout;
-    const struct nk_input *in;
+    struct nk_input *in;
     const struct nk_style *style;
 
     enum nk_widget_layout_states state;
@@ -20863,7 +20964,7 @@
     layout = win->layout;
     style = &ctx->style;
 
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return 0;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
     return nk_do_selectable(&ctx->last_widget_state, &win->buffer, bounds,
@@ -20948,7 +21049,7 @@
     style = &ctx->style;
     layout = win->layout;
 
-    state = nk_widget(&bounds, ctx);
+    state = nk_keynav_widget(&bounds, ctx);
     if (!state) return active;
     in = (state == NK_WIDGET_ROM || layout->flags & NK_WINDOW_ROM) ? 0 : &ctx->input;
     nk_do_toggle(&ctx->last_widget_state, &win->buffer, bounds, &active,
@@ -21423,7 +21524,7 @@
     win = ctx->current;
     layout = win->layout;
     style = &ctx->style;
-    s = nk_widget(&bounds, ctx);
+    s = nk_keynav_widget(&bounds, ctx);
     if (!s) return;
 
     /* calculate hash from name */
@@ -21473,6 +21574,7 @@
             ctx->input.mouse.grab = nk_true;
             ctx->input.mouse.grabbed = nk_true;
         }
+		ctx->input.selected_widget = ctx->input.widget_counter;
     }
     /* check if previously active property is now inactive */
     if (*state == NK_PROPERTY_DEFAULT && old_state != NK_PROPERTY_DEFAULT) {
@@ -22683,8 +22785,11 @@
             body.y = (panel->at_y + panel->footer_height + panel->border + padding.y + panel->row.height);
             body.h = (panel->bounds.y + panel->bounds.h) - body.y;
         }
+		int selected = ctx->input.selected_widget;
+		ctx->input.selected_widget = -1;
         {int pressed = nk_input_is_mouse_pressed(&ctx->input, NK_BUTTON_LEFT);
         int in_body = nk_input_is_mouse_hovering_rect(&ctx->input, body);
+		ctx->input.selected_widget = selected;
         if (pressed && in_body)
             popup->flags |= NK_WINDOW_HIDDEN;
         }
@@ -22757,7 +22862,7 @@
 
     win = ctx->current;
     style = &ctx->style;
-    s = nk_widget(&header, ctx);
+    s = nk_keynav_widget(&header, ctx);
     if (s == NK_WIDGET_INVALID)
         return 0;
 
@@ -23318,11 +23423,21 @@
     size.y = NK_MIN(size.y, (float)max_height);
     if (nk_combo_begin_label(ctx, items[selected], size)) {
         nk_layout_row_dynamic(ctx, (float)item_height, 1);
+		int main_item_widget = ctx->input.widget_counter;
         for (i = 0; i < count; ++i) {
-            if (nk_combo_item_label(ctx, items[i], NK_TEXT_LEFT))
+            if (nk_combo_item_label(ctx, items[i], NK_TEXT_LEFT)) {
                 selected = i;
+				ctx->input.selected_widget = main_item_widget;
+				//prevent below code from advancing selected widget
+				main_item_widget--;
+			}
         }
         nk_combo_end(ctx);
+		if (ctx->input.selected_widget <= main_item_widget) {
+			ctx->input.selected_widget = main_item_widget + 1;
+		} else if (ctx->input.selected_widget > main_item_widget + count) {
+			ctx->input.selected_widget = main_item_widget + count;
+		}
     }
     return selected;
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/nuklear_ui/nuklear_rawfb.h	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,1007 @@
+/*
+ * MIT License
+ *
+ * Copyright (c) 2016-2017 Patrick Rudolph <siro@das-labor.org>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+*/
+/*
+ * ==============================================================
+ *
+ *                              API
+ *
+ * ===============================================================
+ */
+#ifndef NK_RAWFB_H_
+#define NK_RAWFB_H_
+
+struct rawfb_context;
+
+/* All functions are thread-safe */
+NK_API struct rawfb_context *nk_rawfb_init(void *fb, struct nk_context *context, const unsigned int w, const unsigned int h, const unsigned int pitch);
+NK_API void                  nk_rawfb_render(const struct rawfb_context *rawfb, const struct nk_color clear, const unsigned char enable_clear);
+NK_API void                  nk_rawfb_shutdown(struct rawfb_context *rawfb);
+NK_API void                  nk_rawfb_resize_fb(struct rawfb_context *rawfb, void *fb, const unsigned int w, const unsigned int h, const unsigned int pitch);
+
+#endif
+/*
+ * ==============================================================
+ *
+ *                          IMPLEMENTATION
+ *
+ * ===============================================================
+ */
+#ifdef NK_RAWFB_IMPLEMENTATION
+
+struct rawfb_image {
+    void *pixels;
+    int w, h, pitch; /* the pixel helpers advance by pitch bytes per row */
+    enum nk_font_atlas_format format; /* NK_FONT_ATLAS_ALPHA8: one alpha byte per pixel; otherwise accessed as struct nk_color */
+};
+struct rawfb_context {
+    struct nk_context *ctx;
+    struct nk_rect scissors; /* clip rectangle; nk_rawfb_scissor stores the right/bottom edges in w/h */
+    struct rawfb_image fb; /* framebuffer that the nk_rawfb_* drawing helpers write into */
+    struct rawfb_image font_tex;
+    struct nk_font_atlas atlas;
+};
+
+#ifndef MIN
+#define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+#ifndef MAX
+#define MAX(a,b) ((a) < (b) ? (b) : (a))
+#endif
+
+/* Pack the color bytes starting at c into the framebuffer's 32-bit pixel layout */
+static unsigned int
+nk_color_from_byte(const nk_byte *c)
+{
+    unsigned int res = 0;
+#if defined(RAWFB_RGBX_8888)
+    res = ((unsigned int)c[0] << 16) | ((unsigned int)c[1] << 8) | ((unsigned int)c[2] << 0);
+#elif defined(RAWFB_XRGB_8888)
+    /* same bytes as a cast-and-load, without the unaligned / aliasing-violating access */
+    memcpy(&res, c, sizeof(res));
+#else
+#error Define one of RAWFB_RGBX_8888 , RAWFB_XRGB_8888
+#endif
+    return (res);
+}
+
+/* Write col at (x0, y0) of the framebuffer if the point is inside the scissor rectangle */
+static void
+nk_rawfb_setpixel(const struct rawfb_context *rawfb,
+    const short x0, const short y0, const struct nk_color col)
+{
+    const unsigned int packed = nk_color_from_byte(&col.r);
+    unsigned char *row;
+
+    /* scissors.w and scissors.h are compared as exclusive right/bottom limits */
+    if (y0 < rawfb->scissors.h && y0 >= rawfb->scissors.y &&
+        x0 >= rawfb->scissors.x && x0 < rawfb->scissors.w) {
+        /* pitch is applied in bytes, the column index in 32-bit pixels */
+        row = (unsigned char *)rawfb->fb.pixels + y0 * rawfb->fb.pitch;
+        ((unsigned int *)row)[x0] = packed;
+    }
+}
+
+static void
+nk_rawfb_line_horizontal(const struct rawfb_context *rawfb,
+    const short x0, const short y, const short x1, const struct nk_color col)
+{
+    /* Fills the pixels x0 .. x1-1 of row y with col. This function is called the
+     * most, so it is optimized a bit: it does not check for scissors or image
+     * borders. The caller has to make sure the span does not exceed bounds. */
+    unsigned int i, n;
+    unsigned int c[16]; /* block of 16 identical packed pixels */
+    unsigned char *pixels = rawfb->fb.pixels;
+    unsigned int *ptr;
+
+    pixels += y * rawfb->fb.pitch; /* pitch is applied in bytes */
+    ptr = (unsigned int *)pixels;
+    ptr += x0;
+
+    n = x1 - x0; /* number of pixels to write; x1 itself is not written */
+    for (i = 0; i < sizeof(c) / sizeof(c[0]); i++)
+        c[i] = nk_color_from_byte(&col.r);
+
+    while (n > 16) { /* copy whole 16-pixel blocks first */
+        memcpy((void *)ptr, c, sizeof(c));
+        n -= 16; ptr += 16;
+    } for (i = 0; i < n; i++) /* then the remaining 16 or fewer pixels */
+        ptr[i] = c[i];
+}
+
+/* Store col at (x0, y0) of img; coordinates outside the image are ignored.
+ * NK_FONT_ATLAS_ALPHA8 images keep only the alpha channel. */
+static void
+nk_rawfb_imagesetpixel(const struct rawfb_image *img,
+    const int x0, const int y0, const struct nk_color col)
+{
+    unsigned char *ptr;
+    NK_ASSERT(img);
+    /* row 0 and column 0 are valid pixels, so the lower bounds are inclusive */
+    if (y0 < img->h && y0 >= 0 && x0 >= 0 && x0 < img->w) {
+        ptr = (unsigned char *)img->pixels + img->pitch * y0;
+        if (img->format == NK_FONT_ATLAS_ALPHA8)
+            ptr[x0] = col.a;
+        else
+            ((struct nk_color *)ptr)[x0] = col;
+    }
+}
+
+/* Read the pixel at (x0, y0) of img; out-of-range reads give {0, 0, 0, 0}, ALPHA8 gives white plus the stored alpha */
+static struct nk_color
+nk_image_getpixel(const struct rawfb_image *img, const int x0, const int y0)
+{
+    struct nk_color col = {0, 0, 0, 0};
+    unsigned char *ptr;
+    NK_ASSERT(img);
+    /* row 0 and column 0 are valid pixels, so the lower bounds are inclusive */
+    if (y0 < img->h && y0 >= 0 && x0 >= 0 && x0 < img->w) {
+        ptr = (unsigned char *)img->pixels + img->pitch * y0;
+        if (img->format == NK_FONT_ATLAS_ALPHA8) {
+            col.a = ptr[x0];
+            col.b = col.g = col.r = 0xff;
+        } else {
+            col = ((struct nk_color *)ptr)[x0];
+        }
+    } return col;
+}
+
+/* Blend col, weighted by its alpha, over the pixel at (x0, y0) of img and store the result */
+static void
+nk_image_blendpixel(const struct rawfb_image *img,
+    const int x0, const int y0, struct nk_color col)
+{
+    struct nk_color under;
+    unsigned char keep;
+    if (col.a == 0)
+        return; /* fully transparent source: the destination is left untouched */
+    keep = 0xff - col.a;
+    under = nk_image_getpixel(img, x0, y0);
+    col.r = (col.r * col.a + under.r * keep) >> 8;
+    col.g = (col.g * col.a + under.g * keep) >> 8;
+    col.b = (col.b * col.a + under.b * keep) >> 8;
+    nk_rawfb_imagesetpixel(img, x0, y0, col);
+}
+
+/* Set the clip rectangle from (x, y, w, h), clamped to the framebuffer size.
+ * Note that scissors.w and scissors.h receive the right and bottom edges
+ * (x + w and y + h), not a width and a height. */
+static void
+nk_rawfb_scissor(struct rawfb_context *rawfb,
+    const float x, const float y, const float w, const float h)
+{
+    rawfb->scissors.x = MIN(MAX(x, 0), rawfb->fb.w);
+    rawfb->scissors.w = MIN(MAX(w + x, 0), rawfb->fb.w);
+    rawfb->scissors.y = MIN(MAX(y, 0), rawfb->fb.h);
+    rawfb->scissors.h = MIN(MAX(h + y, 0), rawfb->fb.h);
+}
+
+static void
+nk_rawfb_stroke_line(const struct rawfb_context *rawfb,
+    short x0, short y0, short x1, short y1,
+    const unsigned int line_thickness, const struct nk_color col)
+{ /* Draws a line from (x0, y0) to (x1, y1) in col; line_thickness is never read in this function. */
+    short tmp;
+    int dy, dx, stepx, stepy;
+
+    dy = y1 - y0;
+    dx = x1 - x0;
+
+    /* fast path: a horizontal line is clamped to the scissor rectangle and filled as one span */
+    if (dy == 0) {
+        if (dx == 0 || y0 >= rawfb->scissors.h || y0 < rawfb->scissors.y)
+            return;
+
+        if (dx < 0) {
+            /* swap x0 and x1 */
+            tmp = x1;
+            x1 = x0;
+            x0 = tmp;
+        }
+        x1 = MIN(rawfb->scissors.w - 1, x1);
+        x0 = MIN(rawfb->scissors.w - 1, x0);
+        x1 = MAX(rawfb->scissors.x, x1);
+        x0 = MAX(rawfb->scissors.x, x0);
+        nk_rawfb_line_horizontal(rawfb, x0, y0, x1, col);
+        return;
+    }
+    if (dy < 0) { /* work with absolute deltas and remember the direction in stepy/stepx */
+        dy = -dy;
+        stepy = -1;
+    } else stepy = 1;
+
+    if (dx < 0) {
+        dx = -dx;
+        stepx = -1;
+    } else stepx = 1;
+
+    dy <<= 1; /* doubled deltas keep the error term in integers */
+    dx <<= 1;
+
+    nk_rawfb_setpixel(rawfb, x0, y0, col);
+    if (dx > dy) { /* x changes faster: advance x every step, y when the error term allows */
+        int fraction = dy - (dx >> 1);
+        while (x0 != x1) {
+            if (fraction >= 0) {
+                y0 += stepy;
+                fraction -= dx;
+            }
+            x0 += stepx;
+            fraction += dy;
+            nk_rawfb_setpixel(rawfb, x0, y0, col);
+        }
+    } else { /* otherwise advance y every step, x when the error term allows */
+        int fraction = dx - (dy >> 1);
+        while (y0 != y1) {
+            if (fraction >= 0) {
+                x0 += stepx;
+                fraction -= dy;
+            }
+            y0 += stepy;
+            fraction += dx;
+            nk_rawfb_setpixel(rawfb, x0, y0, col);
+        }
+    }
+}
+
+static void
+nk_rawfb_fill_polygon(const struct rawfb_context *rawfb,
+    const struct nk_vec2i *pnts, int count, const struct nk_color col)
+{ /* Fills the polygon given by the first count points of pnts with col, one scanline at a time. */
+    int i = 0;
+    #define MAX_POINTS 64
+    int left = 10000, top = 10000, bottom = 0, right = 0;
+    int nodes, nodeX[MAX_POINTS], pixelX, pixelY, j, swap ;
+
+    if (count == 0) return;
+    if (count > MAX_POINTS) /* points beyond MAX_POINTS are ignored so nodeX cannot overflow */
+        count = MAX_POINTS;
+
+    /* Get polygon dimensions */
+    for (i = 0; i < count; i++) {
+        if (left > pnts[i].x)
+            left = pnts[i].x;
+        if (right < pnts[i].x)
+            right = pnts[i].x;
+        if (top > pnts[i].y)
+            top = pnts[i].y;
+        if (bottom < pnts[i].y)
+            bottom = pnts[i].y;
+    } bottom++; right++; /* make the bottom/right limits exclusive */
+
+    /* Polygon scanline algorithm released under public-domain by Darel Rex Finley, 2007 */
+    /*  Loop through the rows of the image. */
+    for (pixelY = top; pixelY < bottom; pixelY ++) {
+        nodes = 0; /*  Build a list of nodes. */
+        j = count - 1; /* j is the previous vertex, starting with the last to close the polygon */
+        for (i = 0; i < count; i++) {
+            if (((pnts[i].y < pixelY) && (pnts[j].y >= pixelY)) ||
+                ((pnts[j].y < pixelY) && (pnts[i].y >= pixelY))) {
+                nodeX[nodes++]= (int)((float)pnts[i].x
+                     + ((float)pixelY - (float)pnts[i].y) / ((float)pnts[j].y - (float)pnts[i].y)
+                     * ((float)pnts[j].x - (float)pnts[i].x));
+            } j = i;
+        }
+
+        /*  Sort the nodes, via a simple “Bubble” sort. */
+        i = 0;
+        while (i < nodes - 1) {
+            if (nodeX[i] > nodeX[i+1]) {
+                swap = nodeX[i];
+                nodeX[i] = nodeX[i+1];
+                nodeX[i+1] = swap;
+                if (i) i--;
+            } else i++;
+        }
+        /*  Fill the pixels between node pairs. */
+        for (i = 0; i < nodes; i += 2) {
+            if (nodeX[i+0] >= right) break;
+            if (nodeX[i+1] > left) {
+                if (nodeX[i+0] < left) nodeX[i+0] = left ;
+                if (nodeX[i+1] > right) nodeX[i+1] = right;
+                for (pixelX = nodeX[i]; pixelX < nodeX[i + 1]; pixelX++)
+                    nk_rawfb_setpixel(rawfb, pixelX, pixelY, col); /* setpixel applies the scissor test */
+            }
+        }
+    }
+    #undef MAX_POINTS
+}
+
+/* Outline one quarter of the ellipse inscribed in the box (x0, y0, w, h).
+ *
+ * s selects the quarter as pixel offsets from the ellipse centre; any value
+ * other than the four below draws nothing:
+ *     0   -> (+x, -y)      90  -> (-x, -y)
+ *     180 -> (+x, +y)      270 -> (-x, +y)
+ *
+ * line_thickness is currently ignored: the outline is always plotted one
+ * pixel at a time through nk_rawfb_setpixel.
+ */
+static void
+nk_rawfb_stroke_arc(const struct rawfb_context *rawfb,
+    short x0, short y0, short w, short h, const short s,
+    const short line_thickness, const struct nk_color col)
+{
+    /* Bresenham's ellipses - modified to draw one quarter */
+    const int a2 = (w * w) / 4;
+    const int b2 = (h * h) / 4;
+    const int fa2 = 4 * a2, fb2 = 4 * b2;
+    int x, y, sigma;
+    int sx, sy;
+
+    NK_UNUSED(line_thickness);
+
+    if (s != 0 && s != 90 && s != 180 && s != 270) return;
+    /* A width or height below 2 makes a2 or b2 zero. The loop conditions
+     * below compare products of a2 and b2, so with a zero factor one of them
+     * stays true forever. Reject such boxes instead of spinning. */
+    if (w < 2 || h < 2) return;
+
+    /* Direction of the selected quarter relative to the centre. */
+    sx = (s == 0 || s == 180) ? 1 : -1;
+    sy = (s == 180 || s == 270) ? 1 : -1;
+
+    /* Convert upper left to center */
+    h = (h + 1) / 2;
+    w = (w + 1) / 2;
+    x0 += w; y0 += h;
+
+    /* First half */
+    for (x = 0, y = h, sigma = 2*b2+a2*(1-2*h); b2*x <= a2*y; x++) {
+        nk_rawfb_setpixel(rawfb, x0 + sx * x, y0 + sy * y, col);
+        if (sigma >= 0) {
+            sigma += fa2 * (1 - y);
+            y--;
+        }
+        sigma += b2 * ((4 * x) + 6);
+    }
+
+    /* Second half */
+    for (x = w, y = 0, sigma = 2*a2+b2*(1-2*w); a2*y <= b2*x; y++) {
+        nk_rawfb_setpixel(rawfb, x0 + sx * x, y0 + sy * y, col);
+        if (sigma >= 0) {
+            sigma += fb2 * (1 - x);
+            x--;
+        }
+        sigma += a2 * ((4 * y) + 6);
+    }
+}
+
+/* Fill one quarter of the ellipse inscribed in the box (x0, y0, w, h).
+ *
+ * The quarter is built as a fan of triangles: each step of the ellipse walk
+ * fills the triangle (centre, current rim point, previous rim point) with
+ * nk_rawfb_fill_polygon.
+ *
+ * s selects the quarter as pixel offsets from the ellipse centre; any value
+ * other than the four below draws nothing:
+ *     0   -> (+x, -y)      90  -> (-x, -y)
+ *     180 -> (+x, +y)      270 -> (-x, +y)
+ */
+static void
+nk_rawfb_fill_arc(const struct rawfb_context *rawfb, short x0, short y0,
+    short w, short h, const short s, const struct nk_color col)
+{
+    /* Bresenham's ellipses - modified to fill one quarter */
+    const int a2 = (w * w) / 4;
+    const int b2 = (h * h) / 4;
+    const int fa2 = 4 * a2, fb2 = 4 * b2;
+    int x, y, sigma;
+    int sx, sy;
+    struct nk_vec2i pnts[3];
+
+    /* A width or height below 2 makes a2 or b2 zero. The loop conditions
+     * below compare products of a2 and b2, so with a zero factor one of them
+     * stays true forever. Reject such boxes instead of spinning. */
+    if (w < 2 || h < 2) return;
+    if (s != 0 && s != 90 && s != 180 && s != 270)
+        return;
+
+    /* Direction of the selected quarter relative to the centre. */
+    sx = (s == 0 || s == 180) ? 1 : -1;
+    sy = (s == 180 || s == 270) ? 1 : -1;
+
+    /* Convert upper left to center */
+    h = (h + 1) / 2;
+    w = (w + 1) / 2;
+    x0 += w;
+    y0 += h;
+
+    /* pnts[0] is the fan centre; pnts[2] trails one step behind pnts[1]. */
+    pnts[0].x = x0;
+    pnts[0].y = y0;
+    pnts[2].x = x0;
+    pnts[2].y = y0;
+
+    /* First half */
+    for (x = 0, y = h, sigma = 2*b2+a2*(1-2*h); b2*x <= a2*y; x++) {
+        pnts[1].x = x0 + sx * x;
+        pnts[1].y = y0 + sy * y;
+        nk_rawfb_fill_polygon(rawfb, pnts, 3, col);
+        pnts[2] = pnts[1];
+        if (sigma >= 0) {
+            sigma += fa2 * (1 - y);
+            y--;
+        }
+        sigma += b2 * ((4 * x) + 6);
+    }
+
+    /* Second half */
+    for (x = w, y = 0, sigma = 2*a2+b2*(1-2*w); a2*y <= b2*x; y++) {
+        pnts[1].x = x0 + sx * x;
+        pnts[1].y = y0 + sy * y;
+        nk_rawfb_fill_polygon(rawfb, pnts, 3, col);
+        pnts[2] = pnts[1];
+        if (sigma >= 0) {
+            sigma += fb2 * (1 - x);
+            x--;
+        }
+        sigma += a2 * ((4 * y) + 6);
+    }
+}
+
+/* Outline the rectangle (x, y, w, h).
+ *
+ * With r == 0 the four edges are drawn as plain lines. Otherwise the edges
+ * are shortened by r at both ends and the corners are closed with quarter
+ * arcs whose bounding boxes are 2*r wide and high.
+ */
+static void
+nk_rawfb_stroke_rect(const struct rawfb_context *rawfb,
+    const short x, const short y, const short w, const short h,
+    const short r, const short line_thickness, const struct nk_color col)
+{
+    /* Outer far edges. */
+    const int far_x = x + w;
+    const int far_y = y + h;
+    /* Box left over once the corner radius is taken off every side. */
+    const short xc = x + r;
+    const short yc = y + r;
+    const short wc = (short)(w - 2 * r);
+    const short hc = (short)(h - 2 * r);
+    const int inner_far_x = xc + wc;
+    const int inner_far_y = yc + hc;
+    const unsigned diameter = (unsigned)r*2;
+
+    if (r == 0) {
+        nk_rawfb_stroke_line(rawfb, x, y, far_x, y, line_thickness, col);
+        nk_rawfb_stroke_line(rawfb, x, far_y, far_x, far_y, line_thickness, col);
+        nk_rawfb_stroke_line(rawfb, x, y, x, far_y, line_thickness, col);
+        nk_rawfb_stroke_line(rawfb, far_x, y, far_x, far_y, line_thickness, col);
+        return;
+    }
+
+    /* Straight parts: top, right, bottom, left. */
+    nk_rawfb_stroke_line(rawfb, xc, y, inner_far_x, y, line_thickness, col);
+    nk_rawfb_stroke_line(rawfb, far_x, yc, far_x, inner_far_y, line_thickness, col);
+    nk_rawfb_stroke_line(rawfb, xc, far_y, inner_far_x, far_y, line_thickness, col);
+    nk_rawfb_stroke_line(rawfb, x, yc, x, inner_far_y, line_thickness, col);
+
+    /* Corners, one quarter arc each. */
+    nk_rawfb_stroke_arc(rawfb, inner_far_x - r, y,
+            diameter, diameter, 0 , line_thickness, col);
+    nk_rawfb_stroke_arc(rawfb, x, y,
+            diameter, diameter, 90 , line_thickness, col);
+    nk_rawfb_stroke_arc(rawfb, x, inner_far_y - r,
+            diameter, diameter, 270 , line_thickness, col);
+    nk_rawfb_stroke_arc(rawfb, inner_far_x - r, inner_far_y - r,
+            diameter, diameter, 180 , line_thickness, col);
+}
+
+/* Fill the rectangle (x, y, w, h) with a solid colour.
+ *
+ * With r == 0 the area is painted as h horizontal lines. Otherwise a
+ * twelve-vertex cross-shaped polygon covers everything except the four
+ * r-by-r corner squares, and each corner is then filled with a quarter arc.
+ */
+static void
+nk_rawfb_fill_rect(const struct rawfb_context *rawfb,
+    const short x, const short y, const short w, const short h,
+    const short r, const struct nk_color col)
+{
+    int row;
+
+    if (r != 0) {
+        /* Box left over once the corner radius is taken off every side. */
+        const short xc = x + r;
+        const short yc = y + r;
+        const short wc = (short)(w - 2 * r);
+        const short hc = (short)(h - 2 * r);
+        const unsigned diameter = (unsigned)r*2;
+        struct nk_vec2i cross[12];
+
+        /* Outline of the cross. */
+        cross[0].x = x;         cross[0].y = yc;
+        cross[1].x = xc;        cross[1].y = yc;
+        cross[2].x = xc;        cross[2].y = y;
+
+        cross[3].x = xc + wc;   cross[3].y = y;
+        cross[4].x = xc + wc;   cross[4].y = yc;
+        cross[5].x = x + w;     cross[5].y = yc;
+
+        cross[6].x = x + w;     cross[6].y = yc + hc;
+        cross[7].x = xc + wc;   cross[7].y = yc + hc;
+        cross[8].x = xc + wc;   cross[8].y = y + h;
+
+        cross[9].x = xc;        cross[9].y = y + h;
+        cross[10].x = xc;       cross[10].y = yc + hc;
+        cross[11].x = x;        cross[11].y = yc + hc;
+
+        nk_rawfb_fill_polygon(rawfb, cross, 12, col);
+
+        /* Round off the four corners. */
+        nk_rawfb_fill_arc(rawfb, xc + wc - r, y,
+                diameter, diameter, 0 , col);
+        nk_rawfb_fill_arc(rawfb, x, y,
+                diameter, diameter, 90 , col);
+        nk_rawfb_fill_arc(rawfb, x, yc + hc - r,
+                diameter, diameter, 270 , col);
+        nk_rawfb_fill_arc(rawfb, xc + wc - r, yc + hc - r,
+                diameter, diameter, 180 , col);
+        return;
+    }
+
+    /* Square corners: one horizontal line per row. */
+    for (row = y; row < y + h; row++)
+        nk_rawfb_stroke_line(rawfb, x, row, x + w, row, 1, col);
+}
+
+/* Fill the triangle (x0,y0) (x1,y1) (x2,y2) by handing its three corners
+ * to nk_rawfb_fill_polygon. */
+static void
+nk_rawfb_fill_triangle(const struct rawfb_context *rawfb,
+    const short x0, const short y0, const short x1, const short y1,
+    const short x2, const short y2, const struct nk_color col)
+{
+    struct nk_vec2i corners[3];
+
+    corners[0].x = x0;  corners[0].y = y0;
+    corners[1].x = x1;  corners[1].y = y1;
+    corners[2].x = x2;  corners[2].y = y2;
+
+    nk_rawfb_fill_polygon(rawfb, corners, 3, col);
+}
+
+/* Outline the triangle (x0,y0) (x1,y1) (x2,y2) with three lines, drawn in
+ * the order 0->1, 1->2, 2->0. */
+static void
+nk_rawfb_stroke_triangle(const struct rawfb_context *rawfb,
+    const short x0, const short y0, const short x1, const short y1,
+    const short x2, const short y2, const unsigned short line_thickness,
+    const struct nk_color col)
+{
+    short xs[3], ys[3];
+    int from;
+
+    xs[0] = x0; ys[0] = y0;
+    xs[1] = x1; ys[1] = y1;
+    xs[2] = x2; ys[2] = y2;
+
+    for (from = 0; from < 3; from++) {
+        const int to = (from + 1) % 3;
+        nk_rawfb_stroke_line(rawfb, xs[from], ys[from], xs[to], ys[to],
+                line_thickness, col);
+    }
+}
+
+/* Outline a closed polygon: consecutive vertices are joined with lines and
+ * the last vertex is joined back to the first.
+ *
+ * pnts  - polygon vertices
+ * count - number of vertices; nothing is drawn when it is below 1
+ */
+static void
+nk_rawfb_stroke_polygon(const struct rawfb_context *rawfb,
+    const struct nk_vec2i *pnts, const int count,
+    const unsigned short line_thickness, const struct nk_color col)
+{
+    int i;
+
+    /* The closing segment reads pnts[count-1]; without this guard an empty
+     * polygon would read one element before the start of the array. */
+    if (!pnts || count < 1)
+        return;
+
+    for (i = 1; i < count; ++i)
+        nk_rawfb_stroke_line(rawfb, pnts[i-1].x, pnts[i-1].y, pnts[i].x,
+                pnts[i].y, line_thickness, col);
+    nk_rawfb_stroke_line(rawfb, pnts[count-1].x, pnts[count-1].y,
+            pnts[0].x, pnts[0].y, line_thickness, col);
+}
+
+/* Draw an open chain of lines through pnts[0] .. pnts[count-1]; unlike the
+ * polygon variant the last point is not joined back to the first. */
+static void
+nk_rawfb_stroke_polyline(const struct rawfb_context *rawfb,
+    const struct nk_vec2i *pnts, const int count,
+    const unsigned short line_thickness, const struct nk_color col)
+{
+    int next = 1;
+
+    while (next < count) {
+        const struct nk_vec2i *from = &pnts[next - 1];
+        const struct nk_vec2i *to = &pnts[next];
+        nk_rawfb_stroke_line(rawfb, from->x, from->y, to->x, to->y,
+                line_thickness, col);
+        ++next;
+    }
+}
+
+/* Fill the ellipse inscribed in the box (x0, y0, w, h).
+ *
+ * Each step of the ellipse walk paints two horizontal lines, mirrored above
+ * and below the centre, spanning from the left rim to the right rim.
+ * Boxes narrower or lower than 2 pixels draw nothing.
+ */
+static void
+nk_rawfb_fill_circle(const struct rawfb_context *rawfb,
+    short x0, short y0, short w, short h, const struct nk_color col)
+{
+    /* Bresenham's ellipses */
+    const int a2 = (w * w) / 4;
+    const int b2 = (h * h) / 4;
+    const int fa2 = 4 * a2, fb2 = 4 * b2;
+    int x, y, sigma;
+
+    /* A width or height below 2 makes a2 or b2 zero. The loop conditions
+     * below compare products of a2 and b2, so with a zero factor one of them
+     * stays true forever. Reject such boxes instead of spinning. */
+    if (w < 2 || h < 2) return;
+
+    /* Convert upper left to center */
+    h = (h + 1) / 2;
+    w = (w + 1) / 2;
+    x0 += w;
+    y0 += h;
+
+    /* First half */
+    for (x = 0, y = h, sigma = 2*b2+a2*(1-2*h); b2*x <= a2*y; x++) {
+        nk_rawfb_stroke_line(rawfb, x0 - x, y0 + y, x0 + x, y0 + y, 1, col);
+        nk_rawfb_stroke_line(rawfb, x0 - x, y0 - y, x0 + x, y0 - y, 1, col);
+        if (sigma >= 0) {
+            sigma += fa2 * (1 - y);
+            y--;
+        }
+        sigma += b2 * ((4 * x) + 6);
+    }
+    /* Second half */
+    for (x = w, y = 0, sigma = 2*a2+b2*(1-2*w); a2*y <= b2*x; y++) {
+        nk_rawfb_stroke_line(rawfb, x0 - x, y0 + y, x0 + x, y0 + y, 1, col);
+        nk_rawfb_stroke_line(rawfb, x0 - x, y0 - y, x0 + x, y0 - y, 1, col);
+        if (sigma >= 0) {
+            sigma += fb2 * (1 - x);
+            x--;
+        }
+        sigma += a2 * ((4 * y) + 6);
+    }
+}
+
+/* Outline the ellipse inscribed in the box (x0, y0, w, h).
+ *
+ * Each step of the ellipse walk plots the current rim point mirrored into
+ * all four quarters. line_thickness is currently ignored: the outline is
+ * always plotted one pixel at a time through nk_rawfb_setpixel.
+ * Boxes narrower or lower than 2 pixels draw nothing.
+ */
+static void
+nk_rawfb_stroke_circle(const struct rawfb_context *rawfb,
+    short x0, short y0, short w, short h, const short line_thickness,
+    const struct nk_color col)
+{
+    /* Bresenham's ellipses */
+    const int a2 = (w * w) / 4;
+    const int b2 = (h * h) / 4;
+    const int fa2 = 4 * a2, fb2 = 4 * b2;
+    int x, y, sigma;
+
+    NK_UNUSED(line_thickness);
+
+    /* A width or height below 2 makes a2 or b2 zero. The loop conditions
+     * below compare products of a2 and b2, so with a zero factor one of them
+     * stays true forever. Reject such boxes instead of spinning. */
+    if (w < 2 || h < 2) return;
+
+    /* Convert upper left to center */
+    h = (h + 1) / 2;
+    w = (w + 1) / 2;
+    x0 += w;
+    y0 += h;
+
+    /* First half */
+    for (x = 0, y = h, sigma = 2*b2+a2*(1-2*h); b2*x <= a2*y; x++) {
+        nk_rawfb_setpixel(rawfb, x0 + x, y0 + y, col);
+        nk_rawfb_setpixel(rawfb, x0 - x, y0 + y, col);
+        nk_rawfb_setpixel(rawfb, x0 + x, y0 - y, col);
+        nk_rawfb_setpixel(rawfb, x0 - x, y0 - y, col);
+        if (sigma >= 0) {
+            sigma += fa2 * (1 - y);
+            y--;
+        }
+        sigma += b2 * ((4 * x) + 6);
+    }
+    /* Second half */
+    for (x = w, y = 0, sigma = 2*a2+b2*(1-2*w); a2*y <= b2*x; y++) {
+        nk_rawfb_setpixel(rawfb, x0 + x, y0 + y, col);
+        nk_rawfb_setpixel(rawfb, x0 - x, y0 + y, col);
+        nk_rawfb_setpixel(rawfb, x0 + x, y0 - y, col);
+        nk_rawfb_setpixel(rawfb, x0 - x, y0 - y, col);
+        if (sigma >= 0) {
+            sigma += fb2 * (1 - x);
+            x--;
+        }
+        sigma += a2 * ((4 * y) + 6);
+    }
+}
+
+/* Draw a cubic Bezier curve from p1 to p4 with control points p2 and p3.
+ *
+ * The curve is approximated by num_segments straight lines (at least one);
+ * each segment ends at the curve point for t = k / segments, truncated to
+ * short coordinates.
+ */
+static void
+nk_rawfb_stroke_curve(const struct rawfb_context *rawfb,
+    const struct nk_vec2i p1, const struct nk_vec2i p2,
+    const struct nk_vec2i p3, const struct nk_vec2i p4,
+    const unsigned int num_segments, const unsigned short line_thickness,
+    const struct nk_color col)
+{
+    const unsigned int steps = MAX(num_segments, 1);
+    const float dt = 1.0f/(float)steps;
+    struct nk_vec2i prev = p1;
+    unsigned int k = 1;
+
+    while (k <= steps) {
+        const float t = dt * (float)k;
+        const float u = 1.0f - t;
+        /* Cubic Bernstein weights for the four control points. */
+        const float b1 = u*u*u;
+        const float b2 = 3*u*u*t;
+        const float b3 = 3*u*t*t;
+        const float b4 = t * t *t;
+        const float px = b1 * p1.x + b2 * p2.x + b3 * p3.x + b4 * p4.x;
+        const float py = b1 * p1.y + b2 * p2.y + b3 * p3.y + b4 * p4.y;
+
+        nk_rawfb_stroke_line(rawfb, prev.x, prev.y,
+                (short)px, (short)py, line_thickness,col);
+        prev.x = (short)px;
+        prev.y = (short)py;
+        ++k;
+    }
+}
+
+/* Paint the whole framebuffer with col: a square-cornered filled rectangle
+ * from the origin covering fb.w by fb.h. */
+static void
+nk_rawfb_clear(const struct rawfb_context *rawfb, const struct nk_color col)
+{
+    const short full_w = rawfb->fb.w;
+    const short full_h = rawfb->fb.h;
+
+    nk_rawfb_fill_rect(rawfb, 0, 0, full_w, full_h, 0, col);
+}
+
+/* Allocate and set up a raw-framebuffer rendering context.
+ *
+ * fb      - caller-owned pixel memory to draw into
+ * context - nuklear context whose command list will be rendered
+ * w, h    - framebuffer size
+ * pitch   - framebuffer pitch, stored as given
+ *
+ * Returns the new context, or NULL when the allocation fails. The font
+ * texture starts out empty (ALPHA8, 0x0) and the scissor rectangle is set
+ * to the full framebuffer.
+ */
+NK_API struct rawfb_context*
+nk_rawfb_init(void *fb, struct nk_context *context, const unsigned int w, const unsigned int h,
+    const unsigned int pitch)
+{
+    struct rawfb_context *self = malloc(sizeof(struct rawfb_context));
+    if (self == NULL)
+        return NULL;
+    nk_memset(self, 0, sizeof(struct rawfb_context));
+
+    self->ctx = context;
+
+    /* Target framebuffer. */
+    self->fb.pixels = fb;
+    self->fb.w = w;
+    self->fb.h = h;
+#if defined(RAWFB_XRGB_8888) || defined(RAWFB_RGBX_8888)
+    self->fb.format = NK_FONT_ATLAS_RGBA32;
+    self->fb.pitch = pitch;
+#else
+    #error Fixme
+#endif
+
+    /* Font texture, filled in later by nk_rawfb_font_stash_end. */
+    self->font_tex.format = NK_FONT_ATLAS_ALPHA8;
+    self->font_tex.w = self->font_tex.h = 0;
+
+    nk_rawfb_scissor(self, 0, 0, self->fb.w, self->fb.h);
+    return self;
+}
+
+/* Start building the context's font atlas and hand it back through *atlas
+ * so the caller can add fonts before calling nk_rawfb_font_stash_end. */
+NK_API void
+nk_rawfb_font_stash_begin(struct rawfb_context *rawfb, struct nk_font_atlas **atlas)
+{
+    struct nk_font_atlas *own = &rawfb->atlas;
+
+    nk_font_atlas_init_default(own);
+    nk_font_atlas_begin(own);
+    *atlas = own;
+}
+
+/* Bake the font atlas and keep a private copy of the baked texture.
+ *
+ * The baked image is copied into heap memory owned by the context
+ * (font_tex.pixels); a copy left by an earlier call is released first.
+ * Afterwards the atlas is finalised, the default font (if any) becomes the
+ * style font and the atlas cursors are loaded.
+ *
+ * If baking or the allocation fails the function returns early, without
+ * finalising the atlas or selecting a font.
+ */
+NK_API void
+nk_rawfb_font_stash_end(struct rawfb_context *rawfb)
+{
+    const void *tex;
+    void *copy;
+    size_t tex_size;
+
+    tex = nk_font_atlas_bake(&rawfb->atlas, &rawfb->font_tex.w, &rawfb->font_tex.h, rawfb->font_tex.format);
+    if (!tex) return;
+
+    switch(rawfb->font_tex.format) {
+    case NK_FONT_ATLAS_ALPHA8:
+        rawfb->font_tex.pitch = rawfb->font_tex.w * 1;
+        break;
+    case NK_FONT_ATLAS_RGBA32:
+        rawfb->font_tex.pitch = rawfb->font_tex.w * 4;
+        break;
+    default:
+        break;
+    }
+
+    /* Drop the copy from a previous bake; free(NULL) is a no-op on the
+     * first call because nk_rawfb_init zeroes the context. */
+    free(rawfb->font_tex.pixels);
+    rawfb->font_tex.pixels = NULL;
+
+    /* Store the font texture in tex scratch memory */
+    tex_size = (size_t)rawfb->font_tex.pitch * (size_t)rawfb->font_tex.h;
+    copy = malloc(tex_size);
+    if (!copy) return;
+    memcpy(copy, tex, tex_size);
+    rawfb->font_tex.pixels = copy;
+
+    nk_font_atlas_end(&rawfb->atlas, nk_handle_ptr(NULL), NULL);
+    if (rawfb->atlas.default_font)
+        nk_style_set_font(rawfb->ctx, &rawfb->atlas.default_font->handle);
+    nk_style_load_all_cursors(rawfb->ctx, rawfb->atlas.cursors);
+}
+
+/* Blend a rescaled region of src into dst using nearest-neighbour sampling.
+ *
+ * dst_rect     - destination rectangle; its x/y are rounded to the nearest
+ *                pixel and its w/h truncated to whole pixels
+ * src_rect     - source region that is stretched over dst_rect
+ * dst_scissors - optional clip rectangle (may be NULL). Its w and h are
+ *                compared directly against absolute pixel coordinates, i.e.
+ *                they act as exclusive right/bottom limits, not as a size.
+ *
+ * The source position advances for every destination pixel, clipped or not,
+ * so a partially clipped image keeps its alignment instead of being sampled
+ * from the wrong source columns.
+ */
+static void
+nk_rawfb_stretch_image(const struct rawfb_image *dst,
+    const struct rawfb_image *src, const struct nk_rect *dst_rect,
+    const struct nk_rect *src_rect, const struct nk_rect *dst_scissors)
+{
+    short i, j;
+    struct nk_color col;
+    float xinc = src_rect->w / dst_rect->w;
+    float yinc = src_rect->h / dst_rect->h;
+    float xoff, yoff = src_rect->y;
+    /* Top-left destination pixel. */
+    const int dst_x = (int)(dst_rect->x + 0.5f);
+    const int dst_y = (int)(dst_rect->y + 0.5f);
+
+    /* Simple nearest filtering rescaling */
+    /* TODO: use bilinear filter */
+    for (j = 0; j < (short)dst_rect->h; j++) {
+        xoff = src_rect->x;
+        for (i = 0; i < (short)dst_rect->w; i++) {
+            const int px = i + dst_x;
+            const int py = j + dst_y;
+            int visible = 1;
+
+            if (dst_scissors) {
+                if (px < dst_scissors->x || px >= dst_scissors->w)
+                    visible = 0;
+                else if (py < dst_scissors->y || py >= dst_scissors->h)
+                    visible = 0;
+            }
+            if (visible) {
+                col = nk_image_getpixel(src, (int)xoff, (int) yoff);
+                nk_image_blendpixel(dst, px, py, col);
+            }
+            /* Step the source even for clipped pixels. */
+            xoff += xinc;
+        }
+        yoff += yinc;
+    }
+}
+
+/* Look up the glyph for codepoint in the nk_font behind handle and fill in
+ * *glyph with its size, offset, advance and texture coordinates, all scaled
+ * from the font's native height to the requested height.
+ *
+ * next_codepoint is not used. Nothing is written when the font or the
+ * output glyph is NULL.
+ */
+static void
+nk_rawfb_font_query_font_glyph(nk_handle handle, const float height,
+    struct nk_user_font_glyph *glyph, const nk_rune codepoint,
+    const nk_rune next_codepoint)
+{
+    struct nk_font *fnt;
+    const struct nk_font_glyph *found;
+    float ratio;
+
+    NK_ASSERT(glyph);
+    NK_UNUSED(next_codepoint);
+
+    fnt = (struct nk_font*)handle.ptr;
+    NK_ASSERT(fnt);
+    NK_ASSERT(fnt->glyphs);
+    if (!(fnt && glyph))
+        return;
+
+    found = nk_font_find_glyph(fnt, codepoint);
+    ratio = height/fnt->info.height;
+
+    /* Texture coordinates are copied as they are. */
+    glyph->uv[0] = nk_vec2(found->u0, found->v0);
+    glyph->uv[1] = nk_vec2(found->u1, found->v1);
+
+    /* Metrics are scaled to the requested height. */
+    glyph->offset = nk_vec2(found->x0 * ratio, found->y0 * ratio);
+    glyph->width = (found->x1 - found->x0) * ratio;
+    glyph->height = (found->y1 - found->y0) * ratio;
+    glyph->xadvance = (found->xadvance * ratio);
+}
+
+/* Draw a UTF-8 string by blitting one glyph image per code point from the
+ * context's font texture into the framebuffer.
+ *
+ * font        - font whose userdata handle is queried for glyph metrics
+ * rect        - its x/y give the text origin (w/h are not used here)
+ * text, len   - string and its length in bytes; nothing is drawn when
+ *               either is empty/NULL
+ * font_height - height the glyphs are scaled to
+ * fg          - currently ignored (see the TODO below)
+ *
+ * Drawing stops at the first invalid code point.
+ */
+NK_API void
+nk_rawfb_draw_text(const struct rawfb_context *rawfb,
+    const struct nk_user_font *font, const struct nk_rect rect,
+    const char *text, const int len, const float font_height,
+    const struct nk_color fg)
+{
+    float x = 0;
+    int text_len = 0;
+    nk_rune unicode = 0;
+    nk_rune next = 0;
+    int glyph_len = 0;
+    int next_glyph_len = 0;
+    struct nk_user_font_glyph g;
+    if (!len || !text) return;
+
+    glyph_len = nk_utf_decode(text, &unicode, len);
+    if (!glyph_len) return;
+
+    /* draw every glyph image */
+    while (text_len < len && glyph_len) {
+        struct nk_rect src_rect;
+        struct nk_rect dst_rect;
+        float char_width = 0;
+        if (unicode == NK_UTF_INVALID) break;
+
+        /* query currently drawn glyph information */
+        /* The look-ahead starts glyph_len bytes past text_len, so only
+         * len - text_len - glyph_len bytes remain for it; passing more
+         * would let the decoder read beyond the end of the string. */
+        next_glyph_len = nk_utf_decode(text + text_len + glyph_len, &next,
+                    len - text_len - glyph_len);
+        nk_rawfb_font_query_font_glyph(font->userdata, font_height, &g, unicode,
+                    (next == NK_UTF_INVALID) ? '\0' : next);
+
+        /* calculate and draw glyph drawing rectangle and image */
+        char_width = g.xadvance;
+        src_rect.x = g.uv[0].x * rawfb->font_tex.w;
+        src_rect.y = g.uv[0].y * rawfb->font_tex.h;
+        src_rect.w = g.uv[1].x * rawfb->font_tex.w - g.uv[0].x * rawfb->font_tex.w;
+        src_rect.h = g.uv[1].y * rawfb->font_tex.h - g.uv[0].y * rawfb->font_tex.h;
+
+        dst_rect.x = x + g.offset.x + rect.x;
+        dst_rect.y = g.offset.y + rect.y;
+        dst_rect.w = ceilf(g.width);
+        dst_rect.h = ceilf(g.height);
+
+        /* TODO: account fg */
+        /* Use software rescaling to blit glyph from font_text to framebuffer */
+        nk_rawfb_stretch_image(&rawfb->fb, &rawfb->font_tex, &dst_rect, &src_rect, &rawfb->scissors);
+
+        /* offset next glyph */
+        text_len += glyph_len;
+        x += char_width;
+        glyph_len = next_glyph_len;
+        unicode = next;
+    }
+}
+
+/* Draw img stretched into the destination box (x, y, w, h).
+ *
+ * The pixel data comes from the rawfb_image behind img->handle.ptr; when
+ * that pointer is NULL the context's font texture is used instead. For a
+ * sub-image the source size is taken from img->region, otherwise the whole
+ * source image is used. col is not used.
+ */
+NK_API void
+nk_rawfb_drawimage(const struct rawfb_context *rawfb,
+    const int x, const int y, const int w, const int h,
+    const struct nk_image *img, const struct nk_color *col)
+{
+    struct nk_rect from;
+    struct nk_rect to;
+    const struct rawfb_image *pixels = img->handle.ptr;
+
+    if (pixels == NULL)
+        pixels = &rawfb->font_tex;
+
+    to.x = x;
+    to.y = y;
+    to.w = w;
+    to.h = h;
+
+    from.x = img->region[0];
+    from.y = img->region[1];
+    if (nk_image_is_subimage(img)) {
+        from.w = img->region[2];
+        from.h = img->region[3];
+    } else {
+        from.w = pixels->w;
+        from.h = pixels->h;
+    }
+
+    nk_rawfb_stretch_image(&rawfb->fb, pixels, &to, &from, &rawfb->scissors);
+}
+
+/* Destroy a context created by nk_rawfb_init.
+ *
+ * Releases the font texture copy made by nk_rawfb_font_stash_end and then
+ * the context itself. The framebuffer memory and the nuklear context were
+ * supplied by the caller and are left alone. Passing NULL is a no-op.
+ */
+NK_API void
+nk_rawfb_shutdown(struct rawfb_context *rawfb)
+{
+    if (!rawfb)
+        return;
+    /* font_tex.pixels is heap memory owned by the context (NULL if no font
+     * was ever baked, which free() accepts). */
+    free(rawfb->font_tex.pixels);
+    nk_memset(rawfb, 0, sizeof(struct rawfb_context));
+    free(rawfb);
+}
+
+/* Point the context at a different (or resized) framebuffer. Only the four
+ * framebuffer fields are replaced; nothing else in the context is changed. */
+NK_API void
+nk_rawfb_resize_fb(struct rawfb_context *rawfb,
+                   void *fb,
+                   const unsigned int w,
+                   const unsigned int h,
+                   const unsigned int pitch)
+{
+    rawfb->fb.pixels = fb;
+    rawfb->fb.pitch = pitch;
+    rawfb->fb.w = w;
+    rawfb->fb.h = h;
+}
+
+/* Execute every queued nuklear draw command against the framebuffer and
+ * then clear the command queue.
+ *
+ * clear        - colour used to wipe the framebuffer first
+ * enable_clear - non-zero to perform that wipe
+ *
+ * Arc commands are not implemented and trip an assert. Multi-colour
+ * rectangles are not implemented either and are skipped.
+ */
+NK_API void
+nk_rawfb_render(const struct rawfb_context *rawfb,
+                const struct nk_color clear,
+                const unsigned char enable_clear)
+{
+    const struct nk_command *cmd;
+    if (enable_clear)
+        nk_rawfb_clear(rawfb, clear);
+
+    nk_foreach(cmd, rawfb->ctx) {
+        switch (cmd->type) {
+        case NK_COMMAND_NOP: break;
+        case NK_COMMAND_SCISSOR: {
+            const struct nk_command_scissor *s =(const struct nk_command_scissor*)cmd;
+            /* The scissor rectangle is the one piece of state that changes
+             * while rendering, hence the cast away from const. */
+            nk_rawfb_scissor((struct rawfb_context *)rawfb, s->x, s->y, s->w, s->h);
+        } break;
+        case NK_COMMAND_LINE: {
+            const struct nk_command_line *l = (const struct nk_command_line *)cmd;
+            nk_rawfb_stroke_line(rawfb, l->begin.x, l->begin.y, l->end.x,
+                l->end.y, l->line_thickness, l->color);
+        } break;
+        case NK_COMMAND_RECT: {
+            const struct nk_command_rect *r = (const struct nk_command_rect *)cmd;
+            nk_rawfb_stroke_rect(rawfb, r->x, r->y, r->w, r->h,
+                (unsigned short)r->rounding, r->line_thickness, r->color);
+        } break;
+        case NK_COMMAND_RECT_FILLED: {
+            const struct nk_command_rect_filled *r = (const struct nk_command_rect_filled *)cmd;
+            nk_rawfb_fill_rect(rawfb, r->x, r->y, r->w, r->h,
+                (unsigned short)r->rounding, r->color);
+        } break;
+        case NK_COMMAND_CIRCLE: {
+            const struct nk_command_circle *c = (const struct nk_command_circle *)cmd;
+            nk_rawfb_stroke_circle(rawfb, c->x, c->y, c->w, c->h, c->line_thickness, c->color);
+        } break;
+        case NK_COMMAND_CIRCLE_FILLED: {
+            const struct nk_command_circle_filled *c = (const struct nk_command_circle_filled *)cmd;
+            nk_rawfb_fill_circle(rawfb, c->x, c->y, c->w, c->h, c->color);
+        } break;
+        case NK_COMMAND_TRIANGLE: {
+            const struct nk_command_triangle*t = (const struct nk_command_triangle*)cmd;
+            nk_rawfb_stroke_triangle(rawfb, t->a.x, t->a.y, t->b.x, t->b.y,
+                t->c.x, t->c.y, t->line_thickness, t->color);
+        } break;
+        case NK_COMMAND_TRIANGLE_FILLED: {
+            const struct nk_command_triangle_filled *t = (const struct nk_command_triangle_filled *)cmd;
+            nk_rawfb_fill_triangle(rawfb, t->a.x, t->a.y, t->b.x, t->b.y,
+                t->c.x, t->c.y, t->color);
+        } break;
+        case NK_COMMAND_POLYGON: {
+            const struct nk_command_polygon *p =(const struct nk_command_polygon*)cmd;
+            nk_rawfb_stroke_polygon(rawfb, p->points, p->point_count, p->line_thickness,p->color);
+        } break;
+        case NK_COMMAND_POLYGON_FILLED: {
+            const struct nk_command_polygon_filled *p = (const struct nk_command_polygon_filled *)cmd;
+            nk_rawfb_fill_polygon(rawfb, p->points, p->point_count, p->color);
+        } break;
+        case NK_COMMAND_POLYLINE: {
+            const struct nk_command_polyline *p = (const struct nk_command_polyline *)cmd;
+            nk_rawfb_stroke_polyline(rawfb, p->points, p->point_count, p->line_thickness, p->color);
+        } break;
+        case NK_COMMAND_TEXT: {
+            const struct nk_command_text *t = (const struct nk_command_text*)cmd;
+            nk_rawfb_draw_text(rawfb, t->font, nk_rect(t->x, t->y, t->w, t->h),
+                t->string, t->length, t->height, t->foreground);
+        } break;
+        case NK_COMMAND_CURVE: {
+            const struct nk_command_curve *q = (const struct nk_command_curve *)cmd;
+            /* Every curve is approximated with 22 line segments. */
+            nk_rawfb_stroke_curve(rawfb, q->begin, q->ctrl[0], q->ctrl[1],
+                q->end, 22, q->line_thickness, q->color);
+        } break;
+        case NK_COMMAND_RECT_MULTI_COLOR:
+            /* Not implemented by this backend. This case must not share the
+             * image handler: a multi-colour rectangle command is a different
+             * structure, and reading it as an nk_command_image would treat
+             * unrelated bytes as an image handle. */
+            break;
+        case NK_COMMAND_IMAGE: {
+            const struct nk_command_image *q = (const struct nk_command_image *)cmd;
+            nk_rawfb_drawimage(rawfb, q->x, q->y, q->w, q->h, &q->img, &q->col);
+        } break;
+        case NK_COMMAND_ARC: {
+            assert(0 && "NK_COMMAND_ARC not implemented\n");
+        } break;
+        case NK_COMMAND_ARC_FILLED: {
+            assert(0 && "NK_COMMAND_ARC_FILLED not implemented\n");
+        } break;
+        default: break;
+        }
+    }
+    nk_clear(rawfb->ctx);
+}
+#endif
+
--- a/nuklear_ui/nuklear_sdl_gles2.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/nuklear_ui/nuklear_sdl_gles2.h	Sat Jan 15 13:15:21 2022 -0800
@@ -47,6 +47,7 @@
 
 #include <string.h>
 
+#ifndef DISABLE_OPENGL
 struct nk_sdl_device {
     struct nk_buffer cmds;
     struct nk_draw_null_texture null;
@@ -69,10 +70,13 @@
     GLfloat uv[2];
     nk_byte col[4];
 };
+#endif
 
 static struct nk_sdl {
     SDL_Window *win;
+#ifndef DISABLE_OPENGL
     struct nk_sdl_device ogl;
+#endif
     struct nk_context ctx;
     struct nk_font_atlas atlas;
 } sdl;
@@ -85,7 +89,7 @@
 #define DECLARE_PRECISION
 #endif
 
-
+#ifndef DISABLE_OPENGL
 NK_API void
 nk_sdl_device_create(void)
 {
@@ -164,6 +168,10 @@
     glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
     glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)width, (GLsizei)height, 0,
                 GL_RGBA, GL_UNSIGNED_BYTE, image);
+	GLenum err = glGetError();
+	if (err != GL_NO_ERROR) {
+		printf("glTexImage2D failed with error %d\n", err);
+	}
 }
 
 NK_API void
@@ -296,6 +304,7 @@
     glDisable(GL_BLEND);
     glDisable(GL_SCISSOR_TEST);
 }
+#endif
 
 static void
 nk_sdl_clipbard_paste(nk_handle usr, struct nk_text_edit *edit)
@@ -327,10 +336,10 @@
     sdl.ctx.clip.copy = nk_sdl_clipbard_copy;
     sdl.ctx.clip.paste = nk_sdl_clipbard_paste;
     sdl.ctx.clip.userdata = nk_handle_ptr(0);
-    nk_sdl_device_create();
     return &sdl.ctx;
 }
 
+#ifndef DISABLE_OPENGL
 NK_API void
 nk_sdl_font_stash_begin(struct nk_font_atlas **atlas)
 {
@@ -350,6 +359,7 @@
         nk_style_set_font(&sdl.ctx, &sdl.atlas.default_font->handle);
 
 }
+#endif
 
 NK_API int
 nk_sdl_handle_event(SDL_Event *evt)
@@ -408,6 +418,30 @@
             else nk_input_key(ctx, NK_KEY_RIGHT, down);
         } else return 0;
         return 1;
+	} else if (evt->type == SDL_CONTROLLERBUTTONDOWN || evt->type == SDL_CONTROLLERBUTTONUP) {
+		int down = evt->type == SDL_CONTROLLERBUTTONDOWN;
+		if (evt->cbutton.button == SDL_CONTROLLER_BUTTON_DPAD_UP) {
+			nk_input_key(ctx, NK_KEY_UP, down);
+		} else if (evt->cbutton.button == SDL_CONTROLLER_BUTTON_DPAD_DOWN) {
+			nk_input_key(ctx, NK_KEY_DOWN, down);
+		} else if (evt->cbutton.button == SDL_CONTROLLER_BUTTON_A || evt->cbutton.button == SDL_CONTROLLER_BUTTON_START) {
+			nk_input_key(ctx, NK_KEY_ENTER, down);
+		}
+	} else if (evt->type == SDL_CONTROLLERAXISMOTION) {
+		if (evt->caxis.axis == SDL_CONTROLLER_AXIS_LEFTY || evt->caxis.axis ==  SDL_CONTROLLER_AXIS_RIGHTY) {
+			int down = abs(evt->caxis.value) > 2000;
+			if (evt->caxis.value >= 0) {
+				if (ctx->input.keyboard.keys[NK_KEY_UP].down) {
+					nk_input_key(ctx, NK_KEY_UP, 0);
+				}
+				nk_input_key(ctx, NK_KEY_DOWN, down);
+			} else {
+				if (ctx->input.keyboard.keys[NK_KEY_DOWN].down) {
+					nk_input_key(ctx, NK_KEY_DOWN, 0);
+				}
+				nk_input_key(ctx, NK_KEY_UP, down);
+			}
+		}
     } else if (evt->type == SDL_MOUSEBUTTONDOWN || evt->type == SDL_MOUSEBUTTONUP) {
         /* mouse button */
         int down = evt->type == SDL_MOUSEBUTTONDOWN;
@@ -447,7 +481,9 @@
 {
     nk_font_atlas_clear(&sdl.atlas);
     nk_free(&sdl.ctx);
+#ifndef DISABLE_OPENGL
     nk_sdl_device_destroy();
+#endif
     memset(&sdl, 0, sizeof(sdl));
 }
 
--- a/paths.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/paths.c	Sat Jan 15 13:15:21 2022 -0800
@@ -59,7 +59,7 @@
 
 void get_initial_browse_path(char **dst)
 {
-	*dst = NULL;
+	char *base = NULL;
 	char *remember_path = tern_find_path(config, "ui\0remember_path\0", TVAL_PTR).ptrval;
 	if (!remember_path || !strcmp("on", remember_path)) {
 		char *pathfname = alloc_concat(get_userdata_dir(), PATH_SEP "blastem" PATH_SEP "sticky_path");
@@ -67,13 +67,13 @@
 		if (f) {
 			long pathsize = file_size(f);
 			if (pathsize > 0) {
-				*dst = malloc(pathsize + 1);
-				if (fread(*dst, 1, pathsize, f) != pathsize) {
+				base = malloc(pathsize + 1);
+				if (fread(base, 1, pathsize, f) != pathsize) {
 					warning("Error restoring saved file browser path");
-					free(*dst);
-					*dst = NULL;
+					free(base);
+					base = NULL;
 				} else {
-					(*dst)[pathsize] = 0;
+					base[pathsize] = 0;
 				}
 			}
 			fclose(f);
@@ -84,19 +84,20 @@
 			current_path = dst;
 		}
 	}
-	if (!*dst) {
-		*dst = tern_find_path(config, "ui\0initial_path\0", TVAL_PTR).ptrval;
+	if (!base) {
+		base = tern_find_path(config, "ui\0initial_path\0", TVAL_PTR).ptrval;
 	}
-	if (!*dst){
+	if (!base){
 #ifdef __ANDROID__
-		*dst = get_external_storage_path();
+		base = get_external_storage_path();
 #else
-		*dst = "$HOME";
+		base = "$HOME";
 #endif
 	}
 	tern_node *vars = tern_insert_ptr(NULL, "HOME", get_home_dir());
 	vars = tern_insert_ptr(vars, "EXEDIR", get_exe_dir());
-	*dst = replace_vars(*dst, vars, 1);
+	*dst = replace_vars(base, vars, 1);
+	free(base);
 	tern_free(vars);
 }
 
--- a/png.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/png.c	Sat Jan 15 13:15:21 2022 -0800
@@ -204,7 +204,7 @@
 
 static uint32_t pixel_gray(uint8_t **cur, uint8_t **last, uint8_t bpp, uint32_t x, filter_fun filter)
 {
-	uint8_t value = filter(*cur, *last, bpp, x);
+	uint8_t value = **cur = filter(*cur, *last, bpp, x);
 	(*cur)++;
 	if (*last) {
 		(*last)++;
@@ -214,17 +214,17 @@
 
 static uint32_t pixel_true(uint8_t **cur, uint8_t **last, uint8_t bpp, uint32_t x, filter_fun filter)
 {
-	uint8_t red = filter(*cur, *last, bpp, x);
+	uint8_t red = **cur = filter(*cur, *last, bpp, x);
 	(*cur)++;
 	if (*last) {
 		(*last)++;
 	}
-	uint8_t green = filter(*cur, *last, bpp, x);
+	uint8_t green = **cur = filter(*cur, *last, bpp, x);
 	(*cur)++;
 	if (*last) {
 		(*last)++;
 	}
-	uint8_t blue = filter(*cur, *last, bpp, x);
+	uint8_t blue = **cur = filter(*cur, *last, bpp, x);
 	(*cur)++;
 	if (*last) {
 		(*last)++;
@@ -234,12 +234,12 @@
 
 static uint32_t pixel_gray_alpha(uint8_t **cur, uint8_t **last, uint8_t bpp, uint32_t x, filter_fun filter)
 {
-	uint8_t value = filter(*cur, *last, bpp, x);
+	uint8_t value = **cur = filter(*cur, *last, bpp, x);
 	(*cur)++;
 	if (*last) {
 		(*last)++;
 	}
-	uint8_t alpha = filter(*cur, *last, bpp, x);
+	uint8_t alpha = **cur = filter(*cur, *last, bpp, x);
 	(*cur)++;
 	if (*last) {
 		(*last)++;
@@ -249,22 +249,22 @@
 
 static uint32_t pixel_true_alpha(uint8_t **cur, uint8_t **last, uint8_t bpp, uint32_t x, filter_fun filter)
 {
-	uint8_t red = filter(*cur, *last, bpp, x);
+	uint8_t red = **cur = filter(*cur, *last, bpp, x);
 	(*cur)++;
 	if (*last) {
 		(*last)++;
 	}
-	uint8_t green = filter(*cur, *last, bpp, x);
+	uint8_t green = **cur = filter(*cur, *last, bpp, x);
 	(*cur)++;
 	if (*last) {
 		(*last)++;
 	}
-	uint8_t blue = filter(*cur, *last, bpp, x);
+	uint8_t blue = **cur = filter(*cur, *last, bpp, x);
 	(*cur)++;
 	if (*last) {
 		(*last)++;
 	}
-	uint8_t alpha = filter(*cur, *last, bpp, x);
+	uint8_t alpha = **cur = filter(*cur, *last, bpp, x);
 	(*cur)++;
 	if (*last) {
 		(*last)++;
@@ -354,6 +354,7 @@
 					}
 					memcpy(idat_buf + idat_size, buffer + cur, chunk_size);
 					idat_size += chunk_size;
+					idat_needs_free = 1;
 				} else {
 					idat_buf = buffer + cur;
 					idat_size = chunk_size;
--- a/psg.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/psg.c	Sat Jan 15 13:15:21 2022 -0800
@@ -4,8 +4,8 @@
  BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
 */
 #include "psg.h"
-#include "render.h"
 #include "blastem.h"
+#include "event_log.h"
 #include <string.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -33,6 +33,10 @@
 
 void psg_write(psg_context * context, uint8_t value)
 {
+	if (context->vgm) {
+		vgm_sn76489_write(context->vgm, context->cycles, value);
+	}
+	event_log(EVENT_PSG_REG, context->cycles, sizeof(value), &value);
 	if (value & 0x80) {
 		context->latch = value & 0x70;
 		uint8_t channel = value >> 5 & 0x3;
@@ -123,6 +127,30 @@
 	}
 }
 
+void psg_vgm_log(psg_context *context, uint32_t master_clock, vgm_writer *vgm)
+{
+	//attach the writer to this PSG, then replay the current
+	//volume/tone/noise state into it one channel at a time
+	vgm_sn76489_init(vgm, 16 * master_clock / context->clock_inc, 9, 16, 0);
+	context->vgm = vgm;
+	for (int chan = 0; chan < 4; chan++)
+	{
+		uint8_t base = 0x80 | (chan << 5);
+		//volume byte first (0x10 is OR'd into the latch byte)
+		vgm_sn76489_write(context->vgm, context->cycles, context->volume[chan] | base | 0x10);
+		if (chan != 3) {
+			//channels 0-2: low 4 bits go out with the latch byte, the next 6 bits as a second byte
+			vgm_sn76489_write(context->vgm, context->cycles, (context->counter_load[chan] & 0xF) | base);
+			vgm_sn76489_write(context->vgm, context->cycles, (context->counter_load[chan] >> 4) & 0x3F);
+		} else if (context->noise_use_tone) {
+			vgm_sn76489_write(context->vgm, context->cycles, 3 | base);
+		} else {
+			//counter_load values 0x10, 0x20, 0x40 map to 0, 1, 2
+			vgm_sn76489_write(context->vgm, context->cycles, (context->counter_load[chan] >> 5) | base);
+		}
+	}
+}
+
 void psg_serialize(psg_context *context, serialize_buffer *buf)
 {
 	save_int16(buf, context->lsfr);
--- a/psg.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/psg.h	Sat Jan 15 13:15:21 2022 -0800
@@ -8,10 +8,12 @@
 
 #include <stdint.h>
 #include "serialize.h"
-#include "render.h"
+#include "render_audio.h"
+#include "vgm.h"
 
 typedef struct {
 	audio_source *audio;
+	vgm_writer   *vgm;
 	uint32_t clock_inc;
 	uint32_t cycles;
 	uint16_t lsfr;
@@ -31,6 +33,7 @@
 void psg_adjust_master_clock(psg_context * context, uint32_t master_clock);
 void psg_write(psg_context * context, uint8_t value);
 void psg_run(psg_context * context, uint32_t cycles);
+void psg_vgm_log(psg_context *context, uint32_t master_clock, vgm_writer *vgm);
 void psg_serialize(psg_context *context, serialize_buffer *buf);
 void psg_deserialize(deserialize_buffer *buf, void *vcontext);
 
--- a/render.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/render.h	Sat Jan 15 13:15:21 2022 -0800
@@ -6,7 +6,13 @@
 #ifndef RENDER_H_
 #define RENDER_H_
 
-//TODO: Throw an ifdef in here once there's more than one renderer
+#include <stdint.h>
+
+#ifndef IS_LIB
+#ifdef USE_FBDEV
+#include "special_keys_evdev.h"
+#define render_relative_mouse(V)
+#else
 #include <SDL.h>
 #define RENDERKEY_UP       SDLK_UP
 #define RENDERKEY_DOWN     SDLK_DOWN
@@ -61,6 +67,9 @@
 #define RENDER_DPAD_LEFT   SDL_HAT_LEFT
 #define RENDER_DPAD_RIGHT  SDL_HAT_RIGHT
 #define render_relative_mouse SDL_SetRelativeMouseMode
+typedef SDL_Thread* render_thread;
+#endif
+#endif
 
 #define MAX_JOYSTICKS 8
 #define MAX_MICE 8
@@ -68,9 +77,8 @@
 
 #define FRAMEBUFFER_ODD 0
 #define FRAMEBUFFER_EVEN 1
-#define FRAMEBUFFER_USER_START 2
-
-#include "vdp.h"
+#define FRAMEBUFFER_UI 2
+#define FRAMEBUFFER_USER_START 3
 
 typedef enum {
 	VID_NTSC,
@@ -80,13 +88,15 @@
 
 #define RENDER_DPAD_BIT 0x40000000
 #define RENDER_AXIS_BIT 0x20000000
+#define RENDER_AXIS_POS 0x10000000
 #define RENDER_INVALID_NAME -1
 #define RENDER_NOT_MAPPED -2
 #define RENDER_NOT_PLUGGED_IN -3
 
-typedef struct audio_source audio_source;
 typedef void (*drop_handler)(const char *filename);
 typedef void (*window_close_handler)(uint8_t which);
+typedef void (*ui_render_fun)(void);
+typedef int (*render_thread_fun)(void*);
 
 uint32_t render_map_color(uint8_t r, uint8_t g, uint8_t b);
 void render_save_screenshot(char *path);
@@ -100,9 +110,7 @@
 void render_set_video_standard(vid_std std);
 void render_toggle_fullscreen();
 void render_update_caption(char *title);
-void render_wait_quit(vdp_context * context);
-uint32_t render_audio_buffer();
-uint32_t render_sample_rate();
+void render_wait_quit(void);
 void process_events();
 int render_width();
 int render_height();
@@ -120,18 +128,22 @@
 uint32_t render_emulated_width();
 uint32_t render_emulated_height();
 uint32_t render_overscan_top();
+uint32_t render_overscan_bot();
 uint32_t render_overscan_left();
 uint32_t render_elapsed_ms(void);
 void render_sleep_ms(uint32_t delay);
 uint8_t render_has_gl(void);
-audio_source *render_audio_source(uint64_t master_clock, uint64_t sample_divider, uint8_t channels);
-void render_audio_adjust_clock(audio_source *src, uint64_t master_clock, uint64_t sample_divider);
-void render_put_mono_sample(audio_source *src, int16_t value);
-void render_put_stereo_sample(audio_source *src, int16_t left, int16_t right);
-void render_pause_source(audio_source *src);
-void render_resume_source(audio_source *src);
-void render_free_source(audio_source *src);
 void render_config_updated(void);
+void render_set_gl_context_handlers(ui_render_fun destroy, ui_render_fun create);
+void render_set_ui_render_fun(ui_render_fun);
+void render_set_ui_fb_resize_handler(ui_render_fun resize);
+void render_video_loop(void);
+uint8_t render_should_release_on_exit(void);
+void render_set_external_sync(uint8_t ext_sync_on);
+void render_reset_mappings(void);
+#ifndef IS_LIB
+uint8_t render_create_thread(render_thread *thread, const char *name, render_thread_fun fun, void *data);
+#endif
 
 #endif //RENDER_H_
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/render_audio.c	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,385 @@
+#include <limits.h>
+#include <string.h>
+#include <stdlib.h>
+#include <math.h>
+#include "render_audio.h"
+#include "util.h"
+#include "config.h"
+#include "blastem.h"
+
+static uint8_t output_channels;
+static uint32_t buffer_samples, sample_rate;
+
+static audio_source *audio_sources[8];
+static audio_source *inactive_audio_sources[8];
+static uint8_t num_audio_sources;
+static uint8_t num_inactive_audio_sources;
+
+static float overall_gain_mult, *mix_buf;
+static int sample_size;
+
+typedef void (*conv_func)(float *samples, void *vstream, int sample_count);
+
+static void convert_null(float *samples, void *vstream, int sample_count) //converter for RENDER_AUDIO_UNKNOWN: ignores the mix and outputs zeroed bytes
+{
+	memset(vstream, 0, sample_count * sample_size); //sample_size is the per-sample byte size passed to render_audio_initialized
+}
+
+static void convert_s16(float *samples, void *vstream, int sample_count)
+{
+	//clip each mixed float sample to [-1, 1] and scale it to signed 16-bit
+	int16_t *out = vstream;
+	for (int idx = 0; idx < sample_count; idx++)
+	{
+		float in = samples[idx];
+		if (in >= 1.0f) {
+			out[idx] = 0x7FFF;
+		} else if (in <= -1.0f) {
+			out[idx] = -0x8000;
+		} else {
+			//the float -> int16_t conversion drops the fractional part
+			out[idx] = in * 0x7FFF;
+		}
+	}
+}
+
+static void clamp_f32(float *samples, void *vstream, int sample_count)
+{
+	//converter for float output: vstream is not touched,
+	//the samples are limited to [-1, 1] where they are
+	for (int idx = 0; idx < sample_count; idx++)
+	{
+		if (samples[idx] > 1.0f) {
+			samples[idx] = 1.0f;
+		} else if (samples[idx] < -1.0f) {
+			samples[idx] = -1.0f;
+		}
+	}
+}
+
+static int32_t mix_f32(audio_source *audio, float *stream, int samples) //add one source's front buffer into the float mix buffer, scaled by its gain
+{
+	float *end = stream + samples;
+	int16_t *src = audio->front;
+	uint32_t i = audio->read_start;
+	uint32_t i_end = audio->read_end;
+	float *cur = stream;
+	float gain_mult = audio->gain_mult * overall_gain_mult; //per-source gain combined with the global gain
+	size_t first_add = output_channels > 1 ? 1 : 0, second_add = output_channels > 1 ? output_channels - 1 : 1; //pointer steps after the first and second write of each frame; with one output channel both writes hit the same slot
+	if (audio->num_channels == 1) {
+		while (cur < end && i != i_end)
+		{
+			*cur += gain_mult * ((float)src[i]) / 0x7FFF; //mono source: the same sample is added for both writes
+			cur += first_add;
+			*cur += gain_mult * ((float)src[i++]) / 0x7FFF;
+			cur += second_add;
+			i &= audio->mask; //wrap the read index
+		}
+	} else {
+		while(cur < end && i != i_end)
+		{
+			*cur += gain_mult * ((float)src[i++]) / 0x7FFF; //two consecutive source samples are consumed per frame
+			cur += first_add;
+			*cur += gain_mult * ((float)src[i++]) / 0x7FFF;
+			cur += second_add;
+			i &= audio->mask;
+		}
+	}
+	if (!render_is_audio_sync()) {
+		audio->read_start = i; //the consumed position is only stored back when audio sync is off
+	}
+	if (cur != end) {
+		debug_message("Underflow of %d samples, read_start: %d, read_end: %d, mask: %X\n", (int)(end-cur)/2, audio->read_start, audio->read_end, audio->mask);
+		return (cur-end)/2; //negative: half the number of mix slots that were left unfilled
+	} else {
+		return ((i_end - i) & audio->mask) / audio->num_channels; //samples per channel still waiting between the read position and read_end
+	}
+}
+
+static conv_func convert;
+
+
+int mix_and_convert(unsigned char *byte_stream, int len, int *min_remaining_out) //mix every active source, convert into byte_stream; returns the smallest mix_f32 result (INT_MAX with no sources)
+{
+	int samples = len / sample_size;
+	float *mix_dest = mix_buf ? mix_buf : (float *)byte_stream; //without a separate mix buffer the output stream itself is mixed into
+	memset(mix_dest, 0, samples * sizeof(float));
+	int min_buffered = INT_MAX;
+	int min_remaining_buffer = INT_MAX;
+	for (uint8_t i = 0; i < num_audio_sources; i++)
+	{
+		int buffered = mix_f32(audio_sources[i], mix_dest, samples);
+		int remaining = (audio_sources[i]->mask + 1) / audio_sources[i]->num_channels - buffered; //(mask + 1) per channel minus what mix_f32 reported as still buffered
+		min_buffered = buffered < min_buffered ? buffered : min_buffered;
+		min_remaining_buffer = remaining < min_remaining_buffer ? remaining : min_remaining_buffer;
+		audio_sources[i]->front_populated = 0; //front buffer has been consumed
+		render_buffer_consumed(audio_sources[i]); //backend hook
+	}
+	convert(mix_dest, byte_stream, samples); //converter selected in render_audio_initialized
+	if (min_remaining_out) { //optional out parameter
+		*min_remaining_out = min_remaining_buffer;
+	}
+	return min_buffered;
+}
+
+uint8_t all_sources_ready(void)
+{
+	//1 only when every active source has a filled front buffer
+	//(also 1 when there are no active sources at all)
+	for (uint8_t idx = 0; idx < num_audio_sources; idx++)
+	{
+		if (!audio_sources[idx]->front_populated) {
+			return 0;
+		}
+	}
+	return 1;
+}
+
+#define BUFFER_INC_RES 0x40000000UL
+
+void render_audio_adjust_clock(audio_source *src, uint64_t master_clock, uint64_t sample_divider) //recompute the source's resampling step for a new clock or divider
+{
+	src->buffer_inc = ((BUFFER_INC_RES * (uint64_t)sample_rate) / master_clock) * sample_divider; //added to buffer_fraction per input sample; BUFFER_INC_RES stands for one whole output sample
+}
+
+void render_audio_adjust_speed(float adjust_ratio)
+{
+	for (audio_source **cur = audio_sources, **end = audio_sources + num_audio_sources; cur < end; cur++)
+	{
+		(*cur)->buffer_inc = ((double)(*cur)->buffer_inc) + ((double)(*cur)->buffer_inc) * adjust_ratio + 0.5; //scale every active source's step by (1 + adjust_ratio); +0.5 rounds
+	}
+}
+
+audio_source *render_audio_source(uint64_t master_clock, uint64_t sample_divider, uint8_t channels) //allocate and register a new active source; at most 8 can be active
+{
+	audio_source *ret = NULL;
+	uint32_t alloc_size = render_is_audio_sync() ? channels * buffer_samples : nearest_pow2(render_min_buffered() * 4 * channels); //buffer length in int16_t samples
+	render_lock_audio();
+		if (num_audio_sources < 8) {
+			ret = calloc(1, sizeof(audio_source));
+			ret->back = malloc(alloc_size * sizeof(int16_t));
+			ret->front = render_is_audio_sync() ? malloc(alloc_size * sizeof(int16_t)) : ret->back; //front aliases back unless audio sync is on
+			ret->front_populated = 0;
+			ret->opaque = render_new_audio_opaque();
+			ret->num_channels = channels;
+			audio_sources[num_audio_sources++] = ret;
+		}
+	render_unlock_audio();
+	if (!ret) {
+		fatal_error("Too many audio sources!");
+	} else {
+		render_audio_adjust_clock(ret, master_clock, sample_divider);
+		double lowpass_cutoff = get_lowpass_cutoff(config);
+		double rc = (1.0 / lowpass_cutoff) / (2.0 * M_PI);
+		ret->dt = 1.0 / ((double)master_clock / (double)(sample_divider)); //period of one source sample
+		double alpha = ret->dt / (ret->dt + rc);
+		ret->lowpass_alpha = (int32_t)(((double)0x10000) * alpha); //filter weight stored as a fraction of 0x10000
+		ret->buffer_pos = 0;
+		ret->buffer_fraction = 0;
+		ret->last_left = ret->last_right = 0;
+		ret->read_start = 0;
+		ret->read_end = render_is_audio_sync() ? buffer_samples * channels : 0;
+		ret->mask = render_is_audio_sync() ? 0xFFFFFFFF : alloc_size-1; //index wrap mask; all ones in audio sync mode
+		ret->gain_mult = 1.0f;
+	}
+	render_audio_created(ret); //NOTE(review): receives NULL if fatal_error ever returns - presumably it does not; confirm
+	
+	return ret;
+}
+
+
+static float db_to_mult(float gain) //convert a gain in decibels to a linear multiplier
+{
+	return powf(10.0f, gain/20.0f); //10^(gain/20)
+}
+
+void render_audio_source_gaindb(audio_source *src, float gain) //set the per-source gain from a decibel value
+{
+	src->gain_mult = db_to_mult(gain); //multiplied with the overall gain in mix_f32
+}
+
+void render_pause_source(audio_source *src) //take src out of the active list and park it in the inactive list
+{
+	uint8_t found = 0, remaining_sources; //remaining_sources is only assigned, and only read, when src was found
+	render_lock_audio();
+		for (uint8_t i = 0; i < num_audio_sources; i++)
+		{
+			if (audio_sources[i] == src) {
+				audio_sources[i] = audio_sources[--num_audio_sources]; //fill the hole with the last active entry
+				found = 1;
+				remaining_sources = num_audio_sources;
+				break;
+			}
+		}
+		
+	render_unlock_audio();
+	if (found) {
+		render_source_paused(src, remaining_sources); //backend hook, called outside the audio lock
+	}
+	inactive_audio_sources[num_inactive_audio_sources++] = src; //NOTE(review): appended even when src was not active, and without a bounds check against the 8-entry array; render_free_source relies on the unconditional append
+}
+
+void render_resume_source(audio_source *src) //put src back in the active list and drop it from the inactive list
+{
+	render_lock_audio();
+		if (num_audio_sources < 8) { //NOTE(review): when the active list is full src is not re-added, but the code below still runs
+			audio_sources[num_audio_sources++] = src;
+		}
+	render_unlock_audio();
+	for (uint8_t i = 0; i < num_inactive_audio_sources; i++)
+	{
+		if (inactive_audio_sources[i] == src) {
+			inactive_audio_sources[i] = inactive_audio_sources[--num_inactive_audio_sources]; //swap-remove; the loop continues without re-checking slot i
+		}
+	}
+	render_source_resumed(src); //backend hook
+}
+
+void render_free_source(audio_source *src) //unregister src from whichever list holds it and release its memory
+{
+	uint8_t found = 0;
+	for (uint8_t i = 0; i < num_inactive_audio_sources; i++)
+	{
+		if (inactive_audio_sources[i] == src) {
+			inactive_audio_sources[i] = inactive_audio_sources[--num_inactive_audio_sources]; //swap-remove from the inactive list
+			found = 1;
+			break;
+		}
+	}
+	if (!found) {
+		render_pause_source(src); //removes it from the active list and appends it to the inactive list...
+		num_inactive_audio_sources--; //...and this drops that freshly appended entry again
+	}
+	
+	free(src->front);
+	if (render_is_audio_sync()) {
+		free(src->back); //only a separate allocation in audio sync mode; otherwise front and back are the same pointer
+		render_free_audio_opaque(src->opaque); //NOTE(review): opaque is created unconditionally in render_audio_source but only released here in sync mode - confirm this is intended
+	}
+	free(src);
+}
+
+static int16_t lowpass_sample(audio_source *src, int16_t last, int16_t current)
+{
+	//blend the new sample with the previous output; lowpass_alpha is the
+	//weight of the new sample expressed as a fraction of 0x10000
+	int32_t blended = current * src->lowpass_alpha + last * (0x10000 - src->lowpass_alpha);
+	return (int16_t)(blended >> 16);
+}
+
+static void interp_sample(audio_source *src, int16_t last, int16_t current) //append one output sample to the back buffer, blended between last and current
+{
+	int64_t tmp = last * ((src->buffer_fraction << 16) / src->buffer_inc); //weight of last: buffer_fraction relative to buffer_inc, scaled so 0x10000 is 1.0
+	tmp += current * (0x10000 - ((src->buffer_fraction << 16) / src->buffer_inc)); //current gets the complementary weight
+	src->back[src->buffer_pos++] = tmp >> 16; //callers wrap buffer_pos with the mask afterwards
+}
+
+static uint32_t sync_samples;
+void render_put_mono_sample(audio_source *src, int16_t value) //feed one source-rate mono sample; filtered, then resampled into the back buffer
+{
+	value = lowpass_sample(src, src->last_left, value);
+	src->buffer_fraction += src->buffer_inc;
+	uint32_t base = render_is_audio_sync() ? 0 : src->read_end; //reference point for counting how much has been written
+	while (src->buffer_fraction > BUFFER_INC_RES) //one output sample for each whole BUFFER_INC_RES accumulated
+	{
+		src->buffer_fraction -= BUFFER_INC_RES;
+		interp_sample(src, src->last_left, value);
+		
+		if (((src->buffer_pos - base) & src->mask) >= sync_samples) {
+			render_do_audio_ready(src); //enough output accumulated since base: hand over to the backend
+		}
+		src->buffer_pos &= src->mask; //wrap the write index
+	}
+	src->last_left = value;
+}
+
+void render_put_stereo_sample(audio_source *src, int16_t left, int16_t right) //feed one source-rate stereo pair; filtered, then resampled into the back buffer
+{
+	left = lowpass_sample(src, src->last_left, left);
+	right = lowpass_sample(src, src->last_right, right);
+	src->buffer_fraction += src->buffer_inc;
+	uint32_t base = render_is_audio_sync() ? 0 : src->read_end; //reference point for counting how much has been written
+	while (src->buffer_fraction > BUFFER_INC_RES) //one output frame for each whole BUFFER_INC_RES accumulated
+	{
+		src->buffer_fraction -= BUFFER_INC_RES;
+		
+		interp_sample(src, src->last_left, left); //left then right are stored interleaved
+		interp_sample(src, src->last_right, right);
+		
+		if (((src->buffer_pos - base) & src->mask)/2 >= sync_samples) { //divide by 2: two buffer entries per frame
+			render_do_audio_ready(src);
+		}
+		src->buffer_pos &= src->mask; //wrap the write index
+	}
+	src->last_left = left;
+	src->last_right = right;
+}
+
+static void update_source(audio_source *src, double rc, uint8_t sync_changed) //refresh a source's low-pass weight and, if the sync mode changed, rebuild its buffers
+{
+	double alpha = src->dt / (src->dt + rc);
+	int32_t lowpass_alpha = (int32_t)(((double)0x10000) * alpha); //same formula as in render_audio_source
+	src->lowpass_alpha = lowpass_alpha;
+	if (sync_changed) {
+		uint32_t alloc_size = render_is_audio_sync() ? src->num_channels * buffer_samples : nearest_pow2(render_min_buffered() * 4 * src->num_channels);
+		src->back = realloc(src->back, alloc_size * sizeof(int16_t)); //NOTE(review): result is assigned directly, so a failed realloc loses the old pointer
+		if (render_is_audio_sync()) {
+			src->front = malloc(alloc_size * sizeof(int16_t)); //switching to sync: front becomes its own buffer
+		} else {
+			free(src->front); //switching away from sync: drop the separate front buffer
+			src->front = src->back;
+		}
+		src->mask = render_is_audio_sync() ? 0xFFFFFFFF : alloc_size-1;
+		src->read_start = 0;
+		src->read_end = render_is_audio_sync() ? buffer_samples * src->num_channels : 0;
+		src->buffer_pos = 0;
+	}
+}
+
+uint8_t old_audio_sync;
+void render_audio_initialized(render_audio_format format, uint32_t rate, uint8_t channels, uint32_t buffer_size, int sample_size_in) //called by the backend with the negotiated output format; (re)configures the mixer and all known sources
+{
+	sample_rate = rate;
+	output_channels = channels;
+	buffer_samples = buffer_size;
+	sample_size = sample_size_in;
+	if (mix_buf) {
+		free(mix_buf);
+		mix_buf = NULL;
+	}
+	switch(format) //no default case: an unlisted value leaves convert and mix_buf as they are
+	{
+	case RENDER_AUDIO_S16:
+		convert = convert_s16;
+		mix_buf = calloc(output_channels * buffer_samples, sizeof(float));
+		break;
+	case RENDER_AUDIO_FLOAT:
+		convert = clamp_f32; //no mix buffer: mix_and_convert then mixes directly into the output stream
+		break;
+	case RENDER_AUDIO_UNKNOWN:
+		convert = convert_null;
+		mix_buf = calloc(output_channels * buffer_samples, sizeof(float));
+		break;
+	}
+	uint32_t syncs = render_audio_syncs_per_sec();
+	if (syncs) {
+		sync_samples = rate / syncs;
+	} else {
+		sync_samples = buffer_samples; //0 from the backend falls back to one sync per buffer
+	}
+	char * gain_str = tern_find_path(config, "audio\0gain\0", TVAL_PTR).ptrval;
+	overall_gain_mult = db_to_mult(gain_str ? atof(gain_str) : 0.0f); //missing setting means 0 dB
+	uint8_t sync_changed = old_audio_sync != render_is_audio_sync();
+	old_audio_sync = render_is_audio_sync();
+	double lowpass_cutoff = get_lowpass_cutoff(config);
+	double rc = (1.0 / lowpass_cutoff) / (2.0 * M_PI);
+	render_lock_audio();
+		for (uint8_t i = 0; i < num_audio_sources; i++)
+		{
+			update_source(audio_sources[i], rc, sync_changed);
+		}
+	render_unlock_audio();
+	for (uint8_t i = 0; i < num_inactive_audio_sources; i++) //inactive sources are updated outside the audio lock
+	{
+		update_source(inactive_audio_sources[i], rc, sync_changed);
+	}
+}
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/render_audio.h	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,57 @@
+#ifndef RENDER_AUDIO_H_
+#define RENDER_AUDIO_H_
+
+#include <stdint.h>
+typedef enum { //output sample format reported by the backend to render_audio_initialized
+	RENDER_AUDIO_S16, //converted with convert_s16
+	RENDER_AUDIO_FLOAT, //mixed in place and clamped with clamp_f32
+	RENDER_AUDIO_UNKNOWN //output is zero-filled by convert_null
+} render_audio_format;
+
+typedef struct { //state of one audio producer feeding the mixer in render_audio.c
+	void     *opaque; //backend data from render_new_audio_opaque()
+	int16_t  *front; //buffer the mixer reads from
+	int16_t  *back; //buffer new samples are written to; same pointer as front when audio sync is off
+	double   dt; //1 / (master_clock / sample_divider)
+	uint64_t buffer_fraction; //accumulated buffer_inc not yet turned into output samples
+	uint64_t buffer_inc; //added to buffer_fraction for every input sample
+	float    gain_mult; //linear per-source gain
+	uint32_t buffer_pos; //write index into back
+	uint32_t read_start; //mixer read index into front
+	uint32_t read_end; //index where the mixer stops reading
+	uint32_t lowpass_alpha; //low-pass weight as a fraction of 0x10000
+	uint32_t mask; //index wrap mask; 0xFFFFFFFF in audio sync mode
+	int16_t  last_left; //previous filtered left (or mono) sample
+	int16_t  last_right; //previous filtered right sample
+	uint8_t  num_channels;
+	uint8_t  front_populated; //set by the backend once front holds a full buffer, cleared by mix_and_convert
+} audio_source;
+
+//public interface
+audio_source *render_audio_source(uint64_t master_clock, uint64_t sample_divider, uint8_t channels);
+void render_audio_source_gaindb(audio_source *src, float gain);
+void render_audio_adjust_clock(audio_source *src, uint64_t master_clock, uint64_t sample_divider);
+void render_put_mono_sample(audio_source *src, int16_t value);
+void render_put_stereo_sample(audio_source *src, int16_t left, int16_t right);
+void render_pause_source(audio_source *src);
+void render_resume_source(audio_source *src);
+void render_free_source(audio_source *src);
+//interface for render backends
+void render_audio_initialized(render_audio_format format, uint32_t rate, uint8_t channels, uint32_t buffer_size, int sample_size);
+int mix_and_convert(unsigned char *byte_stream, int len, int *min_remaining_out);
+uint8_t all_sources_ready(void);
+void render_audio_adjust_speed(float adjust_ratio);
+//to be implemented by render backend
+uint8_t render_is_audio_sync(void);
+void render_buffer_consumed(audio_source *src);
+void *render_new_audio_opaque(void);
+void render_free_audio_opaque(void *opaque);
+void render_lock_audio(void);
+void render_unlock_audio(void);
+uint32_t render_min_buffered(void);
+uint32_t render_audio_syncs_per_sec(void);
+void render_audio_created(audio_source *src);
+void render_do_audio_ready(audio_source *src);
+void render_source_paused(audio_source *src, uint8_t remaining_sources);
+void render_source_resumed(audio_source *src);
+#endif //RENDER_AUDIO_H_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/render_fbdev.c	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,1620 @@
+/*
+ Copyright 2013 Michael Pavone
+ This file is part of BlastEm.
+ BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
+*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+#include <linux/fb.h>
+#include <linux/input.h>
+#include <linux/kd.h>
+#include <alsa/asoundlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <dirent.h>
+#include "render.h"
+#include "blastem.h"
+#include "genesis.h"
+#include "bindings.h"
+#include "util.h"
+#include "paths.h"
+#include "ppm.h"
+#include "png.h"
+#include "config.h"
+#include "controller_info.h"
+
+#ifndef DISABLE_OPENGL
+#include <EGL/egl.h>
+#include <GLES2/gl2.h>
+#ifdef USE_MALI
+//Mali GLES headers don't seem to define GLchar for some reason
+typedef char GLchar;
+#endif
+#endif
+
+#define MAX_EVENT_POLL_PER_FRAME 2
+
+static EGLContext main_context;
+
+static int main_width, main_height, windowed_width, windowed_height, is_fullscreen;
+
+static uint8_t render_gl = 1;
+static uint8_t scanlines = 0;
+
+static uint32_t last_frame = 0;
+static snd_pcm_uframes_t buffer_samples;
+static size_t buffer_bytes;
+static unsigned int output_channels, sample_rate;
+
+
+static uint8_t quitting = 0;
+
+
+static void render_close_audio() //currently an empty placeholder; called from render_quit
+{
+
+}
+
+static snd_pcm_t *audio_handle;
+static void *output_buffer;
+void render_do_audio_ready(audio_source *src) //swap src's buffers; once every source has a full front buffer, mix and write one buffer to ALSA
+{
+	if (src->front_populated) {
+		fatal_error("Audio source filled up a buffer a second time before other sources finished their first\n");
+	}
+	int16_t *tmp = src->front; //exchange front and back so the freshly filled buffer becomes the one the mixer reads
+	src->front = src->back;
+	src->back = tmp;
+	src->front_populated = 1;
+	src->buffer_pos = 0;
+	
+	if (!all_sources_ready()) {
+		return; //wait for the remaining sources
+	}
+	mix_and_convert(output_buffer, buffer_bytes, NULL);
+
+	int frames = snd_pcm_writei(audio_handle, output_buffer, buffer_samples);
+	if (frames < 0) {
+		frames = snd_pcm_recover(audio_handle, frames, 0); //attempt to recover the stream; the buffer is not written again here
+	}
+	if (frames < 0) {
+		fprintf(stderr, "Failed to write samples: %s\n", snd_strerror(frames));
+	}
+}
+
+int render_width() //current value of main_width
+{
+	return main_width;
+}
+
+int render_height() //current value of main_height
+{
+	return main_height;
+}
+
+int render_fullscreen() //this backend always reports fullscreen
+{
+	return 1;
+}
+
+uint32_t red_shift, blue_shift, green_shift;
+uint32_t render_map_color(uint8_t r, uint8_t g, uint8_t b) //pack an RGB triple into one pixel value using the global channel shifts
+{
+	return r << red_shift | g << green_shift | b << blue_shift;
+}
+
+#ifndef DISABLE_OPENGL
+static GLuint textures[3], buffers[2], vshader, fshader, program, un_textures[2], un_width, un_height, at_pos;
+
+static GLfloat vertex_data_default[] = {
+	-1.0f, -1.0f,
+	 1.0f, -1.0f,
+	-1.0f,  1.0f,
+	 1.0f,  1.0f
+};
+
+static GLfloat vertex_data[8];
+
+static const GLushort element_data[] = {0, 1, 2, 3};
+
+static const GLchar shader_prefix[] =
+#ifdef USE_GLES
+	"#version 100\n";
+#else
+	"#version 110\n"
+	"#define lowp\n"
+	"#define mediump\n"
+	"#define highp\n";
+#endif
+
+static GLuint load_shader(char * fname, GLenum shader_type) //compile shader fname (user shader dir first, then the bundled copy); returns 0 on any failure
+{
+	char const * parts[] = {get_home_dir(), "/.config/blastem/shaders/", fname};
+	char * shader_path = alloc_concat_m(3, parts);
+	FILE * f = fopen(shader_path, "rb");
+	free(shader_path);
+	GLchar * text;
+	long fsize = f ? file_size(f) : 0;
+	if (f) {
+		text = calloc(fsize + 1, 1); //spare zeroed byte keeps text NUL-terminated for strncmp/alloc_concat below
+		size_t num_read = text ? fread(text, 1, fsize, f) : 0;
+		fclose(f); //the handle used to be leaked on every path
+		if (!text || num_read != (size_t)fsize) {
+			warning("Error reading from shader file %s\n", fname);
+			free(text);
+			return 0;
+		}
+	} else {
+		shader_path = path_append("shaders", fname);
+		uint32_t fsize32;
+		text = read_bundled_file(shader_path, &fsize32); //NOTE(review): the string helpers below assume this result is NUL-terminated - confirm
+		free(shader_path);
+		if (!text) {
+			warning("Failed to open shader file %s for reading\n", fname);
+			return 0;
+		}
+		fsize = fsize32;
+	}
+	if (strncmp(text, "#version", strlen("#version"))) { //no #version directive of its own: prepend the default prefix
+		GLchar *tmp = text;
+		text = alloc_concat(shader_prefix, tmp);
+		free(tmp);
+		fsize += strlen(shader_prefix);
+	}
+	GLint compile_status, loglen, length = fsize; //length is a real GLint instead of a long reinterpreted through a pointer cast
+	GLuint ret = glCreateShader(shader_type);
+	glShaderSource(ret, 1, (const GLchar **)&text, &length);
+	free(text);
+	glCompileShader(ret);
+	glGetShaderiv(ret, GL_COMPILE_STATUS, &compile_status);
+	if (!compile_status) {
+		glGetShaderiv(ret, GL_INFO_LOG_LENGTH, &loglen);
+		text = malloc(loglen);
+		glGetShaderInfoLog(ret, loglen, NULL, text);
+		warning("Shader %s failed to compile:\n%s\n", fname, text);
+		free(text);
+		glDeleteShader(ret);
+		return 0;
+	}
+	return ret;
+}
+#endif
+
+#define MAX_FB_LINES 590
+static uint32_t texture_buf[MAX_FB_LINES * LINEBUF_SIZE * 2];
+#ifdef DISABLE_OPENGL
+#define RENDER_FORMAT SDL_PIXELFORMAT_ARGB8888
+#else
+#ifdef USE_GLES
+#define INTERNAL_FORMAT GL_RGBA
+#define SRC_FORMAT GL_RGBA
+#define RENDER_FORMAT SDL_PIXELFORMAT_ABGR8888
+#else
+#define INTERNAL_FORMAT GL_RGBA8
+#define SRC_FORMAT GL_BGRA
+#define RENDER_FORMAT SDL_PIXELFORMAT_ARGB8888
+#endif
+static void gl_setup() //create textures, buffers and the shader program; exits the process if linking fails
+{
+	tern_val def = {.ptrval = "linear"};
+	char *scaling = tern_find_path_default(config, "video\0scaling\0", def, TVAL_PTR).ptrval;
+	GLint filter = strcmp(scaling, "linear") ? GL_NEAREST : GL_LINEAR; //any setting other than "linear" selects nearest filtering
+	glGenTextures(3, textures);
+	for (int i = 0; i < 3; i++)
+	{
+		glBindTexture(GL_TEXTURE_2D, textures[i]);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, filter);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, filter);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+		if (i < 2) { //first two textures are 512x512 and seeded from texture_buf
+			//TODO: Fixme for PAL + invalid display mode
+			glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT, 512, 512, 0, SRC_FORMAT, GL_UNSIGNED_BYTE, texture_buf);
+		} else { //third texture is a single pixel
+			uint32_t blank = 255 << 24;
+			glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT, 1, 1, 0, SRC_FORMAT, GL_UNSIGNED_BYTE, &blank);
+		}
+	}
+	glGenBuffers(2, buffers);
+	glBindBuffer(GL_ARRAY_BUFFER, buffers[0]); //buffers[0]: quad vertices
+	glBufferData(GL_ARRAY_BUFFER, sizeof(vertex_data), vertex_data, GL_STATIC_DRAW);
+	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffers[1]); //buffers[1]: element indices
+	glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(element_data), element_data, GL_STATIC_DRAW);
+	def.ptrval = "default.v.glsl";
+	vshader = load_shader(tern_find_path_default(config, "video\0vertex_shader\0", def, TVAL_PTR).ptrval, GL_VERTEX_SHADER);
+	def.ptrval = "default.f.glsl";
+	fshader = load_shader(tern_find_path_default(config, "video\0fragment_shader\0", def, TVAL_PTR).ptrval, GL_FRAGMENT_SHADER);
+	program = glCreateProgram();
+	glAttachShader(program, vshader); //load_shader returns 0 on failure; that is not checked before attaching
+	glAttachShader(program, fshader);
+	glLinkProgram(program);
+	GLint link_status;
+	glGetProgramiv(program, GL_LINK_STATUS, &link_status);
+	if (!link_status) {
+		fputs("Failed to link shader program\n", stderr);
+		exit(1);
+	}
+	un_textures[0] = glGetUniformLocation(program, "textures[0]"); //cache uniform/attribute locations in the file-level GLuint variables
+	un_textures[1] = glGetUniformLocation(program, "textures[1]");
+	un_width = glGetUniformLocation(program, "width");
+	un_height = glGetUniformLocation(program, "height");
+	at_pos = glGetAttribLocation(program, "pos");
+}
+
+static void gl_teardown() //delete the GL objects created by gl_setup
+{
+	glDeleteProgram(program);
+	glDeleteShader(vshader);
+	glDeleteShader(fshader);
+	glDeleteBuffers(2, buffers);
+	glDeleteTextures(3, textures);
+}
+#endif
+
+static uint8_t texture_init;
+static void render_alloc_surfaces()
+{
+	//one-shot: texture_init stays set until free_surfaces clears it
+	if (!texture_init) {
+		texture_init = 1;
+#ifndef DISABLE_OPENGL
+		if (render_gl) {
+			gl_setup();
+		}
+#endif
+	}
+}
+
+static void free_surfaces(void) //only clears the flag so render_alloc_surfaces will run again; no GL objects are released here
+{
+	texture_init = 0;
+}
+
+static char * caption = NULL;
+static char * fps_caption = NULL;
+
+static void render_quit() //shutdown path: audio, surface flag, then the GL objects when GL rendering is on
+{
+	render_close_audio();
+	free_surfaces();
+#ifndef DISABLE_OPENGL
+	if (render_gl) {
+		gl_teardown();
+		//FIXME: replace with EGL equivalent
+		//SDL_GL_DeleteContext(main_context);
+	}
+#endif
+}
+
+static float config_aspect()
+{
+	static float aspect = 0.0f; //cached after the first call; 0 means "not parsed yet"
+	if (aspect != 0.0f) {
+		return aspect;
+	}
+	char *setting = tern_find_path_default(config, "video\0aspect\0", (tern_val){.ptrval = "4:3"}, TVAL_PTR).ptrval;
+	if (!strcmp("stretch", setting)) {
+		return aspect = -1.0f; //negative tells update_aspect to leave the quad untouched
+	}
+	aspect = 4.0f/3.0f; //fallback for anything that is not a valid "N:D" pair
+	char *end;
+	float numerator = strtof(setting, &end);
+	if (numerator > 0.0f && *end == ':') {
+		float denominator = strtof(end+1, &end);
+		if (denominator > 0.0f && !*end) {
+			aspect = numerator / denominator;
+		}
+	}
+	return aspect;
+}
+
+static void update_aspect() //rebuild vertex_data so the picture keeps the configured aspect ratio inside the framebuffer
+{
+	//reset default values
+#ifndef DISABLE_OPENGL
+	memcpy(vertex_data, vertex_data_default, sizeof(vertex_data));
+#endif
+	if (config_aspect() > 0.0f) { //a non-positive value (the "stretch" setting) keeps the full-screen quad
+		float aspect = (float)main_width / main_height;
+		if (fabs(aspect - config_aspect()) < 0.01f) {
+			//close enough for government work
+			return;
+		}
+#ifndef DISABLE_OPENGL
+		if (render_gl) {
+			for (int i = 0; i < 4; i++)
+			{
+				if (aspect > config_aspect()) {
+					vertex_data[i*2] *= config_aspect()/aspect; //display wider than target: shrink x
+				} else {
+					vertex_data[i*2+1] *= aspect/config_aspect(); //display narrower than target: shrink y
+				}
+			}
+		} else {
+#endif
+		//TODO: Maybe do some stuff for non-integer scaling in raw fbdev copy
+#ifndef DISABLE_OPENGL
+		}
+#endif
+	}
+}
+
+static uint8_t scancode_map[128] = { //indexed by Linux input KEY_* code; unlisted entries are 0. NOTE(review): values look like PS/2 set 2 style scancodes - confirm
+	[KEY_A] = 0x1C,
+	[KEY_B] = 0x32,
+	[KEY_C] = 0x21,
+	[KEY_D] = 0x23,
+	[KEY_E] = 0x24,
+	[KEY_F] = 0x2B,
+	[KEY_G] = 0x34,
+	[KEY_H] = 0x33,
+	[KEY_I] = 0x43,
+	[KEY_J] = 0x3B,
+	[KEY_K] = 0x42,
+	[KEY_L] = 0x4B,
+	[KEY_M] = 0x3A,
+	[KEY_N] = 0x31,
+	[KEY_O] = 0x44,
+	[KEY_P] = 0x4D,
+	[KEY_Q] = 0x15,
+	[KEY_R] = 0x2D,
+	[KEY_S] = 0x1B,
+	[KEY_T] = 0x2C,
+	[KEY_U] = 0x3C,
+	[KEY_V] = 0x2A,
+	[KEY_W] = 0x1D,
+	[KEY_X] = 0x22,
+	[KEY_Y] = 0x35,
+	[KEY_Z] = 0x1A,
+	[KEY_1] = 0x16,
+	[KEY_2] = 0x1E,
+	[KEY_3] = 0x26,
+	[KEY_4] = 0x25,
+	[KEY_5] = 0x2E,
+	[KEY_6] = 0x36,
+	[KEY_7] = 0x3D,
+	[KEY_8] = 0x3E,
+	[KEY_9] = 0x46,
+	[KEY_0] = 0x45,
+	[KEY_ENTER] = 0x5A,
+	[KEY_ESC] = 0x76,
+	[KEY_SPACE] = 0x29,
+	[KEY_TAB] = 0x0D,
+	[KEY_BACKSPACE] = 0x66,
+	[KEY_MINUS] = 0x4E,
+	[KEY_EQUAL] = 0x55,
+	[KEY_LEFTBRACE] = 0x54,
+	[KEY_RIGHTBRACE] = 0x5B,
+	[KEY_BACKSLASH] = 0x5D,
+	[KEY_SEMICOLON] = 0x4C,
+	[KEY_APOSTROPHE] = 0x52,
+	[KEY_GRAVE] = 0x0E,
+	[KEY_COMMA] = 0x41,
+	[KEY_DOT] = 0x49,
+	[KEY_SLASH] = 0x4A,
+	[KEY_CAPSLOCK] = 0x58,
+	[KEY_F1] = 0x05,
+	[KEY_F2] = 0x06,
+	[KEY_F3] = 0x04,
+	[KEY_F4] = 0x0C,
+	[KEY_F5] = 0x03,
+	[KEY_F6] = 0x0B,
+	[KEY_F7] = 0x83,
+	[KEY_F8] = 0x0A,
+	[KEY_F9] = 0x01,
+	[KEY_F10] = 0x09,
+	[KEY_F11] = 0x78,
+	[KEY_F12] = 0x07,
+	[KEY_LEFTCTRL] = 0x14,
+	[KEY_LEFTSHIFT] = 0x12,
+	[KEY_LEFTALT] = 0x11,
+	[KEY_RIGHTCTRL] = 0x18,
+	[KEY_RIGHTSHIFT] = 0x59,
+	[KEY_RIGHTALT] = 0x17,
+	[KEY_INSERT] = 0x81,
+	[KEY_PAUSE] = 0x82,
+	[KEY_SYSRQ] = 0x84,
+	[KEY_SCROLLLOCK] = 0x7E,
+	[KEY_DELETE] = 0x85,
+	[KEY_LEFT] = 0x86,
+	[KEY_HOME] = 0x87,
+	[KEY_END] = 0x88,
+	[KEY_UP] = 0x89,
+	[KEY_DOWN] = 0x8A,
+	[KEY_PAGEUP] = 0x8B,
+	[KEY_PAGEDOWN] = 0x8C,
+	[KEY_RIGHT] = 0x8D,
+	[KEY_NUMLOCK] = 0x77,
+	[KEY_KPSLASH] = 0x80,
+	[KEY_KPASTERISK] = 0x7C,
+	[KEY_KPMINUS] = 0x7B,
+	[KEY_KPPLUS] = 0x79,
+	[KEY_KPENTER] = 0x19,
+	[KEY_KP1] = 0x69,
+	[KEY_KP2] = 0x72,
+	[KEY_KP3] = 0x7A,
+	[KEY_KP4] = 0x6B,
+	[KEY_KP5] = 0x73,
+	[KEY_KP6] = 0x74,
+	[KEY_KP7] = 0x6C,
+	[KEY_KP8] = 0x75,
+	[KEY_KP9] = 0x7D,
+	[KEY_KP0] = 0x70,
+	[KEY_KPDOT] = 0x71,
+};
+
+#include "special_keys_evdev.h"
+static uint8_t sym_map[128] = { //indexed by Linux input KEY_* code; printable keys map to their character, the rest to RENDERKEY_* values; unlisted entries are 0
+	[KEY_A] = 'a',
+	[KEY_B] = 'b',
+	[KEY_C] = 'c',
+	[KEY_D] = 'd',
+	[KEY_E] = 'e',
+	[KEY_F] = 'f',
+	[KEY_G] = 'g',
+	[KEY_H] = 'h',
+	[KEY_I] = 'i',
+	[KEY_J] = 'j',
+	[KEY_K] = 'k',
+	[KEY_L] = 'l',
+	[KEY_M] = 'm',
+	[KEY_N] = 'n',
+	[KEY_O] = 'o',
+	[KEY_P] = 'p',
+	[KEY_Q] = 'q',
+	[KEY_R] = 'r',
+	[KEY_S] = 's',
+	[KEY_T] = 't',
+	[KEY_U] = 'u',
+	[KEY_V] = 'v',
+	[KEY_W] = 'w',
+	[KEY_X] = 'x',
+	[KEY_Y] = 'y',
+	[KEY_Z] = 'z',
+	[KEY_1] = '1',
+	[KEY_2] = '2',
+	[KEY_3] = '3',
+	[KEY_4] = '4',
+	[KEY_5] = '5',
+	[KEY_6] = '6',
+	[KEY_7] = '7',
+	[KEY_8] = '8',
+	[KEY_9] = '9',
+	[KEY_0] = '0',
+	[KEY_ENTER] = '\r',
+	[KEY_SPACE] = ' ',
+	[KEY_TAB] = '\t',
+	[KEY_BACKSPACE] = '\b',
+	[KEY_MINUS] = '-',
+	[KEY_EQUAL] = '=',
+	[KEY_LEFTBRACE] = '[',
+	[KEY_RIGHTBRACE] = ']',
+	[KEY_BACKSLASH] = '\\',
+	[KEY_SEMICOLON] = ';',
+	[KEY_APOSTROPHE] = '\'',
+	[KEY_GRAVE] = '`',
+	[KEY_COMMA] = ',',
+	[KEY_DOT] = '.',
+	[KEY_SLASH] = '/',
+	[KEY_ESC] = RENDERKEY_ESC, //NOTE(review): entries are uint8_t, so the RENDERKEY_* values must fit in 8 bits - confirm against special_keys_evdev.h
+	[KEY_F1] = RENDERKEY_F1,
+	[KEY_F2] = RENDERKEY_F2,
+	[KEY_F3] = RENDERKEY_F3,
+	[KEY_F4] = RENDERKEY_F4,
+	[KEY_F5] = RENDERKEY_F5,
+	[KEY_F6] = RENDERKEY_F6,
+	[KEY_F7] = RENDERKEY_F7,
+	[KEY_F8] = RENDERKEY_F8,
+	[KEY_F9] = RENDERKEY_F9,
+	[KEY_F10] = RENDERKEY_F10,
+	[KEY_F11] = RENDERKEY_F11,
+	[KEY_F12] = RENDERKEY_F12,
+	[KEY_LEFTCTRL] = RENDERKEY_LCTRL,
+	[KEY_LEFTSHIFT] = RENDERKEY_LSHIFT,
+	[KEY_LEFTALT] = RENDERKEY_LALT,
+	[KEY_RIGHTCTRL] = RENDERKEY_RCTRL,
+	[KEY_RIGHTSHIFT] = RENDERKEY_RSHIFT,
+	[KEY_RIGHTALT] = RENDERKEY_RALT,
+	[KEY_DELETE] = RENDERKEY_DEL,
+	[KEY_LEFT] = RENDERKEY_LEFT,
+	[KEY_HOME] = RENDERKEY_HOME,
+	[KEY_END] = RENDERKEY_END,
+	[KEY_UP] = RENDERKEY_UP,
+	[KEY_DOWN] = RENDERKEY_DOWN,
+	[KEY_PAGEUP] = RENDERKEY_PAGEUP,
+	[KEY_PAGEDOWN] = RENDERKEY_PAGEDOWN,
+	[KEY_RIGHT] = RENDERKEY_RIGHT,
+	[KEY_KPSLASH] = 0x80, //NOTE(review): the keypad entries below repeat the numeric values used in scancode_map rather than characters or RENDERKEY_* names - confirm intent
+	[KEY_KPASTERISK] = 0x7C,
+	[KEY_KPMINUS] = 0x7B,
+	[KEY_KPPLUS] = 0x79,
+	[KEY_KPENTER] = 0x19,
+	[KEY_KP1] = 0x69,
+	[KEY_KP2] = 0x72,
+	[KEY_KP3] = 0x7A,
+	[KEY_KP4] = 0x6B,
+	[KEY_KP5] = 0x73,
+	[KEY_KP6] = 0x74,
+	[KEY_KP7] = 0x6C,
+	[KEY_KP8] = 0x75,
+	[KEY_KP9] = 0x7D,
+	[KEY_KP0] = 0x70,
+	[KEY_KPDOT] = 0x71,
+};
+
+//most recently registered drag and drop callback
+static drop_handler drag_drop_handler;
+//Stores handler as the current drag and drop callback
+void render_set_drag_drop_handler(drop_handler handler)
+{
+	drag_drop_handler = handler;
+}
+
+//Returns a newly allocated empty string for every index
+//the result comes from strdup, so the caller owns it
+char* render_joystick_type_id(int index)
+{
+	return strdup("");
+}
+
+//Default overscan amounts, one entry per video standard in the same order as vid_std_names
+//window_setup replaces these with values from the config when they are present
+static uint32_t overscan_top[NUM_VID_STD] = {2, 21};
+static uint32_t overscan_bot[NUM_VID_STD] = {1, 17};
+static uint32_t overscan_left[NUM_VID_STD] = {13, 13};
+static uint32_t overscan_right[NUM_VID_STD] = {14, 14};
+//video standard last passed to render_set_video_standard
+static vid_std video_standard = VID_NTSC;
+
+//Kind of input device an evdev node was classified as
+//the order matters: render_init derives the value from an array index plus one and
+//drain_events uses the value minus one to index its per-type counters
+typedef enum {
+	DEV_NONE,
+	DEV_KEYBOARD,
+	DEV_MOUSE,
+	DEV_GAMEPAD
+} device_type;
+
+//current pointer position clamped to the screen, plus relative motion gathered since the last EV_SYN
+static int32_t mouse_x, mouse_y, mouse_accum_x, mouse_accum_y;
+//Translates one evdev event into the matching handle_* input call
+//dtype decides which kinds of event are acted on for this device
+//device_index is forwarded to the mouse and gamepad handlers
+//always returns 0
+static int32_t handle_event(device_type dtype, int device_index, struct input_event *event)
+{
+	if (event->type == EV_KEY) {
+		//code identifies the key or button, value is non-zero when pressed and 0 when released
+		if (dtype == DEV_KEYBOARD && event->code < 128) {
+			//inside the range covered by the 128 entry lookup tables
+			if (!event->value) {
+				handle_keyup(sym_map[event->code], scancode_map[event->code]);
+			} else {
+				handle_keydown(sym_map[event->code], scancode_map[event->code]);
+			}
+		} else if (dtype == DEV_MOUSE && event->code >= BTN_MOUSE && event->code < BTN_JOYSTICK) {
+			//mouse buttons are reported relative to BTN_LEFT
+			int mouse_button = event->code - BTN_LEFT;
+			if (!event->value) {
+				handle_mouseup(device_index, mouse_button);
+			} else {
+				handle_mousedown(device_index, mouse_button);
+			}
+		} else if (dtype == DEV_GAMEPAD && event->code >= BTN_GAMEPAD && event->code < BTN_DIGI) {
+			//gamepad buttons are reported relative to BTN_SOUTH
+			int pad_button = event->code - BTN_SOUTH;
+			if (!event->value) {
+				handle_joyup(device_index, pad_button);
+			} else {
+				handle_joydown(device_index, pad_button);
+			}
+		}
+		return 0;
+	}
+	if (event->type == EV_REL) {
+		//relative motion is only accumulated here, it gets reported on the next EV_SYN
+		if (dtype == DEV_MOUSE) {
+			if (event->code == REL_X) {
+				mouse_accum_x += event->value;
+			} else if (event->code == REL_Y) {
+				mouse_accum_y += event->value;
+			}
+		}
+		return 0;
+	}
+	if (event->type == EV_SYN) {
+		if (dtype != DEV_MOUSE || !(mouse_accum_x || mouse_accum_y)) {
+			//nothing has moved since the last report
+			return 0;
+		}
+		mouse_x += mouse_accum_x;
+		mouse_y += mouse_accum_y;
+		//keep the pointer on screen
+		if (mouse_x < 0) {
+			mouse_x = 0;
+		} else if (mouse_x >= main_width) {
+			mouse_x = main_width - 1;
+		}
+		if (mouse_y < 0) {
+			mouse_y = 0;
+		} else if (mouse_y >= main_height) {
+			mouse_y = main_height - 1;
+		}
+		handle_mouse_moved(device_index, mouse_x, mouse_y, mouse_accum_x, mouse_accum_y);
+		mouse_accum_x = mouse_accum_y = 0;
+		return 0;
+	}
+	//TODO: Handle EV_ABS for joystick axis/hat motion (handle_joy_axis/handle_joy_dpad) and absolute mouse movement
+	return 0;
+}
+
+//upper bound on the number of evdev devices kept open at once
+#define MAX_DEVICES 16
+//file descriptor and classification of each opened device, the first cur_devices entries are valid
+static int device_fds[MAX_DEVICES];
+static device_type device_types[MAX_DEVICES];
+static int cur_devices;
+
+//Reads every pending event from each opened input device and passes it to handle_event
+//devices are numbered separately per device type in the order they were opened
+static void drain_events()
+{
+	struct input_event pending[64];
+	//running device count per type, slot is the device type minus one
+	int type_counts[3] = {0,0,0};
+	for (int dev = 0; dev < cur_devices; dev++)
+	{
+		device_type dtype = device_types[dev];
+		int device_index = type_counts[dtype-1]++;
+		int bytes;
+		do {
+			bytes = read(device_fds[dev], pending, sizeof(pending));
+			if (bytes < 0) {
+				//running out of events on a non-blocking descriptor is expected
+				if (errno != EAGAIN && errno != EWOULDBLOCK) {
+					perror("Failed to read evdev events");
+				}
+			} else if (bytes > 0) {
+				int num_events = bytes / sizeof(pending[0]);
+				for (struct input_event *cur = pending; cur < pending + num_events; cur++)
+				{
+					handle_event(dtype, device_index, cur);
+				}
+			}
+			//a completely filled buffer means more events may still be queued
+		} while (bytes == sizeof(pending));
+	}
+}
+
+//config node name for each video standard, window_setup looks these up under "video"
+static char *vid_std_names[NUM_VID_STD] = {"ntsc", "pal"};
+
+//Opens the ALSA playback device and negotiates hardware parameters for it
+//the device name, sample rate and buffer size come from the config with defaults of
+//"default", 48000 and 512 respectively
+//signed 16-bit output is preferred with 32-bit float as the fallback, mix is set to match
+//failing to open the device, to find a usable format or to apply the parameters is fatal
+static void init_audio()
+{
+	char *device_name = tern_find_path_default(config, "audio\0alsa_device\0", (tern_val){.ptrval="default"}, TVAL_PTR).ptrval;
+	int res = snd_pcm_open(&audio_handle, device_name, SND_PCM_STREAM_PLAYBACK, 0);
+	if (res < 0) {
+		fatal_error("Failed to open ALSA device: %s\n", snd_strerror(res));
+	}
+	
+	snd_pcm_hw_params_t *params;
+	snd_pcm_hw_params_alloca(&params);
+	res = snd_pcm_hw_params_any(audio_handle, params);
+	if (res < 0) {
+		fatal_error("No playback configurations available: %s\n", snd_strerror(res));
+	}
+	res = snd_pcm_hw_params_set_access(audio_handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
+	if (res < 0) {
+		fatal_error("Failed to set access type: %s\n", snd_strerror(res));
+	}
+	res = snd_pcm_hw_params_set_format(audio_handle, params, SND_PCM_FORMAT_S16_LE);
+	if (res < 0) {
+		//failed to set, signed 16-bit integer, try floating point
+		res = snd_pcm_hw_params_set_format(audio_handle, params, SND_PCM_FORMAT_FLOAT_LE);
+		if (res < 0) {
+			fatal_error("Failed to set an acceptable format: %s\n", snd_strerror(res));
+		}
+		mix = mix_f32;
+	} else {
+		mix = mix_s16;
+	}
+
+    char * rate_str = tern_find_path(config, "audio\0rate\0", TVAL_PTR).ptrval;
+   	sample_rate = rate_str ? atoi(rate_str) : 0;
+   	if (!sample_rate) {
+   		sample_rate = 48000;
+   	}
+	//the _near calls write the value that was actually chosen back through their pointer argument
+	//their return values are not checked here
+    snd_pcm_hw_params_set_rate_near(audio_handle, params, &sample_rate, NULL);
+	output_channels = 2;
+	snd_pcm_hw_params_set_channels_near(audio_handle, params, &output_channels);
+
+    char * samples_str = tern_find_path(config, "audio\0buffer\0", TVAL_PTR).ptrval;
+   	buffer_samples = samples_str ? atoi(samples_str) : 0;
+   	if (!buffer_samples) {
+   		buffer_samples = 512;
+   	}
+	snd_pcm_hw_params_set_period_size_near(audio_handle, params, &buffer_samples, NULL);
+	
+	int dir = 1;
+	unsigned int periods = 2;
+	snd_pcm_hw_params_set_periods_near(audio_handle, params, &periods, &dir);
+
+	//commit everything configured above to the device
+	res = snd_pcm_hw_params(audio_handle, params);
+	if (res < 0) {
+		fatal_error("Failed to set ALSA hardware params: %s\n", snd_strerror(res));
+	}
+	
+	printf("Initialized audio at frequency %d with a %d sample buffer, ", (int)sample_rate, (int)buffer_samples);
+	if (mix == mix_s16) {
+		puts("signed 16-bit int format");
+	} else {
+		puts("32-bit float format");
+	}
+}
+
+//descriptor returned by opening /dev/fb0 in window_setup
+int fbfd;
+//memory mapped framebuffer, only set up by window_setup when GL rendering is not in use
+uint32_t *framebuffer;
+//length of one framebuffer line in bytes as reported by FBIOGET_FSCREENINFO
+uint32_t fb_stride;
+#ifndef DISABLE_OPENGL
+//EGL display connection and window surface created by egl_setup
+EGLDisplay egl_display;
+EGLSurface main_surface;
+//Initializes EGL, chooses a config, creates a context and a window surface and makes them current
+//returns 1 on success and 0 on failure
+//failures after eglInitialize succeeds release whatever was created and terminate the display connection
+uint8_t egl_setup(void)
+{
+	//Mesa wants the fbdev file descriptor as the display
+	egl_display = eglGetDisplay((EGLNativeDisplayType)fbfd);
+	if (egl_display == EGL_NO_DISPLAY) {
+		//Mali (and possibly others) seems to just want EGL_DEFAULT_DISPLAY
+		egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
+		if (egl_display == EGL_NO_DISPLAY) {
+			warning("eglGetDisplay failed with error %X\n", eglGetError());
+			return 0;
+		}
+	}
+	EGLint major, minor;
+	if (!eglInitialize(egl_display, &major, &minor)) {
+		warning("eglInitialize failed with error %X\n", eglGetError());
+		return 0;
+	}
+	printf("EGL version %d.%d\n", major, minor);
+	EGLint num_configs;
+	//note that this local hides the global named config for the rest of the function
+	EGLConfig config;
+	EGLint const config_attribs[] = {
+		EGL_RED_SIZE, 5,
+		EGL_GREEN_SIZE, 5,
+		EGL_BLUE_SIZE, 5,
+		EGL_CONFORMANT, EGL_OPENGL_ES2_BIT,
+		EGL_NONE
+	};
+	if (!eglChooseConfig(egl_display, config_attribs, &config, 1, &num_configs)) {
+		//treat a failed call the same as finding no matching config
+		num_configs = 0;
+		warning("eglChooseConfig failed with error %X\n", eglGetError());
+	}
+	if (!num_configs) {
+		warning("Failed to choose an EGL config\n");
+		goto error;
+	}
+	EGLint const context_attribs[] = {
+#ifdef EGL_CONTEXT_MAJOR_VERSION
+		EGL_CONTEXT_MAJOR_VERSION, 2,
+#endif
+		EGL_NONE
+	};
+	main_context = eglCreateContext(egl_display, config, EGL_NO_CONTEXT, context_attribs);
+	if (main_context == EGL_NO_CONTEXT) {
+		warning("Failed to create EGL context %X\n", eglGetError());
+		goto error;
+	}
+#ifdef USE_MALI
+	struct mali_native_window native_window = {
+		.width = main_width,
+		.height = main_height
+	};
+	main_surface = eglCreateWindowSurface(egl_display, config, &native_window, NULL);
+#else
+	main_surface = eglCreateWindowSurface(egl_display, config, (EGLNativeWindowType)NULL, NULL);
+#endif
+	if (main_surface == EGL_NO_SURFACE) {
+		warning("Failed to create EGL surface %X\n", eglGetError());
+		goto post_context_error;
+	}
+	if (eglMakeCurrent(egl_display, main_surface, main_surface, main_context)) {
+		return 1;
+	}
+	//eglMakeCurrent failed, fall through the labels to undo everything in reverse order
+	eglDestroySurface(egl_display, main_surface);
+post_context_error:
+	eglDestroyContext(egl_display, main_context);
+error:
+	eglTerminate(egl_display);
+	return 0;
+}
+#endif
+//buffer_lock and buffer_cond are used to hand frames from render_framebuffer_updated to the buffer_copy thread
+static pthread_mutex_t buffer_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t buffer_cond = PTHREAD_COND_INITIALIZER;
+//set to 1 when a frame is waiting to be copied, cleared by buffer_copy before it copies
+static uint8_t buffer_ready;
+//first visible pixel of the frame do_buffer_copy should scale
+static uint32_t *copy_buffer;
+//size of the frame in copy_buffer along with the size it should be scaled from
+static uint32_t last_width, last_width_scale, last_height, last_height_scale;
+//upper limit on the integer scale factor read from the config, 0 means no limit
+static uint32_t max_multiple;
+
+//Blends two packed 32-bit pixels one 8-bit channel at a time
+//ratio is the weight given to last, cur is weighted by 1.0f - ratio
+//each blended channel is truncated to an integer before being packed again
+static uint32_t mix_pixel(uint32_t last, uint32_t cur, float ratio)
+{
+	float cur_ratio = 1.0f - ratio;
+	uint32_t mixed = 0;
+	for (int shift = 0; shift < 32; shift += 8)
+	{
+		float channel = (last >> shift & 255) * ratio;
+		channel += (cur >> shift & 255) * cur_ratio;
+		//shift as an unsigned value, moving 128 or more into the top byte of a signed int is undefined
+		mixed |= ((uint32_t)(int)channel) << shift;
+	}
+	return mixed;
+}
+//Scales the frame at copy_buffer into the memory mapped framebuffer
+//the output is last_width_scale by last_height_scale multiplied by the largest whole multiple that
+//fits in main_width by main_height, limited to max_multiple when that is non-zero, and is centered
+//source lines are LINEBUF_SIZE pixels apart, destination lines are fb_stride bytes apart
+//when a source dimension does not divide evenly into the output size, output pixels or lines
+//that straddle two source pixels or lines are blended with mix_pixel
+static void do_buffer_copy(void)
+{
+	uint32_t width_multiple = main_width / last_width_scale;
+	uint32_t height_multiple = main_height / last_height_scale;
+	uint32_t multiple = width_multiple < height_multiple ? width_multiple : height_multiple;
+	if (max_multiple && multiple > max_multiple) {
+		multiple = max_multiple;
+	}
+	//whole number of output lines per source line, only exact when the check further down passes
+	height_multiple = last_height_scale * multiple / last_height;
+	//start at the top left corner of the centered output area
+	uint32_t *cur_line = framebuffer + (main_width - last_width_scale * multiple)/2;
+	cur_line += fb_stride * (main_height - last_height_scale * multiple) / (2 * sizeof(uint32_t));
+	uint32_t *src_line = copy_buffer;
+	if (height_multiple * last_height == multiple * last_height_scale) {
+		//vertical scale is a whole number so each source line is just repeated
+		if (last_width == last_width_scale) {
+			//horizontal scale is a whole number too, plain pixel repetition
+			for (uint32_t y = 0; y < last_height; y++)
+			{
+				for (uint32_t i = 0; i < height_multiple; i++)
+				{
+					uint32_t *cur = cur_line;
+					uint32_t *src = src_line;
+					for (uint32_t x = 0; x < last_width	; x++)
+					{
+						uint32_t pixel = *(src++);
+						for (uint32_t j = 0; j < multiple; j++)
+						{
+							*(cur++) = pixel;
+						}
+					}
+					
+					cur_line += fb_stride / sizeof(uint32_t);
+				}
+				src_line += LINEBUF_SIZE;
+			}
+		} else {
+			//fractional horizontal scale
+			//remaining is the share of the next output pixel that belongs to the previous source pixel
+			float scale_multiple = ((float)(last_width_scale * multiple)) / (float)last_width;
+			float remaining = 0.0f;
+			uint32_t last_pixel = 0;
+			for (uint32_t y = 0; y < last_height; y++)
+			{
+				for (uint32_t i = 0; i < height_multiple; i++)
+				{
+					uint32_t *cur = cur_line;
+					uint32_t *src = src_line;
+					for (uint32_t x = 0; x < last_width	; x++)
+					{
+						uint32_t pixel = *(src++);
+						float count = scale_multiple;
+						if (remaining > 0.0f) {
+							*(cur++) = mix_pixel(last_pixel, pixel, remaining);
+							count -= 1.0f - remaining;
+						}
+						for (; count >= 1; count -= 1.0f)
+						{
+							*(cur++) = pixel;
+						}
+						remaining = count;
+						last_pixel = pixel;
+					}
+					
+					cur_line += fb_stride / sizeof(uint32_t);
+				}
+				src_line += LINEBUF_SIZE;
+			}
+		}
+	} else {
+		//fractional vertical scale
+		//height_remaining is the share of the next output line that belongs to the previous source line
+		float height_scale = ((float)(last_height_scale * multiple)) / (float)last_height;
+		float height_remaining = 0.0f;
+		//only read once height_remaining is non-zero, which first happens after a source line has been processed
+		uint32_t *last_line;
+		if (last_width == last_width_scale) {
+			for (uint32_t y = 0; y < last_height; y++)
+			{
+				float hcount = height_scale;
+				if (height_remaining > 0.0f) {
+					//output line shared between the previous and current source lines
+					uint32_t *cur = cur_line;
+					uint32_t *src = src_line;
+					uint32_t *last = last_line;
+					for (uint32_t x = 0; x < last_width	; x++)
+					{
+						uint32_t mixed = mix_pixel(*(last++), *(src++), height_remaining);
+						for (uint32_t j = 0; j < multiple; j++)
+						{
+							*(cur++) = mixed;
+						}
+					}
+					hcount -= 1.0f - height_remaining;
+					cur_line += fb_stride / sizeof(uint32_t);
+				}
+				for(; hcount >= 1; hcount -= 1.0f)
+				{
+					uint32_t *cur = cur_line;
+					uint32_t *src = src_line;
+					for (uint32_t x = 0; x < last_width	; x++)
+					{
+						uint32_t pixel = *(src++);
+						for (uint32_t j = 0; j < multiple; j++)
+						{
+							*(cur++) = pixel;
+						}
+					}
+					
+					cur_line += fb_stride / sizeof(uint32_t);
+				}
+				height_remaining = hcount;
+				last_line = src_line;
+				src_line += LINEBUF_SIZE;
+			}
+		} else {
+			//both directions are fractional, lines are blended first and then pixels within the line
+			float scale_multiple = ((float)(last_width_scale * multiple)) / (float)last_width;
+			float remaining = 0.0f;
+			uint32_t last_pixel = 0;
+			for (uint32_t y = 0; y < last_height; y++)
+			{
+				float hcount = height_scale;
+				if (height_remaining > 0.0f) {
+					uint32_t *cur = cur_line;
+					uint32_t *src = src_line;
+					uint32_t *last = last_line;
+					
+					for (uint32_t x = 0; x < last_width; x++)
+					{
+						uint32_t pixel = mix_pixel(*(last++), *(src++), height_remaining);
+						float count = scale_multiple;
+						if (remaining > 0.0f) {
+							*(cur++) = mix_pixel(last_pixel, pixel, remaining);
+							count -= 1.0f - remaining;
+						}
+						for (; count >= 1.0f; count -= 1.0f)
+						{
+							*(cur++) = pixel;
+						}
+						remaining = count;
+						last_pixel = pixel;
+					}
+					hcount -= 1.0f - height_remaining;
+					cur_line += fb_stride / sizeof(uint32_t);
+				}
+				
+				for (; hcount >= 1.0f; hcount -= 1.0f)
+				{
+					uint32_t *cur = cur_line;
+					uint32_t *src = src_line;
+					for (uint32_t x = 0; x < last_width	; x++)
+					{
+						uint32_t pixel = *(src++);
+						float count = scale_multiple;
+						if (remaining > 0.0f) {
+							*(cur++) = mix_pixel(last_pixel, pixel, remaining);
+							count -= 1.0f - remaining;
+						}
+						for (; count >= 1; count -= 1.0f)
+						{
+							*(cur++) = pixel;
+						}
+						remaining = count;
+						last_pixel = pixel;
+					}
+					
+					cur_line += fb_stride / sizeof(uint32_t);
+				}
+				height_remaining = hcount;
+				last_line = src_line;
+				src_line += LINEBUF_SIZE;
+			}
+		}
+	}
+}
+//Thread entry point that copies frames to the framebuffer whenever buffer_ready is set
+//buffer_lock is taken once up front and is only released while waiting on buffer_cond
+//the loop never exits, so the final return is never reached
+static void *buffer_copy(void *data)
+{
+	pthread_mutex_lock(&buffer_lock);
+	while (1)
+	{
+		if (!buffer_ready) {
+			//nothing to do yet, sleep until a frame is signalled and check again
+			pthread_cond_wait(&buffer_cond, &buffer_lock);
+			continue;
+		}
+		buffer_ready = 0;
+		do_buffer_copy();
+	}
+	return 0;
+}
+
+//thread running buffer_copy and whether frames are handed to it rather than copied inline
+static pthread_t buffer_copy_handle;
+static uint8_t copy_use_thread;
+//Opens the framebuffer device and sets up either EGL or direct framebuffer output
+//main_width, main_height and fb_stride are taken from the framebuffer's screen info
+//overscan values, GL use, the scale limit, threaded copying and scanlines are read from the config
+//being unable to open, query or map the framebuffer is fatal
+//if the copy thread can't be started, frames are copied inline instead
+void window_setup(void)
+{
+	fbfd = open("/dev/fb0", O_RDWR);
+	if (fbfd < 0) {
+		fatal_error("Failed to open /dev/fb0: %s\n", strerror(errno));
+	}
+	struct fb_fix_screeninfo fixInfo;
+	struct fb_var_screeninfo varInfo;
+	if (ioctl(fbfd, FBIOGET_FSCREENINFO, &fixInfo) < 0) {
+		fatal_error("Failed to get fixed framebuffer info: %s\n", strerror(errno));
+	}
+	if (ioctl(fbfd, FBIOGET_VSCREENINFO, &varInfo) < 0) {
+		fatal_error("Failed to get variable framebuffer info: %s\n", strerror(errno));
+	}
+	printf("Resolution: %d x %d\n", varInfo.xres, varInfo.yres);
+	printf("Framebuffer size: %d, line stride: %d\n", fixInfo.smem_len, fixInfo.line_length);
+	main_width = varInfo.xres;
+	main_height = varInfo.yres;
+	fb_stride = fixInfo.line_length;
+	
+	tern_node *video = tern_find_node(config, "video");
+	if (video)
+	{
+		for (int i = 0; i < NUM_VID_STD; i++)
+		{
+			tern_node *std_settings = tern_find_node(video, vid_std_names[i]);
+			if (std_settings) {
+				char *val = tern_find_path_default(std_settings, "overscan\0top\0", (tern_val){.ptrval = NULL}, TVAL_PTR).ptrval;
+				if (val) {
+					overscan_top[i] = atoi(val);
+				}
+				val = tern_find_path_default(std_settings, "overscan\0bottom\0", (tern_val){.ptrval = NULL}, TVAL_PTR).ptrval;
+				if (val) {
+					overscan_bot[i] = atoi(val);
+				}
+				val = tern_find_path_default(std_settings, "overscan\0left\0", (tern_val){.ptrval = NULL}, TVAL_PTR).ptrval;
+				if (val) {
+					overscan_left[i] = atoi(val);
+				}
+				val = tern_find_path_default(std_settings, "overscan\0right\0", (tern_val){.ptrval = NULL}, TVAL_PTR).ptrval;
+				if (val) {
+					overscan_right[i] = atoi(val);
+				}
+			}
+		}
+	}
+	//GL rendering is off unless the config asks for it
+	tern_val def = {.ptrval = "off"};
+	render_gl = 0;
+#ifndef DISABLE_OPENGL
+	char *gl_enabled_str = tern_find_path_default(config, "video\0gl\0", def, TVAL_PTR).ptrval;
+	uint8_t gl_enabled = strcmp(gl_enabled_str, "off") != 0;
+	if (gl_enabled)
+	{
+		render_gl = egl_setup();
+		blue_shift = 16;
+		green_shift = 8;
+		red_shift = 0;
+	}
+	if (!render_gl) {
+#endif
+	//GL is disabled or could not be set up, draw straight into the framebuffer
+	framebuffer = mmap(NULL, fixInfo.smem_len, PROT_READ|PROT_WRITE, MAP_SHARED, fbfd, 0);
+	if (framebuffer == MAP_FAILED) {
+		fatal_error("Failed to map framebuffer memory: %s\n", strerror(errno));
+	}
+	red_shift = varInfo.red.offset;
+	green_shift = varInfo.green.offset;
+	blue_shift = varInfo.blue.offset;
+	def.ptrval = "0";
+	max_multiple = atoi(tern_find_path_default(config, "video\0fbdev\0max_multiple\0", def, TVAL_PTR).ptrval);
+	def.ptrval = "true";
+	copy_use_thread = strcmp(tern_find_path_default(config, "video\0fbdev\0use_thread\0", def, TVAL_PTR).ptrval, "false");
+	if (copy_use_thread) {
+		int thread_res = pthread_create(&buffer_copy_handle, NULL, buffer_copy, NULL);
+		if (thread_res) {
+			//without the thread nothing would ever consume signalled frames
+			warning("Failed to create framebuffer copy thread: %s\n", strerror(thread_res));
+			copy_use_thread = 0;
+		}
+	}
+#ifndef DISABLE_OPENGL
+	}
+#endif
+	
+	update_aspect();
+	render_alloc_surfaces();
+	def.ptrval = "off";
+	scanlines = !strcmp(tern_find_path_default(config, "video\0scanlines\0", def, TVAL_PTR).ptrval, "on");
+}
+
+//Puts the console back into text mode and releases the grab on every keyboard device
+//registered with atexit by render_init
+void restore_tty(void)
+{
+	ioctl(STDIN_FILENO, KDSETMODE, KD_TEXT);
+	for (int dev = 0; dev < cur_devices; dev++)
+	{
+		if (device_types[dev] != DEV_KEYBOARD) {
+			continue;
+		}
+		ioctl(device_fds[dev], EVIOCGRAB, 0);
+	}
+}
+
+//Initializes video, audio and input for the fbdev backend
+//width and height set the windowed size, a height of 0 or less is derived from width and the configured aspect ratio
+//title becomes the caption, fullscreen is not used here
+//when stdin is a terminal it is switched to graphics mode and keyboards are grabbed, restore_tty undoes both at exit
+//every /dev/input/event* node that reports key events is opened and classified as keyboard, mouse or gamepad
+void render_init(int width, int height, char * title, uint8_t fullscreen)
+{
+	if (height <= 0) {
+		float aspect = config_aspect() > 0.0f ? config_aspect() : 4.0f/3.0f;
+		height = ((float)width / aspect) + 0.5f;
+	}
+	printf("width: %d, height: %d\n", width, height);
+	windowed_width = width;
+	windowed_height = height;
+	
+	main_width = width;
+	main_height = height;
+	
+	caption = title;
+	
+	if (isatty(STDIN_FILENO)) {
+		ioctl(STDIN_FILENO, KDSETMODE, KD_GRAPHICS);
+		atexit(restore_tty);
+	}
+	
+	window_setup();
+	
+	init_audio();
+	
+	render_set_video_standard(VID_NTSC);
+	
+	DIR *d = opendir("/dev/input");
+	if (!d) {
+		//carry on without input devices rather than handing a NULL handle to readdir
+		warning("Failed to open /dev/input: %s\n", strerror(errno));
+	}
+	struct dirent* entry;
+	int joystick_counter = 0;
+	while (d && cur_devices < MAX_DEVICES && (entry = readdir(d)))
+	{
+		if (strncmp("event", entry->d_name, strlen("event"))) {
+			//not an evdev node
+			continue;
+		}
+		char *filename = alloc_concat("/dev/input/", entry->d_name);
+		int fd = open(filename, O_RDONLY);
+		if (fd == -1) {
+			int errnum = errno;
+			warning("Failed to open evdev device %s for reading: %s\n", filename, strerror(errnum));
+			free(filename);
+			continue;
+		}
+		
+		unsigned long bits = 0;
+		if (-1 == ioctl(fd, EVIOCGBIT(0, sizeof(bits)), &bits)) {
+			int errnum = errno;
+			warning("Failed get capability bits from evdev device %s: %s\n", filename, strerror(errnum));
+			free(filename);
+			close(fd);
+			continue;
+		}
+		if (!(1 & bits >> EV_KEY)) {
+			//if it doesn't support key events we don't care about it
+			free(filename);
+			close(fd);
+			continue;
+		}
+		unsigned long button_bits[(BTN_THUMBR+8*sizeof(long))/(8*sizeof(long))];
+		int res = ioctl(fd, EVIOCGBIT(EV_KEY, sizeof(button_bits)), button_bits);
+		if (-1 == res) {
+			int errnum = errno;
+			warning("Failed get capability bits from evdev device %s: %s\n", filename, strerror(errnum));
+			free(filename);
+			close(fd);
+			continue;
+		}
+		//one representative key per device type, a later match overrides an earlier one
+		int to_check[] = {KEY_ENTER, BTN_MOUSE, BTN_GAMEPAD};
+		device_type dtype = DEV_NONE;
+		for (int i = 0; i < 3; i++)
+		{
+			if (1 & button_bits[to_check[i]/(8*sizeof(button_bits[0]))] >> to_check[i]%(8*sizeof(button_bits[0]))) {
+				dtype = i + 1;
+			}
+		}
+		if (dtype == DEV_NONE) {
+			close(fd);
+		} else {
+			device_fds[cur_devices] = fd;
+			device_types[cur_devices] = dtype;
+			char name[1024];
+			char *names[] = {"Keyboard", "Mouse", "Gamepad"};
+			//make sure there is a valid string to print even if the name can't be fetched
+			name[0] = 0;
+			ioctl(fd, EVIOCGNAME(sizeof(name)), name);
+			printf("%s is a %s\n%s\n", filename, names[dtype - 1], name);
+			
+			if (dtype == DEV_GAMEPAD) {
+				handle_joy_added(joystick_counter++);
+			} else if (dtype == DEV_KEYBOARD && isatty(STDIN_FILENO)) {
+				ioctl(fd, EVIOCGRAB, 1);
+			}
+			
+			//set FD to non-blocking mode for event polling
+			fcntl(fd, F_SETFL, O_NONBLOCK);
+			cur_devices++;
+		}
+		free(filename);
+	}
+	if (d) {
+		closedir(d);
+	}
+
+	atexit(render_quit);
+}
+#include<unistd.h>
+static int in_toggle;
+//Recomputes the low pass filter coefficient of src for the time constant rc
+//the coefficient is dt / (dt + rc) stored as a fixed point value scaled by 0x10000
+//sync_changed is not used
+static void update_source(audio_source *src, double rc, uint8_t sync_changed)
+{
+	src->lowpass_alpha = (int32_t)(((double)0x10000) * (src->dt / (src->dt + rc)));
+}
+
+//Re-applies video and audio settings after the config has changed
+//surfaces are freed, GL state is torn down when in use, the windowed size is re-read and
+//window_setup is run again, then audio is reopened and every audio source gets its low pass
+//coefficient recalculated
+//pending input events are drained before and after
+void render_config_updated(void)
+{
+	
+	free_surfaces();
+#ifndef DISABLE_OPENGL
+	if (render_gl) {
+		/*if (on_context_destroyed) {
+			on_context_destroyed();
+		}*/
+		gl_teardown();
+		//FIXME: EGL equivalent
+		//SDL_GL_DeleteContext(main_context);
+	} else {
+#endif
+#ifndef DISABLE_OPENGL
+	}
+#endif
+	//FIXME: EGL equivalent
+	//SDL_DestroyWindow(main_window);
+	drain_events();
+	
+	char *config_width = tern_find_path(config, "video\0width\0", TVAL_PTR).ptrval;
+	if (config_width) {
+		windowed_width = atoi(config_width);
+	}
+	char *config_height = tern_find_path(config, "video\0height\0", TVAL_PTR).ptrval;
+	if (config_height) {
+		windowed_height = atoi(config_height);
+	} else {
+		//no explicit height, derive it from the width and the configured aspect ratio (4:3 if unset)
+		float aspect = config_aspect() > 0.0f ? config_aspect() : 4.0f/3.0f;
+		windowed_height = ((float)windowed_width / aspect) + 0.5f;
+	}
+	
+	//NOTE(review): window_setup opens /dev/fb0 again and may start another copy thread without
+	//releasing what an earlier call set up - confirm whether that is intended
+	window_setup();
+	update_aspect();
+#ifndef DISABLE_OPENGL
+	//need to check render_gl again after window_setup as render option could have changed
+	/*if (render_gl && on_context_created) {
+		on_context_created();
+	}*/
+#endif
+
+	render_close_audio();
+	quitting = 0;
+	init_audio();
+	render_set_video_standard(video_standard);
+	
+	double lowpass_cutoff = get_lowpass_cutoff(config);
+	double rc = (1.0 / lowpass_cutoff) / (2.0 * M_PI);
+	for (uint8_t i = 0; i < num_audio_sources; i++)
+	{
+		update_source(audio_sources[i], rc, 0);
+	}
+	for (uint8_t i = 0; i < num_inactive_audio_sources; i++)
+	{
+		update_source(inactive_audio_sources[i], rc, 0);
+	}
+	drain_events();
+}
+
+//Records std as the current video standard
+//it selects which overscan entries and which frame height the other render functions use
+void render_set_video_standard(vid_std std)
+{
+	video_standard = std;
+}
+
+//Replaces the stored caption with title and discards the cached FPS caption
+//title is stored as is, not copied
+void render_update_caption(char *title)
+{
+	caption = title;
+	free(fps_caption);
+	fps_caption = NULL;
+}
+
+//path requested for the next screenshot, owned by this file once set
+static char *screenshot_path;
+//Takes ownership of path as the pending screenshot destination
+//any path that was still pending is freed
+void render_save_screenshot(char *path)
+{
+	//free accepts NULL, so no pending path needs no special case
+	free(screenshot_path);
+	screenshot_path = path;
+}
+
+//Extra windows can't be created in this backend, always returns 0
+uint8_t render_create_window(char *caption, uint32_t width, uint32_t height, window_close_handler close_handler)
+{
+	//not supported under fbdev
+	return 0;
+}
+
+//Does nothing since render_create_window never creates a window
+void render_destroy_window(uint8_t which)
+{
+	//not supported under fbdev
+}
+
+//framebuffer id of the last completed frame and the offset of the half of texture_buf being drawn to
+static uint8_t last_fb;
+static uint32_t texture_off;
+//Returns the buffer the next frame should be drawn into and stores its line pitch in bytes in *pitch
+//with GL off and max_multiple set to 1 this is the mapped framebuffer itself, otherwise it is texture_buf
+//with GL off and a different which than the last frame, the pitch is doubled and FRAMEBUFFER_EVEN starts one line in
+uint32_t *render_get_framebuffer(uint8_t which, int *pitch)
+{
+	uint32_t *target;
+	uint32_t line_bytes;
+	if (max_multiple == 1 && !render_gl) {
+		target = framebuffer;
+		line_bytes = fb_stride;
+	} else {
+		target = texture_buf + texture_off;
+		line_bytes = LINEBUF_SIZE * sizeof(uint32_t);
+	}
+	if (render_gl || last_fb == which) {
+		*pitch = line_bytes;
+		return target;
+	}
+	//fields alternate lines, so skip every other line and offset the even field by one
+	*pitch = line_bytes * 2;
+	if (which == FRAMEBUFFER_EVEN) {
+		target += line_bytes / sizeof(uint32_t);
+	}
+	return target;
+}
+
+//number of times process_events has polled input since the last frame was presented
+uint8_t events_processed;
+//FPS_INTERVAL is not referenced in the visible part of this file
+#ifdef __ANDROID__
+#define FPS_INTERVAL 10000
+#else
+#define FPS_INTERVAL 1000
+#endif
+
+static uint8_t interlaced;
+void render_update_display();
+//Presents the frame that was drawn into the buffer identified by which
+//width is the frame width in pixels, left and right overscan are subtracted from it here
+//with GL the frame is uploaded as a texture and drawn via render_update_display
+//without GL the frame geometry is recorded and the frame is either handed to the copy thread
+//or scaled immediately with do_buffer_copy
+//nothing is copied when max_multiple is 1, presumably because render_get_framebuffer hands out
+//the mapped framebuffer directly in that case
+void render_framebuffer_updated(uint8_t which, int width)
+{
+	uint32_t height = which <= FRAMEBUFFER_EVEN 
+		? (video_standard == VID_NTSC ? 243 : 294) - (overscan_top[video_standard] + overscan_bot[video_standard])
+		: 240;
+	width -= overscan_left[video_standard] + overscan_right[video_standard];
+#ifndef DISABLE_OPENGL
+	if (render_gl && which <= FRAMEBUFFER_EVEN) {
+		last_width = width;
+		glBindTexture(GL_TEXTURE_2D, textures[which]);
+		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, LINEBUF_SIZE, height, SRC_FORMAT, GL_UNSIGNED_BYTE, texture_buf + overscan_left[video_standard] + LINEBUF_SIZE * overscan_top[video_standard]);
+		render_update_display();
+		last_height = height;
+	} else {
+#endif
+	if (max_multiple != 1) {
+		if (copy_use_thread) {
+			pthread_mutex_lock(&buffer_lock);
+				buffer_ready = 1;
+				last_width = width;
+				last_width_scale = LINEBUF_SIZE - (overscan_left[video_standard] + overscan_right[video_standard]);
+				last_height = last_height_scale = height;
+				copy_buffer = texture_buf + texture_off + overscan_left[video_standard] + LINEBUF_SIZE * overscan_top[video_standard];
+				if (which != last_fb) {
+					//buffer differs from the last frame, the two fields are interleaved so there are twice as many lines
+					last_height *= 2;
+					copy_buffer += LINEBUF_SIZE * overscan_top[video_standard];
+					//carry the other field's lines over from the half of texture_buf that is not current
+					uint32_t *src = texture_buf + (texture_off ? 0 : LINEBUF_SIZE * MAX_FB_LINES) + overscan_left[video_standard] + LINEBUF_SIZE * overscan_top[video_standard] + LINEBUF_SIZE * overscan_top[video_standard];
+					uint32_t *dst = copy_buffer;
+					if (which == FRAMEBUFFER_ODD) {
+						src += LINEBUF_SIZE;
+						dst += LINEBUF_SIZE;
+					}
+					for (int i = 0; i < height; i++)
+					{
+						memcpy(dst, src, width * sizeof(uint32_t));
+						src += LINEBUF_SIZE * 2;
+						dst += LINEBUF_SIZE * 2;
+					}
+				}
+				//switch to the other half of texture_buf so drawing can continue while the copy thread reads this one
+				texture_off = texture_off ? 0 : LINEBUF_SIZE * MAX_FB_LINES;
+				pthread_cond_signal(&buffer_cond);
+			pthread_mutex_unlock(&buffer_lock);
+		} else {
+			last_width = width;
+			last_width_scale = LINEBUF_SIZE - (overscan_left[video_standard] + overscan_right[video_standard]);
+			last_height = last_height_scale = height;
+			copy_buffer = texture_buf + texture_off + overscan_left[video_standard] + LINEBUF_SIZE * overscan_top[video_standard];
+			if (which != last_fb) {
+				last_height *= 2;
+				copy_buffer += LINEBUF_SIZE * overscan_top[video_standard];
+			}
+			do_buffer_copy();
+		}
+	}
+	last_fb = which;
+	//make sure input gets polled at least once per frame
+	if (!events_processed) {
+		process_events();
+	}
+	events_processed = 0;
+#ifndef DISABLE_OPENGL
+	}
+#endif
+}
+
+//Draws the current frame textures with GL when GL rendering is active and swaps buffers
+//in every mode it polls input if that has not happened since the last frame and resets the poll counter
+void render_update_display()
+{
+#ifndef DISABLE_OPENGL
+	if (render_gl) {
+		glClearColor(0.0f, 0.0f, 0.0f, 1.0f);
+		glClear(GL_COLOR_BUFFER_BIT);
+
+		glUseProgram(program);
+		glActiveTexture(GL_TEXTURE0);
+		glBindTexture(GL_TEXTURE_2D, textures[0]);
+		glUniform1i(un_textures[0], 0);
+
+		//second texture unit is texture 1 when interlaced, texture 2 when scanlines are on and texture 0 otherwise
+		glActiveTexture(GL_TEXTURE1);
+		glBindTexture(GL_TEXTURE_2D, textures[interlaced ? 1 : scanlines ? 2 : 0]);
+		glUniform1i(un_textures[1], 1);
+
+		glUniform1f(un_width, render_emulated_width());
+		glUniform1f(un_height, last_height);
+
+		glBindBuffer(GL_ARRAY_BUFFER, buffers[0]);
+		glVertexAttribPointer(at_pos, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat[2]), (void *)0);
+		glEnableVertexAttribArray(at_pos);
+
+		glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffers[1]);
+		glDrawElements(GL_TRIANGLE_STRIP, 4, GL_UNSIGNED_SHORT, (void *)0);
+
+		glDisableVertexAttribArray(at_pos);
+		
+		/*if (render_ui) {
+			render_ui();
+		}*/
+
+		eglSwapBuffers(egl_display, main_surface);
+	}
+#endif
+	if (!events_processed) {
+		process_events();
+	}
+	events_processed = 0;
+}
+
+//Returns last_width with the left and right overscan of the current video standard subtracted
+//NOTE(review): render_framebuffer_updated stores last_width after it has already subtracted
+//the same overscan from width, so overscan may be removed twice here - confirm
+uint32_t render_emulated_width()
+{
+	return last_width - overscan_left[video_standard] - overscan_right[video_standard];
+}
+
+//Returns the number of visible lines for the current video standard
+//243 lines for NTSC or 294 otherwise, less the top and bottom overscan
+uint32_t render_emulated_height()
+{
+	return (video_standard == VID_NTSC ? 243 : 294) - overscan_top[video_standard] - overscan_bot[video_standard];
+}
+
+//Returns the left overscan for the current video standard
+uint32_t render_overscan_left()
+{
+	return overscan_left[video_standard];
+}
+
+//Returns the top overscan for the current video standard
+uint32_t render_overscan_top()
+{
+	return overscan_top[video_standard];
+}
+
+//Polls input once a second forever, this function never returns
+//context is not used
+void render_wait_quit(vdp_context * context)
+{
+	while (1)
+	{
+		drain_events();
+		sleep(1);
+	}
+}
+
+//Translates a controller button name into the evdev button code it corresponds to
+//both Xbox/SDL style and Playstation style names are understood
+//returns KEY_CNT when name is not a known button
+//the lookup table is built on the first call and kept for later ones
+int render_lookup_button(char *name)
+{
+	static tern_node *button_lookup;
+	if (!button_lookup) {
+		//xbox/sdl style names
+		button_lookup = tern_insert_int(button_lookup, "a", BTN_SOUTH);
+		button_lookup = tern_insert_int(button_lookup, "b", BTN_EAST);
+		button_lookup = tern_insert_int(button_lookup, "x", BTN_WEST);
+		button_lookup = tern_insert_int(button_lookup, "y", BTN_NORTH);
+		button_lookup = tern_insert_int(button_lookup, "back", BTN_SELECT);
+		button_lookup = tern_insert_int(button_lookup, "start", BTN_START);
+		button_lookup = tern_insert_int(button_lookup, "guide", BTN_MODE);
+		//misspelled form that used to be the only one recognized, kept so nothing relying on it breaks
+		button_lookup = tern_insert_int(button_lookup, "guid", BTN_MODE);
+		button_lookup = tern_insert_int(button_lookup, "leftshoulder", BTN_TL);
+		button_lookup = tern_insert_int(button_lookup, "rightshoulder", BTN_TR);
+		button_lookup = tern_insert_int(button_lookup, "leftstick", BTN_THUMBL);
+		button_lookup = tern_insert_int(button_lookup, "rightstick", BTN_THUMBR);
+		//alternative Playstation-style names
+		button_lookup = tern_insert_int(button_lookup, "cross", BTN_SOUTH);
+		button_lookup = tern_insert_int(button_lookup, "circle", BTN_EAST);
+		button_lookup = tern_insert_int(button_lookup, "square", BTN_WEST);
+		button_lookup = tern_insert_int(button_lookup, "triangle", BTN_NORTH);
+		button_lookup = tern_insert_int(button_lookup, "share", BTN_SELECT);
+		button_lookup = tern_insert_int(button_lookup, "select", BTN_SELECT);
+		button_lookup = tern_insert_int(button_lookup, "options", BTN_START);
+		button_lookup = tern_insert_int(button_lookup, "l1", BTN_TL);
+		button_lookup = tern_insert_int(button_lookup, "r1", BTN_TR);
+		button_lookup = tern_insert_int(button_lookup, "l3", BTN_THUMBL);
+		button_lookup = tern_insert_int(button_lookup, "r3", BTN_THUMBR);
+	}
+	return (int)tern_find_int(button_lookup, name, KEY_CNT);
+}
+
+//Translates a controller axis name into the evdev absolute axis code it corresponds to
+//returns ABS_CNT when name is not a known axis
+//the lookup table is built on the first call and kept for later ones
+int render_lookup_axis(char *name)
+{
+	static tern_node *axis_lookup;
+	if (!axis_lookup) {
+		static struct {
+			char *name;
+			int  code;
+		} known_axes[] = {
+			//xbox/sdl style names
+			{"leftx", ABS_X},
+			{"lefty", ABS_Y},
+			{"lefttrigger", ABS_Z},
+			{"rightx", ABS_RX},
+			{"righty", ABS_RY},
+			{"righttrigger", ABS_RZ},
+			//alternative Playstation-style names
+			{"l2", ABS_Z},
+			{"r2", ABS_RZ}
+		};
+		for (int i = 0; i < sizeof(known_axes)/sizeof(known_axes[0]); i++)
+		{
+			axis_lookup = tern_insert_int(axis_lookup, known_axes[i].name, known_axes[i].code);
+		}
+	}
+	return (int)tern_find_int(axis_lookup, name, ABS_CNT);
+}
+
+//Translates the name of a controller input into the value used to identify it in bindings
+//axis names give RENDER_AXIS_BIT combined with the axis code
+//button names give the button code
+//d-pad directions give RENDER_DPAD_BIT combined with a direction bit (up 1, right 2, down 4, left 8)
+//returns RENDER_INVALID_NAME when name is not recognized, controller is not used
+int32_t render_translate_input_name(int32_t controller, char *name, uint8_t is_axis)
+{
+	if (is_axis) {
+		int axis = render_lookup_axis(name);
+		if (axis == ABS_CNT) {
+			return RENDER_INVALID_NAME;
+		}
+		return RENDER_AXIS_BIT | axis;
+	}
+	int button = render_lookup_button(name);
+	if (button != KEY_CNT) {
+		return button;
+	}
+	if (!strcmp("dpup", name)) {
+		return RENDER_DPAD_BIT | 1;
+	}
+	if (!strcmp("dpdown", name)) {
+		return RENDER_DPAD_BIT | 4;
+	}
+	//"dpdleft" is a misspelling that used to be the only form recognized, it is still accepted for compatibility
+	if (!strcmp("dpleft", name) || !strcmp("dpdleft", name)) {
+		return RENDER_DPAD_BIT | 8;
+	}
+	if (!strcmp("dpright", name)) {
+		return RENDER_DPAD_BIT | 2;
+	}
+	return RENDER_INVALID_NAME;
+}
+
+//Returns the 24 bits of input that sit above the low 4 bits
+int32_t render_dpad_part(int32_t input)
+{
+	int32_t without_direction = input >> 4;
+	return without_direction & 0xFFFFFF;
+}
+
+//Returns the low 4 bits of input
+uint8_t render_direction_part(int32_t input)
+{
+	const int32_t direction_mask = 0xF;
+	return input & direction_mask;
+}
+
+//Returns the low 28 bits of input
+int32_t render_axis_part(int32_t input)
+{
+	const int32_t axis_mask = 0xFFFFFFF;
+	return input & axis_mask;
+}
+
+//Polls all input devices unless the per-frame limit on polls has already been exceeded
+void process_events()
+{
+	if (events_processed <= MAX_EVENT_POLL_PER_FRAME) {
+		drain_events();
+		events_processed++;
+	}
+}
+
+//TOGGLE_MIN_DELAY is not referenced in the visible part of this file
+#define TOGGLE_MIN_DELAY 250
+//Does nothing, there is no windowed mode to switch to
+void render_toggle_fullscreen()
+{
+	//always fullscreen in fbdev
+}
+
+//Returns the audio buffer size in samples that init_audio ended up with
+uint32_t render_audio_buffer()
+{
+	return buffer_samples;
+}
+
+//Returns the audio sample rate that init_audio ended up with
+uint32_t render_sample_rate()
+{
+	return sample_rate;
+}
+
+//Empty in this backend, title and message are ignored
+void render_errorbox(char *title, char *message)
+{
+	
+}
+
+//Empty in this backend, title and message are ignored
+void render_warnbox(char *title, char *message)
+{
+	
+}
+
+//Empty in this backend, title and message are ignored
+void render_infobox(char *title, char *message)
+{
+	
+}
+
+//Returns non-zero when GL rendering is in use
+uint8_t render_has_gl(void)
+{
+	return render_gl;
+}
+
+//Always reports FRAMEBUFFER_ODD as the active framebuffer
+uint8_t render_get_active_framebuffer(void)
+{
+	return FRAMEBUFFER_ODD;
+}
--- a/render_sdl.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/render_sdl.c	Sat Jan 15 13:15:21 2022 -0800
@@ -13,6 +13,7 @@
 #include "genesis.h"
 #include "bindings.h"
 #include "util.h"
+#include "paths.h"
 #include "ppm.h"
 #include "png.h"
 #include "config.h"
@@ -45,146 +46,56 @@
 
 static uint32_t last_frame = 0;
 
-static uint32_t buffer_samples, sample_rate;
-static uint32_t missing_count;
-
-static SDL_mutex * audio_mutex;
-static SDL_cond * audio_ready;
+static SDL_mutex *audio_mutex, *frame_mutex, *free_buffer_mutex;
+static SDL_cond *audio_ready, *frame_ready;
 static uint8_t quitting = 0;
 
-struct audio_source {
-	SDL_cond *cond;
-	int16_t  *front;
-	int16_t  *back;
-	double   dt;
-	uint64_t buffer_fraction;
-	uint64_t buffer_inc;
-	uint32_t buffer_pos;
-	uint32_t read_start;
-	uint32_t read_end;
-	uint32_t lowpass_alpha;
-	uint32_t mask;
-	int16_t  last_left;
-	int16_t  last_right;
-	uint8_t  num_channels;
-	uint8_t  front_populated;
+enum {
+	SYNC_AUDIO,
+	SYNC_AUDIO_THREAD,
+	SYNC_VIDEO,
+	SYNC_EXTERNAL
 };
 
-static audio_source *audio_sources[8];
-static audio_source *inactive_audio_sources[8];
-static uint8_t num_audio_sources;
-static uint8_t num_inactive_audio_sources;
-static uint8_t sync_to_audio;
+static uint8_t sync_src;
 static uint32_t min_buffered;
 
-typedef int32_t (*mix_func)(audio_source *audio, void *vstream, int len);
+uint32_t **frame_buffers;
+uint32_t num_buffers;
+uint32_t buffer_storage;
 
-static int32_t mix_s16(audio_source *audio, void *vstream, int len)
+uint32_t render_min_buffered(void)
 {
-	int samples = len/(sizeof(int16_t)*2);
-	int16_t *stream = vstream;
-	int16_t *end = stream + 2*samples;
-	int16_t *src = audio->front;
-	uint32_t i = audio->read_start;
-	uint32_t i_end = audio->read_end;
-	int16_t *cur = stream;
-	if (audio->num_channels == 1) {
-		while (cur < end && i != i_end)
-		{
-			*(cur++) += src[i];
-			*(cur++) += src[i++];
-			i &= audio->mask;
-		}
-	} else {
-		while (cur < end && i != i_end)
-		{
-			*(cur++) += src[i++];
-			*(cur++) += src[i++];
-			i &= audio->mask;
-		}
-	}
-	
-	if (cur != end) {
-		printf("Underflow of %d samples, read_start: %d, read_end: %d, mask: %X\n", (int)(end-cur)/2, audio->read_start, audio->read_end, audio->mask);
-	}
-	if (!sync_to_audio) {
-		audio->read_start = i;
-	}
-	if (cur != end) {
-		//printf("Underflow of %d samples, read_start: %d, read_end: %d, mask: %X\n", (int)(end-cur)/2, audio->read_start, audio->read_end, audio->mask);
-		return (cur-end)/2;
-	} else {
-		return ((i_end - i) & audio->mask) / audio->num_channels;
-	}
+	return min_buffered;
 }
 
-static int32_t mix_f32(audio_source *audio, void *vstream, int len)
+uint8_t render_is_audio_sync(void)
 {
-	int samples = len/(sizeof(float)*2);
-	float *stream = vstream;
-	float *end = stream + 2*samples;
-	int16_t *src = audio->front;
-	uint32_t i = audio->read_start;
-	uint32_t i_end = audio->read_end;
-	float *cur = stream;
-	if (audio->num_channels == 1) {
-		while (cur < end && i != i_end)
-		{
-			*(cur++) += ((float)src[i]) / 0x7FFF;
-			*(cur++) += ((float)src[i++]) / 0x7FFF;
-			i &= audio->mask;
-		}
-	} else {
-		while(cur < end && i != i_end)
-		{
-			*(cur++) += ((float)src[i++]) / 0x7FFF;
-			*(cur++) += ((float)src[i++]) / 0x7FFF;
-			i &= audio->mask;
-		}
-	}
-	if (!sync_to_audio) {
-		audio->read_start = i;
-	}
-	if (cur != end) {
-		printf("Underflow of %d samples, read_start: %d, read_end: %d, mask: %X\n", (int)(end-cur)/2, audio->read_start, audio->read_end, audio->mask);
-		return (cur-end)/2;
-	} else {
-		return ((i_end - i) & audio->mask) / audio->num_channels;
-	}
+	return sync_src < SYNC_VIDEO;
 }
 
-static int32_t mix_null(audio_source *audio, void *vstream, int len)
+uint8_t render_should_release_on_exit(void)
 {
-	return 0;
+	return sync_src != SYNC_AUDIO_THREAD;
 }
 
-static mix_func mix;
+void render_buffer_consumed(audio_source *src)
+{
+	SDL_CondSignal(src->opaque); //opaque is used as an SDL condition variable here; signals one waiter on it
+}
 
 static void audio_callback(void * userdata, uint8_t *byte_stream, int len)
 {
-	uint8_t num_populated;
-	memset(byte_stream, 0, len);
 	SDL_LockMutex(audio_mutex);
+		uint8_t all_ready;
 		do {
-			num_populated = 0;
-			for (uint8_t i = 0; i < num_audio_sources; i++)
-			{
-				if (audio_sources[i]->front_populated) {
-					num_populated++;
-				}
-			}
-			if (!quitting && num_populated < num_audio_sources) {
-				fflush(stdout);
+			all_ready = all_sources_ready();
+			if (!quitting && !all_ready) {
 				SDL_CondWait(audio_ready, audio_mutex);
 			}
-		} while(!quitting && num_populated < num_audio_sources);
+		} while(!quitting && !all_ready);
 		if (!quitting) {
-			for (uint8_t i = 0; i < num_audio_sources; i++)
-			{
-				mix(audio_sources[i], byte_stream, len);
-				audio_sources[i]->front_populated = 0;
-				SDL_CondSignal(audio_sources[i]->cond);
-			}
+			mix_and_convert(byte_stream, len, NULL);
 		}
 	SDL_UnlockMutex(audio_mutex);
 }
@@ -199,35 +110,33 @@
 static uint32_t min_remaining_buffer;
 static void audio_callback_drc(void *userData, uint8_t *byte_stream, int len)
 {
-	memset(byte_stream, 0, len);
 	if (cur_min_buffered < 0) {
 		//underflow last frame, but main thread hasn't gotten a chance to call SDL_PauseAudio yet
 		return;
 	}
-	cur_min_buffered = 0x7FFFFFFF;
-	min_remaining_buffer = 0xFFFFFFFF;
-	for (uint8_t i = 0; i < num_audio_sources; i++)
-	{
-		
-		int32_t buffered = mix(audio_sources[i], byte_stream, len);
-		cur_min_buffered = buffered < cur_min_buffered ? buffered : cur_min_buffered;
-		uint32_t remaining = (audio_sources[i]->mask + 1)/audio_sources[i]->num_channels - buffered;
-		min_remaining_buffer = remaining < min_remaining_buffer ? remaining : min_remaining_buffer;
-	}
+	cur_min_buffered = mix_and_convert(byte_stream, len, &min_remaining_buffer);
 }
 
-static void lock_audio()
+static void audio_callback_run_on_audio(void *user_data, uint8_t *byte_stream, int len)
 {
-	if (sync_to_audio) {
+	if (current_system) {
+		current_system->resume_context(current_system);
+	}
+	mix_and_convert(byte_stream, len, NULL);
+}
+
+void render_lock_audio()
+{
+	if (sync_src == SYNC_AUDIO) {
 		SDL_LockMutex(audio_mutex);
 	} else {
 		SDL_LockAudio();
 	}
 }
 
-static void unlock_audio()
+void render_unlock_audio()
 {
-	if (sync_to_audio) {
+	if (sync_src == SYNC_AUDIO) {
 		SDL_UnlockMutex(audio_mutex);
 	} else {
 		SDL_UnlockAudio();
@@ -241,112 +150,86 @@
 		SDL_CondSignal(audio_ready);
 	SDL_UnlockMutex(audio_mutex);
 	SDL_CloseAudio();
+	/*
+	FIXME: move this to render_audio.c
+	if (mix_buf) {
+		free(mix_buf);
+		mix_buf = NULL;
+	}
+	*/
 }
 
-#define BUFFER_INC_RES 0x40000000UL
-
-void render_audio_adjust_clock(audio_source *src, uint64_t master_clock, uint64_t sample_divider)
+void *render_new_audio_opaque(void)
 {
-	src->buffer_inc = ((BUFFER_INC_RES * (uint64_t)sample_rate) / master_clock) * sample_divider;
+	return SDL_CreateCond();
 }
 
-audio_source *render_audio_source(uint64_t master_clock, uint64_t sample_divider, uint8_t channels)
+void render_free_audio_opaque(void *opaque)
 {
-	audio_source *ret = NULL;
-	uint32_t alloc_size = sync_to_audio ? channels * buffer_samples : nearest_pow2(min_buffered * 4 * channels);
-	lock_audio();
-		if (num_audio_sources < 8) {
-			ret = malloc(sizeof(audio_source));
-			ret->back = malloc(alloc_size * sizeof(int16_t));
-			ret->front = sync_to_audio ? malloc(alloc_size * sizeof(int16_t)) : ret->back;
-			ret->front_populated = 0;
-			ret->cond = SDL_CreateCond();
-			ret->num_channels = channels;
-			audio_sources[num_audio_sources++] = ret;
-		}
-	unlock_audio();
-	if (!ret) {
-		fatal_error("Too many audio sources!");
-	} else {
-		render_audio_adjust_clock(ret, master_clock, sample_divider);
-		double lowpass_cutoff = get_lowpass_cutoff(config);
-		double rc = (1.0 / lowpass_cutoff) / (2.0 * M_PI);
-		ret->dt = 1.0 / ((double)master_clock / (double)(sample_divider));
-		double alpha = ret->dt / (ret->dt + rc);
-		ret->lowpass_alpha = (int32_t)(((double)0x10000) * alpha);
-		ret->buffer_pos = 0;
-		ret->buffer_fraction = 0;
-		ret->last_left = ret->last_right = 0;
-		ret->read_start = 0;
-		ret->read_end = sync_to_audio ? buffer_samples * channels : 0;
-		ret->mask = sync_to_audio ? 0xFFFFFFFF : alloc_size-1;
-	}
-	if (sync_to_audio && SDL_GetAudioStatus() == SDL_AUDIO_PAUSED) {
-		SDL_PauseAudio(0);
-	}
-	return ret;
+	SDL_DestroyCond(opaque);
 }
 
-void render_pause_source(audio_source *src)
+void render_audio_created(audio_source *source)
 {
-	uint8_t need_pause = 0;
-	lock_audio();
-		for (uint8_t i = 0; i < num_audio_sources; i++)
-		{
-			if (audio_sources[i] == src) {
-				audio_sources[i] = audio_sources[--num_audio_sources];
-				if (sync_to_audio) {
-					SDL_CondSignal(audio_ready);
-				}
-				break;
-			}
-		}
-		if (!num_audio_sources) {
-			need_pause = 1;
-		}
-	unlock_audio();
-	if (need_pause) {
-		SDL_PauseAudio(1);
-	}
-	inactive_audio_sources[num_inactive_audio_sources++] = src;
-}
-
-void render_resume_source(audio_source *src)
-{
-	lock_audio();
-		if (num_audio_sources < 8) {
-			audio_sources[num_audio_sources++] = src;
-		}
-	unlock_audio();
-	for (uint8_t i = 0; i < num_inactive_audio_sources; i++)
-	{
-		if (inactive_audio_sources[i] == src) {
-			inactive_audio_sources[i] = inactive_audio_sources[--num_inactive_audio_sources];
+	if (sync_src == SYNC_AUDIO) {
+		//SDL_PauseAudio acquires the audio device lock, which is held while the callback runs
+		//since our callback can itself be stuck waiting on the audio_ready condition variable
+		//calling SDL_PauseAudio(0) again for audio sources after the first can deadlock
+		//fortunately SDL_GetAudioStatus does not acquire the lock so is safe to call here
+		if (SDL_GetAudioStatus() == SDL_AUDIO_PAUSED) {
+			SDL_PauseAudio(0);
 		}
 	}
-	if (sync_to_audio) {
-		SDL_PauseAudio(0);
+	if (current_system && sync_src == SYNC_AUDIO_THREAD) {
+		system_request_exit(current_system, 0);
 	}
 }
 
-void render_free_source(audio_source *src)
+void render_source_paused(audio_source *src, uint8_t remaining_sources)
+{
+	if (sync_src == SYNC_AUDIO) { //src itself is not used in this function
+		SDL_CondSignal(audio_ready); //audio_callback waits on audio_ready, so wake it to re-check source state
+	}
+	if (!remaining_sources && render_is_audio_sync()) { //no sources left while in SYNC_AUDIO or SYNC_AUDIO_THREAD
+		SDL_PauseAudio(1);
+		if (sync_src == SYNC_AUDIO_THREAD) {
+			SDL_CondSignal(frame_ready);
+		}
+	}
+}
+
+void render_source_resumed(audio_source *src)
 {
-	render_pause_source(src);
-	
-	free(src->front);
-	if (sync_to_audio) {
-		free(src->back);
-		SDL_DestroyCond(src->cond);
+	if (sync_src == SYNC_AUDIO) {
+		//SDL_PauseAudio acquires the audio device lock, which is held while the callback runs
+		//since our callback can itself be stuck waiting on the audio_ready condition variable
+		//calling SDL_PauseAudio(0) again for audio sources after the first can deadlock
+		//fortunately SDL_GetAudioStatus does not acquire the lock so is safe to call here
+		if (SDL_GetAudioStatus() == SDL_AUDIO_PAUSED) {
+			SDL_PauseAudio(0);
+		}
+	}
+	if (current_system && sync_src == SYNC_AUDIO_THREAD) {
+		system_request_exit(current_system, 0);
 	}
-	free(src);
 }
-static uint32_t sync_samples;
-static void do_audio_ready(audio_source *src)
+
+void render_do_audio_ready(audio_source *src)
 {
-	if (sync_to_audio) {
+	if (sync_src == SYNC_AUDIO_THREAD) {
+		int16_t *tmp = src->front;
+		src->front = src->back;
+		src->back = tmp;
+		src->front_populated = 1;
+		src->buffer_pos = 0;
+		if (all_sources_ready()) {
+			//we've emulated far enough to fill the current buffer
+			system_request_exit(current_system, 0);
+		}
+	} else if (sync_src == SYNC_AUDIO) {
 		SDL_LockMutex(audio_mutex);
 			while (src->front_populated) {
-				SDL_CondWait(src->cond, audio_mutex);
+				SDL_CondWait(src->opaque, audio_mutex);
 			}
 			int16_t *tmp = src->front;
 			src->front = src->back;
@@ -367,62 +250,9 @@
 	}
 }
 
-static int16_t lowpass_sample(audio_source *src, int16_t last, int16_t current)
-{
-	int32_t tmp = current * src->lowpass_alpha + last * (0x10000 - src->lowpass_alpha);
-	current = tmp >> 16;
-	return current;
-}
-
-static void interp_sample(audio_source *src, int16_t last, int16_t current)
-{
-	int64_t tmp = last * ((src->buffer_fraction << 16) / src->buffer_inc);
-	tmp += current * (0x10000 - ((src->buffer_fraction << 16) / src->buffer_inc));
-	src->back[src->buffer_pos++] = tmp >> 16;
-}
-
-void render_put_mono_sample(audio_source *src, int16_t value)
-{
-	value = lowpass_sample(src, src->last_left, value);
-	src->buffer_fraction += src->buffer_inc;
-	uint32_t base = sync_to_audio ? 0 : src->read_end;
-	while (src->buffer_fraction > BUFFER_INC_RES)
-	{
-		src->buffer_fraction -= BUFFER_INC_RES;
-		interp_sample(src, src->last_left, value);
-		
-		if (((src->buffer_pos - base) & src->mask) >= sync_samples) {
-			do_audio_ready(src);
-		}
-		src->buffer_pos &= src->mask;
-	}
-	src->last_left = value;
-}
-
-void render_put_stereo_sample(audio_source *src, int16_t left, int16_t right)
-{
-	left = lowpass_sample(src, src->last_left, left);
-	right = lowpass_sample(src, src->last_right, right);
-	src->buffer_fraction += src->buffer_inc;
-	uint32_t base = sync_to_audio ? 0 : src->read_end;
-	while (src->buffer_fraction > BUFFER_INC_RES)
-	{
-		src->buffer_fraction -= BUFFER_INC_RES;
-		
-		interp_sample(src, src->last_left, left);
-		interp_sample(src, src->last_right, right);
-		
-		if (((src->buffer_pos - base) & src->mask)/2 >= sync_samples) {
-			do_audio_ready(src);
-		}
-		src->buffer_pos &= src->mask;
-	}
-	src->last_left = left;
-	src->last_right = right;
-}
-
 static SDL_Joystick * joysticks[MAX_JOYSTICKS];
 static int joystick_sdl_index[MAX_JOYSTICKS];
+static uint8_t joystick_index_locked[MAX_JOYSTICKS];
 
 int render_width()
 {
@@ -448,8 +278,21 @@
 #endif
 }
 
+static uint8_t external_sync;
+void render_set_external_sync(uint8_t ext_sync_on)
+{
+	if (ext_sync_on != external_sync) { //only act when the setting actually changes
+		external_sync = ext_sync_on;
+		if (windowed_width) { //non-zero windowed_width is used as the "already initialized" indicator
+			//only do this if render_init has already been called
+			render_config_updated();
+		}
+	}
+}
+
 #ifndef DISABLE_OPENGL
-static GLuint textures[3], buffers[2], vshader, fshader, program, un_textures[2], un_width, un_height, at_pos;
+static GLuint textures[3], buffers[2], vshader, fshader, program, un_textures[2], un_width, un_height, un_texsize, at_pos;
+static int tex_width, tex_height;
 
 static GLfloat vertex_data_default[] = {
 	-1.0f, -1.0f,
@@ -474,28 +317,39 @@
 
 static GLuint load_shader(char * fname, GLenum shader_type)
 {
+	char * shader_path; //the user shaders directory is tried first, then the bundled "shaders" path
+	FILE *f;
+	GLchar *text;
+	long fsize;
+#ifndef __ANDROID__
 	char const * parts[] = {get_home_dir(), "/.config/blastem/shaders/", fname};
-	char * shader_path = alloc_concat_m(3, parts);
-	FILE * f = fopen(shader_path, "rb");
+	shader_path = alloc_concat_m(3, parts);
+	f = fopen(shader_path, "rb");
 	free(shader_path);
-	if (!f) {
-		parts[0] = get_exe_dir();
-		parts[1] = "/shaders/";
-		shader_path = alloc_concat_m(3, parts);
-		f = fopen(shader_path, "rb");
+	if (f) {
+		fsize = file_size(f);
+		text = malloc(fsize + 1); //one extra byte for the terminator stored at text[fsize] below
+		if (fread(text, 1, fsize, f) != fsize) {
+			warning("Error reading from shader file %s\n", fname);
+			free(text);
+			return 0;
+		}
+	} else {
+#endif
+		shader_path = path_append("shaders", fname);
+		uint32_t fsize32;
+		text = read_bundled_file(shader_path, &fsize32); //NOTE(review): assumes the returned buffer holds at least fsize32+1 bytes - confirm
 		free(shader_path);
-		if (!f) {
+		if (!text) {
 			warning("Failed to open shader file %s for reading\n", fname);
 			return 0;
 		}
+		fsize = fsize32;
+#ifndef __ANDROID__
 	}
-	long fsize = file_size(f);
-	GLchar * text = malloc(fsize);
-	if (fread(text, 1, fsize, f) != fsize) {
-		warning("Error reading from shader file %s\n", fname);
-		free(text);
-		return 0;
-	}
+#endif
+	text[fsize] = 0; //terminate so the strncmp/alloc_concat calls that follow see a proper C string
+	
 	if (strncmp(text, "#version", strlen("#version"))) {
 		GLchar *tmp = text;
 		text = alloc_concat(shader_prefix, tmp);
@@ -503,6 +357,10 @@
 		fsize += strlen(shader_prefix);
 	}
 	GLuint ret = glCreateShader(shader_type);
+	if (!ret) {
+		warning("glCreateShader failed with error %d\n", glGetError());
+		return 0;
+	}
 	glShaderSource(ret, 1, (const GLchar **)&text, (const GLint *)&fsize);
 	free(text);
 	glCompileShader(ret);
@@ -522,7 +380,9 @@
 #endif
 
 static uint32_t texture_buf[512 * 513];
-#ifndef DISABLE_OPENGL
+#ifdef DISABLE_OPENGL
+#define RENDER_FORMAT SDL_PIXELFORMAT_ARGB8888
+#else
 #ifdef USE_GLES
 #define INTERNAL_FORMAT GL_RGBA
 #define SRC_FORMAT GL_RGBA
@@ -538,6 +398,15 @@
 	char *scaling = tern_find_path_default(config, "video\0scaling\0", def, TVAL_PTR).ptrval;
 	GLint filter = strcmp(scaling, "linear") ? GL_NEAREST : GL_LINEAR;
 	glGenTextures(3, textures);
+	def.ptrval = "off";
+	char *npot_textures = tern_find_path_default(config, "video\0npot_textures\0", def, TVAL_PTR).ptrval;
+	if (!strcmp(npot_textures, "on")) {
+		tex_width = LINEBUF_SIZE;
+		tex_height = 294; //PAL height with full borders
+	} else {
+		tex_width = tex_height = 512;
+	}
+	debug_message("Using %dx%d textures\n", tex_width, tex_height);
 	for (int i = 0; i < 3; i++)
 	{
 		glBindTexture(GL_TEXTURE_2D, textures[i]);
@@ -547,7 +416,7 @@
 		glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
 		if (i < 2) {
 			//TODO: Fixme for PAL + invalid display mode
-			glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT, 512, 512, 0, SRC_FORMAT, GL_UNSIGNED_BYTE, texture_buf);
+			glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT, tex_width, tex_height, 0, SRC_FORMAT, GL_UNSIGNED_BYTE, texture_buf);
 		} else {
 			uint32_t blank = 255 << 24;
 			glTexImage2D(GL_TEXTURE_2D, 0, INTERNAL_FORMAT, 1, 1, 0, SRC_FORMAT, GL_UNSIGNED_BYTE, &blank);
@@ -576,6 +445,7 @@
 	un_textures[1] = glGetUniformLocation(program, "textures[1]");
 	un_width = glGetUniformLocation(program, "width");
 	un_height = glGetUniformLocation(program, "height");
+	un_texsize = glGetUniformLocation(program, "texsize");
 	at_pos = glGetAttribLocation(program, "pos");
 }
 
@@ -595,12 +465,11 @@
 	if (texture_init) {
 		return;
 	}
-	sdl_textures= malloc(sizeof(SDL_Texture *) * 2);
-	num_textures = 2;
+	sdl_textures= calloc(sizeof(SDL_Texture *), 3);
+	num_textures = 3;
 	texture_init = 1;
 #ifndef DISABLE_OPENGL
 	if (render_gl) {
-		sdl_textures[0] = sdl_textures[1] = NULL;
 		gl_setup();
 	} else {
 #endif
@@ -667,7 +536,9 @@
 static void update_aspect()
 {
 	//reset default values
+#ifndef DISABLE_OPENGL
 	memcpy(vertex_data, vertex_data_default, sizeof(vertex_data));
+#endif
 	main_clip.w = main_width;
 	main_clip.h = main_height;
 	main_clip.x = main_clip.y = 0;
@@ -699,13 +570,18 @@
 	}
 }
 
-static ui_render_fun on_context_destroyed, on_context_created;
+static ui_render_fun on_context_destroyed, on_context_created, on_ui_fb_resized;
 void render_set_gl_context_handlers(ui_render_fun destroy, ui_render_fun create)
 {
 	on_context_destroyed = destroy;
 	on_context_created = create;
 }
 
+void render_set_ui_fb_resize_handler(ui_render_fun resize)
+{
+	on_ui_fb_resized = resize; //stored callback; render_config_updated invokes it when it is non-NULL
+}
+
 static uint8_t scancode_map[SDL_NUM_SCANCODES] = {
 	[SDL_SCANCODE_A] = 0x1C,
 	[SDL_SCANCODE_B] = 0x32,
@@ -842,6 +718,16 @@
 	return -1;
 }
 
+static int lowest_unlocked_joystick_index(void)
+{
+	for (int i = 0; i < MAX_JOYSTICKS; i++) { //scan upward so the first hit is the lowest index
+		if (!joystick_index_locked[i]) {
+			return i;
+		}
+	}
+	return -1; //every index is marked locked
+}
+
 SDL_Joystick *render_get_joystick(int index)
 {
 	if (index >= MAX_JOYSTICKS) {
@@ -863,17 +749,59 @@
 
 SDL_GameController *render_get_controller(int index)
 {
-	if (index >= MAX_JOYSTICKS) {
+	if (index >= MAX_JOYSTICKS || !joysticks[index]) {
 		return NULL;
 	}
 	return SDL_GameControllerOpen(joystick_sdl_index[index]);
 }
 
+static uint8_t gc_events_enabled;
+static SDL_GameController *controllers[MAX_JOYSTICKS];
+void render_enable_gamepad_events(uint8_t enabled)
+{
+	if (enabled != gc_events_enabled) { //nothing to do unless the setting changes
+		gc_events_enabled = enabled;
+		for (int i = 0; i < MAX_JOYSTICKS; i++) {
+			if (enabled) {
+				controllers[i] = render_get_controller(i); //NULL for slots without an open joystick
+			} else if (controllers[i]) {
+				SDL_GameControllerClose(controllers[i]);
+				controllers[i] = NULL;
+			}
+		}
+	}
+}
+
 static uint32_t overscan_top[NUM_VID_STD] = {2, 21};
 static uint32_t overscan_bot[NUM_VID_STD] = {1, 17};
 static uint32_t overscan_left[NUM_VID_STD] = {13, 13};
 static uint32_t overscan_right[NUM_VID_STD] = {14, 14};
 static vid_std video_standard = VID_NTSC;
+static uint8_t need_ui_fb_resize;
+
+int lock_joystick_index(int joystick, int desired_index) //swaps joystick into desired_index, locks that index and returns it
+{
+	if (desired_index < 0) { //negative means "use the lowest unlocked index"
+		desired_index = lowest_unlocked_joystick_index();
+		if (desired_index < 0 || desired_index >= joystick) { //no unlocked index below the current one, keep it
+			return joystick;
+		}
+	}
+	SDL_Joystick *tmp_joy = joysticks[joystick];
+	int tmp_index = joystick_sdl_index[joystick];
+	joysticks[joystick] = joysticks[desired_index];
+	joystick_sdl_index[joystick] = joystick_sdl_index[desired_index];
+	joystick_index_locked[joystick] = joystick_index_locked[desired_index]; //lock flag moves with the displaced entry (was copying the SDL index)
+	joysticks[desired_index] = tmp_joy;
+	joystick_sdl_index[desired_index] = tmp_index;
+	joystick_index_locked[desired_index] = 1;
+	//update bindings as the controllers being swapped may have different mappings
+	handle_joy_added(desired_index);
+	if (joysticks[joystick]) { //the displaced slot may be empty, in which case there is nothing to rebind
+		handle_joy_added(joystick);
+	}
+	return desired_index;
+}
 
 static int32_t handle_event(SDL_Event *event)
 {
@@ -891,13 +819,13 @@
 		handle_joydown(find_joystick_index(event->jbutton.which), event->jbutton.button);
 		break;
 	case SDL_JOYBUTTONUP:
-		handle_joyup(find_joystick_index(event->jbutton.which), event->jbutton.button);
+		handle_joyup(lock_joystick_index(find_joystick_index(event->jbutton.which), -1), event->jbutton.button);
 		break;
 	case SDL_JOYHATMOTION:
-		handle_joy_dpad(find_joystick_index(event->jhat.which), event->jhat.hat, event->jhat.value);
+		handle_joy_dpad(lock_joystick_index(find_joystick_index(event->jhat.which), -1), event->jhat.hat, event->jhat.value);
 		break;
 	case SDL_JOYAXISMOTION:
-		handle_joy_axis(find_joystick_index(event->jaxis.which), event->jaxis.axis, event->jaxis.value);
+		handle_joy_axis(lock_joystick_index(find_joystick_index(event->jaxis.which), -1), event->jaxis.axis, event->jaxis.value);
 		break;
 	case SDL_JOYDEVICEADDED:
 		if (event->jdevice.which < MAX_JOYSTICKS) {
@@ -905,9 +833,13 @@
 			if (index >= 0) {
 				SDL_Joystick * joy = joysticks[index] = SDL_JoystickOpen(event->jdevice.which);
 				joystick_sdl_index[index] = event->jdevice.which;
+				joystick_index_locked[index] = 0;
+				if (gc_events_enabled) {
+					controllers[index] = SDL_GameControllerOpen(event->jdevice.which);
+				}
 				if (joy) {
-					printf("Joystick %d added: %s\n", index, SDL_JoystickName(joy));
-					printf("\tNum Axes: %d\n\tNum Buttons: %d\n\tNum Hats: %d\n", SDL_JoystickNumAxes(joy), SDL_JoystickNumButtons(joy), SDL_JoystickNumHats(joy));
+					debug_message("Joystick %d added: %s\n", index, SDL_JoystickName(joy));
+					debug_message("\tNum Axes: %d\n\tNum Buttons: %d\n\tNum Hats: %d\n", SDL_JoystickNumAxes(joy), SDL_JoystickNumButtons(joy), SDL_JoystickNumHats(joy));
 					handle_joy_added(index);
 				}
 			}
@@ -918,9 +850,13 @@
 		if (index >= 0) {
 			SDL_JoystickClose(joysticks[index]);
 			joysticks[index] = NULL;
-			printf("Joystick %d removed\n", index);
+			if (controllers[index]) {
+				SDL_GameControllerClose(controllers[index]);
+				controllers[index] = NULL;
+			}
+			debug_message("Joystick %d removed\n", index);
 		} else {
-			printf("Failed to find removed joystick with instance ID: %d\n", index);
+			debug_message("Failed to find removed joystick with instance ID: %d\n", index);
 		}
 		break;
 	}
@@ -937,8 +873,12 @@
 		switch (event->window.event)
 		{
 		case SDL_WINDOWEVENT_SIZE_CHANGED:
+			if (!main_window) {
+				break;
+			}
 			main_width = event->window.data1;
 			main_height = event->window.data2;
+			need_ui_fb_resize = 1;
 			update_aspect();
 #ifndef DISABLE_OPENGL
 			if (render_gl) {
@@ -956,7 +896,7 @@
 #endif
 			break;
 		case SDL_WINDOWEVENT_CLOSE:
-			if (SDL_GetWindowID(main_window) == event->window.windowID) {
+			if (main_window && SDL_GetWindowID(main_window) == event->window.windowID) {
 				exit(0);
 			} else {
 				for (int i = 0; i < num_textures - FRAMEBUFFER_USER_START; i++)
@@ -1001,6 +941,7 @@
 static int source_frame_count;
 static int frame_repeat[60];
 
+static uint32_t sample_rate;
 static void init_audio()
 {
 	SDL_AudioSpec desired, actual;
@@ -1010,35 +951,46 @@
    		rate = 48000;
    	}
     desired.freq = rate;
-	desired.format = AUDIO_S16SYS;
+	char *config_format = tern_find_path_default(config, "audio\0format\0", (tern_val){.ptrval="f32"}, TVAL_PTR).ptrval;
+	desired.format = !strcmp(config_format, "s16") ? AUDIO_S16SYS : AUDIO_F32SYS;
 	desired.channels = 2;
     char * samples_str = tern_find_path(config, "audio\0buffer\0", TVAL_PTR).ptrval;
    	int samples = samples_str ? atoi(samples_str) : 0;
    	if (!samples) {
    		samples = 512;
    	}
-    printf("config says: %d\n", samples);
+    debug_message("config says: %d\n", samples);
     desired.samples = samples*2;
-	desired.callback = sync_to_audio ? audio_callback : audio_callback_drc;
+	switch (sync_src)
+	{
+	case SYNC_AUDIO:
+		desired.callback = audio_callback;
+		break;
+	case SYNC_AUDIO_THREAD:
+		desired.callback = audio_callback_run_on_audio;
+		break;
+	default:
+		desired.callback = audio_callback_drc;
+	}
 	desired.userdata = NULL;
 
 	if (SDL_OpenAudio(&desired, &actual) < 0) {
 		fatal_error("Unable to open SDL audio: %s\n", SDL_GetError());
 	}
-	buffer_samples = actual.samples;
 	sample_rate = actual.freq;
-	printf("Initialized audio at frequency %d with a %d sample buffer, ", actual.freq, actual.samples);
+	debug_message("Initialized audio at frequency %d with a %d sample buffer, ", actual.freq, actual.samples);
+	render_audio_format format = RENDER_AUDIO_UNKNOWN;
 	if (actual.format == AUDIO_S16SYS) {
-		puts("signed 16-bit int format");
-		mix = mix_s16;
+		debug_message("signed 16-bit int format\n");
+		format = RENDER_AUDIO_S16;
 	} else if (actual.format == AUDIO_F32SYS) {
-		puts("32-bit float format");
-		mix = mix_f32;
+		debug_message("32-bit float format\n");
+		format = RENDER_AUDIO_FLOAT;
 	} else {
-		printf("unsupported format %X\n", actual.format);
+		debug_message("unsupported format %X\n", actual.format);
 		warning("Unsupported audio sample format: %X\n", actual.format);
-		mix = mix_null;
 	}
+	render_audio_initialized(format, actual.freq, actual.channels, actual.samples, SDL_AUDIO_BITSIZE(actual.format) / 8);
 }
 
 void window_setup(void)
@@ -1049,11 +1001,31 @@
 	}
 	
 	tern_val def = {.ptrval = "audio"};
-	char *sync_src = tern_find_path_default(config, "system\0sync_source\0", def, TVAL_PTR).ptrval;
-	sync_to_audio = !strcmp(sync_src, "audio");
+	if (external_sync) {
+		sync_src = SYNC_EXTERNAL;
+	} else {
+		char *sync_src_str = tern_find_path_default(config, "system\0sync_source\0", def, TVAL_PTR).ptrval;
+		if (!strcmp(sync_src_str, "audio")) {
+			sync_src = SYNC_AUDIO;
+		} else if (!strcmp(sync_src_str, "audio_thread")) {
+			sync_src = SYNC_AUDIO_THREAD;
+		} else {
+			sync_src = SYNC_VIDEO;
+		}
+	}
+	
+	if (!num_buffers && (sync_src == SYNC_AUDIO_THREAD || sync_src == SYNC_EXTERNAL)) {
+		frame_mutex = SDL_CreateMutex();
+		free_buffer_mutex = SDL_CreateMutex();
+		frame_ready = SDL_CreateCond();
+		buffer_storage = 4;
+		frame_buffers = calloc(buffer_storage, sizeof(uint32_t*));
+		frame_buffers[0] = texture_buf;
+		num_buffers = 1;
+	}
 	
 	const char *vsync;
-	if (sync_to_audio) {
+	if (sync_src == SYNC_AUDIO) {
 		def.ptrval = "off";
 		vsync = tern_find_path_default(config, "video\0vsync\0", def, TVAL_PTR).ptrval;
 	} else {
@@ -1137,7 +1109,11 @@
 			}
 			if (vsync) {
 				if (SDL_GL_SetSwapInterval(!strcmp("on", vsync)) < 0) {
+#ifdef __ANDROID__
+					debug_message("Failed to set vsync to %s: %s\n", vsync, SDL_GetError());
+#else
 					warning("Failed to set vsync to %s: %s\n", vsync, SDL_GetError());
+#endif
 				}
 			}
 		} else {
@@ -1157,7 +1133,7 @@
 		}
 		SDL_RendererInfo rinfo;
 		SDL_GetRendererInfo(main_renderer, &rinfo);
-		printf("SDL2 Render Driver: %s\n", rinfo.name);
+		debug_message("SDL2 Render Driver: %s\n", rinfo.name);
 		main_clip.x = main_clip.y = 0;
 		main_clip.w = main_width;
 		main_clip.h = main_height;
@@ -1166,7 +1142,7 @@
 #endif
 
 	SDL_GetWindowSize(main_window, &main_width, &main_height);
-	printf("Window created with size: %d x %d\n", main_width, main_height);
+	debug_message("Window created with size: %d x %d\n", main_width, main_height);
 	update_aspect();
 	render_alloc_surfaces();
 	def.ptrval = "off";
@@ -1183,7 +1159,7 @@
 		float aspect = config_aspect() > 0.0f ? config_aspect() : 4.0f/3.0f;
 		height = ((float)width / aspect) + 0.5f;
 	}
-	printf("width: %d, height: %d\n", width, height);
+	debug_message("width: %d, height: %d\n", width, height);
 	windowed_width = width;
 	windowed_height = height;
 	
@@ -1216,7 +1192,7 @@
 	if (db_data) {
 		int added = SDL_GameControllerAddMappingsFromRW(SDL_RWFromMem(db_data, db_size), 1);
 		free(db_data);
-		printf("Added %d game controller mappings from gamecontrollerdb.txt\n", added);
+		debug_message("Added %d game controller mappings from gamecontrollerdb.txt\n", added);
 	}
 	
 	controller_add_mappings();
@@ -1227,33 +1203,23 @@
 
 	atexit(render_quit);
 }
-#include<unistd.h>
-static int in_toggle;
-static void update_source(audio_source *src, double rc, uint8_t sync_changed)
+
+void render_reset_mappings(void)
 {
-	double alpha = src->dt / (src->dt + rc);
-	int32_t lowpass_alpha = (int32_t)(((double)0x10000) * alpha);
-	src->lowpass_alpha = lowpass_alpha;
-	if (sync_changed) {
-		uint32_t alloc_size = sync_to_audio ? src->num_channels * buffer_samples : nearest_pow2(min_buffered * 4 * src->num_channels);
-		src->back = realloc(src->back, alloc_size * sizeof(int16_t));
-		if (sync_to_audio) {
-			src->front = malloc(alloc_size * sizeof(int16_t));
-		} else {
-			free(src->front);
-			src->front = src->back;
-		}
-		src->mask = sync_to_audio ? 0xFFFFFFFF : alloc_size-1;
-		src->read_start = 0;
-		src->read_end = sync_to_audio ? buffer_samples * src->num_channels : 0;
-		src->buffer_pos = 0;
+	SDL_QuitSubSystem(SDL_INIT_GAMECONTROLLER);
+	SDL_InitSubSystem(SDL_INIT_GAMECONTROLLER);
+	uint32_t db_size;
+	char *db_data = read_bundled_file("gamecontrollerdb.txt", &db_size);
+	if (db_data) {
+		int added = SDL_GameControllerAddMappingsFromRW(SDL_RWFromMem(db_data, db_size), 1);
+		free(db_data);
+		debug_message("Added %d game controller mappings from gamecontrollerdb.txt\n", added);
 	}
 }
+static int in_toggle;
 
 void render_config_updated(void)
 {
-	uint8_t old_sync_to_audio = sync_to_audio;
-	
 	free_surfaces();
 #ifndef DISABLE_OPENGL
 	if (render_gl) {
@@ -1270,6 +1236,7 @@
 #endif
 	in_toggle = 1;
 	SDL_DestroyWindow(main_window);
+	main_window = NULL;
 	drain_events();
 	
 	char *config_width = tern_find_path(config, "video\0width\0", TVAL_PTR).ptrval;
@@ -1295,6 +1262,9 @@
 		main_width = windowed_width;
 		main_height = windowed_height;
 	}
+	if (on_ui_fb_resized) {
+		on_ui_fb_resized();
+	}
 	
 	window_setup();
 	update_aspect();
@@ -1311,18 +1281,6 @@
 	init_audio();
 	render_set_video_standard(video_standard);
 	
-	double lowpass_cutoff = get_lowpass_cutoff(config);
-	double rc = (1.0 / lowpass_cutoff) / (2.0 * M_PI);
-	lock_audio();
-		for (uint8_t i = 0; i < num_audio_sources; i++)
-		{
-			update_source(audio_sources[i], rc, old_sync_to_audio != sync_to_audio);
-		}
-	unlock_audio();
-	for (uint8_t i = 0; i < num_inactive_audio_sources; i++)
-	{
-		update_source(inactive_audio_sources[i], rc, old_sync_to_audio != sync_to_audio);
-	}
 	drain_events();
 	in_toggle = 0;
 	if (!was_paused) {
@@ -1335,9 +1293,18 @@
 	return main_window;
 }
 
+uint32_t render_audio_syncs_per_sec(void)
+{
+	//sync samples with audio thread approximately every 8 lines when doing sync to video
+	return render_is_audio_sync() ? 0 : source_hz * (video_standard == VID_PAL ? 313 : 262) / 8; //0 when syncing to audio; 313/262 presumably lines per frame
+}
+
 void render_set_video_standard(vid_std std)
 {
 	video_standard = std;
+	if (render_is_audio_sync()) {
+		return;
+	}
 	source_hz = std == VID_PAL ? 50 : 60;
 	uint32_t max_repeat = 0;
 	if (abs(source_hz - display_hz) < 2) {
@@ -1364,12 +1331,10 @@
 	}
 	source_frame = 0;
 	source_frame_count = frame_repeat[0];
-	//sync samples with audio thread approximately every 8 lines
-	sync_samples = sync_to_audio ? buffer_samples : 8 * sample_rate / (source_hz * (VID_PAL ? 313 : 262));
 	max_repeat++;
 	min_buffered = (((float)max_repeat * (float)sample_rate/(float)source_hz)/* / (float)buffer_samples*/);// + 0.9999;
 	//min_buffered *= buffer_samples;
-	printf("Min samples buffered before audio start: %d\n", min_buffered);
+	debug_message("Min samples buffered before audio start: %d\n", min_buffered);
 	max_adjust = BASE_MAX_ADJUST / source_hz;
 }
 
@@ -1448,24 +1413,40 @@
 uint32_t locked_pitch;
 uint32_t *render_get_framebuffer(uint8_t which, int *pitch)
 {
+	if (sync_src == SYNC_AUDIO_THREAD || sync_src == SYNC_EXTERNAL) {
+		*pitch = LINEBUF_SIZE * sizeof(uint32_t);
+		uint32_t *buffer;
+		SDL_LockMutex(free_buffer_mutex);
+			if (num_buffers) {
+				buffer = frame_buffers[--num_buffers];
+			} else {
+				buffer = calloc(tex_width*(tex_height + 1), sizeof(uint32_t));
+			}
+		SDL_UnlockMutex(free_buffer_mutex);
+		locked_pixels = buffer;
+		return buffer;
+	}
 #ifndef DISABLE_OPENGL
 	if (render_gl && which <= FRAMEBUFFER_EVEN) {
 		*pitch = LINEBUF_SIZE * sizeof(uint32_t);
 		return texture_buf;
 	} else {
 #endif
+		if (which == FRAMEBUFFER_UI && !sdl_textures[which]) {
+			sdl_textures[which] = SDL_CreateTexture(main_renderer, SDL_PIXELFORMAT_ARGB8888, SDL_TEXTUREACCESS_STREAMING, main_width, main_height);
+		}
 		if (which >= num_textures) {
 			warning("Request for invalid framebuffer number %d\n", which);
 			return NULL;
 		}
-		void *pixels;
-		if (SDL_LockTexture(sdl_textures[which], NULL, &pixels, pitch) < 0) {
+		uint8_t *pixels;
+		if (SDL_LockTexture(sdl_textures[which], NULL, (void **)&pixels, pitch) < 0) {
 			warning("Failed to lock texture: %s\n", SDL_GetError());
 			return NULL;
 		}
 		static uint8_t last;
 		if (which <= FRAMEBUFFER_EVEN) {
-			locked_pixels = pixels;
+			locked_pixels = (uint32_t *)pixels;
 			if (which == FRAMEBUFFER_EVEN) {
 				pixels += *pitch;
 			}
@@ -1475,12 +1456,23 @@
 			}
 			last = which;
 		}
-		return pixels;
+		return (uint32_t *)pixels;
 #ifndef DISABLE_OPENGL
 	}
 #endif
 }
 
+static void release_buffer(uint32_t *buffer) //pushes buffer onto the frame_buffers free list that render_get_framebuffer pops from
+{
+	SDL_LockMutex(free_buffer_mutex);
+		if (num_buffers == buffer_storage) { //free list is full, so double its capacity
+			buffer_storage *= 2; //NOTE(review): stays 0 if buffer_storage starts at 0; its initial value is set outside this hunk - confirm
+			frame_buffers = realloc(frame_buffers, sizeof(uint32_t*)*buffer_storage); //NOTE(review): realloc result is not checked for NULL
+		}
+		frame_buffers[num_buffers++] = buffer;
+	SDL_UnlockMutex(free_buffer_mutex);
+}
+
 uint8_t events_processed;
 #ifdef __ANDROID__
 #define FPS_INTERVAL 10000
@@ -1490,10 +1482,10 @@
 
 static uint32_t last_width, last_height;
 static uint8_t interlaced;
-void render_framebuffer_updated(uint8_t which, int width)
+static void process_framebuffer(uint32_t *buffer, uint8_t which, int width)
 {
 	static uint8_t last;
-	if (!sync_to_audio && which <= FRAMEBUFFER_EVEN && source_frame_count < 0) {
+	if (sync_src == SYNC_VIDEO && which <= FRAMEBUFFER_EVEN && source_frame_count < 0) {
 		source_frame++;
 		if (source_frame >= source_hz) {
 			source_frame = 0;
@@ -1516,7 +1508,7 @@
 #ifndef DISABLE_ZLIB
 			ext = path_extension(screenshot_path);
 #endif
-			info_message("Saving screenshot to %s\n", screenshot_path);
+			debug_message("Saving screenshot to %s\n", screenshot_path);
 		} else {
 			warning("Failed to open screenshot file %s for writing\n", screenshot_path);
 		}
@@ -1531,24 +1523,25 @@
 	if (render_gl && which <= FRAMEBUFFER_EVEN) {
 		SDL_GL_MakeCurrent(main_window, main_context);
 		glBindTexture(GL_TEXTURE_2D, textures[which]);
-		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, LINEBUF_SIZE, height, SRC_FORMAT, GL_UNSIGNED_BYTE, texture_buf + overscan_left[video_standard] + LINEBUF_SIZE * overscan_top[video_standard]);
+		glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, LINEBUF_SIZE, height, SRC_FORMAT, GL_UNSIGNED_BYTE, buffer + overscan_left[video_standard] + LINEBUF_SIZE * overscan_top[video_standard]);
 		
 		if (screenshot_file) {
 			//properly supporting interlaced modes here is non-trivial, so only save the odd field for now
 #ifndef DISABLE_ZLIB
 			if (!strcasecmp(ext, "png")) {
 				free(ext);
-				save_png(screenshot_file, texture_buf, shot_width, shot_height, LINEBUF_SIZE*sizeof(uint32_t));
+				save_png(screenshot_file, buffer, shot_width, shot_height, LINEBUF_SIZE*sizeof(uint32_t));
 			} else {
 				free(ext);
 #endif
-				save_ppm(screenshot_file, texture_buf, shot_width, shot_height, LINEBUF_SIZE*sizeof(uint32_t));
+				save_ppm(screenshot_file, buffer, shot_width, shot_height, LINEBUF_SIZE*sizeof(uint32_t));
 #ifndef DISABLE_ZLIB
 			}
 #endif
 		}
 	} else {
 #endif
+		//TODO: Support SYNC_AUDIO_THREAD/SYNC_EXTERNAL for render API framebuffers
 		if (which <= FRAMEBUFFER_EVEN && last != which) {
 			uint8_t *cur_dst = (uint8_t *)locked_pixels;
 			uint8_t *cur_saved = (uint8_t *)texture_buf;
@@ -1591,6 +1584,16 @@
 	last_height = height;
 	if (which <= FRAMEBUFFER_EVEN) {
 		render_update_display();
+	} else if (which == FRAMEBUFFER_UI) {
+		SDL_RenderCopy(main_renderer, sdl_textures[which], NULL, NULL);
+		if (need_ui_fb_resize) {
+			SDL_DestroyTexture(sdl_textures[which]);
+			sdl_textures[which] = NULL;
+			if (on_ui_fb_resized) {
+				on_ui_fb_resized();
+			}
+			need_ui_fb_resize = 0;
+		}
 	} else {
 		SDL_RenderCopy(extra_renderers[which - FRAMEBUFFER_USER_START], sdl_textures[which], NULL, NULL);
 		SDL_RenderPresent(extra_renderers[which - FRAMEBUFFER_USER_START]);
@@ -1606,7 +1609,7 @@
 		if ((last_frame - start) > FPS_INTERVAL) {
 			if (start && (last_frame-start)) {
 	#ifdef __ANDROID__
-				info_message("%s - %.1f fps", caption, ((float)frame_counter) / (((float)(last_frame-start)) / 1000.0));
+				debug_message("%s - %.1f fps", caption, ((float)frame_counter) / (((float)(last_frame-start)) / 1000.0));
 	#else
 				if (!fps_caption) {
 					fps_caption = malloc(strlen(caption) + strlen(" - 100000000.1 fps") + 1);
@@ -1619,7 +1622,7 @@
 			frame_counter = 0;
 		}
 	}
-	if (!sync_to_audio) {
+	if (!render_is_audio_sync()) {
 		int32_t local_cur_min, local_min_remaining;
 		SDL_LockAudio();
 			if (last_buffered > NO_LAST_BUFFERED) {
@@ -1661,10 +1664,8 @@
 		}
 		if (adjust_ratio != 0.0f) {
 			average_change = 0;
-			for (uint8_t i = 0; i < num_audio_sources; i++)
-			{
-				audio_sources[i]->buffer_inc = ((double)audio_sources[i]->buffer_inc) + ((double)audio_sources[i]->buffer_inc) * adjust_ratio + 0.5;
-			}
+			render_audio_adjust_speed(adjust_ratio);
+			
 		}
 		while (source_frame_count > 0)
 		{
@@ -1679,6 +1680,83 @@
 	}
 }
 
+typedef struct { //one queued frame waiting to be handed to process_framebuffer
+	uint32_t *buffer; //pixel buffer; set from locked_pixels when the frame is queued
+	int      width; //width argument given to render_framebuffer_updated
+	uint8_t  which; //framebuffer index given to render_framebuffer_updated
+} frame;
+frame frame_queue[4]; //ring buffer; read/write indices are wrapped with & 3
+int frame_queue_len, frame_queue_read, frame_queue_write; //number of queued entries, next slot to read, next slot to write
+
+void render_framebuffer_updated(uint8_t which, int width) //queues the frame for render_video_loop in SYNC_AUDIO_THREAD/SYNC_EXTERNAL modes, otherwise processes it immediately
+{
+	if (sync_src == SYNC_AUDIO_THREAD || sync_src == SYNC_EXTERNAL) {
+		SDL_LockMutex(frame_mutex);
+			while (frame_queue_len == 4) { //queue is full: poll in 1ms steps until an entry has been consumed
+				SDL_CondSignal(frame_ready); //wake a waiter blocked on frame_ready (render_video_loop waits on it)
+				SDL_UnlockMutex(frame_mutex);
+				SDL_Delay(1);
+				SDL_LockMutex(frame_mutex);
+			}
+			for (int cur = frame_queue_read, i = 0; i < frame_queue_len; i++) { //drop an already queued frame for the same framebuffer, if there is one
+				if (frame_queue[cur].which == which) {
+					int last = (frame_queue_write - 1) & 3; //slot of the most recently queued entry
+					frame_queue_len--;
+					release_buffer(frame_queue[cur].buffer);
+					if (last != cur) {
+						frame_queue[cur] = frame_queue[last]; //fill the hole with the newest entry; NOTE(review): this changes the order of the queued frames
+					}
+					frame_queue_write = last; //the last slot is free again
+					break;
+				}
+				cur = (cur + 1) & 3;
+			}
+			frame_queue[frame_queue_write++] = (frame){
+				.buffer = locked_pixels, //render_get_framebuffer stores the buffer it hands out here in these sync modes
+				.width = width,
+				.which = which
+			};
+			frame_queue_write &= 0x3;
+			frame_queue_len++;
+			SDL_CondSignal(frame_ready);
+		SDL_UnlockMutex(frame_mutex);
+		return;
+	}
+	//TODO: Maybe fixme for render API
+	process_framebuffer(texture_buf, which, width);
+}
+
+void render_video_loop(void) //processes queued frames until SDL audio is no longer playing; returns immediately unless sync_src is SYNC_AUDIO_THREAD or SYNC_EXTERNAL
+{
+	if (sync_src != SYNC_AUDIO_THREAD && sync_src != SYNC_EXTERNAL) {
+		return;
+	}
+	SDL_PauseAudio(0); //a pause_on argument of 0 starts audio playback
+	SDL_LockMutex(frame_mutex);
+		for(;;)
+		{
+			while (!frame_queue_len && SDL_GetAudioStatus() == SDL_AUDIO_PLAYING) //wait on frame_ready while the queue is empty and audio is still playing
+			{
+				SDL_CondWait(frame_ready, frame_mutex);
+			}
+			while (frame_queue_len) //drain everything that is currently queued
+			{
+				frame f = frame_queue[frame_queue_read++];
+				frame_queue_read &= 0x3;
+				frame_queue_len--;
+				SDL_UnlockMutex(frame_mutex); //frame_mutex is not held while the frame is processed
+				process_framebuffer(f.buffer, f.which, f.width);
+				release_buffer(f.buffer); //hand the buffer back to the free list
+				SDL_LockMutex(frame_mutex);
+			}
+			if (SDL_GetAudioStatus() != SDL_AUDIO_PLAYING) {
+				break;
+			}
+		}
+	
+	SDL_UnlockMutex(frame_mutex);
+}
+
 static ui_render_fun render_ui;
 void render_set_ui_render_fun(ui_render_fun fun)
 {
@@ -1703,6 +1781,7 @@
 
 		glUniform1f(un_width, render_emulated_width());
 		glUniform1f(un_height, last_height);
+		glUniform2f(un_texsize, tex_width, tex_height);
 
 		glBindBuffer(GL_ARRAY_BUFFER, buffers[0]);
 		glVertexAttribPointer(at_pos, 2, GL_FLOAT, GL_FALSE, sizeof(GLfloat[2]), (void *)0);
@@ -1762,7 +1841,12 @@
 	return overscan_top[video_standard];
 }
 
-void render_wait_quit(vdp_context * context)
+uint32_t render_overscan_bot() //returns the overscan_bot entry for the current video_standard
+{
+	return overscan_bot[video_standard];
+}
+
+void render_wait_quit(void)
 {
 	SDL_Event event;
 	while(SDL_WaitEvent(&event)) {
@@ -1829,6 +1913,7 @@
 	}
 	
 	SDL_GameControllerButtonBind cbind;
+	int32_t is_positive = RENDER_AXIS_POS;
 	if (is_axis) {
 		
 		int sdl_axis = render_lookup_axis(name);
@@ -1843,6 +1928,10 @@
 			SDL_GameControllerClose(control);
 			return RENDER_INVALID_NAME;
 		}
+		if (sdl_button == SDL_CONTROLLER_BUTTON_DPAD_UP || sdl_button == SDL_CONTROLLER_BUTTON_DPAD_LEFT) {
+			//assume these will be negative if they are an axis
+			is_positive = 0;
+		}
 		cbind = SDL_GameControllerGetBindForButton(control, sdl_button);
 	}
 	SDL_GameControllerClose(control);
@@ -1851,7 +1940,7 @@
 	case SDL_CONTROLLER_BINDTYPE_BUTTON:
 		return cbind.value.button;
 	case SDL_CONTROLLER_BINDTYPE_AXIS:
-		return RENDER_AXIS_BIT | cbind.value.axis;
+		return RENDER_AXIS_BIT | cbind.value.axis | is_positive;
 	case SDL_CONTROLLER_BINDTYPE_HAT:
 		return RENDER_DPAD_BIT | (cbind.value.hat.hat << 4) | cbind.value.hat.hat_mask;
 	}
@@ -1922,16 +2011,7 @@
 	SDL_SetWindowSize(main_window, windowed_width, windowed_height);
 	drain_events();
 	in_toggle = 0;
-}
-
-uint32_t render_audio_buffer()
-{
-	return buffer_samples;
-}
-
-uint32_t render_sample_rate()
-{
-	return sample_rate;
+	need_ui_fb_resize = 1;
 }
 
 void render_errorbox(char *title, char *message)
@@ -1977,3 +2057,9 @@
 	}
 	return 0xFF;
 }
+
+uint8_t render_create_thread(render_thread *thread, const char *name, render_thread_fun fun, void *data) //wraps SDL_CreateThread; stores the handle in *thread and returns non-zero on success
+{
+	*thread = SDL_CreateThread(fun, name, data); //SDL_CreateThread returns NULL on failure
+	return *thread != 0;
+}
--- a/render_sdl.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/render_sdl.h	Sat Jan 15 13:15:21 2022 -0800
@@ -4,15 +4,13 @@
 #include <SDL.h>
 
 SDL_Window *render_get_window(void);
-typedef void (*ui_render_fun)(void);
 typedef void (*event_handler)(SDL_Event *);
 void render_update_display(void);
-void render_set_ui_render_fun(ui_render_fun);
 void render_set_event_handler(event_handler handler);
-void render_set_gl_context_handlers(ui_render_fun destroy, ui_render_fun create);
 SDL_Joystick *render_get_joystick(int index);
 SDL_GameController *render_get_controller(int index);
 int render_lookup_button(char *name);
 int render_lookup_axis(char *name);
+void render_enable_gamepad_events(uint8_t enabled);
 
 #endif //RENDER_SDL_H_
--- a/rom.db	Sat Jan 05 00:58:08 2019 -0800
+++ b/rom.db	Sat Jan 15 13:15:21 2022 -0800
@@ -779,7 +779,7 @@
 			last 3FFFFF
 		}
 	}
-	
+
 }
 MK-1563 {
 	name Sonic & Knuckles
@@ -1287,7 +1287,7 @@
 }
 
 7313c20071de0ab1cd84ac1352cb0ed1c4a4afa8 {
-	#This appears to be an underdump, but it seems to be the only copy floating around 
+	#This appears to be an underdump, but it seems to be the only copy floating around
 	name 12-in-1
 	map {
 		0 {
@@ -1377,3 +1377,47 @@
 		}
 	}
 }
+5e5ca20b39122c86b8f662bd8014af674f9dbed7 {
+	name Rock Heaven
+	map {
+		0 {
+			device ROM
+			last 3FFFFF
+		}
+		500008 {
+			device fixed
+			value 5082
+			last 500009
+		}
+	}
+}
+68366449fd1497538741b5a1949e342202d48948 {
+	name Rock World
+	map {
+		0 {
+			device ROM
+			last 3FFFFF
+		}
+		500008 {
+			device fixed
+			value 4000
+			last 500009
+		}
+		500208 {
+			device fixed
+			value A000
+			last 500209
+		}
+
+	}
+}
+T-122026 {
+	name Outback Joey
+	HeartbeatTrainer {
+		size 512
+	}
+	device_overrides {
+		1 heartbeat_trainer.1
+		2 gamepad3.2
+	}
+}
--- a/romdb.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/romdb.c	Sat Jan 15 13:15:21 2022 -0800
@@ -32,6 +32,8 @@
 		return "EEPROM";
 	} else if(save_type == SAVE_NOR) {
 		return "NOR Flash";
+	} else if(save_type == SAVE_HBPT) {
+		return "Heartbeat Personal Trainer";
 	}
 	return "SRAM";
 }
@@ -80,6 +82,8 @@
 	switch(gen->mapper_type)
 	{
 	case MAPPER_SEGA:
+	case MAPPER_SEGA_SRAM:
+	case MAPPER_SEGA_MED_V2:
 		sega_mapper_serialize(gen, buf);
 		break;
 	case MAPPER_REALTEC:
@@ -99,13 +103,14 @@
 {
 	genesis_context *gen = vcontext;
 	uint8_t mapper_type = load_int8(buf);
-	if (mapper_type != gen->mapper_type) {
-		warning("Mapper type mismatch, skipping load of mapper state");
+	if (mapper_type != gen->mapper_type && (mapper_type != MAPPER_SEGA || gen->mapper_type != MAPPER_SEGA_SRAM)) {
+		warning("Mapper type mismatch, skipping load of mapper state\n");
 		return;
 	}
 	switch(gen->mapper_type)
 	{
 	case MAPPER_SEGA:
+	case MAPPER_SEGA_SRAM:
 		sega_mapper_deserialize(buf, gen);
 		break;
 	case MAPPER_REALTEC:
@@ -125,7 +130,7 @@
 	//TODO: Should probably prefer the title field that corresponds to the user's region preference
 	uint8_t *last = rom + TITLE_END - 1;
 	uint8_t *src = rom + TITLE_START;
-	
+
 	for (;;)
 	{
 		while (last > src && (*last <=  0x20 || *last >= 0x80))
@@ -168,7 +173,7 @@
 uint8_t region_bits[] = {REGION_J, REGION_U, REGION_E, REGION_J|REGION_U|REGION_E};
 
 uint8_t translate_region_char(uint8_t c)
-{	
+{
 	for (int i = 0; i < sizeof(region_bits); i++)
 	{
 		if (c == region_chars[i]) {
@@ -225,7 +230,7 @@
 
 uint8_t has_ram_header(uint8_t *rom, uint32_t rom_size)
 {
-	return rom_size >= (RAM_END + 4) && rom[RAM_ID] == 'R' && rom[RAM_ID + 1] == 'A'; 
+	return rom_size >= (RAM_END + 4) && rom[RAM_ID] == 'R' && rom[RAM_ID + 1] == 'A';
 }
 
 uint32_t read_ram_header(rom_info *info, uint8_t *rom)
@@ -245,50 +250,93 @@
 		save_size /= 2;
 	}
 	info->save_size = save_size;
-	info->save_buffer = malloc(save_size);
+	info->save_buffer = calloc(save_size, 1);
 	return ram_start;
 }
 
 void add_memmap_header(rom_info *info, uint8_t *rom, uint32_t size, memmap_chunk const *base_map, int base_chunks)
 {
 	uint32_t rom_end = get_u32be(rom + ROM_END) + 1;
+	uint32_t rom_end_raw = rom_end;
 	if (size > rom_end) {
 		rom_end = size;
 	} else if (rom_end > nearest_pow2(size)) {
 		rom_end = nearest_pow2(size);
 	}
-	if (size >= 0x80000 && !memcmp("SEGA SSF", rom + 0x100, 8)) {
+	info->save_type = SAVE_NONE;
+	uint8_t is_med_ssf = size >= 0x108 && !memcmp("SEGA SSF", rom + 0x100, 8);
+	if (is_med_ssf || (size >= 0x400000 && rom_end_raw <= 0x400000)) {
+		if (is_med_ssf && rom_end < 16*1024*1024) {
+			info->rom = rom = realloc(rom, 16*1024*1024);
+		}
 		info->mapper_start_index = 0;
-		info->mapper_type = MAPPER_SEGA;
+		info->mapper_type = is_med_ssf ? MAPPER_SEGA_MED_V2 : MAPPER_SEGA;
 		info->map_chunks = base_chunks + 9;
 		info->map = malloc(sizeof(memmap_chunk) * info->map_chunks);
 		memset(info->map, 0, sizeof(memmap_chunk)*9);
 		memcpy(info->map+9, base_map, sizeof(memmap_chunk) * base_chunks);
-		
-		info->map[0].start = 0;
-		info->map[0].end = 0x80000;
-		info->map[0].mask = 0xFFFFFF;
-		info->map[0].flags = MMAP_READ;
-		info->map[0].buffer = rom;
-		
-		if (has_ram_header(rom, size)){
-			read_ram_header(info, rom);
+
+		int i;
+		uint16_t map_flags;
+		if (is_med_ssf) {
+			i = 0;
+			map_flags = info->map[i].flags = MMAP_READ | MMAP_PTR_IDX | MMAP_CODE;
+			info->save_type = RAM_FLAG_BOTH;
+			info->save_size = 256*1024;
+			info->save_mask = info->save_size - 1;
+			info->save_buffer = rom + 16*1024*1024 - 256*1024;
+		} else {
+			i = 1;
+			map_flags = info->map[i].flags = MMAP_READ | MMAP_PTR_IDX | MMAP_CODE | MMAP_FUNC_NULL;
+			info->map[0].start = 0;
+			info->map[0].end = 0x80000;
+			info->map[0].mask = 0xFFFFFF;
+			info->map[0].flags = MMAP_READ;
+			info->map[0].buffer = rom;
+
+			if (has_ram_header(rom, size)){
+				read_ram_header(info, rom);
+			}
 		}
-		
-		for (int i = 1; i < 8; i++)
+		static const write_8_fun med_w8[] = {
+			write_med_ram0_b,
+			write_med_ram1_b,
+			write_med_ram2_b,
+			write_med_ram3_b,
+			write_med_ram4_b,
+			write_med_ram5_b,
+			write_med_ram6_b,
+			write_med_ram7_b,
+		};
+		static const write_16_fun med_w16[] = {
+			write_med_ram0_w,
+			write_med_ram1_w,
+			write_med_ram2_w,
+			write_med_ram3_w,
+			write_med_ram4_w,
+			write_med_ram5_w,
+			write_med_ram6_w,
+			write_med_ram7_w,
+		};
+
+		for (; i < 8; i++)
 		{
 			info->map[i].start = i * 0x80000;
 			info->map[i].end = (i + 1) * 0x80000;
 			info->map[i].mask = 0x7FFFF;
 			info->map[i].buffer = (i + 1) * 0x80000 <= size ? rom + i * 0x80000 : rom;
 			info->map[i].ptr_index = i;
-			info->map[i].flags = MMAP_READ | MMAP_PTR_IDX | MMAP_CODE | MMAP_FUNC_NULL;
-			
-			info->map[i].read_16 = (read_16_fun)read_sram_w;//these will only be called when mem_pointers[i] == NULL
-			info->map[i].read_8 = (read_8_fun)read_sram_b;
-			info->map[i].write_16 = (write_16_fun)write_sram_area_w;//these will be called all writes to the area
-			info->map[i].write_8 = (write_8_fun)write_sram_area_b;
-			
+			info->map[i].flags = map_flags;
+
+			info->map[i].read_16 = is_med_ssf ? NULL : (read_16_fun)read_sram_w;//these will only be called when mem_pointers[i] == NULL
+			info->map[i].read_8 = is_med_ssf ? NULL : (read_8_fun)read_sram_b;
+			if (is_med_ssf) {
+				info->map[i].write_16 = med_w16[i];
+				info->map[i].write_8 = med_w8[i];
+			} else {
+				info->map[i].write_16 = (write_16_fun)write_sram_area_w;//these will be called all writes to the area
+				info->map[i].write_8 = (write_8_fun)write_sram_area_b;
+			}
 		}
 		info->map[8].start = 0xA13000;
 		info->map[8].end = 0xA13100;
@@ -296,6 +344,46 @@
 		info->map[8].write_16 = (write_16_fun)write_bank_reg_w;
 		info->map[8].write_8 = (write_8_fun)write_bank_reg_b;
 		return;
+	} else if(!memcmp("SEGA MEGAWIFI", rom + 0x100, strlen("SEGA MEGAWIFI"))) {
+		info->mapper_type = MAPPER_NONE;
+		info->map_chunks = base_chunks + 2;
+		info->map = malloc(sizeof(memmap_chunk) * info->map_chunks);
+		memset(info->map, 0, sizeof(memmap_chunk)*2);
+		memcpy(info->map+2, base_map, sizeof(memmap_chunk) * base_chunks);
+		info->save_size = 0x400000;
+		info->save_bus = RAM_FLAG_BOTH;
+		info->save_type = SAVE_NOR;
+		info->map[0].start = 0;
+		info->map[0].end = 0x400000;
+		info->map[0].mask = 0xFFFFFF;
+		info->map[0].write_16 = nor_flash_write_w;
+		info->map[0].write_8 = nor_flash_write_b;
+		info->map[0].read_16 = nor_flash_read_w;
+		info->map[0].read_8 = nor_flash_read_b;
+		info->map[0].flags = MMAP_READ_CODE | MMAP_CODE;
+		info->map[0].buffer = info->save_buffer = calloc(info->save_size, 1);
+		uint32_t init_size = size < info->save_size ? size : info->save_size;
+		memcpy(info->save_buffer, rom, init_size);
+		byteswap_rom(info->save_size, (uint16_t *)info->save_buffer);
+		info->nor = calloc(1, sizeof(nor_state));
+		nor_flash_init(info->nor, info->save_buffer, info->save_size, 128, 0xDA45, RAM_FLAG_BOTH);
+		info->nor->cmd_address1 = 0xAAB;
+		info->nor->cmd_address2 = 0x555;
+		info->map[1].start = 0xA130C0;
+		info->map[1].end = 0xA130D0;
+		info->map[1].mask = 0xFFFFFF;
+		if (!strcmp(
+			"on",
+			tern_find_path_default(config, "system\0megawifi\0", (tern_val){.ptrval="off"}, TVAL_PTR).ptrval)
+		) {
+			info->map[1].write_16 = megawifi_write_w;
+			info->map[1].write_8 = megawifi_write_b;
+			info->map[1].read_16 = megawifi_read_w;
+			info->map[1].read_8 = megawifi_read_b;
+		} else {
+			warning("ROM uses MegaWiFi, but it is disabled\n");
+		}
+		return;
 	} else if (has_ram_header(rom, size)) {
 		uint32_t ram_start = read_ram_header(info, rom);
 
@@ -324,11 +412,13 @@
 					info->map[1].flags |= MMAP_ONLY_ODD;
 				} else if (info->save_type == RAM_FLAG_EVEN) {
 					info->map[1].flags |= MMAP_ONLY_EVEN;
+				} else {
+					info->map[1].flags |= MMAP_CODE;
 				}
 				info->map[1].buffer = info->save_buffer;
 			} else {
 				//Assume the standard Sega mapper
-				info->mapper_type = MAPPER_SEGA;
+				info->mapper_type = MAPPER_SEGA_SRAM;
 				info->map[0].end = 0x200000;
 				info->map[0].mask = 0xFFFFFF;
 				info->map[0].flags = MMAP_READ;
@@ -345,7 +435,12 @@
 				info->map[1].write_8 = (write_8_fun)write_sram_area_b;
 				info->map[1].buffer = rom + 0x200000;
 
+				//Last entry in the base map is a catch all one that needs to be
+				//after all the other entries
+				memmap_chunk *unused = info->map + info->map_chunks - 2;
 				memmap_chunk *last = info->map + info->map_chunks - 1;
+				*last = *unused;
+				last = unused;
 				memset(last, 0, sizeof(memmap_chunk));
 				last->start = 0xA13000;
 				last->end = 0xA13100;
@@ -356,7 +451,7 @@
 			return;
 		}
 	}
-	
+
 	info->map_chunks = base_chunks + 1;
 	info->map = malloc(sizeof(memmap_chunk) * info->map_chunks);
 	memset(info->map, 0, sizeof(memmap_chunk));
@@ -452,8 +547,7 @@
 			fatal_error("SRAM size %s is invalid\n", size);
 		}
 		state->info->save_mask = nearest_pow2(state->info->save_size)-1;
-		state->info->save_buffer = malloc(state->info->save_size);
-		memset(state->info->save_buffer, 0, state->info->save_size);
+		state->info->save_buffer = calloc(state->info->save_size, 1);
 		char *bus = tern_find_path(state->root, "SRAM\0bus\0", TVAL_PTR).ptrval;
 		if (!strcmp(bus, "odd")) {
 			state->info->save_type = RAM_FLAG_ODD;
@@ -530,6 +624,9 @@
 		if (!strcmp(init, "ROM")) {
 			uint32_t init_size = state->rom_size > state->info->save_size ? state->info->save_size : state->rom_size;
 			memcpy(state->info->save_buffer, state->rom, init_size);
+			if (init_size < state->info->save_size) {
+				memset(state->info->save_buffer + init_size, 0xFF, state->info->save_size - init_size);
+			}
 			if (state->info->save_bus == RAM_FLAG_BOTH) {
 				byteswap_rom(state->info->save_size, (uint16_t *)state->info->save_buffer);
 			}
@@ -563,7 +660,7 @@
 	if (bits_write) {
 		tern_foreach(bits_write, eeprom_write_fun, eep_map);
 	}
-	printf("EEPROM address %X: sda read: %X, sda write: %X, scl: %X\n", start, eep_map->sda_read_bit, eep_map->sda_write_mask, eep_map->scl_mask);
+	debug_message("EEPROM address %X: sda read: %X, sda write: %X, scl: %X\n", start, eep_map->sda_read_bit, eep_map->sda_write_mask, eep_map->scl_mask);
 	state->info->num_eeprom++;
 }
 
@@ -628,7 +725,9 @@
 			*map = lock_info.map[i];
 			if (map->start < 0x200000) {
 				if (map->buffer) {
-					map->buffer += (0x200000 - map->start) & ((map->flags & MMAP_AUX_BUFF) ? map->aux_mask : map->mask);
+					uint8_t *buf = map->buffer;
+					buf += (0x200000 - map->start) & ((map->flags & MMAP_AUX_BUFF) ? map->aux_mask : map->mask);
+					map->buffer = buf;
 				}
 				map->start = 0x200000;
 			}
@@ -668,6 +767,8 @@
 			map->flags |= MMAP_ONLY_ODD;
 		} else if(state->info->save_type == RAM_FLAG_EVEN) {
 			map->flags |= MMAP_ONLY_EVEN;
+		} else {
+			map->flags |= MMAP_CODE;
 		}
 		map->mask = calc_mask(state->info->save_size, start, end);
 	} else if (!strcmp(dtype, "RAM")) {
@@ -675,7 +776,7 @@
 		if (!size || size > map->end - map->start) {
 			size = map->end - map->start;
 		}
-		map->buffer = malloc(size);
+		map->buffer = calloc(size, 1);
 		map->mask = calc_mask(size, start, end);
 		map->flags = MMAP_READ | MMAP_WRITE;
 		char *bus = tern_find_ptr_default(node, "bus", "both");
@@ -688,7 +789,7 @@
 		}
 	} else if (!strcmp(dtype, "NOR")) {
 		process_nor_def(key, state);
-		
+
 		map->write_16 = nor_flash_write_w;
 		map->write_8 = nor_flash_write_b;
 		map->read_16 = nor_flash_read_w;
@@ -774,12 +875,14 @@
 		map->mask = 0xFF;
 		map->write_16 = (write_16_fun)write_bank_reg_w;
 		map->write_8 = (write_8_fun)write_bank_reg_b;
+#ifndef IS_LIB
 	} else if (!strcmp(dtype, "MENU")) {
 		//fake hardware for supporting menu
 		map->buffer = NULL;
 		map->mask = 0xFF;
 		map->write_16 = menu_write_w;
 		map->read_16 = menu_read_w;
+#endif
 	} else if (!strcmp(dtype, "fixed")) {
 		uint16_t *value =  malloc(2);
 		map->buffer = value;
@@ -811,7 +914,7 @@
 		map->write_8 = write_multi_game_b;
 	} else if (!strcmp(dtype, "megawifi")) {
 		if (!strcmp(
-			"on", 
+			"on",
 			tern_find_path_default(config, "system\0megawifi\0", (tern_val){.ptrval="off"}, TVAL_PTR).ptrval)
 		) {
 			map->write_16 = megawifi_write_w;
@@ -850,18 +953,18 @@
 		product_id[i] = rom[GAME_ID_OFF + i];
 
 	}
-	printf("Product ID: %s\n", product_id);
+	debug_message("Product ID: %s\n", product_id);
 	uint8_t raw_hash[20];
 	sha1(vrom, rom_size, raw_hash);
 	uint8_t hex_hash[41];
 	bin_to_hex(hex_hash, raw_hash, 20);
-	printf("SHA1: %s\n", hex_hash);
+	debug_message("SHA1: %s\n", hex_hash);
 	tern_node * entry = tern_find_node(rom_db, hex_hash);
 	if (!entry) {
 		entry = tern_find_node(rom_db, product_id);
 	}
 	if (!entry) {
-		puts("Not found in ROM DB, examining header\n");
+		debug_message("Not found in ROM DB, examining header\n\n");
 		if (xband_detect(rom, rom_size)) {
 			return xband_configure_rom(rom_db, rom, rom_size, lock_on, lock_on_size, base_map, base_chunks);
 		}
@@ -874,7 +977,7 @@
 	info.mapper_type = MAPPER_NONE;
 	info.name = tern_find_ptr(entry, "name");
 	if (info.name) {
-		printf("Found name: %s\n", info.name);
+		debug_message("Found name: %s\n\n", info.name);
 		info.name = strdup(info.name);
 	} else {
 		info.name = get_header_name(rom);
@@ -908,14 +1011,14 @@
 			info.num_eeprom = 0;
 			memset(info.map, 0, sizeof(memmap_chunk) * info.map_chunks);
 			map_iter_state state = {
-				.info = &info, 
-				.rom = rom, 
+				.info = &info,
+				.rom = rom,
 				.lock_on = lock_on,
 				.root = entry,
 				.rom_db = rom_db,
-				.rom_size = rom_size, 
+				.rom_size = rom_size,
 				.lock_on_size = lock_on_size,
-				.index = 0, 
+				.index = 0,
 				.num_els = info.map_chunks - base_chunks,
 				.ptr_index = 0
 			};
@@ -935,6 +1038,19 @@
 		info.port1_override = tern_find_ptr(device_overrides, "1");
 		info.port2_override = tern_find_ptr(device_overrides, "2");
 		info.ext_override = tern_find_ptr(device_overrides, "ext");
+		if (
+			info.save_type == SAVE_NONE
+			&& (
+				(info.port1_override && startswith(info.port1_override, "heartbeat_trainer."))
+				|| (info.port2_override && startswith(info.port2_override, "heartbeat_trainer."))
+				|| (info.ext_override && startswith(info.ext_override, "heartbeat_trainer."))
+			)
+		) {
+			info.save_type = SAVE_HBPT;
+			info.save_size = atoi(tern_find_path_default(entry, "HeartbeatTrainer\0size\0", (tern_val){.ptrval="512"}, TVAL_PTR).ptrval);
+			info.save_buffer = calloc(info.save_size + 5 + 8, 1);
+			memset(info.save_buffer, 0xFF, info.save_size);
+		}
 	} else {
 		info.port1_override = info.port2_override = info.ext_override = NULL;
 	}
--- a/romdb.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/romdb.h	Sat Jan 15 13:15:21 2022 -0800
@@ -11,6 +11,7 @@
 #define RAM_FLAG_MASK RAM_FLAG_ODD
 #define SAVE_I2C      0x01
 #define SAVE_NOR      0x02
+#define SAVE_HBPT     0x03
 #define SAVE_NONE     0xFF
 
 #include "tern.h"
@@ -43,10 +44,12 @@
 enum {
 	MAPPER_NONE,
 	MAPPER_SEGA,
+	MAPPER_SEGA_SRAM,
 	MAPPER_REALTEC,
 	MAPPER_XBAND,
 	MAPPER_MULTI_GAME,
-	MAPPER_JCART
+	MAPPER_JCART,
+	MAPPER_SEGA_MED_V2
 };
 
 
--- a/saves.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/saves.h	Sat Jan 15 13:15:21 2022 -0800
@@ -6,6 +6,8 @@
 #include "system.h"
 
 #define QUICK_SAVE_SLOT 10
+#define SERIALIZE_SLOT 11
+#define EVENTLOG_SLOT 12
 
 typedef struct {
 	char   *desc;
--- a/sega_mapper.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/sega_mapper.c	Sat Jan 15 13:15:21 2022 -0800
@@ -1,4 +1,5 @@
 #include "genesis.h"
+#include "util.h"
 
 uint16_t read_sram_w(uint32_t address, m68k_context * context)
 {
@@ -43,6 +44,14 @@
 m68k_context * write_sram_area_w(uint32_t address, m68k_context * context, uint16_t value)
 {
 	genesis_context * gen = context->system;
+	if (gen->mapper_type == MAPPER_SEGA_MED_V2) {
+		if (gen->bank_regs[8] & 0x20) {
+			uint32_t bank = address >> 19;
+			address &= 0x7FFFF;
+			context->mem_pointers[gen->mapper_start_index + bank][address >> 1] = value;
+		}
+		return context;
+	}
 	if ((gen->bank_regs[0] & 0x3) == 1) {
 		address &= gen->save_ram_mask;
 		switch(gen->save_type)
@@ -87,14 +96,128 @@
 	return context;
 }
 
+static void* write_med_ram_w(uint32_t address, void *vcontext, uint16_t value, uint16_t bank) //16-bit write handler shared by the write_med_ramN_w wrappers; bank selects the mem_pointers slot
+{
+	m68k_context *context = vcontext;
+	genesis_context * gen = context->system;
+	if (gen->bank_regs[8] & 0x20) { //the write is dropped unless bit 5 of bank_regs[8] is set
+		context->mem_pointers[gen->mapper_start_index + bank][address >> 1] = value; //presumably address is a byte offset inside the bank and mem_pointers holds 16-bit words - TODO confirm
+		address += bank * 0x80000; //add the bank's base so the invalidated range covers this bank's 0x80000-byte window
+		m68k_invalidate_code_range(gen->m68k, address, address + 2);
+	}
+	return vcontext;
+}
+
+void* write_med_ram0_w(uint32_t address, void *vcontext, uint16_t value) //write_med_ram0_w..write_med_ram7_w: wrappers that call write_med_ram_w with a fixed bank number
+{
+	return write_med_ram_w(address, vcontext, value, 0);
+}
+
+void* write_med_ram1_w(uint32_t address, void *vcontext, uint16_t value)
+{
+	return write_med_ram_w(address, vcontext, value, 1);
+}
+
+void* write_med_ram2_w(uint32_t address, void *vcontext, uint16_t value)
+{
+	return write_med_ram_w(address, vcontext, value, 2);
+}
+
+void* write_med_ram3_w(uint32_t address, void *vcontext, uint16_t value)
+{
+	return write_med_ram_w(address, vcontext, value, 3);
+}
+
+void* write_med_ram4_w(uint32_t address, void *vcontext, uint16_t value)
+{
+	return write_med_ram_w(address, vcontext, value, 4);
+}
+
+void* write_med_ram5_w(uint32_t address, void *vcontext, uint16_t value)
+{
+	return write_med_ram_w(address, vcontext, value, 5);
+}
+
+void* write_med_ram6_w(uint32_t address, void *vcontext, uint16_t value)
+{
+	return write_med_ram_w(address, vcontext, value, 6);
+}
+
+void* write_med_ram7_w(uint32_t address, void *vcontext, uint16_t value)
+{
+	return write_med_ram_w(address, vcontext, value, 7);
+}
+
+static void* write_med_ram_b(uint32_t address, void *vcontext, uint8_t value, uint16_t bank) //8-bit write handler shared by the write_med_ramN_b wrappers; bank selects the mem_pointers slot
+{
+	m68k_context *context = vcontext;
+	genesis_context * gen = context->system;
+	if (gen->bank_regs[8] & 0x20) { //the write is dropped unless bit 5 of bank_regs[8] is set
+		((uint8_t*)context->mem_pointers[gen->mapper_start_index + bank])[address ^ 1] = value; //address ^ 1 picks the other byte of the 16-bit word; presumably because the buffer is stored as host-order words - TODO confirm
+		address += bank * 0x80000; //add the bank's base so the invalidated range covers this bank's 0x80000-byte window
+		m68k_invalidate_code_range(gen->m68k, address, address + 1);
+	}
+	return vcontext;
+}
+
+void* write_med_ram0_b(uint32_t address, void *vcontext, uint8_t value) //write_med_ram0_b..write_med_ram7_b: wrappers that call write_med_ram_b with a fixed bank number
+{
+	return write_med_ram_b(address, vcontext, value, 0);
+}
+
+void* write_med_ram1_b(uint32_t address, void *vcontext, uint8_t value)
+{
+	return write_med_ram_b(address, vcontext, value, 1);
+}
+
+void* write_med_ram2_b(uint32_t address, void *vcontext, uint8_t value)
+{
+	return write_med_ram_b(address, vcontext, value, 2);
+}
+
+void* write_med_ram3_b(uint32_t address, void *vcontext, uint8_t value)
+{
+	return write_med_ram_b(address, vcontext, value, 3);
+}
+
+void* write_med_ram4_b(uint32_t address, void *vcontext, uint8_t value)
+{
+	return write_med_ram_b(address, vcontext, value, 4);
+}
+
+void* write_med_ram5_b(uint32_t address, void *vcontext, uint8_t value)
+{
+	return write_med_ram_b(address, vcontext, value, 5);
+}
+
+void* write_med_ram6_b(uint32_t address, void *vcontext, uint8_t value)
+{
+	return write_med_ram_b(address, vcontext, value, 6);
+}
+
+void* write_med_ram7_b(uint32_t address, void *vcontext, uint8_t value)
+{
+	return write_med_ram_b(address, vcontext, value, 7);
+}
+
 m68k_context * write_bank_reg_w(uint32_t address, m68k_context * context, uint16_t value)
 {
 	genesis_context * gen = context->system;
 	address &= 0xE;
 	address >>= 1;
-	gen->bank_regs[address] = value;
 	if (!address) {
-		if (value & 1) {
+		if (gen->mapper_type == MAPPER_SEGA_MED_V2) {
+			if (!(value & 0x8000)) { //parenthesized on purpose: "!value & 0x8000" parses as "(!value) & 0x8000", which is always 0
+				//writes without protection bit (0x8000) set are ignored
+				return context;
+			}
+			gen->bank_regs[8] = value >> 8; //upper byte of the control word; bit 0x20 of it is what the write_med_ram handlers test
+			void *new_ptr = gen->cart + 0x40000*(value & 0x1F); //low 5 bits select what is mapped into the first slot
+			if (context->mem_pointers[gen->mapper_start_index] != new_ptr) {
+				m68k_invalidate_code_range(gen->m68k, 0, 0x80000);
+				context->mem_pointers[gen->mapper_start_index] = new_ptr;
+			}
+		} else if (value & 1) {
 			//Used for games that only use the mapper for SRAM
 			if (context->mem_pointers[gen->mapper_start_index]) {
 				gen->mapper_temp = context->mem_pointers[gen->mapper_start_index];
@@ -116,19 +239,30 @@
 				context->mem_pointers[gen->mapper_start_index + i] = gen->cart + 0x40000*gen->bank_regs[i];
 			}
 		}
-	} else {
-		void *new_ptr = gen->cart + 0x40000*value;
+	} else if (gen->mapper_type != MAPPER_SEGA_SRAM) {
+		uint32_t mask = ((gen->mapper_type == MAPPER_SEGA_MED_V2 ? (16 *1024 * 1024) : nearest_pow2(gen->header.info.rom_size)) >> 1) - 1; //wraps the bank offset to 16MB for MED V2, otherwise to the ROM size rounded by nearest_pow2
+		void *new_ptr = gen->cart + ((0x40000*value) & mask);
 		if (context->mem_pointers[gen->mapper_start_index + address] != new_ptr) {
 			m68k_invalidate_code_range(gen->m68k, address * 0x80000, (address + 1) * 0x80000);
 			context->mem_pointers[gen->mapper_start_index + address] = new_ptr;
 		}
 	}
+	gen->bank_regs[address] = value; //recorded last, so a rejected MED V2 write (early return above) leaves bank_regs untouched
 	return context;
 }
 
 m68k_context * write_bank_reg_b(uint32_t address, m68k_context * context, uint8_t value)
 {
-	if (address & 1) {
+	genesis_context * gen = context->system;
+	if (gen->mapper_type == MAPPER_SEGA_MED_V2) {
+		address &= 0xF; //keep only the low 4 address bits
+		if (!address) {
+			//not sure if this is correct; it's possible that byte sized writes to $A130F0 are always rejected
+			write_bank_reg_w(address, context, value << 8 | value); //the byte is mirrored into both halves of the word
+		} else if (address > 2 && (address & 1)) { //only odd offsets 3..15 are forwarded; offsets 1, 2 and the other even offsets are dropped
+			write_bank_reg_w(address, context, value);
+		}
+	} else if (address & 1) { //other mapper types: only odd addresses reach the word handler
 		write_bank_reg_w(address, context, value);
 	}
 	return context;
@@ -136,13 +270,23 @@
 
 void sega_mapper_serialize(genesis_context *gen, serialize_buffer *buf)
 {
-	save_buffer8(buf, gen->bank_regs, sizeof(gen->bank_regs));
+	save_buffer8(buf, gen->bank_regs, gen->mapper_type == MAPPER_SEGA_MED_V2 ? sizeof(gen->bank_regs) : sizeof(gen->bank_regs) - 1); //the final bank_regs entry is written only for MAPPER_SEGA_MED_V2; other mapper types omit it
 }
 
 void sega_mapper_deserialize(deserialize_buffer *buf, genesis_context *gen)
 {
-	for (int i = 0; i < sizeof(gen->bank_regs); i++)
-	{
-		write_bank_reg_w(i * 2, gen->m68k, load_int8(buf));
+	if (gen->mapper_type == MAPPER_SEGA_MED_V2) {
+		uint16_t reg0 = load_int8(buf); //first saved byte is the low byte of register 0; it is held back until the high byte is read
+		for (int i = 1; i < sizeof(gen->bank_regs) - 1; i++) //replay the remaining registers, excluding the final bank_regs entry
+		{
+			write_bank_reg_w(i * 2, gen->m68k, load_int8(buf));
+		}
+		reg0 |= load_int8(buf) << 8; //final saved byte is the high byte of register 0
+		write_bank_reg_w(0, gen->m68k, reg0); //register 0 is replayed last as one 16-bit write
+	} else {
+		for (int i = 0; i < sizeof(gen->bank_regs) - 1; i++) //other mapper types: replay every saved register in order through the word handler
+		{
+			write_bank_reg_w(i * 2, gen->m68k, load_int8(buf));
+		}
 	}
 }
--- a/sega_mapper.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/sega_mapper.h	Sat Jan 15 13:15:21 2022 -0800
@@ -8,6 +8,22 @@
 m68k_context * write_sram_area_b(uint32_t address, m68k_context * context, uint8_t value);
 m68k_context * write_bank_reg_w(uint32_t address, m68k_context * context, uint16_t value);
 m68k_context * write_bank_reg_b(uint32_t address, m68k_context * context, uint8_t value);
+void* write_med_ram0_w(uint32_t address, void *vcontext, uint16_t value);
+void* write_med_ram1_w(uint32_t address, void *vcontext, uint16_t value);
+void* write_med_ram2_w(uint32_t address, void *vcontext, uint16_t value);
+void* write_med_ram3_w(uint32_t address, void *vcontext, uint16_t value);
+void* write_med_ram4_w(uint32_t address, void *vcontext, uint16_t value);
+void* write_med_ram5_w(uint32_t address, void *vcontext, uint16_t value);
+void* write_med_ram6_w(uint32_t address, void *vcontext, uint16_t value);
+void* write_med_ram7_w(uint32_t address, void *vcontext, uint16_t value);
+void* write_med_ram0_b(uint32_t address, void *vcontext, uint8_t value);
+void* write_med_ram1_b(uint32_t address, void *vcontext, uint8_t value);
+void* write_med_ram2_b(uint32_t address, void *vcontext, uint8_t value);
+void* write_med_ram3_b(uint32_t address, void *vcontext, uint8_t value);
+void* write_med_ram4_b(uint32_t address, void *vcontext, uint8_t value);
+void* write_med_ram5_b(uint32_t address, void *vcontext, uint8_t value);
+void* write_med_ram6_b(uint32_t address, void *vcontext, uint8_t value);
+void* write_med_ram7_b(uint32_t address, void *vcontext, uint8_t value);
 void sega_mapper_serialize(genesis_context *gen, serialize_buffer *buf);
 void sega_mapper_deserialize(deserialize_buffer *buf, genesis_context *gen);
 
--- a/serialize.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/serialize.c	Sat Jan 15 13:15:21 2022 -0800
@@ -20,8 +20,13 @@
 static void reserve(serialize_buffer *buf, size_t amount)
 {
 	if (amount > (buf->storage - buf->size)) {
-		buf->storage *= 2;
-		buf = realloc(buf, buf->storage + sizeof(*buf));
+		if (amount < buf->storage) { //doubling adds buf->storage bytes, which is more than amount here
+			buf->storage *= 2;
+		} else {
+			//doubling isn't guaranteed to be enough, increase by the precise amount needed
+			buf->storage += amount - (buf->storage - buf->size);
+		}
+		buf->data = realloc(buf->data, buf->storage + sizeof(*buf)); //NOTE(review): realloc result is unchecked, and the extra sizeof(*buf) looks like a leftover from the removed line that resized buf itself - confirm
 	}
 }
 
--- a/serialize.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/serialize.h	Sat Jan 15 13:15:21 2022 -0800
@@ -42,7 +42,8 @@
 	SECTION_SOUND_RAM,
 	SECTION_MAPPER,
 	SECTION_EEPROM,
-	SECTION_CART_RAM
+	SECTION_CART_RAM,
+	SECTION_TMSS
 };
 
 void init_serialize(serialize_buffer *buf);
--- a/shaders/crt.f.glsl	Sat Jan 05 00:58:08 2019 -0800
+++ b/shaders/crt.f.glsl	Sat Jan 15 13:15:21 2022 -0800
@@ -1,5 +1,3 @@
-#version 110
-
 /* Subtle CRT shader usable in fullscreen - Anaël Seghezzi [anael(at)maratis3d.com]
    This shader is free software distributed under the terms of the GNU General Public
    License version 3 or higher. This gives you the right to redistribute and/or
@@ -10,63 +8,64 @@
 #define M_PI 3.14159265358979323846
 
 uniform sampler2D textures[2];
-uniform float width, height;
-varying vec2 texcoord;
-varying vec2 screencoord;
+uniform mediump float width, height;
+uniform mediump vec2 texsize;
+varying mediump vec2 texcoord;
+varying mediump vec2 screencoord;
 
 
-float nrand(vec2 n) {
+mediump float nrand(mediump vec2 n) { //hash-style noise from a 2D coordinate, fract() keeps the result in [0, 1); parameter precision is explicit like sharp_coord's
 	return fract(sin(dot(n.xy, vec2(12.9898, 78.233))) * 43758.5453);
 }
 
-float scanline(vec2 texco)
+mediump float scanline(mediump vec2 texco) //scanline intensity in [0, 1] for a texture coordinate; parameter precision is explicit like sharp_coord's
 {
-	return (1.0 - abs(cos(texco.y * 512.0 * M_PI)));
+	return (1.0 - abs(cos(texco.y * texsize.y * M_PI))); //uses the texsize uniform instead of a fixed texture height of 512
 }
 
-vec2 sharp_coord(vec2 texco, vec2 dim, vec2 sharpness)
+mediump vec2 sharp_coord(mediump vec2 texco, mediump vec2 dim, mediump vec2 sharpness)
 {
-	vec2 texcoif = texco * dim;
-	vec2 texcoi = floor(texcoif);
-	vec2 mu = (texcoif - 0.5) - texcoi;
-	vec2 mub = pow(abs(mu) * 2.0, sharpness) * sign(mu) * 0.5;
+	mediump vec2 texcoif = texco * dim;
+	mediump vec2 texcoi = floor(texcoif);
+	mediump vec2 mu = (texcoif - 0.5) - texcoi;
+	mediump vec2 mub = pow(abs(mu) * 2.0, sharpness) * sign(mu) * 0.5;
 	return (texcoi + mub + 0.5) / dim;	
 }
 
 void main()
 {
-	float v = 1.0 / 512.0;
-	float yforce = 0.175;
-	float vign = length(screencoord);
+	mediump float v = 1.0 / texsize.y;
+	mediump float yforce = 0.175;
+	mediump float vign = length(screencoord);
 
 	// monitor deformation
-	vec2 monitorcoord = (screencoord + screencoord * vign * 0.025);
+	mediump vec2 monitorcoord = (screencoord + screencoord * vign * 0.025);
 	
 	if (monitorcoord.x < -1.0 || monitorcoord.y < -1.0 || monitorcoord.x > 1.0 || monitorcoord.y > 1.0) {
 		gl_FragColor = vec4(0.0);
 		return;
 	}
 
-	vec2 texco = monitorcoord * vec2(width/1024.0, height/-1024.0) + vec2(width/1024.0, height/1024.0);
+	mediump vec2 texco = monitorcoord * vec2(0.5*width/texsize.x, -0.5 * height/texsize.y) + vec2(0.5*width/texsize.x, 0.5*height/texsize.y);
 
 	// mask
-	float maskx = 1.0 - pow(abs(monitorcoord.x), 200.0);
-	float masky = 1.0 - pow(abs(-monitorcoord.y), 200.0);
-	float mask = clamp(maskx * masky, 0.0, 1.0);
+	mediump float maskx = 1.0 - pow(abs(monitorcoord.x), 200.0);
+	mediump float masky = 1.0 - pow(abs(-monitorcoord.y), 200.0);
+	mediump float mask = clamp(maskx * masky, 0.0, 1.0);
 
 	// sharp texcoord
-	vec2 texco_sharp0 = sharp_coord(texco, vec2(512.0, 512.0), vec2(4.0, 8.0));
-	vec2 texco_sharp1 = sharp_coord(texco - vec2(0.0, 1.0 / 1024.0), vec2(512.0, 512.0), vec2(4.0, 8.0));
+	mediump vec2 texco_sharp0 = sharp_coord(texco, texsize, vec2(4.0, 8.0));
+	mediump vec2 texco_sharp1 = sharp_coord(texco - vec2(0.0, 0.5 / texsize.y), texsize, vec2(4.0, 8.0));
 
-	vec4 src0 = texture2D(textures[0], texco_sharp0);
-	vec4 src1 = texture2D(textures[1], texco_sharp1);
+	mediump vec4 src0 = texture2D(textures[0], texco_sharp0);
+	mediump vec4 src1 = texture2D(textures[1], texco_sharp1);
 
 	// interlace mix
-	float interlace = cos((texco.y * 1024.0) * M_PI);
-	vec4 src_mix = mix(src0, src1, interlace * 0.5 + 0.5);
+	mediump float interlace = cos((texco.y * 2.0 * texsize.y) * M_PI);
+	mediump vec4 src_mix = mix(src0, src1, interlace * 0.5 + 0.5);
 
 	// blur
-	vec4 src_blur = mix(texture2D(textures[0], texco), texture2D(textures[1], texco), 0.5);
+	mediump vec4 src_blur = mix(texture2D(textures[0], texco), texture2D(textures[1], texco), 0.5);
 
 #ifdef NO_SCANLINE
 
@@ -75,11 +74,11 @@
 #else
 	// multisample scanline with grain
 	// TODO: offset grain with time (needs a "frame" uniform)
-	float cosy;
-	cosy  = scanline(texco + vec2(0.125, v * (nrand(texcoord + vec2(0.0, 1.0)) * 0.25) + 0.3333));
-	cosy += scanline(texco + vec2(0.25, v * (nrand(texcoord + vec2(0.0, 2.0)) * 0.25) + 0.25));
-	cosy += scanline(texco + vec2(0.50, v * (nrand(texcoord + vec2(0.0, 3.0)) * 0.25) + 0.6666));
-	cosy += scanline(texco + vec2(0.75, v * (nrand(texcoord + vec2(0.0, 4.0)) * 0.25) + 0.75));
+	mediump float cosy;
+	cosy  = scanline(texco + vec2(0.125, v * (nrand(texcoord + vec2(0.0, 512.0/texsize.y)) * 0.25) + 512.0*0.3333/texsize.y));
+	cosy += scanline(texco + vec2(0.25, v * (nrand(texcoord + vec2(0.0, 1024.0/texsize.y)) * 0.25) + 512.0*0.25/texsize.y));
+	cosy += scanline(texco + vec2(0.50, v * (nrand(texcoord + vec2(0.0, 1536.0/texsize.y)) * 0.25) + 512.0*0.6666/texsize.y));
+	cosy += scanline(texco + vec2(0.75, v * (nrand(texcoord + vec2(0.0, 2048.0/texsize.y)) * 0.25) + 512.0*0.75/texsize.y));
 	cosy *= 0.25;
 
 	// final scanline + burn
--- a/shaders/crt.v.glsl	Sat Jan 05 00:58:08 2019 -0800
+++ b/shaders/crt.v.glsl	Sat Jan 15 13:15:21 2022 -0800
@@ -1,13 +1,13 @@
-#version 110
 
 attribute vec2 pos;
-varying vec2 texcoord;
-varying vec2 screencoord;
-uniform float width, height;
+varying mediump vec2 texcoord;
+varying mediump vec2 screencoord;
+uniform mediump float width, height;
+uniform mediump vec2 texsize;
 
 void main()
 {
 	gl_Position = vec4(pos, 0.0, 1.0);
-	texcoord = sign(pos) * vec2(width/1024.0, height/-1024.0) + vec2(width/1024.0, height/1024.0);
+	texcoord = sign(pos) * vec2(0.5*width/texsize.x, -0.5*height/texsize.y) + vec2(0.5*width/texsize.x, 0.5*height/texsize.y);
 	screencoord = sign(pos);
 }
\ No newline at end of file
--- a/shaders/default.f.glsl	Sat Jan 05 00:58:08 2019 -0800
+++ b/shaders/default.f.glsl	Sat Jan 15 13:15:21 2022 -0800
@@ -1,15 +1,16 @@
 
 uniform sampler2D textures[2];
+uniform mediump vec2 texsize;
 
 varying mediump vec2 texcoord;
 
 void main()
 {
-	mediump vec2 modifiedCoord0 = vec2(texcoord.x, (floor(texcoord.y * 512.0 + 0.25) + 0.5)/512.0);
-	mediump vec2 modifiedCoord1 = vec2(texcoord.x, (floor(texcoord.y * 512.0 - 0.25) + 0.5)/512.0);
+	mediump vec2 modifiedCoord0 = vec2(texcoord.x, (floor(texcoord.y * texsize.y + 0.25) + 0.5)/texsize.y);
+	mediump vec2 modifiedCoord1 = vec2(texcoord.x, (floor(texcoord.y * texsize.y - 0.25) + 0.5)/texsize.y);
 	gl_FragColor = mix(
 		texture2D(textures[1], modifiedCoord1),
 		texture2D(textures[0], modifiedCoord0),
-		(sin(texcoord.y * 1024.0 * 3.14159265359) + 1.0) * 0.5
+		(sin(texcoord.y * texsize.y * 6.283185307) + 1.0) * 0.5
 	);
 }
--- a/shaders/default.v.glsl	Sat Jan 05 00:58:08 2019 -0800
+++ b/shaders/default.v.glsl	Sat Jan 15 13:15:21 2022 -0800
@@ -2,9 +2,10 @@
 attribute vec2 pos;
 varying mediump vec2 texcoord;
 uniform mediump float width, height;
+uniform mediump vec2 texsize;
 
 void main()
 {
 	gl_Position = vec4(pos, 0.0, 1.0);
-	texcoord = sign(pos) * vec2(width / 1024.0, height / -1024.0) + vec2(width / 1024.0, height / 1024.0);
+	texcoord = sign(pos) * vec2(0.5 * width / texsize.x, -0.5 * height / texsize.y) + vec2(0.5 * width / texsize.x, 0.5 * height / texsize.y);
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/shaders/sharp.f.glsl	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,29 @@
+
+uniform sampler2D textures[2];
+uniform mediump vec2 texsize;
+
+varying mediump vec2 texcoord;
+
+void main() //fragment entry point: blends four texel-centre samples taken from the two textures
+{
+	mediump float x0 = (floor(texcoord.x * texsize.x - 0.25) + 0.5)/texsize.x; //coordinate shifted by -0.25 texel, then snapped to the centre of the texel it lands in
+	mediump float x1 = (floor(texcoord.x * texsize.x + 0.25) + 0.5)/texsize.x; //same with a +0.25 texel shift
+	mediump float y0 = (floor(texcoord.y * texsize.y + 0.25) + 0.5)/texsize.y; //row used for textures[0]
+	mediump float y1 = (floor(texcoord.y * texsize.y - 0.25) + 0.5)/texsize.y; //row used for textures[1]
+	
+	mediump float ymix = (sin(texcoord.y * texsize.y * 6.283185307) + 1.0) * 0.5; //sinusoidal weight in [0, 1] with a period of one texel (6.283185307 is 2*pi)
+	mediump float xmix = (sin(texcoord.x * texsize.x * 6.283185307) + 1.0) * 0.5; //same weight for the horizontal axis
+	gl_FragColor = mix( //outer mix blends the x0 column with the x1 column by xmix
+		mix( //x0 column: textures[1] at y1 blended with textures[0] at y0 by ymix
+			texture2D(textures[1], vec2(x0, y1)),
+			texture2D(textures[0], vec2(x0, y0)),
+			ymix
+		),
+		mix( //x1 column: same vertical blend
+			texture2D(textures[1], vec2(x1, y1)),
+			texture2D(textures[0], vec2(x1, y0)),
+			ymix
+		),
+		xmix
+	);
+}
--- a/sms.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/sms.c	Sat Jan 15 13:15:21 2022 -0800
@@ -9,26 +9,35 @@
 #include "saves.h"
 #include "bindings.h"
 
+#ifdef NEW_CORE
+#define Z80_CYCLE cycles
+#define Z80_OPTS opts
+#define z80_handle_code_write(...)
+#else
+#define Z80_CYCLE current_cycle
+#define Z80_OPTS options
+#endif
+
 static void *memory_io_write(uint32_t location, void *vcontext, uint8_t value)
 {
 	z80_context *z80 = vcontext;
 	sms_context *sms = z80->system;
 	if (location & 1) {
 		uint8_t fuzzy_ctrl_0 = sms->io.ports[0].control, fuzzy_ctrl_1 = sms->io.ports[1].control;
-		io_control_write(sms->io.ports, (~value) << 5 & 0x60, z80->current_cycle);
+		io_control_write(sms->io.ports, (~value) << 5 & 0x60, z80->Z80_CYCLE);
 		fuzzy_ctrl_0 |= sms->io.ports[0].control;
-		io_control_write(sms->io.ports+1, (~value) << 3 & 0x60, z80->current_cycle);
+		io_control_write(sms->io.ports+1, (~value) << 3 & 0x60, z80->Z80_CYCLE);
 		fuzzy_ctrl_1 |= sms->io.ports[1].control;
 		if (
 			(fuzzy_ctrl_0 & 0x40 & (sms->io.ports[0].output ^ (value << 1)) & (value << 1))
 			|| (fuzzy_ctrl_0 & 0x40 & (sms->io.ports[1].output ^ (value >> 1)) & (value >> 1))
 		) {
 			//TH is an output and it went from 0 -> 1
-			vdp_run_context(sms->vdp, z80->current_cycle);
+			vdp_run_context(sms->vdp, z80->Z80_CYCLE);
 			vdp_latch_hv(sms->vdp);
 		}
-		io_data_write(sms->io.ports, value << 1, z80->current_cycle);
-		io_data_write(sms->io.ports + 1, value >> 1, z80->current_cycle);
+		io_data_write(sms->io.ports, value << 1, z80->Z80_CYCLE);
+		io_data_write(sms->io.ports + 1, value >> 1, z80->Z80_CYCLE);
 	} else {
 		//TODO: memory control write
 	}
@@ -39,7 +48,7 @@
 {
 	z80_context *z80 = vcontext;
 	sms_context *sms = z80->system;
-	vdp_run_context(sms->vdp, z80->current_cycle);
+	vdp_run_context(sms->vdp, z80->Z80_CYCLE);
 	uint16_t hv = vdp_hv_counter_read(sms->vdp);
 	if (location & 1) {
 		return hv;
@@ -52,7 +61,7 @@
 {
 	z80_context *z80 = vcontext;
 	sms_context *sms = z80->system;
-	psg_run(sms->psg, z80->current_cycle);
+	psg_run(sms->psg, z80->Z80_CYCLE);
 	psg_write(sms->psg, value);
 	return vcontext;
 }
@@ -61,14 +70,19 @@
 {
 	uint32_t vint = vdp_next_vint(sms->vdp);
 	uint32_t hint = vdp_next_hint(sms->vdp);
+#ifdef NEW_CORE
+	sms->z80->int_cycle = vint < hint ? vint : hint;
+	z80_sync_cycle(sms->z80, sms->z80->sync_cycle);
+#else
 	sms->z80->int_pulse_start = vint < hint ? vint : hint;
+#endif
 }
 
 static uint8_t vdp_read(uint32_t location, void *vcontext)
 {
 	z80_context *z80 = vcontext;
 	sms_context *sms = z80->system;
-	vdp_run_context(sms->vdp, z80->current_cycle);
+	vdp_run_context(sms->vdp, z80->Z80_CYCLE);
 	if (location & 1) {
 		uint8_t ret = vdp_control_port_read(sms->vdp);
 		sms->vdp->flags2 &= ~(FLAG2_VINT_PENDING|FLAG2_HINT_PENDING);
@@ -84,11 +98,11 @@
 	z80_context *z80 = vcontext;
 	sms_context *sms = z80->system;
 	if (location & 1) {
-		vdp_run_context_full(sms->vdp, z80->current_cycle);
+		vdp_run_context_full(sms->vdp, z80->Z80_CYCLE);
 		vdp_control_port_write_pbc(sms->vdp, value);
 		update_interrupts(sms);
 	} else {
-		vdp_run_context(sms->vdp, z80->current_cycle);
+		vdp_run_context(sms->vdp, z80->Z80_CYCLE);
 		vdp_data_port_write_pbc(sms->vdp, value);
 	}
 	return vcontext;
@@ -99,13 +113,13 @@
 	z80_context *z80 = vcontext;
 	sms_context *sms = z80->system;
 	if (location == 0xC0 || location == 0xDC) {
-		uint8_t port_a = io_data_read(sms->io.ports, z80->current_cycle);
-		uint8_t port_b = io_data_read(sms->io.ports+1, z80->current_cycle);
+		uint8_t port_a = io_data_read(sms->io.ports, z80->Z80_CYCLE);
+		uint8_t port_b = io_data_read(sms->io.ports+1, z80->Z80_CYCLE);
 		return (port_a & 0x3F) | (port_b << 6);
 	}
 	if (location == 0xC1 || location == 0xDD) {
-		uint8_t port_a = io_data_read(sms->io.ports, z80->current_cycle);
-		uint8_t port_b = io_data_read(sms->io.ports+1, z80->current_cycle);
+		uint8_t port_a = io_data_read(sms->io.ports, z80->Z80_CYCLE);
+		uint8_t port_b = io_data_read(sms->io.ports+1, z80->Z80_CYCLE);
 		return (port_a & 0x40) | (port_b >> 2 & 0xF) | (port_b << 1 & 0x80) | 0x10;
 	}
 	return 0xFF;
@@ -235,6 +249,18 @@
 	end_section(buf);
 }
 
+static uint8_t *serialize(system_header *sys, size_t *size_out) //installed as sms->header.serialize; returns the serialized SMS state
+{
+	sms_context *sms = (sms_context *)sys;
+	serialize_buffer state;
+	init_serialize(&state);
+	sms_serialize(sms, &state);
+	if (size_out) { //size_out is optional and may be NULL
+		*size_out = state.size;
+	}
+	return state.data; //state is a local, so the data buffer now belongs to the caller - presumably released with free(), TODO confirm
+}
+
 static void ram_deserialize(deserialize_buffer *buf, void *vsms)
 {
 	sms_context *sms = vsms;
@@ -290,6 +316,16 @@
 		//cart RAM is enabled, invalidate the region in case there is any code there
 		z80_invalidate_code_range(sms->z80, 0x8000, 0xC000);
 	}
+	free(buf->handlers);
+	buf->handlers = NULL;
+}
+
+static void deserialize(system_header *sys, uint8_t *data, size_t size) //installed as sms->header.deserialize; loads SMS state from a caller-supplied byte buffer
+{
+	sms_context *sms = (sms_context *)sys;
+	deserialize_buffer buffer;
+	init_deserialize(&buffer, data, size); //wraps data/size in a temporary deserialize_buffer
+	sms_deserialize(&buffer, sms); //sms_deserialize frees buffer.handlers before it returns
 }
 
 static void save_state(sms_context *sms, uint8_t slot)
@@ -321,6 +357,7 @@
 	sms_context *sms = (sms_context *)system;
 	char *statepath = get_slot_name(system, slot, "state");
 	uint8_t ret;
+#ifndef NEW_CORE
 	if (!sms->z80->native_pc) {
 		ret = get_modification_time(statepath) != 0;
 		if (ret) {
@@ -329,6 +366,7 @@
 		goto done;
 		
 	}
+#endif
 	ret = load_state_path(sms, statepath);
 done:
 	free(statepath);
@@ -338,7 +376,7 @@
 static void run_sms(system_header *system)
 {
 	sms_context *sms = (sms_context *)system;
-	uint32_t target_cycle = sms->z80->current_cycle + 3420*16;
+	uint32_t target_cycle = sms->z80->Z80_CYCLE + 3420*16;
 	//TODO: PAL support
 	render_set_video_standard(VID_NTSC);
 	while (!sms->should_return)
@@ -352,7 +390,11 @@
 			system->enter_debugger = 0;
 			zdebugger(sms->z80, sms->z80->pc);
 		}
+#ifdef NEW_CORE
+		if (sms->z80->nmi_cycle == CYCLE_NEVER) {
+#else
 		if (sms->z80->nmi_start == CYCLE_NEVER) {
+#endif
 			uint32_t nmi = vdp_next_nmi(sms->vdp);
 			if (nmi != CYCLE_NEVER) {
 				z80_assert_nmi(sms->z80, nmi);
@@ -360,16 +402,16 @@
 		}
 		z80_run(sms->z80, target_cycle);
 		if (sms->z80->reset) {
-			z80_clear_reset(sms->z80, sms->z80->current_cycle + 128*15);
+			z80_clear_reset(sms->z80, sms->z80->Z80_CYCLE + 128*15);
 		}
-		target_cycle = sms->z80->current_cycle;
+		target_cycle = sms->z80->Z80_CYCLE;
 		vdp_run_context(sms->vdp, target_cycle);
 		psg_run(sms->psg, target_cycle);
 		
 		if (system->save_state) {
 			while (!sms->z80->pc) {
 				//advance Z80 to an instruction boundary
-				z80_run(sms->z80, sms->z80->current_cycle + 1);
+				z80_run(sms->z80, sms->z80->Z80_CYCLE + 1);
 			}
 			save_state(sms, system->save_state - 1);
 			system->save_state = 0;
@@ -377,27 +419,32 @@
 		
 		target_cycle += 3420*16;
 		if (target_cycle > 0x10000000) {
-			uint32_t adjust = sms->z80->current_cycle - 3420*262*2;
-			io_adjust_cycles(sms->io.ports, sms->z80->current_cycle, adjust);
-			io_adjust_cycles(sms->io.ports+1, sms->z80->current_cycle, adjust);
+			uint32_t adjust = sms->z80->Z80_CYCLE - 3420*262*2;
+			io_adjust_cycles(sms->io.ports, sms->z80->Z80_CYCLE, adjust);
+			io_adjust_cycles(sms->io.ports+1, sms->z80->Z80_CYCLE, adjust);
 			z80_adjust_cycles(sms->z80, adjust);
 			vdp_adjust_cycles(sms->vdp, adjust);
 			sms->psg->cycles -= adjust;
 			target_cycle -= adjust;
 		}
 	}
-	bindings_release_capture();
-	vdp_release_framebuffer(sms->vdp);
-	render_pause_source(sms->psg->audio);
+	if (sms->header.force_release || render_should_release_on_exit()) {
+		bindings_release_capture();
+		vdp_release_framebuffer(sms->vdp);
+		render_pause_source(sms->psg->audio);
+	}
 	sms->should_return = 0;
 }
 
 static void resume_sms(system_header *system)
 {
 	sms_context *sms = (sms_context *)system;
-	bindings_reacquire_capture();
-	vdp_reacquire_framebuffer(sms->vdp);
-	render_resume_source(sms->psg->audio);
+	if (sms->header.force_release || render_should_release_on_exit()) {
+		sms->header.force_release = 0;
+		bindings_reacquire_capture();
+		vdp_reacquire_framebuffer(sms->vdp);
+		render_resume_source(sms->psg->audio);
+	}
 	run_sms(system);
 }
 
@@ -423,15 +470,17 @@
 static void soft_reset(system_header *system)
 {
 	sms_context *sms = (sms_context *)system;
-	z80_assert_reset(sms->z80, sms->z80->current_cycle);
-	sms->z80->target_cycle = sms->z80->sync_cycle = sms->z80->current_cycle;
+	z80_assert_reset(sms->z80, sms->z80->Z80_CYCLE);
+#ifndef NEW_CORE
+	sms->z80->target_cycle = sms->z80->sync_cycle = sms->z80->Z80_CYCLE;
+#endif
 }
 
 static void free_sms(system_header *system)
 {
 	sms_context *sms = (sms_context *)system;
 	vdp_free(sms->vdp);
-	z80_options_free(sms->z80->options);
+	z80_options_free(sms->z80->Z80_OPTS);
 	free(sms->z80);
 	psg_free(sms->psg);
 	free(sms);
@@ -446,6 +495,9 @@
 {
 	sms_context *sms = (sms_context *)system;
 	sms->should_return = 1;
+#ifndef NEW_CORE
+	sms->z80->target_cycle = sms->z80->sync_cycle = sms->z80->Z80_CYCLE;
+#endif
 }
 
 static void inc_debug_mode(system_header *system)
@@ -518,6 +570,13 @@
 	io_keyboard_up(&sms->io, scancode);
 }
 
+static void set_gain_config(sms_context *sms) //applies the configured PSG gain to the PSG audio source
+{
+	char *config_gain;
+	config_gain = tern_find_path(config, "audio\0psg_gain\0", TVAL_PTR).ptrval; //config path is given as NUL-separated components: audio, psg_gain
+	render_audio_source_gaindb(sms->psg->audio, config_gain ? atof(config_gain) : 0.0f); //a missing setting falls back to 0.0; value is presumably in dB given the callee's name - TODO confirm
+}
+
 static void config_updated(system_header *system)
 {
 	sms_context *sms = (sms_context *)system;
@@ -550,7 +609,7 @@
 	init_z80_opts(zopts, sms->header.info.map, sms->header.info.map_chunks, io_map, 4, 15, 0xFF);
 	sms->z80 = init_z80_context(zopts);
 	sms->z80->system = sms;
-	sms->z80->options->gen.debug_cmd_handler = debug_commands;
+	sms->z80->Z80_OPTS->gen.debug_cmd_handler = debug_commands;
 	
 	sms->rom = media->buffer;
 	sms->rom_size = rom_size;
@@ -569,7 +628,9 @@
 	sms->psg = malloc(sizeof(psg_context));
 	psg_init(sms->psg, sms->master_clock, 15*16);
 	
-	sms->vdp = init_vdp_context(0);
+	set_gain_config(sms);
+	
+	sms->vdp = init_vdp_context(0, 0);
 	sms->vdp->system = &sms->header;
 	
 	sms->header.info.save_type = SAVE_NONE;
@@ -598,6 +659,8 @@
 	sms->header.keyboard_down = keyboard_down;
 	sms->header.keyboard_up = keyboard_up;
 	sms->header.config_updated = config_updated;
+	sms->header.serialize = serialize;
+	sms->header.deserialize = deserialize;
 	sms->header.type = SYSTEM_SMS;
 	
 	return sms;
--- a/sms.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/sms.h	Sat Jan 15 13:15:21 2022 -0800
@@ -4,7 +4,11 @@
 #include "system.h"
 #include "vdp.h"
 #include "psg.h"
+#ifdef NEW_CORE
+#include "z80.h"
+#else
 #include "z80_to_x86.h"
+#endif
 #include "io.h"
 
 #define SMS_RAM_SIZE (8*1024)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/special_keys_evdev.h	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,59 @@
+#ifndef SPECIAL_KEYS_EVDEV_H_
+#define SPECIAL_KEYS_EVDEV_H_
+
+enum {
+	RENDERKEY_DOWN = 128,
+	RENDERKEY_UP,
+	RENDERKEY_LEFT,
+	RENDERKEY_RIGHT,
+	RENDERKEY_ESC,
+	RENDERKEY_DEL,  
+	RENDERKEY_LSHIFT, 
+	RENDERKEY_RSHIFT,
+	RENDERKEY_LCTRL,
+	RENDERKEY_RCTRL,
+	RENDERKEY_LALT,
+	RENDERKEY_RALT,
+	RENDERKEY_HOME,
+	RENDERKEY_END,
+	RENDERKEY_PAGEUP,
+	RENDERKEY_PAGEDOWN,
+	RENDERKEY_F1,
+	RENDERKEY_F2,
+	RENDERKEY_F3,
+	RENDERKEY_F4,
+	RENDERKEY_F5,
+	RENDERKEY_F6,
+	RENDERKEY_F7,
+	RENDERKEY_F8,
+	RENDERKEY_F9,
+	RENDERKEY_F10,
+	RENDERKEY_F11,
+	RENDERKEY_F12,
+	RENDERKEY_SELECT,
+	RENDERKEY_PLAY,
+	RENDERKEY_SEARCH,
+	RENDERKEY_BACK,
+	RENDERKEY_NP0,
+	RENDERKEY_NP1,
+	RENDERKEY_NP2,
+	RENDERKEY_NP3,
+	RENDERKEY_NP4,
+	RENDERKEY_NP5,
+	RENDERKEY_NP6,
+	RENDERKEY_NP7,
+	RENDERKEY_NP8,
+	RENDERKEY_NP9,
+	RENDERKEY_NP_DIV,
+	RENDERKEY_NP_MUL,
+	RENDERKEY_NP_MIN,
+	RENDERKEY_NP_PLUS,
+	RENDERKEY_NP_ENTER,
+	RENDERKEY_NP_STOP,
+	RENDER_DPAD_UP,
+	RENDER_DPAD_DOWN,
+	RENDER_DPAD_LEFT,
+	RENDER_DPAD_RIGHT
+};
+
+#endif //SPECIAL_KEYS_EVDEV_H_
--- a/stateview.c	Sat Jan 05 00:58:08 2019 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,131 +0,0 @@
-/*
- Copyright 2013 Michael Pavone
- This file is part of BlastEm. 
- BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
-*/
-#include <stdlib.h>
-#include <stdio.h>
-#include "vdp.h"
-#include "render.h"
-#include "util.h"
-#include "genesis.h"
-#include "config.h"
-
-
-uint16_t read_dma_value(uint32_t address)
-{
-	return 0;
-}
-
-m68k_context *m68k_handle_code_write(uint32_t address, m68k_context *context)
-{
-	return NULL;
-}
-
-z80_context *z80_handle_code_write(uint32_t address, z80_context *context)
-{
-	return NULL;
-}
-
-void ym_data_write(ym2612_context * context, uint8_t value)
-{
-}
-
-void ym_address_write_part1(ym2612_context * context, uint8_t address)
-{
-}
-
-void ym_address_write_part2(ym2612_context * context, uint8_t address)
-{
-}
-
-void handle_keydown(int keycode, uint8_t scancode)
-{
-}
-
-void handle_keyup(int keycode, uint8_t scancode)
-{
-}
-
-void handle_joydown(int joystick, int button)
-{
-}
-
-void handle_joyup(int joystick, int button)
-{
-}
-
-void handle_joy_dpad(int joystick, int dpadnum, uint8_t value)
-{
-}
-
-void handle_joy_axis(int joystick, int axis, int16_t value)
-{
-}
-
-void handle_joy_added(int joystick)
-{
-}
-
-void handle_mousedown(int mouse, int button)
-{
-}
-
-void handle_mouseup(int mouse, int button)
-{
-}
-
-void handle_mouse_moved(int mouse, uint16_t x, uint16_t y, int16_t deltax, int16_t deltay)
-{
-}
-
-void controller_add_mappings()
-{
-}
-
-tern_node * config;
-int headless = 0;
-
-int main(int argc, char ** argv)
-{
-	if (argc < 2) {
-		fatal_error("Usage: stateview FILENAME\n");
-	}
-	FILE * state_file = fopen(argv[1], "rb");
-	if (!state_file) {
-		fatal_error("Failed to open %s\n", argv[1]);
-	}
-	set_exe_str(argv[0]);
-	config = load_config(argv[0]);
-	int width = -1;
-	int height = -1;
-	if (argc > 2) {
-		width = atoi(argv[2]);
-		if (argc > 3) {
-			height = atoi(argv[3]);
-		}
-	}
-	int def_width = 0;
-	char *config_width = tern_find_ptr(config, "videowidth");
-	if (config_width) {
-		def_width = atoi(config_width);
-	}
-	if (!def_width) {
-		def_width = 640;
-	}
-	width = width < 320 ? def_width : width;
-	height = height < 240 ? (width/320) * 240 : height;
-
-	render_init(width, height, "GST State Viewer", 0);
-	vdp_context *context = init_vdp_context(0);
-	vdp_load_gst(context, state_file);
-	vdp_run_to_vblank(context);
-	vdp_print_sprite_table(context);
-	printf("Display %s\n", (context->regs[REG_MODE_2] & DISPLAY_ENABLE) ? "enabled" : "disabled");
-	if (!(context->regs[REG_MODE_2] & DISPLAY_ENABLE)) {
-		puts("Forcing display on");
-		vdp_control_port_write(context, 0x8000 | REG_MODE_2 << 8 | context->regs[REG_MODE_2] | DISPLAY_ENABLE);
-	}
-    render_wait_quit(context);
-    return 0;
-}
--- a/system.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/system.c	Sat Jan 15 13:15:21 2022 -0800
@@ -1,6 +1,7 @@
 #include <string.h>
 #include "system.h"
 #include "genesis.h"
+#include "gen_player.h"
 #include "sms.h"
 
 uint8_t safe_cmp(char *str, long offset, uint8_t *buffer, long filesize)
@@ -25,6 +26,14 @@
 	) {
 		return SYSTEM_SMS;
 	}
+	if (safe_cmp("BLSTEL\x02", 0, media->buffer, media->size)) {
+		uint8_t *buffer = media->buffer;
+		if (media->size > 9 && buffer[7] == 0) {
+			return buffer[8] + 1;
+		}
+	}
+		
+	
 	//TODO: Detect Jaguar ROMs here
 	
 	//Header based detection failed, examine filename for clues
@@ -43,7 +52,7 @@
 	//More certain checks failed, look for a valid 68K reset vector
 	if (media->size >= 8) {
 		char *rom = media->buffer;
-		uint32_t reset = rom[4] << 24 | rom[5] << 16 | rom[6] << 8 | rom[7];
+		uint32_t reset = rom[5] << 16 | rom[6] << 8 | rom[7];
 		if (!(reset & 1) && reset < media->size) {
 			//we have a valid looking reset vector, assume it's a Genesis ROM
 			return SYSTEM_GENESIS;
@@ -64,6 +73,8 @@
 	{
 	case SYSTEM_GENESIS:
 		return &(alloc_config_genesis(media->buffer, media->size, lock_on, lock_on_size, opts, force_region))->header;
+	case SYSTEM_GENESIS_PLAYER:
+		return &(alloc_config_gen_player(media->buffer, media->size))->header;
 	case SYSTEM_SEGACD:
 		return &(alloc_config_genesis_cdboot(media, opts, force_region))->header;
 #ifndef NO_Z80
@@ -74,3 +85,19 @@
 		return NULL;
 	}
 }
+
+system_header *alloc_config_player(system_type stype, event_reader *reader) //creates a player system that reads from an event_reader; returns NULL for unsupported system types
+{
+	switch(stype)
+	{
+	case SYSTEM_GENESIS: //the only system type handled here
+		return &(alloc_config_gen_player_reader(reader))->header;
+	}
+	return NULL;
+}
+
+void system_request_exit(system_header *system, uint8_t force_release) //asks a system to exit through its request_exit callback
+{
+	system->force_release = force_release; //stored on the header before the callback runs so the system's exit/resume code can read it
+	system->request_exit(system);
+}
--- a/system.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/system.h	Sat Jan 15 13:15:21 2022 -0800
@@ -1,5 +1,6 @@
 #ifndef SYSTEM_H_
 #define SYSTEM_H_
+#include <stddef.h>
 #include <stdint.h>
 
 typedef struct system_header system_header;
@@ -8,9 +9,11 @@
 typedef enum {
 	SYSTEM_UNKNOWN,
 	SYSTEM_GENESIS,
+	SYSTEM_GENESIS_PLAYER,
 	SYSTEM_SEGACD,
 	SYSTEM_SMS,
-	SYSTEM_JAGUAR
+	SYSTEM_SMS_PLAYER,
+	SYSTEM_JAGUAR,
 } system_type;
 
 typedef enum {
@@ -28,9 +31,12 @@
 typedef void (*system_u8_u8_fun)(system_header *, uint8_t, uint8_t);
 typedef void (*system_mabs_fun)(system_header *, uint8_t, uint16_t, uint16_t);
 typedef void (*system_mrel_fun)(system_header *, uint8_t, int32_t, int32_t);
+typedef uint8_t *(*system_ptrszt_fun_rptr8)(system_header *, size_t *);
+typedef void (*system_ptr8_sizet_fun)(system_header *, uint8_t *, size_t);
 
 #include "arena.h"
 #include "romdb.h"
+#include "event_log.h"
 
 struct system_header {
 	system_header     *next_context;
@@ -54,6 +60,10 @@
 	system_u8_fun     keyboard_down;
 	system_u8_fun     keyboard_up;
 	system_fun        config_updated;
+	system_ptrszt_fun_rptr8 serialize;
+	system_ptr8_sizet_fun   deserialize;
+	system_str_fun          start_vgm_log;
+	system_fun              stop_vgm_log;
 	rom_info          info;
 	arena             *arena;
 	char              *next_rom;
@@ -63,6 +73,8 @@
 	uint8_t           save_state;
 	uint8_t           delayed_load_slot;
 	uint8_t           has_keyboard;
+	uint8_t                 vgm_logging;
+	uint8_t                 force_release;
 	debugger_type     debugger_type;
 	system_type       type;
 };
@@ -86,5 +98,7 @@
 
 system_type detect_system_type(system_media *media);
 system_header *alloc_config_system(system_type stype, system_media *media, uint32_t opts, uint8_t force_region);
+system_header *alloc_config_player(system_type stype, event_reader *reader);
+void system_request_exit(system_header *system, uint8_t force_release);
 
 #endif //SYSTEM_H_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/systems.cfg	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,80 @@
+md1va0 {
+	name Model 1 VA0
+	vram 64
+	vsram 40
+	zram 8
+	tas broken
+	z80_open_bus float
+	fm discrete 2612
+	tmss off
+}
+md1va3 {
+	name Model 1 VA3
+	vram 64
+	vsram 40
+	zram 8
+	tas broken
+	z80_open_bus FF
+	fm discrete 2612
+	tmss off
+}
+md1va6 {
+	name Model 1 VA6
+	vram 64
+	vsram 40
+	zram 8
+	tas broken
+	z80_open_bus FF
+	fm discrete 2612
+	tmss on
+}
+md2va1 {
+	name Model 2 VA1
+	vram 64
+	vsram 40
+	zram 8
+	tas broken
+	z80_open_bus FF
+	fm integrated 3834
+	tmss on
+}
+md2va2 {
+	name Model 2 VA2
+	vram 64
+	vsram 40
+	zram 8
+	tas broken
+	z80_open_bus FF
+	fm discrete 2612
+	tmss on
+}
+md3va1 {
+	name Model 3 VA1
+	vram 64
+	vsram 64
+	zram 8
+	tas broken
+	z80_open_bus FF
+	fm integrated 3834
+	tmss on
+}
+md3va2 {
+	name Model 3 VA2
+	vram 64
+	vsram 64
+	zram 8
+	tas works
+	z80_open_bus FF
+	fm integrated 3834
+	tmss on
+}
+teradrive {
+	name Teradrive
+	vram 128
+	vsram 40
+	zram 16
+	tas broken
+	z80_open_bus FF
+	fm discrete 3834
+	tmss off
+}
\ No newline at end of file
--- a/tern.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/tern.c	Sat Jan 15 13:15:21 2022 -0800
@@ -305,12 +305,11 @@
 
 void tern_free(tern_node *head)
 {
-	if (head->left) {
-		tern_free(head->left);
+	if (!head) {
+		return;
 	}
-	if (head->right) {
-		tern_free(head->right);
-	}
+	tern_free(head->left);
+	tern_free(head->right);
 	if (head->el) {
 		tern_free(head->straight.next);
 	}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tmss.s68	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,253 @@
+	dc.l $0, start
+	dc.l empty_handler
+	dc.l empty_handler
+	;$10
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$20
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$30
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$40
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$50
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$60
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$70
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l int_6
+	dc.l empty_handler
+	;$80
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$90
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$A0
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$B0
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$C0
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$D0
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$E0
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	;$F0
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.l empty_handler
+	dc.b "SEGA IS COOL    "
+	dc.b "(C)M.Pavone 2021"
+	dc.b "TRAIN MEMES STAN"
+	dc.b "D SILENTLY      "
+	dc.b "                "
+	dc.b "TRIUMPHANT MAMMA"
+	dc.b "LS SALUTE SOCIAL"
+	dc.b "ITES            "
+	dc.b "MP 20210227-01",0,0
+	dc.b "J               "
+	dc.l $0
+	dc.l romend-1
+	dc.l $FF0000
+	dc.l $FFFFFF
+	dc.b "                "
+	dc.b "                "
+	dc.b "                "
+	dc.b "                "
+	dc.b "JUE             "
+	
+frame_counter equ $FFFF8000
+ram_code equ $FFFF8002
+
+empty_handler:
+	rte
+start:
+	lea $A14000, a3
+	move.l #'SEGA', (a3)
+	lea $C00000, a0
+	lea $C00004, a1
+	move.w #$8104, (a1) ;Mode 5, everything turned off
+	move.w #$8004, (a1)
+	move.w #$8220, (a1) ;Scroll a table $8000
+	move.w #$8404, (a1) ;Scroll b table $8000
+	move.w #$8560, (a1) ;SAT table $C000
+	move.w #$8700, (a1) ;backdrop color 0
+	move.w #$8A01, (a1) ;Set HINT counter
+	move.w #$8B00, (a1) ;full screen scroll
+	move.w #$8C81, (a1) ;40 cell mode, no interlace
+	move.w #$8D00, (a1) ;hscroll table at 0
+	move.w #$8F02, (a1) ;autoinc 2
+	move.w #$9011, (a1) ;64x64 scroll size
+	move.l #$C0000000, (a1)
+	move.w #$000, (a0)
+	move.w #$EEE, (a0)
+
+	;clear scroll table
+	move.l #$40000000, (a1)
+	move.l #0, (a0)
+
+	;load tiles
+	move.l #$44000000, (a1)
+	lea font, a2
+	move.w #((fontend-font)/4 - 1), d0
+tloop:
+	move.l (a2)+, (a0)
+	dbra d0, tloop
+
+	;clear name table
+	move.l #$40000002, (a1)
+	moveq #32, d0
+	move.w #(64*64-1), d1
+ploop:
+	move.w d0, (a0)
+	dbra d1, ploop
+	move.l #$45960002, d7
+	move.l d7, (a1)
+	move.l #$800000, d6
+	
+	lea ram_code_src(pc), a6
+	lea ram_code.w, a5
+	moveq #(font-ram_code_src)/2-1, d0
+copy:
+	move.w (a6)+, (a5)+
+	dbra d0, copy
+	lea $101(a3), a4
+	lea $100.w, a5
+	move.l #'SEGA', d5
+	move.l #' SEG', d4
+	moveq #0, d0
+	moveq #1, d2
+	move.w #180, d3
+	btst #6, $A10001
+	beq .not_pal
+	move.w #150, d3
+.not_pal:
+	jmp ram_code.w
+
+ram_code_src:
+	move.b d2, (a4)
+	cmp.l (a5), d5
+	beq.s is_good
+	cmp.l (a5), d4
+	bne.s is_bad
+is_good:
+	move.b d0, (a4)
+	lea good(pc), a6
+	bsr.s print_string
+	
+	add.l d6, d7
+	move.l d7, (a1)
+	bsr.s print_string
+	
+	add.l d6, d7
+	move.l d7, (a1)
+	bsr.s print_string
+
+	move.w #$8164, (a1)
+	move #$2500, SR
+wait:
+	cmp.w frame_counter.w, d3
+	bne.s wait
+	move #$2700, SR
+	move.b d2, (a4)
+	move.l $0.w, a7
+	move.l $4.w, a6
+	move.w #$8104, (a1)
+	move.l d0, (a3)
+	jmp (a6)
+	
+is_bad:
+	move.b d0, (a4)
+	lea bad(pc), a6
+	bsr.s print_string
+	
+	add.l d6, d7
+	move.l d7, (a1)
+	bsr.s print_string
+	
+	add.l d6, d7
+	move.l d7, (a1)
+	bsr.s print_string
+	
+	move.w #$8144, (a1)
+forever:
+	bra.s forever
+
+
+int_6:
+	addq.w #1, frame_counter.w
+	rte
+
+;Prints a null terminated string
+;a6 - pointer to string
+;a0 - VDP data port
+;d0 - base tile attribute
+;
+;Clobbers: d1 (a6 is advanced past the string's terminator)
+print_string:
+.loop
+	moveq #0, d1
+	move.b (a6)+, d1
+	beq .end
+	add.w d0, d1
+	move.w d1, (a0)
+	bra .loop
+.end
+	rts
+	
+good:
+	dc.b "  BLASTEM THINKS", 0
+	dc.b "  THAT THIS CART", 0
+	dc.b " TASTES DELICIOUS!", 0
+	
+bad:
+	dc.b "  *sniff* *sniff*", 0
+	dc.b " something doesn't", 0
+	dc.b "  smell right...", 0
+
+	align 1
+font:
+	incbin font.tiles
+fontend
+
+romend
--- a/trans.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/trans.c	Sat Jan 15 13:15:21 2022 -0800
@@ -4,7 +4,11 @@
  BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
 */
 #include "68kinst.h"
+#ifdef NEW_CORE
+#include "m68k.h"
+#else
 #include "m68k_core.h"
+#endif
 #include "mem.h"
 #include <stdio.h>
 #include <stdlib.h>
@@ -19,20 +23,28 @@
 {
 }
 
+#ifndef NEW_CORE
 m68k_context * sync_components(m68k_context * context, uint32_t address)
 {
-	if (context->current_cycle > 0x80000000) {
-		context->current_cycle -= 0x80000000;
+	if (context->current_cycle >= context->target_cycle) {
+		puts("hit cycle limit");
+		exit(0);
 	}
 	if (context->status & M68K_STATUS_TRACE || context->trace_pending) {
 		context->target_cycle = context->current_cycle;
 	}
 	return context;
 }
+#endif
 
 m68k_context *reset_handler(m68k_context *context)
 {
 	m68k_print_regs(context);
+#ifdef NEW_CORE
+	printf("cycles: %d\n", context->cycles);
+#else
+	printf("cycles: %d\n", context->current_cycle);
+#endif
 	exit(0);
 	//unreachable
 	return context;
@@ -74,11 +86,17 @@
 	m68k_context * context = init_68k_context(&opts, reset_handler);
 	context->mem_pointers[0] = memmap[0].buffer;
 	context->mem_pointers[1] = memmap[1].buffer;
-	context->target_cycle = context->sync_cycle = 0x80000000;
-	uint32_t address;
-	address = filebuf[2] << 16 | filebuf[3];
-	translate_m68k_stream(address, context);
+#ifdef NEW_CORE
+	context->cycles = 40;
+#else
+	context->current_cycle = 40;
+	context->target_cycle = context->sync_cycle = 8000;
+#endif
 	m68k_reset(context);
+#ifdef NEW_CORE
+	m68k_execute(context, 8000);
+	puts("hit cycle limit");
+#endif
 	return 0;
 }
 
--- a/util.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/util.c	Sat Jan 15 13:15:21 2022 -0800
@@ -7,7 +7,6 @@
 
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <unistd.h>
 #include <errno.h>
 
 #ifdef __ANDROID__
@@ -80,7 +79,7 @@
 	for (char *cur = base; *cur; ++cur)
 	{
 		if (in_var) {
-			if (!(*cur == '_' || isalnum(*cur))) {
+			if (!isalnum(*cur)) {
 				positions[num_vars].end = cur-base;
 				if (positions[num_vars].end - positions[num_vars].start > max_var_len) {
 					max_var_len = positions[num_vars].end - positions[num_vars].start;
@@ -189,6 +188,11 @@
 	return text+1;
 }
 
+uint8_t startswith(const char *haystack, const char *prefix)
+{
+	return !strncmp(haystack, prefix, strlen(prefix)); //1 when the first strlen(prefix) chars match; an empty prefix always matches
+}
+
 void bin_to_hex(uint8_t *output, uint8_t *input, uint64_t size)
 {
 	while (size)
@@ -437,6 +441,10 @@
 	exit(1);
 }
 
+#ifndef _WIN32
+#include <unistd.h>
+#endif
+
 void warning(char *format, ...)
 {
 	va_list args;
@@ -472,13 +480,16 @@
 	va_end(args);
 }
 
+static uint8_t output_enabled = 1;
 void info_message(char *format, ...)
 {
 	va_list args;
 	va_start(args, format);
 #ifndef _WIN32
 	if (headless || (isatty(STDOUT_FILENO) && isatty(STDIN_FILENO))) {
-		info_printf(format, args);
+		if (output_enabled) {
+			info_printf(format, args);
+		}
 	} else {
 #endif
 		int32_t size = strlen(format) * 2;
@@ -498,7 +509,9 @@
 			va_start(args, format);
 			vsnprintf(buf, actual, format, args);
 		}
-		info_puts(buf);
+		if (output_enabled) {
+			info_puts(buf);
+		}
 		render_infobox("BlastEm Info", buf);
 		free(buf);
 #ifndef _WIN32
@@ -507,7 +520,31 @@
 	va_end(args);
 }
 
+//Prints a printf-style debug message via info_printf
+//Nothing is printed once disable_stdout_messages has been called
+void debug_message(char *format, ...)
+{
+	va_list args;
+	va_start(args, format);
+	if (output_enabled) {
+		info_printf(format, args);
+	}
+	va_end(args);
+}
+
+void disable_stdout_messages(void)
+{
+	output_enabled = 0;
+}
+
+uint8_t is_stdout_enabled(void)
+{
+	return output_enabled;
+}
+
 #ifdef _WIN32
+#define WINVER 0x501
+#include <winsock2.h>
 #include <windows.h>
 #include <shlobj.h>
 
@@ -655,7 +689,80 @@
 	return CreateDirectory(path, NULL);
 }
 
+static WSADATA wsa_data;
+static void socket_cleanup(void)
+{
+	WSACleanup();
+}
+
+void socket_init(void)
+{
+	static uint8_t started;
+	if (!started) {
+		started = 1;
+		WSAStartup(MAKEWORD(2,2), &wsa_data);
+		atexit(socket_cleanup);
+	}
+}
+
+int socket_blocking(int sock, int should_block)
+{
+	u_long param = !should_block;
+	if (ioctlsocket(sock, FIONBIO, &param)) {
+		return WSAGetLastError();
+	}
+	return 0;
+}
+
+void socket_close(int sock)
+{
+	closesocket(sock);
+}
+
+int socket_last_error(void)
+{
+	return WSAGetLastError();
+}
+
+int socket_error_is_wouldblock(void)
+{
+	return WSAGetLastError() == WSAEWOULDBLOCK;
+}
+
 #else
+#include <fcntl.h>
+#include <signal.h>
+
+void socket_init(void)
+{
+	//SIGPIPE on network sockets is not desired
+	//would be better to do this in a more limited way,
+	//but the alternatives are not portable
+	signal(SIGPIPE, SIG_IGN);
+}
+
+int socket_blocking(int sock, int should_block)
+{
+	//change only O_NONBLOCK, keeping the socket's other status flags; fcntl reports failure as -1 and sets errno
+	int flags = fcntl(sock, F_GETFL);
+	int failed = flags == -1 || fcntl(sock, F_SETFL, should_block ? flags & ~O_NONBLOCK : flags | O_NONBLOCK) == -1;
+	return failed ? errno : 0;
+}
+
+void socket_close(int sock)
+{
+	close(sock);
+}
+
+int socket_last_error(void)
+{
+	return errno;
+}
+
+int socket_error_is_wouldblock(void)
+{
+	return errno == EAGAIN || errno == EWOULDBLOCK;
+}
 
 char * get_home_dir()
 {
@@ -830,9 +937,20 @@
 	qsort(list, num_entries, sizeof(dir_entry), sort_dir_alpha);
 }
 
+uint8_t delete_file(char *path)
+{
+#ifdef _WIN32
+	//TODO: Call Unicode version and prepend special string to remove max path limitation
+	return 0 != DeleteFileA(path); //DeleteFileA returns nonzero on success
+#else
+	return 0 == unlink(path); //unlink returns 0 on success
+#endif
+}
+
 #ifdef __ANDROID__
 
 #include <SDL.h>
+#ifndef IS_LIB
 char *read_bundled_file(char *name, uint32_t *sizeret)
 {
 	SDL_RWops *rw = SDL_RWFromFile(name, "rb");
@@ -860,6 +978,7 @@
 	SDL_RWclose(rw);
 	return ret;
 }
+#endif
 
 char const *get_config_dir()
 {
@@ -873,16 +992,21 @@
 
 #else
 
+#ifndef IS_LIB
 char *read_bundled_file(char *name, uint32_t *sizeret)
 {
-	char *exe_dir = get_exe_dir();
-	if (!exe_dir) {
+#ifdef DATA_PATH
+	char *data_dir = DATA_PATH;
+#else
+	char *data_dir = get_exe_dir();
+	if (!data_dir) {
 		if (sizeret) {
 			*sizeret = -1;
 		}
 		return NULL;
 	}
-	char const *pieces[] = {exe_dir, PATH_SEP, name};
+#endif
+	char const *pieces[] = {data_dir, PATH_SEP, name};
 	char *path = alloc_concat_m(3, pieces);
 	FILE *f = fopen(path, "rb");
 	free(path);
@@ -912,7 +1036,7 @@
 	fclose(f);
 	return ret;
 }
-
+#endif //IS_LIB
 
 #ifdef _WIN32
 char const *get_userdata_dir()
@@ -969,8 +1093,6 @@
 }
 
 
-#endif
-
+#endif //_WIN32
+#endif //__ANDROID__
 
-
-#endif
--- a/util.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/util.h	Sat Jan 15 13:15:21 2022 -0800
@@ -32,6 +32,8 @@
 char * strip_ws(char * text);
 //Inserts a null after the first word, returns a pointer to the second word
 char * split_keyval(char * text);
+//Checks if haystack starts with prefix
+uint8_t startswith(const char *haystack, const char *prefix);
 //Takes a binary byte buffer and produces a lowercase hex string
 void bin_to_hex(uint8_t *output, uint8_t *input, uint64_t size);
 //Takes an (optionally) null-terminated UTF16-BE string and converts a maximum of max_size code-units to UTF-8
@@ -82,5 +84,23 @@
 void info_message(char *format, ...);
 //Prints an information message to stderr and to a message box if not in headless mode and not attached to a console
 void warning(char *format, ...);
+//Prints a debug message to stdout
+void debug_message(char *format, ...);
+//Disables output of info and debug messages to stdout
+void disable_stdout_messages(void);
+//Returns stdout disable status
+uint8_t is_stdout_enabled(void);
+//Deletes a file, returns true on success, false on failure
+uint8_t delete_file(char *path);
+//Initializes the socket library on platforms that need it
+void socket_init(void);
+//Sets a socket to blocking or non-blocking mode
+int socket_blocking(int sock, int should_block);
+//Close a socket
+void socket_close(int sock);
+//Return the last error on a socket operation
+int socket_last_error(void);
+//Returns if the last socket error was EAGAIN/EWOULDBLOCK
+int socket_error_is_wouldblock(void);
 
 #endif //UTIL_H_
--- a/vdp.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/vdp.c	Sat Jan 15 13:15:21 2022 -0800
@@ -9,6 +9,8 @@
 #include <string.h>
 #include "render.h"
 #include "util.h"
+#include "event_log.h"
+#include "terminal.h"
 
 #define NTSC_INACTIVE_START 224
 #define PAL_INACTIVE_START 240
@@ -51,8 +53,6 @@
 #define BORDER_BOT_V28_PAL 32
 #define BORDER_BOT_V30_PAL 24
 
-#define INVALID_LINE 0x200
-
 enum {
 	INACTIVE = 0,
 	PREPARING, //used for line 0x1FF
@@ -72,28 +72,36 @@
 	{127, 0, 127}    //Sprites
 };
 
+static uint32_t calc_crop(uint32_t crop, uint32_t border)
+{
+	return crop >= border ? 0 : border - crop; //border lines left after cropping, clamped to 0 so the unsigned subtraction can't wrap
+}
+
 static void update_video_params(vdp_context *context)
 {
+	uint32_t top_crop = render_overscan_top();
+	uint32_t bot_crop = render_overscan_bot();
+	uint32_t border_top;
 	if (context->regs[REG_MODE_2] & BIT_MODE_5) {
 		if (context->regs[REG_MODE_2] & BIT_PAL) {
 			if (context->flags2 & FLAG2_REGION_PAL) {
 				context->inactive_start = PAL_INACTIVE_START;
-				context->border_top = BORDER_TOP_V30_PAL;
-				context->border_bot = BORDER_BOT_V30_PAL;
+				border_top = BORDER_TOP_V30_PAL;
+				context->border_bot = calc_crop(bot_crop, BORDER_BOT_V30_PAL);
 			} else {
 				//the behavior here is rather weird and needs more investigation
 				context->inactive_start = 0xF0;
-				context->border_top = 1;
-				context->border_bot = 3;
+				border_top = 1;
+				context->border_bot = calc_crop(bot_crop, 3);
 			}
 		} else {
 			context->inactive_start = NTSC_INACTIVE_START;
 			if (context->flags2 & FLAG2_REGION_PAL) {
-				context->border_top = BORDER_TOP_V28_PAL;
-				context->border_bot = BORDER_BOT_V28_PAL;
+				border_top = BORDER_TOP_V28_PAL;
+				context->border_bot = calc_crop(bot_crop, BORDER_BOT_V28_PAL);
 			} else {
-				context->border_top = BORDER_TOP_V28;
-				context->border_bot = BORDER_TOP_V28;
+				border_top = BORDER_TOP_V28;
+				context->border_bot = calc_crop(bot_crop, BORDER_BOT_V28);
 			}
 		}
 		if (context->regs[REG_MODE_4] & BIT_H40) {
@@ -114,11 +122,11 @@
 	} else {
 		context->inactive_start = MODE4_INACTIVE_START;
 		if (context->flags2 & FLAG2_REGION_PAL) {
-			context->border_top = BORDER_TOP_V24_PAL;
-			context->border_bot = BORDER_BOT_V24_PAL;
+			border_top = BORDER_TOP_V24_PAL;
+			context->border_bot = calc_crop(bot_crop, BORDER_BOT_V24_PAL);
 		} else {
-			context->border_top = BORDER_TOP_V24;
-			context->border_bot = BORDER_BOT_V24;
+			border_top = BORDER_TOP_V24;
+			context->border_bot = calc_crop(bot_crop, BORDER_BOT_V24);
 		}
 		if (!(context->regs[REG_MODE_1] & BIT_MODE_4)){
 			context->state = INACTIVE;
@@ -132,24 +140,27 @@
 			}
 		}
 	}
+	context->border_top = calc_crop(top_crop, border_top);
+	context->top_offset = border_top - context->border_top;
 }
 
 static uint8_t color_map_init_done;
 
-vdp_context *init_vdp_context(uint8_t region_pal)
+vdp_context *init_vdp_context(uint8_t region_pal, uint8_t has_max_vsram)
 {
 	vdp_context *context = calloc(1, sizeof(vdp_context) + VRAM_SIZE);
 	if (headless) {
-		context->output = malloc(LINEBUF_SIZE * sizeof(uint32_t));
-		context->output_pitch = 0;
+		context->fb = malloc(512 * LINEBUF_SIZE * sizeof(uint32_t));
+		context->output_pitch = LINEBUF_SIZE * sizeof(uint32_t);
 	} else {
 		context->cur_buffer = FRAMEBUFFER_ODD;
 		context->fb = render_get_framebuffer(FRAMEBUFFER_ODD, &context->output_pitch);
 	}
-	context->sprite_draws = MAX_DRAWS;
+	context->sprite_draws = MAX_SPRITES_LINE;
 	context->fifo_write = 0;
 	context->fifo_read = -1;
 	context->regs[REG_HINT] = context->hint_counter = 0xFF;
+	context->vsram_size = has_max_vsram ? MAX_VSRAM_SIZE : MIN_VSRAM_SIZE;
 
 	if (!color_map_init_done) {
 		uint8_t b,g,r;
@@ -237,14 +248,21 @@
 		context->flags2 |= FLAG2_REGION_PAL;
 	}
 	update_video_params(context);
-	if (!headless) {
-		context->output = (uint32_t *)(((char *)context->fb) + context->output_pitch * context->border_top);
-	}
+	context->output = (uint32_t *)(((char *)context->fb) + context->output_pitch * context->border_top);
 	return context;
 }
 
 void vdp_free(vdp_context *context)
 {
+	if (headless) {
+		free(context->fb);
+	}
+	for (int i = 0; i < VDP_NUM_DEBUG_TYPES; i++)
+	{
+		if (context->enabled_debuggers & (1 << i)) {
+			vdp_toggle_debug_view(context, i);
+		}
+	}
 	free(context);
 }
 
@@ -271,40 +289,86 @@
 
 static void render_sprite_cells(vdp_context * context)
 {
+	if (context->cur_slot > MAX_SPRITES_LINE) {
+		context->cur_slot--;
+		return;
+	}
+	if (context->cur_slot < 0) {
+		return;
+	}
 	sprite_draw * d = context->sprite_draw_list + context->cur_slot;
-	context->serial_address = d->address;
-	if (context->cur_slot >= context->sprite_draws) {
-
-		uint16_t dir;
-		int16_t x;
-		if (d->h_flip) {
-			x = d->x_pos + 7;
-			dir = -1;
-		} else {
-			x = d->x_pos;
-			dir = 1;
-		}
-		//printf("Draw Slot %d of %d, Rendering sprite cell from %X to x: %d\n", context->cur_slot, context->sprite_draws, d->address, x);
-		context->cur_slot--;
-		for (uint16_t address = d->address; address != ((d->address+4) & 0xFFFF); address++) {
-			if (x >= 0 && x < 320) {
-				if (!(context->linebuf[x] & 0xF)) {
-					context->linebuf[x] = (context->vdpmem[address] >> 4) | d->pal_priority;
-				} else if (context->vdpmem[address] >> 4) {
-					context->flags2 |= FLAG2_SPRITE_COLLIDE;
+	uint16_t address = d->address;
+	address += context->sprite_x_offset * d->height * 4;
+	context->serial_address = address;
+	uint16_t dir;
+	int16_t x;
+	if (d->h_flip) {
+		x = d->x_pos + 7 + 8 * (d->width - context->sprite_x_offset - 1);
+		dir = -1;
+	} else {
+		x = d->x_pos + context->sprite_x_offset * 8;
+		dir = 1;
+	}
+	if (d->x_pos) {
+		context->flags |= FLAG_CAN_MASK;
+		if (!(context->flags & FLAG_MASKED)) {
+			x -= 128;
+			//printf("Draw Slot %d of %d, Rendering sprite cell from %X to x: %d\n", context->cur_slot, context->sprite_draws, d->address, x);
+			uint8_t collide = 0;
+			if (x >= 8 && x < 312) {
+				//sprite is fully visible
+				for (; address != ((context->serial_address+4) & 0xFFFF); address++) {
+					uint8_t pixel = context->vdpmem[address] >> 4;
+					if (!(context->linebuf[x] & 0xF)) {
+						context->linebuf[x] = pixel | d->pal_priority;
+					} else {
+						collide |= pixel;
+					}
+					x += dir;
+					pixel = context->vdpmem[address] & 0xF;
+					if (!(context->linebuf[x] & 0xF)) {
+						context->linebuf[x] = pixel  | d->pal_priority;
+					} else {
+						collide |= pixel;
+					}
+					x += dir;
+				}
+			} else if (x > -8 && x < 327) {
+				//sprite is partially visible
+				for (; address != ((context->serial_address+4) & 0xFFFF); address++) {
+					if (x >= 0 && x < 320) {
+						uint8_t pixel = context->vdpmem[address] >> 4;
+						if (!(context->linebuf[x] & 0xF)) {
+							context->linebuf[x] = pixel | d->pal_priority;
+						} else {
+							collide |= pixel;
+						}
+					}
+					x += dir;
+					if (x >= 0 && x < 320) {
+						uint8_t pixel = context->vdpmem[address] & 0xF;
+						if (!(context->linebuf[x] & 0xF)) {
+							context->linebuf[x] = pixel  | d->pal_priority;
+						} else {
+							collide |= pixel;
+						}
+					}
+					x += dir;
 				}
 			}
-			x += dir;
-			if (x >= 0 && x < 320) {
-				if (!(context->linebuf[x] & 0xF)) {
-					context->linebuf[x] = (context->vdpmem[address] & 0xF)  | d->pal_priority;
-				} else if (context->vdpmem[address] & 0xF) {
-					context->flags2 |= FLAG2_SPRITE_COLLIDE;
-				}
+			if (collide) {
+				context->flags2 |= FLAG2_SPRITE_COLLIDE;
 			}
-			x += dir;
 		}
-	} else {
+	} else if (context->flags & FLAG_CAN_MASK) {
+		context->flags |= FLAG_MASKED;
+		context->flags &= ~FLAG_CAN_MASK;
+	}
+
+	context->sprite_x_offset++;
+	if (context->sprite_x_offset == d->width) {
+		d->x_pos = 0;
+		context->sprite_x_offset = 0;
 		context->cur_slot--;
 	}
 }
@@ -532,6 +596,9 @@
 		   (context->flags & FLAG_PENDING) ? "word" : (context->flags2 & FLAG2_BYTE_PENDING) ? "byte" : "none",
 		   context->vcounter, context->hslot*2, (context->flags2 & FLAG2_VINT_PENDING) ? "true" : "false",
 		   (context->flags2 & FLAG2_HINT_PENDING) ? "true" : "false", vdp_control_port_read(context));
+	printf("\nDebug Register: %X | Output disabled: %s, Force Layer: %d\n", context->test_port, 
+		(context->test_port & TEST_BIT_DISABLE)  ? "true" : "false", context->test_port >> 7 & 3
+	);
 	//restore flags as calling vdp_control_port_read can change them
 	context->flags = old_flags;
 	context->flags2 = old_flags2;
@@ -697,47 +764,13 @@
 			} else {
 				address = ((tileinfo & 0x7FF) << 5) + row * 4;
 			}
-			int16_t x = ((context->vdpmem[att_addr+ 2] & 0x3) << 8 | context->vdpmem[att_addr + 3]) & 0x1FF;
-			if (x) {
-				context->flags |= FLAG_CAN_MASK;
-			} else if(context->flags & (FLAG_CAN_MASK | FLAG_DOT_OFLOW)) {
-				context->flags |= FLAG_MASKED;
-			}
-
-			context->flags &= ~FLAG_DOT_OFLOW;
-			int16_t i;
-			if (context->flags & FLAG_MASKED) {
-				for (i=0; i < width && context->sprite_draws; i++) {
-					--context->sprite_draws;
-					context->sprite_draw_list[context->sprite_draws].x_pos = -128;
-					context->sprite_draw_list[context->sprite_draws].address = address + i * height * 4;
-				}
-			} else {
-				x -= 128;
-				int16_t base_x = x;
-				int16_t dir;
-				if (tileinfo & MAP_BIT_H_FLIP) {
-					x += (width-1) * 8;
-					dir = -8;
-				} else {
-					dir = 8;
-				}
-				//printf("Sprite %d | x: %d, y: %d, width: %d, height: %d, pal_priority: %X, row: %d, tile addr: %X\n", context->sprite_info_list[context->cur_slot].index, x, context->sprite_info_list[context->cur_slot].y, width, height, pal_priority, row, address);
-				for (i=0; i < width && context->sprite_draws; i++, x += dir) {
-					--context->sprite_draws;
-					context->sprite_draw_list[context->sprite_draws].address = address + i * height * 4;
-					context->sprite_draw_list[context->sprite_draws].x_pos = x;
-					context->sprite_draw_list[context->sprite_draws].pal_priority = pal_priority;
-					context->sprite_draw_list[context->sprite_draws].h_flip = (tileinfo & MAP_BIT_H_FLIP) ? 1 : 0;
-				}
-			}
-			//Used to be i < width
-			//TODO: Confirm this is the right condition on hardware
-			if (!context->sprite_draws) {
-				context->flags |= FLAG_DOT_OFLOW;
-			}
-		} else {
-			context->flags |= FLAG_DOT_OFLOW;
+			context->sprite_draws--;
+			context->sprite_draw_list[context->sprite_draws].x_pos = ((context->vdpmem[att_addr+ 2] & 0x3) << 8 | context->vdpmem[att_addr + 3]) & 0x1FF;
+			context->sprite_draw_list[context->sprite_draws].address = address;
+			context->sprite_draw_list[context->sprite_draws].pal_priority = pal_priority;
+			context->sprite_draw_list[context->sprite_draws].h_flip = (tileinfo & MAP_BIT_H_FLIP) ? 1 : 0;
+			context->sprite_draw_list[context->sprite_draws].width = width;
+			context->sprite_draw_list[context->sprite_draws].height = height;
 		}
 	}
 	context->cur_slot++;
@@ -792,7 +825,7 @@
 	}
 	write_cram_internal(context, addr, value);
 	
-	if (context->hslot >= BG_START_SLOT && (
+	if (context->output && context->hslot >= BG_START_SLOT && (
 		context->vcounter < context->inactive_start + context->border_bot 
 		|| context->vcounter > 0x200 - context->border_top
 	)) {
@@ -886,14 +919,17 @@
 		{
 		case VRAM_WRITE:
 			if ((context->regs[REG_MODE_2] & (BIT_128K_VRAM|BIT_MODE_5)) == (BIT_128K_VRAM|BIT_MODE_5)) {
+				event_vram_word(context->cycles, start->address, start->value);
 				vdp_check_update_sat(context, start->address, start->value);
 				write_vram_word(context, start->address, start->value);
 			} else {
 				uint8_t byte = start->partial == 1 ? start->value >> 8 : start->value;
-				vdp_check_update_sat_byte(context, start->address ^ 1, byte);
-				write_vram_byte(context, start->address ^ 1, byte);
+				uint32_t address = start->address ^ 1;
+				event_vram_byte(context->cycles, start->address, byte, context->regs[REG_AUTOINC]);
+				vdp_check_update_sat_byte(context, address, byte);
+				write_vram_byte(context, address, byte);
 				if (!start->partial) {
-					start->address = start->address ^ 1;
+					start->address = address;
 					start->partial = 1;
 					//skip auto-increment and removal of entry from fifo
 					return;
@@ -902,22 +938,24 @@
 			break;
 		case CRAM_WRITE: {
 			//printf("CRAM Write | %X to %X\n", start->value, (start->address/2) & (CRAM_SIZE-1));
+			uint16_t val;
 			if (start->partial == 3) {
-				uint16_t val;
 				if ((start->address & 1) && (context->regs[REG_MODE_2] & BIT_MODE_5)) {
 					val = (context->cram[start->address >> 1 & (CRAM_SIZE-1)] & 0xFF) | start->value << 8;
 				} else {
 					uint16_t address = (context->regs[REG_MODE_2] & BIT_MODE_5) ? start->address >> 1 & (CRAM_SIZE-1) : start->address & 0x1F;
 					val = (context->cram[address] & 0xFF00) | start->value;
 				}
-				write_cram(context, start->address, val);
 			} else {
-				write_cram(context, start->address, start->partial ? context->fifo[context->fifo_write].value : start->value);
+				val = start->partial ? context->fifo[context->fifo_write].value : start->value;
 			}
+			uint8_t buffer[3] = {start->address & 127, val >> 8, val};
+			event_log(EVENT_VDP_INTRAM, context->cycles, sizeof(buffer), buffer);
+			write_cram(context, start->address, val);
 			break;
 		}
 		case VSRAM_WRITE:
-			if (((start->address/2) & 63) < VSRAM_SIZE) {
+			if (((start->address/2) & 63) < context->vsram_size) {
 				//printf("VSRAM Write: %X to %X @ frame: %d, vcounter: %d, hslot: %d, cycle: %d\n", start->value, start->address, context->frame, context->vcounter, context->hslot, context->cycles);
 				if (start->partial == 3) {
 					if (start->address & 1) {
@@ -930,6 +968,8 @@
 				} else {
 					context->vsram[(start->address/2) & 63] = start->partial ? context->fifo[context->fifo_write].value : start->value;
 				}
+				uint8_t buffer[3] = {((start->address/2) & 63) + 128, context->vsram[(start->address/2) & 63] >> 8, context->vsram[(start->address/2) & 63]};
+				event_log(EVENT_VDP_INTRAM, context->cycles, sizeof(buffer), buffer);
 			}
 
 			break;
@@ -954,7 +994,7 @@
 			
 			context->flags |= FLAG_READ_FETCHED;
 		}
-	} else if (!(context->cd & 1) && !(context->flags & (FLAG_READ_FETCHED|FLAG_PENDING))) {
+	} else if (!(context->cd & 1) && !(context->flags & FLAG_READ_FETCHED)) {
 		switch(context->cd & 0xF)
 		{
 		case VRAM_READ:
@@ -991,7 +1031,7 @@
 			break;
 		case VSRAM_READ: {
 			uint16_t address = (context->address /2) & 63;
-			if (address >= VSRAM_SIZE) {
+			if (address >= context->vsram_size) {
 				address = 0;
 			}
 			context->prefetch = context->vsram[address] & VSRAM_BITS;
@@ -1114,8 +1154,12 @@
 			context->v_offset = (line) & v_offset_mask;
 			context->flags |= FLAG_WINDOW;
 			return;
+		} else if (column == right_col) {
+			context->flags |= FLAG_WINDOW_EDGE;
+			context->flags &= ~FLAG_WINDOW;
+		} else {
+			context->flags &= ~(FLAG_WINDOW_EDGE|FLAG_WINDOW);
 		}
-		context->flags &= ~FLAG_WINDOW;
 	}
 	//TODO: Verify behavior for 0x20 case
 	uint16_t vscroll = 0xFF | (context->regs[REG_SCROLL] & 0x30) << 4;
@@ -1127,32 +1171,15 @@
 	context->v_offset = vscroll & v_offset_mask;
 	//printf("%s | line %d, vsram: %d, vscroll: %d, v_offset: %d\n",(vsram_off ? "B" : "A"), line, context->vsram[context->regs[REG_MODE_3] & 0x4 ? column : 0], vscroll, context->v_offset);
 	vscroll >>= vscroll_shift;
-	uint16_t hscroll_mask;
-	uint16_t v_mul;
-	switch(context->regs[REG_SCROLL] & 0x3)
-	{
-	case 0:
-		hscroll_mask = 0x1F;
-		v_mul = 64;
-		break;
-	case 0x1:
-		hscroll_mask = 0x3F;
-		v_mul = 128;
-		break;
-	case 0x2:
-		//TODO: Verify this behavior
-		hscroll_mask = 0x1F;
-		v_mul = 0;
-		break;
-	case 0x3:
-		hscroll_mask = 0x7F;
-		v_mul = 256;
-		break;
-	}
+	//TODO: Verify the behavior for a setting of 2
+	static const uint16_t hscroll_masks[] = {0x1F, 0x3F, 0x1F, 0x7F};
+	static const uint16_t v_shifts[] = {6, 7, 16, 8};
+	uint16_t hscroll_mask = hscroll_masks[context->regs[REG_SCROLL] & 0x3];
+	uint16_t v_shift = v_shifts[context->regs[REG_SCROLL] & 0x3];
 	uint16_t hscroll, offset;
 	for (int i = 0; i < 2; i++) {
 		hscroll = (column - 2 + i - ((hscroll_val/8) & 0xFFFE)) & hscroll_mask;
-		offset = address + ((vscroll * v_mul + hscroll*2) & 0x1FFF);
+		offset = address + (((vscroll << v_shift) + hscroll*2) & 0x1FFF);
 		//printf("%s | line: %d, col: %d, x: %d, hs_mask %X, scr reg: %X, tbl addr: %X\n", (vsram_off ? "B" : "A"), line, (column-2+i), hscroll, hscroll_mask, context->regs[REG_SCROLL], offset);
 		uint16_t col_val = (context->vdpmem[offset] << 8) | context->vdpmem[offset+1];
 		if (i) {
@@ -1210,28 +1237,25 @@
 	}
 	uint8_t pal_priority = (col >> 9) & 0x70;
 	uint32_t bits = *((uint32_t *)(&context->vdpmem[address]));
+	tmp_buf += offset;
 	if (col & MAP_BIT_H_FLIP) {
 		uint32_t shift = 28;
 		for (int i = 0; i < 4; i++)
 		{
 			uint8_t right = pal_priority | ((bits >> shift) & 0xF);
 			shift -= 4;
-			tmp_buf[offset++] = pal_priority | ((bits >> shift) & 0xF);
+			*(tmp_buf++) = pal_priority | ((bits >> shift) & 0xF);
 			shift -= 4;
-			offset &= SCROLL_BUFFER_MASK;
-			tmp_buf[offset++] = right;
-			offset &= SCROLL_BUFFER_MASK;
+			*(tmp_buf++) = right;
 		}
 	} else {
 		for (int i = 0; i < 4; i++)
 		{
 			uint8_t right = pal_priority | (bits & 0xF);
 			bits >>= 4;
-			tmp_buf[offset++] = pal_priority | (bits & 0xF);
-			offset &= SCROLL_BUFFER_MASK;
+			*(tmp_buf++) = pal_priority | (bits & 0xF);
 			bits >>= 4;
-			tmp_buf[offset++] = right;
-			offset &= SCROLL_BUFFER_MASK;
+			*(tmp_buf++) = right;
 		}
 	}
 }
@@ -1328,65 +1352,69 @@
 	return (sh_pixel){.index = pixel, .intensity = intensity};
 }
 
-static void render_normal(vdp_context *context, int32_t col, uint32_t *dst, uint8_t *debug_dst, int plane_a_off, int plane_b_off)
+static void render_normal(vdp_context *context, int32_t col, uint8_t *dst, uint8_t *debug_dst, uint8_t *buf_a, int plane_a_off, int plane_a_mask, int plane_b_off)
+{
+	uint8_t *sprite_buf = context->linebuf + col * 8;
+	if (!col && (context->regs[REG_MODE_1] & BIT_COL0_MASK)) {
+		memset(dst, 0, 8);
+		memset(debug_dst, DBG_SRC_BG, 8);
+		dst += 8;
+		debug_dst += 8;
+		sprite_buf += 8;
+		plane_a_off += 8;
+		plane_b_off += 8;
+		for (int i = 0; i < 8; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i)
+		{
+			uint8_t sprite, plane_a, plane_b;
+			plane_a = buf_a[plane_a_off & plane_a_mask];
+			plane_b = context->tmp_buf_b[plane_b_off & SCROLL_BUFFER_MASK];
+			*(dst++) = composite_normal(context, debug_dst, *sprite_buf, plane_a, plane_b, context->regs[REG_BG_COLOR]) & 0x3F;
+			debug_dst++;
+		}
+	} else {
+		for (int i = 0; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i)
+		{
+			uint8_t sprite, plane_a, plane_b;
+			plane_a = buf_a[plane_a_off & plane_a_mask];
+			plane_b = context->tmp_buf_b[plane_b_off & SCROLL_BUFFER_MASK];
+			*(dst++) = composite_normal(context, debug_dst, *sprite_buf, plane_a, plane_b, context->regs[REG_BG_COLOR]) & 0x3F;
+			debug_dst++;
+		}
+	}
+}
+
+static void render_highlight(vdp_context *context, int32_t col, uint8_t *dst, uint8_t *debug_dst, uint8_t *buf_a, int plane_a_off, int plane_a_mask, int plane_b_off)
 {
 	int start = 0;
 	if (!col && (context->regs[REG_MODE_1] & BIT_COL0_MASK)) {
-		uint32_t bgcolor = context->colors[context->regs[REG_BG_COLOR] & 0x3F];
-		for (int i = 0; i < 8; ++i)
-		{
-			*(dst++) = bgcolor;
-			*(debug_dst++) = DBG_SRC_BG;
-		}
+		memset(dst, SHADOW_OFFSET + (context->regs[REG_BG_COLOR] & 0x3F), 8);
+		memset(debug_dst, DBG_SRC_BG | DBG_SHADOW, 8);
+		dst += 8;
+		debug_dst += 8;
 		start = 8;
 	}
 	uint8_t *sprite_buf = context->linebuf + col * 8 + start;
 	for (int i = start; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i)
 	{
 		uint8_t sprite, plane_a, plane_b;
-		plane_a = context->tmp_buf_a[plane_a_off & SCROLL_BUFFER_MASK];
+		plane_a = buf_a[plane_a_off & plane_a_mask];
 		plane_b = context->tmp_buf_b[plane_b_off & SCROLL_BUFFER_MASK];
 		sprite = *sprite_buf;
-		uint8_t pixel = composite_normal(context, debug_dst, sprite, plane_a, plane_b, context->regs[REG_BG_COLOR]);
+		sh_pixel pixel = composite_highlight(context, debug_dst, sprite, plane_a, plane_b, context->regs[REG_BG_COLOR]);
+		uint8_t final_pixel;
+		if (pixel.intensity == BUF_BIT_PRIORITY << 1) {
+			final_pixel = (pixel.index & 0x3F) + HIGHLIGHT_OFFSET;
+		} else if (pixel.intensity) {
+			final_pixel = pixel.index & 0x3F;
+		} else {
+			final_pixel = (pixel.index & 0x3F) + SHADOW_OFFSET;
+		}
 		debug_dst++;
-		*(dst++) = context->colors[pixel & 0x3F];
+		*(dst++) = final_pixel;
 	}
 }
 
-static void render_highlight(vdp_context *context, int32_t col, uint32_t *dst, uint8_t *debug_dst, int plane_a_off, int plane_b_off)
-{
-	int start = 0;
-	if (!col && (context->regs[REG_MODE_1] & BIT_COL0_MASK)) {
-		uint32_t bgcolor = context->colors[SHADOW_OFFSET + (context->regs[REG_BG_COLOR] & 0x3F)];
-		for (int i = 0; i < 8; ++i)
-		{
-			*(dst++) = bgcolor;
-			*(debug_dst++) = DBG_SRC_BG | DBG_SHADOW;
-		}
-		start = 8;
-	}
-	uint8_t *sprite_buf = context->linebuf + col * 8 + start;
-	for (int i = start; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i)
-	{
-		uint8_t sprite, plane_a, plane_b;
-		plane_a = context->tmp_buf_a[plane_a_off & SCROLL_BUFFER_MASK];
-		plane_b = context->tmp_buf_b[plane_b_off & SCROLL_BUFFER_MASK];
-		sprite = *sprite_buf;
-		sh_pixel pixel = composite_highlight(context, debug_dst, sprite, plane_a, plane_b, context->regs[REG_BG_COLOR]);
-		uint32_t *colors;
-		if (pixel.intensity == BUF_BIT_PRIORITY << 1) {
-			colors = context->colors + HIGHLIGHT_OFFSET;
-		} else if (pixel.intensity) {
-			colors = context->colors;
-		} else {
-			colors = context->colors + SHADOW_OFFSET;
-		}
-		debug_dst++;
-		*(dst++) = colors[pixel.index & 0x3F];
-	}
-}
-
-static void render_testreg(vdp_context *context, int32_t col, uint32_t *dst, uint8_t *debug_dst, int plane_a_off, int plane_b_off, uint8_t output_disabled, uint8_t test_layer)
+static void render_testreg(vdp_context *context, int32_t col, uint8_t *dst, uint8_t *debug_dst, uint8_t *buf_a, int plane_a_off, int plane_a_mask, int plane_b_off, uint8_t output_disabled, uint8_t test_layer)
 {
 	if (output_disabled) {
 		switch (test_layer)
@@ -1402,7 +1430,7 @@
 			uint8_t *sprite_buf = context->linebuf + col * 8;
 			for (int i = 0; i < 16; i++)
 			{
-				*(dst++) = context->colors[*(sprite_buf++) & 0x3F];
+				*(dst++) = *(sprite_buf++) & 0x3F;
 				*(debug_dst++) = DBG_SRC_S;
 			}
 			break;
@@ -1410,14 +1438,14 @@
 		case 2:
 			for (int i = 0; i < 16; i++)
 			{
-				*(dst++) = context->colors[context->tmp_buf_a[(plane_a_off++) & SCROLL_BUFFER_MASK] & 0x3F];
+				*(dst++) = buf_a[(plane_a_off++) & plane_a_mask] & 0x3F;
 				*(debug_dst++) = DBG_SRC_A;
 			}
 			break;
 		case 3:
 			for (int i = 0; i < 16; i++)
 			{
-				*(dst++) = context->colors[context->tmp_buf_b[(plane_b_off++) & SCROLL_BUFFER_MASK] & 0x3F];
+				*(dst++) = context->tmp_buf_b[(plane_b_off++) & SCROLL_BUFFER_MASK] & 0x3F;
 				*(debug_dst++) = DBG_SRC_B;
 			}
 			break;
@@ -1427,7 +1455,7 @@
 		uint8_t *sprite_buf = context->linebuf + col * 8;
 		if (!col && (context->regs[REG_MODE_1] & BIT_COL0_MASK)) {
 			//TODO: Confirm how test register interacts with column 0 blanking
-			uint8_t pixel = context->regs[REG_BG_COLOR] & 0x3F;
+			uint8_t pixel = 0x3F;
 			uint8_t src = DBG_SRC_BG;
 			for (int i = 0; i < 8; ++i)
 			{
@@ -1440,7 +1468,7 @@
 					}
 					break;
 				case 2:
-					pixel &= context->tmp_buf_a[(plane_a_off + i) & SCROLL_BUFFER_MASK];
+					pixel &= buf_a[(plane_a_off + i) & plane_a_mask];
 					if (pixel) {
 						src = DBG_SRC_A;
 					}
@@ -1453,7 +1481,7 @@
 					break;
 				}
 				
-				*(dst++) = context->colors[pixel & 0x3F];
+				*(dst++) = pixel;
 				*(debug_dst++) = src;
 			}
 			plane_a_off += 8;
@@ -1464,10 +1492,10 @@
 		for (int i = start; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i)
 		{
 			uint8_t sprite, plane_a, plane_b;
-			plane_a = context->tmp_buf_a[plane_a_off & SCROLL_BUFFER_MASK];
+			plane_a = buf_a[plane_a_off & plane_a_mask];
 			plane_b = context->tmp_buf_b[plane_b_off & SCROLL_BUFFER_MASK];
 			sprite = *sprite_buf;
-			uint8_t pixel = composite_normal(context, debug_dst, sprite, plane_a, plane_b, 0x3F);
+			uint8_t pixel = composite_normal(context, debug_dst, sprite, plane_a, plane_b, 0x3F) & 0x3F;
 			switch (test_layer)
 			{
 			case 1:
@@ -1490,18 +1518,18 @@
 				break;
 			}
 			debug_dst++;
-			*(dst++) = context->colors[pixel & 0x3F];
+			*(dst++) = pixel;
 		}
 	}
 }
 
-static void render_testreg_highlight(vdp_context *context, int32_t col, uint32_t *dst, uint8_t *debug_dst, int plane_a_off, int plane_b_off, uint8_t output_disabled, uint8_t test_layer)
+static void render_testreg_highlight(vdp_context *context, int32_t col, uint8_t *dst, uint8_t *debug_dst, uint8_t *buf_a, int plane_a_off, int plane_a_mask, int plane_b_off, uint8_t output_disabled, uint8_t test_layer)
 {
 	int start = 0;
 	uint8_t *sprite_buf = context->linebuf + col * 8;
 	if (!col && (context->regs[REG_MODE_1] & BIT_COL0_MASK)) {
 		//TODO: Confirm how test register interacts with column 0 blanking
-		uint8_t pixel = context->regs[REG_BG_COLOR] & 0x3F;
+		uint8_t pixel = 0x3F;
 		uint8_t src = DBG_SRC_BG | DBG_SHADOW;
 		for (int i = 0; i < 8; ++i)
 		{
@@ -1514,7 +1542,7 @@
 				}
 				break;
 			case 2:
-				pixel &= context->tmp_buf_a[(plane_a_off + i) & SCROLL_BUFFER_MASK];
+				pixel &= buf_a[(plane_a_off + i) & plane_a_mask];
 				if (pixel) {
 					src = DBG_SRC_A | DBG_SHADOW;
 				}
@@ -1527,7 +1555,7 @@
 				break;
 			}
 			
-			*(dst++) = context->colors[SHADOW_OFFSET + (pixel & 0x3F)];
+			*(dst++) = SHADOW_OFFSET + pixel;
 			*(debug_dst++) = src;
 		}
 		plane_a_off += 8;
@@ -1538,20 +1566,14 @@
 	for (int i = start; i < 16; ++plane_a_off, ++plane_b_off, ++sprite_buf, ++i)
 	{
 		uint8_t sprite, plane_a, plane_b;
-		plane_a = context->tmp_buf_a[plane_a_off & SCROLL_BUFFER_MASK];
+		plane_a = buf_a[plane_a_off & plane_a_mask];
 		plane_b = context->tmp_buf_b[plane_b_off & SCROLL_BUFFER_MASK];
 		sprite = *sprite_buf;
 		sh_pixel pixel = composite_highlight(context, debug_dst, sprite, plane_a, plane_b, 0x3F);
-		uint32_t *colors;
-		if (pixel.intensity == BUF_BIT_PRIORITY << 1) {
-			colors = context->colors + HIGHLIGHT_OFFSET;
-		} else if (pixel.intensity) {
-			colors = context->colors;
-		} else {
-			colors = context->colors + SHADOW_OFFSET;
-		}
 		if (output_disabled) {
 			pixel.index = 0x3F;
+		} else {
+			pixel.index &= 0x3F;
 		}
 		switch (test_layer)
 		{
@@ -1574,37 +1596,35 @@
 			}
 			break;
 		}
+		if (pixel.intensity == BUF_BIT_PRIORITY << 1) {
+			pixel.index += HIGHLIGHT_OFFSET;
+		} else if (!pixel.intensity) {
+			pixel.index += SHADOW_OFFSET;
+		}
 		debug_dst++;
-		*(dst++) = colors[pixel.index & 0x3F];
+		*(dst++) = pixel.index;
 	}
 }
 
 static void render_map_output(uint32_t line, int32_t col, vdp_context * context)
 {
-	uint32_t *dst;
+	uint8_t *dst;
 	uint8_t *debug_dst;
 	uint8_t output_disabled = (context->test_port & TEST_BIT_DISABLE) != 0;
 	uint8_t test_layer = context->test_port >> 7 & 3;
 	if (context->state == PREPARING && !test_layer) {
 		if (col) {
 			col -= 2;
-			dst = context->output + BORDER_LEFT + col * 8;
+			dst = context->compositebuf + BORDER_LEFT + col * 8;
 		} else {
-			dst = context->output;
+			dst = context->compositebuf;
 			uint32_t bg_color = context->colors[context->regs[REG_BG_COLOR] & 0x3F];
-			for (int i = 0; i < BORDER_LEFT; i++, dst++)
-			{
-				*dst = bg_color;
-			}
-			context->done_output = dst;
+			memset(dst, 0, BORDER_LEFT);
+			context->done_composite = dst + BORDER_LEFT;
 			return;
 		}
-		uint32_t color = context->colors[context->regs[REG_BG_COLOR] & 0x3F];
-		for (int i = 0; i < 16; i++)
-		{
-			*(dst++) = color;
-		}
-		context->done_output = dst;
+		memset(dst, 0, 16);
+		context->done_composite = dst + 16;
 		return;
 	}
 	line &= 0xFF;
@@ -1615,65 +1635,73 @@
 	if (col)
 	{
 		col-=2;
-		dst = context->output + BORDER_LEFT + col * 8;
+		dst = context->compositebuf + BORDER_LEFT + col * 8;
 		debug_dst = context->layer_debug_buf + BORDER_LEFT + col * 8;
 		
 		
 		uint8_t a_src, src;
+		uint8_t *buf_a;
+		int plane_a_mask;
 		if (context->flags & FLAG_WINDOW) {
 			plane_a_off = context->buf_a_off;
+			buf_a = context->tmp_buf_a;
 			a_src = DBG_SRC_W;
+			plane_a_mask = SCROLL_BUFFER_MASK;
 		} else {
-			plane_a_off = context->buf_a_off - (context->hscroll_a & 0xF);
+			if (context->flags & FLAG_WINDOW_EDGE) {
+				buf_a = context->tmp_buf_a + context->buf_a_off;
+				plane_a_mask = 15;
+				plane_a_off = -context->hscroll_a_fine;
+			} else {
+				plane_a_off = context->buf_a_off - context->hscroll_a_fine;
+				plane_a_mask = SCROLL_BUFFER_MASK;
+				buf_a = context->tmp_buf_a;
+			}
 			a_src = DBG_SRC_A;
 		}
-		plane_b_off = context->buf_b_off - (context->hscroll_b & 0xF);
+		plane_a_off &= plane_a_mask;
+		plane_b_off = context->buf_b_off - context->hscroll_b_fine;
 		//printf("A | tmp_buf offset: %d\n", 8 - (context->hscroll_a & 0x7));
 
 		if (context->regs[REG_MODE_4] & BIT_HILIGHT) {
 			if (output_disabled || test_layer) {
-				render_testreg_highlight(context, col, dst, debug_dst, plane_a_off, plane_b_off, output_disabled, test_layer);
+				render_testreg_highlight(context, col, dst, debug_dst, buf_a, plane_a_off, plane_a_mask, plane_b_off, output_disabled, test_layer);
 			} else {
-				render_highlight(context, col, dst, debug_dst, plane_a_off, plane_b_off);
+				render_highlight(context, col, dst, debug_dst, buf_a, plane_a_off, plane_a_mask, plane_b_off);
 			}
 		} else {
 			if (output_disabled || test_layer) {
-				render_testreg(context, col, dst, debug_dst, plane_a_off, plane_b_off, output_disabled, test_layer);
+				render_testreg(context, col, dst, debug_dst, buf_a, plane_a_off, plane_a_mask, plane_b_off, output_disabled, test_layer);
 			} else {
-				render_normal(context, col, dst, debug_dst, plane_a_off, plane_b_off);
+				render_normal(context, col, dst, debug_dst, buf_a, plane_a_off, plane_a_mask, plane_b_off);
 			}
 		}
 		dst += 16;
 	} else {
-		dst = context->output;
+		dst = context->compositebuf;
 		debug_dst = context->layer_debug_buf;
-		uint8_t pixel = context->regs[REG_BG_COLOR] & 0x3F;
+		uint8_t pixel = 0;
 		if (output_disabled) {
 			pixel = 0x3F;
 		}
-		uint32_t bg_color = context->colors[pixel];
 		if (test_layer) {
 			switch(test_layer)
 			{
 			case 1:
-				bg_color = context->colors[0];
-				for (int i = 0; i < BORDER_LEFT; i++, dst++, debug_dst++)
-				{
-					*dst = bg_color;
-					*debug_dst = DBG_SRC_BG;
-					
-				}
+				memset(dst, 0, BORDER_LEFT);
+				memset(debug_dst, DBG_SRC_BG, BORDER_LEFT);
+				dst += BORDER_LEFT;
 				break;
 			case 2: {
 				//plane A
 				//TODO: Deal with Window layer
 				int i;
 				i = 0;
-				uint8_t buf_off = context->buf_a_off - (context->hscroll_a & 0xF) + (16 - BORDER_LEFT);
+				uint8_t buf_off = context->buf_a_off - context->hscroll_a_fine + (16 - BORDER_LEFT);
 				//uint8_t *src = context->tmp_buf_a + ((context->buf_a_off + (i ? 0 : (16 - BORDER_LEFT) - (context->hscroll_a & 0xF))) & SCROLL_BUFFER_MASK); 
 				for (; i < BORDER_LEFT; buf_off++, i++, dst++, debug_dst++)
 				{
-					*dst = context->colors[context->tmp_buf_a[buf_off & SCROLL_BUFFER_MASK]];
+					*dst = context->tmp_buf_a[buf_off & SCROLL_BUFFER_MASK];
 					*debug_dst = DBG_SRC_A;
 				}
 				break;
@@ -1682,25 +1710,23 @@
 				//plane B
 				int i;
 				i = 0;
-				uint8_t buf_off = context->buf_b_off - (context->hscroll_b & 0xF) + (16 - BORDER_LEFT);
+				uint8_t buf_off = context->buf_b_off - context->hscroll_b_fine + (16 - BORDER_LEFT);
 				//uint8_t *src = context->tmp_buf_b + ((context->buf_b_off + (i ? 0 : (16 - BORDER_LEFT) - (context->hscroll_b & 0xF))) & SCROLL_BUFFER_MASK); 
 				for (; i < BORDER_LEFT; buf_off++, i++, dst++, debug_dst++)
 				{
-					*dst = context->colors[context->tmp_buf_b[buf_off & SCROLL_BUFFER_MASK]];
+					*dst = context->tmp_buf_b[buf_off & SCROLL_BUFFER_MASK];
 					*debug_dst = DBG_SRC_B;
 				}
 				break;
 			}
 			}
 		} else {
-			for (int i = 0; i < BORDER_LEFT; i++, dst++, debug_dst++)
-			{
-				*dst = bg_color;
-				*debug_dst = DBG_SRC_BG;
-			}
+			memset(dst, pixel, BORDER_LEFT);
+			memset(debug_dst, DBG_SRC_BG, BORDER_LEFT);
+			dst += BORDER_LEFT;
 		}
 	}
-	context->done_output = dst;
+	context->done_composite = dst;
 	context->buf_a_off = (context->buf_a_off + SCROLL_BUFFER_DRAW) & SCROLL_BUFFER_MASK;
 	context->buf_b_off = (context->buf_b_off + SCROLL_BUFFER_DRAW) & SCROLL_BUFFER_MASK;
 }
@@ -1745,15 +1771,12 @@
 	}
 	context->buf_a_off = (context->buf_a_off + 8) & 15;
 	
-	uint8_t bgcolor = 0x10 | (context->regs[REG_BG_COLOR] & 0xF) + MODE4_OFFSET;
-	uint32_t *dst = context->output + col * 8 + BORDER_LEFT;
+	uint8_t *dst = context->compositebuf + col * 8 + BORDER_LEFT;
 	uint8_t *debug_dst = context->layer_debug_buf + col * 8 + BORDER_LEFT;
 	if (context->state == PREPARING) {
-		for (int i = 0; i < 16; i++)
-		{
-			*(dst++) = context->colors[bgcolor];
-		}
-		context->done_output = dst;
+		memset(dst, 0x10 + (context->regs[REG_BG_COLOR] & 0xF) + MODE4_OFFSET, 8);
+		memset(debug_dst, DBG_SRC_BG, 8);
+		context->done_composite = dst + 8;
 		return;
 	}
 	
@@ -1767,22 +1790,21 @@
 			uint8_t *bg_src = context->tmp_buf_a + ((8 + i + col * 8 - (context->hscroll_a & 0x7)) & 15);
 			if ((*bg_src & 0x4F) > 0x40 || !*sprite_src) {
 				//background plane has priority and is opaque or sprite layer is transparent
-				*(dst++) = context->colors[(*bg_src & 0x1F) + MODE4_OFFSET];
-				*(debug_dst++) = DBG_SRC_A;
+				uint8_t pixel = *bg_src & 0x1F;
+				*(dst++) = pixel + MODE4_OFFSET;
+				*(debug_dst++) = pixel ? DBG_SRC_A : DBG_SRC_BG;
 			} else {
 				//sprite layer is opaque and not covered by high priority BG pixels
-				*(dst++) = context->colors[*sprite_src | 0x10 + MODE4_OFFSET];
+				*(dst++) = (*sprite_src | 0x10) + MODE4_OFFSET;
 				*(debug_dst++) = DBG_SRC_S;
 			}
 		}
+		context->done_composite = dst;
 	} else {
-		for (int i = 0; i < 8; i++)
-		{
-			*(dst++) = context->colors[bgcolor];
-			*(debug_dst++) = DBG_SRC_BG;
-		}
+		memset(dst, 0x10 + (context->regs[REG_BG_COLOR] & 0xF) + MODE4_OFFSET, 8);
+		memset(debug_dst, DBG_SRC_BG, 8);
+		context->done_composite = dst + 8;
 	}
-	context->done_output = dst;
 }
 
 static uint32_t const h40_hsync_cycles[] = {19, 20, 20, 20, 18, 20, 20, 20, 18, 20, 20, 20, 18, 20, 20, 20, 19};
@@ -1836,11 +1858,21 @@
 		}
 		if (context->enabled_debuggers & (1 << VDP_DEBUG_CRAM)) {
 			uint32_t *fb = context->debug_fbs[VDP_DEBUG_CRAM] + context->debug_fb_pitch[VDP_DEBUG_CRAM] * line / sizeof(uint32_t);
-			for (int i = 0; i < 64; i++)
-			{
-				for (int x = 0; x < 8; x++)
+			if (context->regs[REG_MODE_2] & BIT_MODE_5) {
+				for (int i = 0; i < 64; i++)
 				{
-					*(fb++) = context->colors[i];
+					for (int x = 0; x < 8; x++)
+					{
+						*(fb++) = context->colors[i];
+					}
+				}
+			} else {
+				for (int i = MODE4_OFFSET; i < MODE4_OFFSET+32; i++)
+				{
+					for (int x = 0; x < 16; x++)
+					{
+						*(fb++) = context->colors[i];
+					}
 				}
 			}
 		}
@@ -2015,28 +2047,54 @@
 		uint32_t starting_line = 512 - 32*4;
 		uint32_t *line = context->debug_fbs[VDP_DEBUG_CRAM] 
 			+ context->debug_fb_pitch[VDP_DEBUG_CRAM]  * starting_line / sizeof(uint32_t);
-		for (int pal = 0; pal < 4; pal ++)
-		{
-			uint32_t *cur;
-			for (int y = 0; y < 31; y++)
+		if (context->regs[REG_MODE_2] & BIT_MODE_5) {
+			for (int pal = 0; pal < 4; pal ++)
 			{
+				uint32_t *cur;
+				for (int y = 0; y < 31; y++)
+				{
+					cur = line;
+					for (int offset = 0; offset < 16; offset++)
+					{
+						for (int x = 0; x < 31; x++)
+						{
+							*(cur++) = context->colors[pal * 16 + offset];
+						}
+						*(cur++) = 0xFF000000;
+					}
+					line += context->debug_fb_pitch[VDP_DEBUG_CRAM] / sizeof(uint32_t);
+				}
 				cur = line;
-				for (int offset = 0; offset < 16; offset++)
+				for (int x = 0; x < 512; x++)
 				{
-					for (int x = 0; x < 31; x++)
-					{
-						*(cur++) = context->colors[pal * 16 + offset];
-					}
 					*(cur++) = 0xFF000000;
 				}
 				line += context->debug_fb_pitch[VDP_DEBUG_CRAM] / sizeof(uint32_t);
 			}
-			cur = line;
-			for (int x = 0; x < 512; x++)
+		} else {
+			for (int pal = 0; pal < 2; pal ++)
 			{
-				*(cur++) = 0xFF000000;
+				uint32_t *cur;
+				for (int y = 0; y < 31; y++)
+				{
+					cur = line;
+					for (int offset = MODE4_OFFSET; offset < MODE4_OFFSET + 16; offset++)
+					{
+						for (int x = 0; x < 31; x++)
+						{
+							*(cur++) = context->colors[pal * 16 + offset];
+						}
+						*(cur++) = 0xFF000000;
+					}
+					line += context->debug_fb_pitch[VDP_DEBUG_CRAM] / sizeof(uint32_t);
+				}
+				cur = line;
+				for (int x = 0; x < 512; x++)
+				{
+					*(cur++) = 0xFF000000;
+				}
+				line += context->debug_fb_pitch[VDP_DEBUG_CRAM] / sizeof(uint32_t);
 			}
-			line += context->debug_fb_pitch[VDP_DEBUG_CRAM] / sizeof(uint32_t);
 		}
 		render_framebuffer_updated(context->debug_fb_indices[VDP_DEBUG_CRAM], 512);
 		context->debug_fbs[VDP_DEBUG_CRAM] = render_get_framebuffer(context->debug_fb_indices[VDP_DEBUG_CRAM], &context->debug_fb_pitch[VDP_DEBUG_CRAM]);
@@ -2049,9 +2107,10 @@
 
 void vdp_force_update_framebuffer(vdp_context *context)
 {
-	uint16_t lines_max = (context->flags2 & FLAG2_REGION_PAL) 
-			? 240 + BORDER_TOP_V30_PAL + BORDER_BOT_V30_PAL 
-			: 224 + BORDER_TOP_V28 + BORDER_BOT_V28;
+	if (!context->fb) {
+		return;
+	}
+	uint16_t lines_max = context->inactive_start + context->border_bot + context->border_top;
 			
 	uint16_t to_fill = lines_max - context->output_lines;
 	memset(
@@ -2066,72 +2125,87 @@
 
 static void advance_output_line(vdp_context *context)
 {
-	if (headless) {
-		if (context->vcounter == context->inactive_start) {
-			context->frame++;
-		}
-		context->vcounter &= 0x1FF;
-	} else {
-		uint16_t lines_max = (context->flags2 & FLAG2_REGION_PAL) 
-			? 240 + BORDER_TOP_V30_PAL + BORDER_BOT_V30_PAL 
-			: 224 + BORDER_TOP_V28 + BORDER_BOT_V28;
-
-		if (context->output_lines == lines_max) {
+	//This function is kind of gross because of the need to deal with vertical border busting via mode changes
+	uint16_t lines_max = context->inactive_start + context->border_bot + context->border_top;
+	uint32_t output_line = context->vcounter;
+	if (!(context->regs[REG_MODE_2] & BIT_MODE_5)) {
+		//vcounter increment occurs much later in Mode 4
+		output_line++;
+	} 
+	
+	if (context->output_lines >= lines_max || (!context->pushed_frame && output_line == context->inactive_start + context->border_top)) {
+		//we've either filled up a full frame or we're at the bottom of screen in the current defined mode + border crop
+		if (!headless) {
 			render_framebuffer_updated(context->cur_buffer, context->h40_lines > (context->inactive_start + context->border_top) / 2 ? LINEBUF_SIZE : (256+HORIZ_BORDER));
-			context->cur_buffer = context->flags2 & FLAG2_EVEN_FIELD ? FRAMEBUFFER_EVEN : FRAMEBUFFER_ODD;
-			context->fb = render_get_framebuffer(context->cur_buffer, &context->output_pitch);
-			vdp_update_per_frame_debug(context);
-			context->h40_lines = 0;
-			context->frame++;
-			context->output_lines = 0;
+			uint8_t is_even = context->flags2 & FLAG2_EVEN_FIELD;
+			if (context->vcounter <= context->inactive_start && (context->regs[REG_MODE_4] & BIT_INTERLACE)) {
+				is_even = !is_even;
+			}
+			context->cur_buffer = is_even ? FRAMEBUFFER_EVEN : FRAMEBUFFER_ODD;
+			context->pushed_frame = 1;
+			context->fb = NULL;
 		}
-		uint32_t output_line = context->vcounter;
-		if (!(context->regs[REG_MODE_2] & BIT_MODE_5)) {
-			//vcounter increment occurs much later in Mode 4
-			output_line++;
-		} 
-		if (output_line < context->inactive_start + context->border_bot && context->output_lines > 0) {
+		vdp_update_per_frame_debug(context);
+		context->h40_lines = 0;
+		context->frame++;
+		context->output_lines = 0;
+	}
+	
+	if (output_line < context->inactive_start + context->border_bot) {
+		if (context->output_lines) {
 			output_line = context->output_lines++;//context->border_top + context->vcounter;
-		} else if (output_line >= 0x200 - context->border_top) {
-			if (output_line == 0x200 - context->border_top) {
-				//We're at the top of the display, force context->output_lines to be zero to avoid
-				//potential screen rolling if the mode is changed at an inopportune time
-				context->output_lines = 0;
-			}
-			output_line = context->output_lines++;//context->vcounter - (0x200 - context->border_top);
+		} else if (!output_line && !context->border_top) {
+			//top border is completely cropped so we won't hit the case below
+			output_line = 0;
+			context->output_lines = 1;
+			context->pushed_frame = 0;
 		} else {
-			output_line = INVALID_LINE;
+			context->output_lines = output_line + 1;
+		}
+	} else if (output_line >= 0x200 - context->border_top) {
+		if (output_line == 0x200 - context->border_top) {
+			//We're at the top of the display, force context->output_lines to be zero to avoid
+			//potential screen rolling if the mode is changed at an inopportune time
+			context->output_lines = 0;
+			context->pushed_frame = 0;
 		}
-		context->output = (uint32_t *)(((char *)context->fb) + context->output_pitch * output_line);
-		context->done_output = context->output;
+		output_line = context->output_lines++;//context->vcounter - (0x200 - context->border_top);
+	} else {
+		context->output = NULL;
+		return;
+	}
+	if (!context->fb) {
+		context->fb = render_get_framebuffer(context->cur_buffer, &context->output_pitch);
+	}
+	output_line += context->top_offset;
+	context->output = (uint32_t *)(((char *)context->fb) + context->output_pitch * output_line);
 #ifdef DEBUG_FB_FILL
-		for (int i = 0; i < LINEBUF_SIZE; i++)
-		{
-			context->output[i] = 0xFFFF00FF;
-		}
+	for (int i = 0; i < LINEBUF_SIZE; i++)
+	{
+		context->output[i] = 0xFFFF00FF;
+	}
 #endif	
-		if (output_line != INVALID_LINE && (context->regs[REG_MODE_4] & BIT_H40)) {
-			context->h40_lines++;
-		}
+	if (context->output && (context->regs[REG_MODE_4] & BIT_H40)) {
+		context->h40_lines++;
 	}
 }
 
 void vdp_release_framebuffer(vdp_context *context)
 {
-	render_framebuffer_updated(context->cur_buffer, context->h40_lines > (context->inactive_start + context->border_top) / 2 ? LINEBUF_SIZE : (256+HORIZ_BORDER));
-	context->output = context->fb = NULL;
+	if (context->fb) {
+		render_framebuffer_updated(context->cur_buffer, context->h40_lines > (context->inactive_start + context->border_top) / 2 ? LINEBUF_SIZE : (256+HORIZ_BORDER));
+		context->output = context->fb = NULL;
+	}
 }
 
 void vdp_reacquire_framebuffer(vdp_context *context)
 {
-	context->fb = render_get_framebuffer(context->cur_buffer, &context->output_pitch);
-	uint16_t lines_max = (context->flags2 & FLAG2_REGION_PAL) 
-			? 240 + BORDER_TOP_V30_PAL + BORDER_BOT_V30_PAL
-			: 224 + BORDER_TOP_V28 + BORDER_BOT_V28;
+	uint16_t lines_max = context->inactive_start + context->border_bot + context->border_top;
 	if (context->output_lines <= lines_max && context->output_lines > 0) {
-		context->output = (uint32_t *)(((char *)context->fb) + context->output_pitch * (context->output_lines - 1));
+		context->fb = render_get_framebuffer(context->cur_buffer, &context->output_pitch);
+		context->output = (uint32_t *)(((char *)context->fb) + context->output_pitch * (context->output_lines - 1 + context->top_offset));
 	} else {
-		context->output = (uint32_t *)(((char *)context->fb) + context->output_pitch * INVALID_LINE);
+		context->output = NULL;
 	}
 }
 
@@ -2148,33 +2222,29 @@
 
 static void draw_right_border(vdp_context *context)
 {
-	uint32_t *dst = context->output + BORDER_LEFT + ((context->regs[REG_MODE_4] & BIT_H40) ? 320 : 256);
+	uint8_t *dst = context->compositebuf + BORDER_LEFT + ((context->regs[REG_MODE_4] & BIT_H40) ? 320 : 256);
 	uint8_t pixel = context->regs[REG_BG_COLOR] & 0x3F;
 	if ((context->test_port & TEST_BIT_DISABLE) != 0) {
 		pixel = 0x3F;
 	}
-	uint32_t bg_color = context->colors[pixel];
 	uint8_t test_layer = context->test_port >> 7 & 3;
 	if (test_layer) {
 		switch(test_layer)
 			{
 			case 1:
-				bg_color = context->colors[0];
-				for (int i = 0; i < BORDER_RIGHT; i++, dst++)
-				{
-					*dst = bg_color;
-				}
+				memset(dst, 0, BORDER_RIGHT);
+				dst += BORDER_RIGHT;
 				break;
 			case 2: {
 				//plane A
 				//TODO: Deal with Window layer
 				int i;
 				i = 0;
-				uint8_t buf_off = context->buf_a_off - (context->hscroll_a & 0xF);
+				uint8_t buf_off = context->buf_a_off - context->hscroll_a_fine;
 				//uint8_t *src = context->tmp_buf_a + ((context->buf_a_off + (i ? 0 : (16 - BORDER_LEFT) - (context->hscroll_a & 0xF))) & SCROLL_BUFFER_MASK); 
 				for (; i < BORDER_RIGHT; buf_off++, i++, dst++)
 				{
-					*dst = context->colors[context->tmp_buf_a[buf_off & SCROLL_BUFFER_MASK] & 0x3F];
+					*dst = context->tmp_buf_a[buf_off & SCROLL_BUFFER_MASK] & 0x3F;
 				}
 				break;
 			}
@@ -2186,84 +2256,172 @@
 				//uint8_t *src = context->tmp_buf_b + ((context->buf_b_off + (i ? 0 : (16 - BORDER_LEFT) - (context->hscroll_b & 0xF))) & SCROLL_BUFFER_MASK); 
 				for (; i < BORDER_RIGHT; buf_off++, i++, dst++)
 				{
-					*dst = context->colors[context->tmp_buf_b[buf_off & SCROLL_BUFFER_MASK] & 0x3F];
+					*dst = context->tmp_buf_b[buf_off & SCROLL_BUFFER_MASK] & 0x3F;
 				}
 				break;
 			}
 			}
 	} else {
-		for (int i = 0; i < BORDER_RIGHT; i++, dst++)
-		{
-			*dst = bg_color;
-		}
+		memset(dst, 0, BORDER_RIGHT);
+		dst += BORDER_RIGHT;
 	}
-	context->done_output = dst;
+	context->done_composite = dst;
 	context->buf_a_off = (context->buf_a_off + SCROLL_BUFFER_DRAW) & SCROLL_BUFFER_MASK;
 	context->buf_b_off = (context->buf_b_off + SCROLL_BUFFER_DRAW) & SCROLL_BUFFER_MASK;
 }
 
 #define CHECK_ONLY if (context->cycles >= target_cycles) { return; }
 #define CHECK_LIMIT if (context->flags & FLAG_DMA_RUN) { run_dma_src(context, -1); } context->hslot++; context->cycles += slot_cycles; CHECK_ONLY
+#define OUTPUT_PIXEL(slot) if ((slot) >= BG_START_SLOT) {\
+		uint8_t *src = context->compositebuf + ((slot) - BG_START_SLOT) *2;\
+		uint32_t *dst = context->output + ((slot) - BG_START_SLOT) *2;\
+		if ((*src & 0x3F) | test_layer) {\
+			*(dst++) = context->colors[*(src++)];\
+		} else {\
+			*(dst++) = context->colors[(*(src++) & 0xC0) | bgindex];\
+		}\
+		if ((*src & 0x3F) | test_layer) {\
+			*(dst++) = context->colors[*(src++)];\
+		} else {\
+			*(dst++) = context->colors[(*(src++) & 0xC0) | bgindex];\
+		}\
+	}
+	
+#define OUTPUT_PIXEL_H40(slot) if (slot <= (BG_START_SLOT + LINEBUF_SIZE/2)) {\
+		uint8_t *src = context->compositebuf + (slot - BG_START_SLOT) *2;\
+		uint32_t *dst = context->output + (slot - BG_START_SLOT) *2;\
+		if ((*src & 0x3F) | test_layer) {\
+			*(dst++) = context->colors[*(src++)];\
+		} else {\
+			*(dst++) = context->colors[(*(src++) & 0xC0) | bgindex];\
+		}\
+		if (slot != (BG_START_SLOT + LINEBUF_SIZE/2)) {\
+			if ((*src & 0x3F) | test_layer) {\
+				*(dst++) = context->colors[*(src++)];\
+			} else {\
+				*(dst++) = context->colors[(*(src++) & 0xC0) | bgindex];\
+			}\
+		}\
+	}
+	
+#define OUTPUT_PIXEL_H32(slot) if (slot <= (BG_START_SLOT + (256+HORIZ_BORDER)/2)) {\
+		uint8_t *src = context->compositebuf + (slot - BG_START_SLOT) *2;\
+		uint32_t *dst = context->output + (slot - BG_START_SLOT) *2;\
+		if ((*src & 0x3F) | test_layer) {\
+			*(dst++) = context->colors[*(src++)];\
+		} else {\
+			*(dst++) = context->colors[(*(src++) & 0xC0) | bgindex];\
+		}\
+		if (slot != (BG_START_SLOT + (256+HORIZ_BORDER)/2)) {\
+			if ((*src & 0x3F) | test_layer) {\
+				*(dst++) = context->colors[*(src++)];\
+			} else {\
+				*(dst++) = context->colors[(*(src++) & 0xC0) | bgindex];\
+			}\
+		}\
+	}
+	
+//BG_START_SLOT => dst = 0, src = border
+//BG_START_SLOT + 13/2=6, dst = 6, src = border + comp + 13
+#define OUTPUT_PIXEL_MODE4(slot) if ((slot) >= BG_START_SLOT) {\
+		uint8_t *src = context->compositebuf + ((slot) - BG_START_SLOT) *2;\
+		uint32_t *dst = context->output + ((slot) - BG_START_SLOT) *2;\
+		if ((slot) - BG_START_SLOT < BORDER_LEFT/2) {\
+			*(dst++) = context->colors[bgindex];\
+			*(dst++) = context->colors[bgindex];\
+		} else if ((slot) - BG_START_SLOT < (BORDER_LEFT+256)/2){\
+			if ((slot) - BG_START_SLOT == BORDER_LEFT/2) {\
+				*(dst++) = context->colors[bgindex];\
+				src++;\
+			} else {\
+				*(dst++) = context->colors[*(src++)];\
+			}\
+			*(dst++) = context->colors[*(src++)];\
+		} else if ((slot) - BG_START_SLOT <= (HORIZ_BORDER+256)/2) {\
+			*(dst++) = context->colors[bgindex];\
+			if ((slot) - BG_START_SLOT < (HORIZ_BORDER+256)/2) {\
+				*(dst++) = context->colors[bgindex];\
+			}\
+		}\
+	}
 
 #define COLUMN_RENDER_BLOCK(column, startcyc) \
 	case startcyc:\
+		OUTPUT_PIXEL(startcyc)\
 		read_map_scroll_a(column, context->vcounter, context);\
 		CHECK_LIMIT\
 	case ((startcyc+1)&0xFF):\
+		OUTPUT_PIXEL((startcyc+1)&0xFF)\
 		external_slot(context);\
 		CHECK_LIMIT\
 	case ((startcyc+2)&0xFF):\
+		OUTPUT_PIXEL((startcyc+2)&0xFF)\
 		render_map_1(context);\
 		CHECK_LIMIT\
 	case ((startcyc+3)&0xFF):\
+		OUTPUT_PIXEL((startcyc+3)&0xFF)\
 		render_map_2(context);\
 		CHECK_LIMIT\
 	case ((startcyc+4)&0xFF):\
+		OUTPUT_PIXEL((startcyc+4)&0xFF)\
 		read_map_scroll_b(column, context->vcounter, context);\
 		CHECK_LIMIT\
 	case ((startcyc+5)&0xFF):\
+		OUTPUT_PIXEL((startcyc+5)&0xFF)\
 		read_sprite_x(context->vcounter, context);\
 		CHECK_LIMIT\
 	case ((startcyc+6)&0xFF):\
+		OUTPUT_PIXEL((startcyc+6)&0xFF)\
 		render_map_3(context);\
 		CHECK_LIMIT\
 	case ((startcyc+7)&0xFF):\
+		OUTPUT_PIXEL((startcyc+7)&0xFF)\
 		render_map_output(context->vcounter, column, context);\
 		CHECK_LIMIT
 
 #define COLUMN_RENDER_BLOCK_REFRESH(column, startcyc) \
 	case startcyc:\
+		OUTPUT_PIXEL(startcyc)\
 		read_map_scroll_a(column, context->vcounter, context);\
 		CHECK_LIMIT\
 	case (startcyc+1):\
-		/* refresh, no don't run dma src */\
+		/* refresh, so don't run dma src */\
+		OUTPUT_PIXEL((startcyc+1)&0xFF)\
 		context->hslot++;\
 		context->cycles += slot_cycles;\
 		CHECK_ONLY\
 	case (startcyc+2):\
+		OUTPUT_PIXEL((startcyc+2)&0xFF)\
 		render_map_1(context);\
 		CHECK_LIMIT\
 	case (startcyc+3):\
+		OUTPUT_PIXEL((startcyc+3)&0xFF)\
 		render_map_2(context);\
 		CHECK_LIMIT\
 	case (startcyc+4):\
+		OUTPUT_PIXEL((startcyc+4)&0xFF)\
 		read_map_scroll_b(column, context->vcounter, context);\
 		CHECK_LIMIT\
 	case (startcyc+5):\
+		OUTPUT_PIXEL((startcyc+5)&0xFF)\
 		read_sprite_x(context->vcounter, context);\
 		CHECK_LIMIT\
 	case (startcyc+6):\
+		OUTPUT_PIXEL((startcyc+6)&0xFF)\
 		render_map_3(context);\
 		CHECK_LIMIT\
 	case (startcyc+7):\
+		OUTPUT_PIXEL((startcyc+7)&0xFF)\
 		render_map_output(context->vcounter, column, context);\
 		CHECK_LIMIT
 		
 #define COLUMN_RENDER_BLOCK_MODE4(column, startcyc) \
 	case startcyc:\
+		OUTPUT_PIXEL_MODE4(startcyc)\
 		read_map_mode4(column, context->vcounter, context);\
 		CHECK_LIMIT\
 	case ((startcyc+1)&0xFF):\
+		OUTPUT_PIXEL_MODE4((startcyc+1)&0xFF)\
 		if (column & 3) {\
 			scan_sprite_table_mode4(context);\
 		} else {\
@@ -2271,9 +2429,11 @@
 		}\
 		CHECK_LIMIT\
 	case ((startcyc+2)&0xFF):\
+		OUTPUT_PIXEL_MODE4((startcyc+2)&0xFF)\
 		fetch_map_mode4(column, context->vcounter, context);\
 		CHECK_LIMIT\
 	case ((startcyc+3)&0xFF):\
+		OUTPUT_PIXEL_MODE4((startcyc+3)&0xFF)\
 		render_map_mode4(context->vcounter, column, context);\
 		CHECK_LIMIT
 		
@@ -2293,8 +2453,12 @@
 
 #define SPRITE_RENDER_H40(slot) \
 	case slot:\
+		OUTPUT_PIXEL_H40(slot)\
 		if ((slot) == BG_START_SLOT + LINEBUF_SIZE/2) {\
 			advance_output_line(context);\
+			if (!context->output) {\
+				context->output = dummy_buffer;\
+			}\
 		}\
 		if (slot == 168 || slot == 247 || slot == 248) {\
 			render_border_garbage(\
@@ -2327,8 +2491,12 @@
 //as we're bumping up against the hcounter jump
 #define SPRITE_RENDER_H32(slot) \
 	case slot:\
+		OUTPUT_PIXEL_H32(slot)\
 		if ((slot) == BG_START_SLOT + (256+HORIZ_BORDER)/2) {\
 			advance_output_line(context);\
+			if (!context->output) {\
+				context->output = dummy_buffer;\
+			}\
 		}\
 		if (slot == 136 || slot == 247 || slot == 248) {\
 			render_border_garbage(\
@@ -2360,6 +2528,9 @@
 		if (context->flags & FLAG_DMA_RUN) { run_dma_src(context, -1); } \
 		if ((slot) == BG_START_SLOT + (256+HORIZ_BORDER)/2) {\
 			advance_output_line(context);\
+			if (!context->output) {\
+				context->output = dummy_buffer;\
+			}\
 		}\
 		if ((slot) == 147) {\
 			context->hslot = 233;\
@@ -2379,43 +2550,250 @@
 		
 #define SPRITE_RENDER_H32_MODE4(slot) \
 	case slot:\
+		OUTPUT_PIXEL_MODE4(slot)\
 		read_sprite_x_mode4(context);\
 		MODE4_CHECK_SLOT_LINE(slot)\
 	case CALC_SLOT(slot, 1):\
+		OUTPUT_PIXEL_MODE4(CALC_SLOT(slot, 1))\
 		read_sprite_x_mode4(context);\
 		MODE4_CHECK_SLOT_LINE(CALC_SLOT(slot,1))\
 	case CALC_SLOT(slot, 2):\
+		OUTPUT_PIXEL_MODE4(CALC_SLOT(slot, 2))\
 		fetch_sprite_cells_mode4(context);\
 		MODE4_CHECK_SLOT_LINE(CALC_SLOT(slot, 2))\
 	case CALC_SLOT(slot, 3):\
-		if ((slot + 3) == 140) {\
-			uint32_t *dst = context->output + BORDER_LEFT + 256 + 8;\
-			uint32_t bgcolor = context->colors[0x10 | (context->regs[REG_BG_COLOR] & 0xF) + MODE4_OFFSET];\
-			for (int i = 0; i < BORDER_RIGHT-8; i++, dst++)\
-			{\
-				*dst = bgcolor;\
-			}\
-			context->done_output = dst;\
-		}\
+		OUTPUT_PIXEL_MODE4(CALC_SLOT(slot, 3))\
 		render_sprite_cells_mode4(context);\
 		MODE4_CHECK_SLOT_LINE(CALC_SLOT(slot, 3))\
 	case CALC_SLOT(slot, 4):\
+		OUTPUT_PIXEL_MODE4(CALC_SLOT(slot, 4))\
 		fetch_sprite_cells_mode4(context);\
 		MODE4_CHECK_SLOT_LINE(CALC_SLOT(slot, 4))\
 	case CALC_SLOT(slot, 5):\
+		OUTPUT_PIXEL_MODE4(CALC_SLOT(slot, 5))\
 		render_sprite_cells_mode4(context);\
 		MODE4_CHECK_SLOT_LINE(CALC_SLOT(slot, 5))
 
+static uint32_t dummy_buffer[LINEBUF_SIZE]; //Stand-in line buffer: context->output is pointed here whenever it would otherwise be NULL
+static void vdp_h40_line(vdp_context * context) //Fast path: runs one whole H40 line (slot 165 through slot 164, per the slot comments below) in a single call
+{
+	uint16_t address;
+	uint32_t mask;
+	uint32_t const slot_cycles = MCLKS_SLOT_H40; //NOTE(review): appears unused in this function - confirm before removing
+	uint8_t bgindex = context->regs[REG_BG_COLOR] & 0x3F; //backdrop index, substituted below for pixels whose low 6 bits are zero
+	uint8_t test_layer = context->test_port >> 7 & 3; //(test_port >> 7) & 3; non-zero disables the backdrop substitution
+	
+	//165
+	if (!(context->regs[REG_MODE_3] & BIT_VSCROLL)) {
+		//TODO: Develop some tests on hardware to see when vscroll latch actually happens for full plane mode
+		//See note in vdp_h32 for why this was originally moved out of read_map_scroll
+		//Skitchin' has a similar problem, but uses H40 mode. It seems to be able to hit the extern slot at 232
+		//pretty consistently
+		context->vscroll_latch[0] = context->vsram[0];
+		context->vscroll_latch[1] = context->vsram[1];
+	}
+	render_sprite_cells(context);
+	//166
+	render_sprite_cells(context);
+	//167
+	context->sprite_index = 0x80;
+	context->slot_counter = 0;
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_b, context->buf_b_off,
+		context->col_1
+	);
+	render_sprite_cells(context);
+	scan_sprite_table(context->vcounter, context);
+	//168
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_b,
+		context->buf_b_off + 8,
+		context->col_2
+	);
+	//Do palette lookup for end of previous line
+	uint8_t *src = context->compositebuf + (LINE_CHANGE_H40 - BG_START_SLOT) *2; //composite indices from the line-change point onward
+	uint32_t *dst = context->output + (LINE_CHANGE_H40 - BG_START_SLOT) *2; //matching position in the output line
+	if (test_layer) {
+		for (int i = 0; i < LINEBUF_SIZE - (LINE_CHANGE_H40 - BG_START_SLOT) * 2; i++)
+		{
+			*(dst++) = context->colors[*(src++)]; //test layer active: composite index is used as-is
+		}
+	} else {
+		for (int i = 0; i < LINEBUF_SIZE - (LINE_CHANGE_H40 - BG_START_SLOT) * 2; i++)
+		{
+			if (*src & 0x3F) {
+				*(dst++) = context->colors[*(src++)];
+			} else {
+				*(dst++) = context->colors[(*(src++) & 0xC0) | bgindex]; //keep the top 2 bits, replace the low 6 with the backdrop index
+			}
+		}
+	}
+	advance_output_line(context); //NOTE(review): unlike the per-slot SPRITE_RENDER macros, context->output is not checked for NULL afterwards - confirm this cannot happen here
+	//168-242 (inclusive)
+	for (int i = 0; i < 28; i++)
+	{
+		render_sprite_cells(context);
+		scan_sprite_table(context->vcounter, context);
+	}
+	//243
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_a,
+		context->buf_a_off,
+		context->col_1
+	);
+	//244
+	address = (context->regs[REG_HSCROLL] & 0x3F) << 10; //start of the hscroll data in vdpmem
+	mask = 0; //selects which vcounter bits index the hscroll data
+	if (context->regs[REG_MODE_3] & 0x2) {
+		mask |= 0xF8;
+	}
+	if (context->regs[REG_MODE_3] & 0x1) {
+		mask |= 0x7;
+	}
+	render_border_garbage(context, address, context->tmp_buf_a, context->buf_a_off+8, context->col_2);
+	address += (context->vcounter & mask) * 4; //4 bytes per entry: 16-bit value for A followed by 16-bit value for B
+	context->hscroll_a = context->vdpmem[address] << 8 | context->vdpmem[address+1];
+	context->hscroll_a_fine = context->hscroll_a & 0xF; //low 4 bits kept separately
+	context->hscroll_b = context->vdpmem[address+2] << 8 | context->vdpmem[address+3];
+	context->hscroll_b_fine = context->hscroll_b & 0xF; //low 4 bits kept separately
+	//printf("%d: HScroll A: %d, HScroll B: %d\n", context->vcounter, context->hscroll_a, context->hscroll_b);
+	//243-246 inclusive
+	for (int i = 0; i < 3; i++)
+	{
+		render_sprite_cells(context);
+		scan_sprite_table(context->vcounter, context);
+	}
+	//247
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_b,
+		context->buf_b_off,
+		context->col_1
+	);
+	render_sprite_cells(context);
+	scan_sprite_table(context->vcounter, context);
+	//248
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_b,
+		context->buf_b_off + 8,
+		context->col_2
+	);
+	render_sprite_cells(context);
+	scan_sprite_table(context->vcounter, context);
+	context->buf_a_off = (context->buf_a_off + SCROLL_BUFFER_DRAW) & SCROLL_BUFFER_MASK; //advance both scroll buffer offsets, wrapping with the mask
+	context->buf_b_off = (context->buf_b_off + SCROLL_BUFFER_DRAW) & SCROLL_BUFFER_MASK;
+	//250
+	render_sprite_cells(context);
+	scan_sprite_table(context->vcounter, context);
+	//251
+	scan_sprite_table(context->vcounter, context);//Just a guess
+	//252
+	scan_sprite_table(context->vcounter, context);//Just a guess
+	//254
+	render_sprite_cells(context);
+	scan_sprite_table(context->vcounter, context);
+	//255
+	if (context->cur_slot >= 0 && context->sprite_draw_list[context->cur_slot].x_pos) { //current draw-list entry still has a non-zero x_pos
+		context->flags |= FLAG_DOT_OFLOW;
+	}
+	scan_sprite_table(context->vcounter, context);
+	//0
+	scan_sprite_table(context->vcounter, context);//Just a guess
+	//seems like the sprite table scan fills a shift register
+	//values are FIFO, but unused slots precede used slots
+	//so we set cur_slot to slot_counter and let it wrap around to
+	//the beginning of the list
+	context->cur_slot = context->slot_counter;
+	context->sprite_x_offset = 0;
+	context->sprite_draws = MAX_SPRITES_LINE;
+	//background planes and layer compositing
+	for (int col = 0; col < 42; col+=2) //columns 0, 2, ..., 40
+	{
+		read_map_scroll_a(col, context->vcounter, context);
+		render_map_1(context);
+		render_map_2(context);
+		read_map_scroll_b(col, context->vcounter, context);
+		render_map_3(context);
+		render_map_output(context->vcounter, col, context);
+	}
+	//sprite rendering phase 2
+	for (int i = 0; i < MAX_SPRITES_LINE; i++)
+	{
+		read_sprite_x(context->vcounter, context);
+	}
+	//163
+	context->cur_slot = MAX_SPRITES_LINE-1;
+	memset(context->linebuf, 0, LINEBUF_SIZE); //clear the sprite line buffer before rendering sprite cells into it
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_a, context->buf_a_off,
+		context->col_1
+	);
+	context->flags &= ~FLAG_MASKED;
+	render_sprite_cells(context);
+	//164
+	render_border_garbage(
+		context,
+		context->sprite_draw_list[context->cur_slot].address,
+		context->tmp_buf_a, context->buf_a_off + 8,
+		context->col_2
+	);
+	render_sprite_cells(context);
+	context->cycles += MCLKS_LINE; //charge the whole line's cycles at once
+	vdp_advance_line(context);
+	src = context->compositebuf; //palette lookup for the first (LINE_CHANGE_H40 - BG_START_SLOT) * 2 composite pixels
+	dst = context->output;
+	if (test_layer) {
+		for (int i = 0; i < (LINE_CHANGE_H40 - BG_START_SLOT) * 2; i++)
+		{
+			*(dst++) = context->colors[*(src++)]; //test layer active: composite index is used as-is
+		}
+	} else {
+		for (int i = 0; i < (LINE_CHANGE_H40 - BG_START_SLOT) * 2; i++)
+		{
+			if (*src & 0x3F) {
+				*(dst++) = context->colors[*(src++)];
+			} else {
+				*(dst++) = context->colors[(*(src++) & 0xC0) | bgindex]; //keep the top 2 bits, replace the low 6 with the backdrop index
+			}
+		}
+	}
+}
 static void vdp_h40(vdp_context * context, uint32_t target_cycles)
 {
 	uint16_t address;
 	uint32_t mask;
 	uint32_t const slot_cycles = MCLKS_SLOT_H40;
+	uint8_t bgindex = context->regs[REG_BG_COLOR] & 0x3F;
+	uint8_t test_layer = context->test_port >> 7 & 3;
+	if (!context->output) {
+		//This shouldn't happen normally, but it can theoretically
+		//happen when doing border busting
+		context->output = dummy_buffer;
+	}
 	switch(context->hslot)
 	{
 	for (;;)
 	{
 	case 165:
+		//only consider doing a line at a time if the FIFO is empty, there are no pending reads and there is no DMA running
+		if (context->fifo_read == -1 && !(context->flags & FLAG_DMA_RUN) && ((context->cd & 1) || (context->flags & FLAG_READ_FETCHED))) {
+			while (target_cycles - context->cycles >= MCLKS_LINE && context->state != PREPARING && context->vcounter != context->inactive_start) {
+				vdp_h40_line(context);
+			}
+			CHECK_ONLY
+		}
+		OUTPUT_PIXEL(165)
 		if (!(context->regs[REG_MODE_3] & BIT_VSCROLL)) {
 			//TODO: Develop some tests on hardware to see when vscroll latch actually happens for full plane mode
 			//See note in vdp_h32 for why this was originally moved out of read_map_scroll
@@ -2425,31 +2803,14 @@
 			context->vscroll_latch[1] = context->vsram[1];
 		}
 		if (context->state == PREPARING) {
-			uint32_t bg_color = context->colors[context->regs[REG_BG_COLOR] & 0x3F];
-			uint32_t *dst = context->output + (context->hslot - BG_START_SLOT) * 2;
-			if (dst >= context->done_output) {
-				*dst = bg_color;
-			}
-			dst++;
-			if (dst >= context->done_output) {
-				*dst = bg_color;
-			}
 			external_slot(context);
 		} else {
 			render_sprite_cells(context);
 		}
 		CHECK_LIMIT
 	case 166:
+		OUTPUT_PIXEL(166)
 		if (context->state == PREPARING) {
-			uint32_t bg_color = context->colors[context->regs[REG_BG_COLOR] & 0x3F];
-			uint32_t *dst = context->output + (context->hslot - BG_START_SLOT) * 2;
-			if (dst >= context->done_output) {
-				*dst = bg_color;
-			}
-			dst++;
-			if (dst >= context->done_output) {
-				*dst = bg_color;
-			}
 			external_slot(context);
 		} else {
 			render_sprite_cells(context);
@@ -2462,16 +2823,7 @@
 		CHECK_LIMIT
 	//sprite attribute table scan starts
 	case 167:
-		if (context->state == PREPARING) {
-			uint32_t bg_color = context->colors[context->regs[REG_BG_COLOR] & 0x3F];
-			uint32_t *dst = context->output + (context->hslot - BG_START_SLOT) * 2;
-			for (int i = 0; i < LINEBUF_SIZE - 2 * (context->hslot - BG_START_SLOT); i++, dst++)
-			{
-				if (dst >= context->done_output) {
-					*dst = bg_color;
-				}
-			}
-		}
+		OUTPUT_PIXEL(167)
 		context->sprite_index = 0x80;
 		context->slot_counter = 0;
 		render_border_garbage(
@@ -2528,7 +2880,9 @@
 		render_border_garbage(context, address, context->tmp_buf_a, context->buf_a_off+8, context->col_2);
 		address += (context->vcounter & mask) * 4;
 		context->hscroll_a = context->vdpmem[address] << 8 | context->vdpmem[address+1];
+		context->hscroll_a_fine = context->hscroll_a & 0xF;
 		context->hscroll_b = context->vdpmem[address+2] << 8 | context->vdpmem[address+3];
+		context->hscroll_b_fine = context->hscroll_b & 0xF;
 		//printf("%d: HScroll A: %d, HScroll B: %d\n", context->vcounter, context->hscroll_a, context->hscroll_b);
 		if (context->flags & FLAG_DMA_RUN) { run_dma_src(context, -1); }
 		context->hslot++;
@@ -2556,6 +2910,9 @@
 		CHECK_LIMIT
 	SPRITE_RENDER_H40(254)
 	case 255:
+		if (context->cur_slot >= 0 && context->sprite_draw_list[context->cur_slot].x_pos) {
+			context->flags |= FLAG_DOT_OFLOW;
+		}
 		render_map_3(context);
 		scan_sprite_table(context->vcounter, context);//Just a guess
 		CHECK_LIMIT
@@ -2567,8 +2924,8 @@
 		//so we set cur_slot to slot_counter and let it wrap around to
 		//the beginning of the list
 		context->cur_slot = context->slot_counter;
-		context->sprite_draws = MAX_DRAWS;
-		context->flags &= (~FLAG_CAN_MASK & ~FLAG_MASKED);
+		context->sprite_x_offset = 0;
+		context->sprite_draws = MAX_SPRITES_LINE;
 		CHECK_LIMIT
 	COLUMN_RENDER_BLOCK(2, 1)
 	COLUMN_RENDER_BLOCK(4, 9)
@@ -2591,14 +2948,17 @@
 	COLUMN_RENDER_BLOCK(38, 145)
 	COLUMN_RENDER_BLOCK_REFRESH(40, 153)
 	case 161:
+		OUTPUT_PIXEL(161)
 		external_slot(context);
 		CHECK_LIMIT
 	case 162:
+		OUTPUT_PIXEL(162)
 		external_slot(context);
 		CHECK_LIMIT
 	//sprite render to line buffer starts
 	case 163:
-		context->cur_slot = MAX_DRAWS-1;
+		OUTPUT_PIXEL(163)
+		context->cur_slot = MAX_SPRITES_LINE-1;
 		memset(context->linebuf, 0, LINEBUF_SIZE);
 		render_border_garbage(
 			context,
@@ -2606,9 +2966,11 @@
 			context->tmp_buf_a, context->buf_a_off,
 			context->col_1
 		);
+		context->flags &= ~FLAG_MASKED;
 		render_sprite_cells(context);
 		CHECK_LIMIT
 	case 164:
+		OUTPUT_PIXEL(164)
 		render_border_garbage(
 			context,
 			context->sprite_draw_list[context->cur_slot].address,
@@ -2636,37 +2998,28 @@
 	uint16_t address;
 	uint32_t mask;
 	uint32_t const slot_cycles = MCLKS_SLOT_H32;
+	uint8_t bgindex = context->regs[REG_BG_COLOR] & 0x3F;
+	uint8_t test_layer = context->test_port >> 7 & 3;
+	if (!context->output) {
+		//This shouldn't happen normally, but it can theoretically
+		//happen when doing border busting
+		context->output = dummy_buffer;
+	}
 	switch(context->hslot)
 	{
 	for (;;)
 	{
 	case 133:
+		OUTPUT_PIXEL(133)
 		if (context->state == PREPARING) {
-			uint32_t bg_color = context->colors[context->regs[REG_BG_COLOR] & 0x3F];
-			uint32_t *dst = context->output + (context->hslot - BG_START_SLOT) * 2;
-			if (dst >= context->done_output) {
-				*dst = bg_color;
-			}
-			dst++;
-			if (dst >= context->done_output) {
-				*dst = bg_color;
-			}
 			external_slot(context);
 		} else {
 			render_sprite_cells(context);
 		}
 		CHECK_LIMIT
 	case 134:
+		OUTPUT_PIXEL(134)
 		if (context->state == PREPARING) {
-			uint32_t bg_color = context->colors[context->regs[REG_BG_COLOR] & 0x3F];
-			uint32_t *dst = context->output + (context->hslot - BG_START_SLOT) * 2;
-			if (dst >= context->done_output) {
-				*dst = bg_color;
-			}
-			dst++;
-			if (dst >= context->done_output) {
-				*dst = bg_color;
-			}
 			external_slot(context);
 		} else {
 			render_sprite_cells(context);
@@ -2679,16 +3032,7 @@
 		CHECK_LIMIT
 	//sprite attribute table scan starts
 	case 135:
-		if (context->state == PREPARING) {
-			uint32_t bg_color = context->colors[context->regs[REG_BG_COLOR] & 0x3F];
-			uint32_t *dst = context->output + (context->hslot - BG_START_SLOT) * 2;
-			for (int i = 0; i < (256+HORIZ_BORDER) - 2 * (context->hslot - BG_START_SLOT); i++)
-			{
-				if (dst >= context->done_output) {
-					*(dst++) = bg_color;
-				}
-			}
-		}
+		OUTPUT_PIXEL(135)
 		context->sprite_index = 0x80;
 		context->slot_counter = 0;
 		render_border_garbage(
@@ -2710,6 +3054,7 @@
 	SPRITE_RENDER_H32(143)
 	SPRITE_RENDER_H32(144)
 	case 145:
+		OUTPUT_PIXEL(145)
 		external_slot(context);
 		CHECK_LIMIT
 	SPRITE_RENDER_H32(146)
@@ -2757,7 +3102,9 @@
 		render_border_garbage(context, address, context->tmp_buf_a, context->buf_a_off+8, context->col_2);
 		address += (context->vcounter & mask) * 4;
 		context->hscroll_a = context->vdpmem[address] << 8 | context->vdpmem[address+1];
+		context->hscroll_a_fine = context->hscroll_a & 0xF;
 		context->hscroll_b = context->vdpmem[address+2] << 8 | context->vdpmem[address+3];
+		context->hscroll_b_fine = context->hscroll_b & 0xF;
 		//printf("%d: HScroll A: %d, HScroll B: %d\n", context->vcounter, context->hscroll_a, context->hscroll_b);
 		CHECK_LIMIT //provides "garbage" for border when plane A selected
 	SPRITE_RENDER_H32(245)
@@ -2770,6 +3117,9 @@
 		CHECK_LIMIT
 	SPRITE_RENDER_H32(250)
 	case 251:
+		if (context->cur_slot >= 0 && context->sprite_draw_list[context->cur_slot].x_pos) {
+			context->flags |= FLAG_DOT_OFLOW;
+		}
 		render_map_1(context);
 		scan_sprite_table(context->vcounter, context);//Just a guess
 		CHECK_LIMIT
@@ -2795,8 +3145,8 @@
 		//filled rather than the number of available slots
 		//context->slot_counter = MAX_SPRITES_LINE - context->slot_counter;
 		context->cur_slot = context->slot_counter;
-		context->sprite_draws = MAX_DRAWS_H32;
-		context->flags &= (~FLAG_CAN_MASK & ~FLAG_MASKED);
+		context->sprite_x_offset = 0;
+		context->sprite_draws = MAX_SPRITES_LINE_H32;
 		CHECK_LIMIT
 	COLUMN_RENDER_BLOCK(2, 1)
 	COLUMN_RENDER_BLOCK(4, 9)
@@ -2815,15 +3165,18 @@
 	COLUMN_RENDER_BLOCK(30, 113)
 	COLUMN_RENDER_BLOCK_REFRESH(32, 121)
 	case 129:
+		OUTPUT_PIXEL(129)
 		external_slot(context);
 		CHECK_LIMIT
 	case 130: {
+		OUTPUT_PIXEL(130)
 		external_slot(context);
 		CHECK_LIMIT
 	}
 	//sprite render to line buffer starts
 	case 131:
-		context->cur_slot = MAX_DRAWS_H32-1;
+		OUTPUT_PIXEL(131)
+		context->cur_slot = MAX_SPRITES_LINE_H32-1;
 		memset(context->linebuf, 0, LINEBUF_SIZE);
 		render_border_garbage(
 			context,
@@ -2831,9 +3184,11 @@
 			context->tmp_buf_a, context->buf_a_off,
 			context->col_1
 		);
+		context->flags &= ~FLAG_MASKED;
 		render_sprite_cells(context);
 		CHECK_LIMIT
 	case 132:
+		OUTPUT_PIXEL(132)
 		render_border_garbage(
 			context,
 			context->sprite_draw_list[context->cur_slot].address,
@@ -2860,6 +3215,13 @@
 	uint16_t address;
 	uint32_t mask;
 	uint32_t const slot_cycles = MCLKS_SLOT_H32;
+	uint8_t bgindex = 0x10 | (context->regs[REG_BG_COLOR] & 0xF) + MODE4_OFFSET;
+	uint8_t test_layer = context->test_port >> 7 & 3;
+	if (!context->output) {
+		//This shouldn't happen normally, but it can theoretically
+		//happen when doing border busting
+		context->output = dummy_buffer;
+	}
 	switch(context->hslot)
 	{
 	for (;;)
@@ -2909,13 +3271,6 @@
 		CHECK_LIMIT
 	case 0: {
 		scan_sprite_table_mode4(context);
-		uint32_t *dst = context->output;;
-		uint32_t bgcolor = context->colors[0x10 | (context->regs[REG_BG_COLOR] & 0xF) + MODE4_OFFSET];
-		for (int i = 0; i < BORDER_LEFT-8; i++, dst++)
-		{
-			*dst = bgcolor;
-		}
-		context->done_output = dst;
 		CHECK_LIMIT
 	}
 	case 1:
@@ -2931,13 +3286,6 @@
 		scan_sprite_table_mode4(context);
 		context->buf_a_off = 8;
 		memset(context->tmp_buf_a, 0, 8);
-		uint32_t *dst = context->output + BORDER_LEFT - 8;
-		uint32_t bgcolor = context->colors[0x10 | (context->regs[REG_BG_COLOR] & 0xF) + MODE4_OFFSET];
-		for (int i = 0; i < 8; i++, dst++)
-		{
-			*dst = bgcolor;
-		}
-		context->done_output = dst;
 		CHECK_LIMIT
 	}
 	COLUMN_RENDER_BLOCK_MODE4(0, 5)
@@ -2973,27 +3321,24 @@
 	COLUMN_RENDER_BLOCK_MODE4(30, 125)
 	COLUMN_RENDER_BLOCK_MODE4(31, 129)
 	case 133:
+		OUTPUT_PIXEL_MODE4(133)
 		external_slot(context);
 		CHECK_LIMIT
 	case 134:
+		OUTPUT_PIXEL_MODE4(134)
 		external_slot(context);
 		CHECK_LIMIT
 	case 135:
+		OUTPUT_PIXEL_MODE4(135)
 		external_slot(context);
 		CHECK_LIMIT
 	case 136: {
+		OUTPUT_PIXEL_MODE4(136)
 		external_slot(context);
 		//set things up for sprite rendering in the next slot
 		memset(context->linebuf, 0, LINEBUF_SIZE);
 		context->cur_slot = context->sprite_index = MAX_DRAWS_H32_MODE4-1;
 		context->sprite_draws = MAX_DRAWS_H32_MODE4;
-		uint32_t *dst = context->output + BORDER_LEFT + 256;
-		uint32_t bgcolor = context->colors[0x10 | (context->regs[REG_BG_COLOR] & 0xF) + MODE4_OFFSET];
-		for (int i = 0; i < 8; i++, dst++)
-		{
-			*dst = bgcolor;
-		}
-		context->done_output = dst;
 		CHECK_LIMIT
 	}}
 	default:
@@ -3008,7 +3353,7 @@
 	if (context->hslot > max_slot) {
 		return;
 	}
-	uint32_t *dst = context->output + (context->hslot >> 3) * SCROLL_BUFFER_DRAW;
+	uint8_t *dst = context->compositebuf + (context->hslot >> 3) * SCROLL_BUFFER_DRAW;
 	int32_t len;
 	uint32_t src_off;
 	if (context->hslot) {
@@ -3019,27 +3364,28 @@
 		src_off = SCROLL_BUFFER_DRAW - BORDER_LEFT;
 		len = BORDER_LEFT;
 	}
-	uint8_t *src;
+	uint8_t *src = NULL;
 	if (test_layer == 2) {
 		//plane A
-		src_off += context->buf_a_off + context->hscroll_a;
+		src_off += context->buf_a_off - (context->hscroll_a & 0xF);
 		src = context->tmp_buf_a;
 	} else if (test_layer == 3){
 		//plane B
-		src_off += context->buf_b_off + context->hscroll_b;
+		src_off += context->buf_b_off - (context->hscroll_b & 0xF);
 		src = context->tmp_buf_b;
 	} else {
 		//sprite layer
+		memset(dst, 0, len);
+		dst += len;
+		len = 0;
+	}
+	if (src) {
 		for (; len >=0; len--, dst++, src_off++)
 		{
-			*dst = context->colors[0];
+			*dst = src[src_off & SCROLL_BUFFER_MASK] & 0x3F;
 		}
 	}
-	for (; len >=0; len--, dst++, src_off++)
-	{
-		*dst = context->colors[src[src_off & SCROLL_BUFFER_MASK] & 0x3F];
-	}
-	context->done_output = dst;
+	context->done_composite = dst;
 	context->buf_a_off = (context->buf_a_off + SCROLL_BUFFER_DRAW) & SCROLL_BUFFER_DRAW;
 	context->buf_b_off = (context->buf_b_off + SCROLL_BUFFER_DRAW) & SCROLL_BUFFER_DRAW;
 }
@@ -3049,6 +3395,8 @@
 	//technically the second hcounter check should be different for H40, but this is probably close enough for now
 	if (context->state == ACTIVE && context->vcounter == context->inactive_start && (context->hslot >= (is_h40 ? 167 : 135) || context->hslot < 133)) {
 		context->state = INACTIVE;
+		context->cur_slot = MAX_SPRITES_LINE-1;
+		context->sprite_x_offset = 0;
 	}
 }
 
@@ -3057,7 +3405,6 @@
 	uint8_t buf_clear_slot, index_reset_slot, bg_end_slot, vint_slot, line_change, jump_start, jump_dest, latch_slot;
 	uint8_t index_reset_value, max_draws, max_sprites;
 	uint16_t vint_line, active_line;
-	uint32_t bg_color;
 	
 	if (mode_5) {
 		if (is_h40) {
@@ -3065,7 +3412,7 @@
 			buf_clear_slot = 163;
 			index_reset_slot = 167;
 			bg_end_slot = BG_START_SLOT + LINEBUF_SIZE/2;
-			max_draws = MAX_DRAWS-1;
+			max_draws = MAX_SPRITES_LINE-1;
 			max_sprites = MAX_SPRITES_LINE;
 			index_reset_value = 0x80;
 			vint_slot = VINT_SLOT_H40;
@@ -3074,7 +3421,7 @@
 			jump_dest = 229;
 		} else {
 			bg_end_slot = BG_START_SLOT + (256+HORIZ_BORDER)/2;
-			max_draws = MAX_DRAWS_H32-1;
+			max_draws = MAX_SPRITES_LINE_H32-1;
 			max_sprites = MAX_SPRITES_LINE_H32;
 			buf_clear_slot = 128;
 			index_reset_slot = 132;
@@ -3101,7 +3448,6 @@
 		vint_line = context->inactive_start + 1;
 		vint_slot = VINT_SLOT_MODE4;
 		line_change = LINE_CHANGE_MODE4;
-		bg_color = render_map_color(0, 0, 0);
 		jump_start = 147;
 		jump_dest = 233;
 		if (context->regs[REG_MODE_1] & BIT_MODE_4) {
@@ -3113,38 +3459,19 @@
 	}
 	uint32_t *dst;
 	uint8_t *debug_dst;
-	if (
-		(
-			context->vcounter < context->inactive_start + context->border_bot 
-			|| context->vcounter >= 0x200 - context->border_top
-		) && context->hslot >= BG_START_SLOT && context->hslot < bg_end_slot
-	) {
+	if (context->output && context->hslot >= BG_START_SLOT && context->hslot < bg_end_slot) {
 		dst = context->output + 2 * (context->hslot - BG_START_SLOT);
 		debug_dst = context->layer_debug_buf + 2 * (context->hslot - BG_START_SLOT);
 	} else {
 		dst = NULL;
 	}
 		
-	if (
-		!dst && context->vcounter == context->inactive_start + context->border_bot
-		&& context->hslot >= line_change  && context->hslot < bg_end_slot
-	) {
-		dst = context->output + 2 * (context->hslot - BG_START_SLOT);
-		debug_dst = context->layer_debug_buf + 2 * (context->hslot - BG_START_SLOT);
-	}
-		
 	uint8_t test_layer = context->test_port >> 7 & 3;
-	if (test_layer) {
-		dst = NULL;
-	}
 	
 	while(context->cycles < target_cycles)
 	{
 		check_switch_inactive(context, is_h40);
-		if (context->hslot == BG_START_SLOT && !test_layer && (
-			context->vcounter < context->inactive_start + context->border_bot 
-			|| context->vcounter >= 0x200 - context->border_top
-		)) {
+		if (context->hslot == BG_START_SLOT && context->output) {
 			dst = context->output + (context->hslot - BG_START_SLOT) * 2;
 			debug_dst = context->layer_debug_buf + 2 * (context->hslot - BG_START_SLOT);
 		} else if (context->hslot == bg_end_slot) {
@@ -3168,6 +3495,8 @@
 				break;
 			case 0:
 				render_border_garbage(context, context->serial_address, context->tmp_buf_b, context->buf_b_off+8, context->col_2);
+				break;
+			case 1:
 				inactive_test_output(context, is_h40, test_layer);
 				break;
 			}
@@ -3200,30 +3529,61 @@
 		}
 		
 		if (dst) {
+			uint8_t bg_index;
+			uint32_t bg_color;
 			if (mode_5) {
-				bg_color = context->colors[context->regs[REG_BG_COLOR] & 0x3F];
+				bg_index = context->regs[REG_BG_COLOR] & 0x3F;
+				bg_color = context->colors[bg_index];
 			} else if (context->regs[REG_MODE_1] & BIT_MODE_4) {
-				bg_color = context->colors[MODE4_OFFSET + 0x10 + (context->regs[REG_BG_COLOR] & 0xF)];
+				bg_index = 0x10 + (context->regs[REG_BG_COLOR] & 0xF);
+				bg_color = context->colors[MODE4_OFFSET + bg_index];
+			} else {
+				bg_color = render_map_color(0, 0, 0);
 			}
-			if (dst >= context->done_output) {
+			if (context->done_composite) {
+				uint8_t pixel = context->compositebuf[dst-context->output];
+				if (!(pixel & 0x3F | test_layer)) {
+					pixel = pixel & 0xC0 | bg_index;
+				}
+				*(dst++) = context->colors[pixel];
+				if ((dst - context->output) == (context->done_composite - context->compositebuf)) {
+					context->done_composite = NULL;
+					memset(context->compositebuf, 0, sizeof(context->compositebuf));
+				}
+			} else {
 				*(dst++) = bg_color;
 				*(debug_dst++) = DBG_SRC_BG;
+			}
+			if (context->done_composite) {
+				uint8_t pixel = context->compositebuf[dst-context->output];
+				if (!(pixel & 0x3F | test_layer)) {
+					pixel = pixel & 0xC0 | bg_index;
+				}
+				*(dst++) = context->colors[pixel];
+				if ((dst - context->output) == (context->done_composite - context->compositebuf)) {
+					context->done_composite = NULL;
+					memset(context->compositebuf, 0, sizeof(context->compositebuf));
+				}
 			} else {
-				dst++;
-				debug_dst++;
-			}
-			if (dst >= context->done_output) {
 				*(dst++) = bg_color;
 				*(debug_dst++) = DBG_SRC_BG;
-				context->done_output = dst;
-			} else {
-				dst++;
-				debug_dst++;
 			}
+			
 			if (context->hslot == (bg_end_slot-1)) {
-				*(dst++) = bg_color;
-				*(debug_dst++) = DBG_SRC_BG;
-				context->done_output = dst;
+				if (context->done_composite) {
+					uint8_t pixel = context->compositebuf[dst-context->output];
+					if (!(pixel & 0x3F | test_layer)) {
+						pixel = pixel & 0xC0 | bg_index;
+					}
+					*(dst++) = context->colors[pixel];
+					if ((dst - context->output) == (context->done_composite - context->compositebuf)) {
+						context->done_composite = NULL;
+						memset(context->compositebuf, 0, sizeof(context->compositebuf));
+					}
+				} else {
+					*(dst++) = bg_color;
+					*(debug_dst++) = DBG_SRC_BG;
+				}
 			}
 		}
 		
@@ -3368,6 +3728,19 @@
 	return hv;
 }
 
+static void clear_pending(vdp_context *context)
+{
+	//Commit the latched address and leave the "second word pending" state
+	context->address = context->address_latch;
+	context->flags &= ~FLAG_PENDING;
+	//The DMA enable bit seems to gate whether control port writes may
+	//change CD5 rather than enabling DMA as such: with it clear, CD5 keeps
+	//its current value and only the low 5 code bits come from the latch
+	context->cd = (context->regs[REG_MODE_2] & BIT_DMA_ENABLE)
+		? context->cd_latch
+		: (context->cd & 0x20) | (context->cd_latch & 0x1F);
+}
+
 int vdp_control_port_write(vdp_context * context, uint16_t value)
 {
 	//printf("control port write: %X at %d\n", value, context->cycles);
@@ -3375,12 +3748,9 @@
 		return -1;
 	}
 	if (context->flags & FLAG_PENDING) {
-		context->address = (context->address & 0x3FFF) | (value << 14 & 0x1C000);
-		//It seems like the DMA enable bit doesn't so much enable DMA so much 
-		//as it enables changing CD5 from control port writes
-		uint8_t preserve = (context->regs[REG_MODE_2] & BIT_DMA_ENABLE) ? 0x3 : 0x23;
-		context->cd = (context->cd & preserve) | ((value >> 2) & ~preserve & 0xFF);
-		context->flags &= ~FLAG_PENDING;
+		context->address_latch = (context->address_latch & 0x3FFF) | (value << 14 & 0x1C000);
+		context->cd_latch = (context->cd_latch & 0x3) | ((value >> 2) & ~0x3 & 0xFF);
+		clear_pending(context);
 		//Should these be taken care of here or after the first write?
 		context->flags &= ~FLAG_READ_FETCHED;
 		context->flags2 &= ~FLAG2_READ_PENDING;
@@ -3409,11 +3779,15 @@
 		}
 	} else {
 		uint8_t mode_5 = context->regs[REG_MODE_2] & BIT_MODE_5;
-		context->address = (context->address &0xC000) | (value & 0x3FFF);
-		context->cd = (context->cd & 0x3C) | (value >> 14);
+		context->address_latch = (context->address_latch & 0x1C000) | (value & 0x3FFF);
+		context->cd_latch = (context->cd_latch & 0x3C) | (value >> 14);
 		if ((value & 0xC000) == 0x8000) {
 			//Register write
 			uint8_t reg = (value >> 8) & 0x1F;
+			// The fact that this is needed seems to pour some cold water on my theory
+			// about how the address latch actually works. Needs more search to definitively confirm
+			context->address = (context->address & 0x1C000) | (value & 0x3FFF);
+			context->cd = (context->cd & 0x3C) | (value >> 14);
 			if (reg < (mode_5 ? VDP_REGS : 0xB)) {
 				//printf("register %d set to %X\n", reg, value & 0xFF);
 				if (reg == REG_MODE_1 && (value & BIT_HVC_LATCH) && !(context->regs[reg] & BIT_HVC_LATCH)) {
@@ -3425,6 +3799,8 @@
 				/*if (reg == REG_MODE_4 && ((value ^ context->regs[reg]) & BIT_H40)) {
 					printf("Mode changed from H%d to H%d @ %d, frame: %d\n", context->regs[reg] & BIT_H40 ? 40 : 32, value & BIT_H40 ? 40 : 32, context->cycles, context->frame);
 				}*/
+				uint8_t buffer[2] = {reg, value};
+				event_log(EVENT_VDP_REG, context->cycles, sizeof(buffer), buffer);
 				context->regs[reg] = value;
 				if (reg == REG_MODE_4) {
 					context->double_res = (value & (BIT_INTERLACE | BIT_DOUBLE_RES)) == (BIT_INTERLACE | BIT_DOUBLE_RES);
@@ -3435,6 +3811,43 @@
 				if (reg == REG_MODE_1 || reg == REG_MODE_2 || reg == REG_MODE_4) {
 					update_video_params(context);
 				}
+			} else if (reg == REG_KMOD_CTRL) { //KMOD control register: a zero low byte requests a debugger break
+				if (!(value & 0xFF)) {
+					context->system->enter_debugger = 1;
+				}
+			} else if (reg == REG_KMOD_MSG) { //KMOD message register: bytes accumulate until a zero byte flushes the message
+				char c = value;
+				if (c) {
+					context->kmod_buffer_length++;
+					if ((context->kmod_buffer_length + 1) > context->kmod_buffer_storage) { //need room for this char plus the terminator: start at 128 bytes, then double
+						context->kmod_buffer_storage = context->kmod_buffer_storage ? context->kmod_buffer_storage * 2 : 128;
+						context->kmod_msg_buffer = realloc(context->kmod_msg_buffer, context->kmod_buffer_storage); //NOTE(review): realloc result is not checked
+					}
+					context->kmod_msg_buffer[context->kmod_buffer_length - 1] = c;
+				} else if (context->kmod_buffer_length) {
+					context->kmod_msg_buffer[context->kmod_buffer_length] = 0; //terminate; the growth check above always leaves one spare byte
+					if (is_stdout_enabled()) {
+						init_terminal();
+						printf("KDEBUG MESSAGE: %s\n", context->kmod_msg_buffer);
+					} else {
+						// GDB remote debugging is enabled, use stderr instead
+						fprintf(stderr, "KDEBUG MESSAGE: %s\n", context->kmod_msg_buffer);
+					}
+					context->kmod_buffer_length = 0; //reuse the allocation for the next message
+				}
+			} else if (reg == REG_KMOD_TIMER) { //KMOD timer register: bit 7 clear prints the elapsed time, bits 6-7 restart the timer
+				if (!(value & 0x80)) {
+					if (is_stdout_enabled()) {
+						init_terminal();
+						printf("KDEBUG TIMER: %d\n", (context->cycles - context->timer_start_cycle) / 7);
+					} else {
+						// GDB remote debugging is enabled, use stderr instead
+						fprintf(stderr, "KDEBUG TIMER: %d\n", (context->cycles - context->timer_start_cycle) / 7);
+					}
+				}
+				if (value & 0xC0) {
+					context->timer_start_cycle = context->cycles;
+				}
 			}
 		} else if (mode_5) {
 			context->flags |= FLAG_PENDING;
@@ -3442,6 +3855,7 @@
 			//context->flags &= ~FLAG_READ_FETCHED;
 			//context->flags2 &= ~FLAG2_READ_PENDING;
 		} else {
+			clear_pending(context);
 			context->flags &= ~FLAG_READ_FETCHED;
 			context->flags2 &= ~FLAG2_READ_PENDING;
 		}
@@ -3472,7 +3886,7 @@
 		return -1;
 	}
 	if (context->flags & FLAG_PENDING) {
-		context->flags &= ~FLAG_PENDING;
+		clear_pending(context);
 		//Should these be cleared here?
 		context->flags &= ~FLAG_READ_FETCHED;
 		context->flags2 &= ~FLAG2_READ_PENDING;
@@ -3507,7 +3921,7 @@
 void vdp_data_port_write_pbc(vdp_context * context, uint8_t value)
 {
 	if (context->flags & FLAG_PENDING) {
-		context->flags &= ~FLAG_PENDING;
+		clear_pending(context);
 		//Should these be cleared here?
 		context->flags &= ~FLAG_READ_FETCHED;
 		context->flags2 &= ~FLAG2_READ_PENDING;
@@ -3546,7 +3960,9 @@
 
 uint16_t vdp_control_port_read(vdp_context * context)
 {
-	context->flags &= ~FLAG_PENDING;
+	if (context->flags & FLAG_PENDING) {
+		clear_pending(context);
+	}
 	context->flags2 &= ~FLAG2_BYTE_PENDING;
 	//Bits 15-10 are not fixed like Charles MacDonald's doc suggests, but instead open bus values that reflect 68K prefetch
 	uint16_t value = context->system->get_open_bus_value(context->system) & 0xFC00;
@@ -3596,13 +4012,27 @@
 uint16_t vdp_data_port_read(vdp_context * context)
 {
 	if (context->flags & FLAG_PENDING) {
-		context->flags &= ~FLAG_PENDING;
+		clear_pending(context);
 		//Should these be cleared here?
 		context->flags &= ~FLAG_READ_FETCHED;
 		context->flags2 &= ~FLAG2_READ_PENDING;
 	}
 	if (context->cd & 1) {
 		warning("Read from VDP data port while writes are configured, CPU is now frozen. VDP Address: %X, CD: %X\n", context->address, context->cd);
+		context->system->enter_debugger = 1;
+		return context->prefetch;
+	}
+	switch (context->cd)
+	{
+	case VRAM_READ:
+	case VSRAM_READ:
+	case CRAM_READ:
+	case VRAM_READ8:
+		break;
+	default:
+		warning("Read from VDP data port with invalid source, CPU is now frozen. VDP Address: %X, CD: %X\n", context->address, context->cd);
+		context->system->enter_debugger = 1;
+		return context->prefetch;
 	}
 	while (!(context->flags & FLAG_READ_FETCHED)) {
 		vdp_run_context_full(context, context->cycles + ((context->regs[REG_MODE_4] & BIT_H40) ? 16 : 20));
@@ -3613,19 +4043,16 @@
 
 uint8_t vdp_data_port_read_pbc(vdp_context * context)
 {
-	context->flags &= ~(FLAG_PENDING | FLAG_READ_FETCHED);
+	if (context->flags & FLAG_PENDING) {
+		clear_pending(context);
+	}
+	context->flags &= ~FLAG_READ_FETCHED;
 	context->flags2 &= ~FLAG2_BYTE_PENDING;
 		
 	context->cd = VRAM_READ8;
 	return context->prefetch;
 }
 
-uint16_t vdp_test_port_read(vdp_context * context)
-{
-	//TODO: Find out what actually gets returned here
-	return context->test_port;
-}
-
 void vdp_adjust_cycles(vdp_context * context, uint32_t deduction)
 {
 	context->cycles -= deduction;
@@ -3918,12 +4345,14 @@
 	}
 }
 
+#define VDP_STATE_VERSION 3
 void vdp_serialize(vdp_context *context, serialize_buffer *buf)
 {
+	save_int8(buf, VDP_STATE_VERSION);
 	save_int8(buf, VRAM_SIZE / 1024);//VRAM size in KB, needed for future proofing
 	save_buffer8(buf, context->vdpmem, VRAM_SIZE);
 	save_buffer16(buf, context->cram, CRAM_SIZE);
-	save_buffer16(buf, context->vsram, VSRAM_SIZE);
+	save_buffer16(buf, context->vsram, MAX_VSRAM_SIZE);
 	save_buffer8(buf, context->sat_cache, SAT_CACHE_SIZE);
 	for (int i = 0; i <= REG_DMASRC_H; i++)
 	{
@@ -3974,13 +4403,15 @@
 	save_int8(buf, context->sprite_draws);
 	save_int8(buf, context->slot_counter);
 	save_int8(buf, context->cur_slot);
-	for (int i = 0; i < MAX_DRAWS; i++)
+	for (int i = 0; i < MAX_SPRITES_LINE; i++)
 	{
 		sprite_draw *draw = context->sprite_draw_list + i;
 		save_int16(buf, draw->address);
 		save_int16(buf, draw->x_pos);
 		save_int8(buf, draw->pal_priority);
 		save_int8(buf, draw->h_flip);
+		save_int8(buf, draw->width);
+		save_int8(buf, draw->height);
 	}
 	for (int i = 0; i < MAX_SPRITES_LINE; i++)
 	{
@@ -3994,12 +4425,24 @@
 	save_int32(buf, context->cycles);
 	save_int32(buf, context->pending_vint_start);
 	save_int32(buf, context->pending_hint_start);
+	save_int32(buf, context->address_latch);
+	save_int8(buf, context->cd_latch);
 }
 
 void vdp_deserialize(deserialize_buffer *buf, void *vcontext)
 {
 	vdp_context *context = vcontext;
-	uint8_t vramk = load_int8(buf);
+	uint8_t version = load_int8(buf);
+	uint8_t vramk;
+	if (version == 64) {
+		vramk = version;
+		version = 0;
+	} else {
+		vramk = load_int8(buf);
+	}
+	if (version > VDP_STATE_VERSION) {
+		warning("Save state has VDP version %d, but this build only understands versions %d and lower", version, VDP_STATE_VERSION);
+	}
 	load_buffer8(buf, context->vdpmem, (vramk * 1024) <= VRAM_SIZE ? vramk * 1024 : VRAM_SIZE);
 	if ((vramk * 1024) > VRAM_SIZE) {
 		buf->cur_pos += (vramk * 1024) - VRAM_SIZE;
@@ -4009,7 +4452,7 @@
 	{
 		update_color_map(context, i, context->cram[i]);
 	}
-	load_buffer16(buf, context->vsram, VSRAM_SIZE);
+	load_buffer16(buf, context->vsram, version > 1 ? MAX_VSRAM_SIZE : MIN_VSRAM_SIZE);
 	load_buffer8(buf, context->sat_cache, SAT_CACHE_SIZE);
 	for (int i = 0; i <= REG_DMASRC_H; i++)
 	{
@@ -4061,13 +4504,50 @@
 	context->sprite_draws = load_int8(buf);
 	context->slot_counter = load_int8(buf);
 	context->cur_slot = load_int8(buf);
-	for (int i = 0; i < MAX_DRAWS; i++)
-	{
-		sprite_draw *draw = context->sprite_draw_list + i;
-		draw->address = load_int16(buf);
-		draw->x_pos = load_int16(buf);
-		draw->pal_priority = load_int8(buf);
-		draw->h_flip = load_int8(buf);
+	if (version == 0) {
+		int cur_draw = 0;
+		for (int i = 0; i < MAX_SPRITES_LINE * 2; i++)
+		{
+			if (cur_draw < MAX_SPRITES_LINE) {
+				sprite_draw *last = cur_draw ? context->sprite_draw_list + cur_draw - 1 : NULL;
+				sprite_draw *draw = context->sprite_draw_list + cur_draw++;
+				draw->address = load_int16(buf);
+				draw->x_pos = load_int16(buf);
+				draw->pal_priority = load_int8(buf);
+				draw->h_flip = load_int8(buf);
+				draw->width = 1;
+				draw->height = 8;
+				
+				if (last && last->width < 4 && last->h_flip == draw->h_flip && last->pal_priority == draw->pal_priority) {
+					int adjust_x = draw->x_pos + (draw->h_flip ? -8 : 8); //x position the previous draw must have for both to belong to one sprite
+					int height = (draw->address - last->address) / 4; //address step between the two draws, divided by 4
+					if (last->x_pos == adjust_x && (
+						(last->width > 1 && height == last->height) || 
+						(last->width == 1 && (height == 8 || height == 16 || height == 24 || height == 32))
+					)) {
+						//current draw appears to be part of the same sprite as the last one, combine it
+						cur_draw--;
+						last->width++; //NOTE(review): last->height keeps the value 8 assigned above even when height is larger - confirm this is intended
+					}
+				}
+			} else {
+				load_int16(buf);
+				load_int16(buf);
+				load_int8(buf);
+				load_int8(buf);
+			}
+		}
+	} else {
+		for (int i = 0; i < MAX_SPRITES_LINE; i++)
+		{
+			sprite_draw *draw = context->sprite_draw_list + i;
+			draw->address = load_int16(buf);
+			draw->x_pos = load_int16(buf);
+			draw->pal_priority = load_int8(buf);
+			draw->h_flip = load_int8(buf);
+			draw->width = load_int8(buf);
+			draw->height = load_int8(buf);
+		}
 	}
 	for (int i = 0; i < MAX_SPRITES_LINE; i++)
 	{
@@ -4081,6 +4561,13 @@
 	context->cycles = load_int32(buf);
 	context->pending_vint_start = load_int32(buf);
 	context->pending_hint_start = load_int32(buf);
+	if (version > 2) {
+		context->address_latch = load_int32(buf);
+		context->cd_latch = load_int8(buf);
+	} else {
+		context->address_latch = context->address;
+		context->cd_latch = context->cd;
+	}
 	update_video_params(context);
 }
 
@@ -4157,3 +4644,85 @@
 		}
 	}
 }
+
+void vdp_replay_event(vdp_context *context, uint8_t event, event_reader *reader) //applies one logged VDP event, read from reader's buffer, to context
+{
+	uint32_t address = 0; //stays 0 for event types not handled below; the second switch ignores those
+	deserialize_buffer *buffer = &reader->buffer;
+	switch (event) //first pass: make sure the payload is buffered and decode the target address or index
+	{
+	case EVENT_VRAM_BYTE:
+		reader_ensure_data(reader, 3); //2 address bytes + 1 data byte
+		address = load_int16(buffer);
+		break;
+	case EVENT_VRAM_BYTE_DELTA:
+		reader_ensure_data(reader, 2); //1 delta byte + 1 data byte
+		address = reader->last_byte_address + load_int8(buffer);
+		break;
+	case EVENT_VRAM_BYTE_ONE:
+		reader_ensure_data(reader, 1); //data byte only, address is the previous one plus 1
+		address = reader->last_byte_address + 1;
+		break;
+	case EVENT_VRAM_BYTE_AUTO:
+		reader_ensure_data(reader, 1); //data byte only, address advances by the auto-increment register
+		address = reader->last_byte_address + context->regs[REG_AUTOINC];
+		break;
+	case EVENT_VRAM_WORD:
+		reader_ensure_data(reader, 5); //1 + 2 address bytes read here plus the 2 byte value read in the second switch
+		address = load_int8(buffer) << 16;
+		address |= load_int16(buffer);
+		break;
+	case EVENT_VRAM_WORD_DELTA:
+		reader_ensure_data(reader, 3); //1 delta byte + 2 byte value
+		address = reader->last_word_address + load_int8(buffer);
+		break;
+	case EVENT_VDP_REG:
+	case EVENT_VDP_INTRAM:
+		reader_ensure_data(reader, event == EVENT_VDP_REG ? 2 : 3); //1 index byte + a 1 byte (register) or 2 byte (internal RAM) value
+		address = load_int8(buffer);
+		break;
+	}
+	
+	switch (event) //second pass: read the value and apply it
+	{
+	case EVENT_VDP_REG: {
+		uint8_t value = load_int8(buffer);
+		context->regs[address] = value; //NOTE(review): address comes straight from the stream and is not checked against VDP_REGS - confirm the writer never logs an out of range register
+		if (address == REG_MODE_4) {
+			context->double_res = (value & (BIT_INTERLACE | BIT_DOUBLE_RES)) == (BIT_INTERLACE | BIT_DOUBLE_RES);
+			if (!context->double_res) {
+				context->flags2 &= ~FLAG2_EVEN_FIELD;
+			}
+		}
+		if (address == REG_MODE_1 || address == REG_MODE_2 || address == REG_MODE_4) {
+			update_video_params(context);
+		}
+		break;
+	}
+	case EVENT_VRAM_BYTE:
+	case EVENT_VRAM_BYTE_DELTA:
+	case EVENT_VRAM_BYTE_ONE:
+	case EVENT_VRAM_BYTE_AUTO: {
+		uint8_t byte = load_int8(buffer);
+		reader->last_byte_address = address; //remembered as the base for later delta/one/auto byte events
+		vdp_check_update_sat_byte(context, address ^ 1, byte);
+		write_vram_byte(context, address ^ 1, byte);
+		break;
+	}
+	case EVENT_VRAM_WORD:
+	case EVENT_VRAM_WORD_DELTA: {
+		uint16_t value = load_int16(buffer);
+		reader->last_word_address = address; //remembered as the base for later word delta events
+		vdp_check_update_sat(context, address, value);
+		write_vram_word(context, address, value);
+		break;
+	}
+	case EVENT_VDP_INTRAM:
+		if (address < 128) { //indices below 128 go to CRAM, everything else to VSRAM
+			write_cram(context, address, load_int16(buffer));
+		} else {
+			context->vsram[address&63] = load_int16(buffer);
+		}
+		break;
+	}
+}
--- a/vdp.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/vdp.h	Sat Jan 15 13:15:21 2022 -0800
@@ -16,7 +16,8 @@
 #define SHADOW_OFFSET CRAM_SIZE
 #define HIGHLIGHT_OFFSET (SHADOW_OFFSET+CRAM_SIZE)
 #define MODE4_OFFSET (HIGHLIGHT_OFFSET+CRAM_SIZE)
-#define VSRAM_SIZE 40
+#define MIN_VSRAM_SIZE 40
+#define MAX_VSRAM_SIZE 64
 #define VRAM_SIZE (64*1024)
 #define BORDER_LEFT 13
 #define BORDER_RIGHT 14
@@ -24,8 +25,6 @@
 #define LINEBUF_SIZE (320+HORIZ_BORDER) //H40 + full border
 #define SCROLL_BUFFER_SIZE 32
 #define BORDER_BOTTOM 13 //TODO: Replace with actual value
-#define MAX_DRAWS 40
-#define MAX_DRAWS_H32 32
 #define MAX_DRAWS_H32_MODE4 8
 #define MAX_SPRITES_LINE 20
 #define MAX_SPRITES_LINE_H32 16
@@ -55,7 +54,7 @@
 #define FLAG_PENDING       0x10
 #define FLAG_READ_FETCHED  0x20
 #define FLAG_DMA_RUN       0x40
-#define FLAG_DMA_PROG      0x80
+#define FLAG_WINDOW_EDGE   0x80
 
 #define FLAG2_VINT_PENDING   0x01
 #define FLAG2_HINT_PENDING   0x02
@@ -92,8 +91,11 @@
 	REG_DMALEN_H,
 	REG_DMASRC_L,
 	REG_DMASRC_M,
-	REG_DMASRC_H
-} vdp_regs;
+	REG_DMASRC_H,
+	REG_KMOD_CTRL=29,
+	REG_KMOD_MSG,
+	REG_KMOD_TIMER
+};
 
 //Mode reg 1
 #define BIT_VSCRL_LOCK 0x80
@@ -116,7 +118,7 @@
 #define BIT_SPRITE_SZ  0x02
 
 //Mode reg 3
-#define BIT_EINT_EN    0x10
+#define BIT_EINT_EN    0x08
 #define BIT_VSCROLL    0x04
 
 //Mode reg 4
@@ -133,6 +135,8 @@
 	int16_t x_pos;
 	uint8_t pal_priority;
 	uint8_t h_flip;
+	uint8_t width;
+	uint8_t height;
 } sprite_draw;
 
 typedef struct {
@@ -163,39 +167,49 @@
 	system_header  *system;
 	//pointer to current line in framebuffer
 	uint32_t       *output;
-	uint32_t       *done_output;
 	//pointer to current framebuffer
 	uint32_t       *fb;
+	uint8_t        *done_composite;
 	uint32_t       *debug_fbs[VDP_NUM_DEBUG_TYPES];
+	char           *kmod_msg_buffer;
+	uint32_t       kmod_buffer_storage;
+	uint32_t       kmod_buffer_length;
+	uint32_t       timer_start_cycle;
 	uint32_t       output_pitch;
 	uint32_t       debug_fb_pitch[VDP_NUM_DEBUG_TYPES];
 	fifo_entry     fifo[FIFO_SIZE];
 	int32_t        fifo_write;
 	int32_t        fifo_read;
 	uint32_t       address;
+	uint32_t       address_latch;
 	uint32_t       serial_address;
 	uint32_t       colors[CRAM_SIZE*4];
 	uint32_t       debugcolors[1 << (3 + 1 + 1 + 1)];//3 bits for source, 1 bit for priority, 1 bit for shadow, 1 bit for hilight
 	uint16_t       cram[CRAM_SIZE];
 	uint32_t       frame;
+	uint32_t       vsram_size;
 	uint8_t        cd;
+	uint8_t        cd_latch;
 	uint8_t	       flags;
 	uint8_t        regs[VDP_REGS];
 	//cycle count in MCLKs
 	uint32_t       cycles;
 	uint32_t       pending_vint_start;
 	uint32_t       pending_hint_start;
-	uint16_t       vsram[VSRAM_SIZE];
+	uint32_t       top_offset;
+	uint16_t       vsram[MAX_VSRAM_SIZE];
 	uint16_t       vscroll_latch[2];
 	uint16_t       vcounter;
 	uint16_t       inactive_start;
 	uint16_t       border_top;
 	uint16_t       border_bot;
 	uint16_t       hscroll_a;
+	uint16_t       hscroll_a_fine;
 	uint16_t       hscroll_b;
+	uint16_t       hscroll_b_fine;
 	uint16_t       h40_lines;
 	uint16_t       output_lines;
-	sprite_draw    sprite_draw_list[MAX_DRAWS];
+	sprite_draw    sprite_draw_list[MAX_SPRITES_LINE];
 	sprite_info    sprite_info_list[MAX_SPRITES_LINE];
 	uint8_t        sat_cache[SAT_CACHE_SIZE];
 	uint16_t       col_1;
@@ -205,12 +219,14 @@
 	uint16_t       test_port;
 	//stores 2-bit palette + 4-bit palette index + priority for current sprite line
 	uint8_t        linebuf[LINEBUF_SIZE];
+	uint8_t        compositebuf[LINEBUF_SIZE];
 	uint8_t        layer_debug_buf[LINEBUF_SIZE];
 	uint8_t        hslot; //hcounter/2
 	uint8_t	       sprite_index;
 	uint8_t        sprite_draws;
 	int8_t         slot_counter;
 	int8_t         cur_slot;
+	uint8_t        sprite_x_offset;
 	uint8_t        max_sprites_frame;
 	uint8_t        max_sprites_line;
 	uint8_t        fetch_tmp[2];
@@ -228,12 +244,13 @@
 	uint8_t        enabled_debuggers;
 	uint8_t        debug_fb_indices[VDP_NUM_DEBUG_TYPES];
 	uint8_t        debug_modes[VDP_NUM_DEBUG_TYPES];
+	uint8_t        pushed_frame;
 	uint8_t        vdpmem[];
 } vdp_context;
 
 
 
-vdp_context *init_vdp_context(uint8_t region_pal);
+vdp_context *init_vdp_context(uint8_t region_pal, uint8_t has_max_vsram);
 void vdp_free(vdp_context *context);
 void vdp_run_context_full(vdp_context * context, uint32_t target_cycles);
 void vdp_run_context(vdp_context * context, uint32_t target_cycles);
@@ -253,7 +270,6 @@
 uint8_t vdp_data_port_read_pbc(vdp_context * context);
 void vdp_latch_hv(vdp_context *context);
 uint16_t vdp_hv_counter_read(vdp_context * context);
-uint16_t vdp_test_port_read(vdp_context * context);
 void vdp_adjust_cycles(vdp_context * context, uint32_t deduction);
 uint32_t vdp_next_hint(vdp_context * context);
 uint32_t vdp_next_vint(vdp_context * context);
@@ -276,5 +292,6 @@
 void vdp_inc_debug_mode(vdp_context *context);
 //to be implemented by the host system
 uint16_t read_dma_value(uint32_t address);
+void vdp_replay_event(vdp_context *context, uint8_t event, event_reader *reader);
 
 #endif //VDP_H_
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/vgm.c	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,133 @@
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#include "vgm.h"
+
+vgm_writer *vgm_write_open(char *filename, uint32_t rate, uint32_t clock, uint32_t cycle) //creates filename, writes an initial VGM header and returns a new writer; returns NULL if the file, the allocation or the header write fails
+{
+	FILE *f = fopen(filename, "wb");
+	vgm_writer *writer = f ? calloc(1, sizeof(vgm_writer)) : NULL; //only allocate once the file is open; calloc leaves all other header fields zero
+	if (writer) {
+		memcpy(writer->header.ident, "Vgm ", 4);
+		writer->header.version = 0x150;
+		writer->header.data_offset = sizeof(writer->header) - offsetof(vgm_header, data_offset); //stored relative to the field's own position in the header
+		writer->header.rate = rate;
+		writer->f = f;
+		writer->master_clock = clock;
+		writer->last_cycle = cycle; //waits are measured from this starting cycle
+	}
+	if (!writer || 1 != fwrite(&writer->header, sizeof(writer->header), 1, f)) { //fopen, calloc or the header write failed
+		free(writer);
+		if (f) {
+			fclose(f);
+		}
+		return NULL;
+	}
+	return writer;
+}
+
+void vgm_sn76489_init(vgm_writer *writer, uint32_t clock, uint16_t feedback, uint8_t shift_reg_size, uint8_t flags) //records the SN76489 parameters in the in-memory header; nothing is written to the file here
+{
+	if (flags && writer->header.version < 0x151) { //a non-zero flags value raises the header version to at least 0x151
+		writer->header.version = 0x151;
+	}
+	writer->header.sn76489_clk = clock;
+	writer->header.sn76489_fb = feedback;
+	writer->header.sn76489_shift = shift_reg_size;
+	writer->header.sn76489_flags = flags;
+}
+
+static void wait_commands(vgm_writer *writer, uint32_t delta)
+{
+	//Emits as many wait commands as needed to cover a delay of delta samples
+	while (delta) {
+		if (delta <= 0x10) {
+			fputc(CMD_WAIT_SHORT + (delta - 1), writer->f);
+			delta = 0;
+		} else if (delta >= 735 && delta <= (735 + 0x10)) {
+			fputc(CMD_WAIT_60, writer->f);
+			delta -= 735;
+		} else if (delta >= 882 && delta <= (882 + 0x10)) {
+			fputc(CMD_WAIT_50, writer->f);
+			delta -= 882;
+		} else {
+			//generic wait with a 16-bit count, low byte first; at most 0xFFFF samples per command
+			uint16_t chunk = delta > 0xFFFF ? 0xFFFF : delta;
+			uint8_t cmd[3] = {CMD_WAIT, chunk, chunk >> 8};
+			fwrite(cmd, 1, sizeof(cmd), writer->f);
+			delta -= chunk;
+		}
+	}
+}
+
+#include "util.h"
+static void add_wait(vgm_writer *writer, uint32_t cycle)
+{
+	//A cycle earlier than the last logged one can occur when a YM-2612 write
+	//directly follows a PSG write, because the PSG's internal clock is coarse.
+	//VGM timing is only 44.1 kHz, so such a write is simply logged with no wait.
+	if (cycle >= writer->last_cycle) {
+		//convert both cycle counts to 44.1 kHz sample indices using 64-bit math
+		const uint64_t clock = writer->master_clock;
+		uint64_t prev_sample = (uint64_t)writer->last_cycle * 44100 / clock;
+		uint64_t cur_sample = ((uint64_t)cycle + writer->extra_delta) * 44100 / clock;
+		uint32_t delta = cur_sample - prev_sample;
+		
+		//the elapsed time is accounted for from here on, so move the reference point
+		writer->last_cycle = cycle;
+		writer->extra_delta = 0;
+		writer->header.num_samples += delta;
+		wait_commands(writer, delta);
+	}
+}
+
+static uint8_t last_cmd; //command byte of the most recent chip write; assigned by the write functions but never read in this file
+void vgm_sn76489_write(vgm_writer *writer, uint32_t cycle, uint8_t value) //logs a PSG write of value at cycle, preceded by any wait needed to reach that cycle
+{
+	add_wait(writer, cycle);
+	last_cmd = CMD_PSG;
+	const uint8_t psg_write[] = {CMD_PSG, value};
+	fwrite(psg_write, 1, sizeof(psg_write), writer->f);
+}
+
+void vgm_ym2612_init(vgm_writer *writer, uint32_t clock) //records the YM2612 clock in the in-memory header; nothing is written to the file here
+{
+	writer->header.ym2612_clk = clock;
+}
+
+void vgm_ym2612_part1_write(vgm_writer *writer, uint32_t cycle, uint8_t reg, uint8_t value) //logs a YM2612 part 1 register write at cycle, preceded by any wait needed to reach that cycle
+{
+	add_wait(writer, cycle);
+	last_cmd = CMD_YM2612_0;
+	const uint8_t ym_write[] = {CMD_YM2612_0, reg, value};
+	fwrite(ym_write, 1, sizeof(ym_write), writer->f);
+}
+
+void vgm_ym2612_part2_write(vgm_writer *writer, uint32_t cycle, uint8_t reg, uint8_t value) //logs a YM2612 part 2 register write at cycle, preceded by any wait needed to reach that cycle
+{
+	add_wait(writer, cycle);
+	last_cmd = CMD_YM2612_1;
+	const uint8_t ym_write[] = {CMD_YM2612_1, reg, value};
+	fwrite(ym_write, 1, sizeof(ym_write), writer->f);
+}
+
+void vgm_adjust_cycles(vgm_writer *writer, uint32_t deduction) //moves the writer's reference cycle back by deduction without letting it underflow
+{
+	if (deduction <= writer->last_cycle) {
+		writer->last_cycle -= deduction;
+		return;
+	}
+	writer->extra_delta += deduction - writer->last_cycle; //bank the shortfall; add_wait adds it to the next cycle it is given
+	writer->last_cycle = 0;
+}
+
+void vgm_close(vgm_writer *writer) //finalizes the file (end marker, then the completed header) and frees the writer; writer must not be used afterwards
+{
+	uint8_t cmd = 0x66; //VGM end of sound data command
+	fwrite(&cmd, 1, sizeof(cmd), writer->f);
+	writer->header.eof_offset = ftell(writer->f) - offsetof(vgm_header, eof_offset); //stored relative to the field's own position in the header
+	fseek(writer->f, 0, SEEK_SET); //rewind so the completed header overwrites the one written at open
+	fwrite(&writer->header, sizeof(writer->header), 1, writer->f);
+	fclose(writer->f);
+	free(writer);
+}
\ No newline at end of file
--- a/vgm.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/vgm.h	Sat Jan 15 13:15:21 2022 -0800
@@ -1,6 +1,9 @@
 #ifndef VGM_H_
 #define VGM_H_
 
+#include <stdint.h>
+#include <stdio.h>
+
 #pragma pack(push, 1)
 typedef struct {
 	char     ident[4];
@@ -71,4 +74,21 @@
 	uint8_t           type;
 } data_block;
 
+typedef struct {
+	vgm_header header; //in-memory copy of the file header; written by vgm_write_open and again by vgm_close
+	FILE       *f; //output file, closed by vgm_close
+	uint32_t   master_clock; //divisor used when converting cycle counts to 44.1 kHz sample counts
+	uint32_t   last_cycle; //cycle up to which wait commands have been emitted
+	uint32_t   extra_delta; //cycles banked by vgm_adjust_cycles when a deduction exceeds last_cycle
+} vgm_writer;
+
+vgm_writer *vgm_write_open(char *filename, uint32_t rate, uint32_t clock, uint32_t cycle);
+void vgm_sn76489_init(vgm_writer *writer, uint32_t clock, uint16_t feedback, uint8_t shift_reg_size, uint8_t flags);
+void vgm_sn76489_write(vgm_writer *writer, uint32_t cycle, uint8_t value);
+void vgm_ym2612_init(vgm_writer *writer, uint32_t clock);
+void vgm_ym2612_part1_write(vgm_writer *writer, uint32_t cycle, uint8_t reg, uint8_t value);
+void vgm_ym2612_part2_write(vgm_writer *writer, uint32_t cycle, uint8_t reg, uint8_t value);
+void vgm_adjust_cycles(vgm_writer *writer, uint32_t deduction);
+void vgm_close(vgm_writer *writer);
+
 #endif //VGM_H_
--- a/vgmplay.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/vgmplay.c	Sat Jan 15 13:15:21 2022 -0800
@@ -13,6 +13,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "vgm.h"
+#include "system.h"
 
 #define MCLKS_NTSC 53693175
 #define MCLKS_PAL  53203395
@@ -22,6 +23,31 @@
 #define MCLKS_PER_Z80 15
 #define MCLKS_PER_PSG (MCLKS_PER_Z80*16)
 
+
+#ifdef DISABLE_ZLIB
+#define VGMFILE FILE*
+#define vgmopen fopen
+#define vgmread fread
+#define vgmseek fseek
+#define vgmgetc fgetc
+#define vgmclose fclose
+#else
+#include "zlib/zlib.h"
+#define VGMFILE gzFile
+#define vgmopen gzopen
+#define vgmread gzfread
+#define vgmseek gzseek
+#define vgmgetc gzgetc
+#define vgmclose gzclose
+#endif
+
+
+system_header *current_system;
+
+void system_request_exit(system_header *system, uint8_t force_release) //intentionally empty in this program; presumably only present to satisfy a reference from shared code - confirm
+{
+}
+
 void handle_keydown(int keycode)
 {
 }
@@ -62,10 +88,6 @@
 {
 }
 
-void controller_add_mappings()
-{
-}
-
 int headless = 0;
 
 #define CYCLE_LIMIT MCLKS_NTSC/60
@@ -117,17 +139,17 @@
 	psg_context p_context;
 	psg_init(&p_context, MCLKS_NTSC, MCLKS_PER_PSG);
 
-	FILE * f = fopen(argv[1], "rb");
+	VGMFILE f = vgmopen(argv[1], "rb"); //VGMFILE already expands to a complete handle type (FILE* or gzFile), so no extra pointer level
 	vgm_header header;
-	fread(&header, sizeof(header), 1, f);
+	vgmread(&header, sizeof(header), 1, f);
 	if (header.version < 0x150 || !header.data_offset) {
 		header.data_offset = 0xC;
 	}
-	fseek(f, header.data_offset + 0x34, SEEK_SET);
+	vgmseek(f, header.data_offset + 0x34, SEEK_SET);
 	uint32_t data_size = header.eof_offset + 4 - (header.data_offset + 0x34);
 	uint8_t * data = malloc(data_size);
-	fread(data, 1, data_size, f);
-	fclose(f);
+	vgmread(data, 1, data_size, f);
+	vgmclose(f);
 
 	uint32_t mclks_sample = MCLKS_NTSC / 44100;
 	uint32_t loop_count = 2;
--- a/ym2612.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/ym2612.c	Sat Jan 15 13:15:21 2022 -0800
@@ -11,6 +11,7 @@
 #include "render.h"
 #include "wave.h"
 #include "blastem.h"
+#include "event_log.h"
 
 //#define DO_DEBUG_PRINT
 #ifdef DO_DEBUG_PRINT
@@ -21,9 +22,7 @@
 #define dfopen(var, fname, mode)
 #endif
 
-#define BUSY_CYCLES_ADDRESS 17
-#define BUSY_CYCLES_DATA_LOW 83
-#define BUSY_CYCLES_DATA_HIGH 47
+#define BUSY_CYCLES 32
 #define OP_UPDATE_PERIOD 144
 
 #define BIT_TIMERA_ENABLE 0x1
@@ -122,6 +121,27 @@
 	render_audio_adjust_clock(context->audio, master_clock, context->clock_inc * NUM_OPERATORS);
 }
 
+void ym_adjust_cycles(ym2612_context *context, uint32_t deduction) //rebases the stored cycle stamps by deduction; stamps that are unset or smaller than deduction become CYCLE_NEVER
+{
+	context->current_cycle -= deduction;
+	if (context->write_cycle == CYCLE_NEVER || context->write_cycle < deduction) {
+		context->write_cycle = CYCLE_NEVER;
+	} else {
+		context->write_cycle -= deduction;
+	}
+	if (context->busy_start == CYCLE_NEVER || context->busy_start < deduction) {
+		context->busy_start = CYCLE_NEVER;
+	} else {
+		context->busy_start -= deduction;
+	}
+	if (context->last_status_cycle == CYCLE_NEVER || context->last_status_cycle < deduction) {
+		context->last_status = 0; //the remembered status value is dropped together with its stamp
+		context->last_status_cycle = CYCLE_NEVER;
+	} else {
+		context->last_status_cycle -= deduction;
+	}
+}
+
 #ifdef __ANDROID__
 #define log2(x) (log(x)/log(2))
 #endif
@@ -159,6 +179,11 @@
 	for (int i = 0; i < NUM_CHANNELS; i++) {
 		context->channels[i].lr = 0xC0;
 		context->channels[i].logfile = savedlogs[i];
+		if (i < 3) {
+			context->part1_regs[REG_LR_AMS_PMS - YM_PART1_START + i] = 0xC0;
+		} else {
+			context->part2_regs[REG_LR_AMS_PMS - YM_PART2_START + i - 3] = 0xC0;
+		}
 	}
 	context->write_cycle = CYCLE_NEVER;
 	for (int i = 0; i < NUM_OPERATORS; i++) {
@@ -173,7 +198,11 @@
 	dfopen(debug_file, "ym_debug.txt", "w");
 	memset(context, 0, sizeof(*context));
 	context->clock_inc = clock_div * 6;
+	context->busy_cycles = BUSY_CYCLES * context->clock_inc;
 	context->audio = render_audio_source(master_clock, context->clock_inc * NUM_OPERATORS, 2);
+	//TODO: pick a randomish high initial value and lower it over time
+	context->invalid_status_decay = 225000 * context->clock_inc;
+	context->status_address_mask = (options & YM_OPT_3834) ? 0 : 3;
 	
 	//some games seem to expect that the LR flags start out as 1
 	for (int i = 0; i < NUM_CHANNELS; i++) {
@@ -256,6 +285,7 @@
 		}
 	}
 	ym_reset(context);
+	ym_enable_zero_offset(context, 1);
 }
 
 void ym_free(ym2612_context *context)
@@ -267,8 +297,18 @@
 	free(context);
 }
 
-#define YM_VOLUME_MULTIPLIER 2
-#define YM_VOLUME_DIVIDER 3
+void ym_enable_zero_offset(ym2612_context *context, uint8_t enabled)
+{
+	//Switches between the two output configurations of the context:
+	//  enabled:  zero_offset of 0x70, volume scaled by 79/120
+	//  disabled: no zero offset, volume scaled by 2/3
+	//Any non-zero value of enabled selects the first configuration.
+	//Only these three fields are modified.
+	const uint8_t on = enabled != 0;
+	context->zero_offset = on ? 0x70 : 0;
+	context->volume_mult = on ? 79 : 2;
+	context->volume_div = on ? 120 : 3;
+}
 #define YM_MOD_SHIFT 1
 
 #define CSM_MODE 0x80
@@ -327,128 +367,290 @@
 	}
 }
 
+void ym_run_timers(ym2612_context *context) //advances timer A, timer B and the LFO by one step
+{
+	if (context->timer_control & BIT_TIMERA_ENABLE) {
+		if (context->timer_a != TIMER_A_MAX) {
+			context->timer_a++;
+			if (context->csm_keyon) { //a CSM key-on from an earlier overflow is still active, release it
+				csm_keyoff(context);
+			}
+		} else {
+			if (context->timer_control & BIT_TIMERA_LOAD) { //a pending load is consumed instead of raising the overflow flag
+				context->timer_control &= ~BIT_TIMERA_LOAD;
+			} else if (context->timer_control & BIT_TIMERA_OVEREN) {
+				context->status |= BIT_STATUS_TIMERA;
+			}
+			context->timer_a = context->timer_a_load; //reload in either case
+			if (!context->csm_keyon && context->ch3_mode == CSM_MODE) {
+				context->csm_keyon = 0xF0;
+				uint8_t changes = 0xF0 ^ context->channels[2].keyon;;
+				for (uint8_t op = 2*4, bit = 0; op < 3*4; op++, bit++) //operators 8-11, i.e. the four operators of channels[2]
+				{
+					if (changes & keyon_bits[bit]) { //only key on operators whose key-on bit is not already set
+						keyon(context->operators + op, context->channels + 2);
+					}
+				}
+			}
+		}
+	}
+	if (!context->sub_timer_b) { //timer B only steps on calls where sub_timer_b is zero
+		if (context->timer_control & BIT_TIMERB_ENABLE) {
+			if (context->timer_b != TIMER_B_MAX) {
+				context->timer_b++;
+			} else {
+				if (context->timer_control & BIT_TIMERB_LOAD) {
+					context->timer_control &= ~BIT_TIMERB_LOAD;
+				} else if (context->timer_control & BIT_TIMERB_OVEREN) {
+					context->status |= BIT_STATUS_TIMERB;
+				}
+				context->timer_b = context->timer_b_load;
+			}
+		}
+	} else if (context->timer_control & BIT_TIMERB_LOAD) { //between timer B steps a pending load is applied immediately
+		context->timer_control &= ~BIT_TIMERB_LOAD;
+		context->timer_b = context->timer_b_load;
+	}
+	context->sub_timer_b += 0x10; //presumably an 8-bit field that wraps to zero every 16 calls - confirm against the struct declaration
+	//Update LFO
+	if (context->lfo_enable) {
+		if (context->lfo_counter) {
+			context->lfo_counter--;
+		} else {
+			context->lfo_counter = lfo_timer_values[context->lfo_freq]; //reload the divider for the selected LFO frequency
+			context->lfo_am_step += 2;
+			context->lfo_am_step &= 0xFE;
+			uint8_t old_pm_step = context->lfo_pm_step;
+			context->lfo_pm_step = context->lfo_am_step / 8;
+			if (context->lfo_pm_step != old_pm_step) { //phase increments are cached, so refresh them when the PM step changes
+				for (int chan = 0; chan < NUM_CHANNELS; chan++)
+				{
+					if (context->channels[chan].pms) { //only channels with a non-zero pms value are recalculated
+						for (int op = chan * 4; op < (chan + 1) * 4; op++)
+						{
+							context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
+void ym_run_envelope(ym2612_context *context, ym_channel *channel, ym_operator *operator) //advances the envelope generator of one operator by one step of context->env_counter
+{
+	uint32_t env_cyc = context->env_counter;
+	uint8_t rate;
+	if (operator->env_phase == PHASE_DECAY && operator->envelope >= operator->sustain_level) { //decay ends once the envelope value reaches the sustain level
+		//operator->envelope = operator->sustain_level;
+		operator->env_phase = PHASE_SUSTAIN;
+	}
+	rate = operator->rates[operator->env_phase];
+	if (rate) { //a rate of zero is left untouched, key scaling is not applied to it
+		uint8_t ks = channel->keycode >> operator->key_scaling;;
+		rate = rate*2 + ks;
+		if (rate > 63) {
+			rate = 63;
+		}
+	}
+	uint32_t cycle_shift = rate < 0x30 ? ((0x2F - rate) >> 2) : 0; //rates below 0x30 only update on every 2^cycle_shift-th counter value
+	if (!(env_cyc & ((1 << cycle_shift) - 1))) {
+		uint32_t update_cycle = env_cyc >> cycle_shift & 0x7; //selects one of the 8 rate_table entries for this rate
+		uint16_t envelope_inc = rate_table[rate * 8 + update_cycle];
+		if (operator->env_phase == PHASE_ATTACK) {
+			//this can probably be optimized to a single shift rather than a multiply + shift
+			uint16_t old_env = operator->envelope;
+			operator->envelope += ((~operator->envelope * envelope_inc) >> 4) & 0xFFFFFFFC;
+			if (operator->envelope > old_env) {
+				//Handle overflow
+				operator->envelope = 0;
+			}
+			if (!operator->envelope) { //attack is finished once the envelope value reaches zero
+				operator->env_phase = PHASE_DECAY;
+			}
+		} else {
+			if (operator->ssg) { //SSG mode: 4x increment below SSG_CENTER, no movement at or above it
+				if (operator->envelope < SSG_CENTER) {
+					envelope_inc *= 4;
+				} else {
+					envelope_inc = 0;
+				}
+			}
+			//envelope value is 10-bits, but it will be used as a 4.8 value
+			operator->envelope += envelope_inc << 2;
+			//clamp to max attenuation value
+			if (
+				operator->envelope > MAX_ENVELOPE 
+				|| (operator->env_phase == PHASE_RELEASE && operator->envelope >= SSG_CENTER)
+			) {
+				operator->envelope = MAX_ENVELOPE;
+			}
+		}
+	}
+}
+
+void ym_run_phase(ym2612_context *context, uint32_t channel, uint32_t op) //computes the next output of operator op and, after a channel's last operator, the channel output
+{
+	if (channel != 5 || !context->dac_enable) { //nothing is computed for channel index 5 while dac_enable is set
+		//printf("updating operator %d of channel %d\n", op, channel);
+		ym_operator * operator = context->operators + op;
+		ym_channel * chan = context->channels + channel;
+		uint16_t phase = operator->phase_counter >> 10 & 0x3FF; //bits 10-19 of the counter, sampled before it is advanced
+		operator->phase_counter += operator->phase_inc;//ym_calc_phase_inc(context, operator, op);
+		int16_t mod = 0;
+		if (op & 3) { //not the first operator of its channel: modulation comes from up to two other outputs
+			if (operator->mod_src[0]) {
+				mod = *operator->mod_src[0];
+				if (operator->mod_src[1]) {
+					mod += *operator->mod_src[1];
+				}
+				mod >>= YM_MOD_SHIFT;
+			}
+		} else { //first operator of the channel: modulated by feedback from its own two previous outputs
+			if (chan->feedback) {
+				mod = (chan->op1_old + operator->output) >> (10-chan->feedback);
+			}
+		}
+		uint16_t env = operator->envelope;
+		if (operator->ssg) {
+			if (env >= SSG_CENTER) {
+				if (operator->ssg & SSG_ALTERNATE) {
+					if (operator->env_phase != PHASE_RELEASE && (
+						!(operator->ssg & SSG_HOLD) || ((operator->ssg ^ operator->inverted) & SSG_INVERT) == 0
+					)) {
+						operator->inverted ^= SSG_INVERT;
+					}
+				} else if (!(operator->ssg & SSG_HOLD)) { //neither alternate nor hold: restart the phase
+					phase = operator->phase_counter = 0;
+				}
+				if (
+					(operator->env_phase == PHASE_DECAY || operator->env_phase == PHASE_SUSTAIN) 
+					&& !(operator->ssg & SSG_HOLD)
+				) { //without hold the envelope is restarted and its new value is used right away
+					start_envelope(operator, chan);
+					env = operator->envelope;
+				}
+			}
+			if (operator->inverted) {
+				env = (SSG_CENTER - env) & MAX_ENVELOPE;
+			}
+		}
+		env += operator->total_level;
+		if (operator->am) { //add LFO amplitude modulation, scaled by the channel's ams setting
+			uint16_t base_am = (context->lfo_am_step & 0x80 ? context->lfo_am_step : ~context->lfo_am_step) & 0x7E;
+			if (ams_shift[chan->ams] >= 0) { //non-negative table entries shift right, negative ones shift left
+				env += (base_am >> ams_shift[chan->ams]) & MAX_ENVELOPE;
+			} else {
+				env += base_am << (-ams_shift[chan->ams]);
+			}
+		}
+		if (env > MAX_ENVELOPE) {
+			env = MAX_ENVELOPE;
+		}
+		if (first_key_on) {
+			dfprintf(debug_file, "op %d, base phase: %d, mod: %d, sine: %d, out: %d\n", op, phase, mod, sine_table[(phase+mod) & 0x1FF], pow_table[sine_table[phase & 0x1FF] + env]);
+		}
+		//if ((channel != 0 && channel != 4) || chan->algorithm != 5) {
+			phase += mod;
+		//}
+
+		int16_t output = pow_table[sine_table[phase & 0x1FF] + env]; //low 9 phase bits index the sine table, bit 9 selects the sign below
+		if (phase & 0x200) {
+			output = -output;
+		}
+		if (op % 4 == 0) { //keep the previous output of operator index 0 (used for feedback above) and of index 2
+			chan->op1_old = operator->output;
+		} else if (op % 4 == 2) {
+			chan->op2_old = operator->output;
+		}
+		operator->output = output;
+		//Update the channel output if we've updated all operators
+		if (op % 4 == 3) {
+			if (chan->algorithm < 4) { //algorithms 0-3: only the last operator is output
+				chan->output = operator->output;
+			} else if(chan->algorithm == 4) { //algorithm 4: the last operator plus the channel's operator at index 2
+				chan->output = operator->output + context->operators[channel * 4 + 2].output;
+			} else { //algorithms 5 and 6 sum operator indices 1-3, algorithm 7 sums all four
+				output = 0;
+				for (uint32_t op = ((chan->algorithm == 7) ? 0 : 1) + channel*4; op < (channel+1)*4; op++) {
+					output += context->operators[op].output;
+				}
+				chan->output = output;
+			}
+			if (first_key_on) { //value computed here is never used afterwards within this function
+				int16_t value = context->channels[channel].output & 0x3FE0;
+				if (value & 0x2000) {
+					value |= 0xC000;
+				}
+			}
+		}
+		//puts("operator update done");
+	}
+}
+
+void ym_output_sample(ym2612_context *context) //mixes the current channel outputs into one stereo sample and hands it to the audio source
+{
+	int16_t left = 0, right = 0;
+	for (int i = 0; i < NUM_CHANNELS; i++) {
+		int16_t value = context->channels[i].output;
+		if (value > 0x1FE0) { //clamp to the range 0x1FE0 .. -0x1FF0
+			value = 0x1FE0;
+		} else if (value < -0x1FF0) {
+			value = -0x1FF0;
+		} else { //inside the range: drop the low 5 bits and sign-extend from bit 13
+			value &= 0x3FE0;
+			if (value & 0x2000) {
+				value |= 0xC000;
+			}
+		}
+		if (value >= 0) { //push the value away from zero by zero_offset (0 when the offset is disabled)
+			value += context->zero_offset;
+		} else {
+			value -= context->zero_offset;
+		}
+		if (context->channels[i].logfile) { //optional per-channel log of the raw 16-bit value
+			fwrite(&value, sizeof(value), 1, context->channels[i].logfile);
+		}
+		if (context->channels[i].lr & 0x80) { //bit 0x80 of lr enables the left output
+			left += (value * context->volume_mult) / context->volume_div;
+		} else if (context->zero_offset) { //a channel muted on this side still contributes the signed zero offset
+			if (value >= 0) {
+				left += (context->zero_offset * context->volume_mult) / context->volume_div;
+			} else {
+				left -= (context->zero_offset * context->volume_mult) / context->volume_div;
+			}
+		}
+		if (context->channels[i].lr & 0x40) { //bit 0x40 of lr enables the right output
+			right += (value * context->volume_mult) / context->volume_div;
+		} else if (context->zero_offset) {
+			if (value >= 0) {
+				right += (context->zero_offset * context->volume_mult) / context->volume_div;
+			} else {
+				right -= (context->zero_offset * context->volume_mult) / context->volume_div;
+			}
+		}
+	}
+	render_put_stereo_sample(context->audio, left, right);
+}
+
 void ym_run(ym2612_context * context, uint32_t to_cycle)
 {
+	if (context->current_cycle >= to_cycle) {
+		return;
+	}
 	//printf("Running YM2612 from cycle %d to cycle %d\n", context->current_cycle, to_cycle);
 	//TODO: Fix channel update order OR remap channels in register write
 	for (; context->current_cycle < to_cycle; context->current_cycle += context->clock_inc) {
 		//Update timers at beginning of 144 cycle period
 		if (!context->current_op) {
-			if (context->timer_control & BIT_TIMERA_ENABLE) {
-				if (context->timer_a != TIMER_A_MAX) {
-					context->timer_a++;
-					if (context->csm_keyon) {
-						csm_keyoff(context);
-					}
-				} else {
-					if (context->timer_control & BIT_TIMERA_LOAD) {
-						context->timer_control &= ~BIT_TIMERA_LOAD;
-					} else if (context->timer_control & BIT_TIMERA_OVEREN) {
-						context->status |= BIT_STATUS_TIMERA;
-					}
-					context->timer_a = context->timer_a_load;
-					if (!context->csm_keyon && context->ch3_mode == CSM_MODE) {
-						context->csm_keyon = 0xF0;
-						uint8_t changes = 0xF0 ^ context->channels[2].keyon;;
-						for (uint8_t op = 2*4, bit = 0; op < 3*4; op++, bit++)
-						{
-							if (changes & keyon_bits[bit]) {
-								keyon(context->operators + op, context->channels + 2);
-							}
-						}
-					}
-				}
-			}
-			if (!context->sub_timer_b) {
-				if (context->timer_control & BIT_TIMERB_ENABLE) {
-					if (context->timer_b != TIMER_B_MAX) {
-						context->timer_b++;
-					} else {
-						if (context->timer_control & BIT_TIMERB_LOAD) {
-							context->timer_control &= ~BIT_TIMERB_LOAD;
-						} else if (context->timer_control & BIT_TIMERB_OVEREN) {
-							context->status |= BIT_STATUS_TIMERB;
-						}
-						context->timer_b = context->timer_b_load;
-					}
-				}
-			}
-			context->sub_timer_b += 0x10;
-			//Update LFO
-			if (context->lfo_enable) {
-				if (context->lfo_counter) {
-					context->lfo_counter--;
-				} else {
-					context->lfo_counter = lfo_timer_values[context->lfo_freq];
-					context->lfo_am_step += 2;
-					context->lfo_am_step &= 0xFE;
-					context->lfo_pm_step = context->lfo_am_step / 8;
-				}
-			}
+			ym_run_timers(context);
 		}
 		//Update Envelope Generator
 		if (!(context->current_op % 3)) {
-			uint32_t env_cyc = context->env_counter;
 			uint32_t op = context->current_env_op;
 			ym_operator * operator = context->operators + op;
 			ym_channel * channel = context->channels + op/4;
-			uint8_t rate;
-			if (operator->env_phase == PHASE_DECAY && operator->envelope >= operator->sustain_level) {
-				//operator->envelope = operator->sustain_level;
-				operator->env_phase = PHASE_SUSTAIN;
-			}
-			rate = operator->rates[operator->env_phase];
-			if (rate) {
-				uint8_t ks = channel->keycode >> operator->key_scaling;;
-				rate = rate*2 + ks;
-				if (rate > 63) {
-					rate = 63;
-				}
-			}
-			uint32_t cycle_shift = rate < 0x30 ? ((0x2F - rate) >> 2) : 0;
-			if (first_key_on) {
-				dfprintf(debug_file, "Operator: %d, env rate: %d (2*%d+%d), env_cyc: %d, cycle_shift: %d, env_cyc & ((1 << cycle_shift) - 1): %d\n", op, rate, operator->rates[operator->env_phase], channel->keycode >> operator->key_scaling,env_cyc, cycle_shift, env_cyc & ((1 << cycle_shift) - 1));
-			}
-			if (!(env_cyc & ((1 << cycle_shift) - 1))) {
-				uint32_t update_cycle = env_cyc >> cycle_shift & 0x7;
-				uint16_t envelope_inc = rate_table[rate * 8 + update_cycle];
-				if (operator->env_phase == PHASE_ATTACK) {
-					//this can probably be optimized to a single shift rather than a multiply + shift
-					if (first_key_on) {
-						dfprintf(debug_file, "Changing op %d envelope %d by %d(%d * %d) in attack phase\n", op, operator->envelope, (~operator->envelope * envelope_inc) >> 4, ~operator->envelope, envelope_inc);
-					}
-					uint16_t old_env = operator->envelope;
-					operator->envelope += ((~operator->envelope * envelope_inc) >> 4) & 0xFFFFFFFC;
-					if (operator->envelope > old_env) {
-						//Handle overflow
-						operator->envelope = 0;
-					}
-					if (!operator->envelope) {
-						operator->env_phase = PHASE_DECAY;
-					}
-				} else {
-					if (first_key_on) {
-						dfprintf(debug_file, "Changing op %d envelope %d by %d in %s phase\n", op, operator->envelope, envelope_inc,
-							operator->env_phase == PHASE_SUSTAIN ? "sustain" : (operator->env_phase == PHASE_DECAY ? "decay": "release"));
-					}
-					if (operator->ssg) {
-						if (operator->envelope < SSG_CENTER) {
-							envelope_inc *= 4;
-						} else {
-							envelope_inc = 0;
-						}
-					}
-					//envelope value is 10-bits, but it will be used as a 4.8 value
-					operator->envelope += envelope_inc << 2;
-					//clamp to max attenuation value
-					if (
-						operator->envelope > MAX_ENVELOPE 
-						|| (operator->env_phase == PHASE_RELEASE && operator->envelope >= SSG_CENTER)
-					) {
-						operator->envelope = MAX_ENVELOPE;
-					}
-				}
-			}
+			ym_run_envelope(context, channel, operator);
 			context->current_env_op++;
 			if (context->current_env_op == NUM_OPERATORS) {
 				context->current_env_op = 0;
@@ -457,138 +659,14 @@
 		}
 
 		//Update Phase Generator
-		uint32_t channel = context->current_op / 4;
-		if (channel != 5 || !context->dac_enable) {
-			uint32_t op = context->current_op;
-			//printf("updating operator %d of channel %d\n", op, channel);
-			ym_operator * operator = context->operators + op;
-			ym_channel * chan = context->channels + channel;
-			uint16_t phase = operator->phase_counter >> 10 & 0x3FF;
-			operator->phase_counter += ym_calc_phase_inc(context, operator, context->current_op);
-			int16_t mod = 0;
-			if (op & 3) {
-				if (operator->mod_src[0]) {
-					mod = *operator->mod_src[0];
-					if (operator->mod_src[1]) {
-						mod += *
-						operator->mod_src[1];
-					}
-					mod >>= YM_MOD_SHIFT;
-				}
-			} else {
-				if (chan->feedback) {
-					mod = (chan->op1_old + operator->output) >> (10-chan->feedback);
-				}
-			}
-			uint16_t env = operator->envelope;
-			if (operator->ssg) {
-				if (env >= SSG_CENTER) {
-					if (operator->ssg & SSG_ALTERNATE) {
-						if (operator->env_phase != PHASE_RELEASE && (
-							!(operator->ssg & SSG_HOLD) || ((operator->ssg ^ operator->inverted) & SSG_INVERT) == 0
-						)) {
-							operator->inverted ^= SSG_INVERT;
-						}
-					} else if (!(operator->ssg & SSG_HOLD)) {
-						phase = operator->phase_counter = 0;
-					}
-					if (
-						(operator->env_phase == PHASE_DECAY || operator->env_phase == PHASE_SUSTAIN) 
-						&& !(operator->ssg & SSG_HOLD)
-					) {
-						start_envelope(operator, chan);
-						env = operator->envelope;
-					}
-				}
-				if (operator->inverted) {
-					env = (SSG_CENTER - env) & MAX_ENVELOPE;
-				}
-			}
-			env += operator->total_level;
-			if (operator->am) {
-				uint16_t base_am = (context->lfo_am_step & 0x80 ? context->lfo_am_step : ~context->lfo_am_step) & 0x7E;
-				if (ams_shift[chan->ams] >= 0) {
-					env += base_am >> ams_shift[chan->ams];
-				} else {
-					env += base_am << (-ams_shift[chan->ams]);
-				}
-			}
-			if (env > MAX_ENVELOPE) {
-				env = MAX_ENVELOPE;
-			}
-			if (first_key_on) {
-				dfprintf(debug_file, "op %d, base phase: %d, mod: %d, sine: %d, out: %d\n", op, phase, mod, sine_table[(phase+mod) & 0x1FF], pow_table[sine_table[phase & 0x1FF] + env]);
-			}
-			//if ((channel != 0 && channel != 4) || chan->algorithm != 5) {
-				phase += mod;
-			//}
-
-			int16_t output = pow_table[sine_table[phase & 0x1FF] + env];
-			if (phase & 0x200) {
-				output = -output;
-			}
-			if (op % 4 == 0) {
-				chan->op1_old = operator->output;
-			}
-			operator->output = output;
-			//Update the channel output if we've updated all operators
-			if (op % 4 == 3) {
-				if (chan->algorithm < 4) {
-					chan->output = operator->output;
-				} else if(chan->algorithm == 4) {
-					chan->output = operator->output + context->operators[channel * 4 + 2].output;
-				} else {
-					output = 0;
-					for (uint32_t op = ((chan->algorithm == 7) ? 0 : 1) + channel*4; op < (channel+1)*4; op++) {
-						output += context->operators[op].output;
-					}
-					chan->output = output;
-				}
-				if (first_key_on) {
-					int16_t value = context->channels[channel].output & 0x3FE0;
-					if (value & 0x2000) {
-						value |= 0xC000;
-					}
-					dfprintf(debug_file, "channel %d output: %d\n", channel, (value * YM_VOLUME_MULTIPLIER) / YM_VOLUME_DIVIDER);
-				}
-			}
-			//puts("operator update done");
-		}
+		ym_run_phase(context, context->current_op / 4, context->current_op);
 		context->current_op++;
 		if (context->current_op == NUM_OPERATORS) {
 			context->current_op = 0;
-			
-			int16_t left = 0, right = 0;
-			for (int i = 0; i < NUM_CHANNELS; i++) {
-				int16_t value = context->channels[i].output;
-				if (value > 0x1FE0) {
-					value = 0x1FE0;
-				} else if (value < -0x1FF0) {
-					value = -0x1FF0;
-				} else {
-					value &= 0x3FE0;
-					if (value & 0x2000) {
-						value |= 0xC000;
-					}
-				}
-				if (context->channels[i].logfile) {
-					fwrite(&value, sizeof(value), 1, context->channels[i].logfile);
-				}
-				if (context->channels[i].lr & 0x80) {
-					left += (value * YM_VOLUME_MULTIPLIER) / YM_VOLUME_DIVIDER;
-				}
-				if (context->channels[i].lr & 0x40) {
-					right += (value * YM_VOLUME_MULTIPLIER) / YM_VOLUME_DIVIDER;
-				}
-			}
-			render_put_stereo_sample(context->audio, left, right);
+			ym_output_sample(context);
 		}
 		
 	}
-	if (context->current_cycle >= context->write_cycle + (context->busy_cycles * context->clock_inc / 6)) {
-		context->status &= 0x7F;
-		context->write_cycle = CYCLE_NEVER;
-	}
 	//printf("Done running YM2612 at cycle %d\n", context->current_cycle, to_cycle);
 }
 
@@ -597,9 +675,6 @@
 	//printf("address_write_part1: %X\n", address);
 	context->selected_reg = address;
 	context->selected_part = 0;
-	context->write_cycle = context->current_cycle;
-	context->busy_cycles = BUSY_CYCLES_ADDRESS;
-	context->status |= 0x80;
 }
 
 void ym_address_write_part2(ym2612_context * context, uint8_t address)
@@ -607,9 +682,6 @@
 	//printf("address_write_part2: %X\n", address);
 	context->selected_reg = address;
 	context->selected_part = 1;
-	context->write_cycle = context->current_cycle;
-	context->busy_cycles = BUSY_CYCLES_ADDRESS;
-	context->status |= 0x80;
 }
 
 static uint8_t fnum_to_keycode[] = {
@@ -671,6 +743,7 @@
 		inc = context->ch3_supp[index].fnum;
 		if (channel->pms) {
 			inc = inc * 2 + lfo_pm_table[(inc & 0x7F0) * 16 + channel->pms + context->lfo_pm_step];
+			inc &= 0xFFF;
 		}
 		if (!context->ch3_supp[index].block) {
 			inc >>= 1;
@@ -683,6 +756,7 @@
 		inc = channel->fnum;
 		if (channel->pms) {
 			inc = inc * 2 + lfo_pm_table[(inc & 0x7F0) * 16 + channel->pms + context->lfo_pm_step];
+			inc &= 0xFFF;
 		}
 		if (!channel->block) {
 			inc >>= 1;
@@ -714,8 +788,32 @@
 	return inc;
 }
 
+void ym_vgm_log(ym2612_context *context, uint32_t master_clock, vgm_writer *vgm) //Attaches a VGM writer and writes the stored register values to it
+{
+	vgm_ym2612_init(vgm, 6 * master_clock / context->clock_inc); //NOTE(review): second argument is presumably the chip clock rate - confirm against vgm.h
+	context->vgm = vgm; //ym_data_write forwards register writes to the writer while this is set
+	for (uint8_t reg = YM_PART1_START; reg < YM_REG_END; reg++) { //dump the stored part 1 values, all stamped with the current cycle
+		if ((reg >= REG_DETUNE_MULT && (reg & 3) == 3) || (reg >= 0x2D && reg < REG_DETUNE_MULT) || reg == 0x23 || reg == 0x29) {
+			//skip invalid registers: every 4th register from REG_DETUNE_MULT up, 0x2D up to REG_DETUNE_MULT, 0x23 and 0x29
+			continue;
+		}
+		vgm_ym2612_part1_write(context->vgm, context->current_cycle, reg, context->part1_regs[reg - YM_PART1_START]);
+	}
+	
+	for (uint8_t reg = YM_PART2_START; reg < YM_REG_END; reg++) { //dump the stored part 2 values the same way
+		if ((reg & 3) == 3 || (reg >= REG_FNUM_LOW_CH3 && reg < REG_ALG_FEEDBACK)) {
+			//skip invalid registers: every 4th register and the range from REG_FNUM_LOW_CH3 up to REG_ALG_FEEDBACK
+			continue;
+		}
+		vgm_ym2612_part2_write(context->vgm, context->current_cycle, reg, context->part2_regs[reg - YM_PART2_START]);
+	}
+}
+
 void ym_data_write(ym2612_context * context, uint8_t value)
 {
+	context->write_cycle = context->current_cycle;
+	context->busy_start = context->current_cycle + context->clock_inc;
+	
 	if (context->selected_reg >= YM_REG_END) {
 		return;
 	}
@@ -723,13 +821,21 @@
 		if (context->selected_reg < YM_PART2_START) {
 			return;
 		}
+		if (context->vgm) {
+			vgm_ym2612_part2_write(context->vgm, context->current_cycle, context->selected_reg, value);
+		}
 		context->part2_regs[context->selected_reg - YM_PART2_START] = value;
 	} else {
 		if (context->selected_reg < YM_PART1_START) {
 			return;
 		}
+		if (context->vgm) {
+			vgm_ym2612_part1_write(context->vgm, context->current_cycle, context->selected_reg, value);
+		}
 		context->part1_regs[context->selected_reg - YM_PART1_START] = value;
 	}
+	uint8_t buffer[3] = {context->selected_part, context->selected_reg, value};
+	event_log(EVENT_YM_REG, context->current_cycle, sizeof(buffer), buffer);
 	dfprintf(debug_file, "write of %X to reg %X in part %d\n", value, context->selected_reg, context->selected_part+1);
 	if (context->selected_reg < 0x30) {
 		//Shared regs
@@ -742,7 +848,19 @@
 			}*/
 			context->lfo_enable = value & 0x8;
 			if (!context->lfo_enable) {
+				uint8_t old_pm_step = context->lfo_pm_step;
 				context->lfo_am_step = context->lfo_pm_step = 0;
+				if (old_pm_step) {
+					for (int chan = 0; chan < NUM_CHANNELS; chan++)
+					{
+						if (context->channels[chan].pms) {
+							for (int op = chan * 4; op < (chan + 1) * 4; op++)
+							{
+								context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
+							}
+						}
+					}
+				}
 			}
 			context->lfo_freq = value & 0x7;
 
@@ -778,7 +896,14 @@
 			if (context->ch3_mode == CSM_MODE && (value & 0xC0) != CSM_MODE && context->csm_keyon) {
 				csm_keyoff(context);
 			}
+			uint8_t old_mode = context->ch3_mode;
 			context->ch3_mode = value & 0xC0;
+			if (context->ch3_mode != old_mode) {
+				for (int op = 2 * 4; op < 3*4; op++)
+				{
+					context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
+				}
+			}
 			break;
 		}
 		case REG_KEY_ONOFF: {
@@ -830,6 +955,7 @@
 			case REG_DETUNE_MULT:
 				operator->detune = value >> 4 & 0x7;
 				operator->multiple = value & 0xF;
+				operator->phase_inc = ym_calc_phase_inc(context, operator, op);
 				break;
 			case REG_TOTAL_LEVEL:
 				operator->total_level = (value & 0x7F) << 5;
@@ -839,7 +965,6 @@
 				operator->rates[PHASE_ATTACK] = value & 0x1F;
 				break;
 			case REG_DECAY_AM:
-				//TODO: AM flag for LFO
 				operator->am = value & 0x80;
 				operator->rates[PHASE_DECAY] = value & 0x1F;
 				break;
@@ -877,6 +1002,10 @@
 				context->channels[channel].block = context->channels[channel].block_fnum_latch >> 3 & 0x7;
 				context->channels[channel].fnum = (context->channels[channel].block_fnum_latch & 0x7) << 8 | value;
 				context->channels[channel].keycode = context->channels[channel].block << 2 | fnum_to_keycode[context->channels[channel].fnum >> 7];
+				for (int op = channel * 4; op < (channel + 1) * 4; op++)
+				{
+					context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
+				}
 				break;
 			case REG_BLOCK_FNUM_H:{
 				context->channels[channel].block_fnum_latch = value;
@@ -887,6 +1016,10 @@
 					context->ch3_supp[channel].block = context->ch3_supp[channel].block_fnum_latch >> 3 & 0x7;
 					context->ch3_supp[channel].fnum = (context->ch3_supp[channel].block_fnum_latch & 0x7) << 8 | value;
 					context->ch3_supp[channel].keycode = context->ch3_supp[channel].block << 2 | fnum_to_keycode[context->ch3_supp[channel].fnum >> 7];
+					if (context->ch3_mode) {
+						int op = 2 * 4 + (channel < 2 ? (channel ^ 1) : channel);
+						context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
+					}
 				}
 				break;
 			case REG_BLOCK_FN_CH3:
@@ -900,6 +1033,8 @@
 				{
 				case 0:
 					//operator 3 modulated by operator 2
+					//this uses a special op2 result reg on HW, but that reg will have the most recent
+					//result from op2 when op3 starts executing
 					context->operators[channel*4+1].mod_src[0] = &context->operators[channel*4+2].output;
 					context->operators[channel*4+1].mod_src[1] = NULL;
 					
@@ -912,7 +1047,11 @@
 					break;
 				case 1:
 					//operator 3 modulated by operator 1+2
-					context->operators[channel*4+1].mod_src[0] = &context->operators[channel*4+0].output;
+					//op1 starts executing before this, but due to pipeline length the most current result is
+					//not available and instead the previous result is used
+					context->operators[channel*4+1].mod_src[0] = &context->channels[channel].op1_old;
+					//this uses a special op2 result reg on HW, but that reg will have the most recent
+					//result from op2 when op3 starts executing
 					context->operators[channel*4+1].mod_src[1] = &context->operators[channel*4+2].output;
 					
 					//operator 2 unmodulated
@@ -924,6 +1063,8 @@
 					break;
 				case 2:
 					//operator 3 modulated by operator 2
+					//this uses a special op2 result reg on HW, but that reg will have the most recent
+					//result from op2 when op3 starts executing
 					context->operators[channel*4+1].mod_src[0] = &context->operators[channel*4+2].output;
 					context->operators[channel*4+1].mod_src[1] = NULL;
 					
@@ -931,6 +1072,8 @@
 					context->operators[channel*4+2].mod_src[0] = NULL;
 					
 					//operator 4 modulated by operator 1+3
+					//this uses a special op1 result reg on HW, but that reg will have the most recent
+					//result from op1 when op4 starts executing
 					context->operators[channel*4+3].mod_src[0] = &context->operators[channel*4+0].output;
 					context->operators[channel*4+3].mod_src[1] = &context->operators[channel*4+1].output;
 					break;
@@ -943,7 +1086,9 @@
 					context->operators[channel*4+2].mod_src[0] = &context->operators[channel*4+0].output;
 					
 					//operator 4 modulated by operator 2+3
-					context->operators[channel*4+3].mod_src[0] = &context->operators[channel*4+2].output;
+					//op2 starts executing before this, but due to pipeline length the most current result is
+					//not available and instead the previous result is used
+					context->operators[channel*4+3].mod_src[0] = &context->channels[channel].op2_old;
 					context->operators[channel*4+3].mod_src[1] = &context->operators[channel*4+1].output;
 					break;
 				case 4:
@@ -960,13 +1105,17 @@
 					break;
 				case 5:
 					//operator 3 modulated by operator 1
-					context->operators[channel*4+1].mod_src[0] = &context->operators[channel*4+0].output;
+					//op1 starts executing before this, but due to pipeline length the most current result is
+					//not available and instead the previous result is used
+					context->operators[channel*4+1].mod_src[0] = &context->channels[channel].op1_old;
 					context->operators[channel*4+1].mod_src[1] = NULL;
 					
 					//operator 2 modulated by operator 1
 					context->operators[channel*4+2].mod_src[0] = &context->operators[channel*4+0].output;
 					
 					//operator 4 modulated by operator 1
+					//this uses a special op1 result reg on HW, but that reg will have the most recent
+					//result from op1 when op4 starts executing
 					context->operators[channel*4+3].mod_src[0] = &context->operators[channel*4+0].output;
 					context->operators[channel*4+3].mod_src[1] = NULL;
 					break;
@@ -996,24 +1145,44 @@
 				context->channels[channel].feedback = value >> 3 & 0x7;
 				//printf("Algorithm %d, feedback %d for channel %d\n", value & 0x7, value >> 3 & 0x7, channel);
 				break;
-			case REG_LR_AMS_PMS:
+			case REG_LR_AMS_PMS: {
+				uint8_t old_pms = context->channels[channel].pms;
 				context->channels[channel].pms = (value & 0x7) * 32;
 				context->channels[channel].ams = value >> 4 & 0x3;
 				context->channels[channel].lr = value & 0xC0;
+				if (old_pms != context->channels[channel].pms) {
+					for (int op = channel * 4; op < (channel + 1) * 4; op++)
+					{
+						context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
+					}
+				}
 				//printf("Write of %X to LR_AMS_PMS reg for channel %d\n", value, channel);
 				break;
 			}
+			}
 		}
 	}
-
-	context->write_cycle = context->current_cycle;
-	context->busy_cycles = context->selected_reg < 0xA0 ? BUSY_CYCLES_DATA_LOW : BUSY_CYCLES_DATA_HIGH;
-	context->status |= 0x80;
 }
 
-uint8_t ym_read_status(ym2612_context * context)
+uint8_t ym_read_status(ym2612_context * context, uint32_t cycle, uint32_t port) //Returns the status byte seen by a read of port at the given cycle
 {
-	return context->status;
+	uint8_t status;
+	port &= context->status_address_mask; //zero after masking selects the live status, anything else the latched copy
+	if (port) {
+		if (context->last_status_cycle != CYCLE_NEVER && cycle - context->last_status_cycle > context->invalid_status_decay) { //latched copy reads as 0 once more than invalid_status_decay cycles have passed since the last live read
+			context->last_status = 0;
+		}
+		status = context->last_status;
+	} else {
+		status = context->status;
+		if (cycle >= context->busy_start && cycle < context->busy_start + context->busy_cycles) { //bit 7 is reported only inside [busy_start, busy_start + busy_cycles)
+			status |= 0x80;
+		}
+		context->last_status = status; //latch the returned value and its read time for later reads of the other ports
+		context->last_status_cycle = cycle;
+	}
+	return status;
+		
 }
 
 void ym_print_channel_info(ym2612_context *context, int channel)
@@ -1125,7 +1294,10 @@
 	save_int8(buf, context->selected_part);
 	save_int32(buf, context->current_cycle);
 	save_int32(buf, context->write_cycle);
-	save_int32(buf, context->busy_cycles);
+	save_int32(buf, context->busy_start);
+	save_int32(buf, context->last_status_cycle);
+	save_int32(buf, context->invalid_status_decay);
+	save_int8(buf, context->last_status);
 }
 
 void ym_deserialize(deserialize_buffer *buf, void *vcontext)
@@ -1200,5 +1372,13 @@
 	context->selected_part = load_int8(buf);
 	context->current_cycle = load_int32(buf);
 	context->write_cycle = load_int32(buf);
-	context->busy_cycles = load_int32(buf);
+	context->busy_start = load_int32(buf);
+	if (buf->size > buf->cur_pos) {
+		context->last_status_cycle = load_int32(buf);
+		context->invalid_status_decay = load_int32(buf);
+		context->last_status = load_int8(buf);
+	} else {
+		context->last_status = context->status;
+		context->last_status_cycle = context->write_cycle;
+	}
 }
--- a/ym2612.h	Sat Jan 05 00:58:08 2019 -0800
+++ b/ym2612.h	Sat Jan 15 13:15:21 2022 -0800
@@ -9,17 +9,20 @@
 #include <stdint.h>
 #include <stdio.h>
 #include "serialize.h"
-#include "render.h"
+#include "render_audio.h"
+#include "vgm.h"
 
 #define NUM_PART_REGS (0xB7-0x30)
 #define NUM_CHANNELS 6
 #define NUM_OPERATORS (4*NUM_CHANNELS)
 
 #define YM_OPT_WAVE_LOG 1
+#define YM_OPT_3834 2
 
 typedef struct {
 	int16_t  *mod_src[2];
 	uint32_t phase_counter;
+	uint32_t phase_inc;
 	uint16_t envelope;
 	int16_t  output;
 	uint16_t total_level;
@@ -39,6 +42,7 @@
 	uint16_t fnum;
 	int16_t  output;
 	int16_t  op1_old;
+	int16_t  op2_old;
 	uint8_t  block_fnum_latch;
 	uint8_t  block;
 	uint8_t  keycode;
@@ -65,14 +69,20 @@
 
 typedef struct {
 	audio_source *audio;
+	vgm_writer  *vgm;
     uint32_t    clock_inc;
 	uint32_t    current_cycle;
-	//TODO: Condense the next two fields into one
 	uint32_t    write_cycle;
+	uint32_t    busy_start;
 	uint32_t    busy_cycles;
-	uint32_t    lowpass_alpha;
+	uint32_t    last_status_cycle;
+	uint32_t    invalid_status_decay;
+	uint32_t    status_address_mask;
+	int32_t     volume_mult;
+	int32_t     volume_div;
 	ym_operator operators[NUM_OPERATORS];
 	ym_channel  channels[NUM_CHANNELS];
+	int16_t     zero_offset;
 	uint16_t    timer_a;
 	uint16_t    timer_a_load;
 	uint16_t    env_counter;
@@ -93,6 +103,7 @@
 	uint8_t     lfo_pm_step;
 	uint8_t     csm_keyon;
 	uint8_t     status;
+	uint8_t     last_status;
 	uint8_t     selected_reg;
 	uint8_t     selected_part;
 	uint8_t     part1_regs[YM_PART1_REGS];
@@ -128,12 +139,15 @@
 void ym_init(ym2612_context * context, uint32_t master_clock, uint32_t clock_div, uint32_t options);
 void ym_reset(ym2612_context *context);
 void ym_free(ym2612_context *context);
+void ym_enable_zero_offset(ym2612_context *context, uint8_t enabled);
 void ym_adjust_master_clock(ym2612_context * context, uint32_t master_clock);
+void ym_adjust_cycles(ym2612_context *context, uint32_t deduction);
 void ym_run(ym2612_context * context, uint32_t to_cycle);
 void ym_address_write_part1(ym2612_context * context, uint8_t address);
 void ym_address_write_part2(ym2612_context * context, uint8_t address);
 void ym_data_write(ym2612_context * context, uint8_t value);
-uint8_t ym_read_status(ym2612_context * context);
+void ym_vgm_log(ym2612_context *context, uint32_t master_clock, vgm_writer *vgm);
+uint8_t ym_read_status(ym2612_context * context, uint32_t cycle, uint32_t port);
 uint8_t ym_load_gst(ym2612_context * context, FILE * gstfile);
 uint8_t ym_save_gst(ym2612_context * context, FILE * gstfile);
 void ym_print_channel_info(ym2612_context *context, int channel);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/z80.cpu	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,2539 @@
+info
+	prefix z80_
+	opcode_size 8
+	extra_tables cb ed dded fded ddcb fdcb dd fd
+	body z80_run_op
+	sync_cycle z80_sync_cycle
+	interrupt z80_interrupt
+	include z80_util.c
+	header z80.h
+	
+declare
+	void init_z80_opts(z80_options * options, memmap_chunk const * chunks, uint32_t num_chunks, memmap_chunk const * io_chunks, uint32_t num_io_chunks, uint32_t clock_divider, uint32_t io_address_mask);
+	z80_context * init_z80_context(z80_options *options);
+	void z80_run(z80_context *context, uint32_t target_cycle);
+	void z80_assert_reset(z80_context * context, uint32_t cycle);
+	void z80_clear_reset(z80_context * context, uint32_t cycle);
+	void z80_assert_busreq(z80_context * context, uint32_t cycle);
+	void z80_clear_busreq(z80_context * context, uint32_t cycle);
+	void z80_assert_nmi(z80_context *context, uint32_t cycle);
+	uint8_t z80_get_busack(z80_context * context, uint32_t cycle);
+	void z80_invalidate_code_range(z80_context *context, uint32_t start, uint32_t end);
+	void z80_adjust_cycles(z80_context * context, uint32_t deduction);
+	void z80_serialize(z80_context *context, serialize_buffer *buf);
+	void z80_deserialize(deserialize_buffer *buf, void *vcontext);
+	void zinsert_breakpoint(z80_context * context, uint16_t address, uint8_t * bp_handler);
+	void zremove_breakpoint(z80_context * context, uint16_t address);
+	void z80_options_free(z80_options *opts);
+	void z80_sync_cycle(z80_context *context, uint32_t target_cycle);
+
+regs
+	main 8 b c d e h l f a
+	alt 8 b' c' d' e' h' l' f' a'
+	i 8
+	r 8
+	rhigh 8
+	iff1 8
+	iff2 8
+	imode 8
+	sp 16
+	ix 16
+	iy 16
+	pc 16
+	wz 16
+	nflag 8
+	last_flag_result 8
+	pvflag 8
+	chflags 8
+	zflag 8
+	scratch1 16
+	scratch2 16
+	busreq 8
+	busack 8
+	reset 8
+	io_map ptrmemmap_chunk
+	io_chunks 32
+	io_mask 32
+	int_cycle 32
+	int_end_cycle 32
+	int_value 8
+	nmi_cycle 32
+	system ptrvoid
+	fastread ptr8 64
+	fastwrite ptr8 64
+	mem_pointers ptr8 4
+	
+flags
+	register f
+	S 7 sign last_flag_result.7
+	Z 6 zero zflag
+	Y 5 bit-5 last_flag_result.5
+	H 4 half-carry chflags.3
+	P 2 parity pvflag
+	V 2 overflow pvflag
+	X 3 bit-3 last_flag_result.3
+	N 1 none nflag
+	C 0 carry chflags.7
+
+	
+z80_op_fetch
+	cycles 1
+	add 1 r r
+	mov pc scratch1
+	ocall read_8
+	add 1 pc pc
+	
+z80_run_op
+	#printf "Z80: %X @ %d\n" pc cycles
+	#printf "Z80: %X - A: %X, B: %X, C: %X D: %X, E: %X, H: %X, L: %X, SP: %X, IX: %X, IY: %X @ %d\n" pc a b c d e h l sp ix iy cycles
+	z80_op_fetch
+	dispatch scratch1
+
+z80_interrupt
+	cmp int_cycle cycles
+	if >=U
+	
+	mov 0 iff1
+	mov 0 iff2
+	cycles 6
+	update_sync
+	
+	switch imode
+	case 0
+	dispatch int_value
+	
+	case 1
+	dispatch 0xFF
+	
+	case 2
+	lsl i 8 pc
+	or int_value pc pc
+	#0xCD is the CALL opcode; pc now holds (i << 8) | int_value - NOTE(review): confirm the call handler does not push this overwritten pc as the return address
+	dispatch 0xCD
+	end
+	
+	else
+	
+	cmp nmi_cycle cycles
+	if >=U
+	
+	mov 0xFFFFFFFF nmi_cycle
+	mov 0 iff1
+	local pch 8
+	lsr pc 8 pch
+	meta high pch
+	meta low pc
+	z80_push
+	mov 0x66 pc
+	update_sync
+	
+	end
+	end
+	
+	
+11001011 cb_prefix
+	z80_op_fetch
+	dispatch scratch1 cb
+
+11011101 dd_prefix
+	z80_op_fetch
+	dispatch scratch1 dd
+
+11101101 ed_prefix
+	z80_op_fetch
+	dispatch scratch1 ed
+
+11111101 fd_prefix
+	z80_op_fetch
+	dispatch scratch1 fd
+	
+dd 11001011 ddcb_prefix
+	z80_calc_index ix
+	cycles 2
+	mov pc scratch1
+	ocall read_8
+	add 1 pc pc
+	dispatch scratch1 ddcb
+	
+fd 11001011 fdcb_prefix
+	z80_calc_index iy
+	cycles 2
+	mov pc scratch1
+	ocall read_8
+	add 1 pc pc
+	dispatch scratch1 fdcb
+	
+z80_check_cond
+	arg cond 8
+	local invert 8
+	switch cond
+	case 0
+	meta istrue invert
+	lnot zflag invert
+	
+	case 1
+	meta istrue zflag
+	
+	case 2
+	meta istrue invert
+	not chflags invert
+	and 0x80 invert invert
+	
+	case 3
+	meta istrue invert
+	and 0x80 chflags invert
+	
+	case 4
+	meta istrue invert
+	lnot pvflag invert
+	
+	case 5
+	meta istrue pvflag
+	
+	case 6
+	meta istrue invert
+	not last_flag_result invert
+	and 0x80 invert invert
+	
+	case 7
+	meta istrue invert
+	and 0x80 last_flag_result invert
+	
+	end
+	
+z80_fetch_hl
+	lsl h 8 scratch1
+	or l scratch1 scratch1
+	ocall read_8
+	
+z80_store_hl
+	lsl h 8 scratch2
+	or l scratch2 scratch2
+	ocall write_8
+
+z80_fetch_immed
+	mov pc scratch1
+	ocall read_8
+	add 1 pc pc
+	
+z80_fetch_immed16
+	mov pc scratch1
+	ocall read_8
+	mov scratch1 wz
+	add 1 pc pc
+	mov pc scratch1
+	ocall read_8
+	add 1 pc pc
+	lsl scratch1 8 scratch1
+	or scratch1 wz wz
+
+z80_fetch_immed_reg16
+	mov pc scratch1
+	ocall read_8
+	mov scratch1 low
+	add 1 pc pc
+	mov pc scratch1
+	ocall read_8
+	mov scratch1 high
+	add 1 pc pc
+	
+z80_fetch_immed_to_reg16
+	mov pc scratch1
+	ocall read_8
+	mov scratch1 reg
+	add 1 pc pc
+	mov pc scratch1
+	ocall read_8
+	add 1 pc pc
+	lsl scratch1 8 scratch1
+	or scratch1 reg reg
+
+01RRR110 ld_from_hl
+	z80_fetch_hl
+	mov scratch1 main.R
+
+01DDDSSS ld_from_reg
+	mov main.S main.D
+	
+dd 01DDD100 ld_from_ixh
+	invalid D 6
+	lsr ix 8 main.D
+	
+dd 01100SSS ld_to_ixh
+	invalid S 6
+	local tmp 16
+	and 0xFF ix ix
+	lsl main.S 8 tmp
+	or tmp ix ix
+	
+dd 0110D10S ld_ixb_to_ixb
+
+dd 01DDD101 ld_from_ixl
+	invalid D 6
+	mov ix main.D
+	
+dd 01101SSS ld_to_ixl
+	invalid S 6
+	and 0xFF00 ix ix
+	or main.S ix ix
+	
+dd 01100101 ld_ixl_to_ixh
+	local tmp 16
+	lsl ix 8 tmp
+	and 0xFF ix ix
+	or tmp ix ix
+	
+dd 01101100 ld_ixh_to_ixl
+	local tmp 16
+	lsr ix 8 tmp
+	and 0xFF00 ix ix
+	or tmp ix ix
+	
+fd 01DDD100 ld_from_iyh
+	invalid D 6
+	lsr iy 8 main.D
+	
+fd 01100SSS ld_to_iyh
+	invalid S 6
+	local tmp 16
+	and 0xFF iy iy
+	lsl main.S 8 tmp
+	or tmp iy iy
+	
+fd 0110D10S ld_iyb_to_iyb
+
+fd 01DDD101 ld_from_iyl
+	invalid D 6
+	mov iy main.D
+	
+fd 01101SSS ld_to_iyl
+	invalid S 6
+	and 0xFF00 iy iy
+	or main.S iy iy
+	
+fd 01100101 ld_iyl_to_iyh
+	local tmp 16
+	lsl iy 8 tmp
+	and 0xFF iy iy
+	or tmp iy iy
+	
+fd 01101100 ld_iyh_to_iyl
+	local tmp 16
+	lsr iy 8 tmp
+	and 0xFF00 iy iy
+	or tmp iy iy
+	
+z80_calc_index
+	arg index 16
+	mov index wz
+	z80_fetch_immed
+	sext 16 scratch1 scratch1
+	add scratch1 wz wz
+
+z80_fetch_index
+	#Reads the byte at index + d into scratch1. The effective address is
+	#computed by z80_calc_index and stays in wz; 5 cycles are charged
+	#before the memory read
+	arg index 16
+	z80_calc_index index
+	mov wz scratch1
+	cycles 5
+	ocall read_8
+	
+z80_store_index
+	mov wz scratch2
+	ocall write_8
+	
+dd 01RRR110 ld_from_ix
+	z80_fetch_index ix
+	mov scratch1 main.R
+
+fd 01RRR110 ld_from_iy
+	z80_fetch_index iy
+	mov scratch1 main.R
+
+00RRR110 ld_immed
+	z80_fetch_immed
+	mov scratch1 main.R
+	
+dd 00100110 ld_immed_ixh
+	z80_fetch_immed
+	lsl scratch1 8 scratch1
+	and 0xFF ix ix
+	or scratch1 ix ix
+	
+dd 00101110 ld_immed_ixl
+	z80_fetch_immed
+	and 0xFF00 ix ix
+	or scratch1 ix ix
+	
+fd 00100110 ld_immed_iyh
+	z80_fetch_immed
+	lsl scratch1 8 scratch1
+	and 0xFF iy iy
+	or scratch1 iy iy
+	
+fd 00101110 ld_immed_iyl
+	z80_fetch_immed
+	and 0xFF00 iy iy
+	or scratch1 iy iy
+
+01110RRR ld_to_hl
+	mov main.R scratch1
+	z80_store_hl
+
+dd 01110RRR ld_to_ix
+	z80_calc_index ix
+	mov wz scratch2
+	mov main.R scratch1
+	cycles 5
+	ocall write_8
+
+fd 01110RRR ld_to_iy
+	z80_calc_index iy
+	mov wz scratch2
+	mov main.R scratch1
+	cycles 5
+	ocall write_8
+
+00110110 ld_to_hl_immed
+	z80_fetch_immed
+	z80_store_hl
+	
+dd 00110110 ld_to_ixd_immed
+	z80_calc_index ix
+	z80_fetch_immed
+	cycles 2
+	mov wz scratch2
+	ocall write_8
+	
+fd 00110110 ld_to_iyd_immed
+	z80_calc_index iy
+	z80_fetch_immed
+	cycles 2
+	mov wz scratch2
+	ocall write_8
+
+00001010 ld_a_from_bc
+	lsl b 8 wz
+	or c wz wz
+	mov wz scratch1
+	add 1 wz wz
+	ocall read_8
+	mov scratch1 a
+
+00011010 ld_a_from_de
+	lsl d 8 wz
+	or e wz wz
+	mov wz scratch1
+	add 1 wz wz
+	ocall read_8
+	mov scratch1 a
+
+00111010 ld_a_from_immed
+	z80_fetch_immed16
+	mov wz scratch1
+	add 1 wz wz
+	ocall read_8
+	mov scratch1 a
+	
+00000010 ld_a_to_bc
+	#Writes a to the address formed from b (high) and c (low).
+	#scratch2 carries the address and scratch1 the value for write_8.
+	#wz is set to a in the high byte and c + 1 in the low byte; tmp is
+	#declared 8 bits wide so the c + 1 sum presumably wraps at 0xFF
+	local tmp 8
+	lsl b 8 scratch2
+	or c scratch2 scratch2
+	mov a scratch1
+	add c 1 tmp
+	lsl a 8 wz
+	or tmp wz wz
+	ocall write_8
+	
+00010010 ld_a_to_de
+	local tmp 8
+	lsl d 8 scratch2
+	or e scratch2 scratch2
+	mov a scratch1
+	add e 1 tmp
+	lsl a 8 wz
+	or tmp wz wz
+	ocall write_8
+
+00110010 ld_a_to_immed
+	#Writes a to the 16-bit address that follows the opcode.
+	#After the write, wz keeps the low byte of address + 1 and gets a
+	#as its high byte
+	local tmp 16
+	z80_fetch_immed16
+	mov wz scratch2
+	mov a scratch1
+	add 1 wz wz
+	ocall write_8
+	and 0xFF wz wz
+	lsl a 8 tmp
+	or tmp wz wz
+
+ed 01000111 ld_i_a
+	mov a i
+	cycles 1
+
+ed 01001111 ld_r_a
+	#Copies a into r and keeps bit 7 of a separately in rhigh; ld_a_r
+	#recombines the two
+	mov a r
+	and 0x80 a rhigh
+	cycles 1
+
+ed 01011111 ld_a_r
+	#Loads a with the low 7 bits of r combined with rhigh (bit 7 as saved
+	#by ld_r_a), updates the listed flags from the result and then copies
+	#iff2 into pvflag
+	cycles 1
+	and 0x7F r a
+	or rhigh a a
+	update_flags SZYH0XN0
+	mov iff2 pvflag
+	
+ed 01010111 ld_a_i
+	cycles 1
+	mov i a
+	update_flags SZYH0XN0
+	mov iff2 pvflag
+
+00000001 ld_bc_immed
+	meta high b
+	meta low c
+	z80_fetch_immed_reg16
+
+00010001 ld_de_immed
+	meta high d
+	meta low e
+	z80_fetch_immed_reg16
+
+00100001 ld_hl_immed
+	meta high h
+	meta low l
+	z80_fetch_immed_reg16
+
+00110001 ld_sp_immed
+	meta reg sp
+	z80_fetch_immed_to_reg16
+
+dd 00100001 ld_ix_immed
+	meta reg ix
+	z80_fetch_immed_to_reg16
+
+fd 00100001 ld_iy_immed
+	meta reg iy
+	z80_fetch_immed_to_reg16
+	
+z80_fetch16_from_immed
+	z80_fetch_immed16
+	mov wz scratch1
+	ocall read_8
+	mov scratch1 low
+	add 1 wz wz
+	mov wz scratch1
+	ocall read_8
+	mov scratch1 high
+	add 1 wz wz
+
+00101010 ld_hl_from_immed
+	meta low l
+	meta high h
+	z80_fetch16_from_immed
+
+ed 01001011 ld_bc_from_immed
+	meta low c
+	meta high b
+	z80_fetch16_from_immed
+
+ed 01011011 ld_de_from_immed
+	meta low e
+	meta high d
+	z80_fetch16_from_immed
+
+ed 01101011 ld_hl_from_immed_slow
+	meta low l
+	meta high h
+	z80_fetch16_from_immed
+	
+z80_fetch_reg16_from_immed
+	z80_fetch_immed16
+	mov wz scratch1
+	ocall read_8
+	mov scratch1 reg
+	add 1 wz wz
+	mov wz scratch1
+	ocall read_8
+	lsl scratch1 8 scratch1
+	or scratch1 reg reg
+	add 1 wz wz
+
+ed 01111011 ld_sp_from_immed
+	meta reg sp
+	z80_fetch_reg16_from_immed
+
+dd 00101010 ld_ix_from_immed
+	meta reg ix
+	z80_fetch_reg16_from_immed
+
+fd 00101010 ld_iy_from_immed
+	meta reg iy
+	z80_fetch_reg16_from_immed
+
+00100010 ld_hl_to_immed
+	z80_fetch_immed16
+	mov wz scratch2
+	mov l scratch1
+	ocall write_8
+	add 1 wz wz
+	mov wz scratch2
+	mov h scratch1
+	ocall write_8
+	add 1 wz wz
+	
+dd 00100010 ld_ix_to_immed
+	z80_fetch_immed16
+	mov wz scratch2
+	mov ix scratch1
+	ocall write_8
+	add 1 wz wz
+	mov wz scratch2
+	lsr ix 8 scratch1
+	ocall write_8
+	add 1 wz wz
+	
+fd 00100010 ld_iy_to_immed
+	z80_fetch_immed16
+	mov wz scratch2
+	mov iy scratch1
+	ocall write_8
+	add 1 wz wz
+	mov wz scratch2
+	lsr iy 8 scratch1
+	ocall write_8
+	add 1 wz wz
+	
+z80_regpair_to_immed
+	z80_fetch_immed16
+	mov wz scratch2
+	mov low scratch1
+	ocall write_8
+	add 1 wz wz
+	mov high scratch1
+	mov wz scratch2
+	ocall write_8
+	add 1 wz wz
+	
+ed 01000011 ld_bc_to_immed
+	meta low c
+	meta high b
+	z80_regpair_to_immed
+
+ed 01010011 ld_de_to_immed
+	meta low e
+	meta high d
+	z80_regpair_to_immed
+	
+ed 01100011 ld_hl_to_immed_slow
+	meta low l
+	meta high h
+	z80_regpair_to_immed
+	
+ed 01110011 ld_sp_to_immed
+	meta low sp
+	local sph 8
+	lsr sp 8 sph
+	meta high sph
+	z80_regpair_to_immed
+
+11111001 ld_sp_hl
+	cycles 2
+	lsl h 8 sp
+	or l sp sp
+	
+dd 11111001 ld_sp_ix
+	cycles 2
+	mov ix sp
+
+fd 11111001 ld_sp_iy
+	cycles 2
+	mov iy sp
+
+z80_push
+	#Pushes a byte pair onto the stack. The caller binds "high" and "low"
+	#with meta before invoking this. sp is decremented before each write,
+	#so high ends up at the higher address and low at the final sp.
+	#Charges 1 cycle in addition to whatever write_8 accounts for
+	cycles 1
+	sub 1 sp sp
+	mov sp scratch2
+	mov high scratch1
+	ocall write_8
+	sub 1 sp sp
+	mov sp scratch2
+	mov low scratch1
+	ocall write_8
+
+11000101 push_bc
+	meta high b
+	meta low c
+	z80_push
+
+11010101 push_de
+	meta high d
+	meta low e
+	z80_push
+
+11100101 push_hl
+	meta high h
+	meta low l
+	z80_push
+
+11110101 push_af
+	meta high a
+	meta low f
+	z80_push
+	
+dd 11100101 push_ix
+	local ixh 8
+	lsr ix 8 ixh
+	meta high ixh
+	meta low ix
+	z80_push
+
+fd 11100101 push_iy
+	local iyh 8
+	lsr iy 8 iyh
+	meta high iyh
+	meta low iy
+	z80_push
+
+z80_pop
+	#Pops a byte pair from the stack into the caller's meta bindings:
+	#the byte at sp goes to "low", the byte at sp + 1 goes to "high",
+	#and sp ends up incremented by 2
+	mov sp scratch1
+	ocall read_8
+	add 1 sp sp
+	mov scratch1 low
+	mov sp scratch1
+	ocall read_8
+	add 1 sp sp
+	mov scratch1 high
+
+11000001 pop_bc
+	meta high b
+	meta low c
+	z80_pop
+
+11010001 pop_de
+	meta high d
+	meta low e
+	z80_pop
+
+11100001 pop_hl
+	meta high h
+	meta low l
+	z80_pop
+
+11110001 pop_af
+	meta high a
+	meta low f
+	z80_pop
+
+dd 11100001 pop_ix
+	#z80_pop moves the low byte straight into ix and the high byte into
+	#the local ixh. ixh is 16 bits wide so it can be shifted into the
+	#upper byte and ORed into ix afterwards
+	local ixh 16
+	meta high ixh
+	meta low ix
+	z80_pop
+	lsl ixh 8 ixh
+	or ixh ix ix
+
+fd 11100001 pop_iy
+	local iyh 16
+	meta high iyh
+	meta low iy
+	z80_pop
+	lsl iyh 8 iyh
+	or iyh iy iy
+
+11101011 ex_de_hl
+	xchg e l
+	xchg d h
+
+00001000 ex_af_af
+	xchg a a'
+	xchg f f'
+
+11011001 exx
+	xchg b b'
+	xchg c c'
+	xchg d d'
+	xchg e e'
+	xchg h h'
+	xchg l l'
+
+11100011 ex_sp_hl
+	#Exchanges hl with the two bytes at sp one byte at a time: l with the
+	#byte at sp, then h with the byte at sp + 1. sp itself is unchanged
+	mov sp scratch1
+	ocall read_8
+	#after the xchg, l holds the stack byte and scratch1 the old l
+	xchg l scratch1
+	cycles 1
+	mov sp scratch2
+	ocall write_8
+	add 1 sp scratch1
+	ocall read_8
+	xchg h scratch1
+	cycles 2
+	add 1 sp scratch2
+	ocall write_8
+	#wz ends up holding the new value of hl
+	lsl h 8 wz
+	or l wz wz
+	
+dd 11100011 ex_sp_ix
+	mov sp scratch1
+	ocall read_8
+	mov scratch1 wz
+	mov ix scratch1
+	cycles 1
+	mov sp scratch2
+	ocall write_8
+	add 1 sp scratch1
+	ocall read_8
+	lsl scratch1 8 scratch1
+	or scratch1 wz wz
+	lsr ix 8 scratch1
+	cycles 2
+	add 1 sp scratch2
+	ocall write_8
+	mov wz ix
+	
+fd 11100011 ex_sp_iy
+	mov sp scratch1
+	ocall read_8
+	mov scratch1 wz
+	mov iy scratch1
+	cycles 1
+	mov sp scratch2
+	ocall write_8
+	add 1 sp scratch1
+	ocall read_8
+	lsl scratch1 8 scratch1
+	or scratch1 wz wz
+	lsr iy 8 scratch1
+	cycles 2
+	add 1 sp scratch2
+	ocall write_8
+	mov wz iy
+
+10000RRR add_reg
+	add a main.R a
+	update_flags SZYHVXN0C
+	
+dd 10000100 add_ixh
+	lsr ix 8 scratch1
+	add a scratch1 a
+	update_flags SZYHVXN0C
+	
+dd 10000101 add_ixl
+	and ix 0xFF scratch1
+	add a scratch1 a
+	update_flags SZYHVXN0C
+	
+fd 10000100 add_iyh
+	lsr iy 8 scratch1
+	add a scratch1 a
+	update_flags SZYHVXN0C
+	
+fd 10000101 add_iyl
+	and iy 0xFF scratch1
+	add a scratch1 a
+	update_flags SZYHVXN0C
+	
+10000110 add_hl
+	z80_fetch_hl
+	add a scratch1 a
+	update_flags SZYHVXN0C
+	
+dd 10000110 add_ixd
+	z80_fetch_index ix
+	add a scratch1 a
+	update_flags SZYHVXN0C
+	
+fd 10000110 add_iyd
+	z80_fetch_index iy
+	add a scratch1 a
+	update_flags SZYHVXN0C
+
+11000110 add_immed
+	z80_fetch_immed
+	add a scratch1 a
+	update_flags SZYHVXN0C
+	
+z80_add16_hl
+	#16-bit add of src into hl. "hlt" is a meta name the caller binds to
+	#a 16-bit local; it is loaded with h:l, used for the add and then
+	#split back into l and h. wz is set to hl + 1 before the add.
+	#Only the YHXN0C flags are updated; 7 extra cycles are charged
+	arg src 16
+	lsl h 8 hlt
+	or l hlt hlt
+	add 1 hlt wz
+	add src hlt hlt
+	update_flags YHXN0C
+	mov hlt l
+	lsr hlt 8 h
+	cycles 7
+	
+00001001 add_hl_bc
+	local hlw 16
+	local bcw 16
+	meta hlt hlw
+	lsl b 8 bcw
+	or c bcw bcw
+	z80_add16_hl bcw
+	
+00011001 add_hl_de
+	local hlw 16
+	local dew 16
+	meta hlt hlw
+	lsl d 8 dew
+	or e dew dew
+	z80_add16_hl dew
+	
+00101001 add_hl_hl
+	local hlw 16
+	meta hlt hlw
+	z80_add16_hl hlw
+	
+00111001 add_hl_sp
+	local hlw 16
+	meta hlt hlw
+	z80_add16_hl sp
+	
+dd 00001001 add_ix_bc
+	lsl b 8 scratch1
+	or c scratch1 scratch1
+	add scratch1 ix ix
+	update_flags YHXN0C
+	cycles 7
+	
+dd 00011001 add_ix_de
+	lsl d 8 scratch1
+	or e scratch1 scratch1
+	add scratch1 ix ix
+	update_flags YHXN0C
+	cycles 7
+	
+dd 00101001 add_ix_ix
+	add ix ix ix
+	update_flags YHXN0C
+	cycles 7
+	
+dd 00111001 add_ix_sp
+	add sp ix ix
+	update_flags YHXN0C
+	cycles 7
+	
+fd 00001001 add_iy_bc
+	lsl b 8 scratch1
+	or c scratch1 scratch1
+	add scratch1 iy iy
+	update_flags YHXN0C
+	cycles 7
+	
+fd 00011001 add_iy_de
+	lsl d 8 scratch1
+	or e scratch1 scratch1
+	add scratch1 iy iy
+	update_flags YHXN0C
+	cycles 7
+	
+fd 00101001 add_iy_iy
+	add iy iy iy
+	update_flags YHXN0C
+	cycles 7
+	
+fd 00111001 add_iy_sp
+	add sp iy iy
+	update_flags YHXN0C
+	cycles 7
+	
+10001RRR adc_reg
+	adc a main.R a
+	update_flags SZYHVXN0C
+	
+dd 10001100 adc_ixh
+	lsr ix 8 scratch1
+	adc a scratch1 a
+	update_flags SZYHVXN0C
+	
+dd 10001101 adc_ixl
+	and ix 0xFF scratch1
+	adc a scratch1 a
+	update_flags SZYHVXN0C
+	
+fd 10001100 adc_iyh
+	lsr iy 8 scratch1
+	adc a scratch1 a
+	update_flags SZYHVXN0C
+	
+fd 10001101 adc_iyl
+	and iy 0xFF scratch1
+	adc a scratch1 a
+	update_flags SZYHVXN0C
+
+10001110 adc_hl
+	z80_fetch_hl
+	adc a scratch1 a
+	update_flags SZYHVXN0C
+	
+dd 10001110 adc_ixd
+	z80_fetch_index ix
+	adc a scratch1 a
+	update_flags SZYHVXN0C
+	
+fd 10001110 adc_iyd
+	z80_fetch_index iy
+	adc a scratch1 a
+	update_flags SZYHVXN0C
+
+11001110 adc_immed
+	z80_fetch_immed
+	adc a scratch1 a
+	update_flags SZYHVXN0C
+	
+z80_adc16_hl
+	arg src 16
+	lsl h 8 hlt
+	or l hlt hlt
+	add 1 hlt wz
+	adc src hlt hlt
+	update_flags SZYHVXN0C
+	mov hlt l
+	lsr hlt 8 h
+	cycles 7
+	
+ed 01001010 adc_hl_bc
+	local hlw 16
+	local bcw 16
+	meta hlt hlw
+	lsl b 8 bcw
+	or c bcw bcw
+	z80_adc16_hl bcw
+	
+ed 01011010 adc_hl_de
+	local hlw 16
+	local dew 16
+	meta hlt hlw
+	lsl d 8 dew
+	or e dew dew
+	z80_adc16_hl dew
+	
+ed 01101010 adc_hl_hl
+	local hlw 16
+	meta hlt hlw
+	z80_adc16_hl hlw
+
+	
+ed 01111010 adc_hl_sp
+	local hlw 16
+	meta hlt hlw
+	z80_adc16_hl sp
+
+10010RRR sub_reg
+	sub main.R a a
+	update_flags SZYHVXN1C
+	
+dd 10010100 sub_ixh
+	lsr ix 8 scratch1
+	sub scratch1 a a
+	update_flags SZYHVXN1C
+	
+dd 10010101 sub_ixl
+	and ix 0xFF scratch1
+	sub scratch1 a a
+	update_flags SZYHVXN1C
+	
+fd 10010100 sub_iyh
+	lsr iy 8 scratch1
+	sub scratch1 a a
+	update_flags SZYHVXN1C
+	
+fd 10010101 sub_iyl
+	and iy 0xFF scratch1
+	sub scratch1 a a
+	update_flags SZYHVXN1C
+	
+10010110 sub_hl
+	z80_fetch_hl
+	sub scratch1 a a
+	update_flags SZYHVXN1C
+	
+dd 10010110 sub_ixd
+	z80_fetch_index ix
+	sub scratch1 a a
+	update_flags SZYHVXN1C
+
+fd 10010110 sub_iyd
+	z80_fetch_index iy
+	sub scratch1 a a
+	update_flags SZYHVXN1C
+
+11010110 sub_immed
+	z80_fetch_immed
+	sub scratch1 a a
+	update_flags SZYHVXN1C
+
+10011RRR sbc_reg
+	sbc main.R a a
+	update_flags SZYHVXN1C
+	
+dd 10011100 sbc_ixh
+	lsr ix 8 scratch1
+	sbc scratch1 a a
+	update_flags SZYHVXN1C
+	
+dd 10011101 sbc_ixl
+	and ix 0xFF scratch1
+	sbc scratch1 a a
+	update_flags SZYHVXN1C
+	
+fd 10011100 sbc_iyh
+	lsr iy 8 scratch1
+	sbc scratch1 a a
+	update_flags SZYHVXN1C
+	
+fd 10011101 sbc_iyl
+	and iy 0xFF scratch1
+	sbc scratch1 a a
+	update_flags SZYHVXN1C
+	
+	
+10011110 sbc_hl
+	z80_fetch_hl
+	sbc scratch1 a a
+	update_flags SZYHVXN1C
+	
+dd 10011110 sbc_ixd
+	z80_fetch_index ix
+	sbc scratch1 a a
+	update_flags SZYHVXN1C
+
+fd 10011110 sbc_iyd
+	z80_fetch_index iy
+	sbc scratch1 a a
+	update_flags SZYHVXN1C
+
+11011110 sbc_immed
+	z80_fetch_immed
+	sbc scratch1 a a
+	update_flags SZYHVXN1C
+	
+z80_sbc16_hl
+	arg src 16
+	lsl h 8 hlt
+	or l hlt hlt
+	add 1 hlt wz
+	sbc src hlt hlt
+	update_flags SZYHVXN1C
+	mov hlt l
+	lsr hlt 8 h
+	cycles 7
+	
+ed 01000010 sbc_hl_bc
+	local hlw 16
+	local bcw 16
+	meta hlt hlw
+	lsl b 8 bcw
+	or c bcw bcw
+	z80_sbc16_hl bcw
+	
+ed 01010010 sbc_hl_de
+	local hlw 16
+	local dew 16
+	meta hlt hlw
+	lsl d 8 dew
+	or e dew dew
+	z80_sbc16_hl dew
+	
+ed 01100010 sbc_hl_hl
+	local hlw 16
+	meta hlt hlw
+	z80_sbc16_hl hlw
+
+	
+ed 01110010 sbc_hl_sp
+	local hlw 16
+	meta hlt hlw
+	z80_sbc16_hl sp
+
+10100RRR and_reg
+	and a main.R a
+	update_flags SZYH1PXN0C0
+	
+dd 10100100 and_ixh
+	lsr ix 8 scratch1
+	and scratch1 a a
+	update_flags SZYH1PXN0C0
+	
+dd 10100101 and_ixl
+	and ix a a
+	update_flags SZYH1PXN0C0
+	
+fd 10100100 and_iyh
+	lsr iy 8 scratch1
+	and scratch1 a a
+	update_flags SZYH1PXN0C0
+	
+fd 10100101 and_iyl
+	and iy a a
+	update_flags SZYH1PXN0C0
+	
+10100110 and_hl
+	z80_fetch_hl
+	and a scratch1 a
+	update_flags SZYH1PXN0C0
+	
+dd 10100110 and_ixd
+	z80_fetch_index ix
+	and a scratch1 a
+	update_flags SZYH1PXN0C0
+	
+fd 10100110 and_iyd
+	z80_fetch_index iy
+	and a scratch1 a
+	update_flags SZYH1PXN0C0
+
+11100110 and_immed
+	z80_fetch_immed
+	and a scratch1 a
+	update_flags SZYH1PXN0C0
+	
+10110RRR or_reg
+	or a main.R a
+	update_flags SZYH0PXN0C0
+	
+dd 10110100 or_ixh
+	lsr ix 8 scratch1
+	or scratch1 a a
+	update_flags SZYH0PXN0C0
+	
+dd 10110101 or_ixl
+	or ix a a
+	update_flags SZYH0PXN0C0
+	
+fd 10110100 or_iyh
+	lsr iy 8 scratch1
+	or scratch1 a a
+	update_flags SZYH0PXN0C0
+	
+fd 10110101 or_iyl
+	or iy a a
+	update_flags SZYH0PXN0C0
+	
+10110110 or_hl
+	z80_fetch_hl
+	or a scratch1 a
+	update_flags SZYH0PXN0C0
+	
+dd 10110110 or_ixd
+	z80_fetch_index ix
+	or a scratch1 a
+	update_flags SZYH0PXN0C0
+	
+fd 10110110 or_iyd
+	z80_fetch_index iy
+	or a scratch1 a
+	update_flags SZYH0PXN0C0
+
+11110110 or_immed
+	z80_fetch_immed
+	or a scratch1 a
+	update_flags SZYH0PXN0C0
+	
+10101RRR xor_reg
+	xor a main.R a
+	update_flags SZYH0PXN0C0
+	
+dd 10101100 xor_ixh
+	lsr ix 8 scratch1
+	xor scratch1 a a
+	update_flags SZYH0PXN0C0
+	
+dd 10101101 xor_ixl
+	xor ix a a
+	update_flags SZYH0PXN0C0
+	
+fd 10101100 xor_iyh
+	lsr iy 8 scratch1
+	xor scratch1 a a
+	update_flags SZYH0PXN0C0
+	
+fd 10101101 xor_iyl
+	xor iy a a
+	update_flags SZYH0PXN0C0
+	
+10101110 xor_hl
+	z80_fetch_hl
+	xor a scratch1 a
+	update_flags SZYH0PXN0C0
+	
+dd 10101110 xor_ixd
+	z80_fetch_index ix
+	xor a scratch1 a
+	update_flags SZYH0PXN0C0
+	
+fd 10101110 xor_iyd
+	z80_fetch_index iy
+	xor a scratch1 a
+	update_flags SZYH0PXN0C0
+
+11101110 xor_immed
+	z80_fetch_immed
+	xor a scratch1 a
+	update_flags SZYH0PXN0C0
+
+10111RRR cp_reg
+	mov main.R last_flag_result
+	cmp main.R a
+	update_flags SZHVN1C
+	
+dd 10111100 cp_ixh
+	local tmp 8
+	lsr ix 8 tmp
+	mov tmp last_flag_result
+	cmp tmp a
+	update_flags SZHVN1C
+	
+dd 10111101 cp_ixl
+	local tmp 8
+	mov ix tmp
+	mov ix last_flag_result
+	cmp tmp a
+	update_flags SZHVN1C
+	
+fd 10111100 cp_iyh
+	local tmp 8
+	lsr iy 8 tmp
+	mov tmp last_flag_result
+	cmp tmp a
+	update_flags SZHVN1C
+	
+fd 10111101 cp_iyl
+	local tmp 8
+	mov iy tmp
+	mov iy last_flag_result
+	cmp tmp a
+	update_flags SZHVN1C
+	
+10111110 cp_hl
+	local tmp 8
+	z80_fetch_hl
+	mov scratch1 tmp
+	mov scratch1 last_flag_result
+	cmp tmp a
+	update_flags SZHVN1C
+	
+dd 10111110 cp_ixd
+	local tmp 8
+	z80_fetch_index ix
+	mov scratch1 tmp
+	mov scratch1 last_flag_result
+	cmp tmp a
+	update_flags SZHVN1C
+	
+fd 10111110 cp_iyd
+	local tmp 8
+	z80_fetch_index iy
+	mov scratch1 tmp
+	mov scratch1 last_flag_result
+	cmp tmp a
+	update_flags SZHVN1C
+
+11111110 cp_immed
+	local tmp 8
+	z80_fetch_immed
+	mov scratch1 tmp
+	mov scratch1 last_flag_result
+	cmp tmp a
+	update_flags SZHVN1C
+
+00RRR100 inc_reg
+	add 1 main.R main.R
+	update_flags SZYHVXN0
+	
+dd 00100100 inc_ixh
+	add 0x100 ix ix
+	update_flags SZYHVXN0
+	
+dd 00101100 inc_ixl
+	local tmp 8
+	mov ix tmp
+	add 1 tmp tmp
+	update_flags SZYHVXN0
+	and 0xFF00 ix ix
+	or tmp ix ix
+	
+fd 00100100 inc_iyh
+	add 0x100 iy iy
+	update_flags SZYHVXN0
+	
+fd 00101100 inc_iyl
+	local tmp 8
+	mov iy tmp
+	add 1 tmp tmp
+	update_flags SZYHVXN0
+	and 0xFF00 iy iy
+	or tmp iy iy
+	
+00110100 inc_hl
+	local tmp 8
+	z80_fetch_hl
+	#TODO: Either make DSL compiler smart enough to optimize these unnecessary moves out
+	#or add some syntax to force a certain size on an operation so they are unnecessary
+	mov scratch1 tmp
+	add 1 tmp tmp
+	update_flags SZYHVXN0
+	mov tmp scratch1
+	cycles 1
+	z80_store_hl
+	
+dd 00110100 inc_ixd
+	local tmp 8
+	z80_fetch_index ix
+	#TODO: Either make DSL compiler smart enough to optimize these unnecessary moves out
+	#or add some syntax to force a certain size on an operation so they are unnecessary
+	mov scratch1 tmp
+	add 1 tmp tmp
+	update_flags SZYHVXN0
+	mov tmp scratch1
+	cycles 1
+	z80_store_index
+
+fd 00110100 inc_iyd
+	local tmp 8
+	z80_fetch_index iy
+	#TODO: Either make DSL compiler smart enough to optimize these unnecessary moves out
+	#or add some syntax to force a certain size on an operation so they are unnecessary
+	mov scratch1 tmp
+	add 1 tmp tmp
+	update_flags SZYHVXN0
+	mov tmp scratch1
+	cycles 1
+	z80_store_index
+	
+z80_inc_pair
+	arg high 8
+	arg low 8
+	cycles 2
+	local word 16
+	lsl high 8 word
+	or low word word
+	add 1 word word
+	mov word low
+	lsr word 8 high
+	
+00000011 inc_bc
+	z80_inc_pair b c
+	
+00010011 inc_de
+	z80_inc_pair d e
+	
+00100011 inc16_hl
+	z80_inc_pair h l
+
+00110011 inc_sp
+	add 1 sp sp
+	
+dd 00100011 inc_ix
+	add 1 ix ix
+
+fd 00100011 inc_iy
+	add 1 iy iy
+
+00RRR101 dec_reg
+	sub 1 main.R main.R
+	update_flags SZYHVXN1
+	
+dd 00100101 dec_ixh
+	sub 0x100 ix ix
+	update_flags SZYHVXN1
+	
+dd 00101101 dec_ixl
+	local tmp 8
+	mov ix tmp
+	sub 1 tmp tmp
+	update_flags SZYHVXN1
+	and 0xFF00 ix ix
+	or tmp ix ix
+	
+fd 00100101 dec_iyh
+	sub 0x100 iy iy
+	update_flags SZYHVXN1
+	
+fd 00101101 dec_iyl
+	local tmp 8
+	mov iy tmp
+	sub 1 tmp tmp
+	update_flags SZYHVXN1
+	and 0xFF00 iy iy
+	or tmp iy iy
+	
+00110101 dec_hl
+	local tmp 8
+	z80_fetch_hl
+	#TODO: Either make DSL compiler smart enough to optimize these unnecessary moves out
+	#or add some syntax to force a certain size on an operation so they are unnecessary
+	mov scratch1 tmp
+	sub 1 tmp tmp
+	update_flags SZYHVXN1
+	mov tmp scratch1
+	cycles 1
+	z80_store_hl
+	
+dd 00110101 dec_ixd
+	local tmp 8
+	z80_fetch_index ix
+	#TODO: Either make DSL compiler smart enough to optimize these unnecessary moves out
+	#or add some syntax to force a certain size on an operation so they are unnecessary
+	mov scratch1 tmp
+	sub 1 tmp tmp
+	update_flags SZYHVXN1
+	mov tmp scratch1
+	cycles 1
+	z80_store_index
+
+fd 00110101 dec_iyd
+	local tmp 8
+	z80_fetch_index iy
+	#TODO: Either make DSL compiler smart enough to optimize these unnecessary moves out
+	#or add some syntax to force a certain size on an operation so they are unnecessary
+	mov scratch1 tmp
+	sub 1 tmp tmp
+	update_flags SZYHVXN1
+	mov tmp scratch1
+	cycles 1
+	z80_store_index
+	
+z80_dec_pair
+	arg high 8
+	arg low 8
+	local word 16
+	lsl high 8 word
+	or low word word
+	sub 1 word word
+	mov word low
+	lsr word 8 high
+	cycles 2
+	
+00001011 dec_bc
+	z80_dec_pair b c
+	
+00011011 dec_de
+	z80_dec_pair d e
+	
+00101011 dec16_hl
+	z80_dec_pair h l
+
+00111011 dec_sp
+	sub 1 sp sp
+	
+dd 00101011 dec_ix
+	sub 1 ix ix
+
+fd 00101011 dec_iy
+	sub 1 iy iy
+
+00101111 cpl
+	not a a
+	update_flags YH1XN1
+
+ed 01DDD100 neg
+	neg a a
+	update_flags SZYHVXN1C
+
+00111111 ccf
+	#Keeps bit 7 of last_flag_result and replaces its low 7 bits with
+	#those of a (presumably so the X/Y flag bits come from a while the
+	#sign bit is preserved - TODO confirm against update_flags)
+	local tmp 8
+	and 0x80 last_flag_result last_flag_result
+	and 0x7F a tmp
+	or tmp last_flag_result last_flag_result
+	#chflags: keep only bit 7, copy it down to bit 3, then invert bit 7.
+	#Other definitions test 0x80 as carry and 0x08 as half-carry, so this
+	#looks like H = old C followed by C = !C
+	and 0x80 chflags chflags
+	lsr chflags 4 tmp
+	or tmp chflags chflags
+	xor 0x80 chflags chflags
+	update_flags N0
+
+00110111 scf
+	#Keeps bit 7 of last_flag_result and replaces its low 7 bits with
+	#those of a (same sequence as ccf), then clears H and N and sets C
+	local tmp 8
+	and 0x80 last_flag_result last_flag_result
+	and 0x7F a tmp
+	or tmp last_flag_result last_flag_result
+	update_flags H0N0C1
+
+00000000 nop
+
+01110110 halt
+	#Moves pc back by 1 (so this opcode is fetched again) unless either
+	# - cycles >= nmi_cycle, or
+	# - cycles >= int_cycle and iff1 is non-zero
+	#in which case pc is left pointing past the halt.
+	#"cmp x y" followed by "if >=U" tests y >= x, as in daa's "cmp 0xA low"
+	cmp nmi_cycle cycles
+	if >=U
+	
+	else
+	cmp int_cycle cycles
+	if >=U
+	
+	if iff1
+	else
+	sub 1 pc pc
+	end
+	
+	else
+	sub 1 pc pc
+	end
+	end
+
+11110011 di
+	mov 0 iff1
+	mov 0 iff2
+	update_sync
+
+11111011 ei
+	#Sets both interrupt flip-flops and calls update_sync.
+	#If cycles has already reached int_cycle, int_cycle is pushed out to
+	#cycles + 1 (presumably so the interrupt is not taken until after the
+	#next instruction - TODO confirm against the interrupt handling code)
+	mov 1 iff1
+	mov 1 iff2
+	update_sync
+	cmp int_cycle cycles
+	if >=U
+	
+	add 1 cycles int_cycle
+	
+	end
+
+ed 01D00110 im0
+	mov 0 imode
+
+ed 01D10110 im1
+	mov 1 imode
+
+ed 01D11110 im2
+	mov 2 imode
+	
+ed 01D01110 im3
+	#some sources call this mode 0/1, but unclear
+	#if the behavior is really different from im 0
+	mov 0 imode
+	
+11000011 jp
+	z80_fetch_immed16
+	mov wz pc
+	
+11101001 jp_hl
+	lsl h 8 pc
+	or l pc pc
+	
+dd 11101001 jp_ix
+	mov ix pc
+	
+fd 11101001 jp_iy
+	mov iy pc
+	
+11CCC010 jp_cc
+	#Conditional absolute jump. The 16-bit target is always fetched into
+	#wz (advancing pc); pc is only replaced when "istrue" is set.
+	#z80_check_cond is defined outside this section - presumably it
+	#evaluates condition code C into istrue
+	z80_check_cond C
+	z80_fetch_immed16
+	if istrue
+	
+	mov wz pc
+	
+	end
+	
+00011000 jr
+	z80_fetch_immed
+	#TODO: determine if this updates wz
+	sext 16 scratch1 scratch1
+	add scratch1 pc pc
+	cycles 5
+	
+001CC000 jr_cc
+	#Conditional relative jump. The displacement byte is always fetched;
+	#when "istrue" is set it is sign-extended and added to pc (which
+	#already points past the displacement) and 5 extra cycles are charged.
+	#z80_check_cond is defined outside this section
+	z80_check_cond C
+	z80_fetch_immed
+	
+	if istrue
+	
+	sext 16 scratch1 scratch1
+	add scratch1 pc pc
+	cycles 5
+	
+	end
+	
+00010000 djnz
+	#Decrements b and, if the result is non-zero, adds the sign-extended
+	#displacement byte to pc and charges 5 extra cycles. The displacement
+	#is fetched (and pc advanced) whether or not the branch is taken
+	cycles 1
+	z80_fetch_immed
+	sub 1 b b
+	
+	if b
+	
+	sext 16 scratch1 scratch1
+	add scratch1 pc pc
+	cycles 5
+	
+	end
+	
+
+11001101 call_uncond
+	z80_fetch_immed16
+	local pch 8
+	lsr pc 8 pch
+	meta high pch
+	meta low pc
+	z80_push
+	mov wz pc
+	
+11CCC100 call_cond
+	#Conditional call. The target is always fetched into wz. When
+	#"istrue" is set, pc (already past the operand) is pushed with
+	#z80_push - high byte via the 8-bit local pch, low byte taken from pc
+	#itself - and pc is replaced with wz.
+	#z80_check_cond is defined outside this section
+	local pch 8
+	z80_fetch_immed16
+	z80_check_cond C
+	
+	if istrue
+	
+	lsr pc 8 pch
+	meta high pch
+	meta low pc
+	z80_push
+	mov wz pc
+	
+	end
+	
+11TTT111 rst
+	#Pushes pc with z80_push and jumps to T << 3, where T is the 3-bit
+	#field taken from the opcode
+	local pch 8
+	lsr pc 8 pch
+	meta high pch
+	meta low pc
+	z80_push
+	lsl T 3 scratch1
+	mov scratch1 pc
+
+11001001 ret
+	#z80_pop moves the low byte directly into pc and the high byte into
+	#the 16-bit local pch, which is then shifted up and ORed into pc
+	local pch 16
+	meta high pch
+	meta low pc
+	z80_pop
+	lsl pch 8 pch
+	or pch pc pc
+	
+ed 01001101 reti
+	local pch 16
+	cycles 1
+	meta high pch
+	meta low pc
+	z80_pop
+	lsl pch 8 pch
+	or pch pc pc
+	
+ed 01NNN101 retn
+	#Return from non-maskable interrupt: copies iff2 into iff1, then pops
+	#pc (low byte straight into pc, high byte via the 16-bit local pch).
+	#The NNN field is a don't-care so the pattern covers the documented
+	#encoding ED 45 as well as its mirrors (ED 55/5D/65/6D/75/7D). ED 4D
+	#is expected to resolve to the exact-match reti definition, the same
+	#way halt (01110110) coexists with ld_from_reg (01DDDSSS)
+	mov iff2 iff1
+	local pch 16
+	cycles 1
+	meta high pch
+	meta low pc
+	z80_pop
+	lsl pch 8 pch
+	or pch pc pc
+	
+11CCC000 ret_cond
+	local pch 16
+	cycles 1
+	z80_check_cond C
+	if istrue
+	
+	meta high pch
+	meta low pc
+	z80_pop
+	lsl pch 8 pch
+	or pch pc pc
+	
+	end
+
+11011011 in_abs
+	z80_fetch_immed
+	ocall io_read8
+	mov scratch1 a
+	
+ed 01RRR000 in_bc
+	lsl b 8 scratch1
+	or c scratch1 scratch1
+	ocall io_read8
+	mov scratch1 main.R
+	
+z80_ini_ind
+	#Shared body for ini/inir/ind/indr; change is 1 or -1 at the call
+	#sites. Reads one byte from the I/O port addressed by b:c (after b
+	#has been decremented), stores it at (hl) and adds change to hl
+	arg change 16
+	local tmp 8
+	cycles 1
+	
+	#wz = b:c + change, using b before the decrement below. Operand order
+	#for lsl is value, shift count, destination, so b must come first
+	#("lsl 8 b wz" would compute 8 << b)
+	lsl b 8 wz
+	or c wz wz
+	add change wz wz
+	
+	sub 1 b b
+	update_flags SZYX
+	
+	lsl b 8 scratch1
+	or c scratch1 scratch1
+	ocall io_read8
+	
+	#nflag takes bit 7 of the byte that was read
+	and 0x80 scratch1 nflag
+	
+	#tmp is declared 8 bits wide, so presumably only the low byte of wz
+	#takes part in this sum with the input byte
+	mov wz tmp
+	add tmp scratch1 tmp
+	update_flags C
+	
+	z80_store_hl
+	
+	lsl h 8 scratch2
+	or l scratch2 scratch2
+	add change scratch2 scratch2
+	mov scratch2 l
+	lsr scratch2 8 h
+	
+	#parity is taken from the low 3 bits of the sum above xored with b
+	and 7 tmp tmp
+	xor b tmp tmp
+	update_flags P
+	#ORs the upper nibble of chflags into its lower nibble; presumably
+	#this mirrors the carry bit (0x80) into the half-carry bit (0x08)
+	lsr chflags 4 tmp
+	or tmp chflags chflags
+	
+ed 10100010 ini
+	z80_ini_ind 1
+	
+ed 10110010 inir
+	z80_ini_ind 1
+	if zflag
+	else
+	sub 2 pc pc
+	cycles 5
+	end
+	
+ed 10101010 ind
+	z80_ini_ind -1
+	
+ed 10111010 indr
+	z80_ini_ind -1
+	if zflag
+	else
+	sub 2 pc pc
+	cycles 5
+	end
+	
+11010011 out_abs
+	z80_fetch_immed
+	mov scratch1 scratch2
+	mov a scratch1
+	ocall io_write8
+	
+ed 01RRR001 out_bc
+	lsl b 8 scratch2
+	or c scratch2 scratch2
+	mov main.R scratch1
+	ocall io_write8
+	
+z80_outi_outd
+	#Shared body for outi/otir/outd/otdr; change is 1 or -1 at the call
+	#sites. Reads the byte at (hl), adds change to hl, writes the byte to
+	#the I/O port addressed by b:c and then decrements b
+	arg change 16
+	local tmp 8
+	cycles 1
+	z80_fetch_hl
+	
+	#nflag takes bit 7 of the byte being output
+	and 0x80 scratch1 nflag
+	
+	lsl h 8 scratch2
+	or l scratch2 scratch2
+	add change scratch2 scratch2
+	mov scratch2 l
+	lsr scratch2 8 h
+	
+	#l has already been updated here; the sum feeds C and, further down,
+	#the parity computation
+	add l scratch1 tmp
+	update_flags C
+	and 7 tmp tmp
+	
+	lsl b 8 scratch2
+	or c scratch2 scratch2
+	ocall io_write8
+	
+	sub 1 b b
+	update_flags SZYX
+	
+	#wz = b:c + change, using b after the decrement above. Operand order
+	#for lsl is value, shift count, destination, so b must come first
+	#("lsl 8 b wz" would compute 8 << b)
+	lsl b 8 wz
+	or c wz wz
+	add change wz wz
+	
+	xor b tmp tmp
+	update_flags P
+	#ORs the upper nibble of chflags into its lower nibble; presumably
+	#this mirrors the carry bit (0x80) into the half-carry bit (0x08)
+	lsr chflags 4 tmp
+	or tmp chflags chflags
+	
+ed 10100011 outi
+	z80_outi_outd 1
+
+ed 10110011 otir
+	z80_outi_outd 1
+	if zflag
+	else
+	sub 2 pc pc
+	cycles 5
+	end
+	
+ed 10101011 outd
+	z80_outi_outd -1
+	
+ed 10111011 otdr
+	z80_outi_outd -1
+	if zflag
+	else
+	sub 2 pc pc
+	cycles 5
+	end
+	
+00000111 rlca
+	rol a 1 a
+	update_flags YH0XN0C
+	
+00010111 rla
+	rlc a 1 a
+	update_flags YH0XN0C
+	
+00001111 rrca
+	ror a 1 a
+	update_flags YH0XN0C
+
+00011111 rra
+	rrc a 1 a
+	update_flags YH0XN0C
+	
+cb 00000RRR rlc
+	rol main.R 1 main.R
+	update_flags SZYH0PXN0C
+	
+cb 00000110 rlc_hl
+	local tmp 8
+	z80_fetch_hl
+	mov scratch1 tmp
+	rol tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_hl
+	
+z80_rlc_index
+	#Rotate-left of the byte at address wz. wz is not computed here, so
+	#it must already hold the effective address when this runs.
+	#The result is left in tmp and also written back to memory through
+	#z80_store_index; callers pass either a scratch local or main.R, in
+	#which case the register receives a copy of the result
+	arg tmp 8
+	mov wz scratch1
+	ocall read_8
+	cycles 1
+	mov scratch1 tmp
+	rol tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_index
+	
+ddcb 00000110 rlc_ixd
+	local tmp 8
+	z80_rlc_index tmp
+	
+ddcb 00000RRR rlc_ixd_reg
+	z80_rlc_index main.R
+	
+fdcb 00000110 rlc_iyd
+	local tmp 8
+	z80_rlc_index tmp
+	
+fdcb 00000RRR rlc_iyd_reg
+	z80_rlc_index main.R
+	
+cb 00010RRR rl
+	rlc main.R 1 main.R
+	update_flags SZYH0PXN0C
+
+cb 00010110 rl_hl
+	local tmp 8
+	z80_fetch_hl
+	mov scratch1 tmp
+	rlc tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_hl
+	
+z80_rl_index
+	arg tmp 8
+	mov wz scratch1
+	ocall read_8
+	cycles 1
+	mov scratch1 tmp
+	rlc tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_index
+	
+ddcb 00010110 rl_ixd
+	local tmp 8
+	z80_rl_index tmp
+	
+fdcb 00010110 rl_iyd
+	local tmp 8
+	z80_rl_index tmp
+	
+	
+ddcb 00010RRR rl_ixd_reg
+	z80_rl_index main.R
+	
+fdcb 00010RRR rl_iyd_reg
+	z80_rl_index main.R
+	
+cb 00001RRR rrc
+	ror main.R 1 main.R
+	update_flags SZYH0PXN0C
+	
+cb 00001110 rrc_hl
+	local tmp 8
+	z80_fetch_hl
+	mov scratch1 tmp
+	ror tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_hl
+	
+z80_rrc_index
+	arg tmp 8
+	mov wz scratch1
+	ocall read_8
+	cycles 1
+	mov scratch1 tmp
+	ror tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_index
+	
+ddcb 00001110 rrc_ixd
+	local tmp 8
+	z80_rrc_index tmp
+	
+ddcb 00001RRR rrc_ixd_reg
+	z80_rrc_index main.R
+	
+fdcb 00001110 rrc_iyd
+	local tmp 8
+	z80_rrc_index tmp
+	
+fdcb 00001RRR rrc_iyd_reg
+	z80_rrc_index main.R
+	
+cb 00011RRR rr
+	rrc main.R 1 main.R
+	update_flags SZYH0PXN0C
+	
+cb 00011110 rr_hl
+	local tmp 8
+	z80_fetch_hl
+	mov scratch1 tmp
+	rrc tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_hl
+	
+z80_rr_index
+	arg tmp 8
+	mov wz scratch1
+	ocall read_8
+	cycles 1
+	mov scratch1 tmp
+	rrc tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_index
+	
+ddcb 00011110 rr_ixd
+	local tmp 8
+	z80_rr_index tmp
+	
+ddcb 00011RRR rr_ixd_reg
+	z80_rr_index main.R
+	
+fdcb 00011110 rr_iyd
+	local tmp 8
+	z80_rr_index tmp
+	
+fdcb 00011RRR rr_iyd_reg
+	z80_rr_index main.R
+	
+cb 00100RRR sla
+	lsl main.R 1 main.R
+	update_flags SZYH0PXN0C
+	
+cb 00100110 sla_hl
+	local tmp 8
+	z80_fetch_hl
+	mov scratch1 tmp
+	lsl tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_hl
+	
+z80_sla_index
+	arg tmp 8
+	mov wz scratch1
+	ocall read_8
+	cycles 1
+	mov scratch1 tmp
+	lsl tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_index
+	
+ddcb 00100110 sla_ixd
+	local tmp 8
+	z80_sla_index tmp
+	
+ddcb 00100RRR sla_ixd_reg
+	z80_sla_index main.R
+	
+fdcb 00100110 sla_iyd
+	local tmp 8
+	z80_sla_index tmp
+	
+fdcb 00100RRR sla_iyd_reg
+	z80_sla_index main.R
+	
+cb 00101RRR sra
+	asr main.R 1 main.R
+	update_flags SZYH0PXN0C
+	
+cb 00101110 sra_hl
+	local tmp 8
+	z80_fetch_hl
+	mov scratch1 tmp
+	asr tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_hl
+	
+z80_sra_index
+	arg tmp 8
+	mov wz scratch1
+	ocall read_8
+	cycles 1
+	mov scratch1 tmp
+	asr tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_index
+	
+ddcb 00101110 sra_ixd
+	local tmp 8
+	z80_sra_index tmp
+	
+ddcb 00101RRR sra_ixd_reg
+	z80_sra_index main.R
+	
+fdcb 00101110 sra_iyd
+	local tmp 8
+	z80_sra_index tmp
+	
+fdcb 00101RRR sra_iyd_reg
+	z80_sra_index main.R
+	
+cb 00110RRR sll
+	#Shift left by one with bit 0 forced to 1. Flags are updated in two
+	#steps: everything except P after the shift (Z is forced to 0), and P
+	#only after bit 0 has been set
+	lsl main.R 1 main.R
+	update_flags SZ0YH0XN0C
+	or 1 main.R main.R
+	update_flags P
+	
+cb 00110110 sll_hl
+	local tmp 8
+	z80_fetch_hl
+	mov scratch1 tmp
+	lsl tmp 1 tmp
+	update_flags SZ0YH0XN0C
+	or 1 tmp tmp
+	update_flags P
+	mov tmp scratch1
+	z80_store_hl
+	
+z80_sll_index
+	arg tmp 8
+	mov wz scratch1
+	ocall read_8
+	cycles 1
+	mov scratch1 tmp
+	lsl tmp 1 tmp
+	update_flags SZ0YH0XN0C
+	or 1 tmp tmp
+	update_flags P
+	mov tmp scratch1
+	z80_store_index
+	
+ddcb 00110110 sll_ixd
+	local tmp 8
+	z80_sll_index tmp
+	
+ddcb 00110RRR sll_ixd_reg
+	z80_sll_index main.R
+	
+fdcb 00110110 sll_iyd
+	local tmp 8
+	z80_sll_index tmp
+	
+fdcb 00110RRR sll_iyd_reg
+	z80_sll_index main.R
+	
+cb 00111RRR srl
+	lsr main.R 1 main.R
+	update_flags SZYH0PXN0C
+	
+cb 00111110 srl_hl
+	local tmp 8
+	z80_fetch_hl
+	mov scratch1 tmp
+	lsr tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_hl
+	
+z80_srl_index
+	arg tmp 8
+	mov wz scratch1
+	ocall read_8
+	cycles 1
+	mov scratch1 tmp
+	lsr tmp 1 tmp
+	update_flags SZYH0PXN0C
+	mov tmp scratch1
+	z80_store_index
+	
+ddcb 00111110 srl_ixd
+	local tmp 8
+	z80_srl_index tmp
+	
+ddcb 00111RRR srl_ixd_reg
+	z80_srl_index main.R
+	
+fdcb 00111110 srl_iyd
+	local tmp 8
+	z80_srl_index tmp
+	
+fdcb 00111RRR srl_iyd_reg
+	z80_srl_index main.R
+	
+cb 01BBBRRR bit_reg
+	local tmp 8
+	lsl 1 B tmp
+	mov main.R last_flag_result
+	and main.R tmp tmp
+	update_flags SZH1PN0
+	
+cb 01BBB110 bit_hl
+	#Tests bit B of the byte at (hl). tmp holds the mask 1 << B and then
+	#the masked value that the S/Z/P flags are derived from.
+	#last_flag_result is loaded with the high byte of wz before the and
+	#(presumably the source of the undocumented X/Y bits - TODO confirm)
+	local tmp 8
+	z80_fetch_hl
+	cycles 1
+	lsl 1 B tmp
+	lsr wz 8 last_flag_result
+	and scratch1 tmp tmp
+	update_flags SZH1PN0
+	
+	
+ddcb 01BBBRRR bit_ixd
+	local tmp 8
+	mov wz scratch1
+	ocall read_8
+	cycles 1
+	lsl 1 B tmp
+	lsr wz 8 last_flag_result
+	and scratch1 tmp tmp
+	update_flags SZH1PN0
+	
+fdcb 01BBBRRR bit_iyd
+	local tmp 8
+	mov wz scratch1
+	ocall read_8
+	cycles 1
+	lsl 1 B tmp
+	lsr wz 8 last_flag_result
+	and scratch1 tmp tmp
+	update_flags SZH1PN0
+	
+cb 10BBBRRR res_reg
+	local tmp 8
+	lsl 1 B tmp
+	not tmp tmp
+	and main.R tmp main.R
+	
+cb 10BBB110 res_hl
+	z80_fetch_hl
+	cycles 1
+	local tmp 8
+	lsl 1 B tmp
+	not tmp tmp
+	and scratch1 tmp scratch1
+	z80_store_hl
+	
+z80_res_index
+	#Clears bit "bit" of the byte at address wz. tmp first holds the
+	#inverted mask ~(1 << bit), then the masked byte, which is written
+	#back through z80_store_index. When the caller passes main.R as tmp
+	#that register keeps a copy of the result. wz is not computed here
+	arg bit 8
+	arg tmp 8
+	lsl 1 bit tmp
+	not tmp tmp
+	mov wz scratch1
+	ocall read_8
+	cycles 1
+	and scratch1 tmp tmp
+	mov tmp scratch1
+	z80_store_index
+	
+ddcb 10BBB110 res_ixd
+	local tmp 8
+	z80_res_index B tmp
+	
+ddcb 10BBBRRR res_ixd_reg
+	z80_res_index B main.R
+	
+fdcb 10BBB110 res_iyd
+	local tmp 8
+	z80_res_index B tmp
+	
+fdcb 10BBBRRR res_iyd_reg
+	z80_res_index B main.R
+	
+cb 11BBBRRR set_reg
+	local tmp 8
+	lsl 1 B tmp
+	or main.R tmp main.R
+	
+cb 11BBB110 set_hl
+	z80_fetch_hl
+	cycles 1
+	local tmp 8
+	lsl 1 B tmp
+	or scratch1 tmp scratch1
+	z80_store_hl
+	
+z80_set_index
+	arg bit 8
+	arg tmp 8
+	lsl 1 bit tmp
+	mov wz scratch1
+	ocall read_8
+	cycles 1
+	or scratch1 tmp tmp
+	mov tmp scratch1
+	z80_store_index
+	
+ddcb 11BBB110 set_ixd
+	local tmp 8
+	z80_set_index B tmp
+	
+ddcb 11BBBRRR set_ixd_reg
+	z80_set_index B main.R
+	
+fdcb 11BBB110 set_iyd
+	local tmp 8
+	z80_set_index B tmp
+	
+fdcb 11BBBRRR set_iyd_reg
+	z80_set_index B main.R
+	
+z80_fetch_mod_hl
+	#Reads the byte at (hl) into scratch1 and adds change to hl.
+	#The address is captured in scratch1 before hl is modified, so the
+	#read uses the old hl. Charges 2 extra cycles
+	local tmp 16
+	arg change 16
+	lsl h 8 tmp
+	or l tmp tmp
+	mov tmp scratch1
+	add change tmp tmp
+	mov tmp l
+	lsr tmp 8 h
+	ocall read_8
+	cycles 2
+	
+z80_ldd_ldi
+	#Shared body for ldi/ldd/ldir/lddr; change is 1 or -1 at the call
+	#sites. Copies the byte at (hl) to (de), adds change to both hl and
+	#de, and decrements the 16-bit counter b:c
+	arg change 16
+	local tmp 16
+	local tmp8 8
+	z80_fetch_mod_hl change
+	
+	#a + copied byte is only used for flag bits, a itself is not changed
+	add a scratch1 tmp8
+	update_flags H0XN0
+	
+	#moves bit 1 of that sum to bit 5 of last_flag_result while keeping
+	#bits 7 and 3 (presumably bit 5 is where the Y flag is read from)
+	and 0x2 tmp8 tmp8
+	lsl tmp8 4 tmp8
+	and 0x88 last_flag_result last_flag_result
+	or tmp8 last_flag_result last_flag_result
+	
+	#scratch2 gets the old de as the write address; scratch1 still holds
+	#the byte read from (hl)
+	lsl d 8 tmp
+	or e tmp tmp
+	mov tmp scratch2
+	add change tmp tmp
+	mov tmp e
+	lsr tmp 8 d
+	ocall write_8
+	
+	lsl b 8 tmp
+	or c tmp tmp
+	sub 1 tmp tmp
+	
+	#pvflag = b | c, i.e. non-zero while the counter has not reached 0
+	mov tmp c
+	lsr tmp 8 b
+	mov c pvflag
+	or b pvflag pvflag
+	
+
+ed 10100000 ldi
+	z80_ldd_ldi 1
+
+ed 10101000 ldd
+	z80_ldd_ldi -1
+
+ed 10110000 ldir
+	#One ldi step; while pvflag is non-zero (b:c != 0 after the
+	#decrement) pc is moved back by 2 so the instruction runs again,
+	#wz is set to pc + 1 (taken before pc is rewound) and 5 extra cycles
+	#are charged
+	z80_ldd_ldi 1
+	if pvflag
+	
+	add 1 pc wz
+	sub 2 pc pc
+	cycles 5
+	
+	end
+
+ed 10111000 lddr
+	z80_ldd_ldi -1
+	if pvflag
+	
+	add 1 pc wz
+	sub 2 pc pc
+	cycles 5
+	
+	end
+	
+z80_cpd_cpi
+	#Shared body for cpi/cpd/cpir/cpdr; change is 1 or -1 at the call
+	#sites. Compares a with the byte at (hl), adds change to hl and
+	#decrements the 16-bit counter b:c. a is not modified
+	local tmp 16
+	local tmp8 8
+	local hf 8
+	arg change 16
+	
+	z80_fetch_mod_hl change
+	#tmp8 = a - byte
+	sub scratch1 a tmp8
+	update_flags SZHN1
+	
+	#hf = bit 3 of chflags as 0 or 1 (the half-carry bit, going by the
+	#local's name)
+	lsr chflags 3 hf
+	and 1 hf hf
+	
+	sub hf tmp8 tmp8
+	update_flags X
+	
+	#moves bit 1 of the adjusted difference to bit 5 of last_flag_result
+	#while keeping bits 7 and 3
+	and 0x2 tmp8 tmp8
+	lsl tmp8 4 tmp8
+	and 0x88 last_flag_result last_flag_result
+	or tmp8 last_flag_result last_flag_result
+	
+	lsl b 8 tmp
+	or c tmp tmp
+	sub 1 tmp tmp
+	
+	#pvflag = b | c, i.e. non-zero while the counter has not reached 0
+	mov tmp c
+	lsr tmp 8 b
+	mov c pvflag
+	or b pvflag pvflag
+	
+	cycles 5
+	
+ed 10100001 cpi
+	z80_cpd_cpi 1
+	
+ed 10101001 cpd
+	z80_cpd_cpi -1
+	
+ed 10110001 cpir
+	#One cpi step; repeats (pc moved back by 2, wz = pc + 1 taken before
+	#the rewind, 5 extra cycles) only while pvflag is non-zero and zflag
+	#is not set, i.e. the counter is not exhausted and no match was found
+	z80_cpd_cpi 1
+	if pvflag
+	
+	if zflag
+	
+	else
+	
+	add 1 pc wz
+	sub 2 pc pc
+	cycles 5
+	
+	end
+	end
+	
+ed 10111001 cpdr
+	z80_cpd_cpi -1
+	if pvflag
+	
+	if zflag
+	
+	else
+	
+	add 1 pc wz
+	sub 2 pc pc
+	cycles 5
+	
+	end
+	end
+
+00100111 daa
+	#Decimal adjust of a. Builds a correction value in diff (0x00, 0x06,
+	#0x60 or 0x66) and then subtracts it from a when nflag is set or adds
+	#it otherwise
+	local diff 8
+	local tmp 8
+	local low 8
+	and 0xF a low
+	#low-nibble correction: 6 when bit 3 of chflags is set or when the
+	#low nibble of a is >= 0xA, otherwise 0
+	and 0x8 chflags tmp
+	if tmp
+	
+	mov 6 diff
+	
+	else
+	
+	cmp 0xA low
+	if >=U
+	mov 6 diff
+	else
+	mov 0 diff
+	end
+	
+	end
+	
+	#high-nibble correction: 0x60 is ORed in and C is set when bit 7 of
+	#chflags is set or when a >= 0x9A; otherwise C is cleared
+	and 0x80 chflags tmp
+	if tmp
+	
+	or 0x60 diff diff
+	update_flags C1
+	
+	else
+	
+	cmp 0x9A a
+	if >=U
+	or 0x60 diff diff
+	update_flags C1
+	else
+	update_flags C0
+	end
+	end
+	
+	#C is not in the lists below, so the value chosen above is kept
+	if nflag
+	
+	sub diff a a
+	update_flags SZYHPX
+	
+	else
+	
+	add diff a a
+	update_flags SZYHPX
+	
+	end
+	
+dd OOOOOOOO dd_normal
+	dispatch O
+
+fd OOOOOOOO fd_normal
+	dispatch O
+	
+ed 01101111 rld
+	#Nibble rotate left through a and the byte at (hl):
+	# - the byte's high nibble moves to the low nibble of a
+	# - the byte's low nibble moves to its high nibble
+	# - the old low nibble of a becomes the byte's low nibble
+	#The high nibble of a is unchanged
+	local tmp 8
+	local tmp2 8
+	z80_fetch_hl
+	cycles 4
+	
+	lsr scratch1 4 tmp
+	
+	#NOTE(review): scratch1 may carry bits above bit 7 after this shift;
+	#presumably write_8 only stores the low byte - confirm
+	lsl scratch1 4 scratch1
+	
+	and 0xF a tmp2
+	or tmp2 scratch1 scratch1
+	
+	and 0xF0 a a
+	or tmp a a
+	update_flags SZYH0XPN0
+	z80_store_hl
+	
+ed 01100111 rrd
+	#Nibble rotate right through a and the byte at (hl):
+	# - the byte's low nibble moves to the low nibble of a
+	# - the byte's high nibble moves to its low nibble
+	# - the old low nibble of a becomes the byte's high nibble
+	#The high nibble of a is unchanged
+	local tmp 8
+	local tmp2 8
+	z80_fetch_hl
+	cycles 4
+	
+	and 0xF scratch1 tmp
+	lsr scratch1 4 scratch1
+	
+	#tmp2 is declared 8 bits wide, so presumably only the low nibble of a
+	#survives the shift, landing in bits 4-7
+	lsl a 4 tmp2
+	or tmp2 scratch1 scratch1
+	
+	and 0xF0 a a
+	or tmp a a
+	update_flags SZYH0XPN0
+	z80_store_hl
+	
\ No newline at end of file
--- a/z80_to_x86.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/z80_to_x86.c	Sat Jan 15 13:15:21 2022 -0800
@@ -355,6 +355,8 @@
 			}
 			if (inst->reg == Z80_I || inst->ea_reg == Z80_I || inst->reg == Z80_R || inst->ea_reg == Z80_R) {
 				num_cycles += 1;
+			} else if (inst->reg == Z80_USE_IMMED) {
+				num_cycles += 3;
 			}
 			break;
 		case Z80_IMMED:
@@ -874,7 +876,7 @@
 		} else if(inst->addr_mode == Z80_IMMED) {
 			num_cycles += 3;
 		} else if(z80_size(inst) == SZ_W) {
-			num_cycles += 4;
+			num_cycles += 7;
 		}
 		cycles(&opts->gen, num_cycles);
 		translate_z80_reg(inst, &dst_op, opts);
@@ -942,7 +944,7 @@
 		} else if(inst->addr_mode == Z80_IMMED) {
 			num_cycles += 3;
 		} else if(z80_size(inst) == SZ_W) {
-			num_cycles += 4;
+			num_cycles += 7;
 		}
 		cycles(&opts->gen, num_cycles);
 		translate_z80_reg(inst, &dst_op, opts);
@@ -1073,7 +1075,7 @@
 		} else if(inst->addr_mode == Z80_IMMED) {
 			num_cycles += 3;
 		} else if(z80_size(inst) == SZ_W) {
-			num_cycles += 4;
+			num_cycles += 7;
 		}
 		cycles(&opts->gen, num_cycles);
 		translate_z80_reg(inst, &dst_op, opts);
@@ -1261,6 +1263,10 @@
 	case Z80_DEC:
 		if(z80_size(inst) == SZ_W) {
 			num_cycles += 2;
+		} else if (inst->addr_mode == Z80_REG_INDIRECT) {
+			num_cycles += 1;
+		} else if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
+			num_cycles += 9;
 		}
 		cycles(&opts->gen, num_cycles);
 		translate_z80_reg(inst, &dst_op, opts);
@@ -1853,7 +1859,7 @@
 		break;
 	case Z80_BIT: {
 		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
-			num_cycles += 8;
+			num_cycles += 4;
 		}
 		cycles(&opts->gen, num_cycles);
 		uint8_t bit;
@@ -1914,7 +1920,7 @@
 	}
 	case Z80_SET: {
 		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
-			num_cycles += 8;
+			num_cycles += 4;
 		}
 		cycles(&opts->gen, num_cycles);
 		uint8_t bit;
@@ -1983,7 +1989,7 @@
 	}
 	case Z80_RES: {
 		if (inst->addr_mode == Z80_IX_DISPLACE || inst->addr_mode == Z80_IY_DISPLACE) {
-			num_cycles += 8;
+			num_cycles += 4;
 		}
 		cycles(&opts->gen, num_cycles);
 		uint8_t bit;
@@ -2321,7 +2327,7 @@
 		if (inst->addr_mode == Z80_IMMED_INDIRECT) {
 			mov_ir(code, inst->immed, opts->gen.scratch1, SZ_B);
 		} else {
-			zreg_to_native(opts, Z80_C, opts->gen.scratch2);
+			zreg_to_native(opts, Z80_C, opts->gen.scratch1);
 		}
 		call(code, opts->read_io);
 		if (inst->addr_mode != Z80_IMMED_INDIRECT) {
@@ -2593,7 +2599,7 @@
 		break;
 	}
 	case Z80_OUT:
-		if (inst->reg == Z80_A) {
+		if (inst->addr_mode == Z80_IMMED_INDIRECT) {
 			num_cycles += 3;
 		}
 		cycles(&opts->gen, num_cycles);//T States: 4 3/4
@@ -2658,7 +2664,6 @@
 		setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
 		break;
 	case Z80_OTIR: {
-		code_ptr start = code->cur;
 		cycles(&opts->gen, num_cycles + 1);//T States: 4, 5
 		//read from (HL)
 		zreg_to_native(opts, Z80_HL, opts->gen.scratch1);
@@ -2758,7 +2763,6 @@
 		setcc_rdisp(code, CC_P, opts->gen.context_reg, zf_off(ZF_PV));
 		break;
 	case Z80_OTDR: {
-		code_ptr start = code->cur;
 		cycles(&opts->gen, num_cycles + 1);//T States: 4, 5
 		//read from (HL)
 		zreg_to_native(opts, Z80_HL, opts->gen.scratch1);
@@ -3409,6 +3413,9 @@
 	add_rdispr(code, options->gen.context_reg, offsetof(z80_context, target_cycle), options->gen.cycles, SZ_D);
 	cmp_rdispr(code, options->gen.context_reg, offsetof(z80_context, int_cycle), options->gen.cycles, SZ_D);
 	jcc(code, CC_B, skip_int);
+	//check that we are not past the end of interrupt pulse
+	cmp_rrdisp(code, options->gen.cycles, options->gen.context_reg, offsetof(z80_context, int_pulse_end), SZ_D);
+	jcc(code, CC_B, skip_int);
 	//set limit to the cycle limit
 	mov_rdispr(code, options->gen.context_reg, offsetof(z80_context, sync_cycle), options->gen.scratch2, SZ_D);
 	mov_rrdisp(code, options->gen.scratch2, options->gen.context_reg, offsetof(z80_context, target_cycle), SZ_D);
@@ -3420,14 +3427,15 @@
 	jcc(code, CC_NZ, is_nmi);
 	mov_irdisp(code, 0, options->gen.context_reg, offsetof(z80_context, iff1), SZ_B);
 	mov_irdisp(code, 0, options->gen.context_reg, offsetof(z80_context, iff2), SZ_B);
+	cycles(&options->gen, 6); //interrupt ack cycle
 	code_ptr after_int_disable = code->cur + 1;
 	jmp(code, after_int_disable);
 	*is_nmi = code->cur - (is_nmi + 1);
 	mov_rdispr(code, options->gen.context_reg, offsetof(z80_context, iff1), options->gen.scratch2, SZ_B);
 	mov_irdisp(code, 0, options->gen.context_reg, offsetof(z80_context, iff1), SZ_B);
 	mov_rrdisp(code, options->gen.scratch2, options->gen.context_reg, offsetof(z80_context, iff2), SZ_B);
+	cycles(&options->gen, 5); //NMI processing cycles
 	*after_int_disable = code->cur - (after_int_disable + 1);
-	cycles(&options->gen, 7);
 	//save return address (in scratch1) to Z80 stack
 	sub_ir(code, 2, options->regs[Z80_SP], SZ_W);
 	mov_rr(code, options->regs[Z80_SP], options->gen.scratch2, SZ_W);
@@ -3453,7 +3461,6 @@
 	cmp_irdisp(code, 0, options->gen.context_reg, offsetof(z80_context, int_is_nmi), SZ_B);
 	is_nmi = code->cur + 1;
 	jcc(code, CC_NZ, is_nmi);
-	cycles(&options->gen, 6); //interupt ack cycle
 	//TODO: Support interrupt mode 0, not needed for Genesis sit it seems to read $FF during intack
 	//which is conveniently rst $38, i.e. the same thing that im 1 does
 	//check interrupt mode
@@ -3461,6 +3468,7 @@
 	code_ptr im2 = code->cur + 1;
 	jcc(code, CC_Z, im2);
 	mov_ir(code, 0x38, options->gen.scratch1, SZ_W);
+	cycles(&options->gen, 1); //total time for mode 0/1 is 13 t-states
 	code_ptr after_int_dest = code->cur + 1;
 	jmp(code, after_int_dest);
 	*im2 = code->cur - (im2 + 1);
@@ -3501,10 +3509,12 @@
 	//HACK
 	options->gen.address_size = SZ_D;
 	options->gen.address_mask = io_address_mask;
+	options->gen.bus_cycles = 4;
 	options->read_io = gen_mem_fun(&options->gen, io_chunks, num_io_chunks, READ_8, NULL);
 	options->write_io = gen_mem_fun(&options->gen, io_chunks, num_io_chunks, WRITE_8, NULL);
 	options->gen.address_size = SZ_W;
 	options->gen.address_mask = 0xFFFF;
+	options->gen.bus_cycles = 3;
 
 	options->read_16 = code->cur;
 	cycles(&options->gen, 3);
@@ -3594,7 +3604,7 @@
 	tmp_stack_off = code->stack_off;
 	save_callee_save_regs(code);
 #ifdef X86_64
-	mov_rr(code, RDI, options->gen.context_reg, SZ_PTR);
+	mov_rr(code, FIRST_ARG_REG, options->gen.context_reg, SZ_PTR);
 #else
 	mov_rdispr(code, RSP, 5 * sizeof(int32_t), options->gen.context_reg, SZ_PTR);
 #endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/z80_util.c	Sat Jan 15 13:15:21 2022 -0800
@@ -0,0 +1,341 @@
+#include <string.h>
+
+void z80_read_8(z80_context *context) //reads the byte at the address held in scratch1; the result replaces scratch1
+{
+	context->cycles += 3 * context->opts->gen.clock_divider; //each memory read is charged 3 clocks scaled by the divider
+	uint8_t *fast = context->fastread[context->scratch1 >> 10]; //fastread has one entry per 1KB page
+	if (fast) {
+		context->scratch1 = fast[context->scratch1 & 0x3FF]; //low 10 bits are the offset inside the page
+	} else { //no direct pointer for this page, fall back to the generic read_byte lookup
+		context->scratch1 = read_byte(context->scratch1, (void **)context->mem_pointers, &context->opts->gen, context);
+	}
+}
+
+void z80_write_8(z80_context *context) //writes the byte in scratch1 to the address held in scratch2
+{
+	context->cycles += 3 * context->opts->gen.clock_divider; //each memory write is charged 3 clocks scaled by the divider
+	uint8_t *fast = context->fastwrite[context->scratch2 >> 10]; //fastwrite has one entry per 1KB page
+	if (fast) {
+		fast[context->scratch2 & 0x3FF] = context->scratch1; //low 10 bits are the offset inside the page
+	} else { //no direct pointer for this page, fall back to the generic write_byte lookup
+		write_byte(context->scratch2, context->scratch1, (void **)context->mem_pointers, &context->opts->gen, context);
+	}
+}
+
+void z80_io_read8(z80_context *context) //reads a byte through the IO map; address comes from scratch1 and the result replaces it
+{
+	uint32_t tmp_mask = context->opts->gen.address_mask; //remember the regular map settings so they can be restored below
+	memmap_chunk const *tmp_map = context->opts->gen.memmap;
+	uint32_t tmp_chunks = context->opts->gen.memmap_chunks;
+	
+	context->opts->gen.address_mask = context->io_mask; //temporarily point the shared options at the IO map
+	context->opts->gen.memmap = context->io_map;
+	context->opts->gen.memmap_chunks = context->io_chunks;
+	
+	context->cycles += 4 * context->opts->gen.clock_divider; //IO access is charged 4 clocks (memory access above uses 3)
+	context->scratch1 = read_byte(context->scratch1, (void **)context->mem_pointers, &context->opts->gen, context);
+	
+	context->opts->gen.address_mask = tmp_mask; //put the regular map settings back
+	context->opts->gen.memmap = tmp_map;
+	context->opts->gen.memmap_chunks = tmp_chunks;
+}
+
+void z80_io_write8(z80_context *context) //writes the byte in scratch1 through the IO map at the address held in scratch2
+{
+	uint32_t tmp_mask = context->opts->gen.address_mask; //remember the regular map settings so they can be restored below
+	memmap_chunk const *tmp_map = context->opts->gen.memmap;
+	uint32_t tmp_chunks = context->opts->gen.memmap_chunks;
+	
+	context->opts->gen.address_mask = context->io_mask; //temporarily point the shared options at the IO map
+	context->opts->gen.memmap = context->io_map;
+	context->opts->gen.memmap_chunks = context->io_chunks;
+	
+	context->cycles += 4 * context->opts->gen.clock_divider; //IO access is charged 4 clocks (memory access above uses 3)
+	write_byte(context->scratch2, context->scratch1, (void **)context->mem_pointers, &context->opts->gen, context);
+	
+	context->opts->gen.address_mask = tmp_mask; //put the regular map settings back
+	context->opts->gen.memmap = tmp_map;
+	context->opts->gen.memmap_chunks = tmp_chunks;
+}
+
+//quick hack until I get a chance to change which init method these get passed to
+static memmap_chunk const * tmp_io_chunks;
+static uint32_t tmp_num_io_chunks, tmp_io_mask;
+void init_z80_opts(z80_options * options, memmap_chunk const * chunks, uint32_t num_chunks, memmap_chunk const * io_chunks, uint32_t num_io_chunks, uint32_t clock_divider, uint32_t io_address_mask) //fills in caller-provided options; IO map arguments are only stashed for init_z80_context
+{
+	memset(options, 0, sizeof(*options)); //every field not set below starts out as zero
+	options->gen.memmap = chunks;
+	options->gen.memmap_chunks = num_chunks;
+	options->gen.address_mask = 0xFFFF; //16-bit address space
+	options->gen.max_address = 0xFFFF;
+	options->gen.clock_divider = clock_divider;
+	tmp_io_chunks = io_chunks; //file-scope statics: the most recent call wins for any context created afterwards
+	tmp_num_io_chunks = num_io_chunks;
+	tmp_io_mask = io_address_mask;
+}
+
+void z80_options_free(z80_options *opts) //releases an options struct; opts must be heap-allocated (or NULL) since it is passed to free
+{
+	free(opts);
+}
+
+z80_context * init_z80_context(z80_options *options) //allocates a zeroed context bound to options and to the IO map stashed by init_z80_opts
+{
+	z80_context *context = calloc(1, sizeof(z80_context)); //NOTE(review): result is used below without a NULL check
+	context->opts = options;
+	context->io_map = (memmap_chunk *)tmp_io_chunks; //cast drops the const qualifier of the stashed pointer
+	context->io_chunks = tmp_num_io_chunks;
+	context->io_mask = tmp_io_mask;
+	context->int_cycle = context->int_end_cycle = context->nmi_cycle = 0xFFFFFFFFU; //0xFFFFFFFF is the "nothing scheduled" value tested elsewhere in this file
+	z80_invalidate_code_range(context, 0, 0xFFFF); //populate the fastread/fastwrite tables for the whole address space
+	return context;
+}
+
+void z80_sync_cycle(z80_context *context, uint32_t target_cycle) //sets sync_cycle to target_cycle, pulled earlier by a still-pending interrupt or NMI cycle
+{
+	if (context->iff1 && context->int_cycle < target_cycle) { //int_cycle is only considered while iff1 is set
+		if (context->cycles > context->int_end_cycle) { //already past int_end_cycle, so the pending interrupt is dropped
+			context->int_cycle = 0xFFFFFFFFU;
+		} else {
+			target_cycle = context->int_cycle;
+		}
+	};
+	if (context->nmi_cycle < target_cycle) { //nmi_cycle is not gated by iff1
+		target_cycle = context->nmi_cycle;
+	}
+	context->sync_cycle = target_cycle;
+}
+
+void z80_run(z80_context *context, uint32_t target_cycle) //advances the context toward target_cycle, taking reset/busreq/busack into account
+{
+	if (context->reset || context->busack) { //no execution in these states, the cycle counter is set directly
+		context->cycles = target_cycle;
+	} else if (target_cycle > context->cycles) { //nothing to do when the context is already at or past the target
+		if (context->busreq) {
+			//busreq is sampled at the end of an m-cycle
+			//we can approximate that by running for a single m-cycle after a bus request
+			target_cycle = context->cycles + 4 * context->opts->gen.clock_divider;
+		}
+		z80_execute(context, target_cycle);
+		if (context->busreq) { //the pending request is acknowledged once the shortened run above has finished
+			context->busack = 1;
+		}
+	}
+}
+
+void z80_assert_reset(z80_context * context, uint32_t cycle) //catches the context up to cycle, then marks reset as asserted
+{
+	z80_run(context, cycle); //run first so the flag change takes effect from this cycle onwards
+	context->reset = 1;
+}
+
+void z80_clear_reset(z80_context * context, uint32_t cycle) //catches the context up to cycle, then releases reset if it was asserted
+{
+	z80_run(context, cycle);
+	if (context->reset) { //only reinitialize state when reset was actually asserted
+		context->imode = 0;
+		context->iff1 = context->iff2 = 0;
+		context->pc = 0; //execution restarts from address 0
+		context->reset = 0;
+		if (context->busreq) { //a bus request that is still asserted is acknowledged right away
+			//TODO: Figure out appropriate delay
+			context->busack = 1;
+		}
+	}
+}
+
+#define MAX_MCYCLE_LENGTH 6
+void z80_assert_busreq(z80_context * context, uint32_t cycle) //catches the context up to cycle, then records a bus request
+{
+	z80_run(context, cycle);
+	context->busreq = 1;
+	//this is an imperfect approximation since most M-cycles take less tstates than the max
+	//and a short 3-tstate m-cycle can take an unbounded number due to wait states
+	if (context->cycles - cycle > MAX_MCYCLE_LENGTH * context->opts->gen.clock_divider) { //context already ran more than one max-length m-cycle past the request
+		context->busack = 1;
+	}
+}
+
+void z80_clear_busreq(z80_context * context, uint32_t cycle) //catches the context up to cycle, then drops both busreq and busack
+{
+	z80_run(context, cycle);
+	context->busreq = 0;
+	context->busack = 0;
+	//there appears to be at least a 1 Z80 cycle delay between busreq
+	//being released and resumption of execution
+	context->cycles += context->opts->gen.clock_divider; //one clock, scaled by the divider
+}
+
+void z80_assert_nmi(z80_context *context, uint32_t cycle) //schedules an NMI at cycle; unlike the other assert functions here it does not call z80_run first
+{
+	context->nmi_cycle = cycle;
+}
+
+uint8_t z80_get_busack(z80_context * context, uint32_t cycle) //returns the busack flag as of cycle
+{
+	z80_run(context, cycle); //catch up first since z80_run is what sets busack for a pending busreq
+	return context->busack;
+}
+
+void z80_invalidate_code_range(z80_context *context, uint32_t startA, uint32_t endA) //refreshes the fastread/fastwrite entries for every 1KB page from startA (rounded down) up to endA
+{
+	for(startA &= ~0x3FF; startA < endA; startA += 1024) //align the start down to a page boundary, then step one page at a time
+	{
+		uint8_t *start = get_native_pointer(startA, (void**)context->mem_pointers, &context->opts->gen);
+		if (start) {
+			uint8_t *end = get_native_pointer(startA + 1023, (void**)context->mem_pointers, &context->opts->gen);
+			if (!end || end - start != 1023) { //last byte of the page must sit exactly 1023 bytes after the first
+				start = NULL;
+			}
+		}
+		context->fastread[startA >> 10] = start; //NULL makes z80_read_8 use read_byte for this page
+		start = get_native_write_pointer(startA, (void**)context->mem_pointers, &context->opts->gen); //same check again using the write pointer lookup
+		if (start) {
+			uint8_t *end = get_native_write_pointer(startA + 1023, (void**)context->mem_pointers, &context->opts->gen);
+			if (!end || end - start != 1023) {
+				start = NULL;
+			}
+		}
+		context->fastwrite[startA >> 10] = start; //NULL makes z80_write_8 use write_byte for this page
+	}
+}
+
+void z80_adjust_cycles(z80_context * context, uint32_t deduction) //shifts the cycle counter and all scheduled event cycles back by deduction
+{
+	context->cycles -= deduction; //NOTE(review): not clamped like the fields below - presumably callers keep deduction <= cycles, confirm
+	if (context->int_cycle != 0xFFFFFFFFU) { //0xFFFFFFFF means "not scheduled" and is left as is
+		if (context->int_cycle > deduction) {
+			context->int_cycle -= deduction;
+		} else { //clamp at 0 instead of wrapping around
+			context->int_cycle = 0;
+		}
+	}
+	if (context->int_end_cycle != 0xFFFFFFFFU) { //same treatment as int_cycle
+		if (context->int_end_cycle > deduction) {
+			context->int_end_cycle -= deduction;
+		} else {
+			context->int_end_cycle = 0;
+		}
+	}
+	if (context->nmi_cycle != 0xFFFFFFFFU) { //same treatment as int_cycle
+		if (context->nmi_cycle > deduction) {
+			context->nmi_cycle -= deduction;
+		} else {
+			context->nmi_cycle = 0;
+		}
+	}
+}
+
+void z80_serialize(z80_context *context, serialize_buffer *buf) //writes the context state to buf; field order must match z80_deserialize
+{
+	save_int8(buf, context->main[1]);//C
+	save_int8(buf, context->main[0]);//B
+	save_int8(buf, context->main[3]);//E
+	save_int8(buf, context->main[2]);//D
+	save_int8(buf, context->main[5]);//L
+	save_int8(buf, context->main[4]);//H
+	save_int8(buf, context->ix);//IXL
+	save_int8(buf, context->ix >> 8);//IXH
+	save_int8(buf, context->iy);//IYL
+	save_int8(buf, context->iy >> 8);//IYH
+	save_int8(buf, context->i);
+	save_int8(buf, (context->rhigh & 0x80) | (context->r & 0x7F));//R: top bit comes from rhigh, low 7 bits from r
+	save_int8(buf, context->main[7]);//A
+	uint8_t f = context->last_flag_result & 0xA8 //F is rebuilt from the separate flag fields; bits 7, 5 and 3 come from last_flag_result
+		| (context->zflag ? 0x40 : 0) //bit 6 from zflag
+		| (context->chflags & 8 ? 0x10 : 0) //bit 4 from chflags bit 3
+		| (context->pvflag ? 4 : 0) //bit 2 from pvflag
+		| (context->nflag ? 2 : 0) //bit 1 from nflag
+		| (context->chflags & 0x80 ? 1 : 0); //bit 0 from chflags bit 7
+	save_int8(buf, f);
+	save_int8(buf, context->alt[1]);//C
+	save_int8(buf, context->alt[0]);//B
+	save_int8(buf, context->alt[3]);//E
+	save_int8(buf, context->alt[2]);//D
+	save_int8(buf, context->alt[5]);//L
+	save_int8(buf, context->alt[4]);//H
+	save_int8(buf, 0);//non-existent alt ixl
+	save_int8(buf, 0);//non-existent alt ixh
+	save_int8(buf, 0);//non-existent alt iyl
+	save_int8(buf, 0);//non-existent alt iyh
+	save_int8(buf, 0);//non-existent alt i
+	save_int8(buf, 0);//non-existent alt r
+	save_int8(buf, context->alt[7]);//A
+	save_int8(buf, context->alt[6]);//F
+	
+	save_int16(buf, context->pc);
+	save_int16(buf, context->sp);
+	save_int8(buf, context->imode);
+	save_int8(buf, context->iff1);
+	save_int8(buf, context->iff2);
+	uint8_t is_nmi = context->nmi_cycle != 0xFFFFFFFF && (context->nmi_cycle < context->int_cycle || !context->iff1); //an NMI is scheduled and either comes first or iff1 is clear
+	save_int8(buf,  is_nmi);//int_is_nmi
+	save_int8(buf, context->busack);
+	save_int32(buf, context->cycles);
+	save_int32(buf, is_nmi ? context->nmi_cycle : context->int_cycle);//int_cycle
+	save_int32(buf, 0);//int_enable_cycle
+	save_int32(buf, context->int_cycle);
+	save_int32(buf, context->int_end_cycle);
+	save_int32(buf, context->nmi_cycle);
+}
+
+void z80_deserialize(deserialize_buffer *buf, void *vcontext) //restores context state from buf; reads fields in the order z80_serialize writes them
+{
+	z80_context *context = vcontext; //vcontext is treated as a z80_context pointer
+	context->main[1] = load_int8(buf);//C
+	context->main[0] = load_int8(buf);//B
+	context->main[3] = load_int8(buf);//E
+	context->main[2] = load_int8(buf);//D
+	context->main[5] = load_int8(buf);//L
+	context->main[4] = load_int8(buf);//H
+	context->ix = load_int8(buf);//IXL
+	context->ix |= load_int8(buf) << 8;//IXH
+	context->iy = load_int8(buf);//IYL
+	context->iy |= load_int8(buf) << 8;//IYH
+	context->i = load_int8(buf);
+	context->r = load_int8(buf);
+	context->rhigh = context->r & 0x80; //top bit of R is tracked separately in rhigh
+	context->main[7] = load_int8(buf);//A
+	context->last_flag_result = load_int8(buf); //F byte; the individual flag fields below are split out of it
+	context->zflag = context->last_flag_result & 0x40;
+	context->chflags = context->last_flag_result & 0x10 ? 8 : 0; //F bit 4 maps to chflags bit 3
+	context->pvflag = context->last_flag_result & 4;
+	context->nflag = context->last_flag_result & 2;
+	context->chflags |= context->last_flag_result & 1 ? 0x80 : 0; //F bit 0 maps to chflags bit 7
+	context->alt[1] = load_int8(buf);//C
+	context->alt[0] = load_int8(buf);//B
+	context->alt[3] = load_int8(buf);//E
+	context->alt[2] = load_int8(buf);//D
+	context->alt[5] = load_int8(buf);//L
+	context->alt[4] = load_int8(buf);//H
+	load_int8(buf);//non-existent alt ixl
+	load_int8(buf);//non-existent alt ixh
+	load_int8(buf);//non-existent alt iyl
+	load_int8(buf);//non-existent alt iyh
+	load_int8(buf);//non-existent alt i
+	load_int8(buf);//non-existent alt r
+	context->alt[7] = load_int8(buf);//A
+	context->alt[6] = load_int8(buf);//F
+	
+	context->pc = load_int16(buf);
+	context->sp = load_int16(buf);
+	context->imode = load_int8(buf);
+	context->iff1 = load_int8(buf);
+	context->iff2 = load_int8(buf);
+	load_int8(buf);//int_is_nmi
+	context->busack = load_int8(buf);
+	context->cycles = load_int32(buf);
+	load_int32(buf);//int_cycle
+	load_int32(buf);//int_enable_cycle
+	context->int_cycle = load_int32(buf);
+	context->int_end_cycle = load_int32(buf);
+	context->nmi_cycle = load_int32(buf);
+}
+
+void zinsert_breakpoint(z80_context * context, uint16_t address, uint8_t * bp_handler) //empty stub: all arguments are ignored and nothing is registered
+{
+}
+
+void zremove_breakpoint(z80_context * context, uint16_t address) //empty stub: all arguments are ignored and nothing is removed
+{
+}
--- a/z80inst.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/z80inst.c	Sat Jan 15 13:15:21 2022 -0800
@@ -1570,7 +1570,7 @@
 uint8_t z80_is_terminal(z80inst * inst)
 {
 	return inst->op == Z80_RET || inst->op == Z80_RETI || inst->op == Z80_RETN || inst->op == Z80_JP
-		|| inst->op == Z80_JR || inst->op == Z80_HALT || (inst->op == Z80_NOP && inst->immed == 42);
+		|| inst->op == Z80_JR || (inst->op == Z80_NOP && inst->immed == 42);
 }
 
 
--- a/ztestgen.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/ztestgen.c	Sat Jan 15 13:15:21 2022 -0800
@@ -11,6 +11,7 @@
 #include <sys/stat.h>
 #include <sys/types.h>
 #include <errno.h>
+#include <time.h>
 
 extern z80inst z80_tbl_a[256];
 extern z80inst z80_tbl_extd[0xC0-0x40];
@@ -530,6 +531,7 @@
 
 int main(int argc, char ** argv)
 {
+	srand(time(NULL));
 	z80_gen_all();
 	return 0;
 }
--- a/ztestrun.c	Sat Jan 05 00:58:08 2019 -0800
+++ b/ztestrun.c	Sat Jan 15 13:15:21 2022 -0800
@@ -4,7 +4,12 @@
  BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
 */
 #include "z80inst.h"
+#ifdef NEW_CORE
+#include "z80.h"
+#include <string.h>
+#else
 #include "z80_to_x86.h"
+#endif
 #include "mem.h"
 #include "vdp.h"
 #include <stdio.h>
@@ -42,10 +47,12 @@
 	{ 0x0000, 0x100, 0xFF, 0, 0, 0,                                  NULL,    NULL, NULL, z80_unmapped_read, z80_unmapped_write}
 };
 
+#ifndef NEW_CORE
 void z80_next_int_pulse(z80_context * context)
 {
 	context->int_pulse_start = context->int_pulse_end = CYCLE_NEVER;
 }
+#endif
 
 int main(int argc, char ** argv)
 {
@@ -91,6 +98,26 @@
 	fclose(f);
 	init_z80_opts(&opts, z80_map, 2, port_map, 1, 1, 0xFF);
 	context = init_z80_context(&opts);
+#ifdef NEW_CORE
+	z80_execute(context, 1000);
+	printf("A: %X\nB: %X\nC: %X\nD: %X\nE: %X\nHL: %X\nIX: %X\nIY: %X\nSP: %X\n\nIM: %d, IFF1: %d, IFF2: %d\n",
+		context->main[7], context->main[0], context->main[1],
+		context->main[2], context->main[3],
+		(context->main[4] << 8) | context->main[5],
+		context->ix,
+		context->iy,
+		context->sp, context->imode, context->iff1, context->iff2);
+	printf("Flags: SZYHXVNC\n"
+	       "       %d%d%d%d%d%d%d%d\n", 
+			context->last_flag_result >> 7, context->zflag != 0, context->last_flag_result >> 5 & 1, context->chflags >> 3 & 1, 
+			context->last_flag_result >> 3 & 1, context->pvflag != 0, context->nflag != 0, context->chflags >> 7 & 1
+	);
+	puts("--Alternate Regs--");
+	printf("A: %X\nB: %X\nC: %X\nD: %X\nE: %X\nHL: %X\n",
+		context->alt[7], context->alt[0], context->alt[1],
+		context->alt[2], context->alt[3],
+		(context->alt[4] << 8) | context->alt[5]);
+#else
 	//Z80 RAM
 	context->mem_pointers[0] = z80_ram;
 	if (retranslate) {
@@ -122,5 +149,6 @@
 		context->alt_regs[Z80_A], context->alt_regs[Z80_B], context->alt_regs[Z80_C],
 		context->alt_regs[Z80_D], context->alt_regs[Z80_E],
 		(context->alt_regs[Z80_H] << 8) | context->alt_regs[Z80_L]);
+#endif
 	return 0;
 }