changeset 426:add9e2f5c0e3

Make VDP render in native pixel format of the renderer for a modest performance gain and to make it easier to use OpenGL for rendering
author Mike Pavone <pavone@retrodev.com>
date Sun, 30 Jun 2013 11:45:58 -0700
parents 8b3ae850d1c4
children 2802318c14e1
files Makefile render.h render_sdl.c vdp.c vdp.h
diffstat 5 files changed, 184 insertions(+), 175 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Sat Jun 29 17:21:19 2013 -0700
+++ b/Makefile	Sun Jun 30 11:45:58 2013 -0700
@@ -1,10 +1,16 @@
 LIBS=sdl
+LDFLAGS=-lm `pkg-config --libs $(LIBS)`
 ifdef DEBUG
 CFLAGS=-ggdb -std=gnu99 `pkg-config --cflags-only-I $(LIBS)` -Wreturn-type -Werror=return-type
 else
 CFLAGS=-O2 -std=gnu99 `pkg-config --cflags-only-I $(LIBS)` -Wreturn-type -Werror=return-type
 endif
 
+ifdef PROFILE
+CFLAGS+= -pg
+LDFLAGS+= -pg
+endif
+
 TRANSOBJS=gen_x86.o x86_backend.o mem.o
 M68KOBJS=68kinst.o m68k_to_x86.o runtime.o
 Z80OBJS=z80inst.o z80_to_x86.o zruntime.o
@@ -13,7 +19,7 @@
 all : dis trans stateview blastem
 
 blastem : blastem.o vdp.o render_sdl.o io.o $(M68KOBJS) $(Z80OBJS) $(TRANSOBJS) $(AUDIOOBJS)
-	$(CC) -ggdb -o blastem  blastem.o vdp.o render_sdl.o io.o $(M68KOBJS) $(Z80OBJS) $(TRANSOBJS) $(AUDIOOBJS) -lm `pkg-config --libs $(LIBS)`
+	$(CC) -ggdb -o blastem  blastem.o vdp.o render_sdl.o io.o $(M68KOBJS) $(Z80OBJS) $(TRANSOBJS) $(AUDIOOBJS) $(LDFLAGS)
 
 dis : dis.o 68kinst.o
 	$(CC) -o dis dis.o 68kinst.o
--- a/render.h	Sat Jun 29 17:21:19 2013 -0700
+++ b/render.h	Sun Jun 30 11:45:58 2013 -0700
@@ -4,6 +4,8 @@
 #include "vdp.h"
 #include "psg.h"
 #include "ym2612.h"
+uint32_t render_map_color(uint8_t r, uint8_t g, uint8_t b);
+uint8_t render_depth();
 void render_init(int width, int height, char * title, uint32_t fps);
 void render_context(vdp_context * context);
 void render_wait_quit(vdp_context * context);
--- a/render_sdl.c	Sat Jun 29 17:21:19 2013 -0700
+++ b/render_sdl.c	Sun Jun 30 11:45:58 2013 -0700
@@ -10,9 +10,6 @@
 
 uint32_t last_frame = 0;
 
-int32_t color_map[1 << 12];
-uint8_t levels[] = {0, 27, 49, 71, 87, 103, 119, 130, 146, 157, 174, 190, 206, 228, 255};
-
 uint32_t min_delay;
 uint32_t frame_delay = 1000/60;
 
@@ -76,6 +73,16 @@
 SDL_Joystick * joysticks[MAX_JOYSTICKS];
 int num_joysticks;
 
+uint32_t render_map_color(uint8_t r, uint8_t g, uint8_t b) //converts 8-bit r/g/b components into a pixel value in the screen surface's format
+{
+	return SDL_MapRGB(screen->format, r, g, b); //dereferences the global screen surface; presumably only valid once render_init has obtained it
+}
+
+uint8_t render_depth() //size of one screen pixel in bits, computed from whole bytes per pixel
+{
+	return screen->format->BytesPerPixel * 8; //dereferences the global screen surface; presumably only valid once render_init has obtained it
+}
+
 void render_init(int width, int height, char * title, uint32_t fps)
 {
 	if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_AUDIO | SDL_INIT_JOYSTICK) < 0) {
@@ -90,28 +97,11 @@
     	fprintf(stderr, "Unable to get SDL surface: %s\n", SDL_GetError());
         exit(1);
     }
-    if (screen->format->BytesPerPixel < 2) {
-    	fprintf(stderr, "BlastEm requires at least a 16-bit surface, SDL returned a %d-bit surface\n", screen->format->BytesPerPixel * 8);
+    if (screen->format->BytesPerPixel != 2 && screen->format->BytesPerPixel != 4) {
+    	fprintf(stderr, "BlastEm requires a 16-bit or 32-bit surface, SDL returned a %d-bit surface\n", screen->format->BytesPerPixel * 8);
     	exit(1);
     }
     SDL_WM_SetCaption(title, title);
-    uint8_t b,g,r;
-    for (uint16_t color = 0; color < (1 << 12); color++) {
-    	if (color & FBUF_SHADOW) {
-    		b = levels[(color >> 9) & 0x7];
-			g = levels[(color >> 5) & 0x7];
-			r = levels[(color >> 1) & 0x7];
-    	} else if(color & FBUF_HILIGHT) {
-    		b = levels[((color >> 9) & 0x7) + 7];
-			g = levels[((color >> 5) & 0x7) + 7];
-			r = levels[((color >> 1) & 0x7) + 7];
-    	} else {
-			b = levels[(color >> 8) & 0xE];
-			g = levels[(color >> 4) & 0xE];
-			r = levels[color & 0xE];
-		}
-		color_map[color] = SDL_MapRGB(screen->format, r, g, b);
-    }
     min_delay = 0;
     for (int i = 0; i < 100; i++) {
     	uint32_t start = SDL_GetTicks();
@@ -163,11 +153,10 @@
     SDL_JoystickEventState(SDL_ENABLE);
 }
 
-uint16_t blankbuf[320*240];
+uint32_t blankbuf[320*240];
 
 void render_context(vdp_context * context)
 {
-	uint8_t *buf_8;
 	uint16_t *buf_16;
 	uint32_t *buf_32; 
 	uint8_t b,g,r;
@@ -185,108 +174,40 @@
     	repeat_y = repeat_x;
     }
     int othermask = repeat_y >> 1;
-    uint16_t *otherbuf = (context->regs[REG_MODE_4] & BIT_INTERLACE) ? context->evenbuf : blankbuf;
-    switch (screen->format->BytesPerPixel) {
-    case 2:
-        buf_16 = (uint16_t *)screen->pixels;
-        for (int y = 0; y < 240; y++) {
-        	for (int i = 0; i < repeat_y; i++,buf_16 += screen->pitch/2) {
+    
+    if (screen->format->BytesPerPixel == 2) {
+    	uint16_t *otherbuf = (context->regs[REG_MODE_4] & BIT_INTERLACE) ? context->evenbuf : (uint16_t *)blankbuf;
+    	uint16_t * oddbuf = context->oddbuf;
+    	buf_16 = (uint16_t *)screen->pixels;
+    	for (int y = 0; y < 240; y++) {
+    		for (int i = 0; i < repeat_y; i++,buf_16 += screen->pitch/2) {
         		uint16_t *line = buf_16;
-        		uint16_t *src_line = (i & othermask ? otherbuf : context->oddbuf) + y * 320;
+        		uint16_t *src_line = (i & othermask ? otherbuf : oddbuf) + y * 320;
 		    	for (int x = 0; x < 320; x++) {
-		    		uint16_t color = color_map[*(src_line++) & 0xFFF];
+		    		uint16_t color = *(src_line++);
 		    		for (int j = 0; j < repeat_x; j++) {
 		    			*(line++) = color;
 		    		}
 		    	}
 		    }
-        }
-    	break;
-    case 3:
-        buf_8 = (uint8_t *)screen->pixels;
-        for (int y = 0; y < 240; y++) {
-        	for (int i = 0; i < repeat_y; i++,buf_8 += screen->pitch) {
-        		uint8_t *line = buf_8;
+    	}
+    } else {
+    	uint32_t *otherbuf = (context->regs[REG_MODE_4] & BIT_INTERLACE) ? context->evenbuf : (uint32_t *)blankbuf;
+    	uint32_t * oddbuf = context->oddbuf;
+    	buf_32 = (uint32_t *)screen->pixels;
+    	for (int y = 0; y < 240; y++) {
+    		for (int i = 0; i < repeat_y; i++,buf_32 += screen->pitch/4) {
+        		uint32_t *line = buf_32;
+        		uint32_t *src_line = (i & othermask ? otherbuf : oddbuf) + y * 320;
 		    	for (int x = 0; x < 320; x++) {
-		    		uint16_t gen_color = context->oddbuf[y * 320 + x];
-		    		b = ((gen_color >> 8) & 0xE) * 18;
-		    		g = ((gen_color >> 4) & 0xE) * 18;
-		    		r = (gen_color& 0xE) * 18;
+		    		uint32_t color = *(src_line++);
 		    		for (int j = 0; j < repeat_x; j++) {
-						*(buf_8+screen->format->Rshift/8) = r;
-						*(buf_8+screen->format->Gshift/8) = g;
-						*(buf_8+screen->format->Bshift/8) = b;
-						buf_8 += 3;
-					}
+		    			*(line++) = color;
+		    		}
 		    	}
 		    }
-        }
-    	break;
-    case 4:
-        buf_32 = (uint32_t *)screen->pixels;
-
-	    for (int y = 0; y < 240; y++) {
-	    	for (int i = 0; i < repeat_y; i++,buf_32 += screen->pitch/4) {
-	    		uint32_t *line = buf_32;
-	    		uint16_t *src_line = (i & othermask ? otherbuf : context->oddbuf) + y * 320;
-		    	for (int x = 0; x < 320; x++) {
-		    		uint32_t color;
-		    		if (!render_dbg) {
-		    			color = color_map[*(src_line++) & 0xFFF];
-					} else if(render_dbg == 2) {
-						color = color_map[context->cram[(y/30)*8 + x/40]];
-					} else if(render_dbg == 3) {
-						if (x & 1) {
-							color = color_map[context->cram[ (debug_pal << 4) | (context->vdpmem[(x/8)*32 + (y/8)*32*40 + (x%8)/2 + (y%8)*4] & 0xF) ]];
-						} else {
-							color = color_map[context->cram[ (debug_pal << 4) | (context->vdpmem[(x/8)*32 + (y/8)*32*40 + (x%8)/2 + (y%8)*4] >> 4) ]];
-						}
-					}else {
-						uint16_t gen_color = context->oddbuf[y * 320 + x];
-						r = g = b = 0;
-						switch(gen_color & FBUF_SRC_MASK)
-						{
-						case FBUF_SRC_A:
-							g = 127;//plane a = green
-							break;
-						case FBUF_SRC_W:
-							g = 127;//window = cyan
-							b = 127;
-							break;
-						case FBUF_SRC_B:
-							b = 127;//plane b = blue
-							break;
-						case FBUF_SRC_S:
-							r = 127;//sprite = red
-							break;
-						case FBUF_SRC_BG:
-							r = 127;//BG = purple
-							b = 127;
-						}
-						if (gen_color & FBUF_BIT_PRIORITY) {
-							b *= 2;
-							g *= 2;
-							r *= 2;
-						}
-						if (gen_color & FBUF_SHADOW) {
-							b /= 2;
-							g /= 2;
-							r /= 2;
-						} else if(gen_color & FBUF_HILIGHT) {
-							b = b ? b : 64;
-							g = g ? g : 64;
-							r = r ? r : 64;
-						}
-						color = SDL_MapRGB(screen->format, r, g, b);
-					}
-					for (int j = 0; j < repeat_x; j++) {
-						*(line++) = color;
-					}
-				}
-	    	}
-	    }
-		break;
-	}
+    	}
+    }
     if ( SDL_MUSTLOCK(screen) ) {
         SDL_UnlockSurface(screen);
     }
--- a/vdp.c	Sat Jun 29 17:21:19 2013 -0700
+++ b/vdp.c	Sun Jun 30 11:45:58 2013 -0700
@@ -2,6 +2,7 @@
 #include "blastem.h"
 #include <stdlib.h>
 #include <string.h>
+#include "render.h"
 
 #define NTSC_ACTIVE 225
 #define PAL_ACTIVE 241
@@ -26,15 +27,20 @@
 #define HBLANK_CLEAR_H40 (MCLK_WEIRD_END+61*4)
 #define HBLANK_CLEAR_H32 (HSYNC_END_H32 + 46*5)
 
+int32_t color_map[1 << 12];
+uint8_t levels[] = {0, 27, 49, 71, 87, 103, 119, 130, 146, 157, 174, 190, 206, 228, 255};
+
+uint8_t color_map_init_done;
+
 void init_vdp_context(vdp_context * context)
 {
 	memset(context, 0, sizeof(*context));
 	context->vdpmem = malloc(VRAM_SIZE);
 	memset(context->vdpmem, 0, VRAM_SIZE);
-	context->oddbuf = context->framebuf = malloc(FRAMEBUF_SIZE);
-	memset(context->framebuf, 0, FRAMEBUF_SIZE);
-	context->evenbuf = malloc(FRAMEBUF_SIZE);
-	memset(context->evenbuf, 0, FRAMEBUF_SIZE);
+	context->oddbuf = context->framebuf = malloc(FRAMEBUF_ENTRIES * (render_depth() / 8));
+	memset(context->framebuf, 0, FRAMEBUF_ENTRIES * (render_depth() / 8));
+	context->evenbuf = malloc(FRAMEBUF_ENTRIES * (render_depth() / 8));
+	memset(context->evenbuf, 0, FRAMEBUF_ENTRIES * (render_depth() / 8));
 	context->linebuf = malloc(LINEBUF_SIZE + SCROLL_BUFFER_SIZE*2);
 	memset(context->linebuf, 0, LINEBUF_SIZE + SCROLL_BUFFER_SIZE*2);
 	context->tmp_buf_a = context->linebuf + LINEBUF_SIZE;
@@ -42,6 +48,27 @@
 	context->sprite_draws = MAX_DRAWS;
 	context->fifo_cur = malloc(sizeof(fifo_entry) * FIFO_SIZE);
 	context->fifo_end = context->fifo_cur + FIFO_SIZE;
+	context->b32 = render_depth() == 32;
+	if (!color_map_init_done) {
+		uint8_t b,g,r;
+		for (uint16_t color = 0; color < (1 << 12); color++) {
+			if (color & FBUF_SHADOW) {
+				b = levels[(color >> 9) & 0x7];
+				g = levels[(color >> 5) & 0x7];
+				r = levels[(color >> 1) & 0x7];
+			} else if(color & FBUF_HILIGHT) {
+				b = levels[((color >> 9) & 0x7) + 7];
+				g = levels[((color >> 5) & 0x7) + 7];
+				r = levels[((color >> 1) & 0x7) + 7];
+			} else {
+				b = levels[(color >> 8) & 0xE];
+				g = levels[(color >> 4) & 0xE];
+				r = levels[color & 0xE];
+			}
+			color_map[color] = render_map_color(r, g, b);
+		}
+		color_map_init_done = 1;
+	}
 }
 
 void render_sprite_cells(vdp_context * context)
@@ -302,10 +329,15 @@
 					context->flags |= FLAG_DMA_PROG;
 				}
 				break;
-			case CRAM_WRITE:
-				context->cram[(context->address/2) & (CRAM_SIZE-1)] = read_dma_value((context->regs[REG_DMASRC_H] << 16) | (context->regs[REG_DMASRC_M] << 8) | context->regs[REG_DMASRC_L]);
+			case CRAM_WRITE: {
+				uint16_t addr = (context->address/2) & (CRAM_SIZE-1), value;
+				context->cram[addr] = value = read_dma_value((context->regs[REG_DMASRC_H] << 16) | (context->regs[REG_DMASRC_M] << 8) | context->regs[REG_DMASRC_L]);
+				context->colors[addr] = color_map[value & 0xEEE];
+				context->colors[addr + CRAM_SIZE] = color_map[(value & 0xEEE) | FBUF_SHADOW];
+				context->colors[addr + CRAM_SIZE*2] = color_map[(value & 0xEEE) | FBUF_HILIGHT];
 				//printf("CRAM DMA | %X set to %X from %X at %d\n", (context->address/2) & (CRAM_SIZE-1), context->cram[(context->address/2) & (CRAM_SIZE-1)], (context->regs[REG_DMASRC_H] << 17) | (context->regs[REG_DMASRC_M] << 9) | (context->regs[REG_DMASRC_L] << 1), context->cycles);
 				break;
+			}
 			case VSRAM_WRITE:
 				if (((context->address/2) & 63) < VSRAM_SIZE) {
 					context->vsram[(context->address/2) & 63] = read_dma_value((context->regs[REG_DMASRC_H] << 16) | (context->regs[REG_DMASRC_M] << 8) | context->regs[REG_DMASRC_L]);
@@ -322,10 +354,15 @@
 				context->vdpmem[context->address] = context->dma_val;
 				context->dma_val = (context->dma_val << 8) | ((context->dma_val >> 8) & 0xFF);
 				break;
-			case CRAM_WRITE:
-				context->cram[(context->address/2) & (CRAM_SIZE-1)] = context->dma_val;
+			case CRAM_WRITE: {
+				uint16_t addr = (context->address/2) & (CRAM_SIZE-1);
+				context->cram[addr] = context->dma_val;
+				context->colors[addr] = color_map[context->dma_val & 0xEEE];
+				context->colors[addr + CRAM_SIZE] = color_map[(context->dma_val & 0xEEE) | FBUF_SHADOW];
+				context->colors[addr + CRAM_SIZE*2] = color_map[(context->dma_val & 0xEEE) | FBUF_HILIGHT];
 				//printf("CRAM DMA Fill | %X set to %X at %d\n", (context->address/2) & (CRAM_SIZE-1), context->cram[(context->address/2) & (CRAM_SIZE-1)], context->cycles);
 				break;
+			}
 			case VSRAM_WRITE:
 				if (((context->address/2) & 63) < VSRAM_SIZE) {
 					context->vsram[(context->address/2) & 63] = context->dma_val;
@@ -341,10 +378,15 @@
 				case VRAM_WRITE:
 					context->vdpmem[context->address] = context->dma_val;
 					break;
-				case CRAM_WRITE:
-					context->cram[(context->address/2) & (CRAM_SIZE-1)] = context->dma_val;
+				case CRAM_WRITE: {
+					uint16_t addr = (context->address/2) & (CRAM_SIZE-1);
+					context->cram[addr] = context->dma_val;
+					context->colors[addr] = color_map[context->dma_val & 0xEEE];
+					context->colors[addr + CRAM_SIZE] = color_map[(context->dma_val & 0xEEE) | FBUF_SHADOW];
+					context->colors[addr + CRAM_SIZE*2] = color_map[(context->dma_val & 0xEEE) | FBUF_HILIGHT];
 					//printf("CRAM DMA Copy | %X set to %X from %X at %d\n", (context->address/2) & (CRAM_SIZE-1), context->cram[(context->address/2) & (CRAM_SIZE-1)], context->regs[REG_DMASRC_L] & (CRAM_SIZE-1), context->cycles);
 					break;
+				}
 				case VSRAM_WRITE:
 					if (((context->address/2) & 63) < VSRAM_SIZE) {
 						context->vsram[(context->address/2) & 63] = context->dma_val;
@@ -411,10 +453,15 @@
 						return;
 					}
 					break;
-				case CRAM_WRITE:
+				case CRAM_WRITE: {
 					//printf("CRAM Write | %X to %X\n", start->value, (start->address/2) & (CRAM_SIZE-1));
-					context->cram[(start->address/2) & (CRAM_SIZE-1)] = start->value;
+					uint16_t addr = (start->address/2) & (CRAM_SIZE-1); //address stored with this queued write (as before this change), not context->address
+					context->cram[addr] = start->value;
+					context->colors[addr] = color_map[start->value & 0xEEE]; //normal intensity
+					context->colors[addr + CRAM_SIZE] = color_map[(start->value & 0xEEE) | FBUF_SHADOW]; //shadowed variant
+					context->colors[addr + CRAM_SIZE*2] = color_map[(start->value & 0xEEE) | FBUF_HILIGHT]; //highlighted variant
 					break;
+				}
 				case VSRAM_WRITE:
 					if (((start->address/2) & 63) < VSRAM_SIZE) {
 						//printf("VSRAM Write: %X to %X\n", start->value, context->address);
@@ -629,115 +676,133 @@
 		return;
 	}
 	render_map(context->col_2, context->tmp_buf_b+SCROLL_BUFFER_DRAW+8, context);
-	uint16_t *dst, *end;
+	uint16_t *dst;
+	uint32_t *dst32;
 	uint8_t *sprite_buf, *plane_a, *plane_b;
 	if (col)
 	{
 		col-=2;
-		dst = context->framebuf + line * 320 + col * 8;
+		if (context->b32) {
+			dst32 = context->framebuf;
+			dst32 += line * 320 + col * 8;
+		} else {
+			dst = context->framebuf;
+			dst += line * 320 + col * 8;
+		}
 		sprite_buf = context->linebuf + col * 8;
 		uint16_t a_src;
 		if (context->flags & FLAG_WINDOW) {
 			plane_a = context->tmp_buf_a + SCROLL_BUFFER_DRAW;
-			a_src = FBUF_SRC_W;
+			//a_src = FBUF_SRC_W;
 		} else {
 			plane_a = context->tmp_buf_a + SCROLL_BUFFER_DRAW - (context->hscroll_a & 0xF);
-			a_src = FBUF_SRC_A;
+			//a_src = FBUF_SRC_A;
 		}
 		plane_b = context->tmp_buf_b + SCROLL_BUFFER_DRAW - (context->hscroll_b & 0xF);
-		end = dst + 16;
 		uint16_t src;
 		//printf("A | tmp_buf offset: %d\n", 8 - (context->hscroll_a & 0x7));
 		
 		if (context->regs[REG_MODE_4] & BIT_HILIGHT) {
-			for (; dst < end; ++plane_a, ++plane_b, ++sprite_buf, ++dst) {
+			for (int i = 0; i < 16; ++plane_a, ++plane_b, ++sprite_buf, ++i) {
 				uint8_t pixel;
 				
 				src = 0;
 				uint8_t sprite_color = *sprite_buf & 0x3F;
 				if (sprite_color == 0x3E || sprite_color == 0x3F) {
 					if (sprite_color == 0x3F) {
-						src = FBUF_SHADOW;
+						src = CRAM_SIZE;//FBUF_SHADOW;
 					} else {
-						src = FBUF_HILIGHT;
+						src = CRAM_SIZE*2;//FBUF_HILIGHT;
 					}
 					if (*plane_a & BUF_BIT_PRIORITY && *plane_a & 0xF) {
 						pixel = *plane_a;
-						src |= a_src;
+						//src |= a_src;
 					} else if (*plane_b & BUF_BIT_PRIORITY && *plane_b & 0xF) {
 						pixel = *plane_b;
-						src |= FBUF_SRC_B;
+						//src |= FBUF_SRC_B;
 					} else if (*plane_a & 0xF) {
 						pixel = *plane_a;
-						src |= a_src;
+						//src |= a_src;
 					} else if (*plane_b & 0xF){
 						pixel = *plane_b;
-						src |= FBUF_SRC_B;
+						//src |= FBUF_SRC_B;
 					} else {
 						pixel = context->regs[REG_BG_COLOR] & 0x3F;
-						src |= FBUF_SRC_BG;
+						//src |= FBUF_SRC_BG;
 					}
 				} else {
 					if (*sprite_buf & BUF_BIT_PRIORITY && *sprite_buf & 0xF) {
 						pixel = *sprite_buf;
-						src = FBUF_SRC_S;
+						//src = FBUF_SRC_S;
 					} else if (*plane_a & BUF_BIT_PRIORITY && *plane_a & 0xF) {
 						pixel = *plane_a;
-						src = a_src;
+						//src = a_src;
 					} else if (*plane_b & BUF_BIT_PRIORITY && *plane_b & 0xF) {
 						pixel = *plane_b;
-						src = FBUF_SRC_B;
+						//src = FBUF_SRC_B;
 					} else {
 						if (!(*plane_a & BUF_BIT_PRIORITY || *plane_a & BUF_BIT_PRIORITY)) {
-							src = FBUF_SHADOW;
+							src = CRAM_SIZE;//FBUF_SHADOW;
 						}
 						if (*sprite_buf & 0xF) {
 							pixel = *sprite_buf;
 							if (*sprite_buf & 0xF == 0xE) {
-								src = FBUF_SRC_S;
-							} else {
+								src = 0;//FBUF_SRC_S;
+							} /*else {
 								src |= FBUF_SRC_S;
-							}
+							}*/
 						} else if (*plane_a & 0xF) {
 							pixel = *plane_a;
-							src |= a_src;
+							//src |= a_src;
 						} else if (*plane_b & 0xF){
 							pixel = *plane_b;
-							src |= FBUF_SRC_B;
+							//src |= FBUF_SRC_B;
 						} else {
 							pixel = context->regs[REG_BG_COLOR] & 0x3F;
-							src |= FBUF_SRC_BG;
+							//src |= FBUF_SRC_BG;
 						}
 					}
 				}
-				*dst = (context->cram[pixel & 0x3F] & 0xEEE) | ((pixel & BUF_BIT_PRIORITY) ? FBUF_BIT_PRIORITY : 0) | src;
+				pixel &= 0x3F;
+				pixel += src;
+				if (context->b32) {
+					*(dst32++) = context->colors[pixel];
+				} else {
+					*(dst++) = context->colors[pixel];
+				}
+				//*dst = (context->cram[pixel & 0x3F] & 0xEEE) | ((pixel & BUF_BIT_PRIORITY) ? FBUF_BIT_PRIORITY : 0) | src;
 			}
 		} else {
-			for (; dst < end; ++plane_a, ++plane_b, ++sprite_buf, ++dst) {
+			for (int i = 0; i < 16; ++plane_a, ++plane_b, ++sprite_buf, ++i) {
 				uint8_t pixel;
 				if (*sprite_buf & BUF_BIT_PRIORITY && *sprite_buf & 0xF) {
 					pixel = *sprite_buf;
-					src = FBUF_SRC_S;
+					//src = FBUF_SRC_S;
 				} else if (*plane_a & BUF_BIT_PRIORITY && *plane_a & 0xF) {
 					pixel = *plane_a;
-					src = a_src;
+					//src = a_src;
 				} else if (*plane_b & BUF_BIT_PRIORITY && *plane_b & 0xF) {
 					pixel = *plane_b;
-					src = FBUF_SRC_B;
+					//src = FBUF_SRC_B;
 				} else if (*sprite_buf & 0xF) {
 					pixel = *sprite_buf;
-					src = FBUF_SRC_S;
+					//src = FBUF_SRC_S;
 				} else if (*plane_a & 0xF) {
 					pixel = *plane_a;
-					src = a_src;
+					//src = a_src;
 				} else if (*plane_b & 0xF){
 					pixel = *plane_b;
-					src = FBUF_SRC_B;
+					//src = FBUF_SRC_B;
 				} else {
 					pixel = context->regs[REG_BG_COLOR] & 0x3F;
-					src = FBUF_SRC_BG;
+					//src = FBUF_SRC_BG;
 				}
-				*dst = (context->cram[pixel & 0x3F] & 0xEEE) | ((pixel & BUF_BIT_PRIORITY) ? FBUF_BIT_PRIORITY : 0) | src;
+				if (context->b32) {
+					*(dst32++) = context->colors[pixel & 0x3F];
+				} else {
+					*(dst++) = context->colors[pixel & 0x3F];
+				}
+				//*dst = (context->cram[pixel & 0x3F] & 0xEEE) | ((pixel & BUF_BIT_PRIORITY) ? FBUF_BIT_PRIORITY : 0) | src;
 			}
 		}
 	} else {
@@ -1125,24 +1190,34 @@
 {
 	if (line > 0) {
 		line -= 1;
-		uint16_t * start = NULL, *end = NULL;
+		int starti = -1;
 		if (context->latched_mode & BIT_H40) {
 			if (slot >= 50 && slot < 210) {
 				uint32_t x = (slot-50)*2;
-				start = context->framebuf + line * 320 + x;
-				end = start + 2;
+				starti = line * 320 + x;
 			}
 		} else {
 			if (slot >= 43 && slot < 171) {
 				uint32_t x = (slot-43)*2;
-				start = context->framebuf + line * 320 + x;
-				end = start + 2;
+				starti = line * 320 + x;
 			}
 		}
-		uint16_t color = (context->cram[context->regs[REG_BG_COLOR] & 0x3F] & 0xEEE);
-		while (start != end) {
-			*start = color;
-			++start;
+		if (starti >= 0) {
+			if (context->b32) {
+				uint32_t color = context->colors[context->regs[REG_BG_COLOR] & 0x3F];
+				uint32_t * start = context->framebuf;
+				start += starti;
+				for (int i = 0; i < 2; i++) {
+					*(start++) = color;
+				}
+			} else {
+				uint16_t color = context->colors[context->regs[REG_BG_COLOR] & 0x3F];
+				uint16_t * start = context->framebuf;
+				start += starti;
+				for (int i = 0; i < 2; i++) {
+					*(start++) = color;
+				}
+			}
 		}
 	}
 }
@@ -1671,7 +1746,11 @@
 		return 0;
 	}
 	for (int i = 0; i < CRAM_SIZE; i++) {
-		context->cram[i] = (tmp_buf[i*2+1] << 8) | tmp_buf[i*2];
+		uint16_t value;
+		context->cram[i] = value = (tmp_buf[i*2+1] << 8) | tmp_buf[i*2];
+		context->colors[i] = color_map[value & 0xEEE];
+		context->colors[i + CRAM_SIZE] = color_map[(value & 0xEEE) | FBUF_SHADOW];
+		context->colors[i + CRAM_SIZE*2] = color_map[(value & 0xEEE) | FBUF_HILIGHT];
 	}
 	if (fread(tmp_buf, 2, VSRAM_SIZE, state_file) != VSRAM_SIZE) {
 		fputs("Failed to read VSRAM from savestate\n", stderr);
--- a/vdp.h	Sat Jun 29 17:21:19 2013 -0700
+++ b/vdp.h	Sun Jun 30 11:45:58 2013 -0700
@@ -10,7 +10,6 @@
 #define VRAM_SIZE (64*1024)
 #define LINEBUF_SIZE 320
 #define FRAMEBUF_ENTRIES (320+27)*(240+27) //PAL active display + full border
-#define FRAMEBUF_SIZE (FRAMEBUF_ENTRIES*sizeof(uint16_t))
 #define MAX_DRAWS 40
 #define MAX_DRAWS_H32 32
 #define MAX_SPRITES_LINE 20
@@ -124,10 +123,11 @@
 	//stores 2-bit palette + 4-bit palette index + priority for current sprite line
 	uint8_t     *linebuf;
 	//stores 12-bit color + shadow/highlight bits
-	uint16_t    *framebuf;
-	uint16_t    *oddbuf;
-	uint16_t    *evenbuf;
+	void        *framebuf;
+	void        *oddbuf;
+	void        *evenbuf;
 	uint16_t    cram[CRAM_SIZE];
+	uint32_t    colors[CRAM_SIZE*3];
 	uint16_t    vsram[VSRAM_SIZE];
 	uint8_t     latched_mode;
 	uint16_t    hscroll_a;
@@ -146,6 +146,7 @@
 	uint8_t     hint_counter;
 	uint8_t     flags2;
 	uint8_t     double_res;
+	uint8_t     b32;
 	uint8_t     *tmp_buf_a;
 	uint8_t     *tmp_buf_b;
 } vdp_context;