view ym2612.c @ 1971:80920c21bb52

Add an event log soft flush and call it twice per frame in between hard flushes to reduce netplay latency when there are insufficient hardware updates to flush packets in the middle of a frame
author Michael Pavone <pavone@retrodev.com>
date Fri, 08 May 2020 11:40:30 -0700
parents c3c62dbf1ceb
children 3ce38692a3f2
line wrap: on
line source

/*
 Copyright 2013 Michael Pavone
 This file is part of BlastEm.
 BlastEm is free software distributed under the terms of the GNU General Public License version 3 or greater. See COPYING for full license text.
*/
#include <string.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "ym2612.h"
#include "render.h"
#include "wave.h"
#include "blastem.h"
#include "event_log.h"

//Uncomment the define below to route dfprintf/dfopen to the real stdio functions
//#define DO_DEBUG_PRINT
#ifdef DO_DEBUG_PRINT
#define dfprintf fprintf
#define dfopen(var, fname, mode) var=fopen(fname, mode)
#else
//With debug printing disabled both macros expand to nothing, so a dfprintf(...)
//call site is left as a parenthesized comma expression that prints nothing
#define dfprintf
#define dfopen(var, fname, mode)
#endif

//Multiplied by clock_inc in ym_init to produce context->busy_cycles
#define BUSY_CYCLES 32
//NOTE(review): not referenced in the visible part of this file
#define OP_UPDATE_PERIOD 144

//Bits kept in context->timer_control; the low 4 bits are copied from writes to REG_TIME_CTRL
#define BIT_TIMERA_ENABLE 0x1
#define BIT_TIMERB_ENABLE 0x2
#define BIT_TIMERA_OVEREN 0x4
#define BIT_TIMERB_OVEREN 0x8
//Tested against the value written to REG_TIME_CTRL to clear the matching status bit
#define BIT_TIMERA_RESET  0x10
#define BIT_TIMERB_RESET  0x20

//Internal flags stored in context->timer_control: set when a timer is newly enabled
//and cleared by ym_run_timers the next time that timer is reloaded
#define BIT_TIMERA_LOAD   0x40
#define BIT_TIMERB_LOAD   0x80

//Overflow flags kept in context->status
#define BIT_STATUS_TIMERA 0x1
#define BIT_STATUS_TIMERB 0x2

static uint32_t ym_calc_phase_inc(ym2612_context * context, ym_operator * operator, uint32_t op);

//Envelope generator phases stored in ym_operator.env_phase
//The values are also used directly as indices into ym_operator.rates[]
enum {
	PHASE_ATTACK,
	PHASE_DECAY,
	PHASE_SUSTAIN,
	PHASE_RELEASE
};

//Checked by ym_init before it populates the static lookup tables below
uint8_t did_tbl_init = 0;
//According to Nemesis, real hardware only uses a 256 entry quarter sine table; however,
//memory is cheap so using a half sine table will probably save some cycles
//a full sine table would be nice, but negative numbers don't get along with log2
#define SINE_TABLE_SIZE 512
static uint16_t sine_table[SINE_TABLE_SIZE];
//Similar deal here with the power table for log -> linear conversion
//According to Nemesis, real hardware only uses a 256 entry table for the fractional part
//and uses the whole part as a shift amount.
#define POW_TABLE_SIZE (1 << 13)
static uint16_t pow_table[POW_TABLE_SIZE];

//Compact source data for rate_table; each row holds the 8 per-cycle increments
//for one rate pattern. ym_init expands it into the full table.
static uint16_t rate_table_base[] = {
	//main portion
	0,1,0,1,0,1,0,1,
	0,1,0,1,1,1,0,1,
	0,1,1,1,0,1,1,1,
	0,1,1,1,1,1,1,1,
	//top end
	1,1,1,1,1,1,1,1,
	1,1,1,2,1,1,1,2,
	1,2,1,2,1,2,1,2,
	1,2,2,2,1,2,2,2,
};

//Envelope increment indexed by rate * 8 + update cycle (64 rates, 8 cycles each)
static uint16_t rate_table[64*8];

//LFO counter reload values indexed by context->lfo_freq
static uint8_t lfo_timer_values[] = {108, 77, 71, 67, 62, 44, 8, 5};
//Base phase modulation values indexed by [pms][step]; ym_init expands this into lfo_pm_table
static uint8_t lfo_pm_base[][8] = {
	{0,   0,   0,   0,   0,   0,   0,   0},
	{0,   0,   0,   0,   4,   4,   4,   4},
	{0,   0,   0,   4,   4,   4,   8,   8},
	{0,   0,   4,   4,   8,   8, 0xc, 0xc},
	{0,   0,   4,   8,   8,   8, 0xc,0x10},
	{0,   0,   8, 0xc,0x10,0x10,0x14,0x18},
	{0,   0,0x10,0x18,0x20,0x20,0x28,0x30},
	{0,   0,0x20,0x30,0x40,0x40,0x50,0x60}
};
//Signed phase modulation offsets; ym_init fills it at index freq * 256 + pms * 32 + step
static int16_t lfo_pm_table[128 * 32 * 8];

//Indexed by ym_channel.ams in ym_run_phase: a non-negative entry is a right shift
//applied to the base AM value, a negative entry is a left shift by its magnitude
int16_t ams_shift[] = {8, 1, -1, -2};

//Largest (most attenuated) envelope value
#define MAX_ENVELOPE 0xFFC
//NOTE(review): not referenced in the visible part of this file
#define YM_DIVIDER 2
//Sentinel stored in cycle fields (e.g. write_cycle, busy_start) when no cycle is recorded
#define CYCLE_NEVER 0xFFFFFFFF

//Converts value to a fixed point number with dec_bits fractional bits.
//Adding 0.5 before the truncating conversion rounds to the nearest step.
static uint16_t round_fixed_point(double value, int dec_bits)
{
	double scaled = value * (1 << dec_bits);
	return scaled + 0.5;
}

//Stream used by the dfopen/dfprintf debug macros
static FILE * debug_file = NULL;
//Set to 1 by ym_data_write on the first operator key-on; gates debug output in ym_run_phase
static uint32_t first_key_on=0;

//Context whose per-channel WAVE logs get finalized by ym_finalize_log
//Set by ym_init when YM_OPT_WAVE_LOG is requested
static ym2612_context * log_context = NULL;

static void ym_finalize_log()
{
	if (!log_context) {
		return;
	}
	for (int i = 0; i < NUM_CHANNELS; i++) {
		if (log_context->channels[i].logfile) {
			wave_finalize(log_context->channels[i].logfile);
		}
	}
	log_context = NULL;
}

//Forwards a new master clock to the audio source of this context
//The divider passed along is the same clock_inc * NUM_OPERATORS expression
//that ym_init hands to render_audio_source
void ym_adjust_master_clock(ym2612_context * context, uint32_t master_clock)
{
	render_audio_adjust_clock(context->audio, master_clock, context->clock_inc * NUM_OPERATORS);
}

//Rebases all cycle counters of the context by subtracting deduction
//A recorded cycle that is unset or older than deduction becomes CYCLE_NEVER;
//for the status latch this also clears the latched status value
void ym_adjust_cycles(ym2612_context *context, uint32_t deduction)
{
	context->current_cycle -= deduction;

	if (context->write_cycle == CYCLE_NEVER || context->write_cycle < deduction) {
		context->write_cycle = CYCLE_NEVER;
	} else {
		context->write_cycle -= deduction;
	}

	if (context->busy_start == CYCLE_NEVER || context->busy_start < deduction) {
		context->busy_start = CYCLE_NEVER;
	} else {
		context->busy_start -= deduction;
	}

	if (context->last_status_cycle == CYCLE_NEVER || context->last_status_cycle < deduction) {
		context->last_status = 0;
		context->last_status_cycle = CYCLE_NEVER;
	} else {
		context->last_status_cycle -= deduction;
	}
}

//Android builds get log2 defined in terms of log
#ifdef __ANDROID__
#define log2(x) (log(x)/log(2))
#endif


//Highest counter values; ym_run_timers reloads a timer once it has reached its maximum
#define TIMER_A_MAX 1023
#define TIMER_B_MAX 255

//Puts the chip state back to its reset values: registers, operators, channels and
//channel 3 supplemental data are zeroed, timers are set to their maximum and every
//operator is left fully attenuated in the release phase.
//The per-channel WAVE log handles are kept across the reset.
void ym_reset(ym2612_context *context)
{
	//stash the log handles before the channel array is wiped
	FILE *kept_logs[NUM_CHANNELS];
	for (int ch = 0; ch < NUM_CHANNELS; ch++) {
		kept_logs[ch] = context->channels[ch].logfile;
	}

	memset(context->part1_regs, 0, sizeof(context->part1_regs));
	memset(context->part2_regs, 0, sizeof(context->part2_regs));
	memset(context->operators, 0, sizeof(context->operators));
	memset(context->channels, 0, sizeof(context->channels));
	memset(context->ch3_supp, 0, sizeof(context->ch3_supp));

	context->selected_reg = 0;
	context->csm_keyon = 0;
	context->ch3_mode = 0;
	context->dac_enable = 0;
	context->status = 0;
	context->timer_a_load = 0;
	context->timer_b_load = 0;
	//TODO: Confirm these on hardware
	context->timer_a = TIMER_A_MAX;
	context->timer_b = TIMER_B_MAX;
	context->write_cycle = CYCLE_NEVER;

	//TODO: Reset LFO state

	for (int ch = 0; ch < NUM_CHANNELS; ch++) {
		ym_channel *chan = context->channels + ch;
		//some games seem to expect that the LR flags start out as 1
		chan->lr = 0xC0;
		chan->logfile = kept_logs[ch];
	}

	ym_operator *end = context->operators + NUM_OPERATORS;
	for (ym_operator *cur = context->operators; cur < end; cur++) {
		cur->envelope = MAX_ENVELOPE;
		cur->env_phase = PHASE_RELEASE;
	}
}

//Initializes a YM2612 context.
// context      - storage to initialize; it is zeroed first
// master_clock - master clock rate handed to the audio source and WAVE logger
// clock_div    - divider; clock_inc becomes clock_div * 6
// options      - YM_OPT_* flags (YM_OPT_3834 selects the status address mask,
//                YM_OPT_WAVE_LOG opens one WAVE log per channel)
//The shared lookup tables are built on the first call only and the chip is left
//in its reset state with the zero offset enabled.
void ym_init(ym2612_context * context, uint32_t master_clock, uint32_t clock_div, uint32_t options)
{
	static uint8_t registered_finalize;
	dfopen(debug_file, "ym_debug.txt", "w");
	memset(context, 0, sizeof(*context));
	context->clock_inc = clock_div * 6;
	context->busy_cycles = BUSY_CYCLES * context->clock_inc;
	context->audio = render_audio_source(master_clock, context->clock_inc * NUM_OPERATORS, 2);
	//TODO: pick a randomish high initial value and lower it over time
	context->invalid_status_decay = 225000 * context->clock_inc;
	context->status_address_mask = (options & YM_OPT_3834) ? 0 : 3;

	if (options & YM_OPT_WAVE_LOG) {
		//open one WAVE log per channel; a channel whose log can't be set up simply isn't logged
		for (int i = 0; i < NUM_CHANNELS; i++) {
			char fname[64];
			snprintf(fname, sizeof(fname), "ym_channel_%d.wav", i);
			FILE * f = context->channels[i].logfile = fopen(fname, "wb");
			if (!f) {
				fprintf(stderr, "Failed to open WAVE log file %s for writing\n", fname);
				continue;
			}
			if (!wave_init(f, master_clock / (context->clock_inc * NUM_OPERATORS), 16, 1)) {
				fclose(f);
				context->channels[i].logfile = NULL;
			}
		}
		log_context = context;
		//make sure the logs get finalized at exit, but only register the handler once
		if (!registered_finalize) {
			atexit(ym_finalize_log);
			registered_finalize = 1;
		}
	}
	if (!did_tbl_init) {
		//populate sine table
		for (int32_t i = 0; i < SINE_TABLE_SIZE; i++) {
			double sine = sin( ((double)(i*2+1) / SINE_TABLE_SIZE) * M_PI_2 );

			//table stores 4.8 fixed pointed representation of the base 2 log
			sine_table[i] = round_fixed_point(-log2(sine), 8);
		}
		//populate power table
		for (int32_t i = 0; i < POW_TABLE_SIZE; i++) {
			//low 8 bits of the index are the fractional part, the rest is a shift amount
			double linear = pow(2, -((double)((i & 0xFF)+1) / 256.0));
			int32_t tmp = round_fixed_point(linear, 11);
			int32_t shift = (i >> 8) - 2;
			if (shift < 0) {
				tmp <<= 0-shift;
			} else {
				tmp >>= shift;
			}
			pow_table[i] =  tmp;
		}
		//populate envelope generator rate table, from small base table
		for (int rate = 0; rate < 64; rate++) {
			for (int cycle = 0; cycle < 8; cycle++) {
				uint16_t value;
				if (rate < 2) {
					value = 0;
				} else if (rate >= 60) {
					value = 8;
				} else if (rate < 8) {
					value = rate_table_base[((rate & 6) == 6 ? 16 : 0) + cycle];
				} else if (rate < 48) {
					value = rate_table_base[(rate & 0x3) * 8 + cycle];
				} else {
					value = rate_table_base[32 + (rate & 0x3) * 8 + cycle] << ((rate - 48) >> 2);
				}
				rate_table[rate * 8 + cycle] = value;
			}
		}
		//populate LFO PM table from small base table
		//seems like there must be a better way to derive this
		for (int freq = 0; freq < 128; freq++) {
			for (int pms = 0; pms < 8; pms++) {
				for (int step = 0; step < 32; step++) {
					int16_t value = 0;
					//steps 8-15 mirror steps 0-7, steps 16-31 repeat the first half negated
					int base_step = (step & 0x8) ? ((7 - step) & 7) : (step & 7);
					for (int bit = 0x40, shift = 0; bit > 0; bit >>= 1, shift++) {
						if (freq & bit) {
							value += lfo_pm_base[pms][base_step] >> shift;
						}
					}
					if (step & 0x10) {
						value = -value;
					}
					lfo_pm_table[freq * 256 + pms * 32 + step] = value;
				}
			}
		}
		//the tables are static and identical for every context, so build them only once
		did_tbl_init = 1;
	}
	ym_reset(context);
	ym_enable_zero_offset(context, 1);
}

//Releases the audio source of the context, finalizes the WAVE logs when this
//context is the one being logged, and finally frees the context itself
void ym_free(ym2612_context *context)
{
	render_free_source(context->audio);
	int owns_logs = (log_context == context);
	if (owns_logs) {
		ym_finalize_log();
	}
	free(context);
}

//Selects the output zero offset and the matching volume scaling
//enabled:  offset 0x70 with a 79/120 volume factor
//disabled: no offset with a 2/3 volume factor
void ym_enable_zero_offset(ym2612_context *context, uint8_t enabled)
{
	context->zero_offset = enabled ? 0x70 : 0;
	context->volume_mult = enabled ? 79 : 2;
	context->volume_div = enabled ? 120 : 3;
}
//Right shift applied to the summed modulator outputs in ym_run_phase
#define YM_MOD_SHIFT 1

//Value of context->ch3_mode (bits 0xC0 of a REG_TIME_CTRL write) for which
//ym_run_timers keys on channel 3's operators when timer A reloads
#define CSM_MODE 0x80

//Bits of ym_operator.ssg
#define SSG_ENABLE    8
#define SSG_INVERT    4
#define SSG_ALTERNATE 2
#define SSG_HOLD      1

//Envelope level at which the SSG handling in ym_run_phase/ym_run_envelope kicks in
#define SSG_CENTER 0x800

//Starts the envelope of op, skipping straight to decay for "infinite" attack rates
//A non-zero attack rate is doubled and has the channel's key scaling added; when the
//result is 62 or more the envelope is set to zero attenuation and decay begins at once
static void start_envelope(ym_operator *op, ym_channel *channel)
{
	uint8_t effective = op->rates[PHASE_ATTACK];
	if (effective != 0) {
		uint8_t scaling = channel->keycode >> op->key_scaling;
		effective = effective*2 + scaling;
	}
	if (effective < 62) {
		op->env_phase = PHASE_ATTACK;
		return;
	}
	op->env_phase = PHASE_DECAY;
	op->envelope = 0;
}

//Handles key-on for a single operator: the SSG inversion state is reloaded from the
//SSG register, the phase counter restarts from zero and the envelope is started
static void keyon(ym_operator *op, ym_channel *channel)
{
	op->inverted = op->ssg & SSG_INVERT;
	op->phase_counter = 0;
	start_envelope(op, channel);
}

//Key-on bit for each of a channel's 4 operators, in the order the operators
//are stored in context->operators
static const uint8_t keyon_bits[] = {0x10, 0x40, 0x20, 0x80};

//Handles key-off for a single operator by moving it to the release phase
//If the SSG output was inverted the envelope is mirrored around SSG_CENTER
//and the inversion is dropped
static void keyoff(ym_operator *op)
{
	op->env_phase = PHASE_RELEASE;
	if (!op->inverted) {
		return;
	}
	op->envelope = (SSG_CENTER - op->envelope) & MAX_ENVELOPE;
	//Nemesis says the inversion state doesn't change here, but I don't see how that is observable either way
	op->inverted = 0;
}

//Ends a CSM key-on: every operator of channel 3 (index 2) that is not held on
//by the regular key-on register gets keyed off
static void csm_keyoff(ym2612_context *context)
{
	uint8_t changes = 0xF0 ^ context->channels[2].keyon;
	context->csm_keyon = 0;
	ym_operator *cur = context->operators + 2*4;
	for (int slot = 0; slot < 4; slot++, cur++) {
		if (changes & keyon_bits[slot]) {
			keyoff(cur);
		}
	}
}

//Advances timer A, timer B and the LFO by one step
//ym_run calls this whenever current_op is 0, i.e. once per pass over all operators
void ym_run_timers(ym2612_context *context)
{
	if (context->timer_control & BIT_TIMERA_ENABLE) {
		if (context->timer_a != TIMER_A_MAX) {
			context->timer_a++;
			//a CSM key-on only lasts until the next timer A step
			if (context->csm_keyon) {
				csm_keyoff(context);
			}
		} else {
			//the reload right after the timer was enabled only clears the load flag,
			//later reloads raise the status flag if overflow reporting is enabled
			if (context->timer_control & BIT_TIMERA_LOAD) {
				context->timer_control &= ~BIT_TIMERA_LOAD;
			} else if (context->timer_control & BIT_TIMERA_OVEREN) {
				context->status |= BIT_STATUS_TIMERA;
			}
			context->timer_a = context->timer_a_load;
			//in CSM mode a timer A reload keys on every channel 3 operator
			//that is not already keyed on via the key-on register
			if (!context->csm_keyon && context->ch3_mode == CSM_MODE) {
				context->csm_keyon = 0xF0;
				uint8_t changes = 0xF0 ^ context->channels[2].keyon;;
				for (uint8_t op = 2*4, bit = 0; op < 3*4; op++, bit++)
				{
					if (changes & keyon_bits[bit]) {
						keyon(context->operators + op, context->channels + 2);
					}
				}
			}
		}
	}
	//timer B only steps on calls where sub_timer_b is zero
	//NOTE(review): presumably sub_timer_b is 8 bits wide so this happens every 16th call - confirm in ym2612.h
	if (!context->sub_timer_b) {
		if (context->timer_control & BIT_TIMERB_ENABLE) {
			if (context->timer_b != TIMER_B_MAX) {
				context->timer_b++;
			} else {
				//same load/overflow handling as timer A
				if (context->timer_control & BIT_TIMERB_LOAD) {
					context->timer_control &= ~BIT_TIMERB_LOAD;
				} else if (context->timer_control & BIT_TIMERB_OVEREN) {
					context->status |= BIT_STATUS_TIMERB;
				}
				context->timer_b = context->timer_b_load;
			}
		}
	}
	context->sub_timer_b += 0x10;
	//Update LFO
	if (context->lfo_enable) {
		if (context->lfo_counter) {
			context->lfo_counter--;
		} else {
			//counter expired: reload it and advance the AM step by 2
			context->lfo_counter = lfo_timer_values[context->lfo_freq];
			context->lfo_am_step += 2;
			context->lfo_am_step &= 0xFE;
			uint8_t old_pm_step = context->lfo_pm_step;
			context->lfo_pm_step = context->lfo_am_step / 8;
			//a new PM step changes the phase increment of every operator
			//belonging to a channel with phase modulation enabled
			if (context->lfo_pm_step != old_pm_step) {
				for (int chan = 0; chan < NUM_CHANNELS; chan++)
				{
					if (context->channels[chan].pms) {
						for (int op = chan * 4; op < (chan + 1) * 4; op++)
						{
							context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
						}
					}
				}
			}
		}
	}
}

//Runs one envelope generator step for a single operator
// context  - supplies the global envelope counter
// channel  - channel the operator belongs to (its keycode feeds key scaling)
// operator - operator whose envelope and phase are updated
void ym_run_envelope(ym2612_context *context, ym_channel *channel, ym_operator *operator)
{
	uint32_t env_cyc = context->env_counter;
	uint8_t rate;
	//decay ends once the envelope has reached the sustain level
	if (operator->env_phase == PHASE_DECAY && operator->envelope >= operator->sustain_level) {
		//operator->envelope = operator->sustain_level;
		operator->env_phase = PHASE_SUSTAIN;
	}
	rate = operator->rates[operator->env_phase];
	//a rate of zero stays zero, anything else is doubled, has the key scaling added and is capped at 63
	if (rate) {
		uint8_t ks = channel->keycode >> operator->key_scaling;;
		rate = rate*2 + ks;
		if (rate > 63) {
			rate = 63;
		}
	}
	//rates below 0x30 only update on envelope counter values whose low cycle_shift bits are zero
	uint32_t cycle_shift = rate < 0x30 ? ((0x2F - rate) >> 2) : 0;
	if (!(env_cyc & ((1 << cycle_shift) - 1))) {
		//the next 3 counter bits pick the column of the rate table
		uint32_t update_cycle = env_cyc >> cycle_shift & 0x7;
		uint16_t envelope_inc = rate_table[rate * 8 + update_cycle];
		if (operator->env_phase == PHASE_ATTACK) {
			//this can probably be optimized to a single shift rather than a multiply + shift
			uint16_t old_env = operator->envelope;
			//the step is derived from the inverted envelope, so it moves the envelope towards 0
			operator->envelope += ((~operator->envelope * envelope_inc) >> 4) & 0xFFFFFFFC;
			if (operator->envelope > old_env) {
				//Handle overflow
				operator->envelope = 0;
			}
			//attack is finished once the attenuation has reached zero
			if (!operator->envelope) {
				operator->env_phase = PHASE_DECAY;
			}
		} else {
			//with SSG enabled the envelope moves 4 times as fast below SSG_CENTER and stops at or above it
			if (operator->ssg) {
				if (operator->envelope < SSG_CENTER) {
					envelope_inc *= 4;
				} else {
					envelope_inc = 0;
				}
			}
			//envelope value is 10-bits, but it will be used as a 4.8 value
			operator->envelope += envelope_inc << 2;
			//clamp to max attenuation value
			if (
				operator->envelope > MAX_ENVELOPE 
				|| (operator->env_phase == PHASE_RELEASE && operator->envelope >= SSG_CENTER)
			) {
				operator->envelope = MAX_ENVELOPE;
			}
		}
	}
}

//Runs the phase generator and output calculation for one operator
// context - chip state
// channel - index of the channel the operator belongs to
// op      - index of the operator in context->operators
//Nothing is done for channel 5 while the DAC is enabled.
void ym_run_phase(ym2612_context *context, uint32_t channel, uint32_t op)
{
	if (channel != 5 || !context->dac_enable) {
		//printf("updating operator %d of channel %d\n", op, channel);
		ym_operator * operator = context->operators + op;
		ym_channel * chan = context->channels + channel;
		//the phase used for this sample is taken before the counter advances
		uint16_t phase = operator->phase_counter >> 10 & 0x3FF;
		operator->phase_counter += operator->phase_inc;//ym_calc_phase_inc(context, operator, op);
		int16_t mod = 0;
		if (op & 3) {
			//operators other than the first of a channel are modulated by up to two
			//sources; a second source is only considered when the first one is set
			if (operator->mod_src[0]) {
				mod = *operator->mod_src[0];
				if (operator->mod_src[1]) {
					mod += *operator->mod_src[1];
				}
				mod >>= YM_MOD_SHIFT;
			}
		} else {
			//the first operator of a channel is modulated by its own two most recent outputs (feedback)
			if (chan->feedback) {
				mod = (chan->op1_old + operator->output) >> (10-chan->feedback);
			}
		}
		uint16_t env = operator->envelope;
		if (operator->ssg) {
			if (env >= SSG_CENTER) {
				if (operator->ssg & SSG_ALTERNATE) {
					//alternate mode flips the inversion state, except during release
					//or when hold is set and the state already differs from the invert bit
					if (operator->env_phase != PHASE_RELEASE && (
						!(operator->ssg & SSG_HOLD) || ((operator->ssg ^ operator->inverted) & SSG_INVERT) == 0
					)) {
						operator->inverted ^= SSG_INVERT;
					}
				} else if (!(operator->ssg & SSG_HOLD)) {
					phase = operator->phase_counter = 0;
				}
				//without hold the envelope restarts while in decay or sustain
				if (
					(operator->env_phase == PHASE_DECAY || operator->env_phase == PHASE_SUSTAIN) 
					&& !(operator->ssg & SSG_HOLD)
				) {
					start_envelope(operator, chan);
					env = operator->envelope;
				}
			}
			if (operator->inverted) {
				env = (SSG_CENTER - env) & MAX_ENVELOPE;
			}
		}
		env += operator->total_level;
		if (operator->am) {
			//base AM value derived from the LFO AM step; bit 0x80 selects whether the step is inverted
			uint16_t base_am = (context->lfo_am_step & 0x80 ? context->lfo_am_step : ~context->lfo_am_step) & 0x7E;
			if (ams_shift[chan->ams] >= 0) {
				env += (base_am >> ams_shift[chan->ams]) & MAX_ENVELOPE;
			} else {
				env += base_am << (-ams_shift[chan->ams]);
			}
		}
		if (env > MAX_ENVELOPE) {
			env = MAX_ENVELOPE;
		}
		if (first_key_on) {
			dfprintf(debug_file, "op %d, base phase: %d, mod: %d, sine: %d, out: %d\n", op, phase, mod, sine_table[(phase+mod) & 0x1FF], pow_table[sine_table[phase & 0x1FF] + env]);
		}
		//if ((channel != 0 && channel != 4) || chan->algorithm != 5) {
			phase += mod;
		//}

		//the low 9 phase bits index the half sine table (log domain), the envelope is added
		//as attenuation and the power table converts the sum back to a linear value
		int16_t output = pow_table[sine_table[phase & 0x1FF] + env];
		//phase bit 9 selects the negative half of the wave
		if (phase & 0x200) {
			output = -output;
		}
		//remember the previous output of the operators at position 0 and 2 of the channel
		if (op % 4 == 0) {
			chan->op1_old = operator->output;
		} else if (op % 4 == 2) {
			chan->op2_old = operator->output;
		}
		operator->output = output;
		//Update the channel output if we've updated all operators
		if (op % 4 == 3) {
			if (chan->algorithm < 4) {
				//only the last operator is audible
				chan->output = operator->output;
			} else if(chan->algorithm == 4) {
				//last operator plus the operator at position 2
				chan->output = operator->output + context->operators[channel * 4 + 2].output;
			} else {
				//algorithm 7 sums all four operators, 5 and 6 sum the last three
				output = 0;
				for (uint32_t op = ((chan->algorithm == 7) ? 0 : 1) + channel*4; op < (channel+1)*4; op++) {
					output += context->operators[op].output;
				}
				chan->output = output;
			}
			//NOTE(review): value is computed here but never used - looks like leftover debug code
			if (first_key_on) {
				int16_t value = context->channels[channel].output & 0x3FE0;
				if (value & 0x2000) {
					value |= 0xC000;
				}
			}
		}
		//puts("operator update done");
	}
}

//Mixes the current output of all channels into one stereo sample and
//hands it to the audio source
//Each channel value is clamped (or masked and sign extended when already in range),
//pushed away from zero by the zero offset and optionally written to the channel's
//WAVE log. A channel muted on one side still contributes the scaled zero offset there.
void ym_output_sample(ym2612_context *context)
{
	int16_t left = 0, right = 0;
	for (int i = 0; i < NUM_CHANNELS; i++) {
		ym_channel *chan = context->channels + i;
		int16_t value = chan->output;
		if (value >= -0x1FF0 && value <= 0x1FE0) {
			//in range: drop the low 5 bits and sign extend from bit 13
			value &= 0x3FE0;
			if (value & 0x2000) {
				value |= 0xC000;
			}
		} else if (value > 0x1FE0) {
			value = 0x1FE0;
		} else {
			value = -0x1FF0;
		}

		if (value >= 0) {
			value += context->zero_offset;
		} else {
			value -= context->zero_offset;
		}
		int nonneg = value >= 0;

		if (chan->logfile) {
			fwrite(&value, sizeof(value), 1, chan->logfile);
		}

		if (chan->lr & 0x80) {
			left += (value * context->volume_mult) / context->volume_div;
		} else if (context->zero_offset) {
			if (nonneg) {
				left += (context->zero_offset * context->volume_mult) / context->volume_div;
			} else {
				left -= (context->zero_offset * context->volume_mult) / context->volume_div;
			}
		}

		if (chan->lr & 0x40) {
			right += (value * context->volume_mult) / context->volume_div;
		} else if (context->zero_offset) {
			if (nonneg) {
				right += (context->zero_offset * context->volume_mult) / context->volume_div;
			} else {
				right -= (context->zero_offset * context->volume_mult) / context->volume_div;
			}
		}
	}
	render_put_stereo_sample(context->audio, left, right);
}

//Runs the chip until context->current_cycle has reached to_cycle
//Every step of clock_inc cycles processes one operator's phase generator; the timers
//run when the operator index is 0, the envelope generator runs on every third step
//and a sample is output after the last operator
void ym_run(ym2612_context * context, uint32_t to_cycle)
{
	//TODO: Fix channel update order OR remap channels in register write
	while (context->current_cycle < to_cycle) {
		//Update timers at beginning of 144 cycle period
		if (context->current_op == 0) {
			ym_run_timers(context);
		}
		//Update Envelope Generator
		if (context->current_op % 3 == 0) {
			uint32_t env_op = context->current_env_op;
			ym_run_envelope(context, context->channels + env_op/4, context->operators + env_op);
			if (++context->current_env_op == NUM_OPERATORS) {
				//all operators had their envelope step, start the next envelope cycle
				context->current_env_op = 0;
				context->env_counter++;
			}
		}

		//Update Phase Generator
		ym_run_phase(context, context->current_op / 4, context->current_op);
		if (++context->current_op == NUM_OPERATORS) {
			context->current_op = 0;
			ym_output_sample(context);
		}

		context->current_cycle += context->clock_inc;
	}
}

//Latches a register address for part 1; the next data write targets this register
void ym_address_write_part1(ym2612_context * context, uint8_t address)
{
	context->selected_part = 0;
	context->selected_reg = address;
}

//Latches a register address for part 2; the next data write targets this register
void ym_address_write_part2(ym2612_context * context, uint8_t address)
{
	context->selected_part = 1;
	context->selected_reg = address;
}

//Low 2 bits of the keycode, indexed by fnum >> 7
//The register write code combines the result with block << 2 to form the full keycode
static uint8_t fnum_to_keycode[] = {
	//F11 = 0
	0,0,0,0,0,0,0,1,
	//F11 = 1
	2,3,3,3,3,3,3,3
};

//table courtesy of Nemesis
//Detune amount indexed by [keycode][detune & 3]
//ym_calc_phase_inc adds or subtracts the value depending on bit 2 of the detune setting
static uint32_t detune_table[][4] = {
	{0, 0, 1, 2},   //0  (0x00)
    {0, 0, 1, 2},   //1  (0x01)
    {0, 0, 1, 2},   //2  (0x02)
    {0, 0, 1, 2},   //3  (0x03)
    {0, 1, 2, 2},   //4  (0x04)
    {0, 1, 2, 3},   //5  (0x05)
    {0, 1, 2, 3},   //6  (0x06)
    {0, 1, 2, 3},   //7  (0x07)
    {0, 1, 2, 4},   //8  (0x08)
    {0, 1, 3, 4},   //9  (0x09)
    {0, 1, 3, 4},   //10 (0x0A)
    {0, 1, 3, 5},   //11 (0x0B)
    {0, 2, 4, 5},   //12 (0x0C)
    {0, 2, 4, 6},   //13 (0x0D)
    {0, 2, 4, 6},   //14 (0x0E)
    {0, 2, 5, 7},   //15 (0x0F)
    {0, 2, 5, 8},   //16 (0x10)
    {0, 3, 6, 8},   //17 (0x11)
    {0, 3, 6, 9},   //18 (0x12)
    {0, 3, 7,10},   //19 (0x13)
    {0, 4, 8,11},   //20 (0x14)
    {0, 4, 8,12},   //21 (0x15)
    {0, 4, 9,13},   //22 (0x16)
    {0, 5,10,14},   //23 (0x17)
    {0, 5,11,16},   //24 (0x18)
    {0, 6,12,17},   //25 (0x19)
    {0, 6,13,19},   //26 (0x1A)
    {0, 7,14,20},   //27 (0x1B)
    {0, 8,16,22},   //28 (0x1C)
    {0, 8,16,22},   //29 (0x1D)
    {0, 8,16,22},   //30 (0x1E)
    {0, 8,16,22}    //31 (0x1F)
};

//Calculates the phase increment of one operator from the current frequency settings
// context  - chip state (channel 3 mode, supplemental frequencies, LFO PM step)
// operator - operator whose detune and multiple settings are applied
// op       - index of the operator in context->operators
//returns the phase increment to add to the operator's phase counter each sample
static uint32_t ym_calc_phase_inc(ym2612_context * context, ym_operator * operator, uint32_t op)
{
	uint32_t chan_num = op / 4;
	//printf("ym_update_phase_inc | channel: %d, op: %d\n", chan_num, op);
	ym_channel * channel = context->channels + chan_num;

	//pick the frequency source: the first three operators of channel 3 use their own
	//supplemental frequency while a special channel 3 mode is active
	uint32_t fnum, block, keycode;
	if (chan_num == 2 && context->ch3_mode && (op < (2*4 + 3))) {
		//supplemental fnum registers are in a different order than normal slot paramters
		int index = op-2*4;
		if (index < 2) {
			index ^= 1;
		}
		fnum = context->ch3_supp[index].fnum;
		block = context->ch3_supp[index].block;
		keycode = context->ch3_supp[index].keycode;
	} else {
		fnum = channel->fnum;
		block = channel->block;
		keycode = channel->keycode;
	}

	//base frequency
	uint32_t inc = fnum;
	if (channel->pms) {
		//LFO phase modulation works on the doubled fnum; the extra bit is dropped again below
		inc = inc * 2 + lfo_pm_table[(inc & 0x7F0) * 16 + channel->pms + context->lfo_pm_step];
		inc &= 0xFFF;
	}
	if (block) {
		inc <<= (block-1);
	} else {
		inc >>= 1;
	}
	if (channel->pms) {
		inc >>= 1;
	}

	//detune
	uint32_t detune = detune_table[keycode][operator->detune & 0x3];
	if (operator->detune & 0x4) {
		inc -= detune;
		//this can underflow, mask to 17-bit result
		inc &= 0x1FFFF;
	} else {
		inc += detune;
	}

	//multiple
	if (operator->multiple) {
		inc *= operator->multiple;
		inc &= 0xFFFFF;
	} else {
		//0.5
		inc >>= 1;
	}
	//printf("phase_inc for operator %d: %d, block: %d, fnum: %d, detune: %d, multiple: %d\n", op, inc, channel->block, channel->fnum, detune, operator->multiple);
	return inc;
}

//Attaches a VGM writer to the context and dumps the current register values into it
//so the log starts out with the present chip state. Register numbers treated as
//invalid are left out of the dump.
void ym_vgm_log(ym2612_context *context, uint32_t master_clock, vgm_writer *vgm)
{
	vgm_ym2612_init(vgm, 6 * master_clock / context->clock_inc);
	context->vgm = vgm;

	for (uint8_t reg = YM_PART1_START; reg < YM_REG_END; reg++) {
		int unused_slot = reg >= REG_DETUNE_MULT && (reg & 3) == 3;
		int unused_range = reg >= 0x2D && reg < REG_DETUNE_MULT;
		int invalid = unused_slot || unused_range || reg == 0x23 || reg == 0x29;
		if (!invalid) {
			vgm_ym2612_part1_write(context->vgm, context->current_cycle, reg, context->part1_regs[reg - YM_PART1_START]);
		}
	}

	for (uint8_t reg = YM_PART2_START; reg < YM_REG_END; reg++) {
		int unused_slot = (reg & 3) == 3;
		int ch3_range = reg >= REG_FNUM_LOW_CH3 && reg < REG_ALG_FEEDBACK;
		if (!(unused_slot || ch3_range)) {
			vgm_ym2612_part2_write(context->vgm, context->current_cycle, reg, context->part2_regs[reg - YM_PART2_START]);
		}
	}
}

void ym_data_write(ym2612_context * context, uint8_t value)
{
	context->write_cycle = context->current_cycle;
	context->busy_start = context->current_cycle + context->clock_inc;
	
	if (context->selected_reg >= YM_REG_END) {
		return;
	}
	if (context->selected_part) {
		if (context->selected_reg < YM_PART2_START) {
			return;
		}
		if (context->vgm) {
			vgm_ym2612_part2_write(context->vgm, context->current_cycle, context->selected_reg, value);
		}
		context->part2_regs[context->selected_reg - YM_PART2_START] = value;
	} else {
		if (context->selected_reg < YM_PART1_START) {
			return;
		}
		if (context->vgm) {
			vgm_ym2612_part1_write(context->vgm, context->current_cycle, context->selected_reg, value);
		}
		context->part1_regs[context->selected_reg - YM_PART1_START] = value;
	}
	uint8_t buffer[3] = {context->selected_part, context->selected_reg, value};
	event_log(EVENT_YM_REG, context->current_cycle, sizeof(buffer), buffer);
	dfprintf(debug_file, "write of %X to reg %X in part %d\n", value, context->selected_reg, context->selected_part+1);
	if (context->selected_reg < 0x30) {
		//Shared regs
		switch (context->selected_reg)
		{
		//TODO: Test reg
		case REG_LFO:
			/*if ((value & 0x8) && !context->lfo_enable) {
				printf("LFO Enabled, Freq: %d\n", value & 0x7);
			}*/
			context->lfo_enable = value & 0x8;
			if (!context->lfo_enable) {
				uint8_t old_pm_step = context->lfo_pm_step;
				context->lfo_am_step = context->lfo_pm_step = 0;
				if (old_pm_step) {
					for (int chan = 0; chan < NUM_CHANNELS; chan++)
					{
						if (context->channels[chan].pms) {
							for (int op = chan * 4; op < (chan + 1) * 4; op++)
							{
								context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
							}
						}
					}
				}
			}
			context->lfo_freq = value & 0x7;

			break;
		case REG_TIMERA_HIGH:
			context->timer_a_load &= 0x3;
			context->timer_a_load |= value << 2;
			break;
		case REG_TIMERA_LOW:
			context->timer_a_load &= 0xFFFC;
			context->timer_a_load |= value & 0x3;
			break;
		case REG_TIMERB:
			context->timer_b_load = value;
			break;
		case REG_TIME_CTRL: {
			if (value & BIT_TIMERA_ENABLE && !(context->timer_control & BIT_TIMERA_ENABLE)) {
				context->timer_a = TIMER_A_MAX;
				context->timer_control |= BIT_TIMERA_LOAD;
			}
			if (value & BIT_TIMERB_ENABLE && !(context->timer_control & BIT_TIMERB_ENABLE)) {
				context->timer_b = TIMER_B_MAX;
				context->timer_control |= BIT_TIMERB_LOAD;
			}
			context->timer_control &= (BIT_TIMERA_LOAD | BIT_TIMERB_LOAD);
			context->timer_control |= value & 0xF;
			if (value & BIT_TIMERA_RESET) {
				context->status &= ~BIT_STATUS_TIMERA;
			}
			if (value & BIT_TIMERB_RESET) {
				context->status &= ~BIT_STATUS_TIMERB;
			}
			if (context->ch3_mode == CSM_MODE && (value & 0xC0) != CSM_MODE && context->csm_keyon) {
				csm_keyoff(context);
			}
			uint8_t old_mode = context->ch3_mode;
			context->ch3_mode = value & 0xC0;
			if (context->ch3_mode != old_mode) {
				for (int op = 2 * 4; op < 3*4; op++)
				{
					context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
				}
			}
			break;
		}
		case REG_KEY_ONOFF: {
			uint8_t channel = value & 0x7;
			if (channel != 3 && channel != 7) {
				if (channel > 2) {
					channel--;
				}
				uint8_t changes = channel == 2 
					? (value | context->csm_keyon) ^  (context->channels[channel].keyon | context->csm_keyon)
					: value ^ context->channels[channel].keyon;
				context->channels[channel].keyon = value & 0xF0;
				for (uint8_t op = channel * 4, bit = 0; op < (channel + 1) * 4; op++, bit++) {
					if (changes & keyon_bits[bit]) {
						if (value & keyon_bits[bit]) {
							first_key_on = 1;
							//printf("Key On for operator %d in channel %d\n", op, channel);
							keyon(context->operators + op, context->channels + channel);
						} else {
							//printf("Key Off for operator %d in channel %d\n", op, channel);
							keyoff(context->operators + op);
						}
					}
				}
			}
			break;
		}
		case REG_DAC:
			if (context->dac_enable) {
				context->channels[5].output = (((int16_t)value) - 0x80) << 6;
				//printf("DAC Write %X(%d) @ %d\n", value, context->channels[5].output, context->current_cycle);
			}
			break;
		case REG_DAC_ENABLE:
			//printf("DAC Enable: %X\n", value);
			context->dac_enable = value & 0x80;
			break;
		}
	} else if (context->selected_reg < 0xA0) {
		//part
		uint8_t op = context->selected_part ? (NUM_OPERATORS/2) : 0;
		//channel in part
		if ((context->selected_reg & 0x3) != 0x3) {
			op += 4 * (context->selected_reg & 0x3) + ((context->selected_reg & 0xC) / 4);
			//printf("write targets operator %d (%d of channel %d)\n", op, op % 4, op / 4);
			ym_operator * operator = context->operators + op;
			switch (context->selected_reg & 0xF0)
			{
			case REG_DETUNE_MULT:
				operator->detune = value >> 4 & 0x7;
				operator->multiple = value & 0xF;
				operator->phase_inc = ym_calc_phase_inc(context, operator, op);
				break;
			case REG_TOTAL_LEVEL:
				operator->total_level = (value & 0x7F) << 5;
				break;
			case REG_ATTACK_KS:
				operator->key_scaling = 3 - (value >> 6);
				operator->rates[PHASE_ATTACK] = value & 0x1F;
				break;
			case REG_DECAY_AM:
				operator->am = value & 0x80;
				operator->rates[PHASE_DECAY] = value & 0x1F;
				break;
			case REG_SUSTAIN_RATE:
				operator->rates[PHASE_SUSTAIN] = value & 0x1F;
				break;
			case REG_S_LVL_R_RATE:
				operator->rates[PHASE_RELEASE] = (value & 0xF) << 1 | 1;
				operator->sustain_level = (value & 0xF0) << 3;
				if (operator->sustain_level == 0x780) {
					operator->sustain_level = MAX_ENVELOPE;
				}
				break;
			case REG_SSG_EG:
				if (!(value & SSG_ENABLE)) {
					value = 0;
				}
				if ((value ^ operator->ssg) & SSG_INVERT) {
					operator->inverted ^= SSG_INVERT;
				}
				operator->ssg = value;
				break;
			}
		}
	} else {
		uint8_t channel = context->selected_reg & 0x3;
		if (channel != 3) {
			if (context->selected_part) {
				channel += 3;
			}
			//printf("write targets channel %d\n", channel);
			switch (context->selected_reg & 0xFC)
			{
			case REG_FNUM_LOW:
				context->channels[channel].block = context->channels[channel].block_fnum_latch >> 3 & 0x7;
				context->channels[channel].fnum = (context->channels[channel].block_fnum_latch & 0x7) << 8 | value;
				context->channels[channel].keycode = context->channels[channel].block << 2 | fnum_to_keycode[context->channels[channel].fnum >> 7];
				for (int op = channel * 4; op < (channel + 1) * 4; op++)
				{
					context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
				}
				break;
			case REG_BLOCK_FNUM_H:{
				context->channels[channel].block_fnum_latch = value;
				break;
			}
			case REG_FNUM_LOW_CH3:
				if (channel < 3) {
					context->ch3_supp[channel].block = context->ch3_supp[channel].block_fnum_latch >> 3 & 0x7;
					context->ch3_supp[channel].fnum = (context->ch3_supp[channel].block_fnum_latch & 0x7) << 8 | value;
					context->ch3_supp[channel].keycode = context->ch3_supp[channel].block << 2 | fnum_to_keycode[context->ch3_supp[channel].fnum >> 7];
					if (context->ch3_mode) {
						int op = 2 * 4 + (channel < 2 ? (channel ^ 1) : channel);
						context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
					}
				}
				break;
			case REG_BLOCK_FN_CH3:
				if (channel < 3) {
					context->ch3_supp[channel].block_fnum_latch = value;
				}
				break;
			case REG_ALG_FEEDBACK:
				context->channels[channel].algorithm = value & 0x7;
				switch (context->channels[channel].algorithm)
				{
				case 0:
					//operator 3 modulated by operator 2
					//this uses a special op2 result reg on HW, but that reg will have the most recent
					//result from op2 when op3 starts executing
					context->operators[channel*4+1].mod_src[0] = &context->operators[channel*4+2].output;
					context->operators[channel*4+1].mod_src[1] = NULL;
					
					//operator 2 modulated by operator 1
					context->operators[channel*4+2].mod_src[0] = &context->operators[channel*4+0].output;
					
					//operator 4 modulated by operator 3
					context->operators[channel*4+3].mod_src[0] = &context->operators[channel*4+1].output;
					context->operators[channel*4+3].mod_src[1] = NULL;
					break;
				case 1:
					//operator 3 modulated by operator 1+2
					//op1 starts executing before this, but due to pipeline length the most current result is
					//not available and instead the previous result is used
					context->operators[channel*4+1].mod_src[0] = &context->channels[channel].op1_old;
					//this uses a special op2 result reg on HW, but that reg will have the most recent
					//result from op2 when op3 starts executing
					context->operators[channel*4+1].mod_src[1] = &context->operators[channel*4+2].output;
					
					//operator 2 unmodulated
					context->operators[channel*4+2].mod_src[0] = NULL;
					
					//operator 4 modulated by operator 3
					context->operators[channel*4+3].mod_src[0] = &context->operators[channel*4+1].output;
					context->operators[channel*4+3].mod_src[1] = NULL;
					break;
				case 2:
					//operator 3 modulated by operator 2
					//this uses a special op2 result reg on HW, but that reg will have the most recent
					//result from op2 when op3 starts executing
					context->operators[channel*4+1].mod_src[0] = &context->operators[channel*4+2].output;
					context->operators[channel*4+1].mod_src[1] = NULL;
					
					//operator 2 unmodulated
					context->operators[channel*4+2].mod_src[0] = NULL;
					
					//operator 4 modulated by operator 1+3
					//this uses a special op1 result reg on HW, but that reg will have the most recent
					//result from op1 when op4 starts executing
					context->operators[channel*4+3].mod_src[0] = &context->operators[channel*4+0].output;
					context->operators[channel*4+3].mod_src[1] = &context->operators[channel*4+1].output;
					break;
				case 3:
					//operator 3 unmodulated
					context->operators[channel*4+1].mod_src[0] = NULL;
					context->operators[channel*4+1].mod_src[1] = NULL;
					
					//operator 2 modulated by operator 1
					context->operators[channel*4+2].mod_src[0] = &context->operators[channel*4+0].output;
					
					//operator 4 modulated by operator 2+3
					//op2 starts executing before this, but due to pipeline length the most current result is
					//not available and instead the previous result is used
					context->operators[channel*4+3].mod_src[0] = &context->channels[channel].op2_old;
					context->operators[channel*4+3].mod_src[1] = &context->operators[channel*4+1].output;
					break;
				case 4:
					//operator 3 unmodulated
					context->operators[channel*4+1].mod_src[0] = NULL;
					context->operators[channel*4+1].mod_src[1] = NULL;
					
					//operator 2 modulated by operator 1
					context->operators[channel*4+2].mod_src[0] = &context->operators[channel*4+0].output;
					
					//operator 4 modulated by operator 3
					context->operators[channel*4+3].mod_src[0] = &context->operators[channel*4+1].output;
					context->operators[channel*4+3].mod_src[1] = NULL;
					break;
				case 5:
					//operator 3 modulated by operator 1
					//op1 starts executing before this, but due to pipeline length the most current result is
					//not available and instead the previous result is used
					context->operators[channel*4+1].mod_src[0] = &context->channels[channel].op1_old;
					context->operators[channel*4+1].mod_src[1] = NULL;
					
					//operator 2 modulated by operator 1
					context->operators[channel*4+2].mod_src[0] = &context->operators[channel*4+0].output;
					
					//operator 4 modulated by operator 1
					//this uses a special op1 result reg on HW, but that reg will have the most recent
					//result from op1 when op4 starts executing
					context->operators[channel*4+3].mod_src[0] = &context->operators[channel*4+0].output;
					context->operators[channel*4+3].mod_src[1] = NULL;
					break;
				case 6:
					//operator 3 unmodulated
					context->operators[channel*4+1].mod_src[0] = NULL;
					context->operators[channel*4+1].mod_src[1] = NULL;
					
					//operator 2 modulated by operator 1
					context->operators[channel*4+2].mod_src[0] = &context->operators[channel*4+0].output;
					
					//operator 4 unmodulated
					context->operators[channel*4+3].mod_src[0] = NULL;
					context->operators[channel*4+3].mod_src[1] = NULL;
					break;
				case 7:
					//everything is an output so no modulation (except for op 1 feedback)
					context->operators[channel*4+1].mod_src[0] = NULL;
					context->operators[channel*4+1].mod_src[1] = NULL;
					
					context->operators[channel*4+2].mod_src[0] = NULL;
					
					context->operators[channel*4+3].mod_src[0] = NULL;
					context->operators[channel*4+3].mod_src[1] = NULL;
					break;
				}
				context->channels[channel].feedback = value >> 3 & 0x7;
				//printf("Algorithm %d, feedback %d for channel %d\n", value & 0x7, value >> 3 & 0x7, channel);
				break;
			case REG_LR_AMS_PMS: {
				uint8_t old_pms = context->channels[channel].pms;
				context->channels[channel].pms = (value & 0x7) * 32;
				context->channels[channel].ams = value >> 4 & 0x3;
				context->channels[channel].lr = value & 0xC0;
				if (old_pms != context->channels[channel].pms) {
					for (int op = channel * 4; op < (channel + 1) * 4; op++)
					{
						context->operators[op].phase_inc = ym_calc_phase_inc(context, context->operators + op, op);
					}
				}
				//printf("Write of %X to LR_AMS_PMS reg for channel %d\n", value, channel);
				break;
			}
			}
		}
	}
}

//Services a read of a YM2612 status port at the given cycle
//If the port is non-zero after masking with status_address_mask, the remembered
//last_status value is returned; it is zeroed first when more than invalid_status_decay
//cycles have passed since it was recorded
//Otherwise the live status is returned, with bit 7 set while cycle falls inside the
//busy window, and that value and cycle are remembered for later masked reads
uint8_t ym_read_status(ym2612_context * context, uint32_t cycle, uint32_t port)
{
	if (port & context->status_address_mask) {
		//CYCLE_NEVER marks "no recorded read", in which case nothing decays
		if (context->last_status_cycle != CYCLE_NEVER) {
			if (cycle - context->last_status_cycle > context->invalid_status_decay) {
				context->last_status = 0;
			}
		}
		return context->last_status;
	}

	uint8_t result = context->status;
	int in_busy_window = cycle >= context->busy_start
		&& cycle < context->busy_start + context->busy_cycles;
	if (in_busy_window) {
		result |= 0x80;
	}
	//Remember what was returned and when so masked ports can mirror it
	context->last_status_cycle = cycle;
	context->last_status = result;
	return result;
}

//Dumps a human readable summary of one channel and its four operators to stdout
//channel is the zero-based channel index; it is shown to the user as channel+1
void ym_print_channel_info(ym2612_context *context, int channel)
{
	//Operators are stored with the middle two swapped relative to the numbers
	//shown to the user, so storage slots 0-3 are displayed as 1, 3, 2, 4
	static const int display_number[4] = {1, 3, 2, 4};

	ym_channel *chan = context->channels + channel;

	const char *pan;
	switch (chan->lr)
	{
	case 0xC0:
		pan = "LR";
		break;
	case 0x80:
		pan = "L";
		break;
	case 0x40:
		pan = "R";
		break;
	default:
		pan = "";
		break;
	}

	printf("\n***Channel %d***\n"
	       "Algorithm: %d\n"
	       "Feedback:  %d\n"
	       "Pan:       %s\n"
	       "AMS:       %d\n"
	       "PMS:       %d\n",
	       channel+1, chan->algorithm, chan->feedback, pan, chan->ams, chan->pms);

	//Only the third channel has a mode to report
	if (channel == 2) {
		const char *mode_name = context->ch3_mode ? "special" : "normal";
		printf("Mode:      %X: %s\n", context->ch3_mode, mode_name);
	}

	ym_operator *op = context->operators + channel * 4;
	for (int slot = 0; slot < 4; slot++, op++)
	{
		printf("\nOperator %d:\n"
		       "    Multiple:      %d\n"
		       "    Detune:        %d\n"
		       "    Total Level:   %d\n"
		       "    Attack Rate:   %d\n"
		       "    Key Scaling:   %d\n"
		       "    Decay Rate:    %d\n"
		       "    Sustain Level: %d\n"
		       "    Sustain Rate:  %d\n"
		       "    Release Rate:  %d\n"
		       "    Amplitude Modulation %s\n",
		       display_number[slot], op->multiple, op->detune, op->total_level,
		       op->rates[PHASE_ATTACK], op->key_scaling, op->rates[PHASE_DECAY],
		       op->sustain_level, op->rates[PHASE_SUSTAIN], op->rates[PHASE_RELEASE],
		       op->am ? "On" : "Off");
	}
}

//Prints the current value, load value, triggered flag and enable flag of
//timer A and timer B to stdout
void ym_print_timer_info(ym2612_context *context)
{
	//"Triggered" reflects the timer bits in the status register,
	//"Enabled" reflects the enable bits in timer_control
	const char *a_triggered = (context->status & BIT_STATUS_TIMERA) ? "yes" : "no";
	const char *a_enabled = (context->timer_control & BIT_TIMERA_ENABLE) ? "yes" : "no";
	const char *b_triggered = (context->status & BIT_STATUS_TIMERB) ? "yes" : "no";
	const char *b_enabled = (context->timer_control & BIT_TIMERB_ENABLE) ? "yes" : "no";

	printf("***Timer A***\n"
	       "Current Value: %d\n"
	       "Load Value:    %d\n"
	       "Triggered:     %s\n"
	       "Enabled:       %s\n\n",
	       context->timer_a, context->timer_a_load, a_triggered, a_enabled);

	printf("***Timer B***\n"
	       "Current Value: %d\n"
	       "Load Value:    %d\n"
	       "Triggered:     %s\n"
	       "Enabled:       %s\n\n",
	       context->timer_b, context->timer_b_load, b_triggered, b_enabled);
}

//Writes the YM2612 state into buf
//The order and width of every field written here is the layout that
//ym_deserialize reads back, so the two must be changed together
void ym_serialize(ym2612_context *context, serialize_buffer *buf)
{
	//Register images for both parts
	save_buffer8(buf, context->part1_regs, YM_PART1_REGS);
	save_buffer8(buf, context->part2_regs, YM_PART2_REGS);

	//Per-operator running state
	for (int op_num = 0; op_num < NUM_OPERATORS; op_num++)
	{
		ym_operator *op = context->operators + op_num;
		save_int32(buf, op->phase_counter);
		save_int16(buf, op->envelope);
		save_int16(buf, op->output);
		save_int8(buf, op->env_phase);
		save_int8(buf, op->inverted);
	}

	//Per-channel running state
	for (int chan_num = 0; chan_num < NUM_CHANNELS; chan_num++)
	{
		ym_channel *chan = context->channels + chan_num;
		save_int16(buf, chan->output);
		save_int16(buf, chan->op1_old);
		//block and fnum only take effect through a latch, so the live values
		//are stored explicitly even though the regs array probably has the same info
		save_int8(buf, chan->block);
		save_int16(buf, chan->fnum);
		save_int8(buf, chan->keyon);
	}

	//Supplemental channel 3 frequency state, stored explicitly for the same latching reason
	//NOTE(review): fnum is written as a single byte here while the register handler
	//builds it from more than 8 bits - confirm whether the truncation is intended
	for (int supp = 0; supp < 3; supp++)
	{
		save_int8(buf, context->ch3_supp[supp].block);
		save_int8(buf, context->ch3_supp[supp].fnum);
	}

	//Timers
	save_int8(buf, context->timer_control);
	save_int16(buf, context->timer_a);
	save_int8(buf, context->timer_b);
	save_int8(buf, context->sub_timer_b);

	//Sequencing counters and flags
	save_int16(buf, context->env_counter);
	save_int8(buf, context->current_op);
	save_int8(buf, context->current_env_op);
	save_int8(buf, context->lfo_counter);
	save_int8(buf, context->csm_keyon);

	//Bus interface state
	save_int8(buf, context->status);
	save_int8(buf, context->selected_reg);
	save_int8(buf, context->selected_part);

	//Cycle bookkeeping
	save_int32(buf, context->current_cycle);
	save_int32(buf, context->write_cycle);
	save_int32(buf, context->busy_start);

	//Status read tracking; ym_deserialize treats these trailing fields as optional
	save_int32(buf, context->last_status_cycle);
	save_int32(buf, context->invalid_status_decay);
	save_int8(buf, context->last_status);
}

//Restores YM2612 state from buf, which is expected to use the layout written by ym_serialize
//vcontext must point to the ym2612_context to be filled in
//
//Most register-derived state is rebuilt by replaying the saved register values through
//ym_data_write; running state that cannot be derived from the registers is then loaded
//directly. Loaded values that are later used to build array indices are forced into the
//range the register handlers can produce, since the buffer may not hold a well-formed state
void ym_deserialize(deserialize_buffer *buf, void *vcontext)
{
	ym2612_context *context = vcontext;
	uint8_t temp_regs[YM_PART1_REGS];
	load_buffer8(buf, temp_regs, YM_PART1_REGS);
	context->selected_part = 0;
	for (int i = 0; i < YM_PART1_REGS; i++)
	{
		uint8_t reg = YM_PART1_START + i;
		if (reg == REG_TIME_CTRL) {
			//Only the channel 3 mode bits are taken from this register; timer_control
			//itself is loaded explicitly further down
			context->ch3_mode = temp_regs[i] & 0xC0;
		} else if (reg != REG_FNUM_LOW && reg != REG_KEY_ONOFF) {
			//REG_FNUM_LOW and REG_KEY_ONOFF are not replayed; block/fnum and keyon
			//are loaded explicitly further down
			context->selected_reg = reg;
			ym_data_write(context, temp_regs[i]);
		}
	}
	//temp_regs is reused for part 2
	//NOTE(review): this relies on YM_PART2_REGS <= YM_PART1_REGS - confirm in the header
	load_buffer8(buf, temp_regs, YM_PART2_REGS);
	context->selected_part = 1;
	for (int i = 0; i < YM_PART2_REGS; i++)
	{
		uint8_t reg = YM_PART2_START + i;
		if (reg != REG_FNUM_LOW) {
			context->selected_reg = reg;
			ym_data_write(context, temp_regs[i]);
		}
	}
	for (int i = 0; i < NUM_OPERATORS; i++)
	{
		context->operators[i].phase_counter = load_int32(buf);
		context->operators[i].envelope = load_int16(buf);
		context->operators[i].output = load_int16(buf);
		context->operators[i].env_phase = load_int8(buf);
		//env_phase indexes the per-operator rates array, so keep it a valid phase
		if (context->operators[i].env_phase > PHASE_RELEASE) {
			context->operators[i].env_phase = PHASE_RELEASE;
		}
		context->operators[i].inverted = load_int8(buf) != 0 ? SSG_INVERT : 0;
	}
	for (int i = 0; i < NUM_CHANNELS; i++)
	{
		context->channels[i].output = load_int16(buf);
		context->channels[i].op1_old = load_int16(buf);
		//Register writes only ever produce a 3-bit block and an 11-bit fnum
		//Masking keeps the fnum_to_keycode index (fnum >> 7) and the resulting
		//keycode inside that range even when the buffer contents are bogus
		context->channels[i].block = load_int8(buf) & 0x7;
		context->channels[i].fnum = load_int16(buf) & 0x7FF;
		context->channels[i].keycode = context->channels[i].block << 2 | fnum_to_keycode[context->channels[i].fnum >> 7];
		context->channels[i].keyon = load_int8(buf);
	}
	for (int i = 0; i < 3; i++)
	{
		//Same range restriction as for the regular channel block above
		context->ch3_supp[i].block = load_int8(buf) & 0x7;
		//NOTE(review): fnum is stored as a single byte in the save format, so bits above
		//the low 8 are not restored here; widening it needs a matching ym_serialize change
		context->ch3_supp[i].fnum = load_int8(buf);
		context->ch3_supp[i].keycode = context->ch3_supp[i].block << 2 | fnum_to_keycode[context->ch3_supp[i].fnum >> 7];
	}
	//NOTE(review): operator phase_inc values are not recomputed after block/fnum are
	//replaced above, unlike the register write handlers - confirm whether that is intended
	context->timer_control = load_int8(buf);
	context->timer_a = load_int16(buf);
	context->timer_b = load_int8(buf);
	context->sub_timer_b = load_int8(buf);
	context->env_counter = load_int16(buf);
	//current_op and current_env_op are reset to 0 if they are not valid operator numbers
	context->current_op = load_int8(buf);
	if (context->current_op >= NUM_OPERATORS) {
		context->current_op = 0;
	}
	context->current_env_op = load_int8(buf);
	if (context->current_env_op >= NUM_OPERATORS) {
		context->current_env_op = 0;
	}
	context->lfo_counter = load_int8(buf);
	context->csm_keyon = load_int8(buf);
	context->status = load_int8(buf);
	//These overwrite the temporary selected_reg/selected_part values used for the replay above
	context->selected_reg = load_int8(buf);
	context->selected_part = load_int8(buf);
	context->current_cycle = load_int32(buf);
	context->write_cycle = load_int32(buf);
	context->busy_start = load_int32(buf);
	if (buf->size > buf->cur_pos) {
		//More data remains, so the status read tracking fields are present
		context->last_status_cycle = load_int32(buf);
		context->invalid_status_decay = load_int32(buf);
		context->last_status = load_int8(buf);
	} else {
		//Buffer ends here (presumably a state saved before these fields existed);
		//fall back to values derived from what was already loaded
		context->last_status = context->status;
		context->last_status_cycle = context->write_cycle;
	}
}