From 612cd07fb3203aa61e5c2ce36d68973e41612533 Mon Sep 17 00:00:00 2001 From: Lior Halphon Date: Wed, 12 Dec 2018 23:44:00 +0200 Subject: [PATCH 1/4] Fixed emulation of echo RAM --- Core/memory.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Core/memory.c b/Core/memory.c index 4053530..9466ab1 100644 --- a/Core/memory.c +++ b/Core/memory.c @@ -193,7 +193,7 @@ static uint8_t read_high_memory(GB_gameboy_t *gb, uint16_t addr) } if (addr < 0xFE00) { - return gb->ram[addr & 0x0FFF]; + return read_banked_ram(gb, addr); } if (addr < 0xFF00) { @@ -409,7 +409,7 @@ static GB_read_function_t * const read_map[] = read_vram, read_vram, /* 8XXX, 9XXX */ read_mbc_ram, read_mbc_ram, /* AXXX, BXXX */ read_ram, read_banked_ram, /* CXXX, DXXX */ - read_high_memory, read_high_memory, /* EXXX FXXX */ + read_ram, read_high_memory, /* EXXX FXXX */ }; uint8_t GB_read_memory(GB_gameboy_t *gb, uint16_t addr) @@ -537,7 +537,7 @@ static void write_high_memory(GB_gameboy_t *gb, uint16_t addr, uint8_t value) { if (addr < 0xFE00) { GB_log(gb, "Wrote %02x to %04x (RAM Mirror)\n", value, addr); - gb->ram[addr & 0x0FFF] = value; + write_banked_ram(gb, addr, value); return; } @@ -907,7 +907,7 @@ static GB_write_function_t * const write_map[] = write_vram, write_vram, /* 8XXX, 9XXX */ write_mbc_ram, write_mbc_ram, /* AXXX, BXXX */ write_ram, write_banked_ram, /* CXXX, DXXX */ - write_high_memory, write_high_memory, /* EXXX FXXX */ + write_ram, write_high_memory, /* EXXX FXXX */ }; void GB_write_memory(GB_gameboy_t *gb, uint16_t addr, uint8_t value) From af0430dbc5652175d2433c4c80920f3407518a85 Mon Sep 17 00:00:00 2001 From: Lior Halphon Date: Mon, 31 Dec 2018 22:06:20 +0200 Subject: [PATCH 2/4] Unroll some loops in PPU code, more efficient timer handling --- Core/display.c | 4 ++++ Core/timing.c | 21 ++++++++------------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/Core/display.c b/Core/display.c index 2759146..aae3a47 100644 --- a/Core/display.c +++ b/Core/display.c @@ -27,6 +27,7 @@ static GB_fifo_item_t *fifo_pop(GB_fifo_t *fifo) static void fifo_push_bg_row(GB_fifo_t *fifo, uint8_t lower, uint8_t upper, uint8_t palette, bool bg_priority, bool flip_x) { if (!flip_x) { + #pragma unroll for (unsigned i = 8; i--;) { fifo->fifo[fifo->write_end] = (GB_fifo_item_t) { (lower >> 7) | ((upper >> 7) << 1), @@ -42,6 +43,7 @@ static void fifo_push_bg_row(GB_fifo_t *fifo, uint8_t lower, uint8_t upper, uint } } else { + #pragma unroll for (unsigned i = 8; i--;) { fifo->fifo[fifo->write_end] = (GB_fifo_item_t) { (lower & 1) | ((upper & 1) << 1), @@ -68,6 +70,7 @@ static void fifo_overlay_object_row(GB_fifo_t *fifo, uint8_t lower, uint8_t uppe uint8_t flip_xor = flip_x? 0: 0x7; + #pragma unroll for (unsigned i = 8; i--;) { uint8_t pixel = (lower >> 7) | ((upper >> 7) << 1); GB_fifo_item_t *target = &fifo->fifo[(fifo->read_end + (i ^ flip_xor)) & (GB_FIFO_LENGTH - 1)]; @@ -1063,6 +1066,7 @@ uint8_t GB_get_oam_info(GB_gameboy_t *gb, GB_oam_info_t *dest, uint8_t *sprite_h } for (unsigned y = 0; y < *sprite_height; y++) { + #pragma unroll for (unsigned x = 0; x < 8; x++) { uint8_t color = (((gb->vram[vram_address ] >> ((~x)&7)) & 1 ) | ((gb->vram[vram_address + 1] >> ((~x)&7)) & 1) << 1 ); diff --git a/Core/timing.c b/Core/timing.c index 65382e4..3ab4c30 100644 --- a/Core/timing.c +++ b/Core/timing.c @@ -8,7 +8,7 @@ #include #endif -static const unsigned int GB_TAC_RATIOS[] = {1024, 16, 64, 256}; +static const unsigned int GB_TAC_TRIGGER_BITS[] = {512, 8, 32, 128}; #ifndef DISABLE_TIMEKEEPING static int64_t get_nanoseconds(void) @@ -108,11 +108,6 @@ static void advance_tima_state_machine(GB_gameboy_t *gb) } } -static bool counter_overflow_check(uint32_t old, uint32_t new, uint32_t max) -{ - return (old & (max >> 1)) && !(new & (max >> 1)); -} - static void increase_tima(GB_gameboy_t *gb) { gb->io_registers[GB_IO_TIMA]++; @@ -126,13 +121,13 @@ static void GB_set_internal_div_counter(GB_gameboy_t *gb, uint32_t value) { /* TIMA increases when a specific high-bit becomes a low-bit. */ value &= INTERNAL_DIV_CYCLES - 1; - if ((gb->io_registers[GB_IO_TAC] & 4) && - counter_overflow_check(gb->div_counter, value, GB_TAC_RATIOS[gb->io_registers[GB_IO_TAC] & 3])) { + uint32_t triggers = gb->div_counter & ~value; + if ((gb->io_registers[GB_IO_TAC] & 4) && (triggers & GB_TAC_TRIGGER_BITS[gb->io_registers[GB_IO_TAC] & 3])) { increase_tima(gb); } /* TODO: Can switching to double speed mode trigger an event? */ - if (counter_overflow_check(gb->div_counter, value, gb->cgb_double_speed? 0x4000 : 0x2000)) { + if (triggers & (gb->cgb_double_speed? 0x2000 : 0x1000)) { GB_apu_run(gb); GB_apu_div_event(gb); } @@ -221,13 +216,13 @@ void GB_emulate_timer_glitch(GB_gameboy_t *gb, uint8_t old_tac, uint8_t new_tac) /* Glitch only happens when old_tac is enabled. */ if (!(old_tac & 4)) return; - unsigned int old_clocks = GB_TAC_RATIOS[old_tac & 3]; - unsigned int new_clocks = GB_TAC_RATIOS[new_tac & 3]; + unsigned int old_clocks = GB_TAC_TRIGGER_BITS[old_tac & 3]; + unsigned int new_clocks = GB_TAC_TRIGGER_BITS[new_tac & 3]; /* The bit used for overflow testing must have been 1 */ - if (gb->div_counter & (old_clocks >> 1)) { + if (gb->div_counter & old_clocks) { /* And now either the timer must be disabled, or the new bit used for overflow testing be 0. */ - if (!(new_tac & 4) || gb->div_counter & (new_clocks >> 1)) { + if (!(new_tac & 4) || gb->div_counter & new_clocks) { increase_tima(gb); } } From 9d947c7ce639e93f42a86d1bf42135ff78f0375a Mon Sep 17 00:00:00 2001 From: Lior Halphon Date: Mon, 31 Dec 2018 23:09:56 +0200 Subject: [PATCH 3/4] Unroll some APU loops --- Core/apu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Core/apu.c b/Core/apu.c index f9aabd7..127d34c 100644 --- a/Core/apu.c +++ b/Core/apu.c @@ -69,6 +69,7 @@ static void update_sample(GB_gameboy_t *gb, unsigned index, int8_t value, unsign static void render(GB_gameboy_t *gb, bool no_downsampling, GB_sample_t *dest) { GB_sample_t output = {0,0}; + #pragma unroll for (unsigned i = GB_N_CHANNELS; i--;) { double multiplier = CH_STEP; if (!is_DAC_enabled(gb, i)) { @@ -125,6 +126,7 @@ static void render(GB_gameboy_t *gb, bool no_downsampling, GB_sample_t *dest) unsigned mask = gb->io_registers[GB_IO_NR51]; unsigned left_volume = 0; unsigned right_volume = 0; + #pragma unroll for (unsigned i = GB_N_CHANNELS; i--;) { if (gb->apu.is_active[i]) { if (mask & 1) { @@ -372,6 +374,7 @@ void GB_apu_run(GB_gameboy_t *gb) } } + #pragma unroll for (unsigned i = GB_SQUARE_2 + 1; i--;) { if (gb->apu.is_active[i]) { uint8_t cycles_left = cycles; From 4051f190a5c611c76db4c353d25ac326799c4a43 Mon Sep 17 00:00:00 2001 From: Lior Halphon Date: Tue, 1 Jan 2019 00:42:40 +0200 Subject: [PATCH 4/4] Cache cycles_per_sample to avoid FP arithmetic --- Core/apu.c | 13 ++++++++++--- Core/apu.h | 2 ++ Core/gb.c | 3 +++ 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/Core/apu.c b/Core/apu.c index 127d34c..147ea63 100644 --- a/Core/apu.c +++ b/Core/apu.c @@ -457,10 +457,9 @@ void GB_apu_run(GB_gameboy_t *gb) if (gb->apu_output.sample_rate) { gb->apu_output.cycles_since_render += cycles; - double cycles_per_sample = 2 * GB_get_clock_rate(gb) / (double)gb->apu_output.sample_rate; /* 2 * because we use 8MHz units */ - if (gb->apu_output.sample_cycles > cycles_per_sample) { - gb->apu_output.sample_cycles -= cycles_per_sample; + if (gb->apu_output.sample_cycles > gb->apu_output.cycles_per_sample) { + gb->apu_output.sample_cycles -= gb->apu_output.cycles_per_sample; render(gb, false, NULL); } } @@ -976,9 +975,17 @@ void GB_set_sample_rate(GB_gameboy_t *gb, unsigned int sample_rate) if (sample_rate) { gb->apu_output.highpass_rate = pow(0.999958, GB_get_clock_rate(gb) / (double)sample_rate); } + GB_apu_update_cycles_per_sample(gb); } void GB_set_highpass_filter_mode(GB_gameboy_t *gb, GB_highpass_mode_t mode) { gb->apu_output.highpass_mode = mode; } + +void GB_apu_update_cycles_per_sample(GB_gameboy_t *gb) +{ + if (gb->apu_output.sample_rate) { + gb->apu_output.cycles_per_sample = 2 * GB_get_clock_rate(gb) / (double)gb->apu_output.sample_rate; /* 2 * because we use 8MHz units */ + } +} diff --git a/Core/apu.h b/Core/apu.h index ab42055..bfa3598 100644 --- a/Core/apu.h +++ b/Core/apu.h @@ -140,6 +140,7 @@ typedef struct { volatile bool lock; double sample_cycles; // In 8 MHz units + double cycles_per_sample; // Samples are NOT normalized to MAX_CH_AMP * 4 at this stage! unsigned cycles_since_render; @@ -164,6 +165,7 @@ uint8_t GB_apu_read(GB_gameboy_t *gb, uint8_t reg); void GB_apu_div_event(GB_gameboy_t *gb); void GB_apu_init(GB_gameboy_t *gb); void GB_apu_run(GB_gameboy_t *gb); +void GB_apu_update_cycles_per_sample(GB_gameboy_t *gb); #endif #endif /* apu_h */ diff --git a/Core/gb.c b/Core/gb.c index edc0a56..fc3bdc4 100644 --- a/Core/gb.c +++ b/Core/gb.c @@ -638,6 +638,8 @@ void GB_reset(GB_gameboy_t *gb) /* Todo: Ugly, fixme, see comment in the timer state machine */ gb->div_state = 3; + GB_apu_update_cycles_per_sample(gb); + gb->magic = (uintptr_t)'SAME'; } @@ -725,6 +727,7 @@ void *GB_get_direct_access(GB_gameboy_t *gb, GB_direct_access_t access, size_t * void GB_set_clock_multiplier(GB_gameboy_t *gb, double multiplier) { gb->clock_multiplier = multiplier; + GB_apu_update_cycles_per_sample(gb); } uint32_t GB_get_clock_rate(GB_gameboy_t *gb)