From af0430dbc5652175d2433c4c80920f3407518a85 Mon Sep 17 00:00:00 2001 From: Lior Halphon Date: Mon, 31 Dec 2018 22:06:20 +0200 Subject: [PATCH] Unroll some loops in PPU code, more efficient timer handling --- Core/display.c | 4 ++++ Core/timing.c | 21 ++++++++------------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/Core/display.c b/Core/display.c index 2759146..aae3a47 100644 --- a/Core/display.c +++ b/Core/display.c @@ -27,6 +27,7 @@ static GB_fifo_item_t *fifo_pop(GB_fifo_t *fifo) static void fifo_push_bg_row(GB_fifo_t *fifo, uint8_t lower, uint8_t upper, uint8_t palette, bool bg_priority, bool flip_x) { if (!flip_x) { + #pragma unroll for (unsigned i = 8; i--;) { fifo->fifo[fifo->write_end] = (GB_fifo_item_t) { (lower >> 7) | ((upper >> 7) << 1), @@ -42,6 +43,7 @@ static void fifo_push_bg_row(GB_fifo_t *fifo, uint8_t lower, uint8_t upper, uint } } else { + #pragma unroll for (unsigned i = 8; i--;) { fifo->fifo[fifo->write_end] = (GB_fifo_item_t) { (lower & 1) | ((upper & 1) << 1), @@ -68,6 +70,7 @@ static void fifo_overlay_object_row(GB_fifo_t *fifo, uint8_t lower, uint8_t uppe uint8_t flip_xor = flip_x? 0: 0x7; + #pragma unroll for (unsigned i = 8; i--;) { uint8_t pixel = (lower >> 7) | ((upper >> 7) << 1); GB_fifo_item_t *target = &fifo->fifo[(fifo->read_end + (i ^ flip_xor)) & (GB_FIFO_LENGTH - 1)]; @@ -1063,6 +1066,7 @@ uint8_t GB_get_oam_info(GB_gameboy_t *gb, GB_oam_info_t *dest, uint8_t *sprite_h } for (unsigned y = 0; y < *sprite_height; y++) { + #pragma unroll for (unsigned x = 0; x < 8; x++) { uint8_t color = (((gb->vram[vram_address ] >> ((~x)&7)) & 1 ) | ((gb->vram[vram_address + 1] >> ((~x)&7)) & 1) << 1 ); diff --git a/Core/timing.c b/Core/timing.c index 65382e4..3ab4c30 100644 --- a/Core/timing.c +++ b/Core/timing.c @@ -8,7 +8,7 @@ #include #endif -static const unsigned int GB_TAC_RATIOS[] = {1024, 16, 64, 256}; +static const unsigned int GB_TAC_TRIGGER_BITS[] = {512, 8, 32, 128}; #ifndef DISABLE_TIMEKEEPING static int64_t get_nanoseconds(void) @@ -108,11 +108,6 @@ static void advance_tima_state_machine(GB_gameboy_t *gb) } } -static bool counter_overflow_check(uint32_t old, uint32_t new, uint32_t max) -{ - return (old & (max >> 1)) && !(new & (max >> 1)); -} - static void increase_tima(GB_gameboy_t *gb) { gb->io_registers[GB_IO_TIMA]++; @@ -126,13 +121,13 @@ static void GB_set_internal_div_counter(GB_gameboy_t *gb, uint32_t value) { /* TIMA increases when a specific high-bit becomes a low-bit. */ value &= INTERNAL_DIV_CYCLES - 1; - if ((gb->io_registers[GB_IO_TAC] & 4) && - counter_overflow_check(gb->div_counter, value, GB_TAC_RATIOS[gb->io_registers[GB_IO_TAC] & 3])) { + uint32_t triggers = gb->div_counter & ~value; + if ((gb->io_registers[GB_IO_TAC] & 4) && (triggers & GB_TAC_TRIGGER_BITS[gb->io_registers[GB_IO_TAC] & 3])) { increase_tima(gb); } /* TODO: Can switching to double speed mode trigger an event? */ - if (counter_overflow_check(gb->div_counter, value, gb->cgb_double_speed? 0x4000 : 0x2000)) { + if (triggers & (gb->cgb_double_speed? 0x2000 : 0x1000)) { GB_apu_run(gb); GB_apu_div_event(gb); } @@ -221,13 +216,13 @@ void GB_emulate_timer_glitch(GB_gameboy_t *gb, uint8_t old_tac, uint8_t new_tac) /* Glitch only happens when old_tac is enabled. */ if (!(old_tac & 4)) return; - unsigned int old_clocks = GB_TAC_RATIOS[old_tac & 3]; - unsigned int new_clocks = GB_TAC_RATIOS[new_tac & 3]; + unsigned int old_clocks = GB_TAC_TRIGGER_BITS[old_tac & 3]; + unsigned int new_clocks = GB_TAC_TRIGGER_BITS[new_tac & 3]; /* The bit used for overflow testing must have been 1 */ - if (gb->div_counter & (old_clocks >> 1)) { + if (gb->div_counter & old_clocks) { /* And now either the timer must be disabled, or the new bit used for overflow testing be 0. */ - if (!(new_tac & 4) || gb->div_counter & (new_clocks >> 1)) { + if (!(new_tac & 4) || gb->div_counter & new_clocks) { increase_tima(gb); } }