From 69de3f0fae260338caafb2130b8f7896cdcf6d25 Mon Sep 17 00:00:00 2001 From: Lior Halphon Date: Sun, 26 Dec 2021 01:47:59 +0200 Subject: [PATCH] Implement a PPU fast path, up to 34% performance boost --- Cocoa/Document.m | 5 +- Core/debugger.c | 2 + Core/display.c | 386 ++++++++++++++++++++++++++++++++++++++++++++++- Core/display.h | 3 +- Core/gb.c | 4 +- Core/gb.h | 4 +- Core/memory.c | 40 +++++ Core/timing.c | 2 +- Core/timing.h | 16 +- 9 files changed, 448 insertions(+), 14 deletions(-) diff --git a/Cocoa/Document.m b/Cocoa/Document.m index d539d7d..e6f2bd9 100644 --- a/Cocoa/Document.m +++ b/Cocoa/Document.m @@ -1413,6 +1413,7 @@ static unsigned *multiplication_table_for_frequency(unsigned frequency) - (IBAction) reloadVRAMData: (id) sender { if (self.vramWindow.isVisible) { + uint8_t *io_regs = GB_get_direct_access(&gb, GB_DIRECT_ACCESS_IO, NULL, NULL); switch ([self.vramTabView.tabViewItems indexOfObject:self.vramTabView.selectedTabViewItem]) { case 0: /* Tileset */ @@ -1451,8 +1452,8 @@ static unsigned *multiplication_table_for_frequency(unsigned frequency) (GB_map_type_t) self.tilemapMapButton.indexOfSelectedItem, (GB_tileset_type_t) self.TilemapSetButton.indexOfSelectedItem); - self.tilemapImageView.scrollRect = NSMakeRect(GB_read_memory(&gb, 0xFF00 | GB_IO_SCX), - GB_read_memory(&gb, 0xFF00 | GB_IO_SCY), + self.tilemapImageView.scrollRect = NSMakeRect(io_regs[GB_IO_SCX], + io_regs[GB_IO_SCY], 160, 144); self.tilemapImageView.image = [Document imageFromData:data width:256 height:256 scale:1.0]; self.tilemapImageView.layer.magnificationFilter = kCAFilterNearest; diff --git a/Core/debugger.c b/Core/debugger.c index 2088ebd..0abba51 100644 --- a/Core/debugger.c +++ b/Core/debugger.c @@ -2196,6 +2196,8 @@ bool GB_debugger_execute_command(GB_gameboy_t *gb, char *input) if (!input[0]) { return true; } + + GB_display_sync(gb); char *command_string = input; char *arguments = strchr(input, ' '); diff --git a/Core/display.c b/Core/display.c index c859c33..e25a6c2 
100644 --- a/Core/display.c +++ b/Core/display.c @@ -861,11 +861,366 @@ static uint16_t get_object_line_address(GB_gameboy_t *gb, const object_t *object return line_address; } +static inline uint8_t flip(uint8_t x) // Reverses the bit order of a byte: swaps nibbles, then bit pairs, then adjacent bits +{ + x = (x & 0xF0) >> 4 | (x & 0x0F) << 4; + x = (x & 0xCC) >> 2 | (x & 0x33) << 2; + x = (x & 0xAA) >> 1 | (x & 0x55) << 1; + return x; +} + +static inline void get_tile_data(const GB_gameboy_t *gb, uint8_t tile_x, uint8_t y, uint16_t map, uint8_t *attributes, uint8_t *data0, uint8_t *data1) // Looks up the tile at column tile_x, row y of the tile map at VRAM offset map; outputs its attribute byte (0 on non-CGB models) and the two data bytes of that row with the attribute X/Y flips applied +{ + uint8_t current_tile = gb->vram[map + (tile_x & 0x1F) + y / 8 * 32]; + *attributes = GB_is_cgb(gb)? gb->vram[0x2000 + map + (tile_x & 0x1F) + y / 8 * 32] : 0; + + uint16_t tile_address = 0; + + /* Todo: Verified for DMG (Tested: SGB2), CGB timing is wrong. */ + if (gb->io_registers[GB_IO_LCDC] & 0x10) { + tile_address = current_tile * 0x10; + } + else { + tile_address = (int8_t)current_tile * 0x10 + 0x1000; + } + if (*attributes & 8) { + tile_address += 0x2000; + } + uint8_t y_flip = 0; + if (*attributes & 0x40) { + y_flip = 0x7; + } + + *data0 = gb->vram[tile_address + ((y & 7) ^ y_flip) * 2]; + *data1 = gb->vram[tile_address + ((y & 7) ^ y_flip) * 2 + 1]; + + if (*attributes & 0x20) { + *data0 = flip(*data0); + *data1 = flip(*data1); + } + +} + +static void render_line(GB_gameboy_t *gb) // Fast path: draws all 160 pixels of gb->current_line (objects, background, window) into gb->screen in one call and drains gb->visible_objs +{ + if (gb->disable_rendering) return; + if (!gb->screen) return; + if (gb->current_line >= LINES) return; // Corrupt save state; valid rows are 0 .. LINES - 1 + + struct { + unsigned pixel:2; // Color, 0-3 + unsigned priority:6; // Object priority – 0 in DMG, OAM index in CGB + unsigned palette:3; // Palette, 0 - 7 (CGB); 0-1 in DMG (or just 0 for BG) + bool bg_priority:1; // BG priority bit + } object_buffer[160 + 16]; // allocate extra to avoid per pixel checks + memset(object_buffer, 0, sizeof(object_buffer)); + + if (gb->n_visible_objs && !gb->objects_disabled && (gb->io_registers[GB_IO_LCDC] & 2)) { + object_t *objects = (object_t *) &gb->oam; + + while (gb->n_visible_objs) { + unsigned object_index = 
gb->visible_objs[gb->n_visible_objs - 1]; + unsigned priority = gb->object_priority == GB_OBJECT_PRIORITY_X? 0 : object_index; + const object_t *object = &objects[object_index]; + gb->n_visible_objs--; + + uint16_t line_address = get_object_line_address(gb, object); + uint8_t data0 = gb->vram[line_address]; + uint8_t data1 = gb->vram[line_address + 1]; + if (object->flags & 0x20) { + data0 = flip(data0); + data1 = flip(data1); + } + + if (object->x >= 168) { // Entirely off-screen; reject before forming a pointer outside object_buffer + continue; + } + typeof(object_buffer[0]) *p = object_buffer + object->x; + unrolled for (unsigned x = 0; x < 8; x++) { + unsigned pixel = (data0 >> 7) | ((data1 >> 7) << 1); + data0 <<= 1; + data1 <<= 1; + if (pixel && (!p->pixel || priority < p->priority)) { + p->pixel = pixel; + p->priority = priority; + + if (gb->cgb_mode) { + p->palette = object->flags & 0x7; + } + else { + p->palette = (object->flags & 0x10) >> 4; + } + p->bg_priority = object->flags & 0x80; + } + p++; + } + } + } + + + uint32_t *restrict p = gb->screen; + typeof(object_buffer[0]) *object_buffer_pointer = object_buffer + 8; + if (gb->border_mode == GB_BORDER_ALWAYS) { + p += (BORDERED_WIDTH - (WIDTH)) / 2 + BORDERED_WIDTH * (BORDERED_HEIGHT - LINES) / 2; + p += BORDERED_WIDTH * gb->current_line; + } + else { + p += WIDTH * gb->current_line; + } + + if (unlikely(gb->background_disabled) || (!gb->cgb_mode && !(gb->io_registers[GB_IO_LCDC] & 1))) { + uint32_t bg = gb->background_palettes_rgb[gb->cgb_mode? 
0 : (gb->io_registers[GB_IO_BGP] & 3)]; + for (unsigned i = 160; i--;) { + if (unlikely(object_buffer_pointer->pixel)) { + uint8_t pixel = object_buffer_pointer->pixel; + if (!gb->cgb_mode) { + pixel = ((gb->io_registers[GB_IO_OBP0 + object_buffer_pointer->palette] >> (pixel << 1)) & 3); + } + *(p++) = gb->object_palettes_rgb[pixel + (object_buffer_pointer->palette & 7) * 4]; + } + else { + *(p++) = bg; + } + object_buffer_pointer++; + } + return; + } + + unsigned pixels = 0; + uint8_t tile_x = gb->io_registers[GB_IO_SCX] / 8; + unsigned fractional_scroll = gb->io_registers[GB_IO_SCX] & 7; + uint16_t map = 0x1800; + if (gb->io_registers[GB_IO_LCDC] & 0x08) { + map = 0x1C00; + } + uint8_t y = gb->current_line + gb->io_registers[GB_IO_SCY]; + uint8_t attributes; + uint8_t data0, data1; + get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1); + +#define DO_PIXEL() \ +uint8_t pixel = (data0 >> 7) | ((data1 >> 7) << 1);\ +data0 <<= 1;\ +data1 <<= 1;\ +\ +if (unlikely(object_buffer_pointer->pixel) && (pixel == 0 || !(object_buffer_pointer->bg_priority || (attributes & 0x80)) || !(gb->io_registers[GB_IO_LCDC] & 1))) {\ + pixel = object_buffer_pointer->pixel;\ + if (!gb->cgb_mode) {\ + pixel = ((gb->io_registers[GB_IO_OBP0 + object_buffer_pointer->palette] >> (pixel << 1)) & 3);\ + }\ + *(p++) = gb->object_palettes_rgb[pixel + (object_buffer_pointer->palette & 7) * 4];\ +}\ +else {\ + if (!gb->cgb_mode) {\ + pixel = ((gb->io_registers[GB_IO_BGP] >> (pixel << 1)) & 3);\ + }\ + *(p++) = gb->background_palettes_rgb[pixel + (attributes & 7) * 4];\ +}\ +pixels++;\ +object_buffer_pointer++\ + + // First 1-8 pixels + data0 <<= fractional_scroll; + data1 <<= fractional_scroll; + bool check_window = gb->wy_triggered && (gb->io_registers[GB_IO_LCDC] & 0x20); + for (unsigned i = fractional_scroll; i < 8; i++) { + if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) { +activate_window: + check_window = false; + map = gb->io_registers[GB_IO_LCDC] & 0x40? 
0x1C00 : 0x1800; + tile_x = -1; + y = ++gb->window_y; + break; + } + DO_PIXEL(); + } + tile_x++; + + while (pixels < 160 - 8) { + get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1); + for (unsigned i = 0; i < 8; i++) { + if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) { + goto activate_window; + } + DO_PIXEL(); + } + tile_x++; + } + + gb->fetcher_state = (160 - pixels) & 7; + get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1); + while (pixels < 160) { + if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) { + goto activate_window; + } + DO_PIXEL(); + } + tile_x++; + + get_tile_data(gb, tile_x, y, map, &attributes, gb->current_tile_data, gb->current_tile_data + 1); +#undef DO_PIXEL +} + +static void render_line_sgb(GB_gameboy_t *gb) // SGB variant of render_line: writes one 2-bit shade per pixel of gb->current_line into gb->sgb->screen_buffer +{ + if (gb->current_line >= LINES) return; // Corrupt save state; valid rows are 0 .. LINES - 1 + + struct { + unsigned pixel:2; // Color, 0-3 + unsigned palette:1; // Object palette select, 0-1 (indexes OBP0/OBP1) + bool bg_priority:1; // BG priority bit + } object_buffer[160 + 16]; // allocate extra to avoid per pixel checks + memset(object_buffer, 0, sizeof(object_buffer)); + + if (gb->n_visible_objs && !gb->objects_disabled && (gb->io_registers[GB_IO_LCDC] & 2)) { + object_t *objects = (object_t *) &gb->oam; + + while (gb->n_visible_objs) { + const object_t *object = &objects[gb->visible_objs[gb->n_visible_objs - 1]]; + gb->n_visible_objs--; + + uint16_t line_address = get_object_line_address(gb, object); + uint8_t data0 = gb->vram[line_address]; + uint8_t data1 = gb->vram[line_address + 1]; + if (object->flags & 0x20) { + data0 = flip(data0); + data1 = flip(data1); + } + + if (object->x >= 168) { // Entirely off-screen; reject before forming a pointer outside object_buffer + continue; + } + typeof(object_buffer[0]) *p = object_buffer + object->x; + unrolled for (unsigned x = 0; x < 8; x++) { + unsigned pixel = (data0 >> 7) | ((data1 >> 7) << 1); + data0 <<= 1; + data1 <<= 1; + if (!p->pixel) { + p->pixel = pixel; + p->palette = (object->flags & 0x10) >> 4; + p->bg_priority = object->flags & 
0x80; + } + p++; + } + } + } + + + uint8_t *restrict p = gb->sgb->screen_buffer; + typeof(object_buffer[0]) *object_buffer_pointer = object_buffer + 8; + p += WIDTH * gb->current_line; + + if (unlikely(gb->background_disabled) || (!gb->cgb_mode && !(gb->io_registers[GB_IO_LCDC] & 1))) { + for (unsigned i = 160; i--;) { + if (unlikely(object_buffer_pointer->pixel)) { + uint8_t pixel = object_buffer_pointer->pixel; + pixel = ((gb->io_registers[GB_IO_OBP0 + object_buffer_pointer->palette] >> (pixel << 1)) & 3); + *(p++) = pixel; + } + else { + *(p++) = gb->io_registers[GB_IO_BGP] & 3; + } + object_buffer_pointer++; + } + return; + } + + unsigned pixels = 0; + uint8_t tile_x = gb->io_registers[GB_IO_SCX] / 8; + unsigned fractional_scroll = gb->io_registers[GB_IO_SCX] & 7; + uint16_t map = 0x1800; + if (gb->io_registers[GB_IO_LCDC] & 0x08) { + map = 0x1C00; + } + uint8_t y = gb->current_line + gb->io_registers[GB_IO_SCY]; + uint8_t attributes; + uint8_t data0, data1; + get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1); + +#define DO_PIXEL() \ +uint8_t pixel = (data0 >> 7) | ((data1 >> 7) << 1);\ +data0 <<= 1;\ +data1 <<= 1;\ +\ +if (unlikely(object_buffer_pointer->pixel) && (pixel == 0 || !object_buffer_pointer->bg_priority || !(gb->io_registers[GB_IO_LCDC] & 1))) {\ + pixel = object_buffer_pointer->pixel;\ + pixel = ((gb->io_registers[GB_IO_OBP0 + object_buffer_pointer->palette] >> (pixel << 1)) & 3);\ + *(p++) = pixel;\ +}\ +else {\ + pixel = ((gb->io_registers[GB_IO_BGP] >> (pixel << 1)) & 3);\ + *(p++) = pixel;\ +}\ +pixels++;\ +object_buffer_pointer++\ + + // First 1-8 pixels + data0 <<= fractional_scroll; + data1 <<= fractional_scroll; + bool check_window = gb->wy_triggered && (gb->io_registers[GB_IO_LCDC] & 0x20); + for (unsigned i = fractional_scroll; i < 8; i++) { + if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) { + activate_window: + check_window = false; + map = gb->io_registers[GB_IO_LCDC] & 0x40? 
0x1C00 : 0x1800; + tile_x = -1; + y = ++gb->window_y; + break; + } + DO_PIXEL(); + } + tile_x++; + + while (pixels < 160 - 8) { + get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1); + for (unsigned i = 0; i < 8; i++) { + if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) { + goto activate_window; + } + DO_PIXEL(); + } + tile_x++; + } + + get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1); + while (pixels < 160) { + if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) { + goto activate_window; + } + DO_PIXEL(); + } +} + +static inline uint16_t mode3_batching_length(GB_gameboy_t *gb) // Returns the number of cycles to batch for this line's Mode 3, or 0 when the line must go through the per-cycle path +{ + if (gb->model & GB_MODEL_NO_SFC_BIT) return 0; + if (gb->hdma_on) return 0; + if (gb->dma_steps_left) return 0; + if (gb->wy_triggered && (gb->io_registers[GB_IO_LCDC] & 0x20) && (gb->io_registers[GB_IO_WX] < 8 || gb->io_registers[GB_IO_WX] == 166)) { + return 0; + } + + // No objects or window, timing is trivial + if (gb->n_visible_objs == 0 && !(gb->wy_triggered && (gb->io_registers[GB_IO_LCDC] & 0x20))) return 167 + (gb->io_registers[GB_IO_SCX] & 7); + + if (gb->hdma_on_hblank) return 0; + + // 300 is a bit more than the maximum Mode 3 length + + // No HBlank interrupt + if (!(gb->io_registers[GB_IO_STAT] & 0x8)) return 300; + // STAT interrupt not enabled in IE + if (!(gb->interrupt_enable & 2)) return 300; + + + return 0; +} + /* TODO: It seems that the STAT register's mode bits are always "late" by 4 T-cycles. The PPU logic can be greatly simplified if that delay is simply emulated. 
*/ -void GB_display_run(GB_gameboy_t *gb, uint8_t cycles) +void GB_display_run(GB_gameboy_t *gb, unsigned cycles, bool force) { gb->cycles_since_vblank_callback += cycles / 2; @@ -878,12 +1233,12 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles) } object_t *objects = (object_t *) &gb->oam; - GB_STATE_MACHINE(gb, display, cycles, 2) { + GB_BATCHABLE_STATE_MACHINE(gb, display, cycles, 2, !force) { GB_STATE(gb, display, 1); GB_STATE(gb, display, 2); - // GB_STATE(gb, display, 3); - // GB_STATE(gb, display, 4); - // GB_STATE(gb, display, 5); + GB_STATE(gb, display, 3); + GB_STATE(gb, display, 4); + GB_STATE(gb, display, 5); GB_STATE(gb, display, 6); GB_STATE(gb, display, 7); GB_STATE(gb, display, 8); @@ -1031,6 +1386,9 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles) GB_STAT_update(gb); gb->n_visible_objs = 0; + if (!gb->dma_steps_left && !gb->oam_ppu_blocked) { + GB_BATCHPOINT(gb, display, 5, 80); + } for (gb->oam_search_index = 0; gb->oam_search_index < 40; gb->oam_search_index++) { if (GB_is_cgb(gb)) { add_object_from_index(gb, gb->oam_search_index); @@ -1046,7 +1404,6 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles) gb->vram_write_blocked = false; gb->cgb_palettes_blocked = false; gb->oam_write_blocked = GB_is_cgb(gb); - GB_STAT_update(gb); } } gb->cycles_for_line = MODE2_LENGTH + 4; @@ -1093,6 +1450,22 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles) /* The actual rendering cycle */ gb->fetcher_state = 0; + if ((gb->mode3_batching_length = mode3_batching_length(gb))) { + GB_BATCHPOINT(gb, display, 3, gb->mode3_batching_length); + if (GB_BATCHED_CYCLES(gb, display) >= gb->mode3_batching_length) { + // Successfully batched! 
+ gb->lcd_x = gb->position_in_line = 160; + gb->cycles_for_line += gb->mode3_batching_length; + if (gb->sgb) { + render_line_sgb(gb); + } + else { + render_line(gb); + } + GB_SLEEP(gb, display, 4, gb->mode3_batching_length); + goto skip_slow_mode_3; + } + } while (true) { /* Handle window */ /* TODO: It appears that WX checks if the window begins *next* pixel, not *this* pixel. For this reason, @@ -1255,6 +1628,7 @@ abort_fetching_object: gb->cycles_for_line++; GB_SLEEP(gb, display, 21, 1); } +skip_slow_mode_3: /* TODO: Verify */ if (gb->fetcher_state == 4 || gb->fetcher_state == 5) { diff --git a/Core/display.h b/Core/display.h index 04b85b3..d50dc18 100644 --- a/Core/display.h +++ b/Core/display.h @@ -6,11 +6,12 @@ #include #ifdef GB_INTERNAL -internal void GB_display_run(GB_gameboy_t *gb, uint8_t cycles); +internal void GB_display_run(GB_gameboy_t *gb, unsigned cycles, bool force); internal void GB_palette_changed(GB_gameboy_t *gb, bool background_palette, uint8_t index); internal void GB_STAT_update(GB_gameboy_t *gb); internal void GB_lcd_off(GB_gameboy_t *gb); internal void GB_display_vblank(GB_gameboy_t *gb); +#define GB_display_sync(gb) GB_display_run(gb, 0, true) enum { GB_OBJECT_PRIORITY_X, diff --git a/Core/gb.c b/Core/gb.c index 2b95d55..0279eef 100644 --- a/Core/gb.c +++ b/Core/gb.c @@ -1139,7 +1139,7 @@ uint8_t GB_run(GB_gameboy_t *gb) we just halt the CPU (with hacky code) until the correct time. This ensures the Nintendo logo doesn't flash on screen, and the game does "run in background" while the animation is playing. 
*/ - GB_display_run(gb, 228); + GB_display_run(gb, 228, true); gb->cycles_since_last_sync += 228; return 228; } @@ -1327,7 +1327,7 @@ bool GB_is_inited(GB_gameboy_t *gb) return gb->magic == state_magic(); } -bool GB_is_cgb(GB_gameboy_t *gb) +bool GB_is_cgb(const GB_gameboy_t *gb) { return gb->model >= GB_MODEL_CGB_0; } diff --git a/Core/gb.h b/Core/gb.h index 4592ec9..54fa0f3 100644 --- a/Core/gb.h +++ b/Core/gb.h @@ -540,6 +540,8 @@ struct GB_gameboy_internal_s { /* For timing of the vblank callback */ uint32_t cycles_since_vblank_callback; bool lcd_disabled_outside_of_vblank; + int32_t allowed_pending_cycles; + uint16_t mode3_batching_length; ); /* APU */ @@ -796,7 +798,7 @@ __attribute__((__format__ (__printf__, fmtarg, firstvararg))) void GB_init(GB_gameboy_t *gb, GB_model_t model); bool GB_is_inited(GB_gameboy_t *gb); -bool GB_is_cgb(GB_gameboy_t *gb); +bool GB_is_cgb(const GB_gameboy_t *gb); bool GB_is_cgb_in_cgb_mode(GB_gameboy_t *gb); bool GB_is_sgb(GB_gameboy_t *gb); // Returns true if the model is SGB or SGB2 bool GB_is_hle_sgb(GB_gameboy_t *gb); // Returns true if the model is SGB or SGB2 and the SFC/SNES side is HLE'd diff --git a/Core/memory.c b/Core/memory.c index ea9152f..cd76d57 100644 --- a/Core/memory.c +++ b/Core/memory.c @@ -98,6 +98,7 @@ void GB_trigger_oam_bug(GB_gameboy_t *gb, uint16_t address) if (GB_is_cgb(gb)) return; if (address >= 0xFE00 && address < 0xFF00) { + GB_display_sync(gb); if (gb->accessed_oam_row != 0xff && gb->accessed_oam_row >= 8) { uint16_t *base = (uint16_t *)(gb->oam + gb->accessed_oam_row); base[0] = bitwise_glitch(base[0], @@ -283,6 +284,7 @@ static uint8_t read_mbc_rom(GB_gameboy_t *gb, uint16_t addr) static uint8_t read_vram(GB_gameboy_t *gb, uint16_t addr) { + GB_display_sync(gb); if (gb->vram_read_blocked) { return 0xFF; } @@ -421,6 +423,37 @@ static uint8_t read_banked_ram(GB_gameboy_t *gb, uint16_t addr) return gb->ram[(addr & 0x0FFF) + gb->cgb_ram_bank * 0x1000]; } +static inline void 
sync_ppu_if_needed(GB_gameboy_t *gb, uint8_t register_accessed) // Calls GB_display_sync() when register_accessed (the low byte of a 0xFFxx address) is one of the cases listed below; does nothing otherwise +{ + switch (register_accessed) { + case GB_IO_IF: + case GB_IO_LCDC: + case GB_IO_STAT: + case GB_IO_SCY: + case GB_IO_SCX: + case GB_IO_LY: + case GB_IO_LYC: + case GB_IO_DMA: + case GB_IO_BGP: + case GB_IO_OBP0: + case GB_IO_OBP1: + case GB_IO_WY: + case GB_IO_WX: + case GB_IO_HDMA1: + case GB_IO_HDMA2: + case GB_IO_HDMA3: + case GB_IO_HDMA4: + case GB_IO_HDMA5: + case GB_IO_BGPI: + case GB_IO_BGPD: + case GB_IO_OBPI: + case GB_IO_OBPD: + case GB_IO_OPRI: + GB_display_sync(gb); + break; + } +} + static uint8_t read_high_memory(GB_gameboy_t *gb, uint16_t addr) { @@ -433,6 +466,7 @@ static uint8_t read_high_memory(GB_gameboy_t *gb, uint16_t addr) } if (addr < 0xFF00) { + GB_display_sync(gb); if (gb->oam_write_blocked && !GB_is_cgb(gb)) { if (!gb->disable_oam_corruption) { GB_trigger_oam_bug_read(gb, addr); @@ -548,6 +582,7 @@ static uint8_t read_high_memory(GB_gameboy_t *gb, uint16_t addr) } if (addr < 0xFF80) { + sync_ppu_if_needed(gb, addr & 0xFF); // Explicit mask: the helper takes the 8-bit register index switch (addr & 0xFF) { case GB_IO_IF: return gb->io_registers[GB_IO_IF] | 0xE0; @@ -846,6 +881,7 @@ static void write_mbc(GB_gameboy_t *gb, uint16_t addr, uint8_t value) static void write_vram(GB_gameboy_t *gb, uint16_t addr, uint8_t value) { + GB_display_sync(gb); if (gb->vram_write_blocked) { //GB_log(gb, "Wrote %02x to %04x (VRAM) during mode 3\n", value, addr); return; @@ -1155,6 +1191,7 @@ static void write_high_memory(GB_gameboy_t *gb, uint16_t addr, uint8_t value) } if (addr < 0xFF00) { + GB_display_sync(gb); if (gb->oam_write_blocked) { GB_trigger_oam_bug(gb, addr); return; @@ -1233,6 +1270,8 @@ static void write_high_memory(GB_gameboy_t *gb, uint16_t addr, uint8_t value) /* Todo: Clean this code up: use a function table and move relevant code to display.c and timing.c (APU read and writes are already at apu.c) */ if (addr < 0xFF80) { + sync_ppu_if_needed(gb, addr & 0xFF); // Explicit mask: the helper takes the 8-bit register index + /* Hardware registers */ switch (addr & 0xFF) { case GB_IO_WY: @@ -1563,6 +1602,7 @@ 
static void write_high_memory(GB_gameboy_t *gb, uint16_t addr, uint8_t value) } if (addr == 0xFFFF) { + GB_display_sync(gb); /* Interrupt mask */ gb->interrupt_enable = value; return; diff --git a/Core/timing.c b/Core/timing.c index f8ffd80..3cf62bc 100644 --- a/Core/timing.c +++ b/Core/timing.c @@ -435,7 +435,7 @@ void GB_advance_cycles(GB_gameboy_t *gb, uint8_t cycles) GB_hdma_run(gb); } GB_apu_run(gb); - GB_display_run(gb, cycles); + GB_display_run(gb, cycles, false); ir_run(gb, cycles); rtc_run(gb, cycles); } diff --git a/Core/timing.h b/Core/timing.h index 96b2082..ee817d1 100644 --- a/Core/timing.h +++ b/Core/timing.h @@ -28,13 +28,23 @@ enum { #define GB_SLEEP(gb, unit, state, cycles) do {\ (gb)->unit##_cycles -= (cycles) * __state_machine_divisor; \ - if ((gb)->unit##_cycles <= 0) {\ + if (unlikely((gb)->unit##_cycles <= 0)) {\ (gb)->unit##_state = state;\ return;\ unit##state:; \ }\ } while (0) +#define GB_BATCHPOINT(gb, unit, state, cycles) /* Resumable label: while batching is allowed and fewer than `cycles` unit-cycles are banked, record `state` and return */ do {\ +unit##state:; \ +if (likely(__state_machine_allow_batching && (gb)->unit##_cycles < ((cycles) * __state_machine_divisor))) {\ + (gb)->unit##_state = state;\ + return;\ +}\ +} while (0) + +#define GB_BATCHED_CYCLES(gb, unit) ((gb)->unit##_cycles / __state_machine_divisor) + #define GB_STATE_MACHINE(gb, unit, cycles, divisor) \ static const int __state_machine_divisor = divisor;\ (gb)->unit##_cycles += cycles; \ @@ -44,6 +54,10 @@ if ((gb)->unit##_cycles <= 0) {\ switch ((gb)->unit##_state) #endif +#define GB_BATCHABLE_STATE_MACHINE(gb, unit, cycles, divisor, allow_batching) \ +const bool __state_machine_allow_batching = (allow_batching); \ +GB_STATE_MACHINE(gb, unit, cycles, divisor) + #define GB_STATE(gb, unit, state) case state: goto unit##state #define GB_UNIT(unit) int32_t unit##_cycles, unit##_state