Implement a PPU fast path, up to 34% performance boost
This commit is contained in:
parent
f3277ab8d3
commit
69de3f0fae
@ -1413,6 +1413,7 @@ static unsigned *multiplication_table_for_frequency(unsigned frequency)
|
|||||||
- (IBAction) reloadVRAMData: (id) sender
|
- (IBAction) reloadVRAMData: (id) sender
|
||||||
{
|
{
|
||||||
if (self.vramWindow.isVisible) {
|
if (self.vramWindow.isVisible) {
|
||||||
|
uint8_t *io_regs = GB_get_direct_access(&gb, GB_DIRECT_ACCESS_IO, NULL, NULL);
|
||||||
switch ([self.vramTabView.tabViewItems indexOfObject:self.vramTabView.selectedTabViewItem]) {
|
switch ([self.vramTabView.tabViewItems indexOfObject:self.vramTabView.selectedTabViewItem]) {
|
||||||
case 0:
|
case 0:
|
||||||
/* Tileset */
|
/* Tileset */
|
||||||
@ -1451,8 +1452,8 @@ static unsigned *multiplication_table_for_frequency(unsigned frequency)
|
|||||||
(GB_map_type_t) self.tilemapMapButton.indexOfSelectedItem,
|
(GB_map_type_t) self.tilemapMapButton.indexOfSelectedItem,
|
||||||
(GB_tileset_type_t) self.TilemapSetButton.indexOfSelectedItem);
|
(GB_tileset_type_t) self.TilemapSetButton.indexOfSelectedItem);
|
||||||
|
|
||||||
self.tilemapImageView.scrollRect = NSMakeRect(GB_read_memory(&gb, 0xFF00 | GB_IO_SCX),
|
self.tilemapImageView.scrollRect = NSMakeRect(io_regs[GB_IO_SCX],
|
||||||
GB_read_memory(&gb, 0xFF00 | GB_IO_SCY),
|
io_regs[GB_IO_SCY],
|
||||||
160, 144);
|
160, 144);
|
||||||
self.tilemapImageView.image = [Document imageFromData:data width:256 height:256 scale:1.0];
|
self.tilemapImageView.image = [Document imageFromData:data width:256 height:256 scale:1.0];
|
||||||
self.tilemapImageView.layer.magnificationFilter = kCAFilterNearest;
|
self.tilemapImageView.layer.magnificationFilter = kCAFilterNearest;
|
||||||
|
@ -2196,6 +2196,8 @@ bool GB_debugger_execute_command(GB_gameboy_t *gb, char *input)
|
|||||||
if (!input[0]) {
|
if (!input[0]) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
GB_display_sync(gb);
|
||||||
|
|
||||||
char *command_string = input;
|
char *command_string = input;
|
||||||
char *arguments = strchr(input, ' ');
|
char *arguments = strchr(input, ' ');
|
||||||
|
386
Core/display.c
386
Core/display.c
@ -861,11 +861,366 @@ static uint16_t get_object_line_address(GB_gameboy_t *gb, const object_t *object
|
|||||||
return line_address;
|
return line_address;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline uint8_t flip(uint8_t x)
|
||||||
|
{
|
||||||
|
x = (x & 0xF0) >> 4 | (x & 0x0F) << 4;
|
||||||
|
x = (x & 0xCC) >> 2 | (x & 0x33) << 2;
|
||||||
|
x = (x & 0xAA) >> 1 | (x & 0x55) << 1;
|
||||||
|
return x;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void get_tile_data(const GB_gameboy_t *gb, uint8_t tile_x, uint8_t y, uint16_t map, uint8_t *attributes, uint8_t *data0, uint8_t *data1)
|
||||||
|
{
|
||||||
|
uint8_t current_tile = gb->vram[map + (tile_x & 0x1F) + y / 8 * 32];
|
||||||
|
*attributes = GB_is_cgb(gb)? gb->vram[0x2000 + map + (tile_x & 0x1F) + y / 8 * 32] : 0;
|
||||||
|
|
||||||
|
uint16_t tile_address = 0;
|
||||||
|
|
||||||
|
/* Todo: Verified for DMG (Tested: SGB2), CGB timing is wrong. */
|
||||||
|
if (gb->io_registers[GB_IO_LCDC] & 0x10) {
|
||||||
|
tile_address = current_tile * 0x10;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
tile_address = (int8_t)current_tile * 0x10 + 0x1000;
|
||||||
|
}
|
||||||
|
if (*attributes & 8) {
|
||||||
|
tile_address += 0x2000;
|
||||||
|
}
|
||||||
|
uint8_t y_flip = 0;
|
||||||
|
if (*attributes & 0x40) {
|
||||||
|
y_flip = 0x7;
|
||||||
|
}
|
||||||
|
|
||||||
|
*data0 = gb->vram[tile_address + ((y & 7) ^ y_flip) * 2];
|
||||||
|
*data1 = gb->vram[tile_address + ((y & 7) ^ y_flip) * 2 + 1];
|
||||||
|
|
||||||
|
if (*attributes & 0x20) {
|
||||||
|
*data0 = flip(*data0);
|
||||||
|
*data1 = flip(*data1);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
static void render_line(GB_gameboy_t *gb)
|
||||||
|
{
|
||||||
|
if (gb->disable_rendering) return;
|
||||||
|
if (!gb->screen) return;
|
||||||
|
if (gb->current_line > 144) return; // Corrupt save state
|
||||||
|
|
||||||
|
struct {
|
||||||
|
unsigned pixel:2; // Color, 0-3
|
||||||
|
unsigned priority:6; // Object priority – 0 in DMG, OAM index in CGB
|
||||||
|
unsigned palette:3; // Palette, 0 - 7 (CGB); 0-1 in DMG (or just 0 for BG)
|
||||||
|
bool bg_priority:1; // BG priority bit
|
||||||
|
} object_buffer[160 + 16]; // allocate extra to avoid per pixel checks
|
||||||
|
memset(object_buffer, 0, sizeof(object_buffer));
|
||||||
|
|
||||||
|
if (gb->n_visible_objs && !gb->objects_disabled && (gb->io_registers[GB_IO_LCDC] & 2)) {
|
||||||
|
object_t *objects = (object_t *) &gb->oam;
|
||||||
|
|
||||||
|
while (gb->n_visible_objs) {
|
||||||
|
unsigned object_index = gb->visible_objs[gb->n_visible_objs - 1];
|
||||||
|
unsigned priority = gb->object_priority == GB_OBJECT_PRIORITY_X? 0 : object_index;
|
||||||
|
const object_t *object = &objects[object_index];
|
||||||
|
gb->n_visible_objs--;
|
||||||
|
|
||||||
|
uint16_t line_address = get_object_line_address(gb, object);
|
||||||
|
uint8_t data0 = gb->vram[line_address];
|
||||||
|
uint8_t data1 = gb->vram[line_address + 1];
|
||||||
|
if (object->flags & 0x20) {
|
||||||
|
data0 = flip(data0);
|
||||||
|
data1 = flip(data1);
|
||||||
|
}
|
||||||
|
|
||||||
|
typeof(object_buffer[0]) *p = object_buffer + object->x;
|
||||||
|
if (object->x >= 168) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
unrolled for (unsigned x = 0; x < 8; x++) {
|
||||||
|
unsigned pixel = (data0 >> 7) | ((data1 >> 7) << 1);
|
||||||
|
data0 <<= 1;
|
||||||
|
data1 <<= 1;
|
||||||
|
if (pixel && (!p->pixel || priority < p->priority)) {
|
||||||
|
p->pixel = pixel;
|
||||||
|
p->priority = priority;
|
||||||
|
|
||||||
|
if (gb->cgb_mode) {
|
||||||
|
p->palette = object->flags & 0x7;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
p->palette = (object->flags & 0x10) >> 4;
|
||||||
|
}
|
||||||
|
p->bg_priority = object->flags & 0x80;
|
||||||
|
}
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint32_t *restrict p = gb->screen;
|
||||||
|
typeof(object_buffer[0]) *object_buffer_pointer = object_buffer + 8;
|
||||||
|
if (gb->border_mode == GB_BORDER_ALWAYS) {
|
||||||
|
p += (BORDERED_WIDTH - (WIDTH)) / 2 + BORDERED_WIDTH * (BORDERED_HEIGHT - LINES) / 2;
|
||||||
|
p += BORDERED_WIDTH * gb->current_line;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
p += WIDTH * gb->current_line;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(gb->background_disabled) || (!gb->cgb_mode && !(gb->io_registers[GB_IO_LCDC] & 1))) {
|
||||||
|
uint32_t bg = gb->background_palettes_rgb[gb->cgb_mode? 0 : (gb->io_registers[GB_IO_BGP] & 3)];
|
||||||
|
for (unsigned i = 160; i--;) {
|
||||||
|
if (unlikely(object_buffer_pointer->pixel)) {
|
||||||
|
uint8_t pixel = object_buffer_pointer->pixel;
|
||||||
|
if (!gb->cgb_mode) {
|
||||||
|
pixel = ((gb->io_registers[GB_IO_OBP0 + object_buffer_pointer->palette] >> (pixel << 1)) & 3);
|
||||||
|
}
|
||||||
|
*(p++) = gb->object_palettes_rgb[pixel + (object_buffer_pointer->palette & 7) * 4];
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
*(p++) = bg;
|
||||||
|
}
|
||||||
|
object_buffer_pointer++;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned pixels = 0;
|
||||||
|
uint8_t tile_x = gb->io_registers[GB_IO_SCX] / 8;
|
||||||
|
unsigned fractional_scroll = gb->io_registers[GB_IO_SCX] & 7;
|
||||||
|
uint16_t map = 0x1800;
|
||||||
|
if (gb->io_registers[GB_IO_LCDC] & 0x08) {
|
||||||
|
map = 0x1C00;
|
||||||
|
}
|
||||||
|
uint8_t y = gb->current_line + gb->io_registers[GB_IO_SCY];
|
||||||
|
uint8_t attributes;
|
||||||
|
uint8_t data0, data1;
|
||||||
|
get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1);
|
||||||
|
|
||||||
|
#define DO_PIXEL() \
|
||||||
|
uint8_t pixel = (data0 >> 7) | ((data1 >> 7) << 1);\
|
||||||
|
data0 <<= 1;\
|
||||||
|
data1 <<= 1;\
|
||||||
|
\
|
||||||
|
if (unlikely(object_buffer_pointer->pixel) && (pixel == 0 || !(object_buffer_pointer->bg_priority || (attributes & 0x80)) || !(gb->io_registers[GB_IO_LCDC] & 1))) {\
|
||||||
|
pixel = object_buffer_pointer->pixel;\
|
||||||
|
if (!gb->cgb_mode) {\
|
||||||
|
pixel = ((gb->io_registers[GB_IO_OBP0 + object_buffer_pointer->palette] >> (pixel << 1)) & 3);\
|
||||||
|
}\
|
||||||
|
*(p++) = gb->object_palettes_rgb[pixel + (object_buffer_pointer->palette & 7) * 4];\
|
||||||
|
}\
|
||||||
|
else {\
|
||||||
|
if (!gb->cgb_mode) {\
|
||||||
|
pixel = ((gb->io_registers[GB_IO_BGP] >> (pixel << 1)) & 3);\
|
||||||
|
}\
|
||||||
|
*(p++) = gb->background_palettes_rgb[pixel + (attributes & 7) * 4];\
|
||||||
|
}\
|
||||||
|
pixels++;\
|
||||||
|
object_buffer_pointer++\
|
||||||
|
|
||||||
|
// First 1-8 pixels
|
||||||
|
data0 <<= fractional_scroll;
|
||||||
|
data1 <<= fractional_scroll;
|
||||||
|
bool check_window = gb->wy_triggered && (gb->io_registers[GB_IO_LCDC] & 0x20);
|
||||||
|
for (unsigned i = fractional_scroll; i < 8; i++) {
|
||||||
|
if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) {
|
||||||
|
activate_window:
|
||||||
|
check_window = false;
|
||||||
|
map = gb->io_registers[GB_IO_LCDC] & 0x40? 0x1C00 : 0x1800;
|
||||||
|
tile_x = -1;
|
||||||
|
y = ++gb->window_y;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
DO_PIXEL();
|
||||||
|
}
|
||||||
|
tile_x++;
|
||||||
|
|
||||||
|
while (pixels < 160 - 8) {
|
||||||
|
get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1);
|
||||||
|
for (unsigned i = 0; i < 8; i++) {
|
||||||
|
if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) {
|
||||||
|
goto activate_window;
|
||||||
|
}
|
||||||
|
DO_PIXEL();
|
||||||
|
}
|
||||||
|
tile_x++;
|
||||||
|
}
|
||||||
|
|
||||||
|
gb->fetcher_state = (160 - pixels) & 7;
|
||||||
|
get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1);
|
||||||
|
while (pixels < 160) {
|
||||||
|
if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) {
|
||||||
|
goto activate_window;
|
||||||
|
}
|
||||||
|
DO_PIXEL();
|
||||||
|
}
|
||||||
|
tile_x++;
|
||||||
|
|
||||||
|
get_tile_data(gb, tile_x, y, map, &attributes, gb->current_tile_data, gb->current_tile_data + 1);
|
||||||
|
#undef DO_PIXEL
|
||||||
|
}
|
||||||
|
|
||||||
|
static void render_line_sgb(GB_gameboy_t *gb)
|
||||||
|
{
|
||||||
|
if (gb->current_line > 144) return; // Corrupt save state
|
||||||
|
|
||||||
|
struct {
|
||||||
|
unsigned pixel:2; // Color, 0-3
|
||||||
|
unsigned palette:1; // Palette, 0 - 7 (CGB); 0-1 in DMG (or just 0 for BG)
|
||||||
|
bool bg_priority:1; // BG priority bit
|
||||||
|
} object_buffer[160 + 16]; // allocate extra to avoid per pixel checks
|
||||||
|
memset(object_buffer, 0, sizeof(object_buffer));
|
||||||
|
|
||||||
|
if (gb->n_visible_objs && !gb->objects_disabled && (gb->io_registers[GB_IO_LCDC] & 2)) {
|
||||||
|
object_t *objects = (object_t *) &gb->oam;
|
||||||
|
|
||||||
|
while (gb->n_visible_objs) {
|
||||||
|
const object_t *object = &objects[gb->visible_objs[gb->n_visible_objs - 1]];
|
||||||
|
gb->n_visible_objs--;
|
||||||
|
|
||||||
|
uint16_t line_address = get_object_line_address(gb, object);
|
||||||
|
uint8_t data0 = gb->vram[line_address];
|
||||||
|
uint8_t data1 = gb->vram[line_address + 1];
|
||||||
|
if (object->flags & 0x20) {
|
||||||
|
data0 = flip(data0);
|
||||||
|
data1 = flip(data1);
|
||||||
|
}
|
||||||
|
|
||||||
|
typeof(object_buffer[0]) *p = object_buffer + object->x;
|
||||||
|
if (object->x >= 168) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
unrolled for (unsigned x = 0; x < 8; x++) {
|
||||||
|
unsigned pixel = (data0 >> 7) | ((data1 >> 7) << 1);
|
||||||
|
data0 <<= 1;
|
||||||
|
data1 <<= 1;
|
||||||
|
if (!p->pixel) {
|
||||||
|
p->pixel = pixel;
|
||||||
|
p->palette = (object->flags & 0x10) >> 4;
|
||||||
|
p->bg_priority = object->flags & 0x80;
|
||||||
|
}
|
||||||
|
p++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint8_t *restrict p = gb->sgb->screen_buffer;
|
||||||
|
typeof(object_buffer[0]) *object_buffer_pointer = object_buffer + 8;
|
||||||
|
p += WIDTH * gb->current_line;
|
||||||
|
|
||||||
|
if (unlikely(gb->background_disabled) || (!gb->cgb_mode && !(gb->io_registers[GB_IO_LCDC] & 1))) {
|
||||||
|
for (unsigned i = 160; i--;) {
|
||||||
|
if (unlikely(object_buffer_pointer->pixel)) {
|
||||||
|
uint8_t pixel = object_buffer_pointer->pixel;
|
||||||
|
pixel = ((gb->io_registers[GB_IO_OBP0 + object_buffer_pointer->palette] >> (pixel << 1)) & 3);
|
||||||
|
*(p++) = pixel;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
*(p++) = gb->io_registers[GB_IO_BGP] & 3;
|
||||||
|
}
|
||||||
|
object_buffer_pointer++;
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned pixels = 0;
|
||||||
|
uint8_t tile_x = gb->io_registers[GB_IO_SCX] / 8;
|
||||||
|
unsigned fractional_scroll = gb->io_registers[GB_IO_SCX] & 7;
|
||||||
|
uint16_t map = 0x1800;
|
||||||
|
if (gb->io_registers[GB_IO_LCDC] & 0x08) {
|
||||||
|
map = 0x1C00;
|
||||||
|
}
|
||||||
|
uint8_t y = gb->current_line + gb->io_registers[GB_IO_SCY];
|
||||||
|
uint8_t attributes;
|
||||||
|
uint8_t data0, data1;
|
||||||
|
get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1);
|
||||||
|
|
||||||
|
#define DO_PIXEL() \
|
||||||
|
uint8_t pixel = (data0 >> 7) | ((data1 >> 7) << 1);\
|
||||||
|
data0 <<= 1;\
|
||||||
|
data1 <<= 1;\
|
||||||
|
\
|
||||||
|
if (unlikely(object_buffer_pointer->pixel) && (pixel == 0 || !object_buffer_pointer->bg_priority || !(gb->io_registers[GB_IO_LCDC] & 1))) {\
|
||||||
|
pixel = object_buffer_pointer->pixel;\
|
||||||
|
pixel = ((gb->io_registers[GB_IO_OBP0 + object_buffer_pointer->palette] >> (pixel << 1)) & 3);\
|
||||||
|
*(p++) = pixel;\
|
||||||
|
}\
|
||||||
|
else {\
|
||||||
|
pixel = ((gb->io_registers[GB_IO_BGP] >> (pixel << 1)) & 3);\
|
||||||
|
*(p++) = pixel;\
|
||||||
|
}\
|
||||||
|
pixels++;\
|
||||||
|
object_buffer_pointer++\
|
||||||
|
|
||||||
|
// First 1-8 pixels
|
||||||
|
data0 <<= fractional_scroll;
|
||||||
|
data1 <<= fractional_scroll;
|
||||||
|
bool check_window = gb->wy_triggered && (gb->io_registers[GB_IO_LCDC] & 0x20);
|
||||||
|
for (unsigned i = fractional_scroll; i < 8; i++) {
|
||||||
|
if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) {
|
||||||
|
activate_window:
|
||||||
|
check_window = false;
|
||||||
|
map = gb->io_registers[GB_IO_LCDC] & 0x40? 0x1C00 : 0x1800;
|
||||||
|
tile_x = -1;
|
||||||
|
y = ++gb->window_y;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
DO_PIXEL();
|
||||||
|
}
|
||||||
|
tile_x++;
|
||||||
|
|
||||||
|
while (pixels < 160 - 8) {
|
||||||
|
get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1);
|
||||||
|
for (unsigned i = 0; i < 8; i++) {
|
||||||
|
if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) {
|
||||||
|
goto activate_window;
|
||||||
|
}
|
||||||
|
DO_PIXEL();
|
||||||
|
}
|
||||||
|
tile_x++;
|
||||||
|
}
|
||||||
|
|
||||||
|
get_tile_data(gb, tile_x, y, map, &attributes, &data0, &data1);
|
||||||
|
while (pixels < 160) {
|
||||||
|
if (check_window && gb->io_registers[GB_IO_WX] == pixels + 7) {
|
||||||
|
goto activate_window;
|
||||||
|
}
|
||||||
|
DO_PIXEL();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline uint16_t mode3_batching_length(GB_gameboy_t *gb)
|
||||||
|
{
|
||||||
|
if (gb->model & GB_MODEL_NO_SFC_BIT) return 0;
|
||||||
|
if (gb->hdma_on) return 0;
|
||||||
|
if (gb->dma_steps_left) return 0;
|
||||||
|
if (gb->wy_triggered && (gb->io_registers[GB_IO_LCDC] & 0x20) && (gb->io_registers[GB_IO_WX] < 8 || gb->io_registers[GB_IO_WX] == 166)) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// No objects or window, timing is trivial
|
||||||
|
if (gb->n_visible_objs == 0 && !(gb->wy_triggered && (gb->io_registers[GB_IO_LCDC] & 0x20))) return 167 + (gb->io_registers[GB_IO_SCX] & 7);
|
||||||
|
|
||||||
|
if (gb->hdma_on_hblank) return 0;
|
||||||
|
|
||||||
|
// 300 is a bit more than the maximum Mode 3 length
|
||||||
|
|
||||||
|
// No HBlank interrupt
|
||||||
|
if (!(gb->io_registers[GB_IO_STAT] & 0x8)) return 300;
|
||||||
|
// No STAT interrupt requested
|
||||||
|
if (!(gb->interrupt_enable & 2)) return 300;
|
||||||
|
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
TODO: It seems that the STAT register's mode bits are always "late" by 4 T-cycles.
|
TODO: It seems that the STAT register's mode bits are always "late" by 4 T-cycles.
|
||||||
The PPU logic can be greatly simplified if that delay is simply emulated.
|
The PPU logic can be greatly simplified if that delay is simply emulated.
|
||||||
*/
|
*/
|
||||||
void GB_display_run(GB_gameboy_t *gb, uint8_t cycles)
|
void GB_display_run(GB_gameboy_t *gb, unsigned cycles, bool force)
|
||||||
{
|
{
|
||||||
gb->cycles_since_vblank_callback += cycles / 2;
|
gb->cycles_since_vblank_callback += cycles / 2;
|
||||||
|
|
||||||
@ -878,12 +1233,12 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles)
|
|||||||
}
|
}
|
||||||
object_t *objects = (object_t *) &gb->oam;
|
object_t *objects = (object_t *) &gb->oam;
|
||||||
|
|
||||||
GB_STATE_MACHINE(gb, display, cycles, 2) {
|
GB_BATCHABLE_STATE_MACHINE(gb, display, cycles, 2, !force) {
|
||||||
GB_STATE(gb, display, 1);
|
GB_STATE(gb, display, 1);
|
||||||
GB_STATE(gb, display, 2);
|
GB_STATE(gb, display, 2);
|
||||||
// GB_STATE(gb, display, 3);
|
GB_STATE(gb, display, 3);
|
||||||
// GB_STATE(gb, display, 4);
|
GB_STATE(gb, display, 4);
|
||||||
// GB_STATE(gb, display, 5);
|
GB_STATE(gb, display, 5);
|
||||||
GB_STATE(gb, display, 6);
|
GB_STATE(gb, display, 6);
|
||||||
GB_STATE(gb, display, 7);
|
GB_STATE(gb, display, 7);
|
||||||
GB_STATE(gb, display, 8);
|
GB_STATE(gb, display, 8);
|
||||||
@ -1031,6 +1386,9 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles)
|
|||||||
GB_STAT_update(gb);
|
GB_STAT_update(gb);
|
||||||
gb->n_visible_objs = 0;
|
gb->n_visible_objs = 0;
|
||||||
|
|
||||||
|
if (!gb->dma_steps_left && !gb->oam_ppu_blocked) {
|
||||||
|
GB_BATCHPOINT(gb, display, 5, 80);
|
||||||
|
}
|
||||||
for (gb->oam_search_index = 0; gb->oam_search_index < 40; gb->oam_search_index++) {
|
for (gb->oam_search_index = 0; gb->oam_search_index < 40; gb->oam_search_index++) {
|
||||||
if (GB_is_cgb(gb)) {
|
if (GB_is_cgb(gb)) {
|
||||||
add_object_from_index(gb, gb->oam_search_index);
|
add_object_from_index(gb, gb->oam_search_index);
|
||||||
@ -1046,7 +1404,6 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles)
|
|||||||
gb->vram_write_blocked = false;
|
gb->vram_write_blocked = false;
|
||||||
gb->cgb_palettes_blocked = false;
|
gb->cgb_palettes_blocked = false;
|
||||||
gb->oam_write_blocked = GB_is_cgb(gb);
|
gb->oam_write_blocked = GB_is_cgb(gb);
|
||||||
GB_STAT_update(gb);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gb->cycles_for_line = MODE2_LENGTH + 4;
|
gb->cycles_for_line = MODE2_LENGTH + 4;
|
||||||
@ -1093,6 +1450,22 @@ void GB_display_run(GB_gameboy_t *gb, uint8_t cycles)
|
|||||||
|
|
||||||
/* The actual rendering cycle */
|
/* The actual rendering cycle */
|
||||||
gb->fetcher_state = 0;
|
gb->fetcher_state = 0;
|
||||||
|
if ((gb->mode3_batching_length = mode3_batching_length(gb))) {
|
||||||
|
GB_BATCHPOINT(gb, display, 3, gb->mode3_batching_length);
|
||||||
|
if (GB_BATCHED_CYCLES(gb, display) >= gb->mode3_batching_length) {
|
||||||
|
// Successfully batched!
|
||||||
|
gb->lcd_x = gb->position_in_line = 160;
|
||||||
|
gb->cycles_for_line += gb->mode3_batching_length;
|
||||||
|
if (gb->sgb) {
|
||||||
|
render_line_sgb(gb);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
render_line(gb);
|
||||||
|
}
|
||||||
|
GB_SLEEP(gb, display, 4, gb->mode3_batching_length);
|
||||||
|
goto skip_slow_mode_3;
|
||||||
|
}
|
||||||
|
}
|
||||||
while (true) {
|
while (true) {
|
||||||
/* Handle window */
|
/* Handle window */
|
||||||
/* TODO: It appears that WX checks if the window begins *next* pixel, not *this* pixel. For this reason,
|
/* TODO: It appears that WX checks if the window begins *next* pixel, not *this* pixel. For this reason,
|
||||||
@ -1255,6 +1628,7 @@ abort_fetching_object:
|
|||||||
gb->cycles_for_line++;
|
gb->cycles_for_line++;
|
||||||
GB_SLEEP(gb, display, 21, 1);
|
GB_SLEEP(gb, display, 21, 1);
|
||||||
}
|
}
|
||||||
|
skip_slow_mode_3:
|
||||||
|
|
||||||
/* TODO: Verify */
|
/* TODO: Verify */
|
||||||
if (gb->fetcher_state == 4 || gb->fetcher_state == 5) {
|
if (gb->fetcher_state == 4 || gb->fetcher_state == 5) {
|
||||||
|
@ -6,11 +6,12 @@
|
|||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
#ifdef GB_INTERNAL
|
#ifdef GB_INTERNAL
|
||||||
internal void GB_display_run(GB_gameboy_t *gb, uint8_t cycles);
|
internal void GB_display_run(GB_gameboy_t *gb, unsigned cycles, bool force);
|
||||||
internal void GB_palette_changed(GB_gameboy_t *gb, bool background_palette, uint8_t index);
|
internal void GB_palette_changed(GB_gameboy_t *gb, bool background_palette, uint8_t index);
|
||||||
internal void GB_STAT_update(GB_gameboy_t *gb);
|
internal void GB_STAT_update(GB_gameboy_t *gb);
|
||||||
internal void GB_lcd_off(GB_gameboy_t *gb);
|
internal void GB_lcd_off(GB_gameboy_t *gb);
|
||||||
internal void GB_display_vblank(GB_gameboy_t *gb);
|
internal void GB_display_vblank(GB_gameboy_t *gb);
|
||||||
|
#define GB_display_sync(gb) GB_display_run(gb, 0, true)
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
GB_OBJECT_PRIORITY_X,
|
GB_OBJECT_PRIORITY_X,
|
||||||
|
@ -1139,7 +1139,7 @@ uint8_t GB_run(GB_gameboy_t *gb)
|
|||||||
we just halt the CPU (with hacky code) until the correct time.
|
we just halt the CPU (with hacky code) until the correct time.
|
||||||
This ensures the Nintendo logo doesn't flash on screen, and
|
This ensures the Nintendo logo doesn't flash on screen, and
|
||||||
the game does "run in background" while the animation is playing. */
|
the game does "run in background" while the animation is playing. */
|
||||||
GB_display_run(gb, 228);
|
GB_display_run(gb, 228, true);
|
||||||
gb->cycles_since_last_sync += 228;
|
gb->cycles_since_last_sync += 228;
|
||||||
return 228;
|
return 228;
|
||||||
}
|
}
|
||||||
@ -1327,7 +1327,7 @@ bool GB_is_inited(GB_gameboy_t *gb)
|
|||||||
return gb->magic == state_magic();
|
return gb->magic == state_magic();
|
||||||
}
|
}
|
||||||
|
|
||||||
bool GB_is_cgb(GB_gameboy_t *gb)
|
bool GB_is_cgb(const GB_gameboy_t *gb)
|
||||||
{
|
{
|
||||||
return gb->model >= GB_MODEL_CGB_0;
|
return gb->model >= GB_MODEL_CGB_0;
|
||||||
}
|
}
|
||||||
|
@ -540,6 +540,8 @@ struct GB_gameboy_internal_s {
|
|||||||
/* For timing of the vblank callback */
|
/* For timing of the vblank callback */
|
||||||
uint32_t cycles_since_vblank_callback;
|
uint32_t cycles_since_vblank_callback;
|
||||||
bool lcd_disabled_outside_of_vblank;
|
bool lcd_disabled_outside_of_vblank;
|
||||||
|
int32_t allowed_pending_cycles;
|
||||||
|
uint16_t mode3_batching_length;
|
||||||
);
|
);
|
||||||
|
|
||||||
/* APU */
|
/* APU */
|
||||||
@ -796,7 +798,7 @@ __attribute__((__format__ (__printf__, fmtarg, firstvararg)))
|
|||||||
|
|
||||||
void GB_init(GB_gameboy_t *gb, GB_model_t model);
|
void GB_init(GB_gameboy_t *gb, GB_model_t model);
|
||||||
bool GB_is_inited(GB_gameboy_t *gb);
|
bool GB_is_inited(GB_gameboy_t *gb);
|
||||||
bool GB_is_cgb(GB_gameboy_t *gb);
|
bool GB_is_cgb(const GB_gameboy_t *gb);
|
||||||
bool GB_is_cgb_in_cgb_mode(GB_gameboy_t *gb);
|
bool GB_is_cgb_in_cgb_mode(GB_gameboy_t *gb);
|
||||||
bool GB_is_sgb(GB_gameboy_t *gb); // Returns true if the model is SGB or SGB2
|
bool GB_is_sgb(GB_gameboy_t *gb); // Returns true if the model is SGB or SGB2
|
||||||
bool GB_is_hle_sgb(GB_gameboy_t *gb); // Returns true if the model is SGB or SGB2 and the SFC/SNES side is HLE'd
|
bool GB_is_hle_sgb(GB_gameboy_t *gb); // Returns true if the model is SGB or SGB2 and the SFC/SNES side is HLE'd
|
||||||
|
@ -98,6 +98,7 @@ void GB_trigger_oam_bug(GB_gameboy_t *gb, uint16_t address)
|
|||||||
if (GB_is_cgb(gb)) return;
|
if (GB_is_cgb(gb)) return;
|
||||||
|
|
||||||
if (address >= 0xFE00 && address < 0xFF00) {
|
if (address >= 0xFE00 && address < 0xFF00) {
|
||||||
|
GB_display_sync(gb);
|
||||||
if (gb->accessed_oam_row != 0xff && gb->accessed_oam_row >= 8) {
|
if (gb->accessed_oam_row != 0xff && gb->accessed_oam_row >= 8) {
|
||||||
uint16_t *base = (uint16_t *)(gb->oam + gb->accessed_oam_row);
|
uint16_t *base = (uint16_t *)(gb->oam + gb->accessed_oam_row);
|
||||||
base[0] = bitwise_glitch(base[0],
|
base[0] = bitwise_glitch(base[0],
|
||||||
@ -283,6 +284,7 @@ static uint8_t read_mbc_rom(GB_gameboy_t *gb, uint16_t addr)
|
|||||||
|
|
||||||
static uint8_t read_vram(GB_gameboy_t *gb, uint16_t addr)
|
static uint8_t read_vram(GB_gameboy_t *gb, uint16_t addr)
|
||||||
{
|
{
|
||||||
|
GB_display_sync(gb);
|
||||||
if (gb->vram_read_blocked) {
|
if (gb->vram_read_blocked) {
|
||||||
return 0xFF;
|
return 0xFF;
|
||||||
}
|
}
|
||||||
@ -421,6 +423,37 @@ static uint8_t read_banked_ram(GB_gameboy_t *gb, uint16_t addr)
|
|||||||
return gb->ram[(addr & 0x0FFF) + gb->cgb_ram_bank * 0x1000];
|
return gb->ram[(addr & 0x0FFF) + gb->cgb_ram_bank * 0x1000];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void sync_ppu_if_needed(GB_gameboy_t *gb, uint8_t register_accessed)
|
||||||
|
{
|
||||||
|
switch (register_accessed) {
|
||||||
|
case GB_IO_IF:
|
||||||
|
case GB_IO_LCDC:
|
||||||
|
case GB_IO_STAT:
|
||||||
|
case GB_IO_SCY:
|
||||||
|
case GB_IO_SCX:
|
||||||
|
case GB_IO_LY:
|
||||||
|
case GB_IO_LYC:
|
||||||
|
case GB_IO_DMA:
|
||||||
|
case GB_IO_BGP:
|
||||||
|
case GB_IO_OBP0:
|
||||||
|
case GB_IO_OBP1:
|
||||||
|
case GB_IO_WY:
|
||||||
|
case GB_IO_WX:
|
||||||
|
case GB_IO_HDMA1:
|
||||||
|
case GB_IO_HDMA2:
|
||||||
|
case GB_IO_HDMA3:
|
||||||
|
case GB_IO_HDMA4:
|
||||||
|
case GB_IO_HDMA5:
|
||||||
|
case GB_IO_BGPI:
|
||||||
|
case GB_IO_BGPD:
|
||||||
|
case GB_IO_OBPI:
|
||||||
|
case GB_IO_OBPD:
|
||||||
|
case GB_IO_OPRI:
|
||||||
|
GB_display_sync(gb);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static uint8_t read_high_memory(GB_gameboy_t *gb, uint16_t addr)
|
static uint8_t read_high_memory(GB_gameboy_t *gb, uint16_t addr)
|
||||||
{
|
{
|
||||||
|
|
||||||
@ -433,6 +466,7 @@ static uint8_t read_high_memory(GB_gameboy_t *gb, uint16_t addr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (addr < 0xFF00) {
|
if (addr < 0xFF00) {
|
||||||
|
GB_display_sync(gb);
|
||||||
if (gb->oam_write_blocked && !GB_is_cgb(gb)) {
|
if (gb->oam_write_blocked && !GB_is_cgb(gb)) {
|
||||||
if (!gb->disable_oam_corruption) {
|
if (!gb->disable_oam_corruption) {
|
||||||
GB_trigger_oam_bug_read(gb, addr);
|
GB_trigger_oam_bug_read(gb, addr);
|
||||||
@ -548,6 +582,7 @@ static uint8_t read_high_memory(GB_gameboy_t *gb, uint16_t addr)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (addr < 0xFF80) {
|
if (addr < 0xFF80) {
|
||||||
|
sync_ppu_if_needed(gb, addr);
|
||||||
switch (addr & 0xFF) {
|
switch (addr & 0xFF) {
|
||||||
case GB_IO_IF:
|
case GB_IO_IF:
|
||||||
return gb->io_registers[GB_IO_IF] | 0xE0;
|
return gb->io_registers[GB_IO_IF] | 0xE0;
|
||||||
@ -846,6 +881,7 @@ static void write_mbc(GB_gameboy_t *gb, uint16_t addr, uint8_t value)
|
|||||||
|
|
||||||
static void write_vram(GB_gameboy_t *gb, uint16_t addr, uint8_t value)
|
static void write_vram(GB_gameboy_t *gb, uint16_t addr, uint8_t value)
|
||||||
{
|
{
|
||||||
|
GB_display_sync(gb);
|
||||||
if (gb->vram_write_blocked) {
|
if (gb->vram_write_blocked) {
|
||||||
//GB_log(gb, "Wrote %02x to %04x (VRAM) during mode 3\n", value, addr);
|
//GB_log(gb, "Wrote %02x to %04x (VRAM) during mode 3\n", value, addr);
|
||||||
return;
|
return;
|
||||||
@ -1155,6 +1191,7 @@ static void write_high_memory(GB_gameboy_t *gb, uint16_t addr, uint8_t value)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (addr < 0xFF00) {
|
if (addr < 0xFF00) {
|
||||||
|
GB_display_sync(gb);
|
||||||
if (gb->oam_write_blocked) {
|
if (gb->oam_write_blocked) {
|
||||||
GB_trigger_oam_bug(gb, addr);
|
GB_trigger_oam_bug(gb, addr);
|
||||||
return;
|
return;
|
||||||
@ -1233,6 +1270,8 @@ static void write_high_memory(GB_gameboy_t *gb, uint16_t addr, uint8_t value)
|
|||||||
/* Todo: Clean this code up: use a function table and move relevant code to display.c and timing.c
|
/* Todo: Clean this code up: use a function table and move relevant code to display.c and timing.c
|
||||||
(APU read and writes are already at apu.c) */
|
(APU read and writes are already at apu.c) */
|
||||||
if (addr < 0xFF80) {
|
if (addr < 0xFF80) {
|
||||||
|
sync_ppu_if_needed(gb, addr);
|
||||||
|
|
||||||
/* Hardware registers */
|
/* Hardware registers */
|
||||||
switch (addr & 0xFF) {
|
switch (addr & 0xFF) {
|
||||||
case GB_IO_WY:
|
case GB_IO_WY:
|
||||||
@ -1563,6 +1602,7 @@ static void write_high_memory(GB_gameboy_t *gb, uint16_t addr, uint8_t value)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (addr == 0xFFFF) {
|
if (addr == 0xFFFF) {
|
||||||
|
GB_display_sync(gb);
|
||||||
/* Interrupt mask */
|
/* Interrupt mask */
|
||||||
gb->interrupt_enable = value;
|
gb->interrupt_enable = value;
|
||||||
return;
|
return;
|
||||||
|
@ -435,7 +435,7 @@ void GB_advance_cycles(GB_gameboy_t *gb, uint8_t cycles)
|
|||||||
GB_hdma_run(gb);
|
GB_hdma_run(gb);
|
||||||
}
|
}
|
||||||
GB_apu_run(gb);
|
GB_apu_run(gb);
|
||||||
GB_display_run(gb, cycles);
|
GB_display_run(gb, cycles, false);
|
||||||
ir_run(gb, cycles);
|
ir_run(gb, cycles);
|
||||||
rtc_run(gb, cycles);
|
rtc_run(gb, cycles);
|
||||||
}
|
}
|
||||||
|
@ -28,13 +28,23 @@ enum {
|
|||||||
|
|
||||||
#define GB_SLEEP(gb, unit, state, cycles) do {\
|
#define GB_SLEEP(gb, unit, state, cycles) do {\
|
||||||
(gb)->unit##_cycles -= (cycles) * __state_machine_divisor; \
|
(gb)->unit##_cycles -= (cycles) * __state_machine_divisor; \
|
||||||
if ((gb)->unit##_cycles <= 0) {\
|
if (unlikely((gb)->unit##_cycles <= 0)) {\
|
||||||
(gb)->unit##_state = state;\
|
(gb)->unit##_state = state;\
|
||||||
return;\
|
return;\
|
||||||
unit##state:; \
|
unit##state:; \
|
||||||
}\
|
}\
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
|
#define GB_BATCHPOINT(gb, unit, state, cycles) do {\
|
||||||
|
unit##state:; \
|
||||||
|
if (likely(__state_machine_allow_batching && (gb)->unit##_cycles < (cycles * 2))) {\
|
||||||
|
(gb)->unit##_state = state;\
|
||||||
|
return;\
|
||||||
|
}\
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
#define GB_BATCHED_CYCLES(gb, unit) ((gb)->unit##_cycles / __state_machine_divisor)
|
||||||
|
|
||||||
#define GB_STATE_MACHINE(gb, unit, cycles, divisor) \
|
#define GB_STATE_MACHINE(gb, unit, cycles, divisor) \
|
||||||
static const int __state_machine_divisor = divisor;\
|
static const int __state_machine_divisor = divisor;\
|
||||||
(gb)->unit##_cycles += cycles; \
|
(gb)->unit##_cycles += cycles; \
|
||||||
@ -44,6 +54,10 @@ if ((gb)->unit##_cycles <= 0) {\
|
|||||||
switch ((gb)->unit##_state)
|
switch ((gb)->unit##_state)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#define GB_BATCHABLE_STATE_MACHINE(gb, unit, cycles, divisor, allow_batching) \
|
||||||
|
const bool __state_machine_allow_batching = (allow_batching); \
|
||||||
|
GB_STATE_MACHINE(gb, unit, cycles, divisor)
|
||||||
|
|
||||||
#define GB_STATE(gb, unit, state) case state: goto unit##state
|
#define GB_STATE(gb, unit, state) case state: goto unit##state
|
||||||
|
|
||||||
#define GB_UNIT(unit) int32_t unit##_cycles, unit##_state
|
#define GB_UNIT(unit) int32_t unit##_cycles, unit##_state
|
||||||
|
Loading…
Reference in New Issue
Block a user