diff --git a/src/citra/citra.cpp b/src/citra/citra.cpp index 9399ff296..7dc721dc3 100644 --- a/src/citra/citra.cpp +++ b/src/citra/citra.cpp @@ -31,7 +31,9 @@ int __cdecl main(int argc, char **argv) { return -1; } - Core::RunLoop(); + while(true) { + Core::RunLoop(); + } delete emu_window; diff --git a/src/core/core.cpp b/src/core/core.cpp index fc9909377..f21801e52 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -6,6 +6,8 @@ #include "common/log.h" #include "common/symbols.h" +#include "video_core/video_core.h" + #include "core/core.h" #include "core/mem_map.h" #include "core/hw/hw.h" @@ -24,29 +26,17 @@ ARM_Interface* g_app_core = nullptr; ///< ARM11 application core ARM_Interface* g_sys_core = nullptr; ///< ARM11 system (OS) core /// Run the core CPU loop -void RunLoop() { - for (;;){ - // This function loops for 100 instructions in the CPU before trying to update hardware. - // This is a little bit faster than SingleStep, and should be pretty much equivalent. The - // number of instructions chosen is fairly arbitrary, however a large number will more - // drastically affect the frequency of GSP interrupts and likely break things. The point of - // this is to just loop in the CPU for more than 1 instruction to reduce overhead and make - // it a little bit faster... 
- g_app_core->Run(100); - HW::Update(); - if (HLE::g_reschedule) { - Kernel::Reschedule(); - } +void RunLoop(int tight_loop) { + g_app_core->Run(tight_loop); + HW::Update(); + if (HLE::g_reschedule) { + Kernel::Reschedule(); } } /// Step the CPU one instruction void SingleStep() { - g_app_core->Step(); - HW::Update(); - if (HLE::g_reschedule) { - Kernel::Reschedule(); - } + RunLoop(1); } /// Halt the core diff --git a/src/core/core.h b/src/core/core.h index 4b42dabcb..9c72c8b3f 100644 --- a/src/core/core.h +++ b/src/core/core.h @@ -19,8 +19,15 @@ extern ARM_Interface* g_sys_core; ///< ARM11 system (OS) core /// Start the core void Start(); -/// Run the core CPU loop -void RunLoop(); +/** + * Run the core CPU loop + * This function loops for tight_loop instructions in the CPU before trying to update hardware. + * This is a little bit faster than SingleStep, and should be pretty much equivalent. The number of + * instructions chosen is fairly arbitrary, however a large number will more drastically affect the + * frequency of GSP interrupts and likely break things. The point of this is to just loop in the CPU + * for more than 1 instruction to reduce overhead and make it a little bit faster... + */ +void RunLoop(int tight_loop=100); /// Step the CPU one instruction void SingleStep(); diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index f1f3e7ab3..8709b8eb7 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -24,6 +24,7 @@ Regs g_regs; u32 g_cur_line = 0; ///< Current vertical screen line u64 g_last_line_ticks = 0; ///< CPU tick count from last vertical screen line +u64 g_last_frame_ticks = 0; ///< CPU tick count from last frame template inline void Read(T &var, const u32 raw_addr) { @@ -179,27 +180,44 @@ void Update() { auto& framebuffer_top = g_regs.framebuffer_config[0]; u64 current_ticks = Core::g_app_core->GetTicks(); - // Synchronize line... 
- if ((current_ticks - g_last_line_ticks) >= GPU::kFrameTicks / framebuffer_top.height) { - GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0); - g_cur_line++; - g_last_line_ticks = current_ticks; + // Update the frame after a certain number of CPU ticks have elapsed. This assumes that the + // active frame in memory is always complete to render. There also may be issues with this + // becoming out-of-synch with GSP synchronization code (as follows). At this time, this seems to + // be the most effective solution for both homebrew and retail applications. With retail, this + // could be moved below (and probably would guarantee more accurate synchronization). However, + // primitive homebrew relies on a vertical blank interrupt to happen inevitably (regardless of a + // threading reschedule). + + if ((current_ticks - g_last_frame_ticks) > GPU::kFrameTicks) { + VideoCore::g_renderer->SwapBuffers(); + g_last_frame_ticks = current_ticks; } - // Synchronize frame... - if (g_cur_line >= framebuffer_top.height) { - g_cur_line = 0; - GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1); - VideoCore::g_renderer->SwapBuffers(); - Kernel::WaitCurrentThread(WAITTYPE_VBLANK); - HLE::Reschedule(__func__); + // Synchronize GPU on a thread reschedule: Because we cannot accurately predict a vertical + // blank, we need to simulate it. Based on testing, it seems that retail applications work more + // accurately when this is signalled between thread switches. + + if (HLE::g_reschedule) { + + // Synchronize line... + if ((current_ticks - g_last_line_ticks) >= GPU::kFrameTicks / framebuffer_top.height) { + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0); + g_cur_line++; + g_last_line_ticks = current_ticks; + } + + // Synchronize frame... 
+ if (g_cur_line >= framebuffer_top.height) { + g_cur_line = 0; + GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1); + } } } /// Initialize hardware void Init() { g_cur_line = 0; - g_last_line_ticks = Core::g_app_core->GetTicks(); + g_last_frame_ticks = g_last_line_ticks = Core::g_app_core->GetTicks(); auto& framebuffer_top = g_regs.framebuffer_config[0]; auto& framebuffer_sub = g_regs.framebuffer_config[1];