Merge pull request #84 from bunnei/fix-hw-synchronization

Fix GPU/HW synchronization
2024-07-06 02:46:47 +01:00 · 2014-08-31 00:53:07 -04:00 · 2014-08-31 00:53:07 -04:00 · 76372feb19
parent 038a51aac1 aabfcfe6ad
commit 76372feb19
4 changed files with 51 additions and 34 deletions
--- a/src/citra/citra.cpp
+++ b/src/citra/citra.cpp
@@ -31,7 +31,9 @@ int __cdecl main(int argc, char **argv) {
        return -1;
    }
-    Core::RunLoop();
+    while(true) {
        Core::RunLoop();
    }
    delete emu_window;
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -6,6 +6,8 @@
 #include "common/log.h"
 #include "common/symbols.h"
 #include "video_core/video_core.h"
 #include "core/core.h"
 #include "core/mem_map.h"
 #include "core/hw/hw.h"
@@ -24,29 +26,17 @@ ARM_Interface*  g_app_core      = nullptr;  ///< ARM11 application core
 ARM_Interface*  g_sys_core      = nullptr;  ///< ARM11 system (OS) core
 /// Run the core CPU loop
-void RunLoop() {
+void RunLoop(int tight_loop) {
-    for (;;){
+    g_app_core->Run(tight_loop);
-        // This function loops for 100 instructions in the CPU before trying to update hardware.
+    HW::Update();
-        // This is a little bit faster than SingleStep, and should be pretty much equivalent. The 
+    if (HLE::g_reschedule) {
-        // number of instructions chosen is fairly arbitrary, however a large number will more 
+        Kernel::Reschedule();
        // drastically affect the frequency of GSP interrupts and likely break things. The point of
        // this is to just loop in the CPU for more than 1 instruction to reduce overhead and make
        // it a little bit faster...
        g_app_core->Run(100);
        HW::Update();
        if (HLE::g_reschedule) {
            Kernel::Reschedule();
        }
    }
 }
 /// Step the CPU one instruction
 void SingleStep() {
-    g_app_core->Step();
+    RunLoop(1);
    HW::Update();
    if (HLE::g_reschedule) {
        Kernel::Reschedule();
    }
 }
 /// Halt the core
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -19,8 +19,15 @@ extern ARM_Interface*   g_sys_core;     ///< ARM11 system (OS) core
 /// Start the core
 void Start();
-/// Run the core CPU loop
+/**
-void RunLoop();
+ * Run the core CPU loop
 * This function loops for 100 instructions in the CPU before trying to update hardware. This is a
 * little bit faster than SingleStep, and should be pretty much equivalent. The number of
 * instructions chosen is fairly arbitrary, however a large number will more drastically affect the
 * frequency of GSP interrupts and likely break things. The point of this is to just loop in the CPU
 * for more than 1 instruction to reduce overhead and make it a little bit faster...
 */
 void RunLoop(int tight_loop=100);
 /// Step the CPU one instruction
 void SingleStep();
--- a/src/core/hw/gpu.cpp
+++ b/src/core/hw/gpu.cpp
@@ -24,6 +24,7 @@ Regs g_regs;
 u32 g_cur_line = 0;         ///< Current vertical screen line
 u64 g_last_line_ticks = 0;  ///< CPU tick count from last vertical screen line
 u64 g_last_frame_ticks = 0; ///< CPU tick count from last frame
 template <typename T>
 inline void Read(T &var, const u32 raw_addr) {
@@ -179,27 +180,44 @@ void Update() {
    auto& framebuffer_top = g_regs.framebuffer_config[0];
    u64 current_ticks = Core::g_app_core->GetTicks();
-    // Synchronize line...
+    // Update the frame after a certain number of CPU ticks have elapsed. This assumes that the
-    if ((current_ticks - g_last_line_ticks) >= GPU::kFrameTicks / framebuffer_top.height) {
+    // active frame in memory is always complete to render. There also may be issues with this
-        GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0);
+    // becoming out-of-sync with GSP synchronization code (as follows). At this time, this seems to
-        g_cur_line++;
+    // be the most effective solution for both homebrew and retail applications. With retail, this
-        g_last_line_ticks = current_ticks;
+    // could be moved below (and probably would guarantee more accurate synchronization). However,
    // primitive homebrew relies on a vertical blank interrupt to happen inevitably (regardless of a
    // threading reschedule).
    if ((current_ticks - g_last_frame_ticks) > GPU::kFrameTicks) {
        VideoCore::g_renderer->SwapBuffers();
        g_last_frame_ticks = current_ticks;
    }
-    // Synchronize frame...
+    // Synchronize GPU on a thread reschedule: Because we cannot accurately predict a vertical
-    if (g_cur_line >= framebuffer_top.height) {
+    // blank, we need to simulate it. Based on testing, it seems that retail applications work more
-        g_cur_line = 0;
+    // accurately when this is signalled between thread switches.
-        GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1);
+
-        VideoCore::g_renderer->SwapBuffers();
+    if (HLE::g_reschedule) {
-        Kernel::WaitCurrentThread(WAITTYPE_VBLANK);
+
-        HLE::Reschedule(__func__);
+        // Synchronize line...
        if ((current_ticks - g_last_line_ticks) >= GPU::kFrameTicks / framebuffer_top.height) {
            GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0);
            g_cur_line++;
            g_last_line_ticks = current_ticks;
        }
        // Synchronize frame...
        if (g_cur_line >= framebuffer_top.height) {
            g_cur_line = 0;
            GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1);
        }
    }
 }
 /// Initialize hardware
 void Init() {
    g_cur_line = 0;
-    g_last_line_ticks = Core::g_app_core->GetTicks();
+    g_last_frame_ticks = g_last_line_ticks = Core::g_app_core->GetTicks();
    auto& framebuffer_top = g_regs.framebuffer_config[0];
    auto& framebuffer_sub = g_regs.framebuffer_config[1];