Merge pull request #84 from bunnei/fix-hw-synchronization

Fix GPU/HW synchronization
This commit is contained in:
bunnei 2014-08-31 00:53:07 -04:00
commit 76372feb19
4 changed files with 51 additions and 34 deletions

View file

@ -31,7 +31,9 @@ int __cdecl main(int argc, char **argv) {
return -1; return -1;
} }
Core::RunLoop(); while(true) {
Core::RunLoop();
}
delete emu_window; delete emu_window;

View file

@ -6,6 +6,8 @@
#include "common/log.h" #include "common/log.h"
#include "common/symbols.h" #include "common/symbols.h"
#include "video_core/video_core.h"
#include "core/core.h" #include "core/core.h"
#include "core/mem_map.h" #include "core/mem_map.h"
#include "core/hw/hw.h" #include "core/hw/hw.h"
@ -24,29 +26,17 @@ ARM_Interface* g_app_core = nullptr; ///< ARM11 application core
ARM_Interface* g_sys_core = nullptr; ///< ARM11 system (OS) core ARM_Interface* g_sys_core = nullptr; ///< ARM11 system (OS) core
/// Run the core CPU loop /// Run the core CPU loop
void RunLoop() { void RunLoop(int tight_loop) {
for (;;){ g_app_core->Run(tight_loop);
// This function loops for 100 instructions in the CPU before trying to update hardware. HW::Update();
// This is a little bit faster than SingleStep, and should be pretty much equivalent. The if (HLE::g_reschedule) {
// number of instructions chosen is fairly arbitrary, however a large number will more Kernel::Reschedule();
// drastically affect the frequency of GSP interrupts and likely break things. The point of
// this is to just loop in the CPU for more than 1 instruction to reduce overhead and make
// it a little bit faster...
g_app_core->Run(100);
HW::Update();
if (HLE::g_reschedule) {
Kernel::Reschedule();
}
} }
} }
/// Step the CPU one instruction /// Step the CPU one instruction
void SingleStep() { void SingleStep() {
g_app_core->Step(); RunLoop(1);
HW::Update();
if (HLE::g_reschedule) {
Kernel::Reschedule();
}
} }
/// Halt the core /// Halt the core

View file

@ -19,8 +19,15 @@ extern ARM_Interface* g_sys_core; ///< ARM11 system (OS) core
/// Start the core /// Start the core
void Start(); void Start();
/// Run the core CPU loop /**
void RunLoop(); * Run the core CPU loop
* This function loops for 100 instructions in the CPU before trying to update hardware. This is a
* little bit faster than SingleStep, and should be pretty much equivalent. The number of
* instructions chosen is fairly arbitrary, however a large number will more drastically affect the
* frequency of GSP interrupts and likely break things. The point of this is to just loop in the CPU
* for more than 1 instruction to reduce overhead and make it a little bit faster...
*/
void RunLoop(int tight_loop=100);
/// Step the CPU one instruction /// Step the CPU one instruction
void SingleStep(); void SingleStep();

View file

@ -24,6 +24,7 @@ Regs g_regs;
u32 g_cur_line = 0; ///< Current vertical screen line u32 g_cur_line = 0; ///< Current vertical screen line
u64 g_last_line_ticks = 0; ///< CPU tick count from last vertical screen line u64 g_last_line_ticks = 0; ///< CPU tick count from last vertical screen line
u64 g_last_frame_ticks = 0; ///< CPU tick count from last frame
template <typename T> template <typename T>
inline void Read(T &var, const u32 raw_addr) { inline void Read(T &var, const u32 raw_addr) {
@ -179,27 +180,44 @@ void Update() {
auto& framebuffer_top = g_regs.framebuffer_config[0]; auto& framebuffer_top = g_regs.framebuffer_config[0];
u64 current_ticks = Core::g_app_core->GetTicks(); u64 current_ticks = Core::g_app_core->GetTicks();
// Synchronize line... // Update the frame after a certain number of CPU ticks have elapsed. This assumes that the
if ((current_ticks - g_last_line_ticks) >= GPU::kFrameTicks / framebuffer_top.height) { // active frame in memory is always complete to render. There also may be issues with this
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0); // becoming out-of-synch with GSP synchronization code (as follows). At this time, this seems to
g_cur_line++; // be the most effective solution for both homebrew and retail applications. With retail, this
g_last_line_ticks = current_ticks; // could be moved below (and probably would guarantee more accurate synchronization). However,
// primitive homebrew relies on a vertical blank interrupt to happen inevitably (regardless of a
// threading reschedule).
if ((current_ticks - g_last_frame_ticks) > GPU::kFrameTicks) {
VideoCore::g_renderer->SwapBuffers();
g_last_frame_ticks = current_ticks;
} }
// Synchronize frame... // Synchronize GPU on a thread reschedule: Because we cannot accurately predict a vertical
if (g_cur_line >= framebuffer_top.height) { // blank, we need to simulate it. Based on testing, it seems that retail applications work more
g_cur_line = 0; // accurately when this is signalled between thread switches.
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1);
VideoCore::g_renderer->SwapBuffers(); if (HLE::g_reschedule) {
Kernel::WaitCurrentThread(WAITTYPE_VBLANK);
HLE::Reschedule(__func__); // Synchronize line...
if ((current_ticks - g_last_line_ticks) >= GPU::kFrameTicks / framebuffer_top.height) {
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC0);
g_cur_line++;
g_last_line_ticks = current_ticks;
}
// Synchronize frame...
if (g_cur_line >= framebuffer_top.height) {
g_cur_line = 0;
GSP_GPU::SignalInterrupt(GSP_GPU::InterruptId::PDC1);
}
} }
} }
/// Initialize hardware /// Initialize hardware
void Init() { void Init() {
g_cur_line = 0; g_cur_line = 0;
g_last_line_ticks = Core::g_app_core->GetTicks(); g_last_frame_ticks = g_last_line_ticks = Core::g_app_core->GetTicks();
auto& framebuffer_top = g_regs.framebuffer_config[0]; auto& framebuffer_top = g_regs.framebuffer_config[0];
auto& framebuffer_sub = g_regs.framebuffer_config[1]; auto& framebuffer_sub = g_regs.framebuffer_config[1];