diff --git a/src/core/hle/service/nvflinger/nvflinger.cpp b/src/core/hle/service/nvflinger/nvflinger.cpp index 3b251f8c8..86a90526c 100644 --- a/src/core/hle/service/nvflinger/nvflinger.cpp +++ b/src/core/hle/service/nvflinger/nvflinger.cpp @@ -36,6 +36,10 @@ NVFlinger::NVFlinger(Core::System& system) : system(system) { displays.emplace_back(3, "Internal", system); displays.emplace_back(4, "Null", system); + for (auto& display : displays) { + display.SignalVSyncEvent(); + } + // Schedule the screen composition events composition_event = system.CoreTiming().RegisterEvent( "ScreenComposition", [this](u64 userdata, s64 cycles_late) { @@ -173,7 +177,13 @@ void NVFlinger::Compose() { bool trigger_event = false; // Trigger vsync for this display at the end of drawing SCOPE_EXIT({ - if (trigger_event) { + // TODO(Blinkhawk): Correctly send buffers through nvflinger while + // loading the game through the OS. + // During loading, the OS takes care of sending buffers to vsync, + // thus it triggers. Since this is not properly emulated due to + // HLE complications, we allow it to signal until the game enqueues + // its first buffer. + if (trigger_event || !first_buffer_enqueued) { display.SignalVSyncEvent(); } }); @@ -193,13 +203,20 @@ void NVFlinger::Compose() { if (!buffer) { // There was no queued buffer to draw, render previous frame - system.GetPerfStats().EndGameFrame(); system.GPU().SwapBuffers({}); continue; } const auto& igbp_buffer = buffer->get().igbp_buffer; trigger_event = true; + first_buffer_enqueued = true; + + const auto& gpu = system.GPU(); + const auto& multi_fence = buffer->get().multi_fence; + for (u32 fence_id = 0; fence_id < multi_fence.num_fences; fence_id++) { + const auto& fence = multi_fence.fences[fence_id]; + gpu.WaitFence(fence.id, fence.value); + } // Now send the buffer to the GPU for drawing. // TODO(Subv): Support more than just disp0. 
The display device selection is probably based diff --git a/src/core/hle/service/nvflinger/nvflinger.h b/src/core/hle/service/nvflinger/nvflinger.h index 5d7e3bfb8..95d7278f5 100644 --- a/src/core/hle/service/nvflinger/nvflinger.h +++ b/src/core/hle/service/nvflinger/nvflinger.h @@ -102,6 +102,8 @@ private: u32 swap_interval = 1; + bool first_buffer_enqueued{}; + /// Event that handles screen composition. Core::Timing::EventType* composition_event; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 76cfe8107..d94be9c9d 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -3,6 +3,7 @@ // Refer to the license.txt file included. #include "common/assert.h" +#include "common/microprofile.h" #include "core/core.h" #include "core/core_timing.h" #include "core/memory.h" @@ -17,6 +18,8 @@ namespace Tegra { +MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); + GPU::GPU(Core::System& system, VideoCore::RendererBase& renderer, bool is_async) : system{system}, renderer{renderer}, is_async{is_async} { auto& rasterizer{renderer.Rasterizer()}; @@ -63,6 +66,16 @@ const DmaPusher& GPU::DmaPusher() const { return *dma_pusher; } +void GPU::WaitFence(u32 syncpoint_id, u32 value) const { + // A synchronous (non-async) GPU is always in sync, so there is nothing to wait for + if (!is_async) { + return; + } + MICROPROFILE_SCOPE(GPU_wait); + while (syncpoints[syncpoint_id].load() < value) { // Busy-wait until the syncpoint reaches value + } +} + void GPU::IncrementSyncPoint(const u32 syncpoint_id) { syncpoints[syncpoint_id]++; std::lock_guard lock{sync_mutex}; diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 29fa8e95b..e20b0687a 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -177,6 +177,9 @@ public: /// Returns a reference to the GPU DMA pusher. Tegra::DmaPusher& DmaPusher(); + /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame. 
+ void WaitFence(u32 syncpoint_id, u32 value) const; + void IncrementSyncPoint(u32 syncpoint_id); u32 GetSyncpointValue(u32 syncpoint_id) const; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 5f039e4fd..d7048b6ae 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -5,8 +5,6 @@ #include "common/assert.h" #include "common/microprofile.h" #include "core/core.h" -#include "core/core_timing.h" -#include "core/core_timing_util.h" #include "core/frontend/scope_acquire_window_context.h" #include "video_core/dma_pusher.h" #include "video_core/gpu.h" @@ -68,14 +66,10 @@ ThreadManager::~ThreadManager() { void ThreadManager::StartThread(VideoCore::RendererBase& renderer, Tegra::DmaPusher& dma_pusher) { thread = std::thread{RunThread, std::ref(renderer), std::ref(dma_pusher), std::ref(state)}; - synchronization_event = system.CoreTiming().RegisterEvent( - "GPUThreadSynch", [this](u64 fence, s64) { state.WaitForSynchronization(fence); }); } void ThreadManager::SubmitList(Tegra::CommandList&& entries) { - const u64 fence{PushCommand(SubmitListCommand(std::move(entries)))}; - const s64 synchronization_ticks{Core::Timing::usToCycles(std::chrono::microseconds{9000})}; - system.CoreTiming().ScheduleEvent(synchronization_ticks, synchronization_event, fence); + PushCommand(SubmitListCommand(std::move(entries))); } void ThreadManager::SwapBuffers(const Tegra::FramebufferConfig* framebuffer) { @@ -102,10 +96,4 @@ u64 ThreadManager::PushCommand(CommandData&& command_data) { return fence; } -MICROPROFILE_DEFINE(GPU_wait, "GPU", "Wait for the GPU", MP_RGB(128, 128, 192)); -void SynchState::WaitForSynchronization(u64 fence) { - while (signaled_fence.load() < fence) - ; -} - } // namespace VideoCommon::GPUThread diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index 3ae0ec9f3..108f456bd 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -21,9 +21,6 @@ class DmaPusher; namespace 
Core { class System; -namespace Timing { -struct EventType; -} // namespace Timing } // namespace Core namespace VideoCommon::GPUThread { @@ -89,8 +86,6 @@ struct CommandDataContainer { struct SynchState final { std::atomic_bool is_running{true}; - void WaitForSynchronization(u64 fence); - using CommandQueue = Common::SPSCQueue; CommandQueue queue; u64 last_fence{}; @@ -128,7 +123,6 @@ private: private: SynchState state; Core::System& system; - Core::Timing::EventType* synchronization_event{}; std::thread thread; std::thread::id thread_id; };