diff --git a/src/common/threadsafe_queue.h b/src/common/threadsafe_queue.h
index a4647314a..ad04df8ca 100644
--- a/src/common/threadsafe_queue.h
+++ b/src/common/threadsafe_queue.h
@@ -83,11 +83,15 @@ public:
         return true;
     }
 
-    T PopWait() {
+    void Wait() {
         if (Empty()) {
             std::unique_lock lock{cv_mutex};
             cv.wait(lock, [this]() { return !Empty(); });
         }
+    }
+
+    T PopWait() {
+        Wait();
         T t;
         Pop(t);
         return t;
@@ -156,6 +160,10 @@ public:
         return spsc_queue.Pop(t);
     }
 
+    void Wait() {
+        spsc_queue.Wait();
+    }
+
     T PopWait() {
         return spsc_queue.PopWait();
     }
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 305f56ff1..56b47e671 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -296,7 +296,7 @@ struct System::Impl {
         exit_lock = false;
 
         if (gpu_core) {
-            gpu_core->WaitIdle();
+            gpu_core->ShutDown();
         }
 
         services.reset();
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index c61f44619..009c6f574 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -517,8 +517,8 @@ void GPU::TriggerCpuInterrupt(const u32 syncpoint_id, const u32 value) const {
     interrupt_manager.GPUInterruptSyncpt(syncpoint_id, value);
 }
 
-void GPU::WaitIdle() const {
-    gpu_thread.WaitIdle();
+void GPU::ShutDown() {
+    gpu_thread.ShutDown();
 }
 
 void GPU::OnCommandListEnd() {
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index b2ee45496..ecab35d3b 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -219,8 +219,8 @@ public:
         return *shader_notify;
     }
 
-    // Waits for the GPU to finish working
-    void WaitIdle() const;
+    // Stops the GPU execution and waits for the GPU to finish working
+    void ShutDown();
 
     /// Allows the CPU/NvFlinger to wait on the GPU before presenting a frame.
     void WaitFence(u32 syncpoint_id, u32 value);
diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp
index 99353f15f..7addfbc7b 100644
--- a/src/video_core/gpu_thread.cpp
+++ b/src/video_core/gpu_thread.cpp
@@ -29,8 +29,7 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
     system.RegisterHostThread();
 
     // Wait for first GPU command before acquiring the window context
-    while (state.queue.Empty())
-        ;
+    state.queue.Wait();
 
     // If emulation was stopped during disk shader loading, abort before trying to acquire context
     if (!state.is_running) {
@@ -57,10 +56,16 @@ static void RunThread(Core::System& system, VideoCore::RendererBase& renderer,
         } else if (const auto* invalidate = std::get_if<InvalidateRegionCommand>(&next.data)) {
            rasterizer->OnCPUWrite(invalidate->addr, invalidate->size);
         } else if (std::holds_alternative<EndProcessingCommand>(next.data)) {
-            return;
+            ASSERT(state.is_running == false);
         } else {
             UNREACHABLE();
         }
         state.signaled_fence.store(next.fence);
+        if (next.block) {
+            // Hold the write_lock while notifying so a waiter cannot race between
+            // checking its predicate and blocking on the condition variable.
+            std::lock_guard lk(state.write_lock);
+            state.cv.notify_all();
+        }
     }
 }
 
@@ -69,13 +74,7 @@ ThreadManager::ThreadManager(Core::System& system_, bool is_async_)
     : system{system_}, is_async{is_async_} {}
 
 ThreadManager::~ThreadManager() {
-    if (!thread.joinable()) {
-        return;
-    }
-
-    // Notify GPU thread that a shutdown is pending
-    PushCommand(EndProcessingCommand());
-    thread.join();
+    ShutDown();
 }
 
 void ThreadManager::StartThread(VideoCore::RendererBase& renderer,
@@ -112,9 +111,8 @@ void ThreadManager::FlushRegion(VAddr addr, u64 size) {
     case Settings::GPUAccuracy::Extreme: {
         auto& gpu = system.GPU();
         u64 fence = gpu.RequestFlush(addr, size);
-        PushCommand(GPUTickCommand());
-        while (fence > gpu.CurrentFlushRequestFence()) {
-        }
+        PushCommand(GPUTickCommand(), true);
+        ASSERT(fence <= gpu.CurrentFlushRequestFence());
         break;
     }
     default:
@@ -131,23 +129,45 @@ void ThreadManager::FlushAndInvalidateRegion(VAddr addr, u64 size) {
     rasterizer->OnCPUWrite(addr, size);
 }
 
-void ThreadManager::WaitIdle() const {
-    while (state.last_fence > state.signaled_fence.load(std::memory_order_relaxed) &&
-           system.IsPoweredOn()) {
+void ThreadManager::ShutDown() {
+    if (!state.is_running) {
+        return;
     }
+
+    {
+        std::lock_guard lk(state.write_lock);
+        state.is_running = false;
+        state.cv.notify_all();
+    }
+
+    if (!thread.joinable()) {
+        return;
+    }
+
+    // Notify GPU thread that a shutdown is pending
+    PushCommand(EndProcessingCommand());
+    thread.join();
 }
 
 void ThreadManager::OnCommandListEnd() {
     PushCommand(OnCommandListEndCommand());
 }
 
-u64 ThreadManager::PushCommand(CommandData&& command_data) {
-    const u64 fence{++state.last_fence};
-    state.queue.Push(CommandDataContainer(std::move(command_data), fence));
-
+u64 ThreadManager::PushCommand(CommandData&& command_data, bool block) {
     if (!is_async) {
         // In synchronous GPU mode, block the caller until the command has executed
-        WaitIdle();
+        block = true;
+    }
+
+    std::unique_lock lk(state.write_lock);
+    const u64 fence{++state.last_fence};
+    state.queue.Push(CommandDataContainer(std::move(command_data), fence, block));
+
+    if (block) {
+        state.cv.wait(lk, [this, fence] {
+            return fence <= state.signaled_fence.load(std::memory_order_relaxed) ||
+                   !state.is_running;
+        });
     }
 
     return fence;
diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h
index 18269e51c..11a648f38 100644
--- a/src/video_core/gpu_thread.h
+++ b/src/video_core/gpu_thread.h
@@ -90,21 +90,24 @@ using CommandData =
 struct CommandDataContainer {
     CommandDataContainer() = default;
 
-    explicit CommandDataContainer(CommandData&& data_, u64 next_fence_)
-        : data{std::move(data_)}, fence{next_fence_} {}
+    explicit CommandDataContainer(CommandData&& data_, u64 next_fence_, bool block_)
+        : data{std::move(data_)}, fence{next_fence_}, block(block_) {}
 
     CommandData data;
     u64 fence{};
+    bool block{};
 };
 
 /// Struct used to synchronize the GPU thread
 struct SynchState final {
     std::atomic_bool is_running{true};
 
-    using CommandQueue = Common::MPSCQueue<CommandDataContainer>;
+    using CommandQueue = Common::SPSCQueue<CommandDataContainer>;
+    std::mutex write_lock;
     CommandQueue queue;
     u64 last_fence{};
     std::atomic<u64> signaled_fence{};
+    std::condition_variable cv;
 };
 
 /// Class used to manage the GPU thread
@@ -132,14 +135,14 @@ public:
     /// Notify rasterizer that any caches of the specified region should be flushed and invalidated
     void FlushAndInvalidateRegion(VAddr addr, u64 size);
 
-    // Wait until the gpu thread is idle.
-    void WaitIdle() const;
+    // Stops the GPU execution and waits for the GPU to finish working
+    void ShutDown();
 
     void OnCommandListEnd();
 
 private:
     /// Pushes a command to be executed by the GPU thread
-    u64 PushCommand(CommandData&& command_data);
+    u64 PushCommand(CommandData&& command_data, bool block = false);
 
     Core::System& system;
     const bool is_async;
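
For context, the blocking `PushCommand(..., true)` path this patch introduces is a standard fence-and-condition-variable handshake: the producer publishes a fence under `write_lock`, the consumer stores `signaled_fence` after finishing the item and notifies while holding the same mutex. Below is a minimal, self-contained sketch of that pattern. All names here (`FenceState`, `Push`, `ConsumeOne`) are illustrative stand-ins, not yuzu's types, and the queue is simplified to a mutex-guarded `std::deque`; the real code keeps a lock-free SPSC queue and uses `write_lock` only for fence publication and wakeup.

```cpp
#include <atomic>
#include <condition_variable>
#include <cstdint>
#include <deque>
#include <mutex>

// Hypothetical stand-in for SynchState: a fence counter, a mutex guarding
// pushes, and a condition variable signaled after each completed item.
struct FenceState {
    std::mutex write_lock;
    std::condition_variable cv;
    std::deque<std::uint64_t> queue; // simplified stand-in for the SPSC queue
    std::uint64_t last_fence = 0;
    std::atomic<std::uint64_t> signaled_fence{0};
    std::atomic_bool is_running{true};
};

// Producer: push work and, if `block` is set, sleep until the consumer has
// signaled our fence. Allocating the fence and waiting under the same lock
// mirrors what PushCommand() does in the diff.
std::uint64_t Push(FenceState& s, bool block) {
    std::unique_lock lk(s.write_lock);
    const std::uint64_t fence = ++s.last_fence;
    s.queue.push_back(fence);
    s.cv.notify_all(); // wake the consumer sketched below
    if (block) {
        // wait() releases the lock while blocked, so the consumer can run
        s.cv.wait(lk, [&s, fence] {
            return fence <= s.signaled_fence.load(std::memory_order_relaxed) ||
                   !s.is_running;
        });
    }
    return fence;
}

// Consumer: pop one item, do the work, then publish the fence and notify
// while holding the mutex. Taking the lock before notify_all() is what closes
// the race the diff's comment describes: a waiter that has checked its
// predicate but not yet gone to sleep still owns the lock, so the notifier
// cannot slip the wakeup in between those two steps.
void ConsumeOne(FenceState& s) {
    std::uint64_t fence;
    {
        std::unique_lock lk(s.write_lock);
        s.cv.wait(lk, [&s] { return !s.queue.empty() || !s.is_running; });
        if (s.queue.empty()) {
            return; // shutdown requested with nothing queued
        }
        fence = s.queue.front();
        s.queue.pop_front();
    }
    // ... execute the command here ...
    s.signaled_fence.store(fence);
    std::lock_guard lk(s.write_lock);
    s.cv.notify_all();
}
```

In a real driver loop, `ConsumeOne` would run on a dedicated thread, as `RunThread` does in the patch; the shutdown predicate (`!is_running`) is what lets `ShutDown()` release any caller still blocked in `Push` instead of leaving it waiting on a fence that will never be signaled.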