// Review summary of this patch (everything below is stated from the hunks that follow).
//
// Purpose: moves GPU "release" fence handling out of the Maxwell3D engine and into the
// rasterizer, and defers some texture flushes until the matching fence is retired.
//
// maxwell_3d.cpp / maxwell_3d.h
//   * Removes Maxwell3D::ReleaseFences() and the delay_fences member.
//   * In ProcessQueryGet(), the QueryOperation::Release case no longer calls
//     rasterizer.FlushCommands() / rasterizer.SyncGuestHost() itself. When
//     regs.query.query_get.fence == 1 it calls rasterizer.SignalFence(QueryAddress(), result);
//     otherwise it calls StampQueryResult(result, short_query == 0).
//
// gpu.cpp / gpu.h / gpu_asynch.cpp / gpu_asynch.h / gpu_thread.cpp / gpu_thread.h
//   * GPU::OnCommandListEnd() becomes virtual and now calls renderer.Rasterizer().ReleaseFences().
//   * GPUAsynch overrides it to call gpu_thread.OnCommandListEnd(), which pushes a new, empty
//     OnCommandListEndCommand (added to the CommandData variant) onto the command queue.
//   * RunThread gains a branch for that command which calls renderer.Rasterizer().ReleaseFences().
//
// rasterizer_interface.h
//   * Adds virtual SignalFence(GPUVAddr, u32) and ReleaseFences() with empty default bodies.
//
// gl_rasterizer.cpp / gl_rasterizer.h
//   * SignalFence(): if `fences` is non-empty, retires the front entry (PopAsyncFlushes(), then
//     memory_manager.Write(address, payload), then pop_front()); afterwards queues the new
//     (addr, value) pair, calls texture_cache.CommitAsyncFlushes(), FlushCommands() and
//     SyncGuestHost().
//   * ReleaseFences(): retires every queued entry with the same three steps until `fences` is empty.
//
// texture_cache.h
//   * The hunk that unmarks a surface as render target now calls AsyncFlushSurface() instead of
//     FlushSurface() for surfaces whose params are not tiled.
//   * AsyncFlushSurface() lazily creates uncommited_flushes and appends the surface to it.
//   * CommitAsyncFlushes() pushes uncommited_flushes (which may be null) onto commited_flushes
//     and resets it.
//   * PopAsyncFlushes() returns early on an empty queue; otherwise it pops the oldest entry,
//     calling FlushSurface() on each surface when that entry is non-null.
//
// NOTE(review): this copy of the patch looks damaged by extraction - original line breaks are
//   collapsed and template argument lists appear to be missing (e.g. `static_cast(payload)`,
//   `std::get_if(&next.data)`, `std::array dirty_pointers{}`). Confirm against the real commit
//   before applying.
// NOTE(review): `fences` is used by RasterizerOpenGL but no declaration is visible in the
//   gl_rasterizer.h hunk shown here - presumably declared elsewhere; verify.
// NOTE(review): SignalFence() and ReleaseFences() repeat the same retire-front-fence sequence;
//   a shared private helper would keep them in sync - TODO consider.
// NOTE(review): the `data` binding in the new OnCommandListEndCommand branch of RunThread is
//   not used in that branch's body.
// NOTE(review): PopAsyncFlushes() iterates `*flush_list` by `TSurface&`; if TSurface is a
//   shared pointer type this is fine, otherwise confirm no copy is intended - TODO confirm.
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp index 2605c3b42..c297bc31b 100644 --- a/src/video_core/engines/maxwell_3d.cpp +++ b/src/video_core/engines/maxwell_3d.cpp @@ -397,14 +397,6 @@ void Maxwell3D::StampQueryResult(u64 payload, bool long_query) { } } -void Maxwell3D::ReleaseFences() { - for (const auto pair : delay_fences) { - const auto [addr, payload] = pair; - memory_manager.Write(addr, static_cast(payload)); - } - delay_fences.clear(); -} - void Maxwell3D::ProcessQueryGet() { // TODO(Subv): Support the other query units. ASSERT_MSG(regs.query.query_get.unit == Regs::QueryUnit::Crop, @@ -412,10 +404,12 @@ void Maxwell3D::ProcessQueryGet() { switch (regs.query.query_get.operation) { case Regs::QueryOperation::Release: { - rasterizer.FlushCommands(); - rasterizer.SyncGuestHost(); const u64 result = regs.query.query_sequence; - delay_fences.emplace_back(regs.query.QueryAddress(), result); + if (regs.query.query_get.fence == 1) { + rasterizer.SignalFence(regs.query.QueryAddress(), static_cast(result)); + } else { + StampQueryResult(result, regs.query.query_get.short_query == 0); + } break; } case Regs::QueryOperation::Acquire: diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 0a93827ec..59d5752d2 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1427,8 +1427,6 @@ public: Tables tables{}; } dirty; - void ReleaseFences(); - private: void InitializeRegisterDefaults(); @@ -1469,8 +1467,6 @@ private: std::array dirty_pointers{}; - std::vector> delay_fences; - /// Retrieves information about a specific TIC entry from the TIC buffer. 
Texture::TICEntry GetTICEntry(u32 tic_index) const; diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp index 71ddfbd26..d05b6a9d2 100644 --- a/src/video_core/gpu.cpp +++ b/src/video_core/gpu.cpp @@ -147,7 +147,7 @@ void GPU::SyncGuestHost() { } void GPU::OnCommandListEnd() { - maxwell_3d->ReleaseFences(); + renderer.Rasterizer().ReleaseFences(); } // Note that, traditionally, methods are treated as 4-byte addressable locations, and hence // their numbers are written down multiplied by 4 in Docs. Here we are not multiply by 4. diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index b88445634..fa9991c87 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -157,7 +157,7 @@ public: void FlushCommands(); void SyncGuestHost(); - void OnCommandListEnd(); + virtual void OnCommandListEnd(); /// Returns a reference to the Maxwell3D GPU engine. Engines::Maxwell3D& Maxwell3D(); diff --git a/src/video_core/gpu_asynch.cpp b/src/video_core/gpu_asynch.cpp index 20e73a37e..53305ab43 100644 --- a/src/video_core/gpu_asynch.cpp +++ b/src/video_core/gpu_asynch.cpp @@ -52,4 +52,8 @@ void GPUAsynch::WaitIdle() const { gpu_thread.WaitIdle(); } +void GPUAsynch::OnCommandListEnd() { + gpu_thread.OnCommandListEnd(); +} + } // namespace VideoCommon diff --git a/src/video_core/gpu_asynch.h b/src/video_core/gpu_asynch.h index 03fd0eef0..517658612 100644 --- a/src/video_core/gpu_asynch.h +++ b/src/video_core/gpu_asynch.h @@ -32,6 +32,8 @@ public: void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void WaitIdle() const override; + void OnCommandListEnd() override; + protected: void TriggerCpuInterrupt(u32 syncpoint_id, u32 value) const override; diff --git a/src/video_core/gpu_thread.cpp b/src/video_core/gpu_thread.cpp index 1994d3bb4..251a9d911 100644 --- a/src/video_core/gpu_thread.cpp +++ b/src/video_core/gpu_thread.cpp @@ -37,6 +37,8 @@ static void RunThread(VideoCore::RendererBase& renderer, Core::Frontend::Graphic dma_pusher.DispatchCalls(); } else if 
(const auto data = std::get_if(&next.data)) { renderer.SwapBuffers(data->framebuffer ? &*data->framebuffer : nullptr); + } else if (const auto data = std::get_if(&next.data)) { + renderer.Rasterizer().ReleaseFences(); } else if (const auto data = std::get_if(&next.data)) { renderer.Rasterizer().FlushRegion(data->addr, data->size); } else if (const auto data = std::get_if(&next.data)) { @@ -95,6 +97,10 @@ void ThreadManager::WaitIdle() const { } } +void ThreadManager::OnCommandListEnd() { + PushCommand(OnCommandListEndCommand()); +} + u64 ThreadManager::PushCommand(CommandData&& command_data) { const u64 fence{++state.last_fence}; state.queue.Push(CommandDataContainer(std::move(command_data), fence)); diff --git a/src/video_core/gpu_thread.h b/src/video_core/gpu_thread.h index cd74ad330..9d0877921 100644 --- a/src/video_core/gpu_thread.h +++ b/src/video_core/gpu_thread.h @@ -70,9 +70,12 @@ struct FlushAndInvalidateRegionCommand final { u64 size; }; +/// Command to signal to the GPU thread that processing has ended +struct OnCommandListEndCommand final {}; + using CommandData = std::variant; + InvalidateRegionCommand, FlushAndInvalidateRegionCommand, OnCommandListEndCommand>; struct CommandDataContainer { CommandDataContainer() = default; @@ -122,6 +125,8 @@ public: // Wait until the gpu thread is idle. 
void WaitIdle() const; + void OnCommandListEnd(); + private: /// Pushes a command to be executed by the GPU thread u64 PushCommand(CommandData&& command_data); diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 0d05a3fc7..72f65b166 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -49,6 +49,14 @@ public: /// Records a GPU query and caches it virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional timestamp) = 0; + virtual void SignalFence(GPUVAddr addr, u32 value) { + + } + + virtual void ReleaseFences() { + + } + /// Notify rasterizer that all caches should be flushed to Switch memory virtual void FlushAll() = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 988eaeaa5..93bb33e8c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -676,6 +676,34 @@ void RasterizerOpenGL::SyncGuestHost() { buffer_cache.SyncGuestHost(); } +void RasterizerOpenGL::SignalFence(GPUVAddr addr, u32 value) { + if (!fences.empty()) { + const std::pair& current_fence = fences.front(); + const auto [address, payload] = current_fence; + texture_cache.PopAsyncFlushes(); + auto& gpu{system.GPU()}; + auto& memory_manager{gpu.MemoryManager()}; + memory_manager.Write(address, payload); + fences.pop_front(); + } + fences.emplace_back(addr, value); + texture_cache.CommitAsyncFlushes(); + FlushCommands(); + SyncGuestHost(); +} + +void RasterizerOpenGL::ReleaseFences() { + while (!fences.empty()) { + const std::pair& current_fence = fences.front(); + const auto [address, payload] = current_fence; + texture_cache.PopAsyncFlushes(); + auto& gpu{system.GPU()}; + auto& memory_manager{gpu.MemoryManager()}; + memory_manager.Write(address, payload); + fences.pop_front(); + } +} + void RasterizerOpenGL::FlushAndInvalidateRegion(VAddr addr, u64 size) { if 
(Settings::IsGPULevelExtreme()) { FlushRegion(addr, size); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a870024c6..486a154ad 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -69,6 +69,8 @@ public: void InvalidateRegion(VAddr addr, u64 size) override; void OnCPUWrite(VAddr addr, u64 size) override; void SyncGuestHost() override; + void SignalFence(GPUVAddr addr, u32 value) override; + void ReleaseFences() override; void FlushAndInvalidateRegion(VAddr addr, u64 size) override; void FlushCommands() override; void TickFrame() override; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index d8c8390bb..6629c59ed 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -238,7 +238,7 @@ public: surface->MarkAsRenderTarget(false, NO_RT); const auto& cr_params = surface->GetSurfaceParams(); if (!cr_params.is_tiled) { - FlushSurface(surface); + AsyncFlushSurface(surface); } } render_targets[index].target = surface_view.first; @@ -317,6 +317,26 @@ public: return ++ticks; } + void CommitAsyncFlushes() { + commited_flushes.push_back(uncommited_flushes); + uncommited_flushes.reset(); + } + + void PopAsyncFlushes() { + if (commited_flushes.empty()) { + return; + } + auto& flush_list = commited_flushes.front(); + if (!flush_list) { + commited_flushes.pop_front(); + return; + } + for (TSurface& surface : *flush_list) { + FlushSurface(surface); + } + commited_flushes.pop_front(); + } + protected: explicit TextureCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer, bool is_astc_supported) @@ -1152,6 +1172,13 @@ private: TView view; }; + void AsyncFlushSurface(TSurface& surface) { + if (!uncommited_flushes) { + uncommited_flushes = std::make_shared>(); + } + uncommited_flushes->push_back(surface); + } + 
VideoCore::RasterizerInterface& rasterizer; FormatLookupTable format_lookup_table; @@ -1198,6 +1225,9 @@ private: std::list marked_for_unregister; + std::shared_ptr> uncommited_flushes{}; + std::list>> commited_flushes; + StagingCache staging_cache; std::recursive_mutex mutex; };