From 405eae3734dd6bfb259df0afceecf4de1f1262ce Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Sun, 25 Jun 2023 18:59:33 -0400 Subject: [PATCH] shaders: Track local memory usage --- src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp | 4 ++++ src/shader_recompiler/shader_info.h | 1 + src/video_core/renderer_opengl/gl_compute_pipeline.cpp | 1 + src/video_core/renderer_opengl/gl_compute_pipeline.h | 5 +++++ src/video_core/renderer_opengl/gl_graphics_pipeline.cpp | 1 + src/video_core/renderer_opengl/gl_graphics_pipeline.h | 5 +++++ src/video_core/renderer_opengl/gl_rasterizer.cpp | 8 ++++++-- 7 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp index 5a4195217..70292686f 100644 --- a/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp +++ b/src/shader_recompiler/ir_opt/collect_shader_info_pass.cpp @@ -424,6 +424,10 @@ void VisitUsages(Info& info, IR::Inst& inst) { info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2; info.used_storage_buffer_types |= IR::Type::U32 | IR::Type::U32x2 | IR::Type::U32x4; break; + case IR::Opcode::LoadLocal: + case IR::Opcode::WriteLocal: + info.uses_local_memory = true; + break; default: break; } diff --git a/src/shader_recompiler/shader_info.h b/src/shader_recompiler/shader_info.h index d308db942..b4b4afd37 100644 --- a/src/shader_recompiler/shader_info.h +++ b/src/shader_recompiler/shader_info.h @@ -172,6 +172,7 @@ struct Info { bool stores_indexed_attributes{}; bool stores_global_memory{}; + bool uses_local_memory{}; bool uses_fp16{}; bool uses_fp64{}; diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp index 3151c0db8..f9ca55c36 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.cpp @@ -63,6 +63,7 @@ ComputePipeline::ComputePipeline(const Device& device, TextureCache& texture_cac writes_global_memory = !use_storage_buffers && std::ranges::any_of(info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + uses_local_memory = info.uses_local_memory; if (force_context_flush) { std::scoped_lock lock{built_mutex}; built_fence.Create(); diff --git a/src/video_core/renderer_opengl/gl_compute_pipeline.h b/src/video_core/renderer_opengl/gl_compute_pipeline.h index 9bcc72b59..c26b4fa5e 100644 --- a/src/video_core/renderer_opengl/gl_compute_pipeline.h +++ b/src/video_core/renderer_opengl/gl_compute_pipeline.h @@ -59,6 +59,10 @@ public: return writes_global_memory; } + [[nodiscard]] bool UsesLocalMemory() const noexcept { + return uses_local_memory; + } + void SetEngine(Tegra::Engines::KeplerCompute* kepler_compute_, Tegra::MemoryManager* gpu_memory_) { kepler_compute = kepler_compute_; @@ -84,6 +88,7 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; + bool uses_local_memory{}; std::mutex built_mutex; std::condition_variable built_condvar; diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp index c58f760b8..23a48c6fe 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.cpp @@ -215,6 +215,7 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c writes_global_memory |= std::ranges::any_of( info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; }); + uses_local_memory |= info.uses_local_memory; } ASSERT(num_textures <= MAX_TEXTURES); ASSERT(num_images <= MAX_IMAGES); diff --git a/src/video_core/renderer_opengl/gl_graphics_pipeline.h b/src/video_core/renderer_opengl/gl_graphics_pipeline.h index 7bab3be0a..7b3d7eae8 100644 --- a/src/video_core/renderer_opengl/gl_graphics_pipeline.h +++ b/src/video_core/renderer_opengl/gl_graphics_pipeline.h @@ -98,6 +98,10 @@ public: return writes_global_memory; } + [[nodiscard]] bool UsesLocalMemory() const noexcept { + return uses_local_memory; + } + [[nodiscard]] bool IsBuilt() noexcept; template @@ -146,6 +150,7 @@ private: bool use_storage_buffers{}; bool writes_global_memory{}; + bool uses_local_memory{}; static constexpr std::size_t XFB_ENTRY_STRIDE = 3; GLsizei num_xfb_attribs{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d03288516..edf527f2d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -222,7 +222,9 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) { gpu.TickWork(); std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; - program_manager.LocalMemoryWarmup(); + if (pipeline->UsesLocalMemory()) { + program_manager.LocalMemoryWarmup(); + } pipeline->SetEngine(maxwell3d, gpu_memory); pipeline->Configure(is_indexed); @@ -372,7 +374,9 @@ void RasterizerOpenGL::DispatchCompute() { if (!pipeline) { return; } - program_manager.LocalMemoryWarmup(); + if (pipeline->UsesLocalMemory()) { + program_manager.LocalMemoryWarmup(); + } pipeline->SetEngine(kepler_compute, gpu_memory); pipeline->Configure(); const auto& qmd{kepler_compute->launch_description};