Merge pull request #4443 from ameerj/vk-async-shaders

vulkan_renderer: Async shader/graphics pipeline compilation
This commit is contained in:
David 2020-08-17 15:06:11 +10:00 committed by GitHub
commit cbaf1bc711
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 210 additions and 88 deletions

View file

@ -177,15 +177,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
} }
if (device.UseAsynchronousShaders()) { if (device.UseAsynchronousShaders()) {
// Max worker threads we should allow async_shaders.AllocateWorkers();
constexpr u32 MAX_THREADS = 4;
// Deduce how many threads we can use
const u32 threads_used = std::thread::hardware_concurrency() / 4;
// Always allow at least 1 thread regardless of our settings
const auto max_worker_count = std::max(1U, threads_used);
// Don't use more than MAX_THREADS
const auto worker_count = std::min(max_worker_count, MAX_THREADS);
async_shaders.AllocateWorkers(worker_count);
} }
} }

View file

@ -382,6 +382,8 @@ bool VKDevice::Create() {
graphics_queue = logical.GetQueue(graphics_family); graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family); present_queue = logical.GetQueue(present_family);
use_asynchronous_shaders = Settings::values.use_asynchronous_shaders.GetValue();
return true; return true;
} }

View file

@ -202,6 +202,11 @@ public:
return reported_extensions; return reported_extensions;
} }
/// Returns true if the setting for async shader compilation is enabled.
/// This reads the cached use_asynchronous_shaders member, which VKDevice::Create() fills from
/// Settings::values.use_asynchronous_shaders just before it returns true; the setting itself is
/// not re-read here.
bool UseAsynchronousShaders() const {
return use_asynchronous_shaders;
}
/// Checks if the physical device is suitable. /// Checks if the physical device is suitable.
static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface); static bool IsSuitable(vk::PhysicalDevice physical, VkSurfaceKHR surface);
@ -252,6 +257,9 @@ private:
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state. bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.
bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config. bool nv_device_diagnostics_config{}; ///< Support for VK_NV_device_diagnostics_config.
// Asynchronous Graphics Pipeline setting
bool use_asynchronous_shaders{}; ///< Setting to use asynchronous shaders/graphics pipeline
// Telemetry parameters // Telemetry parameters
std::string vendor_name; ///< Device's driver name. std::string vendor_name; ///< Device's driver name.
std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions. std::vector<std::string> reported_extensions; ///< Reported Vulkan extensions.

View file

@ -29,7 +29,7 @@ void InnerFence::Queue() {
} }
ASSERT(!event); ASSERT(!event);
event = device.GetLogical().CreateEvent(); event = device.GetLogical().CreateNewEvent();
ticks = scheduler.Ticks(); ticks = scheduler.Ticks();
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();

View file

@ -78,15 +78,14 @@ VKGraphicsPipeline::VKGraphicsPipeline(const VKDevice& device, VKScheduler& sche
const GraphicsPipelineCacheKey& key, const GraphicsPipelineCacheKey& key,
vk::Span<VkDescriptorSetLayoutBinding> bindings, vk::Span<VkDescriptorSetLayoutBinding> bindings,
const SPIRVProgram& program) const SPIRVProgram& program)
: device{device}, scheduler{scheduler}, fixed_state{key.fixed_state}, hash{key.Hash()}, : device{device}, scheduler{scheduler}, cache_key{key}, hash{cache_key.Hash()},
descriptor_set_layout{CreateDescriptorSetLayout(bindings)}, descriptor_set_layout{CreateDescriptorSetLayout(bindings)},
descriptor_allocator{descriptor_pool, *descriptor_set_layout}, descriptor_allocator{descriptor_pool, *descriptor_set_layout},
update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()}, update_descriptor_queue{update_descriptor_queue}, layout{CreatePipelineLayout()},
descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules( descriptor_template{CreateDescriptorUpdateTemplate(program)}, modules{CreateShaderModules(
program)}, program)},
renderpass{renderpass_cache.GetRenderPass(key.renderpass_params)}, pipeline{CreatePipeline( renderpass{renderpass_cache.GetRenderPass(cache_key.renderpass_params)},
key.renderpass_params, pipeline{CreatePipeline(cache_key.renderpass_params, program)} {}
program)} {}
VKGraphicsPipeline::~VKGraphicsPipeline() = default; VKGraphicsPipeline::~VKGraphicsPipeline() = default;
@ -181,7 +180,7 @@ std::vector<vk::ShaderModule> VKGraphicsPipeline::CreateShaderModules(
vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params,
const SPIRVProgram& program) const { const SPIRVProgram& program) const {
const auto& state = fixed_state; const auto& state = cache_key.fixed_state;
const auto& viewport_swizzles = state.viewport_swizzles; const auto& viewport_swizzles = state.viewport_swizzles;
FixedPipelineState::DynamicState dynamic; FixedPipelineState::DynamicState dynamic;

View file

@ -19,7 +19,27 @@ namespace Vulkan {
using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Maxwell = Tegra::Engines::Maxwell3D::Regs;
/// Key type of the graphics pipeline cache (used as the key of an unordered_map of
/// VKGraphicsPipeline objects). The static_asserts after the struct require it to be trivially
/// copyable, trivially constructible and to have unique object representations.
struct GraphicsPipelineCacheKey; struct GraphicsPipelineCacheKey {
// Render pass parameters; also handed to the render pass cache when a pipeline is built.
RenderPassParams renderpass_params;
// Explicit filler member; RasterizerVulkan::Draw stores 0 in it before every lookup.
// NOTE(review): presumably exists so the struct has no implicit padding bytes — confirm.
u32 padding;
// One GPU virtual address per Maxwell::MaxShaderProgram entry.
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
// Fixed-function state; RasterizerVulkan::Draw fills it from the GPU registers.
FixedPipelineState fixed_state;
// Hash and equality are only declared here; their definitions are outside this view.
std::size_t Hash() const noexcept;
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
// Inequality is defined as the negation of operator==.
bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
return !operator==(rhs);
}
// Sum of the sizes of the three leading members plus fixed_state.Size() (not sizeof(*this)).
std::size_t Size() const noexcept {
return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
}
};
// Compile-time layout/triviality requirements on the key type.
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
class VKDescriptorPool; class VKDescriptorPool;
class VKDevice; class VKDevice;
@ -54,6 +74,10 @@ public:
return renderpass; return renderpass;
} }
/// Returns the cache key this pipeline was constructed with (the cache_key member).
/// NOTE(review): returned by value, so every call copies the whole GraphicsPipelineCacheKey.
GraphicsPipelineCacheKey GetCacheKey() const {
return cache_key;
}
private: private:
vk::DescriptorSetLayout CreateDescriptorSetLayout( vk::DescriptorSetLayout CreateDescriptorSetLayout(
vk::Span<VkDescriptorSetLayoutBinding> bindings) const; vk::Span<VkDescriptorSetLayoutBinding> bindings) const;
@ -70,7 +94,7 @@ private:
const VKDevice& device; const VKDevice& device;
VKScheduler& scheduler; VKScheduler& scheduler;
const FixedPipelineState fixed_state; const GraphicsPipelineCacheKey cache_key;
const u64 hash; const u64 hash;
vk::DescriptorSetLayout descriptor_set_layout; vk::DescriptorSetLayout descriptor_set_layout;

View file

@ -28,6 +28,7 @@
#include "video_core/shader/compiler_settings.h" #include "video_core/shader/compiler_settings.h"
#include "video_core/shader/memory_util.h" #include "video_core/shader/memory_util.h"
#include "video_core/shader_cache.h" #include "video_core/shader_cache.h"
#include "video_core/shader_notify.h"
namespace Vulkan { namespace Vulkan {
@ -205,24 +206,43 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
return last_shaders = shaders; return last_shaders = shaders;
} }
/// Looks up (and, on a miss, builds or schedules) the graphics pipeline for a cache key.
///
/// @param key           Cache key describing the pipeline.
/// @param async_shaders Worker pool used when the build is deferred to a worker thread.
/// @return Pointer to the cached pipeline, or nullptr when the entry exists but its pipeline
///         has not been stored yet (an asynchronous build was queued and is still pending).
///
/// graphics_cache is also written by worker threads through EmplacePipeline(), which holds the
/// pipeline_cache mutex. Every access to the map in this function therefore takes the same
/// mutex, including the synchronous path, which can run while asynchronous builds are in flight
/// (asynchronous shaders enabled, but IsShaderAsync() returned false for this draw).
VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
    const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
    MICROPROFILE_SCOPE(Vulkan_PipelineCache);

    // Fast path: same key as the previous call and its pipeline was already available.
    if (last_graphics_pipeline && last_graphics_key == key) {
        return last_graphics_pipeline;
    }
    last_graphics_key = key;

    const bool build_async =
        device.UseAsynchronousShaders() && async_shaders.IsShaderAsync(system.GPU());

    std::unique_lock lock{pipeline_cache};
    // try_emplace with only the key reserves a slot holding a null unique_ptr on a miss.
    const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
    auto& entry = pair->second;
    if (is_cache_miss) {
        system.GPU().ShaderNotify().MarkSharderBuilding();
        LOG_INFO(Render_Vulkan, "Compile 0x{:016X}", key.Hash());
        const auto [program, bindings] = DecompileShaders(key.fixed_state);
        if (build_async) {
            // The worker builds the pipeline and stores it through EmplacePipeline(), which
            // also reports completion to ShaderNotify. Until then the slot stays null.
            async_shaders.QueueVulkanShader(this, device, scheduler, descriptor_pool,
                                            update_descriptor_queue, renderpass_cache, bindings,
                                            program, key);
        } else {
            entry = std::make_unique<VKGraphicsPipeline>(device, scheduler, descriptor_pool,
                                                         update_descriptor_queue, renderpass_cache,
                                                         key, bindings, program);
            system.GPU().ShaderNotify().MarkShaderComplete();
        }
    }
    // May be nullptr while an asynchronous build for this key is pending; callers must check.
    last_graphics_pipeline = entry.get();
    return last_graphics_pipeline;
}
VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {
@ -277,6 +297,12 @@ VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCach
return *entry; return *entry;
} }
/// Stores a pipeline built on a worker thread in the cache slot reserved for its key.
///
/// @param pipeline Finished pipeline; ownership moves into graphics_cache when its slot exists.
///
/// ShaderNotify is always told the build finished, so the MarkSharderBuilding() issued when the
/// build was queued stays balanced even if the result cannot be stored.
void VKPipelineCache::EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline) {
    system.GPU().ShaderNotify().MarkShaderComplete();
    if (!pipeline) {
        // Nothing to store; avoid dereferencing a null pipeline below.
        return;
    }

    std::unique_lock lock{pipeline_cache};
    // Use find() rather than at(): at() throws std::out_of_range when the key is absent, and an
    // exception escaping a worker thread terminates the process.
    const auto it = graphics_cache.find(pipeline->GetCacheKey());
    if (it == graphics_cache.end()) {
        // NOTE(review): the slot reserved by GetGraphicsPipeline() is gone (presumably erased
        // while the build was in flight — confirm against OnShaderRemoval). Drop the pipeline
        // instead of re-inserting an entry nobody asked for.
        return;
    }
    it->second = std::move(pipeline);
}
void VKPipelineCache::OnShaderRemoval(Shader* shader) { void VKPipelineCache::OnShaderRemoval(Shader* shader) {
bool finished = false; bool finished = false;
const auto Finish = [&] { const auto Finish = [&] {
@ -312,8 +338,7 @@ void VKPipelineCache::OnShaderRemoval(Shader* shader) {
} }
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>>
VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { VKPipelineCache::DecompileShaders(const FixedPipelineState& fixed_state) {
const auto& fixed_state = key.fixed_state;
auto& memory_manager = system.GPU().MemoryManager(); auto& memory_manager = system.GPU().MemoryManager();
const auto& gpu = system.GPU().Maxwell3D(); const auto& gpu = system.GPU().Maxwell3D();

View file

@ -22,6 +22,7 @@
#include "video_core/renderer_vulkan/vk_renderpass_cache.h" #include "video_core/renderer_vulkan/vk_renderpass_cache.h"
#include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h"
#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/async_shaders.h"
#include "video_core/shader/memory_util.h" #include "video_core/shader/memory_util.h"
#include "video_core/shader/registry.h" #include "video_core/shader/registry.h"
#include "video_core/shader/shader_ir.h" #include "video_core/shader/shader_ir.h"
@ -43,28 +44,6 @@ class VKUpdateDescriptorQueue;
using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Maxwell = Tegra::Engines::Maxwell3D::Regs;
struct GraphicsPipelineCacheKey {
RenderPassParams renderpass_params;
u32 padding;
std::array<GPUVAddr, Maxwell::MaxShaderProgram> shaders;
FixedPipelineState fixed_state;
std::size_t Hash() const noexcept;
bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept;
bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept {
return !operator==(rhs);
}
std::size_t Size() const noexcept {
return sizeof(renderpass_params) + sizeof(padding) + sizeof(shaders) + fixed_state.Size();
}
};
static_assert(std::has_unique_object_representations_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_copyable_v<GraphicsPipelineCacheKey>);
static_assert(std::is_trivially_constructible_v<GraphicsPipelineCacheKey>);
struct ComputePipelineCacheKey { struct ComputePipelineCacheKey {
GPUVAddr shader; GPUVAddr shader;
u32 shared_memory_size; u32 shared_memory_size;
@ -152,16 +131,19 @@ public:
std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key); VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
VideoCommon::Shader::AsyncShaders& async_shaders);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
void EmplacePipeline(std::unique_ptr<VKGraphicsPipeline> pipeline);
protected: protected:
void OnShaderRemoval(Shader* shader) final; void OnShaderRemoval(Shader* shader) final;
private: private:
std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders( std::pair<SPIRVProgram, std::vector<VkDescriptorSetLayoutBinding>> DecompileShaders(
const GraphicsPipelineCacheKey& key); const FixedPipelineState& fixed_state);
Core::System& system; Core::System& system;
const VKDevice& device; const VKDevice& device;
@ -178,6 +160,7 @@ private:
GraphicsPipelineCacheKey last_graphics_key; GraphicsPipelineCacheKey last_graphics_key;
VKGraphicsPipeline* last_graphics_pipeline = nullptr; VKGraphicsPipeline* last_graphics_pipeline = nullptr;
std::mutex pipeline_cache;
std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>> std::unordered_map<GraphicsPipelineCacheKey, std::unique_ptr<VKGraphicsPipeline>>
graphics_cache; graphics_cache;
std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache; std::unordered_map<ComputePipelineCacheKey, std::unique_ptr<VKComputePipeline>> compute_cache;

View file

@ -14,6 +14,7 @@
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
#include "common/microprofile.h" #include "common/microprofile.h"
#include "common/scope_exit.h"
#include "core/core.h" #include "core/core.h"
#include "core/settings.h" #include "core/settings.h"
#include "video_core/engines/kepler_compute.h" #include "video_core/engines/kepler_compute.h"
@ -400,8 +401,12 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
sampler_cache(device), sampler_cache(device),
fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache), fence_manager(system, *this, device, scheduler, texture_cache, buffer_cache, query_cache),
query_cache(system, *this, device, scheduler), wfi_event{device.GetLogical().CreateEvent()} { query_cache(system, *this, device, scheduler),
wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
scheduler.SetQueryCache(query_cache); scheduler.SetQueryCache(query_cache);
if (device.UseAsynchronousShaders()) {
async_shaders.AllocateWorkers();
}
} }
RasterizerVulkan::~RasterizerVulkan() = default; RasterizerVulkan::~RasterizerVulkan() = default;
@ -413,6 +418,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
query_cache.UpdateCounters(); query_cache.UpdateCounters();
SCOPE_EXIT({ system.GPU().TickWork(); });
const auto& gpu = system.GPU().Maxwell3D(); const auto& gpu = system.GPU().Maxwell3D();
GraphicsPipelineCacheKey key; GraphicsPipelineCacheKey key;
key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported()); key.fixed_state.Fill(gpu.regs, device.IsExtExtendedDynamicStateSupported());
@ -439,10 +446,15 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
key.renderpass_params = GetRenderPassParams(texceptions); key.renderpass_params = GetRenderPassParams(texceptions);
key.padding = 0; key.padding = 0;
auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); auto* pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
scheduler.BindGraphicsPipeline(pipeline.GetHandle()); if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
// Async graphics pipeline was not ready.
return;
}
const auto renderpass = pipeline.GetRenderPass(); scheduler.BindGraphicsPipeline(pipeline->GetHandle());
const auto renderpass = pipeline->GetRenderPass();
const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
scheduler.RequestRenderpass(renderpass, framebuffer, render_area); scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
@ -452,8 +464,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
BeginTransformFeedback(); BeginTransformFeedback();
const auto pipeline_layout = pipeline.GetLayout(); const auto pipeline_layout = pipeline->GetLayout();
const auto descriptor_set = pipeline.CommitDescriptorSet(); const auto descriptor_set = pipeline->CommitDescriptorSet();
scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
if (descriptor_set) { if (descriptor_set) {
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,
@ -463,8 +475,6 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
}); });
EndTransformFeedback(); EndTransformFeedback();
system.GPU().TickWork();
} }
void RasterizerVulkan::Clear() { void RasterizerVulkan::Clear() {

View file

@ -32,6 +32,7 @@
#include "video_core/renderer_vulkan/vk_texture_cache.h" #include "video_core/renderer_vulkan/vk_texture_cache.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h" #include "video_core/renderer_vulkan/vk_update_descriptor.h"
#include "video_core/renderer_vulkan/wrapper.h" #include "video_core/renderer_vulkan/wrapper.h"
#include "video_core/shader/async_shaders.h"
namespace Core { namespace Core {
class System; class System;
@ -136,6 +137,14 @@ public:
u32 pixel_stride) override; u32 pixel_stride) override;
void SetupDirtyFlags() override; void SetupDirtyFlags() override;
/// Returns a mutable reference to the AsyncShaders member held by this rasterizer.
VideoCommon::Shader::AsyncShaders& GetAsyncShaders() {
return async_shaders;
}
/// Returns a read-only reference to the AsyncShaders member held by this rasterizer.
const VideoCommon::Shader::AsyncShaders& GetAsyncShaders() const {
return async_shaders;
}
/// Maximum supported size that a constbuffer can have in bytes. /// Maximum supported size that a constbuffer can have in bytes.
static constexpr std::size_t MaxConstbufferSize = 0x10000; static constexpr std::size_t MaxConstbufferSize = 0x10000;
static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0,
@ -297,6 +306,7 @@ private:
vk::Buffer default_buffer; vk::Buffer default_buffer;
VKMemoryCommit default_buffer_commit; VKMemoryCommit default_buffer_commit;
vk::Event wfi_event; vk::Event wfi_event;
VideoCommon::Shader::AsyncShaders async_shaders;
std::array<View, Maxwell::NumRenderTargets> color_attachments; std::array<View, Maxwell::NumRenderTargets> color_attachments;
View zeta_attachment; View zeta_attachment;

View file

@ -644,7 +644,7 @@ ShaderModule Device::CreateShaderModule(const VkShaderModuleCreateInfo& ci) cons
return ShaderModule(object, handle, *dld); return ShaderModule(object, handle, *dld);
} }
Event Device::CreateEvent() const { Event Device::CreateNewEvent() const {
static constexpr VkEventCreateInfo ci{ static constexpr VkEventCreateInfo ci{
.sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_EVENT_CREATE_INFO,
.pNext = nullptr, .pNext = nullptr,

View file

@ -721,7 +721,7 @@ public:
ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const; ShaderModule CreateShaderModule(const VkShaderModuleCreateInfo& ci) const;
Event CreateEvent() const; Event CreateNewEvent() const;
SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const; SwapchainKHR CreateSwapchainKHR(const VkSwapchainCreateInfoKHR& ci) const;

View file

@ -2,7 +2,6 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <chrono>
#include <condition_variable> #include <condition_variable>
#include <mutex> #include <mutex>
#include <thread> #include <thread>
@ -20,9 +19,18 @@ AsyncShaders::~AsyncShaders() {
KillWorkers(); KillWorkers();
} }
void AsyncShaders::AllocateWorkers(std::size_t num_workers) { void AsyncShaders::AllocateWorkers() {
// If we're already have workers queued or don't want to queue workers, ignore // Max worker threads we should allow
if (num_workers == worker_threads.size() || num_workers == 0) { constexpr u32 MAX_THREADS = 4;
// Deduce how many threads we can use
const u32 threads_used = std::thread::hardware_concurrency() / 4;
// Always allow at least 1 thread regardless of our settings
const auto max_worker_count = std::max(1U, threads_used);
// Don't use more than MAX_THREADS
const auto num_workers = std::min(max_worker_count, MAX_THREADS);
// If we already have workers queued, ignore
if (num_workers == worker_threads.size()) {
return; return;
} }
@ -111,24 +119,50 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
VideoCommon::Shader::CompilerSettings compiler_settings, VideoCommon::Shader::CompilerSettings compiler_settings,
const VideoCommon::Shader::Registry& registry, const VideoCommon::Shader::Registry& registry,
VAddr cpu_addr) { VAddr cpu_addr) {
WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM WorkerParams params{
: AsyncShaders::Backend::OpenGL, .backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL,
device, .device = &device,
shader_type, .shader_type = shader_type,
uid, .uid = uid,
std::move(code), .code = std::move(code),
std::move(code_b), .code_b = std::move(code_b),
main_offset, .main_offset = main_offset,
compiler_settings, .compiler_settings = compiler_settings,
registry, .registry = registry,
cpu_addr}; .cpu_address = cpu_addr,
};
std::unique_lock lock(queue_mutex); std::unique_lock lock(queue_mutex);
pending_queue.push_back(std::move(params)); pending_queue.push(std::move(params));
cv.notify_one();
}
/// Queues the construction of a Vulkan graphics pipeline on the worker threads.
///
/// A worker later builds a VKGraphicsPipeline from these arguments and passes it to
/// pp_cache->EmplacePipeline(). Only the addresses of the cache, device, scheduler, descriptor
/// pool, update descriptor queue and render pass cache are stored, so those objects must stay
/// alive until the queued job has run. bindings, program and key are stored by value.
///
/// @param pp_cache                Pipeline cache that receives the finished pipeline.
/// @param device                  Vulkan device forwarded to the pipeline constructor.
/// @param scheduler               Scheduler forwarded to the pipeline constructor.
/// @param descriptor_pool         Descriptor pool forwarded to the pipeline constructor.
/// @param update_descriptor_queue Update descriptor queue forwarded to the pipeline constructor.
/// @param renderpass_cache        Render pass cache forwarded to the pipeline constructor.
/// @param bindings                Descriptor set layout bindings of the pipeline.
/// @param program                 Decompiled SPIR-V program of the pipeline.
/// @param key                     Cache key the finished pipeline is stored under.
void AsyncShaders::QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache,
                                     const Vulkan::VKDevice& device, Vulkan::VKScheduler& scheduler,
                                     Vulkan::VKDescriptorPool& descriptor_pool,
                                     Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
                                     Vulkan::VKRenderPassCache& renderpass_cache,
                                     std::vector<VkDescriptorSetLayoutBinding> bindings,
                                     Vulkan::SPIRVProgram program,
                                     Vulkan::GraphicsPipelineCacheKey key) {
    // bindings and program are by-value sink parameters: move them into the job instead of
    // copying them a second time.
    WorkerParams params{
        .backend = Backend::Vulkan,
        .pp_cache = pp_cache,
        .vk_device = &device,
        .scheduler = &scheduler,
        .descriptor_pool = &descriptor_pool,
        .update_descriptor_queue = &update_descriptor_queue,
        .renderpass_cache = &renderpass_cache,
        .bindings = std::move(bindings),
        .program = std::move(program),
        .key = key,
    };

    std::unique_lock lock(queue_mutex);
    pending_queue.push(std::move(params));
    // Wake one worker waiting on the queue.
    cv.notify_one();
}
void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) { void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context) {
using namespace std::chrono_literals;
while (!is_thread_exiting.load(std::memory_order_relaxed)) { while (!is_thread_exiting.load(std::memory_order_relaxed)) {
std::unique_lock lock{queue_mutex}; std::unique_lock lock{queue_mutex};
cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; }); cv.wait(lock, [this] { return HasWorkQueued() || is_thread_exiting; });
@ -144,18 +178,17 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
if (pending_queue.empty()) { if (pending_queue.empty()) {
continue; continue;
} }
// Pull work from queue // Pull work from queue
WorkerParams work = std::move(pending_queue.front()); WorkerParams work = std::move(pending_queue.front());
pending_queue.pop_front(); pending_queue.pop();
lock.unlock(); lock.unlock();
if (work.backend == AsyncShaders::Backend::OpenGL || if (work.backend == Backend::OpenGL || work.backend == Backend::GLASM) {
work.backend == AsyncShaders::Backend::GLASM) { const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, *work.registry);
const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, work.registry);
const auto scope = context->Acquire(); const auto scope = context->Acquire();
auto program = auto program =
OpenGL::BuildShader(work.device, work.shader_type, work.uid, ir, work.registry); OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, *work.registry);
Result result{}; Result result{};
result.backend = work.backend; result.backend = work.backend;
result.cpu_address = work.cpu_address; result.cpu_address = work.cpu_address;
@ -164,9 +197,9 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
result.code_b = std::move(work.code_b); result.code_b = std::move(work.code_b);
result.shader_type = work.shader_type; result.shader_type = work.shader_type;
if (work.backend == AsyncShaders::Backend::OpenGL) { if (work.backend == Backend::OpenGL) {
result.program.opengl = std::move(program->source_program); result.program.opengl = std::move(program->source_program);
} else if (work.backend == AsyncShaders::Backend::GLASM) { } else if (work.backend == Backend::GLASM) {
result.program.glasm = std::move(program->assembly_program); result.program.glasm = std::move(program->assembly_program);
} }
@ -174,6 +207,13 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
std::unique_lock complete_lock(completed_mutex); std::unique_lock complete_lock(completed_mutex);
finished_work.push_back(std::move(result)); finished_work.push_back(std::move(result));
} }
} else if (work.backend == Backend::Vulkan) {
auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
*work.vk_device, *work.scheduler, *work.descriptor_pool,
*work.update_descriptor_queue, *work.renderpass_cache, work.key, work.bindings,
work.program);
work.pp_cache->EmplacePipeline(std::move(pipeline));
} }
} }
} }

View file

@ -14,6 +14,10 @@
#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_vulkan/vk_device.h"
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
namespace Core::Frontend { namespace Core::Frontend {
class EmuWindow; class EmuWindow;
@ -24,6 +28,10 @@ namespace Tegra {
class GPU; class GPU;
} }
namespace Vulkan {
class VKPipelineCache;
}
namespace VideoCommon::Shader { namespace VideoCommon::Shader {
class AsyncShaders { class AsyncShaders {
@ -31,6 +39,7 @@ public:
enum class Backend { enum class Backend {
OpenGL, OpenGL,
GLASM, GLASM,
Vulkan,
}; };
struct ResultPrograms { struct ResultPrograms {
@ -52,7 +61,7 @@ public:
~AsyncShaders(); ~AsyncShaders();
/// Start up shader worker threads /// Start up shader worker threads
void AllocateWorkers(std::size_t num_workers); void AllocateWorkers();
/// Clear the shader queue and kill all worker threads /// Clear the shader queue and kill all worker threads
void FreeWorkers(); void FreeWorkers();
@ -76,6 +85,14 @@ public:
VideoCommon::Shader::CompilerSettings compiler_settings, VideoCommon::Shader::CompilerSettings compiler_settings,
const VideoCommon::Shader::Registry& registry, VAddr cpu_addr); const VideoCommon::Shader::Registry& registry, VAddr cpu_addr);
void QueueVulkanShader(Vulkan::VKPipelineCache* pp_cache, const Vulkan::VKDevice& device,
Vulkan::VKScheduler& scheduler,
Vulkan::VKDescriptorPool& descriptor_pool,
Vulkan::VKUpdateDescriptorQueue& update_descriptor_queue,
Vulkan::VKRenderPassCache& renderpass_cache,
std::vector<VkDescriptorSetLayoutBinding> bindings,
Vulkan::SPIRVProgram program, Vulkan::GraphicsPipelineCacheKey key);
private: private:
void ShaderCompilerThread(Core::Frontend::GraphicsContext* context); void ShaderCompilerThread(Core::Frontend::GraphicsContext* context);
@ -83,16 +100,28 @@ private:
bool HasWorkQueued(); bool HasWorkQueued();
struct WorkerParams { struct WorkerParams {
AsyncShaders::Backend backend; Backend backend;
OpenGL::Device device; // For OGL
const OpenGL::Device* device;
Tegra::Engines::ShaderType shader_type; Tegra::Engines::ShaderType shader_type;
u64 uid; u64 uid;
std::vector<u64> code; std::vector<u64> code;
std::vector<u64> code_b; std::vector<u64> code_b;
u32 main_offset; u32 main_offset;
VideoCommon::Shader::CompilerSettings compiler_settings; VideoCommon::Shader::CompilerSettings compiler_settings;
VideoCommon::Shader::Registry registry; std::optional<VideoCommon::Shader::Registry> registry;
VAddr cpu_address; VAddr cpu_address;
// For Vulkan
Vulkan::VKPipelineCache* pp_cache;
const Vulkan::VKDevice* vk_device;
Vulkan::VKScheduler* scheduler;
Vulkan::VKDescriptorPool* descriptor_pool;
Vulkan::VKUpdateDescriptorQueue* update_descriptor_queue;
Vulkan::VKRenderPassCache* renderpass_cache;
std::vector<VkDescriptorSetLayoutBinding> bindings;
Vulkan::SPIRVProgram program;
Vulkan::GraphicsPipelineCacheKey key;
}; };
std::condition_variable cv; std::condition_variable cv;
@ -101,7 +130,7 @@ private:
std::atomic<bool> is_thread_exiting{}; std::atomic<bool> is_thread_exiting{};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
std::vector<std::thread> worker_threads; std::vector<std::thread> worker_threads;
std::deque<WorkerParams> pending_queue; std::queue<WorkerParams> pending_queue;
std::vector<AsyncShaders::Result> finished_work; std::vector<AsyncShaders::Result> finished_work;
Core::Frontend::EmuWindow& emu_window; Core::Frontend::EmuWindow& emu_window;
}; };

View file

@ -92,7 +92,7 @@
<string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string> <string>Enables asynchronous shader compilation, which may reduce shader stutter. This feature is experimental.</string>
</property> </property>
<property name="text"> <property name="text">
<string>Use asynchronous shader building (experimental, OpenGL or Assembly shaders only)</string> <string>Use asynchronous shader building (experimental)</string>
</property> </property>
</widget> </widget>
</item> </item>