Address feedback. Bruteforce delete duplicates

This commit is contained in:
ameerj 2020-07-30 15:41:11 -04:00
parent 6ac97405df
commit 4539073ce1
7 changed files with 115 additions and 79 deletions

View file

@ -54,7 +54,7 @@ public:
return renderpass; return renderpass;
} }
const GraphicsPipelineCacheKey& GetCacheKey() { const GraphicsPipelineCacheKey& GetCacheKey() const {
return m_key; return m_key;
} }

View file

@ -205,20 +205,20 @@ std::array<Shader*, Maxwell::MaxShaderProgram> VKPipelineCache::GetShaders() {
return last_shaders = shaders; return last_shaders = shaders;
} }
VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline( VKGraphicsPipeline* VKPipelineCache::GetGraphicsPipeline(
const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) { const GraphicsPipelineCacheKey& key, VideoCommon::Shader::AsyncShaders& async_shaders) {
MICROPROFILE_SCOPE(Vulkan_PipelineCache); MICROPROFILE_SCOPE(Vulkan_PipelineCache);
if (last_graphics_pipeline && last_graphics_key == key) { if (last_graphics_pipeline && last_graphics_key == key) {
return *last_graphics_pipeline; return last_graphics_pipeline;
} }
last_graphics_key = key; last_graphics_key = key;
if (device.UseAsynchronousShaders()) { if (device.UseAsynchronousShaders()) {
auto work = async_shaders.GetCompletedWork(); auto work = async_shaders.GetCompletedWork();
for (std::size_t i = 0; i < work.size(); ++i) { for (auto& w : work) {
auto& entry = graphics_cache.at(work[i].pipeline->GetCacheKey()); auto& entry = graphics_cache.at(w.pipeline->GetCacheKey());
entry = std::move(work[i].pipeline); entry = std::move(w.pipeline);
} }
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
if (is_cache_miss) { if (is_cache_miss) {
@ -227,7 +227,8 @@ VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
async_shaders.QueueVulkanShader(this, bindings, program, key.renderpass_params, async_shaders.QueueVulkanShader(this, bindings, program, key.renderpass_params,
key.padding, key.shaders, key.fixed_state); key.padding, key.shaders, key.fixed_state);
} }
return *(last_graphics_pipeline = graphics_cache.at(key).get()); last_graphics_pipeline = graphics_cache.at(key).get();
return last_graphics_pipeline;
} }
const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key); const auto [pair, is_cache_miss] = graphics_cache.try_emplace(key);
@ -239,7 +240,8 @@ VKGraphicsPipeline& VKPipelineCache::GetGraphicsPipeline(
update_descriptor_queue, renderpass_cache, key, update_descriptor_queue, renderpass_cache, key,
bindings, program); bindings, program);
} }
return *(last_graphics_pipeline = entry.get()); last_graphics_pipeline = entry.get();
return last_graphics_pipeline;
} }
VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) { VKComputePipeline& VKPipelineCache::GetComputePipeline(const ComputePipelineCacheKey& key) {

View file

@ -153,31 +153,46 @@ public:
std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders(); std::array<Shader*, Maxwell::MaxShaderProgram> GetShaders();
VKGraphicsPipeline& GetGraphicsPipeline(const GraphicsPipelineCacheKey& key, VKGraphicsPipeline* GetGraphicsPipeline(const GraphicsPipelineCacheKey& key,
VideoCommon::Shader::AsyncShaders& async_shaders); VideoCommon::Shader::AsyncShaders& async_shaders);
VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key); VKComputePipeline& GetComputePipeline(const ComputePipelineCacheKey& key);
const VKDevice& GetDevice() { const VKDevice& GetDevice() const {
return device; return device;
} }
VKScheduler& GetScheduler() { VKScheduler& GetScheduler() {
return scheduler; return scheduler;
} }
const VKScheduler& GetScheduler() const {
return scheduler;
}
VKDescriptorPool& GetDescriptorPool() { VKDescriptorPool& GetDescriptorPool() {
return descriptor_pool; return descriptor_pool;
} }
const VKDescriptorPool& GetDescriptorPool() const {
return descriptor_pool;
}
VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() { VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() {
return update_descriptor_queue; return update_descriptor_queue;
} }
const VKUpdateDescriptorQueue& GetUpdateDescriptorQueue() const {
return update_descriptor_queue;
}
VKRenderPassCache& GetRenderpassCache() { VKRenderPassCache& GetRenderpassCache() {
return renderpass_cache; return renderpass_cache;
} }
const VKRenderPassCache& GetRenderpassCache() const {
return renderpass_cache;
}
protected: protected:
void OnShaderRemoval(Shader* shader) final; void OnShaderRemoval(Shader* shader) final;

View file

@ -404,10 +404,12 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} { wfi_event{device.GetLogical().CreateNewEvent()}, async_shaders{renderer} {
scheduler.SetQueryCache(query_cache); scheduler.SetQueryCache(query_cache);
if (device.UseAsynchronousShaders()) { if (device.UseAsynchronousShaders()) {
// The following is subject to move into the allocate workers method, to be api agnostic
// Max worker threads we should allow // Max worker threads we should allow
constexpr auto MAX_THREADS = 2u; constexpr u32 MAX_THREADS = 4;
// Amount of threads we should reserve for other parts of yuzu // Amount of threads we should reserve for other parts of yuzu
constexpr auto RESERVED_THREADS = 6u; constexpr u32 RESERVED_THREADS = 6;
// Get the amount of threads we can use(this can return zero) // Get the amount of threads we can use(this can return zero)
const auto cpu_thread_count = const auto cpu_thread_count =
std::max(RESERVED_THREADS, std::thread::hardware_concurrency()); std::max(RESERVED_THREADS, std::thread::hardware_concurrency());
@ -456,16 +458,16 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
key.renderpass_params = GetRenderPassParams(texceptions); key.renderpass_params = GetRenderPassParams(texceptions);
key.padding = 0; key.padding = 0;
auto& pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders); auto pipeline = pipeline_cache.GetGraphicsPipeline(key, async_shaders);
if (&pipeline == nullptr || pipeline.GetHandle() == VK_NULL_HANDLE) { if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) {
// Async graphics pipeline was not ready. // Async graphics pipeline was not ready.
system.GPU().TickWork(); system.GPU().TickWork();
return; return;
} }
scheduler.BindGraphicsPipeline(pipeline.GetHandle()); scheduler.BindGraphicsPipeline(pipeline->GetHandle());
const auto renderpass = pipeline.GetRenderPass(); const auto renderpass = pipeline->GetRenderPass();
const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass);
scheduler.RequestRenderpass(renderpass, framebuffer, render_area); scheduler.RequestRenderpass(renderpass, framebuffer, render_area);
@ -475,8 +477,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
BeginTransformFeedback(); BeginTransformFeedback();
const auto pipeline_layout = pipeline.GetLayout(); const auto pipeline_layout = pipeline->GetLayout();
const auto descriptor_set = pipeline.CommitDescriptorSet(); const auto descriptor_set = pipeline->CommitDescriptorSet();
scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) { scheduler.Record([pipeline_layout, descriptor_set, draw_params](vk::CommandBuffer cmdbuf) {
if (descriptor_set) { if (descriptor_set) {
cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout, cmdbuf.BindDescriptorSets(VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline_layout,

View file

@ -287,7 +287,6 @@ private:
VKMemoryManager& memory_manager; VKMemoryManager& memory_manager;
StateTracker& state_tracker; StateTracker& state_tracker;
VKScheduler& scheduler; VKScheduler& scheduler;
VideoCommon::Shader::AsyncShaders async_shaders;
VKStagingBufferPool staging_pool; VKStagingBufferPool staging_pool;
VKDescriptorPool descriptor_pool; VKDescriptorPool descriptor_pool;
@ -307,6 +306,7 @@ private:
vk::Buffer default_buffer; vk::Buffer default_buffer;
VKMemoryCommit default_buffer_commit; VKMemoryCommit default_buffer_commit;
vk::Event wfi_event; vk::Event wfi_event;
VideoCommon::Shader::AsyncShaders async_shaders;
std::array<View, Maxwell::NumRenderTargets> color_attachments; std::array<View, Maxwell::NumRenderTargets> color_attachments;
View zeta_attachment; View zeta_attachment;

View file

@ -111,20 +111,19 @@ void AsyncShaders::QueueOpenGLShader(const OpenGL::Device& device,
VideoCommon::Shader::CompilerSettings compiler_settings, VideoCommon::Shader::CompilerSettings compiler_settings,
const VideoCommon::Shader::Registry& registry, const VideoCommon::Shader::Registry& registry,
VAddr cpu_addr) { VAddr cpu_addr) {
WorkerParams params{device.UseAssemblyShaders() ? AsyncShaders::Backend::GLASM auto p = std::make_unique<WorkerParams>();
: AsyncShaders::Backend::OpenGL, p->backend = device.UseAssemblyShaders() ? Backend::GLASM : Backend::OpenGL;
&device, p->device = &device;
shader_type, p->shader_type = shader_type;
uid, p->uid = uid;
std::move(code), p->code = std::move(code);
std::move(code_b), p->code_b = std::move(code_b);
main_offset, p->main_offset = main_offset;
compiler_settings, p->compiler_settings = compiler_settings;
&registry, p->registry = &registry;
cpu_addr}; p->cpu_address = cpu_addr;
std::unique_lock lock(queue_mutex); std::unique_lock lock(queue_mutex);
pending_queue.push_back(std::move(params)); pending_queue.push(std::move(p));
cv.notify_one(); cv.notify_one();
} }
@ -134,19 +133,19 @@ void AsyncShaders::QueueVulkanShader(
std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders, std::array<GPUVAddr, Vulkan::Maxwell::MaxShaderProgram> shaders,
Vulkan::FixedPipelineState fixed_state) { Vulkan::FixedPipelineState fixed_state) {
WorkerParams params{ auto p = std::make_unique<WorkerParams>();
.backend = AsyncShaders::Backend::Vulkan,
.pp_cache = pp_cache, p->backend = Backend::Vulkan;
.bindings = bindings, p->pp_cache = pp_cache;
.program = program, p->bindings = bindings;
.renderpass_params = renderpass_params, p->program = program;
.padding = padding, p->renderpass_params = renderpass_params;
.shaders = shaders, p->padding = padding;
.fixed_state = fixed_state, p->shaders = shaders;
}; p->fixed_state = fixed_state;
std::unique_lock lock(queue_mutex); std::unique_lock lock(queue_mutex);
pending_queue.push_back(std::move(params)); pending_queue.push(std::move(p));
cv.notify_one(); cv.notify_one();
} }
@ -168,64 +167,82 @@ void AsyncShaders::ShaderCompilerThread(Core::Frontend::GraphicsContext* context
if (pending_queue.empty()) { if (pending_queue.empty()) {
continue; continue;
} }
// Pull work from queue
WorkerParams work = std::move(pending_queue.front());
pending_queue.pop_front();
// Pull work from queue
auto work = std::move(pending_queue.front());
pending_queue.pop();
lock.unlock(); lock.unlock();
if (work.backend == AsyncShaders::Backend::OpenGL || if (work->backend == Backend::OpenGL || work->backend == Backend::GLASM) {
work.backend == AsyncShaders::Backend::GLASM) { VideoCommon::Shader::Registry registry = *work->registry;
VideoCommon::Shader::Registry registry = *work.registry; const ShaderIR ir(work->code, work->main_offset, work->compiler_settings, registry);
const ShaderIR ir(work.code, work.main_offset, work.compiler_settings, registry);
const auto scope = context->Acquire(); const auto scope = context->Acquire();
auto program = auto program =
OpenGL::BuildShader(*work.device, work.shader_type, work.uid, ir, registry); OpenGL::BuildShader(*work->device, work->shader_type, work->uid, ir, registry);
Result result{}; Result result{};
result.backend = work.backend; result.backend = work->backend;
result.cpu_address = work.cpu_address; result.cpu_address = work->cpu_address;
result.uid = work.uid; result.uid = work->uid;
result.code = std::move(work.code); result.code = std::move(work->code);
result.code_b = std::move(work.code_b); result.code_b = std::move(work->code_b);
result.shader_type = work.shader_type; result.shader_type = work->shader_type;
// LOG_CRITICAL(Render_Vulkan, "Shader hast been Compiled \t0x{:016X} id {}",
// result.uid, id);
if (work.backend == AsyncShaders::Backend::OpenGL) { if (work->backend == Backend::OpenGL) {
result.program.opengl = std::move(program->source_program); result.program.opengl = std::move(program->source_program);
} else if (work.backend == AsyncShaders::Backend::GLASM) { } else if (work->backend == Backend::GLASM) {
result.program.glasm = std::move(program->assembly_program); result.program.glasm = std::move(program->assembly_program);
} }
work.reset();
{ {
std::unique_lock complete_lock(completed_mutex); std::unique_lock complete_lock(completed_mutex);
finished_work.push_back(std::move(result)); finished_work.push_back(std::move(result));
} }
} else if (work->backend == Backend::Vulkan) {
} else if (work.backend == AsyncShaders::Backend::Vulkan) {
Vulkan::GraphicsPipelineCacheKey params_key{ Vulkan::GraphicsPipelineCacheKey params_key{
work.renderpass_params, .renderpass_params = work->renderpass_params,
work.padding, .padding = work->padding,
work.shaders, .shaders = work->shaders,
work.fixed_state, .fixed_state = work->fixed_state,
}; };
{
std::unique_lock find_lock{completed_mutex};
for (size_t i = 0; i < finished_work.size(); ++i) {
// This loop deletes duplicate pipelines in finished_work
// in favor of the pipeline about to be created
if (finished_work[i].pipeline &&
finished_work[i].pipeline->GetCacheKey().Hash() == params_key.Hash()) {
LOG_CRITICAL(Render_Vulkan,
"Pipeliene was already here \t0x{:016X} matches 0x{:016X} ",
params_key.Hash(),
finished_work[i].pipeline->GetCacheKey().Hash());
finished_work.erase(finished_work.begin() + i);
}
}
find_lock.unlock();
}
auto pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>(
work->pp_cache->GetDevice(), work->pp_cache->GetScheduler(),
work->pp_cache->GetDescriptorPool(), work->pp_cache->GetUpdateDescriptorQueue(),
work->pp_cache->GetRenderpassCache(), params_key, work->bindings, work->program);
{ {
std::unique_lock complete_lock(completed_mutex); std::unique_lock complete_lock(completed_mutex);
// Duplicate creation of pipelines leads to instability and crashing, caused by a
// race condition but band-aid solution is locking the making of the pipeline
// results in only one pipeline created at a time.
Result result{ Result result{
.backend = work.backend, .backend = Backend::Vulkan,
.pipeline = std::make_unique<Vulkan::VKGraphicsPipeline>( .pipeline = std::move(pipeline),
work.pp_cache->GetDevice(), work.pp_cache->GetScheduler(),
work.pp_cache->GetDescriptorPool(),
work.pp_cache->GetUpdateDescriptorQueue(),
work.pp_cache->GetRenderpassCache(), params_key, work.bindings,
work.program),
}; };
finished_work.push_back(std::move(result)); finished_work.push_back(std::move(result));
complete_lock.unlock();
} }
} }
// Give a chance for another thread to get work. Lessens duplicates
std::this_thread::yield();
} }
} }

View file

@ -100,7 +100,7 @@ private:
bool HasWorkQueued(); bool HasWorkQueued();
struct WorkerParams { struct WorkerParams {
AsyncShaders::Backend backend; Backend backend;
// For OGL // For OGL
const OpenGL::Device* device; const OpenGL::Device* device;
Tegra::Engines::ShaderType shader_type; Tegra::Engines::ShaderType shader_type;
@ -128,7 +128,7 @@ private:
std::atomic<bool> is_thread_exiting{}; std::atomic<bool> is_thread_exiting{};
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list; std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> context_list;
std::vector<std::thread> worker_threads; std::vector<std::thread> worker_threads;
std::deque<WorkerParams> pending_queue; std::queue<std::unique_ptr<WorkerParams>> pending_queue;
std::vector<AsyncShaders::Result> finished_work; std::vector<AsyncShaders::Result> finished_work;
Core::Frontend::EmuWindow& emu_window; Core::Frontend::EmuWindow& emu_window;
}; };