gl_shader_cache: Implement async shaders

This commit is contained in:
ameerj 2021-06-14 21:06:29 -04:00
parent ae4e452759
commit 74f683787e
7 changed files with 161 additions and 114 deletions

View file

@ -83,6 +83,7 @@ add_library(video_core STATIC
renderer_opengl/gl_shader_cache.h
renderer_opengl/gl_shader_manager.cpp
renderer_opengl/gl_shader_manager.h
renderer_opengl/gl_shader_context.h
renderer_opengl/gl_shader_util.cpp
renderer_opengl/gl_shader_util.h
renderer_opengl/gl_state_tracker.cpp

View file

@ -6,11 +6,13 @@
#include <cstring>
#include "common/cityhash.h"
#include "common/thread_worker.h"
#include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
#include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_state_tracker.h"
#include "video_core/shader_notify.h"
#include "video_core/texture_cache/texture_cache.h"
namespace OpenGL {
@ -117,74 +119,91 @@ GraphicsPipeline::GraphicsPipeline(const Device& device, TextureCache& texture_c
BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
Tegra::Engines::Maxwell3D& maxwell3d_,
ProgramManager& program_manager_, StateTracker& state_tracker_,
std::array<std::string, 5> assembly_sources,
std::array<std::string, 5> glsl_sources,
ShaderWorker* thread_worker,
VideoCore::ShaderNotify* shader_notify,
std::array<std::string, 5> sources,
const std::array<const Shader::Info*, 5>& infos,
const VideoCommon::TransformFeedbackState* xfb_state)
: texture_cache{texture_cache_}, buffer_cache{buffer_cache_}, gpu_memory{gpu_memory_},
maxwell3d{maxwell3d_}, program_manager{program_manager_}, state_tracker{state_tracker_} {
if (shader_notify) {
shader_notify->MarkShaderBuilding();
}
std::ranges::transform(infos, stage_infos.begin(),
[](const Shader::Info* info) { return info ? *info : Shader::Info{}; });
if (device.UseAssemblyShaders()) {
for (size_t stage = 0; stage < 5; ++stage) {
const auto code{assembly_sources[stage]};
if (code.empty()) {
continue;
auto func{[this, device, sources, shader_notify, xfb_state](ShaderContext::Context*) mutable {
if (device.UseAssemblyShaders()) {
for (size_t stage = 0; stage < 5; ++stage) {
const auto code{sources[stage]};
if (code.empty()) {
continue;
}
assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage));
enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage;
}
assembly_programs[stage] = CompileProgram(code, AssemblyStage(stage));
enabled_stages_mask |= (assembly_programs[stage].handle != 0 ? 1 : 0) << stage;
} else {
program.handle = glCreateProgram();
for (size_t stage = 0; stage < 5; ++stage) {
const auto code{sources[stage]};
if (code.empty()) {
continue;
}
AttachShader(Stage(stage), program.handle, code);
}
LinkProgram(program.handle);
}
if (shader_notify) {
shader_notify->MarkShaderComplete();
}
u32 num_textures{};
u32 num_images{};
u32 num_storage_buffers{};
for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
const auto& info{stage_infos[stage]};
if (stage < 4) {
base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
base_storage_bindings[stage + 1] = base_storage_bindings[stage];
base_uniform_bindings[stage + 1] +=
AccumulateCount(info.constant_buffer_descriptors);
base_storage_bindings[stage + 1] +=
AccumulateCount(info.storage_buffers_descriptors);
}
enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)};
num_texture_buffers[stage] += num_tex_buffer_bindings;
num_textures += num_tex_buffer_bindings;
const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)};
num_image_buffers[stage] += num_img_buffers_bindings;
num_images += num_img_buffers_bindings;
num_textures += AccumulateCount(info.texture_descriptors);
num_images += AccumulateCount(info.image_descriptors);
num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
writes_global_memory |= std::ranges::any_of(
info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
}
ASSERT(num_textures <= MAX_TEXTURES);
ASSERT(num_images <= MAX_IMAGES);
const bool assembly_shaders{assembly_programs[0].handle != 0};
use_storage_buffers =
!assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
writes_global_memory &= !use_storage_buffers;
if (assembly_shaders && xfb_state) {
GenerateTransformFeedbackState(*xfb_state);
}
is_built.store(true, std::memory_order_relaxed);
}};
if (thread_worker) {
thread_worker->QueueWork(std::move(func));
} else {
program.handle = glCreateProgram();
for (size_t stage = 0; stage < 5; ++stage) {
const auto code{glsl_sources[stage]};
if (code.empty()) {
continue;
}
AttachShader(Stage(stage), program.handle, code);
}
LinkProgram(program.handle);
}
u32 num_textures{};
u32 num_images{};
u32 num_storage_buffers{};
for (size_t stage = 0; stage < base_uniform_bindings.size(); ++stage) {
const auto& info{stage_infos[stage]};
if (stage < 4) {
base_uniform_bindings[stage + 1] = base_uniform_bindings[stage];
base_storage_bindings[stage + 1] = base_storage_bindings[stage];
base_uniform_bindings[stage + 1] += AccumulateCount(info.constant_buffer_descriptors);
base_storage_bindings[stage + 1] += AccumulateCount(info.storage_buffers_descriptors);
}
enabled_uniform_buffer_masks[stage] = info.constant_buffer_mask;
std::ranges::copy(info.constant_buffer_used_sizes, uniform_buffer_sizes[stage].begin());
const u32 num_tex_buffer_bindings{AccumulateCount(info.texture_buffer_descriptors)};
num_texture_buffers[stage] += num_tex_buffer_bindings;
num_textures += num_tex_buffer_bindings;
const u32 num_img_buffers_bindings{AccumulateCount(info.image_buffer_descriptors)};
num_image_buffers[stage] += num_img_buffers_bindings;
num_images += num_img_buffers_bindings;
num_textures += AccumulateCount(info.texture_descriptors);
num_images += AccumulateCount(info.image_descriptors);
num_storage_buffers += AccumulateCount(info.storage_buffers_descriptors);
writes_global_memory |= std::ranges::any_of(
info.storage_buffers_descriptors, [](const auto& desc) { return desc.is_written; });
}
ASSERT(num_textures <= MAX_TEXTURES);
ASSERT(num_images <= MAX_IMAGES);
const bool assembly_shaders{assembly_programs[0].handle != 0};
use_storage_buffers =
!assembly_shaders || num_storage_buffers <= device.GetMaxGLASMStorageBufferBlocks();
writes_global_memory &= !use_storage_buffers;
if (assembly_shaders && xfb_state) {
GenerateTransformFeedbackState(*xfb_state);
func(nullptr);
}
}

View file

@ -20,10 +20,15 @@
namespace OpenGL {
namespace ShaderContext {
struct Context;
}
class Device;
class ProgramManager;
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
struct GraphicsPipelineKey {
std::array<u64, 6> unique_hashes;
@ -65,8 +70,8 @@ public:
BufferCache& buffer_cache_, Tegra::MemoryManager& gpu_memory_,
Tegra::Engines::Maxwell3D& maxwell3d_,
ProgramManager& program_manager_, StateTracker& state_tracker_,
std::array<std::string, 5> assembly_sources,
std::array<std::string, 5> glsl_sources,
ShaderWorker* thread_worker, VideoCore::ShaderNotify* shader_notify,
std::array<std::string, 5> sources,
const std::array<const Shader::Info*, 5>& infos,
const VideoCommon::TransformFeedbackState* xfb_state);
@ -82,6 +87,10 @@ public:
return writes_global_memory;
}
[[nodiscard]] bool IsBuilt() const noexcept {
return is_built.load(std::memory_order::relaxed);
}
private:
void GenerateTransformFeedbackState(const VideoCommon::TransformFeedbackState& xfb_state);
@ -108,6 +117,7 @@ private:
bool use_storage_buffers{};
bool writes_global_memory{};
std::atomic_bool is_built{false};
static constexpr std::size_t XFB_ENTRY_STRIDE = 3;
GLsizei num_xfb_attribs{};

View file

@ -70,7 +70,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
buffer_cache_runtime(device),
buffer_cache(*this, maxwell3d, kepler_compute, gpu_memory, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, maxwell3d, kepler_compute, gpu_memory, device, texture_cache,
buffer_cache, program_manager, state_tracker),
buffer_cache, program_manager, state_tracker, gpu.ShaderNotify()),
query_cache(*this, maxwell3d, gpu_memory), accelerate_dma(buffer_cache),
fence_manager(*this, gpu, texture_cache, buffer_cache, query_cache) {}

View file

@ -17,7 +17,6 @@
#include "common/scope_exit.h"
#include "common/thread_worker.h"
#include "core/core.h"
#include "core/frontend/emu_window.h"
#include "shader_recompiler/backend/glasm/emit_glasm.h"
#include "shader_recompiler/backend/glsl/emit_glsl.h"
#include "shader_recompiler/backend/spirv/emit_spirv.h"
@ -50,6 +49,7 @@ using VideoCommon::FileEnvironment;
using VideoCommon::GenericEnvironment;
using VideoCommon::GraphicsEnvironment;
using VideoCommon::SerializePipeline;
using Context = ShaderContext::Context;
template <typename Container>
auto MakeSpan(Container& container) {
@ -143,25 +143,17 @@ void SetXfbState(VideoCommon::TransformFeedbackState& state, const Maxwell& regs
}
} // Anonymous namespace
struct ShaderCache::Context {
explicit Context(Core::Frontend::EmuWindow& emu_window)
: gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {}
std::unique_ptr<Core::Frontend::GraphicsContext> gl_context;
Core::Frontend::GraphicsContext::Scoped scoped;
ShaderPools pools;
};
ShaderCache::ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_,
TextureCache& texture_cache_, BufferCache& buffer_cache_,
ProgramManager& program_manager_, StateTracker& state_tracker_)
ProgramManager& program_manager_, StateTracker& state_tracker_,
VideoCore::ShaderNotify& shader_notify_)
: VideoCommon::ShaderCache{rasterizer_, gpu_memory_, maxwell3d_, kepler_compute_},
emu_window{emu_window_}, device{device_}, texture_cache{texture_cache_},
buffer_cache{buffer_cache_}, program_manager{program_manager_}, state_tracker{state_tracker_},
use_asynchronous_shaders{device.UseAsynchronousShaders()},
shader_notify{shader_notify_}, use_asynchronous_shaders{device.UseAsynchronousShaders()},
profile{
.supported_spirv = 0x00010000,
@ -264,7 +256,7 @@ void ShaderCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading,
env_ptrs.push_back(&env);
}
ctx->pools.ReleaseContents();
auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs))};
auto pipeline{CreateGraphicsPipeline(ctx->pools, key, MakeSpan(env_ptrs), false)};
std::lock_guard lock{state.mutex};
if (pipeline) {
graphics_cache.emplace(key, std::move(pipeline));
@ -311,6 +303,9 @@ GraphicsPipeline* ShaderCache::CurrentGraphicsPipeline() {
if (is_new) {
program = CreateGraphicsPipeline();
}
if (!program || !program->IsBuilt()) {
return nullptr;
}
return program.get();
}
@ -339,7 +334,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() {
GetGraphicsEnvironments(environments, graphics_key.unique_hashes);
main_pools.ReleaseContents();
auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span())};
auto pipeline{CreateGraphicsPipeline(main_pools, graphics_key, environments.Span(),
use_asynchronous_shaders)};
if (!pipeline || shader_cache_filename.empty()) {
return pipeline;
}
@ -354,8 +350,8 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline() {
}
std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
ShaderPools& pools, const GraphicsPipelineKey& key,
std::span<Shader::Environment* const> envs) try {
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
std::span<Shader::Environment* const> envs, bool build_in_parallel) try {
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
size_t env_index{};
u32 total_storage_buffers{};
@ -394,8 +390,7 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
std::array<const Shader::Info*, Maxwell::MaxShaderStage> infos{};
OGLProgram source_program;
std::array<std::string, 5> assembly_sources;
std::array<std::string, 5> glsl_sources;
std::array<std::string, 5> sources;
Shader::Backend::Bindings binding;
const bool use_glasm{device.UseAssemblyShaders()};
const size_t first_index = uses_vertex_a && uses_vertex_b ? 1 : 0;
@ -412,14 +407,16 @@ std::unique_ptr<GraphicsPipeline> ShaderCache::CreateGraphicsPipeline(
const auto runtime_info{
MakeRuntimeInfo(key, program, glasm_use_storage_buffers, use_glasm)};
if (use_glasm) {
assembly_sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding);
sources[stage_index] = EmitGLASM(profile, runtime_info, program, binding);
} else {
glsl_sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding);
sources[stage_index] = EmitGLSL(profile, runtime_info, program, binding);
}
}
auto* const thread_worker{build_in_parallel ? workers.get() : nullptr};
VideoCore::ShaderNotify* const notify{build_in_parallel ? &shader_notify : nullptr};
return std::make_unique<GraphicsPipeline>(
device, texture_cache, buffer_cache, gpu_memory, maxwell3d, program_manager, state_tracker,
assembly_sources, glsl_sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr);
thread_worker, notify, sources, infos, key.xfb_enabled != 0 ? &key.xfb_state : nullptr);
} catch (Shader::Exception& exception) {
LOG_ERROR(Render_OpenGL, "{}", exception.what());
@ -442,9 +439,9 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
return pipeline;
}
std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools& pools,
const ComputePipelineKey& key,
Shader::Environment& env) try {
std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(
ShaderContext::ShaderPools& pools, const ComputePipelineKey& key,
Shader::Environment& env) try {
LOG_INFO(Render_OpenGL, "0x{:016x}", key.Hash());
Shader::Maxwell::Flow::CFG cfg{env, pools.flow_block, env.StartAddress()};
@ -465,11 +462,10 @@ std::unique_ptr<ComputePipeline> ShaderCache::CreateComputePipeline(ShaderPools&
return nullptr;
}
std::unique_ptr<Common::StatefulThreadWorker<ShaderCache::Context>> ShaderCache::CreateWorkers()
const {
return std::make_unique<Common::StatefulThreadWorker<Context>>(
std::max(std::thread::hardware_concurrency(), 2U) - 1, "yuzu:ShaderBuilder",
[this] { return Context{emu_window}; });
std::unique_ptr<ShaderWorker> ShaderCache::CreateWorkers() const {
return std::make_unique<ShaderWorker>(std::max(std::thread::hardware_concurrency(), 2U) - 1,
"yuzu:ShaderBuilder",
[this] { return Context{emu_window}; });
}
} // namespace OpenGL

View file

@ -13,13 +13,12 @@
#include "common/common_types.h"
#include "common/thread_worker.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/ir/value.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
#include "shader_recompiler/object_pool.h"
#include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_compute_pipeline.h"
#include "video_core/renderer_opengl/gl_graphics_pipeline.h"
#include "video_core/renderer_opengl/gl_shader_context.h"
#include "video_core/shader_cache.h"
namespace Tegra {
@ -31,29 +30,17 @@ namespace OpenGL {
class Device;
class ProgramManager;
class RasterizerOpenGL;
struct ShaderPools {
void ReleaseContents() {
flow_block.ReleaseContents();
block.ReleaseContents();
inst.ReleaseContents();
}
Shader::ObjectPool<Shader::IR::Inst> inst;
Shader::ObjectPool<Shader::IR::Block> block;
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
};
using ShaderWorker = Common::StatefulThreadWorker<ShaderContext::Context>;
class ShaderCache : public VideoCommon::ShaderCache {
struct Context;
public:
explicit ShaderCache(RasterizerOpenGL& rasterizer_, Core::Frontend::EmuWindow& emu_window_,
Tegra::Engines::Maxwell3D& maxwell3d_,
Tegra::Engines::KeplerCompute& kepler_compute_,
Tegra::MemoryManager& gpu_memory_, const Device& device_,
TextureCache& texture_cache_, BufferCache& buffer_cache_,
ProgramManager& program_manager_, StateTracker& state_tracker_);
ProgramManager& program_manager_, StateTracker& state_tracker_,
VideoCore::ShaderNotify& shader_notify_);
~ShaderCache();
void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
@ -67,17 +54,17 @@ private:
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline();
std::unique_ptr<GraphicsPipeline> CreateGraphicsPipeline(
ShaderPools& pools, const GraphicsPipelineKey& key,
std::span<Shader::Environment* const> envs);
ShaderContext::ShaderPools& pools, const GraphicsPipelineKey& key,
std::span<Shader::Environment* const> envs, bool build_in_parallel);
std::unique_ptr<ComputePipeline> CreateComputePipeline(const ComputePipelineKey& key,
const VideoCommon::ShaderInfo* shader);
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderPools& pools,
std::unique_ptr<ComputePipeline> CreateComputePipeline(ShaderContext::ShaderPools& pools,
const ComputePipelineKey& key,
Shader::Environment& env);
std::unique_ptr<Common::StatefulThreadWorker<Context>> CreateWorkers() const;
std::unique_ptr<ShaderWorker> CreateWorkers() const;
Core::Frontend::EmuWindow& emu_window;
const Device& device;
@ -85,17 +72,18 @@ private:
BufferCache& buffer_cache;
ProgramManager& program_manager;
StateTracker& state_tracker;
VideoCore::ShaderNotify& shader_notify;
GraphicsPipelineKey graphics_key{};
const bool use_asynchronous_shaders;
ShaderPools main_pools;
ShaderContext::ShaderPools main_pools;
std::unordered_map<GraphicsPipelineKey, std::unique_ptr<GraphicsPipeline>> graphics_cache;
std::unordered_map<ComputePipelineKey, std::unique_ptr<ComputePipeline>> compute_cache;
Shader::Profile profile;
std::filesystem::path shader_cache_filename;
std::unique_ptr<Common::StatefulThreadWorker<Context>> workers;
std::unique_ptr<ShaderWorker> workers;
};
} // namespace OpenGL

View file

@ -0,0 +1,33 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#pragma once
#include "core/frontend/emu_window.h"
#include "shader_recompiler/frontend/ir/basic_block.h"
#include "shader_recompiler/frontend/maxwell/control_flow.h"
namespace OpenGL::ShaderContext {
struct ShaderPools {
void ReleaseContents() {
flow_block.ReleaseContents();
block.ReleaseContents();
inst.ReleaseContents();
}
Shader::ObjectPool<Shader::IR::Inst> inst;
Shader::ObjectPool<Shader::IR::Block> block;
Shader::ObjectPool<Shader::Maxwell::Flow::Block> flow_block;
};
struct Context {
explicit Context(Core::Frontend::EmuWindow& emu_window)
: gl_context{emu_window.CreateSharedContext()}, scoped{*gl_context} {}
std::unique_ptr<Core::Frontend::GraphicsContext> gl_context;
Core::Frontend::GraphicsContext::Scoped scoped;
ShaderPools pools;
};
} // namespace OpenGL::ShaderContext