Merge pull request #2358 from ReinUsesLisp/parallel-shader
gl_shader_cache: Use shared contexts to build shaders in parallel at boot
This commit is contained in:
commit
68c9c9222d
9 changed files with 124 additions and 64 deletions
|
@ -98,9 +98,11 @@ struct FramebufferCacheKey {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, ScreenInfo& info)
|
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
||||||
: res_cache{*this}, shader_cache{*this, system, device}, global_cache{*this}, system{system},
|
ScreenInfo& info)
|
||||||
screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) {
|
: res_cache{*this}, shader_cache{*this, system, emu_window, device},
|
||||||
|
global_cache{*this}, system{system}, screen_info{info},
|
||||||
|
buffer_cache(*this, STREAM_BUFFER_SIZE) {
|
||||||
OpenGLState::ApplyDefaultState();
|
OpenGLState::ApplyDefaultState();
|
||||||
|
|
||||||
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
|
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
|
||||||
|
|
|
@ -48,7 +48,8 @@ struct FramebufferCacheKey;
|
||||||
|
|
||||||
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
|
class RasterizerOpenGL : public VideoCore::RasterizerInterface {
|
||||||
public:
|
public:
|
||||||
explicit RasterizerOpenGL(Core::System& system, ScreenInfo& info);
|
explicit RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
||||||
|
ScreenInfo& info);
|
||||||
~RasterizerOpenGL() override;
|
~RasterizerOpenGL() override;
|
||||||
|
|
||||||
void DrawArrays() override;
|
void DrawArrays() override;
|
||||||
|
|
|
@ -2,10 +2,14 @@
|
||||||
// Licensed under GPLv2 or any later version
|
// Licensed under GPLv2 or any later version
|
||||||
// Refer to the license.txt file included.
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <mutex>
|
||||||
|
#include <thread>
|
||||||
#include <boost/functional/hash.hpp>
|
#include <boost/functional/hash.hpp>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "common/hash.h"
|
#include "common/hash.h"
|
||||||
|
#include "common/scope_exit.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
|
#include "core/frontend/emu_window.h"
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||||
|
@ -344,8 +348,8 @@ ShaderDiskCacheUsage CachedShader::GetUsage(GLenum primitive_mode,
|
||||||
}
|
}
|
||||||
|
|
||||||
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
ShaderCacheOpenGL::ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||||
const Device& device)
|
Core::Frontend::EmuWindow& emu_window, const Device& device)
|
||||||
: RasterizerCache{rasterizer}, device{device}, disk_cache{system} {}
|
: RasterizerCache{rasterizer}, emu_window{emu_window}, device{device}, disk_cache{system} {}
|
||||||
|
|
||||||
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
const VideoCore::DiskResourceLoadCallback& callback) {
|
const VideoCore::DiskResourceLoadCallback& callback) {
|
||||||
|
@ -353,62 +357,107 @@ void ShaderCacheOpenGL::LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
if (!transferable) {
|
if (!transferable) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
const auto [raws, usages] = *transferable;
|
const auto [raws, shader_usages] = *transferable;
|
||||||
|
|
||||||
auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
|
auto [decompiled, dumps] = disk_cache.LoadPrecompiled();
|
||||||
|
|
||||||
const auto supported_formats{GetSupportedFormats()};
|
const auto supported_formats{GetSupportedFormats()};
|
||||||
const auto unspecialized{
|
const auto unspecialized_shaders{
|
||||||
GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
|
GenerateUnspecializedShaders(stop_loading, callback, raws, decompiled)};
|
||||||
if (stop_loading)
|
if (stop_loading) {
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
// Track if precompiled cache was altered during loading to know if we have to serialize the
|
// Track if precompiled cache was altered during loading to know if we have to serialize the
|
||||||
// virtual precompiled cache file back to the hard drive
|
// virtual precompiled cache file back to the hard drive
|
||||||
bool precompiled_cache_altered = false;
|
bool precompiled_cache_altered = false;
|
||||||
|
|
||||||
// Build shaders
|
// Inform the frontend about shader build initialization
|
||||||
if (callback)
|
if (callback) {
|
||||||
callback(VideoCore::LoadCallbackStage::Build, 0, usages.size());
|
callback(VideoCore::LoadCallbackStage::Build, 0, shader_usages.size());
|
||||||
for (std::size_t i = 0; i < usages.size(); ++i) {
|
}
|
||||||
if (stop_loading)
|
|
||||||
|
std::mutex mutex;
|
||||||
|
std::size_t built_shaders = 0; // It doesn't have to be atomic since it's only used behind a mutex
|
||||||
|
std::atomic_bool compilation_failed = false;
|
||||||
|
|
||||||
|
const auto Worker = [&](Core::Frontend::GraphicsContext* context, std::size_t begin,
|
||||||
|
std::size_t end, const std::vector<ShaderDiskCacheUsage>& shader_usages,
|
||||||
|
const ShaderDumpsMap& dumps) {
|
||||||
|
context->MakeCurrent();
|
||||||
|
SCOPE_EXIT({ return context->DoneCurrent(); });
|
||||||
|
|
||||||
|
for (std::size_t i = begin; i < end; ++i) {
|
||||||
|
if (stop_loading || compilation_failed) {
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
const auto& usage{shader_usages[i]};
|
||||||
|
LOG_INFO(Render_OpenGL, "Building shader {:016x} (index {} of {})",
|
||||||
|
usage.unique_identifier, i, shader_usages.size());
|
||||||
|
|
||||||
const auto& usage{usages[i]};
|
const auto& unspecialized{unspecialized_shaders.at(usage.unique_identifier)};
|
||||||
LOG_INFO(Render_OpenGL, "Building shader {:016x} ({} of {})", usage.unique_identifier,
|
const auto dump{dumps.find(usage)};
|
||||||
i + 1, usages.size());
|
|
||||||
|
|
||||||
const auto& unspec{unspecialized.at(usage.unique_identifier)};
|
|
||||||
const auto dump_it = dumps.find(usage);
|
|
||||||
|
|
||||||
CachedProgram shader;
|
CachedProgram shader;
|
||||||
if (dump_it != dumps.end()) {
|
if (dump != dumps.end()) {
|
||||||
// If the shader is dumped, attempt to load it from its precompiled dump
|
// If the shader is dumped, attempt to load it from its precompiled dump
|
||||||
shader = GeneratePrecompiledProgram(dump_it->second, supported_formats);
|
shader = GeneratePrecompiledProgram(dump->second, supported_formats);
|
||||||
if (!shader) {
|
if (!shader) {
|
||||||
|
compilation_failed = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!shader) {
|
||||||
|
shader = SpecializeShader(unspecialized.code, unspecialized.entries,
|
||||||
|
unspecialized.program_type, usage.bindings,
|
||||||
|
usage.primitive, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::scoped_lock lock(mutex);
|
||||||
|
if (callback) {
|
||||||
|
callback(VideoCore::LoadCallbackStage::Build, ++built_shaders,
|
||||||
|
shader_usages.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
precompiled_programs.emplace(usage, std::move(shader));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
const auto num_workers{static_cast<std::size_t>(std::thread::hardware_concurrency() + 1)};
|
||||||
|
const std::size_t bucket_size{shader_usages.size() / num_workers};
|
||||||
|
std::vector<std::unique_ptr<Core::Frontend::GraphicsContext>> contexts(num_workers);
|
||||||
|
std::vector<std::thread> threads(num_workers);
|
||||||
|
for (std::size_t i = 0; i < num_workers; ++i) {
|
||||||
|
const bool is_last_worker = i + 1 == num_workers;
|
||||||
|
const std::size_t start{bucket_size * i};
|
||||||
|
const std::size_t end{is_last_worker ? shader_usages.size() : start + bucket_size};
|
||||||
|
|
||||||
|
// On some platforms the shared context has to be created from the GUI thread
|
||||||
|
contexts[i] = emu_window.CreateSharedContext();
|
||||||
|
threads[i] = std::thread(Worker, contexts[i].get(), start, end, shader_usages, dumps);
|
||||||
|
}
|
||||||
|
for (auto& thread : threads) {
|
||||||
|
thread.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (compilation_failed) {
|
||||||
// Invalidate the precompiled cache if a dumped shader was rejected
|
// Invalidate the precompiled cache if a dumped shader was rejected
|
||||||
disk_cache.InvalidatePrecompiled();
|
disk_cache.InvalidatePrecompiled();
|
||||||
precompiled_cache_altered = true;
|
|
||||||
dumps.clear();
|
dumps.clear();
|
||||||
|
precompiled_cache_altered = true;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
}
|
if (stop_loading) {
|
||||||
if (!shader) {
|
return;
|
||||||
shader = SpecializeShader(unspec.code, unspec.entries, unspec.program_type,
|
|
||||||
usage.bindings, usage.primitive, true);
|
|
||||||
}
|
|
||||||
precompiled_programs.insert({usage, std::move(shader)});
|
|
||||||
|
|
||||||
if (callback)
|
|
||||||
callback(VideoCore::LoadCallbackStage::Build, i + 1, usages.size());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
|
// TODO(Rodrigo): Do state tracking for transferable shaders and do a dummy draw before
|
||||||
// precompiling them
|
// precompiling them
|
||||||
|
|
||||||
for (std::size_t i = 0; i < usages.size(); ++i) {
|
for (std::size_t i = 0; i < shader_usages.size(); ++i) {
|
||||||
const auto& usage{usages[i]};
|
const auto& usage{shader_usages[i]};
|
||||||
if (dumps.find(usage) == dumps.end()) {
|
if (dumps.find(usage) == dumps.end()) {
|
||||||
const auto& program = precompiled_programs.at(usage);
|
const auto& program{precompiled_programs.at(usage)};
|
||||||
disk_cache.SaveDump(usage, program->handle);
|
disk_cache.SaveDump(usage, program->handle);
|
||||||
precompiled_cache_altered = true;
|
precompiled_cache_altered = true;
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,7 +22,11 @@
|
||||||
|
|
||||||
namespace Core {
|
namespace Core {
|
||||||
class System;
|
class System;
|
||||||
} // namespace Core
|
}
|
||||||
|
|
||||||
|
namespace Core::Frontend {
|
||||||
|
class EmuWindow;
|
||||||
|
}
|
||||||
|
|
||||||
namespace OpenGL {
|
namespace OpenGL {
|
||||||
|
|
||||||
|
@ -111,7 +115,7 @@ private:
|
||||||
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
|
class ShaderCacheOpenGL final : public RasterizerCache<Shader> {
|
||||||
public:
|
public:
|
||||||
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
explicit ShaderCacheOpenGL(RasterizerOpenGL& rasterizer, Core::System& system,
|
||||||
const Device& device);
|
Core::Frontend::EmuWindow& emu_window, const Device& device);
|
||||||
|
|
||||||
/// Loads disk cache for the current game
|
/// Loads disk cache for the current game
|
||||||
void LoadDiskCache(const std::atomic_bool& stop_loading,
|
void LoadDiskCache(const std::atomic_bool& stop_loading,
|
||||||
|
@ -133,13 +137,13 @@ private:
|
||||||
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
|
CachedProgram GeneratePrecompiledProgram(const ShaderDiskCacheDump& dump,
|
||||||
const std::set<GLenum>& supported_formats);
|
const std::set<GLenum>& supported_formats);
|
||||||
|
|
||||||
|
Core::Frontend::EmuWindow& emu_window;
|
||||||
const Device& device;
|
const Device& device;
|
||||||
|
|
||||||
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
|
|
||||||
|
|
||||||
ShaderDiskCacheOpenGL disk_cache;
|
ShaderDiskCacheOpenGL disk_cache;
|
||||||
|
|
||||||
PrecompiledShaders precompiled_shaders;
|
PrecompiledShaders precompiled_shaders;
|
||||||
PrecompiledPrograms precompiled_programs;
|
PrecompiledPrograms precompiled_programs;
|
||||||
|
std::array<Shader, Maxwell::MaxShaderProgram> last_shaders;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -183,8 +183,7 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
|
||||||
return {{raws, usages}};
|
return {{raws, usages}};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
|
std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>
|
||||||
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>
|
|
||||||
ShaderDiskCacheOpenGL::LoadPrecompiled() {
|
ShaderDiskCacheOpenGL::LoadPrecompiled() {
|
||||||
if (!IsUsable())
|
if (!IsUsable())
|
||||||
return {};
|
return {};
|
||||||
|
@ -208,8 +207,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiled() {
|
||||||
return *result;
|
return *result;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>,
|
std::optional<std::pair<std::unordered_map<u64, ShaderDiskCacheDecompiled>, ShaderDumpsMap>>
|
||||||
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>>>
|
|
||||||
ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
|
ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
|
||||||
// Read compressed file from disk and decompress to virtual precompiled cache file
|
// Read compressed file from disk and decompress to virtual precompiled cache file
|
||||||
std::vector<u8> compressed(file.GetSize());
|
std::vector<u8> compressed(file.GetSize());
|
||||||
|
@ -230,7 +228,7 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
|
std::unordered_map<u64, ShaderDiskCacheDecompiled> decompiled;
|
||||||
std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump> dumps;
|
ShaderDumpsMap dumps;
|
||||||
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
|
while (precompiled_cache_virtual_file_offset < precompiled_cache_virtual_file.GetSize()) {
|
||||||
PrecompiledEntryKind kind{};
|
PrecompiledEntryKind kind{};
|
||||||
if (!LoadObjectFromPrecompiled(kind)) {
|
if (!LoadObjectFromPrecompiled(kind)) {
|
||||||
|
|
|
@ -33,6 +33,11 @@ namespace OpenGL {
|
||||||
using ProgramCode = std::vector<u64>;
|
using ProgramCode = std::vector<u64>;
|
||||||
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
using Maxwell = Tegra::Engines::Maxwell3D::Regs;
|
||||||
|
|
||||||
|
struct ShaderDiskCacheUsage;
|
||||||
|
struct ShaderDiskCacheDump;
|
||||||
|
|
||||||
|
using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
|
||||||
|
|
||||||
/// Allocated bindings used by an OpenGL shader program
|
/// Allocated bindings used by an OpenGL shader program
|
||||||
struct BaseBindings {
|
struct BaseBindings {
|
||||||
u32 cbuf{};
|
u32 cbuf{};
|
||||||
|
|
|
@ -97,8 +97,8 @@ static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, cons
|
||||||
return matrix;
|
return matrix;
|
||||||
}
|
}
|
||||||
|
|
||||||
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system)
|
RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system)
|
||||||
: VideoCore::RendererBase{window}, system{system} {}
|
: VideoCore::RendererBase{emu_window}, emu_window{emu_window}, system{system} {}
|
||||||
|
|
||||||
RendererOpenGL::~RendererOpenGL() = default;
|
RendererOpenGL::~RendererOpenGL() = default;
|
||||||
|
|
||||||
|
@ -265,7 +265,7 @@ void RendererOpenGL::CreateRasterizer() {
|
||||||
}
|
}
|
||||||
// Initialize sRGB Usage
|
// Initialize sRGB Usage
|
||||||
OpenGLState::ClearsRGBUsed();
|
OpenGLState::ClearsRGBUsed();
|
||||||
rasterizer = std::make_unique<RasterizerOpenGL>(system, screen_info);
|
rasterizer = std::make_unique<RasterizerOpenGL>(system, emu_window, screen_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
|
void RendererOpenGL::ConfigureFramebufferTexture(TextureInfo& texture,
|
||||||
|
|
|
@ -45,7 +45,7 @@ struct ScreenInfo {
|
||||||
|
|
||||||
class RendererOpenGL : public VideoCore::RendererBase {
|
class RendererOpenGL : public VideoCore::RendererBase {
|
||||||
public:
|
public:
|
||||||
explicit RendererOpenGL(Core::Frontend::EmuWindow& window, Core::System& system);
|
explicit RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::System& system);
|
||||||
~RendererOpenGL() override;
|
~RendererOpenGL() override;
|
||||||
|
|
||||||
/// Swap buffers (render frame)
|
/// Swap buffers (render frame)
|
||||||
|
@ -77,6 +77,7 @@ private:
|
||||||
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
|
void LoadColorToActiveGLTexture(u8 color_r, u8 color_g, u8 color_b, u8 color_a,
|
||||||
const TextureInfo& texture);
|
const TextureInfo& texture);
|
||||||
|
|
||||||
|
Core::Frontend::EmuWindow& emu_window;
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
|
|
||||||
OpenGLState state;
|
OpenGLState state;
|
||||||
|
|
|
@ -91,25 +91,25 @@ void EmuThread::run() {
|
||||||
|
|
||||||
class GGLContext : public Core::Frontend::GraphicsContext {
|
class GGLContext : public Core::Frontend::GraphicsContext {
|
||||||
public:
|
public:
|
||||||
explicit GGLContext(QOpenGLContext* shared_context)
|
explicit GGLContext(QOpenGLContext* shared_context) : shared_context{shared_context} {
|
||||||
: context{std::make_unique<QOpenGLContext>(shared_context)} {
|
context.setFormat(shared_context->format());
|
||||||
surface.setFormat(shared_context->format());
|
context.setShareContext(shared_context);
|
||||||
surface.create();
|
context.create();
|
||||||
}
|
}
|
||||||
|
|
||||||
void MakeCurrent() override {
|
void MakeCurrent() override {
|
||||||
context->makeCurrent(&surface);
|
context.makeCurrent(shared_context->surface());
|
||||||
}
|
}
|
||||||
|
|
||||||
void DoneCurrent() override {
|
void DoneCurrent() override {
|
||||||
context->doneCurrent();
|
context.doneCurrent();
|
||||||
}
|
}
|
||||||
|
|
||||||
void SwapBuffers() override {}
|
void SwapBuffers() override {}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::unique_ptr<QOpenGLContext> context;
|
QOpenGLContext* shared_context;
|
||||||
QOffscreenSurface surface;
|
QOpenGLContext context;
|
||||||
};
|
};
|
||||||
|
|
||||||
// This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL
|
// This class overrides paintEvent and resizeEvent to prevent the GUI thread from stealing GL
|
||||||
|
@ -358,7 +358,7 @@ void GRenderWindow::OnClientAreaResized(unsigned width, unsigned height) {
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const {
|
std::unique_ptr<Core::Frontend::GraphicsContext> GRenderWindow::CreateSharedContext() const {
|
||||||
return std::make_unique<GGLContext>(shared_context.get());
|
return std::make_unique<GGLContext>(context.get());
|
||||||
}
|
}
|
||||||
|
|
||||||
void GRenderWindow::InitRenderTarget() {
|
void GRenderWindow::InitRenderTarget() {
|
||||||
|
|
Loading…
Reference in a new issue