gl_shader_cache: Remove dynamic BaseBinding specialization

This commit is contained in:
ReinUsesLisp 2019-11-18 21:38:15 -03:00
parent c8a48aacc0
commit 180417c514
No known key found for this signature in database
GPG key ID: 2DFC508897B39CFE
16 changed files with 200 additions and 192 deletions

View file

@ -63,7 +63,6 @@ public:
static constexpr std::size_t NumVertexArrays = 32; static constexpr std::size_t NumVertexArrays = 32;
static constexpr std::size_t NumVertexAttributes = 32; static constexpr std::size_t NumVertexAttributes = 32;
static constexpr std::size_t NumVaryings = 31; static constexpr std::size_t NumVaryings = 31;
static constexpr std::size_t NumTextureSamplers = 32;
static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number
static constexpr std::size_t NumClipDistances = 8; static constexpr std::size_t NumClipDistances = 8;
static constexpr std::size_t MaxShaderProgram = 6; static constexpr std::size_t MaxShaderProgram = 6;

View file

@ -16,5 +16,6 @@ enum class ShaderType : u32 {
Fragment = 4, Fragment = 4,
Compute = 5, Compute = 5,
}; };
static constexpr std::size_t MaxShaderTypes = 6;
} // namespace Tegra::Engines } // namespace Tegra::Engines

View file

@ -17,6 +17,9 @@ namespace OpenGL {
namespace { namespace {
// One uniform block is reserved for emulation purposes
constexpr u32 ReservedUniformBlocks = 1;
template <typename T> template <typename T>
T GetInteger(GLenum pname) { T GetInteger(GLenum pname) {
GLint temporary; GLint temporary;
@ -48,6 +51,22 @@ bool HasExtension(const std::vector<std::string_view>& images, std::string_view
return std::find(images.begin(), images.end(), extension) != images.end(); return std::find(images.begin(), images.end(), extension) != images.end();
} }
/// Adds two sets of base bindings member by member.
/// @param lhs First operand; taken by value and used as the accumulator.
/// @param rhs Second operand.
/// @return Bindings whose every field is the sum of the matching fields of lhs and rhs.
constexpr Device::BaseBindings operator+(Device::BaseBindings lhs, Device::BaseBindings rhs) {
    lhs.uniform_buffer += rhs.uniform_buffer;
    lhs.shader_storage_buffer += rhs.shader_storage_buffer;
    lhs.sampler += rhs.sampler;
    lhs.image += rhs.image;
    return lhs;
}
/// Builds a BaseBindings value from four integers read through GetInteger<u32>.
/// @param uniform_blocks        Enum passed to GetInteger for the uniform buffer field.
/// @param shader_storage_blocks Enum passed to GetInteger for the shader storage buffer field.
/// @param texture_image_units   Enum passed to GetInteger for the sampler field.
/// @param image_uniforms        Enum passed to GetInteger for the image field.
/// @return The queried values, with ReservedUniformBlocks subtracted from the uniform buffer one.
Device::BaseBindings BuildBaseBindings(GLenum uniform_blocks, GLenum shader_storage_blocks,
                                       GLenum texture_image_units, GLenum image_uniforms) noexcept {
    Device::BaseBindings result;
    // The reserved block(s) are not available to the stage, so they are taken out of its count.
    result.uniform_buffer = GetInteger<u32>(uniform_blocks) - ReservedUniformBlocks;
    result.shader_storage_buffer = GetInteger<u32>(shader_storage_blocks);
    result.sampler = GetInteger<u32>(texture_image_units);
    result.image = GetInteger<u32>(image_uniforms);
    return result;
}
} // Anonymous namespace } // Anonymous namespace
Device::Device() { Device::Device() {
@ -56,6 +75,29 @@ Device::Device() {
const bool is_nvidia = vendor == "NVIDIA Corporation"; const bool is_nvidia = vendor == "NVIDIA Corporation";
// Reserve the first UBO for emulation bindings
base_bindings[0] = BaseBindings{ReservedUniformBlocks, 0, 0, 0};
base_bindings[1] = base_bindings[0] + BuildBaseBindings(GL_MAX_VERTEX_UNIFORM_BLOCKS,
GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS,
GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS,
GL_MAX_VERTEX_IMAGE_UNIFORMS);
base_bindings[2] =
base_bindings[1] + BuildBaseBindings(GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS,
GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS);
base_bindings[3] =
base_bindings[2] + BuildBaseBindings(GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS,
GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS,
GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS,
GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS);
base_bindings[4] = base_bindings[3] + BuildBaseBindings(GL_MAX_GEOMETRY_UNIFORM_BLOCKS,
GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS,
GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS,
GL_MAX_GEOMETRY_IMAGE_UNIFORMS);
// Compute doesn't need any of that
base_bindings[5] = BaseBindings{0, 0, 0, 0};
uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); uniform_buffer_alignment = GetInteger<std::size_t>(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT);
shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger<std::size_t>(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT);
max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS); max_vertex_attributes = GetInteger<u32>(GL_MAX_VERTEX_ATTRIBS);

View file

@ -6,14 +6,32 @@
#include <cstddef> #include <cstddef>
#include "common/common_types.h" #include "common/common_types.h"
#include "video_core/engines/shader_type.h"
namespace OpenGL { namespace OpenGL {
class Device { static constexpr u32 EmulationUniformBlockBinding = 0;
class Device final {
public: public:
/// Group of four binding indices, one per resource kind. All fields are value-initialized to
/// zero by default.
struct BaseBindings final {
// Binding index for uniform buffers.
u32 uniform_buffer{};
// Binding index for shader storage buffers.
u32 shader_storage_buffer{};
// Binding index for samplers.
u32 sampler{};
// Binding index for images.
u32 image{};
};
explicit Device(); explicit Device();
explicit Device(std::nullptr_t); explicit Device(std::nullptr_t);
/// Returns the base bindings stored at position stage_index of base_bindings.
/// The index is used as-is with operator[]; no range check is performed here.
const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept {
return base_bindings[stage_index];
}
/// Overload taking a shader type: converts shader_type to its numeric value and forwards to the
/// index-based GetBaseBindings.
const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept {
return GetBaseBindings(static_cast<std::size_t>(shader_type));
}
std::size_t GetUniformBufferAlignment() const { std::size_t GetUniformBufferAlignment() const {
return uniform_buffer_alignment; return uniform_buffer_alignment;
} }
@ -67,6 +85,7 @@ private:
static bool TestComponentIndexingBug(); static bool TestComponentIndexingBug();
static bool TestPreciseBug(); static bool TestPreciseBug();
std::array<BaseBindings, Tegra::Engines::MaxShaderTypes> base_bindings;
std::size_t uniform_buffer_alignment{}; std::size_t uniform_buffer_alignment{};
std::size_t shader_storage_alignment{}; std::size_t shader_storage_alignment{};
u32 max_vertex_attributes{}; u32 max_vertex_attributes{};

View file

@ -258,7 +258,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
MICROPROFILE_SCOPE(OpenGL_Shader); MICROPROFILE_SCOPE(OpenGL_Shader);
auto& gpu = system.GPU().Maxwell3D(); auto& gpu = system.GPU().Maxwell3D();
BaseBindings base_bindings;
std::array<bool, Maxwell::NumClipDistances> clip_distances{}; std::array<bool, Maxwell::NumClipDistances> clip_distances{};
for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) {
@ -277,25 +276,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
continue; continue;
} }
GLShader::MaxwellUniformData ubo{};
ubo.SetFromRegs(gpu);
const auto [buffer, offset] =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
// Bind the emulation info buffer
bind_ubo_pushbuffer.Push(buffer, offset, static_cast<GLsizeiptr>(sizeof(ubo)));
Shader shader{shader_cache.GetStageProgram(program)}; Shader shader{shader_cache.GetStageProgram(program)};
// Stage indices are 0 - 5 // Stage indices are 0 - 5
const std::size_t stage = index == 0 ? 0 : index - 1; const std::size_t stage = index == 0 ? 0 : index - 1;
SetupDrawConstBuffers(stage, shader); SetupDrawConstBuffers(stage, shader);
SetupDrawGlobalMemory(stage, shader); SetupDrawGlobalMemory(stage, shader);
SetupDrawTextures(stage, shader, base_bindings); SetupDrawTextures(stage, shader);
SetupDrawImages(stage, shader, base_bindings); SetupDrawImages(stage, shader);
const ProgramVariant variant(base_bindings, primitive_mode); const ProgramVariant variant(primitive_mode);
const auto [program_handle, next_bindings] = shader->GetHandle(variant); const auto program_handle = shader->GetHandle(variant);
switch (program) { switch (program) {
case Maxwell::ShaderProgram::VertexA: case Maxwell::ShaderProgram::VertexA:
@ -326,8 +317,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
// VertexB was combined with VertexA, so we skip the VertexB iteration // VertexB was combined with VertexA, so we skip the VertexB iteration
++index; ++index;
} }
base_bindings = next_bindings;
} }
SyncClipEnabled(clip_distances); SyncClipEnabled(clip_distances);
@ -612,8 +601,16 @@ void RasterizerOpenGL::DrawPrelude() {
index_buffer_offset = SetupIndexBuffer(); index_buffer_offset = SetupIndexBuffer();
// Prepare packed bindings. // Prepare packed bindings.
bind_ubo_pushbuffer.Setup(0); bind_ubo_pushbuffer.Setup();
bind_ssbo_pushbuffer.Setup(0); bind_ssbo_pushbuffer.Setup();
// Setup emulation uniform buffer.
GLShader::MaxwellUniformData ubo;
ubo.SetFromRegs(gpu);
const auto [buffer, offset] =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset,
static_cast<GLsizeiptr>(sizeof(ubo)));
// Setup shaders and their used resources. // Setup shaders and their used resources.
texture_cache.GuardSamplers(true); texture_cache.GuardSamplers(true);
@ -754,7 +751,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y,
launch_desc.block_dim_z, launch_desc.shared_alloc, launch_desc.block_dim_z, launch_desc.shared_alloc,
launch_desc.local_pos_alloc); launch_desc.local_pos_alloc);
std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); state.draw.shader_program = kernel->GetHandle(variant);
state.draw.program_pipeline = 0; state.draw.program_pipeline = 0;
const std::size_t buffer_size = const std::size_t buffer_size =
@ -762,8 +759,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
buffer_cache.Map(buffer_size); buffer_cache.Map(buffer_size);
bind_ubo_pushbuffer.Setup(0); bind_ubo_pushbuffer.Setup();
bind_ssbo_pushbuffer.Setup(0); bind_ssbo_pushbuffer.Setup();
SetupComputeConstBuffers(kernel); SetupComputeConstBuffers(kernel);
SetupComputeGlobalMemory(kernel); SetupComputeGlobalMemory(kernel);
@ -847,7 +844,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); ASSERT_MSG(params.height == config.height, "Framebuffer height is different");
if (params.pixel_format != pixel_format) { if (params.pixel_format != pixel_format) {
LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different");
} }
screen_info.display_texture = surface->GetTexture(); screen_info.display_texture = surface->GetTexture();
@ -858,17 +855,21 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config,
void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) {
MICROPROFILE_SCOPE(OpenGL_UBO); MICROPROFILE_SCOPE(OpenGL_UBO);
const u32 base_binding = device.GetBaseBindings(stage_index).uniform_buffer;
const auto& stages = system.GPU().Maxwell3D().state.shader_stages; const auto& stages = system.GPU().Maxwell3D().state.shader_stages;
const auto& shader_stage = stages[stage_index]; const auto& shader_stage = stages[stage_index];
for (const auto& entry : shader->GetShaderEntries().const_buffers) { for (const auto& entry : shader->GetShaderEntries().const_buffers) {
const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; const auto& buffer = shader_stage.const_buffers[entry.GetIndex()];
SetupConstBuffer(buffer, entry); SetupConstBuffer(base_binding + entry.GetIndex(), buffer, entry);
} }
} }
void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
MICROPROFILE_SCOPE(OpenGL_UBO); MICROPROFILE_SCOPE(OpenGL_UBO);
const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
u32 binding = 0;
for (const auto& entry : kernel->GetShaderEntries().const_buffers) { for (const auto& entry : kernel->GetShaderEntries().const_buffers) {
const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const auto& config = launch_desc.const_buffer_config[entry.GetIndex()];
const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value();
@ -876,15 +877,16 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) {
buffer.address = config.Address(); buffer.address = config.Address();
buffer.size = config.size; buffer.size = config.size;
buffer.enabled = mask[entry.GetIndex()]; buffer.enabled = mask[entry.GetIndex()];
SetupConstBuffer(buffer, entry); SetupConstBuffer(binding++, buffer, entry);
} }
} }
void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
const GLShader::ConstBufferEntry& entry) { const GLShader::ConstBufferEntry& entry) {
if (!buffer.enabled) { if (!buffer.enabled) {
// Set values to zero to unbind buffers // Set values to zero to unbind buffers
bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
sizeof(float));
return; return;
} }
@ -895,18 +897,20 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
const auto alignment = device.GetUniformBufferAlignment(); const auto alignment = device.GetUniformBufferAlignment();
const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
device.HasFastBufferSubData()); device.HasFastBufferSubData());
bind_ubo_pushbuffer.Push(cbuf, offset, size); bind_ubo_pushbuffer.Push(binding, cbuf, offset, size);
} }
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
auto& gpu{system.GPU()}; auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()}; auto& memory_manager{gpu.MemoryManager()};
const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]};
u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer;
for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { for (const auto& entry : shader->GetShaderEntries().global_memory_entries) {
const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)}; const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)}; const auto size{memory_manager.Read<u32>(addr + 8)};
SetupGlobalMemory(entry, gpu_addr, size); SetupGlobalMemory(binding++, entry, gpu_addr, size);
} }
} }
@ -914,38 +918,35 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) {
auto& gpu{system.GPU()}; auto& gpu{system.GPU()};
auto& memory_manager{gpu.MemoryManager()}; auto& memory_manager{gpu.MemoryManager()};
const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config};
u32 binding = 0;
for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) {
const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()};
const auto gpu_addr{memory_manager.Read<u64>(addr)}; const auto gpu_addr{memory_manager.Read<u64>(addr)};
const auto size{memory_manager.Read<u32>(addr + 8)}; const auto size{memory_manager.Read<u32>(addr + 8)};
SetupGlobalMemory(entry, gpu_addr, size); SetupGlobalMemory(binding++, entry, gpu_addr, size);
} }
} }
void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry,
GPUVAddr gpu_addr, std::size_t size) { GPUVAddr gpu_addr, std::size_t size) {
const auto alignment{device.GetShaderStorageBufferAlignment()}; const auto alignment{device.GetShaderStorageBufferAlignment()};
const auto [ssbo, buffer_offset] = const auto [ssbo, buffer_offset] =
buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());
bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast<GLsizeiptr>(size));
} }
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader, void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
BaseBindings base_bindings) {
MICROPROFILE_SCOPE(OpenGL_Texture); MICROPROFILE_SCOPE(OpenGL_Texture);
const auto& gpu = system.GPU(); const auto& gpu = system.GPU();
const auto& maxwell3d = gpu.Maxwell3D(); const auto& maxwell3d = gpu.Maxwell3D();
const auto& entries = shader->GetShaderEntries().samplers; const auto& entries = shader->GetShaderEntries().samplers;
ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), u32 binding = device.GetBaseBindings(stage_index).sampler;
"Exceeded the number of active textures."); for (const auto& entry : entries) {
const auto num_entries = static_cast<u32>(entries.size());
for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) {
const auto& entry = entries[bindpoint];
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
SetupTexture(base_bindings.sampler + bindpoint, texture, entry); SetupTexture(binding++, texture, entry);
} }
} }
@ -954,14 +955,10 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
const auto& compute = system.GPU().KeplerCompute(); const auto& compute = system.GPU().KeplerCompute();
const auto& entries = kernel->GetShaderEntries().samplers; const auto& entries = kernel->GetShaderEntries().samplers;
ASSERT_MSG(entries.size() <= std::size(state.textures), u32 binding = 0;
"Exceeded the number of active textures."); for (const auto& entry : entries) {
const auto num_entries = static_cast<u32>(entries.size());
for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) {
const auto& entry = entries[bindpoint];
const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
SetupTexture(bindpoint, texture, entry); SetupTexture(binding++, texture, entry);
} }
} }
@ -986,8 +983,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu
texture.tic.w_source); texture.tic.w_source);
} }
void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader, void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) {
BaseBindings base_bindings) {
const auto& maxwell3d = system.GPU().Maxwell3D(); const auto& maxwell3d = system.GPU().Maxwell3D();
const auto& entries = shader->GetShaderEntries().images; const auto& entries = shader->GetShaderEntries().images;
@ -996,7 +992,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh
const auto& entry = entries[bindpoint]; const auto& entry = entries[bindpoint];
const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index); const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic;
SetupImage(base_bindings.image + bindpoint, tic, entry); SetupImage(bindpoint, tic, entry);
} }
} }

View file

@ -89,7 +89,7 @@ private:
void SetupComputeConstBuffers(const Shader& kernel); void SetupComputeConstBuffers(const Shader& kernel);
/// Configures a constant buffer. /// Configures a constant buffer.
void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer,
const GLShader::ConstBufferEntry& entry); const GLShader::ConstBufferEntry& entry);
/// Configures the current global memory entries to use for the draw command. /// Configures the current global memory entries to use for the draw command.
@ -99,15 +99,14 @@ private:
void SetupComputeGlobalMemory(const Shader& kernel); void SetupComputeGlobalMemory(const Shader& kernel);
/// Configures a global memory entry. /// Configures a global memory entry.
void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr,
std::size_t size); std::size_t size);
/// Syncs all the state, shaders, render targets and textures setting before a draw call. /// Syncs all the state, shaders, render targets and textures setting before a draw call.
void DrawPrelude(); void DrawPrelude();
/// Configures the current textures to use for the draw command. /// Configures the current textures to use for the draw command.
void SetupDrawTextures(std::size_t stage_index, const Shader& shader, void SetupDrawTextures(std::size_t stage_index, const Shader& shader);
BaseBindings base_bindings);
/// Configures the textures used in a compute shader. /// Configures the textures used in a compute shader.
void SetupComputeTextures(const Shader& kernel); void SetupComputeTextures(const Shader& kernel);
@ -117,7 +116,7 @@ private:
const GLShader::SamplerEntry& entry); const GLShader::SamplerEntry& entry);
/// Configures images in a graphics shader. /// Configures images in a graphics shader.
void SetupDrawImages(std::size_t stage_index, const Shader& shader, BaseBindings base_bindings); void SetupDrawImages(std::size_t stage_index, const Shader& shader);
/// Configures images in a compute shader. /// Configures images in a compute shader.
void SetupComputeImages(const Shader& shader); void SetupComputeImages(const Shader& shader);

View file

@ -266,28 +266,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp
} }
source += '\n'; source += '\n';
auto base_bindings = variant.base_bindings;
if (!is_compute) {
source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++);
}
for (const auto& cbuf : entries.const_buffers) {
source +=
fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++);
}
for (const auto& gmem : entries.global_memory_entries) {
source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(),
gmem.GetCbufOffset(), base_bindings.gmem++);
}
for (const auto& sampler : entries.samplers) {
source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(),
base_bindings.sampler++);
}
for (const auto& image : entries.images) {
source +=
fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++);
}
if (shader_type == ShaderType::Geometry) { if (shader_type == ShaderType::Geometry) {
const auto [glsl_topology, debug_name, max_vertices] = const auto [glsl_topology, debug_name, max_vertices] =
GetPrimitiveDescription(variant.primitive_mode); GetPrimitiveDescription(variant.primitive_mode);
@ -403,27 +381,21 @@ Shader CachedShader::CreateFromCache(const ShaderParameters& params,
unspecialized.code_b)); unspecialized.code_b));
} }
std::tuple<GLuint, BaseBindings> CachedShader::GetHandle(const ProgramVariant& variant) { GLuint CachedShader::GetHandle(const ProgramVariant& variant) {
EnsureValidLockerVariant(); EnsureValidLockerVariant();
const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant);
auto& program = entry->second; auto& program = entry->second;
if (is_cache_miss) { if (!is_cache_miss) {
program = BuildShader(device, unique_identifier, shader_type, code, code_b, return program->handle;
*curr_locker_variant->locker, variant);
disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
} }
auto base_bindings = variant.base_bindings; program = BuildShader(device, unique_identifier, shader_type, code, code_b,
base_bindings.cbuf += static_cast<u32>(entries.const_buffers.size()); *curr_locker_variant->locker, variant);
base_bindings.cbuf += STAGE_RESERVED_UBOS; disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker));
base_bindings.gmem += static_cast<u32>(entries.global_memory_entries.size());
base_bindings.sampler += static_cast<u32>(entries.samplers.size());
base_bindings.image += static_cast<u32>(entries.images.size());
return {program->handle, base_bindings}; LabelGLObject(GL_PROGRAM, program->handle, cpu_addr);
return program->handle;
} }
bool CachedShader::EnsureValidLockerVariant() { bool CachedShader::EnsureValidLockerVariant() {

View file

@ -87,7 +87,7 @@ public:
} }
/// Gets the GL program handle for the shader /// Gets the GL program handle for the shader
std::tuple<GLuint, BaseBindings> GetHandle(const ProgramVariant& variant); GLuint GetHandle(const ProgramVariant& variant);
private: private:
struct LockerVariant { struct LockerVariant {

View file

@ -43,6 +43,9 @@ using namespace VideoCommon::Shader;
using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Maxwell = Tegra::Engines::Maxwell3D::Regs;
using Operation = const OperationNode&; using Operation = const OperationNode&;
class ASTDecompiler;
class ExprDecompiler;
enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat };
struct TextureAoffi {}; struct TextureAoffi {};
@ -337,9 +340,6 @@ std::string FlowStackTopName(MetaStackClass stack) {
return stage == ShaderType::Vertex; return stage == ShaderType::Vertex;
} }
class ASTDecompiler;
class ExprDecompiler;
class GLSLDecompiler final { class GLSLDecompiler final {
public: public:
explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage,
@ -621,7 +621,8 @@ private:
void DeclareConstantBuffers() { void DeclareConstantBuffers() {
for (const auto& entry : ir.GetConstantBuffers()) { for (const auto& entry : ir.GetConstantBuffers()) {
const auto [index, size] = entry; const auto [index, size] = entry;
code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index, const u32 binding = device.GetBaseBindings(stage).uniform_buffer + index;
code.AddLine("layout (std140, binding = {}) uniform {} {{", binding,
GetConstBufferBlock(index)); GetConstBufferBlock(index));
code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS);
code.AddLine("}};"); code.AddLine("}};");
@ -630,6 +631,8 @@ private:
} }
void DeclareGlobalMemory() { void DeclareGlobalMemory() {
u32 binding = device.GetBaseBindings(stage).shader_storage_buffer;
for (const auto& gmem : ir.GetGlobalMemory()) { for (const auto& gmem : ir.GetGlobalMemory()) {
const auto& [base, usage] = gmem; const auto& [base, usage] = gmem;
@ -642,8 +645,8 @@ private:
qualifier += " writeonly"; qualifier += " writeonly";
} }
code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{", code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier,
base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base)); GetGlobalMemoryBlock(base));
code.AddLine(" uint {}[];", GetGlobalMemory(base)); code.AddLine(" uint {}[];", GetGlobalMemory(base));
code.AddLine("}};"); code.AddLine("}};");
code.AddNewLine(); code.AddNewLine();
@ -653,9 +656,11 @@ private:
void DeclareSamplers() { void DeclareSamplers() {
const auto& samplers = ir.GetSamplers(); const auto& samplers = ir.GetSamplers();
for (const auto& sampler : samplers) { for (const auto& sampler : samplers) {
const std::string name{GetSampler(sampler)}; const std::string name = GetSampler(sampler);
const std::string description{"layout (binding = SAMPLER_BINDING_" +
std::to_string(sampler.GetIndex()) + ") uniform"}; const u32 binding = device.GetBaseBindings(stage).sampler + sampler.GetIndex();
const std::string description = fmt::format("layout (binding = {}) uniform", binding);
std::string sampler_type = [&]() { std::string sampler_type = [&]() {
if (sampler.IsBuffer()) { if (sampler.IsBuffer()) {
return "samplerBuffer"; return "samplerBuffer";
@ -732,10 +737,12 @@ private:
qualifier += " writeonly"; qualifier += " writeonly";
} }
const u32 binding = device.GetBaseBindings(stage).image + image.GetIndex();
const char* format = image.IsAtomic() ? "r32ui, " : ""; const char* format = image.IsAtomic() ? "r32ui, " : "";
const char* type_declaration = GetImageTypeDeclaration(image.GetType()); const char* type_declaration = GetImageTypeDeclaration(image.GetType());
code.AddLine("layout ({}binding = IMAGE_BINDING_{}) {} uniform uimage{} {};", format, code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding,
image.GetIndex(), qualifier, type_declaration, GetImage(image)); qualifier, type_declaration, GetImage(image));
} }
if (!images.empty()) { if (!images.empty()) {
code.AddNewLine(); code.AddNewLine();

View file

@ -53,11 +53,10 @@ struct BindlessSamplerKey {
Tegra::Engines::SamplerDescriptor sampler{}; Tegra::Engines::SamplerDescriptor sampler{};
}; };
constexpr u32 NativeVersion = 10; constexpr u32 NativeVersion = 11;
// Making sure sizes doesn't change by accident // Making sure sizes doesn't change by accident
static_assert(sizeof(BaseBindings) == 16); static_assert(sizeof(ProgramVariant) == 20);
static_assert(sizeof(ProgramVariant) == 36);
ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{}; ShaderCacheVersionHash hash{};

View file

@ -38,31 +38,13 @@ struct ShaderDiskCacheDump;
using ProgramCode = std::vector<u64>; using ProgramCode = std::vector<u64>;
using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>; using ShaderDumpsMap = std::unordered_map<ShaderDiskCacheUsage, ShaderDiskCacheDump>;
/// Binding counters used by an OpenGL shader program, one per resource kind.
struct BaseBindings {
    u32 cbuf{};
    u32 gmem{};
    u32 sampler{};
    u32 image{};

    /// Two values are equal when all four counters match.
    bool operator==(const BaseBindings& rhs) const noexcept {
        return cbuf == rhs.cbuf && gmem == rhs.gmem && sampler == rhs.sampler &&
               image == rhs.image;
    }

    /// Negation of operator==.
    bool operator!=(const BaseBindings& rhs) const noexcept {
        return !(*this == rhs);
    }
};
// Compile-time check that the struct stays trivially copyable.
static_assert(std::is_trivially_copyable_v<BaseBindings>);
/// Describes the different variants a program can be compiled with. /// Describes the different variants a program can be compiled with.
struct ProgramVariant final { struct ProgramVariant final {
ProgramVariant() = default; ProgramVariant() = default;
/// Graphics constructor. /// Graphics constructor.
explicit constexpr ProgramVariant(BaseBindings base_bindings, GLenum primitive_mode) noexcept explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept
: base_bindings{base_bindings}, primitive_mode{primitive_mode} {} : primitive_mode{primitive_mode} {}
/// Compute constructor. /// Compute constructor.
explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size,
@ -71,7 +53,6 @@ struct ProgramVariant final {
shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {}
// Graphics specific parameters. // Graphics specific parameters.
BaseBindings base_bindings{};
GLenum primitive_mode{}; GLenum primitive_mode{};
// Compute specific parameters. // Compute specific parameters.
@ -82,10 +63,10 @@ struct ProgramVariant final {
u32 local_memory_size{}; u32 local_memory_size{};
bool operator==(const ProgramVariant& rhs) const noexcept { bool operator==(const ProgramVariant& rhs) const noexcept {
return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z, return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size,
shared_memory_size, local_memory_size) == local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y,
std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y, rhs.block_z, rhs.shared_memory_size,
rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size); rhs.local_memory_size);
} }
bool operator!=(const ProgramVariant& rhs) const noexcept { bool operator!=(const ProgramVariant& rhs) const noexcept {
@ -117,21 +98,10 @@ struct ShaderDiskCacheUsage {
namespace std { namespace std {
template <>
struct hash<OpenGL::BaseBindings> {
std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept {
return static_cast<std::size_t>(bindings.cbuf) ^
(static_cast<std::size_t>(bindings.gmem) << 8) ^
(static_cast<std::size_t>(bindings.sampler) << 16) ^
(static_cast<std::size_t>(bindings.image) << 24);
}
};
template <> template <>
struct hash<OpenGL::ProgramVariant> { struct hash<OpenGL::ProgramVariant> {
std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept {
return std::hash<OpenGL::BaseBindings>{}(variant.base_bindings) ^ return (static_cast<std::size_t>(variant.primitive_mode) << 6) ^
(static_cast<std::size_t>(variant.primitive_mode) << 6) ^
static_cast<std::size_t>(variant.block_x) ^ static_cast<std::size_t>(variant.block_x) ^
(static_cast<std::size_t>(variant.block_y) << 32) ^ (static_cast<std::size_t>(variant.block_y) << 32) ^
(static_cast<std::size_t>(variant.block_z) << 48) ^ (static_cast<std::size_t>(variant.block_z) << 48) ^

View file

@ -2,9 +2,13 @@
// Licensed under GPLv2 or any later version // Licensed under GPLv2 or any later version
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <string>
#include <fmt/format.h> #include <fmt/format.h>
#include "video_core/engines/maxwell_3d.h" #include "video_core/engines/maxwell_3d.h"
#include "video_core/engines/shader_type.h" #include "video_core/engines/shader_type.h"
#include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h"
#include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/renderer_opengl/gl_shader_gen.h"
#include "video_core/shader/shader_ir.h" #include "video_core/shader/shader_ir.h"
@ -20,12 +24,13 @@ using VideoCommon::Shader::ShaderIR;
std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) {
std::string out = GetCommonDeclarations(); std::string out = GetCommonDeclarations();
out += R"( out += fmt::format(R"(
layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { layout (std140, binding = {}) uniform vs_config {{
float y_direction; float y_direction;
}; }};
)"; )",
EmulationUniformBlockBinding);
out += Decompile(device, ir, ShaderType::Vertex, "vertex"); out += Decompile(device, ir, ShaderType::Vertex, "vertex");
if (ir_b) { if (ir_b) {
out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b"); out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b");
@ -44,12 +49,13 @@ void main() {
std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations(); std::string out = GetCommonDeclarations();
out += R"( out += fmt::format(R"(
layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { layout (std140, binding = {}) uniform gs_config {{
float y_direction; float y_direction;
}; }};
)"; )",
EmulationUniformBlockBinding);
out += Decompile(device, ir, ShaderType::Geometry, "geometry"); out += Decompile(device, ir, ShaderType::Geometry, "geometry");
out += R"( out += R"(
@ -62,7 +68,7 @@ void main() {
std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) {
std::string out = GetCommonDeclarations(); std::string out = GetCommonDeclarations();
out += R"( out += fmt::format(R"(
layout (location = 0) out vec4 FragColor0; layout (location = 0) out vec4 FragColor0;
layout (location = 1) out vec4 FragColor1; layout (location = 1) out vec4 FragColor1;
layout (location = 2) out vec4 FragColor2; layout (location = 2) out vec4 FragColor2;
@ -72,11 +78,12 @@ layout (location = 5) out vec4 FragColor5;
layout (location = 6) out vec4 FragColor6; layout (location = 6) out vec4 FragColor6;
layout (location = 7) out vec4 FragColor7; layout (location = 7) out vec4 FragColor7;
layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { layout (std140, binding = {}) uniform fs_config {{
float y_direction; float y_direction;
}; }};
)"; )",
EmulationUniformBlockBinding);
out += Decompile(device, ir, ShaderType::Fragment, "fragment"); out += Decompile(device, ir, ShaderType::Fragment, "fragment");
out += R"( out += R"(

View file

@ -417,14 +417,20 @@ void OpenGLState::ApplyClipControl() {
} }
void OpenGLState::ApplyTextures() { void OpenGLState::ApplyTextures() {
if (const auto update = UpdateArray(cur_state.textures, textures)) { const std::size_t size = std::size(textures);
glBindTextures(update->first, update->second, textures.data() + update->first); for (std::size_t i = 0; i < size; ++i) {
if (UpdateValue(cur_state.textures[i], textures[i])) {
glBindTextureUnit(static_cast<GLuint>(i), textures[i]);
}
} }
} }
void OpenGLState::ApplySamplers() { void OpenGLState::ApplySamplers() {
if (const auto update = UpdateArray(cur_state.samplers, samplers)) { const std::size_t size = std::size(samplers);
glBindSamplers(update->first, update->second, samplers.data() + update->first); for (std::size_t i = 0; i < size; ++i) {
if (UpdateValue(cur_state.samplers[i], samplers[i])) {
glBindSampler(static_cast<GLuint>(i), samplers[i]);
}
} }
} }

View file

@ -96,8 +96,9 @@ public:
GLenum operation = GL_COPY; GLenum operation = GL_COPY;
} logic_op; } logic_op;
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> textures = {}; static constexpr std::size_t NumSamplers = 32 * 5;
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumTextureSamplers> samplers = {}; std::array<GLuint, NumSamplers> textures = {};
std::array<GLuint, NumSamplers> samplers = {};
std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images = {}; std::array<GLuint, Tegra::Engines::Maxwell3D::Regs::NumImages> images = {};
struct { struct {

View file

@ -3,7 +3,10 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <string> #include <string>
#include <vector>
#include <fmt/format.h> #include <fmt/format.h>
#include <glad/glad.h> #include <glad/glad.h>
#include "common/assert.h" #include "common/assert.h"
@ -48,34 +51,19 @@ BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{t
BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
void BindBuffersRangePushBuffer::Setup(GLuint first_) { void BindBuffersRangePushBuffer::Setup() {
first = first_; entries.clear();
buffer_pointers.clear();
offsets.clear();
sizes.clear();
} }
void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) { void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset,
buffer_pointers.push_back(buffer); GLsizeiptr size) {
offsets.push_back(offset); entries.push_back(Entry{binding, buffer, offset, size});
sizes.push_back(size);
} }
void BindBuffersRangePushBuffer::Bind() { void BindBuffersRangePushBuffer::Bind() {
// Ensure sizes are valid. for (const Entry& entry : entries) {
const std::size_t count{buffer_pointers.size()}; glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size);
DEBUG_ASSERT(count == offsets.size() && count == sizes.size());
if (count == 0) {
return;
} }
// Dereference buffers.
buffers.resize(count);
std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(),
[](const GLuint* pointer) { return *pointer; });
glBindBuffersRange(target, first, static_cast<GLsizei>(count), buffers.data(), offsets.data(),
sizes.data());
} }
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {

View file

@ -43,20 +43,22 @@ public:
explicit BindBuffersRangePushBuffer(GLenum target); explicit BindBuffersRangePushBuffer(GLenum target);
~BindBuffersRangePushBuffer(); ~BindBuffersRangePushBuffer();
void Setup(GLuint first_); void Setup();
void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size); void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size);
void Bind(); void Bind();
private: private:
GLenum target{}; struct Entry {
GLuint first{}; GLuint binding;
std::vector<const GLuint*> buffer_pointers; const GLuint* buffer;
GLintptr offset;
GLsizeiptr size;
};
std::vector<GLuint> buffers; GLenum target;
std::vector<GLintptr> offsets; std::vector<Entry> entries;
std::vector<GLsizeiptr> sizes;
}; };
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});