From 180417c51438e2c97b800f4b19e621dbc8288493 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 18 Nov 2019 21:38:15 -0300 Subject: [PATCH] gl_shader_cache: Remove dynamic BaseBinding specialization --- src/video_core/engines/maxwell_3d.h | 1 - src/video_core/engines/shader_type.h | 1 + src/video_core/renderer_opengl/gl_device.cpp | 42 +++++++++ src/video_core/renderer_opengl/gl_device.h | 21 ++++- .../renderer_opengl/gl_rasterizer.cpp | 94 +++++++++---------- .../renderer_opengl/gl_rasterizer.h | 9 +- .../renderer_opengl/gl_shader_cache.cpp | 44 ++------- .../renderer_opengl/gl_shader_cache.h | 2 +- .../renderer_opengl/gl_shader_decompiler.cpp | 29 +++--- .../renderer_opengl/gl_shader_disk_cache.cpp | 5 +- .../renderer_opengl/gl_shader_disk_cache.h | 44 ++------- .../renderer_opengl/gl_shader_gen.cpp | 31 +++--- src/video_core/renderer_opengl/gl_state.cpp | 14 ++- src/video_core/renderer_opengl/gl_state.h | 5 +- src/video_core/renderer_opengl/utils.cpp | 32 ++----- src/video_core/renderer_opengl/utils.h | 18 ++-- 16 files changed, 200 insertions(+), 192 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 72994f4d2..c8dd362ab 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -63,7 +63,6 @@ public: static constexpr std::size_t NumVertexArrays = 32; static constexpr std::size_t NumVertexAttributes = 32; static constexpr std::size_t NumVaryings = 31; - static constexpr std::size_t NumTextureSamplers = 32; static constexpr std::size_t NumImages = 8; // TODO(Rodrigo): Investigate this number static constexpr std::size_t NumClipDistances = 8; static constexpr std::size_t MaxShaderProgram = 6; diff --git a/src/video_core/engines/shader_type.h b/src/video_core/engines/shader_type.h index 239196ba9..49ce5cde5 100644 --- a/src/video_core/engines/shader_type.h +++ b/src/video_core/engines/shader_type.h @@ -16,5 +16,6 @@ enum class ShaderType : u32 { Fragment = 4, Compute = 5, }; +static constexpr std::size_t MaxShaderTypes = 6; } // namespace Tegra::Engines diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index b30d5be74..5cfa97fc2 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -17,6 +17,9 @@ namespace OpenGL { namespace { +// One uniform block is reserved for emulation purposes +constexpr u32 ReservedUniformBlocks = 1; + template T GetInteger(GLenum pname) { GLint temporary; @@ -48,6 +51,22 @@ bool HasExtension(const std::vector& images, std::string_view return std::find(images.begin(), images.end(), extension) != images.end(); } +constexpr Device::BaseBindings operator+(Device::BaseBindings lhs, Device::BaseBindings rhs) { + return Device::BaseBindings{lhs.uniform_buffer + rhs.uniform_buffer, + lhs.shader_storage_buffer + rhs.shader_storage_buffer, + lhs.sampler + rhs.sampler, lhs.image + rhs.image}; +} + +Device::BaseBindings BuildBaseBindings(GLenum uniform_blocks, GLenum shader_storage_blocks, + GLenum texture_image_units, GLenum image_uniforms) noexcept { + return Device::BaseBindings{ + GetInteger(uniform_blocks) - ReservedUniformBlocks, + GetInteger(shader_storage_blocks), + GetInteger(texture_image_units), + GetInteger(image_uniforms), + }; +} + } // Anonymous namespace Device::Device() { @@ -56,6 +75,29 @@ Device::Device() { const bool is_nvidia = vendor == "NVIDIA Corporation"; + // Reserve the first UBO for emulation bindings + base_bindings[0] = BaseBindings{ReservedUniformBlocks, 0, 0, 0}; + base_bindings[1] = base_bindings[0] + BuildBaseBindings(GL_MAX_VERTEX_UNIFORM_BLOCKS, + GL_MAX_VERTEX_SHADER_STORAGE_BLOCKS, + GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS, + GL_MAX_VERTEX_IMAGE_UNIFORMS); + base_bindings[2] = + base_bindings[1] + BuildBaseBindings(GL_MAX_TESS_CONTROL_UNIFORM_BLOCKS, + GL_MAX_TESS_CONTROL_SHADER_STORAGE_BLOCKS, + GL_MAX_TESS_CONTROL_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_CONTROL_IMAGE_UNIFORMS); + base_bindings[3] = + base_bindings[2] + BuildBaseBindings(GL_MAX_TESS_EVALUATION_UNIFORM_BLOCKS, + GL_MAX_TESS_EVALUATION_SHADER_STORAGE_BLOCKS, + GL_MAX_TESS_EVALUATION_TEXTURE_IMAGE_UNITS, + GL_MAX_TESS_EVALUATION_IMAGE_UNIFORMS); + base_bindings[4] = base_bindings[3] + BuildBaseBindings(GL_MAX_GEOMETRY_UNIFORM_BLOCKS, + GL_MAX_GEOMETRY_SHADER_STORAGE_BLOCKS, + GL_MAX_GEOMETRY_TEXTURE_IMAGE_UNITS, + GL_MAX_GEOMETRY_IMAGE_UNIFORMS); + // Compute doesn't need any of that + base_bindings[5] = BaseBindings{0, 0, 0, 0}; + uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 6c86fe207..e7d3c48b0 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -6,14 +6,32 @@ #include #include "common/common_types.h" +#include "video_core/engines/shader_type.h" namespace OpenGL { -class Device { +static constexpr u32 EmulationUniformBlockBinding = 0; + +class Device final { public: + struct BaseBindings final { + u32 uniform_buffer{}; + u32 shader_storage_buffer{}; + u32 sampler{}; + u32 image{}; + }; + explicit Device(); explicit Device(std::nullptr_t); + const BaseBindings& GetBaseBindings(std::size_t stage_index) const noexcept { + return base_bindings[stage_index]; + } + + const BaseBindings& GetBaseBindings(Tegra::Engines::ShaderType shader_type) const noexcept { + return GetBaseBindings(static_cast(shader_type)); + } + std::size_t GetUniformBufferAlignment() const { return uniform_buffer_alignment; } @@ -67,6 +85,7 @@ private: static bool TestComponentIndexingBug(); static bool TestPreciseBug(); + std::array base_bindings; std::size_t uniform_buffer_alignment{}; std::size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 8baa73ebf..5c5ad1f6c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -258,7 +258,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { MICROPROFILE_SCOPE(OpenGL_Shader); auto& gpu = system.GPU().Maxwell3D(); - BaseBindings base_bindings; std::array clip_distances{}; for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { @@ -277,25 +276,17 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { continue; } - GLShader::MaxwellUniformData ubo{}; - ubo.SetFromRegs(gpu); - const auto [buffer, offset] = - buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); - - // Bind the emulation info buffer - bind_ubo_pushbuffer.Push(buffer, offset, static_cast(sizeof(ubo))); - Shader shader{shader_cache.GetStageProgram(program)}; // Stage indices are 0 - 5 const std::size_t stage = index == 0 ? 0 : index - 1; SetupDrawConstBuffers(stage, shader); SetupDrawGlobalMemory(stage, shader); - SetupDrawTextures(stage, shader, base_bindings); - SetupDrawImages(stage, shader, base_bindings); + SetupDrawTextures(stage, shader); + SetupDrawImages(stage, shader); - const ProgramVariant variant(base_bindings, primitive_mode); - const auto [program_handle, next_bindings] = shader->GetHandle(variant); + const ProgramVariant variant(primitive_mode); + const auto program_handle = shader->GetHandle(variant); switch (program) { case Maxwell::ShaderProgram::VertexA: @@ -326,8 +317,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { // VertexB was combined with VertexA, so we skip the VertexB iteration ++index; } - - base_bindings = next_bindings; } SyncClipEnabled(clip_distances); @@ -612,8 +601,16 @@ void RasterizerOpenGL::DrawPrelude() { index_buffer_offset = SetupIndexBuffer(); // Prepare packed bindings. - bind_ubo_pushbuffer.Setup(0); - bind_ssbo_pushbuffer.Setup(0); + bind_ubo_pushbuffer.Setup(); + bind_ssbo_pushbuffer.Setup(); + + // Setup emulation uniform buffer. + GLShader::MaxwellUniformData ubo; + ubo.SetFromRegs(gpu); + const auto [buffer, offset] = + buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); + bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset, + static_cast(sizeof(ubo))); // Setup shaders and their used resources. texture_cache.GuardSamplers(true); @@ -754,7 +751,7 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { const ProgramVariant variant(launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z, launch_desc.shared_alloc, launch_desc.local_pos_alloc); - std::tie(state.draw.shader_program, std::ignore) = kernel->GetHandle(variant); + state.draw.shader_program = kernel->GetHandle(variant); state.draw.program_pipeline = 0; const std::size_t buffer_size = @@ -762,8 +759,8 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) { (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); buffer_cache.Map(buffer_size); - bind_ubo_pushbuffer.Setup(0); - bind_ssbo_pushbuffer.Setup(0); + bind_ubo_pushbuffer.Setup(); + bind_ssbo_pushbuffer.Setup(); SetupComputeConstBuffers(kernel); SetupComputeGlobalMemory(kernel); @@ -847,7 +844,7 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); if (params.pixel_format != pixel_format) { - LOG_WARNING(Render_OpenGL, "Framebuffer pixel_format is different"); + LOG_DEBUG(Render_OpenGL, "Framebuffer pixel_format is different"); } screen_info.display_texture = surface->GetTexture(); @@ -858,17 +855,21 @@ bool RasterizerOpenGL::AccelerateDisplay(const Tegra::FramebufferConfig& config, void RasterizerOpenGL::SetupDrawConstBuffers(std::size_t stage_index, const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_UBO); + const u32 base_binding = device.GetBaseBindings(stage_index).uniform_buffer; const auto& stages = system.GPU().Maxwell3D().state.shader_stages; const auto& shader_stage = stages[stage_index]; + for (const auto& entry : shader->GetShaderEntries().const_buffers) { const auto& buffer = shader_stage.const_buffers[entry.GetIndex()]; - SetupConstBuffer(buffer, entry); + SetupConstBuffer(base_binding + entry.GetIndex(), buffer, entry); } } void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { MICROPROFILE_SCOPE(OpenGL_UBO); const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + + u32 binding = 0; for (const auto& entry : kernel->GetShaderEntries().const_buffers) { const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); @@ -876,15 +877,16 @@ void RasterizerOpenGL::SetupComputeConstBuffers(const Shader& kernel) { buffer.address = config.Address(); buffer.size = config.size; buffer.enabled = mask[entry.GetIndex()]; - SetupConstBuffer(buffer, entry); + SetupConstBuffer(binding++, buffer, entry); } } -void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, +void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, const GLShader::ConstBufferEntry& entry) { if (!buffer.enabled) { // Set values to zero to unbind buffers - bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); + bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, + sizeof(float)); return; } @@ -895,18 +897,20 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b const auto alignment = device.GetUniformBufferAlignment(); const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, device.HasFastBufferSubData()); - bind_ubo_pushbuffer.Push(cbuf, offset, size); + bind_ubo_pushbuffer.Push(binding, cbuf, offset, size); } void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage_index]}; + + u32 binding = device.GetBaseBindings(stage_index).shader_storage_buffer; for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; const auto gpu_addr{memory_manager.Read(addr)}; const auto size{memory_manager.Read(addr + 8)}; - SetupGlobalMemory(entry, gpu_addr, size); + SetupGlobalMemory(binding++, entry, gpu_addr, size); } } @@ -914,38 +918,35 @@ void RasterizerOpenGL::SetupComputeGlobalMemory(const Shader& kernel) { auto& gpu{system.GPU()}; auto& memory_manager{gpu.MemoryManager()}; const auto cbufs{gpu.KeplerCompute().launch_description.const_buffer_config}; + + u32 binding = 0; for (const auto& entry : kernel->GetShaderEntries().global_memory_entries) { const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; const auto gpu_addr{memory_manager.Read(addr)}; const auto size{memory_manager.Read(addr + 8)}; - SetupGlobalMemory(entry, gpu_addr, size); + SetupGlobalMemory(binding++, entry, gpu_addr, size); } } -void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, +void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size) { const auto alignment{device.GetShaderStorageBufferAlignment()}; const auto [ssbo, buffer_offset] = buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); - bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast(size)); + bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast(size)); } -void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader, - BaseBindings base_bindings) { +void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { MICROPROFILE_SCOPE(OpenGL_Texture); const auto& gpu = system.GPU(); const auto& maxwell3d = gpu.Maxwell3D(); const auto& entries = shader->GetShaderEntries().samplers; - ASSERT_MSG(base_bindings.sampler + entries.size() <= std::size(state.textures), - "Exceeded the number of active textures."); - - const auto num_entries = static_cast(entries.size()); - for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { - const auto& entry = entries[bindpoint]; + u32 binding = device.GetBaseBindings(stage_index).sampler; + for (const auto& entry : entries) { const auto shader_type = static_cast(stage_index); const auto texture = GetTextureInfo(maxwell3d, entry, shader_type); - SetupTexture(base_bindings.sampler + bindpoint, texture, entry); + SetupTexture(binding++, texture, entry); } } @@ -954,14 +955,10 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) { const auto& compute = system.GPU().KeplerCompute(); const auto& entries = kernel->GetShaderEntries().samplers; - ASSERT_MSG(entries.size() <= std::size(state.textures), - "Exceeded the number of active textures."); - - const auto num_entries = static_cast(entries.size()); - for (u32 bindpoint = 0; bindpoint < num_entries; ++bindpoint) { - const auto& entry = entries[bindpoint]; + u32 binding = 0; + for (const auto& entry : entries) { const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute); - SetupTexture(bindpoint, texture, entry); + SetupTexture(binding++, texture, entry); } } @@ -986,8 +983,7 @@ void RasterizerOpenGL::SetupTexture(u32 binding, const Tegra::Texture::FullTextu texture.tic.w_source); } -void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader, - BaseBindings base_bindings) { +void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& shader) { const auto& maxwell3d = system.GPU().Maxwell3D(); const auto& entries = shader->GetShaderEntries().images; @@ -996,7 +992,7 @@ void RasterizerOpenGL::SetupDrawImages(std::size_t stage_index, const Shader& sh const auto& entry = entries[bindpoint]; const auto shader_type = static_cast(stage_index); const auto tic = GetTextureInfo(maxwell3d, entry, shader_type).tic; - SetupImage(base_bindings.image + bindpoint, tic, entry); + SetupImage(bindpoint, tic, entry); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 6a2ce1586..0e47d71df 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -89,7 +89,7 @@ private: void SetupComputeConstBuffers(const Shader& kernel); /// Configures a constant buffer. - void SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& buffer, + void SetupConstBuffer(u32 binding, const Tegra::Engines::ConstBufferInfo& buffer, const GLShader::ConstBufferEntry& entry); /// Configures the current global memory entries to use for the draw command. @@ -99,15 +99,14 @@ private: void SetupComputeGlobalMemory(const Shader& kernel); /// Configures a constant buffer. - void SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, + void SetupGlobalMemory(u32 binding, const GLShader::GlobalMemoryEntry& entry, GPUVAddr gpu_addr, std::size_t size); /// Syncs all the state, shaders, render targets and textures setting before a draw call. void DrawPrelude(); /// Configures the current textures to use for the draw command. - void SetupDrawTextures(std::size_t stage_index, const Shader& shader, - BaseBindings base_bindings); + void SetupDrawTextures(std::size_t stage_index, const Shader& shader); /// Configures the textures used in a compute shader. void SetupComputeTextures(const Shader& kernel); @@ -117,7 +116,7 @@ private: const GLShader::SamplerEntry& entry); /// Configures images in a graphics shader. - void SetupDrawImages(std::size_t stage_index, const Shader& shader, BaseBindings base_bindings); + void SetupDrawImages(std::size_t stage_index, const Shader& shader); /// Configures images in a compute shader. void SetupComputeImages(const Shader& shader); diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index f474fb550..41ca005a1 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -266,28 +266,6 @@ CachedProgram BuildShader(const Device& device, u64 unique_identifier, ShaderTyp } source += '\n'; - auto base_bindings = variant.base_bindings; - if (!is_compute) { - source += fmt::format("#define EMULATION_UBO_BINDING {}\n", base_bindings.cbuf++); - } - - for (const auto& cbuf : entries.const_buffers) { - source += - fmt::format("#define CBUF_BINDING_{} {}\n", cbuf.GetIndex(), base_bindings.cbuf++); - } - for (const auto& gmem : entries.global_memory_entries) { - source += fmt::format("#define GMEM_BINDING_{}_{} {}\n", gmem.GetCbufIndex(), - gmem.GetCbufOffset(), base_bindings.gmem++); - } - for (const auto& sampler : entries.samplers) { - source += fmt::format("#define SAMPLER_BINDING_{} {}\n", sampler.GetIndex(), - base_bindings.sampler++); - } - for (const auto& image : entries.images) { - source += - fmt::format("#define IMAGE_BINDING_{} {}\n", image.GetIndex(), base_bindings.image++); - } - if (shader_type == ShaderType::Geometry) { const auto [glsl_topology, debug_name, max_vertices] = GetPrimitiveDescription(variant.primitive_mode); @@ -403,27 +381,21 @@ Shader CachedShader::CreateFromCache(const ShaderParameters& params, unspecialized.code_b)); } -std::tuple CachedShader::GetHandle(const ProgramVariant& variant) { +GLuint CachedShader::GetHandle(const ProgramVariant& variant) { EnsureValidLockerVariant(); const auto [entry, is_cache_miss] = curr_locker_variant->programs.try_emplace(variant); auto& program = entry->second; - if (is_cache_miss) { - program = BuildShader(device, unique_identifier, shader_type, code, code_b, - *curr_locker_variant->locker, variant); - disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); - - LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); + if (!is_cache_miss) { + return program->handle; } - auto base_bindings = variant.base_bindings; - base_bindings.cbuf += static_cast(entries.const_buffers.size()); - base_bindings.cbuf += STAGE_RESERVED_UBOS; - base_bindings.gmem += static_cast(entries.global_memory_entries.size()); - base_bindings.sampler += static_cast(entries.samplers.size()); - base_bindings.image += static_cast(entries.images.size()); + program = BuildShader(device, unique_identifier, shader_type, code, code_b, + *curr_locker_variant->locker, variant); + disk_cache.SaveUsage(GetUsage(variant, *curr_locker_variant->locker)); - return {program->handle, base_bindings}; + LabelGLObject(GL_PROGRAM, program->handle, cpu_addr); + return program->handle; } bool CachedShader::EnsureValidLockerVariant() { diff --git a/src/video_core/renderer_opengl/gl_shader_cache.h b/src/video_core/renderer_opengl/gl_shader_cache.h index d23c8d6d4..7b1470db3 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_cache.h @@ -87,7 +87,7 @@ public: } /// Gets the GL program handle for the shader - std::tuple GetHandle(const ProgramVariant& variant); + GLuint GetHandle(const ProgramVariant& variant); private: struct LockerVariant { diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index caec565d1..5ad285c25 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -43,6 +43,9 @@ using namespace VideoCommon::Shader; using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Operation = const OperationNode&; +class ASTDecompiler; +class ExprDecompiler; + enum class Type { Void, Bool, Bool2, Float, Int, Uint, HalfFloat }; struct TextureAoffi {}; @@ -337,9 +340,6 @@ std::string FlowStackTopName(MetaStackClass stack) { return stage == ShaderType::Vertex; } -class ASTDecompiler; -class ExprDecompiler; - class GLSLDecompiler final { public: explicit GLSLDecompiler(const Device& device, const ShaderIR& ir, ShaderType stage, @@ -621,7 +621,8 @@ private: void DeclareConstantBuffers() { for (const auto& entry : ir.GetConstantBuffers()) { const auto [index, size] = entry; - code.AddLine("layout (std140, binding = CBUF_BINDING_{}) uniform {} {{", index, + const u32 binding = device.GetBaseBindings(stage).uniform_buffer + index; + code.AddLine("layout (std140, binding = {}) uniform {} {{", binding, GetConstBufferBlock(index)); code.AddLine(" uvec4 {}[{}];", GetConstBuffer(index), MAX_CONSTBUFFER_ELEMENTS); code.AddLine("}};"); @@ -630,6 +631,8 @@ private: } void DeclareGlobalMemory() { + u32 binding = device.GetBaseBindings(stage).shader_storage_buffer; + for (const auto& gmem : ir.GetGlobalMemory()) { const auto& [base, usage] = gmem; @@ -642,8 +645,8 @@ private: qualifier += " writeonly"; } - code.AddLine("layout (std430, binding = GMEM_BINDING_{}_{}) {} buffer {} {{", - base.cbuf_index, base.cbuf_offset, qualifier, GetGlobalMemoryBlock(base)); + code.AddLine("layout (std430, binding = {}) {} buffer {} {{", binding++, qualifier, + GetGlobalMemoryBlock(base)); code.AddLine(" uint {}[];", GetGlobalMemory(base)); code.AddLine("}};"); code.AddNewLine(); @@ -653,9 +656,11 @@ private: void DeclareSamplers() { const auto& samplers = ir.GetSamplers(); for (const auto& sampler : samplers) { - const std::string name{GetSampler(sampler)}; - const std::string description{"layout (binding = SAMPLER_BINDING_" + - std::to_string(sampler.GetIndex()) + ") uniform"}; + const std::string name = GetSampler(sampler); + + const u32 binding = device.GetBaseBindings(stage).sampler + sampler.GetIndex(); + const std::string description = fmt::format("layout (binding = {}) uniform", binding); + std::string sampler_type = [&]() { if (sampler.IsBuffer()) { return "samplerBuffer"; @@ -732,10 +737,12 @@ private: qualifier += " writeonly"; } + const u32 binding = device.GetBaseBindings(stage).image + image.GetIndex(); + const char* format = image.IsAtomic() ? "r32ui, " : ""; const char* type_declaration = GetImageTypeDeclaration(image.GetType()); - code.AddLine("layout ({}binding = IMAGE_BINDING_{}) {} uniform uimage{} {};", format, - image.GetIndex(), qualifier, type_declaration, GetImage(image)); + code.AddLine("layout ({}binding = {}) {} uniform uimage{} {};", format, binding, + qualifier, type_declaration, GetImage(image)); } if (!images.empty()) { code.AddNewLine(); diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index 09f62c8c4..cf874a09a 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -53,11 +53,10 @@ struct BindlessSamplerKey { Tegra::Engines::SamplerDescriptor sampler{}; }; -constexpr u32 NativeVersion = 10; +constexpr u32 NativeVersion = 11; // Making sure sizes doesn't change by accident -static_assert(sizeof(BaseBindings) == 16); -static_assert(sizeof(ProgramVariant) == 36); +static_assert(sizeof(ProgramVariant) == 20); ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash hash{}; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h index 917dbccdd..69a2fbdda 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h @@ -38,31 +38,13 @@ struct ShaderDiskCacheDump; using ProgramCode = std::vector; using ShaderDumpsMap = std::unordered_map; -/// Allocated bindings used by an OpenGL shader program -struct BaseBindings { - u32 cbuf{}; - u32 gmem{}; - u32 sampler{}; - u32 image{}; - - bool operator==(const BaseBindings& rhs) const noexcept { - return std::tie(cbuf, gmem, sampler, image) == - std::tie(rhs.cbuf, rhs.gmem, rhs.sampler, rhs.image); - } - - bool operator!=(const BaseBindings& rhs) const noexcept { - return !operator==(rhs); - } -}; -static_assert(std::is_trivially_copyable_v); - /// Describes the different variants a program can be compiled with. struct ProgramVariant final { ProgramVariant() = default; /// Graphics constructor. - explicit constexpr ProgramVariant(BaseBindings base_bindings, GLenum primitive_mode) noexcept - : base_bindings{base_bindings}, primitive_mode{primitive_mode} {} + explicit constexpr ProgramVariant(GLenum primitive_mode) noexcept + : primitive_mode{primitive_mode} {} /// Compute constructor. explicit constexpr ProgramVariant(u32 block_x, u32 block_y, u32 block_z, u32 shared_memory_size, @@ -71,7 +53,6 @@ struct ProgramVariant final { shared_memory_size{shared_memory_size}, local_memory_size{local_memory_size} {} // Graphics specific parameters. - BaseBindings base_bindings{}; GLenum primitive_mode{}; // Compute specific parameters. @@ -82,10 +63,10 @@ struct ProgramVariant final { u32 local_memory_size{}; bool operator==(const ProgramVariant& rhs) const noexcept { - return std::tie(base_bindings, primitive_mode, block_x, block_y, block_z, - shared_memory_size, local_memory_size) == - std::tie(rhs.base_bindings, rhs.primitive_mode, rhs.block_x, rhs.block_y, - rhs.block_z, rhs.shared_memory_size, rhs.local_memory_size); + return std::tie(primitive_mode, block_x, block_y, block_z, shared_memory_size, + local_memory_size) == std::tie(rhs.primitive_mode, rhs.block_x, rhs.block_y, + rhs.block_z, rhs.shared_memory_size, + rhs.local_memory_size); } bool operator!=(const ProgramVariant& rhs) const noexcept { @@ -117,21 +98,10 @@ struct ShaderDiskCacheUsage { namespace std { -template <> -struct hash { - std::size_t operator()(const OpenGL::BaseBindings& bindings) const noexcept { - return static_cast(bindings.cbuf) ^ - (static_cast(bindings.gmem) << 8) ^ - (static_cast(bindings.sampler) << 16) ^ - (static_cast(bindings.image) << 24); - } -}; - template <> struct hash { std::size_t operator()(const OpenGL::ProgramVariant& variant) const noexcept { - return std::hash{}(variant.base_bindings) ^ - (static_cast(variant.primitive_mode) << 6) ^ + return (static_cast(variant.primitive_mode) << 6) ^ static_cast(variant.block_x) ^ (static_cast(variant.block_y) << 32) ^ (static_cast(variant.block_z) << 48) ^ diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 2f601d550..296817efc 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -2,9 +2,13 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include + #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_type.h" +#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" #include "video_core/renderer_opengl/gl_shader_gen.h" #include "video_core/shader/shader_ir.h" @@ -20,12 +24,13 @@ using VideoCommon::Shader::ShaderIR; std::string GenerateVertexShader(const Device& device, const ShaderIR& ir, const ShaderIR* ir_b) { std::string out = GetCommonDeclarations(); - out += R"( -layout (std140, binding = EMULATION_UBO_BINDING) uniform vs_config { + out += fmt::format(R"( +layout (std140, binding = {}) uniform vs_config {{ float y_direction; -}; +}}; -)"; +)", + EmulationUniformBlockBinding); out += Decompile(device, ir, ShaderType::Vertex, "vertex"); if (ir_b) { out += Decompile(device, *ir_b, ShaderType::Vertex, "vertex_b"); @@ -44,12 +49,13 @@ void main() { std::string GenerateGeometryShader(const Device& device, const ShaderIR& ir) { std::string out = GetCommonDeclarations(); - out += R"( -layout (std140, binding = EMULATION_UBO_BINDING) uniform gs_config { + out += fmt::format(R"( +layout (std140, binding = {}) uniform gs_config {{ float y_direction; -}; +}}; -)"; +)", + EmulationUniformBlockBinding); out += Decompile(device, ir, ShaderType::Geometry, "geometry"); out += R"( @@ -62,7 +68,7 @@ void main() { std::string GenerateFragmentShader(const Device& device, const ShaderIR& ir) { std::string out = GetCommonDeclarations(); - out += R"( + out += fmt::format(R"( layout (location = 0) out vec4 FragColor0; layout (location = 1) out vec4 FragColor1; layout (location = 2) out vec4 FragColor2; @@ -72,11 +78,12 @@ layout (location = 5) out vec4 FragColor5; layout (location = 6) out vec4 FragColor6; layout (location = 7) out vec4 FragColor7; -layout (std140, binding = EMULATION_UBO_BINDING) uniform fs_config { +layout (std140, binding = {}) uniform fs_config {{ float y_direction; -}; +}}; -)"; +)", + EmulationUniformBlockBinding); out += Decompile(device, ir, ShaderType::Fragment, "fragment"); out += R"( diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index ccbe5912e..4cf3d0a8a 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -417,14 +417,20 @@ void OpenGLState::ApplyClipControl() { } void OpenGLState::ApplyTextures() { - if (const auto update = UpdateArray(cur_state.textures, textures)) { - glBindTextures(update->first, update->second, textures.data() + update->first); + const std::size_t size = std::size(textures); + for (std::size_t i = 0; i < size; ++i) { + if (UpdateValue(cur_state.textures[i], textures[i])) { + glBindTextureUnit(static_cast(i), textures[i]); + } } } void OpenGLState::ApplySamplers() { - if (const auto update = UpdateArray(cur_state.samplers, samplers)) { - glBindSamplers(update->first, update->second, samplers.data() + update->first); + const std::size_t size = std::size(samplers); + for (std::size_t i = 0; i < size; ++i) { + if (UpdateValue(cur_state.samplers[i], samplers[i])) { + glBindSampler(static_cast(i), samplers[i]); + } } } diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h index eaff22bda..fd53eb81a 100644 --- a/src/video_core/renderer_opengl/gl_state.h +++ b/src/video_core/renderer_opengl/gl_state.h @@ -96,8 +96,9 @@ public: GLenum operation = GL_COPY; } logic_op; - std::array textures = {}; - std::array samplers = {}; + static constexpr std::size_t NumSamplers = 32 * 5; + std::array textures = {}; + std::array samplers = {}; std::array images = {}; struct { diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index c504a2c1a..9770dda1c 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -3,7 +3,10 @@ // Refer to the license.txt file included. #include +#include + #include + #include #include "common/assert.h" @@ -48,34 +51,19 @@ BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{t BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; -void BindBuffersRangePushBuffer::Setup(GLuint first_) { - first = first_; - buffer_pointers.clear(); - offsets.clear(); - sizes.clear(); +void BindBuffersRangePushBuffer::Setup() { + entries.clear(); } -void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) { - buffer_pointers.push_back(buffer); - offsets.push_back(offset); - sizes.push_back(size); +void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset, + GLsizeiptr size) { + entries.push_back(Entry{binding, buffer, offset, size}); } void BindBuffersRangePushBuffer::Bind() { - // Ensure sizes are valid. - const std::size_t count{buffer_pointers.size()}; - DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); - if (count == 0) { - return; + for (const Entry& entry : entries) { + glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size); } - - // Dereference buffers. - buffers.resize(count); - std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(), - [](const GLuint* pointer) { return *pointer; }); - - glBindBuffersRange(target, first, static_cast(count), buffers.data(), offsets.data(), - sizes.data()); } void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 6c2b45546..d56153fe7 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -43,20 +43,22 @@ public: explicit BindBuffersRangePushBuffer(GLenum target); ~BindBuffersRangePushBuffer(); - void Setup(GLuint first_); + void Setup(); - void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size); + void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size); void Bind(); private: - GLenum target{}; - GLuint first{}; - std::vector buffer_pointers; + struct Entry { + GLuint binding; + const GLuint* buffer; + GLintptr offset; + GLsizeiptr size; + }; - std::vector buffers; - std::vector offsets; - std::vector sizes; + GLenum target; + std::vector entries; }; void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});