From 2a7653142dd8adeacafd8c90e64d52d6959f0aa7 Mon Sep 17 00:00:00 2001 From: Subv Date: Sat, 9 Jun 2018 18:02:05 -0500 Subject: [PATCH] Rasterizer: Use UBOs instead of SSBOs for uploading const buffers. This should help a bit with GPU performance once we're GPU-bound. --- .../renderer_opengl/gl_rasterizer.cpp | 28 +++++++++++-------- .../renderer_opengl/gl_rasterizer.h | 5 ++++ .../renderer_opengl/gl_shader_decompiler.cpp | 22 +++++++++++---- src/video_core/renderer_opengl/gl_state.cpp | 2 +- 4 files changed, 39 insertions(+), 18 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6f05f24a0..e04966849 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -197,8 +197,8 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { ASSERT_MSG(!gpu.regs.shader_config[0].enable, "VertexA is unsupported!"); // Next available bindpoints to use when uploading the const buffers and textures to the GLSL - // shaders. - u32 current_constbuffer_bindpoint = 0; + // shaders. The constbuffer bindpoint starts after the shader stage configuration bind points. + u32 current_constbuffer_bindpoint = uniform_buffers.size(); u32 current_texture_bindpoint = 0; for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { @@ -608,27 +608,33 @@ u32 RasterizerOpenGL::SetupConstBuffers(Maxwell::ShaderStage stage, GLuint progr boost::optional addr = gpu.memory_manager->GpuToCpuAddress(buffer.address); - std::vector data; + size_t size = 0; + if (used_buffer.IsIndirect()) { // Buffer is accessed indirectly, so upload the entire thing - data.resize(buffer.size * sizeof(float)); + size = buffer.size * sizeof(float); } else { // Buffer is accessed directly, upload just what we use - data.resize(used_buffer.GetSize() * sizeof(float)); + size = used_buffer.GetSize() * sizeof(float); } + // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 + // UBO alignment requirements. + size = Common::AlignUp(size, sizeof(GLvec4)); + ASSERT_MSG(size <= MaxConstbufferSize, "Constbuffer too big"); + + std::vector data(size); Memory::ReadBlock(*addr, data.data(), data.size()); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, buffer_draw_state.ssbo); - glBufferData(GL_SHADER_STORAGE_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); - glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); + glBindBuffer(GL_UNIFORM_BUFFER, buffer_draw_state.ssbo); + glBufferData(GL_UNIFORM_BUFFER, data.size(), data.data(), GL_DYNAMIC_DRAW); + glBindBuffer(GL_UNIFORM_BUFFER, 0); // Now configure the bindpoint of the buffer inside the shader std::string buffer_name = used_buffer.GetName(); - GLuint index = - glGetProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, buffer_name.c_str()); + GLuint index = glGetProgramResourceIndex(program, GL_UNIFORM_BLOCK, buffer_name.c_str()); if (index != -1) - glShaderStorageBlockBinding(program, index, buffer_draw_state.bindpoint); + glUniformBlockBinding(program, index, buffer_draw_state.bindpoint); } state.Apply(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index b7c8cf843..2ab066681 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -54,6 +54,11 @@ public: OGLShader shader; }; + /// Maximum supported size that a constbuffer can have in bytes. + static constexpr size_t MaxConstbufferSize = 0x1000; + static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, + "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); + private: class SamplerInfo { public: diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 37fbb94da..87ae47ac9 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -9,6 +9,7 @@ #include "common/assert.h" #include "common/common_types.h" #include "video_core/engines/shader_bytecode.h" +#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" namespace GLShader { @@ -366,7 +367,8 @@ public: /// Generates code representing a uniform (C buffer) register, interpreted as the input type. std::string GetUniform(u64 index, u64 offset, GLSLRegister::Type type) { declr_const_buffers[index].MarkAsUsed(index, offset, stage); - std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset) + ']'; + std::string value = 'c' + std::to_string(index) + '[' + std::to_string(offset / 4) + "][" + + std::to_string(offset % 4) + ']'; if (type == GLSLRegister::Type::Float) { return value; @@ -380,8 +382,12 @@ public: std::string GetUniformIndirect(u64 index, s64 offset, const Register& index_reg, GLSLRegister::Type type) { declr_const_buffers[index].MarkAsUsedIndirect(index, stage); - std::string value = 'c' + std::to_string(index) + "[(floatBitsToInt(" + - GetRegister(index_reg, 0) + ") + " + std::to_string(offset) + ") / 4]"; + + std::string final_offset = "((floatBitsToInt(" + GetRegister(index_reg, 0) + ") + " + + std::to_string(offset) + ") / 4)"; + + std::string value = + 'c' + std::to_string(index) + '[' + final_offset + " / 4][" + final_offset + " % 4]"; if (type == GLSLRegister::Type::Float) { return value; @@ -423,9 +429,10 @@ public: unsigned const_buffer_layout = 0; for (const auto& entry : GetConstBuffersDeclarations()) { - declarations.AddLine("layout(std430) buffer " + entry.GetName()); + declarations.AddLine("layout(std140) uniform " + entry.GetName()); declarations.AddLine('{'); - declarations.AddLine(" float c" + std::to_string(entry.GetIndex()) + "[];"); + declarations.AddLine(" vec4 c" + std::to_string(entry.GetIndex()) + + "[MAX_CONSTBUFFER_ELEMENTS];"); declarations.AddLine("};"); declarations.AddNewLine(); ++const_buffer_layout; @@ -1611,7 +1618,10 @@ private: }; // namespace Decompiler std::string GetCommonDeclarations() { - return "bool exec_shader();"; + std::string declarations = "bool exec_shader();\n"; + declarations += "#define MAX_CONSTBUFFER_ELEMENTS " + + std::to_string(RasterizerOpenGL::MaxConstbufferSize / (sizeof(GLvec4))); + return declarations; } boost::optional DecompileProgram(const ProgramCode& program_code, u32 main_offset, diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp index 44f0c8a01..443ce3f2b 100644 --- a/src/video_core/renderer_opengl/gl_state.cpp +++ b/src/video_core/renderer_opengl/gl_state.cpp @@ -223,7 +223,7 @@ void OpenGLState::Apply() const { if (current.enabled != new_state.enabled || current.bindpoint != new_state.bindpoint || current.ssbo != new_state.ssbo) { if (new_state.enabled) { - glBindBufferBase(GL_SHADER_STORAGE_BUFFER, new_state.bindpoint, new_state.ssbo); + glBindBufferBase(GL_UNIFORM_BUFFER, new_state.bindpoint, new_state.ssbo); } } }