From 2e39c20da5701b8356bd1024f4f783c1db39a3fd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 27 May 2019 19:37:46 -0300 Subject: [PATCH 01/17] gl_rasterizer: Move index buffer uploading to its own method --- .../renderer_opengl/gl_rasterizer.cpp | 21 +++++++++++++------ .../renderer_opengl/gl_rasterizer.h | 4 +++- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f45a3c5ef..e89f96aec 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -215,7 +215,15 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { gpu.dirty_flags.vertex_array.reset(); } -DrawParameters RasterizerOpenGL::SetupDraw() { +GLintptr RasterizerOpenGL::SetupIndexBuffer() { + if (accelerate_draw != AccelDraw::Indexed) { + return 0; + } + const auto& regs = system.GPU().Maxwell3D().regs; + return buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); +} + +DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) { const auto& gpu = system.GPU().Maxwell3D(); const auto& regs = gpu.regs; const bool is_indexed = accelerate_draw == AccelDraw::Indexed; @@ -230,8 +238,7 @@ DrawParameters RasterizerOpenGL::SetupDraw() { MICROPROFILE_SCOPE(OpenGL_Index); params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); params.count = regs.index_array.count; - params.index_buffer_offset = - buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); + params.index_buffer_offset = index_buffer_offset; params.base_vertex = static_cast(regs.vb_element_base); } else { params.count = regs.vertex_buffer.count; @@ -643,10 +650,12 @@ void RasterizerOpenGL::DrawArrays() { gpu.dirty_flags.vertex_array.set(); } - const GLuint vao = SetupVertexFormat(); - SetupVertexBuffer(vao); + const GLuint vertex_array = SetupVertexFormat(); - DrawParameters params = SetupDraw(); + SetupVertexBuffer(vertex_array); + const GLintptr index_buffer_offset = SetupIndexBuffer(); + + DrawParameters params = SetupDraw(index_buffer_offset); texture_cache.GuardSamplers(true); SetupShaders(params.primitive_mode); texture_cache.GuardSamplers(false); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index bf67e3a70..8f1757e25 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -220,7 +220,9 @@ private: void SetupVertexBuffer(GLuint vao); - DrawParameters SetupDraw(); + GLintptr SetupIndexBuffer(); + + DrawParameters SetupDraw(GLintptr index_buffer_offset); void SetupShaders(GLenum primitive_mode); From 2b9d4088ecf153f06f668ed2aff8b730366a5391 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 27 May 2019 19:38:32 -0300 Subject: [PATCH 02/17] gl_rasterizer: Make DrawParameters rasterizer instance const --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e89f96aec..6cab48329 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -655,7 +655,7 @@ void RasterizerOpenGL::DrawArrays() { SetupVertexBuffer(vertex_array); const GLintptr index_buffer_offset = SetupIndexBuffer(); - DrawParameters params = SetupDraw(index_buffer_offset); + const DrawParameters params = SetupDraw(index_buffer_offset); texture_cache.GuardSamplers(true); SetupShaders(params.primitive_mode); texture_cache.GuardSamplers(false); From a6d2f52fc32295c73f41b86ab1b5e85d06afd5b0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 27 May 2019 19:41:19 -0300 Subject: [PATCH 03/17] gl_rasterizer: Add some commentaries --- src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 6cab48329..bfc3c4df9 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -650,12 +650,17 @@ void RasterizerOpenGL::DrawArrays() { gpu.dirty_flags.vertex_array.set(); } + // Prepare vertex array format. const GLuint vertex_array = SetupVertexFormat(); + // Upload vertex and index data. SetupVertexBuffer(vertex_array); const GLintptr index_buffer_offset = SetupIndexBuffer(); + // Setup draw parameters. It will automatically choose what glDraw* method to use. const DrawParameters params = SetupDraw(index_buffer_offset); + + // Setup shaders and their used resources. texture_cache.GuardSamplers(true); SetupShaders(params.primitive_mode); texture_cache.GuardSamplers(false); From b54fb8fc4c6801ba7fa7990199071d94f463373c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 27 May 2019 19:55:44 -0300 Subject: [PATCH 04/17] gl_buffer_cache: Return used buffer from Upload function --- .../renderer_opengl/gl_buffer_cache.cpp | 23 +++++++------- .../renderer_opengl/gl_buffer_cache.h | 16 +++++----- .../renderer_opengl/gl_rasterizer.cpp | 30 +++++++++---------- .../renderer_opengl/gl_rasterizer.h | 2 +- 4 files changed, 35 insertions(+), 36 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2b9bd142e..ea8b4c99f 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -4,6 +4,7 @@ #include #include +#include #include "common/alignment.h" #include "core/core.h" @@ -21,9 +22,10 @@ CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) : RasterizerCache{rasterizer}, stream_buffer(size, true) {} -GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment, - bool cache) { +std::pair OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, + std::size_t alignment, bool cache) { std::lock_guard lock{mutex}; + auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); // Cache management is a big overhead, so only cache entries with a given size. @@ -35,7 +37,7 @@ GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std:: auto entry = TryGet(host_ptr); if (entry) { if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { - return entry->GetOffset(); + return {stream_buffer.GetHandle(), entry->GetOffset()}; } Unregister(entry); } @@ -45,7 +47,7 @@ GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std:: const GLintptr uploaded_offset = buffer_offset; if (!host_ptr) { - return uploaded_offset; + return {stream_buffer.GetHandle(), uploaded_offset}; } std::memcpy(buffer_ptr, host_ptr, size); @@ -58,11 +60,12 @@ GLintptr OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, std:: Register(entry); } - return uploaded_offset; + return {stream_buffer.GetHandle(), uploaded_offset}; } -GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, - std::size_t alignment) { +std::pair OGLBufferCache::UploadHostMemory(const void* raw_pointer, + std::size_t size, + std::size_t alignment) { std::lock_guard lock{mutex}; AlignBuffer(alignment); std::memcpy(buffer_ptr, raw_pointer, size); @@ -70,7 +73,7 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s buffer_ptr += size; buffer_offset += size; - return uploaded_offset; + return {stream_buffer.GetHandle(), uploaded_offset}; } bool OGLBufferCache::Map(std::size_t max_size) { @@ -89,10 +92,6 @@ void OGLBufferCache::Unmap() { stream_buffer.Unmap(buffer_offset - buffer_offset_base); } -GLuint OGLBufferCache::GetHandle() const { - return stream_buffer.GetHandle(); -} - void OGLBufferCache::AlignBuffer(std::size_t alignment) { // Align the offset, not the mapped pointer const GLintptr offset_aligned = diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index f2347581b..544f3b010 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -7,6 +7,7 @@ #include #include #include +#include #include "common/common_types.h" #include "video_core/rasterizer_cache.h" @@ -53,19 +54,18 @@ class OGLBufferCache final : public RasterizerCache UploadMemory(GPUVAddr gpu_addr, std::size_t size, + std::size_t alignment = 4, bool cache = true); - /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. - GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); + /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. + std::pair UploadHostMemory(const void* raw_pointer, std::size_t size, + std::size_t alignment = 4); bool Map(std::size_t max_size); void Unmap(); - GLuint GetHandle() const; - protected: void AlignBuffer(std::size_t alignment); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index bfc3c4df9..d694dacfb 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -129,8 +129,6 @@ GLuint RasterizerOpenGL::SetupVertexFormat() { state.draw.vertex_array = vao; state.ApplyVertexArrayState(); - glVertexArrayElementBuffer(vao, buffer_cache.GetHandle()); - // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. // Enables the first 16 vertex attributes always, as we don't know which ones are actually // used until shader time. Note, Tegra technically supports 32, but we're capping this to 16 @@ -197,10 +195,10 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { ASSERT(end > start); const u64 size = end - start + 1; - const GLintptr vertex_buffer_offset = buffer_cache.UploadMemory(start, size); + const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); // Bind the vertex array to the buffer at the current offset. - glVertexArrayVertexBuffer(vao, index, buffer_cache.GetHandle(), vertex_buffer_offset, + glVertexArrayVertexBuffer(vao, index, vertex_buffer, vertex_buffer_offset, vertex_array.stride); if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { @@ -215,12 +213,16 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { gpu.dirty_flags.vertex_array.reset(); } -GLintptr RasterizerOpenGL::SetupIndexBuffer() { +GLintptr RasterizerOpenGL::SetupIndexBuffer(GLuint vao) { if (accelerate_draw != AccelDraw::Indexed) { return 0; } + MICROPROFILE_SCOPE(OpenGL_Index); const auto& regs = system.GPU().Maxwell3D().regs; - return buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); + const std::size_t size = CalculateIndexBufferSize(); + const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); + glVertexArrayElementBuffer(vao, buffer); + return offset; } DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) { @@ -235,7 +237,6 @@ DrawParameters RasterizerOpenGL::SetupDraw(GLintptr index_buffer_offset) { params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); if (is_indexed) { - MICROPROFILE_SCOPE(OpenGL_Index); params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); params.count = regs.index_array.count; params.index_buffer_offset = index_buffer_offset; @@ -278,12 +279,11 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { GLShader::MaxwellUniformData ubo{}; ubo.SetFromRegs(gpu, stage); - const GLintptr offset = + const auto [buffer, offset] = buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); // Bind the emulation info buffer - bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, - static_cast(sizeof(ubo))); + bind_ubo_pushbuffer.Push(buffer, offset, static_cast(sizeof(ubo))); Shader shader{shader_cache.GetStageProgram(program)}; @@ -651,11 +651,11 @@ void RasterizerOpenGL::DrawArrays() { } // Prepare vertex array format. - const GLuint vertex_array = SetupVertexFormat(); + const GLuint vao = SetupVertexFormat(); // Upload vertex and index data. - SetupVertexBuffer(vertex_array); - const GLintptr index_buffer_offset = SetupIndexBuffer(); + SetupVertexBuffer(vao); + const GLintptr index_buffer_offset = SetupIndexBuffer(vao); // Setup draw parameters. It will automatically choose what glDraw* method to use. const DrawParameters params = SetupDraw(index_buffer_offset); @@ -791,8 +791,8 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); const std::size_t alignment = device.GetUniformBufferAlignment(); - const GLintptr offset = buffer_cache.UploadMemory(buffer.address, size, alignment); - bind_ubo_pushbuffer.Push(buffer_cache.GetHandle(), offset, size); + const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); + bind_ubo_pushbuffer.Push(cbuf, offset, size); } void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 8f1757e25..a03bc759f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -220,7 +220,7 @@ private: void SetupVertexBuffer(GLuint vao); - GLintptr SetupIndexBuffer(); + GLintptr SetupIndexBuffer(GLuint vao); DrawParameters SetupDraw(GLintptr index_buffer_offset); From f8ba72d4912fb472f97bc8d5b80f3363e6f714cc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 27 May 2019 20:11:46 -0300 Subject: [PATCH 05/17] gl_buffer_cache: Store in CachedBufferEntry the used buffer handle --- .../renderer_opengl/gl_buffer_cache.cpp | 33 ++++++++++--------- .../renderer_opengl/gl_buffer_cache.h | 20 +++++++---- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index ea8b4c99f..b4277ef73 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -14,10 +14,10 @@ namespace OpenGL { -CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, - std::size_t alignment, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, offset{offset}, - alignment{alignment} {} +CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size, + std::size_t alignment, GLuint buffer, GLintptr offset) + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, alignment{alignment}, + buffer{buffer}, offset{offset} {} OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) : RasterizerCache{rasterizer}, stream_buffer(size, true) {} @@ -28,16 +28,20 @@ std::pair OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std: auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; + if (!host_ptr) { + // Return a dummy buffer when host_ptr is invalid. + return {0, 0}; + } + // Cache management is a big overhead, so only cache entries with a given size. // TODO: Figure out which size is the best for given games. cache &= size >= 2048; - const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; if (cache) { - auto entry = TryGet(host_ptr); - if (entry) { + if (auto entry = TryGet(host_ptr); entry) { if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { - return {stream_buffer.GetHandle(), entry->GetOffset()}; + return {entry->GetBuffer(), entry->GetOffset()}; } Unregister(entry); } @@ -46,21 +50,18 @@ std::pair OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std: AlignBuffer(alignment); const GLintptr uploaded_offset = buffer_offset; - if (!host_ptr) { - return {stream_buffer.GetHandle(), uploaded_offset}; - } - std::memcpy(buffer_ptr, host_ptr, size); buffer_ptr += size; buffer_offset += size; + const GLuint buffer = stream_buffer.GetHandle(); if (cache) { - auto entry = std::make_shared( - *memory_manager.GpuToCpuAddress(gpu_addr), size, uploaded_offset, alignment, host_ptr); - Register(entry); + const VAddr cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr); + Register(std::make_shared(cpu_addr, host_ptr, size, alignment, buffer, + uploaded_offset)); } - return {stream_buffer.GetHandle(), uploaded_offset}; + return {buffer, uploaded_offset}; } std::pair OGLBufferCache::UploadHostMemory(const void* raw_pointer, diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 544f3b010..4a055035a 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -20,8 +20,8 @@ class RasterizerOpenGL; class CachedBufferEntry final : public RasterizerCacheObject { public: - explicit CachedBufferEntry(VAddr cpu_addr, std::size_t size, GLintptr offset, - std::size_t alignment, u8* host_ptr); + explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size, + std::size_t alignment, GLuint buffer, GLintptr offset); VAddr GetCpuAddr() const override { return cpu_addr; @@ -35,19 +35,25 @@ public: return size; } - GLintptr GetOffset() const { - return offset; - } - std::size_t GetAlignment() const { return alignment; } + GLuint GetBuffer() const { + return buffer; + } + + GLintptr GetOffset() const { + return offset; + } + private: VAddr cpu_addr{}; std::size_t size{}; - GLintptr offset{}; std::size_t alignment{}; + + GLuint buffer{}; + GLintptr offset{}; }; class OGLBufferCache final : public RasterizerCache> { From 8155b12d3d8963ec4d8727614ffb522a33389cbf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 27 May 2019 20:50:11 -0300 Subject: [PATCH 06/17] gl_buffer_cache: Rework to support internalized buffers --- .../renderer_opengl/gl_buffer_cache.cpp | 171 +++++++++++++----- .../renderer_opengl/gl_buffer_cache.h | 70 +++++-- .../renderer_opengl/gl_rasterizer.cpp | 2 +- 3 files changed, 176 insertions(+), 67 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index b4277ef73..1219ca6ea 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -7,90 +7,165 @@ #include #include "common/alignment.h" +#include "common/assert.h" #include "core/core.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { -CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size, - std::size_t alignment, GLuint buffer, GLintptr offset) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, alignment{alignment}, - buffer{buffer}, offset{offset} {} +namespace { + +constexpr GLuint EmptyBuffer = 0; +constexpr GLintptr CachedBufferOffset = 0; + +OGLBuffer CreateBuffer(std::size_t size, GLenum usage) { + OGLBuffer buffer; + buffer.Create(); + glNamedBufferData(buffer.handle, size, nullptr, usage); + return buffer; +} + +} // Anonymous namespace + +CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr) + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr} {} OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) : RasterizerCache{rasterizer}, stream_buffer(size, true) {} -std::pair OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, - std::size_t alignment, bool cache) { +OGLBufferCache::~OGLBufferCache() = default; + +void OGLBufferCache::Unregister(const std::shared_ptr& entry) { + std::lock_guard lock{mutex}; + + if (entry->IsInternalized()) { + internalized_entries.erase(entry->GetCacheAddr()); + } + ReserveBuffer(entry); + RasterizerCache>::Unregister(entry); +} + +OGLBufferCache::BufferInfo OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, + std::size_t alignment, bool internalize) { std::lock_guard lock{mutex}; auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); - - const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + const auto cache_addr{ToCacheAddr(host_ptr)}; if (!host_ptr) { - // Return a dummy buffer when host_ptr is invalid. - return {0, 0}; + return {EmptyBuffer, 0}; } // Cache management is a big overhead, so only cache entries with a given size. // TODO: Figure out which size is the best for given games. - cache &= size >= 2048; - - if (cache) { - if (auto entry = TryGet(host_ptr); entry) { - if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { - return {entry->GetBuffer(), entry->GetOffset()}; - } - Unregister(entry); - } + if (!internalize && size < 0x800 && + internalized_entries.find(cache_addr) == internalized_entries.end()) { + return StreamBufferUpload(host_ptr, size, alignment); } - AlignBuffer(alignment); - const GLintptr uploaded_offset = buffer_offset; - - std::memcpy(buffer_ptr, host_ptr, size); - buffer_ptr += size; - buffer_offset += size; - - const GLuint buffer = stream_buffer.GetHandle(); - if (cache) { - const VAddr cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr); - Register(std::make_shared(cpu_addr, host_ptr, size, alignment, buffer, - uploaded_offset)); + auto entry = TryGet(host_ptr); + if (!entry) { + return FixedBufferUpload(gpu_addr, host_ptr, size, internalize); } - return {buffer, uploaded_offset}; + if (entry->GetSize() < size) { + GrowBuffer(entry, size); + } + return {entry->GetBuffer(), CachedBufferOffset}; } -std::pair OGLBufferCache::UploadHostMemory(const void* raw_pointer, - std::size_t size, - std::size_t alignment) { - std::lock_guard lock{mutex}; +OGLBufferCache::BufferInfo OGLBufferCache::UploadHostMemory(const void* raw_pointer, + std::size_t size, + std::size_t alignment) { + return StreamBufferUpload(raw_pointer, size, alignment); +} + +bool OGLBufferCache::Map(std::size_t max_size) { + const auto max_size_ = static_cast(max_size); + bool invalidate; + std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer.Map(max_size_, 4); + buffer_offset = buffer_offset_base; + return invalidate; +} + +void OGLBufferCache::Unmap() { + stream_buffer.Unmap(buffer_offset - buffer_offset_base); +} + +OGLBufferCache::BufferInfo OGLBufferCache::StreamBufferUpload(const void* raw_pointer, + std::size_t size, + std::size_t alignment) { AlignBuffer(alignment); - std::memcpy(buffer_ptr, raw_pointer, size); const GLintptr uploaded_offset = buffer_offset; + std::memcpy(buffer_ptr, raw_pointer, size); buffer_ptr += size; buffer_offset += size; return {stream_buffer.GetHandle(), uploaded_offset}; } -bool OGLBufferCache::Map(std::size_t max_size) { - bool invalidate; - std::tie(buffer_ptr, buffer_offset_base, invalidate) = - stream_buffer.Map(static_cast(max_size), 4); - buffer_offset = buffer_offset_base; - - if (invalidate) { - InvalidateAll(); +OGLBufferCache::BufferInfo OGLBufferCache::FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, + std::size_t size, bool internalize) { + if (internalize) { + internalized_entries.emplace(ToCacheAddr(host_ptr)); } - return invalidate; + auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + const auto cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr); + auto entry = GetUncachedBuffer(cpu_addr, host_ptr); + entry->SetSize(size); + entry->SetInternalState(internalize); + Register(entry); + + if (entry->GetCapacity() < size) { + entry->SetCapacity(CreateBuffer(size, GL_STATIC_DRAW), size); + } + glNamedBufferSubData(entry->GetBuffer(), 0, static_cast(size), host_ptr); + return {entry->GetBuffer(), CachedBufferOffset}; } -void OGLBufferCache::Unmap() { - stream_buffer.Unmap(buffer_offset - buffer_offset_base); +void OGLBufferCache::GrowBuffer(std::shared_ptr& entry, std::size_t new_size) { + const auto old_size = static_cast(entry->GetSize()); + if (entry->GetCapacity() < new_size) { + const auto old_buffer = entry->GetBuffer(); + OGLBuffer new_buffer = CreateBuffer(new_size, GL_STATIC_COPY); + + // Copy bits from the old buffer to the new buffer. + glCopyNamedBufferSubData(old_buffer, new_buffer.handle, 0, 0, old_size); + entry->SetCapacity(std::move(new_buffer), new_size); + } + // Upload the new bits. + const auto size_diff = static_cast(new_size - old_size); + glNamedBufferSubData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); + + // Update entry's size in the object and in the cache. + entry->SetSize(new_size); + Unregister(entry); + Register(entry); +} + +std::shared_ptr OGLBufferCache::GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { + if (auto entry = TryGetReservedBuffer(host_ptr); entry) { + return entry; + } + return std::make_shared(cpu_addr, host_ptr); +} + +std::shared_ptr OGLBufferCache::TryGetReservedBuffer(u8* host_ptr) { + const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); + if (it == buffer_reserve.end()) { + return {}; + } + auto& reserve = it->second; + auto entry = reserve.back(); + reserve.pop_back(); + return entry; +} + +void OGLBufferCache::ReserveBuffer(std::shared_ptr entry) { + buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); } void OGLBufferCache::AlignBuffer(std::size_t alignment) { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 4a055035a..00bc6008a 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -5,9 +5,12 @@ #pragma once #include +#include #include #include +#include #include +#include #include "common/common_types.h" #include "video_core/rasterizer_cache.h" @@ -20,8 +23,7 @@ class RasterizerOpenGL; class CachedBufferEntry final : public RasterizerCacheObject { public: - explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size, - std::size_t alignment, GLuint buffer, GLintptr offset); + explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr); VAddr GetCpuAddr() const override { return cpu_addr; @@ -35,55 +37,87 @@ public: return size; } - std::size_t GetAlignment() const { - return alignment; + std::size_t GetCapacity() const { + return capacity; + } + + bool IsInternalized() const { + return is_internal; } GLuint GetBuffer() const { - return buffer; + return buffer.handle; } - GLintptr GetOffset() const { - return offset; + void SetSize(std::size_t new_size) { + size = new_size; + } + + void SetInternalState(bool is_internal_) { + is_internal = is_internal_; + } + + void SetCapacity(OGLBuffer&& new_buffer, std::size_t new_capacity) { + capacity = new_capacity; + buffer = std::move(new_buffer); } private: VAddr cpu_addr{}; std::size_t size{}; - std::size_t alignment{}; - - GLuint buffer{}; - GLintptr offset{}; + std::size_t capacity{}; + bool is_internal{}; + OGLBuffer buffer; }; class OGLBufferCache final : public RasterizerCache> { + using BufferInfo = std::pair; + public: explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); + ~OGLBufferCache(); + + void Unregister(const std::shared_ptr& entry) override; /// Uploads data from a guest GPU address. Returns the OpenGL buffer where it's located and its /// offset. - std::pair UploadMemory(GPUVAddr gpu_addr, std::size_t size, - std::size_t alignment = 4, bool cache = true); + BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, + bool internalize = false); /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. - std::pair UploadHostMemory(const void* raw_pointer, std::size_t size, - std::size_t alignment = 4); + BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, + std::size_t alignment = 4); bool Map(std::size_t max_size); void Unmap(); protected: - void AlignBuffer(std::size_t alignment); - // We do not have to flush this cache as things in it are never modified by us. void FlushObjectInner(const std::shared_ptr& object) override {} private: - OGLStreamBuffer stream_buffer; + BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, std::size_t alignment); + + BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, + bool internalize); + + void GrowBuffer(std::shared_ptr& entry, std::size_t new_size); + + std::shared_ptr GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr); + + std::shared_ptr TryGetReservedBuffer(u8* host_ptr); + + void ReserveBuffer(std::shared_ptr entry); + + void AlignBuffer(std::size_t alignment); u8* buffer_ptr = nullptr; GLintptr buffer_offset = 0; GLintptr buffer_offset_base = 0; + + OGLStreamBuffer stream_buffer; + std::unordered_set internalized_entries; + std::unordered_map>> buffer_reserve; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d694dacfb..e216163e1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -790,7 +790,7 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b size = Common::AlignUp(size, sizeof(GLvec4)); ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); - const std::size_t alignment = device.GetUniformBufferAlignment(); + const auto alignment = device.GetUniformBufferAlignment(); const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); bind_ubo_pushbuffer.Push(cbuf, offset, size); } From 345f852bdb64d1a779ac617965f46f5b8227eca5 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 29 May 2019 18:15:28 -0300 Subject: [PATCH 07/17] gl_rasterizer: Drop gl_global_cache in favor of gl_buffer_cache --- src/video_core/CMakeLists.txt | 2 - .../renderer_opengl/gl_buffer_cache.cpp | 22 ++-- .../renderer_opengl/gl_buffer_cache.h | 4 +- .../renderer_opengl/gl_global_cache.cpp | 102 ------------------ .../renderer_opengl/gl_global_cache.h | 82 -------------- .../renderer_opengl/gl_rasterizer.cpp | 27 +++-- .../renderer_opengl/gl_rasterizer.h | 2 - 7 files changed, 35 insertions(+), 206 deletions(-) delete mode 100644 src/video_core/renderer_opengl/gl_global_cache.cpp delete mode 100644 src/video_core/renderer_opengl/gl_global_cache.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 6839abe71..7aefd4035 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -43,8 +43,6 @@ add_library(video_core STATIC renderer_opengl/gl_device.h renderer_opengl/gl_framebuffer_cache.cpp renderer_opengl/gl_framebuffer_cache.h - renderer_opengl/gl_global_cache.cpp - renderer_opengl/gl_global_cache.h renderer_opengl/gl_rasterizer.cpp renderer_opengl/gl_rasterizer.h renderer_opengl/gl_resource_manager.cpp diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 1219ca6ea..2f603e3d7 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -49,7 +49,8 @@ void OGLBufferCache::Unregister(const std::shared_ptr& entry) } OGLBufferCache::BufferInfo OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, - std::size_t alignment, bool internalize) { + std::size_t alignment, bool internalize, + bool is_written) { std::lock_guard lock{mutex}; auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); @@ -68,18 +69,22 @@ OGLBufferCache::BufferInfo OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std:: auto entry = TryGet(host_ptr); if (!entry) { - return FixedBufferUpload(gpu_addr, host_ptr, size, internalize); + return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written); } if (entry->GetSize() < size) { GrowBuffer(entry, size); } + if (is_written) { + entry->MarkAsModified(true, *this); + } return {entry->GetBuffer(), CachedBufferOffset}; } OGLBufferCache::BufferInfo OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment) { + std::lock_guard lock{mutex}; return StreamBufferUpload(raw_pointer, size, alignment); } @@ -108,10 +113,8 @@ OGLBufferCache::BufferInfo OGLBufferCache::StreamBufferUpload(const void* raw_po } OGLBufferCache::BufferInfo OGLBufferCache::FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, - std::size_t size, bool internalize) { - if (internalize) { - internalized_entries.emplace(ToCacheAddr(host_ptr)); - } + std::size_t size, bool internalize, + bool is_written) { auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); const auto cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr); auto entry = GetUncachedBuffer(cpu_addr, host_ptr); @@ -119,6 +122,13 @@ OGLBufferCache::BufferInfo OGLBufferCache::FixedBufferUpload(GPUVAddr gpu_addr, entry->SetInternalState(internalize); Register(entry); + if (internalize) { + internalized_entries.emplace(ToCacheAddr(host_ptr)); + } + if (is_written) { + entry->MarkAsModified(true, *this); + } + if (entry->GetCapacity() < size) { entry->SetCapacity(CreateBuffer(size, GL_STATIC_DRAW), size); } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 00bc6008a..b4fbd201d 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -82,7 +82,7 @@ public: /// Uploads data from a guest GPU address. Returns the OpenGL buffer where it's located and its /// offset. BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, - bool internalize = false); + bool internalize = false, bool is_written = false); /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, @@ -99,7 +99,7 @@ private: BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, std::size_t alignment); BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, - bool internalize); + bool internalize, bool is_written); void GrowBuffer(std::shared_ptr& entry, std::size_t new_size); diff --git a/src/video_core/renderer_opengl/gl_global_cache.cpp b/src/video_core/renderer_opengl/gl_global_cache.cpp deleted file mode 100644 index d5e385151..000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.cpp +++ /dev/null @@ -1,102 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#include - -#include "common/logging/log.h" -#include "core/core.h" -#include "video_core/memory_manager.h" -#include "video_core/renderer_opengl/gl_global_cache.h" -#include "video_core/renderer_opengl/gl_rasterizer.h" -#include "video_core/renderer_opengl/gl_shader_decompiler.h" -#include "video_core/renderer_opengl/utils.h" - -namespace OpenGL { - -CachedGlobalRegion::CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, host_ptr{host_ptr}, size{size}, - max_size{max_size} { - buffer.Create(); - LabelGLObject(GL_BUFFER, buffer.handle, cpu_addr, "GlobalMemory"); -} - -CachedGlobalRegion::~CachedGlobalRegion() = default; - -void CachedGlobalRegion::Reload(u32 size_) { - size = size_; - if (size > max_size) { - size = max_size; - LOG_CRITICAL(HW_GPU, "Global region size {} exceeded the supported size {}!", size_, - max_size); - } - glNamedBufferData(buffer.handle, size, host_ptr, GL_STREAM_DRAW); -} - -void CachedGlobalRegion::Flush() { - LOG_DEBUG(Render_OpenGL, "Flushing {} bytes to CPU memory address 0x{:16}", size, cpu_addr); - glGetNamedBufferSubData(buffer.handle, 0, static_cast(size), host_ptr); -} - -GlobalRegion GlobalRegionCacheOpenGL::TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const { - const auto search{reserve.find(addr)}; - if (search == reserve.end()) { - return {}; - } - return search->second; -} - -GlobalRegion GlobalRegionCacheOpenGL::GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, - u32 size) { - GlobalRegion region{TryGetReservedGlobalRegion(ToCacheAddr(host_ptr), size)}; - if (!region) { - // No reserved surface available, create a new one and reserve it - auto& memory_manager{Core::System::GetInstance().GPU().MemoryManager()}; - const auto cpu_addr{memory_manager.GpuToCpuAddress(addr)}; - ASSERT(cpu_addr); - - region = std::make_shared(*cpu_addr, host_ptr, size, max_ssbo_size); - ReserveGlobalRegion(region); - } - region->Reload(size); - return region; -} - -void GlobalRegionCacheOpenGL::ReserveGlobalRegion(GlobalRegion region) { - reserve.insert_or_assign(region->GetCacheAddr(), std::move(region)); -} - -GlobalRegionCacheOpenGL::GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer) - : RasterizerCache{rasterizer} { - GLint max_ssbo_size_; - glGetIntegerv(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &max_ssbo_size_); - max_ssbo_size = static_cast(max_ssbo_size_); -} - -GlobalRegion GlobalRegionCacheOpenGL::GetGlobalRegion( - const GLShader::GlobalMemoryEntry& global_region, - Tegra::Engines::Maxwell3D::Regs::ShaderStage stage) { - std::lock_guard lock{mutex}; - - auto& gpu{Core::System::GetInstance().GPU()}; - auto& memory_manager{gpu.MemoryManager()}; - const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast(stage)]}; - const auto addr{cbufs.const_buffers[global_region.GetCbufIndex()].address + - global_region.GetCbufOffset()}; - const auto actual_addr{memory_manager.Read(addr)}; - const auto size{memory_manager.Read(addr + 8)}; - - // Look up global region in the cache based on address - const auto& host_ptr{memory_manager.GetPointer(actual_addr)}; - GlobalRegion region{TryGet(host_ptr)}; - - if (!region) { - // No global region found - create a new one - region = GetUncachedGlobalRegion(actual_addr, host_ptr, size); - Register(region); - } - - return region; -} - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_global_cache.h b/src/video_core/renderer_opengl/gl_global_cache.h deleted file mode 100644 index 2d467a240..000000000 --- a/src/video_core/renderer_opengl/gl_global_cache.h +++ /dev/null @@ -1,82 +0,0 @@ -// Copyright 2018 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. - -#pragma once - -#include -#include - -#include - -#include "common/assert.h" -#include "common/common_types.h" -#include "video_core/engines/maxwell_3d.h" -#include "video_core/rasterizer_cache.h" -#include "video_core/renderer_opengl/gl_resource_manager.h" - -namespace OpenGL { - -namespace GLShader { -class GlobalMemoryEntry; -} - -class RasterizerOpenGL; -class CachedGlobalRegion; -using GlobalRegion = std::shared_ptr; - -class CachedGlobalRegion final : public RasterizerCacheObject { -public: - explicit CachedGlobalRegion(VAddr cpu_addr, u8* host_ptr, u32 size, u32 max_size); - ~CachedGlobalRegion(); - - VAddr GetCpuAddr() const override { - return cpu_addr; - } - - std::size_t GetSizeInBytes() const override { - return size; - } - - /// Gets the GL program handle for the buffer - GLuint GetBufferHandle() const { - return buffer.handle; - } - - /// Reloads the global region from guest memory - void Reload(u32 size_); - - void Flush(); - -private: - VAddr cpu_addr{}; - u8* host_ptr{}; - u32 size{}; - u32 max_size{}; - - OGLBuffer buffer; -}; - -class GlobalRegionCacheOpenGL final : public RasterizerCache { -public: - explicit GlobalRegionCacheOpenGL(RasterizerOpenGL& rasterizer); - - /// Gets the current specified shader stage program - GlobalRegion GetGlobalRegion(const GLShader::GlobalMemoryEntry& descriptor, - Tegra::Engines::Maxwell3D::Regs::ShaderStage stage); - -protected: - void FlushObjectInner(const GlobalRegion& object) override { - object->Flush(); - } - -private: - GlobalRegion TryGetReservedGlobalRegion(CacheAddr addr, u32 size) const; - GlobalRegion GetUncachedGlobalRegion(GPUVAddr addr, u8* host_ptr, u32 size); - void ReserveGlobalRegion(GlobalRegion region); - - std::unordered_map reserve; - u32 max_ssbo_size{}; -}; - -} // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index e216163e1..d1790f24f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -20,6 +20,7 @@ #include "core/hle/kernel/process.h" #include "core/settings.h" #include "video_core/engines/maxwell_3d.h" +#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_gen.h" @@ -82,8 +83,8 @@ struct DrawParameters { RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) - : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, - global_cache{*this}, system{system}, screen_info{info}, + : texture_cache{system, *this, device}, + shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info}, buffer_cache(*this, STREAM_BUFFER_SIZE) { OpenGLState::ApplyDefaultState(); @@ -689,7 +690,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) { return; } texture_cache.FlushRegion(addr, size); - global_cache.FlushRegion(addr, size); + buffer_cache.FlushRegion(addr, size); } void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { @@ -699,7 +700,6 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) { } texture_cache.InvalidateRegion(addr, size); shader_cache.InvalidateRegion(addr, size); - global_cache.InvalidateRegion(addr, size); buffer_cache.InvalidateRegion(addr, size); } @@ -797,15 +797,22 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage, const Shader& shader) { + auto& gpu{system.GPU()}; + auto& memory_manager{gpu.MemoryManager()}; + const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast(stage)]}; + const auto alignment{device.GetShaderStorageBufferAlignment()}; + const auto& entries = shader->GetShaderEntries().global_memory_entries; for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { const auto& entry{entries[bindpoint]}; - const auto& region{global_cache.GetGlobalRegion(entry, stage)}; - if (entry.IsWritten()) { - region->MarkAsModified(true, global_cache); - } - bind_ssbo_pushbuffer.Push(region->GetBufferHandle(), 0, - static_cast(region->GetSizeInBytes())); + + const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; + const auto actual_addr{memory_manager.Read(addr)}; + const auto size{memory_manager.Read(addr + 8)}; + + const auto [ssbo, buffer_offset] = + buffer_cache.UploadMemory(actual_addr, size, alignment, true, entry.IsWritten()); + bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast(size)); } } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index a03bc759f..bc988727b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -24,7 +24,6 @@ #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_framebuffer_cache.h" -#include "video_core/renderer_opengl/gl_global_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_sampler_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h" @@ -189,7 +188,6 @@ private: TextureCacheOpenGL texture_cache; ShaderCacheOpenGL shader_cache; - GlobalRegionCacheOpenGL global_cache; SamplerCacheOpenGL sampler_cache; FramebufferCacheOpenGL framebuffer_cache; From d14fbfb9b594f003f11902d53f8993466486dc9d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 29 May 2019 21:08:33 -0300 Subject: [PATCH 08/17] gl_buffer_cache: Implement flushing --- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 6 +++++- src/video_core/renderer_opengl/gl_buffer_cache.h | 7 ++++++- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2f603e3d7..3ce61c5cd 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -31,7 +31,7 @@ OGLBuffer CreateBuffer(std::size_t size, GLenum usage) { } // Anonymous namespace CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr} {} + : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) : RasterizerCache{rasterizer}, stream_buffer(size, true) {} @@ -100,6 +100,10 @@ void OGLBufferCache::Unmap() { stream_buffer.Unmap(buffer_offset - buffer_offset_base); } +void OGLBufferCache::FlushObjectInner(const std::shared_ptr& entry) { + glGetNamedBufferSubData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); +} + OGLBufferCache::BufferInfo OGLBufferCache::StreamBufferUpload(const void* raw_pointer, std::size_t size, std::size_t alignment) { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index b4fbd201d..4267a5067 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -33,6 +33,10 @@ public: return size; } + u8* GetWritableHostPtr() const { + return host_ptr; + } + std::size_t GetSize() const { return size; } @@ -63,6 +67,7 @@ public: } private: + u8* host_ptr{}; VAddr cpu_addr{}; std::size_t size{}; std::size_t capacity{}; @@ -93,7 +98,7 @@ public: protected: // We do not have to flush this cache as things in it are never modified by us. - void FlushObjectInner(const std::shared_ptr& object) override {} + void FlushObjectInner(const std::shared_ptr& entry) override; private: BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, std::size_t alignment); From 02ab8449349fa9bb6d4f8b8403fff84ed85790c7 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 1 Jun 2019 17:41:55 -0300 Subject: [PATCH 09/17] gl_device: Query SSBO alignment --- src/video_core/renderer_opengl/gl_device.cpp | 1 + src/video_core/renderer_opengl/gl_device.h | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index a48e14d2e..6238ddaaa 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -24,6 +24,7 @@ T GetInteger(GLenum pname) { Device::Device() { uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); + shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); max_varyings = GetInteger(GL_MAX_VARYING_VECTORS); has_variable_aoffi = TestVariableAoffi(); diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index 8c8c93760..939edb440 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -18,6 +18,10 @@ public: return uniform_buffer_alignment; } + std::size_t GetShaderStorageBufferAlignment() const { + return shader_storage_alignment; + } + u32 GetMaxVertexAttributes() const { return max_vertex_attributes; } @@ -39,6 +43,7 @@ private: static bool TestComponentIndexingBug(); std::size_t uniform_buffer_alignment{}; + std::size_t shader_storage_alignment{}; u32 max_vertex_attributes{}; u32 max_varyings{}; bool has_variable_aoffi{}; From 2bcae41a73cf2f2c6b21a3ab298eddd0855a0245 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 14 Jun 2019 19:58:16 -0300 Subject: [PATCH 10/17] gl_buffer_cache: Remove global system getters --- src/video_core/renderer_opengl/gl_buffer_cache.cpp | 10 +++++----- src/video_core/renderer_opengl/gl_buffer_cache.h | 8 +++++++- src/video_core/renderer_opengl/gl_rasterizer.cpp | 5 ++--- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 3ce61c5cd..fb3aedd07 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -33,8 +33,8 @@ OGLBuffer CreateBuffer(std::size_t size, GLenum usage) { CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr) : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} -OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) - : RasterizerCache{rasterizer}, stream_buffer(size, true) {} +OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, std::size_t size) + : RasterizerCache{rasterizer}, system{system}, stream_buffer(size, true) {} OGLBufferCache::~OGLBufferCache() = default; @@ -53,7 +53,7 @@ OGLBufferCache::BufferInfo OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std:: bool is_written) { std::lock_guard lock{mutex}; - auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + auto& memory_manager = system.GPU().MemoryManager(); const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; const auto cache_addr{ToCacheAddr(host_ptr)}; if (!host_ptr) { @@ -119,7 +119,7 @@ OGLBufferCache::BufferInfo OGLBufferCache::StreamBufferUpload(const void* raw_po OGLBufferCache::BufferInfo OGLBufferCache::FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, bool internalize, bool is_written) { - auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + auto& memory_manager = system.GPU().MemoryManager(); const auto cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr); auto entry = GetUncachedBuffer(cpu_addr, host_ptr); entry->SetSize(size); @@ -161,7 +161,7 @@ void OGLBufferCache::GrowBuffer(std::shared_ptr& entry, std:: } std::shared_ptr OGLBufferCache::GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { - if (auto entry = TryGetReservedBuffer(host_ptr); entry) { + if (auto entry = TryGetReservedBuffer(host_ptr)) { return entry; } return std::make_shared(cpu_addr, host_ptr); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 4267a5067..19d643e41 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -17,6 +17,10 @@ #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" +namespace Core { +class System; +} + namespace OpenGL { class RasterizerOpenGL; @@ -79,7 +83,7 @@ class OGLBufferCache final : public RasterizerCache; public: - explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); + explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, std::size_t size); ~OGLBufferCache(); void Unregister(const std::shared_ptr& entry) override; @@ -116,6 +120,8 @@ private: void AlignBuffer(std::size_t alignment); + Core::System& system; + u8* buffer_ptr = nullptr; GLintptr buffer_offset = 0; GLintptr buffer_offset_base = 0; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d1790f24f..35ba84235 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -83,9 +83,8 @@ struct DrawParameters { RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) - : texture_cache{system, *this, device}, - shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info}, - buffer_cache(*this, STREAM_BUFFER_SIZE) { + : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, + system{system}, screen_info{info}, buffer_cache{*this, system, STREAM_BUFFER_SIZE} { OpenGLState::ApplyDefaultState(); shader_program_manager = std::make_unique(); From 32c0212b24bc933607b84eb8ab1d55e07db30afe Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 16 Jun 2019 20:00:48 -0300 Subject: [PATCH 11/17] buffer_cache: Implement a generic buffer cache Implements a templated class with a similar approach to our current generic texture cache. It is designed to be compatible with Vulkan and OpenGL, --- src/video_core/CMakeLists.txt | 1 + src/video_core/buffer_cache.h | 300 ++++++++++++++++++++++++++++++++++ 2 files changed, 301 insertions(+) create mode 100644 src/video_core/buffer_cache.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 7aefd4035..8753383b8 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,4 +1,5 @@ add_library(video_core STATIC + buffer_cache.h dma_pusher.cpp dma_pusher.h debug_utils/debug_utils.cpp diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h new file mode 100644 index 000000000..eb0ec45c2 --- /dev/null +++ b/src/video_core/buffer_cache.h @@ -0,0 +1,300 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "common/alignment.h" +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_cache.h" + +namespace VideoCore { +class RasterizerInterface; +} + +namespace VideoCommon { + +template +class CachedBuffer final : public RasterizerCacheObject { +public: + explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr) + : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} + ~CachedBuffer() override = default; + + VAddr GetCpuAddr() const override { + return cpu_addr; + } + + std::size_t GetSizeInBytes() const override { + return size; + } + + u8* GetWritableHostPtr() const { + return host_ptr; + } + + std::size_t GetSize() const { + return size; + } + + std::size_t GetCapacity() const { + return capacity; + } + + bool IsInternalized() const { + return is_internal; + } + + const BufferStorageType& GetBuffer() const { + return buffer; + } + + void SetSize(std::size_t new_size) { + size = new_size; + } + + void SetInternalState(bool is_internal_) { + is_internal = is_internal_; + } + + BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) { + capacity = new_capacity; + std::swap(buffer, buffer_); + return buffer_; + } + +private: + u8* host_ptr{}; + VAddr cpu_addr{}; + std::size_t size{}; + std::size_t capacity{}; + bool is_internal{}; + BufferStorageType buffer; +}; + +template +class BufferCache : public RasterizerCache>> { +public: + using Buffer = std::shared_ptr>; + using BufferInfo = std::pair; + + explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + std::unique_ptr stream_buffer) + : RasterizerCache{rasterizer}, system{system}, + stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{ + this->stream_buffer->GetHandle()} {} + ~BufferCache() = default; + + void Unregister(const Buffer& entry) override { + std::lock_guard lock{RasterizerCache::mutex}; + if (entry->IsInternalized()) { + internalized_entries.erase(entry->GetCacheAddr()); + } + ReserveBuffer(entry); + RasterizerCache::Unregister(entry); + } + + void TickFrame() { + marked_for_destruction_index = + (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size(); + MarkedForDestruction().clear(); + } + + [[nodiscard]] BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, + std::size_t alignment = 4, bool internalize = false, + bool is_written = false) { + std::lock_guard lock{RasterizerCache::mutex}; + + auto& memory_manager = system.GPU().MemoryManager(); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + if (!host_ptr) { + return {GetEmptyBuffer(size), 0}; + } + const auto cache_addr = ToCacheAddr(host_ptr); + + // Cache management is a big overhead, so only cache entries with a given size. + // TODO: Figure out which size is the best for given games. + constexpr std::size_t max_stream_size = 0x800; + if (!internalize && size < max_stream_size && + internalized_entries.find(cache_addr) == internalized_entries.end()) { + return StreamBufferUpload(host_ptr, size, alignment); + } + + auto entry = RasterizerCache::TryGet(cache_addr); + if (!entry) { + return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written); + } + + if (entry->GetSize() < size) { + IncreaseBufferSize(entry, size); + } + if (is_written) { + entry->MarkAsModified(true, *this); + } + return {ToHandle(entry->GetBuffer()), 0}; + } + + /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. + [[nodiscard]] BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, + std::size_t alignment = 4) { + std::lock_guard lock{RasterizerCache::mutex}; + return StreamBufferUpload(raw_pointer, size, alignment); + } + + void Map(std::size_t max_size) { + std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); + buffer_offset = buffer_offset_base; + } + + /// Finishes the upload stream, returns true on bindings invalidation. + bool Unmap() { + stream_buffer->Unmap(buffer_offset - buffer_offset_base); + return std::exchange(invalidated, false); + } + +protected: + void FlushObjectInner(const Buffer& entry) override { + DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); + } + + virtual BufferStorageType CreateBuffer(std::size_t size) = 0; + + virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0; + + virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0; + + virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset, + std::size_t size, const u8* data) = 0; + + virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset, + std::size_t size, u8* data) = 0; + + virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst, + std::size_t src_offset, std::size_t dst_offset, + std::size_t size) = 0; + +private: + BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, + std::size_t alignment) { + AlignBuffer(alignment); + const std::size_t uploaded_offset = buffer_offset; + std::memcpy(buffer_ptr, raw_pointer, size); + + buffer_ptr += size; + buffer_offset += size; + return {&stream_buffer_handle, uploaded_offset}; + } + + BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, + bool internalize, bool is_written) { + auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); + ASSERT(cpu_addr); + + auto entry = GetUncachedBuffer(*cpu_addr, host_ptr); + entry->SetSize(size); + entry->SetInternalState(internalize); + RasterizerCache::Register(entry); + + if (internalize) { + internalized_entries.emplace(ToCacheAddr(host_ptr)); + } + if (is_written) { + entry->MarkAsModified(true, *this); + } + + if (entry->GetCapacity() < size) { + MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size)); + } + + UploadBufferData(entry->GetBuffer(), 0, size, host_ptr); + return {ToHandle(entry->GetBuffer()), 0}; + } + + void IncreaseBufferSize(Buffer& entry, std::size_t new_size) { + const std::size_t old_size = entry->GetSize(); + if (entry->GetCapacity() < new_size) { + const auto& old_buffer = entry->GetBuffer(); + auto new_buffer = CreateBuffer(new_size); + + // Copy bits from the old buffer to the new buffer. + CopyBufferData(old_buffer, new_buffer, 0, 0, old_size); + MarkedForDestruction().push_back( + entry->ExchangeBuffer(std::move(new_buffer), new_size)); + + // This buffer could have been used + invalidated = true; + } + // Upload the new bits. + const std::size_t size_diff = new_size - old_size; + UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); + + // Update entry's size in the object and in the cache. + Unregister(entry); + + entry->SetSize(new_size); + RasterizerCache::Register(entry); + } + + Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { + if (auto entry = TryGetReservedBuffer(host_ptr)) { + return entry; + } + return std::make_shared(cpu_addr, host_ptr); + } + + Buffer TryGetReservedBuffer(u8* host_ptr) { + const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); + if (it == buffer_reserve.end()) { + return {}; + } + auto& reserve = it->second; + auto entry = reserve.back(); + reserve.pop_back(); + return entry; + } + + void ReserveBuffer(Buffer entry) { + buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); + } + + void AlignBuffer(std::size_t alignment) { + // Align the offset, not the mapped pointer + const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); + buffer_ptr += offset_aligned - buffer_offset; + buffer_offset = offset_aligned; + } + + std::vector& MarkedForDestruction() { + return marked_for_destruction_ring_buffer[marked_for_destruction_index]; + } + + Core::System& system; + + std::unique_ptr stream_buffer; + BufferType stream_buffer_handle{}; + + bool invalidated = false; + + u8* buffer_ptr = nullptr; + u64 buffer_offset = 0; + u64 buffer_offset_base = 0; + + std::size_t marked_for_destruction_index = 0; + std::array, 4> marked_for_destruction_ring_buffer; + + std::unordered_set internalized_entries; + std::unordered_map> buffer_reserve; +}; + +} // namespace VideoCommon From 1fa21fa1927feecc63f0d81824ce4ea203f79fcc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 Jun 2019 03:22:25 -0300 Subject: [PATCH 12/17] gl_buffer_cache: Implement with generic buffer cache --- src/video_core/rasterizer_interface.h | 3 + .../renderer_opengl/gl_buffer_cache.cpp | 193 +++--------------- .../renderer_opengl/gl_buffer_cache.h | 115 ++--------- .../renderer_opengl/gl_rasterizer.cpp | 40 ++-- .../renderer_opengl/gl_rasterizer.h | 1 + .../renderer_opengl/renderer_opengl.cpp | 4 +- src/video_core/renderer_opengl/utils.cpp | 17 +- src/video_core/renderer_opengl/utils.h | 14 +- 8 files changed, 94 insertions(+), 293 deletions(-) diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h index 5ee4f8e8e..2b7367568 100644 --- a/src/video_core/rasterizer_interface.h +++ b/src/video_core/rasterizer_interface.h @@ -47,6 +47,9 @@ public: /// and invalidated virtual void FlushAndInvalidateRegion(CacheAddr addr, u64 size) = 0; + /// Notify rasterizer that a frame is about to finish + virtual void TickFrame() = 0; + /// Attempt to use a faster method to perform a surface copy virtual bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index fb3aedd07..2a9b523f5 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -2,192 +2,57 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include #include -#include -#include "common/alignment.h" +#include + #include "common/assert.h" -#include "core/core.h" -#include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { -namespace { - -constexpr GLuint EmptyBuffer = 0; -constexpr GLintptr CachedBufferOffset = 0; - -OGLBuffer CreateBuffer(std::size_t size, GLenum usage) { - OGLBuffer buffer; - buffer.Create(); - glNamedBufferData(buffer.handle, size, nullptr, usage); - return buffer; -} - -} // Anonymous namespace - -CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} - -OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, std::size_t size) - : RasterizerCache{rasterizer}, system{system}, stream_buffer(size, true) {} +OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, + std::size_t stream_size) + : VideoCommon::BufferCache{ + rasterizer, system, std::make_unique(stream_size, true)} {} OGLBufferCache::~OGLBufferCache() = default; -void OGLBufferCache::Unregister(const std::shared_ptr& entry) { - std::lock_guard lock{mutex}; - - if (entry->IsInternalized()) { - internalized_entries.erase(entry->GetCacheAddr()); - } - ReserveBuffer(entry); - RasterizerCache>::Unregister(entry); +OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) { + OGLBuffer buffer; + buffer.Create(); + glNamedBufferData(buffer.handle, static_cast(size), nullptr, GL_DYNAMIC_DRAW); + return buffer; } -OGLBufferCache::BufferInfo OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, - std::size_t alignment, bool internalize, - bool is_written) { - std::lock_guard lock{mutex}; - - auto& memory_manager = system.GPU().MemoryManager(); - const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; - const auto cache_addr{ToCacheAddr(host_ptr)}; - if (!host_ptr) { - return {EmptyBuffer, 0}; - } - - // Cache management is a big overhead, so only cache entries with a given size. - // TODO: Figure out which size is the best for given games. - if (!internalize && size < 0x800 && - internalized_entries.find(cache_addr) == internalized_entries.end()) { - return StreamBufferUpload(host_ptr, size, alignment); - } - - auto entry = TryGet(host_ptr); - if (!entry) { - return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written); - } - - if (entry->GetSize() < size) { - GrowBuffer(entry, size); - } - if (is_written) { - entry->MarkAsModified(true, *this); - } - return {entry->GetBuffer(), CachedBufferOffset}; +const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) { + return &buffer.handle; } -OGLBufferCache::BufferInfo OGLBufferCache::UploadHostMemory(const void* raw_pointer, - std::size_t size, - std::size_t alignment) { - std::lock_guard lock{mutex}; - return StreamBufferUpload(raw_pointer, size, alignment); +const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { + static const GLuint null_buffer = 0; + return &null_buffer; } -bool OGLBufferCache::Map(std::size_t max_size) { - const auto max_size_ = static_cast(max_size); - bool invalidate; - std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer.Map(max_size_, 4); - buffer_offset = buffer_offset_base; - return invalidate; +void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, + const u8* data) { + glNamedBufferSubData(buffer.handle, static_cast(offset), + static_cast(size), data); } -void OGLBufferCache::Unmap() { - stream_buffer.Unmap(buffer_offset - buffer_offset_base); +void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, + std::size_t size, u8* data) { + glGetNamedBufferSubData(buffer.handle, static_cast(offset), + static_cast(size), data); } -void OGLBufferCache::FlushObjectInner(const std::shared_ptr& entry) { - glGetNamedBufferSubData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); -} - -OGLBufferCache::BufferInfo OGLBufferCache::StreamBufferUpload(const void* raw_pointer, - std::size_t size, - std::size_t alignment) { - AlignBuffer(alignment); - const GLintptr uploaded_offset = buffer_offset; - std::memcpy(buffer_ptr, raw_pointer, size); - - buffer_ptr += size; - buffer_offset += size; - return {stream_buffer.GetHandle(), uploaded_offset}; -} - -OGLBufferCache::BufferInfo OGLBufferCache::FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, - std::size_t size, bool internalize, - bool is_written) { - auto& memory_manager = system.GPU().MemoryManager(); - const auto cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr); - auto entry = GetUncachedBuffer(cpu_addr, host_ptr); - entry->SetSize(size); - entry->SetInternalState(internalize); - Register(entry); - - if (internalize) { - internalized_entries.emplace(ToCacheAddr(host_ptr)); - } - if (is_written) { - entry->MarkAsModified(true, *this); - } - - if (entry->GetCapacity() < size) { - entry->SetCapacity(CreateBuffer(size, GL_STATIC_DRAW), size); - } - glNamedBufferSubData(entry->GetBuffer(), 0, static_cast(size), host_ptr); - return {entry->GetBuffer(), CachedBufferOffset}; -} - -void OGLBufferCache::GrowBuffer(std::shared_ptr& entry, std::size_t new_size) { - const auto old_size = static_cast(entry->GetSize()); - if (entry->GetCapacity() < new_size) { - const auto old_buffer = entry->GetBuffer(); - OGLBuffer new_buffer = CreateBuffer(new_size, GL_STATIC_COPY); - - // Copy bits from the old buffer to the new buffer. - glCopyNamedBufferSubData(old_buffer, new_buffer.handle, 0, 0, old_size); - entry->SetCapacity(std::move(new_buffer), new_size); - } - // Upload the new bits. - const auto size_diff = static_cast(new_size - old_size); - glNamedBufferSubData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); - - // Update entry's size in the object and in the cache. - entry->SetSize(new_size); - Unregister(entry); - Register(entry); -} - -std::shared_ptr OGLBufferCache::GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { - if (auto entry = TryGetReservedBuffer(host_ptr)) { - return entry; - } - return std::make_shared(cpu_addr, host_ptr); -} - -std::shared_ptr OGLBufferCache::TryGetReservedBuffer(u8* host_ptr) { - const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); - if (it == buffer_reserve.end()) { - return {}; - } - auto& reserve = it->second; - auto entry = reserve.back(); - reserve.pop_back(); - return entry; -} - -void OGLBufferCache::ReserveBuffer(std::shared_ptr entry) { - buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); -} - -void OGLBufferCache::AlignBuffer(std::size_t alignment) { - // Align the offset, not the mapped pointer - const GLintptr offset_aligned = - static_cast(Common::AlignUp(static_cast(buffer_offset), alignment)); - buffer_ptr += offset_aligned - buffer_offset; - buffer_offset = offset_aligned; +void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, + std::size_t src_offset, std::size_t dst_offset, + std::size_t size) { + glCopyNamedBufferSubData(src.handle, dst.handle, static_cast(src_offset), + static_cast(dst_offset), static_cast(size)); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 19d643e41..3befdc6ab 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -4,15 +4,10 @@ #pragma once -#include -#include #include -#include -#include -#include -#include #include "common/common_types.h" +#include "video_core/buffer_cache.h" #include "video_core/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" @@ -23,112 +18,30 @@ class System; namespace OpenGL { +class OGLStreamBuffer; class RasterizerOpenGL; -class CachedBufferEntry final : public RasterizerCacheObject { +class OGLBufferCache final : public VideoCommon::BufferCache { public: - explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr); - - VAddr GetCpuAddr() const override { - return cpu_addr; - } - - std::size_t GetSizeInBytes() const override { - return size; - } - - u8* GetWritableHostPtr() const { - return host_ptr; - } - - std::size_t GetSize() const { - return size; - } - - std::size_t GetCapacity() const { - return capacity; - } - - bool IsInternalized() const { - return is_internal; - } - - GLuint GetBuffer() const { - return buffer.handle; - } - - void SetSize(std::size_t new_size) { - size = new_size; - } - - void SetInternalState(bool is_internal_) { - is_internal = is_internal_; - } - - void SetCapacity(OGLBuffer&& new_buffer, std::size_t new_capacity) { - capacity = new_capacity; - buffer = std::move(new_buffer); - } - -private: - u8* host_ptr{}; - VAddr cpu_addr{}; - std::size_t size{}; - std::size_t capacity{}; - bool is_internal{}; - OGLBuffer buffer; -}; - -class OGLBufferCache final : public RasterizerCache> { - using BufferInfo = std::pair; - -public: - explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, std::size_t size); + explicit OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, + std::size_t stream_size); ~OGLBufferCache(); - void Unregister(const std::shared_ptr& entry) override; - - /// Uploads data from a guest GPU address. Returns the OpenGL buffer where it's located and its - /// offset. - BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, - bool internalize = false, bool is_written = false); - - /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. - BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, - std::size_t alignment = 4); - - bool Map(std::size_t max_size); - void Unmap(); - protected: - // We do not have to flush this cache as things in it are never modified by us. - void FlushObjectInner(const std::shared_ptr& entry) override; + OGLBuffer CreateBuffer(std::size_t size) override; -private: - BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, std::size_t alignment); + const GLuint* ToHandle(const OGLBuffer& buffer) override; - BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, - bool internalize, bool is_written); + const GLuint* GetEmptyBuffer(std::size_t) override; - void GrowBuffer(std::shared_ptr& entry, std::size_t new_size); + void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, + const u8* data) override; - std::shared_ptr GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr); + void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, + u8* data) override; - std::shared_ptr TryGetReservedBuffer(u8* host_ptr); - - void ReserveBuffer(std::shared_ptr entry); - - void AlignBuffer(std::size_t alignment); - - Core::System& system; - - u8* buffer_ptr = nullptr; - GLintptr buffer_offset = 0; - GLintptr buffer_offset_base = 0; - - OGLStreamBuffer stream_buffer; - std::unordered_set internalized_entries; - std::unordered_map>> buffer_reserve; + void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset, + std::size_t dst_offset, std::size_t size) override; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 35ba84235..b57d60856 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -198,7 +198,8 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); // Bind the vertex array to the buffer at the current offset. - glVertexArrayVertexBuffer(vao, index, vertex_buffer, vertex_buffer_offset, + // FIXME(Rodrigo): This dereferenced pointer might be invalidated in future uploads. + glVertexArrayVertexBuffer(vao, index, *vertex_buffer, vertex_buffer_offset, vertex_array.stride); if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { @@ -221,7 +222,8 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer(GLuint vao) { const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); - glVertexArrayElementBuffer(vao, buffer); + // FIXME(Rodrigo): This dereferenced pointer might be invalidated in future uploads. + glVertexArrayElementBuffer(vao, *buffer); return offset; } @@ -255,10 +257,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { BaseBindings base_bindings; std::array clip_distances{}; - // Prepare packed bindings - bind_ubo_pushbuffer.Setup(base_bindings.cbuf); - bind_ssbo_pushbuffer.Setup(base_bindings.gmem); - for (std::size_t index = 0; index < Maxwell::MaxShaderProgram; ++index) { const auto& shader_config = gpu.regs.shader_config[index]; const Maxwell::ShaderProgram program{static_cast(index)}; @@ -328,9 +326,6 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) { base_bindings = next_bindings; } - bind_ubo_pushbuffer.Bind(); - bind_ssbo_pushbuffer.Bind(); - SyncClipEnabled(clip_distances); gpu.dirty_flags.shaders = false; @@ -644,11 +639,8 @@ void RasterizerOpenGL::DrawArrays() { buffer_size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); - const bool invalidate = buffer_cache.Map(buffer_size); - if (invalidate) { - // As all cached buffers are invalidated, we need to recheck their state. - gpu.dirty_flags.vertex_array.set(); - } + // Prepare the vertex array. + buffer_cache.Map(buffer_size); // Prepare vertex array format. const GLuint vao = SetupVertexFormat(); @@ -660,6 +652,10 @@ void RasterizerOpenGL::DrawArrays() { // Setup draw parameters. It will automatically choose what glDraw* method to use. const DrawParameters params = SetupDraw(index_buffer_offset); + // Prepare packed bindings. + bind_ubo_pushbuffer.Setup(0); + bind_ssbo_pushbuffer.Setup(0); + // Setup shaders and their used resources. texture_cache.GuardSamplers(true); SetupShaders(params.primitive_mode); @@ -667,7 +663,17 @@ void RasterizerOpenGL::DrawArrays() { ConfigureFramebuffers(state); - buffer_cache.Unmap(); + // Signal the buffer cache that we are not going to upload more things. + const bool invalidate = buffer_cache.Unmap(); + + // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. + bind_ubo_pushbuffer.Bind(); + bind_ssbo_pushbuffer.Bind(); + + if (invalidate) { + // As all cached buffers are invalidated, we need to recheck their state. + gpu.dirty_flags.vertex_array.set(); + } shader_program_manager->ApplyTo(state); state.Apply(); @@ -709,6 +715,10 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { InvalidateRegion(addr, size); } +void RasterizerOpenGL::TickFrame() { + buffer_cache.TickFrame(); +} + bool RasterizerOpenGL::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) { diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index bc988727b..7067ad5b4 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -62,6 +62,7 @@ public: void FlushRegion(CacheAddr addr, u64 size) override; void InvalidateRegion(CacheAddr addr, u64 size) override; void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void TickFrame() override; bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, const Tegra::Engines::Fermi2D::Regs::Surface& dst, const Tegra::Engines::Fermi2D::Config& copy_config) override; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index b142521ec..9ecdddb0d 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -101,7 +101,6 @@ RendererOpenGL::RendererOpenGL(Core::Frontend::EmuWindow& emu_window, Core::Syst RendererOpenGL::~RendererOpenGL() = default; -/// Swap buffers (render frame) void RendererOpenGL::SwapBuffers( std::optional> framebuffer) { @@ -130,6 +129,8 @@ void RendererOpenGL::SwapBuffers( DrawScreen(render_window.GetFramebufferLayout()); + rasterizer->TickFrame(); + render_window.SwapBuffers(); } @@ -262,7 +263,6 @@ void RendererOpenGL::CreateRasterizer() { if (rasterizer) { return; } - // Initialize sRGB Usage OpenGLState::ClearsRGBUsed(); rasterizer = std::make_unique(system, emu_window, screen_info); } diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 68c36988d..22eefa1d7 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -19,23 +19,30 @@ BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; void BindBuffersRangePushBuffer::Setup(GLuint first_) { first = first_; - buffers.clear(); + buffer_pointers.clear(); offsets.clear(); sizes.clear(); } -void BindBuffersRangePushBuffer::Push(GLuint buffer, GLintptr offset, GLsizeiptr size) { - buffers.push_back(buffer); +void BindBuffersRangePushBuffer::Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size) { + buffer_pointers.push_back(buffer); offsets.push_back(offset); sizes.push_back(size); } -void BindBuffersRangePushBuffer::Bind() const { - const std::size_t count{buffers.size()}; +void BindBuffersRangePushBuffer::Bind() { + // Ensure sizes are valid. + const std::size_t count{buffer_pointers.size()}; DEBUG_ASSERT(count == offsets.size() && count == sizes.size()); if (count == 0) { return; } + + // Dereference buffers. + buffers.resize(count); + std::transform(buffer_pointers.begin(), buffer_pointers.end(), buffers.begin(), + [](const GLuint* pointer) { return *pointer; }); + glBindBuffersRange(target, first, static_cast(count), buffers.data(), offsets.data(), sizes.data()); } diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index 4a752f3b4..d2a3d25d9 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -11,20 +11,22 @@ namespace OpenGL { -class BindBuffersRangePushBuffer { +class BindBuffersRangePushBuffer final { public: - BindBuffersRangePushBuffer(GLenum target); + explicit BindBuffersRangePushBuffer(GLenum target); ~BindBuffersRangePushBuffer(); void Setup(GLuint first_); - void Push(GLuint buffer, GLintptr offset, GLsizeiptr size); + void Push(const GLuint* buffer, GLintptr offset, GLsizeiptr size); - void Bind() const; + void Bind(); private: - GLenum target; - GLuint first; + GLenum target{}; + GLuint first{}; + std::vector buffer_pointers; + std::vector buffers; std::vector offsets; std::vector sizes; From 9cdc576f6055cbb308551f09e3566b34233b226e Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 20 Jun 2019 03:44:06 -0300 Subject: [PATCH 13/17] gl_rasterizer: Fix vertex and index data invalidations --- .../renderer_opengl/gl_rasterizer.cpp | 14 ++++----- .../renderer_opengl/gl_rasterizer.h | 3 +- src/video_core/renderer_opengl/utils.cpp | 31 +++++++++++++++++++ src/video_core/renderer_opengl/utils.h | 27 ++++++++++++++++ 4 files changed, 67 insertions(+), 8 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b57d60856..f3527d65b 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -198,9 +198,8 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); // Bind the vertex array to the buffer at the current offset. - // FIXME(Rodrigo): This dereferenced pointer might be invalidated in future uploads. - glVertexArrayVertexBuffer(vao, index, *vertex_buffer, vertex_buffer_offset, - vertex_array.stride); + vertex_array_pushbuffer.SetVertexBuffer(index, vertex_buffer, vertex_buffer_offset, + vertex_array.stride); if (regs.instanced_arrays.IsInstancingEnabled(index) && vertex_array.divisor != 0) { // Enable vertex buffer instancing with the specified divisor. @@ -214,7 +213,7 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) { gpu.dirty_flags.vertex_array.reset(); } -GLintptr RasterizerOpenGL::SetupIndexBuffer(GLuint vao) { +GLintptr RasterizerOpenGL::SetupIndexBuffer() { if (accelerate_draw != AccelDraw::Indexed) { return 0; } @@ -222,8 +221,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer(GLuint vao) { const auto& regs = system.GPU().Maxwell3D().regs; const std::size_t size = CalculateIndexBufferSize(); const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); - // FIXME(Rodrigo): This dereferenced pointer might be invalidated in future uploads. - glVertexArrayElementBuffer(vao, *buffer); + vertex_array_pushbuffer.SetIndexBuffer(buffer); return offset; } @@ -644,10 +642,11 @@ void RasterizerOpenGL::DrawArrays() { // Prepare vertex array format. const GLuint vao = SetupVertexFormat(); + vertex_array_pushbuffer.Setup(vao); // Upload vertex and index data. SetupVertexBuffer(vao); - const GLintptr index_buffer_offset = SetupIndexBuffer(vao); + const GLintptr index_buffer_offset = SetupIndexBuffer(); // Setup draw parameters. It will automatically choose what glDraw* method to use. const DrawParameters params = SetupDraw(index_buffer_offset); @@ -667,6 +666,7 @@ void RasterizerOpenGL::DrawArrays() { const bool invalidate = buffer_cache.Unmap(); // Now that we are no longer uploading data, we can safely bind the buffers to OpenGL. + vertex_array_pushbuffer.Bind(); bind_ubo_pushbuffer.Bind(); bind_ssbo_pushbuffer.Bind(); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 7067ad5b4..1c915fd7f 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -207,6 +207,7 @@ private: static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; OGLBufferCache buffer_cache; + VertexArrayPushBuffer vertex_array_pushbuffer; BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER}; BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER}; @@ -219,7 +220,7 @@ private: void SetupVertexBuffer(GLuint vao); - GLintptr SetupIndexBuffer(GLuint vao); + GLintptr SetupIndexBuffer(); DrawParameters SetupDraw(GLintptr index_buffer_offset); diff --git a/src/video_core/renderer_opengl/utils.cpp b/src/video_core/renderer_opengl/utils.cpp index 22eefa1d7..c504a2c1a 100644 --- a/src/video_core/renderer_opengl/utils.cpp +++ b/src/video_core/renderer_opengl/utils.cpp @@ -13,6 +13,37 @@ namespace OpenGL { +VertexArrayPushBuffer::VertexArrayPushBuffer() = default; + +VertexArrayPushBuffer::~VertexArrayPushBuffer() = default; + +void VertexArrayPushBuffer::Setup(GLuint vao_) { + vao = vao_; + index_buffer = nullptr; + vertex_buffers.clear(); +} + +void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) { + index_buffer = buffer; +} + +void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer, + GLintptr offset, GLsizei stride) { + vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride}); +} + +void VertexArrayPushBuffer::Bind() { + if (index_buffer) { + glVertexArrayElementBuffer(vao, *index_buffer); + } + + // TODO(Rodrigo): Find a way to ARB_multi_bind this + for (const auto& entry : vertex_buffers) { + glVertexArrayVertexBuffer(vao, entry.binding_index, *entry.buffer, entry.offset, + entry.stride); + } +} + BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {} BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default; diff --git a/src/video_core/renderer_opengl/utils.h b/src/video_core/renderer_opengl/utils.h index d2a3d25d9..6c2b45546 100644 --- a/src/video_core/renderer_opengl/utils.h +++ b/src/video_core/renderer_opengl/utils.h @@ -11,6 +11,33 @@ namespace OpenGL { +class VertexArrayPushBuffer final { +public: + explicit VertexArrayPushBuffer(); + ~VertexArrayPushBuffer(); + + void Setup(GLuint vao_); + + void SetIndexBuffer(const GLuint* buffer); + + void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset, + GLsizei stride); + + void Bind(); + +private: + struct Entry { + GLuint binding_index{}; + const GLuint* buffer{}; + GLintptr offset{}; + GLsizei stride{}; + }; + + GLuint vao{}; + const GLuint* index_buffer{}; + std::vector vertex_buffers; +}; + class BindBuffersRangePushBuffer final { public: explicit BindBuffersRangePushBuffer(GLenum target); From 7ecf64257aef13dcd86b01ae0c66389dc78f70bc Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 5 Jul 2019 23:11:58 -0300 Subject: [PATCH 14/17] gl_rasterizer: Minor style changes --- src/video_core/engines/maxwell_3d.h | 1 + .../renderer_opengl/gl_rasterizer.cpp | 46 ++++++++----------- .../renderer_opengl/gl_rasterizer.h | 5 -- .../renderer_opengl/gl_shader_decompiler.cpp | 2 +- 4 files changed, 22 insertions(+), 32 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 13e314944..8d15c8a48 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -67,6 +67,7 @@ public: static constexpr std::size_t MaxShaderStage = 5; // Maximum number of const buffers per shader stage. static constexpr std::size_t MaxConstBuffers = 18; + static constexpr std::size_t MaxConstBufferSize = 0x10000; enum class QueryMode : u32 { Write = 0, diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index f3527d65b..b8430f16d 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -81,6 +81,21 @@ struct DrawParameters { } }; +static std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer, + const GLShader::ConstBufferEntry& entry) { + if (!entry.IsIndirect()) { + return entry.GetSize(); + } + + if (buffer.size > Maxwell::MaxConstBufferSize) { + LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", buffer.size, + Maxwell::MaxConstBufferSize); + return Maxwell::MaxConstBufferSize; + } + + return buffer.size; +} + RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window, ScreenInfo& info) : texture_cache{system, *this, device}, shader_cache{*this, system, emu_window, device}, @@ -634,8 +649,8 @@ void RasterizerOpenGL::DrawArrays() { Maxwell::MaxShaderStage; // Add space for at least 18 constant buffers - buffer_size += - Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); + buffer_size += Maxwell::MaxConstBuffers * + (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); // Prepare the vertex array. buffer_cache.Map(buffer_size); @@ -762,11 +777,9 @@ void RasterizerOpenGL::SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::Sh MICROPROFILE_SCOPE(OpenGL_UBO); const auto stage_index = static_cast(stage); const auto& shader_stage = system.GPU().Maxwell3D().state.shader_stages[stage_index]; - const auto& entries = shader->GetShaderEntries().const_buffers; // Upload only the enabled buffers from the 16 constbuffers of each shader stage - for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { - const auto& entry = entries[bindpoint]; + for (const auto& entry : shader->GetShaderEntries().const_buffers) { SetupConstBuffer(shader_stage.const_buffers[entry.GetIndex()], entry); } } @@ -779,25 +792,9 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b return; } - std::size_t size; - if (entry.IsIndirect()) { - // Buffer is accessed indirectly, so upload the entire thing - size = buffer.size; - - if (size > MaxConstbufferSize) { - LOG_WARNING(Render_OpenGL, "Indirect constbuffer size {} exceeds maximum {}", size, - MaxConstbufferSize); - size = MaxConstbufferSize; - } - } else { - // Buffer is accessed directly, upload just what we use - size = entry.GetSize(); - } - // Align the actual size so it ends up being a multiple of vec4 to meet the OpenGL std140 // UBO alignment requirements. - size = Common::AlignUp(size, sizeof(GLvec4)); - ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); + const std::size_t size = Common::AlignUp(GetConstBufferSize(buffer, entry), sizeof(GLvec4)); const auto alignment = device.GetUniformBufferAlignment(); const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); @@ -811,10 +808,7 @@ void RasterizerOpenGL::SetupGlobalRegions(Tegra::Engines::Maxwell3D::Regs::Shade const auto cbufs{gpu.Maxwell3D().state.shader_stages[static_cast(stage)]}; const auto alignment{device.GetShaderStorageBufferAlignment()}; - const auto& entries = shader->GetShaderEntries().global_memory_entries; - for (std::size_t bindpoint = 0; bindpoint < entries.size(); ++bindpoint) { - const auto& entry{entries[bindpoint]}; - + for (const auto& entry : shader->GetShaderEntries().global_memory_entries) { const auto addr{cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset()}; const auto actual_addr{memory_manager.Read(addr)}; const auto size{memory_manager.Read(addr + 8)}; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 1c915fd7f..172cfe8f6 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -73,11 +73,6 @@ public: void LoadDiskResources(const std::atomic_bool& stop_loading, const VideoCore::DiskResourceLoadCallback& callback) override; - /// Maximum supported size that a constbuffer can have in bytes. - static constexpr std::size_t MaxConstbufferSize = 0x10000; - static_assert(MaxConstbufferSize % sizeof(GLvec4) == 0, - "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); - private: struct FramebufferConfigState { bool using_color_fb{}; diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 5f2f1510c..592525d11 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -46,7 +46,7 @@ using TextureArgument = std::pair; using TextureIR = std::variant; constexpr u32 MAX_CONSTBUFFER_ELEMENTS = - static_cast(RasterizerOpenGL::MaxConstbufferSize) / (4 * sizeof(float)); + static_cast(Maxwell::MaxConstBufferSize) / (4 * sizeof(float)); class ShaderWriter { public: From f7691ebe57d083f12969b5e186154a301f7bfae1 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 5 Jul 2019 23:49:45 -0300 Subject: [PATCH 15/17] gl_rasterizer: Fix nullptr dereference on disabled buffers --- src/video_core/buffer_cache.h | 4 ++-- src/video_core/renderer_opengl/gl_buffer_cache.h | 4 ++-- src/video_core/renderer_opengl/gl_rasterizer.cpp | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h index eb0ec45c2..e51f2418c 100644 --- a/src/video_core/buffer_cache.h +++ b/src/video_core/buffer_cache.h @@ -162,6 +162,8 @@ public: return std::exchange(invalidated, false); } + virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0; + protected: void FlushObjectInner(const Buffer& entry) override { DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); @@ -171,8 +173,6 @@ protected: virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0; - virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0; - virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset, std::size_t size, const u8* data) = 0; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 3befdc6ab..8c8ac4038 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -27,13 +27,13 @@ public: std::size_t stream_size); ~OGLBufferCache(); + const GLuint* GetEmptyBuffer(std::size_t) override; + protected: OGLBuffer CreateBuffer(std::size_t size) override; const GLuint* ToHandle(const OGLBuffer& buffer) override; - const GLuint* GetEmptyBuffer(std::size_t) override; - void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, const u8* data) override; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index b8430f16d..0bb5c068c 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -788,7 +788,7 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b const GLShader::ConstBufferEntry& entry) { if (!buffer.enabled) { // Set values to zero to unbind buffers - bind_ubo_pushbuffer.Push(0, 0, 0); + bind_ubo_pushbuffer.Push(buffer_cache.GetEmptyBuffer(sizeof(float)), 0, sizeof(float)); return; } From 83050c949594fba1248a73c8a2c343f970bd3f06 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 6 Jul 2019 01:14:05 -0300 Subject: [PATCH 16/17] buffer_cache: Try to fix MinGW build --- src/video_core/buffer_cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h index e51f2418c..fa2238a92 100644 --- a/src/video_core/buffer_cache.h +++ b/src/video_core/buffer_cache.h @@ -250,7 +250,7 @@ private: if (auto entry = TryGetReservedBuffer(host_ptr)) { return entry; } - return std::make_shared(cpu_addr, host_ptr); + return std::make_shared>(cpu_addr, host_ptr); } Buffer TryGetReservedBuffer(u8* host_ptr) { From 79a23ca5f0e049deaebdc9dda5a3fd689560a16c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 6 Jul 2019 01:17:05 -0300 Subject: [PATCH 17/17] buffer_cache: Avoid [[nodiscard]] to make clang-format happy --- src/video_core/buffer_cache.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h index fa2238a92..6f868b8b4 100644 --- a/src/video_core/buffer_cache.h +++ b/src/video_core/buffer_cache.h @@ -110,9 +110,8 @@ public: MarkedForDestruction().clear(); } - [[nodiscard]] BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, - std::size_t alignment = 4, bool internalize = false, - bool is_written = false) { + BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, + bool internalize = false, bool is_written = false) { std::lock_guard lock{RasterizerCache::mutex}; auto& memory_manager = system.GPU().MemoryManager(); @@ -145,8 +144,8 @@ public: } /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. - [[nodiscard]] BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, - std::size_t alignment = 4) { + BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, + std::size_t alignment = 4) { std::lock_guard lock{RasterizerCache::mutex}; return StreamBufferUpload(raw_pointer, size, alignment); }