diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index b4277ef73..1219ca6ea 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -7,90 +7,165 @@ #include #include "common/alignment.h" +#include "common/assert.h" #include "core/core.h" #include "video_core/memory_manager.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" +#include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { -CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size, - std::size_t alignment, GLuint buffer, GLintptr offset) - : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, alignment{alignment}, - buffer{buffer}, offset{offset} {} +namespace { + +constexpr GLuint EmptyBuffer = 0; +constexpr GLintptr CachedBufferOffset = 0; + +OGLBuffer CreateBuffer(std::size_t size, GLenum usage) { + OGLBuffer buffer; + buffer.Create(); + glNamedBufferData(buffer.handle, size, nullptr, usage); + return buffer; +} + +} // Anonymous namespace + +CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr) + : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr} {} OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size) : RasterizerCache{rasterizer}, stream_buffer(size, true) {} -std::pair OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, - std::size_t alignment, bool cache) { +OGLBufferCache::~OGLBufferCache() = default; + +void OGLBufferCache::Unregister(const std::shared_ptr& entry) { + std::lock_guard lock{mutex}; + + if (entry->IsInternalized()) { + internalized_entries.erase(entry->GetCacheAddr()); + } + ReserveBuffer(entry); + RasterizerCache>::Unregister(entry); +} + +OGLBufferCache::BufferInfo OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size, + std::size_t alignment, bool internalize) { std::lock_guard lock{mutex}; auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); - - const auto& host_ptr{memory_manager.GetPointer(gpu_addr)}; + const auto host_ptr{memory_manager.GetPointer(gpu_addr)}; + const auto cache_addr{ToCacheAddr(host_ptr)}; if (!host_ptr) { - // Return a dummy buffer when host_ptr is invalid. - return {0, 0}; + return {EmptyBuffer, 0}; } // Cache management is a big overhead, so only cache entries with a given size. // TODO: Figure out which size is the best for given games. - cache &= size >= 2048; - - if (cache) { - if (auto entry = TryGet(host_ptr); entry) { - if (entry->GetSize() >= size && entry->GetAlignment() == alignment) { - return {entry->GetBuffer(), entry->GetOffset()}; - } - Unregister(entry); - } + if (!internalize && size < 0x800 && + internalized_entries.find(cache_addr) == internalized_entries.end()) { + return StreamBufferUpload(host_ptr, size, alignment); } - AlignBuffer(alignment); - const GLintptr uploaded_offset = buffer_offset; - - std::memcpy(buffer_ptr, host_ptr, size); - buffer_ptr += size; - buffer_offset += size; - - const GLuint buffer = stream_buffer.GetHandle(); - if (cache) { - const VAddr cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr); - Register(std::make_shared(cpu_addr, host_ptr, size, alignment, buffer, - uploaded_offset)); + auto entry = TryGet(host_ptr); + if (!entry) { + return FixedBufferUpload(gpu_addr, host_ptr, size, internalize); } - return {buffer, uploaded_offset}; + if (entry->GetSize() < size) { + GrowBuffer(entry, size); + } + return {entry->GetBuffer(), CachedBufferOffset}; } -std::pair OGLBufferCache::UploadHostMemory(const void* raw_pointer, - std::size_t size, - std::size_t alignment) { - std::lock_guard lock{mutex}; +OGLBufferCache::BufferInfo OGLBufferCache::UploadHostMemory(const void* raw_pointer, + std::size_t size, + std::size_t alignment) { + return StreamBufferUpload(raw_pointer, size, alignment); +} + +bool OGLBufferCache::Map(std::size_t max_size) { + const auto max_size_ = static_cast(max_size); + bool invalidate; + std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer.Map(max_size_, 4); + buffer_offset = buffer_offset_base; + return invalidate; +} + +void OGLBufferCache::Unmap() { + stream_buffer.Unmap(buffer_offset - buffer_offset_base); +} + +OGLBufferCache::BufferInfo OGLBufferCache::StreamBufferUpload(const void* raw_pointer, + std::size_t size, + std::size_t alignment) { AlignBuffer(alignment); - std::memcpy(buffer_ptr, raw_pointer, size); const GLintptr uploaded_offset = buffer_offset; + std::memcpy(buffer_ptr, raw_pointer, size); buffer_ptr += size; buffer_offset += size; return {stream_buffer.GetHandle(), uploaded_offset}; } -bool OGLBufferCache::Map(std::size_t max_size) { - bool invalidate; - std::tie(buffer_ptr, buffer_offset_base, invalidate) = - stream_buffer.Map(static_cast(max_size), 4); - buffer_offset = buffer_offset_base; - - if (invalidate) { - InvalidateAll(); +OGLBufferCache::BufferInfo OGLBufferCache::FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, + std::size_t size, bool internalize) { + if (internalize) { + internalized_entries.emplace(ToCacheAddr(host_ptr)); } - return invalidate; + auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); + const auto cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr); + auto entry = GetUncachedBuffer(cpu_addr, host_ptr); + entry->SetSize(size); + entry->SetInternalState(internalize); + Register(entry); + + if (entry->GetCapacity() < size) { + entry->SetCapacity(CreateBuffer(size, GL_STATIC_DRAW), size); + } + glNamedBufferSubData(entry->GetBuffer(), 0, static_cast(size), host_ptr); + return {entry->GetBuffer(), CachedBufferOffset}; } -void OGLBufferCache::Unmap() { - stream_buffer.Unmap(buffer_offset - buffer_offset_base); +void OGLBufferCache::GrowBuffer(std::shared_ptr& entry, std::size_t new_size) { + const auto old_size = static_cast(entry->GetSize()); + if (entry->GetCapacity() < new_size) { + const auto old_buffer = entry->GetBuffer(); + OGLBuffer new_buffer = CreateBuffer(new_size, GL_STATIC_COPY); + + // Copy bits from the old buffer to the new buffer. + glCopyNamedBufferSubData(old_buffer, new_buffer.handle, 0, 0, old_size); + entry->SetCapacity(std::move(new_buffer), new_size); + } + // Upload the new bits. + const auto size_diff = static_cast(new_size - old_size); + glNamedBufferSubData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); + + // Update entry's size in the object and in the cache. + entry->SetSize(new_size); + Unregister(entry); + Register(entry); +} + +std::shared_ptr OGLBufferCache::GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { + if (auto entry = TryGetReservedBuffer(host_ptr); entry) { + return entry; + } + return std::make_shared(cpu_addr, host_ptr); +} + +std::shared_ptr OGLBufferCache::TryGetReservedBuffer(u8* host_ptr) { + const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); + if (it == buffer_reserve.end()) { + return {}; + } + auto& reserve = it->second; + auto entry = reserve.back(); + reserve.pop_back(); + return entry; +} + +void OGLBufferCache::ReserveBuffer(std::shared_ptr entry) { + buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); } void OGLBufferCache::AlignBuffer(std::size_t alignment) { diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 4a055035a..00bc6008a 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -5,9 +5,12 @@ #pragma once #include +#include #include #include +#include #include +#include #include "common/common_types.h" #include "video_core/rasterizer_cache.h" @@ -20,8 +23,7 @@ class RasterizerOpenGL; class CachedBufferEntry final : public RasterizerCacheObject { public: - explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size, - std::size_t alignment, GLuint buffer, GLintptr offset); + explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr); VAddr GetCpuAddr() const override { return cpu_addr; @@ -35,55 +37,87 @@ public: return size; } - std::size_t GetAlignment() const { - return alignment; + std::size_t GetCapacity() const { + return capacity; + } + + bool IsInternalized() const { + return is_internal; } GLuint GetBuffer() const { - return buffer; + return buffer.handle; } - GLintptr GetOffset() const { - return offset; + void SetSize(std::size_t new_size) { + size = new_size; + } + + void SetInternalState(bool is_internal_) { + is_internal = is_internal_; + } + + void SetCapacity(OGLBuffer&& new_buffer, std::size_t new_capacity) { + capacity = new_capacity; + buffer = std::move(new_buffer); } private: VAddr cpu_addr{}; std::size_t size{}; - std::size_t alignment{}; - - GLuint buffer{}; - GLintptr offset{}; + std::size_t capacity{}; + bool is_internal{}; + OGLBuffer buffer; }; class OGLBufferCache final : public RasterizerCache> { + using BufferInfo = std::pair; + public: explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size); + ~OGLBufferCache(); + + void Unregister(const std::shared_ptr& entry) override; /// Uploads data from a guest GPU address. Returns the OpenGL buffer where it's located and its /// offset. - std::pair UploadMemory(GPUVAddr gpu_addr, std::size_t size, - std::size_t alignment = 4, bool cache = true); + BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, + bool internalize = false); /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. - std::pair UploadHostMemory(const void* raw_pointer, std::size_t size, - std::size_t alignment = 4); + BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, + std::size_t alignment = 4); bool Map(std::size_t max_size); void Unmap(); protected: - void AlignBuffer(std::size_t alignment); - // We do not have to flush this cache as things in it are never modified by us. void FlushObjectInner(const std::shared_ptr& object) override {} private: - OGLStreamBuffer stream_buffer; + BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, std::size_t alignment); + + BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, + bool internalize); + + void GrowBuffer(std::shared_ptr& entry, std::size_t new_size); + + std::shared_ptr GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr); + + std::shared_ptr TryGetReservedBuffer(u8* host_ptr); + + void ReserveBuffer(std::shared_ptr entry); + + void AlignBuffer(std::size_t alignment); u8* buffer_ptr = nullptr; GLintptr buffer_offset = 0; GLintptr buffer_offset_base = 0; + + OGLStreamBuffer stream_buffer; + std::unordered_set internalized_entries; + std::unordered_map>> buffer_reserve; }; } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index d694dacfb..e216163e1 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -790,7 +790,7 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b size = Common::AlignUp(size, sizeof(GLvec4)); ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big"); - const std::size_t alignment = device.GetUniformBufferAlignment(); + const auto alignment = device.GetUniformBufferAlignment(); const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment); bind_ubo_pushbuffer.Push(cbuf, offset, size); }