From 862bec001b7ada13ba0e97f95d6ad108ae8a8d0c Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Jul 2019 10:50:40 -0400 Subject: [PATCH 1/7] Video_Core: Implement a new Buffer Cache --- src/video_core/CMakeLists.txt | 4 +- src/video_core/buffer_cache.h | 299 -------------- src/video_core/buffer_cache/buffer_block.h | 78 ++++ src/video_core/buffer_cache/buffer_cache.h | 372 ++++++++++++++++++ src/video_core/buffer_cache/map_interval.h | 48 +++ src/video_core/gpu.h | 4 + .../renderer_opengl/gl_buffer_cache.cpp | 47 ++- .../renderer_opengl/gl_buffer_cache.h | 33 +- .../renderer_opengl/gl_rasterizer.cpp | 2 +- 9 files changed, 560 insertions(+), 327 deletions(-) delete mode 100644 src/video_core/buffer_cache.h create mode 100644 src/video_core/buffer_cache/buffer_block.h create mode 100644 src/video_core/buffer_cache/buffer_cache.h create mode 100644 src/video_core/buffer_cache/map_interval.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 7c18c27b3..e73ecb2fe 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,5 +1,7 @@ add_library(video_core STATIC - buffer_cache.h + buffer_cache/buffer_block.h + buffer_cache/buffer_cache.h + buffer_cache/map_interval.h dma_pusher.cpp dma_pusher.h debug_utils/debug_utils.cpp diff --git a/src/video_core/buffer_cache.h b/src/video_core/buffer_cache.h deleted file mode 100644 index 6f868b8b4..000000000 --- a/src/video_core/buffer_cache.h +++ /dev/null @@ -1,299 +0,0 @@ -// Copyright 2019 yuzu Emulator Project -// Licensed under GPLv2 or any later version -// Refer to the license.txt file included. 
- -#pragma once - -#include -#include -#include -#include -#include -#include -#include - -#include "common/alignment.h" -#include "common/common_types.h" -#include "core/core.h" -#include "video_core/memory_manager.h" -#include "video_core/rasterizer_cache.h" - -namespace VideoCore { -class RasterizerInterface; -} - -namespace VideoCommon { - -template -class CachedBuffer final : public RasterizerCacheObject { -public: - explicit CachedBuffer(VAddr cpu_addr, u8* host_ptr) - : RasterizerCacheObject{host_ptr}, host_ptr{host_ptr}, cpu_addr{cpu_addr} {} - ~CachedBuffer() override = default; - - VAddr GetCpuAddr() const override { - return cpu_addr; - } - - std::size_t GetSizeInBytes() const override { - return size; - } - - u8* GetWritableHostPtr() const { - return host_ptr; - } - - std::size_t GetSize() const { - return size; - } - - std::size_t GetCapacity() const { - return capacity; - } - - bool IsInternalized() const { - return is_internal; - } - - const BufferStorageType& GetBuffer() const { - return buffer; - } - - void SetSize(std::size_t new_size) { - size = new_size; - } - - void SetInternalState(bool is_internal_) { - is_internal = is_internal_; - } - - BufferStorageType ExchangeBuffer(BufferStorageType buffer_, std::size_t new_capacity) { - capacity = new_capacity; - std::swap(buffer, buffer_); - return buffer_; - } - -private: - u8* host_ptr{}; - VAddr cpu_addr{}; - std::size_t size{}; - std::size_t capacity{}; - bool is_internal{}; - BufferStorageType buffer; -}; - -template -class BufferCache : public RasterizerCache>> { -public: - using Buffer = std::shared_ptr>; - using BufferInfo = std::pair; - - explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, - std::unique_ptr stream_buffer) - : RasterizerCache{rasterizer}, system{system}, - stream_buffer{std::move(stream_buffer)}, stream_buffer_handle{ - this->stream_buffer->GetHandle()} {} - ~BufferCache() = default; - - void Unregister(const Buffer& entry) override { - 
std::lock_guard lock{RasterizerCache::mutex}; - if (entry->IsInternalized()) { - internalized_entries.erase(entry->GetCacheAddr()); - } - ReserveBuffer(entry); - RasterizerCache::Unregister(entry); - } - - void TickFrame() { - marked_for_destruction_index = - (marked_for_destruction_index + 1) % marked_for_destruction_ring_buffer.size(); - MarkedForDestruction().clear(); - } - - BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, - bool internalize = false, bool is_written = false) { - std::lock_guard lock{RasterizerCache::mutex}; - - auto& memory_manager = system.GPU().MemoryManager(); - const auto host_ptr = memory_manager.GetPointer(gpu_addr); - if (!host_ptr) { - return {GetEmptyBuffer(size), 0}; - } - const auto cache_addr = ToCacheAddr(host_ptr); - - // Cache management is a big overhead, so only cache entries with a given size. - // TODO: Figure out which size is the best for given games. - constexpr std::size_t max_stream_size = 0x800; - if (!internalize && size < max_stream_size && - internalized_entries.find(cache_addr) == internalized_entries.end()) { - return StreamBufferUpload(host_ptr, size, alignment); - } - - auto entry = RasterizerCache::TryGet(cache_addr); - if (!entry) { - return FixedBufferUpload(gpu_addr, host_ptr, size, internalize, is_written); - } - - if (entry->GetSize() < size) { - IncreaseBufferSize(entry, size); - } - if (is_written) { - entry->MarkAsModified(true, *this); - } - return {ToHandle(entry->GetBuffer()), 0}; - } - - /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. 
- BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, - std::size_t alignment = 4) { - std::lock_guard lock{RasterizerCache::mutex}; - return StreamBufferUpload(raw_pointer, size, alignment); - } - - void Map(std::size_t max_size) { - std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); - buffer_offset = buffer_offset_base; - } - - /// Finishes the upload stream, returns true on bindings invalidation. - bool Unmap() { - stream_buffer->Unmap(buffer_offset - buffer_offset_base); - return std::exchange(invalidated, false); - } - - virtual const BufferType* GetEmptyBuffer(std::size_t size) = 0; - -protected: - void FlushObjectInner(const Buffer& entry) override { - DownloadBufferData(entry->GetBuffer(), 0, entry->GetSize(), entry->GetWritableHostPtr()); - } - - virtual BufferStorageType CreateBuffer(std::size_t size) = 0; - - virtual const BufferType* ToHandle(const BufferStorageType& storage) = 0; - - virtual void UploadBufferData(const BufferStorageType& buffer, std::size_t offset, - std::size_t size, const u8* data) = 0; - - virtual void DownloadBufferData(const BufferStorageType& buffer, std::size_t offset, - std::size_t size, u8* data) = 0; - - virtual void CopyBufferData(const BufferStorageType& src, const BufferStorageType& dst, - std::size_t src_offset, std::size_t dst_offset, - std::size_t size) = 0; - -private: - BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, - std::size_t alignment) { - AlignBuffer(alignment); - const std::size_t uploaded_offset = buffer_offset; - std::memcpy(buffer_ptr, raw_pointer, size); - - buffer_ptr += size; - buffer_offset += size; - return {&stream_buffer_handle, uploaded_offset}; - } - - BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size, - bool internalize, bool is_written) { - auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); - const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr); - 
ASSERT(cpu_addr); - - auto entry = GetUncachedBuffer(*cpu_addr, host_ptr); - entry->SetSize(size); - entry->SetInternalState(internalize); - RasterizerCache::Register(entry); - - if (internalize) { - internalized_entries.emplace(ToCacheAddr(host_ptr)); - } - if (is_written) { - entry->MarkAsModified(true, *this); - } - - if (entry->GetCapacity() < size) { - MarkedForDestruction().push_back(entry->ExchangeBuffer(CreateBuffer(size), size)); - } - - UploadBufferData(entry->GetBuffer(), 0, size, host_ptr); - return {ToHandle(entry->GetBuffer()), 0}; - } - - void IncreaseBufferSize(Buffer& entry, std::size_t new_size) { - const std::size_t old_size = entry->GetSize(); - if (entry->GetCapacity() < new_size) { - const auto& old_buffer = entry->GetBuffer(); - auto new_buffer = CreateBuffer(new_size); - - // Copy bits from the old buffer to the new buffer. - CopyBufferData(old_buffer, new_buffer, 0, 0, old_size); - MarkedForDestruction().push_back( - entry->ExchangeBuffer(std::move(new_buffer), new_size)); - - // This buffer could have been used - invalidated = true; - } - // Upload the new bits. - const std::size_t size_diff = new_size - old_size; - UploadBufferData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size); - - // Update entry's size in the object and in the cache. 
- Unregister(entry); - - entry->SetSize(new_size); - RasterizerCache::Register(entry); - } - - Buffer GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) { - if (auto entry = TryGetReservedBuffer(host_ptr)) { - return entry; - } - return std::make_shared>(cpu_addr, host_ptr); - } - - Buffer TryGetReservedBuffer(u8* host_ptr) { - const auto it = buffer_reserve.find(ToCacheAddr(host_ptr)); - if (it == buffer_reserve.end()) { - return {}; - } - auto& reserve = it->second; - auto entry = reserve.back(); - reserve.pop_back(); - return entry; - } - - void ReserveBuffer(Buffer entry) { - buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry)); - } - - void AlignBuffer(std::size_t alignment) { - // Align the offset, not the mapped pointer - const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); - buffer_ptr += offset_aligned - buffer_offset; - buffer_offset = offset_aligned; - } - - std::vector& MarkedForDestruction() { - return marked_for_destruction_ring_buffer[marked_for_destruction_index]; - } - - Core::System& system; - - std::unique_ptr stream_buffer; - BufferType stream_buffer_handle{}; - - bool invalidated = false; - - u8* buffer_ptr = nullptr; - u64 buffer_offset = 0; - u64 buffer_offset_base = 0; - - std::size_t marked_for_destruction_index = 0; - std::array, 4> marked_for_destruction_ring_buffer; - - std::unordered_set internalized_entries; - std::unordered_map> buffer_reserve; -}; - -} // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h new file mode 100644 index 000000000..2c739a586 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_block.h @@ -0,0 +1,78 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include + +#include "common/alignment.h" +#include "common/common_types.h" +#include "video_core/gpu.h" + +namespace VideoCommon { + +class BufferBlock { +public: + bool Overlaps(const CacheAddr start, const CacheAddr end) const { + return (cache_addr < end) && (cache_addr_end > start); + } + + bool IsInside(const CacheAddr other_start, const CacheAddr other_end) { + return (cache_addr <= other_start && other_end <= cache_addr_end); + } + + u8* GetWritableHostPtr() const { + return FromCacheAddr(cache_addr); + } + + u8* GetWritableHostPtr(std::size_t offset) const { + return FromCacheAddr(cache_addr + offset); + } + + std::size_t GetOffset(const CacheAddr in_addr) { + return static_cast(in_addr - cache_addr); + } + + CacheAddr GetCacheAddr() const { + return cache_addr; + } + + CacheAddr GetCacheAddrEnd() const { + return cache_addr_end; + } + + void SetCacheAddr(const CacheAddr new_addr) { + cache_addr = new_addr; + cache_addr_end = new_addr + size; + } + + std::size_t GetSize() const { + return size; + } + + void SetEpoch(u64 new_epoch) { + epoch = new_epoch; + } + + u64 GetEpoch() { + return epoch; + } + +protected: + explicit BufferBlock(CacheAddr cache_addr,const std::size_t size) + : size{size} { + SetCacheAddr(cache_addr); + } + ~BufferBlock() = default; + +private: + CacheAddr cache_addr{}; + CacheAddr cache_addr_end{}; + u64 pages{}; + std::size_t size{}; + u64 epoch{}; +}; + +} // namespace VideoCommon diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h new file mode 100644 index 000000000..6c467eb80 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -0,0 +1,372 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "common/alignment.h" +#include "common/common_types.h" +#include "core/core.h" +#include "video_core/buffer_cache/map_interval.h" +#include "video_core/buffer_cache/buffer_block.h" +#include "video_core/memory_manager.h" + +namespace VideoCore { +class RasterizerInterface; +} + +namespace VideoCommon { + +template +class BufferCache { +public: + using BufferInfo = std::pair; + + BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, + bool is_written = false) { + std::lock_guard lock{mutex}; + + auto& memory_manager = system.GPU().MemoryManager(); + const auto host_ptr = memory_manager.GetPointer(gpu_addr); + if (!host_ptr) { + return {GetEmptyBuffer(size), 0}; + } + const auto cache_addr = ToCacheAddr(host_ptr); + + auto block = GetBlock(cache_addr, size); + MapAddress(block, gpu_addr, cache_addr, size, is_written); + + const u64 offset = static_cast(block->GetOffset(cache_addr)); + + return {ToHandle(block), offset}; + } + + /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset. + BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size, + std::size_t alignment = 4) { + std::lock_guard lock{mutex}; + return StreamBufferUpload(raw_pointer, size, alignment); + } + + void Map(std::size_t max_size) { + std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); + buffer_offset = buffer_offset_base; + } + + /// Finishes the upload stream, returns true on bindings invalidation. 
+ bool Unmap() { + stream_buffer->Unmap(buffer_offset - buffer_offset_base); + return std::exchange(invalidated, false); + } + + void TickFrame() { + ++epoch; + while (!pending_destruction.empty()) { + if (pending_destruction.front()->GetEpoch() + 1 > epoch) { + break; + } + pending_destruction.pop_front(); + } + } + + /// Write any cached resources overlapping the specified region back to memory + void FlushRegion(CacheAddr addr, std::size_t size) { + std::lock_guard lock{mutex}; + + // TODO + } + + /// Mark the specified region as being invalidated + void InvalidateRegion(CacheAddr addr, u64 size) { + std::lock_guard lock{mutex}; + + std::vector objects = GetMapsInRange(addr, size); + for (auto& object : objects) { + Unregister(object); + } + } + + virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0; + +protected: + explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + std::unique_ptr stream_buffer) + : rasterizer{rasterizer}, system{system}, stream_buffer{std::move(stream_buffer)}, + stream_buffer_handle{this->stream_buffer->GetHandle()} {} + + ~BufferCache() = default; + + virtual const TBufferType* ToHandle(const TBuffer& storage) = 0; + + virtual void WriteBarrier() = 0; + + virtual TBuffer CreateBlock(CacheAddr cache_addr, std::size_t size) = 0; + + virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, + const u8* data) = 0; + + virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, + u8* data) = 0; + + virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, + std::size_t dst_offset, std::size_t size) = 0; + + /// Register an object into the cache + void Register(const MapInterval& new_interval, const GPUVAddr gpu_addr) { + const CacheAddr cache_ptr = new_interval.start; + const std::size_t size = new_interval.end - new_interval.start; + const std::optional cpu_addr = + 
system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + if (!cache_ptr || !cpu_addr) { + LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", + gpu_addr); + return; + } + const IntervalType interval{new_interval.start, new_interval.end}; + mapped_addresses.insert(interval); + map_storage[new_interval] = MapInfo{gpu_addr, *cpu_addr}; + + rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); + } + + /// Unregisters an object from the cache + void Unregister(const MapInterval& interval) { + const MapInfo info = map_storage[interval]; + const std::size_t size = interval.end - interval.start; + rasterizer.UpdatePagesCachedCount(info.cpu_addr, size, -1); + const IntervalType delete_interval{interval.start, interval.end}; + mapped_addresses.erase(delete_interval); + map_storage.erase(interval); + } + +private: + void MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const CacheAddr cache_addr, + const std::size_t size, bool is_written) { + + std::vector overlaps = GetMapsInRange(cache_addr, size); + if (overlaps.empty()) { + const CacheAddr cache_addr_end = cache_addr + size; + MapInterval new_interval{cache_addr, cache_addr_end}; + if (!is_written) { + u8* host_ptr = FromCacheAddr(cache_addr); + UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); + } + Register(new_interval, gpu_addr); + return; + } + + if (overlaps.size() == 1) { + MapInterval current_map = overlaps[0]; + const CacheAddr cache_addr_end = cache_addr + size; + if (current_map.IsInside(cache_addr, cache_addr_end)) { + return; + } + const CacheAddr new_start = std::min(cache_addr, current_map.start); + const CacheAddr new_end = std::max(cache_addr_end, current_map.end); + const GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; + const std::size_t new_size = static_cast(new_end - new_start); + MapInterval new_interval{new_start, new_end}; + const std::size_t offset = current_map.start - new_start; + const std::size_t size = 
current_map.end - current_map.start; + // Upload the remaining data + if (!is_written) { + u8* host_ptr = FromCacheAddr(new_start); + if (new_start == cache_addr && new_end == cache_addr_end) { + std::size_t first_size = current_map.start - new_start; + if (first_size > 0) { + UploadBlockData(block, block->GetOffset(new_start), first_size, host_ptr); + } + + std::size_t second_size = new_end - current_map.end; + if (second_size > 0) { + u8* host_ptr2 = FromCacheAddr(current_map.end); + UploadBlockData(block, block->GetOffset(current_map.end), second_size, + host_ptr2); + } + } else { + if (new_start == cache_addr) { + std::size_t second_size = new_end - current_map.end; + if (second_size > 0) { + u8* host_ptr2 = FromCacheAddr(current_map.end); + UploadBlockData(block, block->GetOffset(current_map.end), second_size, + host_ptr2); + } + } else { + std::size_t first_size = current_map.start - new_start; + if (first_size > 0) { + UploadBlockData(block, block->GetOffset(new_start), first_size, host_ptr); + } + } + } + } + Unregister(current_map); + Register(new_interval, new_gpu_addr); + } else { + // Calculate new buffer parameters + GPUVAddr new_gpu_addr = gpu_addr; + CacheAddr start = cache_addr; + CacheAddr end = cache_addr + size; + for (auto& overlap : overlaps) { + start = std::min(overlap.start, start); + end = std::max(overlap.end, end); + } + new_gpu_addr = gpu_addr + start - cache_addr; + MapInterval new_interval{start, end}; + for (auto& overlap : overlaps) { + Unregister(overlap); + } + std::size_t new_size = end - start; + if (!is_written) { + u8* host_ptr = FromCacheAddr(start); + UploadBlockData(block, block->GetOffset(start), new_size, host_ptr); + } + Register(new_interval, new_gpu_addr); + } + } + + std::vector GetMapsInRange(CacheAddr addr, std::size_t size) { + if (size == 0) { + return {}; + } + + std::vector objects{}; + const IntervalType interval{addr, addr + size}; + for (auto& pair : 
boost::make_iterator_range(mapped_addresses.equal_range(interval))) { + objects.emplace_back(pair.lower(), pair.upper()); + } + + return objects; + } + + /// Returns a ticks counter used for tracking when cached objects were last modified + u64 GetModifiedTicks() { + return ++modified_ticks; + } + + BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, + std::size_t alignment) { + AlignBuffer(alignment); + const std::size_t uploaded_offset = buffer_offset; + std::memcpy(buffer_ptr, raw_pointer, size); + + buffer_ptr += size; + buffer_offset += size; + return {&stream_buffer_handle, uploaded_offset}; + } + + void AlignBuffer(std::size_t alignment) { + // Align the offset, not the mapped pointer + const std::size_t offset_aligned = Common::AlignUp(buffer_offset, alignment); + buffer_ptr += offset_aligned - buffer_offset; + buffer_offset = offset_aligned; + } + + TBuffer EnlargeBlock(TBuffer buffer) { + const std::size_t old_size = buffer->GetSize(); + const std::size_t new_size = old_size + block_page_size; + const CacheAddr cache_addr = buffer->GetCacheAddr(); + TBuffer new_buffer = CreateBlock(cache_addr, new_size); + CopyBlock(buffer, new_buffer, 0, 0, old_size); + buffer->SetEpoch(epoch); + pending_destruction.push_back(buffer); + const CacheAddr cache_addr_end = cache_addr + new_size - 1; + u64 page_start = cache_addr >> block_page_bits; + const u64 page_end = cache_addr_end >> block_page_bits; + while (page_start <= page_end) { + blocks[page_start] = new_buffer; + ++page_start; + } + return new_buffer; + } + + TBuffer MergeBlocks(TBuffer first, TBuffer second) { + const std::size_t size_1 = first->GetSize(); + const std::size_t size_2 = second->GetSize(); + const CacheAddr first_addr = first->GetCacheAddr(); + const CacheAddr second_addr = second->GetCacheAddr(); + const CacheAddr new_addr = std::min(first_addr, second_addr); + const std::size_t new_size = size_1 + size_2; + TBuffer new_buffer = CreateBlock(new_addr, new_size); + 
CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); + CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); + first->SetEpoch(epoch); + second->SetEpoch(epoch); + pending_destruction.push_back(first); + pending_destruction.push_back(second); + const CacheAddr cache_addr_end = new_addr + new_size - 1; + u64 page_start = new_addr >> block_page_bits; + const u64 page_end = cache_addr_end >> block_page_bits; + while (page_start <= page_end) { + blocks[page_start] = new_buffer; + ++page_start; + } + return new_buffer; + } + + TBuffer GetBlock(const CacheAddr cache_addr, const std::size_t size) { + TBuffer found{}; + const CacheAddr cache_addr_end = cache_addr + size - 1; + u64 page_start = cache_addr >> block_page_bits; + const u64 page_end = cache_addr_end >> block_page_bits; + const u64 num_pages = page_end - page_start + 1; + while (page_start <= page_end) { + auto it = blocks.find(page_start); + if (it == blocks.end()) { + if (found) { + found = EnlargeBlock(found); + } else { + const CacheAddr start_addr = (page_start << block_page_bits); + found = CreateBlock(start_addr, block_page_size); + blocks[page_start] = found; + } + } else { + if (found) { + if (found == it->second) { + ++page_start; + continue; + } + found = MergeBlocks(found, it->second); + } else { + found = it->second; + } + } + ++page_start; + } + return found; + } + + std::unique_ptr stream_buffer; + TBufferType stream_buffer_handle{}; + + bool invalidated = false; + + u8* buffer_ptr = nullptr; + u64 buffer_offset = 0; + u64 buffer_offset_base = 0; + + using IntervalCache = boost::icl::interval_set; + using IntervalType = typename IntervalCache::interval_type; + IntervalCache mapped_addresses{}; + std::unordered_map map_storage; + + static constexpr u64 block_page_bits{24}; + static constexpr u64 block_page_size{1 << block_page_bits}; + std::unordered_map blocks; + + std::list pending_destruction; + u64 epoch{}; + u64 modified_ticks{}; + 
VideoCore::RasterizerInterface& rasterizer; + Core::System& system; + std::recursive_mutex mutex; +}; + +} // namespace VideoCommon diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h new file mode 100644 index 000000000..652a35dcd --- /dev/null +++ b/src/video_core/buffer_cache/map_interval.h @@ -0,0 +1,48 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include "common/common_types.h" +#include "video_core/gpu.h" + +namespace VideoCommon { + +struct MapInterval { + MapInterval(const CacheAddr start, const CacheAddr end) : start{start}, end{end} {} + CacheAddr start; + CacheAddr end; + bool IsInside(const CacheAddr other_start, const CacheAddr other_end) { + return (start <= other_start && other_end <= end); + } + + bool operator==(const MapInterval& rhs) const { + return std::tie(start, end) == std::tie(rhs.start, rhs.end); + } + + bool operator!=(const MapInterval& rhs) const { + return !operator==(rhs); + } +}; + +struct MapInfo { + GPUVAddr gpu_addr; + VAddr cpu_addr; +}; + +} // namespace VideoCommon + +namespace std { + +template <> +struct hash { + std::size_t operator()(const VideoCommon::MapInterval& k) const noexcept { + std::size_t a = std::hash()(k.start); + boost::hash_combine(a, std::hash()(k.end)); + return a; + } +}; + +} // namespace std diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h index 11857ff99..0baf2177c 100644 --- a/src/video_core/gpu.h +++ b/src/video_core/gpu.h @@ -19,6 +19,10 @@ inline CacheAddr ToCacheAddr(const void* host_ptr) { return reinterpret_cast(host_ptr); } +inline u8* FromCacheAddr(CacheAddr cache_addr) { + return reinterpret_cast(cache_addr); +} + namespace Core { class System; } diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index 2a9b523f5..a45d2771b 100644 --- 
a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -13,22 +13,31 @@ namespace OpenGL { +CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) + : VideoCommon::BufferBlock{cache_addr, size} { + gl_buffer.Create(); + glNamedBufferData(gl_buffer.handle, static_cast(size), nullptr, GL_DYNAMIC_DRAW); +} + +CachedBufferBlock::~CachedBufferBlock() = default; + OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, std::size_t stream_size) - : VideoCommon::BufferCache{ + : VideoCommon::BufferCache{ rasterizer, system, std::make_unique(stream_size, true)} {} OGLBufferCache::~OGLBufferCache() = default; -OGLBuffer OGLBufferCache::CreateBuffer(std::size_t size) { - OGLBuffer buffer; - buffer.Create(); - glNamedBufferData(buffer.handle, static_cast(size), nullptr, GL_DYNAMIC_DRAW); - return buffer; +Buffer OGLBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { + return std::make_shared(cache_addr, size); } -const GLuint* OGLBufferCache::ToHandle(const OGLBuffer& buffer) { - return &buffer.handle; +void OGLBufferCache::WriteBarrier() { + glMemoryBarrier(GL_ALL_BARRIER_BITS); +} + +const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) { + return buffer->GetHandle(); } const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { @@ -36,23 +45,23 @@ const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { return &null_buffer; } -void OGLBufferCache::UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, - const u8* data) { - glNamedBufferSubData(buffer.handle, static_cast(offset), +void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + const u8* data) { + glNamedBufferSubData(*buffer->GetHandle(), static_cast(offset), static_cast(size), data); } -void OGLBufferCache::DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, - std::size_t size, u8* data) { - 
glGetNamedBufferSubData(buffer.handle, static_cast(offset), +void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + u8* data) { + glGetNamedBufferSubData(*buffer->GetHandle(), static_cast(offset), static_cast(size), data); } -void OGLBufferCache::CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, - std::size_t src_offset, std::size_t dst_offset, - std::size_t size) { - glCopyNamedBufferSubData(src.handle, dst.handle, static_cast(src_offset), - static_cast(dst_offset), static_cast(size)); +void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, + std::size_t dst_offset, std::size_t size) { + glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(), + static_cast(src_offset), static_cast(dst_offset), + static_cast(size)); } } // namespace OpenGL diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index 8c8ac4038..fb93f22dd 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -7,7 +7,7 @@ #include #include "common/common_types.h" -#include "video_core/buffer_cache.h" +#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/rasterizer_cache.h" #include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_stream_buffer.h" @@ -21,7 +21,24 @@ namespace OpenGL { class OGLStreamBuffer; class RasterizerOpenGL; -class OGLBufferCache final : public VideoCommon::BufferCache { +class CachedBufferBlock; + +using Buffer = std::shared_ptr; + +class CachedBufferBlock : public VideoCommon::BufferBlock { +public: + explicit CachedBufferBlock(CacheAddr cache_addr, const std::size_t size); + ~CachedBufferBlock(); + + const GLuint* GetHandle() const { + return &gl_buffer.handle; + } + +private: + OGLBuffer gl_buffer{}; +}; + +class OGLBufferCache final : public VideoCommon::BufferCache { public: explicit 
OGLBufferCache(RasterizerOpenGL& rasterizer, Core::System& system, std::size_t stream_size); @@ -30,17 +47,19 @@ public: const GLuint* GetEmptyBuffer(std::size_t) override; protected: - OGLBuffer CreateBuffer(std::size_t size) override; + Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; - const GLuint* ToHandle(const OGLBuffer& buffer) override; + void WriteBarrier() override; - void UploadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, + const GLuint* ToHandle(const Buffer& buffer) override; + + void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, const u8* data) override; - void DownloadBufferData(const OGLBuffer& buffer, std::size_t offset, std::size_t size, + void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, u8* data) override; - void CopyBufferData(const OGLBuffer& src, const OGLBuffer& dst, std::size_t src_offset, + void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, std::size_t dst_offset, std::size_t size) override; }; diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 80cfda7e4..019583718 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -980,7 +980,7 @@ void RasterizerOpenGL::SetupGlobalMemory(const GLShader::GlobalMemoryEntry& entr GPUVAddr gpu_addr, std::size_t size) { const auto alignment{device.GetShaderStorageBufferAlignment()}; const auto [ssbo, buffer_offset] = - buffer_cache.UploadMemory(gpu_addr, size, alignment, true, entry.IsWritten()); + buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); bind_ssbo_pushbuffer.Push(ssbo, buffer_offset, static_cast(size)); } From 86d8563314c615d60c7b59748467ffb71904b0c4 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Jul 2019 13:22:27 -0400 Subject: [PATCH 2/7] Buffer_Cache: Fixes and optimizations. 
--- src/video_core/buffer_cache/buffer_cache.h | 104 ++++++++------------- src/video_core/buffer_cache/map_interval.h | 2 +- 2 files changed, 38 insertions(+), 68 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 6c467eb80..e36f85705 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -15,8 +15,8 @@ #include "common/alignment.h" #include "common/common_types.h" #include "core/core.h" -#include "video_core/buffer_cache/map_interval.h" #include "video_core/buffer_cache/buffer_block.h" +#include "video_core/buffer_cache/map_interval.h" #include "video_core/memory_manager.h" namespace VideoCore { @@ -42,7 +42,7 @@ public: const auto cache_addr = ToCacheAddr(host_ptr); auto block = GetBlock(cache_addr, size); - MapAddress(block, gpu_addr, cache_addr, size, is_written); + MapAddress(block, gpu_addr, cache_addr, size); const u64 offset = static_cast(block->GetOffset(cache_addr)); @@ -149,86 +149,56 @@ protected: private: void MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const CacheAddr cache_addr, - const std::size_t size, bool is_written) { + const std::size_t size) { std::vector overlaps = GetMapsInRange(cache_addr, size); if (overlaps.empty()) { const CacheAddr cache_addr_end = cache_addr + size; MapInterval new_interval{cache_addr, cache_addr_end}; - if (!is_written) { - u8* host_ptr = FromCacheAddr(cache_addr); - UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); - } + u8* host_ptr = FromCacheAddr(cache_addr); + UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); Register(new_interval, gpu_addr); return; } + const CacheAddr cache_addr_end = cache_addr + size; if (overlaps.size() == 1) { - MapInterval current_map = overlaps[0]; - const CacheAddr cache_addr_end = cache_addr + size; + const MapInterval& current_map = overlaps[0]; if (current_map.IsInside(cache_addr, cache_addr_end)) { return; } - 
const CacheAddr new_start = std::min(cache_addr, current_map.start); - const CacheAddr new_end = std::max(cache_addr_end, current_map.end); - const GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; - const std::size_t new_size = static_cast(new_end - new_start); - MapInterval new_interval{new_start, new_end}; - const std::size_t offset = current_map.start - new_start; - const std::size_t size = current_map.end - current_map.start; - // Upload the remaining data - if (!is_written) { - u8* host_ptr = FromCacheAddr(new_start); - if (new_start == cache_addr && new_end == cache_addr_end) { - std::size_t first_size = current_map.start - new_start; - if (first_size > 0) { - UploadBlockData(block, block->GetOffset(new_start), first_size, host_ptr); - } + } + CacheAddr new_start = cache_addr; + CacheAddr new_end = cache_addr_end; + // Calculate new buffer parameters + for (auto& overlap : overlaps) { + new_start = std::min(overlap.start, new_start); + new_end = std::max(overlap.end, new_end); + } + GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; + for (auto& overlap : overlaps) { + Unregister(overlap); + } + UpdateBlock(block, new_start, new_end, overlaps); + MapInterval new_interval{new_start, new_end}; + Register(new_interval, new_gpu_addr); + } - std::size_t second_size = new_end - current_map.end; - if (second_size > 0) { - u8* host_ptr2 = FromCacheAddr(current_map.end); - UploadBlockData(block, block->GetOffset(current_map.end), second_size, - host_ptr2); - } - } else { - if (new_start == cache_addr) { - std::size_t second_size = new_end - current_map.end; - if (second_size > 0) { - u8* host_ptr2 = FromCacheAddr(current_map.end); - UploadBlockData(block, block->GetOffset(current_map.end), second_size, - host_ptr2); - } - } else { - std::size_t first_size = current_map.start - new_start; - if (first_size > 0) { - UploadBlockData(block, block->GetOffset(new_start), first_size, host_ptr); - } - } - } + void UpdateBlock(const TBuffer& block, CacheAddr 
start, CacheAddr end, + std::vector& overlaps) { + const IntervalType base_interval{start, end}; + IntervalCache interval_set{}; + interval_set.add(base_interval); + for (auto& overlap : overlaps) { + const IntervalType subtract{overlap.start, overlap.end}; + interval_set.subtract(subtract); + } + for (auto& interval : interval_set) { + std::size_t size = interval.upper() - interval.lower(); + if (size > 0) { + u8* host_ptr = FromCacheAddr(interval.lower()); + UploadBlockData(block, block->GetOffset(interval.lower()), size, host_ptr); } - Unregister(current_map); - Register(new_interval, new_gpu_addr); - } else { - // Calculate new buffer parameters - GPUVAddr new_gpu_addr = gpu_addr; - CacheAddr start = cache_addr; - CacheAddr end = cache_addr + size; - for (auto& overlap : overlaps) { - start = std::min(overlap.start, start); - end = std::max(overlap.end, end); - } - new_gpu_addr = gpu_addr + start - cache_addr; - MapInterval new_interval{start, end}; - for (auto& overlap : overlaps) { - Unregister(overlap); - } - std::size_t new_size = end - start; - if (!is_written) { - u8* host_ptr = FromCacheAddr(start); - UploadBlockData(block, block->GetOffset(start), new_size, host_ptr); - } - Register(new_interval, new_gpu_addr); } } diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index 652a35dcd..c1cd52ca4 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -14,7 +14,7 @@ struct MapInterval { MapInterval(const CacheAddr start, const CacheAddr end) : start{start}, end{end} {} CacheAddr start; CacheAddr end; - bool IsInside(const CacheAddr other_start, const CacheAddr other_end) { + bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { return (start <= other_start && other_end <= end); } From 5f4b746a1ee27d2e5e532f4f13f660ff08453474 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Fri, 19 Jul 2019 21:07:28 -0400 Subject: [PATCH 3/7] 
BufferCache: Rework mapping caching. --- src/video_core/buffer_cache/buffer_cache.h | 63 ++++++++++++--------- src/video_core/buffer_cache/map_interval.h | 66 ++++++++++++++-------- 2 files changed, 78 insertions(+), 51 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index e36f85705..7c1737fe2 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -25,6 +25,8 @@ class RasterizerInterface; namespace VideoCommon { +using MapInterval = std::shared_ptr; + template class BufferCache { public: @@ -90,7 +92,9 @@ public: std::vector objects = GetMapsInRange(addr, size); for (auto& object : objects) { - Unregister(object); + if (object->IsRegistered()) { + Unregister(object); + } } } @@ -120,51 +124,54 @@ protected: std::size_t dst_offset, std::size_t size) = 0; /// Register an object into the cache - void Register(const MapInterval& new_interval, const GPUVAddr gpu_addr) { - const CacheAddr cache_ptr = new_interval.start; - const std::size_t size = new_interval.end - new_interval.start; + void Register(const MapInterval& new_map) { + const CacheAddr cache_ptr = new_map->GetStart(); const std::optional cpu_addr = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); + system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress()); if (!cache_ptr || !cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", - gpu_addr); + new_map->GetGpuAddress()); return; } - const IntervalType interval{new_interval.start, new_interval.end}; - mapped_addresses.insert(interval); - map_storage[new_interval] = MapInfo{gpu_addr, *cpu_addr}; - + const std::size_t size = new_map->GetEnd() - new_map->GetStart(); + new_map->SetCpuAddress(*cpu_addr); + new_map->MarkAsRegistered(true); + const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; + mapped_addresses.insert({interval, new_map}); 
rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); } /// Unregisters an object from the cache - void Unregister(const MapInterval& interval) { - const MapInfo info = map_storage[interval]; - const std::size_t size = interval.end - interval.start; - rasterizer.UpdatePagesCachedCount(info.cpu_addr, size, -1); - const IntervalType delete_interval{interval.start, interval.end}; + void Unregister(MapInterval& map) { + const std::size_t size = map->GetEnd() - map->GetStart(); + rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); + map->MarkAsRegistered(false); + const IntervalType delete_interval{map->GetStart(), map->GetEnd()}; mapped_addresses.erase(delete_interval); - map_storage.erase(interval); } private: + MapInterval CreateMap(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) { + return std::make_shared(start, end, gpu_addr); + } + void MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const CacheAddr cache_addr, const std::size_t size) { std::vector overlaps = GetMapsInRange(cache_addr, size); if (overlaps.empty()) { const CacheAddr cache_addr_end = cache_addr + size; - MapInterval new_interval{cache_addr, cache_addr_end}; + MapInterval new_map = CreateMap(cache_addr, cache_addr_end, gpu_addr); u8* host_ptr = FromCacheAddr(cache_addr); UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); - Register(new_interval, gpu_addr); + Register(new_map); return; } const CacheAddr cache_addr_end = cache_addr + size; if (overlaps.size() == 1) { const MapInterval& current_map = overlaps[0]; - if (current_map.IsInside(cache_addr, cache_addr_end)) { + if (current_map->IsInside(cache_addr, cache_addr_end)) { return; } } @@ -172,25 +179,25 @@ private: CacheAddr new_end = cache_addr_end; // Calculate new buffer parameters for (auto& overlap : overlaps) { - new_start = std::min(overlap.start, new_start); - new_end = std::max(overlap.end, new_end); + new_start = std::min(overlap->GetStart(), new_start); + new_end = 
std::max(overlap->GetEnd(), new_end); } GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; for (auto& overlap : overlaps) { Unregister(overlap); } UpdateBlock(block, new_start, new_end, overlaps); - MapInterval new_interval{new_start, new_end}; - Register(new_interval, new_gpu_addr); + MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); + Register(new_map); } void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, std::vector& overlaps) { const IntervalType base_interval{start, end}; - IntervalCache interval_set{}; + IntervalSet interval_set{}; interval_set.add(base_interval); for (auto& overlap : overlaps) { - const IntervalType subtract{overlap.start, overlap.end}; + const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()}; interval_set.subtract(subtract); } for (auto& interval : interval_set) { @@ -210,7 +217,7 @@ private: std::vector objects{}; const IntervalType interval{addr, addr + size}; for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) { - objects.emplace_back(pair.lower(), pair.upper()); + objects.push_back(pair.second); } return objects; @@ -322,10 +329,10 @@ private: u64 buffer_offset = 0; u64 buffer_offset_base = 0; - using IntervalCache = boost::icl::interval_set; + using IntervalSet = boost::icl::interval_set; + using IntervalCache = boost::icl::interval_map; using IntervalType = typename IntervalCache::interval_type; IntervalCache mapped_addresses{}; - std::unordered_map map_storage; static constexpr u64 block_page_bits{24}; static constexpr u64 block_page_size{1 << block_page_bits}; diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index c1cd52ca4..a01eddf49 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -4,45 +4,65 @@ #pragma once -#include #include "common/common_types.h" #include "video_core/gpu.h" namespace VideoCommon { -struct MapInterval { - 
MapInterval(const CacheAddr start, const CacheAddr end) : start{start}, end{end} {} - CacheAddr start; - CacheAddr end; +class MapIntervalBase { +public: + MapIntervalBase(const CacheAddr start, const CacheAddr end, const GPUVAddr gpu_addr) + : start{start}, end{end}, gpu_addr{gpu_addr} {} + + void SetCpuAddress(VAddr new_cpu_addr) { + cpu_addr = new_cpu_addr; + } + + VAddr GetCpuAddress() const { + return cpu_addr; + } + + GPUVAddr GetGpuAddress() const { + return gpu_addr; + } + bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { return (start <= other_start && other_end <= end); } - bool operator==(const MapInterval& rhs) const { + bool operator==(const MapIntervalBase& rhs) const { return std::tie(start, end) == std::tie(rhs.start, rhs.end); } - bool operator!=(const MapInterval& rhs) const { + bool operator!=(const MapIntervalBase& rhs) const { return !operator==(rhs); } -}; -struct MapInfo { + void MarkAsRegistered(const bool registered) { + is_registered = registered; + } + + bool IsRegistered() const { + return is_registered; + } + + CacheAddr GetStart() const { + return start; + } + + CacheAddr GetEnd() const { + return end; + } + +private: + CacheAddr start; + CacheAddr end; GPUVAddr gpu_addr; - VAddr cpu_addr; + VAddr cpu_addr{}; + bool is_write{}; + bool is_modified{}; + bool is_registered{}; + u64 ticks{}; }; } // namespace VideoCommon - -namespace std { - -template <> -struct hash { - std::size_t operator()(const VideoCommon::MapInterval& k) const noexcept { - std::size_t a = std::hash()(k.start); - boost::hash_combine(a, std::hash()(k.end)); - return a; - } -}; - -} // namespace std From 286f4c446ae2396da41ca09173070ae5beb10e8e Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 20 Jul 2019 11:42:53 -0400 Subject: [PATCH 4/7] Buffer_Cache: Optimize and track written areas. 
--- src/video_core/buffer_cache/buffer_cache.h | 95 +++++++++++++++++++--- src/video_core/buffer_cache/map_interval.h | 23 +++++- 2 files changed, 105 insertions(+), 13 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 7c1737fe2..4ea43a6c4 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -43,8 +43,24 @@ public: } const auto cache_addr = ToCacheAddr(host_ptr); + // Cache management is a big overhead, so only cache entries with a given size. + // TODO: Figure out which size is the best for given games. + constexpr std::size_t max_stream_size = 0x800; + if (size < max_stream_size) { + if (!is_written && !IsRegionWritten(cache_addr, cache_addr + size - 1)) { + return StreamBufferUpload(host_ptr, size, alignment); + } + } + auto block = GetBlock(cache_addr, size); - MapAddress(block, gpu_addr, cache_addr, size); + auto map = MapAddress(block, gpu_addr, cache_addr, size); + if (is_written) { + map->MarkAsModified(true, GetModifiedTicks()); + if (!map->IsWritten()) { + map->MarkAsWritten(true); + MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); + } + } const u64 offset = static_cast(block->GetOffset(cache_addr)); @@ -124,7 +140,7 @@ protected: std::size_t dst_offset, std::size_t size) = 0; /// Register an object into the cache - void Register(const MapInterval& new_map) { + void Register(const MapInterval& new_map, bool inherit_written = false) { const CacheAddr cache_ptr = new_map->GetStart(); const std::optional cpu_addr = system.GPU().MemoryManager().GpuToCpuAddress(new_map->GetGpuAddress()); @@ -139,6 +155,10 @@ protected: const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; mapped_addresses.insert({interval, new_map}); rasterizer.UpdatePagesCachedCount(*cpu_addr, size, 1); + if (inherit_written) { + MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); + new_map->MarkAsWritten(true); + } } /// Unregisters 
an object from the cache @@ -146,6 +166,9 @@ protected: const std::size_t size = map->GetEnd() - map->GetStart(); rasterizer.UpdatePagesCachedCount(map->GetCpuAddress(), size, -1); map->MarkAsRegistered(false); + if (map->IsWritten()) { + UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); + } const IntervalType delete_interval{map->GetStart(), map->GetEnd()}; mapped_addresses.erase(delete_interval); } @@ -155,8 +178,8 @@ private: return std::make_shared(start, end, gpu_addr); } - void MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const CacheAddr cache_addr, - const std::size_t size) { + MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, + const CacheAddr cache_addr, const std::size_t size) { std::vector overlaps = GetMapsInRange(cache_addr, size); if (overlaps.empty()) { @@ -165,22 +188,24 @@ private: u8* host_ptr = FromCacheAddr(cache_addr); UploadBlockData(block, block->GetOffset(cache_addr), size, host_ptr); Register(new_map); - return; + return new_map; } const CacheAddr cache_addr_end = cache_addr + size; if (overlaps.size() == 1) { - const MapInterval& current_map = overlaps[0]; + MapInterval& current_map = overlaps[0]; if (current_map->IsInside(cache_addr, cache_addr_end)) { - return; + return current_map; } } CacheAddr new_start = cache_addr; CacheAddr new_end = cache_addr_end; + bool write_inheritance = false; // Calculate new buffer parameters for (auto& overlap : overlaps) { new_start = std::min(overlap->GetStart(), new_start); new_end = std::max(overlap->GetEnd(), new_end); + write_inheritance |= overlap->IsWritten(); } GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; for (auto& overlap : overlaps) { @@ -188,7 +213,8 @@ private: } UpdateBlock(block, new_start, new_end, overlaps); MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); - Register(new_map); + Register(new_map, write_inheritance); + return new_map; } void UpdateBlock(const TBuffer& block, CacheAddr start, CacheAddr end, @@ -320,6 
+346,48 @@ private: return found; } + void MarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { + u64 page_start = start >> write_page_bit; + const u64 page_end = end >> write_page_bit; + while (page_start <= page_end) { + auto it = written_pages.find(page_start); + if (it != written_pages.end()) { + it->second = it->second + 1; + } else { + written_pages[page_start] = 1; + } + page_start++; + } + } + + void UnmarkRegionAsWritten(const CacheAddr start, const CacheAddr end) { + u64 page_start = start >> write_page_bit; + const u64 page_end = end >> write_page_bit; + while (page_start <= page_end) { + auto it = written_pages.find(page_start); + if (it != written_pages.end()) { + if (it->second > 1) { + it->second = it->second - 1; + } else { + written_pages.erase(it); + } + } + page_start++; + } + } + + bool IsRegionWritten(const CacheAddr start, const CacheAddr end) const { + u64 page_start = start >> write_page_bit; + const u64 page_end = end >> write_page_bit; + while (page_start <= page_end) { + if (written_pages.count(page_start) > 0) { + return true; + } + page_start++; + } + return false; + } + std::unique_ptr stream_buffer; TBufferType stream_buffer_handle{}; @@ -334,11 +402,14 @@ private: using IntervalType = typename IntervalCache::interval_type; IntervalCache mapped_addresses{}; - static constexpr u64 block_page_bits{24}; - static constexpr u64 block_page_size{1 << block_page_bits}; - std::unordered_map blocks; + static constexpr u64 write_page_bit{11}; + std::unordered_map written_pages{}; - std::list pending_destruction; + static constexpr u64 block_page_bits{21}; + static constexpr u64 block_page_size{1 << block_page_bits}; + std::unordered_map blocks{}; + + std::list pending_destruction{}; u64 epoch{}; u64 modified_ticks{}; VideoCore::RasterizerInterface& rasterizer; diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index a01eddf49..3a104d5cd 100644 --- 
a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -54,12 +54,33 @@ public: return end; } + void MarkAsModified(const bool is_modified_, const u64 tick) { + is_modified = is_modified_; + ticks = tick; + } + + bool IsModified() const { + return is_modified; + } + + u64 GetModificationTick() const { + return ticks; + } + + void MarkAsWritten(const bool is_written_) { + is_written = is_written_; + } + + bool IsWritten() const { + return is_written; + } + private: CacheAddr start; CacheAddr end; GPUVAddr gpu_addr; VAddr cpu_addr{}; - bool is_write{}; + bool is_written{}; bool is_modified{}; bool is_registered{}; u64 ticks{}; From de8ff8a1c629b97be98267e9866ce10b63ad12ad Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 20 Jul 2019 12:18:54 -0400 Subject: [PATCH 5/7] Buffer_Cache: Implement barriers. --- src/video_core/buffer_cache/buffer_cache.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 4ea43a6c4..03b288233 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -60,6 +60,10 @@ public: map->MarkAsWritten(true); MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); } + } else { + if (map->IsWritten()) { + WriteBarrier(); + } } const u64 offset = static_cast(block->GetOffset(cache_addr)); From 6ce2c850470e4d0600d69a11ae2347a643a44a65 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 20 Jul 2019 12:54:31 -0400 Subject: [PATCH 6/7] Buffer_Cache: Implement flushing. 
--- src/video_core/buffer_cache/buffer_cache.h | 27 ++++++++++++++++++- .../renderer_opengl/gl_buffer_cache.cpp | 4 +++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 03b288233..38ce16ed5 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -79,12 +79,16 @@ public: } void Map(std::size_t max_size) { + std::lock_guard lock{mutex}; + std::tie(buffer_ptr, buffer_offset_base, invalidated) = stream_buffer->Map(max_size, 4); buffer_offset = buffer_offset_base; } /// Finishes the upload stream, returns true on bindings invalidation. bool Unmap() { + std::lock_guard lock{mutex}; + stream_buffer->Unmap(buffer_offset - buffer_offset_base); return std::exchange(invalidated, false); } @@ -103,7 +107,15 @@ public: void FlushRegion(CacheAddr addr, std::size_t size) { std::lock_guard lock{mutex}; - // TODO + std::vector objects = GetMapsInRange(addr, size); + std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) { + return a->GetModificationTick() < b->GetModificationTick(); + }); + for (auto& object : objects) { + if (object->IsModified() && object->IsRegistered()) { + FlushMap(object); + } + } } /// Mark the specified region as being invalidated @@ -205,11 +217,13 @@ private: CacheAddr new_start = cache_addr; CacheAddr new_end = cache_addr_end; bool write_inheritance = false; + bool modified_inheritance = false; // Calculate new buffer parameters for (auto& overlap : overlaps) { new_start = std::min(overlap->GetStart(), new_start); new_end = std::max(overlap->GetEnd(), new_end); write_inheritance |= overlap->IsWritten(); + modified_inheritance |= overlap->IsModified(); } GPUVAddr new_gpu_addr = gpu_addr + new_start - cache_addr; for (auto& overlap : overlaps) { @@ -217,6 +231,9 @@ private: } UpdateBlock(block, new_start, new_end, overlaps); MapInterval new_map = CreateMap(new_start, 
new_end, new_gpu_addr); + if (modified_inheritance) { + new_map->MarkAsModified(true, GetModifiedTicks()); + } Register(new_map, write_inheritance); return new_map; } @@ -258,6 +275,14 @@ private: return ++modified_ticks; } + void FlushMap(MapInterval map) { + std::size_t size = map->GetEnd() - map->GetStart(); + TBuffer block = blocks[map->GetStart() >> block_page_bits]; + u8* host_ptr = FromCacheAddr(map->GetStart()); + DownloadBlockData(block, block->GetOffset(map->GetStart()), size, host_ptr); + map->MarkAsModified(false, 0); + } + BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, std::size_t alignment) { AlignBuffer(alignment); diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index a45d2771b..0781e6595 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -7,12 +7,15 @@ #include #include "common/assert.h" +#include "common/microprofile.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_resource_manager.h" namespace OpenGL { +MICROPROFILE_DEFINE(OpenGL_Buffer_Download, "OpenGL", "Buffer Download", MP_RGB(192, 192, 128)); + CachedBufferBlock::CachedBufferBlock(CacheAddr cache_addr, const std::size_t size) : VideoCommon::BufferBlock{cache_addr, size} { gl_buffer.Create(); @@ -53,6 +56,7 @@ void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, s void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, u8* data) { + MICROPROFILE_SCOPE(OpenGL_Buffer_Download); glGetNamedBufferSubData(*buffer->GetHandle(), static_cast(offset), static_cast(size), data); } From 83ec2091c1836bf32e9070d0ddf2a53288871d69 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sat, 10 Aug 2019 12:50:38 -0400 Subject: [PATCH 7/7] Buffer Cache: Address Feedback.
--- src/video_core/buffer_cache/buffer_block.h | 7 +++---- src/video_core/renderer_opengl/gl_buffer_cache.h | 6 +++--- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_block.h b/src/video_core/buffer_cache/buffer_block.h index 2c739a586..d2124443f 100644 --- a/src/video_core/buffer_cache/buffer_block.h +++ b/src/video_core/buffer_cache/buffer_block.h @@ -19,8 +19,8 @@ public: return (cache_addr < end) && (cache_addr_end > start); } - bool IsInside(const CacheAddr other_start, const CacheAddr other_end) { - return (cache_addr <= other_start && other_end <= cache_addr_end); + bool IsInside(const CacheAddr other_start, const CacheAddr other_end) const { + return cache_addr <= other_start && other_end <= cache_addr_end; } u8* GetWritableHostPtr() const { @@ -61,8 +61,7 @@ public: } protected: - explicit BufferBlock(CacheAddr cache_addr,const std::size_t size) - : size{size} { + explicit BufferBlock(CacheAddr cache_addr, const std::size_t size) : size{size} { SetCacheAddr(cache_addr); } ~BufferBlock() = default; diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h index fb93f22dd..022e7bfa9 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.h +++ b/src/video_core/renderer_opengl/gl_buffer_cache.h @@ -54,13 +54,13 @@ protected: const GLuint* ToHandle(const Buffer& buffer) override; void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - const u8* data) override; + const u8* data) override; void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, - u8* data) override; + u8* data) override; void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, - std::size_t dst_offset, std::size_t size) override; + std::size_t dst_offset, std::size_t size) override; }; } // namespace OpenGL