From 599274e3f0d97c36b31016ba63dcc300d0cf8f6a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 16 May 2020 17:08:33 -0300 Subject: [PATCH 1/6] buffer_cache: Minor style changes Minor style changes. Mostly done so I avoid editing it while doing other changes. --- src/video_core/buffer_cache/buffer_cache.h | 100 ++++++++++----------- src/video_core/buffer_cache/map_interval.h | 98 ++++---------------- 2 files changed, 67 insertions(+), 131 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 56e570994..81134eb1f 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -40,14 +40,12 @@ public: bool is_written = false, bool use_fast_cbuf = false) { std::lock_guard lock{mutex}; - const std::optional cpu_addr_opt = - system.GPU().MemoryManager().GpuToCpuAddress(gpu_addr); - + const auto& memory_manager = system.GPU().MemoryManager(); + const std::optional cpu_addr_opt = memory_manager.GpuToCpuAddress(gpu_addr); if (!cpu_addr_opt) { return {GetEmptyBuffer(size), 0}; } - - VAddr cpu_addr = *cpu_addr_opt; + const VAddr cpu_addr = *cpu_addr_opt; // Cache management is a big overhead, so only cache entries with a given size. // TODO: Figure out which size is the best for given games. @@ -84,9 +82,9 @@ public: if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { MarkForAsyncFlush(map); } - if (!map->IsWritten()) { - map->MarkAsWritten(true); - MarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); + if (!map->is_written) { + map->is_written = true; + MarkRegionAsWritten(map->start, map->end - 1); } } @@ -133,11 +131,11 @@ public: std::lock_guard lock{mutex}; std::vector objects = GetMapsInRange(addr, size); - std::sort(objects.begin(), objects.end(), [](const MapInterval& a, const MapInterval& b) { - return a->GetModificationTick() < b->GetModificationTick(); - }); + std::sort( + objects.begin(), objects.end(), + [](const MapInterval& lhs, const MapInterval& rhs) { return lhs->ticks < rhs->ticks; }); for (auto& object : objects) { - if (object->IsModified() && object->IsRegistered()) { + if (object->is_modified && object->is_registered) { mutex.unlock(); FlushMap(object); mutex.lock(); @@ -150,7 +148,7 @@ public: const std::vector objects = GetMapsInRange(addr, size); return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) { - return map->IsModified() && map->IsRegistered(); + return map->is_modified && map->is_registered; }); } @@ -160,7 +158,7 @@ public: std::vector objects = GetMapsInRange(addr, size); for (auto& object : objects) { - if (object->IsRegistered()) { + if (object->is_registered) { Unregister(object); } } @@ -170,9 +168,9 @@ public: std::lock_guard lock{mutex}; for (const auto& object : GetMapsInRange(addr, size)) { - if (object->IsMemoryMarked() && object->IsRegistered()) { + if (object->is_memory_marked && object->is_registered) { UnmarkMemory(object); - object->SetSyncPending(true); + object->is_sync_pending = true; marked_for_unregister.emplace_back(object); } } @@ -182,8 +180,8 @@ public: std::lock_guard lock{mutex}; for (const auto& object : marked_for_unregister) { - if (object->IsRegistered()) { - object->SetSyncPending(false); + if (object->is_registered) { + object->is_sync_pending = false; Unregister(object); } } @@ -194,7 +192,7 @@ public: if (uncommitted_flushes) { auto commit_list = std::make_shared>(); for (auto& map : *uncommitted_flushes) { - if (map->IsRegistered() && map->IsModified()) { + if (map->is_registered && map->is_modified) { // TODO(Blinkhawk): Implement backend asynchronous flushing // AsyncFlushMap(map) commit_list->push_back(map); @@ -229,7 +227,7 @@ public: return; } for (MapInterval& map : *flush_list) { - if (map->IsRegistered()) { + if (map->is_registered) { // TODO(Blinkhawk): Replace this for reading the asynchronous flush FlushMap(map); } @@ -266,45 +264,45 @@ protected: /// Register an object into the cache void Register(const MapInterval& new_map, bool inherit_written = false) { - const VAddr cpu_addr = new_map->GetStart(); + const VAddr cpu_addr = new_map->start; if (!cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", - new_map->GetGpuAddress()); + new_map->gpu_addr); return; } - const std::size_t size = new_map->GetEnd() - new_map->GetStart(); - new_map->MarkAsRegistered(true); - const IntervalType interval{new_map->GetStart(), new_map->GetEnd()}; + const std::size_t size = new_map->end - new_map->start; + new_map->is_registered = true; + const IntervalType interval{new_map->start, new_map->end}; mapped_addresses.insert({interval, new_map}); rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - new_map->SetMemoryMarked(true); + new_map->is_memory_marked = true; if (inherit_written) { - MarkRegionAsWritten(new_map->GetStart(), new_map->GetEnd() - 1); - new_map->MarkAsWritten(true); + MarkRegionAsWritten(new_map->start, new_map->end - 1); + new_map->is_written = true; } } void UnmarkMemory(const MapInterval& map) { - if (!map->IsMemoryMarked()) { + if (!map->is_memory_marked) { return; } - const std::size_t size = map->GetEnd() - map->GetStart(); - rasterizer.UpdatePagesCachedCount(map->GetStart(), size, -1); - map->SetMemoryMarked(false); + const std::size_t size = map->end - map->start; + rasterizer.UpdatePagesCachedCount(map->start, size, -1); + map->is_memory_marked = false; } /// Unregisters an object from the cache void Unregister(const MapInterval& map) { UnmarkMemory(map); - map->MarkAsRegistered(false); - if (map->IsSyncPending()) { + map->is_registered = false; + if (map->is_sync_pending) { + map->is_sync_pending = false; marked_for_unregister.remove(map); - map->SetSyncPending(false); } - if (map->IsWritten()) { - UnmarkRegionAsWritten(map->GetStart(), map->GetEnd() - 1); + if (map->is_written) { + UnmarkRegionAsWritten(map->start, map->end - 1); } - const IntervalType delete_interval{map->GetStart(), map->GetEnd()}; + const IntervalType delete_interval{map->start, map->end}; mapped_addresses.erase(delete_interval); } @@ -345,10 +343,10 @@ private: bool modified_inheritance = false; // Calculate new buffer parameters for (auto& overlap : overlaps) { - new_start = std::min(overlap->GetStart(), new_start); - new_end = std::max(overlap->GetEnd(), new_end); - write_inheritance |= overlap->IsWritten(); - modified_inheritance |= overlap->IsModified(); + new_start = std::min(overlap->start, new_start); + new_end = std::max(overlap->end, new_end); + write_inheritance |= overlap->is_written; + modified_inheritance |= overlap->is_modified; } GPUVAddr new_gpu_addr = gpu_addr + new_start - cpu_addr; for (auto& overlap : overlaps) { @@ -372,7 +370,7 @@ private: IntervalSet interval_set{}; interval_set.add(base_interval); for (auto& overlap : overlaps) { - const IntervalType subtract{overlap->GetStart(), overlap->GetEnd()}; + const IntervalType subtract{overlap->start, overlap->end}; interval_set.subtract(subtract); } for (auto& interval : interval_set) { @@ -406,11 +404,11 @@ private: } void FlushMap(MapInterval map) { - std::size_t size = map->GetEnd() - map->GetStart(); - OwnerBuffer block = blocks[map->GetStart() >> block_page_bits]; + std::size_t size = map->end - map->start; + OwnerBuffer block = blocks[map->start >> block_page_bits]; staging_buffer.resize(size); - DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); - system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size); + DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data()); + system.Memory().WriteBlockUnsafe(map->start, staging_buffer.data(), size); map->MarkAsModified(false, 0); } @@ -515,7 +513,7 @@ private: } else { written_pages[page_start] = 1; } - page_start++; + ++page_start; } } @@ -531,7 +529,7 @@ private: written_pages.erase(it); } } - page_start++; + ++page_start; } } @@ -542,7 +540,7 @@ private: if (written_pages.count(page_start) > 0) { return true; } - page_start++; + ++page_start; } return false; } @@ -585,7 +583,7 @@ private: std::vector staging_buffer; std::list marked_for_unregister; - std::shared_ptr> uncommitted_flushes{}; + std::shared_ptr> uncommitted_flushes; std::list>> committed_flushes; std::recursive_mutex mutex; diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index 29d8b26f3..1e77012d9 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -9,99 +9,37 @@ namespace VideoCommon { -class MapIntervalBase { -public: - MapIntervalBase(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) +struct MapIntervalBase { + constexpr explicit MapIntervalBase(VAddr start, VAddr end, GPUVAddr gpu_addr) noexcept : start{start}, end{end}, gpu_addr{gpu_addr} {} - void SetCpuAddress(VAddr new_cpu_addr) { - cpu_addr = new_cpu_addr; - } - - VAddr GetCpuAddress() const { - return cpu_addr; - } - - GPUVAddr GetGpuAddress() const { - return gpu_addr; - } - - bool IsInside(const VAddr other_start, const VAddr other_end) const { + constexpr bool IsInside(const VAddr other_start, const VAddr other_end) const noexcept { return (start <= other_start && other_end <= end); } - bool operator==(const MapIntervalBase& rhs) const { - return std::tie(start, end) == std::tie(rhs.start, rhs.end); + constexpr void MarkAsModified(bool is_modified_, u64 ticks_) noexcept { + is_modified = is_modified_; + ticks = ticks_; } - bool operator!=(const MapIntervalBase& rhs) const { + constexpr bool operator==(const MapIntervalBase& rhs) const noexcept { + return start == rhs.start && end == rhs.end; + } + + constexpr bool operator!=(const MapIntervalBase& rhs) const noexcept { return !operator==(rhs); } - void MarkAsRegistered(const bool registered) { - is_registered = registered; - } - - bool IsRegistered() const { - return is_registered; - } - - void SetMemoryMarked(bool is_memory_marked_) { - is_memory_marked = is_memory_marked_; - } - - bool IsMemoryMarked() const { - return is_memory_marked; - } - - void SetSyncPending(bool is_sync_pending_) { - is_sync_pending = is_sync_pending_; - } - - bool IsSyncPending() const { - return is_sync_pending; - } - - VAddr GetStart() const { - return start; - } - - VAddr GetEnd() const { - return end; - } - - void MarkAsModified(const bool is_modified_, const u64 tick) { - is_modified = is_modified_; - ticks = tick; - } - - bool IsModified() const { - return is_modified; - } - - u64 GetModificationTick() const { - return ticks; - } - - void MarkAsWritten(const bool is_written_) { - is_written = is_written_; - } - - bool IsWritten() const { - return is_written; - } - -private: VAddr start; VAddr end; GPUVAddr gpu_addr; - VAddr cpu_addr{}; - bool is_written{}; - bool is_modified{}; - bool is_registered{}; - bool is_memory_marked{}; - bool is_sync_pending{}; - u64 ticks{}; + VAddr cpu_addr = 0; + u64 ticks = 0; + bool is_written = false; + bool is_modified = false; + bool is_registered = false; + bool is_memory_marked = false; + bool is_sync_pending = false; }; } // namespace VideoCommon From 3b0baf746ea6863d38d90521abba95303f321bf3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 16 May 2020 18:07:03 -0300 Subject: [PATCH 2/6] buffer_cache: Remove shared pointers Removing shared pointers is a first step to be able to use intrusive objects and keep allocations close to one another in memory. --- src/video_core/buffer_cache/buffer_cache.h | 114 ++++++++++----------- src/video_core/buffer_cache/map_interval.h | 30 +++--- 2 files changed, 73 insertions(+), 71 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 81134eb1f..eb03879c4 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -29,8 +29,6 @@ namespace VideoCommon { -using MapInterval = std::shared_ptr; - template class BufferCache { public: @@ -76,7 +74,10 @@ public: } auto block = GetBlock(cpu_addr, size); - auto map = MapAddress(block, gpu_addr, cpu_addr, size); + MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); + if (!map) { + return {GetEmptyBuffer(size), 0}; + } if (is_written) { map->MarkAsModified(true, GetModifiedTicks()); if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { @@ -130,11 +131,10 @@ public: void FlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; - std::vector objects = GetMapsInRange(addr, size); - std::sort( - objects.begin(), objects.end(), - [](const MapInterval& lhs, const MapInterval& rhs) { return lhs->ticks < rhs->ticks; }); - for (auto& object : objects) { + std::vector objects = GetMapsInRange(addr, size); + std::sort(objects.begin(), objects.end(), + [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; }); + for (MapInterval* object : objects) { if (object->is_modified && object->is_registered) { mutex.unlock(); FlushMap(object); @@ -146,8 +146,8 @@ public: bool MustFlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; - const std::vector objects = GetMapsInRange(addr, size); - return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval& map) { + const std::vector objects = GetMapsInRange(addr, size); + return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) { return map->is_modified && map->is_registered; }); } @@ -156,7 +156,7 @@ public: void InvalidateRegion(VAddr addr, u64 size) { std::lock_guard lock{mutex}; - std::vector objects = GetMapsInRange(addr, size); + std::vector objects = GetMapsInRange(addr, size); for (auto& object : objects) { if (object->is_registered) { Unregister(object); @@ -167,7 +167,7 @@ public: void OnCPUWrite(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; - for (const auto& object : GetMapsInRange(addr, size)) { + for (MapInterval* object : GetMapsInRange(addr, size)) { if (object->is_memory_marked && object->is_registered) { UnmarkMemory(object); object->is_sync_pending = true; @@ -179,7 +179,7 @@ public: void SyncGuestHost() { std::lock_guard lock{mutex}; - for (const auto& object : marked_for_unregister) { + for (auto& object : marked_for_unregister) { if (object->is_registered) { object->is_sync_pending = false; Unregister(object); @@ -190,8 +190,8 @@ public: void CommitAsyncFlushes() { if (uncommitted_flushes) { - auto commit_list = std::make_shared>(); - for (auto& map : *uncommitted_flushes) { + auto commit_list = std::make_shared>(); + for (MapInterval* map : *uncommitted_flushes) { if (map->is_registered && map->is_modified) { // TODO(Blinkhawk): Implement backend asynchronous flushing // AsyncFlushMap(map) @@ -226,7 +226,7 @@ public: committed_flushes.pop_front(); return; } - for (MapInterval& map : *flush_list) { + for (MapInterval* map : *flush_list) { if (map->is_registered) { // TODO(Blinkhawk): Replace this for reading the asynchronous flush FlushMap(map); @@ -263,26 +263,28 @@ protected: } /// Register an object into the cache - void Register(const MapInterval& new_map, bool inherit_written = false) { - const VAddr cpu_addr = new_map->start; + MapInterval* Register(MapInterval new_map, bool inherit_written = false) { + const VAddr cpu_addr = new_map.start; if (!cpu_addr) { LOG_CRITICAL(HW_GPU, "Failed to register buffer with unmapped gpu_address 0x{:016x}", - new_map->gpu_addr); - return; + new_map.gpu_addr); + return nullptr; } - const std::size_t size = new_map->end - new_map->start; - new_map->is_registered = true; - const IntervalType interval{new_map->start, new_map->end}; - mapped_addresses.insert({interval, new_map}); + const std::size_t size = new_map.end - new_map.start; + new_map.is_registered = true; + const IntervalType interval{new_map.start, new_map.end}; rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); - new_map->is_memory_marked = true; + new_map.is_memory_marked = true; if (inherit_written) { - MarkRegionAsWritten(new_map->start, new_map->end - 1); - new_map->is_written = true; + MarkRegionAsWritten(new_map.start, new_map.end - 1); + new_map.is_written = true; } + mapped_addresses.insert({interval, new_map}); + // Temporary hack until this is replaced with boost::intrusive::rbtree + return const_cast(&mapped_addresses.find(interval)->second); } - void UnmarkMemory(const MapInterval& map) { + void UnmarkMemory(MapInterval* map) { if (!map->is_memory_marked) { return; } @@ -292,7 +294,7 @@ protected: } /// Unregisters an object from the cache - void Unregister(const MapInterval& map) { + void Unregister(MapInterval* map) { UnmarkMemory(map); map->is_registered = false; if (map->is_sync_pending) { @@ -307,17 +309,12 @@ protected: } private: - MapInterval CreateMap(const VAddr start, const VAddr end, const GPUVAddr gpu_addr) { - return std::make_shared(start, end, gpu_addr); - } - - MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, - const std::size_t size) { - std::vector overlaps = GetMapsInRange(cpu_addr, size); + MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr, + std::size_t size) { + std::vector overlaps = GetMapsInRange(cpu_addr, size); if (overlaps.empty()) { auto& memory_manager = system.GPU().MemoryManager(); const VAddr cpu_addr_end = cpu_addr + size; - MapInterval new_map = CreateMap(cpu_addr, cpu_addr_end, gpu_addr); if (memory_manager.IsGranularRange(gpu_addr, size)) { u8* host_ptr = memory_manager.GetPointer(gpu_addr); UploadBlockData(block, block->GetOffset(cpu_addr), size, host_ptr); @@ -326,13 +323,12 @@ private: memory_manager.ReadBlockUnsafe(gpu_addr, staging_buffer.data(), size); UploadBlockData(block, block->GetOffset(cpu_addr), size, staging_buffer.data()); } - Register(new_map); - return new_map; + return Register(MapInterval(cpu_addr, cpu_addr_end, gpu_addr)); } const VAddr cpu_addr_end = cpu_addr + size; if (overlaps.size() == 1) { - MapInterval& current_map = overlaps[0]; + MapInterval* const current_map = overlaps[0]; if (current_map->IsInside(cpu_addr, cpu_addr_end)) { return current_map; } @@ -342,7 +338,7 @@ private: bool write_inheritance = false; bool modified_inheritance = false; // Calculate new buffer parameters - for (auto& overlap : overlaps) { + for (MapInterval* overlap : overlaps) { new_start = std::min(overlap->start, new_start); new_end = std::max(overlap->end, new_end); write_inheritance |= overlap->is_written; @@ -353,19 +349,23 @@ private: Unregister(overlap); } UpdateBlock(block, new_start, new_end, overlaps); - MapInterval new_map = CreateMap(new_start, new_end, new_gpu_addr); + + const MapInterval new_map{new_start, new_end, new_gpu_addr}; + MapInterval* const map = Register(new_map, write_inheritance); + if (!map) { + return nullptr; + } if (modified_inheritance) { - new_map->MarkAsModified(true, GetModifiedTicks()); + map->MarkAsModified(true, GetModifiedTicks()); if (Settings::IsGPULevelHigh() && Settings::values.use_asynchronous_gpu_emulation) { - MarkForAsyncFlush(new_map); + MarkForAsyncFlush(map); } } - Register(new_map, write_inheritance); - return new_map; + return map; } void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, - std::vector& overlaps) { + std::vector& overlaps) { const IntervalType base_interval{start, end}; IntervalSet interval_set{}; interval_set.add(base_interval); @@ -384,15 +384,15 @@ private: } } - std::vector GetMapsInRange(VAddr addr, std::size_t size) { + std::vector GetMapsInRange(VAddr addr, std::size_t size) { if (size == 0) { return {}; } - std::vector objects{}; + std::vector objects; const IntervalType interval{addr, addr + size}; for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) { - objects.push_back(pair.second); + objects.push_back(&pair.second); } return objects; @@ -403,8 +403,8 @@ private: return ++modified_ticks; } - void FlushMap(MapInterval map) { - std::size_t size = map->end - map->start; + void FlushMap(MapInterval* map) { + const std::size_t size = map->end - map->start; OwnerBuffer block = blocks[map->start >> block_page_bits]; staging_buffer.resize(size); DownloadBlockData(block, block->GetOffset(map->start), size, staging_buffer.data()); @@ -545,9 +545,9 @@ private: return false; } - void MarkForAsyncFlush(MapInterval& map) { + void MarkForAsyncFlush(MapInterval* map) { if (!uncommitted_flushes) { - uncommitted_flushes = std::make_shared>(); + uncommitted_flushes = std::make_shared>(); } uncommitted_flushes->insert(map); } @@ -581,10 +581,10 @@ private: u64 modified_ticks = 0; std::vector staging_buffer; - std::list marked_for_unregister; + std::list marked_for_unregister; - std::shared_ptr> uncommitted_flushes; - std::list>> committed_flushes; + std::shared_ptr> uncommitted_flushes; + std::list>> committed_flushes; std::recursive_mutex mutex; }; diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index 1e77012d9..ad4db0135 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -9,30 +9,32 @@ namespace VideoCommon { -struct MapIntervalBase { - constexpr explicit MapIntervalBase(VAddr start, VAddr end, GPUVAddr gpu_addr) noexcept +struct MapInterval { + constexpr explicit MapInterval() noexcept = default; + + constexpr explicit MapInterval(VAddr start, VAddr end, GPUVAddr gpu_addr) noexcept : start{start}, end{end}, gpu_addr{gpu_addr} {} - constexpr bool IsInside(const VAddr other_start, const VAddr other_end) const noexcept { + constexpr bool IsInside(VAddr other_start, VAddr other_end) const noexcept { return (start <= other_start && other_end <= end); } + constexpr bool operator==(const MapInterval& rhs) const noexcept { + return start == rhs.start && end == rhs.end; + } + + constexpr bool operator!=(const MapInterval& rhs) const noexcept { + return !operator==(rhs); + } + constexpr void MarkAsModified(bool is_modified_, u64 ticks_) noexcept { is_modified = is_modified_; ticks = ticks_; } - constexpr bool operator==(const MapIntervalBase& rhs) const noexcept { - return start == rhs.start && end == rhs.end; - } - - constexpr bool operator!=(const MapIntervalBase& rhs) const noexcept { - return !operator==(rhs); - } - - VAddr start; - VAddr end; - GPUVAddr gpu_addr; + VAddr start = 0; + VAddr end = 0; + GPUVAddr gpu_addr = 0; VAddr cpu_addr = 0; u64 ticks = 0; bool is_written = false; From 891236124caaed34cdefac61cf90896a5b66b267 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 17 May 2020 16:56:08 -0300 Subject: [PATCH 3/6] buffer_cache: Use boost::intrusive::set for caching Instead of using boost::icl::interval_map for caching, use boost::intrusive::set. interval_map is intended as a container where the keys can overlap with one another; we don't need this for caching buffers and a std::set-like data structure that allows us to search with lower_bound is enough. --- src/video_core/buffer_cache/buffer_cache.h | 42 ++++++++++++------- src/video_core/buffer_cache/map_interval.h | 32 +++++++------- .../renderer_opengl/gl_buffer_cache.cpp | 1 + .../renderer_opengl/gl_fence_manager.cpp | 1 + .../renderer_vulkan/vk_buffer_cache.cpp | 1 + .../renderer_vulkan/vk_fence_manager.h | 1 + 6 files changed, 48 insertions(+), 30 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index eb03879c4..fb12af9d8 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -14,9 +14,11 @@ #include #include +#include #include #include "common/alignment.h" +#include "common/assert.h" #include "common/common_types.h" #include "common/logging/log.h" #include "core/core.h" @@ -73,7 +75,7 @@ public: } } - auto block = GetBlock(cpu_addr, size); + OwnerBuffer block = GetBlock(cpu_addr, size); MapInterval* const map = MapAddress(block, gpu_addr, cpu_addr, size); if (!map) { return {GetEmptyBuffer(size), 0}; @@ -272,16 +274,16 @@ protected: } const std::size_t size = new_map.end - new_map.start; new_map.is_registered = true; - const IntervalType interval{new_map.start, new_map.end}; rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1); new_map.is_memory_marked = true; if (inherit_written) { MarkRegionAsWritten(new_map.start, new_map.end - 1); new_map.is_written = true; } - mapped_addresses.insert({interval, new_map}); - // Temporary hack until this is replaced with boost::intrusive::rbtree - return const_cast(&mapped_addresses.find(interval)->second); + // Temporary hack, leaks memory and it's not cache local + MapInterval* const storage = &mapped_addresses_storage.emplace_back(new_map); + mapped_addresses.insert(*storage); + return storage; } void UnmarkMemory(MapInterval* map) { @@ -304,8 +306,9 @@ protected: if (map->is_written) { UnmarkRegionAsWritten(map->start, map->end - 1); } - const IntervalType delete_interval{map->start, map->end}; - mapped_addresses.erase(delete_interval); + const auto it = mapped_addresses.find(*map); + ASSERT(it != mapped_addresses.end()); + mapped_addresses.erase(it); } private: @@ -389,13 +392,20 @@ private: return {}; } - std::vector objects; - const IntervalType interval{addr, addr + size}; - for (auto& pair : boost::make_iterator_range(mapped_addresses.equal_range(interval))) { - objects.push_back(&pair.second); - } + std::vector result; + const VAddr addr_end = addr + size; - return objects; + auto it = mapped_addresses.lower_bound(addr); + if (it != mapped_addresses.begin()) { + --it; + } + while (it != mapped_addresses.end() && it->start < addr_end) { + if (it->Overlaps(addr, addr_end)) { + result.push_back(&*it); + } + ++it; + } + return result; } /// Returns a ticks counter used for tracking when cached objects were last modified @@ -565,9 +575,9 @@ private: u64 buffer_offset_base = 0; using IntervalSet = boost::icl::interval_set; - using IntervalCache = boost::icl::interval_map; - using IntervalType = typename IntervalCache::interval_type; - IntervalCache mapped_addresses; + using IntervalType = typename IntervalSet::interval_type; + std::list mapped_addresses_storage; // Temporary hack + boost::intrusive::set> mapped_addresses; static constexpr u64 write_page_bit = 11; std::unordered_map written_pages; diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index ad4db0135..45705cccf 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -4,38 +4,36 @@ #pragma once +#include + #include "common/common_types.h" #include "video_core/gpu.h" namespace VideoCommon { -struct MapInterval { - constexpr explicit MapInterval() noexcept = default; +struct MapInterval : public boost::intrusive::set_base_hook> { + /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {} - constexpr explicit MapInterval(VAddr start, VAddr end, GPUVAddr gpu_addr) noexcept - : start{start}, end{end}, gpu_addr{gpu_addr} {} + explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept + : start{start_}, end{end_}, gpu_addr{gpu_addr_} {} - constexpr bool IsInside(VAddr other_start, VAddr other_end) const noexcept { - return (start <= other_start && other_end <= end); + bool IsInside(VAddr other_start, VAddr other_end) const noexcept { + return start <= other_start && other_end <= end; } - constexpr bool operator==(const MapInterval& rhs) const noexcept { - return start == rhs.start && end == rhs.end; + bool Overlaps(VAddr other_start, VAddr other_end) const noexcept { + return start < other_end && other_start < end; } - constexpr bool operator!=(const MapInterval& rhs) const noexcept { - return !operator==(rhs); - } - - constexpr void MarkAsModified(bool is_modified_, u64 ticks_) noexcept { + void MarkAsModified(bool is_modified_, u64 ticks_) noexcept { is_modified = is_modified_; ticks = ticks_; } + boost::intrusive::set_member_hook<> member_hook_; VAddr start = 0; VAddr end = 0; GPUVAddr gpu_addr = 0; - VAddr cpu_addr = 0; u64 ticks = 0; bool is_written = false; bool is_modified = false; @@ -44,4 +42,10 @@ struct MapInterval { bool is_sync_pending = false; }; +struct MapIntervalCompare { + constexpr bool operator()(const MapInterval& lhs, const MapInterval& rhs) const noexcept { + return lhs.start < rhs.start; + } +}; + } // namespace VideoCommon diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp index d2cab50bd..9964ea894 100644 --- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp +++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp @@ -8,6 +8,7 @@ #include "common/assert.h" #include "common/microprofile.h" +#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/rasterizer_interface.h" #include "video_core/renderer_opengl/gl_buffer_cache.h" diff --git a/src/video_core/renderer_opengl/gl_fence_manager.cpp b/src/video_core/renderer_opengl/gl_fence_manager.cpp index 99ddcb3f8..ec5421afa 100644 --- a/src/video_core/renderer_opengl/gl_fence_manager.cpp +++ b/src/video_core/renderer_opengl/gl_fence_manager.cpp @@ -4,6 +4,7 @@ #include "common/assert.h" +#include "video_core/renderer_opengl/gl_buffer_cache.h" #include "video_core/renderer_opengl/gl_fence_manager.h" namespace OpenGL { diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index 5b494da8c..5f33d9e40 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -7,6 +7,7 @@ #include #include "core/core.h" +#include "video_core/buffer_cache/buffer_cache.h" #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_scheduler.h" diff --git a/src/video_core/renderer_vulkan/vk_fence_manager.h b/src/video_core/renderer_vulkan/vk_fence_manager.h index 04d07fe6a..043fe7947 100644 --- a/src/video_core/renderer_vulkan/vk_fence_manager.h +++ b/src/video_core/renderer_vulkan/vk_fence_manager.h @@ -7,6 +7,7 @@ #include #include "video_core/fence_manager.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/wrapper.h" namespace Core { From 19d4f28001d3a8e28b41187a7940d14d0a8d708c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 20 May 2020 23:55:40 -0300 Subject: [PATCH 4/6] buffer_cache: Use boost::container::small_vector for maps in range Most overlaps in the buffer cache only contain one mapped address. We can avoid close to all heap allocations once the buffer cache is warmed up by using a small_vector with a stack size of one. --- src/video_core/buffer_cache/buffer_cache.h | 28 ++++++++++++---------- 1 file changed, 15 insertions(+), 13 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index fb12af9d8..0c8500c04 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -12,6 +12,7 @@ #include #include +#include #include #include #include @@ -33,6 +34,10 @@ namespace VideoCommon { template class BufferCache { + using IntervalSet = boost::icl::interval_set; + using IntervalType = typename IntervalSet::interval_type; + using VectorMapInterval = boost::container::small_vector; + public: using BufferInfo = std::pair; @@ -133,7 +138,7 @@ public: void FlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; - std::vector objects = GetMapsInRange(addr, size); + VectorMapInterval objects = GetMapsInRange(addr, size); std::sort(objects.begin(), objects.end(), [](MapInterval* lhs, MapInterval* rhs) { return lhs->ticks < rhs->ticks; }); for (MapInterval* object : objects) { @@ -148,7 +153,7 @@ public: bool MustFlushRegion(VAddr addr, std::size_t size) { std::lock_guard lock{mutex}; - const std::vector objects = GetMapsInRange(addr, size); + const VectorMapInterval objects = GetMapsInRange(addr, size); return std::any_of(objects.cbegin(), objects.cend(), [](const MapInterval* map) { return map->is_modified && map->is_registered; }); @@ -158,8 +163,7 @@ public: void InvalidateRegion(VAddr addr, u64 size) { std::lock_guard lock{mutex}; - std::vector objects = GetMapsInRange(addr, size); - for (auto& object : objects) { + for (auto& object : GetMapsInRange(addr, size)) { if (object->is_registered) { Unregister(object); } @@ -314,7 +318,7 @@ protected: private: MapInterval* MapAddress(const OwnerBuffer& block, GPUVAddr gpu_addr, VAddr cpu_addr, std::size_t size) { - std::vector overlaps = GetMapsInRange(cpu_addr, size); + const VectorMapInterval overlaps = GetMapsInRange(cpu_addr, size); if (overlaps.empty()) { auto& memory_manager = system.GPU().MemoryManager(); const VAddr cpu_addr_end = cpu_addr + size; @@ -368,7 +372,7 @@ private: } void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end, - std::vector& overlaps) { + const VectorMapInterval& overlaps) { const IntervalType base_interval{start, end}; IntervalSet interval_set{}; interval_set.add(base_interval); @@ -387,14 +391,13 @@ private: } } - std::vector GetMapsInRange(VAddr addr, std::size_t size) { + VectorMapInterval GetMapsInRange(VAddr addr, std::size_t size) { + VectorMapInterval result; if (size == 0) { - return {}; + return result; } - std::vector result; const VAddr addr_end = addr + size; - auto it = mapped_addresses.lower_bound(addr); if (it != mapped_addresses.begin()) { --it; @@ -574,10 +577,9 @@ private: u64 buffer_offset = 0; u64 buffer_offset_base = 0; - using IntervalSet = boost::icl::interval_set; - using IntervalType = typename IntervalSet::interval_type; std::list mapped_addresses_storage; // Temporary hack - boost::intrusive::set> mapped_addresses; + boost::intrusive::set> + mapped_addresses; static constexpr u64 write_page_bit = 11; std::unordered_map written_pages; From a2dcc642c1737721bafe54605c7826fa08d18f47 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 21 May 2020 01:06:40 -0300 Subject: [PATCH 5/6] map_interval: Add interval allocator and drop hack Drop the std::list hack to allocate memory indefinitely. Instead use a custom allocator that keeps references valid until destruction. This allocates fixed chunks of memory and puts pointers in a free list. When an allocation is no longer used put it back to the free list, this doesn't heap allocate because std::vector doesn't change the capacity. If the free list is empty, allocate a new chunk. --- src/video_core/CMakeLists.txt | 1 + src/video_core/buffer_cache/buffer_cache.h | 7 ++-- src/video_core/buffer_cache/map_interval.cpp | 33 ++++++++++++++++ src/video_core/buffer_cache/map_interval.h | 41 ++++++++++++++++++++ 4 files changed, 79 insertions(+), 3 deletions(-) create mode 100644 src/video_core/buffer_cache/map_interval.cpp diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index d23c53843..f00c71dae 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,6 +1,7 @@ add_library(video_core STATIC buffer_cache/buffer_block.h buffer_cache/buffer_cache.h + buffer_cache/map_interval.cpp buffer_cache/map_interval.h dirty_flags.cpp dirty_flags.h diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 0c8500c04..2262259c7 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -284,8 +284,8 @@ protected: MarkRegionAsWritten(new_map.start, new_map.end - 1); new_map.is_written = true; } - // Temporary hack, leaks memory and it's not cache local - MapInterval* const storage = &mapped_addresses_storage.emplace_back(new_map); + MapInterval* const storage = mapped_addresses_allocator.Allocate(); + *storage = new_map; mapped_addresses.insert(*storage); return storage; } @@ -313,6 +313,7 @@ protected: const auto it = mapped_addresses.find(*map); ASSERT(it != mapped_addresses.end()); mapped_addresses.erase(it); + mapped_addresses_allocator.Release(map); } private: @@ -577,7 +578,7 @@ private: u64 buffer_offset = 0; u64 buffer_offset_base = 0; - std::list mapped_addresses_storage; // Temporary hack + MapIntervalAllocator mapped_addresses_allocator; boost::intrusive::set> mapped_addresses; diff --git a/src/video_core/buffer_cache/map_interval.cpp b/src/video_core/buffer_cache/map_interval.cpp new file mode 100644 index 000000000..62587e18a --- /dev/null +++ b/src/video_core/buffer_cache/map_interval.cpp @@ -0,0 +1,33 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include + +#include "video_core/buffer_cache/map_interval.h" + +namespace VideoCommon { + +MapIntervalAllocator::MapIntervalAllocator() { + FillFreeList(first_chunk); +} + +MapIntervalAllocator::~MapIntervalAllocator() = default; + +void MapIntervalAllocator::AllocateNewChunk() { + *new_chunk = std::make_unique(); + FillFreeList(**new_chunk); + new_chunk = &(*new_chunk)->next; +} + +void MapIntervalAllocator::FillFreeList(Chunk& chunk) { + const std::size_t old_size = free_list.size(); + free_list.resize(old_size + chunk.data.size()); + std::transform(chunk.data.rbegin(), chunk.data.rend(), free_list.begin() + old_size, + [](MapInterval& interval) { return &interval; }); +} + +} // namespace VideoCommon diff --git a/src/video_core/buffer_cache/map_interval.h b/src/video_core/buffer_cache/map_interval.h index 45705cccf..fe0bcd1d8 100644 --- a/src/video_core/buffer_cache/map_interval.h +++ b/src/video_core/buffer_cache/map_interval.h @@ -4,6 +4,11 @@ #pragma once +#include +#include +#include +#include + #include #include "common/common_types.h" @@ -12,6 +17,8 @@ namespace VideoCommon { struct MapInterval : public boost::intrusive::set_base_hook> { + MapInterval() = default; + /*implicit*/ MapInterval(VAddr start_) noexcept : start{start_} {} explicit MapInterval(VAddr start_, VAddr end_, GPUVAddr gpu_addr_) noexcept @@ -48,4 +55,38 @@ struct MapIntervalCompare { } }; +class MapIntervalAllocator { +public: + MapIntervalAllocator(); + ~MapIntervalAllocator(); + + MapInterval* Allocate() { + if (free_list.empty()) { + AllocateNewChunk(); + } + MapInterval* const interval = free_list.back(); + free_list.pop_back(); + return interval; + } + + void Release(MapInterval* interval) { + free_list.push_back(interval); + } + +private: + struct Chunk { + std::unique_ptr next; + std::array data; + }; + + void AllocateNewChunk(); + + void FillFreeList(Chunk& chunk); + + std::vector free_list; + std::unique_ptr* new_chunk = &first_chunk.next; + + Chunk first_chunk; +}; + } // namespace VideoCommon From ebaace294fc6a867a12bcb30031c5c5cbfdcb238 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 21 May 2020 16:17:33 -0300 Subject: [PATCH 6/6] buffer_cache: Remove unused boost headers --- src/video_core/buffer_cache/buffer_cache.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 2262259c7..d9a4a1b4d 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -13,10 +13,8 @@ #include #include -#include #include #include -#include #include "common/alignment.h" #include "common/assert.h"