From 94da1e8a7e876eaae911332e3539ffcfc783cfac Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 2 Mar 2021 16:57:53 -0800 Subject: [PATCH 1/4] video_core: rasterizer_accelerated: Use a flat array instead of interval_map for cached pages. - Uses a fixed 64MB for the cache instead of an ever-growing map. - Slightly faster by using atomics instead of a single mutex for access. - Thanks to Rodrigo for the idea. --- src/video_core/rasterizer_accelerated.cpp | 51 ++++++----------------- src/video_core/rasterizer_accelerated.h | 25 ++++++++--- 2 files changed, 32 insertions(+), 44 deletions(-) diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index 53622ca05..c54b17670 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -2,64 +2,39 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. -#include - -#include -#include - #include "common/assert.h" #include "common/common_types.h" +#include "common/div_ceil.h" #include "core/memory.h" #include "video_core/rasterizer_accelerated.h" namespace VideoCore { -namespace { - -template -constexpr auto RangeFromInterval(Map& map, const Interval& interval) { - return boost::make_iterator_range(map.equal_range(interval)); -} - -} // Anonymous namespace - RasterizerAccelerated::RasterizerAccelerated(Core::Memory::Memory& cpu_memory_) : cpu_memory{cpu_memory_} {} RasterizerAccelerated::~RasterizerAccelerated() = default; void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { - std::lock_guard lock{pages_mutex}; - const u64 page_start{addr >> Core::Memory::PAGE_BITS}; - const u64 page_end{(addr + size + Core::Memory::PAGE_SIZE - 1) >> Core::Memory::PAGE_BITS}; + const auto page_end = Common::DivCeil(addr + size, Core::Memory::PAGE_SIZE); + for (auto page = addr >> Core::Memory::PAGE_BITS; page != page_end; ++page) { + auto& count = cached_pages.at(page >> 3).Count(page); - 
// Interval maps will erase segments if count reaches 0, so if delta is negative we have to - // subtract after iterating - const auto pages_interval = CachedPageMap::interval_type::right_open(page_start, page_end); - if (delta > 0) { - cached_pages.add({pages_interval, delta}); - } + ASSERT_MSG(count < UINT8_MAX, "Count may exceed UINT8_MAX!"); - for (const auto& pair : RangeFromInterval(cached_pages, pages_interval)) { - const auto interval = pair.first & pages_interval; - const int count = pair.second; + count += delta; - const VAddr interval_start_addr = boost::icl::first(interval) << Core::Memory::PAGE_BITS; - const VAddr interval_end_addr = boost::icl::last_next(interval) << Core::Memory::PAGE_BITS; - const u64 interval_size = interval_end_addr - interval_start_addr; - - if (delta > 0 && count == delta) { - cpu_memory.RasterizerMarkRegionCached(interval_start_addr, interval_size, true); - } else if (delta < 0 && count == -delta) { - cpu_memory.RasterizerMarkRegionCached(interval_start_addr, interval_size, false); + // Assume delta is either -1 or 1 + if (count == 0) { + cpu_memory.RasterizerMarkRegionCached(page << Core::Memory::PAGE_BITS, + Core::Memory::PAGE_SIZE, false); + } else if (count == 1 && delta > 0) { + cpu_memory.RasterizerMarkRegionCached(page << Core::Memory::PAGE_BITS, + Core::Memory::PAGE_SIZE, true); } else { ASSERT(count >= 0); } } - - if (delta < 0) { - cached_pages.add({pages_interval, delta}); - } } } // namespace VideoCore diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h index 91866d7dd..9227a4adc 100644 --- a/src/video_core/rasterizer_accelerated.h +++ b/src/video_core/rasterizer_accelerated.h @@ -4,9 +4,8 @@ #pragma once -#include - -#include +#include +#include #include "common/common_types.h" #include "video_core/rasterizer_interface.h" @@ -26,10 +25,24 @@ public: void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override; private: - using CachedPageMap = 
boost::icl::interval_map; - CachedPageMap cached_pages; - std::mutex pages_mutex; + class CacheEntry final { + public: + CacheEntry() = default; + std::atomic_uint8_t& Count(std::size_t page) { + return values[page & 7]; + } + + const std::atomic_uint8_t& Count(std::size_t page) const { + return values[page & 7]; + } + + private: + std::array values{}; + }; + static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!"); + + std::array cached_pages; Core::Memory::Memory& cpu_memory; }; From 6ab839462c2d109781320bc09579b1e370c16395 Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 2 Mar 2021 17:44:02 -0800 Subject: [PATCH 2/4] video_core: rasterizer_accelerated: Improve error handling & fix implicit conversion. --- src/video_core/rasterizer_accelerated.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index c54b17670..f684cd7e5 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -20,9 +20,15 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del for (auto page = addr >> Core::Memory::PAGE_BITS; page != page_end; ++page) { auto& count = cached_pages.at(page >> 3).Count(page); - ASSERT_MSG(count < UINT8_MAX, "Count may exceed UINT8_MAX!"); + if (delta < 0) { + ASSERT_MSG(count > 0, "Count may underflow!"); + } else if (delta > 0) { + ASSERT_MSG(count < UINT8_MAX, "Count may overflow!"); + } else { + ASSERT_MSG(true, "Delta must be non-zero!"); + } - count += delta; + count += static_cast(delta); // Assume delta is either -1 or 1 if (count == 0) { @@ -31,8 +37,6 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del } else if (count == 1 && delta > 0) { cpu_memory.RasterizerMarkRegionCached(page << Core::Memory::PAGE_BITS, Core::Memory::PAGE_SIZE, true); - } else { - ASSERT(count >= 0); } } } From 
50ee9c46ab47da55ec6fb6976360ac9bd71c13d7 Mon Sep 17 00:00:00 2001 From: bunnei Date: Tue, 2 Mar 2021 17:48:02 -0800 Subject: [PATCH 3/4] video_core: rasterizer_accelerated: Fix delta check ordering. --- src/video_core/rasterizer_accelerated.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index f684cd7e5..4fd57d7c7 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -20,10 +20,10 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del for (auto page = addr >> Core::Memory::PAGE_BITS; page != page_end; ++page) { auto& count = cached_pages.at(page >> 3).Count(page); - if (delta < 0) { - ASSERT_MSG(count > 0, "Count may underflow!"); - } else if (delta > 0) { + if (delta > 0) { ASSERT_MSG(count < UINT8_MAX, "Count may overflow!"); + } else if (delta < 0) { + ASSERT_MSG(count > 0, "Count may underflow!"); } else { ASSERT_MSG(true, "Delta must be non-zero!"); } From a9d24b0df36b7e3bb5e8f5e71f3b6a9a2485f44b Mon Sep 17 00:00:00 2001 From: bunnei Date: Fri, 12 Mar 2021 21:52:49 -0800 Subject: [PATCH 4/4] video_core: rasterizer_accelerated: Fix un/signed mismatch. --- src/video_core/rasterizer_accelerated.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp index 4fd57d7c7..62d84c0f8 100644 --- a/src/video_core/rasterizer_accelerated.cpp +++ b/src/video_core/rasterizer_accelerated.cpp @@ -28,7 +28,8 @@ void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int del ASSERT_MSG(true, "Delta must be non-zero!"); } - count += static_cast(delta); + // Adds or subtracts 1, as count is an unsigned 8-bit value + count += static_cast(delta); // Assume delta is either -1 or 1 if (count == 0) {