From a4bfae1b551651e50f1484d6d488d3f598d274cf Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 18:37:31 -0300 Subject: [PATCH 1/2] buffer_cache/buffer_base: Add a range tracking buffer container It keeps track of the modified CPU and GPU ranges on a CPU page granularity, notifying the given rasterizer about state changes in the tracking behavior of the buffer. Use a small vector optimization to store buffers smaller than 256 KiB locally instead of using free store memory allocations. --- src/video_core/CMakeLists.txt | 1 + src/video_core/buffer_cache/buffer_base.h | 495 ++++++++++++++++++++++ 2 files changed, 496 insertions(+) create mode 100644 src/video_core/buffer_cache/buffer_base.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index f7b9d7f86..c3d0f4c31 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -1,6 +1,7 @@ add_subdirectory(host_shaders) add_library(video_core STATIC + buffer_cache/buffer_base.h buffer_cache/buffer_block.h buffer_cache/buffer_cache.h buffer_cache/map_interval.cpp diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h new file mode 100644 index 000000000..fd740c2c1 --- /dev/null +++ b/src/video_core/buffer_cache/buffer_base.h @@ -0,0 +1,495 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include +#include + +#include "common/alignment.h" +#include "common/common_funcs.h" +#include "common/common_types.h" +#include "common/div_ceil.h" +#include "core/memory.h" + +namespace VideoCommon { + +enum class BufferFlagBits { + Picked = 1 << 0, +}; +DECLARE_ENUM_FLAG_OPERATORS(BufferFlagBits) + +/// Tag for creating null buffers with no storage or size +struct NullBufferParams {}; + +/** + * Range tracking buffer container. 
+ * + * It keeps track of the modified CPU and GPU ranges on a CPU page granularity, notifying the given + * rasterizer about state changes in the tracking behavior of the buffer. + * + * The buffer size and address is forcefully aligned to CPU page boundaries. + */ +template +class BufferBase { + static constexpr u64 PAGES_PER_WORD = 64; + static constexpr u64 BYTES_PER_PAGE = Core::Memory::PAGE_SIZE; + static constexpr u64 BYTES_PER_WORD = PAGES_PER_WORD * BYTES_PER_PAGE; + + /// Vector tracking modified pages tightly packed with small vector optimization + union WrittenWords { + /// Returns the pointer to the words state + [[nodiscard]] const u64* Pointer(bool is_short) const noexcept { + return is_short ? &stack : heap; + } + + /// Returns the pointer to the words state + [[nodiscard]] u64* Pointer(bool is_short) noexcept { + return is_short ? &stack : heap; + } + + u64 stack = 0; ///< Small buffers storage + u64* heap; ///< Not-small buffers pointer to the storage + }; + + struct GpuCpuWords { + explicit GpuCpuWords() = default; + explicit GpuCpuWords(u64 size_bytes_) : size_bytes{size_bytes_} { + if (IsShort()) { + cpu.stack = ~u64{0}; + gpu.stack = 0; + } else { + // Share allocation between CPU and GPU pages and set their default values + const size_t num_words = NumWords(); + u64* const alloc = new u64[num_words * 2]; + cpu.heap = alloc; + gpu.heap = alloc + num_words; + std::fill_n(cpu.heap, num_words, ~u64{0}); + std::fill_n(gpu.heap, num_words, 0); + } + // Clean up trailing bits + const u64 last_local_page = + Common::DivCeil(size_bytes % BYTES_PER_WORD, BYTES_PER_PAGE); + const u64 shift = (PAGES_PER_WORD - last_local_page) % PAGES_PER_WORD; + u64& last_word = cpu.Pointer(IsShort())[NumWords() - 1]; + last_word = (last_word << shift) >> shift; + } + + ~GpuCpuWords() { + Release(); + } + + GpuCpuWords& operator=(GpuCpuWords&& rhs) noexcept { + Release(); + size_bytes = rhs.size_bytes; + cpu = rhs.cpu; + gpu = rhs.gpu; + rhs.cpu.heap = nullptr; + return 
*this; + } + + GpuCpuWords(GpuCpuWords&& rhs) noexcept + : size_bytes{rhs.size_bytes}, cpu{rhs.cpu}, gpu{rhs.gpu} { + rhs.cpu.heap = nullptr; + } + + GpuCpuWords& operator=(const GpuCpuWords&) = delete; + GpuCpuWords(const GpuCpuWords&) = delete; + + /// Returns true when the buffer fits in the small vector optimization + [[nodiscard]] bool IsShort() const noexcept { + return size_bytes <= BYTES_PER_WORD; + } + + /// Returns the number of words of the buffer + [[nodiscard]] size_t NumWords() const noexcept { + return Common::DivCeil(size_bytes, BYTES_PER_WORD); + } + + /// Release buffer resources + void Release() { + if (!IsShort()) { + // CPU written words is the base for the heap allocation + delete[] cpu.heap; + } + } + + u64 size_bytes = 0; + WrittenWords cpu; + WrittenWords gpu; + }; + +public: + explicit BufferBase(RasterizerInterface& rasterizer_, VAddr cpu_addr_, u64 size_bytes) + : rasterizer{&rasterizer_}, cpu_addr{Common::AlignDown(cpu_addr_, BYTES_PER_PAGE)}, + words(Common::AlignUp(size_bytes + (cpu_addr_ - cpu_addr), BYTES_PER_PAGE)) {} + + explicit BufferBase(NullBufferParams) {} + + BufferBase& operator=(const BufferBase&) = delete; + BufferBase(const BufferBase&) = delete; + + /// Returns the inclusive CPU modified range in a begin end pair + [[nodiscard]] std::pair ModifiedCpuRegion(VAddr query_cpu_addr, + u64 query_size) const noexcept { + const u64 offset = query_cpu_addr - cpu_addr; + return ModifiedRegion(offset, query_size); + } + + /// Returns the inclusive GPU modified range in a begin end pair + [[nodiscard]] std::pair ModifiedGpuRegion(VAddr query_cpu_addr, + u64 query_size) const noexcept { + const u64 offset = query_cpu_addr - cpu_addr; + return ModifiedRegion(offset, query_size); + } + + /// Returns true if a region has been modified from the CPU + [[nodiscard]] bool IsRegionCpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { + const u64 offset = query_cpu_addr - cpu_addr; + return IsRegionModified(offset, query_size); 
+ } + + /// Returns true if a region has been modified from the GPU + [[nodiscard]] bool IsRegionGpuModified(VAddr query_cpu_addr, u64 query_size) const noexcept { + const u64 offset = query_cpu_addr - cpu_addr; + return IsRegionModified(offset, query_size); + } + + /// Mark region as CPU modified, notifying the rasterizer about this change + void MarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { + ChangeRegionState(words.cpu, dirty_cpu_addr, size); + } + + /// Unmark region as CPU modified, notifying the rasterizer about this change + void UnmarkRegionAsCpuModified(VAddr dirty_cpu_addr, u64 size) { + ChangeRegionState(words.cpu, dirty_cpu_addr, size); + } + + /// Mark region as modified from the host GPU + void MarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { + ChangeRegionState(words.gpu, dirty_cpu_addr, size); + } + + /// Unmark region as modified from the host GPU + void UnmarkRegionAsGpuModified(VAddr dirty_cpu_addr, u64 size) noexcept { + ChangeRegionState(words.gpu, dirty_cpu_addr, size); + } + + /// Call 'func' for each CPU modified range and unmark those pages as CPU modified + template + void ForEachUploadRange(VAddr query_cpu_range, u64 size, Func&& func) { + ForEachModifiedRange(query_cpu_range, size, func); + } + + /// Call 'func' for each GPU modified range and unmark those pages as GPU modified + template + void ForEachDownloadRange(VAddr query_cpu_range, u64 size, Func&& func) { + ForEachModifiedRange(query_cpu_range, size, func); + } + + /// Call 'func' for each GPU modified range and unmark those pages as GPU modified + template + void ForEachDownloadRange(Func&& func) { + ForEachModifiedRange(cpu_addr, SizeBytes(), func); + } + + /// Mark buffer as picked + void Pick() noexcept { + flags |= BufferFlagBits::Picked; + } + + /// Unmark buffer as picked + void Unpick() noexcept { + flags &= ~BufferFlagBits::Picked; + } + + /// Returns true when vaddr -> vaddr+size is fully contained in the buffer + [[nodiscard]] bool 
IsInBounds(VAddr addr, u64 size) const noexcept { + return addr >= cpu_addr && addr + size <= cpu_addr + SizeBytes(); + } + + /// Returns true if the buffer has been marked as picked + [[nodiscard]] bool IsPicked() const noexcept { + return True(flags & BufferFlagBits::Picked); + } + + /// Returns the base CPU address of the buffer + [[nodiscard]] VAddr CpuAddr() const noexcept { + return cpu_addr; + } + + /// Returns the offset relative to the given CPU address + /// @pre IsInBounds returns true + [[nodiscard]] u32 Offset(VAddr other_cpu_addr) const noexcept { + return static_cast(other_cpu_addr - cpu_addr); + } + + /// Returns the size in bytes of the buffer + [[nodiscard]] u64 SizeBytes() const noexcept { + return words.size_bytes; + } + +private: + /** + * Change the state of a range of pages + * + * @param written_words Pages to be marked or unmarked as modified + * @param dirty_addr Base address to mark or unmark as modified + * @param size Size in bytes to mark or unmark as modified + * + * @tparam enable True when the bits will be set to one, false for zero + * @tparam notify_rasterizer True when the rasterizer has to be notified about the changes + */ + template + void ChangeRegionState(WrittenWords& written_words, u64 dirty_addr, + s64 size) noexcept(!notify_rasterizer) { + const s64 difference = dirty_addr - cpu_addr; + const u64 offset = std::max(difference, 0); + size += std::min(difference, 0); + if (offset >= SizeBytes() || size < 0) { + return; + } + u64* const state_words = written_words.Pointer(IsShort()); + const u64 offset_end = std::min(offset + size, SizeBytes()); + const u64 begin_page_index = offset / BYTES_PER_PAGE; + const u64 begin_word_index = begin_page_index / PAGES_PER_WORD; + const u64 end_page_index = Common::DivCeil(offset_end, BYTES_PER_PAGE); + const u64 end_word_index = Common::DivCeil(end_page_index, PAGES_PER_WORD); + u64 page_index = begin_page_index % PAGES_PER_WORD; + u64 word_index = begin_word_index; + while (word_index < 
end_word_index) { + const u64 next_word_first_page = (word_index + 1) * PAGES_PER_WORD; + const u64 left_offset = + std::min(next_word_first_page - end_page_index, PAGES_PER_WORD) % PAGES_PER_WORD; + const u64 right_offset = page_index; + u64 bits = ~u64{0}; + bits = (bits >> right_offset) << right_offset; + bits = (bits << left_offset) >> left_offset; + if constexpr (notify_rasterizer) { + NotifyRasterizer(word_index, state_words[word_index], bits); + } + if constexpr (enable) { + state_words[word_index] |= bits; + } else { + state_words[word_index] &= ~bits; + } + page_index = 0; + ++word_index; + } + } + + /** + * Notify rasterizer about changes in the CPU tracking state of a word in the buffer + * + * @param word_index Index to the word to notify to the rasterizer + * @param current_bits Current state of the word + * @param new_bits New state of the word + * + * @tparam add_to_rasterizer True when the rasterizer should start tracking the new pages + */ + template + void NotifyRasterizer(u64 word_index, u64 current_bits, u64 new_bits) { + u64 changed_bits = (add_to_rasterizer ? current_bits : ~current_bits) & new_bits; + VAddr addr = cpu_addr + word_index * BYTES_PER_WORD; + while (changed_bits != 0) { + const int empty_bits = std::countr_zero(changed_bits); + addr += empty_bits * BYTES_PER_PAGE; + changed_bits >>= empty_bits; + + const u32 continuous_bits = std::countr_one(changed_bits); + const u64 size = continuous_bits * BYTES_PER_PAGE; + const VAddr begin_addr = addr; + addr += size; + changed_bits = continuous_bits < PAGES_PER_WORD ? (changed_bits >> continuous_bits) : 0; + rasterizer->UpdatePagesCachedCount(begin_addr, size, add_to_rasterizer ? 1 : -1); + } + } + + /** + * Loop over each page in the given range, turn off those bits and notify the rasterizer if + * needed. Call the given function on each turned off range. 
+ * + * @param query_cpu_range Base CPU address to loop over + * @param size Size in bytes of the CPU range to loop over + * @param func Function to call for each turned off region + * + * @tparam gpu True for host GPU pages, false for CPU pages + * @tparam notify_rasterizer True when the rasterizer should be notified about state changes + */ + template + void ForEachModifiedRange(VAddr query_cpu_range, s64 size, Func&& func) { + const s64 difference = query_cpu_range - cpu_addr; + const u64 query_begin = std::max(difference, 0); + size += std::min(difference, 0); + if (query_begin >= SizeBytes() || size < 0) { + return; + } + const u64* const cpu_words = words.cpu.Pointer(IsShort()); + const u64 query_end = query_begin + std::min(static_cast(size), SizeBytes()); + u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); + u64* const words_begin = state_words + query_begin / BYTES_PER_WORD; + u64* const words_end = state_words + Common::DivCeil(query_end, BYTES_PER_WORD); + + const auto modified = [](u64 word) { return word != 0; }; + const auto first_modified_word = std::find_if(words_begin, words_end, modified); + if (first_modified_word == words_end) { + // Exit early when the buffer is not modified + return; + } + const auto last_modified_word = std::find_if_not(first_modified_word, words_end, modified); + + const u64 word_index_begin = std::distance(state_words, first_modified_word); + const u64 word_index_end = std::distance(state_words, last_modified_word); + + const unsigned local_page_begin = std::countr_zero(*first_modified_word); + const unsigned local_page_end = PAGES_PER_WORD - std::countl_zero(last_modified_word[-1]); + const u64 word_page_begin = word_index_begin * PAGES_PER_WORD; + const u64 word_page_end = (word_index_end - 1) * PAGES_PER_WORD; + const u64 query_page_begin = query_begin / BYTES_PER_PAGE; + const u64 query_page_end = Common::DivCeil(query_end, BYTES_PER_PAGE); + const u64 page_index_begin = std::max(word_page_begin 
+ local_page_begin, query_page_begin); + const u64 page_index_end = std::min(word_page_end + local_page_end, query_page_end); + const u64 first_word_page_begin = page_index_begin % PAGES_PER_WORD; + const u64 last_word_page_end = (page_index_end - 1) % PAGES_PER_WORD + 1; + + u64 page_begin = first_word_page_begin; + u64 current_base = 0; + u64 current_size = 0; + bool on_going = false; + for (u64 word_index = word_index_begin; word_index < word_index_end; ++word_index) { + const bool is_last_word = word_index + 1 == word_index_end; + const u64 page_end = is_last_word ? last_word_page_end : PAGES_PER_WORD; + const u64 right_offset = page_begin; + const u64 left_offset = PAGES_PER_WORD - page_end; + u64 bits = ~u64{0}; + bits = (bits >> right_offset) << right_offset; + bits = (bits << left_offset) >> left_offset; + + const u64 current_word = state_words[word_index] & bits; + state_words[word_index] &= ~bits; + + // Exclude CPU modified pages when visiting GPU pages + const u64 word = current_word & ~(gpu ? 
cpu_words[word_index] : 0); + if constexpr (notify_rasterizer) { + NotifyRasterizer(word_index, word, ~u64{0}); + } + u64 page = page_begin; + page_begin = 0; + + while (page < page_end) { + const int empty_bits = std::countr_zero(word >> page); + if (on_going && empty_bits != 0) { + InvokeModifiedRange(func, current_size, current_base); + current_size = 0; + on_going = false; + } + page += empty_bits; + + const int continuous_bits = std::countr_one(word >> page); + if (!on_going && continuous_bits != 0) { + current_base = word_index * PAGES_PER_WORD + page; + on_going = true; + } + current_size += continuous_bits; + page += continuous_bits; + } + } + if (on_going && current_size > 0) { + InvokeModifiedRange(func, current_size, current_base); + } + } + + template + void InvokeModifiedRange(Func&& func, u64 current_size, u64 current_base) { + const u64 current_size_bytes = current_size * BYTES_PER_PAGE; + const u64 offset_begin = current_base * BYTES_PER_PAGE; + const u64 offset_end = std::min(offset_begin + current_size_bytes, SizeBytes()); + func(offset_begin, offset_end - offset_begin); + } + + /** + * Returns true when a region has been modified + * + * @param offset Offset in bytes from the start of the buffer + * @param size Size in bytes of the region to query for modifications + */ + template + [[nodiscard]] bool IsRegionModified(u64 offset, u64 size) const noexcept { + const u64* const cpu_words = words.cpu.Pointer(IsShort()); + const u64* const state_words = (gpu ? 
words.gpu : words.cpu).Pointer(IsShort()); + const u64 num_query_words = size / BYTES_PER_WORD + 1; + const u64 word_begin = offset / BYTES_PER_WORD; + const u64 word_end = std::min(word_begin + num_query_words, NumWords()); + const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); + u64 page_index = (offset / BYTES_PER_PAGE) % PAGES_PER_WORD; + for (u64 word_index = word_begin; word_index < word_end; ++word_index, page_index = 0) { + const u64 word = state_words[word_index] & ~(gpu ? cpu_words[word_index] : 0); + if (word == 0) { + continue; + } + const u64 page_end = std::min((word_index + 1) * PAGES_PER_WORD, page_limit); + const u64 local_page_end = page_end % PAGES_PER_WORD; + const u64 page_end_shift = (PAGES_PER_WORD - local_page_end) % PAGES_PER_WORD; + if (((word >> page_index) << page_index) << page_end_shift != 0) { + return true; + } + } + return false; + } + + /** + * Returns a begin end pair with the inclusive modified region + * + * @param offset Offset in bytes from the start of the buffer + * @param size Size in bytes of the region to query for modifications + * + * @tparam gpu True to query GPU modified pages, false for CPU pages + */ + template + [[nodiscard]] std::pair ModifiedRegion(u64 offset, u64 size) const noexcept { + const u64* const cpu_words = words.cpu.Pointer(IsShort()); + const u64* const state_words = (gpu ? words.gpu : words.cpu).Pointer(IsShort()); + const u64 num_query_words = size / BYTES_PER_WORD + 1; + const u64 word_begin = offset / BYTES_PER_WORD; + const u64 word_end = std::min(word_begin + num_query_words, NumWords()); + const u64 page_base = offset / BYTES_PER_PAGE; + const u64 page_limit = Common::DivCeil(offset + size, BYTES_PER_PAGE); + u64 begin = std::numeric_limits::max(); + u64 end = 0; + for (u64 word_index = word_begin; word_index < word_end; ++word_index) { + const u64 word = state_words[word_index] & ~(gpu ? 
cpu_words[word_index] : 0); + if (word == 0) { + continue; + } + const u64 local_page_begin = std::countr_zero(word); + const u64 local_page_end = PAGES_PER_WORD - std::countl_zero(word); + const u64 page_index = word_index * PAGES_PER_WORD; + const u64 page_begin = std::max(page_index + local_page_begin, page_base); + const u64 page_end = std::min(page_index + local_page_end, page_limit); + begin = std::min(begin, page_begin); + end = std::max(end, page_end); + } + static constexpr std::pair EMPTY{0, 0}; + return begin < end ? std::make_pair(begin * BYTES_PER_PAGE, end * BYTES_PER_PAGE) : EMPTY; + } + + /// Returns the number of words of the buffer + [[nodiscard]] size_t NumWords() const noexcept { + return words.NumWords(); + } + + /// Returns true when the buffer fits in the small vector optimization + [[nodiscard]] bool IsShort() const noexcept { + return words.IsShort(); + } + + RasterizerInterface* rasterizer = nullptr; + VAddr cpu_addr = 0; + GpuCpuWords words; + BufferFlagBits flags{}; +}; + +} // namespace VideoCommon From 7bd603061c81e1088448d51853b6f721a932d31c Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 30 Dec 2020 18:42:27 -0300 Subject: [PATCH 2/2] tests: Add unit tests for the GPU range tracking buffer container Due to how error prone the container design is, this commit adds unit tests for it. Some tests taken from here are based on bugs from using this buffer container in games, so if we ever break it in the future in a way that might harm games, the tests should fail. 
--- src/tests/CMakeLists.txt | 1 + src/tests/video_core/buffer_base.cpp | 473 +++++++++++++++++++++++++++ 2 files changed, 474 insertions(+) create mode 100644 src/tests/video_core/buffer_base.cpp diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 8a606b448..5b637f3c5 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -6,6 +6,7 @@ add_executable(tests common/ring_buffer.cpp core/core_timing.cpp tests.cpp + video_core/buffer_base.cpp ) create_target_directory_groups(tests) diff --git a/src/tests/video_core/buffer_base.cpp b/src/tests/video_core/buffer_base.cpp new file mode 100644 index 000000000..651633e9e --- /dev/null +++ b/src/tests/video_core/buffer_base.cpp @@ -0,0 +1,473 @@ +// Copyright 2020 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include + +#include "common/alignment.h" +#include "common/common_types.h" +#include "video_core/buffer_cache/buffer_base.h" + +namespace { +using VideoCommon::BufferBase; +using Range = std::pair; + +constexpr u64 PAGE = 4096; +constexpr u64 WORD = 4096 * 64; + +constexpr VAddr c = 0x1328914000; + +class RasterizerInterface { +public: + void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) { + const u64 page_start{addr >> Core::Memory::PAGE_BITS}; + const u64 page_end{(addr + size + Core::Memory::PAGE_SIZE - 1) >> Core::Memory::PAGE_BITS}; + for (u64 page = page_start; page < page_end; ++page) { + int& value = page_table[page]; + value += delta; + if (value < 0) { + throw std::logic_error{"negative page"}; + } + if (value == 0) { + page_table.erase(page); + } + } + } + + [[nodiscard]] int Count(VAddr addr) const noexcept { + const auto it = page_table.find(addr >> Core::Memory::PAGE_BITS); + return it == page_table.end() ? 
0 : it->second; + } + + [[nodiscard]] unsigned Count() const noexcept { + unsigned count = 0; + for (const auto [index, value] : page_table) { + count += value; + } + return count; + } + +private: + std::unordered_map page_table; +}; +} // Anonymous namespace + +TEST_CASE("BufferBase: Small buffer", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD); + REQUIRE(rasterizer.Count() == 0); + buffer.UnmarkRegionAsCpuModified(c, WORD); + REQUIRE(rasterizer.Count() == WORD / PAGE); + REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{0, 0}); + + buffer.MarkRegionAsCpuModified(c + PAGE, 1); + REQUIRE(buffer.ModifiedCpuRegion(c, WORD) == Range{PAGE * 1, PAGE * 2}); +} + +TEST_CASE("BufferBase: Large buffer", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD * 32); + buffer.UnmarkRegionAsCpuModified(c, WORD * 32); + buffer.MarkRegionAsCpuModified(c + 4096, WORD * 4); + REQUIRE(buffer.ModifiedCpuRegion(c, WORD + PAGE * 2) == Range{PAGE, WORD + PAGE * 2}); + REQUIRE(buffer.ModifiedCpuRegion(c + PAGE * 2, PAGE * 6) == Range{PAGE * 2, PAGE * 8}); + REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 4 + PAGE}); + REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 4, PAGE) == Range{WORD * 4, WORD * 4 + PAGE}); + REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 3 + PAGE * 63, PAGE) == + Range{WORD * 3 + PAGE * 63, WORD * 4}); + + buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 6, PAGE); + buffer.MarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); + REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) == + Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 9}); + + buffer.UnmarkRegionAsCpuModified(c + WORD * 5 + PAGE * 8, PAGE); + REQUIRE(buffer.ModifiedCpuRegion(c + WORD * 5, WORD) == + Range{WORD * 5 + PAGE * 6, WORD * 5 + PAGE * 7}); + + buffer.MarkRegionAsCpuModified(c + PAGE, WORD * 31 + PAGE * 63); + REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{PAGE, WORD * 32}); + + 
buffer.UnmarkRegionAsCpuModified(c + PAGE * 4, PAGE); + buffer.UnmarkRegionAsCpuModified(c + PAGE * 6, PAGE); + + buffer.UnmarkRegionAsCpuModified(c, WORD * 32); + REQUIRE(buffer.ModifiedCpuRegion(c, WORD * 32) == Range{0, 0}); +} + +TEST_CASE("BufferBase: Rasterizer counting", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, PAGE * 2); + REQUIRE(rasterizer.Count() == 0); + buffer.UnmarkRegionAsCpuModified(c, PAGE); + REQUIRE(rasterizer.Count() == 1); + buffer.MarkRegionAsCpuModified(c, PAGE * 2); + REQUIRE(rasterizer.Count() == 0); + buffer.UnmarkRegionAsCpuModified(c, PAGE); + buffer.UnmarkRegionAsCpuModified(c + PAGE, PAGE); + REQUIRE(rasterizer.Count() == 2); + buffer.MarkRegionAsCpuModified(c, PAGE * 2); + REQUIRE(rasterizer.Count() == 0); +} + +TEST_CASE("BufferBase: Basic range", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD); + buffer.UnmarkRegionAsCpuModified(c, WORD); + buffer.MarkRegionAsCpuModified(c, PAGE); + int num = 0; + buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { + REQUIRE(offset == 0U); + REQUIRE(size == PAGE); + ++num; + }); + REQUIRE(num == 1U); +} + +TEST_CASE("BufferBase: Border upload", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD * 2); + buffer.UnmarkRegionAsCpuModified(c, WORD * 2); + buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); + buffer.ForEachUploadRange(c, WORD * 2, [](u64 offset, u64 size) { + REQUIRE(offset == WORD - PAGE); + REQUIRE(size == PAGE * 2); + }); +} + +TEST_CASE("BufferBase: Border upload range", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD * 2); + buffer.UnmarkRegionAsCpuModified(c, WORD * 2); + buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); + buffer.ForEachUploadRange(c + WORD - PAGE, PAGE * 2, [](u64 offset, u64 size) { + REQUIRE(offset == WORD - PAGE); + REQUIRE(size == PAGE * 2); + }); + 
buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); + buffer.ForEachUploadRange(c + WORD - PAGE, PAGE, [](u64 offset, u64 size) { + REQUIRE(offset == WORD - PAGE); + REQUIRE(size == PAGE); + }); + buffer.ForEachUploadRange(c + WORD, PAGE, [](u64 offset, u64 size) { + REQUIRE(offset == WORD); + REQUIRE(size == PAGE); + }); +} + +TEST_CASE("BufferBase: Border upload partial range", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD * 2); + buffer.UnmarkRegionAsCpuModified(c, WORD * 2); + buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); + buffer.ForEachUploadRange(c + WORD - 1, 2, [](u64 offset, u64 size) { + REQUIRE(offset == WORD - PAGE); + REQUIRE(size == PAGE * 2); + }); + buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); + buffer.ForEachUploadRange(c + WORD - 1, 1, [](u64 offset, u64 size) { + REQUIRE(offset == WORD - PAGE); + REQUIRE(size == PAGE); + }); + buffer.ForEachUploadRange(c + WORD + 50, 1, [](u64 offset, u64 size) { + REQUIRE(offset == WORD); + REQUIRE(size == PAGE); + }); +} + +TEST_CASE("BufferBase: Partial word uploads", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, 0x9d000); + int num = 0; + buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { + REQUIRE(offset == 0U); + REQUIRE(size == WORD); + ++num; + }); + REQUIRE(num == 1); + buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { + REQUIRE(offset == WORD); + REQUIRE(size == WORD); + ++num; + }); + REQUIRE(num == 2); + buffer.ForEachUploadRange(c + 0x79000, 0x24000, [&](u64 offset, u64 size) { + REQUIRE(offset == WORD * 2); + REQUIRE(size == PAGE * 0x1d); + ++num; + }); + REQUIRE(num == 3); +} + +TEST_CASE("BufferBase: Partial page upload", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD); + buffer.UnmarkRegionAsCpuModified(c, WORD); + int num = 0; + buffer.MarkRegionAsCpuModified(c + PAGE * 2, PAGE); + 
buffer.MarkRegionAsCpuModified(c + PAGE * 9, PAGE); + buffer.ForEachUploadRange(c, PAGE * 3, [&](u64 offset, u64 size) { + REQUIRE(offset == PAGE * 2); + REQUIRE(size == PAGE); + ++num; + }); + REQUIRE(num == 1); + buffer.ForEachUploadRange(c + PAGE * 7, PAGE * 3, [&](u64 offset, u64 size) { + REQUIRE(offset == PAGE * 9); + REQUIRE(size == PAGE); + ++num; + }); + REQUIRE(num == 2); +} + +TEST_CASE("BufferBase: Partial page upload with multiple words on the right") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD * 8); + buffer.UnmarkRegionAsCpuModified(c, WORD * 8); + buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); + int num = 0; + buffer.ForEachUploadRange(c + PAGE * 10, WORD * 7, [&](u64 offset, u64 size) { + REQUIRE(offset == PAGE * 13); + REQUIRE(size == WORD * 7 - PAGE * 3); + ++num; + }); + REQUIRE(num == 1); + buffer.ForEachUploadRange(c + PAGE, WORD * 8, [&](u64 offset, u64 size) { + REQUIRE(offset == WORD * 7 + PAGE * 10); + REQUIRE(size == PAGE * 3); + ++num; + }); + REQUIRE(num == 2); +} + +TEST_CASE("BufferBase: Partial page upload with multiple words on the left", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD * 8); + buffer.UnmarkRegionAsCpuModified(c, WORD * 8); + buffer.MarkRegionAsCpuModified(c + PAGE * 13, WORD * 7); + int num = 0; + buffer.ForEachUploadRange(c + PAGE * 16, WORD * 7, [&](u64 offset, u64 size) { + REQUIRE(offset == PAGE * 16); + REQUIRE(size == WORD * 7 - PAGE * 3); + ++num; + }); + REQUIRE(num == 1); + buffer.ForEachUploadRange(c + PAGE, WORD, [&](u64 offset, u64 size) { + REQUIRE(offset == PAGE * 13); + REQUIRE(size == PAGE * 3); + ++num; + }); + REQUIRE(num == 2); +} + +TEST_CASE("BufferBase: Partial page upload with multiple words in the middle", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD * 8); + buffer.UnmarkRegionAsCpuModified(c, WORD * 8); + buffer.MarkRegionAsCpuModified(c + PAGE * 13, PAGE * 
140); + int num = 0; + buffer.ForEachUploadRange(c + PAGE * 16, WORD, [&](u64 offset, u64 size) { + REQUIRE(offset == PAGE * 16); + REQUIRE(size == WORD); + ++num; + }); + REQUIRE(num == 1); + buffer.ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { + REQUIRE(offset == PAGE * 13); + REQUIRE(size == PAGE * 3); + ++num; + }); + REQUIRE(num == 2); + buffer.ForEachUploadRange(c, WORD * 8, [&](u64 offset, u64 size) { + REQUIRE(offset == WORD + PAGE * 16); + REQUIRE(size == PAGE * 73); + ++num; + }); + REQUIRE(num == 3); +} +/* Clears all of a WORD * 2048 buffer, dirties two pages straddling the first WORD boundary, and requires every reported upload range to be (WORD - PAGE, PAGE * 2). NOTE(review): the callback count is never asserted, so this also passes if no range is reported. */ +TEST_CASE("BufferBase: Empty right bits", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD * 2048); + buffer.UnmarkRegionAsCpuModified(c, WORD * 2048); + buffer.MarkRegionAsCpuModified(c + WORD - PAGE, PAGE * 2); + buffer.ForEachUploadRange(c, WORD * 2048, [](u64 offset, u64 size) { + REQUIRE(offset == WORD - PAGE); + REQUIRE(size == PAGE * 2); + }); +} +/* Only the first page is dirty. Upload queries lying entirely outside [c, c + WORD) must not invoke the callback, while a query starting one PAGE below c and spanning PAGE * 2 must invoke it exactly once. Re-marking the whole buffer afterwards must leave rasterizer.Count() at 0 (presumably the number of pages the mock rasterizer tracks - confirm). */ +TEST_CASE("BufferBase: Out of bound ranges 1", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD); + buffer.UnmarkRegionAsCpuModified(c, WORD); + buffer.MarkRegionAsCpuModified(c, PAGE); + int num = 0; + buffer.ForEachUploadRange(c - WORD, WORD, [&](u64 offset, u64 size) { ++num; }); + buffer.ForEachUploadRange(c + WORD, WORD, [&](u64 offset, u64 size) { ++num; }); + buffer.ForEachUploadRange(c - PAGE, PAGE, [&](u64 offset, u64 size) { ++num; }); + REQUIRE(num == 0); + buffer.ForEachUploadRange(c - PAGE, PAGE * 2, [&](u64 offset, u64 size) { ++num; }); + REQUIRE(num == 1); + buffer.MarkRegionAsCpuModified(c, WORD); + REQUIRE(rasterizer.Count() == 0); +} +/* Unmark calls that start at or beyond the end of a 0x22000-byte buffer must not throw and must leave rasterizer.Count() at 0; an unmark at offset 0x21100 raises it to 1, and after three unmarks that start below c it must read 2. */ +TEST_CASE("BufferBase: Out of bound ranges 2", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, 0x22000); + REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x22000, PAGE)); + REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x28000, PAGE)); + REQUIRE(rasterizer.Count() == 0); + 
REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c + 0x21100, PAGE - 0x100)); + REQUIRE(rasterizer.Count() == 1); + REQUIRE_NOTHROW(buffer.UnmarkRegionAsCpuModified(c - 0x1000, PAGE * 2)); + buffer.UnmarkRegionAsCpuModified(c - 0x3000, PAGE * 2); + buffer.UnmarkRegionAsCpuModified(c - 0x2000, PAGE * 2); + REQUIRE(rasterizer.Count() == 2); +} +/* Uses a buffer size (0x310720) that is not a multiple of 0x1000: after unmarking the whole range, rasterizer.Count(addr) must be 1 at c, c + PAGE, c + WORD and c + WORD + PAGE. */ +TEST_CASE("BufferBase: Out of bound ranges 3", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, 0x310720); + buffer.UnmarkRegionAsCpuModified(c, 0x310720); + REQUIRE(rasterizer.Count(c) == 1); + REQUIRE(rasterizer.Count(c + PAGE) == 1); + REQUIRE(rasterizer.Count(c + WORD) == 1); + REQUIRE(rasterizer.Count(c + WORD + PAGE) == 1); +} +/* Clears a one-WORD buffer, dirties page 1 and pages 3..6, and requires the upload ranges to arrive as (PAGE, PAGE) then (PAGE * 3, PAGE * 4); a third callback would throw from std::array::at. NOTE(review): nothing asserts that both callbacks actually ran. */ +TEST_CASE("BufferBase: Sparse regions 1", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD); + buffer.UnmarkRegionAsCpuModified(c, WORD); + buffer.MarkRegionAsCpuModified(c + PAGE * 1, PAGE); + buffer.MarkRegionAsCpuModified(c + PAGE * 3, PAGE * 4); + buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable { + static constexpr std::array offsets{PAGE, PAGE * 3}; + static constexpr std::array sizes{PAGE, PAGE * 4}; + REQUIRE(offset == offsets.at(i)); + REQUIRE(size == sizes.at(i)); + ++i; + }); +} +/* Unmarking all of a 0x22000-byte buffer must bring rasterizer.Count() to 0x22. With pages 0x1B and 0x21 dirtied again, a WORD-sized upload query from c must report (PAGE * 0x1B, PAGE) then (PAGE * 0x21, PAGE). NOTE(review): the number of callbacks is not asserted. */ +TEST_CASE("BufferBase: Sparse regions 2", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, 0x22000); + buffer.UnmarkRegionAsCpuModified(c, 0x22000); + REQUIRE(rasterizer.Count() == 0x22); + buffer.MarkRegionAsCpuModified(c + PAGE * 0x1B, PAGE); + buffer.MarkRegionAsCpuModified(c + PAGE * 0x21, PAGE); + buffer.ForEachUploadRange(c, WORD, [i = 0](u64 offset, u64 size) mutable { + static constexpr std::array offsets{PAGE * 0x1B, PAGE * 0x21}; + static constexpr std::array sizes{PAGE, PAGE}; + REQUIRE(offset == offsets.at(i)); + REQUIRE(size == sizes.at(i)); + ++i; + }); +} +/* A freshly constructed one-page buffer must report its page as CPU-modified; unmarking that page must clear the state. */ +TEST_CASE("BufferBase: Single page modified range", "[video_core]") { + RasterizerInterface 
rasterizer; + BufferBase buffer(rasterizer, c, PAGE); + REQUIRE(buffer.IsRegionCpuModified(c, PAGE)); + buffer.UnmarkRegionAsCpuModified(c, PAGE); + REQUIRE(!buffer.IsRegionCpuModified(c, PAGE)); +} +/* A fresh two-page buffer must be CPU-modified per page and as a whole; after unmarking the first page, a query for that page alone must return false. */ +TEST_CASE("BufferBase: Two page modified range", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, PAGE * 2); + REQUIRE(buffer.IsRegionCpuModified(c, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(c, PAGE * 2)); + buffer.UnmarkRegionAsCpuModified(c, PAGE); + REQUIRE(!buffer.IsRegionCpuModified(c, PAGE)); +} +/* Repeats for base addresses c + WORD * {0..3} with a WORD * 4 buffer: fresh pages report as modified, unmarking page 32 clears only the query confined to that page, and once page 33 is unmarked too the two-page query starting at page 32 must be clean. */ +TEST_CASE("BufferBase: Multi word modified ranges", "[video_core]") { + for (int offset = 0; offset < 4; ++offset) { + const VAddr address = c + WORD * offset; + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, address, WORD * 4); + REQUIRE(buffer.IsRegionCpuModified(address, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 48, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 56, PAGE)); + + buffer.UnmarkRegionAsCpuModified(address + PAGE * 32, PAGE); + REQUIRE(buffer.IsRegionCpuModified(address + PAGE, WORD)); + REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE)); + REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 33, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 31, PAGE * 2)); + REQUIRE(buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); + + buffer.UnmarkRegionAsCpuModified(address + PAGE * 33, PAGE); + REQUIRE(!buffer.IsRegionCpuModified(address + PAGE * 32, PAGE * 2)); + } +} +/* Clears a WORD * 16 buffer, dirties the single page at WORD * 12 + PAGE * 8, and probes IsRegionCpuModified with ranges around it. NOTE(review): the query (c + WORD * 10, WORD * 2) ends at c + WORD * 12, before the dirty page, yet is required to be true - confirm whether this expectation is intended. */ +TEST_CASE("BufferBase: Single page in large buffer", "[video_core]") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD * 16); + buffer.UnmarkRegionAsCpuModified(c, WORD * 16); + REQUIRE(!buffer.IsRegionCpuModified(c, WORD * 16)); + + buffer.MarkRegionAsCpuModified(c + WORD * 12 + PAGE * 8, PAGE); + 
REQUIRE(buffer.IsRegionCpuModified(c, WORD * 16)); + REQUIRE(buffer.IsRegionCpuModified(c + WORD * 10, WORD * 2)); + REQUIRE(buffer.IsRegionCpuModified(c + WORD * 11, WORD * 2)); + REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12, WORD * 2)); + REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 4, PAGE * 8)); + REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE * 8)); + REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 6, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 7, PAGE * 2)); + REQUIRE(buffer.IsRegionCpuModified(c + WORD * 12 + PAGE * 8, PAGE * 2)); +} +/* On a fresh WORD * 16 buffer, queries lying entirely below c or starting at the buffer end must return false, while a query that starts on the last page and runs past the end must return true. NOTE(review): this TEST_CASE has no "[video_core]" tag, unlike the cases before it. */ +TEST_CASE("BufferBase: Out of bounds region query") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD * 16); + REQUIRE(!buffer.IsRegionCpuModified(c - PAGE, PAGE)); + REQUIRE(!buffer.IsRegionCpuModified(c - PAGE * 2, PAGE)); + REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(c + WORD * 16 - PAGE, WORD * 64)); + REQUIRE(!buffer.IsRegionCpuModified(c + WORD * 16, WORD * 64)); +} +/* Clears a two-WORD buffer and dirties pages 63 and 64 (adjacent pages on either side of the word boundary, assuming WORD == PAGE * 64 - confirm), then checks single-page and multi-page queries around them. Afterwards page 127 is dirtied to check queries that start there or at page 128 and extend far past it. NOTE(review): no "[video_core]" tag on this TEST_CASE. */ +TEST_CASE("BufferBase: Wrap word regions") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD * 2); + buffer.UnmarkRegionAsCpuModified(c, WORD * 2); + buffer.MarkRegionAsCpuModified(c + PAGE * 63, PAGE * 2); + REQUIRE(buffer.IsRegionCpuModified(c, WORD * 2)); + REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 62, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 64, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 2)); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 63, PAGE * 8)); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 60, PAGE * 8)); + + REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16)); + buffer.MarkRegionAsCpuModified(c + PAGE * 127, PAGE); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, WORD * 16)); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 127, PAGE)); + 
REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 126, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE * 126, PAGE * 2)); + REQUIRE(!buffer.IsRegionCpuModified(c + PAGE * 128, WORD * 16)); +} +/* Clears a one-WORD buffer and dirties the byte range [4000, 5000); queries for each of the first two pages must then report as modified, as must byte-granular queries inside that range. NOTE(review): no "[video_core]" tag on this TEST_CASE. */ +TEST_CASE("BufferBase: Unaligned page region query") { + RasterizerInterface rasterizer; + BufferBase buffer(rasterizer, c, WORD); + buffer.UnmarkRegionAsCpuModified(c, WORD); + buffer.MarkRegionAsCpuModified(c + 4000, 1000); + REQUIRE(buffer.IsRegionCpuModified(c, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(c + PAGE, PAGE)); + REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1000)); + REQUIRE(buffer.IsRegionCpuModified(c + 4000, 1)); +}