From 36c302fa32b475abb1b211934eab14fe0cad936a Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 4 May 2023 13:23:36 +0200 Subject: [PATCH] Buffer cache: always use async buffer downloads and fix regression. --- src/tests/video_core/memory_tracker.cpp | 4 +- src/video_core/buffer_cache/buffer_cache.h | 114 +++++++++--------- .../buffer_cache/buffer_cache_base.h | 2 - src/video_core/buffer_cache/word_manager.h | 13 ++ 4 files changed, 70 insertions(+), 63 deletions(-) diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp index 3981907a2..618793668 100644 --- a/src/tests/video_core/memory_tracker.cpp +++ b/src/tests/video_core/memory_tracker.cpp @@ -535,12 +535,12 @@ TEST_CASE("MemoryTracker: Cached write downloads") { memory_track->MarkRegionAsGpuModified(c + PAGE, PAGE); int num = 0; memory_track->ForEachDownloadRangeAndClear(c, WORD, [&](u64 offset, u64 size) { ++num; }); - REQUIRE(num == 1); + REQUIRE(num == 0); num = 0; memory_track->ForEachUploadRange(c, WORD, [&](u64 offset, u64 size) { ++num; }); REQUIRE(num == 0); REQUIRE(!memory_track->IsRegionCpuModified(c + PAGE, PAGE)); - REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); + REQUIRE(memory_track->IsRegionGpuModified(c + PAGE, PAGE)); memory_track->FlushCachedWrites(); REQUIRE(memory_track->IsRegionCpuModified(c + PAGE, PAGE)); REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE)); diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 474822354..0b15944d6 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -23,8 +23,6 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, common_ranges.clear(); inline_buffer_id = NULL_BUFFER_ID; - active_async_buffers = !Settings::IsGPULevelHigh(); - if (!runtime.CanReportMemoryUsage()) { minimum_memory = DEFAULT_EXPECTED_MEMORY; critical_memory = DEFAULT_CRITICAL_MEMORY; @@ -75,8 +73,6 @@ void BufferCache

::TickFrame() { uniform_cache_hits[0] = 0; uniform_cache_shots[0] = 0; - active_async_buffers = !Settings::IsGPULevelHigh(); - const bool skip_preferred = hits * 256 < shots * 251; uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; @@ -491,9 +487,8 @@ void BufferCache

::CommitAsyncFlushesHigh() { if (committed_ranges.empty()) { if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - if (active_async_buffers) { - async_buffers.emplace_back(std::optional{}); - } + + async_buffers.emplace_back(std::optional{}); } return; } @@ -554,64 +549,65 @@ void BufferCache

::CommitAsyncFlushesHigh() { committed_ranges.clear(); if (downloads.empty()) { if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - if (active_async_buffers) { - async_buffers.emplace_back(std::optional{}); - } + + async_buffers.emplace_back(std::optional{}); } return; } - if (active_async_buffers) { - if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { - auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true); - boost::container::small_vector normalized_copies; - IntervalSet new_async_range{}; - runtime.PreCopyBarrier(); - for (auto& [copy, buffer_id] : downloads) { - copy.dst_offset += download_staging.offset; - const std::array copies{copy}; - BufferCopy second_copy{copy}; - Buffer& buffer = slot_buffers[buffer_id]; - second_copy.src_offset = static_cast(buffer.CpuAddr()) + copy.src_offset; - VAddr orig_cpu_addr = static_cast(second_copy.src_offset); - const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; - async_downloads += std::make_pair(base_interval, 1); - runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); - normalized_copies.push_back(second_copy); - } - runtime.PostCopyBarrier(); - pending_downloads.emplace_back(std::move(normalized_copies)); - async_buffers.emplace_back(download_staging); - } else { + if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { + auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes, true); + boost::container::small_vector normalized_copies; + IntervalSet new_async_range{}; + runtime.PreCopyBarrier(); + for (auto& [copy, buffer_id] : downloads) { + copy.dst_offset += download_staging.offset; + const std::array copies{copy}; + BufferCopy second_copy{copy}; + Buffer& buffer = slot_buffers[buffer_id]; + second_copy.src_offset = static_cast(buffer.CpuAddr()) + copy.src_offset; + VAddr orig_cpu_addr = static_cast(second_copy.src_offset); + const IntervalType base_interval{orig_cpu_addr, orig_cpu_addr + copy.size}; + async_downloads += std::make_pair(base_interval, 1); + runtime.CopyBuffer(download_staging.buffer, buffer, copies, false); + normalized_copies.push_back(second_copy); + } + runtime.PostCopyBarrier(); + pending_downloads.emplace_back(std::move(normalized_copies)); + async_buffers.emplace_back(download_staging); + } else { + if (!Settings::IsGPULevelHigh()) { committed_ranges.clear(); uncommitted_ranges.clear(); - } - } else { - if constexpr (USE_MEMORY_MAPS) { - auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); - runtime.PreCopyBarrier(); - for (auto& [copy, buffer_id] : downloads) { - // Have in mind the staging buffer offset for the copy - copy.dst_offset += download_staging.offset; - const std::array copies{copy}; - runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, false); - } - runtime.PostCopyBarrier(); - runtime.Finish(); - for (const auto& [copy, buffer_id] : downloads) { - const Buffer& buffer = slot_buffers[buffer_id]; - const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; - // Undo the modified offset - const u64 dst_offset = copy.dst_offset - download_staging.offset; - const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; - cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); - } } else { - const std::span immediate_buffer = ImmediateBuffer(largest_copy); - for (const auto& [copy, buffer_id] : downloads) { - Buffer& buffer = slot_buffers[buffer_id]; - buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); - const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; - cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); + if constexpr (USE_MEMORY_MAPS) { + auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); + runtime.PreCopyBarrier(); + for (auto& [copy, buffer_id] : downloads) { + // Have in mind the staging buffer offset for the copy + copy.dst_offset += download_staging.offset; + const std::array copies{copy}; + runtime.CopyBuffer(download_staging.buffer, slot_buffers[buffer_id], copies, + false); + } + runtime.PostCopyBarrier(); + runtime.Finish(); + for (const auto& [copy, buffer_id] : downloads) { + const Buffer& buffer = slot_buffers[buffer_id]; + const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; + // Undo the modified offset + const u64 dst_offset = copy.dst_offset - download_staging.offset; + const u8* read_mapped_memory = download_staging.mapped_span.data() + dst_offset; + cpu_memory.WriteBlockUnsafe(cpu_addr, read_mapped_memory, copy.size); + } + } else { + const std::span immediate_buffer = ImmediateBuffer(largest_copy); + for (const auto& [copy, buffer_id] : downloads) { + Buffer& buffer = slot_buffers[buffer_id]; + buffer.ImmediateDownload(copy.src_offset, + immediate_buffer.subspan(0, copy.size)); + const VAddr cpu_addr = buffer.CpuAddr() + copy.src_offset; + cpu_memory.WriteBlockUnsafe(cpu_addr, immediate_buffer.data(), copy.size); + } } } } diff --git a/src/video_core/buffer_cache/buffer_cache_base.h b/src/video_core/buffer_cache/buffer_cache_base.h index e3914a53a..0445ec47f 100644 --- a/src/video_core/buffer_cache/buffer_cache_base.h +++ b/src/video_core/buffer_cache/buffer_cache_base.h @@ -572,8 +572,6 @@ private: u64 critical_memory = 0; BufferId inline_buffer_id; - bool active_async_buffers = false; - std::array> CACHING_PAGEBITS)> page_table; std::vector tmp_buffer; }; diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h index 0fb199a54..a336bde41 100644 --- a/src/video_core/buffer_cache/word_manager.h +++ b/src/video_core/buffer_cache/word_manager.h @@ -302,6 +302,9 @@ public: (pending_pointer - pending_offset) * BYTES_PER_PAGE); }; IterateWords(offset, size, [&](size_t index, u64 mask) { + if constexpr (type == Type::GPU) { + mask &= ~untracked_words[index]; + } const u64 word = state_words[index] & mask; if constexpr (clear) { if constexpr (type == Type::CPU || type == Type::CachedCPU) { @@ -350,8 +353,13 @@ public: static_assert(type != Type::Untracked); const std::span state_words = words.template Span(); + [[maybe_unused]] const std::span untracked_words = + words.template Span(); bool result = false; IterateWords(offset, size, [&](size_t index, u64 mask) { + if constexpr (type == Type::GPU) { + mask &= ~untracked_words[index]; + } const u64 word = state_words[index] & mask; if (word != 0) { result = true; @@ -372,9 +380,14 @@ public: [[nodiscard]] std::pair ModifiedRegion(u64 offset, u64 size) const noexcept { static_assert(type != Type::Untracked); const std::span state_words = words.template Span(); + [[maybe_unused]] const std::span untracked_words = + words.template Span(); u64 begin = std::numeric_limits::max(); u64 end = 0; IterateWords(offset, size, [&](size_t index, u64 mask) { + if constexpr (type == Type::GPU) { + mask &= ~untracked_words[index]; + } const u64 word = state_words[index] & mask; if (word == 0) { return;