From 5b1efe522eac11a4f1b687981e0913e66818ca74 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 19 Jan 2021 02:39:29 -0300 Subject: [PATCH 01/10] vulkan_memory_allocator: Release allocations with no commits --- .../vulkan_common/vulkan_memory_allocator.cpp | 22 ++++++++++++++----- .../vulkan_common/vulkan_memory_allocator.h | 5 +++++ 2 files changed, 22 insertions(+), 5 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp index 5edd06ebc..aa173d19e 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.cpp +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.cpp @@ -69,10 +69,10 @@ constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{ class MemoryAllocation { public: - explicit MemoryAllocation(vk::DeviceMemory memory_, VkMemoryPropertyFlags properties, - u64 allocation_size_, u32 type) - : memory{std::move(memory_)}, allocation_size{allocation_size_}, property_flags{properties}, - shifted_memory_type{1U << type} {} + explicit MemoryAllocation(MemoryAllocator* const allocator_, vk::DeviceMemory memory_, + VkMemoryPropertyFlags properties, u64 allocation_size_, u32 type) + : allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_}, + property_flags{properties}, shifted_memory_type{1U << type} {} #if defined(_WIN32) || defined(__unix__) ~MemoryAllocation() { @@ -106,6 +106,10 @@ public: const auto it = std::ranges::find(commits, begin, &Range::begin); ASSERT_MSG(it != commits.end(), "Invalid commit"); commits.erase(it); + if (commits.empty()) { + // Do not call any code involving 'this' after this call, the object will be destroyed + allocator->ReleaseMemory(this); + } } [[nodiscard]] std::span Map() { @@ -171,6 +175,7 @@ private: return candidate; } + MemoryAllocator* const allocator; ///< Parent memory allocation. const vk::DeviceMemory memory; ///< Vulkan memory allocation handler. const u64 allocation_size; ///< Size of this allocation. const VkMemoryPropertyFlags property_flags; ///< Vulkan memory property flags. @@ -275,10 +280,17 @@ bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, return false; } } - allocations.push_back(std::make_unique(std::move(memory), flags, size, type)); + allocations.push_back( + std::make_unique(this, std::move(memory), flags, size, type)); return true; } +void MemoryAllocator::ReleaseMemory(MemoryAllocation* alloc) { + const auto it = std::ranges::find(allocations, alloc, &std::unique_ptr::get); + ASSERT(it != allocations.end()); + allocations.erase(it); +} + std::optional MemoryAllocator::TryCommit(const VkMemoryRequirements& requirements, VkMemoryPropertyFlags flags) { for (auto& allocation : allocations) { diff --git a/src/video_core/vulkan_common/vulkan_memory_allocator.h b/src/video_core/vulkan_common/vulkan_memory_allocator.h index db12d02f4..b61e931e0 100644 --- a/src/video_core/vulkan_common/vulkan_memory_allocator.h +++ b/src/video_core/vulkan_common/vulkan_memory_allocator.h @@ -69,6 +69,8 @@ private: /// Memory allocator container. /// Allocates and releases memory allocations on demand. class MemoryAllocator { + friend MemoryAllocation; + public: /** * Construct memory allocator @@ -104,6 +106,9 @@ private: /// Tries to allocate a chunk of memory. bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size); + /// Releases a chunk of memory. + void ReleaseMemory(MemoryAllocation* alloc); + /// Tries to allocate a memory commit. std::optional TryCommit(const VkMemoryRequirements& requirements, VkMemoryPropertyFlags flags); From a11bc4a382ebca52bdf0aab1a9474351e8d85cef Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 19 Jan 2021 21:59:53 -0300 Subject: [PATCH 02/10] Initial Reaper Setup WIP --- src/video_core/buffer_cache/buffer_base.h | 11 ++ src/video_core/buffer_cache/buffer_cache.h | 138 +++++++++++++------ src/video_core/texture_cache/image_base.cpp | 17 +++ src/video_core/texture_cache/image_base.h | 2 + src/video_core/texture_cache/slot_vector.h | 70 +++++++++- src/video_core/texture_cache/texture_cache.h | 44 ++++-- 6 files changed, 226 insertions(+), 56 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_base.h b/src/video_core/buffer_cache/buffer_base.h index a39505903..b121d36a3 100644 --- a/src/video_core/buffer_cache/buffer_base.h +++ b/src/video_core/buffer_cache/buffer_base.h @@ -256,6 +256,16 @@ public: stream_score += score; } + /// Sets the new frame tick + void SetFrameTick(u64 new_frame_tick) noexcept { + frame_tick = new_frame_tick; + } + + /// Returns the new frame tick + [[nodiscard]] u64 FrameTick() const noexcept { + return frame_tick; + } + /// Returns the likeliness of this being a stream buffer [[nodiscard]] int StreamScore() const noexcept { return stream_score; @@ -586,6 +596,7 @@ private: RasterizerInterface* rasterizer = nullptr; VAddr cpu_addr = 0; Words words; + u64 frame_tick = 0; BufferFlagBits flags{}; int stream_score = 0; }; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index d371b842f..ecb7d3dee 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -243,6 +243,8 @@ private: template void ChangeRegister(BufferId buffer_id); + void TouchBuffer(Buffer& buffer) const noexcept; + bool SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size); bool SynchronizeBufferImpl(Buffer& buffer, VAddr cpu_addr, u32 size); @@ -255,6 +257,10 @@ private: void MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, std::span copies); + void DownloadBufferMemory(Buffer& buffer_id); + + void DownloadBufferMemory(Buffer& buffer_id, VAddr cpu_addr, u64 size); + void DeleteBuffer(BufferId buffer_id); void ReplaceBufferDownloads(BufferId old_buffer_id, BufferId new_buffer_id); @@ -319,6 +325,9 @@ private: size_t immediate_buffer_capacity = 0; std::unique_ptr immediate_buffer_alloc; + typename SlotVector::Iterator deletion_iterator; + u64 frame_tick = 0; + std::array> PAGE_BITS)> page_table; }; @@ -332,6 +341,7 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, gpu_memory{gpu_memory_}, cpu_memory{cpu_memory_}, runtime{runtime_} { // Ensure the first slot is used for the null buffer void(slot_buffers.insert(runtime, NullBufferParams{})); + deletion_iterator = slot_buffers.end(); } template @@ -349,7 +359,24 @@ void BufferCache

::TickFrame() { const bool skip_preferred = hits * 256 < shots * 251; uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; + static constexpr u64 ticks_to_destroy = 120; + int num_iterations = 32; + for (; num_iterations > 0; --num_iterations) { + if (deletion_iterator == slot_buffers.end()) { + deletion_iterator = slot_buffers.begin(); + } + ++deletion_iterator; + if (deletion_iterator == slot_buffers.end()) { + break; + } + const auto [buffer_id, buffer] = *deletion_iterator; + if (buffer->FrameTick() + ticks_to_destroy < frame_tick) { + DownloadBufferMemory(*buffer); + DeleteBuffer(buffer_id); + } + } delayed_destruction_ring.Tick(); + ++frame_tick; } template @@ -371,50 +398,8 @@ void BufferCache

::CachedWriteMemory(VAddr cpu_addr, u64 size) { template void BufferCache

::DownloadMemory(VAddr cpu_addr, u64 size) { - ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { - boost::container::small_vector copies; - u64 total_size_bytes = 0; - u64 largest_copy = 0; - buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { - copies.push_back(BufferCopy{ - .src_offset = range_offset, - .dst_offset = total_size_bytes, - .size = range_size, - }); - total_size_bytes += range_size; - largest_copy = std::max(largest_copy, range_size); - }); - if (total_size_bytes == 0) { - return; - } - MICROPROFILE_SCOPE(GPU_DownloadMemory); - - if constexpr (USE_MEMORY_MAPS) { - auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); - const u8* const mapped_memory = download_staging.mapped_span.data(); - const std::span copies_span(copies.data(), copies.data() + copies.size()); - for (BufferCopy& copy : copies) { - // Modify copies to have the staging offset in mind - copy.dst_offset += download_staging.offset; - } - runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); - runtime.Finish(); - for (const BufferCopy& copy : copies) { - const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; - // Undo the modified offset - const u64 dst_offset = copy.dst_offset - download_staging.offset; - const u8* copy_mapped_memory = mapped_memory + dst_offset; - cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); - } - } else { - const std::span immediate_buffer = ImmediateBuffer(largest_copy); - for (const BufferCopy& copy : copies) { - buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); - const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; - cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); - } - } - }); + ForEachBufferInRange(cpu_addr, size, + [&](BufferId, Buffer& buffer) { DownloadBufferMemory(buffer); }); } template @@ -640,6 +625,7 @@ bool BufferCache

::IsRegionGpuModified(VAddr addr, size_t size) { template void BufferCache

::BindHostIndexBuffer() { Buffer& buffer = slot_buffers[index_buffer.buffer_id]; + TouchBuffer(buffer); const u32 offset = buffer.Offset(index_buffer.cpu_addr); const u32 size = index_buffer.size; SynchronizeBuffer(buffer, index_buffer.cpu_addr, size); @@ -658,6 +644,7 @@ void BufferCache

::BindHostVertexBuffers() { for (u32 index = 0; index < NUM_VERTEX_BUFFERS; ++index) { const Binding& binding = vertex_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; + TouchBuffer(buffer); SynchronizeBuffer(buffer, binding.cpu_addr, binding.size); if (!flags[Dirty::VertexBuffer0 + index]) { continue; @@ -693,6 +680,7 @@ void BufferCache

::BindHostGraphicsUniformBuffer(size_t stage, u32 index, u32 const VAddr cpu_addr = binding.cpu_addr; const u32 size = binding.size; Buffer& buffer = slot_buffers[binding.buffer_id]; + TouchBuffer(buffer); const bool use_fast_buffer = binding.buffer_id != NULL_BUFFER_ID && size <= uniform_buffer_skip_cache_size && !buffer.IsRegionGpuModified(cpu_addr, size); @@ -744,6 +732,7 @@ void BufferCache

::BindHostGraphicsStorageBuffers(size_t stage) { ForEachEnabledBit(enabled_storage_buffers[stage], [&](u32 index) { const Binding& binding = storage_buffers[stage][index]; Buffer& buffer = slot_buffers[binding.buffer_id]; + TouchBuffer(buffer); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -766,6 +755,7 @@ void BufferCache

::BindHostTransformFeedbackBuffers() { for (u32 index = 0; index < NUM_TRANSFORM_FEEDBACK_BUFFERS; ++index) { const Binding& binding = transform_feedback_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; + TouchBuffer(buffer); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -784,6 +774,7 @@ void BufferCache

::BindHostComputeUniformBuffers() { ForEachEnabledBit(enabled_compute_uniform_buffers, [&](u32 index) { const Binding& binding = compute_uniform_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; + TouchBuffer(buffer); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -803,6 +794,7 @@ void BufferCache

::BindHostComputeStorageBuffers() { ForEachEnabledBit(enabled_compute_storage_buffers, [&](u32 index) { const Binding& binding = compute_storage_buffers[index]; Buffer& buffer = slot_buffers[binding.buffer_id]; + TouchBuffer(buffer); const u32 size = binding.size; SynchronizeBuffer(buffer, binding.cpu_addr, size); @@ -1101,6 +1093,7 @@ BufferId BufferCache

::CreateBuffer(VAddr cpu_addr, u32 wanted_size) { const OverlapResult overlap = ResolveOverlaps(cpu_addr, wanted_size); const u32 size = static_cast(overlap.end - overlap.begin); const BufferId new_buffer_id = slot_buffers.insert(runtime, rasterizer, overlap.begin, size); + TouchBuffer(slot_buffers[new_buffer_id]); for (const BufferId overlap_id : overlap.ids) { JoinOverlap(new_buffer_id, overlap_id, !overlap.has_stream_leap); } @@ -1135,6 +1128,11 @@ void BufferCache

::ChangeRegister(BufferId buffer_id) { } } +template +void BufferCache

::TouchBuffer(Buffer& buffer) const noexcept { + buffer.SetFrameTick(frame_tick); +} + template bool BufferCache

::SynchronizeBuffer(Buffer& buffer, VAddr cpu_addr, u32 size) { if (buffer.CpuAddr() == 0) { @@ -1211,6 +1209,57 @@ void BufferCache

::MappedUploadMemory(Buffer& buffer, u64 total_size_bytes, runtime.CopyBuffer(buffer, upload_staging.buffer, copies); } +template +void BufferCache

::DownloadBufferMemory(Buffer& buffer) { + DownloadBufferMemory(buffer, buffer.CpuAddr(), buffer.SizeBytes()); +} + +template +void BufferCache

::DownloadBufferMemory(Buffer& buffer, VAddr cpu_addr, u64 size) { + boost::container::small_vector copies; + u64 total_size_bytes = 0; + u64 largest_copy = 0; + buffer.ForEachDownloadRange(cpu_addr, size, [&](u64 range_offset, u64 range_size) { + copies.push_back(BufferCopy{ + .src_offset = range_offset, + .dst_offset = total_size_bytes, + .size = range_size, + }); + total_size_bytes += range_size; + largest_copy = std::max(largest_copy, range_size); + }); + if (total_size_bytes == 0) { + return; + } + MICROPROFILE_SCOPE(GPU_DownloadMemory); + + if constexpr (USE_MEMORY_MAPS) { + auto download_staging = runtime.DownloadStagingBuffer(total_size_bytes); + const u8* const mapped_memory = download_staging.mapped_span.data(); + const std::span copies_span(copies.data(), copies.data() + copies.size()); + for (BufferCopy& copy : copies) { + // Modify copies to have the staging offset in mind + copy.dst_offset += download_staging.offset; + } + runtime.CopyBuffer(download_staging.buffer, buffer, copies_span); + runtime.Finish(); + for (const BufferCopy& copy : copies) { + const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; + // Undo the modified offset + const u64 dst_offset = copy.dst_offset - download_staging.offset; + const u8* copy_mapped_memory = mapped_memory + dst_offset; + cpu_memory.WriteBlockUnsafe(copy_cpu_addr, copy_mapped_memory, copy.size); + } + } else { + const std::span immediate_buffer = ImmediateBuffer(largest_copy); + for (const BufferCopy& copy : copies) { + buffer.ImmediateDownload(copy.src_offset, immediate_buffer.subspan(0, copy.size)); + const VAddr copy_cpu_addr = buffer.CpuAddr() + copy.src_offset; + cpu_memory.WriteBlockUnsafe(copy_cpu_addr, immediate_buffer.data(), copy.size); + } + } +} + template void BufferCache

::DeleteBuffer(BufferId buffer_id) { const auto scalar_replace = [buffer_id](Binding& binding) { @@ -1236,6 +1285,7 @@ void BufferCache

::DeleteBuffer(BufferId buffer_id) { Unregister(buffer_id); delayed_destruction_ring.Push(std::move(slot_buffers[buffer_id])); + slot_buffers.erase(buffer_id); NotifyBufferDeletion(); } diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index 9914926b3..bd0e7e64e 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -113,6 +113,23 @@ void ImageBase::InsertView(const ImageViewInfo& view_info, ImageViewId image_vie image_view_ids.push_back(image_view_id); } +bool ImageBase::IsSafeDownload() const noexcept { + // Skip images that were not modified from the GPU + if (False(flags & ImageFlagBits::GpuModified)) { + return false; + } + // Skip images that .are. modified from the CPU + // We don't want to write sensitive data from the guest + if (True(flags & ImageFlagBits::CpuModified)) { + return false; + } + if (info.num_samples > 1) { + LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); + return false; + } + return true; +} + void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; ASSERT(lhs.info.type == rhs.info.type); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index b7f3b7e43..0f69d8a32 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -44,6 +44,8 @@ struct ImageBase { void InsertView(const ImageViewInfo& view_info, ImageViewId image_view_id); + [[nodiscard]] bool IsSafeDownload() const noexcept; + [[nodiscard]] bool Overlaps(VAddr overlap_cpu_addr, size_t overlap_size) const noexcept { const VAddr overlap_end = overlap_cpu_addr + overlap_size; return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index eae3be6ea..1259e8263 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h @@ -5,6 +5,7 @@ #pragma once #include +#include #include #include #include @@ -32,6 +33,60 @@ template requires std::is_nothrow_move_assignable_v&& std::is_nothrow_move_constructible_v class SlotVector { public: + class Iterator { + friend SlotVector; + + public: + constexpr Iterator() = default; + + Iterator& operator++() noexcept { + const u64* const bitset = slot_vector->stored_bitset.data(); + const u32 size = static_cast(slot_vector->stored_bitset.size()) * 64; + if (id.index < size) { + do { + ++id.index; + } while (id.index < size && !IsValid(bitset)); + if (id.index == size) { + id.index = SlotId::INVALID_INDEX; + } + } + return *this; + } + + Iterator operator++(int) noexcept { + const Iterator copy{*this}; + ++*this; + return copy; + } + + bool operator==(const Iterator& other) const noexcept { + return id.index == other.id.index; + } + + bool operator!=(const Iterator& other) const noexcept { + return id.index != other.id.index; + } + + std::pair operator*() const noexcept { + return {id, std::addressof((*slot_vector)[id])}; + } + + T* operator->() const noexcept { + return std::addressof((*slot_vector)[id]); + } + + private: + Iterator(SlotVector* slot_vector_, SlotId id_) noexcept + : slot_vector{slot_vector_}, id{id_} {} + + bool IsValid(const u64* bitset) noexcept { + return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0; + } + + SlotVector* slot_vector; + SlotId id; + }; + ~SlotVector() noexcept { size_t index = 0; for (u64 bits : stored_bitset) { @@ -70,6 +125,20 @@ public: ResetStorageBit(id.index); } + [[nodiscard]] Iterator begin() noexcept { + const auto it = std::ranges::find_if(stored_bitset, [](u64 value) { return value != 0; }); + if (it == stored_bitset.end()) { + return end(); + } + const u32 word_index = static_cast(std::distance(it, stored_bitset.begin())); + const SlotId first_id{word_index * 64 + static_cast(std::countr_zero(*it))}; + return Iterator(this, first_id); + } + + [[nodiscard]] Iterator end() noexcept { + return Iterator(this, SlotId{SlotId::INVALID_INDEX}); + } + private: struct NonTrivialDummy { NonTrivialDummy() noexcept {} @@ -140,7 +209,6 @@ private: Entry* values = nullptr; size_t values_capacity = 0; - size_t values_size = 0; std::vector stored_bitset; std::vector free_list; diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 59b7c678b..45ef155b5 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -353,6 +353,7 @@ private: u64 modification_tick = 0; u64 frame_tick = 0; + typename SlotVector::Iterator deletion_iterator; }; template @@ -373,10 +374,41 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& // This way the null resource becomes a compile time constant void(slot_image_views.insert(runtime, NullImageParams{})); void(slot_samplers.insert(runtime, sampler_descriptor)); + + deletion_iterator = slot_images.begin(); } template void TextureCache

::TickFrame() { + static constexpr u64 ticks_to_destroy = 120; + int num_iterations = 32; + for (; num_iterations > 0; --num_iterations) { + if (deletion_iterator == slot_images.end()) { + deletion_iterator = slot_images.begin(); + if (deletion_iterator == slot_images.end()) { + break; + } + } + const auto [image_id, image] = *deletion_iterator; + if (image->frame_tick + ticks_to_destroy < frame_tick) { + if (image->IsSafeDownload() && + std::ranges::none_of(image->aliased_images, [&](const AliasedImage& alias) { + return slot_images[alias.id].modification_tick > image->modification_tick; + })) { + auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image->info); + image->DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); + } + if (True(image->flags & ImageFlagBits::Tracked)) { + UntrackImage(*image); + } + UnregisterImage(image_id); + DeleteImage(image_id); + } + ++deletion_iterator; + } // Tick sentenced resources in this order to ensure they are destroyed in the right order sentenced_images.Tick(); sentenced_framebuffers.Tick(); @@ -568,17 +600,7 @@ template void TextureCache

::DownloadMemory(VAddr cpu_addr, size_t size) { std::vector images; ForEachImageInRegion(cpu_addr, size, [this, &images](ImageId image_id, ImageBase& image) { - // Skip images that were not modified from the GPU - if (False(image.flags & ImageFlagBits::GpuModified)) { - return; - } - // Skip images that .are. modified from the CPU - // We don't want to write sensitive data from the guest - if (True(image.flags & ImageFlagBits::CpuModified)) { - return; - } - if (image.info.num_samples > 1) { - LOG_WARNING(HW_GPU, "MSAA image downloads are not implemented"); + if (!image.IsSafeDownload()) { return; } image.flags &= ~ImageFlagBits::GpuModified; From d8ad6aa18754eeebbcc1a59a683c7c3ff216ebe7 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 13 Jun 2021 15:47:54 +0200 Subject: [PATCH 03/10] Reaper: Tune it up to be an smart GC. --- src/video_core/buffer_cache/buffer_cache.h | 27 +++++-- src/video_core/texture_cache/image_base.cpp | 20 +++++ src/video_core/texture_cache/image_base.h | 10 +++ src/video_core/texture_cache/texture_cache.h | 84 ++++++++++++++++++-- src/video_core/texture_cache/util.cpp | 2 + 5 files changed, 130 insertions(+), 13 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index ecb7d3dee..b4fa85c5b 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -65,6 +65,9 @@ class BufferCache { static constexpr BufferId NULL_BUFFER_ID{0}; + static constexpr u64 expected_memory = 512ULL * 1024ULL * 1024ULL; + static constexpr u64 critical_memory = 1024ULL * 1024ULL * 1024ULL; + using Maxwell = Tegra::Engines::Maxwell3D::Regs; using Runtime = typename P::Runtime; @@ -327,6 +330,7 @@ private: typename SlotVector::Iterator deletion_iterator; u64 frame_tick = 0; + u64 total_used_memory = 0; std::array> PAGE_BITS)> page_table; }; @@ -346,6 +350,10 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, template void BufferCache

::TickFrame() { + SCOPE_EXIT({ + ++frame_tick; + delayed_destruction_ring.Tick(); + }); // Calculate hits and shots and move hit bits to the right const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); @@ -359,8 +367,13 @@ void BufferCache

::TickFrame() { const bool skip_preferred = hits * 256 < shots * 251; uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; - static constexpr u64 ticks_to_destroy = 120; - int num_iterations = 32; + const bool activate_gc = total_used_memory >= expected_memory; + if (!activate_gc) { + return; + } + const bool agressive_gc = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = agressive_gc ? 60 : 120; + int num_iterations = agressive_gc ? 64 : 32; for (; num_iterations > 0; --num_iterations) { if (deletion_iterator == slot_buffers.end()) { deletion_iterator = slot_buffers.begin(); @@ -375,8 +388,6 @@ void BufferCache

::TickFrame() { DeleteBuffer(buffer_id); } } - delayed_destruction_ring.Tick(); - ++frame_tick; } template @@ -1115,8 +1126,14 @@ template template void BufferCache

::ChangeRegister(BufferId buffer_id) { const Buffer& buffer = slot_buffers[buffer_id]; + const auto size = buffer.SizeBytes(); + if (insert) { + total_used_memory += Common::AlignUp(size, 1024); + } else { + total_used_memory -= Common::AlignUp(size, 1024); + } const VAddr cpu_addr_begin = buffer.CpuAddr(); - const VAddr cpu_addr_end = cpu_addr_begin + buffer.SizeBytes(); + const VAddr cpu_addr_end = cpu_addr_begin + size; const u64 page_begin = cpu_addr_begin / PAGE_SIZE; const u64 page_end = Common::DivCeil(cpu_addr_end, PAGE_SIZE); for (u64 page = page_begin; page != page_end; ++page) { diff --git a/src/video_core/texture_cache/image_base.cpp b/src/video_core/texture_cache/image_base.cpp index bd0e7e64e..ad69d32d1 100644 --- a/src/video_core/texture_cache/image_base.cpp +++ b/src/video_core/texture_cache/image_base.cpp @@ -130,6 +130,26 @@ bool ImageBase::IsSafeDownload() const noexcept { return true; } +void ImageBase::CheckBadOverlapState() { + if (False(flags & ImageFlagBits::BadOverlap)) { + return; + } + if (!overlapping_images.empty()) { + return; + } + flags &= ~ImageFlagBits::BadOverlap; +} + +void ImageBase::CheckAliasState() { + if (False(flags & ImageFlagBits::Alias)) { + return; + } + if (!aliased_images.empty()) { + return; + } + flags &= ~ImageFlagBits::Alias; +} + void AddImageAlias(ImageBase& lhs, ImageBase& rhs, ImageId lhs_id, ImageId rhs_id) { static constexpr auto OPTIONS = RelaxedOptions::Size | RelaxedOptions::Format; ASSERT(lhs.info.type == rhs.info.type); diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 0f69d8a32..40c047ea1 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -25,6 +25,12 @@ enum class ImageFlagBits : u32 { Strong = 1 << 5, ///< Exists in the image table, the dimensions are can be trusted Registered = 1 << 6, ///< True when the image is registered Picked = 1 << 7, ///< Temporary flag to mark the image as picked + + // Garbage Collection Flags + BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher + ///< garbage collection priority + Alias = 1 << 9, ///< This image has aliases and has priority on garbage + ///< collection }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) @@ -51,6 +57,9 @@ struct ImageBase { return cpu_addr < overlap_end && overlap_cpu_addr < cpu_addr_end; } + void CheckBadOverlapState(); + void CheckAliasState(); + ImageInfo info; u32 guest_size_bytes = 0; @@ -74,6 +83,7 @@ struct ImageBase { std::vector slice_subresources; std::vector aliased_images; + std::vector overlapping_images; }; struct ImageAllocBase { diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 45ef155b5..cf48f7b02 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -75,6 +75,9 @@ class TextureCache { /// Sampler ID for bugged sampler ids static constexpr SamplerId NULL_SAMPLER_ID{0}; + static constexpr u64 expected_memory = 1024ULL * 1024ULL * 1024ULL; + static constexpr u64 critical_memory = 2 * 1024ULL * 1024ULL * 1024ULL; + using Runtime = typename P::Runtime; using Image = typename P::Image; using ImageAlloc = typename P::ImageAlloc; @@ -333,6 +336,7 @@ private: std::unordered_map, IdentityHash> page_table; bool has_deleted_images = false; + u64 total_used_memory = 0; SlotVector slot_images; SlotVector slot_image_views; @@ -380,8 +384,10 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& template void TextureCache

::TickFrame() { - static constexpr u64 ticks_to_destroy = 120; - int num_iterations = 32; + const bool high_priority_mode = total_used_memory >= expected_memory; + const bool aggressive_mode = total_used_memory >= critical_memory; + const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; + int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); for (; num_iterations > 0; --num_iterations) { if (deletion_iterator == slot_images.end()) { deletion_iterator = slot_images.begin(); @@ -390,11 +396,42 @@ void TextureCache

::TickFrame() { } } const auto [image_id, image] = *deletion_iterator; - if (image->frame_tick + ticks_to_destroy < frame_tick) { - if (image->IsSafeDownload() && - std::ranges::none_of(image->aliased_images, [&](const AliasedImage& alias) { - return slot_images[alias.id].modification_tick > image->modification_tick; - })) { + const bool is_alias = True(image->flags & ImageFlagBits::Alias); + if (is_alias && image->aliased_images.size() <= 1) { + ++deletion_iterator; + continue; + } + const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); + const bool must_download = image->IsSafeDownload(); + const u64 ticks_needed = is_bad_overlap ? ticks_to_destroy >> 4 : ticks_to_destroy; + const bool should_care = + aggressive_mode || is_bad_overlap || is_alias || (high_priority_mode && !must_download); + if (should_care && image->frame_tick + ticks_needed < frame_tick) { + if (is_bad_overlap) { + const bool overlap_check = + std::ranges::all_of(image->overlapping_images, [&](const ImageId& overlap_id) { + auto& overlap = slot_images[overlap_id]; + return (overlap.frame_tick >= image->frame_tick) && + (overlap.modification_tick > image->modification_tick); + }); + if (!overlap_check) { + ++deletion_iterator; + continue; + } + } + if (!is_bad_overlap && must_download) { + if (is_alias) { + const bool alias_check = + std::ranges::all_of(image->aliased_images, [&](const AliasedImage& alias) { + auto& alias_image = slot_images[alias.id]; + return (alias_image.frame_tick >= image->frame_tick) && + (alias_image.modification_tick > image->modification_tick); + }); + if (!alias_check) { + ++deletion_iterator; + continue; + } + } auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); const auto copies = FullDownloadCopies(image->info); image->DownloadMemory(map, copies); @@ -406,10 +443,12 @@ void TextureCache

::TickFrame() { } UnregisterImage(image_id); DeleteImage(image_id); + if (is_bad_overlap) { + num_iterations++; + } } ++deletion_iterator; } - // Tick sentenced resources in this order to ensure they are destroyed in the right order sentenced_images.Tick(); sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); @@ -989,6 +1028,7 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA std::vector overlap_ids; std::vector left_aliased_ids; std::vector right_aliased_ids; + std::vector bad_overlap_ids; ForEachImageInRegion(cpu_addr, size_bytes, [&](ImageId overlap_id, ImageBase& overlap) { if (info.type != overlap.info.type) { return; @@ -1014,9 +1054,14 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA const ImageBase new_image_base(new_info, gpu_addr, cpu_addr); if (IsSubresource(new_info, overlap, gpu_addr, options, broken_views, native_bgr)) { left_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; } else if (IsSubresource(overlap.info, new_image_base, overlap.gpu_addr, options, broken_views, native_bgr)) { right_aliased_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::Alias; + } else { + bad_overlap_ids.push_back(overlap_id); + overlap.flags |= ImageFlagBits::BadOverlap; } }); const ImageId new_image_id = slot_images.insert(runtime, new_info, gpu_addr, cpu_addr); @@ -1044,10 +1089,18 @@ ImageId TextureCache

::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA for (const ImageId aliased_id : right_aliased_ids) { ImageBase& aliased = slot_images[aliased_id]; AddImageAlias(new_image_base, aliased, new_image_id, aliased_id); + new_image.flags |= ImageFlagBits::Alias; } for (const ImageId aliased_id : left_aliased_ids) { ImageBase& aliased = slot_images[aliased_id]; AddImageAlias(aliased, new_image_base, aliased_id, new_image_id); + new_image.flags |= ImageFlagBits::Alias; + } + for (const ImageId aliased_id : bad_overlap_ids) { + ImageBase& aliased = slot_images[aliased_id]; + aliased.overlapping_images.push_back(new_image_id); + new_image.overlapping_images.push_back(aliased_id); + new_image.flags |= ImageFlagBits::BadOverlap; } RegisterImage(new_image_id); return new_image_id; @@ -1217,6 +1270,8 @@ void TextureCache

::RegisterImage(ImageId image_id) { image.flags |= ImageFlagBits::Registered; ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { page_table[page].push_back(image_id); }); + total_used_memory += + Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024); } template @@ -1225,6 +1280,9 @@ void TextureCache

::UnregisterImage(ImageId image_id) { ASSERT_MSG(True(image.flags & ImageFlagBits::Registered), "Trying to unregister an already registered image"); image.flags &= ~ImageFlagBits::Registered; + image.flags &= ~ImageFlagBits::BadOverlap; + total_used_memory -= + Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024); ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { const auto page_it = page_table.find(page); if (page_it == page_table.end()) { @@ -1298,9 +1356,19 @@ void TextureCache

::DeleteImage(ImageId image_id) { std::erase_if(other_image.aliased_images, [image_id](const AliasedImage& other_alias) { return other_alias.id == image_id; }); + other_image.CheckAliasState(); ASSERT_MSG(num_removed_aliases == 1, "Invalid number of removed aliases: {}", num_removed_aliases); } + for (const ImageId overlap_id : image.overlapping_images) { + ImageBase& other_image = slot_images[overlap_id]; + [[maybe_unused]] const size_t num_removed_overlaps = std::erase_if( + other_image.overlapping_images, + [image_id](const ImageId other_overlap_id) { return other_overlap_id == image_id; }); + other_image.CheckBadOverlapState(); + ASSERT_MSG(num_removed_overlaps == 1, "Invalid number of removed overlapps: {}", + num_removed_overlaps); + } for (const ImageViewId image_view_id : image_view_ids) { sentenced_image_view.Push(std::move(slot_image_views[image_view_id])); slot_image_views.erase(image_view_id); diff --git a/src/video_core/texture_cache/util.cpp b/src/video_core/texture_cache/util.cpp index 0d3e0804f..9680167ee 100644 --- a/src/video_core/texture_cache/util.cpp +++ b/src/video_core/texture_cache/util.cpp @@ -581,6 +581,8 @@ void SwizzleBlockLinearImage(Tegra::MemoryManager& gpu_memory, GPUVAddr gpu_addr for (s32 layer = 0; layer < info.resources.layers; ++layer) { const std::span src = input.subspan(host_offset); + gpu_memory.ReadBlockUnsafe(gpu_addr + guest_offset, dst.data(), dst.size_bytes()); + SwizzleTexture(dst, src, bytes_per_block, num_tiles.width, num_tiles.height, num_tiles.depth, block.height, block.depth); From 954ad2a61ee597b67b978b36898f008885d3adb0 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 14 Jun 2021 11:58:36 +0200 Subject: [PATCH 04/10] Reaper: Setup settings and final tuning. --- src/common/settings.cpp | 2 + src/common/settings.h | 1 + src/video_core/buffer_cache/buffer_cache.h | 7 ++- src/video_core/texture_cache/image_base.h | 8 +-- src/video_core/texture_cache/texture_cache.h | 55 ++++++++++--------- src/yuzu/configuration/config.cpp | 2 + .../configure_graphics_advanced.cpp | 6 ++ .../configure_graphics_advanced.h | 1 + .../configure_graphics_advanced.ui | 10 ++++ src/yuzu_cmd/default_ini.h | 4 ++ 10 files changed, 64 insertions(+), 32 deletions(-) diff --git a/src/common/settings.cpp b/src/common/settings.cpp index 9ec71eced..ab5cbe67b 100644 --- a/src/common/settings.cpp +++ b/src/common/settings.cpp @@ -59,6 +59,7 @@ void LogSettings() { log_setting("Renderer_UseVsync", values.use_vsync.GetValue()); log_setting("Renderer_UseAssemblyShaders", values.use_assembly_shaders.GetValue()); log_setting("Renderer_UseAsynchronousShaders", values.use_asynchronous_shaders.GetValue()); + log_setting("Renderer_UseGarbageCollection", values.use_caches_gc.GetValue()); log_setting("Renderer_AnisotropicFilteringLevel", values.max_anisotropy.GetValue()); log_setting("Audio_OutputEngine", values.sink_id); log_setting("Audio_EnableAudioStretching", values.enable_audio_stretching.GetValue()); @@ -141,6 +142,7 @@ void RestoreGlobalState(bool is_powered_on) { values.use_assembly_shaders.SetGlobal(true); values.use_asynchronous_shaders.SetGlobal(true); values.use_fast_gpu_time.SetGlobal(true); + values.use_caches_gc.SetGlobal(true); values.bg_red.SetGlobal(true); values.bg_green.SetGlobal(true); values.bg_blue.SetGlobal(true); diff --git a/src/common/settings.h b/src/common/settings.h index 6198f2d9f..a1c0bf3ad 100644 --- a/src/common/settings.h +++ b/src/common/settings.h @@ -152,6 +152,7 @@ struct Values { Setting use_assembly_shaders; Setting use_asynchronous_shaders; Setting use_fast_gpu_time; + Setting use_caches_gc; Setting bg_red; Setting bg_green; diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index b4fa85c5b..a8fb21d92 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -350,9 +350,10 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, template void BufferCache

::TickFrame() { + const bool enabled_gc = Settings::values.use_caches_gc.GetValue(); SCOPE_EXIT({ - ++frame_tick; - delayed_destruction_ring.Tick(); + ++frame_tick; + delayed_destruction_ring.Tick(); }); // Calculate hits and shots and move hit bits to the right const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); @@ -367,7 +368,7 @@ void BufferCache

::TickFrame() { const bool skip_preferred = hits * 256 < shots * 251; uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; - const bool activate_gc = total_used_memory >= expected_memory; + const bool activate_gc = enabled_gc && total_used_memory >= expected_memory; if (!activate_gc) { return; } diff --git a/src/video_core/texture_cache/image_base.h b/src/video_core/texture_cache/image_base.h index 40c047ea1..e326cab71 100644 --- a/src/video_core/texture_cache/image_base.h +++ b/src/video_core/texture_cache/image_base.h @@ -27,10 +27,10 @@ enum class ImageFlagBits : u32 { Picked = 1 << 7, ///< Temporary flag to mark the image as picked // Garbage Collection Flags - BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher - ///< garbage collection priority - Alias = 1 << 9, ///< This image has aliases and has priority on garbage - ///< collection + BadOverlap = 1 << 8, ///< This image overlaps other but doesn't fit, has higher + ///< garbage collection priority + Alias = 1 << 9, ///< This image has aliases and has priority on garbage + ///< collection }; DECLARE_ENUM_FLAG_OPERATORS(ImageFlagBits) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index cf48f7b02..8685f4418 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -22,6 +22,7 @@ #include "common/common_funcs.h" #include "common/common_types.h" #include "common/logging/log.h" +#include "common/settings.h" #include "video_core/compatible_formats.h" #include "video_core/delayed_destruction_ring.h" #include "video_core/dirty_flags.h" @@ -384,6 +385,15 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& template void TextureCache

::TickFrame() { + const bool enabled_gc = Settings::values.use_caches_gc.GetValue(); + if (!enabled_gc) { + // @Note(Blinkhawk): compile error with SCOPE_EXIT on msvc. + sentenced_images.Tick(); + sentenced_framebuffers.Tick(); + sentenced_image_view.Tick(); + ++frame_tick; + return; + } const bool high_priority_mode = total_used_memory >= expected_memory; const bool aggressive_mode = total_used_memory >= critical_memory; const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; @@ -397,22 +407,20 @@ void TextureCache

::TickFrame() { } const auto [image_id, image] = *deletion_iterator; const bool is_alias = True(image->flags & ImageFlagBits::Alias); - if (is_alias && image->aliased_images.size() <= 1) { - ++deletion_iterator; - continue; - } const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); const bool must_download = image->IsSafeDownload(); - const u64 ticks_needed = is_bad_overlap ? ticks_to_destroy >> 4 : ticks_to_destroy; - const bool should_care = - aggressive_mode || is_bad_overlap || is_alias || (high_priority_mode && !must_download); + bool should_care = is_bad_overlap || is_alias || (high_priority_mode && !must_download); + const u64 ticks_needed = + is_bad_overlap + ? ticks_to_destroy >> 4 + : ((should_care && aggressive_mode) ? ticks_to_destroy >> 1 : ticks_to_destroy); + should_care |= aggressive_mode; if (should_care && image->frame_tick + ticks_needed < frame_tick) { if (is_bad_overlap) { const bool overlap_check = std::ranges::all_of(image->overlapping_images, [&](const ImageId& overlap_id) { auto& overlap = slot_images[overlap_id]; - return (overlap.frame_tick >= image->frame_tick) && - (overlap.modification_tick > image->modification_tick); + return overlap.frame_tick >= image->frame_tick; }); if (!overlap_check) { ++deletion_iterator; @@ -420,23 +428,20 @@ void TextureCache

::TickFrame() { } } if (!is_bad_overlap && must_download) { - if (is_alias) { - const bool alias_check = - std::ranges::all_of(image->aliased_images, [&](const AliasedImage& alias) { - auto& alias_image = slot_images[alias.id]; - return (alias_image.frame_tick >= image->frame_tick) && - (alias_image.modification_tick > image->modification_tick); - }); - if (!alias_check) { - ++deletion_iterator; - continue; - } + const bool alias_check = + std::ranges::none_of(image->aliased_images, [&](const AliasedImage& alias) { + auto& alias_image = slot_images[alias.id]; + return (alias_image.frame_tick < image->frame_tick) || + (alias_image.modification_tick < image->modification_tick); + }); + + if (alias_check) { + auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); + const auto copies = FullDownloadCopies(image->info); + image->DownloadMemory(map, copies); + runtime.Finish(); + SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); } - auto map = runtime.DownloadStagingBuffer(image->unswizzled_size_bytes); - const auto copies = FullDownloadCopies(image->info); - image->DownloadMemory(map, copies); - runtime.Finish(); - SwizzleImage(gpu_memory, image->gpu_addr, image->info, copies, map.mapped_span); } if (True(image->flags & ImageFlagBits::Tracked)) { UntrackImage(*image); diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp index 916a22724..57843ac5a 100644 --- a/src/yuzu/configuration/config.cpp +++ b/src/yuzu/configuration/config.cpp @@ -817,6 +817,7 @@ void Config::ReadRendererValues() { QStringLiteral("use_asynchronous_shaders"), false); ReadSettingGlobal(Settings::values.use_fast_gpu_time, QStringLiteral("use_fast_gpu_time"), true); + ReadSettingGlobal(Settings::values.use_caches_gc, QStringLiteral("use_caches_gc"), false); ReadSettingGlobal(Settings::values.bg_red, QStringLiteral("bg_red"), 0.0); ReadSettingGlobal(Settings::values.bg_green, QStringLiteral("bg_green"), 0.0); ReadSettingGlobal(Settings::values.bg_blue, QStringLiteral("bg_blue"), 0.0); @@ -1401,6 +1402,7 @@ void Config::SaveRendererValues() { Settings::values.use_asynchronous_shaders, false); WriteSettingGlobal(QStringLiteral("use_fast_gpu_time"), Settings::values.use_fast_gpu_time, true); + WriteSettingGlobal(QStringLiteral("use_caches_gc"), Settings::values.use_caches_gc, false); // Cast to double because Qt's written float values are not human-readable WriteSettingGlobal(QStringLiteral("bg_red"), Settings::values.bg_red, 0.0); WriteSettingGlobal(QStringLiteral("bg_green"), Settings::values.bg_green, 0.0); diff --git a/src/yuzu/configuration/configure_graphics_advanced.cpp b/src/yuzu/configuration/configure_graphics_advanced.cpp index 35bf9c6be..a9e611125 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.cpp +++ b/src/yuzu/configuration/configure_graphics_advanced.cpp @@ -30,6 +30,7 @@ void ConfigureGraphicsAdvanced::SetConfiguration() { ui->use_vsync->setChecked(Settings::values.use_vsync.GetValue()); ui->use_assembly_shaders->setChecked(Settings::values.use_assembly_shaders.GetValue()); ui->use_asynchronous_shaders->setChecked(Settings::values.use_asynchronous_shaders.GetValue()); + ui->use_caches_gc->setChecked(Settings::values.use_caches_gc.GetValue()); ui->use_fast_gpu_time->setChecked(Settings::values.use_fast_gpu_time.GetValue()); if (Settings::IsConfiguringGlobal()) { @@ -62,6 +63,8 @@ void ConfigureGraphicsAdvanced::ApplyConfiguration() { ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_asynchronous_shaders, ui->use_asynchronous_shaders, use_asynchronous_shaders); + ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_caches_gc, ui->use_caches_gc, + use_caches_gc); ConfigurationShared::ApplyPerGameSetting(&Settings::values.use_fast_gpu_time, ui->use_fast_gpu_time, use_fast_gpu_time); @@ -101,6 +104,7 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { ui->use_asynchronous_shaders->setEnabled( Settings::values.use_asynchronous_shaders.UsingGlobal()); ui->use_fast_gpu_time->setEnabled(Settings::values.use_fast_gpu_time.UsingGlobal()); + ui->use_caches_gc->setEnabled(Settings::values.use_caches_gc.UsingGlobal()); ui->anisotropic_filtering_combobox->setEnabled( Settings::values.max_anisotropy.UsingGlobal()); @@ -115,6 +119,8 @@ void ConfigureGraphicsAdvanced::SetupPerGameUI() { use_asynchronous_shaders); ConfigurationShared::SetColoredTristate(ui->use_fast_gpu_time, Settings::values.use_fast_gpu_time, use_fast_gpu_time); + ConfigurationShared::SetColoredTristate(ui->use_caches_gc, Settings::values.use_caches_gc, + use_caches_gc); ConfigurationShared::SetColoredComboBox( ui->gpu_accuracy, ui->label_gpu_accuracy, static_cast(Settings::values.gpu_accuracy.GetValue(true))); diff --git a/src/yuzu/configuration/configure_graphics_advanced.h b/src/yuzu/configuration/configure_graphics_advanced.h index e61b571c7..9148aacf2 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.h +++ b/src/yuzu/configuration/configure_graphics_advanced.h @@ -38,4 +38,5 @@ private: ConfigurationShared::CheckState use_assembly_shaders; ConfigurationShared::CheckState use_asynchronous_shaders; ConfigurationShared::CheckState use_fast_gpu_time; + ConfigurationShared::CheckState use_caches_gc; }; diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 846a30586..3566e9bfa 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -103,6 +103,16 @@ + + + + Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4Gb and flush least used textures/buffers. This option may be unsafe on a few games + + + Enable GPU caches garbage collection (unsafe) + + + diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index efa1b1d18..839919062 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -227,6 +227,10 @@ use_asynchronous_gpu_emulation = # 0: Off, 1 (default): On use_vsync = +# Whether to use garbage collection or not. +# 0 (default): Off, 1: On +use_caches_gc = + # The clear color for the renderer. What shows up on the sides of the bottom screen. # Must be in range of 0.0-1.0. Defaults to 1.0 for all. bg_red = From 0dd98842bf87bdd0735d187f8d183ef7593ad747 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Mon, 14 Jun 2021 13:42:22 +0200 Subject: [PATCH 05/10] Reaper: Address Feedback. --- src/common/common_sizes.h | 1 + src/video_core/buffer_cache/buffer_cache.h | 13 ++++--- src/video_core/surface.cpp | 7 ++++ src/video_core/surface.h | 2 + src/video_core/texture_cache/texture_cache.h | 38 ++++++++++++------- .../configure_graphics_advanced.ui | 2 +- 6 files changed, 43 insertions(+), 20 deletions(-) diff --git a/src/common/common_sizes.h b/src/common/common_sizes.h index 7e9fd968b..d07b7ee5a 100644 --- a/src/common/common_sizes.h +++ b/src/common/common_sizes.h @@ -24,6 +24,7 @@ enum : u64 { Size_128_MB = 128ULL * Size_1_MB, Size_448_MB = 448ULL * Size_1_MB, Size_507_MB = 507ULL * Size_1_MB, + Size_512_MB = 512ULL * Size_1_MB, Size_562_MB = 562ULL * Size_1_MB, Size_1554_MB = 1554ULL * Size_1_MB, Size_2048_MB = 2048ULL * Size_1_MB, diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index a8fb21d92..82a4a10d6 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -16,6 +16,7 @@ #include +#include "common/common_sizes.h" #include "common/common_types.h" #include "common/div_ceil.h" #include "common/microprofile.h" @@ -65,8 +66,8 @@ class BufferCache { static constexpr BufferId NULL_BUFFER_ID{0}; - static constexpr u64 expected_memory = 512ULL * 1024ULL * 1024ULL; - static constexpr u64 critical_memory = 1024ULL * 1024ULL * 1024ULL; + static constexpr u64 EXPECTED_MEMORY = Common::Size_512_MB; + static constexpr u64 CRITICAL_MEMORY = Common::Size_1_GB; using Maxwell = Tegra::Engines::Maxwell3D::Regs; @@ -368,13 +369,13 @@ void BufferCache

::TickFrame() { const bool skip_preferred = hits * 256 < shots * 251; uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; - const bool activate_gc = enabled_gc && total_used_memory >= expected_memory; + const bool activate_gc = enabled_gc && total_used_memory >= EXPECTED_MEMORY; if (!activate_gc) { return; } - const bool agressive_gc = total_used_memory >= critical_memory; - const u64 ticks_to_destroy = agressive_gc ? 60 : 120; - int num_iterations = agressive_gc ? 64 : 32; + const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; + const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; + int num_iterations = aggressive_gc ? 64 : 32; for (; num_iterations > 0; --num_iterations) { if (deletion_iterator == slot_buffers.end()) { deletion_iterator = slot_buffers.begin(); diff --git a/src/video_core/surface.cpp b/src/video_core/surface.cpp index 6308aef94..eb1746265 100644 --- a/src/video_core/surface.cpp +++ b/src/video_core/surface.cpp @@ -283,4 +283,11 @@ std::pair GetASTCBlockSize(PixelFormat format) { return {DefaultBlockWidth(format), DefaultBlockHeight(format)}; } +u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format) { + constexpr u64 RGBA8_PIXEL_SIZE = 4; + const u64 base_block_size = static_cast(DefaultBlockWidth(format)) * + static_cast(DefaultBlockHeight(format)) * RGBA8_PIXEL_SIZE; + return (base_size * base_block_size) / BytesPerBlock(format); +} + } // namespace VideoCore::Surface diff --git a/src/video_core/surface.h b/src/video_core/surface.h index c40ab89d0..1503db81f 100644 --- a/src/video_core/surface.h +++ b/src/video_core/surface.h @@ -462,4 +462,6 @@ bool IsPixelFormatSRGB(PixelFormat format); std::pair GetASTCBlockSize(PixelFormat format); +u64 EstimatedDecompressedSize(u64 base_size, PixelFormat format); + } // namespace VideoCore::Surface diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8685f4418..8ff6f4e01 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -20,6 +20,7 @@ #include "common/alignment.h" #include "common/common_funcs.h" +#include "common/common_sizes.h" #include "common/common_types.h" #include "common/logging/log.h" #include "common/settings.h" @@ -76,8 +77,8 @@ class TextureCache { /// Sampler ID for bugged sampler ids static constexpr SamplerId NULL_SAMPLER_ID{0}; - static constexpr u64 expected_memory = 1024ULL * 1024ULL * 1024ULL; - static constexpr u64 critical_memory = 2 * 1024ULL * 1024ULL * 1024ULL; + static constexpr u64 EXPECTED_MEMORY = Common::Size_1_GB; + static constexpr u64 CRITICAL_MEMORY = Common::Size_2_GB; using Runtime = typename P::Runtime; using Image = typename P::Image; @@ -394,8 +395,8 @@ void TextureCache

::TickFrame() { ++frame_tick; return; } - const bool high_priority_mode = total_used_memory >= expected_memory; - const bool aggressive_mode = total_used_memory >= critical_memory; + const bool high_priority_mode = total_used_memory >= EXPECTED_MEMORY; + const bool aggressive_mode = total_used_memory >= CRITICAL_MEMORY; const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); for (; num_iterations > 0; --num_iterations) { @@ -405,7 +406,8 @@ void TextureCache

::TickFrame() { break; } } - const auto [image_id, image] = *deletion_iterator; + auto [image_id, image_tmp] = *deletion_iterator; + Image* image = image_tmp; // fix clang error. const bool is_alias = True(image->flags & ImageFlagBits::Alias); const bool is_bad_overlap = True(image->flags & ImageFlagBits::BadOverlap); const bool must_download = image->IsSafeDownload(); @@ -417,8 +419,8 @@ void TextureCache

::TickFrame() { should_care |= aggressive_mode; if (should_care && image->frame_tick + ticks_needed < frame_tick) { if (is_bad_overlap) { - const bool overlap_check = - std::ranges::all_of(image->overlapping_images, [&](const ImageId& overlap_id) { + const bool overlap_check = std::ranges::all_of( + image->overlapping_images, [&, image](const ImageId& overlap_id) { auto& overlap = slot_images[overlap_id]; return overlap.frame_tick >= image->frame_tick; }); @@ -428,8 +430,8 @@ void TextureCache

::TickFrame() { } } if (!is_bad_overlap && must_download) { - const bool alias_check = - std::ranges::none_of(image->aliased_images, [&](const AliasedImage& alias) { + const bool alias_check = std::ranges::none_of( + image->aliased_images, [&, image](const AliasedImage& alias) { auto& alias_image = slot_images[alias.id]; return (alias_image.frame_tick < image->frame_tick) || (alias_image.modification_tick < image->modification_tick); @@ -1275,8 +1277,13 @@ void TextureCache

::RegisterImage(ImageId image_id) { image.flags |= ImageFlagBits::Registered; ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { page_table[page].push_back(image_id); }); - total_used_memory += - Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024); + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory += Common::AlignUp(tentative_size, 1024); } template @@ -1286,8 +1293,13 @@ void TextureCache

::UnregisterImage(ImageId image_id) { "Trying to unregister an already registered image"); image.flags &= ~ImageFlagBits::Registered; image.flags &= ~ImageFlagBits::BadOverlap; - total_used_memory -= - Common::AlignUp(std::max(image.guest_size_bytes, image.unswizzled_size_bytes), 1024); + u64 tentative_size = std::max(image.guest_size_bytes, image.unswizzled_size_bytes); + if ((IsPixelFormatASTC(image.info.format) && + True(image.flags & ImageFlagBits::AcceleratedUpload)) || + True(image.flags & ImageFlagBits::Converted)) { + tentative_size = EstimatedDecompressedSize(tentative_size, image.info.format); + } + total_used_memory -= Common::AlignUp(tentative_size, 1024); ForEachPage(image.cpu_addr, image.guest_size_bytes, [this, image_id](u64 page) { const auto page_it = page_table.find(page); if (page_it == page_table.end()) { diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 3566e9bfa..4bab3d074 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -106,7 +106,7 @@ - Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4Gb and flush least used textures/buffers. This option may be unsafe on a few games + Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games. Enable GPU caches garbage collection (unsafe) From ca6f47c6862a24dfa78f3d25c8b7819636218cdd Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 17 Jun 2021 00:29:48 +0200 Subject: [PATCH 06/10] Reaper: Change memory restrictions on TC depending on host memory on VK. --- src/video_core/buffer_cache/buffer_cache.h | 48 ++++++++++--------- .../renderer_opengl/gl_texture_cache.h | 1 + .../renderer_vulkan/vk_texture_cache.cpp | 4 ++ .../renderer_vulkan/vk_texture_cache.h | 3 ++ src/video_core/texture_cache/slot_vector.h | 2 +- src/video_core/texture_cache/texture_cache.h | 46 ++++++++++++------ .../vulkan_common/vulkan_device.cpp | 14 ++++++ src/video_core/vulkan_common/vulkan_device.h | 9 ++++ .../configure_graphics_advanced.ui | 2 +- src/yuzu_cmd/default_ini.h | 2 +- 10 files changed, 90 insertions(+), 41 deletions(-) diff --git a/src/video_core/buffer_cache/buffer_cache.h b/src/video_core/buffer_cache/buffer_cache.h index 82a4a10d6..6d04d00da 100644 --- a/src/video_core/buffer_cache/buffer_cache.h +++ b/src/video_core/buffer_cache/buffer_cache.h @@ -106,6 +106,8 @@ public: void TickFrame(); + void RunGarbageCollector(); + void WriteMemory(VAddr cpu_addr, u64 size); void CachedWriteMemory(VAddr cpu_addr, u64 size); @@ -350,29 +352,7 @@ BufferCache

::BufferCache(VideoCore::RasterizerInterface& rasterizer_, } template -void BufferCache

::TickFrame() { - const bool enabled_gc = Settings::values.use_caches_gc.GetValue(); - SCOPE_EXIT({ - ++frame_tick; - delayed_destruction_ring.Tick(); - }); - // Calculate hits and shots and move hit bits to the right - const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); - const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); - std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1, - uniform_cache_hits.begin() + 1); - std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1, - uniform_cache_shots.begin() + 1); - uniform_cache_hits[0] = 0; - uniform_cache_shots[0] = 0; - - const bool skip_preferred = hits * 256 < shots * 251; - uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; - - const bool activate_gc = enabled_gc && total_used_memory >= EXPECTED_MEMORY; - if (!activate_gc) { - return; - } +void BufferCache

::RunGarbageCollector() { const bool aggressive_gc = total_used_memory >= CRITICAL_MEMORY; const u64 ticks_to_destroy = aggressive_gc ? 60 : 120; int num_iterations = aggressive_gc ? 64 : 32; @@ -392,6 +372,28 @@ void BufferCache

::TickFrame() { } } +template +void BufferCache

::TickFrame() { + // Calculate hits and shots and move hit bits to the right + const u32 hits = std::reduce(uniform_cache_hits.begin(), uniform_cache_hits.end()); + const u32 shots = std::reduce(uniform_cache_shots.begin(), uniform_cache_shots.end()); + std::copy_n(uniform_cache_hits.begin(), uniform_cache_hits.size() - 1, + uniform_cache_hits.begin() + 1); + std::copy_n(uniform_cache_shots.begin(), uniform_cache_shots.size() - 1, + uniform_cache_shots.begin() + 1); + uniform_cache_hits[0] = 0; + uniform_cache_shots[0] = 0; + + const bool skip_preferred = hits * 256 < shots * 251; + uniform_buffer_skip_cache_size = skip_preferred ? DEFAULT_SKIP_CACHE_SIZE : 0; + + if (Settings::values.use_caches_gc.GetValue() && total_used_memory >= EXPECTED_MEMORY) { + RunGarbageCollector(); + } + ++frame_tick; + delayed_destruction_ring.Tick(); +} + template void BufferCache

::WriteMemory(VAddr cpu_addr, u64 size) { ForEachBufferInRange(cpu_addr, size, [&](BufferId, Buffer& buffer) { diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index df8be12ff..12c619aca 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -235,6 +235,7 @@ struct TextureCacheParams { static constexpr bool ENABLE_VALIDATION = true; static constexpr bool FRAMEBUFFER_BLITS = true; static constexpr bool HAS_EMULATED_COPIES = true; + static constexpr bool HAS_DEVICE_MEMORY_INFO = false; using Runtime = OpenGL::TextureCacheRuntime; using Image = OpenGL::Image; diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index 52860b4cf..e8ef6f5c3 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -818,6 +818,10 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, }); } +u64 TextureCacheRuntime::GetDeviceLocalMemory() const { + return device.GetDeviceLocalMemory(); +} + Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_addr_, VAddr cpu_addr_) : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime.scheduler}, diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index 4a57d378b..d392f721b 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -97,6 +97,8 @@ struct TextureCacheRuntime { // All known Vulkan drivers can natively handle BGR textures return true; } + + u64 GetDeviceLocalMemory() const; }; class Image : public VideoCommon::ImageBase { @@ -257,6 +259,7 @@ struct TextureCacheParams { static constexpr bool ENABLE_VALIDATION = true; static constexpr bool FRAMEBUFFER_BLITS = false; static constexpr bool HAS_EMULATED_COPIES = false; + static constexpr bool HAS_DEVICE_MEMORY_INFO = true; using Runtime = Vulkan::TextureCacheRuntime; using Image = Vulkan::Image; diff --git a/src/video_core/texture_cache/slot_vector.h b/src/video_core/texture_cache/slot_vector.h index 1259e8263..6180b8c0e 100644 --- a/src/video_core/texture_cache/slot_vector.h +++ b/src/video_core/texture_cache/slot_vector.h @@ -79,7 +79,7 @@ public: Iterator(SlotVector* slot_vector_, SlotId id_) noexcept : slot_vector{slot_vector_}, id{id_} {} - bool IsValid(const u64* bitset) noexcept { + bool IsValid(const u64* bitset) const noexcept { return ((bitset[id.index / 64] >> (id.index % 64)) & 1) != 0; } diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 8ff6f4e01..64b576cbc 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -71,14 +71,16 @@ class TextureCache { static constexpr bool FRAMEBUFFER_BLITS = P::FRAMEBUFFER_BLITS; /// True when some copies have to be emulated static constexpr bool HAS_EMULATED_COPIES = P::HAS_EMULATED_COPIES; + /// True when the API can provide info about the memory of the device. + static constexpr bool HAS_DEVICE_MEMORY_INFO = P::HAS_DEVICE_MEMORY_INFO; /// Image view ID for null descriptors static constexpr ImageViewId NULL_IMAGE_VIEW_ID{0}; /// Sampler ID for bugged sampler ids static constexpr SamplerId NULL_SAMPLER_ID{0}; - static constexpr u64 EXPECTED_MEMORY = Common::Size_1_GB; - static constexpr u64 CRITICAL_MEMORY = Common::Size_2_GB; + static constexpr u64 DEFAULT_EXPECTED_MEMORY = Common::Size_1_GB; + static constexpr u64 DEFAULT_CRITICAL_MEMORY = Common::Size_2_GB; using Runtime = typename P::Runtime; using Image = typename P::Image; @@ -108,6 +110,9 @@ public: /// Notify the cache that a new frame has been queued void TickFrame(); + /// Runs the Garbage Collector. + void RunGarbageCollector(); + /// Return a constant reference to the given image view id [[nodiscard]] const ImageView& GetImageView(ImageViewId id) const noexcept; @@ -339,6 +344,8 @@ private: bool has_deleted_images = false; u64 total_used_memory = 0; + u64 expected_memory; + u64 critical_memory; SlotVector slot_images; SlotVector slot_image_views; @@ -382,21 +389,23 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& void(slot_samplers.insert(runtime, sampler_descriptor)); deletion_iterator = slot_images.begin(); + + if constexpr (HAS_DEVICE_MEMORY_INFO) { + const auto device_memory = runtime.GetDeviceLocalMemory(); + const u64 possible_expected_memory = (device_memory * 3) / 10; + const u64 possible_critical_memory = (device_memory * 6) / 10; + expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); + critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + } else { + expected_memory = DEFAULT_EXPECTED_MEMORY; + critical_memory = DEFAULT_CRITICAL_MEMORY; + } } template -void TextureCache

::TickFrame() { - const bool enabled_gc = Settings::values.use_caches_gc.GetValue(); - if (!enabled_gc) { - // @Note(Blinkhawk): compile error with SCOPE_EXIT on msvc. - sentenced_images.Tick(); - sentenced_framebuffers.Tick(); - sentenced_image_view.Tick(); - ++frame_tick; - return; - } - const bool high_priority_mode = total_used_memory >= EXPECTED_MEMORY; - const bool aggressive_mode = total_used_memory >= CRITICAL_MEMORY; +void TextureCache

::RunGarbageCollector() { + const bool high_priority_mode = total_used_memory >= expected_memory; + const bool aggressive_mode = total_used_memory >= critical_memory; const u64 ticks_to_destroy = high_priority_mode ? 60 : 100; int num_iterations = aggressive_mode ? 256 : (high_priority_mode ? 128 : 64); for (; num_iterations > 0; --num_iterations) { @@ -451,11 +460,18 @@ void TextureCache

::TickFrame() { UnregisterImage(image_id); DeleteImage(image_id); if (is_bad_overlap) { - num_iterations++; + ++num_iterations; } } ++deletion_iterator; } +} + +template +void TextureCache

::TickFrame() { + if (Settings::values.use_caches_gc.GetValue()) { + RunGarbageCollector(); + } sentenced_images.Tick(); sentenced_framebuffers.Tick(); sentenced_image_view.Tick(); diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 64206b3d2..724a0141c 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -408,6 +408,7 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR } logical = vk::Device::Create(physical, queue_cis, extensions, first_next, dld); + CollectPhysicalMemoryInfo(); CollectTelemetryParameters(); CollectToolingInfo(); @@ -818,6 +819,19 @@ void Device::CollectTelemetryParameters() { } } +void Device::CollectPhysicalMemoryInfo() { + const auto mem_properties = physical.GetMemoryProperties(); + const std::size_t num_properties = mem_properties.memoryTypeCount; + device_access_memory = 0; + for (std::size_t element = 0; element < num_properties; element++) { + if ((mem_properties.memoryTypes[element].propertyFlags & + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) { + const std::size_t heap_index = mem_properties.memoryTypes[element].heapIndex; + device_access_memory += mem_properties.memoryHeaps[heap_index].size; + } + } +} + void Device::CollectToolingInfo() { if (!ext_tooling_info) { return; diff --git a/src/video_core/vulkan_common/vulkan_device.h b/src/video_core/vulkan_common/vulkan_device.h index 67d70cd22..a1aba973b 100644 --- a/src/video_core/vulkan_common/vulkan_device.h +++ b/src/video_core/vulkan_common/vulkan_device.h @@ -225,6 +225,10 @@ public: return use_asynchronous_shaders; } + u64 GetDeviceLocalMemory() const { + return device_access_memory; + } + private: /// Checks if the physical device is suitable. void CheckSuitability(bool requires_swapchain) const; @@ -244,6 +248,9 @@ private: /// Collects information about attached tools. void CollectToolingInfo(); + /// Collects information about the device's local memory. + void CollectPhysicalMemoryInfo(); + /// Returns a list of queue initialization descriptors. std::vector GetDeviceQueueCreateInfos() const; @@ -302,6 +309,8 @@ private: /// Nsight Aftermath GPU crash tracker std::unique_ptr nsight_aftermath_tracker; + + u64 device_access_memory; }; } // namespace Vulkan diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index 4bab3d074..eaf55c517 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -109,7 +109,7 @@ Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games. - Enable GPU caches garbage collection (unsafe) + Enable GPU cache garbage collection (unsafe) diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h index 839919062..f0a0ec398 100644 --- a/src/yuzu_cmd/default_ini.h +++ b/src/yuzu_cmd/default_ini.h @@ -227,7 +227,7 @@ use_asynchronous_gpu_emulation = # 0: Off, 1 (default): On use_vsync = -# Whether to use garbage collection or not. +# Whether to use garbage collection or not for GPU caches. # 0 (default): Off, 1: On use_caches_gc = From 719a6dd5a16ed08df392af695dfc08b0f5e1f00f Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Thu, 17 Jun 2021 08:48:41 +0200 Subject: [PATCH 07/10] Reaper: Correct size calculation on Vulkan. --- src/video_core/vulkan_common/vulkan_device.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/src/video_core/vulkan_common/vulkan_device.cpp b/src/video_core/vulkan_common/vulkan_device.cpp index 724a0141c..707a8b8fb 100644 --- a/src/video_core/vulkan_common/vulkan_device.cpp +++ b/src/video_core/vulkan_common/vulkan_device.cpp @@ -821,13 +821,11 @@ void Device::CollectTelemetryParameters() { void Device::CollectPhysicalMemoryInfo() { const auto mem_properties = physical.GetMemoryProperties(); - const std::size_t num_properties = mem_properties.memoryTypeCount; + const std::size_t num_properties = mem_properties.memoryHeapCount; device_access_memory = 0; for (std::size_t element = 0; element < num_properties; element++) { - if ((mem_properties.memoryTypes[element].propertyFlags & - VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) != 0) { - const std::size_t heap_index = mem_properties.memoryTypes[element].heapIndex; - device_access_memory += mem_properties.memoryHeaps[heap_index].size; + if ((mem_properties.memoryHeaps[element].flags & VK_MEMORY_HEAP_DEVICE_LOCAL_BIT) != 0) { + device_access_memory += mem_properties.memoryHeaps[element].size; } } } From 865dd615ca05be7599dd9f866daff670877a27c3 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 20 Jun 2021 12:35:19 +0200 Subject: [PATCH 08/10] Reaper: Upgrade label from unsafe to experimental as no regressions are known now. --- src/yuzu/configuration/configure_graphics_advanced.ui | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/yuzu/configuration/configure_graphics_advanced.ui b/src/yuzu/configuration/configure_graphics_advanced.ui index eaf55c517..ad0840355 100644 --- a/src/yuzu/configuration/configure_graphics_advanced.ui +++ b/src/yuzu/configuration/configure_graphics_advanced.ui @@ -109,7 +109,7 @@ Enables garbage collection for the GPU caches, this will try to keep VRAM within 3-4 GB by flushing the least used textures/buffers. May cause issues in a few games. - Enable GPU cache garbage collection (unsafe) + Enable GPU cache garbage collection (experimental) From 569a1962c093319c89079c52b5cb6fff139c8174 Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Sun, 20 Jun 2021 17:07:17 +0200 Subject: [PATCH 09/10] Reaper: Guarantee correct deletion. --- src/video_core/renderer_opengl/gl_texture_cache.cpp | 2 ++ src/video_core/renderer_opengl/gl_texture_cache.h | 8 ++++++++ src/video_core/renderer_vulkan/vk_texture_cache.cpp | 2 ++ src/video_core/renderer_vulkan/vk_texture_cache.h | 8 ++++++++ src/video_core/texture_cache/texture_cache.h | 5 +++-- 5 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp index 9b4038615..23948feed 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.cpp +++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp @@ -737,6 +737,8 @@ Image::Image(TextureCacheRuntime& runtime, const VideoCommon::ImageInfo& info_, } } +Image::~Image() = default; + void Image::UploadMemory(const ImageBufferMap& map, std::span copies) { glBindBuffer(GL_PIXEL_UNPACK_BUFFER, map.buffer); diff --git a/src/video_core/renderer_opengl/gl_texture_cache.h b/src/video_core/renderer_opengl/gl_texture_cache.h index 12c619aca..25fe61566 100644 --- a/src/video_core/renderer_opengl/gl_texture_cache.h +++ b/src/video_core/renderer_opengl/gl_texture_cache.h @@ -143,6 +143,14 @@ public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + ~Image(); + + Image(const Image&) = delete; + Image& operator=(const Image&) = delete; + + Image(Image&&) = default; + Image& operator=(Image&&) = default; + void UploadMemory(const ImageBufferMap& map, std::span copies); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.cpp b/src/video_core/renderer_vulkan/vk_texture_cache.cpp index e8ef6f5c3..a2ab4d1ee 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_texture_cache.cpp @@ -880,6 +880,8 @@ Image::Image(TextureCacheRuntime& runtime, const ImageInfo& info_, GPUVAddr gpu_ } } +Image::~Image() = default; + void Image::UploadMemory(const StagingBufferRef& map, std::span copies) { // TODO: Move this to another API scheduler->RequestOutsideRenderPassOperationContext(); diff --git a/src/video_core/renderer_vulkan/vk_texture_cache.h b/src/video_core/renderer_vulkan/vk_texture_cache.h index d392f721b..172bcdf98 100644 --- a/src/video_core/renderer_vulkan/vk_texture_cache.h +++ b/src/video_core/renderer_vulkan/vk_texture_cache.h @@ -106,6 +106,14 @@ public: explicit Image(TextureCacheRuntime&, const VideoCommon::ImageInfo& info, GPUVAddr gpu_addr, VAddr cpu_addr); + ~Image(); + + Image(const Image&) = delete; + Image& operator=(const Image&) = delete; + + Image(Image&&) = default; + Image& operator=(Image&&) = default; + void UploadMemory(const StagingBufferRef& map, std::span copies); diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 64b576cbc..6ee654dc1 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -397,8 +397,9 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); } else { - expected_memory = DEFAULT_EXPECTED_MEMORY; - critical_memory = DEFAULT_CRITICAL_MEMORY; + // on OGL we can be more conservatives as the driver takes care. + expected_memory = DEFAULT_EXPECTED_MEMORY + Common::Size_512_MB; + critical_memory = DEFAULT_CRITICAL_MEMORY + Common::Size_1_GB; } } From f9b940a442d50875d2b45a0f2f380ccad88670da Mon Sep 17 00:00:00 2001 From: Fernando Sahmkow Date: Tue, 22 Jun 2021 22:07:17 +0200 Subject: [PATCH 10/10] Reaper: Set minimum cleaning limit on OGL. --- src/video_core/texture_cache/texture_cache.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h index 6ee654dc1..e7f8478b4 100644 --- a/src/video_core/texture_cache/texture_cache.h +++ b/src/video_core/texture_cache/texture_cache.h @@ -344,6 +344,7 @@ private: bool has_deleted_images = false; u64 total_used_memory = 0; + u64 minimum_memory; u64 expected_memory; u64 critical_memory; @@ -396,10 +397,12 @@ TextureCache

::TextureCache(Runtime& runtime_, VideoCore::RasterizerInterface& const u64 possible_critical_memory = (device_memory * 6) / 10; expected_memory = std::max(possible_expected_memory, DEFAULT_EXPECTED_MEMORY); critical_memory = std::max(possible_critical_memory, DEFAULT_CRITICAL_MEMORY); + minimum_memory = 0; } else { // on OGL we can be more conservatives as the driver takes care. expected_memory = DEFAULT_EXPECTED_MEMORY + Common::Size_512_MB; critical_memory = DEFAULT_CRITICAL_MEMORY + Common::Size_1_GB; + minimum_memory = expected_memory; } } @@ -470,7 +473,7 @@ void TextureCache

::RunGarbageCollector() { template void TextureCache

::TickFrame() { - if (Settings::values.use_caches_gc.GetValue()) { + if (Settings::values.use_caches_gc.GetValue() && total_used_memory > minimum_memory) { RunGarbageCollector(); } sentenced_images.Tick();