diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp index c36ede898..1ba544943 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.cpp @@ -1,3 +1,146 @@ // Copyright 2019 yuzu Emulator Project // Licensed under GPLv2 or any later version // Refer to the license.txt file included. + +#include +#include +#include +#include +#include + +#include "common/assert.h" +#include "common/bit_util.h" +#include "core/core.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" + +namespace Vulkan { + +namespace { + +const auto BufferUsage = + vk::BufferUsageFlagBits::eVertexBuffer | vk::BufferUsageFlagBits::eIndexBuffer | + vk::BufferUsageFlagBits::eUniformBuffer | vk::BufferUsageFlagBits::eStorageBuffer; + +const auto UploadPipelineStage = + vk::PipelineStageFlagBits::eTransfer | vk::PipelineStageFlagBits::eVertexInput | + vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | + vk::PipelineStageFlagBits::eComputeShader; + +const auto UploadAccessBarriers = + vk::AccessFlagBits::eTransferRead | vk::AccessFlagBits::eShaderRead | + vk::AccessFlagBits::eUniformRead | vk::AccessFlagBits::eVertexAttributeRead | + vk::AccessFlagBits::eIndexRead; + +auto CreateStreamBuffer(const VKDevice& device, VKScheduler& scheduler) { + return std::make_unique(device, scheduler, BufferUsage); +} + +} // Anonymous namespace + +CachedBufferBlock::CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, + CacheAddr cache_addr, std::size_t size) + : VideoCommon::BufferBlock{cache_addr, size} { + const vk::BufferCreateInfo buffer_ci({}, static_cast(size), + BufferUsage | vk::BufferUsageFlagBits::eTransferSrc | + vk::BufferUsageFlagBits::eTransferDst, + vk::SharingMode::eExclusive, 0, nullptr); + + const auto& dld{device.GetDispatchLoader()}; + const auto dev{device.GetLogical()}; + buffer.handle = dev.createBufferUnique(buffer_ci, nullptr, dld); + buffer.commit = memory_manager.Commit(*buffer.handle, false); +} + +CachedBufferBlock::~CachedBufferBlock() = default; + +VKBufferCache::VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool) + : VideoCommon::BufferCache{rasterizer, system, + CreateStreamBuffer(device, + scheduler)}, + device{device}, memory_manager{memory_manager}, scheduler{scheduler}, staging_pool{ + staging_pool} {} + +VKBufferCache::~VKBufferCache() = default; + +Buffer VKBufferCache::CreateBlock(CacheAddr cache_addr, std::size_t size) { + return std::make_shared(device, memory_manager, cache_addr, size); +} + +const vk::Buffer* VKBufferCache::ToHandle(const Buffer& buffer) { + return buffer->GetHandle(); +} + +const vk::Buffer* VKBufferCache::GetEmptyBuffer(std::size_t size) { + size = std::max(size, std::size_t(4)); + const auto& empty = staging_pool.GetUnusedBuffer(size, false); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf, auto& dld) { + cmdbuf.fillBuffer(buffer, 0, size, 0, dld); + }); + return &*empty.handle; +} + +void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + const u8* data) { + const auto& staging = staging_pool.GetUnusedBuffer(size, true); + std::memcpy(staging.commit->Map(size), data, size); + + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, + size](auto cmdbuf, auto& dld) { + cmdbuf.copyBuffer(staging, buffer, {{0, offset, size}}, dld); + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, + {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, buffer, + offset, size)}, + {}, dld); + }); +} + +void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + u8* data) { + const auto& staging = staging_pool.GetUnusedBuffer(size, true); + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, + size](auto cmdbuf, auto& dld) { + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eVertexShader | vk::PipelineStageFlagBits::eFragmentShader | + vk::PipelineStageFlagBits::eComputeShader, + vk::PipelineStageFlagBits::eTransfer, {}, {}, + {vk::BufferMemoryBarrier(vk::AccessFlagBits::eShaderWrite, + vk::AccessFlagBits::eTransferRead, VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, buffer, offset, size)}, + {}, dld); + cmdbuf.copyBuffer(buffer, staging, {{offset, 0, size}}, dld); + }); + scheduler.Finish(); + + std::memcpy(data, staging.commit->Map(size), size); +} + +void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, + std::size_t dst_offset, std::size_t size) { + scheduler.RequestOutsideRenderPassOperationContext(); + scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset, + dst_offset, size](auto cmdbuf, auto& dld) { + cmdbuf.copyBuffer(src_buffer, dst_buffer, {{src_offset, dst_offset, size}}, dld); + cmdbuf.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, UploadPipelineStage, {}, {}, + {vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferRead, + vk::AccessFlagBits::eShaderWrite, VK_QUEUE_FAMILY_IGNORED, + VK_QUEUE_FAMILY_IGNORED, src_buffer, src_offset, size), + vk::BufferMemoryBarrier(vk::AccessFlagBits::eTransferWrite, UploadAccessBarriers, + VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, dst_buffer, + dst_offset, size)}, + {}, dld); + }); +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_buffer_cache.h b/src/video_core/renderer_vulkan/vk_buffer_cache.h index bc6e584cf..3f38eed0c 100644 --- a/src/video_core/renderer_vulkan/vk_buffer_cache.h +++ b/src/video_core/renderer_vulkan/vk_buffer_cache.h @@ -3,3 +3,76 @@ // Refer to the license.txt file included. #pragma once + +#include +#include +#include + +#include "common/common_types.h" +#include "video_core/buffer_cache/buffer_cache.h" +#include "video_core/rasterizer_cache.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_stream_buffer.h" + +namespace Core { +class System; +} + +namespace Vulkan { + +class VKDevice; +class VKMemoryManager; +class VKScheduler; + +class CachedBufferBlock final : public VideoCommon::BufferBlock { +public: + explicit CachedBufferBlock(const VKDevice& device, VKMemoryManager& memory_manager, + CacheAddr cache_addr, std::size_t size); + ~CachedBufferBlock(); + + const vk::Buffer* GetHandle() const { + return &*buffer.handle; + } + +private: + VKBuffer buffer; +}; + +using Buffer = std::shared_ptr; + +class VKBufferCache final : public VideoCommon::BufferCache { +public: + explicit VKBufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, + const VKDevice& device, VKMemoryManager& memory_manager, + VKScheduler& scheduler, VKStagingBufferPool& staging_pool); + ~VKBufferCache(); + + const vk::Buffer* GetEmptyBuffer(std::size_t size) override; + +protected: + void WriteBarrier() override {} + + Buffer CreateBlock(CacheAddr cache_addr, std::size_t size) override; + + const vk::Buffer* ToHandle(const Buffer& buffer) override; + + void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + const u8* data) override; + + void DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, + u8* data) override; + + void CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, + std::size_t dst_offset, std::size_t size) override; + +private: + const VKDevice& device; + VKMemoryManager& memory_manager; + VKScheduler& scheduler; + VKStagingBufferPool& staging_pool; +}; + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp index 62f1427f5..d48d3b44c 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.cpp +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.cpp @@ -3,86 +3,144 @@ // Refer to the license.txt file included. #include -#include #include +#include #include +#include "common/alignment.h" #include "common/assert.h" #include "video_core/renderer_vulkan/declarations.h" #include "video_core/renderer_vulkan/vk_device.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" #include "video_core/renderer_vulkan/vk_resource_manager.h" #include "video_core/renderer_vulkan/vk_scheduler.h" #include "video_core/renderer_vulkan/vk_stream_buffer.h" namespace Vulkan { +namespace { + constexpr u64 WATCHES_INITIAL_RESERVE = 0x4000; constexpr u64 WATCHES_RESERVE_CHUNK = 0x1000; -VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, - vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage) - : device{device}, scheduler{scheduler}, buffer_size{size}, access{access}, pipeline_stage{ - pipeline_stage} { - CreateBuffers(memory_manager, usage); - ReserveWatches(WATCHES_INITIAL_RESERVE); +constexpr u64 STREAM_BUFFER_SIZE = 256 * 1024 * 1024; + +std::optional FindMemoryType(const VKDevice& device, u32 filter, + vk::MemoryPropertyFlags wanted) { + const auto properties = device.GetPhysical().getMemoryProperties(device.GetDispatchLoader()); + for (u32 i = 0; i < properties.memoryTypeCount; i++) { + if (!(filter & (1 << i))) { + continue; + } + if ((properties.memoryTypes[i].propertyFlags & wanted) == wanted) { + return i; + } + } + return {}; +} + +} // Anonymous namespace + +VKStreamBuffer::VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, + vk::BufferUsageFlags usage) + : device{device}, scheduler{scheduler} { + CreateBuffers(usage); + ReserveWatches(current_watches, WATCHES_INITIAL_RESERVE); + ReserveWatches(previous_watches, WATCHES_INITIAL_RESERVE); } VKStreamBuffer::~VKStreamBuffer() = default; -std::tuple VKStreamBuffer::Reserve(u64 size) { - ASSERT(size <= buffer_size); +std::tuple VKStreamBuffer::Map(u64 size, u64 alignment) { + ASSERT(size <= STREAM_BUFFER_SIZE); mapped_size = size; - if (offset + size > buffer_size) { - // The buffer would overflow, save the amount of used buffers, signal an invalidation and - // reset the state. - invalidation_mark = used_watches; - used_watches = 0; + if (alignment > 0) { + offset = Common::AlignUp(offset, alignment); + } + + WaitPendingOperations(offset); + + bool invalidated = false; + if (offset + size > STREAM_BUFFER_SIZE) { + // The buffer would overflow, save the amount of used watches and reset the state. + invalidation_mark = current_watch_cursor; + current_watch_cursor = 0; offset = 0; - } - return {mapped_pointer + offset, offset, invalidation_mark.has_value()}; -} + // Swap watches and reset waiting cursors. + std::swap(previous_watches, current_watches); + wait_cursor = 0; + wait_bound = 0; -void VKStreamBuffer::Send(u64 size) { - ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); - - if (invalidation_mark) { - // TODO(Rodrigo): Find a better way to invalidate than waiting for all watches to finish. + // Ensure that we don't wait for uncommitted fences. scheduler.Flush(); - std::for_each(watches.begin(), watches.begin() + *invalidation_mark, - [&](auto& resource) { resource->Wait(); }); - invalidation_mark = std::nullopt; + + invalidated = true; } - if (used_watches + 1 >= watches.size()) { - // Ensure that there are enough watches. - ReserveWatches(WATCHES_RESERVE_CHUNK); - } - // Add a watch for this allocation. - watches[used_watches++]->Watch(scheduler.GetFence()); - - offset += size; -} - -void VKStreamBuffer::CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage) { - const vk::BufferCreateInfo buffer_ci({}, buffer_size, usage, vk::SharingMode::eExclusive, 0, - nullptr); - const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); - buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); - commit = memory_manager.Commit(*buffer, true); - mapped_pointer = commit->GetData(); + const auto pointer = reinterpret_cast(dev.mapMemory(*memory, offset, size, {}, dld)); + return {pointer, offset, invalidated}; } -void VKStreamBuffer::ReserveWatches(std::size_t grow_size) { - const std::size_t previous_size = watches.size(); - watches.resize(previous_size + grow_size); - std::generate(watches.begin() + previous_size, watches.end(), - []() { return std::make_unique(); }); +void VKStreamBuffer::Unmap(u64 size) { + ASSERT_MSG(size <= mapped_size, "Reserved size is too small"); + + const auto dev = device.GetLogical(); + dev.unmapMemory(*memory, device.GetDispatchLoader()); + + offset += size; + + if (current_watch_cursor + 1 >= current_watches.size()) { + // Ensure that there are enough watches. + ReserveWatches(current_watches, WATCHES_RESERVE_CHUNK); + } + auto& watch = current_watches[current_watch_cursor++]; + watch.upper_bound = offset; + watch.fence.Watch(scheduler.GetFence()); +} + +void VKStreamBuffer::CreateBuffers(vk::BufferUsageFlags usage) { + const vk::BufferCreateInfo buffer_ci({}, STREAM_BUFFER_SIZE, usage, vk::SharingMode::eExclusive, + 0, nullptr); + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + buffer = dev.createBufferUnique(buffer_ci, nullptr, dld); + + const auto requirements = dev.getBufferMemoryRequirements(*buffer, dld); + // Prefer device local host visible allocations (this should hit AMD's pinned memory). + auto type = FindMemoryType(device, requirements.memoryTypeBits, + vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent | + vk::MemoryPropertyFlagBits::eDeviceLocal); + if (!type) { + // Otherwise search for a host visible allocation. + type = FindMemoryType(device, requirements.memoryTypeBits, + vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent); + ASSERT_MSG(type, "No host visible and coherent memory type found"); + } + const vk::MemoryAllocateInfo alloc_ci(requirements.size, *type); + memory = dev.allocateMemoryUnique(alloc_ci, nullptr, dld); + + dev.bindBufferMemory(*buffer, *memory, 0, dld); +} + +void VKStreamBuffer::ReserveWatches(std::vector& watches, std::size_t grow_size) { + watches.resize(watches.size() + grow_size); +} + +void VKStreamBuffer::WaitPendingOperations(u64 requested_upper_bound) { + if (!invalidation_mark) { + return; + } + while (requested_upper_bound < wait_bound && wait_cursor < *invalidation_mark) { + auto& watch = previous_watches[wait_cursor]; + wait_bound = watch.upper_bound; + watch.fence.Wait(); + ++wait_cursor; + } } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_stream_buffer.h b/src/video_core/renderer_vulkan/vk_stream_buffer.h index 842e54162..187c0c612 100644 --- a/src/video_core/renderer_vulkan/vk_stream_buffer.h +++ b/src/video_core/renderer_vulkan/vk_stream_buffer.h @@ -4,28 +4,24 @@ #pragma once -#include #include #include #include #include "common/common_types.h" #include "video_core/renderer_vulkan/declarations.h" -#include "video_core/renderer_vulkan/vk_memory_manager.h" namespace Vulkan { class VKDevice; class VKFence; class VKFenceWatch; -class VKResourceManager; class VKScheduler; -class VKStreamBuffer { +class VKStreamBuffer final { public: - explicit VKStreamBuffer(const VKDevice& device, VKMemoryManager& memory_manager, - VKScheduler& scheduler, u64 size, vk::BufferUsageFlags usage, - vk::AccessFlags access, vk::PipelineStageFlags pipeline_stage); + explicit VKStreamBuffer(const VKDevice& device, VKScheduler& scheduler, + vk::BufferUsageFlags usage); ~VKStreamBuffer(); /** @@ -34,39 +30,47 @@ public: * @returns A tuple in the following order: Raw memory pointer (with offset added), buffer * offset and a boolean that's true when buffer has been invalidated. */ - std::tuple Reserve(u64 size); + std::tuple Map(u64 size, u64 alignment); /// Ensures that "size" bytes of memory are available to the GPU, potentially recording a copy. - void Send(u64 size); + void Unmap(u64 size); - vk::Buffer GetBuffer() const { + vk::Buffer GetHandle() const { return *buffer; } private: + struct Watch final { + VKFenceWatch fence; + u64 upper_bound{}; + }; + /// Creates Vulkan buffer handles committing the required the required memory. - void CreateBuffers(VKMemoryManager& memory_manager, vk::BufferUsageFlags usage); + void CreateBuffers(vk::BufferUsageFlags usage); /// Increases the amount of watches available. - void ReserveWatches(std::size_t grow_size); + void ReserveWatches(std::vector& watches, std::size_t grow_size); + + void WaitPendingOperations(u64 requested_upper_bound); const VKDevice& device; ///< Vulkan device manager. VKScheduler& scheduler; ///< Command scheduler. - const u64 buffer_size; ///< Total size of the stream buffer. const vk::AccessFlags access; ///< Access usage of this stream buffer. const vk::PipelineStageFlags pipeline_stage; ///< Pipeline usage of this stream buffer. - UniqueBuffer buffer; ///< Mapped buffer. - VKMemoryCommit commit; ///< Memory commit. - u8* mapped_pointer{}; ///< Pointer to the host visible commit + UniqueBuffer buffer; ///< Mapped buffer. + UniqueDeviceMemory memory; ///< Memory allocation. u64 offset{}; ///< Buffer iterator. u64 mapped_size{}; ///< Size reserved for the current copy. - std::vector> watches; ///< Total watches - std::size_t used_watches{}; ///< Count of watches, reset on invalidation. - std::optional - invalidation_mark{}; ///< Number of watches used in the current invalidation. + std::vector current_watches; ///< Watches recorded in the current iteration. + std::size_t current_watch_cursor{}; ///< Count of watches, reset on invalidation. + std::optional invalidation_mark; ///< Number of watches used in the previous cycle. + + std::vector previous_watches; ///< Watches used in the previous iteration. + std::size_t wait_cursor{}; ///< Last watch being waited for completion. + u64 wait_bound{}; ///< Highest offset being watched for completion. }; } // namespace Vulkan