Merge pull request #10473 from GPUCode/vma

Use vulkan memory allocator
This commit is contained in:
liamwhite 2023-06-27 11:21:36 -04:00 committed by GitHub
commit c6959449d1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 415 additions and 366 deletions

3
.gitmodules vendored
View file

@ -55,3 +55,6 @@
[submodule "tzdb_to_nx"]
path = externals/nx_tzdb/tzdb_to_nx
url = https://github.com/lat9nq/tzdb_to_nx.git
[submodule "VulkanMemoryAllocator"]
path = externals/vma/VulkanMemoryAllocator
url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator

View file

@ -143,6 +143,11 @@ endif()
# TZDB (Time Zone Database)
add_subdirectory(nx_tzdb)
# VMA (Vulkan Memory Allocator)
# vma/vma.cpp is the only source compiled into this target. The submodule's include directory is
# exported as PUBLIC so that targets linking against `vma` can include its headers, while the
# Vulkan headers are only needed to build the target itself and are therefore linked PRIVATE.
add_library(vma vma/vma.cpp)
target_include_directories(vma PUBLIC ./vma/VulkanMemoryAllocator/include)
target_link_libraries(vma PRIVATE Vulkan::Headers)
if (NOT TARGET LLVM::Demangle)
add_library(demangle demangle/ItaniumDemangle.cpp)
target_include_directories(demangle PUBLIC ./demangle)

@ -0,0 +1 @@
Subproject commit 0aa3989b8f382f185fdf646cc83a1d16fa31d6ab

8
externals/vma/vma.cpp vendored Normal file
View file

@ -0,0 +1,8 @@
// SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
// This translation unit exists only to build the Vulkan Memory Allocator (VMA) library code.
// Per the VMA documentation, defining VMA_IMPLEMENTATION before including the header makes the
// header emit its function definitions here; every other file includes the header without it.
#define VMA_IMPLEMENTATION
// Per the VMA documentation, these two switches select how the library obtains Vulkan entry
// points: 0 disables use of statically linked Vulkan functions, and 1 enables fetching them at
// run time through vkGetInstanceProcAddr / vkGetDeviceProcAddr supplied by the application.
#define VMA_STATIC_VULKAN_FUNCTIONS 0
#define VMA_DYNAMIC_VULKAN_FUNCTIONS 1
#include <vk_mem_alloc.h>

View file

@ -291,7 +291,7 @@ target_link_options(video_core PRIVATE ${FFmpeg_LDFLAGS})
add_dependencies(video_core host_shaders)
target_include_directories(video_core PRIVATE ${HOST_SHADERS_INCLUDE})
target_link_libraries(video_core PRIVATE sirit Vulkan::Headers)
target_link_libraries(video_core PRIVATE sirit Vulkan::Headers vma)
if (ENABLE_NSIGHT_AFTERMATH)
if (NOT DEFINED ENV{NSIGHT_AFTERMATH_SDK})

View file

@ -89,8 +89,8 @@ RendererVulkan::RendererVulkan(Core::TelemetrySession& telemetry_session_,
Settings::values.renderer_debug.GetValue())),
debug_callback(Settings::values.renderer_debug ? CreateDebugCallback(instance) : nullptr),
surface(CreateSurface(instance, render_window.GetWindowInfo())),
device(CreateDevice(instance, dld, *surface)), memory_allocator(device, false),
state_tracker(), scheduler(device, state_tracker),
device(CreateDevice(instance, dld, *surface)), memory_allocator(device), state_tracker(),
scheduler(device, state_tracker),
swapchain(*surface, device, scheduler, render_window.GetFramebufferLayout().width,
render_window.GetFramebufferLayout().height, false),
present_manager(instance, render_window, device, memory_allocator, scheduler, swapchain,
@ -173,7 +173,7 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
return;
}
const Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
vk::Image staging_image = device.GetLogical().CreateImage(VkImageCreateInfo{
vk::Image staging_image = memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
@ -196,7 +196,6 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
const auto image_commit = memory_allocator.Commit(staging_image, MemoryUsage::DeviceLocal);
const vk::ImageView dst_view = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@ -234,8 +233,8 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
};
const vk::Buffer dst_buffer = device.GetLogical().CreateBuffer(dst_buffer_info);
MemoryCommit dst_buffer_memory = memory_allocator.Commit(dst_buffer, MemoryUsage::Download);
const vk::Buffer dst_buffer =
memory_allocator.CreateBuffer(dst_buffer_info, MemoryUsage::Download);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([&](vk::CommandBuffer cmdbuf) {
@ -309,8 +308,9 @@ void Vulkan::RendererVulkan::RenderScreenshot(const Tegra::FramebufferConfig& fr
scheduler.Finish();
// Copy backing image data to the QImage screenshot buffer
const auto dst_memory_map = dst_buffer_memory.Map();
std::memcpy(renderer_settings.screenshot_bits, dst_memory_map.data(), dst_memory_map.size());
dst_buffer.Invalidate();
std::memcpy(renderer_settings.screenshot_bits, dst_buffer.Mapped().data(),
dst_buffer.Mapped().size());
renderer_settings.screenshot_complete_callback(false);
renderer_settings.screenshot_requested = false;
}

View file

@ -162,7 +162,7 @@ void BlitScreen::Draw(const Tegra::FramebufferConfig& framebuffer,
SetUniformData(data, layout);
SetVertexData(data, framebuffer, layout);
const std::span<u8> mapped_span = buffer_commit.Map();
const std::span<u8> mapped_span = buffer.Mapped();
std::memcpy(mapped_span.data(), &data, sizeof(data));
if (!use_accelerated) {
@ -1071,14 +1071,9 @@ void BlitScreen::ReleaseRawImages() {
scheduler.Wait(tick);
}
raw_images.clear();
raw_buffer_commits.clear();
aa_image_view.reset();
aa_image.reset();
aa_commit = MemoryCommit{};
buffer.reset();
buffer_commit = MemoryCommit{};
}
void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer) {
@ -1094,20 +1089,18 @@ void BlitScreen::CreateStagingBuffer(const Tegra::FramebufferConfig& framebuffer
.pQueueFamilyIndices = nullptr,
};
buffer = device.GetLogical().CreateBuffer(ci);
buffer_commit = memory_allocator.Commit(buffer, MemoryUsage::Upload);
buffer = memory_allocator.CreateBuffer(ci, MemoryUsage::Upload);
}
void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
raw_images.resize(image_count);
raw_image_views.resize(image_count);
raw_buffer_commits.resize(image_count);
const auto create_image = [&](bool used_on_framebuffer = false, u32 up_scale = 1,
u32 down_shift = 0) {
u32 extra_usages = used_on_framebuffer ? VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT
: VK_IMAGE_USAGE_TRANSFER_DST_BIT;
return device.GetLogical().CreateImage(VkImageCreateInfo{
return memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@ -1130,9 +1123,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
};
const auto create_commit = [&](vk::Image& image) {
return memory_allocator.Commit(image, MemoryUsage::DeviceLocal);
};
const auto create_image_view = [&](vk::Image& image, bool used_on_framebuffer = false) {
return device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
@ -1161,7 +1151,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
for (size_t i = 0; i < image_count; ++i) {
raw_images[i] = create_image();
raw_buffer_commits[i] = create_commit(raw_images[i]);
raw_image_views[i] = create_image_view(raw_images[i]);
}
@ -1169,7 +1158,6 @@ void BlitScreen::CreateRawImages(const Tegra::FramebufferConfig& framebuffer) {
const u32 up_scale = Settings::values.resolution_info.up_scale;
const u32 down_shift = Settings::values.resolution_info.down_shift;
aa_image = create_image(true, up_scale, down_shift);
aa_commit = create_commit(aa_image);
aa_image_view = create_image_view(aa_image, true);
VkExtent2D size{
.width = (up_scale * framebuffer.width) >> down_shift,

View file

@ -142,13 +142,11 @@ private:
vk::Sampler sampler;
vk::Buffer buffer;
MemoryCommit buffer_commit;
std::vector<u64> resource_ticks;
std::vector<vk::Image> raw_images;
std::vector<vk::ImageView> raw_image_views;
std::vector<MemoryCommit> raw_buffer_commits;
vk::DescriptorPool aa_descriptor_pool;
vk::DescriptorSetLayout aa_descriptor_set_layout;
@ -159,7 +157,6 @@ private:
vk::DescriptorSets aa_descriptor_sets;
vk::Image aa_image;
vk::ImageView aa_image_view;
MemoryCommit aa_commit;
u32 raw_width = 0;
u32 raw_height = 0;

View file

@ -50,7 +50,7 @@ size_t BytesPerIndex(VkIndexType index_type) {
}
}
vk::Buffer CreateBuffer(const Device& device, u64 size) {
vk::Buffer CreateBuffer(const Device& device, const MemoryAllocator& memory_allocator, u64 size) {
VkBufferUsageFlags flags =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT |
@ -60,7 +60,7 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
if (device.IsExtTransformFeedbackSupported()) {
flags |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
return device.GetLogical().CreateBuffer({
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@ -69,7 +69,8 @@ vk::Buffer CreateBuffer(const Device& device, u64 size) {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
};
return memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
}
} // Anonymous namespace
@ -79,8 +80,8 @@ Buffer::Buffer(BufferCacheRuntime&, VideoCommon::NullBufferParams null_params)
Buffer::Buffer(BufferCacheRuntime& runtime, VideoCore::RasterizerInterface& rasterizer_,
VAddr cpu_addr_, u64 size_bytes_)
: VideoCommon::BufferBase<VideoCore::RasterizerInterface>(rasterizer_, cpu_addr_, size_bytes_),
device{&runtime.device}, buffer{CreateBuffer(*device, SizeBytes())},
commit{runtime.memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal)} {
device{&runtime.device}, buffer{
CreateBuffer(*device, runtime.memory_allocator, SizeBytes())} {
if (runtime.device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT(fmt::format("Buffer 0x{:x}", CpuAddr()).c_str());
}
@ -138,7 +139,7 @@ public:
const u32 num_first_offset_copies = 4;
const size_t bytes_per_index = BytesPerIndex(index_type);
const size_t size_bytes = num_triangle_indices * bytes_per_index * num_first_offset_copies;
buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@ -147,14 +148,21 @@ public:
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
};
buffer = memory_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
if (device.HasDebuggingToolAttached()) {
buffer.SetObjectNameEXT("Quad LUT");
}
memory_commit = memory_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
const StagingBufferRef staging = staging_pool.Request(size_bytes, MemoryUsage::Upload);
u8* staging_data = staging.mapped_span.data();
const bool host_visible = buffer.IsHostVisible();
const StagingBufferRef staging = [&] {
if (host_visible) {
return StagingBufferRef{};
}
return staging_pool.Request(size_bytes, MemoryUsage::Upload);
}();
u8* staging_data = host_visible ? buffer.Mapped().data() : staging.mapped_span.data();
const size_t quad_size = bytes_per_index * 6;
for (u32 first = 0; first < num_first_offset_copies; ++first) {
@ -164,29 +172,33 @@ public:
}
}
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
const VkBufferCopy copy{
.srcOffset = src_offset,
.dstOffset = 0,
.size = size_bytes,
};
const VkBufferMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = dst_buffer,
.offset = 0,
.size = size_bytes,
};
cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
});
if (!host_visible) {
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer = staging.buffer, src_offset = staging.offset,
dst_buffer = *buffer, size_bytes](vk::CommandBuffer cmdbuf) {
const VkBufferCopy copy{
.srcOffset = src_offset,
.dstOffset = 0,
.size = size_bytes,
};
const VkBufferMemoryBarrier write_barrier{
.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
.pNext = nullptr,
.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT,
.dstAccessMask = VK_ACCESS_INDEX_READ_BIT,
.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
.buffer = dst_buffer,
.offset = 0,
.size = size_bytes,
};
cmdbuf.CopyBuffer(src_buffer, dst_buffer, copy);
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_TRANSFER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, write_barrier);
});
} else {
buffer.Flush();
}
}
void BindBuffer(u32 first) {
@ -587,11 +599,10 @@ void BufferCacheRuntime::ReserveNullBuffer() {
create_info.usage |= VK_BUFFER_USAGE_TRANSFORM_FEEDBACK_BUFFER_BIT_EXT;
}
create_info.usage |= VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT;
null_buffer = device.GetLogical().CreateBuffer(create_info);
null_buffer = memory_allocator.CreateBuffer(create_info, MemoryUsage::DeviceLocal);
if (device.HasDebuggingToolAttached()) {
null_buffer.SetObjectNameEXT("Null buffer");
}
null_buffer_commit = memory_allocator.Commit(null_buffer, MemoryUsage::DeviceLocal);
scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([buffer = *null_buffer](vk::CommandBuffer cmdbuf) {

View file

@ -48,7 +48,6 @@ private:
const Device* device{};
vk::Buffer buffer;
MemoryCommit commit;
std::vector<BufferView> views;
};
@ -142,7 +141,6 @@ private:
std::shared_ptr<QuadStripIndexBuffer> quad_strip_index_buffer;
vk::Buffer null_buffer;
MemoryCommit null_buffer_commit;
std::unique_ptr<Uint8Pass> uint8_pass;
QuadIndexedPass quad_index_pass;

View file

@ -205,10 +205,9 @@ void FSR::CreateDescriptorSets() {
void FSR::CreateImages() {
images.resize(image_count * 2);
image_views.resize(image_count * 2);
buffer_commits.resize(image_count * 2);
for (size_t i = 0; i < image_count * 2; ++i) {
images[i] = device.GetLogical().CreateImage(VkImageCreateInfo{
images[i] = memory_allocator.CreateImage(VkImageCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@ -231,7 +230,6 @@ void FSR::CreateImages() {
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
buffer_commits[i] = memory_allocator.Commit(images[i], MemoryUsage::DeviceLocal);
image_views[i] = device.GetLogical().CreateImageView(VkImageViewCreateInfo{
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,

View file

@ -47,7 +47,6 @@ private:
vk::Sampler sampler;
std::vector<vk::Image> images;
std::vector<vk::ImageView> image_views;
std::vector<MemoryCommit> buffer_commits;
};
} // namespace Vulkan

View file

@ -181,7 +181,7 @@ void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_
frame->height = height;
frame->is_srgb = is_srgb;
frame->image = dld.CreateImage({
frame->image = memory_allocator.CreateImage({
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
.flags = VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT,
@ -204,8 +204,6 @@ void PresentManager::RecreateFrame(Frame* frame, u32 width, u32 height, bool is_
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
});
frame->image_commit = memory_allocator.Commit(frame->image, MemoryUsage::DeviceLocal);
frame->image_view = dld.CreateImageView({
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
.pNext = nullptr,

View file

@ -29,7 +29,6 @@ struct Frame {
vk::Image image;
vk::ImageView image_view;
vk::Framebuffer framebuffer;
MemoryCommit image_commit;
vk::CommandBuffer cmdbuf;
vk::Semaphore render_ready;
vk::Fence present_done;

View file

@ -25,9 +25,7 @@ namespace {
#define ARRAY_TO_SPAN(a) std::span(a, (sizeof(a) / sizeof(a[0])))
std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device,
MemoryAllocator& allocator,
VkExtent2D dimensions, VkFormat format) {
vk::Image CreateWrappedImage(MemoryAllocator& allocator, VkExtent2D dimensions, VkFormat format) {
const VkImageCreateInfo image_ci{
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
.pNext = nullptr,
@ -46,11 +44,7 @@ std::pair<vk::Image, MemoryCommit> CreateWrappedImage(const Device& device,
.pQueueFamilyIndices = nullptr,
.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
};
auto image = device.GetLogical().CreateImage(image_ci);
auto commit = allocator.Commit(image, Vulkan::MemoryUsage::DeviceLocal);
return std::make_pair(std::move(image), std::move(commit));
return allocator.CreateImage(image_ci);
}
void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayout target_layout,
@ -82,7 +76,7 @@ void TransitionImageLayout(vk::CommandBuffer& cmdbuf, VkImage image, VkImageLayo
void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& scheduler,
vk::Image& image, VkExtent2D dimensions, VkFormat format,
std::span<const u8> initial_contents = {}) {
auto upload_buffer = device.GetLogical().CreateBuffer(VkBufferCreateInfo{
const VkBufferCreateInfo upload_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@ -91,9 +85,10 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
auto upload_commit = allocator.Commit(upload_buffer, MemoryUsage::Upload);
std::ranges::copy(initial_contents, upload_commit.Map().begin());
};
auto upload_buffer = allocator.CreateBuffer(upload_ci, MemoryUsage::Upload);
std::ranges::copy(initial_contents, upload_buffer.Mapped().begin());
upload_buffer.Flush();
const std::array<VkBufferImageCopy, 1> regions{{{
.bufferOffset = 0,
@ -117,9 +112,6 @@ void UploadImage(const Device& device, MemoryAllocator& allocator, Scheduler& sc
VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
});
scheduler.Finish();
// This should go out of scope before the commit
auto upload_buffer2 = std::move(upload_buffer);
}
vk::ImageView CreateWrappedImageView(const Device& device, vk::Image& image, VkFormat format) {
@ -531,10 +523,8 @@ void SMAA::CreateImages() {
static constexpr VkExtent2D area_extent{AREATEX_WIDTH, AREATEX_HEIGHT};
static constexpr VkExtent2D search_extent{SEARCHTEX_WIDTH, SEARCHTEX_HEIGHT};
std::tie(m_static_images[Area], m_static_buffer_commits[Area]) =
CreateWrappedImage(m_device, m_allocator, area_extent, VK_FORMAT_R8G8_UNORM);
std::tie(m_static_images[Search], m_static_buffer_commits[Search]) =
CreateWrappedImage(m_device, m_allocator, search_extent, VK_FORMAT_R8_UNORM);
m_static_images[Area] = CreateWrappedImage(m_allocator, area_extent, VK_FORMAT_R8G8_UNORM);
m_static_images[Search] = CreateWrappedImage(m_allocator, search_extent, VK_FORMAT_R8_UNORM);
m_static_image_views[Area] =
CreateWrappedImageView(m_device, m_static_images[Area], VK_FORMAT_R8G8_UNORM);
@ -544,12 +534,11 @@ void SMAA::CreateImages() {
for (u32 i = 0; i < m_image_count; i++) {
Images& images = m_dynamic_images.emplace_back();
std::tie(images.images[Blend], images.buffer_commits[Blend]) =
CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
std::tie(images.images[Edges], images.buffer_commits[Edges]) =
CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT);
std::tie(images.images[Output], images.buffer_commits[Output]) =
CreateWrappedImage(m_device, m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
images.images[Blend] =
CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
images.images[Edges] = CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16_SFLOAT);
images.images[Output] =
CreateWrappedImage(m_allocator, m_extent, VK_FORMAT_R16G16B16A16_SFLOAT);
images.image_views[Blend] =
CreateWrappedImageView(m_device, images.images[Blend], VK_FORMAT_R16G16B16A16_SFLOAT);

View file

@ -66,13 +66,11 @@ private:
std::array<vk::Pipeline, MaxSMAAStage> m_pipelines{};
std::array<vk::RenderPass, MaxSMAAStage> m_renderpasses{};
std::array<MemoryCommit, MaxStaticImage> m_static_buffer_commits;
std::array<vk::Image, MaxStaticImage> m_static_images{};
std::array<vk::ImageView, MaxStaticImage> m_static_image_views{};
struct Images {
vk::DescriptorSets descriptor_sets{};
std::array<MemoryCommit, MaxDynamicImage> buffer_commits;
std::array<vk::Image, MaxDynamicImage> images{};
std::array<vk::ImageView, MaxDynamicImage> image_views{};
std::array<vk::Framebuffer, MaxSMAAStage> framebuffers{};

View file

@ -30,55 +30,6 @@ constexpr VkDeviceSize MAX_STREAM_BUFFER_REQUEST_SIZE = 8_MiB;
/// Size in bytes of the stream buffer; this many bytes of its memory are mapped for host access.
constexpr VkDeviceSize STREAM_BUFFER_SIZE = 128_MiB;
/// Size in bytes of one region when the stream buffer is split into NUM_SYNCS equal parts.
constexpr VkDeviceSize REGION_SIZE = STREAM_BUFFER_SIZE / StagingBufferPool::NUM_SYNCS;
/// Memory property flags requesting host-visible and host-coherent memory.
constexpr VkMemoryPropertyFlags HOST_FLAGS =
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
/// HOST_FLAGS with the device-local bit added; this is the combination tried first when a
/// device-local memory type is requested.
constexpr VkMemoryPropertyFlags STREAM_FLAGS = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | HOST_FLAGS;
/// Tells whether a memory heap is large enough to host the stream buffer.
/// A heap qualifies when two thirds of its size (integer arithmetic) is strictly greater than
/// STREAM_BUFFER_SIZE.
/// @param heap Memory heap description to test.
/// @return True if the heap passes the size check, false otherwise.
bool IsStreamHeap(VkMemoryHeap heap) noexcept {
    const auto two_thirds_of_heap = (heap.size * 2) / 3;
    return two_thirds_of_heap > STREAM_BUFFER_SIZE;
}
std::optional<u32> FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
VkMemoryPropertyFlags flags) noexcept {
for (u32 type_index = 0; type_index < props.memoryTypeCount; ++type_index) {
if (((type_mask >> type_index) & 1) == 0) {
// Memory type is incompatible
continue;
}
const VkMemoryType& memory_type = props.memoryTypes[type_index];
if ((memory_type.propertyFlags & flags) != flags) {
// Memory type doesn't have the flags we want
continue;
}
if (!IsStreamHeap(props.memoryHeaps[memory_type.heapIndex])) {
// Memory heap is not suitable for streaming
continue;
}
// Success!
return type_index;
}
return std::nullopt;
}
/// Picks the memory type index to use for the stream buffer.
/// When try_device_local is set, a type providing STREAM_FLAGS (device local, host visible and
/// host coherent) is looked up first; if that is not requested or not found, a type providing
/// only HOST_FLAGS is looked up instead.
/// @param props            Memory properties of the physical device.
/// @param type_mask        Bit mask of allowed memory type indices.
/// @param try_device_local Whether to attempt the device-local lookup before the host-only one.
/// @return Index of the selected memory type.
/// @throws vk::Exception with VK_ERROR_OUT_OF_DEVICE_MEMORY when no memory type matches.
u32 FindMemoryTypeIndex(const VkPhysicalDeviceMemoryProperties& props, u32 type_mask,
                        bool try_device_local) {
    if (try_device_local) {
        // Nvidia and AMD have a dedicated heap that offers a DEVICE_LOCAL_BIT type
        if (const std::optional<u32> device_local =
                FindMemoryTypeIndex(props, type_mask, STREAM_FLAGS)) {
            return *device_local;
        }
    }
    // Fall back to a type without the DEVICE_LOCAL_BIT
    if (const std::optional<u32> host_only = FindMemoryTypeIndex(props, type_mask, HOST_FLAGS)) {
        return *host_only;
    }
    // Not expected to happen; if it does, report it as an out of memory situation
    throw vk::Exception(VK_ERROR_OUT_OF_DEVICE_MEMORY);
}
/// Returns the index of the REGION_SIZE-sized region that the given position falls into.
/// @param iterator Position to classify (presumably a byte offset into the stream buffer;
///                 confirm against the callers).
/// @return The position divided by REGION_SIZE, rounded down.
size_t Region(size_t iterator) noexcept {
    const auto region_index = iterator / REGION_SIZE;
    return region_index;
}
@ -87,8 +38,7 @@ size_t Region(size_t iterator) noexcept {
StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& memory_allocator_,
Scheduler& scheduler_)
: device{device_}, memory_allocator{memory_allocator_}, scheduler{scheduler_} {
const vk::Device& dev = device.GetLogical();
stream_buffer = dev.CreateBuffer(VkBufferCreateInfo{
const VkBufferCreateInfo stream_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@ -99,46 +49,13 @@ StagingBufferPool::StagingBufferPool(const Device& device_, MemoryAllocator& mem
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
};
stream_buffer = memory_allocator.CreateBuffer(stream_ci, MemoryUsage::Stream);
if (device.HasDebuggingToolAttached()) {
stream_buffer.SetObjectNameEXT("Stream Buffer");
}
VkMemoryDedicatedRequirements dedicated_reqs{
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
.pNext = nullptr,
.prefersDedicatedAllocation = VK_FALSE,
.requiresDedicatedAllocation = VK_FALSE,
};
const auto requirements = dev.GetBufferMemoryRequirements(*stream_buffer, &dedicated_reqs);
const bool make_dedicated = dedicated_reqs.prefersDedicatedAllocation == VK_TRUE ||
dedicated_reqs.requiresDedicatedAllocation == VK_TRUE;
const VkMemoryDedicatedAllocateInfo dedicated_info{
.sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
.pNext = nullptr,
.image = nullptr,
.buffer = *stream_buffer,
};
const auto memory_properties = device.GetPhysical().GetMemoryProperties().memoryProperties;
VkMemoryAllocateInfo stream_memory_info{
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = make_dedicated ? &dedicated_info : nullptr,
.allocationSize = requirements.size,
.memoryTypeIndex =
FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, true),
};
stream_memory = dev.TryAllocateMemory(stream_memory_info);
if (!stream_memory) {
LOG_INFO(Render_Vulkan, "Dynamic memory allocation failed, trying with system memory");
stream_memory_info.memoryTypeIndex =
FindMemoryTypeIndex(memory_properties, requirements.memoryTypeBits, false);
stream_memory = dev.AllocateMemory(stream_memory_info);
}
if (device.HasDebuggingToolAttached()) {
stream_memory.SetObjectNameEXT("Stream Buffer Memory");
}
stream_buffer.BindMemory(*stream_memory, 0);
stream_pointer = stream_memory.Map(0, STREAM_BUFFER_SIZE);
stream_pointer = stream_buffer.Mapped();
ASSERT_MSG(!stream_pointer.empty(), "Stream buffer must be host visible!");
}
StagingBufferPool::~StagingBufferPool() = default;
@ -199,7 +116,7 @@ StagingBufferRef StagingBufferPool::GetStreamBuffer(size_t size) {
return StagingBufferRef{
.buffer = *stream_buffer,
.offset = static_cast<VkDeviceSize>(offset),
.mapped_span = std::span<u8>(stream_pointer + offset, size),
.mapped_span = stream_pointer.subspan(offset, size),
.usage{},
.log2_level{},
.index{},
@ -247,7 +164,7 @@ std::optional<StagingBufferRef> StagingBufferPool::TryGetReservedBuffer(size_t s
StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage usage,
bool deferred) {
const u32 log2 = Common::Log2Ceil64(size);
vk::Buffer buffer = device.GetLogical().CreateBuffer({
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@ -259,17 +176,15 @@ StagingBufferRef StagingBufferPool::CreateStagingBuffer(size_t size, MemoryUsage
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
};
vk::Buffer buffer = memory_allocator.CreateBuffer(buffer_ci, usage);
if (device.HasDebuggingToolAttached()) {
++buffer_index;
buffer.SetObjectNameEXT(fmt::format("Staging Buffer {}", buffer_index).c_str());
}
MemoryCommit commit = memory_allocator.Commit(buffer, usage);
const std::span<u8> mapped_span = IsHostVisible(usage) ? commit.Map() : std::span<u8>{};
const std::span<u8> mapped_span = buffer.Mapped();
StagingBuffer& entry = GetCache(usage)[log2].entries.emplace_back(StagingBuffer{
.buffer = std::move(buffer),
.commit = std::move(commit),
.mapped_span = mapped_span,
.usage = usage,
.log2_level = log2,

View file

@ -46,7 +46,6 @@ private:
struct StagingBuffer {
vk::Buffer buffer;
MemoryCommit commit;
std::span<u8> mapped_span;
MemoryUsage usage;
u32 log2_level;
@ -97,8 +96,7 @@ private:
Scheduler& scheduler;
vk::Buffer stream_buffer;
vk::DeviceMemory stream_memory;
u8* stream_pointer = nullptr;
std::span<u8> stream_pointer;
size_t iterator = 0;
size_t used_iterator = 0;

View file

@ -15,7 +15,6 @@
#include "video_core/renderer_vulkan/blit_image.h"
#include "video_core/renderer_vulkan/maxwell_to_vk.h"
#include "video_core/renderer_vulkan/vk_compute_pass.h"
#include "video_core/renderer_vulkan/vk_rasterizer.h"
#include "video_core/renderer_vulkan/vk_render_pass_cache.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"
#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h"
@ -163,11 +162,12 @@ constexpr VkBorderColor ConvertBorderColor(const std::array<float, 4>& color) {
};
}
[[nodiscard]] vk::Image MakeImage(const Device& device, const ImageInfo& info) {
[[nodiscard]] vk::Image MakeImage(const Device& device, const MemoryAllocator& allocator,
const ImageInfo& info) {
if (info.type == ImageType::Buffer) {
return vk::Image{};
}
return device.GetLogical().CreateImage(MakeImageCreateInfo(device, info));
return allocator.CreateImage(MakeImageCreateInfo(device, info));
}
[[nodiscard]] VkImageAspectFlags ImageAspectMask(PixelFormat format) {
@ -839,14 +839,14 @@ bool TextureCacheRuntime::ShouldReinterpret(Image& dst, Image& src) {
VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
const auto level = (8 * sizeof(size_t)) - std::countl_zero(needed_size - 1ULL);
if (buffer_commits[level]) {
if (buffers[level]) {
return *buffers[level];
}
const auto new_size = Common::NextPow2(needed_size);
static constexpr VkBufferUsageFlags flags =
VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT |
VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT;
buffers[level] = device.GetLogical().CreateBuffer({
const VkBufferCreateInfo temp_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@ -855,9 +855,8 @@ VkBuffer TextureCacheRuntime::GetTemporaryBuffer(size_t needed_size) {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
buffer_commits[level] = std::make_unique<MemoryCommit>(
memory_allocator.Commit(buffers[level], MemoryUsage::DeviceLocal));
};
buffers[level] = memory_allocator.CreateBuffer(temp_ci, MemoryUsage::DeviceLocal);
return *buffers[level];
}
@ -1266,8 +1265,8 @@ void TextureCacheRuntime::TickFrame() {}
Image::Image(TextureCacheRuntime& runtime_, const ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), scheduler{&runtime_.scheduler},
runtime{&runtime_}, original_image(MakeImage(runtime_.device, info)),
commit(runtime_.memory_allocator.Commit(original_image, MemoryUsage::DeviceLocal)),
runtime{&runtime_},
original_image(MakeImage(runtime_.device, runtime_.memory_allocator, info)),
aspect_mask(ImageAspectMask(info.format)) {
if (IsPixelFormatASTC(info.format) && !runtime->device.IsOptimalAstcSupported()) {
if (Settings::values.async_astc.GetValue()) {
@ -1468,9 +1467,7 @@ bool Image::ScaleUp(bool ignore) {
auto scaled_info = info;
scaled_info.size.width = scaled_width;
scaled_info.size.height = scaled_height;
scaled_image = MakeImage(runtime->device, scaled_info);
auto& allocator = runtime->memory_allocator;
scaled_commit = MemoryCommit(allocator.Commit(scaled_image, MemoryUsage::DeviceLocal));
scaled_image = MakeImage(runtime->device, runtime->memory_allocator, scaled_info);
ignore = false;
}
current_image = *scaled_image;

View file

@ -116,7 +116,6 @@ public:
static constexpr size_t indexing_slots = 8 * sizeof(size_t);
std::array<vk::Buffer, indexing_slots> buffers{};
std::array<std::unique_ptr<MemoryCommit>, indexing_slots> buffer_commits{};
};
class Image : public VideoCommon::ImageBase {
@ -180,12 +179,10 @@ private:
TextureCacheRuntime* runtime{};
vk::Image original_image;
MemoryCommit commit;
std::vector<vk::ImageView> storage_image_views;
VkImageAspectFlags aspect_mask = 0;
bool initialized = false;
vk::Image scaled_image{};
MemoryCommit scaled_commit{};
VkImage current_image{};
std::unique_ptr<Framebuffer> scale_framebuffer;

View file

@ -18,7 +18,7 @@ using namespace Common::Literals;
TurboMode::TurboMode(const vk::Instance& instance, const vk::InstanceDispatch& dld)
#ifndef ANDROID
: m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device, false}
: m_device{CreateDevice(instance, dld, VK_NULL_HANDLE)}, m_allocator{m_device}
#endif
{
{
@ -41,7 +41,7 @@ void TurboMode::Run(std::stop_token stop_token) {
auto& dld = m_device.GetLogical();
// Allocate buffer. 2MiB should be sufficient.
auto buffer = dld.CreateBuffer(VkBufferCreateInfo{
const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
@ -50,10 +50,8 @@ void TurboMode::Run(std::stop_token stop_token) {
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
.queueFamilyIndexCount = 0,
.pQueueFamilyIndices = nullptr,
});
// Commit some device local memory for the buffer.
auto commit = m_allocator.Commit(buffer, MemoryUsage::DeviceLocal);
};
vk::Buffer buffer = m_allocator.CreateBuffer(buffer_ci, MemoryUsage::DeviceLocal);
// Create the descriptor pool to contain our descriptor.
static constexpr VkDescriptorPoolSize pool_size{

View file

@ -22,6 +22,8 @@
#include <adrenotools/bcenabler.h>
#endif
#include <vk_mem_alloc.h>
namespace Vulkan {
using namespace Common::Literals;
namespace {
@ -596,9 +598,31 @@ Device::Device(VkInstance instance_, vk::PhysicalDevice physical_, VkSurfaceKHR
graphics_queue = logical.GetQueue(graphics_family);
present_queue = logical.GetQueue(present_family);
VmaVulkanFunctions functions{};
functions.vkGetInstanceProcAddr = dld.vkGetInstanceProcAddr;
functions.vkGetDeviceProcAddr = dld.vkGetDeviceProcAddr;
const VmaAllocatorCreateInfo allocator_info = {
.flags = VMA_ALLOCATOR_CREATE_EXTERNALLY_SYNCHRONIZED_BIT,
.physicalDevice = physical,
.device = *logical,
.preferredLargeHeapBlockSize = 0,
.pAllocationCallbacks = nullptr,
.pDeviceMemoryCallbacks = nullptr,
.pHeapSizeLimit = nullptr,
.pVulkanFunctions = &functions,
.instance = instance,
.vulkanApiVersion = VK_API_VERSION_1_1,
.pTypeExternalMemoryHandleTypes = nullptr,
};
vk::Check(vmaCreateAllocator(&allocator_info, &allocator));
}
Device::~Device() = default;
/// Destroys the VMA allocator that the constructor created with vmaCreateAllocator.
/// The call sits in the destructor body, so it runs before any data member of Device
/// (including the logical device wrapper) is destroyed.
Device::~Device() {
vmaDestroyAllocator(allocator);
}
VkFormat Device::GetSupportedFormat(VkFormat wanted_format, VkFormatFeatureFlags wanted_usage,
FormatType format_type) const {

View file

@ -14,6 +14,8 @@
#include "common/settings.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
VK_DEFINE_HANDLE(VmaAllocator)
// Define all features which may be used by the implementation here.
// Vulkan version in the macro describes the minimum version required for feature availability.
// If the Vulkan version is lower than the required version, the named extension is required.
@ -199,6 +201,11 @@ public:
return dld;
}
/// Returns the VMA allocator.
VmaAllocator GetAllocator() const {
return allocator;
}
/// Returns the logical device.
const vk::Device& GetLogical() const {
return logical;
@ -630,6 +637,7 @@ private:
private:
VkInstance instance; ///< Vulkan instance.
VmaAllocator allocator; ///< VMA allocator.
vk::DeviceDispatch dld; ///< Device function pointers.
vk::PhysicalDevice physical; ///< Physical device.
vk::Device logical; ///< Logical device.

View file

@ -6,8 +6,6 @@
#include <optional>
#include <vector>
#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
@ -17,6 +15,8 @@
#include "video_core/vulkan_common/vulkan_memory_allocator.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
#include <vk_mem_alloc.h>
namespace Vulkan {
namespace {
struct Range {
@ -49,22 +49,45 @@ struct Range {
case MemoryUsage::Download:
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
case MemoryUsage::Stream:
return VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
ASSERT_MSG(false, "Invalid memory usage={}", usage);
return VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
constexpr VkExportMemoryAllocateInfo EXPORT_ALLOCATE_INFO{
.sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
.pNext = nullptr,
#ifdef _WIN32
.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT,
#elif __unix__
.handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT,
#else
.handleTypes = 0,
#endif
};
/// Returns the memory property flags that should be preferred (not required) for a usage.
/// DeviceLocal expresses no preference; every other usage prefers host coherent memory.
[[nodiscard]] VkMemoryPropertyFlags MemoryUsagePreferedVmaFlags(MemoryUsage usage) {
    if (usage == MemoryUsage::DeviceLocal) {
        return VkMemoryPropertyFlagBits{};
    }
    return VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}
/// Translates a memory usage into the VMA host access allocation flags.
/// Values not covered by the switch yield no flags.
[[nodiscard]] VmaAllocationCreateFlags MemoryUsageVmaFlags(MemoryUsage usage) {
    VmaAllocationCreateFlags host_access{};
    switch (usage) {
    case MemoryUsage::DeviceLocal:
        // Sequential host writes are requested, but a transfer may be used instead.
        host_access = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT |
                      VMA_ALLOCATION_CREATE_HOST_ACCESS_ALLOW_TRANSFER_INSTEAD_BIT;
        break;
    case MemoryUsage::Download:
        host_access = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
        break;
    case MemoryUsage::Upload:
    case MemoryUsage::Stream:
        host_access = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
        break;
    }
    return host_access;
}
/// Translates a memory usage into the VMA memory usage hint.
/// Upload and Download prefer host memory; everything else prefers device memory.
[[nodiscard]] VmaMemoryUsage MemoryUsageVma(MemoryUsage usage) {
    const bool prefers_host = usage == MemoryUsage::Upload || usage == MemoryUsage::Download;
    return prefers_host ? VMA_MEMORY_USAGE_AUTO_PREFER_HOST : VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
}
} // Anonymous namespace
class MemoryAllocation {
@ -74,14 +97,6 @@ public:
: allocator{allocator_}, memory{std::move(memory_)}, allocation_size{allocation_size_},
property_flags{properties}, shifted_memory_type{1U << type} {}
#if defined(_WIN32) || defined(__unix__)
~MemoryAllocation() {
if (owning_opengl_handle != 0) {
glDeleteMemoryObjectsEXT(1, &owning_opengl_handle);
}
}
#endif
MemoryAllocation& operator=(const MemoryAllocation&) = delete;
MemoryAllocation(const MemoryAllocation&) = delete;
@ -120,31 +135,6 @@ public:
return memory_mapped_span;
}
#ifdef _WIN32
[[nodiscard]] u32 ExportOpenGLHandle() {
if (!owning_opengl_handle) {
glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
glImportMemoryWin32HandleEXT(owning_opengl_handle, allocation_size,
GL_HANDLE_TYPE_OPAQUE_WIN32_EXT,
memory.GetMemoryWin32HandleKHR());
}
return owning_opengl_handle;
}
#elif __unix__
[[nodiscard]] u32 ExportOpenGLHandle() {
if (!owning_opengl_handle) {
glCreateMemoryObjectsEXT(1, &owning_opengl_handle);
glImportMemoryFdEXT(owning_opengl_handle, allocation_size, GL_HANDLE_TYPE_OPAQUE_FD_EXT,
memory.GetMemoryFdKHR());
}
return owning_opengl_handle;
}
#else
[[nodiscard]] u32 ExportOpenGLHandle() {
return 0;
}
#endif
/// Returns whether this allocation is compatible with the arguments.
[[nodiscard]] bool IsCompatible(VkMemoryPropertyFlags flags, u32 type_mask) const {
return (flags & property_flags) == flags && (type_mask & shifted_memory_type) != 0;
@ -182,9 +172,6 @@ private:
const u32 shifted_memory_type; ///< Shifted Vulkan memory type.
std::vector<Range> commits; ///< All commit ranges done from this allocation.
std::span<u8> memory_mapped_span; ///< Memory mapped span. Empty if not queried before.
#if defined(_WIN32) || defined(__unix__)
u32 owning_opengl_handle{}; ///< Owning OpenGL memory object handle.
#endif
};
MemoryCommit::MemoryCommit(MemoryAllocation* allocation_, VkDeviceMemory memory_, u64 begin_,
@ -216,24 +203,70 @@ std::span<u8> MemoryCommit::Map() {
return span;
}
u32 MemoryCommit::ExportOpenGLHandle() const {
return allocation->ExportOpenGLHandle();
}
void MemoryCommit::Release() {
if (allocation) {
allocation->Free(begin);
}
}
MemoryAllocator::MemoryAllocator(const Device& device_, bool export_allocations_)
: device{device_}, properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
export_allocations{export_allocations_},
MemoryAllocator::MemoryAllocator(const Device& device_)
: device{device_}, allocator{device.GetAllocator()},
properties{device_.GetPhysical().GetMemoryProperties().memoryProperties},
buffer_image_granularity{
device_.GetPhysical().GetProperties().limits.bufferImageGranularity} {}
MemoryAllocator::~MemoryAllocator() = default;
/// Creates an image and its backing memory in a single VMA call.
/// The allocation is requested within budget and must be device local.
/// @param ci Vulkan image creation parameters, forwarded unchanged to vmaCreateImage.
/// @return Owning wrapper holding the image handle and its VMA allocation.
/// The VkResult of the creation is passed to vk::Check.
vk::Image MemoryAllocator::CreateImage(const VkImageCreateInfo& ci) const {
    // Value-initialize so every field not assigned below is zero / null.
    VmaAllocationCreateInfo allocation_ci{};
    allocation_ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT;
    allocation_ci.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
    allocation_ci.requiredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;

    VkImage image{};
    VmaAllocation image_allocation{};
    const VkResult result =
        vmaCreateImage(allocator, &ci, &allocation_ci, &image, &image_allocation, nullptr);
    vk::Check(result);

    return vk::Image(image, *device.GetLogical(), allocator, image_allocation,
                     device.GetDispatchLoader());
}
/// Creates a buffer and its backing memory in a single VMA call.
/// The allocation is requested within budget and persistently mapped; the host access flags,
/// memory usage hint and preferred property flags are derived from `usage`.
/// @param ci    Vulkan buffer creation parameters, forwarded unchanged to vmaCreateBuffer.
/// @param usage Intended usage of the buffer memory.
/// @return Owning wrapper holding the buffer, its allocation, the mapped span (empty when VMA
///         reports no mapped pointer) and whether the memory is host coherent.
/// The VkResult of the creation is passed to vk::Check.
vk::Buffer MemoryAllocator::CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const {
    // Value-initialize so every field not assigned below is zero / null.
    VmaAllocationCreateInfo allocation_ci{};
    allocation_ci.flags = VMA_ALLOCATION_CREATE_WITHIN_BUDGET_BIT |
                          VMA_ALLOCATION_CREATE_MAPPED_BIT | MemoryUsageVmaFlags(usage);
    allocation_ci.usage = MemoryUsageVma(usage);
    allocation_ci.preferredFlags = MemoryUsagePreferedVmaFlags(usage);

    VkBuffer buffer{};
    VmaAllocation buffer_allocation{};
    VmaAllocationInfo allocation_info{};
    vk::Check(vmaCreateBuffer(allocator, &ci, &allocation_ci, &buffer, &buffer_allocation,
                              &allocation_info));

    // Query the properties of the memory type that was actually selected.
    VkMemoryPropertyFlags memory_flags{};
    vmaGetAllocationMemoryProperties(allocator, buffer_allocation, &memory_flags);
    const bool is_coherent = (memory_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;

    // Only expose a span when a mapped pointer was returned.
    std::span<u8> mapped_data{};
    if (u8* const base = static_cast<u8*>(allocation_info.pMappedData)) {
        mapped_data = std::span<u8>{base, ci.size};
    }

    return vk::Buffer(buffer, *device.GetLogical(), allocator, buffer_allocation, mapped_data,
                      is_coherent, device.GetDispatchLoader());
}
MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, MemoryUsage usage) {
// Find the fastest memory flags we can afford with the current requirements
const u32 type_mask = requirements.memoryTypeBits;
@ -253,25 +286,11 @@ MemoryCommit MemoryAllocator::Commit(const VkMemoryRequirements& requirements, M
return TryCommit(requirements, flags).value();
}
MemoryCommit MemoryAllocator::Commit(const vk::Buffer& buffer, MemoryUsage usage) {
auto commit = Commit(device.GetLogical().GetBufferMemoryRequirements(*buffer), usage);
buffer.BindMemory(commit.Memory(), commit.Offset());
return commit;
}
MemoryCommit MemoryAllocator::Commit(const vk::Image& image, MemoryUsage usage) {
VkMemoryRequirements requirements = device.GetLogical().GetImageMemoryRequirements(*image);
requirements.size = Common::AlignUp(requirements.size, buffer_image_granularity);
auto commit = Commit(requirements, usage);
image.BindMemory(commit.Memory(), commit.Offset());
return commit;
}
bool MemoryAllocator::TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size) {
const u32 type = FindType(flags, type_mask).value();
vk::DeviceMemory memory = device.GetLogical().TryAllocateMemory({
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
.pNext = export_allocations ? &EXPORT_ALLOCATE_INFO : nullptr,
.pNext = nullptr,
.allocationSize = size,
.memoryTypeIndex = type,
});
@ -342,16 +361,4 @@ std::optional<u32> MemoryAllocator::FindType(VkMemoryPropertyFlags flags, u32 ty
return std::nullopt;
}
bool IsHostVisible(MemoryUsage usage) noexcept {
switch (usage) {
case MemoryUsage::DeviceLocal:
return false;
case MemoryUsage::Upload:
case MemoryUsage::Download:
return true;
}
ASSERT_MSG(false, "Invalid memory usage={}", usage);
return false;
}
} // namespace Vulkan

View file

@ -9,6 +9,8 @@
#include "common/common_types.h"
#include "video_core/vulkan_common/vulkan_wrapper.h"
VK_DEFINE_HANDLE(VmaAllocator)
namespace Vulkan {
class Device;
@ -17,9 +19,11 @@ class MemoryAllocation;
/// Hints and requirements for the backing memory type of a commit
enum class MemoryUsage {
DeviceLocal, ///< Hints device local usages, fastest memory type to read and write from the GPU
DeviceLocal, ///< Requests device local host visible buffer, falling back to device local
///< memory.
Upload, ///< Requires a host visible memory type optimized for CPU to GPU uploads
Download, ///< Requires a host visible memory type optimized for GPU to CPU readbacks
Stream, ///< Requests device local host visible buffer, falling back to host memory.
};
/// Ownership handle of a memory commitment.
@ -41,9 +45,6 @@ public:
/// It will map the backing allocation if it hasn't been mapped before.
std::span<u8> Map();
/// Returns an non-owning OpenGL handle, creating one if it doesn't exist.
u32 ExportOpenGLHandle() const;
/// Returns the Vulkan memory handler.
VkDeviceMemory Memory() const {
return memory;
@ -74,16 +75,19 @@ public:
* Construct memory allocator
*
* @param device_ Device to allocate from
* @param export_allocations_ True when allocations have to be exported
*
* @throw vk::Exception on failure
*/
explicit MemoryAllocator(const Device& device_, bool export_allocations_);
explicit MemoryAllocator(const Device& device_);
~MemoryAllocator();
MemoryAllocator& operator=(const MemoryAllocator&) = delete;
MemoryAllocator(const MemoryAllocator&) = delete;
vk::Image CreateImage(const VkImageCreateInfo& ci) const;
vk::Buffer CreateBuffer(const VkBufferCreateInfo& ci, MemoryUsage usage) const;
/**
* Commits a memory with the specified requirements.
*
@ -97,9 +101,6 @@ public:
/// Commits memory required by the buffer and binds it.
MemoryCommit Commit(const vk::Buffer& buffer, MemoryUsage usage);
/// Commits memory required by the image and binds it.
MemoryCommit Commit(const vk::Image& image, MemoryUsage usage);
private:
/// Tries to allocate a chunk of memory.
bool TryAllocMemory(VkMemoryPropertyFlags flags, u32 type_mask, u64 size);
@ -117,15 +118,12 @@ private:
/// Returns index to the fastest memory type compatible with the passed requirements.
std::optional<u32> FindType(VkMemoryPropertyFlags flags, u32 type_mask) const;
const Device& device; ///< Device handle.
const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
const bool export_allocations; ///< True when memory allocations have to be exported.
const Device& device; ///< Device handle.
VmaAllocator allocator; ///< Vma allocator.
const VkPhysicalDeviceMemoryProperties properties; ///< Physical device properties.
std::vector<std::unique_ptr<MemoryAllocation>> allocations; ///< Current allocations.
VkDeviceSize buffer_image_granularity; // The granularity for adjacent offsets between buffers
// and optimal images
};
/// Returns true when a memory usage is guaranteed to be host visible.
bool IsHostVisible(MemoryUsage usage) noexcept;
} // namespace Vulkan

View file

@ -12,6 +12,8 @@
#include "video_core/vulkan_common/vulkan_wrapper.h"
#include <vk_mem_alloc.h>
namespace Vulkan::vk {
namespace {
@ -547,26 +549,42 @@ DebugUtilsMessenger Instance::CreateDebugUtilsMessenger(
return DebugUtilsMessenger(object, handle, *dld);
}
void Buffer::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
Check(dld->vkBindBufferMemory(owner, handle, memory, offset));
void Image::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name);
}
/// Destroys the image together with its VMA allocation; does nothing for an empty handle.
void Image::Release() const noexcept {
    if (!handle) {
        return;
    }
    vmaDestroyImage(allocator, handle, allocation);
}
/// Flushes the whole allocation (offset 0, VK_WHOLE_SIZE) unless the memory is host coherent.
void Buffer::Flush() const {
    if (is_coherent) {
        return;
    }
    vmaFlushAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
}
/// Invalidates the whole allocation (offset 0, VK_WHOLE_SIZE) unless the memory is host
/// coherent.
void Buffer::Invalidate() const {
    if (is_coherent) {
        return;
    }
    vmaInvalidateAllocation(allocator, allocation, 0, VK_WHOLE_SIZE);
}
/// Names this buffer by forwarding its handle to SetObjectName as VK_OBJECT_TYPE_BUFFER.
/// @param name Name to assign to the buffer object.
void Buffer::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER, name);
}
/// Destroys the buffer together with its VMA allocation; does nothing for an empty handle.
void Buffer::Release() const noexcept {
    if (!handle) {
        return;
    }
    vmaDestroyBuffer(allocator, handle, allocation);
}
/// Names this buffer view by forwarding its handle to SetObjectName as
/// VK_OBJECT_TYPE_BUFFER_VIEW.
/// @param name Name to assign to the buffer view object.
void BufferView::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_BUFFER_VIEW, name);
}
void Image::BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const {
Check(dld->vkBindImageMemory(owner, handle, memory, offset));
}
void Image::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE, name);
}
/// Names this image view by forwarding its handle to SetObjectName as
/// VK_OBJECT_TYPE_IMAGE_VIEW.
/// @param name Name to assign to the image view object.
void ImageView::SetObjectNameEXT(const char* name) const {
SetObjectName(dld, owner, handle, VK_OBJECT_TYPE_IMAGE_VIEW, name);
}
@ -701,24 +719,12 @@ Queue Device::GetQueue(u32 family_index) const noexcept {
return Queue(queue, *dld);
}
Buffer Device::CreateBuffer(const VkBufferCreateInfo& ci) const {
VkBuffer object;
Check(dld->vkCreateBuffer(handle, &ci, nullptr, &object));
return Buffer(object, handle, *dld);
}
/// Creates a buffer view on this device and wraps it in an owning handle.
/// @param ci Vulkan buffer view creation parameters.
/// The VkResult of vkCreateBufferView is passed to Check.
BufferView Device::CreateBufferView(const VkBufferViewCreateInfo& ci) const {
    VkBufferView view;
    const VkResult result = dld->vkCreateBufferView(handle, &ci, nullptr, &view);
    Check(result);
    return BufferView(view, handle, *dld);
}
Image Device::CreateImage(const VkImageCreateInfo& ci) const {
VkImage object;
Check(dld->vkCreateImage(handle, &ci, nullptr, &object));
return Image(object, handle, *dld);
}
ImageView Device::CreateImageView(const VkImageViewCreateInfo& ci) const {
VkImageView object;
Check(dld->vkCreateImageView(handle, &ci, nullptr, &object));

View file

@ -32,6 +32,9 @@
#pragma warning(disable : 26812) // Disable prefer enum class over enum
#endif
VK_DEFINE_HANDLE(VmaAllocator)
VK_DEFINE_HANDLE(VmaAllocation)
namespace Vulkan::vk {
/**
@ -616,6 +619,138 @@ public:
}
};
/// Move-only owner of a VkImage and the VMA allocation backing it.
/// The image and its allocation are released together when the wrapper is destroyed, reset,
/// or overwritten by move assignment.
class Image {
public:
    /// Takes ownership of an already created image and its allocation.
    explicit Image(VkImage handle_, VkDevice owner_, VmaAllocator allocator_,
                   VmaAllocation allocation_, const DeviceDispatch& dld_) noexcept
        : handle{handle_}, owner{owner_}, allocator{allocator_},
          allocation{allocation_}, dld{&dld_} {}

    /// Constructs an empty wrapper that owns nothing.
    Image() = default;

    Image(const Image&) = delete;
    Image& operator=(const Image&) = delete;

    /// Transfers ownership from rhs, leaving it empty.
    Image(Image&& rhs) noexcept
        : handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, allocator{rhs.allocator},
          allocation{std::exchange(rhs.allocation, nullptr)}, dld{rhs.dld} {}

    /// Releases the currently owned image and takes ownership from rhs.
    Image& operator=(Image&& rhs) noexcept {
        // Self-assignment must be a no-op: releasing first and then re-reading our own handle
        // would keep a destroyed image and destroy it a second time later.
        if (this != &rhs) {
            Release();
            handle = std::exchange(rhs.handle, nullptr);
            owner = rhs.owner;
            allocator = rhs.allocator;
            allocation = std::exchange(rhs.allocation, nullptr);
            dld = rhs.dld;
        }
        return *this;
    }

    ~Image() noexcept {
        Release();
    }

    /// Returns the raw image handle without giving up ownership.
    VkImage operator*() const noexcept {
        return handle;
    }

    /// Releases the owned image (if any) and leaves the wrapper empty.
    void reset() noexcept {
        Release();
        handle = nullptr;
        allocation = nullptr;
    }

    /// Returns true when the wrapper owns an image.
    explicit operator bool() const noexcept {
        return handle != nullptr;
    }

    /// Set object name.
    void SetObjectNameEXT(const char* name) const;

private:
    /// Destroys the image and its allocation when a handle is held; does not clear members.
    void Release() const noexcept;

    VkImage handle = nullptr;
    VkDevice owner = nullptr;
    VmaAllocator allocator = nullptr;
    VmaAllocation allocation = nullptr;
    const DeviceDispatch* dld = nullptr;
};
class Buffer {
public:
explicit Buffer(VkBuffer handle_, VkDevice owner_, VmaAllocator allocator_,
VmaAllocation allocation_, std::span<u8> mapped_, bool is_coherent_,
const DeviceDispatch& dld_) noexcept
: handle{handle_}, owner{owner_}, allocator{allocator_},
allocation{allocation_}, mapped{mapped_}, is_coherent{is_coherent_}, dld{&dld_} {}
Buffer() = default;
Buffer(const Buffer&) = delete;
Buffer& operator=(const Buffer&) = delete;
Buffer(Buffer&& rhs) noexcept
: handle{std::exchange(rhs.handle, nullptr)}, owner{rhs.owner}, allocator{rhs.allocator},
allocation{rhs.allocation}, mapped{rhs.mapped},
is_coherent{rhs.is_coherent}, dld{rhs.dld} {}
Buffer& operator=(Buffer&& rhs) noexcept {
Release();
handle = std::exchange(rhs.handle, nullptr);
owner = rhs.owner;
allocator = rhs.allocator;
allocation = rhs.allocation;
mapped = rhs.mapped;
is_coherent = rhs.is_coherent;
dld = rhs.dld;
return *this;
}
~Buffer() noexcept {
Release();
}
VkBuffer operator*() const noexcept {
return handle;
}
void reset() noexcept {
Release();
handle = nullptr;
}
explicit operator bool() const noexcept {
return handle != nullptr;
}
/// Returns the host mapped memory, an empty span otherwise.
std::span<u8> Mapped() noexcept {
return mapped;
}
std::span<const u8> Mapped() const noexcept {
return mapped;
}
/// Returns true if the buffer is mapped to the host.
bool IsHostVisible() const noexcept {
return !mapped.empty();
}
void Flush() const;
void Invalidate() const;
void SetObjectNameEXT(const char* name) const;
private:
void Release() const noexcept;
VkBuffer handle = nullptr;
VkDevice owner = nullptr;
VmaAllocator allocator = nullptr;
VmaAllocation allocation = nullptr;
std::span<u8> mapped = {};
bool is_coherent = false;
const DeviceDispatch* dld = nullptr;
};
class Queue {
public:
/// Construct an empty queue handle.
@ -639,17 +774,6 @@ private:
const DeviceDispatch* dld = nullptr;
};
class Buffer : public Handle<VkBuffer, VkDevice, DeviceDispatch> {
using Handle<VkBuffer, VkDevice, DeviceDispatch>::Handle;
public:
/// Attaches a memory allocation.
void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
/// Set object name.
void SetObjectNameEXT(const char* name) const;
};
class BufferView : public Handle<VkBufferView, VkDevice, DeviceDispatch> {
using Handle<VkBufferView, VkDevice, DeviceDispatch>::Handle;
@ -658,17 +782,6 @@ public:
void SetObjectNameEXT(const char* name) const;
};
class Image : public Handle<VkImage, VkDevice, DeviceDispatch> {
using Handle<VkImage, VkDevice, DeviceDispatch>::Handle;
public:
/// Attaches a memory allocation.
void BindMemory(VkDeviceMemory memory, VkDeviceSize offset) const;
/// Set object name.
void SetObjectNameEXT(const char* name) const;
};
class ImageView : public Handle<VkImageView, VkDevice, DeviceDispatch> {
using Handle<VkImageView, VkDevice, DeviceDispatch>::Handle;
@ -840,12 +953,8 @@ public:
Queue GetQueue(u32 family_index) const noexcept;
Buffer CreateBuffer(const VkBufferCreateInfo& ci) const;
BufferView CreateBufferView(const VkBufferViewCreateInfo& ci) const;
Image CreateImage(const VkImageCreateInfo& ci) const;
ImageView CreateImageView(const VkImageViewCreateInfo& ci) const;
Semaphore CreateSemaphore() const;