From 38e789c761e6443bda33d55ae5b3c06802e33ca0 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 16 Jan 2020 22:54:15 -0300 Subject: [PATCH 1/3] renderer_vulkan: Add header as placeholder --- src/video_core/CMakeLists.txt | 1 + .../renderer_vulkan/renderer_vulkan.h | 72 +++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 src/video_core/renderer_vulkan/renderer_vulkan.h diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index 729ee4a01..d7719eed9 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -153,6 +153,7 @@ if (ENABLE_VULKAN) renderer_vulkan/fixed_pipeline_state.h renderer_vulkan/maxwell_to_vk.cpp renderer_vulkan/maxwell_to_vk.h + renderer_vulkan/renderer_vulkan.h renderer_vulkan/vk_buffer_cache.cpp renderer_vulkan/vk_buffer_cache.h renderer_vulkan/vk_compute_pass.cpp diff --git a/src/video_core/renderer_vulkan/renderer_vulkan.h b/src/video_core/renderer_vulkan/renderer_vulkan.h new file mode 100644 index 000000000..a472c5dc9 --- /dev/null +++ b/src/video_core/renderer_vulkan/renderer_vulkan.h @@ -0,0 +1,72 @@ +// Copyright 2018 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include "video_core/renderer_base.h" +#include "video_core/renderer_vulkan/declarations.h" + +namespace Core { +class System; +} + +namespace Vulkan { + +class VKBlitScreen; +class VKDevice; +class VKFence; +class VKMemoryManager; +class VKResourceManager; +class VKSwapchain; +class VKScheduler; +class VKImage; + +struct VKScreenInfo { + VKImage* image{}; + u32 width{}; + u32 height{}; + bool is_srgb{}; +}; + +class RendererVulkan final : public VideoCore::RendererBase { +public: + explicit RendererVulkan(Core::Frontend::EmuWindow& window, Core::System& system); + ~RendererVulkan() override; + + /// Swap buffers (render frame) + void SwapBuffers(const Tegra::FramebufferConfig* framebuffer) override; + + /// Initialize the renderer + bool Init() override; + + /// Shutdown the renderer + void ShutDown() override; + +private: + std::optional CreateDebugCallback( + const vk::DispatchLoaderDynamic& dldi); + + bool PickDevices(const vk::DispatchLoaderDynamic& dldi); + + void Report() const; + + Core::System& system; + + vk::Instance instance; + vk::SurfaceKHR surface; + + VKScreenInfo screen_info; + + UniqueDebugUtilsMessengerEXT debug_callback; + std::unique_ptr device; + std::unique_ptr swapchain; + std::unique_ptr memory_manager; + std::unique_ptr resource_manager; + std::unique_ptr scheduler; + std::unique_ptr blit_screen; +}; + +} // namespace Vulkan From fe5356d22363081808db1ba2a84ef7bdf979dad6 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Thu, 16 Jan 2020 22:55:23 -0300 Subject: [PATCH 2/3] vk_rasterizer: Implement Vulkan's rasterizer This abstraction is Vulkan's equivalent to OpenGL's rasterizer. It takes care of joining all parts of the backend and rendering accordingly on demand. --- src/video_core/CMakeLists.txt | 1 + .../renderer_vulkan/vk_rasterizer.cpp | 1135 +++++++++++++++++ .../renderer_vulkan/vk_rasterizer.h | 251 +++- 3 files changed, 1386 insertions(+), 1 deletion(-) create mode 100644 src/video_core/renderer_vulkan/vk_rasterizer.cpp diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt index d7719eed9..12c46e86f 100644 --- a/src/video_core/CMakeLists.txt +++ b/src/video_core/CMakeLists.txt @@ -172,6 +172,7 @@ if (ENABLE_VULKAN) renderer_vulkan/vk_memory_manager.h renderer_vulkan/vk_pipeline_cache.cpp renderer_vulkan/vk_pipeline_cache.h + renderer_vulkan/vk_rasterizer.cpp renderer_vulkan/vk_rasterizer.h renderer_vulkan/vk_renderpass_cache.cpp renderer_vulkan/vk_renderpass_cache.h diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp new file mode 100644 index 000000000..23252e658 --- /dev/null +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -0,0 +1,1135 @@ +// Copyright 2019 yuzu Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include +#include +#include +#include + +#include +#include + +#include "common/alignment.h" +#include "common/assert.h" +#include "common/logging/log.h" +#include "common/microprofile.h" +#include "core/core.h" +#include "core/memory.h" +#include "video_core/engines/kepler_compute.h" +#include "video_core/engines/maxwell_3d.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/maxwell_to_vk.h" +#include "video_core/renderer_vulkan/renderer_vulkan.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_compute_pass.h" +#include "video_core/renderer_vulkan/vk_compute_pipeline.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_device.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_rasterizer.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_sampler_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" + +namespace Vulkan { + +using Maxwell = Tegra::Engines::Maxwell3D::Regs; + +MICROPROFILE_DEFINE(Vulkan_WaitForWorker, "Vulkan", "Wait for worker", MP_RGB(255, 192, 192)); +MICROPROFILE_DEFINE(Vulkan_Drawing, "Vulkan", "Record drawing", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_Compute, "Vulkan", "Record compute", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_Clearing, "Vulkan", "Record clearing", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_Geometry, "Vulkan", "Setup geometry", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_ConstBuffers, "Vulkan", "Setup constant buffers", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_GlobalBuffers, "Vulkan", "Setup global buffers", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_RenderTargets, "Vulkan", "Setup render targets", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_Textures, "Vulkan", "Setup textures", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_Images, "Vulkan", "Setup images", MP_RGB(192, 128, 128)); +MICROPROFILE_DEFINE(Vulkan_PipelineCache, "Vulkan", "Pipeline cache", MP_RGB(192, 128, 128)); + +namespace { + +constexpr auto ComputeShaderIndex = static_cast(Tegra::Engines::ShaderType::Compute); + +vk::Viewport GetViewportState(const VKDevice& device, const Maxwell& regs, std::size_t index) { + const auto& viewport = regs.viewport_transform[index]; + const float x = viewport.translate_x - viewport.scale_x; + const float y = viewport.translate_y - viewport.scale_y; + const float width = viewport.scale_x * 2.0f; + const float height = viewport.scale_y * 2.0f; + + const float reduce_z = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; + float near = viewport.translate_z - viewport.scale_z * reduce_z; + float far = viewport.translate_z + viewport.scale_z; + if (!device.IsExtDepthRangeUnrestrictedSupported()) { + near = std::clamp(near, 0.0f, 1.0f); + far = std::clamp(far, 0.0f, 1.0f); + } + + return vk::Viewport(x, y, width != 0 ? width : 1.0f, height != 0 ? height : 1.0f, near, far); +} + +constexpr vk::Rect2D GetScissorState(const Maxwell& regs, std::size_t index) { + const auto& scissor = regs.scissor_test[index]; + if (!scissor.enable) { + return {{0, 0}, {INT32_MAX, INT32_MAX}}; + } + const u32 width = scissor.max_x - scissor.min_x; + const u32 height = scissor.max_y - scissor.min_y; + return {{static_cast(scissor.min_x), static_cast(scissor.min_y)}, {width, height}}; +} + +std::array GetShaderAddresses( + const std::array& shaders) { + std::array addresses; + for (std::size_t i = 0; i < std::size(addresses); ++i) { + addresses[i] = shaders[i] ? shaders[i]->GetGpuAddr() : 0; + } + return addresses; +} + +void TransitionImages(const std::vector& views, vk::PipelineStageFlags pipeline_stage, + vk::AccessFlags access) { + for (auto& [view, layout] : views) { + view->Transition(*layout, pipeline_stage, access); + } +} + +template +Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, + std::size_t stage) { + const auto stage_type = static_cast(stage); + if (entry.IsBindless()) { + const Tegra::Texture::TextureHandle tex_handle = + engine.AccessConstBuffer32(stage_type, entry.GetBuffer(), entry.GetOffset()); + return engine.GetTextureInfo(tex_handle); + } + if constexpr (std::is_same_v) { + return engine.GetStageTexture(stage_type, entry.GetOffset()); + } else { + return engine.GetTexture(entry.GetOffset()); + } +} + +} // Anonymous namespace + +class BufferBindings final { +public: + void AddVertexBinding(const vk::Buffer* buffer, vk::DeviceSize offset) { + vertex.buffer_ptrs[vertex.num_buffers] = buffer; + vertex.offsets[vertex.num_buffers] = offset; + ++vertex.num_buffers; + } + + void SetIndexBinding(const vk::Buffer* buffer, vk::DeviceSize offset, vk::IndexType type) { + index.buffer = buffer; + index.offset = offset; + index.type = type; + } + + void Bind(VKScheduler& scheduler) const { + // Use this large switch case to avoid dispatching more memory in the record lambda than + // what we need. It looks horrible, but it's the best we can do on standard C++. + switch (vertex.num_buffers) { + case 0: + return BindStatic<0>(scheduler); + case 1: + return BindStatic<1>(scheduler); + case 2: + return BindStatic<2>(scheduler); + case 3: + return BindStatic<3>(scheduler); + case 4: + return BindStatic<4>(scheduler); + case 5: + return BindStatic<5>(scheduler); + case 6: + return BindStatic<6>(scheduler); + case 7: + return BindStatic<7>(scheduler); + case 8: + return BindStatic<8>(scheduler); + case 9: + return BindStatic<9>(scheduler); + case 10: + return BindStatic<10>(scheduler); + case 11: + return BindStatic<11>(scheduler); + case 12: + return BindStatic<12>(scheduler); + case 13: + return BindStatic<13>(scheduler); + case 14: + return BindStatic<14>(scheduler); + case 15: + return BindStatic<15>(scheduler); + case 16: + return BindStatic<16>(scheduler); + case 17: + return BindStatic<17>(scheduler); + case 18: + return BindStatic<18>(scheduler); + case 19: + return BindStatic<19>(scheduler); + case 20: + return BindStatic<20>(scheduler); + case 21: + return BindStatic<21>(scheduler); + case 22: + return BindStatic<22>(scheduler); + case 23: + return BindStatic<23>(scheduler); + case 24: + return BindStatic<24>(scheduler); + case 25: + return BindStatic<25>(scheduler); + case 26: + return BindStatic<26>(scheduler); + case 27: + return BindStatic<27>(scheduler); + case 28: + return BindStatic<28>(scheduler); + case 29: + return BindStatic<29>(scheduler); + case 30: + return BindStatic<30>(scheduler); + case 31: + return BindStatic<31>(scheduler); + } + UNREACHABLE(); + } + +private: + // Some of these fields are intentionally left uninitialized to avoid initializing them twice. + struct { + std::size_t num_buffers = 0; + std::array buffer_ptrs; + std::array offsets; + } vertex; + + struct { + const vk::Buffer* buffer = nullptr; + vk::DeviceSize offset; + vk::IndexType type; + } index; + + template + void BindStatic(VKScheduler& scheduler) const { + if (index.buffer != nullptr) { + BindStatic(scheduler); + } else { + BindStatic(scheduler); + } + } + + template + void BindStatic(VKScheduler& scheduler) const { + static_assert(N <= Maxwell::NumVertexArrays); + if constexpr (N == 0) { + return; + } + + std::array buffers; + std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(), + [](const auto ptr) { return *ptr; }); + + std::array offsets; + std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); + + if constexpr (is_indexed) { + // Indexed draw + scheduler.Record([buffers, offsets, index_buffer = *index.buffer, + index_offset = index.offset, + index_type = index.type](auto cmdbuf, auto& dld) { + cmdbuf.bindIndexBuffer(index_buffer, index_offset, index_type, dld); + cmdbuf.bindVertexBuffers(0, static_cast(N), buffers.data(), offsets.data(), + dld); + }); + } else { + // Array draw + scheduler.Record([buffers, offsets](auto cmdbuf, auto& dld) { + cmdbuf.bindVertexBuffers(0, static_cast(N), buffers.data(), offsets.data(), + dld); + }); + } + } +}; + +void RasterizerVulkan::DrawParameters::Draw(vk::CommandBuffer cmdbuf, + const vk::DispatchLoaderDynamic& dld) const { + if (is_indexed) { + cmdbuf.drawIndexed(num_vertices, num_instances, 0, base_vertex, base_instance, dld); + } else { + cmdbuf.draw(num_vertices, num_instances, base_vertex, base_instance, dld); + } +} + +RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& renderer, + VKScreenInfo& screen_info, const VKDevice& device, + VKResourceManager& resource_manager, + VKMemoryManager& memory_manager, VKScheduler& scheduler) + : RasterizerAccelerated{system.Memory()}, system{system}, render_window{renderer}, + screen_info{screen_info}, device{device}, resource_manager{resource_manager}, + memory_manager{memory_manager}, scheduler{scheduler}, + staging_pool(device, memory_manager, scheduler), descriptor_pool(device), + update_descriptor_queue(device, scheduler), + quad_array_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), + uint8_pass(device, scheduler, descriptor_pool, staging_pool, update_descriptor_queue), + texture_cache(system, *this, device, resource_manager, memory_manager, scheduler, + staging_pool), + pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue), + buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool), + sampler_cache(device) {} + +RasterizerVulkan::~RasterizerVulkan() = default; + +bool RasterizerVulkan::DrawBatch(bool is_indexed) { + Draw(is_indexed, false); + return true; +} + +bool RasterizerVulkan::DrawMultiBatch(bool is_indexed) { + Draw(is_indexed, true); + return true; +} + +void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { + MICROPROFILE_SCOPE(Vulkan_Drawing); + + FlushWork(); + + const auto& gpu = system.GPU().Maxwell3D(); + GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; + + buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); + + BufferBindings buffer_bindings; + const DrawParameters draw_params = + SetupGeometry(key.fixed_state, buffer_bindings, is_indexed, is_instanced); + + update_descriptor_queue.Acquire(); + sampled_views.clear(); + image_views.clear(); + + const auto shaders = pipeline_cache.GetShaders(); + key.shaders = GetShaderAddresses(shaders); + SetupShaderDescriptors(shaders); + + buffer_cache.Unmap(); + + const auto texceptions = UpdateAttachments(); + SetupImageTransitions(texceptions, color_attachments, zeta_attachment); + + key.renderpass_params = GetRenderPassParams(texceptions); + + auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); + scheduler.BindGraphicsPipeline(pipeline.GetHandle()); + + const auto renderpass = pipeline.GetRenderPass(); + const auto [framebuffer, render_area] = ConfigureFramebuffers(renderpass); + scheduler.RequestRenderpass({renderpass, framebuffer, {{0, 0}, render_area}, 0, nullptr}); + + UpdateDynamicStates(); + + buffer_bindings.Bind(scheduler); + + if (device.IsNvDeviceDiagnosticCheckpoints()) { + scheduler.Record( + [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(&pipeline, dld); }); + } + + const auto pipeline_layout = pipeline.GetLayout(); + const auto descriptor_set = pipeline.CommitDescriptorSet(); + scheduler.Record([pipeline_layout, descriptor_set, draw_params](auto cmdbuf, auto& dld) { + if (descriptor_set) { + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eGraphics, pipeline_layout, + DESCRIPTOR_SET, 1, &descriptor_set, 0, nullptr, dld); + } + draw_params.Draw(cmdbuf, dld); + }); +} + +void RasterizerVulkan::Clear() { + MICROPROFILE_SCOPE(Vulkan_Clearing); + + const auto& gpu = system.GPU().Maxwell3D(); + if (!system.GPU().Maxwell3D().ShouldExecute()) { + return; + } + + const auto& regs = gpu.regs; + const bool use_color = regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B || + regs.clear_buffers.A; + const bool use_depth = regs.clear_buffers.Z; + const bool use_stencil = regs.clear_buffers.S; + if (!use_color && !use_depth && !use_stencil) { + return; + } + // Clearing images requires to be out of a renderpass + scheduler.RequestOutsideRenderPassOperationContext(); + + // TODO(Rodrigo): Implement clears rendering a quad or using beginning a renderpass. + + if (use_color) { + View color_view; + { + MICROPROFILE_SCOPE(Vulkan_RenderTargets); + color_view = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT.Value(), false); + } + + color_view->Transition(vk::ImageLayout::eTransferDstOptimal, + vk::PipelineStageFlagBits::eTransfer, + vk::AccessFlagBits::eTransferWrite); + + const std::array clear_color = {regs.clear_color[0], regs.clear_color[1], + regs.clear_color[2], regs.clear_color[3]}; + const vk::ClearColorValue clear(clear_color); + scheduler.Record([image = color_view->GetImage(), + subresource = color_view->GetImageSubresourceRange(), + clear](auto cmdbuf, auto& dld) { + cmdbuf.clearColorImage(image, vk::ImageLayout::eTransferDstOptimal, clear, subresource, + dld); + }); + } + if (use_depth || use_stencil) { + View zeta_surface; + { + MICROPROFILE_SCOPE(Vulkan_RenderTargets); + zeta_surface = texture_cache.GetDepthBufferSurface(false); + } + + zeta_surface->Transition(vk::ImageLayout::eTransferDstOptimal, + vk::PipelineStageFlagBits::eTransfer, + vk::AccessFlagBits::eTransferWrite); + + const vk::ClearDepthStencilValue clear(regs.clear_depth, + static_cast(regs.clear_stencil)); + scheduler.Record([image = zeta_surface->GetImage(), + subresource = zeta_surface->GetImageSubresourceRange(), + clear](auto cmdbuf, auto& dld) { + cmdbuf.clearDepthStencilImage(image, vk::ImageLayout::eTransferDstOptimal, clear, + subresource, dld); + }); + } +} + +void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { + MICROPROFILE_SCOPE(Vulkan_Compute); + update_descriptor_queue.Acquire(); + sampled_views.clear(); + image_views.clear(); + + const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + const ComputePipelineCacheKey key{ + code_addr, + launch_desc.shared_alloc, + {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}}; + auto& pipeline = pipeline_cache.GetComputePipeline(key); + + // Compute dispatches can't be executed inside a renderpass + scheduler.RequestOutsideRenderPassOperationContext(); + + buffer_cache.Map(CalculateComputeStreamBufferSize()); + + const auto& entries = pipeline.GetEntries(); + SetupComputeConstBuffers(entries); + SetupComputeGlobalBuffers(entries); + SetupComputeTexelBuffers(entries); + SetupComputeTextures(entries); + SetupComputeImages(entries); + + buffer_cache.Unmap(); + + TransitionImages(sampled_views, vk::PipelineStageFlagBits::eComputeShader, + vk::AccessFlagBits::eShaderRead); + TransitionImages(image_views, vk::PipelineStageFlagBits::eComputeShader, + vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); + + if (device.IsNvDeviceDiagnosticCheckpoints()) { + scheduler.Record( + [&pipeline](auto cmdbuf, auto& dld) { cmdbuf.setCheckpointNV(nullptr, dld); }); + } + + scheduler.Record([grid_x = launch_desc.grid_dim_x, grid_y = launch_desc.grid_dim_y, + grid_z = launch_desc.grid_dim_z, pipeline_handle = pipeline.GetHandle(), + layout = pipeline.GetLayout(), + descriptor_set = pipeline.CommitDescriptorSet()](auto cmdbuf, auto& dld) { + cmdbuf.bindPipeline(vk::PipelineBindPoint::eCompute, pipeline_handle, dld); + cmdbuf.bindDescriptorSets(vk::PipelineBindPoint::eCompute, layout, DESCRIPTOR_SET, 1, + &descriptor_set, 0, nullptr, dld); + cmdbuf.dispatch(grid_x, grid_y, grid_z, dld); + }); +} + +void RasterizerVulkan::FlushAll() {} + +void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) { + texture_cache.FlushRegion(addr, size); + buffer_cache.FlushRegion(addr, size); +} + +void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) { + texture_cache.InvalidateRegion(addr, size); + pipeline_cache.InvalidateRegion(addr, size); + buffer_cache.InvalidateRegion(addr, size); +} + +void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) { + FlushRegion(addr, size); + InvalidateRegion(addr, size); +} + +void RasterizerVulkan::FlushCommands() { + if (draw_counter > 0) { + draw_counter = 0; + scheduler.Flush(); + } +} + +void RasterizerVulkan::TickFrame() { + draw_counter = 0; + update_descriptor_queue.TickFrame(); + buffer_cache.TickFrame(); + staging_pool.TickFrame(); +} + +bool RasterizerVulkan::AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, + const Tegra::Engines::Fermi2D::Regs::Surface& dst, + const Tegra::Engines::Fermi2D::Config& copy_config) { + texture_cache.DoFermiCopy(src, dst, copy_config); + return true; +} + +bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, + VAddr framebuffer_addr, u32 pixel_stride) { + if (!framebuffer_addr) { + return false; + } + + const u8* host_ptr{system.Memory().GetPointer(framebuffer_addr)}; + const auto surface{texture_cache.TryFindFramebufferSurface(host_ptr)}; + if (!surface) { + return false; + } + + // Verify that the cached surface is the same size and format as the requested framebuffer + const auto& params{surface->GetSurfaceParams()}; + const auto& pixel_format{ + VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; + ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); + ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); + // ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); + + screen_info.image = &surface->GetImage(); + screen_info.width = params.width; + screen_info.height = params.height; + screen_info.is_srgb = surface->GetSurfaceParams().srgb_conversion; + return true; +} + +void RasterizerVulkan::FlushWork() { + if ((++draw_counter & 7) != 7) { + return; + } + if (draw_counter < 4096) { + // Flush work to the worker thread every 8 draws + scheduler.DispatchWork(); + } else { + // Flush work to the GPU (and implicitly the worker thread) every N draws + scheduler.Flush(); + draw_counter = 0; + } +} + +RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { + MICROPROFILE_SCOPE(Vulkan_RenderTargets); + auto& dirty = system.GPU().Maxwell3D().dirty; + const bool update_rendertargets = dirty.render_settings; + dirty.render_settings = false; + + texture_cache.GuardRenderTargets(true); + + Texceptions texceptions; + for (std::size_t rt = 0; rt < Maxwell::NumRenderTargets; ++rt) { + if (update_rendertargets) { + color_attachments[rt] = texture_cache.GetColorBufferSurface(rt, true); + } + if (color_attachments[rt] && WalkAttachmentOverlaps(*color_attachments[rt])) { + texceptions.set(rt); + } + } + + if (update_rendertargets) { + zeta_attachment = texture_cache.GetDepthBufferSurface(true); + } + if (zeta_attachment && WalkAttachmentOverlaps(*zeta_attachment)) { + texceptions.set(ZETA_TEXCEPTION_INDEX); + } + + texture_cache.GuardRenderTargets(false); + + return texceptions; +} + +bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachment) { + bool overlap = false; + for (auto& [view, layout] : sampled_views) { + if (!attachment.IsSameSurface(*view)) { + continue; + } + overlap = true; + *layout = vk::ImageLayout::eGeneral; + } + return overlap; +} + +std::tuple RasterizerVulkan::ConfigureFramebuffers( + vk::RenderPass renderpass) { + FramebufferCacheKey fbkey; + fbkey.renderpass = renderpass; + fbkey.width = std::numeric_limits::max(); + fbkey.height = std::numeric_limits::max(); + + const auto MarkAsModifiedAndPush = [&](const View& view) { + if (view == nullptr) { + return false; + } + fbkey.views.push_back(view->GetHandle()); + fbkey.width = std::min(fbkey.width, view->GetWidth()); + fbkey.height = std::min(fbkey.height, view->GetHeight()); + return true; + }; + + for (std::size_t index = 0; index < std::size(color_attachments); ++index) { + if (MarkAsModifiedAndPush(color_attachments[index])) { + texture_cache.MarkColorBufferInUse(index); + } + } + if (MarkAsModifiedAndPush(zeta_attachment)) { + texture_cache.MarkDepthBufferInUse(); + } + + const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey); + auto& framebuffer = fbentry->second; + if (is_cache_miss) { + const vk::FramebufferCreateInfo framebuffer_ci( + {}, fbkey.renderpass, static_cast(fbkey.views.size()), fbkey.views.data(), + fbkey.width, fbkey.height, 1); + const auto dev = device.GetLogical(); + const auto& dld = device.GetDispatchLoader(); + framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); + } + + return {*framebuffer, vk::Extent2D{fbkey.width, fbkey.height}}; +} + +RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, + BufferBindings& buffer_bindings, + bool is_indexed, + bool is_instanced) { + MICROPROFILE_SCOPE(Vulkan_Geometry); + + const auto& gpu = system.GPU().Maxwell3D(); + const auto& regs = gpu.regs; + + SetupVertexArrays(fixed_state.vertex_input, buffer_bindings); + + const u32 base_instance = regs.vb_base_instance; + const u32 num_instances = is_instanced ? gpu.mme_draw.instance_count : 1; + const u32 base_vertex = is_indexed ? regs.vb_element_base : regs.vertex_buffer.first; + const u32 num_vertices = is_indexed ? regs.index_array.count : regs.vertex_buffer.count; + + DrawParameters params{base_instance, num_instances, base_vertex, num_vertices, is_indexed}; + SetupIndexBuffer(buffer_bindings, params, is_indexed); + + return params; +} + +void RasterizerVulkan::SetupShaderDescriptors( + const std::array& shaders) { + texture_cache.GuardSamplers(true); + + for (std::size_t stage = 0; stage < Maxwell::MaxShaderStage; ++stage) { + // Skip VertexA stage + const auto& shader = shaders[stage + 1]; + if (!shader) { + continue; + } + const auto& entries = shader->GetEntries(); + SetupGraphicsConstBuffers(entries, stage); + SetupGraphicsGlobalBuffers(entries, stage); + SetupGraphicsTexelBuffers(entries, stage); + SetupGraphicsTextures(entries, stage); + SetupGraphicsImages(entries, stage); + } + texture_cache.GuardSamplers(false); +} + +void RasterizerVulkan::SetupImageTransitions( + Texceptions texceptions, const std::array& color_attachments, + const View& zeta_attachment) { + TransitionImages(sampled_views, vk::PipelineStageFlagBits::eAllGraphics, + vk::AccessFlagBits::eShaderRead); + TransitionImages(image_views, vk::PipelineStageFlagBits::eAllGraphics, + vk::AccessFlagBits::eShaderRead | vk::AccessFlagBits::eShaderWrite); + + for (std::size_t rt = 0; rt < std::size(color_attachments); ++rt) { + const auto color_attachment = color_attachments[rt]; + if (color_attachment == nullptr) { + continue; + } + const auto image_layout = + texceptions[rt] ? vk::ImageLayout::eGeneral : vk::ImageLayout::eColorAttachmentOptimal; + color_attachment->Transition( + image_layout, vk::PipelineStageFlagBits::eColorAttachmentOutput, + vk::AccessFlagBits::eColorAttachmentRead | vk::AccessFlagBits::eColorAttachmentWrite); + } + + if (zeta_attachment != nullptr) { + const auto image_layout = texceptions[ZETA_TEXCEPTION_INDEX] + ? vk::ImageLayout::eGeneral + : vk::ImageLayout::eDepthStencilAttachmentOptimal; + zeta_attachment->Transition(image_layout, vk::PipelineStageFlagBits::eLateFragmentTests, + vk::AccessFlagBits::eDepthStencilAttachmentRead | + vk::AccessFlagBits::eDepthStencilAttachmentWrite); + } +} + +void RasterizerVulkan::UpdateDynamicStates() { + auto& gpu = system.GPU().Maxwell3D(); + UpdateViewportsState(gpu); + UpdateScissorsState(gpu); + UpdateDepthBias(gpu); + UpdateBlendConstants(gpu); + UpdateDepthBounds(gpu); + UpdateStencilFaces(gpu); +} + +void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, + BufferBindings& buffer_bindings) { + const auto& regs = system.GPU().Maxwell3D().regs; + + for (u32 index = 0; index < static_cast(Maxwell::NumVertexAttributes); ++index) { + const auto& attrib = regs.vertex_attrib_format[index]; + if (!attrib.IsValid()) { + continue; + } + + const auto& buffer = regs.vertex_array[attrib.buffer]; + ASSERT(buffer.IsEnabled()); + + vertex_input.attributes[vertex_input.num_attributes++] = + FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size, + attrib.offset); + } + + for (u32 index = 0; index < static_cast(Maxwell::NumVertexArrays); ++index) { + const auto& vertex_array = regs.vertex_array[index]; + if (!vertex_array.IsEnabled()) { + continue; + } + + const GPUVAddr start{vertex_array.StartAddress()}; + const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; + + ASSERT(end > start); + const std::size_t size{end - start + 1}; + const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); + + vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding( + index, vertex_array.stride, + regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0); + buffer_bindings.AddVertexBinding(buffer, offset); + } +} + +void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, + bool is_indexed) { + const auto& regs = system.GPU().Maxwell3D().regs; + switch (regs.draw.topology) { + case Maxwell::PrimitiveTopology::Quads: + if (params.is_indexed) { + UNIMPLEMENTED(); + } else { + const auto [buffer, offset] = + quad_array_pass.Assemble(params.num_vertices, params.base_vertex); + buffer_bindings.SetIndexBinding(&buffer, offset, vk::IndexType::eUint32); + params.base_vertex = 0; + params.num_vertices = params.num_vertices * 6 / 4; + params.is_indexed = true; + } + break; + default: { + if (!is_indexed) { + break; + } + auto [buffer, offset] = + buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); + + auto format = regs.index_array.format; + const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; + if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { + std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset); + format = Maxwell::IndexFormat::UnsignedShort; + } + + buffer_bindings.SetIndexBinding(buffer, offset, MaxwellToVK::IndexFormat(device, format)); + break; + } + } +} + +void RasterizerVulkan::SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage) { + MICROPROFILE_SCOPE(Vulkan_ConstBuffers); + const auto& gpu = system.GPU().Maxwell3D(); + const auto& shader_stage = gpu.state.shader_stages[stage]; + for (const auto& entry : entries.const_buffers) { + SetupConstBuffer(entry, shader_stage.const_buffers[entry.GetIndex()]); + } +} + +void RasterizerVulkan::SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage) { + MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); + auto& gpu{system.GPU()}; + const auto cbufs{gpu.Maxwell3D().state.shader_stages[stage]}; + + for (const auto& entry : entries.global_buffers) { + const auto addr = cbufs.const_buffers[entry.GetCbufIndex()].address + entry.GetCbufOffset(); + SetupGlobalBuffer(entry, addr); + } +} + +void RasterizerVulkan::SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage) { + MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& gpu = system.GPU().Maxwell3D(); + for (const auto& entry : entries.texel_buffers) { + const auto image = GetTextureInfo(gpu, entry, stage).tic; + SetupTexelBuffer(image, entry); + } +} + +void RasterizerVulkan::SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage) { + MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& gpu = system.GPU().Maxwell3D(); + for (const auto& entry : entries.samplers) { + const auto texture = GetTextureInfo(gpu, entry, stage); + SetupTexture(texture, entry); + } +} + +void RasterizerVulkan::SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage) { + MICROPROFILE_SCOPE(Vulkan_Images); + const auto& gpu = system.GPU().KeplerCompute(); + for (const auto& entry : entries.images) { + const auto tic = GetTextureInfo(gpu, entry, stage).tic; + SetupImage(tic, entry); + } +} + +void RasterizerVulkan::SetupComputeConstBuffers(const ShaderEntries& entries) { + MICROPROFILE_SCOPE(Vulkan_ConstBuffers); + const auto& launch_desc = system.GPU().KeplerCompute().launch_description; + for (const auto& entry : entries.const_buffers) { + const auto& config = launch_desc.const_buffer_config[entry.GetIndex()]; + const std::bitset<8> mask = launch_desc.const_buffer_enable_mask.Value(); + Tegra::Engines::ConstBufferInfo buffer; + buffer.address = config.Address(); + buffer.size = config.size; + buffer.enabled = mask[entry.GetIndex()]; + SetupConstBuffer(entry, buffer); + } +} + +void RasterizerVulkan::SetupComputeGlobalBuffers(const ShaderEntries& entries) { + MICROPROFILE_SCOPE(Vulkan_GlobalBuffers); + const auto cbufs{system.GPU().KeplerCompute().launch_description.const_buffer_config}; + for (const auto& entry : entries.global_buffers) { + const auto addr{cbufs[entry.GetCbufIndex()].Address() + entry.GetCbufOffset()}; + SetupGlobalBuffer(entry, addr); + } +} + +void RasterizerVulkan::SetupComputeTexelBuffers(const ShaderEntries& entries) { + MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& gpu = system.GPU().KeplerCompute(); + for (const auto& entry : entries.texel_buffers) { + const auto image = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + SetupTexelBuffer(image, entry); + } +} + +void RasterizerVulkan::SetupComputeTextures(const ShaderEntries& entries) { + MICROPROFILE_SCOPE(Vulkan_Textures); + const auto& gpu = system.GPU().KeplerCompute(); + for (const auto& entry : entries.samplers) { + const auto texture = GetTextureInfo(gpu, entry, ComputeShaderIndex); + SetupTexture(texture, entry); + } +} + +void RasterizerVulkan::SetupComputeImages(const ShaderEntries& entries) { + MICROPROFILE_SCOPE(Vulkan_Images); + const auto& gpu = system.GPU().KeplerCompute(); + for (const auto& entry : entries.images) { + const auto tic = GetTextureInfo(gpu, entry, ComputeShaderIndex).tic; + SetupImage(tic, entry); + } +} + +void RasterizerVulkan::SetupConstBuffer(const ConstBufferEntry& entry, + const Tegra::Engines::ConstBufferInfo& buffer) { + // Align the size to avoid bad std140 interactions + const std::size_t size = + Common::AlignUp(CalculateConstBufferSize(entry, buffer), 4 * sizeof(float)); + ASSERT(size <= MaxConstbufferSize); + + const auto [buffer_handle, offset] = + buffer_cache.UploadMemory(buffer.address, size, device.GetUniformBufferAlignment()); + + update_descriptor_queue.AddBuffer(buffer_handle, offset, size); +} + +void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address) { + auto& memory_manager{system.GPU().MemoryManager()}; + const auto actual_addr = memory_manager.Read(address); + const auto size = memory_manager.Read(address + 8); + + if (size == 0) { + // Sometimes global memory pointers don't have a proper size. Upload a dummy entry because + // Vulkan doesn't like empty buffers. + constexpr std::size_t dummy_size = 4; + const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); + update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); + return; + } + + const auto [buffer, offset] = buffer_cache.UploadMemory( + actual_addr, size, device.GetStorageBufferAlignment(), entry.IsWritten()); + update_descriptor_queue.AddBuffer(buffer, offset, size); +} + +void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic, + const TexelBufferEntry& entry) { + auto view = texture_cache.GetTextureSurface(tic, entry); + ASSERT(view->IsBufferView()); + + update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); +} + +void RasterizerVulkan::SetupTexture(const Tegra::Texture::FullTextureInfo& texture, + const SamplerEntry& entry) { + auto view = texture_cache.GetTextureSurface(texture.tic, entry); + ASSERT(!view->IsBufferView()); + + const auto image_view = view->GetHandle(texture.tic.x_source, texture.tic.y_source, + texture.tic.z_source, texture.tic.w_source); + const auto sampler = sampler_cache.GetSampler(texture.tsc); + update_descriptor_queue.AddSampledImage(sampler, image_view); + + const auto image_layout = update_descriptor_queue.GetLastImageLayout(); + *image_layout = vk::ImageLayout::eShaderReadOnlyOptimal; + sampled_views.push_back(ImageView{std::move(view), image_layout}); +} + +void RasterizerVulkan::SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry) { + auto view = texture_cache.GetImageSurface(tic, entry); + + if (entry.IsWritten()) { + view->MarkAsModified(texture_cache.Tick()); + } + + UNIMPLEMENTED_IF(tic.IsBuffer()); + + const auto image_view = view->GetHandle(tic.x_source, tic.y_source, tic.z_source, tic.w_source); + update_descriptor_queue.AddImage(image_view); + + const auto image_layout = update_descriptor_queue.GetLastImageLayout(); + *image_layout = vk::ImageLayout::eGeneral; + image_views.push_back(ImageView{std::move(view), image_layout}); +} + +void RasterizerVulkan::UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu) { + if (!gpu.dirty.viewport_transform && scheduler.TouchViewports()) { + return; + } + gpu.dirty.viewport_transform = false; + const auto& regs = gpu.regs; + const std::array viewports{ + GetViewportState(device, regs, 0), GetViewportState(device, regs, 1), + GetViewportState(device, regs, 2), GetViewportState(device, regs, 3), + GetViewportState(device, regs, 4), GetViewportState(device, regs, 5), + GetViewportState(device, regs, 6), GetViewportState(device, regs, 7), + GetViewportState(device, regs, 8), GetViewportState(device, regs, 9), + GetViewportState(device, regs, 10), GetViewportState(device, regs, 11), + GetViewportState(device, regs, 12), GetViewportState(device, regs, 13), + GetViewportState(device, regs, 14), GetViewportState(device, regs, 15)}; + scheduler.Record([viewports](auto cmdbuf, auto& dld) { + cmdbuf.setViewport(0, static_cast(viewports.size()), viewports.data(), dld); + }); +} + +void RasterizerVulkan::UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu) { + if (!gpu.dirty.scissor_test && scheduler.TouchScissors()) { + return; + } + gpu.dirty.scissor_test = false; + const auto& regs = gpu.regs; + const std::array scissors = { + GetScissorState(regs, 0), GetScissorState(regs, 1), GetScissorState(regs, 2), + GetScissorState(regs, 3), GetScissorState(regs, 4), GetScissorState(regs, 5), + GetScissorState(regs, 6), GetScissorState(regs, 7), GetScissorState(regs, 8), + GetScissorState(regs, 9), GetScissorState(regs, 10), GetScissorState(regs, 11), + GetScissorState(regs, 12), GetScissorState(regs, 13), GetScissorState(regs, 14), + GetScissorState(regs, 15)}; + scheduler.Record([scissors](auto cmdbuf, auto& dld) { + cmdbuf.setScissor(0, static_cast(scissors.size()), scissors.data(), dld); + }); +} + +void RasterizerVulkan::UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu) { + if (!gpu.dirty.polygon_offset && scheduler.TouchDepthBias()) { + return; + } + gpu.dirty.polygon_offset = false; + const auto& regs = gpu.regs; + scheduler.Record([constant = regs.polygon_offset_units, clamp = regs.polygon_offset_clamp, + factor = regs.polygon_offset_factor](auto cmdbuf, auto& dld) { + cmdbuf.setDepthBias(constant, clamp, factor / 2.0f, dld); + }); +} + +void RasterizerVulkan::UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu) { + if (!gpu.dirty.blend_state && scheduler.TouchBlendConstants()) { + return; + } + gpu.dirty.blend_state = false; + const std::array blend_color = {gpu.regs.blend_color.r, gpu.regs.blend_color.g, + gpu.regs.blend_color.b, gpu.regs.blend_color.a}; + scheduler.Record([blend_color](auto cmdbuf, auto& dld) { + cmdbuf.setBlendConstants(blend_color.data(), dld); + }); +} + +void RasterizerVulkan::UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu) { + if (!gpu.dirty.depth_bounds_values && scheduler.TouchDepthBounds()) { + return; + } + gpu.dirty.depth_bounds_values = false; + const auto& regs = gpu.regs; + scheduler.Record([min = regs.depth_bounds[0], max = regs.depth_bounds[1]]( + auto cmdbuf, auto& dld) { cmdbuf.setDepthBounds(min, max, dld); }); +} + +void RasterizerVulkan::UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu) { + if (!gpu.dirty.stencil_test && scheduler.TouchStencilValues()) { + return; + } + gpu.dirty.stencil_test = false; + const auto& regs = gpu.regs; + if (regs.stencil_two_side_enable) { + // Separate values per face + scheduler.Record( + [front_ref = regs.stencil_front_func_ref, front_write_mask = regs.stencil_front_mask, + front_test_mask = regs.stencil_front_func_mask, back_ref = regs.stencil_back_func_ref, + back_write_mask = regs.stencil_back_mask, + back_test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { + // Front face + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFront, front_ref, dld); + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFront, front_write_mask, dld); + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFront, front_test_mask, dld); + + // Back face + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eBack, back_ref, dld); + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eBack, back_write_mask, dld); + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eBack, back_test_mask, dld); + }); + } else { + // Front face defines both faces + scheduler.Record([ref = regs.stencil_back_func_ref, write_mask = regs.stencil_back_mask, + test_mask = regs.stencil_back_func_mask](auto cmdbuf, auto& dld) { + cmdbuf.setStencilReference(vk::StencilFaceFlagBits::eFrontAndBack, ref, dld); + cmdbuf.setStencilWriteMask(vk::StencilFaceFlagBits::eFrontAndBack, write_mask, dld); + cmdbuf.setStencilCompareMask(vk::StencilFaceFlagBits::eFrontAndBack, test_mask, dld); + }); + } +} + +std::size_t RasterizerVulkan::CalculateGraphicsStreamBufferSize(bool is_indexed) const { + std::size_t size = CalculateVertexArraysSize(); + if (is_indexed) { + size = Common::AlignUp(size, 4) + CalculateIndexBufferSize(); + } + size += Maxwell::MaxConstBuffers * (MaxConstbufferSize + device.GetUniformBufferAlignment()); + return size; +} + +std::size_t RasterizerVulkan::CalculateComputeStreamBufferSize() const { + return Tegra::Engines::KeplerCompute::NumConstBuffers * + (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); +} + +std::size_t RasterizerVulkan::CalculateVertexArraysSize() const { + const auto& regs = system.GPU().Maxwell3D().regs; + + std::size_t size = 0; + for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { + // This implementation assumes that all attributes are used in the shader. + const GPUVAddr start{regs.vertex_array[index].StartAddress()}; + const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; + DEBUG_ASSERT(end > start); + + size += (end - start + 1) * regs.vertex_array[index].enable; + } + return size; +} + +std::size_t RasterizerVulkan::CalculateIndexBufferSize() const { + const auto& regs = system.GPU().Maxwell3D().regs; + return static_cast(regs.index_array.count) * + static_cast(regs.index_array.FormatSizeInBytes()); +} + +std::size_t RasterizerVulkan::CalculateConstBufferSize( + const ConstBufferEntry& entry, const Tegra::Engines::ConstBufferInfo& buffer) const { + if (entry.IsIndirect()) { + // Buffer is accessed indirectly, so upload the entire thing + return buffer.size; + } else { + // Buffer is accessed directly, upload just what we use + return entry.GetSize(); + } +} + +RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { + using namespace VideoCore::Surface; + + const auto& regs = system.GPU().Maxwell3D().regs; + RenderPassParams renderpass_params; + + for (std::size_t rt = 0; rt < static_cast(regs.rt_control.count); ++rt) { + const auto& rendertarget = regs.rt[rt]; + if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) + continue; + renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{ + static_cast(rt), PixelFormatFromRenderTargetFormat(rendertarget.format), + texceptions.test(rt)}); + } + + renderpass_params.has_zeta = regs.zeta_enable; + if (renderpass_params.has_zeta) { + renderpass_params.zeta_pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); + renderpass_params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX]; + } + + return renderpass_params; +} + +} // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index fc324952b..2ecc19e7a 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -4,10 +4,259 @@ #pragma once +#include +#include +#include +#include +#include + +#include + +#include "common/common_types.h" +#include "video_core/memory_manager.h" +#include "video_core/rasterizer_accelerated.h" #include "video_core/rasterizer_interface.h" +#include "video_core/renderer_vulkan/declarations.h" +#include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_buffer_cache.h" +#include "video_core/renderer_vulkan/vk_compute_pass.h" +#include "video_core/renderer_vulkan/vk_descriptor_pool.h" +#include "video_core/renderer_vulkan/vk_memory_manager.h" +#include "video_core/renderer_vulkan/vk_pipeline_cache.h" +#include "video_core/renderer_vulkan/vk_renderpass_cache.h" +#include "video_core/renderer_vulkan/vk_resource_manager.h" +#include "video_core/renderer_vulkan/vk_sampler_cache.h" +#include "video_core/renderer_vulkan/vk_scheduler.h" +#include "video_core/renderer_vulkan/vk_staging_buffer_pool.h" +#include "video_core/renderer_vulkan/vk_texture_cache.h" +#include "video_core/renderer_vulkan/vk_update_descriptor.h" + +namespace Core { +class System; +} + +namespace Core::Frontend { +class EmuWindow; +} + +namespace Tegra::Engines { +class Maxwell3D; +} namespace Vulkan { -class RasterizerVulkan : public VideoCore::RasterizerInterface {}; +struct VKScreenInfo; + +using ImageViewsPack = + boost::container::static_vector; + +struct FramebufferCacheKey { + vk::RenderPass renderpass; + ImageViewsPack views; + u32 width; + u32 height; + + std::size_t Hash() const noexcept { + std::size_t hash = 0; + boost::hash_combine(hash, static_cast(renderpass)); + for (const auto& view : views) { + boost::hash_combine(hash, static_cast(view)); + } + boost::hash_combine(hash, width); + boost::hash_combine(hash, height); + return hash; + } + + bool operator==(const FramebufferCacheKey& rhs) const noexcept { + return std::tie(renderpass, views, width, height) == + std::tie(rhs.renderpass, rhs.views, rhs.width, rhs.height); + } +}; + +} // namespace Vulkan + +namespace std { + +template <> +struct hash { + std::size_t operator()(const Vulkan::FramebufferCacheKey& k) const noexcept { + return k.Hash(); + } +}; + +} // namespace std + +namespace Vulkan { + +class BufferBindings; + +struct ImageView { + View view; + vk::ImageLayout* layout = nullptr; +}; + +class RasterizerVulkan : public VideoCore::RasterizerAccelerated { +public: + explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window, + VKScreenInfo& screen_info, const VKDevice& device, + VKResourceManager& resource_manager, VKMemoryManager& memory_manager, + VKScheduler& scheduler); + ~RasterizerVulkan() override; + + bool DrawBatch(bool is_indexed) override; + bool DrawMultiBatch(bool is_indexed) override; + void Clear() override; + void DispatchCompute(GPUVAddr code_addr) override; + void FlushAll() override; + void FlushRegion(CacheAddr addr, u64 size) override; + void InvalidateRegion(CacheAddr addr, u64 size) override; + void FlushAndInvalidateRegion(CacheAddr addr, u64 size) override; + void FlushCommands() override; + void TickFrame() override; + bool AccelerateSurfaceCopy(const Tegra::Engines::Fermi2D::Regs::Surface& src, + const Tegra::Engines::Fermi2D::Regs::Surface& dst, + const Tegra::Engines::Fermi2D::Config& copy_config) override; + bool AccelerateDisplay(const Tegra::FramebufferConfig& config, VAddr framebuffer_addr, + u32 pixel_stride) override; + + /// Maximum supported size that a constbuffer can have in bytes. + static constexpr std::size_t MaxConstbufferSize = 0x10000; + static_assert(MaxConstbufferSize % (4 * sizeof(float)) == 0, + "The maximum size of a constbuffer must be a multiple of the size of GLvec4"); + +private: + struct DrawParameters { + void Draw(vk::CommandBuffer cmdbuf, const vk::DispatchLoaderDynamic& dld) const; + + u32 base_instance = 0; + u32 num_instances = 0; + u32 base_vertex = 0; + u32 num_vertices = 0; + bool is_indexed = 0; + }; + + using Texceptions = std::bitset; + + static constexpr std::size_t ZETA_TEXCEPTION_INDEX = 8; + + void Draw(bool is_indexed, bool is_instanced); + + void FlushWork(); + + Texceptions UpdateAttachments(); + + std::tuple ConfigureFramebuffers(vk::RenderPass renderpass); + + /// Setups geometry buffers and state. + DrawParameters SetupGeometry(FixedPipelineState& fixed_state, BufferBindings& buffer_bindings, + bool is_indexed, bool is_instanced); + + /// Setup descriptors in the graphics pipeline. + void SetupShaderDescriptors(const std::array& shaders); + + void SetupImageTransitions(Texceptions texceptions, + const std::array& color_attachments, + const View& zeta_attachment); + + void UpdateDynamicStates(); + + bool WalkAttachmentOverlaps(const CachedSurfaceView& attachment); + + void SetupVertexArrays(FixedPipelineState::VertexInput& vertex_input, + BufferBindings& buffer_bindings); + + void SetupIndexBuffer(BufferBindings& buffer_bindings, DrawParameters& params, bool is_indexed); + + /// Setup constant buffers in the graphics pipeline. + void SetupGraphicsConstBuffers(const ShaderEntries& entries, std::size_t stage); + + /// Setup global buffers in the graphics pipeline. + void SetupGraphicsGlobalBuffers(const ShaderEntries& entries, std::size_t stage); + + /// Setup texel buffers in the graphics pipeline. + void SetupGraphicsTexelBuffers(const ShaderEntries& entries, std::size_t stage); + + /// Setup textures in the graphics pipeline. + void SetupGraphicsTextures(const ShaderEntries& entries, std::size_t stage); + + /// Setup images in the graphics pipeline. + void SetupGraphicsImages(const ShaderEntries& entries, std::size_t stage); + + /// Setup constant buffers in the compute pipeline. + void SetupComputeConstBuffers(const ShaderEntries& entries); + + /// Setup global buffers in the compute pipeline. + void SetupComputeGlobalBuffers(const ShaderEntries& entries); + + /// Setup texel buffers in the compute pipeline. + void SetupComputeTexelBuffers(const ShaderEntries& entries); + + /// Setup textures in the compute pipeline. + void SetupComputeTextures(const ShaderEntries& entries); + + /// Setup images in the compute pipeline. + void SetupComputeImages(const ShaderEntries& entries); + + void SetupConstBuffer(const ConstBufferEntry& entry, + const Tegra::Engines::ConstBufferInfo& buffer); + + void SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAddr address); + + void SetupTexelBuffer(const Tegra::Texture::TICEntry& image, const TexelBufferEntry& entry); + + void SetupTexture(const Tegra::Texture::FullTextureInfo& texture, const SamplerEntry& entry); + + void SetupImage(const Tegra::Texture::TICEntry& tic, const ImageEntry& entry); + + void UpdateViewportsState(Tegra::Engines::Maxwell3D& gpu); + void UpdateScissorsState(Tegra::Engines::Maxwell3D& gpu); + void UpdateDepthBias(Tegra::Engines::Maxwell3D& gpu); + void UpdateBlendConstants(Tegra::Engines::Maxwell3D& gpu); + void UpdateDepthBounds(Tegra::Engines::Maxwell3D& gpu); + void UpdateStencilFaces(Tegra::Engines::Maxwell3D& gpu); + + std::size_t CalculateGraphicsStreamBufferSize(bool is_indexed) const; + + std::size_t CalculateComputeStreamBufferSize() const; + + std::size_t CalculateVertexArraysSize() const; + + std::size_t CalculateIndexBufferSize() const; + + std::size_t CalculateConstBufferSize(const ConstBufferEntry& entry, + const Tegra::Engines::ConstBufferInfo& buffer) const; + + RenderPassParams GetRenderPassParams(Texceptions texceptions) const; + + Core::System& system; + Core::Frontend::EmuWindow& render_window; + VKScreenInfo& screen_info; + const VKDevice& device; + VKResourceManager& resource_manager; + VKMemoryManager& memory_manager; + VKScheduler& scheduler; + + VKStagingBufferPool staging_pool; + VKDescriptorPool descriptor_pool; + VKUpdateDescriptorQueue update_descriptor_queue; + QuadArrayPass quad_array_pass; + Uint8Pass uint8_pass; + + VKTextureCache texture_cache; + VKPipelineCache pipeline_cache; + VKBufferCache buffer_cache; + VKSamplerCache sampler_cache; + + std::array color_attachments; + View zeta_attachment; + + std::vector sampled_views; + std::vector image_views; + + u32 draw_counter = 0; + + // TODO(Rodrigo): Invalidate on image destruction + std::unordered_map framebuffer_cache; +}; } // namespace Vulkan From 09b1d762d7a323fd41363d551cd0477550fec83d Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 17 Jan 2020 21:40:01 -0300 Subject: [PATCH 3/3] vk_rasterizer: Address feedback --- .../renderer_vulkan/vk_rasterizer.cpp | 50 +++++++++++-------- .../renderer_vulkan/vk_rasterizer.h | 7 +-- 2 files changed, 32 insertions(+), 25 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 23252e658..d2c6b1189 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -203,6 +203,8 @@ public: return BindStatic<30>(scheduler); case 31: return BindStatic<31>(scheduler); + case 32: + return BindStatic<32>(scheduler); } UNREACHABLE(); } @@ -526,7 +528,6 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, VideoCore::Surface::PixelFormatFromGPUPixelFormat(config.pixel_format)}; ASSERT_MSG(params.width == config.width, "Framebuffer width is different"); ASSERT_MSG(params.height == config.height, "Framebuffer height is different"); - // ASSERT_MSG(params.pixel_format == pixel_format, "Framebuffer pixel_format is different"); screen_info.image = &surface->GetImage(); screen_info.width = params.width; @@ -536,17 +537,24 @@ bool RasterizerVulkan::AccelerateDisplay(const Tegra::FramebufferConfig& config, } void RasterizerVulkan::FlushWork() { + static constexpr u32 DRAWS_TO_DISPATCH = 4096; + + // Only check multiples of 8 draws + static_assert(DRAWS_TO_DISPATCH % 8 == 0); if ((++draw_counter & 7) != 7) { return; } - if (draw_counter < 4096) { - // Flush work to the worker thread every 8 draws + + if (draw_counter < DRAWS_TO_DISPATCH) { + // Send recorded tasks to the worker thread scheduler.DispatchWork(); - } else { - // Flush work to the GPU (and implicitly the worker thread) every N draws - scheduler.Flush(); - draw_counter = 0; + return; } + + // Otherwise (every certain number of draws) flush execution. + // This submits commands to the Vulkan driver. + scheduler.Flush(); + draw_counter = 0; } RasterizerVulkan::Texceptions RasterizerVulkan::UpdateAttachments() { @@ -593,18 +601,16 @@ bool RasterizerVulkan::WalkAttachmentOverlaps(const CachedSurfaceView& attachmen std::tuple RasterizerVulkan::ConfigureFramebuffers( vk::RenderPass renderpass) { - FramebufferCacheKey fbkey; - fbkey.renderpass = renderpass; - fbkey.width = std::numeric_limits::max(); - fbkey.height = std::numeric_limits::max(); + FramebufferCacheKey key{renderpass, std::numeric_limits::max(), + std::numeric_limits::max()}; const auto MarkAsModifiedAndPush = [&](const View& view) { if (view == nullptr) { return false; } - fbkey.views.push_back(view->GetHandle()); - fbkey.width = std::min(fbkey.width, view->GetWidth()); - fbkey.height = std::min(fbkey.height, view->GetHeight()); + key.views.push_back(view->GetHandle()); + key.width = std::min(key.width, view->GetWidth()); + key.height = std::min(key.height, view->GetHeight()); return true; }; @@ -617,18 +623,18 @@ std::tuple RasterizerVulkan::ConfigureFramebuffer texture_cache.MarkDepthBufferInUse(); } - const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(fbkey); + const auto [fbentry, is_cache_miss] = framebuffer_cache.try_emplace(key); auto& framebuffer = fbentry->second; if (is_cache_miss) { - const vk::FramebufferCreateInfo framebuffer_ci( - {}, fbkey.renderpass, static_cast(fbkey.views.size()), fbkey.views.data(), - fbkey.width, fbkey.height, 1); + const vk::FramebufferCreateInfo framebuffer_ci({}, key.renderpass, + static_cast(key.views.size()), + key.views.data(), key.width, key.height, 1); const auto dev = device.GetLogical(); const auto& dld = device.GetDispatchLoader(); framebuffer = dev.createFramebufferUnique(framebuffer_ci, nullptr, dld); } - return {*framebuffer, vk::Extent2D{fbkey.width, fbkey.height}}; + return {*framebuffer, vk::Extent2D{key.width, key.height}}; } RasterizerVulkan::DrawParameters RasterizerVulkan::SetupGeometry(FixedPipelineState& fixed_state, @@ -771,8 +777,8 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar if (!is_indexed) { break; } - auto [buffer, offset] = - buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); + const GPUVAddr gpu_addr = regs.index_array.IndexStart(); + auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, CalculateIndexBufferSize()); auto format = regs.index_array.format; const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; @@ -918,7 +924,7 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd void RasterizerVulkan::SetupTexelBuffer(const Tegra::Texture::TICEntry& tic, const TexelBufferEntry& entry) { - auto view = texture_cache.GetTextureSurface(tic, entry); + const auto view = texture_cache.GetTextureSurface(tic, entry); ASSERT(view->IsBufferView()); update_descriptor_queue.AddTexelBuffer(view->GetBufferView()); diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 2ecc19e7a..7be71e734 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -11,6 +11,7 @@ #include #include +#include #include "common/common_types.h" #include "video_core/memory_manager.h" @@ -51,10 +52,10 @@ using ImageViewsPack = boost::container::static_vector; struct FramebufferCacheKey { - vk::RenderPass renderpass; + vk::RenderPass renderpass{}; + u32 width = 0; + u32 height = 0; ImageViewsPack views; - u32 width; - u32 height; std::size_t Hash() const noexcept { std::size_t hash = 0;