diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index be1c31978..a7f256ff9 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -140,6 +140,12 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size enable.Assign(1); } +void FixedPipelineState::Fill(const Maxwell& regs) { + rasterizer.Fill(regs); + depth_stencil.Fill(regs); + color_blending.Fill(regs); +} + std::size_t FixedPipelineState::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); return static_cast(hash); @@ -149,15 +155,6 @@ bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcep return std::memcmp(this, &rhs, sizeof *this) == 0; } -FixedPipelineState GetFixedPipelineState(const Maxwell& regs) { - FixedPipelineState fixed_state; - fixed_state.rasterizer.Fill(regs); - fixed_state.depth_stencil.Fill(regs); - fixed_state.color_blending.Fill(regs); - fixed_state.padding = {}; - return fixed_state; -} - u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept { // OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8 // If we substract 0x200 to OpenGL enums and 1 to the others we get a 0-7 range. diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 9a950f4de..77188b862 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -17,7 +17,7 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -struct alignas(32) FixedPipelineState { +struct FixedPipelineState { static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept; static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept; @@ -237,7 +237,8 @@ struct alignas(32) FixedPipelineState { Rasterizer rasterizer; DepthStencil depth_stencil; ColorBlending color_blending; - std::array padding; + + void Fill(const Maxwell& regs); std::size_t Hash() const noexcept; @@ -250,9 +251,6 @@ struct alignas(32) FixedPipelineState { static_assert(std::has_unique_object_representations_v); static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_constructible_v); -static_assert(sizeof(FixedPipelineState) % 32 == 0, "Size is not aligned"); - -FixedPipelineState GetFixedPipelineState(const Maxwell& regs); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 8332b42aa..45bd1fc6c 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -288,7 +288,7 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa depth_stencil_ci.maxDepthBounds = 0.0f; std::array cb_attachments; - const std::size_t num_attachments = renderpass_params.color_attachments.size(); + const auto num_attachments = static_cast(renderpass_params.num_color_attachments); for (std::size_t index = 0; index < num_attachments; ++index) { static constexpr std::array COMPONENT_TABLE = { VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 91b1b16a5..e6d4adc92 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -161,6 +161,24 @@ u32 FillDescriptorLayout(const ShaderEntries& entries, } // Anonymous namespace +std::size_t GraphicsPipelineCacheKey::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); + return static_cast(hash); +} + +bool GraphicsPipelineCacheKey::operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { + return std::memcmp(&rhs, this, sizeof *this) == 0; +} + +std::size_t ComputePipelineCacheKey::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); + return static_cast(hash); +} + +bool ComputePipelineCacheKey::operator==(const ComputePipelineCacheKey& rhs) const noexcept { + return std::memcmp(&rhs, this, sizeof *this) == 0; +} + CachedShader::CachedShader(Core::System& system, Tegra::Engines::ShaderType stage, GPUVAddr gpu_addr, VAddr cpu_addr, ProgramCode program_code, u32 main_offset) diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.h b/src/video_core/renderer_vulkan/vk_pipeline_cache.h index 602a0a340..84d26b822 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.h +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include #include @@ -51,42 +50,38 @@ using ProgramCode = std::vector; struct GraphicsPipelineCacheKey { FixedPipelineState fixed_state; - std::array shaders; RenderPassParams renderpass_params; + std::array shaders; + u64 padding; // This is necessary for unique object representations - std::size_t Hash() const noexcept { - std::size_t hash = fixed_state.Hash(); - for (const auto& shader : shaders) { - boost::hash_combine(hash, shader); - } - boost::hash_combine(hash, renderpass_params.Hash()); - return hash; - } + std::size_t Hash() const noexcept; - bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept { - return std::tie(fixed_state, shaders, renderpass_params) == - std::tie(rhs.fixed_state, rhs.shaders, rhs.renderpass_params); + bool operator==(const GraphicsPipelineCacheKey& rhs) const noexcept; + + bool operator!=(const GraphicsPipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); } }; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); struct ComputePipelineCacheKey { - GPUVAddr shader{}; - u32 shared_memory_size{}; - std::array workgroup_size{}; + GPUVAddr shader; + u32 shared_memory_size; + std::array workgroup_size; - std::size_t Hash() const noexcept { - return static_cast(shader) ^ - ((static_cast(shared_memory_size) >> 7) << 40) ^ - static_cast(workgroup_size[0]) ^ - (static_cast(workgroup_size[1]) << 16) ^ - (static_cast(workgroup_size[2]) << 24); - } + std::size_t Hash() const noexcept; - bool operator==(const ComputePipelineCacheKey& rhs) const noexcept { - return std::tie(shader, shared_memory_size, workgroup_size) == - std::tie(rhs.shader, rhs.shared_memory_size, rhs.workgroup_size); + bool operator==(const ComputePipelineCacheKey& rhs) const noexcept; + + bool operator!=(const ComputePipelineCacheKey& rhs) const noexcept { + return !operator==(rhs); } }; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 68464e637..c821b1229 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -316,7 +316,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { query_cache.UpdateCounters(); const auto& gpu = system.GPU().Maxwell3D(); - GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)}; + GraphicsPipelineCacheKey key; + key.fixed_state.Fill(gpu.regs); buffer_cache.Map(CalculateGraphicsStreamBufferSize(is_indexed)); @@ -334,10 +335,11 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { buffer_cache.Unmap(); - const auto texceptions = UpdateAttachments(); + const Texceptions texceptions = UpdateAttachments(); SetupImageTransitions(texceptions, color_attachments, zeta_attachment); key.renderpass_params = GetRenderPassParams(texceptions); + key.padding = 0; auto& pipeline = pipeline_cache.GetGraphicsPipeline(key); scheduler.BindGraphicsPipeline(pipeline.GetHandle()); @@ -453,10 +455,12 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) { query_cache.UpdateCounters(); const auto& launch_desc = system.GPU().KeplerCompute().launch_description; - const ComputePipelineCacheKey key{ - code_addr, - launch_desc.shared_alloc, - {launch_desc.block_dim_x, launch_desc.block_dim_y, launch_desc.block_dim_z}}; + ComputePipelineCacheKey key; + key.shader = code_addr; + key.shared_memory_size = launch_desc.shared_alloc; + key.workgroup_size = {launch_desc.block_dim_x, launch_desc.block_dim_y, + launch_desc.block_dim_z}; + auto& pipeline = pipeline_cache.GetComputePipeline(key); // Compute dispatches can't be executed inside a renderpass @@ -688,7 +692,7 @@ std::tuple RasterizerVulkan::ConfigureFramebuffers( FramebufferCacheKey key{renderpass, std::numeric_limits::max(), std::numeric_limits::max(), std::numeric_limits::max()}; - const auto try_push = [&](const View& view) { + const auto try_push = [&key](const View& view) { if (!view) { return false; } @@ -699,7 +703,9 @@ std::tuple RasterizerVulkan::ConfigureFramebuffers( return true; }; - for (std::size_t index = 0; index < std::size(color_attachments); ++index) { + const auto& regs = system.GPU().Maxwell3D().regs; + const std::size_t num_attachments = static_cast(regs.rt_control.count); + for (std::size_t index = 0; index < num_attachments; ++index) { if (try_push(color_attachments[index])) { texture_cache.MarkColorBufferInUse(index); } @@ -1250,28 +1256,29 @@ std::size_t RasterizerVulkan::CalculateConstBufferSize( } RenderPassParams RasterizerVulkan::GetRenderPassParams(Texceptions texceptions) const { - using namespace VideoCore::Surface; - const auto& regs = system.GPU().Maxwell3D().regs; - RenderPassParams renderpass_params; + const std::size_t num_attachments = static_cast(regs.rt_control.count); - for (std::size_t rt = 0; rt < static_cast(regs.rt_control.count); ++rt) { + RenderPassParams params; + params.color_formats = {}; + std::size_t color_texceptions = 0; + + std::size_t index = 0; + for (std::size_t rt = 0; rt < num_attachments; ++rt) { const auto& rendertarget = regs.rt[rt]; if (rendertarget.Address() == 0 || rendertarget.format == Tegra::RenderTargetFormat::NONE) { continue; } - renderpass_params.color_attachments.push_back(RenderPassParams::ColorAttachment{ - static_cast(rt), PixelFormatFromRenderTargetFormat(rendertarget.format), - texceptions[rt]}); + params.color_formats[index] = static_cast(rendertarget.format); + color_texceptions |= (texceptions[rt] ? 1ULL : 0ULL) << index; + ++index; } + params.num_color_attachments = static_cast(index); + params.texceptions = static_cast(color_texceptions); - renderpass_params.has_zeta = regs.zeta_enable; - if (renderpass_params.has_zeta) { - renderpass_params.zeta_pixel_format = PixelFormatFromDepthFormat(regs.zeta.format); - renderpass_params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX]; - } - - return renderpass_params; + params.zeta_format = regs.zeta_enable ? static_cast(regs.zeta.format) : 0; + params.zeta_texception = texceptions[ZETA_TEXCEPTION_INDEX]; + return params; } VkBuffer RasterizerVulkan::DefaultBuffer() { diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp index 4e5286a69..3f71d005e 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.cpp @@ -2,9 +2,11 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include +#include "common/cityhash.h" #include "video_core/engines/maxwell_3d.h" #include "video_core/renderer_vulkan/maxwell_to_vk.h" #include "video_core/renderer_vulkan/vk_device.h" @@ -13,6 +15,15 @@ namespace Vulkan { +std::size_t RenderPassParams::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); + return static_cast(hash); +} + +bool RenderPassParams::operator==(const RenderPassParams& rhs) const noexcept { + return std::memcmp(&rhs, this, sizeof *this) == 0; +} + VKRenderPassCache::VKRenderPassCache(const VKDevice& device) : device{device} {} VKRenderPassCache::~VKRenderPassCache() = default; @@ -27,20 +38,22 @@ VkRenderPass VKRenderPassCache::GetRenderPass(const RenderPassParams& params) { } vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& params) const { + using namespace VideoCore::Surface; std::vector descriptors; std::vector color_references; - for (std::size_t rt = 0; rt < params.color_attachments.size(); ++rt) { - const auto attachment = params.color_attachments[rt]; - const auto format = - MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, attachment.pixel_format); + const std::size_t num_attachments = static_cast(params.num_color_attachments); + for (std::size_t rt = 0; rt < num_attachments; ++rt) { + const auto guest_format = static_cast(params.color_formats[rt]); + const PixelFormat pixel_format = PixelFormatFromRenderTargetFormat(guest_format); + const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", - static_cast(attachment.pixel_format)); + static_cast(pixel_format)); - // TODO(Rodrigo): Add eMayAlias when it's needed. - const auto color_layout = attachment.is_texception - ? VK_IMAGE_LAYOUT_GENERAL - : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; + // TODO(Rodrigo): Add MAY_ALIAS_BIT when it's needed. + const VkImageLayout color_layout = ((params.texceptions >> rt) & 1) != 0 + ? VK_IMAGE_LAYOUT_GENERAL + : VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; VkAttachmentDescription& descriptor = descriptors.emplace_back(); descriptor.flags = VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT; descriptor.format = format.format; @@ -58,15 +71,17 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param } VkAttachmentReference zeta_attachment_ref; - if (params.has_zeta) { - const auto format = - MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, params.zeta_pixel_format); + const bool has_zeta = params.zeta_format != 0; + if (has_zeta) { + const auto guest_format = static_cast(params.zeta_format); + const PixelFormat pixel_format = PixelFormatFromDepthFormat(guest_format); + const auto format = MaxwellToVK::SurfaceFormat(device, FormatType::Optimal, pixel_format); ASSERT_MSG(format.attachable, "Trying to attach a non-attachable format with format={}", - static_cast(params.zeta_pixel_format)); + static_cast(pixel_format)); - const auto zeta_layout = params.zeta_texception - ? VK_IMAGE_LAYOUT_GENERAL - : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; + const VkImageLayout zeta_layout = params.zeta_texception != 0 + ? VK_IMAGE_LAYOUT_GENERAL + : VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; VkAttachmentDescription& descriptor = descriptors.emplace_back(); descriptor.flags = 0; descriptor.format = format.format; @@ -78,7 +93,7 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param descriptor.initialLayout = zeta_layout; descriptor.finalLayout = zeta_layout; - zeta_attachment_ref.attachment = static_cast(params.color_attachments.size()); + zeta_attachment_ref.attachment = static_cast(num_attachments); zeta_attachment_ref.layout = zeta_layout; } @@ -90,7 +105,7 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param subpass_description.colorAttachmentCount = static_cast(color_references.size()); subpass_description.pColorAttachments = color_references.data(); subpass_description.pResolveAttachments = nullptr; - subpass_description.pDepthStencilAttachment = params.has_zeta ? &zeta_attachment_ref : nullptr; + subpass_description.pDepthStencilAttachment = has_zeta ? &zeta_attachment_ref : nullptr; subpass_description.preserveAttachmentCount = 0; subpass_description.pPreserveAttachments = nullptr; @@ -101,7 +116,7 @@ vk::RenderPass VKRenderPassCache::CreateRenderPass(const RenderPassParams& param stage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; } - if (params.has_zeta) { + if (has_zeta) { access |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; stage |= VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; diff --git a/src/video_core/renderer_vulkan/vk_renderpass_cache.h b/src/video_core/renderer_vulkan/vk_renderpass_cache.h index 921b6efb5..8b0fec720 100644 --- a/src/video_core/renderer_vulkan/vk_renderpass_cache.h +++ b/src/video_core/renderer_vulkan/vk_renderpass_cache.h @@ -4,8 +4,7 @@ #pragma once -#include -#include +#include #include #include @@ -19,51 +18,25 @@ namespace Vulkan { class VKDevice; -// TODO(Rodrigo): Optimize this structure for faster hashing - struct RenderPassParams { - struct ColorAttachment { - u32 index = 0; - VideoCore::Surface::PixelFormat pixel_format = VideoCore::Surface::PixelFormat::Invalid; - bool is_texception = false; + std::array color_formats; + u8 num_color_attachments; + u8 texceptions; - std::size_t Hash() const noexcept { - return static_cast(pixel_format) | - static_cast(is_texception) << 6 | - static_cast(index) << 7; - } + u8 zeta_format; + u8 zeta_texception; - bool operator==(const ColorAttachment& rhs) const noexcept { - return std::tie(index, pixel_format, is_texception) == - std::tie(rhs.index, rhs.pixel_format, rhs.is_texception); - } - }; + std::size_t Hash() const noexcept; - boost::container::static_vector - color_attachments{}; - // TODO(Rodrigo): Unify has_zeta into zeta_pixel_format and zeta_component_type. - VideoCore::Surface::PixelFormat zeta_pixel_format = VideoCore::Surface::PixelFormat::Invalid; - bool has_zeta = false; - bool zeta_texception = false; + bool operator==(const RenderPassParams& rhs) const noexcept; - std::size_t Hash() const noexcept { - std::size_t hash = 0; - for (const auto& rt : color_attachments) { - boost::hash_combine(hash, rt.Hash()); - } - boost::hash_combine(hash, zeta_pixel_format); - boost::hash_combine(hash, has_zeta); - boost::hash_combine(hash, zeta_texception); - return hash; - } - - bool operator==(const RenderPassParams& rhs) const { - return std::tie(color_attachments, zeta_pixel_format, has_zeta, zeta_texception) == - std::tie(rhs.color_attachments, rhs.zeta_pixel_format, rhs.has_zeta, - rhs.zeta_texception); + bool operator!=(const RenderPassParams& rhs) const noexcept { + return !operator==(rhs); } }; +static_assert(std::has_unique_object_representations_v); +static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); } // namespace Vulkan