From 70353649d77979e8cbe353669061af0c04470126 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Tue, 19 Jan 2021 02:04:27 -0300 Subject: [PATCH] fixed_pipeline_cache: Use dirty flags to lazily update key Use dirty flags to avoid building pipeline key from scratch on each draw call. This saves a bit of unnecesary work on each draw call. --- .../renderer_vulkan/fixed_pipeline_state.cpp | 83 +++++++++++-------- .../renderer_vulkan/fixed_pipeline_state.h | 18 ++-- .../renderer_vulkan/vk_graphics_pipeline.cpp | 3 - .../renderer_vulkan/vk_rasterizer.cpp | 13 +-- .../renderer_vulkan/vk_rasterizer.h | 3 + .../renderer_vulkan/vk_state_tracker.cpp | 34 +++++++- .../renderer_vulkan/vk_state_tracker.h | 5 ++ 7 files changed, 103 insertions(+), 56 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 5be6dabd9..362278f01 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -12,14 +12,15 @@ #include "common/cityhash.h" #include "common/common_types.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" +#include "video_core/renderer_vulkan/vk_state_tracker.h" namespace Vulkan { namespace { -constexpr std::size_t POINT = 0; -constexpr std::size_t LINE = 1; -constexpr std::size_t POLYGON = 2; +constexpr size_t POINT = 0; +constexpr size_t LINE = 1; +constexpr size_t POLYGON = 2; constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { POINT, // Points LINE, // Lines @@ -40,10 +41,14 @@ constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { } // Anonymous namespace -void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_state) { - const std::array enabled_lut = {regs.polygon_offset_point_enable, - regs.polygon_offset_line_enable, - regs.polygon_offset_fill_enable}; +void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d, + bool has_extended_dynamic_state) { + const Maxwell& regs = maxwell3d.regs; + const std::array enabled_lut{ + regs.polygon_offset_point_enable, + regs.polygon_offset_line_enable, + regs.polygon_offset_fill_enable, + }; const u32 topology_index = static_cast(regs.draw.topology.Value()); raw1 = 0; @@ -64,45 +69,53 @@ void FixedPipelineState::Fill(const Maxwell& regs, bool has_extended_dynamic_sta raw2 = 0; const auto test_func = - regs.alpha_test_enabled == 1 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; + regs.alpha_test_enabled != 0 ? regs.alpha_test_func : Maxwell::ComparisonOp::Always; alpha_test_func.Assign(PackComparisonOp(test_func)); early_z.Assign(regs.force_early_fragment_tests != 0 ? 1 : 0); alpha_test_ref = Common::BitCast(regs.alpha_test_ref); point_size = Common::BitCast(regs.point_size); - for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - binding_divisors[index] = - regs.instanced_arrays.IsInstancingEnabled(index) ? regs.vertex_array[index].divisor : 0; + if (maxwell3d.dirty.flags[Dirty::InstanceDivisors]) { + maxwell3d.dirty.flags[Dirty::InstanceDivisors] = false; + for (size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { + const bool is_enabled = regs.instanced_arrays.IsInstancingEnabled(index); + binding_divisors[index] = is_enabled ? regs.vertex_array[index].divisor : 0; + } } - - for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { - const auto& input = regs.vertex_attrib_format[index]; - auto& attribute = attributes[index]; - attribute.raw = 0; - attribute.enabled.Assign(input.IsConstant() ? 0 : 1); - attribute.buffer.Assign(input.buffer); - attribute.offset.Assign(input.offset); - attribute.type.Assign(static_cast(input.type.Value())); - attribute.size.Assign(static_cast(input.size.Value())); - attribute.binding_index_enabled.Assign(regs.vertex_array[index].IsEnabled() ? 1 : 0); + if (maxwell3d.dirty.flags[Dirty::VertexAttributes]) { + maxwell3d.dirty.flags[Dirty::VertexAttributes] = false; + for (size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { + const auto& input = regs.vertex_attrib_format[index]; + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(input.IsConstant() ? 0 : 1); + attribute.buffer.Assign(input.buffer); + attribute.offset.Assign(input.offset); + attribute.type.Assign(static_cast(input.type.Value())); + attribute.size.Assign(static_cast(input.size.Value())); + } } - - for (std::size_t index = 0; index < std::size(attachments); ++index) { - attachments[index].Fill(regs, index); + if (maxwell3d.dirty.flags[Dirty::Blending]) { + maxwell3d.dirty.flags[Dirty::Blending] = false; + for (size_t index = 0; index < attachments.size(); ++index) { + attachments[index].Refresh(regs, index); + } + } + if (maxwell3d.dirty.flags[Dirty::ViewportSwizzles]) { + maxwell3d.dirty.flags[Dirty::ViewportSwizzles] = false; + const auto& transform = regs.viewport_transform; + std::ranges::transform(transform, viewport_swizzles.begin(), [](const auto& viewport) { + return static_cast(viewport.swizzle.raw); + }); } - - const auto& transform = regs.viewport_transform; - std::transform(transform.begin(), transform.end(), viewport_swizzles.begin(), - [](const auto& viewport) { return static_cast(viewport.swizzle.raw); }); - if (!has_extended_dynamic_state) { no_extended_dynamic_state.Assign(1); - dynamic_state.Fill(regs); + dynamic_state.Refresh(regs); } } -void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) { +void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) { const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : index]; raw = 0; @@ -141,7 +154,7 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size enable.Assign(1); } -void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { +void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) { u32 packed_front_face = PackFrontFace(regs.front_face); if (regs.screen_y_control.triangle_rast_flip != 0) { // Flip front face @@ -178,9 +191,9 @@ void FixedPipelineState::DynamicState::Fill(const Maxwell& regs) { }); } -std::size_t FixedPipelineState::Hash() const noexcept { +size_t FixedPipelineState::Hash() const noexcept { const u64 hash = Common::CityHash64(reinterpret_cast(this), Size()); - return static_cast(hash); + return static_cast(hash); } bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 465a55fdb..a0eb83a68 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -58,7 +58,7 @@ struct FixedPipelineState { BitField<30, 1, u32> enable; }; - void Fill(const Maxwell& regs, std::size_t index); + void Refresh(const Maxwell& regs, size_t index); constexpr std::array Mask() const noexcept { return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; @@ -96,8 +96,6 @@ struct FixedPipelineState { BitField<6, 14, u32> offset; BitField<20, 3, u32> type; BitField<23, 6, u32> size; - // Not really an element of a vertex attribute, but it can be packed here - BitField<29, 1, u32> binding_index_enabled; constexpr Maxwell::VertexAttribute::Type Type() const noexcept { return static_cast(type.Value()); @@ -108,7 +106,7 @@ struct FixedPipelineState { } }; - template + template union StencilFace { BitField action_stencil_fail; BitField action_depth_fail; @@ -152,7 +150,7 @@ struct FixedPipelineState { // Vertex stride is a 12 bits value, we have 4 bits to spare per element std::array vertex_strides; - void Fill(const Maxwell& regs); + void Refresh(const Maxwell& regs); Maxwell::ComparisonOp DepthTestFunc() const noexcept { return UnpackComparisonOp(depth_test_func); @@ -199,9 +197,9 @@ struct FixedPipelineState { std::array viewport_swizzles; DynamicState dynamic_state; - void Fill(const Maxwell& regs, bool has_extended_dynamic_state); + void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state); - std::size_t Hash() const noexcept; + size_t Hash() const noexcept; bool operator==(const FixedPipelineState& rhs) const noexcept; @@ -209,8 +207,8 @@ struct FixedPipelineState { return !operator==(rhs); } - std::size_t Size() const noexcept { - const std::size_t total_size = sizeof *this; + size_t Size() const noexcept { + const size_t total_size = sizeof *this; return total_size - (no_extended_dynamic_state != 0 ? 0 : sizeof(DynamicState)); } }; @@ -224,7 +222,7 @@ namespace std { template <> struct hash { - std::size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept { + size_t operator()(const Vulkan::FixedPipelineState& k) const noexcept { return k.Hash(); } }; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index d50dca604..fc6dd83eb 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -221,9 +221,6 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const SPIRVProgram& program, std::vector vertex_bindings; std::vector vertex_binding_divisors; for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { - if (state.attributes[index].binding_index_enabled == 0) { - continue; - } const bool instanced = state.binding_divisors[index] != 0; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; vertex_bindings.push_back({ diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 684d4e3a6..394a1c4e9 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -267,8 +267,7 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { query_cache.UpdateCounters(); - GraphicsPipelineCacheKey key; - key.fixed_state.Fill(maxwell3d.regs, device.IsExtExtendedDynamicStateSupported()); + graphics_key.fixed_state.Refresh(maxwell3d, device.IsExtExtendedDynamicStateSupported()); std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex}; @@ -276,14 +275,16 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) { texture_cache.UpdateRenderTargets(false); const auto shaders = pipeline_cache.GetShaders(); - key.shaders = GetShaderAddresses(shaders); + graphics_key.shaders = GetShaderAddresses(shaders); + + graphics_key.shaders = GetShaderAddresses(shaders); SetupShaderDescriptors(shaders, is_indexed); const Framebuffer* const framebuffer = texture_cache.GetFramebuffer(); - key.renderpass = framebuffer->RenderPass(); + graphics_key.renderpass = framebuffer->RenderPass(); - auto* const pipeline = - pipeline_cache.GetGraphicsPipeline(key, framebuffer->NumColorBuffers(), async_shaders); + VKGraphicsPipeline* const pipeline = pipeline_cache.GetGraphicsPipeline( + graphics_key, framebuffer->NumColorBuffers(), async_shaders); if (pipeline == nullptr || pipeline->GetHandle() == VK_NULL_HANDLE) { // Async graphics pipeline was not ready. return; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.h b/src/video_core/renderer_vulkan/vk_rasterizer.h index 7fc6741da..acea1ba2d 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.h +++ b/src/video_core/renderer_vulkan/vk_rasterizer.h @@ -20,6 +20,7 @@ #include "video_core/renderer_vulkan/vk_buffer_cache.h" #include "video_core/renderer_vulkan/vk_descriptor_pool.h" #include "video_core/renderer_vulkan/vk_fence_manager.h" +#include "video_core/renderer_vulkan/vk_graphics_pipeline.h" #include "video_core/renderer_vulkan/vk_pipeline_cache.h" #include "video_core/renderer_vulkan/vk_query_cache.h" #include "video_core/renderer_vulkan/vk_scheduler.h" @@ -173,6 +174,8 @@ private: VKUpdateDescriptorQueue update_descriptor_queue; BlitImageHelper blit_image; + GraphicsPipelineCacheKey graphics_key; + TextureCacheRuntime texture_cache_runtime; TextureCache texture_cache; BufferCacheRuntime buffer_cache_runtime; diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.cpp b/src/video_core/renderer_vulkan/vk_state_tracker.cpp index e81fad007..956f86845 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.cpp +++ b/src/video_core/renderer_vulkan/vk_state_tracker.cpp @@ -18,9 +18,7 @@ #define NUM(field_name) (sizeof(Maxwell3D::Regs::field_name) / (sizeof(u32))) namespace Vulkan { - namespace { - using namespace Dirty; using namespace VideoCommon::Dirty; using Tegra::Engines::Maxwell3D; @@ -128,6 +126,34 @@ void SetupDirtyStencilTestEnable(Tables& tables) { tables[0][OFF(stencil_enable)] = StencilTestEnable; } +void SetupDirtyBlending(Tables& tables) { + tables[0][OFF(color_mask_common)] = Blending; + tables[0][OFF(independent_blend_enable)] = Blending; + FillBlock(tables[0], OFF(color_mask), NUM(color_mask), Blending); + FillBlock(tables[0], OFF(blend), NUM(blend), Blending); + FillBlock(tables[0], OFF(independent_blend), NUM(independent_blend), Blending); +} + +void SetupDirtyInstanceDivisors(Tables& tables) { + static constexpr size_t divisor_offset = 3; + for (size_t index = 0; index < Regs::NumVertexArrays; ++index) { + tables[0][OFF(instanced_arrays) + index] = InstanceDivisors; + tables[0][OFF(vertex_array) + index * NUM(vertex_array[0]) + divisor_offset] = + InstanceDivisors; + } +} + +void SetupDirtyVertexAttributes(Tables& tables) { + FillBlock(tables[0], OFF(vertex_attrib_format), NUM(vertex_attrib_format), VertexAttributes); +} + +void SetupDirtyViewportSwizzles(Tables& tables) { + static constexpr size_t swizzle_offset = 6; + for (size_t index = 0; index < Regs::NumViewports; ++index) { + tables[0][OFF(viewport_transform) + index * NUM(viewport_transform[0]) + swizzle_offset] = + ViewportSwizzles; + } +} } // Anonymous namespace StateTracker::StateTracker(Tegra::GPU& gpu) @@ -148,6 +174,10 @@ StateTracker::StateTracker(Tegra::GPU& gpu) SetupDirtyFrontFace(tables); SetupDirtyStencilOp(tables); SetupDirtyStencilTestEnable(tables); + SetupDirtyBlending(tables); + SetupDirtyInstanceDivisors(tables); + SetupDirtyVertexAttributes(tables); + SetupDirtyViewportSwizzles(tables); } } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/vk_state_tracker.h b/src/video_core/renderer_vulkan/vk_state_tracker.h index c335d2bdf..84e918a71 100644 --- a/src/video_core/renderer_vulkan/vk_state_tracker.h +++ b/src/video_core/renderer_vulkan/vk_state_tracker.h @@ -35,6 +35,11 @@ enum : u8 { StencilOp, StencilTestEnable, + Blending, + InstanceDivisors, + VertexAttributes, + ViewportSwizzles, + Last }; static_assert(Last <= std::numeric_limits::max());