From ab6704f20ccaa9ba63b9c4b85ce7afee1e9edf06 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Fri, 17 Apr 2020 18:37:27 -0300 Subject: [PATCH 1/5] fixed_pipeline_state: Pack attribute state Reduce FixedPipelineState's size from 1384 to 664 bytes --- src/video_core/engines/maxwell_3d.h | 2 +- .../renderer_vulkan/fixed_pipeline_state.cpp | 35 +------ .../renderer_vulkan/fixed_pipeline_state.h | 99 ++++++++++--------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 28 +++--- .../renderer_vulkan/vk_pipeline_cache.cpp | 2 +- .../renderer_vulkan/vk_rasterizer.cpp | 20 ++-- 6 files changed, 85 insertions(+), 101 deletions(-) diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h index 5cf6a4cc3..59d5752d2 100644 --- a/src/video_core/engines/maxwell_3d.h +++ b/src/video_core/engines/maxwell_3d.h @@ -1149,7 +1149,7 @@ public: /// Returns whether the vertex array specified by index is supposed to be /// accessed per instance or not. - bool IsInstancingEnabled(u32 index) const { + bool IsInstancingEnabled(std::size_t index) const { return is_instanced[index]; } } instanced_arrays; diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 2bb376555..97aab951a 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -6,6 +6,7 @@ #include +#include "common/cityhash.h" #include "common/common_types.h" #include "video_core/renderer_vulkan/fixed_pipeline_state.h" @@ -128,25 +129,6 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) } // Anonymous namespace -std::size_t FixedPipelineState::VertexBinding::Hash() const noexcept { - return (index << stride) ^ divisor; -} - -bool FixedPipelineState::VertexBinding::operator==(const VertexBinding& rhs) const noexcept { - return std::tie(index, stride, divisor) == std::tie(rhs.index, rhs.stride, rhs.divisor); -} - -std::size_t 
FixedPipelineState::VertexAttribute::Hash() const noexcept { - return static_cast(index) ^ (static_cast(buffer) << 13) ^ - (static_cast(type) << 22) ^ (static_cast(size) << 31) ^ - (static_cast(offset) << 36); -} - -bool FixedPipelineState::VertexAttribute::operator==(const VertexAttribute& rhs) const noexcept { - return std::tie(index, buffer, type, size, offset) == - std::tie(rhs.index, rhs.buffer, rhs.type, rhs.size, rhs.offset); -} - std::size_t FixedPipelineState::StencilFace::Hash() const noexcept { return static_cast(action_stencil_fail) ^ (static_cast(action_depth_fail) << 4) ^ @@ -182,21 +164,12 @@ bool FixedPipelineState::BlendingAttachment::operator==(const BlendingAttachment } std::size_t FixedPipelineState::VertexInput::Hash() const noexcept { - std::size_t hash = num_bindings ^ (num_attributes << 32); - for (std::size_t i = 0; i < num_bindings; ++i) { - boost::hash_combine(hash, bindings[i].Hash()); - } - for (std::size_t i = 0; i < num_attributes; ++i) { - boost::hash_combine(hash, attributes[i].Hash()); - } - return hash; + // TODO(Rodrigo): Replace this + return Common::CityHash64(reinterpret_cast(this), sizeof *this); } bool FixedPipelineState::VertexInput::operator==(const VertexInput& rhs) const noexcept { - return std::equal(bindings.begin(), bindings.begin() + num_bindings, rhs.bindings.begin(), - rhs.bindings.begin() + rhs.num_bindings) && - std::equal(attributes.begin(), attributes.begin() + num_attributes, - rhs.attributes.begin(), rhs.attributes.begin() + rhs.num_attributes); + return std::memcmp(this, &rhs, sizeof *this) == 0; } std::size_t FixedPipelineState::InputAssembly::Hash() const noexcept { diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 4c8ba7f90..d82a82f75 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -7,6 +7,7 @@ #include #include +#include "common/bit_field.h" 
#include "common/common_types.h" #include "video_core/engines/maxwell_3d.h" @@ -18,48 +19,11 @@ using Maxwell = Tegra::Engines::Maxwell3D::Regs; // TODO(Rodrigo): Optimize this structure. +template +inline constexpr bool IsHashable = std::has_unique_object_representations_v&& + std::is_trivially_copyable_v&& std::is_trivially_constructible_v; + struct FixedPipelineState { - using PixelFormat = VideoCore::Surface::PixelFormat; - - struct VertexBinding { - constexpr VertexBinding(u32 index, u32 stride, u32 divisor) - : index{index}, stride{stride}, divisor{divisor} {} - VertexBinding() = default; - - u32 index; - u32 stride; - u32 divisor; - - std::size_t Hash() const noexcept; - - bool operator==(const VertexBinding& rhs) const noexcept; - - bool operator!=(const VertexBinding& rhs) const noexcept { - return !operator==(rhs); - } - }; - - struct VertexAttribute { - constexpr VertexAttribute(u32 index, u32 buffer, Maxwell::VertexAttribute::Type type, - Maxwell::VertexAttribute::Size size, u32 offset) - : index{index}, buffer{buffer}, type{type}, size{size}, offset{offset} {} - VertexAttribute() = default; - - u32 index; - u32 buffer; - Maxwell::VertexAttribute::Type type; - Maxwell::VertexAttribute::Size size; - u32 offset; - - std::size_t Hash() const noexcept; - - bool operator==(const VertexAttribute& rhs) const noexcept; - - bool operator!=(const VertexAttribute& rhs) const noexcept { - return !operator==(rhs); - } - }; - struct StencilFace { constexpr StencilFace(Maxwell::StencilOp action_stencil_fail, Maxwell::StencilOp action_depth_fail, @@ -114,10 +78,52 @@ struct FixedPipelineState { }; struct VertexInput { - std::size_t num_bindings = 0; - std::size_t num_attributes = 0; - std::array bindings; - std::array attributes; + union Binding { + u16 raw; + BitField<0, 1, u16> enabled; + BitField<1, 12, u16> stride; + }; + + union Attribute { + u32 raw; + BitField<0, 1, u32> enabled; + BitField<1, 5, u32> buffer; + BitField<6, 14, u32> offset; + BitField<20, 3, u32> 
type; + BitField<23, 6, u32> size; + + constexpr Maxwell::VertexAttribute::Type Type() const noexcept { + return static_cast(type.Value()); + } + + constexpr Maxwell::VertexAttribute::Size Size() const noexcept { + return static_cast(size.Value()); + } + }; + + std::array bindings; + std::array binding_divisors; + std::array attributes; + + void SetBinding(std::size_t index, bool enabled, u32 stride, u32 divisor) noexcept { + auto& binding = bindings[index]; + binding.raw = 0; + binding.enabled.Assign(enabled ? 1 : 0); + binding.stride.Assign(stride); + binding_divisors[index] = divisor; + } + + void SetAttribute(std::size_t index, bool enabled, u32 buffer, u32 offset, + Maxwell::VertexAttribute::Type type, + Maxwell::VertexAttribute::Size size) noexcept { + auto& attribute = attributes[index]; + attribute.raw = 0; + attribute.enabled.Assign(enabled ? 1 : 0); + attribute.buffer.Assign(buffer); + attribute.offset.Assign(offset); + attribute.type.Assign(static_cast(type)); + attribute.size.Assign(static_cast(size)); + } std::size_t Hash() const noexcept; @@ -127,6 +133,7 @@ struct FixedPipelineState { return !operator==(rhs); } }; + static_assert(IsHashable); struct InputAssembly { constexpr InputAssembly(Maxwell::PrimitiveTopology topology, bool primitive_restart_enable, @@ -256,8 +263,6 @@ struct FixedPipelineState { DepthStencil depth_stencil; ColorBlending color_blending; }; -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_copyable_v); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index b540b838d..718feafbd 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -165,35 +165,41 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const 
RenderPassParams& renderpa std::vector vertex_bindings; std::vector vertex_binding_divisors; - for (std::size_t i = 0; i < vi.num_bindings; ++i) { - const auto& binding = vi.bindings[i]; - const bool instanced = binding.divisor != 0; + for (std::size_t index = 0; index < std::size(vi.bindings); ++index) { + const auto& binding = vi.bindings[index]; + if (!binding.enabled) { + continue; + } + const bool instanced = vi.binding_divisors[index] != 0; const auto rate = instanced ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX; auto& vertex_binding = vertex_bindings.emplace_back(); - vertex_binding.binding = binding.index; + vertex_binding.binding = static_cast(index); vertex_binding.stride = binding.stride; vertex_binding.inputRate = rate; if (instanced) { auto& binding_divisor = vertex_binding_divisors.emplace_back(); - binding_divisor.binding = binding.index; - binding_divisor.divisor = binding.divisor; + binding_divisor.binding = static_cast(index); + binding_divisor.divisor = vi.binding_divisors[index]; } } std::vector vertex_attributes; const auto& input_attributes = program[0]->entries.attributes; - for (std::size_t i = 0; i < vi.num_attributes; ++i) { - const auto& attribute = vi.attributes[i]; - if (input_attributes.find(attribute.index) == input_attributes.end()) { + for (std::size_t index = 0; index < std::size(vi.attributes); ++index) { + const auto& attribute = vi.attributes[index]; + if (!attribute.enabled) { + continue; + } + if (input_attributes.find(static_cast(index)) == input_attributes.end()) { // Skip attributes not used by the vertex shaders. 
continue; } auto& vertex_attribute = vertex_attributes.emplace_back(); - vertex_attribute.location = attribute.index; + vertex_attribute.location = static_cast(index); vertex_attribute.binding = attribute.buffer; - vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.type, attribute.size); + vertex_attribute.format = MaxwellToVK::VertexFormat(attribute.Type(), attribute.Size()); vertex_attribute.offset = attribute.offset; } diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 90e3a8edd..083da9999 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -334,7 +334,7 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { specialization.point_size = fixed_state.input_assembly.point_size; } for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { - specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].type; + specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); } specialization.ndc_minus_one_to_one = fixed_state.rasterizer.ndc_minus_one_to_one; diff --git a/src/video_core/renderer_vulkan/vk_rasterizer.cpp b/src/video_core/renderer_vulkan/vk_rasterizer.cpp index 4ca0febb8..7a6aa52bc 100644 --- a/src/video_core/renderer_vulkan/vk_rasterizer.cpp +++ b/src/video_core/renderer_vulkan/vk_rasterizer.cpp @@ -806,25 +806,29 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex BufferBindings& buffer_bindings) { const auto& regs = system.GPU().Maxwell3D().regs; - for (u32 index = 0; index < static_cast(Maxwell::NumVertexAttributes); ++index) { + for (std::size_t index = 0; index < Maxwell::NumVertexAttributes; ++index) { const auto& attrib = regs.vertex_attrib_format[index]; if (!attrib.IsValid()) { + vertex_input.SetAttribute(index, false, 0, 0, {}, {}); continue; } - const auto& buffer = 
regs.vertex_array[attrib.buffer]; + [[maybe_unused]] const auto& buffer = regs.vertex_array[attrib.buffer]; ASSERT(buffer.IsEnabled()); - vertex_input.attributes[vertex_input.num_attributes++] = - FixedPipelineState::VertexAttribute(index, attrib.buffer, attrib.type, attrib.size, - attrib.offset); + vertex_input.SetAttribute(index, true, attrib.buffer, attrib.offset, attrib.type.Value(), + attrib.size.Value()); } - for (u32 index = 0; index < static_cast(Maxwell::NumVertexArrays); ++index) { + for (std::size_t index = 0; index < Maxwell::NumVertexArrays; ++index) { const auto& vertex_array = regs.vertex_array[index]; if (!vertex_array.IsEnabled()) { + vertex_input.SetBinding(index, false, 0, 0); continue; } + vertex_input.SetBinding( + index, true, vertex_array.stride, + regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0); const GPUVAddr start{vertex_array.StartAddress()}; const GPUVAddr end{regs.vertex_array_limit[index].LimitAddress()}; @@ -832,10 +836,6 @@ void RasterizerVulkan::SetupVertexArrays(FixedPipelineState::VertexInput& vertex ASSERT(end > start); const std::size_t size{end - start + 1}; const auto [buffer, offset] = buffer_cache.UploadMemory(start, size); - - vertex_input.bindings[vertex_input.num_bindings++] = FixedPipelineState::VertexBinding( - index, vertex_array.stride, - regs.instanced_arrays.IsInstancingEnabled(index) ? vertex_array.divisor : 0); buffer_bindings.AddVertexBinding(buffer, offset); } } From 7790144a55cf1ee05e79a2b842ba38aa510fea29 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 18 Apr 2020 00:05:41 -0300 Subject: [PATCH 2/5] fixed_pipeline_state: Pack depth stencil state Reduce FixedPipelineState's size to 632 bytes. 
--- .../renderer_vulkan/fixed_pipeline_state.cpp | 120 ++++++++++++------ .../renderer_vulkan/fixed_pipeline_state.h | 95 +++++++------- .../renderer_vulkan/vk_graphics_pipeline.cpp | 20 +-- 3 files changed, 139 insertions(+), 96 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 97aab951a..8734045e5 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -12,23 +12,32 @@ namespace Vulkan { -namespace { - -constexpr FixedPipelineState::DepthStencil GetDepthStencilState(const Maxwell& regs) { - const FixedPipelineState::StencilFace front_stencil( - regs.stencil_front_op_fail, regs.stencil_front_op_zfail, regs.stencil_front_op_zpass, - regs.stencil_front_func_func); - const FixedPipelineState::StencilFace back_stencil = - regs.stencil_two_side_enable - ? FixedPipelineState::StencilFace(regs.stencil_back_op_fail, regs.stencil_back_op_zfail, - regs.stencil_back_op_zpass, - regs.stencil_back_func_func) - : front_stencil; - return FixedPipelineState::DepthStencil( - regs.depth_test_enable == 1, regs.depth_write_enabled == 1, regs.depth_bounds_enable == 1, - regs.stencil_enable == 1, regs.depth_test_func, front_stencil, back_stencil); +void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept { + raw = 0; + front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail)); + front.action_depth_fail.Assign(PackStencilOp(regs.stencil_front_op_zfail)); + front.action_depth_pass.Assign(PackStencilOp(regs.stencil_front_op_zpass)); + front.test_func.Assign(PackComparisonOp(regs.stencil_front_func_func)); + if (regs.stencil_two_side_enable) { + back.action_stencil_fail.Assign(PackStencilOp(regs.stencil_back_op_fail)); + back.action_depth_fail.Assign(PackStencilOp(regs.stencil_back_op_zfail)); + back.action_depth_pass.Assign(PackStencilOp(regs.stencil_back_op_zpass)); + 
back.test_func.Assign(PackComparisonOp(regs.stencil_back_func_func)); + } else { + back.action_stencil_fail.Assign(front.action_stencil_fail); + back.action_depth_fail.Assign(front.action_depth_fail); + back.action_depth_pass.Assign(front.action_depth_pass); + back.test_func.Assign(front.test_func); + } + depth_test_enable.Assign(regs.depth_test_enable); + depth_write_enable.Assign(regs.depth_write_enabled); + depth_bounds_enable.Assign(regs.depth_bounds_enable); + stencil_enable.Assign(regs.stencil_enable); + depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); } +namespace { + constexpr FixedPipelineState::InputAssembly GetInputAssemblyState(const Maxwell& regs) { return FixedPipelineState::InputAssembly( regs.draw.topology, regs.primitive_restart.enabled, @@ -129,19 +138,6 @@ constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) } // Anonymous namespace -std::size_t FixedPipelineState::StencilFace::Hash() const noexcept { - return static_cast(action_stencil_fail) ^ - (static_cast(action_depth_fail) << 4) ^ - (static_cast(action_depth_fail) << 20) ^ - (static_cast(action_depth_pass) << 36); -} - -bool FixedPipelineState::StencilFace::operator==(const StencilFace& rhs) const noexcept { - return std::tie(action_stencil_fail, action_depth_fail, action_depth_pass, test_func) == - std::tie(rhs.action_stencil_fail, rhs.action_depth_fail, rhs.action_depth_pass, - rhs.test_func); -} - std::size_t FixedPipelineState::BlendingAttachment::Hash() const noexcept { return static_cast(enable) ^ (static_cast(rgb_equation) << 5) ^ (static_cast(src_rgb_func) << 10) ^ @@ -212,22 +208,11 @@ bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noe } std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept { - std::size_t hash = static_cast(depth_test_enable) ^ - (static_cast(depth_write_enable) << 1) ^ - (static_cast(depth_bounds_enable) << 2) ^ - (static_cast(stencil_enable) << 3) ^ - 
(static_cast(depth_test_function) << 4); - boost::hash_combine(hash, front_stencil.Hash()); - boost::hash_combine(hash, back_stencil.Hash()); - return hash; + return raw; } bool FixedPipelineState::DepthStencil::operator==(const DepthStencil& rhs) const noexcept { - return std::tie(depth_test_enable, depth_write_enable, depth_bounds_enable, depth_test_function, - stencil_enable, front_stencil, back_stencil) == - std::tie(rhs.depth_test_enable, rhs.depth_write_enable, rhs.depth_bounds_enable, - rhs.depth_test_function, rhs.stencil_enable, rhs.front_stencil, - rhs.back_stencil); + return raw == rhs.raw; } std::size_t FixedPipelineState::ColorBlending::Hash() const noexcept { @@ -266,9 +251,60 @@ FixedPipelineState GetFixedPipelineState(const Maxwell& regs) { fixed_state.input_assembly = GetInputAssemblyState(regs); fixed_state.tessellation = GetTessellationState(regs); fixed_state.rasterizer = GetRasterizerState(regs); - fixed_state.depth_stencil = GetDepthStencilState(regs); + fixed_state.depth_stencil.Fill(regs); fixed_state.color_blending = GetColorBlendingState(regs); return fixed_state; } +u32 FixedPipelineState::PackComparisonOp(Maxwell::ComparisonOp op) noexcept { + // OpenGL enums go from 0x200 to 0x207 and the others from 1 to 8 + // If we subtract 0x200 from OpenGL enums and 1 from the others we get a 0-7 range. + // Perfect for a hash. + const u32 value = static_cast(op); + return value - (value >= 0x200 ? 0x200 : 1); +} + +Maxwell::ComparisonOp FixedPipelineState::UnpackComparisonOp(u32 packed) noexcept { + // Read PackComparisonOp for the logic behind this. 
+ return static_cast(packed + 1); +} + +u32 FixedPipelineState::PackStencilOp(Maxwell::StencilOp op) noexcept { + switch (op) { + case Maxwell::StencilOp::Keep: + case Maxwell::StencilOp::KeepOGL: + return 0; + case Maxwell::StencilOp::Zero: + case Maxwell::StencilOp::ZeroOGL: + return 1; + case Maxwell::StencilOp::Replace: + case Maxwell::StencilOp::ReplaceOGL: + return 2; + case Maxwell::StencilOp::Incr: + case Maxwell::StencilOp::IncrOGL: + return 3; + case Maxwell::StencilOp::Decr: + case Maxwell::StencilOp::DecrOGL: + return 4; + case Maxwell::StencilOp::Invert: + case Maxwell::StencilOp::InvertOGL: + return 5; + case Maxwell::StencilOp::IncrWrap: + case Maxwell::StencilOp::IncrWrapOGL: + return 6; + case Maxwell::StencilOp::DecrWrap: + case Maxwell::StencilOp::DecrWrapOGL: + return 7; + } + return 0; +} + +Maxwell::StencilOp FixedPipelineState::UnpackStencilOp(u32 packed) noexcept { + static constexpr std::array LUT = {Maxwell::StencilOp::Keep, Maxwell::StencilOp::Zero, + Maxwell::StencilOp::Replace, Maxwell::StencilOp::Incr, + Maxwell::StencilOp::Decr, Maxwell::StencilOp::Invert, + Maxwell::StencilOp::IncrWrap, Maxwell::StencilOp::DecrWrap}; + return LUT[packed]; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index d82a82f75..e30877e77 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -24,27 +24,11 @@ inline constexpr bool IsHashable = std::has_unique_object_representations_v&& std::is_trivially_copyable_v&& std::is_trivially_constructible_v; struct FixedPipelineState { - struct StencilFace { - constexpr StencilFace(Maxwell::StencilOp action_stencil_fail, - Maxwell::StencilOp action_depth_fail, - Maxwell::StencilOp action_depth_pass, Maxwell::ComparisonOp test_func) - : action_stencil_fail{action_stencil_fail}, action_depth_fail{action_depth_fail}, - 
action_depth_pass{action_depth_pass}, test_func{test_func} {} - StencilFace() = default; + static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept; + static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept; - Maxwell::StencilOp action_stencil_fail; - Maxwell::StencilOp action_depth_fail; - Maxwell::StencilOp action_depth_pass; - Maxwell::ComparisonOp test_func; - - std::size_t Hash() const noexcept; - - bool operator==(const StencilFace& rhs) const noexcept; - - bool operator!=(const StencilFace& rhs) const noexcept { - return !operator==(rhs); - } - }; + static u32 PackStencilOp(Maxwell::StencilOp op) noexcept; + static Maxwell::StencilOp UnpackStencilOp(u32 packed) noexcept; struct BlendingAttachment { constexpr BlendingAttachment(bool enable, Maxwell::Blend::Equation rgb_equation, @@ -202,23 +186,42 @@ struct FixedPipelineState { }; struct DepthStencil { - constexpr DepthStencil(bool depth_test_enable, bool depth_write_enable, - bool depth_bounds_enable, bool stencil_enable, - Maxwell::ComparisonOp depth_test_function, StencilFace front_stencil, - StencilFace back_stencil) - : depth_test_enable{depth_test_enable}, depth_write_enable{depth_write_enable}, - depth_bounds_enable{depth_bounds_enable}, stencil_enable{stencil_enable}, - depth_test_function{depth_test_function}, front_stencil{front_stencil}, - back_stencil{back_stencil} {} - DepthStencil() = default; + template + union StencilFace { + BitField action_stencil_fail; + BitField action_depth_fail; + BitField action_depth_pass; + BitField test_func; - bool depth_test_enable; - bool depth_write_enable; - bool depth_bounds_enable; - bool stencil_enable; - Maxwell::ComparisonOp depth_test_function; - StencilFace front_stencil; - StencilFace back_stencil; + Maxwell::StencilOp ActionStencilFail() const noexcept { + return UnpackStencilOp(action_stencil_fail); + } + + Maxwell::StencilOp ActionDepthFail() const noexcept { + return UnpackStencilOp(action_depth_fail); + } + + Maxwell::StencilOp 
ActionDepthPass() const noexcept { + return UnpackStencilOp(action_depth_pass); + } + + Maxwell::ComparisonOp TestFunc() const noexcept { + return UnpackComparisonOp(test_func); + } + }; + + union { + u32 raw; + StencilFace<0> front; + StencilFace<12> back; + BitField<24, 1, u32> depth_test_enable; + BitField<25, 1, u32> depth_write_enable; + BitField<26, 1, u32> depth_bounds_enable; + BitField<27, 1, u32> stencil_enable; + BitField<28, 3, u32> depth_test_func; + }; + + void Fill(const Maxwell& regs) noexcept; std::size_t Hash() const noexcept; @@ -227,7 +230,12 @@ struct FixedPipelineState { bool operator!=(const DepthStencil& rhs) const noexcept { return !operator==(rhs); } + + Maxwell::ComparisonOp DepthTestFunc() const noexcept { + return UnpackComparisonOp(depth_test_func); + } }; + static_assert(IsHashable); struct ColorBlending { constexpr ColorBlending( @@ -248,6 +256,13 @@ struct FixedPipelineState { } }; + VertexInput vertex_input; + InputAssembly input_assembly; + Tessellation tessellation; + Rasterizer rasterizer; + DepthStencil depth_stencil; + ColorBlending color_blending; + std::size_t Hash() const noexcept; bool operator==(const FixedPipelineState& rhs) const noexcept; @@ -255,15 +270,7 @@ struct FixedPipelineState { bool operator!=(const FixedPipelineState& rhs) const noexcept { return !operator==(rhs); } - - VertexInput vertex_input; - InputAssembly input_assembly; - Tessellation tessellation; - Rasterizer rasterizer; - DepthStencil depth_stencil; - ColorBlending color_blending; }; -static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_copyable_v); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 718feafbd..0dd3ea5bc 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ 
-26,12 +26,13 @@ MICROPROFILE_DECLARE(Vulkan_PipelineCache); namespace { -VkStencilOpState GetStencilFaceState(const FixedPipelineState::StencilFace& face) { +template +VkStencilOpState GetStencilFaceState(const StencilFace& face) { VkStencilOpState state; - state.failOp = MaxwellToVK::StencilOp(face.action_stencil_fail); - state.passOp = MaxwellToVK::StencilOp(face.action_depth_pass); - state.depthFailOp = MaxwellToVK::StencilOp(face.action_depth_fail); - state.compareOp = MaxwellToVK::ComparisonOp(face.test_func); + state.failOp = MaxwellToVK::StencilOp(face.ActionStencilFail()); + state.passOp = MaxwellToVK::StencilOp(face.ActionDepthPass()); + state.depthFailOp = MaxwellToVK::StencilOp(face.ActionDepthFail()); + state.compareOp = MaxwellToVK::ComparisonOp(face.TestFunc()); state.compareMask = 0; state.writeMask = 0; state.reference = 0; @@ -277,13 +278,12 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa depth_stencil_ci.flags = 0; depth_stencil_ci.depthTestEnable = ds.depth_test_enable; depth_stencil_ci.depthWriteEnable = ds.depth_write_enable; - depth_stencil_ci.depthCompareOp = ds.depth_test_enable - ? MaxwellToVK::ComparisonOp(ds.depth_test_function) - : VK_COMPARE_OP_ALWAYS; + depth_stencil_ci.depthCompareOp = + ds.depth_test_enable ? 
MaxwellToVK::ComparisonOp(ds.DepthTestFunc()) : VK_COMPARE_OP_ALWAYS; depth_stencil_ci.depthBoundsTestEnable = ds.depth_bounds_enable; depth_stencil_ci.stencilTestEnable = ds.stencil_enable; - depth_stencil_ci.front = GetStencilFaceState(ds.front_stencil); - depth_stencil_ci.back = GetStencilFaceState(ds.back_stencil); + depth_stencil_ci.front = GetStencilFaceState(ds.front); + depth_stencil_ci.back = GetStencilFaceState(ds.back); depth_stencil_ci.minDepthBounds = 0.0f; depth_stencil_ci.maxDepthBounds = 0.0f; From 548dd27f4567f751d54073f1408d6f8949344fa9 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 18 Apr 2020 04:03:29 -0300 Subject: [PATCH 3/5] fixed_pipeline_state: Pack rasterizer state Reduce FixedPipelineState's size to 600 bytes. --- .../renderer_vulkan/fixed_pipeline_state.cpp | 199 +++++++++--------- .../renderer_vulkan/fixed_pipeline_state.h | 105 +++++---- .../renderer_vulkan/vk_graphics_pipeline.cpp | 12 +- .../renderer_vulkan/vk_pipeline_cache.cpp | 6 +- 4 files changed, 157 insertions(+), 165 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 8734045e5..1a23de07f 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. 
+#include #include #include @@ -12,6 +13,31 @@ namespace Vulkan { +namespace { + +constexpr std::size_t POINT = 0; +constexpr std::size_t LINE = 1; +constexpr std::size_t POLYGON = 2; +constexpr std::array POLYGON_OFFSET_ENABLE_LUT = { + POINT, // Points + LINE, // Lines + LINE, // LineLoop + LINE, // LineStrip + POLYGON, // Triangles + POLYGON, // TriangleStrip + POLYGON, // TriangleFan + POLYGON, // Quads + POLYGON, // QuadStrip + POLYGON, // Polygon + LINE, // LinesAdjacency + LINE, // LineStripAdjacency + POLYGON, // TrianglesAdjacency + POLYGON, // TriangleStripAdjacency + POLYGON, // Patches +}; + +} // Anonymous namespace + void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept { raw = 0; front.action_stencil_fail.Assign(PackStencilOp(regs.stencil_front_op_fail)); @@ -36,14 +62,41 @@ void FixedPipelineState::DepthStencil::Fill(const Maxwell& regs) noexcept { depth_test_func.Assign(PackComparisonOp(regs.depth_test_func)); } -namespace { +void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { + const auto& clip = regs.view_volume_clip_control; + const std::array enabled_lut = {regs.polygon_offset_point_enable, + regs.polygon_offset_line_enable, + regs.polygon_offset_fill_enable}; + const u32 topology_index = static_cast(regs.draw.topology.Value()); -constexpr FixedPipelineState::InputAssembly GetInputAssemblyState(const Maxwell& regs) { - return FixedPipelineState::InputAssembly( - regs.draw.topology, regs.primitive_restart.enabled, - regs.draw.topology == Maxwell::PrimitiveTopology::Points ? regs.point_size : 0.0f); + u32 packed_front_face = PackFrontFace(regs.front_face); + if (regs.screen_y_control.triangle_rast_flip != 0 && + regs.viewport_transform[0].scale_y > 0.0f) { + // Flip front face + packed_front_face = 1 - packed_front_face; + } + + raw = 0; + topology.Assign(topology_index); + primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0); + cull_enable.Assign(regs.cull_test_enabled != 0 ? 
1 : 0); + depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0); + depth_clamp_enable.Assign(clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1 ? 1 : 0); + ndc_minus_one_to_one.Assign(regs.depth_mode == Maxwell::DepthMode::MinusOneToOne ? 1 : 0); + cull_face.Assign(PackCullFace(regs.cull_face)); + front_face.Assign(packed_front_face); + polygon_mode.Assign(PackPolygonMode(regs.polygon_mode_front)); + patch_control_points_minus_one.Assign(regs.patch_vertices - 1); + tessellation_primitive.Assign(static_cast(regs.tess_mode.prim.Value())); + tessellation_spacing.Assign(static_cast(regs.tess_mode.spacing.Value())); + tessellation_clockwise.Assign(regs.tess_mode.cw.Value()); + logic_op_enable.Assign(regs.logic_op.enable != 0 ? 1 : 0); + logic_op.Assign(PackLogicOp(regs.logic_op.operation)); + std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast } +namespace { + constexpr FixedPipelineState::BlendingAttachment GetBlendingAttachmentState( const Maxwell& regs, std::size_t render_target) { const auto& mask = regs.color_mask[regs.color_mask_common ? 
0 : render_target]; @@ -86,56 +139,6 @@ constexpr FixedPipelineState::ColorBlending GetColorBlendingState(const Maxwell& GetBlendingAttachmentState(regs, 6), GetBlendingAttachmentState(regs, 7)}); } -constexpr FixedPipelineState::Tessellation GetTessellationState(const Maxwell& regs) { - return FixedPipelineState::Tessellation(regs.patch_vertices, regs.tess_mode.prim, - regs.tess_mode.spacing, regs.tess_mode.cw != 0); -} - -constexpr std::size_t Point = 0; -constexpr std::size_t Line = 1; -constexpr std::size_t Polygon = 2; -constexpr std::array PolygonOffsetEnableLUT = { - Point, // Points - Line, // Lines - Line, // LineLoop - Line, // LineStrip - Polygon, // Triangles - Polygon, // TriangleStrip - Polygon, // TriangleFan - Polygon, // Quads - Polygon, // QuadStrip - Polygon, // Polygon - Line, // LinesAdjacency - Line, // LineStripAdjacency - Polygon, // TrianglesAdjacency - Polygon, // TriangleStripAdjacency - Polygon, // Patches -}; - -constexpr FixedPipelineState::Rasterizer GetRasterizerState(const Maxwell& regs) { - const std::array enabled_lut = {regs.polygon_offset_point_enable, - regs.polygon_offset_line_enable, - regs.polygon_offset_fill_enable}; - const auto topology = static_cast(regs.draw.topology.Value()); - const bool depth_bias_enabled = enabled_lut[PolygonOffsetEnableLUT[topology]]; - - const auto& clip = regs.view_volume_clip_control; - const bool depth_clamp_enabled = clip.depth_clamp_near == 1 || clip.depth_clamp_far == 1; - - Maxwell::FrontFace front_face = regs.front_face; - if (regs.screen_y_control.triangle_rast_flip != 0 && - regs.viewport_transform[0].scale_y > 0.0f) { - if (front_face == Maxwell::FrontFace::CounterClockWise) - front_face = Maxwell::FrontFace::ClockWise; - else if (front_face == Maxwell::FrontFace::ClockWise) - front_face = Maxwell::FrontFace::CounterClockWise; - } - - const bool gl_ndc = regs.depth_mode == Maxwell::DepthMode::MinusOneToOne; - return FixedPipelineState::Rasterizer(regs.cull_test_enabled, 
depth_bias_enabled, - depth_clamp_enabled, gl_ndc, regs.cull_face, front_face); -} - } // Anonymous namespace std::size_t FixedPipelineState::BlendingAttachment::Hash() const noexcept { @@ -168,43 +171,14 @@ bool FixedPipelineState::VertexInput::operator==(const VertexInput& rhs) const n return std::memcmp(this, &rhs, sizeof *this) == 0; } -std::size_t FixedPipelineState::InputAssembly::Hash() const noexcept { - std::size_t point_size_int = 0; - std::memcpy(&point_size_int, &point_size, sizeof(point_size)); - return (static_cast(topology) << 24) ^ (point_size_int << 32) ^ - static_cast(primitive_restart_enable); -} - -bool FixedPipelineState::InputAssembly::operator==(const InputAssembly& rhs) const noexcept { - return std::tie(topology, primitive_restart_enable, point_size) == - std::tie(rhs.topology, rhs.primitive_restart_enable, rhs.point_size); -} - -std::size_t FixedPipelineState::Tessellation::Hash() const noexcept { - return static_cast(patch_control_points) ^ - (static_cast(primitive) << 6) ^ (static_cast(spacing) << 8) ^ - (static_cast(clockwise) << 10); -} - -bool FixedPipelineState::Tessellation::operator==(const Tessellation& rhs) const noexcept { - return std::tie(patch_control_points, primitive, spacing, clockwise) == - std::tie(rhs.patch_control_points, rhs.primitive, rhs.spacing, rhs.clockwise); -} - std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept { - return static_cast(cull_enable) ^ - (static_cast(depth_bias_enable) << 1) ^ - (static_cast(depth_clamp_enable) << 2) ^ - (static_cast(ndc_minus_one_to_one) << 3) ^ - (static_cast(cull_face) << 24) ^ - (static_cast(front_face) << 48); + u64 hash = static_cast(raw) << 32; + std::memcpy(&hash, &point_size, sizeof(u32)); + return static_cast(hash); } bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept { - return std::tie(cull_enable, depth_bias_enable, depth_clamp_enable, ndc_minus_one_to_one, - cull_face, front_face) == - std::tie(rhs.cull_enable, 
rhs.depth_bias_enable, rhs.depth_clamp_enable, - rhs.ndc_minus_one_to_one, rhs.cull_face, rhs.front_face); + return raw == rhs.raw && point_size == rhs.point_size; } std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept { @@ -231,8 +205,6 @@ bool FixedPipelineState::ColorBlending::operator==(const ColorBlending& rhs) con std::size_t FixedPipelineState::Hash() const noexcept { std::size_t hash = 0; boost::hash_combine(hash, vertex_input.Hash()); - boost::hash_combine(hash, input_assembly.Hash()); - boost::hash_combine(hash, tessellation.Hash()); boost::hash_combine(hash, rasterizer.Hash()); boost::hash_combine(hash, depth_stencil.Hash()); boost::hash_combine(hash, color_blending.Hash()); @@ -240,17 +212,13 @@ std::size_t FixedPipelineState::Hash() const noexcept { } bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { - return std::tie(vertex_input, input_assembly, tessellation, rasterizer, depth_stencil, - color_blending) == std::tie(rhs.vertex_input, rhs.input_assembly, - rhs.tessellation, rhs.rasterizer, rhs.depth_stencil, - rhs.color_blending); + return std::tie(vertex_input, rasterizer, depth_stencil, color_blending) == + std::tie(rhs.vertex_input, rhs.rasterizer, rhs.depth_stencil, rhs.color_blending); } FixedPipelineState GetFixedPipelineState(const Maxwell& regs) { FixedPipelineState fixed_state; - fixed_state.input_assembly = GetInputAssemblyState(regs); - fixed_state.tessellation = GetTessellationState(regs); - fixed_state.rasterizer = GetRasterizerState(regs); + fixed_state.rasterizer.Fill(regs); fixed_state.depth_stencil.Fill(regs); fixed_state.color_blending = GetColorBlendingState(regs); return fixed_state; @@ -307,4 +275,41 @@ Maxwell::StencilOp FixedPipelineState::UnpackStencilOp(u32 packed) noexcept { return LUT[packed]; } +u32 FixedPipelineState::PackCullFace(Maxwell::CullFace cull) noexcept { + // FrontAndBack is 0x408, by subtracting 0x406 from it we get 2. 
+ // Individual cull faces are in 0x404 and 0x405, subtracting 0x404 we get 0 and 1. + const u32 value = static_cast(cull); + return value - (value == 0x408 ? 0x406 : 0x404); +} + +Maxwell::CullFace FixedPipelineState::UnpackCullFace(u32 packed) noexcept { + static constexpr std::array LUT = {Maxwell::CullFace::Front, Maxwell::CullFace::Back, + Maxwell::CullFace::FrontAndBack}; + return LUT[packed]; +} + +u32 FixedPipelineState::PackFrontFace(Maxwell::FrontFace face) noexcept { + return static_cast(face) - 0x900; +} + +Maxwell::FrontFace FixedPipelineState::UnpackFrontFace(u32 packed) noexcept { + return static_cast(packed + 0x900); +} + +u32 FixedPipelineState::PackPolygonMode(Maxwell::PolygonMode mode) noexcept { + return static_cast(mode) - 0x1B00; +} + +Maxwell::PolygonMode FixedPipelineState::UnpackPolygonMode(u32 packed) noexcept { + return static_cast(packed + 0x1B00); +} + +u32 FixedPipelineState::PackLogicOp(Maxwell::LogicOperation op) noexcept { + return static_cast(op) - 0x1500; +} + +Maxwell::LogicOperation FixedPipelineState::UnpackLogicOp(u32 packed) noexcept { + return static_cast(packed + 0x1500); +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index e30877e77..75b093e90 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -30,6 +30,18 @@ struct FixedPipelineState { static u32 PackStencilOp(Maxwell::StencilOp op) noexcept; static Maxwell::StencilOp UnpackStencilOp(u32 packed) noexcept; + static u32 PackCullFace(Maxwell::CullFace cull) noexcept; + static Maxwell::CullFace UnpackCullFace(u32 packed) noexcept; + + static u32 PackFrontFace(Maxwell::FrontFace face) noexcept; + static Maxwell::FrontFace UnpackFrontFace(u32 packed) noexcept; + + static u32 PackPolygonMode(Maxwell::PolygonMode mode) noexcept; + static Maxwell::PolygonMode UnpackPolygonMode(u32 packed) noexcept; + 
+ static u32 PackLogicOp(Maxwell::LogicOperation op) noexcept; + static Maxwell::LogicOperation UnpackLogicOp(u32 packed) noexcept; + struct BlendingAttachment { constexpr BlendingAttachment(bool enable, Maxwell::Blend::Equation rgb_equation, Maxwell::Blend::Factor src_rgb_func, @@ -119,62 +131,30 @@ struct FixedPipelineState { }; static_assert(IsHashable); - struct InputAssembly { - constexpr InputAssembly(Maxwell::PrimitiveTopology topology, bool primitive_restart_enable, - float point_size) - : topology{topology}, primitive_restart_enable{primitive_restart_enable}, - point_size{point_size} {} - InputAssembly() = default; - - Maxwell::PrimitiveTopology topology; - bool primitive_restart_enable; - float point_size; - - std::size_t Hash() const noexcept; - - bool operator==(const InputAssembly& rhs) const noexcept; - - bool operator!=(const InputAssembly& rhs) const noexcept { - return !operator==(rhs); - } - }; - - struct Tessellation { - constexpr Tessellation(u32 patch_control_points, Maxwell::TessellationPrimitive primitive, - Maxwell::TessellationSpacing spacing, bool clockwise) - : patch_control_points{patch_control_points}, primitive{primitive}, spacing{spacing}, - clockwise{clockwise} {} - Tessellation() = default; - - u32 patch_control_points; - Maxwell::TessellationPrimitive primitive; - Maxwell::TessellationSpacing spacing; - bool clockwise; - - std::size_t Hash() const noexcept; - - bool operator==(const Tessellation& rhs) const noexcept; - - bool operator!=(const Tessellation& rhs) const noexcept { - return !operator==(rhs); - } - }; - struct Rasterizer { - constexpr Rasterizer(bool cull_enable, bool depth_bias_enable, bool depth_clamp_enable, - bool ndc_minus_one_to_one, Maxwell::CullFace cull_face, - Maxwell::FrontFace front_face) - : cull_enable{cull_enable}, depth_bias_enable{depth_bias_enable}, - depth_clamp_enable{depth_clamp_enable}, ndc_minus_one_to_one{ndc_minus_one_to_one}, - cull_face{cull_face}, front_face{front_face} {} - Rasterizer() = 
default; + union { + u32 raw; + BitField<0, 4, u32> topology; + BitField<4, 1, u32> primitive_restart_enable; + BitField<5, 1, u32> cull_enable; + BitField<6, 1, u32> depth_bias_enable; + BitField<7, 1, u32> depth_clamp_enable; + BitField<8, 1, u32> ndc_minus_one_to_one; + BitField<9, 2, u32> cull_face; + BitField<11, 1, u32> front_face; + BitField<12, 2, u32> polygon_mode; + BitField<14, 5, u32> patch_control_points_minus_one; + BitField<19, 2, u32> tessellation_primitive; + BitField<21, 2, u32> tessellation_spacing; + BitField<23, 1, u32> tessellation_clockwise; + BitField<24, 1, u32> logic_op_enable; + BitField<25, 4, u32> logic_op; + }; - bool cull_enable; - bool depth_bias_enable; - bool depth_clamp_enable; - bool ndc_minus_one_to_one; - Maxwell::CullFace cull_face; - Maxwell::FrontFace front_face; + // TODO(Rodrigo): Move this to push constants + u32 point_size; + + void Fill(const Maxwell& regs) noexcept; std::size_t Hash() const noexcept; @@ -183,7 +163,20 @@ struct FixedPipelineState { bool operator!=(const Rasterizer& rhs) const noexcept { return !operator==(rhs); } + + constexpr Maxwell::PrimitiveTopology Topology() const noexcept { + return static_cast(topology.Value()); + } + + Maxwell::CullFace CullFace() const noexcept { + return UnpackCullFace(cull_face.Value()); + } + + Maxwell::FrontFace FrontFace() const noexcept { + return UnpackFrontFace(front_face.Value()); + } }; + static_assert(IsHashable); struct DepthStencil { template @@ -257,8 +250,6 @@ struct FixedPipelineState { }; VertexInput vertex_input; - InputAssembly input_assembly; - Tessellation tessellation; Rasterizer rasterizer; DepthStencil depth_stencil; ColorBlending color_blending; @@ -273,8 +264,6 @@ struct FixedPipelineState { }; static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_copyable_v); 
static_assert(std::is_trivially_copyable_v); static_assert(std::is_trivially_copyable_v); diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index 0dd3ea5bc..e12c26076 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -158,10 +158,8 @@ std::vector VKGraphicsPipeline::CreateShaderModules( vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpass_params, const SPIRVProgram& program) const { const auto& vi = fixed_state.vertex_input; - const auto& ia = fixed_state.input_assembly; const auto& ds = fixed_state.depth_stencil; const auto& cd = fixed_state.color_blending; - const auto& ts = fixed_state.tessellation; const auto& rs = fixed_state.rasterizer; std::vector vertex_bindings; @@ -226,15 +224,15 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa input_assembly_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO; input_assembly_ci.pNext = nullptr; input_assembly_ci.flags = 0; - input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, ia.topology); + input_assembly_ci.topology = MaxwellToVK::PrimitiveTopology(device, rs.Topology()); input_assembly_ci.primitiveRestartEnable = - ia.primitive_restart_enable && SupportsPrimitiveRestart(input_assembly_ci.topology); + rs.primitive_restart_enable != 0 && SupportsPrimitiveRestart(input_assembly_ci.topology); VkPipelineTessellationStateCreateInfo tessellation_ci; tessellation_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO; tessellation_ci.pNext = nullptr; tessellation_ci.flags = 0; - tessellation_ci.patchControlPoints = ts.patch_control_points; + tessellation_ci.patchControlPoints = rs.patch_control_points_minus_one.Value() + 1; VkPipelineViewportStateCreateInfo viewport_ci; viewport_ci.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO; @@ -253,8 
+251,8 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa rasterization_ci.rasterizerDiscardEnable = VK_FALSE; rasterization_ci.polygonMode = VK_POLYGON_MODE_FILL; rasterization_ci.cullMode = - rs.cull_enable ? MaxwellToVK::CullFace(rs.cull_face) : VK_CULL_MODE_NONE; - rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.front_face); + rs.cull_enable ? MaxwellToVK::CullFace(rs.CullFace()) : VK_CULL_MODE_NONE; + rasterization_ci.frontFace = MaxwellToVK::FrontFace(rs.FrontFace()); rasterization_ci.depthBiasEnable = rs.depth_bias_enable; rasterization_ci.depthBiasConstantFactor = 0.0f; rasterization_ci.depthBiasClamp = 0.0f; diff --git a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp index 083da9999..8fdc6400d 100644 --- a/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp +++ b/src/video_core/renderer_vulkan/vk_pipeline_cache.cpp @@ -329,9 +329,9 @@ VKPipelineCache::DecompileShaders(const GraphicsPipelineCacheKey& key) { const auto& gpu = system.GPU().Maxwell3D(); Specialization specialization; - if (fixed_state.input_assembly.topology == Maxwell::PrimitiveTopology::Points) { - ASSERT(fixed_state.input_assembly.point_size != 0.0f); - specialization.point_size = fixed_state.input_assembly.point_size; + if (fixed_state.rasterizer.Topology() == Maxwell::PrimitiveTopology::Points) { + ASSERT(fixed_state.rasterizer.point_size != 0); + std::memcpy(&specialization.point_size, &fixed_state.rasterizer.point_size, sizeof(u32)); } for (std::size_t i = 0; i < Maxwell::NumVertexAttributes; ++i) { specialization.attribute_types[i] = fixed_state.vertex_input.attributes[i].Type(); From b571c92dfd0e6bc3efeae6087723996165273c06 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 18 Apr 2020 05:41:56 -0300 Subject: [PATCH 4/5] fixed_pipeline_state: Pack blending state Reduce FixedPipelineState's size to 364 bytes. 
--- .../renderer_vulkan/fixed_pipeline_state.cpp | 213 +++++++++++++----- .../renderer_vulkan/fixed_pipeline_state.h | 81 ++++--- .../renderer_vulkan/vk_graphics_pipeline.cpp | 31 ++- 3 files changed, 227 insertions(+), 98 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 1a23de07f..2b053ea74 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -95,71 +95,58 @@ void FixedPipelineState::Rasterizer::Fill(const Maxwell& regs) noexcept { std::memcpy(&point_size, &regs.point_size, sizeof(point_size)); // TODO: C++20 std::bit_cast } -namespace { - -constexpr FixedPipelineState::BlendingAttachment GetBlendingAttachmentState( - const Maxwell& regs, std::size_t render_target) { - const auto& mask = regs.color_mask[regs.color_mask_common ? 0 : render_target]; - const std::array components = {mask.R != 0, mask.G != 0, mask.B != 0, mask.A != 0}; - - const FixedPipelineState::BlendingAttachment default_blending( - false, Maxwell::Blend::Equation::Add, Maxwell::Blend::Factor::One, - Maxwell::Blend::Factor::Zero, Maxwell::Blend::Equation::Add, Maxwell::Blend::Factor::One, - Maxwell::Blend::Factor::Zero, components); - if (render_target >= regs.rt_control.count) { - return default_blending; +void FixedPipelineState::ColorBlending::Fill(const Maxwell& regs) noexcept { + for (std::size_t index = 0; index < std::size(attachments); ++index) { + attachments[index].Fill(regs, index); } +} + +void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size_t index) { + const auto& mask = regs.color_mask[regs.color_mask_common ? 
0 : index]; + + raw = 0; + mask_r.Assign(mask.R); + mask_g.Assign(mask.G); + mask_b.Assign(mask.B); + mask_a.Assign(mask.A); + + // TODO: C++20 Use templated lambda to deduplicate code if (!regs.independent_blend_enable) { const auto& src = regs.blend; - if (!src.enable[render_target]) { - return default_blending; + if (!src.enable[index]) { + return; } - return FixedPipelineState::BlendingAttachment( - true, src.equation_rgb, src.factor_source_rgb, src.factor_dest_rgb, src.equation_a, - src.factor_source_a, src.factor_dest_a, components); + equation_rgb.Assign(PackBlendEquation(src.equation_rgb)); + equation_a.Assign(PackBlendEquation(src.equation_a)); + factor_source_rgb.Assign(PackBlendFactor(src.factor_source_rgb)); + factor_dest_rgb.Assign(PackBlendFactor(src.factor_dest_rgb)); + factor_source_a.Assign(PackBlendFactor(src.factor_source_a)); + factor_dest_a.Assign(PackBlendFactor(src.factor_dest_a)); + enable.Assign(1); + return; } - if (!regs.blend.enable[render_target]) { - return default_blending; + if (!regs.blend.enable[index]) { + return; } - const auto& src = regs.independent_blend[render_target]; - return FixedPipelineState::BlendingAttachment( - true, src.equation_rgb, src.factor_source_rgb, src.factor_dest_rgb, src.equation_a, - src.factor_source_a, src.factor_dest_a, components); + const auto& src = regs.independent_blend[index]; + equation_rgb.Assign(PackBlendEquation(src.equation_rgb)); + equation_a.Assign(PackBlendEquation(src.equation_a)); + factor_source_rgb.Assign(PackBlendFactor(src.factor_source_rgb)); + factor_dest_rgb.Assign(PackBlendFactor(src.factor_dest_rgb)); + factor_source_a.Assign(PackBlendFactor(src.factor_source_a)); + factor_dest_a.Assign(PackBlendFactor(src.factor_dest_a)); + enable.Assign(1); } -constexpr FixedPipelineState::ColorBlending GetColorBlendingState(const Maxwell& regs) { - return FixedPipelineState::ColorBlending( - {regs.blend_color.r, regs.blend_color.g, regs.blend_color.b, regs.blend_color.a}, - 
regs.rt_control.count, - {GetBlendingAttachmentState(regs, 0), GetBlendingAttachmentState(regs, 1), - GetBlendingAttachmentState(regs, 2), GetBlendingAttachmentState(regs, 3), - GetBlendingAttachmentState(regs, 4), GetBlendingAttachmentState(regs, 5), - GetBlendingAttachmentState(regs, 6), GetBlendingAttachmentState(regs, 7)}); -} - -} // Anonymous namespace - std::size_t FixedPipelineState::BlendingAttachment::Hash() const noexcept { - return static_cast(enable) ^ (static_cast(rgb_equation) << 5) ^ - (static_cast(src_rgb_func) << 10) ^ - (static_cast(dst_rgb_func) << 15) ^ - (static_cast(a_equation) << 20) ^ - (static_cast(src_a_func) << 25) ^ - (static_cast(dst_a_func) << 30) ^ - (static_cast(components[0]) << 35) ^ - (static_cast(components[1]) << 36) ^ - (static_cast(components[2]) << 37) ^ - (static_cast(components[3]) << 38); + return raw; } bool FixedPipelineState::BlendingAttachment::operator==(const BlendingAttachment& rhs) const noexcept { - return std::tie(enable, rgb_equation, src_rgb_func, dst_rgb_func, a_equation, src_a_func, - dst_a_func, components) == - std::tie(rhs.enable, rhs.rgb_equation, rhs.src_rgb_func, rhs.dst_rgb_func, - rhs.a_equation, rhs.src_a_func, rhs.dst_a_func, rhs.components); + return raw == rhs.raw; } std::size_t FixedPipelineState::VertexInput::Hash() const noexcept { @@ -190,16 +177,15 @@ bool FixedPipelineState::DepthStencil::operator==(const DepthStencil& rhs) const } std::size_t FixedPipelineState::ColorBlending::Hash() const noexcept { - std::size_t hash = attachments_count << 13; - for (std::size_t rt = 0; rt < static_cast(attachments_count); ++rt) { + std::size_t hash = 0; + for (std::size_t rt = 0; rt < std::size(attachments); ++rt) { boost::hash_combine(hash, attachments[rt].Hash()); } return hash; } bool FixedPipelineState::ColorBlending::operator==(const ColorBlending& rhs) const noexcept { - return std::equal(attachments.begin(), attachments.begin() + attachments_count, - rhs.attachments.begin(), 
rhs.attachments.begin() + rhs.attachments_count); + return attachments == rhs.attachments; } std::size_t FixedPipelineState::Hash() const noexcept { @@ -220,7 +206,7 @@ FixedPipelineState GetFixedPipelineState(const Maxwell& regs) { FixedPipelineState fixed_state; fixed_state.rasterizer.Fill(regs); fixed_state.depth_stencil.Fill(regs); - fixed_state.color_blending = GetColorBlendingState(regs); + fixed_state.color_blending.Fill(regs); return fixed_state; } @@ -312,4 +298,121 @@ Maxwell::LogicOperation FixedPipelineState::UnpackLogicOp(u32 packed) noexcept { return static_cast(packed + 0x1500); } +u32 FixedPipelineState::PackBlendEquation(Maxwell::Blend::Equation equation) noexcept { + switch (equation) { + case Maxwell::Blend::Equation::Add: + case Maxwell::Blend::Equation::AddGL: + return 0; + case Maxwell::Blend::Equation::Subtract: + case Maxwell::Blend::Equation::SubtractGL: + return 1; + case Maxwell::Blend::Equation::ReverseSubtract: + case Maxwell::Blend::Equation::ReverseSubtractGL: + return 2; + case Maxwell::Blend::Equation::Min: + case Maxwell::Blend::Equation::MinGL: + return 3; + case Maxwell::Blend::Equation::Max: + case Maxwell::Blend::Equation::MaxGL: + return 4; + } + return 0; +} + +Maxwell::Blend::Equation FixedPipelineState::UnpackBlendEquation(u32 packed) noexcept { + static constexpr std::array LUT = { + Maxwell::Blend::Equation::Add, Maxwell::Blend::Equation::Subtract, + Maxwell::Blend::Equation::ReverseSubtract, Maxwell::Blend::Equation::Min, + Maxwell::Blend::Equation::Max}; + return LUT[packed]; +} + +u32 FixedPipelineState::PackBlendFactor(Maxwell::Blend::Factor factor) noexcept { + switch (factor) { + case Maxwell::Blend::Factor::Zero: + case Maxwell::Blend::Factor::ZeroGL: + return 0; + case Maxwell::Blend::Factor::One: + case Maxwell::Blend::Factor::OneGL: + return 1; + case Maxwell::Blend::Factor::SourceColor: + case Maxwell::Blend::Factor::SourceColorGL: + return 2; + case Maxwell::Blend::Factor::OneMinusSourceColor: + case 
Maxwell::Blend::Factor::OneMinusSourceColorGL: + return 3; + case Maxwell::Blend::Factor::SourceAlpha: + case Maxwell::Blend::Factor::SourceAlphaGL: + return 4; + case Maxwell::Blend::Factor::OneMinusSourceAlpha: + case Maxwell::Blend::Factor::OneMinusSourceAlphaGL: + return 5; + case Maxwell::Blend::Factor::DestAlpha: + case Maxwell::Blend::Factor::DestAlphaGL: + return 6; + case Maxwell::Blend::Factor::OneMinusDestAlpha: + case Maxwell::Blend::Factor::OneMinusDestAlphaGL: + return 7; + case Maxwell::Blend::Factor::DestColor: + case Maxwell::Blend::Factor::DestColorGL: + return 8; + case Maxwell::Blend::Factor::OneMinusDestColor: + case Maxwell::Blend::Factor::OneMinusDestColorGL: + return 9; + case Maxwell::Blend::Factor::SourceAlphaSaturate: + case Maxwell::Blend::Factor::SourceAlphaSaturateGL: + return 10; + case Maxwell::Blend::Factor::Source1Color: + case Maxwell::Blend::Factor::Source1ColorGL: + return 11; + case Maxwell::Blend::Factor::OneMinusSource1Color: + case Maxwell::Blend::Factor::OneMinusSource1ColorGL: + return 12; + case Maxwell::Blend::Factor::Source1Alpha: + case Maxwell::Blend::Factor::Source1AlphaGL: + return 13; + case Maxwell::Blend::Factor::OneMinusSource1Alpha: + case Maxwell::Blend::Factor::OneMinusSource1AlphaGL: + return 14; + case Maxwell::Blend::Factor::ConstantColor: + case Maxwell::Blend::Factor::ConstantColorGL: + return 15; + case Maxwell::Blend::Factor::OneMinusConstantColor: + case Maxwell::Blend::Factor::OneMinusConstantColorGL: + return 16; + case Maxwell::Blend::Factor::ConstantAlpha: + case Maxwell::Blend::Factor::ConstantAlphaGL: + return 17; + case Maxwell::Blend::Factor::OneMinusConstantAlpha: + case Maxwell::Blend::Factor::OneMinusConstantAlphaGL: + return 18; + } + return 0; +} + +Maxwell::Blend::Factor FixedPipelineState::UnpackBlendFactor(u32 packed) noexcept { + static constexpr std::array LUT = { + Maxwell::Blend::Factor::Zero, + Maxwell::Blend::Factor::One, + Maxwell::Blend::Factor::SourceColor, + 
Maxwell::Blend::Factor::OneMinusSourceColor, + Maxwell::Blend::Factor::SourceAlpha, + Maxwell::Blend::Factor::OneMinusSourceAlpha, + Maxwell::Blend::Factor::DestAlpha, + Maxwell::Blend::Factor::OneMinusDestAlpha, + Maxwell::Blend::Factor::DestColor, + Maxwell::Blend::Factor::OneMinusDestColor, + Maxwell::Blend::Factor::SourceAlphaSaturate, + Maxwell::Blend::Factor::Source1Color, + Maxwell::Blend::Factor::OneMinusSource1Color, + Maxwell::Blend::Factor::Source1Alpha, + Maxwell::Blend::Factor::OneMinusSource1Alpha, + Maxwell::Blend::Factor::ConstantColor, + Maxwell::Blend::Factor::OneMinusConstantColor, + Maxwell::Blend::Factor::ConstantAlpha, + Maxwell::Blend::Factor::OneMinusConstantAlpha, + }; + return LUT[packed]; +} + } // namespace Vulkan diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 75b093e90..9393cb24c 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -42,27 +42,29 @@ struct FixedPipelineState { static u32 PackLogicOp(Maxwell::LogicOperation op) noexcept; static Maxwell::LogicOperation UnpackLogicOp(u32 packed) noexcept; - struct BlendingAttachment { - constexpr BlendingAttachment(bool enable, Maxwell::Blend::Equation rgb_equation, - Maxwell::Blend::Factor src_rgb_func, - Maxwell::Blend::Factor dst_rgb_func, - Maxwell::Blend::Equation a_equation, - Maxwell::Blend::Factor src_a_func, - Maxwell::Blend::Factor dst_a_func, - std::array components) - : enable{enable}, rgb_equation{rgb_equation}, src_rgb_func{src_rgb_func}, - dst_rgb_func{dst_rgb_func}, a_equation{a_equation}, src_a_func{src_a_func}, - dst_a_func{dst_a_func}, components{components} {} - BlendingAttachment() = default; + static u32 PackBlendEquation(Maxwell::Blend::Equation equation) noexcept; + static Maxwell::Blend::Equation UnpackBlendEquation(u32 packed) noexcept; - bool enable; - Maxwell::Blend::Equation rgb_equation; - 
Maxwell::Blend::Factor src_rgb_func; - Maxwell::Blend::Factor dst_rgb_func; - Maxwell::Blend::Equation a_equation; - Maxwell::Blend::Factor src_a_func; - Maxwell::Blend::Factor dst_a_func; - std::array components; + static u32 PackBlendFactor(Maxwell::Blend::Factor factor) noexcept; + static Maxwell::Blend::Factor UnpackBlendFactor(u32 packed) noexcept; + + struct BlendingAttachment { + union { + u32 raw; + BitField<0, 1, u32> mask_r; + BitField<1, 1, u32> mask_g; + BitField<2, 1, u32> mask_b; + BitField<3, 1, u32> mask_a; + BitField<4, 3, u32> equation_rgb; + BitField<7, 3, u32> equation_a; + BitField<10, 5, u32> factor_source_rgb; + BitField<15, 5, u32> factor_dest_rgb; + BitField<20, 5, u32> factor_source_a; + BitField<25, 5, u32> factor_dest_a; + BitField<30, 1, u32> enable; + }; + + void Fill(const Maxwell& regs, std::size_t index); std::size_t Hash() const noexcept; @@ -71,7 +73,36 @@ struct FixedPipelineState { bool operator!=(const BlendingAttachment& rhs) const noexcept { return !operator==(rhs); } + + constexpr std::array Mask() const noexcept { + return {mask_r != 0, mask_g != 0, mask_b != 0, mask_a != 0}; + } + + Maxwell::Blend::Equation EquationRGB() const noexcept { + return UnpackBlendEquation(equation_rgb.Value()); + } + + Maxwell::Blend::Equation EquationAlpha() const noexcept { + return UnpackBlendEquation(equation_a.Value()); + } + + Maxwell::Blend::Factor SourceRGBFactor() const noexcept { + return UnpackBlendFactor(factor_source_rgb.Value()); + } + + Maxwell::Blend::Factor DestRGBFactor() const noexcept { + return UnpackBlendFactor(factor_dest_rgb.Value()); + } + + Maxwell::Blend::Factor SourceAlphaFactor() const noexcept { + return UnpackBlendFactor(factor_source_a.Value()); + } + + Maxwell::Blend::Factor DestAlphaFactor() const noexcept { + return UnpackBlendFactor(factor_dest_a.Value()); + } }; + static_assert(IsHashable); struct VertexInput { union Binding { @@ -231,15 +262,10 @@ struct FixedPipelineState { static_assert(IsHashable); struct 
ColorBlending { - constexpr ColorBlending( - std::array blend_constants, std::size_t attachments_count, - std::array attachments) - : attachments_count{attachments_count}, attachments{attachments} {} - ColorBlending() = default; - - std::size_t attachments_count; std::array attachments; + void Fill(const Maxwell& regs) noexcept; + std::size_t Hash() const noexcept; bool operator==(const ColorBlending& rhs) const noexcept; @@ -248,6 +274,7 @@ struct FixedPipelineState { return !operator==(rhs); } }; + static_assert(IsHashable); VertexInput vertex_input; Rasterizer rasterizer; diff --git a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp index e12c26076..343999cf5 100644 --- a/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_graphics_pipeline.cpp @@ -286,29 +286,28 @@ vk::Pipeline VKGraphicsPipeline::CreatePipeline(const RenderPassParams& renderpa depth_stencil_ci.maxDepthBounds = 0.0f; std::array cb_attachments; - const std::size_t num_attachments = - std::min(cd.attachments_count, renderpass_params.color_attachments.size()); - for (std::size_t i = 0; i < num_attachments; ++i) { - static constexpr std::array component_table = { + const std::size_t num_attachments = renderpass_params.color_attachments.size(); + for (std::size_t index = 0; index < num_attachments; ++index) { + static constexpr std::array COMPONENT_TABLE = { VK_COLOR_COMPONENT_R_BIT, VK_COLOR_COMPONENT_G_BIT, VK_COLOR_COMPONENT_B_BIT, VK_COLOR_COMPONENT_A_BIT}; - const auto& blend = cd.attachments[i]; + const auto& blend = cd.attachments[index]; VkColorComponentFlags color_components = 0; - for (std::size_t j = 0; j < component_table.size(); ++j) { - if (blend.components[j]) { - color_components |= component_table[j]; + for (std::size_t i = 0; i < COMPONENT_TABLE.size(); ++i) { + if (blend.Mask()[i]) { + color_components |= COMPONENT_TABLE[i]; } } - VkPipelineColorBlendAttachmentState& 
attachment = cb_attachments[i]; - attachment.blendEnable = blend.enable; - attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.src_rgb_func); - attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.dst_rgb_func); - attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.rgb_equation); - attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.src_a_func); - attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.dst_a_func); - attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.a_equation); + VkPipelineColorBlendAttachmentState& attachment = cb_attachments[index]; + attachment.blendEnable = blend.enable != 0; + attachment.srcColorBlendFactor = MaxwellToVK::BlendFactor(blend.SourceRGBFactor()); + attachment.dstColorBlendFactor = MaxwellToVK::BlendFactor(blend.DestRGBFactor()); + attachment.colorBlendOp = MaxwellToVK::BlendEquation(blend.EquationRGB()); + attachment.srcAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.SourceAlphaFactor()); + attachment.dstAlphaBlendFactor = MaxwellToVK::BlendFactor(blend.DestAlphaFactor()); + attachment.alphaBlendOp = MaxwellToVK::BlendEquation(blend.EquationAlpha()); attachment.colorWriteMask = color_components; } From d62f57cf5af7f329be618c0766d59ded55ff53b3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sat, 18 Apr 2020 05:55:49 -0300 Subject: [PATCH 5/5] fixed_pipeline_state: Hash and compare the whole structure Pad FixedPipelineState's size to 384 bytes to be a multiple of 16. Compare the whole struct with std::memcmp and hash with CityHash. Using CityHash instead of a naive hash should reduce the number of collisions. Improve used type traits to ensure this operation is safe. 
With these changes the improvements to the hashable pipeline state are: Optimized structure Hash: 89 ns Comparison: 103 ns Construction*: 164 ns Struct size: 384 bytes Original structure Hash: 148 ns Comparison: 174 ns Construction*: 281 ns Struct size: 1384 bytes * Attribute state initialization is not measured These measurements are averages taken with std::chrono::high_resolution_clock on MSVC shipped on Visual Studio 16.6.0 Preview 2.1. --- .../renderer_vulkan/fixed_pipeline_state.cpp | 60 ++----------------- .../renderer_vulkan/fixed_pipeline_state.h | 54 ++--------------- 2 files changed, 9 insertions(+), 105 deletions(-) diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp index 2b053ea74..be1c31978 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.cpp @@ -140,66 +140,13 @@ void FixedPipelineState::BlendingAttachment::Fill(const Maxwell& regs, std::size enable.Assign(1); } -std::size_t FixedPipelineState::BlendingAttachment::Hash() const noexcept { - return raw; -} - -bool FixedPipelineState::BlendingAttachment::operator==(const BlendingAttachment& rhs) const - noexcept { - return raw == rhs.raw; -} - -std::size_t FixedPipelineState::VertexInput::Hash() const noexcept { - // TODO(Rodrigo): Replace this - return Common::CityHash64(reinterpret_cast(this), sizeof *this); -} - -bool FixedPipelineState::VertexInput::operator==(const VertexInput& rhs) const noexcept { - return std::memcmp(this, &rhs, sizeof *this) == 0; -} - -std::size_t FixedPipelineState::Rasterizer::Hash() const noexcept { - u64 hash = static_cast(raw) << 32; - std::memcpy(&hash, &point_size, sizeof(u32)); +std::size_t FixedPipelineState::Hash() const noexcept { + const u64 hash = Common::CityHash64(reinterpret_cast(this), sizeof *this); return static_cast(hash); } -bool FixedPipelineState::Rasterizer::operator==(const Rasterizer& rhs) const noexcept { - return 
raw == rhs.raw && point_size == rhs.point_size; -} - -std::size_t FixedPipelineState::DepthStencil::Hash() const noexcept { - return raw; -} - -bool FixedPipelineState::DepthStencil::operator==(const DepthStencil& rhs) const noexcept { - return raw == rhs.raw; -} - -std::size_t FixedPipelineState::ColorBlending::Hash() const noexcept { - std::size_t hash = 0; - for (std::size_t rt = 0; rt < std::size(attachments); ++rt) { - boost::hash_combine(hash, attachments[rt].Hash()); - } - return hash; -} - -bool FixedPipelineState::ColorBlending::operator==(const ColorBlending& rhs) const noexcept { - return attachments == rhs.attachments; -} - -std::size_t FixedPipelineState::Hash() const noexcept { - std::size_t hash = 0; - boost::hash_combine(hash, vertex_input.Hash()); - boost::hash_combine(hash, rasterizer.Hash()); - boost::hash_combine(hash, depth_stencil.Hash()); - boost::hash_combine(hash, color_blending.Hash()); - return hash; -} - bool FixedPipelineState::operator==(const FixedPipelineState& rhs) const noexcept { - return std::tie(vertex_input, rasterizer, depth_stencil, color_blending) == - std::tie(rhs.vertex_input, rhs.rasterizer, rhs.depth_stencil, rhs.color_blending); + return std::memcmp(this, &rhs, sizeof *this) == 0; } FixedPipelineState GetFixedPipelineState(const Maxwell& regs) { @@ -207,6 +154,7 @@ FixedPipelineState GetFixedPipelineState(const Maxwell& regs) { fixed_state.rasterizer.Fill(regs); fixed_state.depth_stencil.Fill(regs); fixed_state.color_blending.Fill(regs); + fixed_state.padding = {}; return fixed_state; } diff --git a/src/video_core/renderer_vulkan/fixed_pipeline_state.h b/src/video_core/renderer_vulkan/fixed_pipeline_state.h index 9393cb24c..9fe6bdbf9 100644 --- a/src/video_core/renderer_vulkan/fixed_pipeline_state.h +++ b/src/video_core/renderer_vulkan/fixed_pipeline_state.h @@ -17,13 +17,7 @@ namespace Vulkan { using Maxwell = Tegra::Engines::Maxwell3D::Regs; -// TODO(Rodrigo): Optimize this structure. 
- -template -inline constexpr bool IsHashable = std::has_unique_object_representations_v&& - std::is_trivially_copyable_v&& std::is_trivially_constructible_v; - -struct FixedPipelineState { +struct alignas(32) FixedPipelineState { static u32 PackComparisonOp(Maxwell::ComparisonOp op) noexcept; static Maxwell::ComparisonOp UnpackComparisonOp(u32 packed) noexcept; @@ -102,7 +96,6 @@ struct FixedPipelineState { return UnpackBlendFactor(factor_dest_a.Value()); } }; - static_assert(IsHashable); struct VertexInput { union Binding { @@ -151,16 +144,7 @@ struct FixedPipelineState { attribute.type.Assign(static_cast(type)); attribute.size.Assign(static_cast(size)); } - - std::size_t Hash() const noexcept; - - bool operator==(const VertexInput& rhs) const noexcept; - - bool operator!=(const VertexInput& rhs) const noexcept { - return !operator==(rhs); - } }; - static_assert(IsHashable); struct Rasterizer { union { @@ -187,14 +171,6 @@ struct FixedPipelineState { void Fill(const Maxwell& regs) noexcept; - std::size_t Hash() const noexcept; - - bool operator==(const Rasterizer& rhs) const noexcept; - - bool operator!=(const Rasterizer& rhs) const noexcept { - return !operator==(rhs); - } - constexpr Maxwell::PrimitiveTopology Topology() const noexcept { return static_cast(topology.Value()); } @@ -207,7 +183,6 @@ struct FixedPipelineState { return UnpackFrontFace(front_face.Value()); } }; - static_assert(IsHashable); struct DepthStencil { template @@ -247,39 +222,22 @@ struct FixedPipelineState { void Fill(const Maxwell& regs) noexcept; - std::size_t Hash() const noexcept; - - bool operator==(const DepthStencil& rhs) const noexcept; - - bool operator!=(const DepthStencil& rhs) const noexcept { - return !operator==(rhs); - } - Maxwell::ComparisonOp DepthTestFunc() const noexcept { return UnpackComparisonOp(depth_test_func); } }; - static_assert(IsHashable); struct ColorBlending { std::array attachments; void Fill(const Maxwell& regs) noexcept; - - std::size_t Hash() const 
noexcept; - - bool operator==(const ColorBlending& rhs) const noexcept; - - bool operator!=(const ColorBlending& rhs) const noexcept { - return !operator==(rhs); - } }; - static_assert(IsHashable); VertexInput vertex_input; Rasterizer rasterizer; DepthStencil depth_stencil; ColorBlending color_blending; + std::array padding; std::size_t Hash() const noexcept; @@ -289,12 +247,10 @@ struct FixedPipelineState { return !operator==(rhs); } }; -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_copyable_v); -static_assert(std::is_trivially_copyable_v); +static_assert(std::has_unique_object_representations_v); static_assert(std::is_trivially_copyable_v); +static_assert(std::is_trivially_constructible_v); +static_assert(sizeof(FixedPipelineState) % 32 == 0, "Size is not aligned"); FixedPipelineState GetFixedPipelineState(const Maxwell& regs);