From dec3c981d0f4310243c0a5a77fca89df44fea0e3 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 26 May 2019 01:33:57 -0300 Subject: [PATCH 1/3] vk_device: Enable features when available and misc changes Keeps track of native ASTC support, VK_EXT_scalar_block_layout availability and SSBO range. Check for independentBlend and vertexPipelineStorageAndAtomics as a required feature. Always enable it. Use vk::to_string format to log Vulkan enums. Style changes. --- src/video_core/renderer_vulkan/vk_device.cpp | 136 +++++++++++++++---- src/video_core/renderer_vulkan/vk_device.h | 58 ++++++-- 2 files changed, 151 insertions(+), 43 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index 00242ecbe..f39985c40 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -18,6 +18,7 @@ constexpr std::array Depth24UnormS8Uint = { vk::Format::eD32SfloatS8Uint, vk::Format::eD16UnormS8Uint, {}}; constexpr std::array Depth16UnormS8Uint = { vk::Format::eD24UnormS8Uint, vk::Format::eD32SfloatS8Uint, {}}; +constexpr std::array Astc = {vk::Format::eA8B8G8R8UnormPack32, {}}; } // namespace Alternatives @@ -51,15 +52,19 @@ VKDevice::VKDevice(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice phy : physical{physical}, format_properties{GetFormatProperties(dldi, physical)} { SetupFamilies(dldi, surface); SetupProperties(dldi); + SetupFeatures(dldi); } VKDevice::~VKDevice() = default; bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instance) { - const auto queue_cis = GetDeviceQueueCreateInfos(); - vk::PhysicalDeviceFeatures device_features{}; + vk::PhysicalDeviceFeatures device_features; + device_features.vertexPipelineStoresAndAtomics = true; + device_features.independentBlend = true; + device_features.textureCompressionASTC_LDR = is_optimal_astc_supported; - const std::vector extensions = {VK_KHR_SWAPCHAIN_EXTENSION_NAME}; + const auto queue_cis = GetDeviceQueueCreateInfos(); + const std::vector extensions = LoadExtensions(dldi); const vk::DeviceCreateInfo device_ci({}, static_cast(queue_cis.size()), queue_cis.data(), 0, nullptr, static_cast(extensions.size()), extensions.data(), &device_features); @@ -90,7 +95,7 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, LOG_CRITICAL(Render_Vulkan, "Format={} with usage={} and type={} has no defined alternatives and host " "hardware does not support it", - static_cast(wanted_format), static_cast(wanted_usage), + vk::to_string(wanted_format), vk::to_string(wanted_usage), static_cast(format_type)); UNREACHABLE(); return wanted_format; @@ -118,6 +123,30 @@ vk::Format VKDevice::GetSupportedFormat(vk::Format wanted_format, return wanted_format; } +bool VKDevice::IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, + const vk::DispatchLoaderDynamic& dldi) const { + if (!features.textureCompressionASTC_LDR) { + return false; + } + const auto format_feature_usage{ + vk::FormatFeatureFlagBits::eSampledImage | vk::FormatFeatureFlagBits::eBlitSrc | + vk::FormatFeatureFlagBits::eBlitDst | vk::FormatFeatureFlagBits::eTransferSrc | + vk::FormatFeatureFlagBits::eTransferDst}; + constexpr std::array astc_formats = { + vk::Format::eAstc4x4UnormBlock, vk::Format::eAstc4x4SrgbBlock, + vk::Format::eAstc8x8SrgbBlock, vk::Format::eAstc8x6SrgbBlock, + vk::Format::eAstc5x4SrgbBlock, vk::Format::eAstc5x5UnormBlock, + vk::Format::eAstc5x5SrgbBlock, vk::Format::eAstc10x8UnormBlock, + vk::Format::eAstc10x8SrgbBlock}; + for (const auto format : astc_formats) { + const auto format_properties{physical.getFormatProperties(format, dldi)}; + if (!(format_properties.optimalTilingFeatures & format_feature_usage)) { + return false; + } + } + return true; +} + bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, FormatType format_type) const { const auto it = format_properties.find(wanted_format); @@ -132,11 +161,9 @@ bool VKDevice::IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlag bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, vk::SurfaceKHR surface) { - const std::string swapchain_extension = VK_KHR_SWAPCHAIN_EXTENSION_NAME; - bool has_swapchain{}; for (const auto& prop : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { - has_swapchain |= prop.extensionName == swapchain_extension; + has_swapchain |= prop.extensionName == std::string(VK_KHR_SWAPCHAIN_EXTENSION_NAME); } if (!has_swapchain) { // The device doesn't support creating swapchains. @@ -160,8 +187,14 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev } // TODO(Rodrigo): Check if the device matches all requeriments. - const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); - if (props.limits.maxUniformBufferRange < 65536) { + const auto properties{physical.getProperties(dldi)}; + const auto limits{properties.limits}; + if (limits.maxUniformBufferRange < 65536) { + return false; + } + + const vk::PhysicalDeviceFeatures features{physical.getFeatures(dldi)}; + if (!features.vertexPipelineStoresAndAtomics || !features.independentBlend) { return false; } @@ -169,6 +202,30 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev return true; } +std::vector VKDevice::LoadExtensions(const vk::DispatchLoaderDynamic& dldi) { + std::vector extensions; + extensions.reserve(2); + extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME); + + const auto Test = [&](const vk::ExtensionProperties& extension, + std::optional> status, const char* name, + u32 revision) { + if (extension.extensionName != std::string(name)) { + return; + } + extensions.push_back(name); + if (status) { + status->get() = true; + } + }; + + for (const auto& extension : physical.enumerateDeviceExtensionProperties(nullptr, dldi)) { + Test(extension, ext_scalar_block_layout, VK_EXT_SCALAR_BLOCK_LAYOUT_EXTENSION_NAME, 1); + } + + return extensions; +} + void VKDevice::SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface) { std::optional graphics_family_, present_family_; @@ -196,10 +253,16 @@ void VKDevice::SetupProperties(const vk::DispatchLoaderDynamic& dldi) { const vk::PhysicalDeviceProperties props = physical.getProperties(dldi); device_type = props.deviceType; uniform_buffer_alignment = static_cast(props.limits.minUniformBufferOffsetAlignment); + max_storage_buffer_range = static_cast(props.limits.maxStorageBufferRange); +} + +void VKDevice::SetupFeatures(const vk::DispatchLoaderDynamic& dldi) { + const auto supported_features{physical.getFeatures(dldi)}; + is_optimal_astc_supported = IsOptimalAstcSupported(supported_features, dldi); } std::vector VKDevice::GetDeviceQueueCreateInfos() const { - static const float QUEUE_PRIORITY = 1.f; + static const float QUEUE_PRIORITY = 1.0f; std::set unique_queue_families = {graphics_family, present_family}; std::vector queue_cis; @@ -212,26 +275,43 @@ std::vector VKDevice::GetDeviceQueueCreateInfos() con std::map VKDevice::GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { + static constexpr std::array formats = {vk::Format::eA8B8G8R8UnormPack32, + vk::Format::eB5G6R5UnormPack16, + vk::Format::eA2B10G10R10UnormPack32, + vk::Format::eR32G32B32A32Sfloat, + vk::Format::eR16G16Unorm, + vk::Format::eR16G16Snorm, + vk::Format::eR8G8B8A8Srgb, + vk::Format::eR8Unorm, + vk::Format::eB10G11R11UfloatPack32, + vk::Format::eR32Sfloat, + vk::Format::eR16Sfloat, + vk::Format::eR16G16B16A16Sfloat, + vk::Format::eD32Sfloat, + vk::Format::eD16Unorm, + vk::Format::eD16UnormS8Uint, + vk::Format::eD24UnormS8Uint, + vk::Format::eD32SfloatS8Uint, + vk::Format::eBc1RgbaUnormBlock, + vk::Format::eBc2UnormBlock, + vk::Format::eBc3UnormBlock, + vk::Format::eBc4UnormBlock, + vk::Format::eBc5UnormBlock, + vk::Format::eBc5SnormBlock, + vk::Format::eBc7UnormBlock, + vk::Format::eAstc4x4UnormBlock, + vk::Format::eAstc4x4SrgbBlock, + vk::Format::eAstc8x8SrgbBlock, + vk::Format::eAstc8x6SrgbBlock, + vk::Format::eAstc5x4SrgbBlock, + vk::Format::eAstc5x5UnormBlock, + vk::Format::eAstc5x5SrgbBlock, + vk::Format::eAstc10x8UnormBlock, + vk::Format::eAstc10x8SrgbBlock}; std::map format_properties; - - const auto AddFormatQuery = [&format_properties, &dldi, physical](vk::Format format) { + for (const auto format : formats) { format_properties.emplace(format, physical.getFormatProperties(format, dldi)); - }; - AddFormatQuery(vk::Format::eA8B8G8R8UnormPack32); - AddFormatQuery(vk::Format::eB5G6R5UnormPack16); - AddFormatQuery(vk::Format::eA2B10G10R10UnormPack32); - AddFormatQuery(vk::Format::eR8G8B8A8Srgb); - AddFormatQuery(vk::Format::eR8Unorm); - AddFormatQuery(vk::Format::eD32Sfloat); - AddFormatQuery(vk::Format::eD16Unorm); - AddFormatQuery(vk::Format::eD16UnormS8Uint); - AddFormatQuery(vk::Format::eD24UnormS8Uint); - AddFormatQuery(vk::Format::eD32SfloatS8Uint); - AddFormatQuery(vk::Format::eBc1RgbaUnormBlock); - AddFormatQuery(vk::Format::eBc2UnormBlock); - AddFormatQuery(vk::Format::eBc3UnormBlock); - AddFormatQuery(vk::Format::eBc4UnormBlock); - + } return format_properties; } diff --git a/src/video_core/renderer_vulkan/vk_device.h b/src/video_core/renderer_vulkan/vk_device.h index e87c7a508..537825d8b 100644 --- a/src/video_core/renderer_vulkan/vk_device.h +++ b/src/video_core/renderer_vulkan/vk_device.h @@ -11,7 +11,7 @@ namespace Vulkan { -/// Format usage descriptor +/// Format usage descriptor. enum class FormatType { Linear, Optimal, Buffer }; /// Handles data specific to a physical device. @@ -34,12 +34,12 @@ public: vk::Format GetSupportedFormat(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, FormatType format_type) const; - /// Returns the dispatch loader with direct function pointers of the device + /// Returns the dispatch loader with direct function pointers of the device. const vk::DispatchLoaderDynamic& GetDispatchLoader() const { return dld; } - /// Returns the logical device + /// Returns the logical device. vk::Device GetLogical() const { return logical.get(); } @@ -69,30 +69,55 @@ public: return present_family; } - /// Returns if the device is integrated with the host CPU + /// Returns if the device is integrated with the host CPU. bool IsIntegrated() const { return device_type == vk::PhysicalDeviceType::eIntegratedGpu; } - /// Returns uniform buffer alignment requeriment + /// Returns uniform buffer alignment requeriment. u64 GetUniformBufferAlignment() const { return uniform_buffer_alignment; } + /// Returns the maximum range for storage buffers. + u64 GetMaxStorageBufferRange() const { + return max_storage_buffer_range; + } + + /// Returns true if ASTC is natively supported. + bool IsOptimalAstcSupported() const { + return is_optimal_astc_supported; + } + + /// Returns true if the device supports VK_EXT_scalar_block_layout. + bool IsExtScalarBlockLayoutSupported() const { + return ext_scalar_block_layout; + } + /// Checks if the physical device is suitable. static bool IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical, vk::SurfaceKHR surface); private: + /// Loads extensions into a vector and stores available ones in this object. + std::vector LoadExtensions(const vk::DispatchLoaderDynamic& dldi); + /// Sets up queue families. void SetupFamilies(const vk::DispatchLoaderDynamic& dldi, vk::SurfaceKHR surface); /// Sets up device properties. void SetupProperties(const vk::DispatchLoaderDynamic& dldi); + /// Sets up device features. + void SetupFeatures(const vk::DispatchLoaderDynamic& dldi); + /// Returns a list of queue initialization descriptors. std::vector GetDeviceQueueCreateInfos() const; + /// Returns true if ASTC textures are natively supported. + bool IsOptimalAstcSupported(const vk::PhysicalDeviceFeatures& features, + const vk::DispatchLoaderDynamic& dldi) const; + /// Returns true if a format is supported. bool IsFormatSupported(vk::Format wanted_format, vk::FormatFeatureFlags wanted_usage, FormatType format_type) const; @@ -101,16 +126,19 @@ private: static std::map GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical); - const vk::PhysicalDevice physical; ///< Physical device - vk::DispatchLoaderDynamic dld; ///< Device function pointers - UniqueDevice logical; ///< Logical device - vk::Queue graphics_queue; ///< Main graphics queue - vk::Queue present_queue; ///< Main present queue - u32 graphics_family{}; ///< Main graphics queue family index - u32 present_family{}; ///< Main present queue family index - vk::PhysicalDeviceType device_type; ///< Physical device type - u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment - std::map format_properties; ///< Format properties dictionary + const vk::PhysicalDevice physical; ///< Physical device. + vk::DispatchLoaderDynamic dld; ///< Device function pointers. + UniqueDevice logical; ///< Logical device. + vk::Queue graphics_queue; ///< Main graphics queue. + vk::Queue present_queue; ///< Main present queue. + u32 graphics_family{}; ///< Main graphics queue family index. + u32 present_family{}; ///< Main present queue family index. + vk::PhysicalDeviceType device_type; ///< Physical device type. + u64 uniform_buffer_alignment{}; ///< Uniform buffer alignment requeriment. + u64 max_storage_buffer_range{}; ///< Max storage buffer size. + bool is_optimal_astc_supported{}; ///< Support for native ASTC. + bool ext_scalar_block_layout{}; ///< Support for VK_EXT_scalar_block_layout. + std::map format_properties; ///< Format properties dictionary. }; } // namespace Vulkan From a4c5e3e339430134d9e1322622a7995bfeeee567 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 26 May 2019 01:43:07 -0300 Subject: [PATCH 2/3] vk_shader_decompiler: Misc fixes Fix missing OpSelectionMerge instruction. This caused devices loses on most hardware, Intel didn't care. Fix [-1;1] -> [0;1] depth conversions. Conditionally use VK_EXT_scalar_block_layout. This allows us to use non-std140 layouts on UBOs. Update external Vulkan headers. --- externals/Vulkan-Headers | 2 +- .../renderer_vulkan/vk_shader_decompiler.cpp | 104 ++++++++++-------- .../renderer_vulkan/vk_shader_decompiler.h | 8 +- 3 files changed, 68 insertions(+), 46 deletions(-) diff --git a/externals/Vulkan-Headers b/externals/Vulkan-Headers index 15e5c4db7..d05c8df88 160000 --- a/externals/Vulkan-Headers +++ b/externals/Vulkan-Headers @@ -1 +1 @@ -Subproject commit 15e5c4db7500b936ae758236f2e72fc1aec22020 +Subproject commit d05c8df88da98ec1ab3bc600d7f5783b4060895b diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index b61a6d170..2fb368014 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -17,6 +17,7 @@ #include "video_core/engines/maxwell_3d.h" #include "video_core/engines/shader_bytecode.h" #include "video_core/engines/shader_header.h" +#include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" #include "video_core/shader/shader_ir.h" @@ -33,7 +34,8 @@ using ShaderStage = Tegra::Engines::Maxwell3D::Regs::ShaderStage; using Operation = const OperationNode&; // TODO(Rodrigo): Use rasterizer's value -constexpr u32 MAX_CONSTBUFFER_ELEMENTS = 0x1000; +constexpr u32 MAX_CONSTBUFFER_FLOATS = 0x4000; +constexpr u32 MAX_CONSTBUFFER_ELEMENTS = MAX_CONSTBUFFER_FLOATS / 4; constexpr u32 STAGE_BINDING_STRIDE = 0x100; enum class Type { Bool, Bool2, Float, Int, Uint, HalfFloat }; @@ -87,8 +89,8 @@ bool IsPrecise(Operation operand) { class SPIRVDecompiler : public Sirit::Module { public: - explicit SPIRVDecompiler(const ShaderIR& ir, ShaderStage stage) - : Module(0x00010300), ir{ir}, stage{stage}, header{ir.GetHeader()} { + explicit SPIRVDecompiler(const VKDevice& device, const ShaderIR& ir, ShaderStage stage) + : Module(0x00010300), device{device}, ir{ir}, stage{stage}, header{ir.GetHeader()} { AddCapability(spv::Capability::Shader); AddExtension("SPV_KHR_storage_buffer_storage_class"); AddExtension("SPV_KHR_variable_pointers"); @@ -195,7 +197,9 @@ public: entries.samplers.emplace_back(sampler); } for (const auto& attribute : ir.GetInputAttributes()) { - entries.attributes.insert(GetGenericAttributeLocation(attribute)); + if (IsGenericAttribute(attribute)) { + entries.attributes.insert(GetGenericAttributeLocation(attribute)); + } } entries.clip_distances = ir.GetClipDistances(); entries.shader_length = ir.GetLength(); @@ -210,7 +214,6 @@ private: std::array(OperationCode::Amount)>; static constexpr auto INTERNAL_FLAGS_COUNT = static_cast(InternalFlag::Amount); - static constexpr u32 CBUF_STRIDE = 16; void AllocateBindings() { const u32 binding_base = static_cast(stage) * STAGE_BINDING_STRIDE; @@ -315,6 +318,7 @@ private: constexpr std::array names = {"zero", "sign", "carry", "overflow"}; for (std::size_t flag = 0; flag < INTERNAL_FLAGS_COUNT; ++flag) { + const auto flag_code = static_cast(flag); const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false); internal_flags[flag] = AddGlobalVariable(Name(id, names[flag])); } @@ -374,7 +378,9 @@ private: u32 binding = const_buffers_base_binding; for (const auto& entry : ir.GetConstantBuffers()) { const auto [index, size] = entry; - const Id id = OpVariable(t_cbuf_ubo, spv::StorageClass::Uniform); + const Id type = + device.IsExtScalarBlockLayoutSupported() ? t_cbuf_scalar_ubo : t_cbuf_std140_ubo; + const Id id = OpVariable(type, spv::StorageClass::Uniform); AddGlobalVariable(Name(id, fmt::format("cbuf_{}", index))); Decorate(id, spv::Decoration::Binding, binding++); @@ -569,33 +575,35 @@ private: const Node offset = cbuf->GetOffset(); const Id buffer_id = constant_buffers.at(cbuf->GetIndex()); - Id buffer_index{}; - Id buffer_element{}; - - if (const auto immediate = std::get_if(offset)) { - // Direct access - const u32 offset_imm = immediate->GetValue(); - ASSERT(offset_imm % 4 == 0); - buffer_index = Constant(t_uint, offset_imm / 16); - buffer_element = Constant(t_uint, (offset_imm / 4) % 4); - - } else if (std::holds_alternative(*offset)) { - // Indirect access - // TODO(Rodrigo): Use a uniform buffer stride of 4 and drop this slow math (which - // emits sub-optimal code on GLSL from my testing). - const Id offset_id = BitcastTo(Visit(offset)); - const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); - const Id final_offset = Emit( - OpUMod(t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); - buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); - buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); - + Id pointer{}; + if (device.IsExtScalarBlockLayoutSupported()) { + const Id buffer_offset = Emit(OpShiftRightLogical( + t_uint, BitcastTo(Visit(offset)), Constant(t_uint, 2u))); + pointer = Emit( + OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0u), buffer_offset)); } else { - UNREACHABLE_MSG("Unmanaged offset node type"); + Id buffer_index{}; + Id buffer_element{}; + if (const auto immediate = std::get_if(offset)) { + // Direct access + const u32 offset_imm = immediate->GetValue(); + ASSERT(offset_imm % 4 == 0); + buffer_index = Constant(t_uint, offset_imm / 16); + buffer_element = Constant(t_uint, (offset_imm / 4) % 4); + } else if (std::holds_alternative(*offset)) { + // Indirect access + const Id offset_id = BitcastTo(Visit(offset)); + const Id unsafe_offset = Emit(OpUDiv(t_uint, offset_id, Constant(t_uint, 4))); + const Id final_offset = Emit(OpUMod( + t_uint, unsafe_offset, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS - 1))); + buffer_index = Emit(OpUDiv(t_uint, final_offset, Constant(t_uint, 4))); + buffer_element = Emit(OpUMod(t_uint, final_offset, Constant(t_uint, 4))); + } else { + UNREACHABLE_MSG("Unmanaged offset node type"); + } + pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), + buffer_index, buffer_element)); } - - const Id pointer = Emit(OpAccessChain(t_cbuf_float, buffer_id, Constant(t_uint, 0), - buffer_index, buffer_element)); return Emit(OpLoad(t_float, pointer)); } else if (const auto gmem = std::get_if(node)) { @@ -612,7 +620,9 @@ private: // It's invalid to call conditional on nested nodes, use an operation instead const Id true_label = OpLabel(); const Id skip_label = OpLabel(); - Emit(OpBranchConditional(Visit(conditional->GetCondition()), true_label, skip_label)); + const Id condition = Visit(conditional->GetCondition()); + Emit(OpSelectionMerge(skip_label, spv::SelectionControlMask::MaskNone)); + Emit(OpBranchConditional(condition, true_label, skip_label)); Emit(true_label); VisitBasicBlock(conditional->GetCode()); @@ -968,11 +978,11 @@ private: case ShaderStage::Vertex: { // TODO(Rodrigo): We should use VK_EXT_depth_range_unrestricted instead, but it doesn't // seem to be working on Nvidia's drivers and Intel (mesa and blob) doesn't support it. - const Id position = AccessElement(t_float4, per_vertex, position_index); - Id depth = Emit(OpLoad(t_float, AccessElement(t_out_float, position, 2))); + const Id z_pointer = AccessElement(t_out_float, per_vertex, position_index, 2u); + Id depth = Emit(OpLoad(t_float, z_pointer)); depth = Emit(OpFAdd(t_float, depth, Constant(t_float, 1.0f))); depth = Emit(OpFMul(t_float, depth, Constant(t_float, 0.5f))); - Emit(OpStore(AccessElement(t_out_float, position, 2), depth)); + Emit(OpStore(z_pointer, depth)); break; } case ShaderStage::Fragment: { @@ -1293,6 +1303,7 @@ private: &SPIRVDecompiler::YNegate, }; + const VKDevice& device; const ShaderIR& ir; const ShaderStage stage; const Tegra::Shader::Header header; @@ -1331,12 +1342,18 @@ private: const Id t_out_float4 = Name(TypePointer(spv::StorageClass::Output, t_float4), "out_float4"); const Id t_cbuf_float = TypePointer(spv::StorageClass::Uniform, t_float); - const Id t_cbuf_array = - Decorate(Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufArray"), - spv::Decoration::ArrayStride, CBUF_STRIDE); - const Id t_cbuf_struct = MemberDecorate( - Decorate(TypeStruct(t_cbuf_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); - const Id t_cbuf_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_struct); + const Id t_cbuf_std140 = Decorate( + Name(TypeArray(t_float4, Constant(t_uint, MAX_CONSTBUFFER_ELEMENTS)), "CbufStd140Array"), + spv::Decoration::ArrayStride, 16u); + const Id t_cbuf_scalar = Decorate( + Name(TypeArray(t_float, Constant(t_uint, MAX_CONSTBUFFER_FLOATS)), "CbufScalarArray"), + spv::Decoration::ArrayStride, 4u); + const Id t_cbuf_std140_struct = MemberDecorate( + Decorate(TypeStruct(t_cbuf_std140), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); + const Id t_cbuf_scalar_struct = MemberDecorate( + Decorate(TypeStruct(t_cbuf_scalar), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); + const Id t_cbuf_std140_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_std140_struct); + const Id t_cbuf_scalar_ubo = TypePointer(spv::StorageClass::Uniform, t_cbuf_scalar_struct); const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); const Id t_gmem_array = @@ -1385,8 +1402,9 @@ private: std::map labels; }; -DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage) { - auto decompiler = std::make_unique(ir, stage); +DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, + Maxwell::ShaderStage stage) { + auto decompiler = std::make_unique(device, ir, stage); decompiler->Decompile(); return {std::move(decompiler), decompiler->GetShaderEntries()}; } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.h b/src/video_core/renderer_vulkan/vk_shader_decompiler.h index 329d8fa38..f90541cc1 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.h +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.h @@ -20,10 +20,13 @@ namespace VideoCommon::Shader { class ShaderIR; } +namespace Vulkan { +class VKDevice; +} + namespace Vulkan::VKShader { using Maxwell = Tegra::Engines::Maxwell3D::Regs; - using SamplerEntry = VideoCommon::Shader::Sampler; constexpr u32 DESCRIPTOR_SET = 0; @@ -75,6 +78,7 @@ struct ShaderEntries { using DecompilerResult = std::pair, ShaderEntries>; -DecompilerResult Decompile(const VideoCommon::Shader::ShaderIR& ir, Maxwell::ShaderStage stage); +DecompilerResult Decompile(const VKDevice& device, const VideoCommon::Shader::ShaderIR& ir, + Maxwell::ShaderStage stage); } // namespace Vulkan::VKShader From f424b460369839a6950749babd5cc3e7cf9635dd Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Sun, 26 May 2019 03:09:06 -0300 Subject: [PATCH 3/3] vk_device: Let formats array type be deduced --- src/video_core/renderer_vulkan/vk_device.cpp | 66 ++++++++++---------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/src/video_core/renderer_vulkan/vk_device.cpp b/src/video_core/renderer_vulkan/vk_device.cpp index f39985c40..3b966ddc3 100644 --- a/src/video_core/renderer_vulkan/vk_device.cpp +++ b/src/video_core/renderer_vulkan/vk_device.cpp @@ -275,39 +275,39 @@ std::vector VKDevice::GetDeviceQueueCreateInfos() con std::map VKDevice::GetFormatProperties( const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDevice physical) { - static constexpr std::array formats = {vk::Format::eA8B8G8R8UnormPack32, - vk::Format::eB5G6R5UnormPack16, - vk::Format::eA2B10G10R10UnormPack32, - vk::Format::eR32G32B32A32Sfloat, - vk::Format::eR16G16Unorm, - vk::Format::eR16G16Snorm, - vk::Format::eR8G8B8A8Srgb, - vk::Format::eR8Unorm, - vk::Format::eB10G11R11UfloatPack32, - vk::Format::eR32Sfloat, - vk::Format::eR16Sfloat, - vk::Format::eR16G16B16A16Sfloat, - vk::Format::eD32Sfloat, - vk::Format::eD16Unorm, - vk::Format::eD16UnormS8Uint, - vk::Format::eD24UnormS8Uint, - vk::Format::eD32SfloatS8Uint, - vk::Format::eBc1RgbaUnormBlock, - vk::Format::eBc2UnormBlock, - vk::Format::eBc3UnormBlock, - vk::Format::eBc4UnormBlock, - vk::Format::eBc5UnormBlock, - vk::Format::eBc5SnormBlock, - vk::Format::eBc7UnormBlock, - vk::Format::eAstc4x4UnormBlock, - vk::Format::eAstc4x4SrgbBlock, - vk::Format::eAstc8x8SrgbBlock, - vk::Format::eAstc8x6SrgbBlock, - vk::Format::eAstc5x4SrgbBlock, - vk::Format::eAstc5x5UnormBlock, - vk::Format::eAstc5x5SrgbBlock, - vk::Format::eAstc10x8UnormBlock, - vk::Format::eAstc10x8SrgbBlock}; + static constexpr std::array formats{vk::Format::eA8B8G8R8UnormPack32, + vk::Format::eB5G6R5UnormPack16, + vk::Format::eA2B10G10R10UnormPack32, + vk::Format::eR32G32B32A32Sfloat, + vk::Format::eR16G16Unorm, + vk::Format::eR16G16Snorm, + vk::Format::eR8G8B8A8Srgb, + vk::Format::eR8Unorm, + vk::Format::eB10G11R11UfloatPack32, + vk::Format::eR32Sfloat, + vk::Format::eR16Sfloat, + vk::Format::eR16G16B16A16Sfloat, + vk::Format::eD32Sfloat, + vk::Format::eD16Unorm, + vk::Format::eD16UnormS8Uint, + vk::Format::eD24UnormS8Uint, + vk::Format::eD32SfloatS8Uint, + vk::Format::eBc1RgbaUnormBlock, + vk::Format::eBc2UnormBlock, + vk::Format::eBc3UnormBlock, + vk::Format::eBc4UnormBlock, + vk::Format::eBc5UnormBlock, + vk::Format::eBc5SnormBlock, + vk::Format::eBc7UnormBlock, + vk::Format::eAstc4x4UnormBlock, + vk::Format::eAstc4x4SrgbBlock, + vk::Format::eAstc8x8SrgbBlock, + vk::Format::eAstc8x6SrgbBlock, + vk::Format::eAstc5x4SrgbBlock, + vk::Format::eAstc5x5UnormBlock, + vk::Format::eAstc5x5SrgbBlock, + vk::Format::eAstc10x8UnormBlock, + vk::Format::eAstc10x8SrgbBlock}; std::map format_properties; for (const auto format : formats) { format_properties.emplace(format, physical.getFormatProperties(format, dldi));