From 675f23aedc9a3a99925068e952cbcb3faf88296a Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 18 Sep 2019 01:07:01 -0300 Subject: [PATCH] shader/image: Implement SULD and remove irrelevant code * Implement SULD as float. * Remove conditional declaration of GL_ARB_shader_viewport_layer_array. --- src/video_core/engines/shader_bytecode.h | 2 +- src/video_core/renderer_opengl/gl_device.cpp | 22 ++++++ src/video_core/renderer_opengl/gl_device.h | 5 ++ .../renderer_opengl/gl_shader_cache.cpp | 16 ++-- .../renderer_opengl/gl_shader_decompiler.cpp | 17 ++++- .../renderer_opengl/gl_shader_decompiler.h | 1 - .../renderer_opengl/gl_shader_disk_cache.cpp | 10 --- .../renderer_vulkan/vk_shader_decompiler.cpp | 7 ++ src/video_core/shader/decode/image.cpp | 73 +++++++++++++------ src/video_core/shader/node.h | 4 +- 10 files changed, 110 insertions(+), 47 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 12fb8abb7..81dfe33a5 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -1427,7 +1427,7 @@ union Instruction { ASSERT(mode == SurfaceDataMode::D_BA); return store_data_layout; } - } sust; + } suldst; union { BitField<28, 1, u64> is_ba; diff --git a/src/video_core/renderer_opengl/gl_device.cpp b/src/video_core/renderer_opengl/gl_device.cpp index 4f59a87b4..64de7e425 100644 --- a/src/video_core/renderer_opengl/gl_device.cpp +++ b/src/video_core/renderer_opengl/gl_device.cpp @@ -2,8 +2,10 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include +#include #include #include "common/logging/log.h" @@ -30,9 +32,27 @@ bool TestProgram(const GLchar* glsl) { return link_status == GL_TRUE; } +std::vector GetExtensions() { + GLint num_extensions; + glGetIntegerv(GL_NUM_EXTENSIONS, &num_extensions); + std::vector extensions; + extensions.reserve(num_extensions); + for (GLint index = 0; index < num_extensions; ++index) { + extensions.push_back( + reinterpret_cast(glGetStringi(GL_EXTENSIONS, static_cast(index)))); + } + return extensions; +} + +bool HasExtension(const std::vector& images, std::string_view extension) { + return std::find(images.begin(), images.end(), extension) != images.end(); +} + } // Anonymous namespace Device::Device() { + const std::vector extensions = GetExtensions(); + uniform_buffer_alignment = GetInteger(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT); shader_storage_alignment = GetInteger(GL_SHADER_STORAGE_BUFFER_OFFSET_ALIGNMENT); max_vertex_attributes = GetInteger(GL_MAX_VERTEX_ATTRIBS); @@ -40,6 +60,7 @@ Device::Device() { has_warp_intrinsics = GLAD_GL_NV_gpu_shader5 && GLAD_GL_NV_shader_thread_group && GLAD_GL_NV_shader_thread_shuffle; has_vertex_viewport_layer = GLAD_GL_ARB_shader_viewport_layer_array; + has_image_load_formatted = HasExtension(extensions, "GL_EXT_shader_image_load_formatted"); has_variable_aoffi = TestVariableAoffi(); has_component_indexing_bug = TestComponentIndexingBug(); has_precise_bug = TestPreciseBug(); @@ -55,6 +76,7 @@ Device::Device(std::nullptr_t) { max_varyings = 15; has_warp_intrinsics = true; has_vertex_viewport_layer = true; + has_image_load_formatted = true; has_variable_aoffi = true; has_component_indexing_bug = false; has_precise_bug = false; diff --git a/src/video_core/renderer_opengl/gl_device.h b/src/video_core/renderer_opengl/gl_device.h index ba6dcd3be..bb273c3d6 100644 --- a/src/video_core/renderer_opengl/gl_device.h +++ b/src/video_core/renderer_opengl/gl_device.h @@ -38,6 +38,10 @@ public: return has_vertex_viewport_layer; } + bool HasImageLoadFormatted() const { + return has_image_load_formatted; + } + bool HasVariableAoffi() const { return has_variable_aoffi; } @@ -61,6 +65,7 @@ private: u32 max_varyings{}; bool has_warp_intrinsics{}; bool has_vertex_viewport_layer{}; + bool has_image_load_formatted{}; bool has_variable_aoffi{}; bool has_component_indexing_bug{}; bool has_precise_bug{}; diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp index 0dbc4c02f..42ca3b1bd 100644 --- a/src/video_core/renderer_opengl/gl_shader_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp @@ -211,14 +211,14 @@ CachedProgram SpecializeShader(const std::string& code, const GLShader::ShaderEn const auto primitive_mode{variant.primitive_mode}; const auto texture_buffer_usage{variant.texture_buffer_usage}; - std::string source = "#version 430 core\n" - "#extension GL_ARB_separate_shader_objects : enable\n" - "#extension GL_NV_gpu_shader5 : enable\n" - "#extension GL_NV_shader_thread_group : enable\n" - "#extension GL_NV_shader_thread_shuffle : enable\n"; - if (entries.shader_viewport_layer_array) { - source += "#extension GL_ARB_shader_viewport_layer_array : enable\n"; - } + std::string source = R"(#version 430 core +#extension GL_ARB_separate_shader_objects : enable +#extension GL_ARB_shader_viewport_layer_array : enable +#extension GL_EXT_shader_image_load_formatted : enable +#extension GL_NV_gpu_shader5 : enable +#extension GL_NV_shader_thread_group : enable +#extension GL_NV_shader_thread_shuffle : enable +)"; if (program_type == ProgramType::Compute) { source += "#extension GL_ARB_compute_variable_group_size : require\n"; } diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 76439e7ab..70ce6572b 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -19,6 +19,7 @@ #include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_shader_decompiler.h" +#include "video_core/shader/node.h" #include "video_core/shader/shader_ir.h" namespace OpenGL::GLShader { @@ -398,8 +399,6 @@ public: usage.is_read, usage.is_written); } entries.clip_distances = ir.GetClipDistances(); - entries.shader_viewport_layer_array = - IsVertexShader(stage) && (ir.UsesLayer() || ir.UsesViewportIndex()); entries.shader_length = ir.GetLength(); return entries; } @@ -1801,6 +1800,19 @@ private: return {tmp, Type::Float}; } + Expression ImageLoad(Operation operation) { + if (!device.HasImageLoadFormatted()) { + LOG_ERROR(Render_OpenGL, + "Device lacks GL_EXT_shader_image_load_formatted, stubbing image load"); + return {"0", Type::Int}; + } + + const auto meta{std::get(operation.GetMeta())}; + return {fmt::format("imageLoad({}, {}){}", GetImage(meta.image), + BuildIntegerCoordinates(operation), GetSwizzle(meta.element)), + Type::Float}; + } + Expression ImageStore(Operation operation) { const auto meta{std::get(operation.GetMeta())}; code.AddLine("imageStore({}, {}, {});", GetImage(meta.image), @@ -2164,6 +2176,7 @@ private: &GLSLDecompiler::TextureQueryLod, &GLSLDecompiler::TexelFetch, + &GLSLDecompiler::ImageLoad, &GLSLDecompiler::ImageStore, &GLSLDecompiler::AtomicImageAdd, &GLSLDecompiler::AtomicImageMin, diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.h b/src/video_core/renderer_opengl/gl_shader_decompiler.h index 2ea02f5bf..e538dc001 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.h +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.h @@ -90,7 +90,6 @@ struct ShaderEntries { std::vector images; std::vector global_memory_entries; std::array clip_distances{}; - bool shader_viewport_layer_array{}; std::size_t shader_length{}; }; diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp index f141c4e3b..02b4dd234 100644 --- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp +++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp @@ -382,12 +382,6 @@ std::optional ShaderDiskCacheOpenGL::LoadDecompiledEn } } - bool shader_viewport_layer_array{}; - if (!LoadObjectFromPrecompiled(shader_viewport_layer_array)) { - return {}; - } - entry.entries.shader_viewport_layer_array = shader_viewport_layer_array; - u64 shader_length{}; if (!LoadObjectFromPrecompiled(shader_length)) { return {}; @@ -464,10 +458,6 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(u64 unique_identifier, const std: } } - if (!SaveObjectToPrecompiled(entries.shader_viewport_layer_array)) { - return false; - } - if (!SaveObjectToPrecompiled(static_cast(entries.shader_length))) { return false; } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index f7fbbb6e4..9d31bff43 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -19,6 +19,7 @@ #include "video_core/engines/shader_header.h" #include "video_core/renderer_vulkan/vk_device.h" #include "video_core/renderer_vulkan/vk_shader_decompiler.h" +#include "video_core/shader/node.h" #include "video_core/shader/shader_ir.h" namespace Vulkan::VKShader { @@ -939,6 +940,11 @@ private: return {}; } + Id ImageLoad(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id ImageStore(Operation operation) { UNIMPLEMENTED(); return {}; @@ -1440,6 +1446,7 @@ private: &SPIRVDecompiler::TextureQueryLod, &SPIRVDecompiler::TexelFetch, + &SPIRVDecompiler::ImageLoad, &SPIRVDecompiler::ImageStore, &SPIRVDecompiler::AtomicImageAdd, &SPIRVDecompiler::AtomicImageMin, diff --git a/src/video_core/shader/decode/image.cpp b/src/video_core/shader/decode/image.cpp index d54fb88c9..e611f9f3b 100644 --- a/src/video_core/shader/decode/image.cpp +++ b/src/video_core/shader/decode/image.cpp @@ -41,11 +41,46 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { const Instruction instr = {program_code[pc]}; const auto opcode = OpCode::Decode(instr); + const auto GetCoordinates = [this, instr](Tegra::Shader::ImageType image_type) { + std::vector coords; + const std::size_t num_coords{GetImageTypeNumCoordinates(image_type)}; + coords.reserve(num_coords); + for (std::size_t i = 0; i < num_coords; ++i) { + coords.push_back(GetRegister(instr.gpr8.Value() + i)); + } + return coords; + }; + switch (opcode->get().GetId()) { + case OpCode::Id::SULD: { + UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); + UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != + Tegra::Shader::OutOfBoundsStore::Ignore); + + const auto type{instr.suldst.image_type}; + auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) + : GetBindlessImage(instr.gpr39, type)}; + image.MarkRead(); + + u32 indexer = 0; + for (u32 element = 0; element < 4; ++element) { + if (!instr.suldst.IsComponentEnabled(element)) { + continue; + } + MetaImage meta{image, {}, element}; + Node value = Operation(OperationCode::ImageLoad, meta, GetCoordinates(type)); + SetTemporary(bb, indexer++, std::move(value)); + } + for (u32 i = 0; i < indexer; ++i) { + SetRegister(bb, instr.gpr0.Value() + i, GetTemporary(i)); + } + break; + } case OpCode::Id::SUST: { - UNIMPLEMENTED_IF(instr.sust.mode != Tegra::Shader::SurfaceDataMode::P); - UNIMPLEMENTED_IF(instr.sust.out_of_bounds_store != Tegra::Shader::OutOfBoundsStore::Ignore); - UNIMPLEMENTED_IF(instr.sust.component_mask_selector != 0xf); // Ensure we have an RGBA store + UNIMPLEMENTED_IF(instr.suldst.mode != Tegra::Shader::SurfaceDataMode::P); + UNIMPLEMENTED_IF(instr.suldst.out_of_bounds_store != + Tegra::Shader::OutOfBoundsStore::Ignore); + UNIMPLEMENTED_IF(instr.suldst.component_mask_selector != 0xf); // Ensure we have RGBA std::vector values; constexpr std::size_t hardcoded_size{4}; @@ -53,32 +88,18 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { values.push_back(GetRegister(instr.gpr0.Value() + i)); } - std::vector coords; - const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; - for (std::size_t i = 0; i < num_coords; ++i) { - coords.push_back(GetRegister(instr.gpr8.Value() + i)); - } - - const auto type{instr.sust.image_type}; - auto& image{instr.sust.is_immediate ? GetImage(instr.image, type) - : GetBindlessImage(instr.gpr39, type)}; + const auto type{instr.suldst.image_type}; + auto& image{instr.suldst.is_immediate ? GetImage(instr.image, type) + : GetBindlessImage(instr.gpr39, type)}; image.MarkWrite(); - MetaImage meta{image, values}; - bb.push_back(Operation(OperationCode::ImageStore, meta, std::move(coords))); + MetaImage meta{image, std::move(values)}; + bb.push_back(Operation(OperationCode::ImageStore, meta, GetCoordinates(type))); break; } case OpCode::Id::SUATOM: { UNIMPLEMENTED_IF(instr.suatom_d.is_ba != 0); - Node value = GetRegister(instr.gpr0); - - std::vector coords; - const std::size_t num_coords{GetImageTypeNumCoordinates(instr.sust.image_type)}; - for (std::size_t i = 0; i < num_coords; ++i) { - coords.push_back(GetRegister(instr.gpr8.Value() + i)); - } - const OperationCode operation_code = [instr] { switch (instr.suatom_d.operation) { case Tegra::Shader::ImageAtomicOperation::Add: @@ -102,9 +123,13 @@ u32 ShaderIR::DecodeImage(NodeBlock& bb, u32 pc) { } }(); - const auto& image{GetImage(instr.image, instr.suatom_d.image_type, instr.suatom_d.size)}; + Node value = GetRegister(instr.gpr0); + + const auto type = instr.suatom_d.image_type; + const auto& image{GetImage(instr.image, type, instr.suatom_d.size)}; + MetaImage meta{image, {std::move(value)}}; - SetRegister(bb, instr.gpr0, Operation(operation_code, meta, std::move(coords))); + SetRegister(bb, instr.gpr0, Operation(operation_code, meta, GetCoordinates(type))); break; } default: diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index abf2cb1ab..e5b75783d 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -149,7 +149,8 @@ enum class OperationCode { TextureQueryLod, /// (MetaTexture, float[N] coords) -> float4 TexelFetch, /// (MetaTexture, int[N], int) -> float4 - ImageStore, /// (MetaImage, int[N] values) -> void + ImageLoad, /// (MetaImage, int[N] coords) -> void + ImageStore, /// (MetaImage, int[N] coords) -> void AtomicImageAdd, /// (MetaImage, int[N] coords) -> void AtomicImageMin, /// (MetaImage, int[N] coords) -> void AtomicImageMax, /// (MetaImage, int[N] coords) -> void @@ -402,6 +403,7 @@ struct MetaTexture { struct MetaImage { const Image& image; std::vector values; + u32 element{}; }; /// Parameters that modify an operation but are not part of any particular operand