From abefe293986f42b8912a561b7978e952eca7bfee Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 19 Oct 2018 19:47:06 -0400 Subject: [PATCH 1/2] Implement Shader Local Memory --- .../renderer_opengl/gl_shader_decompiler.cpp | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index 81ffb24e4..c1a86755a 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -279,6 +279,7 @@ public: const Tegra::Shader::Header& header) : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header}, fixed_pipeline_output_attributes_used{} { + local_memory_size = 0; BuildRegisterList(); BuildInputList(); } @@ -436,6 +437,24 @@ public: shader.AddLine(dest + " = " + src + ';'); } + std::string GetLocalMemoryAsFloat(const std::string index) { + return "lmem[" + index + "]"; + } + + std::string GetLocalMemoryAsInteger(const std::string index, bool is_signed = false) { + const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"}; + return func + "(lmem[" + index + "])"; + } + + void SetLocalMemoryAsFloat(const std::string index, const std::string value) { + shader.AddLine("lmem[" + index + "] = " + value); + } + + void SetLocalMemoryAsInteger(const std::string index, const std::string value, bool is_signed = false) { + const std::string func{is_signed ? 
"intBitsToFloat" : "uintBitsToFloat"}; + shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ')'); + } + std::string GetControlCode(const Tegra::Shader::ControlCode cc) const { switch (cc) { case Tegra::Shader::ControlCode::NEU: @@ -533,6 +552,7 @@ public: void GenerateDeclarations(const std::string& suffix) { GenerateVertex(); GenerateRegisters(suffix); + GenerateLocalMemory(); GenerateInternalFlags(); GenerateInputAttrs(); GenerateOutputAttrs(); @@ -578,6 +598,10 @@ public: return entry.GetName(); } + void SetLocalMemory(u64 lmem) { + local_memory_size = lmem; + } + private: /// Generates declarations for registers. void GenerateRegisters(const std::string& suffix) { @@ -588,6 +612,14 @@ private: declarations.AddNewLine(); } + /// Generates declarations for local memory. + void GenerateLocalMemory() { + if (local_memory_size > 0) { + declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) + "];"); + declarations.AddNewLine(); + } + } + /// Generates declarations for internal flags. 
void GenerateInternalFlags() { for (u32 ii = 0; ii < static_cast(InternalFlag::Amount); ii++) { @@ -895,6 +927,7 @@ private: const std::string& suffix; const Tegra::Shader::Header& header; std::unordered_set fixed_pipeline_output_attributes_used; + u64 local_memory_size; }; class GLSLGenerator { @@ -904,6 +937,9 @@ public: : subroutines(subroutines), program_code(program_code), main_offset(main_offset), stage(stage), suffix(suffix) { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); + local_memory_size = (header.common2.shader_local_memory_high_size << 24) | + header.common1.shader_local_memory_low_size; + regs.SetLocalMemory(local_memory_size); Generate(suffix); } @@ -3575,6 +3611,7 @@ private: const u32 main_offset; Maxwell3D::Regs::ShaderStage stage; const std::string& suffix; + u64 local_memory_size; ShaderWriter shader; ShaderWriter declarations; From ca142f35c0f15e0d7e68e592b916660d9ee7a743 Mon Sep 17 00:00:00 2001 From: FernandoS27 Date: Fri, 19 Oct 2018 21:48:21 -0400 Subject: [PATCH 2/2] Implemented LD_L and ST_L --- src/video_core/engines/shader_bytecode.h | 31 +++++++ src/video_core/engines/shader_header.h | 5 ++ .../renderer_opengl/gl_shader_decompiler.cpp | 88 ++++++++++++++++--- 3 files changed, 112 insertions(+), 12 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index af7756266..141b9159b 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -208,6 +208,16 @@ enum class UniformType : u64 { Double = 5, }; +enum class StoreType : u64 { + Unsigned8 = 0, + Signed8 = 1, + Unsigned16 = 2, + Signed16 = 3, + Bytes32 = 4, + Bytes64 = 5, + Bytes128 = 6, +}; + enum class IMinMaxExchange : u64 { None = 0, XLo = 1, @@ -747,6 +757,18 @@ union Instruction { BitField<44, 2, u64> unknown; } ld_c; + union { + BitField<48, 3, StoreType> type; + } ldst_sl; + + union { + BitField<44, 2, u64> unknown; + } ld_l; + + union { + BitField<44, 
2, u64> unknown; + } st_l; + union { BitField<0, 3, u64> pred0; BitField<3, 3, u64> pred3; @@ -1209,6 +1231,7 @@ union Instruction { BitField<61, 1, u64> is_b_imm; BitField<60, 1, u64> is_b_gpr; BitField<59, 1, u64> is_c_gpr; + BitField<20, 24, s64> smem_imm; Attribute attribute; Sampler sampler; @@ -1232,8 +1255,12 @@ public: BRA, PBK, LD_A, + LD_L, + LD_S, LD_C, ST_A, + ST_L, + ST_S, LDG, // Load from global memory STG, // Store in global memory TEX, @@ -1490,8 +1517,12 @@ private: INST("111000110100---", Id::BRK, Type::Flow, "BRK"), INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), + INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), + INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), + INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), + INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("110000----111---", Id::TEX, Type::Memory, "TEX"), diff --git a/src/video_core/engines/shader_header.h b/src/video_core/engines/shader_header.h index a885ee3cf..a0e015c4b 100644 --- a/src/video_core/engines/shader_header.h +++ b/src/video_core/engines/shader_header.h @@ -96,6 +96,11 @@ struct Header { } } ps; }; + + u64 GetLocalMemorySize() { + return (common1.shader_local_memory_low_size | + (common2.shader_local_memory_high_size << 24)); + } }; static_assert(sizeof(Header) == 0x50, "Incorrect structure size"); diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c1a86755a..dec291a7d 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -278,8 +278,7 @@ public: const 
Maxwell3D::Regs::ShaderStage& stage, const std::string& suffix, const Tegra::Shader::Header& header) : shader{shader}, declarations{declarations}, stage{stage}, suffix{suffix}, header{header}, - fixed_pipeline_output_attributes_used{} { - local_memory_size = 0; + fixed_pipeline_output_attributes_used{}, local_memory_size{0} { BuildRegisterList(); BuildInputList(); } @@ -437,22 +436,23 @@ public: shader.AddLine(dest + " = " + src + ';'); } - std::string GetLocalMemoryAsFloat(const std::string index) { - return "lmem[" + index + "]"; + std::string GetLocalMemoryAsFloat(const std::string& index) { + return "lmem[" + index + ']'; } - std::string GetLocalMemoryAsInteger(const std::string index, bool is_signed = false) { + std::string GetLocalMemoryAsInteger(const std::string& index, bool is_signed = false) { const std::string func{is_signed ? "floatBitsToInt" : "floatBitsToUint"}; return func + "(lmem[" + index + "])"; } - void SetLocalMemoryAsFloat(const std::string index, const std::string value) { - shader.AddLine("lmem[" + index + "] = " + value); + void SetLocalMemoryAsFloat(const std::string& index, const std::string& value) { + shader.AddLine("lmem[" + index + "] = " + value + ';'); } - void SetLocalMemoryAsInteger(const std::string index, const std::string value, bool is_signed = false) { + void SetLocalMemoryAsInteger(const std::string& index, const std::string& value, + bool is_signed = false) { const std::string func{is_signed ? "intBitsToFloat" : "uintBitsToFloat"}; - shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ')'); + shader.AddLine("lmem[" + index + "] = " + func + '(' + value + ");"); } std::string GetControlCode(const Tegra::Shader::ControlCode cc) const { @@ -615,7 +615,8 @@ private: /// Generates declarations for local memory. 
void GenerateLocalMemory() { if (local_memory_size > 0) { - declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) + "];"); + declarations.AddLine("float lmem[" + std::to_string((local_memory_size - 1 + 4) / 4) + + "];"); declarations.AddNewLine(); } } @@ -937,8 +938,7 @@ public: : subroutines(subroutines), program_code(program_code), main_offset(main_offset), stage(stage), suffix(suffix) { std::memcpy(&header, program_code.data(), sizeof(Tegra::Shader::Header)); - local_memory_size = (header.common2.shader_local_memory_high_size << 24) | - header.common1.shader_local_memory_low_size; + local_memory_size = header.GetLocalMemorySize(); regs.SetLocalMemory(local_memory_size); Generate(suffix); } @@ -2360,6 +2360,39 @@ private: shader.AddLine("}"); break; } + case OpCode::Id::LD_L: { + // Add an extra scope and declare the index register inside to prevent + // overwriting it in case it is used as an output of the LD instruction. + shader.AddLine('{'); + ++shader.scope; + + std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + + std::to_string(instr.smem_imm.Value()) + ')'; + + shader.AddLine("uint index = (" + op + " / 4);"); + + const std::string op_a = regs.GetLocalMemoryAsFloat("index"); + + if (instr.ld_l.unknown != 1) { + LOG_CRITICAL(HW_GPU, "LD_L Unhandled mode: {}", + static_cast(instr.ld_l.unknown.Value())); + UNREACHABLE(); + } + + switch (instr.ldst_sl.type.Value()) { + case Tegra::Shader::StoreType::Bytes32: + regs.SetRegisterToFloat(instr.gpr0, 0, op_a, 1, 1); + break; + default: + LOG_CRITICAL(HW_GPU, "LD_L Unhandled type: {}", + static_cast(instr.ldst_sl.type.Value())); + UNREACHABLE(); + } + + --shader.scope; + shader.AddLine('}'); + break; + } case OpCode::Id::ST_A: { ASSERT_MSG(instr.gpr8.Value() == Register::ZeroIndex, "Indirect attribute loads are not supported"); @@ -2388,6 +2421,37 @@ private: break; } + case OpCode::Id::ST_L: { + // Add an extra scope and declare the index register inside 
to prevent + // redeclaring it when another LD_L/ST_L is emitted into the same GLSL scope. + shader.AddLine('{'); + ++shader.scope; + + std::string op = '(' + regs.GetRegisterAsInteger(instr.gpr8, 0, false) + " + " + + std::to_string(instr.smem_imm.Value()) + ')'; + + shader.AddLine("uint index = (" + op + " / 4);"); + + if (instr.st_l.unknown != 0) { + LOG_CRITICAL(HW_GPU, "ST_L Unhandled mode: {}", + static_cast(instr.st_l.unknown.Value())); + UNREACHABLE(); + } + + switch (instr.ldst_sl.type.Value()) { + case Tegra::Shader::StoreType::Bytes32: + regs.SetLocalMemoryAsFloat("index", regs.GetRegisterAsFloat(instr.gpr0)); + break; + default: + LOG_CRITICAL(HW_GPU, "ST_L Unhandled type: {}", + static_cast(instr.ldst_sl.type.Value())); + UNREACHABLE(); + } + + --shader.scope; + shader.AddLine('}'); + break; + } case OpCode::Id::TEX: { Tegra::Shader::TextureType texture_type{instr.tex.texture_type}; std::string coord;