From 5af82a8ed4e2e0b7abc9c7da9f7bb5fa1c83de29 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Wed, 26 Dec 2018 01:33:56 -0300 Subject: [PATCH] shader_decode: Implement TEXS.F16 --- src/video_core/shader/decode/memory.cpp | 38 +++++++++++++++-------- src/video_core/shader/glsl_decompiler.cpp | 26 ++++++++++++++++ src/video_core/shader/shader_ir.h | 8 +++-- 3 files changed, 57 insertions(+), 15 deletions(-) diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index ce3445512..679e7f01b 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -219,8 +219,7 @@ u32 ShaderIR::DecodeMemory(BasicBlock& bb, u32 pc) { if (instr.texs.fp32_flag) { WriteTexsInstructionFloat(bb, instr, texture); } else { - UNIMPLEMENTED(); - // WriteTexsInstructionHalfFloat(bb, instr, texture); + WriteTexsInstructionHalfFloat(bb, instr, texture); } break; } @@ -416,39 +415,52 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu return *used_samplers.emplace(entry).first; } -void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, - Node texture) { +void ShaderIR::WriteTexsInstructionFloat(BasicBlock& bb, Instruction instr, Node texture) { // TEXS has two destination registers and a swizzle. 
The first two elements in the swizzle // go into gpr0+0 and gpr0+1, and the rest goes into gpr28+0 and gpr28+1 MetaComponents meta; std::array dest; - - std::size_t written_components = 0; for (u32 component = 0; component < 4; ++component) { if (!instr.texs.IsComponentEnabled(component)) { continue; } - meta.components_map[written_components] = static_cast(component); + meta.components_map[meta.count] = component; - if (written_components < 2) { + if (meta.count < 2) { // Write the first two swizzle components to gpr0 and gpr0+1 - dest[written_components] = GetRegister(instr.gpr0.Value() + written_components % 2); + dest[meta.count] = GetRegister(instr.gpr0.Value() + meta.count % 2); } else { ASSERT(instr.texs.HasTwoDestinations()); // Write the rest of the swizzle components to gpr28 and gpr28+1 - dest[written_components] = GetRegister(instr.gpr28.Value() + written_components % 2); + dest[meta.count] = GetRegister(instr.gpr28.Value() + meta.count % 2); } - - ++written_components; + ++meta.count; } - std::generate(dest.begin() + written_components, dest.end(), [&]() { return GetRegister(RZ); }); + std::generate(dest.begin() + meta.count, dest.end(), [&]() { return GetRegister(RZ); }); bb.push_back(Operation(OperationCode::AssignComposite, meta, texture, dest[0], dest[1], dest[2], dest[3])); } +void ShaderIR::WriteTexsInstructionHalfFloat(BasicBlock& bb, Instruction instr, Node texture) { + // TEXS.F16 destination registers are packed in two registers in pairs (just like any half + // float instruction). 
+ + MetaComponents meta; + for (u32 component = 0; component < 4; ++component) { + if (!instr.texs.IsComponentEnabled(component)) + continue; + meta.components_map[meta.count++] = component; + } + if (meta.count == 0) + return; + + bb.push_back(Operation(OperationCode::AssignCompositeHalf, meta, texture, + GetRegister(instr.gpr0), GetRegister(instr.gpr28))); +} + Node ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type, TextureProcessMode process_mode, bool depth_compare, bool is_array, std::size_t array_offset, std::size_t bias_offset, diff --git a/src/video_core/shader/glsl_decompiler.cpp b/src/video_core/shader/glsl_decompiler.cpp index d27d38178..5aa7966b9 100644 --- a/src/video_core/shader/glsl_decompiler.cpp +++ b/src/video_core/shader/glsl_decompiler.cpp @@ -785,6 +785,31 @@ private: return {}; } + std::string AssignCompositeHalf(Operation operation) { + const auto& meta = std::get(operation.GetMeta()); + + const std::string composite = code.GenerateTemporal(); + code.AddLine("vec4 " + composite + " = " + Visit(operation[0]) + ';'); + + const auto ReadComponent = [&](u32 component) { + if (component < meta.count) { + return composite + '[' + std::to_string(meta.GetSourceComponent(component)) + ']'; + } + return std::string("0"); + }; + + const auto dst1 = std::get(*operation[1]).GetIndex(); + const std::string src1 = "vec2(" + ReadComponent(0) + ", " + ReadComponent(1) + ')'; + code.AddLine(GetRegister(dst1) + " = utof(packHalf2x16(" + src1 + "))"); + + if (meta.count > 2) { + const auto dst2 = std::get(*operation[2]).GetIndex(); + const std::string src2 = "vec2(" + ReadComponent(2) + ", " + ReadComponent(3) + ')'; + code.AddLine(GetRegister(dst2) + " = utof(packHalf2x16(" + src2 + "))"); + } + return {}; + } + std::string Composite(Operation operation) { std::string value = "vec4("; for (std::size_t i = 0; i < 4; ++i) { @@ -1302,6 +1327,7 @@ private: static constexpr OperationDecompilersArray operation_decompilers = { 
&GLSLDecompiler::Assign, &GLSLDecompiler::AssignComposite, + &GLSLDecompiler::AssignCompositeHalf, &GLSLDecompiler::Composite, &GLSLDecompiler::Select, diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 5676d32a9..7f11599bf 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -44,8 +44,9 @@ constexpr u32 MAX_PROGRAM_LENGTH = 0x1000; constexpr u32 RZ = 0xff; enum class OperationCode { - Assign, /// (float& dest, float src) -> void - AssignComposite, /// (MetaComponents, float4 src, float&[4] dst) -> void + Assign, /// (float& dest, float src) -> void + AssignComposite, /// (MetaComponents, float4 src, float&[4] dst) -> void + AssignCompositeHalf, /// (MetaComponents, float4 src, float&[2] dst) -> void Composite, /// (float[4] values) -> float4 Select, /// (MetaArithmetic, bool pred, float a, float b) -> float @@ -279,6 +280,7 @@ struct MetaTexture { struct MetaComponents { std::array components_map{}; + u32 count{}; u32 GetSourceComponent(u32 dest_index) const { return components_map[dest_index]; @@ -692,6 +694,8 @@ private: Tegra::Shader::TextureType type, bool is_array, bool is_shadow); void WriteTexsInstructionFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, Node texture); + void WriteTexsInstructionHalfFloat(BasicBlock& bb, Tegra::Shader::Instruction instr, + Node texture); Node GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type, Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,