diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 8f6bc76eb..312617f71 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -615,6 +615,14 @@ union Instruction { BitField<34, 13, u64> mask_imm; } shfl; + union { + BitField<44, 1, u64> ftz; + BitField<39, 2, u64> tab5cb8_2; + BitField<38, 1, u64> ndv; + BitField<47, 1, u64> cc; + BitField<28, 8, u64> swizzle; + } fswzadd; + union { BitField<8, 8, Register> gpr; BitField<20, 24, s64> offset; @@ -1590,6 +1598,7 @@ public: DEPBAR, VOTE, SHFL, + FSWZADD, BFE_C, BFE_R, BFE_IMM, @@ -1888,6 +1897,7 @@ private: INST("1111000011110---", Id::DEPBAR, Type::Synch, "DEPBAR"), INST("0101000011011---", Id::VOTE, Type::Warp, "VOTE"), INST("1110111100010---", Id::SHFL, Type::Warp, "SHFL"), + INST("0101000011111---", Id::FSWZADD, Type::Warp, "FSWZADD"), INST("1110111111011---", Id::LD_A, Type::Memory, "LD_A"), INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index ca4e6e468..21c137ec5 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -1379,6 +1379,20 @@ private: return GenerateUnary(operation, "float", Type::Float, type); } + Expression FSwizzleAdd(Operation operation) { + const std::string op_a = VisitOperand(operation, 0).AsFloat(); + const std::string op_b = VisitOperand(operation, 1).AsFloat(); + const std::string instr_mask = VisitOperand(operation, 2).AsUint(); + + const std::string mask = code.GenerateTemporary(); + code.AddLine("uint {} = {} >> ((gl_SubGroupInvocationARB & 3) << 1);", mask, instr_mask); + + const std::string modifier_a = fmt::format("fswzadd_modifiers_a[{} & 3]", mask); + const std::string modifier_b = fmt::format("fswzadd_modifiers_b[{} & 3]", mask); + return {fmt::format("(({} * {}) + ({} * {}))", op_a, modifier_a, op_b, modifier_b), + Type::Float}; + } + Expression ICastFloat(Operation operation) { return GenerateUnary(operation, "int", Type::Int, Type::Float); } @@ -1991,6 +2005,7 @@ private: &GLSLDecompiler::FTrunc, &GLSLDecompiler::FCastInteger, &GLSLDecompiler::FCastInteger, + &GLSLDecompiler::FSwizzleAdd, &GLSLDecompiler::Add, &GLSLDecompiler::Mul, @@ -2460,6 +2475,9 @@ bvec2 HalfFloatNanComparison(bvec2 comparison, vec2 pair1, vec2 pair2) { bvec2 is_nan2 = isnan(pair2); return bvec2(comparison.x || is_nan1.x || is_nan2.x, comparison.y || is_nan1.y || is_nan2.y); } + +const float fswzadd_modifiers_a[] = float[4](-1.0f, 1.0f, -1.0f, 0.0f ); +const float fswzadd_modifiers_b[] = float[4](-1.0f, -1.0f, 1.0f, -1.0f ); )"; } diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index 383720ea1..2850d5b59 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -783,6 +783,11 @@ private: return {}; } + Id FSwizzleAdd(Operation operation) { + UNIMPLEMENTED(); + return {}; + } + Id HNegate(Operation operation) { UNIMPLEMENTED(); return {}; @@ -1363,6 +1368,7 @@ private: &SPIRVDecompiler::Unary<&Module::OpTrunc, Type::Float>, &SPIRVDecompiler::Unary<&Module::OpConvertSToF, Type::Float, Type::Int>, &SPIRVDecompiler::Unary<&Module::OpConvertUToF, Type::Float, Type::Uint>, + &SPIRVDecompiler::FSwizzleAdd, &SPIRVDecompiler::Binary<&Module::OpIAdd, Type::Int>, &SPIRVDecompiler::Binary<&Module::OpIMul, Type::Int>, diff --git a/src/video_core/shader/decode/warp.cpp b/src/video_core/shader/decode/warp.cpp index c2875eb2b..d98d0e1dd 100644 --- a/src/video_core/shader/decode/warp.cpp +++ b/src/video_core/shader/decode/warp.cpp @@ -94,6 +94,15 @@ u32 ShaderIR::DecodeWarp(NodeBlock& bb, u32 pc) { Operation(OperationCode::ShuffleIndexed, GetRegister(instr.gpr8), src_thread_id)); break; } + case OpCode::Id::FSWZADD: { + UNIMPLEMENTED_IF(instr.fswzadd.ndv); + + Node op_a = GetRegister(instr.gpr8); + Node op_b = GetRegister(instr.gpr20); + Node mask = Immediate(static_cast(instr.fswzadd.swizzle)); + SetRegister(bb, instr.gpr0, Operation(OperationCode::FSwizzleAdd, op_a, op_b, mask)); + break; + } default: UNIMPLEMENTED_MSG("Unhandled warp instruction: {}", opcode->get().GetName()); break; diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index bd3547e0d..54217e6a4 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -47,6 +47,7 @@ enum class OperationCode { FTrunc, /// (MetaArithmetic, float a) -> float FCastInteger, /// (MetaArithmetic, int a) -> float FCastUInteger, /// (MetaArithmetic, uint a) -> float + FSwizzleAdd, /// (float a, float b, uint mask) -> float IAdd, /// (MetaArithmetic, int a, int b) -> int IMul, /// (MetaArithmetic, int a, int b) -> int