shader: Implement FSET and FSETP

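Also fix an oversight in how the SignedZeroInfNanPreserve execution mode was added: declare the matching capability and pass the target bit width (16 or 32) to the execution mode.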
This commit is contained in:
ameerj 2021-03-16 00:57:07 -04:00
parent 17a82b56d7
commit fa2f6e38f4
9 changed files with 204 additions and 94 deletions

View file

@ -66,12 +66,14 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate/impl/find_leading_one.cpp frontend/maxwell/translate/impl/find_leading_one.cpp
frontend/maxwell/translate/impl/floating_point_add.cpp frontend/maxwell/translate/impl/floating_point_add.cpp
frontend/maxwell/translate/impl/floating_point_compare.cpp frontend/maxwell/translate/impl/floating_point_compare.cpp
frontend/maxwell/translate/impl/floating_point_compare_and_set.cpp
frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
frontend/maxwell/translate/impl/floating_point_min_max.cpp frontend/maxwell/translate/impl/floating_point_min_max.cpp
frontend/maxwell/translate/impl/floating_point_multi_function.cpp frontend/maxwell/translate/impl/floating_point_multi_function.cpp
frontend/maxwell/translate/impl/floating_point_multiply.cpp frontend/maxwell/translate/impl/floating_point_multiply.cpp
frontend/maxwell/translate/impl/floating_point_range_reduction.cpp frontend/maxwell/translate/impl/floating_point_range_reduction.cpp
frontend/maxwell/translate/impl/floating_point_set_predicate.cpp
frontend/maxwell/translate/impl/half_floating_point_add.cpp frontend/maxwell/translate/impl/half_floating_point_add.cpp
frontend/maxwell/translate/impl/impl.cpp frontend/maxwell/translate/impl/impl.cpp
frontend/maxwell/translate/impl/impl.h frontend/maxwell/translate/impl/impl.h

View file

@ -124,10 +124,12 @@ void SetupDenormControl(const Profile& profile, const IR::Program& program, Emit
ctx.AddExtension("SPV_KHR_float_controls"); ctx.AddExtension("SPV_KHR_float_controls");
if (info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) { if (info.uses_fp16 && profile.support_fp16_signed_zero_nan_preserve) {
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve); ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 16U);
} }
if (profile.support_fp32_signed_zero_nan_preserve) { if (profile.support_fp32_signed_zero_nan_preserve) {
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve); ctx.AddCapability(spv::Capability::SignedZeroInfNanPreserve);
ctx.AddExecutionMode(main_func, spv::ExecutionMode::SignedZeroInfNanPreserve, 32U);
} }
if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) { if (info.uses_fp32_denorms_flush && info.uses_fp32_denorms_preserve) {
// LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader"); // LOG_ERROR(HW_GPU, "Fp32 denorm flush and preserve on the same shader");

View file

@ -58,4 +58,52 @@ IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp
} }
} }
// Tells whether a floating-point compare operation is treated as ordered.
// Only the six U-suffixed relational operations are reported as unordered; every other
// value, including F, T, NUM and Nan, is reported as ordered.
bool IsCompareOpOrdered(FPCompareOp op) {
    const bool is_unordered{op == FPCompareOp::LTU || op == FPCompareOp::EQU ||
                            op == FPCompareOp::LEU || op == FPCompareOp::GTU ||
                            op == FPCompareOp::NEU || op == FPCompareOp::GEU};
    return !is_unordered;
}
// Emits the IR for a floating-point comparison selected by compare_op and returns its
// boolean result.
//   ir          - emitter the comparison is built with
//   operand_1   - left-hand operand
//   operand_2   - right-hand operand
//   compare_op  - comparison to perform
//   control     - floating-point control forwarded to the relational comparisons
// Throws NotImplementedException when compare_op is not a known operation.
IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2,
                            FPCompareOp compare_op, IR::FpControl control) {
    // Operations whose result does not involve the ordered flag or the control word.
    if (compare_op == FPCompareOp::F) {
        return ir.Imm1(false);
    }
    if (compare_op == FPCompareOp::T) {
        return ir.Imm1(true);
    }
    if (compare_op == FPCompareOp::NUM) {
        return ir.FPOrdered(operand_1, operand_2);
    }
    if (compare_op == FPCompareOp::Nan) {
        return ir.FPUnordered(operand_1, operand_2);
    }

    // Relational operations: the plain and U-suffixed variants share one emitter call and
    // differ only in the ordered flag.
    const bool is_ordered{IsCompareOpOrdered(compare_op)};
    switch (compare_op) {
    case FPCompareOp::LT:
    case FPCompareOp::LTU:
        return ir.FPLessThan(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::EQ:
    case FPCompareOp::EQU:
        return ir.FPEqual(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::LE:
    case FPCompareOp::LEU:
        return ir.FPLessThanEqual(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::GT:
    case FPCompareOp::GTU:
        return ir.FPGreaterThan(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::NE:
    case FPCompareOp::NEU:
        return ir.FPNotEqual(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::GE:
    case FPCompareOp::GEU:
        return ir.FPGreaterThanEqual(operand_1, operand_2, control, is_ordered);
    default:
        break;
    }
    throw NotImplementedException("Invalid FP compare op {}", compare_op);
}
} // namespace Shader::Maxwell } // namespace Shader::Maxwell

View file

@ -15,4 +15,10 @@ namespace Shader::Maxwell {
const IR::U1& predicate_2, BooleanOp bop); const IR::U1& predicate_2, BooleanOp bop);
[[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op); [[nodiscard]] IR::U1 PredicateOperation(IR::IREmitter& ir, const IR::U32& result, PredicateOp op);
// Returns false for the unordered relational operations (LTU, EQU, LEU, GTU, NEU, GEU)
// and true for every other operation.
[[nodiscard]] bool IsCompareOpOrdered(FPCompareOp op);
// Emits the comparison selected by compare_op between operand_1 and operand_2 and returns
// the boolean result. control is forwarded to the relational comparisons and defaults to a
// value-initialized FpControl. Throws NotImplementedException for an unknown compare_op.
[[nodiscard]] IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1,
const IR::F32& operand_2, FPCompareOp compare_op,
IR::FpControl control = {});
} // namespace Shader::Maxwell } // namespace Shader::Maxwell

View file

@ -9,74 +9,6 @@
namespace Shader::Maxwell { namespace Shader::Maxwell {
namespace { namespace {
// Selector for floating-point compare operations. The numeric values are written out
// because this type is decoded directly from a 4-bit instruction field.
enum class FPCompareOp : u64 {
    F = 0,    // Constant false
    LT = 1,   // Less than
    EQ = 2,   // Equal
    LE = 3,   // Less than or equal
    GT = 4,   // Greater than
    NE = 5,   // Not equal
    GE = 6,   // Greater than or equal
    NUM = 7,  // Operands are ordered
    Nan = 8,  // Operands are unordered
    LTU = 9,  // Less than, unordered variant
    EQU = 10, // Equal, unordered variant
    LEU = 11, // Less than or equal, unordered variant
    GTU = 12, // Greater than, unordered variant
    NEU = 13, // Not equal, unordered variant
    GEU = 14, // Greater than or equal, unordered variant
    T = 15,   // Constant true
};
// Tells whether a floating-point compare operation is treated as ordered.
// Only the six U-suffixed relational operations are reported as unordered; every other
// value, including F, T, NUM and Nan, is reported as ordered.
bool IsCompareOpOrdered(FPCompareOp op) {
    const bool is_unordered{op == FPCompareOp::LTU || op == FPCompareOp::EQU ||
                            op == FPCompareOp::LEU || op == FPCompareOp::GTU ||
                            op == FPCompareOp::NEU || op == FPCompareOp::GEU};
    return !is_unordered;
}
// Emits the IR for a floating-point comparison selected by compare_op and returns its
// boolean result. control is forwarded to the relational comparisons only.
// Throws NotImplementedException when compare_op is not a known operation.
IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2,
                            FPCompareOp compare_op, IR::FpControl control) {
    // Operations whose result does not involve the ordered flag or the control word.
    if (compare_op == FPCompareOp::F) {
        return ir.Imm1(false);
    }
    if (compare_op == FPCompareOp::T) {
        return ir.Imm1(true);
    }
    if (compare_op == FPCompareOp::NUM) {
        return ir.FPOrdered(operand_1, operand_2);
    }
    if (compare_op == FPCompareOp::Nan) {
        return ir.FPUnordered(operand_1, operand_2);
    }

    // Relational operations: the plain and U-suffixed variants share one emitter call and
    // differ only in the ordered flag.
    const bool is_ordered{IsCompareOpOrdered(compare_op)};
    switch (compare_op) {
    case FPCompareOp::LT:
    case FPCompareOp::LTU:
        return ir.FPLessThan(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::EQ:
    case FPCompareOp::EQU:
        return ir.FPEqual(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::LE:
    case FPCompareOp::LEU:
        return ir.FPLessThanEqual(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::GT:
    case FPCompareOp::GTU:
        return ir.FPGreaterThan(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::NE:
    case FPCompareOp::NEU:
        return ir.FPNotEqual(operand_1, operand_2, control, is_ordered);
    case FPCompareOp::GE:
    case FPCompareOp::GEU:
        return ir.FPGreaterThanEqual(operand_1, operand_2, control, is_ordered);
    default:
        break;
    }
    throw NotImplementedException("Invalid compare op {}", compare_op);
}
void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) { void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
union { union {
u64 insn; u64 insn;

View file

@ -0,0 +1,65 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates FSET: compares two floating-point operands, combines the comparison with a
// predicate through a boolean operation, and writes a 32-bit pass/fail value to the
// destination register.
//   v      - translator used to read registers/predicates and emit IR
//   insn   - raw instruction word the fields below are decoded from
//   src_b  - second operand, already fetched by the caller
void FSET(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 insn;
        BitField<0, 8, IR::Reg> dest_reg;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> pred;
        BitField<42, 1, u64> neg_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<48, 4, FPCompareOp> compare_op;
        BitField<52, 1, u64> bf;
        BitField<53, 1, u64> negate_b;
        BitField<54, 1, u64> abs_a;
        BitField<55, 1, u64> ftz;
    } const fset{insn};

    // Apply the per-operand absolute-value / negate modifiers.
    const IR::F32 op_a{v.ir.FPAbsNeg(v.F(fset.src_a_reg), fset.abs_a != 0, fset.negate_a != 0)};
    const IR::F32 op_b{v.ir.FPAbsNeg(src_b, fset.abs_b != 0, fset.negate_b != 0)};
    const IR::FpControl control{
        .no_contraction{false},
        .rounding{IR::FpRounding::DontCare},
        .fmz_mode{fset.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None},
    };

    // Read the predicate with its negation flag in a single call, matching how FSETP reads
    // its combining predicate, instead of reassigning a mutable local.
    const IR::U1 pred{v.ir.GetPred(fset.pred, fset.neg_pred != 0)};
    const IR::U1 cmp_result{FloatingPointCompare(v.ir, op_a, op_b, fset.compare_op, control)};
    const IR::U1 bop_result{PredicateCombine(v.ir, cmp_result, pred, fset.bop)};

    // On success the destination receives either an all-ones mask (bf == 0) or the bit
    // pattern 0x3f800000 (1.0f as an IEEE-754 single); on failure it receives zero.
    const IR::U32 one_mask{v.ir.Imm32(-1)};
    const IR::U32 fp_one{v.ir.Imm32(0x3f800000)};
    const IR::U32 fail_result{v.ir.Imm32(0)};
    const IR::U32 pass_result{fset.bf == 0 ? one_mask : fp_one};

    v.X(fset.dest_reg, IR::U32{v.ir.Select(bop_result, pass_result, fail_result)});
}
} // Anonymous namespace
// FSET with the second operand obtained through GetFloatReg20.
void TranslatorVisitor::FSET_reg(u64 insn) {
    const IR::F32 src_b{GetFloatReg20(insn)};
    FSET(*this, insn, src_b);
}
// FSET with the second operand obtained through GetFloatCbuf.
void TranslatorVisitor::FSET_cbuf(u64 insn) {
    const IR::F32 src_b{GetFloatCbuf(insn)};
    FSET(*this, insn, src_b);
}
// FSET with the second operand obtained through GetFloatImm20.
void TranslatorVisitor::FSET_imm(u64 insn) {
    const IR::F32 src_b{GetFloatImm20(insn)};
    FSET(*this, insn, src_b);
}
} // namespace Shader::Maxwell

View file

@ -0,0 +1,60 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
// Translates FSETP: compares two floating-point operands and writes two predicates. The
// first is the comparison combined with a third predicate through a boolean operation; the
// second is the negated comparison combined the same way.
//   v      - translator used to read registers/predicates and emit IR
//   insn   - raw instruction word the fields below are decoded from
//   src_b  - second operand, already fetched by the caller
void FSETP(TranslatorVisitor& v, u64 insn, const IR::F32& src_b) {
    union {
        u64 insn;
        BitField<0, 3, IR::Pred> dest_pred_b;
        BitField<3, 3, IR::Pred> dest_pred_a;
        BitField<6, 1, u64> negate_b;
        BitField<7, 1, u64> abs_a;
        BitField<8, 8, IR::Reg> src_a_reg;
        BitField<39, 3, IR::Pred> bop_pred;
        BitField<42, 1, u64> neg_bop_pred;
        BitField<43, 1, u64> negate_a;
        BitField<44, 1, u64> abs_b;
        BitField<45, 2, BooleanOp> bop;
        BitField<47, 1, u64> ftz;
        BitField<48, 4, FPCompareOp> compare_op;
    } const fsetp{insn};

    // Decode the operand modifiers up front so the emitter calls below stay readable.
    const bool use_abs_a{fsetp.abs_a != 0};
    const bool use_neg_a{fsetp.negate_a != 0};
    const bool use_abs_b{fsetp.abs_b != 0};
    const bool use_neg_b{fsetp.negate_b != 0};
    const IR::FmzMode flush_mode{fsetp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None};

    const IR::F32 lhs{v.ir.FPAbsNeg(v.F(fsetp.src_a_reg), use_abs_a, use_neg_a)};
    const IR::F32 rhs{v.ir.FPAbsNeg(src_b, use_abs_b, use_neg_b)};
    const IR::FpControl control{
        .no_contraction{false},
        .rounding{IR::FpRounding::DontCare},
        .fmz_mode{flush_mode},
    };

    const IR::U1 cmp{FloatingPointCompare(v.ir, lhs, rhs, fsetp.compare_op, control)};
    const IR::U1 combine_pred{v.ir.GetPred(fsetp.bop_pred, fsetp.neg_bop_pred != 0)};

    // The first destination sees the comparison, the second its logical negation.
    const IR::U1 first_result{PredicateCombine(v.ir, cmp, combine_pred, fsetp.bop)};
    const IR::U1 negated_cmp{v.ir.LogicalNot(cmp)};
    const IR::U1 second_result{PredicateCombine(v.ir, negated_cmp, combine_pred, fsetp.bop)};

    v.ir.SetPred(fsetp.dest_pred_a, first_result);
    v.ir.SetPred(fsetp.dest_pred_b, second_result);
}
} // Anonymous namespace
// FSETP with the second operand obtained through GetFloatReg20.
void TranslatorVisitor::FSETP_reg(u64 insn) {
    const IR::F32 src_b{GetFloatReg20(insn)};
    FSETP(*this, insn, src_b);
}
// FSETP with the second operand obtained through GetFloatCbuf.
void TranslatorVisitor::FSETP_cbuf(u64 insn) {
    const IR::F32 src_b{GetFloatCbuf(insn)};
    FSETP(*this, insn, src_b);
}
// FSETP with the second operand obtained through GetFloatImm20.
void TranslatorVisitor::FSETP_imm(u64 insn) {
    const IR::F32 src_b{GetFloatImm20(insn)};
    FSETP(*this, insn, src_b);
}
} // namespace Shader::Maxwell

View file

@ -35,6 +35,25 @@ enum class PredicateOp : u64 {
NonZero, NonZero,
}; };
// Selector for floating-point compare operations. The numeric values are written out
// because this type is decoded directly from a 4-bit instruction field.
enum class FPCompareOp : u64 {
    F = 0,    // Constant false
    LT = 1,   // Less than
    EQ = 2,   // Equal
    LE = 3,   // Less than or equal
    GT = 4,   // Greater than
    NE = 5,   // Not equal
    GE = 6,   // Greater than or equal
    NUM = 7,  // Operands are ordered
    Nan = 8,  // Operands are unordered
    LTU = 9,  // Less than, unordered variant
    EQU = 10, // Equal, unordered variant
    LEU = 11, // Less than or equal, unordered variant
    GTU = 12, // Greater than, unordered variant
    NEU = 13, // Not equal, unordered variant
    GEU = 14, // Greater than or equal, unordered variant
    T = 15,   // Constant true
};
class TranslatorVisitor { class TranslatorVisitor {
public: public:
explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {} explicit TranslatorVisitor(Environment& env_, IR::Block& block) : env{env_}, ir(block) {}

View file

@ -201,30 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) {
ThrowNotImplemented(Opcode::FCHK_imm); ThrowNotImplemented(Opcode::FCHK_imm);
} }
// Placeholder with no translation: hands Opcode::FSET_reg to ThrowNotImplemented and
// ignores the instruction word.
void TranslatorVisitor::FSET_reg(u64) {
ThrowNotImplemented(Opcode::FSET_reg);
}
// Placeholder with no translation: hands Opcode::FSET_cbuf to ThrowNotImplemented and
// ignores the instruction word.
void TranslatorVisitor::FSET_cbuf(u64) {
ThrowNotImplemented(Opcode::FSET_cbuf);
}
// Placeholder with no translation: hands Opcode::FSET_imm to ThrowNotImplemented and
// ignores the instruction word.
void TranslatorVisitor::FSET_imm(u64) {
ThrowNotImplemented(Opcode::FSET_imm);
}
// Placeholder with no translation: hands Opcode::FSETP_reg to ThrowNotImplemented and
// ignores the instruction word.
void TranslatorVisitor::FSETP_reg(u64) {
ThrowNotImplemented(Opcode::FSETP_reg);
}
// Placeholder with no translation: hands Opcode::FSETP_cbuf to ThrowNotImplemented and
// ignores the instruction word.
void TranslatorVisitor::FSETP_cbuf(u64) {
ThrowNotImplemented(Opcode::FSETP_cbuf);
}
// Placeholder with no translation: hands Opcode::FSETP_imm to ThrowNotImplemented and
// ignores the instruction word.
void TranslatorVisitor::FSETP_imm(u64) {
ThrowNotImplemented(Opcode::FSETP_imm);
}
void TranslatorVisitor::FSWZADD(u64) { void TranslatorVisitor::FSWZADD(u64) {
ThrowNotImplemented(Opcode::FSWZADD); ThrowNotImplemented(Opcode::FSWZADD);
} }