shader: Implement FCMP

still need to configure some settings for NV denorm flush and intel NaN
This commit is contained in:
ameerj 2021-03-10 22:42:17 -05:00
parent 3a63fa0477
commit ba8c1d2eb4
9 changed files with 203 additions and 50 deletions

View file

@ -67,6 +67,7 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate/impl/common_funcs.h
frontend/maxwell/translate/impl/find_leading_one.cpp
frontend/maxwell/translate/impl/floating_point_add.cpp
frontend/maxwell/translate/impl/floating_point_compare.cpp
frontend/maxwell/translate/impl/floating_point_conversion_integer.cpp
frontend/maxwell/translate/impl/floating_point_fused_multiply_add.cpp
frontend/maxwell/translate/impl/floating_point_multi_function.cpp

View file

@ -232,6 +232,7 @@ Id EmitFPOrdGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThanEqual16(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThanEqual32(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs);
Id EmitFPIsNan32(EmitContext& ctx, Id value);
Id EmitIAdd32(EmitContext& ctx, IR::Inst* inst, Id a, Id b);
void EmitIAdd64(EmitContext& ctx);
Id EmitISub32(EmitContext& ctx, Id a, Id b);

View file

@ -346,4 +346,8 @@ Id EmitFPUnordGreaterThanEqual64(EmitContext& ctx, Id lhs, Id rhs) {
return ctx.OpFUnordGreaterThanEqual(ctx.U1, lhs, rhs);
}
Id EmitFPIsNan32(EmitContext& ctx, Id value) {
return ctx.OpIsNan(ctx.U1, value);
}
} // namespace Shader::Backend::SPIRV

View file

@ -697,93 +697,107 @@ F16F32F64 IREmitter::FPTrunc(const F16F32F64& value, FpControl control) {
}
}
U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
U1 IREmitter::FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control, bool ordered) {
if (lhs.Type() != rhs.Type()) {
throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::F16:
return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, lhs, rhs);
return Inst<U1>(ordered ? Opcode::FPOrdEqual16 : Opcode::FPUnordEqual16, Flags{control},
lhs, rhs);
case Type::F32:
return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, lhs, rhs);
return Inst<U1>(ordered ? Opcode::FPOrdEqual32 : Opcode::FPUnordEqual32, Flags{control},
lhs, rhs);
case Type::F64:
return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, lhs, rhs);
return Inst<U1>(ordered ? Opcode::FPOrdEqual64 : Opcode::FPUnordEqual64, Flags{control},
lhs, rhs);
default:
ThrowInvalidType(lhs.Type());
}
}
U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
U1 IREmitter::FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
bool ordered) {
if (lhs.Type() != rhs.Type()) {
throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::F16:
return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16, lhs, rhs);
return Inst<U1>(ordered ? Opcode::FPOrdNotEqual16 : Opcode::FPUnordNotEqual16,
Flags{control}, lhs, rhs);
case Type::F32:
return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32, lhs, rhs);
return Inst<U1>(ordered ? Opcode::FPOrdNotEqual32 : Opcode::FPUnordNotEqual32,
Flags{control}, lhs, rhs);
case Type::F64:
return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64, lhs, rhs);
return Inst<U1>(ordered ? Opcode::FPOrdNotEqual64 : Opcode::FPUnordNotEqual64,
Flags{control}, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());
}
}
U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
U1 IREmitter::FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
bool ordered) {
if (lhs.Type() != rhs.Type()) {
throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::F16:
return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16, lhs, rhs);
return Inst<U1>(ordered ? Opcode::FPOrdLessThan16 : Opcode::FPUnordLessThan16,
Flags{control}, lhs, rhs);
case Type::F32:
return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32, lhs, rhs);
return Inst<U1>(ordered ? Opcode::FPOrdLessThan32 : Opcode::FPUnordLessThan32,
Flags{control}, lhs, rhs);
case Type::F64:
return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64, lhs, rhs);
return Inst<U1>(ordered ? Opcode::FPOrdLessThan64 : Opcode::FPUnordLessThan64,
Flags{control}, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());
}
}
U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
U1 IREmitter::FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
bool ordered) {
if (lhs.Type() != rhs.Type()) {
throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::F16:
return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16, lhs,
rhs);
return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan16 : Opcode::FPUnordGreaterThan16,
Flags{control}, lhs, rhs);
case Type::F32:
return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32, lhs,
rhs);
return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan32 : Opcode::FPUnordGreaterThan32,
Flags{control}, lhs, rhs);
case Type::F64:
return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64, lhs,
rhs);
return Inst<U1>(ordered ? Opcode::FPOrdGreaterThan64 : Opcode::FPUnordGreaterThan64,
Flags{control}, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());
}
}
U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
U1 IREmitter::FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
bool ordered) {
if (lhs.Type() != rhs.Type()) {
throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
switch (lhs.Type()) {
case Type::F16:
return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual16 : Opcode::FPUnordLessThanEqual16,
lhs, rhs);
Flags{control}, lhs, rhs);
case Type::F32:
return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual32 : Opcode::FPUnordLessThanEqual32,
lhs, rhs);
Flags{control}, lhs, rhs);
case Type::F64:
return Inst<U1>(ordered ? Opcode::FPOrdLessThanEqual64 : Opcode::FPUnordLessThanEqual64,
lhs, rhs);
Flags{control}, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());
}
}
U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered) {
U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control,
bool ordered) {
if (lhs.Type() != rhs.Type()) {
throw InvalidArgument("Mismatching types {} and {}", lhs.Type(), rhs.Type());
}
@ -791,20 +805,32 @@ U1 IREmitter::FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs, boo
case Type::F16:
return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual16
: Opcode::FPUnordGreaterThanEqual16,
lhs, rhs);
Flags{control}, lhs, rhs);
case Type::F32:
return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual32
: Opcode::FPUnordGreaterThanEqual32,
lhs, rhs);
Flags{control}, lhs, rhs);
case Type::F64:
return Inst<U1>(ordered ? Opcode::FPOrdGreaterThanEqual64
: Opcode::FPUnordGreaterThanEqual64,
lhs, rhs);
Flags{control}, lhs, rhs);
default:
ThrowInvalidType(lhs.Type());
}
}
U1 IREmitter::FPIsNan(const F32& value) {
return Inst<U1>(Opcode::FPIsNan32, value);
}
U1 IREmitter::FPOrdered(const F32& lhs, const F32& rhs) {
return LogicalAnd(LogicalNot(FPIsNan(lhs)), LogicalNot(FPIsNan(rhs)));
}
U1 IREmitter::FPUnordered(const F32& lhs, const F32& rhs) {
return LogicalOr(FPIsNan(lhs), FPIsNan(rhs));
}
U32U64 IREmitter::IAdd(const U32U64& a, const U32U64& b) {
if (a.Type() != b.Type()) {
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());

View file

@ -140,14 +140,21 @@ public:
[[nodiscard]] F16F32F64 FPCeil(const F16F32F64& value, FpControl control = {});
[[nodiscard]] F16F32F64 FPTrunc(const F16F32F64& value, FpControl control = {});
[[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs, bool ordered = true);
[[nodiscard]] U1 FPEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
bool ordered = true);
[[nodiscard]] U1 FPNotEqual(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
bool ordered = true);
[[nodiscard]] U1 FPLessThan(const F16F32F64& lhs, const F16F32F64& rhs, FpControl control = {},
bool ordered = true);
[[nodiscard]] U1 FPGreaterThan(const F16F32F64& lhs, const F16F32F64& rhs,
FpControl control = {}, bool ordered = true);
[[nodiscard]] U1 FPLessThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
bool ordered = true);
FpControl control = {}, bool ordered = true);
[[nodiscard]] U1 FPGreaterThanEqual(const F16F32F64& lhs, const F16F32F64& rhs,
bool ordered = true);
FpControl control = {}, bool ordered = true);
[[nodiscard]] U1 FPIsNan(const F32& value);
[[nodiscard]] U1 FPOrdered(const F32& lhs, const F32& rhs);
[[nodiscard]] U1 FPUnordered(const F32& lhs, const F32& rhs);
[[nodiscard]] U32U64 IAdd(const U32U64& a, const U32U64& b);
[[nodiscard]] U32U64 ISub(const U32U64& a, const U32U64& b);

View file

@ -197,6 +197,7 @@ OPCODE(FPTrunc16, F16, F16,
OPCODE(FPTrunc32, F32, F32, )
OPCODE(FPTrunc64, F64, F64, )
<<<<<<< HEAD
OPCODE(FPOrdEqual16, U1, F16, F16, )
OPCODE(FPOrdEqual32, U1, F32, F32, )
OPCODE(FPOrdEqual64, U1, F64, F64, )
@ -233,6 +234,7 @@ OPCODE(FPOrdGreaterThanEqual64, U1, F64,
OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, )
OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
OPCODE(FPIsNan32, U1, F32, )
// Integer operations
OPCODE(IAdd32, U32, U32, U32, )

View file

@ -0,0 +1,116 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/common_funcs.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
enum class FPCompareOp : u64 {
F,
LT,
EQ,
LE,
GT,
NE,
GE,
NUM,
Nan,
LTU,
EQU,
LEU,
GTU,
NEU,
GEU,
T,
};
bool IsCompareOpOrdered(FPCompareOp op) {
switch (op) {
case FPCompareOp::LTU:
case FPCompareOp::EQU:
case FPCompareOp::LEU:
case FPCompareOp::GTU:
case FPCompareOp::NEU:
case FPCompareOp::GEU:
return false;
default:
return true;
}
}
IR::U1 FloatingPointCompare(IR::IREmitter& ir, const IR::F32& operand_1, const IR::F32& operand_2,
FPCompareOp compare_op, IR::FpControl control) {
const bool ordered{IsCompareOpOrdered(compare_op)};
switch (compare_op) {
case FPCompareOp::F:
return ir.Imm1(false);
case FPCompareOp::LT:
case FPCompareOp::LTU:
return ir.FPLessThan(operand_1, operand_2, control, ordered);
case FPCompareOp::EQ:
case FPCompareOp::EQU:
return ir.FPEqual(operand_1, operand_2, control, ordered);
case FPCompareOp::LE:
case FPCompareOp::LEU:
return ir.FPLessThanEqual(operand_1, operand_2, control, ordered);
case FPCompareOp::GT:
case FPCompareOp::GTU:
return ir.FPGreaterThan(operand_1, operand_2, control, ordered);
case FPCompareOp::NE:
case FPCompareOp::NEU:
return ir.FPNotEqual(operand_1, operand_2, control, ordered);
case FPCompareOp::GE:
case FPCompareOp::GEU:
return ir.FPGreaterThanEqual(operand_1, operand_2, control, ordered);
case FPCompareOp::NUM:
return ir.FPOrdered(operand_1, operand_2);
case FPCompareOp::Nan:
return ir.FPUnordered(operand_1, operand_2);
case FPCompareOp::T:
return ir.Imm1(true);
default:
throw NotImplementedException("Invalid compare op {}", compare_op);
}
}
void FCMP(TranslatorVisitor& v, u64 insn, const IR::U32& src_a, const IR::F32& operand) {
union {
u64 insn;
BitField<0, 8, IR::Reg> dest_reg;
BitField<8, 8, IR::Reg> src_reg;
BitField<47, 1, u64> ftz;
BitField<48, 4, FPCompareOp> compare_op;
} const fcmp{insn};
const IR::F32 zero{v.ir.Imm32(0.0f)};
const IR::F32 neg_zero{v.ir.Imm32(-0.0f)};
IR::FpControl control{.fmz_mode{fcmp.ftz != 0 ? IR::FmzMode::FTZ : IR::FmzMode::None}};
const IR::U1 cmp_result{FloatingPointCompare(v.ir, operand, zero, fcmp.compare_op, control)};
const IR::U32 src_reg{v.X(fcmp.src_reg)};
const IR::U32 result{v.ir.Select(cmp_result, src_reg, src_a)};
v.X(fcmp.dest_reg, result);
}
} // Anonymous namespace
void TranslatorVisitor::FCMP_reg(u64 insn) {
FCMP(*this, insn, GetReg20(insn), GetFloatReg39(insn));
}
void TranslatorVisitor::FCMP_rc(u64 insn) {
FCMP(*this, insn, GetReg39(insn), GetFloatCbuf(insn));
}
void TranslatorVisitor::FCMP_cr(u64 insn) {
FCMP(*this, insn, GetCbuf(insn), GetFloatReg39(insn));
}
void TranslatorVisitor::FCMP_imm(u64 insn) {
FCMP(*this, insn, GetReg39(insn), GetFloatImm20(insn));
}
} // namespace Shader::Maxwell

View file

@ -201,22 +201,6 @@ void TranslatorVisitor::FCHK_imm(u64) {
ThrowNotImplemented(Opcode::FCHK_imm);
}
void TranslatorVisitor::FCMP_reg(u64) {
ThrowNotImplemented(Opcode::FCMP_reg);
}
void TranslatorVisitor::FCMP_rc(u64) {
ThrowNotImplemented(Opcode::FCMP_rc);
}
void TranslatorVisitor::FCMP_cr(u64) {
ThrowNotImplemented(Opcode::FCMP_cr);
}
void TranslatorVisitor::FCMP_imm(u64) {
ThrowNotImplemented(Opcode::FCMP_imm);
}
void TranslatorVisitor::FMNMX_reg(u64) {
ThrowNotImplemented(Opcode::FMNMX_reg);
}

View file

@ -256,7 +256,19 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) {
case IR::Opcode::FPRoundEven32:
case IR::Opcode::FPFloor32:
case IR::Opcode::FPCeil32:
case IR::Opcode::FPTrunc32: {
case IR::Opcode::FPTrunc32:
case IR::Opcode::FPOrdEqual32:
case IR::Opcode::FPUnordEqual32:
case IR::Opcode::FPOrdNotEqual32:
case IR::Opcode::FPUnordNotEqual32:
case IR::Opcode::FPOrdLessThan32:
case IR::Opcode::FPUnordLessThan32:
case IR::Opcode::FPOrdGreaterThan32:
case IR::Opcode::FPUnordGreaterThan32:
case IR::Opcode::FPOrdLessThanEqual32:
case IR::Opcode::FPUnordLessThanEqual32:
case IR::Opcode::FPOrdGreaterThanEqual32:
case IR::Opcode::FPUnordGreaterThanEqual32: {
const auto control{inst.Flags<IR::FpControl>()};
switch (control.fmz_mode) {
case IR::FmzMode::DontCare: