mirror of
https://github.com/yuzu-mirror/yuzu.git
synced 2024-11-18 04:10:00 +00:00
Shader Recomnpiler: implement textuzreGrad 3D emulation constant propagation
This commit is contained in:
parent
ae1421265a
commit
c03f0b3c89
8 changed files with 261 additions and 11 deletions
|
@ -558,12 +558,15 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
|
|||
const IR::Value& coord, const IR::Value& derivatives,
|
||||
const IR::Value& offset, const IR::Value& lod_clamp) {
|
||||
const auto info{inst.Flags<IR::TextureInstInfo>()};
|
||||
ScopedRegister dpdx, dpdy;
|
||||
ScopedRegister dpdx, dpdy, coords;
|
||||
const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
|
||||
if (multi_component) {
|
||||
// Allocate this early to avoid aliasing other registers
|
||||
dpdx = ScopedRegister{ctx.reg_alloc};
|
||||
dpdy = ScopedRegister{ctx.reg_alloc};
|
||||
if (info.num_derivates >= 3) {
|
||||
coords = ScopedRegister{ctx.reg_alloc};
|
||||
}
|
||||
}
|
||||
const auto sparse_inst{PrepareSparse(inst)};
|
||||
const std::string_view sparse_mod{sparse_inst ? ".SPARSE" : ""};
|
||||
|
@ -580,15 +583,27 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
|
|||
"MOV.F {}.y,{}.w;",
|
||||
dpdx.reg, derivatives_vec, dpdx.reg, derivatives_vec, dpdy.reg, derivatives_vec,
|
||||
dpdy.reg, derivatives_vec);
|
||||
Register final_coord;
|
||||
if (info.num_derivates >= 3) {
|
||||
ctx.Add("MOV.F {}.z,{}.x;"
|
||||
"MOV.F {}.z,{}.y;",
|
||||
dpdx.reg, coord_vec, dpdy.reg, coord_vec);
|
||||
ctx.Add("MOV.F {}.x,0;"
|
||||
"MOV.F {}.y,0;",
|
||||
"MOV.F {}.z,0;", coords.reg, coords.reg, coords.reg);
|
||||
final_coord = coords.reg;
|
||||
} else {
|
||||
final_coord = coord_vec;
|
||||
}
|
||||
if (info.has_lod_clamp) {
|
||||
const ScalarF32 lod_clamp_value{ctx.reg_alloc.Consume(lod_clamp)};
|
||||
ctx.Add("MOV.F {}.w,{};"
|
||||
"TXD.F.LODCLAMP{} {},{},{},{},{},{}{};",
|
||||
dpdy.reg, lod_clamp_value, sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
|
||||
dpdy.reg, lod_clamp_value, sparse_mod, ret, final_coord, dpdx.reg, dpdy.reg,
|
||||
texture, type, offset_vec);
|
||||
} else {
|
||||
ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, coord_vec, dpdx.reg, dpdy.reg,
|
||||
texture, type, offset_vec);
|
||||
ctx.Add("TXD.F{} {},{},{},{},{},{}{};", sparse_mod, ret, final_coord, dpdx.reg,
|
||||
dpdy.reg, texture, type, offset_vec);
|
||||
}
|
||||
} else {
|
||||
ctx.Add("TXD.F{} {},{},{}.x,{}.y,{},{}{};", sparse_mod, ret, coord_vec, derivatives_vec,
|
||||
|
|
|
@ -548,7 +548,7 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
|
|||
if (sparse_inst) {
|
||||
throw NotImplementedException("EmitImageGradient Sparse");
|
||||
}
|
||||
if (!offset.IsEmpty()) {
|
||||
if (!offset.IsEmpty() && info.num_derivates <= 2) {
|
||||
throw NotImplementedException("EmitImageGradient offset");
|
||||
}
|
||||
const auto texture{Texture(ctx, info, index)};
|
||||
|
@ -556,6 +556,12 @@ void EmitImageGradient(EmitContext& ctx, IR::Inst& inst, const IR::Value& index,
|
|||
const bool multi_component{info.num_derivates > 1 || info.has_lod_clamp};
|
||||
const auto derivatives_vec{ctx.var_alloc.Consume(derivatives)};
|
||||
if (multi_component) {
|
||||
if (info.num_derivates >= 3) {
|
||||
const auto offset_vec{ctx.var_alloc.Consume(offset)};
|
||||
ctx.Add("{}=textureGrad({},{},vec3({}.xz, {}.x),vec3({}.yz, {}.y));", texel, texture,
|
||||
coords, derivatives_vec, offset_vec, derivatives_vec, offset_vec);
|
||||
return;
|
||||
}
|
||||
ctx.Add("{}=textureGrad({},{},vec2({}.xz),vec2({}.yz));", texel, texture, coords,
|
||||
derivatives_vec, derivatives_vec);
|
||||
} else {
|
||||
|
|
|
@ -42,6 +42,7 @@ union TextureInstInfo {
|
|||
BitField<23, 2, u32> gather_component;
|
||||
BitField<25, 2, u32> num_derivates;
|
||||
BitField<27, 3, ImageFormat> image_format;
|
||||
BitField<30, 1, u32> ndv_is_active;
|
||||
};
|
||||
static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
|
||||
|
||||
|
|
|
@ -19,7 +19,7 @@ void TranslatorVisitor::FSWZADD(u64 insn) {
|
|||
} const fswzadd{insn};
|
||||
|
||||
if (fswzadd.ndv != 0) {
|
||||
throw NotImplementedException("FSWZADD NDV");
|
||||
LOG_WARNING(Shader, "(STUBBED) FSWZADD - NDV mode");
|
||||
}
|
||||
|
||||
const IR::F32 src_a{GetFloatReg8(insn)};
|
||||
|
|
|
@ -16,8 +16,10 @@ void MOV(TranslatorVisitor& v, u64 insn, const IR::U32& src, bool is_mov32i = fa
|
|||
BitField<12, 4, u64> mov32i_mask;
|
||||
} const mov{insn};
|
||||
|
||||
if ((is_mov32i ? mov.mov32i_mask : mov.mask) != 0xf) {
|
||||
throw NotImplementedException("Non-full move mask");
|
||||
u64 mask = is_mov32i ? mov.mov32i_mask : mov.mask;
|
||||
if (mask != 0xf && mask != 0x1) {
|
||||
LOG_WARNING(Shader, "(STUBBED) Masked Mov");
|
||||
return;
|
||||
}
|
||||
v.X(mov.dest_reg, src);
|
||||
}
|
||||
|
|
|
@ -209,7 +209,7 @@ void TranslatorVisitor::R2B(u64) {
|
|||
}
|
||||
|
||||
void TranslatorVisitor::RAM(u64) {
|
||||
ThrowNotImplemented(Opcode::RAM);
|
||||
LOG_WARNING(Shader, "(STUBBED) RAM Instruction");
|
||||
}
|
||||
|
||||
void TranslatorVisitor::RET(u64) {
|
||||
|
@ -221,7 +221,7 @@ void TranslatorVisitor::RTT(u64) {
|
|||
}
|
||||
|
||||
void TranslatorVisitor::SAM(u64) {
|
||||
ThrowNotImplemented(Opcode::SAM);
|
||||
LOG_WARNING(Shader, "(STUBBED) SAM Instruction");
|
||||
}
|
||||
|
||||
void TranslatorVisitor::SETCRSPTR(u64) {
|
||||
|
|
|
@ -172,6 +172,7 @@ void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
|
|||
info.is_depth.Assign(tex.dc != 0 ? 1 : 0);
|
||||
info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
|
||||
info.has_lod_clamp.Assign(lc ? 1 : 0);
|
||||
info.ndv_is_active.Assign(tex.ndv != 0 ? 1 : 0);
|
||||
|
||||
const IR::Value sample{[&]() -> IR::Value {
|
||||
if (tex.dc == 0) {
|
||||
|
|
|
@ -10,6 +10,7 @@
|
|||
#include "shader_recompiler/environment.h"
|
||||
#include "shader_recompiler/exception.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
#include "shader_recompiler/ir_opt/passes.h"
|
||||
|
||||
|
@ -410,7 +411,49 @@ void FoldSelect(IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
void FoldFPAdd32(IR::Inst& inst) {
|
||||
if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a + b; })) {
|
||||
return;
|
||||
}
|
||||
const IR::Value lhs_value{inst.Arg(0)};
|
||||
const IR::Value rhs_value{inst.Arg(1)};
|
||||
const auto check_neutral = [](const IR::Value& one_operand) {
|
||||
return one_operand.IsImmediate() && std::abs(one_operand.F32()) == 0.0f;
|
||||
};
|
||||
if (check_neutral(lhs_value)) {
|
||||
inst.ReplaceUsesWith(rhs_value);
|
||||
}
|
||||
if (check_neutral(rhs_value)) {
|
||||
inst.ReplaceUsesWith(lhs_value);
|
||||
}
|
||||
}
|
||||
|
||||
bool FoldDerivateYFromCorrection(IR::Inst& inst) {
|
||||
const IR::Value lhs_value{inst.Arg(0)};
|
||||
const IR::Value rhs_value{inst.Arg(1)};
|
||||
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
|
||||
IR::Inst* const rhs_op{rhs_value.InstRecursive()};
|
||||
if (lhs_op->GetOpcode() == IR::Opcode::YDirection) {
|
||||
if (rhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
|
||||
return false;
|
||||
}
|
||||
inst.ReplaceUsesWith(rhs_value);
|
||||
return true;
|
||||
}
|
||||
if (rhs_op->GetOpcode() != IR::Opcode::YDirection) {
|
||||
return false;
|
||||
}
|
||||
if (lhs_op->GetOpcode() != IR::Opcode::DPdyFine) {
|
||||
return false;
|
||||
}
|
||||
inst.ReplaceUsesWith(lhs_value);
|
||||
return true;
|
||||
}
|
||||
|
||||
void FoldFPMul32(IR::Inst& inst) {
|
||||
if (FoldWhenAllImmediates(inst, [](f32 a, f32 b) { return a * b; })) {
|
||||
return;
|
||||
}
|
||||
const auto control{inst.Flags<IR::FpControl>()};
|
||||
if (control.no_contraction) {
|
||||
return;
|
||||
|
@ -421,6 +464,9 @@ void FoldFPMul32(IR::Inst& inst) {
|
|||
if (lhs_value.IsImmediate() || rhs_value.IsImmediate()) {
|
||||
return;
|
||||
}
|
||||
if (FoldDerivateYFromCorrection(inst)) {
|
||||
return;
|
||||
}
|
||||
IR::Inst* const lhs_op{lhs_value.InstRecursive()};
|
||||
IR::Inst* const rhs_op{rhs_value.InstRecursive()};
|
||||
if (lhs_op->GetOpcode() != IR::Opcode::FPMul32 ||
|
||||
|
@ -622,7 +668,12 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
|
|||
}
|
||||
const IR::Value value_3{GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32)};
|
||||
if (value_2 != value_3) {
|
||||
return;
|
||||
if (!value_2.IsImmediate() || !value_3.IsImmediate()) {
|
||||
return;
|
||||
}
|
||||
if (Common::BitCast<u32>(value_2.F32()) != value_3.U32()) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
const IR::Value index{inst2->Arg(1)};
|
||||
const IR::Value clamp{inst2->Arg(2)};
|
||||
|
@ -648,6 +699,169 @@ void FoldFSwizzleAdd(IR::Block& block, IR::Inst& inst) {
|
|||
}
|
||||
}
|
||||
|
||||
bool FindGradient3DDerivates(std::array<IR::Value, 3>& results, IR::Value coord) {
|
||||
if (coord.IsImmediate()) {
|
||||
return false;
|
||||
}
|
||||
const auto check_through_shuffle = [](IR::Value input, IR::Value& result) {
|
||||
const IR::Value value_1{GetThroughCast(input.Resolve(), IR::Opcode::BitCastF32U32)};
|
||||
IR::Inst* const inst2{value_1.InstRecursive()};
|
||||
if (inst2->GetOpcode() != IR::Opcode::ShuffleIndex) {
|
||||
return false;
|
||||
}
|
||||
const IR::Value index{inst2->Arg(1).Resolve()};
|
||||
const IR::Value clamp{inst2->Arg(2).Resolve()};
|
||||
const IR::Value segmentation_mask{inst2->Arg(3).Resolve()};
|
||||
if (!index.IsImmediate() || !clamp.IsImmediate() || !segmentation_mask.IsImmediate()) {
|
||||
return false;
|
||||
}
|
||||
if (index.U32() != 3 && clamp.U32() != 3) {
|
||||
return false;
|
||||
}
|
||||
result = GetThroughCast(inst2->Arg(0).Resolve(), IR::Opcode::BitCastU32F32);
|
||||
return true;
|
||||
};
|
||||
IR::Inst* const inst = coord.InstRecursive();
|
||||
if (inst->GetOpcode() != IR::Opcode::FSwizzleAdd) {
|
||||
return false;
|
||||
}
|
||||
std::array<IR::Value, 3> temporary_values;
|
||||
IR::Value value_1 = inst->Arg(0).Resolve();
|
||||
IR::Value value_2 = inst->Arg(1).Resolve();
|
||||
IR::Value value_3 = inst->Arg(2).Resolve();
|
||||
std::array<u32, 4> swizzles_mask_a{};
|
||||
std::array<u32, 4> swizzles_mask_b{};
|
||||
const auto resolve_mask = [](std::array<u32, 4>& mask_results, IR::Value mask) {
|
||||
u32 value = mask.U32();
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
mask_results[i] = (value >> (i * 2)) & 0x3;
|
||||
}
|
||||
};
|
||||
resolve_mask(swizzles_mask_a, value_3);
|
||||
size_t coordinate_index = 0;
|
||||
const auto resolve_pending = [&](IR::Value resolve_v) {
|
||||
IR::Inst* const inst_r = resolve_v.InstRecursive();
|
||||
if (inst_r->GetOpcode() != IR::Opcode::FSwizzleAdd) {
|
||||
return false;
|
||||
}
|
||||
if (!check_through_shuffle(inst_r->Arg(0).Resolve(), temporary_values[1])) {
|
||||
return false;
|
||||
}
|
||||
if (!check_through_shuffle(inst_r->Arg(1).Resolve(), temporary_values[2])) {
|
||||
return false;
|
||||
}
|
||||
resolve_mask(swizzles_mask_b, inst_r->Arg(2).Resolve());
|
||||
return true;
|
||||
};
|
||||
if (value_1.IsImmediate() || value_2.IsImmediate()) {
|
||||
return false;
|
||||
}
|
||||
bool should_continue = false;
|
||||
if (resolve_pending(value_1)) {
|
||||
should_continue = check_through_shuffle(value_2, temporary_values[0]);
|
||||
coordinate_index = 0;
|
||||
}
|
||||
if (resolve_pending(value_2)) {
|
||||
should_continue = check_through_shuffle(value_1, temporary_values[0]);
|
||||
coordinate_index = 2;
|
||||
}
|
||||
if (!should_continue) {
|
||||
return false;
|
||||
}
|
||||
// figure which is which
|
||||
size_t zero_mask_a = 0;
|
||||
size_t zero_mask_b = 0;
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
if (swizzles_mask_a[i] == 2 || swizzles_mask_b[i] == 2) {
|
||||
// last operand can be inversed, we cannot determine a result.
|
||||
return false;
|
||||
}
|
||||
zero_mask_a |= static_cast<size_t>(swizzles_mask_a[i] == 3 ? 1 : 0) << i;
|
||||
zero_mask_b |= static_cast<size_t>(swizzles_mask_b[i] == 3 ? 1 : 0) << i;
|
||||
}
|
||||
static constexpr size_t ddx_pattern = 0b1010;
|
||||
static constexpr size_t ddx_pattern_inv = ~ddx_pattern & 0b00001111;
|
||||
if (std::popcount(zero_mask_a) != 2) {
|
||||
return false;
|
||||
}
|
||||
if (std::popcount(zero_mask_b) != 2) {
|
||||
return false;
|
||||
}
|
||||
if (zero_mask_a == zero_mask_b) {
|
||||
return false;
|
||||
}
|
||||
results[0] = temporary_values[coordinate_index];
|
||||
|
||||
if (coordinate_index == 0) {
|
||||
if (zero_mask_b == ddx_pattern || zero_mask_b == ddx_pattern_inv) {
|
||||
results[1] = temporary_values[1];
|
||||
results[2] = temporary_values[2];
|
||||
return true;
|
||||
}
|
||||
results[2] = temporary_values[1];
|
||||
results[1] = temporary_values[2];
|
||||
} else {
|
||||
const auto assign_result = [&results](IR::Value temporary_value, size_t mask) {
|
||||
if (mask == ddx_pattern || mask == ddx_pattern_inv) {
|
||||
results[1] = temporary_value;
|
||||
return;
|
||||
}
|
||||
results[2] = temporary_value;
|
||||
};
|
||||
assign_result(temporary_values[1], zero_mask_b);
|
||||
assign_result(temporary_values[0], zero_mask_a);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void FoldImageSampleImplicitLod(IR::Block& block, IR::Inst& inst) {
|
||||
IR::TextureInstInfo info = inst.Flags<IR::TextureInstInfo>();
|
||||
auto orig_opcode = inst.GetOpcode();
|
||||
if (info.ndv_is_active == 0) {
|
||||
return;
|
||||
}
|
||||
if (info.type != TextureType::Color3D) {
|
||||
return;
|
||||
}
|
||||
const IR::Value handle{inst.Arg(0)};
|
||||
const IR::Value coords{inst.Arg(1)};
|
||||
const IR::Value bias_lc{inst.Arg(2)};
|
||||
const IR::Value offset{inst.Arg(3)};
|
||||
if (!offset.IsImmediate()) {
|
||||
return;
|
||||
}
|
||||
IR::Inst* const inst2 = coords.InstRecursive();
|
||||
std::array<std::array<IR::Value, 3>, 3> results_matrix;
|
||||
for (size_t i = 0; i < 3; i++) {
|
||||
if (!FindGradient3DDerivates(results_matrix[i], inst2->Arg(i).Resolve())) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
IR::F32 lod_clamp{};
|
||||
if (info.has_lod_clamp != 0) {
|
||||
if (!bias_lc.IsImmediate()) {
|
||||
lod_clamp = IR::F32{bias_lc.InstRecursive()->Arg(1).Resolve()};
|
||||
} else {
|
||||
lod_clamp = IR::F32{bias_lc};
|
||||
}
|
||||
}
|
||||
IR::IREmitter ir{block, IR::Block::InstructionList::s_iterator_to(inst)};
|
||||
IR::Value new_coords =
|
||||
ir.CompositeConstruct(results_matrix[0][0], results_matrix[1][0], results_matrix[2][0]);
|
||||
IR::Value derivatives_1 = ir.CompositeConstruct(results_matrix[0][1], results_matrix[0][2],
|
||||
results_matrix[1][1], results_matrix[1][2]);
|
||||
IR::Value derivatives_2 = ir.CompositeConstruct(results_matrix[2][1], results_matrix[2][2]);
|
||||
info.num_derivates.Assign(3);
|
||||
IR::Value new_gradient_instruction =
|
||||
ir.ImageGradient(handle, new_coords, derivatives_1, derivatives_2, lod_clamp, info);
|
||||
IR::Inst* const new_inst = new_gradient_instruction.InstRecursive();
|
||||
if (orig_opcode == IR::Opcode::ImageSampleImplicitLod) {
|
||||
new_inst->ReplaceOpcode(IR::Opcode::ImageGradient);
|
||||
}
|
||||
inst.ReplaceUsesWith(new_gradient_instruction);
|
||||
}
|
||||
|
||||
void FoldConstBuffer(Environment& env, IR::Block& block, IR::Inst& inst) {
|
||||
const IR::Value bank{inst.Arg(0)};
|
||||
const IR::Value offset{inst.Arg(1)};
|
||||
|
@ -743,6 +957,12 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
|
|||
case IR::Opcode::SelectF32:
|
||||
case IR::Opcode::SelectF64:
|
||||
return FoldSelect(inst);
|
||||
case IR::Opcode::FPNeg32:
|
||||
FoldWhenAllImmediates(inst, [](f32 a) { return -a; });
|
||||
return;
|
||||
case IR::Opcode::FPAdd32:
|
||||
FoldFPAdd32(inst);
|
||||
return;
|
||||
case IR::Opcode::FPMul32:
|
||||
return FoldFPMul32(inst);
|
||||
case IR::Opcode::LogicalAnd:
|
||||
|
@ -858,6 +1078,11 @@ void ConstantPropagation(Environment& env, IR::Block& block, IR::Inst& inst) {
|
|||
FoldDriverConstBuffer(env, block, inst, 1);
|
||||
}
|
||||
break;
|
||||
case IR::Opcode::BindlessImageSampleImplicitLod:
|
||||
case IR::Opcode::BoundImageSampleImplicitLod:
|
||||
case IR::Opcode::ImageSampleImplicitLod:
|
||||
FoldImageSampleImplicitLod(block, inst);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
|
Loading…
Reference in a new issue