Merge pull request #3350 from ReinUsesLisp/atom
shader/memory: Implement ATOM.ADD
This commit is contained in:
commit
2db7adc42a
5 changed files with 86 additions and 39 deletions
|
@ -227,6 +227,28 @@ enum class AtomicOp : u64 {
|
|||
Exch = 8,
|
||||
};
|
||||
|
||||
enum class GlobalAtomicOp : u64 {
|
||||
Add = 0,
|
||||
Min = 1,
|
||||
Max = 2,
|
||||
Inc = 3,
|
||||
Dec = 4,
|
||||
And = 5,
|
||||
Or = 6,
|
||||
Xor = 7,
|
||||
Exch = 8,
|
||||
SafeAdd = 10,
|
||||
};
|
||||
|
||||
enum class GlobalAtomicType : u64 {
|
||||
U32 = 0,
|
||||
S32 = 1,
|
||||
U64 = 2,
|
||||
F32_FTZ_RN = 3,
|
||||
F16x2_FTZ_RN = 4,
|
||||
S64 = 5,
|
||||
};
|
||||
|
||||
enum class UniformType : u64 {
|
||||
UnsignedByte = 0,
|
||||
SignedByte = 1,
|
||||
|
@ -957,6 +979,12 @@ union Instruction {
|
|||
BitField<46, 2, u64> cache_mode;
|
||||
} stg;
|
||||
|
||||
union {
|
||||
BitField<52, 4, GlobalAtomicOp> operation;
|
||||
BitField<49, 3, GlobalAtomicType> type;
|
||||
BitField<28, 20, s64> offset;
|
||||
} atom;
|
||||
|
||||
union {
|
||||
BitField<52, 4, AtomicOp> operation;
|
||||
BitField<28, 2, AtomicType> type;
|
||||
|
@ -1690,6 +1718,7 @@ public:
|
|||
ST_S,
|
||||
ST, // Store in generic memory
|
||||
STG, // Store in global memory
|
||||
ATOM, // Atomic operation on global memory
|
||||
ATOMS, // Atomic operation on shared memory
|
||||
AL2P, // Transforms attribute memory into physical memory
|
||||
TEX,
|
||||
|
@ -1994,6 +2023,7 @@ private:
|
|||
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
||||
INST("101-------------", Id::ST, Type::Memory, "ST"),
|
||||
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
||||
INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
|
||||
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
|
||||
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
|
||||
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
|
||||
|
|
|
@ -1857,10 +1857,7 @@ private:
|
|||
|
||||
template <const std::string_view& opname, Type type>
|
||||
Expression Atomic(Operation operation) {
|
||||
ASSERT(stage == ShaderType::Compute);
|
||||
auto& smem = std::get<SmemNode>(*operation[0]);
|
||||
|
||||
return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
|
||||
return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
|
||||
Visit(operation[1]).As(type)),
|
||||
type};
|
||||
}
|
||||
|
|
|
@ -1123,15 +1123,7 @@ private:
|
|||
}
|
||||
|
||||
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
||||
const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
|
||||
const Id real = AsUint(Visit(gmem->GetRealAddress()));
|
||||
const Id base = AsUint(Visit(gmem->GetBaseAddress()));
|
||||
|
||||
Id offset = OpISub(t_uint, real, base);
|
||||
offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U));
|
||||
return {OpLoad(t_float,
|
||||
OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)),
|
||||
Type::Float};
|
||||
return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
|
||||
}
|
||||
|
||||
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
|
||||
|
@ -1142,10 +1134,7 @@ private:
|
|||
}
|
||||
|
||||
if (const auto smem = std::get_if<SmemNode>(&*node)) {
|
||||
Id address = AsUint(Visit(smem->GetAddress()));
|
||||
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
|
||||
const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
|
||||
return {OpLoad(t_uint, pointer), Type::Uint};
|
||||
return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
|
||||
}
|
||||
|
||||
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
|
||||
|
@ -1339,20 +1328,10 @@ private:
|
|||
target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
|
||||
|
||||
} else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
|
||||
ASSERT(stage == ShaderType::Compute);
|
||||
Id address = AsUint(Visit(smem->GetAddress()));
|
||||
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
|
||||
target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint};
|
||||
target = {GetSharedMemoryPointer(*smem), Type::Uint};
|
||||
|
||||
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
||||
const Id real = AsUint(Visit(gmem->GetRealAddress()));
|
||||
const Id base = AsUint(Visit(gmem->GetBaseAddress()));
|
||||
const Id diff = OpISub(t_uint, real, base);
|
||||
const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
|
||||
|
||||
const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
|
||||
target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset),
|
||||
Type::Float};
|
||||
target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
|
||||
|
||||
} else {
|
||||
UNIMPLEMENTED();
|
||||
|
@ -1804,11 +1783,16 @@ private:
|
|||
return {};
|
||||
}
|
||||
|
||||
Expression UAtomicAdd(Operation operation) {
|
||||
const auto& smem = std::get<SmemNode>(*operation[0]);
|
||||
Id address = AsUint(Visit(smem.GetAddress()));
|
||||
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
|
||||
const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
|
||||
Expression AtomicAdd(Operation operation) {
|
||||
Id pointer;
|
||||
if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
||||
pointer = GetSharedMemoryPointer(*smem);
|
||||
} else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
|
||||
pointer = GetGlobalMemoryPointer(*gmem);
|
||||
} else {
|
||||
UNREACHABLE();
|
||||
return {Constant(t_uint, 0), Type::Uint};
|
||||
}
|
||||
|
||||
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
||||
const Id semantics = Constant(t_uint, 0U);
|
||||
|
@ -2243,6 +2227,22 @@ private:
|
|||
return {};
|
||||
}
|
||||
|
||||
Id GetGlobalMemoryPointer(const GmemNode& gmem) {
|
||||
const Id real = AsUint(Visit(gmem.GetRealAddress()));
|
||||
const Id base = AsUint(Visit(gmem.GetBaseAddress()));
|
||||
const Id diff = OpISub(t_uint, real, base);
|
||||
const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
|
||||
const Id buffer = global_buffers.at(gmem.GetDescriptor());
|
||||
return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset);
|
||||
}
|
||||
|
||||
Id GetSharedMemoryPointer(const SmemNode& smem) {
|
||||
ASSERT(stage == ShaderType::Compute);
|
||||
Id address = AsUint(Visit(smem.GetAddress()));
|
||||
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
|
||||
return OpAccessChain(t_smem_uint, shared_memory, address);
|
||||
}
|
||||
|
||||
static constexpr std::array operation_decompilers = {
|
||||
&SPIRVDecompiler::Assign,
|
||||
|
||||
|
@ -2389,7 +2389,7 @@ private:
|
|||
&SPIRVDecompiler::AtomicImageXor,
|
||||
&SPIRVDecompiler::AtomicImageExchange,
|
||||
|
||||
&SPIRVDecompiler::UAtomicAdd,
|
||||
&SPIRVDecompiler::AtomicAdd,
|
||||
|
||||
&SPIRVDecompiler::Branch,
|
||||
&SPIRVDecompiler::BranchIndirect,
|
||||
|
@ -2485,9 +2485,9 @@ private:
|
|||
|
||||
Id t_smem_uint{};
|
||||
|
||||
const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
|
||||
const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
|
||||
const Id t_gmem_array =
|
||||
Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray");
|
||||
Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
|
||||
const Id t_gmem_struct = MemberDecorate(
|
||||
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
|
||||
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
|
||||
|
|
|
@ -19,6 +19,8 @@ namespace VideoCommon::Shader {
|
|||
using Tegra::Shader::AtomicOp;
|
||||
using Tegra::Shader::AtomicType;
|
||||
using Tegra::Shader::Attribute;
|
||||
using Tegra::Shader::GlobalAtomicOp;
|
||||
using Tegra::Shader::GlobalAtomicType;
|
||||
using Tegra::Shader::Instruction;
|
||||
using Tegra::Shader::OpCode;
|
||||
using Tegra::Shader::Register;
|
||||
|
@ -335,6 +337,24 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
|||
}
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::ATOM: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
|
||||
static_cast<int>(instr.atom.operation.Value()));
|
||||
UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
|
||||
static_cast<int>(instr.atom.type.Value()));
|
||||
|
||||
const auto [real_address, base_address, descriptor] =
|
||||
TrackGlobalMemory(bb, instr, true, true);
|
||||
if (!real_address || !base_address) {
|
||||
// Tracking failed, skip atomic.
|
||||
break;
|
||||
}
|
||||
|
||||
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
||||
Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20));
|
||||
SetRegister(bb, instr.gpr0, std::move(value));
|
||||
break;
|
||||
}
|
||||
case OpCode::Id::ATOMS: {
|
||||
UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
|
||||
static_cast<int>(instr.atoms.operation.Value()));
|
||||
|
@ -348,7 +368,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
|||
Node memory = GetSharedMemory(std::move(address));
|
||||
Node data = GetRegister(instr.gpr20);
|
||||
|
||||
Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data));
|
||||
Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
|
||||
SetRegister(bb, instr.gpr0, std::move(value));
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -162,7 +162,7 @@ enum class OperationCode {
|
|||
AtomicImageXor, /// (MetaImage, int[N] coords) -> void
|
||||
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
|
||||
|
||||
UAtomicAdd, /// (smem, uint) -> uint
|
||||
AtomicAdd, /// (memory, {u}int) -> {u}int
|
||||
|
||||
Branch, /// (uint branch_target) -> void
|
||||
BranchIndirect, /// (uint branch_target) -> void
|
||||
|
|
Loading…
Reference in a new issue