shader/memory: Implement ATOM.ADD

ATOM performs atomic operations on global memory. For now, only ATOM.ADD
is implemented, since that's what was found in commercial games.

Only ATOM.ADD.S32 is accepted (every other operation or type is reported
as unimplemented), although ATOM.ADD.U32 shouldn't be any different.

This change forces us to change the default type on SPIR-V storage
buffers from float to uint. We could also alias the buffers, but it's
simpler for now to just use uint. While we are at it, abstract the code
to avoid repetition.
This commit is contained in:
ReinUsesLisp 2020-01-25 21:03:02 -03:00
parent 05df4a8c94
commit d95d4ac843
5 changed files with 86 additions and 39 deletions

View file

@ -227,6 +227,28 @@ enum class AtomicOp : u64 {
Exch = 8, Exch = 8,
}; };
// Operation selector for the ATOM instruction (atomic operation on global memory).
// This is the type of the `operation` bit field of the `atom` instruction layout.
// NOTE(review): the enumerator values are presumably the raw numbers stored in that
// bit field, so they should not be renumbered - confirm against the ISA reference.
// The value 9 is not assigned by this enum.
enum class GlobalAtomicOp : u64 {
Add = 0,
Min = 1,
Max = 2,
Inc = 3,
Dec = 4,
And = 5,
Or = 6,
Xor = 7,
Exch = 8,
SafeAdd = 10,
};
// Operand type selector for the ATOM instruction (atomic operation on global memory).
// This is the type of the `type` bit field of the `atom` instruction layout.
// NOTE(review): the enumerator values are presumably the raw numbers stored in that
// bit field, so they should not be renumbered - confirm against the ISA reference.
// NOTE(review): the `_FTZ_RN` suffix presumably means flush-to-zero with
// round-to-nearest - confirm.
enum class GlobalAtomicType : u64 {
U32 = 0,
S32 = 1,
U64 = 2,
F32_FTZ_RN = 3,
F16x2_FTZ_RN = 4,
S64 = 5,
};
enum class UniformType : u64 { enum class UniformType : u64 {
UnsignedByte = 0, UnsignedByte = 0,
SignedByte = 1, SignedByte = 1,
@ -957,6 +979,12 @@ union Instruction {
BitField<46, 2, u64> cache_mode; BitField<46, 2, u64> cache_mode;
} stg; } stg;
// Field layout used to decode the ATOM instruction (atomic operation on global memory).
// NOTE(review): the BitField template arguments are presumably
// <first bit, number of bits, value type> - confirm against BitField's definition.
union {
BitField<52, 4, GlobalAtomicOp> operation; // Which atomic operation to perform
BitField<49, 3, GlobalAtomicType> type; // Type of the operand
BitField<28, 20, s64> offset; // Offset field, exposed as a signed 64-bit value
} atom;
union { union {
BitField<52, 4, AtomicOp> operation; BitField<52, 4, AtomicOp> operation;
BitField<28, 2, AtomicType> type; BitField<28, 2, AtomicType> type;
@ -1690,6 +1718,7 @@ public:
ST_S, ST_S,
ST, // Store in generic memory ST, // Store in generic memory
STG, // Store in global memory STG, // Store in global memory
ATOM, // Atomic operation on global memory
ATOMS, // Atomic operation on shared memory ATOMS, // Atomic operation on shared memory
AL2P, // Transforms attribute memory into physical memory AL2P, // Transforms attribute memory into physical memory
TEX, TEX,
@ -1994,6 +2023,7 @@ private:
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("101-------------", Id::ST, Type::Memory, "ST"), INST("101-------------", Id::ST, Type::Memory, "ST"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"),

View file

@ -1858,10 +1858,7 @@ private:
template <const std::string_view& opname, Type type> template <const std::string_view& opname, Type type>
Expression Atomic(Operation operation) { Expression Atomic(Operation operation) {
ASSERT(stage == ShaderType::Compute); return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
auto& smem = std::get<SmemNode>(*operation[0]);
return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
Visit(operation[1]).As(type)), Visit(operation[1]).As(type)),
type}; type};
} }

View file

@ -1123,15 +1123,7 @@ private:
} }
if (const auto gmem = std::get_if<GmemNode>(&*node)) { if (const auto gmem = std::get_if<GmemNode>(&*node)) {
const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor()); return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
const Id real = AsUint(Visit(gmem->GetRealAddress()));
const Id base = AsUint(Visit(gmem->GetBaseAddress()));
Id offset = OpISub(t_uint, real, base);
offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U));
return {OpLoad(t_float,
OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)),
Type::Float};
} }
if (const auto lmem = std::get_if<LmemNode>(&*node)) { if (const auto lmem = std::get_if<LmemNode>(&*node)) {
@ -1142,10 +1134,7 @@ private:
} }
if (const auto smem = std::get_if<SmemNode>(&*node)) { if (const auto smem = std::get_if<SmemNode>(&*node)) {
Id address = AsUint(Visit(smem->GetAddress())); return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
return {OpLoad(t_uint, pointer), Type::Uint};
} }
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) { if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
@ -1339,20 +1328,10 @@ private:
target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float}; target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
} else if (const auto smem = std::get_if<SmemNode>(&*dest)) { } else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
ASSERT(stage == ShaderType::Compute); target = {GetSharedMemoryPointer(*smem), Type::Uint};
Id address = AsUint(Visit(smem->GetAddress()));
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint};
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) { } else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
const Id real = AsUint(Visit(gmem->GetRealAddress())); target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
const Id base = AsUint(Visit(gmem->GetBaseAddress()));
const Id diff = OpISub(t_uint, real, base);
const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset),
Type::Float};
} else { } else {
UNIMPLEMENTED(); UNIMPLEMENTED();
@ -1804,11 +1783,16 @@ private:
return {}; return {};
} }
Expression UAtomicAdd(Operation operation) { Expression AtomicAdd(Operation operation) {
const auto& smem = std::get<SmemNode>(*operation[0]); Id pointer;
Id address = AsUint(Visit(smem.GetAddress())); if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U)); pointer = GetSharedMemoryPointer(*smem);
const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address); } else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
pointer = GetGlobalMemoryPointer(*gmem);
} else {
UNREACHABLE();
return {Constant(t_uint, 0), Type::Uint};
}
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
const Id semantics = Constant(t_uint, 0U); const Id semantics = Constant(t_uint, 0U);
@ -2243,6 +2227,22 @@ private:
return {}; return {};
} }
/// Emits the access chain that points at the element of a global memory buffer
/// referenced by a global memory node.
/// @param gmem Node providing the real address, the base address and the descriptor
///             used to look up the buffer.
/// @return Id of the access chain into member 0 of the buffer found for the descriptor.
Id GetGlobalMemoryPointer(const GmemNode& gmem) {
    const Id real_address = AsUint(Visit(gmem.GetRealAddress()));
    const Id base_address = AsUint(Visit(gmem.GetBaseAddress()));

    // Offset of the access relative to the base address of the buffer.
    const Id relative = OpISub(t_uint, real_address, base_address);

    // Turn the offset into an element index by shifting it right by two bits.
    const Id shift = Constant(t_uint, 2);
    const Id element = OpShiftRightLogical(t_uint, relative, shift);

    const Id ssbo = global_buffers.at(gmem.GetDescriptor());
    const Id member = Constant(t_uint, 0);
    return OpAccessChain(t_gmem_uint, ssbo, member, element);
}
/// Emits the access chain that points at the element of shared memory referenced
/// by a shared memory node. Asserts that the current stage is a compute shader.
/// @param smem Node providing the address to access.
/// @return Id of the access chain into `shared_memory`.
Id GetSharedMemoryPointer(const SmemNode& smem) {
    ASSERT(stage == ShaderType::Compute);

    const Id raw_address = AsUint(Visit(smem.GetAddress()));

    // Turn the address into an element index by shifting it right by two bits.
    const Id shift = Constant(t_uint, 2U);
    const Id element = OpShiftRightLogical(t_uint, raw_address, shift);

    return OpAccessChain(t_smem_uint, shared_memory, element);
}
static constexpr std::array operation_decompilers = { static constexpr std::array operation_decompilers = {
&SPIRVDecompiler::Assign, &SPIRVDecompiler::Assign,
@ -2389,7 +2389,7 @@ private:
&SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageXor,
&SPIRVDecompiler::AtomicImageExchange, &SPIRVDecompiler::AtomicImageExchange,
&SPIRVDecompiler::UAtomicAdd, &SPIRVDecompiler::AtomicAdd,
&SPIRVDecompiler::Branch, &SPIRVDecompiler::Branch,
&SPIRVDecompiler::BranchIndirect, &SPIRVDecompiler::BranchIndirect,
@ -2485,9 +2485,9 @@ private:
Id t_smem_uint{}; Id t_smem_uint{};
const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float); const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
const Id t_gmem_array = const Id t_gmem_array =
Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray"); Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
const Id t_gmem_struct = MemberDecorate( const Id t_gmem_struct = MemberDecorate(
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0); Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct); const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);

View file

@ -19,6 +19,8 @@ namespace VideoCommon::Shader {
using Tegra::Shader::AtomicOp; using Tegra::Shader::AtomicOp;
using Tegra::Shader::AtomicType; using Tegra::Shader::AtomicType;
using Tegra::Shader::Attribute; using Tegra::Shader::Attribute;
using Tegra::Shader::GlobalAtomicOp;
using Tegra::Shader::GlobalAtomicType;
using Tegra::Shader::Instruction; using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode; using Tegra::Shader::OpCode;
using Tegra::Shader::Register; using Tegra::Shader::Register;
@ -335,6 +337,24 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
} }
break; break;
} }
case OpCode::Id::ATOM: {
    // Anything other than an Add operation on S32 operands is reported as unimplemented.
    UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
                         static_cast<int>(instr.atom.operation.Value()));
    UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
                         static_cast<int>(instr.atom.type.Value()));

    const auto [real_address, base_address, descriptor] =
        TrackGlobalMemory(bb, instr, true, true);
    if (!(real_address && base_address)) {
        // Both addresses are needed to build the memory node; without them the
        // atomic is dropped.
        break;
    }

    // Emit the atomic add on the tracked global memory and store its result in gpr0.
    Node memory = MakeNode<GmemNode>(real_address, base_address, descriptor);
    Node addend = GetRegister(instr.gpr20);
    Node result = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(addend));
    SetRegister(bb, instr.gpr0, std::move(result));
    break;
}
case OpCode::Id::ATOMS: { case OpCode::Id::ATOMS: {
UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}", UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
static_cast<int>(instr.atoms.operation.Value())); static_cast<int>(instr.atoms.operation.Value()));
@ -348,7 +368,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
Node memory = GetSharedMemory(std::move(address)); Node memory = GetSharedMemory(std::move(address));
Node data = GetRegister(instr.gpr20); Node data = GetRegister(instr.gpr20);
Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data)); Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
SetRegister(bb, instr.gpr0, std::move(value)); SetRegister(bb, instr.gpr0, std::move(value));
break; break;
} }

View file

@ -162,7 +162,7 @@ enum class OperationCode {
AtomicImageXor, /// (MetaImage, int[N] coords) -> void AtomicImageXor, /// (MetaImage, int[N] coords) -> void
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
UAtomicAdd, /// (smem, uint) -> uint AtomicAdd, /// (memory, {u}int) -> {u}int
Branch, /// (uint branch_target) -> void Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void