Merge pull request #3350 from ReinUsesLisp/atom
shader/memory: Implement ATOM.ADD
This commit is contained in:
commit
2db7adc42a
5 changed files with 86 additions and 39 deletions
|
@ -227,6 +227,28 @@ enum class AtomicOp : u64 {
|
||||||
Exch = 8,
|
Exch = 8,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
enum class GlobalAtomicOp : u64 {
|
||||||
|
Add = 0,
|
||||||
|
Min = 1,
|
||||||
|
Max = 2,
|
||||||
|
Inc = 3,
|
||||||
|
Dec = 4,
|
||||||
|
And = 5,
|
||||||
|
Or = 6,
|
||||||
|
Xor = 7,
|
||||||
|
Exch = 8,
|
||||||
|
SafeAdd = 10,
|
||||||
|
};
|
||||||
|
|
||||||
|
enum class GlobalAtomicType : u64 {
|
||||||
|
U32 = 0,
|
||||||
|
S32 = 1,
|
||||||
|
U64 = 2,
|
||||||
|
F32_FTZ_RN = 3,
|
||||||
|
F16x2_FTZ_RN = 4,
|
||||||
|
S64 = 5,
|
||||||
|
};
|
||||||
|
|
||||||
enum class UniformType : u64 {
|
enum class UniformType : u64 {
|
||||||
UnsignedByte = 0,
|
UnsignedByte = 0,
|
||||||
SignedByte = 1,
|
SignedByte = 1,
|
||||||
|
@ -957,6 +979,12 @@ union Instruction {
|
||||||
BitField<46, 2, u64> cache_mode;
|
BitField<46, 2, u64> cache_mode;
|
||||||
} stg;
|
} stg;
|
||||||
|
|
||||||
|
union {
|
||||||
|
BitField<52, 4, GlobalAtomicOp> operation;
|
||||||
|
BitField<49, 3, GlobalAtomicType> type;
|
||||||
|
BitField<28, 20, s64> offset;
|
||||||
|
} atom;
|
||||||
|
|
||||||
union {
|
union {
|
||||||
BitField<52, 4, AtomicOp> operation;
|
BitField<52, 4, AtomicOp> operation;
|
||||||
BitField<28, 2, AtomicType> type;
|
BitField<28, 2, AtomicType> type;
|
||||||
|
@ -1690,6 +1718,7 @@ public:
|
||||||
ST_S,
|
ST_S,
|
||||||
ST, // Store in generic memory
|
ST, // Store in generic memory
|
||||||
STG, // Store in global memory
|
STG, // Store in global memory
|
||||||
|
ATOM, // Atomic operation on global memory
|
||||||
ATOMS, // Atomic operation on shared memory
|
ATOMS, // Atomic operation on shared memory
|
||||||
AL2P, // Transforms attribute memory into physical memory
|
AL2P, // Transforms attribute memory into physical memory
|
||||||
TEX,
|
TEX,
|
||||||
|
@ -1994,6 +2023,7 @@ private:
|
||||||
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
|
||||||
INST("101-------------", Id::ST, Type::Memory, "ST"),
|
INST("101-------------", Id::ST, Type::Memory, "ST"),
|
||||||
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
|
||||||
|
INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
|
||||||
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
|
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
|
||||||
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
|
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
|
||||||
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
|
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
|
||||||
|
|
|
@ -1857,10 +1857,7 @@ private:
|
||||||
|
|
||||||
template <const std::string_view& opname, Type type>
|
template <const std::string_view& opname, Type type>
|
||||||
Expression Atomic(Operation operation) {
|
Expression Atomic(Operation operation) {
|
||||||
ASSERT(stage == ShaderType::Compute);
|
return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
|
||||||
auto& smem = std::get<SmemNode>(*operation[0]);
|
|
||||||
|
|
||||||
return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
|
|
||||||
Visit(operation[1]).As(type)),
|
Visit(operation[1]).As(type)),
|
||||||
type};
|
type};
|
||||||
}
|
}
|
||||||
|
|
|
@ -1123,15 +1123,7 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
if (const auto gmem = std::get_if<GmemNode>(&*node)) {
|
||||||
const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
|
return {OpLoad(t_uint, GetGlobalMemoryPointer(*gmem)), Type::Uint};
|
||||||
const Id real = AsUint(Visit(gmem->GetRealAddress()));
|
|
||||||
const Id base = AsUint(Visit(gmem->GetBaseAddress()));
|
|
||||||
|
|
||||||
Id offset = OpISub(t_uint, real, base);
|
|
||||||
offset = OpUDiv(t_uint, offset, Constant(t_uint, 4U));
|
|
||||||
return {OpLoad(t_float,
|
|
||||||
OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0U), offset)),
|
|
||||||
Type::Float};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
|
if (const auto lmem = std::get_if<LmemNode>(&*node)) {
|
||||||
|
@ -1142,10 +1134,7 @@ private:
|
||||||
}
|
}
|
||||||
|
|
||||||
if (const auto smem = std::get_if<SmemNode>(&*node)) {
|
if (const auto smem = std::get_if<SmemNode>(&*node)) {
|
||||||
Id address = AsUint(Visit(smem->GetAddress()));
|
return {OpLoad(t_uint, GetSharedMemoryPointer(*smem)), Type::Uint};
|
||||||
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
|
|
||||||
const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
|
|
||||||
return {OpLoad(t_uint, pointer), Type::Uint};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
|
if (const auto internal_flag = std::get_if<InternalFlagNode>(&*node)) {
|
||||||
|
@ -1339,20 +1328,10 @@ private:
|
||||||
target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
|
target = {OpAccessChain(t_prv_float, local_memory, address), Type::Float};
|
||||||
|
|
||||||
} else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
|
} else if (const auto smem = std::get_if<SmemNode>(&*dest)) {
|
||||||
ASSERT(stage == ShaderType::Compute);
|
target = {GetSharedMemoryPointer(*smem), Type::Uint};
|
||||||
Id address = AsUint(Visit(smem->GetAddress()));
|
|
||||||
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
|
|
||||||
target = {OpAccessChain(t_smem_uint, shared_memory, address), Type::Uint};
|
|
||||||
|
|
||||||
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
} else if (const auto gmem = std::get_if<GmemNode>(&*dest)) {
|
||||||
const Id real = AsUint(Visit(gmem->GetRealAddress()));
|
target = {GetGlobalMemoryPointer(*gmem), Type::Uint};
|
||||||
const Id base = AsUint(Visit(gmem->GetBaseAddress()));
|
|
||||||
const Id diff = OpISub(t_uint, real, base);
|
|
||||||
const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
|
|
||||||
|
|
||||||
const Id gmem_buffer = global_buffers.at(gmem->GetDescriptor());
|
|
||||||
target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset),
|
|
||||||
Type::Float};
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
UNIMPLEMENTED();
|
UNIMPLEMENTED();
|
||||||
|
@ -1804,11 +1783,16 @@ private:
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
Expression UAtomicAdd(Operation operation) {
|
Expression AtomicAdd(Operation operation) {
|
||||||
const auto& smem = std::get<SmemNode>(*operation[0]);
|
Id pointer;
|
||||||
Id address = AsUint(Visit(smem.GetAddress()));
|
if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
|
||||||
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
|
pointer = GetSharedMemoryPointer(*smem);
|
||||||
const Id pointer = OpAccessChain(t_smem_uint, shared_memory, address);
|
} else if (const auto gmem = std::get_if<GmemNode>(&*operation[0])) {
|
||||||
|
pointer = GetGlobalMemoryPointer(*gmem);
|
||||||
|
} else {
|
||||||
|
UNREACHABLE();
|
||||||
|
return {Constant(t_uint, 0), Type::Uint};
|
||||||
|
}
|
||||||
|
|
||||||
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
|
||||||
const Id semantics = Constant(t_uint, 0U);
|
const Id semantics = Constant(t_uint, 0U);
|
||||||
|
@ -2243,6 +2227,22 @@ private:
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Id GetGlobalMemoryPointer(const GmemNode& gmem) {
|
||||||
|
const Id real = AsUint(Visit(gmem.GetRealAddress()));
|
||||||
|
const Id base = AsUint(Visit(gmem.GetBaseAddress()));
|
||||||
|
const Id diff = OpISub(t_uint, real, base);
|
||||||
|
const Id offset = OpShiftRightLogical(t_uint, diff, Constant(t_uint, 2));
|
||||||
|
const Id buffer = global_buffers.at(gmem.GetDescriptor());
|
||||||
|
return OpAccessChain(t_gmem_uint, buffer, Constant(t_uint, 0), offset);
|
||||||
|
}
|
||||||
|
|
||||||
|
Id GetSharedMemoryPointer(const SmemNode& smem) {
|
||||||
|
ASSERT(stage == ShaderType::Compute);
|
||||||
|
Id address = AsUint(Visit(smem.GetAddress()));
|
||||||
|
address = OpShiftRightLogical(t_uint, address, Constant(t_uint, 2U));
|
||||||
|
return OpAccessChain(t_smem_uint, shared_memory, address);
|
||||||
|
}
|
||||||
|
|
||||||
static constexpr std::array operation_decompilers = {
|
static constexpr std::array operation_decompilers = {
|
||||||
&SPIRVDecompiler::Assign,
|
&SPIRVDecompiler::Assign,
|
||||||
|
|
||||||
|
@ -2389,7 +2389,7 @@ private:
|
||||||
&SPIRVDecompiler::AtomicImageXor,
|
&SPIRVDecompiler::AtomicImageXor,
|
||||||
&SPIRVDecompiler::AtomicImageExchange,
|
&SPIRVDecompiler::AtomicImageExchange,
|
||||||
|
|
||||||
&SPIRVDecompiler::UAtomicAdd,
|
&SPIRVDecompiler::AtomicAdd,
|
||||||
|
|
||||||
&SPIRVDecompiler::Branch,
|
&SPIRVDecompiler::Branch,
|
||||||
&SPIRVDecompiler::BranchIndirect,
|
&SPIRVDecompiler::BranchIndirect,
|
||||||
|
@ -2485,9 +2485,9 @@ private:
|
||||||
|
|
||||||
Id t_smem_uint{};
|
Id t_smem_uint{};
|
||||||
|
|
||||||
const Id t_gmem_float = TypePointer(spv::StorageClass::StorageBuffer, t_float);
|
const Id t_gmem_uint = TypePointer(spv::StorageClass::StorageBuffer, t_uint);
|
||||||
const Id t_gmem_array =
|
const Id t_gmem_array =
|
||||||
Name(Decorate(TypeRuntimeArray(t_float), spv::Decoration::ArrayStride, 4U), "GmemArray");
|
Name(Decorate(TypeRuntimeArray(t_uint), spv::Decoration::ArrayStride, 4U), "GmemArray");
|
||||||
const Id t_gmem_struct = MemberDecorate(
|
const Id t_gmem_struct = MemberDecorate(
|
||||||
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
|
Decorate(TypeStruct(t_gmem_array), spv::Decoration::Block), 0, spv::Decoration::Offset, 0);
|
||||||
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
|
const Id t_gmem_ssbo = TypePointer(spv::StorageClass::StorageBuffer, t_gmem_struct);
|
||||||
|
|
|
@ -19,6 +19,8 @@ namespace VideoCommon::Shader {
|
||||||
using Tegra::Shader::AtomicOp;
|
using Tegra::Shader::AtomicOp;
|
||||||
using Tegra::Shader::AtomicType;
|
using Tegra::Shader::AtomicType;
|
||||||
using Tegra::Shader::Attribute;
|
using Tegra::Shader::Attribute;
|
||||||
|
using Tegra::Shader::GlobalAtomicOp;
|
||||||
|
using Tegra::Shader::GlobalAtomicType;
|
||||||
using Tegra::Shader::Instruction;
|
using Tegra::Shader::Instruction;
|
||||||
using Tegra::Shader::OpCode;
|
using Tegra::Shader::OpCode;
|
||||||
using Tegra::Shader::Register;
|
using Tegra::Shader::Register;
|
||||||
|
@ -335,6 +337,24 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case OpCode::Id::ATOM: {
|
||||||
|
UNIMPLEMENTED_IF_MSG(instr.atom.operation != GlobalAtomicOp::Add, "operation={}",
|
||||||
|
static_cast<int>(instr.atom.operation.Value()));
|
||||||
|
UNIMPLEMENTED_IF_MSG(instr.atom.type != GlobalAtomicType::S32, "type={}",
|
||||||
|
static_cast<int>(instr.atom.type.Value()));
|
||||||
|
|
||||||
|
const auto [real_address, base_address, descriptor] =
|
||||||
|
TrackGlobalMemory(bb, instr, true, true);
|
||||||
|
if (!real_address || !base_address) {
|
||||||
|
// Tracking failed, skip atomic.
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
|
||||||
|
Node value = Operation(OperationCode::AtomicAdd, std::move(gmem), GetRegister(instr.gpr20));
|
||||||
|
SetRegister(bb, instr.gpr0, std::move(value));
|
||||||
|
break;
|
||||||
|
}
|
||||||
case OpCode::Id::ATOMS: {
|
case OpCode::Id::ATOMS: {
|
||||||
UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
|
UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
|
||||||
static_cast<int>(instr.atoms.operation.Value()));
|
static_cast<int>(instr.atoms.operation.Value()));
|
||||||
|
@ -348,7 +368,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
|
||||||
Node memory = GetSharedMemory(std::move(address));
|
Node memory = GetSharedMemory(std::move(address));
|
||||||
Node data = GetRegister(instr.gpr20);
|
Node data = GetRegister(instr.gpr20);
|
||||||
|
|
||||||
Node value = Operation(OperationCode::UAtomicAdd, std::move(memory), std::move(data));
|
Node value = Operation(OperationCode::AtomicAdd, std::move(memory), std::move(data));
|
||||||
SetRegister(bb, instr.gpr0, std::move(value));
|
SetRegister(bb, instr.gpr0, std::move(value));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
@ -162,7 +162,7 @@ enum class OperationCode {
|
||||||
AtomicImageXor, /// (MetaImage, int[N] coords) -> void
|
AtomicImageXor, /// (MetaImage, int[N] coords) -> void
|
||||||
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
|
AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
|
||||||
|
|
||||||
UAtomicAdd, /// (smem, uint) -> uint
|
AtomicAdd, /// (memory, {u}int) -> {u}int
|
||||||
|
|
||||||
Branch, /// (uint branch_target) -> void
|
Branch, /// (uint branch_target) -> void
|
||||||
BranchIndirect, /// (uint branch_target) -> void
|
BranchIndirect, /// (uint branch_target) -> void
|
||||||
|
|
Loading…
Reference in a new issue