Merge pull request #3612 from ReinUsesLisp/red

shader/memory: Implement RED.E.ADD and minor changes to ATOM
This commit is contained in:
Fernando Sahmkow 2020-04-15 15:03:49 -04:00 committed by GitHub
commit e33196d4e7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 140 additions and 69 deletions

View file

@ -1005,6 +1005,12 @@ union Instruction {
BitField<46, 2, u64> cache_mode; BitField<46, 2, u64> cache_mode;
} stg; } stg;
// Encoding fields of the RED (reduction) instruction.
union {
BitField<23, 3, AtomicOp> operation; // Reduction operator to apply
BitField<48, 1, u64> extended; // NOTE(review): presumably the ".E" modifier (as in RED.E.ADD) - confirm
BitField<20, 3, GlobalAtomicType> type; // Data type of the operand
} red;
union { union {
BitField<52, 4, AtomicOp> operation; BitField<52, 4, AtomicOp> operation;
BitField<49, 3, GlobalAtomicType> type; BitField<49, 3, GlobalAtomicType> type;
@ -1787,6 +1793,7 @@ public:
ST_S, ST_S,
ST, // Store in generic memory ST, // Store in generic memory
STG, // Store in global memory STG, // Store in global memory
RED, // Reduction operation
ATOM, // Atomic operation on global memory ATOM, // Atomic operation on global memory
ATOMS, // Atomic operation on shared memory ATOMS, // Atomic operation on shared memory
AL2P, // Transforms attribute memory into physical memory AL2P, // Transforms attribute memory into physical memory
@ -2097,6 +2104,7 @@ private:
INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
INST("101-------------", Id::ST, Type::Memory, "ST"), INST("101-------------", Id::ST, Type::Memory, "ST"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("1110101111111---", Id::RED, Type::Memory, "RED"),
INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"),
INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),

View file

@ -2119,8 +2119,14 @@ private:
return {}; return {};
} }
return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(),
Visit(operation[1]).As(type)), Visit(operation[1]).AsUint()),
type}; Type::Uint};
}
template <const std::string_view& opname, Type type>
Expression Reduce(Operation operation) {
code.AddLine("{};", Atomic<opname, type>(operation).GetCode());
return {};
} }
Expression Branch(Operation operation) { Expression Branch(Operation operation) {
@ -2479,6 +2485,20 @@ private:
&GLSLDecompiler::Atomic<Func::Or, Type::Int>, &GLSLDecompiler::Atomic<Func::Or, Type::Int>,
&GLSLDecompiler::Atomic<Func::Xor, Type::Int>, &GLSLDecompiler::Atomic<Func::Xor, Type::Int>,
&GLSLDecompiler::Reduce<Func::Add, Type::Uint>,
&GLSLDecompiler::Reduce<Func::Min, Type::Uint>,
&GLSLDecompiler::Reduce<Func::Max, Type::Uint>,
&GLSLDecompiler::Reduce<Func::And, Type::Uint>,
&GLSLDecompiler::Reduce<Func::Or, Type::Uint>,
&GLSLDecompiler::Reduce<Func::Xor, Type::Uint>,
&GLSLDecompiler::Reduce<Func::Add, Type::Int>,
&GLSLDecompiler::Reduce<Func::Min, Type::Int>,
&GLSLDecompiler::Reduce<Func::Max, Type::Int>,
&GLSLDecompiler::Reduce<Func::And, Type::Int>,
&GLSLDecompiler::Reduce<Func::Or, Type::Int>,
&GLSLDecompiler::Reduce<Func::Xor, Type::Int>,
&GLSLDecompiler::Branch, &GLSLDecompiler::Branch,
&GLSLDecompiler::BranchIndirect, &GLSLDecompiler::BranchIndirect,
&GLSLDecompiler::PushFlowStack, &GLSLDecompiler::PushFlowStack,

View file

@ -1938,11 +1938,8 @@ private:
return {}; return {};
} }
template <Id (Module::*func)(Id, Id, Id, Id, Id), Type result_type, template <Id (Module::*func)(Id, Id, Id, Id, Id)>
Type value_type = result_type>
Expression Atomic(Operation operation) { Expression Atomic(Operation operation) {
const Id type_def = GetTypeDefinition(result_type);
Id pointer; Id pointer;
if (const auto smem = std::get_if<SmemNode>(&*operation[0])) { if (const auto smem = std::get_if<SmemNode>(&*operation[0])) {
pointer = GetSharedMemoryPointer(*smem); pointer = GetSharedMemoryPointer(*smem);
@ -1950,15 +1947,19 @@ private:
pointer = GetGlobalMemoryPointer(*gmem); pointer = GetGlobalMemoryPointer(*gmem);
} else { } else {
UNREACHABLE(); UNREACHABLE();
return {Constant(type_def, 0), result_type}; return {v_float_zero, Type::Float};
} }
const Id value = As(Visit(operation[1]), value_type);
const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device)); const Id scope = Constant(t_uint, static_cast<u32>(spv::Scope::Device));
const Id semantics = Constant(type_def, 0); const Id semantics = Constant(t_uint, 0);
const Id value = AsUint(Visit(operation[1]));
return {(this->*func)(type_def, pointer, scope, semantics, value), result_type}; return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint};
}
template <Id (Module::*func)(Id, Id, Id, Id, Id)>
Expression Reduce(Operation operation) {
Atomic<func>(operation);
return {};
} }
Expression Branch(Operation operation) { Expression Branch(Operation operation) {
@ -2547,21 +2548,35 @@ private:
&SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageXor,
&SPIRVDecompiler::AtomicImageExchange, &SPIRVDecompiler::AtomicImageExchange,
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>, &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>, &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>, &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>, &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>, &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>, &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>, &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>, &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>, &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>, &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>, &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>, &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>, &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>,
&SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>, &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicOr>,
&SPIRVDecompiler::Reduce<&Module::OpAtomicXor>,
&SPIRVDecompiler::Branch, &SPIRVDecompiler::Branch,
&SPIRVDecompiler::BranchIndirect, &SPIRVDecompiler::BranchIndirect,

View file

@ -3,7 +3,9 @@
// Refer to the license.txt file included. // Refer to the license.txt file included.
#include <algorithm> #include <algorithm>
#include <utility>
#include <vector> #include <vector>
#include <fmt/format.h> #include <fmt/format.h>
#include "common/alignment.h" #include "common/alignment.h"
@ -16,6 +18,7 @@
namespace VideoCommon::Shader { namespace VideoCommon::Shader {
using std::move;
using Tegra::Shader::AtomicOp; using Tegra::Shader::AtomicOp;
using Tegra::Shader::AtomicType; using Tegra::Shader::AtomicType;
using Tegra::Shader::Attribute; using Tegra::Shader::Attribute;
@ -27,29 +30,26 @@ using Tegra::Shader::StoreType;
namespace { namespace {
Node GetAtomOperation(AtomicOp op, bool is_signed, Node memory, Node data) { OperationCode GetAtomOperation(AtomicOp op) {
const OperationCode operation_code = [op] { switch (op) {
switch (op) { case AtomicOp::Add:
case AtomicOp::Add: return OperationCode::AtomicIAdd;
return OperationCode::AtomicIAdd; case AtomicOp::Min:
case AtomicOp::Min: return OperationCode::AtomicIMin;
return OperationCode::AtomicIMin; case AtomicOp::Max:
case AtomicOp::Max: return OperationCode::AtomicIMax;
return OperationCode::AtomicIMax; case AtomicOp::And:
case AtomicOp::And: return OperationCode::AtomicIAnd;
return OperationCode::AtomicIAnd; case AtomicOp::Or:
case AtomicOp::Or: return OperationCode::AtomicIOr;
return OperationCode::AtomicIOr; case AtomicOp::Xor:
case AtomicOp::Xor: return OperationCode::AtomicIXor;
return OperationCode::AtomicIXor; case AtomicOp::Exch:
case AtomicOp::Exch: return OperationCode::AtomicIExchange;
return OperationCode::AtomicIExchange; default:
default: UNIMPLEMENTED_MSG("op={}", static_cast<int>(op));
UNIMPLEMENTED_MSG("op={}", static_cast<int>(op)); return OperationCode::AtomicIAdd;
return OperationCode::AtomicIAdd; }
}
}();
return SignedOperation(operation_code, is_signed, std::move(memory), std::move(data));
} }
bool IsUnaligned(Tegra::Shader::UniformType uniform_type) { bool IsUnaligned(Tegra::Shader::UniformType uniform_type) {
@ -90,23 +90,22 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) { Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask)); Node offset = Operation(OperationCode::UBitwiseAnd, address, Immediate(mask));
offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset), return Operation(OperationCode::UBitfieldExtract, move(value), move(offset), Immediate(size));
Immediate(size));
} }
Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) { Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask)); Node offset = Operation(OperationCode::UBitwiseAnd, move(address), Immediate(mask));
offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3)); offset = Operation(OperationCode::ULogicalShiftLeft, move(offset), Immediate(3));
return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value), return Operation(OperationCode::UBitfieldInsert, move(dest), move(value), move(offset),
std::move(offset), Immediate(size)); Immediate(size));
} }
Node Sign16Extend(Node value) { Node Sign16Extend(Node value) {
Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15)); Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(1U << 15));
Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(1U << 15)); Node is_sign = Operation(OperationCode::LogicalUEqual, move(sign), Immediate(1U << 15));
Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0)); Node extend = Operation(OperationCode::Select, is_sign, Immediate(0xFFFF0000), Immediate(0));
return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend)); return Operation(OperationCode::UBitwiseOr, move(value), move(extend));
} }
} // Anonymous namespace } // Anonymous namespace
@ -379,20 +378,36 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
if (IsUnaligned(type)) { if (IsUnaligned(type)) {
const u32 mask = GetUnalignedMask(type); const u32 mask = GetUnalignedMask(type);
value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); value = InsertUnaligned(gmem, move(value), real_address, mask, size);
} }
bb.push_back(Operation(OperationCode::Assign, gmem, value)); bb.push_back(Operation(OperationCode::Assign, gmem, value));
} }
break; break;
} }
case OpCode::Id::RED: {
    // Only U32 additions are lowered here. Include the rejected encoding in the message, in
    // the same "name={}" form the ATOM case uses, so unsupported shaders can be diagnosed
    // from the log instead of producing a bare assertion.
    UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32, "type={}",
                         static_cast<int>(instr.red.type.Value()));
    UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add, "operation={}",
                         static_cast<int>(instr.red.operation.Value()));
    const auto [real_address, base_address, descriptor] =
        TrackGlobalMemory(bb, instr, true, true);
    if (!real_address || !base_address) {
        // Tracking failed, skip atomic.
        break;
    }
    Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
    Node value = GetRegister(instr.gpr0);
    // The reduction yields no value (ReduceIAdd is "(memory, int) -> void"), so it is pushed
    // as a standalone node rather than being assigned to a register.
    bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value)));
    break;
}
case OpCode::Id::ATOM: { case OpCode::Id::ATOM: {
UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc ||
instr.atom.operation == AtomicOp::Dec || instr.atom.operation == AtomicOp::Dec ||
instr.atom.operation == AtomicOp::SafeAdd, instr.atom.operation == AtomicOp::SafeAdd,
"operation={}", static_cast<int>(instr.atom.operation.Value())); "operation={}", static_cast<int>(instr.atom.operation.Value()));
UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 || UNIMPLEMENTED_IF_MSG(instr.atom.type == GlobalAtomicType::S64 ||
instr.atom.type == GlobalAtomicType::U64, instr.atom.type == GlobalAtomicType::U64 ||
instr.atom.type == GlobalAtomicType::F16x2_FTZ_RN ||
instr.atom.type == GlobalAtomicType::F32_FTZ_RN,
"type={}", static_cast<int>(instr.atom.type.Value())); "type={}", static_cast<int>(instr.atom.type.Value()));
const auto [real_address, base_address, descriptor] = const auto [real_address, base_address, descriptor] =
@ -403,11 +418,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
} }
const bool is_signed = const bool is_signed =
instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; instr.atom.type == GlobalAtomicType::S32 || instr.atom.type == GlobalAtomicType::S64;
Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor); Node gmem = MakeNode<GmemNode>(real_address, base_address, descriptor);
Node value = GetAtomOperation(static_cast<AtomicOp>(instr.atom.operation), is_signed, gmem, SetRegister(bb, instr.gpr0,
GetRegister(instr.gpr20)); SignedOperation(GetAtomOperation(instr.atom.operation), is_signed, gmem,
SetRegister(bb, instr.gpr0, std::move(value)); GetRegister(instr.gpr20)));
break; break;
} }
case OpCode::Id::ATOMS: { case OpCode::Id::ATOMS: {
@ -421,11 +436,10 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64; instr.atoms.type == AtomicType::S32 || instr.atoms.type == AtomicType::S64;
const s32 offset = instr.atoms.GetImmediateOffset(); const s32 offset = instr.atoms.GetImmediateOffset();
Node address = GetRegister(instr.gpr8); Node address = GetRegister(instr.gpr8);
address = Operation(OperationCode::IAdd, std::move(address), Immediate(offset)); address = Operation(OperationCode::IAdd, move(address), Immediate(offset));
Node value = SetRegister(bb, instr.gpr0,
GetAtomOperation(static_cast<AtomicOp>(instr.atoms.operation), is_signed, SignedOperation(GetAtomOperation(instr.atoms.operation), is_signed,
GetSharedMemory(std::move(address)), GetRegister(instr.gpr20)); GetSharedMemory(move(address)), GetRegister(instr.gpr20)));
SetRegister(bb, instr.gpr0, std::move(value));
break; break;
} }
case OpCode::Id::AL2P: { case OpCode::Id::AL2P: {

View file

@ -178,6 +178,20 @@ enum class OperationCode {
AtomicIOr, /// (memory, int) -> int AtomicIOr, /// (memory, int) -> int
AtomicIXor, /// (memory, int) -> int AtomicIXor, /// (memory, int) -> int
ReduceUAdd, /// (memory, uint) -> void
ReduceUMin, /// (memory, uint) -> void
ReduceUMax, /// (memory, uint) -> void
ReduceUAnd, /// (memory, uint) -> void
ReduceUOr, /// (memory, uint) -> void
ReduceUXor, /// (memory, uint) -> void
ReduceIAdd, /// (memory, int) -> void
ReduceIMin, /// (memory, int) -> void
ReduceIMax, /// (memory, int) -> void
ReduceIAnd, /// (memory, int) -> void
ReduceIOr, /// (memory, int) -> void
ReduceIXor, /// (memory, int) -> void
Branch, /// (uint branch_target) -> void Branch, /// (uint branch_target) -> void
BranchIndirect, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void
PushFlowStack, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void