Merge pull request #3347 from ReinUsesLisp/local-mem

shader/memory: Implement LDL.S16, LDS.S16, STL.S16 and STS.S16
This commit is contained in:
bunnei 2020-01-30 10:59:52 -05:00 committed by GitHub
commit c593e45dbd
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -24,6 +24,7 @@ using Tegra::Shader::GlobalAtomicType;
using Tegra::Shader::Instruction; using Tegra::Shader::Instruction;
using Tegra::Shader::OpCode; using Tegra::Shader::OpCode;
using Tegra::Shader::Register; using Tegra::Shader::Register;
using Tegra::Shader::StoreType;
namespace { namespace {
@ -63,6 +64,27 @@ u32 GetMemorySize(Tegra::Shader::UniformType uniform_type) {
} }
} }
/// Builds the IR that pulls a sub-word field out of a loaded word.
///
/// The bit offset is computed as `(address & mask) << 3` and handed, together with `size`, to a
/// UBitfieldExtract operation applied to `value`.
///
/// @param value   Node holding the loaded word the field is extracted from.
/// @param address Node holding the address used to select the field inside the word.
/// @param mask    Mask applied to the address before it is scaled.
/// @param size    Passed as the last UBitfieldExtract operand (presumably the field width in
///                bits - callers in this file pass 16 for 16-bit accesses).
/// @return Node representing the extracted field.
Node ExtractUnaligned(Node value, Node address, u32 mask, u32 size) {
    // `address` is taken by value and not used again, so hand it over instead of copying it
    // (this also matches what InsertUnaligned does).
    Node offset = Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));
    // Shift left by 3, i.e. multiply by 8 (presumably a byte offset turned into a bit offset).
    offset = Operation(OperationCode::ULogicalShiftLeft, std::move(offset), Immediate(3));
    return Operation(OperationCode::UBitfieldExtract, std::move(value), std::move(offset),
                     Immediate(size));
}
/// Builds the IR that writes a sub-word field into an existing word.
///
/// A UBitfieldInsert operation is emitted with `dest`, `value`, the bit offset
/// `(address & mask) << 3` and `size` as operands, in that order.
///
/// @param dest    Node holding the word that receives the field.
/// @param value   Node holding the data to insert.
/// @param address Node holding the address used to select the position inside the word.
/// @param mask    Mask applied to the address before it is scaled.
/// @param size    Passed as the last UBitfieldInsert operand (presumably the field width in
///                bits).
/// @return Node representing `dest` with the field inserted.
Node InsertUnaligned(Node dest, Node value, Node address, u32 mask, u32 size) {
    // Keep only the address bits selected by the mask.
    Node masked_address =
        Operation(OperationCode::UBitwiseAnd, std::move(address), Immediate(mask));

    // Scale by 8 through a left shift of 3.
    Node bit_offset =
        Operation(OperationCode::ULogicalShiftLeft, std::move(masked_address), Immediate(3));

    Node bit_count = Immediate(size);
    return Operation(OperationCode::UBitfieldInsert, std::move(dest), std::move(value),
                     std::move(bit_offset), std::move(bit_count));
}
/// Builds the IR that sign-extends a 16-bit quantity to 32 bits.
///
/// Bit 15 of `value` is tested; when it is set, the result is `value | 0xFFFF0000`, otherwise
/// it is `value | 0`. Bits 16..31 of `value` are not cleared here, so the caller is expected to
/// pass a value whose upper half is already zero (the caller visible in this file passes the
/// result of a 16-bit ExtractUnaligned).
///
/// @param value Node holding the 16-bit value in its low bits.
/// @return Node representing the sign-extended value.
Node Sign16Extend(Node value) {
    // Single definition of the sign bit instead of repeating the literal.
    constexpr auto sign_bit = 1U << 15;

    // `value` is still needed for the final OR, so it is copied here and moved below.
    Node sign = Operation(OperationCode::UBitwiseAnd, value, Immediate(sign_bit));
    Node is_sign = Operation(OperationCode::LogicalUEqual, std::move(sign), Immediate(sign_bit));
    // `is_sign` has no further use after the select, so move it rather than copy it.
    Node extend = Operation(OperationCode::Select, std::move(is_sign), Immediate(0xFFFF0000),
                            Immediate(0));
    return Operation(OperationCode::UBitwiseOr, std::move(value), std::move(extend));
}
} // Anonymous namespace } // Anonymous namespace
u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@ -138,26 +160,31 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown)); LOG_DEBUG(HW_GPU, "LD_L cache management mode: {}", static_cast<u64>(instr.ld_l.unknown));
[[fallthrough]]; [[fallthrough]];
case OpCode::Id::LD_S: { case OpCode::Id::LD_S: {
const auto GetMemory = [&](s32 offset) { const auto GetAddress = [&](s32 offset) {
ASSERT(offset % 4 == 0); ASSERT(offset % 4 == 0);
const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset); const Node immediate_offset = Immediate(static_cast<s32>(instr.smem_imm) + offset);
const Node address = Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), return Operation(OperationCode::IAdd, GetRegister(instr.gpr8), immediate_offset);
immediate_offset); };
return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(address) const auto GetMemory = [&](s32 offset) {
: GetLocalMemory(address); return opcode->get().GetId() == OpCode::Id::LD_S ? GetSharedMemory(GetAddress(offset))
: GetLocalMemory(GetAddress(offset));
}; };
switch (instr.ldst_sl.type.Value()) { switch (instr.ldst_sl.type.Value()) {
case Tegra::Shader::StoreType::Bits32: case StoreType::Signed16:
case Tegra::Shader::StoreType::Bits64: SetRegister(bb, instr.gpr0,
case Tegra::Shader::StoreType::Bits128: { Sign16Extend(ExtractUnaligned(GetMemory(0), GetAddress(0), 0b10, 16)));
const u32 count = [&]() { break;
case StoreType::Bits32:
case StoreType::Bits64:
case StoreType::Bits128: {
const u32 count = [&] {
switch (instr.ldst_sl.type.Value()) { switch (instr.ldst_sl.type.Value()) {
case Tegra::Shader::StoreType::Bits32: case StoreType::Bits32:
return 1; return 1;
case Tegra::Shader::StoreType::Bits64: case StoreType::Bits64:
return 2; return 2;
case Tegra::Shader::StoreType::Bits128: case StoreType::Bits128:
return 4; return 4;
default: default:
UNREACHABLE(); UNREACHABLE();
@ -214,12 +241,7 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
// To handle unaligned loads get the bytes used to dereference global memory and extract // To handle unaligned loads get the bytes used to dereference global memory and extract
// those bytes from the loaded u32. // those bytes from the loaded u32.
if (IsUnaligned(type)) { if (IsUnaligned(type)) {
Node mask = Immediate(GetUnalignedMask(type)); gmem = ExtractUnaligned(gmem, real_address, GetUnalignedMask(type), size);
Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask));
offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
gmem = Operation(OperationCode::UBitfieldExtract, std::move(gmem),
std::move(offset), Immediate(size));
} }
SetTemporary(bb, i, gmem); SetTemporary(bb, i, gmem);
@ -271,21 +293,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate); return Operation(OperationCode::IAdd, NO_PRECISE, GetRegister(instr.gpr8), immediate);
}; };
const auto set_memory = opcode->get().GetId() == OpCode::Id::ST_L const bool is_local = opcode->get().GetId() == OpCode::Id::ST_L;
? &ShaderIR::SetLocalMemory const auto set_memory = is_local ? &ShaderIR::SetLocalMemory : &ShaderIR::SetSharedMemory;
: &ShaderIR::SetSharedMemory; const auto get_memory = is_local ? &ShaderIR::GetLocalMemory : &ShaderIR::GetSharedMemory;
switch (instr.ldst_sl.type.Value()) { switch (instr.ldst_sl.type.Value()) {
case Tegra::Shader::StoreType::Bits128: case StoreType::Bits128:
(this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3)); (this->*set_memory)(bb, GetAddress(12), GetRegister(instr.gpr0.Value() + 3));
(this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2)); (this->*set_memory)(bb, GetAddress(8), GetRegister(instr.gpr0.Value() + 2));
[[fallthrough]]; [[fallthrough]];
case Tegra::Shader::StoreType::Bits64: case StoreType::Bits64:
(this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1)); (this->*set_memory)(bb, GetAddress(4), GetRegister(instr.gpr0.Value() + 1));
[[fallthrough]]; [[fallthrough]];
case Tegra::Shader::StoreType::Bits32: case StoreType::Bits32:
(this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0)); (this->*set_memory)(bb, GetAddress(0), GetRegister(instr.gpr0));
break; break;
case StoreType::Signed16: {
Node address = GetAddress(0);
Node memory = (this->*get_memory)(address);
(this->*set_memory)(
bb, address, InsertUnaligned(memory, GetRegister(instr.gpr0), address, 0b10, 16));
break;
}
default: default:
UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(), UNIMPLEMENTED_MSG("{} unhandled type: {}", opcode->get().GetName(),
static_cast<u32>(instr.ldst_sl.type.Value())); static_cast<u32>(instr.ldst_sl.type.Value()));
@ -325,12 +354,8 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
Node value = GetRegister(instr.gpr0.Value() + i); Node value = GetRegister(instr.gpr0.Value() + i);
if (IsUnaligned(type)) { if (IsUnaligned(type)) {
Node mask = Immediate(GetUnalignedMask(type)); const u32 mask = GetUnalignedMask(type);
Node offset = Operation(OperationCode::UBitwiseAnd, real_address, std::move(mask)); value = InsertUnaligned(gmem, std::move(value), real_address, mask, size);
offset = Operation(OperationCode::ULogicalShiftLeft, offset, Immediate(3));
value = Operation(OperationCode::UBitfieldInsert, gmem, std::move(value), offset,
Immediate(size));
} }
bb.push_back(Operation(OperationCode::Assign, gmem, value)); bb.push_back(Operation(OperationCode::Assign, gmem, value));