diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 7bbc556da..e83f25fa1 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -529,6 +529,11 @@ union Instruction { BitField<39, 8, Register> gpr39; BitField<48, 16, u64> opcode; + union { + BitField<8, 8, Register> gpr; + BitField<20, 24, s64> offset; + } gmem; + union { BitField<20, 16, u64> imm20_16; BitField<20, 19, u64> imm20_19; @@ -812,13 +817,11 @@ union Instruction { union { BitField<48, 3, UniformType> type; BitField<46, 2, u64> cache_mode; - BitField<20, 24, s64> immediate_offset; } ldg; union { BitField<48, 3, UniformType> type; BitField<46, 2, u64> cache_mode; - BitField<20, 24, s64> immediate_offset; } stg; union { @@ -827,6 +830,11 @@ union Instruction { BitField<20, 11, u64> address; } al2p; + union { + BitField<53, 3, UniformType> type; + BitField<52, 1, u64> extended; + } generic; + union { BitField<0, 3, u64> pred0; BitField<3, 3, u64> pred3; @@ -1387,10 +1395,12 @@ public: LD_L, LD_S, LD_C, + LD, // Load from generic memory + LDG, // Load from global memory ST_A, ST_L, ST_S, - LDG, // Load from global memory + ST, // Store in generic memory STG, // Store in global memory AL2P, // Transforms attribute memory into physical memory TEX, @@ -1658,10 +1668,12 @@ private: INST("1110111101001---", Id::LD_S, Type::Memory, "LD_S"), INST("1110111101000---", Id::LD_L, Type::Memory, "LD_L"), INST("1110111110010---", Id::LD_C, Type::Memory, "LD_C"), + INST("100-------------", Id::LD, Type::Memory, "LD"), + INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), INST("1110111111110---", Id::ST_A, Type::Memory, "ST_A"), INST("1110111101011---", Id::ST_S, Type::Memory, "ST_S"), INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), - INST("1110111011010---", Id::LDG, Type::Memory, "LDG"), + INST("101-------------", Id::ST, Type::Memory, "ST"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), INST("110000----111---", Id::TEX, Type::Texture, "TEX"), diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 165c2b41b..e6a010a7d 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -146,12 +146,25 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::LD: case OpCode::Id::LDG: { - const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), - static_cast(instr.ldg.immediate_offset.Value()), false); + const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { + switch (opcode->get().GetId()) { + case OpCode::Id::LD: + UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended LD is not implemented"); + return instr.generic.type; + case OpCode::Id::LDG: + return instr.ldg.type; + default: + UNREACHABLE(); + return {}; + } + }(); - const u32 count = GetUniformTypeElementsCount(instr.ldg.type); + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, instr, false); + + const u32 count = GetUniformTypeElementsCount(type); for (u32 i = 0; i < count; ++i) { const Node it_offset = Immediate(i * 4); const Node real_address = @@ -165,28 +178,6 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } - case OpCode::Id::STG: { - const auto [real_address_base, base_address, descriptor] = - TrackAndGetGlobalMemory(bb, GetRegister(instr.gpr8), - static_cast(instr.stg.immediate_offset.Value()), true); - - // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} - SetTemporal(bb, 0, real_address_base); - - const u32 count = GetUniformTypeElementsCount(instr.stg.type); - for (u32 i = 0; i < count; ++i) { - SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); - } - for (u32 i = 0; i < count; ++i) { - const Node it_offset = Immediate(i * 4); - const Node real_address = - Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); - const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); - - bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); - } - break; - } case OpCode::Id::ST_A: { UNIMPLEMENTED_IF_MSG(instr.gpr8.Value() != Register::ZeroIndex, "Indirect attribute loads are not supported"); @@ -242,6 +233,41 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } break; } + case OpCode::Id::ST: + case OpCode::Id::STG: { + const auto type = [instr, &opcode]() -> Tegra::Shader::UniformType { + switch (opcode->get().GetId()) { + case OpCode::Id::ST: + UNIMPLEMENTED_IF_MSG(!instr.generic.extended, "Unextended ST is not implemented"); + return instr.generic.type; + case OpCode::Id::STG: + return instr.stg.type; + default: + UNREACHABLE(); + return {}; + } + }(); + + const auto [real_address_base, base_address, descriptor] = + TrackAndGetGlobalMemory(bb, instr, true); + + // Encode in temporary registers like this: real_base_address, {registers_to_be_written...} + SetTemporal(bb, 0, real_address_base); + + const u32 count = GetUniformTypeElementsCount(type); + for (u32 i = 0; i < count; ++i) { + SetTemporal(bb, i + 1, GetRegister(instr.gpr0.Value() + i)); + } + for (u32 i = 0; i < count; ++i) { + const Node it_offset = Immediate(i * 4); + const Node real_address = + Operation(OperationCode::UAdd, NO_PRECISE, real_address_base, it_offset); + const Node gmem = StoreNode(GmemNode(real_address, base_address, descriptor)); + + bb.push_back(Operation(OperationCode::Assign, gmem, GetTemporal(i + 1))); + } + break; + } case OpCode::Id::AL2P: { // Ignore al2p.direction since we don't care about it. @@ -265,9 +291,11 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { } std::tuple ShaderIR::TrackAndGetGlobalMemory(NodeBlock& bb, - Node addr_register, - u32 immediate_offset, + Instruction instr, bool is_write) { + const auto addr_register{GetRegister(instr.gmem.gpr)}; + const auto immediate_offset{static_cast(instr.gmem.offset)}; + const Node base_address{ TrackCbuf(addr_register, global_code, static_cast(global_code.size()))}; const auto cbuf = std::get_if(base_address); diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h index 34d183ec7..35f72bddb 100644 --- a/src/video_core/shader/shader_ir.h +++ b/src/video_core/shader/shader_ir.h @@ -818,10 +818,8 @@ private: std::pair TrackRegister(const GprNode* tracked, const NodeBlock& code, s64 cursor) const; - std::tuple TrackAndGetGlobalMemory(NodeBlock& bb, - Node addr_register, - u32 immediate_offset, - bool is_write); + std::tuple TrackAndGetGlobalMemory( + NodeBlock& bb, Tegra::Shader::Instruction instr, bool is_write); template Node Operation(OperationCode code, const T*... operands) {