From 3185245845f7487c3b832035b0c19fdc4f1a8262 Mon Sep 17 00:00:00 2001 From: ReinUsesLisp Date: Mon, 6 Apr 2020 02:24:47 -0300 Subject: [PATCH] shader/memory: Implement RED.E.ADD Implements a reduction operation. It's an atomic operation that doesn't return a value. This commit introduces another primitive because some shading languages might have a primitive for reduction operations. --- src/video_core/engines/shader_bytecode.h | 8 +++ .../renderer_opengl/gl_shader_decompiler.cpp | 24 ++++++- .../renderer_vulkan/vk_shader_decompiler.cpp | 63 ++++++++++++------- src/video_core/shader/decode/memory.cpp | 16 ++++- src/video_core/shader/node.h | 14 +++++ 5 files changed, 98 insertions(+), 27 deletions(-) diff --git a/src/video_core/engines/shader_bytecode.h b/src/video_core/engines/shader_bytecode.h index 930b605af..a31947ef3 100644 --- a/src/video_core/engines/shader_bytecode.h +++ b/src/video_core/engines/shader_bytecode.h @@ -988,6 +988,12 @@ union Instruction { BitField<46, 2, u64> cache_mode; } stg; + union { + BitField<23, 3, AtomicOp> operation; + BitField<48, 1, u64> extended; + BitField<20, 3, GlobalAtomicType> type; + } red; + union { BitField<52, 4, AtomicOp> operation; BitField<49, 3, GlobalAtomicType> type; @@ -1733,6 +1739,7 @@ public: ST_S, ST, // Store in generic memory STG, // Store in global memory + RED, // Reduction operation ATOM, // Atomic operation on global memory ATOMS, // Atomic operation on shared memory AL2P, // Transforms attribute memory into physical memory @@ -2039,6 +2046,7 @@ private: INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"), INST("101-------------", Id::ST, Type::Memory, "ST"), INST("1110111011011---", Id::STG, Type::Memory, "STG"), + INST("1110101111111---", Id::RED, Type::Memory, "RED"), INST("11101101--------", Id::ATOM, Type::Memory, "ATOM"), INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"), INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"), diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp index c7d24cf14..a25280a47 100644 --- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp +++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp @@ -2119,8 +2119,14 @@ private: return {}; } return {fmt::format("atomic{}({}, {})", opname, Visit(operation[0]).GetCode(), - Visit(operation[1]).As(type)), - type}; + Visit(operation[1]).AsUint()), + Type::Uint}; + } + + template + Expression Reduce(Operation operation) { + code.AddLine("{};", Atomic(operation).GetCode()); + return {}; } Expression Branch(Operation operation) { @@ -2479,6 +2485,20 @@ private: &GLSLDecompiler::Atomic, &GLSLDecompiler::Atomic, + &GLSLDecompiler::Reduce, + &GLSLDecompiler::Reduce, + &GLSLDecompiler::Reduce, + &GLSLDecompiler::Reduce, + &GLSLDecompiler::Reduce, + &GLSLDecompiler::Reduce, + + &GLSLDecompiler::Reduce, + &GLSLDecompiler::Reduce, + &GLSLDecompiler::Reduce, + &GLSLDecompiler::Reduce, + &GLSLDecompiler::Reduce, + &GLSLDecompiler::Reduce, + &GLSLDecompiler::Branch, &GLSLDecompiler::BranchIndirect, &GLSLDecompiler::PushFlowStack, diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp index d67f08cf9..24d3ca08f 100644 --- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp +++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp @@ -1941,11 +1941,8 @@ private: return {}; } - template + template Expression Atomic(Operation operation) { - const Id type_def = GetTypeDefinition(result_type); - Id pointer; if (const auto smem = std::get_if(&*operation[0])) { pointer = GetSharedMemoryPointer(*smem); @@ -1953,15 +1950,19 @@ private: pointer = GetGlobalMemoryPointer(*gmem); } else { UNREACHABLE(); - return {Constant(type_def, 0), result_type}; + return {v_float_zero, Type::Float}; } - - const Id value = As(Visit(operation[1]), value_type); - const Id scope = Constant(t_uint, static_cast(spv::Scope::Device)); - const Id semantics = Constant(type_def, 0); + const Id semantics = Constant(t_uint, 0); + const Id value = AsUint(Visit(operation[1])); - return {(this->*func)(type_def, pointer, scope, semantics, value), result_type}; + return {(this->*func)(t_uint, pointer, scope, semantics, value), Type::Uint}; + } + + template + Expression Reduce(Operation operation) { + Atomic(operation); + return {}; } Expression Branch(Operation operation) { @@ -2550,21 +2551,35 @@ private: &SPIRVDecompiler::AtomicImageXor, &SPIRVDecompiler::AtomicImageExchange, - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Uint>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Uint>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicUMin>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicUMax>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicOr, Type::Int>, - &SPIRVDecompiler::Atomic<&Module::OpAtomicXor, Type::Int>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicExchange>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicIAdd>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicSMin>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicSMax>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicAnd>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicOr>, + &SPIRVDecompiler::Atomic<&Module::OpAtomicXor>, + + &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicUMin>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicUMax>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, + + &SPIRVDecompiler::Reduce<&Module::OpAtomicIAdd>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicSMin>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicSMax>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicAnd>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicOr>, + &SPIRVDecompiler::Reduce<&Module::OpAtomicXor>, &SPIRVDecompiler::Branch, &SPIRVDecompiler::BranchIndirect, diff --git a/src/video_core/shader/decode/memory.cpp b/src/video_core/shader/decode/memory.cpp index 1a93540fe..8112ead3e 100644 --- a/src/video_core/shader/decode/memory.cpp +++ b/src/video_core/shader/decode/memory.cpp @@ -378,13 +378,27 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) { if (IsUnaligned(type)) { const u32 mask = GetUnalignedMask(type); - value = InsertUnaligned(gmem, std::move(value), real_address, mask, size); + value = InsertUnaligned(gmem, move(value), real_address, mask, size); } bb.push_back(Operation(OperationCode::Assign, gmem, value)); } break; } + case OpCode::Id::RED: { + UNIMPLEMENTED_IF_MSG(instr.red.type != GlobalAtomicType::U32); + UNIMPLEMENTED_IF_MSG(instr.red.operation != AtomicOp::Add); + const auto [real_address, base_address, descriptor] = + TrackGlobalMemory(bb, instr, true, true); + if (!real_address || !base_address) { + // Tracking failed, skip atomic. + break; + } + Node gmem = MakeNode(real_address, base_address, descriptor); + Node value = GetRegister(instr.gpr0); + bb.push_back(Operation(OperationCode::ReduceIAdd, move(gmem), move(value))); + break; + } case OpCode::Id::ATOM: { UNIMPLEMENTED_IF_MSG(instr.atom.operation == AtomicOp::Inc || instr.atom.operation == AtomicOp::Dec || diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h index 5fcc9da60..3eee961f5 100644 --- a/src/video_core/shader/node.h +++ b/src/video_core/shader/node.h @@ -178,6 +178,20 @@ enum class OperationCode { AtomicIOr, /// (memory, int) -> int AtomicIXor, /// (memory, int) -> int + ReduceUAdd, /// (memory, uint) -> void + ReduceUMin, /// (memory, uint) -> void + ReduceUMax, /// (memory, uint) -> void + ReduceUAnd, /// (memory, uint) -> void + ReduceUOr, /// (memory, uint) -> void + ReduceUXor, /// (memory, uint) -> void + + ReduceIAdd, /// (memory, int) -> void + ReduceIMin, /// (memory, int) -> void + ReduceIMax, /// (memory, int) -> void + ReduceIAnd, /// (memory, int) -> void + ReduceIOr, /// (memory, int) -> void + ReduceIXor, /// (memory, int) -> void + Branch, /// (uint branch_target) -> void BranchIndirect, /// (uint branch_target) -> void PushFlowStack, /// (uint branch_target) -> void