From 9cc1b8a873196dac5a97368df125816b5b195777 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Tue, 25 May 2021 01:52:02 -0400 Subject: [PATCH] glsl: F16x2 storage atomics --- .../backend/glsl/emit_context.cpp | 16 ++++ .../backend/glsl/emit_context.h | 5 ++ .../backend/glsl/emit_glsl_atomic.cpp | 85 ++++++++----------- .../glsl/emit_glsl_bitwise_conversion.cpp | 9 +- .../backend/glsl/emit_glsl_instructions.h | 4 +- .../backend/glsl/reg_alloc.cpp | 2 + .../backend/glsl/reg_alloc.h | 1 + 7 files changed, 64 insertions(+), 58 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 7986bf78f..a413219e3 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -39,6 +39,10 @@ void EmitContext::SetupExtensions(std::string& header) { if (info.uses_atomic_f16x2_add || info.uses_atomic_f16x2_min || info.uses_atomic_f16x2_max) { header += "#extension NV_shader_atomic_fp16_vector : enable\n"; } + if (info.uses_fp16) { + // TODO: AMD + header += "#extension GL_NV_gpu_shader5 : enable\n"; + } } void EmitContext::DefineConstantBuffers() { @@ -89,6 +93,18 @@ void EmitContext::DefineHelperFunctions() { code += "uint CasFloatMax32x2(uint op_a,uint op_b){return " "packHalf2x16(max(unpackHalf2x16(op_a),unpackHalf2x16(op_b)));}\n"; } + if (info.uses_atomic_f16x2_add) { + code += "uint CasFloatAdd16x2(uint op_a,uint op_b){return " + "packFloat2x16(unpackFloat2x16(op_a)+unpackFloat2x16(op_b));}\n"; + } + if (info.uses_atomic_f16x2_min) { + code += "uint CasFloatMin16x2(uint op_a,uint op_b){return " + "packFloat2x16(min(unpackFloat2x16(op_a),unpackFloat2x16(op_b)));}\n"; + } + if (info.uses_atomic_f16x2_max) { + code += "uint CasFloatMax16x2(uint op_a,uint op_b){return " + "packFloat2x16(max(unpackFloat2x16(op_a),unpackFloat2x16(op_b)));}\n"; + } // TODO: Track this usage code += "uint CasMinS32(uint op_a,uint op_b){return uint(min(int(op_a),int(op_b)));}"; code += "uint CasMaxS32(uint op_a,uint op_b){return uint(max(int(op_a),int(op_b)));}"; diff --git a/src/shader_recompiler/backend/glsl/emit_context.h b/src/shader_recompiler/backend/glsl/emit_context.h index ca5657fe7..7f8857fa7 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.h +++ b/src/shader_recompiler/backend/glsl/emit_context.h @@ -43,6 +43,11 @@ public: Add(format_str, inst, args...); } + template + void AddF16x2(const char* format_str, IR::Inst& inst, Args&&... args) { + Add(format_str, inst, args...); + } + template void AddU32(const char* format_str, IR::Inst& inst, Args&&... args) { Add(format_str, inst, args...); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp index 0b29c213b..b6b326762 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_atomic.cpp @@ -12,8 +12,7 @@ namespace Shader::Backend::GLSL { namespace { -static constexpr std::string_view cas_loop{R"( -uint {}; +static constexpr std::string_view cas_loop{R"(uint {}; for (;;){{ uint old_value={}; {}=atomicCompSwap({},old_value,{}({},{})); @@ -49,6 +48,14 @@ void CasFunctionF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding const auto ret{ctx.reg_alloc.Define(inst)}; CasFunction(ctx, ret, ssbo, u32_value, function); } + +void CasFunctionF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value, std::string_view function) { + const std::string ssbo{fmt::format("ssbo{}[{}]", binding.U32(), offset.U32())}; + const std::string u32_value{fmt::format("packFloat2x16({})", value)}; + const auto ret{ctx.reg_alloc.Define(inst)}; + CasFunction(ctx, ret, ssbo, u32_value, function); +} } // namespace void EmitStorageAtomicIAdd32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -122,11 +129,8 @@ void EmitStorageAtomicSMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& // LOG_WARNING(..., "Op falling to non-atomic"); ctx.AddS64("{}=int64_t(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), binding.U32(), offset.U32() + 1); - ctx.Add(R"( -for(int i=0;i<2;++i){{ -ssbo{}[{}+i]=uint(min(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i])); -}} -)", + ctx.Add("for(int i=0;i<2;++i){{ " + "ssbo{}[{}+i]=uint(min(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}", binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); } @@ -135,12 +139,9 @@ void EmitStorageAtomicUMin64(EmitContext& ctx, IR::Inst& inst, const IR::Value& // LOG_WARNING(..., "Op falling to non-atomic"); ctx.AddU64("{}=uint64_t(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), binding.U32(), offset.U32() + 1); - ctx.Add(R"( -for(int i=0;i<2;++i){{ -ssbo{}[{}+i]=min(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]); -}} -)", - binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); + ctx.Add( + "for(int i=0;i<2;++i){{ ssbo{}[{}+i]=min(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}", + binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); } void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -148,11 +149,8 @@ void EmitStorageAtomicSMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& // LOG_WARNING(..., "Op falling to non-atomic"); ctx.AddS64("{}=int64_t(ivec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), binding.U32(), offset.U32() + 1); - ctx.Add(R"( -for(int i=0;i<2;++i){{ -ssbo{}[{}+i]=uint(max(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i])); -}} -)", + ctx.Add("for(int i=0;i<2;++i){{ " + "ssbo{}[{}+i]=uint(max(int(ssbo{}[{}+i]),unpackInt2x32(int64_t({}))[i]));}}", binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); } @@ -161,12 +159,9 @@ void EmitStorageAtomicUMax64(EmitContext& ctx, IR::Inst& inst, const IR::Value& // LOG_WARNING(..., "Op falling to non-atomic"); ctx.AddU64("{}=uint64_t(uvec2(ssbo{}[{}],ssbo{}[{}]));", inst, binding.U32(), offset.U32(), binding.U32(), offset.U32() + 1); - ctx.Add(R"( -for(int i=0;i<2;++i){{ -ssbo{}[{}+i]=max(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]); -}} -)", - binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); + ctx.Add( + "for(int i=0;i<2;++i){{ssbo{}[{}+i]=max(ssbo{}[{}+i],unpackUint2x32(uint64_t({}))[i]);}}", + binding.U32(), offset.U32(), binding.U32(), offset.U32(), value); } void EmitStorageAtomicAnd64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -202,45 +197,33 @@ void EmitStorageAtomicAddF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& CasFunctionF32(ctx, inst, binding, offset, value, "CasFloatAdd"); } -void EmitStorageAtomicAddF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); +void EmitStorageAtomicAddF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatAdd16x2"); } -void EmitStorageAtomicAddF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { +void EmitStorageAtomicAddF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatAdd32x2"); } -void EmitStorageAtomicMinF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); +void EmitStorageAtomicMinF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatMin16x2"); } -void EmitStorageAtomicMinF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { +void EmitStorageAtomicMinF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMin32x2"); } -void EmitStorageAtomicMaxF16x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); +void EmitStorageAtomicMaxF16x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { + CasFunctionF16x2(ctx, inst, binding, offset, value, "CasFloatMax16x2"); } -void EmitStorageAtomicMaxF32x2([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, - [[maybe_unused]] const IR::Value& binding, - [[maybe_unused]] const IR::Value& offset, - [[maybe_unused]] std::string_view value) { +void EmitStorageAtomicMaxF32x2(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset, std::string_view value) { CasFunctionF32x2(ctx, inst, binding, offset, value, "CasFloatMax32x2"); } diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp index a1e97b4cb..742f394d4 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_bitwise_conversion.cpp @@ -62,13 +62,12 @@ void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value ctx.AddU32x2("{}=unpackUint2x32({});", inst, value); } -void EmitPackFloat2x16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); +void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddU32("{}=packFloat2x16({});", inst, value); } -void EmitUnpackFloat2x16([[maybe_unused]] EmitContext& ctx, - [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); +void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { + ctx.AddF16x2("{}=unpackFloat2x16({});", inst, value); } void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index 56b812d84..9f32070b0 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -224,8 +224,8 @@ void EmitBitCastF32U32(EmitContext& ctx, IR::Inst& inst, std::string_view value) void EmitBitCastF64U64(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitPackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitUnpackUint2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); -void EmitPackFloat2x16(EmitContext& ctx, std::string_view value); -void EmitUnpackFloat2x16(EmitContext& ctx, std::string_view value); +void EmitPackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); +void EmitUnpackFloat2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitPackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitUnpackHalf2x16(EmitContext& ctx, IR::Inst& inst, std::string_view value); void EmitPackDouble2x32(EmitContext& ctx, IR::Inst& inst, std::string_view value); diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.cpp b/src/shader_recompiler/backend/glsl/reg_alloc.cpp index 58c2c408e..3de4c1172 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.cpp +++ b/src/shader_recompiler/backend/glsl/reg_alloc.cpp @@ -113,6 +113,8 @@ std::string RegAlloc::GetType(Type type, u32 index) { switch (type) { case Type::U1: return "bool "; + case Type::F16x2: + return "f16vec2 "; case Type::U32: return "uint "; case Type::S32: diff --git a/src/shader_recompiler/backend/glsl/reg_alloc.h b/src/shader_recompiler/backend/glsl/reg_alloc.h index 581954e44..9f2ff8718 100644 --- a/src/shader_recompiler/backend/glsl/reg_alloc.h +++ b/src/shader_recompiler/backend/glsl/reg_alloc.h @@ -18,6 +18,7 @@ enum class Type; namespace Shader::Backend::GLSL { enum class Type : u32 { U1, + F16x2, S32, U32, F32,