From 8ba814efb295f0b8494b3679c484c7ceab31c392 Mon Sep 17 00:00:00 2001 From: ameerj <52414509+ameerj@users.noreply.github.com> Date: Fri, 28 May 2021 21:24:52 -0400 Subject: [PATCH] glsl: Better Storage access and wip warps --- .../backend/glsl/emit_context.cpp | 4 ++ .../backend/glsl/emit_glsl.cpp | 5 +- .../glsl/emit_glsl_context_get_set.cpp | 16 +++-- .../backend/glsl/emit_glsl_instructions.h | 26 +++++--- .../backend/glsl/emit_glsl_integer.cpp | 4 +- .../backend/glsl/emit_glsl_memory.cpp | 60 +++++++++++++------ .../glsl/emit_glsl_not_implemented.cpp | 27 --------- .../backend/glsl/emit_glsl_warp.cpp | 53 ++++++++++++++++ 8 files changed, 133 insertions(+), 62 deletions(-) diff --git a/src/shader_recompiler/backend/glsl/emit_context.cpp b/src/shader_recompiler/backend/glsl/emit_context.cpp index 3530e89e5..db62ba73b 100644 --- a/src/shader_recompiler/backend/glsl/emit_context.cpp +++ b/src/shader_recompiler/backend/glsl/emit_context.cpp @@ -122,6 +122,10 @@ void EmitContext::SetupExtensions(std::string&) { header += "#extension GL_AMD_gpu_shader_half_float : enable\n"; } } + if (info.uses_subgroup_invocation_id || info.uses_subgroup_mask || info.uses_subgroup_vote || + info.uses_subgroup_shuffles || info.uses_fswzadd) { + header += "#extension GL_ARB_shader_ballot : enable\n"; + } } void EmitContext::DefineConstantBuffers(Bindings& bindings) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl.cpp b/src/shader_recompiler/backend/glsl/emit_glsl.cpp index 992e4b82e..800de58b7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl.cpp @@ -183,8 +183,11 @@ std::string EmitGLSL(const Profile& profile, const RuntimeInfo& runtime_info, IR for (size_t index = 0; index < ctx.reg_alloc.num_used_registers; ++index) { ctx.header += fmt::format("{} R{};", ctx.reg_alloc.reg_types[index], index); } - // TODO: track CC usage + // TODO: track usage ctx.header += "uint carry;"; + if (program.info.uses_subgroup_shuffles) { + ctx.header += "bool shfl_in_bounds;\n"; + } ctx.code.insert(0, ctx.header); ctx.code += "}"; fmt::print("\n{}\n", ctx.code); diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp index 67d308c49..2286177a7 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_context_get_set.cpp @@ -20,22 +20,26 @@ char OffsetSwizzle(u32 offset) { } } // namespace -void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, +void EmitGetCbufU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLSL"); } -void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, +void EmitGetCbufS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLSL"); } -void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, +void EmitGetCbufU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLSL"); } -void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, +void EmitGetCbufS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { throw NotImplementedException("GLSL"); } @@ -151,4 +155,8 @@ void EmitSetFragColor(EmitContext& ctx, u32 index, u32 component, std::string_vi ctx.Add("frag_color{}.{}={};", index, swizzle, value); } +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst) { + ctx.AddU32x3("{}=gl_LocalInvocationID;", inst); +} + } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h index b54fe684e..07408d9e9 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h +++ b/src/shader_recompiler/backend/glsl/emit_glsl_instructions.h @@ -52,10 +52,14 @@ void EmitSetGotoVariable(EmitContext& ctx); void EmitGetGotoVariable(EmitContext& ctx); void EmitSetIndirectBranchVariable(EmitContext& ctx); void EmitGetIndirectBranchVariable(EmitContext& ctx); -void EmitGetCbufU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitGetCbufS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitGetCbufU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitGetCbufS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); void EmitGetCbufU32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); void EmitGetCbufF32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, @@ -83,7 +87,7 @@ void EmitSetSFlag(EmitContext& ctx); void EmitSetCFlag(EmitContext& ctx); void EmitSetOFlag(EmitContext& ctx); void EmitWorkgroupId(EmitContext& ctx); -void EmitLocalInvocationId(EmitContext& ctx); +void EmitLocalInvocationId(EmitContext& ctx, IR::Inst& inst); void EmitInvocationId(EmitContext& ctx); void EmitSampleId(EmitContext& ctx); void EmitIsHelperInvocation(EmitContext& ctx); @@ -109,10 +113,14 @@ void EmitWriteGlobalS16(EmitContext& ctx); void EmitWriteGlobal32(EmitContext& ctx, std::string_view address, std::string_view value); void EmitWriteGlobal64(EmitContext& ctx, std::string_view address, std::string_view value); void EmitWriteGlobal128(EmitContext& ctx, std::string_view address, std::string_view value); -void EmitLoadStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); -void EmitLoadStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset); +void EmitLoadStorageU8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageS8(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageU16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); +void EmitLoadStorageS16(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, + const IR::Value& offset); void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset); void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp index 34f880f1b..07e1a4b51 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_integer.cpp @@ -156,12 +156,12 @@ void EmitBitwiseNot32(EmitContext& ctx, IR::Inst& inst, std::string_view value) void EmitFindSMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=findMSB(int({}));", inst, value); } void EmitFindUMsb32([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instruction"); + ctx.AddU32("{}=findMSB(uint({}));", inst, value); } void EmitSMin32(EmitContext& ctx, IR::Inst& inst, std::string_view a, std::string_view b) { diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp index 708c9685b..09ad35e44 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_memory.cpp @@ -8,45 +8,55 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { -void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, +void EmitLoadStorageU8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract(ssbo{}[{}/4],int({}%4)*8,8);", inst, binding.U32(), offset_var, + offset_var); } -void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, +void EmitLoadStorageS8([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, + [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddS32("{}=bitfieldExtract(int(ssbo{}[{}/4]),int({}%4)*8,8);", inst, binding.U32(), + offset_var, offset_var); } -void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx, +void EmitLoadStorageU16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddU32("{}=bitfieldExtract(ssbo{}[{}/4],int(({}/2)%2)*16,16);", inst, binding.U32(), + offset_var, offset_var); } -void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, +void EmitLoadStorageS16([[maybe_unused]] EmitContext& ctx, IR::Inst& inst, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.AddS32("{}=bitfieldExtract(int(ssbo{}[{}/4]),int(({}/2)%2)*16,16);", inst, binding.U32(), + offset_var, offset_var); } void EmitLoadStorage32(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.AddU32("{}=ssbo{}[{}];", inst, binding.U32(), offset_var); + ctx.AddU32("{}=ssbo{}[{}/4];", inst, binding.U32(), offset_var); } void EmitLoadStorage64(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.AddU32x2("{}=uvec2(ssbo{}[{}],ssbo{}[{}+1]);", inst, binding.U32(), offset_var, + ctx.AddU32x2("{}=uvec2(ssbo{}[{}/4],ssbo{}[{}/4+1]);", inst, binding.U32(), offset_var, binding.U32(), offset_var); } void EmitLoadStorage128(EmitContext& ctx, IR::Inst& inst, const IR::Value& binding, const IR::Value& offset) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.AddU32x4("{}=uvec4(ssbo{}[{}],ssbo{}[{}+1],ssbo{}[{}+2],ssbo{}[{}+3]);", inst, + ctx.AddU32x4("{}=uvec4(ssbo{}[{}/4],ssbo{}[{}/4+1],ssbo{}[{}/4+2],ssbo{}[{}/4+3]);", inst, binding.U32(), offset_var, binding.U32(), offset_var, binding.U32(), offset_var, binding.U32(), offset_var); } @@ -55,47 +65,59 @@ void EmitWriteStorageU8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int({}%4)*8,8);", binding.U32(), + offset_var, binding.U32(), offset_var, value, offset_var); } void EmitWriteStorageS8([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int({}%4)*8,8);", binding.U32(), + offset_var, binding.U32(), offset_var, value, offset_var); } void EmitWriteStorageU16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", binding.U32(), + offset_var, binding.U32(), offset_var, value, offset_var); } void EmitWriteStorageS16([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.Add("ssbo{}[{}/4]=bitfieldInsert(ssbo{}[{}/4],{},int(({}/2)%2)*16,16);", binding.U32(), + offset_var, binding.U32(), offset_var, value, offset_var); } void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.Add("ssbo{}[{}]={};", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[{}/4]={};", binding.U32(), offset_var, value); } void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, std::string_view value) { const auto offset_var{ctx.reg_alloc.Consume(offset)}; - ctx.Add("ssbo{}[{}]={}.x;", binding.U32(), offset_var, value); - ctx.Add("ssbo{}[{}+1]={}.y;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[{}/4]={}.x;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[({}/4)+1]={}.y;", binding.U32(), offset_var, value); } void EmitWriteStorage128([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] const IR::Value& binding, [[maybe_unused]] const IR::Value& offset, [[maybe_unused]] std::string_view value) { - throw NotImplementedException("GLSL Instrucion"); + const auto offset_var{ctx.reg_alloc.Consume(offset)}; + ctx.Add("ssbo{}[{}/4]={}.x;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[({}/4)+1]={}.y;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[({}/4)+2]={}.z;", binding.U32(), offset_var, value); + ctx.Add("ssbo{}[({}/4)+3]={}.w;", binding.U32(), offset_var, value); } } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp index 3bac8899b..ec80f3cef 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_not_implemented.cpp @@ -206,10 +206,6 @@ void EmitWorkgroupId(EmitContext& ctx) { NotImplemented(); } -void EmitLocalInvocationId(EmitContext& ctx) { - NotImplemented(); -} - void EmitInvocationId(EmitContext& ctx) { NotImplemented(); } @@ -626,27 +622,4 @@ void EmitSubgroupGeMask(EmitContext& ctx) { NotImplemented(); } -void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { - NotImplemented(); -} - -void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, - std::string_view clamp, std::string_view segmentation_mask) { - NotImplemented(); -} - -void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { - NotImplemented(); -} - -void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, - std::string_view index, std::string_view clamp, - std::string_view segmentation_mask) { - NotImplemented(); -} - } // namespace Shader::Backend::GLSL diff --git a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp index aebdf8a3a..0a488188b 100644 --- a/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp +++ b/src/shader_recompiler/backend/glsl/emit_glsl_warp.cpp @@ -8,6 +8,59 @@ #include "shader_recompiler/frontend/ir/value.h" namespace Shader::Backend::GLSL { +namespace { +void SetInBoundsFlag(EmitContext& ctx, IR::Inst& inst) { + IR::Inst* const in_bounds{inst.GetAssociatedPseudoOperation(IR::Opcode::GetInBoundsFromOp)}; + if (!in_bounds) { + return; + } + + ctx.AddU1("{}=shfl_in_bounds;", *in_bounds); + in_bounds->Invalidate(); +} +} // namespace + +void EmitShuffleIndex(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" + "&~{}));", + index, segmentation_mask, clamp, segmentation_mask); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?{}:gl_SubGroupInvocationARB-{};", inst, value, index); +} + +void EmitShuffleUp(EmitContext& ctx, IR::Inst& inst, std::string_view value, std::string_view index, + std::string_view clamp, std::string_view segmentation_mask) { + ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" + "&~{}));", + index, segmentation_mask, clamp, segmentation_mask); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?readInvocationARB({},gl_SubGroupInvocationARB-{}):" + "{};", + inst, value, index, value); +} + +void EmitShuffleDown(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" + "&~{}));", + index, segmentation_mask, clamp, segmentation_mask); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?{}:gl_SubGroupInvocationARB-{};", inst, value, index); +} + +void EmitShuffleButterfly(EmitContext& ctx, IR::Inst& inst, std::string_view value, + std::string_view index, std::string_view clamp, + std::string_view segmentation_mask) { + ctx.Add("shfl_in_bounds=int(gl_SubGroupInvocationARB-{})>=int((gl_SubGroupInvocationARB&{})|({}" + "&~{}));", + index, segmentation_mask, clamp, segmentation_mask); + SetInBoundsFlag(ctx, inst); + ctx.AddU32("{}=shfl_in_bounds?{}:gl_SubGroupInvocationARB-{};", inst, value, index); +} + void EmitFSwizzleAdd([[maybe_unused]] EmitContext& ctx, [[maybe_unused]] IR::Inst& inst, [[maybe_unused]] std::string_view op_a, [[maybe_unused]] std::string_view op_b, [[maybe_unused]] std::string_view swizzle) {