shader: Implement VOTE

This commit is contained in:
ameerj 2021-03-23 20:27:17 -04:00
parent d40faa1db0
commit 3d07cef009
18 changed files with 182 additions and 6 deletions

View file

@ -15,6 +15,7 @@ add_library(shader_recompiler STATIC
backend/spirv/emit_spirv_memory.cpp
backend/spirv/emit_spirv_select.cpp
backend/spirv/emit_spirv_undefined.cpp
backend/spirv/emit_spirv_vote.cpp
environment.h
exception.h
file_environment.cpp
@ -122,6 +123,7 @@ add_library(shader_recompiler STATIC
frontend/maxwell/translate/impl/select_source_with_predicate.cpp
frontend/maxwell/translate/impl/texture_fetch.cpp
frontend/maxwell/translate/impl/texture_fetch_swizzled.cpp
frontend/maxwell/translate/impl/vote.cpp
frontend/maxwell/translate/translate.cpp
frontend/maxwell/translate/translate.h
ir_opt/collect_shader_info_pass.cpp

View file

@ -259,6 +259,10 @@ void EmitContext::DefineInputs(const Info& info, Stage stage) {
if (info.uses_local_invocation_id) {
local_invocation_id = DefineInput(*this, U32[3], spv::BuiltIn::LocalInvocationId);
}
if (profile.warp_size_potentially_larger_than_guest && info.uses_subgroup_vote) {
subgroup_local_invocation_id =
DefineInput(*this, U32[1], spv::BuiltIn::SubgroupLocalInvocationId);
}
if (info.loads_position) {
const bool is_fragment{stage != Stage::Fragment};
const spv::BuiltIn built_in{is_fragment ? spv::BuiltIn::Position : spv::BuiltIn::FragCoord};

View file

@ -82,6 +82,7 @@ public:
Id workgroup_id{};
Id local_invocation_id{};
Id subgroup_local_invocation_id{};
Id instance_id{};
Id instance_index{};
Id base_instance{};
@ -96,7 +97,7 @@ public:
std::array<Id, 32> output_generics{};
std::array<Id, 8> frag_color{};
Id frag_depth {};
Id frag_depth{};
std::vector<Id> interfaces;

View file

@ -224,6 +224,15 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
ctx.AddExtension("SPV_KHR_shader_draw_parameters");
ctx.AddCapability(spv::Capability::DrawParameters);
}
if (info.uses_subgroup_vote && profile.support_vote) {
ctx.AddExtension("SPV_KHR_shader_ballot");
ctx.AddCapability(spv::Capability::SubgroupBallotKHR);
if (!profile.warp_size_potentially_larger_than_guest) {
// vote ops are only used when not taking the long path
ctx.AddExtension("SPV_KHR_subgroup_vote");
ctx.AddCapability(spv::Capability::SubgroupVoteKHR);
}
}
// TODO: Track this usage
ctx.AddCapability(spv::Capability::ImageGatherExtended);
}

View file

@ -346,5 +346,9 @@ Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Va
Id coords, Id dref, Id bias_lc, Id offset);
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
Id coords, Id dref, Id lod_lc, Id offset);
Id EmitVoteAll(EmitContext& ctx, Id pred);
Id EmitVoteAny(EmitContext& ctx, Id pred);
Id EmitVoteEqual(EmitContext& ctx, Id pred);
Id EmitSubgroupBallot(EmitContext& ctx, Id pred);
} // namespace Shader::Backend::SPIRV

View file

@ -0,0 +1,58 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include "shader_recompiler/backend/spirv/emit_spirv.h"
namespace Shader::Backend::SPIRV {
namespace {
Id LargeWarpBallot(EmitContext& ctx, Id ballot) {
const Id shift{ctx.Constant(ctx.U32[1], 5)};
const Id local_index{ctx.OpLoad(ctx.U32[1], ctx.subgroup_local_invocation_id)};
return ctx.OpVectorExtractDynamic(ctx.U32[1], ballot, local_index);
}
} // Anonymous namespace
Id EmitVoteAll(EmitContext& ctx, Id pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpSubgroupAllKHR(ctx.U1, pred);
}
const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
return ctx.OpIEqual(ctx.U1, lhs, active_mask);
}
Id EmitVoteAny(EmitContext& ctx, Id pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpSubgroupAnyKHR(ctx.U1, pred);
}
const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
const Id lhs{ctx.OpBitwiseAnd(ctx.U32[1], ballot, active_mask)};
return ctx.OpINotEqual(ctx.U1, lhs, ctx.u32_zero_value);
}
Id EmitVoteEqual(EmitContext& ctx, Id pred) {
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpSubgroupAllEqualKHR(ctx.U1, pred);
}
const Id mask_ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], ctx.true_value)};
const Id active_mask{LargeWarpBallot(ctx, mask_ballot)};
const Id ballot{LargeWarpBallot(ctx, ctx.OpSubgroupBallotKHR(ctx.U32[4], pred))};
const Id lhs{ctx.OpBitwiseXor(ctx.U32[1], ballot, active_mask)};
return ctx.OpLogicalOr(ctx.U1, ctx.OpIEqual(ctx.U1, lhs, ctx.u32_zero_value),
ctx.OpIEqual(ctx.U1, lhs, active_mask));
}
Id EmitSubgroupBallot(EmitContext& ctx, Id pred) {
const Id ballot{ctx.OpSubgroupBallotKHR(ctx.U32[4], pred)};
if (!ctx.profile.warp_size_potentially_larger_than_guest) {
return ctx.OpCompositeExtract(ctx.U32[1], ballot, 0U);
}
return LargeWarpBallot(ctx, ballot);
}
} // namespace Shader::Backend::SPIRV

View file

@ -1444,4 +1444,20 @@ F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coor
return Inst<F32>(op, Flags{info}, handle, coords, dref, lod_lc, offset);
}
U1 IREmitter::VoteAll(const U1& value) {
return Inst<U1>(Opcode::VoteAll, value);
}
U1 IREmitter::VoteAny(const U1& value) {
return Inst<U1>(Opcode::VoteAny, value);
}
U1 IREmitter::VoteEqual(const U1& value) {
return Inst<U1>(Opcode::VoteEqual, value);
}
U32 IREmitter::SubgroupBallot(const U1& value) {
return Inst<U32>(Opcode::SubgroupBallot, value);
}
} // namespace Shader::IR

View file

@ -234,6 +234,11 @@ public:
const Value& offset, const F32& lod_clamp,
TextureInstInfo info);
[[nodiscard]] U1 VoteAll(const U1& value);
[[nodiscard]] U1 VoteAny(const U1& value);
[[nodiscard]] U1 VoteEqual(const U1& value);
[[nodiscard]] U32 SubgroupBallot(const U1& value);
private:
IR::Block::iterator insertion_point;

View file

@ -355,3 +355,9 @@ OPCODE(ImageSampleImplicitLod, F32x4, U32,
OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
// Vote operations
OPCODE(VoteAll, U1, U1, )
OPCODE(VoteAny, U1, U1, )
OPCODE(VoteEqual, U1, U1, )
OPCODE(SubgroupBallot, U32, U1, )

View file

@ -417,10 +417,6 @@ void TranslatorVisitor::VMNMX(u64) {
ThrowNotImplemented(Opcode::VMNMX);
}
void TranslatorVisitor::VOTE(u64) {
ThrowNotImplemented(Opcode::VOTE);
}
void TranslatorVisitor::VOTE_vtg(u64) {
ThrowNotImplemented(Opcode::VOTE_vtg);
}

View file

@ -0,0 +1,52 @@
// Copyright 2021 yuzu Emulator Project
// Licensed under GPLv2 or any later version
// Refer to the license.txt file included.
#include <optional>
#include "common/bit_field.h"
#include "common/common_types.h"
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
namespace Shader::Maxwell {
namespace {
enum class VoteOp : u64 {
ALL,
ANY,
EQ,
};
[[nodiscard]] IR::U1 VoteOperation(IR::IREmitter& ir, const IR::U1& pred, VoteOp vote_op) {
switch (vote_op) {
case VoteOp::ALL:
return ir.VoteAll(pred);
case VoteOp::ANY:
return ir.VoteAny(pred);
case VoteOp::EQ:
return ir.VoteEqual(pred);
default:
throw NotImplementedException("Invalid VOTE op {}", vote_op);
}
}
void Vote(TranslatorVisitor& v, u64 insn) {
union {
u64 insn;
BitField<0, 8, IR::Reg> dest_reg;
BitField<39, 3, IR::Pred> pred_a;
BitField<42, 1, u64> neg_pred_a;
BitField<45, 3, IR::Pred> pred_b;
BitField<48, 2, VoteOp> vote_op;
} const vote{insn};
const IR::U1 vote_pred{v.ir.GetPred(vote.pred_a, vote.neg_pred_a != 0)};
v.ir.SetPred(vote.pred_b, VoteOperation(v.ir, vote_pred, vote.vote_op));
v.X(vote.dest_reg, v.ir.SubgroupBallot(vote_pred));
}
} // Anonymous namespace
void TranslatorVisitor::VOTE(u64 insn) {
Vote(*this, insn);
}
} // namespace Shader::Maxwell

View file

@ -359,6 +359,12 @@ void VisitUsages(Info& info, IR::Inst& inst) {
inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
break;
}
case IR::Opcode::VoteAll:
case IR::Opcode::VoteAny:
case IR::Opcode::VoteEqual:
case IR::Opcode::SubgroupBallot:
info.uses_subgroup_vote = true;
break;
default:
break;
}

View file

@ -19,6 +19,8 @@ struct Profile {
bool support_fp16_signed_zero_nan_preserve{};
bool support_fp32_signed_zero_nan_preserve{};
bool support_fp64_signed_zero_nan_preserve{};
bool support_vote{};
bool warp_size_potentially_larger_than_guest{};
// FClamp is broken and OpFMax + OpFMin should be used instead
bool has_broken_spirv_clamp{};

View file

@ -80,6 +80,7 @@ struct Info {
bool uses_sampled_1d{};
bool uses_sparse_residency{};
bool uses_demote_to_helper_invocation{};
bool uses_subgroup_vote{};
IR::Type used_constant_buffer_types{};

View file

@ -36,13 +36,18 @@ ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descrip
descriptor_update_template = std::move(tuple.descriptor_update_template);
descriptor_allocator = DescriptorAllocator(descriptor_pool, *descriptor_set_layout);
const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT subgroup_size_ci{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT,
.pNext = nullptr,
.requiredSubgroupSize = GuestWarpSize,
};
pipeline = device.GetLogical().CreateComputePipeline({
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
.pNext = nullptr,
.flags = 0,
.stage{
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
.pNext = nullptr,
.pNext = device.IsExtSubgroupSizeControlSupported() ? &subgroup_size_ci : nullptr,
.flags = 0,
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
.module = *spv_module,

View file

@ -455,6 +455,8 @@ PipelineCache::PipelineCache(RasterizerVulkan& rasterizer_, Tegra::GPU& gpu_,
float_control.shaderSignedZeroInfNanPreserveFloat32 != VK_FALSE,
.support_fp64_signed_zero_nan_preserve =
float_control.shaderSignedZeroInfNanPreserveFloat64 != VK_FALSE,
.support_vote = true,
.warp_size_potentially_larger_than_guest = device.IsWarpSizePotentiallyBiggerThanGuest(),
.has_broken_spirv_clamp = driver_id == VK_DRIVER_ID_INTEL_PROPRIETARY_WINDOWS_KHR,
};
}

View file

@ -737,6 +737,7 @@ std::vector<const char*> Device::LoadExtensions(bool requires_surface) {
subgroup_properties.maxSubgroupSize >= GuestWarpSize) {
extensions.push_back(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
guest_warp_stages = subgroup_properties.requiredSubgroupSizeStages;
ext_subgroup_size_control = true;
}
} else {
is_warp_potentially_bigger = true;

View file

@ -193,6 +193,11 @@ public:
return ext_shader_viewport_index_layer;
}
/// Returns true if the device supports VK_EXT_subgroup_size_control.
bool IsExtSubgroupSizeControlSupported() const {
return ext_subgroup_size_control;
}
/// Returns true if the device supports VK_EXT_transform_feedback.
bool IsExtTransformFeedbackSupported() const {
return ext_transform_feedback;
@ -297,6 +302,7 @@ private:
bool ext_depth_range_unrestricted{}; ///< Support for VK_EXT_depth_range_unrestricted.
bool ext_shader_viewport_index_layer{}; ///< Support for VK_EXT_shader_viewport_index_layer.
bool ext_tooling_info{}; ///< Support for VK_EXT_tooling_info.
bool ext_subgroup_size_control{}; ///< Support for VK_EXT_subgroup_size_control.
bool ext_transform_feedback{}; ///< Support for VK_EXT_transform_feedback.
bool ext_custom_border_color{}; ///< Support for VK_EXT_custom_border_color.
bool ext_extended_dynamic_state{}; ///< Support for VK_EXT_extended_dynamic_state.