shader: Implement transform feedbacks and define file format

This commit is contained in:
ReinUsesLisp 2021-04-14 01:04:59 -03:00 committed by ameerj
parent a83579b50a
commit b126987c59
11 changed files with 272 additions and 23 deletions

View file

@ -135,6 +135,45 @@ Id DefineOutput(EmitContext& ctx, Id type, std::optional<spv::BuiltIn> builtin =
return DefineVariable(ctx, type, builtin, spv::StorageClass::Output);
}
/// Defines the output variable(s) for generic attribute `index` and records them in
/// ctx.output_generics[index].
///
/// When the profile has no transform feedback varyings a single 4-component variable is declared.
/// Otherwise the attribute is walked element by element: an element that has a varying with a
/// non-zero component count gets a variable of exactly that many components, decorated with the
/// varying's XfbBuffer/XfbStride/Offset; an element without one gets a variable spanning the
/// remaining components of the attribute.
///
/// Every element slot covered by a variable stores the same GenericElementInfo, so a lookup by
/// element yields the variable id and the element it starts at.
void DefineGenericOutput(EmitContext& ctx, size_t index) {
    static constexpr std::string_view swizzle{"xyzw"};
    const size_t base_attr_index{static_cast<size_t>(IR::Attribute::Generic0X) + index * 4};
    u32 element{0};
    while (element < 4) {
        const u32 remainder{4 - element};
        const TransformFeedbackVarying* xfb_varying{};
        if (!ctx.profile.xfb_varyings.empty()) {
            xfb_varying = &ctx.profile.xfb_varyings[base_attr_index + element];
            // A varying with zero components means nothing is captured at this element
            xfb_varying = xfb_varying->components > 0 ? xfb_varying : nullptr;
        }
        const u32 num_components{xfb_varying ? xfb_varying->components : remainder};

        const Id id{DefineOutput(ctx, ctx.F32[num_components])};
        ctx.Decorate(id, spv::Decoration::Location, static_cast<u32>(index));
        if (element > 0) {
            // Variables sharing a location are told apart by their starting component
            ctx.Decorate(id, spv::Decoration::Component, element);
        }
        if (xfb_varying) {
            ctx.Decorate(id, spv::Decoration::XfbBuffer, xfb_varying->buffer);
            ctx.Decorate(id, spv::Decoration::XfbStride, xfb_varying->stride);
            ctx.Decorate(id, spv::Decoration::Offset, xfb_varying->offset);
        }
        if (num_components < 4 || element > 0) {
            // Partial variable: append the covered components so that the pieces of one
            // attribute get distinct names
            const std::string_view subswizzle{swizzle.substr(element, num_components)};
            ctx.Name(id, fmt::format("out_attr{}_{}", index, subswizzle));
        } else {
            ctx.Name(id, fmt::format("out_attr{}", index));
        }
        const GenericElementInfo info{
            .id = id,
            .first_element = element,
            .num_components = num_components,
        };
        // Fill only the slots this variable covers; starting at begin() would overwrite the
        // entries of earlier pieces and leave the slots from `element` onwards empty
        std::fill_n(ctx.output_generics[index].begin() + element, num_components, info);
        element += num_components;
    }
}
Id GetAttributeType(EmitContext& ctx, AttributeType type) {
switch (type) {
case AttributeType::Float:
@ -663,12 +702,15 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) {
OpReturn();
++label_index;
}
for (size_t i = 0; i < info.stores_generics.size(); i++) {
for (size_t i = 0; i < info.stores_generics.size(); ++i) {
if (!info.stores_generics[i]) {
continue;
}
if (output_generics[i][0].num_components != 4) {
throw NotImplementedException("Physical stores and transform feedbacks");
}
AddLabel(labels[label_index]);
const Id generic_id{output_generics.at(i)};
const Id generic_id{output_generics[i][0].id};
const Id pointer{OpAccessChain(output_f32, generic_id, masked_index)};
OpStore(pointer, store_value);
OpReturn();
@ -1015,11 +1057,9 @@ void EmitContext::DefineOutputs(const Info& info) {
}
viewport_index = DefineOutput(*this, U32[1], spv::BuiltIn::ViewportIndex);
}
for (size_t i = 0; i < info.stores_generics.size(); ++i) {
if (info.stores_generics[i]) {
output_generics[i] = DefineOutput(*this, F32[4]);
Decorate(output_generics[i], spv::Decoration::Location, static_cast<u32>(i));
Name(output_generics[i], fmt::format("out_attr{}", i));
for (size_t index = 0; index < info.stores_generics.size(); ++index) {
if (info.stores_generics[index]) {
DefineGenericOutput(*this, index);
}
}
if (stage == Stage::Fragment) {

View file

@ -79,6 +79,12 @@ struct StorageDefinitions {
Id U32x4{};
};
// Describes the output variable that backs one element of a generic output attribute.
// A default-constructed entry (num_components == 0) is treated by EmitPrologue as an
// attribute that has no output defined.
struct GenericElementInfo {
// Id of the output variable that contains this element
Id id{};
// Element of the attribute at which that variable starts
u32 first_element{};
// Number of components the variable holds
u32 num_components{};
};
class EmitContext final : public Sirit::Module {
public:
explicit EmitContext(const Profile& profile, IR::Program& program, u32& binding);
@ -189,7 +195,7 @@ public:
Id output_point_size{};
Id output_position{};
std::array<Id, 32> output_generics{};
std::array<std::array<GenericElementInfo, 4>, 32> output_generics{};
std::array<Id, 8> frag_color{};
Id frag_depth{};

View file

@ -288,6 +288,9 @@ void SetupCapabilities(const Profile& profile, const Info& info, EmitContext& ct
if (info.uses_typeless_image_writes) {
ctx.AddCapability(spv::Capability::StorageImageWriteWithoutFormat);
}
if (!ctx.profile.xfb_varyings.empty()) {
ctx.AddCapability(spv::Capability::TransformFeedback);
}
// TODO: Track this usage
ctx.AddCapability(spv::Capability::ImageGatherExtended);
ctx.AddCapability(spv::Capability::ImageQuery);

View file

@ -40,11 +40,17 @@ Id AttrPointer(EmitContext& ctx, Id pointer_type, Id vertex, Id base, Args&&...
}
std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
const u32 element{static_cast<u32>(attr) % 4};
const auto element_id{[&] { return ctx.Constant(ctx.U32[1], element); }};
if (IR::IsGeneric(attr)) {
const u32 index{IR::GenericAttributeIndex(attr)};
return ctx.OpAccessChain(ctx.output_f32, ctx.output_generics.at(index), element_id());
const u32 element{IR::GenericAttributeElement(attr)};
const GenericElementInfo& info{ctx.output_generics.at(index).at(element)};
if (info.num_components == 1) {
return info.id;
} else {
const u32 index_element{element - info.first_element};
const Id index_id{ctx.Constant(ctx.U32[1], index_element)};
return ctx.OpAccessChain(ctx.output_f32, info.id, index_id);
}
}
switch (attr) {
case IR::Attribute::PointSize:
@ -52,8 +58,11 @@ std::optional<Id> OutputAttrPointer(EmitContext& ctx, IR::Attribute attr) {
case IR::Attribute::PositionX:
case IR::Attribute::PositionY:
case IR::Attribute::PositionZ:
case IR::Attribute::PositionW:
return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id());
case IR::Attribute::PositionW: {
const u32 element{static_cast<u32>(attr) % 4};
const Id element_id{ctx.Constant(ctx.U32[1], element)};
return ctx.OpAccessChain(ctx.output_f32, ctx.output_position, element_id);
}
case IR::Attribute::ClipDistance0:
case IR::Attribute::ClipDistance1:
case IR::Attribute::ClipDistance2:

View file

@ -22,6 +22,21 @@ void SetFixedPipelinePointSize(EmitContext& ctx) {
ctx.OpStore(ctx.output_point_size, ctx.Constant(ctx.F32[1], point_size));
}
}
/// Builds the default value for an output variable of `num_components` components that starts
/// at `element` of its attribute: every component is `zero` except the one that lands on
/// element 3, which is `one`. A full 4-component variable reuses `default_vector`.
/// Throws InvalidArgument when `num_components` is not in the range 1..4.
Id DefaultVarying(EmitContext& ctx, u32 num_components, u32 element, Id zero, Id one,
                  Id default_vector) {
    // Value of the last component, given its distance from `element`
    const auto last_component{[&](u32 distance) { return element + distance == 3 ? one : zero; }};
    if (num_components == 1) {
        return last_component(0);
    }
    if (num_components == 2) {
        return ctx.ConstantComposite(ctx.F32[2], zero, last_component(1));
    }
    if (num_components == 3) {
        return ctx.ConstantComposite(ctx.F32[3], zero, zero, last_component(2));
    }
    if (num_components == 4) {
        return default_vector;
    }
    throw InvalidArgument("Bad element");
}
} // Anonymous namespace
void EmitPrologue(EmitContext& ctx) {
@ -30,9 +45,17 @@ void EmitPrologue(EmitContext& ctx) {
const Id one{ctx.Constant(ctx.F32[1], 1.0f)};
const Id default_vector{ctx.ConstantComposite(ctx.F32[4], zero, zero, zero, one)};
ctx.OpStore(ctx.output_position, default_vector);
for (const Id generic_id : ctx.output_generics) {
if (Sirit::ValidId(generic_id)) {
ctx.OpStore(generic_id, default_vector);
for (const auto& info : ctx.output_generics) {
if (info[0].num_components == 0) {
continue;
}
u32 element{0};
while (element < 4) {
const auto& element_info{info[element]};
const u32 num{element_info.num_components};
const Id value{DefaultVarying(ctx, num, element, zero, one, default_vector)};
ctx.OpStore(element_info.id, value);
element += num;
}
}
}

View file

@ -20,6 +20,13 @@ u32 GenericAttributeIndex(Attribute attribute) {
return (static_cast<u32>(attribute) - static_cast<u32>(Attribute::Generic0X)) / 4u;
}
/// Returns the element (0..3) of a generic attribute, computed as the attribute's numeric
/// value modulo 4. Throws InvalidArgument when the attribute is not a generic one.
u32 GenericAttributeElement(Attribute attribute) {
    if (IsGeneric(attribute)) {
        const u32 raw_value{static_cast<u32>(attribute)};
        return raw_value % 4;
    }
    throw InvalidArgument("Attribute is not generic {}", attribute);
}
std::string NameOf(Attribute attribute) {
switch (attribute) {
case Attribute::PrimitiveId:

View file

@ -226,6 +226,8 @@ enum class Attribute : u64 {
[[nodiscard]] u32 GenericAttributeIndex(Attribute attribute);
[[nodiscard]] u32 GenericAttributeElement(Attribute attribute);
[[nodiscard]] std::string NameOf(Attribute attribute);
} // namespace Shader::IR

View file

@ -5,6 +5,7 @@
#pragma once
#include <array>
#include <vector>
#include <optional>
#include "common/common_types.h"
@ -26,6 +27,13 @@ enum class InputTopology {
TrianglesAdjacency,
};
// Transform feedback capture parameters for one output attribute element.
// The SPIR-V backend ignores entries whose `components` is zero.
struct TransformFeedbackVarying {
// Value emitted as the XfbBuffer decoration
u32 buffer{};
// Value emitted as the XfbStride decoration
u32 stride{};
// Value emitted as the Offset decoration
u32 offset{};
// Number of consecutive components captured starting at this element
u32 components{};
};
struct Profile {
u32 supported_spirv{0x00010000};
@ -58,6 +66,8 @@ struct Profile {
InputTopology input_topology{};
std::optional<float> fixed_state_point_size;
std::vector<TransformFeedbackVarying> xfb_varyings;
};
} // namespace Shader

View file

@ -52,6 +52,8 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
const u32 topology_index = static_cast<u32>(regs.draw.topology.Value());
raw1 = 0;
no_extended_dynamic_state.Assign(has_extended_dynamic_state ? 0 : 1);
xfb_enabled.Assign(regs.tfb_enabled != 0);
primitive_restart_enable.Assign(regs.primitive_restart.enabled != 0 ? 1 : 0);
depth_bias_enable.Assign(enabled_lut[POLYGON_OFFSET_ENABLE_LUT[topology_index]] != 0 ? 1 : 0);
depth_clamp_disabled.Assign(regs.view_volume_clip_control.depth_clamp_disabled.Value());
@ -113,10 +115,12 @@ void FixedPipelineState::Refresh(Tegra::Engines::Maxwell3D& maxwell3d,
return static_cast<u16>(viewport.swizzle.raw);
});
}
if (!has_extended_dynamic_state) {
no_extended_dynamic_state.Assign(1);
if (no_extended_dynamic_state != 0) {
dynamic_state.Refresh(regs);
}
if (xfb_enabled != 0) {
xfb_state.Refresh(regs);
}
}
void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t index) {
@ -158,6 +162,17 @@ void FixedPipelineState::BlendingAttachment::Refresh(const Maxwell& regs, size_t
enable.Assign(1);
}
void FixedPipelineState::TransformFeedbackState::Refresh(const Maxwell& regs) {
std::ranges::transform(regs.tfb_layouts, layouts.begin(), [](const auto& layout) {
return Layout{
.stream = layout.stream,
.varying_count = layout.varying_count,
.stride = layout.stride,
};
});
varyings = regs.tfb_varying_locs;
}
void FixedPipelineState::DynamicState::Refresh(const Maxwell& regs) {
u32 packed_front_face = PackFrontFace(regs.front_face);
if (regs.screen_y_control.triangle_rast_flip != 0) {

View file

@ -130,6 +130,18 @@ struct FixedPipelineState {
}
};
// Transform feedback portion of the fixed pipeline state, filled by Refresh() from the
// Maxwell registers.
struct TransformFeedbackState {
// Per-buffer layout, copied field by field from regs.tfb_layouts
struct Layout {
u32 stream;
u32 varying_count;
u32 stride;
};
// One layout per transform feedback buffer
std::array<Layout, Maxwell::NumTransformFeedbackBuffers> layouts;
// Per-buffer table of attribute locations, copied from regs.tfb_varying_locs
std::array<std::array<u8, 128>, Maxwell::NumTransformFeedbackBuffers> varyings;
// Copies the transform feedback registers of `regs` into this struct
void Refresh(const Maxwell& regs);
};
struct DynamicState {
union {
u32 raw1;
@ -168,6 +180,7 @@ struct FixedPipelineState {
union {
u32 raw1;
BitField<0, 1, u32> no_extended_dynamic_state;
BitField<1, 1, u32> xfb_enabled;
BitField<2, 1, u32> primitive_restart_enable;
BitField<3, 1, u32> depth_bias_enable;
BitField<4, 1, u32> depth_clamp_disabled;
@ -199,6 +212,7 @@ struct FixedPipelineState {
std::array<BlendingAttachment, Maxwell::NumRenderTargets> attachments;
std::array<u16, Maxwell::NumViewports> viewport_swizzles;
DynamicState dynamic_state;
TransformFeedbackState xfb_state;
void Refresh(Tegra::Engines::Maxwell3D& maxwell3d, bool has_extended_dynamic_state);
@ -211,8 +225,16 @@ struct FixedPipelineState {
}
/// Returns how many leading bytes of this struct are in use for the current state.
///
/// dynamic_state and xfb_state are declared after the always-used members, in that order, and
/// are only refreshed when needed, so the returned size stops before whichever of them is
/// unused.
size_t Size() const noexcept {
    if (xfb_enabled != 0) {
        // When transform feedback is enabled, use the whole struct
        return sizeof(*this);
    }
    if (no_extended_dynamic_state != 0) {
        // Extended dynamic state is unavailable, so dynamic_state is part of the key;
        // stop before the unused xfb_state
        return offsetof(FixedPipelineState, xfb_state);
    }
    // No XFB and extended dynamic state is available: stop before dynamic_state
    return offsetof(FixedPipelineState, dynamic_state);
}
};
static_assert(std::has_unique_object_representations_v<FixedPipelineState>);

View file

@ -248,6 +248,10 @@ namespace {
using Shader::Backend::SPIRV::EmitSPIRV;
using Shader::Maxwell::TranslateProgram;
// TODO: Move this to a separate file
// Magic bytes written at the very start of a new pipeline cache file and compared when the
// file is loaded
constexpr std::array<char, 8> MAGIC_NUMBER{'y', 'u', 'z', 'u', 'c', 'a', 'c', 'h'};
// Version written right after the magic bytes; a cache file whose version differs is deleted
// on load
constexpr u32 CACHE_VERSION{1};
class GraphicsEnvironment final : public GenericEnvironment {
public:
explicit GraphicsEnvironment() = default;
@ -379,13 +383,14 @@ void SerializePipeline(const Key& key, const Envs& envs, const std::string& file
try {
std::ofstream file;
file.exceptions(std::ifstream::failbit);
Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::app);
Common::FS::OpenFStream(file, filename, std::ios::binary | std::ios::ate | std::ios::app);
if (!file.is_open()) {
LOG_ERROR(Common_Filesystem, "Failed to open pipeline cache file {}", filename);
return;
}
if (file.tellp() == 0) {
// Write header...
file.write(MAGIC_NUMBER.data(), MAGIC_NUMBER.size())
.write(reinterpret_cast<const char*>(&CACHE_VERSION), sizeof(CACHE_VERSION));
}
const std::span key_span(reinterpret_cast<const char*>(&key), sizeof(key));
SerializePipeline(key_span, MakeSpan(envs), file);
@ -520,8 +525,27 @@ void PipelineCache::LoadDiskResources(u64 title_id, std::stop_token stop_loading
file.exceptions(std::ifstream::failbit);
const auto end{file.tellg()};
file.seekg(0, std::ios::beg);
// Read header...
std::array<char, 8> magic_number;
u32 cache_version;
file.read(magic_number.data(), magic_number.size())
.read(reinterpret_cast<char*>(&cache_version), sizeof(cache_version));
if (magic_number != MAGIC_NUMBER || cache_version != CACHE_VERSION) {
file.close();
if (Common::FS::Delete(pipeline_cache_filename)) {
if (magic_number != MAGIC_NUMBER) {
LOG_ERROR(Render_Vulkan, "Invalid pipeline cache file");
}
if (cache_version != CACHE_VERSION) {
LOG_INFO(Render_Vulkan, "Deleting old pipeline cache");
}
} else {
LOG_ERROR(Render_Vulkan,
"Invalid pipeline cache file and failed to delete it in \"{}\"",
pipeline_cache_filename);
}
return;
}
while (file.tellg() != end) {
if (stop_loading) {
return;
@ -879,6 +903,88 @@ static Shader::AttributeType CastAttributeType(const FixedPipelineState::VertexA
return Shader::AttributeType::Float;
}
/// Builds the per-location transform feedback table consumed by the shader backend.
///
/// The result always has 256 entries, indexed by attribute location. For every transform
/// feedback buffer the location table is walked in order; each slot advances the byte offset by
/// four. A location that belongs to one of the known vector attributes is merged with the
/// following slots that refer to the same vector, producing a single entry with several
/// components. Locations that are never referenced keep a component count of zero.
static std::vector<Shader::TransformFeedbackVarying> MakeTransformFeedbackVaryings(
    const GraphicsPipelineCacheKey& key) {
    // Base locations of the attributes that are treated as 4-component vectors
    // NOTE(review): the back color entries repeat the 160/164 values of the front colors;
    // confirm that this is intended
    static constexpr std::array VECTORS{
        28,  // gl_Position
        32,  // Generic 0
        36,  // Generic 1
        40,  // Generic 2
        44,  // Generic 3
        48,  // Generic 4
        52,  // Generic 5
        56,  // Generic 6
        60,  // Generic 7
        64,  // Generic 8
        68,  // Generic 9
        72,  // Generic 10
        76,  // Generic 11
        80,  // Generic 12
        84,  // Generic 13
        88,  // Generic 14
        92,  // Generic 15
        96,  // Generic 16
        100, // Generic 17
        104, // Generic 18
        108, // Generic 19
        112, // Generic 20
        116, // Generic 21
        120, // Generic 22
        124, // Generic 23
        128, // Generic 24
        132, // Generic 25
        136, // Generic 26
        140, // Generic 27
        144, // Generic 28
        148, // Generic 29
        152, // Generic 30
        156, // Generic 31
        160, // gl_FrontColor
        164, // gl_FrontSecondaryColor
        160, // gl_BackColor
        164, // gl_BackSecondaryColor
        192, // gl_TexCoord[0]
        196, // gl_TexCoord[1]
        200, // gl_TexCoord[2]
        204, // gl_TexCoord[3]
        208, // gl_TexCoord[4]
        212, // gl_TexCoord[5]
        216, // gl_TexCoord[6]
        220, // gl_TexCoord[7]
    };
    std::vector<Shader::TransformFeedbackVarying> xfb(256);
    for (size_t buffer = 0; buffer < Maxwell::NumTransformFeedbackBuffers; ++buffer) {
        const auto& locations = key.state.xfb_state.varyings[buffer];
        const auto& layout = key.state.xfb_state.layouts[buffer];
        // The count is an unchecked 32-bit value; never walk past the location table
        const u32 varying_count =
            std::min<u32>(layout.varying_count, static_cast<u32>(locations.size()));
        u32 highest = 0;
        for (u32 offset = 0; offset < varying_count; ++offset) {
            // `offset` may be advanced below while merging, keep the slot this varying starts at
            const u32 base_offset = offset;
            const u8 location = locations[offset];

            Shader::TransformFeedbackVarying varying;
            varying.buffer = layout.stream;
            varying.stride = layout.stride;
            varying.offset = offset * 4;
            varying.components = 1;

            if (std::ranges::find(VECTORS, Common::AlignDown(location, 4)) != VECTORS.end()) {
                UNIMPLEMENTED_IF_MSG(location % 4 != 0, "Unaligned TFB");

                // Swallow the following slots that point into the same vector
                const u8 base_index = location / 4;
                while (offset + 1 < varying_count && base_index == locations[offset + 1] / 4) {
                    ++offset;
                    ++varying.components;
                }
            }
            xfb[location] = varying;
            highest = std::max(highest, (base_offset + varying.components) * 4);
        }
        // The captured data is expected to fill the declared stride exactly
        UNIMPLEMENTED_IF(highest != layout.stride);
    }
    return xfb;
}
Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key,
const Shader::IR::Program& program) {
Shader::Profile profile{base_profile};
@ -893,6 +999,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key,
if (key.state.topology == Maxwell::PrimitiveTopology::Points) {
profile.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled != 0) {
profile.xfb_varyings = MakeTransformFeedbackVaryings(key);
}
profile.convert_depth_mode = gl_ndc;
}
std::ranges::transform(key.state.attributes, profile.generic_input_types.begin(),
@ -902,6 +1011,9 @@ Shader::Profile PipelineCache::MakeProfile(const GraphicsPipelineCacheKey& key,
if (program.output_topology == Shader::OutputTopology::PointList) {
profile.fixed_state_point_size = point_size;
}
if (key.state.xfb_enabled != 0) {
profile.xfb_varyings = MakeTransformFeedbackVaryings(key);
}
profile.convert_depth_mode = gl_ndc;
break;
default: