Merge pull request #4049 from ReinUsesLisp/separate-samplers

shader/texture: Join separate image and sampler pairs offline
This commit is contained in:
bunnei 2020-06-13 13:48:27 -04:00 committed by GitHub
commit c2ea1e1bcb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 274 additions and 114 deletions

View file

@ -93,6 +93,7 @@ public:
virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0; virtual SamplerDescriptor AccessBoundSampler(ShaderType stage, u64 offset) const = 0;
virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const = 0; u64 offset) const = 0;
virtual SamplerDescriptor AccessSampler(u32 handle) const = 0;
virtual u32 GetBoundBuffer() const = 0; virtual u32 GetBoundBuffer() const = 0;
virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0; virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;

View file

@ -92,8 +92,11 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
ASSERT(stage == ShaderType::Compute); ASSERT(stage == ShaderType::Compute);
const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer]; const auto& tex_info_buffer = launch_description.const_buffer_config[const_buffer];
const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset; const GPUVAddr tex_info_address = tex_info_buffer.Address() + offset;
return AccessSampler(memory_manager.Read<u32>(tex_info_address));
}
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; SamplerDescriptor KeplerCompute::AccessSampler(u32 handle) const {
const Texture::TextureHandle tex_handle{handle};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());

View file

@ -219,6 +219,8 @@ public:
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const override; u64 offset) const override;
SamplerDescriptor AccessSampler(u32 handle) const override;
u32 GetBoundBuffer() const override { u32 GetBoundBuffer() const override {
return regs.tex_cb_index; return regs.tex_cb_index;
} }

View file

@ -740,8 +740,11 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)]; const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& tex_info_buffer = shader.const_buffers[const_buffer]; const auto& tex_info_buffer = shader.const_buffers[const_buffer];
const GPUVAddr tex_info_address = tex_info_buffer.address + offset; const GPUVAddr tex_info_address = tex_info_buffer.address + offset;
return AccessSampler(memory_manager.Read<u32>(tex_info_address));
}
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)}; SamplerDescriptor Maxwell3D::AccessSampler(u32 handle) const {
const Texture::TextureHandle tex_handle{handle};
const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle); const Texture::FullTextureInfo tex_info = GetTextureInfo(tex_handle);
SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic); SamplerDescriptor result = SamplerDescriptor::FromTIC(tex_info.tic);
result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value()); result.is_shadow.Assign(tex_info.tsc.depth_compare_enabled.Value());

View file

@ -1404,6 +1404,8 @@ public:
SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer, SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
u64 offset) const override; u64 offset) const override;
SamplerDescriptor AccessSampler(u32 handle) const override;
u32 GetBoundBuffer() const override { u32 GetBoundBuffer() const override {
return regs.tex_cb_index; return regs.tex_cb_index;
} }

View file

@ -66,10 +66,22 @@ constexpr std::size_t NumSupportedVertexAttributes = 16;
template <typename Engine, typename Entry> template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
ShaderType shader_type, std::size_t index = 0) { ShaderType shader_type, std::size_t index = 0) {
if (entry.is_bindless) { if constexpr (std::is_same_v<Entry, SamplerEntry>) {
const auto tex_handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset); if (entry.is_separated) {
return engine.GetTextureInfo(tex_handle); const u32 buffer_1 = entry.buffer;
const u32 buffer_2 = entry.secondary_buffer;
const u32 offset_1 = entry.offset;
const u32 offset_2 = entry.secondary_offset;
const u32 handle_1 = engine.AccessConstBuffer32(shader_type, buffer_1, offset_1);
const u32 handle_2 = engine.AccessConstBuffer32(shader_type, buffer_2, offset_2);
return engine.GetTextureInfo(handle_1 | handle_2);
} }
}
if (entry.is_bindless) {
const u32 handle = engine.AccessConstBuffer32(shader_type, entry.buffer, entry.offset);
return engine.GetTextureInfo(handle);
}
const auto& gpu_profile = engine.AccessGuestDriverProfile(); const auto& gpu_profile = engine.AccessGuestDriverProfile();
const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize()); const u32 offset = entry.offset + static_cast<u32>(index * gpu_profile.GetTextureHandlerSize());
if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) { if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {

View file

@ -29,6 +29,8 @@ using VideoCommon::Shader::KeyMap;
namespace { namespace {
using VideoCommon::Shader::SeparateSamplerKey;
using ShaderCacheVersionHash = std::array<u8, 64>; using ShaderCacheVersionHash = std::array<u8, 64>;
struct ConstBufferKey { struct ConstBufferKey {
@ -37,18 +39,26 @@ struct ConstBufferKey {
u32 value = 0; u32 value = 0;
}; };
struct BoundSamplerKey { struct BoundSamplerEntry {
u32 offset = 0; u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler; Tegra::Engines::SamplerDescriptor sampler;
}; };
struct BindlessSamplerKey { struct SeparateSamplerEntry {
u32 cbuf1 = 0;
u32 cbuf2 = 0;
u32 offset1 = 0;
u32 offset2 = 0;
Tegra::Engines::SamplerDescriptor sampler;
};
struct BindlessSamplerEntry {
u32 cbuf = 0; u32 cbuf = 0;
u32 offset = 0; u32 offset = 0;
Tegra::Engines::SamplerDescriptor sampler; Tegra::Engines::SamplerDescriptor sampler;
}; };
constexpr u32 NativeVersion = 20; constexpr u32 NativeVersion = 21;
ShaderCacheVersionHash GetShaderCacheVersionHash() { ShaderCacheVersionHash GetShaderCacheVersionHash() {
ShaderCacheVersionHash hash{}; ShaderCacheVersionHash hash{};
@ -87,12 +97,14 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
u32 texture_handler_size_value; u32 texture_handler_size_value;
u32 num_keys; u32 num_keys;
u32 num_bound_samplers; u32 num_bound_samplers;
u32 num_separate_samplers;
u32 num_bindless_samplers; u32 num_bindless_samplers;
if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 || if (file.ReadArray(&unique_identifier, 1) != 1 || file.ReadArray(&bound_buffer, 1) != 1 ||
file.ReadArray(&is_texture_handler_size_known, 1) != 1 || file.ReadArray(&is_texture_handler_size_known, 1) != 1 ||
file.ReadArray(&texture_handler_size_value, 1) != 1 || file.ReadArray(&texture_handler_size_value, 1) != 1 ||
file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 || file.ReadArray(&graphics_info, 1) != 1 || file.ReadArray(&compute_info, 1) != 1 ||
file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 || file.ReadArray(&num_bound_samplers, 1) != 1 ||
file.ReadArray(&num_separate_samplers, 1) != 1 ||
file.ReadArray(&num_bindless_samplers, 1) != 1) { file.ReadArray(&num_bindless_samplers, 1) != 1) {
return false; return false;
} }
@ -101,23 +113,32 @@ bool ShaderDiskCacheEntry::Load(FileUtil::IOFile& file) {
} }
std::vector<ConstBufferKey> flat_keys(num_keys); std::vector<ConstBufferKey> flat_keys(num_keys);
std::vector<BoundSamplerKey> flat_bound_samplers(num_bound_samplers); std::vector<BoundSamplerEntry> flat_bound_samplers(num_bound_samplers);
std::vector<BindlessSamplerKey> flat_bindless_samplers(num_bindless_samplers); std::vector<SeparateSamplerEntry> flat_separate_samplers(num_separate_samplers);
std::vector<BindlessSamplerEntry> flat_bindless_samplers(num_bindless_samplers);
if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() || if (file.ReadArray(flat_keys.data(), flat_keys.size()) != flat_keys.size() ||
file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) != file.ReadArray(flat_bound_samplers.data(), flat_bound_samplers.size()) !=
flat_bound_samplers.size() || flat_bound_samplers.size() ||
file.ReadArray(flat_separate_samplers.data(), flat_separate_samplers.size()) !=
flat_separate_samplers.size() ||
file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) != file.ReadArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) !=
flat_bindless_samplers.size()) { flat_bindless_samplers.size()) {
return false; return false;
} }
for (const auto& key : flat_keys) { for (const auto& entry : flat_keys) {
keys.insert({{key.cbuf, key.offset}, key.value}); keys.insert({{entry.cbuf, entry.offset}, entry.value});
} }
for (const auto& key : flat_bound_samplers) { for (const auto& entry : flat_bound_samplers) {
bound_samplers.emplace(key.offset, key.sampler); bound_samplers.emplace(entry.offset, entry.sampler);
} }
for (const auto& key : flat_bindless_samplers) { for (const auto& entry : flat_separate_samplers) {
bindless_samplers.insert({{key.cbuf, key.offset}, key.sampler}); SeparateSamplerKey key;
key.buffers = {entry.cbuf1, entry.cbuf2};
key.offsets = {entry.offset1, entry.offset2};
separate_samplers.emplace(key, entry.sampler);
}
for (const auto& entry : flat_bindless_samplers) {
bindless_samplers.insert({{entry.cbuf, entry.offset}, entry.sampler});
} }
return true; return true;
@ -142,6 +163,7 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 || file.WriteObject(graphics_info) != 1 || file.WriteObject(compute_info) != 1 ||
file.WriteObject(static_cast<u32>(keys.size())) != 1 || file.WriteObject(static_cast<u32>(keys.size())) != 1 ||
file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 || file.WriteObject(static_cast<u32>(bound_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(separate_samplers.size())) != 1 ||
file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) { file.WriteObject(static_cast<u32>(bindless_samplers.size())) != 1) {
return false; return false;
} }
@ -152,22 +174,34 @@ bool ShaderDiskCacheEntry::Save(FileUtil::IOFile& file) const {
flat_keys.push_back(ConstBufferKey{address.first, address.second, value}); flat_keys.push_back(ConstBufferKey{address.first, address.second, value});
} }
std::vector<BoundSamplerKey> flat_bound_samplers; std::vector<BoundSamplerEntry> flat_bound_samplers;
flat_bound_samplers.reserve(bound_samplers.size()); flat_bound_samplers.reserve(bound_samplers.size());
for (const auto& [address, sampler] : bound_samplers) { for (const auto& [address, sampler] : bound_samplers) {
flat_bound_samplers.push_back(BoundSamplerKey{address, sampler}); flat_bound_samplers.push_back(BoundSamplerEntry{address, sampler});
} }
std::vector<BindlessSamplerKey> flat_bindless_samplers; std::vector<SeparateSamplerEntry> flat_separate_samplers;
flat_separate_samplers.reserve(separate_samplers.size());
for (const auto& [key, sampler] : separate_samplers) {
SeparateSamplerEntry entry;
std::tie(entry.cbuf1, entry.cbuf2) = key.buffers;
std::tie(entry.offset1, entry.offset2) = key.offsets;
entry.sampler = sampler;
flat_separate_samplers.push_back(entry);
}
std::vector<BindlessSamplerEntry> flat_bindless_samplers;
flat_bindless_samplers.reserve(bindless_samplers.size()); flat_bindless_samplers.reserve(bindless_samplers.size());
for (const auto& [address, sampler] : bindless_samplers) { for (const auto& [address, sampler] : bindless_samplers) {
flat_bindless_samplers.push_back( flat_bindless_samplers.push_back(
BindlessSamplerKey{address.first, address.second, sampler}); BindlessSamplerEntry{address.first, address.second, sampler});
} }
return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() && return file.WriteArray(flat_keys.data(), flat_keys.size()) == flat_keys.size() &&
file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) == file.WriteArray(flat_bound_samplers.data(), flat_bound_samplers.size()) ==
flat_bound_samplers.size() && flat_bound_samplers.size() &&
file.WriteArray(flat_separate_samplers.data(), flat_separate_samplers.size()) ==
flat_separate_samplers.size() &&
file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) == file.WriteArray(flat_bindless_samplers.data(), flat_bindless_samplers.size()) ==
flat_bindless_samplers.size(); flat_bindless_samplers.size();
} }

View file

@ -57,6 +57,7 @@ struct ShaderDiskCacheEntry {
VideoCommon::Shader::ComputeInfo compute_info; VideoCommon::Shader::ComputeInfo compute_info;
VideoCommon::Shader::KeyMap keys; VideoCommon::Shader::KeyMap keys;
VideoCommon::Shader::BoundSamplerMap bound_samplers; VideoCommon::Shader::BoundSamplerMap bound_samplers;
VideoCommon::Shader::SeparateSamplerMap separate_samplers;
VideoCommon::Shader::BindlessSamplerMap bindless_samplers; VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
}; };

View file

@ -118,6 +118,17 @@ template <typename Engine, typename Entry>
Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry, Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
std::size_t stage, std::size_t index = 0) { std::size_t stage, std::size_t index = 0) {
const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage); const auto stage_type = static_cast<Tegra::Engines::ShaderType>(stage);
if constexpr (std::is_same_v<Entry, SamplerEntry>) {
if (entry.is_separated) {
const u32 buffer_1 = entry.buffer;
const u32 buffer_2 = entry.secondary_buffer;
const u32 offset_1 = entry.offset;
const u32 offset_2 = entry.secondary_offset;
const u32 handle_1 = engine.AccessConstBuffer32(stage_type, buffer_1, offset_1);
const u32 handle_2 = engine.AccessConstBuffer32(stage_type, buffer_2, offset_2);
return engine.GetTextureInfo(handle_1 | handle_2);
}
}
if (entry.is_bindless) { if (entry.is_bindless) {
const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset); const auto tex_handle = engine.AccessConstBuffer32(stage_type, entry.buffer, entry.offset);
return engine.GetTextureInfo(tex_handle); return engine.GetTextureInfo(tex_handle);

View file

@ -357,13 +357,11 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
return pc; return pc;
} }
ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset, ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(
std::optional<u32> buffer) { SamplerInfo info, std::optional<Tegra::Engines::SamplerDescriptor> sampler) {
if (info.IsComplete()) { if (info.IsComplete()) {
return info; return info;
} }
const auto sampler = buffer ? registry.ObtainBindlessSampler(*buffer, offset)
: registry.ObtainBoundSampler(offset);
if (!sampler) { if (!sampler) {
LOG_WARNING(HW_GPU, "Unknown sampler info"); LOG_WARNING(HW_GPU, "Unknown sampler info");
info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D); info.type = info.type.value_or(Tegra::Shader::TextureType::Texture2D);
@ -381,8 +379,8 @@ ShaderIR::SamplerInfo ShaderIR::GetSamplerInfo(SamplerInfo info, u32 offset,
std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler, std::optional<Sampler> ShaderIR::GetSampler(Tegra::Shader::Sampler sampler,
SamplerInfo sampler_info) { SamplerInfo sampler_info) {
const auto offset = static_cast<u32>(sampler.index.Value()); const u32 offset = static_cast<u32>(sampler.index.Value());
const auto info = GetSamplerInfo(sampler_info, offset); const auto info = GetSamplerInfo(sampler_info, registry.ObtainBoundSampler(offset));
// If this sampler has already been used, return the existing mapping. // If this sampler has already been used, return the existing mapping.
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
@ -404,20 +402,19 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
const Node sampler_register = GetRegister(reg); const Node sampler_register = GetRegister(reg);
const auto [base_node, tracked_sampler_info] = const auto [base_node, tracked_sampler_info] =
TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size())); TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
ASSERT(base_node != nullptr); if (!base_node) {
if (base_node == nullptr) { UNREACHABLE();
return std::nullopt; return std::nullopt;
} }
if (const auto bindless_sampler_info = if (const auto sampler_info = std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) { const u32 buffer = sampler_info->index;
const u32 buffer = bindless_sampler_info->GetIndex(); const u32 offset = sampler_info->offset;
const u32 offset = bindless_sampler_info->GetOffset(); info = GetSamplerInfo(info, registry.ObtainBindlessSampler(buffer, offset));
info = GetSamplerInfo(info, offset, buffer);
// If this sampler has already been used, return the existing mapping. // If this sampler has already been used, return the existing mapping.
const auto it = std::find_if(used_samplers.begin(), used_samplers.end(), const auto it = std::find_if(used_samplers.begin(), used_samplers.end(),
[buffer = buffer, offset = offset](const Sampler& entry) { [buffer, offset](const Sampler& entry) {
return entry.buffer == buffer && entry.offset == offset; return entry.buffer == buffer && entry.offset == offset;
}); });
if (it != used_samplers.end()) { if (it != used_samplers.end()) {
@ -431,10 +428,32 @@ std::optional<Sampler> ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array, return used_samplers.emplace_back(next_index, offset, buffer, *info.type, *info.is_array,
*info.is_shadow, *info.is_buffer, false); *info.is_shadow, *info.is_buffer, false);
} }
if (const auto array_sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) { if (const auto sampler_info = std::get_if<SeparateSamplerNode>(&*tracked_sampler_info)) {
const u32 base_offset = array_sampler_info->GetBaseOffset() / 4; const std::pair indices = sampler_info->indices;
index_var = GetCustomVariable(array_sampler_info->GetIndexVar()); const std::pair offsets = sampler_info->offsets;
info = GetSamplerInfo(info, base_offset); info = GetSamplerInfo(info, registry.ObtainSeparateSampler(indices, offsets));
// Try to use an already created sampler if it exists
const auto it = std::find_if(
used_samplers.begin(), used_samplers.end(), [indices, offsets](const Sampler& entry) {
return offsets == std::pair{entry.offset, entry.secondary_offset} &&
indices == std::pair{entry.buffer, entry.secondary_buffer};
});
if (it != used_samplers.end()) {
ASSERT(it->is_separated && it->type == info.type && it->is_array == info.is_array &&
it->is_shadow == info.is_shadow && it->is_buffer == info.is_buffer);
return *it;
}
// Otherwise create a new mapping for this sampler
const u32 next_index = static_cast<u32>(used_samplers.size());
return used_samplers.emplace_back(next_index, offsets, indices, *info.type, *info.is_array,
*info.is_shadow, *info.is_buffer);
}
if (const auto sampler_info = std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
const u32 base_offset = sampler_info->base_offset / 4;
index_var = GetCustomVariable(sampler_info->bindless_var);
info = GetSamplerInfo(info, registry.ObtainBoundSampler(base_offset));
// If this sampler has already been used, return the existing mapping. // If this sampler has already been used, return the existing mapping.
const auto it = std::find_if( const auto it = std::find_if(

View file

@ -275,10 +275,11 @@ using Node = std::shared_ptr<NodeData>;
using Node4 = std::array<Node, 4>; using Node4 = std::array<Node, 4>;
using NodeBlock = std::vector<Node>; using NodeBlock = std::vector<Node>;
class BindlessSamplerNode; struct ArraySamplerNode;
class ArraySamplerNode; struct BindlessSamplerNode;
struct SeparateSamplerNode;
using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>; using TrackSamplerData = std::variant<BindlessSamplerNode, SeparateSamplerNode, ArraySamplerNode>;
using TrackSampler = std::shared_ptr<TrackSamplerData>; using TrackSampler = std::shared_ptr<TrackSamplerData>;
struct Sampler { struct Sampler {
@ -288,6 +289,14 @@ struct Sampler {
: index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow}, : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
is_buffer{is_buffer}, is_indexed{is_indexed} {} is_buffer{is_buffer}, is_indexed{is_indexed} {}
/// Separate sampler constructor
/// Builds a sampler entry described by two const buffer locations instead of one.
/// `offsets.first` / `buffers.first` fill the primary `offset` / `buffer` members, while
/// `offsets.second` / `buffers.second` fill `secondary_offset` / `secondary_buffer`.
/// The entry is always flagged with `is_separated = true`; members not named in the
/// initializer list keep their in-class defaults.
constexpr explicit Sampler(u32 index, std::pair<u32, u32> offsets, std::pair<u32, u32> buffers,
Tegra::Shader::TextureType type, bool is_array, bool is_shadow,
bool is_buffer)
: index{index}, offset{offsets.first}, secondary_offset{offsets.second},
buffer{buffers.first}, secondary_buffer{buffers.second}, type{type}, is_array{is_array},
is_shadow{is_shadow}, is_buffer{is_buffer}, is_separated{true} {}
/// Bindless samplers constructor /// Bindless samplers constructor
constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type, constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
bool is_array, bool is_shadow, bool is_buffer, bool is_indexed) bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
@ -296,7 +305,9 @@ struct Sampler {
u32 index = 0; ///< Emulated index given for the this sampler. u32 index = 0; ///< Emulated index given for the this sampler.
u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read. u32 offset = 0; ///< Offset in the const buffer from where the sampler is being read.
u32 buffer = 0; ///< Buffer where the bindless sampler is being read (unused on bound samplers). u32 secondary_offset = 0; ///< Secondary offset in the const buffer.
u32 buffer = 0; ///< Buffer where the bindless sampler is read.
u32 secondary_buffer = 0; ///< Secondary buffer where the bindless sampler is read.
u32 size = 1; ///< Size of the sampler. u32 size = 1; ///< Size of the sampler.
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc) Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
@ -305,46 +316,24 @@ struct Sampler {
bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler. bool is_buffer = false; ///< Whether the texture is a texture buffer without sampler.
bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not. bool is_bindless = false; ///< Whether this sampler belongs to a bindless texture or not.
bool is_indexed = false; ///< Whether this sampler is an indexed array of textures. bool is_indexed = false; ///< Whether this sampler is an indexed array of textures.
bool is_separated = false; ///< Whether the image and sampler is separated or not.
}; };
/// Represents a tracked bindless sampler into a direct const buffer /// Represents a tracked bindless sampler into a direct const buffer
class ArraySamplerNode final { struct ArraySamplerNode {
public:
explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
: index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
constexpr u32 GetIndex() const {
return index;
}
constexpr u32 GetBaseOffset() const {
return base_offset;
}
constexpr u32 GetIndexVar() const {
return bindless_var;
}
private:
u32 index; u32 index;
u32 base_offset; u32 base_offset;
u32 bindless_var; u32 bindless_var;
}; };
/// Represents a tracked separate sampler image pair that was folded statically
struct SeparateSamplerNode {
/// Const buffer indices of the two tracked reads (passed on as the sampler's buffers).
std::pair<u32, u32> indices;
/// Const buffer offsets of the two tracked reads; each offset pairs with the index in the
/// same position.
std::pair<u32, u32> offsets;
};
/// Represents a tracked bindless sampler into a direct const buffer /// Represents a tracked bindless sampler into a direct const buffer
class BindlessSamplerNode final { struct BindlessSamplerNode {
public:
explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
constexpr u32 GetIndex() const {
return index;
}
constexpr u32 GetOffset() const {
return offset;
}
private:
u32 index; u32 index;
u32 offset; u32 offset;
}; };

View file

@ -48,7 +48,7 @@ Node MakeNode(Args&&... args) {
template <typename T, typename... Args> template <typename T, typename... Args>
TrackSampler MakeTrackSampler(Args&&... args) { TrackSampler MakeTrackSampler(Args&&... args) {
static_assert(std::is_convertible_v<T, TrackSamplerData>); static_assert(std::is_convertible_v<T, TrackSamplerData>);
return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...)); return std::make_shared<TrackSamplerData>(T{std::forward<Args>(args)...});
} }
template <typename... Args> template <typename... Args>

View file

@ -93,6 +93,26 @@ std::optional<SamplerDescriptor> Registry::ObtainBoundSampler(u32 offset) {
return value; return value;
} }
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainSeparateSampler(
std::pair<u32, u32> buffers, std::pair<u32, u32> offsets) {
SeparateSamplerKey key;
key.buffers = buffers;
key.offsets = offsets;
const auto iter = separate_samplers.find(key);
if (iter != separate_samplers.end()) {
return iter->second;
}
if (!engine) {
return std::nullopt;
}
const u32 handle_1 = engine->AccessConstBuffer32(stage, key.buffers.first, key.offsets.first);
const u32 handle_2 = engine->AccessConstBuffer32(stage, key.buffers.second, key.offsets.second);
const SamplerDescriptor value = engine->AccessSampler(handle_1 | handle_2);
separate_samplers.emplace(key, value);
return value;
}
std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer, std::optional<Tegra::Engines::SamplerDescriptor> Registry::ObtainBindlessSampler(u32 buffer,
u32 offset) { u32 offset) {
const std::pair key = {buffer, offset}; const std::pair key = {buffer, offset};

View file

@ -19,8 +19,39 @@
namespace VideoCommon::Shader { namespace VideoCommon::Shader {
/// Lookup key for samplers whose handle is assembled from two const buffer reads.
struct SeparateSamplerKey {
/// Const buffer indices of the first and second read.
std::pair<u32, u32> buffers;
/// Offsets of the first and second read; each offset pairs with the buffer in the same
/// position.
std::pair<u32, u32> offsets;
};
} // namespace VideoCommon::Shader
namespace std {
/// Hash functor for SeparateSamplerKey so it can be used as an unordered container key.
/// The four fields are mixed one after another with an order-sensitive combine step.
/// A plain XOR of the fields would let equal fields cancel out and would hash every
/// permutation of the same values identically.
template <>
struct hash<VideoCommon::Shader::SeparateSamplerKey> {
    std::size_t operator()(const VideoCommon::Shader::SeparateSamplerKey& key) const noexcept {
        std::size_t seed = 0;
        // Boost-style hash_combine: folds one field into the running seed.
        const auto combine = [&seed](u32 value) {
            seed ^= std::hash<u32>{}(value) + static_cast<std::size_t>(0x9e3779b9) +
                    (seed << 6) + (seed >> 2);
        };
        combine(key.buffers.first);
        combine(key.buffers.second);
        combine(key.offsets.first);
        combine(key.offsets.second);
        return seed;
    }
};
/// Equality functor for SeparateSamplerKey (std::equal_to is the default key comparator of
/// std::unordered_map).
template <>
struct equal_to<VideoCommon::Shader::SeparateSamplerKey> {
// Keys match only when both the buffer pair and the offset pair are identical.
bool operator()(const VideoCommon::Shader::SeparateSamplerKey& lhs,
const VideoCommon::Shader::SeparateSamplerKey& rhs) const noexcept {
return lhs.buffers == rhs.buffers && lhs.offsets == rhs.offsets;
}
};
} // namespace std
namespace VideoCommon::Shader {
using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>; using KeyMap = std::unordered_map<std::pair<u32, u32>, u32, Common::PairHash>;
using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>; using BoundSamplerMap = std::unordered_map<u32, Tegra::Engines::SamplerDescriptor>;
using SeparateSamplerMap =
std::unordered_map<SeparateSamplerKey, Tegra::Engines::SamplerDescriptor>;
using BindlessSamplerMap = using BindlessSamplerMap =
std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>; std::unordered_map<std::pair<u32, u32>, Tegra::Engines::SamplerDescriptor, Common::PairHash>;
@ -73,6 +104,9 @@ public:
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset); std::optional<Tegra::Engines::SamplerDescriptor> ObtainBoundSampler(u32 offset);
std::optional<Tegra::Engines::SamplerDescriptor> ObtainSeparateSampler(
std::pair<u32, u32> buffers, std::pair<u32, u32> offsets);
std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset); std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
/// Inserts a key. /// Inserts a key.
@ -128,6 +162,7 @@ private:
Tegra::Engines::ConstBufferEngineInterface* engine = nullptr; Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
KeyMap keys; KeyMap keys;
BoundSamplerMap bound_samplers; BoundSamplerMap bound_samplers;
SeparateSamplerMap separate_samplers;
BindlessSamplerMap bindless_samplers; BindlessSamplerMap bindless_samplers;
u32 bound_buffer; u32 bound_buffer;
GraphicsInfo graphics_info; GraphicsInfo graphics_info;

View file

@ -330,8 +330,8 @@ private:
OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation); OperationCode GetPredicateCombiner(Tegra::Shader::PredOperation operation);
/// Queries the missing sampler info from the execution context. /// Queries the missing sampler info from the execution context.
SamplerInfo GetSamplerInfo(SamplerInfo info, u32 offset, SamplerInfo GetSamplerInfo(SamplerInfo info,
std::optional<u32> buffer = std::nullopt); std::optional<Tegra::Engines::SamplerDescriptor> sampler);
/// Accesses a texture sampler. /// Accesses a texture sampler.
std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info); std::optional<Sampler> GetSampler(Tegra::Shader::Sampler sampler, SamplerInfo info);
@ -409,7 +409,13 @@ private:
std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const; std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code, std::pair<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
s64 cursor);
std::pair<Node, TrackSampler> HandleBindlessIndirectRead(const CbufNode& cbuf,
const OperationNode& operation,
Node gpr, Node base_offset,
Node tracked, const NodeBlock& code,
s64 cursor); s64 cursor);
std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const; std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;

View file

@ -14,6 +14,7 @@
namespace VideoCommon::Shader { namespace VideoCommon::Shader {
namespace { namespace {
std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor, std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
OperationCode operation_code) { OperationCode operation_code) {
for (; cursor >= 0; --cursor) { for (; cursor >= 0; --cursor) {
@ -63,7 +64,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
if (const auto operation = std::get_if<OperationNode>(&*node)) { if (const auto operation = std::get_if<OperationNode>(&*node)) {
operation->SetAmendIndex(amend_index); operation->SetAmendIndex(amend_index);
return true; return true;
} else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) { }
if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
conditional->SetAmendIndex(amend_index); conditional->SetAmendIndex(amend_index);
return true; return true;
} }
@ -72,40 +74,27 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
} // Anonymous namespace } // Anonymous namespace
std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code, std::pair<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
s64 cursor) { s64 cursor) {
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
const u32 cbuf_index = cbuf->GetIndex();
// Constant buffer found, test if it's an immediate // Constant buffer found, test if it's an immediate
const auto& offset = cbuf->GetOffset(); const auto& offset = cbuf->GetOffset();
if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) { if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
auto track = auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
MakeTrackSampler<BindlessSamplerNode>(cbuf->GetIndex(), immediate->GetValue());
return {tracked, track}; return {tracked, track};
} }
if (const auto operation = std::get_if<OperationNode>(&*offset)) { if (const auto operation = std::get_if<OperationNode>(&*offset)) {
const u32 bound_buffer = registry.GetBoundBuffer(); const u32 bound_buffer = registry.GetBoundBuffer();
if (bound_buffer != cbuf->GetIndex()) { if (bound_buffer != cbuf_index) {
return {};
}
const auto pair = DecoupleIndirectRead(*operation);
if (!pair) {
return {}; return {};
} }
if (const std::optional pair = DecoupleIndirectRead(*operation)) {
auto [gpr, base_offset] = *pair; auto [gpr, base_offset] = *pair;
const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset); return HandleBindlessIndirectRead(*cbuf, *operation, gpr, base_offset, tracked,
const auto& gpu_driver = registry.AccessGuestDriverProfile(); code, cursor);
const u32 bindless_cv = NewCustomVariable(); }
Node op =
Operation(OperationCode::UDiv, gpr, Immediate(gpu_driver.GetTextureHandlerSize()));
const Node cv_node = GetCustomVariable(bindless_cv);
Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
const std::size_t amend_index = DeclareAmend(std::move(amend_op));
AmendNodeCv(amend_index, code[cursor]);
// TODO Implement Bindless Index custom variable
auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
offset_inm->GetValue(), bindless_cv);
return {tracked, track};
} }
return {}; return {};
} }
@ -122,10 +111,23 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
return TrackBindlessSampler(source, code, new_cursor); return TrackBindlessSampler(source, code, new_cursor);
} }
if (const auto operation = std::get_if<OperationNode>(&*tracked)) { if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) { const OperationNode& op = *operation;
if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
std::get<0>(found)) { const OperationCode opcode = operation->GetCode();
// Cbuf found in operand. if (opcode == OperationCode::IBitwiseOr || opcode == OperationCode::UBitwiseOr) {
ASSERT(op.GetOperandsCount() == 2);
auto [node_a, index_a, offset_a] = TrackCbuf(op[0], code, cursor);
auto [node_b, index_b, offset_b] = TrackCbuf(op[1], code, cursor);
if (node_a && node_b) {
auto track = MakeTrackSampler<SeparateSamplerNode>(std::pair{index_a, index_b},
std::pair{offset_a, offset_b});
return {tracked, std::move(track)};
}
}
std::size_t i = op.GetOperandsCount();
while (i--) {
if (auto found = TrackBindlessSampler(op[i - 1], code, cursor); std::get<0>(found)) {
// Constant buffer found in operand.
return found; return found;
} }
} }
@ -139,6 +141,26 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, cons
return {}; return {};
} }
std::pair<Node, TrackSampler> ShaderIR::HandleBindlessIndirectRead(
const CbufNode& cbuf, const OperationNode& operation, Node gpr, Node base_offset, Node tracked,
const NodeBlock& code, s64 cursor) {
const auto offset_imm = std::get<ImmediateNode>(*base_offset);
const auto& gpu_driver = registry.AccessGuestDriverProfile();
const u32 bindless_cv = NewCustomVariable();
const u32 texture_handler_size = gpu_driver.GetTextureHandlerSize();
Node op = Operation(OperationCode::UDiv, gpr, Immediate(texture_handler_size));
Node cv_node = GetCustomVariable(bindless_cv);
Node amend_op = Operation(OperationCode::Assign, std::move(cv_node), std::move(op));
const std::size_t amend_index = DeclareAmend(std::move(amend_op));
AmendNodeCv(amend_index, code[cursor]);
// TODO: Implement bindless index custom variable
auto track =
MakeTrackSampler<ArraySamplerNode>(cbuf.GetIndex(), offset_imm.GetValue(), bindless_cv);
return {tracked, track};
}
std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code, std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
s64 cursor) const { s64 cursor) const {
if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) { if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {