Merge pull request #2348 from FernandoS27/guest-bindless

Implement Bindless Textures on Shader Decompiler and GL backend
This commit is contained in:
bunnei 2019-04-17 20:59:49 -04:00 committed by GitHub
commit 5bd5140bde
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 217 additions and 44 deletions

View file

@ -482,19 +482,8 @@ std::vector<Texture::FullTextureInfo> Maxwell3D::GetStageTextures(Regs::ShaderSt
return textures;
}
Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
Texture::FullTextureInfo Maxwell3D::GetTextureInfo(const Texture::TextureHandle tex_handle,
std::size_t offset) const {
auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
const GPUVAddr tex_info_address =
tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
Texture::FullTextureInfo tex_info{};
tex_info.index = static_cast<u32>(offset);
@ -511,6 +500,22 @@ Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
return tex_info;
}
Texture::FullTextureInfo Maxwell3D::GetStageTexture(Regs::ShaderStage stage,
std::size_t offset) const {
const auto& shader = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& tex_info_buffer = shader.const_buffers[regs.tex_cb_index];
ASSERT(tex_info_buffer.enabled && tex_info_buffer.address != 0);
const GPUVAddr tex_info_address =
tex_info_buffer.address + offset * sizeof(Texture::TextureHandle);
ASSERT(tex_info_address < tex_info_buffer.address + tex_info_buffer.size);
const Texture::TextureHandle tex_handle{memory_manager.Read<u32>(tex_info_address)};
return GetTextureInfo(tex_handle, offset);
}
u32 Maxwell3D::GetRegisterValue(u32 method) const {
ASSERT_MSG(method < Regs::NUM_REGS, "Invalid Maxwell3D register");
return regs.reg_array[method];
@ -524,4 +529,12 @@ void Maxwell3D::ProcessClearBuffers() {
rasterizer.Clear();
}
u32 Maxwell3D::AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const {
const auto& shader_stage = state.shader_stages[static_cast<std::size_t>(stage)];
const auto& buffer = shader_stage.const_buffers[const_buffer];
u32 result;
std::memcpy(&result, memory_manager.GetPointer(buffer.address + offset), sizeof(u32));
return result;
}
} // namespace Tegra::Engines

View file

@ -1131,12 +1131,18 @@ public:
/// Write the value to the register identified by method.
void CallMethod(const GPU::MethodCall& method_call);
/// Given a Texture Handle, returns the TSC and TIC entries.
Texture::FullTextureInfo GetTextureInfo(const Texture::TextureHandle tex_handle,
std::size_t offset) const;
/// Returns a list of enabled textures for the specified shader stage.
std::vector<Texture::FullTextureInfo> GetStageTextures(Regs::ShaderStage stage) const;
/// Returns the texture information for a specific texture in a specific shader stage.
Texture::FullTextureInfo GetStageTexture(Regs::ShaderStage stage, std::size_t offset) const;
u32 AccessConstBuffer32(Regs::ShaderStage stage, u64 const_buffer, u64 offset) const;
/// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
/// we've seen used.
using MacroMemory = std::array<u32, 0x40000>;

View file

@ -986,6 +986,38 @@ union Instruction {
}
} tex;
union {
BitField<28, 1, u64> array;
BitField<29, 2, TextureType> texture_type;
BitField<31, 4, u64> component_mask;
BitField<49, 1, u64> nodep_flag;
BitField<50, 1, u64> dc_flag;
BitField<36, 1, u64> aoffi_flag;
BitField<37, 3, TextureProcessMode> process_mode;
bool IsComponentEnabled(std::size_t component) const {
return ((1ULL << component) & component_mask) != 0;
}
TextureProcessMode GetTextureProcessMode() const {
return process_mode;
}
bool UsesMiscMode(TextureMiscMode mode) const {
switch (mode) {
case TextureMiscMode::DC:
return dc_flag != 0;
case TextureMiscMode::NODEP:
return nodep_flag != 0;
case TextureMiscMode::AOFFI:
return aoffi_flag != 0;
default:
break;
}
return false;
}
} tex_b;
union {
BitField<22, 6, TextureQueryType> query_type;
BitField<31, 4, u64> component_mask;
@ -1332,7 +1364,9 @@ public:
LDG, // Load from global memory
STG, // Store in global memory
TEX,
TEX_B, // Texture Load Bindless
TXQ, // Texture Query
TXQ_B, // Texture Query Bindless
TEXS, // Texture Fetch with scalar/non-vec4 source/destinations
TLDS, // Texture Load with scalar/non-vec4 source/destinations
TLD4, // Texture Load 4
@ -1600,7 +1634,9 @@ private:
INST("1110111011010---", Id::LDG, Type::Memory, "LDG"),
INST("1110111011011---", Id::STG, Type::Memory, "STG"),
INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
INST("1101111101001---", Id::TXQ, Type::Texture, "TXQ"),
INST("1101111101010---", Id::TXQ_B, Type::Texture, "TXQ_B"),
INST("1101-00---------", Id::TEXS, Type::Texture, "TEXS"),
INST("1101101---------", Id::TLDS, Type::Texture, "TLDS"),
INST("110010----111---", Id::TLD4, Type::Texture, "TLD4"),

View file

@ -974,7 +974,15 @@ void RasterizerOpenGL::SetupTextures(Maxwell::ShaderStage stage, const Shader& s
for (u32 bindpoint = 0; bindpoint < entries.size(); ++bindpoint) {
const auto& entry = entries[bindpoint];
const auto texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
Tegra::Texture::FullTextureInfo texture;
if (entry.IsBindless()) {
const auto cbuf = entry.GetBindlessCBuf();
Tegra::Texture::TextureHandle tex_handle;
tex_handle.raw = maxwell3d.AccessConstBuffer32(stage, cbuf.first, cbuf.second);
texture = maxwell3d.GetTextureInfo(tex_handle, entry.GetOffset());
} else {
texture = maxwell3d.GetStageTexture(stage, entry.GetOffset());
}
const u32 current_bindpoint = base_bindings.sampler + bindpoint;
texture_samplers[current_bindpoint].SyncWithConfig(texture.tsc);

View file

@ -69,6 +69,7 @@ private:
struct ShaderEntries {
std::vector<ConstBufferEntry> const_buffers;
std::vector<SamplerEntry> samplers;
std::vector<SamplerEntry> bindless_samplers;
std::vector<GlobalMemoryEntry> global_memory_entries;
std::array<bool, Maxwell::NumClipDistances> clip_distances{};
std::size_t shader_length{};

View file

@ -319,16 +319,19 @@ std::optional<ShaderDiskCacheDecompiled> ShaderDiskCacheOpenGL::LoadDecompiledEn
u32 type{};
u8 is_array{};
u8 is_shadow{};
u8 is_bindless{};
if (file.ReadBytes(&offset, sizeof(u64)) != sizeof(u64) ||
file.ReadBytes(&index, sizeof(u64)) != sizeof(u64) ||
file.ReadBytes(&type, sizeof(u32)) != sizeof(u32) ||
file.ReadBytes(&is_array, sizeof(u8)) != sizeof(u8) ||
file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8)) {
file.ReadBytes(&is_shadow, sizeof(u8)) != sizeof(u8) ||
file.ReadBytes(&is_bindless, sizeof(u8)) != sizeof(u8)) {
return {};
}
entry.entries.samplers.emplace_back(
static_cast<std::size_t>(offset), static_cast<std::size_t>(index),
static_cast<Tegra::Shader::TextureType>(type), is_array != 0, is_shadow != 0);
entry.entries.samplers.emplace_back(static_cast<std::size_t>(offset),
static_cast<std::size_t>(index),
static_cast<Tegra::Shader::TextureType>(type),
is_array != 0, is_shadow != 0, is_bindless != 0);
}
u32 global_memory_count{};
@ -393,7 +396,8 @@ bool ShaderDiskCacheOpenGL::SaveDecompiledFile(FileUtil::IOFile& file, u64 uniqu
file.WriteObject(static_cast<u64>(sampler.GetIndex())) != 1 ||
file.WriteObject(static_cast<u32>(sampler.GetType())) != 1 ||
file.WriteObject(static_cast<u8>(sampler.IsArray() ? 1 : 0)) != 1 ||
file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1) {
file.WriteObject(static_cast<u8>(sampler.IsShadow() ? 1 : 0)) != 1 ||
file.WriteObject(static_cast<u8>(sampler.IsBindless() ? 1 : 0)) != 1) {
return false;
}
}

View file

@ -40,7 +40,7 @@ static std::size_t GetCoordCount(TextureType texture_type) {
u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const Instruction instr = {program_code[pc]};
const auto opcode = OpCode::Decode(instr);
bool is_bindless = false;
switch (opcode->get().GetId()) {
case OpCode::Id::TEX: {
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
@ -54,7 +54,25 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
const auto process_mode = instr.tex.GetTextureProcessMode();
WriteTexInstructionFloat(
bb, instr,
GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi, {}));
break;
}
case OpCode::Id::TEX_B: {
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
"AOFFI is not implemented");
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
}
const TextureType texture_type{instr.tex_b.texture_type};
const bool is_array = instr.tex_b.array != 0;
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
const bool depth_compare = instr.tex_b.UsesMiscMode(TextureMiscMode::DC);
const auto process_mode = instr.tex_b.GetTextureProcessMode();
WriteTexInstructionFloat(bb, instr,
GetTexCode(instr, texture_type, process_mode, depth_compare,
is_array, is_aoffi, {instr.gpr20}));
break;
}
case OpCode::Id::TEXS: {
@ -134,6 +152,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
WriteTexsInstructionFloat(bb, instr, values);
break;
}
case OpCode::Id::TXQ_B:
is_bindless = true;
[[fallthrough]];
case OpCode::Id::TXQ: {
if (instr.txq.UsesMiscMode(TextureMiscMode::NODEP)) {
LOG_WARNING(HW_GPU, "TXQ.NODEP implementation is incomplete");
@ -143,7 +164,10 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
// Sadly, not all texture instructions specify the type of texture their sampler
// uses. This must be fixed at a later instance.
const auto& sampler =
GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
is_bindless
? GetBindlessSampler(instr.gpr8, Tegra::Shader::TextureType::Texture2D, false,
false)
: GetSampler(instr.sampler, Tegra::Shader::TextureType::Texture2D, false, false);
u32 indexer = 0;
switch (instr.txq.query_type) {
@ -154,7 +178,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value =
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
Operation(OperationCode::TextureQueryDimensions, meta,
GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
SetTemporal(bb, indexer++, value);
}
for (u32 i = 0; i < indexer; ++i) {
@ -168,6 +193,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
}
break;
}
case OpCode::Id::TMML_B:
is_bindless = true;
[[fallthrough]];
case OpCode::Id::TMML: {
UNIMPLEMENTED_IF_MSG(instr.tmml.UsesMiscMode(Tegra::Shader::TextureMiscMode::NDV),
"NDV is not implemented");
@ -178,7 +206,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
auto texture_type = instr.tmml.texture_type.Value();
const bool is_array = instr.tmml.array != 0;
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, false);
const auto& sampler = is_bindless
? GetBindlessSampler(instr.gpr20, texture_type, is_array, false)
: GetSampler(instr.sampler, texture_type, is_array, false);
std::vector<Node> coords;
@ -199,17 +229,19 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
coords.push_back(GetRegister(instr.gpr8.Value() + 1));
texture_type = TextureType::Texture2D;
}
u32 indexer = 0;
for (u32 element = 0; element < 2; ++element) {
if (!instr.tmml.IsComponentEnabled(element)) {
continue;
}
auto params = coords;
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
SetTemporal(bb, element, value);
SetTemporal(bb, indexer++, value);
}
for (u32 element = 0; element < 2; ++element) {
SetRegister(bb, instr.gpr0.Value() + element, GetTemporal(element));
for (u32 i = 0; i < indexer; ++i) {
SetRegister(bb, instr.gpr0.Value() + i, GetTemporal(i));
}
break;
}
case OpCode::Id::TLDS: {
@ -254,6 +286,34 @@ const Sampler& ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler, Textu
return *used_samplers.emplace(entry).first;
}
const Sampler& ShaderIR::GetBindlessSampler(const Tegra::Shader::Register& reg, TextureType type,
bool is_array, bool is_shadow) {
const Node sampler_register = GetRegister(reg);
const Node base_sampler =
TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
const auto cbuf = std::get_if<CbufNode>(base_sampler);
const auto cbuf_offset_imm = std::get_if<ImmediateNode>(cbuf->GetOffset());
ASSERT(cbuf_offset_imm != nullptr);
const auto cbuf_offset = cbuf_offset_imm->GetValue();
const auto cbuf_index = cbuf->GetIndex();
const u64 cbuf_key = (cbuf_index << 32) | cbuf_offset;
// If this sampler has already been used, return the existing mapping.
const auto itr =
std::find_if(used_samplers.begin(), used_samplers.end(),
[&](const Sampler& entry) { return entry.GetOffset() == cbuf_key; });
if (itr != used_samplers.end()) {
ASSERT(itr->GetType() == type && itr->IsArray() == is_array &&
itr->IsShadow() == is_shadow);
return *itr;
}
// Otherwise create a new mapping for this sampler
const std::size_t next_index = used_samplers.size();
const Sampler entry{cbuf_index, cbuf_offset, next_index, type, is_array, is_shadow};
return *used_samplers.emplace(entry).first;
}
void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {
u32 dest_elem = 0;
for (u32 elem = 0; elem < 4; ++elem) {
@ -326,22 +386,27 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, std::vector<Node> coords,
Node array, Node depth_compare, u32 bias_offset,
std::vector<Node> aoffi) {
std::vector<Node> aoffi,
std::optional<Tegra::Shader::Register> bindless_reg) {
const bool is_array = array;
const bool is_shadow = depth_compare;
const bool is_bindless = bindless_reg.has_value();
UNIMPLEMENTED_IF_MSG((texture_type == TextureType::Texture3D && (is_array || is_shadow)) ||
(texture_type == TextureType::TextureCube && is_array && is_shadow),
"This method is not supported.");
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, is_shadow);
const auto& sampler = is_bindless
? GetBindlessSampler(*bindless_reg, texture_type, is_array, is_shadow)
: GetSampler(instr.sampler, texture_type, is_array, is_shadow);
const bool lod_needed = process_mode == TextureProcessMode::LZ ||
process_mode == TextureProcessMode::LL ||
process_mode == TextureProcessMode::LLA;
// LOD selection (either via bias or explicit textureLod) not supported in GL for
// sampler2DArrayShadow and samplerCubeArrayShadow.
// LOD selection (either via bias or explicit textureLod) not
// supported in GL for sampler2DArrayShadow and
// samplerCubeArrayShadow.
const bool gl_lod_supported =
!((texture_type == Tegra::Shader::TextureType::Texture2D && is_array && is_shadow) ||
(texture_type == Tegra::Shader::TextureType::TextureCube && is_array && is_shadow));
@ -359,8 +424,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
lod = Immediate(0.0f);
break;
case TextureProcessMode::LB:
// If present, lod or bias are always stored in the register indexed by the gpr20
// field with an offset depending on the usage of the other registers
// If present, lod or bias are always stored in the register
// indexed by the gpr20 field with an offset depending on the
// usage of the other registers
bias = GetRegister(instr.gpr20.Value() + bias_offset);
break;
case TextureProcessMode::LL:
@ -384,11 +450,18 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
TextureProcessMode process_mode, bool depth_compare, bool is_array,
bool is_aoffi) {
bool is_aoffi, std::optional<Tegra::Shader::Register> bindless_reg) {
const bool lod_bias_enabled{
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
const bool is_bindless = bindless_reg.has_value();
u64 parameter_register = instr.gpr20.Value();
if (is_bindless) {
++parameter_register;
}
const u32 bias_lod_offset = (is_bindless ? 1 : 0);
if (lod_bias_enabled) {
++parameter_register;
}
@ -423,7 +496,8 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
dc = GetRegister(parameter_register++);
}
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_lod_offset,
aoffi, bindless_reg);
}
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
@ -459,7 +533,8 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
dc = GetRegister(depth_register);
}
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {},
{});
}
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,

View file

@ -196,9 +196,23 @@ enum class ExitMethod {
class Sampler {
public:
// Use this constructor for bounded Samplers
explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
bool is_array, bool is_shadow)
: offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow} {}
: offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
is_bindless{false} {}
// Use this constructor for bindless Samplers
explicit Sampler(u32 cbuf_index, u32 cbuf_offset, std::size_t index,
Tegra::Shader::TextureType type, bool is_array, bool is_shadow)
: offset{(static_cast<u64>(cbuf_index) << 32) | cbuf_offset}, index{index}, type{type},
is_array{is_array}, is_shadow{is_shadow}, is_bindless{true} {}
// Use this only for serialization/deserialization
explicit Sampler(std::size_t offset, std::size_t index, Tegra::Shader::TextureType type,
bool is_array, bool is_shadow, bool is_bindless)
: offset{offset}, index{index}, type{type}, is_array{is_array}, is_shadow{is_shadow},
is_bindless{is_bindless} {}
std::size_t GetOffset() const {
return offset;
@ -220,6 +234,14 @@ public:
return is_shadow;
}
bool IsBindless() const {
return is_bindless;
}
std::pair<u32, u32> GetBindlessCBuf() const {
return {static_cast<u32>(offset >> 32), static_cast<u32>(offset)};
}
bool operator<(const Sampler& rhs) const {
return std::tie(offset, index, type, is_array, is_shadow) <
std::tie(rhs.offset, rhs.index, rhs.type, rhs.is_array, rhs.is_shadow);
@ -233,6 +255,7 @@ private:
Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
bool is_array{}; ///< Whether the texture is being sampled as an array texture or not.
bool is_shadow{}; ///< Whether the texture is being sampled as a depth texture or not.
bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
};
class ConstBuffer {
@ -735,6 +758,11 @@ private:
const Sampler& GetSampler(const Tegra::Shader::Sampler& sampler,
Tegra::Shader::TextureType type, bool is_array, bool is_shadow);
// Accesses a texture sampler for a bindless texture.
const Sampler& GetBindlessSampler(const Tegra::Shader::Register& reg,
Tegra::Shader::TextureType type, bool is_array,
bool is_shadow);
/// Extracts a sequence of bits from a node
Node BitfieldExtract(Node value, u32 offset, u32 bits);
@ -748,7 +776,8 @@ private:
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
bool is_array, bool is_aoffi);
bool is_array, bool is_aoffi,
std::optional<Tegra::Shader::Register> bindless_reg);
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
@ -768,7 +797,8 @@ private:
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi,
std::optional<Tegra::Shader::Register> bindless_reg);
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
u64 byte_height);