mirror of
https://git.citron-emu.org/Citron/Citron.git
synced 2025-01-26 02:26:35 +01:00
shader_ir/decode: Implement AOFFI for TEX and TLD4
This commit is contained in:
parent
cf4ecc1945
commit
cb68ce7c2f
2 changed files with 94 additions and 27 deletions
|
@ -7,7 +7,9 @@
|
||||||
#include <fmt/format.h>
|
#include <fmt/format.h>
|
||||||
|
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
|
#include "common/bit_field.h"
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
|
#include "common/logging/log.h"
|
||||||
#include "video_core/engines/shader_bytecode.h"
|
#include "video_core/engines/shader_bytecode.h"
|
||||||
#include "video_core/shader/shader_ir.h"
|
#include "video_core/shader/shader_ir.h"
|
||||||
|
|
||||||
|
@ -41,19 +43,18 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
|
|
||||||
switch (opcode->get().GetId()) {
|
switch (opcode->get().GetId()) {
|
||||||
case OpCode::Id::TEX: {
|
case OpCode::Id::TEX: {
|
||||||
UNIMPLEMENTED_IF_MSG(instr.tex.UsesMiscMode(TextureMiscMode::AOFFI),
|
|
||||||
"AOFFI is not implemented");
|
|
||||||
|
|
||||||
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
|
if (instr.tex.UsesMiscMode(TextureMiscMode::NODEP)) {
|
||||||
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
|
LOG_WARNING(HW_GPU, "TEX.NODEP implementation is incomplete");
|
||||||
}
|
}
|
||||||
|
|
||||||
const TextureType texture_type{instr.tex.texture_type};
|
const TextureType texture_type{instr.tex.texture_type};
|
||||||
const bool is_array = instr.tex.array != 0;
|
const bool is_array = instr.tex.array != 0;
|
||||||
|
const bool is_aoffi = instr.tex.UsesMiscMode(TextureMiscMode::AOFFI);
|
||||||
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
|
const bool depth_compare = instr.tex.UsesMiscMode(TextureMiscMode::DC);
|
||||||
const auto process_mode = instr.tex.GetTextureProcessMode();
|
const auto process_mode = instr.tex.GetTextureProcessMode();
|
||||||
WriteTexInstructionFloat(
|
WriteTexInstructionFloat(
|
||||||
bb, instr, GetTexCode(instr, texture_type, process_mode, depth_compare, is_array));
|
bb, instr,
|
||||||
|
GetTexCode(instr, texture_type, process_mode, depth_compare, is_array, is_aoffi));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCode::Id::TEXS: {
|
case OpCode::Id::TEXS: {
|
||||||
|
@ -78,8 +79,6 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
}
|
}
|
||||||
case OpCode::Id::TLD4: {
|
case OpCode::Id::TLD4: {
|
||||||
ASSERT(instr.tld4.array == 0);
|
ASSERT(instr.tld4.array == 0);
|
||||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI),
|
|
||||||
"AOFFI is not implemented");
|
|
||||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
|
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::NDV),
|
||||||
"NDV is not implemented");
|
"NDV is not implemented");
|
||||||
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
|
UNIMPLEMENTED_IF_MSG(instr.tld4.UsesMiscMode(TextureMiscMode::PTP),
|
||||||
|
@ -92,8 +91,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
const auto texture_type = instr.tld4.texture_type.Value();
|
const auto texture_type = instr.tld4.texture_type.Value();
|
||||||
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
|
const bool depth_compare = instr.tld4.UsesMiscMode(TextureMiscMode::DC);
|
||||||
const bool is_array = instr.tld4.array != 0;
|
const bool is_array = instr.tld4.array != 0;
|
||||||
WriteTexInstructionFloat(bb, instr,
|
const bool is_aoffi = instr.tld4.UsesMiscMode(TextureMiscMode::AOFFI);
|
||||||
GetTld4Code(instr, texture_type, depth_compare, is_array));
|
WriteTexInstructionFloat(
|
||||||
|
bb, instr, GetTld4Code(instr, texture_type, depth_compare, is_array, is_aoffi));
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case OpCode::Id::TLD4S: {
|
case OpCode::Id::TLD4S: {
|
||||||
|
@ -127,7 +127,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
Node4 values;
|
Node4 values;
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
auto coords_copy = coords;
|
auto coords_copy = coords;
|
||||||
MetaTexture meta{sampler, {}, {}, {}, {}, component, element};
|
MetaTexture meta{sampler, {}, {}, {}, {}, {}, component, element};
|
||||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -152,7 +152,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
if (!instr.txq.IsComponentEnabled(element)) {
|
if (!instr.txq.IsComponentEnabled(element)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
|
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
|
||||||
const Node value =
|
const Node value =
|
||||||
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
|
Operation(OperationCode::TextureQueryDimensions, meta, GetRegister(instr.gpr8));
|
||||||
SetTemporal(bb, indexer++, value);
|
SetTemporal(bb, indexer++, value);
|
||||||
|
@ -202,7 +202,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
|
||||||
|
|
||||||
for (u32 element = 0; element < 2; ++element) {
|
for (u32 element = 0; element < 2; ++element) {
|
||||||
auto params = coords;
|
auto params = coords;
|
||||||
MetaTexture meta{sampler, {}, {}, {}, {}, {}, element};
|
MetaTexture meta{sampler, {}, {}, {}, {}, {}, {}, element};
|
||||||
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
|
const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
|
||||||
SetTemporal(bb, element, value);
|
SetTemporal(bb, element, value);
|
||||||
}
|
}
|
||||||
|
@ -325,7 +325,8 @@ void ShaderIR::WriteTexsInstructionHalfFloat(NodeBlock& bb, Instruction instr,
|
||||||
|
|
||||||
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||||
TextureProcessMode process_mode, std::vector<Node> coords,
|
TextureProcessMode process_mode, std::vector<Node> coords,
|
||||||
Node array, Node depth_compare, u32 bias_offset) {
|
Node array, Node depth_compare, u32 bias_offset,
|
||||||
|
std::vector<Node> aoffi) {
|
||||||
const bool is_array = array;
|
const bool is_array = array;
|
||||||
const bool is_shadow = depth_compare;
|
const bool is_shadow = depth_compare;
|
||||||
|
|
||||||
|
@ -374,7 +375,7 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||||
Node4 values;
|
Node4 values;
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
auto copy_coords = coords;
|
auto copy_coords = coords;
|
||||||
MetaTexture meta{sampler, array, depth_compare, bias, lod, {}, element};
|
MetaTexture meta{sampler, array, depth_compare, aoffi, bias, lod, {}, element};
|
||||||
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
values[element] = Operation(read_method, meta, std::move(copy_coords));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -382,9 +383,15 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
|
||||||
}
|
}
|
||||||
|
|
||||||
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
|
Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
|
||||||
TextureProcessMode process_mode, bool depth_compare, bool is_array) {
|
TextureProcessMode process_mode, bool depth_compare, bool is_array,
|
||||||
const bool lod_bias_enabled =
|
bool is_aoffi) {
|
||||||
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ);
|
const bool lod_bias_enabled{
|
||||||
|
(process_mode != TextureProcessMode::None && process_mode != TextureProcessMode::LZ)};
|
||||||
|
|
||||||
|
u64 parameter_register = instr.gpr20.Value();
|
||||||
|
if (lod_bias_enabled) {
|
||||||
|
++parameter_register;
|
||||||
|
}
|
||||||
|
|
||||||
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
const auto [coord_count, total_coord_count] = ValidateAndGetCoordinateElement(
|
||||||
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
|
texture_type, depth_compare, is_array, lod_bias_enabled, 4, 5);
|
||||||
|
@ -404,15 +411,19 @@ Node4 ShaderIR::GetTexCode(Instruction instr, TextureType texture_type,
|
||||||
|
|
||||||
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
const Node array = is_array ? GetRegister(array_register) : nullptr;
|
||||||
|
|
||||||
|
std::vector<Node> aoffi;
|
||||||
|
if (is_aoffi) {
|
||||||
|
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, false);
|
||||||
|
}
|
||||||
|
|
||||||
Node dc{};
|
Node dc{};
|
||||||
if (depth_compare) {
|
if (depth_compare) {
|
||||||
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
|
// Depth is always stored in the register signaled by gpr20 or in the next register if lod
|
||||||
// or bias are used
|
// or bias are used
|
||||||
const u64 depth_register = instr.gpr20.Value() + (lod_bias_enabled ? 1 : 0);
|
dc = GetRegister(parameter_register++);
|
||||||
dc = GetRegister(depth_register);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0);
|
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, 0, aoffi);
|
||||||
}
|
}
|
||||||
|
|
||||||
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
|
Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
|
||||||
|
@ -448,11 +459,11 @@ Node4 ShaderIR::GetTexsCode(Instruction instr, TextureType texture_type,
|
||||||
dc = GetRegister(depth_register);
|
dc = GetRegister(depth_register);
|
||||||
}
|
}
|
||||||
|
|
||||||
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset);
|
return GetTextureCode(instr, texture_type, process_mode, coords, array, dc, bias_offset, {});
|
||||||
}
|
}
|
||||||
|
|
||||||
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
|
Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool depth_compare,
|
||||||
bool is_array) {
|
bool is_array, bool is_aoffi) {
|
||||||
const std::size_t coord_count = GetCoordCount(texture_type);
|
const std::size_t coord_count = GetCoordCount(texture_type);
|
||||||
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
|
const std::size_t total_coord_count = coord_count + (is_array ? 1 : 0);
|
||||||
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
|
const std::size_t total_reg_count = total_coord_count + (depth_compare ? 1 : 0);
|
||||||
|
@ -463,15 +474,27 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
|
||||||
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
const u64 coord_register = array_register + (is_array ? 1 : 0);
|
||||||
|
|
||||||
std::vector<Node> coords;
|
std::vector<Node> coords;
|
||||||
for (size_t i = 0; i < coord_count; ++i)
|
for (std::size_t i = 0; i < coord_count; ++i) {
|
||||||
coords.push_back(GetRegister(coord_register + i));
|
coords.push_back(GetRegister(coord_register + i));
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 parameter_register = instr.gpr20.Value();
|
||||||
|
std::vector<Node> aoffi;
|
||||||
|
if (is_aoffi) {
|
||||||
|
aoffi = GetAoffiCoordinates(GetRegister(parameter_register++), coord_count, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
Node dc{};
|
||||||
|
if (depth_compare) {
|
||||||
|
dc = GetRegister(parameter_register++);
|
||||||
|
}
|
||||||
|
|
||||||
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
|
const auto& sampler = GetSampler(instr.sampler, texture_type, is_array, depth_compare);
|
||||||
|
|
||||||
Node4 values;
|
Node4 values;
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
auto coords_copy = coords;
|
auto coords_copy = coords;
|
||||||
MetaTexture meta{sampler, GetRegister(array_register), {}, {}, {}, {}, element};
|
MetaTexture meta{sampler, GetRegister(array_register), dc, aoffi, {}, {}, {}, element};
|
||||||
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -507,7 +530,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
|
||||||
Node4 values;
|
Node4 values;
|
||||||
for (u32 element = 0; element < values.size(); ++element) {
|
for (u32 element = 0; element < values.size(); ++element) {
|
||||||
auto coords_copy = coords;
|
auto coords_copy = coords;
|
||||||
MetaTexture meta{sampler, array, {}, {}, lod, {}, element};
|
MetaTexture meta{sampler, array, {}, {}, {}, lod, {}, element};
|
||||||
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
|
||||||
}
|
}
|
||||||
return values;
|
return values;
|
||||||
|
@ -531,4 +554,45 @@ std::tuple<std::size_t, std::size_t> ShaderIR::ValidateAndGetCoordinateElement(
|
||||||
return {coord_count, total_coord_count};
|
return {coord_count, total_coord_count};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Decodes the packed AOFFI (texel offset) operand into one signed offset node per coordinate.
///
/// @param aoffi_reg   Node holding the packed offsets.
/// @param coord_count Number of coordinates to decode; each one is looked up with
///                    bounds-checked access in a three-entry offset table.
/// @param is_tld4     Selects the TLD4 field layout (6-bit fields at bits 0, 8 and 16) instead of
///                    the TEX layout (4-bit fields at bits 0, 4 and 8).
/// @return One node per coordinate: immediates when the operand can be tracked to an immediate
///         value, otherwise runtime operations that extract and sign-adjust each field.
std::vector<Node> ShaderIR::GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count,
                                                bool is_tld4) {
    // Field layout of the packed operand for the selected instruction.
    const std::array<u32, 3> coord_offsets =
        is_tld4 ? std::array<u32, 3>{0, 8, 16} : std::array<u32, 3>{0, 4, 8};
    const u32 size = is_tld4 ? 6 : 4;
    // Raw field values at or above wrap_value are negative; subtracting diff_value
    // (twice wrap_value) maps them to their signed value.
    const s32 wrap_value = is_tld4 ? 32 : 8;
    const s32 diff_value = is_tld4 ? 64 : 16;
    const u32 mask = (1 << size) - 1;

    std::vector<Node> offsets;
    offsets.reserve(coord_count);

    const auto tracked_immediate =
        TrackImmediate(aoffi_reg, global_code, static_cast<s64>(global_code.size()));
    if (tracked_immediate) {
        // The operand is a known constant: fold every field into an immediate node.
        for (std::size_t index = 0; index < coord_count; ++index) {
            const s32 raw = (*tracked_immediate >> coord_offsets.at(index)) & mask;
            const s32 folded = raw >= wrap_value ? raw - diff_value : raw;
            offsets.push_back(Immediate(folded));
        }
        return offsets;
    }

    // Variable access, not supported on AMD.
    LOG_WARNING(HW_GPU,
                "AOFFI constant folding failed, some hardware might have graphical issues");
    for (std::size_t index = 0; index < coord_count; ++index) {
        // Build the same extract / compare / adjust sequence as runtime operations.
        const Node field = BitfieldExtract(aoffi_reg, coord_offsets.at(index), size);
        const Node is_negative =
            Operation(OperationCode::LogicalIGreaterEqual, field, Immediate(wrap_value));
        const Node adjusted = Operation(OperationCode::IAdd, field, Immediate(-diff_value));
        offsets.push_back(Operation(OperationCode::Select, is_negative, adjusted, field));
    }
    return offsets;
}
|
||||||
|
|
||||||
} // namespace VideoCommon::Shader
|
} // namespace VideoCommon::Shader
|
|
@ -291,6 +291,7 @@ struct MetaTexture {
|
||||||
const Sampler& sampler;
|
const Sampler& sampler;
|
||||||
Node array{};
|
Node array{};
|
||||||
Node depth_compare{};
|
Node depth_compare{};
|
||||||
|
std::vector<Node> aoffi;
|
||||||
Node bias{};
|
Node bias{};
|
||||||
Node lod{};
|
Node lod{};
|
||||||
Node component{};
|
Node component{};
|
||||||
|
@ -742,14 +743,14 @@ private:
|
||||||
|
|
||||||
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
Node4 GetTexCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||||
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
|
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
|
||||||
bool is_array);
|
bool is_array, bool is_aoffi);
|
||||||
|
|
||||||
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
Node4 GetTexsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||||
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
|
Tegra::Shader::TextureProcessMode process_mode, bool depth_compare,
|
||||||
bool is_array);
|
bool is_array);
|
||||||
|
|
||||||
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
Node4 GetTld4Code(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||||
bool depth_compare, bool is_array);
|
bool depth_compare, bool is_array, bool is_aoffi);
|
||||||
|
|
||||||
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
Node4 GetTldsCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||||
bool is_array);
|
bool is_array);
|
||||||
|
@ -758,9 +759,11 @@ private:
|
||||||
Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
|
Tegra::Shader::TextureType texture_type, bool depth_compare, bool is_array,
|
||||||
bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
|
bool lod_bias_enabled, std::size_t max_coords, std::size_t max_inputs);
|
||||||
|
|
||||||
|
std::vector<Node> GetAoffiCoordinates(Node aoffi_reg, std::size_t coord_count, bool is_tld4);
|
||||||
|
|
||||||
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
Node4 GetTextureCode(Tegra::Shader::Instruction instr, Tegra::Shader::TextureType texture_type,
|
||||||
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
|
Tegra::Shader::TextureProcessMode process_mode, std::vector<Node> coords,
|
||||||
Node array, Node depth_compare, u32 bias_offset);
|
Node array, Node depth_compare, u32 bias_offset, std::vector<Node> aoffi);
|
||||||
|
|
||||||
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
|
Node GetVideoOperand(Node op, bool is_chunk, bool is_signed, Tegra::Shader::VideoType type,
|
||||||
u64 byte_height);
|
u64 byte_height);
|
||||||
|
|
Loading…
Reference in a new issue