mirror of
https://github.com/Ryujinx/Ryujinx.git
synced 2025-01-22 23:06:18 +01:00
GPU: Add fallback when textureGatherOffsets is not supported (#5792)
* GPU: Add fallback when textureGatherOffsets is not supported.

  This PR adds a fallback for GPUs or APIs that don't support an equivalent to the method `textureGatherOffsets`, where each of the 4 gathered texels has an individual offset. This is done by reusing the existing code to handle non-const offsets for texture instructions, though it has also been corrected as there were a few implementation issues.

  MoltenVK reports support for this capability, and it didn't error when we initially released the macOS build, but that has since changed. MVK still reports support, but spirv-cross has been fixed in a way that it _attempts_ to use this capability, but the Metal compiler errors since it doesn't exist.

  Some other fixes:
  - textureGatherOffsets emulation has been changed significantly. It now uses 4 texture sample instructions (not gather), calculates a base texel (i=0, j=0) and adds the offsets onto it before converting into a tex coord. The final result is offset into a texel center, so it shouldn't be subject to interpolation, though this isn't perfect and could have some error with floating point formats with linear sampling. It is subject to texture wrap mode as it should be, which is why texelFetch was not used.
  - Maybe gather should be used here with component `w` (i=0, j=0), though this multiplies the number of texels fetched by 4... The way it was doing this before _was_ wrong, but doing it right would avoid issues with texel center precision.
  - textureGatherOffset (singular) now performs textureGather with the offset applied to the coords, rather than the slower fallback where each texel is fetched individually.
* Increment shader cache version, remove unused arg
* Use base texture size for gather coord offset. Implicit LOD for gather is not supported.
* Use 4 texture gathers for offsets emulation. Avoids issues with interpolation at cost of performance (not sure how bad this is)
* Address Feedback
This commit is contained in:
parent
28dd7d80af
commit
76b53e018a
7 changed files with 71 additions and 11 deletions
|
@ -38,6 +38,7 @@ namespace Ryujinx.Graphics.GAL
|
|||
public readonly bool SupportsShaderBallot;
|
||||
public readonly bool SupportsShaderBarrierDivergence;
|
||||
public readonly bool SupportsShaderFloat64;
|
||||
public readonly bool SupportsTextureGatherOffsets;
|
||||
public readonly bool SupportsTextureShadowLod;
|
||||
public readonly bool SupportsVertexStoreAndAtomics;
|
||||
public readonly bool SupportsViewportIndexVertexTessellation;
|
||||
|
@ -92,6 +93,7 @@ namespace Ryujinx.Graphics.GAL
|
|||
bool supportsShaderBallot,
|
||||
bool supportsShaderBarrierDivergence,
|
||||
bool supportsShaderFloat64,
|
||||
bool supportsTextureGatherOffsets,
|
||||
bool supportsTextureShadowLod,
|
||||
bool supportsVertexStoreAndAtomics,
|
||||
bool supportsViewportIndexVertexTessellation,
|
||||
|
@ -142,6 +144,7 @@ namespace Ryujinx.Graphics.GAL
|
|||
SupportsShaderBallot = supportsShaderBallot;
|
||||
SupportsShaderBarrierDivergence = supportsShaderBarrierDivergence;
|
||||
SupportsShaderFloat64 = supportsShaderFloat64;
|
||||
SupportsTextureGatherOffsets = supportsTextureGatherOffsets;
|
||||
SupportsTextureShadowLod = supportsTextureShadowLod;
|
||||
SupportsVertexStoreAndAtomics = supportsVertexStoreAndAtomics;
|
||||
SupportsViewportIndexVertexTessellation = supportsViewportIndexVertexTessellation;
|
||||
|
|
|
@ -22,7 +22,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.DiskCache
|
|||
private const ushort FileFormatVersionMajor = 1;
|
||||
private const ushort FileFormatVersionMinor = 2;
|
||||
private const uint FileFormatVersionPacked = ((uint)FileFormatVersionMajor << 16) | FileFormatVersionMinor;
|
||||
private const uint CodeGenVersion = 5767;
|
||||
private const uint CodeGenVersion = 5791;
|
||||
|
||||
private const string SharedTocFileName = "shared.toc";
|
||||
private const string SharedDataFileName = "shared.data";
|
||||
|
|
|
@ -186,6 +186,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
|||
|
||||
public bool QueryHostSupportsSnormBufferTextureFormat() => _context.Capabilities.SupportsSnormBufferTextureFormat;
|
||||
|
||||
public bool QueryHostSupportsTextureGatherOffsets() => _context.Capabilities.SupportsTextureGatherOffsets;
|
||||
|
||||
public bool QueryHostSupportsTextureShadowLod() => _context.Capabilities.SupportsTextureShadowLod;
|
||||
|
||||
public bool QueryHostSupportsTransformFeedback() => _context.Capabilities.SupportsTransformFeedback;
|
||||
|
|
|
@ -163,6 +163,7 @@ namespace Ryujinx.Graphics.OpenGL
|
|||
supportsShaderBallot: HwCapabilities.SupportsShaderBallot,
|
||||
supportsShaderBarrierDivergence: !(intelWindows || intelUnix),
|
||||
supportsShaderFloat64: true,
|
||||
supportsTextureGatherOffsets: true,
|
||||
supportsTextureShadowLod: HwCapabilities.SupportsTextureShadowLod,
|
||||
supportsVertexStoreAndAtomics: true,
|
||||
supportsViewportIndexVertexTessellation: HwCapabilities.SupportsShaderViewportLayerArray,
|
||||
|
|
|
@ -339,6 +339,15 @@ namespace Ryujinx.Graphics.Shader
|
|||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Queries host GPU texture gather with multiple offsets support.
|
||||
/// </summary>
|
||||
/// <returns>True if the GPU and driver support texture gather offsets, false otherwise</returns>
/// <remarks>This default implementation always returns true.</remarks>
|
||||
bool QueryHostSupportsTextureGatherOffsets()
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Queries host GPU texture shadow LOD support.
|
||||
/// </summary>
|
||||
|
|
|
@ -303,7 +303,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
|
|||
bool hasOffset = (texOp.Flags & TextureFlags.Offset) != 0;
|
||||
bool hasOffsets = (texOp.Flags & TextureFlags.Offsets) != 0;
|
||||
|
||||
bool hasInvalidOffset = (hasOffset || hasOffsets) && !gpuAccessor.QueryHostSupportsNonConstantTextureOffset();
|
||||
bool needsOffsetsEmulation = hasOffsets && !gpuAccessor.QueryHostSupportsTextureGatherOffsets();
|
||||
|
||||
bool hasInvalidOffset = needsOffsetsEmulation || ((hasOffset || hasOffsets) && !gpuAccessor.QueryHostSupportsNonConstantTextureOffset());
|
||||
|
||||
bool isBindless = (texOp.Flags & TextureFlags.Bindless) != 0;
|
||||
|
||||
|
@ -402,11 +404,14 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
|
|||
offsets[index] = offset;
|
||||
}
|
||||
|
||||
hasInvalidOffset &= !areAllOffsetsConstant;
|
||||
|
||||
if (!hasInvalidOffset)
|
||||
if (!needsOffsetsEmulation)
|
||||
{
|
||||
return node;
|
||||
hasInvalidOffset &= !areAllOffsetsConstant;
|
||||
|
||||
if (!hasInvalidOffset)
|
||||
{
|
||||
return node;
|
||||
}
|
||||
}
|
||||
|
||||
if (hasLodBias)
|
||||
|
@ -434,13 +439,13 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
|
|||
|
||||
LinkedListNode<INode> oldNode = node;
|
||||
|
||||
if (isGather && !isShadow)
|
||||
if (isGather && !isShadow && hasOffsets)
|
||||
{
|
||||
Operand[] newSources = new Operand[sources.Length];
|
||||
|
||||
sources.CopyTo(newSources, 0);
|
||||
|
||||
Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount, stage);
|
||||
Operand[] texSizes = InsertTextureBaseSize(node, texOp, bindlessHandle, coordsCount);
|
||||
|
||||
int destIndex = 0;
|
||||
|
||||
|
@ -455,7 +460,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
|
|||
{
|
||||
Operand offset = Local();
|
||||
|
||||
Operand intOffset = offsets[index + (hasOffsets ? compIndex * coordsCount : 0)];
|
||||
Operand intOffset = offsets[index + compIndex * coordsCount];
|
||||
|
||||
node.List.AddBefore(node, new Operation(
|
||||
Instruction.FP32 | Instruction.Divide,
|
||||
|
@ -478,7 +483,7 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
|
|||
texOp.Format,
|
||||
texOp.Flags & ~(TextureFlags.Offset | TextureFlags.Offsets),
|
||||
texOp.Binding,
|
||||
1,
|
||||
1 << 3, // W component: i=0, j=0
|
||||
new[] { dests[destIndex++] },
|
||||
newSources);
|
||||
|
||||
|
@ -502,7 +507,9 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
|
|||
}
|
||||
else
|
||||
{
|
||||
Operand[] texSizes = InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount, stage);
|
||||
Operand[] texSizes = isGather
|
||||
? InsertTextureBaseSize(node, texOp, bindlessHandle, coordsCount)
|
||||
: InsertTextureLod(node, texOp, lodSources, bindlessHandle, coordsCount, stage);
|
||||
|
||||
for (int index = 0; index < coordsCount; index++)
|
||||
{
|
||||
|
@ -549,6 +556,43 @@ namespace Ryujinx.Graphics.Shader.Translation.Transforms
|
|||
return node;
|
||||
}
|
||||
|
||||
/// <summary>
/// Inserts one <c>TextureQuerySize</c> operation per coordinate immediately before <paramref name="node"/>
/// and returns the locals that receive the results.
/// </summary>
/// <param name="node">List node that the size query operations are inserted before</param>
/// <param name="texOp">Texture operation whose type, format, flags and binding are reused for the queries</param>
/// <param name="bindlessHandle">Bindless handle operand, prepended to the query sources when not null</param>
/// <param name="coordsCount">Number of coordinates, and therefore number of size queries emitted</param>
/// <returns>Array of <paramref name="coordsCount"/> local operands, one per emitted query</returns>
private static Operand[] InsertTextureBaseSize(
|
||||
LinkedListNode<INode> node,
|
||||
TextureOperation texOp,
|
||||
Operand bindlessHandle,
|
||||
int coordsCount)
|
||||
{
|
||||
Operand[] texSizes = new Operand[coordsCount];
|
||||
|
||||
for (int index = 0; index < coordsCount; index++)
|
||||
{
|
||||
texSizes[index] = Local();
|
||||
|
||||
// A fresh source array is built for every emitted operation rather than shared across iterations.
Operand[] texSizeSources;
|
||||
|
||||
// Const(0) is presumably the LOD to query (the base level, per the method name) - TODO confirm
// against the TextureQuerySize operand layout.
if (bindlessHandle != null)
|
||||
{
|
||||
texSizeSources = new Operand[] { bindlessHandle, Const(0) };
|
||||
}
|
||||
else
|
||||
{
|
||||
texSizeSources = new Operand[] { Const(0) };
|
||||
}
|
||||
|
||||
node.List.AddBefore(node, new TextureOperation(
|
||||
Instruction.TextureQuerySize,
|
||||
texOp.Type,
|
||||
texOp.Format,
|
||||
texOp.Flags,
|
||||
texOp.Binding,
|
||||
// NOTE(review): the loop index is passed here, presumably selecting which size component
// this query returns - confirm against the TextureOperation constructor.
index,
|
||||
new[] { texSizes[index] },
|
||||
texSizeSources));
|
||||
}
|
||||
|
||||
return texSizes;
|
||||
}
|
||||
|
||||
private static Operand[] InsertTextureLod(
|
||||
LinkedListNode<INode> node,
|
||||
TextureOperation texOp,
|
||||
|
|
|
@ -605,6 +605,7 @@ namespace Ryujinx.Graphics.Vulkan
|
|||
supportsShaderBallot: false,
|
||||
supportsShaderBarrierDivergence: Vendor != Vendor.Intel,
|
||||
supportsShaderFloat64: Capabilities.SupportsShaderFloat64,
|
||||
supportsTextureGatherOffsets: features2.Features.ShaderImageGatherExtended && !IsMoltenVk,
|
||||
supportsTextureShadowLod: false,
|
||||
supportsVertexStoreAndAtomics: features2.Features.VertexPipelineStoresAndAtomics,
|
||||
supportsViewportIndexVertexTessellation: featuresVk12.ShaderOutputViewportIndex,
|
||||
|
|
Loading…
Reference in a new issue