shader_ir/memory: Implement patch stores

2019-11-01 00:34:38 -03:00 · 2019-11-01 00:34:38 -03:00 · 6233b1db08
commit 6233b1db08
parent f2458106e6
4 changed files with 38 additions and 20 deletions
--- a/src/video_core/engines/shader_bytecode.h
+++ b/src/video_core/engines/shader_bytecode.h
@ -98,10 +98,11 @@ union Attribute {
        BitField<20, 10, u64> immediate;
        BitField<22, 2, u64> element;
        BitField<24, 6, Index> index;
        BitField<31, 1, u64> patch;
        BitField<47, 3, AttributeSize> size;
        /// Reports whether this attribute access is treated as physical.
        /// With this change the result is true only when the patch bit, the element and the
        /// index fields are all zero; previously the patch bit was not part of the check.
        bool IsPhysical() const {
-            return element == 0 && static_cast<u64>(index.Value()) == 0;
+            return patch == 0 && element == 0 && static_cast<u64>(index.Value()) == 0;
        }
    } fmt20;
--- a/src/video_core/shader/decode/memory.cpp
+++ b/src/video_core/shader/decode/memory.cpp
@ -21,6 +21,7 @@ using Tegra::Shader::OpCode;
 using Tegra::Shader::Register;
 namespace {
 u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
    switch (uniform_type) {
    case Tegra::Shader::UniformType::Single:
@ -35,6 +36,7 @@ u32 GetUniformTypeElementsCount(Tegra::Shader::UniformType uniform_type) {
        return 1;
    }
 }
 } // Anonymous namespace
 u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
@ -196,28 +198,28 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
        UNIMPLEMENTED_IF_MSG((instr.attribute.fmt20.immediate.Value() % sizeof(u32)) != 0,
                             "Unaligned attribute loads are not supported");
-        u64 next_element = instr.attribute.fmt20.element;
+        u64 element = instr.attribute.fmt20.element;
-        auto next_index = static_cast<u64>(instr.attribute.fmt20.index.Value());
+        auto index = static_cast<u64>(instr.attribute.fmt20.index.Value());
-        const auto StoreNextElement = [&](u32 reg_offset) {
+        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
-            const auto dest = GetOutputAttribute(static_cast<Attribute::Index>(next_index),
+        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
-                                                 next_element, GetRegister(instr.gpr39));
+            Node dest;
            if (instr.attribute.fmt20.patch) {
                const u32 offset = static_cast<u32>(index) * 4 + static_cast<u32>(element);
                dest = MakeNode<PatchNode>(offset);
            } else {
                dest = GetOutputAttribute(static_cast<Attribute::Index>(index), element,
                                          GetRegister(instr.gpr39));
            }
            const auto src = GetRegister(instr.gpr0.Value() + reg_offset);
            bb.push_back(Operation(OperationCode::Assign, dest, src));
-            // Load the next attribute element into the following register. If the element
+            // Load the next attribute element into the following register. If the element to load
-            // to load goes beyond the vec4 size, load the first element of the next
+            // goes beyond the vec4 size, load the first element of the next attribute.
-            // attribute.
+            element = (element + 1) % 4;
-            next_element = (next_element + 1) % 4;
+            index = index + (element == 0 ? 1 : 0);
            next_index = next_index + (next_element == 0 ? 1 : 0);
        };
        const u32 num_words = static_cast<u32>(instr.attribute.fmt20.size.Value()) + 1;
        for (u32 reg_offset = 0; reg_offset < num_words; ++reg_offset) {
            StoreNextElement(reg_offset);
        }
        break;
    }
    case OpCode::Id::ST_L:
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@ -213,13 +213,14 @@ class PredicateNode;
 // Forward declarations of the node kinds, so the NodeData variant below can name them.
 class AbufNode;
 class CbufNode;
 class LmemNode;
 class PatchNode;
 class SmemNode;
 class GmemNode;
 class CommentNode;
 /// Variant holding exactly one node kind. This change adds PatchNode to the alternatives.
-using NodeData =
+using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode,
-    std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode, InternalFlagNode,
+                              InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
-                 PredicateNode, AbufNode, CbufNode, LmemNode, SmemNode, GmemNode, CommentNode>;
+                              LmemNode, SmemNode, GmemNode, CommentNode>;
 /// Shared-ownership handle to a NodeData value.
 using Node = std::shared_ptr<NodeData>;
 /// Fixed-size group of four nodes.
 using Node4 = std::array<Node, 4>;
 /// Sequence of nodes; the decoder appends to it with push_back.
 using NodeBlock = std::vector<Node>;
@ -542,6 +543,19 @@ private:
    u32 element{};
 };
 /// Patch memory (used to communicate tessellation stages).
 /// Immutable value node: it stores one offset, set at construction and readable afterwards.
 class PatchNode final {
 public:
    /// @param offset Location inside patch memory. The attribute store decoder in this change
    ///               computes it as `index * 4 + element`, so it appears to count elements
    ///               rather than bytes (NOTE(review): confirm against the backend that reads it).
    explicit PatchNode(u32 offset) : offset{offset} {}
    /// Returns the offset passed to the constructor.
    u32 GetOffset() const {
        return offset;
    }
 private:
    /// Patch offset; zero by default, then overwritten by the constructor.
    u32 offset{};
 };
 /// Constant buffer node, usually mapped to uniform buffers in GLSL
 class CbufNode final {
 public:
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@ -7,6 +7,7 @@
 #include <variant>
 #include "common/common_types.h"
 #include "video_core/shader/node.h"
 #include "video_core/shader/shader_ir.h"
 namespace VideoCommon::Shader {