From c921e496eb47de49a4d6ce62527581b966dca259 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 3 Jan 2020 16:16:29 -0400
Subject: [PATCH 01/16] GPU: Implement guest driver profile and deduce texture
 handler sizes.

---
 src/video_core/CMakeLists.txt                 |  2 +
 .../engines/const_buffer_engine_interface.h   |  3 ++
 src/video_core/engines/kepler_compute.cpp     |  4 ++
 src/video_core/engines/kepler_compute.h       |  2 +
 src/video_core/engines/maxwell_3d.cpp         |  4 ++
 src/video_core/engines/maxwell_3d.h           |  2 +
 src/video_core/guest_driver.cpp               | 34 +++++++++++++++++
 src/video_core/guest_driver.h                 | 37 +++++++++++++++++++
 src/video_core/rasterizer_interface.h         |  8 ++++
 src/video_core/shader/const_buffer_locker.h   |  8 ++++
 src/video_core/shader/decode.cpp              | 21 +++++++++++
 src/video_core/shader/shader_ir.cpp           |  1 +
 src/video_core/shader/shader_ir.h             |  1 +
 13 files changed, 127 insertions(+)
 create mode 100644 src/video_core/guest_driver.cpp
 create mode 100644 src/video_core/guest_driver.h

diff --git a/src/video_core/CMakeLists.txt b/src/video_core/CMakeLists.txt
index ccfed4f2e..04a25da4f 100644
--- a/src/video_core/CMakeLists.txt
+++ b/src/video_core/CMakeLists.txt
@@ -29,6 +29,8 @@ add_library(video_core STATIC
     gpu_synch.h
     gpu_thread.cpp
     gpu_thread.h
+    guest_driver.cpp
+    guest_driver.h
     macro_interpreter.cpp
     macro_interpreter.h
     memory_manager.cpp
diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index 44b8b8d22..c29156e34 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -9,6 +9,7 @@
 #include "common/common_types.h"
 #include "video_core/engines/shader_bytecode.h"
 #include "video_core/engines/shader_type.h"
+#include "video_core/guest_driver.h"
 #include "video_core/textures/texture.h"
 
 namespace Tegra::Engines {
@@ -106,6 +107,8 @@ public:
     virtual SamplerDescriptor AccessBindlessSampler(ShaderType stage, u64 const_buffer,
                                                     u64 offset) const = 0;
     virtual u32 GetBoundBuffer() const = 0;
+
+    virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
 };
 
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 110406f2f..f177ae938 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -94,6 +94,10 @@ SamplerDescriptor KeplerCompute::AccessBindlessSampler(ShaderType stage, u64 con
     return result;
 }
 
+VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
+    return rasterizer.AccessGuestDriverProfile();
+}
+
 void KeplerCompute::ProcessLaunch() {
     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 4ef3e0613..99c82a9af 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -218,6 +218,8 @@ public:
         return regs.tex_cb_index;
     }
 
+    VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
+
 private:
     Core::System& system;
     VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 58dfa8033..8167864c0 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -784,4 +784,8 @@ SamplerDescriptor Maxwell3D::AccessBindlessSampler(ShaderType stage, u64 const_b
     return result;
 }
 
+VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
+    return rasterizer.AccessGuestDriverProfile();
+}
+
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index ee79260fc..08ef95410 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1306,6 +1306,8 @@ public:
         return regs.tex_cb_index;
     }
 
+    VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
+
     /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
     /// we've seen used.
     using MacroMemory = std::array<u32, 0x40000>;
diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
new file mode 100644
index 000000000..b1ac254ff
--- /dev/null
+++ b/src/video_core/guest_driver.cpp
@@ -0,0 +1,34 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "video_core/guest_driver.h"
+
+namespace VideoCore {
+
+void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets) {
+    if (texture_handler_size_deduced) {
+        return;
+    }
+    std::size_t size = bound_offsets.size();
+    if (size < 2) {
+        return;
+    }
+    std::sort(bound_offsets.begin(), bound_offsets.end(),
+              [](const u32& a, const u32& b) { return a < b; });
+    u32 min_val = 0xFFFFFFFF; // set to highest possible 32 bit integer;
+    for (std::size_t i = 1; i < size; i++) {
+        if (bound_offsets[i] == bound_offsets[i - 1]) {
+            continue;
+        }
+        const u32 new_min = bound_offsets[i] - bound_offsets[i - 1];
+        min_val = std::min(min_val, new_min);
+    }
+    if (min_val > 2) {
+        return;
+    }
+    texture_handler_size_deduced = true;
+    texture_handler_size = sizeof(u32) * min_val;
+}
+
+} // namespace VideoCore
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
new file mode 100644
index 000000000..f64f043af
--- /dev/null
+++ b/src/video_core/guest_driver.h
@@ -0,0 +1,37 @@
+// Copyright 2019 yuzu Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <vector>
+
+#include "common/common_types.h"
+
+namespace VideoCore {
+
+/**
+ * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect
+ * information necessary for impossible to avoid HLE methods like shader tracks.
+ */
+class GuestDriverProfile {
+public:
+    u32 GetTextureHandlerSize() const {
+        return texture_handler_size;
+    }
+
+    bool TextureHandlerSizeKnown() const {
+        return texture_handler_size_deduced;
+    }
+
+    void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
+
+private:
+    // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
+    // use 4 bytes instead. Thus, certain drivers may squish the size.
+    static constexpr u32 default_texture_handler_size = 8;
+    u32 texture_handler_size{default_texture_handler_size};
+    bool texture_handler_size_deduced{};
+};
+
+} // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 5b0eca9e2..149f79af3 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -9,6 +9,7 @@
 #include "common/common_types.h"
 #include "video_core/engines/fermi_2d.h"
 #include "video_core/gpu.h"
+#include "video_core/guest_driver.h"
 
 namespace Tegra {
 class MemoryManager;
@@ -78,5 +79,12 @@ public:
     /// Initialize disk cached resources for the game being emulated
     virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
                                    const DiskResourceLoadCallback& callback = {}) {}
+
+    GuestDriverProfile& AccessGuestDriverProfile() {
+        return guest_driver_profile;
+    }
+
+private:
+    GuestDriverProfile guest_driver_profile{};
 };
 } // namespace VideoCore
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
index d32e2d657..78d9d7037 100644
--- a/src/video_core/shader/const_buffer_locker.h
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -10,6 +10,7 @@
 #include "common/hash.h"
 #include "video_core/engines/const_buffer_engine_interface.h"
 #include "video_core/engines/shader_type.h"
+#include "video_core/guest_driver.h"
 
 namespace VideoCommon::Shader {
 
@@ -71,6 +72,13 @@ public:
         return bindless_samplers;
     }
 
+    VideoCore::GuestDriverProfile* AccessGuestDriverProfile() {
+        if (engine) {
+            return &(engine->AccessGuestDriverProfile());
+        }
+        return nullptr;
+    }
+
 private:
     const Tegra::Engines::ShaderType stage;
     Tegra::Engines::ConstBufferEngineInterface* engine = nullptr;
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 22c3e5120..aed35a9b8 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -315,4 +315,25 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
     return pc + 1;
 }
 
+void ShaderIR::PostDecode() {
+    // Deduce texture handler size if needed
+    auto* gpu_driver = locker.AccessGuestDriverProfile();
+    if (gpu_driver) {
+        if (!gpu_driver->TextureHandlerSizeKnown() && used_samplers.size() > 1) {
+            u32 count{};
+            std::vector<u32> bound_offsets;
+            for (const auto& sampler : used_samplers) {
+                if (sampler.IsBindless()) {
+                    continue;
+                }
+                count++;
+                bound_offsets.emplace_back(sampler.GetOffset());
+            }
+            if (count > 1) {
+                gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
+            }
+        }
+    }
+}
+
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 31eecb3f4..a186e22b2 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -27,6 +27,7 @@ ShaderIR::ShaderIR(const ProgramCode& program_code, u32 main_offset, CompilerSet
                    ConstBufferLocker& locker)
     : program_code{program_code}, main_offset{main_offset}, settings{settings}, locker{locker} {
     Decode();
+    PostDecode();
 }
 
 ShaderIR::~ShaderIR() = default;
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index ba1db4c11..92c24247d 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -191,6 +191,7 @@ private:
     };
 
     void Decode();
+    void PostDecode();
 
     NodeBlock DecodeRange(u32 begin, u32 end);
     void DecodeRangeInner(NodeBlock& bb, u32 begin, u32 end);

From 1e4b6bef6f1b278bdc99170b76f33179a2eff26f Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 3 Jan 2020 18:15:24 -0400
Subject: [PATCH 02/16] Shader_IR: Store Bound buffer on Shader Usage

---
 .../renderer_opengl/gl_shader_cache.cpp         |  4 +++-
 .../renderer_opengl/gl_shader_disk_cache.cpp    | 12 ++++++++----
 .../renderer_opengl/gl_shader_disk_cache.h      |  1 +
 src/video_core/shader/const_buffer_locker.cpp   | 17 +++++++++++++++++
 src/video_core/shader/const_buffer_locker.h     | 12 ++++++++++++
 5 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 3c5bdd377..489eb143c 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -214,6 +214,7 @@ std::unique_ptr<ConstBufferLocker> MakeLocker(Core::System& system, ShaderType s
 }
 
 void FillLocker(ConstBufferLocker& locker, const ShaderDiskCacheUsage& usage) {
+    locker.SetBoundBuffer(usage.bound_buffer);
     for (const auto& key : usage.keys) {
         const auto [buffer, offset] = key.first;
         locker.InsertKey(buffer, offset, key.second);
@@ -418,7 +419,8 @@ bool CachedShader::EnsureValidLockerVariant() {
 
 ShaderDiskCacheUsage CachedShader::GetUsage(const ProgramVariant& variant,
                                             const ConstBufferLocker& locker) const {
-    return ShaderDiskCacheUsage{unique_identifier, variant, locker.GetKeys(),
+    return ShaderDiskCacheUsage{unique_identifier,         variant,
+                                locker.GetBoundBuffer(),   locker.GetKeys(),
                                 locker.GetBoundSamplers(), locker.GetBindlessSamplers()};
 }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
index cf874a09a..1fc204f6f 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.cpp
@@ -53,7 +53,7 @@ struct BindlessSamplerKey {
     Tegra::Engines::SamplerDescriptor sampler{};
 };
 
-constexpr u32 NativeVersion = 11;
+constexpr u32 NativeVersion = 12;
 
 // Making sure sizes doesn't change by accident
 static_assert(sizeof(ProgramVariant) == 20);
@@ -186,7 +186,8 @@ ShaderDiskCacheOpenGL::LoadTransferable() {
             u32 num_bound_samplers{};
             u32 num_bindless_samplers{};
             if (file.ReadArray(&usage.unique_identifier, 1) != 1 ||
-                file.ReadArray(&usage.variant, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
+                file.ReadArray(&usage.variant, 1) != 1 ||
+                file.ReadArray(&usage.bound_buffer, 1) != 1 || file.ReadArray(&num_keys, 1) != 1 ||
                 file.ReadArray(&num_bound_samplers, 1) != 1 ||
                 file.ReadArray(&num_bindless_samplers, 1) != 1) {
                 LOG_ERROR(Render_OpenGL, error_loading);
@@ -281,7 +282,9 @@ ShaderDiskCacheOpenGL::LoadPrecompiledFile(FileUtil::IOFile& file) {
         u32 num_bindless_samplers{};
         ShaderDiskCacheUsage usage;
         if (!LoadObjectFromPrecompiled(usage.unique_identifier) ||
-            !LoadObjectFromPrecompiled(usage.variant) || !LoadObjectFromPrecompiled(num_keys) ||
+            !LoadObjectFromPrecompiled(usage.variant) ||
+            !LoadObjectFromPrecompiled(usage.bound_buffer) ||
+            !LoadObjectFromPrecompiled(num_keys) ||
             !LoadObjectFromPrecompiled(num_bound_samplers) ||
             !LoadObjectFromPrecompiled(num_bindless_samplers)) {
             return {};
@@ -393,6 +396,7 @@ void ShaderDiskCacheOpenGL::SaveUsage(const ShaderDiskCacheUsage& usage) {
 
     if (file.WriteObject(TransferableEntryKind::Usage) != 1 ||
         file.WriteObject(usage.unique_identifier) != 1 || file.WriteObject(usage.variant) != 1 ||
+        file.WriteObject(usage.bound_buffer) != 1 ||
         file.WriteObject(static_cast<u32>(usage.keys.size())) != 1 ||
         file.WriteObject(static_cast<u32>(usage.bound_samplers.size())) != 1 ||
         file.WriteObject(static_cast<u32>(usage.bindless_samplers.size())) != 1) {
@@ -447,7 +451,7 @@ void ShaderDiskCacheOpenGL::SaveDump(const ShaderDiskCacheUsage& usage, GLuint p
     };
 
     if (!SaveObjectToPrecompiled(usage.unique_identifier) ||
-        !SaveObjectToPrecompiled(usage.variant) ||
+        !SaveObjectToPrecompiled(usage.variant) || !SaveObjectToPrecompiled(usage.bound_buffer) ||
         !SaveObjectToPrecompiled(static_cast<u32>(usage.keys.size())) ||
         !SaveObjectToPrecompiled(static_cast<u32>(usage.bound_samplers.size())) ||
         !SaveObjectToPrecompiled(static_cast<u32>(usage.bindless_samplers.size()))) {
diff --git a/src/video_core/renderer_opengl/gl_shader_disk_cache.h b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
index 69a2fbdda..ef2371f6d 100644
--- a/src/video_core/renderer_opengl/gl_shader_disk_cache.h
+++ b/src/video_core/renderer_opengl/gl_shader_disk_cache.h
@@ -79,6 +79,7 @@ static_assert(std::is_trivially_copyable_v<ProgramVariant>);
 struct ShaderDiskCacheUsage {
     u64 unique_identifier{};
     ProgramVariant variant;
+    u32 bound_buffer{};
     VideoCommon::Shader::KeyMap keys;
     VideoCommon::Shader::BoundSamplerMap bound_samplers;
     VideoCommon::Shader::BindlessSamplerMap bindless_samplers;
diff --git a/src/video_core/shader/const_buffer_locker.cpp b/src/video_core/shader/const_buffer_locker.cpp
index a4a0319eb..0638be8cb 100644
--- a/src/video_core/shader/const_buffer_locker.cpp
+++ b/src/video_core/shader/const_buffer_locker.cpp
@@ -66,6 +66,18 @@ std::optional<Tegra::Engines::SamplerDescriptor> ConstBufferLocker::ObtainBindle
     return value;
 }
 
+std::optional<u32> ConstBufferLocker::ObtainBoundBuffer() {
+    // Ask the engine only while nothing is cached; without an engine there is nothing to ask.
+    if (!bound_buffer_saved) {
+        if (engine == nullptr) {
+            return std::nullopt;
+        }
+        bound_buffer_saved = true;
+        bound_buffer = engine->GetBoundBuffer();
+    }
+    return bound_buffer;
+}
+
 void ConstBufferLocker::InsertKey(u32 buffer, u32 offset, u32 value) {
     keys.insert_or_assign({buffer, offset}, value);
 }
@@ -78,6 +90,11 @@ void ConstBufferLocker::InsertBindlessSampler(u32 buffer, u32 offset, SamplerDes
     bindless_samplers.insert_or_assign({buffer, offset}, sampler);
 }
 
+void ConstBufferLocker::SetBoundBuffer(u32 buffer) {
+    bound_buffer = buffer;
+    bound_buffer_saved = true; // ObtainBoundBuffer() returns the stored value once this is set
+}
+
 bool ConstBufferLocker::IsConsistent() const {
     if (!engine) {
         return false;
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
index 78d9d7037..f26cce2ce 100644
--- a/src/video_core/shader/const_buffer_locker.h
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -41,6 +41,8 @@ public:
 
     std::optional<Tegra::Engines::SamplerDescriptor> ObtainBindlessSampler(u32 buffer, u32 offset);
 
+    std::optional<u32> ObtainBoundBuffer();
+
     /// Inserts a key.
     void InsertKey(u32 buffer, u32 offset, u32 value);
 
@@ -50,6 +52,10 @@ public:
     /// Inserts a bindless sampler key.
     void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
 
+    /// Set the bound buffer for this locker.
+
+    void SetBoundBuffer(u32 buffer);
+
     /// Checks keys and samplers against engine's current const buffers. Returns true if they are
     /// the same value, false otherwise;
     bool IsConsistent() const;
@@ -72,6 +78,10 @@ public:
         return bindless_samplers;
     }
 
+    u32 GetBoundBuffer() const {
+        return bound_buffer;
+    }
+
     VideoCore::GuestDriverProfile* AccessGuestDriverProfile() {
         if (engine) {
             return &(engine->AccessGuestDriverProfile());
@@ -85,6 +95,8 @@ private:
     KeyMap keys;
     BoundSamplerMap bound_samplers;
     BindlessSamplerMap bindless_samplers;
+    bool bound_buffer_saved{};
+    u32 bound_buffer{};
 };
 
 } // namespace VideoCommon::Shader

From 74aa7de5e3905a9438f8839fa273024edd118f19 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 4 Jan 2020 12:30:11 -0400
Subject: [PATCH 03/16] Guest_driver: Correct compiling errors in GCC.

---
 src/video_core/guest_driver.cpp | 4 +++-
 src/video_core/guest_driver.h   | 2 ++
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
index b1ac254ff..55b9bd021 100644
--- a/src/video_core/guest_driver.cpp
+++ b/src/video_core/guest_driver.cpp
@@ -2,6 +2,8 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <algorithm>
+
 #include "video_core/guest_driver.h"
 
 namespace VideoCore {
@@ -28,7 +30,7 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offse
         return;
     }
     texture_handler_size_deduced = true;
-    texture_handler_size = sizeof(u32) * min_val;
+    texture_handler_size = min_texture_handler_size * min_val;
 }
 
 } // namespace VideoCore
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
index f64f043af..7687a0434 100644
--- a/src/video_core/guest_driver.h
+++ b/src/video_core/guest_driver.h
@@ -27,6 +27,8 @@ public:
     void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
 
 private:
+    // Minimum size of texture handler any driver can use.
+    static constexpr u32 min_texture_handler_size = 4;
     // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
     // use 4 bytes instead. Thus, certain drivers may squish the size.
     static constexpr u32 default_texture_handler_size = 8;

From dc5cfa8d287757dede737553b6f1f8521971c6e2 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 5 Jan 2020 12:08:39 -0400
Subject: [PATCH 04/16] Shader_IR: Address Feedback

---
 src/video_core/guest_driver.cpp             |  7 ++--
 src/video_core/guest_driver.h               |  2 +-
 src/video_core/shader/const_buffer_locker.h |  1 -
 src/video_core/shader/decode.cpp            | 42 ++++++++++++---------
 4 files changed, 30 insertions(+), 22 deletions(-)

diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
index 55b9bd021..1ded52905 100644
--- a/src/video_core/guest_driver.cpp
+++ b/src/video_core/guest_driver.cpp
@@ -1,8 +1,9 @@
-// Copyright 2019 yuzu Emulator Project
+// Copyright 2020 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
 #include <algorithm>
+#include <climits>
 
 #include "video_core/guest_driver.h"
 
@@ -12,13 +13,13 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offse
     if (texture_handler_size_deduced) {
         return;
     }
-    std::size_t size = bound_offsets.size();
+    const std::size_t size = bound_offsets.size();
     if (size < 2) {
         return;
     }
     std::sort(bound_offsets.begin(), bound_offsets.end(),
               [](const u32& a, const u32& b) { return a < b; });
-    u32 min_val = 0xFFFFFFFF; // set to highest possible 32 bit integer;
+    u32 min_val = UINT_MAX;
     for (std::size_t i = 1; i < size; i++) {
         if (bound_offsets[i] == bound_offsets[i - 1]) {
             continue;
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
index 7687a0434..e08588ee9 100644
--- a/src/video_core/guest_driver.h
+++ b/src/video_core/guest_driver.h
@@ -1,4 +1,4 @@
-// Copyright 2019 yuzu Emulator Project
+// Copyright 2020 yuzu Emulator Project
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
index f26cce2ce..c7b72fa5e 100644
--- a/src/video_core/shader/const_buffer_locker.h
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -53,7 +53,6 @@ public:
     void InsertBindlessSampler(u32 buffer, u32 offset, Tegra::Engines::SamplerDescriptor sampler);
 
     /// Set the bound buffer for this locker.
-
     void SetBoundBuffer(u32 buffer);
 
     /// Checks keys and samplers against engine's current const buffers. Returns true if they are
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index aed35a9b8..c702c7629 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -315,25 +315,33 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
     return pc + 1;
 }
 
-void ShaderIR::PostDecode() {
-    // Deduce texture handler size if needed
-    auto* gpu_driver = locker.AccessGuestDriverProfile();
-    if (gpu_driver) {
-        if (!gpu_driver->TextureHandlerSizeKnown() && used_samplers.size() > 1) {
-            u32 count{};
-            std::vector<u32> bound_offsets;
-            for (const auto& sampler : used_samplers) {
-                if (sampler.IsBindless()) {
-                    continue;
-                }
-                count++;
-                bound_offsets.emplace_back(sampler.GetOffset());
-            }
-            if (count > 1) {
-                gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
-            }
+void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
+                              std::list<Sampler>& used_samplers) {
+    if (gpu_driver == nullptr) {
+        LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
+        return;
+    }
+    if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
+        return;
+    }
+    u32 count{};
+    std::vector<u32> bound_offsets;
+    for (const auto& sampler : used_samplers) {
+        if (sampler.IsBindless()) {
+            continue;
         }
+        count++;
+        bound_offsets.emplace_back(sampler.GetOffset());
+    }
+    if (count > 1) {
+        gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
     }
 }
 
+void ShaderIR::PostDecode() {
+    // Deduce texture handler size if needed
+    auto* gpu_driver = locker.AccessGuestDriverProfile();
+    DeduceTextureHandlerSize(gpu_driver, used_samplers);
+}
+
 } // namespace VideoCommon::Shader

From b97608ca646962a6f5a217b9477bdd86eed5e48f Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 8 Jan 2020 10:28:29 -0400
Subject: [PATCH 05/16] Shader_IR: Allow constant access of guest driver.

---
 src/video_core/engines/const_buffer_engine_interface.h | 1 +
 src/video_core/engines/kepler_compute.cpp              | 4 ++++
 src/video_core/engines/kepler_compute.h                | 2 ++
 src/video_core/engines/maxwell_3d.cpp                  | 4 ++++
 src/video_core/engines/maxwell_3d.h                    | 2 ++
 src/video_core/rasterizer_interface.h                  | 4 ++++
 src/video_core/shader/const_buffer_locker.h            | 2 +-
 7 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/src/video_core/engines/const_buffer_engine_interface.h b/src/video_core/engines/const_buffer_engine_interface.h
index c29156e34..d56a47710 100644
--- a/src/video_core/engines/const_buffer_engine_interface.h
+++ b/src/video_core/engines/const_buffer_engine_interface.h
@@ -109,6 +109,7 @@ public:
     virtual u32 GetBoundBuffer() const = 0;
 
     virtual VideoCore::GuestDriverProfile& AccessGuestDriverProfile() = 0;
+    virtual const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const = 0;
 };
 
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index f177ae938..4b824aa4e 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -98,6 +98,10 @@ VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() {
     return rasterizer.AccessGuestDriverProfile();
 }
 
+const VideoCore::GuestDriverProfile& KeplerCompute::AccessGuestDriverProfile() const {
+    return rasterizer.AccessGuestDriverProfile(); // read-only view of the rasterizer's profile
+}
+
 void KeplerCompute::ProcessLaunch() {
     const GPUVAddr launch_desc_loc = regs.launch_desc_loc.Address();
     memory_manager.ReadBlockUnsafe(launch_desc_loc, &launch_description,
diff --git a/src/video_core/engines/kepler_compute.h b/src/video_core/engines/kepler_compute.h
index 99c82a9af..eeb79c56f 100644
--- a/src/video_core/engines/kepler_compute.h
+++ b/src/video_core/engines/kepler_compute.h
@@ -220,6 +220,8 @@ public:
 
     VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
 
+    const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
+
 private:
     Core::System& system;
     VideoCore::RasterizerInterface& rasterizer;
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 8167864c0..7cea146f0 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -788,4 +788,8 @@ VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() {
     return rasterizer.AccessGuestDriverProfile();
 }
 
+const VideoCore::GuestDriverProfile& Maxwell3D::AccessGuestDriverProfile() const {
+    return rasterizer.AccessGuestDriverProfile(); // read-only view of the rasterizer's profile
+}
+
 } // namespace Tegra::Engines
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 08ef95410..8808bbf76 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1308,6 +1308,8 @@ public:
 
     VideoCore::GuestDriverProfile& AccessGuestDriverProfile() override;
 
+    const VideoCore::GuestDriverProfile& AccessGuestDriverProfile() const override;
+
     /// Memory for macro code - it's undetermined how big this is, however 1MB is much larger than
     /// we've seen used.
     using MacroMemory = std::array<u32, 0x40000>;
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 149f79af3..1b0cc56f1 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -84,6 +84,10 @@ public:
         return guest_driver_profile;
     }
 
+    const GuestDriverProfile& AccessGuestDriverProfile() const {
+        return guest_driver_profile;
+    }
+
 private:
     GuestDriverProfile guest_driver_profile{};
 };
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
index c7b72fa5e..f5655ac64 100644
--- a/src/video_core/shader/const_buffer_locker.h
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -81,7 +81,7 @@ public:
         return bound_buffer;
     }
 
-    VideoCore::GuestDriverProfile* AccessGuestDriverProfile() {
+    VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
         if (engine) {
             return &(engine->AccessGuestDriverProfile());
         }

From 64496f24569ecc23ebbb816725f27142867b1468 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 8 Jan 2020 11:46:36 -0400
Subject: [PATCH 06/16] Shader_IR: Address Feedback

---
 src/video_core/guest_driver.cpp             |  9 ++--
 src/video_core/guest_driver.h               | 11 ++---
 src/video_core/rasterizer_interface.h       |  2 +
 src/video_core/shader/const_buffer_locker.h |  2 +-
 src/video_core/shader/decode.cpp            | 48 ++++++++++-----------
 5 files changed, 37 insertions(+), 35 deletions(-)

diff --git a/src/video_core/guest_driver.cpp b/src/video_core/guest_driver.cpp
index 1ded52905..6adef459e 100644
--- a/src/video_core/guest_driver.cpp
+++ b/src/video_core/guest_driver.cpp
@@ -3,7 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <algorithm>
-#include <climits>
+#include <limits>
 
 #include "video_core/guest_driver.h"
 
@@ -17,10 +17,9 @@ void GuestDriverProfile::DeduceTextureHandlerSize(std::vector<u32>&& bound_offse
     if (size < 2) {
         return;
     }
-    std::sort(bound_offsets.begin(), bound_offsets.end(),
-              [](const u32& a, const u32& b) { return a < b; });
-    u32 min_val = UINT_MAX;
-    for (std::size_t i = 1; i < size; i++) {
+    std::sort(bound_offsets.begin(), bound_offsets.end(), std::less{});
+    u32 min_val = std::numeric_limits<u32>::max();
+    for (std::size_t i = 1; i < size; ++i) {
         if (bound_offsets[i] == bound_offsets[i - 1]) {
             continue;
         }
diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
index e08588ee9..0a9a826b6 100644
--- a/src/video_core/guest_driver.h
+++ b/src/video_core/guest_driver.h
@@ -12,10 +12,13 @@ namespace VideoCore {
 
 /**
  * The GuestDriverProfile class is used to learn about the GPU drivers behavior and collect
- * information necessary for impossible to avoid HLE methods like shader tracks.
+ * information necessary for unavoidable HLE methods like shader tracks, since those are
+ * Entscheidungsproblems (decision problems that cannot be solved in the general case).
  */
 class GuestDriverProfile {
 public:
+    void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
+
     u32 GetTextureHandlerSize() const {
         return texture_handler_size;
     }
@@ -24,16 +27,14 @@ public:
         return texture_handler_size_deduced;
     }
 
-    void DeduceTextureHandlerSize(std::vector<u32>&& bound_offsets);
-
 private:
     // Minimum size of texture handler any driver can use.
     static constexpr u32 min_texture_handler_size = 4;
     // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
     // use 4 bytes instead. Thus, certain drivers may squish the size.
     static constexpr u32 default_texture_handler_size = 8;
-    u32 texture_handler_size{default_texture_handler_size};
-    bool texture_handler_size_deduced{};
+    u32 texture_handler_size = default_texture_handler_size;
+    bool texture_handler_size_deduced = false;
 };
 
 } // namespace VideoCore
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 1b0cc56f1..c586cd6fe 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -80,10 +80,12 @@ public:
     virtual void LoadDiskResources(const std::atomic_bool& stop_loading = false,
                                    const DiskResourceLoadCallback& callback = {}) {}
 
+    /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
     GuestDriverProfile& AccessGuestDriverProfile() {
         return guest_driver_profile;
     }
 
+    /// Grant access to the Guest Driver Profile for recording/obtaining info on the guest driver.
     const GuestDriverProfile& AccessGuestDriverProfile() const {
         return guest_driver_profile;
     }
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
index f5655ac64..fd1bb476a 100644
--- a/src/video_core/shader/const_buffer_locker.h
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -83,7 +83,7 @@ public:
 
     VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
         if (engine) {
-            return &(engine->AccessGuestDriverProfile());
+            return &engine->AccessGuestDriverProfile();
         }
         return nullptr;
     }
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index c702c7629..507614d59 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -33,6 +33,29 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
     return (absolute_offset % SchedPeriod) == 0;
 }
 
+/// Gathers the offsets of all bound (non-bindless) samplers and, when at least two exist, hands
+/// them to the driver profile so it can deduce the texture handler size from them.
+void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
+                              std::list<Sampler>& used_samplers) {
+    if (!gpu_driver) {
+        LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
+        return;
+    }
+    if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
+        return;
+    }
+    // The vector length doubles as the number of bound samplers found.
+    std::vector<u32> offsets;
+    for (const Sampler& entry : used_samplers) {
+        if (!entry.IsBindless()) {
+            offsets.emplace_back(entry.GetOffset());
+        }
+    }
+    if (offsets.size() > 1) {
+        gpu_driver->DeduceTextureHandlerSize(std::move(offsets));
+    }
+}
+
 } // Anonymous namespace
 
 class ASTDecoder {
@@ -315,32 +338,9 @@ u32 ShaderIR::DecodeInstr(NodeBlock& bb, u32 pc) {
     return pc + 1;
 }
 
-void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
-                              std::list<Sampler>& used_samplers) {
-    if (gpu_driver == nullptr) {
-        LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
-        return;
-    }
-    if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
-        return;
-    }
-    u32 count{};
-    std::vector<u32> bound_offsets;
-    for (const auto& sampler : used_samplers) {
-        if (sampler.IsBindless()) {
-            continue;
-        }
-        count++;
-        bound_offsets.emplace_back(sampler.GetOffset());
-    }
-    if (count > 1) {
-        gpu_driver->DeduceTextureHandlerSize(std::move(bound_offsets));
-    }
-}
-
 void ShaderIR::PostDecode() {
     // Deduce texture handler size if needed
-    auto* gpu_driver = locker.AccessGuestDriverProfile();
+    auto gpu_driver = locker.AccessGuestDriverProfile();
     DeduceTextureHandlerSize(gpu_driver, used_samplers);
 }
 

From 603c861532ed1a89254556ff84bbe121bd700765 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 5 Jan 2020 15:23:24 -0400
Subject: [PATCH 07/16] Shader_IR: Implement initial code for tracking indexed
 samplers.

---
 src/video_core/shader/node.h        | 48 +++++++++++++++++
 src/video_core/shader/node_helper.h |  6 +++
 src/video_core/shader/shader_ir.h   |  3 ++
 src/video_core/shader/track.cpp     | 82 +++++++++++++++++++++++++++++
 4 files changed, 139 insertions(+)

diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 075c7d07c..b370df8f9 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -230,6 +230,12 @@ using Node = std::shared_ptr<NodeData>;
 using Node4 = std::array<Node, 4>;
 using NodeBlock = std::vector<Node>;
 
+class BindlessSamplerNode;
+class ArraySamplerNode;
+
+using TrackSamplerData = std::variant<BindlessSamplerNode, ArraySamplerNode>;
+using TrackSampler = std::shared_ptr<TrackSamplerData>;
+
 class Sampler {
 public:
     /// This constructor is for bound samplers
@@ -288,6 +294,48 @@ private:
     bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
 };
 
+/// Represents a tracked bindless sampler into a direct const buffer
+class ArraySamplerNode final {
+public:
+    explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
+        : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
+
+    u32 GetIndex() const {
+        return index;
+    }
+
+    u32 GetBaseOffset() const {
+        return base_offset;
+    }
+
+    u32 GetIndexVar() const {
+        return bindless_var;
+    }
+
+private:
+    u32 index;        ///< Const buffer index taken from the tracked cbuf read.
+    u32 base_offset;  ///< Immediate operand of the `register + immediate` read address.
+    u32 bindless_var; ///< Value returned by GetIndexVar(); the tracker passes 0 for now (TODO).
+};
+
+/// Represents a tracked bindless sampler into a direct const buffer
+class BindlessSamplerNode final {
+public:
+    explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
+
+    u32 GetIndex() const {
+        return index;
+    }
+
+    u32 GetOffset() const {
+        return offset;
+    }
+
+private:
+    u32 index;  ///< Const buffer index taken from the tracked cbuf read.
+    u32 offset; ///< Immediate offset of the tracked cbuf read.
+};
+
 class Image final {
 public:
     /// This constructor is for bound images
diff --git a/src/video_core/shader/node_helper.h b/src/video_core/shader/node_helper.h
index 0c2aa749b..11231bbea 100644
--- a/src/video_core/shader/node_helper.h
+++ b/src/video_core/shader/node_helper.h
@@ -45,6 +45,12 @@ Node MakeNode(Args&&... args) {
     return std::make_shared<NodeData>(T(std::forward<Args>(args)...));
 }
 
+template <typename T, typename... Args>
+TrackSampler MakeTrackSampler(Args&&... args) {
+    static_assert(std::is_convertible_v<T, TrackSamplerData>);
+    return std::make_shared<TrackSamplerData>(T(std::forward<Args>(args)...));
+}
+
 template <typename... Args>
 Node Operation(OperationCode code, Args&&... args) {
     if constexpr (sizeof...(args) == 0) {
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 92c24247d..d85f14c97 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -388,6 +388,9 @@ private:
 
     std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
 
+    std::tuple<Node, VideoCommon::Shader::TrackSampler> TrackSampler(
+        Node tracked, const NodeBlock& code, s64 cursor) const;
+
     std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
 
     std::pair<Node, s64> TrackRegister(const GprNode* tracked, const NodeBlock& code,
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 165c79330..69a677394 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -8,6 +8,7 @@
 
 #include "common/common_types.h"
 #include "video_core/shader/node.h"
+#include "video_core/shader/node_helper.h"
 #include "video_core/shader/shader_ir.h"
 
 namespace VideoCommon::Shader {
@@ -37,6 +38,87 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
 }
 } // Anonymous namespace
 
+/// Splits an unsigned add of one register and one immediate (in either operand order) into
+/// its {register, immediate} nodes. Any other operation shape yields std::nullopt.
+std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) {
+    if (operation.GetCode() != OperationCode::UAdd || operation.GetOperandsCount() != 2) {
+        return std::nullopt;
+    }
+    Node register_node;
+    Node immediate_node;
+    // Classify both operands; an operand of any other node type leaves its slot empty.
+    for (std::size_t index = 0; index < operation.GetOperandsCount(); ++index) {
+        const Node& operand = operation[index];
+        if (std::holds_alternative<GprNode>(*operand)) {
+            register_node = operand;
+        } else if (std::holds_alternative<ImmediateNode>(*operand)) {
+            immediate_node = operand;
+        }
+    }
+    if (!register_node || !immediate_node) {
+        return std::nullopt;
+    }
+    return std::make_pair(register_node, immediate_node);
+}
+
+/// Follows `tracked` back through `code` to the const buffer read that yields a texture handle.
+/// Returns {cbuf node, tracking info}; both are null when the handle could not be tracked.
+std::tuple<Node, TrackSampler> ShaderIR::TrackSampler(Node tracked, const NodeBlock& code,
+                                                      s64 cursor) const {
+    if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
+        const auto cbuf_index = cbuf->GetIndex();
+        const auto offset = cbuf->GetOffset();
+        // An immediate offset means a single handle read from a fixed const buffer position.
+        if (const auto immediate = std::get_if<ImmediateNode>(&*offset)) {
+            auto track = MakeTrackSampler<BindlessSamplerNode>(cbuf_index, immediate->GetValue());
+            return {tracked, track};
+        }
+        const auto operation = std::get_if<OperationNode>(&*offset);
+        if (!operation) {
+            return {};
+        }
+        // A computed offset is only tracked when it reads from the bound buffer.
+        const auto bound_buffer = locker.ObtainBoundBuffer();
+        if (!bound_buffer || *bound_buffer != cbuf_index) {
+            return {};
+        }
+        const auto pair = DecoupleIndirectRead(*operation);
+        if (!pair) {
+            return {};
+        }
+        const auto offset_inm = std::get_if<ImmediateNode>(&*pair->second);
+        // TODO Implement Bindless Index custom variable
+        auto track = MakeTrackSampler<ArraySamplerNode>(cbuf_index, offset_inm->GetValue(), 0);
+        return {tracked, track};
+    }
+    if (const auto gpr = std::get_if<GprNode>(&*tracked)) {
+        if (gpr->GetIndex() == Tegra::Shader::Register::ZeroIndex) {
+            return {};
+        }
+        // Search strictly before the cursor so a self-assigning instruction cannot loop forever.
+        const auto [source, new_cursor] = TrackRegister(gpr, code, cursor - 1);
+        if (!source) {
+            return {};
+        }
+        return TrackSampler(source, code, new_cursor);
+    }
+    if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
+        for (std::size_t left = operation->GetOperandsCount(); left > 0; --left) {
+            auto found = TrackSampler((*operation)[left - 1], code, cursor);
+            if (std::get<0>(found)) {
+                return found;
+            }
+        }
+        return {};
+    }
+    if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
+        // NOTE(review): `tracked` is passed on unchanged, so this re-enters this branch - confirm.
+        const auto& conditional_code = conditional->GetCode();
+        return TrackSampler(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
+    }
+    return {};
+}
+
 std::tuple<Node, u32, u32> ShaderIR::TrackCbuf(Node tracked, const NodeBlock& code,
                                                s64 cursor) const {
     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {

From f4603d23c551ece65cd205a850a31a84531daf43 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 5 Jan 2020 15:53:22 -0400
Subject: [PATCH 08/16] Shader_IR: Setup Indexed Samplers on the IR

---
 src/video_core/shader/decode/texture.cpp | 66 +++++++++++++++++-------
 1 file changed, 46 insertions(+), 20 deletions(-)

diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 0b567e39d..886650d9e 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -389,31 +389,57 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
 const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
                                             std::optional<SamplerInfo> sampler_info) {
     const Node sampler_register = GetRegister(reg);
-    const auto [base_sampler, buffer, offset] =
-        TrackCbuf(sampler_register, global_code, static_cast<s64>(global_code.size()));
-    ASSERT(base_sampler != nullptr);
-    if (base_sampler == nullptr) {
+    const auto [base_node, tracked_sampler_info] =
+        TrackSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
+    ASSERT(base_node != nullptr);
+    if (base_node == nullptr) {
         return nullptr;
     }
 
-    const auto info = GetSamplerInfo(sampler_info, offset, buffer);
+    if (const auto bindless_sampler_info =
+            std::get_if<BindlessSamplerNode>(&*tracked_sampler_info)) {
+        const u32 buffer = bindless_sampler_info->GetIndex();
+        const u32 offset = bindless_sampler_info->GetOffset();
+        const auto info = GetSamplerInfo(sampler_info, offset, buffer);
 
-    // If this sampler has already been used, return the existing mapping.
-    const auto it =
-        std::find_if(used_samplers.begin(), used_samplers.end(),
-                     [buffer = buffer, offset = offset](const Sampler& entry) {
-                         return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
-                     });
-    if (it != used_samplers.end()) {
-        ASSERT(it->IsBindless() && it->GetType() == info.type && it->IsArray() == info.is_array &&
-               it->IsShadow() == info.is_shadow);
-        return &*it;
+        // If this sampler has already been used, return the existing mapping.
+        const auto it =
+            std::find_if(used_samplers.begin(), used_samplers.end(),
+                         [buffer = buffer, offset = offset](const Sampler& entry) {
+                             return entry.GetBuffer() == buffer && entry.GetOffset() == offset;
+                         });
+        if (it != used_samplers.end()) {
+            ASSERT(it->IsBindless() && it->GetType() == info.type &&
+                   it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow);
+            return &*it;
+        }
+
+        // Otherwise create a new mapping for this sampler
+        const auto next_index = static_cast<u32>(used_samplers.size());
+        return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
+                                           info.is_shadow, info.is_buffer);
+    } else if (const auto array_sampler_info =
+                   std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
+        const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
+        const auto info = GetSamplerInfo(sampler_info, base_offset);
+
+        // If this sampler has already been used, return the existing mapping.
+        const auto it = std::find_if(
+            used_samplers.begin(), used_samplers.end(),
+            [base_offset](const Sampler& entry) { return entry.GetOffset() == base_offset; });
+        if (it != used_samplers.end()) {
+            ASSERT(!it->IsBindless() && it->GetType() == info.type &&
+                   it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow &&
+                   it->IsBuffer() == info.is_buffer);
+            return &*it;
+        }
+
+        // Otherwise create a new mapping for this sampler
+        const auto next_index = static_cast<u32>(used_samplers.size());
+        return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array,
+                                           info.is_shadow, info.is_buffer);
     }
-
-    // Otherwise create a new mapping for this sampler
-    const auto next_index = static_cast<u32>(used_samplers.size());
-    return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
-                                       info.is_shadow, info.is_buffer);
+    return nullptr;
 }
 
 void ShaderIR::WriteTexInstructionFloat(NodeBlock& bb, Instruction instr, const Node4& components) {

From 037ea431ceb93e93274fdcf9fb724819639d04fd Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 5 Jan 2020 18:36:21 -0400
Subject: [PATCH 09/16] Shader_IR: deduce size of indexed samplers

---
 src/video_core/shader/decode.cpp         | 36 ++++++++++++++++++++++++
 src/video_core/shader/decode/texture.cpp |  9 +++---
 src/video_core/shader/node.h             | 22 ++++++++++++---
 src/video_core/shader/shader_ir.h        |  1 +
 4 files changed, 60 insertions(+), 8 deletions(-)

diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index 507614d59..dd2f68a3e 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -56,6 +56,29 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
     }
 }
 
+/// Estimates how many handles an indexed sampler spans from the distance to the next bound
+/// sampler. Returns std::nullopt without a driver profile or when no bound sampler follows it.
+std::optional<u32> TryDeduceSamplerSize(Sampler& sampler_to_deduce,
+                                        VideoCore::GuestDriverProfile* gpu_driver,
+                                        std::list<Sampler>& used_samplers) {
+    if (!gpu_driver) {
+        LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
+        return std::nullopt;
+    }
+    const u32 start = sampler_to_deduce.GetOffset();
+    u32 next_offset = UINT_MAX; // sentinel: no bound sampler found past `start`
+    for (const Sampler& candidate : used_samplers) {
+        const u32 candidate_offset = candidate.GetOffset();
+        if (!candidate.IsBindless() && candidate_offset > start) {
+            next_offset = std::min(candidate_offset, next_offset);
+        }
+    }
+    if (next_offset == UINT_MAX) {
+        return std::nullopt;
+    }
+    // The offset gap is scaled by 4 (presumably words -> bytes) to match the handler size.
+    return ((next_offset - start) * 4) / gpu_driver->GetTextureHandlerSize();
+}
+
 } // Anonymous namespace
 
 class ASTDecoder {
@@ -342,6 +365,19 @@ void ShaderIR::PostDecode() {
     // Deduce texture handler size if needed
     auto gpu_driver = locker.AccessGuestDriverProfile();
     DeduceTextureHandlerSize(gpu_driver, used_samplers);
+    // Deduce Indexed Samplers
+    if (uses_indexed_samplers) {
+        for (auto& sampler : used_samplers) {
+            if (sampler.IsIndexed()) {
+                auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers);
+                if (size) {
+                    sampler.SetSize(*size);
+                } else {
+                    sampler.SetSize(1);
+                }
+            }
+        }
+    }
 }
 
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 886650d9e..e7c38f5d6 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -383,7 +383,7 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
     // Otherwise create a new mapping for this sampler
     const auto next_index = static_cast<u32>(used_samplers.size());
     return &used_samplers.emplace_back(next_index, offset, info.type, info.is_array, info.is_shadow,
-                                       info.is_buffer);
+                                       info.is_buffer, false);
 }
 
 const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
@@ -417,7 +417,7 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
         // Otherwise create a new mapping for this sampler
         const auto next_index = static_cast<u32>(used_samplers.size());
         return &used_samplers.emplace_back(next_index, offset, buffer, info.type, info.is_array,
-                                           info.is_shadow, info.is_buffer);
+                                           info.is_shadow, info.is_buffer, false);
     } else if (const auto array_sampler_info =
                    std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
         const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
@@ -430,14 +430,15 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
         if (it != used_samplers.end()) {
             ASSERT(!it->IsBindless() && it->GetType() == info.type &&
                    it->IsArray() == info.is_array && it->IsShadow() == info.is_shadow &&
-                   it->IsBuffer() == info.is_buffer);
+                   it->IsBuffer() == info.is_buffer && it->IsIndexed());
             return &*it;
         }
 
+        uses_indexed_samplers = true;
         // Otherwise create a new mapping for this sampler
         const auto next_index = static_cast<u32>(used_samplers.size());
         return &used_samplers.emplace_back(next_index, base_offset, info.type, info.is_array,
-                                           info.is_shadow, info.is_buffer);
+                                           info.is_shadow, info.is_buffer, true);
     }
     return nullptr;
 }
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index b370df8f9..2f29b9506 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -240,15 +240,15 @@ class Sampler {
 public:
     /// This constructor is for bound samplers
     constexpr explicit Sampler(u32 index, u32 offset, Tegra::Shader::TextureType type,
-                               bool is_array, bool is_shadow, bool is_buffer)
+                               bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
         : index{index}, offset{offset}, type{type}, is_array{is_array}, is_shadow{is_shadow},
-          is_buffer{is_buffer} {}
+          is_buffer{is_buffer}, is_indexed{is_indexed} {}
 
     /// This constructor is for bindless samplers
     constexpr explicit Sampler(u32 index, u32 offset, u32 buffer, Tegra::Shader::TextureType type,
-                               bool is_array, bool is_shadow, bool is_buffer)
+                               bool is_array, bool is_shadow, bool is_buffer, bool is_indexed)
         : index{index}, offset{offset}, buffer{buffer}, type{type}, is_array{is_array},
-          is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true} {}
+          is_shadow{is_shadow}, is_buffer{is_buffer}, is_bindless{true}, is_indexed{is_indexed} {}
 
     constexpr u32 GetIndex() const {
         return index;
@@ -282,16 +282,30 @@ public:
         return is_bindless;
     }
 
+    constexpr bool IsIndexed() const {
+        return is_indexed;
+    }
+
+    constexpr u32 Size() const {
+        return size;
+    }
+
+    void SetSize(u32 new_size) {
+        size = new_size;
+    }
+
 private:
     u32 index{};  ///< Emulated index given for the this sampler.
     u32 offset{}; ///< Offset in the const buffer from where the sampler is being read.
     u32 buffer{}; ///< Buffer where the bindless sampler is being read (unused on bound samplers).
+    u32 size{};   ///< Size of the sampler if indexed.
 
     Tegra::Shader::TextureType type{}; ///< The type used to sample this texture (Texture2D, etc)
     bool is_array{};    ///< Whether the texture is being sampled as an array texture or not.
     bool is_shadow{};   ///< Whether the texture is being sampled as a depth texture or not.
     bool is_buffer{};   ///< Whether the texture is a texture buffer without sampler.
     bool is_bindless{}; ///< Whether this sampler belongs to a bindless texture or not.
+    bool is_indexed{};  ///< Whether this sampler is an indexed array of textures.
 };
 
 /// Represents a tracked bindless sampler into a direct const buffer
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index d85f14c97..121528346 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -435,6 +435,7 @@ private:
     bool uses_instance_id{};
     bool uses_vertex_id{};
     bool uses_warps{};
+    bool uses_indexed_samplers{};
 
     Tegra::Shader::Header header;
 };

From 2b02f29a2ddfe40639ea0f855bdf257beca59e65 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 6 Jan 2020 11:43:13 -0400
Subject: [PATCH 10/16] GL Backend: Introduce indexed samplers into the GL
 backend

---
 .../renderer_opengl/gl_rasterizer.cpp         | 34 +++++++++++++++----
 .../renderer_opengl/gl_shader_decompiler.cpp  | 15 ++++++--
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c428f06e4..362942e09 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -55,16 +55,20 @@ namespace {
 
 template <typename Engine, typename Entry>
 Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry& entry,
-                                               Tegra::Engines::ShaderType shader_type) {
+                                               Tegra::Engines::ShaderType shader_type,
+                                               std::size_t index = 0) {
     if (entry.IsBindless()) {
         const Tegra::Texture::TextureHandle tex_handle =
             engine.AccessConstBuffer32(shader_type, entry.GetBuffer(), entry.GetOffset());
         return engine.GetTextureInfo(tex_handle);
     }
+    // Sampler offsets count 32-bit words but the handler size is in bytes: convert it first.
+    const u32 handler_words = engine.AccessGuestDriverProfile().GetTextureHandlerSize() / 4;
+    const u32 offset = entry.GetOffset() + static_cast<u32>(index) * handler_words;
     if constexpr (std::is_same_v<Engine, Tegra::Engines::Maxwell3D>) {
-        return engine.GetStageTexture(shader_type, entry.GetOffset());
+        return engine.GetStageTexture(shader_type, offset);
     } else {
-        return engine.GetTexture(entry.GetOffset());
+        return engine.GetTexture(offset);
     }
 }
 
@@ -942,8 +946,15 @@ void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader&
     u32 binding = device.GetBaseBindings(stage_index).sampler;
     for (const auto& entry : shader->GetShaderEntries().samplers) {
         const auto shader_type = static_cast<Tegra::Engines::ShaderType>(stage_index);
-        const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
-        SetupTexture(binding++, texture, entry);
+        if (!entry.IsIndexed()) {
+            const auto texture = GetTextureInfo(maxwell3d, entry, shader_type);
+            SetupTexture(binding++, texture, entry);
+        } else {
+            for (std::size_t i = 0; i < entry.Size(); ++i) {
+                const auto texture = GetTextureInfo(maxwell3d, entry, shader_type, i);
+                SetupTexture(binding++, texture, entry);
+            }
+        }
     }
 }
 
@@ -952,8 +963,17 @@ void RasterizerOpenGL::SetupComputeTextures(const Shader& kernel) {
     const auto& compute = system.GPU().KeplerCompute();
     u32 binding = 0;
     for (const auto& entry : kernel->GetShaderEntries().samplers) {
-        const auto texture = GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
-        SetupTexture(binding++, texture, entry);
+        if (!entry.IsIndexed()) {
+            const auto texture =
+                GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute);
+            SetupTexture(binding++, texture, entry);
+        } else {
+            for (std::size_t i = 0; i < entry.Size(); ++i) {
+                const auto texture =
+                    GetTextureInfo(compute, entry, Tegra::Engines::ShaderType::Compute, i);
+                SetupTexture(binding++, texture, entry);
+            }
+        }
     }
 }
 
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 2996aaf08..4b35396f9 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -655,7 +655,8 @@ private:
         u32 binding = device.GetBaseBindings(stage).sampler;
         for (const auto& sampler : ir.GetSamplers()) {
             const std::string name = GetSampler(sampler);
-            const std::string description = fmt::format("layout (binding = {}) uniform", binding++);
+            const std::string description = fmt::format("layout (binding = {}) uniform", binding);
+            binding += sampler.IsIndexed() ? sampler.Size() : 1;
 
             std::string sampler_type = [&]() {
                 if (sampler.IsBuffer()) {
@@ -682,7 +683,11 @@ private:
                 sampler_type += "Shadow";
             }
 
-            code.AddLine("{} {} {};", description, sampler_type, name);
+            if (!sampler.IsIndexed()) {
+                code.AddLine("{} {} {};", description, sampler_type, name);
+            } else {
+                code.AddLine("{} {} {}[{}];", description, sampler_type, name, sampler.Size());
+            }
         }
         if (!ir.GetSamplers().empty()) {
             code.AddNewLine();
@@ -1099,7 +1104,11 @@ private:
         } else if (!meta->ptp.empty()) {
             expr += "Offsets";
         }
-        expr += '(' + GetSampler(meta->sampler) + ", ";
+        if (!meta->sampler.IsIndexed()) {
+            expr += '(' + GetSampler(meta->sampler) + ", ";
+        } else {
+            expr += '(' + GetSampler(meta->sampler) + "[0], ";
+        }
         expr += coord_constructors.at(count + (has_array ? 1 : 0) +
                                       (has_shadow && !separate_dc ? 1 : 0) - 1);
         expr += '(';

From 3c34678627eeb1b48375cf70ec38b72691fedd1e Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 7 Jan 2020 14:53:46 -0400
Subject: [PATCH 11/16] Shader_IR: Implement Injectable Custom Variables to the
 IR.

---
 .../renderer_opengl/gl_shader_decompiler.cpp  | 20 +++++++++++++++++++
 .../renderer_vulkan/vk_shader_decompiler.cpp  | 16 +++++++++++++++
 src/video_core/shader/node.h                  | 17 +++++++++++++++-
 src/video_core/shader/shader_ir.cpp           |  9 +++++++++
 src/video_core/shader/shader_ir.h             |  9 +++++++++
 5 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 4b35396f9..8b413ae9a 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -391,6 +391,7 @@ public:
         DeclareVertex();
         DeclareGeometry();
         DeclareRegisters();
+        DeclareCustomVariables();
         DeclarePredicates();
         DeclareLocalMemory();
         DeclareInternalFlags();
@@ -503,6 +504,16 @@ private:
         }
     }
 
+    void DeclareCustomVariables() {
+        const u32 cv_num = ir.GetCustomVariablesAmount();
+        for (u32 i = 0; i < cv_num; ++i) {
+            code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
+        }
+        if (cv_num > 0) {
+            code.AddNewLine();
+        }
+    }
+
     void DeclarePredicates() {
         const auto& predicates = ir.GetPredicates();
         for (const auto pred : predicates) {
@@ -780,6 +791,11 @@ private:
             return {GetRegister(index), Type::Float};
         }
 
+        if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
+            const u32 index = cv->GetIndex();
+            return {GetCustomVariable(index), Type::Float};
+        }
+
         if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
             const u32 value = immediate->GetValue();
             if (value < 10) {
@@ -2250,6 +2266,10 @@ private:
         return GetDeclarationWithSuffix(index, "gpr");
     }
 
+    std::string GetCustomVariable(u32 index) const {
+        return GetDeclarationWithSuffix(index, "custom_var");
+    }
+
     std::string GetPredicate(Tegra::Shader::Pred pred) const {
         return GetDeclarationWithSuffix(static_cast<u32>(pred), "pred");
     }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index dd6d2ef03..bf797dad3 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -353,6 +353,7 @@ private:
         DeclareFragment();
         DeclareCompute();
         DeclareRegisters();
+        DeclareCustomVariables();
         DeclarePredicates();
         DeclareLocalMemory();
         DeclareSharedMemory();
@@ -587,6 +588,15 @@ private:
         }
     }
 
+    void DeclareCustomVariables() {
+        const u32 cv_num = ir.GetCustomVariablesAmount();
+        for (u32 i = 0; i < cv_num; ++i) {
+            const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
+            Name(id, fmt::format("custom_var_{}", i));
+            custom_variables.emplace(i, AddGlobalVariable(id));
+        }
+    }
+
     void DeclarePredicates() {
         for (const auto pred : ir.GetPredicates()) {
             const Id id = OpVariable(t_prv_bool, spv::StorageClass::Private, v_false);
@@ -974,6 +984,11 @@ private:
             return {OpLoad(t_float, registers.at(index)), Type::Float};
         }
 
+        if (const auto cv = std::get_if<CustomVarNode>(&*node)) {
+            const u32 index = cv->GetIndex();
+            return {OpLoad(t_float, custom_variables.at(index)), Type::Float};
+        }
+
         if (const auto immediate = std::get_if<ImmediateNode>(&*node)) {
             return {Constant(t_uint, immediate->GetValue()), Type::Uint};
         }
@@ -2505,6 +2520,7 @@ private:
     Id out_vertex{};
     Id in_vertex{};
     std::map<u32, Id> registers;
+    std::map<u32, Id> custom_variables;
     std::map<Tegra::Shader::Pred, Id> predicates;
     std::map<u32, Id> flow_variables;
     Id local_memory{};
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index 2f29b9506..db06767f6 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -212,6 +212,7 @@ enum class MetaStackClass {
 class OperationNode;
 class ConditionalNode;
 class GprNode;
+class CustomVarNode;
 class ImmediateNode;
 class InternalFlagNode;
 class PredicateNode;
@@ -223,7 +224,7 @@ class SmemNode;
 class GmemNode;
 class CommentNode;
 
-using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, ImmediateNode,
+using NodeData = std::variant<OperationNode, ConditionalNode, GprNode, CustomVarNode, ImmediateNode,
                               InternalFlagNode, PredicateNode, AbufNode, PatchNode, CbufNode,
                               LmemNode, SmemNode, GmemNode, CommentNode>;
 using Node = std::shared_ptr<NodeData>;
@@ -550,6 +551,20 @@ private:
     Tegra::Shader::Register index{};
 };
 
+/// A custom variable
+class CustomVarNode final {
+public:
+    explicit constexpr CustomVarNode(u32 index) : index{index} {}
+
+    u32 GetIndex() const {
+        return index;
+    }
+
+private:
+    u32 index{};
+};
+
+
 /// A 32-bits value that represents an immediate value
 class ImmediateNode final {
 public:
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index a186e22b2..94972d57f 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -39,6 +39,10 @@ Node ShaderIR::GetRegister(Register reg) {
     return MakeNode<GprNode>(reg);
 }
 
+Node ShaderIR::GetCustomVariable(u32 id) {
+    return MakeNode<CustomVarNode>(id);
+}
+
 Node ShaderIR::GetImmediate19(Instruction instr) {
     return Immediate(instr.alu.GetImm20_19());
 }
@@ -453,4 +457,9 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) {
     return id;
 }
 
+u32 ShaderIR::NewCustomVariable() {
+    const u32 id = num_custom_variables++;
+    return id;
+}
+
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 121528346..2fe14e815 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -180,6 +180,10 @@ public:
         return amend_code[index];
     }
 
+    u32 GetCustomVariablesAmount() const {
+        return num_custom_variables;
+    }
+
 private:
     friend class ASTDecoder;
 
@@ -236,6 +240,8 @@ private:
 
     /// Generates a node for a passed register.
     Node GetRegister(Tegra::Shader::Register reg);
+    /// Generates a node for a custom variable
+    Node GetCustomVariable(u32 id);
     /// Generates a node representing a 19-bit immediate value
     Node GetImmediate19(Tegra::Shader::Instruction instr);
     /// Generates a node representing a 32-bit immediate value
@@ -403,6 +409,8 @@ private:
     /// Register new amending code and obtain the reference id.
     std::size_t DeclareAmend(Node new_amend);
 
+    u32 NewCustomVariable();
+
     const ProgramCode& program_code;
     const u32 main_offset;
     const CompilerSettings settings;
@@ -418,6 +426,7 @@ private:
     NodeBlock global_code;
     ASTManager program_manager{true, true};
     std::vector<Node> amend_code;
+    u32 num_custom_variables{};
 
     std::set<u32> used_registers;
     std::set<Tegra::Shader::Pred> used_predicates;

From 7c530e06661d760eb6366724d109468423363072 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 7 Jan 2020 17:45:12 -0400
Subject: [PATCH 12/16] Shader_IR: Propagate bindless index into the GL
 compiler.

---
 .../renderer_opengl/gl_shader_decompiler.cpp  |  2 +-
 src/video_core/shader/decode/texture.cpp      | 40 +++++++++++--------
 src/video_core/shader/node.h                  |  2 +-
 src/video_core/shader/shader_ir.h             |  5 +--
 src/video_core/shader/track.cpp               | 29 ++++++++++++--
 5 files changed, 54 insertions(+), 24 deletions(-)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 8b413ae9a..df681bdcb 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1123,7 +1123,7 @@ private:
         if (!meta->sampler.IsIndexed()) {
             expr += '(' + GetSampler(meta->sampler) + ", ";
         } else {
-            expr += '(' + GetSampler(meta->sampler) + "[0], ";
+            expr += '(' + GetSampler(meta->sampler) + '[' + Visit(meta->index).AsUint() + "], ";
         }
         expr += coord_constructors.at(count + (has_array ? 1 : 0) +
                                       (has_shadow && !separate_dc ? 1 : 0) - 1);
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index e7c38f5d6..31b09b18c 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -144,7 +144,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         Node4 values;
         for (u32 element = 0; element < values.size(); ++element) {
             auto coords_copy = coords;
-            MetaTexture meta{sampler, {}, depth_compare, aoffi, {}, {}, {}, {}, component, element};
+            MetaTexture meta{sampler, {}, depth_compare, aoffi,   {}, {},
+                             {},      {}, component,     element, {}};
             values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
         }
 
@@ -167,9 +168,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         const auto derivate_reg = instr.gpr20.Value();
         const auto texture_type = instr.txd.texture_type.Value();
         const auto coord_count = GetCoordCount(texture_type);
-
+        Node index_var{};
         const Sampler* sampler =
-            is_bindless ? GetBindlessSampler(base_reg, {{texture_type, is_array, false}})
+            is_bindless ? GetBindlessSampler(base_reg, index_var, {{texture_type, is_array, false}})
                         : GetSampler(instr.sampler, {{texture_type, is_array, false}});
         Node4 values;
         if (sampler == nullptr) {
@@ -200,7 +201,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         }
 
         for (u32 element = 0; element < values.size(); ++element) {
-            MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element};
+            MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element, index_var};
             values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
         }
 
@@ -215,8 +216,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         // TODO: The new commits on the texture refactor, change the way samplers work.
         // Sadly, not all texture instructions specify the type of texture their sampler
         // uses. This must be fixed at a later instance.
+        Node index_var{};
         const Sampler* sampler =
-            is_bindless ? GetBindlessSampler(instr.gpr8) : GetSampler(instr.sampler);
+            is_bindless ? GetBindlessSampler(instr.gpr8, index_var) : GetSampler(instr.sampler);
 
         if (sampler == nullptr) {
             u32 indexer = 0;
@@ -240,7 +242,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                 if (!instr.txq.IsComponentEnabled(element)) {
                     continue;
                 }
-                MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
+                MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
                 const Node value =
                     Operation(OperationCode::TextureQueryDimensions, meta,
                               GetRegister(instr.gpr8.Value() + (is_bindless ? 1 : 0)));
@@ -266,8 +268,9 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
 
         auto texture_type = instr.tmml.texture_type.Value();
         const bool is_array = instr.tmml.array != 0;
+        Node index_var{};
         const Sampler* sampler =
-            is_bindless ? GetBindlessSampler(instr.gpr20) : GetSampler(instr.sampler);
+            is_bindless ? GetBindlessSampler(instr.gpr20, index_var) : GetSampler(instr.sampler);
 
         if (sampler == nullptr) {
             u32 indexer = 0;
@@ -309,7 +312,7 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
                 continue;
             }
             auto params = coords;
-            MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element};
+            MetaTexture meta{*sampler, {}, {}, {}, {}, {}, {}, {}, {}, element, index_var};
             const Node value = Operation(OperationCode::TextureQueryLod, meta, std::move(params));
             SetTemporary(bb, indexer++, value);
         }
@@ -386,7 +389,7 @@ const Sampler* ShaderIR::GetSampler(const Tegra::Shader::Sampler& sampler,
                                        info.is_buffer, false);
 }
 
-const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
+const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,
                                             std::optional<SamplerInfo> sampler_info) {
     const Node sampler_register = GetRegister(reg);
     const auto [base_node, tracked_sampler_info] =
@@ -421,6 +424,7 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg,
     } else if (const auto array_sampler_info =
                    std::get_if<ArraySamplerNode>(&*tracked_sampler_info)) {
         const u32 base_offset = array_sampler_info->GetBaseOffset() / 4;
+        index_var = GetCustomVariable(array_sampler_info->GetIndexVar());
         const auto info = GetSamplerInfo(sampler_info, base_offset);
 
         // If this sampler has already been used, return the existing mapping.
@@ -526,8 +530,9 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
                          "This method is not supported.");
 
     const SamplerInfo info{texture_type, is_array, is_shadow, false};
-    const Sampler* sampler =
-        is_bindless ? GetBindlessSampler(*bindless_reg, info) : GetSampler(instr.sampler, info);
+    Node index_var{};
+    const Sampler* sampler = is_bindless ? GetBindlessSampler(*bindless_reg, index_var, info)
+                                         : GetSampler(instr.sampler, info);
     Node4 values;
     if (sampler == nullptr) {
         for (u32 element = 0; element < values.size(); ++element) {
@@ -575,7 +580,8 @@ Node4 ShaderIR::GetTextureCode(Instruction instr, TextureType texture_type,
 
     for (u32 element = 0; element < values.size(); ++element) {
         auto copy_coords = coords;
-        MetaTexture meta{*sampler, array, depth_compare, aoffi, {}, {}, bias, lod, {}, element};
+        MetaTexture meta{*sampler, array, depth_compare, aoffi,    {}, {}, bias,
+                         lod,      {},    element,       index_var};
         values[element] = Operation(read_method, meta, std::move(copy_coords));
     }
 
@@ -690,7 +696,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
     u64 parameter_register = instr.gpr20.Value();
 
     const SamplerInfo info{texture_type, is_array, depth_compare, false};
-    const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, info)
+    Node index_var{};
+    const Sampler* sampler = is_bindless ? GetBindlessSampler(parameter_register++, index_var, info)
                                          : GetSampler(instr.sampler, info);
     Node4 values;
     if (sampler == nullptr) {
@@ -719,7 +726,8 @@ Node4 ShaderIR::GetTld4Code(Instruction instr, TextureType texture_type, bool de
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
         MetaTexture meta{
-            *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element};
+            *sampler, GetRegister(array_register), dc, aoffi, ptp, {}, {}, {}, component, element,
+            index_var};
         values[element] = Operation(OperationCode::TextureGather, meta, std::move(coords_copy));
     }
 
@@ -752,7 +760,7 @@ Node4 ShaderIR::GetTldCode(Tegra::Shader::Instruction instr) {
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
-        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element};
+        MetaTexture meta{sampler, array_register, {}, {}, {}, {}, {}, lod, {}, element, {}};
         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
     }
 
@@ -802,7 +810,7 @@ Node4 ShaderIR::GetTldsCode(Instruction instr, TextureType texture_type, bool is
     Node4 values;
     for (u32 element = 0; element < values.size(); ++element) {
         auto coords_copy = coords;
-        MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element};
+        MetaTexture meta{sampler, array, {}, {}, {}, {}, {}, lod, {}, element, {}};
         values[element] = Operation(OperationCode::TexelFetch, meta, std::move(coords_copy));
     }
     return values;
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index db06767f6..d75453458 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -445,6 +445,7 @@ struct MetaTexture {
     Node lod;
     Node component{};
     u32 element{};
+    Node index{};
 };
 
 struct MetaImage {
@@ -564,7 +565,6 @@ private:
     u32 index{};
 };
 
-
 /// A 32-bits value that represents an immediate value
 class ImmediateNode final {
 public:
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 2fe14e815..0421dac0c 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -328,7 +328,7 @@ private:
                               std::optional<SamplerInfo> sampler_info = std::nullopt);
 
     /// Accesses a texture sampler for a bindless texture.
-    const Sampler* GetBindlessSampler(Tegra::Shader::Register reg,
+    const Sampler* GetBindlessSampler(Tegra::Shader::Register reg, Node& index_var,
                                       std::optional<SamplerInfo> sampler_info = std::nullopt);
 
     /// Accesses an image.
@@ -394,8 +394,7 @@ private:
 
     std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
 
-    std::tuple<Node, TrackSampler> TrackSampler(Node tracked, const NodeBlock& code,
-                                                s64 cursor) const;
+    std::tuple<Node, TrackSampler> TrackSampler(Node tracked, const NodeBlock& code, s64 cursor);
 
     std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
 
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 69a677394..d449b625e 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -61,8 +61,19 @@ std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& o
     return std::nullopt;
 }
 
+bool AmendNodeCv(std::size_t amend_index, Node node) {
+    if (const auto operation = std::get_if<OperationNode>(&*node)) {
+        operation->SetAmendIndex(amend_index);
+        return true;
+    } else if (const auto conditional = std::get_if<ConditionalNode>(&*node)) {
+        conditional->SetAmendIndex(amend_index);
+        return true;
+    }
+    return false;
+}
+
 std::tuple<Node, TrackSampler> ShaderIR::TrackSampler(Node tracked, const NodeBlock& code,
-                                                      s64 cursor) const {
+                                                      s64 cursor) {
     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
         // Constant buffer found, test if it's an immediate
         const auto offset = cbuf->GetOffset();
@@ -84,9 +95,21 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackSampler(Node tracked, const NodeBl
             }
             auto [gpr, base_offset] = *pair;
             const auto offset_inm = std::get_if<ImmediateNode>(&*base_offset);
+            auto gpu_driver = locker.AccessGuestDriverProfile();
+            if (gpu_driver == nullptr) {
+                return {};
+            }
+            const u32 bindless_cv = NewCustomVariable();
+            const Node op = Operation(OperationCode::UDiv, NO_PRECISE, gpr,
+                                      Immediate(gpu_driver->GetTextureHandlerSize()));
+
+            const Node cv_node = GetCustomVariable(bindless_cv);
+            Node amend_op = Operation(OperationCode::Assign, cv_node, std::move(op));
+            const std::size_t amend_index = DeclareAmend(amend_op);
+            AmendNodeCv(amend_index, code[cursor]);
             // TODO Implement Bindless Index custom variable
-            auto track =
-                MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(), offset_inm->GetValue(), 0);
+            auto track = MakeTrackSampler<ArraySamplerNode>(cbuf->GetIndex(),
+                                                            offset_inm->GetValue(), bindless_cv);
             return {tracked, track};
         }
         return {};

From 37b8504faaeca9aaffd67649f5a026a900743431 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Tue, 7 Jan 2020 18:56:03 -0400
Subject: [PATCH 13/16] Shader_IR: Correct Custom Variable assignment.

---
 src/video_core/renderer_opengl/gl_shader_decompiler.cpp | 2 ++
 src/video_core/renderer_vulkan/vk_shader_decompiler.cpp | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index df681bdcb..2f2bb07a4 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -1336,6 +1336,8 @@ private:
             const std::string final_offset = fmt::format("({} - {}) >> 2", real, base);
             target = {fmt::format("{}[{}]", GetGlobalMemory(gmem->GetDescriptor()), final_offset),
                       Type::Uint};
+        } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
+            target = {GetCustomVariable(cv->GetIndex()), Type::Float};
         } else {
             UNREACHABLE_MSG("Assign called without a proper target");
         }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index bf797dad3..130060369 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -1361,6 +1361,8 @@ private:
             target = {OpAccessChain(t_gmem_float, gmem_buffer, Constant(t_uint, 0), offset),
                       Type::Float};
 
+        } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
+            target = {custom_variables.at(cv->GetIndex()), Type::Float};
         } else {
             UNIMPLEMENTED();
         }

From 3919b7b8a935174c91927bc0a312cbfee2971583 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 8 Jan 2020 12:13:05 -0400
Subject: [PATCH 14/16] Shader_IR: Corrections, styling and extras.

---
 src/video_core/shader/decode.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index dd2f68a3e..d4a10eee5 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 
 #include <cstring>
+#include <limits>
 #include <set>
 
 #include <fmt/format.h>
@@ -64,7 +65,7 @@ std::optional<u32> TryDeduceSamplerSize(Sampler& sampler_to_deduce,
         return std::nullopt;
     }
     const u32 base_offset = sampler_to_deduce.GetOffset();
-    u32 max_offset{UINT_MAX};
+    u32 max_offset{std::numeric_limits<u32>::max()};
     for (const auto& sampler : used_samplers) {
         if (sampler.IsBindless()) {
             continue;
@@ -73,7 +74,7 @@ std::optional<u32> TryDeduceSamplerSize(Sampler& sampler_to_deduce,
             max_offset = std::min(sampler.GetOffset(), max_offset);
         }
     }
-    if (max_offset == UINT_MAX) {
+    if (max_offset == std::numeric_limits<u32>::max()) {
         return std::nullopt;
     }
     return ((max_offset - base_offset) * 4) / gpu_driver->GetTextureHandlerSize();
@@ -373,6 +374,7 @@ void ShaderIR::PostDecode() {
                 if (size) {
                     sampler.SetSize(*size);
                 } else {
+                    LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
                     sampler.SetSize(1);
                 }
             }

From 806f5691430b86640d64d4c5ae77c5e1dac1625a Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 8 Jan 2020 15:59:21 -0400
Subject: [PATCH 15/16] Shader_IR: Change name of TrackSampler function so it
 is not confused with the type.

---
 src/video_core/shader/decode/texture.cpp |  2 +-
 src/video_core/shader/shader_ir.h        |  3 ++-
 src/video_core/shader/track.cpp          | 12 +++++++-----
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 31b09b18c..6da9668fe 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -393,7 +393,7 @@ const Sampler* ShaderIR::GetBindlessSampler(Tegra::Shader::Register reg, Node& i
                                             std::optional<SamplerInfo> sampler_info) {
     const Node sampler_register = GetRegister(reg);
     const auto [base_node, tracked_sampler_info] =
-        TrackSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
+        TrackBindlessSampler(sampler_register, global_code, static_cast<s64>(global_code.size()));
     ASSERT(base_node != nullptr);
     if (base_node == nullptr) {
         return nullptr;
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 0421dac0c..43672b41c 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -394,7 +394,8 @@ private:
 
     std::tuple<Node, u32, u32> TrackCbuf(Node tracked, const NodeBlock& code, s64 cursor) const;
 
-    std::tuple<Node, TrackSampler> TrackSampler(Node tracked, const NodeBlock& code, s64 cursor);
+    std::tuple<Node, TrackSampler> TrackBindlessSampler(Node tracked, const NodeBlock& code,
+                                                        s64 cursor);
 
     std::optional<u32> TrackImmediate(Node tracked, const NodeBlock& code, s64 cursor) const;
 
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index d449b625e..4db721f69 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -72,8 +72,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
     return false;
 }
 
-std::tuple<Node, TrackSampler> ShaderIR::TrackSampler(Node tracked, const NodeBlock& code,
-                                                      s64 cursor) {
+std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
+                                                              s64 cursor) {
     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {
         // Constant buffer found, test if it's an immediate
         const auto offset = cbuf->GetOffset();
@@ -124,11 +124,12 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackSampler(Node tracked, const NodeBl
         if (!source) {
             return {};
         }
-        return TrackSampler(source, code, new_cursor);
+        return TrackBindlessSampler(source, code, new_cursor);
     }
     if (const auto operation = std::get_if<OperationNode>(&*tracked)) {
         for (std::size_t i = operation->GetOperandsCount(); i > 0; --i) {
-            if (auto found = TrackSampler((*operation)[i - 1], code, cursor); std::get<0>(found)) {
+            if (auto found = TrackBindlessSampler((*operation)[i - 1], code, cursor);
+                std::get<0>(found)) {
                 // Cbuf found in operand.
                 return found;
             }
@@ -137,7 +138,8 @@ std::tuple<Node, TrackSampler> ShaderIR::TrackSampler(Node tracked, const NodeBl
     }
     if (const auto conditional = std::get_if<ConditionalNode>(&*tracked)) {
         const auto& conditional_code = conditional->GetCode();
-        return TrackSampler(tracked, conditional_code, static_cast<s64>(conditional_code.size()));
+        return TrackBindlessSampler(tracked, conditional_code,
+                                    static_cast<s64>(conditional_code.size()));
     }
     return {};
 }

From bb8eb15d392d69693f8cda0427669d011e23db97 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 24 Jan 2020 10:44:34 -0400
Subject: [PATCH 16/16] Shader_IR: Address feedback.

---
 src/video_core/guest_driver.h                 |  1 +
 .../renderer_opengl/gl_shader_decompiler.cpp  |  6 ++--
 .../renderer_vulkan/vk_shader_decompiler.cpp  |  5 +--
 src/video_core/shader/const_buffer_locker.h   |  2 ++
 src/video_core/shader/decode.cpp              | 31 ++++++++++---------
 src/video_core/shader/decode/texture.cpp      |  3 +-
 src/video_core/shader/node.h                  | 14 ++++-----
 src/video_core/shader/shader_ir.cpp           |  3 +-
 src/video_core/shader/shader_ir.h             |  2 +-
 src/video_core/shader/track.cpp               |  9 +++---
 10 files changed, 40 insertions(+), 36 deletions(-)

diff --git a/src/video_core/guest_driver.h b/src/video_core/guest_driver.h
index 0a9a826b6..fc1917347 100644
--- a/src/video_core/guest_driver.h
+++ b/src/video_core/guest_driver.h
@@ -33,6 +33,7 @@ private:
     // This goes with Vulkan and OpenGL standards but Nvidia GPUs can easily
     // use 4 bytes instead. Thus, certain drivers may squish the size.
     static constexpr u32 default_texture_handler_size = 8;
+
     u32 texture_handler_size = default_texture_handler_size;
     bool texture_handler_size_deduced = false;
 };
diff --git a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
index 2f2bb07a4..cb1a5f35c 100644
--- a/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_decompiler.cpp
@@ -505,11 +505,11 @@ private:
     }
 
     void DeclareCustomVariables() {
-        const u32 cv_num = ir.GetCustomVariablesAmount();
-        for (u32 i = 0; i < cv_num; ++i) {
+        const u32 num_custom_variables = ir.GetNumCustomVariables();
+        for (u32 i = 0; i < num_custom_variables; ++i) {
             code.AddLine("float {} = 0.0f;", GetCustomVariable(i));
         }
-        if (cv_num > 0) {
+        if (num_custom_variables > 0) {
             code.AddNewLine();
         }
     }
diff --git a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
index 130060369..36d928fab 100644
--- a/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
+++ b/src/video_core/renderer_vulkan/vk_shader_decompiler.cpp
@@ -589,8 +589,8 @@ private:
     }
 
     void DeclareCustomVariables() {
-        const u32 cv_num = ir.GetCustomVariablesAmount();
-        for (u32 i = 0; i < cv_num; ++i) {
+        const u32 num_custom_variables = ir.GetNumCustomVariables();
+        for (u32 i = 0; i < num_custom_variables; ++i) {
             const Id id = OpVariable(t_prv_float, spv::StorageClass::Private, v_float_zero);
             Name(id, fmt::format("custom_var_{}", i));
             custom_variables.emplace(i, AddGlobalVariable(id));
@@ -1363,6 +1363,7 @@ private:
 
         } else if (const auto cv = std::get_if<CustomVarNode>(&*dest)) {
             target = {custom_variables.at(cv->GetIndex()), Type::Float};
+
         } else {
             UNIMPLEMENTED();
         }
diff --git a/src/video_core/shader/const_buffer_locker.h b/src/video_core/shader/const_buffer_locker.h
index fd1bb476a..d3ea11087 100644
--- a/src/video_core/shader/const_buffer_locker.h
+++ b/src/video_core/shader/const_buffer_locker.h
@@ -77,10 +77,12 @@ public:
         return bindless_samplers;
     }
 
+    /// Gets bound buffer used on this shader
     u32 GetBoundBuffer() const {
         return bound_buffer;
     }
 
+    /// Obtains access to the guest driver's profile.
     VideoCore::GuestDriverProfile* AccessGuestDriverProfile() const {
         if (engine) {
             return &engine->AccessGuestDriverProfile();
diff --git a/src/video_core/shader/decode.cpp b/src/video_core/shader/decode.cpp
index d4a10eee5..6b697ed5d 100644
--- a/src/video_core/shader/decode.cpp
+++ b/src/video_core/shader/decode.cpp
@@ -35,9 +35,9 @@ constexpr bool IsSchedInstruction(u32 offset, u32 main_offset) {
 }
 
 void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
-                              std::list<Sampler>& used_samplers) {
+                              const std::list<Sampler>& used_samplers) {
     if (gpu_driver == nullptr) {
-        LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
+        LOG_CRITICAL(HW_GPU, "GPU driver profile has not been created yet");
         return;
     }
     if (gpu_driver->TextureHandlerSizeKnown() || used_samplers.size() <= 1) {
@@ -57,9 +57,9 @@ void DeduceTextureHandlerSize(VideoCore::GuestDriverProfile* gpu_driver,
     }
 }
 
-std::optional<u32> TryDeduceSamplerSize(Sampler& sampler_to_deduce,
+std::optional<u32> TryDeduceSamplerSize(const Sampler& sampler_to_deduce,
                                         VideoCore::GuestDriverProfile* gpu_driver,
-                                        std::list<Sampler>& used_samplers) {
+                                        const std::list<Sampler>& used_samplers) {
     if (gpu_driver == nullptr) {
         LOG_CRITICAL(HW_GPU, "GPU Driver profile has not been created yet");
         return std::nullopt;
@@ -367,17 +367,18 @@ void ShaderIR::PostDecode() {
     auto gpu_driver = locker.AccessGuestDriverProfile();
     DeduceTextureHandlerSize(gpu_driver, used_samplers);
     // Deduce Indexed Samplers
-    if (uses_indexed_samplers) {
-        for (auto& sampler : used_samplers) {
-            if (sampler.IsIndexed()) {
-                auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers);
-                if (size) {
-                    sampler.SetSize(*size);
-                } else {
-                    LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
-                    sampler.SetSize(1);
-                }
-            }
+    if (!uses_indexed_samplers) {
+        return;
+    }
+    for (auto& sampler : used_samplers) {
+        if (!sampler.IsIndexed()) {
+            continue;
+        }
+        if (const auto size = TryDeduceSamplerSize(sampler, gpu_driver, used_samplers)) {
+            sampler.SetSize(*size);
+        } else {
+            LOG_CRITICAL(HW_GPU, "Failed to deduce size of indexed sampler");
+            sampler.SetSize(1);
         }
     }
 }
diff --git a/src/video_core/shader/decode/texture.cpp b/src/video_core/shader/decode/texture.cpp
index 6da9668fe..d980535b1 100644
--- a/src/video_core/shader/decode/texture.cpp
+++ b/src/video_core/shader/decode/texture.cpp
@@ -201,7 +201,8 @@ u32 ShaderIR::DecodeTexture(NodeBlock& bb, u32 pc) {
         }
 
         for (u32 element = 0; element < values.size(); ++element) {
-            MetaTexture meta{*sampler, array_node, {}, {}, {}, derivates, {}, {}, {}, element, index_var};
+            MetaTexture meta{*sampler, array_node, {}, {},      {},       derivates,
+                             {},       {},         {}, element, index_var};
             values[element] = Operation(OperationCode::TextureGradient, std::move(meta), coords);
         }
 
diff --git a/src/video_core/shader/node.h b/src/video_core/shader/node.h
index d75453458..53a551d27 100644
--- a/src/video_core/shader/node.h
+++ b/src/video_core/shader/node.h
@@ -291,7 +291,7 @@ public:
         return size;
     }
 
-    void SetSize(u32 new_size) {
+    constexpr void SetSize(u32 new_size) {
         size = new_size;
     }
 
@@ -315,15 +315,15 @@ public:
     explicit ArraySamplerNode(u32 index, u32 base_offset, u32 bindless_var)
         : index{index}, base_offset{base_offset}, bindless_var{bindless_var} {}
 
-    u32 GetIndex() const {
+    constexpr u32 GetIndex() const {
         return index;
     }
 
-    u32 GetBaseOffset() const {
+    constexpr u32 GetBaseOffset() const {
         return base_offset;
     }
 
-    u32 GetIndexVar() const {
+    constexpr u32 GetIndexVar() const {
         return bindless_var;
     }
 
@@ -338,11 +338,11 @@ class BindlessSamplerNode final {
 public:
     explicit BindlessSamplerNode(u32 index, u32 offset) : index{index}, offset{offset} {}
 
-    u32 GetIndex() const {
+    constexpr u32 GetIndex() const {
         return index;
     }
 
-    u32 GetOffset() const {
+    constexpr u32 GetOffset() const {
         return offset;
     }
 
@@ -557,7 +557,7 @@ class CustomVarNode final {
 public:
     explicit constexpr CustomVarNode(u32 index) : index{index} {}
 
-    u32 GetIndex() const {
+    constexpr u32 GetIndex() const {
         return index;
     }
 
diff --git a/src/video_core/shader/shader_ir.cpp b/src/video_core/shader/shader_ir.cpp
index 94972d57f..3a5d280a9 100644
--- a/src/video_core/shader/shader_ir.cpp
+++ b/src/video_core/shader/shader_ir.cpp
@@ -458,8 +458,7 @@ std::size_t ShaderIR::DeclareAmend(Node new_amend) {
 }
 
 u32 ShaderIR::NewCustomVariable() {
-    const u32 id = num_custom_variables++;
-    return id;
+    return num_custom_variables++;
 }
 
 } // namespace VideoCommon::Shader
diff --git a/src/video_core/shader/shader_ir.h b/src/video_core/shader/shader_ir.h
index 43672b41c..b0851c3be 100644
--- a/src/video_core/shader/shader_ir.h
+++ b/src/video_core/shader/shader_ir.h
@@ -180,7 +180,7 @@ public:
         return amend_code[index];
     }
 
-    u32 GetCustomVariablesAmount() const {
+    u32 GetNumCustomVariables() const {
         return num_custom_variables;
     }
 
diff --git a/src/video_core/shader/track.cpp b/src/video_core/shader/track.cpp
index 4db721f69..ea39bca54 100644
--- a/src/video_core/shader/track.cpp
+++ b/src/video_core/shader/track.cpp
@@ -36,7 +36,6 @@ std::pair<Node, s64> FindOperation(const NodeBlock& code, s64 cursor,
     }
     return {};
 }
-} // Anonymous namespace
 
 std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& operation) {
     if (operation.GetCode() != OperationCode::UAdd) {
@@ -44,9 +43,7 @@ std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& o
     }
     Node gpr{};
     Node offset{};
-    if (operation.GetOperandsCount() != 2) {
-        return std::nullopt;
-    }
+    ASSERT(operation.GetOperandsCount() == 2);
     for (std::size_t i = 0; i < operation.GetOperandsCount(); i++) {
         Node operand = operation[i];
         if (std::holds_alternative<ImmediateNode>(*operand)) {
@@ -56,7 +53,7 @@ std::optional<std::pair<Node, Node>> DecoupleIndirectRead(const OperationNode& o
         }
     }
     if (offset && gpr) {
-        return {std::make_pair(gpr, offset)};
+        return std::make_pair(gpr, offset);
     }
     return std::nullopt;
 }
@@ -72,6 +69,8 @@ bool AmendNodeCv(std::size_t amend_index, Node node) {
     return false;
 }
 
+} // Anonymous namespace
+
 std::tuple<Node, TrackSampler> ShaderIR::TrackBindlessSampler(Node tracked, const NodeBlock& code,
                                                               s64 cursor) {
     if (const auto cbuf = std::get_if<CbufNode>(&*tracked)) {