From f2e7b29c14e0207e0476299d69dac7ed4c213c74 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 10 Jul 2019 15:38:31 -0400
Subject: [PATCH 01/10] Maxwell3D: Rework the dirty system to be more
 consistent and scalable

---
 src/video_core/dma_pusher.cpp                 |   2 +-
 src/video_core/engines/kepler_compute.cpp     |   2 +-
 src/video_core/engines/kepler_memory.cpp      |   2 +-
 src/video_core/engines/maxwell_3d.cpp         | 136 ++++++++++++------
 src/video_core/engines/maxwell_3d.h           |  74 ++++++++--
 src/video_core/engines/maxwell_dma.cpp        |   2 +-
 .../renderer_opengl/gl_rasterizer.cpp         |  53 +++++--
 .../renderer_opengl/gl_rasterizer.h           |   1 +
 .../renderer_opengl/gl_shader_cache.cpp       |   2 +-
 src/video_core/texture_cache/texture_cache.h  |  17 ++-
 10 files changed, 211 insertions(+), 80 deletions(-)

diff --git a/src/video_core/dma_pusher.cpp b/src/video_core/dma_pusher.cpp
index 3175579cc..bd036cbe8 100644
--- a/src/video_core/dma_pusher.cpp
+++ b/src/video_core/dma_pusher.cpp
@@ -22,7 +22,7 @@ void DmaPusher::DispatchCalls() {
     MICROPROFILE_SCOPE(DispatchCalls);
 
     // On entering GPU code, assume all memory may be touched by the ARM core.
-    gpu.Maxwell3D().dirty_flags.OnMemoryWrite();
+    gpu.Maxwell3D().dirty.OnMemoryWrite();
 
     dma_pushbuffer_subindex = 0;
 
diff --git a/src/video_core/engines/kepler_compute.cpp b/src/video_core/engines/kepler_compute.cpp
index 7404a8163..e3d5fb8a9 100644
--- a/src/video_core/engines/kepler_compute.cpp
+++ b/src/video_core/engines/kepler_compute.cpp
@@ -37,7 +37,7 @@ void KeplerCompute::CallMethod(const GPU::MethodCall& method_call) {
         const bool is_last_call = method_call.IsLastCall();
         upload_state.ProcessData(method_call.argument, is_last_call);
         if (is_last_call) {
-            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+            system.GPU().Maxwell3D().dirty.OnMemoryWrite();
         }
         break;
     }
diff --git a/src/video_core/engines/kepler_memory.cpp b/src/video_core/engines/kepler_memory.cpp
index 0561f676c..44279de00 100644
--- a/src/video_core/engines/kepler_memory.cpp
+++ b/src/video_core/engines/kepler_memory.cpp
@@ -34,7 +34,7 @@ void KeplerMemory::CallMethod(const GPU::MethodCall& method_call) {
         const bool is_last_call = method_call.IsLastCall();
         upload_state.ProcessData(method_call.argument, is_last_call);
         if (is_last_call) {
-            system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+            system.GPU().Maxwell3D().dirty.OnMemoryWrite();
         }
         break;
     }
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 8755b8af4..a55915fd3 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -22,6 +22,7 @@ Maxwell3D::Maxwell3D(Core::System& system, VideoCore::RasterizerInterface& raste
                      MemoryManager& memory_manager)
     : system{system}, rasterizer{rasterizer}, memory_manager{memory_manager},
       macro_interpreter{*this}, upload_state{memory_manager, regs.upload} {
+    InitDirtySettings();
     InitializeRegisterDefaults();
 }
 
@@ -86,6 +87,80 @@ void Maxwell3D::InitializeRegisterDefaults() {
     regs.rt_separate_frag_data = 1;
 }
 
+#define DIRTY_REGS_POS(field_name) (offsetof(Maxwell3D::DirtyRegs, field_name))
+
+void Maxwell3D::InitDirtySettings() {
+    const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
+        const u32 end = start + range;
+        for (std::size_t i = start; i < end; i++) {
+            dirty_pointers[i] = position;
+        }
+    };
+    for (std::size_t i = 0; i < DirtyRegs::NUM_REGS; i++) {
+        dirty.regs[i] = true;
+    }
+
+    // Init Render Targets
+    constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
+    constexpr u32 rt_start_reg = MAXWELL3D_REG_INDEX(rt);
+    constexpr u32 rt_end_reg = rt_start_reg + registers_per_rt * 8;
+    u32 rt_dirty_reg = DIRTY_REGS_POS(render_target);
+    for (u32 rt_reg = rt_start_reg; rt_reg < rt_end_reg; rt_reg += registers_per_rt) {
+        set_block(rt_reg, registers_per_rt, rt_dirty_reg);
+        rt_dirty_reg++;
+    }
+    constexpr u32 depth_buffer_flag = DIRTY_REGS_POS(depth_buffer);
+    dirty_pointers[MAXWELL3D_REG_INDEX(zeta_enable)] = depth_buffer_flag;
+    dirty_pointers[MAXWELL3D_REG_INDEX(zeta_width)] = depth_buffer_flag;
+    dirty_pointers[MAXWELL3D_REG_INDEX(zeta_height)] = depth_buffer_flag;
+    constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
+    constexpr u32 zeta_reg = MAXWELL3D_REG_INDEX(zeta);
+    set_block(zeta_reg, registers_in_zeta, depth_buffer_flag);
+
+    // Init Vertex Arrays
+    constexpr u32 vertex_array_start = MAXWELL3D_REG_INDEX(vertex_array);
+    constexpr u32 vertex_array_size = sizeof(regs.vertex_array[0]) / sizeof(u32);
+    constexpr u32 vertex_array_end = vertex_array_start + vertex_array_size * Regs::NumVertexArrays;
+    u32 va_reg = DIRTY_REGS_POS(vertex_array);
+    u32 vi_reg = DIRTY_REGS_POS(vertex_instance);
+    for (u32 vertex_reg = vertex_array_start; vertex_reg < vertex_array_end;
+         vertex_reg += vertex_array_size) {
+        set_block(vertex_reg, 3, va_reg);
+        // The divisor concerns vertex array instances
+        dirty_pointers[vertex_reg + 3] = vi_reg;
+        va_reg++;
+        vi_reg++;
+    }
+    constexpr u32 vertex_limit_start = MAXWELL3D_REG_INDEX(vertex_array_limit);
+    constexpr u32 vertex_limit_size = sizeof(regs.vertex_array_limit[0]) / sizeof(u32);
+    constexpr u32 vertex_limit_end = vertex_limit_start + vertex_limit_size * Regs::NumVertexArrays;
+    va_reg = DIRTY_REGS_POS(vertex_array);
+    for (u32 vertex_reg = vertex_limit_start; vertex_reg < vertex_limit_end;
+         vertex_reg += vertex_limit_size) {
+        set_block(vertex_reg, vertex_limit_size, va_reg);
+        va_reg++;
+    }
+    constexpr u32 vertex_instance_start = MAXWELL3D_REG_INDEX(instanced_arrays);
+    constexpr u32 vertex_instance_size =
+        sizeof(regs.instanced_arrays.is_instanced[0]) / sizeof(u32);
+    constexpr u32 vertex_instance_end =
+        vertex_instance_start + vertex_instance_size * Regs::NumVertexArrays;
+    vi_reg = DIRTY_REGS_POS(vertex_instance);
+    for (u32 vertex_reg = vertex_instance_start; vertex_reg < vertex_instance_end;
+         vertex_reg += vertex_instance_size) {
+        set_block(vertex_reg, vertex_instance_size, vi_reg);
+        vi_reg++;
+    }
+    set_block(MAXWELL3D_REG_INDEX(vertex_attrib_format), regs.vertex_attrib_format.size(),
+              DIRTY_REGS_POS(vertex_attrib_format));
+
+    // Init Shaders
+    constexpr u32 shader_registers_count =
+        sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
+    set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
+              DIRTY_REGS_POS(shaders));
+}
+
 void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
     // Reset the current macro.
     executing_macro = 0;
@@ -143,49 +218,19 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
 
     if (regs.reg_array[method] != method_call.argument) {
         regs.reg_array[method] = method_call.argument;
-        // Color buffers
-        constexpr u32 first_rt_reg = MAXWELL3D_REG_INDEX(rt);
-        constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
-        if (method >= first_rt_reg &&
-            method < first_rt_reg + registers_per_rt * Regs::NumRenderTargets) {
-            const std::size_t rt_index = (method - first_rt_reg) / registers_per_rt;
-            dirty_flags.color_buffer.set(rt_index);
-        }
-
-        // Zeta buffer
-        constexpr u32 registers_in_zeta = sizeof(regs.zeta) / sizeof(u32);
-        if (method == MAXWELL3D_REG_INDEX(zeta_enable) ||
-            method == MAXWELL3D_REG_INDEX(zeta_width) ||
-            method == MAXWELL3D_REG_INDEX(zeta_height) ||
-            (method >= MAXWELL3D_REG_INDEX(zeta) &&
-             method < MAXWELL3D_REG_INDEX(zeta) + registers_in_zeta)) {
-            dirty_flags.zeta_buffer = true;
-        }
-
-        // Shader
-        constexpr u32 shader_registers_count =
-            sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
-        if (method >= MAXWELL3D_REG_INDEX(shader_config[0]) &&
-            method < MAXWELL3D_REG_INDEX(shader_config[0]) + shader_registers_count) {
-            dirty_flags.shaders = true;
-        }
-
-        // Vertex format
-        if (method >= MAXWELL3D_REG_INDEX(vertex_attrib_format) &&
-            method < MAXWELL3D_REG_INDEX(vertex_attrib_format) + regs.vertex_attrib_format.size()) {
-            dirty_flags.vertex_attrib_format = true;
-        }
-
-        // Vertex buffer
-        if (method >= MAXWELL3D_REG_INDEX(vertex_array) &&
-            method < MAXWELL3D_REG_INDEX(vertex_array) + 4 * Regs::NumVertexArrays) {
-            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array)) >> 2);
-        } else if (method >= MAXWELL3D_REG_INDEX(vertex_array_limit) &&
-                   method < MAXWELL3D_REG_INDEX(vertex_array_limit) + 2 * Regs::NumVertexArrays) {
-            dirty_flags.vertex_array.set((method - MAXWELL3D_REG_INDEX(vertex_array_limit)) >> 1);
-        } else if (method >= MAXWELL3D_REG_INDEX(instanced_arrays) &&
-                   method < MAXWELL3D_REG_INDEX(instanced_arrays) + Regs::NumVertexArrays) {
-            dirty_flags.vertex_array.set(method - MAXWELL3D_REG_INDEX(instanced_arrays));
+        std::size_t dirty_reg = dirty_pointers[method];
+        if (dirty_reg) {
+            dirty.regs[dirty_reg] = true;
+            if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
+                dirty_reg < DIRTY_REGS_POS(vertex_array_buffers)) {
+                dirty.vertex_array_buffers = true;
+            } else if (dirty_reg >= DIRTY_REGS_POS(vertex_instance) &&
+                       dirty_reg < DIRTY_REGS_POS(vertex_instances)) {
+                dirty.vertex_instances = true;
+            } else if (dirty_reg >= DIRTY_REGS_POS(render_target) &&
+                       dirty_reg < DIRTY_REGS_POS(render_settings)) {
+                dirty.render_settings = true;
+            }
         }
     }
 
@@ -261,7 +306,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
         const bool is_last_call = method_call.IsLastCall();
         upload_state.ProcessData(method_call.argument, is_last_call);
         if (is_last_call) {
-            dirty_flags.OnMemoryWrite();
+            dirty.OnMemoryWrite();
         }
         break;
     }
@@ -333,7 +378,6 @@ void Maxwell3D::ProcessQueryGet() {
             query_result.timestamp = system.CoreTiming().GetTicks();
             memory_manager.WriteBlock(sequence_address, &query_result, sizeof(query_result));
         }
-        dirty_flags.OnMemoryWrite();
         break;
     }
     default:
@@ -418,8 +462,6 @@ void Maxwell3D::ProcessCBData(u32 value) {
     rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
     memory_manager.Write<u32>(address, value);
 
-    dirty_flags.OnMemoryWrite();
-
     // Increment the current buffer position.
     regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
 }
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 8d15c8a48..84e6ca145 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1124,23 +1124,73 @@ public:
 
     State state{};
 
-    struct DirtyFlags {
-        std::bitset<8> color_buffer{0xFF};
-        std::bitset<32> vertex_array{0xFFFFFFFF};
+    struct DirtyRegs {
+        static constexpr std::size_t NUM_REGS = 256;
+        union {
+            struct {
+                bool null_dirty;
+                // Vertex Attributes
+                bool vertex_attrib_format;
+                // Vertex Arrays
+                std::array<bool, 32> vertex_array;
 
-        bool vertex_attrib_format = true;
-        bool zeta_buffer = true;
-        bool shaders = true;
+                bool vertex_array_buffers;
+                // Vertex Instances
+                std::array<bool, 32> vertex_instance;
+
+                bool vertex_instances;
+                // Render Targets
+                std::array<bool, 8> render_target;
+                bool depth_buffer;
+
+                bool render_settings;
+                // Shaders
+                bool shaders;
+                // State
+                bool viewport;
+                bool clip_enabled;
+                bool clip_coefficient;
+                bool cull_mode;
+                bool primitive_restart;
+                bool depth_test;
+                bool stencil_test;
+                bool blend_state;
+                bool logic_op;
+                bool fragment_color_clamp;
+                bool multi_sample;
+                bool scissor_test;
+                bool transform_feedback;
+                bool point;
+                bool color_mask;
+                bool polygon_offset;
+                bool alpha_test;
+
+                bool memory_general;
+            };
+            std::array<bool, NUM_REGS> regs;
+        };
+
+        void ResetVertexArrays() {
+            std::fill(vertex_array.begin(), vertex_array.end(), true);
+            vertex_array_buffers = true;
+        }
+
+        void ResetRenderTargets() {
+            depth_buffer = true;
+            std::fill(render_target.begin(), render_target.end(), true);
+            render_settings = true;
+        }
 
         void OnMemoryWrite() {
-            zeta_buffer = true;
             shaders = true;
-            color_buffer.set();
-            vertex_array.set();
+            memory_general = true;
+            ResetRenderTargets();
+            ResetVertexArrays();
         }
-    };
 
-    DirtyFlags dirty_flags;
+    } dirty{};
+
+    std::array<u8, Regs::NUM_REGS> dirty_pointers{};
 
     /// Reads a register value located at the input method address
     u32 GetRegisterValue(u32 method) const;
@@ -1200,6 +1250,8 @@ private:
     /// Retrieves information about a specific TSC entry from the TSC buffer.
     Texture::TSCEntry GetTSCEntry(u32 tsc_index) const;
 
+    void InitDirtySettings();
+
     /**
      * Call a macro on this engine.
      * @param method Method to call
diff --git a/src/video_core/engines/maxwell_dma.cpp b/src/video_core/engines/maxwell_dma.cpp
index afb9578d0..b5f57e534 100644
--- a/src/video_core/engines/maxwell_dma.cpp
+++ b/src/video_core/engines/maxwell_dma.cpp
@@ -58,7 +58,7 @@ void MaxwellDMA::HandleCopy() {
     }
 
     // All copies here update the main memory, so mark all rasterizer states as invalid.
-    system.GPU().Maxwell3D().dirty_flags.OnMemoryWrite();
+    system.GPU().Maxwell3D().dirty.OnMemoryWrite();
 
     if (regs.exec.is_dst_linear && regs.exec.is_src_linear) {
         // When the enable_2d bit is disabled, the copy is performed as if we were copying a 1D
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 0bb5c068c..c2b5cbff4 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -124,10 +124,10 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
     auto& gpu = system.GPU().Maxwell3D();
     const auto& regs = gpu.regs;
 
-    if (!gpu.dirty_flags.vertex_attrib_format) {
+    if (!gpu.dirty.vertex_attrib_format) {
         return state.draw.vertex_array;
     }
-    gpu.dirty_flags.vertex_attrib_format = false;
+    gpu.dirty.vertex_attrib_format = false;
 
     MICROPROFILE_SCOPE(OpenGL_VAO);
 
@@ -181,7 +181,7 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
     }
 
     // Rebinding the VAO invalidates the vertex buffer bindings.
-    gpu.dirty_flags.vertex_array.set();
+    gpu.dirty.ResetVertexArrays();
 
     state.draw.vertex_array = vao_entry.handle;
     return vao_entry.handle;
@@ -189,17 +189,20 @@ GLuint RasterizerOpenGL::SetupVertexFormat() {
 
 void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
     auto& gpu = system.GPU().Maxwell3D();
-    const auto& regs = gpu.regs;
-
-    if (gpu.dirty_flags.vertex_array.none())
+    if (!gpu.dirty.vertex_array_buffers)
         return;
+    gpu.dirty.vertex_array_buffers = false;
+
+    const auto& regs = gpu.regs;
 
     MICROPROFILE_SCOPE(OpenGL_VB);
 
     // Upload all guest vertex arrays sequentially to our buffer
     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
-        if (!gpu.dirty_flags.vertex_array[index])
+        if (!gpu.dirty.vertex_array[index])
             continue;
+        gpu.dirty.vertex_array[index] = false;
+        gpu.dirty.vertex_instance[index] = false;
 
         const auto& vertex_array = regs.vertex_array[index];
         if (!vertex_array.IsEnabled())
@@ -224,8 +227,32 @@ void RasterizerOpenGL::SetupVertexBuffer(GLuint vao) {
             glVertexArrayBindingDivisor(vao, index, 0);
         }
     }
+}
 
-    gpu.dirty_flags.vertex_array.reset();
+void RasterizerOpenGL::SetupVertexInstances(GLuint vao) {
+    auto& gpu = system.GPU().Maxwell3D();
+
+    if (!gpu.dirty.vertex_instances)
+        return;
+    gpu.dirty.vertex_instances = false;
+
+    const auto& regs = gpu.regs;
+    // Upload all guest vertex arrays sequentially to our buffer
+    for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) {
+        if (!gpu.dirty.vertex_instance[index])
+            continue;
+
+        gpu.dirty.vertex_instance[index] = false;
+
+        if (regs.instanced_arrays.IsInstancingEnabled(index) &&
+            regs.vertex_array[index].divisor != 0) {
+            // Enable vertex buffer instancing with the specified divisor.
+            glVertexArrayBindingDivisor(vao, index, regs.vertex_array[index].divisor);
+        } else {
+            // Disable the vertex buffer instancing.
+            glVertexArrayBindingDivisor(vao, index, 0);
+        }
+    }
 }
 
 GLintptr RasterizerOpenGL::SetupIndexBuffer() {
@@ -341,7 +368,7 @@ void RasterizerOpenGL::SetupShaders(GLenum primitive_mode) {
 
     SyncClipEnabled(clip_distances);
 
-    gpu.dirty_flags.shaders = false;
+    gpu.dirty.shaders = false;
 }
 
 std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const {
@@ -424,13 +451,13 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
 
     const FramebufferConfigState fb_config_state{using_color_fb, using_depth_fb, preserve_contents,
                                                  single_color_target};
-    if (fb_config_state == current_framebuffer_config_state &&
-        gpu.dirty_flags.color_buffer.none() && !gpu.dirty_flags.zeta_buffer) {
+    if (fb_config_state == current_framebuffer_config_state && !gpu.dirty.render_settings) {
         // Only skip if the previous ConfigureFramebuffers call was from the same kind (multiple or
         // single color targets). This is done because the guest registers may not change but the
         // host framebuffer may contain different attachments
         return current_depth_stencil_usage;
     }
+    gpu.dirty.render_settings = false;
     current_framebuffer_config_state = fb_config_state;
 
     texture_cache.GuardRenderTargets(true);
@@ -661,6 +688,7 @@ void RasterizerOpenGL::DrawArrays() {
 
     // Upload vertex and index data.
     SetupVertexBuffer(vao);
+    SetupVertexInstances(vao);
     const GLintptr index_buffer_offset = SetupIndexBuffer();
 
     // Setup draw parameters. It will automatically choose what glDraw* method to use.
@@ -687,7 +715,7 @@ void RasterizerOpenGL::DrawArrays() {
 
     if (invalidate) {
         // As all cached buffers are invalidated, we need to recheck their state.
-        gpu.dirty_flags.vertex_array.set();
+        gpu.dirty.ResetVertexArrays();
     }
 
     shader_program_manager->ApplyTo(state);
@@ -700,6 +728,7 @@ void RasterizerOpenGL::DrawArrays() {
     params.DispatchDraw();
 
     accelerate_draw = AccelDraw::Disabled;
+    gpu.dirty.memory_general = false;
 }
 
 void RasterizerOpenGL::FlushAll() {}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 40b571d58..1f6ce4b81 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -216,6 +216,7 @@ private:
     GLuint SetupVertexFormat();
 
     void SetupVertexBuffer(GLuint vao);
+    void SetupVertexInstances(GLuint vao);
 
     GLintptr SetupIndexBuffer();
 
diff --git a/src/video_core/renderer_opengl/gl_shader_cache.cpp b/src/video_core/renderer_opengl/gl_shader_cache.cpp
index 32dd9eae7..456ba0403 100644
--- a/src/video_core/renderer_opengl/gl_shader_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_shader_cache.cpp
@@ -572,7 +572,7 @@ std::unordered_map<u64, UnspecializedShader> ShaderCacheOpenGL::GenerateUnspecia
 }
 
 Shader ShaderCacheOpenGL::GetStageProgram(Maxwell::ShaderProgram program) {
-    if (!system.GPU().Maxwell3D().dirty_flags.shaders) {
+    if (!system.GPU().Maxwell3D().dirty.shaders) {
         return last_shaders[static_cast<std::size_t>(program)];
     }
 
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 7f9623c62..8225022a9 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -116,10 +116,10 @@ public:
         std::lock_guard lock{mutex};
         auto& maxwell3d = system.GPU().Maxwell3D();
 
-        if (!maxwell3d.dirty_flags.zeta_buffer) {
+        if (!maxwell3d.dirty.depth_buffer) {
             return depth_buffer.view;
         }
-        maxwell3d.dirty_flags.zeta_buffer = false;
+        maxwell3d.dirty.depth_buffer = false;
 
         const auto& regs{maxwell3d.regs};
         const auto gpu_addr{regs.zeta.Address()};
@@ -145,10 +145,10 @@ public:
         std::lock_guard lock{mutex};
         ASSERT(index < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets);
         auto& maxwell3d = system.GPU().Maxwell3D();
-        if (!maxwell3d.dirty_flags.color_buffer[index]) {
+        if (!maxwell3d.dirty.render_target[index]) {
             return render_targets[index].view;
         }
-        maxwell3d.dirty_flags.color_buffer.reset(index);
+        maxwell3d.dirty.render_target[index] = false;
 
         const auto& regs{maxwell3d.regs};
         if (index >= regs.rt_control.count || regs.rt[index].Address() == 0 ||
@@ -272,12 +272,13 @@ protected:
 
     void ManageRenderTargetUnregister(TSurface& surface) {
         auto& maxwell3d = system.GPU().Maxwell3D();
         const u32 index = surface->GetRenderTarget();
         if (index == DEPTH_RT) {
-            maxwell3d.dirty_flags.zeta_buffer = true;
+            maxwell3d.dirty.depth_buffer = true;
         } else {
-            maxwell3d.dirty_flags.color_buffer.set(index, true);
+            maxwell3d.dirty.render_target[index] = true;
         }
+        maxwell3d.dirty.render_settings = true;
     }
 
     void Register(TSurface surface) {

From 0d3db58657ce5352d90a70ee8d6c0334d9119366 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Fri, 12 Jul 2019 09:25:47 -0400
Subject: [PATCH 02/10] Maxwell3D: Rework CBData Upload

---
 src/video_core/engines/maxwell_3d.cpp | 42 ++++++++++++++++++++++-----
 src/video_core/engines/maxwell_3d.h   | 11 +++++++
 2 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index a55915fd3..7d3a550f8 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -183,6 +183,14 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
 
     const u32 method = method_call.method;
 
+    if (method == cb_data_state.current) {
+        regs.reg_array[method] = method_call.argument;
+        ProcessCBData(method_call.argument);
+        return;
+    } else if (cb_data_state.current != null_cb_data) {
+        FinishCBData();
+    }
+
     // It is an error to write to a register other than the current macro's ARG register before it
     // has finished execution.
     if (executing_macro != 0) {
@@ -259,7 +267,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[13]):
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[14]):
     case MAXWELL3D_REG_INDEX(const_buffer.cb_data[15]): {
-        ProcessCBData(method_call.argument);
+        StartCBData(method);
         break;
     }
     case MAXWELL3D_REG_INDEX(cb_bind[0].raw_config): {
@@ -449,21 +457,39 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
 }
 
 void Maxwell3D::ProcessCBData(u32 value) {
+    const u32 id = cb_data_state.id;
+    cb_data_state.buff[id][cb_data_state.counter] = value;
+    // Increment the current buffer position.
+    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
+    cb_data_state.counter++;
+}
+
+void Maxwell3D::StartCBData(u32 method) {
+    constexpr u32 first_cb_data = MAXWELL3D_REG_INDEX(const_buffer.cb_data[0]);
+    cb_data_state.start_pos = regs.const_buffer.cb_pos;
+    cb_data_state.id = method - first_cb_data;
+    cb_data_state.current = method;
+    cb_data_state.counter = 0;
+    ProcessCBData(regs.const_buffer.cb_data[cb_data_state.id]);
+}
+
+void Maxwell3D::FinishCBData() {
     // Write the input value to the current const buffer at the current position.
     const GPUVAddr buffer_address = regs.const_buffer.BufferAddress();
     ASSERT(buffer_address != 0);
 
     // Don't allow writing past the end of the buffer.
-    ASSERT(regs.const_buffer.cb_pos + sizeof(u32) <= regs.const_buffer.cb_size);
+    ASSERT(regs.const_buffer.cb_pos <= regs.const_buffer.cb_size);
 
-    const GPUVAddr address{buffer_address + regs.const_buffer.cb_pos};
+    const GPUVAddr address{buffer_address + cb_data_state.start_pos};
+    const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
 
-    u8* ptr{memory_manager.GetPointer(address)};
-    rasterizer.InvalidateRegion(ToCacheAddr(ptr), sizeof(u32));
-    memory_manager.Write<u32>(address, value);
+    const u32 id = cb_data_state.id;
+    memory_manager.WriteBlock(address, cb_data_state.buff[id].data(), size);
+    dirty.ResetRenderTargets();
 
-    // Increment the current buffer position.
-    regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
+    cb_data_state.id = null_cb_data;
+    cb_data_state.current = null_cb_data;
 }
 
 Texture::TICEntry Maxwell3D::GetTICEntry(u32 tic_index) const {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 84e6ca145..318078f36 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1244,6 +1244,15 @@ private:
 
     Upload::State upload_state;
 
+    static constexpr u32 null_cb_data = 0xFFFFFFFF;
+    struct {
+        std::array<std::array<u32, 0x4000>, 16> buff;
+        u32 current{null_cb_data};
+        u32 id{null_cb_data};
+        u32 start_pos{};
+        u32 counter{};
+    } cb_data_state;
+
     /// Retrieves information about a specific TIC entry from the TIC buffer.
     Texture::TICEntry GetTICEntry(u32 tic_index) const;
 
@@ -1275,7 +1284,9 @@ private:
     void ProcessSyncPoint();
 
     /// Handles a write to the CB_DATA[i] register.
+    void StartCBData(u32 method);
     void ProcessCBData(u32 value);
+    void FinishCBData();
 
     /// Handles a write to the CB_BIND register.
     void ProcessCBBind(Regs::ShaderStage stage);

From a081dea8abd9539ab45e53fbfb0e9c6243b87180 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sat, 13 Jul 2019 16:52:32 -0400
Subject: [PATCH 03/10] Maxwell3D: Implement State Dirty Flags.

---
 src/video_core/engines/maxwell_3d.cpp         | 82 +++++++++++++++++++
 src/video_core/engines/maxwell_3d.h           | 10 +--
 .../renderer_opengl/gl_rasterizer.cpp         | 82 ++++++++++++-------
 src/video_core/renderer_opengl/gl_state.cpp   | 22 +++--
 src/video_core/renderer_opengl/gl_state.h     | 37 ++++++++-
 .../renderer_opengl/gl_texture_cache.cpp      |  6 +-
 6 files changed, 197 insertions(+), 42 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 7d3a550f8..cfa98f528 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -159,6 +159,88 @@ void Maxwell3D::InitDirtySettings() {
         sizeof(regs.shader_config[0]) * Regs::MaxShaderProgram / sizeof(u32);
     set_block(MAXWELL3D_REG_INDEX(shader_config[0]), shader_registers_count,
               DIRTY_REGS_POS(shaders));
+
+    // State
+
+    // Viewport
+    constexpr u32 viewport_dirty_reg = DIRTY_REGS_POS(viewport);
+    constexpr u32 viewport_start = MAXWELL3D_REG_INDEX(viewports);
+    constexpr u32 viewport_size = sizeof(regs.viewports) / sizeof(u32);
+    set_block(viewport_start, viewport_size, viewport_dirty_reg);
+    constexpr u32 view_volume_start = MAXWELL3D_REG_INDEX(view_volume_clip_control);
+    constexpr u32 view_volume_size = sizeof(regs.view_volume_clip_control) / sizeof(u32);
+    set_block(view_volume_start, view_volume_size, viewport_dirty_reg);
+
+    // Viewport transformation
+    constexpr u32 viewport_trans_start = MAXWELL3D_REG_INDEX(viewport_transform);
+    constexpr u32 viewport_trans_size = sizeof(regs.viewport_transform) / sizeof(u32);
+    set_block(viewport_trans_start, viewport_trans_size, DIRTY_REGS_POS(viewport_transform));
+
+    // Cullmode
+    constexpr u32 cull_mode_start = MAXWELL3D_REG_INDEX(cull);
+    constexpr u32 cull_mode_size = sizeof(regs.cull) / sizeof(u32);
+    set_block(cull_mode_start, cull_mode_size, DIRTY_REGS_POS(cull_mode));
+
+    // Screen y control
+    dirty_pointers[MAXWELL3D_REG_INDEX(screen_y_control)] = DIRTY_REGS_POS(screen_y_control);
+
+    // Primitive Restart
+    constexpr u32 primitive_restart_start = MAXWELL3D_REG_INDEX(primitive_restart);
+    constexpr u32 primitive_restart_size = sizeof(regs.primitive_restart) / sizeof(u32);
+    set_block(primitive_restart_start, primitive_restart_size, DIRTY_REGS_POS(primitive_restart));
+
+    // Depth Test
+    constexpr u32 depth_test_dirty_reg = DIRTY_REGS_POS(depth_test);
+    dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_enable)] = depth_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(depth_write_enabled)] = depth_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(depth_test_func)] = depth_test_dirty_reg;
+
+    // Stencil Test
+    constexpr u32 stencil_test_dirty_reg = DIRTY_REGS_POS(stencil_test);
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_enable)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_func)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_ref)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_func_mask)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_fail)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zfail)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_op_zpass)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_front_mask)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_two_side_enable)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_func)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_ref)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_func_mask)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_fail)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zfail)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_op_zpass)] = stencil_test_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(stencil_back_mask)] = stencil_test_dirty_reg;
+
+    // Color Mask
+    constexpr u32 color_mask_dirty_reg = DIRTY_REGS_POS(color_mask);
+    dirty_pointers[MAXWELL3D_REG_INDEX(color_mask_common)] = color_mask_dirty_reg;
+    set_block(MAXWELL3D_REG_INDEX(color_mask), sizeof(regs.color_mask) / sizeof(u32),
+              color_mask_dirty_reg);
+    // Blend State
+    constexpr u32 blend_state_dirty_reg = DIRTY_REGS_POS(blend_state);
+    set_block(MAXWELL3D_REG_INDEX(blend_color), sizeof(regs.blend_color) / sizeof(u32),
+              blend_state_dirty_reg);
+    dirty_pointers[MAXWELL3D_REG_INDEX(independent_blend_enable)] = blend_state_dirty_reg;
+    set_block(MAXWELL3D_REG_INDEX(blend), sizeof(regs.blend) / sizeof(u32), blend_state_dirty_reg);
+    set_block(MAXWELL3D_REG_INDEX(independent_blend), sizeof(regs.independent_blend) / sizeof(u32),
+              blend_state_dirty_reg);
+
+    // Scissor State
+    constexpr u32 scissor_test_dirty_reg = DIRTY_REGS_POS(scissor_test);
+    set_block(MAXWELL3D_REG_INDEX(scissor_test), sizeof(regs.scissor_test) / sizeof(u32),
+              scissor_test_dirty_reg);
+
+    // Polygon Offset
+    constexpr u32 polygon_offset_dirty_reg = DIRTY_REGS_POS(polygon_offset);
+    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_fill_enable)] = polygon_offset_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_line_enable)] = polygon_offset_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_point_enable)] = polygon_offset_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_units)] = polygon_offset_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_factor)] = polygon_offset_dirty_reg;
+    dirty_pointers[MAXWELL3D_REG_INDEX(polygon_offset_clamp)] = polygon_offset_dirty_reg;
 }
 
 void Maxwell3D::CallMacroMethod(u32 method, std::vector<u32> parameters) {
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 318078f36..abc69cc65 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1148,22 +1148,20 @@ public:
                 bool shaders;
                 // State
                 bool viewport;
-                bool clip_enabled;
                 bool clip_coefficient;
                 bool cull_mode;
                 bool primitive_restart;
                 bool depth_test;
                 bool stencil_test;
                 bool blend_state;
-                bool logic_op;
-                bool fragment_color_clamp;
-                bool multi_sample;
                 bool scissor_test;
                 bool transform_feedback;
-                bool point;
                 bool color_mask;
                 bool polygon_offset;
-                bool alpha_test;
+
+                // Complementary
+                bool viewport_transform;
+                bool screen_y_control;
 
                 bool memory_general;
             };
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c2b5cbff4..76f0f98eb 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -936,56 +936,53 @@ void RasterizerOpenGL::SyncClipCoef() {
 }
 
 void RasterizerOpenGL::SyncCullMode() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    auto& maxwell3d = system.GPU().Maxwell3D();
+
+    const auto& regs = maxwell3d.regs;
 
     state.cull.enabled = regs.cull.enabled != 0;
+    state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
+    state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
 
-    if (state.cull.enabled) {
-        state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
-        state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
+    const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 ||
+                              regs.viewport_transform[0].scale_y < 0.0f};
 
-        const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 ||
-                                  regs.viewport_transform[0].scale_y < 0.0f};
-
-        // If the GPU is configured to flip the rasterized triangles, then we need to flip the
-        // notion of front and back. Note: We flip the triangles when the value of the register is 0
-        // because OpenGL already does it for us.
-        if (flip_triangles) {
-            if (state.cull.front_face == GL_CCW)
-                state.cull.front_face = GL_CW;
-            else if (state.cull.front_face == GL_CW)
-                state.cull.front_face = GL_CCW;
-        }
+    // If the GPU is configured to flip the rasterized triangles, then we need to flip the
+    // notion of front and back. Note: We flip the triangles when the value of the register is 0
+    // because OpenGL already does it for us.
+    if (flip_triangles) {
+        if (state.cull.front_face == GL_CCW)
+            state.cull.front_face = GL_CW;
+        else if (state.cull.front_face == GL_CW)
+            state.cull.front_face = GL_CCW;
     }
 }
 
 void RasterizerOpenGL::SyncPrimitiveRestart() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    auto& maxwell3d = system.GPU().Maxwell3D();
+    const auto& regs = maxwell3d.regs;
 
     state.primitive_restart.enabled = regs.primitive_restart.enabled;
     state.primitive_restart.index = regs.primitive_restart.index;
 }
 
 void RasterizerOpenGL::SyncDepthTestState() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    auto& maxwell3d = system.GPU().Maxwell3D();
+    const auto& regs = maxwell3d.regs;
 
     state.depth.test_enabled = regs.depth_test_enable != 0;
     state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
-
-    if (!state.depth.test_enabled)
-        return;
-
     state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
 }
 
 void RasterizerOpenGL::SyncStencilTestState() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
-    state.stencil.test_enabled = regs.stencil_enable != 0;
-
-    if (!regs.stencil_enable) {
+    auto& maxwell3d = system.GPU().Maxwell3D();
+    if (!maxwell3d.dirty.stencil_test) {
         return;
     }
+    const auto& regs = maxwell3d.regs;
 
+    state.stencil.test_enabled = regs.stencil_enable != 0;
     state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func);
     state.stencil.front.test_ref = regs.stencil_front_func_ref;
     state.stencil.front.test_mask = regs.stencil_front_func_mask;
@@ -1010,10 +1007,17 @@ void RasterizerOpenGL::SyncStencilTestState() {
         state.stencil.back.action_depth_fail = GL_KEEP;
         state.stencil.back.action_depth_pass = GL_KEEP;
     }
+    state.MarkDirtyStencilState(true);
+    maxwell3d.dirty.stencil_test = false;
 }
 
 void RasterizerOpenGL::SyncColorMask() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    auto& maxwell3d = system.GPU().Maxwell3D();
+    if (!maxwell3d.dirty.color_mask) {
+        return;
+    }
+    const auto& regs = maxwell3d.regs;
+
     const std::size_t count =
         regs.independent_blend_enable ? Tegra::Engines::Maxwell3D::Regs::NumRenderTargets : 1;
     for (std::size_t i = 0; i < count; i++) {
@@ -1024,6 +1028,9 @@ void RasterizerOpenGL::SyncColorMask() {
         dest.blue_enabled = (source.B == 0) ? GL_FALSE : GL_TRUE;
         dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
     }
+
+    state.MarkDirtyColorMask(true);
+    maxwell3d.dirty.color_mask = false;
 }
 
 void RasterizerOpenGL::SyncMultiSampleState() {
@@ -1038,7 +1045,11 @@ void RasterizerOpenGL::SyncFragmentColorClampState() {
 }
 
 void RasterizerOpenGL::SyncBlendState() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    auto& maxwell3d = system.GPU().Maxwell3D();
+    if (!maxwell3d.dirty.blend_state) {
+        return;
+    }
+    const auto& regs = maxwell3d.regs;
 
     state.blend_color.red = regs.blend_color.r;
     state.blend_color.green = regs.blend_color.g;
@@ -1061,6 +1072,8 @@ void RasterizerOpenGL::SyncBlendState() {
         for (std::size_t i = 1; i < Tegra::Engines::Maxwell3D::Regs::NumRenderTargets; i++) {
             state.blend[i].enabled = false;
         }
+        maxwell3d.dirty.blend_state = false;
+        state.MarkDirtyBlendState(true);
         return;
     }
 
@@ -1077,6 +1090,9 @@ void RasterizerOpenGL::SyncBlendState() {
         blend.src_a_func = MaxwellToGL::BlendFunc(src.factor_source_a);
         blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
     }
+
+    state.MarkDirtyBlendState(true);
+    maxwell3d.dirty.blend_state = false;
 }
 
 void RasterizerOpenGL::SyncLogicOpState() {
@@ -1128,13 +1144,21 @@ void RasterizerOpenGL::SyncPointState() {
 }
 
 void RasterizerOpenGL::SyncPolygonOffset() {
-    const auto& regs = system.GPU().Maxwell3D().regs;
+    auto& maxwell3d = system.GPU().Maxwell3D();
+    if (!maxwell3d.dirty.polygon_offset) {
+        return;
+    }
+    const auto& regs = maxwell3d.regs;
+
     state.polygon_offset.fill_enable = regs.polygon_offset_fill_enable != 0;
     state.polygon_offset.line_enable = regs.polygon_offset_line_enable != 0;
     state.polygon_offset.point_enable = regs.polygon_offset_point_enable != 0;
     state.polygon_offset.units = regs.polygon_offset_units;
     state.polygon_offset.factor = regs.polygon_offset_factor;
     state.polygon_offset.clamp = regs.polygon_offset_clamp;
+
+    state.MarkDirtyPolygonOffset(true);
+    maxwell3d.dirty.polygon_offset = false;
 }
 
 void RasterizerOpenGL::SyncAlphaTest() {
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 0eae98afe..cac03dc31 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -526,7 +526,7 @@ void OpenGLState::ApplySamplers() const {
     }
 }
 
-void OpenGLState::Apply() const {
+void OpenGLState::Apply() {
     MICROPROFILE_SCOPE(OpenGL_State);
     ApplyFramebufferState();
     ApplyVertexArrayState();
@@ -536,19 +536,31 @@ void OpenGLState::Apply() const {
     ApplyPointSize();
     ApplyFragmentColorClamp();
     ApplyMultisample();
+    if (dirty.color_mask) {
+        ApplyColorMask();
+        dirty.color_mask = false;
+    }
     ApplyDepthClamp();
-    ApplyColorMask();
     ApplyViewport();
-    ApplyStencilTest();
+    if (dirty.stencil_state) {
+        ApplyStencilTest();
+        dirty.stencil_state = false;
+    }
     ApplySRgb();
     ApplyCulling();
     ApplyDepth();
     ApplyPrimitiveRestart();
-    ApplyBlending();
+    if (dirty.blend_state) {
+        ApplyBlending();
+        dirty.blend_state = false;
+    }
     ApplyLogicOp();
     ApplyTextures();
     ApplySamplers();
-    ApplyPolygonOffset();
+    if (dirty.polygon_offset) {
+        ApplyPolygonOffset();
+        dirty.polygon_offset = false;
+    }
     ApplyAlphaTest();
 }
 
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index b0140495d..3d0f6747f 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -196,7 +196,7 @@ public:
     }
 
     /// Apply this state as the current OpenGL state
-    void Apply() const;
+    void Apply();
 
     void ApplyFramebufferState() const;
     void ApplyVertexArrayState() const;
@@ -237,11 +237,46 @@ public:
     /// Viewport does not affects glClearBuffer so emulate viewport using scissor test
     void EmulateViewportWithScissor();
 
+    void MarkDirtyBlendState(const bool is_dirty) {
+        dirty.blend_state = is_dirty;
+    }
+
+    void MarkDirtyStencilState(const bool is_dirty) {
+        dirty.stencil_state = is_dirty;
+    }
+
+    void MarkDirtyViewportState(const bool is_dirty) {
+        dirty.viewport_state = is_dirty;
+    }
+
+    void MarkDirtyPolygonOffset(const bool is_dirty) {
+        dirty.polygon_offset = is_dirty;
+    }
+
+    void MarkDirtyColorMask(const bool is_dirty) {
+        dirty.color_mask = is_dirty;
+    }
+
+    void AllDirty() {
+        dirty.blend_state = true;
+        dirty.stencil_state = true;
+        dirty.viewport_state = true;
+        dirty.polygon_offset = true;
+        dirty.color_mask = true;
+    }
+
 private:
     static OpenGLState cur_state;
 
     // Workaround for sRGB problems caused by QT not supporting srgb output
     static bool s_rgb_used;
+    struct {
+        bool blend_state;
+        bool stencil_state;
+        bool viewport_state;
+        bool polygon_offset;
+        bool color_mask;
+    } dirty{};
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_texture_cache.cpp b/src/video_core/renderer_opengl/gl_texture_cache.cpp
index b1f6bc7c2..8fcd39a69 100644
--- a/src/video_core/renderer_opengl/gl_texture_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_texture_cache.cpp
@@ -485,11 +485,15 @@ void TextureCacheOpenGL::ImageBlit(View& src_view, View& dst_view,
     const auto& dst_params{dst_view->GetSurfaceParams()};
 
     OpenGLState prev_state{OpenGLState::GetCurState()};
-    SCOPE_EXIT({ prev_state.Apply(); });
+    SCOPE_EXIT({
+        prev_state.AllDirty();
+        prev_state.Apply();
+    });
 
     OpenGLState state;
     state.draw.read_framebuffer = src_framebuffer.handle;
     state.draw.draw_framebuffer = dst_framebuffer.handle;
+    state.AllDirty();
     state.Apply();
 
     u32 buffers{};

From fec32fed18bd4210f00150018a05ab010091e573 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 14 Jul 2019 08:14:27 -0400
Subject: [PATCH 04/10] GL_Rasterizer: Rework RenderTarget/DepthBuffer clearing

---
 src/video_core/engines/maxwell_3d.cpp         |  1 -
 .../renderer_opengl/gl_rasterizer.cpp         | 64 +++++++++++++++++--
 .../renderer_opengl/gl_rasterizer.h           |  5 ++
 3 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index cfa98f528..d499aaa8d 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -568,7 +568,6 @@ void Maxwell3D::FinishCBData() {
 
     const u32 id = cb_data_state.id;
     memory_manager.WriteBlock(address, cb_data_state.buff[id].data(), size);
-    dirty.ResetRenderTargets();
 
     cb_data_state.id = null_cb_data;
     cb_data_state.current = null_cb_data;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 76f0f98eb..4aa3d6548 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -105,6 +105,7 @@ RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWind
     shader_program_manager = std::make_unique<GLShader::ProgramManager>();
     state.draw.shader_program = 0;
     state.Apply();
+    clear_framebuffer.Create();
 
     LOG_DEBUG(Render_OpenGL, "Sync fixed function OpenGL state here");
     CheckExtensions();
@@ -546,12 +547,63 @@ std::pair<bool, bool> RasterizerOpenGL::ConfigureFramebuffers(
     return current_depth_stencil_usage = {static_cast<bool>(depth_surface), fbkey.stencil_enable};
 }
 
+void RasterizerOpenGL::ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
+                                                 bool using_depth_fb, bool using_stencil_fb) {
+    auto& gpu = system.GPU().Maxwell3D();
+    const auto& regs = gpu.regs;
+
+    texture_cache.GuardRenderTargets(true);
+    View color_surface{};
+    if (using_color_fb) {
+        color_surface = texture_cache.GetColorBufferSurface(regs.clear_buffers.RT, false);
+    }
+    View depth_surface{};
+    if (using_depth_fb || using_stencil_fb) {
+        depth_surface = texture_cache.GetDepthBufferSurface(false);
+    }
+    texture_cache.GuardRenderTargets(false);
+
+    current_state.draw.draw_framebuffer = clear_framebuffer.handle;
+    current_state.ApplyFramebufferState();
+
+    if (color_surface) {
+        color_surface->Attach(GL_COLOR_ATTACHMENT0, GL_DRAW_FRAMEBUFFER);
+    } else {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
+    }
+
+    if (depth_surface) {
+        const auto& params = depth_surface->GetSurfaceParams();
+        switch (params.type) {
+        case VideoCore::Surface::SurfaceType::Depth: { // Depth only: detach any stale stencil
+            depth_surface->Attach(GL_DEPTH_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+            glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0);
+            break;
+        }
+        case VideoCore::Surface::SurfaceType::DepthStencil: { // Bind depth and stencil together
+            depth_surface->Attach(GL_DEPTH_STENCIL_ATTACHMENT, GL_DRAW_FRAMEBUFFER);
+            break;
+        }
+        default: { UNIMPLEMENTED(); }
+        }
+    } else {
+        glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
+                               0);
+    }
+}
+
 void RasterizerOpenGL::Clear() {
     const auto& regs = system.GPU().Maxwell3D().regs;
     bool use_color{};
     bool use_depth{};
     bool use_stencil{};
 
+    OpenGLState prev_state{OpenGLState::GetCurState()};
+    SCOPE_EXIT({
+        prev_state.AllDirty();
+        prev_state.Apply();
+    });
+
     OpenGLState clear_state;
     if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
         regs.clear_buffers.A) {
@@ -608,8 +660,8 @@ void RasterizerOpenGL::Clear() {
         return;
     }
 
-    const auto [clear_depth, clear_stencil] = ConfigureFramebuffers(
-        clear_state, use_color, use_depth || use_stencil, false, regs.clear_buffers.RT.Value());
+    ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
+    SyncViewport(clear_state);
     if (regs.clear_flags.scissor) {
         SyncScissorTest(clear_state);
     }
@@ -625,14 +677,14 @@ void RasterizerOpenGL::Clear() {
     clear_state.ApplyFramebufferState();
 
     if (use_color) {
-        glClearBufferfv(GL_COLOR, regs.clear_buffers.RT, regs.clear_color);
+        glClearBufferfv(GL_COLOR, 0, regs.clear_color);
     }
 
-    if (clear_depth && clear_stencil) {
+    if (use_depth && use_stencil) {
         glClearBufferfi(GL_DEPTH_STENCIL, 0, regs.clear_depth, regs.clear_stencil);
-    } else if (clear_depth) {
+    } else if (use_depth) {
         glClearBufferfv(GL_DEPTH, 0, &regs.clear_depth);
-    } else if (clear_stencil) {
+    } else if (use_stencil) {
         glClearBufferiv(GL_STENCIL, 0, &regs.clear_stencil);
     }
 }
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 1f6ce4b81..ef34d3f54 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -108,6 +108,9 @@ private:
         OpenGLState& current_state, bool using_color_fb = true, bool using_depth_fb = true,
         bool preserve_contents = true, std::optional<std::size_t> single_color_target = {});
 
+    void ConfigureClearFramebuffer(OpenGLState& current_state, bool using_color_fb,
+                                   bool using_depth_fb, bool using_stencil_fb);
+
     /// Configures the current constbuffers to use for the draw command.
     void SetupDrawConstBuffers(Tegra::Engines::Maxwell3D::Regs::ShaderStage stage,
                                const Shader& shader);
@@ -227,6 +230,8 @@ private:
     enum class AccelDraw { Disabled, Arrays, Indexed };
     AccelDraw accelerate_draw = AccelDraw::Disabled;
 
+    OGLFramebuffer clear_framebuffer;
+
     using CachedPageMap = boost::icl::interval_map<u64, int>;
     CachedPageMap cached_pages;
 };

From 0ff4a5fa3918c5a1aa9d973061354bf21d7205d8 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 14 Jul 2019 11:56:47 -0400
Subject: [PATCH 05/10] Maxwell3D: Correct marking dirtiness on CB upload

---
 src/video_core/engines/maxwell_3d.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index d499aaa8d..0c4e72dfe 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -568,6 +568,7 @@ void Maxwell3D::FinishCBData() {
 
     const u32 id = cb_data_state.id;
     memory_manager.WriteBlock(address, cb_data_state.buff[id].data(), size);
+    dirty.ResetVertexArrays();
 
     cb_data_state.id = null_cb_data;
     cb_data_state.current = null_cb_data;

From 8cdbfe69b1211431536414e375f0fd49222d9a29 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Sun, 14 Jul 2019 15:00:37 -0400
Subject: [PATCH 06/10] GL_Rasterizer: Corrections to Clearing.

---
 src/video_core/engines/maxwell_3d.cpp         |  2 +-
 .../renderer_opengl/gl_rasterizer.cpp         | 12 +++++------
 src/video_core/renderer_opengl/gl_state.cpp   | 20 +++++++++++++++++++
 src/video_core/renderer_opengl/gl_state.h     |  6 +-----
 4 files changed, 28 insertions(+), 12 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 0c4e72dfe..97422e700 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -568,7 +568,7 @@ void Maxwell3D::FinishCBData() {
 
     const u32 id = cb_data_state.id;
     memory_manager.WriteBlock(address, cb_data_state.buff[id].data(), size);
-    dirty.ResetVertexArrays();
+    dirty.OnMemoryWrite();
 
     cb_data_state.id = null_cb_data;
     cb_data_state.current = null_cb_data;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 4aa3d6548..77195ad93 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -604,7 +604,8 @@ void RasterizerOpenGL::Clear() {
         prev_state.Apply();
     });
 
-    OpenGLState clear_state;
+    OpenGLState clear_state{OpenGLState::GetCurState()};
+    clear_state.DefaultViewports();
     if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
         regs.clear_buffers.A) {
         use_color = true;
@@ -624,6 +625,7 @@ void RasterizerOpenGL::Clear() {
         // true.
         clear_state.depth.test_enabled = true;
         clear_state.depth.test_func = GL_ALWAYS;
+        clear_state.depth.write_mask = GL_TRUE;
     }
     if (regs.clear_buffers.S) {
         ASSERT_MSG(regs.zeta_enable != 0, "Tried to clear stencil but buffer is not enabled!");
@@ -661,6 +663,7 @@ void RasterizerOpenGL::Clear() {
     }
 
     ConfigureClearFramebuffer(clear_state, use_color, use_depth, use_stencil);
+
     SyncViewport(clear_state);
     if (regs.clear_flags.scissor) {
         SyncScissorTest(clear_state);
@@ -670,11 +673,8 @@ void RasterizerOpenGL::Clear() {
         clear_state.EmulateViewportWithScissor();
     }
 
-    clear_state.ApplyColorMask();
-    clear_state.ApplyDepth();
-    clear_state.ApplyStencilTest();
-    clear_state.ApplyViewport();
-    clear_state.ApplyFramebufferState();
+    clear_state.AllDirty();
+    clear_state.Apply();
 
     if (use_color) {
         glClearBufferfv(GL_COLOR, 0, regs.clear_color);
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index cac03dc31..8d62045b0 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -165,6 +165,26 @@ OpenGLState::OpenGLState() {
     alpha_test.ref = 0.0f;
 }
 
+void OpenGLState::DefaultViewports() {
+    for (auto& item : viewports) {
+        item.x = 0;
+        item.y = 0;
+        item.width = 0;
+        item.height = 0;
+        item.depth_range_near = 0.0f;
+        item.depth_range_far = 1.0f;
+        item.scissor.enabled = false;
+        item.scissor.x = 0;
+        item.scissor.y = 0;
+        item.scissor.width = 0;
+        item.scissor.height = 0;
+    }
+
+    depth_clamp.far_plane = false;
+    depth_clamp.near_plane = false;
+
+}
+
 void OpenGLState::ApplyDefaultState() {
     glEnable(GL_BLEND);
     glDisable(GL_FRAMEBUFFER_SRGB);
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 3d0f6747f..2860a2c82 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -195,6 +195,7 @@ public:
         s_rgb_used = false;
     }
 
+    void DefaultViewports();
     /// Apply this state as the current OpenGL state
     void Apply();
 
@@ -245,10 +246,6 @@ public:
         dirty.stencil_state = is_dirty;
     }
 
-    void MarkDirtyViewportState(const bool is_dirty) {
-        dirty.viewport_state = is_dirty;
-    }
-
     void MarkDirtyPolygonOffset(const bool is_dirty) {
         dirty.polygon_offset = is_dirty;
     }
@@ -260,7 +257,6 @@ public:
     void AllDirty() {
         dirty.blend_state = true;
         dirty.stencil_state = true;
-        dirty.viewport_state = true;
         dirty.polygon_offset = true;
         dirty.color_mask = true;
     }

From 7826f0afd934cd24310778dabc5211872def2cd3 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 15 Jul 2019 08:40:41 -0400
Subject: [PATCH 07/10] Texture_Cache: Rebase Fixes

---
 src/video_core/texture_cache/texture_cache.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 8225022a9..a3a3770a7 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -272,15 +272,9 @@ protected:
 
     void ManageRenderTargetUnregister(TSurface& surface) {
         auto& maxwell3d = system.GPU().Maxwell3D();
-<<<<<<< HEAD
         const u32 index = surface->GetRenderTarget();
         if (index == DEPTH_RT) {
-            maxwell3d.dirty_flags.zeta_buffer = true;
-=======
-        u32 index = surface->GetRenderTarget();
-        if (index == 8) {
             maxwell3d.dirty.depth_buffer = true;
->>>>>>> Maxwell3D: Rework the dirty system to be more consistant and scaleable
         } else {
             maxwell3d.dirty.render_target[index] = true;
         }

From 5ad889f6fdb4a2d331e8e80e82fef8b8d582d7f7 Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Mon, 15 Jul 2019 10:24:01 -0400
Subject: [PATCH 08/10] Maxwell3D: Address Feedback

---
 src/video_core/engines/maxwell_3d.cpp           | 17 +++++++----------
 src/video_core/engines/maxwell_3d.h             |  6 +++---
 .../renderer_opengl/gl_rasterizer.cpp           |  2 +-
 src/video_core/renderer_opengl/gl_state.cpp     |  3 +--
 src/video_core/renderer_opengl/gl_state.h       |  2 +-
 5 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 97422e700..87777e265 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -91,14 +91,11 @@ void Maxwell3D::InitializeRegisterDefaults() {
 
 void Maxwell3D::InitDirtySettings() {
     const auto set_block = [this](const u32 start, const u32 range, const u8 position) {
-        const u32 end = start + range;
-        for (std::size_t i = start; i < end; i++) {
-            dirty_pointers[i] = position;
-        }
+        const auto start_itr = dirty_pointers.begin() + start;
+        const auto end_itr = start_itr + range;
+        std::fill(start_itr, end_itr, position);
     };
-    for (std::size_t i = 0; i < DirtyRegs::NUM_REGS; i++) {
-        dirty.regs[i] = true;
-    }
+    dirty.regs.fill(true);
 
     // Init Render Targets
     constexpr u32 registers_per_rt = sizeof(regs.rt[0]) / sizeof(u32);
@@ -308,7 +305,7 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
 
     if (regs.reg_array[method] != method_call.argument) {
         regs.reg_array[method] = method_call.argument;
-        std::size_t dirty_reg = dirty_pointers[method];
+        const std::size_t dirty_reg = dirty_pointers[method];
         if (dirty_reg) {
             dirty.regs[dirty_reg] = true;
             if (dirty_reg >= DIRTY_REGS_POS(vertex_array) &&
@@ -540,7 +537,7 @@ void Maxwell3D::ProcessCBBind(Regs::ShaderStage stage) {
 
 void Maxwell3D::ProcessCBData(u32 value) {
     const u32 id = cb_data_state.id;
-    cb_data_state.buff[id][cb_data_state.counter] = value;
+    cb_data_state.buffer[id][cb_data_state.counter] = value;
     // Increment the current buffer position.
     regs.const_buffer.cb_pos = regs.const_buffer.cb_pos + 4;
     cb_data_state.counter++;
@@ -567,7 +564,7 @@ void Maxwell3D::FinishCBData() {
     const std::size_t size = regs.const_buffer.cb_pos - cb_data_state.start_pos;
 
     const u32 id = cb_data_state.id;
-    memory_manager.WriteBlock(address, cb_data_state.buff[id].data(), size);
+    memory_manager.WriteBlock(address, cb_data_state.buffer[id].data(), size);
     dirty.OnMemoryWrite();
 
     cb_data_state.id = null_cb_data;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index abc69cc65..6bf8d0603 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1169,13 +1169,13 @@ public:
         };
 
         void ResetVertexArrays() {
-            std::fill(vertex_array.begin(), vertex_array.end(), true);
+            vertex_array.fill(true);
             vertex_array_buffers = true;
         }
 
         void ResetRenderTargets() {
             depth_buffer = true;
-            std::fill(render_target.begin(), render_target.end(), true);
+            render_target.fill(true);
             render_settings = true;
         }
 
@@ -1244,7 +1244,7 @@ private:
 
     static constexpr u32 null_cb_data = 0xFFFFFFFF;
     struct {
-        std::array<std::array<u32, 0x4000>, 16> buff;
+        std::array<std::array<u32, 0x4000>, 16> buffer;
         u32 current{null_cb_data};
         u32 id{null_cb_data};
         u32 start_pos{};
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 77195ad93..2e974c98a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -605,7 +605,7 @@ void RasterizerOpenGL::Clear() {
     });
 
     OpenGLState clear_state{OpenGLState::GetCurState()};
-    clear_state.DefaultViewports();
+    clear_state.SetDefaultViewports();
     if (regs.clear_buffers.R || regs.clear_buffers.G || regs.clear_buffers.B ||
         regs.clear_buffers.A) {
         use_color = true;
diff --git a/src/video_core/renderer_opengl/gl_state.cpp b/src/video_core/renderer_opengl/gl_state.cpp
index 8d62045b0..f4777d0b0 100644
--- a/src/video_core/renderer_opengl/gl_state.cpp
+++ b/src/video_core/renderer_opengl/gl_state.cpp
@@ -165,7 +165,7 @@ OpenGLState::OpenGLState() {
     alpha_test.ref = 0.0f;
 }
 
-void OpenGLState::DefaultViewports() {
+void OpenGLState::SetDefaultViewports() {
     for (auto& item : viewports) {
         item.x = 0;
         item.y = 0;
@@ -182,7 +182,6 @@ void OpenGLState::DefaultViewports() {
 
     depth_clamp.far_plane = false;
     depth_clamp.near_plane = false;
-
 }
 
 void OpenGLState::ApplyDefaultState() {
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 2860a2c82..6a85d15b1 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -195,7 +195,7 @@ public:
         s_rgb_used = false;
     }
 
-    void DefaultViewports();
+    void SetDefaultViewports();
     /// Apply this state as the current OpenGL state
     void Apply();
 

From 4be61013a1e26414b37abb35e82b48600c05628b Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 17 Jul 2019 17:00:06 -0400
Subject: [PATCH 09/10] GL_State: Feedback and fixes

---
 src/video_core/engines/maxwell_3d.h              |  8 +++++++-
 src/video_core/renderer_opengl/gl_rasterizer.cpp | 10 +++++-----
 src/video_core/renderer_opengl/gl_state.h        | 16 ++++++++--------
 .../renderer_opengl/renderer_opengl.cpp          |  7 +++++++
 4 files changed, 27 insertions(+), 14 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 6bf8d0603..42feb0345 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1129,24 +1129,30 @@ public:
         union {
             struct {
                 bool null_dirty;
+
                 // Vertex Attributes
                 bool vertex_attrib_format;
+
                 // Vertex Arrays
                 std::array<bool, 32> vertex_array;
 
                 bool vertex_array_buffers;
+
                 // Vertex Instances
                 std::array<bool, 32> vertex_instance;
 
                 bool vertex_instances;
+
                 // Render Targets
                 std::array<bool, 8> render_target;
                 bool depth_buffer;
 
                 bool render_settings;
+
                 // Shaders
                 bool shaders;
-                // State
+
+                // Rasterizer State
                 bool viewport;
                 bool clip_coefficient;
                 bool cull_mode;
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index 2e974c98a..d1ae8a7c5 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -1059,7 +1059,7 @@ void RasterizerOpenGL::SyncStencilTestState() {
         state.stencil.back.action_depth_fail = GL_KEEP;
         state.stencil.back.action_depth_pass = GL_KEEP;
     }
-    state.MarkDirtyStencilState(true);
+    state.MarkDirtyStencilState();
     maxwell3d.dirty.stencil_test = false;
 }
 
@@ -1081,7 +1081,7 @@ void RasterizerOpenGL::SyncColorMask() {
         dest.alpha_enabled = (source.A == 0) ? GL_FALSE : GL_TRUE;
     }
 
-    state.MarkDirtyColorMask(true);
+    state.MarkDirtyColorMask();
     maxwell3d.dirty.color_mask = false;
 }
 
@@ -1125,7 +1125,7 @@ void RasterizerOpenGL::SyncBlendState() {
             state.blend[i].enabled = false;
         }
         maxwell3d.dirty.blend_state = false;
-        state.MarkDirtyBlendState(true);
+        state.MarkDirtyBlendState();
         return;
     }
 
@@ -1143,7 +1143,7 @@ void RasterizerOpenGL::SyncBlendState() {
         blend.dst_a_func = MaxwellToGL::BlendFunc(src.factor_dest_a);
     }
 
-    state.MarkDirtyBlendState(true);
+    state.MarkDirtyBlendState();
     maxwell3d.dirty.blend_state = false;
 }
 
@@ -1209,7 +1209,7 @@ void RasterizerOpenGL::SyncPolygonOffset() {
     state.polygon_offset.factor = regs.polygon_offset_factor;
     state.polygon_offset.clamp = regs.polygon_offset_clamp;
 
-    state.MarkDirtyPolygonOffset(true);
+    state.MarkDirtyPolygonOffset();
     maxwell3d.dirty.polygon_offset = false;
 }
 
diff --git a/src/video_core/renderer_opengl/gl_state.h b/src/video_core/renderer_opengl/gl_state.h
index 6a85d15b1..fdf9a8a12 100644
--- a/src/video_core/renderer_opengl/gl_state.h
+++ b/src/video_core/renderer_opengl/gl_state.h
@@ -238,20 +238,20 @@ public:
     /// Viewport does not affects glClearBuffer so emulate viewport using scissor test
     void EmulateViewportWithScissor();
 
-    void MarkDirtyBlendState(const bool is_dirty) {
-        dirty.blend_state = is_dirty;
+    void MarkDirtyBlendState() {
+        dirty.blend_state = true;
     }
 
-    void MarkDirtyStencilState(const bool is_dirty) {
-        dirty.stencil_state = is_dirty;
+    void MarkDirtyStencilState() {
+        dirty.stencil_state = true;
     }
 
-    void MarkDirtyPolygonOffset(const bool is_dirty) {
-        dirty.polygon_offset = is_dirty;
+    void MarkDirtyPolygonOffset() {
+        dirty.polygon_offset = true;
     }
 
-    void MarkDirtyColorMask(const bool is_dirty) {
-        dirty.color_mask = is_dirty;
+    void MarkDirtyColorMask() {
+        dirty.color_mask = true;
     }
 
     void AllDirty() {
diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp
index 9ecdddb0d..a05cef3b9 100644
--- a/src/video_core/renderer_opengl/renderer_opengl.cpp
+++ b/src/video_core/renderer_opengl/renderer_opengl.cpp
@@ -108,6 +108,7 @@ void RendererOpenGL::SwapBuffers(
 
     // Maintain the rasterizer's state as a priority
     OpenGLState prev_state = OpenGLState::GetCurState();
+    state.AllDirty();
     state.Apply();
 
     if (framebuffer) {
@@ -140,6 +141,7 @@ void RendererOpenGL::SwapBuffers(
     system.GetPerfStats().BeginSystemFrame();
 
     // Restore the rasterizer state
+    prev_state.AllDirty();
     prev_state.Apply();
 }
 
@@ -206,6 +208,7 @@ void RendererOpenGL::InitOpenGLObjects() {
     // Link shaders and get variable locations
     shader.CreateFromSource(vertex_shader, nullptr, fragment_shader);
     state.draw.shader_program = shader.handle;
+    state.AllDirty();
     state.Apply();
     uniform_modelview_matrix = glGetUniformLocation(shader.handle, "modelview_matrix");
     uniform_color_texture = glGetUniformLocation(shader.handle, "color_texture");
@@ -338,12 +341,14 @@ void RendererOpenGL::DrawScreenTriangles(const ScreenInfo& screen_info, float x,
     // Workaround brigthness problems in SMO by enabling sRGB in the final output
     // if it has been used in the frame. Needed because of this bug in QT: QTBUG-50987
     state.framebuffer_srgb.enabled = OpenGLState::GetsRGBUsed();
+    state.AllDirty();
     state.Apply();
     glNamedBufferSubData(vertex_buffer.handle, 0, sizeof(vertices), vertices.data());
     glDrawArrays(GL_TRIANGLE_STRIP, 0, 4);
     // Restore default state
     state.framebuffer_srgb.enabled = false;
     state.texture_units[0].texture = 0;
+    state.AllDirty();
     state.Apply();
     // Clear sRGB state for the next frame
     OpenGLState::ClearsRGBUsed();
@@ -388,6 +393,7 @@ void RendererOpenGL::CaptureScreenshot() {
     GLuint old_read_fb = state.draw.read_framebuffer;
     GLuint old_draw_fb = state.draw.draw_framebuffer;
     state.draw.read_framebuffer = state.draw.draw_framebuffer = screenshot_framebuffer.handle;
+    state.AllDirty();
     state.Apply();
 
     Layout::FramebufferLayout layout{renderer_settings.screenshot_framebuffer_layout};
@@ -407,6 +413,7 @@ void RendererOpenGL::CaptureScreenshot() {
     screenshot_framebuffer.Release();
     state.draw.read_framebuffer = old_read_fb;
     state.draw.draw_framebuffer = old_draw_fb;
+    state.AllDirty();
     state.Apply();
     glDeleteRenderbuffers(1, &renderbuffer);
 

From 7a35178ee2c8ce60c87654ed2d80cc76abb0380b Mon Sep 17 00:00:00 2001
From: Fernando Sahmkow <fsahmkow27@gmail.com>
Date: Wed, 17 Jul 2019 19:37:01 -0400
Subject: [PATCH 10/10] Maxwell3D: Reorganize and address feedback

---
 src/video_core/engines/maxwell_3d.cpp         |  4 ++
 src/video_core/engines/maxwell_3d.h           |  4 +-
 .../renderer_opengl/gl_rasterizer.cpp         | 41 +++++++++++--------
 3 files changed, 31 insertions(+), 18 deletions(-)

diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 87777e265..fe9fc0278 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -70,6 +70,10 @@ void Maxwell3D::InitializeRegisterDefaults() {
     regs.stencil_back_func_mask = 0xFFFFFFFF;
     regs.stencil_back_mask = 0xFFFFFFFF;
 
+    regs.depth_test_func = Regs::ComparisonOp::Always;
+    regs.cull.front_face = Regs::Cull::FrontFace::CounterClockWise;
+    regs.cull.cull_face = Regs::Cull::CullFace::Back;
+
     // TODO(Rodrigo): Most games do not set a point size. I think this is a case of a
     // register carrying a default value. Assume it's OpenGL's default (1).
     regs.point_size = 1.0f;
diff --git a/src/video_core/engines/maxwell_3d.h b/src/video_core/engines/maxwell_3d.h
index 42feb0345..ac300bf76 100644
--- a/src/video_core/engines/maxwell_3d.h
+++ b/src/video_core/engines/maxwell_3d.h
@@ -1246,8 +1246,6 @@ private:
     /// Interpreter for the macro codes uploaded to the GPU.
     MacroInterpreter macro_interpreter;
 
-    Upload::State upload_state;
-
     static constexpr u32 null_cb_data = 0xFFFFFFFF;
     struct {
         std::array<std::array<u32, 0x4000>, 16> buffer;
@@ -1257,6 +1255,8 @@ private:
         u32 counter{};
     } cb_data_state;
 
+    Upload::State upload_state;
+
     /// Retrieves information about a specific TIC entry from the TIC buffer.
     Texture::TICEntry GetTICEntry(u32 tic_index) const;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index d1ae8a7c5..0432a9e10 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -993,37 +993,42 @@ void RasterizerOpenGL::SyncCullMode() {
     const auto& regs = maxwell3d.regs;
 
     state.cull.enabled = regs.cull.enabled != 0;
-    state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
-    state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
+    if (state.cull.enabled) {
+        state.cull.front_face = MaxwellToGL::FrontFace(regs.cull.front_face);
+        state.cull.mode = MaxwellToGL::CullFace(regs.cull.cull_face);
 
-    const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 ||
-                              regs.viewport_transform[0].scale_y < 0.0f};
+        const bool flip_triangles{regs.screen_y_control.triangle_rast_flip == 0 ||
+                                  regs.viewport_transform[0].scale_y < 0.0f};
 
-    // If the GPU is configured to flip the rasterized triangles, then we need to flip the
-    // notion of front and back. Note: We flip the triangles when the value of the register is 0
-    // because OpenGL already does it for us.
-    if (flip_triangles) {
-        if (state.cull.front_face == GL_CCW)
-            state.cull.front_face = GL_CW;
-        else if (state.cull.front_face == GL_CW)
-            state.cull.front_face = GL_CCW;
+        // If the GPU is configured to flip the rasterized triangles, then we need to flip the
+        // notion of front and back. Note: We flip the triangles when the value of the register is 0
+        // because OpenGL already does it for us.
+        if (flip_triangles) {
+            if (state.cull.front_face == GL_CCW)
+                state.cull.front_face = GL_CW;
+            else if (state.cull.front_face == GL_CW)
+                state.cull.front_face = GL_CCW;
+        }
     }
 }
 
 void RasterizerOpenGL::SyncPrimitiveRestart() {
-    auto& maxwell3d = system.GPU().Maxwell3D();
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
 
     state.primitive_restart.enabled = regs.primitive_restart.enabled;
     state.primitive_restart.index = regs.primitive_restart.index;
 }
 
 void RasterizerOpenGL::SyncDepthTestState() {
-    auto& maxwell3d = system.GPU().Maxwell3D();
-    const auto& regs = maxwell3d.regs;
+    const auto& regs = system.GPU().Maxwell3D().regs;
 
     state.depth.test_enabled = regs.depth_test_enable != 0;
     state.depth.write_mask = regs.depth_write_enabled ? GL_TRUE : GL_FALSE;
+
+    if (!state.depth.test_enabled) {
+        return;
+    }
+
     state.depth.test_func = MaxwellToGL::ComparisonOp(regs.depth_test_func);
 }
 
@@ -1035,6 +1040,14 @@ void RasterizerOpenGL::SyncStencilTestState() {
     const auto& regs = maxwell3d.regs;
 
     state.stencil.test_enabled = regs.stencil_enable != 0;
+    if (!regs.stencil_enable) {
+        // The enable bit was written above, so flag the GL state and clear the engine flag
+        // here too: the function tail that normally does both is skipped by this return.
+        state.MarkDirtyStencilState();
+        maxwell3d.dirty.stencil_test = false;
+        return;
+    }
+
     state.stencil.front.test_func = MaxwellToGL::ComparisonOp(regs.stencil_front_func_func);
     state.stencil.front.test_ref = regs.stencil_front_func_ref;
     state.stencil.front.test_mask = regs.stencil_front_func_mask;