diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
index 4cdf7f613..8e0f9a9e5 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.cpp
@@ -8,6 +8,7 @@
 #include "core/core.h"
 #include "core/hle/service/nvdrv/devices/nvhost_gpu.h"
 #include "core/memory.h"
+#include "video_core/command_processor.h"
 #include "video_core/gpu.h"
 #include "video_core/memory_manager.h"
 
@@ -134,17 +135,16 @@ u32 nvhost_gpu::SubmitGPFIFO(const std::vector<u8>& input, std::vector<u8>& outp
     LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
                 params.address, params.num_entries, params.flags);
 
-    ASSERT_MSG(input.size() ==
-                   sizeof(IoctlSubmitGpfifo) + params.num_entries * sizeof(IoctlGpfifoEntry),
+    ASSERT_MSG(input.size() == sizeof(IoctlSubmitGpfifo) +
+                                   params.num_entries * sizeof(Tegra::CommandListHeader),
                "Incorrect input size");
 
-    std::vector<IoctlGpfifoEntry> entries(params.num_entries);
+    std::vector<Tegra::CommandListHeader> entries(params.num_entries);
     std::memcpy(entries.data(), &input[sizeof(IoctlSubmitGpfifo)],
-                params.num_entries * sizeof(IoctlGpfifoEntry));
-    for (auto entry : entries) {
-        Tegra::GPUVAddr va_addr = entry.Address();
-        Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
-    }
+                params.num_entries * sizeof(Tegra::CommandListHeader));
+
+    Core::System::GetInstance().GPU().ProcessCommandLists(entries);
+
     params.fence_out.id = 0;
     params.fence_out.value = 0;
     std::memcpy(output.data(), &params, sizeof(IoctlSubmitGpfifo));
@@ -160,14 +160,12 @@ u32 nvhost_gpu::KickoffPB(const std::vector<u8>& input, std::vector<u8>& output)
     LOG_WARNING(Service_NVDRV, "(STUBBED) called, gpfifo={:X}, num_entries={:X}, flags={:X}",
                 params.address, params.num_entries, params.flags);
 
-    std::vector<IoctlGpfifoEntry> entries(params.num_entries);
+    std::vector<Tegra::CommandListHeader> entries(params.num_entries);
     Memory::ReadBlock(params.address, entries.data(),
-                      params.num_entries * sizeof(IoctlGpfifoEntry));
+                      params.num_entries * sizeof(Tegra::CommandListHeader));
+
+    Core::System::GetInstance().GPU().ProcessCommandLists(entries);
 
-    for (auto entry : entries) {
-        Tegra::GPUVAddr va_addr = entry.Address();
-        Core::System::GetInstance().GPU().ProcessCommandList(va_addr, entry.sz);
-    }
     params.fence_out.id = 0;
     params.fence_out.value = 0;
     std::memcpy(output.data(), &params, output.size());
diff --git a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
index 03b7356d0..baaefd79a 100644
--- a/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
+++ b/src/core/hle/service/nvdrv/devices/nvhost_gpu.h
@@ -10,7 +10,6 @@
 #include "common/common_types.h"
 #include "common/swap.h"
 #include "core/hle/service/nvdrv/devices/nvdevice.h"
-#include "video_core/memory_manager.h"
 
 namespace Service::Nvidia::Devices {
 
@@ -151,22 +150,6 @@ private:
     };
     static_assert(sizeof(IoctlAllocObjCtx) == 16, "IoctlAllocObjCtx is incorrect size");
 
-    struct IoctlGpfifoEntry {
-        u32_le entry0; // gpu_va_lo
-        union {
-            u32_le entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
-            BitField<0, 8, u32_le> gpu_va_hi;
-            BitField<8, 2, u32_le> unk1;
-            BitField<10, 21, u32_le> sz;
-            BitField<31, 1, u32_le> unk2;
-        };
-
-        Tegra::GPUVAddr Address() const {
-            return (static_cast<Tegra::GPUVAddr>(gpu_va_hi) << 32) | entry0;
-        }
-    };
-    static_assert(sizeof(IoctlGpfifoEntry) == 8, "IoctlGpfifoEntry is incorrect size");
-
     struct IoctlSubmitGpfifo {
         u64_le address;     // pointer to gpfifo entry structs
         u32_le num_entries; // number of fence objects being submitted
diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp
index d5831e752..2625ddfdc 100644
--- a/src/video_core/command_processor.cpp
+++ b/src/video_core/command_processor.cpp
@@ -28,98 +28,111 @@ enum class BufferMethods {
     CountBufferMethods = 0x40,
 };
 
-void GPU::WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params) {
-    LOG_TRACE(HW_GPU,
-              "Processing method {:08X} on subchannel {} value "
-              "{:08X} remaining params {}",
-              method, subchannel, value, remaining_params);
+MICROPROFILE_DEFINE(ProcessCommandLists, "GPU", "Execute command buffer", MP_RGB(128, 128, 192));
 
-    ASSERT(subchannel < bound_engines.size());
+/// Executes each command list referenced by `commands`, in order. Every header supplies the GPU
+/// virtual address of one list and its length, counted in CommandHeader-sized words.
+void GPU::ProcessCommandLists(const std::vector<CommandListHeader>& commands) {
+    MICROPROFILE_SCOPE(ProcessCommandLists);
 
-    if (method == static_cast<u32>(BufferMethods::BindObject)) {
-        // Bind the current subchannel to the desired engine id.
-        LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
-        bound_engines[subchannel] = static_cast<EngineID>(value);
-        return;
-    }
+    // Writes a single register in the engine bound to the specified subchannel.
+    auto WriteReg = [this](u32 method, u32 subchannel, u32 value, u32 remaining_params) {
+        LOG_TRACE(HW_GPU,
+                  "Processing method {:08X} on subchannel {} value "
+                  "{:08X} remaining params {}",
+                  method, subchannel, value, remaining_params);
 
-    if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
-        // TODO(Subv): Research and implement these methods.
-        LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
-        return;
-    }
+        ASSERT(subchannel < bound_engines.size());
 
-    const EngineID engine = bound_engines[subchannel];
-
-    switch (engine) {
-    case EngineID::FERMI_TWOD_A:
-        fermi_2d->WriteReg(method, value);
-        break;
-    case EngineID::MAXWELL_B:
-        maxwell_3d->WriteReg(method, value, remaining_params);
-        break;
-    case EngineID::MAXWELL_COMPUTE_B:
-        maxwell_compute->WriteReg(method, value);
-        break;
-    case EngineID::MAXWELL_DMA_COPY_A:
-        maxwell_dma->WriteReg(method, value);
-        break;
-    default:
-        UNIMPLEMENTED_MSG("Unimplemented engine");
-    }
-}
-
-void GPU::ProcessCommandList(GPUVAddr address, u32 size) {
-    const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
-    VAddr current_addr = *head_address;
-    while (current_addr < *head_address + size * sizeof(CommandHeader)) {
-        const CommandHeader header = {Memory::Read32(current_addr)};
-        current_addr += sizeof(u32);
-
-        switch (header.mode.Value()) {
-        case SubmissionMode::IncreasingOld:
-        case SubmissionMode::Increasing: {
-            // Increase the method value with each argument.
-            for (unsigned i = 0; i < header.arg_count; ++i) {
-                WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
-                         header.arg_count - i - 1);
-                current_addr += sizeof(u32);
-            }
-            break;
+        if (method == static_cast<u32>(BufferMethods::BindObject)) {
+            // Bind the current subchannel to the desired engine id.
+            LOG_DEBUG(HW_GPU, "Binding subchannel {} to engine {}", subchannel, value);
+            bound_engines[subchannel] = static_cast<EngineID>(value);
+            return;
         }
-        case SubmissionMode::NonIncreasingOld:
-        case SubmissionMode::NonIncreasing: {
-            // Use the same method value for all arguments.
-            for (unsigned i = 0; i < header.arg_count; ++i) {
-                WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
-                         header.arg_count - i - 1);
-                current_addr += sizeof(u32);
-            }
-            break;
-        }
-        case SubmissionMode::IncreaseOnce: {
-            ASSERT(header.arg_count.Value() >= 1);
 
-            // Use the original method for the first argument and then the next method for all other
-            // arguments.
-            WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
-                     header.arg_count - 1);
+        if (method < static_cast<u32>(BufferMethods::CountBufferMethods)) {
+            // TODO(Subv): Research and implement these methods.
+            LOG_ERROR(HW_GPU, "Special buffer methods other than Bind are not implemented");
+            return;
+        }
+
+        const EngineID engine = bound_engines[subchannel];
+
+        switch (engine) {
+        case EngineID::FERMI_TWOD_A:
+            fermi_2d->WriteReg(method, value);
+            break;
+        case EngineID::MAXWELL_B:
+            maxwell_3d->WriteReg(method, value, remaining_params);
+            break;
+        case EngineID::MAXWELL_COMPUTE_B:
+            maxwell_compute->WriteReg(method, value);
+            break;
+        case EngineID::MAXWELL_DMA_COPY_A:
+            maxwell_dma->WriteReg(method, value);
+            break;
+        default:
+            UNIMPLEMENTED_MSG("Unimplemented engine");
+        }
+    };
+
+    for (const auto& entry : commands) {
+        const Tegra::GPUVAddr address = entry.Address();
+        const u32 size = entry.sz;
+        const boost::optional<VAddr> head_address = memory_manager->GpuToCpuAddress(address);
+        // Dereferencing an empty optional is undefined behaviour; fail loudly on unmapped lists.
+        ASSERT_MSG(head_address, "Command list at GPU address 0x{:X} is not mapped", address);
+        VAddr current_addr = *head_address;
+        while (current_addr < *head_address + size * sizeof(CommandHeader)) {
+            const CommandHeader header = {Memory::Read32(current_addr)};
             current_addr += sizeof(u32);
 
-            for (unsigned i = 1; i < header.arg_count; ++i) {
-                WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
-                         header.arg_count - i - 1);
-                current_addr += sizeof(u32);
+            switch (header.mode.Value()) {
+            case SubmissionMode::IncreasingOld:
+            case SubmissionMode::Increasing: {
+                // Increase the method value with each argument.
+                for (unsigned i = 0; i < header.arg_count; ++i) {
+                    WriteReg(header.method + i, header.subchannel, Memory::Read32(current_addr),
+                             header.arg_count - i - 1);
+                    current_addr += sizeof(u32);
+                }
+                break;
+            }
+            case SubmissionMode::NonIncreasingOld:
+            case SubmissionMode::NonIncreasing: {
+                // Use the same method value for all arguments.
+                for (unsigned i = 0; i < header.arg_count; ++i) {
+                    WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
+                             header.arg_count - i - 1);
+                    current_addr += sizeof(u32);
+                }
+                break;
+            }
+            case SubmissionMode::IncreaseOnce: {
+                ASSERT(header.arg_count.Value() >= 1);
+
+                // Use the original method for the first argument and then the next method for all
+                // other arguments.
+                WriteReg(header.method, header.subchannel, Memory::Read32(current_addr),
+                         header.arg_count - 1);
+                current_addr += sizeof(u32);
+
+                for (unsigned i = 1; i < header.arg_count; ++i) {
+                    WriteReg(header.method + 1, header.subchannel, Memory::Read32(current_addr),
+                             header.arg_count - i - 1);
+                    current_addr += sizeof(u32);
+                }
+                break;
+            }
+            case SubmissionMode::Inline: {
+                // The register value is stored in the bits 16-28 as an immediate
+                WriteReg(header.method, header.subchannel, header.inline_data, 0);
+                break;
+            }
+            default:
+                UNIMPLEMENTED();
             }
-            break;
-        }
-        case SubmissionMode::Inline: {
-            // The register value is stored in the bits 16-28 as an immediate
-            WriteReg(header.method, header.subchannel, header.inline_data, 0);
-            break;
-        }
-        default:
-            UNIMPLEMENTED();
         }
     }
 }
diff --git a/src/video_core/command_processor.h b/src/video_core/command_processor.h
index a01153e0b..bd766e77a 100644
--- a/src/video_core/command_processor.h
+++ b/src/video_core/command_processor.h
@@ -7,6 +7,7 @@
 #include <type_traits>
 #include "common/bit_field.h"
 #include "common/common_types.h"
+#include "video_core/memory_manager.h"
 
 namespace Tegra {
 
@@ -19,6 +20,22 @@ enum class SubmissionMode : u32 {
     IncreaseOnce = 5
 };
 
+struct CommandListHeader { // One GPFIFO entry: locates a command list in GPU memory.
+    u32 entry0; // gpu_va_lo: low 32 bits of the command list's GPU virtual address
+    union {
+        u32 entry1; // gpu_va_hi | (unk_0x02 << 0x08) | (size << 0x0A) | (unk_0x01 << 0x1F)
+        BitField<0, 8, u32> gpu_va_hi; // bits 32-39 of the GPU virtual address
+        BitField<8, 2, u32> unk1;      // purpose unknown
+        BitField<10, 21, u32> sz;      // list length, in CommandHeader-sized entries
+        BitField<31, 1, u32> unk2;     // purpose unknown
+    };
+
+    GPUVAddr Address() const {
+        return (static_cast<GPUVAddr>(gpu_va_hi) << 32) | entry0; // 40-bit GPU virtual address
+    }
+};
+static_assert(sizeof(CommandListHeader) == 8, "CommandListHeader is incorrect size");
+
 union CommandHeader {
     u32 hex;
 
diff --git a/src/video_core/engines/maxwell_3d.cpp b/src/video_core/engines/maxwell_3d.cpp
index 1308080b5..329079ddd 100644
--- a/src/video_core/engines/maxwell_3d.cpp
+++ b/src/video_core/engines/maxwell_3d.cpp
@@ -135,8 +135,6 @@ void Maxwell3D::WriteReg(u32 method, u32 value, u32 remaining_params) {
         break;
     }
 
-    rasterizer.NotifyMaxwellRegisterChanged(method);
-
     if (debug_context) {
         debug_context->OnEvent(Tegra::DebugContext::Event::MaxwellCommandProcessed, nullptr);
     }
diff --git a/src/video_core/gpu.h b/src/video_core/gpu.h
index d29f31f52..4f71f99d7 100644
--- a/src/video_core/gpu.h
+++ b/src/video_core/gpu.h
@@ -6,6 +6,7 @@
 
 #include <array>
 #include <memory>
+#include <vector>
 #include "common/common_types.h"
 #include "core/hle/service/nvflinger/buffer_queue.h"
 #include "video_core/memory_manager.h"
@@ -67,6 +68,7 @@ u32 RenderTargetBytesPerPixel(RenderTargetFormat format);
 /// Returns the number of bytes per pixel of each depth format.
 u32 DepthFormatBytesPerPixel(DepthFormat format);
 
+struct CommandListHeader;
 class DebugContext;
 
 /**
@@ -115,7 +117,7 @@ public:
     ~GPU();
 
     /// Processes a command list stored at the specified address in GPU memory.
-    void ProcessCommandList(GPUVAddr address, u32 size);
+    void ProcessCommandLists(const std::vector<CommandListHeader>& commands);
 
     /// Returns a reference to the Maxwell3D GPU engine.
     Engines::Maxwell3D& Maxwell3D();
@@ -130,9 +132,6 @@ public:
     const Tegra::MemoryManager& MemoryManager() const;
 
 private:
-    /// Writes a single register in the engine bound to the specified subchannel
-    void WriteReg(u32 method, u32 subchannel, u32 value, u32 remaining_params);
-
     std::unique_ptr<Tegra::MemoryManager> memory_manager;
 
     /// Mapping of command subchannels to their bound engine ids.
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index 9d78e8b6b..cd819d69f 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -20,9 +20,6 @@ public:
     /// Clear the current framebuffer
     virtual void Clear() = 0;
 
-    /// Notify rasterizer that the specified Maxwell register has been changed
-    virtual void NotifyMaxwellRegisterChanged(u32 method) = 0;
-
     /// Notify rasterizer that all caches should be flushed to Switch memory
     virtual void FlushAll() = 0;
 
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index c7e2c877c..fdfca767a 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -527,8 +527,6 @@ void RasterizerOpenGL::DrawArrays() {
     state.Apply();
 }
 
-void RasterizerOpenGL::NotifyMaxwellRegisterChanged(u32 method) {}
-
 void RasterizerOpenGL::FlushAll() {}
 
 void RasterizerOpenGL::FlushRegion(VAddr addr, u64 size) {}
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h
index 3d62cc196..eaf31ae96 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.h
+++ b/src/video_core/renderer_opengl/gl_rasterizer.h
@@ -45,7 +45,6 @@ public:
 
     void DrawArrays() override;
     void Clear() override;
-    void NotifyMaxwellRegisterChanged(u32 method) override;
     void FlushAll() override;
     void FlushRegion(VAddr addr, u64 size) override;
     void InvalidateRegion(VAddr addr, u64 size) override;