From 559024593086d04e24a99a9f77490a3f97cf952d Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Tue, 1 May 2018 22:21:38 -0400
Subject: [PATCH 01/18] core: Move common CPU core things to its own class.

---
 src/core/CMakeLists.txt |  2 ++
 src/core/core.cpp       | 57 ++++++--------------------------
 src/core/core.h         | 16 ++++-----
 src/core/core_cpu.cpp   | 72 +++++++++++++++++++++++++++++++++++++++++
 src/core/core_cpu.h     | 46 ++++++++++++++++++++++++++
 5 files changed, 135 insertions(+), 58 deletions(-)
 create mode 100644 src/core/core_cpu.cpp
 create mode 100644 src/core/core_cpu.h
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index a41e22f4a..821d2f883 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -4,6 +4,8 @@ add_library(core STATIC
     arm/unicorn/arm_unicorn.h
     core.cpp
     core.h
+    core_cpu.cpp
+    core_cpu.h
     core_timing.cpp
     core_timing.h
     file_sys/directory.h
diff --git a/src/core/core.cpp b/src/core/core.cpp
index 9e2229d02..0af78c18c 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -5,10 +5,6 @@
 #include <memory>
 #include <utility>
 #include "common/logging/log.h"
-#ifdef ARCHITECTURE_x86_64
-#include "core/arm/dynarmic/arm_dynarmic.h"
-#endif
-#include "core/arm/unicorn/arm_unicorn.h"
 #include "core/core.h"
 #include "core/core_timing.h"
 #include "core/gdbstub/gdbstub.h"
@@ -33,9 +29,6 @@ System::~System() = default;
 
 System::ResultStatus System::RunLoop(bool tight_loop) {
     status = ResultStatus::Success;
-    if (!cpu_core) {
-        return ResultStatus::ErrorNotInitialized;
-    }
 
     if (GDBStub::IsServerEnabled()) {
         GDBStub::HandlePacket();
@@ -52,24 +45,7 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
         }
     }
 
-    // If we don't have a currently active thread then don't execute instructions,
-    // instead advance to the next event and try to yield to the next thread
-    if (Kernel::GetCurrentThread() == nullptr) {
-        NGLOG_TRACE(Core_ARM, "Idling");
-        CoreTiming::Idle();
-        CoreTiming::Advance();
-        PrepareReschedule();
-    } else {
-        CoreTiming::Advance();
-        if (tight_loop) {
-            cpu_core->Run();
-        } else {
-            cpu_core->Step();
-        }
-    }
-
-    HW::Update();
-    Reschedule();
+    cpu_cores[0]->RunLoop(tight_loop);
 
     return status;
 }
@@ -133,23 +109,13 @@ System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& file
 }
 
 void System::PrepareReschedule() {
-    cpu_core->PrepareReschedule();
-    reschedule_pending = true;
+    cpu_cores[0]->PrepareReschedule();
 }
 
 PerfStats::Results System::GetAndResetPerfStats() {
     return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs());
 }
 
-void System::Reschedule() {
-    if (!reschedule_pending) {
-        return;
-    }
-
-    reschedule_pending = false;
-    Core::System::GetInstance().Scheduler().Reschedule();
-}
-
 System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
     NGLOG_DEBUG(HW_Memory, "initialized OK");
 
@@ -157,15 +123,8 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
 
     current_process = Kernel::Process::Create("main");
 
-    if (Settings::values.use_cpu_jit) {
-#ifdef ARCHITECTURE_x86_64
-        cpu_core = std::make_shared<ARM_Dynarmic>();
-#else
-        cpu_core = std::make_shared<ARM_Unicorn>();
-        NGLOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
-#endif
-    } else {
-        cpu_core = std::make_shared<ARM_Unicorn>();
+    for (auto& cpu_core : cpu_cores) {
+        cpu_core = std::make_unique<Cpu>();
     }
 
     gpu_core = std::make_unique<Tegra::GPU>();
@@ -176,7 +135,6 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
 
     HW::Init();
     Kernel::Init(system_mode);
-    scheduler = std::make_unique<Kernel::Scheduler>(cpu_core.get());
     Service::Init(service_manager);
     GDBStub::Init();
 
@@ -207,13 +165,16 @@ void System::Shutdown() {
     VideoCore::Shutdown();
     GDBStub::Shutdown();
     Service::Shutdown();
-    scheduler.reset();
     Kernel::Shutdown();
     HW::Shutdown();
     service_manager.reset();
     telemetry_session.reset();
     gpu_core.reset();
-    cpu_core.reset();
+
+    for (auto& cpu_core : cpu_cores) {
+        cpu_core.reset();
+    }
+
     CoreTiming::Shutdown();
 
     app_loader.reset();
diff --git a/src/core/core.h b/src/core/core.h
index f81cbfb3c..6e6cc7579 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -4,9 +4,11 @@
 
 #pragma once
 
+#include <array>
 #include <memory>
 #include <string>
 #include "common/common_types.h"
+#include "core/core_cpu.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/scheduler.h"
 #include "core/loader/loader.h"
@@ -89,7 +91,7 @@ public:
      * @returns True if the emulated system is powered on, otherwise false.
      */
     bool IsPoweredOn() const {
-        return cpu_core != nullptr;
+        return cpu_cores[0] != nullptr;
     }
 
     /**
@@ -110,7 +112,7 @@ public:
      * @returns A reference to the emulated CPU.
      */
     ARM_Interface& CPU() {
-        return *cpu_core;
+        return cpu_cores[0]->CPU();
     }
 
     Tegra::GPU& GPU() {
@@ -118,7 +120,7 @@ public:
     }
 
     Kernel::Scheduler& Scheduler() {
-        return *scheduler;
+        return cpu_cores[0]->Scheduler();
     }
 
     Kernel::SharedPtr<Kernel::Process>& CurrentProcess() {
@@ -163,18 +165,12 @@ private:
      */
     ResultStatus Init(EmuWindow* emu_window, u32 system_mode);
 
-    /// Reschedule the core emulation
-    void Reschedule();
-
     /// AppLoader used to load the current executing application
     std::unique_ptr<Loader::AppLoader> app_loader;
 
-    std::shared_ptr<ARM_Interface> cpu_core;
-    std::unique_ptr<Kernel::Scheduler> scheduler;
+    std::array<std::unique_ptr<Cpu>, 4> cpu_cores;
     std::unique_ptr<Tegra::GPU> gpu_core;
-
     std::shared_ptr<Tegra::DebugContext> debug_context;
-
     Kernel::SharedPtr<Kernel::Process> current_process;
 
     /// When true, signals that a reschedule should happen
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
new file mode 100644
index 000000000..81c0e212d
--- /dev/null
+++ b/src/core/core_cpu.cpp
@@ -0,0 +1,72 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include "common/logging/log.h"
+#ifdef ARCHITECTURE_x86_64
+#include "core/arm/dynarmic/arm_dynarmic.h"
+#endif
+#include "core/arm/unicorn/arm_unicorn.h"
+#include "core/core_cpu.h"
+#include "core/core_timing.h"
+#include "core/hle/kernel/kernel.h"
+#include "core/hle/kernel/scheduler.h"
+#include "core/hle/kernel/thread.h"
+#include "core/settings.h"
+
+namespace Core {
+
+Cpu::Cpu() {
+    if (Settings::values.use_cpu_jit) {
+#ifdef ARCHITECTURE_x86_64
+        arm_interface = std::make_shared<ARM_Dynarmic>();
+#else
+        arm_interface = std::make_shared<ARM_Unicorn>();
+        NGLOG_WARNING(Core, "CPU JIT requested, but Dynarmic not available");
+#endif
+    } else {
+        arm_interface = std::make_shared<ARM_Unicorn>();
+    }
+
+    scheduler = std::make_unique<Kernel::Scheduler>(arm_interface.get());
+}
+
+void Cpu::RunLoop(bool tight_loop) {
+    // If we don't have a currently active thread then don't execute instructions,
+    // instead advance to the next event and try to yield to the next thread
+    if (Kernel::GetCurrentThread() == nullptr) {
+        NGLOG_TRACE(Core, "Idling");
+        CoreTiming::Idle();
+        CoreTiming::Advance();
+        PrepareReschedule();
+    } else {
+        CoreTiming::Advance();
+        if (tight_loop) {
+            arm_interface->Run();
+        } else {
+            arm_interface->Step();
+        }
+    }
+
+    Reschedule();
+}
+
+void Cpu::SingleStep() {
+    return RunLoop(false);
+}
+
+void Cpu::PrepareReschedule() {
+    arm_interface->PrepareReschedule();
+    reschedule_pending = true;
+}
+
+void Cpu::Reschedule() {
+    if (!reschedule_pending) {
+        return;
+    }
+
+    reschedule_pending = false;
+    scheduler->Reschedule();
+}
+
+} // namespace Core
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
new file mode 100644
index 000000000..312db1655
--- /dev/null
+++ b/src/core/core_cpu.h
@@ -0,0 +1,46 @@
+// Copyright 2018 yuzu emulator team
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#pragma once
+
+#include <memory>
+#include <string>
+#include "common/common_types.h"
+
+class ARM_Interface;
+
+namespace Kernel {
+class Scheduler;
+}
+
+namespace Core {
+
+class Cpu {
+public:
+    Cpu();
+
+    void RunLoop(bool tight_loop = true);
+
+    void SingleStep();
+
+    void PrepareReschedule();
+
+    ARM_Interface& CPU() {
+        return *arm_interface;
+    }
+
+    Kernel::Scheduler& Scheduler() {
+        return *scheduler;
+    }
+
+private:
+    void Reschedule();
+
+    std::shared_ptr<ARM_Interface> arm_interface;
+    std::unique_ptr<Kernel::Scheduler> scheduler;
+
+    bool reschedule_pending{};
+};
+
+} // namespace Core

From 9776ff91797423a9cf19571faafe4648fb5a1d1d Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 2 May 2018 21:26:14 -0400
Subject: [PATCH 02/18] core: Create a thread for each CPU core, keep in
 lock-step with a barrier.

---
 src/core/core.cpp     | 30 +++++++++++++++++++++++++-----
 src/core/core.h       | 24 +++++++++++++++++-------
 src/core/core_cpu.cpp | 25 ++++++++++++++++++++-----
 src/core/core_cpu.h   | 33 ++++++++++++++++++++++++++++++++-
 4 files changed, 94 insertions(+), 18 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index 0af78c18c..066423f23 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -27,6 +27,13 @@ namespace Core {
 
 System::~System() = default;
 
+/// Runs a CPU core while the system is powered on
+static void RunCpuCore(std::shared_ptr<Cpu> cpu_state) {
+    while (Core::System::GetInstance().IsPoweredOn()) {
+        cpu_state->RunLoop(true);
+    }
+}
+
 System::ResultStatus System::RunLoop(bool tight_loop) {
     status = ResultStatus::Success;
 
@@ -109,7 +116,7 @@ System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& file
 }
 
 void System::PrepareReschedule() {
-    cpu_cores[0]->PrepareReschedule();
+    CurrentCpuCore().PrepareReschedule();
 }
 
 PerfStats::Results System::GetAndResetPerfStats() {
@@ -123,14 +130,13 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
 
     current_process = Kernel::Process::Create("main");
 
-    for (auto& cpu_core : cpu_cores) {
-        cpu_core = std::make_unique<Cpu>();
+    cpu_barrier = std::make_shared<CpuBarrier>();
+    for (size_t index = 0; index < cpu_cores.size(); ++index) {
+        cpu_cores[index] = std::make_shared<Cpu>(cpu_barrier, index);
     }
 
     gpu_core = std::make_unique<Tegra::GPU>();
-
     telemetry_session = std::make_unique<Core::TelemetrySession>();
-
     service_manager = std::make_shared<Service::SM::ServiceManager>();
 
     HW::Init();
@@ -142,6 +148,14 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
         return ResultStatus::ErrorVideoCore;
     }
 
+    // Create threads for CPU cores 1-3, and build thread_to_cpu map
+    // CPU core 0 is run on the main thread
+    thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
+    for (size_t index = 0; index < cpu_core_threads.size(); ++index) {
+        cpu_core_threads[index] = std::make_unique<std::thread>(RunCpuCore, cpu_cores[index + 1]);
+        thread_to_cpu[cpu_core_threads[index]->get_id()] = cpu_cores[index + 1];
+    }
+
     NGLOG_DEBUG(Core, "Initialized OK");
 
     // Reset counters and set time origin to current frame
@@ -171,9 +185,15 @@ void System::Shutdown() {
     telemetry_session.reset();
     gpu_core.reset();
 
+    // Close all CPU/threading state
+    thread_to_cpu.clear();
     for (auto& cpu_core : cpu_cores) {
         cpu_core.reset();
     }
+    for (auto& thread : cpu_core_threads) {
+        thread->join();
+        thread.reset();
+    }
 
     CoreTiming::Shutdown();
 
diff --git a/src/core/core.h b/src/core/core.h
index 6e6cc7579..21a0b074b 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -7,6 +7,7 @@
 #include <array>
 #include <memory>
 #include <string>
+#include <thread>
 #include "common/common_types.h"
 #include "core/core_cpu.h"
 #include "core/hle/kernel/kernel.h"
@@ -112,7 +113,7 @@ public:
      * @returns A reference to the emulated CPU.
      */
     ARM_Interface& CPU() {
-        return cpu_cores[0]->CPU();
+        return CurrentCpuCore().CPU();
     }
 
     Tegra::GPU& GPU() {
@@ -120,7 +121,7 @@ public:
     }
 
     Kernel::Scheduler& Scheduler() {
-        return cpu_cores[0]->Scheduler();
+        return CurrentCpuCore().Scheduler();
     }
 
     Kernel::SharedPtr<Kernel::Process>& CurrentProcess() {
@@ -157,6 +158,14 @@ public:
     }
 
 private:
+    /// Returns the current CPU core based on the calling host thread
+    Cpu& CurrentCpuCore() {
+        const auto& search = thread_to_cpu.find(std::this_thread::get_id());
+        ASSERT(search != thread_to_cpu.end());
+        ASSERT(search->second);
+        return *search->second;
+    }
+
     /**
      * Initialize the emulated system.
      * @param emu_window Pointer to the host-system window used for video output and keyboard input.
@@ -167,14 +176,12 @@ private:
 
     /// AppLoader used to load the current executing application
     std::unique_ptr<Loader::AppLoader> app_loader;
-
-    std::array<std::unique_ptr<Cpu>, 4> cpu_cores;
     std::unique_ptr<Tegra::GPU> gpu_core;
     std::shared_ptr<Tegra::DebugContext> debug_context;
     Kernel::SharedPtr<Kernel::Process> current_process;
-
-    /// When true, signals that a reschedule should happen
-    bool reschedule_pending{};
+    std::shared_ptr<CpuBarrier> cpu_barrier;
+    std::array<std::shared_ptr<Cpu>, NUM_CPU_CORES> cpu_cores;
+    std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> cpu_core_threads;
 
     /// Service manager
     std::shared_ptr<Service::SM::ServiceManager> service_manager;
@@ -186,6 +193,9 @@ private:
 
     ResultStatus status = ResultStatus::Success;
     std::string status_details = "";
+
+    /// Map of host threads to CPU cores
+    std::map<std::thread::id, std::shared_ptr<Cpu>> thread_to_cpu;
 };
 
 inline ARM_Interface& CPU() {
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 81c0e212d..6bdfdd7df 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -2,6 +2,9 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include <condition_variable>
+#include <mutex>
+
 #include "common/logging/log.h"
 #ifdef ARCHITECTURE_x86_64
 #include "core/arm/dynarmic/arm_dynarmic.h"
@@ -16,7 +19,9 @@
 
 namespace Core {
 
-Cpu::Cpu() {
+Cpu::Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index)
+    : cpu_barrier{std::move(cpu_barrier)}, core_index{core_index} {
+
     if (Settings::values.use_cpu_jit) {
 #ifdef ARCHITECTURE_x86_64
         arm_interface = std::make_shared<ARM_Dynarmic>();
@@ -32,15 +37,25 @@ Cpu::Cpu() {
 }
 
 void Cpu::RunLoop(bool tight_loop) {
+    // Wait for all other CPU cores to complete the previous slice, such that they run in lock-step
+    cpu_barrier->Rendezvous();
+
     // If we don't have a currently active thread then don't execute instructions,
     // instead advance to the next event and try to yield to the next thread
     if (Kernel::GetCurrentThread() == nullptr) {
-        NGLOG_TRACE(Core, "Idling");
-        CoreTiming::Idle();
-        CoreTiming::Advance();
+        NGLOG_TRACE(Core, "Core-{} idling", core_index);
+
+        if (IsMainCore()) {
+            CoreTiming::Idle();
+            CoreTiming::Advance();
+        }
+
         PrepareReschedule();
     } else {
-        CoreTiming::Advance();
+        if (IsMainCore()) {
+            CoreTiming::Advance();
+        }
+
         if (tight_loop) {
             arm_interface->Run();
         } else {
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index 312db1655..e6ed698cc 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -4,7 +4,9 @@
 
 #pragma once
 
+#include <condition_variable>
 #include <memory>
+#include <mutex>
 #include <string>
 #include "common/common_types.h"
 
@@ -16,9 +18,32 @@ class Scheduler;
 
 namespace Core {
 
+constexpr unsigned NUM_CPU_CORES{4};
+
+class CpuBarrier {
+public:
+    void Rendezvous() {
+        std::unique_lock<std::mutex> lock(mutex);
+
+        --cores_waiting;
+        if (!cores_waiting) {
+            cores_waiting = NUM_CPU_CORES;
+            condition.notify_all();
+            return;
+        }
+
+        condition.wait(lock);
+    }
+
+private:
+    unsigned cores_waiting{NUM_CPU_CORES};
+    std::mutex mutex;
+    std::condition_variable condition;
+};
+
 class Cpu {
 public:
-    Cpu();
+    Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index);
 
     void RunLoop(bool tight_loop = true);
 
@@ -34,13 +59,19 @@ public:
         return *scheduler;
     }
 
+    bool IsMainCore() const {
+        return core_index == 0;
+    }
+
 private:
     void Reschedule();
 
     std::shared_ptr<ARM_Interface> arm_interface;
+    std::shared_ptr<CpuBarrier> cpu_barrier;
     std::unique_ptr<Kernel::Scheduler> scheduler;
 
     bool reschedule_pending{};
+    size_t core_index;
 };
 
 } // namespace Core

From a434fdcb102e96ddf564dc0973d7073d49bf19fc Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Wed, 2 May 2018 22:36:51 -0400
Subject: [PATCH 03/18] core: Implement multicore support.

---
 src/core/arm/unicorn/arm_unicorn.cpp |  2 +-
 src/core/core.h                      | 26 ++++++++++-------
 src/core/core_cpu.cpp                |  2 +-
 src/core/core_cpu.h                  | 12 +++++---
 src/core/gdbstub/gdbstub.cpp         | 24 ++++++++--------
 src/core/hle/kernel/svc.cpp          | 43 ++++++++++++++--------------
 src/core/hle/kernel/svc_wrap.h       | 24 ++++++++--------
 src/core/hle/kernel/thread.cpp       | 16 +++++------
 src/core/hle/kernel/thread.h         |  4 +++
 src/core/hle/kernel/vm_manager.cpp   | 23 ++++++++++++---
 src/core/memory.cpp                  |  9 ++++--
 src/yuzu/debugger/registers.cpp      |  4 +--
 src/yuzu/debugger/wait_tree.cpp      |  2 +-
 13 files changed, 113 insertions(+), 78 deletions(-)

diff --git a/src/core/arm/unicorn/arm_unicorn.cpp b/src/core/arm/unicorn/arm_unicorn.cpp
index 574922130..c0cc62f03 100644
--- a/src/core/arm/unicorn/arm_unicorn.cpp
+++ b/src/core/arm/unicorn/arm_unicorn.cpp
@@ -52,7 +52,7 @@ static void InterruptHook(uc_engine* uc, u32 intNo, void* user_data) {
 static bool UnmappedMemoryHook(uc_engine* uc, uc_mem_type type, u64 addr, int size, u64 value,
                                void* user_data) {
     ARM_Interface::ThreadContext ctx{};
-    Core::CPU().SaveContext(ctx);
+    Core::CurrentArmInterface().SaveContext(ctx);
     ASSERT_MSG(false, "Attempted to read from unmapped memory: 0x{:X}, pc=0x{:X}, lr=0x{:X}", addr,
                ctx.pc, ctx.cpu_registers[30]);
     return {};
diff --git a/src/core/core.h b/src/core/core.h
index 21a0b074b..3e0a7e6a7 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -108,20 +108,26 @@ public:
 
     PerfStats::Results GetAndResetPerfStats();
 
-    /**
-     * Gets a reference to the emulated CPU.
-     * @returns A reference to the emulated CPU.
-     */
-    ARM_Interface& CPU() {
-        return CurrentCpuCore().CPU();
+    ARM_Interface& CurrentArmInterface() {
+        return CurrentCpuCore().ArmInterface();
+    }
+
+    ARM_Interface& ArmInterface(size_t core_index) {
+        ASSERT(core_index < NUM_CPU_CORES);
+        return cpu_cores[core_index]->ArmInterface();
     }
 
     Tegra::GPU& GPU() {
         return *gpu_core;
     }
 
-    Kernel::Scheduler& Scheduler() {
-        return CurrentCpuCore().Scheduler();
+    Kernel::Scheduler& CurrentScheduler() {
+        return *CurrentCpuCore().Scheduler();
+    }
+
+    const std::shared_ptr<Kernel::Scheduler>& Scheduler(size_t core_index) {
+        ASSERT(core_index < NUM_CPU_CORES);
+        return cpu_cores[core_index]->Scheduler();
     }
 
     Kernel::SharedPtr<Kernel::Process>& CurrentProcess() {
@@ -198,8 +204,8 @@ private:
     std::map<std::thread::id, std::shared_ptr<Cpu>> thread_to_cpu;
 };
 
-inline ARM_Interface& CPU() {
-    return System::GetInstance().CPU();
+inline ARM_Interface& CurrentArmInterface() {
+    return System::GetInstance().CurrentArmInterface();
 }
 
 inline TelemetrySession& Telemetry() {
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index 6bdfdd7df..a556f12e9 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -33,7 +33,7 @@ Cpu::Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index)
         arm_interface = std::make_shared<ARM_Unicorn>();
     }
 
-    scheduler = std::make_unique<Kernel::Scheduler>(arm_interface.get());
+    scheduler = std::make_shared<Kernel::Scheduler>(arm_interface.get());
 }
 
 void Cpu::RunLoop(bool tight_loop) {
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index e6ed698cc..06784c4ab 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -51,12 +51,16 @@ public:
 
     void PrepareReschedule();
 
-    ARM_Interface& CPU() {
+    ARM_Interface& ArmInterface() {
         return *arm_interface;
     }
 
-    Kernel::Scheduler& Scheduler() {
-        return *scheduler;
+    const ARM_Interface& ArmInterface() const {
+        return *arm_interface;
+    }
+
+    const std::shared_ptr<Kernel::Scheduler>& Scheduler() const {
+        return scheduler;
     }
 
     bool IsMainCore() const {
@@ -68,7 +72,7 @@ private:
 
     std::shared_ptr<ARM_Interface> arm_interface;
     std::shared_ptr<CpuBarrier> cpu_barrier;
-    std::unique_ptr<Kernel::Scheduler> scheduler;
+    std::shared_ptr<Kernel::Scheduler> scheduler;
 
     bool reschedule_pending{};
     size_t core_index;
diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp
index 46606b992..6c5a40ba8 100644
--- a/src/core/gdbstub/gdbstub.cpp
+++ b/src/core/gdbstub/gdbstub.cpp
@@ -598,11 +598,11 @@ static void ReadRegister() {
     }
 
     if (id <= SP_REGISTER) {
-        LongToGdbHex(reply, Core::CPU().GetReg(static_cast<int>(id)));
+        LongToGdbHex(reply, Core::CurrentArmInterface().GetReg(static_cast<int>(id)));
     } else if (id == PC_REGISTER) {
-        LongToGdbHex(reply, Core::CPU().GetPC());
+        LongToGdbHex(reply, Core::CurrentArmInterface().GetPC());
     } else if (id == CPSR_REGISTER) {
-        IntToGdbHex(reply, Core::CPU().GetCPSR());
+        IntToGdbHex(reply, Core::CurrentArmInterface().GetCPSR());
     } else {
         return SendReply("E01");
     }
@@ -618,16 +618,16 @@ static void ReadRegisters() {
     u8* bufptr = buffer;
 
     for (int reg = 0; reg <= SP_REGISTER; reg++) {
-        LongToGdbHex(bufptr + reg * 16, Core::CPU().GetReg(reg));
+        LongToGdbHex(bufptr + reg * 16, Core::CurrentArmInterface().GetReg(reg));
     }
 
     bufptr += (32 * 16);
 
-    LongToGdbHex(bufptr, Core::CPU().GetPC());
+    LongToGdbHex(bufptr, Core::CurrentArmInterface().GetPC());
 
     bufptr += 16;
 
-    IntToGdbHex(bufptr, Core::CPU().GetCPSR());
+    IntToGdbHex(bufptr, Core::CurrentArmInterface().GetCPSR());
 
     bufptr += 8;
 
@@ -646,11 +646,11 @@ static void WriteRegister() {
     }
 
     if (id <= SP_REGISTER) {
-        Core::CPU().SetReg(id, GdbHexToLong(buffer_ptr));
+        Core::CurrentArmInterface().SetReg(id, GdbHexToLong(buffer_ptr));
     } else if (id == PC_REGISTER) {
-        Core::CPU().SetPC(GdbHexToLong(buffer_ptr));
+        Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr));
     } else if (id == CPSR_REGISTER) {
-        Core::CPU().SetCPSR(GdbHexToInt(buffer_ptr));
+        Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr));
     } else {
         return SendReply("E01");
     }
@@ -667,11 +667,11 @@ static void WriteRegisters() {
 
     for (int i = 0, reg = 0; reg <= CPSR_REGISTER; i++, reg++) {
         if (reg <= SP_REGISTER) {
-            Core::CPU().SetReg(reg, GdbHexToLong(buffer_ptr + i * 16));
+            Core::CurrentArmInterface().SetReg(reg, GdbHexToLong(buffer_ptr + i * 16));
         } else if (reg == PC_REGISTER) {
-            Core::CPU().SetPC(GdbHexToLong(buffer_ptr + i * 16));
+            Core::CurrentArmInterface().SetPC(GdbHexToLong(buffer_ptr + i * 16));
         } else if (reg == CPSR_REGISTER) {
-            Core::CPU().SetCPSR(GdbHexToInt(buffer_ptr + i * 16));
+            Core::CurrentArmInterface().SetCPSR(GdbHexToInt(buffer_ptr + i * 16));
         } else {
             UNIMPLEMENTED();
         }
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 72b5c05f2..520510b61 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -485,22 +485,28 @@ static void ExitProcess() {
 
     Core::CurrentProcess()->status = ProcessStatus::Exited;
 
-    // Stop all the process threads that are currently waiting for objects.
-    auto& thread_list = Core::System::GetInstance().Scheduler().GetThreadList();
-    for (auto& thread : thread_list) {
-        if (thread->owner_process != Core::CurrentProcess())
-            continue;
+    auto stop_threads = [](const std::vector<SharedPtr<Thread>>& thread_list) {
+        for (auto& thread : thread_list) {
+            if (thread->owner_process != Core::CurrentProcess())
+                continue;
 
-        if (thread == GetCurrentThread())
-            continue;
+            if (thread == GetCurrentThread())
+                continue;
 
-        // TODO(Subv): When are the other running/ready threads terminated?
-        ASSERT_MSG(thread->status == THREADSTATUS_WAIT_SYNCH_ANY ||
-                       thread->status == THREADSTATUS_WAIT_SYNCH_ALL,
-                   "Exiting processes with non-waiting threads is currently unimplemented");
+            // TODO(Subv): When are the other running/ready threads terminated?
+            ASSERT_MSG(thread->status == THREADSTATUS_WAIT_SYNCH_ANY ||
+                           thread->status == THREADSTATUS_WAIT_SYNCH_ALL,
+                       "Exiting processes with non-waiting threads is currently unimplemented");
 
-        thread->Stop();
-    }
+            thread->Stop();
+        }
+    };
+
+    auto& system = Core::System::GetInstance();
+    stop_threads(system.Scheduler(0)->GetThreadList());
+    stop_threads(system.Scheduler(1)->GetThreadList());
+    stop_threads(system.Scheduler(2)->GetThreadList());
+    stop_threads(system.Scheduler(3)->GetThreadList());
 
     // Kill the current thread
     GetCurrentThread()->Stop();
@@ -530,14 +536,9 @@ static ResultCode CreateThread(Handle* out_handle, VAddr entry_point, u64 arg, V
 
     switch (processor_id) {
     case THREADPROCESSORID_0:
-        break;
     case THREADPROCESSORID_1:
     case THREADPROCESSORID_2:
     case THREADPROCESSORID_3:
-        // TODO(bunnei): Implement support for other processor IDs
-        NGLOG_ERROR(Kernel_SVC,
-                    "Newly created thread must run in another thread ({}), unimplemented.",
-                    processor_id);
         break;
     default:
         ASSERT_MSG(false, "Unsupported thread processor ID: {}", processor_id);
@@ -576,7 +577,7 @@ static ResultCode StartThread(Handle thread_handle) {
 
 /// Called when a thread exits
 static void ExitThread() {
-    NGLOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CPU().GetPC());
+    NGLOG_TRACE(Kernel_SVC, "called, pc=0x{:08X}", Core::CurrentArmInterface().GetPC());
 
     ExitCurrentThread();
     Core::System::GetInstance().PrepareReschedule();
@@ -588,7 +589,7 @@ static void SleepThread(s64 nanoseconds) {
 
     // Don't attempt to yield execution if there are no available threads to run,
     // this way we avoid a useless reschedule to the idle thread.
-    if (nanoseconds == 0 && !Core::System::GetInstance().Scheduler().HaveReadyThreads())
+    if (nanoseconds == 0 && !Core::System::GetInstance().CurrentScheduler().HaveReadyThreads())
         return;
 
     // Sleep current thread and check for next thread to schedule
@@ -634,7 +635,7 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
                 condition_variable_addr, target);
 
     u32 processed = 0;
-    auto& thread_list = Core::System::GetInstance().Scheduler().GetThreadList();
+    auto& thread_list = Core::System::GetInstance().CurrentScheduler().GetThreadList();
 
     for (auto& thread : thread_list) {
         if (thread->condvar_wait_address != condition_variable_addr)
diff --git a/src/core/hle/kernel/svc_wrap.h b/src/core/hle/kernel/svc_wrap.h
index c86ad3e04..40aa88cc1 100644
--- a/src/core/hle/kernel/svc_wrap.h
+++ b/src/core/hle/kernel/svc_wrap.h
@@ -13,14 +13,14 @@
 
 namespace Kernel {
 
-#define PARAM(n) Core::CPU().GetReg(n)
+#define PARAM(n) Core::CurrentArmInterface().GetReg(n)
 
 /**
  * HLE a function return from the current ARM userland process
  * @param res Result to return
  */
 static inline void FuncReturn(u64 res) {
-    Core::CPU().SetReg(0, res);
+    Core::CurrentArmInterface().SetReg(0, res);
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -45,7 +45,7 @@ template <ResultCode func(u32*, u32)>
 void SvcWrap() {
     u32 param_1 = 0;
     u32 retval = func(&param_1, (u32)PARAM(1)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
@@ -53,7 +53,7 @@ template <ResultCode func(u32*, u64)>
 void SvcWrap() {
     u32 param_1 = 0;
     u32 retval = func(&param_1, PARAM(1)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
@@ -66,7 +66,7 @@ template <ResultCode func(u64*, u64)>
 void SvcWrap() {
     u64 param_1 = 0;
     u32 retval = func(&param_1, PARAM(1)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
@@ -85,8 +85,8 @@ void SvcWrap() {
     u32 param_1 = 0;
     u64 param_2 = 0;
     ResultCode retval = func((u32)(PARAM(2) & 0xFFFFFFFF), &param_1, &param_2);
-    Core::CPU().SetReg(1, param_1);
-    Core::CPU().SetReg(2, param_2);
+    Core::CurrentArmInterface().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(2, param_2);
     FuncReturn(retval.raw);
 }
 
@@ -120,7 +120,7 @@ template <ResultCode func(u32*, u64, u64, s64)>
 void SvcWrap() {
     u32 param_1 = 0;
     ResultCode retval = func(&param_1, PARAM(1), (u32)(PARAM(2) & 0xFFFFFFFF), (s64)PARAM(3));
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval.raw);
 }
 
@@ -133,7 +133,7 @@ template <ResultCode func(u64*, u64, u64, u64)>
 void SvcWrap() {
     u64 param_1 = 0;
     u32 retval = func(&param_1, PARAM(1), PARAM(2), PARAM(3)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
@@ -143,7 +143,7 @@ void SvcWrap() {
     u32 retval =
         func(&param_1, PARAM(1), PARAM(2), PARAM(3), (u32)PARAM(4), (s32)(PARAM(5) & 0xFFFFFFFF))
             .raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
@@ -166,7 +166,7 @@ template <ResultCode func(u32*, u64, u64, u32)>
 void SvcWrap() {
     u32 param_1 = 0;
     u32 retval = func(&param_1, PARAM(1), PARAM(2), (u32)(PARAM(3) & 0xFFFFFFFF)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
@@ -175,7 +175,7 @@ void SvcWrap() {
     u32 param_1 = 0;
     u32 retval =
         func(&param_1, PARAM(1), (u32)(PARAM(2) & 0xFFFFFFFF), (u32)(PARAM(3) & 0xFFFFFFFF)).raw;
-    Core::CPU().SetReg(1, param_1);
+    Core::CurrentArmInterface().SetReg(1, param_1);
     FuncReturn(retval);
 }
 
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 1bd5d9ebf..0a5441684 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -64,7 +64,7 @@ void Thread::Stop() {
     // Clean up thread from ready queue
     // This is only needed when the thread is termintated forcefully (SVC TerminateProcess)
     if (status == THREADSTATUS_READY) {
-        Core::System::GetInstance().Scheduler().UnscheduleThread(this, current_priority);
+        scheduler->UnscheduleThread(this, current_priority);
     }
 
     status = THREADSTATUS_DEAD;
@@ -92,7 +92,7 @@ void WaitCurrentThread_Sleep() {
 void ExitCurrentThread() {
     Thread* thread = GetCurrentThread();
     thread->Stop();
-    Core::System::GetInstance().Scheduler().RemoveThread(thread);
+    Core::System::GetInstance().CurrentScheduler().RemoveThread(thread);
 }
 
 /**
@@ -188,7 +188,7 @@ void Thread::ResumeFromWait() {
     wakeup_callback = nullptr;
 
     status = THREADSTATUS_READY;
-    Core::System::GetInstance().Scheduler().ScheduleThread(this, current_priority);
+    scheduler->ScheduleThread(this, current_priority);
     Core::System::GetInstance().PrepareReschedule();
 }
 
@@ -259,8 +259,6 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
 
     SharedPtr<Thread> thread(new Thread);
 
-    Core::System::GetInstance().Scheduler().AddThread(thread, priority);
-
     thread->thread_id = NewThreadId();
     thread->status = THREADSTATUS_DORMANT;
     thread->entry_point = entry_point;
@@ -275,6 +273,8 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
     thread->name = std::move(name);
     thread->callback_handle = wakeup_callback_handle_table.Create(thread).Unwrap();
     thread->owner_process = owner_process;
+    thread->scheduler = Core::System().GetInstance().Scheduler(static_cast<size_t>(processor_id));
+    thread->scheduler->AddThread(thread, priority);
 
     // Find the next available TLS index, and mark it as used
     auto& tls_slots = owner_process->tls_slots;
@@ -337,7 +337,7 @@ void Thread::SetPriority(u32 priority) {
 }
 
 void Thread::BoostPriority(u32 priority) {
-    Core::System::GetInstance().Scheduler().SetThreadPriority(this, priority);
+    scheduler->SetThreadPriority(this, priority);
     current_priority = priority;
 }
 
@@ -406,7 +406,7 @@ void Thread::UpdatePriority() {
     if (new_priority == current_priority)
         return;
 
-    Core::System::GetInstance().Scheduler().SetThreadPriority(this, new_priority);
+    scheduler->SetThreadPriority(this, new_priority);
 
     current_priority = new_priority;
 
@@ -421,7 +421,7 @@ void Thread::UpdatePriority() {
  * Gets the current thread
  */
 Thread* GetCurrentThread() {
-    return Core::System::GetInstance().Scheduler().GetCurrentThread();
+    return Core::System::GetInstance().CurrentScheduler().GetCurrentThread();
 }
 
 void ThreadingInit() {
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index e0a3c0934..0a3bb1183 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <memory>
 #include <string>
 #include <unordered_map>
 #include <vector>
@@ -56,6 +57,7 @@ enum class ThreadWakeupReason {
 namespace Kernel {
 
 class Process;
+class Scheduler;
 
 class Thread final : public WaitObject {
 public:
@@ -240,6 +242,8 @@ public:
     // available. In case of a timeout, the object will be nullptr.
     std::function<WakeupCallback> wakeup_callback;
 
+    std::shared_ptr<Scheduler> scheduler;
+
 private:
     Thread();
     ~Thread() override;
diff --git a/src/core/hle/kernel/vm_manager.cpp b/src/core/hle/kernel/vm_manager.cpp
index 2f0044c11..676e5b282 100644
--- a/src/core/hle/kernel/vm_manager.cpp
+++ b/src/core/hle/kernel/vm_manager.cpp
@@ -104,8 +104,15 @@ ResultVal<VMManager::VMAHandle> VMManager::MapMemoryBlock(VAddr target,
     VirtualMemoryArea& final_vma = vma_handle->second;
     ASSERT(final_vma.size == size);
 
-    Core::CPU().MapBackingMemory(target, size, block->data() + offset,
-                                 VMAPermission::ReadWriteExecute);
+    auto& system = Core::System::GetInstance();
+    system.ArmInterface(0).MapBackingMemory(target, size, block->data() + offset,
+                                            VMAPermission::ReadWriteExecute);
+    system.ArmInterface(1).MapBackingMemory(target, size, block->data() + offset,
+                                            VMAPermission::ReadWriteExecute);
+    system.ArmInterface(2).MapBackingMemory(target, size, block->data() + offset,
+                                            VMAPermission::ReadWriteExecute);
+    system.ArmInterface(3).MapBackingMemory(target, size, block->data() + offset,
+                                            VMAPermission::ReadWriteExecute);
 
     final_vma.type = VMAType::AllocatedMemoryBlock;
     final_vma.permissions = VMAPermission::ReadWrite;
@@ -126,7 +133,11 @@ ResultVal<VMManager::VMAHandle> VMManager::MapBackingMemory(VAddr target, u8* me
     VirtualMemoryArea& final_vma = vma_handle->second;
     ASSERT(final_vma.size == size);
 
-    Core::CPU().MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
+    auto& system = Core::System::GetInstance();
+    system.ArmInterface(0).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
+    system.ArmInterface(1).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
+    system.ArmInterface(2).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
+    system.ArmInterface(3).MapBackingMemory(target, size, memory, VMAPermission::ReadWriteExecute);
 
     final_vma.type = VMAType::BackingMemory;
     final_vma.permissions = VMAPermission::ReadWrite;
@@ -184,7 +195,11 @@ ResultCode VMManager::UnmapRange(VAddr target, u64 size) {
 
     ASSERT(FindVMA(target)->second.size >= size);
 
-    Core::CPU().UnmapMemory(target, size);
+    auto& system = Core::System::GetInstance();
+    system.ArmInterface(0).UnmapMemory(target, size);
+    system.ArmInterface(1).UnmapMemory(target, size);
+    system.ArmInterface(2).UnmapMemory(target, size);
+    system.ArmInterface(3).UnmapMemory(target, size);
 
     return RESULT_SUCCESS;
 }
diff --git a/src/core/memory.cpp b/src/core/memory.cpp
index db8211463..3b81acd63 100644
--- a/src/core/memory.cpp
+++ b/src/core/memory.cpp
@@ -28,8 +28,13 @@ static PageTable* current_page_table = nullptr;
 
 void SetCurrentPageTable(PageTable* page_table) {
     current_page_table = page_table;
-    if (Core::System::GetInstance().IsPoweredOn()) {
-        Core::CPU().PageTableChanged();
+
+    auto& system = Core::System::GetInstance();
+    if (system.IsPoweredOn()) {
+        system.ArmInterface(0).PageTableChanged();
+        system.ArmInterface(1).PageTableChanged();
+        system.ArmInterface(2).PageTableChanged();
+        system.ArmInterface(3).PageTableChanged();
     }
 }
 
diff --git a/src/yuzu/debugger/registers.cpp b/src/yuzu/debugger/registers.cpp
index 06e2d1647..178cc65a7 100644
--- a/src/yuzu/debugger/registers.cpp
+++ b/src/yuzu/debugger/registers.cpp
@@ -63,7 +63,7 @@ void RegistersWidget::OnDebugModeEntered() {
 
     for (int i = 0; i < core_registers->childCount(); ++i)
         core_registers->child(i)->setText(
-            1, QString("0x%1").arg(Core::CPU().GetReg(i), 8, 16, QLatin1Char('0')));
+            1, QString("0x%1").arg(Core::CurrentArmInterface().GetReg(i), 8, 16, QLatin1Char('0')));
 
     UpdateCPSRValues();
 }
@@ -122,7 +122,7 @@ void RegistersWidget::CreateCPSRChildren() {
 }
 
 void RegistersWidget::UpdateCPSRValues() {
-    const u32 cpsr_val = Core::CPU().GetCPSR();
+    const u32 cpsr_val = Core::CurrentArmInterface().GetCPSR();
 
     cpsr->setText(1, QString("0x%1").arg(cpsr_val, 8, 16, QLatin1Char('0')));
     cpsr->child(0)->setText(
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index acc4c2e0b..f05c98cc3 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -51,7 +51,7 @@ std::size_t WaitTreeItem::Row() const {
 }
 
 std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList() {
-    const auto& threads = Core::System::GetInstance().Scheduler().GetThreadList();
+    const auto& threads = Core::System::GetInstance().Scheduler(0)->GetThreadList();
     std::vector<std::unique_ptr<WaitTreeThread>> item_list;
     item_list.reserve(threads.size());
     for (std::size_t i = 0; i < threads.size(); ++i) {

From cba69fdcd439c5f225bbddf1dad70e6326edd0dc Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 3 May 2018 00:16:12 -0400
Subject: [PATCH 04/18] core: Support session close with multicore.

---
 src/core/core.cpp     | 15 +++++++++++----
 src/core/core.h       |  2 +-
 src/core/core_cpu.cpp | 29 ++++++++++++++++++++++++++++-
 src/core/core_cpu.h   | 19 ++++++++-----------
 4 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index 066423f23..1e6be34c8 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -37,6 +37,9 @@ static void RunCpuCore(std::shared_ptr<Cpu> cpu_state) {
 System::ResultStatus System::RunLoop(bool tight_loop) {
     status = ResultStatus::Success;
 
+    // Update thread_to_cpu in case Core 0 is run from a different host thread
+    thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
+
     if (GDBStub::IsServerEnabled()) {
         GDBStub::HandlePacket();
 
@@ -186,17 +189,21 @@ void System::Shutdown() {
     gpu_core.reset();
 
     // Close all CPU/threading state
-    thread_to_cpu.clear();
-    for (auto& cpu_core : cpu_cores) {
-        cpu_core.reset();
-    }
+    cpu_barrier->NotifyEnd();
     for (auto& thread : cpu_core_threads) {
         thread->join();
         thread.reset();
     }
+    thread_to_cpu.clear();
+    for (auto& cpu_core : cpu_cores) {
+        cpu_core.reset();
+    }
+    cpu_barrier.reset();
 
+    // Close core timing
     CoreTiming::Shutdown();
 
+    // Close app loader
     app_loader.reset();
 
     NGLOG_DEBUG(Core, "Shutdown OK");
diff --git a/src/core/core.h b/src/core/core.h
index 3e0a7e6a7..561e7b48f 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -92,7 +92,7 @@ public:
      * @returns True if the emulated system is powered on, otherwise false.
      */
     bool IsPoweredOn() const {
-        return cpu_cores[0] != nullptr;
+        return cpu_barrier && cpu_barrier->IsAlive();
     }
 
     /**
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index a556f12e9..bd9869d28 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -19,6 +19,30 @@
 
 namespace Core {
 
+void CpuBarrier::NotifyEnd() {
+    std::lock_guard<std::mutex> guard{mutex};
+    end = true;
+    condition.notify_all();
+}
+
+bool CpuBarrier::Rendezvous() {
+    if (end) {
+        return false;
+    } else {
+        std::unique_lock<std::mutex> lock(mutex);
+
+        --cores_waiting;
+        if (!cores_waiting) {
+            cores_waiting = NUM_CPU_CORES;
+            condition.notify_all();
+            return true;
+        }
+
+        condition.wait(lock);
+        return true;
+    }
+}
+
 Cpu::Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index)
     : cpu_barrier{std::move(cpu_barrier)}, core_index{core_index} {
 
@@ -38,7 +62,10 @@ Cpu::Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index)
 
 void Cpu::RunLoop(bool tight_loop) {
     // Wait for all other CPU cores to complete the previous slice, such that they run in lock-step
-    cpu_barrier->Rendezvous();
+    if (!cpu_barrier->Rendezvous()) {
+        // If rendezvous failed, session has been killed
+        return;
+    }
 
     // If we don't have a currently active thread then don't execute instructions,
     // instead advance to the next event and try to yield to the next thread
diff --git a/src/core/core_cpu.h b/src/core/core_cpu.h
index 06784c4ab..243f0b5e7 100644
--- a/src/core/core_cpu.h
+++ b/src/core/core_cpu.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <atomic>
 #include <condition_variable>
 #include <memory>
 #include <mutex>
@@ -22,23 +23,19 @@ constexpr unsigned NUM_CPU_CORES{4};
 
 class CpuBarrier {
 public:
-    void Rendezvous() {
-        std::unique_lock<std::mutex> lock(mutex);
-
-        --cores_waiting;
-        if (!cores_waiting) {
-            cores_waiting = NUM_CPU_CORES;
-            condition.notify_all();
-            return;
-        }
-
-        condition.wait(lock);
+    bool IsAlive() const {
+        return !end;
     }
 
+    void NotifyEnd();
+
+    bool Rendezvous();
+
 private:
     unsigned cores_waiting{NUM_CPU_CORES};
     std::mutex mutex;
     std::condition_variable condition;
+    std::atomic<bool> end{};
 };
 
 class Cpu {

From 9bf2a428f9e9359763be1bfd90c32371044c711e Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 3 May 2018 00:34:54 -0400
Subject: [PATCH 05/18] core: Add a configuration setting for use_multi_core.

---
 src/core/core.cpp                            | 32 ++++++++++++++++----
 src/core/core.h                              | 10 ++----
 src/core/core_cpu.cpp                        | 11 +++++--
 src/core/settings.h                          |  1 +
 src/core/telemetry_session.cpp               |  2 ++
 src/yuzu/configuration/config.cpp            |  2 ++
 src/yuzu/configuration/configure_general.cpp |  3 ++
 src/yuzu/configuration/configure_general.ui  |  7 +++++
 src/yuzu_cmd/config.cpp                      |  1 +
 src/yuzu_cmd/default_ini.h                   |  4 +++
 10 files changed, 56 insertions(+), 17 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index 1e6be34c8..59c8940f7 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -126,6 +126,21 @@ PerfStats::Results System::GetAndResetPerfStats() {
     return perf_stats.GetAndResetStats(CoreTiming::GetGlobalTimeUs());
 }
 
+const std::shared_ptr<Kernel::Scheduler>& System::Scheduler(size_t core_index) {
+    if (!Settings::values.use_multi_core) {
+        // Always use Core 0 scheduler when multicore is disabled
+        return cpu_cores[0]->Scheduler();
+    }
+
+    ASSERT(core_index < NUM_CPU_CORES);
+    return cpu_cores[core_index]->Scheduler();
+}
+
+ARM_Interface& System::ArmInterface(size_t core_index) {
+    ASSERT(core_index < NUM_CPU_CORES);
+    return cpu_cores[core_index]->ArmInterface();
+}
+
 System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
     NGLOG_DEBUG(HW_Memory, "initialized OK");
 
@@ -154,9 +169,12 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
     // Create threads for CPU cores 1-3, and build thread_to_cpu map
     // CPU core 0 is run on the main thread
     thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0];
-    for (size_t index = 0; index < cpu_core_threads.size(); ++index) {
-        cpu_core_threads[index] = std::make_unique<std::thread>(RunCpuCore, cpu_cores[index + 1]);
-        thread_to_cpu[cpu_core_threads[index]->get_id()] = cpu_cores[index + 1];
+    if (Settings::values.use_multi_core) {
+        for (size_t index = 0; index < cpu_core_threads.size(); ++index) {
+            cpu_core_threads[index] =
+                std::make_unique<std::thread>(RunCpuCore, cpu_cores[index + 1]);
+            thread_to_cpu[cpu_core_threads[index]->get_id()] = cpu_cores[index + 1];
+        }
     }
 
     NGLOG_DEBUG(Core, "Initialized OK");
@@ -190,9 +208,11 @@ void System::Shutdown() {
 
     // Close all CPU/threading state
     cpu_barrier->NotifyEnd();
-    for (auto& thread : cpu_core_threads) {
-        thread->join();
-        thread.reset();
+    if (Settings::values.use_multi_core) {
+        for (auto& thread : cpu_core_threads) {
+            thread->join();
+            thread.reset();
+        }
     }
     thread_to_cpu.clear();
     for (auto& cpu_core : cpu_cores) {
diff --git a/src/core/core.h b/src/core/core.h
index 561e7b48f..115061932 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -112,10 +112,7 @@ public:
         return CurrentCpuCore().ArmInterface();
     }
 
-    ARM_Interface& ArmInterface(size_t core_index) {
-        ASSERT(core_index < NUM_CPU_CORES);
-        return cpu_cores[core_index]->ArmInterface();
-    }
+    ARM_Interface& ArmInterface(size_t core_index);
 
     Tegra::GPU& GPU() {
         return *gpu_core;
@@ -125,10 +122,7 @@ public:
         return *CurrentCpuCore().Scheduler();
     }
 
-    const std::shared_ptr<Kernel::Scheduler>& Scheduler(size_t core_index) {
-        ASSERT(core_index < NUM_CPU_CORES);
-        return cpu_cores[core_index]->Scheduler();
-    }
+    const std::shared_ptr<Kernel::Scheduler>& Scheduler(size_t core_index);
 
     Kernel::SharedPtr<Kernel::Process>& CurrentProcess() {
         return current_process;
diff --git a/src/core/core_cpu.cpp b/src/core/core_cpu.cpp
index bd9869d28..099f2bb1a 100644
--- a/src/core/core_cpu.cpp
+++ b/src/core/core_cpu.cpp
@@ -26,9 +26,12 @@ void CpuBarrier::NotifyEnd() {
 }
 
 bool CpuBarrier::Rendezvous() {
-    if (end) {
-        return false;
-    } else {
+    if (!Settings::values.use_multi_core) {
+        // Meaningless when running in single-core mode
+        return true;
+    }
+
+    if (!end) {
         std::unique_lock<std::mutex> lock(mutex);
 
         --cores_waiting;
@@ -41,6 +44,8 @@ bool CpuBarrier::Rendezvous() {
         condition.wait(lock);
         return true;
     }
+
+    return false;
 }
 
 Cpu::Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index)
diff --git a/src/core/settings.h b/src/core/settings.h
index cfec63c21..a7f1e5fa0 100644
--- a/src/core/settings.h
+++ b/src/core/settings.h
@@ -121,6 +121,7 @@ struct Values {
 
     // Core
     bool use_cpu_jit;
+    bool use_multi_core;
 
     // Data Storage
     bool use_virtual_sd;
diff --git a/src/core/telemetry_session.cpp b/src/core/telemetry_session.cpp
index 02c52bb55..a60aa1143 100644
--- a/src/core/telemetry_session.cpp
+++ b/src/core/telemetry_session.cpp
@@ -155,6 +155,8 @@ TelemetrySession::TelemetrySession() {
 
     // Log user configuration information
     AddField(Telemetry::FieldType::UserConfig, "Core_UseCpuJit", Settings::values.use_cpu_jit);
+    AddField(Telemetry::FieldType::UserConfig, "Core_UseMultiCore",
+             Settings::values.use_multi_core);
     AddField(Telemetry::FieldType::UserConfig, "Renderer_ResolutionFactor",
              Settings::values.resolution_factor);
     AddField(Telemetry::FieldType::UserConfig, "Renderer_ToggleFramelimit",
diff --git a/src/yuzu/configuration/config.cpp b/src/yuzu/configuration/config.cpp
index 8843f2078..8316db708 100644
--- a/src/yuzu/configuration/config.cpp
+++ b/src/yuzu/configuration/config.cpp
@@ -78,6 +78,7 @@ void Config::ReadValues() {
 
     qt_config->beginGroup("Core");
     Settings::values.use_cpu_jit = qt_config->value("use_cpu_jit", true).toBool();
+    Settings::values.use_multi_core = qt_config->value("use_multi_core", false).toBool();
     qt_config->endGroup();
 
     qt_config->beginGroup("Renderer");
@@ -177,6 +178,7 @@ void Config::SaveValues() {
 
     qt_config->beginGroup("Core");
     qt_config->setValue("use_cpu_jit", Settings::values.use_cpu_jit);
+    qt_config->setValue("use_multi_core", Settings::values.use_multi_core);
     qt_config->endGroup();
 
     qt_config->beginGroup("Renderer");
diff --git a/src/yuzu/configuration/configure_general.cpp b/src/yuzu/configuration/configure_general.cpp
index 2d73fc5aa..baa558667 100644
--- a/src/yuzu/configuration/configure_general.cpp
+++ b/src/yuzu/configuration/configure_general.cpp
@@ -20,6 +20,7 @@ ConfigureGeneral::ConfigureGeneral(QWidget* parent)
     this->setConfiguration();
 
     ui->use_cpu_jit->setEnabled(!Core::System::GetInstance().IsPoweredOn());
+    ui->use_multi_core->setEnabled(!Core::System::GetInstance().IsPoweredOn());
     ui->use_docked_mode->setEnabled(!Core::System::GetInstance().IsPoweredOn());
 }
 
@@ -30,6 +31,7 @@ void ConfigureGeneral::setConfiguration() {
     ui->toggle_check_exit->setChecked(UISettings::values.confirm_before_closing);
     ui->theme_combobox->setCurrentIndex(ui->theme_combobox->findData(UISettings::values.theme));
     ui->use_cpu_jit->setChecked(Settings::values.use_cpu_jit);
+    ui->use_multi_core->setChecked(Settings::values.use_multi_core);
     ui->use_docked_mode->setChecked(Settings::values.use_docked_mode);
 }
 
@@ -40,6 +42,7 @@ void ConfigureGeneral::applyConfiguration() {
         ui->theme_combobox->itemData(ui->theme_combobox->currentIndex()).toString();
 
     Settings::values.use_cpu_jit = ui->use_cpu_jit->isChecked();
+    Settings::values.use_multi_core = ui->use_multi_core->isChecked();
     Settings::values.use_docked_mode = ui->use_docked_mode->isChecked();
     Settings::Apply();
 }
diff --git a/src/yuzu/configuration/configure_general.ui b/src/yuzu/configuration/configure_general.ui
index 1775c4d40..233adbe27 100644
--- a/src/yuzu/configuration/configure_general.ui
+++ b/src/yuzu/configuration/configure_general.ui
@@ -58,6 +58,13 @@
             </property>
            </widget>
           </item>
+          <item>
+           <widget class="QCheckBox" name="use_multi_core">
+            <property name="text">
+             <string>Enable multi-core</string>
+            </property>
+           </widget>
+          </item>
          </layout>
         </item>
        </layout>
diff --git a/src/yuzu_cmd/config.cpp b/src/yuzu_cmd/config.cpp
index 675f9cafa..ee6e4d658 100644
--- a/src/yuzu_cmd/config.cpp
+++ b/src/yuzu_cmd/config.cpp
@@ -91,6 +91,7 @@ void Config::ReadValues() {
 
     // Core
     Settings::values.use_cpu_jit = sdl2_config->GetBoolean("Core", "use_cpu_jit", true);
+    Settings::values.use_multi_core = sdl2_config->GetBoolean("Core", "use_multi_core", false);
 
     // Renderer
     Settings::values.resolution_factor =
diff --git a/src/yuzu_cmd/default_ini.h b/src/yuzu_cmd/default_ini.h
index 02254403d..1c438c3f5 100644
--- a/src/yuzu_cmd/default_ini.h
+++ b/src/yuzu_cmd/default_ini.h
@@ -80,6 +80,10 @@ touch_device=
 # 0: Interpreter (slow), 1 (default): JIT (fast)
 use_cpu_jit =
 
+# Whether to use multi-core for CPU emulation
+# 0 (default): Disabled, 1: Enabled
+use_multi_core=
+
 [Renderer]
 # Whether to use software or hardware rendering.
 # 0: Software, 1 (default): Hardware

From 5c0421ebd8e6c1ef04216f8aef7cc78e61785a5d Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 5 May 2018 12:08:16 -0400
Subject: [PATCH 06/18] wait_tree: Show all threads on all schedulers.

---
 src/yuzu/debugger/wait_tree.cpp | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index f05c98cc3..a5da98d23 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -51,13 +51,21 @@ std::size_t WaitTreeItem::Row() const {
 }
 
 std::vector<std::unique_ptr<WaitTreeThread>> WaitTreeItem::MakeThreadItemList() {
-    const auto& threads = Core::System::GetInstance().Scheduler(0)->GetThreadList();
     std::vector<std::unique_ptr<WaitTreeThread>> item_list;
-    item_list.reserve(threads.size());
-    for (std::size_t i = 0; i < threads.size(); ++i) {
-        item_list.push_back(std::make_unique<WaitTreeThread>(*threads[i]));
-        item_list.back()->row = i;
-    }
+    std::size_t row = 0;
+    auto add_threads = [&](const std::vector<Kernel::SharedPtr<Kernel::Thread>>& threads) {
+        for (const auto& thread : threads) {
+            item_list.push_back(std::make_unique<WaitTreeThread>(*thread));
+            item_list.back()->row = row;
+            ++row;
+        }
+    };
+
+    add_threads(Core::System::GetInstance().Scheduler(0)->GetThreadList());
+    add_threads(Core::System::GetInstance().Scheduler(1)->GetThreadList());
+    add_threads(Core::System::GetInstance().Scheduler(2)->GetThreadList());
+    add_threads(Core::System::GetInstance().Scheduler(3)->GetThreadList());
+
     return item_list;
 }
 

From 6a890023e92cc8c7c0ea6bd20df6180ab88d2bb2 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 5 May 2018 21:59:35 -0400
Subject: [PATCH 07/18] svc: Implement GetCurrentProcessorNumber.

---
 src/core/hle/kernel/svc.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 520510b61..ec32432db 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -401,8 +401,8 @@ static ResultCode SetThreadPriority(Handle handle, u32 priority) {
 
 /// Get which CPU core is executing the current thread
 static u32 GetCurrentProcessorNumber() {
-    NGLOG_WARNING(Kernel_SVC, "(STUBBED) called, defaulting to processor 0");
-    return 0;
+    NGLOG_TRACE(Kernel_SVC, "called");
+    return GetCurrentThread()->processor_id;
 }
 
 static ResultCode MapSharedMemory(Handle shared_memory_handle, VAddr addr, u64 size,

From 1c36f2a798372b6bfc54e0e8bd6cf073bed83e6a Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 5 May 2018 22:00:34 -0400
Subject: [PATCH 08/18] svc: SignalProcessWideKey should apply to all cores.

---
 src/core/hle/kernel/svc.cpp | 79 ++++++++++++++++++++-----------------
 1 file changed, 43 insertions(+), 36 deletions(-)

diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index ec32432db..fdf9f9011 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -635,53 +635,60 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
                 condition_variable_addr, target);
 
     u32 processed = 0;
-    auto& thread_list = Core::System::GetInstance().CurrentScheduler().GetThreadList();
 
-    for (auto& thread : thread_list) {
-        if (thread->condvar_wait_address != condition_variable_addr)
-            continue;
+    auto signal_process_wide_key = [&](size_t core_index) {
+        const auto& scheduler = Core::System::GetInstance().Scheduler(core_index);
+        for (auto& thread : scheduler->GetThreadList()) {
+            if (thread->condvar_wait_address != condition_variable_addr)
+                continue;
 
-        // Only process up to 'target' threads, unless 'target' is -1, in which case process
-        // them all.
-        if (target != -1 && processed >= target)
-            break;
+            // Only process up to 'target' threads, unless 'target' is -1, in which case process
+            // them all.
+            if (target != -1 && processed >= target)
+                break;
 
-        // If the mutex is not yet acquired, acquire it.
-        u32 mutex_val = Memory::Read32(thread->mutex_wait_address);
+            // If the mutex is not yet acquired, acquire it.
+            u32 mutex_val = Memory::Read32(thread->mutex_wait_address);
 
-        if (mutex_val == 0) {
-            // We were able to acquire the mutex, resume this thread.
-            Memory::Write32(thread->mutex_wait_address, thread->wait_handle);
-            ASSERT(thread->status == THREADSTATUS_WAIT_MUTEX);
-            thread->ResumeFromWait();
+            if (mutex_val == 0) {
+                // We were able to acquire the mutex, resume this thread.
+                Memory::Write32(thread->mutex_wait_address, thread->wait_handle);
+                ASSERT(thread->status == THREADSTATUS_WAIT_MUTEX);
+                thread->ResumeFromWait();
 
-            auto lock_owner = thread->lock_owner;
-            if (lock_owner)
-                lock_owner->RemoveMutexWaiter(thread);
+                auto lock_owner = thread->lock_owner;
+                if (lock_owner)
+                    lock_owner->RemoveMutexWaiter(thread);
 
-            thread->lock_owner = nullptr;
-            thread->mutex_wait_address = 0;
-            thread->condvar_wait_address = 0;
-            thread->wait_handle = 0;
-        } else {
-            // Couldn't acquire the mutex, block the thread.
-            Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
-            auto owner = g_handle_table.Get<Thread>(owner_handle);
-            ASSERT(owner);
-            ASSERT(thread->status != THREADSTATUS_RUNNING);
-            thread->status = THREADSTATUS_WAIT_MUTEX;
-            thread->wakeup_callback = nullptr;
+                thread->lock_owner = nullptr;
+                thread->mutex_wait_address = 0;
+                thread->condvar_wait_address = 0;
+                thread->wait_handle = 0;
+            } else {
+                // Couldn't acquire the mutex, block the thread.
+                Handle owner_handle = static_cast<Handle>(mutex_val & Mutex::MutexOwnerMask);
+                auto owner = g_handle_table.Get<Thread>(owner_handle);
+                ASSERT(owner);
+                ASSERT(thread->status != THREADSTATUS_RUNNING);
+                thread->status = THREADSTATUS_WAIT_MUTEX;
+                thread->wakeup_callback = nullptr;
 
-            // Signal that the mutex now has a waiting thread.
-            Memory::Write32(thread->mutex_wait_address, mutex_val | Mutex::MutexHasWaitersFlag);
+                // Signal that the mutex now has a waiting thread.
+                Memory::Write32(thread->mutex_wait_address, mutex_val | Mutex::MutexHasWaitersFlag);
 
-            owner->AddMutexWaiter(thread);
+                owner->AddMutexWaiter(thread);
 
-            Core::System::GetInstance().PrepareReschedule();
+                Core::System::GetInstance().PrepareReschedule();
+            }
+
+            ++processed;
         }
+    };
 
-        ++processed;
-    }
+    signal_process_wide_key(0);
+    signal_process_wide_key(1);
+    signal_process_wide_key(2);
+    signal_process_wide_key(3);
 
     return RESULT_SUCCESS;
 }

From 6ea8b3ef60bba657ca10c7e62dd849be9217faf3 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 5 May 2018 23:03:01 -0400
Subject: [PATCH 09/18] thread: Implement ChangeCore function.

---
 src/core/hle/kernel/thread.cpp | 53 +++++++++++++++++++++++++++++++++-
 src/core/hle/kernel/thread.h   |  6 ++++
 2 files changed, 58 insertions(+), 1 deletion(-)

diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 0a5441684..c370776e8 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -273,7 +273,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
     thread->name = std::move(name);
     thread->callback_handle = wakeup_callback_handle_table.Create(thread).Unwrap();
     thread->owner_process = owner_process;
-    thread->scheduler = Core::System().GetInstance().Scheduler(static_cast<size_t>(processor_id));
+    thread->scheduler = Core::System().GetInstance().Scheduler(processor_id);
     thread->scheduler->AddThread(thread, priority);
 
     // Find the next available TLS index, and mark it as used
@@ -415,6 +415,57 @@ void Thread::UpdatePriority() {
         lock_owner->UpdatePriority();
 }
 
+static s32 GetNextProcessorId(u64 mask) {
+    s32 processor_id{};
+    for (s32 index = 0; index < Core::NUM_CPU_CORES; ++index) {
+        if (mask & (1ULL << index)) {
+            if (!Core::System().GetInstance().Scheduler(index)->GetCurrentThread()) {
+                // Core is enabled and not running any threads, use this one
+                return index;
+            }
+
+            // Core is enabled, but running a thread, less ideal
+            processor_id = index;
+        }
+    }
+
+    return processor_id;
+}
+
+void Thread::ChangeCore(u32 core, u64 mask) {
+    const s32 new_processor_id{GetNextProcessorId(mask)};
+
+    ASSERT(ideal_core == core); // We're not doing anything with this yet, so assert the expected
+    ASSERT(new_processor_id < Core::NUM_CPU_CORES);
+
+    if (new_processor_id == processor_id) {
+        // Already running on ideal core, nothing to do here
+        return;
+    }
+
+    ASSERT(status != THREADSTATUS_RUNNING); // Unsupported
+
+    processor_id = new_processor_id;
+    ideal_core = core;
+    mask = mask;
+
+    // Add thread to new core's scheduler
+    auto& next_scheduler = Core::System().GetInstance().Scheduler(new_processor_id);
+    next_scheduler->AddThread(this, current_priority);
+
+    if (status == THREADSTATUS_READY) {
+        // If the thread was ready, unschedule from the previous core and schedule on the new core
+        scheduler->UnscheduleThread(this, current_priority);
+        next_scheduler->ScheduleThread(this, current_priority);
+    }
+
+    // Remove thread from previous core's scheduler
+    scheduler->RemoveThread(this);
+
+    // Change thread's scheduler
+    scheduler = next_scheduler;
+}
+
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 /**
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 0a3bb1183..3dda548ad 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -120,6 +120,9 @@ public:
     /// Recalculates the current priority taking into account priority inheritance.
     void UpdatePriority();
 
+    /// Changes the core that the thread is running or scheduled to run on.
+    void ChangeCore(u32 core, u64 mask);
+
     /**
      * Gets the thread's thread ID
      * @return The thread's ID
@@ -244,6 +247,9 @@ public:
 
     std::shared_ptr<Scheduler> scheduler;
 
+    u32 ideal_core{0xFFFFFFFF};
+    u64 mask{0x1};
+
 private:
     Thread();
     ~Thread() override;

From d6e3cd9a17c47ce68ddb1392b7fff8c9e645aa07 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 5 May 2018 23:13:15 -0400
Subject: [PATCH 10/18] svc: Implement GetThreadCoreMask and SetThreadCoreMask.

---
 src/core/hle/kernel/svc.cpp | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index fdf9f9011..9050ff3de 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -726,16 +726,31 @@ static ResultCode CreateTransferMemory(Handle* handle, VAddr addr, u64 size, u32
     return RESULT_SUCCESS;
 }
 
-static ResultCode GetThreadCoreMask(Handle handle, u32* mask, u64* unknown) {
-    NGLOG_WARNING(Kernel_SVC, "(STUBBED) called, handle=0x{:08X}", handle);
-    *mask = 0x0;
-    *unknown = 0xf;
+static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask) {
+    NGLOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}", thread_handle);
+
+    const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
+    if (!thread) {
+        return ERR_INVALID_HANDLE;
+    }
+
+    *core = thread->ideal_core;
+    *mask = thread->mask;
+
     return RESULT_SUCCESS;
 }
 
-static ResultCode SetThreadCoreMask(Handle handle, u32 mask, u64 unknown) {
-    NGLOG_WARNING(Kernel_SVC, "(STUBBED) called, handle=0x{:08X}, mask=0x{:08X}, unknown=0x{:X}",
-                  handle, mask, unknown);
+static ResultCode SetThreadCoreMask(Handle thread_handle, u32 core, u64 mask) {
+    NGLOG_TRACE(Kernel_SVC, "called, handle=0x{:08X}, mask=0x{:08X}, core=0x{:X}", thread_handle,
+                mask, core);
+
+    const SharedPtr<Thread> thread = g_handle_table.Get<Thread>(thread_handle);
+    if (!thread) {
+        return ERR_INVALID_HANDLE;
+    }
+
+    thread->ChangeCore(core, mask);
+
     return RESULT_SUCCESS;
 }
 

From 8aa5d25f826c8969a1e9938d8c8e12fa6df8be82 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Sat, 5 May 2018 23:54:43 -0400
Subject: [PATCH 11/18] threading: Reschedule only on cores that are necessary.

---
 src/core/core.cpp              | 5 +++++
 src/core/core.h                | 2 ++
 src/core/hle/kernel/svc.cpp    | 4 ++--
 src/core/hle/kernel/thread.cpp | 2 +-
 4 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index 59c8940f7..6cbfc3035 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -141,6 +141,11 @@ ARM_Interface& System::ArmInterface(size_t core_index) {
     return cpu_cores[core_index]->ArmInterface();
 }
 
+Cpu& System::CpuCore(size_t core_index) {
+    ASSERT(core_index < NUM_CPU_CORES);
+    return *cpu_cores[core_index];
+}
+
 System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) {
     NGLOG_DEBUG(HW_Memory, "initialized OK");
 
diff --git a/src/core/core.h b/src/core/core.h
index 115061932..5740e858b 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -114,6 +114,8 @@ public:
 
     ARM_Interface& ArmInterface(size_t core_index);
 
+    Cpu& CpuCore(size_t core_index);
+
     Tegra::GPU& GPU() {
         return *gpu_core;
     }
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 9050ff3de..89c3e240a 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -625,7 +625,7 @@ static ResultCode WaitProcessWideKeyAtomic(VAddr mutex_addr, VAddr condition_var
 
     // Note: Deliberately don't attempt to inherit the lock owner's priority.
 
-    Core::System::GetInstance().PrepareReschedule();
+    Core::System::GetInstance().CpuCore(current_thread->processor_id).PrepareReschedule();
     return RESULT_SUCCESS;
 }
 
@@ -678,7 +678,7 @@ static ResultCode SignalProcessWideKey(VAddr condition_variable_addr, s32 target
 
                 owner->AddMutexWaiter(thread);
 
-                Core::System::GetInstance().PrepareReschedule();
+                Core::System::GetInstance().CpuCore(thread->processor_id).PrepareReschedule();
             }
 
             ++processed;
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index c370776e8..31cf1551d 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -189,7 +189,7 @@ void Thread::ResumeFromWait() {
 
     status = THREADSTATUS_READY;
     scheduler->ScheduleThread(this, current_priority);
-    Core::System::GetInstance().PrepareReschedule();
+    Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule();
 }
 
 /**

From 4822765fefbb81ad55fe4db7561ccf69b9a60bcd Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Mon, 7 May 2018 21:57:42 -0400
Subject: [PATCH 12/18] thread: Initialize ideal_core and mask members.

---
 src/core/hle/kernel/thread.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 31cf1551d..ac6252eac 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -266,6 +266,8 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
     thread->nominal_priority = thread->current_priority = priority;
     thread->last_running_ticks = CoreTiming::GetTicks();
     thread->processor_id = processor_id;
+    thread->ideal_core = processor_id;
+    thread->mask = 1 << processor_id;
     thread->wait_objects.clear();
     thread->mutex_wait_address = 0;
     thread->condvar_wait_address = 0;

From e6671190a5994e5154fc845a1152577f1be22133 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Mon, 7 May 2018 22:03:48 -0400
Subject: [PATCH 13/18] wait_tree: Add ideal core and affinity mask.

---
 src/yuzu/debugger/wait_tree.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index a5da98d23..01f9d9008 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -248,6 +248,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
     }
 
     list.push_back(std::make_unique<WaitTreeText>(tr("processor = %1").arg(processor)));
+    list.push_back(std::make_unique<WaitTreeText>(tr("ideal core = %1").arg(thread.ideal_core)));
+    list.push_back(std::make_unique<WaitTreeText>(tr("affinity mask = %1").arg(thread.mask)));
     list.push_back(std::make_unique<WaitTreeText>(tr("thread id = %1").arg(thread.GetThreadId())));
     list.push_back(std::make_unique<WaitTreeText>(tr("priority = %1(current) / %2(normal)")
                                                       .arg(thread.current_priority)

From 91af2f94e85e433cb8cca8648e52ac91beae7471 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Mon, 7 May 2018 22:12:45 -0400
Subject: [PATCH 14/18] scheduler: Protect scheduling functions with a global
 mutex.

---
 src/core/hle/kernel/scheduler.cpp | 15 +++++++++++++++
 src/core/hle/kernel/scheduler.h   |  3 +++
 2 files changed, 18 insertions(+)

diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index ff6a0941a..9cb9e0e5c 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -9,6 +9,8 @@
 
 namespace Kernel {
 
+std::mutex Scheduler::scheduler_mutex;
+
 Scheduler::Scheduler(ARM_Interface* cpu_core) : cpu_core(cpu_core) {}
 
 Scheduler::~Scheduler() {
@@ -18,6 +20,7 @@ Scheduler::~Scheduler() {
 }
 
 bool Scheduler::HaveReadyThreads() {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
     return ready_queue.get_first() != nullptr;
 }
 
@@ -90,6 +93,8 @@ void Scheduler::SwitchContext(Thread* new_thread) {
 }
 
 void Scheduler::Reschedule() {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
     Thread* cur = GetCurrentThread();
     Thread* next = PopNextReadyThread();
 
@@ -105,26 +110,36 @@ void Scheduler::Reschedule() {
 }
 
 void Scheduler::AddThread(SharedPtr<Thread> thread, u32 priority) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
     thread_list.push_back(thread);
     ready_queue.prepare(priority);
 }
 
 void Scheduler::RemoveThread(Thread* thread) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
     thread_list.erase(std::remove(thread_list.begin(), thread_list.end(), thread),
                       thread_list.end());
 }
 
 void Scheduler::ScheduleThread(Thread* thread, u32 priority) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
     ASSERT(thread->status == THREADSTATUS_READY);
     ready_queue.push_back(priority, thread);
 }
 
 void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
     ASSERT(thread->status == THREADSTATUS_READY);
     ready_queue.remove(priority, thread);
 }
 
 void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
+    std::lock_guard<std::mutex> lock(scheduler_mutex);
+
     // If thread was ready, adjust queues
     if (thread->status == THREADSTATUS_READY)
         ready_queue.move(thread, thread->current_priority, priority);
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 27d0247d6..a3b5fb8ca 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -4,6 +4,7 @@
 
 #pragma once
 
+#include <mutex>
 #include <vector>
 #include "common/common_types.h"
 #include "common/thread_queue_list.h"
@@ -68,6 +69,8 @@ private:
     SharedPtr<Thread> current_thread = nullptr;
 
     ARM_Interface* cpu_core;
+
+    static std::mutex scheduler_mutex;
 };
 
 } // namespace Kernel

From fbd7afefaade8f96defaa5b7353da8933f626917 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Mon, 7 May 2018 22:29:48 -0400
Subject: [PATCH 15/18] thread: Support core change on ResumeFromWait and
 improve ChangeCore.

---
 src/core/hle/kernel/thread.cpp | 115 +++++++++++++++++++++------------
 1 file changed, 73 insertions(+), 42 deletions(-)

diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index ac6252eac..04d18dc2f 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -154,6 +154,18 @@ void Thread::CancelWakeupTimer() {
     CoreTiming::UnscheduleEvent(ThreadWakeupEventType, callback_handle);
 }
 
+static boost::optional<s32> GetNextProcessorId(u64 mask) {
+    for (s32 index = 0; index < Core::NUM_CPU_CORES; ++index) {
+        if (mask & (1ULL << index)) {
+            if (!Core::System().GetInstance().Scheduler(index)->GetCurrentThread()) {
+                // Core is enabled and not running any threads, use this one
+                return index;
+            }
+        }
+    }
+    return {};
+}
+
 void Thread::ResumeFromWait() {
     ASSERT_MSG(wait_objects.empty(), "Thread is waking up while waiting for objects");
 
@@ -188,7 +200,36 @@ void Thread::ResumeFromWait() {
     wakeup_callback = nullptr;
 
     status = THREADSTATUS_READY;
-    scheduler->ScheduleThread(this, current_priority);
+
+    boost::optional<s32> new_processor_id = GetNextProcessorId(mask);
+    if (!new_processor_id) {
+        new_processor_id = processor_id;
+    }
+    if (ideal_core != -1 &&
+        Core::System().GetInstance().Scheduler(ideal_core)->GetCurrentThread() == nullptr) {
+        new_processor_id = ideal_core;
+    }
+
+    ASSERT(*new_processor_id < 4);
+
+    // Add thread to new core's scheduler
+    auto& next_scheduler = Core::System().GetInstance().Scheduler(*new_processor_id);
+
+    if (*new_processor_id != processor_id) {
+        // Remove thread from previous core's scheduler
+        scheduler->RemoveThread(this);
+        next_scheduler->AddThread(this, current_priority);
+    }
+
+    processor_id = *new_processor_id;
+
+    // If the thread was ready, unschedule from the previous core and schedule on the new core
+    scheduler->UnscheduleThread(this, current_priority);
+    next_scheduler->ScheduleThread(this, current_priority);
+
+    // Change thread's scheduler
+    scheduler = next_scheduler;
+
     Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule();
 }
 
@@ -267,7 +308,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
     thread->last_running_ticks = CoreTiming::GetTicks();
     thread->processor_id = processor_id;
     thread->ideal_core = processor_id;
-    thread->mask = 1 << processor_id;
+    thread->mask = 1ULL << processor_id;
     thread->wait_objects.clear();
     thread->mutex_wait_address = 0;
     thread->condvar_wait_address = 0;
@@ -417,55 +458,45 @@ void Thread::UpdatePriority() {
         lock_owner->UpdatePriority();
 }
 
-static s32 GetNextProcessorId(u64 mask) {
-    s32 processor_id{};
-    for (s32 index = 0; index < Core::NUM_CPU_CORES; ++index) {
-        if (mask & (1ULL << index)) {
-            if (!Core::System().GetInstance().Scheduler(index)->GetCurrentThread()) {
-                // Core is enabled and not running any threads, use this one
-                return index;
-            }
-
-            // Core is enabled, but running a thread, less ideal
-            processor_id = index;
-        }
-    }
-
-    return processor_id;
-}
-
 void Thread::ChangeCore(u32 core, u64 mask) {
-    const s32 new_processor_id{GetNextProcessorId(mask)};
-
-    ASSERT(ideal_core == core); // We're not doing anything with this yet, so assert the expected
-    ASSERT(new_processor_id < Core::NUM_CPU_CORES);
-
-    if (new_processor_id == processor_id) {
-        // Already running on ideal core, nothing to do here
-        return;
-    }
-
-    ASSERT(status != THREADSTATUS_RUNNING); // Unsupported
-
-    processor_id = new_processor_id;
     ideal_core = core;
     mask = mask;
 
-    // Add thread to new core's scheduler
-    auto& next_scheduler = Core::System().GetInstance().Scheduler(new_processor_id);
-    next_scheduler->AddThread(this, current_priority);
-
-    if (status == THREADSTATUS_READY) {
-        // If the thread was ready, unschedule from the previous core and schedule on the new core
-        scheduler->UnscheduleThread(this, current_priority);
-        next_scheduler->ScheduleThread(this, current_priority);
+    if (status != THREADSTATUS_READY) {
+        return;
     }
 
-    // Remove thread from previous core's scheduler
-    scheduler->RemoveThread(this);
+    boost::optional<s32> new_processor_id{GetNextProcessorId(mask)};
+
+    if (!new_processor_id) {
+        new_processor_id = processor_id;
+    }
+    if (ideal_core != -1 &&
+        Core::System().GetInstance().Scheduler(ideal_core)->GetCurrentThread() == nullptr) {
+        new_processor_id = ideal_core;
+    }
+
+    ASSERT(*new_processor_id < 4);
+
+    // Add thread to new core's scheduler
+    auto& next_scheduler = Core::System().GetInstance().Scheduler(*new_processor_id);
+
+    if (*new_processor_id != processor_id) {
+        // Remove thread from previous core's scheduler
+        scheduler->RemoveThread(this);
+        next_scheduler->AddThread(this, current_priority);
+    }
+
+    processor_id = *new_processor_id;
+
+    // If the thread was ready, unschedule from the previous core and schedule on the new core
+    scheduler->UnscheduleThread(this, current_priority);
+    next_scheduler->ScheduleThread(this, current_priority);
 
     // Change thread's scheduler
     scheduler = next_scheduler;
+
+    Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule();
 }
 
 ////////////////////////////////////////////////////////////////////////////////////////////////////

From edc52250b8157a9d2b8c909225114c98c7ea609e Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Mon, 7 May 2018 22:57:39 -0400
Subject: [PATCH 16/18] core: Run all CPU cores separately, even in
 single-thread mode.

---
 src/core/core.cpp | 26 ++++++++++++++++++++------
 src/core/core.h   | 10 +++-------
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/src/core/core.cpp b/src/core/core.cpp
index 6cbfc3035..84ab876cc 100644
--- a/src/core/core.cpp
+++ b/src/core/core.cpp
@@ -34,6 +34,19 @@ static void RunCpuCore(std::shared_ptr<Cpu> cpu_state) {
     }
 }
 
+Cpu& System::CurrentCpuCore() {
+    // If multicore is enabled, use host thread to figure out the current CPU core
+    if (Settings::values.use_multi_core) {
+        const auto& search = thread_to_cpu.find(std::this_thread::get_id());
+        ASSERT(search != thread_to_cpu.end());
+        ASSERT(search->second);
+        return *search->second;
+    }
+
+    // Otherwise, use single-threaded mode active_core variable
+    return *cpu_cores[active_core];
+}
+
 System::ResultStatus System::RunLoop(bool tight_loop) {
     status = ResultStatus::Success;
 
@@ -55,7 +68,13 @@ System::ResultStatus System::RunLoop(bool tight_loop) {
         }
     }
 
-    cpu_cores[0]->RunLoop(tight_loop);
+    for (active_core = 0; active_core < NUM_CPU_CORES; ++active_core) {
+        cpu_cores[active_core]->RunLoop(tight_loop);
+        if (Settings::values.use_multi_core) {
+            // Cores 1-3 are run on other threads in this mode
+            break;
+        }
+    }
 
     return status;
 }
@@ -127,11 +146,6 @@ PerfStats::Results System::GetAndResetPerfStats() {
 }
 
 const std::shared_ptr<Kernel::Scheduler>& System::Scheduler(size_t core_index) {
-    if (!Settings::values.use_multi_core) {
-        // Always use Core 0 scheduler when multicore is disabled
-        return cpu_cores[0]->Scheduler();
-    }
-
     ASSERT(core_index < NUM_CPU_CORES);
     return cpu_cores[core_index]->Scheduler();
 }
diff --git a/src/core/core.h b/src/core/core.h
index 5740e858b..6de707271 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -160,13 +160,8 @@ public:
     }
 
 private:
-    /// Returns the current CPU core based on the calling host thread
-    Cpu& CurrentCpuCore() {
-        const auto& search = thread_to_cpu.find(std::this_thread::get_id());
-        ASSERT(search != thread_to_cpu.end());
-        ASSERT(search->second);
-        return *search->second;
-    }
+    /// Returns the currently running CPU core
+    Cpu& CurrentCpuCore();
 
     /**
      * Initialize the emulated system.
@@ -184,6 +179,7 @@ private:
     std::shared_ptr<CpuBarrier> cpu_barrier;
     std::array<std::shared_ptr<Cpu>, NUM_CPU_CORES> cpu_cores;
     std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> cpu_core_threads;
+    size_t active_core{}; ///< Active core, only used in single thread mode
 
     /// Service manager
     std::shared_ptr<Service::SM::ServiceManager> service_manager;

From 46ec9a9bc924aa1151db349541976521b72c41da Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 10 May 2018 19:12:46 -0400
Subject: [PATCH 17/18] thread: Rename mask to affinity_mask.

---
 src/core/hle/kernel/svc.cpp     | 2 +-
 src/core/hle/kernel/thread.cpp  | 6 +++---
 src/core/hle/kernel/thread.h    | 2 +-
 src/yuzu/debugger/wait_tree.cpp | 3 ++-
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 89c3e240a..1ae530c90 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -735,7 +735,7 @@ static ResultCode GetThreadCoreMask(Handle thread_handle, u32* core, u64* mask)
     }
 
     *core = thread->ideal_core;
-    *mask = thread->mask;
+    *mask = thread->affinity_mask;
 
     return RESULT_SUCCESS;
 }
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index 04d18dc2f..46fcdefb8 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -201,7 +201,7 @@ void Thread::ResumeFromWait() {
 
     status = THREADSTATUS_READY;
 
-    boost::optional<s32> new_processor_id = GetNextProcessorId(mask);
+    boost::optional<s32> new_processor_id = GetNextProcessorId(affinity_mask);
     if (!new_processor_id) {
         new_processor_id = processor_id;
     }
@@ -308,7 +308,7 @@ ResultVal<SharedPtr<Thread>> Thread::Create(std::string name, VAddr entry_point,
     thread->last_running_ticks = CoreTiming::GetTicks();
     thread->processor_id = processor_id;
     thread->ideal_core = processor_id;
-    thread->mask = 1ULL << processor_id;
+    thread->affinity_mask = 1ULL << processor_id;
     thread->wait_objects.clear();
     thread->mutex_wait_address = 0;
     thread->condvar_wait_address = 0;
@@ -460,7 +460,7 @@ void Thread::UpdatePriority() {
 
 void Thread::ChangeCore(u32 core, u64 mask) {
     ideal_core = core;
-    mask = mask;
+    affinity_mask = mask;
 
     if (status != THREADSTATUS_READY) {
         return;
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index 3dda548ad..1d2da6d50 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -248,7 +248,7 @@ public:
     std::shared_ptr<Scheduler> scheduler;
 
     u32 ideal_core{0xFFFFFFFF};
-    u64 mask{0x1};
+    u64 affinity_mask{0x1};
 
 private:
     Thread();
diff --git a/src/yuzu/debugger/wait_tree.cpp b/src/yuzu/debugger/wait_tree.cpp
index 01f9d9008..8b074db5a 100644
--- a/src/yuzu/debugger/wait_tree.cpp
+++ b/src/yuzu/debugger/wait_tree.cpp
@@ -249,7 +249,8 @@ std::vector<std::unique_ptr<WaitTreeItem>> WaitTreeThread::GetChildren() const {
 
     list.push_back(std::make_unique<WaitTreeText>(tr("processor = %1").arg(processor)));
     list.push_back(std::make_unique<WaitTreeText>(tr("ideal core = %1").arg(thread.ideal_core)));
-    list.push_back(std::make_unique<WaitTreeText>(tr("affinity mask = %1").arg(thread.mask)));
+    list.push_back(
+        std::make_unique<WaitTreeText>(tr("affinity mask = %1").arg(thread.affinity_mask)));
     list.push_back(std::make_unique<WaitTreeText>(tr("thread id = %1").arg(thread.GetThreadId())));
     list.push_back(std::make_unique<WaitTreeText>(tr("priority = %1(current) / %2(normal)")
                                                       .arg(thread.current_priority)

From 811dae12f9e1c0eb5c19f6c6a8e75b1e6260abb2 Mon Sep 17 00:00:00 2001
From: bunnei <bunneidev@gmail.com>
Date: Thu, 10 May 2018 19:34:21 -0400
Subject: [PATCH 18/18] core: Add several missing docstrings.

---
 src/core/core.h | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/core/core.h b/src/core/core.h
index 6de707271..f90f085ad 100644
--- a/src/core/core.h
+++ b/src/core/core.h
@@ -106,26 +106,34 @@ public:
     /// Prepare the core emulation for a reschedule
     void PrepareReschedule();
 
+    /// Gets and resets core performance statistics
     PerfStats::Results GetAndResetPerfStats();
 
+    /// Gets an ARM interface to the CPU core that is currently running
     ARM_Interface& CurrentArmInterface() {
         return CurrentCpuCore().ArmInterface();
     }
 
+    /// Gets an ARM interface to the CPU core with the specified index
     ARM_Interface& ArmInterface(size_t core_index);
 
+    /// Gets a CPU interface to the CPU core with the specified index
     Cpu& CpuCore(size_t core_index);
 
+    /// Gets the GPU interface
     Tegra::GPU& GPU() {
         return *gpu_core;
     }
 
+    /// Gets the scheduler for the CPU core that is currently running
     Kernel::Scheduler& CurrentScheduler() {
         return *CurrentCpuCore().Scheduler();
     }
 
+    /// Gets the scheduler for the CPU core with the specified index
     const std::shared_ptr<Kernel::Scheduler>& Scheduler(size_t core_index);
 
+    /// Gets the current process
     Kernel::SharedPtr<Kernel::Process>& CurrentProcess() {
         return current_process;
     }