kernel: Implement host thread register methods without locking

Locks on GetCurrentHostThreadID were causing performance issues according to Visual Studio's profiler. It was consuming twice the time as arm_interface.Run(). The cost was not in the function itself but in the lockinig it required. Reimplement these functions using atomics and static storage instead of an unordered_map. This is a side effect to avoid locking and using linked lists for reads. Replace unordered_map with a linear search.
2024-10-07 09:19:58 +01:00 · 2020-10-13 18:00:25 -03:00 · 2020-10-13 18:00:25 -03:00 · b9a9b83bee
commit b9a9b83bee
parent d291fc1a51
1 changed files with 39 additions and 29 deletions
--- a/src/core/hle/kernel/kernel.cpp
+++ b/src/core/hle/kernel/kernel.cpp
@ -7,7 +7,6 @@
 #include <bitset>
 #include <functional>
 #include <memory>
-#include <mutex>
 #include <thread>
 #include <unordered_map>
 #include <utility>
@ -107,7 +106,11 @@ struct KernelCore::Impl {
        cores.clear();

        exclusive_monitor.reset();
-        host_thread_ids.clear();
+
+        num_host_threads = 0;
+        std::fill(register_host_thread_keys.begin(), register_host_thread_keys.end(),
+                  std::thread::id{});
+        std::fill(register_host_thread_values.begin(), register_host_thread_values.end(), 0);
    }

    void InitializePhysicalCores() {
@ -177,54 +180,56 @@ struct KernelCore::Impl {

    void MakeCurrentProcess(Process* process) {
        current_process = process;
-
        if (process == nullptr) {
            return;
        }
-
-        u32 core_id = GetCurrentHostThreadID();
+        const u32 core_id = GetCurrentHostThreadID();
        if (core_id < Core::Hardware::NUM_CPU_CORES) {
            system.Memory().SetCurrentPageTable(*process, core_id);
        }
    }

    void RegisterCoreThread(std::size_t core_id) {
-        std::unique_lock lock{register_thread_mutex};
-        if (!is_multicore) {
-            single_core_thread_id = std::this_thread::get_id();
-        }
        const std::thread::id this_id = std::this_thread::get_id();
-        const auto it = host_thread_ids.find(this_id);
+        if (!is_multicore) {
+            single_core_thread_id = this_id;
+        }
+        const auto end = register_host_thread_keys.begin() + num_host_threads;
+        const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
        ASSERT(core_id < Core::Hardware::NUM_CPU_CORES);
-        ASSERT(it == host_thread_ids.end());
+        ASSERT(it == end);
        ASSERT(!registered_core_threads[core_id]);
-        host_thread_ids[this_id] = static_cast<u32>(core_id);
+        InsertHostThread(static_cast<u32>(core_id));
        registered_core_threads.set(core_id);
    }

    void RegisterHostThread() {
-        std::unique_lock lock{register_thread_mutex};
        const std::thread::id this_id = std::this_thread::get_id();
-        const auto it = host_thread_ids.find(this_id);
-        if (it != host_thread_ids.end()) {
-            return;
+        const auto end = register_host_thread_keys.begin() + num_host_threads;
+        const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
+        if (it == end) {
+            InsertHostThread(registered_thread_ids++);
        }
-        host_thread_ids[this_id] = registered_thread_ids++;
    }

-    u32 GetCurrentHostThreadID() const {
+    void InsertHostThread(u32 value) {
+        const size_t index = num_host_threads++;
+        ASSERT_MSG(index < NUM_REGISTRABLE_HOST_THREADS, "Too many host threads");
+        register_host_thread_values[index] = value;
+        register_host_thread_keys[index] = std::this_thread::get_id();
+    }
+
+    [[nodiscard]] u32 GetCurrentHostThreadID() const {
        const std::thread::id this_id = std::this_thread::get_id();
-        if (!is_multicore) {
-            if (single_core_thread_id == this_id) {
+        if (!is_multicore && single_core_thread_id == this_id) {
            return static_cast<u32>(system.GetCpuManager().CurrentCore());
        }
-        }
-        std::unique_lock lock{register_thread_mutex};
-        const auto it = host_thread_ids.find(this_id);
-        if (it == host_thread_ids.end()) {
+        const auto end = register_host_thread_keys.begin() + num_host_threads;
+        const auto it = std::find(register_host_thread_keys.begin(), end, this_id);
+        if (it == end) {
            return Core::INVALID_HOST_THREAD_ID;
        }
-        return it->second;
+        return register_host_thread_values[std::distance(register_host_thread_keys.begin(), it)];
    }

    Core::EmuThreadHandle GetCurrentEmuThreadID() const {
@ -322,10 +327,15 @@ struct KernelCore::Impl {
    std::vector<Kernel::PhysicalCore> cores;

    // 0-3 IDs represent core threads, >3 represent others
-    std::unordered_map<std::thread::id, u32> host_thread_ids;
-    u32 registered_thread_ids{Core::Hardware::NUM_CPU_CORES};
+    std::atomic<u32> registered_thread_ids{Core::Hardware::NUM_CPU_CORES};
    std::bitset<Core::Hardware::NUM_CPU_CORES> registered_core_threads;
-    mutable std::mutex register_thread_mutex;
+
+    // Number of host threads is a relatively high number to avoid overflowing
+    static constexpr size_t NUM_REGISTRABLE_HOST_THREADS = 64;
+    std::atomic<size_t> num_host_threads{0};
+    std::array<std::atomic<std::thread::id>, NUM_REGISTRABLE_HOST_THREADS>
+        register_host_thread_keys{};
+    std::array<std::atomic<u32>, NUM_REGISTRABLE_HOST_THREADS> register_host_thread_values{};

    // Kernel memory management
    std::unique_ptr<Memory::MemoryManager> memory_manager;