kernel/process: Decouple TLS handling from threads

Extracts all of the thread-local storage management out of thread instances themselves and makes the owning process handle the management of the memory. This brings the memory management slightly more in line with how the kernel handles these allocations. Furthermore, this also makes the TLS page management a little more readable compared to the lingering implementation that was carried over from Citra.
2019-06-05 14:32:33 -04:00 · 2019-06-05 14:32:33 -04:00 · abdce723eb
commit abdce723eb
parent 55481df50f
4 changed files with 97 additions and 66 deletions
--- a/src/core/hle/kernel/process.cpp
+++ b/src/core/hle/kernel/process.cpp
@ -3,6 +3,7 @@
 // Refer to the license.txt file included.
 #include <algorithm>
 #include <bitset>
 #include <memory>
 #include <random>
 #include "common/alignment.h"
@ -48,8 +49,58 @@ void SetupMainThread(Process& owner_process, KernelCore& kernel, u32 priority) {
 }
 } // Anonymous namespace
-SharedPtr<Process> Process::Create(Core::System& system, std::string name,
+// Represents a page used for thread-local storage.
-                                   Process::ProcessType type) {
+//
 // Each TLS page contains slots that may be used by processes and threads.
 // Every process and thread is created with a slot in some arbitrary page
 // (whichever page happens to have an available slot).
 class TLSPage {
 public:
    static constexpr std::size_t num_slot_entries = Memory::PAGE_SIZE / Memory::TLS_ENTRY_SIZE;
    explicit TLSPage(VAddr address) : base_address{address} {}
    bool HasAvailableSlots() const {
        return !is_slot_used.all();
    }
    VAddr GetBaseAddress() const {
        return base_address;
    }
    std::optional<VAddr> ReserveSlot() {
        for (std::size_t i = 0; i < is_slot_used.size(); i++) {
            if (is_slot_used[i]) {
                continue;
            }
            is_slot_used[i] = true;
            return base_address + (i * Memory::TLS_ENTRY_SIZE);
        }
        return std::nullopt;
    }
    void ReleaseSlot(VAddr address) {
        // Ensure that all given addresses are consistent with how TLS pages
        // are intended to be used when releasing slots.
        ASSERT(IsWithinPage(address));
        ASSERT((address % Memory::TLS_ENTRY_SIZE) == 0);
        const std::size_t index = (address - base_address) / Memory::TLS_ENTRY_SIZE;
        is_slot_used[index] = false;
    }
 private:
    bool IsWithinPage(VAddr address) const {
        return base_address <= address && address < base_address + Memory::PAGE_SIZE;
    }
    VAddr base_address;
    std::bitset<num_slot_entries> is_slot_used;
 };
 SharedPtr<Process> Process::Create(Core::System& system, std::string name, ProcessType type) {
    auto& kernel = system.Kernel();
    SharedPtr<Process> process(new Process(system));
@ -181,61 +232,55 @@ void Process::PrepareForTermination() {
 }
 /**
- * Finds a free location for the TLS section of a thread.
+ * Attempts to find a TLS page that contains a free slot for
- * @param tls_slots The TLS page array of the thread's owner process.
+ * use by a thread.
- * Returns a tuple of (page, slot, alloc_needed) where:
+ *
- * page: The index of the first allocated TLS page that has free slots.
+ * @returns If a page with an available slot is found, then an iterator
- * slot: The index of the first free slot in the indicated page.
+ *          pointing to the page is returned. Otherwise the end iterator
- * alloc_needed: Whether there's a need to allocate a new TLS page (All pages are full).
+ *          is returned instead.
 */
-static std::tuple<std::size_t, std::size_t, bool> FindFreeThreadLocalSlot(
+static auto FindTLSPageWithAvailableSlots(std::vector<TLSPage>& tls_pages) {
-    const std::vector<std::bitset<8>>& tls_slots) {
+    return std::find_if(tls_pages.begin(), tls_pages.end(),
-    // Iterate over all the allocated pages, and try to find one where not all slots are used.
+                        [](const auto& page) { return page.HasAvailableSlots(); });
    for (std::size_t page = 0; page < tls_slots.size(); ++page) {
        const auto& page_tls_slots = tls_slots[page];
        if (!page_tls_slots.all()) {
            // We found a page with at least one free slot, find which slot it is
            for (std::size_t slot = 0; slot < page_tls_slots.size(); ++slot) {
                if (!page_tls_slots.test(slot)) {
                    return std::make_tuple(page, slot, false);
                }
            }
        }
    }
    return std::make_tuple(0, 0, true);
 }
-VAddr Process::MarkNextAvailableTLSSlotAsUsed(Thread& thread) {
+VAddr Process::CreateTLSRegion() {
-    auto [available_page, available_slot, needs_allocation] = FindFreeThreadLocalSlot(tls_slots);
+    auto tls_page_iter = FindTLSPageWithAvailableSlots(tls_pages);
    const VAddr tls_begin = vm_manager.GetTLSIORegionBaseAddress();
-    if (needs_allocation) {
+    if (tls_page_iter == tls_pages.cend()) {
-        tls_slots.emplace_back(0); // The page is completely available at the start
+        const auto region_address =
-        available_page = tls_slots.size() - 1;
+            vm_manager.FindFreeRegion(vm_manager.GetTLSIORegionBaseAddress(),
-        available_slot = 0; // Use the first slot in the new page
+                                      vm_manager.GetTLSIORegionEndAddress(), Memory::PAGE_SIZE);
        ASSERT(region_address.Succeeded());
-        // Allocate some memory from the end of the linear heap for this region.
+        const auto map_result = vm_manager.MapMemoryBlock(
-        auto& tls_memory = thread.GetTLSMemory();
+            *region_address, std::make_shared<std::vector<u8>>(Memory::PAGE_SIZE), 0,
        tls_memory->insert(tls_memory->end(), Memory::PAGE_SIZE, 0);
        vm_manager.RefreshMemoryBlockMappings(tls_memory.get());
        vm_manager.MapMemoryBlock(tls_begin + available_page * Memory::PAGE_SIZE, tls_memory, 0,
            Memory::PAGE_SIZE, MemoryState::ThreadLocal);
        ASSERT(map_result.Succeeded());
        tls_pages.emplace_back(*region_address);
        const auto reserve_result = tls_pages.back().ReserveSlot();
        ASSERT(reserve_result.has_value());
        return *reserve_result;
    }
-    tls_slots[available_page].set(available_slot);
+    return *tls_page_iter->ReserveSlot();
    return tls_begin + available_page * Memory::PAGE_SIZE + available_slot * Memory::TLS_ENTRY_SIZE;
 }
-void Process::FreeTLSSlot(VAddr tls_address) {
+void Process::FreeTLSRegion(VAddr tls_address) {
-    const VAddr tls_base = tls_address - vm_manager.GetTLSIORegionBaseAddress();
+    const VAddr aligned_address = Common::AlignDown(tls_address, Memory::PAGE_SIZE);
-    const VAddr tls_page = tls_base / Memory::PAGE_SIZE;
+    auto iter =
-    const VAddr tls_slot = (tls_base % Memory::PAGE_SIZE) / Memory::TLS_ENTRY_SIZE;
+        std::find_if(tls_pages.begin(), tls_pages.end(), [aligned_address](const auto& page) {
            return page.GetBaseAddress() == aligned_address;
        });
-    tls_slots[tls_page].reset(tls_slot);
+    // Something has gone very wrong if we're freeing a region
    // with no actual page available.
    ASSERT(iter != tls_pages.cend());
    iter->ReleaseSlot(tls_address);
 }
 void Process::LoadModule(CodeSet module_, VAddr base_addr) {
--- a/src/core/hle/kernel/process.h
+++ b/src/core/hle/kernel/process.h
@ -5,7 +5,6 @@
 #pragma once
 #include <array>
 #include <bitset>
 #include <cstddef>
 #include <list>
 #include <string>
@ -32,6 +31,7 @@ namespace Kernel {
 class KernelCore;
 class ResourceLimit;
 class Thread;
 class TLSPage;
 struct CodeSet;
@ -260,10 +260,10 @@ public:
    // Thread-local storage management
    // Marks the next available region as used and returns the address of the slot.
-    VAddr MarkNextAvailableTLSSlotAsUsed(Thread& thread);
+    [[nodiscard]] VAddr CreateTLSRegion();
    // Frees a used TLS slot identified by the given address
-    void FreeTLSSlot(VAddr tls_address);
+    void FreeTLSRegion(VAddr tls_address);
 private:
    explicit Process(Core::System& system);
@ -310,7 +310,7 @@ private:
    /// holds the TLS for a specific thread. This vector contains which parts are in use for each
    /// page as a bitmask.
    /// This vector will grow as more pages are allocated for new threads.
-    std::vector<std::bitset<8>> tls_slots;
+    std::vector<TLSPage> tls_pages;
    /// Contains the parsed process capability descriptors.
    ProcessCapabilities capabilities;
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@ -65,7 +65,7 @@ void Thread::Stop() {
    owner_process->UnregisterThread(this);
    // Mark the TLS slot in the thread's page as free.
-    owner_process->FreeTLSSlot(tls_address);
+    owner_process->FreeTLSRegion(tls_address);
 }
 void Thread::WakeAfterDelay(s64 nanoseconds) {
@ -205,9 +205,9 @@ ResultVal<SharedPtr<Thread>> Thread::Create(KernelCore& kernel, std::string name
    thread->name = std::move(name);
    thread->callback_handle = kernel.ThreadWakeupCallbackHandleTable().Create(thread).Unwrap();
    thread->owner_process = &owner_process;
    thread->tls_address = thread->owner_process->CreateTLSRegion();
    thread->scheduler = &system.Scheduler(processor_id);
    thread->scheduler->AddThread(thread);
    thread->tls_address = thread->owner_process->MarkNextAvailableTLSSlotAsUsed(*thread);
    thread->owner_process->RegisterThread(thread.get());
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@ -5,7 +5,6 @@
 #pragma once
 #include <functional>
 #include <memory>
 #include <string>
 #include <vector>
@ -78,9 +77,6 @@ enum class ThreadActivity : u32 {
 class Thread final : public WaitObject {
 public:
    using TLSMemory = std::vector<u8>;
    using TLSMemoryPtr = std::shared_ptr<TLSMemory>;
    using MutexWaitingThreads = std::vector<SharedPtr<Thread>>;
    using ThreadContext = Core::ARM_Interface::ThreadContext;
@ -169,14 +165,6 @@ public:
        return thread_id;
    }
    TLSMemoryPtr& GetTLSMemory() {
        return tls_memory;
    }
    const TLSMemoryPtr& GetTLSMemory() const {
        return tls_memory;
    }
    /// Resumes a thread from waiting
    void ResumeFromWait();
@ -463,11 +451,9 @@ private:
    u32 ideal_core{0xFFFFFFFF};
    u64 affinity_mask{0x1};
-    TLSMemoryPtr tls_memory = std::make_shared<TLSMemory>();
+    ThreadActivity activity = ThreadActivity::Normal;
    std::string name;
    ThreadActivity activity = ThreadActivity::Normal;
 };
 /**