From 820d81b9a5392951c18daa5a47d6c0ffd28baa9b Mon Sep 17 00:00:00 2001
From: Zach Hilman <zachhilman@gmail.com>
Date: Thu, 22 Nov 2018 00:33:53 -0500
Subject: [PATCH] scheduler: Add explanations for YieldWith and
 WithoutLoadBalancing

---
 src/common/thread_queue_list.h    |  4 +-
 src/core/hle/kernel/scheduler.cpp | 61 +++++++++++++++++++++++++--
 src/core/hle/kernel/scheduler.h   | 70 ++++++++++++++++++++++++++++++-
 src/core/hle/kernel/svc.cpp       | 21 ++++++----
 src/core/hle/kernel/thread.cpp    | 60 --------------------------
 src/core/hle/kernel/thread.h      |  4 --
 6 files changed, 141 insertions(+), 79 deletions(-)
diff --git a/src/common/thread_queue_list.h b/src/common/thread_queue_list.h
index 323eab97c..e7594db68 100644
--- a/src/common/thread_queue_list.h
+++ b/src/common/thread_queue_list.h
@@ -6,7 +6,6 @@
 
 #include <array>
 #include <deque>
-#include <functional>
 #include <boost/range/algorithm_ext/erase.hpp>
 
 namespace Common {
@@ -50,7 +49,8 @@ struct ThreadQueueList {
         return T();
     }
 
-    T get_first_filter(std::function<bool(T)> filter) const {
+    template <typename UnaryPredicate>
+    T get_first_filter(UnaryPredicate filter) const {
         const Queue* cur = first;
         while (cur != nullptr) {
             if (!cur->data.empty()) {
diff --git a/src/core/hle/kernel/scheduler.cpp b/src/core/hle/kernel/scheduler.cpp
index fb5e14950..624c841ad 100644
--- a/src/core/hle/kernel/scheduler.cpp
+++ b/src/core/hle/kernel/scheduler.cpp
@@ -9,6 +9,7 @@
 #include "common/logging/log.h"
 #include "core/arm/arm_interface.h"
 #include "core/core.h"
+#include "core/core_cpu.h"
 #include "core/core_timing.h"
 #include "core/hle/kernel/kernel.h"
 #include "core/hle/kernel/process.h"
@@ -169,7 +170,7 @@ void Scheduler::UnscheduleThread(Thread* thread, u32 priority) {
     ready_queue.remove(priority, thread);
 }
 
-void Scheduler::RescheduleThread(Thread* thread, u32 priority) {
+void Scheduler::MoveThreadToBackOfPriorityQueue(Thread* thread, u32 priority) {
     std::lock_guard<std::mutex> lock(scheduler_mutex);
 
     // Thread is not in queue
@@ -189,12 +190,64 @@ void Scheduler::SetThreadPriority(Thread* thread, u32 priority) {
         ready_queue.prepare(priority);
 }
 
-Thread* Scheduler::GetNextSuggestedThread(u32 core) {
+Thread* Scheduler::GetNextSuggestedThread(u32 core) const {
     std::lock_guard<std::mutex> lock(scheduler_mutex);
 
-    const auto mask = 1 << core;
+    const u32 mask = 1U << core;
     return ready_queue.get_first_filter(
-        [&mask](Thread* thread) { return (thread->GetAffinityMask() & mask) != 0; });
+        [mask](Thread const* thread) { return (thread->GetAffinityMask() & mask) != 0; });
+}
+
+void Scheduler::YieldWithoutLoadBalancing(Thread* thread) {
+    ASSERT(thread != nullptr);
+    // Only a running thread may yield; any other status trips this assertion.
+    ASSERT(thread->GetStatus() == ThreadStatus::Running);
+
+    // Sanity check that the priority is valid (below THREADPRIO_COUNT).
+    ASSERT(thread->GetPriority() < THREADPRIO_COUNT);
+
+    // Yield: move the thread to the back of its priority queue, then reschedule.
+    MoveThreadToBackOfPriorityQueue(thread, thread->GetPriority());
+    Reschedule();
+}
+
+void Scheduler::YieldWithLoadBalancing(Thread* thread) {
+    ASSERT(thread != nullptr);
+    const auto priority = thread->GetPriority();
+    const auto core = static_cast<u32>(thread->GetProcessorID());
+
+    // Only a running thread may yield; any other status trips this assertion.
+    ASSERT(thread->GetStatus() == ThreadStatus::Running);
+
+    // Sanity check that the priority is valid (below THREADPRIO_COUNT).
+    ASSERT(priority < THREADPRIO_COUNT);
+
+    // Move the yielding thread to the back of the queue for its priority.
+    MoveThreadToBackOfPriorityQueue(thread, priority);
+
+    Thread* suggested_thread = nullptr;
+
+    // Ask every other core's scheduler for a ready thread whose affinity mask allows this core,
+    // and take the first non-nullptr answer (cores are visited in ascending order).
+    for (unsigned cur_core = 0; cur_core < Core::NUM_CPU_CORES; ++cur_core) {
+        if (cur_core == core)
+            continue;
+
+        const auto res =
+            Core::System::GetInstance().CpuCore(cur_core).Scheduler().GetNextSuggestedThread(core);
+        if (res != nullptr) {
+            suggested_thread = res;
+            break;
+        }
+    }
+
+    // If a suggested thread was found, change its core to this one, passing its existing mask.
+    if (suggested_thread != nullptr)
+        suggested_thread->ChangeCore(core, suggested_thread->GetAffinityMask());
+}
+
+void Scheduler::YieldAndWaitForLoadBalancing(Thread* thread) { // Stub; `thread` is unused
+    UNIMPLEMENTED_MSG("Wait for load balancing thread yield type is not implemented!");
 }
 
 } // namespace Kernel
diff --git a/src/core/hle/kernel/scheduler.h b/src/core/hle/kernel/scheduler.h
index 8444afdbc..71b32589a 100644
--- a/src/core/hle/kernel/scheduler.h
+++ b/src/core/hle/kernel/scheduler.h
@@ -49,13 +49,79 @@ public:
     void UnscheduleThread(Thread* thread, u32 priority);
 
     /// Moves a thread to the back of the current priority queue
-    void RescheduleThread(Thread* thread, u32 priority);
+    void MoveThreadToBackOfPriorityQueue(Thread* thread, u32 priority);
 
     /// Sets the priority of a thread in the scheduler
     void SetThreadPriority(Thread* thread, u32 priority);
 
     /// Gets the next suggested thread for load balancing
-    Thread* GetNextSuggestedThread(u32 core);
+    Thread* GetNextSuggestedThread(u32 core) const;
+
+    /**
+     * YieldWithoutLoadBalancing -- analogous to normal yield on a system
+     * Moves the thread to the end of the ready queue for its priority, and then reschedules the
+     * system to the new head of the queue.
+     *
+     * Example (Single Core -- but can be extrapolated to multi):
+     * ready_queue[prio=0]: ThreadA, ThreadB, ThreadC (->exec order->)
+     * Currently Running: ThreadR
+     *
+     * ThreadR calls YieldWithoutLoadBalancing
+     *
+     * ThreadR is moved to the end of ready_queue[prio=0]:
+     * ready_queue[prio=0]: ThreadA, ThreadB, ThreadC, ThreadR (->exec order->)
+     * Currently Running: Nothing
+     *
+     * System is rescheduled (ThreadA is popped off of queue):
+     * ready_queue[prio=0]: ThreadB, ThreadC, ThreadR (->exec order->)
+     * Currently Running: ThreadA
+     *
+     * If the queue is empty at time of call, no yielding occurs. This does not cross between cores
+     * or priorities at all.
+     */
+    void YieldWithoutLoadBalancing(Thread* thread);
+
+    /**
+     * YieldWithLoadBalancing -- yield but with better selection of the new running thread
+     * Moves the current thread to the end of the ready queue for its priority, then selects a
+     * 'suggested thread' (a thread on a different core that could run on this core) from the
+     * scheduler, changes its core, and reschedules the current core to that thread.
+     *
+     * Example (Dual Core -- can be extrapolated to Quad Core, this is just normal yield if it were
+     * single core):
+     * ready_queue[core=0][prio=0]: ThreadA, ThreadB (affinities not pictured as irrelevant)
+     * ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only]
+     * Currently Running: ThreadQ on Core 0 || ThreadP on Core 1
+     *
+     * ThreadQ calls YieldWithLoadBalancing
+     *
+     * ThreadQ is moved to the end of ready_queue[core=0][prio=0]:
+     * ready_queue[core=0][prio=0]: ThreadA, ThreadB, ThreadQ
+     * ready_queue[core=1][prio=0]: ThreadC[affinity=both], ThreadD[affinity=core1only]
+     * Currently Running: None on Core 0 || ThreadP on Core 1
+     *
+     * A list of suggested threads (at most one per other core) is compiled:
+     * Suggested Threads: {ThreadC on Core 1}
+     * With four cores (as on the Switch) this list holds between 0 and 3 threads. If there is
+     * more than one, highest prio should win (NOTE: currently the first one found is taken).
+     *
+     * ThreadC is core changed to Core 0:
+     * ready_queue[core=0][prio=0]: ThreadC, ThreadA, ThreadB, ThreadQ
+     * ready_queue[core=1][prio=0]: ThreadD
+     * Currently Running: None on Core 0 || ThreadP on Core 1
+     *
+     * System is rescheduled (ThreadC is popped off of queue):
+     * ready_queue[core=0][prio=0]: ThreadA, ThreadB, ThreadQ
+     * ready_queue[core=1][prio=0]: ThreadD
+     * Currently Running: ThreadC on Core 0 || ThreadP on Core 1
+     *
+     * If no suggested threads can be found this will behave just as normal yield. If there are
+     * multiple candidates for the suggested thread on a core, the highest prio is taken.
+     */
+    void YieldWithLoadBalancing(Thread* thread);
+
+    /// Currently unknown -- asserts as unimplemented on call
+    void YieldAndWaitForLoadBalancing(Thread* thread);
 
     /// Returns a list of all threads managed by the scheduler
     const std::vector<SharedPtr<Thread>>& GetThreadList() const {
diff --git a/src/core/hle/kernel/svc.cpp b/src/core/hle/kernel/svc.cpp
index 467575c93..205706033 100644
--- a/src/core/hle/kernel/svc.cpp
+++ b/src/core/hle/kernel/svc.cpp
@@ -965,16 +965,23 @@ static void SleepThread(s64 nanoseconds) {
     if (!Core::System::GetInstance().CurrentScheduler().HaveReadyThreads())
         return;
 
+    enum class SleepType : s64 {
+        YieldWithoutLoadBalancing = 0,
+        YieldWithLoadBalancing = 1,
+        YieldAndWaitForLoadBalancing = 2,
+    };
+
     if (nanoseconds <= 0) {
-        switch (nanoseconds) {
-        case 0:
-            GetCurrentThread()->YieldNormal();
+        auto& scheduler{Core::System::GetInstance().CurrentScheduler()};
+        switch (static_cast<SleepType>(nanoseconds)) {
+        case SleepType::YieldWithoutLoadBalancing:
+            scheduler.YieldWithoutLoadBalancing(GetCurrentThread());
             break;
-        case -1:
-            GetCurrentThread()->YieldWithLoadBalancing();
+        case SleepType::YieldWithLoadBalancing:
+            scheduler.YieldWithLoadBalancing(GetCurrentThread());
             break;
-        case -2:
-            GetCurrentThread()->YieldAndWaitForLoadBalancing();
+        case SleepType::YieldAndWaitForLoadBalancing:
+            scheduler.YieldAndWaitForLoadBalancing(GetCurrentThread());
             break;
         default:
             UNREACHABLE_MSG(
diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp
index ddc4da1c0..4ffb76818 100644
--- a/src/core/hle/kernel/thread.cpp
+++ b/src/core/hle/kernel/thread.cpp
@@ -388,66 +388,6 @@ bool Thread::InvokeWakeupCallback(ThreadWakeupReason reason, SharedPtr<Thread> t
     return wakeup_callback(reason, std::move(thread), std::move(object), index);
 }
 
-void Thread::YieldNormal() {
-    // Avoid yielding if the thread isn't even running.
-    if (status != ThreadStatus::Running) {
-        return;
-    }
-
-    if (nominal_priority < THREADPRIO_COUNT) {
-        scheduler->RescheduleThread(this, nominal_priority);
-        scheduler->Reschedule();
-    }
-}
-
-void Thread::YieldWithLoadBalancing() {
-    auto priority = nominal_priority;
-    auto core = processor_id;
-
-    // Avoid yielding if the thread isn't even running.
-    if (status != ThreadStatus::Running) {
-        Core::System::GetInstance().CpuCore(processor_id).PrepareReschedule();
-        return;
-    }
-
-    SharedPtr<Thread> next;
-    const auto& threads = scheduler->GetThreadList();
-
-    if (priority < THREADPRIO_COUNT) {
-        // Reschedule thread to end of queue.
-        scheduler->RescheduleThread(this, priority);
-
-        const auto iter = std::find_if(threads.begin(), threads.end(),
-                                       [&priority](const SharedPtr<Thread>& thread) {
-                                           return thread->GetNominalPriority() == priority;
-                                       });
-
-        if (iter != threads.end())
-            next = iter->get();
-    }
-
-    Thread* suggested_thread = nullptr;
-
-    for (int i = 0; i < 4; ++i) {
-        if (i == core)
-            continue;
-
-        const auto res =
-            Core::System::GetInstance().CpuCore(i).Scheduler().GetNextSuggestedThread(core);
-        if (res != nullptr) {
-            suggested_thread = res;
-            break;
-        }
-    }
-
-    if (suggested_thread != nullptr)
-        suggested_thread->ChangeCore(core, suggested_thread->GetAffinityMask());
-}
-
-void Thread::YieldAndWaitForLoadBalancing() {
-    UNIMPLEMENTED_MSG("Wait for load balancing thread yield type is not implemented!");
-}
-
 ////////////////////////////////////////////////////////////////////////////////////////////////////
 
 /**
diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h
index e97434dd8..77aec099a 100644
--- a/src/core/hle/kernel/thread.h
+++ b/src/core/hle/kernel/thread.h
@@ -371,10 +371,6 @@ public:
         return affinity_mask;
     }
 
-    void YieldNormal();
-    void YieldWithLoadBalancing();
-    void YieldAndWaitForLoadBalancing();
-
 private:
     explicit Thread(KernelCore& kernel);
     ~Thread() override;